cli-test-framework 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli_test_framework/__init__.py +21 -0
- cli_test_framework/cli.py +132 -0
- cli_test_framework/core/__init__.py +15 -0
- {core → cli_test_framework/core}/assertions.py +31 -31
- cli_test_framework/file_comparator/__init__.py +9 -0
- cli_test_framework/file_comparator/base_comparator.py +107 -0
- cli_test_framework/file_comparator/binary_comparator.py +242 -0
- cli_test_framework/file_comparator/csv_comparator.py +140 -0
- cli_test_framework/file_comparator/factory.py +141 -0
- cli_test_framework/file_comparator/h5_comparator.py +423 -0
- cli_test_framework/file_comparator/json_comparator.py +326 -0
- cli_test_framework/file_comparator/result.py +191 -0
- cli_test_framework/file_comparator/text_comparator.py +177 -0
- cli_test_framework/file_comparator/xml_comparator.py +150 -0
- cli_test_framework/runners/__init__.py +13 -0
- {runners → cli_test_framework/runners}/yaml_runner.py +92 -92
- cli_test_framework/utils/__init__.py +11 -0
- {utils → cli_test_framework/utils}/report_generator.py +67 -67
- cli_test_framework-0.2.2.dist-info/METADATA +445 -0
- cli_test_framework-0.2.2.dist-info/RECORD +30 -0
- cli_test_framework-0.2.2.dist-info/top_level.txt +1 -0
- cli_test_framework-0.2.0.dist-info/METADATA +0 -21
- cli_test_framework-0.2.0.dist-info/RECORD +0 -18
- cli_test_framework-0.2.0.dist-info/top_level.txt +0 -3
- core/__init__.py +0 -4
- runners/__init__.py +0 -3
- utils/__init__.py +0 -3
- {core → cli_test_framework/core}/base_runner.py +0 -0
- {core → cli_test_framework/core}/parallel_runner.py +0 -0
- {core → cli_test_framework/core}/process_worker.py +0 -0
- {core → cli_test_framework/core}/test_case.py +0 -0
- {runners → cli_test_framework/runners}/json_runner.py +0 -0
- {runners → cli_test_framework/runners}/parallel_json_runner.py +0 -0
- {utils → cli_test_framework/utils}/path_resolver.py +0 -0
- {cli_test_framework-0.2.0.dist-info → cli_test_framework-0.2.2.dist-info}/WHEEL +0 -0
- {cli_test_framework-0.2.0.dist-info → cli_test_framework-0.2.2.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CLI Test Framework - A powerful command-line testing framework
|
|
3
|
+
|
|
4
|
+
This package provides tools for testing command-line applications and scripts
|
|
5
|
+
with support for parallel execution and advanced file comparison capabilities.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__version__ = "0.2.2"
|
|
9
|
+
__author__ = "Xiaotong Wang"
|
|
10
|
+
__email__ = "xiaotongwang98@gmail.com"
|
|
11
|
+
|
|
12
|
+
# Import main classes for convenient access
|
|
13
|
+
from .runners.json_runner import JSONRunner
|
|
14
|
+
from .runners.parallel_json_runner import ParallelJSONRunner
|
|
15
|
+
from .runners.yaml_runner import YAMLRunner
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
'JSONRunner',
|
|
19
|
+
'ParallelJSONRunner',
|
|
20
|
+
'YAMLRunner',
|
|
21
|
+
]
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
CLI Test Framework - Command Line Interface
|
|
6
|
+
|
|
7
|
+
This module provides the main command-line interface for the CLI Testing Framework.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import sys
|
|
12
|
+
import os
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from .runners import JSONRunner, ParallelJSONRunner, YAMLRunner
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _positive_int(value):
    """
    Convert a command-line value to a strictly positive integer.

    Used as an argparse ``type=`` callable so that a zero, negative or
    non-numeric worker count is reported as a normal usage error at parse
    time rather than being forwarded to the runner.

    Raises:
        argparse.ArgumentTypeError: If the value is not an integer >= 1.
    """
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {number}")
    return number


def create_parser():
    """
    Create and configure the argument parser.

    The parser exposes a single ``run`` sub-command; the selected sub-command
    name is stored in ``args.command`` (``None`` when none was given).

    Returns:
        argparse.ArgumentParser: The fully configured top-level parser.
    """
    parser = argparse.ArgumentParser(
        description="CLI Testing Framework - A powerful tool for testing command-line applications",
        # Raw formatter keeps the line breaks of the examples in the epilog.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
cli-test run test_cases.json
cli-test run test_cases.json --parallel --workers 4
cli-test run test_cases.yaml --workspace /path/to/project
"""
    )

    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # Run command
    run_parser = subparsers.add_parser('run', help='Run test cases from a configuration file')
    run_parser.add_argument('config_file', help='Path to the test configuration file (JSON or YAML)')
    run_parser.add_argument('--workspace', '-w', help='Working directory for test execution')
    run_parser.add_argument('--parallel', '-p', action='store_true', help='Run tests in parallel')
    # A worker count below 1 cannot describe a worker pool, so reject it here.
    run_parser.add_argument('--workers', type=_positive_int,
                            help='Number of parallel workers (default: CPU count)')
    run_parser.add_argument('--execution-mode', choices=['thread', 'process'], default='thread',
                            help='Parallel execution mode (default: thread)')
    run_parser.add_argument('--output-format', choices=['text', 'json', 'html'], default='text',
                            help='Output format for test results')
    run_parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose output')
    run_parser.add_argument('--debug', action='store_true', help='Enable debug mode')

    return parser
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def run_tests(args):
    """
    Run tests based on command line arguments.

    Reads ``config_file``, ``workspace``, ``parallel``, ``workers``,
    ``execution_mode``, ``verbose`` and ``debug`` from ``args``.

    Args:
        args: Parsed namespace of the ``run`` sub-command.

    Returns:
        The value returned by the runner's ``run_tests()``, or ``False`` when
        the configuration file is missing, has an unsupported extension, or
        any exception is raised while building or running the runner.
    """
    config_file = Path(args.config_file)

    if not config_file.exists():
        print(f"Error: Configuration file not found: {config_file}")
        return False

    # Determine file type
    file_ext = config_file.suffix.lower()
    is_json = file_ext == '.json'
    is_yaml = file_ext in ('.yaml', '.yml')

    # Validate the format before choosing a runner, so that parallel mode
    # rejects unsupported files exactly like sequential mode does.
    if not (is_json or is_yaml):
        print(f"Error: Unsupported configuration file format: {file_ext}")
        return False

    try:
        if args.parallel:
            # NOTE(review): YAML files are handed to ParallelJSONRunner here,
            # as before; confirm that this runner can load YAML configs.
            runner = ParallelJSONRunner(
                config_file=str(config_file),
                workspace=args.workspace,
                max_workers=args.workers,
                execution_mode=args.execution_mode
            )
        elif is_json:
            runner = JSONRunner(
                config_file=str(config_file),
                workspace=args.workspace
            )
        else:
            runner = YAMLRunner(
                config_file=str(config_file),
                workspace=args.workspace
            )

        # Run tests
        print(f"Running tests from: {config_file}")
        if args.parallel:
            print(f"Parallel mode: {args.execution_mode}, workers: {args.workers or 'auto'}")

        success = runner.run_tests()

        # Output results (only for runners that expose a `results` mapping)
        if hasattr(runner, 'results'):
            results = runner.results
            print("\nTest Results:")
            print(f"Total tests: {results.get('total_tests', 0)}")
            print(f"Passed: {results.get('passed', 0)}")
            print(f"Failed: {results.get('failed', 0)}")

            if args.verbose and 'details' in results:
                print("\nDetailed Results:")
                for result in results['details']:
                    status_symbol = "✓" if result['status'] == 'passed' else "✗"
                    print(f" {status_symbol} {result['name']}: {result['status']}")
                    if result['status'] == 'failed' and result.get('message'):
                        print(f" Error: {result['message']}")

        return success

    except Exception as e:
        # Top-level CLI boundary: report the failure instead of crashing,
        # and show the traceback only when --debug was requested.
        print(f"Error running tests: {e}")
        if args.debug:
            import traceback
            traceback.print_exc()
        return False
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def main():
    """
    Entry point of the command-line interface.

    Parses ``sys.argv`` and dispatches the ``run`` sub-command. The process
    always terminates through ``sys.exit``: status 0 when the test run
    succeeded, status 1 when it failed or when no sub-command was given
    (in which case the help text is printed first).
    """
    cli_parser = create_parser()
    parsed = cli_parser.parse_args()

    # Anything other than the `run` command: show usage and fail.
    if parsed.command != 'run':
        cli_parser.print_help()
        sys.exit(1)

    exit_status = 0 if run_tests(parsed) else 1
    sys.exit(exit_status)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
if __name__ == '__main__':
|
|
132
|
+
main()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core components for the CLI Testing Framework
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from .base_runner import BaseRunner
|
|
6
|
+
from .parallel_runner import ParallelRunner
|
|
7
|
+
from .test_case import TestCase
|
|
8
|
+
from .assertions import Assertions
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
'BaseRunner',
|
|
12
|
+
'ParallelRunner',
|
|
13
|
+
'TestCase',
|
|
14
|
+
'Assertions'
|
|
15
|
+
]
|
|
@@ -1,32 +1,32 @@
|
|
|
1
|
-
import re
|
|
2
|
-
from typing import Any, Pattern
|
|
3
|
-
|
|
4
|
-
class Assertions:
|
|
5
|
-
@staticmethod
|
|
6
|
-
def equals(actual: Any, expected: Any, message: str = "") -> bool:
|
|
7
|
-
if actual != expected:
|
|
8
|
-
raise AssertionError(f"{message} Expected: {expected}, but got: {actual}")
|
|
9
|
-
return True
|
|
10
|
-
|
|
11
|
-
@staticmethod
|
|
12
|
-
def contains(container: str, item: str, message: str = "") -> bool:
|
|
13
|
-
"""
|
|
14
|
-
Check if the item is contained within the container string.
|
|
15
|
-
This method returns True if the item is found anywhere within the container,
|
|
16
|
-
even if the container contains other information.
|
|
17
|
-
"""
|
|
18
|
-
if item not in container:
|
|
19
|
-
raise AssertionError(f"{message} Expected to contain: {item}")
|
|
20
|
-
return True
|
|
21
|
-
|
|
22
|
-
@staticmethod
|
|
23
|
-
def matches(text: str, pattern: str, message: str = "") -> bool:
|
|
24
|
-
if not re.search(pattern, text):
|
|
25
|
-
raise AssertionError(f"{message} Text does not match pattern: {pattern}")
|
|
26
|
-
return True
|
|
27
|
-
|
|
28
|
-
@staticmethod
|
|
29
|
-
def return_code_equals(actual: int, expected: int, message: str = "") -> bool:
|
|
30
|
-
if actual != expected:
|
|
31
|
-
raise AssertionError(f"{message} Expected return code: {expected}, got: {actual}")
|
|
1
|
+
import re
|
|
2
|
+
from typing import Any, Pattern
|
|
3
|
+
|
|
4
|
+
class Assertions:
    """
    Collection of static assertion helpers.

    Every helper returns True when the check holds and raises AssertionError,
    with the caller's message as a prefix, when it does not.
    """

    @staticmethod
    def equals(actual: Any, expected: Any, message: str = "") -> bool:
        """Check that actual and expected compare equal."""
        differs = actual != expected
        if differs:
            raise AssertionError(f"{message} Expected: {expected}, but got: {actual}")
        return True

    @staticmethod
    def contains(container: str, item: str, message: str = "") -> bool:
        """
        Check that item occurs somewhere inside container.

        The container may hold any amount of additional text around the item.
        """
        found = item in container
        if not found:
            raise AssertionError(f"{message} Expected to contain: {item}")
        return True

    @staticmethod
    def matches(text: str, pattern: str, message: str = "") -> bool:
        """Check that the regular expression pattern matches anywhere in text."""
        hit = re.search(pattern, text)
        if hit is None:
            raise AssertionError(f"{message} Text does not match pattern: {pattern}")
        return True

    @staticmethod
    def return_code_equals(actual: int, expected: int, message: str = "") -> bool:
        """Check that a process return code has the expected value."""
        mismatch = actual != expected
        if mismatch:
            raise AssertionError(f"{message} Expected return code: {expected}, got: {actual}")
        return True
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""
|
|
2
|
+
File comparison module for cli-test-framework.
|
|
3
|
+
This module provides functionality for comparing different types of files.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .factory import ComparatorFactory
|
|
7
|
+
from .result import ComparisonResult
|
|
8
|
+
|
|
9
|
+
__all__ = ['ComparatorFactory', 'ComparisonResult']
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
@file base_comparator.py
|
|
6
|
+
@brief Base abstract class for file comparison operations
|
|
7
|
+
@author Xiaotong Wang
|
|
8
|
+
@date 2025
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from abc import ABC, abstractmethod
|
|
12
|
+
import logging
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from .result import ComparisonResult, Difference
|
|
15
|
+
|
|
16
|
+
class BaseComparator(ABC):
    """
    @brief Abstract base class shared by every file comparator
    @details Subclasses supply read_content() and compare_content(); this class
             provides the common configuration, a per-class logger and the
             compare_files() driver that ties the two hooks together.
    """

    def __init__(self, encoding="utf-8", chunk_size=8192, verbose=False):
        """
        @brief Store the common comparator settings and set up logging
        @param encoding str: File encoding to use (default: "utf-8")
        @param chunk_size int: Size of chunks for reading large files (default: 8192)
        @param verbose bool: When true, the comparator's logger is set to DEBUG level
        """
        self.encoding = encoding
        self.chunk_size = chunk_size
        # One logger per concrete comparator class, e.g. "file_comparator.TextComparator".
        logger_name = f"file_comparator.{self.__class__.__name__}"
        self.logger = logging.getLogger(logger_name)
        if verbose:
            self.logger.setLevel(logging.DEBUG)

    @abstractmethod
    def read_content(self, file_path, start_line=0, end_line=None, start_column=0, end_column=None):
        """
        @brief Read the requested range of a file
        @param file_path Path: Path to the file to read
        @param start_line int: Starting line number (0-based)
        @param end_line int: Ending line number (0-based, None for end of file)
        @param start_column int: Starting column number (0-based)
        @param end_column int: Ending column number (0-based, None for end of line)
        @return object: File content in a format suitable for comparison
        """

    @abstractmethod
    def compare_content(self, content1, content2):
        """
        @brief Compare two content objects produced by read_content()
        @param content1 object: First content object to compare
        @param content2 object: Second content object to compare
        @return tuple: (bool, list) - (identical, differences)
        """

    def compare_files(self, file1, file2, start_line=0, end_line=None, start_column=0, end_column=None):
        """
        @brief Compare two files over the given line/column range
        @param file1 Path: Path to the first file
        @param file2 Path: Path to the second file
        @param start_line int: Starting line number (0-based)
        @param end_line int: Ending line number (0-based, None for end of file)
        @param start_column int: Starting column number (0-based)
        @param end_column int: Ending column number (0-based, None for end of line)
        @return ComparisonResult: Result object containing comparison details
        @details Any exception raised while reading or comparing is logged and
                 recorded in the result's `error` field; the result is then
                 marked as not identical instead of the exception propagating.
        """
        outcome = ComparisonResult(
            file1=str(file1),
            file2=str(file2),
            start_line=start_line,
            end_line=end_line,
            start_column=start_column,
            end_column=end_column,
        )

        try:
            self.logger.info(f"Comparing files: {file1} and {file2}")

            # File sizes are recorded as metadata on the result.
            first_path = Path(file1)
            second_path = Path(file2)
            outcome.file1_size = first_path.stat().st_size
            outcome.file2_size = second_path.stat().st_size

            # Load both sides restricted to the same range.
            self.logger.debug("Reading content from files")
            range_args = (start_line, end_line, start_column, end_column)
            first_content = self.read_content(file1, *range_args)
            second_content = self.read_content(file2, *range_args)

            # Delegate the actual comparison to the subclass.
            self.logger.debug("Comparing content")
            same, found_differences = self.compare_content(first_content, second_content)
            outcome.identical = same
            outcome.differences = found_differences
        except Exception as exc:
            self.logger.error(f"Error during comparison: {str(exc)}")
            outcome.error = str(exc)
            outcome.identical = False

        return outcome
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
@file binary_comparator.py
|
|
6
|
+
@brief Binary file comparator implementation with efficient byte-level comparison
|
|
7
|
+
@author Xiaotong Wang
|
|
8
|
+
@date 2025
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
from .base_comparator import BaseComparator
|
|
13
|
+
from .result import Difference
|
|
14
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
15
|
+
|
|
16
|
+
class BinaryComparator(BaseComparator):
    """
    @brief Comparator for binary files with efficient byte-level comparison
    @details This class implements binary file comparison with support for:
             - Byte-level difference detection
             - Similarity index calculation using LCS
             - File hash calculation
             The `num_threads` setting is still accepted and stored for
             backward compatibility, but the LCS computation runs sequentially
             because each row of the LCS table depends on the previous one.
    """

    def __init__(self, encoding="utf-8", chunk_size=8192, verbose=False, similarity=False, num_threads=4):
        """
        @brief Initialize the binary comparator
        @param encoding str: File encoding (not used for binary files)
        @param chunk_size int: Size of the chunks compared at a time in compare_content()
        @param verbose bool: Enable verbose logging
        @param similarity bool: Enable similarity index calculation in compare_files()
        @param num_threads int: Stored for backward compatibility; not used by the comparison
        """
        super().__init__(encoding, chunk_size, verbose)
        self.similarity = similarity
        self.num_threads = num_threads

    def read_content(self, file_path, start_line=0, end_line=None, start_column=0, end_column=None):
        """
        @brief Read binary content with specified range
        @param file_path Path: Path to the binary file to read
        @param start_line int: Starting byte offset (interpreted as bytes for binary files)
        @param end_line int: Ending byte offset, exclusive (None reads to end of file)
        @param start_column int: Ignored for binary files
        @param end_column int: Ignored for binary files
        @return bytes: Binary content within the specified range
        @throws ValueError: If the end offset is not greater than the start offset,
                            if the file does not exist, or if reading fails
                            (the original OSError is attached as the cause)
        """
        try:
            self.logger.debug(f"Reading binary file: {file_path}")

            # For binary files the "line" arguments are byte offsets.
            start_offset = start_line
            end_offset = end_line

            with open(file_path, 'rb') as f:
                if start_offset > 0:
                    f.seek(start_offset)

                if end_offset is None:
                    return f.read()

                if end_offset <= start_offset:
                    raise ValueError("End offset must be greater than start offset")
                return f.read(end_offset - start_offset)

        except FileNotFoundError as e:
            # Callers receive ValueError; chaining keeps the real cause visible.
            raise ValueError(f"File not found: {file_path}") from e
        except IOError as e:
            raise ValueError(f"Error reading file {file_path}: {str(e)}") from e

    def compare_content(self, content1, content2):
        """
        @brief Compare binary content efficiently
        @param content1 bytes: First binary content to compare
        @param content2 bytes: Second binary content to compare
        @return tuple: (bool, list) - (identical, differences)
        @details Contents of different length yield a single "size" difference.
                 Otherwise the data is walked in chunks of `chunk_size` bytes and
                 the first differing byte of each differing chunk is reported
                 with up to 8 bytes of hex context on either side. At most 10
                 differences are reported; if a further differing chunk exists,
                 a final "more differences not shown" marker is appended.
        """
        self.logger.debug("Comparing binary content")

        if len(content1) != len(content2):
            size_difference = Difference(
                position="file size",
                expected=f"{len(content1)} bytes",
                actual=f"{len(content2)} bytes",
                diff_type="size"
            )
            return False, [size_difference]

        if content1 == content2:
            return True, []

        max_differences = 10  # Limit number of differences reported
        context_size = 8      # Bytes of context shown around each difference
        differences = []

        for chunk_start in range(0, len(content1), self.chunk_size):
            chunk1 = content1[chunk_start:chunk_start + self.chunk_size]
            chunk2 = content2[chunk_start:chunk_start + self.chunk_size]
            if chunk1 == chunk2:
                continue

            # Only claim "more differences" when another differing chunk
            # really exists beyond the reporting limit.
            if len(differences) >= max_differences:
                differences.append(Difference(
                    position=None,
                    expected=None,
                    actual=None,
                    diff_type="more differences not shown"
                ))
                break

            # Both chunks have equal length here, so a mismatch must exist.
            first_mismatch = next(
                k for k, (x, y) in enumerate(zip(chunk1, chunk2)) if x != y
            )
            diff_pos = chunk_start + first_mismatch
            start_ctx = max(0, diff_pos - context_size)
            end_ctx = min(len(content1), diff_pos + context_size)

            expected_hex = ' '.join(f"{b:02x}" for b in content1[start_ctx:end_ctx])
            actual_hex = ' '.join(f"{b:02x}" for b in content2[start_ctx:end_ctx])

            differences.append(Difference(
                position=f"byte {diff_pos}",
                expected=expected_hex,
                actual=actual_hex,
                diff_type="content"
            ))

        return False, differences

    def compute_lcs_length(self, a: bytes, b: bytes) -> int:
        """
        @brief Compute the length of the longest common subsequence
        @param a bytes: First binary sequence
        @param b bytes: Second binary sequence
        @return int: Length of the longest common subsequence
        @details Uses the classic dynamic programme keeping only two rows of
                 the table. A common prefix and suffix are stripped first, as
                 they always belong to a longest common subsequence. The rows
                 are computed sequentially: every row depends on the previous
                 one, so splitting the rows of `a` between independent workers
                 and summing their results does not yield the LCS length.
        """
        if not a or not b:
            return 0

        # Strip the common prefix.
        shortest = min(len(a), len(b))
        prefix = 0
        while prefix < shortest and a[prefix] == b[prefix]:
            prefix += 1
        a, b = a[prefix:], b[prefix:]

        # Strip the common suffix of what remains.
        shortest -= prefix
        suffix = 0
        while suffix < shortest and a[-1 - suffix] == b[-1 - suffix]:
            suffix += 1
        if suffix:
            a, b = a[:-suffix], b[:-suffix]

        matched = prefix + suffix
        if not a or not b:
            return matched

        # Keep the DP rows as short as possible: iterate over the longer input.
        if len(b) > len(a):
            a, b = b, a

        previous = [0] * (len(b) + 1)
        for byte_a in a:
            current = [0]
            for j, byte_b in enumerate(b, 1):
                if byte_a == byte_b:
                    current.append(previous[j - 1] + 1)
                else:
                    current.append(max(previous[j], current[j - 1]))
            previous = current

        return matched + previous[-1]

    def compare_files(self, file1, file2, start_line=0, end_line=None, start_column=0, end_column=None):
        """
        @brief Compare two binary files with optional similarity calculation
        @param file1 Path: Path to the first binary file
        @param file2 Path: Path to the second binary file
        @param start_line int: Starting byte offset
        @param end_line int: Ending byte offset
        @param start_column int: Ignored for binary files
        @param end_column int: Ignored for binary files
        @return ComparisonResult: Result object containing comparison details
        @details When similarity calculation is enabled, the result additionally
                 carries `similarity` = 2 * LCS / (len1 + len2), or 1.0 when
                 both contents are empty. Any exception is logged and stored in
                 the result's `error` field instead of propagating.
        """
        from pathlib import Path
        from .result import ComparisonResult

        result = ComparisonResult(
            file1=str(file1),
            file2=str(file2),
            start_line=start_line,
            end_line=end_line,
            start_column=start_column,
            end_column=end_column
        )
        try:
            self.logger.info(f"Comparing files: {file1} and {file2}")
            file1_path = Path(file1)
            file2_path = Path(file2)
            result.file1_size = file1_path.stat().st_size
            result.file2_size = file2_path.stat().st_size

            self.logger.debug("Reading content from files")
            content1 = self.read_content(file1, start_line, end_line, start_column, end_column)
            content2 = self.read_content(file2, start_line, end_line, start_column, end_column)

            self.logger.debug("Comparing content")
            identical, differences = self.compare_content(content1, content2)
            result.identical = identical
            result.differences = differences

            if self.similarity:
                total_length = len(content1) + len(content2)
                if total_length > 0:
                    lcs_len = self.compute_lcs_length(content1, content2)
                    similarity = 2 * lcs_len / total_length
                else:
                    # Two empty contents are identical; keep the value a float
                    # like every other similarity result.
                    similarity = 1.0
                result.similarity = similarity
            return result
        except Exception as e:
            self.logger.error(f"Error during comparison: {str(e)}")
            result.error = str(e)
            result.identical = False
            return result

    def get_file_hash(self, file_path, chunk_size=8192):
        """
        @brief Calculate SHA-256 hash of a file efficiently
        @param file_path Path: Path to the file to hash
        @param chunk_size int: Number of bytes read per iteration
        @return str: Hexadecimal representation of the file's SHA-256 hash
        """
        h = hashlib.sha256()
        with open(file_path, 'rb') as f:
            # Read fixed-size chunks until read() returns the empty sentinel.
            for chunk in iter(lambda: f.read(chunk_size), b''):
                h.update(chunk)
        return h.hexdigest()
|