compare-dir 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compare_dir-0.1.0/PKG-INFO +11 -0
- compare_dir-0.1.0/README.md +3 -0
- compare_dir-0.1.0/pyproject.toml +13 -0
- compare_dir-0.1.0/setup.cfg +4 -0
- compare_dir-0.1.0/src/compare_dir/__init__.py +0 -0
- compare_dir-0.1.0/src/compare_dir/compare_dir.py +271 -0
- compare_dir-0.1.0/src/compare_dir.egg-info/PKG-INFO +11 -0
- compare_dir-0.1.0/src/compare_dir.egg-info/SOURCES.txt +9 -0
- compare_dir-0.1.0/src/compare_dir.egg-info/dependency_links.txt +1 -0
- compare_dir-0.1.0/src/compare_dir.egg-info/entry_points.txt +2 -0
- compare_dir-0.1.0/src/compare_dir.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: compare-dir
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Tool to compare two directories
|
|
5
|
+
Project-URL: repository, https://github.com/kojiishi/compare-dir
|
|
6
|
+
Requires-Python: >=3.13
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
|
|
9
|
+
# compare-dir
|
|
10
|
+
|
|
11
|
+
Tool to compare files in two directories.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "compare-dir"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Tool to compare two directories"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.13"
|
|
7
|
+
dependencies = []
|
|
8
|
+
|
|
9
|
+
[project.urls]
|
|
10
|
+
repository = "https://github.com/kojiishi/compare-dir"
|
|
11
|
+
|
|
12
|
+
[project.scripts]
|
|
13
|
+
compare-dir = "compare_dir.compare_dir:main"
|
|
File without changes
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import filecmp
|
|
3
|
+
import concurrent.futures
|
|
4
|
+
from collections import deque
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
import time
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
class FileComparisonResult:
    """
    Comparison outcome for a single file, keyed by its path relative to the
    two directory roots being compared.

    The three comparison attributes start as None ("not applicable / not
    computed") and are filled in by whoever performs the comparison.
    """
    # Classification constants
    ONLY_IN_DIR1 = 1
    ONLY_IN_DIR2 = 2
    IN_BOTH = 3

    def __init__(self, relative_path: str, classification: int):
        # str() so that Path-like inputs are stored as plain strings.
        self.relative_path = str(relative_path)
        self.classification = classification  # Should be one of the constants
        # Comparison results stored as bool/int. None means not applicable.
        self.modified_time_comparison: int | None = None  # 1: dir1 > dir2, -1: dir2 > dir1, 0: same
        self.size_comparison: int | None = None  # 1: dir1 > dir2, -1: dir2 > dir1, 0: same
        self.is_content_same: bool | None = None  # True: same, False: different

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}({self.relative_path!r}, "
            f"classification={self.classification!r}, "
            f"modified_time_comparison={self.modified_time_comparison!r}, "
            f"size_comparison={self.size_comparison!r}, "
            f"is_content_same={self.is_content_same!r})"
        )

    def is_identical(self) -> bool:
        """Returns True if the file exists in both directories and is identical.

        "Identical" requires equal modification time, equal size and equal
        content; a None (not computed) value never counts as equal.
        """
        return (self.classification == self.IN_BOTH and
                self.modified_time_comparison == 0 and
                self.size_comparison == 0 and
                self.is_content_same is True)

    def to_string(self, dir1_name: str = 'dir1', dir2_name: str = 'dir2') -> str:
        """Returns a one-line, human-readable description of this result.

        Args:
            dir1_name: Label used for the first directory in the output.
            dir2_name: Label used for the second directory in the output.

        Returns:
            "<relative_path>: <detail>, <detail>, ..." where the details list
            only the aspects that differ or are not applicable.
        """
        # Named `parts` rather than `list` so the builtin is not shadowed.
        parts = []

        # Where the file lives. Nothing is reported for IN_BOTH.
        if self.classification == self.ONLY_IN_DIR1:
            parts.append(f"Only in {dir1_name}")
        elif self.classification == self.ONLY_IN_DIR2:
            parts.append(f"Only in {dir2_name}")
        elif self.classification != self.IN_BOTH:
            parts.append("Unknown")

        # Modification time. Equal times (0) add nothing.
        if self.modified_time_comparison is None:
            parts.append("Modified time not applicable")
        elif self.modified_time_comparison > 0:
            parts.append(f"{dir1_name} is newer")
        elif self.modified_time_comparison < 0:
            parts.append(f"{dir2_name} is newer")

        # Size. None and equal sizes (0) add nothing.
        if self.size_comparison is not None:
            if self.size_comparison > 0:
                parts.append(f"Size of {dir1_name} is larger")
            elif self.size_comparison < 0:
                parts.append(f"Size of {dir2_name} is larger")

        # Content. When the sizes are known to differ, a missing content
        # comparison is expected and therefore not reported.
        if self.is_content_same is None:
            if not self.size_comparison:
                parts.append("Content comparison not applicable")
        elif not self.is_content_same:
            parts.append("Content differ")

        details = ", ".join(parts)
        return f"{self.relative_path}: {details}"
|
|
70
|
+
|
|
71
|
+
class ComparisonSummary:
    """Accumulates per-category counters over comparison results and prints them."""

    def __init__(self):
        # Presence counters.
        self.in_both = 0
        self.only_in_dir1 = 0
        self.only_in_dir2 = 0
        # Breakdown of files present in both directories. At most one of
        # these is incremented per file (see update()).
        self.dir1_newer = 0
        self.dir2_newer = 0
        self.same_time_diff_size = 0
        self.same_time_size_diff_content = 0

    def update(self, result: FileComparisonResult):
        """Folds one comparison result into the counters.

        Files present in both directories are additionally bucketed by the
        first difference found, checked in the order: modification time,
        size, content.
        """
        kind = result.classification
        if kind == FileComparisonResult.ONLY_IN_DIR1:
            self.only_in_dir1 += 1
            return
        if kind == FileComparisonResult.ONLY_IN_DIR2:
            self.only_in_dir2 += 1
            return
        if kind != FileComparisonResult.IN_BOTH:
            # Unrecognised classifications are not counted anywhere.
            return

        self.in_both += 1
        mtime_order = result.modified_time_comparison
        if mtime_order == 1:
            self.dir1_newer += 1
        elif mtime_order == -1:
            self.dir2_newer += 1
        elif result.size_comparison != 0:
            self.same_time_diff_size += 1
        elif result.is_content_same is False:
            self.same_time_size_diff_content += 1

    def print(self, dir1_name: str, dir2_name: str):
        """Writes the counters to stdout, one labelled line per counter."""
        report_lines = (
            f"Files in both: {self.in_both}",
            f"Files only in {dir1_name}: {self.only_in_dir1}",
            f"Files only in {dir2_name}: {self.only_in_dir2}",
            f"Files in both ({dir1_name} is newer): {self.dir1_newer}",
            f"Files in both ({dir2_name} is newer): {self.dir2_newer}",
            f"Files in both (same time, different size): {self.same_time_diff_size}",
            f"Files in both (same time and size, different content): {self.same_time_size_diff_content}",
        )
        for line in report_lines:
            print(line)
|
|
108
|
+
|
|
109
|
+
class DirectoryComparer:
    """
    Context manager and iterable that compares the files under two directories.

    Entering the context creates a thread pool; iterating yields one
    FileComparisonResult per relative file path, in sorted path order.
    """

    def __init__(self, dir1: Path | str, dir2: Path | str, max_workers: int = 0):
        self.dir1 = Path(dir1)
        self.dir2 = Path(dir2)
        # Non-positive values are stored as None, which is handed to
        # ThreadPoolExecutor as-is in __enter__.
        self._max_workers = None if max_workers <= 0 else max_workers

    def __enter__(self):
        self.executor = concurrent.futures.ThreadPoolExecutor(
            max_workers=self._max_workers
        )
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Normal exit (or any exception other than KeyboardInterrupt):
        # wait for the pool to finish.
        if exc_type is not KeyboardInterrupt:
            self.executor.shutdown(wait=True)
            return
        # Ctrl-C: leave quickly, cancelling queued work instead of waiting
        # for it. Nothing truthy is returned, so the exception propagates.
        print("\nInterrupted by user. Shutting down...", file=sys.stderr)
        self.executor.shutdown(wait=False, cancel_futures=True)

    @staticmethod
    def _compare(value1, value2) -> int:
        """Three-way comparison: 1 if value1 > value2, -1 if value2 > value1, else 0.

        Used for both modification times and file sizes.
        """
        if value1 > value2:
            return 1
        return -1 if value2 > value1 else 0

    @staticmethod
    def _get_files_in_directory(directory_path: Path | str) -> dict[str, Path]:
        """
        Recursively lists the files under directory_path.

        Returns a dict mapping each file's path relative to directory_path
        (as a string) to the walked path of that file.
        """
        root = Path(directory_path)
        found = {}
        for folder, _, names in os.walk(root):
            folder_path = Path(folder)
            for name in names:
                walked = folder_path / name
                found[str(walked.relative_to(root))] = walked
        return found

    def _compare_file_pair(self, rel_path: str, dir1_files: dict[str, Path], dir2_files: dict[str, Path]) -> FileComparisonResult:
        """Builds the IN_BOTH result for rel_path by comparing mtime, size and content."""
        outcome = FileComparisonResult(rel_path, FileComparisonResult.IN_BOTH)
        left = dir1_files[rel_path]
        right = dir2_files[rel_path]

        left_stat = left.stat()
        right_stat = right.stat()
        outcome.modified_time_comparison = DirectoryComparer._compare(
            left_stat.st_mtime, right_stat.st_mtime
        )
        outcome.size_comparison = DirectoryComparer._compare(
            left_stat.st_size, right_stat.st_size
        )

        # Files of different size cannot have equal content, so the
        # byte-by-byte comparison is skipped for them.
        if outcome.size_comparison != 0:
            return outcome

        logging.info("Comparing content: %s", rel_path)
        outcome.is_content_same = filecmp.cmp(left, right, shallow=False)
        return outcome

    def __iter__(self):
        """
        Yields a FileComparisonResult for every file found in either directory.

        Uses self.executor, which is only created by __enter__. Results are
        yielded in sorted relative-path order.
        """
        start_time = time.monotonic()
        logging.info("Scanning directories: %s %s", self.dir1, self.dir2)
        # Scan both trees concurrently.
        scan1 = self.executor.submit(self._get_files_in_directory, self.dir1)
        scan2 = self.executor.submit(self._get_files_in_directory, self.dir2)
        dir1_files = scan1.result()
        dir2_files = scan2.result()
        logging.info("Scanning finished in %.2f seconds.", time.monotonic() - start_time)

        # Holds, in sorted path order, either a finished result or a future
        # that will produce one.
        backlog = deque()

        for rel_path in sorted(dir1_files.keys() | dir2_files.keys()):
            # rel_path comes from the union, so missing from one side means
            # present on the other.
            if rel_path not in dir2_files:
                backlog.append(
                    FileComparisonResult(rel_path, FileComparisonResult.ONLY_IN_DIR1)
                )
            elif rel_path not in dir1_files:
                backlog.append(
                    FileComparisonResult(rel_path, FileComparisonResult.ONLY_IN_DIR2)
                )
            else:
                backlog.append(
                    self.executor.submit(
                        self._compare_file_pair, rel_path, dir1_files, dir2_files
                    )
                )

            # Emit whatever is already available at the head of the backlog
            # without blocking, so output interleaves with queuing.
            while backlog:
                head = backlog[0]
                if not isinstance(head, concurrent.futures.Future):
                    assert isinstance(head, FileComparisonResult)
                    yield backlog.popleft()
                elif head.done():
                    backlog.popleft()
                    yield head.result()
                else:
                    # Head is still running; preserve order by going back
                    # to queuing more work.
                    break

        # Everything is queued; drain the rest, blocking on futures.
        while backlog:
            leftover = backlog.popleft()
            if isinstance(leftover, concurrent.futures.Future):
                yield leftover.result()
            else:
                yield leftover
|
|
224
|
+
|
|
225
|
+
def main() -> int:
    """
    Command-line entry point: parse arguments, compare the two directories,
    and print every non-identical file followed by a summary.

    Returns:
        Process exit status: 0 when the comparison ran to completion, 1 when
        either argument is not a directory, 130 when interrupted by Ctrl-C.
    """
    parser = argparse.ArgumentParser(description="Compare two directories.")
    parser.add_argument("dir1", help="Path to the first directory.")
    parser.add_argument("dir2", help="Path to the second directory.")
    parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose logging to stderr.")
    parser.add_argument("-p", "--parallel", type=int, default=0, help="Number of parallel threads for file comparison. If 0, uses the default.")
    args = parser.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.INFO,
                            format='%(levelname)s: %(message)s',
                            stream=sys.stderr)

    # Validate both roots up front. Errors go to stderr and produce a
    # non-zero status so scripts can detect the failure.
    for directory in (args.dir1, args.dir2):
        if not Path(directory).is_dir():
            print(f"Error: Directory not found at '{directory}'", file=sys.stderr)
            return 1

    start_time = time.monotonic()
    summary = ComparisonSummary()
    exit_code = 0
    try:
        with DirectoryComparer(args.dir1, args.dir2, max_workers=args.parallel) as comparer:
            for result in comparer:
                summary.update(result)
                if result.is_identical():
                    continue
                print(result.to_string(args.dir1, args.dir2))

        # Print the summary only if the comparison completes without interruption.
        print("\n--- Comparison Summary ---")
        summary.print(args.dir1, args.dir2)
    except KeyboardInterrupt:
        # The __exit__ method of DirectoryComparer handles the shutdown.
        # 130 = 128 + SIGINT, the conventional "terminated by Ctrl-C" status.
        exit_code = 130
    finally:
        print(f"Comparison finished in {time.monotonic() - start_time:.2f} seconds.")
    return exit_code


if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: compare-dir
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Tool to compare two directories
|
|
5
|
+
Project-URL: repository, https://github.com/kojiishi/compare-dir
|
|
6
|
+
Requires-Python: >=3.13
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
|
|
9
|
+
# compare-dir
|
|
10
|
+
|
|
11
|
+
Tool to compare files in two directories.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/compare_dir/__init__.py
|
|
4
|
+
src/compare_dir/compare_dir.py
|
|
5
|
+
src/compare_dir.egg-info/PKG-INFO
|
|
6
|
+
src/compare_dir.egg-info/SOURCES.txt
|
|
7
|
+
src/compare_dir.egg-info/dependency_links.txt
|
|
8
|
+
src/compare_dir.egg-info/entry_points.txt
|
|
9
|
+
src/compare_dir.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
compare_dir
|