diffinite 0.8.0__tar.gz → 0.9.3__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. {diffinite-0.8.0/src/diffinite.egg-info → diffinite-0.9.3}/PKG-INFO +11 -4
  2. {diffinite-0.8.0 → diffinite-0.9.3}/README.md +10 -3
  3. {diffinite-0.8.0 → diffinite-0.9.3}/pyproject.toml +1 -1
  4. diffinite-0.9.3/src/diffinite/__init__.py +14 -0
  5. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/cli.py +79 -0
  6. diffinite-0.9.3/src/diffinite/differ.py +621 -0
  7. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/evidence.py +2 -4
  8. diffinite-0.9.3/src/diffinite/languages/data.py +64 -0
  9. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/models.py +32 -0
  10. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/pdf_gen.py +33 -7
  11. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/pipeline.py +59 -10
  12. {diffinite-0.8.0 → diffinite-0.9.3/src/diffinite.egg-info}/PKG-INFO +11 -4
  13. diffinite-0.9.3/tests/test_differ.py +107 -0
  14. diffinite-0.8.0/src/diffinite/__init__.py +0 -7
  15. diffinite-0.8.0/src/diffinite/differ.py +0 -293
  16. diffinite-0.8.0/src/diffinite/languages/data.py +0 -36
  17. diffinite-0.8.0/tests/test_differ.py +0 -56
  18. {diffinite-0.8.0 → diffinite-0.9.3}/LICENSE +0 -0
  19. {diffinite-0.8.0 → diffinite-0.9.3}/NOTICE +0 -0
  20. {diffinite-0.8.0 → diffinite-0.9.3}/setup.cfg +0 -0
  21. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/__main__.py +0 -0
  22. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/collector.py +0 -0
  23. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/deep_compare.py +0 -0
  24. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/fingerprint.py +0 -0
  25. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/languages/__init__.py +0 -0
  26. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/languages/_registry.py +0 -0
  27. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/languages/_spec.py +0 -0
  28. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/languages/c_family.py +0 -0
  29. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/languages/csharp.py +0 -0
  30. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/languages/go_rust_swift.py +0 -0
  31. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/languages/java.py +0 -0
  32. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/languages/javascript.py +0 -0
  33. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/languages/markup.py +0 -0
  34. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/languages/python.py +0 -0
  35. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/languages/scripting.py +0 -0
  36. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite/parser.py +0 -0
  37. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite.egg-info/SOURCES.txt +0 -0
  38. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite.egg-info/dependency_links.txt +0 -0
  39. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite.egg-info/entry_points.txt +0 -0
  40. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite.egg-info/requires.txt +0 -0
  41. {diffinite-0.8.0 → diffinite-0.9.3}/src/diffinite.egg-info/top_level.txt +0 -0
  42. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_cli.py +0 -0
  43. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_collector.py +0 -0
  44. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_deep_compare.py +0 -0
  45. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_differ_extended.py +0 -0
  46. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_evidence.py +0 -0
  47. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_evidence_hash.py +0 -0
  48. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_fingerprint.py +0 -0
  49. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_languages.py +0 -0
  50. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_normalize.py +0 -0
  51. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_parser.py +0 -0
  52. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_pdf_gen.py +0 -0
  53. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_pipeline.py +0 -0
  54. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_plagiarism_dataset.py +0 -0
  55. {diffinite-0.8.0 → diffinite-0.9.3}/tests/test_sqlite_integration.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffinite
3
- Version: 0.8.0
3
+ Version: 0.9.3
4
4
  Summary: Forensic source-code comparison tool — Winnowing fingerprints and professional PDF reports for IP litigation & code audit
5
5
  Author: nash-dir
6
6
  License: Apache-2.0
@@ -119,6 +119,8 @@ Each matched pair gets a side-by-side diff page with:
119
119
 
120
120
  - **Green highlight** — Lines present only in File B (additions)
121
121
  - **Red highlight** — Lines present only in File A (deletions)
122
+ - **Purple highlight** — Lines moved from this position (`--detect-moved`)
123
+ - **Blue highlight** — Lines moved to this position (`--detect-moved`)
122
124
  - **No highlight** — Identical lines (with configurable context folding)
123
125
 
124
126
  ### Deep Compare Section
@@ -139,7 +141,7 @@ Jaccard similarity is a well-defined set metric: `|A∩B| / |A∪B|`. Its interp
139
141
  |--------|-----------|----------|
140
142
  | `--page-number` | `Page 3 / 47` | Bottom-right |
141
143
  | `--file-number` | `File 2 / 12` | Bottom-left |
142
- | `--bates-number` | `DIFF-000003` | Bottom-center |
144
+ | `--bates-number` | `TEST-000003-CONF` | Bottom-center |
143
145
  | `--show-filename` | `com/example/Foo.java` | Top-right |
144
146
 
145
147
  ---
@@ -178,6 +180,7 @@ dir_b Path to the comparison source directory (B)
178
180
  | `--squash-blanks` | off | Collapse runs of 3+ blank lines. ⚠️ Changes line numbers — not recommended for forensic line-tracing. |
179
181
  | `--threshold N` | `60` | Fuzzy file-name matching threshold (0–100). Lower = more aggressive matching. |
180
182
  | `--collapse-identical` | off | Fold unchanged code blocks (3 context lines around each change) |
183
+ | `--detect-moved` | off | Detect moved code blocks and highlight with distinct colors (purple=original, blue=destination) |
181
184
 
182
185
  ### Deep Compare Options
183
186
 
@@ -203,6 +206,9 @@ dir_b Path to the comparison source directory (B)
203
206
  | `--page-number` | Show `Page n / N` at the bottom-right |
204
207
  | `--file-number` | Show `File n / N` at the bottom-left |
205
208
  | `--bates-number` | Stamp sequential Bates numbers at the bottom-center |
209
+ | `--bates-prefix TEXT` | Bates number prefix (e.g. `PLAINTIFF-`). Combined as: `{prefix}{number}{suffix}` |
210
+ | `--bates-suffix TEXT` | Bates number suffix (e.g. `-CONFIDENTIAL`) |
211
+ | `--bates-start N` | Starting Bates number (default: `1`). Useful for continuing numbering across reports. |
206
212
  | `--show-filename` | Show filename at the top-right |
207
213
 
208
214
  ---
@@ -215,8 +221,9 @@ dir_b Path to the comparison source directory (B)
215
221
  # Full forensic report with all annotations
216
222
  diffinite plaintiff_code/ defendant_code/ -o exhibit_A.pdf \
217
223
  --no-comments \
218
- --bates-number --page-number --file-number --show-filename \
219
- --collapse-identical
224
+ --bates-number --bates-prefix "CASE2026-" --bates-suffix "-CONFIDENTIAL" \
225
+ --bates-start 1 --page-number --file-number --show-filename \
226
+ --collapse-identical --detect-moved
220
227
  ```
221
228
 
222
229
  ### Code Audit (Quick HTML)
@@ -82,6 +82,8 @@ Each matched pair gets a side-by-side diff page with:
82
82
 
83
83
  - **Green highlight** — Lines present only in File B (additions)
84
84
  - **Red highlight** — Lines present only in File A (deletions)
85
+ - **Purple highlight** — Lines moved from this position (`--detect-moved`)
86
+ - **Blue highlight** — Lines moved to this position (`--detect-moved`)
85
87
  - **No highlight** — Identical lines (with configurable context folding)
86
88
 
87
89
  ### Deep Compare Section
@@ -102,7 +104,7 @@ Jaccard similarity is a well-defined set metric: `|A∩B| / |A∪B|`. Its interp
102
104
  |--------|-----------|----------|
103
105
  | `--page-number` | `Page 3 / 47` | Bottom-right |
104
106
  | `--file-number` | `File 2 / 12` | Bottom-left |
105
- | `--bates-number` | `DIFF-000003` | Bottom-center |
107
+ | `--bates-number` | `TEST-000003-CONF` | Bottom-center |
106
108
  | `--show-filename` | `com/example/Foo.java` | Top-right |
107
109
 
108
110
  ---
@@ -141,6 +143,7 @@ dir_b Path to the comparison source directory (B)
141
143
  | `--squash-blanks` | off | Collapse runs of 3+ blank lines. ⚠️ Changes line numbers — not recommended for forensic line-tracing. |
142
144
  | `--threshold N` | `60` | Fuzzy file-name matching threshold (0–100). Lower = more aggressive matching. |
143
145
  | `--collapse-identical` | off | Fold unchanged code blocks (3 context lines around each change) |
146
+ | `--detect-moved` | off | Detect moved code blocks and highlight with distinct colors (purple=original, blue=destination) |
144
147
 
145
148
  ### Deep Compare Options
146
149
 
@@ -166,6 +169,9 @@ dir_b Path to the comparison source directory (B)
166
169
  | `--page-number` | Show `Page n / N` at the bottom-right |
167
170
  | `--file-number` | Show `File n / N` at the bottom-left |
168
171
  | `--bates-number` | Stamp sequential Bates numbers at the bottom-center |
172
+ | `--bates-prefix TEXT` | Bates number prefix (e.g. `PLAINTIFF-`). Combined as: `{prefix}{number}{suffix}` |
173
+ | `--bates-suffix TEXT` | Bates number suffix (e.g. `-CONFIDENTIAL`) |
174
+ | `--bates-start N` | Starting Bates number (default: `1`). Useful for continuing numbering across reports. |
169
175
  | `--show-filename` | Show filename at the top-right |
170
176
 
171
177
  ---
@@ -178,8 +184,9 @@ dir_b Path to the comparison source directory (B)
178
184
  # Full forensic report with all annotations
179
185
  diffinite plaintiff_code/ defendant_code/ -o exhibit_A.pdf \
180
186
  --no-comments \
181
- --bates-number --page-number --file-number --show-filename \
182
- --collapse-identical
187
+ --bates-number --bates-prefix "CASE2026-" --bates-suffix "-CONFIDENTIAL" \
188
+ --bates-start 1 --page-number --file-number --show-filename \
189
+ --collapse-identical --detect-moved
183
190
  ```
184
191
 
185
192
  ### Code Audit (Quick HTML)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "diffinite"
7
- version = "0.8.0"
7
+ version = "0.9.3"
8
8
  description = "Forensic source-code comparison tool — Winnowing fingerprints and professional PDF reports for IP litigation & code audit"
9
9
  readme = "README.md"
10
10
  license = {text = "Apache-2.0"}
@@ -0,0 +1,14 @@
1
+ """Diffinite — Forensic source-code diff tool.
2
+
3
+ Compare two source directories, track logic movement across files
4
+ (N:M cross-matching), and generate syntax-highlighted PDF reports.
5
+ """
6
+
7
+ from importlib.metadata import version as _pkg_version, PackageNotFoundError
8
+
9
+ try:
10
+ __version__: str = _pkg_version("diffinite")
11
+ except PackageNotFoundError:
12
+ # Fallback for editable installs or running from source without install
13
+ __version__ = "0.0.0-dev"
14
+
@@ -95,6 +95,34 @@ def main(argv: list[str] | None = None) -> None:
95
95
  default=FUZZY_THRESHOLD,
96
96
  help=f"Fuzzy matching threshold 0–100 (default: {FUZZY_THRESHOLD})",
97
97
  )
98
+ parser.add_argument(
99
+ "--encoding",
100
+ default="auto",
101
+ help=(
102
+ "Source file encoding. 'auto' (default) uses charset-normalizer "
103
+ "auto-detection with Korean-optimized fallback (utf-8 -> euc-kr -> cp949). "
104
+ "Specify an explicit encoding (e.g. euc-kr, utf-8, cp949, shift_jis, "
105
+ "gb2312) to force-decode all files with that encoding."
106
+ ),
107
+ )
108
+ parser.add_argument(
109
+ "--sort-by",
110
+ choices=["filename", "size", "ratio"],
111
+ default=None,
112
+ dest="sort_by",
113
+ help=(
114
+ "Sort matched file pairs in the report. "
115
+ "'filename' sorts by file path, 'size' by file size, "
116
+ "'ratio' by similarity ratio. Default: insertion order (no sort)."
117
+ ),
118
+ )
119
+ parser.add_argument(
120
+ "--sort-order",
121
+ choices=["asc", "desc"],
122
+ default="asc",
123
+ dest="sort_order",
124
+ help="Sort direction (default: asc). Only effective with --sort-by.",
125
+ )
98
126
 
99
127
  # ── Output modes ──────────────────────────────────────────────────
100
128
  parser.add_argument(
@@ -123,6 +151,33 @@ def main(argv: list[str] | None = None) -> None:
123
151
  default=False,
124
152
  help="Stamp Bates numbers at the bottom-center of each page",
125
153
  )
154
+ parser.add_argument(
155
+ "--bates-prefix",
156
+ type=str,
157
+ default="",
158
+ help=(
159
+ "Bates number prefix (e.g. 'PLAINTIFF-'). "
160
+ "Combined as: {prefix}{number}{suffix}"
161
+ ),
162
+ )
163
+ parser.add_argument(
164
+ "--bates-suffix",
165
+ type=str,
166
+ default="",
167
+ help=(
168
+ "Bates number suffix (e.g. '-CONFIDENTIAL'). "
169
+ "Combined as: {prefix}{number}{suffix}"
170
+ ),
171
+ )
172
+ parser.add_argument(
173
+ "--bates-start",
174
+ type=int,
175
+ default=1,
176
+ help=(
177
+ "Starting Bates number (default: 1). "
178
+ "Useful for continuing numbering across multiple reports."
179
+ ),
180
+ )
126
181
  parser.add_argument(
127
182
  "--show-filename",
128
183
  action="store_true",
@@ -139,6 +194,16 @@ def main(argv: list[str] | None = None) -> None:
139
194
  "Without this flag, the full diff is shown."
140
195
  ),
141
196
  )
197
+ parser.add_argument(
198
+ "--detect-moved",
199
+ action="store_true",
200
+ default=False,
201
+ help=(
202
+ "Detect moved code blocks and highlight them with distinct colors "
203
+ "(purple=original position, blue=moved position) instead of "
204
+ "plain delete/add. Works in both simple and deep modes."
205
+ ),
206
+ )
142
207
 
143
208
  # ── Report format options ─────────────────────────────────────────
144
209
  format_group = parser.add_argument_group(
@@ -280,6 +345,9 @@ def main(argv: list[str] | None = None) -> None:
280
345
  autojunk=not args.no_autojunk,
281
346
  )
282
347
 
348
+ # Resolve encoding
349
+ encoding = args.encoding if args.encoding.lower() != "auto" else None
350
+
283
351
  run_pipeline(
284
352
  dir_a=args.dir_a,
285
353
  dir_b=args.dir_b,
@@ -313,6 +381,17 @@ def main(argv: list[str] | None = None) -> None:
313
381
  report_html=args.report_html,
314
382
  report_md=args.report_md,
315
383
  report_json=args.report_json,
384
+ # Encoding
385
+ encoding=encoding,
386
+ # Sorting
387
+ sort_by=args.sort_by,
388
+ sort_order=args.sort_order,
389
+ # Moved block detection
390
+ detect_moved=args.detect_moved,
391
+ # Bates prefix/suffix
392
+ bates_prefix=args.bates_prefix,
393
+ bates_suffix=args.bates_suffix,
394
+ bates_start=args.bates_start,
316
395
  )
317
396
 
318
397