diffinite 0.9.6__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {diffinite-0.9.6/src/diffinite.egg-info → diffinite-0.10.0}/PKG-INFO +1 -1
  2. {diffinite-0.9.6 → diffinite-0.10.0}/pyproject.toml +1 -1
  3. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/cli.py +51 -27
  4. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/models.py +6 -0
  5. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/pdf_gen.py +66 -25
  6. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/pipeline.py +127 -52
  7. {diffinite-0.9.6 → diffinite-0.10.0/src/diffinite.egg-info}/PKG-INFO +1 -1
  8. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_cli.py +40 -2
  9. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_pdf_gen.py +76 -11
  10. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_pipeline.py +4 -4
  11. {diffinite-0.9.6 → diffinite-0.10.0}/LICENSE +0 -0
  12. {diffinite-0.9.6 → diffinite-0.10.0}/NOTICE +0 -0
  13. {diffinite-0.9.6 → diffinite-0.10.0}/README.md +0 -0
  14. {diffinite-0.9.6 → diffinite-0.10.0}/setup.cfg +0 -0
  15. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/__init__.py +0 -0
  16. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/__main__.py +0 -0
  17. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/collector.py +0 -0
  18. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/deep_compare.py +0 -0
  19. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/differ.py +0 -0
  20. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/evidence.py +0 -0
  21. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/fingerprint.py +0 -0
  22. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/languages/__init__.py +0 -0
  23. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/languages/_registry.py +0 -0
  24. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/languages/_spec.py +0 -0
  25. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/languages/c_family.py +0 -0
  26. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/languages/csharp.py +0 -0
  27. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/languages/data.py +0 -0
  28. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/languages/go_rust_swift.py +0 -0
  29. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/languages/java.py +0 -0
  30. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/languages/javascript.py +0 -0
  31. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/languages/markup.py +0 -0
  32. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/languages/python.py +0 -0
  33. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/languages/scripting.py +0 -0
  34. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite/parser.py +0 -0
  35. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite.egg-info/SOURCES.txt +0 -0
  36. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite.egg-info/dependency_links.txt +0 -0
  37. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite.egg-info/entry_points.txt +0 -0
  38. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite.egg-info/requires.txt +0 -0
  39. {diffinite-0.9.6 → diffinite-0.10.0}/src/diffinite.egg-info/top_level.txt +0 -0
  40. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_collector.py +0 -0
  41. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_deep_compare.py +0 -0
  42. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_differ.py +0 -0
  43. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_differ_extended.py +0 -0
  44. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_evidence.py +0 -0
  45. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_evidence_hash.py +0 -0
  46. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_fingerprint.py +0 -0
  47. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_languages.py +0 -0
  48. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_normalize.py +0 -0
  49. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_parser.py +0 -0
  50. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_plagiarism_dataset.py +0 -0
  51. {diffinite-0.9.6 → diffinite-0.10.0}/tests/test_sqlite_integration.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffinite
3
- Version: 0.9.6
3
+ Version: 0.10.0
4
4
  Summary: Forensic source-code comparison tool — Winnowing fingerprints and professional PDF reports for IP litigation & code audit
5
5
  Author: nash-dir
6
6
  License: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "diffinite"
7
- version = "0.9.6"
7
+ version = "0.10.0"
8
8
  description = "Forensic source-code comparison tool — Winnowing fingerprints and professional PDF reports for IP litigation & code audit"
9
9
  readme = "README.md"
10
10
  license = {text = "Apache-2.0"}
@@ -58,14 +58,6 @@ def main(argv: list[str] | None = None) -> None:
58
58
  ),
59
59
  )
60
60
 
61
- # ── Output ────────────────────────────────────────────────────────
62
- parser.add_argument(
63
- "--output-pdf", "-o",
64
- default="report.pdf",
65
- help="Output PDF file path (default: report.pdf). "
66
- "Ignored when any --report-* option is specified.",
67
- )
68
-
69
61
  # ── Comparison options ────────────────────────────────────────────
70
62
  parser.add_argument(
71
63
  "--by-word",
@@ -74,7 +66,7 @@ def main(argv: list[str] | None = None) -> None:
74
66
  help="Compare by word instead of by line",
75
67
  )
76
68
  parser.add_argument(
77
- "--no-comments",
69
+ "--strip-comments",
78
70
  action="store_true",
79
71
  default=False,
80
72
  help="Strip comments before comparison (uses 2-pass parser)",
@@ -85,7 +77,7 @@ def main(argv: list[str] | None = None) -> None:
85
77
  default=False,
86
78
  help=(
87
79
  "Collapse runs of 3+ blank lines after comment stripping. "
88
- "Only effective with --no-comments. WARNING: changes line "
80
+ "Only effective with --strip-comments. WARNING: changes line "
89
81
  "numbers — do not use for forensic line-tracing."
90
82
  ),
91
83
  )
@@ -107,13 +99,14 @@ def main(argv: list[str] | None = None) -> None:
107
99
  )
108
100
  parser.add_argument(
109
101
  "--sort-by",
110
- choices=["filename", "size", "ratio"],
102
+ choices=["filename", "path", "similarity", "ratio"],
111
103
  default=None,
112
104
  dest="sort_by",
113
105
  help=(
114
106
  "Sort matched file pairs in the report. "
115
- "'filename' sorts by file path, 'size' by file size, "
116
- "'ratio' by similarity ratio. Default: insertion order (no sort)."
107
+ "'filename' sorts by file basename, 'path' by full path, "
108
+ "'similarity' by name match score, 'ratio' by content "
109
+ "similarity. Default: insertion order (no sort)."
117
110
  ),
118
111
  )
119
112
  parser.add_argument(
@@ -179,7 +172,7 @@ def main(argv: list[str] | None = None) -> None:
179
172
  ),
180
173
  )
181
174
  parser.add_argument(
182
- "--show-filename",
175
+ "--filename",
183
176
  action="store_true",
184
177
  default=False,
185
178
  help="Show the filename at the top-right of each page",
@@ -204,15 +197,35 @@ def main(argv: list[str] | None = None) -> None:
204
197
  "plain delete/add. Works in both simple and deep modes."
205
198
  ),
206
199
  )
200
+ parser.add_argument(
201
+ "--include-uncompared",
202
+ action=argparse.BooleanOptionalAction,
203
+ default=True,
204
+ help=(
205
+ "Include unmatched (uncompared) file lists in the report. "
206
+ "Use --no-include-uncompared to hide them (default: included)."
207
+ ),
208
+ )
209
+ parser.add_argument(
210
+ "--binary-handling",
211
+ choices=["exclude", "hash", "error"],
212
+ default="hash",
213
+ dest="binary_handling",
214
+ help=(
215
+ "How to handle binary (non-decodable) files: "
216
+ "'exclude' skips them entirely, 'hash' shows SHA-256 match "
217
+ "status, 'error' shows decode error (default: hash)."
218
+ ),
219
+ )
207
220
 
208
221
  # ── Report format options ─────────────────────────────────────────
209
222
  format_group = parser.add_argument_group(
210
223
  "Report Format",
211
224
  "Output format(s). Multiple can be combined. "
212
- "If none specified, defaults to --output-pdf.",
225
+ "If none specified, defaults to PDF (report.pdf).",
213
226
  )
214
227
  format_group.add_argument(
215
- "--report-pdf",
228
+ "--report-pdf", "-o",
216
229
  metavar="PATH",
217
230
  default=None,
218
231
  help="Generate a merged PDF report at the given path",
@@ -243,7 +256,7 @@ def main(argv: list[str] | None = None) -> None:
243
256
  "'--mode deep').",
244
257
  )
245
258
  deep_group.add_argument(
246
- "--k-gram", "--kgram-size",
259
+ "--k-gram",
247
260
  type=int,
248
261
  default=DEFAULT_K,
249
262
  dest="k_gram",
@@ -253,7 +266,7 @@ def main(argv: list[str] | None = None) -> None:
253
266
  ),
254
267
  )
255
268
  deep_group.add_argument(
256
- "--window", "--window-size",
269
+ "--window",
257
270
  type=int,
258
271
  default=DEFAULT_W,
259
272
  dest="window",
@@ -263,12 +276,12 @@ def main(argv: list[str] | None = None) -> None:
263
276
  ),
264
277
  )
265
278
  deep_group.add_argument(
266
- "--threshold-deep", "--min-jaccard",
279
+ "--threshold-deep",
267
280
  type=float,
268
- default=0.05,
281
+ default=5,
269
282
  dest="threshold_deep",
270
283
  help=(
271
- "Minimum Jaccard similarity to report (default: 0.05). "
284
+ "Minimum Jaccard similarity 0–100 to report (default: 5). "
272
285
  "Below 5%% is considered noise."
273
286
  ),
274
287
  )
@@ -336,38 +349,45 @@ def main(argv: list[str] | None = None) -> None:
336
349
 
337
350
  args = parser.parse_args(argv)
338
351
 
352
+ # Convert threshold-deep from 0-100 (user-facing) to 0-1 (internal)
353
+ min_jaccard_internal = args.threshold_deep / 100.0
354
+
339
355
  # Build analysis metadata (embedded in every report for transparency)
340
356
  metadata = AnalysisMetadata(
341
357
  exec_mode=args.mode,
342
358
  k=args.k_gram,
343
359
  w=args.window,
344
- threshold=args.threshold_deep,
360
+ threshold=args.threshold_deep, # 0-100 scale in metadata
345
361
  autojunk=not args.no_autojunk,
346
362
  )
347
363
 
348
364
  # Resolve encoding
349
365
  encoding = args.encoding if args.encoding.lower() != "auto" else None
350
366
 
367
+ # Resolve default PDF output if no --report-* specified
368
+ report_pdf = args.report_pdf
369
+ if report_pdf is None and args.report_html is None and args.report_md is None and args.report_json is None:
370
+ report_pdf = "report.pdf"
371
+
351
372
  run_pipeline(
352
373
  dir_a=args.dir_a,
353
374
  dir_b=args.dir_b,
354
375
  by_word=args.by_word,
355
- compare_comment=not args.no_comments,
376
+ strip_comments=args.strip_comments,
356
377
  squash_blanks=args.squash_blanks,
357
- output_pdf=args.output_pdf,
358
378
  threshold=args.threshold,
359
379
  no_merge=args.no_merge,
360
380
  show_page_number=args.page_number,
361
381
  show_file_number=args.file_number,
362
382
  show_bates_number=args.bates_number,
363
- show_filename=args.show_filename,
383
+ show_filename=args.filename,
364
384
  collapse_identical=args.collapse_identical,
365
385
  # Execution mode & deep compare
366
386
  exec_mode=args.mode,
367
387
  workers=args.workers,
368
388
  kgram_size=args.k_gram,
369
389
  window_size=args.window,
370
- min_jaccard=args.threshold_deep,
390
+ min_jaccard=min_jaccard_internal,
371
391
  normalize=args.normalize,
372
392
  metadata=metadata,
373
393
  # Forensic options
@@ -377,7 +397,7 @@ def main(argv: list[str] | None = None) -> None:
377
397
  embed_hash=args.embed_hash,
378
398
  bundle_path=args.bundle_path,
379
399
  # Multi-format output
380
- report_pdf=args.report_pdf,
400
+ report_pdf=report_pdf,
381
401
  report_html=args.report_html,
382
402
  report_md=args.report_md,
383
403
  report_json=args.report_json,
@@ -388,10 +408,14 @@ def main(argv: list[str] | None = None) -> None:
388
408
  sort_order=args.sort_order,
389
409
  # Moved block detection
390
410
  detect_moved=args.detect_moved,
411
+ # Uncompared files
412
+ include_uncompared=args.include_uncompared,
391
413
  # Bates prefix/suffix
392
414
  bates_prefix=args.bates_prefix,
393
415
  bates_suffix=args.bates_suffix,
394
416
  bates_start=args.bates_start,
417
+ # Binary handling
418
+ binary_handling=args.binary_handling,
395
419
  )
396
420
 
397
421
 
@@ -98,6 +98,12 @@ class DiffResult:
98
98
  error: Optional[str] = None
99
99
  """None이 아니면 디코딩/읽기 실패 등의 에러 메시지. 이 경우 위 필드는 0/빈값."""
100
100
 
101
+ binary: bool = False
102
+ """True if file pair was detected as binary (non-decodable)."""
103
+
104
+ hash_match: Optional[bool] = None
105
+ """SHA-256 match status for binary files. None for text files."""
106
+
101
107
 
102
108
  # ──────────────────────────────────────────────────────────────────────
103
109
  # Winnowing 핑거프린트 엔트리
@@ -82,6 +82,7 @@ table.summary th, table.summary td {
82
82
  border: 1px solid #ccc;
83
83
  padding: 5px 8px;
84
84
  text-align: left;
85
+ word-break: break-all;
85
86
  }
86
87
  table.summary th {
87
88
  background: #0078d4;
@@ -178,6 +179,7 @@ table.deep th, table.deep td {
178
179
  border: 1px solid #ccc;
179
180
  padding: 4px 6px;
180
181
  text-align: left;
182
+ word-break: break-all;
181
183
  }
182
184
  table.deep th {
183
185
  background: #6c5ce7;
@@ -212,6 +214,27 @@ table.deep tr:nth-child(even) {
212
214
  # ---------------------------------------------------------------------------
213
215
  # Helpers
214
216
  # ---------------------------------------------------------------------------
217
+ def _break_path(path_str: str) -> str:
218
+ """Insert zero-width spaces after path separator symbols for line-breaking.
219
+
220
+ xhtml2pdf는 긴 파일 경로를 자동 줄바꿈하지 못하므로,
221
+ 경로 구분자(/, \\, ., _) 뒤에 zero-width space를 삽입하여
222
+ 자연스러운 줄바꿈 지점을 제공한다.
223
+
224
+ Args:
225
+ path_str: HTML-escaped 경로 문자열.
226
+
227
+ Returns:
228
+ 줄바꿈 힌트가 삽입된 경로 문자열.
229
+ """
230
+ # HTML entity for zero-width space
231
+ zwsp = "&#8203;"
232
+ result = path_str
233
+ for sep in ("/", "\\", ".", "_"):
234
+ result = result.replace(sep, sep + zwsp)
235
+ return result
236
+
237
+
215
238
  def _ratio_badge(ratio: float) -> str:
216
239
  """Return an HTML badge span for a similarity ratio."""
217
240
  pct = ratio * 100
@@ -251,7 +274,7 @@ def build_hash_table_html(
251
274
  parts.append(
252
275
  f'<tr>'
253
276
  f'<td>{idx}</td>'
254
- f'<td>{html.escape(h.rel_path)}</td>'
277
+ f'<td>{_break_path(html.escape(h.rel_path))}</td>'
255
278
  f'<td style="font-family:monospace;font-size:8px">{short_hash}</td>'
256
279
  f'<td>{h.size_bytes:,}</td>'
257
280
  f'</tr>\n'
@@ -368,17 +391,18 @@ def build_cover_body(
368
391
  dir_a: str,
369
392
  dir_b: str,
370
393
  by_word: bool,
371
- compare_comment: bool,
394
+ strip_comments: bool,
372
395
  *,
373
396
  deep_results: Optional[list[DeepMatchResult]] = None,
374
397
  metadata: Optional["AnalysisMetadata"] = None,
375
398
  hash_table_html: Optional[str] = None,
399
+ include_uncompared: bool = True,
376
400
  ) -> str:
377
401
  """Build the cover-page body fragment (no DOCTYPE/html/head wrapper)."""
378
402
  from diffinite.models import AnalysisMetadata as _AM # avoid circular at module level
379
403
 
380
404
  unit = "word" if by_word else "line"
381
- comment_mode = "included" if compare_comment else "excluded"
405
+ comment_mode = "stripped" if strip_comments else "included"
382
406
 
383
407
  # Analysis metadata banner (transparency)
384
408
  meta_html = ""
@@ -396,26 +420,43 @@ def build_cover_body(
396
420
 
397
421
  summary_rows = ""
398
422
  for idx, r in enumerate(results, 1):
399
- badge = _ratio_badge(r.ratio)
400
- err = (
401
- f' <em style="color:red">({html.escape(r.error)})</em>'
402
- if r.error else ""
403
- )
404
- summary_rows += (
405
- f"<tr>"
406
- f"<td>{idx}</td>"
407
- f"<td>{html.escape(r.match.rel_path_a)}</td>"
408
- f"<td>{html.escape(r.match.rel_path_b)}</td>"
409
- f"<td>{r.match.similarity:.1f}</td>"
410
- f"<td>{badge}{err}</td>"
411
- f"<td style='color:green'>+{r.additions}</td>"
412
- f"<td style='color:red'>-{r.deletions}</td>"
413
- f"</tr>\n"
414
- )
423
+ if r.binary:
424
+ if r.hash_match:
425
+ status = '<span class="badge badge-high">✓ Binary Match</span>'
426
+ else:
427
+ status = '<span class="badge badge-low">✗ Binary Mismatch</span>'
428
+ summary_rows += (
429
+ f"<tr>"
430
+ f"<td>{idx}</td>"
431
+ f"<td>{_break_path(html.escape(r.match.rel_path_a))}</td>"
432
+ f"<td>{_break_path(html.escape(r.match.rel_path_b))}</td>"
433
+ f"<td>{r.match.similarity:.1f}</td>"
434
+ f"<td>{status}</td>"
435
+ f"<td>—</td>"
436
+ f"<td>—</td>"
437
+ f"</tr>\n"
438
+ )
439
+ else:
440
+ badge = _ratio_badge(r.ratio)
441
+ err = (
442
+ f' <em style="color:red">({html.escape(r.error)})</em>'
443
+ if r.error else ""
444
+ )
445
+ summary_rows += (
446
+ f"<tr>"
447
+ f"<td>{idx}</td>"
448
+ f"<td>{_break_path(html.escape(r.match.rel_path_a))}</td>"
449
+ f"<td>{_break_path(html.escape(r.match.rel_path_b))}</td>"
450
+ f"<td>{r.match.similarity:.1f}</td>"
451
+ f"<td>{badge}{err}</td>"
452
+ f"<td style='color:green'>+{r.additions}</td>"
453
+ f"<td style='color:red'>-{r.deletions}</td>"
454
+ f"</tr>\n"
455
+ )
415
456
 
416
- # Unmatched lists
457
+ # Unmatched lists (only when include_uncompared is True)
417
458
  unmatched_html = ""
418
- if unmatched_a or unmatched_b:
459
+ if include_uncompared and (unmatched_a or unmatched_b):
419
460
  unmatched_html += "<h2>Unmatched Files</h2>\n"
420
461
  if unmatched_a:
421
462
  unmatched_html += (
@@ -423,7 +464,7 @@ def build_cover_body(
423
464
  "<ul class='unmatched'>\n"
424
465
  )
425
466
  for f in unmatched_a:
426
- unmatched_html += f" <li>{html.escape(f)}</li>\n"
467
+ unmatched_html += f" <li>{_break_path(html.escape(f))}</li>\n"
427
468
  unmatched_html += "</ul>\n"
428
469
  if unmatched_b:
429
470
  unmatched_html += (
@@ -431,7 +472,7 @@ def build_cover_body(
431
472
  "<ul class='unmatched'>\n"
432
473
  )
433
474
  for f in unmatched_b:
434
- unmatched_html += f" <li>{html.escape(f)}</li>\n"
475
+ unmatched_html += f" <li>{_break_path(html.escape(f))}</li>\n"
435
476
  unmatched_html += "</ul>\n"
436
477
 
437
478
  deep_html = ""
@@ -447,8 +488,8 @@ def build_cover_body(
447
488
  jbadge = _ratio_badge(jaccard)
448
489
  deep_html += (
449
490
  f"<tr>"
450
- f"<td>{html.escape(dr.file_a)}</td>"
451
- f"<td>{html.escape(b_file)}</td>"
491
+ f"<td>{_break_path(html.escape(dr.file_a))}</td>"
492
+ f"<td>{_break_path(html.escape(b_file))}</td>"
452
493
  f"<td>{shared}</td>"
453
494
  f"<td>{jbadge}</td>"
454
495
  f"</tr>\n"
@@ -36,19 +36,20 @@ import json
36
36
  import logging
37
37
  import os
38
38
  import tempfile
39
- from pathlib import Path
39
+ from pathlib import Path, PurePosixPath
40
40
 
41
41
  from diffinite.collector import collect_files, match_files, FUZZY_THRESHOLD
42
42
  from diffinite.deep_compare import run_deep_compare
43
43
  from diffinite.differ import compute_diff, generate_html_diff, read_file
44
44
  from diffinite.evidence import (
45
+ _sha256_file,
45
46
  compute_file_hashes,
46
47
  create_evidence_bundle,
47
48
  write_manifest,
48
49
  )
49
50
  from diffinite.fingerprint import DEFAULT_K, DEFAULT_W
50
51
  from diffinite.models import AnalysisMetadata, DiffResult, DeepMatchResult
51
- from diffinite.parser import strip_comments
52
+ from diffinite.parser import strip_comments as _strip_comments_fn
52
53
  from diffinite.pdf_gen import (
53
54
  _html_wrap,
54
55
  add_bates_numbers,
@@ -113,15 +114,16 @@ def _generate_markdown_report(
113
114
  dir_a: str,
114
115
  dir_b: str,
115
116
  by_word: bool,
116
- compare_comment: bool,
117
+ strip_comments: bool,
117
118
  deep_results: list[DeepMatchResult] | None,
118
119
  output_path: str,
119
120
  *,
120
121
  metadata: AnalysisMetadata | None = None,
122
+ include_uncompared: bool = True,
121
123
  ) -> None:
122
124
  """Generate a Markdown summary report."""
123
125
  unit = "word" if by_word else "line"
124
- comment_mode = "included" if compare_comment else "excluded"
126
+ comment_mode = "stripped" if strip_comments else "included"
125
127
 
126
128
  lines: list[str] = []
127
129
  lines.append("# Diffinite — Source Code Diff Report\n")
@@ -141,16 +143,24 @@ def _generate_markdown_report(
141
143
  lines.append("| # | File A | File B | Name Sim. | Match | +Added | −Deleted |")
142
144
  lines.append("|---|--------|--------|:---------:|:-----:|:------:|:--------:|")
143
145
  for idx, r in enumerate(results, 1):
144
- pct = r.ratio * 100
145
- err = f" ⚠ {r.error}" if r.error else ""
146
- lines.append(
147
- f"| {idx} | `{r.match.rel_path_a}` | `{r.match.rel_path_b}` "
148
- f"| {r.match.similarity:.1f} | {pct:.1f}%{err} "
149
- f"| +{r.additions} | −{r.deletions} |"
150
- )
146
+ if r.binary:
147
+ status = " Match" if r.hash_match else "✗ Mismatch"
148
+ lines.append(
149
+ f"| {idx} | `{r.match.rel_path_a}` | `{r.match.rel_path_b}` "
150
+ f"| {r.match.similarity:.1f} | [Binary: {status}] "
151
+ f"| | |"
152
+ )
153
+ else:
154
+ pct = r.ratio * 100
155
+ err = f" ⚠ {r.error}" if r.error else ""
156
+ lines.append(
157
+ f"| {idx} | `{r.match.rel_path_a}` | `{r.match.rel_path_b}` "
158
+ f"| {r.match.similarity:.1f} | {pct:.1f}%{err} "
159
+ f"| +{r.additions} | −{r.deletions} |"
160
+ )
151
161
 
152
162
  # Unmatched
153
- if unmatched_a or unmatched_b:
163
+ if include_uncompared and (unmatched_a or unmatched_b):
154
164
  lines.append("\n## Unmatched Files\n")
155
165
  if unmatched_a:
156
166
  lines.append(f"### Only in A (`{dir_a}`)\n")
@@ -188,11 +198,12 @@ def _generate_json_report(
188
198
  dir_a: str,
189
199
  dir_b: str,
190
200
  by_word: bool,
191
- compare_comment: bool,
201
+ strip_comments: bool,
192
202
  deep_results: list[DeepMatchResult] | None,
193
203
  output_path: str,
194
204
  *,
195
205
  metadata: AnalysisMetadata | None = None,
206
+ include_uncompared: bool = True,
196
207
  ) -> None:
197
208
  """Generate a JSON report for programmatic consumption.
198
209
 
@@ -201,7 +212,7 @@ def _generate_json_report(
201
212
  re-running the pipeline.
202
213
  """
203
214
  unit = "word" if by_word else "line"
204
- comment_mode = "included" if compare_comment else "excluded"
215
+ comment_mode = "stripped" if strip_comments else "included"
205
216
 
206
217
  meta_dict = None
207
218
  if metadata is not None:
@@ -215,7 +226,7 @@ def _generate_json_report(
215
226
 
216
227
  result_list = []
217
228
  for r in results:
218
- result_list.append({
229
+ entry = {
219
230
  "file_a": r.match.rel_path_a,
220
231
  "file_b": r.match.rel_path_b,
221
232
  "name_similarity": r.match.similarity,
@@ -224,7 +235,11 @@ def _generate_json_report(
224
235
  "deletions": r.deletions,
225
236
  "html_diff": r.html_diff,
226
237
  "error": r.error,
227
- })
238
+ "binary": r.binary,
239
+ }
240
+ if r.binary:
241
+ entry["hash_match"] = r.hash_match
242
+ result_list.append(entry)
228
243
 
229
244
  deep_list = None
230
245
  if deep_results is not None:
@@ -251,13 +266,13 @@ def _generate_json_report(
251
266
  "comment_mode": comment_mode,
252
267
  "summary": {
253
268
  "matched_pairs": len(results),
254
- "unmatched_a": len(unmatched_a),
255
- "unmatched_b": len(unmatched_b),
269
+ "unmatched_a_count": len(unmatched_a),
270
+ "unmatched_b_count": len(unmatched_b),
256
271
  },
257
272
  "results": result_list,
258
273
  "deep_results": deep_list,
259
- "unmatched_a": unmatched_a,
260
- "unmatched_b": unmatched_b,
274
+ "unmatched_a": unmatched_a if include_uncompared else [],
275
+ "unmatched_b": unmatched_b if include_uncompared else [],
261
276
  }
262
277
 
263
278
  out = Path(output_path)
@@ -276,21 +291,23 @@ def _generate_html_report(
276
291
  dir_a: str,
277
292
  dir_b: str,
278
293
  by_word: bool,
279
- compare_comment: bool,
294
+ strip_comments: bool,
280
295
  deep_results: list[DeepMatchResult] | None,
281
296
  output_path: str,
282
297
  ln_col_width: int = 28,
283
298
  *,
284
299
  metadata: AnalysisMetadata | None = None,
285
300
  hash_table_html: str | None = None,
301
+ include_uncompared: bool = True,
286
302
  ) -> None:
287
303
  """Generate a standalone HTML report with all diffs inline."""
288
304
  cover_html_body = build_cover_body(
289
305
  results, unmatched_a, unmatched_b,
290
- dir_a, dir_b, by_word, compare_comment,
306
+ dir_a, dir_b, by_word, strip_comments,
291
307
  deep_results=deep_results,
292
308
  metadata=metadata,
293
309
  hash_table_html=hash_table_html,
310
+ include_uncompared=include_uncompared,
294
311
  )
295
312
 
296
313
  # Append all inline diffs
@@ -346,9 +363,8 @@ def run_pipeline(
346
363
  dir_a: str,
347
364
  dir_b: str,
348
365
  by_word: bool = False,
349
- compare_comment: bool = True,
366
+ strip_comments: bool = False,
350
367
  squash_blanks: bool = False,
351
- output_pdf: str = "report.pdf",
352
368
  threshold: float = FUZZY_THRESHOLD,
353
369
  *,
354
370
  no_merge: bool = False,
@@ -384,10 +400,14 @@ def run_pipeline(
384
400
  sort_order: str = "asc",
385
401
  # Moved block detection
386
402
  detect_moved: bool = False,
403
+ # Uncompared files
404
+ include_uncompared: bool = True,
387
405
  # Bates prefix/suffix
388
406
  bates_prefix: str = "",
389
407
  bates_suffix: str = "",
390
408
  bates_start: int = 1,
409
+ # Binary handling
410
+ binary_handling: str = "hash",
391
411
  ) -> None:
392
412
  """Execute the full diff-to-report pipeline.
393
413
 
@@ -410,7 +430,7 @@ def run_pipeline(
410
430
  """
411
431
  # Determine effective output paths
412
432
  if report_pdf is None and report_html is None and report_md is None and report_json is None:
413
- report_pdf = output_pdf
433
+ report_pdf = "report.pdf"
414
434
 
415
435
  # Build default metadata if caller didn't provide one
416
436
  if metadata is None:
@@ -463,15 +483,30 @@ def run_pipeline(
463
483
  text_b = read_file(abs_b, encoding=encoding)
464
484
 
465
485
  if text_a is None or text_b is None:
466
- results.append(DiffResult(
467
- match=m, ratio=0.0, additions=0, deletions=0,
468
- html_diff="", error="Could not decode one or both files",
469
- ))
486
+ if binary_handling == "exclude":
487
+ continue
488
+ elif binary_handling == "hash":
489
+ hash_a = _sha256_file(str(root_a / m.rel_path_a))
490
+ hash_b = _sha256_file(str(root_b / m.rel_path_b))
491
+ hash_match = hash_a == hash_b
492
+ results.append(DiffResult(
493
+ match=m,
494
+ ratio=1.0 if hash_match else 0.0,
495
+ additions=0, deletions=0,
496
+ html_diff="",
497
+ binary=True,
498
+ hash_match=hash_match,
499
+ ))
500
+ else: # "error"
501
+ results.append(DiffResult(
502
+ match=m, ratio=0.0, additions=0, deletions=0,
503
+ html_diff="", error="Could not decode one or both files",
504
+ ))
470
505
  continue
471
506
 
472
- if not compare_comment:
473
- text_a = strip_comments(text_a, ext, squash_blanks=squash_blanks)
474
- text_b = strip_comments(text_b, ext, squash_blanks=squash_blanks)
507
+ if strip_comments:
508
+ text_a = _strip_comments_fn(text_a, ext, squash_blanks=squash_blanks)
509
+ text_b = _strip_comments_fn(text_b, ext, squash_blanks=squash_blanks)
475
510
 
476
511
  all_line_counts.append(text_a.count("\n") + 1)
477
512
  all_line_counts.append(text_b.count("\n") + 1)
@@ -493,11 +528,11 @@ def run_pipeline(
493
528
  ln_col_width, max(all_line_counts) if all_line_counts else 0)
494
529
 
495
530
  # Generate HTML diffs with unified column width
496
- for m_idx, m in enumerate(matches):
497
- r = results[m_idx]
498
- if r.error:
531
+ for r_idx, r in enumerate(results):
532
+ if r.error or r.binary:
499
533
  continue
500
534
 
535
+ m = r.match
501
536
  abs_a = str(root_a / m.rel_path_a)
502
537
  abs_b = str(root_b / m.rel_path_b)
503
538
  ext = Path(m.rel_path_a).suffix.lower()
@@ -506,9 +541,9 @@ def run_pipeline(
506
541
  text_b = read_file(abs_b, encoding=encoding)
507
542
  if text_a is None or text_b is None:
508
543
  continue
509
- if not compare_comment:
510
- text_a = strip_comments(text_a, ext, squash_blanks=squash_blanks)
511
- text_b = strip_comments(text_b, ext, squash_blanks=squash_blanks)
544
+ if strip_comments:
545
+ text_a = _strip_comments_fn(text_a, ext, squash_blanks=squash_blanks)
546
+ text_b = _strip_comments_fn(text_b, ext, squash_blanks=squash_blanks)
512
547
 
513
548
  html_diff = generate_html_diff(
514
549
  text_a, text_b,
@@ -522,7 +557,7 @@ def run_pipeline(
522
557
  by_word=by_word,
523
558
  detect_moved=detect_moved,
524
559
  )
525
- results[m_idx] = DiffResult(
560
+ results[r_idx] = DiffResult(
526
561
  match=r.match,
527
562
  ratio=r.ratio,
528
563
  additions=r.additions,
@@ -536,16 +571,16 @@ def run_pipeline(
536
571
  if sort_by:
537
572
  reverse = sort_order == "desc"
538
573
  if sort_by == "filename":
574
+ results.sort(
575
+ key=lambda r: PurePosixPath(r.match.rel_path_a).name.lower(),
576
+ reverse=reverse,
577
+ )
578
+ elif sort_by == "path":
539
579
  results.sort(key=lambda r: r.match.rel_path_a.lower(), reverse=reverse)
580
+ elif sort_by == "similarity":
581
+ results.sort(key=lambda r: r.match.similarity, reverse=reverse)
540
582
  elif sort_by == "ratio":
541
583
  results.sort(key=lambda r: r.ratio, reverse=reverse)
542
- elif sort_by == "size":
543
- def _file_size(r: DiffResult) -> int:
544
- try:
545
- return os.path.getsize(str(root_a / r.match.rel_path_a))
546
- except OSError:
547
- return 0
548
- results.sort(key=_file_size, reverse=reverse)
549
584
  logger.info(" Sorted by %s (%s)", sort_by, sort_order)
550
585
 
551
586
  # Deep Compare (only in deep mode)
@@ -569,9 +604,10 @@ def run_pipeline(
569
604
  logger.info("Generating JSON report …")
570
605
  _generate_json_report(
571
606
  results, unmatched_a, unmatched_b,
572
- dir_a, dir_b, by_word, compare_comment,
607
+ dir_a, dir_b, by_word, strip_comments,
573
608
  deep_results, report_json,
574
609
  metadata=metadata,
610
+ include_uncompared=include_uncompared,
575
611
  )
576
612
 
577
613
  # Markdown report
@@ -579,9 +615,10 @@ def run_pipeline(
579
615
  logger.info("Generating Markdown report …")
580
616
  _generate_markdown_report(
581
617
  results, unmatched_a, unmatched_b,
582
- dir_a, dir_b, by_word, compare_comment,
618
+ dir_a, dir_b, by_word, strip_comments,
583
619
  deep_results, report_md,
584
620
  metadata=metadata,
621
+ include_uncompared=include_uncompared,
585
622
  )
586
623
 
587
624
  # HTML report
@@ -589,10 +626,11 @@ def run_pipeline(
589
626
  logger.info("Generating HTML report …")
590
627
  _generate_html_report(
591
628
  results, unmatched_a, unmatched_b,
592
- dir_a, dir_b, by_word, compare_comment,
629
+ dir_a, dir_b, by_word, strip_comments,
593
630
  deep_results, report_html, ln_col_width,
594
631
  metadata=metadata,
595
632
  hash_table_html=hash_table_html,
633
+ include_uncompared=include_uncompared,
596
634
  )
597
635
 
598
636
  # PDF report
@@ -600,7 +638,7 @@ def run_pipeline(
600
638
  logger.info("Generating PDF report (divide-and-conquer) …")
601
639
  _generate_pdf_report(
602
640
  results, unmatched_a, unmatched_b,
603
- dir_a, dir_b, by_word, compare_comment,
641
+ dir_a, dir_b, by_word, strip_comments,
604
642
  deep_results, report_pdf,
605
643
  no_merge=no_merge,
606
644
  show_page_number=show_page_number,
@@ -614,6 +652,7 @@ def run_pipeline(
614
652
  bates_prefix=bates_prefix,
615
653
  bates_suffix=bates_suffix,
616
654
  bates_start=bates_start,
655
+ include_uncompared=include_uncompared,
617
656
  )
618
657
 
619
658
  logger.info("Done (reports) ✓")
@@ -658,7 +697,7 @@ def _generate_pdf_report(
658
697
  dir_a: str,
659
698
  dir_b: str,
660
699
  by_word: bool,
661
- compare_comment: bool,
700
+ strip_comments: bool,
662
701
  deep_results: list[DeepMatchResult] | None,
663
702
  output_pdf: str,
664
703
  *,
@@ -674,6 +713,7 @@ def _generate_pdf_report(
674
713
  bates_prefix: str = "",
675
714
  bates_suffix: str = "",
676
715
  bates_start: int = 1,
716
+ include_uncompared: bool = True,
677
717
  ) -> None:
678
718
  """Generate PDF report with divide-and-conquer merging."""
679
719
  if no_merge:
@@ -686,10 +726,11 @@ def _generate_pdf_report(
686
726
  # (1) Cover page
687
727
  cover_body = build_cover_body(
688
728
  results, unmatched_a, unmatched_b,
689
- dir_a, dir_b, by_word, compare_comment,
729
+ dir_a, dir_b, by_word, strip_comments,
690
730
  deep_results=deep_results,
691
731
  metadata=metadata,
692
732
  hash_table_html=hash_table_html,
733
+ include_uncompared=include_uncompared,
693
734
  )
694
735
  cover_html = _html_wrap("Diffinite — Cover", cover_body)
695
736
  if no_merge:
@@ -700,9 +741,43 @@ def _generate_pdf_report(
700
741
  if cover_ok:
701
742
  logger.info(" Cover page → OK")
702
743
 
744
+ # ── Pre-flight: warn about large diffs that may slow PDF ────
745
+ # 500 KB of HTML ≈ 500+ source lines in side-by-side diff table.
746
+ # xhtml2pdf layout becomes noticeably slow above this threshold.
747
+ _LARGE_DIFF_BYTES = 500_000
748
+ large_files = [
749
+ (i, r) for i, r in enumerate(results, 1)
750
+ if not r.error and not r.binary and len(r.html_diff) > _LARGE_DIFF_BYTES
751
+ ]
752
+ if large_files:
753
+ logger.warning(
754
+ "⚠ %d file(s) have large diffs — PDF rendering may be "
755
+ "slow or hang:", len(large_files),
756
+ )
757
+ for i, r in large_files:
758
+ size_kb = len(r.html_diff) / 1024
759
+ logger.warning(
760
+ " %d. %s (%.0f KB HTML)",
761
+ i, r.match.rel_path_a, size_kb,
762
+ )
763
+ logger.warning(
764
+ " Consider: --collapse-identical (shrink diffs), "
765
+ "--no-merge (split PDFs), or --report-html (fast export)."
766
+ )
767
+
703
768
  # (2) Per-file diff pages
704
769
  diff_pdf_pairs: list[tuple[str, DiffResult]] = []
705
770
  for idx, r in enumerate(results, 1):
771
+ # Per-file warning for large diffs
772
+ if (not r.error and not r.binary
773
+ and len(r.html_diff) > _LARGE_DIFF_BYTES):
774
+ size_kb = len(r.html_diff) / 1024
775
+ logger.warning(
776
+ "⚠ Rendering PDF %d/%d (%s, %.0f KB) — "
777
+ "this may take a while…",
778
+ idx, len(results), r.match.rel_path_a, size_kb,
779
+ )
780
+
706
781
  diff_html = build_diff_page_html(
707
782
  r, idx, unit,
708
783
  show_page_number=show_page_number,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffinite
3
- Version: 0.9.6
3
+ Version: 0.10.0
4
4
  Summary: Forensic source-code comparison tool — Winnowing fingerprints and professional PDF reports for IP litigation & code audit
5
5
  Author: nash-dir
6
6
  License: Apache-2.0
@@ -69,7 +69,7 @@ class TestDeepCompareArgs:
69
69
  "--mode", "deep",
70
70
  "--k-gram", "5",
71
71
  "--window", "3",
72
- "--threshold-deep", "0.20",
72
+ "--threshold-deep", "20",
73
73
  ])
74
74
 
75
75
 
@@ -84,7 +84,7 @@ class TestAnnotationsAndReportFlags:
84
84
  "-o", str(tmp_path / "out.pdf"),
85
85
  "--collapse-identical",
86
86
  "--page-number", "--file-number",
87
- "--bates-number", "--show-filename",
87
+ "--bates-number", "--filename",
88
88
  ])
89
89
 
90
90
  def test_threshold_accepts_value(self, tmp_path):
@@ -107,3 +107,41 @@ class TestAnnotationsAndReportFlags:
107
107
  ])
108
108
  from pathlib import Path
109
109
  assert Path(json_path).exists()
110
+
111
+
112
+ class TestIncludeUncomparedFlag:
113
+ """Verify --include-uncompared / --no-include-uncompared flags."""
114
+
115
+ def test_include_uncompared_default_true(self, tmp_path):
116
+ """Default behavior includes uncompared files."""
117
+ d_a = tmp_path / "a"; d_a.mkdir()
118
+ d_b = tmp_path / "b"; d_b.mkdir()
119
+ (d_a / "only_a.py").write_text("x = 1\n", encoding="utf-8")
120
+ json_path = str(tmp_path / "out.json")
121
+ main([
122
+ str(d_a), str(d_b),
123
+ "--report-json", json_path,
124
+ ])
125
+ import json
126
+ from pathlib import Path
127
+ data = json.loads(Path(json_path).read_text(encoding="utf-8"))
128
+ assert "only_a.py" in data["unmatched_a"]
129
+
130
+ def test_no_include_uncompared_excludes(self, tmp_path):
131
+ """--no-include-uncompared excludes unmatched file lists."""
132
+ d_a = tmp_path / "a"; d_a.mkdir()
133
+ d_b = tmp_path / "b"; d_b.mkdir()
134
+ (d_a / "only_a.py").write_text("x = 1\n", encoding="utf-8")
135
+ json_path = str(tmp_path / "out.json")
136
+ main([
137
+ str(d_a), str(d_b),
138
+ "--report-json", json_path,
139
+ "--no-include-uncompared",
140
+ ])
141
+ import json
142
+ from pathlib import Path
143
+ data = json.loads(Path(json_path).read_text(encoding="utf-8"))
144
+ assert data["unmatched_a"] == []
145
+ assert data["unmatched_b"] == []
146
+ # Summary counts should still show the real values
147
+ assert data["summary"]["unmatched_a_count"] == 1
@@ -3,7 +3,7 @@
3
3
  import pytest
4
4
 
5
5
  from diffinite.models import DiffResult, FileMatch, DeepMatchResult
6
- from diffinite.pdf_gen import build_cover_body, build_diff_page_html
6
+ from diffinite.pdf_gen import build_cover_body, build_diff_page_html, _break_path
7
7
 
8
8
 
9
9
  # ---------------------------------------------------------------------------
@@ -37,7 +37,7 @@ def _cover(results=None, *, deep_results=None):
37
37
  dir_a="dir_a",
38
38
  dir_b="dir_b",
39
39
  by_word=False,
40
- compare_comment=True,
40
+ strip_comments=False,
41
41
  deep_results=deep_results,
42
42
  )
43
43
 
@@ -55,8 +55,9 @@ class TestBuildCoverHtml:
55
55
 
56
56
  def test_contains_file_names(self):
57
57
  html = _cover()
58
- assert "handler.java" in html
59
- assert "looper.java" in html
58
+ # _break_path inserts &#8203; after path separators (., /, \, _)
59
+ assert "handler." in html
60
+ assert "looper." in html
60
61
 
61
62
  def test_contains_ratio(self):
62
63
  html = _cover()
@@ -79,10 +80,10 @@ class TestBuildCoverHtml:
79
80
  dir_a="left",
80
81
  dir_b="right",
81
82
  by_word=False,
82
- compare_comment=True,
83
+ strip_comments=False,
83
84
  )
84
- assert "orphan_a.py" in html
85
- assert "orphan_b.py" in html
85
+ # _break_path inserts &#8203; after separators, so check partial strings
86
+ assert "orphan" in html
86
87
 
87
88
  def test_deep_results_without_channels(self):
88
89
  deep = [
@@ -92,8 +93,8 @@ class TestBuildCoverHtml:
92
93
  ),
93
94
  ]
94
95
  html = _cover(deep_results=deep)
95
- assert "foo.py" in html
96
- assert "bar.py" in html
96
+ assert "foo." in html
97
+ assert "bar." in html
97
98
 
98
99
  def test_deep_results_display(self):
99
100
  deep = [
@@ -104,8 +105,8 @@ class TestBuildCoverHtml:
104
105
  ),
105
106
  ]
106
107
  html = _cover(deep_results=deep)
107
- assert "foo.py" in html
108
- assert "bar.py" in html
108
+ assert "foo." in html
109
+ assert "bar." in html
109
110
  assert "50" in html # shared hashes
110
111
 
111
112
 
@@ -155,3 +156,67 @@ class TestBuildDiffPageHtml:
155
156
  show_filename=True,
156
157
  )
157
158
  assert "annotated.py" in html
159
+
160
+
161
+ # ---------------------------------------------------------------------------
162
+ # _break_path tests
163
+ # ---------------------------------------------------------------------------
164
+ class TestBreakPath:
165
+ """Verify _break_path inserts zero-width spaces at path separators."""
166
+
167
+ def test_slash(self):
168
+ result = _break_path("src/main/java")
169
+ assert "src/&#8203;main/&#8203;java" == result
170
+
171
+ def test_backslash(self):
172
+ result = _break_path("src\\main\\java")
173
+ assert "src\\&#8203;main\\&#8203;java" == result
174
+
175
+ def test_dot(self):
176
+ result = _break_path("handler.java")
177
+ assert "handler.&#8203;java" == result
178
+
179
+ def test_underscore(self):
180
+ result = _break_path("my_file_name")
181
+ assert "my_&#8203;file_&#8203;name" == result
182
+
183
+ def test_combined(self):
184
+ result = _break_path("src/com/example/my_handler.java")
185
+ assert "&#8203;" in result
186
+
187
+ def test_empty(self):
188
+ assert _break_path("") == ""
189
+
190
+
191
+ # ---------------------------------------------------------------------------
192
+ # include_uncompared tests
193
+ # ---------------------------------------------------------------------------
194
+ class TestIncludeUncompared:
195
+ """Verify include_uncompared parameter on build_cover_body."""
196
+
197
+ def test_excludes_unmatched_when_false(self):
198
+ html = build_cover_body(
199
+ _make_results(),
200
+ unmatched_a=["orphan_a.py"],
201
+ unmatched_b=["orphan_b.py"],
202
+ dir_a="left",
203
+ dir_b="right",
204
+ by_word=False,
205
+ strip_comments=False,
206
+ include_uncompared=False,
207
+ )
208
+ assert "orphan" not in html
209
+ assert "Unmatched Files" not in html
210
+
211
+ def test_includes_unmatched_by_default(self):
212
+ html = build_cover_body(
213
+ _make_results(),
214
+ unmatched_a=["orphan_a.py"],
215
+ unmatched_b=["orphan_b.py"],
216
+ dir_a="left",
217
+ dir_b="right",
218
+ by_word=False,
219
+ strip_comments=False,
220
+ )
221
+ assert "orphan" in html
222
+ assert "Unmatched Files" in html
@@ -23,9 +23,9 @@ class TestPipelineE2E:
23
23
  run_pipeline(
24
24
  dir_a=EXAMPLE_LEFT,
25
25
  dir_b=EXAMPLE_RIGHT,
26
- output_pdf=output,
26
+ report_pdf=output,
27
27
  by_word=False,
28
- compare_comment=False,
28
+ strip_comments=True,
29
29
  )
30
30
  assert Path(output).exists()
31
31
  assert Path(output).stat().st_size > 0
@@ -37,7 +37,7 @@ class TestPipelineE2E:
37
37
  run_pipeline(
38
38
  dir_a=EXAMPLE_LEFT,
39
39
  dir_b=EXAMPLE_RIGHT,
40
- output_pdf=output,
40
+ report_pdf=output,
41
41
  exec_mode="deep",
42
42
  workers=2,
43
43
  kgram_size=5,
@@ -53,7 +53,7 @@ class TestPipelineE2E:
53
53
  run_pipeline(
54
54
  dir_a=EXAMPLE_LEFT,
55
55
  dir_b=EXAMPLE_RIGHT,
56
- output_pdf=output,
56
+ report_pdf=output,
57
57
  no_merge=True,
58
58
  )
59
59
  files_dir = tmp_path / "individual_files"
File without changes
File without changes
File without changes
File without changes