diffinite 0.4.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {diffinite-0.4.0/src/diffinite.egg-info → diffinite-0.6.0}/PKG-INFO +1 -1
  2. {diffinite-0.4.0 → diffinite-0.6.0}/pyproject.toml +1 -1
  3. diffinite-0.6.0/src/diffinite/__main__.py +11 -0
  4. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/cli.py +1 -1
  5. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/deep_compare.py +3 -1
  6. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/fingerprint.py +2 -2
  7. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/models.py +1 -1
  8. {diffinite-0.4.0 → diffinite-0.6.0/src/diffinite.egg-info}/PKG-INFO +1 -1
  9. diffinite-0.4.0/src/diffinite/__main__.py +0 -5
  10. {diffinite-0.4.0 → diffinite-0.6.0}/LICENSE +0 -0
  11. {diffinite-0.4.0 → diffinite-0.6.0}/NOTICE +0 -0
  12. {diffinite-0.4.0 → diffinite-0.6.0}/README.md +0 -0
  13. {diffinite-0.4.0 → diffinite-0.6.0}/setup.cfg +0 -0
  14. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/__init__.py +0 -0
  15. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/collector.py +0 -0
  16. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/differ.py +0 -0
  17. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/evidence.py +0 -0
  18. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/languages/__init__.py +0 -0
  19. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/languages/_registry.py +0 -0
  20. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/languages/_spec.py +0 -0
  21. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/languages/c_family.py +0 -0
  22. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/languages/csharp.py +0 -0
  23. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/languages/data.py +0 -0
  24. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/languages/go_rust_swift.py +0 -0
  25. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/languages/java.py +0 -0
  26. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/languages/javascript.py +0 -0
  27. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/languages/markup.py +0 -0
  28. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/languages/python.py +0 -0
  29. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/languages/scripting.py +0 -0
  30. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/parser.py +0 -0
  31. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/pdf_gen.py +0 -0
  32. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite/pipeline.py +0 -0
  33. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite.egg-info/SOURCES.txt +0 -0
  34. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite.egg-info/dependency_links.txt +0 -0
  35. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite.egg-info/entry_points.txt +0 -0
  36. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite.egg-info/requires.txt +0 -0
  37. {diffinite-0.4.0 → diffinite-0.6.0}/src/diffinite.egg-info/top_level.txt +0 -0
  38. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_cli.py +0 -0
  39. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_collector.py +0 -0
  40. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_deep_compare.py +0 -0
  41. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_differ.py +0 -0
  42. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_differ_extended.py +0 -0
  43. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_evidence.py +0 -0
  44. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_evidence_hash.py +0 -0
  45. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_fingerprint.py +0 -0
  46. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_languages.py +0 -0
  47. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_normalize.py +0 -0
  48. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_parser.py +0 -0
  49. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_pdf_gen.py +0 -0
  50. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_pipeline.py +0 -0
  51. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_plagiarism_dataset.py +0 -0
  52. {diffinite-0.4.0 → diffinite-0.6.0}/tests/test_sqlite_integration.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffinite
3
- Version: 0.4.0
3
+ Version: 0.6.0
4
4
  Summary: Forensic source-code comparison tool — Winnowing fingerprints and professional PDF reports for IP litigation & code audit
5
5
  Author: nash-dir
6
6
  License: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "diffinite"
7
- version = "0.4.0"
7
+ version = "0.6.0"
8
8
  description = "Forensic source-code comparison tool — Winnowing fingerprints and professional PDF reports for IP litigation & code audit"
9
9
  readme = "README.md"
10
10
  license = {text = "Apache-2.0"}
@@ -0,0 +1,11 @@
1
+ """Allow ``python -m diffinite``."""
2
+
3
+ import multiprocessing
4
+
5
+ from diffinite.cli import main
6
+
7
+ # Required for PyInstaller frozen executables.
8
+ # Without this, ProcessPoolExecutor child processes crash on Windows.
9
+ # Ref: https://docs.python.org/3/library/multiprocessing.html#multiprocessing.freeze_support
10
+ multiprocessing.freeze_support()
11
+ main()
@@ -194,7 +194,7 @@ def main(argv: list[str] | None = None) -> None:
194
194
  dest="window",
195
195
  help=(
196
196
  f"Winnowing window size (default: {DEFAULT_W}). "
197
- "Density guarantee: (W+K1) shared tokens always detected."
197
+ "Density guarantee: >=(W+K-1) shared tokens always detected."
198
198
  ),
199
199
  )
200
200
  deep_group.add_argument(
@@ -31,6 +31,7 @@ Winnowing 핑거프린트의 **역 인덱스(Inverted Index)** 를 활용하여,
31
31
  from __future__ import annotations
32
32
 
33
33
  import logging
34
+ import multiprocessing
34
35
  from collections import defaultdict
35
36
  from concurrent.futures import ProcessPoolExecutor
36
37
  from pathlib import Path
@@ -176,7 +177,8 @@ def run_deep_compare(
176
177
  fp_b: dict[str, set[int]] = {}
177
178
 
178
179
  all_items = items_a + items_b
179
- with ProcessPoolExecutor(max_workers=workers) as pool:
180
+ ctx = multiprocessing.get_context("spawn")
181
+ with ProcessPoolExecutor(max_workers=workers, mp_context=ctx) as pool:
180
182
  results = list(pool.map(_extract_one, all_items))
181
183
 
182
184
  for i, (rel, hset, cnt) in enumerate(results):
@@ -4,9 +4,9 @@ Stanford MOSS 스타일 문서 핑거프린팅 파이프라인을 구현한다.
4
4
  전체 흐름: Tokenize → K-gram → Rolling Hash → Winnow → Fingerprint Set
5
5
 
6
6
  핵심 보장:
7
- **밀도 보장 (Density Guarantee)** 두 문서가 (W + K 1) 토큰의
7
+ **밀도 보장 (Density Guarantee)** -- 두 문서가 >= (W + K - 1) 토큰의
8
8
  공통 부분 문자열을 공유하면, 반드시 1개 이상의 공통 핑거프린트가 생성된다.
9
- 현재 설정(K=5, W=4)에서 8 토큰 공유 시 탐지 보장.
9
+ 현재 설정(K=5, W=4)에서 >= 8 토큰 공유 시 탐지 보장.
10
10
 
11
11
  참조:
12
12
  Schleimer, Wilkerson, Aiken. "Winnowing: Local Algorithms for Document
@@ -163,7 +163,7 @@ class AnalysisMetadata:
163
163
  """K-gram 크기. 커질수록 정밀도 ↑ / 재현율 ↓."""
164
164
 
165
165
  w: int
166
- """Winnowing 윈도우 크기. 밀도 보장: (W+K-1) 토큰 공유 시 반드시 탐지."""
166
+ """Winnowing 윈도우 크기. 밀도 보장: >=(W+K-1) 토큰 공유 시 반드시 탐지."""
167
167
 
168
168
  threshold: float
169
169
  """최소 Jaccard 유사도 임계값. 이 미만의 매칭은 결과에서 제외."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffinite
3
- Version: 0.4.0
3
+ Version: 0.6.0
4
4
  Summary: Forensic source-code comparison tool — Winnowing fingerprints and professional PDF reports for IP litigation & code audit
5
5
  Author: nash-dir
6
6
  License: Apache-2.0
@@ -1,5 +0,0 @@
1
- """Allow ``python -m diffinite``."""
2
-
3
- from diffinite.cli import main
4
-
5
- main()
File without changes
File without changes
File without changes
File without changes
File without changes