open-receipt-extractor 0.1.0__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. open_receipt_extractor-0.1.0/.gitignore +87 -0
  2. open_receipt_extractor-0.1.0/ARCHITECTURE.md +605 -0
  3. open_receipt_extractor-0.1.0/CHANGELOG.md +39 -0
  4. open_receipt_extractor-0.1.0/CONTRIBUTING.md +481 -0
  5. open_receipt_extractor-0.1.0/LICENSE +21 -0
  6. open_receipt_extractor-0.1.0/PKG-INFO +321 -0
  7. open_receipt_extractor-0.1.0/README.md +264 -0
  8. open_receipt_extractor-0.1.0/docs/project-deliverables.md +1431 -0
  9. open_receipt_extractor-0.1.0/docs/receipt-processing-design.md +493 -0
  10. open_receipt_extractor-0.1.0/examples/README.md +55 -0
  11. open_receipt_extractor-0.1.0/pyproject.toml +202 -0
  12. open_receipt_extractor-0.1.0/receipt_processor/__init__.py +62 -0
  13. open_receipt_extractor-0.1.0/receipt_processor/config.py +472 -0
  14. open_receipt_extractor-0.1.0/receipt_processor/core/__init__.py +80 -0
  15. open_receipt_extractor-0.1.0/receipt_processor/core/exceptions.py +115 -0
  16. open_receipt_extractor-0.1.0/receipt_processor/core/models.py +480 -0
  17. open_receipt_extractor-0.1.0/receipt_processor/core/types.py +336 -0
  18. open_receipt_extractor-0.1.0/receipt_processor/layout/__init__.py +12 -0
  19. open_receipt_extractor-0.1.0/receipt_processor/layout/reconstruct.py +219 -0
  20. open_receipt_extractor-0.1.0/receipt_processor/normalize/__init__.py +12 -0
  21. open_receipt_extractor-0.1.0/receipt_processor/normalize/document_loader.py +244 -0
  22. open_receipt_extractor-0.1.0/receipt_processor/normalize/pdf_render.py +167 -0
  23. open_receipt_extractor-0.1.0/receipt_processor/ocr/__init__.py +13 -0
  24. open_receipt_extractor-0.1.0/receipt_processor/ocr/adapters/__init__.py +42 -0
  25. open_receipt_extractor-0.1.0/receipt_processor/ocr/adapters/easyocr_adapter.py +237 -0
  26. open_receipt_extractor-0.1.0/receipt_processor/ocr/adapters/mock_adapter.py +285 -0
  27. open_receipt_extractor-0.1.0/receipt_processor/ocr/adapters/paddleocr_adapter.py +253 -0
  28. open_receipt_extractor-0.1.0/receipt_processor/ocr/base.py +215 -0
  29. open_receipt_extractor-0.1.0/receipt_processor/output/__init__.py +13 -0
  30. open_receipt_extractor-0.1.0/receipt_processor/output/artifacts.py +252 -0
  31. open_receipt_extractor-0.1.0/receipt_processor/output/json_serializer.py +86 -0
  32. open_receipt_extractor-0.1.0/receipt_processor/output/tabular.py +128 -0
  33. open_receipt_extractor-0.1.0/receipt_processor/parse/__init__.py +12 -0
  34. open_receipt_extractor-0.1.0/receipt_processor/parse/base.py +595 -0
  35. open_receipt_extractor-0.1.0/receipt_processor/parse/extractors/__init__.py +20 -0
  36. open_receipt_extractor-0.1.0/receipt_processor/parse/extractors/date_extractor.py +427 -0
  37. open_receipt_extractor-0.1.0/receipt_processor/parse/extractors/header_extractor.py +320 -0
  38. open_receipt_extractor-0.1.0/receipt_processor/parse/extractors/items_extractor.py +296 -0
  39. open_receipt_extractor-0.1.0/receipt_processor/parse/extractors/payment_extractor.py +159 -0
  40. open_receipt_extractor-0.1.0/receipt_processor/parse/extractors/tax_extractor.py +229 -0
  41. open_receipt_extractor-0.1.0/receipt_processor/parse/extractors/totals_extractor.py +208 -0
  42. open_receipt_extractor-0.1.0/receipt_processor/parse/rules/__init__.py +12 -0
  43. open_receipt_extractor-0.1.0/receipt_processor/parse/rules/keywords.py +98 -0
  44. open_receipt_extractor-0.1.0/receipt_processor/parse/rules/keywords_en.py +157 -0
  45. open_receipt_extractor-0.1.0/receipt_processor/parse/rules/keywords_fr.py +164 -0
  46. open_receipt_extractor-0.1.0/receipt_processor/parse/rules/numbers.py +238 -0
  47. open_receipt_extractor-0.1.0/receipt_processor/pipeline/__init__.py +18 -0
  48. open_receipt_extractor-0.1.0/receipt_processor/pipeline/runner.py +507 -0
  49. open_receipt_extractor-0.1.0/receipt_processor/preprocess/__init__.py +14 -0
  50. open_receipt_extractor-0.1.0/receipt_processor/preprocess/ops.py +422 -0
  51. open_receipt_extractor-0.1.0/receipt_processor/preprocess/pipeline.py +132 -0
  52. open_receipt_extractor-0.1.0/receipt_processor/validate/__init__.py +12 -0
  53. open_receipt_extractor-0.1.0/receipt_processor/validate/scoring.py +220 -0
  54. open_receipt_extractor-0.1.0/tests/__init__.py +0 -0
  55. open_receipt_extractor-0.1.0/tests/conftest.py +169 -0
  56. open_receipt_extractor-0.1.0/tests/golden/README.md +186 -0
  57. open_receipt_extractor-0.1.0/tests/golden/__init__.py +0 -0
  58. open_receipt_extractor-0.1.0/tests/golden/manifest.json +5 -0
  59. open_receipt_extractor-0.1.0/tests/golden/receipts/.gitkeep +0 -0
  60. open_receipt_extractor-0.1.0/tests/integration/__init__.py +0 -0
  61. open_receipt_extractor-0.1.0/tests/integration/metrics.py +302 -0
  62. open_receipt_extractor-0.1.0/tests/integration/test_pipeline.py +187 -0
  63. open_receipt_extractor-0.1.0/tests/unit/__init__.py +0 -0
  64. open_receipt_extractor-0.1.0/tests/unit/test_artifacts.py +563 -0
  65. open_receipt_extractor-0.1.0/tests/unit/test_config.py +324 -0
  66. open_receipt_extractor-0.1.0/tests/unit/test_date_extractor.py +554 -0
  67. open_receipt_extractor-0.1.0/tests/unit/test_document_loader.py +423 -0
  68. open_receipt_extractor-0.1.0/tests/unit/test_exceptions.py +243 -0
  69. open_receipt_extractor-0.1.0/tests/unit/test_header_extractor.py +439 -0
  70. open_receipt_extractor-0.1.0/tests/unit/test_items_extractor.py +751 -0
  71. open_receipt_extractor-0.1.0/tests/unit/test_json_serializer.py +489 -0
  72. open_receipt_extractor-0.1.0/tests/unit/test_keywords.py +230 -0
  73. open_receipt_extractor-0.1.0/tests/unit/test_models.py +559 -0
  74. open_receipt_extractor-0.1.0/tests/unit/test_numbers.py +466 -0
  75. open_receipt_extractor-0.1.0/tests/unit/test_ocr_adapters.py +1529 -0
  76. open_receipt_extractor-0.1.0/tests/unit/test_payment_extractor.py +492 -0
  77. open_receipt_extractor-0.1.0/tests/unit/test_pdf_render.py +326 -0
  78. open_receipt_extractor-0.1.0/tests/unit/test_pipeline_runner.py +540 -0
  79. open_receipt_extractor-0.1.0/tests/unit/test_preprocess_ops.py +760 -0
  80. open_receipt_extractor-0.1.0/tests/unit/test_preprocess_pipeline.py +394 -0
  81. open_receipt_extractor-0.1.0/tests/unit/test_reconstruct.py +652 -0
  82. open_receipt_extractor-0.1.0/tests/unit/test_scoring.py +791 -0
  83. open_receipt_extractor-0.1.0/tests/unit/test_tabular.py +482 -0
  84. open_receipt_extractor-0.1.0/tests/unit/test_tax_extractor.py +913 -0
  85. open_receipt_extractor-0.1.0/tests/unit/test_totals_extractor.py +805 -0
  86. open_receipt_extractor-0.1.0/tests/unit/test_types.py +282 -0
@@ -0,0 +1,87 @@
1
+ # ---------------------------------------------------------------------------
2
+ # Python byte-code & cache
3
+ # ---------------------------------------------------------------------------
4
+ __pycache__/
5
+ *.py[cod]
6
+ *.pyo
7
+ *.pyd
8
+ *.so
9
+
10
+ # ---------------------------------------------------------------------------
11
+ # Virtual environments
12
+ # ---------------------------------------------------------------------------
13
+ .venv/
14
+ venv/
15
+ env/
16
+ ENV/
17
+ env.bak/
18
+ venv.bak/
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Build / distribution artefacts
22
+ # ---------------------------------------------------------------------------
23
+ *.egg-info/
24
+ *.egg
25
+ dist/
26
+ build/
27
+ wheels/
28
+ MANIFEST
29
+
30
+ # ---------------------------------------------------------------------------
31
+ # Static analysis caches
32
+ # ---------------------------------------------------------------------------
33
+ .mypy_cache/
34
+ .ruff_cache/
35
+ .dmypy.json
36
+ dmypy.json
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Test & coverage
40
+ # ---------------------------------------------------------------------------
41
+ .pytest_cache/
42
+ *.coverage
43
+ .coverage
44
+ .coverage.*
45
+ htmlcov/
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Runtime artefacts produced by the pipeline
49
+ # Local artifact storage directory written by OUT-004 (ArtifactManager).
50
+ # Never commit processed receipt data to source control.
51
+ # ---------------------------------------------------------------------------
52
+ artifacts/
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Secrets and local configuration overrides
56
+ # ---------------------------------------------------------------------------
57
+ .env
58
+ .env.*
59
+ !.env.example
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # IDE / editor files
63
+ # ---------------------------------------------------------------------------
64
+ .idea/
65
+ .vscode/
66
+ *.swp
67
+ *.swo
68
+ *~
69
+ .DS_Store
70
+ Thumbs.db
71
+
72
+ # ---------------------------------------------------------------------------
73
+ # Jupyter notebooks (if used for analysis/exploration)
74
+ # ---------------------------------------------------------------------------
75
+ .ipynb_checkpoints/
76
+ *.ipynb
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # Generated test metrics (tracked separately per run; not committed)
80
+ # ---------------------------------------------------------------------------
81
+ tests/integration/metrics_history.jsonl
82
+
83
+ # ---------------------------------------------------------------------------
84
+ # Documentation build output
85
+ # ---------------------------------------------------------------------------
86
+ docs/_build/
87
+ site/