docreader-ocr 0.2.0__tar.gz → 0.2.2__tar.gz

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (27)
  1. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/PKG-INFO +1 -1
  2. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/pyproject.toml +1 -1
  3. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/hub.py +3 -3
  4. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/tests/test_pipeline.py +9 -5
  5. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/.github/workflows/publish.yaml +0 -0
  6. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/.gitignore +0 -0
  7. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/LICENSE +0 -0
  8. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/README.md +0 -0
  9. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/__init__.py +0 -0
  10. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/classifier/__init__.py +0 -0
  11. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/classifier/base.py +0 -0
  12. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/classifier/yolo_classifier.py +0 -0
  13. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/config.py +0 -0
  14. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/detector/__init__.py +0 -0
  15. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/detector/base.py +0 -0
  16. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/detector/yolo_obb.py +0 -0
  17. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/factory.py +0 -0
  18. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/ocr/__init__.py +0 -0
  19. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/ocr/base.py +0 -0
  20. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/ocr/easyocr_engine.py +0 -0
  21. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/pipeline.py +0 -0
  22. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/preprocessing/__init__.py +0 -0
  23. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/preprocessing/geometry.py +0 -0
  24. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/schemas.py +0 -0
  25. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/src/docreader/utils.py +0 -0
  26. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/tests/test_hub.py +0 -0
  27. {docreader_ocr-0.2.0 → docreader_ocr-0.2.2}/tests/test_run.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docreader-ocr
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Document OCR pipeline: classify → detect fields → recognize text
5
5
  Project-URL: Homepage, https://github.com/mishanyacorleone/docreader
6
6
  Project-URL: Repository, https://github.com/mishanyacorleone/docreader
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "docreader-ocr"
7
- version = "0.2.0"
7
+ version = "0.2.2"
8
8
  description = "Document OCR pipeline: classify → detect fields → recognize text"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -17,15 +17,15 @@ from tqdm import tqdm
17
17
 
18
18
  logger = logging.getLogger(__name__)
19
19
 
20
- _BASE_URL_CLASSIFIER = "https://github.com/mishanyacorleone/docreader/releases/download/v0.2.0"
20
+ _BASE_URL_CLASSIFIER = "https://github.com/mishanyacorleone/docreader/releases/download/v0.2.1"
21
21
  _BASE_URL = "https://github.com/mishanyacorleone/docreader/releases/download/v0.1.0"
22
22
 
23
23
  MODEL_REGISTRY: dict[str, dict] = {
24
24
  # === Классификатор документов (YOLO OBB) ===
25
25
  "doc_classifier.pt": {
26
26
  "url": f"{_BASE_URL_CLASSIFIER}/doc_classifier.pt",
27
- "sha256": "b1af689fe58849474a6a5cf879458fcba6d017233ca1bd54b5d83098cd9387f5",
28
- "size_mb": 5.49,
27
+ "sha256": "d912884d8517cf776e989dc4fced855f34c2ee1d8b17732b778d7e84b7de84fc",
28
+ "size_mb": 6.03,
29
29
  },
30
30
 
31
31
  # === Детекторы зон ===
@@ -21,9 +21,13 @@
21
21
  # print(f"📄 Тип документа: {result.doc_type}")
22
22
  # print(f"🔍 Поля: {result.fields}")
23
23
 
24
- from docreader import DocReader
24
+ from docreader import create_detector, create_classifier, create_ocr
25
25
 
26
- reader = DocReader()
27
- result = reader.process("/mnt/mishutqa/PycharmProjects/sirius/docreader/tests/2-4-_jpg.rf.0f2c4d41aca84c3aa35969658498e905.jpg")
28
- for doc in result.documents:
29
- print(doc.zones)
26
+ # clf = create_classifier()
27
+ # res = clf.classify("/mnt/mishutqa/PycharmProjects/sirius/docreader/tests/1cb43a27a0baa9ba3bd003bdca1c3cd2_png.rf.556fec7fcd33c49b265283a3bdb79552.jpg")
28
+ # for doc in res:
29
+ # print(doc.doc_type)
30
+
31
+ det = create_detector()
32
+ res = det.detect("/mnt/mishutqa/PycharmProjects/sirius/docreader/tests/1cb43a27a0baa9ba3bd003bdca1c3cd2_png.rf.556fec7fcd33c49b265283a3bdb79552.jpg", doc_type="passport")
33
+ print(res)
File without changes
File without changes
File without changes