PyPI - mmar-mapi - Versions diffs - 1.0.19__tar.gz → 1.0.21__tar.gz - Mend

mmar-mapi 1.0.19__tar.gz → 1.0.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mmar-mapi might be problematic. Click here for more details.

Files changed (19) hide show

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mmar-mapi
-Version: 1.0.19
+Version: 1.0.21
 Summary: Common pure/IO utilities for multi-modal architectures team
 Keywords:
 Author: Eugene Tagin

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "mmar-mapi"
 # dynamic version is not supported yet on uv_build
-version = "1.0.19"
+version = "1.0.21"
 description = "Common pure/IO utilities for multi-modal architectures team"
 authors = [{name = "Eugene Tagin", email = "tagin@airi.net"}]
 license = "MIT"

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/api.py RENAMED Viewed

@@ -71,13 +71,18 @@ class TextExtractorAPI:
         raise NotImplementedError
-PageRange = Annotated[tuple[int, int], AfterValidator(lambda rng: rng[0] <= rng[1])]
+def _validate_page_range(v: tuple[int, int]) -> tuple[int, int]:
+    if v[0] < 1 or v[1] < v[0]:
+        raise ValueError("Invalid page range: start must be ≥ 1 and end must be ≥ start.")
+    return v
+PageRange = Annotated[tuple[int, int], AfterValidator(_validate_page_range)]
 ForceOCR = StrEnum("ForceOCR", ["ENABLED", "DISABLED", "AUTO"])
 OutputType = StrEnum("OutputType", ["RAW", "PLAIN", "MARKDOWN"])
-class DocExtractionSpec(BaseModel):
-    page_range: PageRange | None = None
+class ExtractionEngineSpec(BaseModel):
     output_type: OutputType = OutputType.MARKDOWN
     force_ocr: ForceOCR = ForceOCR.AUTO
     do_ocr: bool = False
@@ -88,19 +93,28 @@ class DocExtractionSpec(BaseModel):
     generate_page_images: bool = False
     images_scale: float = 2.0
+class DocExtractionSpec(BaseModel):
+    page_range: PageRange | None = None
+    engine: ExtractionEngineSpec = ExtractionEngineSpec()
     def _update(self, **update):
         return self.model_copy(update=update)
+    def _update_engine(self, **engine_update):
+        return self._update(engine=self.engine.model_copy(update=engine_update))
     # fmt: off
-    def with_output_type_raw(self): return self._update(output_type=OutputType.RAW)
-    def with_output_type_plain(self): return self._update(output_type=OutputType.PLAIN)
-    def with_ocr(self): return self._update(do_ocr=True)
-    def with_tables(self): return self._update(do_table_structure=True, do_cell_matching=True)
-    def with_images(self): return self._update(do_image_extraction=True)
-    def with_annotations(self): return self._update(do_annotations=True)
-    def with_force_ocr_enabled(self): return self._update(force_ocr=ForceOCR.ENABLED)
-    def with_force_ocr_disabled(self): return self._update(force_ocr=ForceOCR.DISABLED)
-    def with_page_images(self): return self._update(generate_page_images=True)
+    def with_output_type_raw(self): return self._update_engine(output_type=OutputType.RAW)
+    def with_output_type_plain(self): return self._update_engine(output_type=OutputType.PLAIN)
+    def with_ocr(self): return self._update_engine(do_ocr=True)
+    def with_tables(self): return self._update_engine(do_table_structure=True, do_cell_matching=True)
+    def with_images(self): return self._update_engine(do_image_extraction=True)
+    def with_annotations(self): return self._update_engine(do_annotations=True)
+    def with_force_ocr_enabled(self): return self._update_engine(force_ocr=ForceOCR.ENABLED)
+    def with_force_ocr_disabled(self): return self._update_engine(force_ocr=ForceOCR.DISABLED)
+    def with_page_images(self): return self._update_engine(generate_page_images=True)
     def with_page_range(self, page_range: PageRange): return self._update(page_range=page_range)
     # fmt: on
@@ -130,7 +144,7 @@ class ExtractedPageImage(ExtractedImage):
 class DocExtractionOutput(BaseModel):
-    config: DocExtractionSpec
+    spec: DocExtractionSpec
     text: str = ""
     tables: list[ExtractedTable] = []
     pictures: list[ExtractedPicture] = []
@@ -138,6 +152,6 @@ class DocExtractionOutput(BaseModel):
 class DocumentExtractorAPI:
-    def extract(self, *, resource_id: ResourceId, spec: DocExtractionSpec) -> ResourceId:
+    def extract(self, *, resource_id: ResourceId, spec: DocExtractionSpec) -> ResourceId | None:
         """returns file with DocExtractionOutput"""
         raise NotImplementedError

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/file_storage.py RENAMED Viewed

@@ -96,6 +96,9 @@ class FileStorage:
         res = self.download_text(resource_id).split("\n")
         return res
+    def get_path(self, resource_id: ResourceId | None) -> Path | None:
+        return self._get_path(resource_id)
     def _get_path(self, resource_id: ResourceId | None) -> Path | None:
         if not resource_id:
             return None

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/LICENSE RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/README.md RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/__init__.py RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/decorators_maybe_lru_cache.py RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/models/__init__.py RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/models/base.py RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/models/chat.py RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/models/chat_item.py RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/models/enums.py RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/models/tracks.py RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/models/widget.py RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/type_union.py RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/utils.py RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/utils_import.py RENAMED Viewed

File without changes

{mmar_mapi-1.0.19 → mmar_mapi-1.0.21}/src/mmar_mapi/xml_parser.py RENAMED Viewed

File without changes

mmar-mapi 1.0.19__tar.gz → 1.0.21__tar.gz

Potentially problematic release.

mmar-mapi 1.0.19__tar.gz → 1.0.21__tar.gz