mmar-mapi 1.0.19__py3-none-any.whl → 1.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mmar-mapi might be problematic. Click here for more details.

mmar_mapi/api.py CHANGED
@@ -76,8 +76,7 @@ ForceOCR = StrEnum("ForceOCR", ["ENABLED", "DISABLED", "AUTO"])
76
76
  OutputType = StrEnum("OutputType", ["RAW", "PLAIN", "MARKDOWN"])
77
77
 
78
78
 
79
- class DocExtractionSpec(BaseModel):
80
- page_range: PageRange | None = None
79
+ class ExtractionEngineSpec(BaseModel):
81
80
  output_type: OutputType = OutputType.MARKDOWN
82
81
  force_ocr: ForceOCR = ForceOCR.AUTO
83
82
  do_ocr: bool = False
@@ -88,19 +87,28 @@ class DocExtractionSpec(BaseModel):
88
87
  generate_page_images: bool = False
89
88
  images_scale: float = 2.0
90
89
 
90
+
91
+ class DocExtractionSpec(BaseModel):
92
+ page_range: PageRange | None = None
93
+ engine: ExtractionEngineSpec = ExtractionEngineSpec()
94
+
91
95
  def _update(self, **update):
92
96
  return self.model_copy(update=update)
93
97
 
98
+ def _update_engine(self, **engine_update):
99
+ return self._update(engine=self.engine.model_copy(update=engine_update))
100
+
94
101
  # fmt: off
95
- def with_output_type_raw(self): return self._update(output_type=OutputType.RAW)
96
- def with_output_type_plain(self): return self._update(output_type=OutputType.PLAIN)
97
- def with_ocr(self): return self._update(do_ocr=True)
98
- def with_tables(self): return self._update(do_table_structure=True, do_cell_matching=True)
99
- def with_images(self): return self._update(do_image_extraction=True)
100
- def with_annotations(self): return self._update(do_annotations=True)
101
- def with_force_ocr_enabled(self): return self._update(force_ocr=ForceOCR.ENABLED)
102
- def with_force_ocr_disabled(self): return self._update(force_ocr=ForceOCR.DISABLED)
103
- def with_page_images(self): return self._update(generate_page_images=True)
102
+ def with_output_type_raw(self): return self._update_engine(output_type=OutputType.RAW)
103
+ def with_output_type_plain(self): return self._update_engine(output_type=OutputType.PLAIN)
104
+ def with_ocr(self): return self._update_engine(do_ocr=True)
105
+ def with_tables(self): return self._update_engine(do_table_structure=True, do_cell_matching=True)
106
+ def with_images(self): return self._update_engine(do_image_extraction=True)
107
+ def with_annotations(self): return self._update_engine(do_annotations=True)
108
+ def with_force_ocr_enabled(self): return self._update_engine(force_ocr=ForceOCR.ENABLED)
109
+ def with_force_ocr_disabled(self): return self._update_engine(force_ocr=ForceOCR.DISABLED)
110
+ def with_page_images(self): return self._update_engine(generate_page_images=True)
111
+
104
112
  def with_page_range(self, page_range: PageRange): return self._update(page_range=page_range)
105
113
  # fmt: on
106
114
 
@@ -130,7 +138,7 @@ class ExtractedPageImage(ExtractedImage):
130
138
 
131
139
 
132
140
  class DocExtractionOutput(BaseModel):
133
- config: DocExtractionSpec
141
+ spec: DocExtractionSpec
134
142
  text: str = ""
135
143
  tables: list[ExtractedTable] = []
136
144
  pictures: list[ExtractedPicture] = []
@@ -138,6 +146,6 @@ class DocExtractionOutput(BaseModel):
138
146
 
139
147
 
140
148
  class DocumentExtractorAPI:
141
- def extract(self, *, resource_id: ResourceId, spec: DocExtractionSpec) -> ResourceId:
149
+ def extract(self, *, resource_id: ResourceId, spec: DocExtractionSpec) -> ResourceId | None:
142
150
  """returns file with DocExtractionOutput"""
143
151
  raise NotImplementedError
mmar_mapi/file_storage.py CHANGED
@@ -96,6 +96,9 @@ class FileStorage:
96
96
  res = self.download_text(resource_id).split("\n")
97
97
  return res
98
98
 
99
+ def get_path(self, resource_id: ResourceId | None) -> Path | None:
100
+ return self._get_path(resource_id)
101
+
99
102
  def _get_path(self, resource_id: ResourceId | None) -> Path | None:
100
103
  if not resource_id:
101
104
  return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mmar-mapi
3
- Version: 1.0.19
3
+ Version: 1.0.20
4
4
  Summary: Common pure/IO utilities for multi-modal architectures team
5
5
  Keywords:
6
6
  Author: Eugene Tagin
@@ -1,7 +1,7 @@
1
1
  mmar_mapi/__init__.py,sha256=9Q5xsrj26uUnn7ZWvvJUvdVIuzC2oCIeNB4dEoqjF-o,1256
2
- mmar_mapi/api.py,sha256=R9v-1QQWocj5OjNk70T4XnEUTBYGujlwBFurbodiBZA,4373
2
+ mmar_mapi/api.py,sha256=LCWO4HN8mAmgpZI5KJ5MSZwI55Y7hWuplOo1e3EGC_I,4670
3
3
  mmar_mapi/decorators_maybe_lru_cache.py,sha256=eO2I6t1fHLUNRABClK1c8EZzHAmCeSK6O-hbJGb2c9E,444
4
- mmar_mapi/file_storage.py,sha256=kxh2DcKY1M9MMb-U03doDYmowHH9VoGYetqBubIJhLI,4937
4
+ mmar_mapi/file_storage.py,sha256=xJU59HmXFsfc53XALdx53IwyqV_k4218AzzXq1Q65Js,5052
5
5
  mmar_mapi/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  mmar_mapi/models/base.py,sha256=mKtXV2x51XVj7W-et9tjGcPMDUUUMelW-BywMgFc2p0,411
7
7
  mmar_mapi/models/chat.py,sha256=-XilkiderIOFG1oSKRDG9NDOEN21sBpbTPHUrqVPjc4,15400
@@ -13,7 +13,7 @@ mmar_mapi/type_union.py,sha256=diwmzcnbqkpGFckPHNw9o8zyQ955mOGNvhTlcBJ0RMI,1905
13
13
  mmar_mapi/utils.py,sha256=FlW9n-84xz2zSHsahHzJ3Y4Wu5mjpFer6t9z6PF6lS0,488
14
14
  mmar_mapi/utils_import.py,sha256=pUyMFd8SItTxBKI-GO9JhRmy43jG_OQlUPr8QCBOSwg,1682
15
15
  mmar_mapi/xml_parser.py,sha256=VvLIX_XCZao9i0qqpTVx8nx0vbFXSe8pEbdJdXnj97g,568
16
- mmar_mapi-1.0.19.dist-info/licenses/LICENSE,sha256=2A90w8WjhOgQXnFuUijKJYazaqZ4_NTokYb9Po4y-9k,1061
17
- mmar_mapi-1.0.19.dist-info/WHEEL,sha256=-neZj6nU9KAMg2CnCY6T3w8J53nx1kFGw_9HfoSzM60,79
18
- mmar_mapi-1.0.19.dist-info/METADATA,sha256=iFhcn0K4RZtc0xJSmsQM1fBbrbG1XvmMTeuC0XSdAHc,944
19
- mmar_mapi-1.0.19.dist-info/RECORD,,
16
+ mmar_mapi-1.0.20.dist-info/licenses/LICENSE,sha256=2A90w8WjhOgQXnFuUijKJYazaqZ4_NTokYb9Po4y-9k,1061
17
+ mmar_mapi-1.0.20.dist-info/WHEEL,sha256=-neZj6nU9KAMg2CnCY6T3w8J53nx1kFGw_9HfoSzM60,79
18
+ mmar_mapi-1.0.20.dist-info/METADATA,sha256=IoMO8Hdd07-s3F4aC3OuA7dgYgZddbXiK7ES5Jwnyck,944
19
+ mmar_mapi-1.0.20.dist-info/RECORD,,