mmar-mapi 1.0.18__tar.gz → 1.0.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mmar-mapi might be problematic. Click here for more details.
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/PKG-INFO +1 -1
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/pyproject.toml +1 -1
- mmar_mapi-1.0.20/src/mmar_mapi/api.py +151 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/file_storage.py +11 -2
- mmar_mapi-1.0.18/src/mmar_mapi/api.py +0 -62
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/LICENSE +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/README.md +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/__init__.py +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/decorators_maybe_lru_cache.py +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/models/__init__.py +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/models/base.py +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/models/chat.py +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/models/chat_item.py +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/models/enums.py +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/models/tracks.py +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/models/widget.py +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/type_union.py +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/utils.py +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/utils_import.py +0 -0
- {mmar_mapi-1.0.18 → mmar_mapi-1.0.20}/src/mmar_mapi/xml_parser.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "mmar-mapi"
|
|
3
3
|
# dynamic version is not supported yet on uv_build
|
|
4
|
-
version = "1.0.18"
|
|
4
|
+
version = "1.0.20"
|
|
5
5
|
description = "Common pure/IO utilities for multi-modal architectures team"
|
|
6
6
|
authors = [{name = "Eugene Tagin", email = "tagin@airi.net"}]
|
|
7
7
|
license = "MIT"
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
from enum import StrEnum
|
|
2
|
+
from typing import Annotated
|
|
3
|
+
|
|
4
|
+
from pydantic import AfterValidator, BaseModel
|
|
5
|
+
|
|
6
|
+
from mmar_mapi.file_storage import ResourceId
|
|
7
|
+
from mmar_mapi.models.chat import Chat, ChatMessage
|
|
8
|
+
from mmar_mapi.models.tracks import DomainInfo, TrackInfo
|
|
9
|
+
|
|
10
|
+
Value = str
|
|
11
|
+
Interpretation = str
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ChatManagerAPI:
|
|
15
|
+
def get_domains(self, *, client_id: str, language_code: str = "ru") -> list[DomainInfo]:
|
|
16
|
+
raise NotImplementedError
|
|
17
|
+
|
|
18
|
+
def get_tracks(self, *, client_id: str, language_code: str = "ru") -> list[TrackInfo]:
|
|
19
|
+
raise NotImplementedError
|
|
20
|
+
|
|
21
|
+
def get_response(self, *, chat: Chat) -> list[ChatMessage]:
|
|
22
|
+
raise NotImplementedError
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class TextGeneratorAPI:
|
|
26
|
+
def process(self, *, chat: Chat) -> str:
|
|
27
|
+
raise NotImplementedError
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ContentInterpreterRemoteResponse(BaseModel):
|
|
31
|
+
interpretation: str
|
|
32
|
+
resource_fname: str
|
|
33
|
+
resource: bytes
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ContentInterpreterRemoteAPI:
|
|
37
|
+
def interpret_remote(
|
|
38
|
+
self, *, kind: str, query: str, resource: bytes, chat: Chat | None = None
|
|
39
|
+
) -> ContentInterpreterRemoteResponse:
|
|
40
|
+
raise NotImplementedError
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ClassifierAPI:
|
|
44
|
+
def get_values(self) -> list[Value]:
|
|
45
|
+
raise NotImplementedError
|
|
46
|
+
|
|
47
|
+
def evaluate(self, *, chat: Chat) -> Value:
|
|
48
|
+
raise NotImplementedError
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class CriticAPI:
|
|
52
|
+
def evaluate(self, *, text: str, chat: Chat | None = None) -> float:
|
|
53
|
+
raise NotImplementedError
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ContentInterpreterAPI:
|
|
57
|
+
def interpret(
|
|
58
|
+
self, *, kind: str, query: str, resource_id: str = "", chat: Chat | None = None
|
|
59
|
+
) -> tuple[Interpretation, ResourceId | None]:
|
|
60
|
+
raise NotImplementedError
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class TextProcessorAPI:
|
|
64
|
+
def process(self, *, text: str, chat: Chat | None = None) -> str:
|
|
65
|
+
raise NotImplementedError
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class TextExtractorAPI:
|
|
69
|
+
def extract(self, *, resource_id: ResourceId) -> ResourceId:
|
|
70
|
+
"""returns file with text"""
|
|
71
|
+
raise NotImplementedError
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _validate_page_range(rng: tuple[int, int]) -> tuple[int, int]:
    """Check that a page range is ordered and return it unchanged.

    An ``AfterValidator`` callable must return the validated value: whatever
    it returns replaces the field value.  Returning the comparison result
    (as a bare ``rng[0] <= rng[1]`` expression would) turns the range into a
    bool and never rejects an inverted range.

    Raises:
        ValueError: if the first page is greater than the last page.
    """
    first, last = rng
    if first > last:
        raise ValueError(f"invalid page range: start {first} is greater than end {last}")
    return rng


# (first_page, last_page) pair; validated so that first_page <= last_page.
PageRange = Annotated[tuple[int, int], AfterValidator(_validate_page_range)]
|
|
75
|
+
ForceOCR = StrEnum("ForceOCR", ["ENABLED", "DISABLED", "AUTO"])
|
|
76
|
+
OutputType = StrEnum("OutputType", ["RAW", "PLAIN", "MARKDOWN"])
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class ExtractionEngineSpec(BaseModel):
|
|
80
|
+
output_type: OutputType = OutputType.MARKDOWN
|
|
81
|
+
force_ocr: ForceOCR = ForceOCR.AUTO
|
|
82
|
+
do_ocr: bool = False
|
|
83
|
+
do_table_structure: bool = False
|
|
84
|
+
do_cell_matching: bool = False
|
|
85
|
+
do_annotations: bool = False
|
|
86
|
+
do_image_extraction: bool = False
|
|
87
|
+
generate_page_images: bool = False
|
|
88
|
+
images_scale: float = 2.0
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class DocExtractionSpec(BaseModel):
|
|
92
|
+
page_range: PageRange | None = None
|
|
93
|
+
engine: ExtractionEngineSpec = ExtractionEngineSpec()
|
|
94
|
+
|
|
95
|
+
def _update(self, **update):
|
|
96
|
+
return self.model_copy(update=update)
|
|
97
|
+
|
|
98
|
+
def _update_engine(self, **engine_update):
|
|
99
|
+
return self._update(engine=self.engine.model_copy(update=engine_update))
|
|
100
|
+
|
|
101
|
+
# fmt: off
|
|
102
|
+
def with_output_type_raw(self): return self._update_engine(output_type=OutputType.RAW)
|
|
103
|
+
def with_output_type_plain(self): return self._update_engine(output_type=OutputType.PLAIN)
|
|
104
|
+
def with_ocr(self): return self._update_engine(do_ocr=True)
|
|
105
|
+
def with_tables(self): return self._update_engine(do_table_structure=True, do_cell_matching=True)
|
|
106
|
+
def with_images(self): return self._update_engine(do_image_extraction=True)
|
|
107
|
+
def with_annotations(self): return self._update_engine(do_annotations=True)
|
|
108
|
+
def with_force_ocr_enabled(self): return self._update_engine(force_ocr=ForceOCR.ENABLED)
|
|
109
|
+
def with_force_ocr_disabled(self): return self._update_engine(force_ocr=ForceOCR.DISABLED)
|
|
110
|
+
def with_page_images(self): return self._update_engine(generate_page_images=True)
|
|
111
|
+
|
|
112
|
+
def with_page_range(self, page_range: PageRange): return self._update(page_range=page_range)
|
|
113
|
+
# fmt: on
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class ExtractedImage(BaseModel):
|
|
117
|
+
page: int
|
|
118
|
+
image_resource_id: ResourceId | None = None
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class ExtractedImageMetadata(BaseModel):
|
|
122
|
+
annotation: str = ""
|
|
123
|
+
caption: str = ""
|
|
124
|
+
width: int | None = None
|
|
125
|
+
height: int | None = None
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class ExtractedPicture(ExtractedImage, ExtractedImageMetadata):
|
|
129
|
+
pass
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class ExtractedTable(ExtractedImage, ExtractedImageMetadata):
|
|
133
|
+
formatted_str: str
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class ExtractedPageImage(ExtractedImage):
|
|
137
|
+
pass
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class DocExtractionOutput(BaseModel):
|
|
141
|
+
spec: DocExtractionSpec
|
|
142
|
+
text: str = ""
|
|
143
|
+
tables: list[ExtractedTable] = []
|
|
144
|
+
pictures: list[ExtractedPicture] = []
|
|
145
|
+
page_images: list[ExtractedPageImage] = []
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class DocumentExtractorAPI:
|
|
149
|
+
def extract(self, *, resource_id: ResourceId, spec: DocExtractionSpec) -> ResourceId | None:
|
|
150
|
+
"""returns file with DocExtractionOutput"""
|
|
151
|
+
raise NotImplementedError
|
|
@@ -45,7 +45,7 @@ class FileStorage:
|
|
|
45
45
|
resource_id = self.upload(content, fname)
|
|
46
46
|
return resource_id
|
|
47
47
|
|
|
48
|
-
def upload(self, content: bytes | str, fname: str) -> ResourceId:
|
|
48
|
+
def upload(self, content: bytes | str, fname: str, origin: str | None = None) -> ResourceId:
|
|
49
49
|
if isinstance(content, str):
|
|
50
50
|
content = content.encode()
|
|
51
51
|
|
|
@@ -56,7 +56,7 @@ class FileStorage:
|
|
|
56
56
|
|
|
57
57
|
fpath_md = fpath.with_suffix(SUFFIX_METADATA)
|
|
58
58
|
update_date = f"{datetime.now():%Y-%m-%d--%H-%M-%S}"
|
|
59
|
-
metadata = {"fname": fname, "update_date": update_date, "size": len(content)}
|
|
59
|
+
metadata = {"fname": fname, "update_date": update_date, "size": len(content), "origin": origin}
|
|
60
60
|
fpath_md.write_text(json.dumps(metadata, ensure_ascii=False))
|
|
61
61
|
|
|
62
62
|
return str(fpath)
|
|
@@ -67,6 +67,12 @@ class FileStorage:
|
|
|
67
67
|
return None
|
|
68
68
|
return json.loads(metadata_path.read_text())
|
|
69
69
|
|
|
70
|
+
def get_fname(self, resource_id: ResourceId) -> str | None:
|
|
71
|
+
metadata = self.get_metadata(resource_id)
|
|
72
|
+
if metadata is None:
|
|
73
|
+
return None
|
|
74
|
+
return metadata.get("fname")
|
|
75
|
+
|
|
70
76
|
async def upload_async(self, content: bytes | str, fname: str) -> ResourceId:
|
|
71
77
|
return self.upload(content, fname)
|
|
72
78
|
|
|
@@ -90,6 +96,9 @@ class FileStorage:
|
|
|
90
96
|
res = self.download_text(resource_id).split("\n")
|
|
91
97
|
return res
|
|
92
98
|
|
|
99
|
+
def get_path(self, resource_id: ResourceId | None) -> Path | None:
|
|
100
|
+
return self._get_path(resource_id)
|
|
101
|
+
|
|
93
102
|
def _get_path(self, resource_id: ResourceId | None) -> Path | None:
|
|
94
103
|
if not resource_id:
|
|
95
104
|
return None
|
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
from mmar_mapi.models.chat import Chat, ChatMessage
|
|
2
|
-
from mmar_mapi.models.tracks import DomainInfo, TrackInfo
|
|
3
|
-
from pydantic import BaseModel
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
Value = str
|
|
7
|
-
Interpretation = str
|
|
8
|
-
ResourceId = str
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class ChatManagerAPI:
|
|
12
|
-
def get_domains(self, *, client_id: str, language_code: str = "ru") -> list[DomainInfo]:
|
|
13
|
-
raise NotImplementedError
|
|
14
|
-
|
|
15
|
-
def get_tracks(self, *, client_id: str, language_code: str = "ru") -> list[TrackInfo]:
|
|
16
|
-
raise NotImplementedError
|
|
17
|
-
|
|
18
|
-
def get_response(self, *, chat: Chat) -> list[ChatMessage]:
|
|
19
|
-
raise NotImplementedError
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class TextGeneratorAPI:
|
|
23
|
-
def process(self, *, chat: Chat) -> str:
|
|
24
|
-
raise NotImplementedError
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class ContentInterpreterRemoteResponse(BaseModel):
|
|
28
|
-
interpretation: str
|
|
29
|
-
resource_fname: str
|
|
30
|
-
resource: bytes
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class ContentInterpreterRemoteAPI:
|
|
34
|
-
def interpret_remote(
|
|
35
|
-
self, *, kind: str, query: str, resource: bytes, chat: Chat | None = None
|
|
36
|
-
) -> ContentInterpreterRemoteResponse:
|
|
37
|
-
raise NotImplementedError
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
class ClassifierAPI:
|
|
41
|
-
def get_values(self) -> list[Value]:
|
|
42
|
-
raise NotImplementedError
|
|
43
|
-
|
|
44
|
-
def evaluate(self, *, chat: Chat) -> Value:
|
|
45
|
-
raise NotImplementedError
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
class CriticAPI:
|
|
49
|
-
def evaluate(self, *, text: str, chat: Chat | None = None) -> float:
|
|
50
|
-
raise NotImplementedError
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
class ContentInterpreterAPI:
|
|
54
|
-
def interpret(
|
|
55
|
-
self, *, kind: str, query: str, resource_id: str = "", chat: Chat | None = None
|
|
56
|
-
) -> tuple[Interpretation, ResourceId | None]:
|
|
57
|
-
raise NotImplementedError
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
class TextProcessorAPI:
|
|
61
|
-
def process(self, *, text: str, chat: Chat | None = None) -> str:
|
|
62
|
-
raise NotImplementedError
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|