mmar-mapi 1.0.17__tar.gz → 1.0.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mmar-mapi might be problematic. Click here for more details.
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/PKG-INFO +1 -1
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/pyproject.toml +1 -1
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/src/mmar_mapi/__init__.py +2 -0
- mmar_mapi-1.0.19/src/mmar_mapi/api.py +143 -0
- mmar_mapi-1.0.19/src/mmar_mapi/decorators_maybe_lru_cache.py +14 -0
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/src/mmar_mapi/file_storage.py +8 -2
- mmar_mapi-1.0.17/src/mmar_mapi/api.py +0 -62
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/LICENSE +0 -0
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/README.md +0 -0
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/src/mmar_mapi/models/__init__.py +0 -0
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/src/mmar_mapi/models/base.py +0 -0
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/src/mmar_mapi/models/chat.py +0 -0
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/src/mmar_mapi/models/chat_item.py +0 -0
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/src/mmar_mapi/models/enums.py +0 -0
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/src/mmar_mapi/models/tracks.py +0 -0
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/src/mmar_mapi/models/widget.py +0 -0
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/src/mmar_mapi/type_union.py +0 -0
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/src/mmar_mapi/utils.py +0 -0
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/src/mmar_mapi/utils_import.py +0 -0
- {mmar_mapi-1.0.17 → mmar_mapi-1.0.19}/src/mmar_mapi/xml_parser.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "mmar-mapi"
|
|
3
3
|
# dynamic version is not supported yet on uv_build
|
|
4
|
-
version = "1.0.17"
|
|
4
|
+
version = "1.0.19"
|
|
5
5
|
description = "Common pure/IO utilities for multi-modal architectures team"
|
|
6
6
|
authors = [{name = "Eugene Tagin", email = "tagin@airi.net"}]
|
|
7
7
|
license = "MIT"
|
|
@@ -18,6 +18,7 @@ from .models.widget import Widget
|
|
|
18
18
|
from .utils import make_session_id, chunked
|
|
19
19
|
from .xml_parser import XMLParser
|
|
20
20
|
from .utils_import import load_main_objects
|
|
21
|
+
from .decorators_maybe_lru_cache import maybe_lru_cache
|
|
21
22
|
|
|
22
23
|
__all__ = [
|
|
23
24
|
"AIMessage",
|
|
@@ -47,4 +48,5 @@ __all__ = [
|
|
|
47
48
|
"load_main_objects",
|
|
48
49
|
"make_content",
|
|
49
50
|
"make_session_id",
|
|
51
|
+
"maybe_lru_cache",
|
|
50
52
|
]
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
from enum import StrEnum
|
|
2
|
+
from typing import Annotated
|
|
3
|
+
|
|
4
|
+
from pydantic import AfterValidator, BaseModel
|
|
5
|
+
|
|
6
|
+
from mmar_mapi.file_storage import ResourceId
|
|
7
|
+
from mmar_mapi.models.chat import Chat, ChatMessage
|
|
8
|
+
from mmar_mapi.models.tracks import DomainInfo, TrackInfo
|
|
9
|
+
|
|
10
|
+
Value = str
|
|
11
|
+
Interpretation = str
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class ChatManagerAPI:
|
|
15
|
+
def get_domains(self, *, client_id: str, language_code: str = "ru") -> list[DomainInfo]:
|
|
16
|
+
raise NotImplementedError
|
|
17
|
+
|
|
18
|
+
def get_tracks(self, *, client_id: str, language_code: str = "ru") -> list[TrackInfo]:
|
|
19
|
+
raise NotImplementedError
|
|
20
|
+
|
|
21
|
+
def get_response(self, *, chat: Chat) -> list[ChatMessage]:
|
|
22
|
+
raise NotImplementedError
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class TextGeneratorAPI:
|
|
26
|
+
def process(self, *, chat: Chat) -> str:
|
|
27
|
+
raise NotImplementedError
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ContentInterpreterRemoteResponse(BaseModel):
|
|
31
|
+
interpretation: str
|
|
32
|
+
resource_fname: str
|
|
33
|
+
resource: bytes
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ContentInterpreterRemoteAPI:
|
|
37
|
+
def interpret_remote(
|
|
38
|
+
self, *, kind: str, query: str, resource: bytes, chat: Chat | None = None
|
|
39
|
+
) -> ContentInterpreterRemoteResponse:
|
|
40
|
+
raise NotImplementedError
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ClassifierAPI:
|
|
44
|
+
def get_values(self) -> list[Value]:
|
|
45
|
+
raise NotImplementedError
|
|
46
|
+
|
|
47
|
+
def evaluate(self, *, chat: Chat) -> Value:
|
|
48
|
+
raise NotImplementedError
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class CriticAPI:
|
|
52
|
+
def evaluate(self, *, text: str, chat: Chat | None = None) -> float:
|
|
53
|
+
raise NotImplementedError
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ContentInterpreterAPI:
|
|
57
|
+
def interpret(
|
|
58
|
+
self, *, kind: str, query: str, resource_id: str = "", chat: Chat | None = None
|
|
59
|
+
) -> tuple[Interpretation, ResourceId | None]:
|
|
60
|
+
raise NotImplementedError
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class TextProcessorAPI:
|
|
64
|
+
def process(self, *, text: str, chat: Chat | None = None) -> str:
|
|
65
|
+
raise NotImplementedError
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class TextExtractorAPI:
|
|
69
|
+
def extract(self, *, resource_id: ResourceId) -> ResourceId:
|
|
70
|
+
"""returns file with text"""
|
|
71
|
+
raise NotImplementedError
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _check_page_range(rng: tuple[int, int]) -> tuple[int, int]:
    """Validate that a (first, last) page pair is not inverted.

    Returns the pair unchanged so it can be used as a pydantic
    ``AfterValidator``: the validator's return value becomes the field
    value, so returning a bool here would overwrite the tuple.

    Raises:
        ValueError: if the first element is greater than the second.
    """
    if rng[0] > rng[1]:
        raise ValueError(f"invalid page range: start {rng[0]} is greater than end {rng[1]}")
    return rng


# Pair of ints whose first element must not exceed the second.
PageRange = Annotated[tuple[int, int], AfterValidator(_check_page_range)]
|
|
75
|
+
ForceOCR = StrEnum("ForceOCR", ["ENABLED", "DISABLED", "AUTO"])
|
|
76
|
+
OutputType = StrEnum("OutputType", ["RAW", "PLAIN", "MARKDOWN"])
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class DocExtractionSpec(BaseModel):
|
|
80
|
+
page_range: PageRange | None = None
|
|
81
|
+
output_type: OutputType = OutputType.MARKDOWN
|
|
82
|
+
force_ocr: ForceOCR = ForceOCR.AUTO
|
|
83
|
+
do_ocr: bool = False
|
|
84
|
+
do_table_structure: bool = False
|
|
85
|
+
do_cell_matching: bool = False
|
|
86
|
+
do_annotations: bool = False
|
|
87
|
+
do_image_extraction: bool = False
|
|
88
|
+
generate_page_images: bool = False
|
|
89
|
+
images_scale: float = 2.0
|
|
90
|
+
|
|
91
|
+
def _update(self, **update):
|
|
92
|
+
return self.model_copy(update=update)
|
|
93
|
+
|
|
94
|
+
# fmt: off
|
|
95
|
+
def with_output_type_raw(self): return self._update(output_type=OutputType.RAW)
|
|
96
|
+
def with_output_type_plain(self): return self._update(output_type=OutputType.PLAIN)
|
|
97
|
+
def with_ocr(self): return self._update(do_ocr=True)
|
|
98
|
+
def with_tables(self): return self._update(do_table_structure=True, do_cell_matching=True)
|
|
99
|
+
def with_images(self): return self._update(do_image_extraction=True)
|
|
100
|
+
def with_annotations(self): return self._update(do_annotations=True)
|
|
101
|
+
def with_force_ocr_enabled(self): return self._update(force_ocr=ForceOCR.ENABLED)
|
|
102
|
+
def with_force_ocr_disabled(self): return self._update(force_ocr=ForceOCR.DISABLED)
|
|
103
|
+
def with_page_images(self): return self._update(generate_page_images=True)
|
|
104
|
+
def with_page_range(self, page_range: PageRange): return self._update(page_range=page_range)
|
|
105
|
+
# fmt: on
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class ExtractedImage(BaseModel):
|
|
109
|
+
page: int
|
|
110
|
+
image_resource_id: ResourceId | None = None
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class ExtractedImageMetadata(BaseModel):
|
|
114
|
+
annotation: str = ""
|
|
115
|
+
caption: str = ""
|
|
116
|
+
width: int | None = None
|
|
117
|
+
height: int | None = None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class ExtractedPicture(ExtractedImage, ExtractedImageMetadata):
|
|
121
|
+
pass
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class ExtractedTable(ExtractedImage, ExtractedImageMetadata):
|
|
125
|
+
formatted_str: str
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class ExtractedPageImage(ExtractedImage):
|
|
129
|
+
pass
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class DocExtractionOutput(BaseModel):
|
|
133
|
+
config: DocExtractionSpec
|
|
134
|
+
text: str = ""
|
|
135
|
+
tables: list[ExtractedTable] = []
|
|
136
|
+
pictures: list[ExtractedPicture] = []
|
|
137
|
+
page_images: list[ExtractedPageImage] = []
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class DocumentExtractorAPI:
|
|
141
|
+
def extract(self, *, resource_id: ResourceId, spec: DocExtractionSpec) -> ResourceId:
|
|
142
|
+
"""returns file with DocExtractionOutput"""
|
|
143
|
+
raise NotImplementedError
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
from functools import lru_cache
|
|
3
|
+
|
|
4
|
+
from loguru import logger
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def maybe_lru_cache(maxsize: int, func: Callable) -> Callable:
    """Optionally wrap *func* in ``functools.lru_cache`` and log the decision.

    Args:
        maxsize: cache policy selector.
            * negative -- caching is disabled and *func* is returned as is;
            * ``0`` -- caching is enabled without a size bound
              (``0`` is mapped to ``lru_cache(maxsize=None)``);
            * positive -- caching is enabled with that many entries.
        func: the callable to wrap; its ``__name__`` is used in the log line.

    Returns:
        Either *func* itself or its ``lru_cache``-wrapped version.
    """
    if maxsize < 0:
        logger.info(f"Caching for {func.__name__}: disabled")
        return func

    # ``maxsize or None`` turns 0 into None, which lru_cache treats as unbounded.
    cache_size = maxsize or None
    logger.info(f"Caching for {func.__name__}: enabled: maxsize={cache_size}")
    return lru_cache(maxsize=cache_size)(func)
|
|
@@ -45,7 +45,7 @@ class FileStorage:
|
|
|
45
45
|
resource_id = self.upload(content, fname)
|
|
46
46
|
return resource_id
|
|
47
47
|
|
|
48
|
-
def upload(self, content: bytes | str, fname: str) -> ResourceId:
|
|
48
|
+
def upload(self, content: bytes | str, fname: str, origin: str | None = None) -> ResourceId:
|
|
49
49
|
if isinstance(content, str):
|
|
50
50
|
content = content.encode()
|
|
51
51
|
|
|
@@ -56,7 +56,7 @@ class FileStorage:
|
|
|
56
56
|
|
|
57
57
|
fpath_md = fpath.with_suffix(SUFFIX_METADATA)
|
|
58
58
|
update_date = f"{datetime.now():%Y-%m-%d--%H-%M-%S}"
|
|
59
|
-
metadata = {"fname": fname, "update_date": update_date, "size": len(content)}
|
|
59
|
+
metadata = {"fname": fname, "update_date": update_date, "size": len(content), "origin": origin}
|
|
60
60
|
fpath_md.write_text(json.dumps(metadata, ensure_ascii=False))
|
|
61
61
|
|
|
62
62
|
return str(fpath)
|
|
@@ -67,6 +67,12 @@ class FileStorage:
|
|
|
67
67
|
return None
|
|
68
68
|
return json.loads(metadata_path.read_text())
|
|
69
69
|
|
|
70
|
+
def get_fname(self, resource_id: ResourceId) -> str | None:
    """Return the "fname" entry of the resource's metadata.

    Yields None when ``get_metadata`` returns None for the resource or when
    the metadata has no "fname" key.
    """
    meta = self.get_metadata(resource_id)
    return None if meta is None else meta.get("fname")
|
|
75
|
+
|
|
70
76
|
async def upload_async(self, content: bytes | str, fname: str, origin: str | None = None) -> ResourceId:
    """Coroutine-flavoured wrapper around the synchronous ``upload``.

    The call is delegated directly to ``self.upload``; nothing is awaited
    here. ``origin`` is optional and forwarded unchanged so that callers of
    the async variant can supply the same argument that ``upload`` accepts.

    Returns:
        The resource id produced by ``upload``.
    """
    return self.upload(content, fname, origin=origin)
|
|
72
78
|
|
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
from mmar_mapi.models.chat import Chat, ChatMessage
|
|
2
|
-
from mmar_mapi.models.tracks import DomainInfo, TrackInfo
|
|
3
|
-
from pydantic import BaseModel
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
Value = str
|
|
7
|
-
Interpretation = str
|
|
8
|
-
ResourceId = str
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class ChatManagerAPI:
|
|
12
|
-
def get_domains(self, *, client_id: str, language_code: str = "ru") -> list[DomainInfo]:
|
|
13
|
-
raise NotImplementedError
|
|
14
|
-
|
|
15
|
-
def get_tracks(self, *, client_id: str, language_code: str = "ru") -> list[TrackInfo]:
|
|
16
|
-
raise NotImplementedError
|
|
17
|
-
|
|
18
|
-
def get_response(self, *, chat: Chat) -> list[ChatMessage]:
|
|
19
|
-
raise NotImplementedError
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class TextGeneratorAPI:
|
|
23
|
-
def process(self, *, chat: Chat) -> str:
|
|
24
|
-
raise NotImplementedError
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class ContentInterpreterRemoteResponse(BaseModel):
|
|
28
|
-
interpretation: str
|
|
29
|
-
resource_fname: str
|
|
30
|
-
resource: bytes
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class ContentInterpreterRemoteAPI:
|
|
34
|
-
def interpret_remote(
|
|
35
|
-
self, *, kind: str, query: str, resource: bytes, chat: Chat | None = None
|
|
36
|
-
) -> ContentInterpreterRemoteResponse:
|
|
37
|
-
raise NotImplementedError
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
class ClassifierAPI:
|
|
41
|
-
def get_values(self) -> list[Value]:
|
|
42
|
-
raise NotImplementedError
|
|
43
|
-
|
|
44
|
-
def evaluate(self, *, chat: Chat) -> Value:
|
|
45
|
-
raise NotImplementedError
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
class CriticAPI:
|
|
49
|
-
def evaluate(self, *, text: str, chat: Chat | None = None) -> float:
|
|
50
|
-
raise NotImplementedError
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
class ContentInterpreterAPI:
|
|
54
|
-
def interpret(
|
|
55
|
-
self, *, kind: str, query: str, resource_id: str = "", chat: Chat | None = None
|
|
56
|
-
) -> tuple[Interpretation, ResourceId | None]:
|
|
57
|
-
raise NotImplementedError
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
class TextProcessorAPI:
|
|
61
|
-
def process(self, *, text: str, chat: Chat | None = None) -> str:
|
|
62
|
-
raise NotImplementedError
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|