mmar-mapi 1.0.18__tar.gz → 1.0.19__tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each version as it appears in its public registry.

Potentially problematic release.


This version of mmar-mapi might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mmar-mapi
3
- Version: 1.0.18
3
+ Version: 1.0.19
4
4
  Summary: Common pure/IO utilities for multi-modal architectures team
5
5
  Keywords:
6
6
  Author: Eugene Tagin
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "mmar-mapi"
3
3
  # dynamic version is not supported yet on uv_build
4
- version = "1.0.18"
4
+ version = "1.0.19"
5
5
  description = "Common pure/IO utilities for multi-modal architectures team"
6
6
  authors = [{name = "Eugene Tagin", email = "tagin@airi.net"}]
7
7
  license = "MIT"
@@ -0,0 +1,143 @@
1
+ from enum import StrEnum
2
+ from typing import Annotated
3
+
4
+ from pydantic import AfterValidator, BaseModel
5
+
6
+ from mmar_mapi.file_storage import ResourceId
7
+ from mmar_mapi.models.chat import Chat, ChatMessage
8
+ from mmar_mapi.models.tracks import DomainInfo, TrackInfo
9
+
10
+ Value = str
11
+ Interpretation = str
12
+
13
+
14
+ class ChatManagerAPI:
15
+ def get_domains(self, *, client_id: str, language_code: str = "ru") -> list[DomainInfo]:
16
+ raise NotImplementedError
17
+
18
+ def get_tracks(self, *, client_id: str, language_code: str = "ru") -> list[TrackInfo]:
19
+ raise NotImplementedError
20
+
21
+ def get_response(self, *, chat: Chat) -> list[ChatMessage]:
22
+ raise NotImplementedError
23
+
24
+
25
+ class TextGeneratorAPI:
26
+ def process(self, *, chat: Chat) -> str:
27
+ raise NotImplementedError
28
+
29
+
30
+ class ContentInterpreterRemoteResponse(BaseModel):
31
+ interpretation: str
32
+ resource_fname: str
33
+ resource: bytes
34
+
35
+
36
+ class ContentInterpreterRemoteAPI:
37
+ def interpret_remote(
38
+ self, *, kind: str, query: str, resource: bytes, chat: Chat | None = None
39
+ ) -> ContentInterpreterRemoteResponse:
40
+ raise NotImplementedError
41
+
42
+
43
+ class ClassifierAPI:
44
+ def get_values(self) -> list[Value]:
45
+ raise NotImplementedError
46
+
47
+ def evaluate(self, *, chat: Chat) -> Value:
48
+ raise NotImplementedError
49
+
50
+
51
+ class CriticAPI:
52
+ def evaluate(self, *, text: str, chat: Chat | None = None) -> float:
53
+ raise NotImplementedError
54
+
55
+
56
+ class ContentInterpreterAPI:
57
+ def interpret(
58
+ self, *, kind: str, query: str, resource_id: str = "", chat: Chat | None = None
59
+ ) -> tuple[Interpretation, ResourceId | None]:
60
+ raise NotImplementedError
61
+
62
+
63
+ class TextProcessorAPI:
64
+ def process(self, *, text: str, chat: Chat | None = None) -> str:
65
+ raise NotImplementedError
66
+
67
+
68
+ class TextExtractorAPI:
69
+ def extract(self, *, resource_id: ResourceId) -> ResourceId:
70
+ """returns file with text"""
71
+ raise NotImplementedError
72
+
73
+
74
+ PageRange = Annotated[tuple[int, int], AfterValidator(lambda rng: rng[0] <= rng[1])]
75
+ ForceOCR = StrEnum("ForceOCR", ["ENABLED", "DISABLED", "AUTO"])
76
+ OutputType = StrEnum("OutputType", ["RAW", "PLAIN", "MARKDOWN"])
77
+
78
+
79
+ class DocExtractionSpec(BaseModel):
80
+ page_range: PageRange | None = None
81
+ output_type: OutputType = OutputType.MARKDOWN
82
+ force_ocr: ForceOCR = ForceOCR.AUTO
83
+ do_ocr: bool = False
84
+ do_table_structure: bool = False
85
+ do_cell_matching: bool = False
86
+ do_annotations: bool = False
87
+ do_image_extraction: bool = False
88
+ generate_page_images: bool = False
89
+ images_scale: float = 2.0
90
+
91
+ def _update(self, **update):
92
+ return self.model_copy(update=update)
93
+
94
+ # fmt: off
95
+ def with_output_type_raw(self): return self._update(output_type=OutputType.RAW)
96
+ def with_output_type_plain(self): return self._update(output_type=OutputType.PLAIN)
97
+ def with_ocr(self): return self._update(do_ocr=True)
98
+ def with_tables(self): return self._update(do_table_structure=True, do_cell_matching=True)
99
+ def with_images(self): return self._update(do_image_extraction=True)
100
+ def with_annotations(self): return self._update(do_annotations=True)
101
+ def with_force_ocr_enabled(self): return self._update(force_ocr=ForceOCR.ENABLED)
102
+ def with_force_ocr_disabled(self): return self._update(force_ocr=ForceOCR.DISABLED)
103
+ def with_page_images(self): return self._update(generate_page_images=True)
104
+ def with_page_range(self, page_range: PageRange): return self._update(page_range=page_range)
105
+ # fmt: on
106
+
107
+
108
+ class ExtractedImage(BaseModel):
109
+ page: int
110
+ image_resource_id: ResourceId | None = None
111
+
112
+
113
+ class ExtractedImageMetadata(BaseModel):
114
+ annotation: str = ""
115
+ caption: str = ""
116
+ width: int | None = None
117
+ height: int | None = None
118
+
119
+
120
+ class ExtractedPicture(ExtractedImage, ExtractedImageMetadata):
121
+ pass
122
+
123
+
124
+ class ExtractedTable(ExtractedImage, ExtractedImageMetadata):
125
+ formatted_str: str
126
+
127
+
128
+ class ExtractedPageImage(ExtractedImage):
129
+ pass
130
+
131
+
132
+ class DocExtractionOutput(BaseModel):
133
+ config: DocExtractionSpec
134
+ text: str = ""
135
+ tables: list[ExtractedTable] = []
136
+ pictures: list[ExtractedPicture] = []
137
+ page_images: list[ExtractedPageImage] = []
138
+
139
+
140
+ class DocumentExtractorAPI:
141
+ def extract(self, *, resource_id: ResourceId, spec: DocExtractionSpec) -> ResourceId:
142
+ """returns file with DocExtractionOutput"""
143
+ raise NotImplementedError
@@ -45,7 +45,7 @@ class FileStorage:
45
45
  resource_id = self.upload(content, fname)
46
46
  return resource_id
47
47
 
48
- def upload(self, content: bytes | str, fname: str) -> ResourceId:
48
+ def upload(self, content: bytes | str, fname: str, origin: str | None = None) -> ResourceId:
49
49
  if isinstance(content, str):
50
50
  content = content.encode()
51
51
 
@@ -56,7 +56,7 @@ class FileStorage:
56
56
 
57
57
  fpath_md = fpath.with_suffix(SUFFIX_METADATA)
58
58
  update_date = f"{datetime.now():%Y-%m-%d--%H-%M-%S}"
59
- metadata = {"fname": fname, "update_date": update_date, "size": len(content)}
59
+ metadata = {"fname": fname, "update_date": update_date, "size": len(content), "origin": origin}
60
60
  fpath_md.write_text(json.dumps(metadata, ensure_ascii=False))
61
61
 
62
62
  return str(fpath)
@@ -67,6 +67,12 @@ class FileStorage:
67
67
  return None
68
68
  return json.loads(metadata_path.read_text())
69
69
 
70
+ def get_fname(self, resource_id: ResourceId) -> str | None:
71
+ metadata = self.get_metadata(resource_id)
72
+ if metadata is None:
73
+ return None
74
+ return metadata.get("fname")
75
+
70
76
  async def upload_async(self, content: bytes | str, fname: str) -> ResourceId:
71
77
  return self.upload(content, fname)
72
78
 
@@ -1,62 +0,0 @@
1
- from mmar_mapi.models.chat import Chat, ChatMessage
2
- from mmar_mapi.models.tracks import DomainInfo, TrackInfo
3
- from pydantic import BaseModel
4
-
5
-
6
- Value = str
7
- Interpretation = str
8
- ResourceId = str
9
-
10
-
11
- class ChatManagerAPI:
12
- def get_domains(self, *, client_id: str, language_code: str = "ru") -> list[DomainInfo]:
13
- raise NotImplementedError
14
-
15
- def get_tracks(self, *, client_id: str, language_code: str = "ru") -> list[TrackInfo]:
16
- raise NotImplementedError
17
-
18
- def get_response(self, *, chat: Chat) -> list[ChatMessage]:
19
- raise NotImplementedError
20
-
21
-
22
- class TextGeneratorAPI:
23
- def process(self, *, chat: Chat) -> str:
24
- raise NotImplementedError
25
-
26
-
27
- class ContentInterpreterRemoteResponse(BaseModel):
28
- interpretation: str
29
- resource_fname: str
30
- resource: bytes
31
-
32
-
33
- class ContentInterpreterRemoteAPI:
34
- def interpret_remote(
35
- self, *, kind: str, query: str, resource: bytes, chat: Chat | None = None
36
- ) -> ContentInterpreterRemoteResponse:
37
- raise NotImplementedError
38
-
39
-
40
- class ClassifierAPI:
41
- def get_values(self) -> list[Value]:
42
- raise NotImplementedError
43
-
44
- def evaluate(self, *, chat: Chat) -> Value:
45
- raise NotImplementedError
46
-
47
-
48
- class CriticAPI:
49
- def evaluate(self, *, text: str, chat: Chat | None = None) -> float:
50
- raise NotImplementedError
51
-
52
-
53
- class ContentInterpreterAPI:
54
- def interpret(
55
- self, *, kind: str, query: str, resource_id: str = "", chat: Chat | None = None
56
- ) -> tuple[Interpretation, ResourceId | None]:
57
- raise NotImplementedError
58
-
59
-
60
- class TextProcessorAPI:
61
- def process(self, *, text: str, chat: Chat | None = None) -> str:
62
- raise NotImplementedError
File without changes
File without changes