mmar-mapi 1.0.7__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mmar-mapi might be problematic. Click here for more details.

mmar_mapi/__init__.py CHANGED
@@ -11,19 +11,19 @@ from .models.chat import (
11
11
  Content,
12
12
  BaseMessage,
13
13
  )
14
- from .models.chat_item import ChatItem, OuterContextItem, InnerContextItem, ReplicaItem
15
14
  from .models.enums import MTRSLabelEnum, DiagnosticsXMLTagEnum, MTRSXMLTagEnum, DoctorChoiceXMLTagEnum
16
15
  from .models.tracks import TrackInfo, DomainInfo
17
16
  from .models.widget import Widget
18
- from .utils import make_session_id
17
+ from .utils import make_session_id, chunked
19
18
  from .xml_parser import XMLParser
19
+ from .utils_import import load_main_objects
20
+ from .decorators_maybe_lru_cache import maybe_lru_cache
20
21
 
21
22
  __all__ = [
22
23
  "AIMessage",
23
24
  "Base",
24
25
  "BaseMessage",
25
26
  "Chat",
26
- "ChatItem",
27
27
  "ChatMessage",
28
28
  "Content",
29
29
  "Context",
@@ -32,16 +32,16 @@ __all__ = [
32
32
  "DomainInfo",
33
33
  "FileStorage",
34
34
  "HumanMessage",
35
- "InnerContextItem",
36
35
  "MTRSLabelEnum",
37
36
  "MTRSXMLTagEnum",
38
37
  "MiscMessage",
39
- "OuterContextItem",
40
- "ReplicaItem",
41
38
  "ResourceId",
42
39
  "TrackInfo",
43
40
  "Widget",
44
41
  "XMLParser",
42
+ "chunked",
43
+ "load_main_objects",
45
44
  "make_content",
46
45
  "make_session_id",
46
+ "maybe_lru_cache",
47
47
  ]
mmar_mapi/api.py CHANGED
@@ -1,11 +1,14 @@
1
+ from enum import StrEnum
2
+ from typing import Annotated, Any
3
+
4
+ from pydantic import AfterValidator, BaseModel
5
+
6
+ from mmar_mapi.file_storage import ResourceId
1
7
  from mmar_mapi.models.chat import Chat, ChatMessage
2
8
  from mmar_mapi.models.tracks import DomainInfo, TrackInfo
3
- from pydantic import BaseModel
4
-
5
9
 
6
10
  Value = str
7
11
  Interpretation = str
8
- ResourceId = str
9
12
 
10
13
 
11
14
  class ChatManagerAPI:
@@ -37,16 +40,59 @@ class ContentInterpreterRemoteAPI:
37
40
  raise NotImplementedError
38
41
 
39
42
 
40
- class ClassifierAPI:
41
- def get_values(self) -> list[Value]:
43
+ class BinaryClassifiersAPI:
44
+ def get_classifiers(self) -> list[str]:
42
45
  raise NotImplementedError
43
46
 
44
- def evaluate(self, *, chat: Chat) -> Value:
47
+ def evaluate(self, *, classifier: str | None = None, text: str) -> bool:
48
+ raise NotImplementedError
49
+
50
+
51
+ class LLMAccessorAPI:
52
+ def get_entrypoint_keys(self) -> list[str]:
53
+ raise NotImplementedError
54
+
55
+ def get_response(
56
+ self,
57
+ *,
58
+ prompt: str,
59
+ resource_id: ResourceId | None = None,
60
+ entrypoint_key: str | None = None,
61
+ max_retries: int = 1,
62
+ ) -> str:
63
+ raise NotImplementedError
64
+
65
+ def get_response_by_payload(
66
+ self,
67
+ *,
68
+ payload: dict[str, Any],
69
+ resource_id: ResourceId | None = None,
70
+ entrypoint_key: str | None = None,
71
+ max_retries: int = 1,
72
+ ) -> str:
73
+ raise NotImplementedError
74
+
75
+ def get_embedding(
76
+ self,
77
+ *,
78
+ prompt: str,
79
+ resource_id: ResourceId | None = None,
80
+ entrypoint_key: str | None = None,
81
+ max_retries: int = 1,
82
+ ) -> list[float]:
83
+ raise NotImplementedError
84
+
85
+
86
+ class TranslatorAPI:
87
+ def get_lang_codes(self) -> list[str]:
88
+ raise NotImplementedError
89
+
90
+ def translate(self, *, text: str, lang_code_from: str | None = None, lang_code_to: str) -> str:
45
91
  raise NotImplementedError
46
92
 
47
93
 
48
94
  class CriticAPI:
49
- def evaluate(self, *, text: str, chat: Chat | None = None) -> float:
95
+ def evaluate(self, *, text: str, chat: Chat | None = None) -> float: # TODO replace float with bool
50
96
  raise NotImplementedError
51
97
 
52
98
 
@@ -60,3 +106,99 @@ class ContentInterpreterAPI:
60
106
  class TextProcessorAPI:
61
107
  def process(self, *, text: str, chat: Chat | None = None) -> str:
62
108
  raise NotImplementedError
109
+
110
+
111
+ class TextExtractorAPI:
112
+ def extract(self, *, resource_id: ResourceId) -> ResourceId:
113
+ """returns file with text"""
114
+ raise NotImplementedError
115
+
116
+
117
+ def _validate_page_range(v: tuple[int, int]) -> tuple[int, int]:
118
+ if v[0] < 1 or v[1] < v[0]:
119
+ raise ValueError("Invalid page range: start must be ≥ 1 and end must be ≥ start.")
120
+ return v
121
+
122
+
123
+ PageRange = Annotated[tuple[int, int], AfterValidator(_validate_page_range)]
124
+ ForceOCR = StrEnum("ForceOCR", ["ENABLED", "DISABLED", "AUTO"])
125
+ OutputType = StrEnum("OutputType", ["RAW", "PLAIN", "MARKDOWN"])
126
+
127
+
128
+ class ExtractionEngineSpec(BaseModel, frozen=True):
129
+ output_type: OutputType = OutputType.MARKDOWN
130
+ force_ocr: ForceOCR = ForceOCR.AUTO
131
+ do_ocr: bool = False
132
+ do_table_structure: bool = False
133
+ do_cell_matching: bool = False
134
+ do_annotations: bool = False
135
+ do_image_extraction: bool = False
136
+ generate_page_images: bool = False
137
+ images_scale: float = 2.0
138
+
139
+
140
+ class DocExtractionSpec(BaseModel, frozen=True):
141
+ page_range: PageRange | None = None
142
+ engine: ExtractionEngineSpec = ExtractionEngineSpec()
143
+
144
+ def _update(self, **update):
145
+ return self.model_copy(update=update)
146
+
147
+ def _update_engine(self, **engine_update):
148
+ return self._update(engine=self.engine.model_copy(update=engine_update))
149
+
150
+ # fmt: off
151
+ def with_output_type_raw(self): return self._update_engine(output_type=OutputType.RAW)
152
+ def with_output_type_plain(self): return self._update_engine(output_type=OutputType.PLAIN)
153
+ def with_ocr(self): return self._update_engine(do_ocr=True)
154
+ def with_tables(self): return self._update_engine(do_table_structure=True, do_cell_matching=True)
155
+ def with_images(self): return self._update_engine(do_image_extraction=True)
156
+ def with_annotations(self): return self._update_engine(do_annotations=True)
157
+ def with_force_ocr_enabled(self): return self._update_engine(force_ocr=ForceOCR.ENABLED)
158
+ def with_force_ocr_disabled(self): return self._update_engine(force_ocr=ForceOCR.DISABLED)
159
+ def with_page_images(self): return self._update_engine(generate_page_images=True)
160
+
161
+ def with_page_range(self, page_range: PageRange): return self._update(page_range=page_range)
162
+ # fmt: on
163
+
164
+
165
+ class ExtractedImage(BaseModel):
166
+ page: int
167
+ image_resource_id: ResourceId | None = None
168
+
169
+
170
+ class ExtractedImageMetadata(BaseModel):
171
+ annotation: str = ""
172
+ caption: str = ""
173
+ width: int | None = None
174
+ height: int | None = None
175
+
176
+
177
+ class ExtractedPicture(ExtractedImage, ExtractedImageMetadata):
178
+ "Image of part of page"
179
+
180
+ pass
181
+
182
+
183
+ class ExtractedTable(ExtractedImage, ExtractedImageMetadata):
184
+ formatted_str: str
185
+
186
+
187
+ class ExtractedPageImage(ExtractedImage):
188
+ "Image of all page"
189
+
190
+ pass
191
+
192
+
193
+ class DocExtractionOutput(BaseModel):
194
+ spec: DocExtractionSpec
195
+ text: str = ""
196
+ tables: list[ExtractedTable] = []
197
+ pictures: list[ExtractedPicture] = []
198
+ page_images: list[ExtractedPageImage] = []
199
+
200
+
201
+ class DocumentExtractorAPI:
202
+ def extract(self, *, resource_id: ResourceId, spec: DocExtractionSpec) -> ResourceId | None:
203
+ """returns file with DocExtractionOutput"""
204
+ raise NotImplementedError
@@ -0,0 +1,14 @@
1
+ from collections.abc import Callable
2
+ from functools import lru_cache
3
+
4
+ from loguru import logger
5
+
6
+
7
+ def maybe_lru_cache(maxsize: int, func: Callable) -> tuple[str, Callable]:
8
+ if maxsize >= 0:
9
+ maxsize = maxsize or None
10
+ logger.info(f"Caching for {func.__name__}: enabled: maxsize={maxsize}")
11
+ func = lru_cache(maxsize=maxsize)(func)
12
+ else:
13
+ logger.info(f"Caching for {func.__name__}: disabled")
14
+ return func
mmar_mapi/file_storage.py CHANGED
@@ -6,7 +6,7 @@ from pathlib import Path
6
6
  from zipfile import ZipFile, is_zipfile
7
7
 
8
8
  ResourceId = str
9
- ASCII_DIGITS = set(string.ascii_lowercase + string.digits)
9
+ ASCII_DIGITS_SPECIAL = set(string.ascii_lowercase + string.digits + "-")
10
10
  SUFFIX_DIR = ".dir"
11
11
  SUFFIX_METADATA = ".metadata"
12
12
 
@@ -18,7 +18,7 @@ def _validate_exist(files_dir):
18
18
 
19
19
 
20
20
  def _validate_dtype(dtype: str):
21
- if all(map(ASCII_DIGITS.__contains__, dtype)):
21
+ if all(map(ASCII_DIGITS_SPECIAL.__contains__, dtype)):
22
22
  return
23
23
  raise ValueError(f"Bad dtype: {dtype}")
24
24
 
@@ -45,7 +45,7 @@ class FileStorage:
45
45
  resource_id = self.upload(content, fname)
46
46
  return resource_id
47
47
 
48
- def upload(self, content: bytes | str, fname: str) -> ResourceId:
48
+ def upload(self, content: bytes | str, fname: str, origin: str | None = None) -> ResourceId:
49
49
  if isinstance(content, str):
50
50
  content = content.encode()
51
51
 
@@ -56,17 +56,29 @@ class FileStorage:
56
56
 
57
57
  fpath_md = fpath.with_suffix(SUFFIX_METADATA)
58
58
  update_date = f"{datetime.now():%Y-%m-%d--%H-%M-%S}"
59
- metadata = {"fname": fname, "update_date": update_date, "size": len(content)}
59
+ metadata = {"fname": fname, "update_date": update_date, "size": len(content), "origin": origin}
60
60
  fpath_md.write_text(json.dumps(metadata, ensure_ascii=False))
61
61
 
62
62
  return str(fpath)
63
63
 
64
+ def get_metadata(self, resource_id: ResourceId) -> dict | None:
65
+ metadata_path = Path(resource_id).with_suffix(SUFFIX_METADATA)
66
+ if not metadata_path.exists():
67
+ return None
68
+ return json.loads(metadata_path.read_text())
69
+
70
+ def get_fname(self, resource_id: ResourceId) -> str | None:
71
+ metadata = self.get_metadata(resource_id)
72
+ if metadata is None:
73
+ return None
74
+ return metadata.get("fname")
75
+
64
76
  async def upload_async(self, content: bytes | str, fname: str) -> ResourceId:
65
77
  return self.upload(content, fname)
66
78
 
67
79
  def upload_dir(self, resource_ids: list[ResourceId]) -> ResourceId:
68
80
  content = "\n".join(resource_ids)
69
- res = self.upload(content, "dir")
81
+ res = self.upload(content=content, fname=".dir")
70
82
  return res
71
83
 
72
84
  def download(self, resource_id: ResourceId) -> bytes:
@@ -84,6 +96,9 @@ class FileStorage:
84
96
  res = self.download_text(resource_id).split("\n")
85
97
  return res
86
98
 
99
+ def get_path(self, resource_id: ResourceId | None) -> Path | None:
100
+ return self._get_path(resource_id)
101
+
87
102
  def _get_path(self, resource_id: ResourceId | None) -> Path | None:
88
103
  if not resource_id:
89
104
  return None
@@ -103,7 +118,7 @@ class FileStorage:
103
118
  return path and path.suffix == SUFFIX_DIR
104
119
 
105
120
  def get_dtype(self, resource_id: ResourceId | None) -> str | None:
106
- return resource_id and resource_id.rsplit(".")[-1]
121
+ return resource_id and resource_id.rsplit(".")[-1].lower()
107
122
 
108
123
  def unzip_file(self, resource_id: str) -> ResourceId:
109
124
  """takes resource_id which refer to zip-archive, unpacks it and returns directory ResourceId with content of zip-archive"""
mmar_mapi/models/chat.py CHANGED
@@ -1,21 +1,37 @@
1
1
  import warnings
2
2
  from collections.abc import Callable
3
- from copy import deepcopy
4
3
  from datetime import datetime
5
- from typing import Any, Literal, TypeVar
4
+ from typing import Any, Literal, NotRequired, TypedDict, TypeVar
5
+
6
+ from pydantic import Field
6
7
 
7
- from mmar_mapi.models.chat_item import ChatItem, ReplicaItem, OuterContextItem
8
8
  from mmar_mapi.models.widget import Widget
9
9
  from mmar_mapi.type_union import TypeUnion
10
- from pydantic import Field, ValidationError
11
10
 
12
11
  from .base import Base
13
12
 
14
-
15
13
  _DT_FORMAT: str = "%Y-%m-%d-%H-%M-%S"
16
14
  _EXAMPLE_DT: str = datetime(year=1970, month=1, day=1).strftime(_DT_FORMAT)
17
15
  StrDict = dict[str, Any]
18
- ContentBase = str | Widget | StrDict
16
+
17
+
18
+ class ResourceDict(TypedDict):
19
+ type: Literal["resource_id"]
20
+ resource_id: str
21
+ resource_name: NotRequired[str]
22
+
23
+
24
+ class TextDict(TypedDict):
25
+ type: Literal["text"]
26
+ text: str
27
+
28
+
29
+ class CommandDict(TypedDict):
30
+ type: Literal["command"]
31
+ command: StrDict
32
+
33
+
34
+ ContentBase = str | Widget | ResourceDict | CommandDict | TextDict | StrDict
19
35
  Content = ContentBase | list[ContentBase]
20
36
  T = TypeVar("T")
21
37
 
@@ -33,10 +49,12 @@ class Context(Base):
33
49
 
34
50
  def create_id(self, short: bool = False) -> str:
35
51
  uid, sid, cid = self.user_id, self.session_id, self.client_id
36
- if short:
37
- return f"{cid}_{uid}_{sid}"
38
52
  return f"client_{cid}_user_{uid}_session_{sid}"
39
53
 
54
+ def create_trace_id(self) -> str:
55
+ uid, sid, cid = self.user_id, self.session_id, self.client_id
56
+ return f"{cid}_{uid}_{sid}"
57
+
40
58
  def _get_deprecated_extra(self, field, default):
41
59
  # legacy: eliminate after migration
42
60
  res = (self.extra or {}).get(field, default)
@@ -96,6 +114,22 @@ def _get_resource_id(obj: Content) -> str | None:
96
114
  return None
97
115
 
98
116
 
117
+ def _get_resource_name(obj: Content) -> str | None:
118
+ if isinstance(obj, list):
119
+ return next((el for el in map(_get_resource_name, obj) if el), None)
120
+ if isinstance(obj, dict) and obj.get("type") == "resource_id":
121
+ return _get_field(obj, "resource_name", str)
122
+ return None
123
+
124
+
125
+ def _get_resource(obj: Content) -> str | None:
126
+ if isinstance(obj, list):
127
+ return next((el for el in map(_get_resource_id, obj) if el), None)
128
+ if isinstance(obj, dict) and obj.get("type") == "resource_id":
129
+ return obj
130
+ return None
131
+
132
+
99
133
  def _get_command(obj: Content) -> dict | None:
100
134
  if isinstance(obj, list):
101
135
  return next((el for el in map(_get_command, obj) if el), None)
@@ -127,7 +161,10 @@ class BaseMessage(Base):
127
161
 
128
162
  def modify_text(self, callback: Callable[[str], str]) -> "BaseMessage":
129
163
  content_upd = _modify_text(self.content, callback)
130
- return self.model_copy(update=dict(content=content_upd))
164
+ return self.with_content(content_upd)
165
+
166
+ def with_content(self, content: Content) -> "BaseMessage":
167
+ return self.model_copy(update=dict(content=content))
131
168
 
132
169
  @property
133
170
  def body(self) -> str:
@@ -138,6 +175,15 @@ class BaseMessage(Base):
138
175
  def resource_id(self) -> str | None:
139
176
  return _get_resource_id(self.content)
140
177
 
178
+ @property
179
+ def resource_name(self) -> str | None:
180
+ res = _get_resource_name(self.content)
181
+ return res
182
+
183
+ @property
184
+ def resource(self) -> dict | None:
185
+ return _get_resource(self.content)
186
+
141
187
  @property
142
188
  def command(self) -> dict | None:
143
189
  return _get_command(self.content)
@@ -162,7 +208,7 @@ class BaseMessage(Base):
162
208
  return _DT_FORMAT
163
209
 
164
210
  @staticmethod
165
- def find_resource_id(msg: "BaseMessage", ext: str | None = None, type: str=None) -> str | None:
211
+ def find_resource_id(msg: "BaseMessage", ext: str | None = None, type: str = None) -> str | None:
166
212
  resource_id = msg.resource_id
167
213
  if type and type != msg.type:
168
214
  return None
@@ -185,6 +231,9 @@ class AIMessage(BaseMessage):
185
231
  def action(self) -> str:
186
232
  return (self.extra or {}).get("action", "")
187
233
 
234
+ def with_state(self, state: str) -> "AIMessage":
235
+ return self.model_copy(update=dict(state=state))
236
+
188
237
 
189
238
  class MiscMessage(BaseMessage):
190
239
  type: Literal["misc"] = "misc"
@@ -213,11 +262,8 @@ class Chat(Base):
213
262
  return self.context.create_id(short)
214
263
 
215
264
  @staticmethod
216
- def parse(chat_obj: str | dict | ChatItem) -> "Chat":
217
- return _parse_chat_compat(chat_obj)
218
-
219
- def to_chat_item(self) -> ChatItem:
220
- return convert_chat_to_chat_item(self)
265
+ def parse(chat_obj: str | dict) -> "Chat":
266
+ return _parse_chat(chat_obj)
221
267
 
222
268
  def add_message(self, message: ChatMessage):
223
269
  self.messages.append(message)
@@ -242,180 +288,63 @@ class Chat(Base):
242
288
  def rfind_in_messages(self, func: Callable[[ChatMessage], T | None]) -> T | None:
243
289
  return find_in_messages(self.messages[::-1], func)
244
290
 
291
+ def get_last_user_message(self) -> HumanMessage | None:
292
+ messages = self.messages
293
+ if not messages:
294
+ return []
295
+ message = messages[-1]
296
+ return message if isinstance(message, HumanMessage) else None
297
+
298
+ def count_messages(self, func: Callable[[ChatMessage], bool] | type) -> int:
299
+ if isinstance(func, type):
300
+ msg_type = func
301
+ func = lambda msg: isinstance(msg, msg_type)
302
+ return sum(map(func, self.messages))
303
+
245
304
 
246
305
  def make_content(
247
306
  text: str | None = None,
248
307
  *,
249
308
  resource_id: str | None = None,
309
+ resource: dict | None = None,
250
310
  command: dict | None = None,
251
311
  widget: Widget | None = None,
252
312
  content: Content | None = None,
253
313
  ) -> Content:
254
- resource_id = (resource_id or None) and {"type": "resource_id", "resource_id": resource_id}
314
+ if resource and resource_id:
315
+ raise ValueError("Cannot pass both 'resource' and 'resource_id'")
316
+
317
+ if resource_id:
318
+ resource = {"type": "resource_id", "resource_id": resource_id}
319
+ elif resource:
320
+ if not isinstance(resource, dict):
321
+ raise TypeError("'resource' must be a dict")
322
+ resource_id = resource.get("resource_id")
323
+ if not resource_id:
324
+ raise ValueError("'resource' must contain 'resource_id'")
325
+ resource_name = resource.get("resource_name")
326
+ resource = {"type": "resource_id", "resource_id": resource_id}
327
+ if resource_name:
328
+ resource["resource_name"] = resource_name
329
+ else:
330
+ resource = None
331
+
255
332
  command = (command or None) and {"type": "command", "command": command}
256
333
 
257
334
  content = content if isinstance(content, list) else [content] if content else []
258
- content += list(filter(None, [text, resource_id, command, widget]))
335
+ content += list(filter(None, [text, resource, command, widget]))
259
336
  if len(content) == 0:
260
337
  content = ""
261
338
  elif len(content) == 1:
262
339
  content = content[0]
263
-
264
340
  return content
265
341
 
266
342
 
267
- def convert_replica_item_to_message(replica: ReplicaItem) -> ChatMessage:
268
- date_time = replica.date_time
269
- content = make_content(
270
- text=replica.body,
271
- resource_id=replica.resource_id,
272
- command=replica.command,
273
- widget=replica.widget,
274
- )
275
- # legacy: eliminate after migration
276
- resource_id = (replica.resource_id or None) and {"type": "resource_id", "resource_id": replica.resource_id}
277
- body = replica.body
278
- command = (replica.command or None) and {"type": "command", "command": replica.command}
279
- widget = replica.widget
280
- date_time = replica.date_time
281
-
282
- content = list(filter(None, [body, resource_id, command, widget]))
283
- if len(content) == 0:
284
- content = ""
285
- elif len(content) == 1:
286
- content = content[0]
287
-
288
- is_bot_message = replica.role
289
-
290
- if is_bot_message:
291
- kwargs = dict(
292
- content=content,
293
- date_time=date_time,
294
- state=replica.state,
295
- extra=dict(
296
- **(replica.extra or {}),
297
- action=replica.action,
298
- moderation=replica.moderation,
299
- ),
300
- )
301
- res = AIMessage(**kwargs)
302
- else:
303
- kwargs = dict(content=content, date_time=date_time)
304
- res = HumanMessage(**kwargs)
305
- return res
306
-
307
-
308
- def convert_outer_context_to_context(octx: OuterContextItem) -> Context:
309
- # legacy: eliminate after migration
310
- context = Context(
311
- client_id=octx.client_id,
312
- user_id=octx.user_id,
313
- session_id=octx.session_id,
314
- track_id=octx.track_id,
315
- extra=dict(
316
- sex=octx.sex,
317
- age=octx.age,
318
- parent_session_id=octx.parent_session_id,
319
- entrypoint_key=octx.entrypoint_key,
320
- language_code=octx.language_code,
321
- ),
322
- )
323
- return context
324
-
325
-
326
- def convert_chat_item_to_chat(chat_item: ChatItem) -> Chat:
327
- # legacy: eliminate after migration
328
- context = convert_outer_context_to_context(chat_item.outer_context)
329
- messages = list(map(convert_replica_item_to_message, chat_item.inner_context.replicas))
330
- res = Chat(context=context, messages=messages)
331
- return res
332
-
333
-
334
- def convert_context_to_outer_context(context: Context) -> OuterContextItem:
335
- # legacy: eliminate after migration
336
- extra = context.extra or {}
337
- return OuterContextItem(
338
- client_id=context.client_id,
339
- user_id=context.user_id,
340
- session_id=context.session_id,
341
- track_id=context.track_id,
342
- sex=extra.get("sex"),
343
- age=extra.get("age"),
344
- parent_session_id=extra.get("parent_session_id"),
345
- entrypoint_key=extra.get("entrypoint_key"),
346
- language_code=extra.get("language_code"),
347
- )
348
-
349
-
350
- def convert_message_to_replica_item(message: ChatMessage) -> ReplicaItem | None:
351
- # legacy: eliminate after migration
352
- m_type = message.type
353
- if m_type in {"ai", "human"}:
354
- role = m_type == "ai"
355
- else:
356
- return None
357
-
358
- extra = deepcopy(message.extra) if message.extra else {}
359
- action = extra.pop("action", "")
360
- moderation = extra.pop("moderation", "OK")
361
-
362
- kwargs = dict(
363
- role=role,
364
- body=message.text,
365
- resource_id=message.resource_id,
366
- command=message.command,
367
- widget=message.widget,
368
- date_time=message.date_time,
369
- extra=extra or None,
370
- state=getattr(message, "state", ""),
371
- action=action,
372
- moderation=moderation,
373
- )
374
- return ReplicaItem(**kwargs)
375
-
376
-
377
- def convert_chat_to_chat_item(chat: Chat) -> ChatItem:
378
- # legacy: eliminate after migration
379
- return ChatItem(
380
- outer_context=convert_context_to_outer_context(chat.context),
381
- inner_context=dict(replicas=list(map(convert_message_to_replica_item, chat.messages))),
382
- )
383
-
384
-
385
- def parse_chat_item_as_chat(chat_obj: str | dict | ChatItem) -> Chat:
386
- # legacy: eliminate after migration
387
- if isinstance(chat_obj, ChatItem):
388
- chat_item = chat_obj
389
- else:
390
- chat_item = ChatItem.parse(chat_obj)
391
- res = convert_chat_item_to_chat(chat_item)
392
- return res
393
-
394
-
395
- def _parse_chat(chat_obj: str | dict) -> Chat:
343
+ def _parse_chat(chat_obj: str | dict | Chat) -> Chat:
344
+ if isinstance(chat_obj, Chat):
345
+ return chat_obj
396
346
  if isinstance(chat_obj, dict):
397
347
  return Chat.model_validate(chat_obj)
398
-
399
- return Chat.model_validate_json(chat_obj)
400
-
401
-
402
- def is_chat_item(chat_obj: str | dict | ChatItem) -> bool:
403
- if isinstance(chat_obj, ChatItem):
404
- return True
405
- if isinstance(chat_obj, dict):
406
- return "OuterContext" in chat_obj
407
348
  if isinstance(chat_obj, str):
408
- return "OuterContext" in chat_obj
409
- warnings.warn(f"Unexpected chat object: {chat_obj} :: {type(chat_obj)}")
410
- return False
411
-
412
-
413
- def _parse_chat_compat(chat_obj: str | dict | ChatItem) -> Chat:
414
- # legacy: eliminate after migration
415
- if is_chat_item(chat_obj):
416
- return parse_chat_item_as_chat(chat_obj)
417
- try:
418
- return _parse_chat(chat_obj)
419
- except ValidationError as ex:
420
- warnings.warn(f"Failed to parse chat: {ex}")
421
- return parse_chat_item_as_chat(chat_obj)
349
+ return Chat.model_validate_json(chat_obj)
350
+ raise ValueError(f"Bad chat_obj {type(chat_obj)}: {chat_obj}")
@@ -1,6 +1,6 @@
1
1
  from typing import Self, Literal
2
2
 
3
- from more_itertools import chunked
3
+ from mmar_mapi.utils import chunked
4
4
  from pydantic import BaseModel, model_validator
5
5
 
6
6
 
@@ -26,9 +26,17 @@ class Widget(BaseModel):
26
26
  def make_inline_buttons(ibuttons: dict[str, str], by=1) -> "Widget":
27
27
  return _make_inline_buttons(ibuttons=ibuttons, by=by)
28
28
 
29
+ @staticmethod
30
+ def make_buttons(buttons: list[str], by=1) -> "Widget":
31
+ return _make_buttons(buttons=buttons, by=1)
32
+
29
33
 
30
34
  def _make_inline_buttons(ibuttons: dict[str, str], by=1) -> "Widget":
31
35
  ibs0 = [f"{key}:{val}" for key, val in ibuttons.items()]
32
- ibs = list(chunked(ibs0, n=by))
33
- res = Widget(ibuttons=ibs)
36
+ res = Widget(ibuttons=list(chunked(ibs0, n=by)))
37
+ return res
38
+
39
+
40
+ def _make_buttons(buttons: list[str], by=1) -> "Widget":
41
+ res = Widget(buttons=list(chunked(buttons, n=by)))
34
42
  return res
mmar_mapi/utils.py CHANGED
@@ -1,5 +1,18 @@
1
+ from itertools import islice
2
+ from collections.abc import Iterable
1
3
  from datetime import datetime
2
4
 
3
5
 
4
- def make_session_id() -> str:
5
- return f"{datetime.now():%Y-%m-%d--%H-%M-%S}"
6
+ def make_session_id(with_millis=False) -> str:
7
+ dt = datetime.now()
8
+ fmt = "%Y-%m-%d--%H-%M-%S-%f" if with_millis else "%Y-%m-%d--%H-%M-%S"
9
+ return dt.strftime(fmt)
10
+
11
+
12
+ def chunked(items: Iterable, n) -> list:
13
+ "behavior like in more_itertools.chunked"
14
+ iterator = iter(items)
15
+ res = []
16
+ while chunk := list(islice(iterator, n)):
17
+ res.append(chunk)
18
+ return res
@@ -0,0 +1,47 @@
1
+ from pkgutil import iter_modules
2
+ from importlib import import_module
3
+ from types import ModuleType
4
+
5
+ from loguru import logger
6
+
7
+
8
+ def convert_snake_to_pascal(name: str) -> str:
9
+ """snake_to_pascal -> SnakeToPascal"""
10
+ return "".join(map(str.capitalize, name.split("_")))
11
+
12
+
13
+ def get_main_object_name(module: type[ModuleType]) -> str:
14
+ module_name = module.__name__.rsplit(".", 1)[-1]
15
+ main_object_name = convert_snake_to_pascal(module_name)
16
+ return main_object_name
17
+
18
+
19
+ def get_main_object(module: type[ModuleType], obj_type):
20
+ """tries to find SomeObject in object src.some_object"""
21
+ main_object_name = get_main_object_name(module)
22
+ try:
23
+ res = getattr(module, main_object_name)
24
+ if not isinstance(res, obj_type) and not issubclass(res, obj_type):
25
+ logger.error(f"Failed to load {module}.{main_object_name}: expected {obj_type} but found {type(res)}")
26
+ return None
27
+ return res
28
+ except AttributeError as ex:
29
+ logger.error(f"Failed to load {module}.{main_object_name}: {ex}")
30
+ return None
31
+
32
+
33
+ def load_modules(package_name: str) -> list[type[ModuleType]]:
34
+ try:
35
+ package = import_module(package_name)
36
+ except ModuleNotFoundError:
37
+ logger.error(f"Not found module: {package_name}")
38
+ return []
39
+ res = [import_module(module_name) for _, module_name, _ in iter_modules(package.__path__, package_name + ".")]
40
+ return res
41
+
42
+
43
+ def load_main_objects(package_name: str, obj_type: type) -> dict[str, object]:
44
+ modules = load_modules(package_name)
45
+ main_objects = [get_main_object(m, obj_type) for m in modules]
46
+ res = {obj.__name__: obj for obj in main_objects}
47
+ return res
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mmar-mapi
3
- Version: 1.0.7
3
+ Version: 1.1.1
4
4
  Summary: Common pure/IO utilities for multi-modal architectures team
5
5
  Keywords:
6
6
  Author: Eugene Tagin
@@ -20,7 +20,8 @@ Classifier: Topic :: Software Development
20
20
  Classifier: Topic :: Utilities
21
21
  Classifier: Typing :: Typed
22
22
  Requires-Dist: pydantic~=2.11.7
23
- Requires-Python: >=3.12
23
+ Requires-Dist: loguru~=0.7.3
24
+ Requires-Python: >=3.11
24
25
  Description-Content-Type: text/markdown
25
26
 
26
27
  # mmar-mapi
@@ -0,0 +1,18 @@
1
+ mmar_mapi/__init__.py,sha256=Wq3MAvrtONYfUY63ShtqzbO-05uhxPzKU5I6p27aZc0,1085
2
+ mmar_mapi/api.py,sha256=VGoZ2nYIvm35U4FO4yhFG9nYSaK-gjOUC3pNyfBrq3o,6109
3
+ mmar_mapi/decorators_maybe_lru_cache.py,sha256=eO2I6t1fHLUNRABClK1c8EZzHAmCeSK6O-hbJGb2c9E,444
4
+ mmar_mapi/file_storage.py,sha256=gSz2lM5UIQtjK8f-PatGe3br_UOSVJwgbTMi1sDjXC8,5075
5
+ mmar_mapi/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ mmar_mapi/models/base.py,sha256=mKtXV2x51XVj7W-et9tjGcPMDUUUMelW-BywMgFc2p0,411
7
+ mmar_mapi/models/chat.py,sha256=6qVuiOCQ0J5CbmGtVfuytAcpWklBCYjhAn0FkonSBqo,10999
8
+ mmar_mapi/models/enums.py,sha256=J-GNpql9MCnKnWiV9aJRQGI-pAybvV86923RZs99grA,1006
9
+ mmar_mapi/models/tracks.py,sha256=HKDp-BX1p7AlDfSEKfOKCu0TRSK9cD4Dmq1vJt8oRjw,307
10
+ mmar_mapi/models/widget.py,sha256=ue5o4AkN8SG09aA8eQLOOxbwah-mkZir0ZGL6OA8S9Q,1355
11
+ mmar_mapi/type_union.py,sha256=diwmzcnbqkpGFckPHNw9o8zyQ955mOGNvhTlcBJ0RMI,1905
12
+ mmar_mapi/utils.py,sha256=FlW9n-84xz2zSHsahHzJ3Y4Wu5mjpFer6t9z6PF6lS0,488
13
+ mmar_mapi/utils_import.py,sha256=pUyMFd8SItTxBKI-GO9JhRmy43jG_OQlUPr8QCBOSwg,1682
14
+ mmar_mapi/xml_parser.py,sha256=VvLIX_XCZao9i0qqpTVx8nx0vbFXSe8pEbdJdXnj97g,568
15
+ mmar_mapi-1.1.1.dist-info/licenses/LICENSE,sha256=2A90w8WjhOgQXnFuUijKJYazaqZ4_NTokYb9Po4y-9k,1061
16
+ mmar_mapi-1.1.1.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
17
+ mmar_mapi-1.1.1.dist-info/METADATA,sha256=cc6x65HLcK7oHBNEC92IjB_ZWh9OF4qw9VfwM-3UO3Q,943
18
+ mmar_mapi-1.1.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: uv 0.8.17
2
+ Generator: uv 0.8.24
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,157 +0,0 @@
1
- from datetime import datetime
2
- from typing import Annotated, Any
3
- from collections.abc import Callable
4
-
5
- from mmar_mapi.models.widget import Widget
6
- from pydantic import Field, ConfigDict, BeforeValidator, AfterValidator
7
-
8
- from .base import Base
9
-
10
-
11
- _DT_FORMAT: str = "%Y-%m-%d-%H-%M-%S"
12
- _EXAMPLE_DT_0 = datetime(1970, 1, 1, 0, 0, 0)
13
- _EXAMPLE_DT: str = _EXAMPLE_DT_0.strftime(_DT_FORMAT)
14
-
15
-
16
- def now_pretty() -> str:
17
- return datetime.now().strftime(ReplicaItem.DATETIME_FORMAT())
18
-
19
-
20
- class OuterContextItem(Base):
21
- # remove annoying warning for protected `model_` namespace
22
- model_config = ConfigDict(protected_namespaces=())
23
-
24
- sex: bool = Field(False, alias="Sex", description="True = male, False = female", examples=[True])
25
- age: int = Field(0, alias="Age", examples=[20])
26
- user_id: str = Field("", alias="UserId", examples=["123456789"])
27
- parent_session_id: str | None = Field(None, alias="ParentSessionId", examples=["987654320"])
28
- session_id: str = Field("", alias="SessionId", examples=["987654321"])
29
- client_id: str = Field("", alias="ClientId", examples=["543216789"])
30
- track_id: str = Field(default="Consultation", alias="TrackId")
31
- entrypoint_key: str = Field("", alias="EntrypointKey", examples=["giga"])
32
- language_code: str = Field("ru", alias="LanguageCode", examples=["ru"])
33
-
34
- def create_id(self, short: bool = False) -> str:
35
- uid, sid, cid = self.user_id, self.session_id, self.client_id
36
- if short:
37
- return f"{uid}_{sid}_{cid}"
38
- return f"user_{uid}_session_{sid}_client_{cid}"
39
-
40
- def to_dict(self) -> dict[str, Any]:
41
- return self.model_dump(by_alias=True)
42
-
43
-
44
- LABELS = {
45
- 0: "OK",
46
- 1: "NON_MED",
47
- 2: "CHILD",
48
- 3: "ABSURD",
49
- 4: "GREETING",
50
- 5: "RECEIPT",
51
- }
52
-
53
-
54
- def fix_deprecated_moderation(moderation):
55
- if isinstance(moderation, int):
56
- return LABELS.get(moderation, "OK")
57
- elif isinstance(moderation, str):
58
- return moderation
59
- else:
60
- raise ValueError(f"Unsupported moderation: {moderation} :: {type(moderation)}")
61
-
62
-
63
- def nullify_empty(text: str) -> str | None:
64
- return text or None
65
-
66
-
67
- class ReplicaItem(Base):
68
- body: str = Field("", alias="Body", examples=["Привет"])
69
- resource_id: Annotated[str | None, AfterValidator(nullify_empty)] = Field(
70
- None, alias="ResourceId", examples=["<link-id>"]
71
- )
72
- widget: Widget | None = Field(None, alias="Widget", examples=[None])
73
- command: dict | None = Field(None, alias="Command", examples=[None])
74
- role: bool = Field(False, alias="Role", description="True = ai, False = client", examples=[False])
75
- date_time: str = Field(
76
- default_factory=now_pretty, alias="DateTime", examples=[_EXAMPLE_DT], description=f"Format: {_DT_FORMAT}"
77
- )
78
- state: str = Field("", alias="State", description="chat manager fsm state", examples=["COLLECTION"])
79
- action: str = Field("", alias="Action", description="chat manager fsm action", examples=["DIAGNOSIS"])
80
- # todo fix: support loading from `moderation: int`
81
- moderation: Annotated[str, BeforeValidator(str)] = Field(
82
- "OK", alias="Moderation", description="moderation outcome", examples=["OK"]
83
- )
84
- extra: dict | None = Field(None, alias="Extra", examples=[None])
85
-
86
- def to_dict(self) -> dict[str, Any]:
87
- return self.model_dump(by_alias=True)
88
-
89
- @staticmethod
90
- def DATETIME_FORMAT() -> str:
91
- return _DT_FORMAT
92
-
93
- def with_now_datetime(self):
94
- return self.model_copy(update=dict(date_time=now_pretty()))
95
-
96
- @property
97
- def is_ai(self):
98
- return self.role
99
-
100
- @property
101
- def is_human(self):
102
- return not self.role
103
-
104
- def modify_text(self, callback: Callable[[str], str]) -> "ReplicaItem":
105
- body_upd = callback(self.body)
106
- return self.model_copy(update=dict(body=body_upd))
107
-
108
-
109
- class InnerContextItem(Base):
110
- replicas: list[ReplicaItem] = Field(alias="Replicas")
111
- attrs: dict[str, str | int] | None = Field(default={}, alias="Attrs")
112
-
113
- def to_dict(self) -> dict[str, list]:
114
- return self.model_dump(by_alias=True)
115
-
116
-
117
- class ChatItem(Base):
118
- outer_context: OuterContextItem = Field(alias="OuterContext")
119
- inner_context: InnerContextItem = Field(alias="InnerContext")
120
-
121
- def create_id(self, short: bool = False) -> str:
122
- return self.outer_context.create_id(short)
123
-
124
- def to_dict(self) -> dict[str, Any]:
125
- return self.model_dump(by_alias=True)
126
-
127
- def add_replica(self, replica: ReplicaItem):
128
- self.inner_context.replicas.append(replica)
129
-
130
- def add_replicas(self, replicas: list[ReplicaItem]):
131
- for replica in replicas:
132
- self.inner_context.replicas.append(replica)
133
-
134
- def replace_replicas(self, replicas: list[ReplicaItem]):
135
- return self.model_copy(update=dict(inner_context=InnerContextItem(replicas=replicas)))
136
-
137
- def get_last_state(self, default: str = "empty") -> str:
138
- replicas = self.inner_context.replicas
139
- for ii in range(len(replicas) - 1, -1, -1):
140
- replica = replicas[ii]
141
- if replica.role:
142
- return replica.state
143
- return default
144
-
145
- def zip_history(self, field: str) -> list[Any]:
146
- return [replica.to_dict().get(field, None) for replica in self.inner_context.replicas]
147
-
148
- @classmethod
149
- def parse(cls, chat_obj: str | dict) -> "ChatItem":
150
- return _parse_chat_item(chat_obj)
151
-
152
-
153
- def _parse_chat_item(chat_obj: str | dict) -> ChatItem:
154
- if isinstance(chat_obj, dict):
155
- return ChatItem.model_validate(chat_obj)
156
-
157
- return ChatItem.model_validate_json(chat_obj)
@@ -1,17 +0,0 @@
1
- mmar_mapi/__init__.py,sha256=hhLEK5u9NL1Du3X1M0SzmMPYYq5FIUcbiVNL2_jCZYc,1084
2
- mmar_mapi/api.py,sha256=C9Sr8dISvf51xfEznPjccI_odaG4coQE3HI_0jVpjMQ,1677
3
- mmar_mapi/file_storage.py,sha256=GbahBabBdAKjlAnv1MszERUxxZyA9HGMiR9tz2a9dgY,4409
4
- mmar_mapi/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- mmar_mapi/models/base.py,sha256=mKtXV2x51XVj7W-et9tjGcPMDUUUMelW-BywMgFc2p0,411
6
- mmar_mapi/models/chat.py,sha256=IsIYoHY3Taxnfn-Audeml-WLGIbJgKr_-Arg-ZA8FIE,13293
7
- mmar_mapi/models/chat_item.py,sha256=ZfCKvTqr7gpuJSAuHVxWRnlTefRwki_IVNA2N_CXGdg,5557
8
- mmar_mapi/models/enums.py,sha256=J-GNpql9MCnKnWiV9aJRQGI-pAybvV86923RZs99grA,1006
9
- mmar_mapi/models/tracks.py,sha256=HKDp-BX1p7AlDfSEKfOKCu0TRSK9cD4Dmq1vJt8oRjw,307
10
- mmar_mapi/models/widget.py,sha256=pQHiOukNLzsrz5lr5ptMeARPxSzJhJnijA0rVpVSIhk,1108
11
- mmar_mapi/type_union.py,sha256=diwmzcnbqkpGFckPHNw9o8zyQ955mOGNvhTlcBJ0RMI,1905
12
- mmar_mapi/utils.py,sha256=hcKJVslvTBLw2vjZ9zcKZxh_tqk48obHcVs_i3Rxn3M,112
13
- mmar_mapi/xml_parser.py,sha256=VvLIX_XCZao9i0qqpTVx8nx0vbFXSe8pEbdJdXnj97g,568
14
- mmar_mapi-1.0.7.dist-info/licenses/LICENSE,sha256=2A90w8WjhOgQXnFuUijKJYazaqZ4_NTokYb9Po4y-9k,1061
15
- mmar_mapi-1.0.7.dist-info/WHEEL,sha256=Pi5uDq5Fdo_Rr-HD5h9BiPn9Et29Y9Sh8NhcJNnFU1c,79
16
- mmar_mapi-1.0.7.dist-info/METADATA,sha256=GnTR9S4PdQJak3j-3tIJ61xrA7bU1AjubCe-vRWkL3I,914
17
- mmar_mapi-1.0.7.dist-info/RECORD,,