mmar-mapi 1.2.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 AIRI
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
@@ -0,0 +1,29 @@
+ Metadata-Version: 2.4
+ Name: mmar-mapi
+ Version: 1.2.6
+ Summary: Common pure/IO utilities for multi-modal architectures team
+ Keywords:
+ Author: Eugene Tagin
+ Author-email: Eugene Tagin <tagin@airi.net>
+ License-Expression: MIT
+ License-File: LICENSE
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: Programming Language :: Python
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Programming Language :: Python :: 3.14
+ Classifier: Topic :: Documentation
+ Classifier: Topic :: Software Development
+ Classifier: Topic :: Utilities
+ Classifier: Typing :: Typed
+ Requires-Dist: pydantic~=2.11.7
+ Requires-Dist: loguru~=0.7.3
+ Requires-Python: >=3.11
+ Description-Content-Type: text/markdown
+
+ # mmar-mapi
+
+ Multimodal architectures Maestro API
@@ -0,0 +1,3 @@
+ # mmar-mapi
+
+ Multimodal architectures Maestro API
@@ -0,0 +1,78 @@
+ [project]
+ name = "mmar-mapi"
+ # dynamic versioning is not yet supported by uv_build
+ version = "1.2.6"
+ description = "Common pure/IO utilities for multi-modal architectures team"
+ authors = [{name = "Eugene Tagin", email = "tagin@airi.net"}]
+ license = "MIT"
+ license-files = ["LICENSE"]
+ readme = "README.md"
+ requires-python = ">=3.11"
+ keywords = []
+ classifiers = [
+     "Development Status :: 4 - Beta",
+     "Intended Audience :: Developers",
+     "Programming Language :: Python",
+     "Programming Language :: Python :: 3",
+     "Programming Language :: Python :: 3 :: Only",
+     "Programming Language :: Python :: 3.12",
+     "Programming Language :: Python :: 3.13",
+     "Programming Language :: Python :: 3.14",
+     "Topic :: Documentation",
+     "Topic :: Software Development",
+     "Topic :: Utilities",
+     "Typing :: Typed",
+ ]
+ dependencies = [
+     "pydantic~=2.11.7",
+     "loguru~=0.7.3",
+ ]
+
+ [build-system]
+ requires = ["uv_build>=0.8.14,<0.9.0"]
+ build-backend = "uv_build"
+
+ [tool.uv.build-backend]
+ module-name = "mmar_mapi"
+ source-exclude = [".ruff_cache"]
+
+ [dependency-groups]
+ maintain = [
+     "build>=1.2",
+     "git-changelog>=2.5",
+     "twine>=5.1",
+     "yore>=0.3.3",
+ ]
+ ci = [
+     "ruff>=0.4",
+     "pytest>=8.2",
+     "pytest-cov>=5.0",
+     "pytest-randomly>=3.15",
+     "pytest-asyncio>=1.0.0",
+     "mypy>=1.10",
+     "types-markdown>=3.6",
+     "types-pyyaml>=6.0",
+ ]
+
+ [tool.uv]
+ default-groups = ["maintain", "ci"]
+
+ [tool.ruff]
+ line-length = 120
+
+ # pytest ignores a bare [pytest] table in pyproject.toml; its settings belong here
+ [tool.pytest.ini_options]
+ asyncio_mode = "auto"
+ pythonpath = "src"
+ python_files = [ "test_*.py" ]
+ testpaths = [ "tests" ]
+
+ # action:message_regex:warning_class:module_regex:line
+ filterwarnings = [ "error" ]
+
+ # mypy reads [tool.mypy] from pyproject.toml, not [mypy]
+ [tool.mypy]
+ ignore_missing_imports = true
+ exclude = "tests/fixtures/"
+ warn_unused_ignores = true
+ show_error_codes = true
@@ -0,0 +1,47 @@
+ from .file_storage import FileStorage, ResourceId
+ from .models.base import Base
+ from .models.chat import (
+     Chat,
+     Context,
+     ChatMessage,
+     AIMessage,
+     HumanMessage,
+     MiscMessage,
+     make_content,
+     Content,
+     BaseMessage,
+ )
+ from .models.enums import MTRSLabelEnum, DiagnosticsXMLTagEnum, MTRSXMLTagEnum, DoctorChoiceXMLTagEnum
+ from .models.tracks import TrackInfo, DomainInfo
+ from .models.widget import Widget
+ from .utils import make_session_id, chunked
+ from .xml_parser import XMLParser
+ from .utils_import import load_main_objects
+ from .decorators_maybe_lru_cache import maybe_lru_cache
+
+ __all__ = [
+     "AIMessage",
+     "Base",
+     "BaseMessage",
+     "Chat",
+     "ChatMessage",
+     "Content",
+     "Context",
+     "DiagnosticsXMLTagEnum",
+     "DoctorChoiceXMLTagEnum",
+     "DomainInfo",
+     "FileStorage",
+     "HumanMessage",
+     "MTRSLabelEnum",
+     "MTRSXMLTagEnum",
+     "MiscMessage",
+     "ResourceId",
+     "TrackInfo",
+     "Widget",
+     "XMLParser",
+     "chunked",
+     "load_main_objects",
+     "make_content",
+     "make_session_id",
+     "maybe_lru_cache",
+ ]
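
This `__init__` simply re-exports the package's public names. A minimal usage sketch, assuming the package is installed and that `chunked` is the usual fixed-size batching helper its name suggests (the diff does not show its body, so its signature is an assumption):

    # Hypothetical sketch; only the imported names come from the re-exports above.
    from mmar_mapi import chunked, make_session_id

    session_id = make_session_id()  # assumed to return a fresh session identifier
    for batch in chunked([1, 2, 3, 4, 5], 2):  # assumed to yield [1, 2], [3, 4], [5]
        print(session_id, batch)
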
@@ -0,0 +1,134 @@
+ from pydantic import BaseModel
+
+ from mmar_mapi.api.document_extractor import (
+     DocExtractionOutput,
+     DocExtractionSpec,
+     DocumentExtractorAPI,
+     ExtractedImage,
+     ExtractedImageMetadata,
+     ExtractedPageImage,
+     ExtractedPicture,
+     ExtractedTable,
+     ExtractionEngineSpec,
+     ForceOCR,
+     OutputType,
+     PageRange,
+ )
+ from mmar_mapi.api.llm_accessor import (
+     LCP,
+     RESPONSE_EMPTY,
+     Attachments,
+     LLMAccessorAPI,
+     LLMCallProps,
+     Message,
+     Messages,
+     Payload,
+     Request,
+     ResponseExt,
+     EntrypointInfo,
+     EntrypointsConfig,
+ )
+ from mmar_mapi.models.chat import Chat, ChatMessage
+ from mmar_mapi.models.tracks import DomainInfo, TrackInfo
+
+ # reference the imports so linters do not strip them as unused re-exports
+ __imported__ = [
+     # llm_accessor
+     LLMCallProps,
+     LCP,
+     Attachments,
+     Message,
+     Messages,
+     Payload,
+     Request,
+     ResponseExt,
+     RESPONSE_EMPTY,
+     LLMAccessorAPI,
+     # document_extractor
+     PageRange,
+     ForceOCR,
+     OutputType,
+     ExtractionEngineSpec,
+     DocExtractionSpec,
+     ExtractedImage,
+     ExtractedImageMetadata,
+     ExtractedPicture,
+     ExtractedTable,
+     ExtractedPageImage,
+     DocExtractionOutput,
+     DocumentExtractorAPI,
+     EntrypointInfo,
+     EntrypointsConfig,
+ ]
+
+
+ Interpretation = str
+ ResourceId = str
+
+
+ class ChatManagerAPI:
+     def get_domains(self, *, client_id: str, language_code: str = "ru") -> list[DomainInfo]:
+         raise NotImplementedError
+
+     def get_tracks(self, *, client_id: str, language_code: str = "ru") -> list[TrackInfo]:
+         raise NotImplementedError
+
+     def get_response(self, *, chat: Chat) -> list[ChatMessage]:
+         raise NotImplementedError
+
+
+ class TextGeneratorAPI:
+     def process(self, *, chat: Chat) -> str:
+         raise NotImplementedError
+
+
+ class ContentInterpreterRemoteResponse(BaseModel):
+     interpretation: str
+     resource_fname: str
+     resource: bytes
+
+
+ class ContentInterpreterRemoteAPI:
+     def interpret_remote(
+         self, *, kind: str, query: str, resource: bytes, chat: Chat | None = None
+     ) -> ContentInterpreterRemoteResponse:
+         raise NotImplementedError
+
+
+ class BinaryClassifiersAPI:
+     def get_classifiers(self) -> list[str]:
+         raise NotImplementedError
+
+     def evaluate(self, *, classifier: str | None = None, text: str) -> bool:
+         raise NotImplementedError
+
+
+ class TranslatorAPI:
+     def get_lang_codes(self) -> list[str]:
+         raise NotImplementedError
+
+     def translate(self, *, text: str, lang_code_from: str | None = None, lang_code_to: str) -> str:
+         raise NotImplementedError
+
+
+ class CriticAPI:
+     def evaluate(self, *, text: str, chat: Chat | None = None) -> float:  # TODO replace float with bool
+         raise NotImplementedError
+
+
+ class ContentInterpreterAPI:
+     def interpret(
+         self, *, kind: str, query: str, resource_id: str = "", chat: Chat | None = None
+     ) -> tuple[Interpretation, ResourceId | None]:
+         raise NotImplementedError
+
+
+ class TextProcessorAPI:
+     def process(self, *, text: str, chat: Chat | None = None) -> str:
+         raise NotImplementedError
+
+
+ class TextExtractorAPI:
+     def extract(self, *, resource_id: ResourceId) -> ResourceId:
+         """Returns a resource containing the extracted text."""
+         raise NotImplementedError
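
Each `*API` class above is an interface: every method raises `NotImplementedError`, so a concrete service subclasses one and overrides its methods. A minimal sketch under that assumption (`EchoTranslator` and the import path `mmar_mapi.api` are hypothetical; only the method signatures come from the diff):

    from mmar_mapi.api import TranslatorAPI  # assumed import path for the module above

    class EchoTranslator(TranslatorAPI):
        def get_lang_codes(self) -> list[str]:
            return ["ru", "en"]

        def translate(self, *, text: str, lang_code_from: str | None = None, lang_code_to: str) -> str:
            return text  # placeholder; a real implementation would call a translation backend
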
@@ -0,0 +1,91 @@
+ from enum import StrEnum
+ from typing import Annotated
+
+ from pydantic import AfterValidator, BaseModel
+
+
+ def _validate_page_range(v: tuple[int, int]) -> tuple[int, int]:
+     if v[0] < 1 or v[1] < v[0]:
+         raise ValueError("Invalid page range: start must be ≥ 1 and end must be ≥ start.")
+     return v
+
+
+ PageRange = Annotated[tuple[int, int], AfterValidator(_validate_page_range)]
+ ForceOCR = StrEnum("ForceOCR", ["ENABLED", "DISABLED", "AUTO"])
+ OutputType = StrEnum("OutputType", ["RAW", "PLAIN", "MARKDOWN"])
+ ResourceId = str
+
+
+ class ExtractionEngineSpec(BaseModel, frozen=True):
+     output_type: OutputType = OutputType.MARKDOWN
+     force_ocr: ForceOCR = ForceOCR.AUTO
+     do_ocr: bool = False
+     do_table_structure: bool = False
+     do_cell_matching: bool = False
+     do_annotations: bool = False
+     do_image_extraction: bool = False
+     generate_page_images: bool = False
+     images_scale: float = 2.0
+
+
+ class DocExtractionSpec(BaseModel, frozen=True):
+     page_range: PageRange | None = None
+     engine: ExtractionEngineSpec = ExtractionEngineSpec()
+
+     def _update(self, **update):
+         return self.model_copy(update=update)
+
+     def _update_engine(self, **engine_update):
+         return self._update(engine=self.engine.model_copy(update=engine_update))
+
+     # fmt: off
+     def with_output_type_raw(self): return self._update_engine(output_type=OutputType.RAW)
+     def with_output_type_plain(self): return self._update_engine(output_type=OutputType.PLAIN)
+     def with_ocr(self): return self._update_engine(do_ocr=True)
+     def with_tables(self): return self._update_engine(do_table_structure=True, do_cell_matching=True)
+     def with_images(self): return self._update_engine(do_image_extraction=True)
+     def with_annotations(self): return self._update_engine(do_annotations=True)
+     def with_force_ocr_enabled(self): return self._update_engine(force_ocr=ForceOCR.ENABLED)
+     def with_force_ocr_disabled(self): return self._update_engine(force_ocr=ForceOCR.DISABLED)
+     def with_page_images(self): return self._update_engine(generate_page_images=True)
+
+     def with_page_range(self, page_range: PageRange): return self._update(page_range=page_range)
+     # fmt: on
+
+
+ class ExtractedImage(BaseModel):
+     page: int
+     image_resource_id: ResourceId | None = None
+
+
+ class ExtractedImageMetadata(BaseModel):
+     annotation: str = ""
+     caption: str = ""
+     width: int | None = None
+     height: int | None = None
+
+
+ class ExtractedPicture(ExtractedImage, ExtractedImageMetadata):
+     "Image of a region of a page"
+
+
+ class ExtractedTable(ExtractedImage, ExtractedImageMetadata):
+     formatted_str: str
+
+
+ class ExtractedPageImage(ExtractedImage):
+     "Image of a full page"
+
+
+ class DocExtractionOutput(BaseModel):
+     spec: DocExtractionSpec
+     text: str = ""
+     tables: list[ExtractedTable] = []
+     pictures: list[ExtractedPicture] = []
+     page_images: list[ExtractedPageImage] = []
+
+
+ class DocumentExtractorAPI:
+     def extract(self, *, resource_id: ResourceId, spec: DocExtractionSpec) -> ResourceId | None:
+         """Returns a resource containing a serialized DocExtractionOutput."""
+         raise NotImplementedError
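
`DocExtractionSpec` is frozen, so the `with_*` helpers return updated copies via `model_copy` instead of mutating in place, and calls can be chained. A short sketch of that fluent style (the assertions follow from the field defaults shown above):

    from mmar_mapi.api.document_extractor import DocExtractionSpec

    spec = DocExtractionSpec().with_ocr().with_tables().with_page_range((1, 10))
    assert spec.engine.do_ocr and spec.engine.do_table_structure and spec.engine.do_cell_matching
    assert DocExtractionSpec().engine.do_ocr is False  # the original spec is never mutated
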
@@ -0,0 +1,94 @@
+ from typing import Literal
+
+ from pydantic import BaseModel
+
+ from mmar_mapi import ChatMessage
+
+
+ class EntrypointInfo(BaseModel):
+     entrypoint_key: str
+     caption: str
+
+
+ class EntrypointsConfig(BaseModel):
+     entrypoints: list[EntrypointInfo]
+     default_entrypoint_key: str
+
+
+ class LLMCallProps(BaseModel, frozen=True):
+     entrypoint_key: str = ""
+     attempts: int = 1
+
+
+ LCP = LLMCallProps()
+ ResourceId = str
+ FileId = str
+ Attachments = list[list[ResourceId]]
+
+
+ class Message(BaseModel, frozen=True):
+     role: Literal["system", "assistant", "user"]
+     content: str
+
+     @staticmethod
+     def create(message: ChatMessage) -> "Message | None":
+         return _create_message(message=message)
+
+
+ def _create_message(message: ChatMessage) -> Message | None:
+     role = "assistant" if message.is_ai else "user" if message.is_human else None
+     return Message(role=role, content=message.text) if role else None
+
+
+ class Messages(BaseModel, frozen=True):
+     messages: list[Message]
+
+
+ class Payload(Messages, frozen=True):
+     attachments: Attachments | None = None
+
+     def with_attachments(self, attachments: Attachments) -> "Payload":
+         return self.model_copy(update=dict(attachments=attachments))
+
+     def __repr__(self):
+         parts = [f"messages: {len(self.messages)}", self.attachments and "has attachments"]
+         payload_pretty = ", ".join(filter(None, parts))
+         return f"Payload({payload_pretty})"
+
+     @staticmethod
+     def create(user_text: str, resource_id: ResourceId = "") -> "Payload":
+         return _create_payload(user_text=user_text, resource_id=resource_id)
+
+
+ def _create_payload(user_text: str, resource_id: ResourceId = "") -> Payload:
+     payload = Payload(messages=[Message(role="user", content=user_text)])
+     if not resource_id:
+         return payload
+     else:
+         return payload.with_attachments(attachments=[[resource_id]])
+
+
+ class ResponseExt(BaseModel):
+     text: str
+     resource_id: ResourceId | None = None
+
+
+ RESPONSE_EMPTY = ResponseExt(text="")
+ Request = str | Messages | Payload
+
+
+ class LLMAccessorAPI:
+     def get_entrypoints_config(self) -> EntrypointsConfig:
+         raise NotImplementedError
+
+     def get_entrypoint_keys(self) -> list[str]:
+         raise NotImplementedError
+
+     def get_response(self, *, request: Request, props: LLMCallProps = LCP) -> str:
+         raise NotImplementedError
+
+     def get_response_ext(self, *, request: Request, props: LLMCallProps = LCP) -> ResponseExt:
+         raise NotImplementedError
+
+     def get_embedding(self, *, prompt: str, props: LLMCallProps = LCP) -> list[float] | None:
+         raise NotImplementedError
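
`Payload.create` wraps a user message, and optionally one attachment, into the `Request` shape that `LLMAccessorAPI` accepts. A sketch of composing a call (the resource path and the `accessor` instance are hypothetical; only the types come from the diff):

    from mmar_mapi.api.llm_accessor import LLMCallProps, Payload

    payload = Payload.create("Summarize this document", resource_id="/files/abc123.pdf")
    props = LLMCallProps(entrypoint_key="default", attempts=3)
    # text = accessor.get_response(request=payload, props=props)  # accessor: any LLMAccessorAPI impl
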
@@ -0,0 +1,14 @@
+ from collections.abc import Callable
+ from functools import lru_cache
+
+ from loguru import logger
+
+
+ def maybe_lru_cache(maxsize: int, func: Callable) -> Callable:
+     if maxsize >= 0:
+         maxsize = maxsize or None
+         logger.info(f"Caching for {func.__name__}: enabled: maxsize={maxsize}")
+         func = lru_cache(maxsize=maxsize)(func)
+     else:
+         logger.info(f"Caching for {func.__name__}: disabled")
+     return func
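
Per the code above, `maybe_lru_cache` treats `maxsize == 0` as an unbounded cache (zero is coerced to `None` for `lru_cache`) and any negative value as caching disabled. A small usage sketch (`slow_square` is hypothetical):

    from mmar_mapi import maybe_lru_cache

    def slow_square(x: int) -> int:
        return x * x

    cached = maybe_lru_cache(128, slow_square)   # bounded LRU cache of 128 entries
    uncached = maybe_lru_cache(-1, slow_square)  # returned unchanged, caching disabled
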
@@ -0,0 +1,141 @@
+ import json
+ import string
+ from datetime import datetime
+ from hashlib import md5
+ from pathlib import Path
+ from zipfile import ZipFile, is_zipfile
+
+ ResourceId = str
+ ASCII_DIGITS_SPECIAL = set(string.ascii_lowercase + string.digits + "-")
+ SUFFIX_DIR = ".dir"
+ SUFFIX_METADATA = ".metadata"
+
+
+ def _validate_exist(files_dir):
+     if not files_dir.exists():
+         err = f"Failed to access file-storage directory: {files_dir}"
+         raise OSError(err)
+
+
+ def _validate_dtype(dtype: str):
+     if all(map(ASCII_DIGITS_SPECIAL.__contains__, dtype)):
+         return
+     raise ValueError(f"Bad dtype: {dtype}")
+
+
+ def generate_fname(content, dtype):
+     fname_hash = md5(content).hexdigest()
+     fname = f"{fname_hash}.{dtype}"
+     return fname
+
+
+ class FileStorage:
+     def __init__(self, files_dir):
+         self.files_dir = Path(files_dir)
+         self.files_dir.mkdir(exist_ok=True, parents=True)
+         _validate_exist(self.files_dir)
+
+     def _generate_fname_path(self, content: bytes, dtype: str):
+         fpath = self.files_dir / generate_fname(content, dtype)
+         return fpath
+
+     def upload_maybe(self, content: bytes | str | None, fname: str) -> ResourceId | None:
+         if not content:
+             return None
+         resource_id = self.upload(content, fname)
+         return resource_id
+
+     def upload(self, content: bytes | str, fname: str, origin: str | None = None) -> ResourceId:
+         if isinstance(content, str):
+             content = content.encode()
+
+         dtype = fname.rsplit(".", 1)[-1]
+         _validate_dtype(dtype)
+         fpath = self._generate_fname_path(content, dtype)
+         fpath.write_bytes(content)
+
+         fpath_md = fpath.with_suffix(SUFFIX_METADATA)
+         update_date = f"{datetime.now():%Y-%m-%d--%H-%M-%S}"
+         metadata = {"fname": fname, "update_date": update_date, "size": len(content), "origin": origin}
+         fpath_md.write_text(json.dumps(metadata, ensure_ascii=False))
+
+         return str(fpath)
+
+     def get_metadata(self, resource_id: ResourceId) -> dict | None:
+         metadata_path = Path(resource_id).with_suffix(SUFFIX_METADATA)
+         if not metadata_path.exists():
+             return None
+         return json.loads(metadata_path.read_text())
+
+     def get_fname(self, resource_id: ResourceId) -> str | None:
+         metadata = self.get_metadata(resource_id)
+         if metadata is None:
+             return None
+         return metadata.get("fname")
+
+     async def upload_async(self, content: bytes | str, fname: str) -> ResourceId:
+         return self.upload(content, fname)
+
+     def upload_dir(self, resource_ids: list[ResourceId]) -> ResourceId:
+         content = "\n".join(resource_ids)
+         res = self.upload(content=content, fname=".dir")
+         return res
+
+     def download(self, resource_id: ResourceId) -> bytes:
+         return Path(resource_id).read_bytes()
+
+     async def download_async(self, resource_id: ResourceId) -> bytes:
+         return self.download(resource_id)
+
+     def download_text(self, resource_id: ResourceId) -> str:
+         return Path(resource_id).read_text(encoding="utf-8")
+
+     def read_dir_or_none(self, resource_id: ResourceId) -> list[ResourceId] | None:
+         if not self.is_dir(resource_id):
+             return None
+         res = self.download_text(resource_id).split("\n")
+         return res
+
+     def get_path(self, resource_id: ResourceId | None) -> Path | None:
+         return self._get_path(resource_id)
+
+     def _get_path(self, resource_id: ResourceId | None) -> Path | None:
+         if not resource_id:
+             return None
+         path = Path(resource_id)
+         return path if (path.exists() and path.is_file()) else None
+
+     def is_valid(self, resource_id: ResourceId | None) -> bool:
+         path = self._get_path(resource_id)
+         return path is not None
+
+     def is_file(self, resource_id: ResourceId | None) -> bool:
+         path = self._get_path(resource_id)
+         return bool(path and path.suffix != SUFFIX_DIR)
+
+     def is_dir(self, resource_id: ResourceId | None) -> bool:
+         path = self._get_path(resource_id)
+         return bool(path and path.suffix == SUFFIX_DIR)
+
+     def get_dtype(self, resource_id: ResourceId | None) -> str | None:
+         return resource_id and resource_id.rsplit(".")[-1].lower()
+
+     def unzip_file(self, resource_id: str) -> ResourceId:
+         """Unpacks the zip archive behind resource_id and returns a directory ResourceId listing its contents."""
+         path = self._get_path(resource_id)
+         if not path:
+             raise ValueError(f"Resource path not found: {resource_id}")
+         if not is_zipfile(path):
+             raise ValueError(f"Expected zip archive but found: {resource_id}")
+
+         resource_ids = []
+
+         with ZipFile(path, mode="r") as zip_file:
+             for file_info in zip_file.filelist:
+                 file_dtype = file_info.filename.rsplit(".")[-1]
+                 file_bytes = zip_file.read(file_info)
+                 rid = self.upload(file_bytes, file_dtype)
+                 resource_ids.append(rid)
+
+         res = self.upload_dir(resource_ids)
+         return res
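
`FileStorage` is content-addressed: the on-disk name is the md5 of the content plus the extension taken from `fname`, the returned ResourceId is that path, and the original filename survives only in the `.metadata` sidecar. A round-trip sketch (the storage directory is hypothetical):

    from mmar_mapi import FileStorage

    storage = FileStorage("/tmp/mmar-files")
    rid = storage.upload(b"hello", "notes.txt")   # rid is the md5-named path on disk
    assert storage.download(rid) == b"hello"
    assert storage.get_fname(rid) == "notes.txt"  # recovered from the .metadata sidecar
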
File without changes
@@ -0,0 +1,15 @@
+ import json
+ from typing import Any
+
+ from pydantic import BaseModel, ConfigDict, model_validator
+
+
+ class Base(BaseModel):
+     model_config = ConfigDict(populate_by_name=True, str_strip_whitespace=True)
+
+     @model_validator(mode="before")
+     @classmethod
+     def validate_to_json(cls, value: str | Any) -> Any:
+         if isinstance(value, str):
+             return cls(**json.loads(value))
+         return value
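
The before-mode validator on `Base` lets any subclass accept a raw JSON string wherever a model is expected. A sketch (`Point` is hypothetical; the behavior follows from the validator above):

    from mmar_mapi import Base

    class Point(Base):
        x: int
        y: int

    p = Point.model_validate('{"x": 1, "y": 2}')  # the validator parses the string first
    assert (p.x, p.y) == (1, 2)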