mmar-mapi 1.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mmar_mapi-1.2.6/LICENSE +21 -0
- mmar_mapi-1.2.6/PKG-INFO +29 -0
- mmar_mapi-1.2.6/README.md +3 -0
- mmar_mapi-1.2.6/pyproject.toml +78 -0
- mmar_mapi-1.2.6/src/mmar_mapi/__init__.py +47 -0
- mmar_mapi-1.2.6/src/mmar_mapi/api/__init__.py +134 -0
- mmar_mapi-1.2.6/src/mmar_mapi/api/document_extractor.py +95 -0
- mmar_mapi-1.2.6/src/mmar_mapi/api/llm_accessor.py +93 -0
- mmar_mapi-1.2.6/src/mmar_mapi/decorators_maybe_lru_cache.py +14 -0
- mmar_mapi-1.2.6/src/mmar_mapi/file_storage.py +141 -0
- mmar_mapi-1.2.6/src/mmar_mapi/models/__init__.py +0 -0
- mmar_mapi-1.2.6/src/mmar_mapi/models/base.py +15 -0
- mmar_mapi-1.2.6/src/mmar_mapi/models/chat.py +361 -0
- mmar_mapi-1.2.6/src/mmar_mapi/models/enums.py +40 -0
- mmar_mapi-1.2.6/src/mmar_mapi/models/tracks.py +14 -0
- mmar_mapi-1.2.6/src/mmar_mapi/models/widget.py +42 -0
- mmar_mapi-1.2.6/src/mmar_mapi/py.typed +0 -0
- mmar_mapi-1.2.6/src/mmar_mapi/tracks.py +111 -0
- mmar_mapi-1.2.6/src/mmar_mapi/type_union.py +63 -0
- mmar_mapi-1.2.6/src/mmar_mapi/utils.py +18 -0
- mmar_mapi-1.2.6/src/mmar_mapi/utils_import.py +47 -0
- mmar_mapi-1.2.6/src/mmar_mapi/xml_parser.py +18 -0
mmar_mapi-1.2.6/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 AIRI
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
mmar_mapi-1.2.6/PKG-INFO
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mmar-mapi
|
|
3
|
+
Version: 1.2.6
|
|
4
|
+
Summary: Common pure/IO utilities for multi-modal architectures team
|
|
5
|
+
Keywords:
|
|
6
|
+
Author: Eugene Tagin
|
|
7
|
+
Author-email: Eugene Tagin <tagin@airi.net>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
|
+
Classifier: Topic :: Documentation
|
|
19
|
+
Classifier: Topic :: Software Development
|
|
20
|
+
Classifier: Topic :: Utilities
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Dist: pydantic~=2.11.7
|
|
23
|
+
Requires-Dist: loguru~=0.7.3
|
|
24
|
+
Requires-Python: >=3.11
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# mmar-mapi
|
|
28
|
+
|
|
29
|
+
Multimodal architectures Maestro API
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mmar-mapi"
|
|
3
|
+
# dynamic version is not supported yet on uv_build
|
|
4
|
+
version = "1.2.6"
|
|
5
|
+
description = "Common pure/IO utilities for multi-modal architectures team"
|
|
6
|
+
authors = [{name = "Eugene Tagin", email = "tagin@airi.net"}]
|
|
7
|
+
license = "MIT"
|
|
8
|
+
license-files = ["LICENSE"]
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
keywords = []
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 4 - Beta",
|
|
14
|
+
"Intended Audience :: Developers",
|
|
15
|
+
"Programming Language :: Python",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Programming Language :: Python :: 3.13",
|
|
20
|
+
"Programming Language :: Python :: 3.14",
|
|
21
|
+
"Topic :: Documentation",
|
|
22
|
+
"Topic :: Software Development",
|
|
23
|
+
"Topic :: Utilities",
|
|
24
|
+
"Typing :: Typed",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"pydantic~=2.11.7",
|
|
28
|
+
"loguru~=0.7.3",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[build-system]
|
|
32
|
+
requires = ["uv_build>=0.8.14,<0.9.0"]
|
|
33
|
+
build-backend = "uv_build"
|
|
34
|
+
|
|
35
|
+
[tool.uv.build-backend]
|
|
36
|
+
module-name = "mmar_mapi"
|
|
37
|
+
source-exclude = [".ruff_cache"]
|
|
38
|
+
|
|
39
|
+
[dependency-groups]
|
|
40
|
+
maintain = [
|
|
41
|
+
"build>=1.2",
|
|
42
|
+
"git-changelog>=2.5",
|
|
43
|
+
"twine>=5.1",
|
|
44
|
+
"yore>=0.3.3",
|
|
45
|
+
]
|
|
46
|
+
ci = [
|
|
47
|
+
"ruff>=0.4",
|
|
48
|
+
"pytest>=8.2",
|
|
49
|
+
"pytest-cov>=5.0",
|
|
50
|
+
"pytest-randomly>=3.15",
|
|
51
|
+
"pytest-asyncio>=1.0.0",
|
|
52
|
+
"mypy>=1.10",
|
|
53
|
+
"types-markdown>=3.6",
|
|
54
|
+
"types-pyyaml>=6.0",
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
[tool.uv]
|
|
58
|
+
default-groups = ["maintain", "ci"]
|
|
59
|
+
|
|
60
|
+
[tool.ruff]
|
|
61
|
+
line-length = 120
|
|
62
|
+
|
|
63
|
+
[tool.pytest.ini_options]
asyncio_mode = "auto"
pythonpath = "src"
python_files = [ "test_*.py" ]
testpaths = [ "tests" ]

# action:message_regex:warning_class:module_regex:line
filterwarnings = [ "error" ]
|
|
73
|
+
|
|
74
|
+
[tool.mypy]
|
|
75
|
+
ignore_missing_imports = true
|
|
76
|
+
exclude = "tests/fixtures/"
|
|
77
|
+
warn_unused_ignores = true
|
|
78
|
+
show_error_codes = true
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from .file_storage import FileStorage, ResourceId
|
|
2
|
+
from .models.base import Base
|
|
3
|
+
from .models.chat import (
|
|
4
|
+
Chat,
|
|
5
|
+
Context,
|
|
6
|
+
ChatMessage,
|
|
7
|
+
AIMessage,
|
|
8
|
+
HumanMessage,
|
|
9
|
+
MiscMessage,
|
|
10
|
+
make_content,
|
|
11
|
+
Content,
|
|
12
|
+
BaseMessage,
|
|
13
|
+
)
|
|
14
|
+
from .models.enums import MTRSLabelEnum, DiagnosticsXMLTagEnum, MTRSXMLTagEnum, DoctorChoiceXMLTagEnum
|
|
15
|
+
from .models.tracks import TrackInfo, DomainInfo
|
|
16
|
+
from .models.widget import Widget
|
|
17
|
+
from .utils import make_session_id, chunked
|
|
18
|
+
from .xml_parser import XMLParser
|
|
19
|
+
from .utils_import import load_main_objects
|
|
20
|
+
from .decorators_maybe_lru_cache import maybe_lru_cache
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"AIMessage",
|
|
24
|
+
"Base",
|
|
25
|
+
"BaseMessage",
|
|
26
|
+
"Chat",
|
|
27
|
+
"ChatMessage",
|
|
28
|
+
"Content",
|
|
29
|
+
"Context",
|
|
30
|
+
"DiagnosticsXMLTagEnum",
|
|
31
|
+
"DoctorChoiceXMLTagEnum",
|
|
32
|
+
"DomainInfo",
|
|
33
|
+
"FileStorage",
|
|
34
|
+
"HumanMessage",
|
|
35
|
+
"MTRSLabelEnum",
|
|
36
|
+
"MTRSXMLTagEnum",
|
|
37
|
+
"MiscMessage",
|
|
38
|
+
"ResourceId",
|
|
39
|
+
"TrackInfo",
|
|
40
|
+
"Widget",
|
|
41
|
+
"XMLParser",
|
|
42
|
+
"chunked",
|
|
43
|
+
"load_main_objects",
|
|
44
|
+
"make_content",
|
|
45
|
+
"make_session_id",
|
|
46
|
+
"maybe_lru_cache",
|
|
47
|
+
]
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
from pydantic import BaseModel
|
|
2
|
+
|
|
3
|
+
from mmar_mapi.api.document_extractor import (
|
|
4
|
+
DocExtractionOutput,
|
|
5
|
+
DocExtractionSpec,
|
|
6
|
+
DocumentExtractorAPI,
|
|
7
|
+
ExtractedImage,
|
|
8
|
+
ExtractedImageMetadata,
|
|
9
|
+
ExtractedPageImage,
|
|
10
|
+
ExtractedPicture,
|
|
11
|
+
ExtractedTable,
|
|
12
|
+
ExtractionEngineSpec,
|
|
13
|
+
ForceOCR,
|
|
14
|
+
OutputType,
|
|
15
|
+
PageRange,
|
|
16
|
+
)
|
|
17
|
+
from mmar_mapi.api.llm_accessor import (
|
|
18
|
+
LCP,
|
|
19
|
+
RESPONSE_EMPTY,
|
|
20
|
+
Attachments,
|
|
21
|
+
LLMAccessorAPI,
|
|
22
|
+
LLMCallProps,
|
|
23
|
+
Message,
|
|
24
|
+
Messages,
|
|
25
|
+
Payload,
|
|
26
|
+
Request,
|
|
27
|
+
ResponseExt,
|
|
28
|
+
EntrypointInfo,
|
|
29
|
+
EntrypointsConfig,
|
|
30
|
+
)
|
|
31
|
+
from mmar_mapi.models.chat import Chat, ChatMessage
|
|
32
|
+
from mmar_mapi.models.tracks import DomainInfo, TrackInfo
|
|
33
|
+
|
|
34
|
+
# variable to prevent removing unused imports
|
|
35
|
+
__imported__ = [
|
|
36
|
+
# llm_accessor
|
|
37
|
+
LLMCallProps,
|
|
38
|
+
LCP,
|
|
39
|
+
Attachments,
|
|
40
|
+
Message,
|
|
41
|
+
Messages,
|
|
42
|
+
Payload,
|
|
43
|
+
Request,
|
|
44
|
+
ResponseExt,
|
|
45
|
+
RESPONSE_EMPTY,
|
|
46
|
+
LLMAccessorAPI,
|
|
47
|
+
# document_extractor
|
|
48
|
+
PageRange,
|
|
49
|
+
ForceOCR,
|
|
50
|
+
OutputType,
|
|
51
|
+
ExtractionEngineSpec,
|
|
52
|
+
DocExtractionSpec,
|
|
53
|
+
ExtractedImage,
|
|
54
|
+
ExtractedImageMetadata,
|
|
55
|
+
ExtractedPicture,
|
|
56
|
+
ExtractedTable,
|
|
57
|
+
ExtractedPageImage,
|
|
58
|
+
DocExtractionOutput,
|
|
59
|
+
DocumentExtractorAPI,
|
|
60
|
+
EntrypointInfo,
|
|
61
|
+
EntrypointsConfig,
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
Interpretation = str
|
|
66
|
+
ResourceId = str
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class ChatManagerAPI:
    """Abstract interface for chat/domain/track management (all methods raise NotImplementedError)."""

    def get_domains(self, *, client_id: str, language_code: str = "ru") -> list[DomainInfo]:
        """Return the domains available to *client_id* for the given language code."""
        raise NotImplementedError

    def get_tracks(self, *, client_id: str, language_code: str = "ru") -> list[TrackInfo]:
        """Return the tracks available to *client_id* for the given language code."""
        raise NotImplementedError

    def get_response(self, *, chat: Chat) -> list[ChatMessage]:
        """Return response messages for the given chat."""
        raise NotImplementedError
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class TextGeneratorAPI:
    """Abstract interface for producing a text result from a chat."""

    def process(self, *, chat: Chat) -> str:
        """Return generated text for the given chat."""
        raise NotImplementedError
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class ContentInterpreterRemoteResponse(BaseModel):
    """Interpretation result together with the produced resource bytes and its file name."""

    interpretation: str
    resource_fname: str
    resource: bytes
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class ContentInterpreterRemoteAPI:
    """Abstract interface for interpreting raw content passed by value (not by resource id)."""

    def interpret_remote(
        self, *, kind: str, query: str, resource: bytes, chat: Chat | None = None
    ) -> ContentInterpreterRemoteResponse:
        """Interpret *resource* bytes of the given *kind* against *query*, optionally in a chat context."""
        raise NotImplementedError
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class BinaryClassifiersAPI:
    """Abstract interface for named yes/no text classifiers."""

    def get_classifiers(self) -> list[str]:
        """Return the names of the available classifiers."""
        raise NotImplementedError

    def evaluate(self, *, classifier: str | None = None, text: str) -> bool:
        """Classify *text* with the named classifier.

        NOTE(review): semantics of ``classifier=None`` (default classifier?) are
        not visible here — confirm against an implementation.
        """
        raise NotImplementedError
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class TranslatorAPI:
    """Abstract interface for text translation."""

    def get_lang_codes(self) -> list[str]:
        """Return the supported language codes."""
        raise NotImplementedError

    def translate(self, *, text: str, lang_code_from: str | None = None, lang_code_to: str) -> str:
        """Translate *text* into *lang_code_to*.

        NOTE(review): behavior when ``lang_code_from`` is None (presumably
        source-language auto-detection) is not visible here — verify.
        """
        raise NotImplementedError
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class CriticAPI:
    """Abstract interface for scoring a text, optionally with chat context."""

    def evaluate(self, *, text: str, chat: Chat | None = None) -> float:  # TODO replace float with bool
        """Return an evaluation score for *text*."""
        raise NotImplementedError
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class ContentInterpreterAPI:
    """Abstract interface for interpreting a stored resource (referenced by id)."""

    def interpret(
        self, *, kind: str, query: str, resource_id: str = "", chat: Chat | None = None
    ) -> tuple[Interpretation, ResourceId | None]:
        """Interpret the resource of the given *kind*; returns the interpretation text and an optional produced resource id."""
        raise NotImplementedError
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
class TextProcessorAPI:
    """Abstract interface for transforming a text, optionally with chat context."""

    def process(self, *, text: str, chat: Chat | None = None) -> str:
        """Return the processed text."""
        raise NotImplementedError
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class TextExtractorAPI:
    """Abstract interface for extracting text from a stored resource."""

    def extract(self, *, resource_id: ResourceId) -> ResourceId:
        """returns file with text"""
        raise NotImplementedError
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
from enum import StrEnum
|
|
2
|
+
from typing import Annotated
|
|
3
|
+
|
|
4
|
+
from pydantic import AfterValidator, BaseModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _validate_page_range(v: tuple[int, int]) -> tuple[int, int]:
|
|
8
|
+
if v[0] < 1 or v[1] < v[0]:
|
|
9
|
+
raise ValueError("Invalid page range: start must be ≥ 1 and end must be ≥ start.")
|
|
10
|
+
return v
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
PageRange = Annotated[tuple[int, int], AfterValidator(_validate_page_range)]
|
|
14
|
+
ForceOCR = StrEnum("ForceOCR", ["ENABLED", "DISABLED", "AUTO"])
|
|
15
|
+
OutputType = StrEnum("OutputType", ["RAW", "PLAIN", "MARKDOWN"])
|
|
16
|
+
ResourceId = str
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ExtractionEngineSpec(BaseModel, frozen=True):
    """Immutable feature switches for the document extraction engine."""

    output_type: OutputType = OutputType.MARKDOWN
    force_ocr: ForceOCR = ForceOCR.AUTO
    do_ocr: bool = False
    do_table_structure: bool = False
    do_cell_matching: bool = False
    do_annotations: bool = False
    do_image_extraction: bool = False
    generate_page_images: bool = False
    # Scale factor for generated images — presumably relative to native page size; verify.
    images_scale: float = 2.0
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class DocExtractionSpec(BaseModel, frozen=True):
    """Immutable extraction request: optional page range plus engine settings.

    The ``with_*`` methods form a fluent builder: the model is frozen, so each
    returns a new spec with a single setting changed.
    """

    page_range: PageRange | None = None
    engine: ExtractionEngineSpec = ExtractionEngineSpec()

    def _update(self, **update):
        # Copy-with-changes for top-level fields.
        return self.model_copy(update=update)

    def _update_engine(self, **engine_update):
        # Copy-with-changes for nested engine fields.
        return self._update(engine=self.engine.model_copy(update=engine_update))

    # fmt: off
    def with_output_type_raw(self): return self._update_engine(output_type=OutputType.RAW)
    def with_output_type_plain(self): return self._update_engine(output_type=OutputType.PLAIN)
    def with_ocr(self): return self._update_engine(do_ocr=True)
    def with_tables(self): return self._update_engine(do_table_structure=True, do_cell_matching=True)
    def with_images(self): return self._update_engine(do_image_extraction=True)
    def with_annotations(self): return self._update_engine(do_annotations=True)
    def with_force_ocr_enabled(self): return self._update_engine(force_ocr=ForceOCR.ENABLED)
    def with_force_ocr_disabled(self): return self._update_engine(force_ocr=ForceOCR.DISABLED)
    def with_page_images(self): return self._update_engine(generate_page_images=True)

    def with_page_range(self, page_range: PageRange): return self._update(page_range=page_range)
    # fmt: on
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ExtractedImage(BaseModel):
    """Base record for an image extracted from a document page."""

    # Page number the image came from.
    page: int
    # Resource id of the stored image file, when available.
    image_resource_id: ResourceId | None = None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class ExtractedImageMetadata(BaseModel):
    """Optional descriptive metadata for an extracted image."""

    annotation: str = ""
    caption: str = ""
    # Image dimensions when known — presumably pixels; verify against extractor.
    width: int | None = None
    height: int | None = None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class ExtractedPicture(ExtractedImage, ExtractedImageMetadata):
    """Image of part of page (a picture/figure region) with its metadata."""

    ...
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class ExtractedTable(ExtractedImage, ExtractedImageMetadata):
    """Table extracted from a page, with a textual rendering."""

    # Textual rendering of the table contents.
    formatted_str: str
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class ExtractedPageImage(ExtractedImage):
    """Image of all page (whole-page render)."""

    pass
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class DocExtractionOutput(BaseModel):
    """Result of a document extraction run, carrying the spec that produced it."""

    spec: DocExtractionSpec
    text: str = ""
    # Mutable-list defaults are safe here: pydantic deep-copies field defaults per instance.
    tables: list[ExtractedTable] = []
    pictures: list[ExtractedPicture] = []
    page_images: list[ExtractedPageImage] = []
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class DocumentExtractorAPI:
    """Abstract interface for document extraction services."""

    def extract(self, *, resource_id: ResourceId, spec: DocExtractionSpec) -> ResourceId | None:
        """returns file with DocExtractionOutput"""
        raise NotImplementedError
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
from mmar_mapi import ChatMessage
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class EntrypointInfo(BaseModel):
    """One selectable LLM entrypoint with its display caption."""

    entrypoint_key: str
    caption: str
|
|
11
|
+
|
|
12
|
+
class EntrypointsConfig(BaseModel):
    """All available entrypoints plus the key to use when none is requested."""

    entrypoints: list[EntrypointInfo]
    default_entrypoint_key: str
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class LLMCallProps(BaseModel, frozen=True):
    """Immutable per-call options for LLM requests."""

    # Empty string presumably selects the accessor's default entrypoint — verify.
    entrypoint_key: str = ""
    # Number of attempts for the call — presumably retries on failure; verify.
    attempts: int = 1
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
LCP = LLMCallProps()
|
|
23
|
+
ResourceId = str
|
|
24
|
+
FileId = str
|
|
25
|
+
Attachments = list[list[ResourceId]]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Message(BaseModel, frozen=True):
    """Single immutable chat message with an OpenAI-style role."""

    role: Literal["system", "assistant", "user"]
    content: str

    @staticmethod
    def create(message: ChatMessage) -> "Message | None":
        """Convert a ChatMessage to a Message.

        Returns None when the chat message is neither AI nor human
        (``_create_message`` yields None for such messages), so the return
        annotation is ``Message | None`` — the original ``-> "Message"``
        annotation was wrong.
        """
        return _create_message(message=message)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _create_message(message: ChatMessage) -> Message | None:
    """Map a ChatMessage to an LLM Message; None for messages that are neither AI nor human."""
    role = "assistant" if message.is_ai else "user" if message.is_human else None
    return Message(role=role, content=message.text) if role else None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class Messages(BaseModel, frozen=True):
    """Immutable ordered list of chat messages."""

    messages: list[Message]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class Payload(Messages, frozen=True):
    """Messages plus optional attachments (lists of resource ids)."""

    attachments: Attachments | None = None

    def with_attachments(self, attachments: Attachments) -> "Payload":
        """Return a copy of this payload with the given attachments set."""
        return self.model_copy(update=dict(attachments=attachments))

    def __repr__(self):
        # Compact repr: message count plus a marker when attachments are present
        # (falsy attachments are filtered out by filter(None, ...)).
        parts = [f"messages: {len(self.messages)}", self.attachments and "has attachments"]
        payload_pretty = ", ".join(filter(None, parts))
        return f"Payload({payload_pretty})"

    @staticmethod
    def create(user_text: str, resource_id: ResourceId = "") -> "Payload":
        """Build a single-user-message payload, optionally with one attachment."""
        return _create_payload(user_text=user_text, resource_id=resource_id)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _create_payload(user_text: str, resource_id: ResourceId = ""):
    """Build a one-user-message Payload; a non-empty resource_id becomes a single attachment group."""
    payload = Payload(messages=[Message(role="user", content=user_text)])
    if not resource_id:
        return payload
    else:
        return payload.with_attachments(attachments=[[resource_id]])
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class ResponseExt(BaseModel):
    """LLM response text plus an optional produced file resource."""

    text: str
    resource_id: ResourceId | None = None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
RESPONSE_EMPTY = ResponseExt(text="")
|
|
76
|
+
Request = str | Messages | Payload
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class LLMAccessorAPI:
    """Abstract interface for an LLM access service."""

    def get_entrypoints_config(self) -> EntrypointsConfig:
        """Return the available entrypoints and the default entrypoint key."""
        raise NotImplementedError

    def get_entrypoint_keys(self) -> list[str]:
        """Return the keys of all available entrypoints."""
        raise NotImplementedError

    def get_response(self, *, request: Request, props: LLMCallProps = LCP) -> str:
        """Return the model's text response for the given request."""
        raise NotImplementedError

    def get_response_ext(self, *, request: Request, props: LLMCallProps = LCP) -> ResponseExt:
        """Return the response text plus any produced resource id."""
        raise NotImplementedError

    def get_embedding(self, *, prompt: str, props: LLMCallProps = LCP) -> list[float] | None:
        """Return an embedding vector for *prompt*, or None when unavailable."""
        raise NotImplementedError
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
from functools import lru_cache
|
|
3
|
+
|
|
4
|
+
from loguru import logger
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def maybe_lru_cache(maxsize: int, func: Callable) -> Callable:
    """Wrap *func* in ``functools.lru_cache`` when caching is enabled.

    Args:
        maxsize: ``>= 0`` enables caching; ``0`` means unbounded (mapped to
            ``lru_cache(maxsize=None)``); a negative value disables caching.
        func: the callable to (maybe) wrap.

    Returns:
        The cached wrapper, or *func* unchanged when caching is disabled.
        (The original annotation ``tuple[str, Callable]`` was wrong: only the
        callable is returned on both paths.)
    """
    if maxsize >= 0:
        # lru_cache interprets None as "no bound"; callers pass 0 to mean unbounded.
        maxsize = maxsize or None
        logger.info(f"Caching for {func.__name__}: enabled: maxsize={maxsize}")
        func = lru_cache(maxsize=maxsize)(func)
    else:
        logger.info(f"Caching for {func.__name__}: disabled")
    return func
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import string
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from hashlib import md5
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from zipfile import ZipFile, is_zipfile
|
|
7
|
+
|
|
8
|
+
ResourceId = str
|
|
9
|
+
ASCII_DIGITS_SPECIAL = set(string.ascii_lowercase + string.digits + "-")
|
|
10
|
+
SUFFIX_DIR = ".dir"
|
|
11
|
+
SUFFIX_METADATA = ".metadata"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _validate_exist(files_dir):
|
|
15
|
+
if not files_dir.exists():
|
|
16
|
+
err = f"Failed to access file-storage directory: {files_dir}"
|
|
17
|
+
raise OSError(err)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _validate_dtype(dtype: str):
|
|
21
|
+
if all(map(ASCII_DIGITS_SPECIAL.__contains__, dtype)):
|
|
22
|
+
return
|
|
23
|
+
raise ValueError(f"Bad dtype: {dtype}")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def generate_fname(content, dtype):
|
|
27
|
+
fname_hash = md5(content).hexdigest()
|
|
28
|
+
fname = f"{fname_hash}.{dtype}"
|
|
29
|
+
return fname
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class FileStorage:
    """Content-addressed local file storage.

    A resource id is the string path of a stored file.  Stored file names are
    the md5 hex digest of the content plus the extension taken from the
    uploaded fname.  Every stored file gets a sibling ``.metadata`` JSON file
    recording the original fname, upload timestamp, size and origin.
    """

    def __init__(self, files_dir):
        # Root directory for all stored files; created if missing.
        self.files_dir = Path(files_dir)
        self.files_dir.mkdir(exist_ok=True, parents=True)
        _validate_exist(self.files_dir)

    def _generate_fname_path(self, content: bytes, dtype: str):
        # Content-addressed target path: <files_dir>/<md5(content)>.<dtype>
        fpath = self.files_dir / generate_fname(content, dtype)
        return fpath

    def upload_maybe(self, content: bytes | str | None, fname: str) -> ResourceId | None:
        """Upload content unless it is empty/None; returns the resource id or None."""
        if not content:
            return None
        resource_id = self.upload(content, fname)
        return resource_id

    def upload(self, content: bytes | str, fname: str, origin: str | None = None) -> ResourceId:
        """Store *content* and return its resource id (the stored file's path).

        The extension of *fname* (text after the last dot) becomes the stored
        file's dtype and is validated.  A JSON metadata side-file is written
        next to the stored file.
        """
        if isinstance(content, str):
            content = content.encode()

        dtype = fname.rsplit(".", 1)[-1]
        _validate_dtype(dtype)
        fpath = self._generate_fname_path(content, dtype)
        fpath.write_bytes(content)

        # Metadata lives at the same path with the dtype suffix replaced.
        fpath_md = fpath.with_suffix(SUFFIX_METADATA)
        update_date = f"{datetime.now():%Y-%m-%d--%H-%M-%S}"
        metadata = {"fname": fname, "update_date": update_date, "size": len(content), "origin": origin}
        fpath_md.write_text(json.dumps(metadata, ensure_ascii=False))

        return str(fpath)

    def get_metadata(self, resource_id: ResourceId) -> dict | None:
        """Return the stored metadata dict for a resource, or None when absent."""
        metadata_path = Path(resource_id).with_suffix(SUFFIX_METADATA)
        if not metadata_path.exists():
            return None
        return json.loads(metadata_path.read_text())

    def get_fname(self, resource_id: ResourceId) -> str | None:
        """Return the original upload fname recorded in metadata, if any."""
        metadata = self.get_metadata(resource_id)
        if metadata is None:
            return None
        return metadata.get("fname")

    async def upload_async(self, content: bytes | str, fname: str) -> ResourceId:
        # Async facade over the synchronous upload (no actual async I/O here).
        return self.upload(content, fname)

    def upload_dir(self, resource_ids: list[ResourceId]) -> ResourceId:
        """Store a directory resource: a newline-joined list of resource ids with the ``.dir`` dtype."""
        content = "\n".join(resource_ids)
        res = self.upload(content=content, fname=".dir")
        return res

    def download(self, resource_id: ResourceId) -> bytes:
        """Return the raw bytes of a stored resource."""
        return Path(resource_id).read_bytes()

    async def download_async(self, resource_id: ResourceId) -> bytes:
        # Async facade over the synchronous download (no actual async I/O here).
        return self.download(resource_id)

    def download_text(self, resource_id: ResourceId) -> str:
        """Return the stored resource decoded as UTF-8 text."""
        return Path(resource_id).read_text(encoding="utf-8")

    def read_dir_or_none(self, resource_id: ResourceId) -> list[ResourceId] | None:
        """Return the resource ids listed in a directory resource, else None."""
        if not self.is_dir(resource_id):
            return None
        res = self.download_text(resource_id).split("\n")
        return res

    def get_path(self, resource_id: ResourceId | None) -> Path | None:
        """Return the Path of an existing stored file, else None."""
        return self._get_path(resource_id)

    def _get_path(self, resource_id: ResourceId | None) -> Path | None:
        # None/empty ids and non-existent or non-file paths all map to None.
        if not resource_id:
            return None
        path = Path(resource_id)
        return path if (path.exists() and path.is_file()) else None

    def is_valid(self, resource_id: ResourceId | None) -> bool:
        """True when the resource id refers to an existing stored file."""
        path = self._get_path(resource_id)
        return path is not None

    def is_file(self, resource_id: ResourceId | None) -> bool:
        """True for an existing resource that is not a directory resource."""
        path = self._get_path(resource_id)
        return bool(path and path.suffix != SUFFIX_DIR)

    def is_dir(self, resource_id: ResourceId | None) -> bool:
        """True for an existing directory resource (``.dir`` suffix)."""
        path = self._get_path(resource_id)
        return bool(path and path.suffix == SUFFIX_DIR)

    def get_dtype(self, resource_id: ResourceId | None) -> str | None:
        """Return the lowercased extension of the resource id (passes through None/empty)."""
        return resource_id and resource_id.rsplit(".")[-1].lower()

    def unzip_file(self, resource_id: str) -> ResourceId:
        """takes resource_id which refer to zip-archive, unpacks it and returns directory ResourceId with content of zip-archive"""
        path = self._get_path(resource_id)
        if not path:
            raise ValueError(f"Not found path: {resource_id}")
        if not is_zipfile(resource_id):
            raise ValueError(f"Expected zip archive but found: {resource_id}")

        resource_ids = []

        with ZipFile(path, mode="r") as zip_file:
            for file_info in zip_file.filelist:
                # NOTE(review): only the member's extension is passed to upload()
                # as fname, so the extracted file's metadata "fname" is just the
                # extension, and uppercase extensions would fail dtype
                # validation — confirm this is intended.
                file_dtype = file_info.filename.rsplit(".")[-1]
                file_bytes = zip_file.read(file_info)
                rid = self.upload(file_bytes, file_dtype)
                resource_ids.append(rid)

        res = self.upload_dir(resource_ids)
        return res
|
|
File without changes
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from pydantic import ConfigDict, BaseModel, model_validator
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Base(BaseModel):
    """Project-wide pydantic base model.

    Allows population by field name, strips whitespace from string fields,
    and accepts a JSON string wherever an instance is expected.
    """

    model_config = ConfigDict(populate_by_name=True, str_strip_whitespace=True)

    @model_validator(mode="before")
    @classmethod
    def validate_to_json(cls, value: str | Any) -> Any:
        # Allow constructing/validating directly from a JSON string payload;
        # non-string inputs pass through to normal pydantic validation.
        if isinstance(value, str):
            return cls(**json.loads(value))
        return value
|