admin-api-lib 3.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- admin_api_lib-3.2.0/PKG-INFO +24 -0
- admin_api_lib-3.2.0/pyproject.toml +119 -0
- admin_api_lib-3.2.0/src/admin_api_lib/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/api_endpoints/document_deleter.py +24 -0
- admin_api_lib-3.2.0/src/admin_api_lib/api_endpoints/document_reference_retriever.py +25 -0
- admin_api_lib-3.2.0/src/admin_api_lib/api_endpoints/documents_status_retriever.py +20 -0
- admin_api_lib-3.2.0/src/admin_api_lib/api_endpoints/file_uploader.py +31 -0
- admin_api_lib-3.2.0/src/admin_api_lib/api_endpoints/source_uploader.py +40 -0
- admin_api_lib-3.2.0/src/admin_api_lib/api_endpoints/uploader_base.py +30 -0
- admin_api_lib-3.2.0/src/admin_api_lib/apis/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/apis/admin_api.py +197 -0
- admin_api_lib-3.2.0/src/admin_api_lib/apis/admin_api_base.py +120 -0
- admin_api_lib-3.2.0/src/admin_api_lib/chunker/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/chunker/chunker.py +25 -0
- admin_api_lib-3.2.0/src/admin_api_lib/dependency_container.py +236 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/__init__.py +38 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/api/__init__.py +4 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/api/extractor_api.py +516 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/api_client.py +695 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/api_response.py +20 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/configuration.py +460 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/exceptions.py +197 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/models/__init__.py +21 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/models/content_type.py +34 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/models/extraction_parameters.py +103 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/models/extraction_request.py +82 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/models/information_piece.py +104 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/models/key_value_pair.py +92 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/rest.py +209 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/test_content_type.py +35 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/test_extraction_parameters.py +59 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/test_extraction_request.py +56 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/test_extractor_api.py +39 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/test_information_piece.py +62 -0
- admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/test_key_value_pair.py +54 -0
- admin_api_lib-3.2.0/src/admin_api_lib/file_services/file_service.py +77 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/admin_api.py +167 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/api_endpoints/default_document_deleter.py +84 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/api_endpoints/default_document_reference_retriever.py +72 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/api_endpoints/default_documents_status_retriever.py +41 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/api_endpoints/default_file_uploader.py +234 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/api_endpoints/default_source_uploader.py +202 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/chunker/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/chunker/chunker_type.py +11 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/chunker/semantic_text_chunker.py +252 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/chunker/text_chunker.py +33 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/file_services/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/file_services/s3_service.py +130 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/information_enhancer/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/information_enhancer/general_enhancer.py +52 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/information_enhancer/page_summary_enhancer.py +62 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/information_enhancer/summary_enhancer.py +74 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/key_db/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/key_db/file_status_key_value_store.py +111 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/mapper/informationpiece2document.py +108 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/chunker_class_type_settings.py +18 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/chunker_settings.py +29 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/document_extractor_settings.py +21 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/key_value_settings.py +26 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/rag_api_settings.py +21 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/s3_settings.py +31 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/source_uploader_settings.py +23 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/summarizer_settings.py +86 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/summarizer/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/impl/summarizer/langchain_summarizer.py +117 -0
- admin_api_lib-3.2.0/src/admin_api_lib/information_enhancer/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/information_enhancer/information_enhancer.py +34 -0
- admin_api_lib-3.2.0/src/admin_api_lib/main.py +54 -0
- admin_api_lib-3.2.0/src/admin_api_lib/models/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/models/document_status.py +86 -0
- admin_api_lib-3.2.0/src/admin_api_lib/models/extra_models.py +9 -0
- admin_api_lib-3.2.0/src/admin_api_lib/models/http_validation_error.py +105 -0
- admin_api_lib-3.2.0/src/admin_api_lib/models/key_value_pair.py +85 -0
- admin_api_lib-3.2.0/src/admin_api_lib/models/status.py +44 -0
- admin_api_lib-3.2.0/src/admin_api_lib/models/validation_error.py +104 -0
- admin_api_lib-3.2.0/src/admin_api_lib/models/validation_error_loc_inner.py +114 -0
- admin_api_lib-3.2.0/src/admin_api_lib/prompt_templates/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/prompt_templates/summarize_prompt.py +14 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/__init__.py +60 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/api/__init__.py +4 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/api/rag_api.py +968 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/api_client.py +698 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/api_response.py +22 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/configuration.py +460 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/exceptions.py +197 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/__init__.py +41 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_history.py +99 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_history_message.py +83 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_request.py +93 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_response.py +103 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_role.py +35 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/content_type.py +37 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/delete_request.py +99 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/information_piece.py +110 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/key_value_pair.py +83 -0
- admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/rest.py +209 -0
- admin_api_lib-3.2.0/src/admin_api_lib/summarizer/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/summarizer/summarizer.py +33 -0
- admin_api_lib-3.2.0/src/admin_api_lib/utils/__init__.py +0 -0
- admin_api_lib-3.2.0/src/admin_api_lib/utils/utils.py +32 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: admin-api-lib
|
|
3
|
+
Version: 3.2.0
|
|
4
|
+
Summary: The admin backend is responsible for the document management. This includes deletion, upload and returning the source document.
|
|
5
|
+
Author: STACKIT Data and AI Consulting
|
|
6
|
+
Author-email: data-ai-consulting@stackit.cloud
|
|
7
|
+
Requires-Python: >=3.13,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
10
|
+
Requires-Dist: boto3 (>=1.38.10,<2.0.0)
|
|
11
|
+
Requires-Dist: dependency-injector (>=4.46.0,<5.0.0)
|
|
12
|
+
Requires-Dist: fastapi (>=0.118.0,<0.119.0)
|
|
13
|
+
Requires-Dist: langchain-experimental (>=0.3.4,<0.4.0)
|
|
14
|
+
Requires-Dist: langfuse (==3.6.1)
|
|
15
|
+
Requires-Dist: nltk (>=3.9.2,<4.0.0)
|
|
16
|
+
Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
|
|
17
|
+
Requires-Dist: python-multipart (>=0.0.20,<0.0.21)
|
|
18
|
+
Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
|
|
19
|
+
Requires-Dist: rag-core-lib (==3.2.0)
|
|
20
|
+
Requires-Dist: redis (>=6.0.0,<7.0.0)
|
|
21
|
+
Requires-Dist: starlette (>=0.47.2,<0.49.0)
|
|
22
|
+
Requires-Dist: tenacity (==9.1.2)
|
|
23
|
+
Requires-Dist: tqdm (>=4.67.1,<5.0.0)
|
|
24
|
+
Requires-Dist: uvicorn (>=0.37.0,<0.38.0)
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["poetry-core"]
|
|
3
|
+
build-backend = "poetry.core.masonry.api"
|
|
4
|
+
|
|
5
|
+
[tool.poetry]
|
|
6
|
+
name = "admin-api-lib"
|
|
7
|
+
version = "v3.2.0"
|
|
8
|
+
description = "The admin backend is responsible for the document management. This includes deletion, upload and returning the source document."
|
|
9
|
+
authors = ["STACKIT Data and AI Consulting <data-ai-consulting@stackit.cloud>"]
|
|
10
|
+
packages = [{ include = "admin_api_lib", from = "src" }]
|
|
11
|
+
|
|
12
|
+
[tool.flake8]
|
|
13
|
+
exclude= [".eggs", "./libs/*", "./src/admin_api_lib/models/*", "./src/admin_api_lib/rag_backend_client/*", "./src/admin_api_lib/extractor_api_client/*", ".git", ".hg", ".mypy_cache", ".tox", ".venv", ".devcontainer", "venv", "_build", "buck-out", "build", "dist", "**/__init__.py"]
|
|
14
|
+
statistics = true
|
|
15
|
+
show-source = false
|
|
16
|
+
max-complexity = 8
|
|
17
|
+
max-annotations-complexity = 3
|
|
18
|
+
docstring-convention = 'numpy'
|
|
19
|
+
max-line-length = 120
|
|
20
|
+
ignore = ["E203", "W503", "E704"]
|
|
21
|
+
inline-quotes = '"'
|
|
22
|
+
docstring-quotes = '"""'
|
|
23
|
+
multiline-quotes = '"""'
|
|
24
|
+
dictionaries = ["en_US", "python", "technical", "pandas"]
|
|
25
|
+
ban-relative-imports = true
|
|
26
|
+
per-file-ignores = """
|
|
27
|
+
./src/admin_api_lib/prompt_templates/summarize_prompt.py: E501,
|
|
28
|
+
./src/admin_api_lib/apis/admin_api.py: B008,WOT001,
|
|
29
|
+
./src/admin_api_lib/impl/admin_api.py: B008,
|
|
30
|
+
./src/admin_api_lib/dependency_container.py: CCE002,CCE001,WOT001,
|
|
31
|
+
./src/admin_api_lib/apis/admin_api_base.py: WOT001,
|
|
32
|
+
./tests/*: S101,S106,D100,D103,PT011,N802,E501,
|
|
33
|
+
./src/admin_api_lib/impl/settings/confluence_settings.py: C901,N805,
|
|
34
|
+
./src/admin_api_lib/impl/utils/comma_separated_bool_list.py: R505,
|
|
35
|
+
./src/admin_api_lib/impl/utils/comma_separated_str_list.py: R505,
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
[tool.black]
|
|
39
|
+
line-length = 120
|
|
40
|
+
exclude = """
|
|
41
|
+
/(
|
|
42
|
+
.eggs
|
|
43
|
+
| .git
|
|
44
|
+
| .hg
|
|
45
|
+
| .mypy_cache
|
|
46
|
+
| .nox
|
|
47
|
+
| .pants.d
|
|
48
|
+
| .tox
|
|
49
|
+
| .venv
|
|
50
|
+
| _build
|
|
51
|
+
| buck-out
|
|
52
|
+
| build
|
|
53
|
+
| dist
|
|
54
|
+
| node_modules
|
|
55
|
+
| venv
|
|
56
|
+
)/
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
[tool.isort]
|
|
60
|
+
profile = "black"
|
|
61
|
+
skip = ['.eggs', '.git', '.hg', '.mypy_cache', '.nox', '.pants.d', '.tox', '.venv', '_build', 'buck-out', 'build', 'dist', 'node_modules', 'venv']
|
|
62
|
+
skip_gitignore = true
|
|
63
|
+
known_local_folder = ["admin_api_lib", "rag_core_lib"]
|
|
64
|
+
|
|
65
|
+
[tool.pylint]
|
|
66
|
+
max-line-length = 120
|
|
67
|
+
|
|
68
|
+
[tool.poetry.group.dev.dependencies]
|
|
69
|
+
debugpy = "^1.8.14"
|
|
70
|
+
pytest = "^8.3.5"
|
|
71
|
+
coverage = "^7.8.0"
|
|
72
|
+
flake8 = "^7.2.0"
|
|
73
|
+
flake8-black = "^0.4.0"
|
|
74
|
+
flake8-pyproject = "^1.2.3"
|
|
75
|
+
flake8-quotes = "^3.4.0"
|
|
76
|
+
flake8-return = "^1.2.0"
|
|
77
|
+
flake8-annotations-complexity = "^0.1.0"
|
|
78
|
+
flake8-bandit = "^4.1.1"
|
|
79
|
+
flake8-bugbear = "^24.12.12"
|
|
80
|
+
flake8-builtins = "^2.5.0"
|
|
81
|
+
flake8-comprehensions = "^3.15.0"
|
|
82
|
+
flake8-eradicate = "^1.5.0"
|
|
83
|
+
flake8-expression-complexity = "^0.0.11"
|
|
84
|
+
flake8-pytest-style = "^2.1.0"
|
|
85
|
+
pep8-naming = "^0.15.1"
|
|
86
|
+
flake8-eol = "^0.0.8"
|
|
87
|
+
flake8-exceptions = "^0.0.1a0"
|
|
88
|
+
flake8-simplify = "^0.22.0"
|
|
89
|
+
flake8-wot = "^0.2.0"
|
|
90
|
+
flake8-function-order = "^0.0.5"
|
|
91
|
+
flake8-tidy-imports = "^4.10.0"
|
|
92
|
+
black = "^25.1.0"
|
|
93
|
+
# flake8-logging-format = "^2024.24.12"
|
|
94
|
+
# flake8-docstrings = "^1.7.0"
|
|
95
|
+
pytest-asyncio = "^1.0.0"
|
|
96
|
+
|
|
97
|
+
[tool.poetry.dependencies]
|
|
98
|
+
rag-core-lib = "3.2.0"
|
|
99
|
+
python = "^3.13"
|
|
100
|
+
uvicorn = "^0.37.0"
|
|
101
|
+
fastapi = "^0.118.0"
|
|
102
|
+
dependency-injector = "^4.46.0"
|
|
103
|
+
python-dateutil = "^2.9.0.post0"
|
|
104
|
+
tenacity = "9.1.2"
|
|
105
|
+
boto3 = "^1.38.10"
|
|
106
|
+
tqdm = "^4.67.1"
|
|
107
|
+
langfuse = "3.6.1"
|
|
108
|
+
redis = "^6.0.0"
|
|
109
|
+
pyyaml = "^6.0.2"
|
|
110
|
+
python-multipart = "^0.0.20"
|
|
111
|
+
starlette = ">=0.47.2,<0.49.0"
|
|
112
|
+
langchain-experimental = "^0.3.4"
|
|
113
|
+
nltk = "^3.9.2"
|
|
114
|
+
|
|
115
|
+
[tool.pytest.ini_options]
|
|
116
|
+
log_cli = true
|
|
117
|
+
log_cli_level = "DEBUG"
|
|
118
|
+
pythonpath = "src"
|
|
119
|
+
testpaths = "src/tests"
|
|
File without changes
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Module for the document deletion endpoint."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DocumentDeleter(ABC):
    """Interface for the endpoint that deletes a document."""

    @abstractmethod
    async def adelete_document(self, identification: str, remove_from_key_value_store: bool = True) -> None:
        """
        Asynchronously delete the document with the given identification.

        Parameters
        ----------
        identification : str
            Unique identifier of the document that should be deleted.
        remove_from_key_value_store : bool, optional
            Whether the document should also be removed from the key-value store (default is True).

        Returns
        -------
        None
        """
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Module for the DocumentReferenceRetriever abstract base class."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
from fastapi import Response
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DocumentReferenceRetriever(ABC):
    """Interface for the endpoint that returns a document reference."""

    @abstractmethod
    async def adocument_reference_id_get(self, identification: str) -> Response:
        """
        Asynchronously fetch the document reference for the given ID.

        Parameters
        ----------
        identification : str
            ID of the document reference that should be retrieved.

        Returns
        -------
        Response
            Response that carries the document reference.
        """
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Module abstract base class for the DocumentsStatusRetriever."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
from admin_api_lib.models.document_status import DocumentStatus
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DocumentsStatusRetriever(ABC):
    """Interface for the endpoint that reports the status of all documents."""

    @abstractmethod
    async def aget_all_documents_status(self) -> list[DocumentStatus]:
        """
        Asynchronously collect every document together with its status.

        Returns
        -------
        list[DocumentStatus]
            One entry per document, holding the document name and its status.
        """
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Module for the upload file endpoint."""
|
|
2
|
+
|
|
3
|
+
from abc import abstractmethod
|
|
4
|
+
|
|
5
|
+
from fastapi import UploadFile
|
|
6
|
+
|
|
7
|
+
from admin_api_lib.api_endpoints.uploader_base import UploaderBase
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FileUploader(UploaderBase):
    """Abstract base class for file uploader API endpoints."""

    # NOTE(review): neither this class nor UploaderBase derives from abc.ABC in the visible source, so
    # @abstractmethod is not enforced at instantiation time — confirm whether that is intended.
    @abstractmethod
    async def upload_file(
        self,
        base_url: str,
        file: UploadFile,
    ) -> None:
        """
        Upload a source file for content extraction.

        Parameters
        ----------
        base_url : str
            The base URL of the service. It is used to determine the download link of the file.
        file : UploadFile
            The file to process.

        Returns
        -------
        None
        """
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Module for the upload source endpoint."""
|
|
2
|
+
|
|
3
|
+
from abc import abstractmethod
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from pydantic import StrictStr
|
|
7
|
+
|
|
8
|
+
from admin_api_lib.api_endpoints.uploader_base import UploaderBase
|
|
9
|
+
from admin_api_lib.models.key_value_pair import KeyValuePair
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SourceUploader(UploaderBase):
    """Interface for API endpoints that upload a (non-file) source."""

    @abstractmethod
    async def upload_source(
        self, source_type: StrictStr, name: StrictStr, kwargs: list[KeyValuePair], timeout: Optional[float]
    ) -> None:
        """
        Upload the parameters that describe a source whose content should be extracted.

        Parameters
        ----------
        source_type : str
            Type of the source; the extractor service uses it to pick the matching extraction method.
        name : str
            Name under which the source is displayed.
        kwargs : list[KeyValuePair]
            Extraction parameters, given as a list of KeyValuePair.
        timeout : float, optional
            Timeout for the operation.

        Returns
        -------
        None
        """
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Module for the base class of uploader API endpoints."""
|
|
2
|
+
|
|
3
|
+
from threading import Thread
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class UploaderBase:
    """Base class for uploader API endpoints; keeps track of their background threads."""

    def __init__(self):
        """Initialize the UploaderBase with an empty list of tracked background threads."""
        self._background_threads: list[Thread] = []

    def _prune_background_threads(self) -> list[Thread]:
        """
        Prune background threads that are no longer running.

        Threads that are not alive are joined and dropped from ``self._background_threads``;
        threads that are still alive are kept in their original order.

        Returns
        -------
        list[Thread]
            The background threads that are still alive (the same list that is stored on the instance).
        """
        alive_threads: list[Thread] = []
        for thread in self._background_threads:
            if thread.is_alive():
                alive_threads.append(thread)
            else:
                # The thread is no longer alive, so join() does not wait on running work.
                thread.join()
        self._background_threads = alive_threads
        # The signature and docstring promise the surviving threads; previously nothing was returned.
        return alive_threads
|
|
File without changes
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""Module for the Admin API."""
|
|
2
|
+
|
|
3
|
+
# coding: utf-8
|
|
4
|
+
|
|
5
|
+
from typing import Dict, List # noqa: F401
|
|
6
|
+
import importlib
|
|
7
|
+
import pkgutil
|
|
8
|
+
from typing_extensions import Annotated
|
|
9
|
+
|
|
10
|
+
import admin_api_lib.impl
|
|
11
|
+
|
|
12
|
+
from fastapi import ( # noqa: F401
|
|
13
|
+
APIRouter,
|
|
14
|
+
Body,
|
|
15
|
+
Cookie,
|
|
16
|
+
Depends,
|
|
17
|
+
Form,
|
|
18
|
+
UploadFile,
|
|
19
|
+
Request,
|
|
20
|
+
Header,
|
|
21
|
+
HTTPException,
|
|
22
|
+
Path,
|
|
23
|
+
Query,
|
|
24
|
+
Response,
|
|
25
|
+
Security,
|
|
26
|
+
status,
|
|
27
|
+
)
|
|
28
|
+
from pydantic import Field, StrictStr
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
from admin_api_lib.apis.admin_api_base import BaseAdminApi
|
|
32
|
+
from admin_api_lib.models.document_status import DocumentStatus
|
|
33
|
+
from admin_api_lib.models.http_validation_error import HTTPValidationError
|
|
34
|
+
from admin_api_lib.models.key_value_pair import KeyValuePair
|
|
35
|
+
from admin_api_lib.models.extra_models import TokenModel # noqa: F401
|
|
36
|
+
|
|
37
|
+
router = APIRouter()

# Import every module found directly under admin_api_lib.impl. Presumably this is what lets the
# BaseAdminApi implementations register themselves before a request is served — verify against impl.
ns_pkg = admin_api_lib.impl
_impl_module_prefix = ns_pkg.__name__ + "."
for _, name, _ in pkgutil.iter_modules(ns_pkg.__path__, _impl_module_prefix):
    importlib.import_module(name)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@router.delete(
    "/delete_document/{identification}",
    responses={
        200: {"description": "Deleted"},
        500: {"description": "Internal server error"},
        422: {"model": HTTPValidationError, "description": "Validation Error"},
    },
    tags=["admin"],
    summary="Delete Document",
    response_model_by_alias=True,
)
async def delete_document(
    identification: StrictStr = Path(..., description=""),
) -> None:
    """
    Delete the document with the given identification.

    The call is forwarded to a fresh instance of the first registered ``BaseAdminApi`` subclass.

    Parameters
    ----------
    identification : str
        Unique identifier of the document that should be deleted.

    Returns
    -------
    None

    Raises
    ------
    HTTPException
        With status code 500 if no ``BaseAdminApi`` subclass is registered.
    """
    registered = BaseAdminApi.subclasses
    if not registered:
        raise HTTPException(status_code=500, detail="Not implemented")
    api_impl = registered[0]()
    return await api_impl.delete_document(identification)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@router.get(
    "/document_reference/{identification}",
    responses={
        200: {"model": UploadFile, "description": "Returns the pdf in binary form."},
        400: {"model": str, "description": "Bad request"},
        404: {"model": str, "description": "Document not found."},
        500: {"model": str, "description": "Internal server error"},
        422: {"model": HTTPValidationError, "description": "Validation Error"},
    },
    tags=["admin"],
    summary="Document Reference Id Get",
    response_model_by_alias=True,
)
async def document_reference(
    identification: Annotated[StrictStr, Field(description="Identifier of the document.")] = Path(
        ..., description="Identifier of the document."
    ),
) -> Response:
    """
    Retrieve the document reference that belongs to the given identification.

    The call is forwarded to a fresh instance of the first registered ``BaseAdminApi`` subclass.

    Parameters
    ----------
    identification : str
        Unique identifier of the document reference.

    Returns
    -------
    Response
        The response produced by the registered implementation.

    Raises
    ------
    HTTPException
        With status code 500 if no ``BaseAdminApi`` subclass is registered.
    """
    registered = BaseAdminApi.subclasses
    if not registered:
        raise HTTPException(status_code=500, detail="Not implemented")
    api_impl = registered[0]()
    return await api_impl.document_reference(identification)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@router.get(
    "/all_documents_status",
    responses={
        200: {"model": List[DocumentStatus], "description": "List of document links"},
        500: {"description": "Internal server error"},
    },
    tags=["admin"],
    summary="Get All Documents Status",
    response_model_by_alias=True,
)
async def get_all_documents_status() -> List[DocumentStatus]:
    """
    Retrieve the status of every document.

    The call is forwarded to a fresh instance of the first registered ``BaseAdminApi`` subclass.

    Returns
    -------
    list[DocumentStatus]
        The status of all documents, as returned by the registered implementation.

    Raises
    ------
    HTTPException
        With status code 500 if no ``BaseAdminApi`` subclass is registered.
    """
    registered = BaseAdminApi.subclasses
    if not registered:
        raise HTTPException(status_code=500, detail="Not implemented")
    api_impl = registered[0]()
    return await api_impl.get_all_documents_status()
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@router.post(
    "/upload_file",
    responses={
        200: {"description": "ok"},
        400: {"description": "Bad request"},
        422: {"description": "Unprocessable Content"},
        500: {"description": "Internal server error"},
    },
    tags=["admin"],
    summary="Upload File",
    response_model_by_alias=True,
)
async def upload_file(
    file: UploadFile,
    request: Request,
) -> None:
    """
    Upload a user-selected file.

    The call is forwarded to a fresh instance of the first registered ``BaseAdminApi`` subclass.

    Parameters
    ----------
    file : UploadFile
        The file that should be uploaded.
    request : Request
        The HTTP request object of the upload; it is passed on to the implementation unchanged.

    Returns
    -------
    None

    Raises
    ------
    HTTPException
        With status code 500 if no ``BaseAdminApi`` subclass is registered.
    """
    registered = BaseAdminApi.subclasses
    if not registered:
        raise HTTPException(status_code=500, detail="Not implemented")
    api_impl = registered[0]()
    return await api_impl.upload_file(file, request)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
@router.post(
    "/upload_source",
    responses={
        200: {"description": "ok"},
        400: {"description": "Bad request"},
        422: {"description": "Unprocessable Content"},
        500: {"description": "Internal server error"},
    },
    tags=["admin"],
    summary="Upload Source",
    response_model_by_alias=True,
)
async def upload_source(
    source_type: StrictStr = Query(None, description="The type of the source"),
    name: StrictStr = Query(None, description="The name of the source", alias="name"),
    key_value_pair: List[KeyValuePair] = Body(None, description="The key-value pairs for the source"),
) -> None:
    """
    Upload a user-selected source.

    The call is forwarded to a fresh instance of the first registered ``BaseAdminApi`` subclass.

    Parameters
    ----------
    source_type : str
        Type of the source; the extractor service uses it to pick the matching extractor.
    name : str
        Name under which the source is displayed.
    key_value_pair : List[KeyValuePair]
        Extraction parameters, given as a list of KeyValuePair.

    Returns
    -------
    None

    Raises
    ------
    HTTPException
        With status code 500 if no ``BaseAdminApi`` subclass is registered.
    """
    registered = BaseAdminApi.subclasses
    if not registered:
        raise HTTPException(status_code=500, detail="Not implemented")
    api_impl = registered[0]()
    return await api_impl.upload_source(source_type, name, key_value_pair)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Module for the base AdminApi interface."""
|
|
2
|
+
|
|
3
|
+
# coding: utf-8
|
|
4
|
+
# flake8: noqa: D105
|
|
5
|
+
|
|
6
|
+
from typing import ClassVar, Dict, List, Tuple # noqa: F401
|
|
7
|
+
from typing_extensions import Annotated
|
|
8
|
+
|
|
9
|
+
from pydantic import Field, StrictStr
|
|
10
|
+
from fastapi import Request, Response, UploadFile
|
|
11
|
+
|
|
12
|
+
from admin_api_lib.models.document_status import DocumentStatus
|
|
13
|
+
from admin_api_lib.models.key_value_pair import KeyValuePair
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BaseAdminApi:
    """
    Base interface of the AdminApi.

    Every class that derives from ``BaseAdminApi`` is recorded in ``subclasses`` when it is defined.
    The methods declared here have no behaviour of their own; implementations override them.

    Attributes
    ----------
    subclasses : ClassVar[Tuple]
        Tuple with all subclasses of BaseAdminApi, in the order in which they were defined.
    """

    subclasses: ClassVar[Tuple] = ()

    def __init_subclass__(cls, **kwargs):
        """Append the newly defined subclass to ``BaseAdminApi.subclasses``."""
        super().__init_subclass__(**kwargs)
        # Always rebind on BaseAdminApi itself so that all subclasses share a single registry.
        BaseAdminApi.subclasses = (*BaseAdminApi.subclasses, cls)

    async def delete_document(self, identification: StrictStr) -> None:
        """
        Delete the document with the given identification.

        Parameters
        ----------
        identification : str
            Unique identifier of the document that should be deleted.

        Returns
        -------
        None
        """

    async def document_reference(
        self,
        identification: Annotated[StrictStr, Field(description="Identifier of the document.")],
    ) -> Response:
        """
        Retrieve the document reference that belongs to the given identification.

        Parameters
        ----------
        identification : str
            Unique identifier of the document reference.

        Returns
        -------
        Response
            Response object with the details of the document reference.
        """

    async def get_all_documents_status(self) -> list[DocumentStatus]:
        """
        Retrieve the status of every document.

        Returns
        -------
        list[DocumentStatus]
            The status of all documents.
        """

    async def upload_source(
        self, source_type: StrictStr, name: StrictStr, key_value_pair: List[KeyValuePair]
    ) -> None:
        """
        Upload a user-selected source.

        Parameters
        ----------
        source_type : str
            Type of the source; the extractor service uses it to pick the matching extractor.
        name : str
            Name under which the source is displayed.
        key_value_pair : list[KeyValuePair]
            Extraction parameters, given as a list of KeyValuePair.

        Returns
        -------
        None
        """

    async def upload_file(self, file: UploadFile, request: Request) -> None:
        """
        Upload user-selected documents.

        Parameters
        ----------
        file : UploadFile
            File object with the source documents that should be uploaded.
        request : Request
            Request object with metadata about the upload request.

        Returns
        -------
        None
        """
|
|
File without changes
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Module for the Chunker abstract base class."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
from langchain_core.documents import Document
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Chunker(ABC):
    """Interface for components that split documents into smaller parts."""

    # NOTE(review): the parameter is named in the plural but annotated as a single Document — confirm
    # which of the two the implementations actually expect.
    @abstractmethod
    def chunk(self, documents: Document) -> list[Document]:
        """
        Split the given documents into smaller parts.

        Parameters
        ----------
        documents : Document
            The documents that should be chunked.

        Returns
        -------
        list of Document
            The smaller parts that result from chunking the documents.
        """
|