admin-api-lib 3.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. admin_api_lib-3.2.0/PKG-INFO +24 -0
  2. admin_api_lib-3.2.0/pyproject.toml +119 -0
  3. admin_api_lib-3.2.0/src/admin_api_lib/__init__.py +0 -0
  4. admin_api_lib-3.2.0/src/admin_api_lib/api_endpoints/document_deleter.py +24 -0
  5. admin_api_lib-3.2.0/src/admin_api_lib/api_endpoints/document_reference_retriever.py +25 -0
  6. admin_api_lib-3.2.0/src/admin_api_lib/api_endpoints/documents_status_retriever.py +20 -0
  7. admin_api_lib-3.2.0/src/admin_api_lib/api_endpoints/file_uploader.py +31 -0
  8. admin_api_lib-3.2.0/src/admin_api_lib/api_endpoints/source_uploader.py +40 -0
  9. admin_api_lib-3.2.0/src/admin_api_lib/api_endpoints/uploader_base.py +30 -0
  10. admin_api_lib-3.2.0/src/admin_api_lib/apis/__init__.py +0 -0
  11. admin_api_lib-3.2.0/src/admin_api_lib/apis/admin_api.py +197 -0
  12. admin_api_lib-3.2.0/src/admin_api_lib/apis/admin_api_base.py +120 -0
  13. admin_api_lib-3.2.0/src/admin_api_lib/chunker/__init__.py +0 -0
  14. admin_api_lib-3.2.0/src/admin_api_lib/chunker/chunker.py +25 -0
  15. admin_api_lib-3.2.0/src/admin_api_lib/dependency_container.py +236 -0
  16. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/__init__.py +0 -0
  17. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/__init__.py +38 -0
  18. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/api/__init__.py +4 -0
  19. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/api/extractor_api.py +516 -0
  20. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/api_client.py +695 -0
  21. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/api_response.py +20 -0
  22. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/configuration.py +460 -0
  23. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/exceptions.py +197 -0
  24. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/models/__init__.py +21 -0
  25. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/models/content_type.py +34 -0
  26. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/models/extraction_parameters.py +103 -0
  27. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/models/extraction_request.py +82 -0
  28. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/models/information_piece.py +104 -0
  29. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/models/key_value_pair.py +92 -0
  30. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/rest.py +209 -0
  31. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/__init__.py +0 -0
  32. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/test_content_type.py +35 -0
  33. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/test_extraction_parameters.py +59 -0
  34. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/test_extraction_request.py +56 -0
  35. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/test_extractor_api.py +39 -0
  36. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/test_information_piece.py +62 -0
  37. admin_api_lib-3.2.0/src/admin_api_lib/extractor_api_client/openapi_client/test/test_key_value_pair.py +54 -0
  38. admin_api_lib-3.2.0/src/admin_api_lib/file_services/file_service.py +77 -0
  39. admin_api_lib-3.2.0/src/admin_api_lib/impl/__init__.py +0 -0
  40. admin_api_lib-3.2.0/src/admin_api_lib/impl/admin_api.py +167 -0
  41. admin_api_lib-3.2.0/src/admin_api_lib/impl/api_endpoints/default_document_deleter.py +84 -0
  42. admin_api_lib-3.2.0/src/admin_api_lib/impl/api_endpoints/default_document_reference_retriever.py +72 -0
  43. admin_api_lib-3.2.0/src/admin_api_lib/impl/api_endpoints/default_documents_status_retriever.py +41 -0
  44. admin_api_lib-3.2.0/src/admin_api_lib/impl/api_endpoints/default_file_uploader.py +234 -0
  45. admin_api_lib-3.2.0/src/admin_api_lib/impl/api_endpoints/default_source_uploader.py +202 -0
  46. admin_api_lib-3.2.0/src/admin_api_lib/impl/chunker/__init__.py +0 -0
  47. admin_api_lib-3.2.0/src/admin_api_lib/impl/chunker/chunker_type.py +11 -0
  48. admin_api_lib-3.2.0/src/admin_api_lib/impl/chunker/semantic_text_chunker.py +252 -0
  49. admin_api_lib-3.2.0/src/admin_api_lib/impl/chunker/text_chunker.py +33 -0
  50. admin_api_lib-3.2.0/src/admin_api_lib/impl/file_services/__init__.py +0 -0
  51. admin_api_lib-3.2.0/src/admin_api_lib/impl/file_services/s3_service.py +130 -0
  52. admin_api_lib-3.2.0/src/admin_api_lib/impl/information_enhancer/__init__.py +0 -0
  53. admin_api_lib-3.2.0/src/admin_api_lib/impl/information_enhancer/general_enhancer.py +52 -0
  54. admin_api_lib-3.2.0/src/admin_api_lib/impl/information_enhancer/page_summary_enhancer.py +62 -0
  55. admin_api_lib-3.2.0/src/admin_api_lib/impl/information_enhancer/summary_enhancer.py +74 -0
  56. admin_api_lib-3.2.0/src/admin_api_lib/impl/key_db/__init__.py +0 -0
  57. admin_api_lib-3.2.0/src/admin_api_lib/impl/key_db/file_status_key_value_store.py +111 -0
  58. admin_api_lib-3.2.0/src/admin_api_lib/impl/mapper/informationpiece2document.py +108 -0
  59. admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/__init__.py +0 -0
  60. admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/chunker_class_type_settings.py +18 -0
  61. admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/chunker_settings.py +29 -0
  62. admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/document_extractor_settings.py +21 -0
  63. admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/key_value_settings.py +26 -0
  64. admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/rag_api_settings.py +21 -0
  65. admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/s3_settings.py +31 -0
  66. admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/source_uploader_settings.py +23 -0
  67. admin_api_lib-3.2.0/src/admin_api_lib/impl/settings/summarizer_settings.py +86 -0
  68. admin_api_lib-3.2.0/src/admin_api_lib/impl/summarizer/__init__.py +0 -0
  69. admin_api_lib-3.2.0/src/admin_api_lib/impl/summarizer/langchain_summarizer.py +117 -0
  70. admin_api_lib-3.2.0/src/admin_api_lib/information_enhancer/__init__.py +0 -0
  71. admin_api_lib-3.2.0/src/admin_api_lib/information_enhancer/information_enhancer.py +34 -0
  72. admin_api_lib-3.2.0/src/admin_api_lib/main.py +54 -0
  73. admin_api_lib-3.2.0/src/admin_api_lib/models/__init__.py +0 -0
  74. admin_api_lib-3.2.0/src/admin_api_lib/models/document_status.py +86 -0
  75. admin_api_lib-3.2.0/src/admin_api_lib/models/extra_models.py +9 -0
  76. admin_api_lib-3.2.0/src/admin_api_lib/models/http_validation_error.py +105 -0
  77. admin_api_lib-3.2.0/src/admin_api_lib/models/key_value_pair.py +85 -0
  78. admin_api_lib-3.2.0/src/admin_api_lib/models/status.py +44 -0
  79. admin_api_lib-3.2.0/src/admin_api_lib/models/validation_error.py +104 -0
  80. admin_api_lib-3.2.0/src/admin_api_lib/models/validation_error_loc_inner.py +114 -0
  81. admin_api_lib-3.2.0/src/admin_api_lib/prompt_templates/__init__.py +0 -0
  82. admin_api_lib-3.2.0/src/admin_api_lib/prompt_templates/summarize_prompt.py +14 -0
  83. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/__init__.py +0 -0
  84. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/__init__.py +60 -0
  85. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/api/__init__.py +4 -0
  86. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/api/rag_api.py +968 -0
  87. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/api_client.py +698 -0
  88. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/api_response.py +22 -0
  89. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/configuration.py +460 -0
  90. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/exceptions.py +197 -0
  91. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/__init__.py +41 -0
  92. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_history.py +99 -0
  93. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_history_message.py +83 -0
  94. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_request.py +93 -0
  95. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_response.py +103 -0
  96. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_role.py +35 -0
  97. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/content_type.py +37 -0
  98. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/delete_request.py +99 -0
  99. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/information_piece.py +110 -0
  100. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/models/key_value_pair.py +83 -0
  101. admin_api_lib-3.2.0/src/admin_api_lib/rag_backend_client/openapi_client/rest.py +209 -0
  102. admin_api_lib-3.2.0/src/admin_api_lib/summarizer/__init__.py +0 -0
  103. admin_api_lib-3.2.0/src/admin_api_lib/summarizer/summarizer.py +33 -0
  104. admin_api_lib-3.2.0/src/admin_api_lib/utils/__init__.py +0 -0
  105. admin_api_lib-3.2.0/src/admin_api_lib/utils/utils.py +32 -0
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.3
2
+ Name: admin-api-lib
3
+ Version: 3.2.0
4
+ Summary: The admin backend is responsible for the document management. This includes deletion, upload and returning the source document.
5
+ Author: STACKIT Data and AI Consulting
6
+ Author-email: data-ai-consulting@stackit.cloud
7
+ Requires-Python: >=3.13,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.13
10
+ Requires-Dist: boto3 (>=1.38.10,<2.0.0)
11
+ Requires-Dist: dependency-injector (>=4.46.0,<5.0.0)
12
+ Requires-Dist: fastapi (>=0.118.0,<0.119.0)
13
+ Requires-Dist: langchain-experimental (>=0.3.4,<0.4.0)
14
+ Requires-Dist: langfuse (==3.6.1)
15
+ Requires-Dist: nltk (>=3.9.2,<4.0.0)
16
+ Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
17
+ Requires-Dist: python-multipart (>=0.0.20,<0.0.21)
18
+ Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
19
+ Requires-Dist: rag-core-lib (==3.2.0)
20
+ Requires-Dist: redis (>=6.0.0,<7.0.0)
21
+ Requires-Dist: starlette (>=0.47.2,<0.49.0)
22
+ Requires-Dist: tenacity (==9.1.2)
23
+ Requires-Dist: tqdm (>=4.67.1,<5.0.0)
24
+ Requires-Dist: uvicorn (>=0.37.0,<0.38.0)
@@ -0,0 +1,119 @@
1
+ [build-system]
2
+ requires = ["poetry-core"]
3
+ build-backend = "poetry.core.masonry.api"
4
+
5
+ [tool.poetry]
6
+ name = "admin-api-lib"
7
+ version = "v3.2.0"
8
+ description = "The admin backend is responsible for the document management. This includes deletion, upload and returning the source document."
9
+ authors = ["STACKIT Data and AI Consulting <data-ai-consulting@stackit.cloud>"]
10
+ packages = [{ include = "admin_api_lib", from = "src" }]
11
+
12
+ [tool.flake8]
13
+ exclude= [".eggs", "./libs/*", "./src/admin_api_lib/models/*", "./src/admin_api_lib/rag_backend_client/*", "./src/admin_api_lib/extractor_api_client/*", ".git", ".hg", ".mypy_cache", ".tox", ".venv", ".devcontainer", "venv", "_build", "buck-out", "build", "dist", "**/__init__.py"]
14
+ statistics = true
15
+ show-source = false
16
+ max-complexity = 8
17
+ max-annotations-complexity = 3
18
+ docstring-convention = 'numpy'
19
+ max-line-length = 120
20
+ ignore = ["E203", "W503", "E704"]
21
+ inline-quotes = '"'
22
+ docstring-quotes = '"""'
23
+ multiline-quotes = '"""'
24
+ dictionaries = ["en_US", "python", "technical", "pandas"]
25
+ ban-relative-imports = true
26
+ per-file-ignores = """
27
+ ./src/admin_api_lib/prompt_templates/summarize_prompt.py: E501,
28
+ ./src/admin_api_lib/apis/admin_api.py: B008,WOT001,
29
+ ./src/admin_api_lib/impl/admin_api.py: B008,
30
+ ./src/admin_api_lib/dependency_container.py: CCE002,CCE001,WOT001,
31
+ ./src/admin_api_lib/apis/admin_api_base.py: WOT001,
32
+ ./tests/*: S101,S106,D100,D103,PT011,N802,E501,
33
+ ./src/admin_api_lib/impl/settings/confluence_settings.py: C901,N805,
34
+ ./src/admin_api_lib/impl/utils/comma_separated_bool_list.py: R505,
35
+ ./src/admin_api_lib/impl/utils/comma_separated_str_list.py: R505,
36
+ """
37
+
38
+ [tool.black]
39
+ line-length = 120
40
+ exclude = """
41
+ /(
42
+ .eggs
43
+ | .git
44
+ | .hg
45
+ | .mypy_cache
46
+ | .nox
47
+ | .pants.d
48
+ | .tox
49
+ | .venv
50
+ | _build
51
+ | buck-out
52
+ | build
53
+ | dist
54
+ | node_modules
55
+ | venv
56
+ )/
57
+ """
58
+
59
+ [tool.isort]
60
+ profile = "black"
61
+ skip = ['.eggs', '.git', '.hg', '.mypy_cache', '.nox', '.pants.d', '.tox', '.venv', '_build', 'buck-out', 'build', 'dist', 'node_modules', 'venv']
62
+ skip_gitignore = true
63
+ known_local_folder = ["admin_api_lib", "rag_core_lib"]
64
+
65
+ [tool.pylint]
66
+ max-line-length = 120
67
+
68
+ [tool.poetry.group.dev.dependencies]
69
+ debugpy = "^1.8.14"
70
+ pytest = "^8.3.5"
71
+ coverage = "^7.8.0"
72
+ flake8 = "^7.2.0"
73
+ flake8-black = "^0.4.0"
74
+ flake8-pyproject = "^1.2.3"
75
+ flake8-quotes = "^3.4.0"
76
+ flake8-return = "^1.2.0"
77
+ flake8-annotations-complexity = "^0.1.0"
78
+ flake8-bandit = "^4.1.1"
79
+ flake8-bugbear = "^24.12.12"
80
+ flake8-builtins = "^2.5.0"
81
+ flake8-comprehensions = "^3.15.0"
82
+ flake8-eradicate = "^1.5.0"
83
+ flake8-expression-complexity = "^0.0.11"
84
+ flake8-pytest-style = "^2.1.0"
85
+ pep8-naming = "^0.15.1"
86
+ flake8-eol = "^0.0.8"
87
+ flake8-exceptions = "^0.0.1a0"
88
+ flake8-simplify = "^0.22.0"
89
+ flake8-wot = "^0.2.0"
90
+ flake8-function-order = "^0.0.5"
91
+ flake8-tidy-imports = "^4.10.0"
92
+ black = "^25.1.0"
93
+ # flake8-logging-format = "^2024.24.12"
94
+ # flake8-docstrings = "^1.7.0"
95
+ pytest-asyncio = "^1.0.0"
96
+
97
+ [tool.poetry.dependencies]
98
+ rag-core-lib = "3.2.0"
99
+ python = "^3.13"
100
+ uvicorn = "^0.37.0"
101
+ fastapi = "^0.118.0"
102
+ dependency-injector = "^4.46.0"
103
+ python-dateutil = "^2.9.0.post0"
104
+ tenacity = "9.1.2"
105
+ boto3 = "^1.38.10"
106
+ tqdm = "^4.67.1"
107
+ langfuse = "3.6.1"
108
+ redis = "^6.0.0"
109
+ pyyaml = "^6.0.2"
110
+ python-multipart = "^0.0.20"
111
+ starlette = ">=0.47.2,<0.49.0"
112
+ langchain-experimental = "^0.3.4"
113
+ nltk = "^3.9.2"
114
+
115
+ [tool.pytest.ini_options]
116
+ log_cli = true
117
+ log_cli_level = "DEBUG"
118
+ pythonpath = "src"
119
+ testpaths = "src/tests"
File without changes
@@ -0,0 +1,24 @@
1
+ """Module for the document deletion endpoint."""
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+
6
class DocumentDeleter(ABC):
    """Interface of the endpoint that deletes documents."""

    @abstractmethod
    async def adelete_document(
        self,
        identification: str,
        remove_from_key_value_store: bool = True,
    ) -> None:
        """
        Asynchronously delete the document with the given identification.

        Parameters
        ----------
        identification : str
            Unique identifier of the document that should be deleted.
        remove_from_key_value_store : bool, optional
            Whether the document is also removed from the key-value store (default is True).

        Returns
        -------
        None
        """
@@ -0,0 +1,25 @@
1
+ """Module for the DocumentReferenceRetriever abstract base class."""
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+ from fastapi import Response
6
+
7
+
8
class DocumentReferenceRetriever(ABC):
    """Interface of the endpoint that returns a document reference."""

    @abstractmethod
    async def adocument_reference_id_get(
        self,
        identification: str,
    ) -> Response:
        """
        Asynchronously fetch the document reference that belongs to an ID.

        Parameters
        ----------
        identification : str
            ID of the document reference that should be retrieved.

        Returns
        -------
        Response
            Response that carries the document reference.
        """
@@ -0,0 +1,20 @@
1
+ """Module for the DocumentsStatusRetriever abstract base class."""
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+ from admin_api_lib.models.document_status import DocumentStatus
6
+
7
+
8
class DocumentsStatusRetriever(ABC):
    """Interface of the endpoint that reports the status of every document."""

    @abstractmethod
    async def aget_all_documents_status(
        self,
    ) -> list[DocumentStatus]:
        """
        Asynchronously collect all documents together with their statuses.

        Returns
        -------
        list[DocumentStatus]
            One entry per document, holding the document name and its status.
        """
@@ -0,0 +1,31 @@
1
+ """Module for the upload file endpoint."""
2
+
3
+ from abc import abstractmethod
4
+
5
+ from fastapi import UploadFile
6
+
7
+ from admin_api_lib.api_endpoints.uploader_base import UploaderBase
8
+
9
+
10
class FileUploader(UploaderBase):
    """Abstract base class for file uploader API endpoints."""

    @abstractmethod
    async def upload_file(self, base_url: str, file: UploadFile) -> None:
        """
        Upload a source file whose content should be extracted.

        Parameters
        ----------
        base_url : str
            Base url of the service; used to determine the download link of the file.
        file : UploadFile
            The file that should be processed.

        Returns
        -------
        None
        """
@@ -0,0 +1,40 @@
1
+ """Module for the upload source endpoint."""
2
+
3
+ from abc import abstractmethod
4
+ from typing import Optional
5
+
6
+ from pydantic import StrictStr
7
+
8
+ from admin_api_lib.api_endpoints.uploader_base import UploaderBase
9
+ from admin_api_lib.models.key_value_pair import KeyValuePair
10
+
11
+
12
class SourceUploader(UploaderBase):
    """Interface of the endpoint that uploads non-file sources."""

    @abstractmethod
    async def upload_source(
        self, source_type: StrictStr, name: StrictStr, kwargs: list[KeyValuePair], timeout: Optional[float]
    ) -> None:
        """
        Upload the parameters that describe a source whose content should be extracted.

        Parameters
        ----------
        source_type : str
            Type of the source; the extractor service uses it to pick the matching extraction method.
        name : str
            Name under which the source is displayed.
        kwargs : list[KeyValuePair]
            Extraction parameters given as a list of KeyValuePair.
        timeout : float, optional
            Timeout for the operation.

        Returns
        -------
        None
        """
@@ -0,0 +1,30 @@
1
+ """Module for the base class of uploader API endpoints."""
2
+
3
+ from threading import Thread
4
+
5
+
6
class UploaderBase:
    """
    Base class for uploader API endpoints.

    Holds a list of background threads (``_background_threads``) and offers a helper
    to drop the ones that have finished.
    """

    def __init__(self):
        """
        Initialize the UploaderBase with an empty list of background threads.
        """
        self._background_threads: list[Thread] = []

    def _prune_background_threads(self) -> list[Thread]:
        """
        Prune background threads that are no longer running.

        Threads that are not alive anymore are joined and removed from
        ``self._background_threads``; threads that are still alive are kept in
        their original order.

        Returns
        -------
        list[Thread]
            A list of background threads that are still alive. This is the same
            list object that is stored on the instance after pruning.
        """
        alive_threads: list[Thread] = []
        for thread in self._background_threads:
            if thread.is_alive():
                alive_threads.append(thread)
            else:
                # The thread is no longer alive, so it is joined before being dropped.
                thread.join()
        self._background_threads = alive_threads
        # The signature and docstring promise the surviving threads; previously nothing was returned.
        return alive_threads
File without changes
@@ -0,0 +1,197 @@
1
+ """Module for the Admin API."""
2
+
3
+ # coding: utf-8
4
+
5
+ from typing import Dict, List # noqa: F401
6
+ import importlib
7
+ import pkgutil
8
+ from typing_extensions import Annotated
9
+
10
+ import admin_api_lib.impl
11
+
12
+ from fastapi import ( # noqa: F401
13
+ APIRouter,
14
+ Body,
15
+ Cookie,
16
+ Depends,
17
+ Form,
18
+ UploadFile,
19
+ Request,
20
+ Header,
21
+ HTTPException,
22
+ Path,
23
+ Query,
24
+ Response,
25
+ Security,
26
+ status,
27
+ )
28
+ from pydantic import Field, StrictStr
29
+
30
+
31
+ from admin_api_lib.apis.admin_api_base import BaseAdminApi
32
+ from admin_api_lib.models.document_status import DocumentStatus
33
+ from admin_api_lib.models.http_validation_error import HTTPValidationError
34
+ from admin_api_lib.models.key_value_pair import KeyValuePair
35
+ from admin_api_lib.models.extra_models import TokenModel # noqa: F401
36
+
37
# Router on which every endpoint of this module is registered.
router = APIRouter()

# Import each direct submodule of the implementation package, so that any
# BaseAdminApi subclass defined there is registered through __init_subclass__
# before a request is dispatched.
ns_pkg = admin_api_lib.impl
for _, name, _ in pkgutil.iter_modules(ns_pkg.__path__, f"{ns_pkg.__name__}."):
    importlib.import_module(name)
42
+
43
+
44
@router.delete(
    "/delete_document/{identification}",
    responses={
        200: {"description": "Deleted"},
        500: {"description": "Internal server error"},
        422: {"model": HTTPValidationError, "description": "Validation Error"},
    },
    tags=["admin"],
    summary="Delete Document",
    response_model_by_alias=True,
)
async def delete_document(
    identification: StrictStr = Path(..., description=""),
) -> None:
    """
    Delete the document that belongs to the given identification.

    The call is forwarded to the first registered ``BaseAdminApi`` subclass.

    Parameters
    ----------
    identification : str
        Unique identifier of the document that should be deleted.

    Returns
    -------
    None

    Raises
    ------
    HTTPException
        With status code 500 if no ``BaseAdminApi`` subclass is registered.
    """
    registered = BaseAdminApi.subclasses
    if not registered:
        raise HTTPException(status_code=500, detail="Not implemented")
    implementation = registered[0]()
    return await implementation.delete_document(identification)
73
+
74
+
75
@router.get(
    "/document_reference/{identification}",
    responses={
        200: {"model": UploadFile, "description": "Returns the pdf in binary form."},
        400: {"model": str, "description": "Bad request"},
        404: {"model": str, "description": "Document not found."},
        500: {"model": str, "description": "Internal server error"},
        422: {"model": HTTPValidationError, "description": "Validation Error"},
    },
    tags=["admin"],
    summary="Document Reference Id Get",
    response_model_by_alias=True,
)
async def document_reference(
    identification: Annotated[StrictStr, Field(description="Identifier of the document.")] = Path(
        ..., description="Identifier of the document."
    ),
) -> Response:
    """
    Retrieve the document reference that belongs to the given identification.

    The call is forwarded to the first registered ``BaseAdminApi`` subclass.

    Parameters
    ----------
    identification : str
        Unique identifier of the document reference.

    Returns
    -------
    Response
        Response object that contains the document reference details.

    Raises
    ------
    HTTPException
        With status code 500 if no ``BaseAdminApi`` subclass is registered.
    """
    registered = BaseAdminApi.subclasses
    if not registered:
        raise HTTPException(status_code=500, detail="Not implemented")
    implementation = registered[0]()
    return await implementation.document_reference(identification)
109
+
110
+
111
@router.get(
    "/all_documents_status",
    responses={
        200: {"model": List[DocumentStatus], "description": "List of document links"},
        500: {"description": "Internal server error"},
    },
    tags=["admin"],
    summary="Get All Documents Status",
    response_model_by_alias=True,
)
async def get_all_documents_status() -> List[DocumentStatus]:
    """
    Retrieve the status of every document.

    The call is forwarded to the first registered ``BaseAdminApi`` subclass.

    Returns
    -------
    list[DocumentStatus]
        The status of all documents.

    Raises
    ------
    HTTPException
        With status code 500 if no ``BaseAdminApi`` subclass is registered.
    """
    registered = BaseAdminApi.subclasses
    if not registered:
        raise HTTPException(status_code=500, detail="Not implemented")
    implementation = registered[0]()
    return await implementation.get_all_documents_status()
133
+
134
+
135
@router.post(
    "/upload_file",
    responses={
        200: {"description": "ok"},
        400: {"description": "Bad request"},
        422: {"description": "Unprocessable Content"},
        500: {"description": "Internal server error"},
    },
    tags=["admin"],
    summary="Upload File",
    response_model_by_alias=True,
)
async def upload_file(
    file: UploadFile,
    request: Request,
) -> None:
    """
    Upload a user selected file.

    The call is forwarded to the first registered ``BaseAdminApi`` subclass.

    Parameters
    ----------
    file : UploadFile
        The file that should be uploaded.
    request : Request
        HTTP request object that carries metadata about the upload request.

    Raises
    ------
    HTTPException
        With status code 500 if no ``BaseAdminApi`` subclass is registered.
    """
    registered = BaseAdminApi.subclasses
    if not registered:
        raise HTTPException(status_code=500, detail="Not implemented")
    implementation = registered[0]()
    return await implementation.upload_file(file, request)
164
+
165
+
166
@router.post(
    "/upload_source",
    responses={
        200: {"description": "ok"},
        400: {"description": "Bad request"},
        422: {"description": "Unprocessable Content"},
        500: {"description": "Internal server error"},
    },
    tags=["admin"],
    summary="Upload Source",
    response_model_by_alias=True,
)
async def upload_source(
    source_type: StrictStr = Query(None, description="The type of the source"),
    name: StrictStr = Query(None, description="The name of the source", alias="name"),
    key_value_pair: List[KeyValuePair] = Body(None, description="The key-value pairs for the source"),
) -> None:
    """
    Upload a user selected source.

    The call is forwarded to the first registered ``BaseAdminApi`` subclass.

    Parameters
    ----------
    source_type : str
        Type of the source; the extractor service uses it to determine the correct extractor.
    name : str
        Name under which the source is displayed.
    key_value_pair : List[KeyValuePair]
        Extraction parameters given as a list of KeyValuePair.

    Raises
    ------
    HTTPException
        With status code 500 if no ``BaseAdminApi`` subclass is registered.
    """
    registered = BaseAdminApi.subclasses
    if not registered:
        raise HTTPException(status_code=500, detail="Not implemented")
    implementation = registered[0]()
    return await implementation.upload_source(source_type, name, key_value_pair)
@@ -0,0 +1,120 @@
1
+ """Module for the base AdminApi interface."""
2
+
3
+ # coding: utf-8
4
+ # flake8: noqa: D105
5
+
6
+ from typing import ClassVar, Dict, List, Tuple # noqa: F401
7
+ from typing_extensions import Annotated
8
+
9
+ from pydantic import Field, StrictStr
10
+ from fastapi import Request, Response, UploadFile
11
+
12
+ from admin_api_lib.models.document_status import DocumentStatus
13
+ from admin_api_lib.models.key_value_pair import KeyValuePair
14
+
15
+
16
class BaseAdminApi:
    """
    Interface that every AdminApi implementation derives from.

    Attributes
    ----------
    subclasses : ClassVar[Tuple]
        Tuple with all subclasses of BaseAdminApi, in the order in which they were defined.
    """

    subclasses: ClassVar[Tuple] = ()

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        # Always rebind the attribute on BaseAdminApi itself, so that subclasses
        # of subclasses end up in the same shared registry.
        BaseAdminApi.subclasses = (*BaseAdminApi.subclasses, cls)

    async def delete_document(self, identification: StrictStr) -> None:
        """
        Delete the document that belongs to the given identification.

        Parameters
        ----------
        identification : str
            Unique identifier of the document that should be deleted.

        Returns
        -------
        None
        """

    async def document_reference(
        self,
        identification: Annotated[StrictStr, Field(description="Identifier of the document.")],
    ) -> Response:
        """
        Retrieve the document reference that belongs to the given identification.

        Parameters
        ----------
        identification : str
            Unique identifier of the document reference.

        Returns
        -------
        Response
            Response object that contains the document reference details.
        """

    async def get_all_documents_status(self) -> list[DocumentStatus]:
        """
        Retrieve the status of every document.

        Returns
        -------
        list[DocumentStatus]
            The status of all documents.
        """

    async def upload_source(
        self,
        source_type: StrictStr,
        name: StrictStr,
        key_value_pair: List[KeyValuePair],
    ) -> None:
        """
        Upload a user selected source.

        Parameters
        ----------
        source_type : str
            Type of the source; the extractor service uses it to determine the correct extractor.
        name : str
            Name under which the source is displayed.
        key_value_pair : list[KeyValuePair]
            Extraction parameters given as a list of KeyValuePair.

        Returns
        -------
        None
        """

    async def upload_file(self, file: UploadFile, request: Request) -> None:
        """
        Upload user-selected documents.

        Parameters
        ----------
        file : UploadFile
            File object that contains the source documents to upload.
        request : Request
            Request object that carries metadata about the upload request.

        Returns
        -------
        None
        """
@@ -0,0 +1,25 @@
1
+ """Module for the Chunker abstract base class."""
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+ from langchain_core.documents import Document
6
+
7
+
8
class Chunker(ABC):
    """Interface for components that split documents into smaller parts."""

    @abstractmethod
    def chunk(
        self,
        documents: Document,
    ) -> list[Document]:
        """
        Split the given documents into smaller parts.

        Parameters
        ----------
        documents : Document
            The documents that should be chunked.

        Returns
        -------
        list of Document
            The smaller parts that result from chunking the documents.
        """