admin-api-lib 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. admin_api_lib/__init__.py +0 -0
  2. admin_api_lib/api_endpoints/document_deleter.py +24 -0
  3. admin_api_lib/api_endpoints/document_reference_retriever.py +25 -0
  4. admin_api_lib/api_endpoints/documents_status_retriever.py +20 -0
  5. admin_api_lib/api_endpoints/file_uploader.py +31 -0
  6. admin_api_lib/api_endpoints/source_uploader.py +40 -0
  7. admin_api_lib/api_endpoints/uploader_base.py +30 -0
  8. admin_api_lib/apis/__init__.py +0 -0
  9. admin_api_lib/apis/admin_api.py +197 -0
  10. admin_api_lib/apis/admin_api_base.py +120 -0
  11. admin_api_lib/chunker/__init__.py +0 -0
  12. admin_api_lib/chunker/chunker.py +25 -0
  13. admin_api_lib/dependency_container.py +236 -0
  14. admin_api_lib/extractor_api_client/__init__.py +0 -0
  15. admin_api_lib/extractor_api_client/openapi_client/__init__.py +38 -0
  16. admin_api_lib/extractor_api_client/openapi_client/api/__init__.py +4 -0
  17. admin_api_lib/extractor_api_client/openapi_client/api/extractor_api.py +516 -0
  18. admin_api_lib/extractor_api_client/openapi_client/api_client.py +695 -0
  19. admin_api_lib/extractor_api_client/openapi_client/api_response.py +20 -0
  20. admin_api_lib/extractor_api_client/openapi_client/configuration.py +460 -0
  21. admin_api_lib/extractor_api_client/openapi_client/exceptions.py +197 -0
  22. admin_api_lib/extractor_api_client/openapi_client/models/__init__.py +21 -0
  23. admin_api_lib/extractor_api_client/openapi_client/models/content_type.py +34 -0
  24. admin_api_lib/extractor_api_client/openapi_client/models/extraction_parameters.py +103 -0
  25. admin_api_lib/extractor_api_client/openapi_client/models/extraction_request.py +82 -0
  26. admin_api_lib/extractor_api_client/openapi_client/models/information_piece.py +104 -0
  27. admin_api_lib/extractor_api_client/openapi_client/models/key_value_pair.py +92 -0
  28. admin_api_lib/extractor_api_client/openapi_client/rest.py +209 -0
  29. admin_api_lib/extractor_api_client/openapi_client/test/__init__.py +0 -0
  30. admin_api_lib/extractor_api_client/openapi_client/test/test_content_type.py +35 -0
  31. admin_api_lib/extractor_api_client/openapi_client/test/test_extraction_parameters.py +59 -0
  32. admin_api_lib/extractor_api_client/openapi_client/test/test_extraction_request.py +56 -0
  33. admin_api_lib/extractor_api_client/openapi_client/test/test_extractor_api.py +39 -0
  34. admin_api_lib/extractor_api_client/openapi_client/test/test_information_piece.py +62 -0
  35. admin_api_lib/extractor_api_client/openapi_client/test/test_key_value_pair.py +54 -0
  36. admin_api_lib/file_services/file_service.py +77 -0
  37. admin_api_lib/impl/__init__.py +0 -0
  38. admin_api_lib/impl/admin_api.py +167 -0
  39. admin_api_lib/impl/api_endpoints/default_document_deleter.py +84 -0
  40. admin_api_lib/impl/api_endpoints/default_document_reference_retriever.py +72 -0
  41. admin_api_lib/impl/api_endpoints/default_documents_status_retriever.py +41 -0
  42. admin_api_lib/impl/api_endpoints/default_file_uploader.py +234 -0
  43. admin_api_lib/impl/api_endpoints/default_source_uploader.py +202 -0
  44. admin_api_lib/impl/chunker/__init__.py +0 -0
  45. admin_api_lib/impl/chunker/chunker_type.py +11 -0
  46. admin_api_lib/impl/chunker/semantic_text_chunker.py +252 -0
  47. admin_api_lib/impl/chunker/text_chunker.py +33 -0
  48. admin_api_lib/impl/file_services/__init__.py +0 -0
  49. admin_api_lib/impl/file_services/s3_service.py +130 -0
  50. admin_api_lib/impl/information_enhancer/__init__.py +0 -0
  51. admin_api_lib/impl/information_enhancer/general_enhancer.py +52 -0
  52. admin_api_lib/impl/information_enhancer/page_summary_enhancer.py +62 -0
  53. admin_api_lib/impl/information_enhancer/summary_enhancer.py +74 -0
  54. admin_api_lib/impl/key_db/__init__.py +0 -0
  55. admin_api_lib/impl/key_db/file_status_key_value_store.py +111 -0
  56. admin_api_lib/impl/mapper/informationpiece2document.py +108 -0
  57. admin_api_lib/impl/settings/__init__.py +0 -0
  58. admin_api_lib/impl/settings/chunker_class_type_settings.py +18 -0
  59. admin_api_lib/impl/settings/chunker_settings.py +29 -0
  60. admin_api_lib/impl/settings/document_extractor_settings.py +21 -0
  61. admin_api_lib/impl/settings/key_value_settings.py +26 -0
  62. admin_api_lib/impl/settings/rag_api_settings.py +21 -0
  63. admin_api_lib/impl/settings/s3_settings.py +31 -0
  64. admin_api_lib/impl/settings/source_uploader_settings.py +23 -0
  65. admin_api_lib/impl/settings/summarizer_settings.py +86 -0
  66. admin_api_lib/impl/summarizer/__init__.py +0 -0
  67. admin_api_lib/impl/summarizer/langchain_summarizer.py +117 -0
  68. admin_api_lib/information_enhancer/__init__.py +0 -0
  69. admin_api_lib/information_enhancer/information_enhancer.py +34 -0
  70. admin_api_lib/main.py +54 -0
  71. admin_api_lib/models/__init__.py +0 -0
  72. admin_api_lib/models/document_status.py +86 -0
  73. admin_api_lib/models/extra_models.py +9 -0
  74. admin_api_lib/models/http_validation_error.py +105 -0
  75. admin_api_lib/models/key_value_pair.py +85 -0
  76. admin_api_lib/models/status.py +44 -0
  77. admin_api_lib/models/validation_error.py +104 -0
  78. admin_api_lib/models/validation_error_loc_inner.py +114 -0
  79. admin_api_lib/prompt_templates/__init__.py +0 -0
  80. admin_api_lib/prompt_templates/summarize_prompt.py +14 -0
  81. admin_api_lib/rag_backend_client/__init__.py +0 -0
  82. admin_api_lib/rag_backend_client/openapi_client/__init__.py +60 -0
  83. admin_api_lib/rag_backend_client/openapi_client/api/__init__.py +4 -0
  84. admin_api_lib/rag_backend_client/openapi_client/api/rag_api.py +968 -0
  85. admin_api_lib/rag_backend_client/openapi_client/api_client.py +698 -0
  86. admin_api_lib/rag_backend_client/openapi_client/api_response.py +22 -0
  87. admin_api_lib/rag_backend_client/openapi_client/configuration.py +460 -0
  88. admin_api_lib/rag_backend_client/openapi_client/exceptions.py +197 -0
  89. admin_api_lib/rag_backend_client/openapi_client/models/__init__.py +41 -0
  90. admin_api_lib/rag_backend_client/openapi_client/models/chat_history.py +99 -0
  91. admin_api_lib/rag_backend_client/openapi_client/models/chat_history_message.py +83 -0
  92. admin_api_lib/rag_backend_client/openapi_client/models/chat_request.py +93 -0
  93. admin_api_lib/rag_backend_client/openapi_client/models/chat_response.py +103 -0
  94. admin_api_lib/rag_backend_client/openapi_client/models/chat_role.py +35 -0
  95. admin_api_lib/rag_backend_client/openapi_client/models/content_type.py +37 -0
  96. admin_api_lib/rag_backend_client/openapi_client/models/delete_request.py +99 -0
  97. admin_api_lib/rag_backend_client/openapi_client/models/information_piece.py +110 -0
  98. admin_api_lib/rag_backend_client/openapi_client/models/key_value_pair.py +83 -0
  99. admin_api_lib/rag_backend_client/openapi_client/rest.py +209 -0
  100. admin_api_lib/summarizer/__init__.py +0 -0
  101. admin_api_lib/summarizer/summarizer.py +33 -0
  102. admin_api_lib/utils/__init__.py +0 -0
  103. admin_api_lib/utils/utils.py +32 -0
  104. admin_api_lib-3.2.0.dist-info/METADATA +24 -0
  105. admin_api_lib-3.2.0.dist-info/RECORD +106 -0
  106. admin_api_lib-3.2.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,59 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ extractor-api-lib
5
+
6
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
7
+
8
+ The version of the OpenAPI document: 1.0.0
9
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
10
+
11
+ Do not edit the class manually.
12
+ """ # noqa: E501
13
+
14
+
15
+ import unittest
16
+
17
+ from admin_api_lib.extractor_api_client.openapi_client.models.extraction_parameters import ExtractionParameters
18
+
19
+
20
class TestExtractionParameters(unittest.TestCase):
    """Generated placeholder test case for the ``ExtractionParameters`` model."""

    def setUp(self):
        """No fixtures are needed by the placeholder tests."""

    def tearDown(self):
        """Nothing is allocated in ``setUp``, so there is nothing to release."""

    def make_instance(self, include_optional) -> ExtractionParameters:
        """Template for building an ``ExtractionParameters`` instance.

        ``include_optional`` is a boolean: when False only required params
        are meant to be included, when True both required and optional
        params are meant to be included. The construction code below is
        still disabled, so the method currently returns ``None``.
        """
        # uncomment below to create an instance of `ExtractionParameters`
        """
        model = ExtractionParameters()
        if include_optional:
            return ExtractionParameters(
                document_name = '',
                confluence_kwargs = [
                    {"value":"value","key":"key"}
                    ],
                type = ''
            )
        else:
            return ExtractionParameters(
                document_name = '',
                type = '',
        )
        """

    def testExtractionParameters(self):
        """Placeholder test for ``ExtractionParameters``; asserts nothing yet."""
        # inst_req_only = self.make_instance(include_optional=False)
        # inst_req_and_optional = self.make_instance(include_optional=True)
56
+
57
+
58
+ if __name__ == "__main__":
59
+ unittest.main()
@@ -0,0 +1,56 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ extractor-api-lib
5
+
6
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
7
+
8
+ The version of the OpenAPI document: 1.0.0
9
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
10
+
11
+ Do not edit the class manually.
12
+ """ # noqa: E501
13
+
14
+
15
+ import unittest
16
+
17
+ from admin_api_lib.extractor_api_client.openapi_client.models.extraction_request import ExtractionRequest
18
+
19
+
20
class TestExtractionRequest(unittest.TestCase):
    """Generated placeholder test case for the ``ExtractionRequest`` model."""

    def setUp(self):
        """No fixtures are needed by the placeholder tests."""

    def tearDown(self):
        """Nothing is allocated in ``setUp``, so there is nothing to release."""

    def make_instance(self, include_optional) -> ExtractionRequest:
        """Template for building an ``ExtractionRequest`` instance.

        ``include_optional`` is a boolean: when False only required params
        are meant to be included, when True both required and optional
        params are meant to be included. The construction code below is
        still disabled, so the method currently returns ``None``.
        """
        # uncomment below to create an instance of `ExtractionRequest`
        """
        model = ExtractionRequest()
        if include_optional:
            return ExtractionRequest(
                path_on_s3 = '',
                document_name = ''
            )
        else:
            return ExtractionRequest(
                path_on_s3 = '',
                document_name = '',
        )
        """

    def testExtractionRequest(self):
        """Placeholder test for ``ExtractionRequest``; asserts nothing yet."""
        # inst_req_only = self.make_instance(include_optional=False)
        # inst_req_and_optional = self.make_instance(include_optional=True)
53
+
54
+
55
+ if __name__ == "__main__":
56
+ unittest.main()
@@ -0,0 +1,39 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ extractor-api-lib
5
+
6
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
7
+
8
+ The version of the OpenAPI document: 1.0.0
9
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
10
+
11
+ Do not edit the class manually.
12
+ """ # noqa: E501
13
+
14
+
15
+ import unittest
16
+
17
+ from admin_api_lib.extractor_api_client.openapi_client.api.extractor_api import ExtractorApi
18
+
19
+
20
class TestExtractorApi(unittest.TestCase):
    """Generated placeholder test case for the ``ExtractorApi`` client."""

    def setUp(self) -> None:
        """Create a fresh ``ExtractorApi`` client for every test."""
        self.api = ExtractorApi()

    def tearDown(self) -> None:
        """No explicit cleanup is performed for the client."""

    def test_extract_from_file_post(self) -> None:
        """Placeholder for ``extract_from_file_post``; asserts nothing yet."""

    def test_extract_from_source(self) -> None:
        """Placeholder for ``extract_from_source``; asserts nothing yet."""
36
+
37
+
38
+ if __name__ == "__main__":
39
+ unittest.main()
@@ -0,0 +1,62 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ extractor-api-lib
5
+
6
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
7
+
8
+ The version of the OpenAPI document: 1.0.0
9
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
10
+
11
+ Do not edit the class manually.
12
+ """ # noqa: E501
13
+
14
+
15
+ import unittest
16
+
17
+ from admin_api_lib.extractor_api_client.openapi_client.models.information_piece import InformationPiece
18
+
19
+
20
class TestInformationPiece(unittest.TestCase):
    """Generated placeholder test case for the ``InformationPiece`` model."""

    def setUp(self):
        """No fixtures are needed by the placeholder tests."""

    def tearDown(self):
        """Nothing is allocated in ``setUp``, so there is nothing to release."""

    def make_instance(self, include_optional) -> InformationPiece:
        """Template for building an ``InformationPiece`` instance.

        ``include_optional`` is a boolean: when False only required params
        are meant to be included, when True both required and optional
        params are meant to be included. The construction code below is
        still disabled, so the method currently returns ``None``.
        """
        # uncomment below to create an instance of `InformationPiece`
        """
        model = InformationPiece()
        if include_optional:
            return InformationPiece(
                metadata = [
                    {"value":"value","key":"key"}
                    ],
                page_content = '',
                type = 'IMAGE'
            )
        else:
            return InformationPiece(
                metadata = [
                    {"value":"value","key":"key"}
                    ],
                page_content = '',
                type = 'IMAGE',
        )
        """

    def testInformationPiece(self):
        """Placeholder test for ``InformationPiece``; asserts nothing yet."""
        # inst_req_only = self.make_instance(include_optional=False)
        # inst_req_and_optional = self.make_instance(include_optional=True)
59
+
60
+
61
+ if __name__ == "__main__":
62
+ unittest.main()
@@ -0,0 +1,54 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ extractor-api-lib
5
+
6
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
7
+
8
+ The version of the OpenAPI document: 1.0.0
9
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
10
+
11
+ Do not edit the class manually.
12
+ """ # noqa: E501
13
+
14
+
15
+ import unittest
16
+
17
+ from admin_api_lib.extractor_api_client.openapi_client.models.key_value_pair import KeyValuePair
18
+
19
+
20
class TestKeyValuePair(unittest.TestCase):
    """Generated placeholder test case for the ``KeyValuePair`` model."""

    def setUp(self):
        """No fixtures are needed by the placeholder tests."""

    def tearDown(self):
        """Nothing is allocated in ``setUp``, so there is nothing to release."""

    def make_instance(self, include_optional) -> KeyValuePair:
        """Template for building a ``KeyValuePair`` instance.

        ``include_optional`` is a boolean: when False only required params
        are meant to be included, when True both required and optional
        params are meant to be included. The construction code below is
        still disabled, so the method currently returns ``None``.
        """
        # uncomment below to create an instance of `KeyValuePair`
        """
        model = KeyValuePair()
        if include_optional:
            return KeyValuePair(
                key = None,
                value = None
            )
        else:
            return KeyValuePair(
        )
        """

    def testKeyValuePair(self):
        """Placeholder test for ``KeyValuePair``; asserts nothing yet."""
        # inst_req_only = self.make_instance(include_optional=False)
        # inst_req_and_optional = self.make_instance(include_optional=True)
51
+
52
+
53
+ if __name__ == "__main__":
54
+ unittest.main()
@@ -0,0 +1,77 @@
1
+ """Abstract class for dealing with I/O."""
2
+
3
+ import abc
4
+ from abc import ABC
5
+ from pathlib import Path
6
+ from typing import BinaryIO
7
+
8
+
9
class FileService(ABC):
    """Abstract interface for file storage I/O.

    Every method is abstract; a concrete subclass must implement all of them
    before it can be instantiated.
    """

    @abc.abstractmethod
    def download_folder(self, source: str, target: Path) -> None:
        """Download the remote folder "source" into the local "target" directory.

        Parameters
        ----------
        source : str
            Path to the remote folder.
        target : Path
            Local destination directory for the download.
        """

    @abc.abstractmethod
    def download_file(self, source: str, target_file: BinaryIO) -> None:
        """Read a single remote file "source" into the local "target_file" file-like object.

        Example usage
        =============
        ```
        file_service: FileService = ...  # any concrete implementation

        with io.BytesIO() as buffer:
            file_service.download_file("remote_file", buffer)
            data = buffer.getvalue()
            # do stuff with data
        ```

        Parameters
        ----------
        source : str
            Path to the remote file.
        target_file : BinaryIO
            File-like object to save the data to.
        """

    @abc.abstractmethod
    def upload_file(self, file_path: str, file_name: str) -> None:
        """Upload a local file to the file storage.

        Parameters
        ----------
        file_path : str
            The path to the local file to be uploaded.
        file_name : str
            The target path in the file storage where the file will be stored.
        """

    @abc.abstractmethod
    def get_all_sorted_file_names(self) -> list[str]:
        """Retrieve the names of all files stored in the file storage.

        The method name indicates that the result is expected to be sorted;
        the concrete ordering is defined by the implementation.

        Returns
        -------
        list[str]
            A list of file names stored in the file storage.
        """

    @abc.abstractmethod
    def delete_file(self, file_name: str) -> None:
        """Delete a file from the file storage.

        Parameters
        ----------
        file_name : str
            The name of the file to be deleted from the file storage.
        """
File without changes
@@ -0,0 +1,167 @@
1
+ """Module containing the implementation of the Admin API."""
2
+
3
+ import logging
4
+
5
+
6
+ from pydantic import StrictStr
7
+ from dependency_injector.wiring import Provide, inject
8
+ from fastapi import Depends, Request, Response, UploadFile
9
+
10
+ from admin_api_lib.api_endpoints.file_uploader import FileUploader
11
+ from admin_api_lib.api_endpoints.source_uploader import SourceUploader
12
+ from admin_api_lib.models.key_value_pair import KeyValuePair
13
+ from admin_api_lib.api_endpoints.document_deleter import DocumentDeleter
14
+ from admin_api_lib.api_endpoints.document_reference_retriever import (
15
+ DocumentReferenceRetriever,
16
+ )
17
+ from admin_api_lib.api_endpoints.documents_status_retriever import (
18
+ DocumentsStatusRetriever,
19
+ )
20
+ from admin_api_lib.apis.admin_api_base import BaseAdminApi
21
+ from admin_api_lib.dependency_container import DependencyContainer
22
+ from admin_api_lib.models.document_status import DocumentStatus
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class AdminApi(BaseAdminApi):
    """Admin API implementation that delegates every endpoint to an injected service.

    Each asynchronous method forwards its arguments to one collaborator resolved from the
    ``DependencyContainer``: deleting documents, listing document statuses, uploading sources,
    uploading files, and retrieving a document by its identification.
    """

    def __init__(self):
        """
        Initialize the AdminAPI class.

        Runs the parent initializer first, then creates the empty list kept in
        ``_background_threads``.
        """
        super().__init__()
        self._background_threads = []

    @inject
    async def delete_document(
        self,
        identification: str,
        document_deleter: DocumentDeleter = Depends(Provide[DependencyContainer.document_deleter]),
    ) -> None:
        """
        Delete the document with the given identification.

        Parameters
        ----------
        identification : str
            The unique identifier of the document to be deleted.
        document_deleter : DocumentDeleter
            The injected deleter that performs the deletion
            (default is Depends(Provide[DependencyContainer.document_deleter])).

        Returns
        -------
        None
        """
        deletion = document_deleter.adelete_document(identification)
        await deletion

    @inject
    async def get_all_documents_status(
        self,
        document_status_retriever: DocumentsStatusRetriever = Depends(
            Provide[DependencyContainer.documents_status_retriever]
        ),
    ) -> list[DocumentStatus]:
        """
        Retrieve the status of all documents.

        Parameters
        ----------
        document_status_retriever : DocumentsStatusRetriever
            The injected retriever that is queried for the statuses
            (default is Depends(Provide[DependencyContainer.documents_status_retriever])).

        Returns
        -------
        list[DocumentStatus]
            Whatever the retriever returns for all documents.
        """
        statuses = await document_status_retriever.aget_all_documents_status()
        return statuses

    @inject
    async def upload_source(
        self,
        source_type: StrictStr,
        name: StrictStr,
        kwargs: list[KeyValuePair],
        source_uploader: SourceUploader = Depends(Provide[DependencyContainer.source_uploader]),
    ) -> None:
        """
        Upload a source by handing it to the source uploader.

        Parameters
        ----------
        source_type : StrictStr
            The type of the source to be uploaded.
        name : StrictStr
            The name of the source to be uploaded.
        kwargs : list[KeyValuePair]
            Additional key/value parameters, passed through unchanged.
        source_uploader : SourceUploader
            The injected uploader that handles the upload
            (default is Depends(Provide[DependencyContainer.source_uploader])).

        Returns
        -------
        None
        """
        upload = source_uploader.upload_source(source_type, name, kwargs)
        await upload

    @inject
    async def upload_file(
        self,
        file: UploadFile,
        request: Request,
        file_uploader: FileUploader = Depends(Provide[DependencyContainer.file_uploader]),
    ) -> None:
        """
        Upload a file by handing it to the file uploader.

        Parameters
        ----------
        file : UploadFile
            The file object to be uploaded.
        request : Request
            The incoming HTTP request; only its ``base_url`` is used, converted to ``str``.
        file_uploader : FileUploader, optional
            The injected uploader that handles the upload
            (default is Depends(Provide[DependencyContainer.file_uploader])).

        Returns
        -------
        None
        """
        base_url = str(request.base_url)
        await file_uploader.upload_file(base_url, file)

    @inject
    async def document_reference(
        self,
        identification: str,
        document_reference_retriever: DocumentReferenceRetriever = Depends(
            Provide[DependencyContainer.document_reference_retriever]
        ),
    ) -> Response:
        """
        Retrieve the document with the given identification.

        Parameters
        ----------
        identification : str
            The identification of the document.
        document_reference_retriever : DocumentReferenceRetriever, optional
            The injected service that fetches the document
            (default is Depends(Provide[DependencyContainer.document_reference_retriever])).

        Returns
        -------
        Response
            The response produced by the retriever for the requested document.
        """
        response = await document_reference_retriever.adocument_reference_id_get(identification)
        return response
@@ -0,0 +1,84 @@
1
+ """Module for the DefaultDocumentDeleter class."""
2
+
3
+ import json
4
+ import logging
5
+
6
+ from fastapi import HTTPException
7
+
8
+ from admin_api_lib.api_endpoints.document_deleter import DocumentDeleter
9
+ from admin_api_lib.file_services.file_service import FileService
10
+ from admin_api_lib.impl.key_db.file_status_key_value_store import (
11
+ FileStatusKeyValueStore,
12
+ )
13
+ from admin_api_lib.rag_backend_client.openapi_client.api.rag_api import RagApi
14
+ from admin_api_lib.rag_backend_client.openapi_client.models.delete_request import (
15
+ DeleteRequest,
16
+ )
17
+ from admin_api_lib.rag_backend_client.openapi_client.models.key_value_pair import (
18
+ KeyValuePair,
19
+ )
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class DefaultDocumentDeleter(DocumentDeleter):
    """Deletes documents from the file storage and from the RAG backend."""

    def __init__(self, file_service: FileService, rag_api: RagApi, key_value_store: FileStatusKeyValueStore):
        """
        Store the collaborators used for deletion.

        Parameters
        ----------
        file_service : FileService
            The service used to delete the stored file.
        rag_api : RagApi
            The API client used to remove information pieces from the RAG backend.
        key_value_store : FileStatusKeyValueStore
            The key-value store from which the document entry is removed.
        """
        self._key_value_store = key_value_store
        self._rag_api = rag_api
        self._file_service = file_service

    async def adelete_document(self, identification: str, remove_from_key_value_store: bool = True) -> None:
        """
        Delete the document identified by ``identification``.

        Two independent steps are attempted, and the second always runs even if the first fails:

        1. Optionally remove the entry from the key-value store, then delete the file via the
           file service. Both share one ``try`` block, so a failing key-value removal skips the
           file deletion.
        2. Ask the RAG backend to remove all information pieces whose ``document`` metadata
           equals the JSON-encoded identification.

        Failures of either step are collected and reported together once both steps have run.

        Parameters
        ----------
        identification : str
            The unique identifier of the document to be deleted.
        remove_from_key_value_store : bool, optional
            If True, the document is also removed from the key-value store (default is True).

        Raises
        ------
        HTTPException
            With status code 404 and the concatenated error messages if any step failed.
        """
        failures: list[str] = []
        logger.debug("Deleting existing document: %s", identification)

        # Step 1: key-value store entry (optional) and stored file.
        try:
            if remove_from_key_value_store:
                self._key_value_store.remove(identification)
            self._file_service.delete_file(identification)
        except Exception as e:
            failures.append(f"Error while deleting {identification} from file storage\n {str(e)}\n")

        # Step 2: information pieces held by the RAG backend.
        try:
            document_filter = KeyValuePair(key="document", value=json.dumps(identification))
            self._rag_api.remove_information_piece(DeleteRequest(metadata=[document_filter]))
            logger.info("Deleted information pieces belonging to %s from rag.", identification)
        except Exception as e:
            failures.append(f"Error while deleting {identification} from vector db\n{str(e)}")

        if not failures:
            return
        raise HTTPException(404, "".join(failures))
@@ -0,0 +1,72 @@
1
+ """Module for the DefaultDocumentReferenceRetriever class."""
2
+
3
+ import io
4
+ import logging
5
+ import traceback
6
+
7
+ from fastapi import HTTPException, Response, status
8
+
9
+ from admin_api_lib.api_endpoints.document_reference_retriever import (
10
+ DocumentReferenceRetriever,
11
+ )
12
+ from admin_api_lib.file_services.file_service import FileService
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class DefaultDocumentReferenceRetriever(DocumentReferenceRetriever):
    """A class to retrieve document references using a file service."""

    def __init__(self, file_service: FileService):
        """
        Initialize the DefaultDocumentReferenceRetriever.

        Parameters
        ----------
        file_service : FileService
            An instance of FileService used to download the requested document.
        """
        self._file_service = file_service

    @staticmethod
    def _inline_content_disposition(filename: str) -> str:
        """Build an ``inline`` Content-Disposition value that is safe for any filename.

        Names made only of URL-safe characters keep the plain quoted ``filename="..."`` form.
        Anything else (spaces, quotes, control characters, non-ASCII text) is percent-encoded and
        sent through the extended ``filename*=UTF-8''...`` parameter, so the value can neither
        break out of the quoted string nor fail when the header is encoded.
        """
        from urllib.parse import quote

        encoded = quote(filename)
        if encoded == filename:
            return f'inline; filename="{filename}"'
        return f"inline; filename*=UTF-8''{encoded}"

    async def adocument_reference_id_get(self, identification: str) -> Response:
        """
        Retrieve the document with the given identification asynchronously.

        Parameters
        ----------
        identification : str
            The identification string of the document.

        Returns
        -------
        Response
            The document in binary form wrapped in a FastAPI Response object. The media type is
            ``application/pdf`` for names ending in ``.pdf`` (case-insensitive) and
            ``application/octet-stream`` otherwise.

        Raises
        ------
        HTTPException
            With status 404 if downloading the document fails for any reason.
        """
        logger.debug("START retrieving document with id: %s", identification)
        try:
            # The context manager closes the buffer on both the success and the error path.
            with io.BytesIO() as document_buffer:
                self._file_service.download_file(identification, document_buffer)
                document_data = document_buffer.getvalue()
        except Exception as e:
            # logger.exception records the traceback of the failure being handled.
            logger.exception("Error retrieving document with id: %s. Error: %s", identification, e)
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Document with id '{identification}' not found.",
            ) from e
        logger.debug("DONE retrieving document with id: %s", identification)

        media_type = "application/pdf" if identification.lower().endswith(".pdf") else "application/octet-stream"
        headers = {
            "Content-Disposition": self._inline_content_disposition(identification),
            "Content-Type": media_type,
        }
        return Response(document_data, status_code=200, headers=headers, media_type=media_type)