admin-api-lib 3.4.0__py3-none-any.whl → 4.1.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- admin_api_lib/api_endpoints/document_deleter.py +8 -1
- admin_api_lib/dependency_container.py +1 -1
- admin_api_lib/file_services/file_service.py +3 -75
- admin_api_lib/impl/api_endpoints/default_document_deleter.py +34 -7
- admin_api_lib/impl/api_endpoints/default_file_uploader.py +5 -1
- admin_api_lib/impl/api_endpoints/default_source_uploader.py +10 -2
- admin_api_lib/impl/chunker/text_chunker.py +1 -1
- admin_api_lib/impl/file_services/s3_service.py +3 -128
- admin_api_lib/impl/information_enhancer/page_summary_enhancer.py +72 -21
- admin_api_lib/impl/key_db/file_status_key_value_store.py +48 -2
- admin_api_lib/impl/settings/key_value_settings.py +24 -0
- admin_api_lib/impl/settings/s3_settings.py +3 -29
- admin_api_lib/impl/summarizer/langchain_summarizer.py +49 -4
- {admin_api_lib-3.4.0.dist-info → admin_api_lib-4.1.0.dist-info}/METADATA +10 -8
- {admin_api_lib-3.4.0.dist-info → admin_api_lib-4.1.0.dist-info}/RECORD +16 -16
- {admin_api_lib-3.4.0.dist-info → admin_api_lib-4.1.0.dist-info}/WHEEL +1 -1
admin_api_lib/api_endpoints/document_deleter.py

@@ -7,7 +7,12 @@ class DocumentDeleter(ABC):
     """Abstract base class for document deletion endpoint."""
 
     @abstractmethod
-    async def adelete_document(
+    async def adelete_document(
+        self,
+        identification: str,
+        remove_from_key_value_store: bool = True,
+        remove_from_storage: bool = True,
+    ) -> None:
         """
         Delete a document by its identification asynchronously.
 
@@ -17,6 +22,8 @@ class DocumentDeleter(ABC):
            The unique identifier of the document to be deleted.
        remove_from_key_value_store : bool, optional
            If True, the document will also be removed from the key-value store (default is True).
+       remove_from_storage : bool, optional
+           If True, the document will also be removed from the file storage (default is True).
 
        Returns
        -------
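The two keyword flags make partial cleanup explicit. A minimal sketch of how a caller might exercise the extended API (the `deleter` instance and the identifier are hypothetical; only the signature comes from the diff):

```python
import asyncio

from admin_api_lib.api_endpoints.document_deleter import DocumentDeleter


async def cleanup(deleter: DocumentDeleter) -> None:
    # Full cleanup: vector DB entries, key-value status entry, and stored file.
    await deleter.adelete_document("file:report.pdf")

    # Replace flow: drop only the vector DB entries, keep the status entry
    # and the uploaded file untouched.
    await deleter.adelete_document(
        "file:report.pdf",
        remove_from_key_value_store=False,
        remove_from_storage=False,
    )


# asyncio.run(cleanup(deleter))  # with a concrete DocumentDeleter wired up
```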
admin_api_lib/dependency_container.py

@@ -3,7 +3,7 @@
 from admin_api_lib.impl.api_endpoints.default_file_uploader import DefaultFileUploader
 from dependency_injector.containers import DeclarativeContainer
 from dependency_injector.providers import Configuration, List, Selector, Singleton
-from
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import OllamaEmbeddings
 from langfuse import Langfuse
 
|
|
|
1
|
-
"""
|
|
1
|
+
"""Re-export core file service interface."""
|
|
2
2
|
|
|
3
|
-
import
|
|
4
|
-
from abc import ABC
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
from typing import BinaryIO
|
|
3
|
+
from rag_core_lib.file_services.file_service import FileService
|
|
7
4
|
|
|
8
|
-
|
|
9
|
-
class FileService(ABC):
|
|
10
|
-
"""Abstract class for dealing with I/O."""
|
|
11
|
-
|
|
12
|
-
@abc.abstractmethod
|
|
13
|
-
def download_folder(self, source: str, target: Path) -> None:
|
|
14
|
-
"""Download the remote folder on "source" to the local "target" directory.
|
|
15
|
-
|
|
16
|
-
Parameters
|
|
17
|
-
----------
|
|
18
|
-
source: str
|
|
19
|
-
Path to the remote folder.
|
|
20
|
-
target: Path
|
|
21
|
-
Download destination path.
|
|
22
|
-
"""
|
|
23
|
-
|
|
24
|
-
@abc.abstractmethod
|
|
25
|
-
def download_file(self, source: str, target_file: BinaryIO) -> None:
|
|
26
|
-
"""Read a single remote file "source" into the local "target_file" file-like object.
|
|
27
|
-
|
|
28
|
-
Example usage
|
|
29
|
-
=============
|
|
30
|
-
```
|
|
31
|
-
s3_settings: S3Settings = get_s3_settings()
|
|
32
|
-
s3_service = S3Service(endpoint="endpoint", username="username", password="password", bucket_name="bucket")
|
|
33
|
-
|
|
34
|
-
with tempfile.SpooledTemporaryFile(max_size=self._iot_forecast_settings.max_model_size) as temp_file:
|
|
35
|
-
s3_service.download_file("remote_file", temp_file)
|
|
36
|
-
# do stuff with temp_file
|
|
37
|
-
```
|
|
38
|
-
|
|
39
|
-
Parameters
|
|
40
|
-
----------
|
|
41
|
-
source: str
|
|
42
|
-
Path to the remote folder.
|
|
43
|
-
target_file: BinaryIO
|
|
44
|
-
File-like object to save the data to.
|
|
45
|
-
"""
|
|
46
|
-
|
|
47
|
-
@abc.abstractmethod
|
|
48
|
-
def upload_file(self, file_path: str, file_name: str) -> None:
|
|
49
|
-
"""Upload a local file to the Fileservice.
|
|
50
|
-
|
|
51
|
-
Parameters
|
|
52
|
-
----------
|
|
53
|
-
file_path : str
|
|
54
|
-
The path to the local file to be uploaded.
|
|
55
|
-
file_name : str
|
|
56
|
-
The target path in the file storage where the file will be stored.
|
|
57
|
-
"""
|
|
58
|
-
|
|
59
|
-
@abc.abstractmethod
|
|
60
|
-
def get_all_sorted_file_names(self) -> list[str]:
|
|
61
|
-
"""Retrieve all file names stored in the file storage.
|
|
62
|
-
|
|
63
|
-
Returns
|
|
64
|
-
-------
|
|
65
|
-
list[str]
|
|
66
|
-
A list of file names stored in the file storage.
|
|
67
|
-
"""
|
|
68
|
-
|
|
69
|
-
@abc.abstractmethod
|
|
70
|
-
def delete_file(self, file_name: str) -> None:
|
|
71
|
-
"""Delete a file from the file storage.
|
|
72
|
-
|
|
73
|
-
Parameters
|
|
74
|
-
----------
|
|
75
|
-
file_name : str
|
|
76
|
-
The name of the file to be deleted from the file storage.
|
|
77
|
-
"""
|
|
5
|
+
__all__ = ["FileService"]
|
|
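This module (and, further down, `s3_service.py` and `s3_settings.py`) is reduced to a re-export shim, so existing import paths keep working while the implementation now lives in `rag_core_lib`. A minimal sketch of the pattern, assuming both packages are installed:

```python
# Both names below refer to the same class object: the shim module simply
# re-imports and re-exports the rag_core_lib implementation.
from admin_api_lib.file_services.file_service import FileService as ShimFileService
from rag_core_lib.file_services.file_service import FileService as CoreFileService

assert ShimFileService is CoreFileService
```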
admin_api_lib/impl/api_endpoints/default_document_deleter.py

@@ -41,7 +41,21 @@ class DefaultDocumentDeleter(DocumentDeleter):
         self._rag_api = rag_api
         self._key_value_store = key_value_store
 
-    async def adelete_document(
+    @staticmethod
+    def _storage_key_from_identification(identification: str) -> str | None:
+        if identification.startswith("file:"):
+            storage_key = identification[len("file:") :]
+            return storage_key or None
+        if ":" in identification:
+            return None
+        return identification or None
+
+    async def adelete_document(
+        self,
+        identification: str,
+        remove_from_key_value_store: bool = True,
+        remove_from_storage: bool = True,
+    ) -> None:
         """
         Asynchronously delete a document identified by the given identification string.
 
@@ -57,6 +71,8 @@ class DefaultDocumentDeleter(DocumentDeleter):
            The unique identifier of the document to be deleted.
        remove_from_key_value_store : bool, optional
            If True, the document will also be removed from the key-value store (default is True).
+       remove_from_storage : bool, optional
+           If True, the document will also be removed from the file storage (default is True).
 
        Raises
        ------
@@ -67,12 +83,12 @@ class DefaultDocumentDeleter(DocumentDeleter):
         error_messages = ""
         # Delete the document from file service and vector database
         logger.debug("Deleting existing document: %s", identification)
-
-
-
-
-
-
+        if remove_from_key_value_store:
+            self._key_value_store.remove(identification)
+
+        if remove_from_storage:
+            error_messages = self._delete_from_storage(identification, error_messages)
+
         try:
             self._rag_api.remove_information_piece(
                 DeleteRequest(metadata=[KeyValuePair(key="document", value=json.dumps(identification))])
@@ -82,3 +98,14 @@ class DefaultDocumentDeleter(DocumentDeleter):
             error_messages += f"Error while deleting {identification} from vector db\n{str(e)}"
         if error_messages:
             raise HTTPException(404, error_messages)
+
+    def _delete_from_storage(self, identification: str, error_messages: str) -> str:
+        try:
+            storage_key = self._storage_key_from_identification(identification)
+            if storage_key:
+                self._file_service.delete_file(storage_key)
+            else:
+                logger.debug("Skipping file storage deletion for non-file source: %s", identification)
+        except Exception as e:
+            error_messages += f"Error while deleting {identification} from file storage\n {str(e)}\n"
+        return error_messages
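The `_storage_key_from_identification` helper decides whether an identification refers to a stored file at all, so non-file sources (sitemaps, Confluence spaces, and the like) never trigger an S3 delete. A standalone sketch of the same logic with the expected mappings spelled out (the example identifiers are hypothetical):

```python
def storage_key_from_identification(identification: str) -> str | None:
    # Mirrors DefaultDocumentDeleter._storage_key_from_identification.
    if identification.startswith("file:"):
        storage_key = identification[len("file:") :]
        return storage_key or None   # a bare "file:" prefix maps to None
    if ":" in identification:
        return None                  # e.g. "confluence:MY_SPACE" is not a file
    return identification or None    # bare names are treated as file keys


assert storage_key_from_identification("file:report.pdf") == "report.pdf"
assert storage_key_from_identification("confluence:MY_SPACE") is None
assert storage_key_from_identification("report.pdf") == "report.pdf"
assert storage_key_from_identification("") is None
```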
admin_api_lib/impl/api_endpoints/default_file_uploader.py

@@ -188,7 +188,11 @@ class DefaultFileUploader(FileUploader):
         # Replace old document
         # deletion is allowed to fail
         with suppress(Exception):
-            await self._document_deleter.adelete_document(
+            await self._document_deleter.adelete_document(
+                source_name,
+                remove_from_key_value_store=False,
+                remove_from_storage=False,
+            )
 
         # Run blocking RAG API call in thread pool to avoid blocking event loop
         await asyncio.to_thread(self._rag_api.upload_information_piece, rag_information_pieces)
admin_api_lib/impl/api_endpoints/default_source_uploader.py

@@ -149,7 +149,11 @@ class DefaultSourceUploader(SourceUploader):
                 )
             )
         except asyncio.TimeoutError:
-            logger.error(
+            logger.error(
+                "Upload of %s timed out after %s seconds (increase SOURCE_UPLOADER_TIMEOUT to allow longer ingestions)",
+                source_name,
+                timeout,
+            )
             self._key_value_store.upsert(source_name, Status.ERROR)
         except Exception:
             logger.exception("Error while uploading %s", source_name)
@@ -193,7 +197,11 @@ class DefaultSourceUploader(SourceUploader):
             rag_information_pieces.append(self._information_mapper.document2rag_information_piece(doc))
 
         with suppress(Exception):
-            await self._document_deleter.adelete_document(
+            await self._document_deleter.adelete_document(
+                source_name,
+                remove_from_key_value_store=False,
+                remove_from_storage=False,
+            )
 
         # Run blocking RAG API call in thread pool to avoid blocking event loop
         await asyncio.to_thread(self._rag_api.upload_information_piece, rag_information_pieces)
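The timeout branch now names the knob to turn. A sketch of the surrounding pattern, assuming the ingestion coroutine is wrapped in `asyncio.wait_for` as the context suggests (the wrapper, coroutine, and timeout source here are illustrative, not the uploader's exact code):

```python
import asyncio
import logging

logger = logging.getLogger(__name__)


async def upload_with_timeout(ingest_coro, source_name: str, timeout: float) -> None:
    # Illustrative wrapper; the real uploader also flips the key-value
    # status entry to ERROR in this branch.
    try:
        await asyncio.wait_for(ingest_coro, timeout=timeout)
    except asyncio.TimeoutError:
        logger.error(
            "Upload of %s timed out after %s seconds (increase SOURCE_UPLOADER_TIMEOUT to allow longer ingestions)",
            source_name,
            timeout,
        )
```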
admin_api_lib/impl/chunker/text_chunker.py

@@ -1,7 +1,7 @@
 """Module containing the TextChunker class."""
 
-from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_core.documents import Document
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 
 from admin_api_lib.chunker.chunker import Chunker
 
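Both the chunker and the summarizer (below) move `RecursiveCharacterTextSplitter` from the legacy `langchain.text_splitter` path to the standalone `langchain_text_splitters` package; `dependency_container.py` receives the same import. Usage is unchanged, for example:

```python
# Only the import path changes; the splitter API itself does not.
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_text("some long document text ...")
```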
admin_api_lib/impl/file_services/s3_service.py

@@ -1,130 +1,5 @@
-"""
+"""Re-export core S3 service implementation."""
 
-import
-from pathlib import Path
-from typing import BinaryIO
+from rag_core_lib.impl.file_services.s3_service import S3Service
 
-
-
-from admin_api_lib.file_services.file_service import FileService
-from admin_api_lib.impl.settings.s3_settings import S3Settings
-
-logger = logging.getLogger(__name__)
-
-
-class S3Service(FileService):
-    """Class to handle I/O with S3 storage."""
-
-    def __init__(self, s3_settings: S3Settings):
-        """Class to handle I/O with S3 storage.
-
-        Parameters
-        ----------
-        s3_settings: S3Settings
-            Settings for the s3. Must contain at least the endpoint, access_key_id, secret_access_key and bucket.
-        """
-        self._s3_settings = s3_settings
-        self._s3_client = boto3.client(
-            "s3",
-            endpoint_url=s3_settings.endpoint,
-            aws_access_key_id=s3_settings.access_key_id,
-            aws_secret_access_key=s3_settings.secret_access_key,
-            aws_session_token=None,
-            config=boto3.session.Config(signature_version="s3v4"),
-            verify=False,
-        )
-
-    def download_folder(self, source: str, target: Path) -> None:
-        """Download the remote folder on "source" to the local "target" directory.
-
-        Parameters
-        ----------
-        source: str
-            Path to the remote folder.
-        target: Path
-            Download destination path.
-        """
-        target.mkdir(parents=True, exist_ok=True)
-
-        search_response = self._s3_client.list_objects_v2(
-            Bucket=self._s3_settings.bucket,
-            Prefix=source,
-        )
-        for found_content in search_response.get("Contents", []):
-            file_source = found_content["Key"]
-            target_path = target / file_source[len(source) :]
-            target_path.parent.mkdir(parents=True, exist_ok=True)
-            with open(target_path, "wb") as local_file:
-                self.download_file(file_source, local_file)
-
-    def download_file(self, source: str, target_file: BinaryIO) -> None:
-        """Read a single remote file "source" into the local "target_file" file-like object.
-
-        Example usage
-        =============
-        ```
-        s3_settings: S3Settings = get_s3_settings()
-        s3_service = S3Service(endpoint="endpoint", username="username", password="password", bucket_name="bucket")
-
-        with tempfile.SpooledTemporaryFile(max_size=self._iot_forecast_settings.max_model_size) as temp_file:
-            s3_service.download_file("remote_file", temp_file)
-            # do stuff with temp_file
-        ```
-
-        Parameters
-        ----------
-        source: str
-            Path to the remote folder.
-        target_file: BinaryIO
-            File-like object to save the data to.
-        """
-        self._s3_client.download_fileobj(self._s3_settings.bucket, source, target_file)
-
-    def upload_file(self, file_path: str, file_name: str) -> None:
-        """
-        Upload a local file to the S3 bucket.
-
-        Parameters
-        ----------
-        source : Path
-            The path to the local file to upload.
-        target : str
-            The target path in the S3 bucket where the file will be stored.
-        """
-        self._s3_client.upload_file(
-            Filename=file_path,
-            Bucket=self._s3_settings.bucket,
-            Key=file_name,
-        )
-
-    def get_all_sorted_file_names(self) -> list[str]:
-        """Retrieve all file names stored in the S3 bucket.
-
-        Returns
-        -------
-        list[str]
-            A list of file names stored in the S3 bucket.
-        """
-        file_names = []
-
-        resp = self._s3_client.list_objects_v2(Bucket=self._s3_settings.bucket)
-        if resp.get("Contents"):
-            for obj in resp["Contents"]:
-                file_names.append(obj["Key"])
-        return file_names
-
-    def delete_file(self, file_name: str) -> None:
-        """Delete a file from the S3 bucket.
-
-        Parameters
-        ----------
-        file_name : str
-            The name of the file to be deleted from the S3 bucket.
-        """
-        try:
-            file_name = f"/{file_name}" if not file_name.startswith("/") else file_name
-            self._s3_client.delete_object(Bucket=self._s3_settings.bucket, Key=file_name)
-            logger.info("File %s successfully deleted.", file_name)
-        except Exception:
-            logger.exception("Error deleting file %s", file_name)
-            raise
+__all__ = ["S3Service"]
admin_api_lib/impl/information_enhancer/page_summary_enhancer.py

@@ -1,8 +1,9 @@
 """Module for enhancing the summary of pages by grouping information by page and summarizing each page."""
 
-
+import asyncio
 from hashlib import sha256
 from typing import Optional
+from typing import Any
 
 from langchain_core.documents import Document
 from langchain_core.runnables import RunnableConfig
@@ -25,8 +26,36 @@ class PageSummaryEnhancer(SummaryEnhancer):
     """
 
     BASE64_IMAGE_KEY = "base64_image"
+    DOCUMENT_URL_KEY = "document_url"
     DEFAULT_PAGE_NR = 1
 
+    @staticmethod
+    def _parse_max_concurrency(config: Optional[RunnableConfig]) -> int:
+        if not config:
+            return 1
+        raw = config.get("max_concurrency")
+        if raw is None:
+            return 1
+        try:
+            return max(1, int(raw))
+        except (TypeError, ValueError):
+            return 1
+
+    def _group_key(self, piece: Document) -> tuple[Any, ...]:
+        document_url = piece.metadata.get(self.DOCUMENT_URL_KEY)
+        page = piece.metadata.get("page", self.DEFAULT_PAGE_NR)
+
+        # For paged documents (PDF/docling/etc.) keep per-page summaries even if a shared document URL exists.
+        if isinstance(page, int) or (isinstance(page, str) and page != "Unknown Title"):
+            return ("page_number", document_url, page)
+
+        # For sources like sitemaps/confluence, `page` can be a non-unique title (or missing),
+        # so group by the page URL when available to ensure one summary per page.
+        if document_url:
+            return ("document_url", document_url)
+
+        return ("page", page)
+
     async def _asummarize_page(self, page_pieces: list[Document], config: Optional[RunnableConfig]) -> Document:
         full_page_content = " ".join([piece.page_content for piece in page_pieces])
         summary = await self._summarizer.ainvoke(full_page_content, config)
@@ -39,24 +68,46 @@ class PageSummaryEnhancer(SummaryEnhancer):
         return Document(metadata=meta, page_content=summary)
 
     async def _acreate_summary(self, information: list[Document], config: Optional[RunnableConfig]) -> list[Document]:
-
+        grouped = self._group_information(information)
+        max_concurrency = self._parse_max_concurrency(config)
+        return await self._summarize_groups(grouped, config, max_concurrency=max_concurrency)
+
+    def _group_information(self, information: list[Document]) -> list[list[Document]]:
+        ordered_keys: list[tuple[Any, ...]] = []
+        groups: dict[tuple[Any, ...], list[Document]] = {}
         for info in information:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            key = self._group_key(info)
+            if key not in groups:
+                ordered_keys.append(key)
+                groups[key] = []
+            groups[key].append(info)
+        return [groups[key] for key in ordered_keys]
+
+    async def _summarize_groups(
+        self,
+        grouped: list[list[Document]],
+        config: Optional[RunnableConfig],
+        *,
+        max_concurrency: int,
+    ) -> list[Document]:
+        if max_concurrency == 1:
+            summaries: list[Document] = []
+            for info_group in tqdm(grouped):
+                summaries.append(await self._asummarize_page(info_group, config))
+            return summaries
+
+        semaphore = asyncio.Semaphore(max_concurrency)
+        results: list[Document | None] = [None] * len(grouped)
+
+        async def _run(idx: int, info_group: list[Document]) -> tuple[int, Document]:
+            async with semaphore:
+                return idx, await self._asummarize_page(info_group, config)
+
+        tasks = [asyncio.create_task(_run(idx, info_group)) for idx, info_group in enumerate(grouped)]
+        with tqdm(total=len(tasks)) as pbar:
+            for task in asyncio.as_completed(tasks):
+                idx, summary = await task
+                results[idx] = summary
+                pbar.update(1)
+
+        return [summary for summary in results if summary is not None]
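Two things happen here: pieces are grouped by a composite key (page number for paged documents, falling back to `document_url` for web-like sources), and the per-group summaries can now run concurrently, bounded by `max_concurrency` from the `RunnableConfig`. A self-contained sketch of the ordered, semaphore-bounded fan-out used above, with a stand-in `summarize` coroutine instead of `_asummarize_page`:

```python
import asyncio


async def summarize(group: list[str]) -> str:
    # Stand-in for PageSummaryEnhancer._asummarize_page.
    await asyncio.sleep(0.01)
    return " ".join(group)[:40]


async def summarize_groups(grouped: list[list[str]], max_concurrency: int) -> list[str]:
    # Same shape as _summarize_groups: bound the fan-out with a semaphore,
    # then collect completions back into input order via their index.
    semaphore = asyncio.Semaphore(max_concurrency)
    results: list[str | None] = [None] * len(grouped)

    async def _run(idx: int, group: list[str]) -> tuple[int, str]:
        async with semaphore:
            return idx, await summarize(group)

    tasks = [asyncio.create_task(_run(i, g)) for i, g in enumerate(grouped)]
    for task in asyncio.as_completed(tasks):
        idx, summary = await task
        results[idx] = summary
    return [r for r in results if r is not None]


print(asyncio.run(summarize_groups([["page one"], ["page two"]], max_concurrency=2)))
```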
admin_api_lib/impl/key_db/file_status_key_value_store.py

@@ -1,6 +1,8 @@
 """Module containing the FileStatusKeyValueStore class."""
 
 import json
+import ssl
+from typing import Any
 
 from redis import Redis
 
@@ -37,9 +39,53 @@ class FileStatusKeyValueStore:
        Parameters
        ----------
        settings : KeyValueSettings
-            The settings object containing the
+            The settings object containing the connection information for the Redis connection.
        """
-
+        redis_kwargs: dict[str, Any] = {
+            "host": settings.host,
+            "port": settings.port,
+            "decode_responses": True,
+            **self._build_ssl_kwargs(settings),
+        }
+        if settings.username:
+            redis_kwargs["username"] = settings.username
+        if settings.password:
+            redis_kwargs["password"] = settings.password
+
+        self._redis = Redis(**redis_kwargs)
+
+    @staticmethod
+    def _build_ssl_kwargs(settings: KeyValueSettings) -> dict[str, Any]:
+        """Build Redis SSL settings from configuration, mapping string values to ssl constants."""
+        if not settings.use_ssl:
+            return {}
+
+        cert_reqs_map = {
+            "required": ssl.CERT_REQUIRED,
+            "optional": ssl.CERT_OPTIONAL,
+            "none": ssl.CERT_NONE,
+            "cert_required": ssl.CERT_REQUIRED,
+            "cert_optional": ssl.CERT_OPTIONAL,
+            "cert_none": ssl.CERT_NONE,
+        }
+        ssl_cert_reqs = None
+        if settings.ssl_cert_reqs:
+            ssl_cert_reqs = cert_reqs_map.get(settings.ssl_cert_reqs.lower(), settings.ssl_cert_reqs)
+
+        ssl_kwargs: dict[str, Any] = {
+            "ssl": settings.use_ssl,
+            "ssl_check_hostname": settings.ssl_check_hostname,
+        }
+        if ssl_cert_reqs is not None:
+            ssl_kwargs["ssl_cert_reqs"] = ssl_cert_reqs
+        if settings.ssl_ca_certs:
+            ssl_kwargs["ssl_ca_certs"] = settings.ssl_ca_certs
+        if settings.ssl_certfile:
+            ssl_kwargs["ssl_certfile"] = settings.ssl_certfile
+        if settings.ssl_keyfile:
+            ssl_kwargs["ssl_keyfile"] = settings.ssl_keyfile
+
+        return ssl_kwargs
 
     @staticmethod
     def _to_str(file_name: str, file_status: Status) -> str:
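With `use_ssl` enabled, the resulting keyword arguments map directly onto `redis.Redis`. Roughly what the constructed client looks like for an SSL-only managed Redis (host, credentials, and the CA path below are placeholders):

```python
import ssl

from redis import Redis

# Roughly what FileStatusKeyValueStore builds when use_ssl is enabled.
redis = Redis(
    host="redis.example.internal",
    port=6380,
    decode_responses=True,
    username="admin",
    password="secret",
    ssl=True,
    ssl_cert_reqs=ssl.CERT_REQUIRED,   # mapped from ssl_cert_reqs="required"
    ssl_ca_certs="/etc/ssl/certs/redis-ca.pem",
    ssl_check_hostname=True,
)
redis.ping()
```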
admin_api_lib/impl/settings/key_value_settings.py

@@ -14,6 +14,22 @@ class KeyValueSettings(BaseSettings):
        The hostname of the key value store.
    port : int
        The port number of the key value store.
+    username : str | None
+        Optional username for authenticating with the key value store.
+    password : str | None
+        Optional password for authenticating with the key value store.
+    use_ssl : bool
+        Whether to use SSL/TLS when connecting to the key value store.
+    ssl_cert_reqs : str | None
+        SSL certificate requirement level (e.g., 'required', 'optional', 'none').
+    ssl_ca_certs : str | None
+        Path to a CA bundle file for verifying the server certificate.
+    ssl_certfile : str | None
+        Path to the client SSL certificate file (if mutual TLS is required).
+    ssl_keyfile : str | None
+        Path to the client SSL private key file (if mutual TLS is required).
+    ssl_check_hostname : bool
+        Whether to verify the server hostname against the certificate.
    """
 
    class Config:
@@ -24,3 +40,11 @@ class KeyValueSettings(BaseSettings):
 
    host: str = Field()
    port: int = Field()
+    username: str | None = Field(default=None)
+    password: str | None = Field(default=None)
+    use_ssl: bool = Field(default=False)
+    ssl_cert_reqs: str | None = Field(default=None)
+    ssl_ca_certs: str | None = Field(default=None)
+    ssl_certfile: str | None = Field(default=None)
+    ssl_keyfile: str | None = Field(default=None)
+    ssl_check_hostname: bool = Field(default=True)
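Since these are `pydantic-settings` fields, a TLS-enabled deployment can be configured entirely through the environment. A hedged sketch; the values are placeholders, and the `USECASE_KEYVALUE_` prefix is taken from the README section further down in this diff rather than from the settings class itself:

```python
import os

# Placeholder values for illustration only.
os.environ["USECASE_KEYVALUE_HOST"] = "redis.example.internal"
os.environ["USECASE_KEYVALUE_PORT"] = "6380"
os.environ["USECASE_KEYVALUE_USERNAME"] = "admin"
os.environ["USECASE_KEYVALUE_PASSWORD"] = "secret"
os.environ["USECASE_KEYVALUE_USE_SSL"] = "true"
os.environ["USECASE_KEYVALUE_SSL_CERT_REQS"] = "required"

from admin_api_lib.impl.settings.key_value_settings import KeyValueSettings

settings = KeyValueSettings()  # pydantic-settings reads the env vars above
assert settings.use_ssl is True
```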
admin_api_lib/impl/settings/s3_settings.py

@@ -1,31 +1,5 @@
-"""
+"""Re-export core S3 settings."""
 
-from
+from rag_core_lib.impl.settings.s3_settings import S3Settings
 
-
-class S3Settings(BaseSettings):
-    """
-    Contains settings regarding the S3 storage.
-
-    Attributes
-    ----------
-    secret_access_key : str
-        The secret access key for S3.
-    access_key_id : str
-        The access key ID for S3.
-    endpoint : str
-        The endpoint URL for S3.
-    bucket : str
-        The bucket name in S3.
-    """
-
-    class Config:
-        """Config class for reading Fields from env."""
-
-        env_prefix = "S3_"
-        case_sensitive = False
-
-    secret_access_key: str
-    access_key_id: str
-    endpoint: str
-    bucket: str
+__all__ = ["S3Settings"]
admin_api_lib/impl/summarizer/langchain_summarizer.py

@@ -4,9 +4,9 @@ import asyncio
 import logging
 from typing import Optional
 
-from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_core.documents import Document
 from langchain_core.runnables import Runnable, RunnableConfig, ensure_config
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 from openai import APIConnectionError, APIError, APITimeoutError, RateLimitError
 
 from admin_api_lib.impl.settings.summarizer_settings import SummarizerSettings
@@ -44,6 +44,24 @@ class LangchainSummarizer(Summarizer):
         self._semaphore = semaphore
         self._retry_decorator_settings = create_retry_decorator_settings(summarizer_settings, retry_decorator_settings)
 
+    @staticmethod
+    def _parse_max_concurrency(config: RunnableConfig) -> Optional[int]:
+        """Parse max concurrency from a RunnableConfig.
+
+        Returns
+        -------
+        Optional[int]
+            An integer >= 1 if configured and valid, otherwise None.
+        """
+        max_concurrency = config.get("max_concurrency")
+        if max_concurrency is None:
+            return None
+
+        try:
+            return max(1, int(max_concurrency))
+        except (TypeError, ValueError):
+            return None
+
     async def ainvoke(self, query: SummarizerInput, config: Optional[RunnableConfig] = None) -> SummarizerOutput:
         """
         Asynchronously invokes the summarization process on the given query.
@@ -77,9 +95,8 @@ class LangchainSummarizer(Summarizer):
         langchain_documents = self._chunker.split_documents([document])
         logger.debug("Summarizing %d chunk(s)...", len(langchain_documents))
 
-
-
-        outputs = await asyncio.gather(*tasks)
+        max_concurrency = self._parse_max_concurrency(config)
+        outputs = await self._summarize_documents(langchain_documents, config, max_concurrency=max_concurrency)
 
         if len(outputs) == 1:
             return outputs[0]
@@ -93,6 +110,34 @@ class LangchainSummarizer(Summarizer):
         )
         return await self._summarize_chunk(merged, config)
 
+    async def _summarize_documents(
+        self,
+        documents: list[Document],
+        config: RunnableConfig,
+        *,
+        max_concurrency: Optional[int],
+    ) -> list[SummarizerOutput]:
+        """Summarize a set of already-chunked documents.
+
+        Notes
+        -----
+        This optionally limits task fan-out using a per-call semaphore (max_concurrency).
+        The actual LLM call concurrency is always bounded by the instance semaphore held
+        inside `_summarize_chunk`.
+        """
+        if max_concurrency == 1:
+            return [await self._summarize_chunk(doc.page_content, config) for doc in documents]
+
+        limiter: asyncio.Semaphore | None = asyncio.Semaphore(max_concurrency) if max_concurrency is not None else None
+
+        async def _run(doc: Document) -> SummarizerOutput:
+            if limiter is None:
+                return await self._summarize_chunk(doc.page_content, config)
+            async with limiter:
+                return await self._summarize_chunk(doc.page_content, config)
+
+        return await asyncio.gather(*(_run(doc) for doc in documents))
+
     def _create_chain(self) -> Runnable:
         return self._langfuse_manager.get_base_prompt(self.__class__.__name__) | self._langfuse_manager.get_base_llm(
             self.__class__.__name__
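Callers can now cap chunk-level fan-out per invocation through the standard `RunnableConfig` key; without it, behaviour falls back to an unbounded `gather`, still throttled by the instance-level semaphore inside `_summarize_chunk`. A hypothetical call (the `summarizer` instance is assumed to be a wired-up `LangchainSummarizer`):

```python
import asyncio


async def run(summarizer, text: str) -> str:
    # At most four chunk summaries in flight for this call; the instance
    # semaphore still bounds the actual LLM requests underneath.
    return await summarizer.ainvoke(text, config={"max_concurrency": 4})


# asyncio.run(run(summarizer, long_document_text))
```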
{admin_api_lib-3.4.0.dist-info → admin_api_lib-4.1.0.dist-info}/METADATA

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.3
 Name: admin-api-lib
-Version:
+Version: 4.1.0
 Summary: The admin backend is responsible for the document management. This includes deletion, upload and returning the source document.
 License: Apache-2.0
 Author: STACKIT GmbH & Co. KG
@@ -11,19 +11,19 @@ Requires-Python: >=3.13,<4.0
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.13
-Classifier: Programming Language :: Python :: 3.14
 Requires-Dist: boto3 (>=1.38.10,<2.0.0)
 Requires-Dist: dependency-injector (>=4.46.0,<5.0.0)
-Requires-Dist: fastapi (>=0.
-Requires-Dist: langchain-experimental (>=0.
-Requires-Dist: langfuse (
+Requires-Dist: fastapi (>=0.121.2,<0.122.0)
+Requires-Dist: langchain-experimental (>=0.4.0,<0.5.0)
+Requires-Dist: langfuse (>=3.10.1,<4.0.0)
+Requires-Dist: langgraph-checkpoint (>=3.0.0,<4.0.0)
 Requires-Dist: nltk (>=3.9.2,<4.0.0)
 Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
 Requires-Dist: python-multipart (>=0.0.20,<0.0.21)
 Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
-Requires-Dist: rag-core-lib (==
+Requires-Dist: rag-core-lib (==4.0.0)
 Requires-Dist: redis (>=6.0.0,<7.0.0)
-Requires-Dist: starlette (>=0.
+Requires-Dist: starlette (>=0.49.1)
 Requires-Dist: tenacity (==9.1.2)
 Requires-Dist: tqdm (>=4.67.1,<5.0.0)
 Requires-Dist: uvicorn (>=0.37.0,<0.38.0)
@@ -100,6 +100,8 @@ All settings are powered by `pydantic-settings`, so you can use environment vari
 - `SUMMARIZER_MAXIMUM_INPUT_SIZE`, `SUMMARIZER_MAXIMUM_CONCURRENCY`, `SUMMARIZER_MAX_RETRIES`, etc. – tune summariser limits and retry behaviour.
 - `SOURCE_UPLOADER_TIMEOUT` – adjust how long non-file source ingestions wait before timing out.
 - `USECASE_KEYVALUE_HOST` / `USECASE_KEYVALUE_PORT` – configure the KeyDB/Redis instance that persists document status.
+- `USECASE_KEYVALUE_USERNAME` / `USECASE_KEYVALUE_PASSWORD` – optional credentials for authenticating against KeyDB/Redis.
+- `USECASE_KEYVALUE_USE_SSL`, `USECASE_KEYVALUE_SSL_CERT_REQS`, `USECASE_KEYVALUE_SSL_CA_CERTS`, `USECASE_KEYVALUE_SSL_CERTFILE`, `USECASE_KEYVALUE_SSL_KEYFILE`, `USECASE_KEYVALUE_SSL_CHECK_HOSTNAME` – optional TLS settings for managed Redis deployments (e.g., STACKIT Redis or other SSL-only endpoints).
 
 The Helm chart forwards these values through `adminBackend.envs.*`, keeping deployments declarative. Local development can rely on `.env` as described in the repository root README.
 
{admin_api_lib-3.4.0.dist-info → admin_api_lib-4.1.0.dist-info}/RECORD

@@ -1,5 +1,5 @@
 admin_api_lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-admin_api_lib/api_endpoints/document_deleter.py,sha256=
+admin_api_lib/api_endpoints/document_deleter.py,sha256=C6PA2OHyLPExBpuMCUU-YYz57aFh_DMkYEjg27lDoV4,941
 admin_api_lib/api_endpoints/document_reference_retriever.py,sha256=eisisp-ZMPn3P1yNkxiqQroz3Rz4Zz8pCrj8JQ9rrro,658
 admin_api_lib/api_endpoints/documents_status_retriever.py,sha256=PxrW4X6mN2z_XJHzTqeolCnRusiiBC4OE8TdI4lUEMg,572
 admin_api_lib/api_endpoints/file_uploader.py,sha256=r7m9G06aSC3mBdVuXCBKTfR5bTmYEjGJSTavhKRuSJk,725
@@ -10,7 +10,7 @@ admin_api_lib/apis/admin_api.py,sha256=RaeVO7A-IW_1kntw19RVUbKAI_BDIQExmgzAAZ5T3
 admin_api_lib/apis/admin_api_base.py,sha256=20M8U8dM91pJa2Wqx_UZTjpU0XCHIffVRac-_KJRMmk,3094
 admin_api_lib/chunker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 admin_api_lib/chunker/chunker.py,sha256=R2mxwmvz8o3iNzGHaLoMERcsIh82x88ZkKndbRNU-7U,627
-admin_api_lib/dependency_container.py,sha256=
+admin_api_lib/dependency_container.py,sha256=E4iupfqq3B60edfOTi-YqkvbSD4ARXmJGLAHXFuZspg,10127
 admin_api_lib/extractor_api_client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 admin_api_lib/extractor_api_client/openapi_client/__init__.py,sha256=ha9QCyCZq3PJKz7pQpTtI0bBypqj6E3Bg1pi7vBIazo,1823
 admin_api_lib/extractor_api_client/openapi_client/api/__init__.py,sha256=6THi7hHMLnN46_cLbVVamrTv-Ab_Wsn4b5YBkhrk65U,140
@@ -33,38 +33,38 @@ admin_api_lib/extractor_api_client/openapi_client/test/test_extraction_request.p
 admin_api_lib/extractor_api_client/openapi_client/test/test_extractor_api.py,sha256=kaTxIEPyQECdkAoT-kzVyYx0jX-P-5U6v73Ndhyg40w,887
 admin_api_lib/extractor_api_client/openapi_client/test/test_information_piece.py,sha256=Aq4h5SB-GY8BbeQR9sFMN1B3Z5UMUO6pRx6XU0b7xqs,1730
 admin_api_lib/extractor_api_client/openapi_client/test/test_key_value_pair.py,sha256=Zd9_HyM-4U2iVtGYO45UI6xkJKVNcj369hFe5zkBhDM,1403
-admin_api_lib/file_services/file_service.py,sha256=
+admin_api_lib/file_services/file_service.py,sha256=8sJLyWCPRw88ZAY9-4YPcMqYRR6Bgw_pv5A8Dy1Xf5k,137
 admin_api_lib/impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 admin_api_lib/impl/admin_api.py,sha256=k5geH6NorzzKg3_Z0tltLgTeJ0ru5AyFKPQi4nWjdnE,5483
-admin_api_lib/impl/api_endpoints/default_document_deleter.py,sha256=
+admin_api_lib/impl/api_endpoints/default_document_deleter.py,sha256=9P_UYbnn0wqBOy18jFn44zaY5nykTXfaQ_Zt39bfmF0,4727
 admin_api_lib/impl/api_endpoints/default_document_reference_retriever.py,sha256=H3bQvpMLMjsyUzZMfTziPW7qU3N9D5s6DMKEA4fMITM,2642
 admin_api_lib/impl/api_endpoints/default_documents_status_retriever.py,sha256=ZtLNgmFWGcfU4jNhVPiAKIJT701Z4wVwQAWpPbegxfc,1419
-admin_api_lib/impl/api_endpoints/default_file_uploader.py,sha256=
-admin_api_lib/impl/api_endpoints/default_source_uploader.py,sha256=
+admin_api_lib/impl/api_endpoints/default_file_uploader.py,sha256=2tGd3ZQOJMZob_0UreopOliaefvIYq4CTp9soQevoCc,9719
+admin_api_lib/impl/api_endpoints/default_source_uploader.py,sha256=UrKmbyu2l-Jyhbkaiqt66Ks-RMwD_i2vEkOdhgTbwZU,8814
 admin_api_lib/impl/chunker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 admin_api_lib/impl/chunker/chunker_type.py,sha256=ArEAmQ9OWe3ek7FMb2auFvGk4UXsOr3xzjyB5TXI2i8,247
 admin_api_lib/impl/chunker/semantic_text_chunker.py,sha256=VrbHpY867gQPtMhz6HjqstdBQ-e74kwce_Ma-9LVsJo,10605
-admin_api_lib/impl/chunker/text_chunker.py,sha256=
+admin_api_lib/impl/chunker/text_chunker.py,sha256=0WC1EsvkqHI16IeopTvrx0Y2XQmXscUFCT8-z6h5nOY,1076
 admin_api_lib/impl/file_services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-admin_api_lib/impl/file_services/s3_service.py,sha256=
+admin_api_lib/impl/file_services/s3_service.py,sha256=8NuYwKd3zNdpGWM6UC1R3yUmbG6XOyZrhYbguznzgRY,139
 admin_api_lib/impl/information_enhancer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 admin_api_lib/impl/information_enhancer/general_enhancer.py,sha256=H4mVxYEGW2owdCcUwbh7z132PTxa3dZssRL0MDJg56Q,1889
-admin_api_lib/impl/information_enhancer/page_summary_enhancer.py,sha256=
+admin_api_lib/impl/information_enhancer/page_summary_enhancer.py,sha256=Ve0KjQMdxESLi2K3-fgZcHf2xMWNeJkfxvcbJCajWJo,4652
 admin_api_lib/impl/information_enhancer/summary_enhancer.py,sha256=CdsE4KGTfui6nBBABwBcEQJ-1SBrPwN7Pk77zPUxGBk,2704
 admin_api_lib/impl/key_db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-admin_api_lib/impl/key_db/file_status_key_value_store.py,sha256=
+admin_api_lib/impl/key_db/file_status_key_value_store.py,sha256=FSmenxyJx7hMWY9rNocAF7zf4zU_iowR3T-JjGyhjNU,5328
 admin_api_lib/impl/mapper/informationpiece2document.py,sha256=dMngfdjBV2JkUtamypwmBvJVWdWcZhXn_XnBmGQLCxg,3924
 admin_api_lib/impl/settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 admin_api_lib/impl/settings/chunker_class_type_settings.py,sha256=w7cgOB2hBattRqIbrQLMBXsPX2UZpmzxnI6n5DrcO2k,541
 admin_api_lib/impl/settings/chunker_settings.py,sha256=TLBPY-PLmCuly8mMgGIlm3gU9FzzyhCgmLdTcfV3vAg,814
 admin_api_lib/impl/settings/document_extractor_settings.py,sha256=gYbdk6M4dwTL9esWfemGGHEJYOdV9BT7c8zGBKLbjlU,520
-admin_api_lib/impl/settings/key_value_settings.py,sha256=
+admin_api_lib/impl/settings/key_value_settings.py,sha256=jokWtwXLwN4_yPaWwmVxJOSKwyR43hzQtQhKqAnIBrw,1789
 admin_api_lib/impl/settings/rag_api_settings.py,sha256=YMxnsiMLjZBQZ2a6C3kZuVgqd42_w4JUC9dTkHnwuaU,484
-admin_api_lib/impl/settings/s3_settings.py,sha256=
+admin_api_lib/impl/settings/s3_settings.py,sha256=K5-zrhueyOKD_CpHQ3U2q16FZ1BZLF5oFnCnUnNoswA,123
 admin_api_lib/impl/settings/source_uploader_settings.py,sha256=ZCVFI3TgSZpVjBVTcJw9QSyTBLvqzYsJ41vpihtI7pY,582
 admin_api_lib/impl/settings/summarizer_settings.py,sha256=NGZ0o25dp7GkfOSAzcoCDSQrpUqiGe28Oi05ho2MAZ8,2895
 admin_api_lib/impl/summarizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-admin_api_lib/impl/summarizer/langchain_summarizer.py,sha256=
+admin_api_lib/impl/summarizer/langchain_summarizer.py,sha256=VIsUo4dI3Ebil5lnX4uY6Rdqs8CKlxk9Vvmb9Lx1K5Y,6264
 admin_api_lib/information_enhancer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 admin_api_lib/information_enhancer/information_enhancer.py,sha256=Bp2fQ3lvOmj192QmSH5NItseyDhWczzCHoeABVx4D3w,1155
 admin_api_lib/main.py,sha256=33fiCU05aKAxz9zaO0YtM2dynLxvJxMJn_4Cf6zk4q8,1582
@@ -101,6 +101,6 @@ admin_api_lib/summarizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
 admin_api_lib/summarizer/summarizer.py,sha256=D0rkW0iZSys-68LcO1-PIkE0Faf2Grg-_9wu75Rc1OY,966
 admin_api_lib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 admin_api_lib/utils/utils.py,sha256=eaNQ_NzUEp4hwhCU9EEsUXvbRH_ekVariF7tTsO9Sco,834
-admin_api_lib-
-admin_api_lib-
-admin_api_lib-
+admin_api_lib-4.1.0.dist-info/METADATA,sha256=T6a5lgyq6pnbAPxSO4l2r5RHuebfioA62HsNd-x0Br0,7900
+admin_api_lib-4.1.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+admin_api_lib-4.1.0.dist-info/RECORD,,