admin-api-lib 3.4.0__tar.gz → 4.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/PKG-INFO +10 -8
  2. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/README.md +2 -0
  3. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/pyproject.toml +19 -14
  4. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/api_endpoints/document_deleter.py +8 -1
  5. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/dependency_container.py +1 -1
  6. admin_api_lib-4.1.0/src/admin_api_lib/file_services/file_service.py +5 -0
  7. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/api_endpoints/default_document_deleter.py +34 -7
  8. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/api_endpoints/default_file_uploader.py +5 -1
  9. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/api_endpoints/default_source_uploader.py +10 -2
  10. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/chunker/text_chunker.py +1 -1
  11. admin_api_lib-4.1.0/src/admin_api_lib/impl/file_services/s3_service.py +5 -0
  12. admin_api_lib-4.1.0/src/admin_api_lib/impl/information_enhancer/page_summary_enhancer.py +113 -0
  13. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/key_db/file_status_key_value_store.py +48 -2
  14. admin_api_lib-4.1.0/src/admin_api_lib/impl/settings/key_value_settings.py +50 -0
  15. admin_api_lib-4.1.0/src/admin_api_lib/impl/settings/s3_settings.py +5 -0
  16. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/summarizer/langchain_summarizer.py +49 -4
  17. admin_api_lib-3.4.0/src/admin_api_lib/file_services/file_service.py +0 -77
  18. admin_api_lib-3.4.0/src/admin_api_lib/impl/file_services/s3_service.py +0 -130
  19. admin_api_lib-3.4.0/src/admin_api_lib/impl/information_enhancer/page_summary_enhancer.py +0 -62
  20. admin_api_lib-3.4.0/src/admin_api_lib/impl/settings/key_value_settings.py +0 -26
  21. admin_api_lib-3.4.0/src/admin_api_lib/impl/settings/s3_settings.py +0 -31
  22. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/__init__.py +0 -0
  23. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/api_endpoints/document_reference_retriever.py +0 -0
  24. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/api_endpoints/documents_status_retriever.py +0 -0
  25. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/api_endpoints/file_uploader.py +0 -0
  26. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/api_endpoints/source_uploader.py +0 -0
  27. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/api_endpoints/uploader_base.py +0 -0
  28. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/apis/__init__.py +0 -0
  29. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/apis/admin_api.py +0 -0
  30. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/apis/admin_api_base.py +0 -0
  31. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/chunker/__init__.py +0 -0
  32. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/chunker/chunker.py +0 -0
  33. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/__init__.py +0 -0
  34. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/__init__.py +0 -0
  35. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/api/__init__.py +0 -0
  36. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/api/extractor_api.py +0 -0
  37. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/api_client.py +0 -0
  38. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/api_response.py +0 -0
  39. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/configuration.py +0 -0
  40. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/exceptions.py +0 -0
  41. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/models/__init__.py +0 -0
  42. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/models/content_type.py +0 -0
  43. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/models/extraction_parameters.py +0 -0
  44. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/models/extraction_request.py +0 -0
  45. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/models/information_piece.py +0 -0
  46. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/models/key_value_pair.py +0 -0
  47. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/rest.py +0 -0
  48. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/test/__init__.py +0 -0
  49. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/test/test_content_type.py +0 -0
  50. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/test/test_extraction_parameters.py +0 -0
  51. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/test/test_extraction_request.py +0 -0
  52. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/test/test_extractor_api.py +0 -0
  53. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/test/test_information_piece.py +0 -0
  54. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/extractor_api_client/openapi_client/test/test_key_value_pair.py +0 -0
  55. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/__init__.py +0 -0
  56. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/admin_api.py +0 -0
  57. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/api_endpoints/default_document_reference_retriever.py +0 -0
  58. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/api_endpoints/default_documents_status_retriever.py +0 -0
  59. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/chunker/__init__.py +0 -0
  60. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/chunker/chunker_type.py +0 -0
  61. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/chunker/semantic_text_chunker.py +0 -0
  62. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/file_services/__init__.py +0 -0
  63. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/information_enhancer/__init__.py +0 -0
  64. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/information_enhancer/general_enhancer.py +0 -0
  65. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/information_enhancer/summary_enhancer.py +0 -0
  66. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/key_db/__init__.py +0 -0
  67. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/mapper/informationpiece2document.py +0 -0
  68. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/settings/__init__.py +0 -0
  69. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/settings/chunker_class_type_settings.py +0 -0
  70. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/settings/chunker_settings.py +0 -0
  71. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/settings/document_extractor_settings.py +0 -0
  72. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/settings/rag_api_settings.py +0 -0
  73. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/settings/source_uploader_settings.py +0 -0
  74. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/settings/summarizer_settings.py +0 -0
  75. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/impl/summarizer/__init__.py +0 -0
  76. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/information_enhancer/__init__.py +0 -0
  77. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/information_enhancer/information_enhancer.py +0 -0
  78. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/main.py +0 -0
  79. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/models/__init__.py +0 -0
  80. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/models/document_status.py +0 -0
  81. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/models/extra_models.py +0 -0
  82. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/models/http_validation_error.py +0 -0
  83. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/models/key_value_pair.py +0 -0
  84. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/models/status.py +0 -0
  85. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/models/validation_error.py +0 -0
  86. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/models/validation_error_loc_inner.py +0 -0
  87. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/prompt_templates/__init__.py +0 -0
  88. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/prompt_templates/summarize_prompt.py +0 -0
  89. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/__init__.py +0 -0
  90. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/__init__.py +0 -0
  91. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/api/__init__.py +0 -0
  92. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/api/rag_api.py +0 -0
  93. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/api_client.py +0 -0
  94. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/api_response.py +0 -0
  95. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/configuration.py +0 -0
  96. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/exceptions.py +0 -0
  97. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/models/__init__.py +0 -0
  98. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_history.py +0 -0
  99. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_history_message.py +0 -0
  100. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_request.py +0 -0
  101. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_response.py +0 -0
  102. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/models/chat_role.py +0 -0
  103. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/models/content_type.py +0 -0
  104. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/models/delete_request.py +0 -0
  105. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/models/information_piece.py +0 -0
  106. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/models/key_value_pair.py +0 -0
  107. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/rag_backend_client/openapi_client/rest.py +0 -0
  108. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/summarizer/__init__.py +0 -0
  109. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/summarizer/summarizer.py +0 -0
  110. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/utils/__init__.py +0 -0
  111. {admin_api_lib-3.4.0 → admin_api_lib-4.1.0}/src/admin_api_lib/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.3
2
2
  Name: admin-api-lib
3
- Version: 3.4.0
3
+ Version: 4.1.0
4
4
  Summary: The admin backend is responsible for the document management. This includes deletion, upload and returning the source document.
5
5
  License: Apache-2.0
6
6
  Author: STACKIT GmbH & Co. KG
@@ -11,19 +11,19 @@ Requires-Python: >=3.13,<4.0
11
11
  Classifier: License :: OSI Approved :: Apache Software License
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Programming Language :: Python :: 3.13
14
- Classifier: Programming Language :: Python :: 3.14
15
14
  Requires-Dist: boto3 (>=1.38.10,<2.0.0)
16
15
  Requires-Dist: dependency-injector (>=4.46.0,<5.0.0)
17
- Requires-Dist: fastapi (>=0.118.0,<0.119.0)
18
- Requires-Dist: langchain-experimental (>=0.3.4,<0.4.0)
19
- Requires-Dist: langfuse (==3.6.1)
16
+ Requires-Dist: fastapi (>=0.121.2,<0.122.0)
17
+ Requires-Dist: langchain-experimental (>=0.4.0,<0.5.0)
18
+ Requires-Dist: langfuse (>=3.10.1,<4.0.0)
19
+ Requires-Dist: langgraph-checkpoint (>=3.0.0,<4.0.0)
20
20
  Requires-Dist: nltk (>=3.9.2,<4.0.0)
21
21
  Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
22
22
  Requires-Dist: python-multipart (>=0.0.20,<0.0.21)
23
23
  Requires-Dist: pyyaml (>=6.0.2,<7.0.0)
24
- Requires-Dist: rag-core-lib (==3.4.0)
24
+ Requires-Dist: rag-core-lib (==4.0.0)
25
25
  Requires-Dist: redis (>=6.0.0,<7.0.0)
26
- Requires-Dist: starlette (>=0.47.2,<0.49.0)
26
+ Requires-Dist: starlette (>=0.49.1)
27
27
  Requires-Dist: tenacity (==9.1.2)
28
28
  Requires-Dist: tqdm (>=4.67.1,<5.0.0)
29
29
  Requires-Dist: uvicorn (>=0.37.0,<0.38.0)
@@ -100,6 +100,8 @@ All settings are powered by `pydantic-settings`, so you can use environment vari
100
100
  - `SUMMARIZER_MAXIMUM_INPUT_SIZE`, `SUMMARIZER_MAXIMUM_CONCURRENCY`, `SUMMARIZER_MAX_RETRIES`, etc. – tune summariser limits and retry behaviour.
101
101
  - `SOURCE_UPLOADER_TIMEOUT` – adjust how long non-file source ingestions wait before timing out.
102
102
  - `USECASE_KEYVALUE_HOST` / `USECASE_KEYVALUE_PORT` – configure the KeyDB/Redis instance that persists document status.
103
+ - `USECASE_KEYVALUE_USERNAME` / `USECASE_KEYVALUE_PASSWORD` – optional credentials for authenticating against KeyDB/Redis.
104
+ - `USECASE_KEYVALUE_USE_SSL`, `USECASE_KEYVALUE_SSL_CERT_REQS`, `USECASE_KEYVALUE_SSL_CA_CERTS`, `USECASE_KEYVALUE_SSL_CERTFILE`, `USECASE_KEYVALUE_SSL_KEYFILE`, `USECASE_KEYVALUE_SSL_CHECK_HOSTNAME` – optional TLS settings for managed Redis deployments (e.g., STACKIT Redis or other SSL-only endpoints).
103
105
 
104
106
  The Helm chart forwards these values through `adminBackend.envs.*`, keeping deployments declarative. Local development can rely on `.env` as described in the repository root README.
105
107
 
@@ -67,6 +67,8 @@ All settings are powered by `pydantic-settings`, so you can use environment vari
67
67
  - `SUMMARIZER_MAXIMUM_INPUT_SIZE`, `SUMMARIZER_MAXIMUM_CONCURRENCY`, `SUMMARIZER_MAX_RETRIES`, etc. – tune summariser limits and retry behaviour.
68
68
  - `SOURCE_UPLOADER_TIMEOUT` – adjust how long non-file source ingestions wait before timing out.
69
69
  - `USECASE_KEYVALUE_HOST` / `USECASE_KEYVALUE_PORT` – configure the KeyDB/Redis instance that persists document status.
70
+ - `USECASE_KEYVALUE_USERNAME` / `USECASE_KEYVALUE_PASSWORD` – optional credentials for authenticating against KeyDB/Redis.
71
+ - `USECASE_KEYVALUE_USE_SSL`, `USECASE_KEYVALUE_SSL_CERT_REQS`, `USECASE_KEYVALUE_SSL_CA_CERTS`, `USECASE_KEYVALUE_SSL_CERTFILE`, `USECASE_KEYVALUE_SSL_KEYFILE`, `USECASE_KEYVALUE_SSL_CHECK_HOSTNAME` – optional TLS settings for managed Redis deployments (e.g., STACKIT Redis or other SSL-only endpoints).
70
72
 
71
73
  The Helm chart forwards these values through `adminBackend.envs.*`, keeping deployments declarative. Local development can rely on `.env` as described in the repository root README.
72
74
 
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "admin-api-lib"
7
- version = "v3.4.0"
7
+ version = "4.1.0"
8
8
  description = "The admin backend is responsible for the document management. This includes deletion, upload and returning the source document."
9
9
  authors = [
10
10
  "STACKIT GmbH & Co. KG <data-ai@stackit.cloud>",
@@ -18,6 +18,11 @@ license = "Apache-2.0"
18
18
  repository = "https://github.com/stackitcloud/rag-template"
19
19
  homepage = "https://pypi.org/project/admin-api-lib"
20
20
 
21
+ [[tool.poetry.source]]
22
+ name = "testpypi"
23
+ url = "https://test.pypi.org/simple/"
24
+ priority = "explicit"
25
+
21
26
  [tool.flake8]
22
27
  exclude= [".eggs", "./libs/*", "./src/admin_api_lib/models/*", "./src/admin_api_lib/rag_backend_client/*", "./src/admin_api_lib/extractor_api_client/*", ".git", ".hg", ".mypy_cache", ".tox", ".venv", ".devcontainer", "venv", "_build", "buck-out", "build", "dist", "**/__init__.py"]
23
28
  statistics = true
@@ -74,10 +79,12 @@ known_local_folder = ["admin_api_lib", "rag_core_lib"]
74
79
  [tool.pylint]
75
80
  max-line-length = 120
76
81
 
77
- [tool.poetry.group.dev.dependencies]
78
- debugpy = "^1.8.14"
82
+ [tool.poetry.group.test.dependencies]
79
83
  pytest = "^8.3.5"
84
+ pytest-asyncio = "^1.0.0"
80
85
  coverage = "^7.8.0"
86
+
87
+ [tool.poetry.group.lint.dependencies]
81
88
  flake8 = "^7.2.0"
82
89
  flake8-black = "^0.4.0"
83
90
  flake8-pyproject = "^1.2.3"
@@ -101,33 +108,31 @@ flake8-tidy-imports = "^4.10.0"
101
108
  black = "^25.1.0"
102
109
  flake8-logging-format = "^2024.24.12"
103
110
  flake8-docstrings = "^1.7.0"
104
- pytest-asyncio = "^1.0.0"
111
+
112
+ [tool.poetry.group.dev.dependencies]
113
+ debugpy = "^1.8.14"
105
114
 
106
115
  [tool.poetry.dependencies]
107
- rag-core-lib = "3.4.0"
116
+ rag-core-lib = "==4.0.0"
108
117
  python = "^3.13"
109
118
  uvicorn = "^0.37.0"
110
- fastapi = "^0.118.0"
119
+ fastapi = "^0.121.2"
111
120
  dependency-injector = "^4.46.0"
112
121
  python-dateutil = "^2.9.0.post0"
113
122
  tenacity = "9.1.2"
114
123
  boto3 = "^1.38.10"
115
124
  tqdm = "^4.67.1"
116
- langfuse = "3.6.1"
125
+ langfuse = "^3.10.1"
117
126
  redis = "^6.0.0"
118
127
  pyyaml = "^6.0.2"
119
128
  python-multipart = "^0.0.20"
120
- starlette = ">=0.47.2,<0.49.0"
121
- langchain-experimental = "^0.3.4"
129
+ langchain-experimental = "^0.4.0"
122
130
  nltk = "^3.9.2"
131
+ starlette = ">=0.49.1"
132
+ langgraph-checkpoint = ">=3.0.0,<4.0.0"
123
133
 
124
134
  [tool.pytest.ini_options]
125
135
  log_cli = true
126
136
  log_cli_level = "DEBUG"
127
137
  pythonpath = "src"
128
138
  testpaths = "src/tests"
129
-
130
- [[tool.poetry.source]]
131
- name = "testpypi"
132
- url = "https://test.pypi.org/simple/"
133
- priority = "supplemental"
@@ -7,7 +7,12 @@ class DocumentDeleter(ABC):
7
7
  """Abstract base class for document deletion endpoint."""
8
8
 
9
9
  @abstractmethod
10
- async def adelete_document(self, identification: str, remove_from_key_value_store: bool = True) -> None:
10
+ async def adelete_document(
11
+ self,
12
+ identification: str,
13
+ remove_from_key_value_store: bool = True,
14
+ remove_from_storage: bool = True,
15
+ ) -> None:
11
16
  """
12
17
  Delete a document by its identification asynchronously.
13
18
 
@@ -17,6 +22,8 @@ class DocumentDeleter(ABC):
17
22
  The unique identifier of the document to be deleted.
18
23
  remove_from_key_value_store : bool, optional
19
24
  If True, the document will also be removed from the key-value store (default is True).
25
+ remove_from_storage : bool, optional
26
+ If True, the document will also be removed from the file storage (default is True).
20
27
 
21
28
  Returns
22
29
  -------
@@ -3,7 +3,7 @@
3
3
  from admin_api_lib.impl.api_endpoints.default_file_uploader import DefaultFileUploader
4
4
  from dependency_injector.containers import DeclarativeContainer
5
5
  from dependency_injector.providers import Configuration, List, Selector, Singleton
6
- from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
7
7
  from langchain_community.embeddings import OllamaEmbeddings
8
8
  from langfuse import Langfuse
9
9
 
@@ -0,0 +1,5 @@
1
+ """Re-export core file service interface."""
2
+
3
+ from rag_core_lib.file_services.file_service import FileService
4
+
5
+ __all__ = ["FileService"]
@@ -41,7 +41,21 @@ class DefaultDocumentDeleter(DocumentDeleter):
41
41
  self._rag_api = rag_api
42
42
  self._key_value_store = key_value_store
43
43
 
44
- async def adelete_document(self, identification: str, remove_from_key_value_store: bool = True) -> None:
44
+ @staticmethod
45
+ def _storage_key_from_identification(identification: str) -> str | None:
46
+ if identification.startswith("file:"):
47
+ storage_key = identification[len("file:") :]
48
+ return storage_key or None
49
+ if ":" in identification:
50
+ return None
51
+ return identification or None
52
+
53
+ async def adelete_document(
54
+ self,
55
+ identification: str,
56
+ remove_from_key_value_store: bool = True,
57
+ remove_from_storage: bool = True,
58
+ ) -> None:
45
59
  """
46
60
  Asynchronously delete a document identified by the given identification string.
47
61
 
@@ -57,6 +71,8 @@ class DefaultDocumentDeleter(DocumentDeleter):
57
71
  The unique identifier of the document to be deleted.
58
72
  remove_from_key_value_store : bool, optional
59
73
  If True, the document will also be removed from the key-value store (default is True).
74
+ remove_from_storage : bool, optional
75
+ If True, the document will also be removed from the file storage (default is True).
60
76
 
61
77
  Raises
62
78
  ------
@@ -67,12 +83,12 @@ class DefaultDocumentDeleter(DocumentDeleter):
67
83
  error_messages = ""
68
84
  # Delete the document from file service and vector database
69
85
  logger.debug("Deleting existing document: %s", identification)
70
- try:
71
- if remove_from_key_value_store:
72
- self._key_value_store.remove(identification)
73
- self._file_service.delete_file(identification)
74
- except Exception as e:
75
- error_messages += f"Error while deleting {identification} from file storage\n {str(e)}\n"
86
+ if remove_from_key_value_store:
87
+ self._key_value_store.remove(identification)
88
+
89
+ if remove_from_storage:
90
+ error_messages = self._delete_from_storage(identification, error_messages)
91
+
76
92
  try:
77
93
  self._rag_api.remove_information_piece(
78
94
  DeleteRequest(metadata=[KeyValuePair(key="document", value=json.dumps(identification))])
@@ -82,3 +98,14 @@ class DefaultDocumentDeleter(DocumentDeleter):
82
98
  error_messages += f"Error while deleting {identification} from vector db\n{str(e)}"
83
99
  if error_messages:
84
100
  raise HTTPException(404, error_messages)
101
+
102
+ def _delete_from_storage(self, identification: str, error_messages: str) -> str:
103
+ try:
104
+ storage_key = self._storage_key_from_identification(identification)
105
+ if storage_key:
106
+ self._file_service.delete_file(storage_key)
107
+ else:
108
+ logger.debug("Skipping file storage deletion for non-file source: %s", identification)
109
+ except Exception as e:
110
+ error_messages += f"Error while deleting {identification} from file storage\n {str(e)}\n"
111
+ return error_messages
@@ -188,7 +188,11 @@ class DefaultFileUploader(FileUploader):
188
188
  # Replace old document
189
189
  # deletion is allowed to fail
190
190
  with suppress(Exception):
191
- await self._document_deleter.adelete_document(source_name, remove_from_key_value_store=False)
191
+ await self._document_deleter.adelete_document(
192
+ source_name,
193
+ remove_from_key_value_store=False,
194
+ remove_from_storage=False,
195
+ )
192
196
 
193
197
  # Run blocking RAG API call in thread pool to avoid blocking event loop
194
198
  await asyncio.to_thread(self._rag_api.upload_information_piece, rag_information_pieces)
@@ -149,7 +149,11 @@ class DefaultSourceUploader(SourceUploader):
149
149
  )
150
150
  )
151
151
  except asyncio.TimeoutError:
152
- logger.error("Upload of %s timed out after %s seconds", source_name, timeout)
152
+ logger.error(
153
+ "Upload of %s timed out after %s seconds (increase SOURCE_UPLOADER_TIMEOUT to allow longer ingestions)",
154
+ source_name,
155
+ timeout,
156
+ )
153
157
  self._key_value_store.upsert(source_name, Status.ERROR)
154
158
  except Exception:
155
159
  logger.exception("Error while uploading %s", source_name)
@@ -193,7 +197,11 @@ class DefaultSourceUploader(SourceUploader):
193
197
  rag_information_pieces.append(self._information_mapper.document2rag_information_piece(doc))
194
198
 
195
199
  with suppress(Exception):
196
- await self._document_deleter.adelete_document(source_name, remove_from_key_value_store=False)
200
+ await self._document_deleter.adelete_document(
201
+ source_name,
202
+ remove_from_key_value_store=False,
203
+ remove_from_storage=False,
204
+ )
197
205
 
198
206
  # Run blocking RAG API call in thread pool to avoid blocking event loop
199
207
  await asyncio.to_thread(self._rag_api.upload_information_piece, rag_information_pieces)
@@ -1,7 +1,7 @@
1
1
  """Module containing the TextChunker class."""
2
2
 
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
4
3
  from langchain_core.documents import Document
4
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
5
5
 
6
6
  from admin_api_lib.chunker.chunker import Chunker
7
7
 
@@ -0,0 +1,5 @@
1
+ """Re-export core S3 service implementation."""
2
+
3
+ from rag_core_lib.impl.file_services.s3_service import S3Service
4
+
5
+ __all__ = ["S3Service"]
@@ -0,0 +1,113 @@
1
+ """Module for enhancing the summary of pages by grouping information by page and summarizing each page."""
2
+
3
+ import asyncio
4
+ from hashlib import sha256
5
+ from typing import Optional
6
+ from typing import Any
7
+
8
+ from langchain_core.documents import Document
9
+ from langchain_core.runnables import RunnableConfig
10
+ from tqdm import tqdm
11
+
12
+ from admin_api_lib.impl.information_enhancer.summary_enhancer import SummaryEnhancer
13
+ from rag_core_lib.impl.data_types.content_type import ContentType
14
+
15
+
16
+ class PageSummaryEnhancer(SummaryEnhancer):
17
+ """
18
+ Enhances the summary of pages by grouping information by page and summarizing each page.
19
+
20
+ Attributes
21
+ ----------
22
+ BASE64_IMAGE_KEY : str
23
+ Key used to identify base64 encoded images in metadata.
24
+ DEFAULT_PAGE_NR : int
25
+ Default page number used when no page metadata is available.
26
+ """
27
+
28
+ BASE64_IMAGE_KEY = "base64_image"
29
+ DOCUMENT_URL_KEY = "document_url"
30
+ DEFAULT_PAGE_NR = 1
31
+
32
+ @staticmethod
33
+ def _parse_max_concurrency(config: Optional[RunnableConfig]) -> int:
34
+ if not config:
35
+ return 1
36
+ raw = config.get("max_concurrency")
37
+ if raw is None:
38
+ return 1
39
+ try:
40
+ return max(1, int(raw))
41
+ except (TypeError, ValueError):
42
+ return 1
43
+
44
+ def _group_key(self, piece: Document) -> tuple[Any, ...]:
45
+ document_url = piece.metadata.get(self.DOCUMENT_URL_KEY)
46
+ page = piece.metadata.get("page", self.DEFAULT_PAGE_NR)
47
+
48
+ # For paged documents (PDF/docling/etc.) keep per-page summaries even if a shared document URL exists.
49
+ if isinstance(page, int) or (isinstance(page, str) and page != "Unknown Title"):
50
+ return ("page_number", document_url, page)
51
+
52
+ # For sources like sitemaps/confluence, `page` can be a non-unique title (or missing),
53
+ # so group by the page URL when available to ensure one summary per page.
54
+ if document_url:
55
+ return ("document_url", document_url)
56
+
57
+ return ("page", page)
58
+
59
+ async def _asummarize_page(self, page_pieces: list[Document], config: Optional[RunnableConfig]) -> Document:
60
+ full_page_content = " ".join([piece.page_content for piece in page_pieces])
61
+ summary = await self._summarizer.ainvoke(full_page_content, config)
62
+ meta = {key: value for key, value in page_pieces[0].metadata.items() if key != self.BASE64_IMAGE_KEY}
63
+ meta["id"] = sha256(str.encode(full_page_content)).hexdigest()
64
+ meta["related"] = meta["related"] + [piece.metadata["id"] for piece in page_pieces]
65
+ meta["related"] = list(set(meta["related"]))
66
+ meta["type"] = ContentType.SUMMARY.value
67
+
68
+ return Document(metadata=meta, page_content=summary)
69
+
70
+ async def _acreate_summary(self, information: list[Document], config: Optional[RunnableConfig]) -> list[Document]:
71
+ grouped = self._group_information(information)
72
+ max_concurrency = self._parse_max_concurrency(config)
73
+ return await self._summarize_groups(grouped, config, max_concurrency=max_concurrency)
74
+
75
+ def _group_information(self, information: list[Document]) -> list[list[Document]]:
76
+ ordered_keys: list[tuple[Any, ...]] = []
77
+ groups: dict[tuple[Any, ...], list[Document]] = {}
78
+ for info in information:
79
+ key = self._group_key(info)
80
+ if key not in groups:
81
+ ordered_keys.append(key)
82
+ groups[key] = []
83
+ groups[key].append(info)
84
+ return [groups[key] for key in ordered_keys]
85
+
86
+ async def _summarize_groups(
87
+ self,
88
+ grouped: list[list[Document]],
89
+ config: Optional[RunnableConfig],
90
+ *,
91
+ max_concurrency: int,
92
+ ) -> list[Document]:
93
+ if max_concurrency == 1:
94
+ summaries: list[Document] = []
95
+ for info_group in tqdm(grouped):
96
+ summaries.append(await self._asummarize_page(info_group, config))
97
+ return summaries
98
+
99
+ semaphore = asyncio.Semaphore(max_concurrency)
100
+ results: list[Document | None] = [None] * len(grouped)
101
+
102
+ async def _run(idx: int, info_group: list[Document]) -> tuple[int, Document]:
103
+ async with semaphore:
104
+ return idx, await self._asummarize_page(info_group, config)
105
+
106
+ tasks = [asyncio.create_task(_run(idx, info_group)) for idx, info_group in enumerate(grouped)]
107
+ with tqdm(total=len(tasks)) as pbar:
108
+ for task in asyncio.as_completed(tasks):
109
+ idx, summary = await task
110
+ results[idx] = summary
111
+ pbar.update(1)
112
+
113
+ return [summary for summary in results if summary is not None]
@@ -1,6 +1,8 @@
1
1
  """Module containing the FileStatusKeyValueStore class."""
2
2
 
3
3
  import json
4
+ import ssl
5
+ from typing import Any
4
6
 
5
7
  from redis import Redis
6
8
 
@@ -37,9 +39,53 @@ class FileStatusKeyValueStore:
37
39
  Parameters
38
40
  ----------
39
41
  settings : KeyValueSettings
40
- The settings object containing the host and port information for the Redis connection.
42
+ The settings object containing the connection information for the Redis connection.
41
43
  """
42
- self._redis = Redis(host=settings.host, port=settings.port, decode_responses=True)
44
+ redis_kwargs: dict[str, Any] = {
45
+ "host": settings.host,
46
+ "port": settings.port,
47
+ "decode_responses": True,
48
+ **self._build_ssl_kwargs(settings),
49
+ }
50
+ if settings.username:
51
+ redis_kwargs["username"] = settings.username
52
+ if settings.password:
53
+ redis_kwargs["password"] = settings.password
54
+
55
+ self._redis = Redis(**redis_kwargs)
56
+
57
+ @staticmethod
58
+ def _build_ssl_kwargs(settings: KeyValueSettings) -> dict[str, Any]:
59
+ """Build Redis SSL settings from configuration, mapping string values to ssl constants."""
60
+ if not settings.use_ssl:
61
+ return {}
62
+
63
+ cert_reqs_map = {
64
+ "required": ssl.CERT_REQUIRED,
65
+ "optional": ssl.CERT_OPTIONAL,
66
+ "none": ssl.CERT_NONE,
67
+ "cert_required": ssl.CERT_REQUIRED,
68
+ "cert_optional": ssl.CERT_OPTIONAL,
69
+ "cert_none": ssl.CERT_NONE,
70
+ }
71
+ ssl_cert_reqs = None
72
+ if settings.ssl_cert_reqs:
73
+ ssl_cert_reqs = cert_reqs_map.get(settings.ssl_cert_reqs.lower(), settings.ssl_cert_reqs)
74
+
75
+ ssl_kwargs: dict[str, Any] = {
76
+ "ssl": settings.use_ssl,
77
+ "ssl_check_hostname": settings.ssl_check_hostname,
78
+ }
79
+ if ssl_cert_reqs is not None:
80
+ ssl_kwargs["ssl_cert_reqs"] = ssl_cert_reqs
81
+ if settings.ssl_ca_certs:
82
+ ssl_kwargs["ssl_ca_certs"] = settings.ssl_ca_certs
83
+ if settings.ssl_certfile:
84
+ ssl_kwargs["ssl_certfile"] = settings.ssl_certfile
85
+ if settings.ssl_keyfile:
86
+ ssl_kwargs["ssl_keyfile"] = settings.ssl_keyfile
87
+
88
+ return ssl_kwargs
43
89
 
44
90
  @staticmethod
45
91
  def _to_str(file_name: str, file_status: Status) -> str:
@@ -0,0 +1,50 @@
1
+ """Contains settings regarding the key values store."""
2
+
3
+ from pydantic import Field
4
+ from pydantic_settings import BaseSettings
5
+
6
+
7
class KeyValueSettings(BaseSettings):
    """
    Connection settings for the key value store.

    The values are read from the environment. Variable names carry the
    ``USECASE_KEYVALUE_`` prefix and are matched case-insensitively.

    Attributes
    ----------
    host : str
        Hostname of the key value store (required).
    port : int
        Port number of the key value store (required).
    username : str | None
        Username used for authentication, if any.
    password : str | None
        Password used for authentication, if any.
    use_ssl : bool
        Whether the connection uses SSL/TLS. Defaults to False.
    ssl_cert_reqs : str | None
        Certificate requirement level (e.g., 'required', 'optional', 'none').
    ssl_ca_certs : str | None
        Path to the CA bundle file used to verify the server certificate.
    ssl_certfile : str | None
        Path to the client certificate file (for mutual TLS).
    ssl_keyfile : str | None
        Path to the client private key file (for mutual TLS).
    ssl_check_hostname : bool
        Whether the server hostname is verified against the certificate. Defaults to True.
    """

    # Endpoint (both values must be provided).
    host: str = Field(...)
    port: int = Field(...)

    # Optional credentials.
    username: str | None = Field(None)
    password: str | None = Field(None)

    # Optional TLS configuration.
    use_ssl: bool = Field(False)
    ssl_cert_reqs: str | None = Field(None)
    ssl_ca_certs: str | None = Field(None)
    ssl_certfile: str | None = Field(None)
    ssl_keyfile: str | None = Field(None)
    ssl_check_hostname: bool = Field(True)

    class Config:
        """Config class for reading Fields from env."""

        case_sensitive = False
        env_prefix = "USECASE_KEYVALUE_"
@@ -0,0 +1,5 @@
1
+ """Re-export core S3 settings."""
2
+
3
+ from rag_core_lib.impl.settings.s3_settings import S3Settings
4
+
5
+ __all__ = ["S3Settings"]
@@ -4,9 +4,9 @@ import asyncio
4
4
  import logging
5
5
  from typing import Optional
6
6
 
7
- from langchain.text_splitter import RecursiveCharacterTextSplitter
8
7
  from langchain_core.documents import Document
9
8
  from langchain_core.runnables import Runnable, RunnableConfig, ensure_config
9
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
10
10
  from openai import APIConnectionError, APIError, APITimeoutError, RateLimitError
11
11
 
12
12
  from admin_api_lib.impl.settings.summarizer_settings import SummarizerSettings
@@ -44,6 +44,24 @@ class LangchainSummarizer(Summarizer):
44
44
  self._semaphore = semaphore
45
45
  self._retry_decorator_settings = create_retry_decorator_settings(summarizer_settings, retry_decorator_settings)
46
46
 
47
+ @staticmethod
48
+ def _parse_max_concurrency(config: RunnableConfig) -> Optional[int]:
49
+ """Parse max concurrency from a RunnableConfig.
50
+
51
+ Returns
52
+ -------
53
+ Optional[int]
54
+ An integer >= 1 if configured and valid, otherwise None.
55
+ """
56
+ max_concurrency = config.get("max_concurrency")
57
+ if max_concurrency is None:
58
+ return None
59
+
60
+ try:
61
+ return max(1, int(max_concurrency))
62
+ except (TypeError, ValueError):
63
+ return None
64
+
47
65
  async def ainvoke(self, query: SummarizerInput, config: Optional[RunnableConfig] = None) -> SummarizerOutput:
48
66
  """
49
67
  Asynchronously invokes the summarization process on the given query.
@@ -77,9 +95,8 @@ class LangchainSummarizer(Summarizer):
77
95
  langchain_documents = self._chunker.split_documents([document])
78
96
  logger.debug("Summarizing %d chunk(s)...", len(langchain_documents))
79
97
 
80
- # Fan out with concurrency, bounded by your semaphore inside _summarize_chunk
81
- tasks = [asyncio.create_task(self._summarize_chunk(doc.page_content, config)) for doc in langchain_documents]
82
- outputs = await asyncio.gather(*tasks)
98
+ max_concurrency = self._parse_max_concurrency(config)
99
+ outputs = await self._summarize_documents(langchain_documents, config, max_concurrency=max_concurrency)
83
100
 
84
101
  if len(outputs) == 1:
85
102
  return outputs[0]
@@ -93,6 +110,34 @@ class LangchainSummarizer(Summarizer):
93
110
  )
94
111
  return await self._summarize_chunk(merged, config)
95
112
 
113
+ async def _summarize_documents(
114
+ self,
115
+ documents: list[Document],
116
+ config: RunnableConfig,
117
+ *,
118
+ max_concurrency: Optional[int],
119
+ ) -> list[SummarizerOutput]:
120
+ """Summarize a set of already-chunked documents.
121
+
122
+ Notes
123
+ -----
124
+ This optionally limits task fan-out using a per-call semaphore (max_concurrency).
125
+ The actual LLM call concurrency is always bounded by the instance semaphore held
126
+ inside `_summarize_chunk`.
127
+ """
128
+ if max_concurrency == 1:
129
+ return [await self._summarize_chunk(doc.page_content, config) for doc in documents]
130
+
131
+ limiter: asyncio.Semaphore | None = asyncio.Semaphore(max_concurrency) if max_concurrency is not None else None
132
+
133
+ async def _run(doc: Document) -> SummarizerOutput:
134
+ if limiter is None:
135
+ return await self._summarize_chunk(doc.page_content, config)
136
+ async with limiter:
137
+ return await self._summarize_chunk(doc.page_content, config)
138
+
139
+ return await asyncio.gather(*(_run(doc) for doc in documents))
140
+
96
141
  def _create_chain(self) -> Runnable:
97
142
  return self._langfuse_manager.get_base_prompt(self.__class__.__name__) | self._langfuse_manager.get_base_llm(
98
143
  self.__class__.__name__