admin-api-lib 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. admin_api_lib/__init__.py +0 -0
  2. admin_api_lib/api_endpoints/document_deleter.py +24 -0
  3. admin_api_lib/api_endpoints/document_reference_retriever.py +25 -0
  4. admin_api_lib/api_endpoints/documents_status_retriever.py +20 -0
  5. admin_api_lib/api_endpoints/file_uploader.py +31 -0
  6. admin_api_lib/api_endpoints/source_uploader.py +40 -0
  7. admin_api_lib/api_endpoints/uploader_base.py +30 -0
  8. admin_api_lib/apis/__init__.py +0 -0
  9. admin_api_lib/apis/admin_api.py +197 -0
  10. admin_api_lib/apis/admin_api_base.py +120 -0
  11. admin_api_lib/chunker/__init__.py +0 -0
  12. admin_api_lib/chunker/chunker.py +25 -0
  13. admin_api_lib/dependency_container.py +236 -0
  14. admin_api_lib/extractor_api_client/__init__.py +0 -0
  15. admin_api_lib/extractor_api_client/openapi_client/__init__.py +38 -0
  16. admin_api_lib/extractor_api_client/openapi_client/api/__init__.py +4 -0
  17. admin_api_lib/extractor_api_client/openapi_client/api/extractor_api.py +516 -0
  18. admin_api_lib/extractor_api_client/openapi_client/api_client.py +695 -0
  19. admin_api_lib/extractor_api_client/openapi_client/api_response.py +20 -0
  20. admin_api_lib/extractor_api_client/openapi_client/configuration.py +460 -0
  21. admin_api_lib/extractor_api_client/openapi_client/exceptions.py +197 -0
  22. admin_api_lib/extractor_api_client/openapi_client/models/__init__.py +21 -0
  23. admin_api_lib/extractor_api_client/openapi_client/models/content_type.py +34 -0
  24. admin_api_lib/extractor_api_client/openapi_client/models/extraction_parameters.py +103 -0
  25. admin_api_lib/extractor_api_client/openapi_client/models/extraction_request.py +82 -0
  26. admin_api_lib/extractor_api_client/openapi_client/models/information_piece.py +104 -0
  27. admin_api_lib/extractor_api_client/openapi_client/models/key_value_pair.py +92 -0
  28. admin_api_lib/extractor_api_client/openapi_client/rest.py +209 -0
  29. admin_api_lib/extractor_api_client/openapi_client/test/__init__.py +0 -0
  30. admin_api_lib/extractor_api_client/openapi_client/test/test_content_type.py +35 -0
  31. admin_api_lib/extractor_api_client/openapi_client/test/test_extraction_parameters.py +59 -0
  32. admin_api_lib/extractor_api_client/openapi_client/test/test_extraction_request.py +56 -0
  33. admin_api_lib/extractor_api_client/openapi_client/test/test_extractor_api.py +39 -0
  34. admin_api_lib/extractor_api_client/openapi_client/test/test_information_piece.py +62 -0
  35. admin_api_lib/extractor_api_client/openapi_client/test/test_key_value_pair.py +54 -0
  36. admin_api_lib/file_services/file_service.py +77 -0
  37. admin_api_lib/impl/__init__.py +0 -0
  38. admin_api_lib/impl/admin_api.py +167 -0
  39. admin_api_lib/impl/api_endpoints/default_document_deleter.py +84 -0
  40. admin_api_lib/impl/api_endpoints/default_document_reference_retriever.py +72 -0
  41. admin_api_lib/impl/api_endpoints/default_documents_status_retriever.py +41 -0
  42. admin_api_lib/impl/api_endpoints/default_file_uploader.py +234 -0
  43. admin_api_lib/impl/api_endpoints/default_source_uploader.py +202 -0
  44. admin_api_lib/impl/chunker/__init__.py +0 -0
  45. admin_api_lib/impl/chunker/chunker_type.py +11 -0
  46. admin_api_lib/impl/chunker/semantic_text_chunker.py +252 -0
  47. admin_api_lib/impl/chunker/text_chunker.py +33 -0
  48. admin_api_lib/impl/file_services/__init__.py +0 -0
  49. admin_api_lib/impl/file_services/s3_service.py +130 -0
  50. admin_api_lib/impl/information_enhancer/__init__.py +0 -0
  51. admin_api_lib/impl/information_enhancer/general_enhancer.py +52 -0
  52. admin_api_lib/impl/information_enhancer/page_summary_enhancer.py +62 -0
  53. admin_api_lib/impl/information_enhancer/summary_enhancer.py +74 -0
  54. admin_api_lib/impl/key_db/__init__.py +0 -0
  55. admin_api_lib/impl/key_db/file_status_key_value_store.py +111 -0
  56. admin_api_lib/impl/mapper/informationpiece2document.py +108 -0
  57. admin_api_lib/impl/settings/__init__.py +0 -0
  58. admin_api_lib/impl/settings/chunker_class_type_settings.py +18 -0
  59. admin_api_lib/impl/settings/chunker_settings.py +29 -0
  60. admin_api_lib/impl/settings/document_extractor_settings.py +21 -0
  61. admin_api_lib/impl/settings/key_value_settings.py +26 -0
  62. admin_api_lib/impl/settings/rag_api_settings.py +21 -0
  63. admin_api_lib/impl/settings/s3_settings.py +31 -0
  64. admin_api_lib/impl/settings/source_uploader_settings.py +23 -0
  65. admin_api_lib/impl/settings/summarizer_settings.py +86 -0
  66. admin_api_lib/impl/summarizer/__init__.py +0 -0
  67. admin_api_lib/impl/summarizer/langchain_summarizer.py +117 -0
  68. admin_api_lib/information_enhancer/__init__.py +0 -0
  69. admin_api_lib/information_enhancer/information_enhancer.py +34 -0
  70. admin_api_lib/main.py +54 -0
  71. admin_api_lib/models/__init__.py +0 -0
  72. admin_api_lib/models/document_status.py +86 -0
  73. admin_api_lib/models/extra_models.py +9 -0
  74. admin_api_lib/models/http_validation_error.py +105 -0
  75. admin_api_lib/models/key_value_pair.py +85 -0
  76. admin_api_lib/models/status.py +44 -0
  77. admin_api_lib/models/validation_error.py +104 -0
  78. admin_api_lib/models/validation_error_loc_inner.py +114 -0
  79. admin_api_lib/prompt_templates/__init__.py +0 -0
  80. admin_api_lib/prompt_templates/summarize_prompt.py +14 -0
  81. admin_api_lib/rag_backend_client/__init__.py +0 -0
  82. admin_api_lib/rag_backend_client/openapi_client/__init__.py +60 -0
  83. admin_api_lib/rag_backend_client/openapi_client/api/__init__.py +4 -0
  84. admin_api_lib/rag_backend_client/openapi_client/api/rag_api.py +968 -0
  85. admin_api_lib/rag_backend_client/openapi_client/api_client.py +698 -0
  86. admin_api_lib/rag_backend_client/openapi_client/api_response.py +22 -0
  87. admin_api_lib/rag_backend_client/openapi_client/configuration.py +460 -0
  88. admin_api_lib/rag_backend_client/openapi_client/exceptions.py +197 -0
  89. admin_api_lib/rag_backend_client/openapi_client/models/__init__.py +41 -0
  90. admin_api_lib/rag_backend_client/openapi_client/models/chat_history.py +99 -0
  91. admin_api_lib/rag_backend_client/openapi_client/models/chat_history_message.py +83 -0
  92. admin_api_lib/rag_backend_client/openapi_client/models/chat_request.py +93 -0
  93. admin_api_lib/rag_backend_client/openapi_client/models/chat_response.py +103 -0
  94. admin_api_lib/rag_backend_client/openapi_client/models/chat_role.py +35 -0
  95. admin_api_lib/rag_backend_client/openapi_client/models/content_type.py +37 -0
  96. admin_api_lib/rag_backend_client/openapi_client/models/delete_request.py +99 -0
  97. admin_api_lib/rag_backend_client/openapi_client/models/information_piece.py +110 -0
  98. admin_api_lib/rag_backend_client/openapi_client/models/key_value_pair.py +83 -0
  99. admin_api_lib/rag_backend_client/openapi_client/rest.py +209 -0
  100. admin_api_lib/summarizer/__init__.py +0 -0
  101. admin_api_lib/summarizer/summarizer.py +33 -0
  102. admin_api_lib/utils/__init__.py +0 -0
  103. admin_api_lib/utils/utils.py +32 -0
  104. admin_api_lib-3.2.0.dist-info/METADATA +24 -0
  105. admin_api_lib-3.2.0.dist-info/RECORD +106 -0
  106. admin_api_lib-3.2.0.dist-info/WHEEL +4 -0
@@ -0,0 +1,111 @@
+ """Module containing the FileStatusKeyValueStore class."""
+
+ import json
+
+ from redis import Redis
+
+ from admin_api_lib.impl.settings.key_value_settings import KeyValueSettings
+ from admin_api_lib.models.status import Status
+
+
+ class FileStatusKeyValueStore:
+     """
+     A key-value store for managing file statuses using Redis.
+
+     This class provides methods to upsert, remove, and retrieve file status information
+     from a Redis store. Each file status is stored as a JSON string containing the file name
+     and its associated status.
+
+     Attributes
+     ----------
+     STORAGE_KEY : str
+         The key under which all file statuses are stored in Redis.
+     INNER_FILENAME_KEY : str
+         The key used for the file name in the JSON string.
+     INNER_STATUS_KEY : str
+         The key used for the file status in the JSON string.
+     """
+
+     STORAGE_KEY = "stackit-rag-template-files"
+     INNER_FILENAME_KEY = "filename"
+     INNER_STATUS_KEY = "status"
+
+     def __init__(self, settings: KeyValueSettings):
+         """
+         Initialize the FileStatusKeyValueStore with the given settings.
+
+         Parameters
+         ----------
+         settings : KeyValueSettings
+             The settings object containing the host and port information for the Redis connection.
+         """
+         self._redis = Redis(host=settings.host, port=settings.port, decode_responses=True)
+
+     @staticmethod
+     def _to_str(file_name: str, file_status: Status) -> str:
+         return json.dumps(
+             {
+                 FileStatusKeyValueStore.INNER_FILENAME_KEY: file_name,
+                 FileStatusKeyValueStore.INNER_STATUS_KEY: file_status,
+             }
+         )
+
+     @staticmethod
+     def _from_str(redis_content: str) -> tuple[str, Status]:
+         content_dict = json.loads(redis_content)
+         return (
+             content_dict[FileStatusKeyValueStore.INNER_FILENAME_KEY],
+             content_dict[FileStatusKeyValueStore.INNER_STATUS_KEY],
+         )
+
+     def upsert(self, file_name: str, file_status: Status) -> None:
+         """
+         Upserts the status of a file in the key-value store.
+
+         This method first removes any existing entry for the given file name and then adds the new status.
+
+         Parameters
+         ----------
+         file_name : str
+             The name of the file whose status is to be upserted.
+         file_status : Status
+             The status to be associated with the file.
+
+         Returns
+         -------
+         None
+         """
+         self.remove(file_name)
+         self._redis.sadd(self.STORAGE_KEY, FileStatusKeyValueStore._to_str(file_name, file_status))
+
+     def remove(self, file_name: str) -> None:
+         """
+         Remove the specified file name from the key-value store.
+
+         Parameters
+         ----------
+         file_name : str
+             The name of the file to be removed from the key-value store.
+
+         Returns
+         -------
+         None
+         """
+         all_documents = self.get_all()
+         correct_file_name = [x for x in all_documents if x[0] == file_name]
+         for file_name_related in correct_file_name:
+             self._redis.srem(
+                 self.STORAGE_KEY, FileStatusKeyValueStore._to_str(file_name_related[0], file_name_related[1])
+             )
+
+     def get_all(self) -> list[tuple[str, Status]]:
+         """
+         Retrieve all file status information from the Redis store.
+
+         Returns
+         -------
+         list[tuple[str, Status]]
+             A list of tuples where each tuple contains a string and a Status object.
+         """
+         all_file_informations = list(self._redis.smembers(self.STORAGE_KEY))
+         return [FileStatusKeyValueStore._from_str(x) for x in all_file_informations]
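
For orientation, a minimal usage sketch of the store above (not part of the package); the Redis host/port, the file name, and the Status member used here are illustrative assumptions:

    # Hypothetical usage; Status.PROCESSING is assumed to exist in admin_api_lib.models.status.
    from admin_api_lib.impl.key_db.file_status_key_value_store import FileStatusKeyValueStore
    from admin_api_lib.impl.settings.key_value_settings import KeyValueSettings
    from admin_api_lib.models.status import Status

    settings = KeyValueSettings(host="localhost", port=6379)  # or via USECASE_KEYVALUE_* env vars
    store = FileStatusKeyValueStore(settings)

    store.upsert("report.pdf", Status.PROCESSING)  # replaces any earlier entry for this file name
    print(store.get_all())                         # [("report.pdf", <Status>), ...]
    store.remove("report.pdf")                     # drops every entry stored for this file name
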
@@ -0,0 +1,108 @@
+ """Module for mapping between InformationPiece and LangchainDocument."""
+
+ import json
+
+ from langchain_core.documents import Document as LangchainDocument
+
+ from admin_api_lib.extractor_api_client.openapi_client.models.content_type import (
+     ContentType as ExtractorInformaType,
+ )
+ from admin_api_lib.extractor_api_client.openapi_client.models.information_piece import (
+     InformationPiece as ExtractorInformationPiece,
+ )
+ from admin_api_lib.rag_backend_client.openapi_client.models.information_piece import (
+     InformationPiece as RagInformationPiece,
+ )
+ from admin_api_lib.rag_backend_client.openapi_client.models.key_value_pair import (
+     KeyValuePair as RagKeyValue,
+ )
+ from rag_core_lib.impl.data_types.content_type import ContentType as RagInformationType
+
+
+ class InformationPiece2Document:
+     """The InformationPiece2Document class.
+
+     A utility class for converting between ExtractorInformationPiece and LangchainDocument,
+     and between LangchainDocument and RagInformationPiece.
+
+     Attributes
+     ----------
+     LOOKUP_TABLE : dict
+         A dictionary mapping ExtractorInformaType to RagInformationType.
+     METADATA_TYPE_KEY : str
+         The key used to store the type of information piece in metadata.
+     """
+
+     LOOKUP_TABLE = {
+         ExtractorInformaType.IMAGE: RagInformationType.IMAGE,
+         ExtractorInformaType.TABLE: RagInformationType.TABLE,
+         ExtractorInformaType.TEXT: RagInformationType.TEXT,
+     }
+     METADATA_TYPE_KEY = "type"
+
+     @staticmethod
+     def extractor_information_piece2document(info: ExtractorInformationPiece) -> LangchainDocument:
+         """
+         Convert an ExtractorInformationPiece instance to a LangchainDocument instance.
+
+         Parameters
+         ----------
+         info : ExtractorInformationPiece
+             The information piece to be converted, containing metadata page content, type.
+
+         Returns
+         -------
+         LangchainDocument
+             The converted LangchainDocument with the page content, metadata and type.
+
+         Notes
+         -----
+         The metadata of the resulting LangchainDocument includes all key-value pairs from the
+         input metadata, with an additional entry for the type of the information piece.
+         """
+         metadata = {x.key: x.value for x in info.metadata}
+         metadata[InformationPiece2Document.METADATA_TYPE_KEY] = InformationPiece2Document.infotype2infotype(
+             info.type
+         ).value
+
+         return LangchainDocument(page_content=info.page_content, metadata=metadata)
+
+     @staticmethod
+     def document2rag_information_piece(document: LangchainDocument) -> RagInformationPiece:
+         """
+         Convert a LangchainDocument to a RagInformationPiece.
+
+         Parameters
+         ----------
+         document : LangchainDocument
+             The document to be converted, containing metadata, page content and type.
+
+         Returns
+         -------
+         RagInformationPiece
+             The converted information piece with type, metadata, and page content.
+         """
+         metadata = [RagKeyValue(key=str(key), value=json.dumps(value)) for key, value in document.metadata.items()]
+         content_type = RagInformationType(document.metadata[InformationPiece2Document.METADATA_TYPE_KEY].upper())
+         return RagInformationPiece(
+             type=content_type,
+             metadata=metadata,
+             page_content=document.page_content,
+         )
+
+     @staticmethod
+     def infotype2infotype(info_type: ExtractorInformaType) -> RagInformationType:
+         """
+         Convert from ExtractorInformaType to RagInformationType.
+
+         Parameters
+         ----------
+         info_type : ExtractorInformaType
+             The external information type to be converted.
+
+         Returns
+         -------
+         RagInformationType
+             The corresponding internal information type.
+         """
+         return InformationPiece2Document.LOOKUP_TABLE[info_type]
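
A hedged round-trip sketch of the mapper above (not part of the package); the keyword arguments of the generated OpenAPI models are assumed from how the mapper reads them (page_content, type, metadata, key, value), and the field values are arbitrary:

    # Hypothetical example only.
    from admin_api_lib.extractor_api_client.openapi_client.models.content_type import ContentType
    from admin_api_lib.extractor_api_client.openapi_client.models.information_piece import InformationPiece
    from admin_api_lib.extractor_api_client.openapi_client.models.key_value_pair import KeyValuePair
    from admin_api_lib.impl.mapper.informationpiece2document import InformationPiece2Document

    piece = InformationPiece(
        page_content="Quarterly revenue grew by 12 percent.",
        type=ContentType.TEXT,
        metadata=[KeyValuePair(key="document", value="report.pdf")],
    )
    doc = InformationPiece2Document.extractor_information_piece2document(piece)
    # doc.metadata keeps the original key-value pairs and adds the mapped content type under "type"
    rag_piece = InformationPiece2Document.document2rag_information_piece(doc)
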
File without changes
@@ -0,0 +1,18 @@
+ """Settings for selecting the embedder implementation."""
+
+ from pydantic import Field
+ from pydantic_settings import BaseSettings
+
+ from admin_api_lib.impl.chunker.chunker_type import ChunkerType
+
+
+ class ChunkerClassTypeSettings(BaseSettings):
+     """Settings controlling which chunker implementation is used."""
+
+     class Config:
+         """Configure environment integration for the settings."""
+
+         env_prefix = "CHUNKER_CLASS_TYPE_"
+         case_sensitive = False
+
+     chunker_type: ChunkerType = Field(default=ChunkerType.RECURSIVE)
@@ -0,0 +1,29 @@
+ """Contains settings regarding the chunker."""
+
+ from typing import Literal
+
+ from pydantic import Field
+ from pydantic_settings import BaseSettings
+
+
+ class ChunkerSettings(BaseSettings):
+     """Contains settings regarding the chunker configuration."""
+
+     class Config:
+         """Config class for reading Fields from env."""
+
+         env_prefix = "CHUNKER_"
+         case_sensitive = False
+
+     max_size: int = Field(default=1000, gt=0)
+     overlap: int = Field(default=100, ge=0)
+
+     breakpoint_threshold_type: Literal[
+         "percentile",
+         "standard_deviation",
+         "interquartile",
+         "gradient",
+     ] = Field(default="percentile")
+     breakpoint_threshold_amount: float = Field(default=95.0, ge=0.0)
+     buffer_size: int = Field(default=1, ge=0)
+     min_size: int = Field(default=200, gt=0)
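
Because ChunkerSettings is a pydantic BaseSettings class with env_prefix "CHUNKER_", each field can be supplied through an environment variable; a small illustration with arbitrary values:

    # Illustrative only; any field not set in the environment keeps its declared default.
    import os

    from admin_api_lib.impl.settings.chunker_settings import ChunkerSettings

    os.environ["CHUNKER_MAX_SIZE"] = "1500"
    os.environ["CHUNKER_BREAKPOINT_THRESHOLD_TYPE"] = "gradient"

    settings = ChunkerSettings()
    assert settings.max_size == 1500
    assert settings.overlap == 100  # default, since CHUNKER_OVERLAP was not set
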
@@ -0,0 +1,21 @@
+ """Contains settings regarding the chunker."""
+
+ from pydantic import Field
+ from pydantic_settings import BaseSettings
+
+
+ class DocumentExtractorSettings(BaseSettings):
+     """Contains settings regarding the document extractor microservice.
+
+     Attributes
+     ----------
+     host (str): The url to the api.
+     """
+
+     class Config:
+         """Config class for reading Fields from env."""
+
+         env_prefix = "DOCUMENT_EXTRACTOR_"
+         case_sensitive = False
+
+     host: str = Field(default="http://extractor:8080")
@@ -0,0 +1,26 @@
+ """Contains settings regarding the key values store."""
+
+ from pydantic import Field
+ from pydantic_settings import BaseSettings
+
+
+ class KeyValueSettings(BaseSettings):
+     """
+     Contains settings regarding the key value store.
+
+     Attributes
+     ----------
+     host : str
+         The hostname of the key value store.
+     port : int
+         The port number of the key value store.
+     """
+
+     class Config:
+         """Config class for reading Fields from env."""
+
+         env_prefix = "USECASE_KEYVALUE_"
+         case_sensitive = False
+
+     host: str = Field()
+     port: int = Field()
@@ -0,0 +1,21 @@
+ """Contains settings regarding the chunker."""
+
+ from pydantic import Field
+ from pydantic_settings import BaseSettings
+
+
+ class RAGAPISettings(BaseSettings):
+     """Contains settings regarding the rag api microservice.
+
+     Attributes
+     ----------
+     host (str): The url to the api.
+     """
+
+     class Config:
+         """Config class for reading Fields from env."""
+
+         env_prefix = "RAG_API"
+         case_sensitive = False
+
+     host: str = Field(default="http://backend:8080")
@@ -0,0 +1,31 @@
+ """Contains settings regarding the S3 storage."""
+
+ from pydantic_settings import BaseSettings
+
+
+ class S3Settings(BaseSettings):
+     """
+     Contains settings regarding the S3 storage.
+
+     Attributes
+     ----------
+     secret_access_key : str
+         The secret access key for S3.
+     access_key_id : str
+         The access key ID for S3.
+     endpoint : str
+         The endpoint URL for S3.
+     bucket : str
+         The bucket name in S3.
+     """
+
+     class Config:
+         """Config class for reading Fields from env."""
+
+         env_prefix = "S3_"
+         case_sensitive = False
+
+     secret_access_key: str
+     access_key_id: str
+     endpoint: str
+     bucket: str
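
The four S3 fields declare no defaults, so pydantic-settings requires them at construction time, typically through S3_-prefixed environment variables; the values below are placeholders only:

    # Placeholder credentials for illustration only.
    import os

    from admin_api_lib.impl.settings.s3_settings import S3Settings

    os.environ.update({
        "S3_ACCESS_KEY_ID": "example-key",
        "S3_SECRET_ACCESS_KEY": "example-secret",
        "S3_ENDPOINT": "http://localhost:9000",
        "S3_BUCKET": "documents",
    })
    settings = S3Settings()  # raises a ValidationError if any of the four variables is missing
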
@@ -0,0 +1,23 @@
+ """Contains settings regarding the SourceUploader."""
+
+ from pydantic import Field
+ from pydantic_settings import BaseSettings
+
+
+ class SourceUploaderSettings(BaseSettings):
+     """
+     Contains settings regarding the SourceUploader.
+
+     Attributes
+     ----------
+     timeout : float
+         The timeout for the SourceUploader.
+     """
+
+     class Config:
+         """Config class for reading Fields from env."""
+
+         env_prefix = "SOURCE_UPLOADER_"
+         case_sensitive = False
+
+     timeout: float = Field(default=3600.0, description="Timeout for the SourceUploader in seconds.")
@@ -0,0 +1,86 @@
+ """Contains settings for summarizer."""
+
+ from typing import Optional
+ from pydantic import Field, PositiveInt, model_validator
+ from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+ class SummarizerSettings(BaseSettings):
+     """
+     Contains settings regarding the summarizer.
+
+     Attributes
+     ----------
+     maximum_input_size : int
+         The maximum size of the input that the summarizer can handle. Default is 8000.
+     maximum_concurrency : int
+         The maximum number of concurrent summarization processes. Default is 10.
+     max_retries: Optional[PositiveInt]
+         Total retries, not counting the initial attempt.
+     retry_base_delay: Optional[float]
+         Base delay in seconds for the first retry.
+     retry_max_delay: Optional[float]
+         Maximum delay cap in seconds for any single wait.
+     backoff_factor: Optional[float]
+         Exponential backoff factor (>= 1).
+     attempt_cap: Optional[int]
+         Cap for exponent growth (backoff_factor ** attempt_cap).
+     jitter_min: Optional[float]
+         Minimum jitter in seconds.
+     jitter_max: Optional[float]
+         Maximum jitter in seconds.
+     """
+
+     model_config = SettingsConfigDict(env_prefix="SUMMARIZER_", case_sensitive=False)
+
+     maximum_input_size: int = Field(default=8000)
+     maximum_concurrency: int = Field(default=10)
+     max_retries: Optional[PositiveInt] = Field(
+         default=None,
+         title="Max Retries",
+         description="Total retries, not counting the initial attempt.",
+     )
+     retry_base_delay: Optional[float] = Field(
+         default=None,
+         ge=0,
+         title="Retry Base Delay",
+         description="Base delay in seconds for the first retry.",
+     )
+     retry_max_delay: Optional[float] = Field(
+         default=None,
+         gt=0,
+         title="Retry Max Delay",
+         description="Maximum delay cap in seconds for any single wait.",
+     )
+     backoff_factor: Optional[float] = Field(
+         default=None,
+         ge=1.0,
+         title="Backoff Factor",
+         description="Exponential backoff factor (>= 1).",
+     )
+     attempt_cap: Optional[int] = Field(
+         default=None,
+         ge=0,
+         title="Attempt Cap",
+         description="Cap for exponent growth (backoff_factor ** attempt_cap).",
+     )
+     jitter_min: Optional[float] = Field(
+         default=None,
+         ge=0.0,
+         title="Jitter Min (s)",
+         description="Minimum jitter in seconds.",
+     )
+     jitter_max: Optional[float] = Field(
+         default=None,
+         ge=0.0,
+         title="Jitter Max (s)",
+         description="Maximum jitter in seconds.",
+     )
+
+     @model_validator(mode="after")
+     def _check_relations(self) -> "SummarizerSettings":
+         if not self.jitter_min or not self.jitter_max:
+             return self
+         if self.jitter_max < self.jitter_min:
+             raise ValueError("jitter_max must be >= jitter_min")
+         return self
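
The model validator above only compares the jitter bounds when both are set to non-zero values; a quick illustration of accepted and rejected combinations (values are arbitrary, SUMMARIZER_-prefixed env vars work as well):

    from pydantic import ValidationError

    from admin_api_lib.impl.settings.summarizer_settings import SummarizerSettings

    SummarizerSettings(jitter_min=0.1, jitter_max=0.5)  # accepted
    SummarizerSettings(jitter_max=0.1)                  # accepted: the relation is checked only when both are given

    try:
        SummarizerSettings(jitter_min=0.5, jitter_max=0.1)
    except ValidationError as err:
        print(err)  # jitter_max must be >= jitter_min
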
File without changes
@@ -0,0 +1,117 @@
+ """Module for the LangchainSummarizer class."""
+
+ import asyncio
+ import logging
+ from typing import Optional
+
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_core.documents import Document
+ from langchain_core.runnables import Runnable, RunnableConfig, ensure_config
+ from openai import APIConnectionError, APIError, APITimeoutError, RateLimitError
+
+ from admin_api_lib.impl.settings.summarizer_settings import SummarizerSettings
+ from admin_api_lib.summarizer.summarizer import (
+     Summarizer,
+     SummarizerInput,
+     SummarizerOutput,
+ )
+ from rag_core_lib.impl.langfuse_manager.langfuse_manager import LangfuseManager
+ from rag_core_lib.impl.settings.retry_decorator_settings import RetryDecoratorSettings
+ from rag_core_lib.impl.utils.async_threadsafe_semaphore import AsyncThreadsafeSemaphore
+ from rag_core_lib.impl.utils.retry_decorator import create_retry_decorator_settings, retry_with_backoff
+
+ logger = logging.getLogger(__name__)
+
+
+ class LangchainSummarizer(Summarizer):
+     """Is responsible for summarizing input data.
+
+     LangchainSummarizer is responsible for summarizing input data using the LangfuseManager,
+     RecursiveCharacterTextSplitter, and AsyncThreadsafeSemaphore. It handles chunking of the input
+     document and retries the summarization process if an error occurs.
+     """
+
+     def __init__(
+         self,
+         langfuse_manager: LangfuseManager,
+         chunker: RecursiveCharacterTextSplitter,
+         semaphore: AsyncThreadsafeSemaphore,
+         summarizer_settings: SummarizerSettings,
+         retry_decorator_settings: RetryDecoratorSettings,
+     ):
+         self._chunker = chunker
+         self._langfuse_manager = langfuse_manager
+         self._semaphore = semaphore
+         self._retry_decorator_settings = create_retry_decorator_settings(summarizer_settings, retry_decorator_settings)
+
+     async def ainvoke(self, query: SummarizerInput, config: Optional[RunnableConfig] = None) -> SummarizerOutput:
+         """
+         Asynchronously invokes the summarization process on the given query.
+
+         Parameters
+         ----------
+         query : SummarizerInput
+             The input data to be summarized.
+         config : Optional[RunnableConfig], optional
+             Configuration options for the summarization process, by default None.
+
+         Returns
+         -------
+         SummarizerOutput
+             The summarized output.
+
+         Raises
+         ------
+         Exception
+             If the summary creation fails after the allowed number of tries.
+
+         Notes
+         -----
+         This method handles chunking of the input document and retries the summarization
+         process if an error occurs, up to the number of tries specified in the config.
+         """
+         assert query, "Query is empty: %s" % query  # noqa S101
+         config = ensure_config(config)
+
+         document = Document(page_content=query)
+         langchain_documents = self._chunker.split_documents([document])
+         logger.debug("Summarizing %d chunk(s)...", len(langchain_documents))
+
+         # Fan out with concurrency, bounded by your semaphore inside _summarize_chunk
+         tasks = [asyncio.create_task(self._summarize_chunk(doc.page_content, config)) for doc in langchain_documents]
+         outputs = await asyncio.gather(*tasks)
+
+         if len(outputs) == 1:
+             return outputs[0]
+
+         merged = " ".join(outputs)
+
+         logger.debug(
+             "Reduced number of chars from %d to %d",
+             len("".join([x.page_content for x in langchain_documents])),
+             len(merged),
+         )
+         return await self._summarize_chunk(merged, config)
+
+     def _create_chain(self) -> Runnable:
+         return self._langfuse_manager.get_base_prompt(self.__class__.__name__) | self._langfuse_manager.get_base_llm(
+             self.__class__.__name__
+         )
+
+     def _retry_with_backoff_wrapper(self):
+         return retry_with_backoff(
+             settings=self._retry_decorator_settings,
+             exceptions=(APIError, RateLimitError, APITimeoutError, APIConnectionError),
+             rate_limit_exceptions=(RateLimitError,),
+             logger=logger,
+         )
+
+     async def _summarize_chunk(self, text: str, config: Optional[RunnableConfig]) -> SummarizerOutput:
+         @self._retry_with_backoff_wrapper()
+         async def _call(text: str, config: Optional[RunnableConfig]) -> SummarizerOutput:
+             response = await self._create_chain().ainvoke({"text": text}, config)
+             return response.content if hasattr(response, "content") else str(response)
+
+         # Hold the semaphore for the entire retry lifecycle
+         async with self._semaphore:
+             return await _call(text, config)
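
The retry behaviour itself lives in rag_core_lib's retry_with_backoff; as a rough mental model only, the SummarizerSettings retry fields plausibly combine into a per-attempt wait along these lines (this sketch is an assumption, not the library's actual formula):

    # Hedged sketch: exponential backoff with an exponent cap, a delay cap, and jitter.
    import random

    def sketch_delay(attempt: int, base: float, factor: float, attempt_cap: int,
                     max_delay: float, jitter_min: float, jitter_max: float) -> float:
        exponent = min(attempt, attempt_cap)                    # attempt_cap limits exponent growth
        delay = min(base * (factor ** exponent), max_delay)     # retry_max_delay caps any single wait
        return delay + random.uniform(jitter_min, jitter_max)   # jitter spreads out concurrent retries
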
File without changes
@@ -0,0 +1,34 @@
+ """Module for the InformationEnhancer abstract base class."""
+
+ from abc import ABC, abstractmethod
+ from typing import Optional
+
+ from langchain_core.documents import Document
+ from langchain_core.runnables import RunnableConfig
+
+ from rag_core_lib.runnables.async_runnable import AsyncRunnable
+
+ RetrieverInput = list[Document]
+ RetrieverOutput = list[Document]
+
+
+ class InformationEnhancer(AsyncRunnable[RetrieverInput, RetrieverOutput], ABC):
+     """The base class for an information enhancer."""
+
+     @abstractmethod
+     async def ainvoke(self, information: RetrieverInput, config: Optional[RunnableConfig] = None) -> RetrieverOutput:
+         """
+         Asynchronously invokes the information enhancer with the given input and configuration.
+
+         Parameters
+         ----------
+         information : RetrieverInput
+             The input information to be processed by the information enhancer.
+         config : Optional[RunnableConfig]
+             The configuration settings for the information enhancer, by default None.
+
+         Returns
+         -------
+         RetrieverOutput
+             The output after processing the input information.
+         """