kolzchut-ragbot 1.4.2__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kolzchut_ragbot/Document.py +101 -101
- kolzchut_ragbot/IntegrateService.py +4 -4
- kolzchut_ragbot/__init__.py +0 -2
- kolzchut_ragbot/config.py +5 -5
- kolzchut_ragbot/engine.py +333 -246
- kolzchut_ragbot/get_full_documents_utilities.py +45 -0
- kolzchut_ragbot/llm_client.py +11 -11
- kolzchut_ragbot/model.py +182 -182
- kolzchut_ragbot-1.6.0.dist-info/METADATA +76 -0
- kolzchut_ragbot-1.6.0.dist-info/RECORD +12 -0
- {kolzchut_ragbot-1.4.2.dist-info → kolzchut_ragbot-1.6.0.dist-info}/WHEEL +1 -1
- kolzchut_ragbot-1.4.2.dist-info/METADATA +0 -67
- kolzchut_ragbot-1.4.2.dist-info/RECORD +0 -11
- {kolzchut_ragbot-1.4.2.dist-info → kolzchut_ragbot-1.6.0.dist-info}/top_level.txt +0 -0
kolzchut_ragbot/Document.py
CHANGED
@@ -1,101 +1,101 @@
|
|
1
|
-
import json
|
2
|
-
import os
|
3
|
-
|
4
|
-
DEFINITIONS_FILE = os.getenv("DOCUMENT_DEFINITION_CONFIG", "example-conf.json")
|
5
|
-
|
6
|
-
|
7
|
-
class DocumentFieldDefinition:
|
8
|
-
"""
|
9
|
-
Represents the definition of a document field.
|
10
|
-
|
11
|
-
Attributes:
|
12
|
-
field_name (str): The name of the field.
|
13
|
-
required (bool): Indicates if the field is required. Default is False.
|
14
|
-
"""
|
15
|
-
def __init__(self, field_name: str, required: bool = False):
|
16
|
-
self.field_name = field_name
|
17
|
-
self.required = required
|
18
|
-
|
19
|
-
|
20
|
-
class DocumentDefinitions:
|
21
|
-
"""
|
22
|
-
Represents the definitions for a document.
|
23
|
-
|
24
|
-
Attributes:
|
25
|
-
saved_fields (dict[str, DocumentFieldDefinition]): A dictionary of saved fields.
|
26
|
-
models (dict[str, str]): A dictionary of models.
|
27
|
-
identifier (str): The identifier field.
|
28
|
-
field_for_llm (str, optional): The field for LLM. Default is None.
|
29
|
-
"""
|
30
|
-
def __init__(self, saved_fields: dict[str, DocumentFieldDefinition], models: dict[str, str],
|
31
|
-
identifier: str, field_for_llm: str = None):
|
32
|
-
self.saved_fields = saved_fields
|
33
|
-
self.models = models
|
34
|
-
self.identifier = identifier
|
35
|
-
self.field_for_llm = field_for_llm
|
36
|
-
|
37
|
-
|
38
|
-
def initialize_definitions():
|
39
|
-
"""
|
40
|
-
Initializes the document definitions by reading the configuration file.
|
41
|
-
|
42
|
-
Raises:
|
43
|
-
ValueError: If the identifier field is not one of the saved fields or if any model field is not one of the saved fields.
|
44
|
-
|
45
|
-
Returns:
|
46
|
-
DocumentDefinitions: The initialized document definitions.
|
47
|
-
"""
|
48
|
-
with open(DEFINITIONS_FILE, 'r', encoding='utf-8') as f:
|
49
|
-
definitions = json.load(f)
|
50
|
-
|
51
|
-
saved_fields = definitions['saved_fields']
|
52
|
-
models = definitions['models']
|
53
|
-
identifier_field = definitions['identifier_field']
|
54
|
-
field_for_llm = definitions.get('field_for_llm', None)
|
55
|
-
if identifier_field not in saved_fields.keys():
|
56
|
-
raise ValueError("identifier_field must be one of the saved fields, check the configuration file")
|
57
|
-
|
58
|
-
for embedded_field in models.values():
|
59
|
-
if embedded_field not in saved_fields.keys():
|
60
|
-
raise ValueError(f"{embedded_field} must be one of the saved fields {saved_fields.keys()}, check the configuration file")
|
61
|
-
|
62
|
-
return DocumentDefinitions(saved_fields, models, identifier_field, field_for_llm)
|
63
|
-
|
64
|
-
|
65
|
-
definitions_singleton = None
|
66
|
-
|
67
|
-
|
68
|
-
def factory():
|
69
|
-
"""
|
70
|
-
Factory method to get the singleton instance of DocumentDefinitions.
|
71
|
-
|
72
|
-
Returns:
|
73
|
-
DocumentDefinitions: The singleton instance of document definitions.
|
74
|
-
"""
|
75
|
-
global definitions_singleton
|
76
|
-
if definitions_singleton is None:
|
77
|
-
definitions_singleton = initialize_definitions()
|
78
|
-
return definitions_singleton
|
79
|
-
|
80
|
-
|
81
|
-
class Document:
|
82
|
-
"""
|
83
|
-
Represents a document.
|
84
|
-
|
85
|
-
Attributes:
|
86
|
-
page_id (str): The ID of the page.
|
87
|
-
fields (dict): The fields of the document.
|
88
|
-
|
89
|
-
Raises:
|
90
|
-
ValueError: If the fields do not match the required fields or if a required field is missing.
|
91
|
-
"""
|
92
|
-
def __init__(self, page_id: str, fields: dict):
|
93
|
-
definitions = factory()
|
94
|
-
self.page_id = page_id
|
95
|
-
if fields.keys() != definitions.saved_fields.keys():
|
96
|
-
raise ValueError("fields do not match the required fields")
|
97
|
-
for defined_field in definitions.saved_fields.values():
|
98
|
-
if defined_field.required and defined_field.field_name not in fields:
|
99
|
-
raise ValueError(f"field {defined_field.field_name} is required")
|
100
|
-
if defined_field.field_name in fields:
|
101
|
-
setattr(self, defined_field.field_name, fields[defined_field.field_name])
|
1
|
+
import json
|
2
|
+
import os
|
3
|
+
|
4
|
+
DEFINITIONS_FILE = os.getenv("DOCUMENT_DEFINITION_CONFIG", "example-conf.json")
|
5
|
+
|
6
|
+
|
7
|
+
class DocumentFieldDefinition:
|
8
|
+
"""
|
9
|
+
Represents the definition of a document field.
|
10
|
+
|
11
|
+
Attributes:
|
12
|
+
field_name (str): The name of the field.
|
13
|
+
required (bool): Indicates if the field is required. Default is False.
|
14
|
+
"""
|
15
|
+
def __init__(self, field_name: str, required: bool = False):
|
16
|
+
self.field_name = field_name
|
17
|
+
self.required = required
|
18
|
+
|
19
|
+
|
20
|
+
class DocumentDefinitions:
|
21
|
+
"""
|
22
|
+
Represents the definitions for a document.
|
23
|
+
|
24
|
+
Attributes:
|
25
|
+
saved_fields (dict[str, DocumentFieldDefinition]): A dictionary of saved fields.
|
26
|
+
models (dict[str, str]): A dictionary of models.
|
27
|
+
identifier (str): The identifier field.
|
28
|
+
field_for_llm (str, optional): The field for LLM. Default is None.
|
29
|
+
"""
|
30
|
+
def __init__(self, saved_fields: dict[str, DocumentFieldDefinition], models: dict[str, str],
|
31
|
+
identifier: str, field_for_llm: str = None):
|
32
|
+
self.saved_fields = saved_fields
|
33
|
+
self.models = models
|
34
|
+
self.identifier = identifier
|
35
|
+
self.field_for_llm = field_for_llm
|
36
|
+
|
37
|
+
|
38
|
+
def initialize_definitions():
|
39
|
+
"""
|
40
|
+
Initializes the document definitions by reading the configuration file.
|
41
|
+
|
42
|
+
Raises:
|
43
|
+
ValueError: If the identifier field is not one of the saved fields or if any model field is not one of the saved fields.
|
44
|
+
|
45
|
+
Returns:
|
46
|
+
DocumentDefinitions: The initialized document definitions.
|
47
|
+
"""
|
48
|
+
with open(DEFINITIONS_FILE, 'r', encoding='utf-8') as f:
|
49
|
+
definitions = json.load(f)
|
50
|
+
|
51
|
+
saved_fields = definitions['saved_fields']
|
52
|
+
models = definitions['models']
|
53
|
+
identifier_field = definitions['identifier_field']
|
54
|
+
field_for_llm = definitions.get('field_for_llm', None)
|
55
|
+
if identifier_field not in saved_fields.keys():
|
56
|
+
raise ValueError("identifier_field must be one of the saved fields, check the configuration file")
|
57
|
+
|
58
|
+
for embedded_field in models.values():
|
59
|
+
if embedded_field not in saved_fields.keys():
|
60
|
+
raise ValueError(f"{embedded_field} must be one of the saved fields {saved_fields.keys()}, check the configuration file")
|
61
|
+
|
62
|
+
return DocumentDefinitions(saved_fields, models, identifier_field, field_for_llm)
|
63
|
+
|
64
|
+
|
65
|
+
definitions_singleton = None
|
66
|
+
|
67
|
+
|
68
|
+
def factory():
|
69
|
+
"""
|
70
|
+
Factory method to get the singleton instance of DocumentDefinitions.
|
71
|
+
|
72
|
+
Returns:
|
73
|
+
DocumentDefinitions: The singleton instance of document definitions.
|
74
|
+
"""
|
75
|
+
global definitions_singleton
|
76
|
+
if definitions_singleton is None:
|
77
|
+
definitions_singleton = initialize_definitions()
|
78
|
+
return definitions_singleton
|
79
|
+
|
80
|
+
|
81
|
+
class Document:
|
82
|
+
"""
|
83
|
+
Represents a document.
|
84
|
+
|
85
|
+
Attributes:
|
86
|
+
page_id (str): The ID of the page.
|
87
|
+
fields (dict): The fields of the document.
|
88
|
+
|
89
|
+
Raises:
|
90
|
+
ValueError: If the fields do not match the required fields or if a required field is missing.
|
91
|
+
"""
|
92
|
+
def __init__(self, page_id: str, fields: dict):
|
93
|
+
definitions = factory()
|
94
|
+
self.page_id = page_id
|
95
|
+
if fields.keys() != definitions.saved_fields.keys():
|
96
|
+
raise ValueError("fields do not match the required fields")
|
97
|
+
for defined_field in definitions.saved_fields.values():
|
98
|
+
if defined_field.required and defined_field.field_name not in fields:
|
99
|
+
raise ValueError(f"field {defined_field.field_name} is required")
|
100
|
+
if defined_field.field_name in fields:
|
101
|
+
setattr(self, defined_field.field_name, fields[defined_field.field_name])
|
kolzchut_ragbot/IntegrateService.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
class IntegrateService:
|
2
|
-
|
3
|
-
def on_update_docs(self, _docs):
|
4
|
-
raise NotImplementedError
|
1
|
+
class IntegrateService:
|
2
|
+
|
3
|
+
def on_update_docs(self, _docs):
|
4
|
+
raise NotImplementedError
|
kolzchut_ragbot/__init__.py
CHANGED
kolzchut_ragbot/config.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
import os
|
2
|
-
|
3
|
-
EMBEDDING_INDEX = os.getenv("ES_EMBEDDING_INDEX", "embeddings")
|
4
|
-
MODELS_LOCATION = os.getenv("MODELS_LOCATION", "models")
|
5
|
-
|
1
|
+
import os
|
2
|
+
|
3
|
+
EMBEDDING_INDEX = os.getenv("ES_EMBEDDING_INDEX", "embeddings")
|
4
|
+
MODELS_LOCATION = os.getenv("MODELS_LOCATION", "models")
|
5
|
+
|