kolzchut-ragbot 1.4.2__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,101 +1,101 @@
1
- import json
2
- import os
3
-
4
- DEFINITIONS_FILE = os.getenv("DOCUMENT_DEFINITION_CONFIG", "example-conf.json")
5
-
6
-
7
- class DocumentFieldDefinition:
8
- """
9
- Represents the definition of a document field.
10
-
11
- Attributes:
12
- field_name (str): The name of the field.
13
- required (bool): Indicates if the field is required. Default is False.
14
- """
15
- def __init__(self, field_name: str, required: bool = False):
16
- self.field_name = field_name
17
- self.required = required
18
-
19
-
20
- class DocumentDefinitions:
21
- """
22
- Represents the definitions for a document.
23
-
24
- Attributes:
25
- saved_fields (dict[str, DocumentFieldDefinition]): A dictionary of saved fields.
26
- models (dict[str, str]): A dictionary of models.
27
- identifier (str): The identifier field.
28
- field_for_llm (str, optional): The field for LLM. Default is None.
29
- """
30
- def __init__(self, saved_fields: dict[str, DocumentFieldDefinition], models: dict[str, str],
31
- identifier: str, field_for_llm: str = None):
32
- self.saved_fields = saved_fields
33
- self.models = models
34
- self.identifier = identifier
35
- self.field_for_llm = field_for_llm
36
-
37
-
38
- def initialize_definitions():
39
- """
40
- Initializes the document definitions by reading the configuration file.
41
-
42
- Raises:
43
- ValueError: If the identifier field is not one of the saved fields or if any model field is not one of the saved fields.
44
-
45
- Returns:
46
- DocumentDefinitions: The initialized document definitions.
47
- """
48
- with open(DEFINITIONS_FILE, 'r', encoding='utf-8') as f:
49
- definitions = json.load(f)
50
-
51
- saved_fields = definitions['saved_fields']
52
- models = definitions['models']
53
- identifier_field = definitions['identifier_field']
54
- field_for_llm = definitions.get('field_for_llm', None)
55
- if identifier_field not in saved_fields.keys():
56
- raise ValueError("identifier_field must be one of the saved fields, check the configuration file")
57
-
58
- for embedded_field in models.values():
59
- if embedded_field not in saved_fields.keys():
60
- raise ValueError(f"{embedded_field} must be one of the saved fields {saved_fields.keys()}, check the configuration file")
61
-
62
- return DocumentDefinitions(saved_fields, models, identifier_field, field_for_llm)
63
-
64
-
65
- definitions_singleton = None
66
-
67
-
68
- def factory():
69
- """
70
- Factory method to get the singleton instance of DocumentDefinitions.
71
-
72
- Returns:
73
- DocumentDefinitions: The singleton instance of document definitions.
74
- """
75
- global definitions_singleton
76
- if definitions_singleton is None:
77
- definitions_singleton = initialize_definitions()
78
- return definitions_singleton
79
-
80
-
81
- class Document:
82
- """
83
- Represents a document.
84
-
85
- Attributes:
86
- page_id (str): The ID of the page.
87
- fields (dict): The fields of the document.
88
-
89
- Raises:
90
- ValueError: If the fields do not match the required fields or if a required field is missing.
91
- """
92
- def __init__(self, page_id: str, fields: dict):
93
- definitions = factory()
94
- self.page_id = page_id
95
- if fields.keys() != definitions.saved_fields.keys():
96
- raise ValueError("fields do not match the required fields")
97
- for defined_field in definitions.saved_fields.values():
98
- if defined_field.required and defined_field.field_name not in fields:
99
- raise ValueError(f"field {defined_field.field_name} is required")
100
- if defined_field.field_name in fields:
101
- setattr(self, defined_field.field_name, fields[defined_field.field_name])
1
+ import json
2
+ import os
3
+
4
+ DEFINITIONS_FILE = os.getenv("DOCUMENT_DEFINITION_CONFIG", "example-conf.json")
5
+
6
+
7
+ class DocumentFieldDefinition:
8
+ """
9
+ Represents the definition of a document field.
10
+
11
+ Attributes:
12
+ field_name (str): The name of the field.
13
+ required (bool): Indicates if the field is required. Default is False.
14
+ """
15
+ def __init__(self, field_name: str, required: bool = False):
16
+ self.field_name = field_name
17
+ self.required = required
18
+
19
+
20
+ class DocumentDefinitions:
21
+ """
22
+ Represents the definitions for a document.
23
+
24
+ Attributes:
25
+ saved_fields (dict[str, DocumentFieldDefinition]): A dictionary of saved fields.
26
+ models (dict[str, str]): A dictionary of models.
27
+ identifier (str): The identifier field.
28
+ field_for_llm (str, optional): The field for LLM. Default is None.
29
+ """
30
+ def __init__(self, saved_fields: dict[str, DocumentFieldDefinition], models: dict[str, str],
31
+ identifier: str, field_for_llm: str = None):
32
+ self.saved_fields = saved_fields
33
+ self.models = models
34
+ self.identifier = identifier
35
+ self.field_for_llm = field_for_llm
36
+
37
+
38
+ def initialize_definitions():
39
+ """
40
+ Initializes the document definitions by reading the configuration file.
41
+
42
+ Raises:
43
+ ValueError: If the identifier field is not one of the saved fields or if any model field is not one of the saved fields.
44
+
45
+ Returns:
46
+ DocumentDefinitions: The initialized document definitions.
47
+ """
48
+ with open(DEFINITIONS_FILE, 'r', encoding='utf-8') as f:
49
+ definitions = json.load(f)
50
+
51
+ saved_fields = definitions['saved_fields']
52
+ models = definitions['models']
53
+ identifier_field = definitions['identifier_field']
54
+ field_for_llm = definitions.get('field_for_llm', None)
55
+ if identifier_field not in saved_fields.keys():
56
+ raise ValueError("identifier_field must be one of the saved fields, check the configuration file")
57
+
58
+ for embedded_field in models.values():
59
+ if embedded_field not in saved_fields.keys():
60
+ raise ValueError(f"{embedded_field} must be one of the saved fields {saved_fields.keys()}, check the configuration file")
61
+
62
+ return DocumentDefinitions(saved_fields, models, identifier_field, field_for_llm)
63
+
64
+
65
+ definitions_singleton = None
66
+
67
+
68
+ def factory():
69
+ """
70
+ Factory method to get the singleton instance of DocumentDefinitions.
71
+
72
+ Returns:
73
+ DocumentDefinitions: The singleton instance of document definitions.
74
+ """
75
+ global definitions_singleton
76
+ if definitions_singleton is None:
77
+ definitions_singleton = initialize_definitions()
78
+ return definitions_singleton
79
+
80
+
81
+ class Document:
82
+ """
83
+ Represents a document.
84
+
85
+ Attributes:
86
+ page_id (str): The ID of the page.
87
+ fields (dict): The fields of the document.
88
+
89
+ Raises:
90
+ ValueError: If the fields do not match the required fields or if a required field is missing.
91
+ """
92
+ def __init__(self, page_id: str, fields: dict):
93
+ definitions = factory()
94
+ self.page_id = page_id
95
+ if fields.keys() != definitions.saved_fields.keys():
96
+ raise ValueError("fields do not match the required fields")
97
+ for defined_field in definitions.saved_fields.values():
98
+ if defined_field.required and defined_field.field_name not in fields:
99
+ raise ValueError(f"field {defined_field.field_name} is required")
100
+ if defined_field.field_name in fields:
101
+ setattr(self, defined_field.field_name, fields[defined_field.field_name])
@@ -1,4 +1,4 @@
1
- class IntegrateService:
2
-
3
- def on_update_docs(self, _docs):
4
- raise NotImplementedError
1
+ class IntegrateService:
2
+
3
+ def on_update_docs(self, _docs):
4
+ raise NotImplementedError
@@ -1,2 +0,0 @@
1
- __version__ = "1.0.53"
2
-
kolzchut_ragbot/config.py CHANGED
@@ -1,5 +1,5 @@
1
- import os
2
-
3
- EMBEDDING_INDEX = os.getenv("ES_EMBEDDING_INDEX", "embeddings")
4
- MODELS_LOCATION = os.getenv("MODELS_LOCATION", "models")
5
-
1
+ import os
2
+
3
+ EMBEDDING_INDEX = os.getenv("ES_EMBEDDING_INDEX", "embeddings")
4
+ MODELS_LOCATION = os.getenv("MODELS_LOCATION", "models")
5
+
kolzchut_ragbot/engine.py CHANGED
@@ -1,246 +1,254 @@
1
- import time
2
- from collections import defaultdict
3
- from datetime import datetime
4
- from .llm_client import LLMClient
5
- from . import config
6
- from .model import es_client_factory
7
- from .Document import factory
8
- from sentence_transformers import SentenceTransformer
9
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
-
11
- import torch
12
- import os
13
-
14
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
15
- definitions = factory()
16
-
17
-
18
- class Engine:
19
- """
20
- Engine class for handling document search and retrieval using Elasticsearch and LLMs.
21
-
22
- Attributes:
23
- llms_client (LLMClient): The LLM client instance.
24
- elastic_model (Model): The Elasticsearch model instance.
25
- models (dict): A dictionary of SentenceTransformer models.
26
- reranker_tokenizer (AutoTokenizer): The tokenizer for the reranker model.
27
- reranker_model (AutoModelForSequenceClassification): The reranker model.
28
- identifier_field (str): The identifier field for documents.
29
-
30
- Methods:
31
- rerank_with_me5(query, documents, k=5):
32
- Reranks documents based on the query using the reranker model.
33
-
34
- update_docs(list_of_docs, embed_only_fields=None, delete_existing=False):
35
- Updates or creates documents in the Elasticsearch index.
36
-
37
- reciprocal_rank_fusion(ranking_lists, k=60, weights=None):
38
- Performs Reciprocal Rank Fusion on a list of ranking lists.
39
-
40
- search_documents(query, top_k):
41
- Searches for documents based on the query and returns the top_k results.
42
-
43
- answer_query(query, top_k, model):
44
- Answers a query using the top_k documents and the specified model.
45
- """
46
-
47
- def __init__(self, llms_client: LLMClient, elastic_model=None, models=None, reranker_tokenizer=None,
48
- reranker_model=None, es_client=None):
49
- """
50
- Initializes the Engine instance.
51
-
52
- Args:
53
- llms_client (LLMClient): The LLM client instance.
54
- elastic_model (Model, optional): The Elasticsearch model instance. Default is None.
55
- models (dict, optional): A dictionary of SentenceTransformer models. Default is None.
56
- reranker_tokenizer (AutoTokenizer, optional): The tokenizer for the reranker model. Default is None.
57
- reranker_model (AutoModelForSequenceClassification, optional): The reranker model. Default is None.
58
- es_client (optional): The Elasticsearch client instance. Default is None.
59
- """
60
- if elastic_model is None:
61
- self.elastic_model = es_client_factory(es_client)
62
- else:
63
- self.elastic_model = elastic_model
64
-
65
- self.llms_client = llms_client
66
-
67
- self.identifier_field = factory().identifier
68
-
69
- if models is None:
70
- self.models = {f"{model_name}": SentenceTransformer(config.MODELS_LOCATION + "/" + model_name).to(device)
71
- for model_name in definitions.models.keys()}
72
- else:
73
- self.models = models
74
- for model in self.models.values():
75
- model.eval()
76
-
77
- if reranker_tokenizer is None:
78
- self.reranker_tokenizer = AutoTokenizer.from_pretrained(os.getenv("TOKENIZER_LOCATION"))
79
- else:
80
- self.reranker_tokenizer = reranker_tokenizer
81
-
82
- if reranker_model is None:
83
- self.reranker_model = AutoModelForSequenceClassification.from_pretrained(os.getenv("TOKENIZER_LOCATION"))
84
- else:
85
- self.reranker_model = reranker_model
86
- self.reranker_model.eval()
87
-
88
-
89
- def rerank_with_me5(self, query, documents, k=5):
90
- """
91
- Reranks documents based on the query using the reranker model.
92
-
93
- Args:
94
- query (str): The query string.
95
- documents (list): A list of documents to be reranked.
96
- k (int, optional): The number of top documents to return. Default is 5.
97
-
98
- Returns:
99
- list: A list of top k reranked documents.
100
- """
101
- pairs = [(query, doc) for doc in set(documents)]
102
- inputs = self.reranker_tokenizer(pairs, return_tensors='pt', padding=True, truncation=True, max_length=512)
103
-
104
- # Make predictions
105
- with torch.no_grad():
106
- outputs = self.reranker_model(**inputs)
107
-
108
- scores = outputs.logits.squeeze()
109
-
110
- if scores.ndim > 1:
111
- scores = scores[:, 1] # Assuming binary classification and index 1 is the relevance score
112
-
113
- sorted_indices = torch.argsort(scores, descending=True)
114
- # If there is only one document, return it to avoid torch error
115
- if len(sorted_indices) == 1:
116
- return [pairs[0][1]]
117
- # Sort documents by their highest score
118
- sorted_docs = [pairs[i][1] for i in sorted_indices]
119
- return sorted_docs[:k]
120
-
121
- def update_docs(self, list_of_docs: list[dict], embed_only_fields=None, delete_existing=False):
122
- """
123
- Updates or creates documents in the Elasticsearch index.
124
-
125
- Args:
126
- list_of_docs (list[dict]): A list of dictionaries representing the documents to be indexed.
127
- embed_only_fields (list, optional): A list of fields to be embedded. Default is None.
128
- delete_existing (bool, optional): Whether to delete existing documents. Default is False.
129
- """
130
- embed_only_fields = embed_only_fields or definitions.models.values()
131
- for doc in list_of_docs:
132
- for semantic_model, field in definitions.models.items():
133
- if field in doc.keys() and field in embed_only_fields:
134
- content_vectors = self.models[semantic_model].encode(doc[field])
135
- doc[f'{field}_{semantic_model}_vectors'] = content_vectors
136
-
137
- doc['last_update'] = datetime.now()
138
- self.elastic_model.create_or_update_documents(list_of_docs, delete_existing)
139
-
140
- def reciprocal_rank_fusion(self, ranking_lists, k=60, weights=None):
141
- """
142
- Performs Reciprocal Rank Fusion on a list of ranking lists.
143
-
144
- Args:
145
- :param ranking_lists: List of ranking lists, where each ranking list is a list of documents returned by a model.
146
- :param k: The parameter for the reciprocal rank calculation (default is 60).
147
- :param: weights: Optional. Weights for each ranking list.
148
-
149
- Returns:
150
- list: A fused ranking list of documents.
151
- """
152
- scores = defaultdict(float)
153
-
154
- for list_index, rank_list in enumerate(ranking_lists):
155
- for rank, identifier in enumerate(rank_list):
156
- # Reciprocal rank score
157
- w = weights[list_index] if weights else 1
158
- scores[identifier] += w / (k + rank + 1)
159
-
160
- # Sort the documents by their cumulative scores in descending order
161
- fused_list = sorted(scores, key=scores.get, reverse=True)
162
-
163
- return fused_list
164
-
165
- def search_documents(self, query: str, top_k: int):
166
- """
167
- Searches for documents based on the query and returns the top_k results.
168
-
169
- Args:
170
- query (str): The query string.
171
- top_k (int): The number of top documents to return.
172
-
173
- Returns:
174
- list: A list of top k documents.
175
- """
176
- query_embeddings = {f"{semantic_model}": self.models[semantic_model].encode(query) for semantic_model in
177
- definitions.models.keys()}
178
- all_docs_by_model = self.elastic_model.search(query_embeddings)
179
- all_docs = []
180
- ids_for_fusion = []
181
- all_docs_and_scores = {}
182
-
183
- for key, values in all_docs_by_model.items():
184
- print(f"\nFound {len(values)} documents for model\n")
185
- model_ids = []
186
- scores_for_model = []
187
-
188
- for doc in values:
189
- model_ids.append(doc["_source"]["page_id"])
190
- all_docs.append(doc)
191
- scores_for_model.append({"doc": doc["_source"]["title"], "score": doc["_score"]})
192
- ids_for_fusion.append(model_ids)
193
- all_docs_and_scores[f'{key}'] = scores_for_model
194
- print(f"\nFusing {len(ids_for_fusion)} results\n")
195
- fused_ids = self.reciprocal_rank_fusion(ids_for_fusion, k=top_k)
196
- top_k_documents = []
197
- top_titles = []
198
-
199
- for fused_id in fused_ids:
200
- for doc in all_docs:
201
- if doc["_source"]["page_id"] == fused_id and doc["_source"]["title"] not in top_titles:
202
- top_k_documents.append(doc["_source"])
203
- top_titles.append(doc["_source"]["title"])
204
- break
205
- if len(top_titles) >= top_k:
206
- break
207
-
208
- return top_k_documents, all_docs_and_scores
209
-
210
- def answer_query(self, query, top_k: int, model):
211
- """
212
- Answers a query using the top_k documents and the specified model.
213
-
214
- Args:
215
- query (str): The query string.
216
- top_k (int): The number of top documents to use for answering the query.
217
- model: The model to use for answering the query.
218
-
219
- Returns:
220
- tuple: A tuple containing the top k documents, the answer, and the stats.
221
- """
222
- before_retrieval = time.perf_counter()
223
- top_k_documents, all_docs_and_scores = self.search_documents(query, top_k)
224
-
225
-
226
- retrieval_time = round(time.perf_counter() - before_retrieval, 4)
227
- print(f"retrieval time: {retrieval_time}")
228
-
229
- gpt_answer, gpt_elapsed, tokens = self.llms_client.answer(query, top_k_documents)
230
- stats = {
231
- "retrieval_time": retrieval_time,
232
- "gpt_model": model,
233
- "gpt_time": gpt_elapsed,
234
- "tokens": tokens
235
- }
236
- return top_k_documents, gpt_answer, stats, all_docs_and_scores
237
-
238
-
239
- engine = None
240
-
241
-
242
- def engine_factory(llms_client: LLMClient, es_client=None):
243
- global engine
244
- if engine is None:
245
- engine = Engine(llms_client=llms_client, es_client=es_client)
246
- return engine
1
+ import time
2
+ from collections import defaultdict
3
+ from datetime import datetime
4
+ from .llm_client import LLMClient
5
+ from . import config
6
+ from .model import es_client_factory
7
+ from .Document import factory
8
+ from sentence_transformers import SentenceTransformer
9
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
+
11
+ import torch
12
+ import os
13
+
14
+ device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
15
+ definitions = factory()
16
+
17
+
18
+ class Engine:
19
+ """
20
+ Engine class for handling document search and retrieval using Elasticsearch and LLMs.
21
+
22
+ Attributes:
23
+ llms_client (LLMClient): The LLM client instance.
24
+ elastic_model (Model): The Elasticsearch model instance.
25
+ models (dict): A dictionary of SentenceTransformer models.
26
+ reranker_tokenizer (AutoTokenizer): The tokenizer for the reranker model.
27
+ reranker_model (AutoModelForSequenceClassification): The reranker model.
28
+ identifier_field (str): The identifier field for documents.
29
+
30
+ Methods:
31
+ rerank_with_me5(query, documents, k=5):
32
+ Reranks documents based on the query using the reranker model.
33
+
34
+ update_docs(list_of_docs, embed_only_fields=None, delete_existing=False):
35
+ Updates or creates documents in the Elasticsearch index.
36
+
37
+ reciprocal_rank_fusion(ranking_lists, k=60, weights=None):
38
+ Performs Reciprocal Rank Fusion on a list of ranking lists.
39
+
40
+ search_documents(query, top_k):
41
+ Searches for documents based on the query and returns the top_k results.
42
+
43
+ answer_query(query, top_k, model):
44
+ Answers a query using the top_k documents and the specified model.
45
+ """
46
+
47
+ def __init__(self, llms_client: LLMClient, elastic_model=None, models=None, reranker_tokenizer=None,
48
+ reranker_model=None, es_client=None):
49
+ """
50
+ Initializes the Engine instance.
51
+
52
+ Args:
53
+ llms_client (LLMClient): The LLM client instance.
54
+ elastic_model (Model, optional): The Elasticsearch model instance. Default is None.
55
+ models (dict, optional): A dictionary of SentenceTransformer models. Default is None.
56
+ reranker_tokenizer (AutoTokenizer, optional): The tokenizer for the reranker model. Default is None.
57
+ reranker_model (AutoModelForSequenceClassification, optional): The reranker model. Default is None.
58
+ es_client (optional): The Elasticsearch client instance. Default is None.
59
+ """
60
+ if elastic_model is None:
61
+ self.elastic_model = es_client_factory(es_client)
62
+ else:
63
+ self.elastic_model = elastic_model
64
+
65
+ self.llms_client = llms_client
66
+
67
+ self.identifier_field = factory().identifier
68
+
69
+ if models is None:
70
+ self.models = {f"{model_name}": SentenceTransformer(config.MODELS_LOCATION + "/" + model_name).to(device)
71
+ for model_name in definitions.models.keys()}
72
+ else:
73
+ self.models = models
74
+ for model in self.models.values():
75
+ model.eval()
76
+
77
+ if reranker_tokenizer is None:
78
+ self.reranker_tokenizer = AutoTokenizer.from_pretrained(os.getenv("TOKENIZER_LOCATION"))
79
+ else:
80
+ self.reranker_tokenizer = reranker_tokenizer
81
+
82
+ if reranker_model is None:
83
+ self.reranker_model = AutoModelForSequenceClassification.from_pretrained(os.getenv("TOKENIZER_LOCATION"))
84
+ else:
85
+ self.reranker_model = reranker_model
86
+ self.reranker_model.eval()
87
+
88
+ def change_llm(self, llms_client: LLMClient):
89
+ """
90
+ Changes the LLM client for the Engine instance.
91
+
92
+ Args:
93
+ llms_client (LLMClient): The new LLM client instance.
94
+ """
95
+ self.llms_client = llms_client
96
+
97
+ def rerank_with_me5(self, query, documents, k=5):
98
+ """
99
+ Reranks documents based on the query using the reranker model.
100
+
101
+ Args:
102
+ query (str): The query string.
103
+ documents (list): A list of documents to be reranked.
104
+ k (int, optional): The number of top documents to return. Default is 5.
105
+
106
+ Returns:
107
+ list: A list of top k reranked documents.
108
+ """
109
+ pairs = [(query, doc) for doc in set(documents)]
110
+ inputs = self.reranker_tokenizer(pairs, return_tensors='pt', padding=True, truncation=True, max_length=512)
111
+
112
+ # Make predictions
113
+ with torch.no_grad():
114
+ outputs = self.reranker_model(**inputs)
115
+
116
+ scores = outputs.logits.squeeze()
117
+
118
+ if scores.ndim > 1:
119
+ scores = scores[:, 1] # Assuming binary classification and index 1 is the relevance score
120
+
121
+ sorted_indices = torch.argsort(scores, descending=True)
122
+ # If there is only one document, return it to avoid torch error
123
+ if len(sorted_indices) == 1:
124
+ return [pairs[0][1]]
125
+ # Sort documents by their highest score
126
+ sorted_docs = [pairs[i][1] for i in sorted_indices]
127
+ return sorted_docs[:k]
128
+
129
+ def update_docs(self, list_of_docs: list[dict], embed_only_fields=None, delete_existing=False):
130
+ """
131
+ Updates or creates documents in the Elasticsearch index.
132
+
133
+ Args:
134
+ list_of_docs (list[dict]): A list of dictionaries representing the documents to be indexed.
135
+ embed_only_fields (list, optional): A list of fields to be embedded. Default is None.
136
+ delete_existing (bool, optional): Whether to delete existing documents. Default is False.
137
+ """
138
+ embed_only_fields = embed_only_fields or definitions.models.values()
139
+ for doc in list_of_docs:
140
+ for semantic_model, field in definitions.models.items():
141
+ if field in doc.keys() and field in embed_only_fields:
142
+ content_vectors = self.models[semantic_model].encode(doc[field])
143
+ doc[f'{field}_{semantic_model}_vectors'] = content_vectors
144
+
145
+ doc['last_update'] = datetime.now()
146
+ self.elastic_model.create_or_update_documents(list_of_docs, delete_existing)
147
+
148
+ def reciprocal_rank_fusion(self, ranking_lists, k=60, weights=None):
149
+ """
150
+ Performs Reciprocal Rank Fusion on a list of ranking lists.
151
+
152
+ Args:
153
+ :param ranking_lists: List of ranking lists, where each ranking list is a list of documents returned by a model.
154
+ :param k: The parameter for the reciprocal rank calculation (default is 60).
155
+ :param: weights: Optional. Weights for each ranking list.
156
+
157
+ Returns:
158
+ list: A fused ranking list of documents.
159
+ """
160
+ scores = defaultdict(float)
161
+
162
+ for list_index, rank_list in enumerate(ranking_lists):
163
+ for rank, identifier in enumerate(rank_list):
164
+ # Reciprocal rank score
165
+ w = weights[list_index] if weights else 1
166
+ scores[identifier] += w / (k + rank + 1)
167
+
168
+ # Sort the documents by their cumulative scores in descending order
169
+ fused_list = sorted(scores, key=scores.get, reverse=True)
170
+
171
+ return fused_list
172
+
173
+ def search_documents(self, query: str, top_k: int):
174
+ """
175
+ Searches for documents based on the query and returns the top_k results.
176
+
177
+ Args:
178
+ query (str): The query string.
179
+ top_k (int): The number of top documents to return.
180
+
181
+ Returns:
182
+ list: A list of top k documents.
183
+ """
184
+ query_embeddings = {f"{semantic_model}": self.models[semantic_model].encode(query) for semantic_model in
185
+ definitions.models.keys()}
186
+ all_docs_by_model = self.elastic_model.search(query_embeddings)
187
+ all_docs = []
188
+ ids_for_fusion = []
189
+ all_docs_and_scores = {}
190
+
191
+ for key, values in all_docs_by_model.items():
192
+ print(f"\nFound {len(values)} documents for model\n")
193
+ model_ids = []
194
+ scores_for_model = []
195
+
196
+ for doc in values:
197
+ model_ids.append(doc["_source"]["page_id"])
198
+ all_docs.append(doc)
199
+ scores_for_model.append({"doc": doc["_source"]["title"], "score": doc["_score"]})
200
+ ids_for_fusion.append(model_ids)
201
+ all_docs_and_scores[f'{key}'] = scores_for_model
202
+ print(f"\nFusing {len(ids_for_fusion)} results\n")
203
+ fused_ids = self.reciprocal_rank_fusion(ids_for_fusion, k=top_k)
204
+ top_k_documents = []
205
+ top_titles = []
206
+
207
+ for fused_id in fused_ids:
208
+ for doc in all_docs:
209
+ if doc["_source"]["page_id"] == fused_id and doc["_source"]["title"] not in top_titles:
210
+ top_k_documents.append(doc["_source"])
211
+ top_titles.append(doc["_source"]["title"])
212
+ break
213
+ if len(top_titles) >= top_k:
214
+ break
215
+
216
+ return top_k_documents, all_docs_and_scores
217
+
218
+ def answer_query(self, query, top_k: int, model):
219
+ """
220
+ Answers a query using the top_k documents and the specified model.
221
+
222
+ Args:
223
+ query (str): The query string.
224
+ top_k (int): The number of top documents to use for answering the query.
225
+ model: The model to use for answering the query.
226
+
227
+ Returns:
228
+ tuple: A tuple containing the top k documents, the answer, and the stats.
229
+ """
230
+ before_retrieval = time.perf_counter()
231
+ top_k_documents, all_docs_and_scores = self.search_documents(query, top_k)
232
+
233
+
234
+ retrieval_time = round(time.perf_counter() - before_retrieval, 4)
235
+ print(f"retrieval time: {retrieval_time}")
236
+
237
+ gpt_answer, gpt_elapsed, tokens = self.llms_client.answer(query, top_k_documents)
238
+ stats = {
239
+ "retrieval_time": retrieval_time,
240
+ "gpt_model": model,
241
+ "gpt_time": gpt_elapsed,
242
+ "tokens": tokens
243
+ }
244
+ return top_k_documents, gpt_answer, stats, all_docs_and_scores
245
+
246
+
247
+ engine = None
248
+
249
+
250
+ def engine_factory(llms_client: LLMClient, es_client=None):
251
+ global engine
252
+ if engine is None:
253
+ engine = Engine(llms_client=llms_client, es_client=es_client)
254
+ return engine
@@ -1,11 +1,11 @@
1
- from .Document import factory
2
- from abc import ABC, abstractmethod
3
- definitions = factory()
4
-
5
- class LLMClient(ABC):
6
- @abstractmethod
7
- def __init__(self):
8
- self.field_for_answer = definitions.field_for_llm
9
- @abstractmethod
10
- def answer(self, _question, _top_k_docs) -> tuple[str, float, int]:
11
- raise NotImplementedError
1
+ from .Document import factory
2
+ from abc import ABC, abstractmethod
3
+ definitions = factory()
4
+
5
+ class LLMClient(ABC):
6
+ @abstractmethod
7
+ def __init__(self):
8
+ self.field_for_answer = definitions.field_for_llm
9
+ @abstractmethod
10
+ def answer(self, _question, _top_k_docs) -> tuple[str, float, int]:
11
+ raise NotImplementedError
kolzchut_ragbot/model.py CHANGED
@@ -1,182 +1,182 @@
1
- import datetime
2
- import logging
3
- import os
4
- from .Document import factory as definitions_factory
5
-
6
- definitions_singleton = definitions_factory()
7
- EMBEDDING_INDEX = os.getenv("ES_EMBEDDING_INDEX", "embeddings")
8
- semantic_models = definitions_singleton.models.keys()
9
-
10
-
11
- def index_from_page_id(page_id: int):
12
- """
13
- Generates an index name based on the page ID.
14
-
15
- Args:
16
- page_id (int): The ID of the page.
17
-
18
- Returns:
19
- str: The generated index name.
20
- """
21
- index_postfix = round(page_id / 1000)
22
- return EMBEDDING_INDEX + "_" + str(index_postfix)
23
-
24
-
25
- def create_mapping():
26
- """
27
- Creates a mapping for the model in Elasticsearch.
28
- """
29
- vector_fields = {f'{semantic_model}_{name}_vectors': {"type": "dense_vector", "dims": 1024}
30
- for name, semantic_model in definitions_singleton.models.items()}
31
-
32
- data_fields = {}
33
- for field in definitions_singleton.saved_fields.keys():
34
- field_type = definitions_singleton.saved_fields[field]
35
- field_mapping = {"type": field_type}
36
- if field_type == "date":
37
- field_mapping["format"] = "yyyyMMddHHmmss"
38
- data_fields[f"{field}"] = field_mapping
39
-
40
- mappings = {
41
- "properties": {
42
- "last_update": {
43
- "type": "date",
44
- },
45
- **vector_fields,
46
- **data_fields,
47
- }
48
- }
49
- return mappings
50
-
51
-
52
- class Model:
53
- """
54
- Represents the model for creating, updating, and searching documents in Elasticsearch.
55
-
56
- Attributes:
57
- custom_result_selection_function (callable): A custom function for selecting search results.
58
- es_client: The Elasticsearch client instance.
59
-
60
- Methods:
61
- create_index():
62
- Creates an index for the model in Elasticsearch.
63
-
64
- create_or_update_documents(paragraphs_dicts: list[dict], update=False):
65
- Creates or updates documents in the Elasticsearch index.
66
-
67
- search(embedded_search: dict[str, list[float]], size=50) -> dict[str, list[dict]]:
68
- Searches for similar documents using cosine similarity.
69
- """
70
-
71
- custom_result_selection_function = None
72
-
73
- def __init__(self, es_client, custom_result_selection_function=None):
74
- """
75
- Initializes the Model instance.
76
-
77
- Args:
78
- es_client: The Elasticsearch client instance.
79
- custom_result_selection_function (callable, optional): A custom function for selecting search results.
80
- """
81
- self.es_client = es_client
82
- if custom_result_selection_function is not None:
83
- self.custom_result_selection_function = custom_result_selection_function
84
-
85
- def create_index(self, index_name):
86
- """
87
- Creates an index for the model in Elasticsearch.
88
- """
89
- mapping = create_mapping()
90
- if not self.es_client.indices.exists(index=index_name):
91
- self.es_client.indices.create(
92
- index=index_name,
93
- mappings=mapping
94
- )
95
-
96
- def create_or_update_documents(self, paragraphs_dicts: list[dict], update=False):
97
- """
98
- Creates or updates documents in the Elasticsearch index.
99
-
100
- Args:
101
- paragraphs_dicts (list[dict]): A list of dictionaries representing the paragraphs to be indexed.
102
- update (bool, optional): Whether to update existing documents. Default is False.
103
- """
104
-
105
- identifier = definitions_singleton.identifier
106
- print(f"Creating or updating documents in the index, {len(paragraphs_dicts)} paragraphs\n")
107
- # Identify the doc from the first paragraph - all paragraphs should have the same doc_id
108
- doc_id = paragraphs_dicts[0][identifier]
109
- index = index_from_page_id(int(doc_id))
110
-
111
- if update:
112
- try:
113
- query = {
114
- "query": {
115
- "match": {
116
- f"{identifier}": doc_id
117
- }
118
- }
119
- }
120
- self.es_client.delete_by_query(index=f"{EMBEDDING_INDEX}*", body=query)
121
-
122
- except Exception as e:
123
- logging.error(f"Error while searching for existing document: {e}")
124
- self.create_index(index)
125
- for i, doc_dict in enumerate(paragraphs_dicts):
126
- print(f"saving paragraph {i + 1} / {len(paragraphs_dicts)}")
127
- doc = {
128
- "last_update": datetime.datetime.now(),
129
- **doc_dict
130
- }
131
-
132
- self.es_client.index(index=index, body=doc)
133
-
134
- def search(self, embedded_search: dict[str, list[float]], size=50) -> dict[str, list[dict]]:
135
- """
136
- Searches for similar documents using cosine similarity.
137
-
138
- Args:
139
- embedded_search (dict[str, list[float]]): A dictionary containing the embedded search vectors.
140
- size (int, optional): The number of search results to return. Default is 50.
141
-
142
- Returns:
143
- dict[str, list[dict]]: A dictionary containing the search results.
144
- """
145
- results = {}
146
- for semantic_model, field in definitions_singleton.models.items():
147
- results[field] = [] if field not in results.keys() else results[field]
148
- body = {
149
- "script_score": {
150
- "query": {
151
- "exists": {
152
- "field": f'{field}_{semantic_model}_vectors'
153
- }
154
- },
155
- "script": {
156
- "source": f"cosineSimilarity(params.query_vector, '{field}_{semantic_model}_vectors') + 1.0",
157
- "params": {
158
- "query_vector": embedded_search[semantic_model]
159
- }
160
- }
161
- }
162
- }
163
- print(f"Searching for {field} using {semantic_model} on index {EMBEDDING_INDEX}\n")
164
- field_results = self.es_client.search(
165
- index=EMBEDDING_INDEX + "*",
166
- body={
167
- "size": size,
168
- "query": body
169
- })
170
- results[field] = results[field] + field_results["hits"]["hits"]
171
-
172
- return results
173
-
174
-
175
- model = None
176
-
177
-
178
- def es_client_factory(es_client) -> Model:
179
- global model
180
- if model is None:
181
- model = Model(es_client)
182
- return model
1
+ import datetime
2
+ import logging
3
+ import os
4
+ from .Document import factory as definitions_factory
5
+
6
+ definitions_singleton = definitions_factory()
7
+ EMBEDDING_INDEX = os.getenv("ES_EMBEDDING_INDEX", "embeddings")
8
+ semantic_models = definitions_singleton.models.keys()
9
+
10
+
11
+ def index_from_page_id(page_id: int):
12
+ """
13
+ Generates an index name based on the page ID.
14
+
15
+ Args:
16
+ page_id (int): The ID of the page.
17
+
18
+ Returns:
19
+ str: The generated index name.
20
+ """
21
+ index_postfix = round(page_id / 1000)
22
+ return EMBEDDING_INDEX + "_" + str(index_postfix)
23
+
24
+
25
+ def create_mapping():
26
+ """
27
+ Creates a mapping for the model in Elasticsearch.
28
+ """
29
+ vector_fields = {f'{semantic_model}_{name}_vectors': {"type": "dense_vector", "dims": 1024}
30
+ for name, semantic_model in definitions_singleton.models.items()}
31
+
32
+ data_fields = {}
33
+ for field in definitions_singleton.saved_fields.keys():
34
+ field_type = definitions_singleton.saved_fields[field]
35
+ field_mapping = {"type": field_type}
36
+ if field_type == "date":
37
+ field_mapping["format"] = "yyyyMMddHHmmss"
38
+ data_fields[f"{field}"] = field_mapping
39
+
40
+ mappings = {
41
+ "properties": {
42
+ "last_update": {
43
+ "type": "date",
44
+ },
45
+ **vector_fields,
46
+ **data_fields,
47
+ }
48
+ }
49
+ return mappings
50
+
51
+
52
+ class Model:
53
+ """
54
+ Represents the model for creating, updating, and searching documents in Elasticsearch.
55
+
56
+ Attributes:
57
+ custom_result_selection_function (callable): A custom function for selecting search results.
58
+ es_client: The Elasticsearch client instance.
59
+
60
+ Methods:
61
+ create_index():
62
+ Creates an index for the model in Elasticsearch.
63
+
64
+ create_or_update_documents(paragraphs_dicts: list[dict], update=False):
65
+ Creates or updates documents in the Elasticsearch index.
66
+
67
+ search(embedded_search: dict[str, list[float]], size=50) -> dict[str, list[dict]]:
68
+ Searches for similar documents using cosine similarity.
69
+ """
70
+
71
+ custom_result_selection_function = None
72
+
73
+ def __init__(self, es_client, custom_result_selection_function=None):
74
+ """
75
+ Initializes the Model instance.
76
+
77
+ Args:
78
+ es_client: The Elasticsearch client instance.
79
+ custom_result_selection_function (callable, optional): A custom function for selecting search results.
80
+ """
81
+ self.es_client = es_client
82
+ if custom_result_selection_function is not None:
83
+ self.custom_result_selection_function = custom_result_selection_function
84
+
85
+ def create_index(self, index_name):
86
+ """
87
+ Creates an index for the model in Elasticsearch.
88
+ """
89
+ mapping = create_mapping()
90
+ if not self.es_client.indices.exists(index=index_name):
91
+ self.es_client.indices.create(
92
+ index=index_name,
93
+ mappings=mapping
94
+ )
95
+
96
+ def create_or_update_documents(self, paragraphs_dicts: list[dict], update=False):
97
+ """
98
+ Creates or updates documents in the Elasticsearch index.
99
+
100
+ Args:
101
+ paragraphs_dicts (list[dict]): A list of dictionaries representing the paragraphs to be indexed.
102
+ update (bool, optional): Whether to update existing documents. Default is False.
103
+ """
104
+
105
+ identifier = definitions_singleton.identifier
106
+ print(f"Creating or updating documents in the index, {len(paragraphs_dicts)} paragraphs\n")
107
+ # Identify the doc from the first paragraph - all paragraphs should have the same doc_id
108
+ doc_id = paragraphs_dicts[0][identifier]
109
+ index = index_from_page_id(int(doc_id))
110
+
111
+ if update:
112
+ try:
113
+ query = {
114
+ "query": {
115
+ "match": {
116
+ f"{identifier}": doc_id
117
+ }
118
+ }
119
+ }
120
+ self.es_client.delete_by_query(index=f"{EMBEDDING_INDEX}*", body=query)
121
+
122
+ except Exception as e:
123
+ logging.error(f"Error while searching for existing document: {e}")
124
+ self.create_index(index)
125
+ for i, doc_dict in enumerate(paragraphs_dicts):
126
+ print(f"saving paragraph {i + 1} / {len(paragraphs_dicts)}")
127
+ doc = {
128
+ "last_update": datetime.datetime.now(),
129
+ **doc_dict
130
+ }
131
+
132
+ self.es_client.index(index=index, body=doc)
133
+
134
+ def search(self, embedded_search: dict[str, list[float]], size=50) -> dict[str, list[dict]]:
135
+ """
136
+ Searches for similar documents using cosine similarity.
137
+
138
+ Args:
139
+ embedded_search (dict[str, list[float]]): A dictionary containing the embedded search vectors.
140
+ size (int, optional): The number of search results to return. Default is 50.
141
+
142
+ Returns:
143
+ dict[str, list[dict]]: A dictionary containing the search results.
144
+ """
145
+ results = {}
146
+ for semantic_model, field in definitions_singleton.models.items():
147
+ results[field] = [] if field not in results.keys() else results[field]
148
+ body = {
149
+ "script_score": {
150
+ "query": {
151
+ "exists": {
152
+ "field": f'{field}_{semantic_model}_vectors'
153
+ }
154
+ },
155
+ "script": {
156
+ "source": f"cosineSimilarity(params.query_vector, '{field}_{semantic_model}_vectors') + 1.0",
157
+ "params": {
158
+ "query_vector": embedded_search[semantic_model]
159
+ }
160
+ }
161
+ }
162
+ }
163
+ print(f"Searching for {field} using {semantic_model} on index {EMBEDDING_INDEX}\n")
164
+ field_results = self.es_client.search(
165
+ index=EMBEDDING_INDEX + "*",
166
+ body={
167
+ "size": size,
168
+ "query": body
169
+ })
170
+ results[field] = results[field] + field_results["hits"]["hits"]
171
+
172
+ return results
173
+
174
+
175
+ model = None
176
+
177
+
178
+ def es_client_factory(es_client) -> Model:
179
+ global model
180
+ if model is None:
181
+ model = Model(es_client)
182
+ return model
@@ -0,0 +1,76 @@
1
+ Metadata-Version: 2.4
2
+ Name: kolzchut-ragbot
3
+ Version: 1.5.0
4
+ Summary: A search engine using machine learning models and Elasticsearch for advanced document retrieval.
5
+ Home-page: https://github.com/shmuelrob/rag-bot
6
+ Author: Shmuel Robinov
7
+ Author-email: shmuel_robinov@webiks.com
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ Requires-Dist: elasticsearch==8.17.1
14
+ Requires-Dist: sentence-transformers==3.4.1
15
+ Requires-Dist: torch==2.6.0
16
+ Requires-Dist: transformers==4.48.3
17
+ Dynamic: author
18
+ Dynamic: author-email
19
+ Dynamic: classifier
20
+ Dynamic: description
21
+ Dynamic: description-content-type
22
+ Dynamic: home-page
23
+ Dynamic: requires-dist
24
+ Dynamic: requires-python
25
+ Dynamic: summary
26
+
27
+ # kolzchut-ragbot
28
+
29
+ ## Overview
30
+
31
+ This project is a search engine that uses machine learning models and Elasticsearch to provide advanced document retrieval.
32
+ You can use [kolzchut-ragbot](https://github.com/shmuelrob/rag-bot) to demonstrate the engine's document retrieval abilities.
33
+
34
+ ## Features
35
+
36
+ - Document representation and validation
37
+ - Document embedding and indexing in Elasticsearch
38
+ - Advanced search using machine learning model
39
+ - Integration with LLM (Large Language Model) client for query answering
40
+
41
+ ## Installation
42
+
43
+ ### From PyPI
44
+
45
+ ```bash
46
+ pip install kolzchut-ragbot
47
+ ```
48
+
49
+ ### From Source
50
+
51
+ 1. Clone the repository:
52
+
53
+ ```bash
54
+ git clone https://github.com/shmuelrob/rag-bot.git
55
+ cd rag-bot
56
+ ```
57
+
58
+ 2. Create a virtual environment and activate it:
59
+
60
+ ```bash
61
+ python -m venv venv
62
+ source venv/bin/activate # On Windows use: venv\Scripts\activate
63
+ ```
64
+
65
+ 3. Install the required dependencies:
66
+
67
+ ```bash
68
+ pip install -r requirements.txt
69
+ ```
70
+
71
+ ## Configuration
72
+
73
+ Set the following environment variables:
74
+
75
+ - `ES_EMBEDDING_INDEX`: The name of the Elasticsearch index for embeddings.
76
+ - `TOKENIZER_LOCATION`: The location of the tokenizer model.
@@ -0,0 +1,11 @@
1
+ kolzchut_ragbot/Document.py,sha256=5OyBBTZyAJFM_1Pjs3SUC-_s5zEJ5U6wjhw12_FFkdE,3621
2
+ kolzchut_ragbot/IntegrateService.py,sha256=rcwUY2RkclCY3l8BGAmNbstdxhxwhLO9oA8BofqLyts,96
3
+ kolzchut_ragbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ kolzchut_ragbot/config.py,sha256=uILFvgn9W92-NRaKXYtaoQXpn3KOWKK8SZYRsIAa5Yw,133
5
+ kolzchut_ragbot/engine.py,sha256=tULiiCdqLoXydxx7VrZKChDgbe4ygSoEft-k76j3_t0,10158
6
+ kolzchut_ragbot/llm_client.py,sha256=Frp7CL0OIlQA6ltohrGWedI6uD6MpGg6TbpZTBE0qIo,341
7
+ kolzchut_ragbot/model.py,sha256=HCi3r4YztPknnbgTOA7I-GVaqxn8CzrTeLFkEg-7fg0,6320
8
+ kolzchut_ragbot-1.5.0.dist-info/METADATA,sha256=nzWcZsfn23nVWANcWMGoQvQXJ-idymUKg0ZheMbWp1c,1999
9
+ kolzchut_ragbot-1.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
10
+ kolzchut_ragbot-1.5.0.dist-info/top_level.txt,sha256=NTZoY4GGw3v_7jm0MgcdHw8simoZ78PsR7Meqmkgd_Q,16
11
+ kolzchut_ragbot-1.5.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,67 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: kolzchut-ragbot
3
- Version: 1.4.2
4
- Summary: A search engine using machine learning models and Elasticsearch for advanced document retrieval.
5
- Home-page: https://github.com/shmuelrob/rag-bot
6
- Author: Shmuel Robinov
7
- Author-email: shmuel_robinov@webiks.com
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Operating System :: OS Independent
11
- Requires-Python: >=3.10
12
- Description-Content-Type: text/markdown
13
- Requires-Dist: elasticsearch==8.17.1
14
- Requires-Dist: sentence-transformers==3.4.1
15
- Requires-Dist: torch==2.6.0
16
- Requires-Dist: transformers==4.48.3
17
- Dynamic: author
18
- Dynamic: author-email
19
- Dynamic: classifier
20
- Dynamic: description
21
- Dynamic: description-content-type
22
- Dynamic: home-page
23
- Dynamic: requires-dist
24
- Dynamic: requires-python
25
- Dynamic: summary
26
-
27
- # **Webiks-Hebrew-RAGbot**
28
-
29
- ## **Overview**
30
-
31
- This project is a search engine that uses machine learning models and Elasticsearch to provide advanced document retrieval.
32
- You can use [Webiks-Hebrew-RAGbot-Demo](https://github.com/NNLP-IL/Webiks-Hebrew-RAGbot-Demo) to demonstrate the engine's document retrieval abilities
33
-
34
- ## **Features**
35
-
36
- Document representation and validation
37
- Document embedding and indexing in Elasticsearch
38
- Advanced search using machine learning model
39
- Integration with LLM (Large Language Model) client for query answering
40
-
41
- ## **Installation**
42
-
43
- 1. Clone the repository:
44
-
45
- `git clone https://github.com/NNLP-IL/Webiks-Hebrew-RAGbot.git`
46
-
47
- `cd Webiks-Hebrew-RAGbot`
48
-
49
- 2. Create a virtual environment and activate it:  
50
-
51
- `python -m venv venv`
52
-
53
- `source venv/bin/activate`
54
-
55
- On Windows use `\venv\\Scripts\\activate\`
56
-
57
- 3. Install the required dependencies:  
58
-
59
- `pip install -r requirements.txt`
60
-
61
- ## **Configuration**
62
-
63
- Set the following environment variables:  
64
-
65
- ES\_EMBEDDING\_INDEX: The name of the Elasticsearch index for embeddings.
66
-
67
- TOKENIZER\_LOCATION: The location of the tokenizer model.
@@ -1,11 +0,0 @@
1
- kolzchut_ragbot/Document.py,sha256=ySawnD06HA0zHjHp4Y_CPjMMZqLp8onaEgd1dGP5sbs,3722
2
- kolzchut_ragbot/IntegrateService.py,sha256=CqB9vW6W5oj6Ig3crEa6hXqwro21z97UaG9ngxFTzYs,100
3
- kolzchut_ragbot/__init__.py,sha256=KKAc2xjCl5Aui2Cj0FWyvJ51nmnFv7MspLMqOYb-QHA,26
4
- kolzchut_ragbot/config.py,sha256=pcKVJVJ8P2YximjTrmVlrocHXSmzmNu_DFzNoPLa22E,138
5
- kolzchut_ragbot/engine.py,sha256=V8WUWyqvBWbGt-rRRf8G6BEyD-4GjsmtJrxBb6aPon8,10154
6
- kolzchut_ragbot/llm_client.py,sha256=q_cUZq645P7i1PliYzpJRTWlsoSECVIhE-y9wU5eRtQ,352
7
- kolzchut_ragbot/model.py,sha256=M7i9B-zzwa-ATblY-5c7gmbkOXKwS8wWmYMP8l0HE40,6502
8
- kolzchut_ragbot-1.4.2.dist-info/METADATA,sha256=YctMSApfbXBBvx7d0APy28lDWmulO3cfHJKjKOZ_LYQ,2024
9
- kolzchut_ragbot-1.4.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
10
- kolzchut_ragbot-1.4.2.dist-info/top_level.txt,sha256=NTZoY4GGw3v_7jm0MgcdHw8simoZ78PsR7Meqmkgd_Q,16
11
- kolzchut_ragbot-1.4.2.dist-info/RECORD,,