kolzchut-ragbot 1.4.0__py3-none-any.whl → 1.4.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
kolzchut_ragbot/model.py CHANGED
@@ -22,6 +22,33 @@ def index_from_page_id(page_id: int):
22
22
  return EMBEDDING_INDEX + "_" + str(index_postfix)
23
23
 
24
24
 
25
+ def create_mapping():
26
+ """
27
+ Creates a mapping for the model in Elasticsearch.
28
+ """
29
+ vector_fields = {f'{semantic_model}_{name}_vectors': {"type": "dense_vector", "dims": 1024}
30
+ for name, semantic_model in definitions_singleton.models.items()}
31
+
32
+ data_fields = {}
33
+ for field in definitions_singleton.saved_fields.keys():
34
+ field_type = definitions_singleton.saved_fields[field]
35
+ field_mapping = {"type": field_type}
36
+ if field_type == "date":
37
+ field_mapping["format"] = "yyyyMMddHHmmss"
38
+ data_fields[f"{field}"] = field_mapping
39
+
40
+ mappings = {
41
+ "properties": {
42
+ "last_update": {
43
+ "type": "date",
44
+ },
45
+ **vector_fields,
46
+ **data_fields,
47
+ }
48
+ }
49
+ return mappings
50
+
51
+
25
52
  class Model:
26
53
  """
27
54
  Represents the model for creating, updating, and searching documents in Elasticsearch.
@@ -55,27 +82,16 @@ class Model:
55
82
  if custom_result_selection_function is not None:
56
83
  self.custom_result_selection_function = custom_result_selection_function
57
84
 
58
- def create_index(self):
85
+ def create_index(self, index_name):
59
86
  """
60
87
  Creates an index for the model in Elasticsearch.
61
88
  """
62
- if not self.es_client.indices.exists(index=os.getenv("ES_EMBEDDING_INDEX")):
63
- vector_fields = {f"{name}_{semantic_model}_vectors": {"type": "dense_vector", "dims": 1024}
64
- for name, semantic_model in definitions_singleton.models.items()}
65
- data_fields = {f"{field}": {"type": definitions_singleton.saved_fields[field]}
66
- for field in definitions_singleton.saved_fields.keys()}
67
-
89
+ mapping = create_mapping()
90
+ if not self.es_client.indices.exists(index=index_name):
68
91
  self.es_client.indices.create(
69
- index=os.getenv("ES_EMBEDDING_INDEX"),
70
- mappings={
71
- "properties": {
72
- "last_update": {
73
- "type": "date"
74
- },
75
- **vector_fields,
76
- **data_fields,
77
- }
78
- })
92
+ index=index_name,
93
+ mappings=mapping
94
+ )
79
95
 
80
96
  def create_or_update_documents(self, paragraphs_dicts: list[dict], update=False):
81
97
  """
@@ -105,9 +121,9 @@ class Model:
105
121
 
106
122
  except Exception as e:
107
123
  logging.error(f"Error while searching for existing document: {e}")
108
-
124
+ self.create_index(index)
109
125
  for i, doc_dict in enumerate(paragraphs_dicts):
110
- print(f"saving paragraph {i}")
126
+ print(f"saving paragraph {i + 1} / {len(paragraphs_dicts)}")
111
127
  doc = {
112
128
  "last_update": datetime.datetime.now(),
113
129
  **doc_dict
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kolzchut-ragbot
3
- Version: 1.4.0
3
+ Version: 1.4.2
4
4
  Summary: A search engine using machine learning models and Elasticsearch for advanced document retrieval.
5
5
  Home-page: https://github.com/shmuelrob/rag-bot
6
6
  Author: Shmuel Robinov
@@ -4,8 +4,8 @@ kolzchut_ragbot/__init__.py,sha256=KKAc2xjCl5Aui2Cj0FWyvJ51nmnFv7MspLMqOYb-QHA,2
4
4
  kolzchut_ragbot/config.py,sha256=pcKVJVJ8P2YximjTrmVlrocHXSmzmNu_DFzNoPLa22E,138
5
5
  kolzchut_ragbot/engine.py,sha256=V8WUWyqvBWbGt-rRRf8G6BEyD-4GjsmtJrxBb6aPon8,10154
6
6
  kolzchut_ragbot/llm_client.py,sha256=q_cUZq645P7i1PliYzpJRTWlsoSECVIhE-y9wU5eRtQ,352
7
- kolzchut_ragbot/model.py,sha256=OfFFNj429IUUObOpjDB08ubZo6io45sOaoxkyEU68d4,6228
8
- kolzchut_ragbot-1.4.0.dist-info/METADATA,sha256=PGV1l69-kxkDMjtVPYrXudwLyag7cJvEZtUi1Tgo6CA,2024
9
- kolzchut_ragbot-1.4.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
10
- kolzchut_ragbot-1.4.0.dist-info/top_level.txt,sha256=NTZoY4GGw3v_7jm0MgcdHw8simoZ78PsR7Meqmkgd_Q,16
11
- kolzchut_ragbot-1.4.0.dist-info/RECORD,,
7
+ kolzchut_ragbot/model.py,sha256=M7i9B-zzwa-ATblY-5c7gmbkOXKwS8wWmYMP8l0HE40,6502
8
+ kolzchut_ragbot-1.4.2.dist-info/METADATA,sha256=YctMSApfbXBBvx7d0APy28lDWmulO3cfHJKjKOZ_LYQ,2024
9
+ kolzchut_ragbot-1.4.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
10
+ kolzchut_ragbot-1.4.2.dist-info/top_level.txt,sha256=NTZoY4GGw3v_7jm0MgcdHw8simoZ78PsR7Meqmkgd_Q,16
11
+ kolzchut_ragbot-1.4.2.dist-info/RECORD,,