kobai-sdk 0.2.8rc11__py3-none-any.whl → 0.2.8rc12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kobai-sdk might be problematic. Click here for more details.

kobai/ai_rag.py CHANGED
@@ -143,7 +143,7 @@ def __generate_sentences_from_questions(tc: AIContext, debug):
143
143
  ss.sql(full_sql)
144
144
 
145
145
 
146
- def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None):
146
+ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
147
147
  """
148
148
  Encode Semantic Data to Vectors in Delta Table
149
149
 
@@ -163,7 +163,7 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
163
163
  sentences_df = ss.sql(sentences_sql)
164
164
 
165
165
  num_records = sentences_df.count()
166
- query_batch_size = 100000
166
+ query_batch_size = batch_size
167
167
 
168
168
  for x in range(0, num_records, query_batch_size):
169
169
  print(f"Running Batch Starting at {x}")
@@ -174,9 +174,11 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
174
174
 
175
175
  if isinstance(st_model, SentenceTransformer):
176
176
  vector_list = st_model.encode(
177
- content_list, normalize_embeddings=True, show_progress_bar=True)
177
+ content_list, normalize_embeddings=True, show_progress_bar=True).tolist()
178
178
  else:
179
179
  vector_list = st_model.embed_documents(content_list)
180
+ for i, v in enumerate(vector_list):
181
+ vector_list[i] = [float(x) for x in v]
180
182
  #vector_list = st_model.encode(
181
183
  # content_list, normalize_embeddings=True, show_progress_bar=True)
182
184
 
@@ -185,7 +187,7 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
185
187
  StructField("vector", ArrayType(FloatType()), False)
186
188
  ])
187
189
 
188
- updated_list = [[r[0], r[1].tolist()]
190
+ updated_list = [[r[0], r[1]]
189
191
  for r in zip(id_list, vector_list)]
190
192
  updated_df = ss.createDataFrame(updated_list, schema_v)
191
193
 
kobai/tenant_client.py CHANGED
@@ -451,7 +451,7 @@ class TenantClient:
451
451
  """
452
452
  ai_rag.generate_sentences(self.get_ai_context(), replica_schema=replica_schema, concept_white_list=concept_white_list, use_questions=use_questions, debug=debug)
453
453
 
454
- def rag_encode_to_delta_local(self, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None):
454
+ def rag_encode_to_delta_local(self, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
455
455
  """
456
456
  Encode Semantic Data to Vectors in Delta Table
457
457
 
@@ -459,7 +459,7 @@ class TenantClient:
459
459
  st_model (SentenceTransformer): A sentence_transformers model to use for encoding.
460
460
  replica_schema (str) OPTIONAL: An alternate schema (catalog.database) to create the Delta table. Useful when the base Kobai schema is not on a Unity Catalog.
461
461
  """
462
- ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema)
462
+ ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema, batch_size=batch_size)
463
463
 
464
464
  def rag_delta(self, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
465
465
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: kobai-sdk
3
- Version: 0.2.8rc11
3
+ Version: 0.2.8rc12
4
4
  Summary: A package that enables interaction with a Kobai tenant.
5
5
  Author-email: Ryan Oattes <ryan@kobai.io>
6
6
  License: Apache License
@@ -1,13 +1,13 @@
1
1
  kobai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  kobai/ai_query.py,sha256=d0WOPKtQ-bI_zW3-_6guEJX0t55OcxdXIdgXaD-zKK0,9413
3
- kobai/ai_rag.py,sha256=Gqj_CF2lLjMc0CcxquOAgXhiuY6pKwyJumR2NK7QdOo,14559
3
+ kobai/ai_rag.py,sha256=hc8M7M9azY-tFovsXdrjHJTmemFr8L74X2dy0j-XUoY,14685
4
4
  kobai/databricks_client.py,sha256=fyqqMly2Qm0r1AHWsQjkYeNsDdH0G1JSgTkF9KJ55qA,2118
5
5
  kobai/demo_tenant_client.py,sha256=wlNc-bdI2wotRXo8ppUOalv4hYdBlek_WzJNARZV-AE,9293
6
6
  kobai/spark_client.py,sha256=opM_F-4Ut5Hq5zZjWMuLvUps9sDULvyPNZHXGL8dW1k,776
7
7
  kobai/tenant_api.py,sha256=9U6UbxpaAb-kpbuADXx3kbkNKaOzYy0I-GGwbpiCCOk,4212
8
- kobai/tenant_client.py,sha256=ktw75dmmX95gD6qIkG-mAmdo48RkF96SgmpRmfDWblo,39422
9
- kobai_sdk-0.2.8rc11.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
10
- kobai_sdk-0.2.8rc11.dist-info/METADATA,sha256=jyrtCYbqTYKrHMHBOmWUrDfkK3NFqtqxmQyTW-vZ05A,19205
11
- kobai_sdk-0.2.8rc11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
12
- kobai_sdk-0.2.8rc11.dist-info/top_level.txt,sha256=ns1El3BrTTHKvoAgU1XtiSaVIudYeCXbEEUVY8HFDZ4,6
13
- kobai_sdk-0.2.8rc11.dist-info/RECORD,,
8
+ kobai/tenant_client.py,sha256=OUHQMmqWlezenkmMJH9p4wOz5CsAntmO25v5vWdepVE,39464
9
+ kobai_sdk-0.2.8rc12.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
10
+ kobai_sdk-0.2.8rc12.dist-info/METADATA,sha256=vrkBxo0XcZ1uG4lPHs-XfL38rxu-az55yw4NuJ_v5rk,19205
11
+ kobai_sdk-0.2.8rc12.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
12
+ kobai_sdk-0.2.8rc12.dist-info/top_level.txt,sha256=ns1El3BrTTHKvoAgU1XtiSaVIudYeCXbEEUVY8HFDZ4,6
13
+ kobai_sdk-0.2.8rc12.dist-info/RECORD,,