kobai-sdk 0.2.8rc10__tar.gz → 0.2.8rc12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kobai-sdk might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: kobai-sdk
3
- Version: 0.2.8rc10
3
+ Version: 0.2.8rc12
4
4
  Summary: A package that enables interaction with a Kobai tenant.
5
5
  Author-email: Ryan Oattes <ryan@kobai.io>
6
6
  License: Apache License
@@ -143,7 +143,7 @@ def __generate_sentences_from_questions(tc: AIContext, debug):
143
143
  ss.sql(full_sql)
144
144
 
145
145
 
146
- def encode_to_delta_local(tc: AIContext, st_model: SentenceTransformer, replica_schema=None):
146
+ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
147
147
  """
148
148
  Encode Semantic Data to Vectors in Delta Table
149
149
 
@@ -163,7 +163,7 @@ def encode_to_delta_local(tc: AIContext, st_model: SentenceTransformer, replica_
163
163
  sentences_df = ss.sql(sentences_sql)
164
164
 
165
165
  num_records = sentences_df.count()
166
- query_batch_size = 100000
166
+ query_batch_size = batch_size
167
167
 
168
168
  for x in range(0, num_records, query_batch_size):
169
169
  print(f"Running Batch Starting at {x}")
@@ -172,15 +172,22 @@ def encode_to_delta_local(tc: AIContext, st_model: SentenceTransformer, replica_
172
172
  content_list = [r["content"] for r in sentences_df.collect()]
173
173
  id_list = [r["id"] for r in sentences_df.collect()]
174
174
 
175
- vector_list = st_model.encode(
176
- content_list, normalize_embeddings=True, show_progress_bar=True)
175
+ if isinstance(st_model, SentenceTransformer):
176
+ vector_list = st_model.encode(
177
+ content_list, normalize_embeddings=True, show_progress_bar=True).tolist()
178
+ else:
179
+ vector_list = st_model.embed_documents(content_list)
180
+ for i, v in enumerate(vector_list):
181
+ vector_list[i] = [float(x) for x in v]
182
+ #vector_list = st_model.encode(
183
+ # content_list, normalize_embeddings=True, show_progress_bar=True)
177
184
 
178
185
  schema_v = StructType([
179
186
  StructField("id", IntegerType(), True),
180
187
  StructField("vector", ArrayType(FloatType()), False)
181
188
  ])
182
189
 
183
- updated_list = [[r[0], r[1].tolist()]
190
+ updated_list = [[r[0], r[1]]
184
191
  for r in zip(id_list, vector_list)]
185
192
  updated_df = ss.createDataFrame(updated_list, schema_v)
186
193
 
@@ -451,7 +451,7 @@ class TenantClient:
451
451
  """
452
452
  ai_rag.generate_sentences(self.get_ai_context(), replica_schema=replica_schema, concept_white_list=concept_white_list, use_questions=use_questions, debug=debug)
453
453
 
454
- def rag_encode_to_delta_local(self, st_model: SentenceTransformer, replica_schema=None):
454
+ def rag_encode_to_delta_local(self, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
455
455
  """
456
456
  Encode Semantic Data to Vectors in Delta Table
457
457
 
@@ -459,7 +459,7 @@ class TenantClient:
459
459
  st_model (SentenceTransformer): A sentence_transformers model to use for encoding.
460
460
  replica_schema (str) OPTIONAL: An alternate schema (catalog.database) to create the Delta table. Useful when the base Kobai schema is not on a Unity Catalog.
461
461
  """
462
- ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema)
462
+ ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema, batch_size=batch_size)
463
463
 
464
464
  def rag_delta(self, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
465
465
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: kobai-sdk
3
- Version: 0.2.8rc10
3
+ Version: 0.2.8rc12
4
4
  Summary: A package that enables interaction with a Kobai tenant.
5
5
  Author-email: Ryan Oattes <ryan@kobai.io>
6
6
  License: Apache License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "kobai-sdk"
7
- version = "0.2.8rc10"
7
+ version = "0.2.8rc12"
8
8
  description = "A package that enables interaction with a Kobai tenant."
9
9
  readme = "README.md"
10
10
  authors = [{ name = "Ryan Oattes", email = "ryan@kobai.io" }]
File without changes
File without changes
File without changes
File without changes