kobai-sdk 0.2.8rc11__tar.gz → 0.2.8rc13__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: kobai-sdk
3
- Version: 0.2.8rc11
3
+ Version: 0.2.8rc13
4
4
  Summary: A package that enables interaction with a Kobai tenant.
5
5
  Author-email: Ryan Oattes <ryan@kobai.io>
6
6
  License: Apache License
@@ -73,17 +73,17 @@ def format_docs(docs):
73
73
  def input_only(inpt):
74
74
  return inpt["question"]
75
75
 
76
- def followup_question(user_question, question_results, question_name, question_def, embedding_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, use_inmem_vectors=False):
76
+ def followup_question(user_question, question_results, question_name, question_def, embedding_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, use_inmem_vectors=False, k=50):
77
77
 
78
78
  row_texts = process_question_results(question_def, question_results)
79
79
  question_documents = [Document(page_content=r, metadata={"source": "kobai"}) for r in row_texts]
80
80
 
81
81
  if use_inmem_vectors:
82
82
  question_retriever = InMemoryVectorStore.from_documents(question_documents, embedding=embedding_model).as_retriever(
83
- search_kwargs={"k": 5}
83
+ search_kwargs={"k": k}
84
84
  )
85
85
  else:
86
- question_retriever = QuestionRetriever(documents=question_documents)
86
+ question_retriever = QuestionRetriever(documents=question_documents, k=k)
87
87
 
88
88
  output_parser = StrOutputParser()
89
89
 
@@ -143,7 +143,7 @@ def __generate_sentences_from_questions(tc: AIContext, debug):
143
143
  ss.sql(full_sql)
144
144
 
145
145
 
146
- def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None):
146
+ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
147
147
  """
148
148
  Encode Semantic Data to Vectors in Delta Table
149
149
 
@@ -163,7 +163,7 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
163
163
  sentences_df = ss.sql(sentences_sql)
164
164
 
165
165
  num_records = sentences_df.count()
166
- query_batch_size = 100000
166
+ query_batch_size = batch_size
167
167
 
168
168
  for x in range(0, num_records, query_batch_size):
169
169
  print(f"Running Batch Starting at {x}")
@@ -174,9 +174,11 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
174
174
 
175
175
  if isinstance(st_model, SentenceTransformer):
176
176
  vector_list = st_model.encode(
177
- content_list, normalize_embeddings=True, show_progress_bar=True)
177
+ content_list, normalize_embeddings=True, show_progress_bar=True).tolist()
178
178
  else:
179
179
  vector_list = st_model.embed_documents(content_list)
180
+ for i, v in enumerate(vector_list):
181
+ vector_list[i] = [float(x) for x in v]
180
182
  #vector_list = st_model.encode(
181
183
  # content_list, normalize_embeddings=True, show_progress_bar=True)
182
184
 
@@ -185,7 +187,7 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
185
187
  StructField("vector", ArrayType(FloatType()), False)
186
188
  ])
187
189
 
188
- updated_list = [[r[0], r[1].tolist()]
190
+ updated_list = [[r[0], r[1]]
189
191
  for r in zip(id_list, vector_list)]
190
192
  updated_df = ss.createDataFrame(updated_list, schema_v)
191
193
 
@@ -199,15 +201,15 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
199
201
  .whenMatchedUpdate(set={"vector": "s.vector"}) \
200
202
  .execute()
201
203
 
202
- ss.sql(f"""
203
- CREATE FUNCTION IF NOT EXISTS {schema}.cos_sim(a ARRAY<FLOAT>, b ARRAY<FLOAT>)
204
- RETURNS FLOAT
205
- LANGUAGE PYTHON
206
- AS $$
207
- import numpy as np
208
- return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
209
- $$
210
- """)
204
+ #ss.sql(f"""
205
+ # CREATE FUNCTION IF NOT EXISTS {schema}.cos_sim(a ARRAY<FLOAT>, b ARRAY<FLOAT>)
206
+ # RETURNS FLOAT
207
+ # LANGUAGE PYTHON
208
+ # AS $$
209
+ # import numpy as np
210
+ # return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
211
+ # $$
212
+ # """)
211
213
 
212
214
 
213
215
  def rag_delta(tc: AIContext, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
@@ -82,6 +82,7 @@ class TenantClient:
82
82
  access = credential.authenticate()
83
83
 
84
84
  oauth_token = access.serialize()
85
+ print(oauth_token)
85
86
  user_name = json.loads(access.serialize())["username"]
86
87
 
87
88
  if override_username is not None:
@@ -451,7 +452,7 @@ class TenantClient:
451
452
  """
452
453
  ai_rag.generate_sentences(self.get_ai_context(), replica_schema=replica_schema, concept_white_list=concept_white_list, use_questions=use_questions, debug=debug)
453
454
 
454
- def rag_encode_to_delta_local(self, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None):
455
+ def rag_encode_to_delta_local(self, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
455
456
  """
456
457
  Encode Semantic Data to Vectors in Delta Table
457
458
 
@@ -459,7 +460,7 @@ class TenantClient:
459
460
  st_model (SentenceTransformer): A sentence_transformers model to use for encoding.
460
461
  replica_schema (str) OPTIONAL: An alternate schema (catalog.database) to create the Delta table. Useful when the base Kobai schema is not on a Unity Catalog.
461
462
  """
462
- ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema)
463
+ ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema, batch_size=batch_size)
463
464
 
464
465
  def rag_delta(self, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
465
466
  """
@@ -478,7 +479,7 @@ class TenantClient:
478
479
  # AI Functions
479
480
  ########################################
480
481
 
481
- def followup_question(self, user_question, question_id=None, use_inmem_vectors=False):
482
+ def followup_question(self, user_question, question_id=None, use_inmem_vectors=False, k=50):
482
483
  """
483
484
  Use LLM to answer question in the context of a Kobai Studio question.
484
485
 
@@ -499,7 +500,7 @@ class TenantClient:
499
500
  question_def = self.get_question(question_id)
500
501
  question_name = question_def["description"]
501
502
 
502
- return ai_query.followup_question(user_question, question_results, question_name, question_def, self.embedding_model, self.chat_model, use_inmem_vectors=use_inmem_vectors)
503
+ return ai_query.followup_question(user_question, question_results, question_name, question_def, self.embedding_model, self.chat_model, use_inmem_vectors=use_inmem_vectors, k=k)
503
504
 
504
505
  def init_ai_components(self, embedding_model: Union[SentenceTransformer, Embeddings] = None, chat_model: BaseChatModel = None):
505
506
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: kobai-sdk
3
- Version: 0.2.8rc11
3
+ Version: 0.2.8rc13
4
4
  Summary: A package that enables interaction with a Kobai tenant.
5
5
  Author-email: Ryan Oattes <ryan@kobai.io>
6
6
  License: Apache License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "kobai-sdk"
7
- version = "0.2.8rc11"
7
+ version = "0.2.8rc13"
8
8
  description = "A package that enables interaction with a Kobai tenant."
9
9
  readme = "README.md"
10
10
  authors = [{ name = "Ryan Oattes", email = "ryan@kobai.io" }]
File without changes
File without changes
File without changes
File without changes