kobai-sdk 0.2.8rc11.tar.gz → 0.2.8rc13.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/PKG-INFO +1 -1
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/ai_query.py +3 -3
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/ai_rag.py +15 -13
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/tenant_client.py +5 -4
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai_sdk.egg-info/PKG-INFO +1 -1
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/pyproject.toml +1 -1
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/LICENSE +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/MANIFEST.in +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/README.md +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/__init__.py +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/databricks_client.py +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/demo_tenant_client.py +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/spark_client.py +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/tenant_api.py +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai_sdk.egg-info/SOURCES.txt +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai_sdk.egg-info/dependency_links.txt +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai_sdk.egg-info/requires.txt +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai_sdk.egg-info/top_level.txt +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/setup.cfg +0 -0
kobai/ai_query.py

```diff
@@ -73,17 +73,17 @@ def format_docs(docs):
 def input_only(inpt):
     return inpt["question"]
 
-def followup_question(user_question, question_results, question_name, question_def, embedding_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, use_inmem_vectors=False):
+def followup_question(user_question, question_results, question_name, question_def, embedding_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, use_inmem_vectors=False, k=50):
 
     row_texts = process_question_results(question_def, question_results)
     question_documents = [Document(page_content=r, metadata={"source": "kobai"}) for r in row_texts]
 
     if use_inmem_vectors:
         question_retriever = InMemoryVectorStore.from_documents(question_documents, embedding=embedding_model).as_retriever(
-            search_kwargs={"k":
+            search_kwargs={"k": k}
         )
     else:
-        question_retriever = QuestionRetriever(documents=question_documents)
+        question_retriever = QuestionRetriever(documents=question_documents, k=k)
 
     output_parser = StrOutputParser()
 
```
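The substance of this hunk: the retrieval depth was previously hard-coded, and `followup_question` now exposes it as `k` (default 50), threading it into both retriever paths. As a hedged illustration of where that value lands, here is a minimal, self-contained sketch of a LangChain in-memory retriever capped by `search_kwargs`; the toy embedding model and documents are invented and are not kobai-sdk code:

```python
# Minimal sketch, not kobai-sdk code: how a `k` value caps a LangChain
# in-memory retriever, mirroring the use_inmem_vectors=True branch above.
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import InMemoryVectorStore

class ToyEmbeddings(Embeddings):
    """Stand-in embedding model so the sketch runs without model downloads."""
    def embed_documents(self, texts):
        return [[float(len(t)), 1.0] for t in texts]
    def embed_query(self, text):
        return [float(len(text)), 1.0]

docs = [Document(page_content=f"row {i}", metadata={"source": "kobai"}) for i in range(100)]

retriever = InMemoryVectorStore.from_documents(docs, embedding=ToyEmbeddings()) \
    .as_retriever(search_kwargs={"k": 50})  # k=50 mirrors the new default

print(len(retriever.invoke("row 1")))  # -> 50 rows reach the prompt, not all 100
```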
kobai/ai_rag.py

```diff
@@ -143,7 +143,7 @@ def __generate_sentences_from_questions(tc: AIContext, debug):
     ss.sql(full_sql)
 
 
-def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None):
+def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
     """
     Encode Semantic Data to Vectors in Delta Table
 
@@ -163,7 +163,7 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
     sentences_df = ss.sql(sentences_sql)
 
     num_records = sentences_df.count()
-    query_batch_size =
+    query_batch_size = batch_size
 
     for x in range(0, num_records, query_batch_size):
         print(f"Running Batch Starting at {x}")
@@ -174,9 +174,11 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
 
         if isinstance(st_model, SentenceTransformer):
             vector_list = st_model.encode(
-                content_list, normalize_embeddings=True, show_progress_bar=True)
+                content_list, normalize_embeddings=True, show_progress_bar=True).tolist()
         else:
             vector_list = st_model.embed_documents(content_list)
+            for i, v in enumerate(vector_list):
+                vector_list[i] = [float(x) for x in v]
         #vector_list = st_model.encode(
         #    content_list, normalize_embeddings=True, show_progress_bar=True)
 
@@ -185,7 +187,7 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
             StructField("vector", ArrayType(FloatType()), False)
         ])
 
-        updated_list = [[r[0], r[1]
+        updated_list = [[r[0], r[1]]
                         for r in zip(id_list, vector_list)]
         updated_df = ss.createDataFrame(updated_list, schema_v)
 
```
```diff
@@ -199,15 +201,15 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
             .whenMatchedUpdate(set={"vector": "s.vector"}) \
             .execute()
 
-    ss.sql(f"""
-        CREATE FUNCTION IF NOT EXISTS {schema}.cos_sim(a ARRAY<FLOAT>, b ARRAY<FLOAT>)
-        RETURNS FLOAT
-        LANGUAGE PYTHON
-        AS $$
-            import numpy as np
-            return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
-        $$
-    """)
+    #ss.sql(f"""
+    #    CREATE FUNCTION IF NOT EXISTS {schema}.cos_sim(a ARRAY<FLOAT>, b ARRAY<FLOAT>)
+    #    RETURNS FLOAT
+    #    LANGUAGE PYTHON
+    #    AS $$
+    #        import numpy as np
+    #        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
+    #    $$
+    #""")
 
 
 def rag_delta(tc: AIContext, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
```
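The remaining ai_rag.py changes coerce embeddings to plain Python floats: `SentenceTransformer.encode` returns a numpy array (hence the added `.tolist()`), and `Embeddings.embed_documents` can yield numpy scalars, which Spark's `ArrayType(FloatType())` may refuse to serialize. A hedged sketch of the failure mode being avoided; the `id` column name here is an assumption for illustration, since only `vector` appears in the diff:

```python
# Sketch (assumptions noted above): why vectors are cast to Python floats
# before ss.createDataFrame, as the new code now does in both branches.
import numpy as np
from pyspark.sql import SparkSession
from pyspark.sql.types import ArrayType, FloatType, StringType, StructField, StructType

ss = SparkSession.builder.getOrCreate()

schema_v = StructType([
    StructField("id", StringType(), False),    # assumed name; not shown in the diff
    StructField("vector", ArrayType(FloatType()), False),
])

vec = np.random.rand(4).astype(np.float32)     # stands in for one embedding row
safe = [float(x) for x in vec]                 # the cast added in this release

df = ss.createDataFrame([["doc-1", safe]], schema_v)
df.show()
# Passing the numpy.float32 values directly can raise
# "TypeError: not supported type: <class 'numpy.float32'>" on older PySpark.
```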
kobai/tenant_client.py

```diff
@@ -82,6 +82,7 @@ class TenantClient:
         access = credential.authenticate()
 
         oauth_token = access.serialize()
+        print(oauth_token)
         user_name = json.loads(access.serialize())["username"]
 
         if override_username is not None:
@@ -451,7 +452,7 @@ class TenantClient:
         """
         ai_rag.generate_sentences(self.get_ai_context(), replica_schema=replica_schema, concept_white_list=concept_white_list, use_questions=use_questions, debug=debug)
 
-    def rag_encode_to_delta_local(self, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None):
+    def rag_encode_to_delta_local(self, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
         """
         Encode Semantic Data to Vectors in Delta Table
 
@@ -459,7 +460,7 @@ class TenantClient:
         st_model (SentenceTransformer): A sentence_transformers model to use for encoding.
         replica_schema (str) OPTIONAL: An alternate schema (catalog.database) to create the Delta table. Useful when the base Kobai schema is not on a Unity Catalog.
         """
-        ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema)
+        ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema, batch_size=batch_size)
 
     def rag_delta(self, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
         """
@@ -478,7 +479,7 @@ class TenantClient:
     # AI Functions
     ########################################
 
-    def followup_question(self, user_question, question_id=None, use_inmem_vectors=False):
+    def followup_question(self, user_question, question_id=None, use_inmem_vectors=False, k=50):
         """
         Use LLM to answer question in the context of a Kobai Studio question.
 
@@ -499,7 +500,7 @@ class TenantClient:
         question_def = self.get_question(question_id)
         question_name = question_def["description"]
 
-        return ai_query.followup_question(user_question, question_results, question_name, question_def, self.embedding_model, self.chat_model, use_inmem_vectors=use_inmem_vectors)
+        return ai_query.followup_question(user_question, question_results, question_name, question_def, self.embedding_model, self.chat_model, use_inmem_vectors=use_inmem_vectors, k=k)
 
     def init_ai_components(self, embedding_model: Union[SentenceTransformer, Embeddings] = None, chat_model: BaseChatModel = None):
         """
```
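Taken together, the TenantClient changes surface two tuning knobs that were previously fixed inside the library. A hedged usage sketch follows; the client construction, model objects, question text, and question id are placeholders, not values from this diff:

```python
# Hypothetical usage of the new keyword arguments; setup details are
# placeholders (see the kobai-sdk docs for real TenantClient construction).
from kobai import tenant_client

tc = tenant_client.TenantClient(...)  # construction args omitted here
tc.init_ai_components(embedding_model=my_embeddings, chat_model=my_chat_model)

# New batch_size knob: encoding batches were a fixed internal constant before.
tc.rag_encode_to_delta_local(my_embeddings, batch_size=50000)

answer = tc.followup_question(
    "Which assets reported faults last month?",  # invented question
    question_id=123,                             # invented question id
    k=25,                                        # rows retrieved for the LLM (default 50)
)
```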
pyproject.toml

```diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "kobai-sdk"
-version = "0.2.8rc11"
+version = "0.2.8rc13"
 description = "A package that enables interaction with a Kobai tenant."
 readme = "README.md"
 authors = [{ name = "Ryan Oattes", email = "ryan@kobai.io" }]
```