kobai-sdk 0.2.8rc11.tar.gz → 0.2.8rc13.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/PKG-INFO +1 -1
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/ai_query.py +3 -3
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/ai_rag.py +15 -13
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/tenant_client.py +5 -4
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai_sdk.egg-info/PKG-INFO +1 -1
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/pyproject.toml +1 -1
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/LICENSE +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/MANIFEST.in +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/README.md +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/__init__.py +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/databricks_client.py +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/demo_tenant_client.py +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/spark_client.py +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai/tenant_api.py +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai_sdk.egg-info/SOURCES.txt +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai_sdk.egg-info/dependency_links.txt +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai_sdk.egg-info/requires.txt +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/kobai_sdk.egg-info/top_level.txt +0 -0
- {kobai_sdk-0.2.8rc11 → kobai_sdk-0.2.8rc13}/setup.cfg +0 -0
kobai/ai_query.py

```diff
@@ -73,17 +73,17 @@ def format_docs(docs):
 def input_only(inpt):
     return inpt["question"]
 
-def followup_question(user_question, question_results, question_name, question_def, embedding_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, use_inmem_vectors=False):
+def followup_question(user_question, question_results, question_name, question_def, embedding_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, use_inmem_vectors=False, k=50):
 
     row_texts = process_question_results(question_def, question_results)
     question_documents = [Document(page_content=r, metadata={"source": "kobai"}) for r in row_texts]
 
     if use_inmem_vectors:
         question_retriever = InMemoryVectorStore.from_documents(question_documents, embedding=embedding_model).as_retriever(
-            search_kwargs={"k":
+            search_kwargs={"k": k}
         )
     else:
-        question_retriever = QuestionRetriever(documents=question_documents)
+        question_retriever = QuestionRetriever(documents=question_documents, k=k)
 
     output_parser = StrOutputParser()
 
```
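The substance of this hunk: the retrieval depth was previously hard-coded, and `followup_question` now exposes it as `k` (default 50), threading it into both retriever paths. As a hedged illustration of where that value lands, here is a minimal, self-contained sketch of a LangChain in-memory retriever capped by `search_kwargs`; the toy embedding model and documents are invented and are not kobai-sdk code:

```python
# Minimal sketch, not kobai-sdk code: how a `k` value caps a LangChain
# in-memory retriever, mirroring the use_inmem_vectors=True branch above.
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import InMemoryVectorStore

class ToyEmbeddings(Embeddings):
    """Stand-in embedding model so the sketch runs without model downloads."""
    def embed_documents(self, texts):
        return [[float(len(t)), 1.0] for t in texts]
    def embed_query(self, text):
        return [float(len(text)), 1.0]

docs = [Document(page_content=f"row {i}", metadata={"source": "kobai"}) for i in range(100)]

retriever = InMemoryVectorStore.from_documents(docs, embedding=ToyEmbeddings()) \
    .as_retriever(search_kwargs={"k": 50})  # k=50 mirrors the new default

print(len(retriever.invoke("row 1")))  # -> 50 rows reach the prompt, not all 100
```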
kobai/ai_rag.py

```diff
@@ -143,7 +143,7 @@ def __generate_sentences_from_questions(tc: AIContext, debug):
     ss.sql(full_sql)
 
 
-def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None):
+def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
     """
     Encode Semantic Data to Vectors in Delta Table
 
@@ -163,7 +163,7 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
     sentences_df = ss.sql(sentences_sql)
 
     num_records = sentences_df.count()
-    query_batch_size =
+    query_batch_size = batch_size
 
     for x in range(0, num_records, query_batch_size):
         print(f"Running Batch Starting at {x}")
@@ -174,9 +174,11 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
 
         if isinstance(st_model, SentenceTransformer):
             vector_list = st_model.encode(
-                content_list, normalize_embeddings=True, show_progress_bar=True)
+                content_list, normalize_embeddings=True, show_progress_bar=True).tolist()
         else:
             vector_list = st_model.embed_documents(content_list)
+            for i, v in enumerate(vector_list):
+                vector_list[i] = [float(x) for x in v]
         #vector_list = st_model.encode(
         #    content_list, normalize_embeddings=True, show_progress_bar=True)
 
@@ -185,7 +187,7 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
             StructField("vector", ArrayType(FloatType()), False)
         ])
 
-        updated_list = [[r[0], r[1]
+        updated_list = [[r[0], r[1]]
                         for r in zip(id_list, vector_list)]
         updated_df = ss.createDataFrame(updated_list, schema_v)
 
```
```diff
@@ -199,15 +201,15 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
             .whenMatchedUpdate(set={"vector": "s.vector"}) \
             .execute()
 
-    ss.sql(f"""
-        CREATE FUNCTION IF NOT EXISTS {schema}.cos_sim(a ARRAY<FLOAT>, b ARRAY<FLOAT>)
-        RETURNS FLOAT
-        LANGUAGE PYTHON
-        AS $$
-            import numpy as np
-            return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
-        $$
-    """)
+    #ss.sql(f"""
+    #    CREATE FUNCTION IF NOT EXISTS {schema}.cos_sim(a ARRAY<FLOAT>, b ARRAY<FLOAT>)
+    #    RETURNS FLOAT
+    #    LANGUAGE PYTHON
+    #    AS $$
+    #        import numpy as np
+    #        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
+    #    $$
+    #""")
 
 
 def rag_delta(tc: AIContext, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
```
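The remaining ai_rag.py changes coerce embeddings to plain Python floats: `SentenceTransformer.encode` returns a numpy array (hence the added `.tolist()`), and `Embeddings.embed_documents` can yield numpy scalars, which Spark's `ArrayType(FloatType())` may refuse to serialize. A hedged sketch of the failure mode being avoided; the `id` column name here is an assumption for illustration, since only `vector` appears in the diff:

```python
# Sketch (assumptions noted above): why vectors are cast to Python floats
# before ss.createDataFrame, as the new code now does in both branches.
import numpy as np
from pyspark.sql import SparkSession
from pyspark.sql.types import ArrayType, FloatType, StringType, StructField, StructType

ss = SparkSession.builder.getOrCreate()

schema_v = StructType([
    StructField("id", StringType(), False),    # assumed name; not shown in the diff
    StructField("vector", ArrayType(FloatType()), False),
])

vec = np.random.rand(4).astype(np.float32)     # stands in for one embedding row
safe = [float(x) for x in vec]                 # the cast added in this release

df = ss.createDataFrame([["doc-1", safe]], schema_v)
df.show()
# Passing the numpy.float32 values directly can raise
# "TypeError: not supported type: <class 'numpy.float32'>" on older PySpark.
```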
kobai/tenant_client.py

```diff
@@ -82,6 +82,7 @@ class TenantClient:
         access = credential.authenticate()
 
         oauth_token = access.serialize()
+        print(oauth_token)
         user_name = json.loads(access.serialize())["username"]
 
         if override_username is not None:
@@ -451,7 +452,7 @@ class TenantClient:
         """
         ai_rag.generate_sentences(self.get_ai_context(), replica_schema=replica_schema, concept_white_list=concept_white_list, use_questions=use_questions, debug=debug)
 
-    def rag_encode_to_delta_local(self, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None):
+    def rag_encode_to_delta_local(self, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
         """
         Encode Semantic Data to Vectors in Delta Table
 
@@ -459,7 +460,7 @@ class TenantClient:
         st_model (SentenceTransformer): A sentence_transformers model to use for encoding.
         replica_schema (str) OPTIONAL: An alternate schema (catalog.database) to create the Delta table. Useful when the base Kobai schema is not on a Unity Catalog.
         """
-        ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema)
+        ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema, batch_size=batch_size)
 
     def rag_delta(self, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
         """
@@ -478,7 +479,7 @@ class TenantClient:
     # AI Functions
     ########################################
 
-    def followup_question(self, user_question, question_id=None, use_inmem_vectors=False):
+    def followup_question(self, user_question, question_id=None, use_inmem_vectors=False, k=50):
         """
         Use LLM to answer question in the context of a Kobai Studio question.
 
@@ -499,7 +500,7 @@ class TenantClient:
         question_def = self.get_question(question_id)
         question_name = question_def["description"]
 
-        return ai_query.followup_question(user_question, question_results, question_name, question_def, self.embedding_model, self.chat_model, use_inmem_vectors=use_inmem_vectors)
+        return ai_query.followup_question(user_question, question_results, question_name, question_def, self.embedding_model, self.chat_model, use_inmem_vectors=use_inmem_vectors, k=k)
 
     def init_ai_components(self, embedding_model: Union[SentenceTransformer, Embeddings] = None, chat_model: BaseChatModel = None):
         """
```
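Taken together, the TenantClient changes surface two tuning knobs that were previously fixed inside the library. A hedged usage sketch follows; the client construction, model objects, question text, and question id are placeholders, not values from this diff:

```python
# Hypothetical usage of the new keyword arguments; setup details are
# placeholders (see the kobai-sdk docs for real TenantClient construction).
from kobai import tenant_client

tc = tenant_client.TenantClient(...)  # construction args omitted here
tc.init_ai_components(embedding_model=my_embeddings, chat_model=my_chat_model)

# New batch_size knob: encoding batches were a fixed internal constant before.
tc.rag_encode_to_delta_local(my_embeddings, batch_size=50000)

answer = tc.followup_question(
    "Which assets reported faults last month?",  # invented question
    question_id=123,                             # invented question id
    k=25,                                        # rows retrieved for the LLM (default 50)
)
```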
pyproject.toml

```diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "kobai-sdk"
-version = "0.2.8rc11"
+version = "0.2.8rc13"
 description = "A package that enables interaction with a Kobai tenant."
 readme = "README.md"
 authors = [{ name = "Ryan Oattes", email = "ryan@kobai.io" }]
```