kobai-sdk 0.2.8rc11__py3-none-any.whl → 0.2.8rc12__py3-none-any.whl
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kobai-sdk might be problematic. Click here for more details.
- kobai/ai_rag.py +6 -4
- kobai/tenant_client.py +2 -2
- {kobai_sdk-0.2.8rc11.dist-info → kobai_sdk-0.2.8rc12.dist-info}/METADATA +1 -1
- {kobai_sdk-0.2.8rc11.dist-info → kobai_sdk-0.2.8rc12.dist-info}/RECORD +7 -7
- {kobai_sdk-0.2.8rc11.dist-info → kobai_sdk-0.2.8rc12.dist-info}/LICENSE +0 -0
- {kobai_sdk-0.2.8rc11.dist-info → kobai_sdk-0.2.8rc12.dist-info}/WHEEL +0 -0
- {kobai_sdk-0.2.8rc11.dist-info → kobai_sdk-0.2.8rc12.dist-info}/top_level.txt +0 -0
kobai/ai_rag.py
CHANGED
|
@@ -143,7 +143,7 @@ def __generate_sentences_from_questions(tc: AIContext, debug):
|
|
|
143
143
|
ss.sql(full_sql)
|
|
144
144
|
|
|
145
145
|
|
|
146
|
-
def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None):
|
|
146
|
+
def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
|
|
147
147
|
"""
|
|
148
148
|
Encode Semantic Data to Vectors in Delta Table
|
|
149
149
|
|
|
@@ -163,7 +163,7 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
|
|
|
163
163
|
sentences_df = ss.sql(sentences_sql)
|
|
164
164
|
|
|
165
165
|
num_records = sentences_df.count()
|
|
166
|
-
query_batch_size =
|
|
166
|
+
query_batch_size = batch_size
|
|
167
167
|
|
|
168
168
|
for x in range(0, num_records, query_batch_size):
|
|
169
169
|
print(f"Running Batch Starting at {x}")
|
|
@@ -174,9 +174,11 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
|
|
|
174
174
|
|
|
175
175
|
if isinstance(st_model, SentenceTransformer):
|
|
176
176
|
vector_list = st_model.encode(
|
|
177
|
-
content_list, normalize_embeddings=True, show_progress_bar=True)
|
|
177
|
+
content_list, normalize_embeddings=True, show_progress_bar=True).tolist()
|
|
178
178
|
else:
|
|
179
179
|
vector_list = st_model.embed_documents(content_list)
|
|
180
|
+
for i, v in enumerate(vector_list):
|
|
181
|
+
vector_list[i] = [float(x) for x in v]
|
|
180
182
|
#vector_list = st_model.encode(
|
|
181
183
|
# content_list, normalize_embeddings=True, show_progress_bar=True)
|
|
182
184
|
|
|
@@ -185,7 +187,7 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
|
|
|
185
187
|
StructField("vector", ArrayType(FloatType()), False)
|
|
186
188
|
])
|
|
187
189
|
|
|
188
|
-
updated_list = [[r[0], r[1]
|
|
190
|
+
updated_list = [[r[0], r[1]]
|
|
189
191
|
for r in zip(id_list, vector_list)]
|
|
190
192
|
updated_df = ss.createDataFrame(updated_list, schema_v)
|
|
191
193
|
|
kobai/tenant_client.py
CHANGED
|
@@ -451,7 +451,7 @@ class TenantClient:
|
|
|
451
451
|
"""
|
|
452
452
|
ai_rag.generate_sentences(self.get_ai_context(), replica_schema=replica_schema, concept_white_list=concept_white_list, use_questions=use_questions, debug=debug)
|
|
453
453
|
|
|
454
|
-
def rag_encode_to_delta_local(self, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None):
|
|
454
|
+
def rag_encode_to_delta_local(self, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
|
|
455
455
|
"""
|
|
456
456
|
Encode Semantic Data to Vectors in Delta Table
|
|
457
457
|
|
|
@@ -459,7 +459,7 @@ class TenantClient:
|
|
|
459
459
|
st_model (SentenceTransformer): A sentence_transformers model to use for encoding.
|
|
460
460
|
replica_schema (str) OPTIONAL: An alternate schema (catalog.database) to create the Delta table. Useful when the base Kobai schema is not on a Unity Catalog.
|
|
461
461
|
"""
|
|
462
|
-
ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema)
|
|
462
|
+
ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema, batch_size=batch_size)
|
|
463
463
|
|
|
464
464
|
def rag_delta(self, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
|
|
465
465
|
"""
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
kobai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
kobai/ai_query.py,sha256=d0WOPKtQ-bI_zW3-_6guEJX0t55OcxdXIdgXaD-zKK0,9413
|
|
3
|
-
kobai/ai_rag.py,sha256=
|
|
3
|
+
kobai/ai_rag.py,sha256=hc8M7M9azY-tFovsXdrjHJTmemFr8L74X2dy0j-XUoY,14685
|
|
4
4
|
kobai/databricks_client.py,sha256=fyqqMly2Qm0r1AHWsQjkYeNsDdH0G1JSgTkF9KJ55qA,2118
|
|
5
5
|
kobai/demo_tenant_client.py,sha256=wlNc-bdI2wotRXo8ppUOalv4hYdBlek_WzJNARZV-AE,9293
|
|
6
6
|
kobai/spark_client.py,sha256=opM_F-4Ut5Hq5zZjWMuLvUps9sDULvyPNZHXGL8dW1k,776
|
|
7
7
|
kobai/tenant_api.py,sha256=9U6UbxpaAb-kpbuADXx3kbkNKaOzYy0I-GGwbpiCCOk,4212
|
|
8
|
-
kobai/tenant_client.py,sha256=
|
|
9
|
-
kobai_sdk-0.2.
|
|
10
|
-
kobai_sdk-0.2.
|
|
11
|
-
kobai_sdk-0.2.
|
|
12
|
-
kobai_sdk-0.2.
|
|
13
|
-
kobai_sdk-0.2.
|
|
8
|
+
kobai/tenant_client.py,sha256=OUHQMmqWlezenkmMJH9p4wOz5CsAntmO25v5vWdepVE,39464
|
|
9
|
+
kobai_sdk-0.2.8rc12.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
|
|
10
|
+
kobai_sdk-0.2.8rc12.dist-info/METADATA,sha256=vrkBxo0XcZ1uG4lPHs-XfL38rxu-az55yw4NuJ_v5rk,19205
|
|
11
|
+
kobai_sdk-0.2.8rc12.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
12
|
+
kobai_sdk-0.2.8rc12.dist-info/top_level.txt,sha256=ns1El3BrTTHKvoAgU1XtiSaVIudYeCXbEEUVY8HFDZ4,6
|
|
13
|
+
kobai_sdk-0.2.8rc12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|