kobai-sdk 0.2.7__tar.gz → 0.2.8rc2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kobai-sdk might be problematic.
- {kobai_sdk-0.2.7/kobai_sdk.egg-info → kobai_sdk-0.2.8rc2}/PKG-INFO +3 -2
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai/ai_query.py +7 -1
- kobai_sdk-0.2.8rc2/kobai/ai_rag.py +330 -0
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai/tenant_client.py +4 -4
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2/kobai_sdk.egg-info}/PKG-INFO +3 -2
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai_sdk.egg-info/SOURCES.txt +1 -1
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai_sdk.egg-info/requires.txt +1 -0
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/pyproject.toml +3 -2
- kobai_sdk-0.2.7/kobai/test.py +0 -5
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/LICENSE +0 -0
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/MANIFEST.in +0 -0
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/README.md +0 -0
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai/__init__.py +0 -0
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai/databricks_client.py +0 -0
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai/demo_tenant_client.py +0 -0
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai/llm_config.py +0 -0
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai/spark_client.py +0 -0
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai/tenant_api.py +0 -0
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai_sdk.egg-info/dependency_links.txt +0 -0
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai_sdk.egg-info/top_level.txt +0 -0
- {kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/setup.cfg +0 -0

{kobai_sdk-0.2.7/kobai_sdk.egg-info → kobai_sdk-0.2.8rc2}/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: kobai-sdk
-Version: 0.2.7
+Version: 0.2.8rc2
 Summary: A package that enables interaction with a Kobai tenant.
 Author-email: Ryan Oattes <ryan@kobai.io>
 License: Apache License
@@ -222,6 +222,7 @@ Requires-Dist: azure-storage-blob
 Requires-Dist: langchain-core
 Requires-Dist: langchain-community
 Requires-Dist: langchain_openai
+Requires-Dist: sentence_transformers
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: bumpver; extra == "dev"

{kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai/ai_query.py
@@ -79,7 +79,13 @@ def followup_question(question, data, question_name, llm_config:llm_config, over
             openai_api_version=llm_config.api_version,
             temperature = llm_config.temperature,
             max_tokens = llm_config.max_tokens,
-        )
+        )
+    else:
+        chat_model = ChatDatabricks(
+            endpoint = llm_config.endpoint,
+            temperature = llm_config.temperature,
+            max_tokens = llm_config.max_tokens,
+        )
 
     if llm_config.use_simple_prompt:
         prompt = PromptTemplate.from_template(SIMPLE_PROMPT_TEMPLATE)
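
For orientation, a minimal standalone sketch (not SDK code) of the provider branch this hunk introduces: when the configured provider is not Azure OpenAI, the follow-up chain now falls back to a Databricks model-serving endpoint. The helper name pick_chat_model and the exact "azure_openai" comparison are illustrative assumptions; the real selection logic lives inside followup_question above.

from langchain_openai import AzureChatOpenAI
from langchain_community.chat_models import ChatDatabricks

def pick_chat_model(cfg):
    # cfg is assumed to expose the fields the diff reads: api_version,
    # endpoint, temperature, max_tokens, and llm_provider.
    if cfg.llm_provider == "azure_openai":
        # Existing path: an Azure OpenAI chat model.
        return AzureChatOpenAI(
            openai_api_version=cfg.api_version,
            temperature=cfg.temperature,
            max_tokens=cfg.max_tokens,
        )
    # New path in 0.2.8rc2: route to a Databricks serving endpoint.
    return ChatDatabricks(
        endpoint=cfg.endpoint,
        temperature=cfg.temperature,
        max_tokens=cfg.max_tokens,
    )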

kobai_sdk-0.2.8rc2/kobai/ai_rag.py
@@ -0,0 +1,330 @@
+from kobai import tenant_client
+
+from pyspark.sql.types import StructType, StructField, StringType, ArrayType, FloatType, IntegerType
+#from delta.tables import *
+from sentence_transformers import SentenceTransformer, util
+#from deltalake import DeltaTable
+from delta import DeltaTable
+from typing import Union
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.embeddings import Embeddings
+from langchain_core.documents import Document
+#from databricks_langchain import DatabricksEmbeddings, ChatDatabricks
+from langchain_community.document_loaders import PySparkDataFrameLoader
+from langchain import hub
+from langchain_core.output_parsers import StrOutputParser
+
+
+
+
+
+def generate_sentences(tc: tenant_client.TenantClient, replica_schema=None, concept_white_list=None):
+
+    """
+    Extract Semantic Data from Graph to Delta Table
+
+    Parameters:
+    tc (TenantClient): The Kobai tenant_client instance instantiated via the SDK.
+    replica_schema (str): An alternate schema (catalog.database) to create the Delta table. Useful when the base Kobai schema is not on a Unity Catalog.
+    """
+
+    if tc.spark_client is None:
+        return None
+
+    ss = tc.spark_client.spark_session
+
+    print("Getting Tenant Config")
+    tenant_json = tc.get_tenant_config()
+
+    concepts = __get_concept_metadata(tenant_json, tc.schema, tc.model_id, concept_white_list)
+    print(concepts)
+    print("")
+
+    print("Dropping and Recreating the RAG Table")
+    ss.sql(__create_rag_table_sql(tc.schema, tc.model_id))
+
+    print("Generating Extraction SQL")
+    sql_statements = []
+    sql_statements.extend(__generate_sentence_sql_concept_literals(concepts, tc.schema, tc.model_id))
+    sql_statements.extend(__generate_sentence_sql_concept_relations(concepts, tc.schema, tc.model_id))
+
+    print("Running the Extraction")
+    for sql_statement in sql_statements:
+        print(sql_statement)
+        print("")
+        ss.sql(sql_statement)
+
+    if replica_schema is not None:
+        print("Replicating Schema")
+        ss.sql(__create_rag_table_sql(replica_schema, tc.model_id))
+        ss.sql(__replicate_to_catalog_sql(tc.schema, replica_schema, tc.model_id))
+
+def encode_to_delta_local(tc: tenant_client.TenantClient, st_model: SentenceTransformer, replica_schema=None):
+
+    if tc.spark_client is None:
+        return None
+
+    ss = tc.spark_client.spark_session
+
+    schema = tc.schema
+    if replica_schema is not None:
+        schema = replica_schema
+
+    sentences_sql = f"SELECT content FROM {schema}.rag_{tc.model_id}"
+    sentences_df = ss.sql(sentences_sql)
+
+    num_records = sentences_df.count()
+    query_batch_size = 100000
+
+    #pool = model.start_multi_process_pool()
+
+    for x in range(0, num_records, query_batch_size):
+        print(f"Running Batch Starting at {x}")
+        sentences_sql = f" SELECT id, content FROM {schema}.rag_{tc.model_id} ORDER BY id LIMIT {str(query_batch_size)} OFFSET {str(x)}"
+        sentences_df = ss.sql(sentences_sql)
+        content_list = [r["content"] for r in sentences_df.collect()]
+        id_list = [r["id"] for r in sentences_df.collect()]
+        #num_records_batch = len(content_list)
+        #print("Done Getting Data")
+
+
+        vector_list = st_model.encode(content_list, normalize_embeddings=True, show_progress_bar=True)
+        #vector_list = model.encode_multi_process(content_list, pool)
+
+        #print("Done Encoding")
+
+        schemaV = StructType([
+            StructField("id", IntegerType(), True),
+            StructField("vector", ArrayType(FloatType()), False)
+        ])
+
+        updated_list = [[r[0], r[1].tolist()] for r in zip(id_list, vector_list)]
+        updated_df = ss.createDataFrame(updated_list, schemaV)
+
+        target_table = DeltaTable.forName(ss, f"{schema}.rag_{tc.model_id}")
+
+        target_table.alias("t") \
+            .merge(
+                updated_df.alias("s"),
+                't.id = s.id'
+            ) \
+            .whenMatchedUpdate(set = {"vector": "s.vector"}) \
+            .execute()
+
+    ss.sql(f"""
+        CREATE FUNCTION IF NOT EXISTS {schema}.cos_sim(a ARRAY<FLOAT>, b ARRAY<FLOAT>)
+        RETURNS FLOAT
+        LANGUAGE PYTHON
+        AS $$
+            import numpy as np
+            return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
+        $$
+    """)
+
+def rag_delta(tc: tenant_client.TenantClient, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
+
+    schema = tc.schema
+    if replica_schema is not None:
+        schema = replica_schema
+
+    if tc.spark_client is None:
+        print("Instantiate Spark Client First")
+        return None
+
+    ss = tc.spark_client.spark_session
+
+    if isinstance(emb_model, SentenceTransformer):
+        vector_list = emb_model.encode(question, normalize_embeddings=True).tolist()
+    elif isinstance(emb_model, Embeddings):
+        vector_list = emb_model.embed_query(question)
+    else:
+        print("Invalid Embedding Model Type")
+        return None
+
+    if not isinstance(chat_model, BaseChatModel):
+        print("Invalid Chat Model Type")
+        return None
+
+    #print(vector_list)
+    vector_list = [str(x) for x in vector_list]
+    #print(vector_list)
+    vector_sql = ", ".join(vector_list)
+    #print(vector_sql)
+
+    results = ss.sql(f"""
+        SELECT content, reduce(zip_with(vector, cast(array({vector_sql}) as array<float>), (x,y) -> x*y), float(0.0), (acc,x) -> acc + x) score
+        FROM {schema}.rag_{tc.model_id}
+        ORDER BY score DESC
+        LIMIT {k}
+    """)
+
+    loader = PySparkDataFrameLoader(ss, results, page_content_column="content")
+    documents = loader.load()
+    docs_content = "\n\n".join(doc.page_content for doc in documents)
+
+    #print(docs_content)
+
+    prompt = hub.pull("rlm/rag-prompt")
+
+    output_parser = StrOutputParser()
+
+    chain = prompt | chat_model | output_parser
+
+    response = chain.invoke(
+        {
+            "context": docs_content,
+            "question": question
+        }
+    )
+
+    return response
+
+def dep_rag_delta(tc: tenant_client.TenantClient, st_model: SentenceTransformer, question, k=5, replica_schema=None):
+
+    schema = tc.schema
+    if replica_schema is not None:
+        schema = replica_schema
+
+    if tc.spark_client is None:
+        return None
+
+    ss = tc.spark_client.spark_session
+
+    vector_list = st_model.encode(question, normalize_embeddings=True).tolist()
+
+    #print(vector_list)
+    vector_list = [str(x) for x in vector_list]
+    #print(vector_list)
+    vector_sql = ", ".join(vector_list)
+    #print(vector_sql)
+
+    results = ss.sql(f"""
+        SELECT content, reduce(zip_with(vector, cast(array({vector_sql}) as array<float>), (x,y) -> x*y), float(0.0), (acc,x) -> acc + x) score
+        FROM {schema}.rag_{tc.model_id}
+        ORDER BY score DESC
+        LIMIT {k}
+    """)
+
+    return results
+
+def __create_rag_table_sql(schema, model_id):
+    return f"CREATE OR REPLACE TABLE {schema}.rag_{model_id} (id BIGINT GENERATED BY DEFAULT AS IDENTITY, content STRING, type string, concept_id string, vector ARRAY<FLOAT>) TBLPROPERTIES (delta.enableChangeDataFeed = true)"
+
+def __replicate_to_catalog_sql(base_schema, target_schema, model_id):
+    move_sql = f"INSERT INTO {target_schema}.rag_{model_id} (content, concept_id, type)"
+    move_sql += f" SELECT content, concept_id, type FROM {base_schema}.rag_{model_id}"
+    return move_sql
+
+def __generate_sentence_sql_concept_literals(concepts, schema, model_id):
+    statements = []
+    for con in concepts:
+        sql = f"'This is a {con['label']}. '"
+        #sql += " || 'It is identified by ' || split(cid._conceptid,'#')[1] || '. '"
+        sql += " || 'It is identified by ' || cid._plain_conceptid || '. '"
+
+        #sql_from = f"{con['con_table_name']} cid"
+        sql_from = f"(SELECT _conceptid, _plain_conceptid FROM {con['prop_table_name']} GROUP BY _conceptid, _plain_conceptid) cid"
+        for prop in con["properties"]:
+
+            sql_from += f" LEFT JOIN {con['prop_table_name']} AS {prop['label']}"
+            sql_from += f" ON cid._conceptid = {prop['label']}._conceptid"
+            sql_from += f" AND {prop['label']}.type = 'l'"
+            sql_from += f" AND {prop['label']}.name = '{prop['name']}'"
+
+            sql += f" || 'The {prop['label']} is ' || ifnull(any_value({prop['label']}.value) IGNORE NULLS, 'unknown') || '. '"
+
+        full_sql = f"INSERT INTO {schema}.rag_{model_id} (content, concept_id, type)"
+        full_sql += f" SELECT {sql} content, cid._conceptid concept_id, 'c' type FROM {sql_from} GROUP BY cid._conceptid, cid._plain_conceptid"
+
+        statements.append(full_sql)
+        #test_df = spark.sql(full_sql)
+    return statements
+
+def __generate_sentence_sql_concept_relations(concepts, schema, model_id):
+    statements = []
+    for con in concepts:
+        for rel in con["relations"]:
+            sql_from = f"{con['prop_table_name']} rel"
+            sql_from += f" INNER JOIN (SELECT _conceptid, _plain_conceptid FROM {rel['target_table_name']} GROUP BY _conceptid, _plain_conceptid) cid"
+            sql_from += f" ON rel.value = cid._conceptid"
+            sql_from += f" AND rel.type = 'r'"
+            sql_from += f" AND rel.name = '{rel['name']}'"
+
+            sql = f"'The {con['label']} identified by ' || rel._plain_conceptid"
+            sql += f" || ' has a relationship called {rel['label']} that connects it to one or more {rel['target_con_label']} identified by '"
+            #sql += " || concat_ws(', ', array_agg(split(value, '#')[1])) || '. '"
+            sql += " || concat_ws(', ', array_agg(cid._plain_conceptid)) || '. '"
+
+
+            full_sql = f"INSERT INTO {schema}.rag_{model_id} (content, concept_id, type)"
+            full_sql += f" SELECT {sql} content, rel._conceptid concept_id, 'e' type FROM {sql_from} GROUP BY rel._conceptid, rel._plain_conceptid"
+
+            statements.append(full_sql)
+    return statements
+
+def __get_concept_metadata(tenant_json, schema, model_id, whitelist):
+    target_concept_labels = {}
+    target_table_names = {}
+    for d in tenant_json["domains"]:
+        for c in d["concepts"]:
+            target_concept_labels[c["uri"]] = d["name"] + " " + c["label"]
+            target_table_names[c["uri"]] = {
+                "prop": f"{schema}.data_{model_id}_{d['name']}_{c['label']}_np",
+                "con": f"{schema}.data_{model_id}_{d['name']}_{c['label']}_c"
+            }
+
+    concepts = []
+    #parents = {}
+    for d in tenant_json["domains"]:
+        for c in d["concepts"]:
+            #if whitelist is not None and d["name"] + " " + c["label"] not in whitelist:
+            #    continue
+            con_props = []
+            for col in c["properties"]:
+                con_props.append({
+                    #"col_name": d["name"] + "_" + c["label"] + "_" + col["label"],
+                    "label": col["label"],
+                    "name": f"{model_id}/{d['name']}/{c['label']}#{col['label']}"
+                })
+            con_rels = []
+            for rel in c["relations"]:
+                if whitelist is not None and target_concept_labels[rel["relationTypeUri"]] not in whitelist:
+                    continue
+                con_rels.append({
+                    "label": rel["label"],
+                    "name": f"{model_id}/{d['name']}/{c['label']}#{rel['label']}",
+                    "target_con_label": target_concept_labels[rel["relationTypeUri"]],
+                    "target_table_name": target_table_names[rel["relationTypeUri"]]["prop"]
+                })
+            con_parents = []
+            for p in c["inheritedConcepts"]:
+                con_parents.append(p)
+            concepts.append({
+                "uri": c["uri"],
+                "label": d["name"] + " " + c["label"],
+                #"id_column": d["name"] + "_" + c["label"],
+                "relations": con_rels,
+                "properties": con_props,
+                "parents": con_parents,
+                #"table_name": "data_" + k.model_id + "_" + d["name"] + "_" + c["label"] + "_w",
+                #"prop_table_name": f"{schema}.data_{model_id}_{d['name']}_{c['label']}_np",
+                #"con_table_name": f"{schema}.data_{model_id}_{d['name']}_{c['label']}_c",
+                "prop_table_name": target_table_names[c["uri"]]["prop"],
+                "con_table_name": target_table_names[c["uri"]]["con"]
+            })
+
+    for ci, c in enumerate(concepts):
+        if len(c["parents"]) > 0:
+            for p in c["parents"]:
+                for a in concepts:
+                    if a["uri"] == p:
+                        concepts[ci]["properties"].extend(a["properties"])
+                        #concepts[ci]["properties"] = list(set(concepts[ci]["properties"]))
+
+    out_concepts = []
+    for c in concepts:
+        if whitelist is not None and c["label"] not in whitelist:
+            continue
+        out_concepts.append(c)
+
+    return out_concepts
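
Taken together, the new ai_rag module is a Delta-table RAG pipeline: generate_sentences flattens the semantic graph into natural-language sentences, encode_to_delta_local embeds them in batches and MERGEs the vectors back into the table (encoding uses normalize_embeddings=True, so the zip_with/reduce dot product in rag_delta is effectively a cosine similarity), and rag_delta retrieves the top-k sentences and feeds them to a chat model. A hedged end-to-end sketch, assuming a TenantClient tc whose spark_client is already instantiated; the embedding model name and chat model choice are illustrative, not mandated by the SDK:

from sentence_transformers import SentenceTransformer
from langchain_openai import AzureChatOpenAI  # any BaseChatModel should work
from kobai import ai_rag

st_model = SentenceTransformer("all-MiniLM-L6-v2")  # illustrative model choice
chat_model = AzureChatOpenAI()                      # assumes config via environment variables

ai_rag.generate_sentences(tc)                # graph -> sentence rows in {schema}.rag_{model_id}
ai_rag.encode_to_delta_local(tc, st_model)   # batch-encode and MERGE vectors back into the table
answer = ai_rag.rag_delta(tc, st_model, chat_model, "What is this concept connected to?", k=5)
print(answer)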

{kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai/tenant_client.py
@@ -441,10 +441,10 @@ class TenantClient:
 
         return ai_query.followup_question(followup_question,
             data,
-            question_name,
+            question_name,
+            None,
             override_model=override_model,
-
-            debug=debug)
+            )
 
     def process_question_results(self, question_def):
 
@@ -1018,7 +1018,7 @@ class TenantClient:
         datasource_label (string): Label of datasource to use.
         table_name (string): Name of table to use from specified datasource.
         """
-
+        data_source_id = 0
         existing_datasource = self.list_data_sources()
         for d in existing_datasource["used"]:
             if datasource_label.lower() == d["name"].lower():

{kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2/kobai_sdk.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: kobai-sdk
-Version: 0.2.7
+Version: 0.2.8rc2
 Summary: A package that enables interaction with a Kobai tenant.
 Author-email: Ryan Oattes <ryan@kobai.io>
 License: Apache License
@@ -222,6 +222,7 @@ Requires-Dist: azure-storage-blob
 Requires-Dist: langchain-core
 Requires-Dist: langchain-community
 Requires-Dist: langchain_openai
+Requires-Dist: sentence_transformers
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: bumpver; extra == "dev"

{kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/kobai_sdk.egg-info/SOURCES.txt
@@ -4,13 +4,13 @@ README.md
 pyproject.toml
 kobai/__init__.py
 kobai/ai_query.py
+kobai/ai_rag.py
 kobai/databricks_client.py
 kobai/demo_tenant_client.py
 kobai/llm_config.py
 kobai/spark_client.py
 kobai/tenant_api.py
 kobai/tenant_client.py
-kobai/test.py
 kobai_sdk.egg-info/PKG-INFO
 kobai_sdk.egg-info/SOURCES.txt
 kobai_sdk.egg-info/dependency_links.txt

{kobai_sdk-0.2.7 → kobai_sdk-0.2.8rc2}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "kobai-sdk"
-version = "0.2.7"
+version = "0.2.8rc2"
 description = "A package that enables interaction with a Kobai tenant."
 readme = "README.md"
 authors = [{ name = "Ryan Oattes", email = "ryan@kobai.io" }]
@@ -25,7 +25,8 @@ dependencies = [
     "azure-storage-blob",
     "langchain-core",
     "langchain-community",
-    "langchain_openai"
+    "langchain_openai",
+    "sentence_transformers"
 ]
 requires-python = ">=3.9"
 

kobai_sdk-0.2.7/kobai/test.py DELETED
@@ -1,5 +0,0 @@
-import llm_config, ai_query
-
-llm_config = llm_config.LLMConfig(api_key="sV9LuoA5n0PwqggMXOYMhhZlt56FpgnMXFohimPhD7Ug3CnBLbO8JQQJ99ALACYeBjFXJ3w3AAABACOGZm8X", llm_provider="azure_openai")
-llm_config.get_azure_ad_token()
-ai_query.followup_question_1(question="abc", data={}, question_name="sample", llm_config=llm_config)