kobai-sdk 0.2.6__py3-none-any.whl → 0.2.8rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kobai-sdk might be problematic.
- kobai/ai_query.py +7 -1
- kobai/ai_rag.py +113 -0
- kobai/tenant_client.py +17 -14
- {kobai_sdk-0.2.6.dist-info → kobai_sdk-0.2.8rc1.dist-info}/METADATA +2 -2
- kobai_sdk-0.2.8rc1.dist-info/RECORD +14 -0
- {kobai_sdk-0.2.6.dist-info → kobai_sdk-0.2.8rc1.dist-info}/WHEEL +1 -1
- kobai/test.py +0 -5
- kobai_sdk-0.2.6.dist-info/RECORD +0 -14
- {kobai_sdk-0.2.6.dist-info → kobai_sdk-0.2.8rc1.dist-info}/LICENSE +0 -0
- {kobai_sdk-0.2.6.dist-info → kobai_sdk-0.2.8rc1.dist-info}/top_level.txt +0 -0
kobai/ai_query.py
CHANGED
@@ -79,7 +79,13 @@ def followup_question(question, data, question_name, llm_config:llm_config, over
             openai_api_version=llm_config.api_version,
             temperature = llm_config.temperature,
             max_tokens = llm_config.max_tokens,
-            )
+        )
+    else:
+        chat_model = ChatDatabricks(
+            endpoint = llm_config.endpoint,
+            temperature = llm_config.temperature,
+            max_tokens = llm_config.max_tokens,
+        )

     if llm_config.use_simple_prompt:
         prompt = PromptTemplate.from_template(SIMPLE_PROMPT_TEMPLATE)
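The change above adds a fallback: when `llm_config` does not target Azure OpenAI, `followup_question` now builds a `ChatDatabricks` chat model from the same config fields. A minimal sketch of what that branch amounts to, assuming the `langchain_community` integration (the SDK's actual import is not shown in this hunk) and a hypothetical serving endpoint name:

    # Hedged sketch only: mirrors the three fields the new branch reads from llm_config.
    from langchain_community.chat_models import ChatDatabricks

    chat_model = ChatDatabricks(
        endpoint="databricks-meta-llama-3-1-70b-instruct",  # hypothetical endpoint name
        temperature=0.1,
        max_tokens=256,
    )
    print(chat_model.invoke("Say hello.").content)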
kobai/ai_rag.py
ADDED
@@ -0,0 +1,113 @@
+from kobai import tenant_client
+
+def generate_sentences(tc: tenant_client.TenantClient, replica_schema=None):
+    if tc.spark_client is None:
+        return None
+
+    ss = tc.spark_client.spark_session
+
+    print("Getting Tenant Config")
+    tenant_json = tc.get_tenant_config()
+
+    concepts = __get_concept_metadata(tenant_json, tc.schema, tc.model_id)
+
+    print("Dropping and Recreating the RAG Table")
+    ss.sql(__create_rag_table_sql(tc.schema, tc.model_id))
+
+    print("Generating Extraction SQL")
+    sql_statements = []
+    sql_statements.extend(__generate_sentence_sql_concept_literals(concepts, tc.schema, tc.model_id))
+    sql_statements.extend(__generate_sentence_sql_concept_relations(concepts, tc.schema, tc.model_id))
+
+    print("Running the Extraction")
+    for sql_statement in sql_statements:
+        ss.sql(sql_statement)
+
+    if replica_schema is not None:
+        print("Replicating Schema")
+        ss.sql(__create_rag_table_sql(replica_schema, tc.model_id))
+        ss.sql(__replicate_to_catalog_sql(tc.schema, replica_schema, tc.model_id))
+
+def __create_rag_table_sql(schema, model_id):
+    return f"CREATE OR REPLACE TABLE {schema}.rag_{model_id} (id BIGINT GENERATED BY DEFAULT AS IDENTITY, content STRING, type string, concept_id string, vector ARRAY<FLOAT>) TBLPROPERTIES (delta.enableChangeDataFeed = true)"
+
+def __replicate_to_catalog_sql(base_schema, target_schema, model_id):
+    move_sql = f"INSERT INTO {target_schema}.rag_{model_id} (content, concept_id, type)"
+    move_sql += f" SELECT content, concept_id, type FROM {base_schema}.rag_{model_id}"
+    return move_sql
+
+def __generate_sentence_sql_concept_literals(concepts, schema, model_id):
+    statements = []
+    for con in concepts:
+        sql = f"'This is a {con['label']}. '"
+        sql += " || 'It is identified by ' || split(cid._conceptid,'#')[1] || '. '"
+
+        sql_from = f"{con['con_table_name']} cid"
+        for prop in con["properties"]:
+
+            sql_from += f" INNER JOIN {con['prop_table_name']} AS {prop['label']}"
+            sql_from += f" ON cid._conceptid = {prop['label']}._conceptid"
+            sql_from += f" AND {prop['label']}.type = 'l'"
+            sql_from += f" AND {prop['label']}.name = '{prop['name']}'"
+
+            sql += f" || 'The {prop['label']} is ' || any_value({prop['label']}.value) IGNORE NULLS || '. '"
+
+        full_sql = f"INSERT INTO {schema}.rag_{model_id} (content, concept_id, type)"
+        full_sql += f" SELECT {sql} content, cid._conceptid concept_id, 'c' type FROM {sql_from} GROUP BY cid._conceptid"
+
+        statements.append(full_sql)
+        #test_df = spark.sql(full_sql)
+    return statements
+
+def __generate_sentence_sql_concept_relations(concepts, schema, model_id):
+    statements = []
+    for con in concepts:
+
+        sql_from = f"{con['prop_table_name']} "
+        for rel in con["relations"]:
+
+            sql = f"'The {con['label']} identified by ' || split(_conceptid,'#')[1]"
+            sql += f" || ' has a relationship called {rel['label']} that connects it to one or more {rel['target_con_label']} identified by '"
+            sql += " || concat_ws(', ', array_agg(split(value, '#')[1])) || '. '"
+
+
+            full_sql = f"INSERT INTO {schema}.rag_{model_id} (content, concept_id, type)"
+            full_sql += f" SELECT {sql} content, _conceptid concept_id, 'e' type FROM {sql_from} GROUP BY _conceptid"
+
+            statements.append(full_sql)
+    return statements
+
+def __get_concept_metadata(tenant_json, schema, model_id):
+    target_concept_labels = {}
+    for d in tenant_json["domains"]:
+        for c in d["concepts"]:
+            target_concept_labels[c["uri"]] = d["name"] + " " + c["label"]
+
+    concepts = []
+
+    for d in tenant_json["domains"]:
+        for c in d["concepts"]:
+            con_props = []
+            for col in c["properties"]:
+                con_props.append({
+                    #"col_name": d["name"] + "_" + c["label"] + "_" + col["label"],
+                    "label": col["label"],
+                    "name": f"{model_id}/{d['name']}/{c['label']}#{col['label']}"
+                })
+            con_rels = []
+            for rel in c["relations"]:
+                con_rels.append({
+                    "label": rel["label"],
+                    "name": f"{model_id}/{d['name']}/{c['label']}#{rel['label']}",
+                    "target_con_label": target_concept_labels[rel["relationTypeUri"]]
+                })
+            concepts.append({
+                "label": d["name"] + " " + c["label"],
+                #"id_column": d["name"] + "_" + c["label"],
+                "relations": con_rels,
+                "properties": con_props,
+                #"table_name": "data_" + k.model_id + "_" + d["name"] + "_" + c["label"] + "_w",
+                "prop_table_name": f"{schema}.data_{model_id}_{d['name']}_{c['label']}_np",
+                "con_table_name": f"{schema}.data_{model_id}_{d['name']}_{c['label']}_c",
+            })
+    return concepts
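In short, the new module rebuilds `<schema>.rag_<model_id>`, then inserts one English sentence per concept instance (type 'c', e.g. "This is a Sales Customer. It is identified by C123. The name is Acme. ") and one per relation (type 'e'), optionally mirroring the result into a second schema. A hedged usage sketch, assuming `tc` is a `TenantClient` that has already been authenticated and attached to a Spark session (that setup is outside this diff):

    from kobai import ai_rag

    # tc: an initialized kobai.tenant_client.TenantClient with spark_client set.
    ai_rag.generate_sentences(tc)  # fills <tc.schema>.rag_<tc.model_id>

    # Optionally replicate the generated table into another schema/catalog:
    ai_rag.generate_sentences(tc, replica_schema="main.kobai_replica")  # hypothetical schema name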
kobai/tenant_client.py
CHANGED
@@ -339,26 +339,26 @@ class TenantClient:
                 hasRels = True
             if hasProps or hasRels:
                 hasEither = True
-            con_label = dom["name"] + "_" + con["
+            con_label = dom["name"] + "_" + con["label"]
             out_table = con["schema_table"].replace(".data_", ".genie_").replace("_np", "")
             sql = "CREATE OR REPLACE VIEW " + out_table + " "
             sql += "(" + con_label + " COMMENT '" + con["schema_id_sentence"] + "' "
             if hasEither:
                 sql += ", "
-            from_sql = "(SELECT DISTINCT _conceptid, p1 FROM " + con["schema_table"] + ") AS " + dom["name"] + "_" + con["
+            from_sql = "(SELECT DISTINCT _conceptid, p1 FROM " + con["schema_table"] + ") AS " + dom["name"] + "_" + con["label"] + "_ID "
             as_sql = "SELECT DISTINCT " + con_label + "_ID._conceptid " + con_label
             if hasEither:
                 as_sql += ", "
             as_props = []
             top_props = []
             for prop in con["properties"]:
-                prop_label = con_label + "_" + prop["
+                prop_label = con_label + "_" + prop["label"]
                 prop_name = self.model_id + "/" + prop["uri"].split("/")[-2] + "/" + prop["uri"].split("/")[-1]
                 from_sql += "LEFT JOIN " + con["schema_table"] + " AS " + prop_label + " ON " + prop_label + ".type='l' AND " + prop_label + ".name='" + prop_name + "' AND " + prop_label + ".scenario='' AND " + con_label + "_ID.p1=" + prop_label + ".p1 AND " + con_label + "_ID._conceptid=" + prop_label + "._conceptid "
                 as_props.append(prop_label + ".value " + prop_label)
                 top_props.append(prop_label + " COMMENT '" + prop["schema_sentence"] + "'")
             for prop in con["relations"]:
-                prop_label = con_label + "_" + prop["
+                prop_label = con_label + "_" + prop["label"]
                 prop_name = self.model_id + "/" + prop["uri"].split("/")[-2] + "/" + prop["uri"].split("/")[-1]
                 from_sql += "LEFT JOIN " + con["schema_table"] + " AS " + prop_label + " ON " + prop_label + ".type='r' AND " + prop_label + ".name='" + prop_name + "' AND " + prop_label + ".scenario='' AND " + con_label + "_ID.p1=" + prop_label + ".p1 AND " + con_label + "_ID._conceptid=" + prop_label + "._conceptid "
                 as_props.append(prop_label + ".value " + prop_label)

@@ -371,7 +371,7 @@ class TenantClient:


             if not_concepts is not None:
-                if con["
+                if con["label"] in not_concepts:
                     continue

             concept_added = False

@@ -388,7 +388,7 @@ class TenantClient:
                     concept_added = True
                     sql_list.append({"table": out_table, "sql": sql})
             if concepts is not None:
-                if con["
+                if con["label"] in concepts and not concept_added:
                     if enforce_map and con["map_count"] > 0:
                         sql_list.append({"table": out_table, "sql": sql})
                     else:

@@ -441,10 +441,10 @@ class TenantClient:

         return ai_query.followup_question(followup_question,
                                           data,
-                                          question_name,
+                                          question_name,
+                                          None,
                                           override_model=override_model,
-
-                                          debug=debug)
+                                          )

     def process_question_results(self, question_def):

@@ -621,7 +621,7 @@ class TenantClient:
     # Tenant Questions
     ########################################

-    def run_question_remote(self, question_id):
+    def run_question_remote(self, question_id, dynamic_filters: dict = None):

         """
         Returns JSON formatted result of Kobai question.

@@ -630,10 +630,13 @@ class TenantClient:
             question_id (int): Numeric identifier of Kobai question.
         """

-        uri = '/data-svcs/api/query/' + str(question_id) + '/execute?
+        uri = '/data-svcs/api/query/' + str(question_id) + '/execute?' #'/data-svcs/api/query/4518/solution/9/execute/tabular?'

         queryParams = {'jsontype': 'tableau'}

+        if bool(dynamic_filters):
+            queryParams.update(dynamic_filters)
+
         uri += urllib.parse.urlencode(queryParams)

         json={

@@ -643,7 +646,7 @@ class TenantClient:

         return response.json()

-    def run_question_remote_spark(self, question_id, schema=None):
+    def run_question_remote_spark(self, question_id, dynamic_filters: dict = None, schema=None):

         """
         Returns result of Kobai question in PySpark Dataframe.

@@ -655,7 +658,7 @@ class TenantClient:
         if not self.__spark_check_init_status():
             return None

-        question_data = self.run_question_remote(question_id)
+        question_data = self.run_question_remote(question_id, dynamic_filters)

         if question_data is None:
             return None

@@ -1015,7 +1018,7 @@ class TenantClient:
         datasource_label (string): Label of datasource to use.
         table_name (string): Name of table to use from specified datasource.
         """
-
+        data_source_id = 0
         existing_datasource = self.list_data_sources()
         for d in existing_datasource["used"]:
             if datasource_label.lower() == d["name"].lower():
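The net effect of the `dynamic_filters` additions: any dict passed in is URL-encoded into the query string alongside `jsontype=tableau`, and `run_question_remote_spark` now forwards it through to `run_question_remote`. A hedged sketch with a hypothetical filter key (valid keys depend on the Kobai data service and are not documented in this diff); question id 4518 is borrowed from the commented-out URI above:

    # tc: an initialized TenantClient, as in the ai_rag sketch above.
    rows = tc.run_question_remote(4518, dynamic_filters={"region": "EMEA"})  # hypothetical key/value
    df = tc.run_question_remote_spark(4518, dynamic_filters={"region": "EMEA"})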
kobai_sdk-0.2.8rc1.dist-info/RECORD
ADDED
@@ -0,0 +1,14 @@
+kobai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+kobai/ai_query.py,sha256=fMTcfj-6Ma3FRB08VYEDj8PwOEOtFGsJHyQrha5yvPg,4512
+kobai/ai_rag.py,sha256=y_N7qVu8HfUHHZPIyQSO7L995RBeNtDhva7U5HBHSfY,5063
+kobai/databricks_client.py,sha256=fyqqMly2Qm0r1AHWsQjkYeNsDdH0G1JSgTkF9KJ55qA,2118
+kobai/demo_tenant_client.py,sha256=wlNc-bdI2wotRXo8ppUOalv4hYdBlek_WzJNARZV-AE,9293
+kobai/llm_config.py,sha256=ZFx81cUAOHYZgRoTkTY-utQYaWYlmR8773ZJpj74C1A,1900
+kobai/spark_client.py,sha256=opM_F-4Ut5Hq5zZjWMuLvUps9sDULvyPNZHXGL8dW1k,776
+kobai/tenant_api.py,sha256=9U6UbxpaAb-kpbuADXx3kbkNKaOzYy0I-GGwbpiCCOk,4212
+kobai/tenant_client.py,sha256=AyJ5R2oukEv3q1dcItpojvTUVp5-gwUKvyGjofjBKyc,41821
+kobai_sdk-0.2.8rc1.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+kobai_sdk-0.2.8rc1.dist-info/METADATA,sha256=nZTb2svQk01wT32zBZDPKgeYnSAx22YER5YLHEIjoAQ,19167
+kobai_sdk-0.2.8rc1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+kobai_sdk-0.2.8rc1.dist-info/top_level.txt,sha256=ns1El3BrTTHKvoAgU1XtiSaVIudYeCXbEEUVY8HFDZ4,6
+kobai_sdk-0.2.8rc1.dist-info/RECORD,,
kobai/test.py
DELETED
@@ -1,5 +0,0 @@
-import llm_config, ai_query
-
-llm_config = llm_config.LLMConfig(api_key="sV9LuoA5n0PwqggMXOYMhhZlt56FpgnMXFohimPhD7Ug3CnBLbO8JQQJ99ALACYeBjFXJ3w3AAABACOGZm8X", llm_provider="azure_openai")
-llm_config.get_azure_ad_token()
-ai_query.followup_question_1(question="abc", data={}, question_name="sample", llm_config=llm_config)
kobai_sdk-0.2.6.dist-info/RECORD
DELETED
@@ -1,14 +0,0 @@
-kobai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kobai/ai_query.py,sha256=QciKN643VsoZ5rP9zzxshuSMyXouLfp46WR9g4et1-M,4309
-kobai/databricks_client.py,sha256=fyqqMly2Qm0r1AHWsQjkYeNsDdH0G1JSgTkF9KJ55qA,2118
-kobai/demo_tenant_client.py,sha256=wlNc-bdI2wotRXo8ppUOalv4hYdBlek_WzJNARZV-AE,9293
-kobai/llm_config.py,sha256=ZFx81cUAOHYZgRoTkTY-utQYaWYlmR8773ZJpj74C1A,1900
-kobai/spark_client.py,sha256=opM_F-4Ut5Hq5zZjWMuLvUps9sDULvyPNZHXGL8dW1k,776
-kobai/tenant_api.py,sha256=9U6UbxpaAb-kpbuADXx3kbkNKaOzYy0I-GGwbpiCCOk,4212
-kobai/tenant_client.py,sha256=LhAZPMYbFdMnE16g9jqdc5w2qSmubLPnW9_VBo90LFw,41696
-kobai/test.py,sha256=LHCKClyFNPgLA1hF62RlDzilaFw2Nd3BWcaLtOc5xdc,320
-kobai_sdk-0.2.6.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-kobai_sdk-0.2.6.dist-info/METADATA,sha256=q3tpys2ZjqVXDl441A4TA-0QE8dagHxZEp_U68H-ePk,19164
-kobai_sdk-0.2.6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-kobai_sdk-0.2.6.dist-info/top_level.txt,sha256=ns1El3BrTTHKvoAgU1XtiSaVIudYeCXbEEUVY8HFDZ4,6
-kobai_sdk-0.2.6.dist-info/RECORD,,
{kobai_sdk-0.2.6.dist-info → kobai_sdk-0.2.8rc1.dist-info}/LICENSE
File without changes
{kobai_sdk-0.2.6.dist-info → kobai_sdk-0.2.8rc1.dist-info}/top_level.txt
File without changes