kobai-sdk 0.2.7__py3-none-any.whl → 0.2.8rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kobai-sdk might be problematic. Click here for more details.
- kobai/ai_query.py +7 -1
- kobai/ai_rag.py +113 -0
- kobai/tenant_client.py +4 -4
- {kobai_sdk-0.2.7.dist-info → kobai_sdk-0.2.8rc1.dist-info}/METADATA +2 -2
- kobai_sdk-0.2.8rc1.dist-info/RECORD +14 -0
- {kobai_sdk-0.2.7.dist-info → kobai_sdk-0.2.8rc1.dist-info}/WHEEL +1 -1
- kobai/test.py +0 -5
- kobai_sdk-0.2.7.dist-info/RECORD +0 -14
- {kobai_sdk-0.2.7.dist-info → kobai_sdk-0.2.8rc1.dist-info}/LICENSE +0 -0
- {kobai_sdk-0.2.7.dist-info → kobai_sdk-0.2.8rc1.dist-info}/top_level.txt +0 -0
kobai/ai_query.py
CHANGED
|
@@ -79,7 +79,13 @@ def followup_question(question, data, question_name, llm_config:llm_config, over
|
|
|
79
79
|
openai_api_version=llm_config.api_version,
|
|
80
80
|
temperature = llm_config.temperature,
|
|
81
81
|
max_tokens = llm_config.max_tokens,
|
|
82
|
-
)
|
|
82
|
+
)
|
|
83
|
+
else:
|
|
84
|
+
chat_model = ChatDatabricks(
|
|
85
|
+
endpoint = llm_config.endpoint,
|
|
86
|
+
temperature = llm_config.temperature,
|
|
87
|
+
max_tokens = llm_config.max_tokens,
|
|
88
|
+
)
|
|
83
89
|
|
|
84
90
|
if llm_config.use_simple_prompt:
|
|
85
91
|
prompt = PromptTemplate.from_template(SIMPLE_PROMPT_TEMPLATE)
|
kobai/ai_rag.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
from kobai import tenant_client
|
|
2
|
+
|
|
3
|
+
def generate_sentences(tc: tenant_client.TenantClient, replica_schema=None):
    """Fill the model's RAG table with generated sentences using Spark SQL.

    The function prints a progress message before each stage. It fetches the
    tenant configuration, derives concept metadata from it, (re)creates the
    table ``{tc.schema}.rag_{tc.model_id}`` and then runs every generated
    INSERT statement (literal-property sentences first, relation sentences
    second). When ``replica_schema`` is supplied, the same table is
    (re)created in that schema and the rows are copied across.

    Args:
        tc: Tenant client supplying ``spark_client``, ``schema``,
            ``model_id`` and ``get_tenant_config()``.
        replica_schema: Optional second schema to copy the table into.

    Returns:
        Always ``None``. Nothing is executed when ``tc.spark_client`` is
        ``None``.
    """
    # Without a Spark client there is nothing to run the SQL against.
    if tc.spark_client is None:
        return None

    session = tc.spark_client.spark_session

    print("Getting Tenant Config")
    config = tc.get_tenant_config()

    schema = tc.schema
    model_id = tc.model_id
    concept_meta = __get_concept_metadata(config, schema, model_id)

    print("Dropping and Recreating the RAG Table")
    session.sql(__create_rag_table_sql(schema, model_id))

    print("Generating Extraction SQL")
    extraction_sql = [
        *__generate_sentence_sql_concept_literals(concept_meta, schema, model_id),
        *__generate_sentence_sql_concept_relations(concept_meta, schema, model_id),
    ]

    print("Running the Extraction")
    for statement in extraction_sql:
        session.sql(statement)

    if replica_schema is None:
        return None

    print("Replicating Schema")
    session.sql(__create_rag_table_sql(replica_schema, model_id))
    session.sql(__replicate_to_catalog_sql(schema, replica_schema, model_id))
    return None
|
|
30
|
+
|
|
31
|
+
def __create_rag_table_sql(schema, model_id):
    """Return the CREATE OR REPLACE TABLE statement for ``{schema}.rag_{model_id}``.

    The table has an identity ``id`` column, ``content``, ``type``,
    ``concept_id`` and a ``vector`` float array, and sets the
    ``delta.enableChangeDataFeed`` table property to true.
    """
    table_name = f"{schema}.rag_{model_id}"
    column_defs = ", ".join((
        "id BIGINT GENERATED BY DEFAULT AS IDENTITY",
        "content STRING",
        "type string",
        "concept_id string",
        "vector ARRAY<FLOAT>",
    ))
    table_props = "delta.enableChangeDataFeed = true"
    return f"CREATE OR REPLACE TABLE {table_name} ({column_defs}) TBLPROPERTIES ({table_props})"
|
|
33
|
+
|
|
34
|
+
def __replicate_to_catalog_sql(base_schema, target_schema, model_id):
    """Return an INSERT ... SELECT copying RAG rows between two schemas.

    Only ``content``, ``concept_id`` and ``type`` are copied from
    ``{base_schema}.rag_{model_id}`` into ``{target_schema}.rag_{model_id}``.
    """
    insert_clause = f"INSERT INTO {target_schema}.rag_{model_id} (content, concept_id, type)"
    select_clause = f"SELECT content, concept_id, type FROM {base_schema}.rag_{model_id}"
    return " ".join((insert_clause, select_clause))
|
|
38
|
+
|
|
39
|
+
def __generate_sentence_sql_concept_literals(concepts, schema, model_id):
    """Build one INSERT statement per concept describing its literal properties.

    Each statement selects from the concept table (aliased ``cid``), inner
    joins the concept's property table once per property (filtered to
    ``type = 'l'`` and the property's ``name``), and writes a sentence into
    ``{schema}.rag_{model_id}`` with ``type`` set to ``'c'``.

    Args:
        concepts: Iterable of dicts with keys ``label``, ``con_table_name``,
            ``prop_table_name`` and ``properties`` (a list of dicts with
            ``label`` and ``name``).
        schema: Schema holding the target RAG table.
        model_id: Model identifier used in the target table name.

    Returns:
        A list of SQL strings, one per concept, in input order.
    """
    statements = []
    for con in concepts:
        sentence_parts = [
            f"'This is a {con['label']}. '",
            "'It is identified by ' || split(cid._conceptid,'#')[1] || '. '",
        ]
        from_parts = [f"{con['con_table_name']} cid"]

        for prop in con["properties"]:
            # The property label doubles as the join alias. Backtick-quote it
            # (doubling any embedded backtick) so labels containing spaces or
            # reserved words still produce a valid identifier.
            alias = "`" + str(prop["label"]).replace("`", "``") + "`"

            # NOTE(review): INNER JOIN means a concept row lacking any one of
            # its properties yields no sentence at all - confirm that this is
            # intended.
            from_parts.append(
                f"INNER JOIN {con['prop_table_name']} AS {alias}"
                f" ON cid._conceptid = {alias}._conceptid"
                f" AND {alias}.type = 'l'"
                f" AND {alias}.name = '{prop['name']}'"
            )
            sentence_parts.append(
                f"'The {prop['label']} is ' || any_value({alias}.value) IGNORE NULLS || '. '"
            )

        # NOTE(review): labels and names are embedded in SQL string literals
        # unescaped; a value containing a single quote would break the
        # statement.
        sql = " || ".join(sentence_parts)
        sql_from = " ".join(from_parts)

        statements.append(
            f"INSERT INTO {schema}.rag_{model_id} (content, concept_id, type)"
            f" SELECT {sql} content, cid._conceptid concept_id, 'c' type"
            f" FROM {sql_from} GROUP BY cid._conceptid"
        )
    return statements
|
|
61
|
+
|
|
62
|
+
def __generate_sentence_sql_concept_relations(concepts, schema, model_id):
    """Build one INSERT statement per (concept, relation) pair.

    Each statement reads the concept's property table, groups by
    ``_conceptid`` and writes a sentence listing the related identifiers
    into ``{schema}.rag_{model_id}`` with ``type`` set to ``'e'``.

    Args:
        concepts: Iterable of dicts with keys ``label``, ``prop_table_name``
            and ``relations`` (a list of dicts with ``label``, ``name`` and
            ``target_con_label``).
        schema: Schema holding the target RAG table.
        model_id: Model identifier used in the target table name.

    Returns:
        A list of SQL strings, ordered by concept and then by relation.
    """
    statements = []
    for con in concepts:
        for rel in con["relations"]:
            sentence_parts = [
                f"'The {con['label']} identified by ' || split(_conceptid,'#')[1]",
                f"' has a relationship called {rel['label']} that connects it"
                f" to one or more {rel['target_con_label']} identified by '",
                "concat_ws(', ', array_agg(split(value, '#')[1])) || '. '",
            ]
            sql = " || ".join(sentence_parts)

            # Restrict the aggregation to this relation's rows. Without the
            # filter, every relation's sentence listed every value stored
            # for the concept (literal properties and other relations too).
            # NOTE(review): assumes relation rows carry rel['name'] in the
            # `name` column, as literal-property rows do - confirm against
            # the property table's contents.
            sql_from = f"{con['prop_table_name']} WHERE name = '{rel['name']}'"

            statements.append(
                f"INSERT INTO {schema}.rag_{model_id} (content, concept_id, type)"
                f" SELECT {sql} content, _conceptid concept_id, 'e' type"
                f" FROM {sql_from} GROUP BY _conceptid"
            )
    return statements
|
|
79
|
+
|
|
80
|
+
def __get_concept_metadata(tenant_json, schema, model_id):
    """Flatten the tenant configuration into per-concept metadata dicts.

    Args:
        tenant_json: Mapping with a ``domains`` list. Each domain has a
            ``name`` and a ``concepts`` list, and each concept has ``uri``,
            ``label``, ``properties`` (items with ``label``) and
            ``relations`` (items with ``label`` and ``relationTypeUri``).
        schema: Schema name used as the prefix of the data table names.
        model_id: Model identifier embedded in property names and table
            names.

    Returns:
        A list with one dict per concept, in domain and then concept order,
        with keys ``label``, ``relations``, ``properties``,
        ``prop_table_name`` and ``con_table_name``.

    Raises:
        KeyError: If an expected key is missing, or a relation's
            ``relationTypeUri`` does not match the ``uri`` of any concept
            in ``tenant_json``.
    """
    domains = tenant_json["domains"]

    # First pass: map every concept URI to its display label so relations
    # can name their target concept, whichever domain it lives in.
    target_concept_labels = {
        c["uri"]: d["name"] + " " + c["label"]
        for d in domains
        for c in d["concepts"]
    }

    concepts = []
    for d in domains:
        for c in d["concepts"]:
            # Shared prefixes for property/relation names and table names.
            name_prefix = f"{model_id}/{d['name']}/{c['label']}#"
            table_prefix = f"{schema}.data_{model_id}_{d['name']}_{c['label']}"

            con_props = [
                {"label": col["label"], "name": name_prefix + f"{col['label']}"}
                for col in c["properties"]
            ]
            con_rels = [
                {
                    "label": rel["label"],
                    "name": name_prefix + f"{rel['label']}",
                    "target_con_label": target_concept_labels[rel["relationTypeUri"]],
                }
                for rel in c["relations"]
            ]
            concepts.append({
                "label": d["name"] + " " + c["label"],
                "relations": con_rels,
                "properties": con_props,
                "prop_table_name": table_prefix + "_np",
                "con_table_name": table_prefix + "_c",
            })
    return concepts
|
kobai/tenant_client.py
CHANGED
|
@@ -441,10 +441,10 @@ class TenantClient:
|
|
|
441
441
|
|
|
442
442
|
return ai_query.followup_question(followup_question,
|
|
443
443
|
data,
|
|
444
|
-
question_name,
|
|
444
|
+
question_name,
|
|
445
|
+
None,
|
|
445
446
|
override_model=override_model,
|
|
446
|
-
|
|
447
|
-
debug=debug)
|
|
447
|
+
)
|
|
448
448
|
|
|
449
449
|
def process_question_results(self, question_def):
|
|
450
450
|
|
|
@@ -1018,7 +1018,7 @@ class TenantClient:
|
|
|
1018
1018
|
datasource_label (string): Label of datasource to use.
|
|
1019
1019
|
table_name (string): Name of table to use from specified datasource.
|
|
1020
1020
|
"""
|
|
1021
|
-
|
|
1021
|
+
data_source_id = 0
|
|
1022
1022
|
existing_datasource = self.list_data_sources()
|
|
1023
1023
|
for d in existing_datasource["used"]:
|
|
1024
1024
|
if datasource_label.lower() == d["name"].lower():
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
kobai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
kobai/ai_query.py,sha256=fMTcfj-6Ma3FRB08VYEDj8PwOEOtFGsJHyQrha5yvPg,4512
|
|
3
|
+
kobai/ai_rag.py,sha256=y_N7qVu8HfUHHZPIyQSO7L995RBeNtDhva7U5HBHSfY,5063
|
|
4
|
+
kobai/databricks_client.py,sha256=fyqqMly2Qm0r1AHWsQjkYeNsDdH0G1JSgTkF9KJ55qA,2118
|
|
5
|
+
kobai/demo_tenant_client.py,sha256=wlNc-bdI2wotRXo8ppUOalv4hYdBlek_WzJNARZV-AE,9293
|
|
6
|
+
kobai/llm_config.py,sha256=ZFx81cUAOHYZgRoTkTY-utQYaWYlmR8773ZJpj74C1A,1900
|
|
7
|
+
kobai/spark_client.py,sha256=opM_F-4Ut5Hq5zZjWMuLvUps9sDULvyPNZHXGL8dW1k,776
|
|
8
|
+
kobai/tenant_api.py,sha256=9U6UbxpaAb-kpbuADXx3kbkNKaOzYy0I-GGwbpiCCOk,4212
|
|
9
|
+
kobai/tenant_client.py,sha256=AyJ5R2oukEv3q1dcItpojvTUVp5-gwUKvyGjofjBKyc,41821
|
|
10
|
+
kobai_sdk-0.2.8rc1.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
|
|
11
|
+
kobai_sdk-0.2.8rc1.dist-info/METADATA,sha256=nZTb2svQk01wT32zBZDPKgeYnSAx22YER5YLHEIjoAQ,19167
|
|
12
|
+
kobai_sdk-0.2.8rc1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
13
|
+
kobai_sdk-0.2.8rc1.dist-info/top_level.txt,sha256=ns1El3BrTTHKvoAgU1XtiSaVIudYeCXbEEUVY8HFDZ4,6
|
|
14
|
+
kobai_sdk-0.2.8rc1.dist-info/RECORD,,
|
kobai/test.py
DELETED
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
import os

import llm_config
import ai_query

# Manual smoke test for the Azure OpenAI follow-up question path.
#
# The API key is read from the AZURE_OPENAI_API_KEY environment variable
# instead of being hard-coded: a literal key in source ends up in every
# published wheel. Any key previously committed here should be treated as
# compromised and rotated. A missing variable raises KeyError up front.
config = llm_config.LLMConfig(
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    llm_provider="azure_openai",
)
# A separate name keeps the llm_config module from being rebound.
config.get_azure_ad_token()
ai_query.followup_question_1(question="abc", data={}, question_name="sample", llm_config=config)
|
kobai_sdk-0.2.7.dist-info/RECORD
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
kobai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
kobai/ai_query.py,sha256=QciKN643VsoZ5rP9zzxshuSMyXouLfp46WR9g4et1-M,4309
|
|
3
|
-
kobai/databricks_client.py,sha256=fyqqMly2Qm0r1AHWsQjkYeNsDdH0G1JSgTkF9KJ55qA,2118
|
|
4
|
-
kobai/demo_tenant_client.py,sha256=wlNc-bdI2wotRXo8ppUOalv4hYdBlek_WzJNARZV-AE,9293
|
|
5
|
-
kobai/llm_config.py,sha256=ZFx81cUAOHYZgRoTkTY-utQYaWYlmR8773ZJpj74C1A,1900
|
|
6
|
-
kobai/spark_client.py,sha256=opM_F-4Ut5Hq5zZjWMuLvUps9sDULvyPNZHXGL8dW1k,776
|
|
7
|
-
kobai/tenant_api.py,sha256=9U6UbxpaAb-kpbuADXx3kbkNKaOzYy0I-GGwbpiCCOk,4212
|
|
8
|
-
kobai/tenant_client.py,sha256=R74NZpeo547vLpi_pt3QWT7I7phPy25kG_ZPuJhdEAg,41846
|
|
9
|
-
kobai/test.py,sha256=LHCKClyFNPgLA1hF62RlDzilaFw2Nd3BWcaLtOc5xdc,320
|
|
10
|
-
kobai_sdk-0.2.7.dist-info/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
|
|
11
|
-
kobai_sdk-0.2.7.dist-info/METADATA,sha256=EUiOF9YK0TElH6fZpb5125yy3TCFyyxJryhMgYOFuFo,19164
|
|
12
|
-
kobai_sdk-0.2.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
13
|
-
kobai_sdk-0.2.7.dist-info/top_level.txt,sha256=ns1El3BrTTHKvoAgU1XtiSaVIudYeCXbEEUVY8HFDZ4,6
|
|
14
|
-
kobai_sdk-0.2.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|