kobai-sdk 0.3.2__py3-none-any.whl → 0.3.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kobai-sdk might be problematic. Click here for more details.
- kobai/genie.py +226 -0
- kobai/tenant_client.py +21 -13
- {kobai_sdk-0.3.2.dist-info → kobai_sdk-0.3.3rc1.dist-info}/METADATA +17 -1
- {kobai_sdk-0.3.2.dist-info → kobai_sdk-0.3.3rc1.dist-info}/RECORD +7 -6
- {kobai_sdk-0.3.2.dist-info → kobai_sdk-0.3.3rc1.dist-info}/WHEEL +0 -0
- {kobai_sdk-0.3.2.dist-info → kobai_sdk-0.3.3rc1.dist-info}/licenses/LICENSE +0 -0
- {kobai_sdk-0.3.2.dist-info → kobai_sdk-0.3.3rc1.dist-info}/top_level.txt +0 -0
kobai/genie.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
def get_genie_descriptions(solution_id, structure, schema):
    """Enrich a tenant configuration with the descriptions used for Genie.

    ``structure`` is modified in place and is also the return value. Each
    domain first receives a ``"label"`` copied from its ``"name"``; the
    enrichment steps are then applied in a fixed order.

    Args:
        solution_id: Forwarded to ``add_schema_sentences`` as its
            ``tenant_id`` argument.
        structure: Tenant configuration dict; must contain ``"domains"``.
        schema: Schema name forwarded to ``add_schema_sentences``.

    Returns:
        The same ``structure`` object, after enrichment.
    """
    # The later steps address domains through "label", so populate it first.
    for domain in structure["domains"]:
        domain["label"] = domain["name"]

    # Order matters: inherited properties are merged before the sentence
    # builders read each concept's property list.
    structure_only_steps = (
        add_episteme_metadata,
        download_inherited_props,
        add_map_count,
        add_semantic_sentences,
    )
    for step in structure_only_steps:
        step(structure)

    add_schema_sentences(structure, schema, solution_id)
    return structure
|
|
16
|
+
|
|
17
|
+
#def get_genie_questions(solution_id, structure):
|
|
18
|
+
|
|
19
|
+
# graph_uri = structure["uri"]
|
|
20
|
+
|
|
21
|
+
# question_config = get_tenant_question_config(solution_id, structure)
|
|
22
|
+
|
|
23
|
+
# questions = []
|
|
24
|
+
# for q in question_config:
|
|
25
|
+
# q_name = q["description"]
|
|
26
|
+
# q_def = json.loads(q["definition"])
|
|
27
|
+
|
|
28
|
+
# if q["published"]:
|
|
29
|
+
# result = get_question_sql(q_def, graph_uri)
|
|
30
|
+
# if result is not None:
|
|
31
|
+
# questions.append({"name": q_name, "sql": result})
|
|
32
|
+
|
|
33
|
+
# return questions
|
|
34
|
+
|
|
35
|
+
############################
|
|
36
|
+
# Config Building
|
|
37
|
+
############################
|
|
38
|
+
|
|
39
|
+
def add_episteme_metadata(structure):
    """Attach underscore-joined ``e_id`` identifiers throughout ``structure``.

    Domains get ``e_id`` equal to their label; concepts get
    ``<domain>_<concept>``; properties and relations get
    ``<domain>_<concept>_<own label>``. Each relation additionally gets
    ``e_target_id``: the last ``/``-separated segment of its
    ``relationTypeUri`` with every ``#`` replaced by ``_``.

    Args:
        structure: Tenant configuration dict; modified in place.
    """
    for domain in structure["domains"]:
        domain_id = domain["label"]
        domain["e_id"] = domain_id

        for concept in domain["concepts"]:
            concept_id = domain_id + "_" + concept["label"]
            concept["e_id"] = concept_id

            for prop in concept["properties"]:
                prop["e_id"] = concept_id + "_" + prop["label"]

            for relation in concept["relations"]:
                relation["e_id"] = concept_id + "_" + relation["label"]
                uri_tail = relation["relationTypeUri"].split("/")[-1]
                relation["e_target_id"] = uri_tail.replace("#", "_")
|
|
51
|
+
|
|
52
|
+
def download_inherited_props(structure):
    """Merge inherited properties and relations into every concept.

    For each concept, ``recurse_parent_props`` is started at the concept's
    own URI with the concept's ``"properties"`` and ``"relations"`` lists as
    the accumulation targets, so those lists are extended in place.

    Args:
        structure: Tenant configuration dict; modified in place.
    """
    for domain in structure["domains"]:
        # Domains without a "concepts" entry are skipped.
        if "concepts" not in domain:
            continue
        for concept in domain["concepts"]:
            recurse_parent_props(
                concept["uri"],
                structure,
                concept["properties"],
                concept["relations"],
                visited=[],  # fresh cycle-guard list per starting concept
            )
|
|
57
|
+
|
|
58
|
+
def _append_missing_by_uri(target, candidates):
    """Append to ``target`` each candidate whose ``"uri"`` is not already in it."""
    for candidate in candidates:
        if not any(existing["uri"] == candidate["uri"] for existing in target):
            target.append(candidate)


def recurse_parent_props(uri, structure, props, rels, visited=None):
    """Collect properties and relations inherited by the concept at ``uri``.

    Walks the ``"inheritedConcepts"`` URIs of the concept identified by
    ``uri`` and appends every property/relation of those parent concepts to
    ``props``/``rels`` unless an entry with the same ``"uri"`` is already
    there. Parents are then followed recursively.

    Args:
        uri: URI of the concept whose parents are inspected.
        structure: Tenant configuration dict with a ``"domains"`` list.
        props: List of property dicts, extended in place.
        rels: List of relation dicts, extended in place.
        visited: URIs already expanded, used to stop on inheritance cycles.
            Extended in place; a new list is created when omitted.
    """
    # The default used to be dereferenced directly, so omitting the
    # argument raised AttributeError on None.
    if visited is None:
        visited = []
    visited.append(uri)

    # Tolerate domains without a "concepts" entry, as the caller
    # (download_inherited_props) already does.
    all_concepts = [
        concept
        for domain in structure["domains"]
        for concept in domain.get("concepts", [])
    ]

    for concept in all_concepts:
        if concept["uri"] != uri:
            continue
        for parent_uri in concept["inheritedConcepts"]:
            for parent in all_concepts:
                if parent["uri"] == parent_uri:
                    _append_missing_by_uri(props, parent["properties"])
                    _append_missing_by_uri(rels, parent["relations"])
            # Expand each parent at most once so cyclic inheritance ends.
            if parent_uri not in visited:
                recurse_parent_props(parent_uri, structure, props, rels, visited)
|
|
83
|
+
|
|
84
|
+
def add_map_count(structure):
    """Store on each concept the number of mapping definitions that target it.

    A mapping definition targets a concept when its ``"conceptTypeUri"``
    equals the concept's ``"uri"``. The tally is written to the concept's
    ``"map_count"`` key.

    Args:
        structure: Tenant configuration dict with ``"domains"`` and
            ``"mappingDefs"``; modified in place.
    """
    for domain in structure["domains"]:
        for concept in domain["concepts"]:
            concept["map_count"] = sum(
                1
                for mapping in structure["mappingDefs"]
                if concept["uri"] == mapping["conceptTypeUri"]
            )
|
|
95
|
+
|
|
96
|
+
#def get_tenant_mapping_config(solution_id):
|
|
97
|
+
# mapping_sql = f"""
|
|
98
|
+
# select s.id, mapd.concept_type_uri
|
|
99
|
+
# from studio.solutions s
|
|
100
|
+
# inner join studio.mapping_defs mapd
|
|
101
|
+
# on s.id = mapd.solution_id
|
|
102
|
+
# where s.id = {solution_id}
|
|
103
|
+
# """
|
|
104
|
+
|
|
105
|
+
# mapping_rows = app_db.run_query(mapping_sql)
|
|
106
|
+
|
|
107
|
+
# mapping_defs = []
|
|
108
|
+
# for row in mapping_rows:
|
|
109
|
+
# mapping_def = {"solution_id": row[0], "concept_type_uri": row[1]}
|
|
110
|
+
# mapping_defs.append(mapping_def)
|
|
111
|
+
# return mapping_defs
|
|
112
|
+
|
|
113
|
+
############################
|
|
114
|
+
# Question Config
|
|
115
|
+
############################
|
|
116
|
+
|
|
117
|
+
def get_tenant_question_config(solution_id, structure):
    """Build question definitions from the rows in ``structure["queries"]``.

    Rows are read positionally: index 1 is the question id, 2 the
    description, 3 the definition and 4 the API id. This is the column order
    of the SQL query this function used to run
    (``solution_id, id, description, definition, api_id``).
    NOTE(review): confirm that ``structure["queries"]`` still supplies
    positional rows in that order.

    Args:
        solution_id: Currently unused; kept for interface compatibility.
        structure: Tenant configuration dict containing ``"queries"``.

    Returns:
        A list of dicts with keys ``"question_id"``, ``"description"``,
        ``"definition"`` and ``"published"``. A question counts as published
        when its API id is neither ``None`` nor the empty string.
    """
    # A leftover debug print of every row was removed here: library code
    # should not dump query definitions to stdout.
    return [
        {
            "question_id": row[1],
            "description": row[2],
            "definition": row[3],
            "published": row[4] is not None and row[4] != "",
        }
        for row in structure["queries"]
    ]
|
|
141
|
+
|
|
142
|
+
############################
|
|
143
|
+
# Sentences
|
|
144
|
+
############################
|
|
145
|
+
|
|
146
|
+
def add_semantic_sentences(structure):
    """Write natural-language summary sentences onto the structure.

    Every property, concept and domain receives a ``"semantic_sentence"``:
    properties name themselves and their owning domain/concept, concepts
    list their property labels, and domains list their concept labels.

    Properties also keep the historical misspelled key
    ``"sementic_sentence"`` with the same text, so existing readers of that
    key continue to work.

    Args:
        structure: Tenant configuration dict; modified in place.
    """
    for dom in structure["domains"]:
        concept_labels = []

        for con in dom["concepts"]:
            concept_labels.append(con["label"])
            property_labels = []

            for prop in con["properties"]:
                property_labels.append(prop["label"])
                property_sentence = (
                    "The " + prop["label"] + " for the "
                    + dom["label"] + " " + con["label"] + "."
                )
                # Correctly spelled key, matching the concept/domain keys.
                prop["semantic_sentence"] = property_sentence
                # Legacy misspelling, retained for backward compatibility.
                prop["sementic_sentence"] = property_sentence

            con["semantic_sentence"] = (
                "The " + con["label"] + " concept contains details about "
                + smart_comma_formatting(property_labels) + "."
            )

        dom["semantic_sentence"] = (
            "The " + dom["label"] + " domain contains concepts called "
            + smart_comma_formatting(concept_labels) + "."
        )
|
|
169
|
+
|
|
170
|
+
def add_schema_sentences(structure, schema, tenant_id):
    """Write table-oriented description sentences onto the structure.

    For every concept this sets ``"schema_sentence"`` (a description of the
    ``<schema>.data_<tenant_id>_<domain>_<concept>_w`` table),
    ``"schema_id_sentence"`` and ``"schema_table"``. Every property and
    relation receives its own ``"schema_sentence"``.

    Args:
        structure: Tenant configuration dict; modified in place.
        schema: Schema name used as the table-name prefix.
        tenant_id: Identifier embedded in table names. Converted with
            ``str`` so numeric ids are accepted.

    Raises:
        IndexError: If the last ``/`` segment of a relation's
            ``relationTypeUri`` contains no ``#``.
    """
    table_prefix = schema + ".data_" + str(tenant_id) + "_"

    for dom in structure["domains"]:
        dom_label = dom["label"]

        for con in dom["concepts"]:
            con_label = con["label"]
            table_stem = table_prefix + dom_label + "_" + con_label
            sentence_parts = [
                f"The {table_stem}_w table contains information about {dom_label} {con_label}s. ",
                f"This refers to a class {con_label} in a domain of similar classes called {dom_label}. ",
            ]

            property_labels = []
            for prop in con["properties"]:
                property_labels.append(prop["label"])
                prop["schema_sentence"] = f"The {prop['label']} for the {dom_label} {con_label}."
            # Only mention details when there are some; the clause was
            # previously emitted unconditionally, leaving a dangling
            # "It includes details such as " for property-less concepts.
            if property_labels:
                sentence_parts.append(
                    "It includes details such as "
                    + smart_comma_formatting(property_labels) + ". "
                )

            neighbours = []
            for rel in con["relations"]:
                # The URI tail has the form "<domain>#<concept>".
                target = rel["relationTypeUri"].split("/")[-1].split("#")
                rel_dom, rel_con = target[0], target[1]
                rel_table = f"{table_prefix}{rel_dom}_{rel_con}_w"
                rel["schema_sentence"] = (
                    f"A relationship called {rel['label']} connecting "
                    f"{dom_label} {con_label}s to {rel_dom} {rel_con}s. "
                    f"A key connecting this table to the unique identifier of the {rel_table} table. "
                )
                neighbours.append(rel_dom + " " + rel_con)
            if neighbours:
                sentence_parts.append(
                    "For context, it is connected to neighbor classes like "
                    + smart_comma_formatting(neighbours)
                )

            con["schema_sentence"] = "".join(sentence_parts)
            con["schema_id_sentence"] = f"The unique identifier for each {dom_label} {con_label}. "
            # NOTE(review): the sentence above names the "_w" table while
            # this key uses the "_np" suffix; preserved as found. Confirm
            # the difference is intentional.
            con["schema_table"] = table_stem + "_np"
|
|
203
|
+
|
|
204
|
+
def smart_comma_formatting(items):
    """Join ``items`` into an English list such as ``"a, b and c"``.

    Args:
        items: Sequence of strings, or ``None``.

    Returns:
        ``""`` for ``None`` or an empty sequence, the sole element for a
        single item, and otherwise the items separated by ``", "`` with
        ``" and "`` before the final one (no Oxford comma).
    """
    if items is None:
        return ""

    count = len(items)
    if count == 0:
        return ""
    if count == 1:
        return items[0]

    leading = items[:-1]
    final = items[-1]
    return ", ".join(leading) + " and " + final
|
|
216
|
+
|
|
217
|
+
def label_from_url(inpt):
    """Extract a readable label from the fragment of a URL-like string.

    Args:
        inpt: String that may contain ``#``.

    Returns:
        The text between the first and second ``#`` (or up to the end when
        there is only one), with underscores replaced by spaces. When
        ``inpt`` contains no ``#`` it is returned unchanged.
    """
    pieces = inpt.split("#")
    if len(pieces) < 2:
        return inpt
    return pieces[1].replace("_", " ")
|
|
222
|
+
|
|
223
|
+
############################
|
|
224
|
+
# Mapping
|
|
225
|
+
############################
|
|
226
|
+
|
kobai/tenant_client.py
CHANGED
|
@@ -11,6 +11,7 @@ from langchain_core.language_models.chat_models import BaseChatModel
|
|
|
11
11
|
from langchain_core.embeddings import Embeddings
|
|
12
12
|
|
|
13
13
|
from . import spark_client, databricks_client, ai_query, tenant_api, ai_rag
|
|
14
|
+
from .genie import get_genie_descriptions
|
|
14
15
|
|
|
15
16
|
class TenantClient:
|
|
16
17
|
|
|
@@ -228,7 +229,8 @@ class TenantClient:
|
|
|
228
229
|
#print(t["sql"])
|
|
229
230
|
try:
|
|
230
231
|
self.spark_client._SparkClient__run_sql(t["sql"])
|
|
231
|
-
except:
|
|
232
|
+
except Exception as e:
|
|
233
|
+
print("Error creating view.", e)
|
|
232
234
|
print(t["sql"])
|
|
233
235
|
print("Updated " + str(len(tables)) + " views for Genie.")
|
|
234
236
|
|
|
@@ -265,7 +267,6 @@ class TenantClient:
|
|
|
265
267
|
"""
|
|
266
268
|
Use the Databricks Client to create a Genie Data Room for this tenant.
|
|
267
269
|
"""
|
|
268
|
-
|
|
269
270
|
data_rooms = self.databricks_client._DatabricksClient__api_get("/api/2.0/data-rooms")
|
|
270
271
|
room_id = "-1"
|
|
271
272
|
if data_rooms:
|
|
@@ -325,10 +326,13 @@ class TenantClient:
|
|
|
325
326
|
|
|
326
327
|
def __get_descriptions(self):
|
|
327
328
|
|
|
328
|
-
params={"schema": self.schema, "tenant_id": self.model_id}
|
|
329
|
-
response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/descriptions", params=params)
|
|
330
|
-
return response.json()
|
|
329
|
+
#params={"schema": self.schema, "tenant_id": self.model_id}
|
|
330
|
+
#response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/descriptions", params=params)
|
|
331
|
+
#return response.json()
|
|
331
332
|
|
|
333
|
+
tenant_config = self.get_tenant_config()
|
|
334
|
+
descriptions = get_genie_descriptions(self.model_id, tenant_config, self.schema)
|
|
335
|
+
return descriptions
|
|
332
336
|
|
|
333
337
|
def __get_view_sql(self, domains=None, concepts=None, not_concepts=None, enforce_map=True):
|
|
334
338
|
sql_list = []
|
|
@@ -404,16 +408,20 @@ class TenantClient:
|
|
|
404
408
|
|
|
405
409
|
def __get_questions(self):
|
|
406
410
|
|
|
407
|
-
response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/questions")
|
|
411
|
+
#response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/questions")
|
|
412
|
+
|
|
413
|
+
#tenant_config = self.get_tenant_config()
|
|
414
|
+
#questions = get_genie_questions(self.id, tenant_config)
|
|
408
415
|
|
|
409
|
-
return_questions = []
|
|
410
|
-
for q in
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
416
|
+
#return_questions = []
|
|
417
|
+
#for q in questions:
|
|
418
|
+
# sql = q["sql"]
|
|
419
|
+
# sql = sql[2:-2]
|
|
420
|
+
# sql = sql.replace(".data_", ".genie_").replace("_Literals", "").replace("_w", "")
|
|
421
|
+
# return_questions.append({"name": q["name"], "sql": sql})
|
|
415
422
|
|
|
416
|
-
return return_questions
|
|
423
|
+
#return return_questions
|
|
424
|
+
return []
|
|
417
425
|
|
|
418
426
|
########################################
|
|
419
427
|
# RAG Functions
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kobai-sdk
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.3rc1
|
|
4
4
|
Summary: A package that enables interaction with a Kobai tenant.
|
|
5
5
|
Author-email: Ryan Oattes <ryan@kobai.io>
|
|
6
6
|
License: Apache License
|
|
@@ -292,6 +292,22 @@ k.use_browser_token(access_token="KOBAI_ACESS_TOKEN_FROM_BROWSER")
|
|
|
292
292
|
#### Authentication via on-behalf-of flow
|
|
293
293
|
The sample code demonstrating authentication via the on-behalf-of flow will be provided, if requested.
|
|
294
294
|
|
|
295
|
+
3. Initialize a Spark client using your current `SparkSession`, and generate semantically rich SQL views describing this Kobai tenant:
|
|
296
|
+
|
|
297
|
+
```python
|
|
298
|
+
k.spark_init_session(spark)
|
|
299
|
+
k.spark_generate_genie_views()
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
4. Initialize a Databricks API client using your Notebook context, and create a Genie Data Rooms environment for this Kobai tenant:
|
|
303
|
+
|
|
304
|
+
```python
|
|
305
|
+
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
|
|
306
|
+
sql_warehouse = '8834d98a8agffa76'
|
|
307
|
+
|
|
308
|
+
k.databricks_init_notebook(notebook_context, sql_warehouse)
|
|
309
|
+
k.databricks_build_genie()
|
|
310
|
+
```
|
|
295
311
|
|
|
296
312
|
## AI Functionality
|
|
297
313
|
The Kobai SDK enables users to ask follow-up questions based on the results of previous queries. This functionality currently supports models hosted on Databricks and Azure OpenAI.
|
|
@@ -3,12 +3,13 @@ kobai/ai_query.py,sha256=FnXn2pabJpXfTUcJvieVkAgMAjSTH9u5SFR9SJUJ-Lk,9556
|
|
|
3
3
|
kobai/ai_rag.py,sha256=XUq_SnJw17P53Zk75hHJgTryGjHEAyYPwC0r2WtuNp4,14627
|
|
4
4
|
kobai/databricks_client.py,sha256=fyqqMly2Qm0r1AHWsQjkYeNsDdH0G1JSgTkF9KJ55qA,2118
|
|
5
5
|
kobai/demo_tenant_client.py,sha256=wlNc-bdI2wotRXo8ppUOalv4hYdBlek_WzJNARZV-AE,9293
|
|
6
|
+
kobai/genie.py,sha256=L78QQPnAYHk3SRXPdmrGpkG52jwCO2aG4r0esWdt3Ng,9102
|
|
6
7
|
kobai/ms_authenticate.py,sha256=rlmhtvAaSRBlYmvIBy5epMVa4MBGBLPaMwawu1T_xDQ,2252
|
|
7
8
|
kobai/spark_client.py,sha256=opM_F-4Ut5Hq5zZjWMuLvUps9sDULvyPNZHXGL8dW1k,776
|
|
8
9
|
kobai/tenant_api.py,sha256=Q5yuFd9_V4lo3LWzvYEEO3LpDRWFgQD4TlRPXDTGbiE,4368
|
|
9
|
-
kobai/tenant_client.py,sha256=
|
|
10
|
-
kobai_sdk-0.3.
|
|
11
|
-
kobai_sdk-0.3.
|
|
12
|
-
kobai_sdk-0.3.
|
|
13
|
-
kobai_sdk-0.3.
|
|
14
|
-
kobai_sdk-0.3.
|
|
10
|
+
kobai/tenant_client.py,sha256=Feu8786FFAUhIenLxBWRqWWrrdENzf0t35an95M1nSA,39023
|
|
11
|
+
kobai_sdk-0.3.3rc1.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
|
|
12
|
+
kobai_sdk-0.3.3rc1.dist-info/METADATA,sha256=KC2TQJd94mdxebpMTs8Jq-XrJaHQvBJxTYiJ4wQRAIQ,19872
|
|
13
|
+
kobai_sdk-0.3.3rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
14
|
+
kobai_sdk-0.3.3rc1.dist-info/top_level.txt,sha256=ns1El3BrTTHKvoAgU1XtiSaVIudYeCXbEEUVY8HFDZ4,6
|
|
15
|
+
kobai_sdk-0.3.3rc1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|