kobai-sdk 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kobai-sdk might be problematic. Click here for more details.
- kobai/genie.py +194 -0
- kobai/tenant_client.py +20 -15
- {kobai_sdk-0.3.2.dist-info → kobai_sdk-0.3.3.dist-info}/METADATA +17 -1
- {kobai_sdk-0.3.2.dist-info → kobai_sdk-0.3.3.dist-info}/RECORD +7 -6
- {kobai_sdk-0.3.2.dist-info → kobai_sdk-0.3.3.dist-info}/WHEEL +0 -0
- {kobai_sdk-0.3.2.dist-info → kobai_sdk-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {kobai_sdk-0.3.2.dist-info → kobai_sdk-0.3.3.dist-info}/top_level.txt +0 -0
kobai/genie.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
|
|
2
|
+
def get_genie_descriptions(solution_id, structure, schema):
    """Enrich a tenant structure with the metadata used for Genie descriptions.

    Each domain's "name" is first copied into "label"; the enrichment passes
    below then run in a fixed order, all mutating ``structure`` in place.

    Args:
        solution_id: Forwarded to ``add_schema_sentences`` as its
            ``tenant_id`` argument (it becomes part of the table names).
        structure: Tenant configuration dict with at least a "domains" list.
        schema: Schema name forwarded to ``add_schema_sentences``.

    Returns:
        The same ``structure`` object, after enrichment.
    """
    for domain in structure["domains"]:
        domain["label"] = domain["name"]

    # Order matters: identifiers are assigned before inherited members are
    # merged in, and the sentence passes run last so they see the merged lists.
    add_episteme_metadata(structure)
    download_inherited_props(structure)
    add_map_count(structure)
    add_semantic_sentences(structure)
    add_schema_sentences(structure, schema, solution_id)

    return structure
|
|
15
|
+
|
|
16
|
+
#def get_genie_questions(solution_id, structure):
|
|
17
|
+
|
|
18
|
+
# graph_uri = structure["uri"]
|
|
19
|
+
|
|
20
|
+
# question_config = get_tenant_question_config(solution_id, structure)
|
|
21
|
+
|
|
22
|
+
# questions = []
|
|
23
|
+
# for q in question_config:
|
|
24
|
+
# q_name = q["description"]
|
|
25
|
+
# q_def = json.loads(q["definition"])
|
|
26
|
+
|
|
27
|
+
# if q["published"]:
|
|
28
|
+
# result = get_question_sql(q_def, graph_uri)
|
|
29
|
+
# if result is not None:
|
|
30
|
+
# questions.append({"name": q_name, "sql": result})
|
|
31
|
+
|
|
32
|
+
# return questions
|
|
33
|
+
|
|
34
|
+
############################
|
|
35
|
+
# Config Building
|
|
36
|
+
############################
|
|
37
|
+
|
|
38
|
+
def add_episteme_metadata(structure):
    """Assign "e_id" identifiers to every domain, concept, property and relation.

    Identifiers are built from labels joined with underscores:
    ``<domain>``, ``<domain>_<concept>``, ``<domain>_<concept>_<member>``.
    Each relation also receives "e_target_id": the last "/"-separated segment
    of its "relationTypeUri" with "#" replaced by "_".

    Args:
        structure: Dict with a "domains" list; modified in place.
    """
    for domain in structure["domains"]:
        domain_id = domain["label"]
        domain["e_id"] = domain_id

        for concept in domain["concepts"]:
            concept_id = domain_id + "_" + concept["label"]
            concept["e_id"] = concept_id

            for prop in concept["properties"]:
                prop["e_id"] = concept_id + "_" + prop["label"]

            for relation in concept["relations"]:
                relation["e_id"] = concept_id + "_" + relation["label"]
                target_segment = relation["relationTypeUri"].rsplit("/", 1)[-1]
                relation["e_target_id"] = target_segment.replace("#", "_")
|
|
50
|
+
|
|
51
|
+
def download_inherited_props(structure):
    """Merge inherited properties and relations into every concept.

    For each concept, ``recurse_parent_props`` is started with the concept's
    own "properties" and "relations" lists and a fresh ``visited`` list, so
    those lists are extended in place. Domains without a "concepts" key are
    skipped.

    Args:
        structure: Dict with a "domains" list; modified in place.
    """
    for domain in structure["domains"]:
        if "concepts" not in domain:
            continue
        for concept in domain["concepts"]:
            recurse_parent_props(
                concept["uri"],
                structure,
                concept["properties"],
                concept["relations"],
                visited=[],
            )
|
|
56
|
+
|
|
57
|
+
def recurse_parent_props(uri, structure, props, rels, visited=None):
    """Collect properties and relations inherited by the concept ``uri``.

    Walks the concept's "inheritedConcepts" transitively and appends every
    parent property/relation whose "uri" is not already present to ``props``
    and ``rels``. The appended items are the parents' own dict objects, not
    copies.

    Args:
        uri: URI of the concept whose ancestors are walked.
        structure: Dict with a "domains" list; each domain has "concepts".
        props: List of property dicts, extended in place.
        rels: List of relation dicts, extended in place.
        visited: URIs already expanded, used to stop on inheritance cycles.
            A new list is created when omitted (the original dereferenced
            the ``None`` default and raised AttributeError).
    """
    if visited is None:
        visited = []
    visited.append(uri)

    for dom in structure["domains"]:
        for con in dom["concepts"]:
            if con["uri"] != uri:
                continue
            for parent_uri in con["inheritedConcepts"]:
                # Resolve the parent URI to its concept definition(s).
                for pdom in structure["domains"]:
                    for pcon in pdom["concepts"]:
                        if pcon["uri"] != parent_uri:
                            continue
                        _append_missing_by_uri(props, pcon["properties"])
                        _append_missing_by_uri(rels, pcon["relations"])
                        # Climb further only once per parent, so cyclic
                        # inheritance cannot recurse forever.
                        if parent_uri not in visited:
                            recurse_parent_props(parent_uri, structure, props, rels, visited)


def _append_missing_by_uri(target, candidates):
    """Append each candidate whose "uri" is not yet present in ``target``."""
    for candidate in candidates:
        if not any(existing["uri"] == candidate["uri"] for existing in target):
            target.append(candidate)
|
|
82
|
+
|
|
83
|
+
def add_map_count(structure):
    """Store on each concept how many mapping definitions reference it.

    A mapping definition references a concept when its "conceptTypeUri"
    equals the concept's "uri". The result is written to the concept's
    "map_count" key.

    Args:
        structure: Dict with a "domains" list and a "mappingDefs" list;
            modified in place. A missing "mappingDefs" key is treated as
            "no mappings", so every concept gets a count of 0.
    """
    # Standard library; imported locally so this block is self-contained.
    from collections import Counter

    # Tally once instead of rescanning every mapping for every concept.
    defs_per_concept = Counter(
        mapping["conceptTypeUri"] for mapping in structure.get("mappingDefs", ())
    )

    for dom in structure["domains"]:
        for con in dom["concepts"]:
            con["map_count"] = defs_per_concept[con["uri"]]
|
|
93
|
+
|
|
94
|
+
############################
|
|
95
|
+
# Question Config
|
|
96
|
+
############################
|
|
97
|
+
|
|
98
|
+
def get_tenant_question_config(solution_id, structure):
    """Build question definitions from the rows in ``structure["queries"]``.

    Each row is read positionally: index 1 becomes "question_id", index 2
    "description" and index 3 "definition". "published" is True when index 4
    is neither ``None`` nor the empty string.

    Args:
        solution_id: Not used by this function; kept for interface
            compatibility.
        structure: Dict with a "queries" iterable of indexable rows.

    Returns:
        A list with one dict per row, in row order.
    """
    # The original printed every row to stdout here (debug leftover).
    return [
        {
            "question_id": row[1],
            "description": row[2],
            "definition": row[3],
            "published": row[4] is not None and row[4] != "",
        }
        for row in structure["queries"]
    ]
|
|
109
|
+
|
|
110
|
+
############################
|
|
111
|
+
# Sentences
|
|
112
|
+
############################
|
|
113
|
+
|
|
114
|
+
def add_semantic_sentences(structure):
    """Attach plain-English "semantic_sentence" descriptions to the structure.

    Writes a sentence onto every property, concept and domain:
      * property: "The <prop> for the <domain> <concept>."
      * concept:  "The <concept> concept contains details about <props>."
      * domain:   "The <domain> domain contains concepts called <concepts>."

    Properties previously received the sentence only under the misspelled
    key "sementic_sentence". The correctly spelled "semantic_sentence" is now
    written as well, and the misspelled key is kept so existing readers keep
    working.

    Args:
        structure: Dict with a "domains" list; modified in place.
    """
    for dom in structure["domains"]:
        dom_label = dom["label"]
        concept_labels = []

        for con in dom["concepts"]:
            con_label = con["label"]
            concept_labels.append(con_label)
            property_labels = []

            # NOTE(review): if a property dict is shared between concepts
            # (recurse_parent_props appends parent dicts without copying),
            # the sentence written here is overwritten by whichever concept
            # is processed last — confirm that is acceptable.
            for prop in con["properties"]:
                prop_label = prop["label"]
                property_labels.append(prop_label)
                sentence = f"The {prop_label} for the {dom_label} {con_label}."
                prop["semantic_sentence"] = sentence
                prop["sementic_sentence"] = sentence  # legacy misspelled key

            con["semantic_sentence"] = (
                f"The {con_label} concept contains details about "
                + smart_comma_formatting(property_labels)
                + "."
            )

        dom["semantic_sentence"] = (
            f"The {dom_label} domain contains concepts called "
            + smart_comma_formatting(concept_labels)
            + "."
        )
|
|
137
|
+
|
|
138
|
+
def add_schema_sentences(structure, schema, tenant_id):
    """Attach table-oriented descriptions to concepts, properties and relations.

    For every concept this writes:
      * "schema_sentence": description of the
        ``<schema>.data_<tenant_id>_<domain>_<concept>_w`` table, its
        properties and its neighbouring classes;
      * "schema_id_sentence": description of the identifier column;
      * "schema_table": ``<schema>.data_<tenant_id>_<domain>_<concept>_np``.
    Every property and relation receives its own "schema_sentence".

    Changes from the original:
      * the "It includes details such as ..." sentence is emitted only when
        the concept has properties (it used to be left dangling);
      * names are built with f-strings, so a non-string ``tenant_id`` (for
        example an integer id) no longer raises TypeError.

    Args:
        structure: Dict with a "domains" list; modified in place.
        schema: Schema name used as the table-name prefix.
        tenant_id: Identifier embedded in every table name.

    Raises:
        IndexError: If a relation's "relationTypeUri" has no "#" in its last
            "/"-separated segment.
    """
    table_prefix = f"{schema}.data_{tenant_id}_"

    for dom in structure["domains"]:
        dom_label = dom["label"]

        for con in dom["concepts"]:
            con_label = con["label"]
            subject = f"{dom_label} {con_label}"
            table_stem = f"{table_prefix}{dom_label}_{con_label}"

            concept_sentence = (
                f"The {table_stem}_w table contains information about {subject}s. "
                f"This refers to a class {con_label} in a domain of similar classes called {dom_label}. "
            )

            # NOTE(review): property/relation dicts may be shared between
            # concepts (recurse_parent_props appends parent dicts without
            # copying); a shared dict keeps the sentence of the last concept
            # processed — confirm that is acceptable.
            property_labels = []
            for prop in con["properties"]:
                prop_label = prop["label"]
                property_labels.append(prop_label)
                prop["schema_sentence"] = f"The {prop_label} for the {subject}."
            if property_labels:
                concept_sentence += (
                    "It includes details such as "
                    + smart_comma_formatting(property_labels)
                    + ". "
                )

            neighbours = []
            for rel in con["relations"]:
                # Last URI segment is "<domain>#<concept>" of the target.
                target = rel["relationTypeUri"].split("/")[-1].split("#")
                rel_dom, rel_con = target[0], target[1]
                rel_label = rel["label"]
                rel_table = f"{table_prefix}{rel_dom}_{rel_con}_w"
                rel["schema_sentence"] = (
                    f"A relationship called {rel_label} connecting {subject}s to {rel_dom} {rel_con}s. "
                    f"A key connecting this table to the unique identifier of the {rel_table} table. "
                )
                neighbours.append(f"{rel_dom} {rel_con}")
            if neighbours:
                concept_sentence += (
                    "For context, it is connected to neighbor classes like "
                    + smart_comma_formatting(neighbours)
                )

            con["schema_sentence"] = concept_sentence
            con["schema_id_sentence"] = f"The unique identifier for each {subject}. "
            # NOTE(review): the sentence above names the "_w" table while this
            # key uses the "_np" suffix; both are kept exactly as in the
            # original — confirm the two suffixes are intentional.
            con["schema_table"] = f"{table_stem}_np"
|
|
171
|
+
|
|
172
|
+
def smart_comma_formatting(items):
    """Join labels into an English list: "a", "a and b", "a, b and c".

    Uses plain conditionals rather than ``match`` so the module also imports
    on Python versions older than 3.10; results are unchanged.

    Args:
        items: Sequence of strings, or ``None``.

    Returns:
        "" for ``None`` or an empty sequence, the sole element for a
        one-item sequence, otherwise the items separated by ", " with
        " and " before the last one (no Oxford comma).
    """
    if not items:
        return ""
    if len(items) == 1:
        return items[0]
    # For two items the join yields just the first one, giving "a and b".
    return ", ".join(items[:-1]) + " and " + items[-1]
|
|
184
|
+
|
|
185
|
+
def label_from_url(inpt):
    """Derive a readable label from a URL fragment.

    Returns the text between the first and second "#" of ``inpt`` with
    underscores replaced by spaces. When ``inpt`` contains no "#", it is
    returned unchanged.
    """
    pieces = inpt.split("#")
    if len(pieces) < 2:
        return inpt
    return pieces[1].replace("_", " ")
|
|
190
|
+
|
|
191
|
+
############################
|
|
192
|
+
# Mapping
|
|
193
|
+
############################
|
|
194
|
+
|
kobai/tenant_client.py
CHANGED
|
@@ -11,6 +11,7 @@ from langchain_core.language_models.chat_models import BaseChatModel
|
|
|
11
11
|
from langchain_core.embeddings import Embeddings
|
|
12
12
|
|
|
13
13
|
from . import spark_client, databricks_client, ai_query, tenant_api, ai_rag
|
|
14
|
+
from .genie import get_genie_descriptions
|
|
14
15
|
|
|
15
16
|
class TenantClient:
|
|
16
17
|
|
|
@@ -228,7 +229,8 @@ class TenantClient:
|
|
|
228
229
|
#print(t["sql"])
|
|
229
230
|
try:
|
|
230
231
|
self.spark_client._SparkClient__run_sql(t["sql"])
|
|
231
|
-
except:
|
|
232
|
+
except Exception as e:
|
|
233
|
+
print("Error creating view.", e)
|
|
232
234
|
print(t["sql"])
|
|
233
235
|
print("Updated " + str(len(tables)) + " views for Genie.")
|
|
234
236
|
|
|
@@ -265,7 +267,6 @@ class TenantClient:
|
|
|
265
267
|
"""
|
|
266
268
|
Use the Databricks Client to create a Genie Data Room for this tenant.
|
|
267
269
|
"""
|
|
268
|
-
|
|
269
270
|
data_rooms = self.databricks_client._DatabricksClient__api_get("/api/2.0/data-rooms")
|
|
270
271
|
room_id = "-1"
|
|
271
272
|
if data_rooms:
|
|
@@ -280,9 +281,10 @@ class TenantClient:
|
|
|
280
281
|
|
|
281
282
|
for t in self.__get_view_sql(domains=domains, concepts=concepts, not_concepts=not_concepts, enforce_map=enforce_map):
|
|
282
283
|
payload["table_identifiers"].append(t["table"])
|
|
284
|
+
print(t["table"])
|
|
283
285
|
response = self.databricks_client._DatabricksClient__api_patch("/api/2.0/data-rooms/" + room_id, payload)
|
|
284
286
|
|
|
285
|
-
payload = {"title":"Notes","content":"When filtering for a named entity, use a like comparison instead of equality. All tables are denormalized, so columns may have repeated rows for the same primary identifier. You should handle this by putting each table in a subquery and using the DISTINCT keyword.","instruction_type":"TEXT_INSTRUCTION"}
|
|
287
|
+
payload = {"title":"Notes","content":"When filtering for a named entity, use a like comparison instead of equality. All tables are denormalized, so columns may have repeated rows for the same primary identifier. You should handle this by putting each table in a subquery and using the DISTINCT keyword. The first column in each view is a unique identifier that should only be used for joins, and never shown to a user. Find another column to identify the subject of the table.","instruction_type":"TEXT_INSTRUCTION"}
|
|
286
288
|
instructions = self.databricks_client._DatabricksClient__api_get("/api/2.0/data-rooms/" + room_id + "/instructions")
|
|
287
289
|
inst_id = "-1"
|
|
288
290
|
|
|
@@ -324,11 +326,10 @@ class TenantClient:
|
|
|
324
326
|
########################################
|
|
325
327
|
|
|
326
328
|
def __get_descriptions(self):
|
|
327
|
-
|
|
328
|
-
params={"schema": self.schema, "tenant_id": self.model_id}
|
|
329
|
-
response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/descriptions", params=params)
|
|
330
|
-
return response.json()
|
|
331
329
|
|
|
330
|
+
tenant_config = self.get_tenant_config()
|
|
331
|
+
descriptions = get_genie_descriptions(self.model_id, tenant_config, self.schema)
|
|
332
|
+
return descriptions
|
|
332
333
|
|
|
333
334
|
def __get_view_sql(self, domains=None, concepts=None, not_concepts=None, enforce_map=True):
|
|
334
335
|
sql_list = []
|
|
@@ -404,16 +405,20 @@ class TenantClient:
|
|
|
404
405
|
|
|
405
406
|
def __get_questions(self):
|
|
406
407
|
|
|
407
|
-
response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/questions")
|
|
408
|
+
#response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/questions")
|
|
409
|
+
|
|
410
|
+
#tenant_config = self.get_tenant_config()
|
|
411
|
+
#questions = get_genie_questions(self.id, tenant_config)
|
|
408
412
|
|
|
409
|
-
return_questions = []
|
|
410
|
-
for q in
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
413
|
+
#return_questions = []
|
|
414
|
+
#for q in questions:
|
|
415
|
+
# sql = q["sql"]
|
|
416
|
+
# sql = sql[2:-2]
|
|
417
|
+
# sql = sql.replace(".data_", ".genie_").replace("_Literals", "").replace("_w", "")
|
|
418
|
+
# return_questions.append({"name": q["name"], "sql": sql})
|
|
415
419
|
|
|
416
|
-
return return_questions
|
|
420
|
+
#return return_questions
|
|
421
|
+
return []
|
|
417
422
|
|
|
418
423
|
########################################
|
|
419
424
|
# RAG Functions
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kobai-sdk
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.3
|
|
4
4
|
Summary: A package that enables interaction with a Kobai tenant.
|
|
5
5
|
Author-email: Ryan Oattes <ryan@kobai.io>
|
|
6
6
|
License: Apache License
|
|
@@ -292,6 +292,22 @@ k.use_browser_token(access_token="KOBAI_ACESS_TOKEN_FROM_BROWSER")
|
|
|
292
292
|
#### Authentication via on-behalf-of flow
|
|
293
293
|
The sample code demonstrating authentication via the on-behalf-of flow will be provided, if requested.
|
|
294
294
|
|
|
295
|
+
3. Initialize a Spark client using your current `SparkSession`, and generate semantically-rich SQL views describing this Kobai tenant:
|
|
296
|
+
|
|
297
|
+
```python
|
|
298
|
+
k.spark_init_session(spark)
|
|
299
|
+
k.spark_generate_genie_views()
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
4. Initialize a Databricks API client using your Notebook context, and create a Genie Data Rooms environment for this Kobai tenant.
|
|
303
|
+
|
|
304
|
+
```python
|
|
305
|
+
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
|
|
306
|
+
sql_warehouse = '8834d98a8agffa76'
|
|
307
|
+
|
|
308
|
+
k.databricks_init_notebook(notebook_context, sql_warehouse)
|
|
309
|
+
k.databricks_build_genie()
|
|
310
|
+
```
|
|
295
311
|
|
|
296
312
|
## AI Functionality
|
|
297
313
|
The Kobai SDK enables users to ask follow-up questions based on the results of previous queries. This functionality currently supports models hosted on Databricks and Azure OpenAI.
|
|
@@ -3,12 +3,13 @@ kobai/ai_query.py,sha256=FnXn2pabJpXfTUcJvieVkAgMAjSTH9u5SFR9SJUJ-Lk,9556
|
|
|
3
3
|
kobai/ai_rag.py,sha256=XUq_SnJw17P53Zk75hHJgTryGjHEAyYPwC0r2WtuNp4,14627
|
|
4
4
|
kobai/databricks_client.py,sha256=fyqqMly2Qm0r1AHWsQjkYeNsDdH0G1JSgTkF9KJ55qA,2118
|
|
5
5
|
kobai/demo_tenant_client.py,sha256=wlNc-bdI2wotRXo8ppUOalv4hYdBlek_WzJNARZV-AE,9293
|
|
6
|
+
kobai/genie.py,sha256=-EbEYpu9xj_3zIXaPdwbNJEAmoeM7nb9qK-h1f_STtM,8061
|
|
6
7
|
kobai/ms_authenticate.py,sha256=rlmhtvAaSRBlYmvIBy5epMVa4MBGBLPaMwawu1T_xDQ,2252
|
|
7
8
|
kobai/spark_client.py,sha256=opM_F-4Ut5Hq5zZjWMuLvUps9sDULvyPNZHXGL8dW1k,776
|
|
8
9
|
kobai/tenant_api.py,sha256=Q5yuFd9_V4lo3LWzvYEEO3LpDRWFgQD4TlRPXDTGbiE,4368
|
|
9
|
-
kobai/tenant_client.py,sha256=
|
|
10
|
-
kobai_sdk-0.3.
|
|
11
|
-
kobai_sdk-0.3.
|
|
12
|
-
kobai_sdk-0.3.
|
|
13
|
-
kobai_sdk-0.3.
|
|
14
|
-
kobai_sdk-0.3.
|
|
10
|
+
kobai/tenant_client.py,sha256=QWITygInZBZCkW3M7xNQCkNfQoGpTvKW_BykG2-E7Is,39012
|
|
11
|
+
kobai_sdk-0.3.3.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
|
|
12
|
+
kobai_sdk-0.3.3.dist-info/METADATA,sha256=bKWV-R83S7Mz4N1Sqn7cqyJN6WlYGADGD3Zd0Why-Oc,19869
|
|
13
|
+
kobai_sdk-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
14
|
+
kobai_sdk-0.3.3.dist-info/top_level.txt,sha256=ns1El3BrTTHKvoAgU1XtiSaVIudYeCXbEEUVY8HFDZ4,6
|
|
15
|
+
kobai_sdk-0.3.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|