kobai-sdk 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kobai-sdk might be problematic. Click here for more details.

kobai/genie.py ADDED
@@ -0,0 +1,194 @@
1
+
2
def get_genie_descriptions(solution_id, structure, schema):
    """Enrich a tenant structure in place with Genie description metadata.

    Every domain's ``name`` is copied to ``label``, then the structure is run
    through the metadata, inheritance, map-count and sentence helpers in turn.
    Each helper mutates ``structure`` directly.

    Args:
        solution_id: Identifier forwarded to ``add_schema_sentences`` as its
            ``tenant_id`` argument.
        structure: Tenant configuration dict with a ``"domains"`` list.
        schema: Schema name forwarded to ``add_schema_sentences``.

    Returns:
        The same ``structure`` object that was passed in.
    """
    for domain in structure["domains"]:
        domain["label"] = domain["name"]

    # The helpers below read the "label" keys set above, so they run afterwards.
    single_argument_steps = (
        add_episteme_metadata,
        download_inherited_props,
        add_map_count,
        add_semantic_sentences,
    )
    for step in single_argument_steps:
        step(structure)

    add_schema_sentences(structure, schema, solution_id)
    return structure
15
+
16
+ #def get_genie_questions(solution_id, structure):
17
+
18
+ # graph_uri = structure["uri"]
19
+
20
+ # question_config = get_tenant_question_config(solution_id, structure)
21
+
22
+ # questions = []
23
+ # for q in question_config:
24
+ # q_name = q["description"]
25
+ # q_def = json.loads(q["definition"])
26
+
27
+ # if q["published"]:
28
+ # result = get_question_sql(q_def, graph_uri)
29
+ # if result is not None:
30
+ # questions.append({"name": q_name, "sql": result})
31
+
32
+ # return questions
33
+
34
+ ############################
35
+ # Config Building
36
+ ############################
37
+
38
def add_episteme_metadata(structure):
    """Stamp every domain, concept, property and relation with an ``e_id``.

    Identifiers are built from labels joined with underscores:
    ``<domain>``, ``<domain>_<concept>`` and ``<domain>_<concept>_<member>``.
    Relations additionally receive ``e_target_id``: the last ``/``-separated
    segment of ``relationTypeUri`` with ``#`` replaced by ``_``.

    Args:
        structure: Dict with a ``"domains"`` list; mutated in place.
    """
    for domain in structure["domains"]:
        domain_id = domain["label"]
        domain["e_id"] = domain_id

        for concept in domain["concepts"]:
            concept_id = domain_id + "_" + concept["label"]
            concept["e_id"] = concept_id

            for prop in concept["properties"]:
                prop["e_id"] = concept_id + "_" + prop["label"]

            for relation in concept["relations"]:
                relation["e_id"] = concept_id + "_" + relation["label"]
                target_segment = relation["relationTypeUri"].split("/")[-1]
                relation["e_target_id"] = target_segment.replace("#", "_")
50
+
51
def download_inherited_props(structure):
    """Merge inherited properties and relations into every concept.

    For each concept, ``recurse_parent_props`` is called with the concept's own
    ``properties`` and ``relations`` lists, so inherited members are appended to
    those lists in place. Domains without a ``"concepts"`` key are skipped.

    Args:
        structure: Dict with a ``"domains"`` list; mutated in place.
    """
    for domain in structure["domains"]:
        for concept in domain.get("concepts", ()):
            # A fresh visited list per concept keeps each traversal independent.
            recurse_parent_props(
                concept["uri"],
                structure,
                concept["properties"],
                concept["relations"],
                visited=[],
            )
56
+
57
def _append_missing_by_uri(target, candidates):
    """Append each candidate whose ``uri`` is not already present in ``target``."""
    for candidate in candidates:
        if not any(existing["uri"] == candidate["uri"] for existing in target):
            # Appended by reference: the inherited dict is shared, not copied.
            target.append(candidate)


def recurse_parent_props(uri, structure, props, rels, visited=None):
    """Collect properties and relations inherited by the concept ``uri``.

    Walks the ``inheritedConcepts`` of the concept identified by ``uri``,
    appends every parent property/relation whose ``uri`` is not yet in
    ``props``/``rels``, and recurses into each parent so that members of more
    distant ancestors are collected as well.

    Args:
        uri: URI of the concept whose parents are inspected.
        structure: Dict with a ``"domains"`` list of dicts holding ``"concepts"``.
        props: List of property dicts, extended in place.
        rels: List of relation dicts, extended in place.
        visited: URIs already traversed; used to stop on inheritance cycles.
            Extended in place when supplied. Defaults to a new empty list.
    """
    # The default used to be dereferenced directly, so omitting ``visited``
    # raised AttributeError on ``None.append``.
    if visited is None:
        visited = []
    visited.append(uri)

    for dom in structure["domains"]:
        for con in dom["concepts"]:
            if con["uri"] != uri:
                continue
            for parent_uri in con["inheritedConcepts"]:
                for parent_dom in structure["domains"]:
                    for parent_con in parent_dom["concepts"]:
                        if parent_con["uri"] != parent_uri:
                            continue
                        _append_missing_by_uri(props, parent_con["properties"])
                        _append_missing_by_uri(rels, parent_con["relations"])
                        if parent_uri not in visited:
                            recurse_parent_props(
                                parent_uri, structure, props, rels, visited
                            )
82
+
83
def add_map_count(structure):
    """Record on each concept how many mapping definitions target it.

    A mapping definition counts towards a concept when its ``conceptTypeUri``
    equals the concept's ``uri``. The total is stored under ``"map_count"``.

    Args:
        structure: Dict with ``"domains"`` and ``"mappingDefs"`` lists; the
            concept dicts are mutated in place.
    """
    for domain in structure["domains"]:
        for concept in domain["concepts"]:
            concept["map_count"] = sum(
                1
                for mapping in structure["mappingDefs"]
                if concept["uri"] == mapping["conceptTypeUri"]
            )
93
+
94
+ ############################
95
+ # Question Config
96
+ ############################
97
+
98
def get_tenant_question_config(solution_id, structure):
    """Convert the rows in ``structure["queries"]`` into question definitions.

    Each row is read positionally: index 1 is the question id, index 2 the
    description, index 3 the definition, and index 4 marks publication (any
    value other than ``None`` or ``""`` counts as published).

    Args:
        solution_id: Not used by this function; kept so existing callers that
            pass it continue to work.
        structure: Dict with a ``"queries"`` list of indexable rows.

    Returns:
        A list of dicts with keys ``question_id``, ``description``,
        ``definition`` and ``published`` (a bool).
    """
    # The leftover debug ``print(row)`` was removed so library calls no longer
    # dump every query row to stdout.
    return [
        {
            "question_id": row[1],
            "description": row[2],
            "definition": row[3],
            "published": bool(row[4] is not None and row[4] != ""),
        }
        for row in structure["queries"]
    ]
109
+
110
+ ############################
111
+ # Sentences
112
+ ############################
113
+
114
def add_semantic_sentences(structure):
    """Attach natural-language ``semantic_sentence`` text throughout the structure.

    * Each property gets a sentence naming the property, its domain and concept.
    * Each concept gets a sentence listing its property labels.
    * Each domain gets a sentence listing its concept labels.

    Lists of labels are rendered with ``smart_comma_formatting``.

    Args:
        structure: Dict with a ``"domains"`` list; mutated in place.
    """
    for dom in structure["domains"]:
        concept_labels = []

        for con in dom["concepts"]:
            concept_labels.append(con["label"])
            property_labels = []

            for prop in con["properties"]:
                property_labels.append(prop["label"])
                sentence = (
                    f"The {prop['label']} for the {dom['label']} {con['label']}."
                )
                # Correctly spelled key, consistent with concepts and domains.
                prop["semantic_sentence"] = sentence
                # The misspelled key is what this function originally wrote;
                # it is kept so any reader of the old name continues to work.
                prop["sementic_sentence"] = sentence

            con["semantic_sentence"] = (
                f"The {con['label']} concept contains details about "
                f"{smart_comma_formatting(property_labels)}."
            )

        dom["semantic_sentence"] = (
            f"The {dom['label']} domain contains concepts called "
            f"{smart_comma_formatting(concept_labels)}."
        )
137
+
138
def add_schema_sentences(structure, schema, tenant_id):
    """Attach table-oriented ``schema_sentence`` descriptions to the structure.

    For every concept this writes:

    * ``schema_sentence``: text describing the concept's ``..._w`` table, its
      property labels and the classes it is related to;
    * ``schema_id_sentence``: text describing the unique identifier;
    * ``schema_table``: the concept's table name ending in ``_np``.

    Each property and relation of the concept also receives its own
    ``schema_sentence``. The target domain and class of a relation are taken
    from the last ``/``-separated segment of ``relationTypeUri``, split on ``#``.

    Args:
        structure: Dict with a ``"domains"`` list; mutated in place.
        schema: Schema prefix used in generated table names.
        tenant_id: Identifier embedded in generated table names.
    """
    for domain in structure["domains"]:
        for concept in domain["concepts"]:
            dom_label = domain["label"]
            con_label = concept["label"]
            table_stem = (
                schema + ".data_" + tenant_id + "_" + dom_label + "_" + con_label
            )
            subject = dom_label + " " + con_label

            sentence = (
                f"The {table_stem}_w table contains information about {subject}s. "
                f"This refers to a class {con_label} in a domain of similar classes called {dom_label}. "
                "It includes details such as "
            )

            if len(concept["properties"]) > 0:
                property_labels = []
                for prop in concept["properties"]:
                    property_labels.append(prop["label"])
                    prop["schema_sentence"] = f"The {prop['label']} for the {subject}."
                sentence += smart_comma_formatting(property_labels) + ". "

            neighbour_names = []
            if len(concept["relations"]) > 0:
                for relation in concept["relations"]:
                    target = relation["relationTypeUri"].split("/")[-1].split("#")
                    target_dom, target_con = target[0], target[1]
                    target_table = (
                        schema + ".data_" + tenant_id + "_"
                        + target_dom + "_" + target_con + "_w"
                    )
                    relation["schema_sentence"] = (
                        f"A relationship called {relation['label']} connecting {subject}s to {target_dom} {target_con}s. "
                        f"A key connecting this table to the unique identifier of the {target_table} table. "
                    )
                    neighbour_names.append(target_dom + " " + target_con)

            if len(neighbour_names) > 0:
                sentence += (
                    "For context, it is connected to neighbor classes like "
                    + smart_comma_formatting(neighbour_names)
                )

            concept["schema_sentence"] = sentence
            concept["schema_id_sentence"] = f"The unique identifier for each {subject}. "
            concept["schema_table"] = table_stem + "_np"
171
+
172
def smart_comma_formatting(items):
    """Render a sequence of strings as an English list.

    ``None`` or an empty sequence gives ``""``; a single item is returned
    unchanged; otherwise all items but the last are joined with ``", "`` and
    the last is appended after ``" and "`` (no comma before "and").

    Args:
        items: Sequence of strings, or ``None``.

    Returns:
        The formatted string.
    """
    if items is None or len(items) == 0:
        return ""
    if len(items) == 1:
        return items[0]
    # With exactly two items the join yields just the first item, so this
    # single expression covers both "a and b" and "a, b and c".
    leading = ", ".join(items[:-1])
    return leading + " and " + items[-1]
184
+
185
def label_from_url(inpt):
    """Derive a readable label from the fragment of a URL-like string.

    The text between the first and second ``#`` (or from the first ``#`` to the
    end) is returned with underscores replaced by spaces. A string without
    ``#`` is returned unchanged.

    Args:
        inpt: The string to convert.

    Returns:
        The derived label, or ``inpt`` itself when it contains no ``#``.
    """
    pieces = inpt.split("#")
    if len(pieces) < 2:
        return inpt
    return pieces[1].replace("_", " ")
190
+
191
+ ############################
192
+ # Mapping
193
+ ############################
194
+
kobai/tenant_client.py CHANGED
@@ -11,6 +11,7 @@ from langchain_core.language_models.chat_models import BaseChatModel
11
11
  from langchain_core.embeddings import Embeddings
12
12
 
13
13
  from . import spark_client, databricks_client, ai_query, tenant_api, ai_rag
14
+ from .genie import get_genie_descriptions
14
15
 
15
16
  class TenantClient:
16
17
 
@@ -228,7 +229,8 @@ class TenantClient:
228
229
  #print(t["sql"])
229
230
  try:
230
231
  self.spark_client._SparkClient__run_sql(t["sql"])
231
- except:
232
+ except Exception as e:
233
+ print("Error creating view.", e)
232
234
  print(t["sql"])
233
235
  print("Updated " + str(len(tables)) + " views for Genie.")
234
236
 
@@ -265,7 +267,6 @@ class TenantClient:
265
267
  """
266
268
  Use the Databricks Client to create a Genie Data Room for this tenant.
267
269
  """
268
-
269
270
  data_rooms = self.databricks_client._DatabricksClient__api_get("/api/2.0/data-rooms")
270
271
  room_id = "-1"
271
272
  if data_rooms:
@@ -280,9 +281,10 @@ class TenantClient:
280
281
 
281
282
  for t in self.__get_view_sql(domains=domains, concepts=concepts, not_concepts=not_concepts, enforce_map=enforce_map):
282
283
  payload["table_identifiers"].append(t["table"])
284
+ print(t["table"])
283
285
  response = self.databricks_client._DatabricksClient__api_patch("/api/2.0/data-rooms/" + room_id, payload)
284
286
 
285
- payload = {"title":"Notes","content":"When filtering for a named entity, use a like comparison instead of equality. All tables are denormalized, so columns may have repeated rows for the same primary identifier. You should handle this by putting each table in a subquery and using the DISTINCT keyword.","instruction_type":"TEXT_INSTRUCTION"}
287
+ payload = {"title":"Notes","content":"When filtering for a named entity, use a like comparison instead of equality. All tables are denormalized, so columns may have repeated rows for the same primary identifier. You should handle this by putting each table in a subquery and using the DISTINCT keyword. The first column in each view is a unique identifier that should only be used for joins, and never shown to a user. Find another column to identify the subject of the table.","instruction_type":"TEXT_INSTRUCTION"}
286
288
  instructions = self.databricks_client._DatabricksClient__api_get("/api/2.0/data-rooms/" + room_id + "/instructions")
287
289
  inst_id = "-1"
288
290
 
@@ -324,11 +326,10 @@ class TenantClient:
324
326
  ########################################
325
327
 
326
328
  def __get_descriptions(self):
327
-
328
- params={"schema": self.schema, "tenant_id": self.model_id}
329
- response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/descriptions", params=params)
330
- return response.json()
331
329
 
330
+ tenant_config = self.get_tenant_config()
331
+ descriptions = get_genie_descriptions(self.model_id, tenant_config, self.schema)
332
+ return descriptions
332
333
 
333
334
  def __get_view_sql(self, domains=None, concepts=None, not_concepts=None, enforce_map=True):
334
335
  sql_list = []
@@ -404,16 +405,20 @@ class TenantClient:
404
405
 
405
406
  def __get_questions(self):
406
407
 
407
- response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/questions")
408
+ #response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/questions")
409
+
410
+ #tenant_config = self.get_tenant_config()
411
+ #questions = get_genie_questions(self.id, tenant_config)
408
412
 
409
- return_questions = []
410
- for q in response.json():
411
- sql = q["sql"]
412
- sql = sql[2:-2]
413
- sql = sql.replace(".data_", ".genie_").replace("_Literals", "").replace("_w", "")
414
- return_questions.append({"name": q["name"], "sql": sql})
413
+ #return_questions = []
414
+ #for q in questions:
415
+ # sql = q["sql"]
416
+ # sql = sql[2:-2]
417
+ # sql = sql.replace(".data_", ".genie_").replace("_Literals", "").replace("_w", "")
418
+ # return_questions.append({"name": q["name"], "sql": sql})
415
419
 
416
- return return_questions
420
+ #return return_questions
421
+ return []
417
422
 
418
423
  ########################################
419
424
  # RAG Functions
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kobai-sdk
3
- Version: 0.3.2
3
+ Version: 0.3.3
4
4
  Summary: A package that enables interaction with a Kobai tenant.
5
5
  Author-email: Ryan Oattes <ryan@kobai.io>
6
6
  License: Apache License
@@ -292,6 +292,22 @@ k.use_browser_token(access_token="KOBAI_ACESS_TOKEN_FROM_BROWSER")
292
292
  #### Authentication via on-behalf-of flow
293
293
  The sample code demonstrating authentication via the on-behalf-of flow will be provided, if requested.
294
294
 
295
+ 3. Initialize a Spark client using your current `SparkSession`, and generate semantically-rich SQL views describing this Kobai tenant:
296
+
297
+ ```python
298
+ k.spark_init_session(spark)
299
+ k.spark_generate_genie_views()
300
+ ```
301
+
302
+ 4. Initialize a Databricks API client using your Notebook context, and create a Genie Data Rooms environment for this Kobai tenant:
303
+
304
+ ```python
305
+ notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
306
+ sql_warehouse = '8834d98a8agffa76'
307
+
308
+ k.databricks_init_notebook(notebook_context, sql_warehouse)
309
+ k.databricks_build_genie()
310
+ ```
295
311
 
296
312
  ## AI Functionality
297
313
  The Kobai SDK enables users to ask follow-up questions based on the results of previous queries. This functionality currently supports models hosted on Databricks and Azure OpenAI.
@@ -3,12 +3,13 @@ kobai/ai_query.py,sha256=FnXn2pabJpXfTUcJvieVkAgMAjSTH9u5SFR9SJUJ-Lk,9556
3
3
  kobai/ai_rag.py,sha256=XUq_SnJw17P53Zk75hHJgTryGjHEAyYPwC0r2WtuNp4,14627
4
4
  kobai/databricks_client.py,sha256=fyqqMly2Qm0r1AHWsQjkYeNsDdH0G1JSgTkF9KJ55qA,2118
5
5
  kobai/demo_tenant_client.py,sha256=wlNc-bdI2wotRXo8ppUOalv4hYdBlek_WzJNARZV-AE,9293
6
+ kobai/genie.py,sha256=-EbEYpu9xj_3zIXaPdwbNJEAmoeM7nb9qK-h1f_STtM,8061
6
7
  kobai/ms_authenticate.py,sha256=rlmhtvAaSRBlYmvIBy5epMVa4MBGBLPaMwawu1T_xDQ,2252
7
8
  kobai/spark_client.py,sha256=opM_F-4Ut5Hq5zZjWMuLvUps9sDULvyPNZHXGL8dW1k,776
8
9
  kobai/tenant_api.py,sha256=Q5yuFd9_V4lo3LWzvYEEO3LpDRWFgQD4TlRPXDTGbiE,4368
9
- kobai/tenant_client.py,sha256=w83NmLuOEyJjOVUuLva2vbq0zpGFzhi9LdSq1pKClA8,38613
10
- kobai_sdk-0.3.2.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
11
- kobai_sdk-0.3.2.dist-info/METADATA,sha256=7WIGEJBGHn2QIsYPLdbsnkwvtjqx3RJHZJR3kl0gu_M,19304
12
- kobai_sdk-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
- kobai_sdk-0.3.2.dist-info/top_level.txt,sha256=ns1El3BrTTHKvoAgU1XtiSaVIudYeCXbEEUVY8HFDZ4,6
14
- kobai_sdk-0.3.2.dist-info/RECORD,,
10
+ kobai/tenant_client.py,sha256=QWITygInZBZCkW3M7xNQCkNfQoGpTvKW_BykG2-E7Is,39012
11
+ kobai_sdk-0.3.3.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
12
+ kobai_sdk-0.3.3.dist-info/METADATA,sha256=bKWV-R83S7Mz4N1Sqn7cqyJN6WlYGADGD3Zd0Why-Oc,19869
13
+ kobai_sdk-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ kobai_sdk-0.3.3.dist-info/top_level.txt,sha256=ns1El3BrTTHKvoAgU1XtiSaVIudYeCXbEEUVY8HFDZ4,6
15
+ kobai_sdk-0.3.3.dist-info/RECORD,,