kobai-sdk 0.3.2__tar.gz → 0.3.3rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kobai-sdk might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kobai-sdk
3
- Version: 0.3.2
3
+ Version: 0.3.3rc1
4
4
  Summary: A package that enables interaction with a Kobai tenant.
5
5
  Author-email: Ryan Oattes <ryan@kobai.io>
6
6
  License: Apache License
@@ -292,6 +292,22 @@ k.use_browser_token(access_token="KOBAI_ACESS_TOKEN_FROM_BROWSER")
292
292
  #### Authentication via on-behalf-of flow
293
293
  The sample code demonstrating authentication via the on-behalf-of flow will be provided, if requested.
294
294
 
295
+ 3. Initialize a Spark client using your current `SparkSession`, and generate semantically-rich SQL views describing this Kobai tenant:
296
+
297
+ ```python
298
+ k.spark_init_session(spark)
299
+ k.spark_generate_genie_views()
300
+ ```
301
+
302
+ 4. Initialize a Databricks API client using your Notebook context, and create a Genie Data Rooms environment for this Kobai tenant.
303
+
304
+ ```python
305
+ notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
306
+ sql_warehouse = '8834d98a8agffa76'
307
+
308
+ k.databricks_init_notebook(notebook_context, sql_warehouse)
309
+ k.databricks_build_genie()
310
+ ```
295
311
 
296
312
  ## AI Functionality
297
313
  The Kobai SDK enables users to ask follow-up questions based on the results of previous queries. This functionality currently supports models hosted on Databricks and Azure OpenAI.
@@ -59,6 +59,22 @@ k.use_browser_token(access_token="KOBAI_ACESS_TOKEN_FROM_BROWSER")
59
59
  #### Authentication via on-behalf-of flow
60
60
  The sample code demonstrating authentication via the on-behalf-of flow will be provided, if requested.
61
61
 
62
+ 3. Initialize a Spark client using your current `SparkSession`, and generate semantically-rich SQL views describing this Kobai tenant:
63
+
64
+ ```python
65
+ k.spark_init_session(spark)
66
+ k.spark_generate_genie_views()
67
+ ```
68
+
69
+ 4. Initialize a Databricks API client using your Notebook context, and create a Genie Data Rooms environment for this Kobai tenant.
70
+
71
+ ```python
72
+ notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
73
+ sql_warehouse = '8834d98a8agffa76'
74
+
75
+ k.databricks_init_notebook(notebook_context, sql_warehouse)
76
+ k.databricks_build_genie()
77
+ ```
62
78
 
63
79
  ## AI Functionality
64
80
  The Kobai SDK enables users to ask follow-up questions based on the results of previous queries. This functionality currently supports models hosted on Databricks and Azure OpenAI.
@@ -0,0 +1,226 @@
1
+ import json
2
+
3
def get_genie_descriptions(solution_id, structure, schema):
    """Enrich a tenant *structure* dict with Genie description metadata.

    Mutates *structure* in place: copies domain names to labels, attaches
    episteme IDs, pulls down inherited properties/relations, records
    mapping counts, and generates semantic/schema sentences.  Returns the
    same *structure* for convenience.
    """
    # The helpers below key off a "label" field; mirror each domain's name.
    for domain in structure["domains"]:
        domain["label"] = domain["name"]

    add_episteme_metadata(structure)
    download_inherited_props(structure)
    add_map_count(structure)
    add_semantic_sentences(structure)
    add_schema_sentences(structure, schema, solution_id)

    return structure
16
+
17
+ #def get_genie_questions(solution_id, structure):
18
+
19
+ # graph_uri = structure["uri"]
20
+
21
+ # question_config = get_tenant_question_config(solution_id, structure)
22
+
23
+ # questions = []
24
+ # for q in question_config:
25
+ # q_name = q["description"]
26
+ # q_def = json.loads(q["definition"])
27
+
28
+ # if q["published"]:
29
+ # result = get_question_sql(q_def, graph_uri)
30
+ # if result is not None:
31
+ # questions.append({"name": q_name, "sql": result})
32
+
33
+ # return questions
34
+
35
+ ############################
36
+ # Config Building
37
+ ############################
38
+
39
def add_episteme_metadata(structure):
    """Attach episteme identifiers ("e_id") to every domain, concept,
    property, and relation in *structure* (mutated in place).

    IDs are underscore-joined label paths (domain_concept_member); each
    relation also gets an "e_target_id" derived from the final path
    segment of its relationTypeUri, with "#" turned into "_".
    """
    for dom in structure["domains"]:
        dom_label = dom["label"]
        dom["e_id"] = dom_label
        for con in dom["concepts"]:
            con_prefix = dom_label + "_" + con["label"]
            con["e_id"] = con_prefix
            for prop in con["properties"]:
                prop["e_id"] = con_prefix + "_" + prop["label"]
            for rel in con["relations"]:
                rel["e_id"] = con_prefix + "_" + rel["label"]
                rel["e_target_id"] = rel["relationTypeUri"].split("/")[-1].replace("#", "_")
51
+
52
def download_inherited_props(structure):
    """For every concept in *structure*, merge in the properties and
    relations inherited from its parent concepts (walks the inheritance
    graph via recurse_parent_props; lists are extended in place).
    """
    for dom in structure["domains"]:
        for con in dom.get("concepts", []):
            recurse_parent_props(
                con["uri"], structure, con["properties"], con["relations"], visited=[]
            )
57
+
58
def recurse_parent_props(uri, structure, props, rels, visited=None):
    """Recursively collect properties and relations inherited from the
    concept identified by *uri* and its ancestor concepts.

    Args:
        uri: URI of the concept whose inherited members are gathered.
        structure: Tenant structure dict with "domains" -> "concepts".
        props: List of property dicts, extended in place (deduplicated by "uri").
        rels: List of relation dicts, extended in place (deduplicated by "uri").
        visited: URIs already expanded; guards against inheritance cycles.
    """
    # Bug fix: the previous default (visited=None) crashed with
    # AttributeError on visited.append() whenever the caller omitted the
    # argument; initialize a fresh list per top-level call instead.
    if visited is None:
        visited = []
    visited.append(uri)
    for dom in structure["domains"]:
        for con in dom["concepts"]:
            if con["uri"] != uri:
                continue
            for parent_uri in con["inheritedConcepts"]:
                for pdom in structure["domains"]:
                    for pcon in pdom["concepts"]:
                        if pcon["uri"] != parent_uri:
                            continue
                        for pprop in pcon["properties"]:
                            # Only add parent properties not already present.
                            if not any(pf["uri"] == pprop["uri"] for pf in props):
                                props.append(pprop)
                        for prel in pcon["relations"]:
                            if not any(rf["uri"] == prel["uri"] for rf in rels):
                                rels.append(prel)
                # Recurse upward, skipping ancestors already expanded.
                if parent_uri not in visited:
                    recurse_parent_props(parent_uri, structure, props, rels, visited)
83
+
84
def add_map_count(structure):
    """Record on each concept ("map_count") how many entries in
    structure["mappingDefs"] target that concept's URI.
    """
    mapping_defs = structure["mappingDefs"]
    for dom in structure["domains"]:
        for con in dom["concepts"]:
            con["map_count"] = sum(
                1 for md in mapping_defs if md["conceptTypeUri"] == con["uri"]
            )
95
+
96
+ #def get_tenant_mapping_config(solution_id):
97
+ # mapping_sql = f"""
98
+ # select s.id, mapd.concept_type_uri
99
+ # from studio.solutions s
100
+ # inner join studio.mapping_defs mapd
101
+ # on s.id = mapd.solution_id
102
+ # where s.id = {solution_id}
103
+ # """
104
+
105
+ # mapping_rows = app_db.run_query(mapping_sql)
106
+
107
+ # mapping_defs = []
108
+ # for row in mapping_rows:
109
+ # mapping_def = {"solution_id": row[0], "concept_type_uri": row[1]}
110
+ # mapping_defs.append(mapping_def)
111
+ # return mapping_defs
112
+
113
+ ############################
114
+ # Question Config
115
+ ############################
116
+
117
def get_tenant_question_config(solution_id, structure):
    """Build question definitions from the rows in structure["queries"].

    Each row is indexed positionally: row[1] is the question id, row[2]
    the description, row[3] the definition, and row[4] an API id -- a
    non-empty API id marks the question as published.

    Args:
        solution_id: Kept for interface compatibility; not used here.
        structure: Tenant structure dict containing a "queries" sequence.

    Returns:
        List of dicts with question_id/description/definition/published.
    """
    # Fix: removed a leftover debug print(row) that spammed stdout on
    # every row, and the dead commented-out SQL the rows replaced.
    question_defs = []
    for row in structure["queries"]:
        is_published = row[4] is not None and row[4] != ""
        question_defs.append(
            {
                "question_id": row[1],
                "description": row[2],
                "definition": row[3],
                "published": is_published,
            }
        )
    return question_defs
141
+
142
+ ############################
143
+ # Sentences
144
+ ############################
145
+
146
def add_semantic_sentences(structure):
    """Generate natural-language description sentences for every domain,
    concept, and property in *structure* (mutated in place).

    NOTE(review): the per-property key is spelled "sementic_sentence" in
    the released code; it is preserved byte-for-byte here because
    downstream consumers may read that exact key -- TODO confirm before
    renaming.  Fix: removed a relation-label list that was built on every
    concept but never used.
    """
    for dom in structure["domains"]:
        concept_labels = []

        for con in dom["concepts"]:
            concept_labels.append(con["label"])
            property_labels = []

            for prop in con["properties"]:
                property_labels.append(prop["label"])
                prop["sementic_sentence"] = (
                    "The " + prop["label"] + " for the "
                    + dom["label"] + " " + con["label"] + "."
                )

            con["semantic_sentence"] = (
                "The " + con["label"] + " concept contains details about "
                + smart_comma_formatting(property_labels) + "."
            )

        dom["semantic_sentence"] = (
            "The " + dom["label"] + " domain contains concepts called "
            + smart_comma_formatting(concept_labels) + "."
        )
169
+
170
def add_schema_sentences(structure, schema, tenant_id):
    """Generate schema-level description sentences for each concept and
    relation, referencing the backing warehouse tables (mutated in place).

    Args:
        structure: Tenant structure dict.
        schema: Schema name used to qualify generated table names.
        tenant_id: Tenant/model identifier embedded in table names
            (assumed to be a string -- TODO confirm; it is concatenated
            with "+").
    """
    for dom in structure["domains"]:
        for con in dom["concepts"]:
            table_prefix = schema + ".data_" + tenant_id + "_"
            concept_sentence = (
                "The " + table_prefix + dom["label"] + "_" + con["label"]
                + "_w table contains information about "
                + dom["label"] + " " + con["label"] + "s. "
            )
            concept_sentence += (
                "This refers to a class " + con["label"]
                + " in a domain of similar classes called " + dom["label"] + ". "
            )

            property_labels = []
            for prop in con["properties"]:
                property_labels.append(prop["label"])
                prop["schema_sentence"] = (
                    "The " + prop["label"] + " for the "
                    + dom["label"] + " " + con["label"] + "."
                )
            if property_labels:
                # Bug fix: "It includes details such as " was previously
                # appended unconditionally, leaving a dangling, unfinished
                # sentence for concepts with no properties.
                concept_sentence += (
                    "It includes details such as "
                    + smart_comma_formatting(property_labels) + ". "
                )

            neighbour_labels = []
            for rel in con["relations"]:
                # relationTypeUri is assumed to end in ".../Domain#Concept";
                # anything else raises IndexError (same as before).
                last_segment = rel["relationTypeUri"].split("/")[-1]
                rel_dom = last_segment.split("#")[0]
                rel_con = last_segment.split("#")[1]
                rel_table = table_prefix + rel_dom + "_" + rel_con + "_w"
                rel["schema_sentence"] = (
                    "A relationship called " + rel["label"] + " connecting "
                    + dom["label"] + " " + con["label"] + "s to "
                    + rel_dom + " " + rel_con + "s. "
                    + "A key connecting this table to the unique identifier of the "
                    + rel_table + " table. "
                )
                neighbour_labels.append(rel_dom + " " + rel_con)
            if neighbour_labels:
                concept_sentence += (
                    "For context, it is connected to neighbor classes like "
                    + smart_comma_formatting(neighbour_labels)
                )

            con["schema_sentence"] = concept_sentence
            con["schema_id_sentence"] = (
                "The unique identifier for each "
                + dom["label"] + " " + con["label"] + ". "
            )
            # NOTE(review): the descriptive sentence references the "_w"
            # table while schema_table points at "_np" -- preserved as-is;
            # confirm which suffix is intended.
            con["schema_table"] = table_prefix + dom["label"] + "_" + con["label"] + "_np"
203
+
204
def smart_comma_formatting(items):
    """Join *items* into an English list: "a", "a and b", "a, b and c".

    Returns "" when *items* is None or empty.
    """
    if not items:
        return ""
    if len(items) == 1:
        return items[0]
    return ", ".join(items[:-1]) + " and " + items[-1]
216
+
217
def label_from_url(inpt):
    """Derive a human-readable label from a URI.

    Returns the segment after the first "#" (up to any second "#") with
    underscores turned into spaces; inputs containing no "#" come back
    unchanged.
    """
    parts = inpt.split("#")
    if len(parts) < 2:
        return inpt
    return parts[1].replace("_", " ")
222
+
223
+ ############################
224
+ # Mapping
225
+ ############################
226
+
@@ -11,6 +11,7 @@ from langchain_core.language_models.chat_models import BaseChatModel
11
11
  from langchain_core.embeddings import Embeddings
12
12
 
13
13
  from . import spark_client, databricks_client, ai_query, tenant_api, ai_rag
14
+ from .genie import get_genie_descriptions
14
15
 
15
16
  class TenantClient:
16
17
 
@@ -228,7 +229,8 @@ class TenantClient:
228
229
  #print(t["sql"])
229
230
  try:
230
231
  self.spark_client._SparkClient__run_sql(t["sql"])
231
- except:
232
+ except Exception as e:
233
+ print("Error creating view.", e)
232
234
  print(t["sql"])
233
235
  print("Updated " + str(len(tables)) + " views for Genie.")
234
236
 
@@ -265,7 +267,6 @@ class TenantClient:
265
267
  """
266
268
  Use the Databricks Client to create a Genie Data Room for this tenant.
267
269
  """
268
-
269
270
  data_rooms = self.databricks_client._DatabricksClient__api_get("/api/2.0/data-rooms")
270
271
  room_id = "-1"
271
272
  if data_rooms:
@@ -325,10 +326,13 @@ class TenantClient:
325
326
 
326
327
  def __get_descriptions(self):
327
328
 
328
- params={"schema": self.schema, "tenant_id": self.model_id}
329
- response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/descriptions", params=params)
330
- return response.json()
329
+ #params={"schema": self.schema, "tenant_id": self.model_id}
330
+ #response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/descriptions", params=params)
331
+ #return response.json()
331
332
 
333
+ tenant_config = self.get_tenant_config()
334
+ descriptions = get_genie_descriptions(self.model_id, tenant_config, self.schema)
335
+ return descriptions
332
336
 
333
337
  def __get_view_sql(self, domains=None, concepts=None, not_concepts=None, enforce_map=True):
334
338
  sql_list = []
@@ -404,16 +408,20 @@ class TenantClient:
404
408
 
405
409
  def __get_questions(self):
406
410
 
407
- response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/questions")
411
+ #response = self.api_client._TenantAPI__run_get("/episteme-svcs/api/questions")
412
+
413
+ #tenant_config = self.get_tenant_config()
414
+ #questions = get_genie_questions(self.id, tenant_config)
408
415
 
409
- return_questions = []
410
- for q in response.json():
411
- sql = q["sql"]
412
- sql = sql[2:-2]
413
- sql = sql.replace(".data_", ".genie_").replace("_Literals", "").replace("_w", "")
414
- return_questions.append({"name": q["name"], "sql": sql})
416
+ #return_questions = []
417
+ #for q in questions:
418
+ # sql = q["sql"]
419
+ # sql = sql[2:-2]
420
+ # sql = sql.replace(".data_", ".genie_").replace("_Literals", "").replace("_w", "")
421
+ # return_questions.append({"name": q["name"], "sql": sql})
415
422
 
416
- return return_questions
423
+ #return return_questions
424
+ return []
417
425
 
418
426
  ########################################
419
427
  # RAG Functions
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kobai-sdk
3
- Version: 0.3.2
3
+ Version: 0.3.3rc1
4
4
  Summary: A package that enables interaction with a Kobai tenant.
5
5
  Author-email: Ryan Oattes <ryan@kobai.io>
6
6
  License: Apache License
@@ -292,6 +292,22 @@ k.use_browser_token(access_token="KOBAI_ACESS_TOKEN_FROM_BROWSER")
292
292
  #### Authentication via on-behalf-of flow
293
293
  The sample code demonstrating authentication via the on-behalf-of flow will be provided, if requested.
294
294
 
295
+ 3. Initialize a Spark client using your current `SparkSession`, and generate semantically-rich SQL views describing this Kobai tenant:
296
+
297
+ ```python
298
+ k.spark_init_session(spark)
299
+ k.spark_generate_genie_views()
300
+ ```
301
+
302
+ 4. Initialize a Databricks API client using your Notebook context, and create a Genie Data Rooms environment for this Kobai tenant.
303
+
304
+ ```python
305
+ notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
306
+ sql_warehouse = '8834d98a8agffa76'
307
+
308
+ k.databricks_init_notebook(notebook_context, sql_warehouse)
309
+ k.databricks_build_genie()
310
+ ```
295
311
 
296
312
  ## AI Functionality
297
313
  The Kobai SDK enables users to ask follow-up questions based on the results of previous queries. This functionality currently supports models hosted on Databricks and Azure OpenAI.
@@ -7,6 +7,7 @@ kobai/ai_query.py
7
7
  kobai/ai_rag.py
8
8
  kobai/databricks_client.py
9
9
  kobai/demo_tenant_client.py
10
+ kobai/genie.py
10
11
  kobai/ms_authenticate.py
11
12
  kobai/spark_client.py
12
13
  kobai/tenant_api.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "kobai-sdk"
7
- version = "0.3.2"
7
+ version = "0.3.3rc1"
8
8
  description = "A package that enables interaction with a Kobai tenant."
9
9
  readme = "README.md"
10
10
  authors = [{ name = "Ryan Oattes", email = "ryan@kobai.io" }]
File without changes
File without changes
File without changes
File without changes