kobai-sdk 0.3.0rc2__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kobai-sdk might be problematic. Click here for more details.

kobai/ai_query.py CHANGED
@@ -1,8 +1,6 @@
1
1
  from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate
2
2
  from langchain_core.output_parsers import StrOutputParser
3
3
 
4
- from sentence_transformers import SentenceTransformer, util
5
-
6
4
  from langchain_core.language_models.chat_models import BaseChatModel
7
5
  from langchain_core.embeddings import Embeddings
8
6
  from langchain_core.documents import Document
@@ -10,8 +8,9 @@ from langchain_core.retrievers import BaseRetriever
10
8
  from langchain_core.callbacks import CallbackManagerForRetrieverRun
11
9
  from langchain_core.runnables import RunnablePassthrough, RunnableLambda
12
10
  from langchain_core.vectorstores import InMemoryVectorStore
11
+ import numpy as np
13
12
 
14
- from typing import Union, List
13
+ from typing import List
15
14
 
16
15
 
17
16
  MESSAGE_SYSTEM_TEMPLATE = """
@@ -73,7 +72,7 @@ def format_docs(docs):
73
72
  def input_only(inpt):
74
73
  return inpt["question"]
75
74
 
76
- def followup_question(user_question, question_results, question_name, question_def, embedding_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, use_inmem_vectors=False, k=50):
75
+ def followup_question(user_question, question_results, question_name, question_def, embedding_model: Embeddings, chat_model: BaseChatModel, use_inmem_vectors=False, k=50):
77
76
 
78
77
  row_texts = process_question_results(question_def, question_results)
79
78
  question_documents = [Document(page_content=r, metadata={"source": "kobai"}) for r in row_texts]
@@ -118,22 +117,13 @@ def init_question_search_index(tenant_questions, emb_model):
118
117
 
119
118
  q_ids = [q["id"] for q in tenant_questions]
120
119
  q_descs = [q["description"] for q in tenant_questions]
121
-
122
- if isinstance(emb_model, SentenceTransformer):
123
- q_vectors = emb_model.encode(q_descs)
124
- else:
125
- q_vectors = emb_model.embed_documents(q_descs)
126
-
120
+ q_vectors = emb_model.embed_documents(q_descs)
127
121
  return {"ids": q_ids, "descs": q_descs, "vectors": q_vectors}
128
122
 
129
123
 
130
124
  def question_search(search_text: str, search_index, emb_model, k: int):
131
- if isinstance(emb_model, SentenceTransformer):
132
- search_vec = emb_model.encode(search_text)
133
- else:
134
- search_vec = emb_model.embed_query(search_text)
125
+ search_vec = emb_model.embed_query(search_text)
135
126
  #search_vec = emb_model.encode(search_text)
136
-
137
127
  matches = __top_vector_matches(search_vec, search_index["vectors"], top=k)
138
128
 
139
129
  for mi, m in enumerate(matches):
@@ -142,13 +132,25 @@ def question_search(search_text: str, search_index, emb_model, k: int):
142
132
  return matches
143
133
 
144
134
  def __top_vector_matches(test_vec, options_list_vec, top=1):
145
- scores_t = util.cos_sim(test_vec, options_list_vec)[0]
146
- scores_l = scores_t.tolist()
147
- scores_d = [{"index": i, "value": v} for i, v in enumerate(scores_l)]
148
- sorted_d = sorted(scores_d, key=lambda i: i["value"], reverse=True)
149
- top_d = sorted_d[0:top]
135
+ # Normalize the test vector
136
+ test_vec_norm = test_vec / np.linalg.norm(test_vec)
137
+ # Normalize the option vectors
138
+ options_norm = options_list_vec / np.linalg.norm(options_list_vec, axis=1, keepdims=True)
139
+
140
+ # Compute cosine similarity (dot product of normalized vectors)
141
+ cosine_similarities = np.dot(options_norm, test_vec_norm)
142
+
143
+ # Get indexes and similarity scores as dict
144
+ scores_d = [{"index": i, "value": float(v)} for i, v in enumerate(cosine_similarities)]
145
+
146
+ # Sort dict by similarity score descending
147
+ sorted_d = sorted(scores_d, key=lambda x: x["value"], reverse=True)
148
+
149
+ # Return top results
150
+ top_d = sorted_d[:top]
150
151
  return top_d
151
152
 
153
+
152
154
  def process_question_results(question_def, question_results):
153
155
 
154
156
  """
@@ -211,8 +213,9 @@ def process_question_results(question_def, question_results):
211
213
 
212
214
 
213
215
  concept_order = [max_src]
214
- for t in concept_rels[max_src]["edges"]:
215
- concept_order.append(t["dst"])
216
+ if max_src != "":
217
+ for t in concept_rels[max_src]["edges"]:
218
+ concept_order.append(t["dst"])
216
219
 
217
220
  for c in concept_props:
218
221
  if c not in concept_order:
kobai/ai_rag.py CHANGED
@@ -3,9 +3,7 @@ from pyspark.sql import SparkSession
3
3
 
4
4
  from pyspark.sql.types import StructType, StructField, StringType, ArrayType, FloatType, IntegerType
5
5
  from pyspark.sql import functions as F
6
- from sentence_transformers import SentenceTransformer
7
6
  from delta import DeltaTable
8
- from typing import Union
9
7
  from langchain_core.language_models.chat_models import BaseChatModel
10
8
  from langchain_core.embeddings import Embeddings
11
9
  from langchain_community.document_loaders import PySparkDataFrameLoader
@@ -145,13 +143,13 @@ def __generate_sentences_from_questions(tc: AIContext, debug):
145
143
  ss.sql(full_sql)
146
144
 
147
145
 
148
- def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
146
+ def encode_to_delta_local(tc: AIContext, st_model: Embeddings, replica_schema=None, batch_size=100000):
149
147
  """
150
148
  Encode Semantic Data to Vectors in Delta Table
151
149
 
152
150
  Parameters:
153
151
  tc (TenantClient): The Kobai tenant_client instance instantiated via the SDK.
154
- st_model (SentenceTransformer): A sentence_transformers model to use for encoding.
152
+ st_model (Embeddings): A langchain embedding model to use for encoding.
155
153
  replica_schema (str) OPTIONAL: An alternate schema (catalog.database) to create the Delta table. Useful when the base Kobai schema is not on a Unity Catalog.
156
154
  """
157
155
 
@@ -174,12 +172,8 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
174
172
  content_list = [r["content"] for r in sentences_df.collect()]
175
173
  id_list = [r["id"] for r in sentences_df.collect()]
176
174
 
177
- if isinstance(st_model, SentenceTransformer):
178
- vector_list = st_model.encode(
179
- content_list, normalize_embeddings=True, show_progress_bar=True).tolist()
180
- else:
181
- vector_list = st_model.embed_documents(content_list)
182
- for i, v in enumerate(vector_list):
175
+ vector_list = st_model.embed_documents(content_list)
176
+ for i, v in enumerate(vector_list):
183
177
  vector_list[i] = [float(x) for x in v]
184
178
  #vector_list = st_model.encode(
185
179
  # content_list, normalize_embeddings=True, show_progress_bar=True)
@@ -214,13 +208,13 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
214
208
  # """)
215
209
 
216
210
 
217
- def rag_delta(tc: AIContext, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
211
+ def rag_delta(tc: AIContext, emb_model: Embeddings, chat_model: BaseChatModel, question, k=5, replica_schema=None):
218
212
  """
219
213
  Run a RAG query using vectors in Delta table.
220
214
 
221
215
  Parameters:
222
216
  tc (TenantClient): The Kobai tenant_client instance instantiated via the SDK.
223
- emb_model (UNION[SentenceTransformer, Embeddings]): A sentence_transformers or langchain embedding model to use for encoding the query.
217
+ emb_model (Embeddings): A langchain embedding model to use for encoding the query.
224
218
  chat_model (BaseChatModel): A langchain chat model to use in the RAG pipeline.
225
219
  question (str): The user's query.
226
220
  k (int) OPTIONAL: The number of RAG documents to retrieve.
@@ -233,10 +227,7 @@ def rag_delta(tc: AIContext, emb_model: Union[SentenceTransformer, Embeddings],
233
227
 
234
228
  ss = tc.spark_session
235
229
 
236
- if isinstance(emb_model, SentenceTransformer):
237
- vector_list = emb_model.encode(
238
- question, normalize_embeddings=True).tolist()
239
- elif isinstance(emb_model, Embeddings):
230
+ if isinstance(emb_model, Embeddings):
240
231
  vector_list = emb_model.embed_query(question)
241
232
  else:
242
233
  print("Invalid Embedding Model Type")
kobai/tenant_client.py CHANGED
@@ -7,13 +7,10 @@ from pyspark.sql import SparkSession
7
7
 
8
8
  from langchain_community.chat_models import ChatDatabricks
9
9
  from databricks_langchain import DatabricksEmbeddings
10
- from sentence_transformers import SentenceTransformer
11
10
  from langchain_core.language_models.chat_models import BaseChatModel
12
11
  from langchain_core.embeddings import Embeddings
13
- from typing import Union
14
12
 
15
- from . import spark_client, databricks_client, ai_query, tenant_api, ai_rag, mobi
16
- from .mobi_config import MobiSettings
13
+ from . import spark_client, databricks_client, ai_query, tenant_api, ai_rag
17
14
 
18
15
  class TenantClient:
19
16
 
@@ -59,128 +56,12 @@ class TenantClient:
59
56
  self.__api_init_session()
60
57
 
61
58
 
62
- ########################################
63
- # Mobi
64
- ########################################
65
-
66
- def pull_mobi_to_tenant(self, ontology_name, mobi_config: MobiSettings):
67
-
68
- """
69
- Export an ontology from Mobi and import it into a Kobai tenant, replacing the contents of the tenant.
70
-
71
- Requires that the SDK be authenticated against the target Kobai tenant.
72
-
73
- Parameters:
74
- ontology_name (str): The name of the ontology to access in Mobi.
75
- mobi_config (MobiSettings): Configuration required to access the Mobi service.
76
- """
77
-
78
- _, tenant_json_enc = mobi.get_tenant(ontology_name, mobi_config)
79
- self.__set_tenant_import(tenant_json_enc)
80
-
81
- def pull_mobi_to_file(self, ontology_name, mobi_config: MobiSettings, file_name, human_readable=False):
82
-
83
- """
84
- Export an ontology from Mobi and save it in a Kobai json import file.
85
-
86
- Requires that the SDK be authenticated against the target Kobai tenant.
87
-
88
- Parameters:
89
- ontology_name (str): The name of the ontology to access in Mobi.
90
- mobi_config (MobiSettings): Configuration required to access the Mobi service.
91
- file_name (str): File name to give the output (no extension)
92
- human_readable (bool) OPTIONAL: generate a second, decoded Kobai file.
93
- """
94
-
95
- tenant_json, tenant_json_enc = mobi.get_tenant(ontology_name, mobi_config)
96
-
97
- if ".json" in file_name:
98
- file_name = file_name.split(".json")[0]
99
-
100
- with open(f"{file_name}.json", "w") as out_file:
101
- json.dump(tenant_json_enc, out_file)
102
-
103
- if human_readable:
104
- with open(f"{file_name}_decoded.json", "w") as out_file:
105
- json.dump(tenant_json, out_file)
106
-
107
- def push_tenant_update_to_mobi(self, ontology_name, mobi_config: MobiSettings):
108
-
109
- """
110
- Compare a (modified) Kobai tenant to a Mobi ontology, and generate a Merge Request for the changes.
111
-
112
- Requires that the SDK be authenticated against the target Kobai tenant.
113
-
114
- Parameters:
115
- ontology_name (str): The name of the ontology to access in Mobi.
116
- mobi_config (MobiSettings): Configuration required to access the Mobi service.
117
- """
118
-
119
- tenant_json_enc = self.__get_tenant_export()
120
- mobi.update_tenant(tenant_json_enc, ontology_name, mobi_config)
121
-
122
- def push_whole_tenant_to_mobi(self, ontology_name, mobi_config: MobiSettings):
123
-
124
- """
125
- Export a tenant from Kobai, and create an ontology in Mobi.
126
-
127
- Requires that the SDK be authenticated against the target Kobai tenant.
128
- Requires that an ontology with the same name does not already exist in Mobi.
129
-
130
- Parameters:
131
- ontology_name (str): The name of the ontology to create in Mobi.
132
- mobi_config (MobiSettings): Configuration required to access the Mobi service.
133
- """
134
-
135
- tenant_json = self.get_tenant_config()
136
- mobi.replace_tenant_to_mobi(tenant_json, ontology_name, mobi_config)
137
-
138
- def push_whole_tenant_to_jsonld_file(self, ontology_name, file_name):
139
-
140
- """
141
- Export a tenant from Kobai, and create an ontology in Mobi.
142
-
143
- Requires that the SDK be authenticated against the target Kobai tenant.
144
-
145
- Parameters:
146
- ontology_name (str): The name of the ontology to create in Mobi.
147
- file_name (str): File name to give the output (no extension)
148
- """
149
-
150
- tenant_json = self.get_tenant_config()
151
- tenant_jsonld = mobi.replace_tenant_to_file(tenant_json, ontology_name)
152
-
153
- if ".json" in file_name:
154
- file_name = file_name.split(".json")[0]
155
-
156
- with open(f"{file_name}.json", "w") as out_file:
157
- json.dump(tenant_jsonld, out_file)
158
-
159
- def get_default_mobi_config(self):
160
-
161
- """
162
- Returns a default MobiSettings configuration object.
163
-
164
- Available Fields to Set:
165
- domain_extraction: Mapping of ontology url structures to Kobai domain names.
166
- mobi_api_url: url for Mobi service. (ex: https://localhost:8443/mobirest)
167
- mobi_username: User name for Mobi service.
168
- mobi_password: Password for Mobi service.
169
- """
170
-
171
- return MobiSettings()
172
-
173
- def __set_tenant_import(self, tenant_json_enc):
174
- self.api_client._TenantAPI__run_post_files(
175
- '/data-svcs/solution/snapshot/import/upload',
176
- {'file': json.dumps(tenant_json_enc)}
177
- )
178
59
 
179
60
  ########################################
180
61
  # MS Entra Auth
181
62
  ########################################
182
63
 
183
- def use_browser_token(self, access_token, run_ai_init: bool = True):
64
+ def use_browser_token(self, access_token):
184
65
 
185
66
  """
186
67
  Authenticate the TenantClient with the Kobai instance. Returns nothing, but stores bearer token in client.
@@ -189,9 +70,9 @@ class TenantClient:
189
70
  Parameters:
190
71
  access_token (str): Bearer token for Kobai app session.
191
72
  """
192
- self._init_post_auth_success(access_token, run_ai_init)
73
+ self._init_post_auth_success(access_token)
193
74
 
194
- def use_access_token(self, access_token: str, id_token: str = None, tenant_id: str = None, run_ai_init: bool = True):
75
+ def use_access_token(self, access_token: str, id_token: str = None, tenant_id: str = None):
195
76
 
196
77
  """
197
78
  Authenticate the TenantClient with the Kobai instance. Returns nothing, but stores bearer token in client.
@@ -215,7 +96,7 @@ class TenantClient:
215
96
  )
216
97
 
217
98
  kb_access_token = response.headers.get('Authorization')
218
- self.use_browser_token(kb_access_token, run_ai_init)
99
+ self.use_browser_token(kb_access_token)
219
100
 
220
101
  def get_tenants(self, id_token: str = None):
221
102
 
@@ -242,12 +123,10 @@ class TenantClient:
242
123
  def __api_init_session(self):
243
124
  self.api_client = tenant_api.TenantAPI(self.token, self.uri, verify=self.ssl_verify, proxies=self.proxies )
244
125
 
245
- def _init_post_auth_success(self, access_token, run_ai_init: bool = True):
126
+ def _init_post_auth_success(self, access_token):
246
127
  self.token = access_token
247
128
  self.__api_init_session()
248
129
  self.__set_tenant_solutionid()
249
- if run_ai_init:
250
- self.init_ai_components()
251
130
  print("Authentication Successful.")
252
131
 
253
132
  ########################################
@@ -560,7 +439,7 @@ class TenantClient:
560
439
  """
561
440
  ai_rag.generate_sentences(self.get_ai_context(), replica_schema=replica_schema, concept_white_list=concept_white_list, use_questions=use_questions, debug=debug)
562
441
 
563
- def rag_encode_to_delta_local(self, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
442
+ def rag_encode_to_delta_local(self, st_model: Embeddings, replica_schema=None, batch_size=100000):
564
443
  """
565
444
  Encode Semantic Data to Vectors in Delta Table
566
445
 
@@ -570,7 +449,7 @@ class TenantClient:
570
449
  """
571
450
  ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema, batch_size=batch_size)
572
451
 
573
- def rag_delta(self, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
452
+ def rag_delta(self, emb_model: Embeddings, chat_model: BaseChatModel, question, k=5, replica_schema=None):
574
453
  """
575
454
  Run a RAG query using vectors in Delta table.
576
455
 
@@ -598,9 +477,7 @@ class TenantClient:
598
477
  """
599
478
 
600
479
  if question_id is None:
601
-
602
480
  suggestions = self.question_search(user_question, k=1)
603
-
604
481
  question_id = suggestions[0]["id"]
605
482
 
606
483
  question_results = self.run_question_remote(question_id, dynamic_filters=dynamic_filters)
@@ -610,26 +487,16 @@ class TenantClient:
610
487
 
611
488
  return ai_query.followup_question(user_question, question_results, question_name, question_def, self.embedding_model, self.chat_model, use_inmem_vectors=use_inmem_vectors, k=k)
612
489
 
613
- def init_ai_components(self, embedding_model: Union[SentenceTransformer, Embeddings] = None, chat_model: BaseChatModel = None):
490
+ def init_ai_components(self, embedding_model: Embeddings, chat_model: BaseChatModel):
614
491
  """
615
492
  Set the Chat and Embedding models for AI functions to use. Both models must be provided; no defaults are applied.
616
493
 
617
494
  Parameters:
618
- embedding_model (Union[SentenceTransformer, Embeddings]) OPTIONAL: A sentence_transformer or Langchain Embedding model.
619
- chat_model (BaseChatModel) OPTIONAL: A Langchain BaseChatModel chat model.
495
+ embedding_model (Embeddings): A Langchain Embedding model.
496
+ chat_model (BaseChatModel): A Langchain BaseChatModel chat model.
620
497
  """
621
-
622
- if embedding_model is not None:
623
- self.embedding_model = embedding_model
624
- else:
625
- #self.embedding_model = SentenceTransformer("baai/bge-large-en-v1.5")
626
- self.embedding_model = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
627
-
628
- if chat_model is not None:
629
- self.chat_model = chat_model
630
- else:
631
- self.chat_model = ChatDatabricks(endpoint="databricks-dbrx-instruct")
632
-
498
+ self.embedding_model = embedding_model
499
+ self.chat_model = chat_model
633
500
  self.question_search_index = ai_query.init_question_search_index(self.list_questions(), self.embedding_model)
634
501
 
635
502
  def question_search(self, search_text, k: int = 1):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kobai-sdk
3
- Version: 0.3.0rc2
3
+ Version: 0.3.2
4
4
  Summary: A package that enables interaction with a Kobai tenant.
5
5
  Author-email: Ryan Oattes <ryan@kobai.io>
6
6
  License: Apache License
@@ -223,7 +223,6 @@ Requires-Dist: langchain-core
223
223
  Requires-Dist: langchain-community
224
224
  Requires-Dist: langchain_openai
225
225
  Requires-Dist: databricks_langchain
226
- Requires-Dist: sentence-transformers
227
226
  Provides-Extra: dev
228
227
  Requires-Dist: black; extra == "dev"
229
228
  Requires-Dist: bumpver; extra == "dev"
@@ -249,38 +248,51 @@ from kobai import tenant_client, spark_client, databricks_client
249
248
 
250
249
  schema = 'main.demo'
251
250
  uri = 'https://demo.kobai.io'
252
- tenant_id = '1'
253
251
  tenant_name = 'My Demo Tenant'
254
-
255
- k = tenant_client.TenantClient(tenant_name, tenant_id, uri, schema)
252
+ k = tenant_client.TenantClient(tenant_name, uri, schema)
256
253
  ```
257
254
 
258
255
  2. Authenticate with the Kobai instance:
256
+ Authentication can be performed using different methods, such as device code flow, on-behalf-of flow, or browser-based tokens.
257
+
258
+ #### Authentication via device code
259
+ Step 1: Obtain the access token from IDM (Identity and Access Management)
259
260
 
260
261
  ```python
261
- client_id = 'your_Entra_app_id_here'
262
+ from kobai import ms_authenticate
263
+
262
264
  tenant_id = 'your_Entra_directory_id_here'
265
+ client_id = 'your_Entra_app_id_here'
263
266
 
264
- k.authenticate(client_id, tenant_id)
267
+ access_token = ms_authenticate.device_code(tenant_id, client_id)
265
268
  ```
266
269
 
267
- 3. Initialize a Spark client using your current `SparkSession`, and generate semantically-rich SQL views describing this Kobai tenant:
270
+ Step 2: Use the token to retrieve the list of Kobai tenants (unless the tenant ID is already known).
268
271
 
269
272
  ```python
270
- k.spark_init_session(spark)
271
- k.spark_generate_genie_views()
273
+ tenants = k.get_tenants(id_token=access_token)
274
+ print(tenants)
272
275
  ```
273
276
 
274
- 4. Initialize a Databricks API client using your Notebook context, and create a Genie Data Rooms environment for this Kobai tenant.
277
+ Step 3: Authenticate with Kobai for the specific tenant using the IDM access token.
275
278
 
276
279
  ```python
277
- notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
278
- sql_warehouse = '8834d98a8agffa76'
280
+ kobai_tenant_id = "5c1ba715-3961-4835-8a10-6f6f963b53ff"
281
+ k.use_access_token(access_token = access_token, tenant_id=kobai_tenant_id)
282
+ ```
283
+
284
+ At this point, authentication to the Kobai tenant is successfully completed.
285
+
286
+ #### Authentication via browser token
279
287
 
280
- k.databricks_init_notebook(notebook_context, sql_warehouse)
281
- k.databricks_build_genie()
288
+ ```python
289
+ k.use_browser_token(access_token="KOBAI_ACCESS_TOKEN_FROM_BROWSER")
282
290
  ```
283
291
 
292
+ #### Authentication via on-behalf-of flow
293
+ Sample code demonstrating authentication via the on-behalf-of flow is available on request.
294
+
295
+
284
296
  ## AI Functionality
285
297
  The Kobai SDK enables users to ask follow-up questions based on the results of previous queries. This functionality currently supports models hosted on Databricks and Azure OpenAI.
286
298
 
@@ -305,68 +317,41 @@ kobai_query_name = "Set ownership"
305
317
  question_json = k.run_question_remote(k.get_question_id(kobai_query_name)) # By questionName
306
318
  ```
307
319
 
308
- 3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using either Azure OpenAI, Databricks or a user-provided chat model.
309
-
310
- #### Using Azure OpenAI
311
-
312
- ###### Authentication Methods:
313
-
314
- 1. ApiKey
315
-
316
- ```python
317
- from kobai import ai_query, llm_config
318
- import json
319
-
320
- followup_question = "Which owner owns the most sets?"
321
-
322
- llm_config = llm_config.LLMConfig(endpoint="https://kobaipoc.openai.azure.com/", api_key="YOUR_API_KEY", deployment="gpt-4o-mini", llm_provider="azure_openai")
323
-
324
- output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
325
- print(output)
326
- ```
327
-
328
- 2. Azure Active Directory Authentication
320
+ 3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using user-provided chat and embedding models.
329
321
 
330
- Ensure that the logged-in tenant has access to Azure OpenAI.
331
- In case of databricks notebook, the logged in service principal should have access to Azure OpenAI.
322
+ #### Using Databricks Embeddings and Chat Models in a Databricks Notebook
323
+ Initialize the AI components by specifying the embedding and chat models, then proceed with follow-up questions for interactive engagement.
332
324
 
333
325
  ```python
334
- from kobai import ai_query, llm_config
326
+ from databricks_langchain import DatabricksEmbeddings
327
+ from langchain_community.chat_models import ChatDatabricks
335
328
  import json
336
329
 
337
- followup_question = "Which owner owns the most sets?"
338
-
339
- llm_config = llm_config.LLMConfig(endpoint="https://kobaipoc.openai.azure.com/", deployment="gpt-4o-mini", llm_provider="azure_openai")
340
- llm_config.get_azure_ad_token()
341
-
342
- output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
343
- print(output)
344
- ```
345
-
346
- #### Using Databricks (Default Configuration)
347
-
348
- ```python
349
- from kobai import ai_query, llm_config
350
- import json
330
+ # Choose an embedding model and a chat model from Databricks model serving, then initialize them.
331
+ embedding_model = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
332
+ chat_model = ChatDatabricks(endpoint="databricks-gpt-oss-20b")
333
+ k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
351
334
 
352
335
  followup_question = "Which owner owns the most sets?"
353
-
354
- llm_config = llm_config.LLMConfig()
355
-
356
- output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
336
+ output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
357
337
  print(output)
358
338
  ```
359
339
 
360
- #### User Provided Chat Model
340
+ #### Using Azure OpenAI Embeddings and Chat Models
361
341
 
362
342
  ```python
363
- from kobai import ai_query, llm_config
364
- import json
365
343
  from langchain_openai import AzureChatOpenAI
344
+ from langchain_openai import AzureOpenAIEmbeddings
345
+ import json
366
346
 
367
347
  followup_question = "Which owner owns the most sets?"
368
348
 
369
- llm_config = llm_config.LLMConfig(debug=True)
349
+ embedding_model = AzureOpenAIEmbeddings(
350
+ model="text-embedding-3-small",
351
+ azure_endpoint="https://kobaipoc.openai.azure.com/",
352
+ api_key="YOUR_API_KEY",
353
+ openai_api_version="2023-05-15"
354
+ )
370
355
 
371
356
  chat_model = AzureChatOpenAI(
372
357
  azure_endpoint="https://kobaipoc.openai.azure.com/", azure_deployment="gpt-4o-mini",
@@ -375,7 +360,10 @@ openai_api_version="2024-02-15-preview",
375
360
  temperature=0.5,
376
361
  max_tokens=150,)
377
362
 
378
- output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, override_model=chat_model, llm_config=llm_config)
363
+ k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
364
+
365
+ followup_question = "Which theme has the most sets?"
366
+ output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
379
367
  print(output)
380
368
  ```
381
369
 
@@ -0,0 +1,14 @@
1
+ kobai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ kobai/ai_query.py,sha256=FnXn2pabJpXfTUcJvieVkAgMAjSTH9u5SFR9SJUJ-Lk,9556
3
+ kobai/ai_rag.py,sha256=XUq_SnJw17P53Zk75hHJgTryGjHEAyYPwC0r2WtuNp4,14627
4
+ kobai/databricks_client.py,sha256=fyqqMly2Qm0r1AHWsQjkYeNsDdH0G1JSgTkF9KJ55qA,2118
5
+ kobai/demo_tenant_client.py,sha256=wlNc-bdI2wotRXo8ppUOalv4hYdBlek_WzJNARZV-AE,9293
6
+ kobai/ms_authenticate.py,sha256=rlmhtvAaSRBlYmvIBy5epMVa4MBGBLPaMwawu1T_xDQ,2252
7
+ kobai/spark_client.py,sha256=opM_F-4Ut5Hq5zZjWMuLvUps9sDULvyPNZHXGL8dW1k,776
8
+ kobai/tenant_api.py,sha256=Q5yuFd9_V4lo3LWzvYEEO3LpDRWFgQD4TlRPXDTGbiE,4368
9
+ kobai/tenant_client.py,sha256=w83NmLuOEyJjOVUuLva2vbq0zpGFzhi9LdSq1pKClA8,38613
10
+ kobai_sdk-0.3.2.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
11
+ kobai_sdk-0.3.2.dist-info/METADATA,sha256=7WIGEJBGHn2QIsYPLdbsnkwvtjqx3RJHZJR3kl0gu_M,19304
12
+ kobai_sdk-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
13
+ kobai_sdk-0.3.2.dist-info/top_level.txt,sha256=ns1El3BrTTHKvoAgU1XtiSaVIudYeCXbEEUVY8HFDZ4,6
14
+ kobai_sdk-0.3.2.dist-info/RECORD,,
kobai/mobi.py DELETED
@@ -1,682 +0,0 @@
1
- import requests
2
- import urllib.parse
3
- import json
4
- import base64
5
- from random import randrange
6
- from requests_toolbelt.multipart.encoder import MultipartEncoder
7
-
8
- from .mobi_config import MobiSettings
9
-
10
- ##############################
11
- # Mobi Pull
12
- ##############################
13
-
14
- def get_tenant(top_level_ontology_name, mobi_config: MobiSettings):
15
- #Find Ontology Record
16
-
17
- ont_record_id = _get_ont_record_by_name(top_level_ontology_name, mobi_config)
18
- print("Mobi Ontology Record ID:", ont_record_id)
19
- #Get Deprecated Nodes
20
-
21
- api_url = mobi_config.mobi_api_url + "/ontologies/" + urllib.parse.quote_plus(ont_record_id) + "/property-ranges"
22
- response = requests.get(api_url, verify=False, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
23
-
24
- prop_ranges = response.json()["propertyToRanges"]
25
-
26
-
27
- api_url = mobi_config.mobi_api_url + "/ontologies/" + urllib.parse.quote_plus(ont_record_id) + "/ontology-stuff"
28
- response = requests.get(api_url, verify=False, timeout=5000, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
29
-
30
- ########################
31
- # Deprecated Classes
32
- ########################
33
-
34
- deprecated = []
35
-
36
- try:
37
- resp_data = response.json()["iriList"]["deprecatedIris"]
38
- except requests.exceptions.JSONDecodeError:
39
- resp_data = []
40
-
41
- for iri in resp_data:
42
- deprecated.append(iri)
43
-
44
- try:
45
- resp_data = response.json()["importedIRIs"]
46
- except requests.exceptions.JSONDecodeError:
47
- resp_data = []
48
-
49
- for o in resp_data:
50
- for iri in o["deprecatedIris"]:
51
- deprecated.append(iri)
52
-
53
- ########################
54
- # Properties
55
- ########################
56
-
57
- data_properties = []
58
- object_properties = []
59
-
60
- for p in response.json()["iriList"]["dataProperties"]:
61
- data_properties.append(p)
62
-
63
- for p in response.json()["iriList"]["objectProperties"]:
64
- object_properties.append(p)
65
-
66
- for o in response.json()["importedIRIs"]:
67
- for p in o["dataProperties"]:
68
- data_properties.append(p)
69
-
70
- for o in response.json()["importedIRIs"]:
71
- for p in o["objectProperties"]:
72
- object_properties.append(p)
73
-
74
- all_properties = data_properties + object_properties
75
-
76
- ########################
77
- # Classes and Domains
78
- ########################
79
-
80
- domains = {}
81
- concepts = {}
82
- prop_domains = {}
83
-
84
- for ont in _get_classes_by_ont(ont_record_id, mobi_config):
85
- for c in ont["classes"]:
86
- if c not in deprecated:
87
- d = _domain_from_uri(c, mobi_config)
88
- c_lu = _fix_uri(c, "concept", mobi_config)
89
-
90
- if d not in domains:
91
- domains[d] = {"name": d, "concepts": [], "color": ""}
92
-
93
- #Add Leaf Ontology Concepts
94
- loc = _parent_uri_from_uri(c)
95
- loc_lu = _fix_uri(loc, "concept", mobi_config)
96
- if loc_lu not in concepts:
97
- name = _name_from_uri(loc, mobi_config)
98
- concepts[loc_lu] = {"label": name, "domainName": d, "name": name, "uri": loc_lu, "properties": [], "relations": [], "inheritedConcepts": []}
99
-
100
- #Add Class Concepts
101
- name = _name_from_uri(c, mobi_config)
102
- concepts[c_lu] = {"label": name, "domainName": d, "name": name, "uri": c_lu, "properties": [], "relations": [], "inheritedConcepts": [loc_lu]}
103
-
104
- try:
105
- resp_data = response.json()["classToAssociatedProperties"]
106
- except requests.exceptions.JSONDecodeError:
107
- resp_data = {}
108
-
109
- class_to_props = resp_data
110
-
111
- props_with_domain = []
112
- for c in class_to_props:
113
- for p in class_to_props[c]:
114
- props_with_domain.append(p)
115
-
116
- #for p in resp_data:
117
- for p in all_properties:
118
- if p not in props_with_domain:
119
- leaf_ont_concept = _parent_uri_from_uri(p)
120
- if leaf_ont_concept not in class_to_props:
121
- class_to_props[leaf_ont_concept] = []
122
- class_to_props[leaf_ont_concept].append(p)
123
-
124
- for c in class_to_props:
125
- if c not in deprecated:
126
- for p in class_to_props[c]:
127
- #loc_lu = _fix_uri(c, "concept", mobi_config)
128
-
129
- if p in prop_ranges:
130
- range = prop_ranges[p][0]
131
- if range not in ["http://www.w3.org/2001/XMLSchema#string", "http://www.w3.org/2001/XMLSchema#number", "http://www.w3.org/2001/XMLSchema#boolean", "http://www.w3.org/2001/XMLSchema#dateTime"]:
132
- range = "http://www.w3.org/2001/XMLSchema#string"
133
- else:
134
- range = "http://www.w3.org/2001/XMLSchema#string"
135
-
136
- #range_lu = _fix_uri(prop_ranges[p][0], "concept", mobi_config)
137
-
138
- #if p in prop_domains:
139
- #for dc in prop_domains[p]:
140
- #cp = dc + "/" + _label_from_uri(p)
141
- cp = c + "/" + _label_from_uri(p)
142
- #dc_lu = _fix_uri(dc, "concept", mobi_config)
143
- dc_lu = _fix_uri(c, "concept", mobi_config)
144
- if p in data_properties:
145
- prop = {"label": _label_from_uri(p), "uri": _fix_uri(cp, "prop", mobi_config), "conceptUri": dc_lu, "propTypeUri": range, "dataClassTags": []}
146
- if dc_lu in concepts:
147
- if prop not in concepts[dc_lu]["properties"]:
148
- concepts[dc_lu]["properties"].append(prop)
149
- if p in object_properties:
150
- if p in prop_ranges:
151
- range_lu = _fix_uri(prop_ranges[p][0], "concept", mobi_config)
152
- prop = {"label": _label_from_uri(p), "uri": _fix_uri(cp, "prop", mobi_config), "conceptUri": dc_lu, "relationTypeUri": range_lu, "dataClassTags": []}
153
- if dc_lu in concepts:
154
- if range_lu in concepts:
155
- if prop not in concepts[dc_lu]["relations"]:
156
- concepts[dc_lu]["relations"].append(prop)
157
- else:
158
- print("PROPERTY RANGE MISSING", p)
159
-
160
- try:
161
- resp_data = response.json()["classHierarchy"]["childMap"]
162
- except requests.exceptions.JSONDecodeError:
163
- resp_data = {}
164
-
165
- for c in resp_data:
166
- c_lu = _fix_uri(c, "concept", mobi_config)
167
- for pc in resp_data[c]:
168
- pc_lu = _fix_uri(pc, "concept", mobi_config)
169
- if c not in deprecated:
170
- if c_lu in concepts:
171
- concepts[c_lu]["inheritedConcepts"].append(pc_lu)
172
- else:
173
- print("RELATION TARGET MISSING", c_lu)
174
-
175
- empty_leaf_concepts = []
176
- for c in concepts:
177
- if c.split("#")[1][0] == "_":
178
- if len(concepts[c]["properties"]) == 0 and len(concepts[c]["relations"]) == 0:
179
- empty_leaf_concepts.append(c)
180
- for cc in concepts:
181
- if c in concepts[cc]["inheritedConcepts"]:
182
- concepts[cc]["inheritedConcepts"].remove(c)
183
- else:
184
- print("KEEPING LEAF ONTOLOGY", c)
185
- for c in empty_leaf_concepts:
186
- print("REMOVING LEAF ONTOLOGY", c)
187
- del concepts[c]
188
-
189
- tenant = {"solutionId": 0, "model": {"name": "AssetModel", "uri": "http://kobai/" + mobi_config.default_tenant_id + "/AssetModel"}, "tenantId": mobi_config.default_tenant_id, "domains": []}
190
- tenant_encoded = {"solutionId": 0, "model": {"name": "AssetModel", "uri": "http://kobai/" + mobi_config.default_tenant_id + "/AssetModel"}, "tenantId": mobi_config.default_tenant_id, "domains": []}
191
- _add_empty_tenant_metadata(tenant)
192
- _add_empty_tenant_metadata(tenant_encoded)
193
-
194
- di = 0
195
- for dk, d in domains.items():
196
- d['id'] = di
197
- d['color'] = "#" + str(randrange(222222, 888888))
198
- d_encoded = {}
199
- d_encoded['id'] = di
200
- d_encoded['color'] = "#" + str(randrange(222222, 888888))
201
- d_encoded['name'] = dk
202
-
203
- for _, c in concepts.items():
204
- if dk == c['domainName']:
205
- cprime = {"uri": c['uri'], "label": c['label'], "relations": c['relations'], "properties": c['properties'], "inheritedConcepts": c['inheritedConcepts']}
206
- d['concepts'].append(cprime)
207
- encodedConcepts = base64.b64encode(json.dumps(d['concepts']).encode('ascii')).decode('ascii')
208
- d_encoded['concepts'] = encodedConcepts
209
- tenant['domains'].append(d)
210
- tenant_encoded['domains'].append(d_encoded)
211
- di += 1
212
-
213
- return tenant, tenant_encoded
214
-
215
- def _get_classes_by_ont(ont_record_id, mobi_config):
216
- api_url = mobi_config.mobi_api_url + "/ontologies/" + urllib.parse.quote_plus(ont_record_id) + "/imported-classes"
217
- response = requests.get(api_url, verify=False, timeout=5000, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
218
-
219
- data = []
220
- try:
221
- resp_data = response.json()
222
- except requests.exceptions.JSONDecodeError:
223
- resp_data = []
224
-
225
- #for ont in response.json():
226
- for ont in resp_data:
227
- record = {}
228
- record["id"] = _trim_trailing_slash(ont["id"])
229
- record["classes"] = []
230
- for c in ont["classes"]:
231
- record["classes"].append(c)
232
- data.append(record)
233
-
234
- api_url = mobi_config.mobi_api_url + "/ontologies/" + urllib.parse.quote_plus(ont_record_id) + "/classes"
235
- response = requests.get(api_url, verify=False, timeout=5000, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
236
-
237
- try:
238
- resp_data = response.json()
239
- except requests.exceptions.JSONDecodeError:
240
- resp_data = []
241
-
242
- record = {}
243
- if len(resp_data) == 0:
244
- return data
245
- record["id"] = _parent_uri_from_uri(_trim_trailing_slash(response.json()[0]["@id"]))
246
- record["classes"] = []
247
-
248
- for c in resp_data:
249
- record["classes"].append(c["@id"])
250
- data.append(record)
251
-
252
- return data
253
-
254
- def _get_ont_record_by_name(name, mobi_config):
255
- api_url = mobi_config.mobi_api_url + "/catalogs/" + urllib.parse.quote_plus(mobi_config.catalog_name) + "/records"
256
- response = requests.get(api_url, verify=False, timeout=5000, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
257
-
258
- ont_record_id = ""
259
- for r in response.json():
260
- if r["http://purl.org/dc/terms/title"][0]["@value"] == name:
261
- ont_record_id = r["@id"]
262
- return ont_record_id
263
-
264
- def _get_ont_record_by_url(url, mobi_config):
265
- api_url = mobi_config.mobi_api_url + "/catalogs/" + urllib.parse.quote_plus(mobi_config.catalog_name) + "/records"
266
- response = requests.get(api_url, verify=False, timeout=5000, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
267
-
268
- ont_record_id = ""
269
- for r in response.json():
270
- for u in r["http://mobi.com/ontologies/ontology-editor#ontologyIRI"]:
271
- if url == _trim_trailing_slash(u["@id"]):
272
- ont_record_id = r["@id"]
273
- return ont_record_id
274
-
275
- def _add_empty_tenant_metadata(tenant):
276
- tenant["dataAccessTags"] = []
277
- tenant["conceptAccessTags"] = []
278
- tenant["dataSources"] = []
279
- tenant["dataSets"] = []
280
- tenant["collections"] = []
281
- tenant["visualizations"] = []
282
- tenant["queries"] = []
283
- tenant["mappingDefs"] = []
284
- tenant["dataSourceFileKeys"] = []
285
- tenant["apiQueryProfiles"] = []
286
- tenant["collectionVizs"] = []
287
- tenant["collectionVizOrders"] = []
288
- tenant["queryDataTags"] = []
289
- tenant["queryCalcs"] = []
290
- tenant["dataSourceSettings"] = []
291
- tenant["publishedAPIs"] = []
292
- tenant["scenarios"] = []
293
-
294
- ##############################
295
- # Mobi Replace
296
- ##############################
297
-
298
- def replace_tenant_to_mobi(kobai_tenant, top_level_ontology, mobi_config: MobiSettings):
299
- json_ld = _create_jsonld(kobai_tenant, top_level_ontology)
300
- _post_model(json_ld, top_level_ontology, mobi_config)
301
-
302
- def replace_tenant_to_file(kobai_tenant, top_level_ontology):
303
- return _create_jsonld(kobai_tenant, top_level_ontology)
304
-
305
- def _create_jsonld(kobai_tenant, top_level_ontology):
306
- output_json = []
307
- uri = kobai_tenant["model"]["uri"]
308
- uri = uri.replace("AssetModel", top_level_ontology)
309
-
310
- group = {
311
- "@id": uri,
312
- "@type": ["http://www.w3.org/2002/07/owl#Ontology"],
313
- "http://purl.org/dc/terms/description": [{"@value": "This model was exported from Kobai."}],
314
- "http://purl.org/dc/terms/title": [{"@value": top_level_ontology}]
315
- }
316
- output_json.append(group)
317
-
318
-
319
- for dom in kobai_tenant["domains"]:
320
- for con in dom["concepts"]:
321
-
322
- group = {
323
- "@id": con["uri"].replace("AssetModel", top_level_ontology),
324
- "@type": ["http://www.w3.org/2002/07/owl#Class"],
325
- "http://purl.org/dc/terms/title": [{"@value": con["label"]}]
326
- }
327
- if len(con["inheritedConcepts"]) > 0:
328
- group["http://www.w3.org/2000/01/rdf-schema#subClassOf"] = []
329
- for parent in con["inheritedConcepts"]:
330
- group["http://www.w3.org/2000/01/rdf-schema#subClassOf"].append(
331
- {"@id": parent.replace("AssetModel", top_level_ontology)}
332
- )
333
- output_json.append(group)
334
-
335
- for prop in con["properties"]:
336
- group = {
337
- "@id": prop["uri"].replace("AssetModel", top_level_ontology),
338
- "@type": ["http://www.w3.org/2002/07/owl#DatatypeProperty"],
339
- "http://purl.org/dc/terms/title": [{"@value": prop["label"]}],
340
- "http://www.w3.org/2000/01/rdf-schema#domain": [{"@id": con["uri"].replace("AssetModel", top_level_ontology)}],
341
- "http://www.w3.org/2000/01/rdf-schema#range": [{"@id": prop["propTypeUri"]}]
342
- }
343
- output_json.append(group)
344
-
345
- for rel in con["relations"]:
346
- group = {
347
- "@id": rel["uri"].replace("AssetModel", top_level_ontology),
348
- "@type": ["http://www.w3.org/2002/07/owl#ObjectProperty"],
349
- "http://purl.org/dc/terms/title": [{"@value": rel["label"]}],
350
- "http://www.w3.org/2000/01/rdf-schema#domain": [{"@id": con["uri"].replace("AssetModel", top_level_ontology)}],
351
- "http://www.w3.org/2000/01/rdf-schema#range": [{"@id": rel["relationTypeUri"].replace("AssetModel", top_level_ontology)}]
352
- }
353
- output_json.append(group)
354
- return output_json
355
-
356
- def _post_model(tenant_json, top_level_ontology, mobi_config):
357
-
358
- mp = MultipartEncoder(fields={
359
- "title": top_level_ontology,
360
- "description": "This model was exported from Kobai.",
361
- "json": json.dumps(tenant_json)
362
- })
363
- h = {"Content-type": mp.content_type}
364
-
365
- api_url = mobi_config.mobi_api_url + "/ontologies"
366
- response = requests.post(
367
- api_url,
368
- headers = h,
369
- data = mp,
370
- verify=False,
371
- timeout=5000,
372
- auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password)
373
- )
374
- print("Upload Status", response.status_code)
375
- if response.status_code != 201:
376
- print(response.text)
377
-
378
- ##############################
379
- # Mobi Update
380
- ##############################
381
-
382
- def update_tenant(kobai_tenant, top_level_ontology_name, mobi_config: MobiSettings):
383
- record_id = _get_ont_record_by_name(top_level_ontology_name, mobi_config)
384
-
385
- api_url = mobi_config.mobi_api_url + "/catalogs/" + urllib.parse.quote_plus(mobi_config.catalog_name) + "/records/" + urllib.parse.quote_plus(record_id) + "/branches"
386
- response = requests.get(api_url, verify=False, timeout=5000, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
387
-
388
- classes = _get_classes_by_ont(record_id, mobi_config)
389
- ontology_by_class = {}
390
- for o in classes:
391
- for c in o["classes"]:
392
- ontology_by_class[c] = o["id"]
393
-
394
- _, mobi_tenant = get_tenant(top_level_ontology_name, mobi_config)
395
-
396
- change_json = _compare_tenants(kobai_tenant, mobi_tenant, classes, mobi_config)
397
-
398
- #################################
399
- # Apply changes with Mobi API calls
400
- #################################
401
-
402
- for o in change_json:
403
- if not change_json[o]["changed"]:
404
- continue
405
-
406
- ont_record_id = _get_ont_record_by_url(o, mobi_config)
407
- if ont_record_id == "":
408
- continue
409
-
410
- branch_id = _get_or_create_branch_by_record(ont_record_id, "kobai_dev", mobi_config)
411
- master_branch_id = _get_or_create_branch_by_record(ont_record_id, "MASTER", mobi_config)
412
-
413
- for change in change_json[o]["class"]:
414
- _stage_changes([change["mobi"]], ont_record_id, mobi_config)
415
- _commit_changes("Kobai added class " + change["mobi"]["http://purl.org/dc/terms/title"][0]["@value"], ont_record_id, branch_id, mobi_config)
416
- for change in change_json[o]["property"]:
417
- _stage_changes([change["mobi"]], ont_record_id, mobi_config)
418
- _commit_changes("Kobai added property " + change["mobi"]["http://purl.org/dc/terms/title"][0]["@value"], ont_record_id, branch_id, mobi_config)
419
-
420
- api_url = mobi_config.mobi_api_url + "/merge-requests"
421
- pd = {"title": "Kobai Change from kobai-dev to master", "recordId": ont_record_id, "sourceBranchId": branch_id, "targetBranchId": master_branch_id, "assignees": ["admin"], "removeSource": "true"}
422
- response = requests.post(api_url, verify=False, timeout=5000, params=pd, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
423
- #print(response.status_code)
424
-
425
- def _compare_tenants(kobai_tenant, mobi_tenant, classes, mobi_config):
426
- existing_concepts = []
427
- existing_relations = {}
428
- existing_properties = {}
429
-
430
- for dom in mobi_tenant["domains"]:
431
- conText = base64.b64decode(dom['concepts']).decode('UTF-8')
432
- cons = json.loads(conText)
433
- for con in cons:
434
- existing_concepts.append(con["uri"])
435
- existing_properties[con["uri"]] = []
436
- for prop in con["properties"]:
437
- existing_properties[con["uri"]].append(prop["uri"])
438
- existing_relations[con["uri"]] = []
439
- for rel in con["relations"]:
440
- existing_relations[con["uri"]].append(rel["uri"])
441
-
442
- new_concepts = []
443
- new_relations = {}
444
- new_properties = {}
445
-
446
- tenantId = kobai_tenant["tenantId"]
447
- for dom in kobai_tenant["domains"]:
448
- conText = base64.b64decode(dom['concepts']).decode('UTF-8')
449
- cons = json.loads(conText)
450
- for con in cons:
451
- con_d = con["uri"].replace(tenantId, mobi_config.default_tenant_id)
452
- if con_d not in existing_concepts:
453
- print("New Class Detected")
454
- new_concepts.append(con)
455
- for prop in con["properties"]:
456
- prop_d = prop["uri"].replace(tenantId, mobi_config.default_tenant_id)
457
- if con_d in existing_properties:
458
- if prop_d not in existing_properties[con_d]:
459
- print("New Prop Detected")
460
- if con_d not in new_properties:
461
- new_properties[con_d] = []
462
- new_properties[con_d].append(prop)
463
- print(prop)
464
- else:
465
- print("New Property due to New Concept")
466
- for rel in con["relations"]:
467
- rel_d = rel["uri"].replace(tenantId, mobi_config.default_tenant_id)
468
- if con_d in existing_relations:
469
- if rel_d not in existing_relations[con_d]:
470
- print("New Rel Detected")
471
- if con_d not in new_relations:
472
- new_relations[con_d] = []
473
- new_relations[con_d].append(rel)
474
- else:
475
- print("New Relation due to New Concept")
476
-
477
- change_json = {}
478
- for o in classes:
479
- ont_exist = o["id"]
480
- change_json[ont_exist] = {}
481
- change_json[ont_exist]["exists"] = True
482
- change_json[ont_exist]["changed"] = False
483
- change_json[ont_exist]["class"] = []
484
- change_json[ont_exist]["property"] = []
485
- change_json[ont_exist]["relation"] = []
486
-
487
- #################################
488
- # Identify and capture changes associated to Mobi ontology
489
- #################################
490
- for c in new_concepts:
491
- c_d = c["uri"].replace(tenantId, mobi_config.default_tenant_id)
492
- ont_sig = c_d.replace("http://kobai/" + mobi_config.default_tenant_id + "/AssetModel/", "").replace("#", "/").replace("_", "/")
493
- ont_sig = "/".join(ont_sig.split("/")[:-1])
494
- ont = ""
495
- for o in classes:
496
- ont_exist = o["id"]
497
- if ont_sig == _get_ont_sig_from_ont(ont_exist, len(ont_sig.split("/"))):
498
- change_json[ont_exist]["class"].append({"type": "new", "kobai": c, "mobi": {}})
499
-
500
- for c in new_properties:
501
- for p in new_properties[c]:
502
- ont_sig = _get_ont_sig_from_concept(tenantId, c, mobi_config)
503
- for o in classes:
504
- ont_exist = o["id"]
505
- if ont_sig == _get_ont_sig_from_ont(ont_exist, len(ont_sig.split("/"))):
506
- change_json[ont_exist]["property"].append({"type": "new", "kobai": p, "mobi": {}})
507
-
508
- for c in new_relations:
509
- for r in new_properties[c]:
510
- ont_sig = _get_ont_sig_from_concept(tenantId, c, mobi_config)
511
- for o in classes:
512
- ont_exist = o["id"]
513
- if ont_sig == _get_ont_sig_from_ont(ont_exist, len(ont_sig.split("/"))):
514
- change_json[ont_exist]["relation"].append({"type": "new", "kobai": r, "mobi": {}})
515
-
516
- #################################
517
- # Generate Mobi json for every change
518
- #################################
519
- for ont in change_json:
520
- changed = False
521
- for i, change in enumerate(change_json[ont]["class"]):
522
- c = change["kobai"]
523
- c_json = {}
524
- c_json["@id"] = ont + "/" + c["label"].split("_")[-1]
525
- c_json["@type"] = [ "http://www.w3.org/2002/07/owl#Class" ]
526
- c_json["http://www.w3.org/2000/01/rdf-schema#label"] = [{"@value": c["label"].split("_")[-1]}]
527
- c_json["http://purl.org/dc/terms/title"] = [{"@value": c["label"].split("_")[-1]}]
528
- c_json["http://www.w3.org/2000/01/rdf-schema#subClassOf"] = []
529
- #c_json["http://www.w3.org/2002/07/owl#deprecated"] = [{"@value": "true", "@type": "http://www.w3.org/2001/XMLSchema#boolean"}]
530
- for pc in c["inheritedConcepts"]:
531
- c_json["http://www.w3.org/2000/01/rdf-schema#subClassOf"].append(pc)
532
- change_json[ont]["class"][i]["mobi"] = c_json
533
- changed = True
534
-
535
- for i, change in enumerate(change_json[ont]["property"]):
536
- p = change["kobai"]
537
- p_json = {}
538
- p_json["@id"] = ont + "/" + p["label"]
539
- p_json["@type"] = [ "http://www.w3.org/2002/07/owl#Class" ]
540
- p_json["http://www.w3.org/2000/01/rdf-schema#label"] = [{"@value": p["label"]}]
541
- p_json["http://purl.org/dc/terms/title"] = [{"@value": p["label"]}]
542
- p_json["http://www.w3.org/2000/01/rdf-schema#domain"] = [{"@id": ont + "/" + _get_concept_name_from_prop_uri(p["uri"])}]
543
- p_json["http://www.w3.org/2000/01/rdf-schema#range"] = [{"@id": p["propTypeUri"]}]
544
- change_json[ont]["property"][i]["mobi"] = p_json
545
- changed = True
546
-
547
- if changed is True:
548
- change_json[ont]["changed"] = True
549
-
550
- return change_json
551
-
552
- def _stage_changes(changes, ont_record_id, mobi_config):
553
- api_url = mobi_config.mobi_api_url + "/catalogs/" + urllib.parse.quote_plus(mobi_config.catalog_name) + "/records/" + urllib.parse.quote_plus(ont_record_id) + "/in-progress-commit"
554
- response = requests.delete(api_url, verify=False, timeout=5000, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
555
-
556
- api_url = mobi_config.mobi_api_url + "/catalogs/" + urllib.parse.quote_plus(mobi_config.catalog_name) + "/records/" + urllib.parse.quote_plus(ont_record_id) + "/in-progress-commit"
557
- m = MultipartEncoder(fields={"additions": json.dumps(changes), "deletions": "[]"})
558
- h = {"Content-type": m.content_type}
559
- response = requests.put(api_url, verify=False, timeout=5000, data=m, headers=h, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
560
-
561
- api_url = mobi_config.mobi_api_url + "/catalogs/" + urllib.parse.quote_plus(mobi_config.catalog_name) + "/records/" + urllib.parse.quote_plus(ont_record_id) + "/in-progress-commit"
562
- response = requests.get(api_url, verify=False, timeout=5000, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
563
-
564
- def _commit_changes(message, ont_record_id, branch_id, mobi_config):
565
- api_url = mobi_config.mobi_api_url + "/catalogs/" + urllib.parse.quote_plus(mobi_config.catalog_name) + "/records/" + urllib.parse.quote_plus(ont_record_id) + "/branches/" + urllib.parse.quote_plus(branch_id) + "/commits"
566
- pd = {"message": message}
567
- response = requests.post(api_url, verify=False, timeout=5000, params=pd, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
568
-
569
- ##############################
570
- # Mobi Branch
571
- ##############################
572
-
573
- #def jprint(data):
574
- # json_str = json.dumps(data, indent=4)
575
-
576
- def _get_branches_by_record(id, mobi_config):
577
- api_url = mobi_config.mobi_api_url + "/catalogs/" + urllib.parse.quote_plus(mobi_config.catalog_name) + "/records/" + urllib.parse.quote_plus(id) + "/branches"
578
- response = requests.get(api_url, verify=False, timeout=5000, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
579
- return response.json()
580
-
581
- def _get_or_create_branch_by_record(id, name, mobi_config):
582
- branches = _get_branches_by_record(id, mobi_config)
583
- for b in branches:
584
- if b["http://purl.org/dc/terms/title"][0]["@value"] == name:
585
- return b["@id"]
586
-
587
- commit = ""
588
- for b in branches:
589
- if b["http://purl.org/dc/terms/title"][0]["@value"] == "MASTER":
590
- commit = b["http://mobi.com/ontologies/catalog#head"][0]["@id"]
591
-
592
- api_url = mobi_config.mobi_api_url + "/catalogs/" + urllib.parse.quote_plus(mobi_config.catalog_name) + "/records/" + urllib.parse.quote_plus(id) + "/branches"
593
- pd = {"type": "http://mobi.com/ontologies/catalog#Branch", "title": name, "commitId": commit}
594
- requests.post(api_url, verify=False, timeout=5000, params=pd, auth=requests.auth.HTTPBasicAuth(mobi_config.mobi_username, mobi_config.mobi_password))
595
-
596
- branches = _get_branches_by_record(id, mobi_config)
597
- for b in branches:
598
- if b["http://purl.org/dc/terms/title"][0]["@value"] == name:
599
- return b["@id"]
600
- return ""
601
-
602
- ##############################
603
- # Mobi Parse
604
- ##############################
605
- def _get_domain_range(url, mobi_config):
606
- for d, r in mobi_config.domain_extraction.items():
607
- if d in url:
608
- return r
609
- return {"min": 0, "max": 0}
610
-
611
- def _parent_uri_from_uri(uri):
612
- #return "/".join(uri.split("/")[:-1])
613
- return "/".join(_uri_split(uri)[:-1])
614
-
615
- def _trim_trailing_slash(uri):
616
- if uri[-1] == "/":
617
- return uri[:-1]
618
- else:
619
- return uri
620
-
621
- def _uri_split(uri):
622
- uri = uri.replace("#", "/")
623
- return uri.split("/")
624
-
625
-
626
- ################################
627
- # Transform from Kobai to Mobi
628
- ################################
629
-
630
- def _get_ont_sig_from_concept(tenantId, uri, mobi_config):
631
- uri = uri.replace(tenantId, mobi_config.default_tenant_id)
632
- ont_sig = uri.replace("http://kobai/" + mobi_config.default_tenant_id + "/AssetModel/", "").replace("#", "/").replace("_", "/")
633
- ont_sig = "/".join(ont_sig.split("/")[:-1])
634
- return ont_sig
635
-
636
- def _get_ont_sig_from_ont(uri, length):
637
- return "/".join(uri.split("/")[-length:])
638
-
639
- def _get_concept_name_from_prop_uri(uri):
640
- return uri.split("#")[0].split("/")[-1]
641
- #return uri.split("#")[0].split("_")[-1]
642
-
643
- ################################
644
- # Transform from Mobi to Kobai
645
- ################################
646
-
647
- def _domain_from_uri(uri, mobi_config):
648
- #domain = "_".join(uri.split("/")[_get_domain_range(uri, mobi_config)['min']:_get_domain_range(uri, mobi_config)['max']+1])
649
- domain = "_".join(_uri_split(uri)[_get_domain_range(uri, mobi_config)['min']:_get_domain_range(uri, mobi_config)['max']+1])
650
- return domain
651
-
652
- def _name_from_uri(uri, mobi_config):
653
- #name = "_".join(uri.split("/")[_get_domain_range(uri, mobi_config)['max']+1:])
654
- name = "_".join(_uri_split(uri)[_get_domain_range(uri, mobi_config)['max']+1:])
655
- if name == "":
656
- #name = "_" + "_".join(uri.split("/")[_get_domain_range(uri, mobi_config)['max']:])
657
- name = "_" + "_".join(_uri_split(uri)[_get_domain_range(uri, mobi_config)['max']:])
658
- return name
659
-
660
- def _label_from_uri(uri):
661
- #return uri.split("/")[-1]
662
- return _uri_split(uri)[-1]
663
-
664
- def _fix_uri(uri, type, mobi_config):
665
- domain = _domain_from_uri(uri, mobi_config)
666
- name = _name_from_uri(uri, mobi_config)
667
-
668
- top = "/".join(uri.split("/")[0:_get_domain_range(uri, mobi_config)['min']])
669
-
670
- if type == "concept":
671
- uri = domain + "#" + name
672
- elif type == "prop":
673
- uri = domain + "/" + "_".join(name.split("_")[0:-1]) + "#" + name.split("_")[-1]
674
-
675
- for d in mobi_config.domain_extraction:
676
-
677
- if d in top:
678
- uri = "http://kobai/" + mobi_config.default_tenant_id + "/AssetModel/" + uri
679
-
680
- return uri
681
-
682
-
kobai/mobi_config.py DELETED
@@ -1,16 +0,0 @@
1
- from pydantic_settings import BaseSettings
2
-
3
- class MobiSettings(BaseSettings):
4
-
5
- #Application Specific Settings
6
- domain_extraction: dict = {}
7
-
8
- #Mobi Server Settings
9
- mobi_api_url: str = "https://localhost:8443/mobirest"
10
- mobi_username: str = "admin"
11
- mobi_password: str = "admin"
12
-
13
- catalog_name: str = "http://mobi.com/catalog-local"
14
- default_tenant_id: str = "00000000-0000-0000-0000-000000000000"
15
-
16
- #settings = MobiSettings()
@@ -1,16 +0,0 @@
1
- kobai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- kobai/ai_query.py,sha256=xZh_OyakU01gIrnzaW4v_TdfzG51nPu0ntXJw1WEnvw,9424
3
- kobai/ai_rag.py,sha256=8B3HM4GoGVrgxJG678NN4vGaDwZRYnQiK5SCGiMIYkM,15186
4
- kobai/databricks_client.py,sha256=fyqqMly2Qm0r1AHWsQjkYeNsDdH0G1JSgTkF9KJ55qA,2118
5
- kobai/demo_tenant_client.py,sha256=wlNc-bdI2wotRXo8ppUOalv4hYdBlek_WzJNARZV-AE,9293
6
- kobai/mobi.py,sha256=FKIyVcmDoQetH00Hg9ajOPmO029IGpL1sttgwvB8-Pc,29862
7
- kobai/mobi_config.py,sha256=BpW1cn7BgF5iTYz_bB0HMpXflfql0PYlQ61Y_Uh1Vns,454
8
- kobai/ms_authenticate.py,sha256=rlmhtvAaSRBlYmvIBy5epMVa4MBGBLPaMwawu1T_xDQ,2252
9
- kobai/spark_client.py,sha256=opM_F-4Ut5Hq5zZjWMuLvUps9sDULvyPNZHXGL8dW1k,776
10
- kobai/tenant_api.py,sha256=Q5yuFd9_V4lo3LWzvYEEO3LpDRWFgQD4TlRPXDTGbiE,4368
11
- kobai/tenant_client.py,sha256=Zb9XKOM23u311nbnBaB8JQ72nrPVA7LyOmMn526rC7U,43870
12
- kobai_sdk-0.3.0rc2.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
13
- kobai_sdk-0.3.0rc2.dist-info/METADATA,sha256=g-g_YgrjB32fVx_gsE9uPpmZXm2B6iBMehsVycgAj7o,19263
14
- kobai_sdk-0.3.0rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
- kobai_sdk-0.3.0rc2.dist-info/top_level.txt,sha256=ns1El3BrTTHKvoAgU1XtiSaVIudYeCXbEEUVY8HFDZ4,6
16
- kobai_sdk-0.3.0rc2.dist-info/RECORD,,