kobai-sdk 0.2.10__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kobai-sdk might be problematic. Click here for more details.
- {kobai_sdk-0.2.10/kobai_sdk.egg-info → kobai_sdk-0.3.0}/PKG-INFO +58 -54
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/README.md +57 -52
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/kobai/ai_query.py +25 -22
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/kobai/ai_rag.py +7 -16
- kobai_sdk-0.3.0/kobai/ms_authenticate.py +66 -0
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/kobai/tenant_api.py +5 -2
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/kobai/tenant_client.py +46 -73
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0/kobai_sdk.egg-info}/PKG-INFO +58 -54
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/kobai_sdk.egg-info/SOURCES.txt +1 -0
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/kobai_sdk.egg-info/requires.txt +0 -1
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/pyproject.toml +2 -3
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/LICENSE +0 -0
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/MANIFEST.in +0 -0
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/kobai/__init__.py +0 -0
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/kobai/databricks_client.py +0 -0
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/kobai/demo_tenant_client.py +0 -0
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/kobai/spark_client.py +0 -0
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/kobai_sdk.egg-info/dependency_links.txt +0 -0
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/kobai_sdk.egg-info/top_level.txt +0 -0
- {kobai_sdk-0.2.10 → kobai_sdk-0.3.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kobai-sdk
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: A package that enables interaction with a Kobai tenant.
|
|
5
5
|
Author-email: Ryan Oattes <ryan@kobai.io>
|
|
6
6
|
License: Apache License
|
|
@@ -223,7 +223,6 @@ Requires-Dist: langchain-core
|
|
|
223
223
|
Requires-Dist: langchain-community
|
|
224
224
|
Requires-Dist: langchain_openai
|
|
225
225
|
Requires-Dist: databricks_langchain
|
|
226
|
-
Requires-Dist: sentence-transformers
|
|
227
226
|
Provides-Extra: dev
|
|
228
227
|
Requires-Dist: black; extra == "dev"
|
|
229
228
|
Requires-Dist: bumpver; extra == "dev"
|
|
@@ -249,21 +248,50 @@ from kobai import tenant_client, spark_client, databricks_client
|
|
|
249
248
|
|
|
250
249
|
schema = 'main.demo'
|
|
251
250
|
uri = 'https://demo.kobai.io'
|
|
252
|
-
tenant_id = '1'
|
|
253
251
|
tenant_name = 'My Demo Tenant'
|
|
254
|
-
|
|
255
|
-
k = tenant_client.TenantClient(tenant_name, tenant_id, uri, schema)
|
|
252
|
+
k = tenant_client.TenantClient(tenant_name, uri, schema)
|
|
256
253
|
```
|
|
257
254
|
|
|
258
255
|
2. Authenticate with the Kobai instance:
|
|
256
|
+
Authentication can be performed using different methods, such as device code flow, on-behalf-of flow, or browser-based tokens.
|
|
257
|
+
|
|
258
|
+
#### Authentication via device code
|
|
259
|
+
Step 1: Obtain the access token from IDM (Identity and Access Management)
|
|
259
260
|
|
|
260
261
|
```python
|
|
261
|
-
|
|
262
|
+
from kobai import ms_authenticate
|
|
263
|
+
|
|
262
264
|
tenant_id = 'your_Entra_directory_id_here'
|
|
265
|
+
client_id = 'your_Entra_app_id_here'
|
|
266
|
+
|
|
267
|
+
access_token = ms_authenticate.device_code(tenant_id, client_id)
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
Step 2: Use the token to retrieve the list of Kobai tenants (unless the tenant ID is already known).
|
|
263
271
|
|
|
264
|
-
|
|
272
|
+
```python
|
|
273
|
+
tenants = k.get_tenants(id_token=access_token)
|
|
274
|
+
print(tenants)
|
|
265
275
|
```
|
|
266
276
|
|
|
277
|
+
Step 3: Authenticate with Kobai for the specific tenant using the IDM access token.
|
|
278
|
+
|
|
279
|
+
```python
|
|
280
|
+
kobai_tenant_id = "5c1ba715-3961-4835-8a10-6f6f963b53ff"
|
|
281
|
+
k.use_access_token(access_token = access_token, tenant_id=kobai_tenant_id)
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
At this point, authentication to the Kobai tenant is successfully completed.
|
|
285
|
+
|
|
286
|
+
#### Authentication via browser token
|
|
287
|
+
|
|
288
|
+
```python
|
|
289
|
+
k.use_browser_token(access_token="KOBAI_ACCESS_TOKEN_FROM_BROWSER")
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
#### Authentication via on-behalf-of flow
|
|
293
|
+
Sample code demonstrating authentication via the on-behalf-of flow will be provided on request.
|
|
294
|
+
|
|
267
295
|
3. Initialize a Spark client using your current `SparkSession`, and generate semantically-rich SQL views describing this Kobai tenant:
|
|
268
296
|
|
|
269
297
|
```python
|
|
@@ -305,68 +333,41 @@ kobai_query_name = "Set ownership"
|
|
|
305
333
|
question_json = k.run_question_remote(k.get_question_id(kobai_query_name)) # By questionName
|
|
306
334
|
```
|
|
307
335
|
|
|
308
|
-
3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using
|
|
309
|
-
|
|
310
|
-
#### Using Azure OpenAI
|
|
311
|
-
|
|
312
|
-
###### Authentication Methods:
|
|
313
|
-
|
|
314
|
-
1. ApiKey
|
|
315
|
-
|
|
316
|
-
```python
|
|
317
|
-
from kobai import ai_query, llm_config
|
|
318
|
-
import json
|
|
319
|
-
|
|
320
|
-
followup_question = "Which owner owns the most sets?"
|
|
321
|
-
|
|
322
|
-
llm_config = llm_config.LLMConfig(endpoint="https://kobaipoc.openai.azure.com/", api_key="YOUR_API_KEY", deployment="gpt-4o-mini", llm_provider="azure_openai")
|
|
323
|
-
|
|
324
|
-
output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
|
|
325
|
-
print(output)
|
|
326
|
-
```
|
|
327
|
-
|
|
328
|
-
2. Azure Active Directory Authentication
|
|
336
|
+
3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using the user-provided chat and embedding model.
|
|
329
337
|
|
|
330
|
-
|
|
331
|
-
|
|
338
|
+
#### Using Databricks Embeddings and Chat Models in a Databricks Notebook
|
|
339
|
+
Initialize the AI components by specifying the embedding and chat models, then proceed with follow-up questions for interactive engagement.
|
|
332
340
|
|
|
333
341
|
```python
|
|
334
|
-
from
|
|
342
|
+
from databricks_langchain import DatabricksEmbeddings
|
|
343
|
+
from langchain_community.chat_models import ChatDatabricks
|
|
335
344
|
import json
|
|
336
345
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
|
|
343
|
-
print(output)
|
|
344
|
-
```
|
|
345
|
-
|
|
346
|
-
#### Using Databricks (Default Configuration)
|
|
347
|
-
|
|
348
|
-
```python
|
|
349
|
-
from kobai import ai_query, llm_config
|
|
350
|
-
import json
|
|
346
|
+
# Choose an embedding model and a chat model from your Databricks serving endpoints, then initialize them.
|
|
347
|
+
embedding_model = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
|
|
348
|
+
chat_model = ChatDatabricks(endpoint="databricks-gpt-oss-20b")
|
|
349
|
+
k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
|
|
351
350
|
|
|
352
351
|
followup_question = "Which owner owns the most sets?"
|
|
353
|
-
|
|
354
|
-
llm_config = llm_config.LLMConfig()
|
|
355
|
-
|
|
356
|
-
output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
|
|
352
|
+
output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
|
|
357
353
|
print(output)
|
|
358
354
|
```
|
|
359
355
|
|
|
360
|
-
####
|
|
356
|
+
#### Using Azure OpenAI Embeddings and Chat Models
|
|
361
357
|
|
|
362
358
|
```python
|
|
363
|
-
from kobai import ai_query, llm_config
|
|
364
|
-
import json
|
|
365
359
|
from langchain_openai import AzureChatOpenAI
|
|
360
|
+
from langchain_openai import AzureOpenAIEmbeddings
|
|
361
|
+
import json
|
|
366
362
|
|
|
367
363
|
followup_question = "Which owner owns the most sets?"
|
|
368
364
|
|
|
369
|
-
|
|
365
|
+
embedding_model = AzureOpenAIEmbeddings(
|
|
366
|
+
model="text-embedding-3-small",
|
|
367
|
+
azure_endpoint="https://kobaipoc.openai.azure.com/",
|
|
368
|
+
api_key="YOUR_API_KEY",
|
|
369
|
+
openai_api_version="2023-05-15"
|
|
370
|
+
)
|
|
370
371
|
|
|
371
372
|
chat_model = AzureChatOpenAI(
|
|
372
373
|
azure_endpoint="https://kobaipoc.openai.azure.com/", azure_deployment="gpt-4o-mini",
|
|
@@ -375,7 +376,10 @@ openai_api_version="2024-02-15-preview",
|
|
|
375
376
|
temperature=0.5,
|
|
376
377
|
max_tokens=150,)
|
|
377
378
|
|
|
378
|
-
|
|
379
|
+
k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
|
|
380
|
+
|
|
381
|
+
followup_question = "Which theme has the most sets?"
|
|
382
|
+
output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
|
|
379
383
|
print(output)
|
|
380
384
|
```
|
|
381
385
|
|
|
@@ -15,21 +15,50 @@ from kobai import tenant_client, spark_client, databricks_client
|
|
|
15
15
|
|
|
16
16
|
schema = 'main.demo'
|
|
17
17
|
uri = 'https://demo.kobai.io'
|
|
18
|
-
tenant_id = '1'
|
|
19
18
|
tenant_name = 'My Demo Tenant'
|
|
20
|
-
|
|
21
|
-
k = tenant_client.TenantClient(tenant_name, tenant_id, uri, schema)
|
|
19
|
+
k = tenant_client.TenantClient(tenant_name, uri, schema)
|
|
22
20
|
```
|
|
23
21
|
|
|
24
22
|
2. Authenticate with the Kobai instance:
|
|
23
|
+
Authentication can be performed using different methods, such as device code flow, on-behalf-of flow, or browser-based tokens.
|
|
24
|
+
|
|
25
|
+
#### Authentication via device code
|
|
26
|
+
Step 1: Obtain the access token from IDM (Identity and Access Management)
|
|
25
27
|
|
|
26
28
|
```python
|
|
27
|
-
|
|
29
|
+
from kobai import ms_authenticate
|
|
30
|
+
|
|
28
31
|
tenant_id = 'your_Entra_directory_id_here'
|
|
32
|
+
client_id = 'your_Entra_app_id_here'
|
|
33
|
+
|
|
34
|
+
access_token = ms_authenticate.device_code(tenant_id, client_id)
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Step 2: Use the token to retrieve the list of Kobai tenants (unless the tenant ID is already known).
|
|
29
38
|
|
|
30
|
-
|
|
39
|
+
```python
|
|
40
|
+
tenants = k.get_tenants(id_token=access_token)
|
|
41
|
+
print(tenants)
|
|
31
42
|
```
|
|
32
43
|
|
|
44
|
+
Step 3: Authenticate with Kobai for the specific tenant using the IDM access token.
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
kobai_tenant_id = "5c1ba715-3961-4835-8a10-6f6f963b53ff"
|
|
48
|
+
k.use_access_token(access_token = access_token, tenant_id=kobai_tenant_id)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
At this point, authentication to the Kobai tenant is successfully completed.
|
|
52
|
+
|
|
53
|
+
#### Authentication via browser token
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
k.use_browser_token(access_token="KOBAI_ACCESS_TOKEN_FROM_BROWSER")
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
#### Authentication via on-behalf-of flow
|
|
60
|
+
Sample code demonstrating authentication via the on-behalf-of flow will be provided on request.
|
|
61
|
+
|
|
33
62
|
3. Initialize a Spark client using your current `SparkSession`, and generate semantically-rich SQL views describing this Kobai tenant:
|
|
34
63
|
|
|
35
64
|
```python
|
|
@@ -71,68 +100,41 @@ kobai_query_name = "Set ownership"
|
|
|
71
100
|
question_json = k.run_question_remote(k.get_question_id(kobai_query_name)) # By questionName
|
|
72
101
|
```
|
|
73
102
|
|
|
74
|
-
3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using
|
|
75
|
-
|
|
76
|
-
#### Using Azure OpenAI
|
|
77
|
-
|
|
78
|
-
###### Authentication Methods:
|
|
79
|
-
|
|
80
|
-
1. ApiKey
|
|
81
|
-
|
|
82
|
-
```python
|
|
83
|
-
from kobai import ai_query, llm_config
|
|
84
|
-
import json
|
|
85
|
-
|
|
86
|
-
followup_question = "Which owner owns the most sets?"
|
|
87
|
-
|
|
88
|
-
llm_config = llm_config.LLMConfig(endpoint="https://kobaipoc.openai.azure.com/", api_key="YOUR_API_KEY", deployment="gpt-4o-mini", llm_provider="azure_openai")
|
|
89
|
-
|
|
90
|
-
output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
|
|
91
|
-
print(output)
|
|
92
|
-
```
|
|
93
|
-
|
|
94
|
-
2. Azure Active Directory Authentication
|
|
103
|
+
3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using the user-provided chat and embedding model.
|
|
95
104
|
|
|
96
|
-
|
|
97
|
-
|
|
105
|
+
#### Using Databricks Embeddings and Chat Models in a Databricks Notebook
|
|
106
|
+
Initialize the AI components by specifying the embedding and chat models, then proceed with follow-up questions for interactive engagement.
|
|
98
107
|
|
|
99
108
|
```python
|
|
100
|
-
from
|
|
109
|
+
from databricks_langchain import DatabricksEmbeddings
|
|
110
|
+
from langchain_community.chat_models import ChatDatabricks
|
|
101
111
|
import json
|
|
102
112
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
|
|
109
|
-
print(output)
|
|
110
|
-
```
|
|
111
|
-
|
|
112
|
-
#### Using Databricks (Default Configuration)
|
|
113
|
-
|
|
114
|
-
```python
|
|
115
|
-
from kobai import ai_query, llm_config
|
|
116
|
-
import json
|
|
113
|
+
# Choose an embedding model and a chat model from your Databricks serving endpoints, then initialize them.
|
|
114
|
+
embedding_model = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
|
|
115
|
+
chat_model = ChatDatabricks(endpoint="databricks-gpt-oss-20b")
|
|
116
|
+
k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
|
|
117
117
|
|
|
118
118
|
followup_question = "Which owner owns the most sets?"
|
|
119
|
-
|
|
120
|
-
llm_config = llm_config.LLMConfig()
|
|
121
|
-
|
|
122
|
-
output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
|
|
119
|
+
output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
|
|
123
120
|
print(output)
|
|
124
121
|
```
|
|
125
122
|
|
|
126
|
-
####
|
|
123
|
+
#### Using Azure OpenAI Embeddings and Chat Models
|
|
127
124
|
|
|
128
125
|
```python
|
|
129
|
-
from kobai import ai_query, llm_config
|
|
130
|
-
import json
|
|
131
126
|
from langchain_openai import AzureChatOpenAI
|
|
127
|
+
from langchain_openai import AzureOpenAIEmbeddings
|
|
128
|
+
import json
|
|
132
129
|
|
|
133
130
|
followup_question = "Which owner owns the most sets?"
|
|
134
131
|
|
|
135
|
-
|
|
132
|
+
embedding_model = AzureOpenAIEmbeddings(
|
|
133
|
+
model="text-embedding-3-small",
|
|
134
|
+
azure_endpoint="https://kobaipoc.openai.azure.com/",
|
|
135
|
+
api_key="YOUR_API_KEY",
|
|
136
|
+
openai_api_version="2023-05-15"
|
|
137
|
+
)
|
|
136
138
|
|
|
137
139
|
chat_model = AzureChatOpenAI(
|
|
138
140
|
azure_endpoint="https://kobaipoc.openai.azure.com/", azure_deployment="gpt-4o-mini",
|
|
@@ -141,7 +143,10 @@ openai_api_version="2024-02-15-preview",
|
|
|
141
143
|
temperature=0.5,
|
|
142
144
|
max_tokens=150,)
|
|
143
145
|
|
|
144
|
-
|
|
146
|
+
k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
|
|
147
|
+
|
|
148
|
+
followup_question = "Which theme has the most sets?"
|
|
149
|
+
output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
|
|
145
150
|
print(output)
|
|
146
151
|
```
|
|
147
152
|
|
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate
|
|
2
2
|
from langchain_core.output_parsers import StrOutputParser
|
|
3
3
|
|
|
4
|
-
from sentence_transformers import SentenceTransformer, util
|
|
5
|
-
|
|
6
4
|
from langchain_core.language_models.chat_models import BaseChatModel
|
|
7
5
|
from langchain_core.embeddings import Embeddings
|
|
8
6
|
from langchain_core.documents import Document
|
|
@@ -10,8 +8,9 @@ from langchain_core.retrievers import BaseRetriever
|
|
|
10
8
|
from langchain_core.callbacks import CallbackManagerForRetrieverRun
|
|
11
9
|
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
|
|
12
10
|
from langchain_core.vectorstores import InMemoryVectorStore
|
|
11
|
+
import numpy as np
|
|
13
12
|
|
|
14
|
-
from typing import
|
|
13
|
+
from typing import List
|
|
15
14
|
|
|
16
15
|
|
|
17
16
|
MESSAGE_SYSTEM_TEMPLATE = """
|
|
@@ -73,7 +72,7 @@ def format_docs(docs):
|
|
|
73
72
|
def input_only(inpt):
|
|
74
73
|
return inpt["question"]
|
|
75
74
|
|
|
76
|
-
def followup_question(user_question, question_results, question_name, question_def, embedding_model:
|
|
75
|
+
def followup_question(user_question, question_results, question_name, question_def, embedding_model: Embeddings, chat_model: BaseChatModel, use_inmem_vectors=False, k=50):
|
|
77
76
|
|
|
78
77
|
row_texts = process_question_results(question_def, question_results)
|
|
79
78
|
question_documents = [Document(page_content=r, metadata={"source": "kobai"}) for r in row_texts]
|
|
@@ -118,22 +117,13 @@ def init_question_search_index(tenant_questions, emb_model):
|
|
|
118
117
|
|
|
119
118
|
q_ids = [q["id"] for q in tenant_questions]
|
|
120
119
|
q_descs = [q["description"] for q in tenant_questions]
|
|
121
|
-
|
|
122
|
-
if isinstance(emb_model, SentenceTransformer):
|
|
123
|
-
q_vectors = emb_model.encode(q_descs)
|
|
124
|
-
else:
|
|
125
|
-
q_vectors = emb_model.embed_documents(q_descs)
|
|
126
|
-
|
|
120
|
+
q_vectors = emb_model.embed_documents(q_descs)
|
|
127
121
|
return {"ids": q_ids, "descs": q_descs, "vectors": q_vectors}
|
|
128
122
|
|
|
129
123
|
|
|
130
124
|
def question_search(search_text: str, search_index, emb_model, k: int):
|
|
131
|
-
|
|
132
|
-
search_vec = emb_model.encode(search_text)
|
|
133
|
-
else:
|
|
134
|
-
search_vec = emb_model.embed_query(search_text)
|
|
125
|
+
search_vec = emb_model.embed_query(search_text)
|
|
135
126
|
#search_vec = emb_model.encode(search_text)
|
|
136
|
-
|
|
137
127
|
matches = __top_vector_matches(search_vec, search_index["vectors"], top=k)
|
|
138
128
|
|
|
139
129
|
for mi, m in enumerate(matches):
|
|
@@ -142,13 +132,25 @@ def question_search(search_text: str, search_index, emb_model, k: int):
|
|
|
142
132
|
return matches
|
|
143
133
|
|
|
144
134
|
def __top_vector_matches(test_vec, options_list_vec, top=1):
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
135
|
+
# Normalize the test vector
|
|
136
|
+
test_vec_norm = test_vec / np.linalg.norm(test_vec)
|
|
137
|
+
# Normalize the option vectors
|
|
138
|
+
options_norm = options_list_vec / np.linalg.norm(options_list_vec, axis=1, keepdims=True)
|
|
139
|
+
|
|
140
|
+
# Compute cosine similarity (dot product of normalized vectors)
|
|
141
|
+
cosine_similarities = np.dot(options_norm, test_vec_norm)
|
|
142
|
+
|
|
143
|
+
# Get indexes and similarity scores as dict
|
|
144
|
+
scores_d = [{"index": i, "value": float(v)} for i, v in enumerate(cosine_similarities)]
|
|
145
|
+
|
|
146
|
+
# Sort dict by similarity score descending
|
|
147
|
+
sorted_d = sorted(scores_d, key=lambda x: x["value"], reverse=True)
|
|
148
|
+
|
|
149
|
+
# Return top results
|
|
150
|
+
top_d = sorted_d[:top]
|
|
150
151
|
return top_d
|
|
151
152
|
|
|
153
|
+
|
|
152
154
|
def process_question_results(question_def, question_results):
|
|
153
155
|
|
|
154
156
|
"""
|
|
@@ -211,8 +213,9 @@ def process_question_results(question_def, question_results):
|
|
|
211
213
|
|
|
212
214
|
|
|
213
215
|
concept_order = [max_src]
|
|
214
|
-
|
|
215
|
-
|
|
216
|
+
if max_src != "":
|
|
217
|
+
for t in concept_rels[max_src]["edges"]:
|
|
218
|
+
concept_order.append(t["dst"])
|
|
216
219
|
|
|
217
220
|
for c in concept_props:
|
|
218
221
|
if c not in concept_order:
|
|
@@ -3,9 +3,7 @@ from pyspark.sql import SparkSession
|
|
|
3
3
|
|
|
4
4
|
from pyspark.sql.types import StructType, StructField, StringType, ArrayType, FloatType, IntegerType
|
|
5
5
|
from pyspark.sql import functions as F
|
|
6
|
-
from sentence_transformers import SentenceTransformer
|
|
7
6
|
from delta import DeltaTable
|
|
8
|
-
from typing import Union
|
|
9
7
|
from langchain_core.language_models.chat_models import BaseChatModel
|
|
10
8
|
from langchain_core.embeddings import Embeddings
|
|
11
9
|
from langchain_community.document_loaders import PySparkDataFrameLoader
|
|
@@ -145,13 +143,13 @@ def __generate_sentences_from_questions(tc: AIContext, debug):
|
|
|
145
143
|
ss.sql(full_sql)
|
|
146
144
|
|
|
147
145
|
|
|
148
|
-
def encode_to_delta_local(tc: AIContext, st_model:
|
|
146
|
+
def encode_to_delta_local(tc: AIContext, st_model: Embeddings, replica_schema=None, batch_size=100000):
|
|
149
147
|
"""
|
|
150
148
|
Encode Semantic Data to Vectors in Delta Table
|
|
151
149
|
|
|
152
150
|
Parameters:
|
|
153
151
|
tc (TenantClient): The Kobai tenant_client instance instantiated via the SDK.
|
|
154
|
-
st_model (
|
|
152
|
+
st_model (Embeddings): A langchain embedding model to use for encoding.
|
|
155
153
|
replica_schema (str) OPTIONAL: An alternate schema (catalog.database) to create the Delta table. Useful when the base Kobai schema is not on a Unity Catalog.
|
|
156
154
|
"""
|
|
157
155
|
|
|
@@ -174,12 +172,8 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
|
|
|
174
172
|
content_list = [r["content"] for r in sentences_df.collect()]
|
|
175
173
|
id_list = [r["id"] for r in sentences_df.collect()]
|
|
176
174
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
content_list, normalize_embeddings=True, show_progress_bar=True).tolist()
|
|
180
|
-
else:
|
|
181
|
-
vector_list = st_model.embed_documents(content_list)
|
|
182
|
-
for i, v in enumerate(vector_list):
|
|
175
|
+
vector_list = st_model.embed_documents(content_list)
|
|
176
|
+
for i, v in enumerate(vector_list):
|
|
183
177
|
vector_list[i] = [float(x) for x in v]
|
|
184
178
|
#vector_list = st_model.encode(
|
|
185
179
|
# content_list, normalize_embeddings=True, show_progress_bar=True)
|
|
@@ -214,13 +208,13 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
|
|
|
214
208
|
# """)
|
|
215
209
|
|
|
216
210
|
|
|
217
|
-
def rag_delta(tc: AIContext, emb_model:
|
|
211
|
+
def rag_delta(tc: AIContext, emb_model: Embeddings, chat_model: BaseChatModel, question, k=5, replica_schema=None):
|
|
218
212
|
"""
|
|
219
213
|
Run a RAG query using vectors in Delta table.
|
|
220
214
|
|
|
221
215
|
Parameters:
|
|
222
216
|
tc (TenantClient): The Kobai tenant_client instance instantiated via the SDK.
|
|
223
|
-
emb_model (
|
|
217
|
+
emb_model (Embeddings): A langchain embedding model to use for encoding the query.
|
|
224
218
|
chat_model (BaseChatModel): A langchain chat model to use in the RAG pipeline.
|
|
225
219
|
question (str): The user's query.
|
|
226
220
|
k (int) OPTIONAL: The number of RAG documents to retrieve.
|
|
@@ -233,10 +227,7 @@ def rag_delta(tc: AIContext, emb_model: Union[SentenceTransformer, Embeddings],
|
|
|
233
227
|
|
|
234
228
|
ss = tc.spark_session
|
|
235
229
|
|
|
236
|
-
if isinstance(emb_model,
|
|
237
|
-
vector_list = emb_model.encode(
|
|
238
|
-
question, normalize_embeddings=True).tolist()
|
|
239
|
-
elif isinstance(emb_model, Embeddings):
|
|
230
|
+
if isinstance(emb_model, Embeddings):
|
|
240
231
|
vector_list = emb_model.embed_query(question)
|
|
241
232
|
else:
|
|
242
233
|
print("Invalid Embedding Model Type")
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from azure.identity import DeviceCodeCredential
|
|
2
|
+
from azure.identity import OnBehalfOfCredential
|
|
3
|
+
from azure.core.exceptions import AzureError
|
|
4
|
+
|
|
5
|
+
def get_scope(client_id: str = None, target_client_id: str = None, scope: str = None):
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
Get the default scopes
|
|
9
|
+
|
|
10
|
+
Parameters:
|
|
11
|
+
client_id (str): Client ID or Application ID from app registration with IDM.
|
|
12
|
+
target_client_id (str): Kobai IDM client ID.
|
|
13
|
+
scope (str): Scope to be passed
|
|
14
|
+
"""
|
|
15
|
+
if scope is not None:
|
|
16
|
+
return scope
|
|
17
|
+
|
|
18
|
+
if target_client_id is None:
|
|
19
|
+
target_client_id = client_id
|
|
20
|
+
|
|
21
|
+
return f"openid profile offline_access api://{target_client_id}/Kobai.Access"
|
|
22
|
+
|
|
23
|
+
def device_code(tenant_id: str, client_id: str, target_client_id: str = None, scope: str = None):
|
|
24
|
+
|
|
25
|
+
"""
|
|
26
|
+
Authenticate using the device code flow and get the access token
|
|
27
|
+
|
|
28
|
+
Parameters:
|
|
29
|
+
tenant_id (str): Tenant ID or Directory ID for IDM.
|
|
30
|
+
client_id (str): Client ID or Application ID from app registration with IDM.
|
|
31
|
+
target_client_id (str): Kobai IDM client ID.
|
|
32
|
+
scope (str): Scope to be passed
|
|
33
|
+
"""
|
|
34
|
+
credential = DeviceCodeCredential(client_id=client_id, tenant_id=tenant_id)
|
|
35
|
+
|
|
36
|
+
try:
|
|
37
|
+
token = credential.get_token(get_scope(client_id, target_client_id, scope))
|
|
38
|
+
return token.token
|
|
39
|
+
except AzureError as e:
|
|
40
|
+
return e
|
|
41
|
+
|
|
42
|
+
def onbehalf(tenant_id: str, client_id: str, client_secret: str, access_token: str, target_client_id: str = None, scope: str = None):
|
|
43
|
+
|
|
44
|
+
"""
|
|
45
|
+
Authenticate using the on-behalf-of flow and get the access token
|
|
46
|
+
|
|
47
|
+
Parameters:
|
|
48
|
+
tenant_id (str): Tenant ID or Directory ID for IDM.
|
|
49
|
+
client_id (str): Client ID or Application ID from app registration with IDM.
|
|
50
|
+
client_secret (str): Client secret from app registration with IDM.
|
|
51
|
+
access_token (str): Access token to be exchanged.
|
|
52
|
+
target_client_id (str): Kobai IDM client ID.
|
|
53
|
+
scope (str): Scope to be passed
|
|
54
|
+
"""
|
|
55
|
+
credential = OnBehalfOfCredential(
|
|
56
|
+
tenant_id=tenant_id,
|
|
57
|
+
client_id=client_id,
|
|
58
|
+
client_secret=client_secret,
|
|
59
|
+
user_assertion=access_token
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
try:
|
|
63
|
+
token = credential.get_token(get_scope(client_id, target_client_id, scope))
|
|
64
|
+
return token.token
|
|
65
|
+
except AzureError as e:
|
|
66
|
+
return e
|
|
@@ -19,7 +19,10 @@ class TenantAPI:
|
|
|
19
19
|
self.session = requests.Session()
|
|
20
20
|
|
|
21
21
|
if token is not None:
|
|
22
|
-
|
|
22
|
+
if token.startswith('Bearer'):
|
|
23
|
+
self.session.headers.update({'Authorization': '%s' % self.token})
|
|
24
|
+
else:
|
|
25
|
+
self.session.headers.update({'Authorization': 'Bearer %s' % self.token})
|
|
23
26
|
|
|
24
27
|
self.ssl_verify = verify
|
|
25
28
|
self.session.verify = verify
|
|
@@ -112,7 +115,7 @@ class TenantAPI:
|
|
|
112
115
|
|
|
113
116
|
if op_desc is None:
|
|
114
117
|
op_desc = "operation"
|
|
115
|
-
|
|
118
|
+
|
|
116
119
|
response = self.session.get(
|
|
117
120
|
self.base_uri + uri,
|
|
118
121
|
params=params,
|
|
@@ -3,15 +3,12 @@ import json
|
|
|
3
3
|
import urllib
|
|
4
4
|
import urllib.parse
|
|
5
5
|
|
|
6
|
-
from azure.identity import DeviceCodeCredential
|
|
7
6
|
from pyspark.sql import SparkSession
|
|
8
7
|
|
|
9
8
|
from langchain_community.chat_models import ChatDatabricks
|
|
10
9
|
from databricks_langchain import DatabricksEmbeddings
|
|
11
|
-
from sentence_transformers import SentenceTransformer
|
|
12
10
|
from langchain_core.language_models.chat_models import BaseChatModel
|
|
13
11
|
from langchain_core.embeddings import Embeddings
|
|
14
|
-
from typing import Union
|
|
15
12
|
|
|
16
13
|
from . import spark_client, databricks_client, ai_query, tenant_api, ai_rag
|
|
17
14
|
|
|
@@ -64,85 +61,73 @@ class TenantClient:
|
|
|
64
61
|
# MS Entra Auth
|
|
65
62
|
########################################
|
|
66
63
|
|
|
67
|
-
def
|
|
64
|
+
def use_browser_token(self, access_token):
|
|
68
65
|
|
|
69
66
|
"""
|
|
70
67
|
Authenticate the TenantClient with the Kobai instance. Returns nothing, but stores bearer token in client.
|
|
71
|
-
|
|
72
|
-
Limitations:
|
|
73
|
-
Currently supports only authentication via Microsoft Entra (AzureAD) using the device code OAuth flow.
|
|
68
|
+
This is a fall-back method for instances not using OAuth. It is inconvenient, as a Kobai Bearer Token must be retrieved from the user's browser.
|
|
74
69
|
|
|
75
70
|
Parameters:
|
|
76
|
-
|
|
77
|
-
tenant_id (str): Tenant ID or Directory ID for IDM.
|
|
71
|
+
access_token (str): Bearer token for Kobai app session.
|
|
78
72
|
"""
|
|
73
|
+
self._init_post_auth_success(access_token)
|
|
79
74
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
scope = client_id + "/.default"
|
|
83
|
-
token = credential.get_token("openid profile offline_access " + scope)
|
|
84
|
-
access_token = token.token
|
|
85
|
-
print(access_token)
|
|
86
|
-
|
|
87
|
-
user_name = ""
|
|
88
|
-
if override_username is not None:
|
|
89
|
-
user_name = override_username
|
|
90
|
-
|
|
91
|
-
user_name_query_params={ 'userName' : user_name}
|
|
92
|
-
tenants_response = self.api_client._TenantAPI__run_get('/user-mgmt-svcs/auth/tenants?'+urllib.parse.urlencode(user_name_query_params))
|
|
93
|
-
|
|
75
|
+
def use_access_token(self, access_token: str, id_token: str = None, tenant_id: str = None):
|
|
94
76
|
|
|
95
|
-
|
|
77
|
+
"""
|
|
78
|
+
Authenticate the TenantClient with the Kobai instance. Returns nothing, but stores bearer token in client.
|
|
96
79
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
80
|
+
Parameters:
|
|
81
|
+
access_token (str): Access token of the IDM server, used to obtain the Kobai access token.
|
|
82
|
+
id_token (str): ID token of the IDM server, used to obtain the on-behalf-of access token.
|
|
83
|
+
tenant_id (str): Kobai tenant id.
|
|
84
|
+
"""
|
|
101
85
|
|
|
102
86
|
token_request_payload={
|
|
87
|
+
"tenantName" : self.tenant_name,
|
|
103
88
|
"tenantId" : tenant_id,
|
|
104
|
-
"
|
|
105
|
-
"
|
|
89
|
+
"idToken" : id_token,
|
|
90
|
+
"accessToken" : access_token
|
|
106
91
|
}
|
|
107
92
|
|
|
108
|
-
|
|
109
|
-
'/user-mgmt-svcs/auth/oauth/
|
|
93
|
+
response = self.api_client._TenantAPI__run_post(
|
|
94
|
+
'/user-mgmt-svcs/auth/oauth/external/onbehalf/token',
|
|
110
95
|
token_request_payload
|
|
111
96
|
)
|
|
112
|
-
|
|
113
|
-
access_token = token_response.content.decode()
|
|
114
|
-
self.token = access_token
|
|
115
|
-
|
|
116
|
-
self.__api_init_session()
|
|
117
|
-
self.__set_tenant_solutionid()
|
|
118
|
-
if run_ai_init:
|
|
119
|
-
self.init_ai_components()
|
|
120
|
-
|
|
121
|
-
print("Authentication Successful.")
|
|
122
97
|
|
|
123
|
-
|
|
98
|
+
kb_access_token = response.headers.get('Authorization')
|
|
99
|
+
self.use_browser_token(kb_access_token)
|
|
100
|
+
|
|
101
|
+
def get_tenants(self, id_token: str = None):
|
|
124
102
|
|
|
125
103
|
"""
|
|
126
|
-
|
|
127
|
-
This is a fall-back method for instances not using OAuth. It is inconvenient, as a Kobai Bearer Token must be retrieved from the user's browser.
|
|
104
|
+
Get the tenants associated with the given id token of the IDM. Returns tenants list.
|
|
128
105
|
|
|
129
106
|
Parameters:
|
|
130
|
-
|
|
107
|
+
id_token (str): ID token of the IDM server to be used to obtain user tenants.
|
|
131
108
|
"""
|
|
132
109
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
if run_ai_init:
|
|
138
|
-
self.init_ai_components()
|
|
110
|
+
if (id_token is not None) :
|
|
111
|
+
token_request_payload={
|
|
112
|
+
"idToken" : id_token
|
|
113
|
+
}
|
|
139
114
|
|
|
115
|
+
response = self.api_client._TenantAPI__run_post(
|
|
116
|
+
'/user-mgmt-svcs/auth/oauth/external/token/tenants',
|
|
117
|
+
token_request_payload
|
|
118
|
+
)
|
|
140
119
|
|
|
141
|
-
|
|
120
|
+
self.tenant_list = response.json()
|
|
121
|
+
return self.tenant_list
|
|
142
122
|
|
|
143
123
|
def __api_init_session(self):
|
|
144
124
|
self.api_client = tenant_api.TenantAPI(self.token, self.uri, verify=self.ssl_verify, proxies=self.proxies )
|
|
145
|
-
|
|
125
|
+
|
|
126
|
+
def _init_post_auth_success(self, access_token):
|
|
127
|
+
self.token = access_token
|
|
128
|
+
self.__api_init_session()
|
|
129
|
+
self.__set_tenant_solutionid()
|
|
130
|
+
print("Authentication Successful.")
|
|
146
131
|
|
|
147
132
|
########################################
|
|
148
133
|
# Basic Config
|
|
@@ -454,7 +439,7 @@ class TenantClient:
|
|
|
454
439
|
"""
|
|
455
440
|
ai_rag.generate_sentences(self.get_ai_context(), replica_schema=replica_schema, concept_white_list=concept_white_list, use_questions=use_questions, debug=debug)
|
|
456
441
|
|
|
457
|
-
def rag_encode_to_delta_local(self, st_model:
|
|
442
|
+
def rag_encode_to_delta_local(self, st_model: Embeddings, replica_schema=None, batch_size=100000):
|
|
458
443
|
"""
|
|
459
444
|
Encode Semantic Data to Vectors in Delta Table
|
|
460
445
|
|
|
@@ -464,7 +449,7 @@ class TenantClient:
|
|
|
464
449
|
"""
|
|
465
450
|
ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema, batch_size=batch_size)
|
|
466
451
|
|
|
467
|
-
def rag_delta(self, emb_model:
|
|
452
|
+
def rag_delta(self, emb_model: Embeddings, chat_model: BaseChatModel, question, k=5, replica_schema=None):
|
|
468
453
|
"""
|
|
469
454
|
Run a RAG query using vectors in Delta table.
|
|
470
455
|
|
|
@@ -492,9 +477,7 @@ class TenantClient:
|
|
|
492
477
|
"""
|
|
493
478
|
|
|
494
479
|
if question_id is None:
|
|
495
|
-
|
|
496
480
|
suggestions = self.question_search(user_question, k=1)
|
|
497
|
-
|
|
498
481
|
question_id = suggestions[0]["id"]
|
|
499
482
|
|
|
500
483
|
question_results = self.run_question_remote(question_id, dynamic_filters=dynamic_filters)
|
|
@@ -504,26 +487,16 @@ class TenantClient:
|
|
|
504
487
|
|
|
505
488
|
return ai_query.followup_question(user_question, question_results, question_name, question_def, self.embedding_model, self.chat_model, use_inmem_vectors=use_inmem_vectors, k=k)
|
|
506
489
|
|
|
507
|
-
def init_ai_components(self, embedding_model:
|
|
490
|
+
def init_ai_components(self, embedding_model: Embeddings, chat_model: BaseChatModel):
|
|
508
491
|
"""
|
|
509
492
|
Set Chat and Embedding models for AI functions to use. Both models must be supplied by the caller.
|
|
510
493
|
|
|
511
494
|
Parameters:
|
|
512
|
-
embedding_model (
|
|
513
|
-
chat_model (BaseChatModel)
|
|
495
|
+
embedding_model (Embeddings): A Langchain Embedding model.
|
|
496
|
+
chat_model (BaseChatModel): A Langchain BaseChatModel chat model.
|
|
514
497
|
"""
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
self.embedding_model = embedding_model
|
|
518
|
-
else:
|
|
519
|
-
#self.embedding_model = SentenceTransformer("baai/bge-large-en-v1.5")
|
|
520
|
-
self.embedding_model = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
|
|
521
|
-
|
|
522
|
-
if chat_model is not None:
|
|
523
|
-
self.chat_model = chat_model
|
|
524
|
-
else:
|
|
525
|
-
self.chat_model = ChatDatabricks(endpoint="databricks-dbrx-instruct")
|
|
526
|
-
|
|
498
|
+
self.embedding_model = embedding_model
|
|
499
|
+
self.chat_model = chat_model
|
|
527
500
|
self.question_search_index = ai_query.init_question_search_index(self.list_questions(), self.embedding_model)
|
|
528
501
|
|
|
529
502
|
def question_search(self, search_text, k: int = 1):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: kobai-sdk
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: A package that enables interaction with a Kobai tenant.
|
|
5
5
|
Author-email: Ryan Oattes <ryan@kobai.io>
|
|
6
6
|
License: Apache License
|
|
@@ -223,7 +223,6 @@ Requires-Dist: langchain-core
|
|
|
223
223
|
Requires-Dist: langchain-community
|
|
224
224
|
Requires-Dist: langchain_openai
|
|
225
225
|
Requires-Dist: databricks_langchain
|
|
226
|
-
Requires-Dist: sentence-transformers
|
|
227
226
|
Provides-Extra: dev
|
|
228
227
|
Requires-Dist: black; extra == "dev"
|
|
229
228
|
Requires-Dist: bumpver; extra == "dev"
|
|
@@ -249,21 +248,50 @@ from kobai import tenant_client, spark_client, databricks_client
|
|
|
249
248
|
|
|
250
249
|
schema = 'main.demo'
|
|
251
250
|
uri = 'https://demo.kobai.io'
|
|
252
|
-
tenant_id = '1'
|
|
253
251
|
tenant_name = 'My Demo Tenant'
|
|
254
|
-
|
|
255
|
-
k = tenant_client.TenantClient(tenant_name, tenant_id, uri, schema)
|
|
252
|
+
k = tenant_client.TenantClient(tenant_name, uri, schema)
|
|
256
253
|
```
|
|
257
254
|
|
|
258
255
|
2. Authenticate with the Kobai instance:
|
|
256
|
+
Authentication can be performed using different methods, such as device code flow, on-behalf-of flow, or browser-based tokens.
|
|
257
|
+
|
|
258
|
+
#### Authentication via device code
|
|
259
|
+
Step 1: Obtain an access token from the IDM (identity management) provider.
|
|
259
260
|
|
|
260
261
|
```python
|
|
261
|
-
|
|
262
|
+
from kobai import ms_authenticate
|
|
263
|
+
|
|
262
264
|
tenant_id = 'your_Entra_directory_id_here'
|
|
265
|
+
client_id = 'your_Entra_app_id_here'
|
|
266
|
+
|
|
267
|
+
access_token = ms_authenticate.device_code(tenant_id, client_id)
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
Step 2: Use the token to retrieve the list of Kobai tenants (unless the tenant ID is already known).
|
|
263
271
|
|
|
264
|
-
|
|
272
|
+
```python
|
|
273
|
+
tenants = k.get_tenants(id_token=access_token)
|
|
274
|
+
print(tenants)
|
|
265
275
|
```
|
|
266
276
|
|
|
277
|
+
Step 3: Authenticate with Kobai for the specific tenant using the IDM access token.
|
|
278
|
+
|
|
279
|
+
```python
|
|
280
|
+
kobai_tenant_id = "5c1ba715-3961-4835-8a10-6f6f963b53ff"
|
|
281
|
+
k.use_access_token(access_token = access_token, tenant_id=kobai_tenant_id)
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
At this point, authentication to the Kobai tenant is successfully completed.
|
|
285
|
+
|
|
286
|
+
#### Authentication via browser token
|
|
287
|
+
|
|
288
|
+
```python
|
|
289
|
+
k.use_browser_token(access_token="KOBAI_ACCESS_TOKEN_FROM_BROWSER")
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
#### Authentication via on-behalf-of flow
|
|
293
|
+
Sample code demonstrating authentication via the on-behalf-of flow is available on request.
|
|
294
|
+
|
|
267
295
|
3. Initialize a Spark client using your current `SparkSession`, and generate semantically-rich SQL views describing this Kobai tenant:
|
|
268
296
|
|
|
269
297
|
```python
|
|
@@ -305,68 +333,41 @@ kobai_query_name = "Set ownership"
|
|
|
305
333
|
question_json = k.run_question_remote(k.get_question_id(kobai_query_name)) # By questionName
|
|
306
334
|
```
|
|
307
335
|
|
|
308
|
-
3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using
|
|
309
|
-
|
|
310
|
-
#### Using Azure OpenAI
|
|
311
|
-
|
|
312
|
-
###### Authentication Methods:
|
|
313
|
-
|
|
314
|
-
1. ApiKey
|
|
315
|
-
|
|
316
|
-
```python
|
|
317
|
-
from kobai import ai_query, llm_config
|
|
318
|
-
import json
|
|
319
|
-
|
|
320
|
-
followup_question = "Which owner owns the most sets?"
|
|
321
|
-
|
|
322
|
-
llm_config = llm_config.LLMConfig(endpoint="https://kobaipoc.openai.azure.com/", api_key="YOUR_API_KEY", deployment="gpt-4o-mini", llm_provider="azure_openai")
|
|
323
|
-
|
|
324
|
-
output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
|
|
325
|
-
print(output)
|
|
326
|
-
```
|
|
327
|
-
|
|
328
|
-
2. Azure Active Directory Authentication
|
|
336
|
+
3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using user-provided chat and embedding models.
|
|
329
337
|
|
|
330
|
-
|
|
331
|
-
|
|
338
|
+
#### Using Databricks Embeddings and Chat Models in a Databricks Notebook
|
|
339
|
+
Initialize the AI components by specifying the embedding and chat models, then proceed with follow-up questions for interactive engagement.
|
|
332
340
|
|
|
333
341
|
```python
|
|
334
|
-
from
|
|
342
|
+
from databricks_langchain import DatabricksEmbeddings
|
|
343
|
+
from langchain_community.chat_models import ChatDatabricks
|
|
335
344
|
import json
|
|
336
345
|
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
|
|
343
|
-
print(output)
|
|
344
|
-
```
|
|
345
|
-
|
|
346
|
-
#### Using Databricks (Default Configuration)
|
|
347
|
-
|
|
348
|
-
```python
|
|
349
|
-
from kobai import ai_query, llm_config
|
|
350
|
-
import json
|
|
346
|
+
# Choose an embedding model and a chat model from Databricks Model Serving, then initialize them.
|
|
347
|
+
embedding_model = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
|
|
348
|
+
chat_model = ChatDatabricks(endpoint="databricks-gpt-oss-20b")
|
|
349
|
+
k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
|
|
351
350
|
|
|
352
351
|
followup_question = "Which owner owns the most sets?"
|
|
353
|
-
|
|
354
|
-
llm_config = llm_config.LLMConfig()
|
|
355
|
-
|
|
356
|
-
output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
|
|
352
|
+
output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
|
|
357
353
|
print(output)
|
|
358
354
|
```
|
|
359
355
|
|
|
360
|
-
####
|
|
356
|
+
#### Using Azure OpenAI Embeddings and Chat Models
|
|
361
357
|
|
|
362
358
|
```python
|
|
363
|
-
from kobai import ai_query, llm_config
|
|
364
|
-
import json
|
|
365
359
|
from langchain_openai import AzureChatOpenAI
|
|
360
|
+
from langchain_openai import AzureOpenAIEmbeddings
|
|
361
|
+
import json
|
|
366
362
|
|
|
367
363
|
followup_question = "Which owner owns the most sets?"
|
|
368
364
|
|
|
369
|
-
|
|
365
|
+
embedding_model = AzureOpenAIEmbeddings(
|
|
366
|
+
model="text-embedding-3-small",
|
|
367
|
+
azure_endpoint="https://kobaipoc.openai.azure.com/",
|
|
368
|
+
api_key="YOUR_API_KEY",
|
|
369
|
+
openai_api_version="2023-05-15"
|
|
370
|
+
)
|
|
370
371
|
|
|
371
372
|
chat_model = AzureChatOpenAI(
|
|
372
373
|
azure_endpoint="https://kobaipoc.openai.azure.com/", azure_deployment="gpt-4o-mini",
|
|
@@ -375,7 +376,10 @@ openai_api_version="2024-02-15-preview",
|
|
|
375
376
|
temperature=0.5,
|
|
376
377
|
max_tokens=150,)
|
|
377
378
|
|
|
378
|
-
|
|
379
|
+
k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
|
|
380
|
+
|
|
381
|
+
followup_question = "Which theme has the most sets?"
|
|
382
|
+
output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
|
|
379
383
|
print(output)
|
|
380
384
|
```
|
|
381
385
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "kobai-sdk"
|
|
7
|
-
version = "0.
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "A package that enables interaction with a Kobai tenant."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
authors = [{ name = "Ryan Oattes", email = "ryan@kobai.io" }]
|
|
@@ -26,8 +26,7 @@ dependencies = [
|
|
|
26
26
|
"langchain-core",
|
|
27
27
|
"langchain-community",
|
|
28
28
|
"langchain_openai",
|
|
29
|
-
"databricks_langchain"
|
|
30
|
-
"sentence-transformers"
|
|
29
|
+
"databricks_langchain"
|
|
31
30
|
]
|
|
32
31
|
requires-python = ">=3.11"
|
|
33
32
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|