kobai-sdk 0.2.9__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kobai-sdk might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kobai-sdk
-Version: 0.2.9
+Version: 0.3.0
 Summary: A package that enables interaction with a Kobai tenant.
 Author-email: Ryan Oattes <ryan@kobai.io>
 License: Apache License
@@ -223,7 +223,6 @@ Requires-Dist: langchain-core
 Requires-Dist: langchain-community
 Requires-Dist: langchain_openai
 Requires-Dist: databricks_langchain
-Requires-Dist: sentence-transformers
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: bumpver; extra == "dev"
@@ -249,21 +248,50 @@ from kobai import tenant_client, spark_client, databricks_client
 
 schema = 'main.demo'
 uri = 'https://demo.kobai.io'
-tenant_id = '1'
 tenant_name = 'My Demo Tenant'
-
-k = tenant_client.TenantClient(tenant_name, tenant_id, uri, schema)
+k = tenant_client.TenantClient(tenant_name, uri, schema)
 ```
 
 2. Authenticate with the Kobai instance:
+Authentication can be performed using several methods, such as the device code flow, the on-behalf-of flow, or a browser-based token.
+
+#### Authentication via device code
+Step 1: Obtain an access token from IDM (Identity and Access Management):
 
 ```python
-client_id = 'your_Entra_app_id_here'
+from kobai import ms_authenticate
+
 tenant_id = 'your_Entra_directory_id_here'
+client_id = 'your_Entra_app_id_here'
+
+access_token = ms_authenticate.device_code(tenant_id, client_id)
+```
+
+Step 2: Use the token to retrieve the list of Kobai tenants (unless the tenant ID is already known):
 
-k.authenticate(client_id, tenant_id)
+```python
+tenants = k.get_tenants(id_token=access_token)
+print(tenants)
 ```
 
+Step 3: Authenticate with Kobai for the specific tenant using the IDM access token:
+
+```python
+kobai_tenant_id = "5c1ba715-3961-4835-8a10-6f6f963b53ff"
+k.use_access_token(access_token=access_token, tenant_id=kobai_tenant_id)
+```
+
+At this point, authentication to the Kobai tenant is complete.
+
+#### Authentication via browser token
+
+```python
+k.use_browser_token(access_token="KOBAI_ACCESS_TOKEN_FROM_BROWSER")
+```
+
+#### Authentication via on-behalf-of flow
+Sample code demonstrating authentication via the on-behalf-of flow is available on request.
+
 3. Initialize a Spark client using your current `SparkSession`, and generate semantically-rich SQL views describing this Kobai tenant:
 
 ```python
@@ -305,68 +333,41 @@ kobai_query_name = "Set ownership"
 question_json = k.run_question_remote(k.get_question_id(kobai_query_name)) # By questionName
 ```
 
-3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using either Azure OpenAI, Databricks or a user-provided chat model.
-
-#### Using Azure OpenAI
-
-###### Authentication Methods:
-
-1. ApiKey
-
-```python
-from kobai import ai_query, llm_config
-import json
-
-followup_question = "Which owner owns the most sets?"
-
-llm_config = llm_config.LLMConfig(endpoint="https://kobaipoc.openai.azure.com/", api_key="YOUR_API_KEY", deployment="gpt-4o-mini", llm_provider="azure_openai")
-
-output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
-print(output)
-```
-
-2. Azure Active Directory Authentication
+3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using a user-provided chat and embedding model.
 
-Ensure that the logged-in tenant has access to Azure OpenAI.
-In case of databricks notebook, the logged in service principal should have access to Azure OpenAI.
+#### Using Databricks Embeddings and Chat Models in a Databricks Notebook
+Initialize the AI components by specifying the embedding and chat models, then ask follow-up questions interactively.
 
 ```python
-from kobai import ai_query, llm_config
+from databricks_langchain import DatabricksEmbeddings
+from langchain_community.chat_models import ChatDatabricks
 import json
 
-followup_question = "Which owner owns the most sets?"
-
-llm_config = llm_config.LLMConfig(endpoint="https://kobaipoc.openai.azure.com/", deployment="gpt-4o-mini", llm_provider="azure_openai")
-llm_config.get_azure_ad_token()
-
-output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
-print(output)
-```
-
-#### Using Databricks (Default Configuration)
-
-```python
-from kobai import ai_query, llm_config
-import json
+# Choose an embedding and chat model from Databricks model serving and initialize.
+embedding_model = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
+chat_model = ChatDatabricks(endpoint="databricks-gpt-oss-20b")
+k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
 
 followup_question = "Which owner owns the most sets?"
-
-llm_config = llm_config.LLMConfig()
-
-output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
+output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
 print(output)
 ```
 
-#### User Provided Chat Model
+#### Using Azure OpenAI Embeddings and Chat Models
 
 ```python
-from kobai import ai_query, llm_config
-import json
 from langchain_openai import AzureChatOpenAI
+from langchain_openai import AzureOpenAIEmbeddings
+import json
 
 followup_question = "Which owner owns the most sets?"
 
-llm_config = llm_config.LLMConfig(debug=True)
+embedding_model = AzureOpenAIEmbeddings(
+model="text-embedding-3-small",
+azure_endpoint="https://kobaipoc.openai.azure.com/",
+api_key="YOUR_API_KEY",
+openai_api_version="2023-05-15"
+)
 
 chat_model = AzureChatOpenAI(
 azure_endpoint="https://kobaipoc.openai.azure.com/", azure_deployment="gpt-4o-mini",
@@ -375,7 +376,10 @@ openai_api_version="2024-02-15-preview",
 temperature=0.5,
 max_tokens=150,)
 
-output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, override_model=chat_model, llm_config=llm_config)
+k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
+
+followup_question = "Which theme has the most sets?"
+output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
 print(output)
 ```
 
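Note: the README above defers sample code for the on-behalf-of flow. A minimal sketch, assuming the `ms_authenticate.onbehalf` helper added in this release (new file `kobai/ms_authenticate.py`, shown further down); the Entra IDs, client secret, and incoming user token are placeholders:

```python
from kobai import ms_authenticate, tenant_client

k = tenant_client.TenantClient('My Demo Tenant', 'https://demo.kobai.io', 'main.demo')

# Exchange an incoming user token for a Kobai-scoped token (all values are placeholders).
obo_token = ms_authenticate.onbehalf(
    tenant_id='your_Entra_directory_id_here',
    client_id='your_Entra_app_id_here',
    client_secret='your_Entra_client_secret_here',
    access_token='incoming_user_access_token_here',
)

k.use_access_token(access_token=obo_token, tenant_id='your_kobai_tenant_id_here')
```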
@@ -15,21 +15,50 @@ from kobai import tenant_client, spark_client, databricks_client
 
 schema = 'main.demo'
 uri = 'https://demo.kobai.io'
-tenant_id = '1'
 tenant_name = 'My Demo Tenant'
-
-k = tenant_client.TenantClient(tenant_name, tenant_id, uri, schema)
+k = tenant_client.TenantClient(tenant_name, uri, schema)
 ```
 
 2. Authenticate with the Kobai instance:
+Authentication can be performed using several methods, such as the device code flow, the on-behalf-of flow, or a browser-based token.
+
+#### Authentication via device code
+Step 1: Obtain an access token from IDM (Identity and Access Management):
 
 ```python
-client_id = 'your_Entra_app_id_here'
+from kobai import ms_authenticate
+
 tenant_id = 'your_Entra_directory_id_here'
+client_id = 'your_Entra_app_id_here'
+
+access_token = ms_authenticate.device_code(tenant_id, client_id)
+```
+
+Step 2: Use the token to retrieve the list of Kobai tenants (unless the tenant ID is already known):
 
-k.authenticate(client_id, tenant_id)
+```python
+tenants = k.get_tenants(id_token=access_token)
+print(tenants)
 ```
 
+Step 3: Authenticate with Kobai for the specific tenant using the IDM access token:
+
+```python
+kobai_tenant_id = "5c1ba715-3961-4835-8a10-6f6f963b53ff"
+k.use_access_token(access_token=access_token, tenant_id=kobai_tenant_id)
+```
+
+At this point, authentication to the Kobai tenant is complete.
+
+#### Authentication via browser token
+
+```python
+k.use_browser_token(access_token="KOBAI_ACCESS_TOKEN_FROM_BROWSER")
+```
+
+#### Authentication via on-behalf-of flow
+Sample code demonstrating authentication via the on-behalf-of flow is available on request.
+
 3. Initialize a Spark client using your current `SparkSession`, and generate semantically-rich SQL views describing this Kobai tenant:
 
 ```python
@@ -71,68 +100,41 @@ kobai_query_name = "Set ownership"
 question_json = k.run_question_remote(k.get_question_id(kobai_query_name)) # By questionName
 ```
 
-3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using either Azure OpenAI, Databricks or a user-provided chat model.
-
-#### Using Azure OpenAI
-
-###### Authentication Methods:
-
-1. ApiKey
-
-```python
-from kobai import ai_query, llm_config
-import json
-
-followup_question = "Which owner owns the most sets?"
-
-llm_config = llm_config.LLMConfig(endpoint="https://kobaipoc.openai.azure.com/", api_key="YOUR_API_KEY", deployment="gpt-4o-mini", llm_provider="azure_openai")
-
-output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
-print(output)
-```
-
-2. Azure Active Directory Authentication
+3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using a user-provided chat and embedding model.
 
-Ensure that the logged-in tenant has access to Azure OpenAI.
-In case of databricks notebook, the logged in service principal should have access to Azure OpenAI.
+#### Using Databricks Embeddings and Chat Models in a Databricks Notebook
+Initialize the AI components by specifying the embedding and chat models, then ask follow-up questions interactively.
 
 ```python
-from kobai import ai_query, llm_config
+from databricks_langchain import DatabricksEmbeddings
+from langchain_community.chat_models import ChatDatabricks
 import json
 
-followup_question = "Which owner owns the most sets?"
-
-llm_config = llm_config.LLMConfig(endpoint="https://kobaipoc.openai.azure.com/", deployment="gpt-4o-mini", llm_provider="azure_openai")
-llm_config.get_azure_ad_token()
-
-output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
-print(output)
-```
-
-#### Using Databricks (Default Configuration)
-
-```python
-from kobai import ai_query, llm_config
-import json
+# Choose an embedding and chat model from Databricks model serving and initialize.
+embedding_model = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
+chat_model = ChatDatabricks(endpoint="databricks-gpt-oss-20b")
+k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
 
 followup_question = "Which owner owns the most sets?"
-
-llm_config = llm_config.LLMConfig()
-
-output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
+output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
 print(output)
 ```
 
-#### User Provided Chat Model
+#### Using Azure OpenAI Embeddings and Chat Models
 
 ```python
-from kobai import ai_query, llm_config
-import json
 from langchain_openai import AzureChatOpenAI
+from langchain_openai import AzureOpenAIEmbeddings
+import json
 
 followup_question = "Which owner owns the most sets?"
 
-llm_config = llm_config.LLMConfig(debug=True)
+embedding_model = AzureOpenAIEmbeddings(
+model="text-embedding-3-small",
+azure_endpoint="https://kobaipoc.openai.azure.com/",
+api_key="YOUR_API_KEY",
+openai_api_version="2023-05-15"
+)
 
 chat_model = AzureChatOpenAI(
 azure_endpoint="https://kobaipoc.openai.azure.com/", azure_deployment="gpt-4o-mini",
@@ -141,7 +143,10 @@ openai_api_version="2024-02-15-preview",
 temperature=0.5,
 max_tokens=150,)
 
-output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, override_model=chat_model, llm_config=llm_config)
+k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
+
+followup_question = "Which theme has the most sets?"
+output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
 print(output)
 ```
 
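Note: `k.authenticate(client_id, tenant_id)` from 0.2.9 (removed above) has no single replacement call. A migration sketch, under the assumption that the tenant list returned by `get_tenants` keeps the `name` and `id` fields the removed name-matching loop relied on:

```python
from kobai import ms_authenticate

# 0.2.9 bundled sign-in, tenant lookup, and token exchange into k.authenticate(...).
access_token = ms_authenticate.device_code(tenant_id, client_id)

# 0.3.0 no longer matches the tenant by name for you; pick the ID explicitly.
tenants = k.get_tenants(id_token=access_token)
kobai_tenant_id = next(t["id"] for t in tenants if t["name"] == 'My Demo Tenant')

k.use_access_token(access_token=access_token, tenant_id=kobai_tenant_id)
```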
@@ -1,8 +1,6 @@
 from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 
-from sentence_transformers import SentenceTransformer, util
-
 from langchain_core.language_models.chat_models import BaseChatModel
 from langchain_core.embeddings import Embeddings
 from langchain_core.documents import Document
@@ -10,8 +8,9 @@ from langchain_core.retrievers import BaseRetriever
 from langchain_core.callbacks import CallbackManagerForRetrieverRun
 from langchain_core.runnables import RunnablePassthrough, RunnableLambda
 from langchain_core.vectorstores import InMemoryVectorStore
+import numpy as np
 
-from typing import Union, List
+from typing import List
 
 
 MESSAGE_SYSTEM_TEMPLATE = """
@@ -73,7 +72,7 @@ def format_docs(docs):
 def input_only(inpt):
     return inpt["question"]
 
-def followup_question(user_question, question_results, question_name, question_def, embedding_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, use_inmem_vectors=False, k=50):
+def followup_question(user_question, question_results, question_name, question_def, embedding_model: Embeddings, chat_model: BaseChatModel, use_inmem_vectors=False, k=50):
 
     row_texts = process_question_results(question_def, question_results)
     question_documents = [Document(page_content=r, metadata={"source": "kobai"}) for r in row_texts]
@@ -118,22 +117,13 @@ def init_question_search_index(tenant_questions, emb_model):
 
     q_ids = [q["id"] for q in tenant_questions]
     q_descs = [q["description"] for q in tenant_questions]
-
-    if isinstance(emb_model, SentenceTransformer):
-        q_vectors = emb_model.encode(q_descs)
-    else:
-        q_vectors = emb_model.embed_documents(q_descs)
-
+    q_vectors = emb_model.embed_documents(q_descs)
     return {"ids": q_ids, "descs": q_descs, "vectors": q_vectors}
 
 
 def question_search(search_text: str, search_index, emb_model, k: int):
-    if isinstance(emb_model, SentenceTransformer):
-        search_vec = emb_model.encode(search_text)
-    else:
-        search_vec = emb_model.embed_query(search_text)
+    search_vec = emb_model.embed_query(search_text)
     #search_vec = emb_model.encode(search_text)
-
     matches = __top_vector_matches(search_vec, search_index["vectors"], top=k)
 
     for mi, m in enumerate(matches):
@@ -142,13 +132,25 @@ def question_search(search_text: str, search_index, emb_model, k: int):
     return matches
 
 def __top_vector_matches(test_vec, options_list_vec, top=1):
-    scores_t = util.cos_sim(test_vec, options_list_vec)[0]
-    scores_l = scores_t.tolist()
-    scores_d = [{"index": i, "value": v} for i, v in enumerate(scores_l)]
-    sorted_d = sorted(scores_d, key=lambda i: i["value"], reverse=True)
-    top_d = sorted_d[0:top]
+    # Normalize the test vector
+    test_vec_norm = test_vec / np.linalg.norm(test_vec)
+    # Normalize the option vectors
+    options_norm = options_list_vec / np.linalg.norm(options_list_vec, axis=1, keepdims=True)
+
+    # Compute cosine similarity (dot product of normalized vectors)
+    cosine_similarities = np.dot(options_norm, test_vec_norm)
+
+    # Get indexes and similarity scores as dict
+    scores_d = [{"index": i, "value": float(v)} for i, v in enumerate(cosine_similarities)]
+
+    # Sort dict by similarity score descending
+    sorted_d = sorted(scores_d, key=lambda x: x["value"], reverse=True)
+
+    # Return top results
+    top_d = sorted_d[:top]
     return top_d
 
+
 def process_question_results(question_def, question_results):
 
     """
@@ -211,8 +213,9 @@ def process_question_results(question_def, question_results):
 
 
     concept_order = [max_src]
-    for t in concept_rels[max_src]["edges"]:
-        concept_order.append(t["dst"])
+    if max_src != "":
+        for t in concept_rels[max_src]["edges"]:
+            concept_order.append(t["dst"])
 
     for c in concept_props:
         if c not in concept_order:
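Note: `util.cos_sim` from sentence-transformers is replaced above by a plain NumPy computation in `__top_vector_matches`. A standalone check of the same ranking logic, using made-up 3-dimensional vectors in place of real embeddings:

```python
import numpy as np

test_vec = np.array([1.0, 0.0, 1.0])
options = np.array([
    [1.0, 0.0, 1.0],  # same direction as test_vec -> similarity 1.0
    [0.0, 1.0, 0.0],  # orthogonal -> similarity 0.0
    [1.0, 1.0, 1.0],  # partially aligned -> ~0.816
])

# Normalize, then cosine similarity reduces to a dot product, as in __top_vector_matches.
test_norm = test_vec / np.linalg.norm(test_vec)
options_norm = options / np.linalg.norm(options, axis=1, keepdims=True)
sims = np.dot(options_norm, test_norm)

ranked = sorted(({"index": i, "value": float(v)} for i, v in enumerate(sims)),
                key=lambda d: d["value"], reverse=True)
print(ranked[:2])  # [{'index': 0, 'value': 1.0}, {'index': 2, 'value': 0.816...}]
```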
@@ -3,9 +3,7 @@ from pyspark.sql import SparkSession
 
 from pyspark.sql.types import StructType, StructField, StringType, ArrayType, FloatType, IntegerType
 from pyspark.sql import functions as F
-from sentence_transformers import SentenceTransformer
 from delta import DeltaTable
-from typing import Union
 from langchain_core.language_models.chat_models import BaseChatModel
 from langchain_core.embeddings import Embeddings
 from langchain_community.document_loaders import PySparkDataFrameLoader
@@ -145,13 +143,13 @@ def __generate_sentences_from_questions(tc: AIContext, debug):
     ss.sql(full_sql)
 
 
-def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
+def encode_to_delta_local(tc: AIContext, st_model: Embeddings, replica_schema=None, batch_size=100000):
     """
     Encode Semantic Data to Vectors in Delta Table
 
     Parameters:
     tc (TenantClient): The Kobai tenant_client instance instantiated via the SDK.
-    st_model (SentenceTransformer): A sentence_transformers model to use for encoding.
+    st_model (Embeddings): A langchain embedding model to use for encoding.
     replica_schema (str) OPTIONAL: An alternate schema (catalog.database) to create the Delta table. Useful when the base Kobai schema is not on a Unity Catalog.
     """
@@ -174,12 +172,8 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
     content_list = [r["content"] for r in sentences_df.collect()]
     id_list = [r["id"] for r in sentences_df.collect()]
 
-    if isinstance(st_model, SentenceTransformer):
-        vector_list = st_model.encode(
-            content_list, normalize_embeddings=True, show_progress_bar=True).tolist()
-    else:
-        vector_list = st_model.embed_documents(content_list)
-        for i, v in enumerate(vector_list):
+    vector_list = st_model.embed_documents(content_list)
+    for i, v in enumerate(vector_list):
         vector_list[i] = [float(x) for x in v]
     #vector_list = st_model.encode(
     #    content_list, normalize_embeddings=True)
@@ -214,13 +208,13 @@ def encode_to_delta_local(tc: AIContext, st_model: Union[SentenceTransformer, Em
     # """)
 
 
-def rag_delta(tc: AIContext, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
+def rag_delta(tc: AIContext, emb_model: Embeddings, chat_model: BaseChatModel, question, k=5, replica_schema=None):
     """
     Run a RAG query using vectors in Delta table.
 
     Parameters:
     tc (TenantClient): The Kobai tenant_client instance instantiated via the SDK.
-    emb_model (UNION[SentenceTransformer, Embeddings]): A sentence_transformers or langchain embedding model to use for encoding the query.
+    emb_model (Embeddings): A langchain embedding model to use for encoding the query.
     chat_model (BaseChatModel): A langchain chat model to use in the RAG pipeline.
     question (str): The user's query.
     k (int) OPTIONAL: The number of RAG documents to retrieve.
@@ -233,10 +227,7 @@ def rag_delta(tc: AIContext, emb_model: Union[SentenceTransformer, Embeddings],
 
     ss = tc.spark_session
 
-    if isinstance(emb_model, SentenceTransformer):
-        vector_list = emb_model.encode(
-            question, normalize_embeddings=True).tolist()
-    elif isinstance(emb_model, Embeddings):
+    if isinstance(emb_model, Embeddings):
         vector_list = emb_model.embed_query(question)
     else:
         print("Invalid Embedding Model Type")
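Note: with the SentenceTransformer branches removed, `encode_to_delta_local` and `rag_delta` accept only LangChain `Embeddings`. A sketch via the `TenantClient` wrappers, reusing the Databricks endpoints from the README (any served embedding/chat pair should work):

```python
from databricks_langchain import DatabricksEmbeddings
from langchain_community.chat_models import ChatDatabricks

emb = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
chat = ChatDatabricks(endpoint="databricks-gpt-oss-20b")

# Encode tenant sentences into a Delta table of vectors...
k.rag_encode_to_delta_local(st_model=emb)

# ...then answer a question over those vectors.
print(k.rag_delta(emb_model=emb, chat_model=chat, question="Which owner owns the most sets?"))
```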
@@ -0,0 +1,66 @@
+from azure.identity import DeviceCodeCredential
+from azure.identity import OnBehalfOfCredential
+from azure.core.exceptions import AzureError
+
+def get_scope(client_id: str = None, target_client_id: str = None, scope: str = None):
+
+    """
+    Get the default scopes
+
+    Parameters:
+    client_id (str): Client ID or Application ID from app registration with IDM.
+    target_client_id (str): Kobai IDM client ID.
+    scope (str): Scope to be passed.
+    """
+    if scope is not None:
+        return scope
+
+    if target_client_id is None:
+        target_client_id = client_id
+
+    return f"openid profile offline_access api://{target_client_id}/Kobai.Access"
+
+def device_code(tenant_id: str, client_id: str, target_client_id: str = None, scope: str = None):
+
+    """
+    Authenticate using the device code flow and get the access token
+
+    Parameters:
+    tenant_id (str): Tenant ID or Directory ID for IDM.
+    client_id (str): Client ID or Application ID from app registration with IDM.
+    target_client_id (str): Kobai IDM client ID.
+    scope (str): Scope to be passed.
+    """
+    credential = DeviceCodeCredential(client_id=client_id, tenant_id=tenant_id)
+
+    try:
+        token = credential.get_token(get_scope(client_id, target_client_id, scope))
+        return token.token
+    except AzureError as e:
+        return e
+
+def onbehalf(tenant_id: str, client_id: str, client_secret: str, access_token: str, target_client_id: str = None, scope: str = None):
+
+    """
+    Authenticate using the on-behalf-of flow and get the access token
+
+    Parameters:
+    tenant_id (str): Tenant ID or Directory ID for IDM.
+    client_id (str): Client ID or Application ID from app registration with IDM.
+    client_secret (str): Client secret from app registration with IDM.
+    access_token (str): Access token to be exchanged.
+    target_client_id (str): Kobai IDM client ID.
+    scope (str): Scope to be passed.
+    """
+    credential = OnBehalfOfCredential(
+        tenant_id=tenant_id,
+        client_id=client_id,
+        client_secret=client_secret,
+        user_assertion=access_token
+    )
+
+    try:
+        token = credential.get_token(get_scope(client_id, target_client_id, scope))
+        return token.token
+    except AzureError as e:
+        return e
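Note: both helpers above return the token string on success but return (rather than raise) the `AzureError` on failure, so callers may want a type check. A usage sketch with placeholder IDs:

```python
from azure.core.exceptions import AzureError
from kobai import ms_authenticate

token = ms_authenticate.device_code(
    tenant_id="your_Entra_directory_id_here",
    client_id="your_Entra_app_id_here",
)
if isinstance(token, AzureError):
    raise token  # surface the failure instead of passing the exception around

# The default scope targets the app's own Kobai.Access permission.
print(ms_authenticate.get_scope(client_id="your_Entra_app_id_here"))
# openid profile offline_access api://your_Entra_app_id_here/Kobai.Access
```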
@@ -19,7 +19,10 @@ class TenantAPI:
         self.session = requests.Session()
 
         if token is not None:
-            self.session.headers.update({'Authorization': 'Bearer %s' % self.token})
+            if token.startswith('Bearer'):
+                self.session.headers.update({'Authorization': '%s' % self.token})
+            else:
+                self.session.headers.update({'Authorization': 'Bearer %s' % self.token})
 
         self.ssl_verify = verify
         self.session.verify = verify
@@ -112,7 +115,7 @@ class TenantAPI:
 
         if op_desc is None:
             op_desc = "operation"
-
+
         response = self.session.get(
             self.base_uri + uri,
             params=params,
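Note: the first hunk lets the constructor accept tokens that already carry the `Bearer ` prefix, which is how `use_access_token` now passes the `Authorization` response header straight through. Assuming the remaining constructor parameters keep their defaults, both forms below should produce the same session header:

```python
from kobai import tenant_api

# Either a bare token or a prefixed one ends up as 'Authorization: Bearer <token>'.
api_a = tenant_api.TenantAPI("abc123", "https://demo.kobai.io")
api_b = tenant_api.TenantAPI("Bearer abc123", "https://demo.kobai.io")
```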
@@ -3,15 +3,12 @@ import json
 import urllib
 import urllib.parse
 
-from azure.identity import DeviceCodeCredential
 from pyspark.sql import SparkSession
 
 from langchain_community.chat_models import ChatDatabricks
 from databricks_langchain import DatabricksEmbeddings
-from sentence_transformers import SentenceTransformer
 from langchain_core.language_models.chat_models import BaseChatModel
 from langchain_core.embeddings import Embeddings
-from typing import Union
 
 from . import spark_client, databricks_client, ai_query, tenant_api, ai_rag
 
@@ -64,83 +61,73 @@ class TenantClient:
     # MS Entra Auth
     ########################################
 
-    def authenticate(self, client_id: str, tenant_id: str, run_ai_init: bool = True, override_username: str = None):
+    def use_browser_token(self, access_token):
 
         """
         Authenticate the TenantClient with the Kobai instance. Returns nothing, but stores bearer token in client.
-
-        Limitations:
-        Currently supports only authentication via Microsoft Entra (AzureAD) using DecideCode OAuth flow.
+        This is a fall-back method for instances not using OAuth. It is inconvenient as a Kobai Bearer Token must be retrieved from the user's browser.
 
         Parameters:
-        client_id (str): Client ID or Application ID from app registration with IDM.
-        tenant_id (str): Tenant ID or Directory ID for IDM.
+        access_token (str): Bearer token for Kobai app session.
         """
+        self._init_post_auth_success(access_token)
 
-        credential = DeviceCodeCredential(client_id=client_id, tenant_id=tenant_id)
-
-        access = credential.authenticate()
-
-        oauth_token = access.serialize()
-        print(oauth_token)
-        user_name = json.loads(access.serialize())["username"]
-
-        if override_username is not None:
-            user_name = override_username
+    def use_access_token(self, access_token: str, id_token: str = None, tenant_id: str = None):
 
-        user_name_query_params={ 'userName' : user_name}
-        tenants_response = self.api_client._TenantAPI__run_get('/user-mgmt-svcs/auth/tenants?'+urllib.parse.urlencode(user_name_query_params))
-
-
-        tenant_list = json.loads(tenants_response.content.decode("utf-8"))
+        """
+        Authenticate the TenantClient with the Kobai instance. Returns nothing, but stores bearer token in client.
 
-        tenant_id = ""
-        for t in tenant_list:
-            if t["name"] == self.tenant_name:
-                tenant_id = t["id"]
+        Parameters:
+        access_token (str): Access token from the IDM server, used to obtain the Kobai access token.
+        id_token (str): ID token from the IDM server, used to obtain the on-behalf-of access token.
+        tenant_id (str): Kobai tenant ID.
+        """
 
         token_request_payload={
+            "tenantName" : self.tenant_name,
             "tenantId" : tenant_id,
-            "oauthToken" : oauth_token,
-            "userName" : user_name
+            "idToken" : id_token,
+            "accessToken" : access_token
         }
-        token_response = self.api_client._TenantAPI__run_post(
-            '/user-mgmt-svcs/auth/oauth/devicecode',
+
+        response = self.api_client._TenantAPI__run_post(
+            '/user-mgmt-svcs/auth/oauth/external/onbehalf/token',
             token_request_payload
         )
-        access_token = token_response.content.decode()
-        self.token = access_token
-
-        self.__api_init_session()
-        self.__set_tenant_solutionid()
-        if run_ai_init:
-            self.init_ai_components()
-
-        print("Authentication Successful.")
 
-    def authenticate_browser_token(self, access_token, run_ai_init: bool = True):
+        kb_access_token = response.headers.get('Authorization')
+        self.use_browser_token(kb_access_token)
+
+    def get_tenants(self, id_token: str = None):
 
         """
-        Authenticate the TenantClient with the Kobai instance. Returns nothing, but stores bearer token in client.
-        This is a fall-back method for instances not using OAuth. It is inconvenient as a Kobai Bearer Token must be retrieved from the users browser.
+        Get the tenants associated with the given IDM ID token. Returns the tenant list.
 
         Parameters:
-        access_token (str): Bearer token for Kobai app session.
+        id_token (str): ID token from the IDM server, used to obtain the user's tenants.
         """
 
-        self.token = access_token
-
-        self.__api_init_session()
-        self.__set_tenant_solutionid()
-        if run_ai_init:
-            self.init_ai_components()
+        if id_token is not None:
+            token_request_payload={
+                "idToken" : id_token
+            }
 
+            response = self.api_client._TenantAPI__run_post(
+                '/user-mgmt-svcs/auth/oauth/external/token/tenants',
+                token_request_payload
+            )
 
-        print("Authentication Successful.")
+            self.tenant_list = response.json()
+            return self.tenant_list
 
     def __api_init_session(self):
         self.api_client = tenant_api.TenantAPI(self.token, self.uri, verify=self.ssl_verify, proxies=self.proxies )
-
+
+    def _init_post_auth_success(self, access_token):
+        self.token = access_token
+        self.__api_init_session()
+        self.__set_tenant_solutionid()
+        print("Authentication Successful.")
 
     ########################################
     # Basic Config
@@ -452,7 +439,7 @@ class TenantClient:
         """
         ai_rag.generate_sentences(self.get_ai_context(), replica_schema=replica_schema, concept_white_list=concept_white_list, use_questions=use_questions, debug=debug)
 
-    def rag_encode_to_delta_local(self, st_model: Union[SentenceTransformer, Embeddings], replica_schema=None, batch_size=100000):
+    def rag_encode_to_delta_local(self, st_model: Embeddings, replica_schema=None, batch_size=100000):
         """
         Encode Semantic Data to Vectors in Delta Table
 
@@ -462,7 +449,7 @@ class TenantClient:
         """
         ai_rag.encode_to_delta_local(self.get_ai_context(), st_model=st_model, replica_schema=replica_schema, batch_size=batch_size)
 
-    def rag_delta(self, emb_model: Union[SentenceTransformer, Embeddings], chat_model: BaseChatModel, question, k=5, replica_schema=None):
+    def rag_delta(self, emb_model: Embeddings, chat_model: BaseChatModel, question, k=5, replica_schema=None):
         """
         Run a RAG query using vectors in Delta table.
 
@@ -490,9 +477,7 @@ class TenantClient:
         """
 
         if question_id is None:
-
            suggestions = self.question_search(user_question, k=1)
-
            question_id = suggestions[0]["id"]
 
        question_results = self.run_question_remote(question_id, dynamic_filters=dynamic_filters)
@@ -502,26 +487,16 @@ class TenantClient:
 
         return ai_query.followup_question(user_question, question_results, question_name, question_def, self.embedding_model, self.chat_model, use_inmem_vectors=use_inmem_vectors, k=k)
 
-    def init_ai_components(self, embedding_model: Union[SentenceTransformer, Embeddings] = None, chat_model: BaseChatModel = None):
+    def init_ai_components(self, embedding_model: Embeddings, chat_model: BaseChatModel):
         """
         Set Chat and Embedding models for AI functions to use. If no arguments provided, Databricks hosted services are used.
 
         Parameters:
-        embedding_model (Union[SentenceTransformer, Embeddings]) OPTIONAL: A sentence_transformer or Langchain Embedding model.
-        chat_model (BaseChatModel) OPTIONAL: A Langchain BaseChatModel chat model.
+        embedding_model (Embeddings): A Langchain Embedding model.
+        chat_model (BaseChatModel): A Langchain BaseChatModel chat model.
         """
-
-        if embedding_model is not None:
-            self.embedding_model = embedding_model
-        else:
-            #self.embedding_model = SentenceTransformer("baai/bge-large-en-v1.5")
-            self.embedding_model = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
-
-        if chat_model is not None:
-            self.chat_model = chat_model
-        else:
-            self.chat_model = ChatDatabricks(endpoint="databricks-dbrx-instruct")
-
+        self.embedding_model = embedding_model
+        self.chat_model = chat_model
         self.question_search_index = ai_query.init_question_search_index(self.list_questions(), self.embedding_model)
 
     def question_search(self, search_text, k: int = 1):
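Note: `init_ai_components` no longer falls back to Databricks-hosted models, so the AI entry points (`question_search`, `followup_question`) appear to require it to be called explicitly after authentication. A sketch of the minimal order of operations, reusing the `emb` and `chat` models from the sketches above:

```python
k.use_browser_token(access_token="KOBAI_ACCESS_TOKEN_FROM_BROWSER")

# Required before any AI calls; also builds the question search index.
k.init_ai_components(embedding_model=emb, chat_model=chat)

print(k.question_search("set ownership"))                      # best-matching saved question
print(k.followup_question("Which owner owns the most sets?"))  # runs it and answers over results
```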
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kobai-sdk
-Version: 0.2.9
+Version: 0.3.0
 Summary: A package that enables interaction with a Kobai tenant.
 Author-email: Ryan Oattes <ryan@kobai.io>
 License: Apache License
@@ -223,7 +223,6 @@ Requires-Dist: langchain-core
 Requires-Dist: langchain-community
 Requires-Dist: langchain_openai
 Requires-Dist: databricks_langchain
-Requires-Dist: sentence-transformers
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: bumpver; extra == "dev"
@@ -249,21 +248,50 @@ from kobai import tenant_client, spark_client, databricks_client
 
 schema = 'main.demo'
 uri = 'https://demo.kobai.io'
-tenant_id = '1'
 tenant_name = 'My Demo Tenant'
-
-k = tenant_client.TenantClient(tenant_name, tenant_id, uri, schema)
+k = tenant_client.TenantClient(tenant_name, uri, schema)
 ```
 
 2. Authenticate with the Kobai instance:
+Authentication can be performed using several methods, such as the device code flow, the on-behalf-of flow, or a browser-based token.
+
+#### Authentication via device code
+Step 1: Obtain an access token from IDM (Identity and Access Management):
 
 ```python
-client_id = 'your_Entra_app_id_here'
+from kobai import ms_authenticate
+
 tenant_id = 'your_Entra_directory_id_here'
+client_id = 'your_Entra_app_id_here'
+
+access_token = ms_authenticate.device_code(tenant_id, client_id)
+```
+
+Step 2: Use the token to retrieve the list of Kobai tenants (unless the tenant ID is already known):
 
-k.authenticate(client_id, tenant_id)
+```python
+tenants = k.get_tenants(id_token=access_token)
+print(tenants)
 ```
 
+Step 3: Authenticate with Kobai for the specific tenant using the IDM access token:
+
+```python
+kobai_tenant_id = "5c1ba715-3961-4835-8a10-6f6f963b53ff"
+k.use_access_token(access_token=access_token, tenant_id=kobai_tenant_id)
+```
+
+At this point, authentication to the Kobai tenant is complete.
+
+#### Authentication via browser token
+
+```python
+k.use_browser_token(access_token="KOBAI_ACCESS_TOKEN_FROM_BROWSER")
+```
+
+#### Authentication via on-behalf-of flow
+Sample code demonstrating authentication via the on-behalf-of flow is available on request.
+
 3. Initialize a Spark client using your current `SparkSession`, and generate semantically-rich SQL views describing this Kobai tenant:
 
 ```python
@@ -305,68 +333,41 @@ kobai_query_name = "Set ownership"
 question_json = k.run_question_remote(k.get_question_id(kobai_query_name)) # By questionName
 ```
 
-3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using either Azure OpenAI, Databricks or a user-provided chat model.
-
-#### Using Azure OpenAI
-
-###### Authentication Methods:
-
-1. ApiKey
-
-```python
-from kobai import ai_query, llm_config
-import json
-
-followup_question = "Which owner owns the most sets?"
-
-llm_config = llm_config.LLMConfig(endpoint="https://kobaipoc.openai.azure.com/", api_key="YOUR_API_KEY", deployment="gpt-4o-mini", llm_provider="azure_openai")
-
-output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
-print(output)
-```
-
-2. Azure Active Directory Authentication
+3. Ask a Follow-Up Question: Based on the initial results, you can ask a follow-up question using a user-provided chat and embedding model.
 
-Ensure that the logged-in tenant has access to Azure OpenAI.
-In case of databricks notebook, the logged in service principal should have access to Azure OpenAI.
+#### Using Databricks Embeddings and Chat Models in a Databricks Notebook
+Initialize the AI components by specifying the embedding and chat models, then ask follow-up questions interactively.
 
 ```python
-from kobai import ai_query, llm_config
+from databricks_langchain import DatabricksEmbeddings
+from langchain_community.chat_models import ChatDatabricks
 import json
 
-followup_question = "Which owner owns the most sets?"
-
-llm_config = llm_config.LLMConfig(endpoint="https://kobaipoc.openai.azure.com/", deployment="gpt-4o-mini", llm_provider="azure_openai")
-llm_config.get_azure_ad_token()
-
-output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
-print(output)
-```
-
-#### Using Databricks (Default Configuration)
-
-```python
-from kobai import ai_query, llm_config
-import json
+# Choose an embedding and chat model from Databricks model serving and initialize.
+embedding_model = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
+chat_model = ChatDatabricks(endpoint="databricks-gpt-oss-20b")
+k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
 
 followup_question = "Which owner owns the most sets?"
-
-llm_config = llm_config.LLMConfig()
-
-output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, llm_config=llm_config)
+output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
 print(output)
 ```
 
-#### User Provided Chat Model
+#### Using Azure OpenAI Embeddings and Chat Models
 
 ```python
-from kobai import ai_query, llm_config
-import json
 from langchain_openai import AzureChatOpenAI
+from langchain_openai import AzureOpenAIEmbeddings
+import json
 
 followup_question = "Which owner owns the most sets?"
 
-llm_config = llm_config.LLMConfig(debug=True)
+embedding_model = AzureOpenAIEmbeddings(
+model="text-embedding-3-small",
+azure_endpoint="https://kobaipoc.openai.azure.com/",
+api_key="YOUR_API_KEY",
+openai_api_version="2023-05-15"
+)
 
 chat_model = AzureChatOpenAI(
 azure_endpoint="https://kobaipoc.openai.azure.com/", azure_deployment="gpt-4o-mini",
@@ -375,7 +376,10 @@ openai_api_version="2024-02-15-preview",
 temperature=0.5,
 max_tokens=150,)
 
-output = ai_query.followup_question(followup_question, json.dumps(question_json), kobai_query_name, override_model=chat_model, llm_config=llm_config)
+k.init_ai_components(embedding_model=embedding_model, chat_model=chat_model)
+
+followup_question = "Which theme has the most sets?"
+output = k.followup_question(followup_question, question_id=k.get_question_id(kobai_query_name))
 print(output)
 ```
 
@@ -7,10 +7,10 @@ kobai/ai_query.py
 kobai/ai_rag.py
 kobai/databricks_client.py
 kobai/demo_tenant_client.py
+kobai/ms_authenticate.py
 kobai/spark_client.py
 kobai/tenant_api.py
 kobai/tenant_client.py
-kobai/test.py
 kobai_sdk.egg-info/PKG-INFO
 kobai_sdk.egg-info/SOURCES.txt
 kobai_sdk.egg-info/dependency_links.txt
@@ -7,7 +7,6 @@ langchain-core
 langchain-community
 langchain_openai
 databricks_langchain
-sentence-transformers
 
 [dev]
 black
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "kobai-sdk"
-version = "0.2.9"
+version = "0.3.0"
 description = "A package that enables interaction with a Kobai tenant."
 readme = "README.md"
 authors = [{ name = "Ryan Oattes", email = "ryan@kobai.io" }]
@@ -26,8 +26,7 @@ dependencies = [
     "langchain-core",
     "langchain-community",
     "langchain_openai",
-    "databricks_langchain",
-    "sentence-transformers"
+    "databricks_langchain"
 ]
 requires-python = ">=3.11"
 
@@ -1,5 +0,0 @@
-import llm_config, ai_query
-
-llm_config = llm_config.LLMConfig(api_key="sV9LuoA5n0PwqggMXOYMhhZlt56FpgnMXFohimPhD7Ug3CnBLbO8JQQJ99ALACYeBjFXJ3w3AAABACOGZm8X", llm_provider="azure_openai")
-llm_config.get_azure_ad_token()
-ai_query.followup_question_1(question="abc", data={}, question_name="sample", llm_config=llm_config)