kobai-sdk 0.2.7__tar.gz → 0.2.8rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kobai-sdk might be problematic.

@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: kobai-sdk
- Version: 0.2.7
+ Version: 0.2.8rc1
  Summary: A package that enables interaction with a Kobai tenant.
  Author-email: Ryan Oattes <ryan@kobai.io>
  License: Apache License
@@ -79,7 +79,13 @@ def followup_question(question, data, question_name, llm_config:llm_config, over
              openai_api_version=llm_config.api_version,
              temperature = llm_config.temperature,
              max_tokens = llm_config.max_tokens,
-         )
+         )
+     else:
+         chat_model = ChatDatabricks(
+             endpoint = llm_config.endpoint,
+             temperature = llm_config.temperature,
+             max_tokens = llm_config.max_tokens,
+         )

      if llm_config.use_simple_prompt:
          prompt = PromptTemplate.from_template(SIMPLE_PROMPT_TEMPLATE)
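
Note on the hunk above: the new else branch falls back to ChatDatabricks (a Databricks model serving chat model) whenever the configured provider is not the Azure OpenAI path. A minimal, hedged sketch of driving that branch; the LLMConfig keyword names below are inferred from the attributes this hunk reads (llm_provider, endpoint, temperature, max_tokens) and are not confirmed by the package docs:

    from kobai import llm_config, ai_query

    # Assumption: a non-"azure_openai" provider value selects the ChatDatabricks branch,
    # and "endpoint" names a Databricks model serving endpoint (hypothetical name below).
    cfg = llm_config.LLMConfig(llm_provider="databricks",
                               endpoint="databricks-meta-llama-3-70b-instruct",
                               temperature=0.1,
                               max_tokens=500)

    # Positional arguments follow the signature shown in the hunk header:
    # (question, data, question_name, llm_config, ...)
    answer = ai_query.followup_question("Which customers appear in these results?",
                                        {"rows": []},        # prior query output used as context
                                        "sample_question",
                                        cfg)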
@@ -0,0 +1,113 @@
+ from kobai import tenant_client
+
+ def generate_sentences(tc: tenant_client.TenantClient, replica_schema=None):
+     if tc.spark_client is None:
+         return None
+
+     ss = tc.spark_client.spark_session
+
+     print("Getting Tenant Config")
+     tenant_json = tc.get_tenant_config()
+
+     concepts = __get_concept_metadata(tenant_json, tc.schema, tc.model_id)
+
+     print("Dropping and Recreating the RAG Table")
+     ss.sql(__create_rag_table_sql(tc.schema, tc.model_id))
+
+     print("Generating Extraction SQL")
+     sql_statements = []
+     sql_statements.extend(__generate_sentence_sql_concept_literals(concepts, tc.schema, tc.model_id))
+     sql_statements.extend(__generate_sentence_sql_concept_relations(concepts, tc.schema, tc.model_id))
+
+     print("Running the Extraction")
+     for sql_statement in sql_statements:
+         ss.sql(sql_statement)
+
+     if replica_schema is not None:
+         print("Replicating Schema")
+         ss.sql(__create_rag_table_sql(replica_schema, tc.model_id))
+         ss.sql(__replicate_to_catalog_sql(tc.schema, replica_schema, tc.model_id))
+
+ def __create_rag_table_sql(schema, model_id):
+     return f"CREATE OR REPLACE TABLE {schema}.rag_{model_id} (id BIGINT GENERATED BY DEFAULT AS IDENTITY, content STRING, type string, concept_id string, vector ARRAY<FLOAT>) TBLPROPERTIES (delta.enableChangeDataFeed = true)"
+
+ def __replicate_to_catalog_sql(base_schema, target_schema, model_id):
+     move_sql = f"INSERT INTO {target_schema}.rag_{model_id} (content, concept_id, type)"
+     move_sql += f" SELECT content, concept_id, type FROM {base_schema}.rag_{model_id}"
+     return move_sql
+
+ def __generate_sentence_sql_concept_literals(concepts, schema, model_id):
+     statements = []
+     for con in concepts:
+         sql = f"'This is a {con['label']}. '"
+         sql += " || 'It is identified by ' || split(cid._conceptid,'#')[1] || '. '"
+
+         sql_from = f"{con['con_table_name']} cid"
+         for prop in con["properties"]:
+
+             sql_from += f" INNER JOIN {con['prop_table_name']} AS {prop['label']}"
+             sql_from += f" ON cid._conceptid = {prop['label']}._conceptid"
+             sql_from += f" AND {prop['label']}.type = 'l'"
+             sql_from += f" AND {prop['label']}.name = '{prop['name']}'"
+
+             sql += f" || 'The {prop['label']} is ' || any_value({prop['label']}.value) IGNORE NULLS || '. '"
+
+         full_sql = f"INSERT INTO {schema}.rag_{model_id} (content, concept_id, type)"
+         full_sql += f" SELECT {sql} content, cid._conceptid concept_id, 'c' type FROM {sql_from} GROUP BY cid._conceptid"
+
+         statements.append(full_sql)
+         #test_df = spark.sql(full_sql)
+     return statements
+
+ def __generate_sentence_sql_concept_relations(concepts, schema, model_id):
+     statements = []
+     for con in concepts:
+
+         sql_from = f"{con['prop_table_name']} "
+         for rel in con["relations"]:
+
+             sql = f"'The {con['label']} identified by ' || split(_conceptid,'#')[1]"
+             sql += f" || ' has a relationship called {rel['label']} that connects it to one or more {rel['target_con_label']} identified by '"
+             sql += " || concat_ws(', ', array_agg(split(value, '#')[1])) || '. '"
+
+
+             full_sql = f"INSERT INTO {schema}.rag_{model_id} (content, concept_id, type)"
+             full_sql += f" SELECT {sql} content, _conceptid concept_id, 'e' type FROM {sql_from} GROUP BY _conceptid"
+
+             statements.append(full_sql)
+     return statements
+
+ def __get_concept_metadata(tenant_json, schema, model_id):
+     target_concept_labels = {}
+     for d in tenant_json["domains"]:
+         for c in d["concepts"]:
+             target_concept_labels[c["uri"]] = d["name"] + " " + c["label"]
+
+     concepts = []
+
+     for d in tenant_json["domains"]:
+         for c in d["concepts"]:
+             con_props = []
+             for col in c["properties"]:
+                 con_props.append({
+                     #"col_name": d["name"] + "_" + c["label"] + "_" + col["label"],
+                     "label": col["label"],
+                     "name": f"{model_id}/{d['name']}/{c['label']}#{col['label']}"
+                 })
+             con_rels = []
+             for rel in c["relations"]:
+                 con_rels.append({
+                     "label": rel["label"],
+                     "name": f"{model_id}/{d['name']}/{c['label']}#{rel['label']}",
+                     "target_con_label": target_concept_labels[rel["relationTypeUri"]]
+                 })
+             concepts.append({
+                 "label": d["name"] + " " + c["label"],
+                 #"id_column": d["name"] + "_" + c["label"],
+                 "relations": con_rels,
+                 "properties": con_props,
+                 #"table_name": "data_" + k.model_id + "_" + d["name"] + "_" + c["label"] + "_w",
+                 "prop_table_name": f"{schema}.data_{model_id}_{d['name']}_{c['label']}_np",
+                 "con_table_name": f"{schema}.data_{model_id}_{d['name']}_{c['label']}_c",
+             })
+     return concepts
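
The new kobai/ai_rag.py module materializes a rag_<model_id> Delta table of generated sentences: one 'c' row per concept instance (built from its literal properties) and one 'e' row per relation, with change data feed enabled so the rows can later be embedded and indexed. A usage sketch, assuming tc is a tenant_client.TenantClient that is already authenticated and has a Spark session attached (that setup is outside this diff) and that the replica schema name is purely illustrative:

    from kobai import ai_rag

    # Rebuild the RAG table in the tenant's own schema.
    ai_rag.generate_sentences(tc)

    # Optionally also copy the generated rows into a second schema,
    # e.g. one that a vector search index reads from (name is hypothetical).
    ai_rag.generate_sentences(tc, replica_schema="main.rag_replica")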
@@ -441,10 +441,10 @@ class TenantClient:

          return ai_query.followup_question(followup_question,
                                            data,
-                                           question_name,
+                                           question_name,
+                                           None,
                                            override_model=override_model,
-                                           use_simple_prompt=use_simple_prompt,
-                                           debug=debug)
+                                           )

      def process_question_results(self, question_def):

@@ -1018,7 +1018,7 @@ class TenantClient:
              datasource_label (string): Label of datasource to use.
              table_name (string): Name of table to use from specified datasource.
          """
-
+         data_source_id = 0
          existing_datasource = self.list_data_sources()
          for d in existing_datasource["used"]:
              if datasource_label.lower() == d["name"].lower():
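
Note on the hunk above: seeding data_source_id = 0 before the lookup loop gives the variable a value even when no existing datasource matches datasource_label; without it, a later use of the variable would presumably raise UnboundLocalError for a first-time datasource. A stripped-down illustration of the failure mode being avoided (simplified names, not the actual method):

    def find_id(sources, label):
        for s in sources:
            if s["name"].lower() == label.lower():
                data_source_id = s["id"]
        return data_source_id  # UnboundLocalError if nothing matched and no default was set

    # With data_source_id = 0 assigned up front, the no-match case falls through to 0,
    # which the caller can treat as "datasource not found yet" (interpretation assumed).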
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.2
  Name: kobai-sdk
- Version: 0.2.7
+ Version: 0.2.8rc1
  Summary: A package that enables interaction with a Kobai tenant.
  Author-email: Ryan Oattes <ryan@kobai.io>
  License: Apache License
@@ -4,13 +4,13 @@ README.md
  pyproject.toml
  kobai/__init__.py
  kobai/ai_query.py
+ kobai/ai_rag.py
  kobai/databricks_client.py
  kobai/demo_tenant_client.py
  kobai/llm_config.py
  kobai/spark_client.py
  kobai/tenant_api.py
  kobai/tenant_client.py
- kobai/test.py
  kobai_sdk.egg-info/PKG-INFO
  kobai_sdk.egg-info/SOURCES.txt
  kobai_sdk.egg-info/dependency_links.txt
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "kobai-sdk"
- version = "0.2.7"
+ version = "0.2.8rc1"
  description = "A package that enables interaction with a Kobai tenant."
  readme = "README.md"
  authors = [{ name = "Ryan Oattes", email = "ryan@kobai.io" }]
@@ -1,5 +0,0 @@
- import llm_config, ai_query
-
- llm_config = llm_config.LLMConfig(api_key="sV9LuoA5n0PwqggMXOYMhhZlt56FpgnMXFohimPhD7Ug3CnBLbO8JQQJ99ALACYeBjFXJ3w3AAABACOGZm8X", llm_provider="azure_openai")
- llm_config.get_azure_ad_token()
- ai_query.followup_question_1(question="abc", data={}, question_name="sample", llm_config=llm_config)
4 files without changes