npcpy 1.1.28__py3-none-any.whl → 1.2.32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- npcpy/data/audio.py +16 -38
- npcpy/data/image.py +29 -29
- npcpy/data/load.py +4 -3
- npcpy/data/text.py +28 -28
- npcpy/data/video.py +6 -6
- npcpy/data/web.py +49 -21
- npcpy/ft/__init__.py +0 -0
- npcpy/ft/diff.py +110 -0
- npcpy/ft/ge.py +115 -0
- npcpy/ft/memory_trainer.py +171 -0
- npcpy/ft/model_ensembler.py +357 -0
- npcpy/ft/rl.py +360 -0
- npcpy/ft/sft.py +248 -0
- npcpy/ft/usft.py +128 -0
- npcpy/gen/audio_gen.py +24 -0
- npcpy/gen/embeddings.py +13 -13
- npcpy/gen/image_gen.py +37 -15
- npcpy/gen/response.py +287 -111
- npcpy/gen/video_gen.py +10 -9
- npcpy/llm_funcs.py +447 -79
- npcpy/memory/command_history.py +201 -48
- npcpy/memory/kg_vis.py +74 -74
- npcpy/memory/knowledge_graph.py +482 -115
- npcpy/memory/memory_processor.py +81 -0
- npcpy/memory/search.py +70 -70
- npcpy/mix/debate.py +192 -3
- npcpy/npc_compiler.py +1541 -879
- npcpy/npc_sysenv.py +250 -78
- npcpy/serve.py +1036 -321
- npcpy/sql/ai_function_tools.py +257 -0
- npcpy/sql/database_ai_adapters.py +186 -0
- npcpy/sql/database_ai_functions.py +163 -0
- npcpy/sql/model_runner.py +19 -19
- npcpy/sql/npcsql.py +706 -507
- npcpy/sql/sql_model_compiler.py +156 -0
- npcpy/tools.py +20 -20
- npcpy/work/plan.py +8 -8
- npcpy/work/trigger.py +3 -3
- {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/METADATA +169 -9
- npcpy-1.2.32.dist-info/RECORD +54 -0
- npcpy-1.1.28.dist-info/RECORD +0 -40
- {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/WHEEL +0 -0
- {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/licenses/LICENSE +0 -0
- {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/top_level.txt +0 -0
npcpy/sql/npcsql.py
CHANGED
@@ -1,530 +1,789 @@
 import pandas as pd
-import
-from
+import re
+import os
+from pathlib import Path
+from typing import Dict, List, Set, Union, Any, Optional, Callable
+from collections import defaultdict, deque
+from sqlalchemy import create_engine, text, Engine, inspect
+import inspect as py_inspect
+
+# --- Explicitly import llm_funcs as a module object ---
+try:
+    import npcpy.llm_funcs as llm_funcs
+except ImportError:
+    print("Warning: `npcpy.llm_funcs` not found. Providing mock AI functions for execution.")
+    class MockLlmFuncs:
+        def generate_text(self, prompt: str, npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: generate_text('{prompt}')")
+            return {"response": f"MOCK: Generated text for '{prompt}'"}
+        def analyze_sentiment(self, text: str, npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: analyze_sentiment('{text}')")
+            return {"response": f"MOCK: Positive sentiment for '{text}'"}
+        def summarize(self, text: str, npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: summarize('{text}')")
+            return {"response": f"MOCK: Summary of '{text}'"}
+        def translate(self, text: str, source_lang='auto', target_lang='en', npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: translate('{text}', '{source_lang}', '{target_lang}')")
+            return {"response": f"MOCK: Translated '{text}' from {source_lang} to {target_lang}"}
+        def extract_entities(self, text: str, npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: extract_entities('{text}')")
+            return {"response": f"MOCK: Entities from '{text}'"}
+        def generate_embedding(self, text: str, model='default', npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: generate_embedding('{text}', '{model}')")
+            return {"response": f"MOCK: Embedding for '{text}'"}
+    llm_funcs = MockLlmFuncs()
+
+# Assuming these are available in the npcpy environment
+from npcpy.memory.command_history import create_engine_from_path
+try:
+    from npcpy.npc_compiler import Team
+except ImportError:
+    print("Warning: `npcpy.npc_compiler.Team` not found. Providing mock Team class.")
+    class Team:
+        def __init__(self, team_path: str = "./npc_team/", npcs: Optional[List[Any]] = None):
+            print(f"MOCK NPC: Team initialized for path: {team_path}")
+            self.npcs = npcs if npcs is not None else []
+        def get_npc(self, npc_ref: str):
+            print(f"MOCK NPC: get_npc called for: {npc_ref}")
+            return {"name": npc_ref, "type": "mock_npc"}
+
+
+# --- PANDAS BACKEND CONFIGURATION ---
+try:
+    import modin.pandas as pd_modin
+    import snowflake.snowpark.modin.plugin
+    pd = pd_modin
+    PANDAS_BACKEND = 'snowflake_modin'
+except ImportError:
+    try:
+        import modin.pandas as pd_modin
+        pd = pd_modin
+        PANDAS_BACKEND = 'modin'
+    except ImportError:
+        import pandas as pd
+        PANDAS_BACKEND = 'pandas'
+# print(f"Using pandas backend: {PANDAS_BACKEND}") # Removed for cleaner output
+
+
+# --- AI Function Mappings ---
+class DatabaseAIFunctionMapper:
+    @staticmethod
+    def get_snowflake_cortex_mapping() -> Dict[str, Dict[str, Any]]:
+        return {
+            'get_llm_response': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda prompt, **kwargs: f"SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b', {prompt})"
+            },
+            'extract_facts': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b', CONCAT('Extract concise facts from this text. Return JSON with fact_list array. Text: ', {text}))"
+            },
+            'get_facts': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Extract facts from this text. Return JSON with facts array containing statement, source_text, and type fields. Text: ' || {text})"""
+            },
+            'identify_groups': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Identify main groups these facts could be organized into. Return JSON with groups array. Facts: ' || {text})"""
+            },
+            'assign_groups_to_fact': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Assign this fact to relevant groups. Return JSON with groups array. Fact: ' || {text})"""
+            },
+            'generate_group_candidates': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Generate specific conceptual groups for these items. Return JSON with groups array. Items: ' || {text})"""
+            },
+            'remove_idempotent_groups': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Remove conceptually identical groups, favor specificity. Return JSON with distinct_groups array. Groups: ' || {text})"""
+            },
+            'zoom_in': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Infer new implied facts from existing facts. Return JSON with implied_facts array. Facts: ' || {text})"""
+            },
+            'generate_groups': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Generate conceptual groups for facts. Return JSON with groups array. Facts: ' || {text})"""
+            },
+            'remove_redundant_groups': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Remove redundant groups, merge similar concepts. Return JSON with groups array. Groups: ' || {text})"""
+            },
+            'criticize': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Provide critical analysis and constructive criticism. Input: ' || {text})"""
+            },
+            'synthesize': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Synthesize information from multiple perspectives. Input: ' || {text})"""
+            },
+            'breathe': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Condense conversation context into key extractions. Return JSON with high_level_objective, most_recent_task, accomplishments, failures. Conversation: ' || {text})"""
+            },
+            'abstract': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Create more abstract categories from groups. Return JSON with groups array. Groups: ' || {text})"""
+            }
+        }
 
+
+    @staticmethod
+    def get_databricks_ai_mapping() -> Dict[str, Dict[str, Any]]:
+        return {
+            'generate_text': {
+                'databricks_function': 'serving.predict',
+                'transformer': lambda prompt, model='databricks-dolly', **kwargs:
+                    f"serving.predict('{model}', '{prompt}')"
+            },
+        }
+
+    @staticmethod
+    def get_bigquery_ai_mapping() -> Dict[str, Dict[str, Any]]:
+        return {
+            'generate_text': {
+                'bigquery_function': 'ML.GENERATE_TEXT',
+                'transformer': lambda prompt, model='text-bison', **kwargs:
+                    f"ML.GENERATE_TEXT(MODEL `{model}`, '{prompt}')"
+            },
+        }
 
+# --- Native Database AI Transformer (INCLUDED in the module) ---
+class NativeDatabaseAITransformer:
+    def __init__(self, database_type: str):
+        self.database_type = database_type.lower()
+        self.function_mappings = self._get_database_mappings()
+
+    def _get_database_mappings(self) -> Dict[str, Dict[str, Any]]:
+        mappings = {
+            'snowflake': DatabaseAIFunctionMapper.get_snowflake_cortex_mapping(),
+            'databricks': DatabaseAIFunctionMapper.get_databricks_ai_mapping(),
+            'bigquery': DatabaseAIFunctionMapper.get_bigquery_ai_mapping()
+        }
+        return mappings.get(self.database_type, {})
+
+    def transform_ai_function(self, function_name: str, **kwargs) -> str:
+        mapping = self.function_mappings.get(function_name)
+        if not mapping:
+            raise ValueError(f"No native mapping found for function: {function_name} for database type {self.database_type}")
+
+        transformer: Callable[..., str] = mapping.get('transformer')
+        if not transformer:
+            raise ValueError(f"No transformer found for function: {function_name} for database type {self.database_type}")
+
+        if function_name == 'generate_text' and 'text' in kwargs:
+            kwargs['prompt'] = kwargs.pop('text')
+
+        return transformer(**kwargs)
+
+# --- NPCSQL Operations ---
 class NPCSQLOperations:
-    def __init__(
-        if isinstance(
-        def apply_spread_sync(text):
-            results = []
-            for variation in variations:
-                prompt = f"""Variation: {variation}
-                Context: {context_text}
-                Text to analyze: {text}
-                Analyze the above text with {variation} perspective."""
-                result = self.execute_stage(
-                    {"step_name": f"spread_{variation}", "npc": npc, "task": prompt},
-                    {},
-                    self.jinja_env,
-                )
-                results.append(result[0]["response"])
-            # Sync results
-            sync_result = self.aggregate_step_results(
-                [{"response": r} for r in results], sync_strategy
-            )
-            return sync_result
-        return df[column].apply(apply_spread_sync)
-    # COMPARISON OPERATIONS
-    def contrast(
-        self,
-        df: pd.DataFrame,
-        col1: str,
-        col2: str,
-        npc: str,
-        context: Union[str, Dict, List[str]],
-        comparison_framework: str,
+    def __init__(
+        self,
+        npc_directory: str,
+        db_engine: Union[str, Engine] = "~/npcsh_history.db"
+    ):
+        self.npc_directory = npc_directory
+
+        if isinstance(db_engine, str):
+            self.engine = create_engine_from_path(db_engine)
+        else:
+            self.engine = db_engine
+
+        self.npc_loader = None
+        self.function_map = self._build_function_map()
+
+    def _get_team(self):
+        return (self.npc_loader
+                if hasattr(self.npc_loader, 'npcs')
+                else None)
+
+    def _build_function_map(self):
+        import types
+
+        function_map = {}
+        for name in dir(llm_funcs):
+            if name.startswith('_'):
+                continue
+            obj = getattr(llm_funcs, name)
+            if (isinstance(obj, types.FunctionType) or
+                (isinstance(obj, types.MethodType) and obj.__self__ is not None)):
+                function_map[name] = obj
+
+        return function_map
+
+    def _resolve_npc_reference(self, npc_ref: str):
+        if not npc_ref or not self.npc_loader:
+            return None
+
+        if npc_ref.endswith('.npc'):
+            npc_ref = npc_ref[:-4]
+
+        npc = self.npc_loader.get_npc(npc_ref)
+        if npc:
+            return npc
+
+        if ',' in npc_ref:
+            npc_names = [
+                name.strip() for name in npc_ref.split(',')
+            ]
+            npcs = [
+                self.npc_loader.get_npc(name)
+                for name in npc_names
+            ]
+            npcs = [npc for npc in npcs if npc is not None]
+
+            if npcs:
+                temp_team = Team(npcs=npcs)
+                return temp_team
+
+        return None
+
+    def execute_ai_function(
+        self,
+        func_name: str,
+        df: pd.DataFrame,
+        **params
     ) -> pd.Series:
-        """
-        1. delegate(COLUMN, npc, query, context, jinxs, reviewers)
-        2. dilate(COLUMN, npc, query, context, scope, reviewers)
-        3. erode(COLUMN, npc, query, context, scope, reviewers)
-        4. strategize(COLUMN, npc, query, context, timeline, constraints)
-        5. validate(COLUMN, npc, query, context, criteria)
-        6. synthesize(COLUMN, npc, query, context, framework)
-        7. decompose(COLUMN, npc, query, context, granularity)
-        8. criticize(COLUMN, npc, query, context, framework)
-        9. summarize(COLUMN, npc, query, context, style)
-        10. advocate(COLUMN, npc, query, context, perspective)
-        MULTI-PROMPT/PARALLEL OPERATIONS
-        11. spread_and_sync(COLUMN, npc, query, variations, sync_strategy, context)
-        12. bootstrap(COLUMN, npc, query, sample_params, sync_strategy, context)
-        13. resample(COLUMN, npc, query, variation_strategy, sync_strategy, context)
-        COMPARISON OPERATIONS
-        14. mediate(COL1, COL2, npc, query, context, resolution_strategy)
-        15. contrast(COL1, COL2, npc, query, context, comparison_framework)
-        16. reconcile(COL1, COL2, npc, query, context, alignment_strategy)
-        MULTI-COLUMN INTEGRATION
-        17. integrate(COLS[], npc, query, context, integration_method)
-        18. harmonize(COLS[], npc, query, context, harmony_rules)
-        19. orchestrate(COLS[], npc, query, context, workflow)
-        """
-    # Example usage in SQL-like syntax:
-    """
-    def execute_sql(self, sql: str) -> pd.DataFrame:
-        # This would be implemented to parse and execute SQL with our custom functions
-        # Example SQL:
-        '''
-        SELECT
-            customer_id,
-            synthesize(feedback_text,
-                npc='analyst',
-                context=customer_segment,
-                framework='satisfaction') as analysis,
-            spread_and_sync(price_sensitivity,
-                npc='pricing_agent',
-                variations=['conservative', 'aggressive'],
-                sync_strategy='balanced_analysis',
-                context=market_context) as price_strategy
-        FROM customer_data
-        '''
-        pass
-    """
-
-class NPCDBTAdapter:
-    def __init__(self, npc_sql: NPCSQLOperations):
-        self.npc_sql = npc_sql
-        self.models = {}
-    def ref(self, model_name: str) -> pd.DataFrame:
-        # Implementation for model referencing
-        return self.models.get(model_name)
-    def parse_model(self, model_sql: str) -> pd.DataFrame:
-        # Parse the SQL model and execute with our custom functions
-        pass
-
-class AIFunctionParser:
-    """Handles parsing and extraction of AI function calls from SQL"""
-    @staticmethod
-    def extract_function_params(sql: str) -> Dict[str, Dict]:
-        """Extract AI function parameters from SQL"""
-        ai_functions = {}
-        pattern = r"(\w+)\s*\(((?:[^()]*|\([^()]*\))*)\)"
-        matches = re.finditer(pattern, sql)
-        for match in matches:
-            func_name = match.group(1)
-            if func_name in ["synthesize", "spread_and_sync"]:
-                params = match.group(2).split(",")
-                ai_functions[func_name] = {
-                    "query": params[0].strip().strip("\"'"),
-                    "npc": params[1].strip().strip("\"'"),
-                    "context": params[2].strip().strip("\"'"),
+        if func_name not in self.function_map:
+            raise ValueError(f"Unknown AI function: {func_name}")
+
+        func = self.function_map[func_name]
+
+        npc_ref = params.get('npc', '')
+        resolved_npc = self._resolve_npc_reference(npc_ref)
+
+        resolved_team = self._get_team()
+        if not resolved_team and hasattr(resolved_npc, 'team'):
+            resolved_team = resolved_npc.team
+
+        def apply_function_to_row(row):
+            query_template = params.get('query', '')
+            column_name = params.get('column', '')
+
+            column_value = str(row[column_name]) if column_name and column_name in row.index else column_name
+
+            if query_template:
+                row_data = {
+                    col: str(row[col])
+                    for col in df.columns
                 }
+                row_data['column_value'] = column_value
+                query = query_template.format(**row_data)
+            else:
+                query = column_value
+
+            sig = py_inspect.signature(func)
+            func_params = {
+                k: v for k, v in {
+                    'prompt': query,
+                    'text': query,
+                    'npc': resolved_npc,
+                    'team': resolved_team,
+                    'context': params.get('context', '')
+                }.items() if k in sig.parameters
+            }
+
+            result = func(**func_params)
+            return (result.get("response", "")
+                    if isinstance(result, dict)
+                    else str(result))
+
+        return df.apply(apply_function_to_row, axis=1)
+
+
+# --- SQL Model Definition ---
 class SQLModel:
-    def __init__(
+    def __init__(
+        self,
+        name: str,
+        content: str,
+        path: str,
+        npc_directory: str
+    ):
         self.name = name
         self.content = content
         self.path = path
-        self.npc_directory = npc_directory
+        self.npc_directory = npc_directory
+
+        config_match = re.search(
+            r'\{\{[\s]*config\((.*?)\)[\s]*\}\}',
+            content,
+            re.DOTALL
+        )
+        if config_match:
+            self.config = self._parse_config(config_match.group(1))
+        else:
+            self.config = {'materialized': 'table'}
 
         self.dependencies = self._extract_dependencies()
         self.has_ai_function = self._check_ai_functions()
+
+        # DEBUG print to confirm if AI functions are found
         self.ai_functions = self._extract_ai_functions()
+        if self.ai_functions:
+            print(f"DEBUG SQLModel: Model '{self.name}' extracted AI functions: {list(self.ai_functions.keys())}")
+        else:
+            print(f"DEBUG SQLModel: Model '{self.name}' has no AI functions found by _extract_ai_functions.")
+
+
+    def _parse_config(self, config_str: str) -> Dict:
+        config = {}
+        for item in re.split(r',\s*(?=[a-zA-Z0-9_]+\s*=)', config_str):
+            if '=' in item:
+                key, value = item.split('=', 1)
+                key = key.strip()
+                value = value.strip().strip('"').strip("'")
+                config[key] = value
+        return config
 
     def _extract_dependencies(self) -> Set[str]:
-        """Extract model dependencies using ref() calls"""
        pattern = r"\{\{\s*ref\(['\"]([^'\"]+)['\"]\)\s*\}\}"
        return set(re.findall(pattern, self.content))
+
     def _check_ai_functions(self) -> bool:
-        ""
-        ai_functions = [
-            "synthesize",
-            "spread_and_sync",
-            "delegate",
-            "dilate",
-            "erode",
-            "strategize",
-            "validate",
-            "decompose",
-            "criticize",
-            "summarize",
-            "advocate",
-            "bootstrap",
-            "resample",
-            "mediate",
-            "contrast",
-            "reconcile",
-            "integrate",
-            "harmonize",
-            "orchestrate",
-        ]
-        return any(func in self.content for func in ai_functions)
+        return "nql." in self.content
 
     def _extract_ai_functions(self) -> Dict[str, Dict]:
-        """Extract
+        """Extract AI function calls from SQL content with improved robustness."""
+        import types
+
         ai_functions = {}
-        pattern
+        # More robust pattern that handles nested parentheses better
+        # This captures: nql.function_name(args...)
+        pattern = r"nql\.(\w+)\s*\(((?:[^()]|\([^()]*\))*)\)"
+
+        matches = re.finditer(pattern, self.content, flags=re.DOTALL | re.IGNORECASE)
+
+        available_functions = []
+        for name in dir(llm_funcs):
+            if name.startswith('_'):
+                continue
+            obj = getattr(llm_funcs, name)
+            if (isinstance(obj, types.FunctionType) or
+                (isinstance(obj, types.MethodType) and obj.__self__ is not None)):
+                available_functions.append(name.lower()) # Store as lowercase for comparison
+
         for match in matches:
+            full_call_string = match.group(0).strip()
+            func_name = match.group(1).lower() # Convert to lowercase for lookup
+
+            if func_name in available_functions:
+                params_str = match.group(2)
+
+                # Simplified parameter extraction
+                params_list = []
+                balance = 0
+                in_quote = None
+                current_param_chars = []
+
+                for char in params_str:
+                    if char in ("'", '"'):
+                        if in_quote == char:
+                            in_quote = None
+                        elif in_quote is None:
+                            in_quote = char
+                        current_param_chars.append(char)
+                    elif char == '(' and in_quote is None:
+                        balance += 1
+                        current_param_chars.append(char)
+                    elif char == ')' and in_quote is None:
+                        balance -= 1
+                        current_param_chars.append(char)
+                    elif char == ',' and balance == 0 and in_quote is None:
+                        params_list.append("".join(current_param_chars).strip())
+                        current_param_chars = []
+                    else:
+                        current_param_chars.append(char)
+
+                if current_param_chars:
+                    params_list.append("".join(current_param_chars).strip())
+
+                params = [p.strip().strip("'\"") for p in params_list]
+
+                column_param = params[0] if len(params) > 0 else ""
+                npc_param = params[1] if len(params) > 1 else ""
+                query_param = params[2] if len(params) > 2 else ""
+                context_param = params[3] if len(params) > 3 else None
+
+                if npc_param.endswith(".npc"):
+                    npc_param = npc_param[:-4]
+                if self.npc_directory and npc_param.startswith(self.npc_directory):
+                    npc_param = npc_param[len(self.npc_directory):].strip('/')
+
                 ai_functions[func_name] = {
-                    "column":
-                    "npc":
-                    "query":
-                    "context":
+                    "column": column_param,
+                    "npc": npc_param,
+                    "query": query_param,
+                    "context": context_param,
+                    "full_call_string": full_call_string,
+                    "original_func_name": match.group(1) # Store original case
                 }
+            else:
+                print(f"DEBUG SQLModel: Function '{func_name}' not found in available LLM funcs ({available_functions}). Skipping this NQL call.")
 
+        return ai_functions
 
+# --- Model Compiler ---
 class ModelCompiler:
-    def __init__(
-        self
+    def __init__(
+        self,
+        models_dir: str,
+        target_engine: Union[str, Engine],
+        npc_directory: str = "./npc_team/",
+        external_engines: Optional[Dict[str, Engine]] = None,
+        target_schema: Optional[str] = None
+    ):
+        self.models_dir = Path(os.path.expanduser(models_dir))
+
+        if isinstance(target_engine, str):
+            self.target_engine = create_engine_from_path(
+                target_engine
+            )
+        else:
+            self.target_engine = target_engine
+
+        self.external_engines = external_engines or {}
+        self.target_schema = target_schema
         self.models: Dict[str, SQLModel] = {}
-        self.npc_operations = NPCSQLOperations(
+        self.npc_operations = NPCSQLOperations(
+            npc_directory,
+            self.target_engine
+        )
         self.npc_directory = npc_directory
+
+        try:
+            self.npc_team = Team(team_path=npc_directory)
+            self.npc_operations.npc_loader = self.npc_team
+        except Exception as e:
+            self.npc_team = None
+            print(f"Warning: Could not load NPC team from {npc_directory}. AI functions relying on NPC context might fail: {e}")
+
+    def _get_engine(self, source_name: str) -> Engine:
+        if source_name.lower() == 'local' or not self.external_engines:
+            return self.target_engine
+
+        for key, engine in self.external_engines.items():
+            if key.lower() == source_name.lower():
+                return engine
+        return self.target_engine
+
+    def _has_native_ai_functions(self, source_name: str) -> bool:
+        ai_enabled_dbs = {'snowflake', 'databricks', 'bigquery'}
+        return source_name.lower() in ai_enabled_dbs
 
     def discover_models(self):
-        """Discover all SQL models in the models directory"""
         self.models = {}
+        sql_files = list(self.models_dir.glob("**/*.sql"))
+
+        for sql_file in sql_files:
             model_name = sql_file.stem
             with open(sql_file, "r") as f:
                 content = f.read()
+
             self.models[model_name] = SQLModel(
-                model_name,
+                model_name,
+                content,
+                str(sql_file),
+                str(sql_file.parent)
             )
-
+
         return self.models
 
     def build_dag(self) -> Dict[str, Set[str]]:
-        """Build dependency graph"""
         dag = {}
         for model_name, model in self.models.items():
             dag[model_name] = model.dependencies
-        print(f"Built DAG: {dag}")
         return dag
 
     def topological_sort(self) -> List[str]:
-        """Generate execution order using topological sort"""
         dag = self.build_dag()
+
+        true_in_degree = {model_name: 0 for model_name in self.models.keys()}
+        adj_list = defaultdict(list)
 
-        for
-        for
+        for model_name, model in self.models.items():
+            for dependency in model.dependencies:
+                if dependency not in self.models:
+                    raise ValueError(f"Dependency '{dependency}' of model '{model_name}' not found in discovered models.")
+                true_in_degree[model_name] += 1
+                adj_list[dependency].append(model_name)
 
-        queue = deque([
+        queue = deque([model_name for model_name in self.models.keys() if true_in_degree[model_name] == 0])
         result = []
+
         while queue:
-            result.append(
+            current_model = queue.popleft()
+            result.append(current_model)
 
-            for
-            queue.append(dependent)
+            for dependent_model in adj_list[current_model]:
+                true_in_degree[dependent_model] -= 1
+                if true_in_degree[dependent_model] == 0:
+                    queue.append(dependent_model)
 
-        if len(result) != len(
-            raise ValueError("Circular dependency detected")
+        if len(result) != len(self.models):
+            raise ValueError("Circular dependency detected or some models not processed.")
 
-        print(f"Execution order: {result}")
         return result
 
-    def _replace_model_references(self,
-        ref_pattern =
+    def _replace_model_references(self, sql_content: str) -> str:
+        ref_pattern = (
+            r"\{\{\s*ref\s*\(\s*['\"]([^'\"]+)['\"]\s*\)\s*\}\}"
+        )
 
         def replace_ref(match):
             model_name = match.group(1)
             if model_name not in self.models:
                 raise ValueError(
-                    f"Model '{model_name}' not found during
+                    f"Model '{model_name}' referenced by '{{{{ ref('{model_name}') }}}}' not found during compilation."
                 )
+
+            if self.target_schema:
+                return f"{self.target_schema}.{model_name}"
             return model_name
 
-        replaced_sql = re.sub(ref_pattern, replace_ref,
+        replaced_sql = re.sub(ref_pattern, replace_ref, sql_content)
         return replaced_sql
 
-    def
-    return
-    def
+    def _clean_sql_for_execution(self, sql_content: str) -> str:
+        config_pattern = r'\{\{[\s]*config\((.*?)\)[\s]*\}\}'
+        cleaned_sql = re.sub(config_pattern, '', sql_content, flags=re.DOTALL).strip()
+        cleaned_sql = re.sub(r"--.*?\n", "\n", cleaned_sql)
+        cleaned_sql = re.sub(r"/\*.*?\*/", "", cleaned_sql, flags=re.DOTALL)
+        cleaned_sql = re.sub(r"\s+", " ", cleaned_sql).strip()
+        return cleaned_sql
+
+    def _execute_standard_sql(
+        self,
+        sql_to_execute: str,
+        engine: Engine
+    ) -> pd.DataFrame:
+        return pd.read_sql(sql_to_execute, engine)
+
+    def _execute_ai_model(self, cleaned_sql_content: str, model: SQLModel) -> pd.DataFrame:
+        processed_sql = self._replace_model_references(cleaned_sql_content)
+
+        db_type = self.target_engine.dialect.name.lower()
+        print(f"DEBUG: Determined DB dialect: '{db_type}'")
+
+        if self._has_native_ai_functions(db_type):
+            print(f"DEBUG: Native AI functions ARE supported for '{db_type}'. Attempting native translation.")
+            transformer = NativeDatabaseAITransformer(db_type)
+            sql_to_execute_with_native_ai = processed_sql
+
+            print("DEBUG: AI functions and NQL calls to replace (from model.ai_functions):")
+            if model.ai_functions:
+                for fn, params in model.ai_functions.items():
+                    print(f"  Function: {fn}, Full Call String: '{params.get('full_call_string')}'")
+            else:
+                print("  (None found in model.ai_functions to replace natively)")
 
+            # Replace NQL calls with native functions
+            for func_name, params in model.ai_functions.items():
+                original_nql_call = params.get('full_call_string')
+                if not original_nql_call:
+                    print(f"WARNING: 'full_call_string' not found for NQL function '{func_name}'. Skipping native replacement attempt.")
+                    continue
+
+                try:
+                    column_ref = params.get('column', '')
+
+                    transform_kwargs = {
+                        'text': column_ref,
+                        'prompt': column_ref,
+                        'query': params.get('query', ''),
+                        'context': params.get('context', ''),
+                        'npc': params.get('npc', '')
+                    }
+
+                    native_func_call = transformer.transform_ai_function(
+                        func_name,
+                        **transform_kwargs
+                    )
+
+                    print(f"DEBUG: Replacing '{original_nql_call}' with '{native_func_call}'")
+
+                    # NORMALIZE WHITESPACE in both the original call and the SQL
+                    # This handles multiline NQL calls with varying indentation
+                    normalized_original = re.sub(r'\s+', ' ', original_nql_call).strip()
+                    normalized_sql = re.sub(r'\s+', ' ', sql_to_execute_with_native_ai).strip()
+
+                    # Find the normalized pattern in the normalized SQL
+                    if normalized_original in normalized_sql:
+                        # Now do the replacement on the ORIGINAL (non-normalized) SQL
+                        # by creating a flexible regex pattern
+                        # Escape special regex chars but allow flexible whitespace
+                        pattern_parts = [re.escape(part) for part in original_nql_call.split()]
+                        flexible_pattern = r'\s*'.join(pattern_parts)
+                        pattern = re.compile(flexible_pattern, re.IGNORECASE | re.DOTALL)
+
+                        old_sql = sql_to_execute_with_native_ai
+                        sql_to_execute_with_native_ai = pattern.sub(native_func_call, sql_to_execute_with_native_ai, count=1)
+
+                        if old_sql != sql_to_execute_with_native_ai:
+                            print(f"DEBUG: Successfully replaced with flexible whitespace pattern.")
+                        else:
+                            print(f"ERROR: Flexible pattern replacement failed for '{func_name}'.")
+                    else:
+                        print(f"ERROR: Could not find normalized NQL call in SQL for '{func_name}'.")
+
+                except ValueError as e:
+                    print(f"WARNING: Native translation failed for '{func_name}': {e}. This AI function will NOT be natively translated.")
+                except Exception as e:
+                    print(f"ERROR: An unexpected error occurred during native AI transformation for '{func_name}': {e}. This AI function will NOT be natively translated.") # Check for remaining NQL calls
+            if "nql." in sql_to_execute_with_native_ai.lower():
+                print(f"WARNING: Some NQL calls remain after native translation attempts. Replacing remaining NQL calls with NULLs.")
+                sql_to_execute_with_native_ai = self._replace_nql_calls_with_null(sql_to_execute_with_native_ai, model)
+
+            print(f"DEBUG: Final SQL for native/mixed AI execution:\n{sql_to_execute_with_native_ai}\n")
+            target_engine_for_native_ai = self.target_engine
+            return pd.read_sql(sql_to_execute_with_native_ai, target_engine_for_native_ai)
+
+        else: # Fallback path when native AI is not supported for the determined DB type
+            print(f"DEBUG: Native AI functions are NOT supported for '{db_type}'. Entering Python fallback path.")
+            sql_with_nql_as_null = self._replace_nql_calls_with_null(processed_sql, model)
+
+            print(f"DEBUG: SQL to execute in pure fallback (NQL as NULLs for DB):\n{sql_with_nql_as_null}\n")
+
+            target_engine_for_fallback = self.target_engine # Use target_engine directly
+            df = pd.read_sql(sql_with_nql_as_null, target_engine_for_fallback)
+
+            # Apply Python-driven AI functions on the DataFrame
+            for func_name, params in model.ai_functions.items():
+                try:
+                    result_series = self.npc_operations.execute_ai_function(func_name, df, **params)
+                    result_column_name = f"{func_name}_{params.get('column', 'result')}" # Use a more specific alias if possible
+                    df[result_column_name] = result_series
+                    print(f"DEBUG: Python-driven AI function '{func_name}' executed. Result in column '{result_column_name}'.")
+                except Exception as e:
+                    print(f"ERROR: Executing Python-driven AI function '{func_name}': {e}. Assigning NULL.")
+                    df[f"{func_name}_{params.get('column', 'result')}"] = None
+
+            return df
 
+    def _replace_nql_calls_with_null(self, sql_content: str, model: SQLModel) -> str:
+        """
+        Replaces specific nql.func(...) as alias calls with NULL as alias.
+        This is used for the fallback path or to clean up any NQL calls missed by native translation.
+        """
+        modified_sql = sql_content
+        for func_name, params in model.ai_functions.items():
+            original_nql_call = params.get('full_call_string')
+            if not original_nql_call:
+                print(f"WARNING: 'full_call_string' not found for NQL function '{func_name}'. Cannot replace with NULL.")
+                continue
+
+            # Extract alias from the original_nql_call string for NULL replacement
+            alias_match = re.search(r'\s+as\s+(\w+)(?:\W|$)', original_nql_call, re.IGNORECASE)
+            alias_name = alias_match.group(1) if alias_match else f"{func_name}_{params.get('column', 'result')}"
+
+            # Create a robust pattern for the original NQL call to handle whitespace variability
+            escaped_original_call = re.escape(original_nql_call.strip())
+            pattern_to_sub = re.compile(r"\s*".join(escaped_original_call.split()), flags=re.IGNORECASE)
+
+            # Perform the replacement with NULL as alias
+            old_sql = modified_sql
+            modified_sql, count = pattern_to_sub.subn(f"NULL as {alias_name}", modified_sql)
+            if count == 0:
+                print(f"WARNING: NULL replacement failed for NQL call '{original_nql_call}' (no change to SQL). SQL still contains NQL call.")
             else:
-                if alias_match:
-                    final_columns.append(f"NULL as {alias_match.group(1)}")
+                print(f"DEBUG: Replaced NQL call '{original_nql_call}' with 'NULL as {alias_name}'.")
 
-        print(f"Extracted base query:\n{final_sql}")
-        return final_sql
+        return modified_sql
 
     def execute_model(self, model_name: str) -> pd.DataFrame:
-        """Execute a model and materialize it to the database"""
         self.current_model = model_name
         model = self.models[model_name]
-        compiled_sql = self.compile_model(model_name)
-        try:
-            if model.has_ai_function:
-                df = self._execute_ai_model(compiled_sql, model)
-            else:
-                df = self._execute_standard_sql(compiled_sql)
-            self._materialize_to_db(model_name, df)
-            return df
-        except Exception as e:
-            print(f"Error executing model {model_name}: {str(e)}")
-            raise
-
-    def _execute_standard_sql(self, sql: str) -> pd.DataFrame:
-        with sqlite3.connect(self.db_path) as conn:
-            try:
-                sql = re.sub(r"--.*?\n", "\n", sql)
-                sql = re.sub(r"\s+", " ", sql).strip()
-                return pd.read_sql(sql, conn)
-            except Exception as e:
-                print(f"Failed to execute SQL: {sql}")
-                print(f"Error: {str(e)}")
-                raise
-
-    def execute_ai_function(self, query, npc, column_value, context):
-        """Execute a specific AI function logic - placeholder"""
-        print(f"Executing AI function on value: {column_value}")
-        synthesized_value = (
-            f"Processed({query}): {column_value} in context {context} with npc {npc}"
-        )
-        return synthesized_value
-
-    def _execute_ai_model(self, sql: str, model: SQLModel) -> pd.DataFrame:
-        try:
-            base_sql = self._extract_base_query(sql)
-            print(f"Executing base SQL:\n{base_sql}")
-            df = self._execute_standard_sql(base_sql)
-            # extract the columns they are between {} pairs
-            columns = re.findall(r"\{([^}]+)\}", sql)
 
-            query_template = params["query"]
-            npc = params["npc"]
-            # only take the after the split "/"
-            npc = npc.split("/")[-1]
-            context = params["context"]
-            # Call the synthesize method using DataFrame directly
-            synthesized_df = self.npc_operations.synthesize(
-                query=query_template, # The raw query to format
-                df=df, # The DataFrame containing the data
-                columns=columns, # The column(s) used to format the query
-                npc=npc, # NPC parameter
-                context=context, # Context parameter
-                framework="default_framework", # Adjust this as per your needs
-            )
+        cleaned_sql_content = self._clean_sql_for_execution(model.content)
+
+        print(f"DEBUG: Cleaned SQL content for model '{model_name}':\n{cleaned_sql_content}\n")
 
-            raise
+        if model.has_ai_function:
+            df = self._execute_ai_model(cleaned_sql_content, model)
+        else:
+            compiled_sql = self._replace_model_references(
+                cleaned_sql_content
+            )
+            print(f"DEBUG: Compiled standard SQL for model '{model_name}':\n{compiled_sql}\n")
+            df = self._execute_standard_sql(
+                compiled_sql,
+                self.target_engine
+            )
 
+        self._materialize_to_db(model_name, df, model.config)
+        return df
 
-    def _materialize_to_db(
+    def _materialize_to_db(
+        self,
+        model_name: str,
+        df: pd.DataFrame,
+        config: Dict
+    ):
+        materialization = config.get('materialized', 'table')
+
+        table_name = model_name
+        table_name_with_schema = (
+            f"{self.target_schema}.{table_name}"
+            if self.target_schema
+            else table_name
+        )
+
+        with self.target_engine.begin() as conn:
+            if self.target_schema:
+                inspector = inspect(conn)
+                if not inspector.has_schema(self.target_schema):
+                    print(f"Creating schema '{self.target_schema}'...")
+                    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {self.target_schema}"))
+                    print(f"Schema '{self.target_schema}' created (if it didn't exist).")
+
+        if materialization == 'view':
+            print(
+                f"Warning: Materialization '{materialization}' requested for model '{model_name}'. "
+                f"Pandas `to_sql` does not directly create SQL VIEWS from DataFrames. "
+                f"Materializing as TABLE instead. You may need to manually create the view."
+            )
+            df.to_sql(
+                table_name,
+                self.target_engine,
+                schema=self.target_schema,
+                index=False,
+                if_exists='replace'
+            )
+            print(f"Materialized model {model_name} as TABLE to {table_name_with_schema}")
+        else:
+            df.to_sql(
+                table_name,
+                self.target_engine,
+                schema=self.target_schema,
+                index=False,
+                if_exists='replace'
+            )
+            print(f"Materialized model {model_name} as TABLE to {table_name_with_schema}")
 
     def _table_exists(self, table_name: str) -> bool:
-        with
-            SELECT name FROM sqlite_master
-            WHERE type='table' AND name=?;
-        """,
-        (table_name,),
-        )
-        return cursor.fetchone() is not None
+        with self.target_engine.connect() as conn:
+            inspector = inspect(conn)
+            return inspector.has_table(table_name, schema=self.target_schema) or \
+                   inspector.has_view(table_name, schema=self.target_schema)
 
     def run_all_models(self):
-        """Execute all models in dependency order"""
         self.discover_models()
         execution_order = self.topological_sort()
+
         print(f"Running models in order: {execution_order}")
 
         results = {}
|
@@ -534,72 +793,12 @@ class ModelCompiler:
|
|
|
534
793
|
model = self.models[model_name]
|
|
535
794
|
for dep in model.dependencies:
|
|
536
795
|
if not self._table_exists(dep):
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
796
|
+
if dep not in results:
|
|
797
|
+
raise ValueError(
|
|
798
|
+
f"Dependency '{dep}' for model '{model_name}' not found in database or already processed models. "
|
|
799
|
+
f"Please ensure all dependencies are resolved and run first."
|
|
800
|
+
)
|
|
540
801
|
|
|
541
802
|
results[model_name] = self.execute_model(model_name)
|
|
542
803
|
|
|
543
804
|
return results
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
def create_example_models(
|
|
547
|
-
models_dir: str = os.path.abspath("./npc_team/factory/models/"),
|
|
548
|
-
db_path: str = "~/npcsh_history.db",
|
|
549
|
-
npc_directory: str = "./npc_team/",
|
|
550
|
-
):
|
|
551
|
-
"""Create example SQL model files"""
|
|
552
|
-
os.makedirs(os.path.abspath("./npc_team/factory/"), exist_ok=True)
|
|
553
|
-
os.makedirs(models_dir, exist_ok=True)
|
|
554
|
-
db_path = os.path.expanduser(db_path)
|
|
555
|
-
conn = sqlite3.connect(db_path)
|
|
556
|
-
df = pd.DataFrame(
|
|
557
|
-
{
|
|
558
|
-
"feedback": ["Great product!", "Could be better", "Amazing service"],
|
|
559
|
-
"customer_id": [1, 2, 3],
|
|
560
|
-
"timestamp": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-03"]),
|
|
561
|
-
}
|
|
562
|
-
)
|
|
563
|
-
|
|
564
|
-
df.to_sql("raw_customer_feedback", conn, index=False, if_exists="replace")
|
|
565
|
-
print("Created raw_customer_feedback table")
|
|
566
|
-
|
|
567
|
-
compiler = ModelCompiler(models_dir, db_path, npc_directory)
|
|
568
|
-
results = compiler.run_all_models()
|
|
569
|
-
|
|
570
|
-
for model_name, df in results.items():
|
|
571
|
-
print(f"\nResults for {model_name}:")
|
|
572
|
-
print(df.head())
|
|
573
|
-
|
|
574
|
-
customer_feedback = """
|
|
575
|
-
SELECT
|
|
576
|
-
feedback,
|
|
577
|
-
customer_id,
|
|
578
|
-
timestamp
|
|
579
|
-
FROM raw_customer_feedback
|
|
580
|
-
WHERE LENGTH(feedback) > 10;
|
|
581
|
-
"""
|
|
582
|
-
|
|
583
|
-
customer_insights = """
|
|
584
|
-
SELECT
|
|
585
|
-
customer_id,
|
|
586
|
-
feedback,
|
|
587
|
-
timestamp,
|
|
588
|
-
synthesize(
|
|
589
|
-
"feedback text: {feedback}",
|
|
590
|
-
"analyst",
|
|
591
|
-
"feedback_analysis"
|
|
592
|
-
) as ai_analysis
|
|
593
|
-
FROM {{ ref('customer_feedback') }};
|
|
594
|
-
"""
|
|
595
|
-
|
|
596
|
-
models = {
|
|
597
|
-
"customer_feedback.sql": customer_feedback,
|
|
598
|
-
"customer_insights.sql": customer_insights,
|
|
599
|
-
}
|
|
600
|
-
|
|
601
|
-
for name, content in models.items():
|
|
602
|
-
path = os.path.join(models_dir, name)
|
|
603
|
-
with open(path, "w") as f:
|
|
604
|
-
f.write(content)
|
|
605
|
-
print(f"Created model: {name}")
|