npcpy: 1.2.25-py3-none-any.whl → 1.2.27-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- npcpy/gen/response.py +1 -1
- npcpy/npc_compiler.py +69 -320
- npcpy/npc_sysenv.py +3 -3
- npcpy/sql/npcsql.py +604 -177
- {npcpy-1.2.25.dist-info → npcpy-1.2.27.dist-info}/METADATA +1 -1
- {npcpy-1.2.25.dist-info → npcpy-1.2.27.dist-info}/RECORD +9 -9
- {npcpy-1.2.25.dist-info → npcpy-1.2.27.dist-info}/WHEEL +0 -0
- {npcpy-1.2.25.dist-info → npcpy-1.2.27.dist-info}/licenses/LICENSE +0 -0
- {npcpy-1.2.25.dist-info → npcpy-1.2.27.dist-info}/top_level.txt +0 -0
npcpy/sql/npcsql.py
CHANGED
@@ -2,27 +2,200 @@ import pandas as pd
 import re
 import os
 from pathlib import Path
-from typing import Dict, List, Set, Union, Any
+from typing import Dict, List, Set, Union, Any, Optional, Callable
 from collections import defaultdict, deque
-from sqlalchemy import create_engine, text, Engine
-import inspect
-
+from sqlalchemy import create_engine, text, Engine, inspect
+import inspect as py_inspect
+
+# --- Explicitly import llm_funcs as a module object ---
+try:
+    import npcpy.llm_funcs as llm_funcs
+except ImportError:
+    print("Warning: `npcpy.llm_funcs` not found. Providing mock AI functions for execution.")
+    class MockLlmFuncs:
+        def generate_text(self, prompt: str, npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: generate_text('{prompt}')")
+            return {"response": f"MOCK: Generated text for '{prompt}'"}
+        def analyze_sentiment(self, text: str, npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: analyze_sentiment('{text}')")
+            return {"response": f"MOCK: Positive sentiment for '{text}'"}
+        def summarize(self, text: str, npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: summarize('{text}')")
+            return {"response": f"MOCK: Summary of '{text}'"}
+        def translate(self, text: str, source_lang='auto', target_lang='en', npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: translate('{text}', '{source_lang}', '{target_lang}')")
+            return {"response": f"MOCK: Translated '{text}' from {source_lang} to {target_lang}"}
+        def extract_entities(self, text: str, npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: extract_entities('{text}')")
+            return {"response": f"MOCK: Entities from '{text}'"}
+        def generate_embedding(self, text: str, model='default', npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: generate_embedding('{text}', '{model}')")
+            return {"response": f"MOCK: Embedding for '{text}'"}
+    llm_funcs = MockLlmFuncs()
+
+# Assuming these are available in the npcpy environment
 from npcpy.memory.command_history import create_engine_from_path
+try:
+    from npcpy.npc_compiler import Team
+except ImportError:
+    print("Warning: `npcpy.npc_compiler.Team` not found. Providing mock Team class.")
+    class Team:
+        def __init__(self, team_path: str = "./npc_team/", npcs: Optional[List[Any]] = None):
+            print(f"MOCK NPC: Team initialized for path: {team_path}")
+            self.npcs = npcs if npcs is not None else []
+        def get_npc(self, npc_ref: str):
+            print(f"MOCK NPC: get_npc called for: {npc_ref}")
+            return {"name": npc_ref, "type": "mock_npc"}
+
 
+# --- PANDAS BACKEND CONFIGURATION ---
 try:
-    import modin.pandas as
+    import modin.pandas as pd_modin
     import snowflake.snowpark.modin.plugin
-
+    pd = pd_modin
+    PANDAS_BACKEND = 'snowflake_modin'
 except ImportError:
     try:
-        import modin.pandas as
+        import modin.pandas as pd_modin
+        pd = pd_modin
         PANDAS_BACKEND = 'modin'
     except ImportError:
         import pandas as pd
         PANDAS_BACKEND = 'pandas'
+# print(f"Using pandas backend: {PANDAS_BACKEND}") # Removed for cleaner output
+
+
+# --- AI Function Mappings ---
+class DatabaseAIFunctionMapper:
+    @staticmethod
+    def get_snowflake_cortex_mapping() -> Dict[str, Dict[str, Any]]:
+        return {
+            'get_llm_response': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda prompt, **kwargs: f"SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b', {prompt})"
+            },
+            'extract_facts': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b', CONCAT('Extract concise facts from this text. Return JSON with fact_list array. Text: ', {text}))"
+            },
+            'get_facts': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Extract facts from this text. Return JSON with facts array containing statement, source_text, and type fields. Text: ' || {text})"""
+            },
+            'identify_groups': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Identify main groups these facts could be organized into. Return JSON with groups array. Facts: ' || {text})"""
+            },
+            'assign_groups_to_fact': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Assign this fact to relevant groups. Return JSON with groups array. Fact: ' || {text})"""
+            },
+            'generate_group_candidates': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Generate specific conceptual groups for these items. Return JSON with groups array. Items: ' || {text})"""
+            },
+            'remove_idempotent_groups': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Remove conceptually identical groups, favor specificity. Return JSON with distinct_groups array. Groups: ' || {text})"""
+            },
+            'zoom_in': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Infer new implied facts from existing facts. Return JSON with implied_facts array. Facts: ' || {text})"""
+            },
+            'generate_groups': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Generate conceptual groups for facts. Return JSON with groups array. Facts: ' || {text})"""
+            },
+            'remove_redundant_groups': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Remove redundant groups, merge similar concepts. Return JSON with groups array. Groups: ' || {text})"""
+            },
+            'criticize': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Provide critical analysis and constructive criticism. Input: ' || {text})"""
+            },
+            'synthesize': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Synthesize information from multiple perspectives. Input: ' || {text})"""
+            },
+            'breathe': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Condense conversation context into key extractions. Return JSON with high_level_objective, most_recent_task, accomplishments, failures. Conversation: ' || {text})"""
+            },
+            'abstract': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                    'Create more abstract categories from groups. Return JSON with groups array. Groups: ' || {text})"""
+            }
+        }
+
+
+    @staticmethod
+    def get_databricks_ai_mapping() -> Dict[str, Dict[str, Any]]:
+        return {
+            'generate_text': {
+                'databricks_function': 'serving.predict',
+                'transformer': lambda prompt, model='databricks-dolly', **kwargs:
+                    f"serving.predict('{model}', '{prompt}')"
+            },
+        }
+
+    @staticmethod
+    def get_bigquery_ai_mapping() -> Dict[str, Dict[str, Any]]:
+        return {
+            'generate_text': {
+                'bigquery_function': 'ML.GENERATE_TEXT',
+                'transformer': lambda prompt, model='text-bison', **kwargs:
+                    f"ML.GENERATE_TEXT(MODEL `{model}`, '{prompt}')"
+            },
+        }
+
+# --- Native Database AI Transformer (INCLUDED in the module) ---
+class NativeDatabaseAITransformer:
+    def __init__(self, database_type: str):
+        self.database_type = database_type.lower()
+        self.function_mappings = self._get_database_mappings()
+
+    def _get_database_mappings(self) -> Dict[str, Dict[str, Any]]:
+        mappings = {
+            'snowflake': DatabaseAIFunctionMapper.get_snowflake_cortex_mapping(),
+            'databricks': DatabaseAIFunctionMapper.get_databricks_ai_mapping(),
+            'bigquery': DatabaseAIFunctionMapper.get_bigquery_ai_mapping()
+        }
+        return mappings.get(self.database_type, {})
+
+    def transform_ai_function(self, function_name: str, **kwargs) -> str:
+        mapping = self.function_mappings.get(function_name)
+        if not mapping:
+            raise ValueError(f"No native mapping found for function: {function_name} for database type {self.database_type}")
+
+        transformer: Callable[..., str] = mapping.get('transformer')
+        if not transformer:
+            raise ValueError(f"No transformer found for function: {function_name} for database type {self.database_type}")
+
+        if function_name == 'generate_text' and 'text' in kwargs:
+            kwargs['prompt'] = kwargs.pop('text')
+
+        return transformer(**kwargs)
 
+# --- NPCSQL Operations ---
 class NPCSQLOperations:
-    def __init__(
+    def __init__(
+        self,
+        npc_directory: str,
+        db_engine: Union[str, Engine] = "~/npcsh_history.db"
+    ):
         self.npc_directory = npc_directory
 
         if isinstance(db_engine, str):
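Note on the hunk above: the new NativeDatabaseAITransformer turns an NQL function name plus arguments into a warehouse-native SQL expression. A minimal sketch of the mechanics (the call site is illustrative, not from the package):

    transformer = NativeDatabaseAITransformer('snowflake')
    sql_expr = transformer.transform_ai_function('get_llm_response', prompt='review_text')
    # sql_expr == "SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b', review_text)"
    # The column reference is spliced in unquoted, so Snowflake evaluates the
    # Cortex call itself instead of npcpy looping over rows in Python.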
@@ -34,10 +207,11 @@ class NPCSQLOperations:
         self.function_map = self._build_function_map()
 
     def _get_team(self):
-        return
+        return (self.npc_loader
+                if hasattr(self.npc_loader, 'npcs')
+                else None)
 
     def _build_function_map(self):
-        import npcpy.llm_funcs as llm_funcs
         import types
 
         function_map = {}
@@ -45,7 +219,8 @@
             if name.startswith('_'):
                 continue
             obj = getattr(llm_funcs, name)
-            if isinstance(obj, types.FunctionType)
+            if (isinstance(obj, types.FunctionType) or
+                (isinstance(obj, types.MethodType) and obj.__self__ is not None)):
                 function_map[name] = obj
 
         return function_map
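Note: with the widened isinstance check above, _build_function_map now registers bound methods as well as plain functions, so the MockLlmFuncs fallback instance contributes entries too. Roughly (the exact keys depend on what npcpy.llm_funcs exports):

    ops = NPCSQLOperations('./npc_team/')
    # ops.function_map ~= {'get_llm_response': <callable>, 'breathe': <callable>, ...}
    # Underscore-prefixed names are skipped by the loop above.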
@@ -62,18 +237,27 @@
             return npc
 
         if ',' in npc_ref:
-            npc_names = [
-
+            npc_names = [
+                name.strip() for name in npc_ref.split(',')
+            ]
+            npcs = [
+                self.npc_loader.get_npc(name)
+                for name in npc_names
+            ]
             npcs = [npc for npc in npcs if npc is not None]
 
             if npcs:
-                from npcpy.npc_compiler import Team
                 temp_team = Team(npcs=npcs)
                 return temp_team
 
         return None
 
-    def execute_ai_function(
+    def execute_ai_function(
+        self,
+        func_name: str,
+        df: pd.DataFrame,
+        **params
+    ) -> pd.Series:
         if func_name not in self.function_map:
             raise ValueError(f"Unknown AI function: {func_name}")
 
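Note: a hedged usage sketch of the reworked execute_ai_function signature introduced above (data and template are illustrative; the per-row application is defined in the next hunk):

    import pandas as pd  # assuming the plain-pandas backend

    df = pd.DataFrame({'review_text': ['great tool', 'too slow']})
    summaries = ops.execute_ai_function(
        'get_llm_response',
        df,
        column='review_text',
        query='Summarize this review: {column_value}',
    )
    # Returns a pd.Series aligned with df's index; '{column_value}' (and any
    # column name) in the query template is filled in per row.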
@@ -86,43 +270,85 @@
         if not resolved_team and hasattr(resolved_npc, 'team'):
             resolved_team = resolved_npc.team
 
-        def
-
-
-
-
-
-
-
-
-
-
+        def apply_function_to_row(row):
+            query_template = params.get('query', '')
+            column_name = params.get('column', '')
+
+            column_value = str(row[column_name]) if column_name and column_name in row.index else column_name
+
+            if query_template:
+                row_data = {
+                    col: str(row[col])
+                    for col in df.columns
+                }
+                row_data['column_value'] = column_value
+                query = query_template.format(**row_data)
+            else:
+                query = column_value
+
+            sig = py_inspect.signature(func)
+            func_params = {
+                k: v for k, v in {
+                    'prompt': query,
+                    'text': query,
                     'npc': resolved_npc,
                     'team': resolved_team,
                     'context': params.get('context', '')
-                }.items() if k in sig.parameters
-
-
-
-
-
-                return f"Error: {str(e)}"
+                }.items() if k in sig.parameters
+            }
+
+            result = func(**func_params)
+            return (result.get("response", "")
+                    if isinstance(result, dict)
+                    else str(result))
 
-        return df.apply(
+        return df.apply(apply_function_to_row, axis=1)
 
 
+# --- SQL Model Definition ---
 class SQLModel:
-    def __init__(
+    def __init__(
+        self,
+        name: str,
+        content: str,
+        path: str,
+        npc_directory: str
+    ):
         self.name = name
         self.content = content
         self.path = path
         self.npc_directory = npc_directory
+
+        config_match = re.search(
+            r'\{\{[\s]*config\((.*?)\)[\s]*\}\}',
+            content,
+            re.DOTALL
+        )
+        if config_match:
+            self.config = self._parse_config(config_match.group(1))
+        else:
+            self.config = {'materialized': 'table'}
 
         self.dependencies = self._extract_dependencies()
         self.has_ai_function = self._check_ai_functions()
+
+        # DEBUG print to confirm if AI functions are found
         self.ai_functions = self._extract_ai_functions()
+        if self.ai_functions:
+            print(f"DEBUG SQLModel: Model '{self.name}' extracted AI functions: {list(self.ai_functions.keys())}")
+        else:
+            print(f"DEBUG SQLModel: Model '{self.name}' has no AI functions found by _extract_ai_functions.")
+
+
+    def _parse_config(self, config_str: str) -> Dict:
+        config = {}
+        for item in re.split(r',\s*(?=[a-zA-Z0-9_]+\s*=)', config_str):
+            if '=' in item:
+                key, value = item.split('=', 1)
+                key = key.strip()
+                value = value.strip().strip('"').strip("'")
+                config[key] = value
+        return config
 
     def _extract_dependencies(self) -> Set[str]:
         pattern = r"\{\{\s*ref\(['\"]([^'\"]+)['\"]\)\s*\}\}"
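Note: SQLModel now parses an inline dbt-style config block instead of assuming defaults. Roughly, for an illustrative model file:

    content = (
        "{{ config(materialized='table') }}\n"
        "select * from {{ ref('raw_reviews') }}"
    )
    m = SQLModel('reviews', content, 'models/reviews.sql', './npc_team/')
    # m.config == {'materialized': 'table'}
    # m.dependencies contains 'raw_reviews', and m.has_ai_function is False here,
    # so the compiler would take the standard (non-AI) execution path.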
@@ -132,232 +358,429 @@ class SQLModel:
         return "nql." in self.content
 
     def _extract_ai_functions(self) -> Dict[str, Dict]:
-
+        """Extract AI function calls from SQL content with improved robustness."""
         import types
 
         ai_functions = {}
-        pattern
-
+        # More robust pattern that handles nested parentheses better
+        # This captures: nql.function_name(args...)
+        pattern = r"nql\.(\w+)\s*\(((?:[^()]|\([^()]*\))*)\)"
+
+        matches = re.finditer(pattern, self.content, flags=re.DOTALL | re.IGNORECASE)
 
-        # Get available function names dynamically
         available_functions = []
         for name in dir(llm_funcs):
             if name.startswith('_'):
                 continue
             obj = getattr(llm_funcs, name)
-            if isinstance(obj, types.FunctionType)
-
-
+            if (isinstance(obj, types.FunctionType) or
+                (isinstance(obj, types.MethodType) and obj.__self__ is not None)):
+                available_functions.append(name.lower()) # Store as lowercase for comparison
+
         for match in matches:
-
+            full_call_string = match.group(0).strip()
+            func_name = match.group(1).lower() # Convert to lowercase for lookup
+
             if func_name in available_functions:
-
-
-
-
-
-
-
-
+                params_str = match.group(2)
+
+                # Simplified parameter extraction
+                params_list = []
+                balance = 0
+                in_quote = None
+                current_param_chars = []
+
+                for char in params_str:
+                    if char in ("'", '"'):
+                        if in_quote == char:
+                            in_quote = None
+                        elif in_quote is None:
+                            in_quote = char
+                        current_param_chars.append(char)
+                    elif char == '(' and in_quote is None:
+                        balance += 1
+                        current_param_chars.append(char)
+                    elif char == ')' and in_quote is None:
+                        balance -= 1
+                        current_param_chars.append(char)
+                    elif char == ',' and balance == 0 and in_quote is None:
+                        params_list.append("".join(current_param_chars).strip())
+                        current_param_chars = []
+                    else:
+                        current_param_chars.append(char)
+
+                if current_param_chars:
+                    params_list.append("".join(current_param_chars).strip())
+
+                params = [p.strip().strip("'\"") for p in params_list]
+
+                column_param = params[0] if len(params) > 0 else ""
+                npc_param = params[1] if len(params) > 1 else ""
+                query_param = params[2] if len(params) > 2 else ""
+                context_param = params[3] if len(params) > 3 else None
+
+                if npc_param.endswith(".npc"):
+                    npc_param = npc_param[:-4]
+                if self.npc_directory and npc_param.startswith(self.npc_directory):
+                    npc_param = npc_param[len(self.npc_directory):].strip('/')
 
                 ai_functions[func_name] = {
-                    "column":
-                    "npc":
-                    "query":
-                    "context":
+                    "column": column_param,
+                    "npc": npc_param,
+                    "query": query_param,
+                    "context": context_param,
+                    "full_call_string": full_call_string,
+                    "original_func_name": match.group(1) # Store original case
                 }
-
-
+            else:
+                print(f"DEBUG SQLModel: Function '{func_name}' not found in available LLM funcs ({available_functions}). Skipping this NQL call.")
 
+        return ai_functions
 
+# --- Model Compiler ---
 class ModelCompiler:
-    def __init__(
-
+    def __init__(
+        self,
+        models_dir: str,
+        target_engine: Union[str, Engine],
+        npc_directory: str = "./npc_team/",
+        external_engines: Optional[Dict[str, Engine]] = None,
+        target_schema: Optional[str] = None
+    ):
         self.models_dir = Path(os.path.expanduser(models_dir))
 
-        if isinstance(
-        self.
+        if isinstance(target_engine, str):
+            self.target_engine = create_engine_from_path(
+                target_engine
+            )
         else:
-            self.
+            self.target_engine = target_engine
 
         self.external_engines = external_engines or {}
+        self.target_schema = target_schema
         self.models: Dict[str, SQLModel] = {}
-        self.npc_operations = NPCSQLOperations(
+        self.npc_operations = NPCSQLOperations(
+            npc_directory,
+            self.target_engine
+        )
         self.npc_directory = npc_directory
 
-        from npcpy.npc_compiler import Team
         try:
            self.npc_team = Team(team_path=npc_directory)
            self.npc_operations.npc_loader = self.npc_team
-        except:
+        except Exception as e:
            self.npc_team = None
+            print(f"Warning: Could not load NPC team from {npc_directory}. AI functions relying on NPC context might fail: {e}")
 
     def _get_engine(self, source_name: str) -> Engine:
-
-
-        return self.engine
-        return self.external_engines[source_name]
+        if source_name.lower() == 'local' or not self.external_engines:
+            return self.target_engine
 
+        for key, engine in self.external_engines.items():
+            if key.lower() == source_name.lower():
+                return engine
+        return self.target_engine
+
     def _has_native_ai_functions(self, source_name: str) -> bool:
-
-
-        return source_name in ai_enabled
+        ai_enabled_dbs = {'snowflake', 'databricks', 'bigquery'}
+        return source_name.lower() in ai_enabled_dbs
 
     def discover_models(self):
-        """Discover SQL models in directory structure"""
         self.models = {}
-
-        print(list(self.models_dir.glob("**/*.sql")))
+        sql_files = list(self.models_dir.glob("**/*.sql"))
 
-        for sql_file in
+        for sql_file in sql_files:
             model_name = sql_file.stem
             with open(sql_file, "r") as f:
                 content = f.read()
-
-            model_npc_dir = sql_file.parent
 
             self.models[model_name] = SQLModel(
-                model_name,
+                model_name,
+                content,
+                str(sql_file),
+                str(sql_file.parent)
             )
-            print(f"Discovered model: {model_name}")
-            print(sql_file, )
 
         return self.models
 
     def build_dag(self) -> Dict[str, Set[str]]:
-        """Build dependency graph"""
         dag = {}
         for model_name, model in self.models.items():
             dag[model_name] = model.dependencies
         return dag
 
     def topological_sort(self) -> List[str]:
-        """Generate execution order using topological sort"""
         dag = self.build_dag()
-
+
+        true_in_degree = {model_name: 0 for model_name in self.models.keys()}
+        adj_list = defaultdict(list)
 
-        for
-        for
-
-
-
+        for model_name, model in self.models.items():
+            for dependency in model.dependencies:
+                if dependency not in self.models:
+                    raise ValueError(f"Dependency '{dependency}' of model '{model_name}' not found in discovered models.")
+                true_in_degree[model_name] += 1
+                adj_list[dependency].append(model_name)
 
-        queue = deque([
+        queue = deque([model_name for model_name in self.models.keys() if true_in_degree[model_name] == 0])
         result = []
-
+
         while queue:
-
-            result.append(
+            current_model = queue.popleft()
+            result.append(current_model)
 
-            for
-
-
-
-                queue.append(dependent)
+            for dependent_model in adj_list[current_model]:
+                true_in_degree[dependent_model] -= 1
+                if true_in_degree[dependent_model] == 0:
+                    queue.append(dependent_model)
 
-        if len(result) != len(
-            raise ValueError("Circular dependency detected")
+        if len(result) != len(self.models):
+            raise ValueError("Circular dependency detected or some models not processed.")
 
         return result
 
-    def _replace_model_references(self,
-        ref_pattern =
+    def _replace_model_references(self, sql_content: str) -> str:
+        ref_pattern = (
+            r"\{\{\s*ref\s*\(\s*['\"]([^'\"]+)['\"]\s*\)\s*\}\}"
+        )
 
         def replace_ref(match):
             model_name = match.group(1)
             if model_name not in self.models:
-                raise ValueError(
+                raise ValueError(
+                    f"Model '{model_name}' referenced by '{{{{ ref('{model_name}') }}}}' not found during compilation."
+                )
+
+            if self.target_schema:
+                return f"{self.target_schema}.{model_name}"
             return model_name
 
-        replaced_sql = re.sub(ref_pattern, replace_ref,
+        replaced_sql = re.sub(ref_pattern, replace_ref, sql_content)
         return replaced_sql
 
+    def _clean_sql_for_execution(self, sql_content: str) -> str:
+        config_pattern = r'\{\{[\s]*config\((.*?)\)[\s]*\}\}'
+        cleaned_sql = re.sub(config_pattern, '', sql_content, flags=re.DOTALL).strip()
+        cleaned_sql = re.sub(r"--.*?\n", "\n", cleaned_sql)
+        cleaned_sql = re.sub(r"/\*.*?\*/", "", cleaned_sql, flags=re.DOTALL)
+        cleaned_sql = re.sub(r"\s+", " ", cleaned_sql).strip()
+        return cleaned_sql
+
+    def _execute_standard_sql(
+        self,
+        sql_to_execute: str,
+        engine: Engine
+    ) -> pd.DataFrame:
+        return pd.read_sql(sql_to_execute, engine)
+
+    def _execute_ai_model(self, cleaned_sql_content: str, model: SQLModel) -> pd.DataFrame:
+        processed_sql = self._replace_model_references(cleaned_sql_content)
+
+        db_type = self.target_engine.dialect.name.lower()
+        print(f"DEBUG: Determined DB dialect: '{db_type}'")
+
+        if self._has_native_ai_functions(db_type):
+            print(f"DEBUG: Native AI functions ARE supported for '{db_type}'. Attempting native translation.")
+            transformer = NativeDatabaseAITransformer(db_type)
+            sql_to_execute_with_native_ai = processed_sql
+
+            print("DEBUG: AI functions and NQL calls to replace (from model.ai_functions):")
+            if model.ai_functions:
+                for fn, params in model.ai_functions.items():
+                    print(f"  Function: {fn}, Full Call String: '{params.get('full_call_string')}'")
+            else:
+                print("  (None found in model.ai_functions to replace natively)")
+
+            # Replace NQL calls with native functions
+            for func_name, params in model.ai_functions.items():
+                original_nql_call = params.get('full_call_string')
+                if not original_nql_call:
+                    print(f"WARNING: 'full_call_string' not found for NQL function '{func_name}'. Skipping native replacement attempt.")
+                    continue
+
+                try:
+                    column_ref = params.get('column', '')
+
+                    transform_kwargs = {
+                        'text': column_ref,
+                        'prompt': column_ref,
+                        'query': params.get('query', ''),
+                        'context': params.get('context', ''),
+                        'npc': params.get('npc', '')
+                    }
+
+                    native_func_call = transformer.transform_ai_function(
+                        func_name,
+                        **transform_kwargs
+                    )
+
+                    print(f"DEBUG: Replacing '{original_nql_call}' with '{native_func_call}'")
+
+                    # NORMALIZE WHITESPACE in both the original call and the SQL
+                    # This handles multiline NQL calls with varying indentation
+                    normalized_original = re.sub(r'\s+', ' ', original_nql_call).strip()
+                    normalized_sql = re.sub(r'\s+', ' ', sql_to_execute_with_native_ai).strip()
+
+                    # Find the normalized pattern in the normalized SQL
+                    if normalized_original in normalized_sql:
+                        # Now do the replacement on the ORIGINAL (non-normalized) SQL
+                        # by creating a flexible regex pattern
+                        # Escape special regex chars but allow flexible whitespace
+                        pattern_parts = [re.escape(part) for part in original_nql_call.split()]
+                        flexible_pattern = r'\s*'.join(pattern_parts)
+                        pattern = re.compile(flexible_pattern, re.IGNORECASE | re.DOTALL)
+
+                        old_sql = sql_to_execute_with_native_ai
+                        sql_to_execute_with_native_ai = pattern.sub(native_func_call, sql_to_execute_with_native_ai, count=1)
+
+                        if old_sql != sql_to_execute_with_native_ai:
+                            print(f"DEBUG: Successfully replaced with flexible whitespace pattern.")
+                        else:
+                            print(f"ERROR: Flexible pattern replacement failed for '{func_name}'.")
+                    else:
+                        print(f"ERROR: Could not find normalized NQL call in SQL for '{func_name}'.")
+
+                except ValueError as e:
+                    print(f"WARNING: Native translation failed for '{func_name}': {e}. This AI function will NOT be natively translated.")
+                except Exception as e:
+                    print(f"ERROR: An unexpected error occurred during native AI transformation for '{func_name}': {e}. This AI function will NOT be natively translated.")  # Check for remaining NQL calls
+            if "nql." in sql_to_execute_with_native_ai.lower():
+                print(f"WARNING: Some NQL calls remain after native translation attempts. Replacing remaining NQL calls with NULLs.")
+                sql_to_execute_with_native_ai = self._replace_nql_calls_with_null(sql_to_execute_with_native_ai, model)
+
+            print(f"DEBUG: Final SQL for native/mixed AI execution:\n{sql_to_execute_with_native_ai}\n")
+            target_engine_for_native_ai = self.target_engine
+            return pd.read_sql(sql_to_execute_with_native_ai, target_engine_for_native_ai)
+
+        else: # Fallback path when native AI is not supported for the determined DB type
+            print(f"DEBUG: Native AI functions are NOT supported for '{db_type}'. Entering Python fallback path.")
+            sql_with_nql_as_null = self._replace_nql_calls_with_null(processed_sql, model)
+
+            print(f"DEBUG: SQL to execute in pure fallback (NQL as NULLs for DB):\n{sql_with_nql_as_null}\n")
+
+            target_engine_for_fallback = self.target_engine # Use target_engine directly
+            df = pd.read_sql(sql_with_nql_as_null, target_engine_for_fallback)
+
+            # Apply Python-driven AI functions on the DataFrame
+            for func_name, params in model.ai_functions.items():
+                try:
+                    result_series = self.npc_operations.execute_ai_function(func_name, df, **params)
+                    result_column_name = f"{func_name}_{params.get('column', 'result')}" # Use a more specific alias if possible
+                    df[result_column_name] = result_series
+                    print(f"DEBUG: Python-driven AI function '{func_name}' executed. Result in column '{result_column_name}'.")
+                except Exception as e:
+                    print(f"ERROR: Executing Python-driven AI function '{func_name}': {e}. Assigning NULL.")
+                    df[f"{func_name}_{params.get('column', 'result')}"] = None
+
+            return df
 
+    def _replace_nql_calls_with_null(self, sql_content: str, model: SQLModel) -> str:
+        """
+        Replaces specific nql.func(...) as alias calls with NULL as alias.
+        This is used for the fallback path or to clean up any NQL calls missed by native translation.
+        """
+        modified_sql = sql_content
+        for func_name, params in model.ai_functions.items():
+            original_nql_call = params.get('full_call_string')
+            if not original_nql_call:
+                print(f"WARNING: 'full_call_string' not found for NQL function '{func_name}'. Cannot replace with NULL.")
+                continue
 
-
-
-
+            # Extract alias from the original_nql_call string for NULL replacement
+            alias_match = re.search(r'\s+as\s+(\w+)(?:\W|$)', original_nql_call, re.IGNORECASE)
+            alias_name = alias_match.group(1) if alias_match else f"{func_name}_{params.get('column', 'result')}"
 
-
-
-
-            alias_name = match.group(1)
-            return f"NULL as {alias_name}"
-
-        cleaned_sql = re.sub(nql_pattern, replace_nql_func, sql, flags=re.DOTALL)
-        return cleaned_sql
+            # Create a robust pattern for the original NQL call to handle whitespace variability
+            escaped_original_call = re.escape(original_nql_call.strip())
+            pattern_to_sub = re.compile(r"\s*".join(escaped_original_call.split()), flags=re.IGNORECASE)
 
-
-
-
-
-
-        except Exception as e:
-            print(f"Failed to execute SQL: {sql}")
-            print(f"Error: {str(e)}")
-            raise
-
-    def _execute_ai_model(self, sql: str, model: SQLModel) -> pd.DataFrame:
-        """Execute SQL with AI functions"""
-        source_pattern = r'FROM\s+(\w+)\.(\w+)'
-        matches = re.findall(source_pattern, sql)
-
-        if matches:
-            source_name, table_name = matches[0]
-            engine = self._get_engine(source_name)
-
-            if self._has_native_ai_functions(source_name):
-                return pd.read_sql(sql.replace(f"{source_name}.", ""), engine)
+            # Perform the replacement with NULL as alias
+            old_sql = modified_sql
+            modified_sql, count = pattern_to_sub.subn(f"NULL as {alias_name}", modified_sql)
+            if count == 0:
+                print(f"WARNING: NULL replacement failed for NQL call '{original_nql_call}' (no change to SQL). SQL still contains NQL call.")
             else:
-
-
-
-            base_sql = self._extract_base_query(sql)
-            df = pd.read_sql(base_sql, self.engine)
-
-            for func_name, params in model.ai_functions.items():
-                result_series = self.npc_operations.execute_ai_function(
-                    func_name, df, **params
-                )
-                df[f"{func_name}_result"] = result_series
-
-            return df
+                print(f"DEBUG: Replaced NQL call '{original_nql_call}' with 'NULL as {alias_name}'.")
+
+        return modified_sql
 
     def execute_model(self, model_name: str) -> pd.DataFrame:
-        """Execute a model and materialize it to the database"""
         self.current_model = model_name
         model = self.models[model_name]
 
-
-
-
-        else:
-            compiled_sql = self._replace_model_references(model.content)
-            df = self._execute_standard_sql(compiled_sql)
+        cleaned_sql_content = self._clean_sql_for_execution(model.content)
+
+        print(f"DEBUG: Cleaned SQL content for model '{model_name}':\n{cleaned_sql_content}\n")
 
-
-
+        if model.has_ai_function:
+            df = self._execute_ai_model(cleaned_sql_content, model)
+        else:
+            compiled_sql = self._replace_model_references(
+                cleaned_sql_content
+            )
+            print(f"DEBUG: Compiled standard SQL for model '{model_name}':\n{compiled_sql}\n")
+            df = self._execute_standard_sql(
+                compiled_sql,
+                self.target_engine
+            )
 
-
-
-            raise
+        self._materialize_to_db(model_name, df, model.config)
+        return df
 
-    def _materialize_to_db(
-
-
-        df
-
+    def _materialize_to_db(
+        self,
+        model_name: str,
+        df: pd.DataFrame,
+        config: Dict
+    ):
+        materialization = config.get('materialized', 'table')
+
+        table_name = model_name
+        table_name_with_schema = (
+            f"{self.target_schema}.{table_name}"
+            if self.target_schema
+            else table_name
+        )
+
+        with self.target_engine.begin() as conn:
+            if self.target_schema:
+                inspector = inspect(conn)
+                if not inspector.has_schema(self.target_schema):
+                    print(f"Creating schema '{self.target_schema}'...")
+                    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {self.target_schema}"))
+                    print(f"Schema '{self.target_schema}' created (if it didn't exist).")
+
+        if materialization == 'view':
+            print(
+                f"Warning: Materialization '{materialization}' requested for model '{model_name}'. "
+                f"Pandas `to_sql` does not directly create SQL VIEWS from DataFrames. "
+                f"Materializing as TABLE instead. You may need to manually create the view."
+            )
+            df.to_sql(
+                table_name,
+                self.target_engine,
+                schema=self.target_schema,
+                index=False,
+                if_exists='replace'
+            )
+            print(f"Materialized model {model_name} as TABLE to {table_name_with_schema}")
+        else:
+            df.to_sql(
+                table_name,
+                self.target_engine,
+                schema=self.target_schema,
+                index=False,
+                if_exists='replace'
+            )
+            print(f"Materialized model {model_name} as TABLE to {table_name_with_schema}")
 
     def _table_exists(self, table_name: str) -> bool:
-        with self.
-
-
-
-        except:
-            return False
+        with self.target_engine.connect() as conn:
+            inspector = inspect(conn)
+            return inspector.has_table(table_name, schema=self.target_schema) or \
+                   inspector.has_view(table_name, schema=self.target_schema)
 
     def run_all_models(self):
-        """Execute all models in dependency order"""
         self.discover_models()
         execution_order = self.topological_sort()
 
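Note: tracing one NQL call through the hunk above (SQL and names illustrative). _extract_ai_functions matches

    nql.get_llm_response('review_text', 'analyst.npc', 'Summarize {column_value}')

splits the arguments on top-level commas with the quote- and paren-aware scanner, strips quotes and the '.npc' suffix, and records {'column': 'review_text', 'npc': 'analyst', 'query': 'Summarize {column_value}', ...} along with the full call string. On Snowflake, Databricks, or BigQuery the call text is then rewritten in place to the native expression; on other engines it is replaced by "NULL as get_llm_response_review_text" (the alias falls back to func_column, since the captured call string carries no ' as ' clause) and the real values are computed per row in Python afterwards.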
@@ -370,8 +793,12 @@ class ModelCompiler:
             model = self.models[model_name]
             for dep in model.dependencies:
                 if not self._table_exists(dep):
-
+                    if dep not in results:
+                        raise ValueError(
+                            f"Dependency '{dep}' for model '{model_name}' not found in database or already processed models. "
+                            f"Please ensure all dependencies are resolved and run first."
+                        )
 
             results[model_name] = self.execute_model(model_name)
 
-        return results
+        return results