npcpy-1.1.28-py3-none-any.whl → npcpy-1.2.32-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. npcpy/data/audio.py +16 -38
  2. npcpy/data/image.py +29 -29
  3. npcpy/data/load.py +4 -3
  4. npcpy/data/text.py +28 -28
  5. npcpy/data/video.py +6 -6
  6. npcpy/data/web.py +49 -21
  7. npcpy/ft/__init__.py +0 -0
  8. npcpy/ft/diff.py +110 -0
  9. npcpy/ft/ge.py +115 -0
  10. npcpy/ft/memory_trainer.py +171 -0
  11. npcpy/ft/model_ensembler.py +357 -0
  12. npcpy/ft/rl.py +360 -0
  13. npcpy/ft/sft.py +248 -0
  14. npcpy/ft/usft.py +128 -0
  15. npcpy/gen/audio_gen.py +24 -0
  16. npcpy/gen/embeddings.py +13 -13
  17. npcpy/gen/image_gen.py +37 -15
  18. npcpy/gen/response.py +287 -111
  19. npcpy/gen/video_gen.py +10 -9
  20. npcpy/llm_funcs.py +447 -79
  21. npcpy/memory/command_history.py +201 -48
  22. npcpy/memory/kg_vis.py +74 -74
  23. npcpy/memory/knowledge_graph.py +482 -115
  24. npcpy/memory/memory_processor.py +81 -0
  25. npcpy/memory/search.py +70 -70
  26. npcpy/mix/debate.py +192 -3
  27. npcpy/npc_compiler.py +1541 -879
  28. npcpy/npc_sysenv.py +250 -78
  29. npcpy/serve.py +1036 -321
  30. npcpy/sql/ai_function_tools.py +257 -0
  31. npcpy/sql/database_ai_adapters.py +186 -0
  32. npcpy/sql/database_ai_functions.py +163 -0
  33. npcpy/sql/model_runner.py +19 -19
  34. npcpy/sql/npcsql.py +706 -507
  35. npcpy/sql/sql_model_compiler.py +156 -0
  36. npcpy/tools.py +20 -20
  37. npcpy/work/plan.py +8 -8
  38. npcpy/work/trigger.py +3 -3
  39. {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/METADATA +169 -9
  40. npcpy-1.2.32.dist-info/RECORD +54 -0
  41. npcpy-1.1.28.dist-info/RECORD +0 -40
  42. {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/WHEEL +0 -0
  43. {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/licenses/LICENSE +0 -0
  44. {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/top_level.txt +0 -0
npcpy/sql/npcsql.py CHANGED
@@ -1,530 +1,789 @@
 import pandas as pd
-import yaml
-from typing import List, Dict, Any, Union
-from npcpy.npc_compiler import
-
-
-
-def execute_squish_command():
-    return
-
-
-def execute_splat_command():
-    return
-
-
+import re
+import os
+from pathlib import Path
+from typing import Dict, List, Set, Union, Any, Optional, Callable
+from collections import defaultdict, deque
+from sqlalchemy import create_engine, text, Engine, inspect
+import inspect as py_inspect
+
+# --- Explicitly import llm_funcs as a module object ---
+try:
+    import npcpy.llm_funcs as llm_funcs
+except ImportError:
+    print("Warning: `npcpy.llm_funcs` not found. Providing mock AI functions for execution.")
+    class MockLlmFuncs:
+        def generate_text(self, prompt: str, npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: generate_text('{prompt}')")
+            return {"response": f"MOCK: Generated text for '{prompt}'"}
+        def analyze_sentiment(self, text: str, npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: analyze_sentiment('{text}')")
+            return {"response": f"MOCK: Positive sentiment for '{text}'"}
+        def summarize(self, text: str, npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: summarize('{text}')")
+            return {"response": f"MOCK: Summary of '{text}'"}
+        def translate(self, text: str, source_lang='auto', target_lang='en', npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: translate('{text}', '{source_lang}', '{target_lang}')")
+            return {"response": f"MOCK: Translated '{text}' from {source_lang} to {target_lang}"}
+        def extract_entities(self, text: str, npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: extract_entities('{text}')")
+            return {"response": f"MOCK: Entities from '{text}'"}
+        def generate_embedding(self, text: str, model='default', npc=None, team=None, context="") -> Dict[str, str]:
+            print(f"MOCK AI: generate_embedding('{text}', '{model}')")
+            return {"response": f"MOCK: Embedding for '{text}'"}
+    llm_funcs = MockLlmFuncs()
+
+# Assuming these are available in the npcpy environment
+from npcpy.memory.command_history import create_engine_from_path
+try:
+    from npcpy.npc_compiler import Team
+except ImportError:
+    print("Warning: `npcpy.npc_compiler.Team` not found. Providing mock Team class.")
+    class Team:
+        def __init__(self, team_path: str = "./npc_team/", npcs: Optional[List[Any]] = None):
+            print(f"MOCK NPC: Team initialized for path: {team_path}")
+            self.npcs = npcs if npcs is not None else []
+        def get_npc(self, npc_ref: str):
+            print(f"MOCK NPC: get_npc called for: {npc_ref}")
+            return {"name": npc_ref, "type": "mock_npc"}
+
+
+# --- PANDAS BACKEND CONFIGURATION ---
+try:
+    import modin.pandas as pd_modin
+    import snowflake.snowpark.modin.plugin
+    pd = pd_modin
+    PANDAS_BACKEND = 'snowflake_modin'
+except ImportError:
+    try:
+        import modin.pandas as pd_modin
+        pd = pd_modin
+        PANDAS_BACKEND = 'modin'
+    except ImportError:
+        import pandas as pd
+        PANDAS_BACKEND = 'pandas'
+# print(f"Using pandas backend: {PANDAS_BACKEND}")  # Removed for cleaner output
+
+
+# --- AI Function Mappings ---
+class DatabaseAIFunctionMapper:
+    @staticmethod
+    def get_snowflake_cortex_mapping() -> Dict[str, Dict[str, Any]]:
+        return {
+            'get_llm_response': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda prompt, **kwargs: f"SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b', {prompt})"
+            },
+            'extract_facts': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b', CONCAT('Extract concise facts from this text. Return JSON with fact_list array. Text: ', {text}))"
+            },
+            'get_facts': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                'Extract facts from this text. Return JSON with facts array containing statement, source_text, and type fields. Text: ' || {text})"""
+            },
+            'identify_groups': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                'Identify main groups these facts could be organized into. Return JSON with groups array. Facts: ' || {text})"""
+            },
+            'assign_groups_to_fact': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                'Assign this fact to relevant groups. Return JSON with groups array. Fact: ' || {text})"""
+            },
+            'generate_group_candidates': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                'Generate specific conceptual groups for these items. Return JSON with groups array. Items: ' || {text})"""
+            },
+            'remove_idempotent_groups': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                'Remove conceptually identical groups, favor specificity. Return JSON with distinct_groups array. Groups: ' || {text})"""
+            },
+            'zoom_in': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                'Infer new implied facts from existing facts. Return JSON with implied_facts array. Facts: ' || {text})"""
+            },
+            'generate_groups': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                'Generate conceptual groups for facts. Return JSON with groups array. Facts: ' || {text})"""
+            },
+            'remove_redundant_groups': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                'Remove redundant groups, merge similar concepts. Return JSON with groups array. Groups: ' || {text})"""
+            },
+            'criticize': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                'Provide critical analysis and constructive criticism. Input: ' || {text})"""
+            },
+            'synthesize': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                'Synthesize information from multiple perspectives. Input: ' || {text})"""
+            },
+            'breathe': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                'Condense conversation context into key extractions. Return JSON with high_level_objective, most_recent_task, accomplishments, failures. Conversation: ' || {text})"""
+            },
+            'abstract': {
+                'cortex_function': 'COMPLETE',
+                'transformer': lambda text, **kwargs: f"""SNOWFLAKE.CORTEX.COMPLETE('llama3.1-8b',
+                'Create more abstract categories from groups. Return JSON with groups array. Groups: ' || {text})"""
+            }
+        }

+
+    @staticmethod
+    def get_databricks_ai_mapping() -> Dict[str, Dict[str, Any]]:
+        return {
+            'generate_text': {
+                'databricks_function': 'serving.predict',
+                'transformer': lambda prompt, model='databricks-dolly', **kwargs:
+                    f"serving.predict('{model}', '{prompt}')"
+            },
+        }
+
+    @staticmethod
+    def get_bigquery_ai_mapping() -> Dict[str, Dict[str, Any]]:
+        return {
+            'generate_text': {
+                'bigquery_function': 'ML.GENERATE_TEXT',
+                'transformer': lambda prompt, model='text-bison', **kwargs:
+                    f"ML.GENERATE_TEXT(MODEL `{model}`, '{prompt}')"
+            },
+        }

+# --- Native Database AI Transformer (INCLUDED in the module) ---
+class NativeDatabaseAITransformer:
+    def __init__(self, database_type: str):
+        self.database_type = database_type.lower()
+        self.function_mappings = self._get_database_mappings()
+
+    def _get_database_mappings(self) -> Dict[str, Dict[str, Any]]:
+        mappings = {
+            'snowflake': DatabaseAIFunctionMapper.get_snowflake_cortex_mapping(),
+            'databricks': DatabaseAIFunctionMapper.get_databricks_ai_mapping(),
+            'bigquery': DatabaseAIFunctionMapper.get_bigquery_ai_mapping()
+        }
+        return mappings.get(self.database_type, {})
+
+    def transform_ai_function(self, function_name: str, **kwargs) -> str:
+        mapping = self.function_mappings.get(function_name)
+        if not mapping:
+            raise ValueError(f"No native mapping found for function: {function_name} for database type {self.database_type}")
+
+        transformer: Callable[..., str] = mapping.get('transformer')
+        if not transformer:
+            raise ValueError(f"No transformer found for function: {function_name} for database type {self.database_type}")
+
+        if function_name == 'generate_text' and 'text' in kwargs:
+            kwargs['prompt'] = kwargs.pop('text')
+
+        return transformer(**kwargs)
+
+# --- NPCSQL Operations ---
 class NPCSQLOperations:
-    def __init__(self, npc_directory, db_path):
-        super().__init__(npc_directory, db_path)
-
-    def _get_context(
-        self, df: pd.DataFrame, context: Union[str, Dict, List[str]]
-    ) -> str:
-        """Resolve context from different sources"""
-        if isinstance(context, str):
-            # Check if it's a column reference
-            if context in df.columns:
-                return df[context].to_string()
-            # Assume it's static text
-            return context
-        elif isinstance(context, list):
-            # List of column names to include
-            return " ".join(df[col].to_string() for col in context if col in df.columns)
-        elif isinstance(context, dict):
-            # YAML-style context
-            return yaml.dump(context)
-        return ""
-
-    # SINGLE PROMPT OPERATIONS
-    def synthesize(
-        self,
-        query,
-        df: pd.DataFrame,
-        columns: List[str],
-        npc: str,
-        context: Union[str, Dict, List[str]],
-        framework: str,
-    ) -> pd.Series:
-        context_text = self._get_context(df, context)
-
-        def apply_synthesis(row):
-            # we have f strings from the query, we want to fill those back in in the request
-            request = query.format(**row[columns])
-            prompt = f"""Framework: {framework}
-            Context: {context_text}
-            Text to synthesize: {request}
-            Synthesize the above text."""
-
-            result = self.execute_stage(
-                {"step_name": "synthesize", "npc": npc, "task": prompt},
-                {},
-                self.jinja_env,
-            )
-
-            return result[0]["response"]
-
-        # columns a list
-        columns_str = "_".join(columns)
-        df_out = df[columns].apply(apply_synthesis, axis=1)
-        return df_out
-
-    # MULTI-PROMPT/PARALLEL OPERATIONS
-    def spread_and_sync(
-        self,
-        df: pd.DataFrame,
-        column: str,
-        npc: str,
-        variations: List[str],
-        sync_strategy: str,
-        context: Union[str, Dict, List[str]],
-    ) -> pd.Series:
-        context_text = self._get_context(df, context)
-
-        def apply_spread_sync(text):
-            results = []
-            for variation in variations:
-                prompt = f"""Variation: {variation}
-                Context: {context_text}
-                Text to analyze: {text}
-                Analyze the above text with {variation} perspective."""
-
-                result = self.execute_stage(
-                    {"step_name": f"spread_{variation}", "npc": npc, "task": prompt},
-                    {},
-                    self.jinja_env,
-                )
-
-                results.append(result[0]["response"])
-
-            # Sync results
-            sync_result = self.aggregate_step_results(
-                [{"response": r} for r in results], sync_strategy
-            )
-
-            return sync_result
-
-        return df[column].apply(apply_spread_sync)
-    # COMPARISON OPERATIONS
-
-    def contrast(
-        self,
-        df: pd.DataFrame,
-        col1: str,
-        col2: str,
-        npc: str,
-        context: Union[str, Dict, List[str]],
-        comparison_framework: str,
+    def __init__(
+        self,
+        npc_directory: str,
+        db_engine: Union[str, Engine] = "~/npcsh_history.db"
+    ):
+        self.npc_directory = npc_directory
+
+        if isinstance(db_engine, str):
+            self.engine = create_engine_from_path(db_engine)
+        else:
+            self.engine = db_engine
+
+        self.npc_loader = None
+        self.function_map = self._build_function_map()
+
+    def _get_team(self):
+        return (self.npc_loader
+                if hasattr(self.npc_loader, 'npcs')
+                else None)
+
+    def _build_function_map(self):
+        import types
+
+        function_map = {}
+        for name in dir(llm_funcs):
+            if name.startswith('_'):
+                continue
+            obj = getattr(llm_funcs, name)
+            if (isinstance(obj, types.FunctionType) or
+                (isinstance(obj, types.MethodType) and obj.__self__ is not None)):
+                function_map[name] = obj
+
+        return function_map
+
+    def _resolve_npc_reference(self, npc_ref: str):
+        if not npc_ref or not self.npc_loader:
+            return None
+
+        if npc_ref.endswith('.npc'):
+            npc_ref = npc_ref[:-4]
+
+        npc = self.npc_loader.get_npc(npc_ref)
+        if npc:
+            return npc
+
+        if ',' in npc_ref:
+            npc_names = [
+                name.strip() for name in npc_ref.split(',')
+            ]
+            npcs = [
+                self.npc_loader.get_npc(name)
+                for name in npc_names
+            ]
+            npcs = [npc for npc in npcs if npc is not None]
+
+            if npcs:
+                temp_team = Team(npcs=npcs)
+                return temp_team
+
+        return None
+
+    def execute_ai_function(
+        self,
+        func_name: str,
+        df: pd.DataFrame,
+        **params
     ) -> pd.Series:
-        context_text = self._get_context(df, context)
-
-        def apply_contrast(row):
-            prompt = f"""Framework: {comparison_framework}
-            Context: {context_text}
-            Text 1: {row[col1]}
-            Text 2: {row[col2]}
-            Compare and contrast the above texts."""
-
-            result = self.execute_stage(
-                {"step_name": "contrast", "npc": npc, "task": prompt},
-                {},
-                self.jinja_env,
-            )
-
-            return result[0]["response"]
-
-        return df.apply(apply_contrast, axis=1)
-
-    def sql_operations(self, sql: str) -> pd.DataFrame:
-        # Execute the SQL query
-
-        """
-        1. delegate(COLUMN, npc, query, context, jinxs, reviewers)
-        2. dilate(COLUMN, npc, query, context, scope, reviewers)
-        3. erode(COLUMN, npc, query, context, scope, reviewers)
-        4. strategize(COLUMN, npc, query, context, timeline, constraints)
-        5. validate(COLUMN, npc, query, context, criteria)
-        6. synthesize(COLUMN, npc, query, context, framework)
-        7. decompose(COLUMN, npc, query, context, granularity)
-        8. criticize(COLUMN, npc, query, context, framework)
-        9. summarize(COLUMN, npc, query, context, style)
-        10. advocate(COLUMN, npc, query, context, perspective)
-
-        MULTI-PROMPT/PARALLEL OPERATIONS
-        11. spread_and_sync(COLUMN, npc, query, variations, sync_strategy, context)
-        12. bootstrap(COLUMN, npc, query, sample_params, sync_strategy, context)
-        13. resample(COLUMN, npc, query, variation_strategy, sync_strategy, context)
-
-        COMPARISON OPERATIONS
-        14. mediate(COL1, COL2, npc, query, context, resolution_strategy)
-        15. contrast(COL1, COL2, npc, query, context, comparison_framework)
-        16. reconcile(COL1, COL2, npc, query, context, alignment_strategy)
-
-        MULTI-COLUMN INTEGRATION
-        17. integrate(COLS[], npc, query, context, integration_method)
-        18. harmonize(COLS[], npc, query, context, harmony_rules)
-        19. orchestrate(COLS[], npc, query, context, workflow)
-        """
-
-        # Example usage in SQL-like syntax:
-        """
-        def execute_sql(self, sql: str) -> pd.DataFrame:
-            # This would be implemented to parse and execute SQL with our custom functions
-            # Example SQL:
-            '''
-            SELECT
-                customer_id,
-                synthesize(feedback_text,
-                    npc='analyst',
-                    context=customer_segment,
-                    framework='satisfaction') as analysis,
-                spread_and_sync(price_sensitivity,
-                    npc='pricing_agent',
-                    variations=['conservative', 'aggressive'],
-                    sync_strategy='balanced_analysis',
-                    context=market_context) as price_strategy
-            FROM customer_data
-            '''
-            pass
-        """
-
-
-class NPCDBTAdapter:
-    def __init__(self, npc_sql: NPCSQLOperations):
-        self.npc_sql = npc_sql
-        self.models = {}
-
-    def ref(self, model_name: str) -> pd.DataFrame:
-        # Implementation for model referencing
-        return self.models.get(model_name)
-
-    def parse_model(self, model_sql: str) -> pd.DataFrame:
-        # Parse the SQL model and execute with our custom functions
-        pass
-
-
-class AIFunctionParser:
-    """Handles parsing and extraction of AI function calls from SQL"""
-
-    @staticmethod
-    def extract_function_params(sql: str) -> Dict[str, Dict]:
-        """Extract AI function parameters from SQL"""
-        ai_functions = {}
-
-        pattern = r"(\w+)\s*\(((?:[^()]*|\([^()]*\))*)\)"
-        matches = re.finditer(pattern, sql)
-
-        for match in matches:
-            func_name = match.group(1)
-            if func_name in ["synthesize", "spread_and_sync"]:
-                params = match.group(2).split(",")
-                ai_functions[func_name] = {
-                    "query": params[0].strip().strip("\"'"),
-                    "npc": params[1].strip().strip("\"'"),
-                    "context": params[2].strip().strip("\"'"),
+        if func_name not in self.function_map:
+            raise ValueError(f"Unknown AI function: {func_name}")
+
+        func = self.function_map[func_name]
+
+        npc_ref = params.get('npc', '')
+        resolved_npc = self._resolve_npc_reference(npc_ref)
+
+        resolved_team = self._get_team()
+        if not resolved_team and hasattr(resolved_npc, 'team'):
+            resolved_team = resolved_npc.team
+
+        def apply_function_to_row(row):
+            query_template = params.get('query', '')
+            column_name = params.get('column', '')
+
+            column_value = str(row[column_name]) if column_name and column_name in row.index else column_name
+
+            if query_template:
+                row_data = {
+                    col: str(row[col])
+                    for col in df.columns
                 }
-
-        return ai_functions
-
-
+                row_data['column_value'] = column_value
+                query = query_template.format(**row_data)
+            else:
+                query = column_value
+
+            sig = py_inspect.signature(func)
+            func_params = {
+                k: v for k, v in {
+                    'prompt': query,
+                    'text': query,
+                    'npc': resolved_npc,
+                    'team': resolved_team,
+                    'context': params.get('context', '')
+                }.items() if k in sig.parameters
+            }
+
+            result = func(**func_params)
+            return (result.get("response", "")
+                    if isinstance(result, dict)
+                    else str(result))
+
+        return df.apply(apply_function_to_row, axis=1)
+
+
+# --- SQL Model Definition ---
 class SQLModel:
-    def __init__(self, name: str, content: str, path: str, npc_directory: str):
+    def __init__(
+        self,
+        name: str,
+        content: str,
+        path: str,
+        npc_directory: str
+    ):
         self.name = name
         self.content = content
         self.path = path
-        self.npc_directory = npc_directory  # This sets the npc_directory attribute
+        self.npc_directory = npc_directory
+
+        config_match = re.search(
+            r'\{\{[\s]*config\((.*?)\)[\s]*\}\}',
+            content,
+            re.DOTALL
+        )
+        if config_match:
+            self.config = self._parse_config(config_match.group(1))
+        else:
+            self.config = {'materialized': 'table'}

         self.dependencies = self._extract_dependencies()
         self.has_ai_function = self._check_ai_functions()
+
+        # DEBUG print to confirm if AI functions are found
         self.ai_functions = self._extract_ai_functions()
-        print(f"Initializing SQLModel with NPC directory: {npc_directory}")
+        if self.ai_functions:
+            print(f"DEBUG SQLModel: Model '{self.name}' extracted AI functions: {list(self.ai_functions.keys())}")
+        else:
+            print(f"DEBUG SQLModel: Model '{self.name}' has no AI functions found by _extract_ai_functions.")
+
+
+    def _parse_config(self, config_str: str) -> Dict:
+        config = {}
+        for item in re.split(r',\s*(?=[a-zA-Z0-9_]+\s*=)', config_str):
+            if '=' in item:
+                key, value = item.split('=', 1)
+                key = key.strip()
+                value = value.strip().strip('"').strip("'")
+                config[key] = value
+        return config

     def _extract_dependencies(self) -> Set[str]:
-        """Extract model dependencies using ref() calls"""
         pattern = r"\{\{\s*ref\(['\"]([^'\"]+)['\"]\)\s*\}\}"
         return set(re.findall(pattern, self.content))
-
+
     def _check_ai_functions(self) -> bool:
-        """Check if the model contains AI function calls"""
-        ai_functions = [
-            "synthesize",
-            "spread_and_sync",
-            "delegate",
-            "dilate",
-            "erode",
-            "strategize",
-            "validate",
-            "decompose",
-            "criticize",
-            "summarize",
-            "advocate",
-            "bootstrap",
-            "resample",
-            "mediate",
-            "contrast",
-            "reconcile",
-            "integrate",
-            "harmonize",
-            "orchestrate",
-        ]
-        return any(func in self.content for func in ai_functions)
+        return "nql." in self.content

     def _extract_ai_functions(self) -> Dict[str, Dict]:
-        """Extract all AI functions and their parameters from the SQL content."""
+        """Extract AI function calls from SQL content with improved robustness."""
+        import types
+
         ai_functions = {}
-        pattern = r"(\w+)\s*\(((?:[^()]*|\([^()]*\))*)\)"
-        matches = re.finditer(pattern, self.content)
-
+        # More robust pattern that handles nested parentheses better
+        # This captures: nql.function_name(args...)
+        pattern = r"nql\.(\w+)\s*\(((?:[^()]|\([^()]*\))*)\)"
+
+        matches = re.finditer(pattern, self.content, flags=re.DOTALL | re.IGNORECASE)
+
+        available_functions = []
+        for name in dir(llm_funcs):
+            if name.startswith('_'):
+                continue
+            obj = getattr(llm_funcs, name)
+            if (isinstance(obj, types.FunctionType) or
+                (isinstance(obj, types.MethodType) and obj.__self__ is not None)):
+                available_functions.append(name.lower())  # Store as lowercase for comparison
+
         for match in matches:
-            func_name = match.group(1)
-            if func_name in [
-                "synthesize",
-                "spread_and_sync",
-                "delegate",
-                "dilate",
-                "erode",
-                "strategize",
-                "validate",
-                "decompose",
-                "criticize",
-                "summarize",
-                "advocate",
-                "bootstrap",
-                "resample",
-                "mediate",
-                "contrast",
-                "reconcile",
-                "integrate",
-                "harmonize",
-                "orchestrate",
-            ]:
-                params = [
-                    param.strip().strip("\"'") for param in match.group(2).split(",")
-                ]
-                npc = params[1]
-                if not npc.endswith(".npc"):
-                    npc = npc.replace(".npc", "")
-                if self.npc_directory in npc:
-                    npc = npc.replace(self.npc_directory, "")
-
-                # print(npc)
+            full_call_string = match.group(0).strip()
+            func_name = match.group(1).lower()  # Convert to lowercase for lookup
+
+            if func_name in available_functions:
+                params_str = match.group(2)
+
+                # Simplified parameter extraction
+                params_list = []
+                balance = 0
+                in_quote = None
+                current_param_chars = []
+
+                for char in params_str:
+                    if char in ("'", '"'):
+                        if in_quote == char:
+                            in_quote = None
+                        elif in_quote is None:
+                            in_quote = char
+                        current_param_chars.append(char)
+                    elif char == '(' and in_quote is None:
+                        balance += 1
+                        current_param_chars.append(char)
+                    elif char == ')' and in_quote is None:
+                        balance -= 1
+                        current_param_chars.append(char)
+                    elif char == ',' and balance == 0 and in_quote is None:
+                        params_list.append("".join(current_param_chars).strip())
+                        current_param_chars = []
+                    else:
+                        current_param_chars.append(char)
+
+                if current_param_chars:
+                    params_list.append("".join(current_param_chars).strip())
+
+                params = [p.strip().strip("'\"") for p in params_list]
+
+                column_param = params[0] if len(params) > 0 else ""
+                npc_param = params[1] if len(params) > 1 else ""
+                query_param = params[2] if len(params) > 2 else ""
+                context_param = params[3] if len(params) > 3 else None
+
+                if npc_param.endswith(".npc"):
+                    npc_param = npc_param[:-4]
+                if self.npc_directory and npc_param.startswith(self.npc_directory):
+                    npc_param = npc_param[len(self.npc_directory):].strip('/')
+
                 ai_functions[func_name] = {
-                    "column": params[0],
-                    "npc": npc,
-                    "query": params[2],
-                    "context": params[3] if len(params) > 3 else None,
+                    "column": column_param,
+                    "npc": npc_param,
+                    "query": query_param,
+                    "context": context_param,
+                    "full_call_string": full_call_string,
+                    "original_func_name": match.group(1)  # Store original case
                 }
-        return ai_functions
+            else:
+                print(f"DEBUG SQLModel: Function '{func_name}' not found in available LLM funcs ({available_functions}). Skipping this NQL call.")

+        return ai_functions

+# --- Model Compiler ---
 class ModelCompiler:
-    def __init__(self, models_dir: str, db_path: str, npc_directory: str):
-        self.models_dir = Path(models_dir)
-        self.db_path = db_path
+    def __init__(
+        self,
+        models_dir: str,
+        target_engine: Union[str, Engine],
+        npc_directory: str = "./npc_team/",
+        external_engines: Optional[Dict[str, Engine]] = None,
+        target_schema: Optional[str] = None
+    ):
+        self.models_dir = Path(os.path.expanduser(models_dir))
+
+        if isinstance(target_engine, str):
+            self.target_engine = create_engine_from_path(
+                target_engine
+            )
+        else:
+            self.target_engine = target_engine
+
+        self.external_engines = external_engines or {}
+        self.target_schema = target_schema
         self.models: Dict[str, SQLModel] = {}
-        self.npc_operations = NPCSQLOperations(npc_directory, db_path)
+        self.npc_operations = NPCSQLOperations(
+            npc_directory,
+            self.target_engine
+        )
         self.npc_directory = npc_directory
+
+        try:
+            self.npc_team = Team(team_path=npc_directory)
+            self.npc_operations.npc_loader = self.npc_team
+        except Exception as e:
+            self.npc_team = None
+            print(f"Warning: Could not load NPC team from {npc_directory}. AI functions relying on NPC context might fail: {e}")
+
+    def _get_engine(self, source_name: str) -> Engine:
+        if source_name.lower() == 'local' or not self.external_engines:
+            return self.target_engine
+
+        for key, engine in self.external_engines.items():
+            if key.lower() == source_name.lower():
+                return engine
+        return self.target_engine
+
+    def _has_native_ai_functions(self, source_name: str) -> bool:
+        ai_enabled_dbs = {'snowflake', 'databricks', 'bigquery'}
+        return source_name.lower() in ai_enabled_dbs

     def discover_models(self):
-        """Discover all SQL models in the models directory"""
         self.models = {}
-        for sql_file in self.models_dir.glob("**/*.sql"):
+        sql_files = list(self.models_dir.glob("**/*.sql"))
+
+        for sql_file in sql_files:
             model_name = sql_file.stem
             with open(sql_file, "r") as f:
                 content = f.read()
+
             self.models[model_name] = SQLModel(
-                model_name, content, str(sql_file), self.npc_directory
+                model_name,
+                content,
+                str(sql_file),
+                str(sql_file.parent)
             )
-            print(f"Discovered model: {model_name}")
+
         return self.models

     def build_dag(self) -> Dict[str, Set[str]]:
-        """Build dependency graph"""
         dag = {}
         for model_name, model in self.models.items():
             dag[model_name] = model.dependencies
-        print(f"Built DAG: {dag}")
         return dag

     def topological_sort(self) -> List[str]:
-        """Generate execution order using topological sort"""
         dag = self.build_dag()
-        in_degree = defaultdict(int)
+
+        true_in_degree = {model_name: 0 for model_name in self.models.keys()}
+        adj_list = defaultdict(list)

-        for node, deps in dag.items():
-            for dep in deps:
-                in_degree[dep] += 1
-                if dep not in dag:
-                    dag[dep] = set()
+        for model_name, model in self.models.items():
+            for dependency in model.dependencies:
+                if dependency not in self.models:
+                    raise ValueError(f"Dependency '{dependency}' of model '{model_name}' not found in discovered models.")
+                true_in_degree[model_name] += 1
+                adj_list[dependency].append(model_name)

-        queue = deque([node for node in dag.keys() if len(dag[node]) == 0])
+        queue = deque([model_name for model_name in self.models.keys() if true_in_degree[model_name] == 0])
         result = []
-
+
         while queue:
-            node = queue.popleft()
-            result.append(node)
+            current_model = queue.popleft()
+            result.append(current_model)

-            for dependent, deps in dag.items():
-                if node in deps:
-                    deps.remove(node)
-                    if len(deps) == 0:
-                        queue.append(dependent)
+            for dependent_model in adj_list[current_model]:
+                true_in_degree[dependent_model] -= 1
+                if true_in_degree[dependent_model] == 0:
+                    queue.append(dependent_model)

-        if len(result) != len(dag):
-            raise ValueError("Circular dependency detected")
+        if len(result) != len(self.models):
+            raise ValueError("Circular dependency detected or some models not processed.")

-        print(f"Execution order: {result}")
         return result

-    def _replace_model_references(self, sql: str) -> str:
-        ref_pattern = r"\{\{\s*ref\s*\(\s*['\"]([^'\"]+)['\"]\s*\)\s*\}\}"
+    def _replace_model_references(self, sql_content: str) -> str:
+        ref_pattern = (
+            r"\{\{\s*ref\s*\(\s*['\"]([^'\"]+)['\"]\s*\)\s*\}\}"
+        )

         def replace_ref(match):
             model_name = match.group(1)
             if model_name not in self.models:
                 raise ValueError(
-                    f"Model '{model_name}' not found during ref replacement."
+                    f"Model '{model_name}' referenced by '{{{{ ref('{model_name}') }}}}' not found during compilation."
                 )
+
+            if self.target_schema:
+                return f"{self.target_schema}.{model_name}"
             return model_name

-        replaced_sql = re.sub(ref_pattern, replace_ref, sql)
+        replaced_sql = re.sub(ref_pattern, replace_ref, sql_content)
         return replaced_sql

-    def compile_model(self, model_name: str) -> str:
-        """Compile a single model, resolving refs."""
-        model = self.models[model_name]
-        compiled_sql = model.content
-        compiled_sql = self._replace_model_references(compiled_sql)
-        print(f"Compiled SQL for {model_name}:\n{compiled_sql}")
-        return compiled_sql
-
-    def _extract_base_query(self, sql: str) -> str:
-        for dep in self.models[self.current_model].dependencies:
-            sql = sql.replace(f"{{{{ ref('{dep}') }}}}", dep)
-
-        parts = sql.split("FROM", 1)
-        if len(parts) != 2:
-            raise ValueError("Invalid SQL syntax")
-
-        select_part = parts[0].replace("SELECT", "").strip()
-        from_part = "FROM" + parts[1]
+    def _clean_sql_for_execution(self, sql_content: str) -> str:
+        config_pattern = r'\{\{[\s]*config\((.*?)\)[\s]*\}\}'
+        cleaned_sql = re.sub(config_pattern, '', sql_content, flags=re.DOTALL).strip()
+        cleaned_sql = re.sub(r"--.*?\n", "\n", cleaned_sql)
+        cleaned_sql = re.sub(r"/\*.*?\*/", "", cleaned_sql, flags=re.DOTALL)
+        cleaned_sql = re.sub(r"\s+", " ", cleaned_sql).strip()
+        return cleaned_sql
+
+    def _execute_standard_sql(
+        self,
+        sql_to_execute: str,
+        engine: Engine
+    ) -> pd.DataFrame:
+        return pd.read_sql(sql_to_execute, engine)
+
+    def _execute_ai_model(self, cleaned_sql_content: str, model: SQLModel) -> pd.DataFrame:
+        processed_sql = self._replace_model_references(cleaned_sql_content)
+
+        db_type = self.target_engine.dialect.name.lower()
+        print(f"DEBUG: Determined DB dialect: '{db_type}'")
+
+        if self._has_native_ai_functions(db_type):
+            print(f"DEBUG: Native AI functions ARE supported for '{db_type}'. Attempting native translation.")
+            transformer = NativeDatabaseAITransformer(db_type)
+            sql_to_execute_with_native_ai = processed_sql
+
+            print("DEBUG: AI functions and NQL calls to replace (from model.ai_functions):")
+            if model.ai_functions:
+                for fn, params in model.ai_functions.items():
+                    print(f"  Function: {fn}, Full Call String: '{params.get('full_call_string')}'")
+            else:
+                print("  (None found in model.ai_functions to replace natively)")

-        columns = re.split(r",\s*(?![^()]*\))", select_part.strip())
+            # Replace NQL calls with native functions
+            for func_name, params in model.ai_functions.items():
+                original_nql_call = params.get('full_call_string')
+                if not original_nql_call:
+                    print(f"WARNING: 'full_call_string' not found for NQL function '{func_name}'. Skipping native replacement attempt.")
+                    continue
+
+                try:
+                    column_ref = params.get('column', '')
+
+                    transform_kwargs = {
+                        'text': column_ref,
+                        'prompt': column_ref,
+                        'query': params.get('query', ''),
+                        'context': params.get('context', ''),
+                        'npc': params.get('npc', '')
+                    }
+
+                    native_func_call = transformer.transform_ai_function(
+                        func_name,
+                        **transform_kwargs
+                    )
+
+                    print(f"DEBUG: Replacing '{original_nql_call}' with '{native_func_call}'")
+
+                    # NORMALIZE WHITESPACE in both the original call and the SQL
+                    # This handles multiline NQL calls with varying indentation
+                    normalized_original = re.sub(r'\s+', ' ', original_nql_call).strip()
+                    normalized_sql = re.sub(r'\s+', ' ', sql_to_execute_with_native_ai).strip()
+
+                    # Find the normalized pattern in the normalized SQL
+                    if normalized_original in normalized_sql:
+                        # Now do the replacement on the ORIGINAL (non-normalized) SQL
+                        # by creating a flexible regex pattern
+                        # Escape special regex chars but allow flexible whitespace
+                        pattern_parts = [re.escape(part) for part in original_nql_call.split()]
+                        flexible_pattern = r'\s*'.join(pattern_parts)
+                        pattern = re.compile(flexible_pattern, re.IGNORECASE | re.DOTALL)
+
+                        old_sql = sql_to_execute_with_native_ai
+                        sql_to_execute_with_native_ai = pattern.sub(native_func_call, sql_to_execute_with_native_ai, count=1)
+
+                        if old_sql != sql_to_execute_with_native_ai:
+                            print(f"DEBUG: Successfully replaced with flexible whitespace pattern.")
+                        else:
+                            print(f"ERROR: Flexible pattern replacement failed for '{func_name}'.")
+                    else:
+                        print(f"ERROR: Could not find normalized NQL call in SQL for '{func_name}'.")
+
+                except ValueError as e:
+                    print(f"WARNING: Native translation failed for '{func_name}': {e}. This AI function will NOT be natively translated.")
+                except Exception as e:
+                    print(f"ERROR: An unexpected error occurred during native AI transformation for '{func_name}': {e}. This AI function will NOT be natively translated.")
+
+            # Check for remaining NQL calls
+            if "nql." in sql_to_execute_with_native_ai.lower():
+                print(f"WARNING: Some NQL calls remain after native translation attempts. Replacing remaining NQL calls with NULLs.")
+                sql_to_execute_with_native_ai = self._replace_nql_calls_with_null(sql_to_execute_with_native_ai, model)
+
+            print(f"DEBUG: Final SQL for native/mixed AI execution:\n{sql_to_execute_with_native_ai}\n")
+            target_engine_for_native_ai = self.target_engine
+            return pd.read_sql(sql_to_execute_with_native_ai, target_engine_for_native_ai)
+
+        else:  # Fallback path when native AI is not supported for the determined DB type
+            print(f"DEBUG: Native AI functions are NOT supported for '{db_type}'. Entering Python fallback path.")
+            sql_with_nql_as_null = self._replace_nql_calls_with_null(processed_sql, model)
+
+            print(f"DEBUG: SQL to execute in pure fallback (NQL as NULLs for DB):\n{sql_with_nql_as_null}\n")
+
+            target_engine_for_fallback = self.target_engine  # Use target_engine directly
+            df = pd.read_sql(sql_with_nql_as_null, target_engine_for_fallback)
+
+            # Apply Python-driven AI functions on the DataFrame
+            for func_name, params in model.ai_functions.items():
+                try:
+                    result_series = self.npc_operations.execute_ai_function(func_name, df, **params)
+                    result_column_name = f"{func_name}_{params.get('column', 'result')}"  # Use a more specific alias if possible
+                    df[result_column_name] = result_series
+                    print(f"DEBUG: Python-driven AI function '{func_name}' executed. Result in column '{result_column_name}'.")
+                except Exception as e:
+                    print(f"ERROR: Executing Python-driven AI function '{func_name}': {e}. Assigning NULL.")
+                    df[f"{func_name}_{params.get('column', 'result')}"] = None
+
+            return df

-        final_columns = []
-        for col in columns:
-            if "synthesize(" not in col:
-                final_columns.append(col)
+    def _replace_nql_calls_with_null(self, sql_content: str, model: SQLModel) -> str:
+        """
+        Replaces specific nql.func(...) as alias calls with NULL as alias.
+        This is used for the fallback path or to clean up any NQL calls missed by native translation.
+        """
+        modified_sql = sql_content
+        for func_name, params in model.ai_functions.items():
+            original_nql_call = params.get('full_call_string')
+            if not original_nql_call:
+                print(f"WARNING: 'full_call_string' not found for NQL function '{func_name}'. Cannot replace with NULL.")
+                continue
+
+            # Extract alias from the original_nql_call string for NULL replacement
+            alias_match = re.search(r'\s+as\s+(\w+)(?:\W|$)', original_nql_call, re.IGNORECASE)
+            alias_name = alias_match.group(1) if alias_match else f"{func_name}_{params.get('column', 'result')}"
+
+            # Create a robust pattern for the original NQL call to handle whitespace variability
+            escaped_original_call = re.escape(original_nql_call.strip())
+            pattern_to_sub = re.compile(r"\s*".join(escaped_original_call.split()), flags=re.IGNORECASE)
+
+            # Perform the replacement with NULL as alias
+            old_sql = modified_sql
+            modified_sql, count = pattern_to_sub.subn(f"NULL as {alias_name}", modified_sql)
+            if count == 0:
+                print(f"WARNING: NULL replacement failed for NQL call '{original_nql_call}' (no change to SQL). SQL still contains NQL call.")
             else:
-                alias_match = re.search(r"as\s+(\w+)\s*$", col, re.IGNORECASE)
-                if alias_match:
-                    final_columns.append(f"NULL as {alias_match.group(1)}")
+                print(f"DEBUG: Replaced NQL call '{original_nql_call}' with 'NULL as {alias_name}'.")

-        final_sql = f"SELECT {', '.join(final_columns)} {from_part}"
-        print(f"Extracted base query:\n{final_sql}")
-
-        return final_sql
+        return modified_sql

     def execute_model(self, model_name: str) -> pd.DataFrame:
-        """Execute a model and materialize it to the database"""
         self.current_model = model_name
         model = self.models[model_name]
-        compiled_sql = self.compile_model(model_name)
-
-        try:
-            if model.has_ai_function:
-                df = self._execute_ai_model(compiled_sql, model)
-            else:
-                df = self._execute_standard_sql(compiled_sql)
-
-            self._materialize_to_db(model_name, df)
-            return df
-
-        except Exception as e:
-            print(f"Error executing model {model_name}: {str(e)}")
-            raise
-
-    def _execute_standard_sql(self, sql: str) -> pd.DataFrame:
-        with sqlite3.connect(self.db_path) as conn:
-            try:
-                sql = re.sub(r"--.*?\n", "\n", sql)
-                sql = re.sub(r"\s+", " ", sql).strip()
-                return pd.read_sql(sql, conn)
-            except Exception as e:
-                print(f"Failed to execute SQL: {sql}")
-                print(f"Error: {str(e)}")
-                raise
-
-    def execute_ai_function(self, query, npc, column_value, context):
-        """Execute a specific AI function logic - placeholder"""
-        print(f"Executing AI function on value: {column_value}")
-        synthesized_value = (
-            f"Processed({query}): {column_value} in context {context} with npc {npc}"
-        )
-        return synthesized_value
-
-    def _execute_ai_model(self, sql: str, model: SQLModel) -> pd.DataFrame:
-        try:
-            base_sql = self._extract_base_query(sql)
-            print(f"Executing base SQL:\n{base_sql}")
-            df = self._execute_standard_sql(base_sql)
-
-            # extract the columns they are between {} pairs
-            columns = re.findall(r"\{([^}]+)\}", sql)

-            # Handle AI function a
-            for func_name, params in model.ai_functions.items():
-                if func_name == "synthesize":
-                    query_template = params["query"]
-
-                    npc = params["npc"]
-                    # only take the after the split "/"
-                    npc = npc.split("/")[-1]
-                    context = params["context"]
-                    # Call the synthesize method using DataFrame directly
-                    synthesized_df = self.npc_operations.synthesize(
-                        query=query_template,  # The raw query to format
-                        df=df,  # The DataFrame containing the data
-                        columns=columns,  # The column(s) used to format the query
-                        npc=npc,  # NPC parameter
-                        context=context,  # Context parameter
-                        framework="default_framework",  # Adjust this as per your needs
-                    )
+        cleaned_sql_content = self._clean_sql_for_execution(model.content)
+
+        print(f"DEBUG: Cleaned SQL content for model '{model_name}':\n{cleaned_sql_content}\n")

-                    # Optionally pull the synthesized data into a new column
-                    df[
-                        "ai_analysis"
-                    ] = synthesized_df  # Adjust as per what synthesize returns
-
-            return df
+        if model.has_ai_function:
+            df = self._execute_ai_model(cleaned_sql_content, model)
+        else:
+            compiled_sql = self._replace_model_references(
+                cleaned_sql_content
+            )
+            print(f"DEBUG: Compiled standard SQL for model '{model_name}':\n{compiled_sql}\n")
+            df = self._execute_standard_sql(
+                compiled_sql,
+                self.target_engine
+            )

-        except Exception as e:
-            print(f"Error in AI model execution: {str(e)}")
-            raise
+        self._materialize_to_db(model_name, df, model.config)
+        return df

-    def _materialize_to_db(self, model_name: str, df: pd.DataFrame):
-        with sqlite3.connect(self.db_path) as conn:
-            conn.execute(f"DROP TABLE IF EXISTS {model_name}")
-            df.to_sql(model_name, conn, index=False)
-            print(f"Materialized model {model_name} to database")
+    def _materialize_to_db(
+        self,
+        model_name: str,
+        df: pd.DataFrame,
+        config: Dict
+    ):
+        materialization = config.get('materialized', 'table')
+
+        table_name = model_name
+        table_name_with_schema = (
+            f"{self.target_schema}.{table_name}"
+            if self.target_schema
+            else table_name
+        )
+
+        with self.target_engine.begin() as conn:
+            if self.target_schema:
+                inspector = inspect(conn)
+                if not inspector.has_schema(self.target_schema):
+                    print(f"Creating schema '{self.target_schema}'...")
+                    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {self.target_schema}"))
+                    print(f"Schema '{self.target_schema}' created (if it didn't exist).")
+
+            if materialization == 'view':
+                print(
+                    f"Warning: Materialization '{materialization}' requested for model '{model_name}'. "
+                    f"Pandas `to_sql` does not directly create SQL VIEWS from DataFrames. "
+                    f"Materializing as TABLE instead. You may need to manually create the view."
+                )
+                df.to_sql(
+                    table_name,
+                    self.target_engine,
+                    schema=self.target_schema,
+                    index=False,
+                    if_exists='replace'
+                )
+                print(f"Materialized model {model_name} as TABLE to {table_name_with_schema}")
+            else:
+                df.to_sql(
+                    table_name,
+                    self.target_engine,
+                    schema=self.target_schema,
+                    index=False,
+                    if_exists='replace'
+                )
+                print(f"Materialized model {model_name} as TABLE to {table_name_with_schema}")

     def _table_exists(self, table_name: str) -> bool:
-        with sqlite3.connect(self.db_path) as conn:
-            cursor = conn.cursor()
-            cursor.execute(
-                """
-                SELECT name FROM sqlite_master
-                WHERE type='table' AND name=?;
-                """,
-                (table_name,),
-            )
-            return cursor.fetchone() is not None
+        with self.target_engine.connect() as conn:
+            inspector = inspect(conn)
+            return inspector.has_table(table_name, schema=self.target_schema) or \
+                   inspector.has_view(table_name, schema=self.target_schema)

     def run_all_models(self):
-        """Execute all models in dependency order"""
         self.discover_models()
         execution_order = self.topological_sort()
+
         print(f"Running models in order: {execution_order}")

         results = {}
@@ -534,72 +793,12 @@ class ModelCompiler:
             model = self.models[model_name]
             for dep in model.dependencies:
                 if not self._table_exists(dep):
-                    raise ValueError(
-                        f"Dependency {dep} not found in database for model {model_name}"
-                    )
+                    if dep not in results:
+                        raise ValueError(
+                            f"Dependency '{dep}' for model '{model_name}' not found in database or already processed models. "
+                            f"Please ensure all dependencies are resolved and run first."
+                        )

             results[model_name] = self.execute_model(model_name)

         return results
-
-
-def create_example_models(
-    models_dir: str = os.path.abspath("./npc_team/factory/models/"),
-    db_path: str = "~/npcsh_history.db",
-    npc_directory: str = "./npc_team/",
-):
-    """Create example SQL model files"""
-    os.makedirs(os.path.abspath("./npc_team/factory/"), exist_ok=True)
-    os.makedirs(models_dir, exist_ok=True)
-    db_path = os.path.expanduser(db_path)
-    conn = sqlite3.connect(db_path)
-    df = pd.DataFrame(
-        {
-            "feedback": ["Great product!", "Could be better", "Amazing service"],
-            "customer_id": [1, 2, 3],
-            "timestamp": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-03"]),
-        }
-    )
-
-    df.to_sql("raw_customer_feedback", conn, index=False, if_exists="replace")
-    print("Created raw_customer_feedback table")
-
-    compiler = ModelCompiler(models_dir, db_path, npc_directory)
-    results = compiler.run_all_models()
-
-    for model_name, df in results.items():
-        print(f"\nResults for {model_name}:")
-        print(df.head())
-
-    customer_feedback = """
-    SELECT
-        feedback,
-        customer_id,
-        timestamp
-    FROM raw_customer_feedback
-    WHERE LENGTH(feedback) > 10;
-    """
-
-    customer_insights = """
-    SELECT
-        customer_id,
-        feedback,
-        timestamp,
-        synthesize(
-            "feedback text: {feedback}",
-            "analyst",
-            "feedback_analysis"
-        ) as ai_analysis
-    FROM {{ ref('customer_feedback') }};
-    """
-
-    models = {
-        "customer_feedback.sql": customer_feedback,
-        "customer_insights.sql": customer_insights,
-    }
-
-    for name, content in models.items():
-        path = os.path.join(models_dir, name)
-        with open(path, "w") as f:
-            f.write(content)
-        print(f"Created model: {name}")
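
For orientation, here is a minimal usage sketch of the reworked ModelCompiler API this diff introduces. The directory and database paths below are illustrative assumptions (they echo the defaults visible in the removed create_example_models helper), not values shipped in the release:

    from npcpy.sql.npcsql import ModelCompiler

    # A string target_engine is wrapped via create_engine_from_path;
    # a SQLAlchemy Engine can be passed directly instead.
    compiler = ModelCompiler(
        models_dir="./npc_team/factory/models/",
        target_engine="~/npcsh_history.db",
        npc_directory="./npc_team/",
    )

    # run_all_models() discovers *.sql models, topologically sorts their
    # {{ ref('...') }} dependencies, executes each model (translating nql.*
    # calls natively on Snowflake/Databricks/BigQuery, otherwise applying
    # them row-wise in Python), and materializes each resulting DataFrame.
    results = compiler.run_all_models()
    for name, df in results.items():
        print(name, df.head())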