ds-agent-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/bin/ds-agent.js +451 -0
  2. package/ds_agent/__init__.py +8 -0
  3. package/package.json +28 -0
  4. package/requirements.txt +126 -0
  5. package/setup.py +35 -0
  6. package/src/__init__.py +7 -0
  7. package/src/_compress_tool_result.py +118 -0
  8. package/src/api/__init__.py +4 -0
  9. package/src/api/app.py +1626 -0
  10. package/src/cache/__init__.py +5 -0
  11. package/src/cache/cache_manager.py +561 -0
  12. package/src/cli.py +2886 -0
  13. package/src/dynamic_prompts.py +281 -0
  14. package/src/orchestrator.py +4799 -0
  15. package/src/progress_manager.py +139 -0
  16. package/src/reasoning/__init__.py +332 -0
  17. package/src/reasoning/business_summary.py +431 -0
  18. package/src/reasoning/data_understanding.py +356 -0
  19. package/src/reasoning/model_explanation.py +383 -0
  20. package/src/reasoning/reasoning_trace.py +239 -0
  21. package/src/registry/__init__.py +3 -0
  22. package/src/registry/tools_registry.py +3 -0
  23. package/src/session_memory.py +448 -0
  24. package/src/session_store.py +370 -0
  25. package/src/storage/__init__.py +19 -0
  26. package/src/storage/artifact_store.py +620 -0
  27. package/src/storage/helpers.py +116 -0
  28. package/src/storage/huggingface_storage.py +694 -0
  29. package/src/storage/r2_storage.py +0 -0
  30. package/src/storage/user_files_service.py +288 -0
  31. package/src/tools/__init__.py +335 -0
  32. package/src/tools/advanced_analysis.py +823 -0
  33. package/src/tools/advanced_feature_engineering.py +708 -0
  34. package/src/tools/advanced_insights.py +578 -0
  35. package/src/tools/advanced_preprocessing.py +549 -0
  36. package/src/tools/advanced_training.py +906 -0
  37. package/src/tools/agent_tool_mapping.py +326 -0
  38. package/src/tools/auto_pipeline.py +420 -0
  39. package/src/tools/autogluon_training.py +1480 -0
  40. package/src/tools/business_intelligence.py +860 -0
  41. package/src/tools/cloud_data_sources.py +581 -0
  42. package/src/tools/code_interpreter.py +390 -0
  43. package/src/tools/computer_vision.py +614 -0
  44. package/src/tools/data_cleaning.py +614 -0
  45. package/src/tools/data_profiling.py +593 -0
  46. package/src/tools/data_type_conversion.py +268 -0
  47. package/src/tools/data_wrangling.py +433 -0
  48. package/src/tools/eda_reports.py +284 -0
  49. package/src/tools/enhanced_feature_engineering.py +241 -0
  50. package/src/tools/feature_engineering.py +302 -0
  51. package/src/tools/matplotlib_visualizations.py +1327 -0
  52. package/src/tools/model_training.py +520 -0
  53. package/src/tools/nlp_text_analytics.py +761 -0
  54. package/src/tools/plotly_visualizations.py +497 -0
  55. package/src/tools/production_mlops.py +852 -0
  56. package/src/tools/time_series.py +507 -0
  57. package/src/tools/tools_registry.py +2133 -0
  58. package/src/tools/visualization_engine.py +559 -0
  59. package/src/utils/__init__.py +42 -0
  60. package/src/utils/error_recovery.py +313 -0
  61. package/src/utils/parallel_executor.py +402 -0
  62. package/src/utils/polars_helpers.py +248 -0
  63. package/src/utils/schema_extraction.py +132 -0
  64. package/src/utils/semantic_layer.py +392 -0
  65. package/src/utils/token_budget.py +411 -0
  66. package/src/utils/validation.py +377 -0
  67. package/src/workflow_state.py +154 -0
@@ -0,0 +1,288 @@
1
+ """
2
+ User Files Service - Manages file metadata in Supabase
3
+
4
+ This service:
5
+ 1. Tracks all user files (plots, CSVs, reports, models) in Supabase
6
+ 2. Provides file listing for the Assets panel
7
+ 3. Handles file expiration and cleanup coordination
8
+ 4. Works with R2StorageService for actual file storage
9
+ """
10
+
11
import os
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from enum import Enum
from typing import Any, Dict, List, Optional
16
+
17
+ # Supabase client import
18
+ try:
19
+ from supabase import create_client, Client
20
+ except ImportError:
21
+ print("Warning: supabase package not installed. Run: pip install supabase")
22
+ Client = None
23
+
24
+ SUPABASE_URL = os.getenv("SUPABASE_URL", "")
25
+ SUPABASE_SERVICE_KEY = os.getenv("SUPABASE_SERVICE_KEY", "") # Use service key for backend
26
+
27
+
28
+ class FileType(Enum):
29
+ PLOT = "plot"
30
+ CSV = "csv"
31
+ REPORT = "report"
32
+ MODEL = "model"
33
+
34
+
35
+ @dataclass
36
+ class UserFile:
37
+ """Represents a user file record."""
38
+ id: str
39
+ user_id: str
40
+ session_id: Optional[str]
41
+ file_type: FileType
42
+ file_name: str
43
+ r2_key: str
44
+ size_bytes: int
45
+ mime_type: str
46
+ metadata: Dict[str, Any]
47
+ created_at: datetime
48
+ expires_at: datetime
49
+ download_url: Optional[str] = None
50
+
51
+
52
+ class UserFilesService:
53
+ """Service for managing user file metadata in Supabase."""
54
+
55
+ def __init__(self):
56
+ """Initialize Supabase client."""
57
+ if not SUPABASE_URL or not SUPABASE_SERVICE_KEY:
58
+ raise ValueError("SUPABASE_URL and SUPABASE_SERVICE_KEY must be set")
59
+
60
+ self.client: Client = create_client(SUPABASE_URL, SUPABASE_SERVICE_KEY)
61
+ self.table = "user_files"
62
+
63
+ # ==================== CREATE ====================
64
+
65
+ def create_file_record(
66
+ self,
67
+ user_id: str,
68
+ file_type: FileType,
69
+ file_name: str,
70
+ r2_key: str,
71
+ size_bytes: int,
72
+ session_id: Optional[str] = None,
73
+ mime_type: str = "application/octet-stream",
74
+ metadata: Optional[Dict[str, Any]] = None,
75
+ expires_in_days: int = 7
76
+ ) -> UserFile:
77
+ """
78
+ Create a file record in Supabase.
79
+
80
+ Args:
81
+ user_id: User ID
82
+ file_type: Type of file
83
+ file_name: Display name
84
+ r2_key: R2 storage key
85
+ size_bytes: File size
86
+ session_id: Optional chat session ID
87
+ mime_type: MIME type
88
+ metadata: Additional metadata (plot type, metrics, etc.)
89
+ expires_in_days: Days until file expires
90
+
91
+ Returns:
92
+ Created UserFile record
93
+ """
94
+ expires_at = datetime.utcnow() + timedelta(days=expires_in_days)
95
+
96
+ data = {
97
+ "user_id": user_id,
98
+ "session_id": session_id,
99
+ "file_type": file_type.value,
100
+ "file_name": file_name,
101
+ "r2_key": r2_key,
102
+ "size_bytes": size_bytes,
103
+ "mime_type": mime_type,
104
+ "metadata": metadata or {},
105
+ "expires_at": expires_at.isoformat()
106
+ }
107
+
108
+ result = self.client.table(self.table).insert(data).execute()
109
+
110
+ if result.data:
111
+ return self._to_user_file(result.data[0])
112
+ raise Exception("Failed to create file record")
113
+
114
+ # ==================== READ ====================
115
+
116
+ def get_user_files(
117
+ self,
118
+ user_id: str,
119
+ file_type: Optional[FileType] = None,
120
+ session_id: Optional[str] = None,
121
+ include_expired: bool = False
122
+ ) -> List[UserFile]:
123
+ """
124
+ Get all files for a user.
125
+
126
+ Args:
127
+ user_id: User ID
128
+ file_type: Optional filter by type
129
+ session_id: Optional filter by session
130
+ include_expired: Include expired files
131
+
132
+ Returns:
133
+ List of UserFile records
134
+ """
135
+ query = self.client.table(self.table)\
136
+ .select("*")\
137
+ .eq("user_id", user_id)\
138
+ .eq("is_deleted", False)
139
+
140
+ if file_type:
141
+ query = query.eq("file_type", file_type.value)
142
+
143
+ if session_id:
144
+ query = query.eq("session_id", session_id)
145
+
146
+ if not include_expired:
147
+ query = query.gt("expires_at", datetime.utcnow().isoformat())
148
+
149
+ query = query.order("created_at", desc=True)
150
+
151
+ result = query.execute()
152
+
153
+ return [self._to_user_file(row) for row in (result.data or [])]
154
+
155
+ def get_file_by_id(self, file_id: str) -> Optional[UserFile]:
156
+ """Get a specific file by ID."""
157
+ result = self.client.table(self.table)\
158
+ .select("*")\
159
+ .eq("id", file_id)\
160
+ .single()\
161
+ .execute()
162
+
163
+ if result.data:
164
+ return self._to_user_file(result.data)
165
+ return None
166
+
167
+ def get_file_by_r2_key(self, r2_key: str) -> Optional[UserFile]:
168
+ """Get a file by R2 key."""
169
+ result = self.client.table(self.table)\
170
+ .select("*")\
171
+ .eq("r2_key", r2_key)\
172
+ .single()\
173
+ .execute()
174
+
175
+ if result.data:
176
+ return self._to_user_file(result.data)
177
+ return None
178
+
179
+ def get_session_files(self, session_id: str) -> List[UserFile]:
180
+ """Get all files for a chat session."""
181
+ result = self.client.table(self.table)\
182
+ .select("*")\
183
+ .eq("session_id", session_id)\
184
+ .eq("is_deleted", False)\
185
+ .order("created_at", desc=True)\
186
+ .execute()
187
+
188
+ return [self._to_user_file(row) for row in (result.data or [])]
189
+
190
+ # ==================== UPDATE ====================
191
+
192
+ def extend_expiration(self, file_id: str, additional_days: int = 7) -> bool:
193
+ """Extend file expiration date."""
194
+ file = self.get_file_by_id(file_id)
195
+ if not file:
196
+ return False
197
+
198
+ new_expires = datetime.utcnow() + timedelta(days=additional_days)
199
+
200
+ result = self.client.table(self.table)\
201
+ .update({"expires_at": new_expires.isoformat()})\
202
+ .eq("id", file_id)\
203
+ .execute()
204
+
205
+ return bool(result.data)
206
+
207
+ # ==================== DELETE ====================
208
+
209
+ def soft_delete_file(self, file_id: str) -> bool:
210
+ """Soft delete a file (mark as deleted)."""
211
+ result = self.client.table(self.table)\
212
+ .update({"is_deleted": True})\
213
+ .eq("id", file_id)\
214
+ .execute()
215
+
216
+ return bool(result.data)
217
+
218
+ def hard_delete_file(self, file_id: str) -> bool:
219
+ """Permanently delete a file record."""
220
+ result = self.client.table(self.table)\
221
+ .delete()\
222
+ .eq("id", file_id)\
223
+ .execute()
224
+
225
+ return bool(result.data)
226
+
227
+ def get_expired_files(self) -> List[UserFile]:
228
+ """Get all expired files for cleanup."""
229
+ result = self.client.table(self.table)\
230
+ .select("*")\
231
+ .lt("expires_at", datetime.utcnow().isoformat())\
232
+ .eq("is_deleted", False)\
233
+ .execute()
234
+
235
+ return [self._to_user_file(row) for row in (result.data or [])]
236
+
237
+ # ==================== STATS ====================
238
+
239
+ def get_user_storage_stats(self, user_id: str) -> Dict[str, Any]:
240
+ """Get storage statistics for a user."""
241
+ files = self.get_user_files(user_id, include_expired=False)
242
+
243
+ stats = {
244
+ "total_files": len(files),
245
+ "total_size_bytes": sum(f.size_bytes for f in files),
246
+ "by_type": {}
247
+ }
248
+
249
+ for file_type in FileType:
250
+ type_files = [f for f in files if f.file_type == file_type]
251
+ stats["by_type"][file_type.value] = {
252
+ "count": len(type_files),
253
+ "size_bytes": sum(f.size_bytes for f in type_files)
254
+ }
255
+
256
+ stats["total_size_mb"] = round(stats["total_size_bytes"] / (1024 * 1024), 2)
257
+
258
+ return stats
259
+
260
+ # ==================== HELPERS ====================
261
+
262
+ def _to_user_file(self, row: Dict[str, Any]) -> UserFile:
263
+ """Convert database row to UserFile object."""
264
+ return UserFile(
265
+ id=row["id"],
266
+ user_id=row["user_id"],
267
+ session_id=row.get("session_id"),
268
+ file_type=FileType(row["file_type"]),
269
+ file_name=row["file_name"],
270
+ r2_key=row["r2_key"],
271
+ size_bytes=row.get("size_bytes", 0),
272
+ mime_type=row.get("mime_type", "application/octet-stream"),
273
+ metadata=row.get("metadata", {}),
274
+ created_at=datetime.fromisoformat(row["created_at"].replace("Z", "+00:00")),
275
+ expires_at=datetime.fromisoformat(row["expires_at"].replace("Z", "+00:00"))
276
+ )
277
+
278
+
279
+ # ==================== SINGLETON ====================
280
+
281
+ _files_service: Optional[UserFilesService] = None
282
+
283
+ def get_files_service() -> UserFilesService:
284
+ """Get or create UserFilesService singleton."""
285
+ global _files_service
286
+ if _files_service is None:
287
+ _files_service = UserFilesService()
288
+ return _files_service
@@ -0,0 +1,335 @@
1
+ """Tools module initialization - All 44 tools."""
2
+
3
+ # Basic Tools (10)
4
+ from .data_profiling import (
5
+ profile_dataset,
6
+ detect_data_quality_issues,
7
+ analyze_correlations,
8
+ get_smart_summary, # NEW: Enhanced data summary
9
+ detect_label_errors # NEW: cleanlab label error detection
10
+ )
11
+
12
+ from .data_cleaning import (
13
+ clean_missing_values,
14
+ handle_outliers,
15
+ fix_data_types
16
+ )
17
+
18
+ from .data_type_conversion import (
19
+ force_numeric_conversion,
20
+ smart_type_inference
21
+ )
22
+
23
+ # Data Wrangling Tools (3) - NEW
24
+ from .data_wrangling import (
25
+ merge_datasets,
26
+ concat_datasets,
27
+ reshape_dataset
28
+ )
29
+
30
+ from .feature_engineering import (
31
+ create_time_features,
32
+ encode_categorical
33
+ )
34
+
35
+ from .model_training import (
36
+ train_baseline_models,
37
+ generate_model_report
38
+ )
39
+
40
+ # AutoGluon-Powered Training (9) - Classification, Regression, Time Series, Optimization
41
+ from .autogluon_training import (
42
+ train_with_autogluon,
43
+ predict_with_autogluon,
44
+ forecast_with_autogluon,
45
+ optimize_autogluon_model,
46
+ analyze_autogluon_model,
47
+ extend_autogluon_training,
48
+ train_multilabel_autogluon,
49
+ backtest_timeseries,
50
+ analyze_timeseries_model
51
+ )
52
+
53
+ # Advanced Analysis Tools (5)
54
+ from .advanced_analysis import (
55
+ perform_eda_analysis,
56
+ detect_model_issues,
57
+ detect_anomalies,
58
+ detect_and_handle_multicollinearity,
59
+ perform_statistical_tests
60
+ )
61
+
62
+ # Advanced Feature Engineering Tools (4)
63
+ from .advanced_feature_engineering import (
64
+ create_interaction_features,
65
+ create_aggregation_features,
66
+ engineer_text_features,
67
+ auto_feature_engineering
68
+ )
69
+
70
+ # Advanced Preprocessing Tools (3)
71
+ from .advanced_preprocessing import (
72
+ handle_imbalanced_data,
73
+ perform_feature_scaling,
74
+ split_data_strategically
75
+ )
76
+
77
+ # Advanced Training Tools (3)
78
+ from .advanced_training import (
79
+ hyperparameter_tuning,
80
+ train_ensemble_models,
81
+ perform_cross_validation
82
+ )
83
+
84
+ # Business Intelligence Tools (4)
85
+ from .business_intelligence import (
86
+ perform_cohort_analysis,
87
+ perform_rfm_analysis,
88
+ detect_causal_relationships,
89
+ generate_business_insights
90
+ )
91
+
92
+ # Computer Vision Tools (3)
93
+ from .computer_vision import (
94
+ extract_image_features,
95
+ perform_image_clustering,
96
+ analyze_tabular_image_hybrid
97
+ )
98
+
99
+ # NLP/Text Analytics Tools (4)
100
+ from .nlp_text_analytics import (
101
+ perform_topic_modeling,
102
+ perform_named_entity_recognition,
103
+ analyze_sentiment_advanced,
104
+ perform_text_similarity
105
+ )
106
+
107
+ # Production/MLOps Tools (5 + 2 new)
108
+ from .production_mlops import (
109
+ monitor_model_drift,
110
+ explain_predictions,
111
+ generate_model_card,
112
+ perform_ab_test_analysis,
113
+ detect_feature_leakage,
114
+ monitor_drift_evidently, # NEW: Evidently drift reports
115
+ explain_with_dtreeviz # NEW: Decision tree visualization
116
+ )
117
+
118
+ # Time Series Tools (3)
119
+ from .time_series import (
120
+ forecast_time_series,
121
+ detect_seasonality_trends,
122
+ create_time_series_features
123
+ )
124
+
125
+ # Advanced Insights Tools (6) - NEW
126
+ from .advanced_insights import (
127
+ analyze_root_cause,
128
+ detect_trends_and_seasonality,
129
+ detect_anomalies_advanced,
130
+ perform_hypothesis_testing,
131
+ analyze_distribution,
132
+ perform_segment_analysis
133
+ )
134
+
135
+ # Automated Pipeline Tools (2) - NEW
136
+ from .auto_pipeline import (
137
+ auto_ml_pipeline,
138
+ auto_feature_selection
139
+ )
140
+
141
+ # Visualization Tools (5) - NEW
142
+ from .visualization_engine import (
143
+ generate_all_plots,
144
+ generate_data_quality_plots,
145
+ generate_eda_plots,
146
+ generate_model_performance_plots,
147
+ generate_feature_importance_plot
148
+ )
149
+
150
+ # Interactive Plotly Visualizations (6) - NEW PHASE 2
151
+ from .plotly_visualizations import (
152
+ generate_interactive_scatter,
153
+ generate_interactive_histogram,
154
+ generate_interactive_correlation_heatmap,
155
+ generate_interactive_box_plots,
156
+ generate_interactive_time_series,
157
+ generate_plotly_dashboard
158
+ )
159
+
160
+ # EDA Report Generation (2) - NEW PHASE 2
161
+ from .eda_reports import (
162
+ generate_ydata_profiling_report,
163
+ generate_sweetviz_report # NEW: Sweetviz EDA with comparison
164
+ )
165
+
166
+ # Code Interpreter (2) - NEW PHASE 2 - CRITICAL for True AI Agent
167
+ from .code_interpreter import (
168
+ execute_python_code,
169
+ execute_code_from_file
170
+ )
171
+
172
+ # Cloud Data Sources (4) - NEW: BigQuery Integration
173
+ from .cloud_data_sources import (
174
+ load_bigquery_table,
175
+ write_bigquery_table,
176
+ profile_bigquery_table,
177
+ query_bigquery
178
+ )
179
+
180
+ from .tools_registry import TOOLS, get_tool_by_name, get_all_tool_names
181
+
182
+ from .enhanced_feature_engineering import (
183
+ create_ratio_features,
184
+ create_statistical_features,
185
+ create_log_features,
186
+ create_binned_features,
187
+ )
188
+
189
+ __all__ = [
190
+ # Basic Data Profiling (4 + 1 new) - UPDATED
191
+ "profile_dataset",
192
+ "detect_data_quality_issues",
193
+ "analyze_correlations",
194
+ "get_smart_summary", # NEW
195
+ "detect_label_errors", # NEW: cleanlab
196
+
197
+ # Basic Data Cleaning (3)
198
+ "clean_missing_values",
199
+ "handle_outliers",
200
+ "fix_data_types",
201
+
202
+ # Data Type Conversion (2)
203
+ "force_numeric_conversion",
204
+ "smart_type_inference",
205
+
206
+ # Data Wrangling (3) - NEW
207
+ "merge_datasets",
208
+ "concat_datasets",
209
+ "reshape_dataset",
210
+
211
+ # Basic Feature Engineering (2)
212
+ "create_time_features",
213
+ "encode_categorical",
214
+
215
+ # Basic Model Training (2)
216
+ "train_baseline_models",
217
+ "generate_model_report",
218
+
219
+ # AutoGluon Training (9) - NEW
220
+ "train_with_autogluon",
221
+ "predict_with_autogluon",
222
+ "forecast_with_autogluon",
223
+ "optimize_autogluon_model",
224
+ "analyze_autogluon_model",
225
+ "extend_autogluon_training",
226
+ "train_multilabel_autogluon",
227
+ "backtest_timeseries",
228
+ "analyze_timeseries_model",
229
+
230
+ # Advanced Analysis (5)
231
+ "perform_eda_analysis",
232
+ "detect_model_issues",
233
+ "detect_anomalies",
234
+ "detect_and_handle_multicollinearity",
235
+ "perform_statistical_tests",
236
+
237
+ # Advanced Feature Engineering (4)
238
+ "create_interaction_features",
239
+ "create_aggregation_features",
240
+ "engineer_text_features",
241
+ "auto_feature_engineering",
242
+
243
+ # Advanced Preprocessing (3)
244
+ "handle_imbalanced_data",
245
+ "perform_feature_scaling",
246
+ "split_data_strategically",
247
+
248
+ # Advanced Training (3)
249
+ "hyperparameter_tuning",
250
+ "train_ensemble_models",
251
+ "perform_cross_validation",
252
+
253
+ # Business Intelligence (4)
254
+ "perform_cohort_analysis",
255
+ "perform_rfm_analysis",
256
+ "detect_causal_relationships",
257
+ "generate_business_insights",
258
+
259
+ # Computer Vision (3)
260
+ "extract_image_features",
261
+ "perform_image_clustering",
262
+ "analyze_tabular_image_hybrid",
263
+
264
+ # NLP/Text Analytics (4)
265
+ "perform_topic_modeling",
266
+ "perform_named_entity_recognition",
267
+ "analyze_sentiment_advanced",
268
+ "perform_text_similarity",
269
+
270
+ # Production/MLOps (5 + 2 new)
271
+ "monitor_model_drift",
272
+ "explain_predictions",
273
+ "generate_model_card",
274
+ "perform_ab_test_analysis",
275
+ "detect_feature_leakage",
276
+ "monitor_drift_evidently", # NEW: Evidently
277
+ "explain_with_dtreeviz", # NEW: dtreeviz
278
+
279
+ # Time Series (3)
280
+ "forecast_time_series",
281
+ "detect_seasonality_trends",
282
+ "create_time_series_features",
283
+
284
+ # Advanced Insights (6) - NEW
285
+ "analyze_root_cause",
286
+ "detect_trends_and_seasonality",
287
+ "detect_anomalies_advanced",
288
+ "perform_hypothesis_testing",
289
+ "analyze_distribution",
290
+ "perform_segment_analysis",
291
+
292
+ # Automated Pipeline (2) - NEW
293
+ "auto_ml_pipeline",
294
+ "auto_feature_selection",
295
+
296
+ # Visualization (5) - NEW
297
+ "generate_all_plots",
298
+ "generate_data_quality_plots",
299
+ "generate_eda_plots",
300
+ "generate_model_performance_plots",
301
+ "generate_feature_importance_plot",
302
+
303
+ # Interactive Plotly Visualizations (6) - NEW PHASE 2
304
+ "generate_interactive_scatter",
305
+ "generate_interactive_histogram",
306
+ "generate_interactive_correlation_heatmap",
307
+ "generate_interactive_box_plots",
308
+ "generate_interactive_time_series",
309
+ "generate_plotly_dashboard",
310
+
311
+ # EDA Report Generation (2) - NEW PHASE 2
312
+ "generate_ydata_profiling_report",
313
+ "generate_sweetviz_report", # NEW: Sweetviz
314
+
315
+ # Code Interpreter (2) - NEW PHASE 2 - CRITICAL for True AI Agent
316
+ "execute_python_code",
317
+ "execute_code_from_file",
318
+
319
+ # Cloud Data Sources (4) - NEW: BigQuery Integration
320
+ "load_bigquery_table",
321
+ "write_bigquery_table",
322
+ "profile_bigquery_table",
323
+ "query_bigquery",
324
+
325
+ # Enhanced Feature Engineering (4) - NEW
326
+ "create_ratio_features",
327
+ "create_statistical_features",
328
+ "create_log_features",
329
+ "create_binned_features",
330
+
331
+ # Registry
332
+ "TOOLS",
333
+ "get_tool_by_name",
334
+ "get_all_tool_names",
335
+ ]