ds-agent-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/bin/ds-agent.js +451 -0
  2. package/ds_agent/__init__.py +8 -0
  3. package/package.json +28 -0
  4. package/requirements.txt +126 -0
  5. package/setup.py +35 -0
  6. package/src/__init__.py +7 -0
  7. package/src/_compress_tool_result.py +118 -0
  8. package/src/api/__init__.py +4 -0
  9. package/src/api/app.py +1626 -0
  10. package/src/cache/__init__.py +5 -0
  11. package/src/cache/cache_manager.py +561 -0
  12. package/src/cli.py +2886 -0
  13. package/src/dynamic_prompts.py +281 -0
  14. package/src/orchestrator.py +4799 -0
  15. package/src/progress_manager.py +139 -0
  16. package/src/reasoning/__init__.py +332 -0
  17. package/src/reasoning/business_summary.py +431 -0
  18. package/src/reasoning/data_understanding.py +356 -0
  19. package/src/reasoning/model_explanation.py +383 -0
  20. package/src/reasoning/reasoning_trace.py +239 -0
  21. package/src/registry/__init__.py +3 -0
  22. package/src/registry/tools_registry.py +3 -0
  23. package/src/session_memory.py +448 -0
  24. package/src/session_store.py +370 -0
  25. package/src/storage/__init__.py +19 -0
  26. package/src/storage/artifact_store.py +620 -0
  27. package/src/storage/helpers.py +116 -0
  28. package/src/storage/huggingface_storage.py +694 -0
  29. package/src/storage/r2_storage.py +0 -0
  30. package/src/storage/user_files_service.py +288 -0
  31. package/src/tools/__init__.py +335 -0
  32. package/src/tools/advanced_analysis.py +823 -0
  33. package/src/tools/advanced_feature_engineering.py +708 -0
  34. package/src/tools/advanced_insights.py +578 -0
  35. package/src/tools/advanced_preprocessing.py +549 -0
  36. package/src/tools/advanced_training.py +906 -0
  37. package/src/tools/agent_tool_mapping.py +326 -0
  38. package/src/tools/auto_pipeline.py +420 -0
  39. package/src/tools/autogluon_training.py +1480 -0
  40. package/src/tools/business_intelligence.py +860 -0
  41. package/src/tools/cloud_data_sources.py +581 -0
  42. package/src/tools/code_interpreter.py +390 -0
  43. package/src/tools/computer_vision.py +614 -0
  44. package/src/tools/data_cleaning.py +614 -0
  45. package/src/tools/data_profiling.py +593 -0
  46. package/src/tools/data_type_conversion.py +268 -0
  47. package/src/tools/data_wrangling.py +433 -0
  48. package/src/tools/eda_reports.py +284 -0
  49. package/src/tools/enhanced_feature_engineering.py +241 -0
  50. package/src/tools/feature_engineering.py +302 -0
  51. package/src/tools/matplotlib_visualizations.py +1327 -0
  52. package/src/tools/model_training.py +520 -0
  53. package/src/tools/nlp_text_analytics.py +761 -0
  54. package/src/tools/plotly_visualizations.py +497 -0
  55. package/src/tools/production_mlops.py +852 -0
  56. package/src/tools/time_series.py +507 -0
  57. package/src/tools/tools_registry.py +2133 -0
  58. package/src/tools/visualization_engine.py +559 -0
  59. package/src/utils/__init__.py +42 -0
  60. package/src/utils/error_recovery.py +313 -0
  61. package/src/utils/parallel_executor.py +402 -0
  62. package/src/utils/polars_helpers.py +248 -0
  63. package/src/utils/schema_extraction.py +132 -0
  64. package/src/utils/semantic_layer.py +392 -0
  65. package/src/utils/token_budget.py +411 -0
  66. package/src/utils/validation.py +377 -0
  67. package/src/workflow_state.py +154 -0
@@ -0,0 +1,507 @@
1
+ """
2
+ Time Series & Forecasting Tools
3
+ Tools for time series analysis, forecasting, seasonality detection, and feature engineering.
4
+ """
5
+
6
+ import polars as pl
7
+ import numpy as np
8
+ from typing import Dict, Any, List, Optional
9
+ from pathlib import Path
10
+ import sys
11
+ import os
12
+ import warnings
13
+
14
+ warnings.filterwarnings('ignore')
15
+
16
+ # Add parent directory to path for imports
17
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
18
+
19
+ # Lazy imports - only import when needed to avoid blocking app startup
20
+ # from statsmodels.tsa.arima.model import ARIMA
21
+ # from statsmodels.tsa.statespace.sarimax import SARIMAX
22
+ # from statsmodels.tsa.holtwinters import ExponentialSmoothing
23
+ # from statsmodels.tsa.seasonal import seasonal_decompose, STL
24
+ # from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
25
+ # from prophet import Prophet
26
+ import pandas as pd
27
+
28
+ from ds_agent.utils.polars_helpers import load_dataframe, save_dataframe
29
+ from ds_agent.utils.validation import validate_file_exists, validate_file_format, validate_dataframe, validate_column_exists
30
+
31
+
32
def forecast_time_series(
    file_path: str,
    time_col: str,
    target_col: str,
    forecast_horizon: int = 30,
    method: str = "prophet",
    seasonal_period: Optional[int] = None,
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Forecast time series using ARIMA, SARIMA, Prophet, or Exponential Smoothing.

    Args:
        file_path: Path to time series dataset
        time_col: Time/date column name
        target_col: Target variable to forecast
        forecast_horizon: Number of periods to forecast ahead
        method: Forecasting method ('arima', 'auto_arima', 'sarima', 'prophet', 'exponential_smoothing')
        seasonal_period: Seasonal period (e.g., 7 for weekly, 12 for monthly)
        output_path: Path to save forecast results

    Returns:
        Dictionary with forecast values and metrics on success, or a
        ``{'status': 'error', 'message': ...}`` dictionary when a required
        library is missing or the method is unsupported.
    """
    supported_methods = {'prophet', 'arima', 'auto_arima', 'sarima', 'exponential_smoothing'}
    if method not in supported_methods:
        # Report as a status dict (consistent with the import-failure paths
        # below) instead of raising, so tool callers get a uniform shape.
        return {
            'status': 'error',
            'message': f"Unsupported method: {method}. Choose one of: {sorted(supported_methods)}"
        }

    # Load data
    df = load_dataframe(file_path)
    validate_dataframe(df)
    validate_column_exists(df, time_col)
    validate_column_exists(df, target_col)

    # Sort by time
    df = df.sort(time_col)

    # Lazy import of time series libraries (keeps app startup fast)
    try:
        if method == "prophet":
            from prophet import Prophet
        elif method in ["arima", "sarima"]:
            from statsmodels.tsa.arima.model import ARIMA
            from statsmodels.tsa.statespace.sarimax import SARIMAX
        elif method == "exponential_smoothing":
            from statsmodels.tsa.holtwinters import ExponentialSmoothing
    except ImportError as e:
        return {
            'status': 'error',
            'message': f"Required library not installed for {method}: {str(e)}"
        }

    print(f"📈 Forecasting with {method} (horizon={forecast_horizon})...")

    # Convert to pandas for the time series libraries. Coerce the time column
    # to real datetimes up front: the raw column may be strings, and the
    # statsmodels/pmdarima branches below do date arithmetic on the index
    # (pd.date_range from ts_data.index[-1]), which requires datetimes.
    df_pd = df.to_pandas()
    df_pd[time_col] = pd.to_datetime(df_pd[time_col])

    def _future_index(ts_index) -> "pd.DatetimeIndex":
        # Continue the series at its own cadence; fall back to daily when the
        # frequency cannot be inferred (e.g. irregular timestamps).
        freq = pd.infer_freq(ts_index) or 'D'
        return pd.date_range(start=ts_index[-1], periods=forecast_horizon + 1, freq=freq)[1:]

    if method == "prophet":
        # Prophet requires 'ds' and 'y' columns
        prophet_df = pd.DataFrame({
            'ds': df_pd[time_col],
            'y': df_pd[target_col]
        })

        model = Prophet(yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=False)
        model.fit(prophet_df)

        # Create future dataframe
        future = model.make_future_dataframe(periods=forecast_horizon)
        forecast = model.predict(future)

        # Extract forecast values (last `forecast_horizon` rows are the future)
        forecast_values = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(forecast_horizon)

        result = {
            'method': 'prophet',
            'forecast': forecast_values.to_dict('records'),
            'model_components': {
                'trend': forecast['trend'].tail(forecast_horizon).tolist(),
                # 'weekly' is absent when Prophet decides against that seasonality
                'weekly': forecast.get('weekly', pd.Series([0]*forecast_horizon)).tail(forecast_horizon).tolist()
            }
        }

    elif method == "auto_arima":
        # Auto ARIMA using pmdarima - automatically finds best (p,d,q) order
        try:
            import pmdarima as pm
        except ImportError:
            return {
                'status': 'error',
                'message': 'pmdarima not installed. Install with: pip install pmdarima>=2.0'
            }

        ts_data = df_pd.set_index(time_col)[target_col]

        print("🔧 Running auto_arima to find optimal ARIMA order...")
        auto_model = pm.auto_arima(
            ts_data,
            seasonal=bool(seasonal_period),
            m=seasonal_period or 1,
            stepwise=True,
            suppress_warnings=True,
            error_action='ignore',
            max_p=5, max_q=5, max_d=2,
            max_P=2, max_Q=2, max_D=1,
            trace=False
        )

        # Forecast with confidence intervals
        forecast_vals, conf_int = auto_model.predict(
            n_periods=forecast_horizon,
            return_conf_int=True
        )
        forecast_index = _future_index(ts_data.index)

        result = {
            'method': 'auto_arima',
            'order': str(auto_model.order),
            'seasonal_order': str(auto_model.seasonal_order) if seasonal_period else None,
            'forecast': [
                {
                    'date': str(date),
                    'value': float(val),
                    'lower_ci': float(ci[0]),
                    'upper_ci': float(ci[1])
                }
                for date, val, ci in zip(forecast_index, forecast_vals, conf_int)
            ],
            'aic': float(auto_model.aic()),
            'bic': float(auto_model.bic()),
            'model_summary': str(auto_model.summary())
        }
        print(f" ✅ Best order: {auto_model.order} | AIC: {auto_model.aic():.2f}")

    elif method == "arima":
        # ARIMA model with a fixed, simple order (use 'auto_arima' for tuning)
        ts_data = df_pd.set_index(time_col)[target_col]

        model = ARIMA(ts_data, order=(1, 1, 1))
        fitted_model = model.fit()

        # Forecast
        forecast = fitted_model.forecast(steps=forecast_horizon)
        forecast_index = _future_index(ts_data.index)

        result = {
            'method': 'arima',
            'order': '(1,1,1)',
            'forecast': [{'date': str(date), 'value': float(val)} for date, val in zip(forecast_index, forecast)],
            'aic': float(fitted_model.aic),
            'bic': float(fitted_model.bic)
        }

    elif method == "sarima":
        if not seasonal_period:
            seasonal_period = 7  # Default weekly

        ts_data = df_pd.set_index(time_col)[target_col]

        # SARIMA model
        model = SARIMAX(ts_data, order=(1, 1, 1), seasonal_order=(1, 1, 1, seasonal_period))
        fitted_model = model.fit(disp=False)

        # Forecast
        forecast = fitted_model.forecast(steps=forecast_horizon)
        forecast_index = _future_index(ts_data.index)

        result = {
            'method': 'sarima',
            'order': '(1,1,1)',
            'seasonal_order': f'(1,1,1,{seasonal_period})',
            'forecast': [{'date': str(date), 'value': float(val)} for date, val in zip(forecast_index, forecast)],
            'aic': float(fitted_model.aic)
        }

    else:  # exponential_smoothing (only remaining supported method)
        ts_data = df_pd.set_index(time_col)[target_col]

        # Exponential Smoothing; seasonal component only when a period is given
        model = ExponentialSmoothing(
            ts_data,
            seasonal_periods=seasonal_period if seasonal_period else 12,
            trend='add',
            seasonal='add' if seasonal_period else None
        )
        fitted_model = model.fit()

        # Forecast
        forecast = fitted_model.forecast(steps=forecast_horizon)
        forecast_index = _future_index(ts_data.index)

        result = {
            'method': 'exponential_smoothing',
            'forecast': [{'date': str(date), 'value': float(val)} for date, val in zip(forecast_index, forecast)]
        }

    # Save forecast
    if output_path:
        forecast_df = pl.DataFrame(result['forecast'])
        save_dataframe(forecast_df, output_path)
        print(f"💾 Forecast saved to: {output_path}")

    result['status'] = 'success'
    result['forecast_horizon'] = forecast_horizon
    result['output_path'] = output_path

    return result
238
+
239
+
240
def detect_seasonality_trends(
    file_path: str,
    time_col: str,
    target_col: str,
    period: Optional[int] = None,
    method: str = "stl",
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Detect seasonality and trends in time series using STL decomposition.

    Args:
        file_path: Path to time series dataset
        time_col: Time/date column
        target_col: Target variable
        period: Seasonal period (None = auto-detect via FFT)
        method: Decomposition method ('stl', 'classical')
        output_path: Path to save decomposition results

    Returns:
        Dictionary with trend, seasonal, and residual components plus
        strength scores, or ``{'status': 'error', ...}`` if statsmodels
        is not installed.
    """
    # Load data
    df = load_dataframe(file_path)
    validate_dataframe(df)
    validate_column_exists(df, time_col)
    validate_column_exists(df, target_col)

    # Sort by time
    df = df.sort(time_col)

    # Lazy import of time series libraries
    try:
        if method == "stl":
            from statsmodels.tsa.seasonal import STL
        else:
            from statsmodels.tsa.seasonal import seasonal_decompose
    except ImportError as e:
        return {
            'status': 'error',
            'message': f"Required library not installed: {str(e)}"
        }

    print(f"🔍 Detecting seasonality and trends using {method}...")

    # Convert to pandas
    df_pd = df.to_pandas()
    ts_data = df_pd.set_index(time_col)[target_col]

    # Auto-detect period using FFT if not provided
    if period is None:
        from scipy.fft import fft
        from scipy.signal import find_peaks

        # Remove trend with a centered rolling mean. Clamp the window to >= 2
        # so very short series don't produce window=0 (which would raise).
        window = max(2, min(len(ts_data) // 10, 30))
        detrended = ts_data - ts_data.rolling(window=window, center=True).mean()
        # bfill()/ffill(): fillna(method=...) is deprecated/removed in pandas 2.x
        detrended = detrended.bfill().ffill()

        # FFT magnitude spectrum
        fft_vals = np.abs(fft(detrended.values))
        freqs = np.fft.fftfreq(len(fft_vals))

        # Find spectral peaks in the positive-frequency half
        peaks, _ = find_peaks(fft_vals[:len(fft_vals)//2], height=np.max(fft_vals)*0.1)

        if len(peaks) > 0:
            # Dominant frequency = the peak with the LARGEST magnitude,
            # not merely the first (lowest-frequency) peak found.
            dominant_peak = peaks[int(np.argmax(fft_vals[peaks]))]
            dominant_freq = freqs[dominant_peak]
            period = int(1 / abs(dominant_freq)) if dominant_freq != 0 else 7
        else:
            period = 7  # Default weekly

        # Keep the period usable for decomposition: at least 2 observations
        # per cycle and at least two full cycles within the series.
        period = max(2, min(period, max(2, len(ts_data) // 2)))

        print(f"📊 Auto-detected period: {period}")

    # Perform decomposition
    if method == "stl":
        # STL decomposition (more robust). seasonal/trend smoother lengths
        # must be odd; period*2+1 and period*4+1 satisfy that by construction.
        stl = STL(ts_data, seasonal=period*2+1, trend=period*4+1)
        result_decomp = stl.fit()

        trend = result_decomp.trend
        seasonal = result_decomp.seasonal
        residual = result_decomp.resid

    else:
        # Classical decomposition
        result_decomp = seasonal_decompose(ts_data, model='additive', period=period)
        trend = result_decomp.trend
        seasonal = result_decomp.seasonal
        residual = result_decomp.resid

    # Seasonality strength: 1 - Var(resid) / Var(seasonal + resid)
    var_resid = np.var(residual.dropna())
    var_seasonal_resid = np.var((seasonal + residual).dropna())
    seasonality_strength = 1 - (var_resid / var_seasonal_resid) if var_seasonal_resid > 0 else 0

    # Trend strength: 1 - Var(resid) / Var(detrended)
    var_detrended = np.var((ts_data - trend).dropna())
    trend_strength = 1 - (var_resid / var_detrended) if var_detrended > 0 else 0

    # Autocorrelation analysis
    from statsmodels.tsa.stattools import acf
    acf_values = acf(ts_data.dropna(), nlags=min(40, len(ts_data)//2))

    # Create decomposition dataframe (NaN edges from smoothing filled with 0)
    decomp_df = pl.DataFrame({
        'time': df[time_col].to_list(),
        'original': ts_data.values,
        'trend': trend.fillna(0).values,
        'seasonal': seasonal.fillna(0).values,
        'residual': residual.fillna(0).values
    })

    # Save if output path provided
    if output_path:
        save_dataframe(decomp_df, output_path)
        print(f"💾 Decomposition saved to: {output_path}")

    return {
        'status': 'success',
        'method': method,
        'detected_period': period,
        'seasonality_strength': float(seasonality_strength),
        'trend_strength': float(trend_strength),
        'interpretation': {
            'seasonality': 'strong' if seasonality_strength > 0.6 else 'moderate' if seasonality_strength > 0.3 else 'weak',
            'trend': 'strong' if trend_strength > 0.6 else 'moderate' if trend_strength > 0.3 else 'weak'
        },
        'autocorrelation': acf_values[:min(10, len(acf_values))].tolist(),
        'output_path': output_path
    }
371
+
372
+
373
def create_time_series_features(
    file_path: str,
    time_col: str,
    target_col: str,
    lag_periods: Optional[List[int]] = None,
    rolling_windows: Optional[List[int]] = None,
    add_holiday_features: bool = True,
    country: str = "US",
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Create comprehensive time series features including lags, rolling stats, and calendar features.

    Args:
        file_path: Path to time series dataset
        time_col: Time/date column
        target_col: Target variable
        lag_periods: Lag periods to create (e.g., [1, 7, 30]); defaults to [1, 7, 14, 30]
        rolling_windows: Rolling window sizes (e.g., [7, 14, 30]); defaults to [7, 14, 30]
        add_holiday_features: Add holiday indicators (best-effort; skipped on failure)
        country: Country for holiday calendar
        output_path: Path to save dataset with new features

    Returns:
        Dictionary with feature engineering results
    """
    # Load and validate the input dataset
    frame = load_dataframe(file_path)
    validate_dataframe(frame)
    validate_column_exists(frame, time_col)
    validate_column_exists(frame, target_col)

    # Features below (lags, rolling stats) assume chronological order
    frame = frame.sort(time_col)

    print("⏰ Creating time series features...")

    # Pandas handles datetime indexing/offsets more conveniently than polars here
    pdf = frame.to_pandas()
    pdf[time_col] = pd.to_datetime(pdf[time_col])
    pdf = pdf.set_index(time_col)

    new_cols: List[str] = []

    # --- Lag features ---------------------------------------------------
    if lag_periods is None:
        lag_periods = [1, 7, 14, 30]

    for lag in lag_periods:
        lag_name = f'{target_col}_lag_{lag}'
        pdf[lag_name] = pdf[target_col].shift(lag)
        new_cols.append(lag_name)

    # --- Rolling window features -----------------------------------------
    if rolling_windows is None:
        rolling_windows = [7, 14, 30]

    for window in rolling_windows:
        roller = pdf[target_col].rolling(window=window)
        for stat in ('mean', 'std', 'min', 'max'):
            stat_name = f'{target_col}_rolling_{stat}_{window}'
            pdf[stat_name] = getattr(roller, stat)()
            new_cols.append(stat_name)

    # --- Exponential moving averages --------------------------------------
    for span in (7, 30):
        ema_name = f'{target_col}_ema_{span}'
        pdf[ema_name] = pdf[target_col].ewm(span=span).mean()
        new_cols.append(ema_name)

    # --- Calendar features ------------------------------------------------
    idx = pdf.index
    calendar_values = {
        'year': idx.year,
        'month': idx.month,
        'day': idx.day,
        'dayofweek': idx.dayofweek,
        'dayofyear': idx.dayofyear,
        'quarter': idx.quarter,
        'is_weekend': (idx.dayofweek >= 5).astype(int),
        'is_month_start': idx.is_month_start.astype(int),
        'is_month_end': idx.is_month_end.astype(int),
    }
    for cal_name, values in calendar_values.items():
        pdf[cal_name] = values

    # Cyclical (sin/cos) encoding so periodic features wrap smoothly
    for base, cycle_len in (('month', 12), ('day', 31), ('dayofweek', 7)):
        angle = 2 * np.pi * pdf[base] / cycle_len
        pdf[f'{base}_sin'] = np.sin(angle)
        pdf[f'{base}_cos'] = np.cos(angle)

    new_cols.extend([
        'year', 'month', 'day', 'dayofweek', 'dayofyear', 'quarter',
        'is_weekend', 'is_month_start', 'is_month_end',
        'month_sin', 'month_cos', 'day_sin', 'day_cos',
        'dayofweek_sin', 'dayofweek_cos'
    ])

    # --- Holiday features (best-effort: skipped if anything fails) ---------
    if add_holiday_features:
        try:
            import holidays
            calendar = holidays.country_holidays(country)
            pdf['is_holiday'] = pdf.index.map(lambda x: 1 if x in calendar else 0)

            # Days until next holiday (365 when none remain on the calendar)
            upcoming = sorted([d for d in calendar if d >= pdf.index.min()])
            pdf['days_to_next_holiday'] = pdf.index.map(
                lambda x: min([abs((hol - x).days) for hol in upcoming if hol >= x], default=365)
            )

            new_cols.extend(['is_holiday', 'days_to_next_holiday'])
        except Exception as e:
            print(f"⚠️ Could not add holiday features: {str(e)}")

    # Restore the time column and hand back to polars
    pdf = pdf.reset_index()
    enriched = pl.from_pandas(pdf)

    # Persist if requested
    if output_path:
        save_dataframe(enriched, output_path)
        print(f"💾 Dataset with time series features saved to: {output_path}")

    return {
        'status': 'success',
        'features_created': len(new_cols),
        'feature_names': new_cols,
        'lag_periods': lag_periods,
        'rolling_windows': rolling_windows,
        'holiday_features_added': add_holiday_features,
        'output_path': output_path
    }