mseep-rmcp 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,239 @@
1
+ """
2
+ Time series analysis tools for RMCP.
3
+
4
+ Comprehensive time series modeling and forecasting capabilities.
5
+ """
6
+
7
+ from typing import Dict, Any
8
+ from ..registries.tools import tool
9
+ from ..core.schemas import table_schema
10
+ from ..r_integration import execute_r_script
11
+
12
+
13
@tool(
    name="arima_model",
    input_schema={
        "type": "object",
        "properties": {
            "data": {
                "type": "object",
                "properties": {
                    "values": {"type": "array", "items": {"type": "number"}},
                    "dates": {"type": "array", "items": {"type": "string"}}
                },
                "required": ["values"]
            },
            "order": {
                "type": "array",
                "items": {"type": "integer"},
                "minItems": 3,
                "maxItems": 3,
                "description": "ARIMA order (p, d, q)"
            },
            "seasonal": {
                "type": "array",
                "items": {"type": "integer"},
                "minItems": 4,
                "maxItems": 4,
                "description": "Seasonal ARIMA order (P, D, Q, s)"
            },
            # Fix: the seasonal frequency used to be hard-coded to 12 inside the
            # R script (even when dates were supplied). It is now an optional
            # input; the default preserves the old monthly behavior, so existing
            # callers are unaffected.
            "frequency": {
                "type": "integer",
                "minimum": 1,
                "default": 12,
                "description": "Observations per seasonal cycle (12 = monthly)"
            },
            "forecast_periods": {"type": "integer", "minimum": 1, "maximum": 100, "default": 12}
        },
        "required": ["data"]
    },
    description="Fit ARIMA time series model with forecasting"
)
async def arima_model(context, params):
    """Fit an ARIMA model in R and generate forecasts.

    Uses forecast::Arima when an explicit ``order`` is given (optionally with
    ``seasonal``); otherwise forecast::auto.arima selects the order.

    Args:
        context: Tool execution context used for structured logging.
        params: Validated tool input (see ``input_schema``). ``data.values``
            is the series. ``data.dates`` is accepted for caller convenience
            but not used for modeling; the seasonal structure comes from
            ``frequency`` (default 12).

    Returns:
        dict: Model metadata (order, coefficients, AIC/BIC, log-likelihood,
        sigma^2), fitted values, residuals, point forecasts with 95% bounds,
        in-sample accuracy measures, and the observation count.

    Raises:
        Exception: Re-raised from ``execute_r_script`` when the R code fails.
    """
    await context.info("Fitting ARIMA time series model")

    r_script = '''
    # Install required packages
    if (!require(forecast)) install.packages("forecast", quietly = TRUE)
    library(forecast)

    # Prepare data
    values <- args$data$values

    # Convert to time series. Fix: frequency was previously hard-coded to 12
    # regardless of input; it is now caller-configurable (default still 12).
    frequency <- args$frequency %||% 12
    ts_data <- ts(values, frequency = frequency)

    # Fit ARIMA model
    if (!is.null(args$order)) {
      if (!is.null(args$seasonal)) {
        model <- Arima(ts_data, order = args$order, seasonal = args$seasonal)
      } else {
        model <- Arima(ts_data, order = args$order)
      }
    } else {
      # No explicit order: let auto.arima search for one.
      model <- auto.arima(ts_data)
    }

    # Generate forecasts
    forecast_periods <- args$forecast_periods %||% 12
    forecasts <- forecast(model, h = forecast_periods)

    # Extract results
    result <- list(
      model_type = "ARIMA",
      order = arimaorder(model),
      coefficients = as.list(coef(model)),
      aic = AIC(model),
      bic = BIC(model),
      loglik = logLik(model)[1],
      sigma2 = model$sigma2,
      fitted_values = as.numeric(fitted(model)),
      residuals = as.numeric(residuals(model)),
      forecasts = as.numeric(forecasts$mean),
      forecast_lower = as.numeric(forecasts$lower[,2]), # 95% CI
      forecast_upper = as.numeric(forecasts$upper[,2]),
      accuracy = accuracy(model),
      n_obs = length(values)
    )
    '''

    try:
        result = execute_r_script(r_script, params)
        await context.info("ARIMA model fitted successfully",
                           aic=result.get("aic"),
                           n_obs=result.get("n_obs"))
        return result

    except Exception as e:
        await context.error("ARIMA model fitting failed", error=str(e))
        raise
112
+
113
+
114
@tool(
    name="decompose_timeseries",
    input_schema={
        "type": "object",
        "properties": {
            "data": {
                "type": "object",
                "properties": {
                    "values": {"type": "array", "items": {"type": "number"}},
                    "dates": {"type": "array", "items": {"type": "string"}}
                },
                "required": ["values"]
            },
            # Fix: stats::decompose() requires a seasonal frequency of at
            # least 2; frequency = 1 always failed inside R with an opaque
            # error, so the schema now rejects it up front.
            "frequency": {"type": "integer", "minimum": 2, "default": 12},
            "type": {"type": "string", "enum": ["additive", "multiplicative"], "default": "additive"}
        },
        "required": ["data"]
    },
    description="Decompose time series into trend, seasonal, and remainder components"
)
async def decompose_timeseries(context, params):
    """Decompose a time series into trend, seasonal, and remainder parts.

    Wraps R's ``stats::decompose()`` (moving-average based classical
    decomposition), which needs at least two full seasonal cycles of data.

    Args:
        context: Tool execution context used for structured logging.
        params: Validated tool input (see ``input_schema``). ``data.values``
            is the series; ``frequency`` is observations per cycle;
            ``type`` selects additive or multiplicative decomposition.

    Returns:
        dict: Original values plus ``trend``, ``seasonal``, and ``remainder``
        components (trend/remainder contain NA at the edges by construction),
        the decomposition type, frequency, and observation count.

    Raises:
        Exception: Re-raised from ``execute_r_script`` when the R code fails,
        e.g. when the series is shorter than two full cycles.
    """
    await context.info("Decomposing time series")

    r_script = '''
    values <- args$data$values
    frequency <- args$frequency %||% 12
    decomp_type <- args$type %||% "additive"

    # decompose() needs at least two full seasonal cycles; fail with a clear
    # message instead of R's cryptic "time series has no or less than 2 periods".
    if (length(values) < 2 * frequency) {
      stop(paste0("Need at least ", 2 * frequency,
                  " observations (2 full cycles) for frequency ", frequency))
    }

    # Create time series
    ts_data <- ts(values, frequency = frequency)

    # decompose() accepts the type string directly; no branching needed.
    decomp <- decompose(ts_data, type = decomp_type)

    result <- list(
      original = as.numeric(decomp$x),
      trend = as.numeric(decomp$trend),
      seasonal = as.numeric(decomp$seasonal),
      remainder = as.numeric(decomp$random),
      type = decomp_type,
      frequency = frequency,
      n_obs = length(values)
    )
    '''

    try:
        result = execute_r_script(r_script, params)
        await context.info("Time series decomposed successfully")
        return result

    except Exception as e:
        await context.error("Time series decomposition failed", error=str(e))
        raise
173
+
174
+
175
@tool(
    name="stationarity_test",
    input_schema={
        "type": "object",
        "properties": {
            "data": {
                "type": "object",
                "properties": {
                    "values": {"type": "array", "items": {"type": "number"}}
                },
                "required": ["values"]
            },
            "test": {"type": "string", "enum": ["adf", "kpss", "pp"], "default": "adf"}
        },
        "required": ["data"]
    },
    description="Test time series for stationarity (ADF, KPSS, Phillips-Perron)"
)
async def stationarity_test(context, params):
    """Run a stationarity test on a univariate series via R's tseries package.

    Args:
        context: Tool execution context used for structured logging.
        params: Validated tool input (see ``input_schema``). ``test`` selects
            Augmented Dickey-Fuller (``adf``), KPSS (``kpss``), or
            Phillips-Perron (``pp``).

    Returns:
        dict: Test name/type, test statistic, p-value, the alternative
        hypothesis (NA for KPSS, which does not report one), an
        ``is_stationary`` decision at the 5% level, and the observation count.

    Raises:
        Exception: Re-raised from ``execute_r_script`` when the R code fails.
    """
    await context.info("Testing time series stationarity")

    r_script = '''
    if (!require(tseries)) install.packages("tseries", quietly = TRUE)
    library(tseries)

    values <- args$data$values
    test_type <- args$test %||% "adf"

    ts_data <- ts(values)

    if (test_type == "adf") {
      test_result <- adf.test(ts_data)
      test_name <- "Augmented Dickey-Fuller"
    } else if (test_type == "kpss") {
      test_result <- kpss.test(ts_data)
      test_name <- "KPSS"
    } else if (test_type == "pp") {
      test_result <- pp.test(ts_data)
      test_name <- "Phillips-Perron"
    } else {
      # Defensive: the schema restricts `test`, but fail loudly rather than
      # hit "object 'test_result' not found" if validation is ever bypassed.
      stop(paste("Unknown test type:", test_type))
    }

    result <- list(
      test_name = test_name,
      test_type = test_type,
      statistic = as.numeric(test_result$statistic),
      p_value = test_result$p.value,
      # NOTE(review): tseries htest objects define no $critical component, so
      # this is an empty list in practice — confirm before relying on it.
      critical_values = as.list(test_result$critical),
      # Fix: kpss.test() returns no $alternative; default to NA so the key is
      # not silently dropped from the serialized result.
      alternative = test_result$alternative %||% NA,
      # KPSS has stationarity as its NULL hypothesis, so the decision rule is
      # inverted relative to ADF / Phillips-Perron.
      is_stationary = if (test_type == "kpss") test_result$p.value > 0.05 else test_result$p.value < 0.05,
      n_obs = length(values)
    )
    '''

    try:
        result = execute_r_script(r_script, params)
        await context.info("Stationarity test completed",
                           test=result.get("test_name"),
                           p_value=result.get("p_value"))
        return result

    except Exception as e:
        await context.error("Stationarity test failed", error=str(e))
        raise
@@ -0,0 +1,293 @@
1
+ """
2
+ Data transformation tools for RMCP.
3
+
4
+ Essential data manipulation and cleaning capabilities.
5
+ """
6
+
7
+ from typing import Dict, Any
8
+ from ..registries.tools import tool
9
+ from ..core.schemas import table_schema
10
+ from ..r_integration import execute_r_script
11
+
12
+
13
@tool(
    name="lag_lead",
    input_schema={
        "type": "object",
        "properties": {
            "data": table_schema(),
            "variables": {"type": "array", "items": {"type": "string"}},
            # Fix: offsets must be strictly positive — zero or negative values
            # produced malformed columns (length mismatch) inside R.
            "lags": {"type": "array", "items": {"type": "integer", "minimum": 1}},
            "leads": {"type": "array", "items": {"type": "integer", "minimum": 1}}
        },
        "required": ["data", "variables"]
    },
    description="Create lagged and lead variables for time series analysis"
)
async def lag_lead(context, params):
    """Create lagged and lead copies of the requested columns.

    Each lag ``k`` adds a ``<var>_lag<k>`` column (first ``k`` rows NA); each
    lead ``k`` adds ``<var>_lead<k>`` (last ``k`` rows NA). Row count is
    preserved.

    Args:
        context: Tool execution context used for structured logging.
        params: Validated tool input (see ``input_schema``). ``lags`` defaults
            to ``[1]``; ``leads`` defaults to none.

    Returns:
        dict: The augmented table, the list of newly created column names,
        the row count, and the operation name.

    Raises:
        Exception: Re-raised from ``execute_r_script``, e.g. when an offset
        is not smaller than the number of rows.
    """
    await context.info("Creating lag/lead variables")

    r_script = '''
    data <- as.data.frame(args$data)
    variables <- args$variables
    lags <- args$lags %||% c(1)
    leads <- args$leads %||% c()

    n <- nrow(data)
    # Fix: an offset >= n made c(rep(NA, k), head(x, -k)) the wrong length
    # and crashed the data.frame assignment with an opaque error.
    if (any(c(lags, leads) >= n)) {
      stop("lag/lead offsets must be smaller than the number of rows")
    }

    result_data <- data

    # Create lagged variables (shift forward: first `lag_val` entries are NA)
    for (var in variables) {
      for (lag_val in lags) {
        new_var <- paste0(var, "_lag", lag_val)
        result_data[[new_var]] <- c(rep(NA, lag_val), head(data[[var]], -lag_val))
      }
    }

    # Create lead variables (shift backward: last `lead_val` entries are NA)
    for (var in variables) {
      for (lead_val in leads) {
        new_var <- paste0(var, "_lead", lead_val)
        result_data[[new_var]] <- c(tail(data[[var]], -lead_val), rep(NA, lead_val))
      }
    }

    result <- list(
      data = result_data,
      variables_created = names(result_data)[!names(result_data) %in% names(data)],
      n_obs = nrow(result_data),
      operation = "lag_lead"
    )
    '''

    try:
        result = execute_r_script(r_script, params)
        await context.info("Lag/lead variables created successfully")
        return result

    except Exception as e:
        await context.error("Lag/lead creation failed", error=str(e))
        raise
72
+
73
+
74
@tool(
    name="winsorize",
    input_schema={
        "type": "object",
        "properties": {
            "data": table_schema(),
            "variables": {"type": "array", "items": {"type": "string"}},
            # Fix: items previously had "maximum": 0.5, which contradicted the
            # documented default [0.01, 0.99] — the upper percentile was
            # unrepresentable. Both bounds now span [0, 1]; ordering is
            # validated in the R script.
            "percentiles": {
                "type": "array",
                "items": {"type": "number", "minimum": 0, "maximum": 1},
                "minItems": 2,
                "maxItems": 2,
                "default": [0.01, 0.99]
            }
        },
        "required": ["data", "variables"]
    },
    description="Winsorize variables to handle outliers"
)
async def winsorize(context, params):
    """Cap extreme values of the requested columns at percentile thresholds.

    Values below the lower percentile are raised to it and values above the
    upper percentile are lowered to it, in place (column names unchanged).

    Args:
        context: Tool execution context used for structured logging.
        params: Validated tool input (see ``input_schema``). ``percentiles``
            is ``[lower, upper]`` with ``lower < upper``; defaults to
            ``[0.01, 0.99]``.

    Returns:
        dict: The winsorized table, a per-variable summary of thresholds and
        capped counts, the percentiles used, the variable list, and row count.

    Raises:
        Exception: Re-raised from ``execute_r_script``, e.g. when the
        percentiles are not strictly increasing.
    """
    await context.info("Winsorizing variables")

    r_script = '''
    data <- as.data.frame(args$data)
    variables <- args$variables
    percentiles <- args$percentiles %||% c(0.01, 0.99)

    # Guard against inverted bounds, which would silently collapse every
    # value to a single threshold.
    if (percentiles[1] >= percentiles[2]) {
      stop("percentiles must be strictly increasing: c(lower, upper)")
    }

    result_data <- data
    outliers_summary <- list()

    for (var in variables) {
      original_values <- data[[var]]

      # Calculate percentile thresholds
      lower_threshold <- quantile(original_values, percentiles[1], na.rm = TRUE)
      upper_threshold <- quantile(original_values, percentiles[2], na.rm = TRUE)

      # Winsorize
      winsorized <- pmax(pmin(original_values, upper_threshold), lower_threshold)
      result_data[[var]] <- winsorized

      # Track changes
      n_lower <- sum(original_values < lower_threshold, na.rm = TRUE)
      n_upper <- sum(original_values > upper_threshold, na.rm = TRUE)

      outliers_summary[[var]] <- list(
        lower_threshold = lower_threshold,
        upper_threshold = upper_threshold,
        n_capped_lower = n_lower,
        n_capped_upper = n_upper,
        total_capped = n_lower + n_upper
      )
    }

    result <- list(
      data = result_data,
      outliers_summary = outliers_summary,
      percentiles = percentiles,
      variables_winsorized = variables,
      n_obs = nrow(result_data)
    )
    '''

    try:
        result = execute_r_script(r_script, params)
        await context.info("Variables winsorized successfully")
        return result

    except Exception as e:
        await context.error("Winsorization failed", error=str(e))
        raise
147
+
148
+
149
@tool(
    name="difference",
    input_schema={
        "type": "object",
        "properties": {
            "data": table_schema(),
            "variables": {"type": "array", "items": {"type": "string"}},
            "order": {"type": "integer", "minimum": 1, "maximum": 3, "default": 1},
            "log_transform": {"type": "boolean", "default": False}
        },
        "required": ["data", "variables"]
    },
    description="Compute differences of variables (for stationarity)"
)
async def difference(context, params):
    """Compute (optionally log-) differenced versions of the requested columns.

    For each variable, optionally takes the natural log first (added as a
    ``log_<var>`` column), then computes differences up to ``order``. Each
    intermediate order ``i`` is stored as ``<base>_diff<i>`` (just
    ``<base>_diff`` when ``order`` is 1), NA-padded to preserve row count.

    Args:
        context: Tool execution context used for structured logging.
        params: Validated tool input (see ``input_schema``).

    Returns:
        dict: The augmented table, the differenced variable names, the
        difference order, the log-transform flag, and the row count.

    Raises:
        Exception: Re-raised from ``execute_r_script``, e.g. for non-positive
        values under ``log_transform`` or a series no longer than ``order``.
    """
    await context.info("Computing variable differences")

    r_script = '''
    data <- as.data.frame(args$data)
    variables <- args$variables
    diff_order <- args$order %||% 1
    log_transform <- args$log_transform %||% FALSE

    # Fix: differencing a series with <= diff_order rows yields columns of
    # the wrong length and crashes the data.frame assignment; fail clearly.
    if (nrow(data) <= diff_order) {
      stop(paste("Need more than", diff_order, "observations for order", diff_order, "differencing"))
    }

    result_data <- data

    for (var in variables) {
      original_values <- data[[var]]

      # Log transform first if requested
      if (log_transform) {
        if (any(original_values <= 0, na.rm = TRUE)) {
          stop(paste("Cannot log-transform", var, "- contains non-positive values"))
        }
        transformed <- log(original_values)
        log_var <- paste0("log_", var)
        result_data[[log_var]] <- transformed
        working_values <- transformed
        base_name <- log_var
      } else {
        working_values <- original_values
        base_name <- var
      }

      # Compute differences; each pass i loses one element, so pad with i NAs
      # at the front to keep every column the same length as the table.
      diff_values <- working_values
      for (i in 1:diff_order) {
        diff_values <- diff(diff_values)
        diff_name <- paste0(base_name, "_diff", if (diff_order > 1) i else "")

        padded_diff <- c(rep(NA, i), diff_values)
        result_data[[diff_name]] <- padded_diff
      }
    }

    result <- list(
      data = result_data,
      variables_differenced = variables,
      difference_order = diff_order,
      log_transformed = log_transform,
      n_obs = nrow(result_data)
    )
    '''

    try:
        result = execute_r_script(r_script, params)
        await context.info("Variable differences computed successfully")
        return result

    except Exception as e:
        await context.error("Differencing failed", error=str(e))
        raise
223
+
224
+
225
@tool(
    name="standardize",
    input_schema={
        "type": "object",
        "properties": {
            "data": table_schema(),
            "variables": {"type": "array", "items": {"type": "string"}},
            "method": {"type": "string", "enum": ["z_score", "min_max", "robust"], "default": "z_score"}
        },
        "required": ["data", "variables"]
    },
    description="Standardize variables using z-score, min-max, or robust scaling"
)
async def standardize(context, params):
    """Add scaled copies of the requested columns (``<var>_<method>``).

    Methods: ``z_score`` ((x - mean) / sd), ``min_max`` ((x - min) /
    (max - min)), ``robust`` ((x - median) / MAD). Original columns are left
    untouched.

    Args:
        context: Tool execution context used for structured logging.
        params: Validated tool input (see ``input_schema``).

    Returns:
        dict: The augmented table, the method used, per-variable scaling
        parameters, the variable list, and the row count.

    Raises:
        Exception: Re-raised from ``execute_r_script``, e.g. when a variable
        has zero spread so the scale denominator would be zero.
    """
    await context.info("Standardizing variables")

    r_script = '''
    data <- as.data.frame(args$data)
    variables <- args$variables
    method <- args$method %||% "z_score"

    result_data <- data
    scaling_info <- list()

    for (var in variables) {
      original_values <- data[[var]]

      if (method == "z_score") {
        mean_val <- mean(original_values, na.rm = TRUE)
        sd_val <- sd(original_values, na.rm = TRUE)
        # Fix: a zero denominator used to yield silent NaN/Inf columns.
        if (is.na(sd_val) || sd_val == 0) {
          stop(paste("Cannot z-score", var, "- standard deviation is zero"))
        }
        scaled <- (original_values - mean_val) / sd_val
        scaling_info[[var]] <- list(mean = mean_val, sd = sd_val)

      } else if (method == "min_max") {
        min_val <- min(original_values, na.rm = TRUE)
        max_val <- max(original_values, na.rm = TRUE)
        if (max_val == min_val) {
          stop(paste("Cannot min-max scale", var, "- all values are identical"))
        }
        scaled <- (original_values - min_val) / (max_val - min_val)
        scaling_info[[var]] <- list(min = min_val, max = max_val)

      } else if (method == "robust") {
        median_val <- median(original_values, na.rm = TRUE)
        mad_val <- mad(original_values, na.rm = TRUE)
        if (is.na(mad_val) || mad_val == 0) {
          stop(paste("Cannot robust-scale", var, "- MAD is zero"))
        }
        scaled <- (original_values - median_val) / mad_val
        scaling_info[[var]] <- list(median = median_val, mad = mad_val)
      }

      new_var <- paste0(var, "_", method)
      result_data[[new_var]] <- scaled
    }

    result <- list(
      data = result_data,
      scaling_method = method,
      scaling_info = scaling_info,
      variables_scaled = variables,
      n_obs = nrow(result_data)
    )
    '''

    try:
        result = execute_r_script(r_script, params)
        await context.info("Variables standardized successfully")
        return result

    except Exception as e:
        await context.error("Standardization failed", error=str(e))
        raise