Sensor2EventLog 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,270 @@
1
+ """
2
+ Modular feature extraction library with diagnostic capabilities
3
+ """
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ import re
8
+ from typing import Dict, List
9
+ from evaluation.rule_analyzer import RuleDiagnosticAnalyzer
10
+
11
+
12
class ModularFeatureLibrary:
    """
    Modular feature extraction library supporting multiple feature families
    with integrated rule diagnostics.

    Each family name ('statistical', 'temporal', 'stability', 'interaction',
    'event', 'contextual') maps to a ``_compute_*`` method that receives the
    input frame plus the family's entry from the feature plan and returns a
    DataFrame indexed like the input.

    Progress and error messages are emitted with ``print`` on purpose: the
    messages are part of the textual output of the library.
    """

    # Substrings that mark a normalized string as a rule expression.
    _RULE_MARKERS = ('>', '<', '==', '!=', '&', '|', '~')
    # Substrings that mark a sub-expression as a comparison needing parentheses
    # ('>=' and '<=' are already covered by '>' and '<').
    _COMPARISON_MARKERS = ('>', '<', '==', '!=')

    def __init__(self, window_sizes=None, stability_eps=1, peak_threshold=0.1):
        """
        Parameters
        ----------
        window_sizes : list of int, optional
            Rolling-window lengths for the statistical family; ``[5]`` when
            omitted or empty.
        stability_eps : number
            A sample counts as "stable" when its absolute first difference is
            strictly below this value.
        peak_threshold : number
            Stored on the instance; no method of this class reads it.
        """
        self.window_sizes = window_sizes or [5]
        self.stability_eps = stability_eps
        self.peak_threshold = peak_threshold

        # Feature family implementations
        self.feature_families = {
            'statistical': self._compute_statistical_features,
            'temporal': self._compute_temporal_features,
            'stability': self._compute_stability_features,
            'interaction': self._compute_interaction_features,
            'event': self._compute_event_features,
            'contextual': self._compute_contextual_features,
        }

        self.diagnostic_analyzer = RuleDiagnosticAnalyzer()
        self._feature_cache = {}

    # ------------------------------------------------------------------
    # Rule handling
    # ------------------------------------------------------------------
    def _normalize_rule_expr(self, expr: str) -> str:
        """Convert human-friendly logical ops to pandas-style bitwise ops.

        ``AND``/``OR``/``NOT`` (any letter case, whole words only) become
        ``&``/``|``/``~``; every run of operator characters is surrounded by
        single spaces and repeated whitespace is collapsed.
        """
        s = expr.strip()
        # The substitutions are case-insensitive, so lower-case 'and'/'or'/'not'
        # are handled here as well; no separate lower-case pass is needed.
        s = re.sub(r'\bAND\b', '&', s, flags=re.I)
        s = re.sub(r'\bOR\b', '|', s, flags=re.I)
        s = re.sub(r'\bNOT\b', '~', s, flags=re.I)
        s = re.sub(r'\s*([&|~><=!]+)\s*', r' \1 ', s)
        s = re.sub(r'\s+', ' ', s)
        return s.strip()

    def _fix_rule_parentheses(self, expr: str) -> str:
        """Add parentheses around comparison operations to avoid ambiguous truth values.

        ``&`` and ``|`` bind tighter than comparisons in Python, so each
        operand of ``&``/``|`` that contains a comparison is wrapped in
        parentheses. Expressions without ``&``/``|`` are returned normalized
        but otherwise untouched.
        """
        normalized = self._normalize_rule_expr(expr)
        # The capturing group keeps the separators in the split result.
        parts = re.split(r'(\s*[&|]\s*)', normalized)

        if len(parts) == 1:
            return normalized

        rebuilt = []
        for part in parts:
            is_separator = part.strip() in ('&', '|')
            if not is_separator and any(op in part for op in self._COMPARISON_MARKERS):
                rebuilt.append(f'({part})')
            else:
                rebuilt.append(part)

        return ''.join(rebuilt)

    def _evaluate_rule(self, rule_expr: str, available_features: pd.DataFrame) -> pd.Series:
        """Evaluate a rule expression using available features.

        Column names of ``available_features`` are usable as variables, next
        to ``np``, ``pd``, ``abs``, ``min`` and ``max`` (the last three bound
        to the element-wise NumPy versions).

        Returns a boolean Series on the frame's index. On any evaluation
        error the error is printed and an all-False Series is returned.
        """
        normalized_expr = self._normalize_rule_expr(rule_expr)

        try:
            eval_env = {col: available_features[col] for col in available_features.columns}
            eval_env.update({
                'np': np, 'pd': pd, 'abs': np.abs,
                'min': np.minimum, 'max': np.maximum,
            })

            # SECURITY NOTE(review): emptying __builtins__ does not make eval()
            # a sandbox. Rule expressions must come from trusted configuration
            # only, never from untrusted input.
            result = eval(normalized_expr, {"__builtins__": {}}, eval_env)

            if isinstance(result, pd.Series):
                return result.astype(bool)
            if isinstance(result, np.ndarray) and result.shape == (len(available_features),):
                # Element-wise result that lost its index (e.g. via .values).
                return pd.Series(result.astype(bool), index=available_features.index)
            # Scalar result: broadcast to every row.
            return pd.Series(bool(result), index=available_features.index)

        except Exception as e:
            # Deliberate best-effort: a broken rule yields an all-False feature.
            print(f"Error evaluating rule '{rule_expr}': {e}")
            return pd.Series(False, index=available_features.index)

    def _build_rule_namespace(self, df):
        """Return a copy of ``df`` extended with derived columns usable in rules.

        For every numeric column ``X`` the columns ``X_diff``,
        ``X_diff_smooth``, ``X_abs_diff``, ``X_stability`` and
        ``X_stable_flag`` are added.
        """
        available = df.copy()
        for signal in df.columns:
            if not pd.api.types.is_numeric_dtype(df[signal]):
                continue
            try:
                diff = df[signal].diff().fillna(0)
                abs_diff = np.abs(diff)
                available[f"{signal}_diff"] = diff
                available[f"{signal}_diff_smooth"] = diff.ewm(span=5).mean()
                available[f"{signal}_abs_diff"] = abs_diff
                available[f"{signal}_stability"] = 1.0 / (1.0 + abs_diff)
                available[f"{signal}_stable_flag"] = (abs_diff < self.stability_eps).astype(int)
            except (TypeError, ValueError) as e:
                print(f"Warning: Could not compute derived features for {signal}: {e}")
        return available

    # ------------------------------------------------------------------
    # Feature families
    # ------------------------------------------------------------------
    def _safe_ratio(self, a, b):
        """Safe ratio calculation with log transformation.

        Returns ``log1p(|a| + 1e-6) - log1p(|b| + 1e-6)`` multiplied by the
        sign of ``a * b``, so the result is 0 wherever either input is 0.
        """
        a_safe = np.abs(a) + 1e-6
        b_safe = np.abs(b) + 1e-6
        ratio = np.log1p(a_safe) - np.log1p(b_safe)
        return ratio * np.sign(a * b)

    def _compute_statistical_features(self, df, signals, **kwargs):
        """Statistical features: rolling means.

        Adds ``{signal}_roll_mean_{win}`` for every signal and every
        configured window size (``min_periods=1``, so no leading NaNs).
        """
        features = pd.DataFrame(index=df.index)

        for signal in signals:
            series = df[signal]
            for win in self.window_sizes:
                features[f"{signal}_roll_mean_{win}"] = series.rolling(win, min_periods=1).mean()

        return features

    def _compute_temporal_features(self, df, signals, **kwargs):
        """Temporal dynamics features: differences and rates.

        Per signal: first difference (first row filled with 0), its sign,
        an exponentially weighted mean of it (span 5) and its absolute value.
        """
        features = pd.DataFrame(index=df.index)

        for signal in signals:
            diff = df[signal].diff().fillna(0)
            features[f"{signal}_diff"] = diff
            features[f"{signal}_diff_sign"] = np.sign(diff)
            features[f"{signal}_diff_smooth"] = diff.ewm(span=5).mean()
            features[f"{signal}_abs_diff"] = np.abs(diff)

        return features

    def _compute_stability_features(self, df, signals, **kwargs):
        """Stability features: stability flags and consecutive stable periods.

        Per signal: ``1 / (1 + |diff|)``, a 0/1 flag for ``|diff| <
        stability_eps`` and the running length of the current stable run.
        """
        features = pd.DataFrame(index=df.index)

        for signal in signals:
            abs_diff = np.abs(df[signal].diff().fillna(0))
            is_stable = abs_diff < self.stability_eps

            features[f"{signal}_stability"] = 1.0 / (1.0 + abs_diff)
            features[f"{signal}_stable_flag"] = is_stable.astype(int)

            # Every unstable sample starts a new group; the cumulative sum of
            # the flag inside a group is the length of the stable run so far.
            run_id = (~is_stable).cumsum()
            features[f"{signal}_consecutive_stable"] = is_stable.astype(int).groupby(run_id).cumsum()

        return features

    def _compute_interaction_features(self, df, signals, **kwargs):
        """Interaction features: products and ratios between signals.

        Produces ``{a}_x_{b}`` and ``{a}_ratio_{b}`` for every unordered pair
        of ``signals``; returns an empty frame for fewer than two signals.
        """
        features = pd.DataFrame(index=df.index)

        if len(signals) < 2:
            return features

        for i, sig1 in enumerate(signals):
            for sig2 in signals[i + 1:]:
                features[f"{sig1}_x_{sig2}"] = df[sig1] * df[sig2]
                features[f"{sig1}_ratio_{sig2}"] = self._safe_ratio(df[sig1], df[sig2])

        return features

    def _compute_event_features(self, df, signals, **kwargs):
        """Event/regime features with rule-based definitions.

        ``signals`` may contain rule strings (feature named after the first 20
        characters of the rule) and dicts of ``{name: rule}`` (feature named
        ``event_{name}``). Each rule becomes a 0/1 column.
        """
        features = pd.DataFrame(index=df.index)
        available_features = self._build_rule_namespace(df)

        rule_counter = 0
        for signal_def in signals:
            if isinstance(signal_def, dict):
                for rule_name, rule_expr in signal_def.items():
                    try:
                        fixed_expr = self._fix_rule_parentheses(rule_expr)
                        rule_result = self._evaluate_rule(fixed_expr, available_features)
                        features[f"event_{rule_name}"] = rule_result.astype(int)
                        print(f"Created named event feature: event_{rule_name}")
                    except Exception as e:
                        print(f"Error processing named rule '{rule_name}': {e}")
                        features[f"event_{rule_name}_error"] = 0
                continue

            # Detect operators on the normalized text so that rules written
            # only with AND/OR/NOT words, or using '!=', are recognised too.
            is_rule = isinstance(signal_def, str) and any(
                marker in self._normalize_rule_expr(signal_def)
                for marker in self._RULE_MARKERS
            )
            if not is_rule:
                print(f"Warning: Skipping event definition that is not a rule: {signal_def!r}")
                continue

            rule_counter += 1
            try:
                fixed_expr = self._fix_rule_parentheses(signal_def)
                rule_result = self._evaluate_rule(fixed_expr, available_features)
                clean_name = re.sub(r'[^a-zA-Z0-9_]', '_', signal_def[:20])
                feature_name = f"event_{clean_name}"
                if feature_name in features.columns:
                    # Rules sharing the same 20-character prefix get a suffix.
                    feature_name = f"event_{clean_name}_{rule_counter}"
                features[feature_name] = rule_result.astype(int)
                print(f"Created event feature: {feature_name} from rule: {signal_def}")
            except Exception as e:
                print(f"Error processing rule '{signal_def}': {e}")
                features[f"event_rule_error_{rule_counter}"] = 0

        return features

    def _compute_contextual_features(self, df, signals, **kwargs):
        """Contextual features: batch position and boundaries.

        Uses the column named by the ``batch_id`` keyword (default
        ``'batch_id'``); returns an empty frame when that column is absent.
        ``batch_position`` runs from 0 to 1 inside each batch and is 0 for
        single-row batches.
        """
        features = pd.DataFrame(index=df.index)
        batch_id = kwargs.get('batch_id', 'batch_id')

        if batch_id in df.columns:
            batch_pos = df.groupby(batch_id).cumcount()
            last_pos = batch_pos.groupby(df[batch_id]).transform('max')
            # A single-row batch has last_pos == 0; divide by 1 there so the
            # position is 0 instead of 0/0 = NaN.
            features["batch_position"] = batch_pos / last_pos.replace(0, 1)
            features["is_batch_start"] = (batch_pos == 0).astype(int)
            features["is_batch_end"] = (batch_pos == last_pos).astype(int)

        return features

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def _interaction_groups(self, signals):
        """Normalize the 'interaction' plan entry into a list of signal groups.

        Accepts either a flat list of signal names (treated as one group) or
        a list of lists/tuples; groups with fewer than two signals are skipped.
        """
        if all(isinstance(item, str) for item in signals):
            return [list(signals)] if len(signals) >= 2 else []

        groups = []
        for item in signals:
            if isinstance(item, (list, tuple)) and len(item) >= 2:
                groups.append(list(item))
            else:
                print(f"Warning: Skipping interaction entry with fewer than two signals: {item!r}")
        return groups

    def compute_features(self, df, feature_plan: Dict[str, List[str]]):
        """Compute features based on a feature plan.

        Parameters
        ----------
        df : DataFrame with the input signals.
        feature_plan : mapping of family name to that family's signal list.
            Unknown families are reported and skipped. For 'interaction',
            every group of two or more signals yields all pairwise features.

        Returns
        -------
        DataFrame on ``df.index`` with all computed columns, NaNs replaced by 0.
        """
        frames = []

        for family, signals in feature_plan.items():
            compute = self.feature_families.get(family)
            if compute is None:
                print(f"Warning: Unknown feature family '{family}'")
                continue

            if family == 'interaction':
                for group in self._interaction_groups(signals):
                    frames.append(compute(df, group))
            else:
                frames.append(compute(df, signals))

        frames = [frame for frame in frames if len(frame.columns) > 0]
        if not frames:
            return pd.DataFrame(index=df.index)

        # Concatenate once instead of growing the result inside the loop.
        return pd.concat(frames, axis=1).fillna(0)

    def analyze_rule_performance(self, df: pd.DataFrame, feature_plan: Dict[str, List[str]]) -> Dict:
        """
        Compute features and analyze rule performance.

        Parameters:
        -----------
        df : Input data with sensor signals and state labels
        feature_plan : Feature plan including event rules

        Returns:
        --------
        Dict with keys 'features' (all computed features) and 'diagnostics'
        (result of ``diagnostic_analyzer.compute_rule_metrics``, or None when
        the plan produced no ``event_*`` columns)
        """
        features = self.compute_features(df, feature_plan)

        event_columns = [col for col in features.columns if col.startswith('event_')]
        event_features = features[event_columns]

        if event_features.empty:
            print("No event features found for analysis")
            return {'features': features, 'diagnostics': None}

        diagnostic_results = self.diagnostic_analyzer.compute_rule_metrics(df, event_features)

        return {
            'features': features,
            'diagnostics': diagnostic_results,
        }
main.py ADDED
@@ -0,0 +1,394 @@
1
+ """
2
+ Main analysis pipeline for HMM process analyzer
3
+ """
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ from hmmlearn.hmm import GaussianHMM
8
+ from sklearn.preprocessing import StandardScaler
9
+ from scipy.optimize import linear_sum_assignment
10
+
11
+ from features.feature_library import ModularFeatureLibrary
12
+ from utils.hmm_utils import (
13
+ empirical_start_trans, emissions_from_labels, viterbi_decode,
14
+ print_evaluation, normalize_timestamps, create_interval_event_log_normalized,
15
+ filter_brief_states, create_gantt_chart
16
+ )
17
+ import config
18
+
19
+ import sys
20
+ import io
21
+ import os
22
+ # --- capture print output to save to results.txt ---
23
+
24
+ class _Tee:
25
+ """Write to multiple streams at once (e.g., console + file buffer)."""
26
+ def __init__(self, *streams):
27
+ self.streams = streams
28
+ def write(self, data):
29
+ for s in self.streams:
30
+ s.write(data)
31
+ def flush(self):
32
+ for s in self.streams:
33
+ s.flush()
34
+
35
+
36
def analyze_process(data_path, feature_plan, mode="unsupervised",
                    use_cip=False, n_unsup=None, random_seed=42,
                    results_txt_path=None):
    """
    Main analysis pipeline for process data.

    Everything printed while the pipeline runs is shown on the console and
    also written to ``results_txt_path`` when the function exits, whether it
    succeeds or raises.

    Parameters:
    -----------
    data_path : str
        Path to CSV data file
    feature_plan : dict
        Feature extraction plan
    mode : str
        "supervised" or "unsupervised" (any other value is treated as
        unsupervised)
    use_cip : bool
        Whether to include CIP states
    n_unsup : int
        Number of states for unsupervised mode
    random_seed : int
        Accepted for interface compatibility; it is not forwarded anywhere.
        The training functions read the seed from config.HMM_CONFIG.
    results_txt_path : str, optional
        Where to save the captured output; defaults to ``results.txt`` next
        to this script.

    Returns:
    --------
    dict with keys 'model', 'features', 'event_log', 'diagnostics' and
    'predictions'

    Raises:
    -------
    KeyError
        If the feature plan does not produce the raw features fed to the HMM.
    """
    if results_txt_path is None:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        results_txt_path = os.path.join(script_dir, "results.txt")

    # --- capture print output to save to the results file ---
    buffer = io.StringIO()
    original_stdout = sys.stdout
    sys.stdout = _Tee(original_stdout, buffer)

    try:
        # Load and prepare data
        df = load_and_prepare_data(data_path, use_cip)

        # Initialize feature library
        feature_lib = ModularFeatureLibrary(
            window_sizes=config.FEATURE_CONFIG["window_sizes"],
            stability_eps=config.FEATURE_CONFIG["stability_eps"],
            peak_threshold=config.FEATURE_CONFIG["peak_threshold"]
        )

        # Compute features and analyze rule performance
        print("Computing features and analyzing rule performance...")
        result = feature_lib.analyze_rule_performance(df, feature_plan)
        all_features = result['features']
        diagnostics = result['diagnostics']

        # Print diagnostic report
        # NOTE(review): relies on the truthiness of whatever
        # compute_rule_metrics returns - confirm that type supports bool().
        if diagnostics:
            feature_lib.diagnostic_analyzer.print_diagnostic_report(diagnostics)

        # Prepare features for HMM
        event_features = all_features[[col for col in all_features.columns if col.startswith('event_')]]
        important_raw_features = ['T_roll_mean_5', 'Q_in_roll_mean_5', 'Q_out_roll_mean_5', 'T_diff']

        # Fail with an actionable message instead of an opaque pandas error
        # when the plan (or the configured window sizes) omits an input.
        missing = [name for name in important_raw_features if name not in all_features.columns]
        if missing:
            raise KeyError(
                f"Feature plan did not produce the features required by the HMM: {missing}"
            )

        # Combine features
        features = pd.concat([all_features[important_raw_features], event_features], axis=1)

        # Split data into train/test
        train_features, test_features, df_train, df_test = split_train_test(df, features)

        # Scale features (the scaler is fitted on the training split only)
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(train_features)
        X_test_scaled = scaler.transform(test_features)

        # Pack sequences for HMM
        X_train_np, lengths_train = pack_sequences(df_train, X_train_scaled)
        X_test_np, lengths_test = pack_sequences(df_test, X_test_scaled)

        # Create state mappings
        state_list = get_state_list(use_cip, n_unsup, mode)
        state_to_idx, idx_to_state = create_state_mappings(state_list)

        # Train and evaluate HMM
        if mode.lower() == "supervised":
            results = train_supervised_hmm(
                X_train_np, lengths_train, X_test_np, lengths_test,
                df_train, df_test, state_to_idx, idx_to_state, state_list
            )
        else:
            results = train_unsupervised_hmm(
                X_train_np, lengths_train, X_test_np, lengths_test,
                df_train, df_test, state_to_idx, idx_to_state, state_list, n_unsup
            )

        # Generate event log
        print("\nGenerating event log...")
        event_log = generate_event_log(df, results['model'], results['mapping'],
                                       results['test_predictions'], scaler, features)

        # Create visualizations
        print("\nCreating visualizations...")
        create_visualizations(event_log)

        return {
            'model': results['model'],
            'features': features,
            'event_log': event_log,
            'diagnostics': diagnostics,
            'predictions': results['test_predictions']
        }
    finally:
        # --- always restore stdout and save the captured text ---
        sys.stdout = original_stdout
        with open(results_txt_path, "w", encoding="utf-8") as f:
            f.write(buffer.getvalue())
        print(f"Results text saved to: {results_txt_path}")
146
+
147
+
148
def load_and_prepare_data(data_path, use_cip):
    """Load and prepare data for analysis.

    Reads the CSV at ``data_path``, keeps only rows whose ``state`` is one of
    the configured production states (plus the CIP states when ``use_cip`` is
    true) and sorts the rows by ``batch_id`` and ``timestamp``.

    Returns the filtered, sorted DataFrame (a copy).
    """
    df = pd.read_csv(data_path)

    # Build a fresh list: extending config.PROCESS_STATES["production"] in
    # place would append the CIP states to the shared config on every call.
    state_list = list(config.PROCESS_STATES["production"])
    if use_cip:
        state_list = state_list + list(config.PROCESS_STATES["cip"])

    # Filter to relevant states and sort
    df = df[df["state"].isin(state_list)].copy()
    df.sort_values(["batch_id", "timestamp"], inplace=True)

    return df
162
+
163
+
164
def split_train_test(df, features, train_ratio=0.6):
    """Partition rows into train and test sets by whole batches.

    Batches are taken in order of first appearance in ``df["batch_id"]``; the
    first ``max(1, int(train_ratio * n_batches))`` go to training and the
    rest to testing. ``features`` must share ``df``'s index.

    Returns ``(X_train, X_test, df_train, df_test)``.
    """
    batch_column = df["batch_id"]
    ordered_batches = batch_column.unique()
    cutoff = max(1, int(train_ratio * len(ordered_batches)))

    # Row masks for the two partitions
    in_train = batch_column.isin(set(ordered_batches[:cutoff]))
    in_test = batch_column.isin(set(ordered_batches[cutoff:]))

    return features[in_train], features[in_test], df[in_train], df[in_test]
181
+
182
+
183
def pack_sequences(df_subset, X_subset):
    """Pack sequences for HMM training.

    Parameters
    ----------
    df_subset : DataFrame with a ``batch_id`` column, one row per sample,
        with each batch stored as one contiguous run of rows.
    X_subset : DataFrame or array of features, row-aligned with ``df_subset``.

    Returns
    -------
    (X, lengths) where ``X`` is the feature data as an array (DataFrames are
    converted with ``.values``) and ``lengths`` is the list of per-batch row
    counts in the order the batches appear in ``df_subset``.
    """
    # sort=False keeps the batches in order of appearance, so the lengths
    # line up with the row layout even when batch ids are not key-sorted.
    lengths = df_subset.groupby("batch_id", sort=False).size().tolist()
    if isinstance(X_subset, pd.DataFrame):
        X_subset = X_subset.values
    return X_subset, lengths
189
+
190
+
191
def get_state_list(use_cip, n_unsup, mode):
    """Get the list of states based on configuration.

    Returns a new list holding the configured production states, followed by
    the CIP states when ``use_cip`` is true. ``n_unsup`` and ``mode`` are
    accepted for interface compatibility and do not affect the result.
    """
    # Copy before extending: ``+=`` on the config list itself would mutate
    # config.PROCESS_STATES["production"] and accumulate duplicates per call.
    state_list = list(config.PROCESS_STATES["production"])
    if use_cip:
        state_list = state_list + list(config.PROCESS_STATES["cip"])

    return state_list
203
+
204
+
205
def create_state_mappings(state_list):
    """Build the name->index lookup and its inverse for ``state_list``.

    Indices are the positions in ``state_list``. Returns
    ``(state_to_idx, idx_to_state)``.
    """
    state_to_idx = {}
    for position, name in enumerate(state_list):
        state_to_idx[name] = position

    # The inverse is derived from the forward mapping, not from the list.
    idx_to_state = {}
    for name, position in state_to_idx.items():
        idx_to_state[position] = name

    return state_to_idx, idx_to_state
210
+
211
+
212
def train_supervised_hmm(X_train_np, lengths_train, X_test_np, lengths_test,
                         df_train, df_test, state_to_idx, idx_to_state, state_list):
    """Fit a label-initialised Gaussian HMM and evaluate it on the test split.

    Start/transition probabilities and emission parameters are estimated from
    the training labels and installed on the model before ``fit`` is called
    with ``init_params=""``. The test sequences are then decoded and an
    evaluation is printed.

    Returns a dict with 'model', 'mapping' (``idx_to_state``) and
    'test_predictions'.
    """
    print("\nTraining supervised HMM...")

    n_states = len(state_list)

    # Labels as state indices
    train_labels = df_train["state"].map(state_to_idx).values
    test_labels = df_test["state"].map(state_to_idx).values

    # Label-derived initial parameters
    start_probs, trans_matrix = empirical_start_trans(train_labels, lengths_train, n_states)
    state_means, state_covars = emissions_from_labels(X_train_np, train_labels, n_states)

    model = GaussianHMM(
        n_components=n_states,
        covariance_type="full",
        n_iter=30,
        init_params="",
        random_state=config.HMM_CONFIG["random_seed"],
        tol=config.HMM_CONFIG["tol"],
        verbose=False
    )
    model.startprob_ = start_probs
    model.transmat_ = trans_matrix
    model.means_ = state_means
    model.covars_ = state_covars

    model.fit(X_train_np, lengths_train)

    # Decode the held-out sequences and report
    test_predictions = viterbi_decode(model, X_test_np, lengths_test)
    print_evaluation(test_labels, test_predictions, idx_to_state, state_list,
                     title="Supervised HMM (Test)")

    return {
        'model': model,
        'mapping': idx_to_state,
        'test_predictions': test_predictions
    }
252
+
253
+
254
def _assign_hidden_states(y_true, y_pred, n_true, n_hidden):
    """Map every hidden-state index to a labelled-state index.

    Builds an ``n_true x n_hidden`` contingency table, assigns hidden states
    to labels with the Hungarian algorithm, then gives every hidden state the
    assignment left out (possible when ``n_hidden > n_true``) the label it
    co-occurs with most often.
    """
    true_arr = np.asarray(y_true, dtype=float)
    pred_arr = np.asarray(y_pred, dtype=int)

    # Drop samples whose label could not be mapped (NaN) or is out of range.
    valid = (
        np.isfinite(true_arr) & (true_arr >= 0) & (true_arr < n_true)
        & (pred_arr >= 0) & (pred_arr < n_hidden)
    )
    cont = np.zeros((n_true, n_hidden), dtype=int)
    # np.add.at accumulates repeated (row, col) pairs, unlike fancy-index +=.
    np.add.at(cont, (true_arr[valid].astype(int), pred_arr[valid]), 1)

    # Maximise agreement by minimising (max - count); rectangular is allowed.
    row_ind, col_ind = linear_sum_assignment(cont.max() - cont)
    mapping = {int(pred): int(true) for true, pred in zip(row_ind, col_ind)}

    for pred in range(n_hidden):
        if pred not in mapping:
            mapping[pred] = int(cont[:, pred].argmax())

    return mapping


def train_unsupervised_hmm(X_train_np, lengths_train, X_test_np, lengths_test,
                           df_train, df_test, state_to_idx, idx_to_state, state_list, n_unsup):
    """Train and evaluate unsupervised HMM with state mapping.

    Fits a Gaussian HMM with ``n_unsup`` hidden states (or one per entry of
    ``state_list`` when ``n_unsup`` is None), aligns the hidden states with
    the labelled states using the training split, and prints an evaluation of
    the mapped test predictions.

    Returns a dict with 'model', 'mapping' (hidden-state index -> state name)
    and 'test_predictions' (the raw, unmapped hidden-state sequence).
    """
    print("\nTraining unsupervised HMM...")

    n_states = n_unsup if n_unsup is not None else len(state_list)

    # Train unsupervised HMM
    hmm = GaussianHMM(
        n_components=n_states,
        covariance_type=config.HMM_CONFIG["covariance_type"],
        n_iter=config.HMM_CONFIG["n_iter"],
        random_state=config.HMM_CONFIG["random_seed"],
        tol=config.HMM_CONFIG["tol"],
        init_params="stmc",
        params="stmc"
    )
    hmm.fit(X_train_np, lengths_train)

    # Predict and map states
    y_train_true = df_train["state"].map(state_to_idx).values
    y_train_hat = viterbi_decode(hmm, X_train_np, lengths_train)

    # The table must have one column per hidden state: sizing it by the
    # number of labelled states would drop hidden states when n_unsup differs.
    mapping = _assign_hidden_states(y_train_true, y_train_hat, len(state_list), n_states)

    # Decode test set and map states
    y_test_hat = viterbi_decode(hmm, X_test_np, lengths_test)
    y_test_mapped = np.array([mapping.get(s, 0) for s in y_test_hat], dtype=int)
    y_test_true = df_test["state"].map(state_to_idx).values

    print_evaluation(y_test_true, y_test_mapped, idx_to_state, state_list,
                     title="Unsupervised HMM (mapped) — Test")

    # Create full mapping for event log
    state_mapping = {pred: idx_to_state[true] for pred, true in mapping.items() if true in idx_to_state}

    return {
        'model': hmm,
        'mapping': state_mapping,
        'test_predictions': y_test_hat
    }
304
+
305
+
306
def generate_event_log(df, hmm, state_mapping, predictions, scaler, features):
    """Decode the whole dataset with the HMM and write interval event logs.

    ``features`` is scaled with the already-fitted ``scaler``, decoded per
    batch and converted into an interval event log. Both the raw log and a
    version without states shorter than 2 seconds are saved to the paths in
    ``config.PATHS``. The ``predictions`` argument is not used here.

    Returns the filtered event log.
    """
    normalized_df = normalize_timestamps(df)

    # Scale and pack the complete feature matrix
    scaled_all = scaler.transform(features)
    packed_all, batch_lengths = pack_sequences(df, scaled_all)

    # State sequence for every row of the dataset
    decoded_states = viterbi_decode(hmm, packed_all, batch_lengths)

    raw_log = create_interval_event_log_normalized(
        normalized_df, decoded_states, state_mapping
    )
    short_states_removed = filter_brief_states(raw_log, min_duration_seconds=2.0)

    # Persist both variants
    raw_log_path = config.PATHS["event_log"]
    raw_log.to_csv(raw_log_path, index=False)
    short_states_removed.to_csv(config.PATHS["filtered_log"], index=False)

    print(f"Event log saved to: {config.PATHS['event_log']}")
    print(f"Filtered event log saved to: {config.PATHS['filtered_log']}")

    return short_states_removed
337
+
338
+
339
def create_visualizations(event_log):
    """Render a Gantt chart of the event log, save it and display it.

    The chart covers at most 10 cases; figure size, colours and the output
    path come from ``config``.
    """
    viz_settings = config.VISUALIZATION_CONFIG
    chart = create_gantt_chart(
        event_log,
        max_cases=10,
        figsize=viz_settings["gantt_figsize"],
        color_map=viz_settings["colors"]
    )

    # Write to disk first, then show
    chart.savefig(config.PATHS["gantt_chart"], dpi=300, bbox_inches='tight')
    chart.show()

    print(f"Gantt chart saved to: {config.PATHS['gantt_chart']}")
354
+
355
+
356
if __name__ == "__main__":
    # Signals shared by the per-signal feature families
    base_signals = ['T', 'Q_in', 'Q_out']

    # Rule expressions, grouped by the process phase they target
    cooling_rules = [
        '(T_diff_smooth < -1)',
    ]
    fill_rules = [
        '(Q_in > 0.2) AND (Q_out < 0.05)',
        '(Q_in > 0.2) AND (abs(T_diff) < 0.5) AND (Q_out < 0.05)',
    ]
    hold_rules = [
        '(abs(T_diff) < 0.3) AND (T > 70)',
        '(abs(T_diff) < 0.2) AND (Q_in < 0.1) AND (Q_out < 0.1) AND (T > 70)',
    ]
    discharge_rules = [
        '(Q_out > 0.2)',
        '(Q_out > 0.3)',
    ]
    idle_rules = [
        '(Q_in < 0.05) AND (Q_out < 0.05) AND (abs(T_diff) < 0.2)',
        '(T < 15) AND (Q_in < 0.05) AND (Q_out < 0.05)',
        '(Q_in < 0.05) AND (Q_out < 0.05) AND (abs(T_diff) < 0.2) AND (T < 15)',
    ]

    # Example feature plan
    feature_plan = {
        'statistical': list(base_signals),
        'temporal': list(base_signals),
        'stability': list(base_signals),
        'interaction': [list(base_signals)],
        'event': cooling_rules + fill_rules + hold_rules + discharge_rules + idle_rules,
        'contextual': [],
    }

    # Run analysis
    results = analyze_process(
        data_path="synthetic_pasteurization_with_cip_signals_100.csv",
        feature_plan=feature_plan,
        mode="unsupervised",
        use_cip=False,
        n_unsup=None,
        random_seed=42,
    )