PyPI - Sensor2EventLog - Versions diffs - 2.0.0__py3-none-any.whl - Mend

Sensor2EventLog 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

S2E_v2.py +1025 -0
abstraction/mt_loop.py +141 -0
config.py +46 -0
contextualization/event_log.py +339 -0
core/__init__.py +3 -0
core/pipeline.py +189 -0
evaluation/rule_analyzer.py +246 -0
features/feature_library.py +270 -0
main.py +394 -0
models/base_model.py +68 -0
models/hmm_model.py +174 -0
sensor2eventlog-2.0.0.dist-info/METADATA +53 -0
sensor2eventlog-2.0.0.dist-info/RECORD +18 -0
sensor2eventlog-2.0.0.dist-info/WHEEL +5 -0
sensor2eventlog-2.0.0.dist-info/licenses/LICENSE +21 -0
sensor2eventlog-2.0.0.dist-info/top_level.txt +10 -0
utils/__init__.py +30 -0
utils/hmm_utils.py +277 -0

core/pipeline.py ADDED Viewed

@@ -0,0 +1,189 @@
+"""
+Core pipeline orchestrator for Sensor2EventLog framework
+"""
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+from typing import Optional, Dict, Any, Union
+from abstraction.mt_loop import MachineTeachingLoop
+from contextualization.event_log import EventLog
+from features.feature_library import ModularFeatureLibrary
+from evaluation.rule_analyzer import RuleDiagnosticAnalyzer
+import config
class Sensor2EventLogPipeline:
    """
    Main pipeline class for transforming sensor data to event logs.

    This class orchestrates the entire Machine Teaching process:
    1. Feature extraction
    2. Model training (HMM with supervised/unsupervised modes)
    3. Diagnostic analysis
    4. Event log generation

    Example:
        >>> pipeline = Sensor2EventLogPipeline(config)
        >>> result = pipeline.run(
        ...     data_path="sensor_data.csv",
        ...     feature_plan=feature_plan,
        ...     mode="unsupervised"
        ... )
        >>> event_log = result['event_log']
        >>> event_log.to_xes("output.xes")
    """

    # Columns that _load_and_prepare_data filters on and sorts by.
    _REQUIRED_COLUMNS = ("state", "batch_id", "timestamp")

    def __init__(self, config_module=None):
        """
        Initialize the pipeline with configuration.

        Parameters:
        -----------
        config_module : module, optional
            Configuration module with all parameters. If None, uses default config.
            It must expose FEATURE_CONFIG, DIAGNOSTIC_CONFIG and PROCESS_STATES;
            PATHS is optional.
        """
        # Compare against None explicitly so that a caller-supplied configuration
        # object that happens to be falsy is not silently replaced by the default.
        self.config = config_module if config_module is not None else config

        feature_cfg = self.config.FEATURE_CONFIG
        diagnostic_cfg = self.config.DIAGNOSTIC_CONFIG

        # Initialize components
        self.feature_library = ModularFeatureLibrary(
            window_sizes=feature_cfg["window_sizes"],
            stability_eps=feature_cfg["stability_eps"],
            peak_threshold=feature_cfg["peak_threshold"]
        )
        self.diagnostic_analyzer = RuleDiagnosticAnalyzer(
            coverage_threshold=diagnostic_cfg["coverage_threshold"],
            precision_threshold=diagnostic_cfg["precision_threshold"],
            explainability_threshold=diagnostic_cfg["explainability_threshold"]
        )
        self.mt_loop = MachineTeachingLoop(
            model_type="hmm",
            feature_extractor=self.feature_library,
            diagnostic_analyzer=self.diagnostic_analyzer,
            config=self.config
        )
        # NOTE: neither attribute is read by any method of this class; both are
        # kept because external code may rely on them.
        self._scaler = StandardScaler()
        self._fitted = False

    def run(self,
            data_path: str,
            feature_plan: Dict[str, list],
            mode: str = "unsupervised",
            use_cip: bool = False,
            n_unsup: Optional[int] = None,
            random_seed: int = 42,
            min_duration_seconds: float = 2.0,
            return_intermediate: bool = False) -> Dict[str, Any]:
        """
        Run the complete pipeline.

        Parameters:
        -----------
        data_path : str
            Path to CSV data file
        feature_plan : dict
            Feature extraction plan with families and signals
        mode : str
            "supervised" or "unsupervised" (passed through to the Machine
            Teaching loop unchanged)
        use_cip : bool
            Whether to include CIP states
        n_unsup : int
            Number of states for unsupervised mode
        random_seed : int
            Random seed for reproducibility
        min_duration_seconds : float
            Minimum duration for filtering brief states
        return_intermediate : bool
            If True, returns intermediate results (features, diagnostics)

        Returns:
        --------
        dict with keys:
            - event_log: EventLog object
            - model: trained HMM model
            - predictions: predicted state sequences
            - features (if return_intermediate): extracted features
            - diagnostics (if return_intermediate): diagnostic results
        """
        # Load and prepare data
        df = self._load_and_prepare_data(data_path, use_cip)

        # Run Machine Teaching loop
        mt_result = self.mt_loop.run(
            df=df,
            feature_plan=feature_plan,
            mode=mode,
            n_unsup=n_unsup,
            random_seed=random_seed
        )
        predictions = mt_result['predictions']

        # Generate event log
        event_log = self._generate_event_log(
            df, predictions, mt_result['state_mapping'], min_duration_seconds
        )

        result = {
            'event_log': event_log,
            'model': mt_result['model'],
            'predictions': predictions
        }
        if return_intermediate:
            result['features'] = mt_result['features']
            result['diagnostics'] = mt_result['diagnostics']
        return result

    def _load_and_prepare_data(self, data_path: str, use_cip: bool) -> pd.DataFrame:
        """
        Load the CSV, keep only rows in the configured states and sort them.

        Parameters:
        -----------
        data_path : str
            Path to the CSV file; it must contain the columns "state",
            "batch_id" and "timestamp"
        use_cip : bool
            If True, rows in PROCESS_STATES["cip"] are kept in addition to
            those in PROCESS_STATES["production"]

        Returns:
        --------
        DataFrame filtered to the selected states and sorted by
        ("batch_id", "timestamp"). The index of the CSV rows is preserved.

        Raises:
        -------
        KeyError
            If one of the required columns is missing from the file
        """
        df = pd.read_csv(data_path)

        # Fail early with a message naming the file and every missing column,
        # instead of a bare KeyError from the first failing lookup.
        missing = [col for col in self._REQUIRED_COLUMNS if col not in df.columns]
        if missing:
            raise KeyError(
                f"{data_path!r} is missing required column(s): {', '.join(missing)}"
            )

        # Build a fresh list so the configured state lists are never mutated.
        allowed_states = list(self.config.PROCESS_STATES["production"])
        if use_cip:
            allowed_states.extend(self.config.PROCESS_STATES["cip"])

        # Filter to relevant states and sort
        df = df[df["state"].isin(allowed_states)].copy()
        df.sort_values(["batch_id", "timestamp"], inplace=True)
        return df

    def _generate_event_log(self, df: pd.DataFrame, predictions: np.ndarray,
                            state_mapping: Dict, min_duration_seconds: float) -> 'EventLog':
        """
        Generate event log from predictions.

        The data frame is passed through normalize_timestamps, turned into an
        interval event log, stripped of brief states and wrapped in an EventLog.

        Side effect: when the configuration defines PATHS, the event log is
        written to CSV twice (keys "event_log" and "filtered_log", falling back
        to "event_log.csv" and "filtered_log.csv"); the second call passes
        filtered=True.
        """
        # Imported at call time, as in the original implementation.
        from contextualization.event_log import create_interval_event_log_normalized
        from utils.hmm_utils import filter_brief_states, normalize_timestamps

        # Normalize timestamps
        df_normalized = normalize_timestamps(df)

        # Create event log
        event_df = create_interval_event_log_normalized(
            df_normalized, predictions, state_mapping
        )

        # Filter brief states
        filtered_df = filter_brief_states(event_df, min_duration_seconds)

        # Wrap in EventLog object
        event_log = EventLog(filtered_df)

        # Save if paths are configured
        if hasattr(self.config, 'PATHS'):
            paths = self.config.PATHS
            event_log.to_csv(paths.get("event_log", "event_log.csv"))
            event_log.to_csv(paths.get("filtered_log", "filtered_log.csv"),
                             filtered=True)
        return event_log

evaluation/rule_analyzer.py ADDED Viewed

@@ -0,0 +1,246 @@
+"""
+Rule diagnostic analyzer for evaluating rule performance metrics
+"""
+import numpy as np
+import pandas as pd
+from typing import Dict, List, Union
+import re
class RuleDiagnosticAnalyzer:
    """
    Analyzes rule performance using coverage, precision, and explainability metrics.

    A "rule" is any column of the event-feature frame whose name starts with
    'event_'. For a rule r and a state s:

    - coverage  = activations of r inside s / number of rows in s
    - precision = activations of r inside s / activations of r overall
    - explainability of s = best coverage achieved by any rule for s
    """

    def __init__(self, coverage_threshold: float = 0.6, precision_threshold: float = 0.7,
                 explainability_threshold: float = 0.3):
        """
        Parameters:
        -----------
        coverage_threshold : coverage below this value counts as low
        precision_threshold : precision below this value counts as low
        explainability_threshold : states whose explainability is below this
            value are reported as unexplainable
        """
        self.c_low = coverage_threshold
        self.p_low = precision_threshold
        self.epsilon_unex = explainability_threshold

    def compute_rule_metrics(self, df: pd.DataFrame, event_features: pd.DataFrame,
                             state_column: str = 'state') -> Dict:
        """
        Compute coverage, precision, and explainability metrics for all event features.

        Parameters:
        -----------
        df : DataFrame with state labels
        event_features : DataFrame containing event rule features (binary columns).
            Rows are matched to `df` by index label, so both frames must share
            the same index.
        state_column : Column name containing state labels

        Returns:
        --------
        Dict with keys:
            - rule_metrics: {rule: {state: {coverage, precision, effectiveness}}}
            - state_metrics: {state: {explainability, gap, best_rule,
              best_coverage, state_frequency, unexplainable}}
            - recommendations: list of recommendation dicts, sorted by priority
            - unexplainable_states: states below the explainability threshold
        """
        results = {
            'rule_metrics': {},
            'state_metrics': {},
            'recommendations': [],
            'unexplainable_states': []
        }

        # Row labels belonging to each state, computed once instead of once per
        # rule. States without any matching row (e.g. NaN labels, which never
        # compare equal) are dropped here, as the original loops skipped them.
        state_rows = {}
        for state in df[state_column].unique():
            state_mask = df[state_column] == state
            labels = state_mask[state_mask].index
            if len(labels) > 0:
                state_rows[state] = labels

        # Compute metrics for each rule-state combination
        rule_metrics = {}
        for rule_col in event_features.columns:
            # Non-string column labels cannot be rule names; skip them rather
            # than fail on `.startswith`.
            if not (isinstance(rule_col, str) and rule_col.startswith('event_')):
                continue

            # Total rule activations (independent of the state, so hoisted)
            total_rule_activations = event_features[rule_col].sum()

            per_state = {}
            for state, labels in state_rows.items():
                # Rule activations for this state
                activations_in_state = event_features.loc[labels, rule_col].sum()

                # Compute coverage and precision
                coverage = activations_in_state / len(labels)
                precision = (activations_in_state / total_rule_activations
                             if total_rule_activations > 0 else 0)
                per_state[state] = {
                    'coverage': coverage,
                    'precision': precision,
                    # Geometric mean of coverage and precision
                    'effectiveness': np.sqrt(coverage * precision) if coverage > 0 and precision > 0 else 0
                }
            rule_metrics[rule_col] = per_state
        results['rule_metrics'] = rule_metrics

        # Compute state-level metrics
        state_metrics = {}
        for state, labels in state_rows.items():
            # Find best coverage across all rules for this state. A rule only
            # becomes "best" with coverage strictly above 0, so best_rule stays
            # None when no rule ever fires inside the state.
            best_coverage = 0
            best_rule = None
            for rule_col, per_state in rule_metrics.items():
                if state in per_state:
                    coverage = per_state[state]['coverage']
                    if coverage > best_coverage:
                        best_coverage = coverage
                        best_rule = rule_col

            explainability = best_coverage
            unexplainable = explainability < self.epsilon_unex
            state_metrics[state] = {
                'explainability': explainability,
                'gap': 1 - explainability,
                'best_rule': best_rule,
                'best_coverage': best_coverage,
                'state_frequency': len(labels) / len(df),
                'unexplainable': unexplainable
            }
            if unexplainable:
                results['unexplainable_states'].append(state)
        results['state_metrics'] = state_metrics

        # Generate recommendations
        results['recommendations'] = self._generate_recommendations(rule_metrics, state_metrics)
        return results

    def _generate_recommendations(self, rule_metrics: Dict, state_metrics: Dict) -> List[Dict]:
        """
        Generate actionable recommendations based on diagnostic metrics.

        Every rule-state pair is classified by comparing coverage and precision
        with the configured thresholds; pairs that meet both thresholds produce
        no recommendation. Unexplainable states get an additional entry. The
        returned list is sorted CRITICAL, HIGH, MEDIUM, LOW; entries of equal
        priority keep their generation order.
        """
        recommendations = []

        # Analyze each rule-state combination
        for rule_col, state_dict in rule_metrics.items():
            for state, metrics in state_dict.items():
                coverage = metrics['coverage']
                precision = metrics['precision']

                # Rule categorization and recommendations
                if coverage >= self.c_low and precision >= self.p_low:
                    # Optimal rule - no action needed
                    continue
                elif coverage >= self.c_low and precision < self.p_low:
                    # Overly sensitive rule
                    recommendations.append({
                        'type': 'OVERLY_SENSITIVE_RULE',
                        'rule': rule_col,
                        'state': state,
                        'coverage': coverage,
                        'precision': precision,
                        'action': f"Rule '{rule_col}' for state '{state}' has good coverage ({coverage:.1%}) but low precision ({precision:.1%}). Add temporal stability constraints or interaction features to reduce false positives.",
                        'priority': 'HIGH',
                        'suggested_families': ['stability', 'interaction']
                    })
                elif coverage < self.c_low and precision >= self.p_low:
                    # Overly specific rule
                    recommendations.append({
                        'type': 'OVERLY_SPECIFIC_RULE',
                        'rule': rule_col,
                        'state': state,
                        'coverage': coverage,
                        'precision': precision,
                        'action': f"Rule '{rule_col}' for state '{state}' has high precision ({precision:.1%}) but low coverage ({coverage:.1%}). Relax thresholds or remove restrictive conditions.",
                        'priority': 'MEDIUM',
                        'suggested_families': ['temporal', 'statistical']
                    })
                elif coverage < self.c_low and precision < self.p_low:
                    # Ineffective rule. Kept as an explicit condition (not a
                    # bare else) so that NaN metrics yield no recommendation.
                    recommendations.append({
                        'type': 'INEFFECTIVE_RULE',
                        'rule': rule_col,
                        'state': state,
                        'coverage': coverage,
                        'precision': precision,
                        'action': f"Rule '{rule_col}' for state '{state}' performs poorly (coverage: {coverage:.1%}, precision: {precision:.1%}). Consider complete redesign with alternative sensor combinations.",
                        'priority': 'HIGH',
                        'suggested_families': ['all']
                    })

        # Analyze unexplainable states
        for state, metrics in state_metrics.items():
            if metrics['unexplainable']:
                recommendations.append({
                    'type': 'UNEXPLAINABLE_STATE',
                    'state': state,
                    'explainability': metrics['explainability'],
                    'frequency': metrics['state_frequency'],
                    'action': f"State '{state}' is largely unexplained (explainability: {metrics['explainability']:.1%}). Consider state decomposition, feature space expansion, or probabilistic approaches.",
                    # Frequent unexplained states (more than 10% of rows) are escalated.
                    'priority': 'CRITICAL' if metrics['state_frequency'] > 0.1 else 'HIGH',
                    'suggested_approaches': ['state_decomposition', 'feature_expansion', 'probabilistic_modeling']
                })

        # Sort by priority
        priority_order = {'CRITICAL': 0, 'HIGH': 1, 'MEDIUM': 2, 'LOW': 3}
        recommendations.sort(key=lambda x: priority_order[x['priority']])
        return recommendations

    def print_diagnostic_report(self, diagnostic_results: Dict):
        """
        Print comprehensive diagnostic report.

        Parameters:
        -----------
        diagnostic_results : Dict as returned by compute_rule_metrics

        The summary section prints "n/a" for the ratio and the average when no
        state was analyzed, instead of dividing by zero.
        """
        print("=" * 80)
        print("RULE DIAGNOSTIC REPORT")
        print("=" * 80)

        # Rule performance summary
        print("\n1. RULE PERFORMANCE SUMMARY:")
        print("-" * 40)
        rule_metrics = diagnostic_results['rule_metrics']
        for rule_col, state_dict in rule_metrics.items():
            print(f"\nRule: {rule_col}")
            for state, metrics in state_dict.items():
                print(f"  State: {state:15} | Coverage: {metrics['coverage']:6.1%} | "
                      f"Precision: {metrics['precision']:6.1%} | "
                      f"Effectiveness: {metrics['effectiveness']:6.1%}")

        # State explainability
        print("\n2. STATE EXPLAINABILITY ANALYSIS:")
        print("-" * 40)
        state_metrics = diagnostic_results['state_metrics']
        for state, metrics in state_metrics.items():
            unexplainable_flag = " ⚠ UNEXPLAINABLE" if metrics['unexplainable'] else ""
            print(f"State: {state:15} | Explainability: {metrics['explainability']:6.1%} | "
                  f"Best Rule: {metrics['best_rule'] or 'None'}{unexplainable_flag}")

        # Recommendations
        print("\n3. ACTIONABLE RECOMMENDATIONS:")
        print("-" * 40)
        for i, rec in enumerate(diagnostic_results['recommendations'], 1):
            print(f"\n{i}. [{rec['priority']}] {rec['type']}")
            print(f"   {rec['action']}")
            if 'suggested_families' in rec:
                print(f"   Suggested feature families: {', '.join(rec['suggested_families'])}")
            if 'suggested_approaches' in rec:
                print(f"   Suggested approaches: {', '.join(rec['suggested_approaches'])}")

        # Summary statistics
        print("\n4. SUMMARY STATISTICS:")
        print("-" * 40)
        total_states = len(state_metrics)
        unexplainable_states = len(diagnostic_results['unexplainable_states'])
        if total_states:
            unexplainable_share = f"{unexplainable_states/total_states:.1%}"
            avg_explainability = f"{np.mean([m['explainability'] for m in state_metrics.values()]):.1%}"
        else:
            # No states analyzed: both figures are undefined.
            unexplainable_share = avg_explainability = "n/a"
        print(f"Total states analyzed: {total_states}")
        print(f"Unexplainable states: {unexplainable_states} ({unexplainable_share})")
        print(f"Average explainability: {avg_explainability}")
        print(f"Recommendations generated: {len(diagnostic_results['recommendations'])}")