seq-hybrid-detector 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,61 @@
1
+ Metadata-Version: 2.4
2
+ Name: seq_hybrid_detector
3
+ Version: 0.1.0
4
+ Summary: A sequential hybrid anomaly detection framework combining PyTorch GRUs and Isolation Forests.
5
+ Author-email: Your Name <your.email@example.com>
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
10
+ Requires-Python: >=3.8
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: numpy>=1.20.0
14
+ Requires-Dist: pandas>=1.3.0
15
+ Requires-Dist: torch>=2.0.0
16
+ Requires-Dist: scikit-learn>=1.0.0
17
+ Requires-Dist: joblib>=1.1.0
18
+ Requires-Dist: matplotlib>=3.4.0
19
+ Requires-Dist: seaborn>=0.11.0
20
+ Dynamic: license-file
21
+
22
+ # seq_hybrid_detector
23
+
24
+ `seq_hybrid_detector` is a small Python package scaffold for sequence hybrid detection workflows.
25
+
26
+ ## What is included
27
+
28
+ The repository currently provides a clean `src/` layout with starter modules for loading data, defining core models, and wiring a simple pipeline.
29
+
30
+ ## Project layout
31
+
32
+ ```text
33
+ seq_hybrid_detector/
34
+ ├── LICENSE
35
+ ├── README.md
36
+ ├── pyproject.toml
37
+ └── src/
38
+ └── seq_hybrid_detector/
39
+ ├── __init__.py
40
+ ├── data_engine.py
41
+ ├── models.py
42
+ └── pipeline.py
43
+ ```
44
+
45
+ ## Installation
46
+
47
+ From the project root, install the package in editable mode during development:
48
+
49
+ ```bash
50
+ pip install -e .
51
+ ```
52
+
53
+ ## Modules
54
+
55
+ - `data_engine.py` contains basic sequence loading and normalization helpers.
56
+ - `models.py` defines lightweight dataclasses for input samples and predictions.
57
+ - `pipeline.py` provides a starter pipeline class for fitting and predicting.
58
+
59
+ ## Status
60
+
61
+ This is a scaffold, not a finished detector. The modules are in place for you to extend with real preprocessing, model training, and scoring logic.
@@ -0,0 +1,40 @@
1
+ # seq_hybrid_detector
2
+
3
+ `seq_hybrid_detector` is a small Python package scaffold for sequence hybrid detection workflows.
4
+
5
+ ## What is included
6
+
7
+ The repository currently provides a clean `src/` layout with starter modules for loading data, defining core models, and wiring a simple pipeline.
8
+
9
+ ## Project layout
10
+
11
+ ```text
12
+ seq_hybrid_detector/
13
+ ├── LICENSE
14
+ ├── README.md
15
+ ├── pyproject.toml
16
+ └── src/
17
+     └── seq_hybrid_detector/
18
+         ├── __init__.py
19
+         ├── data_engine.py
20
+         ├── models.py
21
+         └── pipeline.py
22
+ ```
23
+
24
+ ## Installation
25
+
26
+ From the project root, install the package in editable mode during development:
27
+
28
+ ```bash
29
+ pip install -e .
30
+ ```
31
+
32
+ ## Modules
33
+
34
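+ - `data_engine.py` provides `load_and_clean_data`, which loads the CSV datasets, drops metadata identifier columns, encodes categorical columns, and removes highly correlated and zero-variance features.
35
+ - `models.py` defines `GRUFeatureExtractor`, a PyTorch GRU module that returns a sequence reconstruction together with its final hidden state.
36
+ - `pipeline.py` defines `CascadedIoTDetector`, which trains the GRU, fits an Isolation Forest on the fused features, computes anomaly scores, and exports/loads the fitted pipeline.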
37
+
38
+ ## Status
39
+
40
+ This is a scaffold, not a finished detector. The modules are in place for you to extend with real preprocessing, model training, and scoring logic.
@@ -0,0 +1,31 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "seq_hybrid_detector"
7
+ version = "0.1.0"
8
+ authors = [
9
+ { name="Your Name", email="your.email@example.com" },
10
+ ]
11
+ description = "A sequential hybrid anomaly detection framework combining PyTorch GRUs and Isolation Forests."
12
+ readme = "README.md"
13
+ requires-python = ">=3.8"
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Operating System :: OS Independent",
18
+ "Topic :: Scientific/Engineering :: Artificial Intelligence"
19
+ ]
20
+ dependencies = [
21
+ "numpy>=1.20.0",
22
+ "pandas>=1.3.0",
23
+ "torch>=2.0.0",
24
+ "scikit-learn>=1.0.0",
25
+ "joblib>=1.1.0",
26
+ "matplotlib>=3.4.0",
27
+ "seaborn>=0.11.0"
28
+ ]
29
+
30
+ [tool.setuptools.packages.find]
31
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,6 @@
1
+ from .data_engine import load_and_clean_data
2
+ from .models import GRUFeatureExtractor
3
+ from .pipeline import CascadedIoTDetector
4
+
5
+ __version__ = "0.1.0"
6
+ __all__ = ["load_and_clean_data", "GRUFeatureExtractor", "CascadedIoTDetector"]
@@ -0,0 +1,58 @@
1
+ import os
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
def load_and_clean_data(data_path='./datasets/', files=None, samples_per_file=10000,
                        correlation_threshold=0.85, random_state=42):
    """Load CSV datasets and return a cleaned, shuffled feature frame.

    Processing steps, in order:

    1. Read every file listed in ``files`` that exists under ``data_path``,
       sample at most ``samples_per_file`` rows from each, and set its
       ``label`` column to ``0``.
    2. Concatenate and shuffle the rows.
    3. Drop known metadata identifier columns.
    4. Replace every non-numeric column with integer category codes.
    5. Drop one feature of every pair whose absolute correlation exceeds
       ``correlation_threshold``.
    6. Drop features that hold a single distinct value (including columns
       that are entirely missing).

    Args:
        data_path: Directory containing the CSV files.
        files: Mapping of an arbitrary key to a CSV file name. Only the
            values are used. Defaults to the two bundled benign sample
            file names.
        samples_per_file: Maximum number of rows sampled from each file.
        correlation_threshold: Absolute correlation above which the later
            column of a pair is dropped.
        random_state: Seed used for both the per-file sampling and the
            final shuffle. The default keeps the previous fixed seed.

    Returns:
        A ``pandas.DataFrame`` with the surviving feature columns followed
        by ``label``.

    Raises:
        FileNotFoundError: If none of the listed files exists.
    """
    if files is None:
        files = {
            'b5': 'benign_samples_5sec.csv',
            'b10': 'benign_samples_10sec.csv',
        }

    dfs = []
    for fname in files.values():
        path = os.path.join(data_path, fname)
        # Missing files are skipped on purpose; only "nothing found at all"
        # is treated as an error below.
        if not os.path.exists(path):
            continue
        temp_df = pd.read_csv(path)
        temp_df = temp_df.sample(n=min(samples_per_file, len(temp_df)), random_state=random_state)
        temp_df["label"] = 0
        dfs.append(temp_df)

    if not dfs:
        raise FileNotFoundError(f"No valid dataset files found within: {data_path}")

    full_df = (
        pd.concat(dfs, ignore_index=True)
        .sample(frac=1, random_state=random_state)
        .reset_index(drop=True)
    )

    # Metadata Filtering Block
    drop_cols = ['device_name', 'device_mac', 'device_id', 'ip', 'timestamp']
    existing_drop_cols = [col for col in drop_cols if col in full_df.columns]
    if existing_drop_cols:
        print(f">>> Dropping metadata identifiers: {existing_drop_cols}")
        full_df = full_df.drop(columns=existing_drop_cols)

    # Encode non-numeric/categorical variables. Selecting by exclusion keeps
    # this independent of how pandas labels string columns (object vs. str).
    for col in full_df.select_dtypes(exclude=['number', 'bool']).columns:
        if col != "label":
            full_df[col] = full_df[col].astype("category").cat.codes

    # Correlation Filtering Block: inspect only the strict upper triangle so
    # that the first column of each correlated pair is the one that is kept.
    feature_df = full_df.drop(columns=["label"], errors="ignore")
    corr_matrix = feature_df.corr().abs()
    upper_mask = np.triu(np.ones(corr_matrix.shape, dtype=bool), k=1)
    exceeds = (corr_matrix.where(upper_mask) > correlation_threshold).any(axis=0)
    to_drop = [column for column, flagged in exceeds.items() if flagged]

    if to_drop:
        print(f">>> Dropping highly correlated redundant features (|r| > {correlation_threshold}): {to_drop}")
        full_df = full_df.drop(columns=to_drop)

    # Drop zero variance columns. Counting distinct values (NaN included)
    # also removes all-NaN columns and is safe on an empty frame, unlike a
    # comparison against the first row.
    feature_cols = [col for col in full_df.columns if col != "label"]
    distinct_counts = full_df[feature_cols].nunique(dropna=False)
    non_zero_var_cols = [col for col, count in distinct_counts.items() if count > 1]
    full_df = full_df[non_zero_var_cols + ["label"]]

    return full_df
@@ -0,0 +1,19 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
class GRUFeatureExtractor(nn.Module):
    """GRU encoder with a linear decoder that reconstructs its input window.

    The final hidden state of the last GRU layer is used both as the
    sequence embedding and as the input of the linear decoder.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the GRU hidden state (and of the embedding).
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_dim, hidden_dim=32, num_layers=1):
        super().__init__()
        self.gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.decoder = nn.Linear(hidden_dim, input_dim)

    def forward(self, x):
        """Encode a batch of sequences and reconstruct them.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_dim)``.

        Returns:
            A tuple ``(reconstructed, hidden_context)`` where
            ``reconstructed`` has the same shape as ``x`` (the decoded
            vector is repeated at every time step) and ``hidden_context``
            has shape ``(batch, hidden_dim)``.

        Raises:
            ValueError: If ``x`` is not 3-dimensional.
        """
        # The unsqueeze/repeat below assumes a batch axis; an unbatched 2-D
        # input would otherwise yield a mis-shaped result or an opaque error.
        if x.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, seq_len, input_dim), got {tuple(x.shape)}"
            )
        _, h_n = self.gru(x)
        # h_n is (num_layers, batch, hidden_dim); keep the top layer only.
        hidden_context = h_n[-1]
        reconstructed = self.decoder(hidden_context).unsqueeze(1).repeat(1, x.size(1), 1)
        return reconstructed, hidden_context
@@ -0,0 +1,121 @@
1
+ import os
2
+ import joblib
3
+ import torch
4
+ import torch.nn as nn
5
+ import numpy as np
6
+ from torch.utils.data import DataLoader, TensorDataset
7
+ from sklearn.ensemble import IsolationForest
8
+ from .models import GRUFeatureExtractor
9
+
10
class CascadedIoTDetector:
    """Two-stage anomaly detector: GRU sequence embeddings + Isolation Forest.

    A ``GRUFeatureExtractor`` is trained to reconstruct sliding windows of
    the input. Its hidden state for each window is concatenated with the
    last row of that window, and an ``IsolationForest`` is fitted on the
    resulting fused features.

    Args:
        input_dim: Number of features per row.
        hidden_dim: GRU hidden size.
        num_layers: Number of stacked GRU layers.
        window_size: Number of consecutive rows per sequence window.
        contamination: ``contamination`` passed to the Isolation Forest.
        n_estimators: ``n_estimators`` passed to the Isolation Forest.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """

    def __init__(self, input_dim, hidden_dim=32, num_layers=1, window_size=8, contamination=0.01, n_estimators=300):
        if window_size < 1:
            raise ValueError(f"window_size must be >= 1, got {window_size}")

        self.input_dim = input_dim
        self.window_size = window_size
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        self.feature_extractor = GRUFeatureExtractor(input_dim, hidden_dim, num_layers)
        self.anomaly_classifier = IsolationForest(
            n_estimators=n_estimators,
            max_samples=0.8,
            contamination=contamination,
            random_state=42,
            n_jobs=-1,
        )

    def create_sliding_windows(self, data):
        """Return every length-``window_size`` window of ``data`` (stride 1).

        Args:
            data: Array-like whose first axis is the row/time axis.

        Returns:
            A new writable array of shape
            ``(len(data) - window_size + 1, window_size, *data.shape[1:])``.
            When ``data`` has fewer rows than ``window_size`` the first
            dimension is 0 but the remaining dimensions are preserved.
        """
        data = np.asarray(data)
        n_windows = data.shape[0] - self.window_size + 1
        if n_windows <= 0:
            return np.empty((0, self.window_size) + data.shape[1:], dtype=data.dtype)

        # sliding_window_view appends the window axis last; move it next to
        # the window index and copy so callers get an owned, writable array.
        view = np.lib.stride_tricks.sliding_window_view(data, self.window_size, axis=0)
        return np.array(np.moveaxis(view, -1, 1), order="C")

    def _windows_or_raise(self, data):
        """Build sliding windows and fail clearly when none can be formed."""
        X_seq = self.create_sliding_windows(data)
        if len(X_seq) == 0:
            raise ValueError(
                f"Need at least window_size={self.window_size} rows to build a sequence, "
                f"got {len(data)}"
            )
        return X_seq

    def fit_backbone(self, X_train_scaled, epochs=20, batch_size=256, lr=0.002):
        """Train the GRU feature extractor to reconstruct sliding windows.

        Args:
            X_train_scaled: Array-like of rows by features; presumably
                already scaled by the caller (no scaling happens here).
            epochs: Number of passes over the windows.
            batch_size: Mini-batch size.
            lr: RMSprop learning rate.

        Raises:
            ValueError: If the input has fewer rows than ``window_size``.
        """
        X_seq = self._windows_or_raise(X_train_scaled)
        dataset = TensorDataset(torch.as_tensor(X_seq, dtype=torch.float32))
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

        optimizer = torch.optim.RMSprop(self.feature_extractor.parameters(), lr=lr, alpha=0.99, eps=1e-08)
        criterion = nn.MSELoss()

        print(">>> Beginning GRU Feature Extractor Training Pipeline...")
        self.feature_extractor.train()
        for epoch in range(epochs):
            total_loss = 0.0
            for batch in dataloader:
                inputs = batch[0]
                optimizer.zero_grad()
                reconstructed, _ = self.feature_extractor(inputs)
                loss = criterion(reconstructed, inputs)
                loss.backward()
                optimizer.step()
                # Weight by batch size so the epoch figure is a per-window mean.
                total_loss += loss.item() * inputs.size(0)

            if (epoch + 1) % 5 == 0 or epoch == 0:
                print(f"    Epoch {epoch+1:02d}/{epochs:02d} | Sequence Reconstruction Loss: {total_loss/len(dataset):.6f}")

    def extract_fused_features(self, X_scaled):
        """Compute fused point + temporal features for every full window.

        Args:
            X_scaled: Array-like of rows by features.

        Returns:
            A tuple ``(fused_features, hidden_embeddings)``. Both have one
            row per window, i.e. ``len(X_scaled) - window_size + 1`` rows.
            ``fused_features`` is the last row of each window stacked
            horizontally with that window's GRU embedding.

        Raises:
            ValueError: If the input has fewer rows than ``window_size``.
        """
        X_seq = self._windows_or_raise(X_scaled)
        self.feature_extractor.eval()
        with torch.no_grad():
            _, hidden = self.feature_extractor(torch.as_tensor(X_seq, dtype=torch.float32))
        hidden_embeddings = hidden.cpu().numpy()

        # Row i of the embeddings summarises rows i .. i+window_size-1, so it
        # is paired with the final row of that window.
        point_features = np.asarray(X_scaled)[self.window_size - 1:]
        fused_features = np.hstack((point_features, hidden_embeddings))
        return fused_features, hidden_embeddings

    def fit_ensemble(self, X_train_scaled):
        """Fit the Isolation Forest on fused features of the training data."""
        print("\n>>> Extracting Combined Temporal-Point Feature Ensembles...")
        fused_train, _ = self.extract_fused_features(X_train_scaled)
        print(">>> Fitting Isolation Forest Engine...")
        self.anomaly_classifier.fit(fused_train)

    def compute_anomaly_scores(self, X_test_scaled):
        """Score every input row; larger scores mean more anomalous.

        Returns:
            A tuple ``(padded_scores, hidden_embeddings)``. The scores are
            the negated Isolation Forest decision function. The first
            ``window_size - 1`` rows have no complete window, so they are
            assigned the score of the first complete window, which makes
            ``padded_scores`` as long as the input.
        """
        fused_features, hidden_embeddings = self.extract_fused_features(X_test_scaled)
        scores = -self.anomaly_classifier.decision_function(fused_features)
        pad_length = self.window_size - 1
        padded_scores = np.concatenate([np.repeat(scores[0], pad_length), scores])
        return padded_scores, hidden_embeddings

    def export_model(self, export_dir="exported_pipeline"):
        """
        Saves both the PyTorch weights and the Sklearn Isolation Forest model.

        Three files are written into ``export_dir`` (created if missing):
        ``config.pkl``, ``gru_backbone.pt`` and ``isolation_forest.pkl``.
        """
        os.makedirs(export_dir, exist_ok=True)

        # Save structural metadata config
        config = {
            "input_dim": self.input_dim,
            "hidden_dim": self.hidden_dim,
            "num_layers": self.num_layers,
            "window_size": self.window_size,
        }
        joblib.dump(config, os.path.join(export_dir, "config.pkl"))

        # Save Backbones
        torch.save(self.feature_extractor.state_dict(), os.path.join(export_dir, "gru_backbone.pt"))
        joblib.dump(self.anomaly_classifier, os.path.join(export_dir, "isolation_forest.pkl"))
        print(f">>> Successfully exported full model pipeline components to '{export_dir}/'")

    @classmethod
    def load_model(cls, export_dir="exported_pipeline"):
        """
        Loads components and reconstructs an instance of CascadedIoTDetector.

        SECURITY NOTE: ``config.pkl`` and ``isolation_forest.pkl`` are
        pickle-based joblib files; loading them can execute arbitrary code.
        Only load export directories from a trusted source.
        """
        config = joblib.load(os.path.join(export_dir, "config.pkl"))

        # Instantiate object with saved dimensions
        instance = cls(
            input_dim=config["input_dim"],
            hidden_dim=config["hidden_dim"],
            num_layers=config["num_layers"],
            window_size=config["window_size"],
        )

        # Load states. weights_only restricts unpickling to tensor data, and
        # map_location lets weights saved on another device load on CPU.
        state_dict = torch.load(
            os.path.join(export_dir, "gru_backbone.pt"),
            map_location="cpu",
            weights_only=True,
        )
        instance.feature_extractor.load_state_dict(state_dict)
        instance.feature_extractor.eval()
        # The freshly constructed forest is replaced by the fitted one.
        instance.anomaly_classifier = joblib.load(os.path.join(export_dir, "isolation_forest.pkl"))

        print(f">>> Successfully loaded framework artifacts from '{export_dir}/'")
        return instance
@@ -0,0 +1,61 @@
1
+ Metadata-Version: 2.4
2
+ Name: seq_hybrid_detector
3
+ Version: 0.1.0
4
+ Summary: A sequential hybrid anomaly detection framework combining PyTorch GRUs and Isolation Forests.
5
+ Author-email: Your Name <your.email@example.com>
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
10
+ Requires-Python: >=3.8
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Requires-Dist: numpy>=1.20.0
14
+ Requires-Dist: pandas>=1.3.0
15
+ Requires-Dist: torch>=2.0.0
16
+ Requires-Dist: scikit-learn>=1.0.0
17
+ Requires-Dist: joblib>=1.1.0
18
+ Requires-Dist: matplotlib>=3.4.0
19
+ Requires-Dist: seaborn>=0.11.0
20
+ Dynamic: license-file
21
+
22
+ # seq_hybrid_detector
23
+
24
+ `seq_hybrid_detector` is a small Python package scaffold for sequence hybrid detection workflows.
25
+
26
+ ## What is included
27
+
28
+ The repository currently provides a clean `src/` layout with starter modules for loading data, defining core models, and wiring a simple pipeline.
29
+
30
+ ## Project layout
31
+
32
+ ```text
33
+ seq_hybrid_detector/
34
+ ├── LICENSE
35
+ ├── README.md
36
+ ├── pyproject.toml
37
+ └── src/
38
+ └── seq_hybrid_detector/
39
+ ├── __init__.py
40
+ ├── data_engine.py
41
+ ├── models.py
42
+ └── pipeline.py
43
+ ```
44
+
45
+ ## Installation
46
+
47
+ From the project root, install the package in editable mode during development:
48
+
49
+ ```bash
50
+ pip install -e .
51
+ ```
52
+
53
+ ## Modules
54
+
55
+ - `data_engine.py` contains basic sequence loading and normalization helpers.
56
+ - `models.py` defines lightweight dataclasses for input samples and predictions.
57
+ - `pipeline.py` provides a starter pipeline class for fitting and predicting.
58
+
59
+ ## Status
60
+
61
+ This is a scaffold, not a finished detector. The modules are in place for you to extend with real preprocessing, model training, and scoring logic.
@@ -0,0 +1,12 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ src/seq_hybrid_detector/__init__.py
5
+ src/seq_hybrid_detector/data_engine.py
6
+ src/seq_hybrid_detector/models.py
7
+ src/seq_hybrid_detector/pipeline.py
8
+ src/seq_hybrid_detector.egg-info/PKG-INFO
9
+ src/seq_hybrid_detector.egg-info/SOURCES.txt
10
+ src/seq_hybrid_detector.egg-info/dependency_links.txt
11
+ src/seq_hybrid_detector.egg-info/requires.txt
12
+ src/seq_hybrid_detector.egg-info/top_level.txt
@@ -0,0 +1,7 @@
1
+ numpy>=1.20.0
2
+ pandas>=1.3.0
3
+ torch>=2.0.0
4
+ scikit-learn>=1.0.0
5
+ joblib>=1.1.0
6
+ matplotlib>=3.4.0
7
+ seaborn>=0.11.0
@@ -0,0 +1 @@
1
+ seq_hybrid_detector