graphids 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- graphids-0.1.0/.github/workflows/publish.yml +27 -0
- graphids-0.1.0/.gitignore +14 -0
- graphids-0.1.0/LICENSE +21 -0
- graphids-0.1.0/PKG-INFO +108 -0
- graphids-0.1.0/README.md +61 -0
- graphids-0.1.0/graphids/__init__.py +12 -0
- graphids-0.1.0/graphids/contrastive.py +127 -0
- graphids-0.1.0/graphids/gcn.py +93 -0
- graphids-0.1.0/graphids/graph.py +85 -0
- graphids-0.1.0/graphids/pipeline.py +258 -0
- graphids-0.1.0/graphids/transformer.py +126 -0
- graphids-0.1.0/pyproject.toml +50 -0
- graphids-0.1.0/tests/__init__.py +0 -0
- graphids-0.1.0/tests/test_graph.py +69 -0
- graphids-0.1.0/tests/test_model.py +133 -0
- graphids-0.1.0/tests/test_pipeline.py +94 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Publish the package to PyPI whenever a GitHub release is published.
# Uses PyPI "trusted publishing" (OIDC) — no API-token secret is stored.
name: Publish to PyPI

on:
  release:
    types: [published]

# id-token: write is required for OIDC-based trusted publishing.
permissions:
  id-token: write

jobs:
  publish:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install build tools
        run: pip install build

      # Builds both sdist and wheel into dist/ by default.
      - name: Build package
        run: python -m build

      # The publish action picks up everything in dist/.
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
|
graphids-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Vijay Govindarajan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
graphids-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: graphids
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Graph-based intrusion detection using GCN, Transformer autoencoder, and contrastive learning
|
|
5
|
+
Project-URL: Homepage, https://github.com/vijaygovindaraja/graphids
|
|
6
|
+
Project-URL: Paper, https://doi.org/10.1038/s41598-025-07956-w
|
|
7
|
+
Author-email: Vijay Govindarajan <vijay.govindarajan91@gmail.com>
|
|
8
|
+
License: MIT License
|
|
9
|
+
|
|
10
|
+
Copyright (c) 2026 Vijay Govindarajan
|
|
11
|
+
|
|
12
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
13
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
14
|
+
in the Software without restriction, including without limitation the rights
|
|
15
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
16
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
17
|
+
furnished to do so, subject to the following conditions:
|
|
18
|
+
|
|
19
|
+
The above copyright notice and this permission notice shall be included in all
|
|
20
|
+
copies or substantial portions of the Software.
|
|
21
|
+
|
|
22
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
23
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
24
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
25
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
26
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
27
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
28
|
+
SOFTWARE.
|
|
29
|
+
License-File: LICENSE
|
|
30
|
+
Keywords: cloud-security,contrastive-learning,graph-neural-network,intrusion-detection,network-security,transformer
|
|
31
|
+
Classifier: Development Status :: 3 - Alpha
|
|
32
|
+
Classifier: Intended Audience :: Science/Research
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Programming Language :: Python :: 3
|
|
35
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
36
|
+
Classifier: Topic :: Security
|
|
37
|
+
Requires-Python: >=3.10
|
|
38
|
+
Requires-Dist: numpy>=1.24
|
|
39
|
+
Requires-Dist: pandas>=2.0
|
|
40
|
+
Requires-Dist: scikit-learn>=1.3
|
|
41
|
+
Requires-Dist: torch>=2.0
|
|
42
|
+
Provides-Extra: dev
|
|
43
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
44
|
+
Provides-Extra: shap
|
|
45
|
+
Requires-Dist: shap>=0.43; extra == 'shap'
|
|
46
|
+
Description-Content-Type: text/markdown
|
|
47
|
+
|
|
48
|
+
# GraphIDS
|
|
49
|
+
|
|
50
|
+
Graph-based intrusion detection using GCN, Transformer autoencoder, and contrastive learning.
|
|
51
|
+
|
|
52
|
+
Reference implementation of the framework introduced in:
|
|
53
|
+
|
|
54
|
+
> Govindarajan, V. & Muzamal, J. H. (2025). Advanced cloud intrusion detection
|
|
55
|
+
> framework using graph based features transformers and contrastive learning.
|
|
56
|
+
> *Scientific Reports*, 15, 20511. DOI: [10.1038/s41598-025-07956-w](https://doi.org/10.1038/s41598-025-07956-w)
|
|
57
|
+
|
|
58
|
+
## Install
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install graphids
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Quick start
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from graphids import GraphIDS
|
|
68
|
+
|
|
69
|
+
model = GraphIDS(n_features=41, n_classes=5)
|
|
70
|
+
model.train_pipeline(X_train, y_train)
|
|
71
|
+
result = model.evaluate(X_test, y_test)
|
|
72
|
+
print(f"Accuracy: {result.accuracy:.4f}")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Architecture
|
|
76
|
+
|
|
77
|
+
Three-stage pipeline:
|
|
78
|
+
|
|
79
|
+
1. **GCN** — constructs a communication graph from flow data, extracts structural node embeddings via 3-layer graph convolution
|
|
80
|
+
2. **Transformer autoencoder** — refines embeddings through self-attention, identifies discriminative feature dimensions
|
|
81
|
+
3. **Contrastive classifier** — improves class separation for minority attack types (U2R, R2L), outputs multi-class predictions
|
|
82
|
+
|
|
83
|
+
## Results (from the paper)
|
|
84
|
+
|
|
85
|
+
| Dataset | Accuracy | Precision | Recall | F1 | FPR |
|
|
86
|
+
|---|---|---|---|---|---|
|
|
87
|
+
| NSL-KDD (5-class) | 99.97% | 99.94% | 99.92% | 99.93% | 0.05% |
|
|
88
|
+
| CIC-IDS (binary) | 99.96% | 99.93% | 99.91% | 99.92% | 0.06% |
|
|
89
|
+
| CIC-IDS (multi) | 99.95% | 99.92% | 99.90% | 99.91% | 0.07% |
|
|
90
|
+
|
|
91
|
+
## Citation
|
|
92
|
+
|
|
93
|
+
```bibtex
|
|
94
|
+
@article{govindarajan2025graphids,
|
|
95
|
+
title = {Advanced cloud intrusion detection framework using graph based
|
|
96
|
+
features transformers and contrastive learning},
|
|
97
|
+
author = {Govindarajan, Vijay and Muzamal, Junaid Hussain},
|
|
98
|
+
journal = {Scientific Reports},
|
|
99
|
+
volume = {15},
|
|
100
|
+
pages = {20511},
|
|
101
|
+
year = {2025},
|
|
102
|
+
doi = {10.1038/s41598-025-07956-w},
|
|
103
|
+
}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## License
|
|
107
|
+
|
|
108
|
+
MIT
|
graphids-0.1.0/README.md
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# GraphIDS
|
|
2
|
+
|
|
3
|
+
Graph-based intrusion detection using GCN, Transformer autoencoder, and contrastive learning.
|
|
4
|
+
|
|
5
|
+
Reference implementation of the framework introduced in:
|
|
6
|
+
|
|
7
|
+
> Govindarajan, V. & Muzamal, J. H. (2025). Advanced cloud intrusion detection
|
|
8
|
+
> framework using graph based features transformers and contrastive learning.
|
|
9
|
+
> *Scientific Reports*, 15, 20511. DOI: [10.1038/s41598-025-07956-w](https://doi.org/10.1038/s41598-025-07956-w)
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install graphids
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
## Quick start
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
from graphids import GraphIDS
|
|
21
|
+
|
|
22
|
+
model = GraphIDS(n_features=41, n_classes=5)
|
|
23
|
+
model.train_pipeline(X_train, y_train)
|
|
24
|
+
result = model.evaluate(X_test, y_test)
|
|
25
|
+
print(f"Accuracy: {result.accuracy:.4f}")
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Architecture
|
|
29
|
+
|
|
30
|
+
Three-stage pipeline:
|
|
31
|
+
|
|
32
|
+
1. **GCN** — constructs a communication graph from flow data, extracts structural node embeddings via 3-layer graph convolution
|
|
33
|
+
2. **Transformer autoencoder** — refines embeddings through self-attention, identifies discriminative feature dimensions
|
|
34
|
+
3. **Contrastive classifier** — improves class separation for minority attack types (U2R, R2L), outputs multi-class predictions
|
|
35
|
+
|
|
36
|
+
## Results (from the paper)
|
|
37
|
+
|
|
38
|
+
| Dataset | Accuracy | Precision | Recall | F1 | FPR |
|
|
39
|
+
|---|---|---|---|---|---|
|
|
40
|
+
| NSL-KDD (5-class) | 99.97% | 99.94% | 99.92% | 99.93% | 0.05% |
|
|
41
|
+
| CIC-IDS (binary) | 99.96% | 99.93% | 99.91% | 99.92% | 0.06% |
|
|
42
|
+
| CIC-IDS (multi) | 99.95% | 99.92% | 99.90% | 99.91% | 0.07% |
|
|
43
|
+
|
|
44
|
+
## Citation
|
|
45
|
+
|
|
46
|
+
```bibtex
|
|
47
|
+
@article{govindarajan2025graphids,
|
|
48
|
+
title = {Advanced cloud intrusion detection framework using graph based
|
|
49
|
+
features transformers and contrastive learning},
|
|
50
|
+
author = {Govindarajan, Vijay and Muzamal, Junaid Hussain},
|
|
51
|
+
journal = {Scientific Reports},
|
|
52
|
+
volume = {15},
|
|
53
|
+
pages = {20511},
|
|
54
|
+
year = {2025},
|
|
55
|
+
doi = {10.1038/s41598-025-07956-w},
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## License
|
|
60
|
+
|
|
61
|
+
MIT
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Graph-based Intrusion Detection System (GraphIDS).
|
|
2
|
+
|
|
3
|
+
A modular cloud intrusion detection framework combining GCN feature extraction,
|
|
4
|
+
Transformer-based autoencoding, and contrastive learning. Reference implementation
|
|
5
|
+
of the framework introduced in:
|
|
6
|
+
|
|
7
|
+
Govindarajan, V. & Muzamal, J. H. (2025). Advanced cloud intrusion detection
|
|
8
|
+
framework using graph based features transformers and contrastive learning.
|
|
9
|
+
Scientific Reports, 15, 20511. DOI: 10.1038/s41598-025-07956-w
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Contrastive learning module and classification head.
|
|
2
|
+
|
|
3
|
+
The contrastive loss improves class separation in the embedding space,
|
|
4
|
+
particularly for minority classes (U2R, R2L) that get ignored when training
|
|
5
|
+
with cross-entropy alone. It works by pulling same-class embeddings together
|
|
6
|
+
and pushing different-class embeddings apart using cosine similarity.
|
|
7
|
+
|
|
8
|
+
The final classification loss is:
|
|
9
|
+
L_class = L_CE + beta * L_contrastive
|
|
10
|
+
|
|
11
|
+
The classifier is a two-layer FC network: 128 neurons with ReLU, then
|
|
12
|
+
softmax for multi-class prediction.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import torch
|
|
18
|
+
import torch.nn as nn
|
|
19
|
+
import torch.nn.functional as F
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ContrastiveLoss(nn.Module):
    """Pairwise supervised contrastive loss using cosine similarity.

    For each anchor i, same-class ("positive") embeddings are pulled
    together and all other embeddings are pushed apart: the loss per
    anchor is ``-mean(sim to positives) + logsumexp(sim to all non-self)``,
    the supervised contrastive formulation. Pairs are formed within the
    batch via the full pairwise similarity matrix masked by label equality.

    Fixes over the naive formulation:
    - the log-sum-exp is computed with ``torch.logsumexp`` (numerically
      stable for small temperatures, where ``exp(sim / T)`` can overflow);
    - anchors with no same-class partner in the batch are excluded from
      the mean instead of contributing a spurious ``+logsumexp`` penalty.

    Parameters
    ----------
    temperature
        Scaling factor for the similarity scores. Lower values sharpen
        the distribution. Default 0.5.
    """

    def __init__(self, temperature: float = 0.5):
        super().__init__()
        self.temperature = temperature

    def forward(self, embeddings: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
        """Compute the contrastive loss over all pairs in the batch.

        Parameters
        ----------
        embeddings
            Feature embeddings ``(batch, d)``.
        labels
            Integer class labels ``(batch,)``.

        Returns
        -------
        torch.Tensor
            Scalar loss. Zero if no anchor has a same-class partner.
        """
        # L2-normalize so the dot product is cosine similarity.
        normed = F.normalize(embeddings, p=2, dim=1)
        sim_matrix = (normed @ normed.T) / self.temperature

        # Pair masks: label equality, with the self-pair diagonal removed.
        label_eq = labels.unsqueeze(0) == labels.unsqueeze(1)  # (B, B)
        mask_self = ~torch.eye(labels.size(0), dtype=torch.bool, device=labels.device)
        positive_mask = label_eq & mask_self

        # Stable log-sum-exp over all non-self pairs (denominator term).
        sim_masked = sim_matrix.masked_fill(~mask_self, float("-inf"))
        log_sum_exp = torch.logsumexp(sim_masked, dim=1)

        # Mean similarity to positives per anchor (numerator term).
        pos_sim = (sim_matrix * positive_mask.float()).sum(dim=1)
        n_pos = positive_mask.float().sum(dim=1)
        mean_pos_sim = pos_sim / n_pos.clamp(min=1)

        # Only anchors that actually have a positive carry a signal.
        has_pos = n_pos > 0
        if not has_pos.any():
            return embeddings.new_zeros(())
        per_anchor = -mean_pos_sim + log_sum_exp
        return per_anchor[has_pos].mean()
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class Classifier(nn.Module):
    """Two-layer fully-connected classification head.

    Trained with a combined objective: standard cross-entropy plus a
    contrastive term weighted by ``beta``, which tightens intra-class
    clusters in the embedding space (helping minority classes).

    Parameters
    ----------
    d_in
        Input embedding dimensionality (should match encoder output).
    n_classes
        Number of output classes.
    hidden_dim
        Hidden layer size. Paper uses 128.
    beta
        Weight of the contrastive loss relative to cross-entropy.
    """

    def __init__(self, d_in: int, n_classes: int, hidden_dim: int = 128, beta: float = 0.5):
        super().__init__()
        self.fc1 = nn.Linear(d_in, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, n_classes)
        self.dropout = nn.Dropout(0.2)
        self.beta = beta
        self.contrastive_loss = ContrastiveLoss()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits for a batch of embeddings."""
        hidden = self.fc1(x)
        hidden = torch.relu(hidden)
        hidden = self.dropout(hidden)
        return self.fc2(hidden)

    def loss(self, embeddings: torch.Tensor, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
        """Combined cross-entropy + beta * contrastive loss.

        Parameters
        ----------
        embeddings
            The refined feature embeddings (used for contrastive loss).
        logits
            Output of forward() (used for cross-entropy).
        labels
            Ground-truth integer labels.
        """
        combined = F.cross_entropy(logits, labels)
        combined = combined + self.beta * self.contrastive_loss(embeddings, labels)
        return combined
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Three-layer Graph Convolutional Network for structural feature extraction.
|
|
2
|
+
|
|
3
|
+
Implemented in pure PyTorch — no dependency on torch-geometric. The GCN
|
|
4
|
+
update rule at each layer is:
|
|
5
|
+
|
|
6
|
+
H^{l+1} = sigma( A_norm @ H^l @ W^l )
|
|
7
|
+
|
|
8
|
+
where A_norm is the symmetrically normalized adjacency with self-loops
|
|
9
|
+
(precomputed by ``graph.prepare_graph``), H^l is the node feature matrix
|
|
10
|
+
at layer l, W^l is a learnable weight matrix, and sigma is ReLU.
|
|
11
|
+
|
|
12
|
+
Three layers means the receptive field covers 2-hop neighborhoods — enough
|
|
13
|
+
to detect patterns like lateral movement (A -> B -> C) without the
|
|
14
|
+
oversmoothing that degrades embeddings at higher layer counts.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import torch
|
|
20
|
+
import torch.nn as nn
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class GCNLayer(nn.Module):
    """One graph-convolution step: aggregate, project, normalize, activate."""

    def __init__(self, in_features: int, out_features: int, dropout: float = 0.3):
        super().__init__()
        # Learnable projection W, Xavier-initialized.
        self.weight = nn.Parameter(torch.empty(in_features, out_features))
        nn.init.xavier_uniform_(self.weight)
        self.norm = nn.LayerNorm(out_features)
        self.dropout = nn.Dropout(dropout)

    def forward(self, A_norm: torch.Tensor, H: torch.Tensor) -> torch.Tensor:
        """Apply A_norm @ H @ W, then LayerNorm, ReLU, and dropout."""
        aggregated = A_norm @ H
        projected = aggregated @ self.weight
        return self.dropout(torch.relu(self.norm(projected)))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class GCN(nn.Module):
    """Stacked graph convolutions producing node embeddings.

    Parameters
    ----------
    in_features
        Dimensionality of the input node features (e.g. 41 for NSL-KDD).
    hidden_dim
        Number of neurons per GCN layer. Paper uses 64.
    n_layers
        Number of GCN layers. Paper uses 3.
    dropout
        Dropout rate. Paper uses 0.3.
    """

    def __init__(
        self,
        in_features: int,
        hidden_dim: int = 64,
        n_layers: int = 3,
        dropout: float = 0.3,
    ):
        super().__init__()
        # First layer maps the raw features; the rest are hidden -> hidden.
        input_dims = [in_features] + [hidden_dim] * (n_layers - 1)
        self.layers = nn.ModuleList(
            GCNLayer(d, hidden_dim, dropout=dropout) for d in input_dims
        )

    @property
    def out_dim(self) -> int:
        """Width of the produced embeddings (last layer's output size)."""
        return self.layers[-1].weight.size(1)

    def forward(self, A_norm: torch.Tensor, X: torch.Tensor) -> torch.Tensor:
        """Extract node embeddings.

        Parameters
        ----------
        A_norm
            Normalized adjacency matrix ``(N, N)``.
        X
            Node feature matrix ``(N, d_in)``.

        Returns
        -------
        torch.Tensor
            Node embeddings ``(N, hidden_dim)``.
        """
        embedding = X
        for conv in self.layers:
            embedding = conv(A_norm, embedding)
        return embedding
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Graph construction from tabular network traffic data.
|
|
2
|
+
|
|
3
|
+
Converts flat flow-level feature matrices into graph representations where
|
|
4
|
+
nodes are network entities (IPs, ports, services) and edges are weighted by
|
|
5
|
+
communication metrics. The adjacency matrix is built from cosine similarity
|
|
6
|
+
between node feature vectors, which means structurally similar nodes are
|
|
7
|
+
connected more strongly than nodes that merely happen to communicate.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import torch
|
|
13
|
+
import numpy as np
|
|
14
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def build_adjacency(X: np.ndarray, threshold: float = 0.0) -> torch.Tensor:
    """Build a cosine-similarity adjacency matrix from a feature matrix.

    Each row of X is treated as a node. The adjacency weight between nodes
    i and j is the cosine similarity of their feature vectors, clipped to
    [0, 1]. A threshold can be applied to sparsify the graph.

    Computed directly with NumPy instead of scikit-learn: rows are
    L2-normalized and the Gram matrix of the normalized rows is exactly
    the cosine similarity. All-zero rows are left unnormalized so their
    similarity to every node is 0 (matching sklearn's behavior).

    Parameters
    ----------
    X
        Feature matrix with shape ``(n_nodes, n_features)``.
    threshold
        Edges with similarity below this value are set to zero.

    Returns
    -------
    torch.Tensor
        Adjacency matrix of shape ``(n_nodes, n_nodes)``, float32.
    """
    X = np.asarray(X, dtype=np.float64)
    norms = np.linalg.norm(X, axis=1, keepdims=True)
    # Guard zero vectors against division by zero; their rows stay zero.
    safe_norms = np.where(norms == 0.0, 1.0, norms)
    unit = X / safe_norms
    sim = np.clip(unit @ unit.T, 0.0, 1.0)
    if threshold > 0:
        sim[sim < threshold] = 0.0
    return torch.tensor(sim, dtype=torch.float32)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def add_self_loops(A: torch.Tensor) -> torch.Tensor:
    """Return Ã = A + I, connecting every node to itself."""
    identity = torch.eye(A.size(0), dtype=A.dtype, device=A.device)
    return A + identity
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def symmetric_norm(A: torch.Tensor) -> torch.Tensor:
    """Compute the symmetric degree normalization D^{-1/2} A D^{-1/2}.

    This prevents high-degree nodes from dominating neighbor aggregation
    in the GCN. Nodes with zero degree get a zero row/column.
    """
    degree = A.sum(dim=1)
    inv_sqrt = torch.zeros_like(degree)
    positive = degree > 0
    inv_sqrt[positive] = 1.0 / torch.sqrt(degree[positive])
    # Row/column scaling via broadcasting — equivalent to diag(d) @ A @ diag(d).
    return inv_sqrt.unsqueeze(1) * A * inv_sqrt.unsqueeze(0)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def prepare_graph(X: np.ndarray, threshold: float = 0.0) -> tuple[torch.Tensor, torch.Tensor]:
    """Full graph preparation: adjacency + normalization.

    Returns the normalized adjacency matrix (ready for GCN forward pass)
    and the node feature tensor.

    Parameters
    ----------
    X
        Feature matrix ``(n_nodes, n_features)``, already scaled.
    threshold
        Minimum similarity for an edge to be kept; 0 keeps all edges.

    Returns
    -------
    A_norm
        Normalized adjacency ``(n_nodes, n_nodes)`` with self-loops.
    X_tensor
        Node features as a float32 tensor ``(n_nodes, n_features)``.
    """
    adjacency = build_adjacency(X, threshold=threshold)
    normalized = symmetric_norm(add_self_loops(adjacency))
    features = torch.tensor(X, dtype=torch.float32)
    return normalized, features
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""End-to-end GraphIDS pipeline: GCN → Transformer AE → Contrastive Classifier.
|
|
2
|
+
|
|
3
|
+
This module wires the three stages together into a single trainable pipeline.
|
|
4
|
+
Each stage can be trained independently (following the paper's training
|
|
5
|
+
protocol) or jointly fine-tuned.
|
|
6
|
+
|
|
7
|
+
Training protocol from the paper:
|
|
8
|
+
1. Build the graph and train the GCN (Adam, lr=0.001, batch=128, 50 epochs)
|
|
9
|
+
2. Feed GCN embeddings to the Transformer AE and train it
|
|
10
|
+
(AdamW, lr=0.0001, batch=64, 100 epochs)
|
|
11
|
+
3. Feed refined embeddings to the classifier with contrastive loss
|
|
12
|
+
(RMSprop, lr=0.0005, batch=256, 50 epochs)
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
|
|
19
|
+
import numpy as np
|
|
20
|
+
import torch
|
|
21
|
+
import torch.nn as nn
|
|
22
|
+
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
|
|
23
|
+
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
|
|
24
|
+
|
|
25
|
+
from .graph import prepare_graph
|
|
26
|
+
from .gcn import GCN
|
|
27
|
+
from .transformer import TransformerAutoencoder
|
|
28
|
+
from .contrastive import Classifier
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class EvalResult:
    """Evaluation metrics from a prediction run."""
    # Fraction of samples predicted correctly.
    accuracy: float
    # Precision with average="weighted" over classes (see evaluate()).
    precision: float
    # Recall with average="weighted" over classes.
    recall: float
    # F1 with average="weighted" over classes.
    f1: float
    # Per-sample predicted labels, in the original (pre-encoding) label space.
    predictions: np.ndarray
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class GraphIDS(nn.Module):
|
|
42
|
+
"""Full intrusion detection pipeline.
|
|
43
|
+
|
|
44
|
+
Parameters
|
|
45
|
+
----------
|
|
46
|
+
n_features
|
|
47
|
+
Number of input features per flow (e.g. 41 for NSL-KDD after encoding).
|
|
48
|
+
n_classes
|
|
49
|
+
Number of output classes (e.g. 5 for NSL-KDD multi-class).
|
|
50
|
+
gcn_hidden
|
|
51
|
+
GCN hidden layer size. Paper uses 64.
|
|
52
|
+
gcn_layers
|
|
53
|
+
Number of GCN layers. Paper uses 3.
|
|
54
|
+
gcn_dropout
|
|
55
|
+
GCN dropout rate. Paper uses 0.3.
|
|
56
|
+
ae_heads
|
|
57
|
+
Number of Transformer attention heads. Paper uses 4.
|
|
58
|
+
ae_layers
|
|
59
|
+
Number of Transformer encoder/decoder layers. Paper uses 2.
|
|
60
|
+
ae_ff
|
|
61
|
+
Transformer feed-forward dimensionality. Paper uses 128.
|
|
62
|
+
ae_dropout
|
|
63
|
+
Transformer dropout rate. Paper uses 0.2.
|
|
64
|
+
ae_alpha
|
|
65
|
+
KL regularization weight for the autoencoder. Paper uses 0.001.
|
|
66
|
+
clf_hidden
|
|
67
|
+
Classifier hidden layer size. Paper uses 128.
|
|
68
|
+
beta
|
|
69
|
+
Contrastive loss weight. Controls how strongly the contrastive
|
|
70
|
+
term influences classification training.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
def __init__(
|
|
74
|
+
self,
|
|
75
|
+
n_features: int,
|
|
76
|
+
n_classes: int,
|
|
77
|
+
gcn_hidden: int = 64,
|
|
78
|
+
gcn_layers: int = 3,
|
|
79
|
+
gcn_dropout: float = 0.3,
|
|
80
|
+
ae_heads: int = 4,
|
|
81
|
+
ae_layers: int = 2,
|
|
82
|
+
ae_ff: int = 128,
|
|
83
|
+
ae_dropout: float = 0.2,
|
|
84
|
+
ae_alpha: float = 0.001,
|
|
85
|
+
clf_hidden: int = 128,
|
|
86
|
+
beta: float = 0.5,
|
|
87
|
+
):
|
|
88
|
+
super().__init__()
|
|
89
|
+
self.gcn = GCN(
|
|
90
|
+
in_features=n_features,
|
|
91
|
+
hidden_dim=gcn_hidden,
|
|
92
|
+
n_layers=gcn_layers,
|
|
93
|
+
dropout=gcn_dropout,
|
|
94
|
+
)
|
|
95
|
+
self.autoencoder = TransformerAutoencoder(
|
|
96
|
+
d_model=gcn_hidden,
|
|
97
|
+
n_heads=ae_heads,
|
|
98
|
+
n_layers=ae_layers,
|
|
99
|
+
d_ff=ae_ff,
|
|
100
|
+
dropout=ae_dropout,
|
|
101
|
+
alpha=ae_alpha,
|
|
102
|
+
)
|
|
103
|
+
self.classifier = Classifier(
|
|
104
|
+
d_in=gcn_hidden,
|
|
105
|
+
n_classes=n_classes,
|
|
106
|
+
hidden_dim=clf_hidden,
|
|
107
|
+
beta=beta,
|
|
108
|
+
)
|
|
109
|
+
self.scaler = MinMaxScaler()
|
|
110
|
+
self.label_encoder = LabelEncoder()
|
|
111
|
+
|
|
112
|
+
def preprocess(self, X: np.ndarray, y: np.ndarray | None = None, fit: bool = False):
|
|
113
|
+
"""Scale features to [0, 1] and encode labels.
|
|
114
|
+
|
|
115
|
+
Parameters
|
|
116
|
+
----------
|
|
117
|
+
X : array (n_samples, n_features)
|
|
118
|
+
y : array (n_samples,), optional
|
|
119
|
+
fit : bool
|
|
120
|
+
If True, fit the scaler and label encoder.
|
|
121
|
+
|
|
122
|
+
Returns
|
|
123
|
+
-------
|
|
124
|
+
X_scaled, y_encoded (or None)
|
|
125
|
+
"""
|
|
126
|
+
if fit:
|
|
127
|
+
X_scaled = self.scaler.fit_transform(X)
|
|
128
|
+
else:
|
|
129
|
+
X_scaled = self.scaler.transform(X)
|
|
130
|
+
|
|
131
|
+
y_enc = None
|
|
132
|
+
if y is not None:
|
|
133
|
+
if fit:
|
|
134
|
+
y_enc = self.label_encoder.fit_transform(y)
|
|
135
|
+
else:
|
|
136
|
+
y_enc = self.label_encoder.transform(y)
|
|
137
|
+
|
|
138
|
+
return X_scaled, y_enc
|
|
139
|
+
|
|
140
|
+
def forward(self, A_norm: torch.Tensor, X: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
|
|
141
|
+
"""Full forward pass through all three stages.
|
|
142
|
+
|
|
143
|
+
Returns
|
|
144
|
+
-------
|
|
145
|
+
gcn_out
|
|
146
|
+
GCN node embeddings.
|
|
147
|
+
refined
|
|
148
|
+
Transformer-refined embeddings.
|
|
149
|
+
logits
|
|
150
|
+
Classification logits.
|
|
151
|
+
"""
|
|
152
|
+
gcn_out = self.gcn(A_norm, X)
|
|
153
|
+
refined, reconstructed = self.autoencoder(gcn_out)
|
|
154
|
+
logits = self.classifier(refined)
|
|
155
|
+
return gcn_out, refined, logits
|
|
156
|
+
|
|
157
|
+
def train_pipeline(
|
|
158
|
+
self,
|
|
159
|
+
X_train: np.ndarray,
|
|
160
|
+
y_train: np.ndarray,
|
|
161
|
+
gcn_epochs: int = 50,
|
|
162
|
+
ae_epochs: int = 100,
|
|
163
|
+
clf_epochs: int = 50,
|
|
164
|
+
gcn_lr: float = 0.001,
|
|
165
|
+
ae_lr: float = 0.0001,
|
|
166
|
+
clf_lr: float = 0.0005,
|
|
167
|
+
verbose: bool = True,
|
|
168
|
+
) -> None:
|
|
169
|
+
"""Train all three stages sequentially.
|
|
170
|
+
|
|
171
|
+
This follows the paper's training protocol: GCN first, then
|
|
172
|
+
autoencoder on frozen GCN embeddings, then classifier on frozen
|
|
173
|
+
refined embeddings.
|
|
174
|
+
"""
|
|
175
|
+
X_scaled, y_enc = self.preprocess(X_train, y_train, fit=True)
|
|
176
|
+
A_norm, X_tensor = prepare_graph(X_scaled)
|
|
177
|
+
y_tensor = torch.tensor(y_enc, dtype=torch.long)
|
|
178
|
+
|
|
179
|
+
# Stage 1: GCN
|
|
180
|
+
if verbose:
|
|
181
|
+
print("Stage 1: Training GCN...")
|
|
182
|
+
gcn_opt = torch.optim.Adam(self.gcn.parameters(), lr=gcn_lr)
|
|
183
|
+
# Train GCN with a proxy classification objective
|
|
184
|
+
proxy_clf = nn.Linear(self.gcn.out_dim, len(self.label_encoder.classes_))
|
|
185
|
+
proxy_opt = torch.optim.Adam(
|
|
186
|
+
list(self.gcn.parameters()) + list(proxy_clf.parameters()), lr=gcn_lr
|
|
187
|
+
)
|
|
188
|
+
self.gcn.train()
|
|
189
|
+
for epoch in range(gcn_epochs):
|
|
190
|
+
proxy_opt.zero_grad()
|
|
191
|
+
embeddings = self.gcn(A_norm, X_tensor)
|
|
192
|
+
logits = proxy_clf(embeddings)
|
|
193
|
+
loss = nn.functional.cross_entropy(logits, y_tensor)
|
|
194
|
+
loss.backward()
|
|
195
|
+
proxy_opt.step()
|
|
196
|
+
if verbose and (epoch + 1) % 10 == 0:
|
|
197
|
+
print(f" epoch {epoch+1}/{gcn_epochs} loss={loss.item():.4f}")
|
|
198
|
+
del proxy_clf, proxy_opt
|
|
199
|
+
|
|
200
|
+
# Stage 2: Transformer autoencoder
|
|
201
|
+
if verbose:
|
|
202
|
+
print("Stage 2: Training Transformer autoencoder...")
|
|
203
|
+
ae_opt = torch.optim.AdamW(self.autoencoder.parameters(), lr=ae_lr)
|
|
204
|
+
self.gcn.eval()
|
|
205
|
+
self.autoencoder.train()
|
|
206
|
+
with torch.no_grad():
|
|
207
|
+
gcn_embeddings = self.gcn(A_norm, X_tensor)
|
|
208
|
+
for epoch in range(ae_epochs):
|
|
209
|
+
ae_opt.zero_grad()
|
|
210
|
+
encoded, reconstructed = self.autoencoder(gcn_embeddings)
|
|
211
|
+
loss = self.autoencoder.loss(gcn_embeddings, encoded, reconstructed)
|
|
212
|
+
loss.backward()
|
|
213
|
+
ae_opt.step()
|
|
214
|
+
if verbose and (epoch + 1) % 20 == 0:
|
|
215
|
+
print(f" epoch {epoch+1}/{ae_epochs} loss={loss.item():.4f}")
|
|
216
|
+
|
|
217
|
+
# Stage 3: Contrastive classifier
|
|
218
|
+
if verbose:
|
|
219
|
+
print("Stage 3: Training contrastive classifier...")
|
|
220
|
+
clf_opt = torch.optim.RMSprop(self.classifier.parameters(), lr=clf_lr)
|
|
221
|
+
self.autoencoder.eval()
|
|
222
|
+
self.classifier.train()
|
|
223
|
+
with torch.no_grad():
|
|
224
|
+
encoded, _ = self.autoencoder(gcn_embeddings)
|
|
225
|
+
for epoch in range(clf_epochs):
|
|
226
|
+
clf_opt.zero_grad()
|
|
227
|
+
logits = self.classifier(encoded)
|
|
228
|
+
loss = self.classifier.loss(encoded, logits, y_tensor)
|
|
229
|
+
loss.backward()
|
|
230
|
+
clf_opt.step()
|
|
231
|
+
if verbose and (epoch + 1) % 10 == 0:
|
|
232
|
+
acc = (logits.argmax(dim=1) == y_tensor).float().mean().item()
|
|
233
|
+
print(f" epoch {epoch+1}/{clf_epochs} loss={loss.item():.4f} acc={acc:.4f}")
|
|
234
|
+
|
|
235
|
+
if verbose:
|
|
236
|
+
print("Training complete.")
|
|
237
|
+
|
|
238
|
+
@torch.no_grad()
def predict(self, X: np.ndarray) -> np.ndarray:
    """Predict class labels for new samples.

    Builds the similarity graph over the incoming batch alone, so each
    sample's graph neighbours are the other samples passed in the same
    call (transductive within the batch).

    Parameters
    ----------
    X
        Raw feature matrix ``(n_samples, n_features)``.

    Returns
    -------
    np.ndarray
        Predicted labels decoded back to the original label space via
        the label encoder fitted during training.
    """
    self.eval()
    X_scaled, _ = self.preprocess(X)
    A_norm, X_tensor = prepare_graph(X_scaled)
    # Only the classifier logits are needed here; the GCN and refined
    # embeddings returned by forward() are discarded.
    _, _, logits = self.forward(A_norm, X_tensor)
    preds = logits.argmax(dim=1).cpu().numpy()
    return self.label_encoder.inverse_transform(preds)
|
|
247
|
+
|
|
248
|
+
@torch.no_grad()
def evaluate(self, X: np.ndarray, y: np.ndarray) -> EvalResult:
    """Run prediction on ``X`` and bundle the standard weighted metrics
    against the ground-truth labels ``y``."""
    predicted = self.predict(X)
    # All precision/recall/F1 calls share the same weighted-average setup.
    metric_kwargs = {"average": "weighted", "zero_division": 0}
    return EvalResult(
        accuracy=accuracy_score(y, predicted),
        precision=precision_score(y, predicted, **metric_kwargs),
        recall=recall_score(y, predicted, **metric_kwargs),
        f1=f1_score(y, predicted, **metric_kwargs),
        predictions=predicted,
    )
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""Transformer-based autoencoder for embedding refinement.
|
|
2
|
+
|
|
3
|
+
Takes the GCN node embeddings and refines them through a self-attention
|
|
4
|
+
encoder-decoder. The encoder identifies which dimensions of the embedding
|
|
5
|
+
are informative for distinguishing attack types; the decoder ensures the
|
|
6
|
+
representation retains enough information to reconstruct the original
|
|
7
|
+
embeddings (preventing information collapse).
|
|
8
|
+
|
|
9
|
+
The training loss combines reconstruction error (MSE) and a KL divergence
|
|
10
|
+
regularization term that keeps the latent distribution smooth. This pushes
|
|
11
|
+
the autoencoder toward a compact representation where similar traffic types
|
|
12
|
+
cluster together in the latent space.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import torch
|
|
18
|
+
import torch.nn as nn
|
|
19
|
+
import torch.nn.functional as F
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TransformerAutoencoder(nn.Module):
    """Encoder-decoder Transformer autoencoder.

    Refines GCN node embeddings via self-attention; the decoder
    reconstructs the input so the latent code cannot collapse.

    Parameters
    ----------
    d_model
        Dimensionality of the input embeddings (should match GCN output dim).
    n_heads
        Number of attention heads. Paper uses 4.
    n_layers
        Number of self-attention layers in both encoder and decoder.
        Paper uses 2.
    d_ff
        Feed-forward layer dimensionality. Paper uses 128.
    dropout
        Dropout rate. Paper uses 0.2.
    alpha
        Weight for the KL regularization term. Controls the tradeoff between
        faithful reconstruction and smooth latent space.
    """

    def __init__(
        self,
        d_model: int = 64,
        n_heads: int = 4,
        n_layers: int = 2,
        d_ff: int = 128,
        dropout: float = 0.2,
        alpha: float = 0.001,
    ):
        super().__init__()
        self.alpha = alpha
        self.d_model = d_model

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=d_ff,
            dropout=dropout,
            activation="gelu",
            batch_first=True,
            norm_first=True,
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)

        decoder_layer = nn.TransformerDecoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dim_feedforward=d_ff,
            dropout=dropout,
            activation="gelu",
            batch_first=True,
            norm_first=True,
        )
        self.decoder = nn.TransformerDecoder(decoder_layer, num_layers=n_layers)

    def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Encode and reconstruct.

        Parameters
        ----------
        x
            Input embeddings ``(batch, d_model)`` or ``(batch, seq, d_model)``.

        Returns
        -------
        encoded
            Refined embeddings from the encoder.
        reconstructed
            Decoder's reconstruction of the input.
        """
        # Add a sequence dimension if input is 2D (treating each sample as
        # a length-1 sequence). This is the common case when the GCN
        # produces one embedding per node.
        squeeze = False
        if x.dim() == 2:
            x = x.unsqueeze(1)  # (batch, 1, d_model)
            squeeze = True

        encoded = self.encoder(x)
        # The decoder attends to the encoder output as both target and
        # memory — there is no separate target sequence in an autoencoder.
        reconstructed = self.decoder(encoded, encoded)

        if squeeze:
            encoded = encoded.squeeze(1)
            reconstructed = reconstructed.squeeze(1)

        return encoded, reconstructed

    def loss(self, x: torch.Tensor, encoded: torch.Tensor, reconstructed: torch.Tensor) -> torch.Tensor:
        """Combined reconstruction + KL regularization loss.

        L = L_recon + alpha * L_reg
        L_recon = (1/n) * sum ||x - x_hat||^2
        L_reg = (1/n) * sum KL(softmax(x) || softmax(x_hat))
        """
        # Reconstruction loss (MSE)
        recon_loss = F.mse_loss(reconstructed, x)

        # KL(softmax(x) || softmax(x_hat)). F.kl_div(input, target) expects
        # `input` in log-space and computes KL(target || input), so the
        # reconstruction goes in the log slot and the (fixed) input x is the
        # target. x — not the reconstruction — is detached: gradients must
        # flow through `reconstructed` for the regularizer to train the
        # autoencoder at all (the previous version detached `reconstructed`,
        # which made the KL term gradient-free during stage-2 training).
        log_q = F.log_softmax(reconstructed, dim=-1)
        p = F.softmax(x.detach(), dim=-1)
        kl_loss = F.kl_div(log_q, p, reduction="batchmean")

        return recon_loss + self.alpha * kl_loss
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "graphids"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Graph-based intrusion detection using GCN, Transformer autoencoder, and contrastive learning"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Vijay Govindarajan", email = "vijay.govindarajan91@gmail.com" },
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"intrusion-detection",
|
|
17
|
+
"graph-neural-network",
|
|
18
|
+
"transformer",
|
|
19
|
+
"contrastive-learning",
|
|
20
|
+
"cloud-security",
|
|
21
|
+
"network-security",
|
|
22
|
+
]
|
|
23
|
+
classifiers = [
|
|
24
|
+
"Development Status :: 3 - Alpha",
|
|
25
|
+
"Intended Audience :: Science/Research",
|
|
26
|
+
"License :: OSI Approved :: MIT License",
|
|
27
|
+
"Programming Language :: Python :: 3",
|
|
28
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
29
|
+
"Topic :: Security",
|
|
30
|
+
]
|
|
31
|
+
dependencies = [
|
|
32
|
+
"torch>=2.0",
|
|
33
|
+
"numpy>=1.24",
|
|
34
|
+
"pandas>=2.0",
|
|
35
|
+
"scikit-learn>=1.3",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[project.optional-dependencies]
|
|
39
|
+
dev = ["pytest>=7.0"]
|
|
40
|
+
shap = ["shap>=0.43"]
|
|
41
|
+
|
|
42
|
+
[project.urls]
|
|
43
|
+
Homepage = "https://github.com/vijaygovindaraja/graphids"
|
|
44
|
+
Paper = "https://doi.org/10.1038/s41598-025-07956-w"
|
|
45
|
+
|
|
46
|
+
[tool.hatch.build.targets.wheel]
|
|
47
|
+
packages = ["graphids"]
|
|
48
|
+
|
|
49
|
+
[tool.pytest.ini_options]
|
|
50
|
+
testpaths = ["tests"]
|
|
File without changes
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Tests for graph construction."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import torch
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from graphids.graph import build_adjacency, add_self_loops, symmetric_norm, prepare_graph
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_adjacency_shape():
    """The adjacency matrix is square with one row per sample."""
    features = np.random.randn(10, 5)
    adj = build_adjacency(features)
    assert adj.shape == (10, 10)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_adjacency_symmetric():
    """Similarity-based adjacency must equal its own transpose."""
    adj = build_adjacency(np.random.randn(8, 4))
    torch.testing.assert_close(adj, adj.T)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_adjacency_non_negative():
    """No adjacency entry may be negative."""
    adj = build_adjacency(np.random.randn(10, 5))
    assert (adj >= 0).all()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_adjacency_threshold():
    """Entries that survive thresholding are at least the threshold."""
    adj = build_adjacency(np.random.randn(20, 5), threshold=0.5)
    surviving = adj[adj > 0]
    assert (surviving >= 0.5).all()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_self_loops_adds_identity():
    """Self-loops on an all-zero adjacency yield exactly the identity."""
    empty = torch.zeros(3, 3)
    torch.testing.assert_close(add_self_loops(empty), torch.eye(3))
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_symmetric_norm_preserves_shape():
    """Normalization must not change the matrix dimensions."""
    normalized = symmetric_norm(torch.ones(5, 5))
    assert normalized.shape == (5, 5)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_symmetric_norm_row_sums():
    """For a uniform (all-ones) adjacency, every row of the normalized
    matrix sums to 1."""
    n = 5
    normalized = symmetric_norm(torch.ones(n, n))
    torch.testing.assert_close(normalized.sum(dim=1), torch.ones(n), atol=1e-6, rtol=0)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_prepare_graph_returns_correct_types():
    """prepare_graph returns torch tensors with the expected shapes."""
    features = np.random.randn(10, 5).astype(np.float32)
    adj, feats = prepare_graph(features)
    assert isinstance(adj, torch.Tensor)
    assert isinstance(feats, torch.Tensor)
    assert adj.shape == (10, 10)
    assert feats.shape == (10, 5)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_identical_rows_have_max_similarity():
    """Identical rows have pairwise cosine similarity of 1.0, so even the
    smallest adjacency entry is (numerically) 1."""
    adj = build_adjacency(np.ones((3, 4)))
    assert adj.min().item() >= 0.99
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Tests for the GCN, Transformer autoencoder, and classifier modules."""
|
|
2
|
+
|
|
3
|
+
import torch
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from graphids.gcn import GCN, GCNLayer
|
|
7
|
+
from graphids.transformer import TransformerAutoencoder
|
|
8
|
+
from graphids.contrastive import Classifier, ContrastiveLoss
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# --------------------------------------------------------------------------- #
|
|
12
|
+
# GCN
|
|
13
|
+
# --------------------------------------------------------------------------- #
|
|
14
|
+
def test_gcn_output_shape():
    """A 3-layer GCN maps (N, in_features) inputs to (N, hidden_dim)."""
    model = GCN(in_features=41, hidden_dim=64, n_layers=3)
    embeddings = model(torch.eye(10), torch.randn(10, 41))
    assert embeddings.shape == (10, 64)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_gcn_single_layer():
    """A single-layer GCN still projects to hidden_dim."""
    model = GCN(in_features=5, hidden_dim=8, n_layers=1)
    embeddings = model(torch.eye(4), torch.randn(4, 5))
    assert embeddings.shape == (4, 8)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_gcn_out_dim_property():
    """out_dim reports the configured hidden dimensionality."""
    assert GCN(in_features=10, hidden_dim=32).out_dim == 32
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_gcn_gradient_flows():
    """Backprop through the GCN populates every parameter gradient."""
    model = GCN(in_features=5, hidden_dim=8)
    model(torch.eye(3), torch.randn(3, 5)).sum().backward()
    assert all(p.grad is not None for p in model.parameters())
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# --------------------------------------------------------------------------- #
|
|
47
|
+
# Transformer autoencoder
|
|
48
|
+
# --------------------------------------------------------------------------- #
|
|
49
|
+
def test_ae_output_shape():
    """2-D input yields encoded/reconstructed tensors of the input shape."""
    autoencoder = TransformerAutoencoder(d_model=64, n_heads=4, n_layers=2, d_ff=128)
    refined, rebuilt = autoencoder(torch.randn(8, 64))
    assert refined.shape == (8, 64)
    assert rebuilt.shape == (8, 64)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_ae_loss_finite():
    """The combined AE loss is a finite, strictly positive scalar."""
    autoencoder = TransformerAutoencoder(d_model=16, n_heads=2, n_layers=1, d_ff=32)
    inputs = torch.randn(5, 16)
    refined, rebuilt = autoencoder(inputs)
    total = autoencoder.loss(inputs, refined, rebuilt)
    assert torch.isfinite(total)
    assert total.item() > 0
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_ae_3d_input():
    """Inputs that already carry a sequence dimension pass through unchanged
    in shape."""
    autoencoder = TransformerAutoencoder(d_model=16, n_heads=2, n_layers=1, d_ff=32)
    refined, _ = autoencoder(torch.randn(4, 3, 16))  # batch=4, seq=3, d=16
    assert refined.shape == (4, 3, 16)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# --------------------------------------------------------------------------- #
|
|
74
|
+
# Contrastive loss
|
|
75
|
+
# --------------------------------------------------------------------------- #
|
|
76
|
+
def test_contrastive_loss_finite():
    """Contrastive loss on random embeddings is finite."""
    criterion = ContrastiveLoss()
    labels = torch.tensor([0, 0, 1, 1, 2, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0])
    assert torch.isfinite(criterion(torch.randn(16, 32), labels))
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def test_contrastive_loss_lower_for_good_embeddings():
    """Embeddings that already cluster by class should produce lower loss
    than random embeddings.

    Seeded: with an unseeded RNG the random ("bad") embeddings can, on a
    rare draw, land close to a clustered layout and flake the comparison.
    """
    torch.manual_seed(0)
    cl = ContrastiveLoss()
    labels = torch.tensor([0, 0, 0, 1, 1, 1, 2, 2, 2])

    # Good: each class is a tight cluster far from others
    good = torch.zeros(9, 8)
    good[:3, :3] = 5.0
    good[3:6, 3:6] = 5.0
    good[6:, 6:] = 5.0
    good += torch.randn_like(good) * 0.1

    # Bad: random
    bad = torch.randn(9, 8)

    loss_good = cl(good, labels)
    loss_bad = cl(bad, labels)
    assert loss_good < loss_bad
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# --------------------------------------------------------------------------- #
|
|
106
|
+
# Classifier
|
|
107
|
+
# --------------------------------------------------------------------------- #
|
|
108
|
+
def test_classifier_output_shape():
    """The classifier emits one logit per class for each sample."""
    head = Classifier(d_in=64, n_classes=5, hidden_dim=128)
    assert head(torch.randn(10, 64)).shape == (10, 5)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def test_classifier_loss_finite():
    """Classifier loss is a finite, strictly positive scalar."""
    head = Classifier(d_in=32, n_classes=3, hidden_dim=64)
    features = torch.randn(8, 32)
    labels = torch.tensor([0, 1, 2, 0, 1, 2, 0, 1])
    total = head.loss(features, head(features), labels)
    assert torch.isfinite(total)
    assert total.item() > 0
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_classifier_gradient_flows():
    """Backprop through the classifier loss populates every gradient."""
    head = Classifier(d_in=16, n_classes=3)
    features = torch.randn(6, 16)
    labels = torch.tensor([0, 1, 2, 0, 1, 2])
    head.loss(features, head(features), labels).backward()
    assert all(p.grad is not None for p in head.parameters())
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""Integration tests for the full GraphIDS pipeline."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pytest
|
|
5
|
+
from sklearn.datasets import make_classification
|
|
6
|
+
|
|
7
|
+
from graphids.pipeline import GraphIDS, EvalResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _make_ids_data(n_samples=200, n_features=20, n_classes=3, random_state=42):
    """Synthetic stand-in for IDS traffic: labelled feature vectors with a
    little label noise."""
    return make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=12,
        n_redundant=4,
        n_classes=n_classes,
        random_state=random_state,
        flip_y=0.03,
    )
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_train_and_predict_shapes():
    """After a short training run, predict returns one label per sample."""
    X, y = _make_ids_data(n_samples=100, n_classes=3)
    model = GraphIDS(n_features=20, n_classes=3)
    model.train_pipeline(
        X[:70], y[:70],
        gcn_epochs=5, ae_epochs=5, clf_epochs=5,
        verbose=False,
    )
    assert model.predict(X[70:]).shape == (30,)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def test_evaluate_returns_result():
    """evaluate() returns an EvalResult with metrics in [0, 1] and one
    prediction per test sample."""
    X, y = _make_ids_data(n_samples=100, n_classes=2)
    model = GraphIDS(n_features=20, n_classes=2)
    model.train_pipeline(
        X[:70], y[:70],
        gcn_epochs=5, ae_epochs=5, clf_epochs=5,
        verbose=False,
    )
    outcome = model.evaluate(X[70:], y[70:])
    assert isinstance(outcome, EvalResult)
    assert 0 <= outcome.accuracy <= 1
    assert 0 <= outcome.f1 <= 1
    assert outcome.predictions.shape == (30,)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_accuracy_above_chance():
    """Sanity check: on well-separated binary data, a modest training run
    must beat chance (0.50).

    Note: synthetic data gives a nearly fully connected similarity graph
    (uniform cosine similarity), which limits what the GCN can extract.
    Real IDS data has natural clusters and a sparser, more informative
    graph — the real validation is on NSL-KDD, not here. This test only
    confirms the pipeline runs end to end and learns *something*.
    """
    X, y = _make_ids_data(n_samples=400, n_features=20, n_classes=2, random_state=0)
    model = GraphIDS(n_features=20, n_classes=2)
    model.train_pipeline(
        X[:300], y[:300],
        gcn_epochs=40, ae_epochs=40, clf_epochs=40,
        verbose=False,
    )
    outcome = model.evaluate(X[300:], y[300:])
    assert outcome.accuracy > 0.50, f"accuracy {outcome.accuracy:.2f} not above chance"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_predict_returns_known_labels():
    """Every predicted label must come from the training label set."""
    X, y = _make_ids_data(n_samples=100, n_classes=2)
    model = GraphIDS(n_features=20, n_classes=2)
    model.train_pipeline(
        X[:70], y[:70],
        gcn_epochs=5, ae_epochs=5, clf_epochs=5,
        verbose=False,
    )
    predictions = model.predict(X[70:])
    assert set(predictions) <= set(y)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def test_pipeline_with_string_labels():
    """The pipeline accepts string class labels and predicts in the same
    label space."""
    X, y = _make_ids_data(n_samples=100, n_classes=3)
    label_map = {0: "Normal", 1: "DoS", 2: "Probe"}
    y_str = np.array([label_map[yi] for yi in y])

    model = GraphIDS(n_features=20, n_classes=3)
    model.train_pipeline(
        X[:70], y_str[:70],
        gcn_epochs=5, ae_epochs=5, clf_epochs=5,
        verbose=False,
    )
    known = {"Normal", "DoS", "Probe"}
    assert all(pred in known for pred in model.predict(X[70:]))
|