cesnet-datazoo 0.1.4__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/PKG-INFO +1 -1
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/datasets/cesnet_dataset.py +6 -4
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/pytables_data/data_scalers.py +13 -11
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/pytables_data/pytables_dataset.py +2 -4
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo.egg-info/PKG-INFO +1 -1
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/pyproject.toml +1 -1
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/LICENCE +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/README.md +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/__init__.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/config.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/constants.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/datasets/__init__.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/datasets/datasets.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/datasets/datasets_constants.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/datasets/loaders.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/datasets/metadata/__init__.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/datasets/metadata/dataset_metadata.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/datasets/metadata/metadata.csv +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/datasets/statistics.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/metrics/__init__.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/metrics/classification_report.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/metrics/provider_metrics.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/pytables_data/__init__.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/pytables_data/apps_split.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/pytables_data/indices_setup.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/utils/__init__.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/utils/class_info.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/utils/download.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/utils/fileutils.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/utils/random.py +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo.egg-info/SOURCES.txt +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo.egg-info/dependency_links.txt +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo.egg-info/requires.txt +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo.egg-info/top_level.txt +0 -0
- {cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cesnet-datazoo
|
3
|
-
Version: 0.1.4
|
3
|
+
Version: 0.1.5
|
4
4
|
Summary: A toolkit for large network traffic datasets
|
5
5
|
Author-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
|
6
6
|
Maintainer-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
|
@@ -10,6 +10,7 @@ import numpy as np
|
|
10
10
|
import pandas as pd
|
11
11
|
import tables as tb
|
12
12
|
import torch
|
13
|
+
from numpy.lib.recfunctions import repack_fields
|
13
14
|
from sklearn.model_selection import train_test_split
|
14
15
|
from sklearn.preprocessing import LabelEncoder
|
15
16
|
from torch.utils.data import BatchSampler, DataLoader, RandomSampler, Sampler, SequentialSampler
|
@@ -17,7 +18,8 @@ from typing_extensions import assert_never
|
|
17
18
|
|
18
19
|
from cesnet_datazoo.config import AppSelection, DataLoaderOrder, DatasetConfig, ValidationApproach
|
19
20
|
from cesnet_datazoo.constants import (APP_COLUMN, CATEGORY_COLUMN, DATASET_SIZES, INDICES_APP_FIELD,
|
20
|
-
SERVICEMAP_FILE,
|
21
|
+
INDICES_INDEX_FIELD, INDICES_TABLE_FIELD, SERVICEMAP_FILE,
|
22
|
+
UNKNOWN_STR_LABEL)
|
21
23
|
from cesnet_datazoo.datasets.loaders import collate_fn_simple, create_df_from_dataloader
|
22
24
|
from cesnet_datazoo.datasets.metadata.dataset_metadata import DatasetMetadata, load_metadata
|
23
25
|
from cesnet_datazoo.datasets.statistics import compute_dataset_statistics
|
@@ -619,7 +621,7 @@ class CesnetDataset():
|
|
619
621
|
train_dataset = PyTablesDataset(
|
620
622
|
database_path=dataset_config.database_path,
|
621
623
|
tables_paths=dataset_config._get_train_tables_paths(),
|
622
|
-
indices=dataset_indices.train_indices,
|
624
|
+
indices=repack_fields(dataset_indices.train_indices[[INDICES_TABLE_FIELD, INDICES_INDEX_FIELD]]), # type: ignore
|
623
625
|
tables_app_enum=self._tables_app_enum,
|
624
626
|
tables_cat_enum=self._tables_cat_enum,
|
625
627
|
flowstats_features=dataset_config.flowstats_features,
|
@@ -638,7 +640,7 @@ class CesnetDataset():
|
|
638
640
|
val_dataset = PyTablesDataset(
|
639
641
|
database_path=dataset_config.database_path,
|
640
642
|
tables_paths=dataset_config._get_val_tables_paths(),
|
641
|
-
indices=dataset_indices.val_known_indices,
|
643
|
+
indices=repack_fields(dataset_indices.val_known_indices[[INDICES_TABLE_FIELD, INDICES_INDEX_FIELD]]), # type: ignore
|
642
644
|
tables_app_enum=self._tables_app_enum,
|
643
645
|
tables_cat_enum=self._tables_cat_enum,
|
644
646
|
flowstats_features=dataset_config.flowstats_features,
|
@@ -659,7 +661,7 @@ class CesnetDataset():
|
|
659
661
|
test_dataset = PyTablesDataset(
|
660
662
|
database_path=dataset_config.database_path,
|
661
663
|
tables_paths=dataset_config._get_test_tables_paths(),
|
662
|
-
indices=test_combined_indices,
|
664
|
+
indices=repack_fields(test_combined_indices[[INDICES_TABLE_FIELD, INDICES_INDEX_FIELD]]), # type: ignore
|
663
665
|
tables_app_enum=self._tables_app_enum,
|
664
666
|
tables_cat_enum=self._tables_cat_enum,
|
665
667
|
flowstats_features=dataset_config.flowstats_features,
|
@@ -46,18 +46,20 @@ def fit_scalers(dataset_config: DatasetConfig, train_indices: np.ndarray) -> Non
|
|
46
46
|
data_ppi = data_ppi.transpose(0, 2, 1).reshape(-1, ppi_channels)
|
47
47
|
padding_mask = data_ppi[:, DIR_POS] == 0 # Mask of padded packets
|
48
48
|
# Fit IPT scaler
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
49
|
+
if clip_and_scale_ppi_transform.ipt_scaler:
|
50
|
+
train_ipt = data_ppi[:, IPT_POS].clip(max=clip_and_scale_ppi_transform.ipt_max, min=clip_and_scale_ppi_transform.ipt_min)
|
51
|
+
train_ipt[padding_mask] = np.nan # NaNs are ignored in sklearn scalers
|
52
|
+
if isinstance(clip_and_scale_ppi_transform.ipt_scaler, MinMaxScaler):
|
53
|
+
# Let zero be the minimum for minmax scaling
|
54
|
+
train_ipt = np.concatenate((train_ipt, [0]))
|
55
|
+
clip_and_scale_ppi_transform.ipt_scaler.fit(train_ipt.reshape(-1, 1))
|
55
56
|
# Fit packet sizes scaler
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
57
|
+
if clip_and_scale_ppi_transform.psizes_scaler:
|
58
|
+
train_psizes = data_ppi[:, SIZE_POS].clip(max=clip_and_scale_ppi_transform.psizes_max, min=clip_and_scale_ppi_transform.pszies_min)
|
59
|
+
train_psizes[padding_mask] = np.nan
|
60
|
+
if isinstance(clip_and_scale_ppi_transform.psizes_scaler, MinMaxScaler):
|
61
|
+
train_psizes = np.concatenate((train_psizes, [0]))
|
62
|
+
clip_and_scale_ppi_transform.psizes_scaler.fit(train_psizes.reshape(-1, 1))
|
61
63
|
clip_and_scale_ppi_transform.needs_fitting = False
|
62
64
|
json.dump(clip_and_scale_ppi_transform.to_dict(), open(os.path.join(train_data_path, "transforms", "ppi-transform.json"), "w"), indent=4)
|
63
65
|
|
{cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/pytables_data/pytables_dataset.py
RENAMED
@@ -158,11 +158,9 @@ class PyTablesDataset(Dataset):
|
|
158
158
|
for i in range(len(tables)):
|
159
159
|
base_labels[i] = tables[i].read(field=APP_COLUMN)
|
160
160
|
base_indices[i] = np.nonzero(np.isin(base_labels[i], disabled_apps_ids, invert=True))[0]
|
161
|
-
indices = np.
|
161
|
+
indices = np.array(list(zip(
|
162
162
|
np.concatenate([[table_id] * len(base_indices[table_id]) for table_id in tables]),
|
163
|
-
np.concatenate(list(base_indices.values())),
|
164
|
-
np.concatenate(list(base_labels.values()))
|
165
|
-
)).astype(np.int32)
|
163
|
+
np.concatenate(list(base_indices.values())))), dtype=[field for field in INDICES_DTYPE if field[0] in [INDICES_INDEX_FIELD, INDICES_TABLE_FIELD]])
|
166
164
|
self.indices = indices
|
167
165
|
database.close()
|
168
166
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: cesnet-datazoo
|
3
|
-
Version: 0.1.4
|
3
|
+
Version: 0.1.5
|
4
4
|
Summary: A toolkit for large network traffic datasets
|
5
5
|
Author-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
|
6
6
|
Maintainer-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/datasets/metadata/dataset_metadata.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cesnet_datazoo-0.1.4 → cesnet_datazoo-0.1.5}/cesnet_datazoo/metrics/classification_report.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|