cesnet-datazoo 0.1.13__tar.gz → 0.1.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/PKG-INFO +1 -1
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/config.py +2 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/cesnet_dataset.py +7 -4
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/pytables_data/pytables_dataset.py +2 -1
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo.egg-info/PKG-INFO +1 -1
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/pyproject.toml +1 -1
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/LICENCE +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/README.md +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/__init__.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/constants.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/__init__.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/datasets.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/datasets_constants.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/loaders.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/metadata/__init__.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/metadata/dataset_metadata.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/metadata/metadata.csv +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/statistics.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/metrics/__init__.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/metrics/classification_report.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/metrics/provider_metrics.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/pytables_data/__init__.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/pytables_data/apps_split.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/pytables_data/data_scalers.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/pytables_data/indices_setup.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/utils/__init__.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/utils/class_info.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/utils/download.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/utils/fileutils.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/utils/random.py +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo.egg-info/SOURCES.txt +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo.egg-info/dependency_links.txt +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo.egg-info/requires.txt +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo.egg-info/top_level.txt +0 -0
- {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cesnet-datazoo
|
3
|
-
Version: 0.1.13
|
3
|
+
Version: 0.1.14
|
4
4
|
Summary: A toolkit for large network traffic datasets
|
5
5
|
Author-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
|
6
6
|
Maintainer-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
|
@@ -178,6 +178,7 @@ class DatasetConfig():
|
|
178
178
|
|
179
179
|
return_other_fields: Whether to return [auxiliary fields][other-fields], such as communicating hosts, flow times, and more fields extracted from the ClientHello message. `Default: False`
|
180
180
|
return_tensors: Use for returning `torch.Tensor` from dataloaders. Dataframes are not available when this option is used. `Default: False`
|
181
|
+
disable_label_encoding: Whether to disable label encoding and return application names as strings. The original labels of configured unknown classes are preserved. `Default: False`
|
181
182
|
use_packet_histograms: Whether to use packet histogram features, if available in the dataset. `Default: True`
|
182
183
|
use_tcp_features: Whether to use TCP features, if available in the dataset. `Default: True`
|
183
184
|
use_push_flags: Whether to use push flags in packet sequences, if available in the dataset. `Default: False`
|
@@ -255,6 +256,7 @@ class DatasetConfig():
|
|
255
256
|
|
256
257
|
return_other_fields: bool = False
|
257
258
|
return_tensors: bool = False
|
259
|
+
disable_label_encoding: bool = False
|
258
260
|
use_packet_histograms: bool = False
|
259
261
|
use_tcp_features: bool = False
|
260
262
|
use_push_flags: bool = False
|
@@ -619,7 +619,10 @@ class CesnetDataset():
|
|
619
619
|
encoder = LabelEncoder().fit(known_apps)
|
620
620
|
encoder.classes_ = np.append(encoder.classes_, UNKNOWN_STR_LABEL)
|
621
621
|
class_info = create_class_info(servicemap=servicemap, encoder=encoder, known_apps=known_apps, unknown_apps=unknown_apps)
|
622
|
-
|
622
|
+
if dataset_config.disable_label_encoding:
|
623
|
+
label_encoder_fn = None
|
624
|
+
else:
|
625
|
+
label_encoder_fn = partial(_encode_labels_with_unknown, encoder=encoder, class_info=class_info)
|
623
626
|
# Create train, validation, and test datasets
|
624
627
|
train_dataset = val_dataset = test_dataset = None
|
625
628
|
if dataset_config.need_train_set:
|
@@ -638,7 +641,7 @@ class CesnetDataset():
|
|
638
641
|
ppi_transform=dataset_config.ppi_transform,
|
639
642
|
flowstats_transform=dataset_config.flowstats_transform,
|
640
643
|
flowstats_phist_transform=dataset_config.flowstats_phist_transform,
|
641
|
-
target_transform=
|
644
|
+
target_transform=label_encoder_fn,
|
642
645
|
return_tensors=dataset_config.return_tensors,)
|
643
646
|
if dataset_config.need_val_set:
|
644
647
|
assert val_data_path is not None
|
@@ -657,7 +660,7 @@ class CesnetDataset():
|
|
657
660
|
ppi_transform=dataset_config.ppi_transform,
|
658
661
|
flowstats_transform=dataset_config.flowstats_transform,
|
659
662
|
flowstats_phist_transform=dataset_config.flowstats_phist_transform,
|
660
|
-
target_transform=
|
663
|
+
target_transform=label_encoder_fn,
|
661
664
|
return_tensors=dataset_config.return_tensors,
|
662
665
|
preload=dataset_config.preload_val,
|
663
666
|
preload_blob=os.path.join(val_data_path, "preload", f"val_dataset-{dataset_config.val_known_size}.npz"),)
|
@@ -678,7 +681,7 @@ class CesnetDataset():
|
|
678
681
|
ppi_transform=dataset_config.ppi_transform,
|
679
682
|
flowstats_transform=dataset_config.flowstats_transform,
|
680
683
|
flowstats_phist_transform=dataset_config.flowstats_phist_transform,
|
681
|
-
target_transform=
|
684
|
+
target_transform=label_encoder_fn,
|
682
685
|
return_tensors=dataset_config.return_tensors,
|
683
686
|
preload=dataset_config.preload_test,
|
684
687
|
preload_blob=os.path.join(test_data_path, "preload", f"test_dataset-{dataset_config.test_known_size}-{dataset_config.test_unknown_size}.npz"),)
|
{cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/pytables_data/pytables_dataset.py
RENAMED
@@ -1,6 +1,7 @@
|
|
1
1
|
import atexit
|
2
2
|
import logging
|
3
3
|
import os
|
4
|
+
import sys
|
4
5
|
import time
|
5
6
|
import warnings
|
6
7
|
from datetime import datetime
|
@@ -198,7 +199,7 @@ def init_train_indices(train_data_params: TrainDataParams, database_path: str, t
|
|
198
199
|
if train_data_params.min_train_samples_check == MinTrainSamplesCheck.WARN_AND_EXIT:
|
199
200
|
warnings.warn(f"Found applications with less than {train_data_params.min_train_samples_per_app} train samples: {min_samples_apps_names}. " +
|
200
201
|
"To disable these applications, add them to config.disabled_apps or set config.min_train_samples_check to disable-apps. To turn off this check, set config.min_train_samples_per_app to zero. Exiting")
|
201
|
-
exit()
|
202
|
+
sys.exit()
|
202
203
|
elif train_data_params.min_train_samples_check == MinTrainSamplesCheck.DISABLE_APPS:
|
203
204
|
log.info(f"Found applications with less than {train_data_params.min_train_samples_per_app} train samples: {min_samples_apps_names}. " +
|
204
205
|
"Disabling these applications")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cesnet-datazoo
|
3
|
-
Version: 0.1.13
|
3
|
+
Version: 0.1.14
|
4
4
|
Summary: A toolkit for large network traffic datasets
|
5
5
|
Author-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
|
6
6
|
Maintainer-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/datasets_constants.py
RENAMED
File without changes
|
File without changes
|
{cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/metadata/__init__.py
RENAMED
File without changes
|
{cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/metadata/dataset_metadata.py
RENAMED
File without changes
|
{cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/metadata/metadata.csv
RENAMED
File without changes
|
File without changes
|
File without changes
|
{cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/metrics/classification_report.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/pytables_data/data_scalers.py
RENAMED
File without changes
|
{cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/pytables_data/indices_setup.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo.egg-info/dependency_links.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|