cesnet-datazoo 0.1.13__tar.gz → 0.1.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/PKG-INFO +1 -1
  2. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/config.py +2 -0
  3. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/cesnet_dataset.py +7 -4
  4. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/pytables_data/pytables_dataset.py +2 -1
  5. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo.egg-info/PKG-INFO +1 -1
  6. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/pyproject.toml +1 -1
  7. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/LICENCE +0 -0
  8. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/README.md +0 -0
  9. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/__init__.py +0 -0
  10. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/constants.py +0 -0
  11. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/__init__.py +0 -0
  12. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/datasets.py +0 -0
  13. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/datasets_constants.py +0 -0
  14. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/loaders.py +0 -0
  15. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/metadata/__init__.py +0 -0
  16. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/metadata/dataset_metadata.py +0 -0
  17. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/metadata/metadata.csv +0 -0
  18. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/datasets/statistics.py +0 -0
  19. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/metrics/__init__.py +0 -0
  20. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/metrics/classification_report.py +0 -0
  21. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/metrics/provider_metrics.py +0 -0
  22. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/pytables_data/__init__.py +0 -0
  23. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/pytables_data/apps_split.py +0 -0
  24. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/pytables_data/data_scalers.py +0 -0
  25. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/pytables_data/indices_setup.py +0 -0
  26. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/utils/__init__.py +0 -0
  27. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/utils/class_info.py +0 -0
  28. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/utils/download.py +0 -0
  29. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/utils/fileutils.py +0 -0
  30. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo/utils/random.py +0 -0
  31. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo.egg-info/SOURCES.txt +0 -0
  32. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo.egg-info/dependency_links.txt +0 -0
  33. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo.egg-info/requires.txt +0 -0
  34. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/cesnet_datazoo.egg-info/top_level.txt +0 -0
  35. {cesnet_datazoo-0.1.13 → cesnet_datazoo-0.1.14}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cesnet-datazoo
3
- Version: 0.1.13
3
+ Version: 0.1.14
4
4
  Summary: A toolkit for large network traffic datasets
5
5
  Author-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
6
6
  Maintainer-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
@@ -178,6 +178,7 @@ class DatasetConfig():
178
178
 
179
179
  return_other_fields: Whether to return [auxiliary fields][other-fields], such as communicating hosts, flow times, and more fields extracted from the ClientHello message. `Default: False`
180
180
  return_tensors: Use for returning `torch.Tensor` from dataloaders. Dataframes are not available when this option is used. `Default: False`
181
+ disable_label_encoding: Whether to disable label encoding and return application names as strings. The original labels of configured unknown classes are preserved. `Default: False`
181
182
  use_packet_histograms: Whether to use packet histogram features, if available in the dataset. `Default: True`
182
183
  use_tcp_features: Whether to use TCP features, if available in the dataset. `Default: True`
183
184
  use_push_flags: Whether to use push flags in packet sequences, if available in the dataset. `Default: False`
@@ -255,6 +256,7 @@ class DatasetConfig():
255
256
 
256
257
  return_other_fields: bool = False
257
258
  return_tensors: bool = False
259
+ disable_label_encoding: bool = False
258
260
  use_packet_histograms: bool = False
259
261
  use_tcp_features: bool = False
260
262
  use_push_flags: bool = False
@@ -619,7 +619,10 @@ class CesnetDataset():
619
619
  encoder = LabelEncoder().fit(known_apps)
620
620
  encoder.classes_ = np.append(encoder.classes_, UNKNOWN_STR_LABEL)
621
621
  class_info = create_class_info(servicemap=servicemap, encoder=encoder, known_apps=known_apps, unknown_apps=unknown_apps)
622
- encode_labels_with_unknown_fn = partial(_encode_labels_with_unknown, encoder=encoder, class_info=class_info)
622
+ if dataset_config.disable_label_encoding:
623
+ label_encoder_fn = None
624
+ else:
625
+ label_encoder_fn = partial(_encode_labels_with_unknown, encoder=encoder, class_info=class_info)
623
626
  # Create train, validation, and test datasets
624
627
  train_dataset = val_dataset = test_dataset = None
625
628
  if dataset_config.need_train_set:
@@ -638,7 +641,7 @@ class CesnetDataset():
638
641
  ppi_transform=dataset_config.ppi_transform,
639
642
  flowstats_transform=dataset_config.flowstats_transform,
640
643
  flowstats_phist_transform=dataset_config.flowstats_phist_transform,
641
- target_transform=encode_labels_with_unknown_fn,
644
+ target_transform=label_encoder_fn,
642
645
  return_tensors=dataset_config.return_tensors,)
643
646
  if dataset_config.need_val_set:
644
647
  assert val_data_path is not None
@@ -657,7 +660,7 @@ class CesnetDataset():
657
660
  ppi_transform=dataset_config.ppi_transform,
658
661
  flowstats_transform=dataset_config.flowstats_transform,
659
662
  flowstats_phist_transform=dataset_config.flowstats_phist_transform,
660
- target_transform=encode_labels_with_unknown_fn,
663
+ target_transform=label_encoder_fn,
661
664
  return_tensors=dataset_config.return_tensors,
662
665
  preload=dataset_config.preload_val,
663
666
  preload_blob=os.path.join(val_data_path, "preload", f"val_dataset-{dataset_config.val_known_size}.npz"),)
@@ -678,7 +681,7 @@ class CesnetDataset():
678
681
  ppi_transform=dataset_config.ppi_transform,
679
682
  flowstats_transform=dataset_config.flowstats_transform,
680
683
  flowstats_phist_transform=dataset_config.flowstats_phist_transform,
681
- target_transform=encode_labels_with_unknown_fn,
684
+ target_transform=label_encoder_fn,
682
685
  return_tensors=dataset_config.return_tensors,
683
686
  preload=dataset_config.preload_test,
684
687
  preload_blob=os.path.join(test_data_path, "preload", f"test_dataset-{dataset_config.test_known_size}-{dataset_config.test_unknown_size}.npz"),)
@@ -1,6 +1,7 @@
1
1
  import atexit
2
2
  import logging
3
3
  import os
4
+ import sys
4
5
  import time
5
6
  import warnings
6
7
  from datetime import datetime
@@ -198,7 +199,7 @@ def init_train_indices(train_data_params: TrainDataParams, database_path: str, t
198
199
  if train_data_params.min_train_samples_check == MinTrainSamplesCheck.WARN_AND_EXIT:
199
200
  warnings.warn(f"Found applications with less than {train_data_params.min_train_samples_per_app} train samples: {min_samples_apps_names}. " +
200
201
  "To disable these applications, add them to config.disabled_apps or set config.min_train_samples_check to disable-apps. To turn off this check, set config.min_train_samples_per_app to zero. Exiting")
201
- exit()
202
+ sys.exit()
202
203
  elif train_data_params.min_train_samples_check == MinTrainSamplesCheck.DISABLE_APPS:
203
204
  log.info(f"Found applications with less than {train_data_params.min_train_samples_per_app} train samples: {min_samples_apps_names}. " +
204
205
  "Disabling these applications")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cesnet-datazoo
3
- Version: 0.1.13
3
+ Version: 0.1.14
4
4
  Summary: A toolkit for large network traffic datasets
5
5
  Author-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
6
6
  Maintainer-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "cesnet-datazoo"
7
- version = "0.1.13"
7
+ version = "0.1.14"
8
8
  authors = [
9
9
  {name = "Jan Luxemburk", email = "luxemburk@cesnet.cz"},
10
10
  {name = "Karel Hynek", email = "hynekkar@cesnet.cz"},
File without changes