cesnet-datazoo 0.1.9__tar.gz → 0.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/PKG-INFO +8 -8
  2. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/README.md +1 -1
  3. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/datasets/cesnet_dataset.py +6 -1
  4. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/datasets/metadata/metadata.csv +1 -1
  5. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo.egg-info/PKG-INFO +8 -8
  6. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo.egg-info/requires.txt +3 -3
  7. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/pyproject.toml +5 -6
  8. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/LICENCE +0 -0
  9. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/__init__.py +0 -0
  10. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/config.py +0 -0
  11. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/constants.py +0 -0
  12. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/datasets/__init__.py +0 -0
  13. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/datasets/datasets.py +0 -0
  14. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/datasets/datasets_constants.py +0 -0
  15. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/datasets/loaders.py +0 -0
  16. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/datasets/metadata/__init__.py +0 -0
  17. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/datasets/metadata/dataset_metadata.py +0 -0
  18. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/datasets/statistics.py +0 -0
  19. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/metrics/__init__.py +0 -0
  20. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/metrics/classification_report.py +0 -0
  21. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/metrics/provider_metrics.py +0 -0
  22. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/pytables_data/__init__.py +0 -0
  23. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/pytables_data/apps_split.py +0 -0
  24. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/pytables_data/data_scalers.py +0 -0
  25. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/pytables_data/indices_setup.py +0 -0
  26. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/pytables_data/pytables_dataset.py +0 -0
  27. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/utils/__init__.py +0 -0
  28. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/utils/class_info.py +0 -0
  29. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/utils/download.py +0 -0
  30. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/utils/fileutils.py +0 -0
  31. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo/utils/random.py +0 -0
  32. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo.egg-info/SOURCES.txt +0 -0
  33. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo.egg-info/dependency_links.txt +0 -0
  34. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/cesnet_datazoo.egg-info/top_level.txt +0 -0
  35. {cesnet_datazoo-0.1.9 → cesnet_datazoo-0.1.11}/setup.cfg +0 -0
@@ -1,31 +1,30 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: cesnet-datazoo
3
- Version: 0.1.9
3
+ Version: 0.1.11
4
4
  Summary: A toolkit for large network traffic datasets
5
5
  Author-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
6
6
  Maintainer-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
7
- License: BSD-3-Clause
7
+ License-Expression: BSD-3-Clause
8
8
  Project-URL: Homepage, https://github.com/CESNET/cesnet-datazoo
9
9
  Project-URL: Documentation, https://cesnet.github.io/cesnet-datazoo/
10
10
  Project-URL: Bug Tracker, https://github.com/CESNET/cesnet-datazoo/issues
11
11
  Keywords: traffic classification,datasets,machine learning
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Intended Audience :: Science/Research
14
- Classifier: License :: OSI Approved :: BSD License
15
14
  Classifier: Operating System :: OS Independent
16
15
  Requires-Python: >=3.10
17
16
  Description-Content-Type: text/markdown
18
17
  License-File: LICENCE
19
18
  Requires-Dist: cesnet_models
20
19
  Requires-Dist: matplotlib
21
- Requires-Dist: numpy<2.0
20
+ Requires-Dist: numpy
22
21
  Requires-Dist: pandas
23
- Requires-Dist: pydantic<=2.8.2,>=2.0
22
+ Requires-Dist: pydantic!=2.10.*,!=2.9.*,<2.12.0,>=2.0
24
23
  Requires-Dist: PyYAML
25
24
  Requires-Dist: requests
26
25
  Requires-Dist: scikit-learn
27
26
  Requires-Dist: seaborn
28
- Requires-Dist: tables<=3.9.2,>=3.8.0
27
+ Requires-Dist: tables>=3.10.0
29
28
  Requires-Dist: torch>=1.10
30
29
  Requires-Dist: tqdm
31
30
  Provides-Extra: dev
@@ -37,6 +36,7 @@ Requires-Dist: mkdocs; extra == "dev"
37
36
  Requires-Dist: mkdocstrings-python; extra == "dev"
38
37
  Requires-Dist: mkdocstrings; extra == "dev"
39
38
  Requires-Dist: twine; extra == "dev"
39
+ Dynamic: license-file
40
40
 
41
41
  <p align="center">
42
42
  <img src="https://raw.githubusercontent.com/CESNET/cesnet-datazoo/main/docs/images/datazoo.svg" width="450">
@@ -72,7 +72,7 @@ The `cesnet-datazoo` package currently provides three datasets with details in t
72
72
  | Name | CESNET-TLS22 | CESNET-QUIC22 | CESNET-TLS-Year22 |
73
73
  | ---------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
74
74
  | _Protocol_ | TLS | QUIC | TLS |
75
- | _Published in_ | 2022 | 2023 | 2023 |
75
+ | _Published in_ | 2022 | 2023 | 2024 |
76
76
  | _Collection duration_ | 2 weeks | 4 weeks | 1 year |
77
77
  | _Collection period_ | 4.10.2021 - 17.10.2021 | 31.10.2022 - 27.11.2022 | 1.1.2022 - 31.12.2022 |
78
78
  | _Application count_ | 191 | 102 | 180 |
@@ -32,7 +32,7 @@ The `cesnet-datazoo` package currently provides three datasets with details in t
32
32
  | Name | CESNET-TLS22 | CESNET-QUIC22 | CESNET-TLS-Year22 |
33
33
  | ---------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
34
34
  | _Protocol_ | TLS | QUIC | TLS |
35
- | _Published in_ | 2022 | 2023 | 2023 |
35
+ | _Published in_ | 2022 | 2023 | 2024 |
36
36
  | _Collection duration_ | 2 weeks | 4 weeks | 1 year |
37
37
  | _Collection period_ | 4.10.2021 - 17.10.2021 | 31.10.2022 - 27.11.2022 | 1.1.2022 - 31.12.2022 |
38
38
  | _Application count_ | 191 | 102 | 180 |
@@ -176,7 +176,7 @@ class CesnetDataset():
176
176
  raise ValueError(f"Found {len(available_applications)} applications in the servicemap (omitting background traffic classes), but expected {self.metadata.application_count}. Please report this issue.")
177
177
  self.available_classes = available_applications + self.metadata.background_traffic_classes
178
178
 
179
- def set_dataset_config_and_initialize(self, dataset_config: DatasetConfig, disable_indices_cache: bool = False) -> None:
179
+ def set_dataset_config_and_initialize(self, dataset_config: DatasetConfig, disable_indices_cache: bool = False, silent_warning: bool = False) -> None:
180
180
  """
181
181
  Initialize train, validation, and test sets. Data cannot be accessed before calling this method.
182
182
 
@@ -184,6 +184,11 @@ class CesnetDataset():
184
184
  dataset_config: Desired configuration of the dataset.
185
185
  disable_indices_cache: Whether to disable caching of the dataset indices. This is useful when the dataset is used in many different configurations and you want to save disk space.
186
186
  """
187
+ if self.name.startswith("CESNET-TLS-Year22") and not silent_warning:
188
+ warnings.warn("The CESNET-TLS-Year22 dataset contains traffic from the entire year of 2022. During the dataset collection, in week 10 (7.3.2022 - 13.3.2022), " + \
189
+ "the used flow exporter was updated with new features resulting in a change in the distribution of packet sequence data. " + \
190
+ "This can lead to a decrease in model performance when a model is trained on traffic before week 10 and tested on traffic after week 10. " + \
191
+ "More details can be found in the paper at https://doi.org/10.1038/s41597-024-03927-4. To disable this warning set silent_warning=True.")
187
192
  self.dataset_config = dataset_config
188
193
  self._clear()
189
194
  self._initialize_train_val_test(disable_indices_cache=disable_indices_cache)
@@ -1,4 +1,4 @@
1
1
  Name,Protocol,Published in,Collected in,Collection duration,Available samples,Available dataset sizes,Collection period,Missing dates in collection period,Application count,Background traffic classes,PPI features,Flowstats features,Flowstats features boolean,Packet histograms,TCP features,Other fields,Cite,Zenodo URL,Related papers
2
2
  CESNET-TLS22,TLS,2022,2021,2 weeks,141392195,"XS, S, M, L",4.10.2021 - 17.10.2021,,191,,"IPT, DIR, SIZE","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION",,,"FLAG_CWR, FLAG_CWR_REV, FLAG_ECE, FLAG_ECE_REV, FLAG_URG, FLAG_URG_REV, FLAG_ACK, FLAG_ACK_REV, FLAG_PSH, FLAG_PSH_REV, FLAG_RST, FLAG_RST_REV, FLAG_SYN, FLAG_SYN_REV, FLAG_FIN, FLAG_FIN_REV",ID,https://doi.org/10.1016/j.comnet.2022.109467,https://zenodo.org/record/7965515,
3
3
  CESNET-QUIC22,QUIC,2023,2022,4 weeks,153226273,"XS, S, M, L",31.10.2022 - 27.11.2022,,102,"default-background, google-background, facebook-background","IPT, DIR, SIZE","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION","FLOW_ENDREASON_IDLE, FLOW_ENDREASON_ACTIVE, FLOW_ENDREASON_OTHER","PHIST_SRC_SIZES, PHIST_DST_SIZES, PHIST_SRC_IPT, PHIST_DST_IPT",,"ID, SRC_IP, DST_IP, DST_ASN, SRC_PORT, DST_PORT, PROTOCOL, QUIC_VERSION, QUIC_SNI, QUIC_USERAGENT, TIME_FIRST, TIME_LAST",https://doi.org/10.1016/j.dib.2023.108888,https://zenodo.org/record/7963302,https://doi.org/10.23919/TMA58422.2023.10199052
4
- CESNET-TLS-Year22,TLS,2023,2022,1 year,507739073,"XS, S, M, L",1.1.2022 - 31.12.2022,"20220128, 20220129, 20220130, 20221212, 20221213, 20221229, 20221230, 20221231",180,,"IPT, DIR, SIZE, PUSH_FLAG","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION","FLOW_ENDREASON_IDLE, FLOW_ENDREASON_ACTIVE, FLOW_ENDREASON_END, FLOW_ENDREASON_OTHER","PHIST_SRC_SIZES, PHIST_DST_SIZES, PHIST_SRC_IPT, PHIST_DST_IPT","FLAG_CWR, FLAG_CWR_REV, FLAG_ECE, FLAG_ECE_REV, FLAG_URG, FLAG_URG_REV, FLAG_ACK, FLAG_ACK_REV, FLAG_PSH, FLAG_PSH_REV, FLAG_RST, FLAG_RST_REV, FLAG_SYN, FLAG_SYN_REV, FLAG_FIN, FLAG_FIN_REV","ID, SRC_IP, DST_IP, DST_ASN, DST_PORT, PROTOCOL, TLS_SNI, TLS_JA3, TIME_FIRST, TIME_LAST",https://doi.org/10.1038/s41597-024-03927-4,https://zenodo.org/records/10608607,
4
+ CESNET-TLS-Year22,TLS,2024,2022,1 year,507739073,"XS, S, M, L",1.1.2022 - 31.12.2022,"20220128, 20220129, 20220130, 20221212, 20221213, 20221229, 20221230, 20221231",180,,"IPT, DIR, SIZE, PUSH_FLAG","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION","FLOW_ENDREASON_IDLE, FLOW_ENDREASON_ACTIVE, FLOW_ENDREASON_END, FLOW_ENDREASON_OTHER","PHIST_SRC_SIZES, PHIST_DST_SIZES, PHIST_SRC_IPT, PHIST_DST_IPT","FLAG_CWR, FLAG_CWR_REV, FLAG_ECE, FLAG_ECE_REV, FLAG_URG, FLAG_URG_REV, FLAG_ACK, FLAG_ACK_REV, FLAG_PSH, FLAG_PSH_REV, FLAG_RST, FLAG_RST_REV, FLAG_SYN, FLAG_SYN_REV, FLAG_FIN, FLAG_FIN_REV","ID, SRC_IP, DST_IP, DST_ASN, DST_PORT, PROTOCOL, TLS_SNI, TLS_JA3, TIME_FIRST, TIME_LAST",https://doi.org/10.1038/s41597-024-03927-4,https://zenodo.org/records/10608607,
@@ -1,31 +1,30 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: cesnet-datazoo
3
- Version: 0.1.9
3
+ Version: 0.1.11
4
4
  Summary: A toolkit for large network traffic datasets
5
5
  Author-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
6
6
  Maintainer-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
7
- License: BSD-3-Clause
7
+ License-Expression: BSD-3-Clause
8
8
  Project-URL: Homepage, https://github.com/CESNET/cesnet-datazoo
9
9
  Project-URL: Documentation, https://cesnet.github.io/cesnet-datazoo/
10
10
  Project-URL: Bug Tracker, https://github.com/CESNET/cesnet-datazoo/issues
11
11
  Keywords: traffic classification,datasets,machine learning
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Intended Audience :: Science/Research
14
- Classifier: License :: OSI Approved :: BSD License
15
14
  Classifier: Operating System :: OS Independent
16
15
  Requires-Python: >=3.10
17
16
  Description-Content-Type: text/markdown
18
17
  License-File: LICENCE
19
18
  Requires-Dist: cesnet_models
20
19
  Requires-Dist: matplotlib
21
- Requires-Dist: numpy<2.0
20
+ Requires-Dist: numpy
22
21
  Requires-Dist: pandas
23
- Requires-Dist: pydantic<=2.8.2,>=2.0
22
+ Requires-Dist: pydantic!=2.10.*,!=2.9.*,<2.12.0,>=2.0
24
23
  Requires-Dist: PyYAML
25
24
  Requires-Dist: requests
26
25
  Requires-Dist: scikit-learn
27
26
  Requires-Dist: seaborn
28
- Requires-Dist: tables<=3.9.2,>=3.8.0
27
+ Requires-Dist: tables>=3.10.0
29
28
  Requires-Dist: torch>=1.10
30
29
  Requires-Dist: tqdm
31
30
  Provides-Extra: dev
@@ -37,6 +36,7 @@ Requires-Dist: mkdocs; extra == "dev"
37
36
  Requires-Dist: mkdocstrings-python; extra == "dev"
38
37
  Requires-Dist: mkdocstrings; extra == "dev"
39
38
  Requires-Dist: twine; extra == "dev"
39
+ Dynamic: license-file
40
40
 
41
41
  <p align="center">
42
42
  <img src="https://raw.githubusercontent.com/CESNET/cesnet-datazoo/main/docs/images/datazoo.svg" width="450">
@@ -72,7 +72,7 @@ The `cesnet-datazoo` package currently provides three datasets with details in t
72
72
  | Name | CESNET-TLS22 | CESNET-QUIC22 | CESNET-TLS-Year22 |
73
73
  | ---------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
74
74
  | _Protocol_ | TLS | QUIC | TLS |
75
- | _Published in_ | 2022 | 2023 | 2023 |
75
+ | _Published in_ | 2022 | 2023 | 2024 |
76
76
  | _Collection duration_ | 2 weeks | 4 weeks | 1 year |
77
77
  | _Collection period_ | 4.10.2021 - 17.10.2021 | 31.10.2022 - 27.11.2022 | 1.1.2022 - 31.12.2022 |
78
78
  | _Application count_ | 191 | 102 | 180 |
@@ -1,13 +1,13 @@
1
1
  cesnet_models
2
2
  matplotlib
3
- numpy<2.0
3
+ numpy
4
4
  pandas
5
- pydantic<=2.8.2,>=2.0
5
+ pydantic!=2.10.*,!=2.9.*,<2.12.0,>=2.0
6
6
  PyYAML
7
7
  requests
8
8
  scikit-learn
9
9
  seaborn
10
- tables<=3.9.2,>=3.8.0
10
+ tables>=3.10.0
11
11
  torch>=1.10
12
12
  tqdm
13
13
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "cesnet-datazoo"
7
- version = "0.1.9"
7
+ version = "0.1.11"
8
8
  authors = [
9
9
  {name = "Jan Luxemburk", email = "luxemburk@cesnet.cz"},
10
10
  {name = "Karel Hynek", email = "hynekkar@cesnet.cz"},
@@ -15,7 +15,7 @@ maintainers = [
15
15
  ]
16
16
  description = "A toolkit for large network traffic datasets"
17
17
  readme = "README.md"
18
- license = {text = "BSD-3-Clause"}
18
+ license = "BSD-3-Clause"
19
19
  keywords = [
20
20
  "traffic classification",
21
21
  "datasets",
@@ -24,21 +24,20 @@ keywords = [
24
24
  classifiers = [
25
25
  "Programming Language :: Python :: 3.10",
26
26
  "Intended Audience :: Science/Research",
27
- "License :: OSI Approved :: BSD License",
28
27
  "Operating System :: OS Independent",
29
28
  ]
30
29
  requires-python = ">=3.10"
31
30
  dependencies = [
32
31
  "cesnet_models",
33
32
  "matplotlib",
34
- "numpy<2.0",
33
+ "numpy",
35
34
  "pandas",
36
- "pydantic>=2.0,<=2.8.2",
35
+ "pydantic >=2.0, !=2.9.*, !=2.10.*, <2.12.0",
37
36
  "PyYAML",
38
37
  "requests",
39
38
  "scikit-learn",
40
39
  "seaborn",
41
- "tables>=3.8.0,<=3.9.2",
40
+ "tables >=3.10.0",
42
41
  "torch>=1.10",
43
42
  "tqdm",
44
43
  ]
File without changes