cesnet-datazoo 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -176,7 +176,7 @@ class CesnetDataset():
176
176
  raise ValueError(f"Found {len(available_applications)} applications in the servicemap (omitting background traffic classes), but expected {self.metadata.application_count}. Please report this issue.")
177
177
  self.available_classes = available_applications + self.metadata.background_traffic_classes
178
178
 
179
- def set_dataset_config_and_initialize(self, dataset_config: DatasetConfig, disable_indices_cache: bool = False) -> None:
179
+ def set_dataset_config_and_initialize(self, dataset_config: DatasetConfig, disable_indices_cache: bool = False, silent_warning: bool = False) -> None:
180
180
  """
181
181
  Initialize train, validation, and test sets. Data cannot be accessed before calling this method.
182
182
 
@@ -184,6 +184,11 @@ class CesnetDataset():
184
184
  dataset_config: Desired configuration of the dataset.
185
185
  disable_indices_cache: Whether to disable caching of the dataset indices. This is useful when the dataset is used in many different configurations and you want to save disk space.
186
186
  """
187
+ if self.name.startswith("CESNET-TLS-Year22") and not silent_warning:
188
+ warnings.warn("The CESNET-TLS-Year22 dataset contains traffic from the entire year of 2022. During the dataset collection, in week 10 (7.3.2022 - 13.3.2022), " + \
189
+ "the used flow exporter was updated with new features resulting in a change in the distribution of packet sequence data. " + \
190
+ "This can lead to a decrease in model performance when a model is trained on traffic before week 10 and tested on traffic after week 10. " + \
191
+ "More details can be found in the paper at https://doi.org/10.1038/s41597-024-03927-4. To disable this warning set silent_warning=True.")
187
192
  self.dataset_config = dataset_config
188
193
  self._clear()
189
194
  self._initialize_train_val_test(disable_indices_cache=disable_indices_cache)
@@ -1,4 +1,4 @@
1
1
  Name,Protocol,Published in,Collected in,Collection duration,Available samples,Available dataset sizes,Collection period,Missing dates in collection period,Application count,Background traffic classes,PPI features,Flowstats features,Flowstats features boolean,Packet histograms,TCP features,Other fields,Cite,Zenodo URL,Related papers
2
2
  CESNET-TLS22,TLS,2022,2021,2 weeks,141392195,"XS, S, M, L",4.10.2021 - 17.10.2021,,191,,"IPT, DIR, SIZE","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION",,,"FLAG_CWR, FLAG_CWR_REV, FLAG_ECE, FLAG_ECE_REV, FLAG_URG, FLAG_URG_REV, FLAG_ACK, FLAG_ACK_REV, FLAG_PSH, FLAG_PSH_REV, FLAG_RST, FLAG_RST_REV, FLAG_SYN, FLAG_SYN_REV, FLAG_FIN, FLAG_FIN_REV",ID,https://doi.org/10.1016/j.comnet.2022.109467,https://zenodo.org/record/7965515,
3
3
  CESNET-QUIC22,QUIC,2023,2022,4 weeks,153226273,"XS, S, M, L",31.10.2022 - 27.11.2022,,102,"default-background, google-background, facebook-background","IPT, DIR, SIZE","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION","FLOW_ENDREASON_IDLE, FLOW_ENDREASON_ACTIVE, FLOW_ENDREASON_OTHER","PHIST_SRC_SIZES, PHIST_DST_SIZES, PHIST_SRC_IPT, PHIST_DST_IPT",,"ID, SRC_IP, DST_IP, DST_ASN, SRC_PORT, DST_PORT, PROTOCOL, QUIC_VERSION, QUIC_SNI, QUIC_USERAGENT, TIME_FIRST, TIME_LAST",https://doi.org/10.1016/j.dib.2023.108888,https://zenodo.org/record/7963302,https://doi.org/10.23919/TMA58422.2023.10199052
4
- CESNET-TLS-Year22,TLS,2023,2022,1 year,507739073,"XS, S, M, L",1.1.2022 - 31.12.2022,"20220128, 20220129, 20220130, 20221212, 20221213, 20221229, 20221230, 20221231",180,,"IPT, DIR, SIZE, PUSH_FLAG","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION","FLOW_ENDREASON_IDLE, FLOW_ENDREASON_ACTIVE, FLOW_ENDREASON_END, FLOW_ENDREASON_OTHER","PHIST_SRC_SIZES, PHIST_DST_SIZES, PHIST_SRC_IPT, PHIST_DST_IPT","FLAG_CWR, FLAG_CWR_REV, FLAG_ECE, FLAG_ECE_REV, FLAG_URG, FLAG_URG_REV, FLAG_ACK, FLAG_ACK_REV, FLAG_PSH, FLAG_PSH_REV, FLAG_RST, FLAG_RST_REV, FLAG_SYN, FLAG_SYN_REV, FLAG_FIN, FLAG_FIN_REV","ID, SRC_IP, DST_IP, DST_ASN, DST_PORT, PROTOCOL, TLS_SNI, TLS_JA3, TIME_FIRST, TIME_LAST",https://doi.org/10.1038/s41597-024-03927-4,https://zenodo.org/records/10608607,
4
+ CESNET-TLS-Year22,TLS,2024,2022,1 year,507739073,"XS, S, M, L",1.1.2022 - 31.12.2022,"20220128, 20220129, 20220130, 20221212, 20221213, 20221229, 20221230, 20221231",180,,"IPT, DIR, SIZE, PUSH_FLAG","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION","FLOW_ENDREASON_IDLE, FLOW_ENDREASON_ACTIVE, FLOW_ENDREASON_END, FLOW_ENDREASON_OTHER","PHIST_SRC_SIZES, PHIST_DST_SIZES, PHIST_SRC_IPT, PHIST_DST_IPT","FLAG_CWR, FLAG_CWR_REV, FLAG_ECE, FLAG_ECE_REV, FLAG_URG, FLAG_URG_REV, FLAG_ACK, FLAG_ACK_REV, FLAG_PSH, FLAG_PSH_REV, FLAG_RST, FLAG_RST_REV, FLAG_SYN, FLAG_SYN_REV, FLAG_FIN, FLAG_FIN_REV","ID, SRC_IP, DST_IP, DST_ASN, DST_PORT, PROTOCOL, TLS_SNI, TLS_JA3, TIME_FIRST, TIME_LAST",https://doi.org/10.1038/s41597-024-03927-4,https://zenodo.org/records/10608607,
@@ -1,42 +1,42 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: cesnet-datazoo
3
- Version: 0.1.9
3
+ Version: 0.1.11
4
4
  Summary: A toolkit for large network traffic datasets
5
5
  Author-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
6
6
  Maintainer-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
7
- License: BSD-3-Clause
7
+ License-Expression: BSD-3-Clause
8
8
  Project-URL: Homepage, https://github.com/CESNET/cesnet-datazoo
9
9
  Project-URL: Documentation, https://cesnet.github.io/cesnet-datazoo/
10
10
  Project-URL: Bug Tracker, https://github.com/CESNET/cesnet-datazoo/issues
11
11
  Keywords: traffic classification,datasets,machine learning
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Intended Audience :: Science/Research
14
- Classifier: License :: OSI Approved :: BSD License
15
14
  Classifier: Operating System :: OS Independent
16
15
  Requires-Python: >=3.10
17
16
  Description-Content-Type: text/markdown
18
17
  License-File: LICENCE
19
- Requires-Dist: cesnet-models
18
+ Requires-Dist: cesnet_models
20
19
  Requires-Dist: matplotlib
21
- Requires-Dist: numpy <2.0
20
+ Requires-Dist: numpy
22
21
  Requires-Dist: pandas
23
- Requires-Dist: pydantic <=2.8.2,>=2.0
22
+ Requires-Dist: pydantic!=2.10.*,!=2.9.*,<2.12.0,>=2.0
24
23
  Requires-Dist: PyYAML
25
24
  Requires-Dist: requests
26
25
  Requires-Dist: scikit-learn
27
26
  Requires-Dist: seaborn
28
- Requires-Dist: tables <=3.9.2,>=3.8.0
29
- Requires-Dist: torch >=1.10
27
+ Requires-Dist: tables>=3.10.0
28
+ Requires-Dist: torch>=1.10
30
29
  Requires-Dist: tqdm
31
30
  Provides-Extra: dev
32
- Requires-Dist: build ; extra == 'dev'
33
- Requires-Dist: mkdocs-autorefs ; extra == 'dev'
34
- Requires-Dist: mkdocs-material-extensions ; extra == 'dev'
35
- Requires-Dist: mkdocs-material ; extra == 'dev'
36
- Requires-Dist: mkdocs ; extra == 'dev'
37
- Requires-Dist: mkdocstrings-python ; extra == 'dev'
38
- Requires-Dist: mkdocstrings ; extra == 'dev'
39
- Requires-Dist: twine ; extra == 'dev'
31
+ Requires-Dist: build; extra == "dev"
32
+ Requires-Dist: mkdocs-autorefs; extra == "dev"
33
+ Requires-Dist: mkdocs-material-extensions; extra == "dev"
34
+ Requires-Dist: mkdocs-material; extra == "dev"
35
+ Requires-Dist: mkdocs; extra == "dev"
36
+ Requires-Dist: mkdocstrings-python; extra == "dev"
37
+ Requires-Dist: mkdocstrings; extra == "dev"
38
+ Requires-Dist: twine; extra == "dev"
39
+ Dynamic: license-file
40
40
 
41
41
  <p align="center">
42
42
  <img src="https://raw.githubusercontent.com/CESNET/cesnet-datazoo/main/docs/images/datazoo.svg" width="450">
@@ -72,7 +72,7 @@ The `cesnet-datazoo` package currently provides three datasets with details in t
72
72
  | Name | CESNET-TLS22 | CESNET-QUIC22 | CESNET-TLS-Year22 |
73
73
  | ---------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
74
74
  | _Protocol_ | TLS | QUIC | TLS |
75
- | _Published in_ | 2022 | 2023 | 2023 |
75
+ | _Published in_ | 2022 | 2023 | 2024 |
76
76
  | _Collection duration_ | 2 weeks | 4 weeks | 1 year |
77
77
  | _Collection period_ | 4.10.2021 - 17.10.2021 | 31.10.2022 - 27.11.2022 | 1.1.2022 - 31.12.2022 |
78
78
  | _Application count_ | 191 | 102 | 180 |
@@ -2,14 +2,14 @@ cesnet_datazoo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  cesnet_datazoo/config.py,sha256=wkpD_OL3gRXX2t0WDfDMsBD2A3vEdAjSm4yXhzsR8T0,38536
3
3
  cesnet_datazoo/constants.py,sha256=6GhcIyjVnWYrVnxRgTlGuiWRtvwZL1KqyzMJS26ge2E,1481
4
4
  cesnet_datazoo/datasets/__init__.py,sha256=8ziQ3EUzUh5fMfWWXwk0cqYk0lOUNU7zbi0Gom3bLnI,443
5
- cesnet_datazoo/datasets/cesnet_dataset.py,sha256=14uKWWSGIkH3GM_BDUSYyCIoOh1L-I4bH0zu0m3DkkQ,46988
5
+ cesnet_datazoo/datasets/cesnet_dataset.py,sha256=V2rBE4Mh2gaMw-NjQ4xHp6ViuuvCdEzB-ymX3CYyfkc,47762
6
6
  cesnet_datazoo/datasets/datasets.py,sha256=Bn4SU1k5og6AsUlnPapFPeu4uGlpRH-IaOSafz0ZT2k,3617
7
7
  cesnet_datazoo/datasets/datasets_constants.py,sha256=1P54Ns8wCQMemdKNe8OH7cVUfkxs3vL29ugSmOLXceI,29154
8
8
  cesnet_datazoo/datasets/loaders.py,sha256=9KgRY-Y8CcgtXbgqWpAaG7gyOAsSf278w7b1eHwTSyE,1854
9
9
  cesnet_datazoo/datasets/statistics.py,sha256=DfeCq-o7ML8u2Wg_AlAaarEBZ5oulCJz4S7enGswXRg,15137
10
10
  cesnet_datazoo/datasets/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  cesnet_datazoo/datasets/metadata/dataset_metadata.py,sha256=o0rHXZ9S5NjjboGiXRQkBoZ7kYKlweQMRsMSAQm1EPE,1623
12
- cesnet_datazoo/datasets/metadata/metadata.csv,sha256=jD-2VyL5yfmDKkizQ_tQxsqm28M4qiCHfwiil4kCuno,2252
12
+ cesnet_datazoo/datasets/metadata/metadata.csv,sha256=S2PSEYDTIThoTpmTjjTDuVGfeJN4AwLe4oL8KuTu5G4,2252
13
13
  cesnet_datazoo/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  cesnet_datazoo/metrics/classification_report.py,sha256=kqVW35uEctTiWpMqxhWzOmmDkV4p3yEFLMRqLn_R6AU,3981
15
15
  cesnet_datazoo/metrics/provider_metrics.py,sha256=zoX0ps8BzEs3ml70g9dWWeLPflNAKUGYOEFYqdBbNY4,1374
@@ -23,8 +23,8 @@ cesnet_datazoo/utils/class_info.py,sha256=H5UgyRqXIepBJmkLQ1gAIXV4owKSoIllguRiqF
23
23
  cesnet_datazoo/utils/download.py,sha256=hG5V1ZYZGtqCzlVV76NMgOZkSKOywdOFiq9Lagkgego,1441
24
24
  cesnet_datazoo/utils/fileutils.py,sha256=XA_VWDuTiCXnoOgHPUzsmbnLFgrlxOo5cvUY_OBJUR8,642
25
25
  cesnet_datazoo/utils/random.py,sha256=Dqgm_T25ljbew-OJozK90PsiXKnd4Kw6lcUexxF6vIc,575
26
- cesnet_datazoo-0.1.9.dist-info/LICENCE,sha256=69Wc69APiM1YKrFOIipG7jjU2lk89WQuO_U0AXKU8KE,1541
27
- cesnet_datazoo-0.1.9.dist-info/METADATA,sha256=tulO0ucEAPIvMEXqBTmlCuqDWU5-0XsYat7WDrQazkQ,12606
28
- cesnet_datazoo-0.1.9.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
29
- cesnet_datazoo-0.1.9.dist-info/top_level.txt,sha256=bu1Z8zaI_1Id_ZaYyvJnxIBa87OSrdlZ8J2OBMggK5o,15
30
- cesnet_datazoo-0.1.9.dist-info/RECORD,,
26
+ cesnet_datazoo-0.1.11.dist-info/licenses/LICENCE,sha256=69Wc69APiM1YKrFOIipG7jjU2lk89WQuO_U0AXKU8KE,1541
27
+ cesnet_datazoo-0.1.11.dist-info/METADATA,sha256=NBfTvdZUASh2-Et2p9nExhveoHkVaWtvZSLbojZqiDw,12583
28
+ cesnet_datazoo-0.1.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
+ cesnet_datazoo-0.1.11.dist-info/top_level.txt,sha256=bu1Z8zaI_1Id_ZaYyvJnxIBa87OSrdlZ8J2OBMggK5o,15
30
+ cesnet_datazoo-0.1.11.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.2.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5