cesnet-datazoo 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cesnet_datazoo/datasets/cesnet_dataset.py +6 -1
- cesnet_datazoo/datasets/metadata/metadata.csv +1 -1
- {cesnet_datazoo-0.1.9.dist-info → cesnet_datazoo-0.1.11.dist-info}/METADATA +18 -18
- {cesnet_datazoo-0.1.9.dist-info → cesnet_datazoo-0.1.11.dist-info}/RECORD +7 -7
- {cesnet_datazoo-0.1.9.dist-info → cesnet_datazoo-0.1.11.dist-info}/WHEEL +1 -1
- {cesnet_datazoo-0.1.9.dist-info → cesnet_datazoo-0.1.11.dist-info/licenses}/LICENCE +0 -0
- {cesnet_datazoo-0.1.9.dist-info → cesnet_datazoo-0.1.11.dist-info}/top_level.txt +0 -0
@@ -176,7 +176,7 @@ class CesnetDataset():
|
|
176
176
|
raise ValueError(f"Found {len(available_applications)} applications in the servicemap (omitting background traffic classes), but expected {self.metadata.application_count}. Please report this issue.")
|
177
177
|
self.available_classes = available_applications + self.metadata.background_traffic_classes
|
178
178
|
|
179
|
-
def set_dataset_config_and_initialize(self, dataset_config: DatasetConfig, disable_indices_cache: bool = False) -> None:
|
179
|
+
def set_dataset_config_and_initialize(self, dataset_config: DatasetConfig, disable_indices_cache: bool = False, silent_warning: bool = False) -> None:
|
180
180
|
"""
|
181
181
|
Initialize train, validation, and test sets. Data cannot be accessed before calling this method.
|
182
182
|
|
@@ -184,6 +184,11 @@ class CesnetDataset():
|
|
184
184
|
dataset_config: Desired configuration of the dataset.
|
185
185
|
disable_indices_cache: Whether to disable caching of the dataset indices. This is useful when the dataset is used in many different configurations and you want to save disk space.
|
186
186
|
"""
|
187
|
+
if self.name.startswith("CESNET-TLS-Year22") and not silent_warning:
|
188
|
+
warnings.warn("The CESNET-TLS-Year22 dataset contains traffic from the entire year of 2022. During the dataset collection, in week 10 (7.3.2022 - 13.3.2022), " + \
|
189
|
+
"the used flow exporter was updated with new features resulting in a change in the distribution of packet sequence data. " + \
|
190
|
+
"This can lead to a decrease in model performance when a model is trained on traffic before week 10 and tested on traffic after week 10. " + \
|
191
|
+
"More details can be found in the paper at https://doi.org/10.1038/s41597-024-03927-4. To disable this warning set silent_warning=True.")
|
187
192
|
self.dataset_config = dataset_config
|
188
193
|
self._clear()
|
189
194
|
self._initialize_train_val_test(disable_indices_cache=disable_indices_cache)
|
@@ -1,4 +1,4 @@
|
|
1
1
|
Name,Protocol,Published in,Collected in,Collection duration,Available samples,Available dataset sizes,Collection period,Missing dates in collection period,Application count,Background traffic classes,PPI features,Flowstats features,Flowstats features boolean,Packet histograms,TCP features,Other fields,Cite,Zenodo URL,Related papers
|
2
2
|
CESNET-TLS22,TLS,2022,2021,2 weeks,141392195,"XS, S, M, L",4.10.2021 - 17.10.2021,,191,,"IPT, DIR, SIZE","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION",,,"FLAG_CWR, FLAG_CWR_REV, FLAG_ECE, FLAG_ECE_REV, FLAG_URG, FLAG_URG_REV, FLAG_ACK, FLAG_ACK_REV, FLAG_PSH, FLAG_PSH_REV, FLAG_RST, FLAG_RST_REV, FLAG_SYN, FLAG_SYN_REV, FLAG_FIN, FLAG_FIN_REV",ID,https://doi.org/10.1016/j.comnet.2022.109467,https://zenodo.org/record/7965515,
|
3
3
|
CESNET-QUIC22,QUIC,2023,2022,4 weeks,153226273,"XS, S, M, L",31.10.2022 - 27.11.2022,,102,"default-background, google-background, facebook-background","IPT, DIR, SIZE","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION","FLOW_ENDREASON_IDLE, FLOW_ENDREASON_ACTIVE, FLOW_ENDREASON_OTHER","PHIST_SRC_SIZES, PHIST_DST_SIZES, PHIST_SRC_IPT, PHIST_DST_IPT",,"ID, SRC_IP, DST_IP, DST_ASN, SRC_PORT, DST_PORT, PROTOCOL, QUIC_VERSION, QUIC_SNI, QUIC_USERAGENT, TIME_FIRST, TIME_LAST",https://doi.org/10.1016/j.dib.2023.108888,https://zenodo.org/record/7963302,https://doi.org/10.23919/TMA58422.2023.10199052
|
4
|
-
CESNET-TLS-Year22,TLS,
|
4
|
+
CESNET-TLS-Year22,TLS,2024,2022,1 year,507739073,"XS, S, M, L",1.1.2022 - 31.12.2022,"20220128, 20220129, 20220130, 20221212, 20221213, 20221229, 20221230, 20221231",180,,"IPT, DIR, SIZE, PUSH_FLAG","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION","FLOW_ENDREASON_IDLE, FLOW_ENDREASON_ACTIVE, FLOW_ENDREASON_END, FLOW_ENDREASON_OTHER","PHIST_SRC_SIZES, PHIST_DST_SIZES, PHIST_SRC_IPT, PHIST_DST_IPT","FLAG_CWR, FLAG_CWR_REV, FLAG_ECE, FLAG_ECE_REV, FLAG_URG, FLAG_URG_REV, FLAG_ACK, FLAG_ACK_REV, FLAG_PSH, FLAG_PSH_REV, FLAG_RST, FLAG_RST_REV, FLAG_SYN, FLAG_SYN_REV, FLAG_FIN, FLAG_FIN_REV","ID, SRC_IP, DST_IP, DST_ASN, DST_PORT, PROTOCOL, TLS_SNI, TLS_JA3, TIME_FIRST, TIME_LAST",https://doi.org/10.1038/s41597-024-03927-4,https://zenodo.org/records/10608607,
|
@@ -1,42 +1,42 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: cesnet-datazoo
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.11
|
4
4
|
Summary: A toolkit for large network traffic datasets
|
5
5
|
Author-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
|
6
6
|
Maintainer-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
|
7
|
-
License: BSD-3-Clause
|
7
|
+
License-Expression: BSD-3-Clause
|
8
8
|
Project-URL: Homepage, https://github.com/CESNET/cesnet-datazoo
|
9
9
|
Project-URL: Documentation, https://cesnet.github.io/cesnet-datazoo/
|
10
10
|
Project-URL: Bug Tracker, https://github.com/CESNET/cesnet-datazoo/issues
|
11
11
|
Keywords: traffic classification,datasets,machine learning
|
12
12
|
Classifier: Programming Language :: Python :: 3.10
|
13
13
|
Classifier: Intended Audience :: Science/Research
|
14
|
-
Classifier: License :: OSI Approved :: BSD License
|
15
14
|
Classifier: Operating System :: OS Independent
|
16
15
|
Requires-Python: >=3.10
|
17
16
|
Description-Content-Type: text/markdown
|
18
17
|
License-File: LICENCE
|
19
|
-
Requires-Dist:
|
18
|
+
Requires-Dist: cesnet_models
|
20
19
|
Requires-Dist: matplotlib
|
21
|
-
Requires-Dist: numpy
|
20
|
+
Requires-Dist: numpy
|
22
21
|
Requires-Dist: pandas
|
23
|
-
Requires-Dist: pydantic
|
22
|
+
Requires-Dist: pydantic!=2.10.*,!=2.9.*,<2.12.0,>=2.0
|
24
23
|
Requires-Dist: PyYAML
|
25
24
|
Requires-Dist: requests
|
26
25
|
Requires-Dist: scikit-learn
|
27
26
|
Requires-Dist: seaborn
|
28
|
-
Requires-Dist: tables
|
29
|
-
Requires-Dist: torch
|
27
|
+
Requires-Dist: tables>=3.10.0
|
28
|
+
Requires-Dist: torch>=1.10
|
30
29
|
Requires-Dist: tqdm
|
31
30
|
Provides-Extra: dev
|
32
|
-
Requires-Dist: build
|
33
|
-
Requires-Dist: mkdocs-autorefs
|
34
|
-
Requires-Dist: mkdocs-material-extensions
|
35
|
-
Requires-Dist: mkdocs-material
|
36
|
-
Requires-Dist: mkdocs
|
37
|
-
Requires-Dist: mkdocstrings-python
|
38
|
-
Requires-Dist: mkdocstrings
|
39
|
-
Requires-Dist: twine
|
31
|
+
Requires-Dist: build; extra == "dev"
|
32
|
+
Requires-Dist: mkdocs-autorefs; extra == "dev"
|
33
|
+
Requires-Dist: mkdocs-material-extensions; extra == "dev"
|
34
|
+
Requires-Dist: mkdocs-material; extra == "dev"
|
35
|
+
Requires-Dist: mkdocs; extra == "dev"
|
36
|
+
Requires-Dist: mkdocstrings-python; extra == "dev"
|
37
|
+
Requires-Dist: mkdocstrings; extra == "dev"
|
38
|
+
Requires-Dist: twine; extra == "dev"
|
39
|
+
Dynamic: license-file
|
40
40
|
|
41
41
|
<p align="center">
|
42
42
|
<img src="https://raw.githubusercontent.com/CESNET/cesnet-datazoo/main/docs/images/datazoo.svg" width="450">
|
@@ -72,7 +72,7 @@ The `cesnet-datazoo` package currently provides three datasets with details in t
|
|
72
72
|
| Name | CESNET-TLS22 | CESNET-QUIC22 | CESNET-TLS-Year22 |
|
73
73
|
| ---------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
74
74
|
| _Protocol_ | TLS | QUIC | TLS |
|
75
|
-
| _Published in_ | 2022 | 2023 |
|
75
|
+
| _Published in_ | 2022 | 2023 | 2024 |
|
76
76
|
| _Collection duration_ | 2 weeks | 4 weeks | 1 year |
|
77
77
|
| _Collection period_ | 4.10.2021 - 17.10.2021 | 31.10.2022 - 27.11.2022 | 1.1.2022 - 31.12.2022 |
|
78
78
|
| _Application count_ | 191 | 102 | 180 |
|
@@ -2,14 +2,14 @@ cesnet_datazoo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
cesnet_datazoo/config.py,sha256=wkpD_OL3gRXX2t0WDfDMsBD2A3vEdAjSm4yXhzsR8T0,38536
|
3
3
|
cesnet_datazoo/constants.py,sha256=6GhcIyjVnWYrVnxRgTlGuiWRtvwZL1KqyzMJS26ge2E,1481
|
4
4
|
cesnet_datazoo/datasets/__init__.py,sha256=8ziQ3EUzUh5fMfWWXwk0cqYk0lOUNU7zbi0Gom3bLnI,443
|
5
|
-
cesnet_datazoo/datasets/cesnet_dataset.py,sha256=
|
5
|
+
cesnet_datazoo/datasets/cesnet_dataset.py,sha256=V2rBE4Mh2gaMw-NjQ4xHp6ViuuvCdEzB-ymX3CYyfkc,47762
|
6
6
|
cesnet_datazoo/datasets/datasets.py,sha256=Bn4SU1k5og6AsUlnPapFPeu4uGlpRH-IaOSafz0ZT2k,3617
|
7
7
|
cesnet_datazoo/datasets/datasets_constants.py,sha256=1P54Ns8wCQMemdKNe8OH7cVUfkxs3vL29ugSmOLXceI,29154
|
8
8
|
cesnet_datazoo/datasets/loaders.py,sha256=9KgRY-Y8CcgtXbgqWpAaG7gyOAsSf278w7b1eHwTSyE,1854
|
9
9
|
cesnet_datazoo/datasets/statistics.py,sha256=DfeCq-o7ML8u2Wg_AlAaarEBZ5oulCJz4S7enGswXRg,15137
|
10
10
|
cesnet_datazoo/datasets/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
cesnet_datazoo/datasets/metadata/dataset_metadata.py,sha256=o0rHXZ9S5NjjboGiXRQkBoZ7kYKlweQMRsMSAQm1EPE,1623
|
12
|
-
cesnet_datazoo/datasets/metadata/metadata.csv,sha256=
|
12
|
+
cesnet_datazoo/datasets/metadata/metadata.csv,sha256=S2PSEYDTIThoTpmTjjTDuVGfeJN4AwLe4oL8KuTu5G4,2252
|
13
13
|
cesnet_datazoo/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
14
|
cesnet_datazoo/metrics/classification_report.py,sha256=kqVW35uEctTiWpMqxhWzOmmDkV4p3yEFLMRqLn_R6AU,3981
|
15
15
|
cesnet_datazoo/metrics/provider_metrics.py,sha256=zoX0ps8BzEs3ml70g9dWWeLPflNAKUGYOEFYqdBbNY4,1374
|
@@ -23,8 +23,8 @@ cesnet_datazoo/utils/class_info.py,sha256=H5UgyRqXIepBJmkLQ1gAIXV4owKSoIllguRiqF
|
|
23
23
|
cesnet_datazoo/utils/download.py,sha256=hG5V1ZYZGtqCzlVV76NMgOZkSKOywdOFiq9Lagkgego,1441
|
24
24
|
cesnet_datazoo/utils/fileutils.py,sha256=XA_VWDuTiCXnoOgHPUzsmbnLFgrlxOo5cvUY_OBJUR8,642
|
25
25
|
cesnet_datazoo/utils/random.py,sha256=Dqgm_T25ljbew-OJozK90PsiXKnd4Kw6lcUexxF6vIc,575
|
26
|
-
cesnet_datazoo-0.1.
|
27
|
-
cesnet_datazoo-0.1.
|
28
|
-
cesnet_datazoo-0.1.
|
29
|
-
cesnet_datazoo-0.1.
|
30
|
-
cesnet_datazoo-0.1.
|
26
|
+
cesnet_datazoo-0.1.11.dist-info/licenses/LICENCE,sha256=69Wc69APiM1YKrFOIipG7jjU2lk89WQuO_U0AXKU8KE,1541
|
27
|
+
cesnet_datazoo-0.1.11.dist-info/METADATA,sha256=NBfTvdZUASh2-Et2p9nExhveoHkVaWtvZSLbojZqiDw,12583
|
28
|
+
cesnet_datazoo-0.1.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
29
|
+
cesnet_datazoo-0.1.11.dist-info/top_level.txt,sha256=bu1Z8zaI_1Id_ZaYyvJnxIBa87OSrdlZ8J2OBMggK5o,15
|
30
|
+
cesnet_datazoo-0.1.11.dist-info/RECORD,,
|
File without changes
|
File without changes
|