cesnet-datazoo 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
1
  Name,Protocol,Published in,Collected in,Collection duration,Available samples,Available dataset sizes,Collection period,Missing dates in collection period,Application count,Background traffic classes,PPI features,Flowstats features,Flowstats features boolean,Packet histograms,TCP features,Other fields,Cite,Zenodo URL,Related papers
2
2
  CESNET-TLS22,TLS,2022,2021,2 weeks,141392195,"XS, S, M, L",4.10.2021 - 17.10.2021,,191,,"IPT, DIR, SIZE","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION",,,"FLAG_CWR, FLAG_CWR_REV, FLAG_ECE, FLAG_ECE_REV, FLAG_URG, FLAG_URG_REV, FLAG_ACK, FLAG_ACK_REV, FLAG_PSH, FLAG_PSH_REV, FLAG_RST, FLAG_RST_REV, FLAG_SYN, FLAG_SYN_REV, FLAG_FIN, FLAG_FIN_REV",ID,https://doi.org/10.1016/j.comnet.2022.109467,https://zenodo.org/record/7965515,
3
3
  CESNET-QUIC22,QUIC,2023,2022,4 weeks,153226273,"XS, S, M, L",31.10.2022 - 27.11.2022,,102,"default-background, google-background, facebook-background","IPT, DIR, SIZE","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION","FLOW_ENDREASON_IDLE, FLOW_ENDREASON_ACTIVE, FLOW_ENDREASON_OTHER","PHIST_SRC_SIZES, PHIST_DST_SIZES, PHIST_SRC_IPT, PHIST_DST_IPT",,"ID, SRC_IP, DST_IP, DST_ASN, SRC_PORT, DST_PORT, PROTOCOL, QUIC_VERSION, QUIC_SNI, QUIC_USERAGENT, TIME_FIRST, TIME_LAST",https://doi.org/10.1016/j.dib.2023.108888,https://zenodo.org/record/7963302,https://doi.org/10.23919/TMA58422.2023.10199052
4
- CESNET-TLS-Year22,TLS,2023,2022,1 year,507739073,"XS, S, M, L",1.1.2022 - 31.12.2022,"20220128, 20220129, 20220130, 20221212, 20221213, 20221229, 20221230, 20221231",180,,"IPT, DIR, SIZE, PUSH_FLAG","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION","FLOW_ENDREASON_IDLE, FLOW_ENDREASON_ACTIVE, FLOW_ENDREASON_END, FLOW_ENDREASON_OTHER","PHIST_SRC_SIZES, PHIST_DST_SIZES, PHIST_SRC_IPT, PHIST_DST_IPT","FLAG_CWR, FLAG_CWR_REV, FLAG_ECE, FLAG_ECE_REV, FLAG_URG, FLAG_URG_REV, FLAG_ACK, FLAG_ACK_REV, FLAG_PSH, FLAG_PSH_REV, FLAG_RST, FLAG_RST_REV, FLAG_SYN, FLAG_SYN_REV, FLAG_FIN, FLAG_FIN_REV","ID, SRC_IP, DST_IP, DST_ASN, DST_PORT, PROTOCOL, TLS_SNI, TLS_JA3, TIME_FIRST, TIME_LAST",,,
4
+ CESNET-TLS-Year22,TLS,2023,2022,1 year,507739073,"XS, S, M, L",1.1.2022 - 31.12.2022,"20220128, 20220129, 20220130, 20221212, 20221213, 20221229, 20221230, 20221231",180,,"IPT, DIR, SIZE, PUSH_FLAG","BYTES, BYTES_REV, PACKETS, PACKETS_REV, DURATION, PPI_LEN, PPI_ROUNDTRIPS, PPI_DURATION","FLOW_ENDREASON_IDLE, FLOW_ENDREASON_ACTIVE, FLOW_ENDREASON_END, FLOW_ENDREASON_OTHER","PHIST_SRC_SIZES, PHIST_DST_SIZES, PHIST_SRC_IPT, PHIST_DST_IPT","FLAG_CWR, FLAG_CWR_REV, FLAG_ECE, FLAG_ECE_REV, FLAG_URG, FLAG_URG_REV, FLAG_ACK, FLAG_ACK_REV, FLAG_PSH, FLAG_PSH_REV, FLAG_RST, FLAG_RST_REV, FLAG_SYN, FLAG_SYN_REV, FLAG_FIN, FLAG_FIN_REV","ID, SRC_IP, DST_IP, DST_ASN, DST_PORT, PROTOCOL, TLS_SNI, TLS_JA3, TIME_FIRST, TIME_LAST",https://doi.org/10.1038/s41597-024-03927-4,https://zenodo.org/records/10608607,
@@ -55,7 +55,7 @@ def fit_scalers(dataset_config: DatasetConfig, train_indices: np.ndarray) -> Non
55
55
  clip_and_scale_ppi_transform.ipt_scaler.fit(train_ipt.reshape(-1, 1))
56
56
  # Fit packet sizes scaler
57
57
  if clip_and_scale_ppi_transform.psizes_scaler:
58
- train_psizes = data_ppi[:, SIZE_POS].clip(max=clip_and_scale_ppi_transform.psizes_max, min=clip_and_scale_ppi_transform.pszies_min)
58
+ train_psizes = data_ppi[:, SIZE_POS].clip(max=clip_and_scale_ppi_transform.psizes_max, min=clip_and_scale_ppi_transform.psizes_min)
59
59
  train_psizes[padding_mask] = np.nan
60
60
  if isinstance(clip_and_scale_ppi_transform.psizes_scaler, MinMaxScaler):
61
61
  train_psizes = np.concatenate((train_psizes, [0]))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: cesnet-datazoo
3
- Version: 0.1.8
3
+ Version: 0.1.9
4
4
  Summary: A toolkit for large network traffic datasets
5
5
  Author-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
6
6
  Maintainer-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
@@ -56,7 +56,7 @@ The goal of this project is to provide tools for working with large network traf
56
56
  - Selection of application classes and splitting classes between *known* and *unknown*.
57
57
  - Data transformations, such as feature scaling.
58
58
  - Built on suitable data structures for experiments with large datasets. There are several caching mechanisms to make repeated runs faster, for example, when searching for the best model configuration.
59
- - Datasets are offered in multiple sizes to give users an option to start the experiments at a smaller scale (also faster dataset download, disk space, etc.). The default is the `S` size containing 25 million samples.
59
+ - Datasets are offered in multiple sizes to give users an option to start the experiments at a smaller scale (also faster dataset download, disk space, etc.). The default is the `S` size containing 25 million samples.
60
60
 
61
61
  :brain: :brain: See a related project [CESNET Models](https://github.com/CESNET/cesnet-models) providing pre-trained neural networks for traffic classification. :brain: :brain:
62
62
 
@@ -74,12 +74,12 @@ The `cesnet-datazoo` package currently provides three datasets with details in t
74
74
  | _Protocol_ | TLS | QUIC | TLS |
75
75
  | _Published in_ | 2022 | 2023 | 2023 |
76
76
  | _Collection duration_ | 2 weeks | 4 weeks | 1 year |
77
- | _Collection period_ | 4.10.2021 - 17.10.2021 | 31.10.2022 - 27.11.2022 | 1.1.2022 - 31.12.2022 | | ID, SRC_IP, DST_IP, DST_ASN, SRC_PORT, DST_PORT, PROTOCOL, QUIC_VERSION, QUIC_SNI, QUIC_USERAGENT, TIME_FIRST, TIME_LAST | ID, SRC_IP, DST_IP, DST_ASN, DST_PORT, PROTOCOL, TLS_SNI, TLS_JA3, TIME_FIRST, TIME_LAST |
77
+ | _Collection period_ | 4.10.2021 - 17.10.2021 | 31.10.2022 - 27.11.2022 | 1.1.2022 - 31.12.2022 |
78
78
  | _Application count_ | 191 | 102 | 180 |
79
79
  | _Available samples_ | 141392195 | 153226273 | 507739073 |
80
80
  | _Available dataset sizes_ | XS, S, M, L | XS, S, M, L | XS, S, M, L |
81
- | _Cite_ | [https://doi.org/10.1016/j.comnet.2022.109467](https://doi.org/10.1016/j.comnet.2022.109467) | [https://doi.org/10.1016/j.dib.2023.108888](https://doi.org/10.1016/j.dib.2023.108888) | |
82
- | _Zenodo URL_ | [https://zenodo.org/record/7965515](https://zenodo.org/record/7965515) | [https://zenodo.org/record/7963302](https://zenodo.org/record/7963302) | |
81
+ | _Cite_ | [https://doi.org/10.1016/j.comnet.2022.109467](https://doi.org/10.1016/j.comnet.2022.109467) | [https://doi.org/10.1016/j.dib.2023.108888](https://doi.org/10.1016/j.dib.2023.108888) | [https://doi.org/10.1038/s41597-024-03927-4](https://doi.org/10.1038/s41597-024-03927-4) |
82
+ | _Zenodo URL_ | [https://zenodo.org/record/7965515](https://zenodo.org/record/7965515) | [https://zenodo.org/record/7963302](https://zenodo.org/record/7963302) | [https://zenodo.org/records/10608607](https://zenodo.org/records/10608607) |
83
83
  | _Related papers_ | | [https://doi.org/10.23919/TMA58422.2023.10199052](https://doi.org/10.23919/TMA58422.2023.10199052) | |
84
84
 
85
85
  ## Installation
@@ -9,13 +9,13 @@ cesnet_datazoo/datasets/loaders.py,sha256=9KgRY-Y8CcgtXbgqWpAaG7gyOAsSf278w7b1eH
9
9
  cesnet_datazoo/datasets/statistics.py,sha256=DfeCq-o7ML8u2Wg_AlAaarEBZ5oulCJz4S7enGswXRg,15137
10
10
  cesnet_datazoo/datasets/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  cesnet_datazoo/datasets/metadata/dataset_metadata.py,sha256=o0rHXZ9S5NjjboGiXRQkBoZ7kYKlweQMRsMSAQm1EPE,1623
12
- cesnet_datazoo/datasets/metadata/metadata.csv,sha256=lG1Wz7Rr66pG2hWnMqoERIN_oX53DpAmlRZLw3T2p34,2175
12
+ cesnet_datazoo/datasets/metadata/metadata.csv,sha256=jD-2VyL5yfmDKkizQ_tQxsqm28M4qiCHfwiil4kCuno,2252
13
13
  cesnet_datazoo/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
14
  cesnet_datazoo/metrics/classification_report.py,sha256=kqVW35uEctTiWpMqxhWzOmmDkV4p3yEFLMRqLn_R6AU,3981
15
15
  cesnet_datazoo/metrics/provider_metrics.py,sha256=zoX0ps8BzEs3ml70g9dWWeLPflNAKUGYOEFYqdBbNY4,1374
16
16
  cesnet_datazoo/pytables_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  cesnet_datazoo/pytables_data/apps_split.py,sha256=RjLFomrlBCmnBn08FDw1IzL3PuQf4914yJQzwhiXH_E,1411
18
- cesnet_datazoo/pytables_data/data_scalers.py,sha256=gW75d-DGBokMKNUwM_5A3W3XCZ12WYXefGtpD8xYf1Y,5236
18
+ cesnet_datazoo/pytables_data/data_scalers.py,sha256=xPL0SCLByDOgKv1Apqi5XQd501mIfsF8FdonmRQ0zzQ,5236
19
19
  cesnet_datazoo/pytables_data/indices_setup.py,sha256=M5J2BevkQK8fuC22vUauKyKAEVwYg8xRz9JJK8E1VX8,13717
20
20
  cesnet_datazoo/pytables_data/pytables_dataset.py,sha256=YGbzYKrSklCu3J52Xbdcs3zZsYroBBtP8ulgS1c5Fnw,19431
21
21
  cesnet_datazoo/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -23,8 +23,8 @@ cesnet_datazoo/utils/class_info.py,sha256=H5UgyRqXIepBJmkLQ1gAIXV4owKSoIllguRiqF
23
23
  cesnet_datazoo/utils/download.py,sha256=hG5V1ZYZGtqCzlVV76NMgOZkSKOywdOFiq9Lagkgego,1441
24
24
  cesnet_datazoo/utils/fileutils.py,sha256=XA_VWDuTiCXnoOgHPUzsmbnLFgrlxOo5cvUY_OBJUR8,642
25
25
  cesnet_datazoo/utils/random.py,sha256=Dqgm_T25ljbew-OJozK90PsiXKnd4Kw6lcUexxF6vIc,575
26
- cesnet_datazoo-0.1.8.dist-info/LICENCE,sha256=69Wc69APiM1YKrFOIipG7jjU2lk89WQuO_U0AXKU8KE,1541
27
- cesnet_datazoo-0.1.8.dist-info/METADATA,sha256=OaEH-gYH4Pjm8NyjXumxn_7Q7i8mfAhZoCvcBbfv-xM,13144
28
- cesnet_datazoo-0.1.8.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
29
- cesnet_datazoo-0.1.8.dist-info/top_level.txt,sha256=bu1Z8zaI_1Id_ZaYyvJnxIBa87OSrdlZ8J2OBMggK5o,15
30
- cesnet_datazoo-0.1.8.dist-info/RECORD,,
26
+ cesnet_datazoo-0.1.9.dist-info/LICENCE,sha256=69Wc69APiM1YKrFOIipG7jjU2lk89WQuO_U0AXKU8KE,1541
27
+ cesnet_datazoo-0.1.9.dist-info/METADATA,sha256=tulO0ucEAPIvMEXqBTmlCuqDWU5-0XsYat7WDrQazkQ,12606
28
+ cesnet_datazoo-0.1.9.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
29
+ cesnet_datazoo-0.1.9.dist-info/top_level.txt,sha256=bu1Z8zaI_1Id_ZaYyvJnxIBa87OSrdlZ8J2OBMggK5o,15
30
+ cesnet_datazoo-0.1.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (75.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5