cesnet-datazoo 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cesnet_datazoo/config.py CHANGED
@@ -14,8 +14,9 @@ import yaml
 from pydantic import model_validator
 from pydantic.dataclasses import dataclass
 
-from cesnet_datazoo.constants import (PHIST_BIN_COUNT, PPI_MAX_LEN, SELECTED_TCP_FLAGS,
-                                      TCP_PPI_CHANNELS, UDP_PPI_CHANNELS)
+from cesnet_datazoo.constants import (PHIST_BIN_COUNT, PPI_MAX_LEN, QUIC_SNI_COLUMN,
+                                      SELECTED_TCP_FLAGS, TCP_PPI_CHANNELS, TLS_SNI_COLUMN,
+                                      UDP_PPI_CHANNELS)
 
 if TYPE_CHECKING:
     from cesnet_datazoo.datasets.cesnet_dataset import CesnetDataset
@@ -128,6 +129,7 @@ class DatasetConfig():
        flowstats_features_boolean: Taken from `dataset.metadata.flowstats_features_boolean`.
        flowstats_features_phist: Taken from `dataset.metadata.packet_histograms` if `use_packet_histograms` is true, otherwise an empty list.
        other_fields: Taken from `dataset.metadata.other_fields` if `return_other_fields` is true, otherwise an empty list.
+       sni_column: Database column with SNI domains, can be None for datasets without SNI domains.
 
    # Configuration options
 
@@ -343,6 +345,8 @@ class DatasetConfig():
         # Configure features
         self.flowstats_features = dataset.metadata.flowstats_features
         self.flowstats_features_boolean = dataset.metadata.flowstats_features_boolean
+        sni_column = TLS_SNI_COLUMN if dataset.metadata.protocol == Protocol.TLS else QUIC_SNI_COLUMN
+        self.sni_column = sni_column if sni_column in dataset.metadata.other_fields else None
         self.other_fields = dataset.metadata.other_fields if self.return_other_fields else []
         if self.use_packet_histograms:
             if len(dataset.metadata.packet_histograms) == 0:
@@ -39,6 +39,8 @@ DEFAULT_BACKGROUND_CLASS = "default-background"
39
39
  GOOGLE_BACKGROUND_CLASS = "google-background"
40
40
 
41
41
  # Indices
42
- INDICES_TABLE_POS = 0
43
- INDICES_INDEX_POS = 1
44
- INDICES_LABEL_POS = 2
42
+ INDICES_TABLE_FIELD = "TABLE"
43
+ INDICES_INDEX_FIELD = "INDEX"
44
+ INDICES_APP_FIELD = "APP"
45
+ INDICES_SNI_FIELD = "SNI"
46
+ INDICES_DTYPE = [(INDICES_TABLE_FIELD, "int32"), (INDICES_INDEX_FIELD, "int32"), (INDICES_APP_FIELD, "int32"), (INDICES_SNI_FIELD, "U50")]
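Note: the index arrays thus move from positional integer columns (`INDICES_TABLE_POS`, etc.) to named fields of a NumPy structured dtype. A minimal sketch of what the new layout enables, with illustrative values:

```python
import numpy as np

# Mirrors INDICES_DTYPE above; rows are (table id, row index, app id, SNI domain)
INDICES_DTYPE = [("TABLE", "int32"), ("INDEX", "int32"), ("APP", "int32"), ("SNI", "U50")]

indices = np.array(
    [(0, 17, 3, "example.org"), (0, 42, 3, "example.org"), (1, 5, 7, "")],
    dtype=INDICES_DTYPE,
)

print(indices["APP"])    # [3 3 7]  -- replaces indices[:, INDICES_LABEL_POS]
print(indices["TABLE"])  # [0 0 1]  -- replaces indices[:, INDICES_TABLE_POS]
```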
cesnet_datazoo/datasets/cesnet_dataset.py CHANGED
@@ -10,14 +10,16 @@ import numpy as np
 import pandas as pd
 import tables as tb
 import torch
+from numpy.lib.recfunctions import repack_fields
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import LabelEncoder
 from torch.utils.data import BatchSampler, DataLoader, RandomSampler, Sampler, SequentialSampler
 from typing_extensions import assert_never
 
 from cesnet_datazoo.config import AppSelection, DataLoaderOrder, DatasetConfig, ValidationApproach
-from cesnet_datazoo.constants import (APP_COLUMN, CATEGORY_COLUMN, DATASET_SIZES, INDICES_LABEL_POS,
-                                      SERVICEMAP_FILE, UNKNOWN_STR_LABEL)
+from cesnet_datazoo.constants import (APP_COLUMN, CATEGORY_COLUMN, DATASET_SIZES, INDICES_APP_FIELD,
+                                      INDICES_INDEX_FIELD, INDICES_TABLE_FIELD, SERVICEMAP_FILE,
+                                      UNKNOWN_STR_LABEL)
 from cesnet_datazoo.datasets.loaders import collate_fn_simple, create_df_from_dataloader
 from cesnet_datazoo.datasets.metadata.dataset_metadata import DatasetMetadata, load_metadata
 from cesnet_datazoo.datasets.statistics import compute_dataset_statistics
@@ -555,7 +557,7 @@ class CesnetDataset():
         train_val_rng = get_fresh_random_generator(dataset_config=dataset_config, section=RandomizedSection.TRAIN_VAL_SPLIT)
         val_data_path = dataset_config._get_train_data_path()
         val_unknown_indices = train_unknown_indices
-        train_labels = train_indices[:, INDICES_LABEL_POS]
+        train_labels = train_indices[INDICES_APP_FIELD]
         if dataset_config.train_dates_weigths is not None:
             assert dataset_config.val_known_size != "all"
             # When weight sampling is used, val_known_size is kept but the resulting train size can be smaller due to not enough samples in some train dates
@@ -619,13 +621,14 @@ class CesnetDataset():
         train_dataset = PyTablesDataset(
             database_path=dataset_config.database_path,
             tables_paths=dataset_config._get_train_tables_paths(),
-            indices=dataset_indices.train_indices,
+            indices=repack_fields(dataset_indices.train_indices[[INDICES_TABLE_FIELD, INDICES_INDEX_FIELD]]), # type: ignore
             tables_app_enum=self._tables_app_enum,
             tables_cat_enum=self._tables_cat_enum,
             flowstats_features=dataset_config.flowstats_features,
             flowstats_features_boolean=dataset_config.flowstats_features_boolean,
             flowstats_features_phist=dataset_config.flowstats_features_phist,
             other_fields=self.dataset_config.other_fields,
+            sni_column=self.dataset_config.sni_column,
             ppi_channels=dataset_config.get_ppi_channels(),
             ppi_transform=dataset_config.ppi_transform,
             flowstats_transform=dataset_config.flowstats_transform,
@@ -637,13 +640,14 @@ class CesnetDataset():
         val_dataset = PyTablesDataset(
             database_path=dataset_config.database_path,
             tables_paths=dataset_config._get_val_tables_paths(),
-            indices=dataset_indices.val_known_indices,
+            indices=repack_fields(dataset_indices.val_known_indices[[INDICES_TABLE_FIELD, INDICES_INDEX_FIELD]]), # type: ignore
             tables_app_enum=self._tables_app_enum,
             tables_cat_enum=self._tables_cat_enum,
             flowstats_features=dataset_config.flowstats_features,
             flowstats_features_boolean=dataset_config.flowstats_features_boolean,
             flowstats_features_phist=dataset_config.flowstats_features_phist,
             other_fields=self.dataset_config.other_fields,
+            sni_column=self.dataset_config.sni_column,
             ppi_channels=dataset_config.get_ppi_channels(),
             ppi_transform=dataset_config.ppi_transform,
             flowstats_transform=dataset_config.flowstats_transform,
@@ -657,13 +661,14 @@ class CesnetDataset():
         test_dataset = PyTablesDataset(
             database_path=dataset_config.database_path,
             tables_paths=dataset_config._get_test_tables_paths(),
-            indices=test_combined_indices,
+            indices=repack_fields(test_combined_indices[[INDICES_TABLE_FIELD, INDICES_INDEX_FIELD]]), # type: ignore
             tables_app_enum=self._tables_app_enum,
             tables_cat_enum=self._tables_cat_enum,
             flowstats_features=dataset_config.flowstats_features,
             flowstats_features_boolean=dataset_config.flowstats_features_boolean,
             flowstats_features_phist=dataset_config.flowstats_features_phist,
             other_fields=self.dataset_config.other_fields,
+            sni_column=self.dataset_config.sni_column,
             ppi_channels=dataset_config.get_ppi_channels(),
             ppi_transform=dataset_config.ppi_transform,
             flowstats_transform=dataset_config.flowstats_transform,
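The `repack_fields` calls are needed because multi-field indexing of a structured array returns a view that keeps the parent itemsize, including the bytes of the unselected `APP` and `SNI` fields; repacking yields a compact copy with only the selected fields. A small sketch of the behavior, with illustrative values:

```python
import numpy as np
from numpy.lib.recfunctions import repack_fields

INDICES_DTYPE = [("TABLE", "int32"), ("INDEX", "int32"), ("APP", "int32"), ("SNI", "U50")]
indices = np.zeros(4, dtype=INDICES_DTYPE)

view = indices[["TABLE", "INDEX"]]  # view with padded itemsize (APP and SNI bytes remain)
packed = repack_fields(view)        # compact copy holding only the two int32 fields

print(view.dtype.itemsize, packed.dtype.itemsize)  # 212 8
```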
cesnet_datazoo/datasets/metadata/dataset_metadata.py CHANGED
@@ -39,5 +39,5 @@ class DatasetMetadata():
 metadata_df = pd.read_csv(os.path.join(os.path.dirname(__file__), "metadata.csv"), index_col="Name", keep_default_na=False)
 def load_metadata(dataset_name: str) -> DatasetMetadata:
     d = metadata_df.loc[dataset_name].to_dict()
-    d = {k.replace(" ", "_").lower(): v for k, v in d.items()}
+    d = {k.replace(" ", "_").lower(): v for k, v in d.items()} # type: ignore
     return DatasetMetadata(**d)
cesnet_datazoo/datasets/statistics.py CHANGED
@@ -92,7 +92,7 @@ def compute_dataset_statistics(database_path: str,
                               tables_paths=table_paths,
                               indices=None,
                               disabled_apps=disabled_apps,
-                              return_all_fields=True,
+                              return_raw_fields=True,
                               flowstats_features=[],
                               flowstats_features_boolean=[],
                               flowstats_features_phist=[],
cesnet_datazoo/metrics/classification_report.py CHANGED
@@ -1,13 +1,12 @@
 import numpy as np
 from sklearn.metrics import accuracy_score, precision_recall_fscore_support
 
-from cesnet_datazoo.metrics.provider_metrics import (per_app_provider_metrics,
-                                                     provider_accuracies)
+from cesnet_datazoo.metrics.provider_metrics import per_app_provider_metrics, provider_accuracies
 from cesnet_datazoo.utils.class_info import ClassInfo
 
 
-def better_classification_report(y_true: np.ndarray, y_pred: np.ndarray, cm: np.ndarray, labels: list[int], class_info: ClassInfo, digits: int = 2, zero_division: int = 0) -> tuple[str, dict[str, float]]:
-    p, r, f1, s = precision_recall_fscore_support(y_true, y_pred,
+def better_classification_report(test_labels: np.ndarray, preds: np.ndarray, cm: np.ndarray, labels: list[int], class_info: ClassInfo, digits: int = 2, zero_division: int = 0) -> tuple[str, dict[str, float]]:
+    p, r, f1, s = precision_recall_fscore_support(test_labels, preds,
                                                   labels=labels,
                                                   zero_division=zero_division)
     sc_p, sc_r, sc_f1 = per_app_provider_metrics(cm, class_info=class_info)
@@ -46,20 +45,20 @@ def better_classification_report(y_true: np.ndarray, y_pred: np.ndarray, cm: np.
     report += headers_fmt.format("", *headers_avg, width=width)
     report += row_fmt_avg.format("macro avg", *row_avg, width=width, digits=digits)
 
-    acc = accuracy_score(y_true, y_pred)
-    provider_acc, failed_provider_acc = provider_accuracies(y_true, y_pred, class_info=class_info)
+    acc = accuracy_score(test_labels, preds)
+    provider_acc, failed_provider_acc = provider_accuracies(test_labels, preds, class_info=class_info)
 
     row_fmt_acc = "{:>{width}} {:>15} {:>15} {:>7.{digits}f}\n"
     report += row_fmt_acc.format("acc", "", "", acc, width=width, digits=digits)
     report += row_fmt_acc.format("provider acc", "", "", provider_acc, width=width, digits=digits)
     report += row_fmt_acc.format("failed provider acc", "", "", failed_provider_acc, width=width, digits=digits)
     metrics = {
-        "Test/Accuracy": acc,
-        "Test/Provider Accuracy": provider_acc,
-        "Test/Failed Provider Accuracy": failed_provider_acc,
-        "Test/Fscore": avg_f1,
-        "Test/Provider Fscore": avg_sc_f1,
-        "Test/Recall": avg_r,
-        "Test/Provider Recall": avg_sc_r,
+        "test/acc": acc,
+        "test/provider-acc": provider_acc,
+        "test/failed-provider-acc": failed_provider_acc,
+        "test/fscore": avg_f1,
+        "test/provider-fscore": avg_sc_f1,
+        "test/recall": avg_r,
+        "test/provider-recall": avg_sc_r,
     }
     return report, metrics
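For reference, the nested fields in `row_fmt_acc` take `width` and `digits` as keyword arguments at format time; an illustrative call:

```python
row_fmt_acc = "{:>{width}} {:>15} {:>15} {:>7.{digits}f}\n"
# Right-aligns the label to `width` columns and prints the value as 0.97
print(row_fmt_acc.format("provider acc", "", "", 0.9731, width=20, digits=2), end="")
```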
cesnet_datazoo/metrics/provider_metrics.py CHANGED
@@ -3,13 +3,13 @@ import numpy as np
 from cesnet_datazoo.utils.class_info import ClassInfo
 
 
-def provider_accuracies(y_true: np.ndarray, y_pred: np.ndarray, class_info: ClassInfo) -> tuple[float, float]:
+def provider_accuracies(true_labels: np.ndarray, preds: np.ndarray, class_info: ClassInfo) -> tuple[float, float]:
     provider_mapping_arr = np.array(list(class_info.provider_mapping.values()))
-    y_true_sc = provider_mapping_arr[y_true]
-    y_pred_sc = provider_mapping_arr[y_pred]
-    mistakes = y_true != y_pred
-    provider_acc = (y_true_sc == y_pred_sc).sum() / len(y_true_sc)
-    failed_provider_acc = (y_true_sc[mistakes] == y_pred_sc[mistakes]).sum() / mistakes.sum()
+    true_labels_provider = provider_mapping_arr[true_labels]
+    preds_provider = provider_mapping_arr[preds]
+    mistakes = true_labels != preds
+    provider_acc = (true_labels_provider == preds_provider).sum() / len(true_labels_provider)
+    failed_provider_acc = (true_labels_provider[mistakes] == preds_provider[mistakes]).sum() / mistakes.sum()
     return provider_acc, failed_provider_acc
 
 def per_app_provider_metrics(cm, class_info: ClassInfo):
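A worked toy example of the provider-accuracy computation above (the provider mapping and labels are illustrative):

```python
import numpy as np

provider_mapping_arr = np.array([0, 0, 1])  # apps 0 and 1 -> provider 0, app 2 -> provider 1

true_labels = np.array([0, 1, 2, 2])
preds       = np.array([1, 1, 2, 0])

true_provider = provider_mapping_arr[true_labels]  # [0 0 1 1]
pred_provider = provider_mapping_arr[preds]        # [0 0 1 0]
mistakes = true_labels != preds                    # [ True False False  True]

# Share of samples whose predicted provider matches the true provider
provider_acc = (true_provider == pred_provider).sum() / len(true_provider)  # 0.75
# Among misclassified samples, share that at least hit the right provider
failed_provider_acc = (true_provider[mistakes] == pred_provider[mistakes]).sum() / mistakes.sum()  # 0.5
```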
cesnet_datazoo/pytables_data/data_scalers.py CHANGED
@@ -46,18 +46,20 @@ def fit_scalers(dataset_config: DatasetConfig, train_indices: np.ndarray) -> Non
     data_ppi = data_ppi.transpose(0, 2, 1).reshape(-1, ppi_channels)
     padding_mask = data_ppi[:, DIR_POS] == 0 # Mask of padded packets
     # Fit IPT scaler
-    train_ipt = data_ppi[:, IPT_POS].clip(max=clip_and_scale_ppi_transform.ipt_max, min=clip_and_scale_ppi_transform.ipt_min)
-    train_ipt[padding_mask] = np.nan # NaNs are ignored in sklearn scalers
-    if isinstance(clip_and_scale_ppi_transform.ipt_scaler, MinMaxScaler):
-        # Let zero be the minimum for minmax scaling
-        train_ipt = np.concatenate((train_ipt, [0]))
-    clip_and_scale_ppi_transform.ipt_scaler.fit(train_ipt.reshape(-1, 1))
+    if clip_and_scale_ppi_transform.ipt_scaler:
+        train_ipt = data_ppi[:, IPT_POS].clip(max=clip_and_scale_ppi_transform.ipt_max, min=clip_and_scale_ppi_transform.ipt_min)
+        train_ipt[padding_mask] = np.nan # NaNs are ignored in sklearn scalers
+        if isinstance(clip_and_scale_ppi_transform.ipt_scaler, MinMaxScaler):
+            # Let zero be the minimum for minmax scaling
+            train_ipt = np.concatenate((train_ipt, [0]))
+        clip_and_scale_ppi_transform.ipt_scaler.fit(train_ipt.reshape(-1, 1))
     # Fit packet sizes scaler
-    train_psizes = data_ppi[:, SIZE_POS].clip(max=clip_and_scale_ppi_transform.psizes_max, min=clip_and_scale_ppi_transform.pszies_min)
-    train_psizes[padding_mask] = np.nan
-    if isinstance(clip_and_scale_ppi_transform.psizes_scaler, MinMaxScaler):
-        train_psizes = np.concatenate((train_psizes, [0]))
-    clip_and_scale_ppi_transform.psizes_scaler.fit(train_psizes.reshape(-1, 1))
+    if clip_and_scale_ppi_transform.psizes_scaler:
+        train_psizes = data_ppi[:, SIZE_POS].clip(max=clip_and_scale_ppi_transform.psizes_max, min=clip_and_scale_ppi_transform.pszies_min)
+        train_psizes[padding_mask] = np.nan
+        if isinstance(clip_and_scale_ppi_transform.psizes_scaler, MinMaxScaler):
+            train_psizes = np.concatenate((train_psizes, [0]))
+        clip_and_scale_ppi_transform.psizes_scaler.fit(train_psizes.reshape(-1, 1))
     clip_and_scale_ppi_transform.needs_fitting = False
     json.dump(clip_and_scale_ppi_transform.to_dict(), open(os.path.join(train_data_path, "transforms", "ppi-transform.json"), "w"), indent=4)
 
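The new guards make both scalers optional; when a scaler is present, fitting relies on sklearn scalers ignoring NaNs and, for min-max scaling, on an appended zero pinning the minimum. A minimal sketch of the pattern under those assumptions, with illustrative data:

```python
import numpy as np
from sklearn.preprocessing import MinMaxScaler

ipt_scaler = MinMaxScaler()                      # may be None when IPT scaling is disabled
train_ipt = np.array([12.0, 80.0, 3.0, np.nan])  # NaN marks a padded packet; fit() ignores it

if ipt_scaler:
    if isinstance(ipt_scaler, MinMaxScaler):
        train_ipt = np.concatenate((train_ipt, [0]))  # let zero be the minimum
    ipt_scaler.fit(train_ipt.reshape(-1, 1))
    print(ipt_scaler.data_min_, ipt_scaler.data_max_)  # [0.] [80.]
```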
cesnet_datazoo/pytables_data/indices_setup.py CHANGED
@@ -1,6 +1,7 @@
 import dataclasses
 import logging
 import os
+import time
 import warnings
 from collections import namedtuple
 from enum import Enum
@@ -9,7 +10,8 @@ import numpy as np
 import pandas as pd
 
 from cesnet_datazoo.config import DatasetConfig
-from cesnet_datazoo.constants import INDICES_INDEX_POS, INDICES_LABEL_POS, INDICES_TABLE_POS
+from cesnet_datazoo.constants import (INDICES_APP_FIELD, INDICES_DTYPE, INDICES_INDEX_FIELD,
+                                      INDICES_TABLE_FIELD)
 from cesnet_datazoo.pytables_data.pytables_dataset import init_test_indices, init_train_indices
 from cesnet_datazoo.utils.fileutils import yaml_dump, yaml_load
 from cesnet_datazoo.utils.random import RandomizedSection, get_fresh_random_generator
@@ -21,8 +23,8 @@ IndicesTuple = namedtuple("IndicesTuple", ["train_indices", "val_known_indices",
 
 
 def sort_indices(indices: np.ndarray) -> np.ndarray:
-    idxs = np.argsort(indices[:, INDICES_INDEX_POS])
-    res = idxs[np.argsort(indices[idxs, INDICES_TABLE_POS], kind="stable")]
+    idxs = np.argsort(indices[INDICES_INDEX_FIELD])
+    res = idxs[np.argsort(indices[idxs][INDICES_TABLE_FIELD], kind="stable")]
     return indices[res]
 
 def subset_and_sort_indices(dataset_config: DatasetConfig, dataset_indices: IndicesTuple) -> IndicesTuple:
@@ -61,7 +63,7 @@ def subset_and_sort_indices(dataset_config: DatasetConfig, dataset_indices: Indi
 
 def date_weight_sample_train_indices(dataset_config: DatasetConfig, train_indices: np.ndarray, num_samples: int) -> np.ndarray:
     rng = get_fresh_random_generator(dataset_config=dataset_config, section=RandomizedSection.DATE_WEIGHT_SAMPLING)
-    indices_per_date = [train_indices[train_indices[:, INDICES_TABLE_POS] == i] for i in np.unique(train_indices[:, INDICES_TABLE_POS])]
+    indices_per_date = [train_indices[train_indices[INDICES_TABLE_FIELD] == i] for i in np.unique(train_indices[INDICES_TABLE_FIELD])]
     weights = np.array(dataset_config.train_dates_weigths)
     weights = weights / weights.sum()
     samples_per_date = np.ceil((weights * (num_samples))).astype(int)
@@ -77,7 +79,7 @@ def date_weight_sample_train_indices(dataset_config: DatasetConfig, train_indice
     return sampled_train_indices
 
 def indices_to_app_counts(indices: np.ndarray, tables_app_enum: dict[int, str]) -> pd.Series:
-    app_counts = pd.Series(indices[:, INDICES_LABEL_POS]).value_counts()
+    app_counts = pd.Series(indices[INDICES_APP_FIELD]).value_counts()
     app_counts.index = app_counts.index.map(lambda x: tables_app_enum[x])
     return app_counts
 
@@ -99,23 +101,26 @@ def init_or_load_train_indices(dataset_config: DatasetConfig, tables_app_enum: d
     init_train_data(train_data_path)
     if not os.path.isfile(os.path.join(train_data_path, TRAIN_DATA_PARAMS_FILE)):
         log.info("Processing train indices")
+        start_time = time.time()
         train_data_params = dataset_config._get_train_data_params()
         train_known_indices, train_unknown_indices, known_apps, unknown_apps = init_train_indices(train_data_params=train_data_params,
                                                                                                   database_path=dataset_config.database_path,
                                                                                                   tables_app_enum=tables_app_enum,
+                                                                                                  sni_column=dataset_config.sni_column,
                                                                                                   servicemap=servicemap,
                                                                                                   rng=get_fresh_random_generator(dataset_config=dataset_config, section=RandomizedSection.INIT_TRAIN_INDICES))
         if not disable_indices_cache:
             yaml_dump({k: str(v) if isinstance(v, Enum) else list(v) if isinstance(v, tuple) else v for k, v in dataclasses.asdict(train_data_params).items()}, os.path.join(train_data_path, TRAIN_DATA_PARAMS_FILE))
             yaml_dump(known_apps, os.path.join(train_data_path, "known_apps.yaml"))
             yaml_dump(unknown_apps, os.path.join(train_data_path, "unknown_apps.yaml"))
-            np.save(os.path.join(train_data_path, "train_known_indices.npy"), train_known_indices)
-            np.save(os.path.join(train_data_path, "train_unknown_indices.npy"), train_unknown_indices)
+            np.savez_compressed(os.path.join(train_data_path, "train_indices.npz"), train_known_indices=train_known_indices, train_unknown_indices=train_unknown_indices)
+        log.info(f"Processing indices took {time.time() - start_time:.2f} seconds")
     else:
         known_apps = yaml_load(os.path.join(train_data_path, "known_apps.yaml"))
         unknown_apps = yaml_load(os.path.join(train_data_path, "unknown_apps.yaml"))
-        train_known_indices = np.load(os.path.join(train_data_path, "train_known_indices.npy"))
-        train_unknown_indices = np.load(os.path.join(train_data_path, "train_unknown_indices.npy"))
+        loaded = np.load(os.path.join(train_data_path, "train_indices.npz"))
+        train_known_indices = loaded["train_known_indices"]
+        train_unknown_indices = loaded["train_unknown_indices"]
     return train_known_indices, train_unknown_indices, known_apps, unknown_apps
 
 def init_or_load_val_indices(dataset_config: DatasetConfig, known_apps: list[str], unknown_apps: list[str], tables_app_enum: dict[int, str], disable_indices_cache: bool) -> tuple[np.ndarray, np.ndarray, str]:
@@ -123,17 +128,20 @@ def init_or_load_val_indices(dataset_config: DatasetConfig, known_apps: list[str
     init_test_data(val_data_path)
     if not os.path.isfile(os.path.join(val_data_path, TEST_DATA_PARAMS_FILE)):
         log.info("Processing validation indices")
+        start_time = time.time()
         val_known_indices, val_unknown_indices = init_test_indices(test_data_params=val_data_params,
                                                                    database_path=dataset_config.database_path,
                                                                    tables_app_enum=tables_app_enum,
+                                                                   sni_column=dataset_config.sni_column,
                                                                    rng=get_fresh_random_generator(dataset_config=dataset_config, section=RandomizedSection.INIT_VAL_INIDICES))
         if not disable_indices_cache:
             yaml_dump(dataclasses.asdict(val_data_params), os.path.join(val_data_path, TEST_DATA_PARAMS_FILE))
-            np.save(os.path.join(val_data_path, "val_known_indices.npy"), val_known_indices)
-            np.save(os.path.join(val_data_path, "val_unknown_indices.npy"), val_unknown_indices)
+            np.savez_compressed(os.path.join(val_data_path, "val_indices.npz"), val_known_indices=val_known_indices, val_unknown_indices=val_unknown_indices)
+        log.info(f"Processing indices took {time.time() - start_time:.2f} seconds")
     else:
-        val_known_indices = np.load(os.path.join(val_data_path, "val_known_indices.npy"))
-        val_unknown_indices = np.load(os.path.join(val_data_path, "val_unknown_indices.npy"))
+        loaded = np.load(os.path.join(val_data_path, "val_indices.npz"))
+        val_known_indices = loaded["val_known_indices"]
+        val_unknown_indices = loaded["val_unknown_indices"]
     return val_known_indices, val_unknown_indices, val_data_path
 
 def init_or_load_test_indices(dataset_config: DatasetConfig, known_apps: list[str], unknown_apps: list[str], tables_app_enum: dict[int, str], disable_indices_cache: bool) -> tuple[np.ndarray, np.ndarray, str]:
@@ -141,17 +149,20 @@ def init_or_load_test_indices(dataset_config: DatasetConfig, known_apps: list[st
     init_test_data(test_data_path)
     if not os.path.isfile(os.path.join(test_data_path, TEST_DATA_PARAMS_FILE)):
         log.info("Processing test indices")
+        start_time = time.time()
         test_known_indices, test_unknown_indices = init_test_indices(test_data_params=test_data_params,
                                                                      database_path=dataset_config.database_path,
                                                                      tables_app_enum=tables_app_enum,
+                                                                     sni_column=dataset_config.sni_column,
                                                                      rng=get_fresh_random_generator(dataset_config=dataset_config, section=RandomizedSection.INIT_TEST_INDICES))
         if not disable_indices_cache:
             yaml_dump(dataclasses.asdict(test_data_params), os.path.join(test_data_path, TEST_DATA_PARAMS_FILE))
-            np.save(os.path.join(test_data_path, "test_known_indices.npy"), test_known_indices)
-            np.save(os.path.join(test_data_path, "test_unknown_indices.npy"), test_unknown_indices)
+            np.savez_compressed(os.path.join(test_data_path, "test_indices.npz"), test_known_indices=test_known_indices, test_unknown_indices=test_unknown_indices)
+        log.info(f"Processing indices took {time.time() - start_time:.2f} seconds")
    else:
-        test_known_indices = np.load(os.path.join(test_data_path, "test_known_indices.npy"))
-        test_unknown_indices = np.load(os.path.join(test_data_path, "test_unknown_indices.npy"))
+        loaded = np.load(os.path.join(test_data_path, "test_indices.npz"))
+        test_known_indices = loaded["test_known_indices"]
+        test_unknown_indices = loaded["test_unknown_indices"]
     return test_known_indices, test_unknown_indices, test_data_path
 
 def init_train_data(train_data_path: str):
@@ -164,4 +175,4 @@ def init_test_data(test_data_path: str):
     os.makedirs(os.path.join(test_data_path, "preload"), exist_ok=True)
 
 def no_indices() -> np.ndarray:
-    return np.zeros((0,3), dtype=np.int64)
+    return np.empty(shape=(0,), dtype=INDICES_DTYPE)
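Switching from two `.npy` files to one `np.savez_compressed` archive keeps the paired index arrays together and compresses well, since the fixed-width `U50` SNI field is mostly padding. A round-trip sketch (paths and sizes illustrative):

```python
import numpy as np

INDICES_DTYPE = [("TABLE", "int32"), ("INDEX", "int32"), ("APP", "int32"), ("SNI", "U50")]
train_known_indices = np.zeros(1000, dtype=INDICES_DTYPE)
train_unknown_indices = np.empty(shape=(0,), dtype=INDICES_DTYPE)

# Both arrays land in one compressed .npz file, keyed by the keyword names
np.savez_compressed("train_indices.npz",
                    train_known_indices=train_known_indices,
                    train_unknown_indices=train_unknown_indices)

loaded = np.load("train_indices.npz")
assert loaded["train_known_indices"].dtype == np.dtype(INDICES_DTYPE)
assert len(loaded["train_unknown_indices"]) == 0
```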
cesnet_datazoo/pytables_data/pytables_dataset.py CHANGED
@@ -16,8 +16,8 @@ from typing_extensions import assert_never
 
 from cesnet_datazoo.config import (AppSelection, MinTrainSamplesCheck, TestDataParams,
                                    TrainDataParams)
-from cesnet_datazoo.constants import (APP_COLUMN, INDICES_INDEX_POS, INDICES_TABLE_POS, PPI_COLUMN,
-                                      QUIC_SNI_COLUMN, TLS_SNI_COLUMN)
+from cesnet_datazoo.constants import (APP_COLUMN, INDICES_DTYPE, INDICES_INDEX_FIELD,
+                                      INDICES_TABLE_FIELD, PPI_COLUMN)
 from cesnet_datazoo.pytables_data.apps_split import (is_background_app,
                                                      split_apps_topx_with_provider_groups)
 
@@ -36,12 +36,13 @@ class PyTablesDataset(Dataset):
                 flowstats_features_phist: list[str],
                 other_fields: list[str],
                 ppi_channels: list[int],
+                sni_column: Optional[str] = None,
                 ppi_transform: Optional[Callable] = None,
                 flowstats_transform: Optional[Callable] = None,
                 flowstats_phist_transform: Optional[Callable] = None,
                 target_transform: Optional[Callable] = None,
                 return_tensors: bool = False,
-                return_all_fields: bool = False,
+                return_raw_fields: bool = False,
                 preload: bool = False,
                 preload_blob: Optional[str] = None,
                 disabled_apps: Optional[list[str]] = None,):
@@ -60,14 +61,14 @@ class PyTablesDataset(Dataset):
         self.flowstats_features_boolean = flowstats_features_boolean
         self.flowstats_features_phist = flowstats_features_phist
         self.other_fields = other_fields
+        self.sni_column = sni_column
         self.ppi_channels = ppi_channels
         self.ppi_transform = ppi_transform
         self.flowstats_transform = flowstats_transform
         self.flowstats_phist_transform = flowstats_phist_transform
         self.target_transform = target_transform
         self.return_tensors = return_tensors
-        self.return_all_fields = return_all_fields
-        self.sni_column = TLS_SNI_COLUMN if TLS_SNI_COLUMN in self.other_fields else QUIC_SNI_COLUMN if QUIC_SNI_COLUMN in self.other_fields else None
+        self.return_raw_fields = return_raw_fields
 
         self.preload = preload
         self.preload_blob = preload_blob
@@ -78,7 +79,7 @@ class PyTablesDataset(Dataset):
             batch_data = self.data[batch_idx]
         else:
             batch_data = load_data_from_tables(tables=self.tables, indices=self.indices[batch_idx], data_dtype=self.data_dtype)
-        if self.return_all_fields:
+        if self.return_raw_fields:
             return (batch_data, batch_idx)
 
         # Prepare data
@@ -157,11 +158,9 @@ class PyTablesDataset(Dataset):
         for i in range(len(tables)):
             base_labels[i] = tables[i].read(field=APP_COLUMN)
             base_indices[i] = np.nonzero(np.isin(base_labels[i], disabled_apps_ids, invert=True))[0]
-        indices = np.column_stack((
+        indices = np.array(list(zip(
             np.concatenate([[table_id] * len(base_indices[table_id]) for table_id in tables]),
-            np.concatenate(list(base_indices.values())),
-            np.concatenate(list(base_labels.values()))
-        )).astype(np.int32)
+            np.concatenate(list(base_indices.values())))), dtype=[field for field in INDICES_DTYPE if field[0] in [INDICES_INDEX_FIELD, INDICES_TABLE_FIELD]])
         self.indices = indices
         database.close()
 
@@ -173,16 +172,21 @@ def worker_init_fn(worker_id):
     dataset = worker_info.dataset
     dataset.pytables_worker_init(worker_id)
 
-def init_train_indices(train_data_params: TrainDataParams, database_path: str, tables_app_enum: dict[int, str], servicemap: pd.DataFrame, rng: np.random.RandomState) -> tuple[np.ndarray, np.ndarray, list[str], list[str]]:
+def init_train_indices(train_data_params: TrainDataParams, database_path: str, tables_app_enum: dict[int, str], sni_column: Optional[str], servicemap: pd.DataFrame, rng: np.random.RandomState) -> tuple[np.ndarray, np.ndarray, list[str], list[str]]:
     database, train_tables = load_database(database_path, tables_paths=train_data_params.train_tables_paths)
     inverted_tables_app_enum = {v: k for k, v in tables_app_enum.items()}
-    all_app_labels = {}
+    all_labels = {}
+    all_sni_domains = {}
     app_counts = pd.Series(dtype="int64")
     start_time = time.time()
     for i, table_path in enumerate(train_data_params.train_tables_paths):
-        all_app_labels[i] = train_tables[i].read(field=APP_COLUMN)
-        log.info(f"Reading app column for table {table_path} took {time.time() - start_time:.2f} seconds"); start_time = time.time()
-        app_counts = app_counts.add(pd.Series(all_app_labels[i]).value_counts(), fill_value=0)
+        all_labels[i] = train_tables[i].read(field=APP_COLUMN)
+        if sni_column is not None:
+            all_sni_domains[i] = train_tables[i].read(field=sni_column)
+        else:
+            all_sni_domains[i] = np.full_like(all_labels[i], "", dtype="U1")
+        log.info(f"Reading app and SNI columns for table {table_path} took {time.time() - start_time:.2f} seconds"); start_time = time.time()
+        app_counts = app_counts.add(pd.Series(all_labels[i]).value_counts(), fill_value=0)
     database.close()
     # Handle disabled apps and apps with less than min_samples_per_app samples
     if len(train_data_params.disabled_apps) > 0:
@@ -202,8 +206,9 @@ def init_train_indices(train_data_params: TrainDataParams, database_path: str, t
     # Base indices are indices of samples that are not disabled and have enough samples
     base_indices = {}
     for i, table_path in enumerate(train_data_params.train_tables_paths):
-        base_indices[i] = np.nonzero(np.isin(all_app_labels[i], disabled_apps_ids, invert=True))[0]
-    base_labels = {table_id: arr[base_indices[table_id]] for table_id, arr in all_app_labels.items()}
+        base_indices[i] = np.nonzero(np.isin(all_labels[i], disabled_apps_ids, invert=True))[0]
+    base_labels = {table_id: arr[base_indices[table_id]] for table_id, arr in all_labels.items()}
+    base_sni_domains = {table_id: arr[base_indices[table_id]] for table_id, arr in all_sni_domains.items()}
     # Apps selection
     if train_data_params.apps_selection != AppSelection.FIXED:
         app_counts = app_counts[[app for app in app_counts.index.tolist() if app not in disabled_apps_ids]]
@@ -230,26 +235,38 @@ def init_train_indices(train_data_params: TrainDataParams, database_path: str, t
     known_apps_ids = [inverted_tables_app_enum[app] for app in known_apps]
     unknown_apps_ids = [inverted_tables_app_enum[app] for app in unknown_apps]
 
-    train_known_indices, train_unknown_indices = convert_dict_indices(base_indices=base_indices, base_labels=base_labels, known_apps_ids=known_apps_ids, unknown_apps_ids=unknown_apps_ids)
+    train_known_indices, train_unknown_indices = convert_dict_indices(base_indices=base_indices,
+                                                                      base_labels=base_labels,
+                                                                      base_sni_domains=base_sni_domains,
+                                                                      known_apps_ids=known_apps_ids,
+                                                                      unknown_apps_ids=unknown_apps_ids)
     rng.shuffle(train_known_indices)
     rng.shuffle(train_unknown_indices)
-    log.info(f"Processing indices took {time.time() - start_time:.2f} seconds"); start_time = time.time()
     return train_known_indices, train_unknown_indices, known_apps, unknown_apps
 
-def init_test_indices(test_data_params: TestDataParams, database_path: str, tables_app_enum: dict[int, str], rng: np.random.RandomState) -> tuple[np.ndarray, np.ndarray]:
+def init_test_indices(test_data_params: TestDataParams, database_path: str, tables_app_enum: dict[int, str], sni_column: Optional[str], rng: np.random.RandomState) -> tuple[np.ndarray, np.ndarray]:
     database, test_tables = load_database(database_path, tables_paths=test_data_params.test_tables_paths)
     inverted_tables_app_enum = {v: k for k, v in tables_app_enum.items()}
     base_labels = {}
+    base_sni_domains = {}
     base_indices = {}
     start_time = time.time()
     for i, table_path in enumerate(test_data_params.test_tables_paths):
         base_labels[i] = test_tables[i].read(field=APP_COLUMN)
-        log.info(f"Reading app column for table {table_path} took {time.time() - start_time:.2f} seconds"); start_time = time.time()
+        if sni_column is not None:
+            base_sni_domains[i] = test_tables[i].read(field=sni_column)
+        else:
+            base_sni_domains[i] = np.full_like(base_labels[i], "", dtype="U1")
+        log.info(f"Reading app and SNI columns for table {table_path} took {time.time() - start_time:.2f} seconds"); start_time = time.time()
         base_indices[i] = np.arange(len(test_tables[i]))
     database.close()
     known_apps_ids = [inverted_tables_app_enum[app] for app in test_data_params.known_apps]
     unknown_apps_ids = [inverted_tables_app_enum[app] for app in test_data_params.unknown_apps]
-    test_known_indices, test_unknown_indices = convert_dict_indices(base_indices=base_indices, base_labels=base_labels, known_apps_ids=known_apps_ids, unknown_apps_ids=unknown_apps_ids)
+    test_known_indices, test_unknown_indices = convert_dict_indices(base_indices=base_indices,
+                                                                    base_labels=base_labels,
+                                                                    base_sni_domains=base_sni_domains,
+                                                                    known_apps_ids=known_apps_ids,
+                                                                    unknown_apps_ids=unknown_apps_ids)
     rng.shuffle(test_known_indices)
     rng.shuffle(test_unknown_indices)
     log.info(f"Processing indices took {time.time() - start_time:.2f} seconds"); start_time = time.time()
@@ -271,28 +288,32 @@ def list_all_tables(database_path: str) -> list[str]:
     with tb.open_file(database_path, mode="r") as database:
         return list(map(lambda x: x._v_pathname, iter(database.get_node(f"/flows"))))
 
-def convert_dict_indices(base_indices: dict[int, np.ndarray], base_labels: dict[int, np.ndarray], known_apps_ids: list[int], unknown_apps_ids: list[int]) -> tuple[np.ndarray, np.ndarray]:
+def convert_dict_indices(base_indices: dict[int, np.ndarray], base_labels: dict[int, np.ndarray], base_sni_domains: dict[int, np.ndarray], known_apps_ids: list[int], unknown_apps_ids: list[int]) -> tuple[np.ndarray, np.ndarray]:
     is_known = {table_id: np.isin(table_arr, known_apps_ids) for table_id, table_arr in base_labels.items()}
     is_unknown = {table_id: np.isin(table_arr, unknown_apps_ids) for table_id, table_arr in base_labels.items()}
     known_indices_dict = {table_id: table_arr[is_known[table_id]] for table_id, table_arr in base_indices.items()}
     unknown_indices_dict = {table_id: table_arr[is_unknown[table_id]] for table_id, table_arr in base_indices.items()}
     known_labels_dict = {table_id: table_arr[is_known[table_id]] for table_id, table_arr in base_labels.items()}
     unknown_labels_dict = {table_id: table_arr[is_unknown[table_id]] for table_id, table_arr in base_labels.items()}
-    known_indices = np.column_stack((
+    known_sni_domains_dict = {table_id: table_arr[is_known[table_id]] for table_id, table_arr in base_sni_domains.items()}
+    unknown_sni_domains_dict = {table_id: table_arr[is_unknown[table_id]] for table_id, table_arr in base_sni_domains.items()}
+    known_indices = np.array(list(zip(
         np.concatenate([[table_id] * table_arr.sum() for table_id, table_arr in is_known.items()]),
         np.concatenate(list(known_indices_dict.values())),
-        np.concatenate(list(known_labels_dict.values()))))
-    unknown_indices = np.column_stack((
+        np.concatenate(list(known_labels_dict.values())),
+        np.concatenate(list(known_sni_domains_dict.values())))), dtype=INDICES_DTYPE)
+    unknown_indices = np.array(list(zip(
         np.concatenate([[table_id] * table_arr.sum() for table_id, table_arr in is_unknown.items()]),
         np.concatenate(list(unknown_indices_dict.values())),
-        np.concatenate(list(unknown_labels_dict.values()))))
+        np.concatenate(list(unknown_labels_dict.values())),
+        np.concatenate(list(unknown_sni_domains_dict.values())))), dtype=INDICES_DTYPE)
     return known_indices, unknown_indices
 
 def load_data_from_tables(tables, indices: np.ndarray, data_dtype: np.dtype) -> np.ndarray:
-    sorted_indices = indices[indices[:, INDICES_TABLE_POS].argsort(kind="stable")]
-    unique_tables, split_bounderies = np.unique(sorted_indices[:, INDICES_TABLE_POS], return_index=True)
+    sorted_indices = indices[indices[INDICES_TABLE_FIELD].argsort(kind="stable")]
+    unique_tables, split_bounderies = np.unique(sorted_indices[INDICES_TABLE_FIELD], return_index=True)
     indices_per_table = np.split(sorted_indices, split_bounderies[1:])
     data = np.zeros(len(indices), dtype=data_dtype)
     for table_id, table_indices in zip(unique_tables, indices_per_table):
-        data[np.where(indices[:, INDICES_TABLE_POS] == table_id)[0]] = tables[table_id].read_coordinates(table_indices[:, INDICES_INDEX_POS])
+        data[np.where(indices[INDICES_TABLE_FIELD] == table_id)[0]] = tables[table_id].read_coordinates(table_indices[INDICES_INDEX_FIELD])
     return data
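`load_data_from_tables` groups rows per table with a stable field-based sort so each PyTables table is read with a single `read_coordinates` call. A small sketch of the grouping logic on a toy structured array:

```python
import numpy as np

dtype = [("TABLE", "int32"), ("INDEX", "int32")]
indices = np.array([(1, 5), (0, 9), (1, 2), (0, 3)], dtype=dtype)

# Stable sort by table id, preserving the original order within each table
sorted_indices = indices[indices["TABLE"].argsort(kind="stable")]
unique_tables, split_boundaries = np.unique(sorted_indices["TABLE"], return_index=True)
indices_per_table = np.split(sorted_indices, split_boundaries[1:])

for table_id, table_indices in zip(unique_tables, indices_per_table):
    print(table_id, table_indices["INDEX"])  # 0 [9 3]  then  1 [5 2]
```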
cesnet_datazoo-0.1.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: cesnet-datazoo
-Version: 0.1.3
+Version: 0.1.5
 Summary: A toolkit for large network traffic datasets
 Author-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
 Maintainer-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
cesnet_datazoo-0.1.5.dist-info/RECORD CHANGED
@@ -1,30 +1,30 @@
 cesnet_datazoo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cesnet_datazoo/config.py,sha256=Q-gSD0iuQFJSaD_19R7Dtrq-7QlKFsLkvXcWn14hhJ8,38035
-cesnet_datazoo/constants.py,sha256=SiA0hOzoGJcC_o-Qp67ETDjrBmJdUCr8LKSZHkPj8M4,1294
+cesnet_datazoo/config.py,sha256=x8bugBZmBZ9PNd0D5TNHLPHbvx4ZTCQGwQzXPypenjc,38406
+cesnet_datazoo/constants.py,sha256=6GhcIyjVnWYrVnxRgTlGuiWRtvwZL1KqyzMJS26ge2E,1481
 cesnet_datazoo/datasets/__init__.py,sha256=8ziQ3EUzUh5fMfWWXwk0cqYk0lOUNU7zbi0Gom3bLnI,443
-cesnet_datazoo/datasets/cesnet_dataset.py,sha256=PUu4jxRcvsB4xTzAp-rwQZbReuuYSc2Ybqx6ykI0bn4,46458
+cesnet_datazoo/datasets/cesnet_dataset.py,sha256=14uKWWSGIkH3GM_BDUSYyCIoOh1L-I4bH0zu0m3DkkQ,46988
 cesnet_datazoo/datasets/datasets.py,sha256=Bn4SU1k5og6AsUlnPapFPeu4uGlpRH-IaOSafz0ZT2k,3617
 cesnet_datazoo/datasets/datasets_constants.py,sha256=1P54Ns8wCQMemdKNe8OH7cVUfkxs3vL29ugSmOLXceI,29154
 cesnet_datazoo/datasets/loaders.py,sha256=9KgRY-Y8CcgtXbgqWpAaG7gyOAsSf278w7b1eHwTSyE,1854
-cesnet_datazoo/datasets/statistics.py,sha256=wR8QISIh-KC7CQ5SjN7WoTMFaoRuq0G7pgTFGhC8ek0,15137
+cesnet_datazoo/datasets/statistics.py,sha256=DfeCq-o7ML8u2Wg_AlAaarEBZ5oulCJz4S7enGswXRg,15137
 cesnet_datazoo/datasets/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cesnet_datazoo/datasets/metadata/dataset_metadata.py,sha256=Ntlp8mHUSr7g-ZTvtBVh238TswZHwGAudMuE52-OA-c,1608
+cesnet_datazoo/datasets/metadata/dataset_metadata.py,sha256=o0rHXZ9S5NjjboGiXRQkBoZ7kYKlweQMRsMSAQm1EPE,1623
 cesnet_datazoo/datasets/metadata/metadata.csv,sha256=lG1Wz7Rr66pG2hWnMqoERIN_oX53DpAmlRZLw3T2p34,2175
 cesnet_datazoo/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cesnet_datazoo/metrics/classification_report.py,sha256=stAWGWXbx24jkmgivXk3LvWycHBBAVo_osPsKUzhhwM,4038
-cesnet_datazoo/metrics/provider_metrics.py,sha256=sRg2bdRTzLLTmiVjacBtGez4LEIfr35hSvMBwW-W73U,1303
+cesnet_datazoo/metrics/classification_report.py,sha256=kqVW35uEctTiWpMqxhWzOmmDkV4p3yEFLMRqLn_R6AU,3981
+cesnet_datazoo/metrics/provider_metrics.py,sha256=zoX0ps8BzEs3ml70g9dWWeLPflNAKUGYOEFYqdBbNY4,1374
 cesnet_datazoo/pytables_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cesnet_datazoo/pytables_data/apps_split.py,sha256=RjLFomrlBCmnBn08FDw1IzL3PuQf4914yJQzwhiXH_E,1411
-cesnet_datazoo/pytables_data/data_scalers.py,sha256=ednTRVl-sjrFLX6vwzCuPLJDpFuwNWDlJz7msV3yM9M,5083
-cesnet_datazoo/pytables_data/indices_setup.py,sha256=2Qqe8tSuCsgTNRFuFer2Ai_Rh1_nZMMb0R14V3a_E-U,13011
-cesnet_datazoo/pytables_data/pytables_dataset.py,sha256=lmQf_3U5TtunYY-to7zcepeGGXbZw-7eVz-_xZD3N3Q,17881
+cesnet_datazoo/pytables_data/data_scalers.py,sha256=gW75d-DGBokMKNUwM_5A3W3XCZ12WYXefGtpD8xYf1Y,5236
+cesnet_datazoo/pytables_data/indices_setup.py,sha256=M5J2BevkQK8fuC22vUauKyKAEVwYg8xRz9JJK8E1VX8,13717
+cesnet_datazoo/pytables_data/pytables_dataset.py,sha256=YGbzYKrSklCu3J52Xbdcs3zZsYroBBtP8ulgS1c5Fnw,19431
 cesnet_datazoo/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cesnet_datazoo/utils/class_info.py,sha256=H5UgyRqXIepBJmkLQ1gAIXV4owKSoIllguRiqFTu5XU,2462
 cesnet_datazoo/utils/download.py,sha256=hG5V1ZYZGtqCzlVV76NMgOZkSKOywdOFiq9Lagkgego,1441
 cesnet_datazoo/utils/fileutils.py,sha256=XA_VWDuTiCXnoOgHPUzsmbnLFgrlxOo5cvUY_OBJUR8,642
 cesnet_datazoo/utils/random.py,sha256=Dqgm_T25ljbew-OJozK90PsiXKnd4Kw6lcUexxF6vIc,575
-cesnet_datazoo-0.1.3.dist-info/LICENCE,sha256=69Wc69APiM1YKrFOIipG7jjU2lk89WQuO_U0AXKU8KE,1541
-cesnet_datazoo-0.1.3.dist-info/METADATA,sha256=IFl4-ePbSMAjWXbC05jBbVCruylejK9g8x9rTB6LIGs,12964
-cesnet_datazoo-0.1.3.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-cesnet_datazoo-0.1.3.dist-info/top_level.txt,sha256=bu1Z8zaI_1Id_ZaYyvJnxIBa87OSrdlZ8J2OBMggK5o,15
-cesnet_datazoo-0.1.3.dist-info/RECORD,,
+cesnet_datazoo-0.1.5.dist-info/LICENCE,sha256=69Wc69APiM1YKrFOIipG7jjU2lk89WQuO_U0AXKU8KE,1541
+cesnet_datazoo-0.1.5.dist-info/METADATA,sha256=81_DLVvxQ_dBMZR0c9F3rIMvX6KSlIgi0D0fRiupmek,12964
+cesnet_datazoo-0.1.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+cesnet_datazoo-0.1.5.dist-info/top_level.txt,sha256=bu1Z8zaI_1Id_ZaYyvJnxIBa87OSrdlZ8J2OBMggK5o,15
+cesnet_datazoo-0.1.5.dist-info/RECORD,,