PyPI - cesnet-datazoo - Versions diffs - 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl - Mend

cesnet-datazoo 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

cesnet_datazoo/config.py CHANGED Viewed

@@ -142,7 +142,7 @@ class DatasetConfig():
         need_test_set: Use to disable the test set. `Default: True`
         train_period_name: Name of the train period. See [instructions][config.DatasetConfig--how-to-configure-train-validation-and-test-sets].
         train_dates: Dates used for creating a train set.
-        train_dates_weigths: To use a non-uniform distribution of samples across train dates.
+        train_dates_weights: To use a non-uniform distribution of samples across train dates.
         val_approach: How a validation set should be created. Either split train data into train and validation or have a separate validation period. `Default: SPLIT_FROM_TRAIN`
         train_val_split_fraction: The fraction of validation samples when splitting from the train set. `Default: 0.2`
         val_period_name: Name of the validation period. See [instructions][config.DatasetConfig--how-to-configure-train-validation-and-test-sets].
@@ -219,7 +219,7 @@ class DatasetConfig():
     need_test_set: bool = True
     train_period_name: str = ""
     train_dates: list[str] = field(default_factory=list)
-    train_dates_weigths: Optional[list[int]] = None
+    train_dates_weights: Optional[list[int]] = None
     val_approach: ValidationApproach = ValidationApproach.SPLIT_FROM_TRAIN
     train_val_split_fraction: float = 0.2
     val_period_name: str = ""
@@ -369,16 +369,16 @@ class DatasetConfig():
                 raise ValueError("QUIC datasets do not support use_tcp_features")
             if self.use_push_flags:
                 raise ValueError("QUIC datasets do not support use_push_flags")
-        # When train_dates_weigths are used, train_size and val_known_size have to be specified
-        if self.train_dates_weigths is not None:
+        # When train_dates_weights are used, train_size and val_known_size have to be specified
+        if self.train_dates_weights is not None:
             if not self.need_train_set:
-                raise ValueError("train_dates_weigths cannot be specified when need_train_set is false")
-            if len(self.train_dates_weigths) != len(self.train_dates):
-                raise ValueError("train_dates_weigths has to have the same length as train_dates")
+                raise ValueError("train_dates_weights cannot be specified when need_train_set is false")
+            if len(self.train_dates_weights) != len(self.train_dates):
+                raise ValueError("train_dates_weights has to have the same length as train_dates")
             if self.train_size == "all":
-                raise ValueError("train_size cannot be 'all' when train_dates_weigths are speficied")
+                raise ValueError("train_size cannot be 'all' when train_dates_weights are speficied")
             if self.val_approach == ValidationApproach.SPLIT_FROM_TRAIN and self.val_known_size == "all":
-                raise ValueError("val_known_size cannot be 'all' when train_dates_weigths are speficied and validation_approach is split-from-train")
+                raise ValueError("val_known_size cannot be 'all' when train_dates_weights are speficied and validation_approach is split-from-train")
         # App selection
         if self.apps_selection == AppSelection.ALL_KNOWN:
             self.val_unknown_size = 0

cesnet_datazoo/datasets/cesnet_dataset.py CHANGED Viewed

@@ -532,7 +532,7 @@ class CesnetDataset():
                                                                                                         servicemap=servicemap,
                                                                                                         disable_indices_cache=disable_indices_cache,)
             # Date weight sampling of train indices
-            if dataset_config.train_dates_weigths is not None:
+            if dataset_config.train_dates_weights is not None:
                 assert dataset_config.train_size != "all"
                 if dataset_config.val_approach == ValidationApproach.SPLIT_FROM_TRAIN:
                     # requested number of samples is train_size + val_known_size when using the split-from-train validation approach
@@ -563,7 +563,7 @@ class CesnetDataset():
                 val_data_path = dataset_config._get_train_data_path()
                 val_unknown_indices = train_unknown_indices
                 train_labels = train_indices[INDICES_APP_FIELD]
-                if dataset_config.train_dates_weigths is not None:
+                if dataset_config.train_dates_weights is not None:
                     assert dataset_config.val_known_size != "all"
                     # When weight sampling is used, val_known_size is kept but the resulting train size can be smaller due to no enough samples in some train dates
                     if dataset_config.val_known_size > len(train_indices):

cesnet_datazoo/pytables_data/indices_setup.py CHANGED Viewed

@@ -64,11 +64,11 @@ def subset_and_sort_indices(dataset_config: DatasetConfig, dataset_indices: Indi
 def date_weight_sample_train_indices(dataset_config: DatasetConfig, train_indices: np.ndarray, num_samples: int) -> np.ndarray:
     rng = get_fresh_random_generator(dataset_config=dataset_config, section=RandomizedSection.DATE_WEIGHT_SAMPLING)
     indices_per_date = [train_indices[train_indices[INDICES_TABLE_FIELD] == i] for i in np.unique(train_indices[INDICES_TABLE_FIELD])]
-    weights = np.array(dataset_config.train_dates_weigths)
+    weights = np.array(dataset_config.train_dates_weights)
     weights = weights / weights.sum()
     samples_per_date = np.ceil((weights * (num_samples))).astype(int)
     samples_per_date_clipped = np.clip(samples_per_date, a_max=list(map(len, indices_per_date)), a_min=0)
-    df = pd.DataFrame(data={"Dates": dataset_config.train_dates, "Weights": dataset_config.train_dates_weigths, "Requested Samples": samples_per_date, "Available Samples": samples_per_date_clipped})
+    df = pd.DataFrame(data={"Dates": dataset_config.train_dates, "Weights": dataset_config.train_dates_weights, "Requested Samples": samples_per_date, "Available Samples": samples_per_date_clipped})
     log.info(f"Weight sampling per date with requsted total number of samples {num_samples} (train_size + val_known_size when using the split-from-train validation approach; train_size otherwise)")
     for l in df.to_string(index=False).splitlines():
         log.info(l)

{cesnet_datazoo-0.1.11.dist-info → cesnet_datazoo-0.1.13.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cesnet-datazoo
-Version: 0.1.11
+Version: 0.1.13
 Summary: A toolkit for large network traffic datasets
 Author-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
 Maintainer-email: Jan Luxemburk <luxemburk@cesnet.cz>, Karel Hynek <hynekkar@cesnet.cz>
@@ -19,7 +19,7 @@ Requires-Dist: cesnet_models
 Requires-Dist: matplotlib
 Requires-Dist: numpy
 Requires-Dist: pandas
-Requires-Dist: pydantic!=2.10.*,!=2.9.*,<2.12.0,>=2.0
+Requires-Dist: pydantic!=2.9.*,<2.12.0,>=2.0
 Requires-Dist: PyYAML
 Requires-Dist: requests
 Requires-Dist: scikit-learn

{cesnet_datazoo-0.1.11.dist-info → cesnet_datazoo-0.1.13.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
 cesnet_datazoo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-cesnet_datazoo/config.py,sha256=wkpD_OL3gRXX2t0WDfDMsBD2A3vEdAjSm4yXhzsR8T0,38536
+cesnet_datazoo/config.py,sha256=4lE5pnUUhJkQ2KuEhekzUo3NWSzSKOsusiBWbGMX2yU,38536
 cesnet_datazoo/constants.py,sha256=6GhcIyjVnWYrVnxRgTlGuiWRtvwZL1KqyzMJS26ge2E,1481
 cesnet_datazoo/datasets/__init__.py,sha256=8ziQ3EUzUh5fMfWWXwk0cqYk0lOUNU7zbi0Gom3bLnI,443
-cesnet_datazoo/datasets/cesnet_dataset.py,sha256=V2rBE4Mh2gaMw-NjQ4xHp6ViuuvCdEzB-ymX3CYyfkc,47762
+cesnet_datazoo/datasets/cesnet_dataset.py,sha256=lcQ3ovsKE3sEgrYhx-JaDbeyu7UkXNhsZRAPpZAS6-g,47762
 cesnet_datazoo/datasets/datasets.py,sha256=Bn4SU1k5og6AsUlnPapFPeu4uGlpRH-IaOSafz0ZT2k,3617
 cesnet_datazoo/datasets/datasets_constants.py,sha256=1P54Ns8wCQMemdKNe8OH7cVUfkxs3vL29ugSmOLXceI,29154
 cesnet_datazoo/datasets/loaders.py,sha256=9KgRY-Y8CcgtXbgqWpAaG7gyOAsSf278w7b1eHwTSyE,1854
@@ -16,15 +16,15 @@ cesnet_datazoo/metrics/provider_metrics.py,sha256=zoX0ps8BzEs3ml70g9dWWeLPflNAKU
 cesnet_datazoo/pytables_data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cesnet_datazoo/pytables_data/apps_split.py,sha256=RjLFomrlBCmnBn08FDw1IzL3PuQf4914yJQzwhiXH_E,1411
 cesnet_datazoo/pytables_data/data_scalers.py,sha256=xPL0SCLByDOgKv1Apqi5XQd501mIfsF8FdonmRQ0zzQ,5236
-cesnet_datazoo/pytables_data/indices_setup.py,sha256=M5J2BevkQK8fuC22vUauKyKAEVwYg8xRz9JJK8E1VX8,13717
+cesnet_datazoo/pytables_data/indices_setup.py,sha256=yCYWjkCPIj0en3btnC-C7cte0CqbqMZzOnaVR9jaNes,13717
 cesnet_datazoo/pytables_data/pytables_dataset.py,sha256=YGbzYKrSklCu3J52Xbdcs3zZsYroBBtP8ulgS1c5Fnw,19431
 cesnet_datazoo/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 cesnet_datazoo/utils/class_info.py,sha256=H5UgyRqXIepBJmkLQ1gAIXV4owKSoIllguRiqFTu5XU,2462
 cesnet_datazoo/utils/download.py,sha256=hG5V1ZYZGtqCzlVV76NMgOZkSKOywdOFiq9Lagkgego,1441
 cesnet_datazoo/utils/fileutils.py,sha256=XA_VWDuTiCXnoOgHPUzsmbnLFgrlxOo5cvUY_OBJUR8,642
 cesnet_datazoo/utils/random.py,sha256=Dqgm_T25ljbew-OJozK90PsiXKnd4Kw6lcUexxF6vIc,575
-cesnet_datazoo-0.1.11.dist-info/licenses/LICENCE,sha256=69Wc69APiM1YKrFOIipG7jjU2lk89WQuO_U0AXKU8KE,1541
-cesnet_datazoo-0.1.11.dist-info/METADATA,sha256=NBfTvdZUASh2-Et2p9nExhveoHkVaWtvZSLbojZqiDw,12583
-cesnet_datazoo-0.1.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-cesnet_datazoo-0.1.11.dist-info/top_level.txt,sha256=bu1Z8zaI_1Id_ZaYyvJnxIBa87OSrdlZ8J2OBMggK5o,15
-cesnet_datazoo-0.1.11.dist-info/RECORD,,
+cesnet_datazoo-0.1.13.dist-info/licenses/LICENCE,sha256=69Wc69APiM1YKrFOIipG7jjU2lk89WQuO_U0AXKU8KE,1541
+cesnet_datazoo-0.1.13.dist-info/METADATA,sha256=AKyK8HNdpysKMUz5xqKL6TSXMgbVAPsCCytalVg7sWA,12574
+cesnet_datazoo-0.1.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cesnet_datazoo-0.1.13.dist-info/top_level.txt,sha256=bu1Z8zaI_1Id_ZaYyvJnxIBa87OSrdlZ8J2OBMggK5o,15
+cesnet_datazoo-0.1.13.dist-info/RECORD,,

{cesnet_datazoo-0.1.11.dist-info → cesnet_datazoo-0.1.13.dist-info}/WHEEL RENAMED Viewed

File without changes

{cesnet_datazoo-0.1.11.dist-info → cesnet_datazoo-0.1.13.dist-info}/licenses/LICENCE RENAMED Viewed

File without changes

{cesnet_datazoo-0.1.11.dist-info → cesnet_datazoo-0.1.13.dist-info}/top_level.txt RENAMED Viewed

File without changes

cesnet-datazoo 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

cesnet-datazoo 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl