replay-rec 0.20.0.tar.gz → 0.20.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. {replay_rec-0.20.0 → replay_rec-0.20.1}/PKG-INFO +7 -1
  2. {replay_rec-0.20.0 → replay_rec-0.20.1}/README.md +6 -0
  3. {replay_rec-0.20.0 → replay_rec-0.20.1}/pyproject.toml +2 -2
  4. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/__init__.py +1 -1
  5. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/dataset.py +10 -9
  6. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/dataset_utils/dataset_label_encoder.py +5 -4
  7. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/nn/schema.py +9 -18
  8. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/nn/sequence_tokenizer.py +26 -18
  9. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/nn/sequential_dataset.py +22 -18
  10. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/nn/torch_sequential_dataset.py +17 -16
  11. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/nn/utils.py +2 -1
  12. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/schema.py +3 -12
  13. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/base_metric.py +11 -10
  14. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/categorical_diversity.py +8 -8
  15. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/coverage.py +4 -4
  16. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/experiment.py +3 -3
  17. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/hitrate.py +1 -3
  18. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/map.py +1 -3
  19. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/mrr.py +1 -3
  20. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/ndcg.py +1 -2
  21. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/novelty.py +3 -3
  22. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/offline_metrics.py +16 -16
  23. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/precision.py +1 -3
  24. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/recall.py +1 -3
  25. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/rocauc.py +1 -3
  26. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/surprisal.py +4 -4
  27. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/torch_metrics_builder.py +13 -12
  28. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/unexpectedness.py +2 -2
  29. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/als.py +2 -2
  30. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/association_rules.py +4 -3
  31. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/base_neighbour_rec.py +3 -2
  32. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/base_rec.py +11 -10
  33. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/cat_pop_rec.py +2 -1
  34. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/ann_mixin.py +2 -1
  35. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_builders/executor_hnswlib_index_builder.py +2 -1
  36. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_builders/executor_nmslib_index_builder.py +2 -1
  37. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/lin_ucb.py +57 -11
  38. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/optimizer_utils/optimizer_factory.py +2 -2
  39. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/bert4rec/dataset.py +5 -18
  40. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/bert4rec/lightning.py +3 -3
  41. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/bert4rec/model.py +2 -2
  42. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/callbacks/prediction_callbacks.py +12 -12
  43. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/callbacks/validation_callback.py +9 -9
  44. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/compiled/base_compiled_model.py +5 -5
  45. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/postprocessors/_base.py +2 -3
  46. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/postprocessors/postprocessors.py +11 -11
  47. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/sasrec/dataset.py +3 -16
  48. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/sasrec/lightning.py +3 -3
  49. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/sasrec/model.py +8 -8
  50. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/slim.py +2 -2
  51. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/ucb.py +2 -2
  52. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/word2vec.py +3 -3
  53. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/preprocessing/discretizer.py +8 -7
  54. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/preprocessing/filters.py +4 -4
  55. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/preprocessing/history_based_fp.py +6 -6
  56. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/preprocessing/label_encoder.py +8 -7
  57. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/scenarios/fallback.py +4 -3
  58. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/splitters/base_splitter.py +3 -3
  59. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/splitters/cold_user_random_splitter.py +4 -4
  60. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/splitters/k_folds.py +4 -4
  61. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/splitters/last_n_splitter.py +10 -10
  62. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/splitters/new_users_splitter.py +4 -4
  63. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/splitters/random_splitter.py +4 -4
  64. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/splitters/ratio_splitter.py +10 -10
  65. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/splitters/time_splitter.py +6 -6
  66. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/splitters/two_stage_splitter.py +4 -4
  67. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/utils/__init__.py +1 -1
  68. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/utils/common.py +1 -1
  69. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/utils/session_handler.py +2 -2
  70. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/utils/spark_utils.py +6 -5
  71. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/utils/types.py +3 -1
  72. replay_rec-0.20.0/replay/utils/warnings.py +0 -26
  73. {replay_rec-0.20.0 → replay_rec-0.20.1}/LICENSE +0 -0
  74. {replay_rec-0.20.0 → replay_rec-0.20.1}/NOTICE +0 -0
  75. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/__init__.py +0 -0
  76. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/dataset_utils/__init__.py +0 -0
  77. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/nn/__init__.py +0 -0
  78. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/spark_schema.py +0 -0
  79. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/__init__.py +0 -0
  80. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/metrics/descriptors.py +0 -0
  81. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/__init__.py +0 -0
  82. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/cluster.py +0 -0
  83. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/common.py +0 -0
  84. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/__init__.py +0 -0
  85. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/__init__.py +0 -0
  86. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/entities/__init__.py +0 -0
  87. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/entities/base_hnsw_param.py +0 -0
  88. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/entities/hnswlib_param.py +0 -0
  89. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/entities/nmslib_hnsw_param.py +0 -0
  90. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_builders/__init__.py +0 -0
  91. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_builders/base_index_builder.py +0 -0
  92. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_builders/driver_hnswlib_index_builder.py +0 -0
  93. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_builders/driver_nmslib_index_builder.py +0 -0
  94. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_builders/nmslib_index_builder_mixin.py +0 -0
  95. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_inferers/__init__.py +0 -0
  96. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_inferers/base_inferer.py +0 -0
  97. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_inferers/hnswlib_filter_index_inferer.py +0 -0
  98. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_inferers/hnswlib_index_inferer.py +0 -0
  99. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_inferers/nmslib_filter_index_inferer.py +0 -0
  100. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_inferers/nmslib_index_inferer.py +0 -0
  101. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_inferers/utils.py +0 -0
  102. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_stores/__init__.py +0 -0
  103. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_stores/base_index_store.py +0 -0
  104. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_stores/hdfs_index_store.py +0 -0
  105. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_stores/shared_disk_index_store.py +0 -0
  106. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_stores/spark_files_index_store.py +0 -0
  107. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/index_stores/utils.py +0 -0
  108. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/extensions/ann/utils.py +0 -0
  109. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/kl_ucb.py +0 -0
  110. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/knn.py +0 -0
  111. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/__init__.py +0 -0
  112. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/loss/__init__.py +0 -0
  113. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/loss/sce.py +0 -0
  114. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/optimizer_utils/__init__.py +0 -0
  115. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/__init__.py +0 -0
  116. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/bert4rec/__init__.py +0 -0
  117. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/callbacks/__init__.py +0 -0
  118. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/compiled/__init__.py +0 -0
  119. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/compiled/bert4rec_compiled.py +0 -0
  120. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/compiled/sasrec_compiled.py +0 -0
  121. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/postprocessors/__init__.py +0 -0
  122. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/nn/sequential/sasrec/__init__.py +0 -0
  123. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/optimization/__init__.py +0 -0
  124. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/optimization/optuna_mixin.py +0 -0
  125. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/optimization/optuna_objective.py +0 -0
  126. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/pop_rec.py +0 -0
  127. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/query_pop_rec.py +0 -0
  128. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/random_rec.py +0 -0
  129. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/thompson_sampling.py +0 -0
  130. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/models/wilson.py +0 -0
  131. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/preprocessing/__init__.py +0 -0
  132. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/preprocessing/converter.py +0 -0
  133. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/preprocessing/sessionizer.py +0 -0
  134. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/scenarios/__init__.py +0 -0
  135. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/splitters/__init__.py +0 -0
  136. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/utils/dataframe_bucketizer.py +0 -0
  137. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/utils/distributions.py +0 -0
  138. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/utils/model_handler.py +0 -0
  139. {replay_rec-0.20.0 → replay_rec-0.20.1}/replay/utils/time.py +0 -0
{replay_rec-0.20.0 → replay_rec-0.20.1}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: replay-rec
- Version: 0.20.0
+ Version: 0.20.1
  Summary: RecSys Library
  License-Expression: Apache-2.0
  License-File: LICENSE
@@ -245,6 +245,12 @@ pip install openvino onnx
  pip install hnswlib fixed-install-nmslib
  ```

+ 4) (Experimental) LightFM model support:
+ ```bash
+ pip install lightfm
+ ```
+ > **_NOTE_**: LightFM is not officially supported for Python 3.12 due to discontinued maintenance of the library. If you wish to install it locally, you'll have to use a patched fork of LightFM, such as the [one used internally](https://github.com/daviddavo/lightfm).
+

  <a name="examples"></a>
  ## 📑 Resources
{replay_rec-0.20.0 → replay_rec-0.20.1}/README.md
@@ -201,6 +201,12 @@ pip install openvino onnx
  pip install hnswlib fixed-install-nmslib
  ```

+ 4) (Experimental) LightFM model support:
+ ```bash
+ pip install lightfm
+ ```
+ > **_NOTE_**: LightFM is not officially supported for Python 3.12 due to discontinued maintenance of the library. If you wish to install it locally, you'll have to use a patched fork of LightFM, such as the [one used internally](https://github.com/daviddavo/lightfm).
+

  <a name="examples"></a>
  ## 📑 Resources
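The hunk above documents LightFM as an optional, experimental extra that upstream no longer maintains for Python 3.12. Below is a minimal, hypothetical guard for user code that treats LightFM as optional; the guard and error message are illustrative and not part of replay-rec.

```python
# Hypothetical guard for the optional LightFM dependency (illustrative, not replay-rec code).
try:
    from lightfm import LightFM  # available after `pip install lightfm`, or a patched fork on Python 3.12
except ImportError as err:
    raise ImportError(
        "LightFM is an optional dependency; install `lightfm` "
        "(or a Python 3.12 compatible fork) to use LightFM-based models."
    ) from err

model = LightFM(no_components=32)  # `no_components` is a standard LightFM constructor argument
```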
{replay_rec-0.20.0 → replay_rec-0.20.1}/pyproject.toml
@@ -47,7 +47,7 @@ dependencies = [
  "lightning (<2.6.0); extra == 'torch' or extra == 'torch-cpu'",
  ]
  dynamic = ["dependencies"]
- version = "0.20.0"
+ version = "0.20.1"

  [project.optional-dependencies]
  spark = ["pyspark", "psutil"]
@@ -102,7 +102,7 @@ priority = "explicit"

  [tool.poetry-dynamic-versioning]
  enable = false
- format-jinja = """0.20.0{{ env['PACKAGE_SUFFIX'] }}"""
+ format-jinja = """0.20.1{{ env['PACKAGE_SUFFIX'] }}"""
  vcs = "git"

  [tool.ruff]
{replay_rec-0.20.0 → replay_rec-0.20.1}/replay/__init__.py
@@ -4,4 +4,4 @@
  # functionality removed in Python 3.12 is used in downstream packages (like lightfm)
  import setuptools as _

- __version__ = "0.20.0"
+ __version__ = "0.20.1"
{replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/dataset.py
@@ -5,8 +5,9 @@
  from __future__ import annotations

  import json
+ from collections.abc import Iterable, Sequence
  from pathlib import Path
- from typing import Callable, Dict, Iterable, List, Optional, Sequence, Union
+ from typing import Callable, Optional, Union

  import numpy as np
  from pandas import read_parquet as pd_read_parquet
@@ -315,7 +316,7 @@ class Dataset:
  :returns: Loaded Dataset.
  """
  base_path = Path(path).with_suffix(".replay").resolve()
- with open(base_path / "init_args.json", "r") as file:
+ with open(base_path / "init_args.json") as file:
  dataset_dict = json.loads(file.read())

  if dataframe_type not in ["pandas", "spark", "polars", None]:
@@ -436,14 +437,14 @@ class Dataset:
  )

  def _get_feature_source_map(self):
- self._feature_source_map: Dict[FeatureSource, DataFrameLike] = {
+ self._feature_source_map: dict[FeatureSource, DataFrameLike] = {
  FeatureSource.INTERACTIONS: self.interactions,
  FeatureSource.QUERY_FEATURES: self.query_features,
  FeatureSource.ITEM_FEATURES: self.item_features,
  }

  def _get_ids_source_map(self):
- self._ids_feature_map: Dict[FeatureHint, DataFrameLike] = {
+ self._ids_feature_map: dict[FeatureHint, DataFrameLike] = {
  FeatureHint.QUERY_ID: self.query_features if self.query_features is not None else self.interactions,
  FeatureHint.ITEM_ID: self.item_features if self.item_features is not None else self.interactions,
  }
@@ -499,10 +500,10 @@ class Dataset:
  )
  return FeatureSchema(features_list=features_list + filled_features)

- def _fill_unlabeled_features_sources(self, feature_schema: FeatureSchema) -> List[FeatureInfo]:
+ def _fill_unlabeled_features_sources(self, feature_schema: FeatureSchema) -> list[FeatureInfo]:
  features_list = list(feature_schema.all_features)

- source_mapping: Dict[str, FeatureSource] = {}
+ source_mapping: dict[str, FeatureSource] = {}
  for source in FeatureSource:
  dataframe = self._feature_source_map[source]
  if dataframe is not None:
@@ -524,7 +525,7 @@ class Dataset:
  self._set_cardinality(features_list=features_list)
  return features_list

- def _get_unlabeled_columns(self, source: FeatureSource, feature_schema: FeatureSchema) -> List[FeatureInfo]:
+ def _get_unlabeled_columns(self, source: FeatureSource, feature_schema: FeatureSchema) -> list[FeatureInfo]:
  set_source_dataframe_columns = set(self._feature_source_map[source].columns)
  set_labeled_dataframe_columns = set(feature_schema.columns)
  unlabeled_columns = set_source_dataframe_columns - set_labeled_dataframe_columns
@@ -534,13 +535,13 @@ class Dataset:
  ]
  return unlabeled_features_list

- def _fill_unlabeled_features(self, source: FeatureSource, feature_schema: FeatureSchema) -> List[FeatureInfo]:
+ def _fill_unlabeled_features(self, source: FeatureSource, feature_schema: FeatureSchema) -> list[FeatureInfo]:
  unlabeled_columns = self._get_unlabeled_columns(source=source, feature_schema=feature_schema)
  self._set_features_source(feature_list=unlabeled_columns, source=source)
  self._set_cardinality(features_list=unlabeled_columns)
  return unlabeled_columns

- def _set_features_source(self, feature_list: List[FeatureInfo], source: FeatureSource) -> None:
+ def _set_features_source(self, feature_list: list[FeatureInfo], source: FeatureSource) -> None:
  for feature in feature_list:
  feature._set_feature_source(source)

{replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/dataset_utils/dataset_label_encoder.py
@@ -6,7 +6,8 @@ Contains classes for encoding categorical data
  """

  import warnings
- from typing import Dict, Iterable, Iterator, Optional, Sequence, Set, Union
+ from collections.abc import Iterable, Iterator, Sequence
+ from typing import Optional, Union

  from replay.data import Dataset, FeatureHint, FeatureSchema, FeatureSource, FeatureType
  from replay.preprocessing import LabelEncoder, LabelEncodingRule, SequenceEncodingRule
@@ -45,9 +46,9 @@ class DatasetLabelEncoder:
  """
  self._handle_unknown_rule = handle_unknown_rule
  self._default_value_rule = default_value_rule
- self._encoding_rules: Dict[str, LabelEncodingRule] = {}
+ self._encoding_rules: dict[str, LabelEncodingRule] = {}

- self._features_columns: Dict[Union[FeatureHint, FeatureSource], Sequence[str]] = {}
+ self._features_columns: dict[Union[FeatureHint, FeatureSource], Sequence[str]] = {}

  def fit(self, dataset: Dataset) -> "DatasetLabelEncoder":
  """
@@ -161,7 +162,7 @@ class DatasetLabelEncoder:
  """
  self._check_if_initialized()

- columns_set: Set[str]
+ columns_set: set[str]
  columns_set = {columns} if isinstance(columns, str) else {*columns}

  def get_encoding_rules() -> Iterator[LabelEncodingRule]:
{replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/nn/schema.py
@@ -1,17 +1,8 @@
+ from collections import OrderedDict
+ from collections.abc import ItemsView, Iterable, Iterator, KeysView, Mapping, Sequence, ValuesView
  from typing import (
- Dict,
- ItemsView,
- Iterable,
- Iterator,
- KeysView,
- List,
- Mapping,
  Optional,
- OrderedDict,
- Sequence,
- Set,
  Union,
- ValuesView,
  )

  import torch
@@ -20,7 +11,7 @@ from replay.data import FeatureHint, FeatureSource, FeatureType

  # Alias
  TensorMap = Mapping[str, torch.Tensor]
- MutableTensorMap = Dict[str, torch.Tensor]
+ MutableTensorMap = dict[str, torch.Tensor]


  class TensorFeatureSource:
@@ -79,7 +70,7 @@ class TensorFeatureInfo:
  feature_type: FeatureType,
  is_seq: bool = False,
  feature_hint: Optional[FeatureHint] = None,
- feature_sources: Optional[List[TensorFeatureSource]] = None,
+ feature_sources: Optional[list[TensorFeatureSource]] = None,
  cardinality: Optional[int] = None,
  padding_value: int = 0,
  embedding_dim: Optional[int] = None,
@@ -154,13 +145,13 @@ class TensorFeatureInfo:
  self._feature_hint = hint

  @property
- def feature_sources(self) -> Optional[List[TensorFeatureSource]]:
+ def feature_sources(self) -> Optional[list[TensorFeatureSource]]:
  """
  :returns: List of sources feature came from.
  """
  return self._feature_sources

- def _set_feature_sources(self, sources: List[TensorFeatureSource]) -> None:
+ def _set_feature_sources(self, sources: list[TensorFeatureSource]) -> None:
  self._feature_sources = sources

  @property
@@ -276,7 +267,7 @@ class TensorSchema(Mapping[str, TensorFeatureInfo]):

  :returns: New tensor schema of given features.
  """
- features: Set[TensorFeatureInfo] = set()
+ features: set[TensorFeatureInfo] = set()
  for feature_name in features_to_keep:
  features.add(self._tensor_schema[feature_name])
  return TensorSchema(list(features))
@@ -432,7 +423,7 @@ class TensorSchema(Mapping[str, TensorFeatureInfo]):
  return None
  return rating_features.item().name

- def _get_object_args(self) -> Dict:
+ def _get_object_args(self) -> dict:
  """
  Returns list of features represented as dictionaries.
  """
@@ -456,7 +447,7 @@ class TensorSchema(Mapping[str, TensorFeatureInfo]):
  return features

  @classmethod
- def _create_object_by_args(cls, args: Dict) -> "TensorSchema":
+ def _create_object_by_args(cls, args: dict) -> "TensorSchema":
  features_list = []
  for feature_data in args:
  feature_data["feature_sources"] = (
{replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/nn/sequence_tokenizer.py
@@ -2,8 +2,9 @@ import abc
  import json
  import pickle
  import warnings
+ from collections.abc import Sequence
  from pathlib import Path
- from typing import TYPE_CHECKING, Dict, Generic, List, Optional, Sequence, Set, Tuple, TypeVar, Union
+ from typing import TYPE_CHECKING, Generic, Optional, TypeVar, Union

  import numpy as np
  import polars as pl
@@ -14,7 +15,6 @@ from replay.data import Dataset, FeatureHint, FeatureSchema, FeatureSource, Feat
  from replay.data.dataset_utils import DatasetLabelEncoder
  from replay.preprocessing import LabelEncoder, LabelEncodingRule
  from replay.preprocessing.label_encoder import HandleUnknownStrategies
- from replay.utils import deprecation_warning

  if TYPE_CHECKING:
  from .schema import TensorFeatureInfo, TensorFeatureSource, TensorSchema
@@ -187,7 +187,7 @@ class SequenceTokenizer:
  def _group_dataset(
  self,
  dataset: Dataset,
- ) -> Tuple[SequenceDataFrameLike, Optional[SequenceDataFrameLike], Optional[SequenceDataFrameLike]]:
+ ) -> tuple[SequenceDataFrameLike, Optional[SequenceDataFrameLike], Optional[SequenceDataFrameLike]]:
  from replay.data.nn.utils import ensure_pandas, groupby_sequences

  grouped_interactions = groupby_sequences(
@@ -268,13 +268,13 @@ class SequenceTokenizer:
  tensor_schema: "TensorSchema",
  query_id_column: str,
  item_id_column: str,
- ) -> Set[str]:
+ ) -> set[str]:
  # We need only features, which related to tensor schema, otherwise feature should
  # be ignored for efficiency reasons. The code below does feature filtering, and
  # keeps features used as a source in tensor schema.

  # Query and item IDs are always needed
- features_subset: List[str] = [
+ features_subset: list[str] = [
  query_id_column,
  item_id_column,
  ]
@@ -303,7 +303,7 @@ class SequenceTokenizer:
  msg = "All tensor features must have sources defined"
  raise ValueError(msg)

- source_tables: List[FeatureSource] = [s.source for s in feature_sources]
+ source_tables: list[FeatureSource] = [s.source for s in feature_sources]

  unexpected_tables = list(filter(lambda x: not isinstance(x, FeatureSource), source_tables))
  if len(unexpected_tables) > 0:
@@ -327,7 +327,7 @@ class SequenceTokenizer:
  tensor_features_to_keep: Optional[Sequence[str]] = None,
  ) -> None:
  # Check if all source columns specified in tensor schema exist in provided data frames
- sources_for_tensors: List["TensorFeatureSource"] = []
+ sources_for_tensors: list["TensorFeatureSource"] = []
  for tensor_feature_name, tensor_feature in tensor_schema.items():
  if tensor_features_to_keep is not None and tensor_feature_name not in tensor_features_to_keep:
  continue
@@ -405,7 +405,6 @@ class SequenceTokenizer:
  tensor_feature._set_cardinality(dataset_feature.cardinality)

  @classmethod
- @deprecation_warning("with `use_pickle` equals to `True` will be deprecated in future versions")
  def load(cls, path: str, use_pickle: bool = False, **kwargs) -> "SequenceTokenizer":
  """
  Load tokenizer object from the given path.
@@ -421,7 +420,7 @@ class SequenceTokenizer:

  if not use_pickle:
  base_path = Path(path).with_suffix(".replay").resolve()
- with open(base_path / "init_args.json", "r") as file:
+ with open(base_path / "init_args.json") as file:
  tokenizer_dict = json.loads(file.read())

  # load tensor_schema, tensor_features
@@ -449,12 +448,16 @@ class SequenceTokenizer:
  tokenizer._encoder._features_columns = encoder_features_columns
  tokenizer._encoder._encoding_rules = tokenizer_dict["encoder"]["encoding_rules"]
  else:
+ warnings.warn(
+ "with `use_pickle` equals to `True` will be deprecated in future versions",
+ DeprecationWarning,
+ stacklevel=2,
+ )
  with open(path, "rb") as file:
  tokenizer = pickle.load(file)

  return tokenizer

- @deprecation_warning("with `use_pickle` equals to `True` will be deprecated in future versions")
  def save(self, path: str, use_pickle: bool = False) -> None:
  """
  Save the tokenizer to the given path.
@@ -495,6 +498,11 @@ class SequenceTokenizer:
  with open(base_path / "init_args.json", "w+") as file:
  json.dump(tokenizer_dict, file)
  else:
+ warnings.warn(
+ "with `use_pickle` equals to `True` will be deprecated in future versions",
+ DeprecationWarning,
+ stacklevel=2,
+ )
  with open(path, "wb") as file:
  pickle.dump(self, file)

@@ -625,7 +633,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
  """
  :returns: processed Pandas DataFrame with all features from tensor schema.
  """
- all_features: Dict[str, Union[np.ndarray, List[np.ndarray]]] = {}
+ all_features: dict[str, Union[np.ndarray, list[np.ndarray]]] = {}
  all_features[self._query_id_column] = self._grouped_interactions[self._query_id_column].values

  for tensor_feature_name in self._tensor_schema:
@@ -635,7 +643,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):

  def _process_num_interaction_feature(
  self, tensor_feature: "TensorFeatureInfo"
- ) -> Union[List[np.ndarray], List[List]]:
+ ) -> Union[list[np.ndarray], list[list]]:
  """
  Process numerical interaction feature.

@@ -656,7 +664,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
  values.append(np.array(sequence))
  return values

- def _process_num_item_feature(self, tensor_feature: "TensorFeatureInfo") -> Union[List[np.ndarray], List[List]]:
+ def _process_num_item_feature(self, tensor_feature: "TensorFeatureInfo") -> Union[list[np.ndarray], list[list]]:
  """
  Process numerical feature from item features dataset.

@@ -682,7 +690,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):

  return values

- def _process_num_query_feature(self, tensor_feature: "TensorFeatureInfo") -> List[np.ndarray]:
+ def _process_num_query_feature(self, tensor_feature: "TensorFeatureInfo") -> list[np.ndarray]:
  """
  Process numerical feature from query features dataset.

@@ -694,7 +702,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):

  def _process_cat_interaction_feature(
  self, tensor_feature: "TensorFeatureInfo"
- ) -> Union[List[np.ndarray], List[List]]:
+ ) -> Union[list[np.ndarray], list[list]]:
  """
  Process categorical interaction feature.

@@ -715,7 +723,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
  values.append(np.array(sequence))
  return values

- def _process_cat_query_feature(self, tensor_feature: "TensorFeatureInfo") -> List[np.ndarray]:
+ def _process_cat_query_feature(self, tensor_feature: "TensorFeatureInfo") -> list[np.ndarray]:
  """
  Process categorical feature from query features dataset.

@@ -744,7 +752,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
  ]
  return [np.array([query_feature[i]]).reshape(-1) for i in range(len(self._grouped_interactions))]

- def _process_cat_item_feature(self, tensor_feature: "TensorFeatureInfo") -> Union[List[np.ndarray], List[List]]:
+ def _process_cat_item_feature(self, tensor_feature: "TensorFeatureInfo") -> Union[list[np.ndarray], list[list]]:
  """
  Process categorical feature from item features dataset.

@@ -760,7 +768,7 @@ class _PandasSequenceProcessor(_BaseSequenceProcessor[PandasDataFrame]):
  assert source is not None

  item_feature = self._item_features[source.column]
- values: List[np.ndarray] = []
+ values: list[np.ndarray] = []

  for item_id_sequence in self._grouped_interactions[self._item_id_column]:
  feature_sequence = item_feature.loc[item_id_sequence].values
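The `load` and `save` hunks above replace the `deprecation_warning` decorator with an inline `warnings.warn(..., DeprecationWarning)` that fires only when `use_pickle=True`, so the default JSON-based round trip no longer warns. A minimal sketch of that default path, assuming an already fitted tokenizer and that `SequenceTokenizer` is importable from `replay.data.nn`:

```python
# Sketch of the non-deprecated save/load round trip; `tokenizer` is assumed to be
# an already fitted SequenceTokenizer (construction and fitting are omitted here).
from replay.data.nn import SequenceTokenizer

tokenizer = ...  # fitted SequenceTokenizer
tokenizer.save("tokenizer")                      # writes tokenizer.replay/init_args.json, no warning
restored = SequenceTokenizer.load("tokenizer")   # JSON path, no DeprecationWarning

# The pickle path still works but now emits a DeprecationWarning:
# tokenizer.save("tokenizer.pkl", use_pickle=True)
```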
{replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/nn/sequential_dataset.py
@@ -1,7 +1,7 @@
  import abc
  import json
  from pathlib import Path
- from typing import TYPE_CHECKING, Tuple, Union
+ from typing import TYPE_CHECKING, Union

  import numpy as np
  import pandas as pd
@@ -90,7 +90,7 @@ class SequentialDataset(abc.ABC):
  @staticmethod
  def keep_common_query_ids(
  lhs: "SequentialDataset", rhs: "SequentialDataset"
- ) -> Tuple["SequentialDataset", "SequentialDataset"]:
+ ) -> tuple["SequentialDataset", "SequentialDataset"]:
  """
  Returns `SequentialDatasets` that contain query ids from both datasets.

@@ -110,17 +110,27 @@ class SequentialDataset(abc.ABC):

  sequential_dict = {}
  sequential_dict["_class_name"] = self.__class__.__name__
- self._sequences.reset_index().to_json(base_path / "sequences.json")
+
+ df = SequentialDataset._convert_array_to_list(self._sequences)
+ df.reset_index().to_parquet(base_path / "sequences.parquet")
  sequential_dict["init_args"] = {
  "tensor_schema": self._tensor_schema._get_object_args(),
  "query_id_column": self._query_id_column,
  "item_id_column": self._item_id_column,
- "sequences_path": "sequences.json",
+ "sequences_path": "sequences.parquet",
  }

  with open(base_path / "init_args.json", "w+") as file:
  json.dump(sequential_dict, file)

+ @staticmethod
+ def _convert_array_to_list(df):
+ return df.map(lambda x: x.tolist() if isinstance(x, np.ndarray) else x)
+
+ @staticmethod
+ def _convert_list_to_array(df):
+ return df.map(lambda x: np.array(x) if isinstance(x, list) else x)
+

  class PandasSequentialDataset(SequentialDataset):
  """
@@ -149,7 +159,7 @@ class PandasSequentialDataset(SequentialDataset):
  if sequences.index.name != query_id_column:
  sequences = sequences.set_index(query_id_column)

- self._sequences = sequences
+ self._sequences = SequentialDataset._convert_list_to_array(sequences)

  def __len__(self) -> int:
  return len(self._sequences)
@@ -203,10 +213,11 @@ class PandasSequentialDataset(SequentialDataset):
  from replay.data.nn import TensorSchema

  base_path = Path(path).with_suffix(".replay").resolve()
- with open(base_path / "init_args.json", "r") as file:
+ with open(base_path / "init_args.json") as file:
  sequential_dict = json.loads(file.read())

- sequences = pd.read_json(base_path / sequential_dict["init_args"]["sequences_path"])
+ sequences = pd.read_parquet(base_path / sequential_dict["init_args"]["sequences_path"])
+ sequences = cls._convert_array_to_list(sequences)
  dataset = cls(
  tensor_schema=TensorSchema._create_object_by_args(sequential_dict["init_args"]["tensor_schema"]),
  query_id_column=sequential_dict["init_args"]["query_id_column"],
@@ -258,18 +269,11 @@ class PolarsSequentialDataset(PandasSequentialDataset):

  def _convert_polars_to_pandas(self, df: PolarsDataFrame) -> PandasDataFrame:
  pandas_df = PandasDataFrame(df.to_dict(as_series=False))
-
- for column in pandas_df.select_dtypes(include="object").columns:
- if isinstance(pandas_df[column].iloc[0], list):
- pandas_df[column] = pandas_df[column].apply(lambda x: np.array(x))
-
+ pandas_df = SequentialDataset._convert_list_to_array(pandas_df)
  return pandas_df

  def _convert_pandas_to_polars(self, df: PandasDataFrame) -> PolarsDataFrame:
- for column in df.select_dtypes(include="object").columns:
- if isinstance(df[column].iloc[0], np.ndarray):
- df[column] = df[column].apply(lambda x: x.tolist())
-
+ df = SequentialDataset._convert_array_to_list(df)
  return pl.from_dict(df.to_dict("list"))

  @classmethod
@@ -287,10 +291,10 @@ class PolarsSequentialDataset(PandasSequentialDataset):
  from replay.data.nn import TensorSchema

  base_path = Path(path).with_suffix(".replay").resolve()
- with open(base_path / "init_args.json", "r") as file:
+ with open(base_path / "init_args.json") as file:
  sequential_dict = json.loads(file.read())

- sequences = pl.DataFrame(pd.read_json(base_path / sequential_dict["init_args"]["sequences_path"]))
+ sequences = pl.from_pandas(pd.read_parquet(base_path / sequential_dict["init_args"]["sequences_path"]))
  dataset = cls(
  tensor_schema=TensorSchema._create_object_by_args(sequential_dict["init_args"]["tensor_schema"]),
  query_id_column=sequential_dict["init_args"]["query_id_column"],
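In the hunks above, `SequentialDataset.save` and the `load` classmethods switch sequence storage from JSON to Parquet, with small helpers that turn `np.ndarray` cells into lists before writing and back into arrays after loading. A standalone sketch of that conversion pattern follows; it uses plain pandas (≥ 2.1 for `DataFrame.map`, matching the helpers in the hunk) and a Parquet engine such as pyarrow, not the replay-rec API:

```python
# Standalone illustration of the array <-> list conversion around Parquet I/O,
# mirroring the new _convert_array_to_list / _convert_list_to_array helpers.
import numpy as np
import pandas as pd

sequences = pd.DataFrame(
    {"query_id": [0, 1], "item_id": [np.array([1, 2, 3]), np.array([4, 5])]}
)

# ndarray cells become plain lists so the column serializes as a Parquet list column
as_lists = sequences.map(lambda x: x.tolist() if isinstance(x, np.ndarray) else x)
as_lists.to_parquet("sequences.parquet", index=False)

# after reading, convert any list cells back to ndarrays
# (depending on the Parquet engine, cells may already come back as ndarrays)
restored = pd.read_parquet("sequences.parquet")
restored = restored.map(lambda x: np.array(x) if isinstance(x, list) else x)
```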
{replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/nn/torch_sequential_dataset.py
@@ -1,11 +1,11 @@
- from typing import TYPE_CHECKING, Generator, NamedTuple, Optional, Sequence, Tuple, Union, cast
+ import warnings
+ from collections.abc import Generator, Sequence
+ from typing import TYPE_CHECKING, NamedTuple, Optional, Union, cast

  import numpy as np
  import torch
  from torch.utils.data import Dataset as TorchDataset

- from replay.utils import deprecation_warning
-
  if TYPE_CHECKING:
  from .schema import TensorFeatureInfo, TensorMap, TensorSchema
  from .sequential_dataset import SequentialDataset
@@ -28,16 +28,12 @@ class TorchSequentialDataset(TorchDataset):
  Torch dataset for sequential recommender models
  """

- @deprecation_warning(
- "`padding_value` parameter will be removed in future versions. "
- "Instead, you should specify `padding_value` for each column in TensorSchema"
- )
  def __init__(
  self,
  sequential: "SequentialDataset",
  max_sequence_length: int,
  sliding_window_step: Optional[int] = None,
- padding_value: int = 0,
+ padding_value: Optional[int] = None,
  ) -> None:
  """
  :param sequential: sequential dataset
@@ -52,6 +48,15 @@ class TorchSequentialDataset(TorchDataset):
  self._sequential = sequential
  self._max_sequence_length = max_sequence_length
  self._sliding_window_step = sliding_window_step
+ if padding_value is not None:
+ warnings.warn(
+ "`padding_value` parameter will be removed in future versions. "
+ "Instead, you should specify `padding_value` for each column in TensorSchema",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ else:
+ padding_value = 0
  self._padding_value = padding_value
  self._index2sequence_map = self._build_index2sequence_map()

@@ -110,7 +115,7 @@ class TorchSequentialDataset(TorchDataset):
  return sequence

  # form shape for padded_sequence. Now supported one and two-dimentions features
- padded_sequence_shape: Union[Tuple[int, int], Tuple[int]]
+ padded_sequence_shape: Union[tuple[int, int], tuple[int]]
  if len(sequence.shape) == 1:
  padded_sequence_shape = (self._max_sequence_length,)
  elif len(sequence.shape) == 2:
@@ -134,10 +139,10 @@ class TorchSequentialDataset(TorchDataset):
  return torch.float32
  assert False, "Unknown tensor feature type"

- def _build_index2sequence_map(self) -> Sequence[Tuple[int, int]]:
+ def _build_index2sequence_map(self) -> Sequence[tuple[int, int]]:
  return list(self._iter_with_window())

- def _iter_with_window(self) -> Generator[Tuple[int, int], None, None]:
+ def _iter_with_window(self) -> Generator[tuple[int, int], None, None]:
  for i in range(len(self._sequential)):
  actual_seq_len = self._sequential.get_sequence_length(i)
  left_seq_len = actual_seq_len - self._max_sequence_length
@@ -176,17 +181,13 @@ class TorchSequentialValidationDataset(TorchDataset):
  Torch dataset for sequential recommender models that additionally stores ground truth
  """

- @deprecation_warning(
- "`padding_value` parameter will be removed in future versions. "
- "Instead, you should specify `padding_value` for each column in TensorSchema"
- )
  def __init__(
  self,
  sequential: "SequentialDataset",
  ground_truth: "SequentialDataset",
  train: "SequentialDataset",
  max_sequence_length: int,
- padding_value: int = 0,
+ padding_value: Optional[int] = None,
  sliding_window_step: Optional[int] = None,
  label_feature_name: Optional[str] = None,
  ):
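Both constructors above now treat `padding_value` as Optional and only emit a DeprecationWarning when it is passed explicitly; the intended place for padding is the per-feature `padding_value` argument that `TensorFeatureInfo` already accepts (see the `replay/data/nn/schema.py` hunks). A hedged sketch of that usage follows; the positional `name` argument, the `replay.data.nn` exports, and the prepared `SequentialDataset` are assumptions rather than something shown in this diff:

```python
# Hedged sketch: declare padding per feature in the TensorSchema instead of on the dataset.
from replay.data import FeatureHint, FeatureType
from replay.data.nn import TensorFeatureInfo, TensorSchema, TorchSequentialDataset

schema = TensorSchema(
    [
        TensorFeatureInfo(
            "item_id",                       # feature name (assumed positional parameter)
            feature_type=FeatureType.CATEGORICAL,
            is_seq=True,
            feature_hint=FeatureHint.ITEM_ID,
            cardinality=1_000,
            padding_value=0,                 # per-column padding replaces the dataset-level argument
        )
    ]
)

sequential_dataset = ...  # a prepared SequentialDataset built with this schema (omitted here)

# No padding_value argument: passing one now triggers a DeprecationWarning.
torch_dataset = TorchSequentialDataset(
    sequential=sequential_dataset,
    max_sequence_length=50,
)
```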
{replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/nn/utils.py
@@ -1,4 +1,5 @@
- from typing import Iterable, Optional
+ from collections.abc import Iterable
+ from typing import Optional

  import polars as pl

{replay_rec-0.20.0 → replay_rec-0.20.1}/replay/data/schema.py
@@ -1,18 +1,9 @@
+ from collections.abc import ItemsView, Iterable, Iterator, KeysView, Mapping, Sequence, ValuesView
  from enum import Enum
  from typing import (
  Callable,
- Dict,
- ItemsView,
- Iterable,
- Iterator,
- KeysView,
- List,
- Mapping,
  Optional,
- Sequence,
- Set,
  Union,
- ValuesView,
  )


@@ -162,7 +153,7 @@ class FeatureSchema(Mapping[str, FeatureInfo]):
  in original schema to keep in subset.
  :returns: new feature schema of given features.
  """
- features: Set[FeatureInfo] = set()
+ features: set[FeatureInfo] = set()
  for feature_column in features_to_keep:
  if feature_column in self._features_schema:
  features.add(self._features_schema[feature_column])
@@ -438,7 +429,7 @@ class FeatureSchema(Mapping[str, FeatureInfo]):
  """
  unique_columns = set()
  duplicates = set()
- item_query_names: Dict[FeatureHint, List[str]] = {
+ item_query_names: dict[FeatureHint, list[str]] = {
  FeatureHint.ITEM_ID: [],
  FeatureHint.QUERY_ID: [],
  }