ddi-fw 0.0.171__py3-none-any.whl → 0.0.172__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
ddi_fw/datasets/core.py CHANGED
@@ -21,7 +21,6 @@ except ImportError:
21
21
  "Failed to import langchain.embeddings module. ")
22
22
 
23
23
 
24
-
25
24
  def stack(df_column):
26
25
  return np.stack(df_column.values)
27
26
 
@@ -61,10 +60,10 @@ class BaseDataset(BaseModel):
61
60
  dataset_splitter_type: Type[DatasetSplitter]
62
61
  class_column: str = 'class'
63
62
  dataframe: Optional[pd.DataFrame] = None
64
- X_train: Optional[pd.DataFrame] = None
65
- X_test: Optional[pd.DataFrame] = None
66
- y_train: Optional[pd.Series] = None
67
- y_test: Optional[pd.Series] = None
63
+ X_train: Optional[pd.DataFrame | np.ndarray] = None
64
+ X_test: Optional[pd.DataFrame | np.ndarray] = None
65
+ y_train: Optional[pd.Series | np.ndarray] = None
66
+ y_test: Optional[pd.Series | np.ndarray] = None
68
67
  train_indexes: Optional[pd.Index] = None
69
68
  test_indexes: Optional[pd.Index] = None
70
69
  train_idx_arr: Optional[List[np.ndarray]] = None
@@ -89,7 +88,7 @@ class BaseDataset(BaseModel):
89
88
  # items.append([f'{column}_embedding', train_data,
90
89
  # y_train_label, test_data, y_test_label])
91
90
  return items
92
-
91
+
93
92
  @computed_field
94
93
  @property
95
94
  def dataset_splitter(self) -> DatasetSplitter:
@@ -109,17 +108,20 @@ class BaseDataset(BaseModel):
109
108
  """
110
109
  if self.X_train is not None and self.y_train is not None and self.X_test is not None and self.y_test is not None:
111
110
  # Data is already provided, no need to calculate
112
- logging.info("X_train, y_train, X_test, and y_test are already provided. Skipping calculation.")
111
+ logging.info(
112
+ "X_train, y_train, X_test, and y_test are already provided. Skipping calculation.")
113
113
  return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr
114
114
 
115
115
  if self.index_path is None:
116
- raise Exception("There is no index path. Please call split_dataset or provide indices.")
116
+ raise Exception(
117
+ "There is no index path. Please call split_dataset or provide indices.")
117
118
 
118
119
  if self.dataframe is None:
119
120
  raise Exception("There is no dataframe to derive data from.")
120
121
 
121
122
  try:
122
- train_idx_all, test_idx_all, train_idx_arr, val_idx_arr = self.__get_indexes__(self.index_path)
123
+ train_idx_all, test_idx_all, train_idx_arr, val_idx_arr = self.__get_indexes__(
124
+ self.index_path)
123
125
  except FileNotFoundError as e:
124
126
  raise FileNotFoundError(f"Index files not found: {e.filename}")
125
127
 
@@ -176,7 +178,8 @@ class BaseDataset(BaseModel):
176
178
  raise an error.
177
179
  """
178
180
  if self.X_train is not None or self.X_test is not None:
179
- raise Exception("X_train and X_test are already present. Splitting is not allowed.")
181
+ raise Exception(
182
+ "X_train and X_test are already present. Splitting is not allowed.")
180
183
 
181
184
  if self.dataframe is None:
182
185
  raise Exception("There is no dataframe to split.")
@@ -215,8 +218,9 @@ class BaseDataset(BaseModel):
215
218
 
216
219
 
217
220
  class TextDatasetMixin(BaseDataset):
218
- embedding_size: Optional[int] = None
219
- embedding_dict: Dict[str, Any] = Field(default_factory=dict, description="Dictionary for embeddings")
221
+ embedding_size: Optional[int] = None
222
+ embedding_dict: Dict[str, Any] = Field(
223
+ default_factory=dict, description="Dictionary for embeddings")
220
224
  embeddings_pooling_strategy: PoolingStrategy | None = None
221
225
 
222
226
  def process_text(self):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ddi_fw
3
- Version: 0.0.171
3
+ Version: 0.0.172
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -1,5 +1,5 @@
1
1
  ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
2
- ddi_fw/datasets/core.py,sha256=vnbaqDRcnlKd7TX1emkc3lNEEcD_PACbeu72p4y-4Ok,9223
2
+ ddi_fw/datasets/core.py,sha256=jXPEMrlQ685qMEZ-Pj4izOVH7nkE62JtpMsDjfosBeQ,9350
3
3
  ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
4
4
  ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
5
5
  ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
@@ -97,7 +97,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
97
97
  ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
98
98
  ddi_fw/vectorization/feature_vector_generation.py,sha256=Z1A_DOBqDFPqLN4YB-3oYlOQWJK-X6Oes6UFjpzR47Q,4760
99
99
  ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
100
- ddi_fw-0.0.171.dist-info/METADATA,sha256=qAlq05fNm5PaC-jJ-kCZz7oH-1yZZdxq7eAbRuNmM6U,2542
101
- ddi_fw-0.0.171.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
102
- ddi_fw-0.0.171.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
103
- ddi_fw-0.0.171.dist-info/RECORD,,
100
+ ddi_fw-0.0.172.dist-info/METADATA,sha256=saohphdC9IZ8Fg4_QLDpyzEufhTWY_NBr2GzJqw5imU,2542
101
+ ddi_fw-0.0.172.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
102
+ ddi_fw-0.0.172.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
103
+ ddi_fw-0.0.172.dist-info/RECORD,,