ddi-fw: ddi_fw-0.0.187-py3-none-any.whl → ddi_fw-0.0.189-py3-none-any.whl
This diff compares the contents of two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- ddi_fw/datasets/core.py +6 -4
- ddi_fw/ml/ml_helper.py +4 -3
- ddi_fw/ml/tensorflow_wrapper.py +1 -1
- ddi_fw/pipeline/pipeline.py +26 -17
- {ddi_fw-0.0.187.dist-info → ddi_fw-0.0.189.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.187.dist-info → ddi_fw-0.0.189.dist-info}/RECORD +8 -8
- {ddi_fw-0.0.187.dist-info → ddi_fw-0.0.189.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.187.dist-info → ddi_fw-0.0.189.dist-info}/top_level.txt +0 -0
ddi_fw/datasets/core.py
CHANGED
@@ -158,9 +158,11 @@ class BaseDataset(BaseModel):
|
|
158
158
|
y_test = test[self.class_column]
|
159
159
|
|
160
160
|
self.X_train = np.array(X_train)
|
161
|
-
self.y_train = np.array(y_train)
|
161
|
+
# self.y_train = np.array(y_train)
|
162
|
+
self.y_train = np.array(y_train.tolist())
|
162
163
|
self.X_test = np.array(X_test)
|
163
|
-
self.y_test = np.array(y_test)
|
164
|
+
# self.y_test = np.array(y_test)
|
165
|
+
self.y_test = np.array(y_test.tolist())
|
164
166
|
|
165
167
|
self.train_indexes = X_train.index
|
166
168
|
self.test_indexes = X_test.index
|
@@ -222,8 +224,8 @@ class BaseDataset(BaseModel):
|
|
222
224
|
|
223
225
|
X_train, X_test, y_train, y_test, X_train.index, X_test.index, train_idx_arr, val_idx_arr = self.dataset_splitter.split(
|
224
226
|
X=X, y=y)
|
225
|
-
self.X_train = np.array(
|
226
|
-
self.X_test = np.array(
|
227
|
+
self.X_train = np.array(X_train)
|
228
|
+
self.X_test = np.array(X_test)
|
227
229
|
self.y_train = np.array(y_train.tolist())
|
228
230
|
self.y_test = np.array(y_test.tolist())
|
229
231
|
self.train_indexes = X_train.index
|
ddi_fw/ml/ml_helper.py
CHANGED
@@ -57,7 +57,7 @@ class MultiModalRunner:
|
|
57
57
|
raise ValueError(
|
58
58
|
"Unsupported library type. Choose 'tensorflow' or 'pytorch'.")
|
59
59
|
|
60
|
-
def __predict(self,single_results):
|
60
|
+
def __predict(self, single_results):
|
61
61
|
item_dict = {t[0]: t for t in self.items}
|
62
62
|
print("multi_modal")
|
63
63
|
print(self.multi_modal)
|
@@ -71,7 +71,8 @@ class MultiModalRunner:
|
|
71
71
|
model_type = get_import(m.get("model_type"))
|
72
72
|
kwargs = m.get('params')
|
73
73
|
T = self.__create_model(self.library)
|
74
|
-
single_modal = T(self.date, name, model_type,
|
74
|
+
single_modal = T(self.date, name, model_type,
|
75
|
+
use_mlflow=self.use_mlflow, **kwargs)
|
75
76
|
if input_type == '1D':
|
76
77
|
item = item_dict[input]
|
77
78
|
single_modal.set_data(
|
@@ -79,7 +80,7 @@ class MultiModalRunner:
|
|
79
80
|
elif input_type == '2D':
|
80
81
|
# check keys
|
81
82
|
filtered_dict = {k: item_dict[k]
|
82
|
-
|
83
|
+
for k in inputs if k in item_dict}
|
83
84
|
print(filtered_dict.keys())
|
84
85
|
first_input = next(iter(filtered_dict.values()))
|
85
86
|
train_data_list = [f[1] for f in filtered_dict.values()]
|
ddi_fw/ml/tensorflow_wrapper.py
CHANGED
@@ -64,7 +64,7 @@ def convert_to_categorical(arr, num_classes):
|
|
64
64
|
|
65
65
|
class TFModelWrapper(ModelWrapper):
|
66
66
|
|
67
|
-
def __init__(self, date, descriptor, model_func, use_mlflow=
|
67
|
+
def __init__(self, date, descriptor, model_func, use_mlflow=False, **kwargs):
|
68
68
|
super().__init__(date, descriptor, model_func, **kwargs)
|
69
69
|
self.batch_size = kwargs.get('batch_size', 128)
|
70
70
|
self.epochs = kwargs.get('epochs', 100)
|
ddi_fw/pipeline/pipeline.py
CHANGED
@@ -37,23 +37,24 @@ class Pipeline(BaseModel):
|
|
37
37
|
model: Optional[Any] = None
|
38
38
|
multi_modal: Optional[Any] = None
|
39
39
|
use_mlflow: bool = False
|
40
|
-
_items:List=[]
|
41
|
-
_train_idx_arr:List|None=[]
|
42
|
-
_val_idx_arr:List|None=[]
|
43
|
-
|
40
|
+
_items: List = []
|
41
|
+
_train_idx_arr: List | None = []
|
42
|
+
_val_idx_arr: List | None = []
|
43
|
+
|
44
44
|
@property
|
45
45
|
def items(self) -> List:
|
46
46
|
return self._items
|
47
|
+
|
47
48
|
@property
|
48
|
-
def train_idx_arr(self) -> List|None:
|
49
|
+
def train_idx_arr(self) -> List | None:
|
49
50
|
return self._train_idx_arr
|
51
|
+
|
50
52
|
@property
|
51
|
-
def val_idx_arr(self) -> List|None:
|
53
|
+
def val_idx_arr(self) -> List | None:
|
52
54
|
return self._val_idx_arr
|
53
55
|
|
54
56
|
class Config:
|
55
57
|
arbitrary_types_allowed = True
|
56
|
-
|
57
58
|
|
58
59
|
def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
|
59
60
|
"""
|
@@ -146,21 +147,22 @@ class Pipeline(BaseModel):
|
|
146
147
|
# filename=self.ner_data_file) if self.ner_data_file else None
|
147
148
|
|
148
149
|
dataset_splitter = self.dataset_splitter_type()
|
149
|
-
|
150
|
+
|
150
151
|
if issubclass(self.dataset_type, TextDatasetMixin):
|
151
152
|
key, value = next(iter(embedding_dict.items()))
|
152
153
|
embedding_size = value[next(iter(value))][0].shape[0]
|
153
|
-
pooling_strategy = self.embedding_pooling_strategy_type(
|
154
|
+
pooling_strategy = self.embedding_pooling_strategy_type(
|
155
|
+
) if self.embedding_pooling_strategy_type else None
|
154
156
|
|
155
157
|
dataset = self.dataset_type(
|
156
158
|
embedding_dict=embedding_dict,
|
157
159
|
embedding_size=embedding_size,
|
158
160
|
embeddings_pooling_strategy=pooling_strategy,
|
159
|
-
dataset_splitter_type
|
161
|
+
dataset_splitter_type=self.dataset_splitter_type,
|
160
162
|
**kwargs)
|
161
163
|
elif self.dataset_type == BaseDataset:
|
162
|
-
|
163
|
-
dataset_splitter_type
|
164
|
+
dataset = self.dataset_type(
|
165
|
+
dataset_splitter_type=self.dataset_splitter_type,
|
164
166
|
**kwargs)
|
165
167
|
else:
|
166
168
|
dataset = self.dataset_type(**kwargs)
|
@@ -168,15 +170,22 @@ class Pipeline(BaseModel):
|
|
168
170
|
# X_train, X_test, y_train, y_test, train_indexes, test_indexes, train_idx_arr, val_idx_arr = dataset.load()
|
169
171
|
dataset.load()
|
170
172
|
dataframe = dataset.dataframe
|
171
|
-
b = not( dataset.X_train and dataset.y_train and dataset.X_test and dataset.y_test)
|
172
|
-
c = dataframe is None or dataframe.empty
|
173
173
|
|
174
|
-
if
|
174
|
+
# Check if any of the arrays are None or empty
|
175
|
+
is_data_valid = (dataset.X_train is not None and dataset.X_train.size > 0 and
|
176
|
+
dataset.y_train is not None and dataset.y_train.size > 0 and
|
177
|
+
dataset.X_test is not None and dataset.X_test.size > 0 and
|
178
|
+
dataset.y_test is not None and dataset.y_test.size > 0)
|
179
|
+
|
180
|
+
# Check if the dataframe is None or empty
|
181
|
+
is_dataframe_valid = dataframe is not None and not dataframe.empty
|
182
|
+
|
183
|
+
if not (is_data_valid and is_dataframe_valid):
|
175
184
|
raise ValueError("The dataset is not loaded")
|
176
|
-
|
185
|
+
|
177
186
|
# column name, train data, train label, test data, test label
|
178
187
|
self._items = dataset.produce_inputs()
|
179
|
-
|
188
|
+
|
180
189
|
print("Building the experiment with the following settings:")
|
181
190
|
print(
|
182
191
|
f"Name: {self.experiment_name}, Dataset: {dataset}, Model: {self.model}")
|
{ddi_fw-0.0.187.dist-info → ddi_fw-0.0.189.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
|
2
|
-
ddi_fw/datasets/core.py,sha256=
|
2
|
+
ddi_fw/datasets/core.py,sha256=WWWd5SGHVUpJn-IJF1p1PScSWpb7VfQdcMTroufkgUk,10734
|
3
3
|
ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
|
4
4
|
ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
|
5
5
|
ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
|
@@ -74,10 +74,10 @@ ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSz
|
|
74
74
|
ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
|
75
75
|
ddi_fw/ml/__init__.py,sha256=tIxiW0g6q1VsmDYVXR_ovvHQR3SCir8g2bKxx_CrS7s,221
|
76
76
|
ddi_fw/ml/evaluation_helper.py,sha256=2-7CLSgGTqLEk4HkgCVIOt-GxfLAn6SBozJghAtHb5M,11581
|
77
|
-
ddi_fw/ml/ml_helper.py,sha256=
|
77
|
+
ddi_fw/ml/ml_helper.py,sha256=xbIg0fAJeJuB7rlgUMzCFhQ4WLBXS35x5N5gCcs6-so,6367
|
78
78
|
ddi_fw/ml/model_wrapper.py,sha256=kabPXuo7S8tGkp9a00V04n4rXDmv7dD8wYGMjotISRc,1050
|
79
79
|
ddi_fw/ml/pytorch_wrapper.py,sha256=pe6UsjP2XeTgLxDnIUiodoyhJTGCxV27wD4Cjxysu2Q,8553
|
80
|
-
ddi_fw/ml/tensorflow_wrapper.py,sha256
|
80
|
+
ddi_fw/ml/tensorflow_wrapper.py,sha256=lNJvg3odqMKmILecOMdcOCAOrwzWZDzxB0DWGcYWsPg,12952
|
81
81
|
ddi_fw/ner/__init__.py,sha256=JwhGXrepomxPSsGsg2b_xPRC72AjvxOIn2CW5Mvscn0,26
|
82
82
|
ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6859
|
83
83
|
ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
|
@@ -85,7 +85,7 @@ ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,2
|
|
85
85
|
ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
|
86
86
|
ddi_fw/pipeline/multi_pipeline.py,sha256=NfcH4Ze5U-JRiH3lrxEDWj-VPxYQYtp7tq6bLCImBzs,5550
|
87
87
|
ddi_fw/pipeline/ner_pipeline.py,sha256=Bp6BA6nozfWFaMHH6jKlzesnCGO6qiMkzdGy_ed6nh0,5947
|
88
|
-
ddi_fw/pipeline/pipeline.py,sha256=
|
88
|
+
ddi_fw/pipeline/pipeline.py,sha256=dCXZuXOlW74ZO0e_OhS9OX0dqI9abj7CQz_lkKrDIWY,9787
|
89
89
|
ddi_fw/utils/__init__.py,sha256=bqIC0YjbD0YSHtO0nWUkRs4w5nu7qBV0yU72sRzwCj8,475
|
90
90
|
ddi_fw/utils/categorical_data_encoding_checker.py,sha256=gzb_vUDBrCMUhBxY1fBYTe8hmK72p0_uw3DTga8cqP8,1580
|
91
91
|
ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
|
@@ -98,7 +98,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
|
|
98
98
|
ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
|
99
99
|
ddi_fw/vectorization/feature_vector_generation.py,sha256=Z1A_DOBqDFPqLN4YB-3oYlOQWJK-X6Oes6UFjpzR47Q,4760
|
100
100
|
ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
|
101
|
-
ddi_fw-0.0.
|
102
|
-
ddi_fw-0.0.
|
103
|
-
ddi_fw-0.0.
|
104
|
-
ddi_fw-0.0.
|
101
|
+
ddi_fw-0.0.189.dist-info/METADATA,sha256=y3hik68p2UsqKr7ur8R2ix8TyPfGsveDbKWboSaRkzA,2542
|
102
|
+
ddi_fw-0.0.189.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
103
|
+
ddi_fw-0.0.189.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
104
|
+
ddi_fw-0.0.189.dist-info/RECORD,,
|
File without changes
|
File without changes
|