ddi-fw 0.0.190__py3-none-any.whl → 0.0.192__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/datasets/core.py +10 -8
- ddi_fw/ml/ml_helper.py +40 -29
- ddi_fw/pipeline/multi_pipeline.py +2 -0
- ddi_fw/pipeline/pipeline.py +9 -1
- {ddi_fw-0.0.190.dist-info → ddi_fw-0.0.192.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.190.dist-info → ddi_fw-0.0.192.dist-info}/RECORD +8 -8
- {ddi_fw-0.0.190.dist-info → ddi_fw-0.0.192.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.190.dist-info → ddi_fw-0.0.192.dist-info}/top_level.txt +0 -0
ddi_fw/datasets/core.py
CHANGED
@@ -73,6 +73,7 @@ class BaseDataset(BaseModel):
|
|
73
73
|
class Config:
|
74
74
|
arbitrary_types_allowed = True
|
75
75
|
|
76
|
+
# TODO: when no columns are given, all features are used; how should this be handled in the pipeline?
|
76
77
|
def produce_inputs(self):
|
77
78
|
items = []
|
78
79
|
if self.X_train is None or self.X_test is None:
|
@@ -80,7 +81,10 @@ class BaseDataset(BaseModel):
|
|
80
81
|
y_train_label, y_test_label = np.array(
|
81
82
|
self.y_train), np.array(self.y_test)
|
82
83
|
|
83
|
-
if self.columns is None or len(self.columns) == 0:
|
84
|
+
if self.columns is None or len(self.columns) == 0 or len(self.columns) == 1:
|
85
|
+
# If no columns or only one column are provided, do not change the data
|
86
|
+
# and use the entire dataset as a single input.
|
87
|
+
column = self.columns[0] if self.columns else 'default'
|
84
88
|
items.append([f'default', np.nan_to_num(self.X_train),
|
85
89
|
y_train_label, np.nan_to_num(self.X_test), y_test_label])
|
86
90
|
else:
|
@@ -127,15 +131,15 @@ class BaseDataset(BaseModel):
|
|
127
131
|
Load the dataset. If X_train, y_train, X_test, and y_test are already provided,
|
128
132
|
skip deriving them. Otherwise, derive them from the dataframe and indices.
|
129
133
|
"""
|
130
|
-
|
134
|
+
self.prep()
|
135
|
+
|
136
|
+
if self.X_train is not None or self.y_train is not None or self.X_test is not None or self.y_test is not None:
|
131
137
|
# Data is already provided, no need to calculate
|
132
138
|
logging.info(
|
133
139
|
"X_train, y_train, X_test, and y_test are already provided. Skipping calculation.")
|
134
140
|
return
|
135
141
|
# return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr
|
136
142
|
|
137
|
-
self.prep()
|
138
|
-
|
139
143
|
if self.index_path is None:
|
140
144
|
raise Exception(
|
141
145
|
"There is no index path. Please call split_dataset or provide indices.")
|
@@ -156,7 +160,7 @@ class BaseDataset(BaseModel):
|
|
156
160
|
y_train = train[self.class_column]
|
157
161
|
X_test = test.drop(self.class_column, axis=1)
|
158
162
|
y_test = test[self.class_column]
|
159
|
-
|
163
|
+
|
160
164
|
self.X_train = np.array(X_train)
|
161
165
|
# self.y_train = np.array(y_train)
|
162
166
|
self.y_train = np.array(y_train.tolist())
|
@@ -170,7 +174,6 @@ class BaseDataset(BaseModel):
|
|
170
174
|
self.val_idx_arr = val_idx_arr
|
171
175
|
|
172
176
|
# Dataframe to numpy array conversion
|
173
|
-
|
174
177
|
|
175
178
|
# return self.X_train, self.X_test, self.y_train, self.y_test, self.train_indexes, self.test_indexes, self.train_idx_arr, self.val_idx_arr
|
176
179
|
|
@@ -226,14 +229,13 @@ class BaseDataset(BaseModel):
|
|
226
229
|
X=X, y=y)
|
227
230
|
self.X_train = np.array(X_train)
|
228
231
|
self.X_test = np.array(X_test)
|
229
|
-
self.y_train =
|
232
|
+
self.y_train = np.array(y_train.tolist())
|
230
233
|
self.y_test = np.array(y_test.tolist())
|
231
234
|
self.train_indexes = X_train.index
|
232
235
|
self.test_indexes = X_test.index
|
233
236
|
self.train_idx_arr = train_idx_arr
|
234
237
|
self.val_idx_arr = val_idx_arr
|
235
238
|
|
236
|
-
|
237
239
|
if save_indexes:
|
238
240
|
# train_pairs = [row['id1'].join(',').row['id2'] for index, row in X_train.iterrows()]
|
239
241
|
self.__save_indexes__(
|
ddi_fw/ml/ml_helper.py
CHANGED
@@ -57,42 +57,53 @@ class MultiModalRunner:
|
|
57
57
|
raise ValueError(
|
58
58
|
"Unsupported library type. Choose 'tensorflow' or 'pytorch'.")
|
59
59
|
|
60
|
+
# TODO check single_results, 1d,2d ...
|
60
61
|
def __predict(self, single_results):
|
61
62
|
item_dict = {t[0]: t for t in self.items}
|
62
63
|
print("multi_modal")
|
63
64
|
print(self.multi_modal)
|
64
65
|
print(item_dict.keys())
|
65
66
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
67
|
+
if self.multi_modal:
|
68
|
+
for m in self.multi_modal:
|
69
|
+
name = m.get('name')
|
70
|
+
input_type = m.get('input_type')
|
71
|
+
input = m.get('input')
|
72
|
+
inputs = m.get('inputs')
|
73
|
+
model_type = get_import(m.get("model_type"))
|
74
|
+
kwargs = m.get('params')
|
75
|
+
T = self.__create_model(self.library)
|
76
|
+
single_modal = T(self.date, name, model_type,
|
77
|
+
use_mlflow=self.use_mlflow, **kwargs)
|
78
|
+
|
79
|
+
if input is not None and inputs is not None:
|
80
|
+
raise Exception("input and inputs should not be used together")
|
81
|
+
|
82
|
+
if input:
|
83
|
+
item = item_dict[input]
|
84
|
+
single_modal.set_data(
|
85
|
+
self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
|
86
|
+
elif inputs:
|
87
|
+
# check keys
|
88
|
+
filtered_dict = {k: item_dict[k]
|
89
|
+
for k in inputs if k in item_dict}
|
90
|
+
print(filtered_dict.keys())
|
91
|
+
first_input = next(iter(filtered_dict.values()))
|
92
|
+
train_data_list = [f[1] for f in filtered_dict.values()]
|
93
|
+
test_data_list = [f[3] for f in filtered_dict.values()]
|
94
|
+
train_data = np.stack(train_data_list, axis=1)
|
95
|
+
test_data = np.stack(test_data_list, axis=1)
|
96
|
+
train_label = first_input[2]
|
97
|
+
test_label = first_input[4]
|
98
|
+
single_modal.set_data(
|
99
|
+
self.train_idx_arr, self.val_idx_arr, train_data, train_label, test_data, test_label)
|
100
|
+
else:
|
101
|
+
raise Exception("check configurations")
|
102
|
+
else: # TODO default model maybe?
|
103
|
+
item = self.items[0]
|
78
104
|
single_modal.set_data(
|
79
|
-
|
80
|
-
|
81
|
-
# check keys
|
82
|
-
filtered_dict = {k: item_dict[k]
|
83
|
-
for k in inputs if k in item_dict}
|
84
|
-
print(filtered_dict.keys())
|
85
|
-
first_input = next(iter(filtered_dict.values()))
|
86
|
-
train_data_list = [f[1] for f in filtered_dict.values()]
|
87
|
-
test_data_list = [f[3] for f in filtered_dict.values()]
|
88
|
-
train_data = np.stack(train_data_list, axis=1)
|
89
|
-
test_data = np.stack(test_data_list, axis=1)
|
90
|
-
train_label = first_input[2]
|
91
|
-
test_label = first_input[4]
|
92
|
-
single_modal.set_data(
|
93
|
-
self.train_idx_arr, self.val_idx_arr, train_data, train_label, test_data, test_label)
|
94
|
-
else:
|
95
|
-
raise Exception("check configurations")
|
105
|
+
self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
|
106
|
+
|
96
107
|
logs, metrics, prediction = single_modal.fit_and_evaluate()
|
97
108
|
self.result.add_metric(name, metrics)
|
98
109
|
single_results[name] = prediction
|
ddi_fw/pipeline/multi_pipeline.py
CHANGED
@@ -50,6 +50,7 @@ class MultiPipeline():
|
|
50
50
|
type = config.get("type")
|
51
51
|
library = config.get("library")
|
52
52
|
|
53
|
+
use_mlflow = config.get("use_mlflow")
|
53
54
|
experiment_name = config.get("experiment_name")
|
54
55
|
experiment_description = config.get("experiment_description")
|
55
56
|
experiment_tags = config.get("experiment_tags")
|
@@ -84,6 +85,7 @@ class MultiPipeline():
|
|
84
85
|
if type == "general":
|
85
86
|
pipeline = Pipeline(
|
86
87
|
library=library,
|
88
|
+
use_mlflow=use_mlflow,
|
87
89
|
experiment_name=experiment_name,
|
88
90
|
experiment_description=experiment_description,
|
89
91
|
experiment_tags=experiment_tags,
|
ddi_fw/pipeline/pipeline.py
CHANGED
@@ -37,10 +37,15 @@ class Pipeline(BaseModel):
|
|
37
37
|
model: Optional[Any] = None
|
38
38
|
multi_modal: Optional[Any] = None
|
39
39
|
use_mlflow: bool = False
|
40
|
+
_dataset: BaseDataset = []
|
40
41
|
_items: List = []
|
41
42
|
_train_idx_arr: List | None = []
|
42
43
|
_val_idx_arr: List | None = []
|
43
44
|
|
45
|
+
@property
|
46
|
+
def dataset(self) -> BaseDataset:
|
47
|
+
return self._dataset
|
48
|
+
|
44
49
|
@property
|
45
50
|
def items(self) -> List:
|
46
51
|
return self._items
|
@@ -168,7 +173,10 @@ class Pipeline(BaseModel):
|
|
168
173
|
dataset = self.dataset_type(**kwargs)
|
169
174
|
|
170
175
|
# X_train, X_test, y_train, y_test, train_indexes, test_indexes, train_idx_arr, val_idx_arr = dataset.load()
|
176
|
+
|
171
177
|
dataset.load()
|
178
|
+
self._dataset = dataset
|
179
|
+
|
172
180
|
dataframe = dataset.dataframe
|
173
181
|
|
174
182
|
# Check if any of the arrays are None or empty
|
@@ -180,7 +188,7 @@ class Pipeline(BaseModel):
|
|
180
188
|
# Check if the dataframe is None or empty
|
181
189
|
is_dataframe_valid = dataframe is not None and not dataframe.empty
|
182
190
|
|
183
|
-
if not (is_data_valid
|
191
|
+
if not (is_data_valid or is_dataframe_valid):
|
184
192
|
raise ValueError("The dataset is not loaded")
|
185
193
|
|
186
194
|
# column name, train data, train label, test data, test label
|
{ddi_fw-0.0.190.dist-info → ddi_fw-0.0.192.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
|
2
|
-
ddi_fw/datasets/core.py,sha256=
|
2
|
+
ddi_fw/datasets/core.py,sha256=4705a94kKBueyWFXRJ3cnivAGKjrR89uBBKpxtMozOM,11080
|
3
3
|
ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
|
4
4
|
ddi_fw/datasets/db_utils.py,sha256=OTsa3d-Iic7z3HmzSQK9UigedRbHDxYChJk0s4GfLnw,6191
|
5
5
|
ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
|
@@ -74,7 +74,7 @@ ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSz
|
|
74
74
|
ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
|
75
75
|
ddi_fw/ml/__init__.py,sha256=tIxiW0g6q1VsmDYVXR_ovvHQR3SCir8g2bKxx_CrS7s,221
|
76
76
|
ddi_fw/ml/evaluation_helper.py,sha256=2-7CLSgGTqLEk4HkgCVIOt-GxfLAn6SBozJghAtHb5M,11581
|
77
|
-
ddi_fw/ml/ml_helper.py,sha256
|
77
|
+
ddi_fw/ml/ml_helper.py,sha256=-pgnLhuPBp60IZxAvS11oDYggrgQY3GRHkAwQVWM9XI,6943
|
78
78
|
ddi_fw/ml/model_wrapper.py,sha256=kabPXuo7S8tGkp9a00V04n4rXDmv7dD8wYGMjotISRc,1050
|
79
79
|
ddi_fw/ml/pytorch_wrapper.py,sha256=pe6UsjP2XeTgLxDnIUiodoyhJTGCxV27wD4Cjxysu2Q,8553
|
80
80
|
ddi_fw/ml/tensorflow_wrapper.py,sha256=lNJvg3odqMKmILecOMdcOCAOrwzWZDzxB0DWGcYWsPg,12952
|
@@ -83,9 +83,9 @@ ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6
|
|
83
83
|
ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
|
84
84
|
ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,212
|
85
85
|
ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
|
86
|
-
ddi_fw/pipeline/multi_pipeline.py,sha256=
|
86
|
+
ddi_fw/pipeline/multi_pipeline.py,sha256=fYyvwIOscUahjXd3QO5RSFrp1LliGR7RzOZyAXrXXz4,5637
|
87
87
|
ddi_fw/pipeline/ner_pipeline.py,sha256=Bp6BA6nozfWFaMHH6jKlzesnCGO6qiMkzdGy_ed6nh0,5947
|
88
|
-
ddi_fw/pipeline/pipeline.py,sha256=
|
88
|
+
ddi_fw/pipeline/pipeline.py,sha256=GMMauyp0GvdaQLyQ5dPBffDDxFK28hdDtPUzdFX9-Yk,9961
|
89
89
|
ddi_fw/utils/__init__.py,sha256=HC32XkYQTYH_9vt0eX6tqQngEFG-R70hGrYkT-BcHCk,519
|
90
90
|
ddi_fw/utils/categorical_data_encoding_checker.py,sha256=gzb_vUDBrCMUhBxY1fBYTe8hmK72p0_uw3DTga8cqP8,1580
|
91
91
|
ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
|
@@ -99,7 +99,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
|
|
99
99
|
ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
|
100
100
|
ddi_fw/vectorization/feature_vector_generation.py,sha256=Z1A_DOBqDFPqLN4YB-3oYlOQWJK-X6Oes6UFjpzR47Q,4760
|
101
101
|
ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
|
102
|
-
ddi_fw-0.0.
|
103
|
-
ddi_fw-0.0.
|
104
|
-
ddi_fw-0.0.
|
105
|
-
ddi_fw-0.0.
|
102
|
+
ddi_fw-0.0.192.dist-info/METADATA,sha256=MhOs-typXfDNCIRm8_9NheEkyNYmnmQbArQGstSOAas,2542
|
103
|
+
ddi_fw-0.0.192.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
104
|
+
ddi_fw-0.0.192.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
105
|
+
ddi_fw-0.0.192.dist-info/RECORD,,
|
File without changes
|
File without changes
|