ddi-fw 0.0.209__py3-none-any.whl → 0.0.211__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- ddi_fw/pipeline/multi_pipeline.py +61 -3
- ddi_fw/pipeline/pipeline.py +6 -6
- {ddi_fw-0.0.209.dist-info → ddi_fw-0.0.211.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.209.dist-info → ddi_fw-0.0.211.dist-info}/RECORD +6 -6
- {ddi_fw-0.0.209.dist-info → ddi_fw-0.0.211.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.209.dist-info → ddi_fw-0.0.211.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
|
|
1
1
|
import json
|
2
|
+
from typing import Optional
|
2
3
|
from ddi_fw.pipeline.pipeline import Pipeline
|
3
4
|
from ddi_fw.pipeline.ner_pipeline import NerParameterSearch
|
4
5
|
import importlib
|
@@ -41,11 +42,68 @@ def get_import(full_path_of_import):
|
|
41
42
|
|
42
43
|
|
43
44
|
class MultiPipeline():
|
44
|
-
def __init__(self, experiments_config_file):
|
45
|
-
|
45
|
+
# def __init__(self, experiments_config_file, experiments_config):
|
46
|
+
# if experiments_config_file is None and experiments_config is None:
|
47
|
+
# raise ValueError("Either experiments_config_file or experiments_config must be provided.")
|
48
|
+
# if experiments_config_file is not None and experiments_config is not None:
|
49
|
+
# raise ValueError("Only one of experiments_config_file or experiments_config should be provided.")
|
50
|
+
# if experiments_config_file is not None:
|
51
|
+
# self.experiments_config = load_config(experiments_config_file)
|
52
|
+
# else:
|
53
|
+
# self.experiments_config = experiments_config
|
54
|
+
# self.items = []
|
55
|
+
# self.pipeline_resuts = dict()
|
56
|
+
|
57
|
+
def __init__(self, experiments_config_file: Optional[str] = None, experiments_config: Optional[dict] = None):
|
58
|
+
"""
|
59
|
+
Initialize the MultiPipeline.
|
60
|
+
|
61
|
+
Args:
|
62
|
+
experiments_config_file (str, optional): Path to the experiments configuration file.
|
63
|
+
experiments_config (dict, optional): Dictionary containing the experiments configuration.
|
64
|
+
|
65
|
+
Raises:
|
66
|
+
ValueError: If neither or both of the parameters are provided.
|
67
|
+
"""
|
68
|
+
self.experiments_config = self._validate_and_load_config(experiments_config_file, experiments_config)
|
46
69
|
self.items = []
|
70
|
+
# self.pipeline_results = {}
|
47
71
|
self.pipeline_resuts = dict()
|
48
72
|
|
73
|
+
def _validate_and_load_config(self, experiments_config_file: Optional[str], experiments_config: Optional[dict]) -> dict:
|
74
|
+
"""
|
75
|
+
Validate and load the experiments configuration.
|
76
|
+
|
77
|
+
Args:
|
78
|
+
experiments_config_file (str, optional): Path to the experiments configuration file.
|
79
|
+
experiments_config (dict, optional): Dictionary containing the experiments configuration.
|
80
|
+
|
81
|
+
Returns:
|
82
|
+
dict: The loaded experiments configuration.
|
83
|
+
|
84
|
+
Raises:
|
85
|
+
ValueError: If neither or both of the parameters are provided.
|
86
|
+
"""
|
87
|
+
if experiments_config_file is None and experiments_config is None:
|
88
|
+
raise ValueError("Either 'experiments_config_file' or 'experiments_config' must be provided.")
|
89
|
+
if experiments_config_file is not None and experiments_config is not None:
|
90
|
+
raise ValueError("Only one of 'experiments_config_file' or 'experiments_config' should be provided.")
|
91
|
+
|
92
|
+
if experiments_config_file is not None:
|
93
|
+
try:
|
94
|
+
config = load_config(experiments_config_file)
|
95
|
+
except FileNotFoundError:
|
96
|
+
raise FileNotFoundError(f"Configuration file '{experiments_config_file}' not found.")
|
97
|
+
else:
|
98
|
+
config = experiments_config
|
99
|
+
if config is None:
|
100
|
+
raise ValueError("Configuration cannot be None.")
|
101
|
+
if not isinstance(config, dict):
|
102
|
+
raise ValueError("Configuration must be a dictionary.")
|
103
|
+
# if "experiments" not in config:
|
104
|
+
# raise ValueError("Configuration must contain 'experiments' key.")
|
105
|
+
return config
|
106
|
+
|
49
107
|
def __create_pipeline(self, config):
|
50
108
|
type = config.get("type")
|
51
109
|
library = config.get("library")
|
@@ -57,7 +115,7 @@ class MultiPipeline():
|
|
57
115
|
tracking_uri = config.get("tracking_uri")
|
58
116
|
artifact_location = config.get("artifact_location")
|
59
117
|
#new
|
60
|
-
default_model = config.get("default_model")
|
118
|
+
default_model = config.get("default_model")
|
61
119
|
multi_modal = config.get("multi_modal")
|
62
120
|
columns = config.get("columns")
|
63
121
|
ner_data_file = config.get("ner_data_file")
|
ddi_fw/pipeline/pipeline.py
CHANGED
@@ -33,18 +33,18 @@ class Pipeline(BaseModel):
|
|
33
33
|
embedding_pooling_strategy_type: Type[PoolingStrategy] | None = None
|
34
34
|
ner_data_file: Optional[str] = None
|
35
35
|
ner_threshold: Optional[dict] = None
|
36
|
-
combinations: Optional[List[
|
36
|
+
combinations: Optional[List[tuple]] = None
|
37
37
|
model: Optional[Any] = None
|
38
38
|
default_model: Optional[Any] = None
|
39
39
|
multi_modal: Optional[Any] = None
|
40
40
|
use_mlflow: bool = False
|
41
|
-
_dataset: BaseDataset =
|
41
|
+
_dataset: BaseDataset | None = None
|
42
42
|
_items: List = []
|
43
43
|
_train_idx_arr: List | None = []
|
44
44
|
_val_idx_arr: List | None = []
|
45
45
|
|
46
46
|
@property
|
47
|
-
def dataset(self) -> BaseDataset:
|
47
|
+
def dataset(self) -> BaseDataset | None:
|
48
48
|
return self._dataset
|
49
49
|
|
50
50
|
@property
|
@@ -126,14 +126,14 @@ class Pipeline(BaseModel):
|
|
126
126
|
kwargs[k] = v
|
127
127
|
|
128
128
|
|
129
|
-
|
130
|
-
|
129
|
+
self.ner_df = CTakesNER(df=None).load(
|
130
|
+
filename=self.ner_data_file) if self.ner_data_file else None
|
131
131
|
|
132
132
|
dataset_splitter = self.dataset_splitter_type()
|
133
133
|
pooling_strategy = self.embedding_pooling_strategy_type(
|
134
134
|
) if self.embedding_pooling_strategy_type else None
|
135
135
|
if issubclass(self.dataset_type, TextDatasetMixin):
|
136
|
-
|
136
|
+
kwargs["ner_df"] = self.ner_df
|
137
137
|
dataset = self.dataset_type(
|
138
138
|
embedding_dict=self.embedding_dict,
|
139
139
|
pooling_strategy=pooling_strategy,
|
@@ -83,9 +83,9 @@ ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6
|
|
83
83
|
ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
|
84
84
|
ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,212
|
85
85
|
ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
|
86
|
-
ddi_fw/pipeline/multi_pipeline.py,sha256=
|
86
|
+
ddi_fw/pipeline/multi_pipeline.py,sha256=SZFJ9QSPD_3mcG9NHZOtMqKyNvyWrodsdsLryMyDdUw,8686
|
87
87
|
ddi_fw/pipeline/ner_pipeline.py,sha256=Bp6BA6nozfWFaMHH6jKlzesnCGO6qiMkzdGy_ed6nh0,5947
|
88
|
-
ddi_fw/pipeline/pipeline.py,sha256=
|
88
|
+
ddi_fw/pipeline/pipeline.py,sha256=_sRzMyxGSJo4GhM8ZJhxwvMvKsqFa1WSSECpx4SgdDw,9181
|
89
89
|
ddi_fw/utils/__init__.py,sha256=HC32XkYQTYH_9vt0eX6tqQngEFG-R70hGrYkT-BcHCk,519
|
90
90
|
ddi_fw/utils/categorical_data_encoding_checker.py,sha256=gzb_vUDBrCMUhBxY1fBYTe8hmK72p0_uw3DTga8cqP8,1580
|
91
91
|
ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
|
@@ -99,7 +99,7 @@ ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,55
|
|
99
99
|
ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
|
100
100
|
ddi_fw/vectorization/feature_vector_generation.py,sha256=EBf-XAiwQwr68az91erEYNegfeqssBR29kVgrliIyac,4765
|
101
101
|
ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
|
102
|
-
ddi_fw-0.0.
|
103
|
-
ddi_fw-0.0.
|
104
|
-
ddi_fw-0.0.
|
105
|
-
ddi_fw-0.0.
|
102
|
+
ddi_fw-0.0.211.dist-info/METADATA,sha256=HO-fzXxm5AGnMMB8S0hnkOtH18bvaanoIAJgLOCP8gk,2631
|
103
|
+
ddi_fw-0.0.211.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
104
|
+
ddi_fw-0.0.211.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
105
|
+
ddi_fw-0.0.211.dist-info/RECORD,,
|
File without changes
|
File without changes
|