ddi-fw 0.0.217__py3-none-any.whl → 0.0.219__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/datasets/core.py +1 -0
- ddi_fw/datasets/ddi_mdl/base.py +24 -8
- ddi_fw/datasets/mdf_sa_ddi/base.py +266 -55
- ddi_fw/ml/__init__.py +2 -1
- ddi_fw/ml/ml_helper.py +26 -30
- ddi_fw/ml/model_wrapper.py +0 -1
- ddi_fw/ml/tensorflow_wrapper.py +165 -89
- ddi_fw/ml/tracking_service.py +194 -0
- ddi_fw/pipeline/multi_pipeline.py +52 -32
- ddi_fw/pipeline/{multi_pipeline_v2.py → multi_pipeline_org.py} +25 -48
- ddi_fw/pipeline/pipeline.py +38 -96
- ddi_fw/utils/utils.py +51 -51
- {ddi_fw-0.0.217.dist-info → ddi_fw-0.0.219.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.217.dist-info → ddi_fw-0.0.219.dist-info}/RECORD +16 -15
- {ddi_fw-0.0.217.dist-info → ddi_fw-0.0.219.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.217.dist-info → ddi_fw-0.0.219.dist-info}/top_level.txt +0 -0
@@ -107,30 +107,57 @@ class MultiPipeline():
|
|
107
107
|
def __create_pipeline(self, config):
|
108
108
|
type = config.get("type")
|
109
109
|
library = config.get("library")
|
110
|
-
|
111
|
-
use_mlflow = config.get("use_mlflow")
|
112
110
|
experiment_name = config.get("experiment_name")
|
113
111
|
experiment_description = config.get("experiment_description")
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
112
|
+
|
113
|
+
# Tracking configuration
|
114
|
+
tracking_config = config.get("tracking_config", {})
|
115
|
+
tracking_library = tracking_config.get("library")
|
116
|
+
use_tracking = tracking_config.get("use_tracking", False)
|
117
|
+
tracking_params = tracking_config.get("params", {}).get(tracking_library, {})
|
118
|
+
|
119
|
+
# tracking_uri = config.get("tracking_uri")
|
120
|
+
# artifact_location = config.get("artifact_location")
|
121
|
+
|
122
|
+
# Dataset configuration
|
123
|
+
dataset_config = config.get("dataset", {})
|
124
|
+
dataset_type = get_import(dataset_config.get("dataset_type"))
|
125
|
+
dataset_splitter_type = get_import(dataset_config.get("dataset_splitter_type"))
|
126
|
+
columns = dataset_config.get("columns", [])
|
127
|
+
additional_config = dataset_config.get("additional_config", {})
|
128
|
+
|
129
|
+
# Vector database configuration
|
130
|
+
vector_database = config.get("vector_databases", {})
|
131
|
+
vector_db_persist_directory = None
|
132
|
+
vector_db_collection_name = None
|
133
|
+
embedding_pooling_strategy = None
|
134
|
+
column_embedding_configs = None
|
135
|
+
if vector_database:
|
136
|
+
vector_db_persist_directory = vector_database.get("vector_db_persist_directory")
|
137
|
+
vector_db_collection_name = vector_database.get("vector_db_collection_name")
|
138
|
+
embedding_pooling_strategy = get_import(vector_database.get("embedding_pooling_strategy"))
|
139
|
+
column_embedding_configs = vector_database.get("column_embedding_configs")
|
140
|
+
|
141
|
+
# Combination strategy
|
142
|
+
combination_strategy_config = config.get("combination_strategy", {})
|
143
|
+
combination_type = get_import(combination_strategy_config.get("type")) if combination_strategy_config else None
|
144
|
+
kwargs_combination_params = combination_strategy_config.get("params", {})
|
145
|
+
combinations = combination_type(**kwargs_combination_params).generate() if combination_type else []
|
146
|
+
|
147
|
+
# Default model configuration
|
148
|
+
default_model = config.get("default_model", {})
|
149
|
+
default_model_type = get_import(default_model.get("model_type"))
|
150
|
+
default_model_params = default_model.get("params", {})
|
151
|
+
|
119
152
|
multi_modal = config.get("multi_modal")
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
embedding_pooling_strategy = get_import(
|
129
|
-
config.get("embedding_pooling_strategy_type")) if config.get("embedding_pooling_strategy_type") else None
|
130
|
-
# Dynamically import the model and dataset classes
|
131
|
-
# model_type = get_import(config.get("model_type"))
|
132
|
-
dataset_type = get_import(config.get("dataset_type"))
|
133
|
-
dataset_splitter_type = get_import(config.get("dataset_splitter_type"))
|
153
|
+
|
154
|
+
|
155
|
+
|
156
|
+
#ner move it to related dataset
|
157
|
+
|
158
|
+
# ner_data_file = config.get("ner_data_file")
|
159
|
+
# ner_threshold = config.get("ner_threshold")
|
160
|
+
|
134
161
|
|
135
162
|
combination_type = None
|
136
163
|
kwargs_combination_params=None
|
@@ -146,21 +173,18 @@ class MultiPipeline():
|
|
146
173
|
if type == "general":
|
147
174
|
pipeline = Pipeline(
|
148
175
|
library=library,
|
149
|
-
|
176
|
+
tracking_library=tracking_library,
|
177
|
+
tracking_params=tracking_params,
|
150
178
|
experiment_name=experiment_name,
|
151
179
|
experiment_description=experiment_description,
|
152
|
-
experiment_tags=experiment_tags,
|
153
|
-
artifact_location=artifact_location,
|
154
|
-
tracking_uri=tracking_uri,
|
155
180
|
dataset_type=dataset_type,
|
181
|
+
dataset_additional_config=additional_config,
|
156
182
|
dataset_splitter_type=dataset_splitter_type,
|
157
183
|
columns=columns,
|
158
184
|
column_embedding_configs=column_embedding_configs,
|
159
185
|
vector_db_persist_directory=vector_db_persist_directory,
|
160
186
|
vector_db_collection_name=vector_db_collection_name,
|
161
187
|
embedding_pooling_strategy_type=embedding_pooling_strategy,
|
162
|
-
ner_data_file=ner_data_file,
|
163
|
-
ner_threshold=ner_threshold,
|
164
188
|
combinations=combinations,
|
165
189
|
default_model=default_model,
|
166
190
|
multi_modal= multi_modal)
|
@@ -172,14 +196,10 @@ class MultiPipeline():
|
|
172
196
|
experiment_tags=experiment_tags,
|
173
197
|
tracking_uri=tracking_uri,
|
174
198
|
dataset_type=dataset_type,
|
175
|
-
dataset_splitter_type=dataset_splitter_type,
|
176
199
|
umls_code_types = None,
|
177
200
|
text_types = None,
|
178
|
-
|
179
|
-
max_threshold_dict=ner_max_threshold_dict,
|
180
|
-
columns=columns,
|
201
|
+
columns=['tui', 'cui', 'entities'],
|
181
202
|
ner_data_file=ner_data_file,
|
182
|
-
default_model=default_model,
|
183
203
|
multi_modal= multi_modal
|
184
204
|
)
|
185
205
|
|
@@ -107,57 +107,30 @@ class MultiPipeline():
|
|
107
107
|
def __create_pipeline(self, config):
|
108
108
|
type = config.get("type")
|
109
109
|
library = config.get("library")
|
110
|
+
|
111
|
+
use_mlflow = config.get("use_mlflow")
|
110
112
|
experiment_name = config.get("experiment_name")
|
111
113
|
experiment_description = config.get("experiment_description")
|
112
114
|
experiment_tags = config.get("experiment_tags")
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
use_tracking = tracking_config.get("use_tracking", False)
|
118
|
-
tracking_params = tracking_config.get("params", {}).get(tracking_library, {})
|
119
|
-
|
120
|
-
# tracking_uri = config.get("tracking_uri")
|
121
|
-
# artifact_location = config.get("artifact_location")
|
122
|
-
|
123
|
-
# Dataset configuration
|
124
|
-
dataset_config = config.get("dataset", {})
|
125
|
-
dataset_type = get_import(dataset_config.get("dataset_type"))
|
126
|
-
dataset_splitter_type = get_import(dataset_config.get("dataset_splitter_type"))
|
127
|
-
columns = dataset_config.get("columns", [])
|
128
|
-
additional_config = dataset_config.get("additional_config", {})
|
129
|
-
|
130
|
-
# Vector database configuration
|
131
|
-
vector_database = config.get("vector_databases", {})
|
132
|
-
vector_db_persist_directory = None
|
133
|
-
vector_db_collection_name = None
|
134
|
-
embedding_pooling_strategy = None
|
135
|
-
if vector_database:
|
136
|
-
vector_db_persist_directory = vector_database.get("vector_db_persist_directory")
|
137
|
-
vector_db_collection_name = vector_database.get("vector_db_collection_name")
|
138
|
-
embedding_pooling_strategy = get_import(vector_database.get("embedding_pooling_strategy"))
|
139
|
-
column_embedding_configs = vector_database.get("column_embedding_configs")
|
140
|
-
|
141
|
-
# Combination strategy
|
142
|
-
combination_strategy_config = config.get("combination_strategy", {})
|
143
|
-
combination_type = get_import(combination_strategy_config.get("type")) if combination_strategy_config else None
|
144
|
-
kwargs_combination_params = combination_strategy_config.get("params", {})
|
145
|
-
combinations = combination_type(**kwargs_combination_params).generate() if combination_type else []
|
146
|
-
|
147
|
-
# Default model configuration
|
148
|
-
default_model_config = config.get("default_model", {})
|
149
|
-
default_model_type = get_import(default_model_config.get("model_type"))
|
150
|
-
default_model_params = default_model_config.get("params", {})
|
151
|
-
|
115
|
+
tracking_uri = config.get("tracking_uri")
|
116
|
+
artifact_location = config.get("artifact_location")
|
117
|
+
#new
|
118
|
+
default_model = config.get("default_model")
|
152
119
|
multi_modal = config.get("multi_modal")
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
120
|
+
columns = config.get("columns")
|
121
|
+
ner_data_file = config.get("ner_data_file")
|
122
|
+
ner_threshold = config.get("ner_threshold")
|
123
|
+
ner_min_threshold_dict = config.get("ner_min_threshold_dict")
|
124
|
+
ner_max_threshold_dict = config.get("ner_max_threshold_dict")
|
125
|
+
column_embedding_configs = config.get("column_embedding_configs")
|
126
|
+
vector_db_persist_directory = config.get("vector_db_persist_directory")
|
127
|
+
vector_db_collection_name = config.get("vector_db_collection_name")
|
128
|
+
embedding_pooling_strategy = get_import(
|
129
|
+
config.get("embedding_pooling_strategy_type")) if config.get("embedding_pooling_strategy_type") else None
|
130
|
+
# Dynamically import the model and dataset classes
|
131
|
+
# model_type = get_import(config.get("model_type"))
|
132
|
+
dataset_type = get_import(config.get("dataset_type"))
|
133
|
+
dataset_splitter_type = get_import(config.get("dataset_splitter_type"))
|
161
134
|
|
162
135
|
combination_type = None
|
163
136
|
kwargs_combination_params=None
|
@@ -199,10 +172,14 @@ class MultiPipeline():
|
|
199
172
|
experiment_tags=experiment_tags,
|
200
173
|
tracking_uri=tracking_uri,
|
201
174
|
dataset_type=dataset_type,
|
175
|
+
dataset_splitter_type=dataset_splitter_type,
|
202
176
|
umls_code_types = None,
|
203
177
|
text_types = None,
|
204
|
-
|
178
|
+
min_threshold_dict=ner_min_threshold_dict,
|
179
|
+
max_threshold_dict=ner_max_threshold_dict,
|
180
|
+
columns=columns,
|
205
181
|
ner_data_file=ner_data_file,
|
182
|
+
default_model=default_model,
|
206
183
|
multi_modal= multi_modal
|
207
184
|
)
|
208
185
|
|
ddi_fw/pipeline/pipeline.py
CHANGED
@@ -1,29 +1,24 @@
|
|
1
|
-
from typing import Any, Dict, List, Optional, Type
|
1
|
+
from typing import Any, Dict, List, Optional, Type
|
2
2
|
from ddi_fw.datasets.dataset_splitter import DatasetSplitter
|
3
|
-
import numpy as np
|
4
|
-
import pandas as pd
|
5
|
-
import chromadb
|
6
|
-
from collections import defaultdict
|
7
|
-
from chromadb.api.types import IncludeEnum
|
8
3
|
|
9
4
|
from pydantic import BaseModel
|
10
5
|
from ddi_fw.datasets.core import TextDatasetMixin
|
11
|
-
from ddi_fw.
|
6
|
+
from ddi_fw.ml.tracking_service import TrackingService
|
12
7
|
from ddi_fw.langchain.embeddings import PoolingStrategy
|
13
|
-
from ddi_fw.datasets import BaseDataset
|
14
|
-
from ddi_fw.langchain.embeddings import SumPoolingStrategy
|
15
|
-
import mlflow
|
8
|
+
from ddi_fw.datasets import BaseDataset
|
16
9
|
from ddi_fw.ml import MultiModalRunner
|
10
|
+
import logging
|
17
11
|
|
18
12
|
|
19
13
|
class Pipeline(BaseModel):
|
14
|
+
|
20
15
|
library: str = 'tensorflow'
|
21
16
|
experiment_name: str
|
22
17
|
experiment_description: str
|
23
|
-
|
24
|
-
|
25
|
-
tracking_uri: Optional[str] = None
|
18
|
+
tracking_library: str
|
19
|
+
tracking_params: Optional[Dict[str, Any]] = None
|
26
20
|
dataset_type: Type[BaseDataset]
|
21
|
+
dataset_additional_config: Optional[Dict[str, Any]] = None
|
27
22
|
dataset_splitter_type: Type[DatasetSplitter] = DatasetSplitter
|
28
23
|
columns: Optional[List[str]] = None
|
29
24
|
embedding_dict: Optional[Dict[str, Any]] = None
|
@@ -31,22 +26,24 @@ class Pipeline(BaseModel):
|
|
31
26
|
vector_db_persist_directory: Optional[str] = None
|
32
27
|
vector_db_collection_name: Optional[str] = None
|
33
28
|
embedding_pooling_strategy_type: Type[PoolingStrategy] | None = None
|
34
|
-
ner_data_file: Optional[str] = None
|
35
|
-
ner_threshold: Optional[dict] = None
|
36
29
|
combinations: Optional[List[tuple]] = None
|
37
30
|
model: Optional[Any] = None
|
38
31
|
default_model: Optional[Any] = None
|
39
32
|
multi_modal: Optional[Any] = None
|
40
|
-
|
33
|
+
_tracking_service: TrackingService | None = None
|
41
34
|
_dataset: BaseDataset | None = None
|
42
35
|
_items: List = []
|
43
36
|
_train_idx_arr: List | None = []
|
44
37
|
_val_idx_arr: List | None = []
|
45
38
|
|
39
|
+
@property
|
40
|
+
def tracking_service(self) -> TrackingService | None:
|
41
|
+
return self._tracking_service
|
42
|
+
|
46
43
|
@property
|
47
44
|
def dataset(self) -> BaseDataset | None:
|
48
45
|
return self._dataset
|
49
|
-
|
46
|
+
|
50
47
|
@property
|
51
48
|
def items(self) -> List:
|
52
49
|
return self._items
|
@@ -62,56 +59,11 @@ class Pipeline(BaseModel):
|
|
62
59
|
class Config:
|
63
60
|
arbitrary_types_allowed = True
|
64
61
|
|
65
|
-
#
|
66
|
-
# """
|
67
|
-
# Fetch embeddings and metadata from a persistent Chroma vector database and update the provided embedding_dict.
|
68
|
-
|
69
|
-
# Args:
|
70
|
-
# - vector_db_persist_directory (str): The path to the directory where the Chroma vector database is stored.
|
71
|
-
# - vector_db_collection_name (str): The name of the collection to query.
|
72
|
-
# - embedding_dict (dict): The existing dictionary to update with embeddings.
|
73
|
-
|
74
|
-
# """
|
75
|
-
# if vector_db_persist_directory:
|
76
|
-
# # Initialize the Chroma client and get the collection
|
77
|
-
# vector_db = chromadb.PersistentClient(
|
78
|
-
# path=vector_db_persist_directory)
|
79
|
-
# collection = vector_db.get_collection(vector_db_collection_name)
|
80
|
-
# include = [IncludeEnum.embeddings, IncludeEnum.metadatas]
|
81
|
-
# dictionary: chromadb.GetResult
|
82
|
-
# # Fetch the embeddings and metadata
|
83
|
-
# if column == None:
|
84
|
-
# dictionary = collection.get(
|
85
|
-
# include=include
|
86
|
-
# # include=['embeddings', 'metadatas']
|
87
|
-
# )
|
88
|
-
# print(
|
89
|
-
# f"Embeddings are calculated from {vector_db_collection_name}")
|
90
|
-
# else:
|
91
|
-
# dictionary = collection.get(
|
92
|
-
# include=include,
|
93
|
-
# # include=['embeddings', 'metadatas'],
|
94
|
-
# where={
|
95
|
-
# "type": {"$eq": f"{column}"}})
|
96
|
-
# print(
|
97
|
-
# f"Embeddings of {column} are calculated from {vector_db_collection_name}")
|
98
|
-
|
99
|
-
# # Populate the embedding dictionary with embeddings from the vector database
|
100
|
-
# metadatas = dictionary["metadatas"]
|
101
|
-
# embeddings = dictionary["embeddings"]
|
102
|
-
# if metadatas is None or embeddings is None:
|
103
|
-
# raise ValueError(
|
104
|
-
# "The collection does not contain embeddings or metadatas.")
|
105
|
-
# for metadata, embedding in zip(metadatas, embeddings):
|
106
|
-
# embedding_dict[metadata["type"]
|
107
|
-
# ][metadata["id"]].append(embedding)
|
108
|
-
|
109
|
-
# else:
|
110
|
-
# raise ValueError(
|
111
|
-
# "Persistent directory for the vector DB is not specified.")
|
112
|
-
|
113
|
-
#TODO embedding'leri set etme kimin görevi
|
62
|
+
# TODO embedding'leri set etme kimin görevi
|
114
63
|
def build(self):
|
64
|
+
self._tracking_service = TrackingService(self.experiment_name,
|
65
|
+
backend=self.tracking_library, tracking_params=self.tracking_params)
|
66
|
+
|
115
67
|
if self.embedding_pooling_strategy_type is not None and not isinstance(self.embedding_pooling_strategy_type, type):
|
116
68
|
raise TypeError(
|
117
69
|
"self.embedding_pooling_strategy_type must be a class, not an instance")
|
@@ -120,29 +72,29 @@ class Pipeline(BaseModel):
|
|
120
72
|
"self.dataset_type must be a class, not an instance")
|
121
73
|
|
122
74
|
# 'enzyme','target','pathway','smile','all_text','indication', 'description','mechanism_of_action','pharmacodynamics', 'tui', 'cui', 'entities'
|
123
|
-
kwargs = {"columns": self.columns
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
75
|
+
kwargs = {"columns": self.columns,
|
76
|
+
"additional_config": self.dataset_additional_config}
|
77
|
+
# DDIMDL Dataset'e aktar
|
78
|
+
# if self.ner_threshold:
|
79
|
+
# for k, v in self.ner_threshold.items():
|
80
|
+
# kwargs[k] = v
|
128
81
|
|
129
|
-
ner_df = CTakesNER(df=None).load(
|
130
|
-
|
82
|
+
# ner_df = CTakesNER(df=None).load(
|
83
|
+
# filename=self.ner_data_file) if self.ner_data_file else None
|
131
84
|
|
132
85
|
dataset_splitter = self.dataset_splitter_type()
|
133
86
|
pooling_strategy = self.embedding_pooling_strategy_type(
|
134
|
-
|
87
|
+
) if self.embedding_pooling_strategy_type else None
|
135
88
|
if issubclass(self.dataset_type, TextDatasetMixin):
|
136
|
-
kwargs["ner_df"] = ner_df
|
137
89
|
dataset = self.dataset_type(
|
138
|
-
embedding_dict=self.embedding_dict,
|
90
|
+
embedding_dict=self.embedding_dict,
|
139
91
|
pooling_strategy=pooling_strategy,
|
140
92
|
column_embedding_configs=self.column_embedding_configs,
|
141
93
|
vector_db_persist_directory=self.vector_db_persist_directory,
|
142
94
|
vector_db_collection_name=self.vector_db_collection_name,
|
143
95
|
dataset_splitter_type=self.dataset_splitter_type,
|
144
96
|
**kwargs)
|
145
|
-
|
97
|
+
|
146
98
|
elif self.dataset_type == BaseDataset:
|
147
99
|
dataset = self.dataset_type(
|
148
100
|
dataset_splitter_type=self.dataset_splitter_type,
|
@@ -151,11 +103,11 @@ class Pipeline(BaseModel):
|
|
151
103
|
dataset = self.dataset_type(**kwargs)
|
152
104
|
|
153
105
|
# X_train, X_test, y_train, y_test, train_indexes, test_indexes, train_idx_arr, val_idx_arr = dataset.load()
|
154
|
-
|
106
|
+
|
155
107
|
dataset.load()
|
156
|
-
|
108
|
+
|
157
109
|
self._dataset = dataset
|
158
|
-
|
110
|
+
|
159
111
|
dataframe = dataset.dataframe
|
160
112
|
|
161
113
|
# Check if any of the arrays are None or empty
|
@@ -180,25 +132,15 @@ class Pipeline(BaseModel):
|
|
180
132
|
return self
|
181
133
|
|
182
134
|
def run(self):
|
183
|
-
if self.
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
if mlflow.get_experiment_by_name(self.experiment_name) == None:
|
189
|
-
mlflow.create_experiment(
|
190
|
-
self.experiment_name, self.artifact_location)
|
191
|
-
if self.experiment_tags is not None:
|
192
|
-
mlflow.set_experiment_tags(self.experiment_tags)
|
193
|
-
mlflow.set_experiment(self.experiment_name)
|
135
|
+
if self._tracking_service is None:
|
136
|
+
logging.warning("Tracking service is not initialized.")
|
137
|
+
else:
|
138
|
+
self._tracking_service.setup()
|
194
139
|
|
195
140
|
y_test_label = self.items[0][4]
|
196
141
|
multi_modal_runner = MultiModalRunner(
|
197
|
-
library=self.library, multi_modal=self.multi_modal, default_model=
|
198
|
-
|
199
|
-
# library=self.library, model_func=model_func, batch_size=batch_size, epochs=epochs)
|
200
|
-
# multi_modal = TFMultiModal(
|
201
|
-
# model_func=model_func, batch_size=batch_size, epochs=epochs) # 100
|
142
|
+
library=self.library, multi_modal=self.multi_modal, default_model=self.default_model, tracking_service=self._tracking_service)
|
143
|
+
|
202
144
|
multi_modal_runner.set_data(
|
203
145
|
self.items, self.train_idx_arr, self.val_idx_arr, y_test_label)
|
204
146
|
combinations = self.combinations if self.combinations is not None else []
|
ddi_fw/utils/utils.py
CHANGED
@@ -2,7 +2,7 @@ import gzip
|
|
2
2
|
import json
|
3
3
|
import os
|
4
4
|
from datetime import datetime, timezone
|
5
|
-
from matplotlib import pyplot as plt
|
5
|
+
# from matplotlib import pyplot as plt
|
6
6
|
import shutil
|
7
7
|
|
8
8
|
def create_folder_if_not_exists(path):
|
@@ -65,53 +65,53 @@ def clear_directory(directory_path):
|
|
65
65
|
print(f"The directory does not exist: {directory_path}")
|
66
66
|
|
67
67
|
|
68
|
-
if __name__ == "__main__":
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
68
|
+
# if __name__ == "__main__":
|
69
|
+
# # json_file = f'C:\\Users\\kivanc\\Downloads\\metrics.json'
|
70
|
+
# # file_data = open(json_file, "r", 1).read()
|
71
|
+
# # a = json.loads(file_data) # store in json structure
|
72
|
+
# # # a = {'key1':1, 'key2':2}
|
73
|
+
# # compressed = compress_data(a)
|
74
|
+
# # with gzip.open('deneme.gzip', 'wb') as f:
|
75
|
+
# # f.write(compressed)
|
76
|
+
|
77
|
+
# # with gzip.open('deneme.gzip', 'r') as fin: # 4. gzip
|
78
|
+
# # json_bytes = fin.read() # 3. bytes (i.e. UTF-8)
|
79
|
+
# # json_bytes = gzip.decompress(json_bytes)
|
80
|
+
# # json_str = json_bytes.decode('UTF-8') # 2. string (i.e. JSON)
|
81
|
+
# # data = json.loads(json_str)
|
82
|
+
# # print(data)
|
83
|
+
|
84
|
+
# gzip_file = f'C:\\Users\\kivanc\\Downloads\\metrics (2).gzip'
|
85
|
+
# stored_file = f'C:\\Users\\kivanc\\Downloads\\save.png'
|
86
|
+
# metrics = decompress(gzip_file)
|
87
|
+
# # print(metrics)
|
88
|
+
|
89
|
+
# # Plot Precision-Recall curves for each class and micro-average
|
90
|
+
# fig = plt.figure()
|
91
|
+
# plt.step(metrics['recall']['micro_event'], metrics['precision']['micro_event'],
|
92
|
+
# color='b', alpha=0.2, where='post')
|
93
|
+
# plt.fill_between(
|
94
|
+
# metrics['recall']["micro_event"], metrics['precision']["micro_event"], step='post', alpha=0.2, color='b')
|
95
|
+
|
96
|
+
# # for i in range(65):
|
97
|
+
# # plt.step( metrics['recall'][str(i)], metrics['precision'][str(i)], where='post',
|
98
|
+
# # label='Class {0} (AUC={1:0.2f})'.format(i, metrics['roc_aupr'][str(i)]))
|
99
|
+
|
100
|
+
# plt.xlabel('Recall')
|
101
|
+
# plt.ylabel('Precision')
|
102
|
+
# plt.ylim([0.0, 1.05])
|
103
|
+
# plt.xlim([0.0, 1.0])
|
104
|
+
# plt.title(
|
105
|
+
# 'Micro-average Precision-Recall curve: AUC={0:0.2f}'.format(metrics['roc_aupr']["micro"]))
|
106
|
+
# plt.legend(loc='best')
|
107
|
+
# plt.savefig(stored_file)
|
108
|
+
# # plt.show()
|
109
|
+
|
110
|
+
# import plotly.express as px
|
111
|
+
# import pandas as pd
|
112
|
+
# df = pd.DataFrame(dict(
|
113
|
+
# r=[1, 5, 2, 2, 3],
|
114
|
+
# theta=['processing cost','mechanical properties','chemical stability',
|
115
|
+
# 'thermal stability', 'device integration']))
|
116
|
+
# fig = px.line_polar(df, r='r', theta='theta', line_close=True)
|
117
|
+
# fig.show()
|
@@ -1,9 +1,9 @@
|
|
1
1
|
ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
|
2
|
-
ddi_fw/datasets/core.py,sha256=
|
2
|
+
ddi_fw/datasets/core.py,sha256=12S7gJnWlxBvUwhQ5HoBy1ILwTW1xm9r1v6P9pPgu_Y,16936
|
3
3
|
ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
|
4
4
|
ddi_fw/datasets/db_utils.py,sha256=xRj28U_uXTRPHcz3yIICczFUHXUPiAOZtAj5BM6kH44,6465
|
5
5
|
ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
|
6
|
-
ddi_fw/datasets/ddi_mdl/base.py,sha256=
|
6
|
+
ddi_fw/datasets/ddi_mdl/base.py,sha256=_45xa9oo5mBY5gooIy3hxlHBVJcx4NFRjzMCBIMllvA,11247
|
7
7
|
ddi_fw/datasets/ddi_mdl/debug.log,sha256=eWz05j8RFqZuHFDTCF7Rck5w4rvtTanFN21iZsgxO7Y,115
|
8
8
|
ddi_fw/datasets/ddi_mdl/readme.md,sha256=WC6lpmsEKvIISnZqENY7TWtzCQr98HPpE3oRsBl8pIw,625
|
9
9
|
ddi_fw/datasets/ddi_mdl/data/event.db,sha256=cmlSsf9MYjRzqR-mw3cUDnTnfT6FkpOG2yCl2mMwwew,30580736
|
@@ -46,7 +46,7 @@ ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt,sha256=fFJbN0DbKH4mve
|
|
46
46
|
ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt,sha256=NhiLF_5INQCpjOlE-RIxDKy7rYwksLdx60L6HCmDKoY,81247
|
47
47
|
ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt,sha256=bPvMCJVy7jtcaYbR-5bmdB6s7gT8NSfK2wDC7iJ0O10,81308
|
48
48
|
ddi_fw/datasets/mdf_sa_ddi/__init__.py,sha256=UEFBM92y2aJjlMJw4Jx405tOAwJ88r_nHAVgAszSjuo,68
|
49
|
-
ddi_fw/datasets/mdf_sa_ddi/base.py,sha256=
|
49
|
+
ddi_fw/datasets/mdf_sa_ddi/base.py,sha256=CRhcbADi_CkRyXY2Eq7fYXH-ywmDhrTTB67JLKbk8F4,15907
|
50
50
|
ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv,sha256=EOOLF_0vVVzShoofcGYlOzpztlM1m9jJdftepHicix4,25787699
|
51
51
|
ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv,sha256=lpuMz5KxPsG6MKNuIIUmT5cZquWHQiIao8tXlmOHzq8,381321
|
52
52
|
ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip,sha256=DfN8mczGvWba2y45cPqtWtXjUDXy49VOtRfpcb0tn8c,4382827
|
@@ -72,21 +72,22 @@ ddi_fw/langchain/__init__.py,sha256=zS0CQrakWEP19biSRewFJGcBT8WBZq4899HrEKiMqUY,
|
|
72
72
|
ddi_fw/langchain/embeddings.py,sha256=eEWy4okcjdhUJHi4N48Wd8XauPXyeaQVLUdNWEvtEcY,6754
|
73
73
|
ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
|
74
74
|
ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
|
75
|
-
ddi_fw/ml/__init__.py,sha256=
|
75
|
+
ddi_fw/ml/__init__.py,sha256=FteYEawCkVQOaK-cTv2VrHZ2ZnfeFr31BD6VucO7_DQ,268
|
76
76
|
ddi_fw/ml/evaluation_helper.py,sha256=2-7CLSgGTqLEk4HkgCVIOt-GxfLAn6SBozJghAtHb5M,11581
|
77
|
-
ddi_fw/ml/ml_helper.py,sha256=
|
78
|
-
ddi_fw/ml/model_wrapper.py,sha256=
|
77
|
+
ddi_fw/ml/ml_helper.py,sha256=RbFUz4kLkqv6WUXXscDFtYBMWcmv7uWOf8YIR5DBfqA,7701
|
78
|
+
ddi_fw/ml/model_wrapper.py,sha256=38uBdHI4H_sjDKPWuhGXovUy_L1tpSNm5tEqCtwmlpY,973
|
79
79
|
ddi_fw/ml/pytorch_wrapper.py,sha256=pe6UsjP2XeTgLxDnIUiodoyhJTGCxV27wD4Cjxysu2Q,8553
|
80
|
-
ddi_fw/ml/tensorflow_wrapper.py,sha256=
|
80
|
+
ddi_fw/ml/tensorflow_wrapper.py,sha256=Zdf1FmJ9488pFBG4xBLkC5GzyLv9tn5OrdFzHGHPmCM,16172
|
81
|
+
ddi_fw/ml/tracking_service.py,sha256=eHWFI3lyQX_xM16CRekgITwldHj2RBMYl5XG8lD8Zks,7508
|
81
82
|
ddi_fw/ner/__init__.py,sha256=JwhGXrepomxPSsGsg2b_xPRC72AjvxOIn2CW5Mvscn0,26
|
82
83
|
ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6859
|
83
84
|
ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
|
84
85
|
ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,212
|
85
86
|
ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
|
86
|
-
ddi_fw/pipeline/multi_pipeline.py,sha256=
|
87
|
-
ddi_fw/pipeline/
|
87
|
+
ddi_fw/pipeline/multi_pipeline.py,sha256=npJUXYT31fxD6kpJKSeixjbH5jNfPUwIVG7lRdBszRg,9852
|
88
|
+
ddi_fw/pipeline/multi_pipeline_org.py,sha256=AbErwu05-3YIPnCcXRsj-jxPJG8HG2H7cMZlGjzaYa8,9037
|
88
89
|
ddi_fw/pipeline/ner_pipeline.py,sha256=yp-Met2794EKcgr8_3gqt03l4v2efOdaZuAcIXTubvQ,5780
|
89
|
-
ddi_fw/pipeline/pipeline.py,sha256=
|
90
|
+
ddi_fw/pipeline/pipeline.py,sha256=IxqvIy2thLQyO-0Qon1JAKT3k8mLk5OpTGE25ZTTsOQ,6133
|
90
91
|
ddi_fw/utils/__init__.py,sha256=WNxkQXk-694roG50D355TGLXstfdWVb_tUyr-PM-8rg,537
|
91
92
|
ddi_fw/utils/categorical_data_encoding_checker.py,sha256=T1X70Rh4atucAuqyUZmz-iFULllY9dY0NRyV9-jTjJ0,3438
|
92
93
|
ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
|
@@ -95,12 +96,12 @@ ddi_fw/utils/kaggle.py,sha256=wKRJ18KpQ6P-CubpZklEgsDtyFpR9RUL1_HyyF6ttEE,2425
|
|
95
96
|
ddi_fw/utils/numpy_utils.py,sha256=gd1WNq5NpWD2MBEMTtFuS5I0h8B6FAUNcq6BVOlxdhY,797
|
96
97
|
ddi_fw/utils/package_helper.py,sha256=erl8_onmhK-41zQoaED2qyDUV9GQxmT9sdoyRp9_q5I,1056
|
97
98
|
ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
|
98
|
-
ddi_fw/utils/utils.py,sha256=
|
99
|
+
ddi_fw/utils/utils.py,sha256=PY-zDawREKoXQfzX7lVkxBLVFQPkfvr9385kHCjaNXo,4391
|
99
100
|
ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
|
100
101
|
ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
|
101
102
|
ddi_fw/vectorization/feature_vector_generation.py,sha256=EBf-XAiwQwr68az91erEYNegfeqssBR29kVgrliIyac,4765
|
102
103
|
ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
|
103
|
-
ddi_fw-0.0.
|
104
|
-
ddi_fw-0.0.
|
105
|
-
ddi_fw-0.0.
|
106
|
-
ddi_fw-0.0.
|
104
|
+
ddi_fw-0.0.219.dist-info/METADATA,sha256=kPgF9a4rw8e9GoqXTQ1_fwb3i8L1RLBWUClZEcfeQnM,2631
|
105
|
+
ddi_fw-0.0.219.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
106
|
+
ddi_fw-0.0.219.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
107
|
+
ddi_fw-0.0.219.dist-info/RECORD,,
|
File without changes
|
File without changes
|