ddi-fw 0.0.217__py3-none-any.whl → 0.0.219__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ddi_fw/pipeline/multi_pipeline.py CHANGED
@@ -107,30 +107,57 @@ class MultiPipeline():
     def __create_pipeline(self, config):
         type = config.get("type")
         library = config.get("library")
-
-        use_mlflow = config.get("use_mlflow")
         experiment_name = config.get("experiment_name")
         experiment_description = config.get("experiment_description")
-        experiment_tags = config.get("experiment_tags")
-        tracking_uri = config.get("tracking_uri")
-        artifact_location = config.get("artifact_location")
-        #new
-        default_model = config.get("default_model")
+
+        # Tracking configuration
+        tracking_config = config.get("tracking_config", {})
+        tracking_library = tracking_config.get("library")
+        use_tracking = tracking_config.get("use_tracking", False)
+        tracking_params = tracking_config.get("params", {}).get(tracking_library, {})
+
+        # tracking_uri = config.get("tracking_uri")
+        # artifact_location = config.get("artifact_location")
+
+        # Dataset configuration
+        dataset_config = config.get("dataset", {})
+        dataset_type = get_import(dataset_config.get("dataset_type"))
+        dataset_splitter_type = get_import(dataset_config.get("dataset_splitter_type"))
+        columns = dataset_config.get("columns", [])
+        additional_config = dataset_config.get("additional_config", {})
+
+        # Vector database configuration
+        vector_database = config.get("vector_databases", {})
+        vector_db_persist_directory = None
+        vector_db_collection_name = None
+        embedding_pooling_strategy = None
+        column_embedding_configs = None
+        if vector_database:
+            vector_db_persist_directory = vector_database.get("vector_db_persist_directory")
+            vector_db_collection_name = vector_database.get("vector_db_collection_name")
+            embedding_pooling_strategy = get_import(vector_database.get("embedding_pooling_strategy"))
+            column_embedding_configs = vector_database.get("column_embedding_configs")
+
+        # Combination strategy
+        combination_strategy_config = config.get("combination_strategy", {})
+        combination_type = get_import(combination_strategy_config.get("type")) if combination_strategy_config else None
+        kwargs_combination_params = combination_strategy_config.get("params", {})
+        combinations = combination_type(**kwargs_combination_params).generate() if combination_type else []
+
+        # Default model configuration
+        default_model = config.get("default_model", {})
+        default_model_type = get_import(default_model.get("model_type"))
+        default_model_params = default_model.get("params", {})
+
         multi_modal = config.get("multi_modal")
-        columns = config.get("columns")
-        ner_data_file = config.get("ner_data_file")
-        ner_threshold = config.get("ner_threshold")
-        ner_min_threshold_dict = config.get("ner_min_threshold_dict")
-        ner_max_threshold_dict = config.get("ner_max_threshold_dict")
-        column_embedding_configs = config.get("column_embedding_configs")
-        vector_db_persist_directory = config.get("vector_db_persist_directory")
-        vector_db_collection_name = config.get("vector_db_collection_name")
-        embedding_pooling_strategy = get_import(
-            config.get("embedding_pooling_strategy_type")) if config.get("embedding_pooling_strategy_type") else None
-        # Dynamically import the model and dataset classes
-        # model_type = get_import(config.get("model_type"))
-        dataset_type = get_import(config.get("dataset_type"))
-        dataset_splitter_type = get_import(config.get("dataset_splitter_type"))
+
+
+
+        #ner move it to related dataset
+
+        # ner_data_file = config.get("ner_data_file")
+        # ner_threshold = config.get("ner_threshold")
+

         combination_type = None
         kwargs_combination_params=None
@@ -146,21 +173,18 @@ class MultiPipeline():
         if type == "general":
             pipeline = Pipeline(
                 library=library,
-                use_mlflow=use_mlflow,
+                tracking_library=tracking_library,
+                tracking_params=tracking_params,
                 experiment_name=experiment_name,
                 experiment_description=experiment_description,
-                experiment_tags=experiment_tags,
-                artifact_location=artifact_location,
-                tracking_uri=tracking_uri,
                 dataset_type=dataset_type,
+                dataset_additional_config=additional_config,
                 dataset_splitter_type=dataset_splitter_type,
                 columns=columns,
                 column_embedding_configs=column_embedding_configs,
                 vector_db_persist_directory=vector_db_persist_directory,
                 vector_db_collection_name=vector_db_collection_name,
                 embedding_pooling_strategy_type=embedding_pooling_strategy,
-                ner_data_file=ner_data_file,
-                ner_threshold=ner_threshold,
                 combinations=combinations,
                 default_model=default_model,
                 multi_modal= multi_modal)
@@ -172,14 +196,10 @@ class MultiPipeline():
                 experiment_tags=experiment_tags,
                 tracking_uri=tracking_uri,
                 dataset_type=dataset_type,
-                dataset_splitter_type=dataset_splitter_type,
                 umls_code_types = None,
                 text_types = None,
-                min_threshold_dict=ner_min_threshold_dict,
-                max_threshold_dict=ner_max_threshold_dict,
-                columns=columns,
+                columns=['tui', 'cui', 'entities'],
                 ner_data_file=ner_data_file,
-                default_model=default_model,
                 multi_modal= multi_modal
             )

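Note: the rewritten __create_pipeline above reads one nested config object instead of the old flat keys. A minimal sketch of that shape: the key names below come from the .get() calls in the diff, while every value is an illustrative placeholder, not a documented default.

    config = {
        "type": "general",
        "library": "tensorflow",
        "experiment_name": "ddi_experiment",              # hypothetical
        "experiment_description": "DDI classification run",
        "tracking_config": {
            "library": "mlflow",        # assumed backend name
            "use_tracking": True,
            # per-backend params, keyed by the tracking library's name
            "params": {"mlflow": {"tracking_uri": "http://localhost:5000"}},  # illustrative
        },
        "dataset": {
            # dotted paths resolved via get_import(); both are placeholders
            "dataset_type": "ddi_fw.datasets.SomeDataset",
            "dataset_splitter_type": "ddi_fw.datasets.dataset_splitter.DatasetSplitter",
            "columns": ["smile", "target"],
            "additional_config": {},
        },
        "vector_databases": {
            "vector_db_persist_directory": "vdb/",        # illustrative
            "vector_db_collection_name": "embeddings",    # illustrative
            "embedding_pooling_strategy": "ddi_fw.langchain.embeddings.SomePoolingStrategy",  # placeholder
            "column_embedding_configs": None,
        },
        "combination_strategy": {"type": "ddi_fw.pipeline.SomeCombinationStrategy", "params": {}},  # placeholder
        "default_model": {"model_type": "ddi_fw.ml.SomeModel", "params": {}},  # placeholder
        "multi_modal": None,
    }

In the hunk that follows, only tracking_library and the backend-specific tracking_params are forwarded to Pipeline; use_tracking is read but not forwarded there.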
ddi_fw/pipeline/multi_pipeline_v2.py → ddi_fw/pipeline/multi_pipeline_org.py RENAMED
@@ -107,57 +107,30 @@ class MultiPipeline():
     def __create_pipeline(self, config):
         type = config.get("type")
         library = config.get("library")
+
+        use_mlflow = config.get("use_mlflow")
         experiment_name = config.get("experiment_name")
         experiment_description = config.get("experiment_description")
         experiment_tags = config.get("experiment_tags")
-
-        # Tracking configuration
-        tracking_config = config.get("tracking_config", {})
-        tracking_library = tracking_config.get("library")
-        use_tracking = tracking_config.get("use_tracking", False)
-        tracking_params = tracking_config.get("params", {}).get(tracking_library, {})
-
-        # tracking_uri = config.get("tracking_uri")
-        # artifact_location = config.get("artifact_location")
-
-        # Dataset configuration
-        dataset_config = config.get("dataset", {})
-        dataset_type = get_import(dataset_config.get("dataset_type"))
-        dataset_splitter_type = get_import(dataset_config.get("dataset_splitter_type"))
-        columns = dataset_config.get("columns", [])
-        additional_config = dataset_config.get("additional_config", {})
-
-        # Vector database configuration
-        vector_database = config.get("vector_databases", {})
-        vector_db_persist_directory = None
-        vector_db_collection_name = None
-        embedding_pooling_strategy = None
-        if vector_database:
-            vector_db_persist_directory = vector_database.get("vector_db_persist_directory")
-            vector_db_collection_name = vector_database.get("vector_db_collection_name")
-            embedding_pooling_strategy = get_import(vector_database.get("embedding_pooling_strategy"))
-            column_embedding_configs = vector_database.get("column_embedding_configs")
-
-        # Combination strategy
-        combination_strategy_config = config.get("combination_strategy", {})
-        combination_type = get_import(combination_strategy_config.get("type")) if combination_strategy_config else None
-        kwargs_combination_params = combination_strategy_config.get("params", {})
-        combinations = combination_type(**kwargs_combination_params).generate() if combination_type else []
-
-        # Default model configuration
-        default_model_config = config.get("default_model", {})
-        default_model_type = get_import(default_model_config.get("model_type"))
-        default_model_params = default_model_config.get("params", {})
-
+        tracking_uri = config.get("tracking_uri")
+        artifact_location = config.get("artifact_location")
+        #new
+        default_model = config.get("default_model")
         multi_modal = config.get("multi_modal")
-
-
-
-        #ner move it to related dataset
-
-        # ner_data_file = config.get("ner_data_file")
-        # ner_threshold = config.get("ner_threshold")
-
+        columns = config.get("columns")
+        ner_data_file = config.get("ner_data_file")
+        ner_threshold = config.get("ner_threshold")
+        ner_min_threshold_dict = config.get("ner_min_threshold_dict")
+        ner_max_threshold_dict = config.get("ner_max_threshold_dict")
+        column_embedding_configs = config.get("column_embedding_configs")
+        vector_db_persist_directory = config.get("vector_db_persist_directory")
+        vector_db_collection_name = config.get("vector_db_collection_name")
+        embedding_pooling_strategy = get_import(
+            config.get("embedding_pooling_strategy_type")) if config.get("embedding_pooling_strategy_type") else None
+        # Dynamically import the model and dataset classes
+        # model_type = get_import(config.get("model_type"))
+        dataset_type = get_import(config.get("dataset_type"))
+        dataset_splitter_type = get_import(config.get("dataset_splitter_type"))

         combination_type = None
         kwargs_combination_params=None
@@ -199,10 +172,14 @@ class MultiPipeline():
                 experiment_tags=experiment_tags,
                 tracking_uri=tracking_uri,
                 dataset_type=dataset_type,
+                dataset_splitter_type=dataset_splitter_type,
                 umls_code_types = None,
                 text_types = None,
-                columns=['tui', 'cui', 'entities'],
+                min_threshold_dict=ner_min_threshold_dict,
+                max_threshold_dict=ner_max_threshold_dict,
+                columns=columns,
                 ner_data_file=ner_data_file,
+                default_model=default_model,
                 multi_modal= multi_modal
             )

ddi_fw/pipeline/pipeline.py CHANGED
@@ -1,29 +1,24 @@
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Dict, List, Optional, Type
 from ddi_fw.datasets.dataset_splitter import DatasetSplitter
-import numpy as np
-import pandas as pd
-import chromadb
-from collections import defaultdict
-from chromadb.api.types import IncludeEnum

 from pydantic import BaseModel
 from ddi_fw.datasets.core import TextDatasetMixin
-from ddi_fw.ner.ner import CTakesNER
+from ddi_fw.ml.tracking_service import TrackingService
 from ddi_fw.langchain.embeddings import PoolingStrategy
-from ddi_fw.datasets import BaseDataset, DDIMDLDataset
-from ddi_fw.langchain.embeddings import SumPoolingStrategy
-import mlflow
+from ddi_fw.datasets import BaseDataset
 from ddi_fw.ml import MultiModalRunner
+import logging


 class Pipeline(BaseModel):
+
     library: str = 'tensorflow'
     experiment_name: str
     experiment_description: str
-    experiment_tags: Optional[Dict[str, Any]] = None
-    artifact_location: Optional[str] = None
-    tracking_uri: Optional[str] = None
+    tracking_library: str
+    tracking_params: Optional[Dict[str, Any]] = None
     dataset_type: Type[BaseDataset]
+    dataset_additional_config: Optional[Dict[str, Any]] = None
     dataset_splitter_type: Type[DatasetSplitter] = DatasetSplitter
     columns: Optional[List[str]] = None
     embedding_dict: Optional[Dict[str, Any]] = None
@@ -31,22 +26,24 @@ class Pipeline(BaseModel):
     vector_db_persist_directory: Optional[str] = None
     vector_db_collection_name: Optional[str] = None
     embedding_pooling_strategy_type: Type[PoolingStrategy] | None = None
-    ner_data_file: Optional[str] = None
-    ner_threshold: Optional[dict] = None
     combinations: Optional[List[tuple]] = None
     model: Optional[Any] = None
     default_model: Optional[Any] = None
     multi_modal: Optional[Any] = None
-    use_mlflow: bool = False
+    _tracking_service: TrackingService | None = None
     _dataset: BaseDataset | None = None
     _items: List = []
     _train_idx_arr: List | None = []
     _val_idx_arr: List | None = []

+    @property
+    def tracking_service(self) -> TrackingService | None:
+        return self._tracking_service
+
     @property
     def dataset(self) -> BaseDataset | None:
         return self._dataset
-
+
     @property
     def items(self) -> List:
         return self._items
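Given the new field set, a hedged sketch of constructing Pipeline directly (MultiPipeline normally builds it from the config shown earlier). The import path follows the package layout in RECORD; the backend name, params, and SomeDataset class are stand-ins, not documented values:

    from ddi_fw.pipeline.pipeline import Pipeline

    pipeline = Pipeline(
        library="tensorflow",
        experiment_name="ddi_experiment",                   # hypothetical
        experiment_description="DDI classification run",
        tracking_library="mlflow",                          # assumed backend name
        tracking_params={"tracking_uri": "http://localhost:5000"},  # illustrative
        dataset_type=SomeDataset,   # placeholder for a concrete BaseDataset subclass
    )
    pipeline.build()  # build() now wires up the TrackingService (see the build() hunk below)
    pipeline.run()

Since build() returns self (visible in a later hunk), the two calls can also be chained.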
@@ -62,56 +59,11 @@ class Pipeline(BaseModel):
     class Config:
         arbitrary_types_allowed = True

-    # def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
-    #     """
-    #     Fetch embeddings and metadata from a persistent Chroma vector database and update the provided embedding_dict.
-
-    #     Args:
-    #     - vector_db_persist_directory (str): The path to the directory where the Chroma vector database is stored.
-    #     - vector_db_collection_name (str): The name of the collection to query.
-    #     - embedding_dict (dict): The existing dictionary to update with embeddings.
-
-    #     """
-    #     if vector_db_persist_directory:
-    #         # Initialize the Chroma client and get the collection
-    #         vector_db = chromadb.PersistentClient(
-    #             path=vector_db_persist_directory)
-    #         collection = vector_db.get_collection(vector_db_collection_name)
-    #         include = [IncludeEnum.embeddings, IncludeEnum.metadatas]
-    #         dictionary: chromadb.GetResult
-    #         # Fetch the embeddings and metadata
-    #         if column == None:
-    #             dictionary = collection.get(
-    #                 include=include
-    #                 # include=['embeddings', 'metadatas']
-    #             )
-    #             print(
-    #                 f"Embeddings are calculated from {vector_db_collection_name}")
-    #         else:
-    #             dictionary = collection.get(
-    #                 include=include,
-    #                 # include=['embeddings', 'metadatas'],
-    #                 where={
-    #                     "type": {"$eq": f"{column}"}})
-    #             print(
-    #                 f"Embeddings of {column} are calculated from {vector_db_collection_name}")
-
-    #         # Populate the embedding dictionary with embeddings from the vector database
-    #         metadatas = dictionary["metadatas"]
-    #         embeddings = dictionary["embeddings"]
-    #         if metadatas is None or embeddings is None:
-    #             raise ValueError(
-    #                 "The collection does not contain embeddings or metadatas.")
-    #         for metadata, embedding in zip(metadatas, embeddings):
-    #             embedding_dict[metadata["type"]
-    #                            ][metadata["id"]].append(embedding)
-
-    #     else:
-    #         raise ValueError(
-    #             "Persistent directory for the vector DB is not specified.")
-
-    #TODO embedding'leri set etme kimin görevi
+    # TODO embedding'leri set etme kimin görevi
     def build(self):
+        self._tracking_service = TrackingService(self.experiment_name,
+                                                 backend=self.tracking_library, tracking_params=self.tracking_params)
+
         if self.embedding_pooling_strategy_type is not None and not isinstance(self.embedding_pooling_strategy_type, type):
             raise TypeError(
                 "self.embedding_pooling_strategy_type must be a class, not an instance")
@@ -120,29 +72,29 @@ class Pipeline(BaseModel):
                 "self.dataset_type must be a class, not an instance")

         # 'enzyme','target','pathway','smile','all_text','indication', 'description','mechanism_of_action','pharmacodynamics', 'tui', 'cui', 'entities'
-        kwargs = {"columns": self.columns}
-        if self.ner_threshold:
-            for k, v in self.ner_threshold.items():
-                kwargs[k] = v
-
+        kwargs = {"columns": self.columns,
+                  "additional_config": self.dataset_additional_config}
+        # DDIMDL Dataset'e aktar
+        # if self.ner_threshold:
+        #     for k, v in self.ner_threshold.items():
+        #         kwargs[k] = v

-        ner_df = CTakesNER(df=None).load(
-            filename=self.ner_data_file) if self.ner_data_file else None
+        # ner_df = CTakesNER(df=None).load(
+        #     filename=self.ner_data_file) if self.ner_data_file else None

         dataset_splitter = self.dataset_splitter_type()
         pooling_strategy = self.embedding_pooling_strategy_type(
-        ) if self.embedding_pooling_strategy_type else None
+            ) if self.embedding_pooling_strategy_type else None
         if issubclass(self.dataset_type, TextDatasetMixin):
-            kwargs["ner_df"] = ner_df
             dataset = self.dataset_type(
-                embedding_dict=self.embedding_dict,
+                embedding_dict=self.embedding_dict,
                 pooling_strategy=pooling_strategy,
                 column_embedding_configs=self.column_embedding_configs,
                 vector_db_persist_directory=self.vector_db_persist_directory,
                 vector_db_collection_name=self.vector_db_collection_name,
                 dataset_splitter_type=self.dataset_splitter_type,
                 **kwargs)
-
+
         elif self.dataset_type == BaseDataset:
             dataset = self.dataset_type(
                 dataset_splitter_type=self.dataset_splitter_type,
@@ -151,11 +103,11 @@ class Pipeline(BaseModel):
             dataset = self.dataset_type(**kwargs)

         # X_train, X_test, y_train, y_test, train_indexes, test_indexes, train_idx_arr, val_idx_arr = dataset.load()
-
+
         dataset.load()
-
+
         self._dataset = dataset
-
+
         dataframe = dataset.dataframe

         # Check if any of the arrays are None or empty
@@ -180,25 +132,15 @@ class Pipeline(BaseModel):
         return self

     def run(self):
-        if self.use_mlflow:
-            if self.tracking_uri is None:
-                raise ValueError("Tracking uri should be specified")
-            mlflow.set_tracking_uri(self.tracking_uri)
-
-            if mlflow.get_experiment_by_name(self.experiment_name) == None:
-                mlflow.create_experiment(
-                    self.experiment_name, self.artifact_location)
-                if self.experiment_tags is not None:
-                    mlflow.set_experiment_tags(self.experiment_tags)
-            mlflow.set_experiment(self.experiment_name)
+        if self._tracking_service is None:
+            logging.warning("Tracking service is not initialized.")
+        else:
+            self._tracking_service.setup()

         y_test_label = self.items[0][4]
         multi_modal_runner = MultiModalRunner(
-            library=self.library, multi_modal=self.multi_modal, default_model= self.default_model , use_mlflow=self.use_mlflow)
-        # multi_modal_runner = MultiModalRunner(
-        #     library=self.library, model_func=model_func, batch_size=batch_size, epochs=epochs)
-        # multi_modal = TFMultiModal(
-        #     model_func=model_func, batch_size=batch_size, epochs=epochs)  # 100
+            library=self.library, multi_modal=self.multi_modal, default_model=self.default_model, tracking_service=self._tracking_service)
+
         multi_modal_runner.set_data(
             self.items, self.train_idx_arr, self.val_idx_arr, y_test_label)
         combinations = self.combinations if self.combinations is not None else []
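The run() hunk above replaces the inline mlflow bootstrap with the TrackingService created in build(). A minimal sketch of that service in isolation, assuming only the constructor signature and setup() call visible in this diff; the backend id and params are illustrative:

    from ddi_fw.ml.tracking_service import TrackingService

    # Construct once per experiment; setup() then performs the bootstrap the old
    # code did inline (setting the tracking URI, creating/selecting the experiment).
    service = TrackingService(
        "ddi_experiment",                                   # experiment name (hypothetical)
        backend="mlflow",                                   # assumed backend identifier
        tracking_params={"tracking_uri": "http://localhost:5000"})  # illustrative
    service.setup()

The same instance is handed to MultiModalRunner via the new tracking_service parameter, so the runner no longer needs a use_mlflow flag.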
ddi_fw/utils/utils.py CHANGED
@@ -2,7 +2,7 @@ import gzip
 import json
 import os
 from datetime import datetime, timezone
-from matplotlib import pyplot as plt
+# from matplotlib import pyplot as plt
 import shutil

 def create_folder_if_not_exists(path):
@@ -65,53 +65,53 @@ def clear_directory(directory_path):
         print(f"The directory does not exist: {directory_path}")


-if __name__ == "__main__":
-    # json_file = f'C:\\Users\\kivanc\\Downloads\\metrics.json'
-    # file_data = open(json_file, "r", 1).read()
-    # a = json.loads(file_data)  # store in json structure
-    # # a = {'key1':1, 'key2':2}
-    # compressed = compress_data(a)
-    # with gzip.open('deneme.gzip', 'wb') as f:
-    #     f.write(compressed)
-
-    # with gzip.open('deneme.gzip', 'r') as fin:  # 4. gzip
-    #     json_bytes = fin.read()  # 3. bytes (i.e. UTF-8)
-    #     json_bytes = gzip.decompress(json_bytes)
-    #     json_str = json_bytes.decode('UTF-8')  # 2. string (i.e. JSON)
-    #     data = json.loads(json_str)
-    #     print(data)
-
-    gzip_file = f'C:\\Users\\kivanc\\Downloads\\metrics (2).gzip'
-    stored_file = f'C:\\Users\\kivanc\\Downloads\\save.png'
-    metrics = decompress(gzip_file)
-    # print(metrics)
-
-    # Plot Precision-Recall curves for each class and micro-average
-    fig = plt.figure()
-    plt.step(metrics['recall']['micro_event'], metrics['precision']['micro_event'],
-             color='b', alpha=0.2, where='post')
-    plt.fill_between(
-        metrics['recall']["micro_event"], metrics['precision']["micro_event"], step='post', alpha=0.2, color='b')
-
-    # for i in range(65):
-    #     plt.step( metrics['recall'][str(i)], metrics['precision'][str(i)], where='post',
-    #              label='Class {0} (AUC={1:0.2f})'.format(i, metrics['roc_aupr'][str(i)]))
-
-    plt.xlabel('Recall')
-    plt.ylabel('Precision')
-    plt.ylim([0.0, 1.05])
-    plt.xlim([0.0, 1.0])
-    plt.title(
-        'Micro-average Precision-Recall curve: AUC={0:0.2f}'.format(metrics['roc_aupr']["micro"]))
-    plt.legend(loc='best')
-    plt.savefig(stored_file)
-    # plt.show()
-
-    import plotly.express as px
-    import pandas as pd
-    df = pd.DataFrame(dict(
-        r=[1, 5, 2, 2, 3],
-        theta=['processing cost','mechanical properties','chemical stability',
-               'thermal stability', 'device integration']))
-    fig = px.line_polar(df, r='r', theta='theta', line_close=True)
-    fig.show()
+# if __name__ == "__main__":
+#     # json_file = f'C:\\Users\\kivanc\\Downloads\\metrics.json'
+#     # file_data = open(json_file, "r", 1).read()
+#     # a = json.loads(file_data)  # store in json structure
+#     # # a = {'key1':1, 'key2':2}
+#     # compressed = compress_data(a)
+#     # with gzip.open('deneme.gzip', 'wb') as f:
+#     #     f.write(compressed)
+
+#     # with gzip.open('deneme.gzip', 'r') as fin:  # 4. gzip
+#     #     json_bytes = fin.read()  # 3. bytes (i.e. UTF-8)
+#     #     json_bytes = gzip.decompress(json_bytes)
+#     #     json_str = json_bytes.decode('UTF-8')  # 2. string (i.e. JSON)
+#     #     data = json.loads(json_str)
+#     #     print(data)
+
+#     gzip_file = f'C:\\Users\\kivanc\\Downloads\\metrics (2).gzip'
+#     stored_file = f'C:\\Users\\kivanc\\Downloads\\save.png'
+#     metrics = decompress(gzip_file)
+#     # print(metrics)
+
+#     # Plot Precision-Recall curves for each class and micro-average
+#     fig = plt.figure()
+#     plt.step(metrics['recall']['micro_event'], metrics['precision']['micro_event'],
+#              color='b', alpha=0.2, where='post')
+#     plt.fill_between(
+#         metrics['recall']["micro_event"], metrics['precision']["micro_event"], step='post', alpha=0.2, color='b')
+
+#     # for i in range(65):
+#     #     plt.step( metrics['recall'][str(i)], metrics['precision'][str(i)], where='post',
+#     #              label='Class {0} (AUC={1:0.2f})'.format(i, metrics['roc_aupr'][str(i)]))
+
+#     plt.xlabel('Recall')
+#     plt.ylabel('Precision')
+#     plt.ylim([0.0, 1.05])
+#     plt.xlim([0.0, 1.0])
+#     plt.title(
+#         'Micro-average Precision-Recall curve: AUC={0:0.2f}'.format(metrics['roc_aupr']["micro"]))
+#     plt.legend(loc='best')
+#     plt.savefig(stored_file)
+#     # plt.show()
+
+#     import plotly.express as px
+#     import pandas as pd
+#     df = pd.DataFrame(dict(
+#         r=[1, 5, 2, 2, 3],
+#         theta=['processing cost','mechanical properties','chemical stability',
+#                'thermal stability', 'device integration']))
+#     fig = px.line_polar(df, r='r', theta='theta', line_close=True)
+#     fig.show()
ddi_fw-0.0.217.dist-info/METADATA → ddi_fw-0.0.219.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ddi_fw
-Version: 0.0.217
+Version: 0.0.219
 Summary: Do not use :)
 Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
 Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
ddi_fw-0.0.217.dist-info/RECORD → ddi_fw-0.0.219.dist-info/RECORD RENAMED
@@ -1,9 +1,9 @@
 ddi_fw/datasets/__init__.py,sha256=_I3iDHARwzmg7_EL5XKtB_TgG1yAkLSOVTujLL9Wz9Q,280
-ddi_fw/datasets/core.py,sha256=WuNPDxLAEtw7k15X38mwbOoCslXQ1hHQWhiIIwzrpRA,16880
+ddi_fw/datasets/core.py,sha256=12S7gJnWlxBvUwhQ5HoBy1ILwTW1xm9r1v6P9pPgu_Y,16936
 ddi_fw/datasets/dataset_splitter.py,sha256=8H8uZTAf8N9LUZeSeHOMawtJFJhnDgUUqFcnl7dquBQ,1672
 ddi_fw/datasets/db_utils.py,sha256=xRj28U_uXTRPHcz3yIICczFUHXUPiAOZtAj5BM6kH44,6465
 ddi_fw/datasets/setup_._py,sha256=khYVJuW5PlOY_i_A16F3UbSZ6s6o_ljw33Byw3C-A8E,1047
-ddi_fw/datasets/ddi_mdl/base.py,sha256=Vvyzxd2BnFK9Bn2mn-3aS5ZczlPElQ0-TKMAqgkyJiI,10483
+ddi_fw/datasets/ddi_mdl/base.py,sha256=_45xa9oo5mBY5gooIy3hxlHBVJcx4NFRjzMCBIMllvA,11247
 ddi_fw/datasets/ddi_mdl/debug.log,sha256=eWz05j8RFqZuHFDTCF7Rck5w4rvtTanFN21iZsgxO7Y,115
 ddi_fw/datasets/ddi_mdl/readme.md,sha256=WC6lpmsEKvIISnZqENY7TWtzCQr98HPpE3oRsBl8pIw,625
 ddi_fw/datasets/ddi_mdl/data/event.db,sha256=cmlSsf9MYjRzqR-mw3cUDnTnfT6FkpOG2yCl2mMwwew,30580736
@@ -46,7 +46,7 @@ ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt,sha256=fFJbN0DbKH4mve
 ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt,sha256=NhiLF_5INQCpjOlE-RIxDKy7rYwksLdx60L6HCmDKoY,81247
 ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt,sha256=bPvMCJVy7jtcaYbR-5bmdB6s7gT8NSfK2wDC7iJ0O10,81308
 ddi_fw/datasets/mdf_sa_ddi/__init__.py,sha256=UEFBM92y2aJjlMJw4Jx405tOAwJ88r_nHAVgAszSjuo,68
-ddi_fw/datasets/mdf_sa_ddi/base.py,sha256=ILdvu7pBMazt-FxRWzIaqO2PmbkyooEOT3U9vSoV3PY,6398
+ddi_fw/datasets/mdf_sa_ddi/base.py,sha256=CRhcbADi_CkRyXY2Eq7fYXH-ywmDhrTTB67JLKbk8F4,15907
 ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv,sha256=EOOLF_0vVVzShoofcGYlOzpztlM1m9jJdftepHicix4,25787699
 ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv,sha256=lpuMz5KxPsG6MKNuIIUmT5cZquWHQiIao8tXlmOHzq8,381321
 ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip,sha256=DfN8mczGvWba2y45cPqtWtXjUDXy49VOtRfpcb0tn8c,4382827
@@ -72,21 +72,22 @@ ddi_fw/langchain/__init__.py,sha256=zS0CQrakWEP19biSRewFJGcBT8WBZq4899HrEKiMqUY,
 ddi_fw/langchain/embeddings.py,sha256=eEWy4okcjdhUJHi4N48Wd8XauPXyeaQVLUdNWEvtEcY,6754
 ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
 ddi_fw/langchain/storage.py,sha256=OizKyWm74Js7T6Q9kez-ulUoBGzIMFo4R46h4kjUyIM,11200
-ddi_fw/ml/__init__.py,sha256=tIxiW0g6q1VsmDYVXR_ovvHQR3SCir8g2bKxx_CrS7s,221
+ddi_fw/ml/__init__.py,sha256=FteYEawCkVQOaK-cTv2VrHZ2ZnfeFr31BD6VucO7_DQ,268
 ddi_fw/ml/evaluation_helper.py,sha256=2-7CLSgGTqLEk4HkgCVIOt-GxfLAn6SBozJghAtHb5M,11581
-ddi_fw/ml/ml_helper.py,sha256=6BO1ikCHmlYK9TPDN7Atov0BuTtoyLg06NoSGl3RYGA,7716
-ddi_fw/ml/model_wrapper.py,sha256=kabPXuo7S8tGkp9a00V04n4rXDmv7dD8wYGMjotISRc,1050
+ddi_fw/ml/ml_helper.py,sha256=RbFUz4kLkqv6WUXXscDFtYBMWcmv7uWOf8YIR5DBfqA,7701
+ddi_fw/ml/model_wrapper.py,sha256=38uBdHI4H_sjDKPWuhGXovUy_L1tpSNm5tEqCtwmlpY,973
 ddi_fw/ml/pytorch_wrapper.py,sha256=pe6UsjP2XeTgLxDnIUiodoyhJTGCxV27wD4Cjxysu2Q,8553
-ddi_fw/ml/tensorflow_wrapper.py,sha256=Vw6M2rHDHV90jzfCr0XWpUqYVl4vmZeKsS7FUb3VkH4,12980
+ddi_fw/ml/tensorflow_wrapper.py,sha256=Zdf1FmJ9488pFBG4xBLkC5GzyLv9tn5OrdFzHGHPmCM,16172
+ddi_fw/ml/tracking_service.py,sha256=eHWFI3lyQX_xM16CRekgITwldHj2RBMYl5XG8lD8Zks,7508
 ddi_fw/ner/__init__.py,sha256=JwhGXrepomxPSsGsg2b_xPRC72AjvxOIn2CW5Mvscn0,26
 ddi_fw/ner/mmlrestclient.py,sha256=NZta7m2Qm6I_qtVguMZhqtAUjVBmmXn0-TMnsNp0jpg,6859
 ddi_fw/ner/ner.py,sha256=FHyyX53Xwpdw8Hec261dyN88yD7Z9LmJua2mIrQLguI,17967
 ddi_fw/pipeline/__init__.py,sha256=tKDM_rW4vPjlYTeOkNgi9PujDzb4e9O3LK1w5wqnebw,212
 ddi_fw/pipeline/multi_modal_combination_strategy.py,sha256=JSyuP71b1I1yuk0s2ecCJZTtCED85jBtkpwTUxibJvI,1706
-ddi_fw/pipeline/multi_pipeline.py,sha256=AbErwu05-3YIPnCcXRsj-jxPJG8HG2H7cMZlGjzaYa8,9037
-ddi_fw/pipeline/multi_pipeline_v2.py,sha256=7IGtaGFhgJqW29a6nDheUrVtn_7_xvWFdD6GC--sehM,10003
+ddi_fw/pipeline/multi_pipeline.py,sha256=npJUXYT31fxD6kpJKSeixjbH5jNfPUwIVG7lRdBszRg,9852
+ddi_fw/pipeline/multi_pipeline_org.py,sha256=AbErwu05-3YIPnCcXRsj-jxPJG8HG2H7cMZlGjzaYa8,9037
 ddi_fw/pipeline/ner_pipeline.py,sha256=yp-Met2794EKcgr8_3gqt03l4v2efOdaZuAcIXTubvQ,5780
-ddi_fw/pipeline/pipeline.py,sha256=YhUBVLC29ZD2tmVd0e8X1FVBLhSKECZL2OP57oEW6HE,9171
+ddi_fw/pipeline/pipeline.py,sha256=IxqvIy2thLQyO-0Qon1JAKT3k8mLk5OpTGE25ZTTsOQ,6133
 ddi_fw/utils/__init__.py,sha256=WNxkQXk-694roG50D355TGLXstfdWVb_tUyr-PM-8rg,537
 ddi_fw/utils/categorical_data_encoding_checker.py,sha256=T1X70Rh4atucAuqyUZmz-iFULllY9dY0NRyV9-jTjJ0,3438
 ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
@@ -95,12 +96,12 @@ ddi_fw/utils/kaggle.py,sha256=wKRJ18KpQ6P-CubpZklEgsDtyFpR9RUL1_HyyF6ttEE,2425
 ddi_fw/utils/numpy_utils.py,sha256=gd1WNq5NpWD2MBEMTtFuS5I0h8B6FAUNcq6BVOlxdhY,797
 ddi_fw/utils/package_helper.py,sha256=erl8_onmhK-41zQoaED2qyDUV9GQxmT9sdoyRp9_q5I,1056
 ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
-ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
+ddi_fw/utils/utils.py,sha256=PY-zDawREKoXQfzX7lVkxBLVFQPkfvr9385kHCjaNXo,4391
 ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
 ddi_fw/vectorization/__init__.py,sha256=LcJOpLVoLvHPDw9phGFlUQGeNcST_zKV-Oi1Pm5h_nE,110
 ddi_fw/vectorization/feature_vector_generation.py,sha256=EBf-XAiwQwr68az91erEYNegfeqssBR29kVgrliIyac,4765
 ddi_fw/vectorization/idf_helper.py,sha256=_Gd1dtDSLaw8o-o0JugzSKMt9FpeXewTh4wGEaUd4VQ,2571
-ddi_fw-0.0.217.dist-info/METADATA,sha256=G1lQHwAd3_yxL53MryeC1s0Lq_daknO2yzt33ZGnICw,2631
-ddi_fw-0.0.217.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-ddi_fw-0.0.217.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
-ddi_fw-0.0.217.dist-info/RECORD,,
+ddi_fw-0.0.219.dist-info/METADATA,sha256=kPgF9a4rw8e9GoqXTQ1_fwb3i8L1RLBWUClZEcfeQnM,2631
+ddi_fw-0.0.219.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ddi_fw-0.0.219.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
+ddi_fw-0.0.219.dist-info/RECORD,,