ddi-fw 0.0.130__tar.gz → 0.0.132__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/PKG-INFO +1 -1
  2. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/pyproject.toml +1 -1
  3. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/__init__.py +1 -0
  4. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/base.py +3 -3
  5. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/ml/ml_helper.py +45 -15
  6. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/ml/model_wrapper.py +3 -3
  7. ddi_fw-0.0.132/src/ddi_fw/ml/tensorflow_wrapper.py +152 -0
  8. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/pipeline/multi_pipeline.py +14 -10
  9. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/pipeline/pipeline.py +8 -4
  10. ddi_fw-0.0.132/src/ddi_fw/test/__init__.py +0 -0
  11. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw.egg-info/PKG-INFO +1 -1
  12. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw.egg-info/SOURCES.txt +1 -0
  13. ddi_fw-0.0.130/src/ddi_fw/ml/tensorflow_wrapper.py +0 -157
  14. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/README.md +0 -0
  15. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/setup.cfg +0 -0
  16. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/core.py +0 -0
  17. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/db_utils.py +0 -0
  18. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/base.py +0 -0
  19. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
  20. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
  21. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
  22. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
  23. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
  24. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
  25. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
  26. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
  27. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
  28. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
  29. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
  30. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
  31. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
  32. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
  33. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
  34. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
  35. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
  36. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
  37. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
  38. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
  39. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
  40. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
  41. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
  42. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
  43. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
  44. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
  45. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/data/event.db +0 -0
  46. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/indexes/test_indexes.txt +0 -0
  47. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_0.txt +0 -0
  48. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_1.txt +0 -0
  49. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_2.txt +0 -0
  50. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_3.txt +0 -0
  51. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_fold_4.txt +0 -0
  52. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/indexes/train_indexes.txt +0 -0
  53. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_0.txt +0 -0
  54. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_1.txt +0 -0
  55. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_2.txt +0 -0
  56. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_3.txt +0 -0
  57. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/ddi_mdl_text/indexes/validation_fold_4.txt +0 -0
  58. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/embedding_generator.py +0 -0
  59. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/feature_vector_generation.py +0 -0
  60. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/idf_helper.py +0 -0
  61. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
  62. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/base.py +0 -0
  63. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
  64. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
  65. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
  66. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
  67. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
  68. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
  69. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
  70. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
  71. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
  72. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
  73. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
  74. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
  75. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
  76. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
  77. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
  78. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/datasets/setup_._py +0 -0
  79. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/drugbank/__init__.py +0 -0
  80. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
  81. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
  82. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
  83. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
  84. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/drugbank/event_extractor.py +0 -0
  85. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/langchain/__init__.py +0 -0
  86. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/langchain/embeddings.py +0 -0
  87. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/langchain/sentence_splitter.py +0 -0
  88. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/langchain/storage.py +0 -0
  89. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/ml/__init__.py +0 -0
  90. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/ml/evaluation_helper.py +0 -0
  91. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
  92. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/ner/__init__.py +0 -0
  93. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  94. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/ner/ner.py +0 -0
  95. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/pipeline/__init__.py +0 -0
  96. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
  97. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/pipeline/ner_pipeline.py +0 -0
  98. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/test/basic_test.py +0 -0
  99. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/test/combination_test.py +0 -0
  100. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/test/compress_json_test.py +0 -0
  101. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/test/date_test.py +0 -0
  102. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/test/idf_score.py +0 -0
  103. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/test/jaccard_similarity.py +0 -0
  104. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/test/mlfow_test.py +0 -0
  105. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/test/sklearn-tfidf.py +0 -0
  106. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/test/test.py +0 -0
  107. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/test/torch_cuda_test.py +0 -0
  108. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/test/type_guarding_test.py +0 -0
  109. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/utils/__init__.py +0 -0
  110. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/utils/enums.py +0 -0
  111. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/utils/kaggle.py +0 -0
  112. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/utils/package_helper.py +0 -0
  113. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  114. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/utils/utils.py +0 -0
  115. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw/utils/zip_helper.py +0 -0
  116. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  117. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw.egg-info/requires.txt +0 -0
  118. {ddi_fw-0.0.130 → ddi_fw-0.0.132}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.130
3
+ Version: 0.0.132
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "ddi_fw"
8
- version = "0.0.130"
8
+ version = "0.0.132"
9
9
  description = "Do not use :)"
10
10
  readme = "README.md"
11
11
  authors = [
@@ -1,5 +1,6 @@
1
1
  from .core import BaseDataset
2
2
  from .ddi_mdl.base import DDIMDLDataset
3
+ from .ddi_mdl_text.base import DDIMDLDatasetV2
3
4
  from .mdf_sa_ddi.base import MDFSADDIDataset
4
5
  from .embedding_generator import create_embeddings
5
6
  from .idf_helper import IDF
@@ -22,8 +22,8 @@ list_of_embedding_columns = ['description',
22
22
  'description_indication_mechanism_of_action',
23
23
  'description_indication_pharmacodynamics',
24
24
  'description_mechanism_of_action_pharmacodynamics',
25
- 'indication_mechanism_of_action_pharmacodynamics'
26
- 'description_indication_mechanism_of_action_pharmacodynamics',
25
+ 'indication_mechanism_of_action_pharmacodynamics',
26
+ 'description_indication_mechanism_of_action_pharmacodynamics'
27
27
  ]
28
28
 
29
29
  list_of_chemical_property_columns = ['enzyme',
@@ -46,7 +46,7 @@ def indices_to_binary_vector(indices, vector_length=881):
46
46
  return binary_vector
47
47
 
48
48
 
49
- class DDIMDLDataset(BaseDataset):
49
+ class DDIMDLDatasetV2(BaseDataset):
50
50
  def __init__(self, embedding_size,
51
51
  embedding_dict,
52
52
  embeddings_pooling_strategy: PoolingStrategy,
@@ -3,6 +3,7 @@ from matplotlib import pyplot as plt
3
3
  from ddi_fw.ml.model_wrapper import Result
4
4
  from ddi_fw.ml.pytorch_wrapper import PTModelWrapper
5
5
  from ddi_fw.ml.tensorflow_wrapper import TFModelWrapper
6
+ from ddi_fw.utils.package_helper import get_import
6
7
  import tensorflow as tf
7
8
  from tensorflow import keras
8
9
  from keras.models import Model, Sequential
@@ -30,11 +31,9 @@ import ddi_fw.utils as utils
30
31
 
31
32
  class MultiModalRunner:
32
33
  # todo model related parameters to config
33
- def __init__(self, library ,model_func, batch_size=128, epochs=100):
34
+ def __init__(self, library, multi_modal):
34
35
  self.library = library
35
- self.model_func = model_func
36
- self.batch_size = batch_size
37
- self.epochs = epochs
36
+ self.epochs = multi_modal
38
37
  self.result = Result()
39
38
 
40
39
  def set_data(self, items, train_idx_arr, val_idx_arr, y_test_label):
@@ -43,7 +42,7 @@ class MultiModalRunner:
43
42
  self.val_idx_arr = val_idx_arr
44
43
  self.y_test_label = y_test_label
45
44
 
46
- def __create_multi_modal(self,library):
45
+ def __create_model(self,library):
47
46
  if library == 'tensorflow':
48
47
  return TFModelWrapper
49
48
  elif library == 'pytorch':
@@ -66,18 +65,49 @@ class MultiModalRunner:
66
65
 
67
66
  with mlflow.start_run(run_name=self.prefix, description="***") as run:
68
67
  self.level_0_run_id = run.info.run_id
69
- for item in self.items:
70
- print(item[0])
71
- T = self.__create_multi_modal(self.library)
72
- single_modal=T(self.date, item[0], self.model_func, self.batch_size, self.epochs)
73
- single_modal.set_data(
74
- self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
68
+ item_dict = {t[0]: t for t in self.items}
69
+ for m in self.multi_modal:
70
+ name = m.get('name')
71
+ input_type = m.get('input_type')
72
+ input = m.get('input')
73
+ inputs = m.get('inputs')
74
+ model_type = get_import(m.get("model_type"))
75
+ kwargs = m.get('params')
76
+ T = self.__create_model(self.library)
77
+ single_modal=T(self.date, name, model_type, **kwargs)
78
+ if input_type == '1D':
79
+ item = item_dict[input]
80
+ single_modal.set_data(
81
+ self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
82
+ elif input_type == '2D':
83
+ filtered_dict = {k: item_dict[k] for k in inputs if k in item_dict}
84
+ first_input = next(iter(item_dict.values()))
85
+ train_data_list = [f[1] for f in filtered_dict.values()]
86
+ test_data_list = [f[3] for f in filtered_dict.values()]
87
+ train_data = np.stack(train_data_list, axis=1)
88
+ test_data = np.stack(test_data_list, axis=1)
89
+ train_label = first_input[2]
90
+ test_label = first_input[2]
91
+ single_modal.set_data(
92
+ self.train_idx_arr, self.val_idx_arr, train_data, train_label, test_data, test_label)
75
93
  logs, metrics, prediction = single_modal.predict()
76
- # self.result.add_log(item[0], logs)
77
- #Check
78
94
  self.result.add_metric(item[0], metrics)
79
- single_results[item[0]] = prediction
80
- # sum = sum + prediction
95
+ single_results[name] = prediction
96
+
97
+
98
+ # for item in self.items:
99
+ # print(item[0])
100
+ # T = self.__create_model(self.library)
101
+ # # parameters of model should be dictionary
102
+ # single_modal=T(self.date, item[0], self.model_func, self.batch_size, self.epochs)
103
+ # single_modal.set_data(
104
+ # self.train_idx_arr, self.val_idx_arr, item[1], item[2], item[3], item[4])
105
+ # logs, metrics, prediction = single_modal.predict()
106
+ # # self.result.add_log(item[0], logs)
107
+ # #Check
108
+ # self.result.add_metric(item[0], metrics)
109
+ # single_results[item[0]] = prediction
110
+ # # sum = sum + prediction
81
111
 
82
112
  if combinations:
83
113
  self.evaluate_combinations(single_results, combinations)
@@ -15,12 +15,12 @@ class Result:
15
15
 
16
16
 
17
17
  class ModelWrapper:
18
- def __init__(self, date, descriptor, model_func, batch_size=128, epochs=100):
18
+ def __init__(self, date, descriptor, model_func ,**kwargs):
19
19
  self.date = date
20
20
  self.descriptor = descriptor
21
21
  self.model_func = model_func
22
- self.batch_size = batch_size
23
- self.epochs = epochs
22
+ self.kwargs = kwargs
23
+
24
24
 
25
25
  def set_data(self, train_idx_arr, val_idx_arr, train_data, train_label, test_data, test_label):
26
26
  self.train_idx_arr = train_idx_arr
@@ -0,0 +1,152 @@
1
+ from ddi_fw.ml.model_wrapper import ModelWrapper
2
+ import tensorflow as tf
3
+ from tensorflow import keras
4
+ from keras.callbacks import EarlyStopping, ModelCheckpoint
5
+ from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
6
+ import numpy as np
7
+
8
+ import mlflow
9
+ from mlflow.utils.autologging_utils import batch_metrics_logger
10
+
11
+ from mlflow.models import infer_signature
12
+ from ddi_fw.ml.evaluation_helper import Metrics, evaluate
13
+
14
+ # import tf2onnx
15
+ # import onnx
16
+
17
+ import ddi_fw.utils as utils
18
+ import os
19
+
20
+
21
class TFModelWrapper(ModelWrapper):
    """Keras/TensorFlow implementation of ModelWrapper.

    Trains one model per CV fold (folds come from train_idx_arr /
    val_idx_arr supplied via set_data), keeps the fold with the lowest
    validation loss, and logs metrics and artifacts to MLflow.
    """

    def __init__(self, date, descriptor, model_func, **kwargs):
        # batch_size / epochs travel inside kwargs so callers can pass a
        # single params dict; defaults mirror the previous fixed arguments.
        super().__init__(date, descriptor, model_func, **kwargs)
        self.batch_size = kwargs.get('batch_size', 128)
        self.epochs = kwargs.get('epochs', 100)

    def fit_model(self, X_train, y_train, X_valid, y_valid):
        """Build and train one model; return (model, best_val_loss).

        The model is restored to its best-val_loss weights before the
        temporary checkpoint file is removed; previously the best weights
        were deleted without ever being loaded back, so the returned model
        carried the final epoch's weights.
        """
        self.kwargs['input_shape'] = self.train_data.shape
        model = self.model_func(**self.kwargs)
        checkpoint_path = f'{self.descriptor}_validation.weights.h5'
        checkpoint = ModelCheckpoint(
            filepath=checkpoint_path,
            monitor='val_loss',
            save_best_only=True,
            save_weights_only=True,
            verbose=1,
            mode='min'
        )
        early_stopping = EarlyStopping(
            monitor='val_loss', patience=10, mode='auto')
        custom_callback = CustomCallback()

        model.fit(
            X_train, y_train,
            batch_size=self.batch_size,
            epochs=self.epochs,
            validation_data=(X_valid, y_valid),
            callbacks=[early_stopping, checkpoint, custom_callback]
        )

        if os.path.exists(checkpoint_path):
            # Reload the best epoch's weights before discarding the file.
            model.load_weights(checkpoint_path)
            os.remove(checkpoint_path)

        # BUG FIX: return the tracked best value (a float), not the
        # ModelCheckpoint callback itself — fit() compares these across folds.
        return model, checkpoint.best

    def fit(self):
        """Cross-validated training; return (best_model, best_model_key).

        Fixes two defects in the previous version: models and their scores
        were stored under different key formats ('validation_{i}' vs
        '{descriptor}_validation_{i}'), making the final lookup a guaranteed
        KeyError, and max() was used on val_loss, selecting the WORST fold.
        """
        print(self.train_data.shape)
        models = {}
        models_val_loss = {}
        for i, (train_idx, val_idx) in enumerate(zip(self.train_idx_arr, self.val_idx_arr)):
            print(f"Validation {i}")
            with mlflow.start_run(run_name=f'Validation {i}', description='CV models', nested=True) as cv_fit:
                X_train_cv = self.train_data[train_idx]
                y_train_cv = self.train_label[train_idx]
                X_valid_cv = self.train_data[val_idx]
                y_valid_cv = self.train_label[val_idx]
                model, best_val_loss = self.fit_model(
                    X_train_cv, y_train_cv, X_valid_cv, y_valid_cv)
                key = f'{self.descriptor}_validation_{i}'
                models[key] = model
                models_val_loss[key] = best_val_loss

        # val_loss: lower is better, so pick the minimum.
        best_model_key = min(models_val_loss, key=models_val_loss.get)
        best_model = models[best_model_key]
        return best_model, best_model_key

    # https://github.com/mlflow/mlflow/blob/master/examples/tensorflow/train.py

    def predict(self, best_model):
        """Run the given trained model on the held-out test data.

        NOTE(review): MultiModalRunner appears to call single_modal.predict()
        with no arguments — confirm callers use fit_and_evaluate() instead.
        """
        return best_model.predict(self.test_data)

    def fit_and_evaluate(self):
        """Train via CV, evaluate the best fold's model on the test set.

        Returns (logs, metrics, pred); also logs metrics, the winning fold
        key, and a compressed metrics artifact to MLflow.
        """
        with mlflow.start_run(run_name=self.descriptor, description="***", nested=True) as run:
            print(run.info.artifact_uri)
            best_model, best_model_key = self.fit()
            pred = self.predict(best_model)
            logs, metrics = evaluate(
                actual=self.test_label, pred=pred, info=self.descriptor)
            metrics.format_float()
            mlflow.log_metrics(logs)
            mlflow.log_param('best_cv', best_model_key)

            utils.compress_and_save_data(
                metrics.__dict__, run.info.artifact_uri, f'{self.date}_metrics.gzip')

            return logs, metrics, pred
98
+
99
class CustomCallback(keras.callbacks.Callback):
    """Keras callback mirroring training telemetry into MLflow.

    Keras may invoke hooks with logs=None (on_train_begin in particular),
    so every hook normalizes logs to a dict before reading it; the previous
    version called logs.keys() unguarded and crashed on None. Hooks that
    only computed an unused key list have been dropped — the base Callback
    defaults are already no-ops.
    """

    def on_train_begin(self, logs=None):
        logs = logs or {}  # BUG FIX: logs is None here in current Keras
        mlflow.log_param("train_begin_keys", list(logs.keys()))
        config = self.model.optimizer.get_config()
        for attribute in config:
            mlflow.log_param("opt_" + attribute, config[attribute])

        # Capture model.summary() output as an MLflow text artifact.
        sum_list = []
        self.model.summary(print_fn=sum_list.append)
        summary = "\n".join(sum_list)
        mlflow.log_text(summary, artifact_file="model_summary.txt")

    def on_train_end(self, logs=None):
        logs = logs or {}
        print(logs)
        mlflow.log_metrics(logs)

    def on_test_end(self, logs=None):
        logs = logs or {}
        mlflow.log_metrics(logs)
        print(logs)

    def on_predict_end(self, logs=None):
        logs = logs or {}
        mlflow.log_metrics(logs)
@@ -49,8 +49,8 @@ class MultiPipeline():
49
49
  def __create_pipeline(self, config):
50
50
  type = config.get("type")
51
51
  library = config.get("library")
52
- batch_size = config.get("batch_size")
53
- epochs = config.get("epochs")
52
+ # batch_size = config.get("batch_size")
53
+ # epochs = config.get("epochs")
54
54
 
55
55
  # dataset_module = config.get("dataset_module")
56
56
  # dataset_name = config.get("dataset_name")
@@ -60,6 +60,8 @@ class MultiPipeline():
60
60
  experiment_tags = config.get("experiment_tags")
61
61
  tracking_uri = config.get("tracking_uri")
62
62
  artifact_location = config.get("artifact_location")
63
+ #new
64
+ multi_modal = config.get("multi_modal")
63
65
  columns = config.get("columns")
64
66
  ner_data_file = config.get("ner_data_file")
65
67
  ner_threshold = config.get("ner_threshold")
@@ -69,8 +71,9 @@ class MultiPipeline():
69
71
  embedding_pooling_strategy = get_import(
70
72
  config.get("embedding_pooling_strategy_type")) if config.get("embedding_pooling_strategy_type") else None
71
73
  # Dynamically import the model and dataset classes
72
- model_type = get_import(config.get("model_type"))
74
+ # model_type = get_import(config.get("model_type"))
73
75
  dataset_type = get_import(config.get("dataset_type"))
76
+
74
77
  combination_type = None
75
78
  kwargs_combination_params=None
76
79
  if config.get("combination_strategy"):
@@ -100,7 +103,8 @@ class MultiPipeline():
100
103
  embedding_pooling_strategy_type=embedding_pooling_strategy,
101
104
  ner_data_file=ner_data_file,
102
105
  ner_threshold=ner_threshold,
103
- combinations=combinations)
106
+ combinations=combinations,
107
+ multi_modal= multi_modal)
104
108
  elif type== "ner_search":
105
109
  pipeline = NerParameterSearch(
106
110
  library=library,
@@ -119,9 +123,9 @@ class MultiPipeline():
119
123
  return {
120
124
  "name": experiment_name,
121
125
  "library": library,
122
- "batch_size": batch_size,
123
- "epochs": epochs,
124
- "model_type": model_type,
126
+ # "batch_size": batch_size,
127
+ # "epochs": epochs,
128
+ # "model_type": model_type,
125
129
  "pipeline": pipeline}
126
130
 
127
131
  def build(self):
@@ -135,11 +139,11 @@ class MultiPipeline():
135
139
  print(f"{item['name']} is running")
136
140
  pipeline = item['pipeline']
137
141
  model_type = item['model_type']
138
- batch_size = item['batch_size']
139
- epochs = item['epochs']
142
+ # batch_size = item['batch_size']
143
+ # epochs = item['epochs']
140
144
  # It can be moved to build function
141
145
  pipeline.build()
142
- result = pipeline.run(model_type, epochs=epochs, batch_size=batch_size)
146
+ result = pipeline.run(model_type)
143
147
  self.pipeline_resuts[item['name']] = result
144
148
  return self
145
149
 
@@ -28,7 +28,8 @@ class Pipeline:
28
28
  ner_data_file=None,
29
29
  ner_threshold=None,
30
30
  combinations=None,
31
- model=None):
31
+ model=None,
32
+ multi_modal = None ):
32
33
  self.library = library
33
34
  self.experiment_name = experiment_name
34
35
  self.experiment_description = experiment_description
@@ -46,6 +47,7 @@ class Pipeline:
46
47
  self.ner_threshold = ner_threshold
47
48
  self.combinations = combinations
48
49
  self.model = model
50
+ self.multi_modal = multi_modal
49
51
 
50
52
  def __create_or_update_embeddings__(self, embedding_dict, vector_db_persist_directory, vector_db_collection_name, column=None):
51
53
  """
@@ -155,6 +157,7 @@ class Pipeline:
155
157
  self.train_idx_arr = self.dataset.train_idx_arr
156
158
  self.val_idx_arr = self.dataset.val_idx_arr
157
159
  # Logic to set up the experiment
160
+ # column name, train data, train label, test data, test label
158
161
  self.items = self.dataset.produce_inputs()
159
162
 
160
163
  unique_classes = pd.unique(self.dataframe['event_category'])
@@ -168,7 +171,7 @@ class Pipeline:
168
171
  # Implement additional build logic as needed
169
172
  return self
170
173
 
171
- def run(self, model_func, batch_size=128, epochs=100):
174
+ def run(self):
172
175
  mlflow.set_tracking_uri(self.tracking_uri)
173
176
 
174
177
  if mlflow.get_experiment_by_name(self.experiment_name) == None:
@@ -178,8 +181,9 @@ class Pipeline:
178
181
  mlflow.set_experiment(self.experiment_name)
179
182
 
180
183
  y_test_label = self.items[0][4]
181
- multi_modal_runner = MultiModalRunner(
182
- library=self.library, model_func=model_func, batch_size=batch_size, epochs=epochs)
184
+ multi_modal_runner = MultiModalRunner(library=self.library, multi_modal = self.multi_modal)
185
+ # multi_modal_runner = MultiModalRunner(
186
+ # library=self.library, model_func=model_func, batch_size=batch_size, epochs=epochs)
183
187
  # multi_modal = TFMultiModal(
184
188
  # model_func=model_func, batch_size=batch_size, epochs=epochs) # 100
185
189
  multi_modal_runner.set_data(
File without changes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.130
3
+ Version: 0.0.132
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -94,6 +94,7 @@ src/ddi_fw/pipeline/multi_modal_combination_strategy.py
94
94
  src/ddi_fw/pipeline/multi_pipeline.py
95
95
  src/ddi_fw/pipeline/ner_pipeline.py
96
96
  src/ddi_fw/pipeline/pipeline.py
97
+ src/ddi_fw/test/__init__.py
97
98
  src/ddi_fw/test/basic_test.py
98
99
  src/ddi_fw/test/combination_test.py
99
100
  src/ddi_fw/test/compress_json_test.py
@@ -1,157 +0,0 @@
1
- from ddi_fw.ml.model_wrapper import ModelWrapper
2
- import tensorflow as tf
3
- from tensorflow import keras
4
- from keras.callbacks import EarlyStopping,ModelCheckpoint
5
- from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
6
- import numpy as np
7
-
8
- import mlflow
9
- from mlflow.utils.autologging_utils import batch_metrics_logger
10
-
11
- from mlflow.models import infer_signature
12
- from ddi_fw.ml.evaluation_helper import Metrics, evaluate
13
-
14
- # import tf2onnx
15
- # import onnx
16
-
17
- import ddi_fw.utils as utils
18
-
19
-
20
- class TFModelWrapper(ModelWrapper):
21
- # https://github.com/mlflow/mlflow/blob/master/examples/tensorflow/train.py
22
- def predict(self):
23
- print(self.train_data.shape)
24
-
25
- # Failed to convert a NumPy array to a Tensor
26
- with mlflow.start_run(run_name=self.descriptor, description="***", nested=True) as run:
27
- models = dict()
28
- histories = dict()
29
- models_val_acc = dict()
30
- # with batch_metrics_logger(run_id) as metrics_logger:
31
- for i, (train_idx, val_idx) in enumerate(zip(self.train_idx_arr, self.val_idx_arr)):
32
- print(f"Validation {i}")
33
-
34
- with mlflow.start_run(run_name=f'Validation {i}', description='CV models', nested=True) as cv_fit:
35
- model = self.model_func(self.train_data.shape[1])
36
- models[f'validation_{i}'] = model
37
- X_train_cv = self.train_data[train_idx]
38
- y_train_cv = self.train_label[train_idx]
39
- X_valid_cv = self.train_data[val_idx]
40
- y_valid_cv = self.train_label[val_idx]
41
-
42
- checkpoint = ModelCheckpoint(
43
- filepath=f'{self.descriptor}_validation_{i}.weights.h5',
44
- monitor='val_loss',
45
- save_best_only=True,
46
- save_weights_only=True,
47
- verbose=1,
48
- mode='min'
49
- )
50
-
51
- early_stopping = EarlyStopping(
52
- monitor='val_loss', patience=10, verbose=0, mode='auto')
53
- custom_callback = CustomCallback()
54
- history = model.fit(X_train_cv, y_train_cv,
55
- batch_size=self.batch_size,
56
- epochs=self.epochs,
57
- validation_data=(
58
- X_valid_cv, y_valid_cv),
59
- callbacks=[early_stopping, checkpoint, custom_callback])
60
- # histories[f'validation_{i}'] = history
61
- # models_val_acc[f'validation_{i}'] = history.history['val_accuracy'][-1]
62
- models_val_acc[f'{self.descriptor}_validation_{i}'] = checkpoint.best
63
- models[f'{self.descriptor}_validation_{i}'] = checkpoint.model
64
- import os
65
- if os.path.exists(f'{self.descriptor}_validation_{i}.weights.h5'):
66
- os.remove(f'{self.descriptor}_validation_{i}.weights.h5')
67
- # Saving each CV model
68
-
69
- best_model_key = max(models_val_acc, key=models_val_acc.get)
70
- best_model = models[best_model_key]
71
- # mlflow.tensorflow.log_model(best_model, "model")
72
- # best_model.evaluate(self.test_data, self.test_label,
73
- # callbacks=[custom_callback])
74
- pred = best_model.predict(self.test_data)
75
-
76
- logs, metrics = evaluate(
77
- actual=self.test_label, pred=pred, info=self.descriptor)
78
- metrics.format_float()
79
- mlflow.log_metrics(logs)
80
- mlflow.log_param('best_cv', best_model_key)
81
- # signature = infer_signature(
82
- # self.train_data,
83
- # # generate_signature_output(model,X_valid_cv)
84
- # # params=params,
85
- # )
86
-
87
- # mlflow.keras.save_model(
88
- # best_model,
89
- # path=run.info.artifact_uri + '/model',
90
- # signature=signature,
91
- # )
92
- print(run.info.artifact_uri)
93
- # todo tf2onnx not compatible with keras > 2.15
94
- # onnx_model, _ = tf2onnx.convert.from_keras(
95
- # best_model, input_signature=None, opset=13)
96
- # onnx.save(onnx_model, run.info.artifact_uri +
97
- # '/model/model.onnx')
98
- utils.compress_and_save_data(
99
- metrics.__dict__, run.info.artifact_uri, f'{self.date}_metrics.gzip')
100
-
101
- return logs, metrics, pred
102
-
103
-
104
- class CustomCallback(keras.callbacks.Callback):
105
- def on_train_begin(self, logs=None):
106
- keys = list(logs.keys())
107
- mlflow.log_param("train_begin_keys", keys)
108
- config = self.model.optimizer.get_config()
109
- for attribute in config:
110
- mlflow.log_param("opt_" + attribute, config[attribute])
111
-
112
- sum_list = []
113
- self.model.summary(print_fn=sum_list.append)
114
- summary = "\n".join(sum_list)
115
- mlflow.log_text(summary, artifact_file="model_summary.txt")
116
-
117
- def on_train_end(self, logs=None):
118
- print(logs)
119
- mlflow.log_metrics(logs)
120
-
121
- def on_epoch_begin(self, epoch, logs=None):
122
- keys = list(logs.keys())
123
-
124
- def on_epoch_end(self, epoch, logs=None):
125
- keys = list(logs.keys())
126
-
127
- def on_test_begin(self, logs=None):
128
- keys = list(logs.keys())
129
-
130
- def on_test_end(self, logs=None):
131
- mlflow.log_metrics(logs)
132
- print(logs)
133
-
134
- def on_predict_begin(self, logs=None):
135
- keys = list(logs.keys())
136
-
137
- def on_predict_end(self, logs=None):
138
- keys = list(logs.keys())
139
- mlflow.log_metrics(logs)
140
-
141
- def on_train_batch_begin(self, batch, logs=None):
142
- keys = list(logs.keys())
143
-
144
- def on_train_batch_end(self, batch, logs=None):
145
- keys = list(logs.keys())
146
-
147
- def on_test_batch_begin(self, batch, logs=None):
148
- keys = list(logs.keys())
149
-
150
- def on_test_batch_end(self, batch, logs=None):
151
- keys = list(logs.keys())
152
-
153
- def on_predict_batch_begin(self, batch, logs=None):
154
- keys = list(logs.keys())
155
-
156
- def on_predict_batch_end(self, batch, logs=None):
157
- keys = list(logs.keys())
File without changes
File without changes
File without changes