ddi-fw 0.0.97__tar.gz → 0.0.98__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100):
  1. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/PKG-INFO +13 -13
  2. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/pyproject.toml +13 -13
  3. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/core.py +28 -71
  4. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw.egg-info/PKG-INFO +13 -13
  5. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw.egg-info/requires.txt +12 -12
  6. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/README.md +0 -0
  7. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/setup.cfg +0 -0
  8. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/__init__.py +0 -0
  9. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/db_utils.py +0 -0
  10. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/base.py +0 -0
  11. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/data/event.db +0 -0
  12. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes/test_indexes.txt +0 -0
  13. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_0.txt +0 -0
  14. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_1.txt +0 -0
  15. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_2.txt +0 -0
  16. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_3.txt +0 -0
  17. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes/train_fold_4.txt +0 -0
  18. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes/train_indexes.txt +0 -0
  19. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_0.txt +0 -0
  20. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_1.txt +0 -0
  21. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_2.txt +0 -0
  22. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_3.txt +0 -0
  23. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes/validation_fold_4.txt +0 -0
  24. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes_old/test_indexes.txt +0 -0
  25. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_0.txt +0 -0
  26. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_1.txt +0 -0
  27. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_2.txt +0 -0
  28. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_3.txt +0 -0
  29. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_fold_4.txt +0 -0
  30. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes_old/train_indexes.txt +0 -0
  31. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_0.txt +0 -0
  32. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_1.txt +0 -0
  33. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_2.txt +0 -0
  34. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_3.txt +0 -0
  35. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/indexes_old/validation_fold_4.txt +0 -0
  36. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/ddi_mdl/readme.md +0 -0
  37. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/embedding_generator.py +0 -0
  38. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/feature_vector_generation.py +0 -0
  39. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/idf_helper.py +0 -0
  40. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/__init__.py +0 -0
  41. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/base.py +0 -0
  42. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/df_extraction_cleanxiaoyu50.csv +0 -0
  43. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/drug_information_del_noDDIxiaoyu50.csv +0 -0
  44. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/test_indexes.txt +0 -0
  45. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_0.txt +0 -0
  46. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_1.txt +0 -0
  47. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_2.txt +0 -0
  48. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_3.txt +0 -0
  49. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_fold_4.txt +0 -0
  50. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/train_indexes.txt +0 -0
  51. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_0.txt +0 -0
  52. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_1.txt +0 -0
  53. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_2.txt +0 -0
  54. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_3.txt +0 -0
  55. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/indexes/validation_fold_4.txt +0 -0
  56. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/mdf_sa_ddi/mdf-sa-ddi.zip +0 -0
  57. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/datasets/setup_._py +0 -0
  58. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/drugbank/__init__.py +0 -0
  59. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/drugbank/drugbank.xsd +0 -0
  60. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/drugbank/drugbank_parser.py +0 -0
  61. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/drugbank/drugbank_processor.py +0 -0
  62. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/drugbank/drugbank_processor_org.py +0 -0
  63. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/drugbank/event_extractor.py +0 -0
  64. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/langchain/__init__.py +0 -0
  65. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/langchain/embeddings.py +0 -0
  66. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/langchain/sentence_splitter.py +0 -0
  67. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/langchain/storage.py +0 -0
  68. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/ml/__init__.py +0 -0
  69. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/ml/evaluation_helper.py +0 -0
  70. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/ml/ml_helper.py +0 -0
  71. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/ml/model_wrapper.py +0 -0
  72. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/ml/pytorch_wrapper.py +0 -0
  73. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/ml/tensorflow_wrapper.py +0 -0
  74. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/ner/__init__.py +0 -0
  75. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/ner/mmlrestclient.py +0 -0
  76. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/ner/ner.py +0 -0
  77. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/pipeline/__init__.py +0 -0
  78. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/pipeline/multi_modal_combination_strategy.py +0 -0
  79. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/pipeline/multi_pipeline.py +0 -0
  80. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/pipeline/ner_pipeline.py +0 -0
  81. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/pipeline/pipeline.py +0 -0
  82. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/test/basic_test.py +0 -0
  83. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/test/combination_test.py +0 -0
  84. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/test/compress_json_test.py +0 -0
  85. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/test/date_test.py +0 -0
  86. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/test/idf_score.py +0 -0
  87. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/test/jaccard_similarity.py +0 -0
  88. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/test/mlfow_test.py +0 -0
  89. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/test/sklearn-tfidf.py +0 -0
  90. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/test/test.py +0 -0
  91. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/test/torch_cuda_test.py +0 -0
  92. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/test/type_guarding_test.py +0 -0
  93. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/utils/__init__.py +0 -0
  94. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/utils/enums.py +0 -0
  95. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/utils/py7zr_helper.py +0 -0
  96. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/utils/utils.py +0 -0
  97. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw/utils/zip_helper.py +0 -0
  98. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw.egg-info/SOURCES.txt +0 -0
  99. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw.egg-info/dependency_links.txt +0 -0
  100. {ddi_fw-0.0.97 → ddi_fw-0.0.98}/src/ddi_fw.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.97
3
+ Version: 0.0.98
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -27,25 +27,25 @@ Requires-Dist: python-stopwatch==1.1.11
27
27
  Requires-Dist: lxml==5.3.0
28
28
  Requires-Dist: matplotlib==3.8.0
29
29
  Requires-Dist: mlflow==2.16.1
30
- Requires-Dist: nltk==3.8.1
31
- Requires-Dist: numpy==1.26.4
32
- Requires-Dist: pandas==2.2.2
30
+ Requires-Dist: nltk>=3.8.1
31
+ Requires-Dist: numpy>=1.26.4
32
+ Requires-Dist: pandas>=2.2.0
33
33
  Requires-Dist: plotly==5.24.1
34
34
  Requires-Dist: rdkit==2023.3.3
35
35
  Requires-Dist: scikit-learn==1.5.2
36
36
  Requires-Dist: scipy==1.13.1
37
- Requires-Dist: accelerate==0.33.0
38
- Requires-Dist: sentence-transformers==3.0.1
39
- Requires-Dist: transformers==4.42.4
37
+ Requires-Dist: accelerate>=0.33.0
38
+ Requires-Dist: sentence-transformers>=3.0.1
39
+ Requires-Dist: transformers>=4.42.4
40
40
  Requires-Dist: stanza==1.9.2
41
- Requires-Dist: tokenizers==0.19.1
42
- Requires-Dist: tqdm==4.66.6
41
+ Requires-Dist: tokenizers>=0.19.1
42
+ Requires-Dist: tqdm>=4.66.6
43
43
  Requires-Dist: xmlschema==3.4.2
44
- Requires-Dist: zipp==3.20.2
44
+ Requires-Dist: zipp>=3.20.2
45
45
  Requires-Dist: py7zr==0.22.0
46
- Requires-Dist: openai==1.52.2
47
- Requires-Dist: langchain==0.3.4
48
- Requires-Dist: chromadb==0.5.15
46
+ Requires-Dist: openai>=1.52.2
47
+ Requires-Dist: langchain>=0.3.4
48
+ Requires-Dist: chromadb>=0.5.15
49
49
  Requires-Dist: langchain_community==0.3.3
50
50
  Requires-Dist: datasets==3.0.2
51
51
  Requires-Dist: unstructured==0.16.3
@@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
5
5
 
6
6
  [project]
7
7
  name = "ddi_fw"
8
- version = "0.0.97"
8
+ version = "0.0.98"
9
9
  description = "Do not use :)"
10
10
  readme = "README.md"
11
11
  authors = [
@@ -50,25 +50,25 @@ dependencies = [
50
50
  ,"lxml==5.3.0"
51
51
  ,"matplotlib==3.8.0"
52
52
  ,"mlflow==2.16.1"
53
- ,"nltk==3.8.1"
54
- ,"numpy==1.26.4"
55
- ,"pandas==2.2.2"
53
+ ,"nltk>=3.8.1"
54
+ ,"numpy>=1.26.4"
55
+ ,"pandas>=2.2.0"
56
56
  ,"plotly==5.24.1"
57
57
  ,"rdkit==2023.3.3"
58
58
  ,"scikit-learn==1.5.2"
59
59
  ,"scipy==1.13.1"
60
- ,"accelerate==0.33.0"
61
- ,"sentence-transformers==3.0.1"
62
- ,"transformers==4.42.4"
60
+ ,"accelerate>=0.33.0"
61
+ ,"sentence-transformers>=3.0.1"
62
+ ,"transformers>=4.42.4"
63
63
  ,"stanza==1.9.2"
64
- ,"tokenizers==0.19.1"
65
- ,"tqdm==4.66.6"
64
+ ,"tokenizers>=0.19.1"
65
+ ,"tqdm>=4.66.6"
66
66
  ,"xmlschema==3.4.2"
67
- ,"zipp==3.20.2"
67
+ ,"zipp>=3.20.2"
68
68
  ,"py7zr==0.22.0"
69
- ,"openai==1.52.2"
70
- ,"langchain==0.3.4"
71
- ,"chromadb==0.5.15"
69
+ ,"openai>=1.52.2"
70
+ ,"langchain>=0.3.4"
71
+ ,"chromadb>=0.5.15"
72
72
  ,"langchain_community==0.3.3"
73
73
  ,"datasets==3.0.2"
74
74
  ,"unstructured==0.16.3"
@@ -268,81 +268,39 @@ class BaseDataset(ABC):
268
268
 
269
269
  # her bir metin tipi için embedding oluşturursan burayı düzenle
270
270
  def prep(self):
271
- # if self.embedding_columns:
272
- # zip_helper = ZipHelper()
273
- # zip_helper.extract(str(HERE.joinpath('zips/embeddings')),
274
- # str(HERE.joinpath('zips/embeddings')))
275
-
276
- # embedding_dict = dict()
277
- # for embedding_column in self.embedding_columns:
278
- # embedding_file = HERE.joinpath(
279
- # f'zips/embeddings/{embedding_column}_embeddings.pkl')
280
- # embedding_values = pd.read_pickle(embedding_file)
281
- # d = embedding_values.apply(
282
- # lambda x: {x.id: x[f'{embedding_column}_embedding']}, axis=1)
283
- # x = {k: v for l in d.values.tolist() for k, v in l.items()}
284
- # embedding_dict[embedding_column] = x
285
-
286
- # self.ner_df = CTakesNER().load()
287
271
  drug_names = self.drugs_df['name'].to_list()
288
272
  drug_ids = self.drugs_df['id'].to_list()
289
273
 
290
- # self.ddis_df = self.ddis_df[(self.ddis_df['name1'].isin(drug_names)) & (
291
- # self.ddis_df['name2'].isin(drug_names))]
292
-
293
274
  filtered_df = self.drugs_df
294
- filtered_ner_df = self.ner_df[self.ner_df['drugbank_id'].isin(
295
- drug_ids)]
296
- filtered_ner_df = self.ner_df.copy()
297
-
298
275
  combined_df = filtered_df.copy()
299
- # TODO: eğer kullanılan veri setinde tui, cui veya entity bilgileri yoksa o veri setine bu sütunları eklemek için aşağısı gerekli
300
-
301
- # idf_calc = IDF(filtered_ner_df, [f for f in filtered_ner_df.keys()])
302
- idf_calc = IDF(filtered_ner_df, self.ner_columns)
303
- idf_calc.calculate()
304
- idf_scores_df = idf_calc.to_dataframe()
305
-
306
- # for key in filtered_ner_df.keys():
307
- for key in self.ner_columns:
308
- threshold = 0
309
- if key.startswith('tui'):
310
- threshold = self.tui_threshold
311
- if key.startswith('cui'):
312
- threshold = self.cui_threshold
313
- if key.startswith('entities'):
314
- threshold = self.entities_threshold
315
- combined_df[key] = filtered_ner_df[key]
316
- valid_codes = idf_scores_df[idf_scores_df[key] > threshold].index
317
-
318
- # print(f'{key}: valid code size = {len(valid_codes)}')
319
- combined_df[key] = combined_df[key].apply(lambda items:
320
- [item for item in items if item in valid_codes])
321
-
322
- # Yukarıdaki koda evrildi
323
- # combined_df['tui_description'] = filtered_ner_df['description_tuis']
324
- # combined_df['cui_description'] = filtered_ner_df['description_cuis']
325
- # combined_df['entities_description'] = filtered_ner_df['description_entities']
326
-
327
- # tui_idf = IDF(combined_df['tui_description'], self.tui_threshold)
328
- # cui_idf = IDF(combined_df['cui_description'], self.cui_threshold)
329
- # entities_idf = IDF(
330
- # combined_df['entities_description'], self.entities_threshold)
331
-
332
- # tui_idf.calculate()
333
- # cui_idf.calculate()
334
- # entities_idf.calculate()
335
-
336
- # valid_tui_codes = tui_idf.find_items_over_threshold()
337
- # valid_cui_codes = cui_idf.find_items_over_threshold()
338
- # valid_entities_codes = entities_idf.find_items_over_threshold()
339
-
340
- # combined_df['tui_description'] = combined_df['tui_description'].apply(lambda items:
341
- # [item for item in items if item in valid_tui_codes])
342
- # combined_df['cui_description'] = combined_df['cui_description'].apply(lambda items:
343
- # [item for item in items if item in valid_cui_codes])
344
- # combined_df['entities_description'] = combined_df['entities_description'].apply(lambda items:
345
- # [item for item in items if item in valid_entities_codes])
276
+
277
+ if self.ner_df:
278
+ filtered_ner_df = self.ner_df[self.ner_df['drugbank_id'].isin(
279
+ drug_ids)]
280
+ filtered_ner_df = self.ner_df.copy()
281
+
282
+ # TODO: eğer kullanılan veri setinde tui, cui veya entity bilgileri yoksa o veri setine bu sütunları eklemek için aşağısı gerekli
283
+
284
+ # idf_calc = IDF(filtered_ner_df, [f for f in filtered_ner_df.keys()])
285
+ idf_calc = IDF(filtered_ner_df, self.ner_columns)
286
+ idf_calc.calculate()
287
+ idf_scores_df = idf_calc.to_dataframe()
288
+
289
+ # for key in filtered_ner_df.keys():
290
+ for key in self.ner_columns:
291
+ threshold = 0
292
+ if key.startswith('tui'):
293
+ threshold = self.tui_threshold
294
+ if key.startswith('cui'):
295
+ threshold = self.cui_threshold
296
+ if key.startswith('entities'):
297
+ threshold = self.entities_threshold
298
+ combined_df[key] = filtered_ner_df[key]
299
+ valid_codes = idf_scores_df[idf_scores_df[key] > threshold].index
300
+
301
+ # print(f'{key}: valid code size = {len(valid_codes)}')
302
+ combined_df[key] = combined_df[key].apply(lambda items:
303
+ [item for item in items if item in valid_codes])
346
304
 
347
305
  moved_columns = ['id']
348
306
  moved_columns.extend(self.__similarity_related_columns__)
@@ -409,7 +367,6 @@ class BaseDataset(ABC):
409
367
  x_fnc, args=(embeddings_after_pooling,), axis=1)
410
368
 
411
369
  self.dataframe = self.ddis_df.copy()
412
- self.dataframe['class_as_txt'] = labels
413
370
  self.dataframe['class'] = list(classes)
414
371
  print(self.dataframe.shape)
415
372
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.97
3
+ Version: 0.0.98
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -27,25 +27,25 @@ Requires-Dist: python-stopwatch==1.1.11
27
27
  Requires-Dist: lxml==5.3.0
28
28
  Requires-Dist: matplotlib==3.8.0
29
29
  Requires-Dist: mlflow==2.16.1
30
- Requires-Dist: nltk==3.8.1
31
- Requires-Dist: numpy==1.26.4
32
- Requires-Dist: pandas==2.2.2
30
+ Requires-Dist: nltk>=3.8.1
31
+ Requires-Dist: numpy>=1.26.4
32
+ Requires-Dist: pandas>=2.2.0
33
33
  Requires-Dist: plotly==5.24.1
34
34
  Requires-Dist: rdkit==2023.3.3
35
35
  Requires-Dist: scikit-learn==1.5.2
36
36
  Requires-Dist: scipy==1.13.1
37
- Requires-Dist: accelerate==0.33.0
38
- Requires-Dist: sentence-transformers==3.0.1
39
- Requires-Dist: transformers==4.42.4
37
+ Requires-Dist: accelerate>=0.33.0
38
+ Requires-Dist: sentence-transformers>=3.0.1
39
+ Requires-Dist: transformers>=4.42.4
40
40
  Requires-Dist: stanza==1.9.2
41
- Requires-Dist: tokenizers==0.19.1
42
- Requires-Dist: tqdm==4.66.6
41
+ Requires-Dist: tokenizers>=0.19.1
42
+ Requires-Dist: tqdm>=4.66.6
43
43
  Requires-Dist: xmlschema==3.4.2
44
- Requires-Dist: zipp==3.20.2
44
+ Requires-Dist: zipp>=3.20.2
45
45
  Requires-Dist: py7zr==0.22.0
46
- Requires-Dist: openai==1.52.2
47
- Requires-Dist: langchain==0.3.4
48
- Requires-Dist: chromadb==0.5.15
46
+ Requires-Dist: openai>=1.52.2
47
+ Requires-Dist: langchain>=0.3.4
48
+ Requires-Dist: chromadb>=0.5.15
49
49
  Requires-Dist: langchain_community==0.3.3
50
50
  Requires-Dist: datasets==3.0.2
51
51
  Requires-Dist: unstructured==0.16.3
@@ -3,25 +3,25 @@ python-stopwatch==1.1.11
3
3
  lxml==5.3.0
4
4
  matplotlib==3.8.0
5
5
  mlflow==2.16.1
6
- nltk==3.8.1
7
- numpy==1.26.4
8
- pandas==2.2.2
6
+ nltk>=3.8.1
7
+ numpy>=1.26.4
8
+ pandas>=2.2.0
9
9
  plotly==5.24.1
10
10
  rdkit==2023.3.3
11
11
  scikit-learn==1.5.2
12
12
  scipy==1.13.1
13
- accelerate==0.33.0
14
- sentence-transformers==3.0.1
15
- transformers==4.42.4
13
+ accelerate>=0.33.0
14
+ sentence-transformers>=3.0.1
15
+ transformers>=4.42.4
16
16
  stanza==1.9.2
17
- tokenizers==0.19.1
18
- tqdm==4.66.6
17
+ tokenizers>=0.19.1
18
+ tqdm>=4.66.6
19
19
  xmlschema==3.4.2
20
- zipp==3.20.2
20
+ zipp>=3.20.2
21
21
  py7zr==0.22.0
22
- openai==1.52.2
23
- langchain==0.3.4
24
- chromadb==0.5.15
22
+ openai>=1.52.2
23
+ langchain>=0.3.4
24
+ chromadb>=0.5.15
25
25
  langchain_community==0.3.3
26
26
  datasets==3.0.2
27
27
  unstructured==0.16.3
File without changes
File without changes
File without changes
File without changes