ddi-fw 0.0.100__py3-none-any.whl → 0.0.102__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -122,7 +122,7 @@ def generate_embeddings(df, config_file, new_model_names, collections=None, pers
122
122
  # print(f"Configuration for collection {id} not found.")
123
123
  # continue
124
124
 
125
- embedding_model_type = collection_config['embedding_model_tpe']
125
+ embedding_model_type = collection_config['embedding_model_type']
126
126
  text_splitters_types = collection_config['text_splitters_types']
127
127
  batch_size = collection_config['batch_size']
128
128
  columns = collection_config['columns']
@@ -135,7 +135,7 @@ def generate_embeddings(df, config_file, new_model_names, collections=None, pers
135
135
  # Assuming the classes for the embeddings and splitters are available
136
136
  try:
137
137
  model = get_import(embedding_model_type)(
138
- model_name=name, model_kwargs=c['model_kwargs'])
138
+ model_name=name, model_kwargs=collection_config['model_kwargs'])
139
139
  except:
140
140
  # print(f"Unknown embedding model: {embedding_model_type}")
141
141
  raise Exception(f"Unknown embedding model: {embedding_model_type}")
ddi_fw/utils/kaggle.py CHANGED
@@ -17,13 +17,19 @@ def create_kaggle_dataset(base_path: str, collections: list):
17
17
  # Step 1: Loop through each folder in base_path
18
18
  for folder_name in os.listdir(base_path):
19
19
  folder_path = os.path.join(base_path, folder_name)
20
-
20
+ metadata_file_path = os.path.join(folder_path, 'dataset-metadata.json')
21
+
21
22
  # Step 2: Get metadata for the current folder
22
23
  model_info = next((c for c in collections if c['id'] == folder_name), None)
23
24
  if model_info is None:
24
25
  continue # Skip if model info is not found
25
26
 
26
27
  title = model_info['kaggle_title']
28
+
29
+ if os.path.exists(metadata_file_path):
30
+ print(f"{title} has dataset-metadata.json file")
31
+ continue
32
+
27
33
  id = model_info['kaggle_id'].lower().replace(' ', '-')
28
34
  licenses = model_info['kaggle_licenses']
29
35
  description = model_info['kaggle_description']
@@ -41,7 +47,6 @@ def create_kaggle_dataset(base_path: str, collections: list):
41
47
  }
42
48
 
43
49
  # Step 4: Write the metadata to a JSON file in the folder
44
- metadata_file_path = os.path.join(folder_path, 'dataset-metadata.json')
45
50
  with open(metadata_file_path, 'w') as f:
46
51
  json.dump(metadata, f, indent=4)
47
52
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.100
3
+ Version: 0.0.102
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -58,7 +58,7 @@ ddi_fw/drugbank/event_extractor.py,sha256=6odoZohhK7OdLF-LF0l-5BFq0_NMG_5jrFJbHr
58
58
  ddi_fw/langchain/__init__.py,sha256=zS0CQrakWEP19biSRewFJGcBT8WBZq4899HrEKiMqUY,269
59
59
  ddi_fw/langchain/embeddings.py,sha256=b9BUG73Ayx3Wy8MQrfsVeZ-qBB41vjVECSp2YhH-CIY,7514
60
60
  ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
61
- ddi_fw/langchain/storage.py,sha256=-QRlzHsfQ7yj0OEFShRDb2A0H1iMReHiD4absxoYwGU,7415
61
+ ddi_fw/langchain/storage.py,sha256=CDJuUSvUlUgHL8gKb3bo4RHLtI6gY8N2sNyJXg4TBJQ,7432
62
62
  ddi_fw/ml/__init__.py,sha256=tIxiW0g6q1VsmDYVXR_ovvHQR3SCir8g2bKxx_CrS7s,221
63
63
  ddi_fw/ml/evaluation_helper.py,sha256=o4-w5Xa3t4olLW4ymx_8L-Buhe5wfQEmT2bh4Zz544c,13066
64
64
  ddi_fw/ml/ml_helper.py,sha256=fySjIAFzkeEOvaLJhDwtCOgRhgYQ7H106eqaP16GhDY,4489
@@ -86,12 +86,12 @@ ddi_fw/test/torch_cuda_test.py,sha256=R-4VGVErl_Ufk54DoZbgL_YXWoCYFyanIVWd6P39IE
86
86
  ddi_fw/test/type_guarding_test.py,sha256=KxjyBxohDu7lwpejalCj-REjtJ-k1S1wQbOB6TGY0O8,766
87
87
  ddi_fw/utils/__init__.py,sha256=77563ikqAtdzjjgRlLp5OAsJBbpLA1Cao8iecGaVUXQ,354
88
88
  ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
89
- ddi_fw/utils/kaggle.py,sha256=FjWR1ncOEif6XCCzDYpErLDz_9fxAQub0L7X4aVPw24,2266
89
+ ddi_fw/utils/kaggle.py,sha256=wKRJ18KpQ6P-CubpZklEgsDtyFpR9RUL1_HyyF6ttEE,2425
90
90
  ddi_fw/utils/package_helper.py,sha256=erl8_onmhK-41zQoaED2qyDUV9GQxmT9sdoyRp9_q5I,1056
91
91
  ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
92
92
  ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
93
93
  ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
94
- ddi_fw-0.0.100.dist-info/METADATA,sha256=TyW7tsHIuFEoXe8d2tpZ02iCE3HEGqBAKnwKIrpPmgs,1967
95
- ddi_fw-0.0.100.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
96
- ddi_fw-0.0.100.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
97
- ddi_fw-0.0.100.dist-info/RECORD,,
94
+ ddi_fw-0.0.102.dist-info/METADATA,sha256=f8-pfgayNvtlcMCO9E-e4Mza4QEddI417SO60XbdwMo,1967
95
+ ddi_fw-0.0.102.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
96
+ ddi_fw-0.0.102.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
97
+ ddi_fw-0.0.102.dist-info/RECORD,,