ddi-fw 0.0.100__py3-none-any.whl → 0.0.102__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/langchain/storage.py +2 -2
- ddi_fw/utils/kaggle.py +7 -2
- {ddi_fw-0.0.100.dist-info → ddi_fw-0.0.102.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.100.dist-info → ddi_fw-0.0.102.dist-info}/RECORD +6 -6
- {ddi_fw-0.0.100.dist-info → ddi_fw-0.0.102.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.100.dist-info → ddi_fw-0.0.102.dist-info}/top_level.txt +0 -0
ddi_fw/langchain/storage.py
CHANGED
@@ -122,7 +122,7 @@ def generate_embeddings(df, config_file, new_model_names, collections=None, pers
|
|
122
122
|
# print(f"Configuration for collection {id} not found.")
|
123
123
|
# continue
|
124
124
|
|
125
|
-
embedding_model_type = collection_config['
|
125
|
+
embedding_model_type = collection_config['embedding_model_type']
|
126
126
|
text_splitters_types = collection_config['text_splitters_types']
|
127
127
|
batch_size = collection_config['batch_size']
|
128
128
|
columns = collection_config['columns']
|
@@ -135,7 +135,7 @@ def generate_embeddings(df, config_file, new_model_names, collections=None, pers
|
|
135
135
|
# Assuming the classes for the embeddings and splitters are available
|
136
136
|
try:
|
137
137
|
model = get_import(embedding_model_type)(
|
138
|
-
model_name=name, model_kwargs=
|
138
|
+
model_name=name, model_kwargs=collection_config['model_kwargs'])
|
139
139
|
except:
|
140
140
|
# print(f"Unknown embedding model: {embedding_model_type}")
|
141
141
|
raise Exception(f"Unknown embedding model: {embedding_model_type}")
|
ddi_fw/utils/kaggle.py
CHANGED
@@ -17,13 +17,19 @@ def create_kaggle_dataset(base_path: str, collections: list):
|
|
17
17
|
# Step 1: Loop through each folder in base_path
|
18
18
|
for folder_name in os.listdir(base_path):
|
19
19
|
folder_path = os.path.join(base_path, folder_name)
|
20
|
-
|
20
|
+
metadata_file_path = os.path.join(folder_path, 'dataset-metadata.json')
|
21
|
+
|
21
22
|
# Step 2: Get metadata for the current folder
|
22
23
|
model_info = next((c for c in collections if c['id'] == folder_name), None)
|
23
24
|
if model_info is None:
|
24
25
|
continue # Skip if model info is not found
|
25
26
|
|
26
27
|
title = model_info['kaggle_title']
|
28
|
+
|
29
|
+
if os.path.exists(metadata_file_path):
|
30
|
+
print(f"{title} has dataset-metadata.json file")
|
31
|
+
continue
|
32
|
+
|
27
33
|
id = model_info['kaggle_id'].lower().replace(' ', '-')
|
28
34
|
licenses = model_info['kaggle_licenses']
|
29
35
|
description = model_info['kaggle_description']
|
@@ -41,7 +47,6 @@ def create_kaggle_dataset(base_path: str, collections: list):
|
|
41
47
|
}
|
42
48
|
|
43
49
|
# Step 4: Write the metadata to a JSON file in the folder
|
44
|
-
metadata_file_path = os.path.join(folder_path, 'dataset-metadata.json')
|
45
50
|
with open(metadata_file_path, 'w') as f:
|
46
51
|
json.dump(metadata, f, indent=4)
|
47
52
|
|
@@ -58,7 +58,7 @@ ddi_fw/drugbank/event_extractor.py,sha256=6odoZohhK7OdLF-LF0l-5BFq0_NMG_5jrFJbHr
|
|
58
58
|
ddi_fw/langchain/__init__.py,sha256=zS0CQrakWEP19biSRewFJGcBT8WBZq4899HrEKiMqUY,269
|
59
59
|
ddi_fw/langchain/embeddings.py,sha256=b9BUG73Ayx3Wy8MQrfsVeZ-qBB41vjVECSp2YhH-CIY,7514
|
60
60
|
ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
|
61
|
-
ddi_fw/langchain/storage.py,sha256
|
61
|
+
ddi_fw/langchain/storage.py,sha256=CDJuUSvUlUgHL8gKb3bo4RHLtI6gY8N2sNyJXg4TBJQ,7432
|
62
62
|
ddi_fw/ml/__init__.py,sha256=tIxiW0g6q1VsmDYVXR_ovvHQR3SCir8g2bKxx_CrS7s,221
|
63
63
|
ddi_fw/ml/evaluation_helper.py,sha256=o4-w5Xa3t4olLW4ymx_8L-Buhe5wfQEmT2bh4Zz544c,13066
|
64
64
|
ddi_fw/ml/ml_helper.py,sha256=fySjIAFzkeEOvaLJhDwtCOgRhgYQ7H106eqaP16GhDY,4489
|
@@ -86,12 +86,12 @@ ddi_fw/test/torch_cuda_test.py,sha256=R-4VGVErl_Ufk54DoZbgL_YXWoCYFyanIVWd6P39IE
|
|
86
86
|
ddi_fw/test/type_guarding_test.py,sha256=KxjyBxohDu7lwpejalCj-REjtJ-k1S1wQbOB6TGY0O8,766
|
87
87
|
ddi_fw/utils/__init__.py,sha256=77563ikqAtdzjjgRlLp5OAsJBbpLA1Cao8iecGaVUXQ,354
|
88
88
|
ddi_fw/utils/enums.py,sha256=19eJ3fX5eRK_xPvkYcukmug144jXPH4X9zQqtsFBj5A,671
|
89
|
-
ddi_fw/utils/kaggle.py,sha256=
|
89
|
+
ddi_fw/utils/kaggle.py,sha256=wKRJ18KpQ6P-CubpZklEgsDtyFpR9RUL1_HyyF6ttEE,2425
|
90
90
|
ddi_fw/utils/package_helper.py,sha256=erl8_onmhK-41zQoaED2qyDUV9GQxmT9sdoyRp9_q5I,1056
|
91
91
|
ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
|
92
92
|
ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
|
93
93
|
ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
|
94
|
-
ddi_fw-0.0.
|
95
|
-
ddi_fw-0.0.
|
96
|
-
ddi_fw-0.0.
|
97
|
-
ddi_fw-0.0.
|
94
|
+
ddi_fw-0.0.102.dist-info/METADATA,sha256=f8-pfgayNvtlcMCO9E-e4Mza4QEddI417SO60XbdwMo,1967
|
95
|
+
ddi_fw-0.0.102.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
|
96
|
+
ddi_fw-0.0.102.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
97
|
+
ddi_fw-0.0.102.dist-info/RECORD,,
|
File without changes
|
File without changes
|