ddi-fw 0.0.107__py3-none-any.whl → 0.0.108__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ddi_fw/langchain/storage.py +18 -15
- {ddi_fw-0.0.107.dist-info → ddi_fw-0.0.108.dist-info}/METADATA +1 -1
- {ddi_fw-0.0.107.dist-info → ddi_fw-0.0.108.dist-info}/RECORD +5 -5
- {ddi_fw-0.0.107.dist-info → ddi_fw-0.0.108.dist-info}/WHEEL +0 -0
- {ddi_fw-0.0.107.dist-info → ddi_fw-0.0.108.dist-info}/top_level.txt +0 -0
ddi_fw/langchain/storage.py
CHANGED
@@ -25,32 +25,35 @@ def load_configuration(config_file):
|
|
25
25
|
config = json.load(f)
|
26
26
|
return config
|
27
27
|
|
28
|
-
def split_dataframe(df, min_size=512
|
29
|
-
# Ensure the total size of the DataFrame is larger than the desired split size
|
28
|
+
def split_dataframe(df, min_size=512):
|
30
29
|
total_size = len(df)
|
31
|
-
|
32
|
-
# Check if the dataframe is large enough
|
30
|
+
# If the dataframe is smaller than min_size, return the dataframe as a whole
|
33
31
|
if total_size <= min_size:
|
34
|
-
|
32
|
+
return [df]
|
35
33
|
|
36
34
|
# List to store partial DataFrames
|
37
35
|
partial_dfs = []
|
38
|
-
|
39
|
-
# Start splitting the DataFrame
|
40
36
|
start_idx = 0
|
41
37
|
|
42
|
-
|
43
|
-
|
44
|
-
|
38
|
+
# Calculate the largest number of chunks such that each chunk still has at least min_size rows
|
39
|
+
num_chunks = total_size // min_size
|
40
|
+
remaining_rows = total_size
|
41
|
+
# Split into chunks
|
42
|
+
for i in range(num_chunks):
|
43
|
+
# If taking min_size rows now would leave fewer than min_size rows behind, let this chunk absorb all remaining rows
|
44
|
+
chunk_size = min_size
|
45
|
+
if (remaining_rows - chunk_size) < min_size:
|
46
|
+
chunk_size = remaining_rows # Last chunk takes all remaining rows
|
45
47
|
|
46
|
-
# Ensure that the chunk size does not exceed the remaining data
|
47
|
-
chunk_size = min(chunk_size, total_size - start_idx)
|
48
|
-
|
49
|
-
# Create the partial DataFrame and append to the list
|
50
48
|
partial_dfs.append(df.iloc[start_idx:start_idx + chunk_size])
|
51
49
|
|
52
|
-
# Update the start index
|
50
|
+
# Update the start index and remaining rows
|
53
51
|
start_idx += chunk_size
|
52
|
+
remaining_rows -= chunk_size
|
53
|
+
|
54
|
+
# If there are any remaining rows left after the loop, they should form the last chunk
|
55
|
+
if remaining_rows > 0:
|
56
|
+
partial_dfs.append(df.iloc[start_idx:start_idx + remaining_rows])
|
54
57
|
|
55
58
|
return partial_dfs
|
56
59
|
|
@@ -58,7 +58,7 @@ ddi_fw/drugbank/event_extractor.py,sha256=6odoZohhK7OdLF-LF0l-5BFq0_NMG_5jrFJbHr
|
|
58
58
|
ddi_fw/langchain/__init__.py,sha256=zS0CQrakWEP19biSRewFJGcBT8WBZq4899HrEKiMqUY,269
|
59
59
|
ddi_fw/langchain/embeddings.py,sha256=lU64a5AZ62jP8U3hTSwK0kXt7gThbwPACLfJMZ1baPA,7538
|
60
60
|
ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
|
61
|
-
ddi_fw/langchain/storage.py,sha256=
|
61
|
+
ddi_fw/langchain/storage.py,sha256=LHbrN9QJ6-aV6jaxYHCcJ2shgdrgH4Y4vCuFMSxYvrw,9028
|
62
62
|
ddi_fw/ml/__init__.py,sha256=tIxiW0g6q1VsmDYVXR_ovvHQR3SCir8g2bKxx_CrS7s,221
|
63
63
|
ddi_fw/ml/evaluation_helper.py,sha256=o4-w5Xa3t4olLW4ymx_8L-Buhe5wfQEmT2bh4Zz544c,13066
|
64
64
|
ddi_fw/ml/ml_helper.py,sha256=fySjIAFzkeEOvaLJhDwtCOgRhgYQ7H106eqaP16GhDY,4489
|
@@ -91,7 +91,7 @@ ddi_fw/utils/package_helper.py,sha256=erl8_onmhK-41zQoaED2qyDUV9GQxmT9sdoyRp9_q5
|
|
91
91
|
ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
|
92
92
|
ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
|
93
93
|
ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
|
94
|
-
ddi_fw-0.0.
|
95
|
-
ddi_fw-0.0.
|
96
|
-
ddi_fw-0.0.
|
97
|
-
ddi_fw-0.0.
|
94
|
+
ddi_fw-0.0.108.dist-info/METADATA,sha256=g_nsmA5N7aVJkBiWJNZAbkdcoCXGxBxnspfpVRg_n-0,1967
|
95
|
+
ddi_fw-0.0.108.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
|
96
|
+
ddi_fw-0.0.108.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
|
97
|
+
ddi_fw-0.0.108.dist-info/RECORD,,
|
File without changes
|
File without changes
|