ddi-fw 0.0.108__py3-none-any.whl → 0.0.109__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,6 +57,42 @@ def split_dataframe(df, min_size=512):
57
57
 
58
58
  return partial_dfs
59
59
 
60
def split_dataframe_indices(df, min_size=512):
    """Return inclusive (start, end) index pairs that partition ``df`` into chunks.

    Only ``len(df)`` is used, so any sized object works (DataFrame, list, ...).
    Each chunk holds ``min_size`` rows; the final chunk also absorbs whatever
    is left when the remainder would be smaller than ``min_size``, so it can
    be larger than ``min_size``.

    Args:
        df: Sized object to partition.
        min_size: Minimum number of rows per chunk.

    Returns:
        A list of ``(start_idx, end_idx)`` tuples where ``end_idx`` is
        INCLUSIVE (slice with ``[start_idx:end_idx + 1]``). An empty input
        yields an empty list.
    """
    total_size = len(df)

    # Nothing to partition: return no ranges rather than the invalid (0, -1).
    if total_size == 0:
        return []

    # If the input is not larger than min_size, return the entire range.
    if total_size <= min_size:
        return [(0, total_size - 1)]

    chunk_indices = []
    start_idx = 0
    remaining_rows = total_size

    # Number of full min_size chunks that fit; the last one absorbs the rest.
    for _ in range(total_size // min_size):
        chunk_size = min_size
        if (remaining_rows - chunk_size) < min_size:
            # What would be left is too small to stand alone, so the
            # current chunk takes all remaining rows.
            chunk_size = remaining_rows

        # End index is inclusive, hence the "- 1".
        chunk_indices.append((start_idx, start_idx + chunk_size - 1))

        start_idx += chunk_size
        remaining_rows -= chunk_size

    # Safety net: any rows the loop did not consume form one last chunk
    # (e.g. when min_size is negative and the loop body never runs).
    if remaining_rows > 0:
        chunk_indices.append((start_idx, start_idx + remaining_rows - 1))

    return chunk_indices
95
+
60
96
  class DataFrameToVectorDB:
61
97
  def __init__(self,
62
98
  collection_name,
@@ -78,8 +114,9 @@ class DataFrameToVectorDB:
78
114
  return docs
79
115
 
80
116
  def __split_list(self, input_list, batch_size):
81
- for i in range(0, len(input_list), batch_size):
82
- yield input_list[i:i + batch_size]
117
+ # for i in range(0, len(input_list), batch_size):
118
+ for s,e in split_dataframe_indices(input_list):
119
+ yield input_list[s:e+1]
83
120
 
84
121
  def store_documents(self, df, columns, page_content_columns):
85
122
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ddi_fw
3
- Version: 0.0.108
3
+ Version: 0.0.109
4
4
  Summary: Do not use :)
5
5
  Author-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
6
6
  Maintainer-email: Kıvanç Bayraktar <bayraktarkivanc@gmail.com>
@@ -58,7 +58,7 @@ ddi_fw/drugbank/event_extractor.py,sha256=6odoZohhK7OdLF-LF0l-5BFq0_NMG_5jrFJbHr
58
58
  ddi_fw/langchain/__init__.py,sha256=zS0CQrakWEP19biSRewFJGcBT8WBZq4899HrEKiMqUY,269
59
59
  ddi_fw/langchain/embeddings.py,sha256=lU64a5AZ62jP8U3hTSwK0kXt7gThbwPACLfJMZ1baPA,7538
60
60
  ddi_fw/langchain/sentence_splitter.py,sha256=h_bYElx4Ud1mwDNJfL7mUwvgadwKX3GKlSzu5L2PXzg,280
61
- ddi_fw/langchain/storage.py,sha256=LHbrN9QJ6-aV6jaxYHCcJ2shgdrgH4Y4vCuFMSxYvrw,9028
61
+ ddi_fw/langchain/storage.py,sha256=02cIFOrPxo2Nav5ufkKLOh8cfVY8QupxiO6rzRjNB7I,10399
62
62
  ddi_fw/ml/__init__.py,sha256=tIxiW0g6q1VsmDYVXR_ovvHQR3SCir8g2bKxx_CrS7s,221
63
63
  ddi_fw/ml/evaluation_helper.py,sha256=o4-w5Xa3t4olLW4ymx_8L-Buhe5wfQEmT2bh4Zz544c,13066
64
64
  ddi_fw/ml/ml_helper.py,sha256=fySjIAFzkeEOvaLJhDwtCOgRhgYQ7H106eqaP16GhDY,4489
@@ -91,7 +91,7 @@ ddi_fw/utils/package_helper.py,sha256=erl8_onmhK-41zQoaED2qyDUV9GQxmT9sdoyRp9_q5
91
91
  ddi_fw/utils/py7zr_helper.py,sha256=gOqaFIyJvTjUM-btO2x9AQ69jZOS8PoKN0wetYIckJw,4747
92
92
  ddi_fw/utils/utils.py,sha256=szwnxMTDRrZoeNRyDuf3aCbtzriwtaRk4mHSH3asLdA,4301
93
93
  ddi_fw/utils/zip_helper.py,sha256=YRZA4tKZVBJwGQM0_WK6L-y5MoqkKoC-nXuuHK6CU9I,5567
94
- ddi_fw-0.0.108.dist-info/METADATA,sha256=g_nsmA5N7aVJkBiWJNZAbkdcoCXGxBxnspfpVRg_n-0,1967
95
- ddi_fw-0.0.108.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
96
- ddi_fw-0.0.108.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
97
- ddi_fw-0.0.108.dist-info/RECORD,,
94
+ ddi_fw-0.0.109.dist-info/METADATA,sha256=0kpJYBb0pe7lXlETKf2vSuK0_dOizijcPX0N4h4aBN8,1967
95
+ ddi_fw-0.0.109.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
96
+ ddi_fw-0.0.109.dist-info/top_level.txt,sha256=PMwHICFZTZtcpzQNPV4UQnfNXYIeLR_Ste-Wfc1h810,7
97
+ ddi_fw-0.0.109.dist-info/RECORD,,