datachain 0.18.10 → 0.18.11 (py3-none-any.whl)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -109,12 +109,14 @@ class SQLiteDatabaseEngine(DatabaseEngine):
109
109
  metadata: "MetaData",
110
110
  db: sqlite3.Connection,
111
111
  db_file: Optional[str] = None,
112
+ max_variable_number: Optional[int] = 999,
112
113
  ):
113
114
  self.engine = engine
114
115
  self.metadata = metadata
115
116
  self.db = db
116
117
  self.db_file = db_file
117
118
  self.is_closed = False
119
+ self.max_variable_number = max_variable_number
118
120
 
119
121
  @classmethod
120
122
  def from_db_file(cls, db_file: Optional[str] = None) -> "SQLiteDatabaseEngine":
@@ -123,7 +125,7 @@ class SQLiteDatabaseEngine(DatabaseEngine):
123
125
  @staticmethod
124
126
  def _connect(
125
127
  db_file: Optional[str] = None,
126
- ) -> tuple["Engine", "MetaData", sqlite3.Connection, str]:
128
+ ) -> tuple["Engine", "MetaData", sqlite3.Connection, str, int]:
127
129
  try:
128
130
  if db_file == ":memory:":
129
131
  # Enable multithreaded usage of the same in-memory db
@@ -150,6 +152,13 @@ class SQLiteDatabaseEngine(DatabaseEngine):
150
152
  db.execute("PRAGMA journal_mode = WAL")
151
153
  db.execute("PRAGMA synchronous = NORMAL")
152
154
  db.execute("PRAGMA case_sensitive_like = ON")
155
+
156
+ max_variable_number = 999 # minimum in old SQLite versions
157
+ for row in db.execute("PRAGMA compile_options;").fetchall():
158
+ option = row[0]
159
+ if option.startswith("MAX_VARIABLE_NUMBER="):
160
+ max_variable_number = int(option.split("=")[1])
161
+
153
162
  if os.environ.get("DEBUG_SHOW_SQL_QUERIES"):
154
163
  import sys
155
164
 
@@ -157,7 +166,7 @@ class SQLiteDatabaseEngine(DatabaseEngine):
157
166
 
158
167
  load_usearch_extension(db)
159
168
 
160
- return engine, MetaData(), db, db_file
169
+ return engine, MetaData(), db, db_file, max_variable_number
161
170
  except RuntimeError:
162
171
  raise DataChainError("Can't connect to SQLite DB") from None
163
172
 
@@ -180,11 +189,14 @@ class SQLiteDatabaseEngine(DatabaseEngine):
180
189
  def _reconnect(self) -> None:
181
190
  if not self.is_closed:
182
191
  raise RuntimeError("Cannot reconnect on still-open DB!")
183
- engine, metadata, db, db_file = self._connect(db_file=self.db_file)
192
+ engine, metadata, db, db_file, max_variable_number = self._connect(
193
+ db_file=self.db_file
194
+ )
184
195
  self.engine = engine
185
196
  self.metadata = metadata
186
197
  self.db = db
187
198
  self.db_file = db_file
199
+ self.max_variable_number = max_variable_number
188
200
  self.is_closed = False
189
201
 
190
202
  def get_table(self, name: str) -> Table:
@@ -231,13 +243,27 @@ class SQLiteDatabaseEngine(DatabaseEngine):
231
243
  return self.db.execute(sql, parameters)
232
244
 
233
245
  def insert_dataframe(self, table_name: str, df) -> int:
246
+ # Dynamically calculates chunksize by dividing max variable limit in a
247
+ # single SQL insert with number of columns in dataframe.
248
+ # This way we avoid error: sqlite3.OperationalError: too many SQL variables,
249
+ num_columns = df.shape[1]
250
+ if num_columns == 0:
251
+ num_columns = 1
252
+
253
+ if self.max_variable_number < num_columns:
254
+ raise RuntimeError(
255
+ "Number of columns exceeds DB maximum variables when inserting data"
256
+ )
257
+
258
+ chunksize = self.max_variable_number // num_columns
259
+
234
260
  return df.to_sql(
235
261
  table_name,
236
262
  self.db,
237
263
  if_exists="append",
238
264
  index=False,
239
265
  method="multi",
240
- chunksize=1000,
266
+ chunksize=chunksize,
241
267
  )
242
268
 
243
269
  def cursor(self, factory=None):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.18.10
3
+ Version: 0.18.11
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -50,7 +50,7 @@ datachain/data_storage/job.py,sha256=9r0OGwh22bHNIvLHqg8_-eJSP1YYB-BN5HOla5TdCxw
50
50
  datachain/data_storage/metastore.py,sha256=1PaRTQbL7kjcU1BVjiLjXJLrrLzQtUvpqLmm0pwc1rU,39882
51
51
  datachain/data_storage/schema.py,sha256=asZYz1cg_WKfe2Q-k5W51E2z2CzHU5B4QEDZDMFr8yo,9346
52
52
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
53
- datachain/data_storage/sqlite.py,sha256=bwZAB_NUMT2WMv5tPQnnLFA0P-PiQtxzSaQ1q6xDxOU,24590
53
+ datachain/data_storage/sqlite.py,sha256=BB8x7jtBmHK9lwn2zTo4HgfTKWGF43JxOsGr38J8YV8,25698
54
54
  datachain/data_storage/warehouse.py,sha256=imPm4R2V7TkqgGNSO2FGnKu03axU9UVLMfdUPfpwgHE,31747
55
55
  datachain/diff/__init__.py,sha256=-OFZzgOplqO84iWgGY7kfe60NXaWR9JRIh9T-uJboAM,9668
56
56
  datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -153,9 +153,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
153
153
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
154
154
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
155
155
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
156
- datachain-0.18.10.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
157
- datachain-0.18.10.dist-info/METADATA,sha256=Vjkb16V4J8lNJphVuqD2DZ_V_7BLIf8YPRlvJNtsLaM,11320
158
- datachain-0.18.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
159
- datachain-0.18.10.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
160
- datachain-0.18.10.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
161
- datachain-0.18.10.dist-info/RECORD,,
156
+ datachain-0.18.11.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
157
+ datachain-0.18.11.dist-info/METADATA,sha256=TgOokr9DxfY4A1mq7-5APy8DTHUqFEf2FslYxASH1IA,11320
158
+ datachain-0.18.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
159
+ datachain-0.18.11.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
160
+ datachain-0.18.11.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
161
+ datachain-0.18.11.dist-info/RECORD,,