datachain 0.18.10__py3-none-any.whl → 0.18.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain has been flagged as potentially problematic; the original page links to further details.
- datachain/data_storage/sqlite.py +30 -4
- {datachain-0.18.10.dist-info → datachain-0.18.11.dist-info}/METADATA +1 -1
- {datachain-0.18.10.dist-info → datachain-0.18.11.dist-info}/RECORD +7 -7
- {datachain-0.18.10.dist-info → datachain-0.18.11.dist-info}/WHEEL +0 -0
- {datachain-0.18.10.dist-info → datachain-0.18.11.dist-info}/entry_points.txt +0 -0
- {datachain-0.18.10.dist-info → datachain-0.18.11.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.18.10.dist-info → datachain-0.18.11.dist-info}/top_level.txt +0 -0
datachain/data_storage/sqlite.py
CHANGED
|
@@ -109,12 +109,14 @@ class SQLiteDatabaseEngine(DatabaseEngine):
|
|
|
109
109
|
metadata: "MetaData",
|
|
110
110
|
db: sqlite3.Connection,
|
|
111
111
|
db_file: Optional[str] = None,
|
|
112
|
+
max_variable_number: Optional[int] = 999,
|
|
112
113
|
):
|
|
113
114
|
self.engine = engine
|
|
114
115
|
self.metadata = metadata
|
|
115
116
|
self.db = db
|
|
116
117
|
self.db_file = db_file
|
|
117
118
|
self.is_closed = False
|
|
119
|
+
self.max_variable_number = max_variable_number
|
|
118
120
|
|
|
119
121
|
@classmethod
|
|
120
122
|
def from_db_file(cls, db_file: Optional[str] = None) -> "SQLiteDatabaseEngine":
|
|
@@ -123,7 +125,7 @@ class SQLiteDatabaseEngine(DatabaseEngine):
|
|
|
123
125
|
@staticmethod
|
|
124
126
|
def _connect(
|
|
125
127
|
db_file: Optional[str] = None,
|
|
126
|
-
) -> tuple["Engine", "MetaData", sqlite3.Connection, str]:
|
|
128
|
+
) -> tuple["Engine", "MetaData", sqlite3.Connection, str, int]:
|
|
127
129
|
try:
|
|
128
130
|
if db_file == ":memory:":
|
|
129
131
|
# Enable multithreaded usage of the same in-memory db
|
|
@@ -150,6 +152,13 @@ class SQLiteDatabaseEngine(DatabaseEngine):
|
|
|
150
152
|
db.execute("PRAGMA journal_mode = WAL")
|
|
151
153
|
db.execute("PRAGMA synchronous = NORMAL")
|
|
152
154
|
db.execute("PRAGMA case_sensitive_like = ON")
|
|
155
|
+
|
|
156
|
+
max_variable_number = 999 # minimum in old SQLite versions
|
|
157
|
+
for row in db.execute("PRAGMA compile_options;").fetchall():
|
|
158
|
+
option = row[0]
|
|
159
|
+
if option.startswith("MAX_VARIABLE_NUMBER="):
|
|
160
|
+
max_variable_number = int(option.split("=")[1])
|
|
161
|
+
|
|
153
162
|
if os.environ.get("DEBUG_SHOW_SQL_QUERIES"):
|
|
154
163
|
import sys
|
|
155
164
|
|
|
@@ -157,7 +166,7 @@ class SQLiteDatabaseEngine(DatabaseEngine):
|
|
|
157
166
|
|
|
158
167
|
load_usearch_extension(db)
|
|
159
168
|
|
|
160
|
-
return engine, MetaData(), db, db_file
|
|
169
|
+
return engine, MetaData(), db, db_file, max_variable_number
|
|
161
170
|
except RuntimeError:
|
|
162
171
|
raise DataChainError("Can't connect to SQLite DB") from None
|
|
163
172
|
|
|
@@ -180,11 +189,14 @@ class SQLiteDatabaseEngine(DatabaseEngine):
|
|
|
180
189
|
def _reconnect(self) -> None:
|
|
181
190
|
if not self.is_closed:
|
|
182
191
|
raise RuntimeError("Cannot reconnect on still-open DB!")
|
|
183
|
-
engine, metadata, db, db_file = self._connect(
|
|
192
|
+
engine, metadata, db, db_file, max_variable_number = self._connect(
|
|
193
|
+
db_file=self.db_file
|
|
194
|
+
)
|
|
184
195
|
self.engine = engine
|
|
185
196
|
self.metadata = metadata
|
|
186
197
|
self.db = db
|
|
187
198
|
self.db_file = db_file
|
|
199
|
+
self.max_variable_number = max_variable_number
|
|
188
200
|
self.is_closed = False
|
|
189
201
|
|
|
190
202
|
def get_table(self, name: str) -> Table:
|
|
@@ -231,13 +243,27 @@ class SQLiteDatabaseEngine(DatabaseEngine):
|
|
|
231
243
|
return self.db.execute(sql, parameters)
|
|
232
244
|
|
|
233
245
|
def insert_dataframe(self, table_name: str, df) -> int:
|
|
246
|
+
# Dynamically calculates chunksize by dividing max variable limit in a
|
|
247
|
+
# single SQL insert with number of columns in dataframe.
|
|
248
|
+
# This way we avoid error: sqlite3.OperationalError: too many SQL variables,
|
|
249
|
+
num_columns = df.shape[1]
|
|
250
|
+
if num_columns == 0:
|
|
251
|
+
num_columns = 1
|
|
252
|
+
|
|
253
|
+
if self.max_variable_number < num_columns:
|
|
254
|
+
raise RuntimeError(
|
|
255
|
+
"Number of columns exceeds DB maximum variables when inserting data"
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
chunksize = self.max_variable_number // num_columns
|
|
259
|
+
|
|
234
260
|
return df.to_sql(
|
|
235
261
|
table_name,
|
|
236
262
|
self.db,
|
|
237
263
|
if_exists="append",
|
|
238
264
|
index=False,
|
|
239
265
|
method="multi",
|
|
240
|
-
chunksize=
|
|
266
|
+
chunksize=chunksize,
|
|
241
267
|
)
|
|
242
268
|
|
|
243
269
|
def cursor(self, factory=None):
|
|
@@ -50,7 +50,7 @@ datachain/data_storage/job.py,sha256=9r0OGwh22bHNIvLHqg8_-eJSP1YYB-BN5HOla5TdCxw
|
|
|
50
50
|
datachain/data_storage/metastore.py,sha256=1PaRTQbL7kjcU1BVjiLjXJLrrLzQtUvpqLmm0pwc1rU,39882
|
|
51
51
|
datachain/data_storage/schema.py,sha256=asZYz1cg_WKfe2Q-k5W51E2z2CzHU5B4QEDZDMFr8yo,9346
|
|
52
52
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
53
|
-
datachain/data_storage/sqlite.py,sha256=
|
|
53
|
+
datachain/data_storage/sqlite.py,sha256=BB8x7jtBmHK9lwn2zTo4HgfTKWGF43JxOsGr38J8YV8,25698
|
|
54
54
|
datachain/data_storage/warehouse.py,sha256=imPm4R2V7TkqgGNSO2FGnKu03axU9UVLMfdUPfpwgHE,31747
|
|
55
55
|
datachain/diff/__init__.py,sha256=-OFZzgOplqO84iWgGY7kfe60NXaWR9JRIh9T-uJboAM,9668
|
|
56
56
|
datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -153,9 +153,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
153
153
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
154
154
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
155
155
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
156
|
-
datachain-0.18.
|
|
157
|
-
datachain-0.18.
|
|
158
|
-
datachain-0.18.
|
|
159
|
-
datachain-0.18.
|
|
160
|
-
datachain-0.18.
|
|
161
|
-
datachain-0.18.
|
|
156
|
+
datachain-0.18.11.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
157
|
+
datachain-0.18.11.dist-info/METADATA,sha256=TgOokr9DxfY4A1mq7-5APy8DTHUqFEf2FslYxASH1IA,11320
|
|
158
|
+
datachain-0.18.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
159
|
+
datachain-0.18.11.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
160
|
+
datachain-0.18.11.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
161
|
+
datachain-0.18.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|