datachain 0.18.5__py3-none-any.whl → 0.18.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

datachain/client/azure.py CHANGED
@@ -15,7 +15,7 @@ class AzureClient(Client):
15
15
  protocol = "az"
16
16
 
17
17
  def info_to_file(self, v: dict[str, Any], path: str) -> File:
18
- version_id = v.get("version_id")
18
+ version_id = v.get("version_id") if self._is_version_aware() else None
19
19
  return File(
20
20
  source=self.uri,
21
21
  path=path,
datachain/client/fsspec.py CHANGED
@@ -208,7 +208,7 @@ class Client(ABC):
208
208
 
209
209
  async def get_current_etag(self, file: "File") -> str:
210
210
  kwargs = {}
211
- if getattr(self.fs, "version_aware", False):
211
+ if self._is_version_aware():
212
212
  kwargs["version_id"] = file.version
213
213
  info = await self.fs._info(
214
214
  self.get_full_path(file.path, file.version), **kwargs
@@ -326,8 +326,11 @@ class Client(ABC):
326
326
  """
327
327
  return not (key.startswith("/") or key.endswith("/") or "//" in key)
328
328
 
329
+ def _is_version_aware(self) -> bool:
330
+ return getattr(self.fs, "version_aware", False)
331
+
329
332
  async def ls_dir(self, path):
330
- if getattr(self.fs, "version_aware", False):
333
+ if self._is_version_aware():
331
334
  kwargs = {"versions": True}
332
335
  return await self.fs._ls(path, detail=True, **kwargs)
333
336
 
datachain/client/gcs.py CHANGED
@@ -115,7 +115,7 @@ class GCSClient(Client):
115
115
  maxResults=page_size,
116
116
  pageToken=next_page_token,
117
117
  json_out=True,
118
- versions="true",
118
+ versions="true" if self._is_version_aware() else "false",
119
119
  )
120
120
  assert page["kind"] == "storage#objects"
121
121
  await page_queue.put(page.get("items", []))
@@ -134,7 +134,7 @@ class GCSClient(Client):
134
134
  source=self.uri,
135
135
  path=path,
136
136
  etag=v.get("etag", ""),
137
- version=v.get("generation", ""),
137
+ version=v.get("generation", "") if self._is_version_aware() else "",
138
138
  is_latest=not v.get("timeDeleted"),
139
139
  last_modified=self.parse_timestamp(v["updated"]),
140
140
  size=v.get("size", ""),
datachain/client/s3.py CHANGED
@@ -101,7 +101,7 @@ class ClientS3(Client):
101
101
  prefix = start_prefix
102
102
  if prefix:
103
103
  prefix = prefix.lstrip(DELIMITER) + DELIMITER
104
- versions = True
104
+ versions = self._is_version_aware()
105
105
  fs = self.fs
106
106
  await fs.set_session()
107
107
  s3 = await fs.get_s3(self.name)
@@ -139,7 +139,9 @@ class ClientS3(Client):
139
139
  source=self.uri,
140
140
  path=v["Key"],
141
141
  etag=v.get("ETag", "").strip('"'),
142
- version=ClientS3.clean_s3_version(v.get("VersionId", "")),
142
+ version=(
143
+ ClientS3.clean_s3_version(v.get("VersionId", "")) if versions else ""
144
+ ),
143
145
  is_latest=v.get("IsLatest", True),
144
146
  last_modified=v.get("LastModified", ""),
145
147
  size=v["Size"],
@@ -193,7 +195,11 @@ class ClientS3(Client):
193
195
  source=self.uri,
194
196
  path=path,
195
197
  size=v["size"],
196
- version=ClientS3.clean_s3_version(v.get("VersionId", "")),
198
+ version=(
199
+ ClientS3.clean_s3_version(v.get("VersionId", ""))
200
+ if self._is_version_aware()
201
+ else ""
202
+ ),
197
203
  etag=v.get("ETag", "").strip('"'),
198
204
  is_latest=v.get("IsLatest", True),
199
205
  last_modified=v.get("LastModified", ""),
datachain/query/dispatch.py CHANGED
@@ -264,9 +264,10 @@ class UDFDispatcher:
264
264
  # Will be set to True when the input is exhausted
265
265
  input_finished = False
266
266
 
267
- if not self.is_batching:
268
- batch_size = self.input_batch_size(n_workers)
269
- input_rows = batched(flatten(input_rows), batch_size)
267
+ input_rows = batched(
268
+ input_rows if self.is_batching else flatten(input_rows),
269
+ self.input_batch_size(n_workers),
270
+ )
270
271
 
271
272
  # Stop all workers after the input rows have finished processing
272
273
  input_data = chain(input_rows, [STOP_SIGNAL] * n_workers)
datachain-0.18.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.18.5
3
+ Version: 0.18.7
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
datachain-0.18.7.dist-info/RECORD CHANGED
@@ -37,13 +37,13 @@ datachain/cli/parser/job.py,sha256=acdVYuTsqluRDI_FYhZ1ohjQcVtBj-taUm8y9tGb0_0,4
37
37
  datachain/cli/parser/studio.py,sha256=Y-1OlQGecLVi9QofvWUfSlPd2ISyaESf7QFGZqGsrdw,3609
38
38
  datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
39
39
  datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
40
- datachain/client/azure.py,sha256=dM9rGHHmi40iT3FOcLP5iNo_1nS2exjmJH9U4PElW5A,3232
40
+ datachain/client/azure.py,sha256=7yyAgANHfu9Kfh187MKNTT1guvu9Q-WYsi4vYoY3aew,3270
41
41
  datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
42
- datachain/client/fsspec.py,sha256=UJ7PDq1F11gf7OMjfXYqzrS1GHL3FZctOwXI0S_LU74,13852
43
- datachain/client/gcs.py,sha256=ckdP7utmBfSDkj0GOhV_34itw5pN3DOD74hrWNKIcQg,5127
42
+ datachain/client/fsspec.py,sha256=c8oRBUMo31k8bMB_mIA60PDfna4nYTdslzHqmqL2Uvg,13918
43
+ datachain/client/gcs.py,sha256=8hcFhEHp8qGRsJoyfCoawfuwb1Et-MSkyQoM9AnNuXI,5204
44
44
  datachain/client/hf.py,sha256=posnI5WOKOMG1yY_ZiV9Orcd24QsUPKZlOXgJVLxxrM,1558
45
45
  datachain/client/local.py,sha256=cGoCYflribzexiOe-Y1qbaE2fJRh-_EgQrfCSa0yK_E,4568
46
- datachain/client/s3.py,sha256=bhlQGOVi0smKmLkkyO-Y1YR95u-aPim4bdkEj4wyhzU,7343
46
+ datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
47
47
  datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
48
48
  datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
49
49
  datachain/data_storage/job.py,sha256=9r0OGwh22bHNIvLHqg8_-eJSP1YYB-BN5HOla5TdCxw,402
@@ -122,7 +122,7 @@ datachain/model/ultralytics/segment.py,sha256=63bDCj43E6iZ0hFI5J6uQfksdCmjEp6sEm
122
122
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
123
123
  datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
124
124
  datachain/query/dataset.py,sha256=3c3MAiIl7ZnCii_0dZA-Om73ornNMSKkna32JX3H05E,60587
125
- datachain/query/dispatch.py,sha256=15M3zlTUFKM6D2ijITX4o5QxCkRe2klkODsIDi3aQOg,15544
125
+ datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
126
126
  datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
127
127
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
128
128
  datachain/query/queue.py,sha256=v0UeK4ilmdiRoJ5OdjB5qpnHTYDxRP4vhVp5Iw_toaI,3512
@@ -153,9 +153,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
153
153
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
154
154
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
155
155
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
156
- datachain-0.18.5.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
157
- datachain-0.18.5.dist-info/METADATA,sha256=nRjXtvxIMAGDnI5sLMewoVcqsRbn9jGWCZnmu2wzk1I,11319
158
- datachain-0.18.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
159
- datachain-0.18.5.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
160
- datachain-0.18.5.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
161
- datachain-0.18.5.dist-info/RECORD,,
156
+ datachain-0.18.7.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
157
+ datachain-0.18.7.dist-info/METADATA,sha256=OXGuP0EbV6ZC57NPhtyse2-6OP2pDKbhJkmcDfHp1mU,11319
158
+ datachain-0.18.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
159
+ datachain-0.18.7.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
160
+ datachain-0.18.7.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
161
+ datachain-0.18.7.dist-info/RECORD,,