datachain 0.20.4__py3-none-any.whl → 0.21.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (47)
  1. datachain/__init__.py +0 -2
  2. datachain/cache.py +2 -2
  3. datachain/catalog/catalog.py +65 -180
  4. datachain/cli/__init__.py +11 -2
  5. datachain/cli/commands/datasets.py +28 -43
  6. datachain/cli/commands/ls.py +2 -2
  7. datachain/cli/parser/__init__.py +35 -1
  8. datachain/client/fsspec.py +3 -5
  9. datachain/client/hf.py +0 -10
  10. datachain/client/local.py +4 -4
  11. datachain/data_storage/metastore.py +37 -405
  12. datachain/data_storage/sqlite.py +7 -136
  13. datachain/data_storage/warehouse.py +7 -26
  14. datachain/dataset.py +12 -126
  15. datachain/delta.py +7 -11
  16. datachain/error.py +0 -36
  17. datachain/func/func.py +1 -1
  18. datachain/lib/arrow.py +3 -3
  19. datachain/lib/dataset_info.py +0 -4
  20. datachain/lib/dc/datachain.py +92 -260
  21. datachain/lib/dc/datasets.py +50 -104
  22. datachain/lib/dc/listings.py +3 -3
  23. datachain/lib/dc/records.py +0 -1
  24. datachain/lib/dc/storage.py +40 -38
  25. datachain/lib/file.py +23 -77
  26. datachain/lib/listing.py +1 -3
  27. datachain/lib/meta_formats.py +1 -1
  28. datachain/lib/pytorch.py +1 -1
  29. datachain/lib/settings.py +0 -10
  30. datachain/lib/tar.py +2 -1
  31. datachain/lib/udf_signature.py +1 -1
  32. datachain/lib/webdataset.py +20 -30
  33. datachain/listing.py +1 -3
  34. datachain/query/dataset.py +46 -71
  35. datachain/query/session.py +1 -1
  36. datachain/remote/studio.py +26 -61
  37. datachain/studio.py +20 -27
  38. {datachain-0.20.4.dist-info → datachain-0.21.1.dist-info}/METADATA +2 -2
  39. {datachain-0.20.4.dist-info → datachain-0.21.1.dist-info}/RECORD +43 -47
  40. datachain/lib/namespaces.py +0 -71
  41. datachain/lib/projects.py +0 -86
  42. datachain/namespace.py +0 -65
  43. datachain/project.py +0 -78
  44. {datachain-0.20.4.dist-info → datachain-0.21.1.dist-info}/WHEEL +0 -0
  45. {datachain-0.20.4.dist-info → datachain-0.21.1.dist-info}/entry_points.txt +0 -0
  46. {datachain-0.20.4.dist-info → datachain-0.21.1.dist-info}/licenses/LICENSE +0 -0
  47. {datachain-0.20.4.dist-info → datachain-0.21.1.dist-info}/top_level.txt +0 -0
datachain/studio.py CHANGED
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Optional
6
6
  import tabulate
7
7
 
8
8
  from datachain.config import Config, ConfigLevel
9
- from datachain.dataset import QUERY_DATASET_PREFIX, parse_dataset_name
9
+ from datachain.dataset import QUERY_DATASET_PREFIX
10
10
  from datachain.error import DataChainError
11
11
  from datachain.remote.studio import StudioClient
12
12
  from datachain.utils import STUDIO_URL
@@ -102,13 +102,11 @@ def set_team(args: "Namespace"):
102
102
  def login(args: "Namespace"):
103
103
  from dvc_studio_client.auth import StudioAuthError, get_access_token
104
104
 
105
- from datachain.remote.studio import get_studio_env_variable
106
-
107
105
  config = Config().read().get("studio", {})
108
106
  name = args.name
109
107
  hostname = (
110
108
  args.hostname
111
- or get_studio_env_variable("URL")
109
+ or os.environ.get("DVC_STUDIO_URL")
112
110
  or config.get("url")
113
111
  or STUDIO_URL
114
112
  )
@@ -167,11 +165,6 @@ def token():
167
165
 
168
166
 
169
167
  def list_datasets(team: Optional[str] = None, name: Optional[str] = None):
170
- def ds_full_name(ds: dict) -> str:
171
- return (
172
- f"{ds['project']['namespace']['name']}.{ds['project']['name']}.{ds['name']}"
173
- )
174
-
175
168
  if name:
176
169
  yield from list_dataset_versions(team, name)
177
170
  return
@@ -188,22 +181,18 @@ def list_datasets(team: Optional[str] = None, name: Optional[str] = None):
188
181
 
189
182
  for d in response.data:
190
183
  name = d.get("name")
191
- full_name = ds_full_name(d)
192
184
  if name and name.startswith(QUERY_DATASET_PREFIX):
193
185
  continue
194
186
 
195
187
  for v in d.get("versions", []):
196
188
  version = v.get("version")
197
- yield (full_name, version)
189
+ yield (name, version)
198
190
 
199
191
 
200
192
  def list_dataset_versions(team: Optional[str] = None, name: str = ""):
201
193
  client = StudioClient(team=team)
202
194
 
203
- namespace_name, project_name, name = parse_dataset_name(name)
204
- if not namespace_name or not project_name:
205
- raise DataChainError(f"Missing namespace or project form dataset name {name}")
206
- response = client.dataset_info(namespace_name, project_name, name)
195
+ response = client.dataset_info(name)
207
196
 
208
197
  if not response.ok:
209
198
  raise DataChainError(response.message)
@@ -219,16 +208,12 @@ def list_dataset_versions(team: Optional[str] = None, name: str = ""):
219
208
  def edit_studio_dataset(
220
209
  team_name: Optional[str],
221
210
  name: str,
222
- namespace: str,
223
- project: str,
224
211
  new_name: Optional[str] = None,
225
212
  description: Optional[str] = None,
226
213
  attrs: Optional[list[str]] = None,
227
214
  ):
228
215
  client = StudioClient(team=team_name)
229
- response = client.edit_dataset(
230
- name, namespace, project, new_name, description, attrs
231
- )
216
+ response = client.edit_dataset(name, new_name, description, attrs)
232
217
  if not response.ok:
233
218
  raise DataChainError(response.message)
234
219
 
@@ -238,13 +223,11 @@ def edit_studio_dataset(
238
223
  def remove_studio_dataset(
239
224
  team_name: Optional[str],
240
225
  name: str,
241
- namespace: str,
242
- project: str,
243
226
  version: Optional[str] = None,
244
227
  force: Optional[bool] = False,
245
228
  ):
246
229
  client = StudioClient(team=team_name)
247
- response = client.rm_dataset(name, namespace, project, version, force)
230
+ response = client.rm_dataset(name, version, force)
248
231
  if not response.ok:
249
232
  raise DataChainError(response.message)
250
233
 
@@ -265,14 +248,17 @@ def save_config(hostname, token, level=ConfigLevel.GLOBAL):
265
248
  def show_logs_from_client(client, job_id):
266
249
  # Sync usage
267
250
  async def _run():
251
+ latest_status = None
268
252
  async for message in client.tail_job_logs(job_id):
269
253
  if "logs" in message:
270
254
  for log in message["logs"]:
271
255
  print(log["message"], end="")
272
256
  elif "job" in message:
273
- print(f"\n>>>> Job is now in {message['job']['status']} status.")
257
+ latest_status = message["job"]["status"]
258
+ print(f"\n>>>> Job is now in {latest_status} status.")
259
+ return latest_status
274
260
 
275
- asyncio.run(_run())
261
+ latest_status = asyncio.run(_run())
276
262
 
277
263
  response = client.dataset_job_versions(job_id)
278
264
  if not response.ok:
@@ -287,6 +273,12 @@ def show_logs_from_client(client, job_id):
287
273
  else:
288
274
  print("\n\nNo dataset versions created during the job.")
289
275
 
276
+ exit_code_by_status = {
277
+ "FAILED": 1,
278
+ "CANCELLED": 2,
279
+ }
280
+ return exit_code_by_status.get(latest_status.upper(), 0) if latest_status else 0
281
+
290
282
 
291
283
  def create_job(
292
284
  query_file: str,
@@ -343,7 +335,7 @@ def create_job(
343
335
  print("Open the job in Studio at", response.data.get("job", {}).get("url"))
344
336
  print("=" * 40)
345
337
 
346
- show_logs_from_client(client, job_id)
338
+ return show_logs_from_client(client, job_id)
347
339
 
348
340
 
349
341
  def upload_files(client: StudioClient, files: list[str]) -> list[str]:
@@ -413,7 +405,7 @@ def show_job_logs(job_id: str, team_name: Optional[str]):
413
405
  )
414
406
 
415
407
  client = StudioClient(team=team_name)
416
- show_logs_from_client(client, job_id)
408
+ return show_logs_from_client(client, job_id)
417
409
 
418
410
 
419
411
  def list_clusters(team_name: Optional[str]):
@@ -430,6 +422,7 @@ def list_clusters(team_name: Optional[str]):
430
422
  rows = [
431
423
  {
432
424
  "ID": cluster.get("id"),
425
+ "Name": cluster.get("name"),
433
426
  "Status": cluster.get("status"),
434
427
  "Cloud Provider": cluster.get("cloud_provider"),
435
428
  "Cloud Credentials": cluster.get("cloud_credentials"),
{datachain-0.20.4.dist-info → datachain-0.21.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.20.4
3
+ Version: 0.21.1
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -94,7 +94,7 @@ Requires-Dist: scipy; extra == "tests"
94
94
  Requires-Dist: ultralytics; extra == "tests"
95
95
  Provides-Extra: dev
96
96
  Requires-Dist: datachain[docs,tests]; extra == "dev"
97
- Requires-Dist: mypy==1.16.1; extra == "dev"
97
+ Requires-Dist: mypy==1.16.0; extra == "dev"
98
98
  Requires-Dist: types-python-dateutil; extra == "dev"
99
99
  Requires-Dist: types-pytz; extra == "dev"
100
100
  Requires-Dist: types-PyYAML; extra == "dev"
{datachain-0.20.4.dist-info → datachain-0.21.1.dist-info}/RECORD CHANGED
@@ -1,59 +1,57 @@
1
- datachain/__init__.py,sha256=gGeaUZXIGQIMCllVXCyDinLfW6oIn33vlK1bXfCAJjI,1578
1
+ datachain/__init__.py,sha256=Dx_Dw6AuvC_CZtXxfRv0Z-ND6ieC4Cz-tZkMW-Rvmz4,1496
2
2
  datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
3
3
  datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
4
- datachain/cache.py,sha256=ESVRaCJXEThMIfGEFVHx6wJPOZA7FYk9V6WxjyuqUBY,3626
4
+ datachain/cache.py,sha256=yQblPhOh_Mq74Ma7xT1CL1idLJ0HgrQxpGVYvRy_9Eg,3623
5
5
  datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
6
- datachain/dataset.py,sha256=--7UI8lZ2lVhk2mNCsHACGigQe96-jBUcbnDMebj-cE,24089
7
- datachain/delta.py,sha256=4RqLLc9dJLF8x9GG9IDgi86DwuPerZQ4HAUnNBeACw8,8446
8
- datachain/error.py,sha256=OWwWMkzZYJrkcoEDGhJHMf7SfKvxcsOLRF94mjPf29I,1609
6
+ datachain/dataset.py,sha256=XUZ-kSBL1y6juFqlSWXXbattGS1E53lXpyhc0Ip1_AA,20527
7
+ datachain/delta.py,sha256=fP1Yy_MfdnTZmIOe243SBiDWTzd6MqLw0tQxvZNxLcs,8384
8
+ datachain/error.py,sha256=bxAAL32lSeMgzsQDEHbGTGORj-mPzzpCRvWDPueJNN4,1092
9
9
  datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
10
- datachain/listing.py,sha256=T4bCgdCRuFW7bsPUG2PSl5om2nfJL6fzB84m7mCO8cA,7136
11
- datachain/namespace.py,sha256=r7YwpXFc7LdMS2jhyLMTrAHffALFG2bAIiYUSuZa5Rc,1786
10
+ datachain/listing.py,sha256=JtExYIfKMFhEIIcSSWBmaxWpoS3ben7kb692cHHm4Lo,7079
12
11
  datachain/node.py,sha256=KWDT0ClYXB7FYI-QOvzAa-UDkLJErUI2eWm5FBteYuU,5577
13
12
  datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
14
13
  datachain/nodes_thread_pool.py,sha256=mdo0s-VybuSZkRUARcUO4Tjh8KFfZr9foHqmupx2SmM,3989
15
14
  datachain/progress.py,sha256=lRzxoYP4Qv2XBwD78sOkmYRzHFpZ2ExVNJF8wAeICtY,770
16
- datachain/project.py,sha256=kX5urIAnkHcZJ8m_IIzrUtHEytMBeiceVTrqgCXc_4E,2275
17
15
  datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
16
  datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
19
17
  datachain/semver.py,sha256=UB8GHPBtAP3UJGeiuJoInD7SK-DnB93_Xd1qy_CQ9cU,2074
20
- datachain/studio.py,sha256=NMJYuWyJNLm6HS4GjcBBj7_CKjymsSBrjrLeXuEiD1M,12997
18
+ datachain/studio.py,sha256=u74vy3iymPFSXU9ePi7KNOSwv-vQYUUS7BuEoC8cLfs,12697
21
19
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
22
20
  datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
23
21
  datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
24
- datachain/catalog/catalog.py,sha256=43Yp1xQnwrozl2_VhVWQDFHxYPunXP95v7n1PoGc6mw,63546
22
+ datachain/catalog/catalog.py,sha256=J1MKOuoMSl5B0_XYGF5EjDPm7KCvOvllz8PXxt316Og,59352
25
23
  datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
26
24
  datachain/catalog/loader.py,sha256=UXjYD6BNRoupPvkiz3-b04jepXhtLHCA4gzKFnXxOtQ,5987
27
- datachain/cli/__init__.py,sha256=kJJf_LScBNMOhvd1n3EEZrJHiN-SkJED13xvNTWEK1A,8144
25
+ datachain/cli/__init__.py,sha256=AQqMMC4zwBi06XfyDg8mIkCRjL7OtyH7W-9Lq6GyQnc,8421
28
26
  datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
29
27
  datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQaRlIhI,493
30
- datachain/cli/commands/datasets.py,sha256=Bva9gTi1HMvvCQPFUPxLYrHQduDlJDWV8EN6IcJcC3Y,6949
28
+ datachain/cli/commands/datasets.py,sha256=77QBkn_Enok0vzkHE0rqCbM9YQuXK1oQNdfCCSKoFKE,5793
31
29
  datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
32
30
  datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR88,198
33
- datachain/cli/commands/ls.py,sha256=CBmk838Q-EQp04lE2Qdnpsc1GXAkC4-I-b-a_828n1E,5272
31
+ datachain/cli/commands/ls.py,sha256=dSD2_MHng4t9HRFJZWMOCjPL4XU3qaBV3piNl8UXP08,5275
34
32
  datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibVE,600
35
33
  datachain/cli/commands/query.py,sha256=Xzfgh14nPVH-sclqX1tpZqgfdTugw5s_44v0D33z6FA,1505
36
34
  datachain/cli/commands/show.py,sha256=Cf8wBs12h-xtdOzjU5GTDy2C8rF5HJSF0hDJYER1zH8,1606
37
- datachain/cli/parser/__init__.py,sha256=NPB6ssP4CCt7G1SWZ_8oNQEH2C1lktWgkyHYXDQJZNc,15073
35
+ datachain/cli/parser/__init__.py,sha256=sjCIcosAtZqa0m50GMQHqmCkZSYxKyZNwQ29XwRQlP0,15913
38
36
  datachain/cli/parser/job.py,sha256=_wqOOxGRXG_-xuQ35FaLUOwjw6w8HviWvoEpZZ7VBzI,5289
39
37
  datachain/cli/parser/studio.py,sha256=Bo__LKM7qhJGgkyX8M_bCvgZ2Gvqq6r_X4t1NdtaBIY,3881
40
38
  datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
41
39
  datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
42
40
  datachain/client/azure.py,sha256=7yyAgANHfu9Kfh187MKNTT1guvu9Q-WYsi4vYoY3aew,3270
43
41
  datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
44
- datachain/client/fsspec.py,sha256=kb_myMWcgGFClY5Rsv6fvHIRblg41dfH5knHJuDbW6w,14015
42
+ datachain/client/fsspec.py,sha256=huPHNDZRGz_rSN7XnS9hKmRoS2fsSz_y2-cxUSlvsOA,13938
45
43
  datachain/client/gcs.py,sha256=8hcFhEHp8qGRsJoyfCoawfuwb1Et-MSkyQoM9AnNuXI,5204
46
- datachain/client/hf.py,sha256=n5xJZdvNLS-SqokxuBCIPfGbhIeC_XfLm_BNYtEVvg4,2677
47
- datachain/client/local.py,sha256=0J52Wzvw25hSucVlzBvLuMRAZwrAHZAYDvD1mNBqf4c,4607
44
+ datachain/client/hf.py,sha256=mRBqHeBT758TJicU-Fn2L3l5AbHWwMzycWwttNUACKk,2180
45
+ datachain/client/local.py,sha256=cGoCYflribzexiOe-Y1qbaE2fJRh-_EgQrfCSa0yK_E,4568
48
46
  datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
49
47
  datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
50
48
  datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
51
49
  datachain/data_storage/job.py,sha256=9r0OGwh22bHNIvLHqg8_-eJSP1YYB-BN5HOla5TdCxw,402
52
- datachain/data_storage/metastore.py,sha256=Rvkp7HzOtrRFxQVVlCJSgsdIKG4rw_wvU5TaBr5kEV4,51621
50
+ datachain/data_storage/metastore.py,sha256=1PaRTQbL7kjcU1BVjiLjXJLrrLzQtUvpqLmm0pwc1rU,39882
53
51
  datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
54
52
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
55
- datachain/data_storage/sqlite.py,sha256=bGb4_kEFvnGf3ZWekiv8z3VMZBzQyO0bSaNB5RrpUUs,29991
56
- datachain/data_storage/warehouse.py,sha256=_7btARw-kd-Nx19S0qW6JqdF3VYyypQXFzsXq68SWKI,32327
53
+ datachain/data_storage/sqlite.py,sha256=BB8x7jtBmHK9lwn2zTo4HgfTKWGF43JxOsGr38J8YV8,25698
54
+ datachain/data_storage/warehouse.py,sha256=imPm4R2V7TkqgGNSO2FGnKu03axU9UVLMfdUPfpwgHE,31747
57
55
  datachain/diff/__init__.py,sha256=-OFZzgOplqO84iWgGY7kfe60NXaWR9JRIh9T-uJboAM,9668
58
56
  datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
57
  datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
@@ -63,36 +61,34 @@ datachain/func/aggregate.py,sha256=fmVEKf3MUR29dEgllGdtl6nG7Lwz-SiyA5X1EyRRNUk,1
63
61
  datachain/func/array.py,sha256=fz5NUIPkp_KZ7tadCqJQSSJwWMYXEfYn60QkG2epC3k,13627
64
62
  datachain/func/base.py,sha256=wA0sBQAVyN9LPxoo7Ox83peS0zUVnyuKxukwAcjGLfY,534
65
63
  datachain/func/conditional.py,sha256=bzIZRSpVpe-lrHoWPTCA7bzZ-AHtR44BVM82hqD1pY0,9188
66
- datachain/func/func.py,sha256=fpslnn4edr0dH3mD8BSTndRFJiiVZvbJoBJV6HkHMqw,17400
64
+ datachain/func/func.py,sha256=ARU6ifniVWR5a3vXynNDD42AATOBnuMIq66fQnDyTJc,17383
67
65
  datachain/func/numeric.py,sha256=J6FgzuIAcS6B02Cm1qPnJdB6ut21jyBDVXSBrkZNZaQ,6978
68
66
  datachain/func/path.py,sha256=9Jas35QhEtRai4l54hMqVvuJsqxHvOx88oo4vym1H_I,4077
69
67
  datachain/func/random.py,sha256=t7jwXsI8-hy0qAdvjAntgzy-AHtTAfozlZ1CpKR-QZE,458
70
68
  datachain/func/string.py,sha256=X9u4ip97U63RCaKRhMddoze7HgPiY3LbPRn9G06UWWo,7311
71
69
  datachain/func/window.py,sha256=ImyRpc1QI8QUSPO7KdD60e_DPVo7Ja0G5kcm6BlyMcw,1584
72
70
  datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
- datachain/lib/arrow.py,sha256=PyLXiscZ7sVEo65CAhYXmgHh1OLSH2lvbM5dAYhE8x4,10348
71
+ datachain/lib/arrow.py,sha256=2IuNZ6tRFsxVNhWElqr0ptz28geSDzlDHUtzD4qeDNM,10339
74
72
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
75
73
  datachain/lib/data_model.py,sha256=ZwBXELtqROEdLL4DmxTipnwUZmhQvMz_UVDzyf7nQ9Y,2899
76
- datachain/lib/dataset_info.py,sha256=7w-DoKOyIVoOtWGCgciMLcP5CiAWJB3rVI-vUDF80k0,3311
77
- datachain/lib/file.py,sha256=gTzJXaGIyFOrw_B4yiOEs7U23n4oAQuWDI2v9KWwp2o,33889
74
+ datachain/lib/dataset_info.py,sha256=d-jz6zeDU5DEgYtyeSF5nK0MU-40FV5km_iOCh4pXzo,3179
75
+ datachain/lib/file.py,sha256=PuTa6CEG9CaJXPhxrZFY-R9-DS7ynB9l7Y0bUbd_Qwg,31952
78
76
  datachain/lib/hf.py,sha256=gjxuStZBlKtNk3-4yYSlWZDv9zBGblOdvEy_Lwap5hA,5882
79
77
  datachain/lib/image.py,sha256=erWvZW5M3emnbl6_fGAOPyKm-1EKbt3vOdWPfe3Oo7U,3265
80
- datachain/lib/listing.py,sha256=U-2stsTEwEsq4Y80dqGfktGzkmB5-ZntnL1_rzXlH0k,7089
78
+ datachain/lib/listing.py,sha256=5_GoATtIwCtd1JMqlorPB_vQDxndOQZpiWjNOG3NMw4,7007
81
79
  datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
82
- datachain/lib/meta_formats.py,sha256=zdyg6XLk3QIsSk3I7s0Ez5kaCJSlE3uq7JiGxf7UwtU,6348
80
+ datachain/lib/meta_formats.py,sha256=Epydbdch1g4CojK8wd_ePzmwmljC4fVWlJtZ16jsX-A,6349
83
81
  datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
84
- datachain/lib/namespaces.py,sha256=it52UbbwB8dzhesO2pMs_nThXiPQ1Ph9sD9I3GQkg5s,2099
85
- datachain/lib/projects.py,sha256=C-HTzTLUbIB735_iBSV6MjWnntV6gaKCEIkMSR1YEQw,2596
86
- datachain/lib/pytorch.py,sha256=oBBd6cxYrcwaFz7IQajKqhGqDdNnwUZWs0wJPRizrjk,7712
87
- datachain/lib/settings.py,sha256=9wi0FoHxRxNiyn99pR28IYsMkoo47jQxeXuObQr2Ar0,2929
82
+ datachain/lib/pytorch.py,sha256=elrmJ4YUDC2LZ9yXM1KwImVBOYIBJf6k0ZR7eSe6Aao,7712
83
+ datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
88
84
  datachain/lib/signal_schema.py,sha256=Zhg8qThFDf9eoNWFH6KGeYB-sIGys7A_ybq2CUBG7Dg,36127
89
- datachain/lib/tar.py,sha256=MLcVjzIgBqRuJacCNpZ6kwSZNq1i2tLyROc8PVprHsA,999
85
+ datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
90
86
  datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
91
87
  datachain/lib/udf.py,sha256=FWqA476ygdk4MU-0qehYKxvnt8Tekh21Cyf3RgddD1k,16674
92
- datachain/lib/udf_signature.py,sha256=Yz20iJ-WF1pijT3hvcDIKFzgWV9gFxZM73KZRx3NbPk,7560
88
+ datachain/lib/udf_signature.py,sha256=2EtsOPDNSPqcOlYwqbCdy6RF5MldI-7smii8aLy8p7Y,7543
93
89
  datachain/lib/utils.py,sha256=rG2y7NwTqZOuomZZRmrA-Q-ANM_j1cToQYqDJoOeGyU,1480
94
90
  datachain/lib/video.py,sha256=u6fLJWj5G6QqsVkpfHnKGklBNpG3BRRg6v3izngnNcU,6767
95
- datachain/lib/webdataset.py,sha256=CkW8FfGigNx6wo2EEK4KMjhEE8FamRHWGs2HZuH7jDY,7214
91
+ datachain/lib/webdataset.py,sha256=o7SHk5HOUWsZ5Ln04xOM04eQqiBHiJNO7xLgyVBrwo8,6924
96
92
  datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0EVlUE,2525
97
93
  datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
98
94
  datachain/lib/convert/flatten.py,sha256=IZFiUYbgXSxXhPSG5Cqf5IjnJ4ZDZKXMr4o_yCR1NY4,1505
@@ -103,15 +99,15 @@ datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUO
103
99
  datachain/lib/dc/__init__.py,sha256=HD0NYrdy44u6kkpvgGjJcvGz-UGTHui2azghcT8ZUg0,838
104
100
  datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
105
101
  datachain/lib/dc/database.py,sha256=g5M6NjYR1T0vKte-abV-3Ejnm-HqxTIMir5cRi_SziE,6051
106
- datachain/lib/dc/datachain.py,sha256=B6z8e33ZAUKbJ-cqQko-VJEtmia2bfUnuqH7BQQVt_A,85998
107
- datachain/lib/dc/datasets.py,sha256=xiVNe7PosuIsyACFhly9qNxGmRQy1J2TQw3AD6uj9UM,12747
102
+ datachain/lib/dc/datachain.py,sha256=cQjq6_OWQ_1JKvIqb8snl6mKfuBbpllPEao5ygVINog,81733
103
+ datachain/lib/dc/datasets.py,sha256=g_bBGCUwAwNJypYSUQvrDDqnaw7nfXpvrEvUVPtWATY,11268
108
104
  datachain/lib/dc/hf.py,sha256=PJl2wiLjdRsMz0SYbLT-6H8b-D5i2WjeH7li8HHOk_0,2145
109
105
  datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
110
- datachain/lib/dc/listings.py,sha256=eVBUP25W81dv46DLqkv8K0X7N3nxhoZm77gFrByeT_E,4660
106
+ datachain/lib/dc/listings.py,sha256=8kX-eELQGHDuOAtavLRJ2iwXkdJQ2bIAv_Z5mKYDJbI,4667
111
107
  datachain/lib/dc/pandas.py,sha256=ObueUXDUFKJGu380GmazdG02ARpKAHPhSaymfmOH13E,1489
112
108
  datachain/lib/dc/parquet.py,sha256=zYcSgrWwyEDW9UxGUSVdIVsCu15IGEf0xL8KfWQqK94,1782
113
- datachain/lib/dc/records.py,sha256=AMtfWc7K6mtbW2OiaeIm3SjHTxDGnSgCEQW5u984Qh0,3111
114
- datachain/lib/dc/storage.py,sha256=8xiV3c6k-sG14RGwNJCp0AbV6L0mNDsTVZ-Est-ccnw,7672
109
+ datachain/lib/dc/records.py,sha256=J1I69J2gFIBjRTGr2LG-5qn_rTVzRLcr2y3tVDrmHdg,3068
110
+ datachain/lib/dc/storage.py,sha256=u-QB_0sn1Wwc0-9phi1zT38UDe5uBIc25xbAhKMU2fA,8774
115
111
  datachain/lib/dc/utils.py,sha256=VawOAlJSvAtZbsMg33s5tJe21TRx1Km3QggI1nN6tnw,3984
116
112
  datachain/lib/dc/values.py,sha256=7l1n352xWrEdql2NhBcZ3hj8xyPglWiY4qHjFPjn6iw,1428
117
113
  datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
@@ -125,17 +121,17 @@ datachain/model/ultralytics/pose.py,sha256=pBlmt63Qe68FKmexHimUGlNbNOoOlMHXG4fzX
125
121
  datachain/model/ultralytics/segment.py,sha256=63bDCj43E6iZ0hFI5J6uQfksdCmjEp6sEm1XzVaE8pw,2986
126
122
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
127
123
  datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
128
- datachain/query/dataset.py,sha256=SjFUh77rBTpgBZG4cfMJiJ2DhiCubGVk2cG1RYX4oyA,61571
124
+ datachain/query/dataset.py,sha256=dI51zOU1Drev65f6SPn4mvRdwRXs4SOW5STMm3WYd7A,60601
129
125
  datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
130
126
  datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
131
127
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
132
128
  datachain/query/queue.py,sha256=v0UeK4ilmdiRoJ5OdjB5qpnHTYDxRP4vhVp5Iw_toaI,3512
133
129
  datachain/query/schema.py,sha256=b_KnVy6B26Ol4nYG0LqNNpeQ1QYPk95YRGUjXfdaQWs,6606
134
- datachain/query/session.py,sha256=gKblltJAVQAVSTswAgWGDgGbpmFlFzFVkIQojDCjgXM,6809
130
+ datachain/query/session.py,sha256=6_ydvPasurmc5tR11dzFj51DpUAo4NxXP9p4ltoTauc,6792
135
131
  datachain/query/udf.py,sha256=e753bDJzTNjGFQn1WGTvOAWSwjDbrFI1-_DDWkWN2ls,1343
136
132
  datachain/query/utils.py,sha256=HaSDNH_XGvp_NIcXjcB7j4vJRPi4_tbztDWclYelHY4,1208
137
133
  datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
138
- datachain/remote/studio.py,sha256=aSAh7CCHrTp7U-642jHFkwY0Fer4YRAHVVpWomab3zY,15110
134
+ datachain/remote/studio.py,sha256=SZFmTTIHH7eRnWEGT-YCHsayeCIaLfh0UiasHIiA2s4,13915
139
135
  datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
140
136
  datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
141
137
  datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
@@ -157,9 +153,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
157
153
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
158
154
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
159
155
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
160
- datachain-0.20.4.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
161
- datachain-0.20.4.dist-info/METADATA,sha256=qJW1OSk6js8XNxap4sftMjW02AzBrPsrsyA64igleYw,13281
162
- datachain-0.20.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
163
- datachain-0.20.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
164
- datachain-0.20.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
165
- datachain-0.20.4.dist-info/RECORD,,
156
+ datachain-0.21.1.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
157
+ datachain-0.21.1.dist-info/METADATA,sha256=AgSKtpuTkewsQ0JUUIt_mI3BdzEksWWfE2BpWQuf4HE,13281
158
+ datachain-0.21.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
159
+ datachain-0.21.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
160
+ datachain-0.21.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
161
+ datachain-0.21.1.dist-info/RECORD,,
datachain/lib/namespaces.py DELETED
@@ -1,71 +0,0 @@
1
- from typing import Optional
2
-
3
- from datachain.error import NamespaceCreateNotAllowedError
4
- from datachain.namespace import Namespace
5
- from datachain.query import Session
6
-
7
-
8
- def create(
9
- name: str, descr: Optional[str] = None, session: Optional[Session] = None
10
- ) -> Namespace:
11
- """
12
- Creates a new namespace.
13
-
14
- Namespaces organize projects, which in turn organize datasets. A default
15
- namespace always exists and is used if none is specified. Multiple namespaces
16
- can be created in Studio, but only the default is available in the CLI.
17
-
18
- Parameters:
19
- name: Name of the new namespace.
20
- descr: Optional description of the namespace.
21
- session: Optional session to use for the operation.
22
-
23
- Example:
24
- ```py
25
- from datachain.lib.namespaces import create as create_namespace
26
- namespace = create_namespace("dev", "Dev namespace")
27
- ```
28
- """
29
- session = Session.get(session)
30
-
31
- if not session.catalog.metastore.namespace_allowed_to_create:
32
- raise NamespaceCreateNotAllowedError("Creating namespace is not allowed")
33
-
34
- Namespace.validate_name(name)
35
-
36
- return session.catalog.metastore.create_namespace(name, descr)
37
-
38
-
39
- def get(name: str, session: Optional[Session] = None) -> Namespace:
40
- """
41
- Gets a namespace by name.
42
- If the namespace is not found, a `NamespaceNotFoundError` is raised.
43
-
44
- Parameters:
45
- name : The name of the namespace.
46
- session : Session to use for getting namespace.
47
-
48
- Example:
49
- ```py
50
- import datachain as dc
51
- namespace = dc.get_namespace("local")
52
- ```
53
- """
54
- session = Session.get(session)
55
- return session.catalog.metastore.get_namespace(name)
56
-
57
-
58
- def ls(session: Optional[Session] = None) -> list[Namespace]:
59
- """
60
- Gets a list of all namespaces.
61
-
62
- Parameters:
63
- session : Session to use for getting namespaces.
64
-
65
- Example:
66
- ```py
67
- from datachain.lib.namespaces import ls as ls_namespaces
68
- namespaces = ls_namespaces()
69
- ```
70
- """
71
- return Session.get(session).catalog.metastore.list_namespaces()
datachain/lib/projects.py DELETED
@@ -1,86 +0,0 @@
1
- from typing import Optional
2
-
3
- from datachain.error import ProjectCreateNotAllowedError
4
- from datachain.project import Project
5
- from datachain.query import Session
6
-
7
-
8
- def create(
9
- namespace: str,
10
- name: str,
11
- descr: Optional[str] = None,
12
- session: Optional[Session] = None,
13
- ) -> Project:
14
- """
15
- Creates a new project under a specified namespace.
16
-
17
- Projects help organize datasets. A default project is always available,
18
- but users can create additional ones (only in Studio, not via CLI).
19
-
20
-
21
- Parameters:
22
- name: Name of the new project.
23
- namespace: Namespace to create the project in. Created if it doesn't exist.
24
- descr: Optional description of the project.
25
- session: Optional session to use for the operation.
26
-
27
- Example:
28
- ```py
29
- import datachain as dc
30
- project = dc.create_project("dev", "my-project", "My personal project")
31
- ```
32
- """
33
- session = Session.get(session)
34
-
35
- if not session.catalog.metastore.project_allowed_to_create:
36
- raise ProjectCreateNotAllowedError("Creating project is not allowed")
37
-
38
- Project.validate_name(name)
39
-
40
- return session.catalog.metastore.create_project(namespace, name, descr)
41
-
42
-
43
- def get(name: str, namespace: str, session: Optional[Session]) -> Project:
44
- """
45
- Gets a project by name in some namespace.
46
- If the project is not found, a `ProjectNotFoundError` is raised.
47
-
48
- Parameters:
49
- name : The name of the project.
50
- namespace : The name of the namespace.
51
- session : Session to use for getting project.
52
-
53
- Example:
54
- ```py
55
- import datachain as dc
56
- from datachain.lib.projects import get as get_project
57
- project = get_project("my-project", "local")
58
- ```
59
- """
60
- return Session.get(session).catalog.metastore.get_project(name, namespace)
61
-
62
-
63
- def ls(
64
- namespace: Optional[str] = None, session: Optional[Session] = None
65
- ) -> list[Project]:
66
- """
67
- Gets a list of projects in a specific namespace or from all namespaces.
68
-
69
- Parameters:
70
- namespace : An optional namespace name.
71
- session : Session to use for getting project.
72
-
73
- Example:
74
- ```py
75
- import datachain as dc
76
- from datachain.lib.projects import ls as ls_projects
77
- local_namespace_projects = ls_projects("local")
78
- all_projects = ls_projects()
79
- ```
80
- """
81
- session = Session.get(session)
82
- namespace_id = None
83
- if namespace:
84
- namespace_id = session.catalog.metastore.get_namespace(namespace).id
85
-
86
- return session.catalog.metastore.list_projects(namespace_id)
datachain/namespace.py DELETED
@@ -1,65 +0,0 @@
1
- import builtins
2
- from dataclasses import dataclass, fields
3
- from datetime import datetime
4
- from typing import Any, Optional, TypeVar
5
-
6
- from datachain.error import InvalidNamespaceNameError
7
-
8
- N = TypeVar("N", bound="Namespace")
9
- NAMESPACE_NAME_RESERVED_CHARS = ["."]
10
-
11
-
12
- @dataclass(frozen=True)
13
- class Namespace:
14
- id: int
15
- uuid: str
16
- name: str
17
- descr: Optional[str]
18
- created_at: datetime
19
-
20
- @staticmethod
21
- def validate_name(name: str) -> None:
22
- """Throws exception if name is invalid, otherwise returns None"""
23
- if not name:
24
- raise InvalidNamespaceNameError("Namespace name cannot be empty")
25
-
26
- for c in NAMESPACE_NAME_RESERVED_CHARS:
27
- if c in name:
28
- raise InvalidNamespaceNameError(
29
- f"Character {c} is reserved and not allowed in namespace name"
30
- )
31
-
32
- if name in [Namespace.default(), Namespace.system()]:
33
- raise InvalidNamespaceNameError(
34
- f"Namespace name {name} is reserved and cannot be used."
35
- )
36
-
37
- @staticmethod
38
- def default() -> str:
39
- """Name of default namespace"""
40
- return "local"
41
-
42
- @staticmethod
43
- def system() -> str:
44
- """Name of the system namespace"""
45
- return "system"
46
-
47
- @property
48
- def is_system(self):
49
- return self.name == Namespace.system()
50
-
51
- @classmethod
52
- def parse(
53
- cls: builtins.type[N],
54
- id: int,
55
- uuid: str,
56
- name: str,
57
- descr: Optional[str],
58
- created_at: datetime,
59
- ) -> "Namespace":
60
- return cls(id, uuid, name, descr, created_at)
61
-
62
- @classmethod
63
- def from_dict(cls, d: dict[str, Any]) -> "Namespace":
64
- kwargs = {f.name: d[f.name] for f in fields(cls) if f.name in d}
65
- return cls(**kwargs)
datachain/project.py DELETED
@@ -1,78 +0,0 @@
1
- import builtins
2
- from dataclasses import dataclass, fields
3
- from datetime import datetime
4
- from typing import Any, Optional, TypeVar
5
-
6
- from datachain.error import InvalidProjectNameError
7
- from datachain.namespace import Namespace
8
-
9
- P = TypeVar("P", bound="Project")
10
- PROJECT_NAME_RESERVED_CHARS = ["."]
11
-
12
-
13
- @dataclass(frozen=True)
14
- class Project:
15
- id: int
16
- uuid: str
17
- name: str
18
- descr: Optional[str]
19
- created_at: datetime
20
- namespace: Namespace
21
-
22
- @staticmethod
23
- def validate_name(name: str) -> None:
24
- """Throws exception if name is invalid, otherwise returns None"""
25
- if not name:
26
- raise InvalidProjectNameError("Project name cannot be empty")
27
-
28
- for c in PROJECT_NAME_RESERVED_CHARS:
29
- if c in name:
30
- raise InvalidProjectNameError(
31
- f"Character {c} is reserved and not allowed in project name."
32
- )
33
-
34
- if name in [Project.default(), Project.listing()]:
35
- raise InvalidProjectNameError(
36
- f"Project name {name} is reserved and cannot be used."
37
- )
38
-
39
- @staticmethod
40
- def default() -> str:
41
- """Name of default project"""
42
- return "local"
43
-
44
- @staticmethod
45
- def listing() -> str:
46
- """Name of listing project where all listing datasets will be saved"""
47
- return "listing"
48
-
49
- @classmethod
50
- def parse(
51
- cls: builtins.type[P],
52
- namespace_id: int,
53
- namespace_uuid: str,
54
- namespace_name: str,
55
- namespace_descr: Optional[str],
56
- namespace_created_at: datetime,
57
- project_id: int,
58
- uuid: str,
59
- name: str,
60
- descr: Optional[str],
61
- created_at: datetime,
62
- project_namespace_id: int,
63
- ) -> "Project":
64
- namespace = Namespace.parse(
65
- namespace_id,
66
- namespace_uuid,
67
- namespace_name,
68
- namespace_descr,
69
- namespace_created_at,
70
- )
71
-
72
- return cls(project_id, uuid, name, descr, created_at, namespace)
73
-
74
- @classmethod
75
- def from_dict(cls, d: dict[str, Any]) -> "Project":
76
- namespace = Namespace.from_dict(d.pop("namespace"))
77
- kwargs = {f.name: d[f.name] for f in fields(cls) if f.name in d}
78
- return cls(**kwargs, namespace=namespace)