datachain 0.19.1__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

Files changed (37)
  1. datachain/__init__.py +3 -0
  2. datachain/catalog/catalog.py +180 -65
  3. datachain/cli/__init__.py +0 -7
  4. datachain/cli/commands/datasets.py +43 -28
  5. datachain/cli/parser/__init__.py +1 -35
  6. datachain/cli/parser/job.py +25 -0
  7. datachain/cli/parser/studio.py +11 -4
  8. datachain/data_storage/metastore.py +390 -37
  9. datachain/data_storage/schema.py +23 -1
  10. datachain/data_storage/sqlite.py +139 -7
  11. datachain/data_storage/warehouse.py +26 -7
  12. datachain/dataset.py +125 -12
  13. datachain/delta.py +9 -5
  14. datachain/error.py +36 -0
  15. datachain/lib/dataset_info.py +4 -0
  16. datachain/lib/dc/datachain.py +86 -7
  17. datachain/lib/dc/datasets.py +62 -12
  18. datachain/lib/dc/listings.py +111 -0
  19. datachain/lib/dc/records.py +1 -0
  20. datachain/lib/dc/storage.py +14 -2
  21. datachain/lib/listing.py +3 -1
  22. datachain/lib/namespaces.py +73 -0
  23. datachain/lib/projects.py +86 -0
  24. datachain/lib/settings.py +10 -0
  25. datachain/listing.py +3 -1
  26. datachain/namespace.py +65 -0
  27. datachain/project.py +78 -0
  28. datachain/query/dataset.py +71 -46
  29. datachain/query/session.py +1 -1
  30. datachain/remote/studio.py +67 -26
  31. datachain/studio.py +68 -8
  32. {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/METADATA +2 -2
  33. {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/RECORD +37 -33
  34. {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/WHEEL +0 -0
  35. {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/entry_points.txt +0 -0
  36. {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/licenses/LICENSE +0 -0
  37. {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/top_level.txt +0 -0
@@ -17,6 +17,7 @@ import websockets
17
17
  from requests.exceptions import HTTPError, Timeout
18
18
 
19
19
  from datachain.config import Config
20
+ from datachain.dataset import DatasetRecord
20
21
  from datachain.error import DataChainError
21
22
  from datachain.utils import STUDIO_URL, retry_with_backoff
22
23
 
@@ -30,18 +31,39 @@ DatasetExportSignedUrls = Optional[list[str]]
30
31
  FileUploadData = Optional[dict[str, Any]]
31
32
  JobData = Optional[dict[str, Any]]
32
33
  JobListData = dict[str, Any]
34
+ ClusterListData = dict[str, Any]
33
35
  logger = logging.getLogger("datachain")
34
36
 
35
37
  DATASET_ROWS_CHUNK_SIZE = 8192
36
38
 
37
39
 
40
+ def get_studio_env_variable(name: str) -> Any:
41
+ """
42
+ Get the value of a DataChain Studio environment variable.
43
+ It first checks for the variable prefixed with 'DATACHAIN_STUDIO_',
44
+ then checks for the deprecated 'DVC_STUDIO_' prefix.
45
+ If neither is set, it returns the provided default value.
46
+ """
47
+ if (value := os.environ.get(f"DATACHAIN_STUDIO_{name}")) is not None:
48
+ return value
49
+ if (value := os.environ.get(f"DVC_STUDIO_{name}")) is not None: # deprecated
50
+ logger.warning(
51
+ "Environment variable 'DVC_STUDIO_%s' is deprecated, "
52
+ "use 'DATACHAIN_STUDIO_%s' instead.",
53
+ name,
54
+ name,
55
+ )
56
+ return value
57
+ return None
58
+
59
+
38
60
  def _is_server_error(status_code: int) -> bool:
39
61
  return str(status_code).startswith("5")
40
62
 
41
63
 
42
64
  def is_token_set() -> bool:
43
65
  return (
44
- bool(os.environ.get("DVC_STUDIO_TOKEN"))
66
+ bool(get_studio_env_variable("TOKEN"))
45
67
  or Config().read().get("studio", {}).get("token") is not None
46
68
  )
47
69
 
@@ -77,12 +99,12 @@ class StudioClient:
77
99
 
78
100
  @property
79
101
  def token(self) -> str:
80
- token = os.environ.get("DVC_STUDIO_TOKEN") or self.config.get("token")
102
+ token = get_studio_env_variable("TOKEN") or self.config.get("token")
81
103
 
82
104
  if not token:
83
105
  raise DataChainError(
84
106
  "Studio token is not set. Use `datachain auth login` "
85
- "or environment variable `DVC_STUDIO_TOKEN` to set it."
107
+ "or environment variable `DATACHAIN_STUDIO_TOKEN` to set it."
86
108
  )
87
109
 
88
110
  return token
@@ -90,8 +112,8 @@ class StudioClient:
90
112
  @property
91
113
  def url(self) -> str:
92
114
  return (
93
- os.environ.get("DVC_STUDIO_URL") or self.config.get("url") or STUDIO_URL
94
- ) + "/api"
115
+ get_studio_env_variable("URL") or self.config.get("url") or STUDIO_URL
116
+ ).rstrip("/") + "/api"
95
117
 
96
118
  @property
97
119
  def config(self) -> dict:
@@ -106,13 +128,13 @@ class StudioClient:
106
128
  return self._team
107
129
 
108
130
  def _get_team(self) -> str:
109
- team = os.environ.get("DVC_STUDIO_TEAM") or self.config.get("team")
131
+ team = get_studio_env_variable("TEAM") or self.config.get("team")
110
132
 
111
133
  if not team:
112
134
  raise DataChainError(
113
135
  "Studio team is not set. "
114
136
  "Use `datachain auth team <team_name>` "
115
- "or environment variable `DVC_STUDIO_TEAM` to set it. "
137
+ "or environment variable `DATACHAIN_STUDIO_TEAM` to set it. "
116
138
  "You can also set `studio.team` in the config file."
117
139
  )
118
140
 
@@ -290,13 +312,17 @@ class StudioClient:
290
312
  def edit_dataset(
291
313
  self,
292
314
  name: str,
315
+ namespace: str,
316
+ project: str,
293
317
  new_name: Optional[str] = None,
294
318
  description: Optional[str] = None,
295
319
  attrs: Optional[list[str]] = None,
296
320
  ) -> Response[DatasetInfoData]:
297
321
  body = {
298
322
  "new_name": new_name,
299
- "dataset_name": name,
323
+ "name": name,
324
+ "namespace": namespace,
325
+ "project": project,
300
326
  "description": description,
301
327
  "attrs": attrs,
302
328
  }
@@ -309,44 +335,44 @@ class StudioClient:
309
335
  def rm_dataset(
310
336
  self,
311
337
  name: str,
338
+ namespace: str,
339
+ project: str,
312
340
  version: Optional[str] = None,
313
341
  force: Optional[bool] = False,
314
342
  ) -> Response[DatasetInfoData]:
315
343
  return self._send_request(
316
344
  "datachain/datasets",
317
345
  {
318
- "dataset_name": name,
319
- "dataset_version": version,
346
+ "name": name,
347
+ "namespace": namespace,
348
+ "project": project,
349
+ "version": version,
320
350
  "force": force,
321
351
  },
322
352
  method="DELETE",
323
353
  )
324
354
 
325
- def dataset_info(self, name: str) -> Response[DatasetInfoData]:
355
+ def dataset_info(
356
+ self, namespace: str, project: str, name: str
357
+ ) -> Response[DatasetInfoData]:
326
358
  def _parse_dataset_info(dataset_info):
327
359
  _parse_dates(dataset_info, ["created_at", "finished_at"])
328
360
  for version in dataset_info.get("versions"):
329
361
  _parse_dates(version, ["created_at"])
362
+ _parse_dates(dataset_info.get("project"), ["created_at"])
363
+ _parse_dates(dataset_info.get("project").get("namespace"), ["created_at"])
330
364
 
331
365
  return dataset_info
332
366
 
333
367
  response = self._send_request(
334
- "datachain/datasets/info", {"dataset_name": name}, method="GET"
368
+ "datachain/datasets/info",
369
+ {"namespace": namespace, "project": project, "name": name},
370
+ method="GET",
335
371
  )
336
372
  if response.ok:
337
373
  response.data = _parse_dataset_info(response.data)
338
374
  return response
339
375
 
340
- def dataset_rows_chunk(
341
- self, name: str, version: str, offset: int
342
- ) -> Response[DatasetRowsData]:
343
- req_data = {"dataset_name": name, "dataset_version": version}
344
- return self._send_request_msgpack(
345
- "datachain/datasets/rows",
346
- {**req_data, "offset": offset, "limit": DATASET_ROWS_CHUNK_SIZE},
347
- method="GET",
348
- )
349
-
350
376
  def dataset_job_versions(self, job_id: str) -> Response[DatasetJobVersionsData]:
351
377
  return self._send_request(
352
378
  "datachain/datasets/dataset_job_versions",
@@ -355,20 +381,30 @@ class StudioClient:
355
381
  )
356
382
 
357
383
  def export_dataset_table(
358
- self, name: str, version: str
384
+ self, dataset: DatasetRecord, version: str
359
385
  ) -> Response[DatasetExportSignedUrls]:
360
386
  return self._send_request(
361
387
  "datachain/datasets/export",
362
- {"dataset_name": name, "dataset_version": version},
388
+ {
389
+ "namespace": dataset.project.namespace.name,
390
+ "project": dataset.project.name,
391
+ "name": dataset.name,
392
+ "version": version,
393
+ },
363
394
  method="GET",
364
395
  )
365
396
 
366
397
  def dataset_export_status(
367
- self, name: str, version: str
398
+ self, dataset: DatasetRecord, version: str
368
399
  ) -> Response[DatasetExportStatus]:
369
400
  return self._send_request(
370
401
  "datachain/datasets/export-status",
371
- {"dataset_name": name, "dataset_version": version},
402
+ {
403
+ "namespace": dataset.project.namespace.name,
404
+ "project": dataset.project.name,
405
+ "name": dataset.name,
406
+ "version": version,
407
+ },
372
408
  method="GET",
373
409
  )
374
410
 
@@ -391,6 +427,7 @@ class StudioClient:
391
427
  requirements: Optional[str] = None,
392
428
  repository: Optional[str] = None,
393
429
  priority: Optional[int] = None,
430
+ cluster_id: Optional[int] = None,
394
431
  ) -> Response[JobData]:
395
432
  data = {
396
433
  "query": query,
@@ -403,6 +440,7 @@ class StudioClient:
403
440
  "requirements": requirements,
404
441
  "repository": repository,
405
442
  "priority": priority,
443
+ "compute_cluster_id": cluster_id,
406
444
  }
407
445
  return self._send_request("datachain/job", data)
408
446
 
@@ -423,3 +461,6 @@ class StudioClient:
423
461
  ) -> Response[JobData]:
424
462
  url = f"datachain/job/{job_id}/cancel"
425
463
  return self._send_request(url, data={}, method="POST")
464
+
465
+ def get_clusters(self) -> Response[ClusterListData]:
466
+ return self._send_request("datachain/clusters", {}, method="GET")
datachain/studio.py CHANGED
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Optional
6
6
  import tabulate
7
7
 
8
8
  from datachain.config import Config, ConfigLevel
9
- from datachain.dataset import QUERY_DATASET_PREFIX
9
+ from datachain.dataset import QUERY_DATASET_PREFIX, parse_dataset_name
10
10
  from datachain.error import DataChainError
11
11
  from datachain.remote.studio import StudioClient
12
12
  from datachain.utils import STUDIO_URL
@@ -41,6 +41,7 @@ def process_jobs_args(args: "Namespace"):
41
41
  args.req,
42
42
  args.req_file,
43
43
  args.priority,
44
+ args.cluster_id,
44
45
  )
45
46
 
46
47
  if args.cmd == "cancel":
@@ -51,6 +52,9 @@ def process_jobs_args(args: "Namespace"):
51
52
  if args.cmd == "ls":
52
53
  return list_jobs(args.status, args.team, args.limit)
53
54
 
55
+ if args.cmd == "clusters":
56
+ return list_clusters(args.team)
57
+
54
58
  raise DataChainError(f"Unknown command '{args.cmd}'.")
55
59
 
56
60
 
@@ -68,14 +72,24 @@ def process_auth_cli_args(args: "Namespace"):
68
72
  return logout(args.local)
69
73
  if args.cmd == "token":
70
74
  return token()
71
-
72
75
  if args.cmd == "team":
73
76
  return set_team(args)
74
77
  raise DataChainError(f"Unknown command '{args.cmd}'.")
75
78
 
76
79
 
77
80
  def set_team(args: "Namespace"):
78
- level = ConfigLevel.GLOBAL if args.__dict__.get("global") else ConfigLevel.LOCAL
81
+ if args.team_name is None:
82
+ config = Config().read().get("studio", {})
83
+ team = config.get("team")
84
+ if team:
85
+ print(f"Default team is '{team}'")
86
+ return 0
87
+
88
+ raise DataChainError(
89
+ "No default team set. Use `datachain auth team <team_name>` to set one."
90
+ )
91
+
92
+ level = ConfigLevel.LOCAL if args.local else ConfigLevel.GLOBAL
79
93
  config = Config(level)
80
94
  with config.edit() as conf:
81
95
  studio_conf = conf.get("studio", {})
@@ -88,11 +102,13 @@ def set_team(args: "Namespace"):
88
102
  def login(args: "Namespace"):
89
103
  from dvc_studio_client.auth import StudioAuthError, get_access_token
90
104
 
105
+ from datachain.remote.studio import get_studio_env_variable
106
+
91
107
  config = Config().read().get("studio", {})
92
108
  name = args.name
93
109
  hostname = (
94
110
  args.hostname
95
- or os.environ.get("DVC_STUDIO_URL")
111
+ or get_studio_env_variable("URL")
96
112
  or config.get("url")
97
113
  or STUDIO_URL
98
114
  )
@@ -121,6 +137,7 @@ def login(args: "Namespace"):
121
137
  level = ConfigLevel.LOCAL if args.local else ConfigLevel.GLOBAL
122
138
  config_path = save_config(hostname, access_token, level=level)
123
139
  print(f"Authentication complete. Saved token to {config_path}.")
140
+ print("You can now use 'datachain auth team' to set the default team.")
124
141
  return 0
125
142
 
126
143
 
@@ -150,6 +167,11 @@ def token():
150
167
 
151
168
 
152
169
  def list_datasets(team: Optional[str] = None, name: Optional[str] = None):
170
+ def ds_full_name(ds: dict) -> str:
171
+ return (
172
+ f"{ds['project']['namespace']['name']}.{ds['project']['name']}.{ds['name']}"
173
+ )
174
+
153
175
  if name:
154
176
  yield from list_dataset_versions(team, name)
155
177
  return
@@ -166,18 +188,22 @@ def list_datasets(team: Optional[str] = None, name: Optional[str] = None):
166
188
 
167
189
  for d in response.data:
168
190
  name = d.get("name")
191
+ full_name = ds_full_name(d)
169
192
  if name and name.startswith(QUERY_DATASET_PREFIX):
170
193
  continue
171
194
 
172
195
  for v in d.get("versions", []):
173
196
  version = v.get("version")
174
- yield (name, version)
197
+ yield (full_name, version)
175
198
 
176
199
 
177
200
  def list_dataset_versions(team: Optional[str] = None, name: str = ""):
178
201
  client = StudioClient(team=team)
179
202
 
180
- response = client.dataset_info(name)
203
+ namespace_name, project_name, name = parse_dataset_name(name)
204
+ if not namespace_name or not project_name:
205
+ raise DataChainError(f"Missing namespace or project form dataset name {name}")
206
+ response = client.dataset_info(namespace_name, project_name, name)
181
207
 
182
208
  if not response.ok:
183
209
  raise DataChainError(response.message)
@@ -193,12 +219,16 @@ def list_dataset_versions(team: Optional[str] = None, name: str = ""):
193
219
  def edit_studio_dataset(
194
220
  team_name: Optional[str],
195
221
  name: str,
222
+ namespace: str,
223
+ project: str,
196
224
  new_name: Optional[str] = None,
197
225
  description: Optional[str] = None,
198
226
  attrs: Optional[list[str]] = None,
199
227
  ):
200
228
  client = StudioClient(team=team_name)
201
- response = client.edit_dataset(name, new_name, description, attrs)
229
+ response = client.edit_dataset(
230
+ name, namespace, project, new_name, description, attrs
231
+ )
202
232
  if not response.ok:
203
233
  raise DataChainError(response.message)
204
234
 
@@ -208,11 +238,13 @@ def edit_studio_dataset(
208
238
  def remove_studio_dataset(
209
239
  team_name: Optional[str],
210
240
  name: str,
241
+ namespace: str,
242
+ project: str,
211
243
  version: Optional[str] = None,
212
244
  force: Optional[bool] = False,
213
245
  ):
214
246
  client = StudioClient(team=team_name)
215
- response = client.rm_dataset(name, version, force)
247
+ response = client.rm_dataset(name, namespace, project, version, force)
216
248
  if not response.ok:
217
249
  raise DataChainError(response.message)
218
250
 
@@ -268,6 +300,7 @@ def create_job(
268
300
  req: Optional[list[str]] = None,
269
301
  req_file: Optional[str] = None,
270
302
  priority: Optional[int] = None,
303
+ cluster_id: Optional[int] = None,
271
304
  ):
272
305
  query_type = "PYTHON" if query_file.endswith(".py") else "SHELL"
273
306
  with open(query_file) as f:
@@ -297,6 +330,7 @@ def create_job(
297
330
  repository=repository,
298
331
  requirements=requirements,
299
332
  priority=priority,
333
+ cluster_id=cluster_id,
300
334
  )
301
335
  if not response.ok:
302
336
  raise DataChainError(response.message)
@@ -380,3 +414,29 @@ def show_job_logs(job_id: str, team_name: Optional[str]):
380
414
 
381
415
  client = StudioClient(team=team_name)
382
416
  show_logs_from_client(client, job_id)
417
+
418
+
419
+ def list_clusters(team_name: Optional[str]):
420
+ client = StudioClient(team=team_name)
421
+ response = client.get_clusters()
422
+ if not response.ok:
423
+ raise DataChainError(response.message)
424
+
425
+ clusters = response.data.get("clusters", [])
426
+ if not clusters:
427
+ print("No clusters found")
428
+ return
429
+
430
+ rows = [
431
+ {
432
+ "ID": cluster.get("id"),
433
+ "Status": cluster.get("status"),
434
+ "Cloud Provider": cluster.get("cloud_provider"),
435
+ "Cloud Credentials": cluster.get("cloud_credentials"),
436
+ "Is Active": cluster.get("is_active"),
437
+ "Max Workers": cluster.get("max_workers"),
438
+ }
439
+ for cluster in clusters
440
+ ]
441
+
442
+ print(tabulate.tabulate(rows, headers="keys", tablefmt="grid"))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.19.1
3
+ Version: 0.20.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -94,7 +94,7 @@ Requires-Dist: scipy; extra == "tests"
94
94
  Requires-Dist: ultralytics; extra == "tests"
95
95
  Provides-Extra: dev
96
96
  Requires-Dist: datachain[docs,tests]; extra == "dev"
97
- Requires-Dist: mypy==1.16.0; extra == "dev"
97
+ Requires-Dist: mypy==1.16.1; extra == "dev"
98
98
  Requires-Dist: types-python-dateutil; extra == "dev"
99
99
  Requires-Dist: types-pytz; extra == "dev"
100
100
  Requires-Dist: types-PyYAML; extra == "dev"
@@ -1,40 +1,42 @@
1
- datachain/__init__.py,sha256=Dx_Dw6AuvC_CZtXxfRv0Z-ND6ieC4Cz-tZkMW-Rvmz4,1496
1
+ datachain/__init__.py,sha256=Mq1dyOUSetvR80Swistr84HOuRSIOps6DHuUjAyQffA,1577
2
2
  datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
3
3
  datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
4
4
  datachain/cache.py,sha256=yQblPhOh_Mq74Ma7xT1CL1idLJ0HgrQxpGVYvRy_9Eg,3623
5
5
  datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
6
- datachain/dataset.py,sha256=XUZ-kSBL1y6juFqlSWXXbattGS1E53lXpyhc0Ip1_AA,20527
7
- datachain/delta.py,sha256=fP1Yy_MfdnTZmIOe243SBiDWTzd6MqLw0tQxvZNxLcs,8384
8
- datachain/error.py,sha256=bxAAL32lSeMgzsQDEHbGTGORj-mPzzpCRvWDPueJNN4,1092
6
+ datachain/dataset.py,sha256=d6b9LKsKBYO40PIxpbMFVRT0TZqaaLpvWNIH-0ladGM,24016
7
+ datachain/delta.py,sha256=v__gty2T-AZYcYAZ5iJ6CBUo7iBGcQF4JxmAPeZIzHw,8452
8
+ datachain/error.py,sha256=OWwWMkzZYJrkcoEDGhJHMf7SfKvxcsOLRF94mjPf29I,1609
9
9
  datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
10
- datachain/listing.py,sha256=JtExYIfKMFhEIIcSSWBmaxWpoS3ben7kb692cHHm4Lo,7079
10
+ datachain/listing.py,sha256=T4bCgdCRuFW7bsPUG2PSl5om2nfJL6fzB84m7mCO8cA,7136
11
+ datachain/namespace.py,sha256=4qb-XsTnx6tFWCTCFmDxzUI1pbum2GKVutfVIjFgAkI,1804
11
12
  datachain/node.py,sha256=KWDT0ClYXB7FYI-QOvzAa-UDkLJErUI2eWm5FBteYuU,5577
12
13
  datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
13
14
  datachain/nodes_thread_pool.py,sha256=mdo0s-VybuSZkRUARcUO4Tjh8KFfZr9foHqmupx2SmM,3989
14
15
  datachain/progress.py,sha256=lRzxoYP4Qv2XBwD78sOkmYRzHFpZ2ExVNJF8wAeICtY,770
16
+ datachain/project.py,sha256=FobJbBJIy1-e-yemlL7M5eOcy694eceO5CWuY5H7bbw,2305
15
17
  datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
18
  datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
17
19
  datachain/semver.py,sha256=UB8GHPBtAP3UJGeiuJoInD7SK-DnB93_Xd1qy_CQ9cU,2074
18
- datachain/studio.py,sha256=1J2ANFVVA1ysPxBuLibQSnSXt0U9Vfgz9ZNGikYtWdk,11038
20
+ datachain/studio.py,sha256=xtuRtqokqvBEICveaQ2FNsD43duXxHTcBPrqv-C9t4M,13009
19
21
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
20
22
  datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
21
23
  datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
22
- datachain/catalog/catalog.py,sha256=J1MKOuoMSl5B0_XYGF5EjDPm7KCvOvllz8PXxt316Og,59352
24
+ datachain/catalog/catalog.py,sha256=TjEFu00nwlyUB3wgeNyMfK1ROySdV5B_RtKCbBlbwic,63558
23
25
  datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
24
26
  datachain/catalog/loader.py,sha256=UXjYD6BNRoupPvkiz3-b04jepXhtLHCA4gzKFnXxOtQ,5987
25
- datachain/cli/__init__.py,sha256=eKCyqT05OMESHXCC93iQdqzusBdk1ptqZbBeaEghkgo,8344
27
+ datachain/cli/__init__.py,sha256=kJJf_LScBNMOhvd1n3EEZrJHiN-SkJED13xvNTWEK1A,8144
26
28
  datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
27
29
  datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQaRlIhI,493
28
- datachain/cli/commands/datasets.py,sha256=77QBkn_Enok0vzkHE0rqCbM9YQuXK1oQNdfCCSKoFKE,5793
30
+ datachain/cli/commands/datasets.py,sha256=Bva9gTi1HMvvCQPFUPxLYrHQduDlJDWV8EN6IcJcC3Y,6949
29
31
  datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
30
32
  datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR88,198
31
33
  datachain/cli/commands/ls.py,sha256=dSD2_MHng4t9HRFJZWMOCjPL4XU3qaBV3piNl8UXP08,5275
32
34
  datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibVE,600
33
35
  datachain/cli/commands/query.py,sha256=Xzfgh14nPVH-sclqX1tpZqgfdTugw5s_44v0D33z6FA,1505
34
36
  datachain/cli/commands/show.py,sha256=Cf8wBs12h-xtdOzjU5GTDy2C8rF5HJSF0hDJYER1zH8,1606
35
- datachain/cli/parser/__init__.py,sha256=sjCIcosAtZqa0m50GMQHqmCkZSYxKyZNwQ29XwRQlP0,15913
36
- datachain/cli/parser/job.py,sha256=acdVYuTsqluRDI_FYhZ1ohjQcVtBj-taUm8y9tGb0_0,4580
37
- datachain/cli/parser/studio.py,sha256=Y-1OlQGecLVi9QofvWUfSlPd2ISyaESf7QFGZqGsrdw,3609
37
+ datachain/cli/parser/__init__.py,sha256=NPB6ssP4CCt7G1SWZ_8oNQEH2C1lktWgkyHYXDQJZNc,15073
38
+ datachain/cli/parser/job.py,sha256=9mEkbhXFIGZxDAjZT9lWWIf3G-HiuJd1oTJNjSrEppc,5295
39
+ datachain/cli/parser/studio.py,sha256=Bo__LKM7qhJGgkyX8M_bCvgZ2Gvqq6r_X4t1NdtaBIY,3881
38
40
  datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
39
41
  datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
40
42
  datachain/client/azure.py,sha256=7yyAgANHfu9Kfh187MKNTT1guvu9Q-WYsi4vYoY3aew,3270
@@ -47,11 +49,11 @@ datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
47
49
  datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
48
50
  datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
49
51
  datachain/data_storage/job.py,sha256=9r0OGwh22bHNIvLHqg8_-eJSP1YYB-BN5HOla5TdCxw,402
50
- datachain/data_storage/metastore.py,sha256=1PaRTQbL7kjcU1BVjiLjXJLrrLzQtUvpqLmm0pwc1rU,39882
51
- datachain/data_storage/schema.py,sha256=asZYz1cg_WKfe2Q-k5W51E2z2CzHU5B4QEDZDMFr8yo,9346
52
+ datachain/data_storage/metastore.py,sha256=VJOVzXktpeoMp_On9WwjSLqx1e7RllYiTcPw86uD1lY,51211
53
+ datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
52
54
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
53
- datachain/data_storage/sqlite.py,sha256=BB8x7jtBmHK9lwn2zTo4HgfTKWGF43JxOsGr38J8YV8,25698
54
- datachain/data_storage/warehouse.py,sha256=imPm4R2V7TkqgGNSO2FGnKu03axU9UVLMfdUPfpwgHE,31747
55
+ datachain/data_storage/sqlite.py,sha256=H3dpJhqz4y_hde5efR3721GXiDftRGVUeDfN7FuCguk,30166
56
+ datachain/data_storage/warehouse.py,sha256=_7btARw-kd-Nx19S0qW6JqdF3VYyypQXFzsXq68SWKI,32327
55
57
  datachain/diff/__init__.py,sha256=-OFZzgOplqO84iWgGY7kfe60NXaWR9JRIh9T-uJboAM,9668
56
58
  datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
59
  datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
@@ -71,16 +73,18 @@ datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
73
  datachain/lib/arrow.py,sha256=2IuNZ6tRFsxVNhWElqr0ptz28geSDzlDHUtzD4qeDNM,10339
72
74
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
73
75
  datachain/lib/data_model.py,sha256=ZwBXELtqROEdLL4DmxTipnwUZmhQvMz_UVDzyf7nQ9Y,2899
74
- datachain/lib/dataset_info.py,sha256=d-jz6zeDU5DEgYtyeSF5nK0MU-40FV5km_iOCh4pXzo,3179
76
+ datachain/lib/dataset_info.py,sha256=uWq2YJgPqvORqIpPcUmDB4_itDVE5cybSn6jlJ_ktng,3331
75
77
  datachain/lib/file.py,sha256=PuTa6CEG9CaJXPhxrZFY-R9-DS7ynB9l7Y0bUbd_Qwg,31952
76
78
  datachain/lib/hf.py,sha256=gjxuStZBlKtNk3-4yYSlWZDv9zBGblOdvEy_Lwap5hA,5882
77
79
  datachain/lib/image.py,sha256=erWvZW5M3emnbl6_fGAOPyKm-1EKbt3vOdWPfe3Oo7U,3265
78
- datachain/lib/listing.py,sha256=5_GoATtIwCtd1JMqlorPB_vQDxndOQZpiWjNOG3NMw4,7007
80
+ datachain/lib/listing.py,sha256=U-2stsTEwEsq4Y80dqGfktGzkmB5-ZntnL1_rzXlH0k,7089
79
81
  datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
80
82
  datachain/lib/meta_formats.py,sha256=Epydbdch1g4CojK8wd_ePzmwmljC4fVWlJtZ16jsX-A,6349
81
83
  datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
84
+ datachain/lib/namespaces.py,sha256=O2YtylfojZg0mjoaGLOwweyCN1CIQfl4r9dYaVAiJPI,2198
85
+ datachain/lib/projects.py,sha256=7DOWS-zvuLTqQsb4OYicWXBf3InHDHUqaqgDAdRUPiA,2734
82
86
  datachain/lib/pytorch.py,sha256=elrmJ4YUDC2LZ9yXM1KwImVBOYIBJf6k0ZR7eSe6Aao,7712
83
- datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
87
+ datachain/lib/settings.py,sha256=9wi0FoHxRxNiyn99pR28IYsMkoo47jQxeXuObQr2Ar0,2929
84
88
  datachain/lib/signal_schema.py,sha256=Zhg8qThFDf9eoNWFH6KGeYB-sIGys7A_ybq2CUBG7Dg,36127
85
89
  datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
86
90
  datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
@@ -99,15 +103,15 @@ datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUO
99
103
  datachain/lib/dc/__init__.py,sha256=HD0NYrdy44u6kkpvgGjJcvGz-UGTHui2azghcT8ZUg0,838
100
104
  datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
101
105
  datachain/lib/dc/database.py,sha256=g5M6NjYR1T0vKte-abV-3Ejnm-HqxTIMir5cRi_SziE,6051
102
- datachain/lib/dc/datachain.py,sha256=cQjq6_OWQ_1JKvIqb8snl6mKfuBbpllPEao5ygVINog,81733
103
- datachain/lib/dc/datasets.py,sha256=g_bBGCUwAwNJypYSUQvrDDqnaw7nfXpvrEvUVPtWATY,11268
106
+ datachain/lib/dc/datachain.py,sha256=DNxcBnhlmxasKU7dnwsNn-p46S5EoP7CsxOxAmgswtQ,84242
107
+ datachain/lib/dc/datasets.py,sha256=07z81HerMMD4KOKJek541KESTB7XMwyna6wHVgxZ9Ek,13620
104
108
  datachain/lib/dc/hf.py,sha256=PJl2wiLjdRsMz0SYbLT-6H8b-D5i2WjeH7li8HHOk_0,2145
105
109
  datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
106
- datachain/lib/dc/listings.py,sha256=2na9v63xO1vPUNaoBSzA-TSN49V7zQAb-4iS1wOPLFE,1029
110
+ datachain/lib/dc/listings.py,sha256=eVBUP25W81dv46DLqkv8K0X7N3nxhoZm77gFrByeT_E,4660
107
111
  datachain/lib/dc/pandas.py,sha256=ObueUXDUFKJGu380GmazdG02ARpKAHPhSaymfmOH13E,1489
108
112
  datachain/lib/dc/parquet.py,sha256=zYcSgrWwyEDW9UxGUSVdIVsCu15IGEf0xL8KfWQqK94,1782
109
- datachain/lib/dc/records.py,sha256=J1I69J2gFIBjRTGr2LG-5qn_rTVzRLcr2y3tVDrmHdg,3068
110
- datachain/lib/dc/storage.py,sha256=u-QB_0sn1Wwc0-9phi1zT38UDe5uBIc25xbAhKMU2fA,8774
113
+ datachain/lib/dc/records.py,sha256=AMtfWc7K6mtbW2OiaeIm3SjHTxDGnSgCEQW5u984Qh0,3111
114
+ datachain/lib/dc/storage.py,sha256=PX4wc5t-wmL_VD45rFaeJUJihME3jyGDXESZx04obZY,9203
111
115
  datachain/lib/dc/utils.py,sha256=VawOAlJSvAtZbsMg33s5tJe21TRx1Km3QggI1nN6tnw,3984
112
116
  datachain/lib/dc/values.py,sha256=7l1n352xWrEdql2NhBcZ3hj8xyPglWiY4qHjFPjn6iw,1428
113
117
  datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
@@ -121,17 +125,17 @@ datachain/model/ultralytics/pose.py,sha256=pBlmt63Qe68FKmexHimUGlNbNOoOlMHXG4fzX
121
125
  datachain/model/ultralytics/segment.py,sha256=63bDCj43E6iZ0hFI5J6uQfksdCmjEp6sEm1XzVaE8pw,2986
122
126
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
123
127
  datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
124
- datachain/query/dataset.py,sha256=dI51zOU1Drev65f6SPn4mvRdwRXs4SOW5STMm3WYd7A,60601
128
+ datachain/query/dataset.py,sha256=SjFUh77rBTpgBZG4cfMJiJ2DhiCubGVk2cG1RYX4oyA,61571
125
129
  datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
126
130
  datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
127
131
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
128
132
  datachain/query/queue.py,sha256=v0UeK4ilmdiRoJ5OdjB5qpnHTYDxRP4vhVp5Iw_toaI,3512
129
133
  datachain/query/schema.py,sha256=b_KnVy6B26Ol4nYG0LqNNpeQ1QYPk95YRGUjXfdaQWs,6606
130
- datachain/query/session.py,sha256=6_ydvPasurmc5tR11dzFj51DpUAo4NxXP9p4ltoTauc,6792
134
+ datachain/query/session.py,sha256=gKblltJAVQAVSTswAgWGDgGbpmFlFzFVkIQojDCjgXM,6809
131
135
  datachain/query/udf.py,sha256=e753bDJzTNjGFQn1WGTvOAWSwjDbrFI1-_DDWkWN2ls,1343
132
136
  datachain/query/utils.py,sha256=HaSDNH_XGvp_NIcXjcB7j4vJRPi4_tbztDWclYelHY4,1208
133
137
  datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
- datachain/remote/studio.py,sha256=kUu4TBTKSky1qwdql5DtXNA045qXeoPn4o8G67eZJI4,13666
138
+ datachain/remote/studio.py,sha256=jA-I6q46GEuiGLp3PlQvCaU17ylHThC0HqS73oeBKUU,15114
135
139
  datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
136
140
  datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
137
141
  datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
@@ -153,9 +157,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
153
157
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
154
158
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
155
159
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
156
- datachain-0.19.1.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
157
- datachain-0.19.1.dist-info/METADATA,sha256=qg4KSU457ARE-A00yjNYNtFP3vhX0yqsxrCGKctXva4,13281
158
- datachain-0.19.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
159
- datachain-0.19.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
160
- datachain-0.19.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
161
- datachain-0.19.1.dist-info/RECORD,,
160
+ datachain-0.20.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
161
+ datachain-0.20.0.dist-info/METADATA,sha256=etcARLRHIUWT7LdqUv9Di5ZLOV3iq1HAOgiL9dsrd7c,13281
162
+ datachain-0.20.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
163
+ datachain-0.20.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
164
+ datachain-0.20.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
165
+ datachain-0.20.0.dist-info/RECORD,,