datachain 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)
  1. datachain/cache.py +4 -2
  2. datachain/catalog/catalog.py +100 -54
  3. datachain/catalog/datasource.py +4 -6
  4. datachain/cli/__init__.py +311 -0
  5. datachain/cli/commands/__init__.py +29 -0
  6. datachain/cli/commands/datasets.py +129 -0
  7. datachain/cli/commands/du.py +14 -0
  8. datachain/cli/commands/index.py +12 -0
  9. datachain/cli/commands/ls.py +169 -0
  10. datachain/cli/commands/misc.py +28 -0
  11. datachain/cli/commands/query.py +53 -0
  12. datachain/cli/commands/show.py +38 -0
  13. datachain/cli/parser/__init__.py +547 -0
  14. datachain/cli/parser/job.py +120 -0
  15. datachain/cli/parser/studio.py +126 -0
  16. datachain/cli/parser/utils.py +63 -0
  17. datachain/{cli_utils.py → cli/utils.py} +27 -1
  18. datachain/client/azure.py +21 -1
  19. datachain/client/fsspec.py +45 -13
  20. datachain/client/gcs.py +10 -2
  21. datachain/client/local.py +4 -4
  22. datachain/client/s3.py +10 -0
  23. datachain/dataset.py +1 -0
  24. datachain/func/__init__.py +2 -2
  25. datachain/func/conditional.py +52 -0
  26. datachain/func/func.py +5 -1
  27. datachain/lib/arrow.py +4 -0
  28. datachain/lib/dc.py +18 -3
  29. datachain/lib/file.py +1 -1
  30. datachain/lib/listing.py +36 -3
  31. datachain/lib/signal_schema.py +89 -27
  32. datachain/listing.py +1 -5
  33. datachain/node.py +27 -1
  34. datachain/progress.py +2 -2
  35. datachain/query/session.py +1 -1
  36. datachain/studio.py +58 -38
  37. datachain/utils.py +1 -1
  38. {datachain-0.8.2.dist-info → datachain-0.8.4.dist-info}/METADATA +6 -6
  39. {datachain-0.8.2.dist-info → datachain-0.8.4.dist-info}/RECORD +43 -31
  40. {datachain-0.8.2.dist-info → datachain-0.8.4.dist-info}/WHEEL +1 -1
  41. datachain/cli.py +0 -1475
  42. {datachain-0.8.2.dist-info → datachain-0.8.4.dist-info}/LICENSE +0 -0
  43. {datachain-0.8.2.dist-info → datachain-0.8.4.dist-info}/entry_points.txt +0 -0
  44. {datachain-0.8.2.dist-info → datachain-0.8.4.dist-info}/top_level.txt +0 -0
datachain/lib/listing.py CHANGED
@@ -39,6 +39,15 @@ def list_bucket(uri: str, cache, client_config=None) -> Callable:
     return list_func
 
 
+def get_file_info(uri: str, cache, client_config=None) -> File:
+    """
+    Wrapper to return File object by its URI
+    """
+    client = Client.get_client(uri, cache, **(client_config or {}))  # type: ignore[arg-type]
+    _, path = Client.parse_url(uri)
+    return client.get_file_info(path)
+
+
 def ls(
     dc: D,
     path: str,
@@ -76,7 +85,25 @@ def ls(
     return dc.filter(pathfunc.parent(_file_c("path")) == path.lstrip("/").rstrip("/*"))
 
 
-def parse_listing_uri(uri: str, cache, client_config) -> tuple[str, str, str]:
+def _isfile(client: "Client", path: str) -> bool:
+    """
+    Returns True if uri points to a file
+    """
+    try:
+        info = client.fs.info(path)
+        name = info.get("name")
+        # case for special simulated directories on some clouds
+        # e.g. Google creates a zero byte file with the same name as the
+        # directory with a trailing slash at the end
+        if not name or name.endswith("/"):
+            return False
+
+        return info["type"] == "file"
+    except:  # noqa: E722
+        return False
+
+
+def parse_listing_uri(uri: str, cache, client_config) -> tuple[Optional[str], str, str]:
     """
     Parsing uri and returns listing dataset name, listing uri and listing path
     """
@@ -85,7 +112,9 @@ def parse_listing_uri(uri: str, cache, client_config) -> tuple[str, str, str]:
     storage_uri, path = Client.parse_url(uri)
     telemetry.log_param("client", client.PREFIX)
 
-    if uses_glob(path) or client.fs.isfile(uri):
+    if not uri.endswith("/") and _isfile(client, uri):
+        return None, f'{storage_uri}/{path.lstrip("/")}', path
+    if uses_glob(path):
         lst_uri_path = posixpath.dirname(path)
     else:
         storage_uri, path = Client.parse_url(f'{uri.rstrip("/")}/')
@@ -113,7 +142,7 @@ def listing_uri_from_name(dataset_name: str) -> str:
 
 def get_listing(
     uri: str, session: "Session", update: bool = False
-) -> tuple[str, str, str, bool]:
+) -> tuple[Optional[str], str, str, bool]:
     """Returns correct listing dataset name that must be used for saving listing
     operation. It takes into account existing listings and reusability of those.
     It also returns boolean saying if returned dataset name is reused / already
@@ -131,6 +160,10 @@ def get_listing(
     ds_name, list_uri, list_path = parse_listing_uri(uri, cache, client_config)
     listing = None
 
+    # if we don't want to use cached dataset (e.g. for a single file listing)
+    if not ds_name:
+        return None, list_uri, list_path, False
+
     listings = [
         ls for ls in catalog.listings() if not ls.is_expired and ls.contains(ds_name)
    ]
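
With this change, pointing DataChain at a single object no longer goes through a listing dataset: parse_listing_uri and get_listing now return None for the dataset name and hand back the file's own URI and path. A minimal sketch of the new return shape, assuming the placeholder bucket and object below exist and that the default Session.get() session and client settings are used (none of these names come from the diff):

from datachain.lib.listing import get_listing
from datachain.query.session import Session

session = Session.get()
ds_name, list_uri, list_path, exists = get_listing(
    "s3://my-bucket/data/report.csv", session
)
# For a single file no listing dataset is created or reused:
# ds_name is None, exists is False, and list_uri / list_path point at the
# object itself rather than its parent prefix.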
datachain/lib/signal_schema.py CHANGED
@@ -13,13 +13,14 @@ from typing import ( # noqa: UP035
     Final,
     List,
     Literal,
+    Mapping,
     Optional,
     Union,
     get_args,
     get_origin,
 )
 
-from pydantic import BaseModel, create_model
+from pydantic import BaseModel, Field, create_model
 from sqlalchemy import ColumnElement
 from typing_extensions import Literal as LiteralEx
 
@@ -85,8 +86,31 @@ class SignalResolvingTypeError(SignalResolvingError):
        )
 
 
+class CustomType(BaseModel):
+    schema_version: int = Field(ge=1, le=2, strict=True)
+    name: str
+    fields: dict[str, str]
+    bases: list[tuple[str, str, Optional[str]]]
+
+    @classmethod
+    def deserialize(cls, data: dict[str, Any], type_name: str) -> "CustomType":
+        version = data.get("schema_version", 1)
+
+        if version == 1:
+            data = {
+                "schema_version": 1,
+                "name": type_name,
+                "fields": data,
+                "bases": [],
+            }
+
+        return cls(**data)
+
+
 def create_feature_model(
-    name: str, fields: dict[str, Union[type, tuple[type, Any]]]
+    name: str,
+    fields: Mapping[str, Union[type, None, tuple[type, Any]]],
+    base: Optional[type] = None,
 ) -> type[BaseModel]:
     """
     This gets or returns a dynamic feature model for use in restoring a model
@@ -98,7 +122,7 @@ def create_feature_model(
     name = name.replace("@", "_")
     return create_model(
         name,
-        __base__=DataModel,  # type: ignore[call-overload]
+        __base__=base or DataModel,  # type: ignore[call-overload]
         # These are tuples for each field of: annotation, default (if any)
         **{
             field_name: anno if isinstance(anno, tuple) else (anno, None)
@@ -156,7 +180,7 @@ class SignalSchema:
         return SignalSchema(signals)
 
     @staticmethod
-    def _serialize_custom_model_fields(
+    def _serialize_custom_model(
         version_name: str, fr: type[BaseModel], custom_types: dict[str, Any]
     ) -> str:
         """This serializes any custom type information to the provided custom_types
@@ -165,12 +189,23 @@ class SignalSchema:
            # This type is already stored in custom_types.
            return version_name
        fields = {}
+
        for field_name, info in fr.model_fields.items():
            field_type = info.annotation
            # All fields should be typed.
            assert field_type
            fields[field_name] = SignalSchema._serialize_type(field_type, custom_types)
-        custom_types[version_name] = fields
+
+        bases: list[tuple[str, str, Optional[str]]] = []
+        for type_ in fr.__mro__:
+            model_store_name = (
+                ModelStore.get_name(type_) if issubclass(type_, DataModel) else None
+            )
+            bases.append((type_.__name__, type_.__module__, model_store_name))
+
+        ct = CustomType(schema_version=2, name=version_name, fields=fields, bases=bases)
+        custom_types[version_name] = ct.model_dump()
+
        return version_name
 
     @staticmethod
@@ -184,15 +219,12 @@ class SignalSchema:
            if st is None or not ModelStore.is_pydantic(st):
                continue
            # Register and save feature types.
-            ModelStore.register(st)
            st_version_name = ModelStore.get_name(st)
            if st is fr:
                # If the main type is Pydantic, then use the ModelStore version name.
                type_name = st_version_name
            # Save this type to custom_types.
-            SignalSchema._serialize_custom_model_fields(
-                st_version_name, st, custom_types
-            )
+            SignalSchema._serialize_custom_model(st_version_name, st, custom_types)
        return type_name
 
    def serialize(self) -> dict[str, Any]:
@@ -215,7 +247,7 @@ class SignalSchema:
                depth += 1
            elif c == "]":
                if depth == 0:
-                    raise TypeError(
+                    raise ValueError(
                        "Extra closing square bracket when parsing subtype list"
                    )
                depth -= 1
@@ -223,16 +255,51 @@ class SignalSchema:
                subtypes.append(type_name[start:i].strip())
                start = i + 1
        if depth > 0:
-            raise TypeError("Unclosed square bracket when parsing subtype list")
+            raise ValueError("Unclosed square bracket when parsing subtype list")
        subtypes.append(type_name[start:].strip())
        return subtypes
 
    @staticmethod
-    def _resolve_type(type_name: str, custom_types: dict[str, Any]) -> Optional[type]:  # noqa: PLR0911
+    def _deserialize_custom_type(
+        type_name: str, custom_types: dict[str, Any]
+    ) -> Optional[type]:
+        """Given a type name like MyType@v1 gets a type from ModelStore or recreates
+        it based on the information from the custom types dict that includes fields and
+        bases."""
+        model_name, version = ModelStore.parse_name_version(type_name)
+        fr = ModelStore.get(model_name, version)
+        if fr:
+            return fr
+
+        if type_name in custom_types:
+            ct = CustomType.deserialize(custom_types[type_name], type_name)
+
+            fields = {
+                field_name: SignalSchema._resolve_type(field_type_str, custom_types)
+                for field_name, field_type_str in ct.fields.items()
+            }
+
+            base_model = None
+            for base in ct.bases:
+                _, _, model_store_name = base
+                if model_store_name:
+                    model_name, version = ModelStore.parse_name_version(
+                        model_store_name
+                    )
+                    base_model = ModelStore.get(model_name, version)
+                if base_model:
+                    break
+
+            return create_feature_model(type_name, fields, base=base_model)
+
+        return None
+
+    @staticmethod
+    def _resolve_type(type_name: str, custom_types: dict[str, Any]) -> Optional[type]:
        """Convert a string-based type back into a python type."""
        type_name = type_name.strip()
        if not type_name:
-            raise TypeError("Type cannot be empty")
+            raise ValueError("Type cannot be empty")
        if type_name == "NoneType":
            return None
 
@@ -240,14 +307,14 @@ class SignalSchema:
        subtypes: Optional[tuple[Optional[type], ...]] = None
        if bracket_idx > -1:
            if bracket_idx == 0:
-                raise TypeError("Type cannot start with '['")
+                raise ValueError("Type cannot start with '['")
            close_bracket_idx = type_name.rfind("]")
            if close_bracket_idx == -1:
-                raise TypeError("Unclosed square bracket when parsing type")
+                raise ValueError("Unclosed square bracket when parsing type")
            if close_bracket_idx < bracket_idx:
-                raise TypeError("Square brackets are out of order when parsing type")
+                raise ValueError("Square brackets are out of order when parsing type")
            if close_bracket_idx == bracket_idx + 1:
-                raise TypeError("Empty square brackets when parsing type")
+                raise ValueError("Empty square brackets when parsing type")
            subtype_names = SignalSchema._split_subtypes(
                type_name[bracket_idx + 1 : close_bracket_idx]
            )
@@ -267,18 +334,10 @@ class SignalSchema:
                return fr[subtypes]  # type: ignore[index]
            return fr  # type: ignore[return-value]
 
-        model_name, version = ModelStore.parse_name_version(type_name)
-        fr = ModelStore.get(model_name, version)
+        fr = SignalSchema._deserialize_custom_type(type_name, custom_types)
        if fr:
            return fr
 
-        if type_name in custom_types:
-            fields = custom_types[type_name]
-            fields = {
-                field_name: SignalSchema._resolve_type(field_type_str, custom_types)
-                for field_name, field_type_str in fields.items()
-            }
-            return create_feature_model(type_name, fields)
        # This can occur if a third-party or custom type is used, which is not available
        # when deserializing.
        warnings.warn(
@@ -317,7 +376,7 @@ class SignalSchema:
                    stacklevel=2,
                )
                continue
-            except TypeError as err:
+            except ValueError as err:
                raise SignalSchemaError(
                    f"cannot deserialize '{signal}': {err}"
                ) from err
@@ -662,6 +721,9 @@ class SignalSchema:
                stacklevel=2,
            )
            return "Any"
+        if ModelStore.is_pydantic(type_):
+            ModelStore.register(type_)
+            return ModelStore.get_name(type_)
        return type_.__name__
 
    @staticmethod
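
Taken together, these hunks introduce a versioned serialization format for custom types: schema_version 1 stored a bare field-name-to-type mapping, while schema_version 2 wraps it in a CustomType record that also captures the base-class chain, so deserialization can rebuild a dynamic model on top of a known DataModel parent. A rough sketch of the two payload shapes as they would appear under custom_types (the model name, field names, and base entries are illustrative, not taken from a real dataset):

# schema_version 1 (old format): the type name maps straight to its fields
{"MyModel@v1": {"title": "str", "count": "int"}}

# schema_version 2: the CustomType.model_dump() payload, including MRO-derived bases
{
    "MyModel@v1": {
        "schema_version": 2,
        "name": "MyModel@v1",
        "fields": {"title": "str", "count": "int"},
        "bases": [
            ["MyModel", "my_module", "MyModel@v1"],
            ["DataModel", "datachain.lib.data_model", "DataModel@v1"],
            ["BaseModel", "pydantic.main", None],
        ],
    }
}

CustomType.deserialize() accepts both shapes, so schemas written by older versions still load.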
datachain/listing.py CHANGED
@@ -157,11 +157,7 @@ class Listing:
 
        counter = 0
        for node in all_nodes:
-            dst = os.path.join(output, *node.path)
-            dst_dir = os.path.dirname(dst)
-            os.makedirs(dst_dir, exist_ok=True)
-            file = node.n.to_file(self.client.uri)
-            self.client.instantiate_object(file, dst, progress_bar, force)
+            node.instantiate(self.client, output, progress_bar, force=force)
            counter += 1
            if counter > 1000:
                progress_bar.update(counter)
datachain/node.py CHANGED
@@ -1,3 +1,4 @@
+import os
 from datetime import datetime
 from typing import TYPE_CHECKING, Any, Optional
 
@@ -10,6 +11,8 @@ from datachain.utils import TIME_ZERO, time_to_str
 if TYPE_CHECKING:
     from typing_extensions import Self
 
+    from datachain.client import Client
+
 
 class DirType:
     FILE = 0
@@ -114,7 +117,21 @@ class Node:
        )
 
    @classmethod
-    def from_dict(cls, d: dict[str, Any], file_prefix: str = "file") -> "Self":
+    def from_file(cls, f: File) -> "Self":
+        return cls(
+            source=StorageURI(f.source),
+            path=f.path,
+            etag=f.etag,
+            is_latest=f.is_latest,
+            size=f.size,
+            last_modified=f.last_modified,
+            version=f.version,
+            location=str(f.location) if f.location else None,
+            dir_type=DirType.FILE,
+        )
+
+    @classmethod
+    def from_row(cls, d: dict[str, Any], file_prefix: str = "file") -> "Self":
        def _dval(field_name: str):
            return d.get(f"{file_prefix}__{field_name}")
 
@@ -174,6 +191,15 @@ class NodeWithPath:
            path += "/"
        return path
 
+    def instantiate(
+        self, client: "Client", output: str, progress_bar, *, force: bool = False
+    ):
+        dst = os.path.join(output, *self.path)
+        dst_dir = os.path.dirname(dst)
+        os.makedirs(dst_dir, exist_ok=True)
+        file = self.n.to_file(client.uri)
+        client.instantiate_object(file, dst, progress_bar, force)
+
 
 TIME_FMT = "%Y-%m-%d %H:%M"
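
Node.from_file complements the new NodeWithPath.instantiate helper: a File record produced by a listing can be turned into a Node without callers copying each field by hand. A minimal sketch, assuming File's remaining fields keep their defaults (the bucket, path, and etag are placeholders):

from datachain.lib.file import File
from datachain.node import DirType, Node

f = File(source="s3://my-bucket", path="images/cat.jpg", size=1024, etag="abc123")
node = Node.from_file(f)
assert node.dir_type == DirType.FILE
assert node.path == "images/cat.jpg"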
datachain/progress.py CHANGED
@@ -61,7 +61,7 @@ class Tqdm(tqdm):
        disable : If (default: None) or False,
            will be determined by logging level.
            May be overridden to `True` due to non-TTY status.
-            Skip override by specifying env var `DVC_IGNORE_ISATTY`.
+            Skip override by specifying env var `DATACHAIN_IGNORE_ISATTY`.
        kwargs : anything accepted by `tqdm.tqdm()`
        """
        kwargs = kwargs.copy()
@@ -77,7 +77,7 @@ class Tqdm(tqdm):
        # auto-disable based on TTY
        if (
            not disable
-            and not env2bool("DVC_IGNORE_ISATTY")
+            and not env2bool("DATACHAIN_IGNORE_ISATTY")
            and hasattr(file, "isatty")
        ):
            disable = not file.isatty()
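
The TTY auto-disable escape hatch keeps the same semantics; only the variable name changes from DVC_IGNORE_ISATTY to DATACHAIN_IGNORE_ISATTY. For example, to keep progress bars even when output is piped (a usage sketch, not part of the diff):

import os

# Any truthy value recognized by env2bool() prevents Tqdm from disabling itself
# when the output stream is not a TTY (e.g. when piping logs to a file).
os.environ["DATACHAIN_IGNORE_ISATTY"] = "true"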
datachain/query/session.py CHANGED
@@ -55,7 +55,7 @@ class Session:
        client_config: Optional[dict] = None,
        in_memory: bool = False,
    ):
-        if re.match(r"^[0-9a-zA-Z]+$", name) is None:
+        if re.match(r"^[0-9a-zA-Z]*$", name) is None:
            raise ValueError(
                f"Session name can contain only letters or numbers - '{name}' given."
            )
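
The only change is "+" to "*" in the name pattern, which makes an empty session name valid while anything containing non-alphanumeric characters is still rejected. A quick illustration of the boundary cases:

import re

pattern = r"^[0-9a-zA-Z]*$"
assert re.match(pattern, "") is not None            # now accepted (the old ^[0-9a-zA-Z]+$ rejected it)
assert re.match(pattern, "mysession1") is not None  # accepted before and after
assert re.match(pattern, "my-session") is None      # still rejected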
datachain/studio.py CHANGED
@@ -20,21 +20,7 @@ POST_LOGIN_MESSAGE = (
 )
 
 
-def process_studio_cli_args(args: "Namespace"):  # noqa: PLR0911
-    if args.cmd == "login":
-        return login(args)
-    if args.cmd == "logout":
-        return logout()
-    if args.cmd == "token":
-        return token()
-    if args.cmd == "datasets":
-        rows = [
-            {"Name": name, "Version": version}
-            for name, version in list_datasets(args.team)
-        ]
-        print(tabulate(rows, headers="keys"))
-        return 0
-
+def process_jobs_args(args: "Namespace"):
     if args.cmd == "run":
         return create_job(
             args.query_file,
@@ -50,6 +36,25 @@ def process_studio_cli_args(args: "Namespace"): # noqa: PLR0911
 
     if args.cmd == "cancel":
         return cancel_job(args.job_id, args.team)
+    if args.cmd == "logs":
+        return show_job_logs(args.job_id, args.team)
+    raise DataChainError(f"Unknown command '{args.cmd}'.")
+
+
+def process_studio_cli_args(args: "Namespace"):
+    if args.cmd == "login":
+        return login(args)
+    if args.cmd == "logout":
+        return logout()
+    if args.cmd == "token":
+        return token()
+    if args.cmd == "dataset":
+        rows = [
+            {"Name": name, "Version": version}
+            for name, version in list_datasets(args.team)
+        ]
+        print(tabulate(rows, headers="keys"))
+        return 0
 
     if args.cmd == "team":
         return set_team(args)
@@ -187,6 +192,32 @@ def save_config(hostname, token):
     return config.config_file()
 
 
+def show_logs_from_client(client, job_id):
+    # Sync usage
+    async def _run():
+        async for message in client.tail_job_logs(job_id):
+            if "logs" in message:
+                for log in message["logs"]:
+                    print(log["message"], end="")
+            elif "job" in message:
+                print(f"\n>>>> Job is now in {message['job']['status']} status.")
+
+    asyncio.run(_run())
+
+    response = client.dataset_job_versions(job_id)
+    if not response.ok:
+        raise_remote_error(response.message)
+
+    response_data = response.data
+    if response_data:
+        dataset_versions = response_data.get("dataset_versions", [])
+        print("\n\n>>>> Dataset versions created during the job:")
+        for version in dataset_versions:
+            print(f" - {version.get('dataset_name')}@v{version.get('version')}")
+    else:
+        print("No dataset versions created during the job.")
+
+
 def create_job(
     query_file: str,
     team_name: Optional[str],
@@ -236,29 +267,7 @@ def create_job(
     print("Open the job in Studio at", response.data.get("job", {}).get("url"))
     print("=" * 40)
 
-    # Sync usage
-    async def _run():
-        async for message in client.tail_job_logs(job_id):
-            if "logs" in message:
-                for log in message["logs"]:
-                    print(log["message"], end="")
-            elif "job" in message:
-                print(f"\n>>>> Job is now in {message['job']['status']} status.")
-
-    asyncio.run(_run())
-
-    response = client.dataset_job_versions(job_id)
-    if not response.ok:
-        raise_remote_error(response.message)
-
-    response_data = response.data
-    if response_data:
-        dataset_versions = response_data.get("dataset_versions", [])
-        print("\n\n>>>> Dataset versions created during the job:")
-        for version in dataset_versions:
-            print(f" - {version.get('dataset_name')}@v{version.get('version')}")
-    else:
-        print("No dataset versions created during the job.")
+    show_logs_from_client(client, job_id)
 
 
 def upload_files(client: StudioClient, files: list[str]) -> list[str]:
@@ -293,3 +302,14 @@ def cancel_job(job_id: str, team_name: Optional[str]):
        raise_remote_error(response.message)
 
    print(f"Job {job_id} canceled")
+
+
+def show_job_logs(job_id: str, team_name: Optional[str]):
+    token = Config().read().get("studio", {}).get("token")
+    if not token:
+        raise DataChainError(
+            "Not logged in to Studio. Log in with 'datachain studio login'."
+        )
+
+    client = StudioClient(team=team_name)
+    show_logs_from_client(client, job_id)
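
The log tailing and dataset-version reporting that previously lived inline in create_job is now shared via show_logs_from_client, which also backs the new show_job_logs entry point used by the "logs" command handled in process_jobs_args. A hedged sketch of calling it directly from Python (requires a prior 'datachain studio login'; the team name and job id are placeholders):

from datachain.studio import show_job_logs

# Streams the job's logs, then prints any dataset versions the job created.
show_job_logs("0f3e9f2a-1234-4c56-8d90-abcdef012345", team_name="my-team")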
datachain/utils.py CHANGED
@@ -30,7 +30,7 @@ APPNAME = "datachain"
 APPAUTHOR = "iterative"
 ENV_DATACHAIN_SYSTEM_CONFIG_DIR = "DATACHAIN_SYSTEM_CONFIG_DIR"
 ENV_DATACHAIN_GLOBAL_CONFIG_DIR = "DATACHAIN_GLOBAL_CONFIG_DIR"
-STUDIO_URL = "https://studio.dvc.ai"
+STUDIO_URL = "https://studio.datachain.ai"
 
 
 T = TypeVar("T", bound="DataChainDir")
{datachain-0.8.2.dist-info → datachain-0.8.4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.8.2
+Version: 0.8.4
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -50,7 +50,7 @@ Requires-Dist: websockets
 Provides-Extra: docs
 Requires-Dist: mkdocs>=1.5.2; extra == "docs"
 Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
-Requires-Dist: mkdocs-material>=9.3.1; extra == "docs"
+Requires-Dist: mkdocs-material==9.5.22; extra == "docs"
 Requires-Dist: mkdocs-section-index>=0.3.6; extra == "docs"
 Requires-Dist: mkdocstrings-python>=1.6.3; extra == "docs"
 Requires-Dist: mkdocs-literate-nav>=0.6.1; extra == "docs"
@@ -72,7 +72,7 @@ Requires-Dist: pytest<9,>=8; extra == "tests"
 Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
 Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
 Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
-Requires-Dist: pytest-servers[all]>=0.5.8; extra == "tests"
+Requires-Dist: pytest-servers[all]>=0.5.9; extra == "tests"
 Requires-Dist: pytest-benchmark[histogram]; extra == "tests"
 Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
 Requires-Dist: virtualenv; extra == "tests"
@@ -84,7 +84,7 @@ Requires-Dist: requests-mock; extra == "tests"
 Requires-Dist: scipy; extra == "tests"
 Provides-Extra: dev
 Requires-Dist: datachain[docs,tests]; extra == "dev"
-Requires-Dist: mypy==1.14.0; extra == "dev"
+Requires-Dist: mypy==1.14.1; extra == "dev"
 Requires-Dist: types-python-dateutil; extra == "dev"
 Requires-Dist: types-pytz; extra == "dev"
 Requires-Dist: types-PyYAML; extra == "dev"
@@ -95,11 +95,11 @@ Requires-Dist: datachain[tests]; extra == "examples"
 Requires-Dist: defusedxml; extra == "examples"
 Requires-Dist: accelerate; extra == "examples"
 Requires-Dist: unstructured_ingest[embed-huggingface]; extra == "examples"
-Requires-Dist: unstructured[pdf]; extra == "examples"
+Requires-Dist: unstructured[pdf]<0.16.12; extra == "examples"
 Requires-Dist: pdfplumber==0.11.4; extra == "examples"
 Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
 Requires-Dist: onnx==1.16.1; extra == "examples"
-Requires-Dist: ultralytics==8.3.53; extra == "examples"
+Requires-Dist: ultralytics==8.3.55; extra == "examples"
 
 ================
 |logo| DataChain