datachain 0.26.4__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



datachain/cli/parser/job.py CHANGED
@@ -20,8 +20,8 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
  studio_run_description = "Run a job in Studio. \n"
  studio_run_description += (
  "When using --start-time or --cron,"
- " the job is scheduled as a task and will not show logs immediately."
- " The job will be executed according to the schedule."
+ " the job is scheduled to run but won't start immediately"
+ " (can be seen in the Tasks tab in UI)"
  )

  studio_run_parser = jobs_subparser.add_parser(
@@ -104,7 +104,7 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
  studio_run_parser.add_argument(
  "--start-time",
  action="store",
- help="Start time in ISO format or natural language for the cron task.",
+ help="Time to schedule a task in YYYY-MM-DDTHH:mm format or natural language.",
  )
  studio_run_parser.add_argument(
  "--cron", action="store", help="Cron expression for the cron task."
datachain/lib/arrow.py CHANGED
@@ -245,7 +245,7 @@ def arrow_type_mapper(col_type: pa.DataType, column: str = "") -> type: # noqa:
  if field.nullable and not ModelStore.is_pydantic(dtype):
  dtype = Optional[dtype] # type: ignore[assignment]
  type_dict[field.name] = dtype
- return dict_to_data_model(column, type_dict)
+ return dict_to_data_model(f"ArrowDataModel_{column}", type_dict)
  if pa.types.is_map(col_type):
  return dict
  if isinstance(col_type, pa.lib.DictionaryType):
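For context, the change above only affects the name given to the Pydantic model generated for Arrow struct columns. A hedged sketch of the effect (the exact generated name is an assumption based on this hunk, not verified against 0.27.0):

```py
# Illustrative only: struct columns inferred from Arrow should now yield a model
# named with an "ArrowDataModel_" prefix instead of the bare column name.
import pyarrow as pa
from datachain.lib.arrow import arrow_type_mapper

struct = pa.struct([pa.field("x", pa.int64()), pa.field("y", pa.string())])
model = arrow_type_mapper(struct, column="point")
print(model.__name__)  # expected to be something like "ArrowDataModel_point"
```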
datachain/lib/audio.py CHANGED
@@ -33,10 +33,14 @@ def audio_info(file: "Union[File, AudioFile]") -> "Audio":
  frames = int(info.num_frames)
  duration = float(frames / sample_rate) if sample_rate > 0 else 0.0

- # Get format information
- format_name = getattr(info, "format", "")
  codec_name = getattr(info, "encoding", "")
- bit_rate = getattr(info, "bits_per_sample", 0) * sample_rate * channels
+ file_ext = file.get_file_ext().lower()
+ format_name = _encoding_to_format(codec_name, file_ext)
+
+ bits_per_sample = getattr(info, "bits_per_sample", 0)
+ bit_rate = (
+ bits_per_sample * sample_rate * channels if bits_per_sample > 0 else -1
+ )

  except Exception as exc:
  raise FileError(
@@ -54,7 +58,47 @@ def audio_info(file: "Union[File, AudioFile]") -> "Audio":
  )


- def audio_fragment_np(
+ def _encoding_to_format(encoding: str, file_ext: str) -> str:
+ """
+ Map torchaudio encoding to a format name.
+
+ Args:
+ encoding: The encoding string from torchaudio.info()
+ file_ext: The file extension as a fallback
+
+ Returns:
+ Format name as a string
+ """
+ # Direct mapping for formats that match exactly
+ encoding_map = {
+ "FLAC": "flac",
+ "MP3": "mp3",
+ "VORBIS": "ogg",
+ "AMR_WB": "amr",
+ "AMR_NB": "amr",
+ "OPUS": "opus",
+ "GSM": "gsm",
+ }
+
+ if encoding in encoding_map:
+ return encoding_map[encoding]
+
+ # For PCM variants, use file extension to determine format
+ if encoding.startswith("PCM_"):
+ # Common PCM formats by extension
+ pcm_formats = {
+ "wav": "wav",
+ "aiff": "aiff",
+ "au": "au",
+ "raw": "raw",
+ }
+ return pcm_formats.get(file_ext, "wav") # Default to wav for PCM
+
+ # Fallback to file extension if encoding is unknown
+ return file_ext if file_ext else "unknown"
+
+
+ def audio_to_np(
  audio: "AudioFile", start: float = 0, duration: Optional[float] = None
  ) -> "tuple[ndarray, int]":
  """Load audio fragment as numpy array.
@@ -98,14 +142,17 @@ def audio_fragment_np(
  ) from exc


- def audio_fragment_bytes(
+ def audio_to_bytes(
  audio: "AudioFile",
+ format: str = "wav",
  start: float = 0,
  duration: Optional[float] = None,
- format: str = "wav",
  ) -> bytes:
- """Convert audio fragment to bytes using soundfile."""
- y, sr = audio_fragment_np(audio, start, duration)
+ """Convert audio to bytes using soundfile.
+
+ If duration is None, converts from start to end of file.
+ If start is 0 and duration is None, converts entire file."""
+ y, sr = audio_to_np(audio, start, duration)

  import io

@@ -116,36 +163,82 @@ def audio_fragment_bytes(
  return buffer.getvalue()


- def save_audio_fragment(
+ def save_audio(
  audio: "AudioFile",
- start: float,
- end: float,
  output: str,
  format: Optional[str] = None,
+ start: float = 0,
+ end: Optional[float] = None,
  ) -> "AudioFile":
- """Save audio fragment with timestamped filename.
- Supports local and remote storage upload."""
- if start < 0 or end < 0 or start >= end:
- raise ValueError(f"Invalid time range: ({start:.3f}, {end:.3f})")
-
+ """Save audio file or extract fragment to specified format.
+
+ Args:
+ audio: Source AudioFile object
+ output: Output directory path
+ format: Output format ('wav', 'mp3', etc). Defaults to source format
+ start: Start time in seconds (>= 0). Defaults to 0
+ end: End time in seconds. If None, extracts to end of file
+
+ Returns:
+ AudioFile: New audio file with format conversion/extraction applied
+
+ Examples:
+ save_audio(audio, "/path", "mp3") # Entire file to MP3
+ save_audio(audio, "s3://bucket/path", "wav", start=2.5) # From 2.5s to end
+ save_audio(audio, "/path", "flac", start=1, end=3) # Extract 1-3s fragment
+ """
  if format is None:
  format = audio.get_file_ext()

- duration = end - start
- start_ms = int(start * 1000)
- end_ms = int(end * 1000)
- output_file = posixpath.join(
- output, f"{audio.get_file_stem()}_{start_ms:06d}_{end_ms:06d}.{format}"
- )
+ # Validate start time
+ if start < 0:
+ raise ValueError(
+ f"Can't save audio for '{audio.path}', "
+ f"start time must be non-negative: {start:.3f}"
+ )
+
+ # Handle full file conversion when end is None and start is 0
+ if end is None and start == 0:
+ output_file = posixpath.join(output, f"{audio.get_file_stem()}.{format}")
+ try:
+ audio_bytes = audio_to_bytes(audio, format, start=0, duration=None)
+ except Exception as exc:
+ raise FileError(
+ "unable to convert audio file", audio.source, audio.path
+ ) from exc
+ elif end is None:
+ # Extract from start to end of file
+ output_file = posixpath.join(
+ output, f"{audio.get_file_stem()}_{int(start * 1000):06d}_end.{format}"
+ )
+ try:
+ audio_bytes = audio_to_bytes(audio, format, start=start, duration=None)
+ except Exception as exc:
+ raise FileError(
+ "unable to save audio fragment", audio.source, audio.path
+ ) from exc
+ else:
+ # Fragment extraction mode with specific end time
+ if end < 0 or start >= end:
+ raise ValueError(
+ f"Can't save audio for '{audio.path}', "
+ f"invalid time range: ({start:.3f}, {end:.3f})"
+ )

- try:
- audio_bytes = audio_fragment_bytes(audio, start, duration, format)
+ duration = end - start
+ start_ms = int(start * 1000)
+ end_ms = int(end * 1000)
+ output_file = posixpath.join(
+ output, f"{audio.get_file_stem()}_{start_ms:06d}_{end_ms:06d}.{format}"
+ )

- from datachain.lib.file import AudioFile
+ try:
+ audio_bytes = audio_to_bytes(audio, format, start, duration)
+ except Exception as exc:
+ raise FileError(
+ "unable to save audio fragment", audio.source, audio.path
+ ) from exc

- return AudioFile.upload(audio_bytes, output_file, catalog=audio._catalog)
+ from datachain.lib.file import AudioFile

- except Exception as exc:
- raise FileError(
- "unable to save audio fragment", audio.source, audio.path
- ) from exc
+ return AudioFile.upload(audio_bytes, output_file, catalog=audio._catalog)
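Taken together, the audio helpers are renamed and their argument order changes: `audio_fragment_np`, `audio_fragment_bytes`, and `save_audio_fragment` become `audio_to_np`, `audio_to_bytes`, and `save_audio`, with `format` now preceding `start`/`duration`. A hedged usage sketch of the new call shapes (`audio` stands for an existing AudioFile instance; paths and bucket names are placeholders):

```py
# Sketch of the 0.27.0 call shapes shown in this diff; not a verified end-to-end script.
from datachain.lib.audio import audio_to_np, audio_to_bytes, save_audio

y, sr = audio_to_np(audio, start=1.0, duration=2.0)           # 2s fragment as a numpy array
wav_bytes = audio_to_bytes(audio, "wav")                       # whole file; format is now the 2nd arg
clip = save_audio(audio, "s3://bucket/out", "mp3", start=2.5)  # from 2.5s to end of file
full = save_audio(audio, "/tmp/out", "flac")                   # entire file converted to FLAC
```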
datachain/lib/data_model.py CHANGED
@@ -1,3 +1,5 @@
+ import inspect
+ import uuid
  from collections.abc import Sequence
  from datetime import datetime
  from typing import ClassVar, Optional, Union, get_args, get_origin
@@ -80,7 +82,9 @@ def dict_to_data_model(

  fields = {
  name: (
- anno,
+ anno
+ if inspect.isclass(anno) and issubclass(anno, BaseModel)
+ else Optional[anno],
  Field(
  validation_alias=AliasChoices(name, original_names[idx] or name),
  default=None,
@@ -101,6 +105,10 @@ def dict_to_data_model(
  field_info[str(alias)] = (_name, field)
  return field_info

+ # Generate random unique name if not provided
+ if not name:
+ name = f"DataModel_{uuid.uuid4().hex[:8]}"
+
  return create_model(
  name,
  __base__=_DataModelStrict,
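Two behavioural changes are visible here: plain annotations are wrapped in `Optional[...]` (nested `BaseModel` fields are left as-is), and an empty model name now falls back to a random `DataModel_<hex>` identifier. A minimal sketch, assuming `dict_to_data_model` stays importable from `datachain.lib.data_model` and that the `original_names` argument keeps its optional default:

```py
# Hedged sketch of the new dict_to_data_model behaviour described above.
from datachain.lib.data_model import dict_to_data_model

Model = dict_to_data_model("", {"width": int, "label": str})
print(Model.__name__)   # e.g. "DataModel_1a2b3c4d" - random suffix when no name is given
obj = Model()           # fields default to None since annotations become Optional[...]
print(obj.width, obj.label)
```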
datachain/lib/dc/hf.py CHANGED
@@ -25,19 +25,23 @@ def read_hf(
  settings: Optional[dict] = None,
  column: str = "",
  model_name: str = "",
+ limit: int = 0,
  **kwargs,
  ) -> "DataChain":
- """Generate chain from huggingface hub dataset.
+ """Generate chain from Hugging Face Hub dataset.

  Parameters:
  dataset : Path or name of the dataset to read from Hugging Face Hub,
  or an instance of `datasets.Dataset`-like object.
- args : Additional positional arguments to pass to datasets.load_dataset.
+ args : Additional positional arguments to pass to `datasets.load_dataset`.
  session : Session to use for the chain.
  settings : Settings to use for the chain.
  column : Generated object column name.
  model_name : Generated model name.
- kwargs : Parameters to pass to datasets.load_dataset.
+ limit : Limit the number of items to read from the HF dataset.
+ Adds `take(limit)` to the `datasets.load_dataset`.
+ Defaults to 0 (no limit).
+ kwargs : Parameters to pass to `datasets.load_dataset`.

  Example:
  Load from Hugging Face Hub:
@@ -53,6 +57,18 @@ def read_hf(
  import datachain as dc
  chain = dc.read_hf(ds)
  ```
+
+ Streaming with limit, for large datasets:
+ ```py
+ import datachain as dc
+ ds = dc.read_hf("beans", split="train", streaming=True, limit=10)
+ ```
+
+ or use HF split syntax (not supported if streaming is enabled):
+ ```py
+ import datachain as dc
+ ds = dc.read_hf("beans", split="train[%10]")
+ ```
  """
  from datachain.lib.hf import HFGenerator, get_output_schema, stream_splits

@@ -72,4 +88,4 @@ def read_hf(
  output = {column: model}

  chain = read_values(split=list(ds_dict.keys()), session=session, settings=settings)
- return chain.gen(HFGenerator(dataset, model, *args, **kwargs), output=output)
+ return chain.gen(HFGenerator(dataset, model, limit, *args, **kwargs), output=output)
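Beyond the docstring examples above, the new `limit` composes with the existing `column` and `model_name` options of `read_hf`. A hedged sketch (the dataset name reuses the `beans` example from the docstring; column and model names are illustrative):

```py
# Illustrative only: combines the new limit with existing read_hf options.
import datachain as dc

chain = dc.read_hf(
    "beans",
    split="train",
    streaming=True,
    limit=100,          # new in 0.27.0: stops after 100 rows via take(100)
    column="row",       # nest generated fields under a "row" column
    model_name="Bean",  # name of the generated Pydantic model
)
```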
datachain/lib/file.py CHANGED
@@ -832,7 +832,10 @@ class VideoFile(File):
  VideoFragment: A Model representing the video fragment.
  """
  if start < 0 or end < 0 or start >= end:
- raise ValueError(f"Invalid time range: ({start:.3f}, {end:.3f})")
+ raise ValueError(
+ f"Can't get video fragment for '{self.path}', "
+ f"invalid time range: ({start:.3f}, {end:.3f})"
+ )

  return VideoFragment(video=self, start=start, end=end)

@@ -915,7 +918,10 @@ class AudioFile(File):
  AudioFragment: A Model representing the audio fragment.
  """
  if start < 0 or end < 0 or start >= end:
- raise ValueError(f"Invalid time range: ({start:.3f}, {end:.3f})")
+ raise ValueError(
+ f"Can't get audio fragment for '{self.path}', "
+ f"invalid time range: ({start:.3f}, {end:.3f})"
+ )

  return AudioFragment(audio=self, start=start, end=end)

@@ -958,6 +964,35 @@ class AudioFile(File):
  yield self.get_fragment(start, min(start + duration, end))
  start += duration

+ def save( # type: ignore[override]
+ self,
+ output: str,
+ format: Optional[str] = None,
+ start: float = 0,
+ end: Optional[float] = None,
+ client_config: Optional[dict] = None,
+ ) -> "AudioFile":
+ """Save audio file or extract fragment to specified format.
+
+ Args:
+ output: Output directory path
+ format: Output format ('wav', 'mp3', etc). Defaults to source format
+ start: Start time in seconds (>= 0). Defaults to 0
+ end: End time in seconds. If None, extracts to end of file
+ client_config: Optional client configuration
+
+ Returns:
+ AudioFile: New audio file with format conversion/extraction applied
+
+ Examples:
+ audio.save("/path", "mp3") # Entire file to MP3
+ audio.save("s3://bucket/path", "wav", start=2.5) # From 2.5s to end as WAV
+ audio.save("/path", "flac", start=1, end=3) # 1-3s fragment as FLAC
+ """
+ from .audio import save_audio
+
+ return save_audio(self, output, format, start, end)
+

  class AudioFragment(DataModel):
  """
@@ -985,10 +1020,10 @@ class AudioFragment(DataModel):
  tuple[ndarray, int]: A tuple containing the audio data as a NumPy array
  and the sample rate.
  """
- from .audio import audio_fragment_np
+ from .audio import audio_to_np

  duration = self.end - self.start
- return audio_fragment_np(self.audio, self.start, duration)
+ return audio_to_np(self.audio, self.start, duration)

  def read_bytes(self, format: str = "wav") -> bytes:
  """
@@ -1001,10 +1036,10 @@ class AudioFragment(DataModel):
  Returns:
  bytes: The encoded audio fragment as bytes.
  """
- from .audio import audio_fragment_bytes
+ from .audio import audio_to_bytes

  duration = self.end - self.start
- return audio_fragment_bytes(self.audio, self.start, duration, format)
+ return audio_to_bytes(self.audio, format, self.start, duration)

  def save(self, output: str, format: Optional[str] = None) -> "AudioFile":
  """
@@ -1022,9 +1057,9 @@ class AudioFragment(DataModel):
  Returns:
  AudioFile: A Model representing the saved audio file.
  """
- from .audio import save_audio_fragment
+ from .audio import save_audio

- return save_audio_fragment(self.audio, self.start, self.end, output, format)
+ return save_audio(self.audio, output, format, self.start, self.end)


  class VideoFrame(DataModel):
datachain/lib/hf.py CHANGED
@@ -69,21 +69,25 @@ class HFGenerator(Generator):
  self,
  ds: Union[str, HFDatasetType],
  output_schema: type["BaseModel"],
+ limit: int = 0,
  *args,
  **kwargs,
  ):
  """
- Generator for chain from huggingface datasets.
+ Generator for chain from Hugging Face datasets.

  Parameters:

- ds : Path or name of the dataset to read from Hugging Face Hub,
- or an instance of `datasets.Dataset`-like object.
- output_schema : Pydantic model for validation.
+ ds : Path or name of the dataset to read from Hugging Face Hub,
+ or an instance of `datasets.Dataset`-like object.
+ limit : Limit the number of items to read from the HF dataset.
+ Defaults to 0 (no limit).
+ output_schema : Pydantic model for validation.
  """
  super().__init__()
  self.ds = ds
  self.output_schema = output_schema
+ self.limit = limit
  self.args = args
  self.kwargs = kwargs

@@ -93,6 +97,8 @@ class HFGenerator(Generator):
  def process(self, split: str = ""):
  desc = "Parsed Hugging Face dataset"
  ds = self.ds_dict[split]
+ if self.limit > 0:
+ ds = ds.take(self.limit)
  if split:
  desc += f" split '{split}'"
  model_fields = self.output_schema._model_fields_by_aliases() # type: ignore[attr-defined]
@@ -113,7 +119,6 @@ class HFGenerator(Generator):

  def stream_splits(ds: Union[str, HFDatasetType], *args, **kwargs):
  if isinstance(ds, str):
- kwargs["streaming"] = True
  ds = load_dataset(ds, *args, **kwargs)
  if isinstance(ds, (DatasetDict, IterableDatasetDict)):
  return ds
@@ -132,7 +137,12 @@ def convert_feature(val: Any, feat: Any, anno: Any) -> Any:
  sfeat = feat[sname]
  norm_name, info = model_fields[sname]
  sanno = info.annotation
- sdict[norm_name] = [convert_feature(v, sfeat, sanno) for v in val[sname]]
+ if isinstance(val[sname], list):
+ sdict[norm_name] = [
+ convert_feature(v, sfeat, sanno) for v in val[sname]
+ ]
+ else:
+ sdict[norm_name] = convert_feature(val[sname], sfeat, sanno)
  return anno(**sdict)
  if isinstance(feat, Image):
  if isinstance(val, dict):
@@ -174,7 +184,7 @@ def _feature_to_chain_type(name: str, val: Any) -> DataType: # noqa: PLR0911
  for sname, sval in val.items():
  dtype = _feature_to_chain_type(sname, sval)
  sequence_dict[sname] = dtype # type: ignore[valid-type]
- return dict_to_data_model(name, sequence_dict) # type: ignore[arg-type]
+ return dict_to_data_model(f"HFDataModel_{name}", sequence_dict) # type: ignore[arg-type]
  if isinstance(val, List):
  return list[_feature_to_chain_type(name, val.feature)] # type: ignore[arg-type,misc,return-value]
  if isinstance(val, Array2D):
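Note that `stream_splits` no longer forces `streaming=True` for string dataset names, and the new `limit` is applied inside `HFGenerator.process` via `ds.take(self.limit)`. A rough standalone equivalent using the `datasets` library directly (the `beans` dataset name reuses the docstring example; relying on `take()` being available is an assumption that holds for recent `datasets` releases):

```py
# Hedged sketch of what the generator now does for a streamed split with limit=10.
from datasets import load_dataset

ds = load_dataset("beans", split="train", streaming=True)  # caller opts into streaming now
ds = ds.take(10)                                           # same call HFGenerator makes when limit > 0
for row in ds:
    print(row.keys())
    break
```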
datachain/lib/video.py CHANGED
@@ -205,7 +205,10 @@ def save_video_fragment(
  VideoFile: Video fragment model.
  """
  if start < 0 or end < 0 or start >= end:
- raise ValueError(f"Invalid time range: ({start:.3f}, {end:.3f})")
+ raise ValueError(
+ f"Can't save video fragment for '{video.path}', "
+ f"invalid time range: ({start:.3f}, {end:.3f})"
+ )

  if format is None:
  format = video.get_file_ext()
datachain/studio.py CHANGED
@@ -270,25 +270,18 @@ def parse_start_time(start_time_str: Optional[str]) -> Optional[str]:
  if not start_time_str:
  return None

- try:
- # Parse the datetime string using dateparser
- parsed_datetime = dateparser.parse(start_time_str)
-
- if parsed_datetime is None:
- raise DataChainError(
- f"Could not parse datetime string: '{start_time_str}'. "
- f"Supported formats include: '2024-01-15 14:30:00', 'tomorrow 3pm', "
- f"'monday 9am', '2024-01-15T14:30:00Z', 'in 2 hours', etc."
- )
+ # Parse the datetime string using dateparser
+ parsed_datetime = dateparser.parse(start_time_str)

- # Convert to ISO format string
- return parsed_datetime.isoformat()
- except Exception as e:
+ if parsed_datetime is None:
  raise DataChainError(
- f"Invalid datetime format for start_time: '{start_time_str}'. "
+ f"Could not parse datetime string: '{start_time_str}'. "
  f"Supported formats include: '2024-01-15 14:30:00', 'tomorrow 3pm', "
- f"'monday 9am', '2024-01-15T14:30:00Z', 'in 2 hours', etc. Error: {e}"
- ) from e
+ f"'monday 9am', '2024-01-15T14:30:00Z', 'in 2 hours', etc."
+ )
+
+ # Convert to ISO format string
+ return parsed_datetime.isoformat()


  def show_logs_from_client(client, job_id):
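The refactor drops the outer `try/except` wrapper: `dateparser.parse` either returns a `datetime` (converted to an ISO string) or `None`, which now raises a single, non-chained error. A self-contained sketch of the same flow, with a plain `ValueError` standing in for `DataChainError`:

```py
# Hedged sketch mirroring parse_start_time after this change; dateparser.parse
# returns None for unparseable input rather than raising.
import dateparser

def parse_start_time(start_time_str):
    if not start_time_str:
        return None
    parsed = dateparser.parse(start_time_str)
    if parsed is None:
        raise ValueError(f"Could not parse datetime string: '{start_time_str}'")
    return parsed.isoformat()

print(parse_start_time("tomorrow 3pm"))  # ISO-8601 string, e.g. 2024-01-16T15:00:00
```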
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: datachain
- Version: 0.26.4
+ Version: 0.27.0
  Summary: Wrangle unstructured AI data at scale
  Author-email: Dmitry Petrov <support@dvc.org>
  License-Expression: Apache-2.0
@@ -120,7 +120,7 @@ Dynamic: license-file
  |logo| DataChain
  ================

- |PyPI| |Python Version| |Codecov| |Tests|
+ |PyPI| |Python Version| |Codecov| |Tests| |DeepWiki|

  .. |logo| image:: docs/assets/datachain.svg
  :height: 24
@@ -136,6 +136,9 @@ Dynamic: license-file
  .. |Tests| image:: https://github.com/iterative/datachain/actions/workflows/tests.yml/badge.svg
  :target: https://github.com/iterative/datachain/actions/workflows/tests.yml
  :alt: Tests
+ .. |DeepWiki| image:: https://deepwiki.com/badge.svg
+ :target: https://deepwiki.com/iterative/datachain
+ :alt: DeepWiki

  DataChain is a Python-based AI-data warehouse for transforming and analyzing unstructured
  data like images, audio, videos, text and PDFs. It integrates with external storage
@@ -17,7 +17,7 @@ datachain/project.py,sha256=90D4GpJSA3t0fayYZbzrL3sk4U7EJhQo8psnWvdI7_o,2280
  datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
  datachain/semver.py,sha256=UB8GHPBtAP3UJGeiuJoInD7SK-DnB93_Xd1qy_CQ9cU,2074
- datachain/studio.py,sha256=w5RyntqSl6qOs2mbw4Dc7SpZNNEN97xpvjxfJL0rO7M,14850
+ datachain/studio.py,sha256=RCpVZdHRX-ClEddXaAsZDGFy5o-SOqVCa5NhLj8337s,14486
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
  datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
  datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
@@ -35,7 +35,7 @@ datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibV
  datachain/cli/commands/query.py,sha256=Xzfgh14nPVH-sclqX1tpZqgfdTugw5s_44v0D33z6FA,1505
  datachain/cli/commands/show.py,sha256=Cf8wBs12h-xtdOzjU5GTDy2C8rF5HJSF0hDJYER1zH8,1606
  datachain/cli/parser/__init__.py,sha256=NPB6ssP4CCt7G1SWZ_8oNQEH2C1lktWgkyHYXDQJZNc,15073
- datachain/cli/parser/job.py,sha256=2_g46bx_p7DnqZoYsXY2rHlB07BjBCuRPzpGP-Duk-s,5804
+ datachain/cli/parser/job.py,sha256=iytBZaCcQUhaOcRlYZFeAJsscN2T2XcEY7MibTeuZhg,5786
  datachain/cli/parser/studio.py,sha256=Bo__LKM7qhJGgkyX8M_bCvgZ2Gvqq6r_X4t1NdtaBIY,3881
  datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
  datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
@@ -70,13 +70,13 @@ datachain/func/random.py,sha256=t7jwXsI8-hy0qAdvjAntgzy-AHtTAfozlZ1CpKR-QZE,458
  datachain/func/string.py,sha256=X9u4ip97U63RCaKRhMddoze7HgPiY3LbPRn9G06UWWo,7311
  datachain/func/window.py,sha256=ImyRpc1QI8QUSPO7KdD60e_DPVo7Ja0G5kcm6BlyMcw,1584
  datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datachain/lib/arrow.py,sha256=gMgmiMOhTGFMSyWBbjyzF2RsSXjx0XmUGPoSBxcWwe0,10756
- datachain/lib/audio.py,sha256=J7XJ14ItPF9y6pN-tmMV9In9X9rgwlBwzyzdGOUkPGk,4376
+ datachain/lib/arrow.py,sha256=geoLvyDd5uMqS3D9Ec1ODlShCUAdtwHUwl8FqbUX_hg,10776
+ datachain/lib/audio.py,sha256=fQmIBq-9hrUZtkgeJdPHYA_D8Wfe9D4cQZk4_ijxpNc,7580
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
- datachain/lib/data_model.py,sha256=JPHPO6z-pehyiY-qNBAnp8u015xUHrijPKbGkMHS6lo,3493
+ datachain/lib/data_model.py,sha256=Rjah76GHwIV6AZQk4rsdg6JLre5D8Kb9T4PS5SXzsPA,3740
  datachain/lib/dataset_info.py,sha256=7w-DoKOyIVoOtWGCgciMLcP5CiAWJB3rVI-vUDF80k0,3311
- datachain/lib/file.py,sha256=vlSFsmj0ltvQWG6_isfWwNZt5u002bwrl70J2KbdvDE,41335
- datachain/lib/hf.py,sha256=dadHs2dsi4ALwXz92Y3T7AUgq3wQF4mBydWqHCMjvks,6880
+ datachain/lib/file.py,sha256=_ch7xYcpl0kzImgEwccbQ-a5qb9rbEvx1vcuWerOn9k,42608
+ datachain/lib/hf.py,sha256=3xdvPQPilnJiGv3H4S4bTGqvrGGlZgZmqjE1n_SMJZg,7293
  datachain/lib/image.py,sha256=erWvZW5M3emnbl6_fGAOPyKm-1EKbt3vOdWPfe3Oo7U,3265
  datachain/lib/listing.py,sha256=U-2stsTEwEsq4Y80dqGfktGzkmB5-ZntnL1_rzXlH0k,7089
  datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
@@ -92,7 +92,7 @@ datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
  datachain/lib/udf.py,sha256=SUnJWRDC3TlLhvpi8iqqJbeZGn5DChot7DyH-0Q-z20,17305
  datachain/lib/udf_signature.py,sha256=Yz20iJ-WF1pijT3hvcDIKFzgWV9gFxZM73KZRx3NbPk,7560
  datachain/lib/utils.py,sha256=rG2y7NwTqZOuomZZRmrA-Q-ANM_j1cToQYqDJoOeGyU,1480
- datachain/lib/video.py,sha256=u6fLJWj5G6QqsVkpfHnKGklBNpG3BRRg6v3izngnNcU,6767
+ datachain/lib/video.py,sha256=ddVstiMkfxyBPDsnjCKY0d_93bw-DcMqGqN60yzsZoo,6851
  datachain/lib/webdataset.py,sha256=CkW8FfGigNx6wo2EEK4KMjhEE8FamRHWGs2HZuH7jDY,7214
  datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0EVlUE,2525
  datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -106,7 +106,7 @@ datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
  datachain/lib/dc/database.py,sha256=g5M6NjYR1T0vKte-abV-3Ejnm-HqxTIMir5cRi_SziE,6051
  datachain/lib/dc/datachain.py,sha256=mLE5v4KhzEQm7HVWBTxY6EwJ2J-YeFVcLUY4I21216c,93212
  datachain/lib/dc/datasets.py,sha256=P6CIJizD2IYFwOQG5D3VbQRjDmUiRH0ysdtb551Xdm8,15098
- datachain/lib/dc/hf.py,sha256=MJWO-NL4jAD6CEAmXsyeqXEyvefRLMhyxhT9jKT5vMU,2324
+ datachain/lib/dc/hf.py,sha256=AP_MUHg6HJWae10PN9hD_beQVjrl0cleZ6Cvhtl1yoI,2901
  datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
  datachain/lib/dc/listings.py,sha256=V379Cb-7ZyquM0w7sWArQZkzInZy4GB7QQ1ZfowKzQY,4544
  datachain/lib/dc/pandas.py,sha256=ObueUXDUFKJGu380GmazdG02ARpKAHPhSaymfmOH13E,1489
@@ -158,9 +158,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
- datachain-0.26.4.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
- datachain-0.26.4.dist-info/METADATA,sha256=oWaaj_Avr95dDdM_txeheiOefsoHuXTu0QR71hTN634,13624
- datachain-0.26.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- datachain-0.26.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
- datachain-0.26.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
- datachain-0.26.4.dist-info/RECORD,,
+ datachain-0.27.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+ datachain-0.27.0.dist-info/METADATA,sha256=PWZ_EWTpk1OvWlQZe__5SCjFem6BD1AtYmTxJ5wV3iY,13759
+ datachain-0.27.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ datachain-0.27.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+ datachain-0.27.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+ datachain-0.27.0.dist-info/RECORD,,