datachain 0.26.3__py3-none-any.whl → 0.27.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic.
- datachain/cli/parser/job.py +14 -1
- datachain/lib/arrow.py +1 -1
- datachain/lib/audio.py +123 -30
- datachain/lib/data_model.py +9 -1
- datachain/lib/dc/datachain.py +8 -4
- datachain/lib/dc/hf.py +20 -4
- datachain/lib/dc/storage.py +3 -3
- datachain/lib/file.py +60 -8
- datachain/lib/hf.py +17 -7
- datachain/lib/video.py +4 -1
- datachain/remote/studio.py +4 -0
- datachain/studio.py +36 -0
- {datachain-0.26.3.dist-info → datachain-0.27.0.dist-info}/METADATA +7 -2
- {datachain-0.26.3.dist-info → datachain-0.27.0.dist-info}/RECORD +18 -18
- {datachain-0.26.3.dist-info → datachain-0.27.0.dist-info}/WHEEL +0 -0
- {datachain-0.26.3.dist-info → datachain-0.27.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.26.3.dist-info → datachain-0.27.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.26.3.dist-info → datachain-0.27.0.dist-info}/top_level.txt +0 -0
datachain/cli/parser/job.py
CHANGED

@@ -17,7 +17,12 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
     )
 
     studio_run_help = "Run a job in Studio"
-    studio_run_description = "Run a job in Studio."
+    studio_run_description = "Run a job in Studio. \n"
+    studio_run_description += (
+        "When using --start-time or --cron,"
+        " the job is scheduled to run but won't start immediately"
+        " (can be seen in the Tasks tab in UI)"
+    )
 
     studio_run_parser = jobs_subparser.add_parser(
         "run",
@@ -96,6 +101,14 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
         help="Priority for the job in range 0-5. "
         "Lower value is higher priority (default: 5)",
     )
+    studio_run_parser.add_argument(
+        "--start-time",
+        action="store",
+        help="Time to schedule a task in YYYY-MM-DDTHH:mm format or natural language.",
+    )
+    studio_run_parser.add_argument(
+        "--cron", action="store", help="Cron expression for the cron task."
+    )
 
     studio_ls_help = "List jobs in Studio"
     studio_ls_description = "List jobs in Studio."
datachain/lib/arrow.py
CHANGED

@@ -245,7 +245,7 @@ def arrow_type_mapper(col_type: pa.DataType, column: str = "") -> type:  # noqa:
             if field.nullable and not ModelStore.is_pydantic(dtype):
                 dtype = Optional[dtype]  # type: ignore[assignment]
             type_dict[field.name] = dtype
-        return dict_to_data_model(column, type_dict)
+        return dict_to_data_model(f"ArrowDataModel_{column}", type_dict)
     if pa.types.is_map(col_type):
         return dict
     if isinstance(col_type, pa.lib.DictionaryType):
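
The only change in this hunk is the name handed to `dict_to_data_model`: models generated for Arrow struct columns are now prefixed with `ArrowDataModel_`, so the generated class no longer carries the bare column name. A minimal sketch of the naming effect, using `pydantic.create_model` directly (the field spec is illustrative, not datachain's):

    from pydantic import create_model

    # Before: the generated model was named after the bare column ("payload");
    # after: it gets a distinct, namespaced class name.
    model = create_model("ArrowDataModel_payload", value=(int, None))
    print(model.__name__)  # ArrowDataModel_payload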
datachain/lib/audio.py
CHANGED

@@ -33,10 +33,14 @@ def audio_info(file: "Union[File, AudioFile]") -> "Audio":
         frames = int(info.num_frames)
         duration = float(frames / sample_rate) if sample_rate > 0 else 0.0
 
-        # Get format information
-        format_name = getattr(info, "format", "")
         codec_name = getattr(info, "encoding", "")
-
+        file_ext = file.get_file_ext().lower()
+        format_name = _encoding_to_format(codec_name, file_ext)
+
+        bits_per_sample = getattr(info, "bits_per_sample", 0)
+        bit_rate = (
+            bits_per_sample * sample_rate * channels if bits_per_sample > 0 else -1
+        )
 
     except Exception as exc:
         raise FileError(
@@ -54,7 +58,47 @@ def audio_info(file: "Union[File, AudioFile]") -> "Audio":
     )
 
 
-def
+def _encoding_to_format(encoding: str, file_ext: str) -> str:
+    """
+    Map torchaudio encoding to a format name.
+
+    Args:
+        encoding: The encoding string from torchaudio.info()
+        file_ext: The file extension as a fallback
+
+    Returns:
+        Format name as a string
+    """
+    # Direct mapping for formats that match exactly
+    encoding_map = {
+        "FLAC": "flac",
+        "MP3": "mp3",
+        "VORBIS": "ogg",
+        "AMR_WB": "amr",
+        "AMR_NB": "amr",
+        "OPUS": "opus",
+        "GSM": "gsm",
+    }
+
+    if encoding in encoding_map:
+        return encoding_map[encoding]
+
+    # For PCM variants, use file extension to determine format
+    if encoding.startswith("PCM_"):
+        # Common PCM formats by extension
+        pcm_formats = {
+            "wav": "wav",
+            "aiff": "aiff",
+            "au": "au",
+            "raw": "raw",
+        }
+        return pcm_formats.get(file_ext, "wav")  # Default to wav for PCM
+
+    # Fallback to file extension if encoding is unknown
+    return file_ext if file_ext else "unknown"
+
+
+def audio_to_np(
     audio: "AudioFile", start: float = 0, duration: Optional[float] = None
 ) -> "tuple[ndarray, int]":
     """Load audio fragment as numpy array.
@@ -98,14 +142,17 @@ def audio_fragment_np(
     ) from exc
 
 
-def
+def audio_to_bytes(
     audio: "AudioFile",
+    format: str = "wav",
     start: float = 0,
     duration: Optional[float] = None,
-    format: str = "wav",
 ) -> bytes:
-    """Convert audio
-
+    """Convert audio to bytes using soundfile.
+
+    If duration is None, converts from start to end of file.
+    If start is 0 and duration is None, converts entire file."""
+    y, sr = audio_to_np(audio, start, duration)
 
     import io
 
@@ -116,36 +163,82 @@ def audio_fragment_bytes(
     return buffer.getvalue()
 
 
-def
+def save_audio(
     audio: "AudioFile",
-    start: float,
-    end: float,
     output: str,
     format: Optional[str] = None,
+    start: float = 0,
+    end: Optional[float] = None,
 ) -> "AudioFile":
-    """Save audio fragment
-
-
-
-
+    """Save audio file or extract fragment to specified format.
+
+    Args:
+        audio: Source AudioFile object
+        output: Output directory path
+        format: Output format ('wav', 'mp3', etc). Defaults to source format
+        start: Start time in seconds (>= 0). Defaults to 0
+        end: End time in seconds. If None, extracts to end of file
+
+    Returns:
+        AudioFile: New audio file with format conversion/extraction applied
+
+    Examples:
+        save_audio(audio, "/path", "mp3")  # Entire file to MP3
+        save_audio(audio, "s3://bucket/path", "wav", start=2.5)  # From 2.5s to end
+        save_audio(audio, "/path", "flac", start=1, end=3)  # Extract 1-3s fragment
+    """
     if format is None:
         format = audio.get_file_ext()
 
-
-
-
-
-
+    # Validate start time
+    if start < 0:
+        raise ValueError(
+            f"Can't save audio for '{audio.path}', "
+            f"start time must be non-negative: {start:.3f}"
+        )
+
+    # Handle full file conversion when end is None and start is 0
+    if end is None and start == 0:
+        output_file = posixpath.join(output, f"{audio.get_file_stem()}.{format}")
+        try:
+            audio_bytes = audio_to_bytes(audio, format, start=0, duration=None)
+        except Exception as exc:
+            raise FileError(
+                "unable to convert audio file", audio.source, audio.path
+            ) from exc
+    elif end is None:
+        # Extract from start to end of file
+        output_file = posixpath.join(
+            output, f"{audio.get_file_stem()}_{int(start * 1000):06d}_end.{format}"
+        )
+        try:
+            audio_bytes = audio_to_bytes(audio, format, start=start, duration=None)
+        except Exception as exc:
+            raise FileError(
+                "unable to save audio fragment", audio.source, audio.path
+            ) from exc
+    else:
+        # Fragment extraction mode with specific end time
+        if end < 0 or start >= end:
+            raise ValueError(
+                f"Can't save audio for '{audio.path}', "
+                f"invalid time range: ({start:.3f}, {end:.3f})"
+            )
 
-
-
+        duration = end - start
+        start_ms = int(start * 1000)
+        end_ms = int(end * 1000)
+        output_file = posixpath.join(
+            output, f"{audio.get_file_stem()}_{start_ms:06d}_{end_ms:06d}.{format}"
+        )
 
-
+        try:
+            audio_bytes = audio_to_bytes(audio, format, start, duration)
+        except Exception as exc:
+            raise FileError(
+                "unable to save audio fragment", audio.source, audio.path
+            ) from exc
 
-
+    from datachain.lib.file import AudioFile
 
-
-    raise FileError(
-        "unable to save audio fragment", audio.source, audio.path
-    ) from exc
+    return AudioFile.upload(audio_bytes, output_file, catalog=audio._catalog)
datachain/lib/data_model.py
CHANGED

@@ -1,3 +1,5 @@
+import inspect
+import uuid
 from collections.abc import Sequence
 from datetime import datetime
 from typing import ClassVar, Optional, Union, get_args, get_origin
@@ -80,7 +82,9 @@ def dict_to_data_model(
 
     fields = {
         name: (
-            anno
+            anno
+            if inspect.isclass(anno) and issubclass(anno, BaseModel)
+            else Optional[anno],
             Field(
                 validation_alias=AliasChoices(name, original_names[idx] or name),
                 default=None,
@@ -101,6 +105,10 @@ def dict_to_data_model(
             field_info[str(alias)] = (_name, field)
         return field_info
 
+    # Generate random unique name if not provided
+    if not name:
+        name = f"DataModel_{uuid.uuid4().hex[:8]}"
+
     return create_model(
         name,
         __base__=_DataModelStrict,
datachain/lib/dc/datachain.py
CHANGED

@@ -2388,7 +2388,7 @@ class DataChain:
         placement: FileExportPlacement = "fullpath",
         link_type: Literal["copy", "symlink"] = "copy",
         num_threads: Optional[int] = EXPORT_FILES_MAX_THREADS,
-        anon: bool =
+        anon: Optional[bool] = None,
         client_config: Optional[dict] = None,
     ) -> None:
         """Export files from a specified signal to a directory. Files can be
@@ -2403,7 +2403,11 @@ class DataChain:
                 Falls back to `'copy'` if symlinking fails.
             num_threads : number of threads to use for exporting files.
                 By default it uses 5 threads.
-            anon: If
+            anon: If True, we will treat cloud bucket as public one. Default behavior
+                depends on the previous session configuration (e.g. happens in the
+                initial `read_storage`) and particular cloud storage client
+                implementation (e.g. S3 fallbacks to anonymous access if no credentials
+                were found).
             client_config: Optional configuration for the destination storage client
 
         Example:
@@ -2421,8 +2425,8 @@ class DataChain:
         ):
             raise ValueError("Files with the same name found")
 
-        if anon:
-            client_config = (client_config or {}) | {"anon":
+        if anon is not None:
+            client_config = (client_config or {}) | {"anon": anon}
 
         progress_bar = tqdm(
             desc=f"Exporting files to {output}: ",
datachain/lib/dc/hf.py
CHANGED

@@ -25,19 +25,23 @@ def read_hf(
     settings: Optional[dict] = None,
     column: str = "",
     model_name: str = "",
+    limit: int = 0,
     **kwargs,
 ) -> "DataChain":
-    """Generate chain from
+    """Generate chain from Hugging Face Hub dataset.
 
     Parameters:
         dataset : Path or name of the dataset to read from Hugging Face Hub,
             or an instance of `datasets.Dataset`-like object.
-        args : Additional positional arguments to pass to datasets.load_dataset
+        args : Additional positional arguments to pass to `datasets.load_dataset`.
         session : Session to use for the chain.
         settings : Settings to use for the chain.
        column : Generated object column name.
         model_name : Generated model name.
-
+        limit : Limit the number of items to read from the HF dataset.
+            Adds `take(limit)` to the `datasets.load_dataset`.
+            Defaults to 0 (no limit).
+        kwargs : Parameters to pass to `datasets.load_dataset`.
 
     Example:
         Load from Hugging Face Hub:
@@ -53,6 +57,18 @@ def read_hf(
         import datachain as dc
         chain = dc.read_hf(ds)
         ```
+
+        Streaming with limit, for large datasets:
+        ```py
+        import datachain as dc
+        ds = dc.read_hf("beans", split="train", streaming=True, limit=10)
+        ```
+
+        or use HF split syntax (not supported if streaming is enabled):
+        ```py
+        import datachain as dc
+        ds = dc.read_hf("beans", split="train[%10]")
+        ```
     """
     from datachain.lib.hf import HFGenerator, get_output_schema, stream_splits
 
@@ -72,4 +88,4 @@ def read_hf(
     output = {column: model}
 
     chain = read_values(split=list(ds_dict.keys()), session=session, settings=settings)
-    return chain.gen(HFGenerator(dataset, model, *args, **kwargs), output=output)
+    return chain.gen(HFGenerator(dataset, model, limit, *args, **kwargs), output=output)
datachain/lib/dc/storage.py
CHANGED

@@ -33,7 +33,7 @@ def read_storage(
     recursive: Optional[bool] = True,
     column: str = "file",
     update: bool = False,
-    anon: bool =
+    anon: Optional[bool] = None,
     delta: Optional[bool] = False,
     delta_on: Optional[Union[str, Sequence[str]]] = (
         "file.path",
@@ -124,8 +124,8 @@ def read_storage(
 
     file_type = get_file_type(type)
 
-    if anon:
-        client_config = (client_config or {}) | {"anon":
+    if anon is not None:
+        client_config = (client_config or {}) | {"anon": anon}
     session = Session.get(session, client_config=client_config, in_memory=in_memory)
     catalog = session.catalog
     cache = catalog.cache
datachain/lib/file.py
CHANGED

@@ -717,6 +717,23 @@ class ImageFile(File):
         destination = stringify_path(destination)
 
         client: Client = self._catalog.get_client(destination, **(client_config or {}))
+
+        # If format is not provided, determine it from the file extension
+        if format is None:
+            from pathlib import PurePosixPath
+
+            from PIL import Image as PilImage
+
+            ext = PurePosixPath(destination).suffix.lower()
+            format = PilImage.registered_extensions().get(ext)
+
+            if not format:
+                raise FileError(
+                    f"Can't determine format for destination '{destination}'",
+                    self.source,
+                    self.path,
+                )
+
         with client.fs.open(destination, mode="wb") as f:
             self.read().save(f, format=format)
@@ -815,7 +832,10 @@ class VideoFile(File):
             VideoFragment: A Model representing the video fragment.
         """
         if start < 0 or end < 0 or start >= end:
-            raise ValueError(
+            raise ValueError(
+                f"Can't get video fragment for '{self.path}', "
+                f"invalid time range: ({start:.3f}, {end:.3f})"
+            )
 
         return VideoFragment(video=self, start=start, end=end)
@@ -898,7 +918,10 @@ class AudioFile(File):
             AudioFragment: A Model representing the audio fragment.
         """
         if start < 0 or end < 0 or start >= end:
-            raise ValueError(
+            raise ValueError(
+                f"Can't get audio fragment for '{self.path}', "
+                f"invalid time range: ({start:.3f}, {end:.3f})"
+            )
 
         return AudioFragment(audio=self, start=start, end=end)
 
@@ -941,6 +964,35 @@ class AudioFile(File):
             yield self.get_fragment(start, min(start + duration, end))
             start += duration
 
+    def save(  # type: ignore[override]
+        self,
+        output: str,
+        format: Optional[str] = None,
+        start: float = 0,
+        end: Optional[float] = None,
+        client_config: Optional[dict] = None,
+    ) -> "AudioFile":
+        """Save audio file or extract fragment to specified format.
+
+        Args:
+            output: Output directory path
+            format: Output format ('wav', 'mp3', etc). Defaults to source format
+            start: Start time in seconds (>= 0). Defaults to 0
+            end: End time in seconds. If None, extracts to end of file
+            client_config: Optional client configuration
+
+        Returns:
+            AudioFile: New audio file with format conversion/extraction applied
+
+        Examples:
+            audio.save("/path", "mp3")  # Entire file to MP3
+            audio.save("s3://bucket/path", "wav", start=2.5)  # From 2.5s to end as WAV
+            audio.save("/path", "flac", start=1, end=3)  # 1-3s fragment as FLAC
+        """
+        from .audio import save_audio
+
+        return save_audio(self, output, format, start, end)
+
 
 class AudioFragment(DataModel):
     """
@@ -968,10 +1020,10 @@ class AudioFragment(DataModel):
             tuple[ndarray, int]: A tuple containing the audio data as a NumPy array
             and the sample rate.
         """
-        from .audio import
+        from .audio import audio_to_np
 
         duration = self.end - self.start
-        return
+        return audio_to_np(self.audio, self.start, duration)
 
     def read_bytes(self, format: str = "wav") -> bytes:
         """
@@ -984,10 +1036,10 @@ class AudioFragment(DataModel):
         Returns:
             bytes: The encoded audio fragment as bytes.
         """
-        from .audio import
+        from .audio import audio_to_bytes
 
         duration = self.end - self.start
-        return
+        return audio_to_bytes(self.audio, format, self.start, duration)
 
     def save(self, output: str, format: Optional[str] = None) -> "AudioFile":
         """
@@ -1005,9 +1057,9 @@ class AudioFragment(DataModel):
         Returns:
             AudioFile: A Model representing the saved audio file.
         """
-        from .audio import
+        from .audio import save_audio
 
-        return
+        return save_audio(self.audio, output, format, self.start, self.end)
 
 
 class VideoFrame(DataModel):
datachain/lib/hf.py
CHANGED

@@ -69,21 +69,25 @@ class HFGenerator(Generator):
         self,
         ds: Union[str, HFDatasetType],
         output_schema: type["BaseModel"],
+        limit: int = 0,
         *args,
         **kwargs,
     ):
         """
-        Generator for chain from
+        Generator for chain from Hugging Face datasets.
 
         Parameters:
 
-
-
-
+        ds : Path or name of the dataset to read from Hugging Face Hub,
+            or an instance of `datasets.Dataset`-like object.
+        limit : Limit the number of items to read from the HF dataset.
+            Defaults to 0 (no limit).
+        output_schema : Pydantic model for validation.
         """
         super().__init__()
         self.ds = ds
         self.output_schema = output_schema
+        self.limit = limit
         self.args = args
         self.kwargs = kwargs
 
@@ -93,6 +97,8 @@ class HFGenerator(Generator):
     def process(self, split: str = ""):
         desc = "Parsed Hugging Face dataset"
         ds = self.ds_dict[split]
+        if self.limit > 0:
+            ds = ds.take(self.limit)
         if split:
             desc += f" split '{split}'"
         model_fields = self.output_schema._model_fields_by_aliases()  # type: ignore[attr-defined]
@@ -113,7 +119,6 @@ class HFGenerator(Generator):
 
 def stream_splits(ds: Union[str, HFDatasetType], *args, **kwargs):
     if isinstance(ds, str):
-        kwargs["streaming"] = True
         ds = load_dataset(ds, *args, **kwargs)
     if isinstance(ds, (DatasetDict, IterableDatasetDict)):
         return ds
@@ -132,7 +137,12 @@ def convert_feature(val: Any, feat: Any, anno: Any) -> Any:
             sfeat = feat[sname]
             norm_name, info = model_fields[sname]
             sanno = info.annotation
-
+            if isinstance(val[sname], list):
+                sdict[norm_name] = [
+                    convert_feature(v, sfeat, sanno) for v in val[sname]
+                ]
+            else:
+                sdict[norm_name] = convert_feature(val[sname], sfeat, sanno)
         return anno(**sdict)
     if isinstance(feat, Image):
         if isinstance(val, dict):
@@ -174,7 +184,7 @@ def _feature_to_chain_type(name: str, val: Any) -> DataType:  # noqa: PLR0911
         for sname, sval in val.items():
             dtype = _feature_to_chain_type(sname, sval)
             sequence_dict[sname] = dtype  # type: ignore[valid-type]
-        return dict_to_data_model(name, sequence_dict)  # type: ignore[arg-type]
+        return dict_to_data_model(f"HFDataModel_{name}", sequence_dict)  # type: ignore[arg-type]
     if isinstance(val, List):
         return list[_feature_to_chain_type(name, val.feature)]  # type: ignore[arg-type,misc,return-value]
     if isinstance(val, Array2D):
datachain/lib/video.py
CHANGED

@@ -205,7 +205,10 @@ def save_video_fragment(
         VideoFile: Video fragment model.
     """
     if start < 0 or end < 0 or start >= end:
-        raise ValueError(
+        raise ValueError(
+            f"Can't save video fragment for '{video.path}', "
+            f"invalid time range: ({start:.3f}, {end:.3f})"
+        )
 
     if format is None:
         format = video.get_file_ext()
datachain/remote/studio.py
CHANGED

@@ -429,6 +429,8 @@ class StudioClient:
         repository: Optional[str] = None,
         priority: Optional[int] = None,
         cluster: Optional[str] = None,
+        start_time: Optional[str] = None,
+        cron: Optional[str] = None,
     ) -> Response[JobData]:
         data = {
             "query": query,
@@ -442,6 +444,8 @@ class StudioClient:
             "repository": repository,
             "priority": priority,
             "compute_cluster_name": cluster,
+            "start_after": start_time,
+            "cron_expression": cron,
         }
         return self._send_request("datachain/job", data)
datachain/studio.py
CHANGED

@@ -1,8 +1,10 @@
 import asyncio
 import os
 import sys
+from datetime import datetime, timezone
 from typing import TYPE_CHECKING, Optional
 
+import dateparser
 import tabulate
 
 from datachain.config import Config, ConfigLevel
@@ -42,6 +44,8 @@ def process_jobs_args(args: "Namespace"):
         args.req_file,
         args.priority,
         args.cluster,
+        args.start_time,
+        args.cron,
     )
 
     if args.cmd == "cancel":
@@ -262,6 +266,24 @@ def save_config(hostname, token, level=ConfigLevel.GLOBAL):
     return config.config_file()
 
 
+def parse_start_time(start_time_str: Optional[str]) -> Optional[str]:
+    if not start_time_str:
+        return None
+
+    # Parse the datetime string using dateparser
+    parsed_datetime = dateparser.parse(start_time_str)
+
+    if parsed_datetime is None:
+        raise DataChainError(
+            f"Could not parse datetime string: '{start_time_str}'. "
+            f"Supported formats include: '2024-01-15 14:30:00', 'tomorrow 3pm', "
+            f"'monday 9am', '2024-01-15T14:30:00Z', 'in 2 hours', etc."
+        )
+
+    # Convert to ISO format string
+    return parsed_datetime.isoformat()
+
+
 def show_logs_from_client(client, job_id):
     # Sync usage
     async def _run():
@@ -310,6 +332,8 @@ def create_job(
     req_file: Optional[str] = None,
     priority: Optional[int] = None,
     cluster: Optional[str] = None,
+    start_time: Optional[str] = None,
+    cron: Optional[str] = None,
 ):
     query_type = "PYTHON" if query_file.endswith(".py") else "SHELL"
     with open(query_file) as f:
@@ -328,6 +352,11 @@ def create_job(
     client = StudioClient(team=team_name)
     file_ids = upload_files(client, files) if files else []
 
+    # Parse start_time if provided
+    parsed_start_time = parse_start_time(start_time)
+    if cron and parsed_start_time is None:
+        parsed_start_time = datetime.now(timezone.utc).isoformat()
+
     response = client.create_job(
         query=query,
         query_type=query_type,
@@ -340,6 +369,8 @@ def create_job(
         requirements=requirements,
         priority=priority,
         cluster=cluster,
+        start_time=parsed_start_time,
+        cron=cron,
     )
     if not response.ok:
         raise DataChainError(response.message)
@@ -348,6 +379,11 @@ def create_job(
         raise DataChainError("Failed to create job")
 
     job_id = response.data.get("job", {}).get("id")
+
+    if parsed_start_time or cron:
+        print(f"Job {job_id} is scheduled as a task in Studio.")
+        return 0
+
     print(f"Job {job_id} created")
     print("Open the job in Studio at", response.data.get("job", {}).get("url"))
     print("=" * 40)
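
`parse_start_time` leans entirely on `dateparser`, which accepts both fixed formats and natural language; whatever parses is forwarded to Studio as an ISO-8601 string (the `start_after` field of the job payload above). A quick check of the inputs the error message advertises:

    import dateparser

    for text in ["2024-01-15 14:30:00", "tomorrow 3pm", "in 2 hours", "not a date"]:
        dt = dateparser.parse(text)
        print(text, "->", dt.isoformat() if dt else None)  # "not a date" -> None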
{datachain-0.26.3.dist-info → datachain-0.27.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datachain
-Version: 0.
+Version: 0.27.0
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License-Expression: Apache-2.0
@@ -26,6 +26,7 @@ Requires-Dist: packaging
 Requires-Dist: pyarrow
 Requires-Dist: typing-extensions
 Requires-Dist: python-dateutil>=2
+Requires-Dist: dateparser>=1.0.0
 Requires-Dist: attrs>=21.3.0
 Requires-Dist: fsspec>=2024.2.0
 Requires-Dist: s3fs>=2024.2.0
@@ -100,6 +101,7 @@ Provides-Extra: dev
 Requires-Dist: datachain[docs,tests]; extra == "dev"
 Requires-Dist: mypy==1.17.0; extra == "dev"
 Requires-Dist: types-python-dateutil; extra == "dev"
+Requires-Dist: types-dateparser; extra == "dev"
 Requires-Dist: types-pytz; extra == "dev"
 Requires-Dist: types-PyYAML; extra == "dev"
 Requires-Dist: types-requests; extra == "dev"
@@ -118,7 +120,7 @@ Dynamic: license-file
 |logo| DataChain
 ================
 
-|PyPI| |Python Version| |Codecov| |Tests|
+|PyPI| |Python Version| |Codecov| |Tests| |DeepWiki|
 
 .. |logo| image:: docs/assets/datachain.svg
    :height: 24
@@ -134,6 +136,9 @@ Dynamic: license-file
 .. |Tests| image:: https://github.com/iterative/datachain/actions/workflows/tests.yml/badge.svg
    :target: https://github.com/iterative/datachain/actions/workflows/tests.yml
    :alt: Tests
+.. |DeepWiki| image:: https://deepwiki.com/badge.svg
+   :target: https://deepwiki.com/iterative/datachain
+   :alt: DeepWiki
 
 DataChain is a Python-based AI-data warehouse for transforming and analyzing unstructured
 data like images, audio, videos, text and PDFs. It integrates with external storage
{datachain-0.26.3.dist-info → datachain-0.27.0.dist-info}/RECORD
CHANGED

@@ -17,7 +17,7 @@ datachain/project.py,sha256=90D4GpJSA3t0fayYZbzrL3sk4U7EJhQo8psnWvdI7_o,2280
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
 datachain/semver.py,sha256=UB8GHPBtAP3UJGeiuJoInD7SK-DnB93_Xd1qy_CQ9cU,2074
-datachain/studio.py,sha256=
+datachain/studio.py,sha256=RCpVZdHRX-ClEddXaAsZDGFy5o-SOqVCa5NhLj8337s,14486
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
 datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
@@ -35,7 +35,7 @@ datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibV
 datachain/cli/commands/query.py,sha256=Xzfgh14nPVH-sclqX1tpZqgfdTugw5s_44v0D33z6FA,1505
 datachain/cli/commands/show.py,sha256=Cf8wBs12h-xtdOzjU5GTDy2C8rF5HJSF0hDJYER1zH8,1606
 datachain/cli/parser/__init__.py,sha256=NPB6ssP4CCt7G1SWZ_8oNQEH2C1lktWgkyHYXDQJZNc,15073
-datachain/cli/parser/job.py,sha256=
+datachain/cli/parser/job.py,sha256=iytBZaCcQUhaOcRlYZFeAJsscN2T2XcEY7MibTeuZhg,5786
 datachain/cli/parser/studio.py,sha256=Bo__LKM7qhJGgkyX8M_bCvgZ2Gvqq6r_X4t1NdtaBIY,3881
 datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
 datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
@@ -70,13 +70,13 @@ datachain/func/random.py,sha256=t7jwXsI8-hy0qAdvjAntgzy-AHtTAfozlZ1CpKR-QZE,458
 datachain/func/string.py,sha256=X9u4ip97U63RCaKRhMddoze7HgPiY3LbPRn9G06UWWo,7311
 datachain/func/window.py,sha256=ImyRpc1QI8QUSPO7KdD60e_DPVo7Ja0G5kcm6BlyMcw,1584
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/arrow.py,sha256=
-datachain/lib/audio.py,sha256=
+datachain/lib/arrow.py,sha256=geoLvyDd5uMqS3D9Ec1ODlShCUAdtwHUwl8FqbUX_hg,10776
+datachain/lib/audio.py,sha256=fQmIBq-9hrUZtkgeJdPHYA_D8Wfe9D4cQZk4_ijxpNc,7580
 datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
-datachain/lib/data_model.py,sha256=
+datachain/lib/data_model.py,sha256=Rjah76GHwIV6AZQk4rsdg6JLre5D8Kb9T4PS5SXzsPA,3740
 datachain/lib/dataset_info.py,sha256=7w-DoKOyIVoOtWGCgciMLcP5CiAWJB3rVI-vUDF80k0,3311
-datachain/lib/file.py,sha256=
-datachain/lib/hf.py,sha256=
+datachain/lib/file.py,sha256=_ch7xYcpl0kzImgEwccbQ-a5qb9rbEvx1vcuWerOn9k,42608
+datachain/lib/hf.py,sha256=3xdvPQPilnJiGv3H4S4bTGqvrGGlZgZmqjE1n_SMJZg,7293
 datachain/lib/image.py,sha256=erWvZW5M3emnbl6_fGAOPyKm-1EKbt3vOdWPfe3Oo7U,3265
 datachain/lib/listing.py,sha256=U-2stsTEwEsq4Y80dqGfktGzkmB5-ZntnL1_rzXlH0k,7089
 datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
@@ -92,7 +92,7 @@ datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
 datachain/lib/udf.py,sha256=SUnJWRDC3TlLhvpi8iqqJbeZGn5DChot7DyH-0Q-z20,17305
 datachain/lib/udf_signature.py,sha256=Yz20iJ-WF1pijT3hvcDIKFzgWV9gFxZM73KZRx3NbPk,7560
 datachain/lib/utils.py,sha256=rG2y7NwTqZOuomZZRmrA-Q-ANM_j1cToQYqDJoOeGyU,1480
-datachain/lib/video.py,sha256=
+datachain/lib/video.py,sha256=ddVstiMkfxyBPDsnjCKY0d_93bw-DcMqGqN60yzsZoo,6851
 datachain/lib/webdataset.py,sha256=CkW8FfGigNx6wo2EEK4KMjhEE8FamRHWGs2HZuH7jDY,7214
 datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0EVlUE,2525
 datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -104,15 +104,15 @@ datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUO
 datachain/lib/dc/__init__.py,sha256=TFci5HTvYGjBesNUxDAnXaX36PnzPEUSn5a6JxB9o0U,872
 datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
 datachain/lib/dc/database.py,sha256=g5M6NjYR1T0vKte-abV-3Ejnm-HqxTIMir5cRi_SziE,6051
-datachain/lib/dc/datachain.py,sha256=
+datachain/lib/dc/datachain.py,sha256=mLE5v4KhzEQm7HVWBTxY6EwJ2J-YeFVcLUY4I21216c,93212
 datachain/lib/dc/datasets.py,sha256=P6CIJizD2IYFwOQG5D3VbQRjDmUiRH0ysdtb551Xdm8,15098
-datachain/lib/dc/hf.py,sha256=
+datachain/lib/dc/hf.py,sha256=AP_MUHg6HJWae10PN9hD_beQVjrl0cleZ6Cvhtl1yoI,2901
 datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
 datachain/lib/dc/listings.py,sha256=V379Cb-7ZyquM0w7sWArQZkzInZy4GB7QQ1ZfowKzQY,4544
 datachain/lib/dc/pandas.py,sha256=ObueUXDUFKJGu380GmazdG02ARpKAHPhSaymfmOH13E,1489
 datachain/lib/dc/parquet.py,sha256=zYcSgrWwyEDW9UxGUSVdIVsCu15IGEf0xL8KfWQqK94,1782
 datachain/lib/dc/records.py,sha256=FpPbApWopUri1gIaSMsfXN4fevja4mjmfb6Q5eiaGxI,3116
-datachain/lib/dc/storage.py,sha256=
+datachain/lib/dc/storage.py,sha256=FXroEdxOZfbuEBIWfWTkbGwrI0D4_mrLZSRsIQm0WFE,7693
 datachain/lib/dc/utils.py,sha256=VawOAlJSvAtZbsMg33s5tJe21TRx1Km3QggI1nN6tnw,3984
 datachain/lib/dc/values.py,sha256=7l1n352xWrEdql2NhBcZ3hj8xyPglWiY4qHjFPjn6iw,1428
 datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
@@ -136,7 +136,7 @@ datachain/query/session.py,sha256=gKblltJAVQAVSTswAgWGDgGbpmFlFzFVkIQojDCjgXM,68
 datachain/query/udf.py,sha256=e753bDJzTNjGFQn1WGTvOAWSwjDbrFI1-_DDWkWN2ls,1343
 datachain/query/utils.py,sha256=HaSDNH_XGvp_NIcXjcB7j4vJRPi4_tbztDWclYelHY4,1208
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/remote/studio.py,sha256=
+datachain/remote/studio.py,sha256=vsuqCAO65PBJKGLMxOvc3Bmieo2TJwcfc9YclxkzmFk,15350
 datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
 datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
 datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
@@ -158,9 +158,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.
-datachain-0.
-datachain-0.
-datachain-0.
-datachain-0.
-datachain-0.
+datachain-0.27.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.27.0.dist-info/METADATA,sha256=PWZ_EWTpk1OvWlQZe__5SCjFem6BD1AtYmTxJ5wV3iY,13759
+datachain-0.27.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+datachain-0.27.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.27.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.27.0.dist-info/RECORD,,

The remaining four files (WHEEL, entry_points.txt, licenses/LICENSE, top_level.txt) are unchanged.