datachain 0.26.4__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datachain/cli/parser/job.py +8 -3
- datachain/data_storage/job.py +2 -1
- datachain/lib/arrow.py +1 -1
- datachain/lib/audio.py +123 -30
- datachain/lib/data_model.py +9 -1
- datachain/lib/dc/hf.py +20 -4
- datachain/lib/file.py +43 -8
- datachain/lib/hf.py +17 -7
- datachain/lib/video.py +4 -1
- datachain/studio.py +42 -27
- {datachain-0.26.4.dist-info → datachain-0.28.0.dist-info}/METADATA +6 -3
- {datachain-0.26.4.dist-info → datachain-0.28.0.dist-info}/RECORD +16 -16
- {datachain-0.26.4.dist-info → datachain-0.28.0.dist-info}/WHEEL +0 -0
- {datachain-0.26.4.dist-info → datachain-0.28.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.26.4.dist-info → datachain-0.28.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.26.4.dist-info → datachain-0.28.0.dist-info}/top_level.txt +0 -0
datachain/cli/parser/job.py
CHANGED
@@ -20,8 +20,8 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
     studio_run_description = "Run a job in Studio. \n"
     studio_run_description += (
         "When using --start-time or --cron,"
-        " the job is scheduled
-        "
+        " the job is scheduled to run but won't start immediately"
+        " (can be seen in the Tasks tab in UI)"
     )

     studio_run_parser = jobs_subparser.add_parser(
@@ -104,11 +104,16 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
     studio_run_parser.add_argument(
         "--start-time",
         action="store",
-        help="
+        help="Time to schedule a task in YYYY-MM-DDTHH:mm format or natural language.",
     )
     studio_run_parser.add_argument(
         "--cron", action="store", help="Cron expression for the cron task."
     )
+    studio_run_parser.add_argument(
+        "--no-wait",
+        action="store_true",
+        help="Do not wait for the job to finish",
+    )

     studio_ls_help = "List jobs in Studio"
     studio_ls_description = "List jobs in Studio."
datachain/data_storage/job.py
CHANGED
@@ -12,10 +12,11 @@ class JobStatus(int, Enum):
     CANCELING = 7
     CANCELED = 8
     CANCELING_SCHEDULED = 9
+    TASK = 11

     @classmethod
     def finished(cls) -> tuple[int, ...]:
-        return cls.COMPLETE, cls.FAILED, cls.CANCELED
+        return cls.COMPLETE, cls.FAILED, cls.CANCELED, cls.TASK


 class JobQueryType(int, Enum):
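The practical effect of the new `TASK` status is that scheduled and cron jobs now count as terminal for log tailing in the CLI. A quick check of the updated enum, based only on the hunk above:

```python
from datachain.data_storage.job import JobStatus

# TASK (value 11) is now part of the finished() tuple, so `datachain job run`
# can stop tailing logs once a scheduled job transitions to it.
assert JobStatus.TASK in JobStatus.finished()
assert JobStatus.CANCELING not in JobStatus.finished()
```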
datachain/lib/arrow.py
CHANGED
@@ -245,7 +245,7 @@ def arrow_type_mapper(col_type: pa.DataType, column: str = "") -> type:  # noqa:
             if field.nullable and not ModelStore.is_pydantic(dtype):
                 dtype = Optional[dtype]  # type: ignore[assignment]
             type_dict[field.name] = dtype
-        return dict_to_data_model(column, type_dict)
+        return dict_to_data_model(f"ArrowDataModel_{column}", type_dict)
     if pa.types.is_map(col_type):
         return dict
     if isinstance(col_type, pa.lib.DictionaryType):
datachain/lib/audio.py
CHANGED
@@ -33,10 +33,14 @@ def audio_info(file: "Union[File, AudioFile]") -> "Audio":
         frames = int(info.num_frames)
         duration = float(frames / sample_rate) if sample_rate > 0 else 0.0

-        # Get format information
-        format_name = getattr(info, "format", "")
         codec_name = getattr(info, "encoding", "")
-
+        file_ext = file.get_file_ext().lower()
+        format_name = _encoding_to_format(codec_name, file_ext)
+
+        bits_per_sample = getattr(info, "bits_per_sample", 0)
+        bit_rate = (
+            bits_per_sample * sample_rate * channels if bits_per_sample > 0 else -1
+        )

     except Exception as exc:
         raise FileError(
@@ -54,7 +58,47 @@ def audio_info(file: "Union[File, AudioFile]") -> "Audio":
     )


-def
+def _encoding_to_format(encoding: str, file_ext: str) -> str:
+    """
+    Map torchaudio encoding to a format name.
+
+    Args:
+        encoding: The encoding string from torchaudio.info()
+        file_ext: The file extension as a fallback
+
+    Returns:
+        Format name as a string
+    """
+    # Direct mapping for formats that match exactly
+    encoding_map = {
+        "FLAC": "flac",
+        "MP3": "mp3",
+        "VORBIS": "ogg",
+        "AMR_WB": "amr",
+        "AMR_NB": "amr",
+        "OPUS": "opus",
+        "GSM": "gsm",
+    }
+
+    if encoding in encoding_map:
+        return encoding_map[encoding]
+
+    # For PCM variants, use file extension to determine format
+    if encoding.startswith("PCM_"):
+        # Common PCM formats by extension
+        pcm_formats = {
+            "wav": "wav",
+            "aiff": "aiff",
+            "au": "au",
+            "raw": "raw",
+        }
+        return pcm_formats.get(file_ext, "wav")  # Default to wav for PCM
+
+    # Fallback to file extension if encoding is unknown
+    return file_ext if file_ext else "unknown"
+
+
+def audio_to_np(
     audio: "AudioFile", start: float = 0, duration: Optional[float] = None
 ) -> "tuple[ndarray, int]":
     """Load audio fragment as numpy array.
@@ -98,14 +142,17 @@ def audio_fragment_np(
     ) from exc


-def
+def audio_to_bytes(
     audio: "AudioFile",
+    format: str = "wav",
     start: float = 0,
     duration: Optional[float] = None,
-    format: str = "wav",
 ) -> bytes:
-    """Convert audio
-
+    """Convert audio to bytes using soundfile.
+
+    If duration is None, converts from start to end of file.
+    If start is 0 and duration is None, converts entire file."""
+    y, sr = audio_to_np(audio, start, duration)

     import io

@@ -116,36 +163,82 @@ def audio_fragment_bytes(
     return buffer.getvalue()


-def
+def save_audio(
     audio: "AudioFile",
-    start: float,
-    end: float,
     output: str,
     format: Optional[str] = None,
+    start: float = 0,
+    end: Optional[float] = None,
 ) -> "AudioFile":
-    """Save audio fragment
-
-
-
-
+    """Save audio file or extract fragment to specified format.
+
+    Args:
+        audio: Source AudioFile object
+        output: Output directory path
+        format: Output format ('wav', 'mp3', etc). Defaults to source format
+        start: Start time in seconds (>= 0). Defaults to 0
+        end: End time in seconds. If None, extracts to end of file
+
+    Returns:
+        AudioFile: New audio file with format conversion/extraction applied
+
+    Examples:
+        save_audio(audio, "/path", "mp3")  # Entire file to MP3
+        save_audio(audio, "s3://bucket/path", "wav", start=2.5)  # From 2.5s to end
+        save_audio(audio, "/path", "flac", start=1, end=3)  # Extract 1-3s fragment
+    """
     if format is None:
         format = audio.get_file_ext()

-
-
-
-
-
-
+    # Validate start time
+    if start < 0:
+        raise ValueError(
+            f"Can't save audio for '{audio.path}', "
+            f"start time must be non-negative: {start:.3f}"
+        )
+
+    # Handle full file conversion when end is None and start is 0
+    if end is None and start == 0:
+        output_file = posixpath.join(output, f"{audio.get_file_stem()}.{format}")
+        try:
+            audio_bytes = audio_to_bytes(audio, format, start=0, duration=None)
+        except Exception as exc:
+            raise FileError(
+                "unable to convert audio file", audio.source, audio.path
+            ) from exc
+    elif end is None:
+        # Extract from start to end of file
+        output_file = posixpath.join(
+            output, f"{audio.get_file_stem()}_{int(start * 1000):06d}_end.{format}"
+        )
+        try:
+            audio_bytes = audio_to_bytes(audio, format, start=start, duration=None)
+        except Exception as exc:
+            raise FileError(
+                "unable to save audio fragment", audio.source, audio.path
+            ) from exc
+    else:
+        # Fragment extraction mode with specific end time
+        if end < 0 or start >= end:
+            raise ValueError(
+                f"Can't save audio for '{audio.path}', "
+                f"invalid time range: ({start:.3f}, {end:.3f})"
+            )

-
-
+        duration = end - start
+        start_ms = int(start * 1000)
+        end_ms = int(end * 1000)
+        output_file = posixpath.join(
+            output, f"{audio.get_file_stem()}_{start_ms:06d}_{end_ms:06d}.{format}"
+        )

-
+        try:
+            audio_bytes = audio_to_bytes(audio, format, start, duration)
+        except Exception as exc:
+            raise FileError(
+                "unable to save audio fragment", audio.source, audio.path
+            ) from exc

-
+    from datachain.lib.file import AudioFile

-
-    raise FileError(
-        "unable to save audio fragment", audio.source, audio.path
-    ) from exc
+    return AudioFile.upload(audio_bytes, output_file, catalog=audio._catalog)
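The output path produced by `save_audio` follows a simple naming scheme: whole-file conversions keep the original stem, open-ended extractions append `_end`, and bounded fragments encode both boundaries as zero-padded milliseconds. A small illustration of the naming logic from the hunk above (stem, format, and directory are hypothetical):

```python
import posixpath

stem, fmt = "speech", "wav"

# Whole-file conversion (start=0, end=None)
print(posixpath.join("clips", f"{stem}.{fmt}"))
# clips/speech.wav

# From 2.5s to end of file (end=None)
print(posixpath.join("clips", f"{stem}_{int(2.5 * 1000):06d}_end.{fmt}"))
# clips/speech_002500_end.wav

# Bounded fragment from 1s to 3s
print(posixpath.join("clips", f"{stem}_{int(1.0 * 1000):06d}_{int(3.0 * 1000):06d}.{fmt}"))
# clips/speech_001000_003000.wav
```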
datachain/lib/data_model.py
CHANGED
@@ -1,3 +1,5 @@
+import inspect
+import uuid
 from collections.abc import Sequence
 from datetime import datetime
 from typing import ClassVar, Optional, Union, get_args, get_origin
@@ -80,7 +82,9 @@ def dict_to_data_model(

     fields = {
         name: (
-            anno
+            anno
+            if inspect.isclass(anno) and issubclass(anno, BaseModel)
+            else Optional[anno],
             Field(
                 validation_alias=AliasChoices(name, original_names[idx] or name),
                 default=None,
@@ -101,6 +105,10 @@ def dict_to_data_model(
             field_info[str(alias)] = (_name, field)
         return field_info

+    # Generate random unique name if not provided
+    if not name:
+        name = f"DataModel_{uuid.uuid4().hex[:8]}"
+
     return create_model(
         name,
         __base__=_DataModelStrict,
CHANGED
|
@@ -25,19 +25,23 @@ def read_hf(
|
|
|
25
25
|
settings: Optional[dict] = None,
|
|
26
26
|
column: str = "",
|
|
27
27
|
model_name: str = "",
|
|
28
|
+
limit: int = 0,
|
|
28
29
|
**kwargs,
|
|
29
30
|
) -> "DataChain":
|
|
30
|
-
"""Generate chain from
|
|
31
|
+
"""Generate chain from Hugging Face Hub dataset.
|
|
31
32
|
|
|
32
33
|
Parameters:
|
|
33
34
|
dataset : Path or name of the dataset to read from Hugging Face Hub,
|
|
34
35
|
or an instance of `datasets.Dataset`-like object.
|
|
35
|
-
args : Additional positional arguments to pass to datasets.load_dataset
|
|
36
|
+
args : Additional positional arguments to pass to `datasets.load_dataset`.
|
|
36
37
|
session : Session to use for the chain.
|
|
37
38
|
settings : Settings to use for the chain.
|
|
38
39
|
column : Generated object column name.
|
|
39
40
|
model_name : Generated model name.
|
|
40
|
-
|
|
41
|
+
limit : Limit the number of items to read from the HF dataset.
|
|
42
|
+
Adds `take(limit)` to the `datasets.load_dataset`.
|
|
43
|
+
Defaults to 0 (no limit).
|
|
44
|
+
kwargs : Parameters to pass to `datasets.load_dataset`.
|
|
41
45
|
|
|
42
46
|
Example:
|
|
43
47
|
Load from Hugging Face Hub:
|
|
@@ -53,6 +57,18 @@ def read_hf(
|
|
|
53
57
|
import datachain as dc
|
|
54
58
|
chain = dc.read_hf(ds)
|
|
55
59
|
```
|
|
60
|
+
|
|
61
|
+
Streaming with limit, for large datasets:
|
|
62
|
+
```py
|
|
63
|
+
import datachain as dc
|
|
64
|
+
ds = dc.read_hf("beans", split="train", streaming=True, limit=10)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
or use HF split syntax (not supported if streaming is enabled):
|
|
68
|
+
```py
|
|
69
|
+
import datachain as dc
|
|
70
|
+
ds = dc.read_hf("beans", split="train[%10]")
|
|
71
|
+
```
|
|
56
72
|
"""
|
|
57
73
|
from datachain.lib.hf import HFGenerator, get_output_schema, stream_splits
|
|
58
74
|
|
|
@@ -72,4 +88,4 @@ def read_hf(
|
|
|
72
88
|
output = {column: model}
|
|
73
89
|
|
|
74
90
|
chain = read_values(split=list(ds_dict.keys()), session=session, settings=settings)
|
|
75
|
-
return chain.gen(HFGenerator(dataset, model, *args, **kwargs), output=output)
|
|
91
|
+
return chain.gen(HFGenerator(dataset, model, limit, *args, **kwargs), output=output)
|
datachain/lib/file.py
CHANGED
@@ -832,7 +832,10 @@ class VideoFile(File):
             VideoFragment: A Model representing the video fragment.
         """
         if start < 0 or end < 0 or start >= end:
-            raise ValueError(
+            raise ValueError(
+                f"Can't get video fragment for '{self.path}', "
+                f"invalid time range: ({start:.3f}, {end:.3f})"
+            )

         return VideoFragment(video=self, start=start, end=end)

@@ -915,7 +918,10 @@ class AudioFile(File):
             AudioFragment: A Model representing the audio fragment.
         """
         if start < 0 or end < 0 or start >= end:
-            raise ValueError(
+            raise ValueError(
+                f"Can't get audio fragment for '{self.path}', "
+                f"invalid time range: ({start:.3f}, {end:.3f})"
+            )

         return AudioFragment(audio=self, start=start, end=end)

@@ -958,6 +964,35 @@ class AudioFile(File):
             yield self.get_fragment(start, min(start + duration, end))
             start += duration

+    def save(  # type: ignore[override]
+        self,
+        output: str,
+        format: Optional[str] = None,
+        start: float = 0,
+        end: Optional[float] = None,
+        client_config: Optional[dict] = None,
+    ) -> "AudioFile":
+        """Save audio file or extract fragment to specified format.
+
+        Args:
+            output: Output directory path
+            format: Output format ('wav', 'mp3', etc). Defaults to source format
+            start: Start time in seconds (>= 0). Defaults to 0
+            end: End time in seconds. If None, extracts to end of file
+            client_config: Optional client configuration
+
+        Returns:
+            AudioFile: New audio file with format conversion/extraction applied
+
+        Examples:
+            audio.save("/path", "mp3")  # Entire file to MP3
+            audio.save("s3://bucket/path", "wav", start=2.5)  # From 2.5s to end as WAV
+            audio.save("/path", "flac", start=1, end=3)  # 1-3s fragment as FLAC
+        """
+        from .audio import save_audio
+
+        return save_audio(self, output, format, start, end)
+

 class AudioFragment(DataModel):
     """
@@ -985,10 +1020,10 @@ class AudioFragment(DataModel):
             tuple[ndarray, int]: A tuple containing the audio data as a NumPy array
                 and the sample rate.
         """
-        from .audio import
+        from .audio import audio_to_np

         duration = self.end - self.start
-        return
+        return audio_to_np(self.audio, self.start, duration)

     def read_bytes(self, format: str = "wav") -> bytes:
         """
@@ -1001,10 +1036,10 @@ class AudioFragment(DataModel):
         Returns:
             bytes: The encoded audio fragment as bytes.
         """
-        from .audio import
+        from .audio import audio_to_bytes

         duration = self.end - self.start
-        return
+        return audio_to_bytes(self.audio, format, self.start, duration)

     def save(self, output: str, format: Optional[str] = None) -> "AudioFile":
         """
@@ -1022,9 +1057,9 @@ class AudioFragment(DataModel):
         Returns:
             AudioFile: A Model representing the saved audio file.
         """
-        from .audio import
+        from .audio import save_audio

-        return
+        return save_audio(self.audio, output, format, self.start, self.end)


 class VideoFrame(DataModel):
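Taken together with the `AudioFragment` changes, the new `AudioFile.save` covers both whole-file conversion and fragment extraction. A hedged usage sketch of the methods shown in the hunks above; the `audio` argument and output paths are placeholders, not part of the diff:

```python
from datachain.lib.file import AudioFile


def convert_and_clip(audio: AudioFile) -> None:
    """Usage sketch only: `audio` is assumed to come from a DataChain dataset."""
    # Whole-file conversion (new in 0.28.0: start/end default to the full file)
    audio.save("/tmp/converted", "mp3")

    # Validated fragment: a negative or inverted range raises ValueError
    clip = audio.get_fragment(1.0, 3.0)
    clip.read_bytes(format="wav")           # encode just the 1-3 s span
    clip.save("/tmp/clips", format="flac")  # persist as a new AudioFile
```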
datachain/lib/hf.py
CHANGED
@@ -69,21 +69,25 @@ class HFGenerator(Generator):
         self,
         ds: Union[str, HFDatasetType],
         output_schema: type["BaseModel"],
+        limit: int = 0,
         *args,
         **kwargs,
     ):
         """
-        Generator for chain from
+        Generator for chain from Hugging Face datasets.

         Parameters:

-
-
-
+            ds : Path or name of the dataset to read from Hugging Face Hub,
+                or an instance of `datasets.Dataset`-like object.
+            limit : Limit the number of items to read from the HF dataset.
+                Defaults to 0 (no limit).
+            output_schema : Pydantic model for validation.
         """
         super().__init__()
         self.ds = ds
         self.output_schema = output_schema
+        self.limit = limit
         self.args = args
         self.kwargs = kwargs

@@ -93,6 +97,8 @@ class HFGenerator(Generator):
     def process(self, split: str = ""):
         desc = "Parsed Hugging Face dataset"
         ds = self.ds_dict[split]
+        if self.limit > 0:
+            ds = ds.take(self.limit)
         if split:
             desc += f" split '{split}'"
         model_fields = self.output_schema._model_fields_by_aliases()  # type: ignore[attr-defined]
@@ -113,7 +119,6 @@ class HFGenerator(Generator):

 def stream_splits(ds: Union[str, HFDatasetType], *args, **kwargs):
     if isinstance(ds, str):
-        kwargs["streaming"] = True
         ds = load_dataset(ds, *args, **kwargs)
     if isinstance(ds, (DatasetDict, IterableDatasetDict)):
         return ds
@@ -132,7 +137,12 @@ def convert_feature(val: Any, feat: Any, anno: Any) -> Any:
             sfeat = feat[sname]
             norm_name, info = model_fields[sname]
             sanno = info.annotation
-
+            if isinstance(val[sname], list):
+                sdict[norm_name] = [
+                    convert_feature(v, sfeat, sanno) for v in val[sname]
+                ]
+            else:
+                sdict[norm_name] = convert_feature(val[sname], sfeat, sanno)
         return anno(**sdict)
     if isinstance(feat, Image):
         if isinstance(val, dict):
@@ -174,7 +184,7 @@ def _feature_to_chain_type(name: str, val: Any) -> DataType:  # noqa: PLR0911
         for sname, sval in val.items():
             dtype = _feature_to_chain_type(sname, sval)
             sequence_dict[sname] = dtype  # type: ignore[valid-type]
-        return dict_to_data_model(name, sequence_dict)  # type: ignore[arg-type]
+        return dict_to_data_model(f"HFDataModel_{name}", sequence_dict)  # type: ignore[arg-type]
     if isinstance(val, List):
         return list[_feature_to_chain_type(name, val.feature)]  # type: ignore[arg-type,misc,return-value]
     if isinstance(val, Array2D):
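Two behavioral notes fall out of these hunks: `stream_splits` no longer forces `streaming=True` when a dataset name is passed, so callers must request streaming explicitly, and `limit` is applied through the dataset's own `take()`. A sketch of the equivalent standalone logic, assuming the `datasets` library and the public `beans` dataset:

```python
from datasets import load_dataset

# Streaming must now be requested explicitly; it is no longer injected.
ds = load_dataset("beans", split="train", streaming=True)

limit = 10
if limit > 0:
    # IterableDataset.take() stops iteration after `limit` examples.
    ds = ds.take(limit)

for row in ds:
    ...  # each row is converted into the generated Pydantic model
```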
datachain/lib/video.py
CHANGED
@@ -205,7 +205,10 @@ def save_video_fragment(
         VideoFile: Video fragment model.
     """
     if start < 0 or end < 0 or start >= end:
-        raise ValueError(
+        raise ValueError(
+            f"Can't save video fragment for '{video.path}', "
+            f"invalid time range: ({start:.3f}, {end:.3f})"
+        )

     if format is None:
         format = video.get_file_ext()
datachain/studio.py
CHANGED
@@ -8,6 +8,7 @@ import dateparser
 import tabulate

 from datachain.config import Config, ConfigLevel
+from datachain.data_storage.job import JobStatus
 from datachain.dataset import QUERY_DATASET_PREFIX, parse_dataset_name
 from datachain.error import DataChainError
 from datachain.remote.studio import StudioClient
@@ -20,6 +21,8 @@ POST_LOGIN_MESSAGE = (
     "Once you've logged in, return here "
     "and you'll be ready to start using DataChain with Studio."
 )
+RETRY_MAX_TIMES = 10
+RETRY_SLEEP_SEC = 1


 def process_jobs_args(args: "Namespace"):
@@ -46,6 +49,7 @@ def process_jobs_args(args: "Namespace"):
             args.cluster,
             args.start_time,
             args.cron,
+            args.no_wait,
         )

     if args.cmd == "cancel":
@@ -270,41 +274,51 @@ def parse_start_time(start_time_str: Optional[str]) -> Optional[str]:
     if not start_time_str:
         return None

-
-
-    parsed_datetime = dateparser.parse(start_time_str)
-
-    if parsed_datetime is None:
-        raise DataChainError(
-            f"Could not parse datetime string: '{start_time_str}'. "
-            f"Supported formats include: '2024-01-15 14:30:00', 'tomorrow 3pm', "
-            f"'monday 9am', '2024-01-15T14:30:00Z', 'in 2 hours', etc."
-        )
+    # Parse the datetime string using dateparser
+    parsed_datetime = dateparser.parse(start_time_str)

-
-        return parsed_datetime.isoformat()
-    except Exception as e:
+    if parsed_datetime is None:
         raise DataChainError(
-            f"
+            f"Could not parse datetime string: '{start_time_str}'. "
             f"Supported formats include: '2024-01-15 14:30:00', 'tomorrow 3pm', "
-            f"'monday 9am', '2024-01-15T14:30:00Z', 'in 2 hours', etc.
-        )
+            f"'monday 9am', '2024-01-15T14:30:00Z', 'in 2 hours', etc."
+        )
+
+    # Convert to ISO format string
+    return parsed_datetime.isoformat()


 def show_logs_from_client(client, job_id):
     # Sync usage
     async def _run():
+        retry_count = 0
         latest_status = None
-
-
-
-
-
-
-
+        processed_statuses = set()
+        while True:
+            async for message in client.tail_job_logs(job_id):
+                if "logs" in message:
+                    for log in message["logs"]:
+                        print(log["message"], end="")
+                elif "job" in message:
+                    latest_status = message["job"]["status"]
+                    if latest_status in processed_statuses:
+                        continue
+                    processed_statuses.add(latest_status)
+                    print(f"\n>>>> Job is now in {latest_status} status.")
+
+            try:
+                if retry_count > RETRY_MAX_TIMES or (
+                    latest_status and JobStatus[latest_status].finished()
+                ):
+                    break
+                await asyncio.sleep(RETRY_SLEEP_SEC)
+                retry_count += 1
+            except KeyError:
+                pass
+
         return latest_status

-
+    final_status = asyncio.run(_run())

     response = client.dataset_job_versions(job_id)
     if not response.ok:
@@ -321,9 +335,9 @@ def show_logs_from_client(client, job_id):

     exit_code_by_status = {
         "FAILED": 1,
-        "
+        "CANCELED": 2,
     }
-    return exit_code_by_status.get(
+    return exit_code_by_status.get(final_status.upper(), 0) if final_status else 0
@@ -341,6 +355,7 @@ def create_job(
     cluster: Optional[str] = None,
     start_time: Optional[str] = None,
     cron: Optional[str] = None,
+    no_wait: Optional[bool] = False,
 ):
     query_type = "PYTHON" if query_file.endswith(".py") else "SHELL"
     with open(query_file) as f:
@@ -395,7 +410,7 @@ def create_job(
     print("Open the job in Studio at", response.data.get("job", {}).get("url"))
     print("=" * 40)

-    return show_logs_from_client(client, job_id)
+    return 0 if no_wait else show_logs_from_client(client, job_id)


 def upload_files(client: StudioClient, files: list[str]) -> list[str]:
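Exit codes from `datachain job run` now map the job's final status explicitly: `FAILED` exits 1, `CANCELED` exits 2, and any other (or missing) status exits 0; with `--no-wait` the command returns 0 without tailing logs at all. A small sketch of the mapping from the hunk above:

```python
EXIT_CODE_BY_STATUS = {"FAILED": 1, "CANCELED": 2}


def exit_code(final_status):
    # Same expression as the diff: unknown or absent statuses fall back to 0.
    return EXIT_CODE_BY_STATUS.get(final_status.upper(), 0) if final_status else 0


print(exit_code("canceled"))  # 2
print(exit_code("COMPLETE"))  # 0
print(exit_code(None))        # 0
```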
{datachain-0.26.4.dist-info → datachain-0.28.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datachain
-Version: 0.26.4
+Version: 0.28.0
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License-Expression: Apache-2.0
@@ -45,7 +45,7 @@ Requires-Dist: datamodel-code-generator>=0.25
 Requires-Dist: Pillow<12,>=10.0.0
 Requires-Dist: msgpack<2,>=1.0.4
 Requires-Dist: psutil
-Requires-Dist: huggingface_hub
+Requires-Dist: huggingface_hub<0.34.0
 Requires-Dist: iterative-telemetry>=0.0.10
 Requires-Dist: platformdirs
 Requires-Dist: dvc-studio-client<1,>=0.21
@@ -120,7 +120,7 @@ Dynamic: license-file
 |logo| DataChain
 ================

-|PyPI| |Python Version| |Codecov| |Tests|
+|PyPI| |Python Version| |Codecov| |Tests| |DeepWiki|

 .. |logo| image:: docs/assets/datachain.svg
    :height: 24
@@ -136,6 +136,9 @@ Dynamic: license-file
 .. |Tests| image:: https://github.com/iterative/datachain/actions/workflows/tests.yml/badge.svg
    :target: https://github.com/iterative/datachain/actions/workflows/tests.yml
    :alt: Tests
+.. |DeepWiki| image:: https://deepwiki.com/badge.svg
+   :target: https://deepwiki.com/iterative/datachain
+   :alt: DeepWiki

 DataChain is a Python-based AI-data warehouse for transforming and analyzing unstructured
 data like images, audio, videos, text and PDFs. It integrates with external storage
{datachain-0.26.4.dist-info → datachain-0.28.0.dist-info}/RECORD
CHANGED

@@ -17,7 +17,7 @@ datachain/project.py,sha256=90D4GpJSA3t0fayYZbzrL3sk4U7EJhQo8psnWvdI7_o,2280
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
 datachain/semver.py,sha256=UB8GHPBtAP3UJGeiuJoInD7SK-DnB93_Xd1qy_CQ9cU,2074
-datachain/studio.py,sha256
+datachain/studio.py,sha256=-BmKLVNBLPFveUgVVE2So3aaiGndO2jK2qbHZ0zBDd8,15239
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
 datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
@@ -35,7 +35,7 @@ datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibV
 datachain/cli/commands/query.py,sha256=Xzfgh14nPVH-sclqX1tpZqgfdTugw5s_44v0D33z6FA,1505
 datachain/cli/commands/show.py,sha256=Cf8wBs12h-xtdOzjU5GTDy2C8rF5HJSF0hDJYER1zH8,1606
 datachain/cli/parser/__init__.py,sha256=NPB6ssP4CCt7G1SWZ_8oNQEH2C1lktWgkyHYXDQJZNc,15073
-datachain/cli/parser/job.py,sha256=
+datachain/cli/parser/job.py,sha256=g6ozI3pnV0ly79L7M9mikCeYTPgKlG5gR0D144R82tk,5928
 datachain/cli/parser/studio.py,sha256=Bo__LKM7qhJGgkyX8M_bCvgZ2Gvqq6r_X4t1NdtaBIY,3881
 datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
 datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
@@ -48,7 +48,7 @@ datachain/client/local.py,sha256=0J52Wzvw25hSucVlzBvLuMRAZwrAHZAYDvD1mNBqf4c,460
 datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
 datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
 datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
-datachain/data_storage/job.py,sha256=
+datachain/data_storage/job.py,sha256=ZkeXCNUj_VCkoKYx29hqB4AcfVUielnRjY-GYUcUxt4,426
 datachain/data_storage/metastore.py,sha256=Qw332arvhgXB4UY0yX-Hu8Vgl3smU12l6bvxrL9Q-vo,53810
 datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
@@ -70,13 +70,13 @@ datachain/func/random.py,sha256=t7jwXsI8-hy0qAdvjAntgzy-AHtTAfozlZ1CpKR-QZE,458
 datachain/func/string.py,sha256=X9u4ip97U63RCaKRhMddoze7HgPiY3LbPRn9G06UWWo,7311
 datachain/func/window.py,sha256=ImyRpc1QI8QUSPO7KdD60e_DPVo7Ja0G5kcm6BlyMcw,1584
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/arrow.py,sha256=
-datachain/lib/audio.py,sha256=
+datachain/lib/arrow.py,sha256=geoLvyDd5uMqS3D9Ec1ODlShCUAdtwHUwl8FqbUX_hg,10776
+datachain/lib/audio.py,sha256=fQmIBq-9hrUZtkgeJdPHYA_D8Wfe9D4cQZk4_ijxpNc,7580
 datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
-datachain/lib/data_model.py,sha256=
+datachain/lib/data_model.py,sha256=Rjah76GHwIV6AZQk4rsdg6JLre5D8Kb9T4PS5SXzsPA,3740
 datachain/lib/dataset_info.py,sha256=7w-DoKOyIVoOtWGCgciMLcP5CiAWJB3rVI-vUDF80k0,3311
-datachain/lib/file.py,sha256=
-datachain/lib/hf.py,sha256=
+datachain/lib/file.py,sha256=_ch7xYcpl0kzImgEwccbQ-a5qb9rbEvx1vcuWerOn9k,42608
+datachain/lib/hf.py,sha256=3xdvPQPilnJiGv3H4S4bTGqvrGGlZgZmqjE1n_SMJZg,7293
 datachain/lib/image.py,sha256=erWvZW5M3emnbl6_fGAOPyKm-1EKbt3vOdWPfe3Oo7U,3265
 datachain/lib/listing.py,sha256=U-2stsTEwEsq4Y80dqGfktGzkmB5-ZntnL1_rzXlH0k,7089
 datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
@@ -92,7 +92,7 @@ datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
 datachain/lib/udf.py,sha256=SUnJWRDC3TlLhvpi8iqqJbeZGn5DChot7DyH-0Q-z20,17305
 datachain/lib/udf_signature.py,sha256=Yz20iJ-WF1pijT3hvcDIKFzgWV9gFxZM73KZRx3NbPk,7560
 datachain/lib/utils.py,sha256=rG2y7NwTqZOuomZZRmrA-Q-ANM_j1cToQYqDJoOeGyU,1480
-datachain/lib/video.py,sha256=
+datachain/lib/video.py,sha256=ddVstiMkfxyBPDsnjCKY0d_93bw-DcMqGqN60yzsZoo,6851
 datachain/lib/webdataset.py,sha256=CkW8FfGigNx6wo2EEK4KMjhEE8FamRHWGs2HZuH7jDY,7214
 datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0EVlUE,2525
 datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -106,7 +106,7 @@ datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
 datachain/lib/dc/database.py,sha256=g5M6NjYR1T0vKte-abV-3Ejnm-HqxTIMir5cRi_SziE,6051
 datachain/lib/dc/datachain.py,sha256=mLE5v4KhzEQm7HVWBTxY6EwJ2J-YeFVcLUY4I21216c,93212
 datachain/lib/dc/datasets.py,sha256=P6CIJizD2IYFwOQG5D3VbQRjDmUiRH0ysdtb551Xdm8,15098
-datachain/lib/dc/hf.py,sha256=
+datachain/lib/dc/hf.py,sha256=AP_MUHg6HJWae10PN9hD_beQVjrl0cleZ6Cvhtl1yoI,2901
 datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
 datachain/lib/dc/listings.py,sha256=V379Cb-7ZyquM0w7sWArQZkzInZy4GB7QQ1ZfowKzQY,4544
 datachain/lib/dc/pandas.py,sha256=ObueUXDUFKJGu380GmazdG02ARpKAHPhSaymfmOH13E,1489
@@ -158,9 +158,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.
-datachain-0.
-datachain-0.
-datachain-0.
-datachain-0.
-datachain-0.
+datachain-0.28.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.28.0.dist-info/METADATA,sha256=lA3lv9RX2NeQPobrEjoEbAwg5K3zmnAnbDJ_hjR8KLw,13766
+datachain-0.28.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+datachain-0.28.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.28.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.28.0.dist-info/RECORD,,

{datachain-0.26.4.dist-info → datachain-0.28.0.dist-info}/WHEEL
File without changes

{datachain-0.26.4.dist-info → datachain-0.28.0.dist-info}/entry_points.txt
File without changes

{datachain-0.26.4.dist-info → datachain-0.28.0.dist-info}/licenses/LICENSE
File without changes

{datachain-0.26.4.dist-info → datachain-0.28.0.dist-info}/top_level.txt
File without changes