datachain 0.36.6__py3-none-any.whl → 0.37.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datachain/catalog/catalog.py +2 -0
- datachain/data_storage/metastore.py +16 -0
- datachain/delta.py +3 -1
- datachain/diff/__init__.py +3 -1
- datachain/job.py +1 -1
- datachain/lib/dc/datachain.py +10 -17
- datachain/lib/dc/records.py +0 -2
- datachain/query/dataset.py +0 -4
- datachain/query/session.py +163 -21
- {datachain-0.36.6.dist-info → datachain-0.37.1.dist-info}/METADATA +2 -2
- {datachain-0.36.6.dist-info → datachain-0.37.1.dist-info}/RECORD +15 -15
- {datachain-0.36.6.dist-info → datachain-0.37.1.dist-info}/WHEEL +0 -0
- {datachain-0.36.6.dist-info → datachain-0.37.1.dist-info}/entry_points.txt +0 -0
- {datachain-0.36.6.dist-info → datachain-0.37.1.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.36.6.dist-info → datachain-0.37.1.dist-info}/top_level.txt +0 -0
datachain/catalog/catalog.py
CHANGED

@@ -793,6 +793,7 @@ class Catalog:
         description: str | None = None,
         attrs: list[str] | None = None,
         update_version: str | None = "patch",
+        job_id: str | None = None,
     ) -> "DatasetRecord":
         """
         Creates new dataset of a specific version.
@@ -866,6 +867,7 @@ class Catalog:
             create_rows_table=create_rows,
             columns=columns,
             uuid=uuid,
+            job_id=job_id,
         )

     def create_new_dataset_version(
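The catalog change is pure plumbing: create_dataset gains an optional job_id and forwards it untouched into version creation, so a saved dataset can be traced back to the job that produced it. A minimal sketch of the pattern, with both functions as hypothetical stand-ins rather than datachain's real signatures:

    # Hypothetical stand-ins showing an optional job_id threaded through layers.
    def create_version(name: str, *, job_id: str | None = None) -> dict:
        return {"name": name, "job_id": job_id}

    def create_dataset(name: str, *, job_id: str | None = None) -> dict:
        # ... validation and schema preparation would happen here ...
        return create_version(name, job_id=job_id)  # forwarded unchanged

    print(create_dataset("my-ds", job_id="job-123"))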
datachain/data_storage/metastore.py
CHANGED

@@ -448,6 +448,10 @@ class AbstractMetastore(ABC, Serializable):
     def get_job_status(self, job_id: str) -> JobStatus | None:
         """Returns the status of the given job."""

+    @abstractmethod
+    def get_last_job_by_name(self, name: str, conn=None) -> "Job | None":
+        """Returns the last job with the given name, ordered by created_at."""
+
     #
     # Checkpoints
     #
@@ -1685,6 +1689,18 @@ class AbstractDBMetastore(AbstractMetastore):
         query = self._jobs_query().where(self._jobs.c.id.in_(ids))
         yield from self._parse_jobs(self.db.execute(query, conn=conn))

+    def get_last_job_by_name(self, name: str, conn=None) -> "Job | None":
+        query = (
+            self._jobs_query()
+            .where(self._jobs.c.name == name)
+            .order_by(self._jobs.c.created_at.desc())
+            .limit(1)
+        )
+        results = list(self.db.execute(query, conn=conn))
+        if not results:
+            return None
+        return self._parse_job(results[0])
+
     def create_job(
         self,
         name: str,
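get_last_job_by_name is a plain "latest row by name" lookup: filter on name, order by created_at descending, take one row. A self-contained SQLAlchemy Core sketch of the same query shape, against a made-up jobs table rather than datachain's real schema:

    # Self-contained sketch of a "last job by name" query in SQLAlchemy Core.
    # The in-memory jobs table here is made up, not datachain's schema.
    import datetime as dt
    import sqlalchemy as sa

    engine = sa.create_engine("sqlite:///:memory:")
    meta = sa.MetaData()
    jobs = sa.Table(
        "jobs", meta,
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("name", sa.Text),
        sa.Column("created_at", sa.DateTime),
    )
    meta.create_all(engine)

    with engine.begin() as conn:
        conn.execute(jobs.insert(), [
            {"name": "train.py", "created_at": dt.datetime(2024, 1, 1)},
            {"name": "train.py", "created_at": dt.datetime(2024, 2, 1)},
        ])
        query = (
            sa.select(jobs)
            .where(jobs.c.name == "train.py")
            .order_by(jobs.c.created_at.desc())
            .limit(1)
        )
        row = conn.execute(query).first()
        print(row.id, row.created_at)  # picks the February run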
datachain/delta.py
CHANGED

@@ -200,7 +200,9 @@ def _get_source_info(
         indirect=False,
     )

-    source_ds_dep = next((d for d in dependencies if d.name == source_ds.name), None)
+    source_ds_dep = next(
+        (d for d in dependencies if d and d.name == source_ds.name), None
+    )
     if not source_ds_dep:
         # Starting dataset was removed, back off to normal dataset creation
         return None, None, None, None, None
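The delta.py rewrite guards each candidate with "d and ..." so None entries in the dependency list are skipped, while the explicit None default keeps next() from raising StopIteration when nothing matches, letting the existing "if not source_ds_dep" fallback fire. A tiny standalone illustration of the pattern, with made-up data:

    # Standalone illustration of next(generator, default) with a None guard;
    # the deps list is made up for the example.
    deps = [{"name": "a"}, None, {"name": "b"}]

    match = next((d for d in deps if d and d["name"] == "c"), None)
    if not match:
        print("source dataset dependency is gone; fall back to normal creation")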
datachain/diff/__init__.py
CHANGED

@@ -103,8 +103,10 @@ def _compare(  # noqa: C901
    left = left.mutate(**{ldiff_col: 1})
    right = right.mutate(**{rdiff_col: 1})

-    if not compare:
+    if compare is None:
        modified_cond = True
+    elif len(compare) == 0:
+        modified_cond = False
    else:
        modified_cond = or_(  # type: ignore[assignment]
            *[
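The new branching makes the compare semantics explicit: None means no column list was given and every overlapping row is a modification candidate, an empty list means nothing is compared so nothing counts as modified, and a non-empty list ORs per-column inequality. A standalone sketch of the same three-way logic over plain values, with any() standing in for the SQL or_():

    # Three-way "modified" condition over plain dicts; any() stands in
    # for the SQL or_() used in the real code.
    def modified_cond(compare, left_row, right_row):
        if compare is None:      # no columns specified: always a candidate
            return True
        if len(compare) == 0:    # explicitly nothing to compare
            return False
        return any(left_row[c] != right_row[c] for c in compare)

    print(modified_cond(None, {"x": 1}, {"x": 1}))   # True
    print(modified_cond([], {"x": 1}, {"x": 2}))     # False
    print(modified_cond(["x"], {"x": 1}, {"x": 2}))  # True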
datachain/job.py
CHANGED
datachain/lib/dc/datachain.py
CHANGED

@@ -27,7 +27,6 @@ from datachain import semver
 from datachain.dataset import DatasetRecord
 from datachain.delta import delta_disabled
 from datachain.error import (
-    JobNotFoundError,
     ProjectCreateNotAllowedError,
     ProjectNotFoundError,
 )
@@ -627,6 +626,9 @@ class DataChain:
         self._validate_version(version)
         self._validate_update_version(update_version)

+        # get existing job if running in SaaS, or creating new one if running locally
+        job = self.session.get_or_create_job()
+
         namespace_name, project_name, name = catalog.get_full_dataset_name(
             name,
             namespace_name=self._settings.namespace,
@@ -635,7 +637,7 @@
         project = self._get_or_create_project(namespace_name, project_name)

         # Checkpoint handling
-
+        _hash, result = self._resolve_checkpoint(name, project, job, kwargs)

         # Schema preparation
         schema = self.signals_schema.clone_without_sys_signals().serialize()
@@ -655,13 +657,12 @@
                 attrs=attrs,
                 feature_schema=schema,
                 update_version=update_version,
+                job_id=job.id,
                 **kwargs,
             )
         )

-
-        catalog.metastore.create_checkpoint(job.id, _hash)  # type: ignore[arg-type]
-
+        catalog.metastore.create_checkpoint(job.id, _hash)  # type: ignore[arg-type]
         return result

     def _validate_version(self, version: str | None) -> None:
@@ -690,23 +691,15 @@
         self,
         name: str,
         project: Project,
+        job: Job,
         kwargs: dict,
-    ) -> tuple[
+    ) -> tuple[str, "DataChain | None"]:
         """Check if checkpoint exists and return cached dataset if possible."""
         from .datasets import read_dataset

         metastore = self.session.catalog.metastore
-
-        job_id = os.getenv("DATACHAIN_JOB_ID")
         checkpoints_reset = env2bool("DATACHAIN_CHECKPOINTS_RESET", undefined=True)

-        if not job_id:
-            return None, None, None
-
-        job = metastore.get_job(job_id)
-        if not job:
-            raise JobNotFoundError(f"Job with id {job_id} not found")
-
         _hash = self._calculate_job_hash(job.id)

         if (
@@ -718,9 +711,9 @@
             chain = read_dataset(
                 name, namespace=project.namespace.name, project=project.name, **kwargs
             )
-            return
+            return _hash, chain

-        return
+        return _hash, None

     def _handle_delta(
         self,
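Net effect of these hunks: save() asks the session for the process-wide job up front, _resolve_checkpoint receives that job instead of re-reading DATACHAIN_JOB_ID itself, and its return value shrinks to a (hash, cached result or None) pair. A condensed, hypothetical model of the resulting save path; every name below is an invented stand-in, not a datachain API:

    # Hypothetical model of a checkpoint-aware save: reuse the cached
    # dataset when this job already completed the step, else create it
    # and record a checkpoint.
    class FakeStore:
        def __init__(self):
            self.checkpoints: set[str] = set()
            self.datasets: dict[str, str] = {}

        def save(self, name: str, job_id: str) -> str:
            _hash = f"{job_id}:{name}"        # job-scoped step hash
            if _hash in self.checkpoints:     # checkpoint hit
                return self.datasets[name]    # return cached dataset
            self.datasets[name] = f"{name}@v1"
            self.checkpoints.add(_hash)       # mark the step as done
            return self.datasets[name]

    store = FakeStore()
    print(store.save("ds", "job-1"))  # creates the dataset
    print(store.save("ds", "job-1"))  # checkpoint hit, returns cached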
datachain/lib/dc/records.py
CHANGED
datachain/query/dataset.py
CHANGED

@@ -1927,10 +1927,6 @@ class DatasetQuery:
         )
         version = version or dataset.latest_version

-        self.session.add_dataset_version(
-            dataset=dataset, version=version, listing=kwargs.get("listing", False)
-        )
-
         dr = self.catalog.warehouse.dataset_rows(dataset)

         self.catalog.warehouse.copy_table(dr.get_table(), query.select())
datachain/query/session.py
CHANGED

@@ -1,21 +1,37 @@
 import atexit
 import gc
 import logging
+import os
 import re
 import sys
+import traceback
+from collections.abc import Callable
 from typing import TYPE_CHECKING, ClassVar
 from uuid import uuid4

 from datachain.catalog import get_catalog
-from datachain.error import TableMissingError
+from datachain.data_storage import JobQueryType, JobStatus
+from datachain.error import JobNotFoundError, TableMissingError

 if TYPE_CHECKING:
     from datachain.catalog import Catalog
-    from datachain.dataset import DatasetRecord
+    from datachain.job import Job

 logger = logging.getLogger("datachain")


+def is_script_run() -> bool:
+    """
+    Returns True if this was ran as python script, e.g python my_script.py.
+    Otherwise (if interactive or module run) returns False.
+    """
+    try:
+        argv0 = sys.argv[0]
+    except (IndexError, AttributeError):
+        return False
+    return bool(argv0) and argv0 not in ("-c", "-m", "ipython")
+
+
 class Session:
     """
     Session is a context that keeps track of temporary DataChain datasets for a proper
@@ -43,6 +59,13 @@ class Session:
     SESSION_CONTEXTS: ClassVar[list["Session"]] = []
     ORIGINAL_EXCEPT_HOOK = None

+    # Job management - class-level to ensure one job per process
+    _CURRENT_JOB: ClassVar["Job | None"] = None
+    _JOB_STATUS: ClassVar[JobStatus | None] = None
+    _OWNS_JOB: ClassVar[bool | None] = None
+    _JOB_HOOKS_REGISTERED: ClassVar[bool] = False
+    _JOB_FINALIZE_HOOK: ClassVar[Callable[[], None] | None] = None
+
     DATASET_PREFIX = "session_"
     GLOBAL_SESSION_NAME = "global"
     SESSION_UUID_LEN = 6
@@ -69,7 +92,6 @@ class Session:
         self.catalog = catalog or get_catalog(
             client_config=client_config, in_memory=in_memory
         )
-        self.dataset_versions: list[tuple[DatasetRecord, str, bool]] = []

     def __enter__(self):
         # Push the current context onto the stack
@@ -78,9 +100,8 @@
         return self

     def __exit__(self, exc_type, exc_val, exc_tb):
-
-
-
+        # Don't cleanup created versions on exception
+        # Datasets should persist even if the session fails
         self._cleanup_temp_datasets()
         if self.is_new_catalog:
             self.catalog.metastore.close_on_exit()
@@ -89,10 +110,114 @@
         if Session.SESSION_CONTEXTS:
             Session.SESSION_CONTEXTS.pop()

-    def
-
-
-
+    def get_or_create_job(self) -> "Job":
+        """
+        Get or create a Job for this process.
+
+        Returns:
+            Job: The active Job instance.
+
+        Behavior:
+            - If a job already exists, it is returned.
+            - If ``DATACHAIN_JOB_ID`` is set, the corresponding job is fetched.
+            - Otherwise, a new job is created:
+                * Name = absolute path to the Python script.
+                * Query = empty string.
+                * Parent = last job with the same name, if available.
+                * Status = "running".
+              Exit hooks are registered to finalize the job.
+
+        Note:
+            Job is shared across all Session instances to ensure one job per process.
+        """
+        if Session._CURRENT_JOB:
+            return Session._CURRENT_JOB
+
+        if env_job_id := os.getenv("DATACHAIN_JOB_ID"):
+            # SaaS run: just fetch existing job
+            Session._CURRENT_JOB = self.catalog.metastore.get_job(env_job_id)
+            if not Session._CURRENT_JOB:
+                raise JobNotFoundError(
+                    f"Job {env_job_id} from DATACHAIN_JOB_ID env not found"
+                )
+            Session._OWNS_JOB = False
+        else:
+            # Local run: create new job
+            if is_script_run():
+                script = os.path.abspath(sys.argv[0])
+            else:
+                # Interactive session or module run - use unique name to avoid
+                # linking unrelated sessions
+                script = str(uuid4())
+            python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
+
+            # try to find the parent job
+            parent = self.catalog.metastore.get_last_job_by_name(script)
+
+            job_id = self.catalog.metastore.create_job(
+                name=script,
+                query="",
+                query_type=JobQueryType.PYTHON,
+                status=JobStatus.RUNNING,
+                python_version=python_version,
+                parent_job_id=parent.id if parent else None,
+            )
+            Session._CURRENT_JOB = self.catalog.metastore.get_job(job_id)
+            Session._OWNS_JOB = True
+            Session._JOB_STATUS = JobStatus.RUNNING
+
+        # register cleanup hooks only once
+        if not Session._JOB_HOOKS_REGISTERED:
+
+            def _finalize_success_hook() -> None:
+                self._finalize_job_success()
+
+            Session._JOB_FINALIZE_HOOK = _finalize_success_hook
+            atexit.register(Session._JOB_FINALIZE_HOOK)
+            Session._JOB_HOOKS_REGISTERED = True
+
+        assert Session._CURRENT_JOB is not None
+        return Session._CURRENT_JOB
+
+    def _finalize_job_success(self):
+        """Mark the current job as completed."""
+        if (
+            Session._CURRENT_JOB
+            and Session._OWNS_JOB
+            and Session._JOB_STATUS == JobStatus.RUNNING
+        ):
+            self.catalog.metastore.set_job_status(
+                Session._CURRENT_JOB.id, JobStatus.COMPLETE
+            )
+            Session._JOB_STATUS = JobStatus.COMPLETE
+
+    def _finalize_job_as_canceled(self):
+        """Mark the current job as canceled."""
+        if (
+            Session._CURRENT_JOB
+            and Session._OWNS_JOB
+            and Session._JOB_STATUS == JobStatus.RUNNING
+        ):
+            self.catalog.metastore.set_job_status(
+                Session._CURRENT_JOB.id, JobStatus.CANCELED
+            )
+            Session._JOB_STATUS = JobStatus.CANCELED
+
+    def _finalize_job_as_failed(self, exc_type, exc_value, tb):
+        """Mark the current job as failed with error details."""
+        if (
+            Session._CURRENT_JOB
+            and Session._OWNS_JOB
+            and Session._JOB_STATUS == JobStatus.RUNNING
+        ):
+            error_stack = "".join(traceback.format_exception(exc_type, exc_value, tb))
+            self.catalog.metastore.set_job_status(
+                Session._CURRENT_JOB.id,
+                JobStatus.FAILED,
+                error_message=str(exc_value),
+                error_stack=error_stack,
+            )
+            Session._JOB_STATUS = JobStatus.FAILED

     def generate_temp_dataset_name(self) -> str:
         return self.get_temp_prefix() + uuid4().hex[: self.TEMP_TABLE_UUID_LEN]
@@ -113,16 +238,6 @@
         except TableMissingError:
             pass

-    def _cleanup_created_versions(self) -> None:
-        if not self.dataset_versions:
-            return
-
-        for dataset, version, listing in self.dataset_versions:
-            if not listing:
-                self.catalog.remove_dataset_version(dataset, version)
-
-        self.dataset_versions.clear()
-
     @classmethod
     def get(
         cls,
@@ -173,12 +288,27 @@

     @staticmethod
     def except_hook(exc_type, exc_value, exc_traceback):
-        Session.GLOBAL_SESSION_CTX
+        if Session.GLOBAL_SESSION_CTX:
+            # Handle KeyboardInterrupt specially - mark as canceled and exit with
+            # signal code
+            if exc_type is KeyboardInterrupt:
+                Session.GLOBAL_SESSION_CTX._finalize_job_as_canceled()
+            else:
+                Session.GLOBAL_SESSION_CTX._finalize_job_as_failed(
+                    exc_type, exc_value, exc_traceback
+                )
+            Session.GLOBAL_SESSION_CTX.__exit__(exc_type, exc_value, exc_traceback)
+
         Session._global_cleanup()

+        # Always delegate to original hook if it exists
         if Session.ORIGINAL_EXCEPT_HOOK:
             Session.ORIGINAL_EXCEPT_HOOK(exc_type, exc_value, exc_traceback)

+        if exc_type is KeyboardInterrupt:
+            # Exit with SIGINT signal code (128 + 2 = 130, or -2 in subprocess terms)
+            sys.exit(130)
+
     @classmethod
     def cleanup_for_tests(cls):
         if cls.GLOBAL_SESSION_CTX is not None:
@@ -186,6 +316,18 @@
             cls.GLOBAL_SESSION_CTX = None
             atexit.unregister(cls._global_cleanup)

+        # Reset job-related class variables
+        if cls._JOB_FINALIZE_HOOK:
+            try:
+                atexit.unregister(cls._JOB_FINALIZE_HOOK)
+            except ValueError:
+                pass  # Hook was not registered
+        cls._CURRENT_JOB = None
+        cls._JOB_STATUS = None
+        cls._OWNS_JOB = None
+        cls._JOB_HOOKS_REGISTERED = False
+        cls._JOB_FINALIZE_HOOK = None
+
         if cls.ORIGINAL_EXCEPT_HOOK:
             sys.excepthook = cls.ORIGINAL_EXCEPT_HOOK

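The contract added to Session: the first get_or_create_job() call in a process either fetches the SaaS-provided job (DATACHAIN_JOB_ID) or creates a local one, and the registered hooks settle the final status exactly once, with failure or cancellation taking precedence over the atexit success hook. A minimal, hypothetical model of that one-job-per-process lifecycle:

    # Minimal model of one-job-per-process with an atexit success hook;
    # statuses and the job dict are simplified stand-ins.
    import atexit

    class JobTracker:
        current = None  # class-level: shared by everything in the process

        @classmethod
        def get_or_create(cls):
            if cls.current is None:
                cls.current = {"id": "job-1", "status": "running"}
                atexit.register(cls._finalize_success)
            return cls.current

        @classmethod
        def _finalize_success(cls):
            # only flip the status if no failure/cancel path settled it first
            if cls.current and cls.current["status"] == "running":
                cls.current["status"] = "complete"

    job = JobTracker.get_or_create()
    assert JobTracker.get_or_create() is job  # same job everywhere in-process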
{datachain-0.36.6.dist-info → datachain-0.37.1.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datachain
-Version: 0.36.6
+Version: 0.37.1
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License-Expression: Apache-2.0
@@ -41,7 +41,7 @@ Requires-Dist: cloudpickle
 Requires-Dist: pydantic
 Requires-Dist: jmespath>=1.0
 Requires-Dist: datamodel-code-generator>=0.25
-Requires-Dist: Pillow<
+Requires-Dist: Pillow<13,>=10.0.0
 Requires-Dist: msgpack<2,>=1.0.4
 Requires-Dist: psutil
 Requires-Dist: huggingface_hub
{datachain-0.36.6.dist-info → datachain-0.37.1.dist-info}/RECORD
CHANGED

@@ -5,10 +5,10 @@ datachain/cache.py,sha256=Klkc7iL_KvryeZk-UNjtByTFk7URbpb60XblalqHoYI,3604
 datachain/checkpoint.py,sha256=AOMqN_2fNuEBJDAsmc-P4L7FU444eQxTU4MCgr-XEH8,1121
 datachain/config.py,sha256=KPXef6P4NAZiEbSDMUcFwuNVTul2fZBs5xrCbyRl6Tg,4193
 datachain/dataset.py,sha256=PQwgeFPmEyN8xucaU41q371VJ1EAFXdMVbeQOVeCPFQ,24995
-datachain/delta.py,sha256=
+datachain/delta.py,sha256=eUQK_zUH5xMwe0dNUaDsaovsAl97ULGtlG90uVuPaLY,11605
 datachain/error.py,sha256=P_5KXlfVIsW4E42JJCoFhGsgvY8la-6jXBEWbHbgqKo,1846
 datachain/hash_utils.py,sha256=FHzZS8WC4Qr_e-kZeQlfl-ilZ78IXWxj-xMZOqm8Ies,4455
-datachain/job.py,sha256=
+datachain/job.py,sha256=vCcHJHKRo5uZTpmUYt_1oVkeawFF0x8jbnm-XZYaKfI,1358
 datachain/listing.py,sha256=cFOjzX9kdo0kpEDo5JfcUeVTkiMXSBNqcJJu0BVfPUI,7220
 datachain/namespace.py,sha256=YhxHdmCekWH4l-ZayNHGiPy5KAz_5LGqviivFYu0u9U,2337
 datachain/node.py,sha256=gBLCoh-3nyaCDnMPt3gLS_t3m7qL0_JDiN02a9DH_kY,5552
@@ -24,7 +24,7 @@ datachain/studio.py,sha256=OHVAY8IcktgEHNSgYaJuBfAIln_nKBrF2j7BOM2Fxd0,15177
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=9KXA-fRH8lhK4E2JmdNOOH-74aUe-Sjb8wLiTiqXOh8,15710
 datachain/catalog/__init__.py,sha256=9NBaywvAOaXdkyqiHjbBEiXs7JImR1OJsY9r8D5Q16g,403
-datachain/catalog/catalog.py,sha256=
+datachain/catalog/catalog.py,sha256=yYwxOM7bdHM3KYoqcH6eoMp5X12bnJARZOucoe2wQcU,69924
 datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
 datachain/catalog/dependency.py,sha256=EHuu_Ox76sEhy71NXjFJiHxQVTz19KecqBcrjwFCa7M,5280
 datachain/catalog/loader.py,sha256=VTaGPc4ASNdUdr7Elobp8qcXUOHwd0oqQcnk3LUwtF0,6244
@@ -54,12 +54,12 @@ datachain/client/s3.py,sha256=KS9o0jxXJRFp7Isdibz366VaWrULmpegzfYdurJpAl0,7499
 datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
 datachain/data_storage/db_engine.py,sha256=MGbrckXk5kHOfpjnhHhGpyJpAsgaBCxMmfd33hB2SWI,3756
 datachain/data_storage/job.py,sha256=NGFhXg0C0zRFTaF6ccjXZJT4xI4_gUr1WcxTLK6WYDE,448
-datachain/data_storage/metastore.py,sha256=
+datachain/data_storage/metastore.py,sha256=DFyTkKLJN5-nFXXc7ln_rGj-FLctj0nrhXJxuyprZSk,64661
 datachain/data_storage/schema.py,sha256=3fAgiE11TIDYCW7EbTdiOm61SErRitvsLr7YPnUlVm0,9801
 datachain/data_storage/serializer.py,sha256=oL8i8smyAeVUyDepk8Xhf3lFOGOEHMoZjA5GdFzvfGI,3862
 datachain/data_storage/sqlite.py,sha256=pee99RewNQh5kVxGpD2sf9V5VloM4xwn8oeEhquU1rs,31756
 datachain/data_storage/warehouse.py,sha256=nuGT27visvAi7jr7ZAZF-wmFe0ZEFD8qaTheINX_7RM,35269
-datachain/diff/__init__.py,sha256=
+datachain/diff/__init__.py,sha256=lGrygGzdWSSYJ1DgX4h2q_ko5QINEW8PKfxOwE9ZFnI,9394
 datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
 datachain/fs/utils.py,sha256=s-FkTOCGBk-b6TT3toQH51s9608pofoFjUSTc1yy7oE,825
@@ -109,14 +109,14 @@ datachain/lib/convert/values_to_tuples.py,sha256=Sxj0ojeMSpAwM_NNoXa1dMR_2L_cQ6X
 datachain/lib/dc/__init__.py,sha256=UrUzmDH6YyVl8fxM5iXTSFtl5DZTUzEYm1MaazK4vdQ,900
 datachain/lib/dc/csv.py,sha256=fIfj5-2Ix4z5D5yZueagd5WUWw86pusJ9JJKD-U3KGg,4407
 datachain/lib/dc/database.py,sha256=Wqob3dQc9Mol_0vagzVEXzteCKS9M0E3U5130KVmQKg,14629
-datachain/lib/dc/datachain.py,sha256=
+datachain/lib/dc/datachain.py,sha256=9zEL36hVkDxPmmy1A8dv9CFADUEnDr3S7vXNxbSHpGE,104054
 datachain/lib/dc/datasets.py,sha256=A4SW-b3dkQnm9Wi7ciCdlXqtrsquIeRfBQN_bJ_ulqY,15237
 datachain/lib/dc/hf.py,sha256=FeruEO176L2qQ1Mnx0QmK4kV0GuQ4xtj717N8fGJrBI,2849
 datachain/lib/dc/json.py,sha256=iJ6G0jwTKz8xtfh1eICShnWk_bAMWjF5bFnOXLHaTlw,2683
 datachain/lib/dc/listings.py,sha256=0XTZERQZ2ErP3LSVg9lF9i3alKebqA1Kip2Zf15unUM,4507
 datachain/lib/dc/pandas.py,sha256=o9rTcZf27-3mCEaDdX1ZzM0I4bSOsu-4mA2zK6rWoS4,1460
 datachain/lib/dc/parquet.py,sha256=wa_VazXotY5RZ8ypC0_M9Qo30tamzXmYeVE6P-NcQ1Y,2375
-datachain/lib/dc/records.py,sha256=
+datachain/lib/dc/records.py,sha256=WvbaLhMqM9e54gJLLeG54QX5ZXkkBIK3FokojLTSbZc,2974
 datachain/lib/dc/storage.py,sha256=nlEg-9v9iwtiQUcGd-Ng1lnrpMviliR95mjceBez1BU,9767
 datachain/lib/dc/storage_pattern.py,sha256=TqaDb5yq050W9IxpESz9iotjs0R__i5ngRtVo5BmJ-8,7645
 datachain/lib/dc/utils.py,sha256=kTpzS8CBQmle1A0XZzu4b5zAHo8piFiBSP1lS8ztkQU,4088
@@ -132,13 +132,13 @@ datachain/model/ultralytics/pose.py,sha256=pvoXrWWUSWT_UBaMwUb5MBHAY57Co2HFDPigF
 datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY_Y4XGN8ok,3194
 datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
 datachain/query/batch.py,sha256=ugTlSFqh_kxMcG6vJ5XrEzG9jBXRdb7KRAEEsFWiPew,4190
-datachain/query/dataset.py,sha256=
+datachain/query/dataset.py,sha256=pHdanZoPsCM20IK0PDt5EXWcPro9W0C-U3OXtPzpMDE,67556
 datachain/query/dispatch.py,sha256=Tg73zB6vDnYYYAvtlS9l7BI3sI1EfRCbDjiasvNxz2s,16385
 datachain/query/metrics.py,sha256=qOMHiYPTMtVs2zI-mUSy8OPAVwrg4oJtVF85B9tdQyM,810
 datachain/query/params.py,sha256=JkVz6IKUIpF58JZRkUXFT8DAHX2yfaULbhVaGmHKFLc,826
 datachain/query/queue.py,sha256=kCetMG6y7_ynV_jJDAXkLsf8WsVZCEk1fAuQGd7yTOo,3543
 datachain/query/schema.py,sha256=Cn1keXjktptAbEDbHlxSzdoCu5H6h_Vzp_DtNpMSr5w,6697
-datachain/query/session.py,sha256=
+datachain/query/session.py,sha256=iRbb01tsgTagYKRNZoQ4JsA9u6OQK1vofwZ7nS10z8o,12261
 datachain/query/udf.py,sha256=SLLLNLz3QmtaM04ZVTu7K6jo58I-1j5Jf7Lb4ORv4tQ,1385
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/remote/studio.py,sha256=4voPFVDXAU6BSBHDAvB_LTYiCACA6Zr0IfYnDjrnN6s,16737
@@ -165,9 +165,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=xQzzmvQRKsPteDKbpgOxd4r971BnFaK33mcOl0FuGeI,2883
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.36.6.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.36.6.dist-info/METADATA,sha256=
-datachain-0.36.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-datachain-0.36.6.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.36.6.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.36.6.dist-info/RECORD,,
+datachain-0.37.1.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.37.1.dist-info/METADATA,sha256=Gxm5b2gZCiJZpi7L8_J0RM_YpxhgHueS0GqSSkWQWaA,13762
+datachain-0.37.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+datachain-0.37.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.37.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.37.1.dist-info/RECORD,,
{datachain-0.36.6.dist-info → datachain-0.37.1.dist-info}/WHEEL
File without changes

{datachain-0.36.6.dist-info → datachain-0.37.1.dist-info}/entry_points.txt
File without changes

{datachain-0.36.6.dist-info → datachain-0.37.1.dist-info}/licenses/LICENSE
File without changes

{datachain-0.36.6.dist-info → datachain-0.37.1.dist-info}/top_level.txt
File without changes