datachain 0.8.4__py3-none-any.whl → 0.8.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/asyn.py +16 -6
- datachain/cache.py +32 -10
- datachain/catalog/catalog.py +17 -1
- datachain/client/azure.py +6 -2
- datachain/client/fsspec.py +1 -1
- datachain/client/gcs.py +6 -2
- datachain/client/s3.py +16 -1
- datachain/data_storage/db_engine.py +9 -0
- datachain/data_storage/schema.py +4 -10
- datachain/data_storage/sqlite.py +7 -1
- datachain/data_storage/warehouse.py +6 -4
- datachain/{lib/diff.py → diff/__init__.py} +116 -12
- datachain/func/__init__.py +2 -1
- datachain/func/conditional.py +31 -9
- datachain/lib/arrow.py +3 -1
- datachain/lib/dc.py +5 -3
- datachain/lib/file.py +15 -4
- datachain/lib/hf.py +1 -1
- datachain/lib/pytorch.py +57 -13
- datachain/lib/udf.py +82 -40
- datachain/listing.py +1 -0
- datachain/progress.py +18 -1
- datachain/query/dataset.py +122 -93
- datachain/query/dispatch.py +22 -16
- datachain/utils.py +13 -2
- {datachain-0.8.4.dist-info → datachain-0.8.5.dist-info}/METADATA +6 -6
- {datachain-0.8.4.dist-info → datachain-0.8.5.dist-info}/RECORD +31 -31
- {datachain-0.8.4.dist-info → datachain-0.8.5.dist-info}/WHEEL +1 -1
- {datachain-0.8.4.dist-info → datachain-0.8.5.dist-info}/LICENSE +0 -0
- {datachain-0.8.4.dist-info → datachain-0.8.5.dist-info}/entry_points.txt +0 -0
- {datachain-0.8.4.dist-info → datachain-0.8.5.dist-info}/top_level.txt +0 -0
datachain/query/dataset.py
CHANGED
|
@@ -35,6 +35,7 @@ from sqlalchemy.sql.schema import TableClause
|
|
|
35
35
|
from sqlalchemy.sql.selectable import Select
|
|
36
36
|
|
|
37
37
|
from datachain.asyn import ASYNC_WORKERS, AsyncMapper, OrderedMapper
|
|
38
|
+
from datachain.catalog.catalog import clone_catalog_with_cache
|
|
38
39
|
from datachain.data_storage.schema import (
|
|
39
40
|
PARTITION_COLUMN_ID,
|
|
40
41
|
partition_col_names,
|
|
@@ -43,7 +44,8 @@ from datachain.data_storage.schema import (
|
|
|
43
44
|
from datachain.dataset import DatasetStatus, RowDict
|
|
44
45
|
from datachain.error import DatasetNotFoundError, QueryScriptCancelError
|
|
45
46
|
from datachain.func.base import Function
|
|
46
|
-
from datachain.
|
|
47
|
+
from datachain.lib.udf import UDFAdapter, _get_cache
|
|
48
|
+
from datachain.progress import CombinedDownloadCallback, TqdmCombinedDownloadCallback
|
|
47
49
|
from datachain.query.schema import C, UDFParamSpec, normalize_param
|
|
48
50
|
from datachain.query.session import Session
|
|
49
51
|
from datachain.sql.functions.random import rand
|
|
@@ -52,6 +54,7 @@ from datachain.utils import (
|
|
|
52
54
|
determine_processes,
|
|
53
55
|
filtered_cloudpickle_dumps,
|
|
54
56
|
get_datachain_executable,
|
|
57
|
+
safe_closing,
|
|
55
58
|
)
|
|
56
59
|
|
|
57
60
|
if TYPE_CHECKING:
|
|
@@ -349,19 +352,26 @@ def process_udf_outputs(
|
|
|
349
352
|
warehouse.insert_rows_done(udf_table)
|
|
350
353
|
|
|
351
354
|
|
|
352
|
-
def get_download_callback() ->
|
|
353
|
-
return
|
|
354
|
-
{
|
|
355
|
+
def get_download_callback(suffix: str = "", **kwargs) -> CombinedDownloadCallback:
|
|
356
|
+
return TqdmCombinedDownloadCallback(
|
|
357
|
+
{
|
|
358
|
+
"desc": "Download" + suffix,
|
|
359
|
+
"unit": "B",
|
|
360
|
+
"unit_scale": True,
|
|
361
|
+
"unit_divisor": 1024,
|
|
362
|
+
"leave": False,
|
|
363
|
+
**kwargs,
|
|
364
|
+
},
|
|
355
365
|
)
|
|
356
366
|
|
|
357
367
|
|
|
358
368
|
def get_processed_callback() -> Callback:
|
|
359
|
-
return TqdmCallback({"desc": "Processed", "unit": " rows"})
|
|
369
|
+
return TqdmCallback({"desc": "Processed", "unit": " rows", "leave": False})
|
|
360
370
|
|
|
361
371
|
|
|
362
372
|
def get_generated_callback(is_generator: bool = False) -> Callback:
|
|
363
373
|
if is_generator:
|
|
364
|
-
return TqdmCallback({"desc": "Generated", "unit": " rows"})
|
|
374
|
+
return TqdmCallback({"desc": "Generated", "unit": " rows", "leave": False})
|
|
365
375
|
return DEFAULT_CALLBACK
|
|
366
376
|
|
|
367
377
|
|
|
@@ -412,97 +422,109 @@ class UDFStep(Step, ABC):
|
|
|
412
422
|
|
|
413
423
|
udf_fields = [str(c.name) for c in query.selected_columns]
|
|
414
424
|
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
425
|
+
prefetch = self.udf.prefetch
|
|
426
|
+
with _get_cache(self.catalog.cache, prefetch, use_cache=self.cache) as _cache:
|
|
427
|
+
catalog = clone_catalog_with_cache(self.catalog, _cache)
|
|
428
|
+
try:
|
|
429
|
+
if workers:
|
|
430
|
+
if catalog.in_memory:
|
|
431
|
+
raise RuntimeError(
|
|
432
|
+
"In-memory databases cannot be used with "
|
|
433
|
+
"distributed processing."
|
|
434
|
+
)
|
|
422
435
|
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
self.udf,
|
|
428
|
-
self.catalog,
|
|
429
|
-
udf_table,
|
|
430
|
-
query,
|
|
431
|
-
workers,
|
|
432
|
-
processes,
|
|
433
|
-
udf_fields=udf_fields,
|
|
434
|
-
is_generator=self.is_generator,
|
|
435
|
-
use_partitioning=use_partitioning,
|
|
436
|
-
cache=self.cache,
|
|
437
|
-
)
|
|
438
|
-
elif processes:
|
|
439
|
-
# Parallel processing (faster for more CPU-heavy UDFs)
|
|
440
|
-
if self.catalog.in_memory:
|
|
441
|
-
raise RuntimeError(
|
|
442
|
-
"In-memory databases cannot be used with parallel processing."
|
|
443
|
-
)
|
|
444
|
-
udf_info: UdfInfo = {
|
|
445
|
-
"udf_data": filtered_cloudpickle_dumps(self.udf),
|
|
446
|
-
"catalog_init": self.catalog.get_init_params(),
|
|
447
|
-
"metastore_clone_params": self.catalog.metastore.clone_params(),
|
|
448
|
-
"warehouse_clone_params": self.catalog.warehouse.clone_params(),
|
|
449
|
-
"table": udf_table,
|
|
450
|
-
"query": query,
|
|
451
|
-
"udf_fields": udf_fields,
|
|
452
|
-
"batching": batching,
|
|
453
|
-
"processes": processes,
|
|
454
|
-
"is_generator": self.is_generator,
|
|
455
|
-
"cache": self.cache,
|
|
456
|
-
}
|
|
457
|
-
|
|
458
|
-
# Run the UDFDispatcher in another process to avoid needing
|
|
459
|
-
# if __name__ == '__main__': in user scripts
|
|
460
|
-
exec_cmd = get_datachain_executable()
|
|
461
|
-
cmd = [*exec_cmd, "internal-run-udf"]
|
|
462
|
-
envs = dict(os.environ)
|
|
463
|
-
envs.update({"PYTHONPATH": os.getcwd()})
|
|
464
|
-
process_data = filtered_cloudpickle_dumps(udf_info)
|
|
465
|
-
|
|
466
|
-
with subprocess.Popen(cmd, env=envs, stdin=subprocess.PIPE) as process: # noqa: S603
|
|
467
|
-
process.communicate(process_data)
|
|
468
|
-
if retval := process.poll():
|
|
469
|
-
raise RuntimeError(f"UDF Execution Failed! Exit code: {retval}")
|
|
470
|
-
else:
|
|
471
|
-
# Otherwise process single-threaded (faster for smaller UDFs)
|
|
472
|
-
warehouse = self.catalog.warehouse
|
|
473
|
-
|
|
474
|
-
udf_inputs = batching(warehouse.dataset_select_paginated, query)
|
|
475
|
-
download_cb = get_download_callback()
|
|
476
|
-
processed_cb = get_processed_callback()
|
|
477
|
-
generated_cb = get_generated_callback(self.is_generator)
|
|
478
|
-
try:
|
|
479
|
-
udf_results = self.udf.run(
|
|
480
|
-
udf_fields,
|
|
481
|
-
udf_inputs,
|
|
482
|
-
self.catalog,
|
|
483
|
-
self.cache,
|
|
484
|
-
download_cb,
|
|
485
|
-
processed_cb,
|
|
436
|
+
from datachain.catalog.loader import get_distributed_class
|
|
437
|
+
|
|
438
|
+
distributor = get_distributed_class(
|
|
439
|
+
min_task_size=self.min_task_size
|
|
486
440
|
)
|
|
487
|
-
|
|
488
|
-
warehouse,
|
|
489
|
-
udf_table,
|
|
490
|
-
udf_results,
|
|
441
|
+
distributor(
|
|
491
442
|
self.udf,
|
|
492
|
-
|
|
443
|
+
catalog,
|
|
444
|
+
udf_table,
|
|
445
|
+
query,
|
|
446
|
+
workers,
|
|
447
|
+
processes,
|
|
448
|
+
udf_fields=udf_fields,
|
|
449
|
+
is_generator=self.is_generator,
|
|
450
|
+
use_partitioning=use_partitioning,
|
|
451
|
+
cache=self.cache,
|
|
493
452
|
)
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
453
|
+
elif processes:
|
|
454
|
+
# Parallel processing (faster for more CPU-heavy UDFs)
|
|
455
|
+
if catalog.in_memory:
|
|
456
|
+
raise RuntimeError(
|
|
457
|
+
"In-memory databases cannot be used "
|
|
458
|
+
"with parallel processing."
|
|
459
|
+
)
|
|
460
|
+
udf_info: UdfInfo = {
|
|
461
|
+
"udf_data": filtered_cloudpickle_dumps(self.udf),
|
|
462
|
+
"catalog_init": catalog.get_init_params(),
|
|
463
|
+
"metastore_clone_params": catalog.metastore.clone_params(),
|
|
464
|
+
"warehouse_clone_params": catalog.warehouse.clone_params(),
|
|
465
|
+
"table": udf_table,
|
|
466
|
+
"query": query,
|
|
467
|
+
"udf_fields": udf_fields,
|
|
468
|
+
"batching": batching,
|
|
469
|
+
"processes": processes,
|
|
470
|
+
"is_generator": self.is_generator,
|
|
471
|
+
"cache": self.cache,
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
# Run the UDFDispatcher in another process to avoid needing
|
|
475
|
+
# if __name__ == '__main__': in user scripts
|
|
476
|
+
exec_cmd = get_datachain_executable()
|
|
477
|
+
cmd = [*exec_cmd, "internal-run-udf"]
|
|
478
|
+
envs = dict(os.environ)
|
|
479
|
+
envs.update({"PYTHONPATH": os.getcwd()})
|
|
480
|
+
process_data = filtered_cloudpickle_dumps(udf_info)
|
|
481
|
+
|
|
482
|
+
with subprocess.Popen( # noqa: S603
|
|
483
|
+
cmd, env=envs, stdin=subprocess.PIPE
|
|
484
|
+
) as process:
|
|
485
|
+
process.communicate(process_data)
|
|
486
|
+
if retval := process.poll():
|
|
487
|
+
raise RuntimeError(
|
|
488
|
+
f"UDF Execution Failed! Exit code: {retval}"
|
|
489
|
+
)
|
|
490
|
+
else:
|
|
491
|
+
# Otherwise process single-threaded (faster for smaller UDFs)
|
|
492
|
+
warehouse = catalog.warehouse
|
|
493
|
+
|
|
494
|
+
udf_inputs = batching(warehouse.dataset_select_paginated, query)
|
|
495
|
+
download_cb = get_download_callback()
|
|
496
|
+
processed_cb = get_processed_callback()
|
|
497
|
+
generated_cb = get_generated_callback(self.is_generator)
|
|
498
|
+
|
|
499
|
+
try:
|
|
500
|
+
udf_results = self.udf.run(
|
|
501
|
+
udf_fields,
|
|
502
|
+
udf_inputs,
|
|
503
|
+
catalog,
|
|
504
|
+
self.cache,
|
|
505
|
+
download_cb,
|
|
506
|
+
processed_cb,
|
|
507
|
+
)
|
|
508
|
+
with safe_closing(udf_results):
|
|
509
|
+
process_udf_outputs(
|
|
510
|
+
warehouse,
|
|
511
|
+
udf_table,
|
|
512
|
+
udf_results,
|
|
513
|
+
self.udf,
|
|
514
|
+
cb=generated_cb,
|
|
515
|
+
)
|
|
516
|
+
finally:
|
|
517
|
+
download_cb.close()
|
|
518
|
+
processed_cb.close()
|
|
519
|
+
generated_cb.close()
|
|
520
|
+
|
|
521
|
+
except QueryScriptCancelError:
|
|
522
|
+
self.catalog.warehouse.close()
|
|
523
|
+
sys.exit(QUERY_SCRIPT_CANCELED_EXIT_CODE)
|
|
524
|
+
except (Exception, KeyboardInterrupt):
|
|
525
|
+
# Close any open database connections if an error is encountered
|
|
526
|
+
self.catalog.warehouse.close()
|
|
527
|
+
raise
|
|
506
528
|
|
|
507
529
|
def create_partitions_table(self, query: Select) -> "Table":
|
|
508
530
|
"""
|
|
@@ -602,6 +624,13 @@ class UDFSignal(UDFStep):
|
|
|
602
624
|
signal_name_cols = {c.name: c for c in signal_cols}
|
|
603
625
|
cols = signal_cols
|
|
604
626
|
|
|
627
|
+
overlap = {c.name for c in original_cols} & {c.name for c in cols}
|
|
628
|
+
if overlap:
|
|
629
|
+
raise ValueError(
|
|
630
|
+
"Column already exists or added in the previous steps: "
|
|
631
|
+
+ ", ".join(overlap)
|
|
632
|
+
)
|
|
633
|
+
|
|
605
634
|
def q(*columns):
|
|
606
635
|
cols1 = []
|
|
607
636
|
cols2 = []
|
datachain/query/dispatch.py
CHANGED
|
@@ -14,7 +14,9 @@ from multiprocess import get_context
|
|
|
14
14
|
from sqlalchemy.sql import func
|
|
15
15
|
|
|
16
16
|
from datachain.catalog import Catalog
|
|
17
|
+
from datachain.catalog.catalog import clone_catalog_with_cache
|
|
17
18
|
from datachain.catalog.loader import get_distributed_class
|
|
19
|
+
from datachain.lib.udf import _get_cache
|
|
18
20
|
from datachain.query.batch import RowsOutput, RowsOutputBatch
|
|
19
21
|
from datachain.query.dataset import (
|
|
20
22
|
get_download_callback,
|
|
@@ -25,7 +27,7 @@ from datachain.query.dataset import (
|
|
|
25
27
|
from datachain.query.queue import get_from_queue, put_into_queue
|
|
26
28
|
from datachain.query.udf import UdfInfo
|
|
27
29
|
from datachain.query.utils import get_query_id_column
|
|
28
|
-
from datachain.utils import batched, flatten
|
|
30
|
+
from datachain.utils import batched, flatten, safe_closing
|
|
29
31
|
|
|
30
32
|
if TYPE_CHECKING:
|
|
31
33
|
from sqlalchemy import Select, Table
|
|
@@ -304,21 +306,25 @@ class UDFWorker:
|
|
|
304
306
|
processed_cb = ProcessedCallback()
|
|
305
307
|
generated_cb = get_generated_callback(self.is_generator)
|
|
306
308
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
self.
|
|
310
|
-
self.
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
309
|
+
prefetch = self.udf.prefetch
|
|
310
|
+
with _get_cache(self.catalog.cache, prefetch, use_cache=self.cache) as _cache:
|
|
311
|
+
catalog = clone_catalog_with_cache(self.catalog, _cache)
|
|
312
|
+
udf_results = self.udf.run(
|
|
313
|
+
self.udf_fields,
|
|
314
|
+
self.get_inputs(),
|
|
315
|
+
catalog,
|
|
316
|
+
self.cache,
|
|
317
|
+
download_cb=self.cb,
|
|
318
|
+
processed_cb=processed_cb,
|
|
319
|
+
)
|
|
320
|
+
with safe_closing(udf_results):
|
|
321
|
+
process_udf_outputs(
|
|
322
|
+
catalog.warehouse,
|
|
323
|
+
self.table,
|
|
324
|
+
self.notify_and_process(udf_results, processed_cb),
|
|
325
|
+
self.udf,
|
|
326
|
+
cb=generated_cb,
|
|
327
|
+
)
|
|
322
328
|
|
|
323
329
|
put_into_queue(
|
|
324
330
|
self.done_queue,
|
datachain/utils.py
CHANGED
|
@@ -9,6 +9,7 @@ import stat
|
|
|
9
9
|
import sys
|
|
10
10
|
import time
|
|
11
11
|
from collections.abc import Iterable, Iterator, Sequence
|
|
12
|
+
from contextlib import contextmanager
|
|
12
13
|
from datetime import date, datetime, timezone
|
|
13
14
|
from itertools import chain, islice
|
|
14
15
|
from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union
|
|
@@ -22,6 +23,7 @@ from pydantic import BaseModel
|
|
|
22
23
|
|
|
23
24
|
if TYPE_CHECKING:
|
|
24
25
|
import pandas as pd
|
|
26
|
+
from typing_extensions import Self
|
|
25
27
|
|
|
26
28
|
NUL = b"\0"
|
|
27
29
|
TIME_ZERO = datetime.fromtimestamp(0, tz=timezone.utc)
|
|
@@ -33,7 +35,7 @@ ENV_DATACHAIN_GLOBAL_CONFIG_DIR = "DATACHAIN_GLOBAL_CONFIG_DIR"
|
|
|
33
35
|
STUDIO_URL = "https://studio.datachain.ai"
|
|
34
36
|
|
|
35
37
|
|
|
36
|
-
T = TypeVar("T"
|
|
38
|
+
T = TypeVar("T")
|
|
37
39
|
|
|
38
40
|
|
|
39
41
|
class DataChainDir:
|
|
@@ -90,7 +92,7 @@ class DataChainDir:
|
|
|
90
92
|
return osp.join(root_dir, cls.DEFAULT)
|
|
91
93
|
|
|
92
94
|
@classmethod
|
|
93
|
-
def find(cls
|
|
95
|
+
def find(cls, create: bool = True) -> "Self":
|
|
94
96
|
try:
|
|
95
97
|
root = os.environ[cls.ENV_VAR]
|
|
96
98
|
except KeyError:
|
|
@@ -479,3 +481,12 @@ def row_to_nested_dict(
|
|
|
479
481
|
for h, v in zip(headers, row):
|
|
480
482
|
nested_dict_path_set(result, h, v)
|
|
481
483
|
return result
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
@contextmanager
|
|
487
|
+
def safe_closing(thing: T) -> Iterator[T]:
|
|
488
|
+
try:
|
|
489
|
+
yield thing
|
|
490
|
+
finally:
|
|
491
|
+
if hasattr(thing, "close"):
|
|
492
|
+
thing.close()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.8.
|
|
3
|
+
Version: 0.8.5
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -96,10 +96,10 @@ Requires-Dist: defusedxml; extra == "examples"
|
|
|
96
96
|
Requires-Dist: accelerate; extra == "examples"
|
|
97
97
|
Requires-Dist: unstructured_ingest[embed-huggingface]; extra == "examples"
|
|
98
98
|
Requires-Dist: unstructured[pdf]<0.16.12; extra == "examples"
|
|
99
|
-
Requires-Dist: pdfplumber==0.11.
|
|
99
|
+
Requires-Dist: pdfplumber==0.11.5; extra == "examples"
|
|
100
100
|
Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
|
|
101
101
|
Requires-Dist: onnx==1.16.1; extra == "examples"
|
|
102
|
-
Requires-Dist: ultralytics==8.3.
|
|
102
|
+
Requires-Dist: ultralytics==8.3.58; extra == "examples"
|
|
103
103
|
|
|
104
104
|
================
|
|
105
105
|
|logo| DataChain
|
|
@@ -134,7 +134,7 @@ Use Cases
|
|
|
134
134
|
1. **ETL.** Pythonic framework for describing and running unstructured data transformations
|
|
135
135
|
and enrichments, applying models to data, including LLMs.
|
|
136
136
|
2. **Analytics.** DataChain dataset is a table that combines all the information about data
|
|
137
|
-
objects in one place + it provides dataframe-like API and
|
|
137
|
+
objects in one place + it provides dataframe-like API and vectorized engine to do analytics
|
|
138
138
|
on these tables at scale.
|
|
139
139
|
3. **Versioning.** DataChain doesn't store, require moving or copying data (unlike DVC).
|
|
140
140
|
Perfect use case is a bucket with thousands or millions of images, videos, audio, PDFs.
|
|
@@ -270,7 +270,7 @@ DataChain Studio Platform
|
|
|
270
270
|
|
|
271
271
|
`DataChain Studio`_ is a proprietary solution for teams that offers:
|
|
272
272
|
|
|
273
|
-
- **Centralized dataset registry** to manage data, code and
|
|
273
|
+
- **Centralized dataset registry** to manage data, code and
|
|
274
274
|
dependencies in one place.
|
|
275
275
|
- **Data Lineage** for data sources as well as derivative dataset.
|
|
276
276
|
- **UI for Multimodal Data** like images, videos, and PDFs.
|
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
datachain/__init__.py,sha256=ofPJ6B-d-ybSDRrE7J6wqF_ZRAB2W9U8l-eeuBtqPLg,865
|
|
2
2
|
datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
|
|
3
|
-
datachain/asyn.py,sha256=
|
|
4
|
-
datachain/cache.py,sha256=
|
|
3
|
+
datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
|
|
4
|
+
datachain/cache.py,sha256=7ABXvxhuYmXPymC_MilxxUk3iIr2y5s2FqCmh4uacaQ,3651
|
|
5
5
|
datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
|
|
6
6
|
datachain/dataset.py,sha256=5HtqZBRaaToa_C74g62bACjBaCRf2Y6BDgIACLhK1ZA,19161
|
|
7
7
|
datachain/error.py,sha256=bxAAL32lSeMgzsQDEHbGTGORj-mPzzpCRvWDPueJNN4,1092
|
|
8
8
|
datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
|
|
9
|
-
datachain/listing.py,sha256=
|
|
9
|
+
datachain/listing.py,sha256=1v4ryVp1EbodyA-bmfFqWHWvykfd6ww33pp9pEbozo8,7607
|
|
10
10
|
datachain/node.py,sha256=HSpjBUBQBWXUUpbUEq839dsSc5KR2O8ww1Udl4jQemY,6023
|
|
11
11
|
datachain/nodes_fetcher.py,sha256=ILMzUW5o4_6lUOVrLDC9gJPCXfcgKnMG68plrc7dAOA,1113
|
|
12
12
|
datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
|
|
13
|
-
datachain/progress.py,sha256=
|
|
13
|
+
datachain/progress.py,sha256=V-TSrzrbmSkxegKxvXmNiGxpfBEL_XM26iVfSfbJJ-c,4962
|
|
14
14
|
datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
datachain/studio.py,sha256=LFSX-HDRiceZDqc4pfy6q97xoejQCeWmuUGomwmOH9Y,9315
|
|
16
16
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
17
|
-
datachain/utils.py,sha256=
|
|
17
|
+
datachain/utils.py,sha256=LBeg-9n48saBTHSPk7u_j-kjJnPUAq5Oyps_peSaqlM,14128
|
|
18
18
|
datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
|
|
19
|
-
datachain/catalog/catalog.py,sha256=
|
|
19
|
+
datachain/catalog/catalog.py,sha256=tPE5aqA6rj0T19JeQdb6A8nHy76R7WwOiQMhWrUfQK8,60511
|
|
20
20
|
datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
|
|
21
21
|
datachain/catalog/loader.py,sha256=HA_mBC7q_My8j2WnSvIjUGuJpl6SIdg5vvy_lagxJlA,5733
|
|
22
22
|
datachain/cli/__init__.py,sha256=ywf3C552rQeXAW7xemodYqxJb1pAeVQulyCJSr7xiCk,8380
|
|
@@ -34,26 +34,27 @@ datachain/cli/parser/job.py,sha256=KIs4_yIcfr09RqG5Bx7YAd-QlUs7IznUhf34OxX1z2c,3
|
|
|
34
34
|
datachain/cli/parser/studio.py,sha256=V3LjaN8gexpMOHdshSCgfwR0LJswE4te0PLqARwwlPA,4044
|
|
35
35
|
datachain/cli/parser/utils.py,sha256=exnlrEQlEa5q0Jh4w_g-1O4niyDixsDpqa1DoIQewok,1590
|
|
36
36
|
datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
|
|
37
|
-
datachain/client/azure.py,sha256=
|
|
37
|
+
datachain/client/azure.py,sha256=lK2yg24doplYsR28CAG9eNr34MJjkoYplT1Urcf0EOQ,3216
|
|
38
38
|
datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
|
|
39
|
-
datachain/client/fsspec.py,sha256=
|
|
40
|
-
datachain/client/gcs.py,sha256
|
|
39
|
+
datachain/client/fsspec.py,sha256=ZelCVAuPnSUYuMD-l7IUsbIKNmWzTm6PKdrlK9Bw5xw,13907
|
|
40
|
+
datachain/client/gcs.py,sha256=-KsOrA_SPS9xCQtizUcI3Iy9lMSY8iVxUSPINOWj7i4,5109
|
|
41
41
|
datachain/client/hf.py,sha256=XeVJVbiNViZCpn3sfb90Fr8SYO3BdLmfE3hOWMoqInE,951
|
|
42
42
|
datachain/client/local.py,sha256=iHQKh-HhoNzqZ2yaiuIfZWGXtt_X9FMSA-TN_03zjPc,4708
|
|
43
|
-
datachain/client/s3.py,sha256=
|
|
43
|
+
datachain/client/s3.py,sha256=qPwpHTD934WypEbetPgn0uAiQ31_mTitCVeQr5QkDc0,6965
|
|
44
44
|
datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
|
|
45
|
-
datachain/data_storage/db_engine.py,sha256=
|
|
45
|
+
datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
|
|
46
46
|
datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
|
|
47
47
|
datachain/data_storage/metastore.py,sha256=hfTITcesE9XlUTxcCcdDyWGGep-QSjJL9DUxko5QCeI,37524
|
|
48
|
-
datachain/data_storage/schema.py,sha256
|
|
48
|
+
datachain/data_storage/schema.py,sha256=8np_S6Ltq7WXfcqpoSeFPryPS7cipdbiSP6UnKJkAac,9516
|
|
49
49
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
50
|
-
datachain/data_storage/sqlite.py,sha256=
|
|
51
|
-
datachain/data_storage/warehouse.py,sha256=
|
|
52
|
-
datachain/
|
|
50
|
+
datachain/data_storage/sqlite.py,sha256=hz6ZBxhEID1AroY5Xs3YbgJf_o9-4JiG2OE5yN5Ci1o,23176
|
|
51
|
+
datachain/data_storage/warehouse.py,sha256=gFAzkt_lNF0KoKylwtmQ9sLg4Soc6AVho0nvkUX67_0,30823
|
|
52
|
+
datachain/diff/__init__.py,sha256=OapNRBsyGDOQHelefUEoXoFHRWCJuBnhvD0ibebKvBc,10486
|
|
53
|
+
datachain/func/__init__.py,sha256=8WWvzWYtOzXmAC1fOMegyoJ-rFnpAca_5UW4gy8BVsk,1077
|
|
53
54
|
datachain/func/aggregate.py,sha256=7_IPrIwb2XSs3zG4iOr1eTvzn6kNVe2mkzvNzjusDHk,10942
|
|
54
55
|
datachain/func/array.py,sha256=zHDNWuWLA7HVa9FEvQeHhVi00_xqenyleTqcLwkXWBI,5477
|
|
55
56
|
datachain/func/base.py,sha256=wA0sBQAVyN9LPxoo7Ox83peS0zUVnyuKxukwAcjGLfY,534
|
|
56
|
-
datachain/func/conditional.py,sha256=
|
|
57
|
+
datachain/func/conditional.py,sha256=AfvGQKBFOJ-wkmDTH0P7pmq42Zf1DRcYRsLGurdAiJE,4766
|
|
57
58
|
datachain/func/func.py,sha256=4FJYMqeGD6xNe5ahrFgfthi0DTFb5w3QDLlXxbpHZjU,15371
|
|
58
59
|
datachain/func/numeric.py,sha256=gMe1Ks0dqQKHkjcpvj7I5S-neECzQ_gltPQLNoaWOyo,5632
|
|
59
60
|
datachain/func/path.py,sha256=mqN_mfkwv44z2II7DMTp_fGGw95hmTCNls_TOFNpr4k,3155
|
|
@@ -61,25 +62,24 @@ datachain/func/random.py,sha256=pENOLj9rSmWfGCnOsUIaCsVC5486zQb66qfQvXaz9Z4,452
|
|
|
61
62
|
datachain/func/string.py,sha256=8az3BTeezlaZt6NW-54GWX7WSosAOVMbTr6bXIYyJq4,5958
|
|
62
63
|
datachain/func/window.py,sha256=0MB1yjpVbwOrl_WNLZ8V3jkJz3o0XlYinpAcZQJuxiA,1688
|
|
63
64
|
datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
64
|
-
datachain/lib/arrow.py,sha256=
|
|
65
|
+
datachain/lib/arrow.py,sha256=N1s59qNLtkpGRk400pztuukzekC_Yt_hvNfhul7Rf_Y,9902
|
|
65
66
|
datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
|
|
66
67
|
datachain/lib/data_model.py,sha256=zS4lmXHVBXc9ntcyea2a1CRLXGSAN_0glXcF88CohgY,2685
|
|
67
68
|
datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
|
|
68
|
-
datachain/lib/dc.py,sha256=
|
|
69
|
-
datachain/lib/
|
|
70
|
-
datachain/lib/
|
|
71
|
-
datachain/lib/hf.py,sha256=a-zFpDmZIR4r8dlNNTjfpAKSnuJ9xyRXlgcdENiXt3E,5864
|
|
69
|
+
datachain/lib/dc.py,sha256=qwZzHQ0blx3nddmNIHjRYaGcsDfoAVIhWYEYeRq8p4Q,91234
|
|
70
|
+
datachain/lib/file.py,sha256=VGC5Bj5BGLIj-6KOICP_H7IbRhYsKuGoh293GCmJCfs,15440
|
|
71
|
+
datachain/lib/hf.py,sha256=CfRbT3VQ8_siLQ0tFuvNwx4n4D2m4AfEjJ9MKO7Ukww,5877
|
|
72
72
|
datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
|
|
73
73
|
datachain/lib/listing.py,sha256=6TRVCoXzC83wLFSyVOdA90_yxbKUmgcVYgIDSYuixiA,6621
|
|
74
74
|
datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
|
|
75
75
|
datachain/lib/meta_formats.py,sha256=hDPfEkcmiLZOjhBBXuareMdnq65Wj8vZvxjmum6cROM,6377
|
|
76
76
|
datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
|
|
77
|
-
datachain/lib/pytorch.py,sha256=
|
|
77
|
+
datachain/lib/pytorch.py,sha256=hExKapbOSA9Bw1DQZd4tMf_xnZ9nCwygSGU9EGUW9Jo,7641
|
|
78
78
|
datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
|
|
79
79
|
datachain/lib/signal_schema.py,sha256=ps5od6zhWtdX3Khx2fwArl2xlGkK8SKi6vCQ6QmbaR0,27404
|
|
80
80
|
datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
|
|
81
81
|
datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
|
|
82
|
-
datachain/lib/udf.py,sha256=
|
|
82
|
+
datachain/lib/udf.py,sha256=gTdUTa2qKpmVQqkYMotXGUvFjiTCUrqR14FctazDcfc,14995
|
|
83
83
|
datachain/lib/udf_signature.py,sha256=GXw24A-Olna6DWCdgy2bC-gZh_gLGPQ-KvjuI6pUjC0,7281
|
|
84
84
|
datachain/lib/utils.py,sha256=QrjVs_oLRXEotOPUYurBJypBFi_ReTJmxcnJeH4j2Uk,1596
|
|
85
85
|
datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -101,8 +101,8 @@ datachain/model/ultralytics/pose.py,sha256=71KBTcoST2wcEtsyGXqLVpvUtqbp9gwZGA15p
|
|
|
101
101
|
datachain/model/ultralytics/segment.py,sha256=Z1ab0tZRJubSYNH4KkFlzhYeGNTfAyC71KmkQcToHDQ,2760
|
|
102
102
|
datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
|
|
103
103
|
datachain/query/batch.py,sha256=6w8gzLTmLeylststu-gT5jIqEfi4-djS7_yTYyeo-fw,4190
|
|
104
|
-
datachain/query/dataset.py,sha256=
|
|
105
|
-
datachain/query/dispatch.py,sha256=
|
|
104
|
+
datachain/query/dataset.py,sha256=VL9iyVlX3jvir5XVnVxvfM2msBvxFsJGiwXaKkJIrmY,56148
|
|
105
|
+
datachain/query/dispatch.py,sha256=_1vjeQ1wjUoxlik55k0JkWqQCUfMjgVWmEOyWRkx0dU,12437
|
|
106
106
|
datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
|
|
107
107
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
108
108
|
datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
|
|
@@ -133,9 +133,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
133
133
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
134
134
|
datachain/toolkit/split.py,sha256=z3zRJNzjWrpPuRw-zgFbCOBKInyYxJew8ygrYQRQLNc,2930
|
|
135
135
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
136
|
-
datachain-0.8.
|
|
137
|
-
datachain-0.8.
|
|
138
|
-
datachain-0.8.
|
|
139
|
-
datachain-0.8.
|
|
140
|
-
datachain-0.8.
|
|
141
|
-
datachain-0.8.
|
|
136
|
+
datachain-0.8.5.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
137
|
+
datachain-0.8.5.dist-info/METADATA,sha256=hwFSeah_bNcAtJvdN_xPnvAFjz17hoK2MCHQbtUZD9I,11064
|
|
138
|
+
datachain-0.8.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
139
|
+
datachain-0.8.5.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
140
|
+
datachain-0.8.5.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
141
|
+
datachain-0.8.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|