datachain 0.8.4__py3-none-any.whl → 0.8.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


datachain/query/dataset.py CHANGED
@@ -35,6 +35,7 @@ from sqlalchemy.sql.schema import TableClause
 from sqlalchemy.sql.selectable import Select
 
 from datachain.asyn import ASYNC_WORKERS, AsyncMapper, OrderedMapper
+from datachain.catalog.catalog import clone_catalog_with_cache
 from datachain.data_storage.schema import (
     PARTITION_COLUMN_ID,
     partition_col_names,
@@ -43,7 +44,8 @@ from datachain.data_storage.schema import (
 from datachain.dataset import DatasetStatus, RowDict
 from datachain.error import DatasetNotFoundError, QueryScriptCancelError
 from datachain.func.base import Function
-from datachain.progress import CombinedDownloadCallback
+from datachain.lib.udf import UDFAdapter, _get_cache
+from datachain.progress import CombinedDownloadCallback, TqdmCombinedDownloadCallback
 from datachain.query.schema import C, UDFParamSpec, normalize_param
 from datachain.query.session import Session
 from datachain.sql.functions.random import rand
@@ -52,6 +54,7 @@ from datachain.utils import (
     determine_processes,
     filtered_cloudpickle_dumps,
     get_datachain_executable,
+    safe_closing,
 )
 
 if TYPE_CHECKING:
@@ -349,19 +352,26 @@ def process_udf_outputs(
     warehouse.insert_rows_done(udf_table)
 
 
-def get_download_callback() -> Callback:
-    return CombinedDownloadCallback(
-        {"desc": "Download", "unit": "B", "unit_scale": True, "unit_divisor": 1024}
+def get_download_callback(suffix: str = "", **kwargs) -> CombinedDownloadCallback:
+    return TqdmCombinedDownloadCallback(
+        {
+            "desc": "Download" + suffix,
+            "unit": "B",
+            "unit_scale": True,
+            "unit_divisor": 1024,
+            "leave": False,
+            **kwargs,
+        },
     )
 
 
 def get_processed_callback() -> Callback:
-    return TqdmCallback({"desc": "Processed", "unit": " rows"})
+    return TqdmCallback({"desc": "Processed", "unit": " rows", "leave": False})
 
 
 def get_generated_callback(is_generator: bool = False) -> Callback:
     if is_generator:
-        return TqdmCallback({"desc": "Generated", "unit": " rows"})
+        return TqdmCallback({"desc": "Generated", "unit": " rows", "leave": False})
     return DEFAULT_CALLBACK
 
 
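Note: the reworked get_download_callback accepts a description suffix and forwards extra keyword arguments into the tqdm settings dict, so callers can create distinct, transient download bars instead of a single fixed "Download" bar. A hypothetical sketch of what the new signature allows; the worker labels and the position kwarg below are illustrative, not datachain's actual call sites:

    # Hypothetical usage: one labelled, transient download bar per worker slot.
    from datachain.query.dataset import get_download_callback

    callbacks = [
        get_download_callback(f" (worker {i})", position=i)  # extra kwargs reach tqdm
        for i in range(4)
    ]
    for cb in callbacks:
        cb.relative_update(1024)  # fsspec Callback API: report 1 KiB downloaded
        cb.close()
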
@@ -412,97 +422,109 @@ class UDFStep(Step, ABC):
 
         udf_fields = [str(c.name) for c in query.selected_columns]
 
-        try:
-            if workers:
-                if self.catalog.in_memory:
-                    raise RuntimeError(
-                        "In-memory databases cannot be used with "
-                        "distributed processing."
-                    )
+        prefetch = self.udf.prefetch
+        with _get_cache(self.catalog.cache, prefetch, use_cache=self.cache) as _cache:
+            catalog = clone_catalog_with_cache(self.catalog, _cache)
+            try:
+                if workers:
+                    if catalog.in_memory:
+                        raise RuntimeError(
+                            "In-memory databases cannot be used with "
+                            "distributed processing."
+                        )
 
-                from datachain.catalog.loader import get_distributed_class
-
-                distributor = get_distributed_class(min_task_size=self.min_task_size)
-                distributor(
-                    self.udf,
-                    self.catalog,
-                    udf_table,
-                    query,
-                    workers,
-                    processes,
-                    udf_fields=udf_fields,
-                    is_generator=self.is_generator,
-                    use_partitioning=use_partitioning,
-                    cache=self.cache,
-                )
-            elif processes:
-                # Parallel processing (faster for more CPU-heavy UDFs)
-                if self.catalog.in_memory:
-                    raise RuntimeError(
-                        "In-memory databases cannot be used with parallel processing."
-                    )
-                udf_info: UdfInfo = {
-                    "udf_data": filtered_cloudpickle_dumps(self.udf),
-                    "catalog_init": self.catalog.get_init_params(),
-                    "metastore_clone_params": self.catalog.metastore.clone_params(),
-                    "warehouse_clone_params": self.catalog.warehouse.clone_params(),
-                    "table": udf_table,
-                    "query": query,
-                    "udf_fields": udf_fields,
-                    "batching": batching,
-                    "processes": processes,
-                    "is_generator": self.is_generator,
-                    "cache": self.cache,
-                }
-
-                # Run the UDFDispatcher in another process to avoid needing
-                # if __name__ == '__main__': in user scripts
-                exec_cmd = get_datachain_executable()
-                cmd = [*exec_cmd, "internal-run-udf"]
-                envs = dict(os.environ)
-                envs.update({"PYTHONPATH": os.getcwd()})
-                process_data = filtered_cloudpickle_dumps(udf_info)
-
-                with subprocess.Popen(cmd, env=envs, stdin=subprocess.PIPE) as process:  # noqa: S603
-                    process.communicate(process_data)
-                    if retval := process.poll():
-                        raise RuntimeError(f"UDF Execution Failed! Exit code: {retval}")
-            else:
-                # Otherwise process single-threaded (faster for smaller UDFs)
-                warehouse = self.catalog.warehouse
-
-                udf_inputs = batching(warehouse.dataset_select_paginated, query)
-                download_cb = get_download_callback()
-                processed_cb = get_processed_callback()
-                generated_cb = get_generated_callback(self.is_generator)
-                try:
-                    udf_results = self.udf.run(
-                        udf_fields,
-                        udf_inputs,
-                        self.catalog,
-                        self.cache,
-                        download_cb,
-                        processed_cb,
+                    from datachain.catalog.loader import get_distributed_class
+
+                    distributor = get_distributed_class(
+                        min_task_size=self.min_task_size
                     )
-                    process_udf_outputs(
-                        warehouse,
-                        udf_table,
-                        udf_results,
+                    distributor(
                         self.udf,
-                        cb=generated_cb,
+                        catalog,
+                        udf_table,
+                        query,
+                        workers,
+                        processes,
+                        udf_fields=udf_fields,
+                        is_generator=self.is_generator,
+                        use_partitioning=use_partitioning,
+                        cache=self.cache,
                     )
-                finally:
-                    download_cb.close()
-                    processed_cb.close()
-                    generated_cb.close()
-
-        except QueryScriptCancelError:
-            self.catalog.warehouse.close()
-            sys.exit(QUERY_SCRIPT_CANCELED_EXIT_CODE)
-        except (Exception, KeyboardInterrupt):
-            # Close any open database connections if an error is encountered
-            self.catalog.warehouse.close()
-            raise
+                elif processes:
+                    # Parallel processing (faster for more CPU-heavy UDFs)
+                    if catalog.in_memory:
+                        raise RuntimeError(
+                            "In-memory databases cannot be used "
+                            "with parallel processing."
+                        )
+                    udf_info: UdfInfo = {
+                        "udf_data": filtered_cloudpickle_dumps(self.udf),
+                        "catalog_init": catalog.get_init_params(),
+                        "metastore_clone_params": catalog.metastore.clone_params(),
+                        "warehouse_clone_params": catalog.warehouse.clone_params(),
+                        "table": udf_table,
+                        "query": query,
+                        "udf_fields": udf_fields,
+                        "batching": batching,
+                        "processes": processes,
+                        "is_generator": self.is_generator,
+                        "cache": self.cache,
+                    }
+
+                    # Run the UDFDispatcher in another process to avoid needing
+                    # if __name__ == '__main__': in user scripts
+                    exec_cmd = get_datachain_executable()
+                    cmd = [*exec_cmd, "internal-run-udf"]
+                    envs = dict(os.environ)
+                    envs.update({"PYTHONPATH": os.getcwd()})
+                    process_data = filtered_cloudpickle_dumps(udf_info)
+
+                    with subprocess.Popen(  # noqa: S603
+                        cmd, env=envs, stdin=subprocess.PIPE
+                    ) as process:
+                        process.communicate(process_data)
+                        if retval := process.poll():
+                            raise RuntimeError(
+                                f"UDF Execution Failed! Exit code: {retval}"
+                            )
+                else:
+                    # Otherwise process single-threaded (faster for smaller UDFs)
+                    warehouse = catalog.warehouse
+
+                    udf_inputs = batching(warehouse.dataset_select_paginated, query)
+                    download_cb = get_download_callback()
+                    processed_cb = get_processed_callback()
+                    generated_cb = get_generated_callback(self.is_generator)
+
+                    try:
+                        udf_results = self.udf.run(
+                            udf_fields,
+                            udf_inputs,
+                            catalog,
+                            self.cache,
+                            download_cb,
+                            processed_cb,
+                        )
+                        with safe_closing(udf_results):
+                            process_udf_outputs(
+                                warehouse,
+                                udf_table,
+                                udf_results,
+                                self.udf,
+                                cb=generated_cb,
+                            )
+                    finally:
+                        download_cb.close()
+                        processed_cb.close()
+                        generated_cb.close()
+
+            except QueryScriptCancelError:
+                self.catalog.warehouse.close()
+                sys.exit(QUERY_SCRIPT_CANCELED_EXIT_CODE)
+            except (Exception, KeyboardInterrupt):
+                # Close any open database connections if an error is encountered
+                self.catalog.warehouse.close()
+                raise
 
     def create_partitions_table(self, query: Select) -> "Table":
         """
@@ -602,6 +624,13 @@ class UDFSignal(UDFStep):
         signal_name_cols = {c.name: c for c in signal_cols}
         cols = signal_cols
 
+        overlap = {c.name for c in original_cols} & {c.name for c in cols}
+        if overlap:
+            raise ValueError(
+                "Column already exists or added in the previous steps: "
+                + ", ".join(overlap)
+            )
+
         def q(*columns):
             cols1 = []
             cols2 = []
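Note: the new guard above surfaces a clear error when a UDF adds a signal column whose name already exists in the query, instead of silently producing a conflicting result. The check itself is plain set intersection over column names; a tiny illustration with stand-in column objects (SimpleNamespace stands in for SQLAlchemy columns):

    from types import SimpleNamespace

    original_cols = [SimpleNamespace(name=n) for n in ("id", "path", "score")]
    cols = [SimpleNamespace(name=n) for n in ("score", "label")]

    overlap = {c.name for c in original_cols} & {c.name for c in cols}
    if overlap:
        raise ValueError(
            "Column already exists or added in the previous steps: "
            + ", ".join(overlap)
        )  # raises: ... previous steps: score
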
datachain/query/dispatch.py CHANGED
@@ -14,7 +14,9 @@ from multiprocess import get_context
 from sqlalchemy.sql import func
 
 from datachain.catalog import Catalog
+from datachain.catalog.catalog import clone_catalog_with_cache
 from datachain.catalog.loader import get_distributed_class
+from datachain.lib.udf import _get_cache
 from datachain.query.batch import RowsOutput, RowsOutputBatch
 from datachain.query.dataset import (
     get_download_callback,
@@ -25,7 +27,7 @@ from datachain.query.dataset import (
 from datachain.query.queue import get_from_queue, put_into_queue
 from datachain.query.udf import UdfInfo
 from datachain.query.utils import get_query_id_column
-from datachain.utils import batched, flatten
+from datachain.utils import batched, flatten, safe_closing
 
 if TYPE_CHECKING:
     from sqlalchemy import Select, Table
@@ -304,21 +306,25 @@ class UDFWorker:
         processed_cb = ProcessedCallback()
         generated_cb = get_generated_callback(self.is_generator)
 
-        udf_results = self.udf.run(
-            self.udf_fields,
-            self.get_inputs(),
-            self.catalog,
-            self.cache,
-            download_cb=self.cb,
-            processed_cb=processed_cb,
-        )
-        process_udf_outputs(
-            self.catalog.warehouse,
-            self.table,
-            self.notify_and_process(udf_results, processed_cb),
-            self.udf,
-            cb=generated_cb,
-        )
+        prefetch = self.udf.prefetch
+        with _get_cache(self.catalog.cache, prefetch, use_cache=self.cache) as _cache:
+            catalog = clone_catalog_with_cache(self.catalog, _cache)
+            udf_results = self.udf.run(
+                self.udf_fields,
+                self.get_inputs(),
+                catalog,
+                self.cache,
+                download_cb=self.cb,
+                processed_cb=processed_cb,
+            )
+            with safe_closing(udf_results):
+                process_udf_outputs(
+                    catalog.warehouse,
+                    self.table,
+                    self.notify_and_process(udf_results, processed_cb),
+                    self.udf,
+                    cb=generated_cb,
+                )
 
         put_into_queue(
             self.done_queue,
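Note: this UDFWorker hunk and the UDFStep hunk earlier wrap UDF execution in the same pair of helpers: _get_cache yields either the catalog's persistent cache (when caching is enabled) or a temporary cache that prefetched files can use and that is discarded afterwards, and clone_catalog_with_cache points a catalog copy at whichever cache was chosen. A minimal sketch of that pattern, using stand-in Cache/Catalog dataclasses rather than datachain's real classes:

    import tempfile
    from collections.abc import Iterator
    from contextlib import contextmanager
    from dataclasses import dataclass, replace

    @dataclass(frozen=True)
    class Cache:  # stand-in for datachain's cache object
        cache_dir: str

    @dataclass(frozen=True)
    class Catalog:  # stand-in for datachain.catalog.Catalog
        cache: Cache

    @contextmanager
    def get_cache(cache: Cache, prefetch: int, use_cache: bool) -> Iterator[Cache]:
        # Prefetching needs somewhere to put files; if the user did not ask for
        # caching, use a throwaway directory so nothing leaks into the real cache.
        if prefetch > 0 and not use_cache:
            with tempfile.TemporaryDirectory() as tmp:  # removed on exit
                yield Cache(tmp)
        else:
            yield cache

    def clone_catalog_with_cache(catalog: Catalog, cache: Cache) -> Catalog:
        return replace(catalog, cache=cache)  # copy with only the cache swapped

    persistent = Catalog(Cache("/tmp/datachain-cache"))
    with get_cache(persistent.cache, prefetch=2, use_cache=False) as c:
        catalog = clone_catalog_with_cache(persistent, c)
        # run the UDF against `catalog`; the temp cache dir vanishes on exit
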
datachain/utils.py CHANGED
@@ -9,6 +9,7 @@ import stat
 import sys
 import time
 from collections.abc import Iterable, Iterator, Sequence
+from contextlib import contextmanager
 from datetime import date, datetime, timezone
 from itertools import chain, islice
 from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union
@@ -22,6 +23,7 @@ from pydantic import BaseModel
 
 if TYPE_CHECKING:
     import pandas as pd
+    from typing_extensions import Self
 
 NUL = b"\0"
 TIME_ZERO = datetime.fromtimestamp(0, tz=timezone.utc)
@@ -33,7 +35,7 @@ ENV_DATACHAIN_GLOBAL_CONFIG_DIR = "DATACHAIN_GLOBAL_CONFIG_DIR"
 STUDIO_URL = "https://studio.datachain.ai"
 
 
-T = TypeVar("T", bound="DataChainDir")
+T = TypeVar("T")
 
 
 class DataChainDir:
@@ -90,7 +92,7 @@ class DataChainDir:
         return osp.join(root_dir, cls.DEFAULT)
 
     @classmethod
-    def find(cls: type[T], create: bool = True) -> T:
+    def find(cls, create: bool = True) -> "Self":
         try:
            root = os.environ[cls.ENV_VAR]
        except KeyError:
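Note: both annotations type a classmethod that returns an instance of whatever subclass it is called on, but Self does it without a module-level TypeVar; that is what lets the bound T above become the plain, unconstrained T now consumed by safe_closing. A compact comparison (Base and Child are illustrative, not datachain classes):

    from typing import TypeVar

    from typing_extensions import Self  # typing.Self on Python >= 3.11

    T = TypeVar("T", bound="Base")

    class Base:
        @classmethod
        def find_old(cls: type[T], create: bool = True) -> T:
            return cls()  # works, but ties the TypeVar to this hierarchy

        @classmethod
        def find_new(cls, create: bool = True) -> Self:
            return cls()  # same inference, no dedicated TypeVar required

    class Child(Base):
        pass

    child = Child.find_new()  # type checkers infer Child, not Base
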
@@ -479,3 +481,12 @@ def row_to_nested_dict(
     for h, v in zip(headers, row):
         nested_dict_path_set(result, h, v)
     return result
+
+
+@contextmanager
+def safe_closing(thing: T) -> Iterator[T]:
+    try:
+        yield thing
+    finally:
+        if hasattr(thing, "close"):
+            thing.close()
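Note: safe_closing is what lets the query code above explicitly close generator-valued UDF results. Closing a generator raises GeneratorExit at its paused yield, which runs any pending finally blocks (releasing files, prefetch workers, and so on) even when the generator was only partially consumed. A self-contained demonstration using the helper exactly as defined above:

    from collections.abc import Iterator
    from contextlib import contextmanager
    from typing import TypeVar

    T = TypeVar("T")

    @contextmanager
    def safe_closing(thing: T) -> Iterator[T]:
        try:
            yield thing
        finally:
            if hasattr(thing, "close"):
                thing.close()

    def rows() -> Iterator[int]:
        try:
            yield from range(1_000)
        finally:
            print("cleanup ran")  # runs via close() even if we stop early

    with safe_closing(rows()) as it:
        next(it)  # consume a single row, then stop
    # "cleanup ran" prints here: close() fired the generator's finally block
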
datachain-0.8.4.dist-info/METADATA → datachain-0.8.5.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: datachain
-Version: 0.8.4
+Version: 0.8.5
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -96,10 +96,10 @@ Requires-Dist: defusedxml; extra == "examples"
 Requires-Dist: accelerate; extra == "examples"
 Requires-Dist: unstructured_ingest[embed-huggingface]; extra == "examples"
 Requires-Dist: unstructured[pdf]<0.16.12; extra == "examples"
-Requires-Dist: pdfplumber==0.11.4; extra == "examples"
+Requires-Dist: pdfplumber==0.11.5; extra == "examples"
 Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
 Requires-Dist: onnx==1.16.1; extra == "examples"
-Requires-Dist: ultralytics==8.3.55; extra == "examples"
+Requires-Dist: ultralytics==8.3.58; extra == "examples"
 
 ================
 |logo| DataChain
@@ -134,7 +134,7 @@ Use Cases
 1. **ETL.** Pythonic framework for describing and running unstructured data transformations
    and enrichments, applying models to data, including LLMs.
 2. **Analytics.** DataChain dataset is a table that combines all the information about data
-   objects in one place + it provides dataframe-like API and vecrorized engine to do analytics
+   objects in one place + it provides dataframe-like API and vectorized engine to do analytics
    on these tables at scale.
 3. **Versioning.** DataChain doesn't store, require moving or copying data (unlike DVC).
    Perfect use case is a bucket with thousands or millions of images, videos, audio, PDFs.
@@ -270,7 +270,7 @@ DataChain Studio Platform
 
 `DataChain Studio`_ is a proprietary solution for teams that offers:
 
--  **Centralized dataset registry** to manage data, code and dependency
+-  **Centralized dataset registry** to manage data, code and
    dependencies in one place.
 -  **Data Lineage** for data sources as well as derivative dataset.
 -  **UI for Multimodal Data** like images, videos, and PDFs.
datachain-0.8.4.dist-info/RECORD → datachain-0.8.5.dist-info/RECORD RENAMED
@@ -1,22 +1,22 @@
 datachain/__init__.py,sha256=ofPJ6B-d-ybSDRrE7J6wqF_ZRAB2W9U8l-eeuBtqPLg,865
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
-datachain/asyn.py,sha256=5aKrjnUxk0mtnZeFKNJd1DCE0MsnSoyJBZkr0y9H_a0,9313
-datachain/cache.py,sha256=4xI0Ct2gVXuLZPqKdbjmfb_KD2klou-9WnL1WNhIuCA,3077
+datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
+datachain/cache.py,sha256=7ABXvxhuYmXPymC_MilxxUk3iIr2y5s2FqCmh4uacaQ,3651
 datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
 datachain/dataset.py,sha256=5HtqZBRaaToa_C74g62bACjBaCRf2Y6BDgIACLhK1ZA,19161
 datachain/error.py,sha256=bxAAL32lSeMgzsQDEHbGTGORj-mPzzpCRvWDPueJNN4,1092
 datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
-datachain/listing.py,sha256=WdiWMVa0xZ-LtR3SJ0gFLgYUI6VaLI0DSEE_KvfikXs,7582
+datachain/listing.py,sha256=1v4ryVp1EbodyA-bmfFqWHWvykfd6ww33pp9pEbozo8,7607
 datachain/node.py,sha256=HSpjBUBQBWXUUpbUEq839dsSc5KR2O8ww1Udl4jQemY,6023
 datachain/nodes_fetcher.py,sha256=ILMzUW5o4_6lUOVrLDC9gJPCXfcgKnMG68plrc7dAOA,1113
 datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
-datachain/progress.py,sha256=jgW_MOGSyY9devjvW3Y1VkCk1z6FkQZ3qMURfRrNXMo,4342
+datachain/progress.py,sha256=V-TSrzrbmSkxegKxvXmNiGxpfBEL_XM26iVfSfbJJ-c,4962
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/studio.py,sha256=LFSX-HDRiceZDqc4pfy6q97xoejQCeWmuUGomwmOH9Y,9315
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
-datachain/utils.py,sha256=UWkPJrzGC6RiNxIvclxbchoJbuMnD0Nvf1ZO6RU6AcY,13912
+datachain/utils.py,sha256=LBeg-9n48saBTHSPk7u_j-kjJnPUAq5Oyps_peSaqlM,14128
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=ixXJKftUIG_ZBPdie1dJAPPHddWV6HZwb3GO-TRHtxY,60103
+datachain/catalog/catalog.py,sha256=tPE5aqA6rj0T19JeQdb6A8nHy76R7WwOiQMhWrUfQK8,60511
 datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
 datachain/catalog/loader.py,sha256=HA_mBC7q_My8j2WnSvIjUGuJpl6SIdg5vvy_lagxJlA,5733
 datachain/cli/__init__.py,sha256=ywf3C552rQeXAW7xemodYqxJb1pAeVQulyCJSr7xiCk,8380
@@ -34,26 +34,27 @@ datachain/cli/parser/job.py,sha256=KIs4_yIcfr09RqG5Bx7YAd-QlUs7IznUhf34OxX1z2c,3
 datachain/cli/parser/studio.py,sha256=V3LjaN8gexpMOHdshSCgfwR0LJswE4te0PLqARwwlPA,4044
 datachain/cli/parser/utils.py,sha256=exnlrEQlEa5q0Jh4w_g-1O4niyDixsDpqa1DoIQewok,1590
 datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
-datachain/client/azure.py,sha256=D-mfLtpiq6O-DaSs-ofEEYhjIZBNfgRw1l9R7UgxEM4,3055
+datachain/client/azure.py,sha256=lK2yg24doplYsR28CAG9eNr34MJjkoYplT1Urcf0EOQ,3216
 datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
-datachain/client/fsspec.py,sha256=6Jwd3yaSG93NCfbRxf6I2IUi5t4nfgCp40De916IcoI,13894
-datachain/client/gcs.py,sha256=MI94GXpCRqAlaF56HNrzQbXA-yR7bn2FOBPzO-lG_SI,4947
+datachain/client/fsspec.py,sha256=ZelCVAuPnSUYuMD-l7IUsbIKNmWzTm6PKdrlK9Bw5xw,13907
+datachain/client/gcs.py,sha256=-KsOrA_SPS9xCQtizUcI3Iy9lMSY8iVxUSPINOWj7i4,5109
 datachain/client/hf.py,sha256=XeVJVbiNViZCpn3sfb90Fr8SYO3BdLmfE3hOWMoqInE,951
 datachain/client/local.py,sha256=iHQKh-HhoNzqZ2yaiuIfZWGXtt_X9FMSA-TN_03zjPc,4708
-datachain/client/s3.py,sha256=67XISS6tW9bnhlbRtKJEAYd_JQvtLHqdPBxm8ySrJl8,6440
+datachain/client/s3.py,sha256=qPwpHTD934WypEbetPgn0uAiQ31_mTitCVeQr5QkDc0,6965
 datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
-datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
+datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
 datachain/data_storage/metastore.py,sha256=hfTITcesE9XlUTxcCcdDyWGGep-QSjJL9DUxko5QCeI,37524
-datachain/data_storage/schema.py,sha256=-QVlRvD0dfu-ZFUxylEoSnLJLnleMEjVlcAb2OGu-AY,9895
+datachain/data_storage/schema.py,sha256=8np_S6Ltq7WXfcqpoSeFPryPS7cipdbiSP6UnKJkAac,9516
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
-datachain/data_storage/sqlite.py,sha256=iJv1QxwVifOowtYhIDqYVoea21dvkQIdxklGNIend3c,22961
-datachain/data_storage/warehouse.py,sha256=CMW36ZNrAzoZ003xGkdnIxC7S0PnQxC2588iUMKI9SM,30779
-datachain/func/__init__.py,sha256=mvvuIycO3emE3pgbc1GNTeYbxnvto1yloULBLBBa0g4,1055
+datachain/data_storage/sqlite.py,sha256=hz6ZBxhEID1AroY5Xs3YbgJf_o9-4JiG2OE5yN5Ci1o,23176
+datachain/data_storage/warehouse.py,sha256=gFAzkt_lNF0KoKylwtmQ9sLg4Soc6AVho0nvkUX67_0,30823
+datachain/diff/__init__.py,sha256=OapNRBsyGDOQHelefUEoXoFHRWCJuBnhvD0ibebKvBc,10486
+datachain/func/__init__.py,sha256=8WWvzWYtOzXmAC1fOMegyoJ-rFnpAca_5UW4gy8BVsk,1077
 datachain/func/aggregate.py,sha256=7_IPrIwb2XSs3zG4iOr1eTvzn6kNVe2mkzvNzjusDHk,10942
 datachain/func/array.py,sha256=zHDNWuWLA7HVa9FEvQeHhVi00_xqenyleTqcLwkXWBI,5477
 datachain/func/base.py,sha256=wA0sBQAVyN9LPxoo7Ox83peS0zUVnyuKxukwAcjGLfY,534
-datachain/func/conditional.py,sha256=iMh13mmeVJq8xa856suPD4ozDWo6-fs3nRtNhmLXGhg,3998
+datachain/func/conditional.py,sha256=AfvGQKBFOJ-wkmDTH0P7pmq42Zf1DRcYRsLGurdAiJE,4766
 datachain/func/func.py,sha256=4FJYMqeGD6xNe5ahrFgfthi0DTFb5w3QDLlXxbpHZjU,15371
 datachain/func/numeric.py,sha256=gMe1Ks0dqQKHkjcpvj7I5S-neECzQ_gltPQLNoaWOyo,5632
 datachain/func/path.py,sha256=mqN_mfkwv44z2II7DMTp_fGGw95hmTCNls_TOFNpr4k,3155
@@ -61,25 +62,24 @@ datachain/func/random.py,sha256=pENOLj9rSmWfGCnOsUIaCsVC5486zQb66qfQvXaz9Z4,452
 datachain/func/string.py,sha256=8az3BTeezlaZt6NW-54GWX7WSosAOVMbTr6bXIYyJq4,5958
 datachain/func/window.py,sha256=0MB1yjpVbwOrl_WNLZ8V3jkJz3o0XlYinpAcZQJuxiA,1688
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/arrow.py,sha256=pclruEeTffWZToeDlYDkWdlHZIyXb1YYFSjyutf8CDk,9867
+datachain/lib/arrow.py,sha256=N1s59qNLtkpGRk400pztuukzekC_Yt_hvNfhul7Rf_Y,9902
 datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
 datachain/lib/data_model.py,sha256=zS4lmXHVBXc9ntcyea2a1CRLXGSAN_0glXcF88CohgY,2685
 datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
-datachain/lib/dc.py,sha256=5AolChfT41QLhzVX1zaNRsZfQ8BAhLDZVvgu8qiU3jg,91145
-datachain/lib/diff.py,sha256=Yurzyi7PzZzY80HOnVTpwtbWzSJ1LqN8NgZWwZOh_UU,6732
-datachain/lib/file.py,sha256=JQ8GfqBwIikdaeSaQzbHo04DZWwNNk_Cgsofh1X7eg8,15047
-datachain/lib/hf.py,sha256=a-zFpDmZIR4r8dlNNTjfpAKSnuJ9xyRXlgcdENiXt3E,5864
+datachain/lib/dc.py,sha256=qwZzHQ0blx3nddmNIHjRYaGcsDfoAVIhWYEYeRq8p4Q,91234
+datachain/lib/file.py,sha256=VGC5Bj5BGLIj-6KOICP_H7IbRhYsKuGoh293GCmJCfs,15440
+datachain/lib/hf.py,sha256=CfRbT3VQ8_siLQ0tFuvNwx4n4D2m4AfEjJ9MKO7Ukww,5877
 datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
 datachain/lib/listing.py,sha256=6TRVCoXzC83wLFSyVOdA90_yxbKUmgcVYgIDSYuixiA,6621
 datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
 datachain/lib/meta_formats.py,sha256=hDPfEkcmiLZOjhBBXuareMdnq65Wj8vZvxjmum6cROM,6377
 datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
-datachain/lib/pytorch.py,sha256=dA3r1JY0wqV_907a1D0lFaEN-7v3fMRpc1ePFE9CnvA,6168
+datachain/lib/pytorch.py,sha256=hExKapbOSA9Bw1DQZd4tMf_xnZ9nCwygSGU9EGUW9Jo,7641
 datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
 datachain/lib/signal_schema.py,sha256=ps5od6zhWtdX3Khx2fwArl2xlGkK8SKi6vCQ6QmbaR0,27404
 datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
 datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
-datachain/lib/udf.py,sha256=Rsf_6mN6qCQVknl99yvi1guta1AMnG9MsoPn14ff5Fc,13609
+datachain/lib/udf.py,sha256=gTdUTa2qKpmVQqkYMotXGUvFjiTCUrqR14FctazDcfc,14995
 datachain/lib/udf_signature.py,sha256=GXw24A-Olna6DWCdgy2bC-gZh_gLGPQ-KvjuI6pUjC0,7281
 datachain/lib/utils.py,sha256=QrjVs_oLRXEotOPUYurBJypBFi_ReTJmxcnJeH4j2Uk,1596
 datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -101,8 +101,8 @@ datachain/model/ultralytics/pose.py,sha256=71KBTcoST2wcEtsyGXqLVpvUtqbp9gwZGA15p
 datachain/model/ultralytics/segment.py,sha256=Z1ab0tZRJubSYNH4KkFlzhYeGNTfAyC71KmkQcToHDQ,2760
 datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
 datachain/query/batch.py,sha256=6w8gzLTmLeylststu-gT5jIqEfi4-djS7_yTYyeo-fw,4190
-datachain/query/dataset.py,sha256=1wJuiFgXgtYarJAgLmgQESrvp0ayIQbJUiCZyAe0NkU,54774
-datachain/query/dispatch.py,sha256=Uw4highKfs70ioEddSK2zknjpvz_q59OHc8s43nXa_I,12004
+datachain/query/dataset.py,sha256=VL9iyVlX3jvir5XVnVxvfM2msBvxFsJGiwXaKkJIrmY,56148
+datachain/query/dispatch.py,sha256=_1vjeQ1wjUoxlik55k0JkWqQCUfMjgVWmEOyWRkx0dU,12437
 datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
 datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
@@ -133,9 +133,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=z3zRJNzjWrpPuRw-zgFbCOBKInyYxJew8ygrYQRQLNc,2930
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.8.4.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.8.4.dist-info/METADATA,sha256=s91ugEL68b3G1-Fv85lcMTj3C2LiKhlzSLrPP5hrb0E,11075
-datachain-0.8.4.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
-datachain-0.8.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.8.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.8.4.dist-info/RECORD,,
+datachain-0.8.5.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.8.5.dist-info/METADATA,sha256=hwFSeah_bNcAtJvdN_xPnvAFjz17hoK2MCHQbtUZD9I,11064
+datachain-0.8.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+datachain-0.8.5.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.8.5.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.8.5.dist-info/RECORD,,
datachain-0.8.4.dist-info/WHEEL → datachain-0.8.5.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.7.0)
+Generator: setuptools (75.8.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 