datachain 0.30.5__py3-none-any.whl → 0.39.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. datachain/__init__.py +4 -0
  2. datachain/asyn.py +11 -12
  3. datachain/cache.py +5 -5
  4. datachain/catalog/__init__.py +0 -2
  5. datachain/catalog/catalog.py +276 -354
  6. datachain/catalog/dependency.py +164 -0
  7. datachain/catalog/loader.py +8 -3
  8. datachain/checkpoint.py +43 -0
  9. datachain/cli/__init__.py +10 -17
  10. datachain/cli/commands/__init__.py +1 -8
  11. datachain/cli/commands/datasets.py +42 -27
  12. datachain/cli/commands/ls.py +15 -15
  13. datachain/cli/commands/show.py +2 -2
  14. datachain/cli/parser/__init__.py +3 -43
  15. datachain/cli/parser/job.py +1 -1
  16. datachain/cli/parser/utils.py +1 -2
  17. datachain/cli/utils.py +2 -15
  18. datachain/client/azure.py +2 -2
  19. datachain/client/fsspec.py +34 -23
  20. datachain/client/gcs.py +3 -3
  21. datachain/client/http.py +157 -0
  22. datachain/client/local.py +11 -7
  23. datachain/client/s3.py +3 -3
  24. datachain/config.py +4 -8
  25. datachain/data_storage/db_engine.py +12 -6
  26. datachain/data_storage/job.py +2 -0
  27. datachain/data_storage/metastore.py +716 -137
  28. datachain/data_storage/schema.py +20 -27
  29. datachain/data_storage/serializer.py +105 -15
  30. datachain/data_storage/sqlite.py +114 -114
  31. datachain/data_storage/warehouse.py +140 -48
  32. datachain/dataset.py +109 -89
  33. datachain/delta.py +117 -42
  34. datachain/diff/__init__.py +25 -33
  35. datachain/error.py +24 -0
  36. datachain/func/aggregate.py +9 -11
  37. datachain/func/array.py +12 -12
  38. datachain/func/base.py +7 -4
  39. datachain/func/conditional.py +9 -13
  40. datachain/func/func.py +63 -45
  41. datachain/func/numeric.py +5 -7
  42. datachain/func/string.py +2 -2
  43. datachain/hash_utils.py +123 -0
  44. datachain/job.py +11 -7
  45. datachain/json.py +138 -0
  46. datachain/lib/arrow.py +18 -15
  47. datachain/lib/audio.py +60 -59
  48. datachain/lib/clip.py +14 -13
  49. datachain/lib/convert/python_to_sql.py +6 -10
  50. datachain/lib/convert/values_to_tuples.py +151 -53
  51. datachain/lib/data_model.py +23 -19
  52. datachain/lib/dataset_info.py +7 -7
  53. datachain/lib/dc/__init__.py +2 -1
  54. datachain/lib/dc/csv.py +22 -26
  55. datachain/lib/dc/database.py +37 -34
  56. datachain/lib/dc/datachain.py +518 -324
  57. datachain/lib/dc/datasets.py +38 -30
  58. datachain/lib/dc/hf.py +16 -20
  59. datachain/lib/dc/json.py +17 -18
  60. datachain/lib/dc/listings.py +5 -8
  61. datachain/lib/dc/pandas.py +3 -6
  62. datachain/lib/dc/parquet.py +33 -21
  63. datachain/lib/dc/records.py +9 -13
  64. datachain/lib/dc/storage.py +103 -65
  65. datachain/lib/dc/storage_pattern.py +251 -0
  66. datachain/lib/dc/utils.py +17 -14
  67. datachain/lib/dc/values.py +3 -6
  68. datachain/lib/file.py +187 -50
  69. datachain/lib/hf.py +7 -5
  70. datachain/lib/image.py +13 -13
  71. datachain/lib/listing.py +5 -5
  72. datachain/lib/listing_info.py +1 -2
  73. datachain/lib/meta_formats.py +2 -3
  74. datachain/lib/model_store.py +20 -8
  75. datachain/lib/namespaces.py +59 -7
  76. datachain/lib/projects.py +51 -9
  77. datachain/lib/pytorch.py +31 -23
  78. datachain/lib/settings.py +188 -85
  79. datachain/lib/signal_schema.py +302 -64
  80. datachain/lib/text.py +8 -7
  81. datachain/lib/udf.py +103 -63
  82. datachain/lib/udf_signature.py +59 -34
  83. datachain/lib/utils.py +20 -0
  84. datachain/lib/video.py +3 -4
  85. datachain/lib/webdataset.py +31 -36
  86. datachain/lib/webdataset_laion.py +15 -16
  87. datachain/listing.py +12 -5
  88. datachain/model/bbox.py +3 -1
  89. datachain/namespace.py +22 -3
  90. datachain/node.py +6 -6
  91. datachain/nodes_thread_pool.py +0 -1
  92. datachain/plugins.py +24 -0
  93. datachain/project.py +4 -4
  94. datachain/query/batch.py +10 -12
  95. datachain/query/dataset.py +376 -194
  96. datachain/query/dispatch.py +112 -84
  97. datachain/query/metrics.py +3 -4
  98. datachain/query/params.py +2 -3
  99. datachain/query/queue.py +2 -1
  100. datachain/query/schema.py +7 -6
  101. datachain/query/session.py +190 -33
  102. datachain/query/udf.py +9 -6
  103. datachain/remote/studio.py +90 -53
  104. datachain/script_meta.py +12 -12
  105. datachain/sql/sqlite/base.py +37 -25
  106. datachain/sql/sqlite/types.py +1 -1
  107. datachain/sql/types.py +36 -5
  108. datachain/studio.py +49 -40
  109. datachain/toolkit/split.py +31 -10
  110. datachain/utils.py +39 -48
  111. {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/METADATA +26 -38
  112. datachain-0.39.0.dist-info/RECORD +173 -0
  113. datachain/cli/commands/query.py +0 -54
  114. datachain/query/utils.py +0 -36
  115. datachain-0.30.5.dist-info/RECORD +0 -168
  116. {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/WHEEL +0 -0
  117. {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/entry_points.txt +0 -0
  118. {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/licenses/LICENSE +0 -0
  119. {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/top_level.txt +0 -0
datachain/utils.py CHANGED
@@ -1,6 +1,5 @@
1
1
  import glob
2
2
  import io
3
- import json
4
3
  import logging
5
4
  import os
6
5
  import os.path as osp
@@ -10,9 +9,8 @@ import sys
10
9
  import time
11
10
  from collections.abc import Iterable, Iterator, Sequence
12
11
  from contextlib import contextmanager
13
- from datetime import date, datetime, timezone
14
- from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union
15
- from uuid import UUID
12
+ from datetime import datetime, timezone
13
+ from typing import TYPE_CHECKING, Any, TypeVar
16
14
 
17
15
  import cloudpickle
18
16
  import platformdirs
@@ -25,7 +23,7 @@ if TYPE_CHECKING:
25
23
  from typing_extensions import Self
26
24
 
27
25
 
28
- DEFAULT_CHUNK_ROWS = 2000
26
+ DEFAULT_BATCH_SIZE = 2000
29
27
 
30
28
  logger = logging.getLogger("datachain")
31
29
 
@@ -53,11 +51,11 @@ class DataChainDir:
53
51
 
54
52
  def __init__(
55
53
  self,
56
- root: Optional[str] = None,
57
- cache: Optional[str] = None,
58
- tmp: Optional[str] = None,
59
- db: Optional[str] = None,
60
- config: Optional[str] = None,
54
+ root: str | None = None,
55
+ cache: str | None = None,
56
+ tmp: str | None = None,
57
+ db: str | None = None,
58
+ config: str | None = None,
61
59
  ) -> None:
62
60
  self.root = osp.abspath(root) if root is not None else self.default_root()
63
61
  self.cache = (
@@ -122,7 +120,7 @@ def global_config_dir():
122
120
  )
123
121
 
124
122
 
125
- def human_time_to_int(time: str) -> Optional[int]:
123
+ def human_time_to_int(time: str) -> int | None:
126
124
  if not time:
127
125
  return None
128
126
 
@@ -146,7 +144,7 @@ def time_to_str(dt):
146
144
  return dt.strftime("%Y-%m-%d %H:%M:%S")
147
145
 
148
146
 
149
- def time_to_local(dt: Union[datetime, str]) -> datetime:
147
+ def time_to_local(dt: datetime | str) -> datetime:
150
148
  # TODO check usage
151
149
  if isinstance(dt, str):
152
150
  dt = isoparse(dt)
@@ -156,11 +154,11 @@ def time_to_local(dt: Union[datetime, str]) -> datetime:
156
154
  return dt
157
155
 
158
156
 
159
- def time_to_local_str(dt: Union[datetime, str]) -> str:
157
+ def time_to_local_str(dt: datetime | str) -> str:
160
158
  return time_to_str(time_to_local(dt))
161
159
 
162
160
 
163
- def is_expired(expires: Optional[Union[datetime, str]]):
161
+ def is_expired(expires: datetime | str | None):
164
162
  if expires:
165
163
  return time_to_local(expires) < time_to_local(datetime.now()) # noqa: DTZ005
166
164
 
@@ -228,7 +226,7 @@ _T_co = TypeVar("_T_co", covariant=True)
228
226
 
229
227
  def _dynamic_batched_core(
230
228
  iterable: Iterable[_T_co],
231
- batch_rows: int,
229
+ batch_size: int,
232
230
  ) -> Iterator[list[_T_co]]:
233
231
  """Core batching logic that yields lists."""
234
232
 
@@ -236,7 +234,7 @@ def _dynamic_batched_core(
236
234
 
237
235
  for item in iterable:
238
236
  # Check if adding this item would exceed limits
239
- if len(batch) >= batch_rows and batch: # Yield current batch if we have one
237
+ if len(batch) >= batch_size and batch: # Yield current batch if we have one
240
238
  yield batch
241
239
  batch = []
242
240
 
@@ -247,23 +245,22 @@ def _dynamic_batched_core(
247
245
  yield batch
248
246
 
249
247
 
250
- def batched(iterable: Iterable[_T_co], batch_rows: int) -> Iterator[tuple[_T_co, ...]]:
248
+ def batched(iterable: Iterable[_T_co], batch_size: int) -> Iterator[tuple[_T_co, ...]]:
251
249
  """
252
- Batch data into tuples of length batch_rows .
250
+ Batch data into tuples of length batch_size.
253
251
  The last batch may be shorter.
254
252
  """
255
- yield from (tuple(batch) for batch in _dynamic_batched_core(iterable, batch_rows))
253
+ yield from (tuple(batch) for batch in _dynamic_batched_core(iterable, batch_size))
256
254
 
257
255
 
258
256
  def batched_it(
259
257
  iterable: Iterable[_T_co],
260
- batch_rows: int = DEFAULT_CHUNK_ROWS,
258
+ batch_size: int = DEFAULT_BATCH_SIZE,
261
259
  ) -> Iterator[Iterator[_T_co]]:
262
260
  """
263
- Batch data into iterators with dynamic sizing
264
- based on row count and memory usage.
261
+ Batch data into iterators with dynamic sizing based on row count and memory usage.
265
262
  """
266
- yield from (iter(batch) for batch in _dynamic_batched_core(iterable, batch_rows))
263
+ yield from (iter(batch) for batch in _dynamic_batched_core(iterable, batch_size))
267
264
 
268
265
 
269
266
  def flatten(items):
@@ -302,9 +299,9 @@ def retry_with_backoff(retries=5, backoff_sec=1, errors=(Exception,)):
302
299
 
303
300
 
304
301
  def determine_workers(
305
- workers: Union[bool, int],
306
- rows_total: Optional[int] = None,
307
- ) -> Union[bool, int]:
302
+ workers: bool | int,
303
+ rows_total: int | None = None,
304
+ ) -> bool | int:
308
305
  """Determine the number of workers to use for distributed processing."""
309
306
  if rows_total is not None and rows_total <= 1:
310
307
  # Disable distributed processing if there is no rows or only one row.
@@ -323,9 +320,9 @@ def determine_workers(
323
320
 
324
321
 
325
322
  def determine_processes(
326
- parallel: Optional[Union[bool, int]] = None,
327
- rows_total: Optional[int] = None,
328
- ) -> Union[bool, int]:
323
+ parallel: bool | int | None = None,
324
+ rows_total: int | None = None,
325
+ ) -> bool | int:
329
326
  """Determine the number of processes to use for parallel processing."""
330
327
  if rows_total is not None and rows_total <= 1:
331
328
  # Disable parallel processing if there is no rows or only one row.
@@ -345,8 +342,8 @@ def determine_processes(
345
342
 
346
343
 
347
344
  def get_env_list(
348
- key: str, default: Optional[Sequence] = None, sep: str = ","
349
- ) -> Optional[Sequence[str]]:
345
+ key: str, default: Sequence | None = None, sep: str = ","
346
+ ) -> Sequence[str] | None:
350
347
  try:
351
348
  str_val = os.environ[key]
352
349
  except KeyError:
@@ -387,10 +384,10 @@ def show_df(
387
384
 
388
385
 
389
386
  def show_records(
390
- records: Optional[list[dict]],
387
+ records: list[dict] | None,
391
388
  collapse_columns: bool = False,
392
389
  system_columns: bool = False,
393
- hidden_fields: Optional[list[str]] = None,
390
+ hidden_fields: list[str] | None = None,
394
391
  ) -> None:
395
392
  import pandas as pd
396
393
 
@@ -403,18 +400,6 @@ def show_records(
403
400
  return show_df(df, collapse_columns=collapse_columns, system_columns=system_columns)
404
401
 
405
402
 
406
- class JSONSerialize(json.JSONEncoder):
407
- def default(self, obj):
408
- if isinstance(obj, bytes):
409
- return list(obj[:1024])
410
- if isinstance(obj, (datetime, date)):
411
- return obj.isoformat()
412
- if isinstance(obj, UUID):
413
- return str(obj)
414
-
415
- return super().default(obj)
416
-
417
-
418
403
  def inside_colab() -> bool:
419
404
  try:
420
405
  from google import colab # type: ignore[attr-defined] # noqa: F401
@@ -434,7 +419,7 @@ def inside_notebook() -> bool:
434
419
 
435
420
  if shell == "ZMQInteractiveShell":
436
421
  try:
437
- import IPython
422
+ import IPython # type: ignore[import-not-found]
438
423
 
439
424
  return IPython.__version__ >= "6.0.0"
440
425
  except ImportError:
@@ -519,7 +504,7 @@ def row_to_nested_dict(
519
504
  ) -> dict[str, Any]:
520
505
  """Converts a row to a nested dict based on the provided headers."""
521
506
  result: dict[str, Any] = {}
522
- for h, v in zip(headers, row):
507
+ for h, v in zip(headers, row, strict=False):
523
508
  nested_dict_path_set(result, h, v)
524
509
  return result
525
510
 
@@ -530,7 +515,7 @@ def safe_closing(thing: T) -> Iterator[T]:
530
515
  yield thing
531
516
  finally:
532
517
  if hasattr(thing, "close"):
533
- thing.close()
518
+ thing.close() # type: ignore[attr-defined]
534
519
 
535
520
 
536
521
  def getenv_bool(name: str, default: bool = False) -> bool:
@@ -538,3 +523,9 @@ def getenv_bool(name: str, default: bool = False) -> bool:
538
523
  if val is None:
539
524
  return default
540
525
  return val.lower() in ("1", "true", "yes", "on")
526
+
527
+
528
+ def ensure_sequence(x) -> Sequence:
529
+ if isinstance(x, Sequence) and not isinstance(x, (str, bytes)):
530
+ return x
531
+ return [x]
@@ -1,20 +1,19 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.30.5
3
+ Version: 0.39.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
7
7
  Project-URL: Documentation, https://datachain.dvc.ai
8
- Project-URL: Issues, https://github.com/iterative/datachain/issues
9
- Project-URL: Source, https://github.com/iterative/datachain
8
+ Project-URL: Issues, https://github.com/datachain-ai/datachain/issues
9
+ Project-URL: Source, https://github.com/datachain-ai/datachain
10
10
  Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.9
12
11
  Classifier: Programming Language :: Python :: 3.10
13
12
  Classifier: Programming Language :: Python :: 3.11
14
13
  Classifier: Programming Language :: Python :: 3.12
15
14
  Classifier: Programming Language :: Python :: 3.13
16
15
  Classifier: Development Status :: 2 - Pre-Alpha
17
- Requires-Python: >=3.9
16
+ Requires-Python: >=3.10
18
17
  Description-Content-Type: text/x-rst
19
18
  License-File: LICENSE
20
19
  Requires-Dist: pyyaml
@@ -42,7 +41,7 @@ Requires-Dist: cloudpickle
42
41
  Requires-Dist: pydantic
43
42
  Requires-Dist: jmespath>=1.0
44
43
  Requires-Dist: datamodel-code-generator>=0.25
45
- Requires-Dist: Pillow<12,>=10.0.0
44
+ Requires-Dist: Pillow<13,>=10.0.0
46
45
  Requires-Dist: msgpack<2,>=1.0.4
47
46
  Requires-Dist: psutil
48
47
  Requires-Dist: huggingface_hub
@@ -56,16 +55,15 @@ Provides-Extra: docs
56
55
  Requires-Dist: mkdocs>=1.5.2; extra == "docs"
57
56
  Requires-Dist: mkdocs-gen-files>=0.5.0; extra == "docs"
58
57
  Requires-Dist: mkdocs-material==9.5.22; extra == "docs"
59
- Requires-Dist: mkdocs-section-index>=0.3.6; extra == "docs"
60
58
  Requires-Dist: mkdocstrings-python>=1.6.3; extra == "docs"
61
59
  Requires-Dist: mkdocs-literate-nav>=0.6.1; extra == "docs"
60
+ Requires-Dist: mkdocs-section-index>=0.3.10; extra == "docs"
62
61
  Requires-Dist: eval-type-backport; extra == "docs"
63
62
  Provides-Extra: torch
64
63
  Requires-Dist: torch>=2.1.0; extra == "torch"
65
64
  Requires-Dist: torchvision; extra == "torch"
66
65
  Requires-Dist: transformers>=4.36.0; extra == "torch"
67
66
  Provides-Extra: audio
68
- Requires-Dist: torchaudio; extra == "audio"
69
67
  Requires-Dist: soundfile; extra == "audio"
70
68
  Provides-Extra: remote
71
69
  Requires-Dist: lz4; extra == "remote"
@@ -85,7 +83,8 @@ Provides-Extra: postgres
85
83
  Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
86
84
  Provides-Extra: tests
87
85
  Requires-Dist: datachain[audio,hf,postgres,remote,torch,vector,video]; extra == "tests"
88
- Requires-Dist: pytest<9,>=8; extra == "tests"
86
+ Requires-Dist: pytest<10,>=8; extra == "tests"
87
+ Requires-Dist: pytest-asyncio; extra == "tests"
89
88
  Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
90
89
  Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
91
90
  Requires-Dist: pytest-mock>=3.12.0; extra == "tests"
@@ -102,7 +101,7 @@ Requires-Dist: scipy; extra == "tests"
102
101
  Requires-Dist: ultralytics; extra == "tests"
103
102
  Provides-Extra: dev
104
103
  Requires-Dist: datachain[docs,tests]; extra == "dev"
105
- Requires-Dist: mypy==1.17.0; extra == "dev"
104
+ Requires-Dist: mypy==1.19.0; extra == "dev"
106
105
  Requires-Dist: types-python-dateutil; extra == "dev"
107
106
  Requires-Dist: types-dateparser; extra == "dev"
108
107
  Requires-Dist: types-pytz; extra == "dev"
@@ -117,6 +116,7 @@ Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
117
116
  Requires-Dist: ultralytics; extra == "examples"
118
117
  Requires-Dist: open_clip_torch; extra == "examples"
119
118
  Requires-Dist: openai; extra == "examples"
119
+ Requires-Dist: torchaudio; extra == "examples"
120
120
  Dynamic: license-file
121
121
 
122
122
  ================
@@ -133,14 +133,14 @@ Dynamic: license-file
133
133
  .. |Python Version| image:: https://img.shields.io/pypi/pyversions/datachain
134
134
  :target: https://pypi.org/project/datachain
135
135
  :alt: Python Version
136
- .. |Codecov| image:: https://codecov.io/gh/iterative/datachain/graph/badge.svg?token=byliXGGyGB
137
- :target: https://codecov.io/gh/iterative/datachain
136
+ .. |Codecov| image:: https://codecov.io/gh/datachain-ai/datachain/graph/badge.svg?token=byliXGGyGB
137
+ :target: https://codecov.io/gh/datachain-ai/datachain
138
138
  :alt: Codecov
139
- .. |Tests| image:: https://github.com/iterative/datachain/actions/workflows/tests.yml/badge.svg
140
- :target: https://github.com/iterative/datachain/actions/workflows/tests.yml
139
+ .. |Tests| image:: https://github.com/datachain-ai/datachain/actions/workflows/tests.yml/badge.svg
140
+ :target: https://github.com/datachain-ai/datachain/actions/workflows/tests.yml
141
141
  :alt: Tests
142
142
  .. |DeepWiki| image:: https://deepwiki.com/badge.svg
143
- :target: https://deepwiki.com/iterative/datachain
143
+ :target: https://deepwiki.com/datachain-ai/datachain
144
144
  :alt: DeepWiki
145
145
 
146
146
  DataChain is a Python-based AI-data warehouse for transforming and analyzing unstructured
@@ -210,45 +210,33 @@ datasets that evolve over time and may occasionally have processing errors.
210
210
  .. code:: py
211
211
 
212
212
  import datachain as dc
213
- from datachain import C, File
214
213
 
215
- def process_file(file: File):
216
- """Process a file, which may occasionally fail."""
214
+ def process_file(file: dc.File) -> tuple[str, str, str]:
215
+ """Analyze a file, may occasionally fail."""
217
216
  try:
218
217
  # Your processing logic here
219
218
  content = file.read_text()
220
- result = analyze_content(content)
221
- return {
222
- "content": content,
223
- "result": result,
224
- "error": None # No error
225
- }
219
+ result = content.upper()
220
+ return content, result, "" # No error
226
221
  except Exception as e:
227
222
  # Return an error that will trigger reprocessing next time
228
- return {
229
- "content": None,
230
- "result": None,
231
- "error": str(e) # Error field will trigger retry
232
- }
223
+ return "", "", str(e) # Error field will trigger retry
233
224
 
234
225
  # Process files efficiently with delta and retry
226
+ # Run it many times, keep adding files, to see delta and retry in action
235
227
  chain = (
236
228
  dc.read_storage(
237
229
  "data/",
238
230
  update=True,
239
231
  delta=True, # Process only new/changed files
240
232
  delta_on="file.path", # Identify files by path
241
- retry_on="error" # Field that indicates errors
233
+ delta_retry="error", # Process files with error again
242
234
  )
243
- .map(processed_result=process_file)
244
- .mutate(
245
- content=C("processed_result.content"),
246
- result=C("processed_result.result"),
247
- error=C("processed_result.error")
248
- )
249
- .save(name="processed_data")
235
+ .map(process_file, output=("content", "result", "error"))
236
+ .save("processed-data")
250
237
  )
251
238
 
239
+
252
240
  Example: LLM based text-file evaluation
253
241
  ---------------------------------------
254
242
 
@@ -355,7 +343,7 @@ DataChain Studio Platform
355
343
  - **Access control** including SSO and team based collaboration.
356
344
 
357
345
  .. _PyPI: https://pypi.org/
358
- .. _file an issue: https://github.com/iterative/datachain/issues
346
+ .. _file an issue: https://github.com/datachain-ai/datachain/issues
359
347
  .. github-only
360
348
  .. _Contributor Guide: https://docs.datachain.ai/contributing
361
349
  .. _Pydantic: https://github.com/pydantic/pydantic
@@ -0,0 +1,173 @@
1
+ datachain/__init__.py,sha256=KVwlU1kC6qWTFQD1DxQs1M_4rh0GC9ZUG3FXJZmQjh4,1852
2
+ datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
3
+ datachain/asyn.py,sha256=3xSQkc2AMABDZcbDdrbJq6QxVWH3yIaY0pzCiXRqR0U,9634
4
+ datachain/cache.py,sha256=Klkc7iL_KvryeZk-UNjtByTFk7URbpb60XblalqHoYI,3604
5
+ datachain/checkpoint.py,sha256=AOMqN_2fNuEBJDAsmc-P4L7FU444eQxTU4MCgr-XEH8,1121
6
+ datachain/config.py,sha256=KPXef6P4NAZiEbSDMUcFwuNVTul2fZBs5xrCbyRl6Tg,4193
7
+ datachain/dataset.py,sha256=U3UAbElJEWND0_-aO8JPdNlAL-BDyOa6D_fVbvs5c1U,25978
8
+ datachain/delta.py,sha256=2c4XO-nKKYD1wMOW_zhEfUceAhH_8nISfnKVpZbScDg,12194
9
+ datachain/error.py,sha256=5Qkd55kl4NIG33iwOeY1Qa6SM8FLftIZ8vcf663r-9Y,1961
10
+ datachain/hash_utils.py,sha256=uU7xa-XQRC_8zmUxHg9h__Kts4TxW566311d1LRTz-Q,4510
11
+ datachain/job.py,sha256=mx4Cs-qX8DLyaie5JbZMSYYQSRspbQruM7L1s5kg1ws,1374
12
+ datachain/json.py,sha256=yK6ID17dP0vd0NIhSHAa3u2ym4N0jjtDEpBZ5PUuRlE,3673
13
+ datachain/listing.py,sha256=yIZYiCLVHQirGsXs52Ssph2j-woCIRnagqLJIRT1l7U,7406
14
+ datachain/namespace.py,sha256=YhxHdmCekWH4l-ZayNHGiPy5KAz_5LGqviivFYu0u9U,2337
15
+ datachain/node.py,sha256=gBLCoh-3nyaCDnMPt3gLS_t3m7qL0_JDiN02a9DH_kY,5552
16
+ datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
17
+ datachain/nodes_thread_pool.py,sha256=Fh4YZlwNzZMJxoUpuLGPKAnsvTGn8zy6xfrxBOeySfA,3971
18
+ datachain/plugins.py,sha256=QRpM-xcDAyGGOGAO6K3Hk4rGb78plgQ-k0iVWU8tn-I,693
19
+ datachain/progress.py,sha256=lRzxoYP4Qv2XBwD78sOkmYRzHFpZ2ExVNJF8wAeICtY,770
20
+ datachain/project.py,sha256=2_RAUmkDpbk5q5Iab9H6_k3S8Cju7dFRofk3OaaaQYo,2261
21
+ datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ datachain/script_meta.py,sha256=vQOnPnqsNEDGtCHfy2DozcOdPwjGQ-dwDrrgHGpw90s,4935
23
+ datachain/semver.py,sha256=UB8GHPBtAP3UJGeiuJoInD7SK-DnB93_Xd1qy_CQ9cU,2074
24
+ datachain/studio.py,sha256=seWeSACPVgUI23Z8UbgjeJQIZWVLu45aVG4p7kMDj5E,15611
25
+ datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
26
+ datachain/utils.py,sha256=qizzUaT3SOoO7cWeZImWAMG00sMx69_j6EmcaiqO_1s,15414
27
+ datachain/catalog/__init__.py,sha256=r5aZompzY1jKJGt9PmvMTrObx1rQDiG5PsRp_Da3yLs,299
28
+ datachain/catalog/catalog.py,sha256=pM1lXssrI4PdnGms6QJDVMGjBJrwZyKd5djskbndG54,64739
29
+ datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
30
+ datachain/catalog/dependency.py,sha256=EHuu_Ox76sEhy71NXjFJiHxQVTz19KecqBcrjwFCa7M,5280
31
+ datachain/catalog/loader.py,sha256=VTaGPc4ASNdUdr7Elobp8qcXUOHwd0oqQcnk3LUwtF0,6244
32
+ datachain/cli/__init__.py,sha256=DrJ9hABuo7ujKwoqt66wiH5vLNYFw_3yz6KJ9khXYdk,8133
33
+ datachain/cli/utils.py,sha256=v4__FknyILQejFE-GdqvBamOaXhfdeHxmJ9hBmEbwFE,2471
34
+ datachain/cli/commands/__init__.py,sha256=vH52z3H9wmfMJRiaFY-_Gk8tIzK3x6A_26oE79RROY8,434
35
+ datachain/cli/commands/datasets.py,sha256=S6sMuG9MvC2svQzfVZ9DKkLqMONWWJ1P5k1fywkrBr8,6915
36
+ datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
37
+ datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR88,198
38
+ datachain/cli/commands/ls.py,sha256=0QyE1dSUES4EmhG_qcisfdDdr8m2G6i5nmQS7sjGeVU,5293
39
+ datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibVE,600
40
+ datachain/cli/commands/show.py,sha256=zkgBEVW04j41ewNnyD0nUY-NcCBANoEpQ8dpIrKof5U,1593
41
+ datachain/cli/parser/__init__.py,sha256=ub3Y7AM0stEKmW3QZPcXhO_Pefy98GEMAgKjuAcwmt0,14019
42
+ datachain/cli/parser/job.py,sha256=f6IQrFUyXCSseOYxlDFPs414MsJszA8YWxEYAv530r0,6105
43
+ datachain/cli/parser/studio.py,sha256=Bo__LKM7qhJGgkyX8M_bCvgZ2Gvqq6r_X4t1NdtaBIY,3881
44
+ datachain/cli/parser/utils.py,sha256=WbuKIDTTAPItgbMoxyz-DBInHKxOkNfgW7L2V0hcBxs,2857
45
+ datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
46
+ datachain/client/azure.py,sha256=cknEJaVrSGQNApdcDKrq9HykJJWyzEPetfs_sYZuahE,3257
47
+ datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
48
+ datachain/client/fsspec.py,sha256=MZ1vmCIUIIxFCI9mlp_Fbd0KuqTL49vf-qQ5OMj5mFg,14603
49
+ datachain/client/gcs.py,sha256=Qy2mqYJnZCbTR80D2ZLygLjaVkmzjkYJ7L3LeV3449M,5188
50
+ datachain/client/hf.py,sha256=n5xJZdvNLS-SqokxuBCIPfGbhIeC_XfLm_BNYtEVvg4,2677
51
+ datachain/client/http.py,sha256=ebeXD0xy73VFmxuAr9Dk79KpyvCk2TL8Qx-3JTK02rY,5152
52
+ datachain/client/local.py,sha256=slWi7dB77A8XsNcwDFMyKzaDyUONwaCVwRAiMkLpBV8,4739
53
+ datachain/client/s3.py,sha256=KS9o0jxXJRFp7Isdibz366VaWrULmpegzfYdurJpAl0,7499
54
+ datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
55
+ datachain/data_storage/db_engine.py,sha256=-XHNnmLtg8_oqmlYB1aQ2swVB7G5P3vllNPmH0mWQCQ,3831
56
+ datachain/data_storage/job.py,sha256=KTNKarEq1p7sXEsfV3oeQHXWVy6stv4wmD3qv4nF4F4,465
57
+ datachain/data_storage/metastore.py,sha256=kYSqEx1ZvcnqyGqLOsykD0owjiCsvCuGyD14dkYwz10,74006
58
+ datachain/data_storage/schema.py,sha256=3fAgiE11TIDYCW7EbTdiOm61SErRitvsLr7YPnUlVm0,9801
59
+ datachain/data_storage/serializer.py,sha256=lhWm_N1F97S8wJW647sJcdOZQSX25XVERL5xcw-XrWI,3877
60
+ datachain/data_storage/sqlite.py,sha256=qFmf0YQKNsPmlkD1wiVYIo5n5QodksjzQZsx0gJgw28,30107
61
+ datachain/data_storage/warehouse.py,sha256=AI281_7_VEkP7lgh5_vplow8ue_cB6XquoaSOuj8Rtc,35972
62
+ datachain/diff/__init__.py,sha256=lGrygGzdWSSYJ1DgX4h2q_ko5QINEW8PKfxOwE9ZFnI,9394
63
+ datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
+ datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
65
+ datachain/fs/utils.py,sha256=s-FkTOCGBk-b6TT3toQH51s9608pofoFjUSTc1yy7oE,825
66
+ datachain/func/__init__.py,sha256=9K2MEC1NclY_zWuqevfEUOcrSE26cXDVnGqhNTj4lF8,1288
67
+ datachain/func/aggregate.py,sha256=jtyzePQ_vRDZuEeLM7YZw5PzekOU86c4HDR5qeIWLwY,12363
68
+ datachain/func/array.py,sha256=EpGSeBnnwmX16XqL8v7aqkJ8PAqkMZKLcnA0Auke0VI,13565
69
+ datachain/func/base.py,sha256=ykhGUBlMGWSNN1fRDlPSyDJVf8DV7k_r_tyNVm_o7ZY,590
70
+ datachain/func/conditional.py,sha256=vpBLKqwIA0OKrAClvQhnN6NiplCitXMOAim-enyrjUk,10076
71
+ datachain/func/func.py,sha256=Au3B51AiJSh7OC2LANNBv220L6vhYu1CTgrEz0Xxkqg,18092
72
+ datachain/func/numeric.py,sha256=L2fq6DgZLTEsqVPrPIf4b84WRxidZvQcBZLMd-kOBPo,6932
73
+ datachain/func/path.py,sha256=9Jas35QhEtRai4l54hMqVvuJsqxHvOx88oo4vym1H_I,4077
74
+ datachain/func/random.py,sha256=t7jwXsI8-hy0qAdvjAntgzy-AHtTAfozlZ1CpKR-QZE,458
75
+ datachain/func/string.py,sha256=kXkPHimtA__EVg_Th1yldGaLJpw4HYVhIeYtKy3DuyQ,7406
76
+ datachain/func/window.py,sha256=ImyRpc1QI8QUSPO7KdD60e_DPVo7Ja0G5kcm6BlyMcw,1584
77
+ datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
+ datachain/lib/arrow.py,sha256=EoGpA9Lqv5xUmc5P0SQYfjBQ-wSNHN16PvkZ6exMcsI,10821
79
+ datachain/lib/audio.py,sha256=hHG29vqrV389im152wCjh80d0xqXGGvFnUpUwkzZejQ,7385
80
+ datachain/lib/clip.py,sha256=nF8-N6Uz0MbAsPJBY2iXEYa3DPLo80OOer5SRNAtcGM,6149
81
+ datachain/lib/data_model.py,sha256=srz0pfFohSXwFnt5OMi1fNjSbKkFq8vzkcO0n4PHxlQ,3904
82
+ datachain/lib/dataset_info.py,sha256=Yl11SqW47Uf-WkuB9zTVUS17ALGTQiWkvxzKLN2GWQ4,3288
83
+ datachain/lib/file.py,sha256=-iLKN2lnn4fHxDGk91b4FDjTgQFUvk3KCX-ZgxvKh-k,49277
84
+ datachain/lib/hf.py,sha256=jmyqRDXdksojUJCiU_2XFSIoMzzDJAZQs9xr-sEwEJc,7281
85
+ datachain/lib/image.py,sha256=xKyVsFKi1Shji7oluvd4Ibr3Atiz-Q0MNJhIsXeGcMI,3197
86
+ datachain/lib/listing.py,sha256=pXRzHCUxX0b1sZrFWPN77bHY69Hrn6rFwr5IzSxuhvI,7060
87
+ datachain/lib/listing_info.py,sha256=lnl5oQpkt7kVHyb06TdWhy5Fi2A0gBVIbnECgDtvLec,1079
88
+ datachain/lib/meta_formats.py,sha256=HO8WxPeQLdVSNh2sOy0XSc66yRhBjFOjNgkzIVFIFBk,6345
89
+ datachain/lib/model_store.py,sha256=GQoqNTPNwizX1hB4peKWs-h0VovJxX8dt1MdnTPI7bQ,3713
90
+ datachain/lib/namespaces.py,sha256=d4Zt2mYdGFctkA20SkB1woUxrNI4JwSxruxUGKwfauc,3731
91
+ datachain/lib/projects.py,sha256=FfBfGoWvy1SccCQW2ITKdDA6V03FbnRCusOeHdPHr6Y,4059
92
+ datachain/lib/pytorch.py,sha256=Ux_Gpl0dUJuME7d8qJSpgVQL0kQv6-lnq22KXQCy1NE,8069
93
+ datachain/lib/settings.py,sha256=maMtywOUetJvEApDiMVfTTq-oaRNvUIfDCrqZwFL2GE,7559
94
+ datachain/lib/signal_schema.py,sha256=Pi5HMYisCio4dq3zmKn1ijDf6-9vHvZONVfa9kVWSu0,47623
95
+ datachain/lib/tar.py,sha256=MLcVjzIgBqRuJacCNpZ6kwSZNq1i2tLyROc8PVprHsA,999
96
+ datachain/lib/text.py,sha256=uZom8qXfrv9QYvuDrvd0PuvPmj6qCsjVUwZSNr60BI4,1242
97
+ datachain/lib/udf.py,sha256=CUodkCS3GCndDSjFwRh4m9zB9SsaPpOHk_zVaxJdm28,19356
98
+ datachain/lib/udf_signature.py,sha256=C3J0E_01elO81swP7Sy6Ne5P__BbHTfWc_ena-CCcmQ,8563
99
+ datachain/lib/utils.py,sha256=506ULwIQYwT1DWPyI3WwWS10qQ-qgUf39GDDoVcT2G0,5220
100
+ datachain/lib/video.py,sha256=7Q4oWvf4_HMX2QrkjZwTE0e8mZpoQlBxOxV9k2ubRag,6804
101
+ datachain/lib/webdataset.py,sha256=GKq_R9wDIH8Ckn9Hh-ipDu9X6eIsCPvcR95nCqSZ9yA,7315
102
+ datachain/lib/webdataset_laion.py,sha256=3vMWW_MrMvTA4FvwJVRHHL-Hxf9bq0wmLzefXyNW2bM,2486
103
+ datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
104
+ datachain/lib/convert/flatten.py,sha256=_5rjGFnN6t1KCX5ftL5rG7tiiNat7j0SdNqajO15KUY,1539
105
+ datachain/lib/convert/python_to_sql.py,sha256=wfnqJ2vRL5UydNPQHshd82hUONsDBa4XyobCSTGqcEo,3187
106
+ datachain/lib/convert/sql_to_python.py,sha256=Gxc4FylWC_Pvvuawuc2MKZIiuAWI7wje8pyeN1MxRrU,670
107
+ datachain/lib/convert/unflatten.py,sha256=ysMkstwJzPMWUlnxn-Z-tXJR3wmhjHeSN_P-sDcLS6s,2010
108
+ datachain/lib/convert/values_to_tuples.py,sha256=25VmP7b9Fa3BPAlSFFo54O9FAgEAqicmE44R4pPsLcM,7313
109
+ datachain/lib/dc/__init__.py,sha256=xguTjbh3iMDTUjfggCDGYZNA0_1iOVvkc-53f6cUpXc,926
110
+ datachain/lib/dc/csv.py,sha256=fIfj5-2Ix4z5D5yZueagd5WUWw86pusJ9JJKD-U3KGg,4407
111
+ datachain/lib/dc/database.py,sha256=RNXpTUdEyBAtlMJIesVfEzZee5peTDPszZPwFQtqEYA,14817
112
+ datachain/lib/dc/datachain.py,sha256=iYScoUXH1QeqffcoLBtj_RnZwcw9MSH-qF7IogK0b3g,107239
113
+ datachain/lib/dc/datasets.py,sha256=oY1t8QBAaZdhjwR439zZT74hMOspewVCrgdwy6juXng,15321
114
+ datachain/lib/dc/hf.py,sha256=FeruEO176L2qQ1Mnx0QmK4kV0GuQ4xtj717N8fGJrBI,2849
115
+ datachain/lib/dc/json.py,sha256=iJ6G0jwTKz8xtfh1eICShnWk_bAMWjF5bFnOXLHaTlw,2683
116
+ datachain/lib/dc/listings.py,sha256=0XTZERQZ2ErP3LSVg9lF9i3alKebqA1Kip2Zf15unUM,4507
117
+ datachain/lib/dc/pandas.py,sha256=o9rTcZf27-3mCEaDdX1ZzM0I4bSOsu-4mA2zK6rWoS4,1460
118
+ datachain/lib/dc/parquet.py,sha256=wa_VazXotY5RZ8ypC0_M9Qo30tamzXmYeVE6P-NcQ1Y,2375
119
+ datachain/lib/dc/records.py,sha256=WvbaLhMqM9e54gJLLeG54QX5ZXkkBIK3FokojLTSbZc,2974
120
+ datachain/lib/dc/storage.py,sha256=zfVMkYqwmhI4bnOqyO6bW5gg_DfdYPM7ltWLTHDjGZo,9737
121
+ datachain/lib/dc/storage_pattern.py,sha256=TqaDb5yq050W9IxpESz9iotjs0R__i5ngRtVo5BmJ-8,7645
122
+ datachain/lib/dc/utils.py,sha256=Ya2rqVj0UHzY_gxdZrIKnmoPZaKgLs397Y1o6M6kGQE,4273
123
+ datachain/lib/dc/values.py,sha256=-EI3xYUNzfwzogbW8WdHX0XbWev-je6_5-CnDsLRcF4,1399
124
+ datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
125
+ datachain/model/bbox.py,sha256=7RT-xsKL8Rywy7l_R9DhzsTF4eQx35VCNdbaQEFDcVc,9362
126
+ datachain/model/pose.py,sha256=rjquA6M-I-Y30Xm6YSkGv1OY52hJZmR2AuxbIpE5uD0,3865
127
+ datachain/model/segment.py,sha256=NhcEYB_KVa0aLQYiZ4jEwkylH9QBLd8fZhmg6PVnx1Y,1967
128
+ datachain/model/utils.py,sha256=5elwCKleOO6CZM0IuWjFykPekrhc5m7V4jSIOcgGMms,6733
129
+ datachain/model/ultralytics/__init__.py,sha256=EvcNX9qUyxKXXlKCPpsXeRrabyXk5E9EkN-tyiYkfS4,750
130
+ datachain/model/ultralytics/bbox.py,sha256=C-aDiBhVa_ML2oERWvksRkyMU1XuYSpb6eItHB5q0qc,4764
131
+ datachain/model/ultralytics/pose.py,sha256=pvoXrWWUSWT_UBaMwUb5MBHAY57Co2HFDPigFYNZWUA,3392
132
+ datachain/model/ultralytics/segment.py,sha256=v9_xDxd5zw_I8rXsbl7yQXgEdTs2T38zyY_Y4XGN8ok,3194
133
+ datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
134
+ datachain/query/batch.py,sha256=ugTlSFqh_kxMcG6vJ5XrEzG9jBXRdb7KRAEEsFWiPew,4190
135
+ datachain/query/dataset.py,sha256=CxHOFJwMhlmZUHhMKVw22HAuMOgORiQJ-25qJWCjk7g,69548
136
+ datachain/query/dispatch.py,sha256=VOq0Lzp-Dmy4dkVIeqL5ekYIe5QqcN4o0_xtPJGI-X4,16907
137
+ datachain/query/metrics.py,sha256=qOMHiYPTMtVs2zI-mUSy8OPAVwrg4oJtVF85B9tdQyM,810
138
+ datachain/query/params.py,sha256=JkVz6IKUIpF58JZRkUXFT8DAHX2yfaULbhVaGmHKFLc,826
139
+ datachain/query/queue.py,sha256=kCetMG6y7_ynV_jJDAXkLsf8WsVZCEk1fAuQGd7yTOo,3543
140
+ datachain/query/schema.py,sha256=Cn1keXjktptAbEDbHlxSzdoCu5H6h_Vzp_DtNpMSr5w,6697
141
+ datachain/query/session.py,sha256=IN0ruNMxEU5K5xp_cL4-zMOxeYD0EnCAUunHa0zNdJ4,12793
142
+ datachain/query/udf.py,sha256=SsVHb_TksVygWsqKX7SX8SOnjOyyd8n5NPF7i-EKb2Q,1364
143
+ datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
144
+ datachain/remote/studio.py,sha256=4voPFVDXAU6BSBHDAvB_LTYiCACA6Zr0IfYnDjrnN6s,16737
145
+ datachain/sql/__init__.py,sha256=8D2omsBiATt8bjLjGo6jBEtaKEkOlnlNFWhVryHMDv0,388
146
+ datachain/sql/postgresql_dialect.py,sha256=pDTfH8xaXz5xZsq8O1aQUvWLRIv_ogYeAqtmKlPp3Rw,280
147
+ datachain/sql/postgresql_types.py,sha256=ryb_0lzuA9UOJ_B6nW9Yb8nJjzeSmEItAL_Ceue65lc,627
148
+ datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
149
+ datachain/sql/types.py,sha256=u0WNBjZafuG54lHQUtSSUOd14vHASQ1Nk3bKYjgPpJs,16152
150
+ datachain/sql/utils.py,sha256=rzlJw08etivdrcuQPqNVvVWhuVSyUPUQEEc6DOhu258,818
151
+ datachain/sql/default/__init__.py,sha256=XQ2cEZpzWiABqjV-6yYHUBGI9vN_UHxbxZENESmVAWw,45
152
+ datachain/sql/default/base.py,sha256=QD-31C6JnyOXzogyDx90sUhm7QvgXIYpeHEASH84igU,628
153
+ datachain/sql/functions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
154
+ datachain/sql/functions/aggregate.py,sha256=3AQdA8YHPFdtCEfwZKQXTT8SlQWdG9gD5PBtGN3Odqs,944
155
+ datachain/sql/functions/array.py,sha256=eRWpDRItwIG87-AU7jb8WuiR-MGuhklVxWwR7t97GvY,2050
156
+ datachain/sql/functions/conditional.py,sha256=q7YUKfunXeEldXaxgT-p5pUTcOEVU_tcQ2BJlquTRPs,207
157
+ datachain/sql/functions/numeric.py,sha256=BK2KCiPSgM2IveCq-9M_PG3CtPBlztaS9TTn1LGzyLs,1250
158
+ datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0mg,1294
159
+ datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
160
+ datachain/sql/functions/string.py,sha256=E-T9OIzUR-GKaLgjZsEtg5CJrY_sLf1lt1awTvY7w2w,1426
161
+ datachain/sql/sqlite/__init__.py,sha256=PsLaDSij9a03VxGSpagpNl7NQsGtgm72ArUeALZONoc,183
162
+ datachain/sql/sqlite/base.py,sha256=llTx_wzuXo7pCM95eq8xvZ1iFOeKlcN7WVyJyHlBrW8,21840
163
+ datachain/sql/sqlite/types.py,sha256=EDmvEJz8oBQnv0r_ST-M1jQVPC39ceAC5pgzSB9vnkg,1821
164
+ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
165
+ datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
166
+ datachain/toolkit/split.py,sha256=9HHZl0fGs5Zj8b9l2L3IKf0AiiVNL9SnWbc2rfDiXRA,3710
167
+ datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
168
+ datachain-0.39.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
169
+ datachain-0.39.0.dist-info/METADATA,sha256=0rSXP0DTd51AK74TiFsQL1ftNgY6huoeHUu8IvqWEHI,13635
170
+ datachain-0.39.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
171
+ datachain-0.39.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
172
+ datachain-0.39.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
173
+ datachain-0.39.0.dist-info/RECORD,,
@@ -1,54 +0,0 @@
1
- import os
2
- import sys
3
- import traceback
4
- from typing import TYPE_CHECKING, Optional
5
-
6
- if TYPE_CHECKING:
7
- from datachain.catalog import Catalog
8
-
9
-
10
- def query(
11
- catalog: "Catalog",
12
- script: str,
13
- parallel: Optional[int] = None,
14
- params: Optional[dict[str, str]] = None,
15
- ) -> None:
16
- from datachain.data_storage import JobQueryType, JobStatus
17
-
18
- with open(script, encoding="utf-8") as f:
19
- script_content = f.read()
20
-
21
- if parallel is not None:
22
- # This also sets this environment variable for any subprocesses
23
- os.environ["DATACHAIN_SETTINGS_PARALLEL"] = str(parallel)
24
-
25
- python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
26
- python_executable = sys.executable
27
-
28
- job_id = catalog.metastore.create_job(
29
- name=os.path.basename(script),
30
- query=script_content,
31
- query_type=JobQueryType.PYTHON,
32
- status=JobStatus.RUNNING,
33
- python_version=python_version,
34
- params=params,
35
- )
36
-
37
- try:
38
- catalog.query(
39
- script_content,
40
- python_executable=python_executable,
41
- params=params,
42
- job_id=job_id,
43
- )
44
- except Exception as e:
45
- error_message = str(e)
46
- error_stack = traceback.format_exc()
47
- catalog.metastore.set_job_status(
48
- job_id,
49
- JobStatus.FAILED,
50
- error_message=error_message,
51
- error_stack=error_stack,
52
- )
53
- raise
54
- catalog.metastore.set_job_status(job_id, JobStatus.COMPLETE)
datachain/query/utils.py DELETED
@@ -1,36 +0,0 @@
1
- from typing import Optional, Union
2
-
3
- import sqlalchemy as sa
4
-
5
- ColT = Union[sa.Column, sa.ColumnElement, sa.TextClause, sa.Label]
6
-
7
-
8
- def column_name(col: ColT) -> str:
9
- """Returns column name from column element."""
10
- return col.name if isinstance(col, (sa.Column, sa.Label)) else str(col)
11
-
12
-
13
- def get_query_column(query: sa.Select, name: str) -> Optional[ColT]:
14
- """Returns column element from query by name or None if column not found."""
15
- return next((col for col in query.inner_columns if column_name(col) == name), None)
16
-
17
-
18
- def get_query_id_column(query: sa.Select) -> Optional[sa.ColumnElement]:
19
- """Returns ID column element from query or None if column not found."""
20
- col = get_query_column(query, "sys__id")
21
- return col if col is not None and isinstance(col, sa.ColumnElement) else None
22
-
23
-
24
- def select_only_columns(query: sa.Select, *names: str) -> sa.Select:
25
- """Returns query selecting defined columns only."""
26
- if not names:
27
- return query
28
-
29
- cols: list[ColT] = []
30
- for name in names:
31
- col = get_query_column(query, name)
32
- if col is None:
33
- raise ValueError(f"Column '{name}' not found in query")
34
- cols.append(col)
35
-
36
- return query.with_only_columns(*cols)