datachain 0.14.1__py3-none-any.whl → 0.14.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


datachain/__init__.py CHANGED
@@ -5,16 +5,16 @@ from datachain.lib.dc import (
     DataChain,
     Sys,
     datasets,
-    from_csv,
-    from_dataset,
-    from_hf,
-    from_json,
-    from_pandas,
-    from_parquet,
-    from_records,
-    from_storage,
-    from_values,
     listings,
+    read_csv,
+    read_dataset,
+    read_hf,
+    read_json,
+    read_pandas,
+    read_parquet,
+    read_records,
+    read_storage,
+    read_values,
 )
 from datachain.lib.file import (
     ArrowRow,
@@ -61,17 +61,17 @@ __all__ = [
     "VideoFragment",
     "VideoFrame",
     "datasets",
-    "from_csv",
-    "from_dataset",
-    "from_hf",
-    "from_json",
-    "from_pandas",
-    "from_parquet",
-    "from_records",
-    "from_storage",
-    "from_values",
     "is_chain_type",
     "listings",
     "metrics",
     "param",
+    "read_csv",
+    "read_dataset",
+    "read_hf",
+    "read_json",
+    "read_pandas",
+    "read_parquet",
+    "read_records",
+    "read_storage",
+    "read_values",
 ]
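The change above is the theme of this release: every top-level `from_*` constructor is renamed to `read_*`, and the old names are dropped from the package namespace. A minimal migration sketch (the bucket path is a placeholder):

```py
import datachain as dc

# datachain <= 0.14.1 (removed from the top-level namespace in 0.14.2):
# chain = dc.from_storage("s3://my-bucket/my-dir")
# values = dc.from_values(fib=[1, 2, 3, 5, 8])

# datachain >= 0.14.2:
chain = dc.read_storage("s3://my-bucket/my-dir")
values = dc.read_values(fib=[1, 2, 3, 5, 8])
```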
datachain/catalog/catalog.py CHANGED
@@ -583,10 +583,10 @@ class Catalog:
         object_name="file",
         skip_indexing=False,
     ) -> tuple[Optional["Listing"], "Client", str]:
-        from datachain import from_storage
+        from datachain import read_storage
         from datachain.listing import Listing
 
-        from_storage(
+        read_storage(
             source, session=self.session, update=update, object_name=object_name
         ).exec()
 
@@ -994,14 +994,14 @@ class Catalog:
         if not sources:
             raise ValueError("Sources needs to be non empty list")
 
-        from datachain import from_dataset, from_storage
+        from datachain import read_dataset, read_storage
 
         chains = []
         for source in sources:
             if source.startswith(DATASET_PREFIX):
-                dc = from_dataset(source[len(DATASET_PREFIX) :], session=self.session)
+                dc = read_dataset(source[len(DATASET_PREFIX) :], session=self.session)
             else:
-                dc = from_storage(source, session=self.session, recursive=recursive)
+                dc = read_storage(source, session=self.session, recursive=recursive)
 
             chains.append(dc)
 
datachain/cli/commands/show.py CHANGED
@@ -18,7 +18,7 @@ def show(
     schema: bool = False,
     include_hidden: bool = False,
 ) -> None:
-    from datachain import Session, from_dataset
+    from datachain import Session, read_dataset
     from datachain.query.dataset import DatasetQuery
     from datachain.utils import show_records
 
@@ -51,5 +51,5 @@ def show(
     if schema and dataset_version.feature_schema:
         print("\nSchema:")
         session = Session.get(catalog=catalog)
-        dc = from_dataset(name=name, version=version, session=session)
+        dc = read_dataset(name=name, version=version, session=session)
         dc.print_schema()
datachain/lib/dc/__init__.py CHANGED
@@ -1,15 +1,15 @@
-from .csv import from_csv
+from .csv import read_csv
 from .datachain import C, Column, DataChain
-from .datasets import datasets, from_dataset
-from .hf import from_hf
-from .json import from_json
+from .datasets import datasets, read_dataset
+from .hf import read_hf
+from .json import read_json
 from .listings import listings
-from .pandas import from_pandas
-from .parquet import from_parquet
-from .records import from_records
-from .storage import from_storage
+from .pandas import read_pandas
+from .parquet import read_parquet
+from .records import read_records
+from .storage import read_storage
 from .utils import DatasetMergeError, DatasetPrepareError, Sys
-from .values import from_values
+from .values import read_values
 
 __all__ = [
     "C",
@@ -19,14 +19,14 @@ __all__ = [
     "DatasetPrepareError",
     "Sys",
     "datasets",
-    "from_csv",
-    "from_dataset",
-    "from_hf",
-    "from_json",
-    "from_pandas",
-    "from_parquet",
-    "from_records",
-    "from_storage",
-    "from_values",
     "listings",
+    "read_csv",
+    "read_dataset",
+    "read_hf",
+    "read_json",
+    "read_pandas",
+    "read_parquet",
+    "read_records",
+    "read_storage",
+    "read_values",
 ]
datachain/lib/dc/csv.py CHANGED
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
     from .datachain import DataChain
 
 
-def from_csv(
+def read_csv(
     path,
    delimiter: Optional[str] = None,
     header: bool = True,
@@ -58,13 +58,13 @@ def from_csv(
         Reading a csv file:
         ```py
         import datachain as dc
-        chain = dc.from_csv("s3://mybucket/file.csv")
+        chain = dc.read_csv("s3://mybucket/file.csv")
         ```
 
         Reading csv files from a directory as a combined dataset:
         ```py
         import datachain as dc
-        chain = dc.from_csv("s3://mybucket/dir")
+        chain = dc.read_csv("s3://mybucket/dir")
         ```
     """
     from pandas.io.parsers.readers import STR_NA_VALUES
@@ -72,7 +72,7 @@ def from_csv(
     from pyarrow.dataset import CsvFileFormat
     from pyarrow.lib import type_for_alias
 
-    from .storage import from_storage
+    from .storage import read_storage
 
     parse_options = parse_options or {}
     if "delimiter" not in parse_options:
@@ -88,7 +88,7 @@ def from_csv(
     else:
         column_types = {}
 
-    chain = from_storage(path, session=session, settings=settings, **kwargs)
+    chain = read_storage(path, session=session, settings=settings, **kwargs)
 
     column_names = None
     if not header:
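As the hunks above show, `read_csv` forwards `delimiter` into pyarrow's CSV parse options and resolves `column_types` entries through `type_for_alias` before delegating to `read_storage`. A hedged usage sketch (the path and column name are placeholders):

```py
import datachain as dc

# "\t" ends up in pyarrow's CSV parse options; "float64" is resolved
# via pyarrow.lib.type_for_alias into an Arrow type for that column
chain = dc.read_csv(
    "s3://my-bucket/data.tsv",
    delimiter="\t",
    column_types={"score": "float64"},
)
```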
datachain/lib/dc/datachain.py CHANGED
@@ -84,22 +84,22 @@ class DataChain:
     underlyind library `Pydantic`.
 
     See Also:
-        `from_storage("s3://my-bucket/my-dir/")` - reading unstructured
+        `read_storage("s3://my-bucket/my-dir/")` - reading unstructured
             data files from storages such as S3, gs or Azure ADLS.
 
         `DataChain.save("name")` - saving to a dataset.
 
-        `from_dataset("name")` - reading from a dataset.
+        `read_dataset("name")` - reading from a dataset.
 
-        `from_values(fib=[1, 2, 3, 5, 8])` - generating from values.
+        `read_values(fib=[1, 2, 3, 5, 8])` - generating from values.
 
-        `from_pandas(pd.DataFrame(...))` - generating from pandas.
+        `read_pandas(pd.DataFrame(...))` - generating from pandas.
 
-        `from_json("file.json")` - generating from json.
+        `read_json("file.json")` - generating from json.
 
-        `from_csv("file.csv")` - generating from csv.
+        `read_csv("file.csv")` - generating from csv.
 
-        `from_parquet("file.parquet")` - generating from parquet.
+        `read_parquet("file.parquet")` - generating from parquet.
 
     Example:
     ```py
@@ -118,7 +118,7 @@ class DataChain:
         api_key = os.environ["MISTRAL_API_KEY"]
 
         chain = (
-            dc.from_storage("gs://datachain-demo/chatbot-KiT/")
+            dc.read_storage("gs://datachain-demo/chatbot-KiT/")
             .limit(5)
             .settings(cache=True, parallel=5)
             .map(
@@ -315,27 +315,27 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .storage import from_storage
+        from .storage import read_storage
 
         warnings.warn(
             "Class method `from_storage` is deprecated. "
-            "Use `from_storage` function instead from top_module.",
+            "Use `read_storage` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_storage(*args, **kwargs)
+        return read_storage(*args, **kwargs)
 
     @classmethod
     def from_dataset(cls, *args, **kwargs) -> "DataChain":
-        from .datasets import from_dataset
+        from .datasets import read_dataset
 
         warnings.warn(
             "Class method `from_dataset` is deprecated. "
-            "Use `from_dataset` function instead from top_module.",
+            "Use `read_dataset` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_dataset(*args, **kwargs)
+        return read_dataset(*args, **kwargs)
 
     @classmethod
     def from_json(
@@ -343,15 +343,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .json import from_json
+        from .json import read_json
 
         warnings.warn(
             "Class method `from_json` is deprecated. "
-            "Use `from_json` function instead from top_module.",
+            "Use `read_json` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_json(*args, **kwargs)
+        return read_json(*args, **kwargs)
 
     def explode(
         self,
@@ -487,7 +487,7 @@ class DataChain:
         )
 
         chain = (
-            dc.from_storage("s3://my-bucket")
+            dc.read_storage("s3://my-bucket")
             .apply(parse_stem)
             .filter(C("stem").glob("*cat*"))
         )
@@ -727,7 +727,7 @@ class DataChain:
 
         Note:
             Order is not guaranteed when steps are added after an `order_by` statement.
-            I.e. when using `from_dataset` an `order_by` statement should be used if
+            I.e. when using `read_dataset` an `order_by` statement should be used if
             the order of the records in the chain is important.
             Using `order_by` directly before `limit`, `collect` and `collect_flatten`
             will give expected results.
@@ -1466,15 +1466,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .values import from_values
+        from .values import read_values
 
         warnings.warn(
             "Class method `from_values` is deprecated. "
-            "Use `from_values` function instead from top_module.",
+            "Use `read_values` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_values(*args, **kwargs)
+        return read_values(*args, **kwargs)
 
     @classmethod
     def from_pandas(
@@ -1482,15 +1482,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .pandas import from_pandas
+        from .pandas import read_pandas
 
         warnings.warn(
             "Class method `from_pandas` is deprecated. "
-            "Use `from_pandas` function instead from top_module.",
+            "Use `read_pandas` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_pandas(*args, **kwargs)
+        return read_pandas(*args, **kwargs)
 
     def to_pandas(self, flatten=False, include_hidden=True) -> "pd.DataFrame":
         """Return a pandas DataFrame from the chain.
@@ -1575,15 +1575,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .hf import from_hf
+        from .hf import read_hf
 
         warnings.warn(
             "Class method `from_hf` is deprecated. "
-            "Use `from_hf` function instead from top_module.",
+            "Use `read_hf` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_hf(*args, **kwargs)
+        return read_hf(*args, **kwargs)
 
     def parse_tabular(
         self,
@@ -1610,7 +1610,7 @@ class DataChain:
         Reading a json lines file:
         ```py
         import datachain as dc
-        chain = dc.from_storage("s3://mybucket/file.jsonl")
+        chain = dc.read_storage("s3://mybucket/file.jsonl")
         chain = chain.parse_tabular(format="json")
         ```
 
@@ -1618,7 +1618,7 @@ class DataChain:
         ```py
         import datachain as dc
 
-        chain = dc.from_storage("s3://mybucket")
+        chain = dc.read_storage("s3://mybucket")
         chain = chain.filter(dc.C("file.name").glob("*.jsonl"))
         chain = chain.parse_tabular(format="json")
         ```
@@ -1680,15 +1680,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .csv import from_csv
+        from .csv import read_csv
 
         warnings.warn(
             "Class method `from_csv` is deprecated. "
-            "Use `from_csv` function instead from top_module.",
+            "Use `read_csv` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_csv(*args, **kwargs)
+        return read_csv(*args, **kwargs)
 
     @classmethod
     def from_parquet(
@@ -1696,15 +1696,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .parquet import from_parquet
+        from .parquet import read_parquet
 
         warnings.warn(
             "Class method `from_parquet` is deprecated. "
-            "Use `from_parquet` function instead from top_module.",
+            "Use `read_parquet` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_parquet(*args, **kwargs)
+        return read_parquet(*args, **kwargs)
 
     def to_parquet(
         self,
@@ -1930,15 +1930,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .records import from_records
+        from .records import read_records
 
         warnings.warn(
             "Class method `from_records` is deprecated. "
-            "Use `from_records` function instead from top_module.",
+            "Use `read_records` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_records(*args, **kwargs)
+        return read_records(*args, **kwargs)
 
     def sum(self, fr: DataType):  # type: ignore[override]
         """Compute the sum of a column."""
@@ -1969,7 +1969,7 @@ class DataChain:
         import datachain as dc
 
         (
-            dc.from_storage(DATA, type="text")
+            dc.read_storage(DATA, type="text")
             .settings(parallel=4, cache=True)
             .setup(client=lambda: anthropic.Anthropic(api_key=API_KEY))
             .map(
@@ -2021,7 +2021,7 @@ class DataChain:
         ```py
         import datachain as dc
 
-        ds = dc.from_storage("s3://mybucket")
+        ds = dc.read_storage("s3://mybucket")
         ds.to_storage("gs://mybucket", placement="filename")
         ```
         """
@@ -2139,7 +2139,7 @@ class DataChain:
         ```py
         import datachain as dc
 
-        chain = dc.from_storage(...)
+        chain = dc.read_storage(...)
         chunk_1 = query._chunk(0, 2)
         chunk_2 = query._chunk(1, 2)
         ```
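All of the `DataChain.from_*` class methods above survive as deprecation shims: each one warns and then delegates to the corresponding top-level `read_*` function. A small sketch verifying that behavior:

```py
import warnings

import datachain as dc
from datachain import DataChain

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    chain = DataChain.from_values(fib=[1, 2, 3, 5, 8])  # deprecated shim
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# the warning-free equivalent in 0.14.2
chain = dc.read_values(fib=[1, 2, 3, 5, 8])
```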
datachain/lib/dc/datasets.py CHANGED
@@ -13,7 +13,7 @@ from datachain.query import Session
 from datachain.query.dataset import DatasetQuery
 
 from .utils import Sys
-from .values import from_values
+from .values import read_values
 
 if TYPE_CHECKING:
     from typing_extensions import ParamSpec
@@ -23,7 +23,7 @@ if TYPE_CHECKING:
     P = ParamSpec("P")
 
 
-def from_dataset(
+def read_dataset(
     name: str,
     version: Optional[int] = None,
     session: Optional[Session] = None,
@@ -44,15 +44,15 @@ def from_dataset(
     Example:
         ```py
         import datachain as dc
-        chain = dc.from_dataset("my_cats")
+        chain = dc.read_dataset("my_cats")
         ```
 
         ```py
-        chain = dc.from_dataset("my_cats", fallback_to_studio=False)
+        chain = dc.read_dataset("my_cats", fallback_to_studio=False)
         ```
 
         ```py
-        chain = dc.from_dataset("my_cats", version=1)
+        chain = dc.read_dataset("my_cats", version=1)
         ```
 
         ```py
@@ -64,7 +64,7 @@ def from_dataset(
             "min_task_size": 1000,
             "prefetch": 10,
         }
-        chain = dc.from_dataset(
+        chain = dc.read_dataset(
             name="my_cats",
             version=1,
             session=session,
@@ -140,7 +140,7 @@ def datasets(
         )
     ]
 
-    return from_values(
+    return read_values(
         session=session,
         settings=settings,
         in_memory=in_memory,
datachain/lib/dc/hf.py CHANGED
@@ -18,7 +18,7 @@ if TYPE_CHECKING:
     P = ParamSpec("P")
 
 
-def from_hf(
+def read_hf(
     dataset: Union[str, "HFDatasetType"],
     *args,
     session: Optional[Session] = None,
@@ -42,7 +42,7 @@ def from_hf(
     Load from Hugging Face Hub:
         ```py
         import datachain as dc
-        chain = dc.from_hf("beans", split="train")
+        chain = dc.read_hf("beans", split="train")
         ```
 
     Generate chain from loaded dataset:
@@ -50,12 +50,12 @@ def from_hf(
         from datasets import load_dataset
         ds = load_dataset("beans", split="train")
         import datachain as dc
-        chain = dc.from_hf(ds)
+        chain = dc.read_hf(ds)
         ```
     """
     from datachain.lib.hf import HFGenerator, get_output_schema, stream_splits
 
-    from .values import from_values
+    from .values import read_values
 
     output: dict[str, DataType] = {}
     ds_dict = stream_splits(dataset, *args, **kwargs)
@@ -69,5 +69,5 @@ def from_hf(
     if object_name:
         output = {object_name: model}
 
-    chain = from_values(split=list(ds_dict.keys()), session=session, settings=settings)
+    chain = read_values(split=list(ds_dict.keys()), session=session, settings=settings)
     return chain.gen(HFGenerator(dataset, model, *args, **kwargs), output=output)
datachain/lib/dc/json.py CHANGED
@@ -22,7 +22,7 @@ if TYPE_CHECKING:
     P = ParamSpec("P")
 
 
-def from_json(
+def read_json(
     path: Union[str, os.PathLike[str]],
     type: FileType = "text",
     spec: Optional[DataType] = None,
@@ -52,16 +52,16 @@ def from_json(
     infer JSON schema from data, reduce using JMESPATH
         ```py
         import datachain as dc
-        chain = dc.from_json("gs://json", jmespath="key1.key2")
+        chain = dc.read_json("gs://json", jmespath="key1.key2")
         ```
 
     infer JSON schema from a particular path
         ```py
         import datachain as dc
-        chain = dc.from_json("gs://json_ds", schema_from="gs://json/my.json")
+        chain = dc.read_json("gs://json_ds", schema_from="gs://json/my.json")
         ```
     """
-    from .storage import from_storage
+    from .storage import read_storage
 
     if schema_from == "auto":
         schema_from = os.fspath(path)
@@ -74,7 +74,7 @@ def from_json(
         object_name = jmespath_to_name(jmespath)
     if not object_name:
         object_name = format
-    chain = from_storage(uri=path, type=type, **kwargs)
+    chain = read_storage(uri=path, type=type, **kwargs)
     signal_dict = {
         object_name: read_meta(
             schema_from=schema_from,
datachain/lib/dc/listings.py CHANGED
@@ -6,7 +6,7 @@ from typing import (
 from datachain.lib.listing_info import ListingInfo
 from datachain.query import Session
 
-from .values import from_values
+from .values import read_values
 
 if TYPE_CHECKING:
     from typing_extensions import ParamSpec
@@ -35,7 +35,7 @@ def listings(
     session = Session.get(session, in_memory=in_memory)
     catalog = kwargs.get("catalog") or session.catalog
 
-    return from_values(
+    return read_values(
         session=session,
         in_memory=in_memory,
         output={object_name: ListingInfo},
datachain/lib/dc/pandas.py CHANGED
@@ -5,7 +5,7 @@ from typing import (
 
 from datachain.query import Session
 
-from .values import from_values
+from .values import read_values
 
 if TYPE_CHECKING:
     import pandas as pd
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
     P = ParamSpec("P")
 
 
-def from_pandas(  # type: ignore[override]
+def read_pandas(  # type: ignore[override]
     df: "pd.DataFrame",
     name: str = "",
     session: Optional[Session] = None,
@@ -32,7 +32,7 @@ def from_pandas(  # type: ignore[override]
         import datachain as dc
 
         df = pd.DataFrame({"fib": [1, 2, 3, 5, 8]})
-        dc.from_pandas(df)
+        dc.read_pandas(df)
         ```
     """
     from .utils import DatasetPrepareError
@@ -46,7 +46,7 @@ def from_pandas(  # type: ignore[override]
             f"import from pandas error - '{column}' cannot be a column name",
         )
 
-    return from_values(
+    return read_values(
         name,
         session,
         settings=settings,
datachain/lib/dc/parquet.py CHANGED
@@ -15,7 +15,7 @@ if TYPE_CHECKING:
     P = ParamSpec("P")
 
 
-def from_parquet(
+def read_parquet(
     path,
     partitioning: Any = "hive",
     output: Optional[dict[str, DataType]] = None,
@@ -43,18 +43,18 @@ def from_parquet(
     Reading a single file:
         ```py
         import datachain as dc
-        dc.from_parquet("s3://mybucket/file.parquet")
+        dc.read_parquet("s3://mybucket/file.parquet")
         ```
 
     Reading a partitioned dataset from a directory:
         ```py
         import datachain as dc
-        dc.from_parquet("s3://mybucket/dir")
+        dc.read_parquet("s3://mybucket/dir")
         ```
     """
-    from .storage import from_storage
+    from .storage import read_storage
 
-    chain = from_storage(path, session=session, settings=settings, **kwargs)
+    chain = read_storage(path, session=session, settings=settings, **kwargs)
     return chain.parse_tabular(
         output=output,
         object_name=object_name,
datachain/lib/dc/records.py CHANGED
@@ -21,7 +21,7 @@ if TYPE_CHECKING:
     P = ParamSpec("P")
 
 
-def from_records(
+def read_records(
     to_insert: Optional[Union[dict, list[dict]]],
     session: Optional[Session] = None,
     settings: Optional[dict] = None,
@@ -40,10 +40,10 @@ def from_records(
     Example:
         ```py
         import datachain as dc
-        single_record = dc.from_records(dc.DEFAULT_FILE_RECORD)
+        single_record = dc.read_records(dc.DEFAULT_FILE_RECORD)
         ```
     """
-    from .datasets import from_dataset
+    from .datasets import read_dataset
 
     session = Session.get(session, in_memory=in_memory)
     catalog = session.catalog
@@ -87,4 +87,4 @@ def from_records(
     insert_q = dr.get_table().insert()
     for record in to_insert:
         db.execute(insert_q.values(**record))
-    return from_dataset(name=dsr.name, session=session, settings=settings)
+    return read_dataset(name=dsr.name, session=session, settings=settings)
datachain/lib/dc/storage.py CHANGED
@@ -21,7 +21,7 @@ if TYPE_CHECKING:
     from .datachain import DataChain
 
 
-def from_storage(
+def read_storage(
     uri: Union[str, os.PathLike[str], list[str], list[os.PathLike[str]]],
     *,
     type: FileType = "binary",
@@ -55,12 +55,12 @@ def from_storage(
     Simple call from s3:
         ```python
         import datachain as dc
-        chain = dc.from_storage("s3://my-bucket/my-dir")
+        chain = dc.read_storage("s3://my-bucket/my-dir")
         ```
 
     Multiple URIs:
         ```python
-        chain = dc.from_storage([
+        chain = dc.read_storage([
             "s3://bucket1/dir1",
             "s3://bucket2/dir2"
         ])
@@ -68,7 +68,7 @@ def from_storage(
 
     With AWS S3-compatible storage:
         ```python
-        chain = dc.from_storage(
+        chain = dc.read_storage(
             "s3://my-bucket/my-dir",
             client_config = {"aws_endpoint_url": "<minio-endpoint-url>"}
         )
@@ -77,7 +77,7 @@ def from_storage(
     Pass existing session
         ```py
         session = Session.get()
-        chain = dc.from_storage([
+        chain = dc.read_storage([
             "path/to/dir1",
             "path/to/dir2"
         ], session=session, recursive=True)
@@ -88,9 +88,9 @@ def from_storage(
     avoiding redundant updates for URIs pointing to the same storage location.
     """
     from .datachain import DataChain
-    from .datasets import from_dataset
-    from .records import from_records
-    from .values import from_values
+    from .datasets import read_dataset
+    from .records import read_records
+    from .values import read_values
 
     file_type = get_file_type(type)
 
@@ -122,7 +122,8 @@ def from_storage(
             )
             continue
 
-        dc = from_dataset(list_ds_name, session=session, settings=settings)
+        dc = read_dataset(list_ds_name, session=session, settings=settings)
+        dc._query.update = update
         dc.signals_schema = dc.signals_schema.mutate({f"{object_name}": file_type})
 
         if update or not list_ds_exists:
@@ -130,7 +131,7 @@ def from_storage(
         def lst_fn(ds_name, lst_uri):
             # disable prefetch for listing, as it pre-downloads all files
             (
-                from_records(
+                read_records(
                     DataChain.DEFAULT_FILE_RECORD,
                     session=session,
                     settings=settings,
@@ -144,7 +145,7 @@ def from_storage(
             .save(ds_name, listing=True)
         )
 
-        dc._query.add_before_steps(
+        dc._query.set_listing_fn(
             lambda ds_name=list_ds_name, lst_uri=list_uri: lst_fn(ds_name, lst_uri)
         )
 
@@ -154,7 +155,7 @@ def from_storage(
         listed_ds_name.add(list_ds_name)
 
     if file_values:
-        file_chain = from_values(
+        file_chain = read_values(
            session=session,
            settings=settings,
            in_memory=in_memory,
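Beyond the rename, `read_storage` now stores the `update` flag on the underlying query (`dc._query.update = update`) and registers the listing step through `set_listing_fn` rather than queueing it unconditionally, so the actual (re-)listing is deferred until the query runs. A usage sketch (the bucket path is a placeholder):

```py
import datachain as dc

# reuses a cached listing dataset if one exists and has not expired
chain = dc.read_storage("s3://my-bucket/my-dir")

# update=True forces the listing to be refreshed when steps are applied
fresh = dc.read_storage("s3://my-bucket/my-dir", update=True)
```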
datachain/lib/dc/values.py CHANGED
@@ -6,7 +6,7 @@ from typing import (
 
 from datachain.lib.convert.values_to_tuples import values_to_tuples
 from datachain.lib.data_model import dict_to_data_model
-from datachain.lib.dc.records import from_records
+from datachain.lib.dc.records import read_records
 from datachain.lib.dc.utils import OutputType
 from datachain.query import Session
 
@@ -18,7 +18,7 @@ if TYPE_CHECKING:
     P = ParamSpec("P")
 
 
-def from_values(
+def read_values(
     ds_name: str = "",
     session: Optional[Session] = None,
     settings: Optional[dict] = None,
@@ -32,7 +32,7 @@ def from_values(
     Example:
         ```py
         import datachain as dc
-        dc.from_values(fib=[1, 2, 3, 5, 8])
+        dc.read_values(fib=[1, 2, 3, 5, 8])
         ```
     """
     from .datachain import DataChain
@@ -42,7 +42,7 @@ def from_values(
     def _func_fr() -> Iterator[tuple_type]:  # type: ignore[valid-type]
         yield from tuples
 
-    chain = from_records(
+    chain = read_records(
         DataChain.DEFAULT_FILE_RECORD,
         session=session,
         settings=settings,
datachain/lib/listing.py CHANGED
@@ -4,6 +4,7 @@ import os
 import posixpath
 from collections.abc import Iterator
 from contextlib import contextmanager
+from datetime import datetime, timedelta, timezone
 from typing import TYPE_CHECKING, Callable, Optional, TypeVar, Union
 
 from fsspec.asyn import get_loop
@@ -32,6 +33,16 @@ logging.getLogger("aiobotocore.credentials").setLevel(logging.CRITICAL)
 logging.getLogger("gcsfs").setLevel(logging.CRITICAL)
 
 
+def listing_dataset_expired(lst_ds) -> bool:
+    """Function that checks if listing dataset is expired or not"""
+    lst_version = lst_ds.versions[-1]
+    if not lst_version.finished_at:
+        return False
+
+    expires = lst_version.finished_at + timedelta(seconds=LISTING_TTL)
+    return datetime.now(timezone.utc) > expires
+
+
 def list_bucket(uri: str, cache, client_config=None) -> Callable:
     """
     Function that returns another generator function that yields File objects
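The new `listing_dataset_expired` helper treats a listing as stale once `finished_at + LISTING_TTL` is in the past, and as fresh if it never recorded a `finished_at`. A standalone sketch of the same check (the `LISTING_TTL` value here is a stand-in, not necessarily the constant datachain ships):

```py
from datetime import datetime, timedelta, timezone
from typing import Optional

LISTING_TTL = 4 * 60 * 60  # stand-in TTL in seconds

def is_expired(finished_at: Optional[datetime]) -> bool:
    if not finished_at:
        return False  # an unfinished listing is never considered expired
    expires = finished_at + timedelta(seconds=LISTING_TTL)
    return datetime.now(timezone.utc) > expires

# a listing finished a day ago is well past a 4-hour TTL
assert is_expired(datetime.now(timezone.utc) - timedelta(days=1))
```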
datachain/lib/meta_formats.py CHANGED
@@ -103,10 +103,10 @@ def read_meta(  # noqa: C901
     model_name=None,
     nrows=None,
 ) -> Callable:
-    from datachain import from_storage
+    from datachain import read_storage
 
     if schema_from:
-        file = next(from_storage(schema_from, type="text").limit(1).collect("file"))
+        file = next(read_storage(schema_from, type="text").limit(1).collect("file"))
         model_code = gen_datamodel_code(
             file, format=format, jmespath=jmespath, model_name=model_name
         )
datachain/lib/pytorch.py CHANGED
@@ -14,7 +14,7 @@ from torchvision.transforms import v2
 from datachain import Session
 from datachain.cache import get_temp_cache
 from datachain.catalog import Catalog, get_catalog
-from datachain.lib.dc.datasets import from_dataset
+from datachain.lib.dc.datasets import read_dataset
 from datachain.lib.settings import Settings
 from datachain.lib.text import convert_text
 from datachain.progress import CombinedDownloadCallback
@@ -122,7 +122,7 @@ class PytorchDataset(IterableDataset):
     ) -> Generator[tuple[Any, ...], None, None]:
         catalog = self._get_catalog()
         session = Session("PyTorch", catalog=catalog)
-        ds = from_dataset(
+        ds = read_dataset(
             name=self.name, version=self.version, session=session
         ).settings(cache=self.cache, prefetch=self.prefetch)
         ds = ds.remove_file_signals()
datachain/lib/udf.py CHANGED
@@ -145,7 +145,7 @@ class UDFBase(AbstractUDF):
             return emb[0].tolist()
 
         (
-            dc.from_storage(
+            dc.read_storage(
                 "gs://datachain-demo/fashion-product-images/images", type="image"
             )
             .limit(5)
datachain/query/dataset.py CHANGED
@@ -47,7 +47,10 @@ from datachain.error import (
     QueryScriptCancelError,
 )
 from datachain.func.base import Function
-from datachain.lib.listing import is_listing_dataset
+from datachain.lib.listing import (
+    is_listing_dataset,
+    listing_dataset_expired,
+)
 from datachain.lib.udf import UDFAdapter, _get_cache
 from datachain.progress import CombinedDownloadCallback, TqdmCombinedDownloadCallback
 from datachain.query.schema import C, UDFParamSpec, normalize_param
@@ -1080,6 +1083,7 @@ class DatasetQuery:
         indexing_column_types: Optional[dict[str, Any]] = None,
         in_memory: bool = False,
         fallback_to_studio: bool = True,
+        update: bool = False,
     ) -> None:
         from datachain.remote.studio import is_token_set
 
@@ -1097,6 +1101,8 @@ class DatasetQuery:
         self.feature_schema: Optional[dict] = None
         self.column_types: Optional[dict[str, Any]] = None
         self.before_steps: list[Callable] = []
+        self.listing_fn: Optional[Callable] = None
+        self.update = update
 
         self.list_ds_name: Optional[str] = None
 
@@ -1190,23 +1196,30 @@ class DatasetQuery:
             col.table = self.table
         return col
 
-    def add_before_steps(self, fn: Callable) -> None:
-        """
-        Setting custom function to be run before applying steps
-        """
-        self.before_steps.append(fn)
+    def set_listing_fn(self, fn: Callable) -> None:
+        """Setting listing function to be run if needed"""
+        self.listing_fn = fn
 
     def apply_steps(self) -> QueryGenerator:
         """
         Apply the steps in the query and return the resulting
         sqlalchemy.SelectBase.
         """
-        for fn in self.before_steps:
-            fn()
+        if self.list_ds_name and not self.starting_step:
+            listing_ds = None
+            try:
+                listing_ds = self.catalog.get_dataset(self.list_ds_name)
+            except DatasetNotFoundError:
+                pass
+
+            if not listing_ds or self.update or listing_dataset_expired(listing_ds):
+                assert self.listing_fn
+                self.listing_fn()
+                listing_ds = self.catalog.get_dataset(self.list_ds_name)
 
-        if self.list_ds_name:
             # at this point we know what is our starting listing dataset name
-            self._set_starting_step(self.catalog.get_dataset(self.list_ds_name))  # type: ignore [arg-type]
+            self._set_starting_step(listing_ds)  # type: ignore [arg-type]
+
         query = self.clone()
 
         index = os.getenv("DATACHAIN_QUERY_CHUNK_INDEX", self._chunk_index)
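The net effect in `apply_steps` is that the eager `before_steps` loop is replaced with a lazy decision: the registered listing function runs only when no listing dataset exists, an explicit `update` was requested, or the cached listing has expired. A condensed sketch of that decision path (names mirror the diff; the `query` argument is a stand-in for a `DatasetQuery` instance):

```py
from datachain.error import DatasetNotFoundError
from datachain.lib.listing import listing_dataset_expired

def resolve_listing(query):
    listing_ds = None
    try:
        listing_ds = query.catalog.get_dataset(query.list_ds_name)
    except DatasetNotFoundError:
        pass

    # re-list only when missing, explicitly refreshed, or past its TTL
    if not listing_ds or query.update or listing_dataset_expired(listing_ds):
        query.listing_fn()
        listing_ds = query.catalog.get_dataset(query.list_ds_name)

    return listing_ds
```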
datachain/toolkit/split.py CHANGED
@@ -41,7 +41,7 @@ def train_test_split(
         from datachain.toolkit import train_test_split
 
         # Load a DataChain from a storage source (e.g., S3 bucket)
-        dc = dc.from_storage("s3://bucket/dir/")
+        dc = dc.read_storage("s3://bucket/dir/")
 
         # Perform a 70/30 train-test split
         train, test = train_test_split(dc, [0.7, 0.3])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.14.1
3
+ Version: 0.14.2
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -38,7 +38,7 @@ Requires-Dist: sqlalchemy>=2
38
38
  Requires-Dist: multiprocess==0.70.16
39
39
  Requires-Dist: cloudpickle
40
40
  Requires-Dist: orjson>=3.10.5
41
- Requires-Dist: pydantic<3,>=2
41
+ Requires-Dist: pydantic<2.11,>=2
42
42
  Requires-Dist: jmespath>=1.0
43
43
  Requires-Dist: datamodel-code-generator>=0.25
44
44
  Requires-Dist: Pillow<12,>=10.0.0
@@ -171,8 +171,8 @@ high confidence scores.
171
171
 
172
172
  import datachain as dc
173
173
 
174
- meta = dc.from_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
175
- images = dc.from_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
174
+ meta = dc.read_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
175
+ images = dc.read_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
176
176
 
177
177
  images_id = images.map(id=lambda file: file.path.split('.')[-2])
178
178
  annotated = images_id.merge(meta, on="id", right_on="meta.id")
@@ -213,7 +213,7 @@ Python code:
213
213
  return result.lower().startswith("success")
214
214
 
215
215
  chain = (
216
- dc.from_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
216
+ dc.read_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
217
217
  .settings(parallel=4, cache=True)
218
218
  .map(is_success=eval_dialogue)
219
219
  .save("mistral_files")
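The packaging changes are the version bump, the renamed examples in the long description, and a tighter pydantic upper bound (`<2.11` instead of `<3`). A quick runtime check of the new constraint (a sketch only; `pydantic.VERSION` is the version string pydantic v2 exposes):

```py
import pydantic

major, minor = (int(p) for p in pydantic.VERSION.split(".")[:2])
# datachain 0.14.2 requires pydantic >= 2, < 2.11
assert (2, 0) <= (major, minor) < (2, 11)
```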
datachain-0.14.1.dist-info/RECORD → datachain-0.14.2.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
-datachain/__init__.py,sha256=M_0MfSBJqlWA9hI3z47Yu36fxkxsekbpvNa-LBa1e5Q,1414
+datachain/__init__.py,sha256=h3W0agyTcpXOfMA26jZyHo-Gs7vLXhbR-9uEkzK8Szk,1414
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
 datachain/cache.py,sha256=yQblPhOh_Mq74Ma7xT1CL1idLJ0HgrQxpGVYvRy_9Eg,3623
@@ -17,7 +17,7 @@ datachain/studio.py,sha256=9MEpFPLKI3gG4isKklcfD5BMLeNsSXhtOUboOjW4Fdc,10017
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=CLAYkI7iPbLYw3Pjh5EkWuc2UOs8wEbuXQnqIs4UyV8,14173
 datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
-datachain/catalog/catalog.py,sha256=FGW2cEOysgVMyokqIFAJ1PB-RYJrqDEFGfHP5qLYO-k,60729
+datachain/catalog/catalog.py,sha256=k-okQ4aqoyWrsNlDeCz6jP6TNRiZCUENbGV9Sz6EEtw,60729
 datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
 datachain/catalog/loader.py,sha256=AhSQR_-S-9lY3DcXn3PVZv9UtarHOMlDy2x75iDwUjo,6035
 datachain/cli/__init__.py,sha256=YPVkuQ7IezNhtzo5xrfca1hEIiZtFxOlJCOzAOEuxmA,8335
@@ -29,7 +29,7 @@ datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR
 datachain/cli/commands/ls.py,sha256=dSD2_MHng4t9HRFJZWMOCjPL4XU3qaBV3piNl8UXP08,5275
 datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibVE,600
 datachain/cli/commands/query.py,sha256=2S7hQxialt1fkbocxi6JXZI6jS5QnFrD1aOjKgZkzfI,1471
-datachain/cli/commands/show.py,sha256=pn8jQ5HqUpzZE3KE-iydflp8LGgfVIKR8eKwVKv6A-8,1604
+datachain/cli/commands/show.py,sha256=P6e6bYiRCyVKO0ggnoFkLkwGmBWlrlm8W5c_sBNxBBw,1604
 datachain/cli/parser/__init__.py,sha256=rtjlqSsDd4LZH9WdgvluO27M4sID1wD7YkQ4cKhNXzw,15721
 datachain/cli/parser/job.py,sha256=kvQkSfieyUmvJpOK8p78UgS8sygHhQXztRlOtVcgtaU,3449
 datachain/cli/parser/studio.py,sha256=Y-1OlQGecLVi9QofvWUfSlPd2ISyaESf7QFGZqGsrdw,3609
@@ -73,16 +73,16 @@ datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810
 datachain/lib/file.py,sha256=HLQXS_WULm7Y-fkHMy0WpibVAcrkLPRS6CrZy6rwFe0,30450
 datachain/lib/hf.py,sha256=gjxuStZBlKtNk3-4yYSlWZDv9zBGblOdvEy_Lwap5hA,5882
 datachain/lib/image.py,sha256=butvUY_33PVEYPKX2nVCPeJjJVcBaptZwsE9REQsTS8,3247
-datachain/lib/listing.py,sha256=xrgsd1_YLLiA69LnwK56oZwe0RXTBCDicGzhavF_2AQ,6665
+datachain/lib/listing.py,sha256=O29s7H-2rqjHHGKWkKGNNXlo2zynv4pygVTKImpV8fo,7046
 datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
-datachain/lib/meta_formats.py,sha256=f-irPQH_acIYT5gzjqoOvGrGOQrm-E_0wN-4lqZF_j8,6349
+datachain/lib/meta_formats.py,sha256=Epydbdch1g4CojK8wd_ePzmwmljC4fVWlJtZ16jsX-A,6349
 datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
-datachain/lib/pytorch.py,sha256=FFCZoVkoG_FY_vJ4v_DgzijEEcTozuddlPz1uAa5tyg,7712
+datachain/lib/pytorch.py,sha256=YS6yR13iVlrAXo5wzJswFFUHwWOql9KTdWIa86DXB-k,7712
 datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
 datachain/lib/signal_schema.py,sha256=DRatqSG7OVtCUCWyZvMXe4m7r7XFO6NCfzsJRDErMtg,35185
 datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
 datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
-datachain/lib/udf.py,sha256=6ZCn9qIAVwQA4zEyWiTb1jaSLkpkBXeGeaH8EB7Im3I,16168
+datachain/lib/udf.py,sha256=h38a457xg-4wO2XcxPs4pzDq8JxTmYm4N84iAf0HRzY,16168
 datachain/lib/udf_signature.py,sha256=2EtsOPDNSPqcOlYwqbCdy6RF5MldI-7smii8aLy8p7Y,7543
 datachain/lib/utils.py,sha256=QrjVs_oLRXEotOPUYurBJypBFi_ReTJmxcnJeH4j2Uk,1596
 datachain/lib/video.py,sha256=suH_8Mi8VYk4-IVb1vjSduF_njs64ji1WGKHxDLnGYw,6629
@@ -94,19 +94,19 @@ datachain/lib/convert/python_to_sql.py,sha256=wg-O5FRKX3x3Wh8ZL1b9ntMlgf1zRO4djM
 datachain/lib/convert/sql_to_python.py,sha256=XXCBYDQFUXJIBNWkjEP944cnCfJ8GF2Tji0DLF3A_zQ,315
 datachain/lib/convert/unflatten.py,sha256=ysMkstwJzPMWUlnxn-Z-tXJR3wmhjHeSN_P-sDcLS6s,2010
 datachain/lib/convert/values_to_tuples.py,sha256=EFfIGBiVVltJQG8blzsQ1dGXneh4D3wdLfSUeoK10OI,3931
-datachain/lib/dc/__init__.py,sha256=QKjTAYrigSoy74RQHmNS_86SEOisKk-BLDREYaJ_olY,743
-datachain/lib/dc/csv.py,sha256=OaVHYnOZiYEfsUcispXuGcIYQKF03u4XrRf6Fgce6Kk,4401
-datachain/lib/dc/datachain.py,sha256=NdGCRNk3NZCGQHs-sq0jiKkvsXiowiqDQTY_X4AbL6o,76390
-datachain/lib/dc/datasets.py,sha256=0vdgNpA_xakFgnfm78I1yU98u2hvOawOXS872pg2F48,4329
-datachain/lib/dc/hf.py,sha256=F_ME1IpUlQfhqVGe__Uz7jLwd-fp-O7pu50OLhkaG0w,2170
-datachain/lib/dc/json.py,sha256=mlrqsmxLDYNP7dmde3IDYP01QlbUzP8Pj5UDqlqJcZ0,2725
-datachain/lib/dc/listings.py,sha256=c2ASPhwRhPDMbA5esYp3kMVw6sQ7vsWEflHWh9x7tkw,1044
-datachain/lib/dc/pandas.py,sha256=eteVB6DqRGAU2tDF_Bep7JRU4nny3uyVPbGKOZ6PVq0,1249
-datachain/lib/dc/parquet.py,sha256=tO0rDL3XZ24rqkUJYAYn_yAyZgIYV5N6r28MTlPE0Z0,1809
-datachain/lib/dc/records.py,sha256=zV4vPJvCEd5mBv-E_q-VfrSXNjcfu74QY884z3QuftM,2524
-datachain/lib/dc/storage.py,sha256=mIAlNEYRJ8r3yHA2sJyt8duwuSfehbPro7WqMQvezIc,5295
+datachain/lib/dc/__init__.py,sha256=6rKKHS6MA3mS6UJXiysrv4TURs4R_UWAQK2tJ2t1QMs,743
+datachain/lib/dc/csv.py,sha256=d0ULzpsTTeqp_eM-2jVHb1kYHQN2lJFf4O6LWd5tOJw,4401
+datachain/lib/dc/datachain.py,sha256=hwuAElfEhRLyh-Uvuc7YIpFx6nsI_B90xwnMqgkkgrI,76390
+datachain/lib/dc/datasets.py,sha256=hTzq18Ij9kpOAJOU-VN4-VyThTTxLSWLfVIk3bgzAPs,4329
+datachain/lib/dc/hf.py,sha256=I1vFNOa1C87lBuBj5FHENLY2jTaQ8erngiX0cyBmOp4,2170
+datachain/lib/dc/json.py,sha256=9ei9ZNzWVXZWD4HNGTfBhcoLPnXBBDywKV-3Wi1mT28,2725
+datachain/lib/dc/listings.py,sha256=qPy1DTvYkbNICT1ujo8LwezzMEW8E3dln1knw7Jwl0I,1044
+datachain/lib/dc/pandas.py,sha256=jJvgNPPjiSLAjdYlhI4fvGKNWRh-hbMgZyBlURS633E,1249
+datachain/lib/dc/parquet.py,sha256=lXCSr_S7bQsPUWq1pJ-Ur8R8RxArjyFpCpBXK-aorQw,1809
+datachain/lib/dc/records.py,sha256=DOFkQV7A7kZnMiCS4mHOzee2ibWIhz-mWQpgVsU78SE,2524
+datachain/lib/dc/storage.py,sha256=kM3Ix2L0j01a4XcXPZpdDxvici9yu-Ks-Cd3uf_qESA,5327
 datachain/lib/dc/utils.py,sha256=Ct-0FqCaDhNWHx09gJFcCXJGPjMI-VZr4t-GJyqTi44,3984
-datachain/lib/dc/values.py,sha256=PLBZew0BYO3mv7W3n8OF5Ad-5tp5eWPqlbiVxG5pJ30,1409
+datachain/lib/dc/values.py,sha256=HaABQKmhgW-N1pcBn7CQuTIiOFXYVjU1H9LbupGM3WQ,1409
 datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
 datachain/model/bbox.py,sha256=cQNHuQuVsh6bW3n3Hj40F2Cc20cExQ9Lg_q7R2jxUMI,9324
 datachain/model/pose.py,sha256=rjquA6M-I-Y30Xm6YSkGv1OY52hJZmR2AuxbIpE5uD0,3865
@@ -118,7 +118,7 @@ datachain/model/ultralytics/pose.py,sha256=gXAWfAk4OWZl93hKcQPKZvqJa3nIrECB4RM8K
 datachain/model/ultralytics/segment.py,sha256=koq1HASo29isf0in6oSlzmU4IzsmOXe87F1ajQQVfh4,2911
 datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
 datachain/query/batch.py,sha256=6w8gzLTmLeylststu-gT5jIqEfi4-djS7_yTYyeo-fw,4190
-datachain/query/dataset.py,sha256=Em5vfKkZygzXCiWRYUBGLSh3eWlIamMBvh328YNnmww,58201
+datachain/query/dataset.py,sha256=G_fyt3vwifY5Usnp8pvkho543innrcDOImKLqG3W3YU,58665
 datachain/query/dispatch.py,sha256=_1vjeQ1wjUoxlik55k0JkWqQCUfMjgVWmEOyWRkx0dU,12437
 datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -148,11 +148,11 @@ datachain/sql/sqlite/base.py,sha256=N-cQT0Hpu9ROWe4OiKlkkn_YP1NKCRZZ3xSfTzpyaDA,
 datachain/sql/sqlite/types.py,sha256=cH6oge2E_YWFy22wY-txPJH8gxoQFSpCthtZR8PZjpo,1849
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
-datachain/toolkit/split.py,sha256=VdcP_zVLqAxuSrze3BaR-dBzTmyKkCUAiAremw3OEPU,2914
+datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.14.1.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.14.1.dist-info/METADATA,sha256=UPk0v7fsYz_eTsJf5YpexjD4jrjpWsKEyAVNSXN3KvE,11335
-datachain-0.14.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-datachain-0.14.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.14.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.14.1.dist-info/RECORD,,
+datachain-0.14.2.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.14.2.dist-info/METADATA,sha256=hvPp9rvpa2p9FnopnOrd4DvJE1Rugef5YHe8vViSPyI,11338
+datachain-0.14.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+datachain-0.14.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.14.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.14.2.dist-info/RECORD,,