datachain 0.14.1__py3-none-any.whl → 0.14.3__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
datachain/__init__.py CHANGED
@@ -5,16 +5,16 @@ from datachain.lib.dc import (
     DataChain,
     Sys,
     datasets,
-    from_csv,
-    from_dataset,
-    from_hf,
-    from_json,
-    from_pandas,
-    from_parquet,
-    from_records,
-    from_storage,
-    from_values,
     listings,
+    read_csv,
+    read_dataset,
+    read_hf,
+    read_json,
+    read_pandas,
+    read_parquet,
+    read_records,
+    read_storage,
+    read_values,
 )
 from datachain.lib.file import (
     ArrowRow,
@@ -61,17 +61,17 @@ __all__ = [
     "VideoFragment",
     "VideoFrame",
    "datasets",
-    "from_csv",
-    "from_dataset",
-    "from_hf",
-    "from_json",
-    "from_pandas",
-    "from_parquet",
-    "from_records",
-    "from_storage",
-    "from_values",
     "is_chain_type",
     "listings",
     "metrics",
     "param",
+    "read_csv",
+    "read_dataset",
+    "read_hf",
+    "read_json",
+    "read_pandas",
+    "read_parquet",
+    "read_records",
+    "read_storage",
+    "read_values",
 ]
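
The release is essentially a rename of the top-level constructors from `from_*` to `read_*`. A minimal migration sketch (the bucket path is illustrative):

```py
import datachain as dc

# 0.14.1 spelling (no longer exported at the top level):
# chain = dc.from_storage("s3://mybucket/dir/")

# 0.14.3 spelling; read_csv, read_json, read_parquet, read_values, etc.
# follow the same from_* -> read_* rename.
chain = dc.read_storage("s3://mybucket/dir/")
```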
@@ -583,10 +583,10 @@ class Catalog:
         object_name="file",
         skip_indexing=False,
     ) -> tuple[Optional["Listing"], "Client", str]:
-        from datachain import from_storage
+        from datachain import read_storage
         from datachain.listing import Listing

-        from_storage(
+        read_storage(
             source, session=self.session, update=update, object_name=object_name
         ).exec()

@@ -994,14 +994,14 @@ class Catalog:
         if not sources:
             raise ValueError("Sources needs to be non empty list")

-        from datachain import from_dataset, from_storage
+        from datachain import read_dataset, read_storage

         chains = []
         for source in sources:
             if source.startswith(DATASET_PREFIX):
-                dc = from_dataset(source[len(DATASET_PREFIX) :], session=self.session)
+                dc = read_dataset(source[len(DATASET_PREFIX) :], session=self.session)
             else:
-                dc = from_storage(source, session=self.session, recursive=recursive)
+                dc = read_storage(source, session=self.session, recursive=recursive)

             chains.append(dc)

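The same branching is available through the public API; a rough sketch (the `ds://` prefix value and the example sources are assumptions, not taken from this diff):

```py
import datachain as dc

# Assumed prefix, mirroring the DATASET_PREFIX constant used above.
DATASET_PREFIX = "ds://"
sources = ["ds://my_cats", "s3://mybucket/images/"]  # illustrative sources

chains = [
    dc.read_dataset(src[len(DATASET_PREFIX):])
    if src.startswith(DATASET_PREFIX)
    else dc.read_storage(src, recursive=True)
    for src in sources
]
```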
@@ -7,6 +7,7 @@ from datachain.utils import get_envs_by_prefix
 if TYPE_CHECKING:
     from datachain.catalog import Catalog
     from datachain.data_storage import AbstractMetastore, AbstractWarehouse
+    from datachain.query.udf import AbstractUDFDistributor

 METASTORE_SERIALIZED = "DATACHAIN__METASTORE"
 METASTORE_IMPORT_PATH = "DATACHAIN_METASTORE"
@@ -15,7 +16,6 @@ WAREHOUSE_SERIALIZED = "DATACHAIN__WAREHOUSE"
 WAREHOUSE_IMPORT_PATH = "DATACHAIN_WAREHOUSE"
 WAREHOUSE_ARG_PREFIX = "DATACHAIN_WAREHOUSE_ARG_"
 DISTRIBUTED_IMPORT_PATH = "DATACHAIN_DISTRIBUTED"
-DISTRIBUTED_ARG_PREFIX = "DATACHAIN_DISTRIBUTED_ARG_"

 IN_MEMORY_ERROR_MESSAGE = "In-memory is only supported on SQLite"

@@ -100,27 +100,22 @@ def get_warehouse(in_memory: bool = False) -> "AbstractWarehouse":
     return warehouse_class(**warehouse_args)


-def get_distributed_class(**kwargs):
+def get_udf_distributor_class() -> type["AbstractUDFDistributor"]:
     distributed_import_path = os.environ.get(DISTRIBUTED_IMPORT_PATH)
-    distributed_arg_envs = get_envs_by_prefix(DISTRIBUTED_ARG_PREFIX)
-    # Convert env variable names to keyword argument names by lowercasing them
-    distributed_args = {k.lower(): v for k, v in distributed_arg_envs.items()}

     if not distributed_import_path:
         raise RuntimeError(
             f"{DISTRIBUTED_IMPORT_PATH} import path is required "
             "for distributed UDF processing."
         )
-    # Distributed class paths are specified as (for example):
-    # module.classname
+    # Distributed class paths are specified as (for example): module.classname
     if "." not in distributed_import_path:
         raise RuntimeError(
             f"Invalid {DISTRIBUTED_IMPORT_PATH} import path: {distributed_import_path}"
         )
     module_name, _, class_name = distributed_import_path.rpartition(".")
     distributed = import_module(module_name)
-    distributed_class = getattr(distributed, class_name)
-    return distributed_class(**distributed_args | kwargs)
+    return getattr(distributed, class_name)


 def get_catalog(
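
In short, the loader now returns the distributor class itself instead of instantiating it from `DATACHAIN_DISTRIBUTED_ARG_*` environment variables. A standalone sketch of the resolution step (the import path below is a stand-in purely to show the mechanics):

```py
import os
from importlib import import_module

# Stand-in path for illustration; in practice DATACHAIN_DISTRIBUTED would point
# at a real distributor class, e.g. "my_pkg.distributed.MyDistributor" (hypothetical).
os.environ["DATACHAIN_DISTRIBUTED"] = "collections.OrderedDict"

import_path = os.environ["DATACHAIN_DISTRIBUTED"]
if "." not in import_path:
    raise RuntimeError(f"Invalid DATACHAIN_DISTRIBUTED import path: {import_path}")
module_name, _, class_name = import_path.rpartition(".")
distributor_class = getattr(import_module(module_name), class_name)
print(distributor_class)  # the class is returned uninstantiated; callers construct it
```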
@@ -18,7 +18,7 @@ def show(
     schema: bool = False,
     include_hidden: bool = False,
 ) -> None:
-    from datachain import Session, from_dataset
+    from datachain import Session, read_dataset
     from datachain.query.dataset import DatasetQuery
     from datachain.utils import show_records

@@ -51,5 +51,5 @@ def show(
     if schema and dataset_version.feature_schema:
         print("\nSchema:")
         session = Session.get(catalog=catalog)
-        dc = from_dataset(name=name, version=version, session=session)
+        dc = read_dataset(name=name, version=version, session=session)
         dc.print_schema()
@@ -199,6 +199,15 @@ class AbstractWarehouse(ABC, Serializable):
     # Query Execution
     #

+    def query_count(self, query: sa.sql.selectable.Select) -> int:
+        """Count the number of rows in a query."""
+        count_query = sa.select(func.count(1)).select_from(query.subquery())
+        return next(self.db.execute(count_query))[0]
+
+    def table_rows_count(self, table) -> int:
+        count_query = sa.select(func.count(1)).select_from(table)
+        return next(self.db.execute(count_query))[0]
+
     def dataset_select_paginated(
         self,
         query,
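
The two new helpers share one pattern: wrap the target in a `SELECT count(1)` and read back the single scalar. A self-contained sketch of that pattern against an in-memory SQLite table (table and column names are illustrative):

```py
import sqlalchemy as sa
from sqlalchemy import func

engine = sa.create_engine("sqlite://")
metadata = sa.MetaData()
items = sa.Table("items", metadata, sa.Column("id", sa.Integer, primary_key=True))
metadata.create_all(engine)

with engine.begin() as conn:
    conn.execute(sa.insert(items), [{"id": 1}, {"id": 2}, {"id": 3}])

    # table_rows_count(): count(1) directly over the table
    total = conn.execute(sa.select(func.count(1)).select_from(items)).scalar_one()

    # query_count(): wrap an arbitrary SELECT in a subquery and count its rows
    query = sa.select(items).where(items.c.id > 1)
    filtered = conn.execute(
        sa.select(func.count(1)).select_from(query.subquery())
    ).scalar_one()

print(total, filtered)  # 3 2
```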
@@ -1,15 +1,15 @@
-from .csv import from_csv
+from .csv import read_csv
 from .datachain import C, Column, DataChain
-from .datasets import datasets, from_dataset
-from .hf import from_hf
-from .json import from_json
+from .datasets import datasets, read_dataset
+from .hf import read_hf
+from .json import read_json
 from .listings import listings
-from .pandas import from_pandas
-from .parquet import from_parquet
-from .records import from_records
-from .storage import from_storage
+from .pandas import read_pandas
+from .parquet import read_parquet
+from .records import read_records
+from .storage import read_storage
 from .utils import DatasetMergeError, DatasetPrepareError, Sys
-from .values import from_values
+from .values import read_values

 __all__ = [
     "C",
@@ -19,14 +19,14 @@ __all__ = [
     "DatasetPrepareError",
     "Sys",
     "datasets",
-    "from_csv",
-    "from_dataset",
-    "from_hf",
-    "from_json",
-    "from_pandas",
-    "from_parquet",
-    "from_records",
-    "from_storage",
-    "from_values",
     "listings",
+    "read_csv",
+    "read_dataset",
+    "read_hf",
+    "read_json",
+    "read_pandas",
+    "read_parquet",
+    "read_records",
+    "read_storage",
+    "read_values",
 ]
datachain/lib/dc/csv.py CHANGED
@@ -16,7 +16,7 @@ if TYPE_CHECKING:
     from .datachain import DataChain


-def from_csv(
+def read_csv(
     path,
     delimiter: Optional[str] = None,
     header: bool = True,
@@ -58,13 +58,13 @@ def from_csv(
     Reading a csv file:
     ```py
     import datachain as dc
-    chain = dc.from_csv("s3://mybucket/file.csv")
+    chain = dc.read_csv("s3://mybucket/file.csv")
     ```

     Reading csv files from a directory as a combined dataset:
     ```py
     import datachain as dc
-    chain = dc.from_csv("s3://mybucket/dir")
+    chain = dc.read_csv("s3://mybucket/dir")
     ```
     """
     from pandas.io.parsers.readers import STR_NA_VALUES
@@ -72,7 +72,7 @@ def from_csv(
     from pyarrow.dataset import CsvFileFormat
     from pyarrow.lib import type_for_alias

-    from .storage import from_storage
+    from .storage import read_storage

     parse_options = parse_options or {}
     if "delimiter" not in parse_options:
@@ -88,7 +88,7 @@ def from_csv(
     else:
         column_types = {}

-    chain = from_storage(path, session=session, settings=settings, **kwargs)
+    chain = read_storage(path, session=session, settings=settings, **kwargs)

     column_names = None
     if not header:
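
A hedged usage sketch for the renamed reader, using only parameters visible in this diff (`delimiter`, `header`); the path is illustrative:

```py
import datachain as dc

# Read a directory of csv files as one combined chain; delimiter and header
# are forwarded to the underlying pyarrow csv parsing options.
chain = dc.read_csv("s3://mybucket/dir", delimiter=";", header=True)
chain.show()
```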
@@ -84,22 +84,22 @@ class DataChain:
     underlyind library `Pydantic`.

     See Also:
-        `from_storage("s3://my-bucket/my-dir/")` - reading unstructured
+        `read_storage("s3://my-bucket/my-dir/")` - reading unstructured
             data files from storages such as S3, gs or Azure ADLS.

         `DataChain.save("name")` - saving to a dataset.

-        `from_dataset("name")` - reading from a dataset.
+        `read_dataset("name")` - reading from a dataset.

-        `from_values(fib=[1, 2, 3, 5, 8])` - generating from values.
+        `read_values(fib=[1, 2, 3, 5, 8])` - generating from values.

-        `from_pandas(pd.DataFrame(...))` - generating from pandas.
+        `read_pandas(pd.DataFrame(...))` - generating from pandas.

-        `from_json("file.json")` - generating from json.
+        `read_json("file.json")` - generating from json.

-        `from_csv("file.csv")` - generating from csv.
+        `read_csv("file.csv")` - generating from csv.

-        `from_parquet("file.parquet")` - generating from parquet.
+        `read_parquet("file.parquet")` - generating from parquet.

     Example:
         ```py
@@ -118,7 +118,7 @@ class DataChain:
         api_key = os.environ["MISTRAL_API_KEY"]

         chain = (
-            dc.from_storage("gs://datachain-demo/chatbot-KiT/")
+            dc.read_storage("gs://datachain-demo/chatbot-KiT/")
             .limit(5)
             .settings(cache=True, parallel=5)
             .map(
@@ -315,27 +315,27 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .storage import from_storage
+        from .storage import read_storage

         warnings.warn(
             "Class method `from_storage` is deprecated. "
-            "Use `from_storage` function instead from top_module.",
+            "Use `read_storage` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_storage(*args, **kwargs)
+        return read_storage(*args, **kwargs)

     @classmethod
     def from_dataset(cls, *args, **kwargs) -> "DataChain":
-        from .datasets import from_dataset
+        from .datasets import read_dataset

         warnings.warn(
             "Class method `from_dataset` is deprecated. "
-            "Use `from_dataset` function instead from top_module.",
+            "Use `read_dataset` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_dataset(*args, **kwargs)
+        return read_dataset(*args, **kwargs)

     @classmethod
     def from_json(
@@ -343,15 +343,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .json import from_json
+        from .json import read_json

         warnings.warn(
             "Class method `from_json` is deprecated. "
-            "Use `from_json` function instead from top_module.",
+            "Use `read_json` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_json(*args, **kwargs)
+        return read_json(*args, **kwargs)

     def explode(
         self,
@@ -487,7 +487,7 @@ class DataChain:
         )

         chain = (
-            dc.from_storage("s3://my-bucket")
+            dc.read_storage("s3://my-bucket")
             .apply(parse_stem)
             .filter(C("stem").glob("*cat*"))
         )
@@ -727,7 +727,7 @@ class DataChain:

     Note:
         Order is not guaranteed when steps are added after an `order_by` statement.
-        I.e. when using `from_dataset` an `order_by` statement should be used if
+        I.e. when using `read_dataset` an `order_by` statement should be used if
         the order of the records in the chain is important.
         Using `order_by` directly before `limit`, `collect` and `collect_flatten`
         will give expected results.
@@ -1466,15 +1466,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .values import from_values
+        from .values import read_values

         warnings.warn(
             "Class method `from_values` is deprecated. "
-            "Use `from_values` function instead from top_module.",
+            "Use `read_values` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_values(*args, **kwargs)
+        return read_values(*args, **kwargs)

     @classmethod
     def from_pandas(
@@ -1482,15 +1482,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .pandas import from_pandas
+        from .pandas import read_pandas

         warnings.warn(
             "Class method `from_pandas` is deprecated. "
-            "Use `from_pandas` function instead from top_module.",
+            "Use `read_pandas` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_pandas(*args, **kwargs)
+        return read_pandas(*args, **kwargs)

     def to_pandas(self, flatten=False, include_hidden=True) -> "pd.DataFrame":
         """Return a pandas DataFrame from the chain.
@@ -1575,15 +1575,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .hf import from_hf
+        from .hf import read_hf

         warnings.warn(
             "Class method `from_hf` is deprecated. "
-            "Use `from_hf` function instead from top_module.",
+            "Use `read_hf` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_hf(*args, **kwargs)
+        return read_hf(*args, **kwargs)

     def parse_tabular(
         self,
@@ -1610,7 +1610,7 @@ class DataChain:
         Reading a json lines file:
         ```py
         import datachain as dc
-        chain = dc.from_storage("s3://mybucket/file.jsonl")
+        chain = dc.read_storage("s3://mybucket/file.jsonl")
         chain = chain.parse_tabular(format="json")
         ```

@@ -1618,7 +1618,7 @@ class DataChain:
         ```py
         import datachain as dc

-        chain = dc.from_storage("s3://mybucket")
+        chain = dc.read_storage("s3://mybucket")
         chain = chain.filter(dc.C("file.name").glob("*.jsonl"))
         chain = chain.parse_tabular(format="json")
         ```
@@ -1680,15 +1680,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .csv import from_csv
+        from .csv import read_csv

         warnings.warn(
             "Class method `from_csv` is deprecated. "
-            "Use `from_csv` function instead from top_module.",
+            "Use `read_csv` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_csv(*args, **kwargs)
+        return read_csv(*args, **kwargs)

     @classmethod
     def from_parquet(
@@ -1696,15 +1696,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .parquet import from_parquet
+        from .parquet import read_parquet

         warnings.warn(
             "Class method `from_parquet` is deprecated. "
-            "Use `from_parquet` function instead from top_module.",
+            "Use `read_parquet` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_parquet(*args, **kwargs)
+        return read_parquet(*args, **kwargs)

     def to_parquet(
         self,
@@ -1930,15 +1930,15 @@ class DataChain:
         *args,
         **kwargs,
     ) -> "DataChain":
-        from .records import from_records
+        from .records import read_records

         warnings.warn(
             "Class method `from_records` is deprecated. "
-            "Use `from_records` function instead from top_module.",
+            "Use `read_records` function instead from top_module.",
             DeprecationWarning,
             stacklevel=2,
         )
-        return from_records(*args, **kwargs)
+        return read_records(*args, **kwargs)

     def sum(self, fr: DataType):  # type: ignore[override]
         """Compute the sum of a column."""
@@ -1969,7 +1969,7 @@ class DataChain:
         import datachain as dc

         (
-            dc.from_storage(DATA, type="text")
+            dc.read_storage(DATA, type="text")
             .settings(parallel=4, cache=True)
             .setup(client=lambda: anthropic.Anthropic(api_key=API_KEY))
             .map(
@@ -2021,7 +2021,7 @@ class DataChain:
         ```py
         import datachain as dc

-        ds = dc.from_storage("s3://mybucket")
+        ds = dc.read_storage("s3://mybucket")
         ds.to_storage("gs://mybucket", placement="filename")
         ```
     """
@@ -2139,7 +2139,7 @@ class DataChain:
         ```py
         import datachain as dc

-        chain = dc.from_storage(...)
+        chain = dc.read_storage(...)
         chunk_1 = query._chunk(0, 2)
         chunk_2 = query._chunk(1, 2)
         ```
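
All of the `DataChain.from_*` class methods above now survive only as deprecated wrappers that delegate to the new `read_*` functions. A minimal sketch of what calling one of them looks like (using `from_values`, since it needs no external storage):

```py
import warnings
import datachain as dc

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    chain = dc.DataChain.from_values(fib=[1, 1, 2, 3, 5])  # delegates to read_values

assert any(issubclass(w.category, DeprecationWarning) for w in caught)
chain.show()
```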
@@ -13,7 +13,7 @@ from datachain.query import Session
 from datachain.query.dataset import DatasetQuery

 from .utils import Sys
-from .values import from_values
+from .values import read_values

 if TYPE_CHECKING:
     from typing_extensions import ParamSpec
@@ -23,7 +23,7 @@ if TYPE_CHECKING:
 P = ParamSpec("P")


-def from_dataset(
+def read_dataset(
     name: str,
     version: Optional[int] = None,
     session: Optional[Session] = None,
@@ -44,15 +44,15 @@ def from_dataset(
     Example:
         ```py
         import datachain as dc
-        chain = dc.from_dataset("my_cats")
+        chain = dc.read_dataset("my_cats")
         ```

         ```py
-        chain = dc.from_dataset("my_cats", fallback_to_studio=False)
+        chain = dc.read_dataset("my_cats", fallback_to_studio=False)
         ```

         ```py
-        chain = dc.from_dataset("my_cats", version=1)
+        chain = dc.read_dataset("my_cats", version=1)
         ```

         ```py
@@ -64,7 +64,7 @@ def from_dataset(
             "min_task_size": 1000,
             "prefetch": 10,
         }
-        chain = dc.from_dataset(
+        chain = dc.read_dataset(
             name="my_cats",
             version=1,
             session=session,
@@ -140,7 +140,7 @@ def datasets(
         )
     ]

-    return from_values(
+    return read_values(
         session=session,
         settings=settings,
         in_memory=in_memory,
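
A short round-trip sketch using the renamed functions (the dataset name and values are illustrative, not from the diff):

```py
import datachain as dc

# Save a small in-memory chain as a named dataset, then read it back.
dc.read_values(fib=[1, 1, 2, 3, 5]).save("fib")
chain = dc.read_dataset("fib", version=1)
chain.show()
```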
datachain/lib/dc/hf.py CHANGED
@@ -18,7 +18,7 @@ if TYPE_CHECKING:
 P = ParamSpec("P")


-def from_hf(
+def read_hf(
     dataset: Union[str, "HFDatasetType"],
     *args,
     session: Optional[Session] = None,
@@ -42,7 +42,7 @@ def from_hf(
     Load from Hugging Face Hub:
     ```py
     import datachain as dc
-    chain = dc.from_hf("beans", split="train")
+    chain = dc.read_hf("beans", split="train")
     ```

     Generate chain from loaded dataset:
@@ -50,12 +50,12 @@ def from_hf(
     from datasets import load_dataset
     ds = load_dataset("beans", split="train")
     import datachain as dc
-    chain = dc.from_hf(ds)
+    chain = dc.read_hf(ds)
     ```
     """
     from datachain.lib.hf import HFGenerator, get_output_schema, stream_splits

-    from .values import from_values
+    from .values import read_values

     output: dict[str, DataType] = {}
     ds_dict = stream_splits(dataset, *args, **kwargs)
@@ -69,5 +69,5 @@ def from_hf(
     if object_name:
         output = {object_name: model}

-    chain = from_values(split=list(ds_dict.keys()), session=session, settings=settings)
+    chain = read_values(split=list(ds_dict.keys()), session=session, settings=settings)
     return chain.gen(HFGenerator(dataset, model, *args, **kwargs), output=output)
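
Usage stays the same apart from the name; a brief sketch (requires the `datasets` package and network access to the Hub):

```py
import datachain as dc

# Read a public Hugging Face dataset; with no split given, read_hf enumerates
# the available splits (via read_values, as shown above) and generates rows per split.
chain = dc.read_hf("beans", split="train")
chain.show()
```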
datachain/lib/dc/json.py CHANGED
@@ -22,7 +22,7 @@ if TYPE_CHECKING:
 P = ParamSpec("P")


-def from_json(
+def read_json(
     path: Union[str, os.PathLike[str]],
     type: FileType = "text",
     spec: Optional[DataType] = None,
@@ -52,16 +52,16 @@ def from_json(
     infer JSON schema from data, reduce using JMESPATH
     ```py
     import datachain as dc
-    chain = dc.from_json("gs://json", jmespath="key1.key2")
+    chain = dc.read_json("gs://json", jmespath="key1.key2")
     ```

     infer JSON schema from a particular path
     ```py
     import datachain as dc
-    chain = dc.from_json("gs://json_ds", schema_from="gs://json/my.json")
+    chain = dc.read_json("gs://json_ds", schema_from="gs://json/my.json")
     ```
     """
-    from .storage import from_storage
+    from .storage import read_storage

     if schema_from == "auto":
         schema_from = os.fspath(path)
@@ -74,7 +74,7 @@ def from_json(
     object_name = jmespath_to_name(jmespath)
     if not object_name:
         object_name = format
-    chain = from_storage(uri=path, type=type, **kwargs)
+    chain = read_storage(uri=path, type=type, **kwargs)
     signal_dict = {
         object_name: read_meta(
             schema_from=schema_from,
@@ -6,7 +6,7 @@ from typing import (
 from datachain.lib.listing_info import ListingInfo
 from datachain.query import Session

-from .values import from_values
+from .values import read_values

 if TYPE_CHECKING:
     from typing_extensions import ParamSpec
@@ -35,7 +35,7 @@ def listings(
     session = Session.get(session, in_memory=in_memory)
     catalog = kwargs.get("catalog") or session.catalog

-    return from_values(
+    return read_values(
         session=session,
         in_memory=in_memory,
         output={object_name: ListingInfo},
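
Finally, `listings()` keeps its public signature and simply builds its result via `read_values` internally; a quick sketch:

```py
import datachain as dc

# Enumerate previously indexed storage listings as a chain of ListingInfo rows.
listing_chain = dc.listings()
listing_chain.show()
```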