datachain 0.14.3__py3-none-any.whl → 0.14.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

datachain/catalog/catalog.py CHANGED
@@ -580,15 +580,13 @@ class Catalog:
  source: str,
  update=False,
  client_config=None,
- object_name="file",
+ column="file",
  skip_indexing=False,
  ) -> tuple[Optional["Listing"], "Client", str]:
  from datachain import read_storage
  from datachain.listing import Listing
 
- read_storage(
- source, session=self.session, update=update, object_name=object_name
- ).exec()
+ read_storage(source, session=self.session, update=update, column=column).exec()
 
  list_ds_name, list_uri, list_path, _ = get_listing(
  source, self.session, update=update
@@ -602,7 +600,7 @@ class Catalog:
  self.warehouse.clone(),
  client,
  dataset_name=list_ds_name,
- object_name=object_name,
+ column=column,
  )
 
  return lst, client, list_path
datachain/data_storage/schema.py CHANGED
@@ -30,8 +30,8 @@ if TYPE_CHECKING:
  DEFAULT_DELIMITER = "__"
 
 
- def col_name(name: str, object_name: str = "file") -> str:
-     return f"{object_name}{DEFAULT_DELIMITER}{name}"
+ def col_name(name: str, column: str = "file") -> str:
+     return f"{column}{DEFAULT_DELIMITER}{name}"
 
 
  def dedup_columns(columns: Iterable[sa.Column]) -> list[sa.Column]:
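Only the parameter name changes here; the flattened column naming convention is untouched. A quick self-contained sketch of what `col_name` produces (shown standalone for illustration):

```python
DEFAULT_DELIMITER = "__"

def col_name(name: str, column: str = "file") -> str:
    # identical to the helper above, reproduced for illustration
    return f"{column}{DEFAULT_DELIMITER}{name}"

assert col_name("path") == "file__path"          # default "file" prefix
assert col_name("id", column="sys") == "sys__id"
```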
@@ -84,19 +84,19 @@ def convert_rows_custom_column_types(
 
 
  class DirExpansion:
- def __init__(self, object_name: str):
- self.object_name = object_name
+ def __init__(self, column: str):
+ self.column = column
 
- def col_name(self, name: str, object_name: Optional[str] = None) -> str:
- object_name = object_name or self.object_name
- return col_name(name, object_name)
+ def col_name(self, name: str, column: Optional[str] = None) -> str:
+ column = column or self.column
+ return col_name(name, column)
 
- def c(self, query, name: str, object_name: Optional[str] = None) -> str:
- return getattr(query.c, self.col_name(name, object_name=object_name))
+ def c(self, query, name: str, column: Optional[str] = None) -> str:
+ return getattr(query.c, self.col_name(name, column=column))
 
  def base_select(self, q):
  return sa.select(
- self.c(q, "id", object_name="sys"),
+ self.c(q, "id", column="sys"),
  false().label(self.col_name("is_dir")),
  self.c(q, "source"),
  self.c(q, "path"),
@@ -153,12 +153,12 @@ class DataTable:
  name: str,
  engine: "DatabaseEngine",
  column_types: Optional[dict[str, SQLType]] = None,
- object_name: str = "file",
+ column: str = "file",
  ):
  self.name: str = name
  self.engine = engine
  self.column_types: dict[str, SQLType] = column_types or {}
- self.object_name = object_name
+ self.column = column
 
  @staticmethod
  def copy_column(
@@ -224,18 +224,16 @@ class DataTable:
  def columns(self) -> "ReadOnlyColumnCollection[str, sa.Column[Any]]":
  return self.table.columns
 
- def col_name(self, name: str, object_name: Optional[str] = None) -> str:
- object_name = object_name or self.object_name
- return col_name(name, object_name)
+ def col_name(self, name: str, column: Optional[str] = None) -> str:
+ column = column or self.column
+ return col_name(name, column)
 
- def without_object(
- self, column_name: str, object_name: Optional[str] = None
- ) -> str:
- object_name = object_name or self.object_name
- return column_name.removeprefix(f"{object_name}{DEFAULT_DELIMITER}")
+ def without_object(self, column_name: str, column: Optional[str] = None) -> str:
+ column = column or self.column
+ return column_name.removeprefix(f"{column}{DEFAULT_DELIMITER}")
 
- def c(self, name: str, object_name: Optional[str] = None):
- return getattr(self.columns, self.col_name(name, object_name=object_name))
+ def c(self, name: str, column: Optional[str] = None):
+ return getattr(self.columns, self.col_name(name, column=column))
 
  @property
  def table(self) -> "sa.Table":
@@ -275,7 +273,7 @@ class DataTable:
  ]
 
  def dir_expansion(self):
- return DirExpansion(self.object_name)
+ return DirExpansion(self.column)
 
 
  PARTITION_COLUMN_ID = "partition_id"
datachain/data_storage/sqlite.py CHANGED
@@ -489,7 +489,7 @@ class SQLiteWarehouse(AbstractWarehouse):
  self, dataset: DatasetRecord, version: int
  ) -> list[StorageURI]:
  dr = self.dataset_rows(dataset, version)
- query = dr.select(dr.c("source", object_name="file")).distinct()
+ query = dr.select(dr.c("source", column="file")).distinct()
  cur = self.db.cursor()
  cur.row_factory = sqlite3.Row # type: ignore[assignment]
 
datachain/data_storage/warehouse.py CHANGED
@@ -179,7 +179,7 @@ class AbstractWarehouse(ABC, Serializable):
  self,
  dataset: DatasetRecord,
  version: Optional[int] = None,
- object_name: str = "file",
+ column: str = "file",
  ):
  version = version or dataset.latest_version
 
@@ -188,7 +188,7 @@ class AbstractWarehouse(ABC, Serializable):
  table_name,
  self.db,
  dataset.get_schema(version),
- object_name=object_name,
+ column=column,
  )
 
  @property
@@ -487,7 +487,7 @@ class AbstractWarehouse(ABC, Serializable):
  dataset_rows: "DataTable",
  path_list: list[str],
  glob_name: str,
- object_name="file",
+ column="file",
  ) -> Iterator[Node]:
  """Finds all Nodes that correspond to GLOB like path pattern."""
  dr = dataset_rows
@@ -521,7 +521,7 @@ class AbstractWarehouse(ABC, Serializable):
  de = dr.dir_expansion()
  q = de.query(
  dr.select().where(dr.c("is_latest") == true()).subquery(),
- object_name=dr.object_name,
+ column=dr.column,
  ).subquery()
  q = self.expand_query(de, q, dr)
 
@@ -597,12 +597,10 @@ class AbstractWarehouse(ABC, Serializable):
  with_default(dr.c("is_latest")),
  dr.c("last_modified"),
  with_default(dr.c("size")),
- with_default(dr.c("rand", object_name="sys")),
+ with_default(dr.c("rand", column="sys")),
  dr.c("location"),
  de.c(q, "source"),
- ).select_from(
- q.outerjoin(dr.table, q.c.sys__id == dr.c("id", object_name="sys"))
- )
+ ).select_from(q.outerjoin(dr.table, q.c.sys__id == dr.c("id", column="sys")))
 
  def get_node_by_path(self, dataset_rows: "DataTable", path: str) -> Node:
  """Gets node that corresponds to some path"""
datachain/lib/dataset_info.py CHANGED
@@ -12,6 +12,7 @@ from datachain.dataset import (
  )
  from datachain.job import Job
  from datachain.lib.data_model import DataModel
+ from datachain.query.session import Session
  from datachain.utils import TIME_ZERO
 
  if TYPE_CHECKING:
@@ -32,6 +33,10 @@ class DatasetInfo(DataModel):
  error_message: str = Field(default="")
  error_stack: str = Field(default="")
 
+ @property
+ def is_temp(self) -> bool:
+ return Session.is_temp_dataset(self.name)
+
  @staticmethod
  def _validate_dict(
  v: Optional[Union[str, dict]],
datachain/lib/dc/csv.py CHANGED
@@ -21,7 +21,7 @@ def read_csv(
  delimiter: Optional[str] = None,
  header: bool = True,
  output: OutputType = None,
- object_name: str = "",
+ column: str = "",
  model_name: str = "",
  source: bool = True,
  nrows=None,
@@ -42,7 +42,7 @@ def read_csv(
  output : Dictionary or feature class defining column names and their
  corresponding types. List of column names is also accepted, in which
  case types will be inferred.
- object_name : Created object column name.
+ column : Created column name.
  model_name : Generated model name.
  source : Whether to include info about the source file.
  nrows : Optional row limit.
@@ -119,7 +119,7 @@ def read_csv(
  )
  return chain.parse_tabular(
  output=output,
- object_name=object_name,
+ column=column,
  model_name=model_name,
  source=source,
  nrows=nrows,
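At call sites only the keyword changes; defaults and behaviour stay the same. A minimal usage sketch against the new signature (the path is illustrative):

```python
import datachain as dc

# 0.14.5: the generated top-level column is set via `column=`
# (it was `object_name=` in 0.14.3).
chain = dc.read_csv("s3://my-bucket/data.csv", column="csv")
```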
datachain/lib/dc/datachain.py CHANGED
@@ -357,7 +357,7 @@ class DataChain:
  self,
  col: str,
  model_name: Optional[str] = None,
- object_name: Optional[str] = None,
+ column: Optional[str] = None,
  schema_sample_size: int = 1,
  ) -> "DataChain":
  """Explodes a column containing JSON objects (dict or str DataChain type) into
@@ -368,7 +368,7 @@ class DataChain:
  col: the name of the column containing JSON to be exploded.
  model_name: optional generated model name. By default generates the name
  automatically.
- object_name: optional generated object column name. By default generates the
+ column: optional generated column name. By default generates the
  name automatically.
  schema_sample_size: the number of rows to use for inferring the schema of
  the JSON (in case some fields are optional and it's not enough to
@@ -406,10 +406,10 @@ class DataChain:
  )
  return model.model_validate(json_dict)
 
- if not object_name:
- object_name = f"{col}_expl"
+ if not column:
+ column = f"{col}_expl"
 
- return self.map(json_to_model, params=col, output={object_name: model})
+ return self.map(json_to_model, params=col, output={column: model})
 
  @classmethod
  def datasets(
@@ -1588,7 +1588,7 @@ class DataChain:
  def parse_tabular(
  self,
  output: OutputType = None,
- object_name: str = "",
+ column: str = "",
  model_name: str = "",
  source: bool = True,
  nrows: Optional[int] = None,
@@ -1600,7 +1600,7 @@ class DataChain:
  output : Dictionary or feature class defining column names and their
  corresponding types. List of column names is also accepted, in which
  case types will be inferred.
- object_name : Generated object column name.
+ column : Generated column name.
  model_name : Generated model name.
  source : Whether to include info about the source file.
  nrows : Optional row limit.
@@ -1651,14 +1651,14 @@ class DataChain:
  raise DatasetPrepareError(self.name, e) from e
 
  if isinstance(output, dict):
- model_name = model_name or object_name or ""
+ model_name = model_name or column or ""
  model = dict_to_data_model(model_name, output)
  output = model
  else:
  model = output # type: ignore[assignment]
 
- if object_name:
- output = {object_name: model} # type: ignore[dict-item]
+ if column:
+ output = {column: model} # type: ignore[dict-item]
  elif isinstance(output, type(BaseModel)):
  output = {
  name: info.annotation # type: ignore[misc]
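`parse_tabular()` and `explode()` take the same rename at the call site; defaults are untouched (`explode()` still falls back to `f"{col}_expl"`). A hedged usage sketch with illustrative paths and column names:

```python
import datachain as dc

# parse_tabular(): the generated top-level column is now passed as `column=`.
tables = (
    dc.read_storage("s3://my-bucket/tables/", anon=True)
    .parse_tabular(format="parquet", column="row")
)

# explode(): same keyword; assumes the chain has a JSON-bearing column "meta".
expanded = tables.explode("meta", column="meta_expl")
```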
datachain/lib/dc/datasets.py CHANGED
@@ -1,7 +1,4 @@
- from typing import (
- TYPE_CHECKING,
- Optional,
- )
+ from typing import TYPE_CHECKING, Optional, get_origin, get_type_hints
 
  from datachain.lib.dataset_info import DatasetInfo
  from datachain.lib.file import (
@@ -102,7 +99,7 @@ def datasets(
  session: Optional[Session] = None,
  settings: Optional[dict] = None,
  in_memory: bool = False,
- object_name: str = "dataset",
+ column: Optional[str] = None,
  include_listing: bool = False,
  studio: bool = False,
  ) -> "DataChain":
@@ -112,7 +109,8 @@ def datasets(
  session: Optional session instance. If not provided, uses default session.
  settings: Optional dictionary of settings to configure the chain.
  in_memory: If True, creates an in-memory session. Defaults to False.
- object_name: Name of the output object in the chain. Defaults to "dataset".
+ column: Name of the output column in the chain. Defaults to None which
+ means no top level column will be created.
  include_listing: If True, includes listing datasets. Defaults to False.
  studio: If True, returns datasets from Studio only,
  otherwise returns all local datasets. Defaults to False.
@@ -124,7 +122,7 @@ def datasets(
  ```py
  import datachain as dc
 
- chain = dc.datasets()
+ chain = dc.datasets(column="dataset")
  for ds in chain.collect("dataset"):
  print(f"{ds.name}@v{ds.version}")
  ```
@@ -139,11 +137,32 @@ def datasets(
  include_listing=include_listing, studio=studio
  )
  ]
+ datasets_values = [d for d in datasets_values if not d.is_temp]
+
+ if not column:
+ # flattening dataset fields
+ schema = {
+ k: get_origin(v) if get_origin(v) is dict else v
+ for k, v in get_type_hints(DatasetInfo).items()
+ if k in DatasetInfo.model_fields
+ }
+ data = {k: [] for k in DatasetInfo.model_fields} # type: ignore[var-annotated]
+ for d in [d.model_dump() for d in datasets_values]:
+ for field, value in d.items():
+ data[field].append(value)
+
+ return read_values(
+ session=session,
+ settings=settings,
+ in_memory=in_memory,
+ output=schema,
+ **data, # type: ignore[arg-type]
+ )
 
  return read_values(
  session=session,
  settings=settings,
  in_memory=in_memory,
- output={object_name: DatasetInfo},
- **{object_name: datasets_values}, # type: ignore[arg-type]
+ output={column: DatasetInfo},
+ **{column: datasets_values}, # type: ignore[arg-type]
  )
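This is a behavioural change worth calling out: with no `column` argument, `datasets()` now flattens `DatasetInfo` fields into top-level columns and drops session-temporary datasets. A hedged before/after sketch:

```python
import datachain as dc

# 0.14.5 default: flattened columns, one per DatasetInfo field.
for name, version in dc.datasets().collect("name", "version"):
    print(f"{name}@v{version}")

# The old nested shape is still available by naming the column explicitly.
for ds in dc.datasets(column="dataset").collect("dataset"):
    print(f"{ds.name}@v{ds.version}")
```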
datachain/lib/dc/hf.py CHANGED
@@ -23,7 +23,7 @@ def read_hf(
  *args,
  session: Optional[Session] = None,
  settings: Optional[dict] = None,
- object_name: str = "",
+ column: str = "",
  model_name: str = "",
  **kwargs,
  ) -> "DataChain":
@@ -34,7 +34,7 @@ def read_hf(
  or an instance of `datasets.Dataset`-like object.
  session : Session to use for the chain.
  settings : Settings to use for the chain.
- object_name : Generated object column name.
+ column : Generated object column name.
  model_name : Generated model name.
  kwargs : Parameters to pass to datasets.load_dataset.
 
@@ -62,12 +62,12 @@ def read_hf(
  if len(ds_dict) > 1:
  output = {"split": str}
 
- model_name = model_name or object_name or ""
+ model_name = model_name or column or ""
  hf_features = next(iter(ds_dict.values())).features
  output = output | get_output_schema(hf_features)
  model = dict_to_data_model(model_name, output)
- if object_name:
- output = {object_name: model}
+ if column:
+ output = {column: model}
 
  chain = read_values(split=list(ds_dict.keys()), session=session, settings=settings)
  return chain.gen(HFGenerator(dataset, model, *args, **kwargs), output=output)
datachain/lib/dc/json.py CHANGED
@@ -28,7 +28,7 @@ def read_json(
  spec: Optional[DataType] = None,
  schema_from: Optional[str] = "auto",
  jmespath: Optional[str] = None,
- object_name: Optional[str] = "",
+ column: Optional[str] = "",
  model_name: Optional[str] = None,
  format: Optional[str] = "json",
  nrows=None,
@@ -42,7 +42,7 @@ def read_json(
  type : read file as "binary", "text", or "image" data. Default is "text".
  spec : optional Data Model
  schema_from : path to sample to infer spec (if schema not provided)
- object_name : generated object column name
+ column : generated column name
  model_name : optional generated model name
  format: "json", "jsonl"
  jmespath : optional JMESPATH expression to reduce JSON
@@ -70,13 +70,13 @@ def read_json(
  name_end = re.search(r"\W", s).start() if re.search(r"\W", s) else len(s) # type: ignore[union-attr]
  return s[:name_end]
 
- if (not object_name) and jmespath:
- object_name = jmespath_to_name(jmespath)
- if not object_name:
- object_name = format
+ if (not column) and jmespath:
+ column = jmespath_to_name(jmespath)
+ if not column:
+ column = format
  chain = read_storage(uri=path, type=type, **kwargs)
  signal_dict = {
- object_name: read_meta(
+ column: read_meta(
  schema_from=schema_from,
  format=format,
  spec=spec,
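The fallback chain for the column name is unchanged, only renamed: an explicit `column` wins, otherwise it is derived from the JMESPath expression, otherwise the format is used. A hedged sketch (paths and expressions are illustrative):

```python
import datachain as dc

# column given explicitly -> signals live under "meta"
meta = dc.read_json("gs://datachain-demo/dogs-and-cats/*json", column="meta", anon=True)

# no column: with jmespath="annotations[*]" the name is derived ("annotations");
# with neither, it falls back to the format ("json" or "jsonl").
ann = dc.read_json("s3://my-bucket/coco.json", jmespath="annotations[*]")
```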
datachain/lib/dc/listings.py CHANGED
@@ -19,7 +19,7 @@ if TYPE_CHECKING:
  def listings(
  session: Optional[Session] = None,
  in_memory: bool = False,
- object_name: str = "listing",
+ column: str = "listing",
  **kwargs,
  ) -> "DataChain":
  """Generate chain with list of cached listings.
@@ -38,6 +38,6 @@ def listings(
  return read_values(
  session=session,
  in_memory=in_memory,
- output={object_name: ListingInfo},
- **{object_name: catalog.listings()}, # type: ignore[arg-type]
+ output={column: ListingInfo},
+ **{column: catalog.listings()}, # type: ignore[arg-type]
  )
datachain/lib/dc/pandas.py CHANGED
@@ -22,7 +22,7 @@ def read_pandas( # type: ignore[override]
  session: Optional[Session] = None,
  settings: Optional[dict] = None,
  in_memory: bool = False,
- object_name: str = "",
+ column: str = "",
  ) -> "DataChain":
  """Generate chain from pandas data-frame.
 
@@ -39,18 +39,18 @@ def read_pandas( # type: ignore[override]
 
  fr_map = {col.lower(): df[col].tolist() for col in df.columns}
 
- for column in fr_map:
- if not column.isidentifier():
+ for c in fr_map:
+ if not c.isidentifier():
  raise DatasetPrepareError(
  name,
- f"import from pandas error - '{column}' cannot be a column name",
+ f"import from pandas error - '{c}' cannot be a column name",
  )
 
  return read_values(
  name,
  session,
  settings=settings,
- object_name=object_name,
+ column=column,
  in_memory=in_memory,
  **fr_map,
  )
datachain/lib/dc/parquet.py CHANGED
@@ -19,7 +19,7 @@ def read_parquet(
  path,
  partitioning: Any = "hive",
  output: Optional[dict[str, DataType]] = None,
- object_name: str = "",
+ column: str = "",
  model_name: str = "",
  source: bool = True,
  session: Optional[Session] = None,
@@ -33,7 +33,7 @@ def read_parquet(
  as `s3://`, `gs://`, `az://` or "file:///".
  partitioning : Any pyarrow partitioning schema.
  output : Dictionary defining column names and their corresponding types.
- object_name : Created object column name.
+ column : Created column name.
  model_name : Generated model name.
  source : Whether to include info about the source file.
  session : Session to use for the chain.
@@ -57,7 +57,7 @@ def read_parquet(
  chain = read_storage(path, session=session, settings=settings, **kwargs)
  return chain.parse_tabular(
  output=output,
- object_name=object_name,
+ column=column,
  model_name=model_name,
  source=source,
  format="parquet",
datachain/lib/dc/storage.py CHANGED
@@ -29,7 +29,7 @@ def read_storage(
  settings: Optional[dict] = None,
  in_memory: bool = False,
  recursive: Optional[bool] = True,
- object_name: str = "file",
+ column: str = "file",
  update: bool = False,
  anon: bool = False,
  client_config: Optional[dict] = None,
@@ -43,7 +43,7 @@ def read_storage(
  as `s3://`, `gs://`, `az://` or "file:///"
  type : read file as "binary", "text", or "image" data. Default is "binary".
  recursive : search recursively for the given path.
- object_name : Created object column name.
+ column : Created column name.
  update : force storage reindexing. Default is False.
  anon : If True, we will treat cloud bucket as public one
  client_config : Optional client configuration for the storage client.
@@ -124,7 +124,7 @@ def read_storage(
 
  dc = read_dataset(list_ds_name, session=session, settings=settings)
  dc._query.update = update
- dc.signals_schema = dc.signals_schema.mutate({f"{object_name}": file_type})
+ dc.signals_schema = dc.signals_schema.mutate({f"{column}": file_type})
 
  if update or not list_ds_exists:
 
@@ -140,7 +140,7 @@ def read_storage(
  .settings(prefetch=0)
  .gen(
  list_bucket(lst_uri, cache, client_config=client_config),
- output={f"{object_name}": file_type},
+ output={f"{column}": file_type},
  )
  .save(ds_name, listing=True)
  )
@@ -149,7 +149,7 @@ def read_storage(
  lambda ds_name=list_ds_name, lst_uri=list_uri: lst_fn(ds_name, lst_uri)
  )
 
- chain = ls(dc, list_path, recursive=recursive, object_name=object_name)
+ chain = ls(dc, list_path, recursive=recursive, column=column)
 
  storage_chain = storage_chain.union(chain) if storage_chain else chain
  listed_ds_name.add(list_ds_name)
@@ -162,7 +162,7 @@ def read_storage(
  file=file_values,
  )
  file_chain.signals_schema = file_chain.signals_schema.mutate(
- {f"{object_name}": file_type}
+ {f"{column}": file_type}
  )
  storage_chain = storage_chain.union(file_chain) if storage_chain else file_chain
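The `column` value becomes the prefix for the generated `File` signals (stored as `file__path`, `file__size`, and so on, per the `col_name` helper shown earlier). A rough usage sketch with a non-default name (the bucket path is illustrative):

```python
import datachain as dc
from datachain import Column

# File signals are addressed through the chosen column name ("doc" here).
docs = dc.read_storage("s3://my-bucket/docs/", column="doc", anon=True)

paths = list(docs.filter(Column("doc.size") > 0).collect("doc.path"))
```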
datachain/lib/dc/values.py CHANGED
@@ -24,7 +24,7 @@ def read_values(
  settings: Optional[dict] = None,
  in_memory: bool = False,
  output: OutputType = None,
- object_name: str = "",
+ column: str = "",
  **fr_map,
  ) -> "DataChain":
  """Generate chain from list of values.
@@ -48,6 +48,6 @@ def read_values(
  settings=settings,
  in_memory=in_memory,
  )
- if object_name:
- output = {object_name: dict_to_data_model(object_name, output)} # type: ignore[arg-type]
+ if column:
+ output = {column: dict_to_data_model(column, output)} # type: ignore[arg-type]
  return chain.gen(_func_fr, output=output)
datachain/lib/listing.py CHANGED
@@ -72,7 +72,7 @@ def ls(
  dc: D,
  path: str,
  recursive: Optional[bool] = True,
- object_name="file",
+ column="file",
  ) -> D:
  """
  Return files by some path from DataChain instance which contains bucket listing.
@@ -82,7 +82,7 @@ def ls(
  """
 
  def _file_c(name: str) -> Column:
- return Column(f"{object_name}.{name}")
+ return Column(f"{column}.{name}")
 
  dc = dc.filter(_file_c("is_latest") == true())
 
datachain/lib/signal_schema.py CHANGED
@@ -87,6 +87,12 @@ class SignalResolvingTypeError(SignalResolvingError):
  )
 
 
+ class SignalRemoveError(SignalSchemaError):
+ def __init__(self, path: Optional[list[str]], msg: str):
+ name = " '" + ".".join(path) + "'" if path else ""
+ super().__init__(f"cannot remove signal name{name}: {msg}")
+
+
  class CustomType(BaseModel):
  schema_version: int = Field(ge=1, le=2, strict=True)
  name: str
@@ -620,18 +626,27 @@ class SignalSchema:
  return curr_type
 
  def select_except_signals(self, *args: str) -> "SignalSchema":
- schema = copy.deepcopy(self.values)
- for field in args:
- if not isinstance(field, str):
- raise SignalResolvingTypeError("select_except()", field)
+ def has_signal(signal: str):
+ signal = signal.replace(".", DEFAULT_DELIMITER)
+ return any(signal == s for s in self.db_signals())
 
- if field not in self.values:
+ schema = copy.deepcopy(self.values)
+ for signal in args:
+ if not isinstance(signal, str):
+ raise SignalResolvingTypeError("select_except()", signal)
+
+ if signal not in self.values:
+ if has_signal(signal):
+ raise SignalRemoveError(
+ signal.split("."),
+ "select_except() error - removing nested signal would"
+ " break parent schema, which isn't supported.",
+ )
  raise SignalResolvingError(
- field.split("."),
- "select_except() error - the feature name does not exist or "
- "inside of feature (not supported)",
+ signal.split("."),
+ "select_except() error - the signal does not exist",
  )
- del schema[field]
+ del schema[signal]
 
  return SignalSchema(schema)
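`select_except()` now distinguishes a nested signal (which exists in the schema but cannot be removed without breaking its parent model) from a name that does not exist at all. A hedged sketch of the new behaviour (signal names are illustrative):

```python
import datachain as dc

chain = dc.read_storage("s3://my-bucket/images/", anon=True)

chain.select_except("file")        # fine: drops a top-level signal
chain.select_except("file.path")   # 0.14.5: raises SignalRemoveError (nested signal)
chain.select_except("not_there")   # raises SignalResolvingError (unknown signal)
```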
datachain/lib/udf.py CHANGED
@@ -16,7 +16,6 @@ from datachain.lib.convert.flatten import flatten
  from datachain.lib.data_model import DataValue
  from datachain.lib.file import File
  from datachain.lib.utils import AbstractUDF, DataChainError, DataChainParamsError
- from datachain.progress import CombinedDownloadCallback
  from datachain.query.batch import (
  Batch,
  BatchingStrategy,
@@ -327,8 +326,9 @@ def _prefetch_inputs(
 
  if after_prefetch is None:
  after_prefetch = noop
- if isinstance(download_cb, CombinedDownloadCallback):
- after_prefetch = download_cb.increment_file_count
+ if download_cb and hasattr(download_cb, "increment_file_count"):
+ increment_file_count: Callable[[], None] = download_cb.increment_file_count
+ after_prefetch = increment_file_count
 
  f = partial(_prefetch_input, download_cb=download_cb, after_prefetch=after_prefetch)
  mapper = AsyncMapper(f, prepared_inputs, workers=prefetch)
datachain/listing.py CHANGED
@@ -27,14 +27,14 @@ class Listing:
  client: "Client",
  dataset_name: Optional["str"] = None,
  dataset_version: Optional[int] = None,
- object_name: str = "file",
+ column: str = "file",
  ):
  self.metastore = metastore
  self.warehouse = warehouse
  self.client = client
  self.dataset_name = dataset_name # dataset representing bucket listing
  self.dataset_version = dataset_version # dataset representing bucket listing
- self.object_name = object_name
+ self.column = column
 
  def clone(self) -> "Listing":
  return self.__class__(
@@ -43,7 +43,7 @@ class Listing:
  self.client,
  self.dataset_name,
  self.dataset_version,
- self.object_name,
+ self.column,
  )
 
  def __enter__(self) -> "Listing":
@@ -74,7 +74,7 @@ class Listing:
  return self.warehouse.dataset_rows(
  dataset,
  self.dataset_version or dataset.latest_version,
- object_name=self.object_name,
+ column=self.column,
  )
 
  def expand_path(self, path, use_glob=True) -> list[Node]:
datachain/query/dispatch.py CHANGED
@@ -4,9 +4,8 @@ from itertools import chain
  from multiprocessing import cpu_count
  from sys import stdin
  from threading import Timer
- from typing import TYPE_CHECKING, Optional
+ from typing import TYPE_CHECKING, Literal, Optional
 
- import attrs
  import multiprocess
  from cloudpickle import load, loads
  from fsspec.callbacks import DEFAULT_CALLBACK, Callback
@@ -60,6 +59,7 @@ def udf_entrypoint() -> int:
  query = udf_info["query"]
  rows_total = udf_info["rows_total"]
  batching = udf_info["batching"]
+ is_generator = udf_info["is_generator"]
  n_workers = udf_info["processes"]
  if n_workers is True:
  n_workers = None # Use default number of CPUs (cores)
@@ -72,17 +72,20 @@ def udf_entrypoint() -> int:
  ) as udf_inputs:
  download_cb = get_download_callback()
  processed_cb = get_processed_callback()
+ generated_cb = get_generated_callback(is_generator)
  try:
  dispatch.run_udf_parallel(
  udf_inputs,
  rows_total=rows_total,
  n_workers=n_workers,
- processed_cb=processed_cb,
  download_cb=download_cb,
+ processed_cb=processed_cb,
+ generated_cb=generated_cb,
  )
  finally:
  download_cb.close()
  processed_cb.close()
+ generated_cb.close()
 
  return 0
 
@@ -128,7 +131,6 @@ class UDFDispatcher:
  self.done_queue,
  self.query,
  self.table,
- self.is_generator,
  self.is_batching,
  self.cache,
  self.udf_fields,
@@ -152,16 +154,14 @@ class UDFDispatcher:
  for _ in range(n_workers):
  put_into_queue(task_queue, STOP_SIGNAL)
 
- def create_input_queue(self):
- return self.ctx.Queue()
-
  def run_udf_parallel( # noqa: C901, PLR0912
  self,
  input_rows: Iterable[RowsOutput],
  rows_total: int,
  n_workers: Optional[int] = None,
- processed_cb: Callback = DEFAULT_CALLBACK,
  download_cb: Callback = DEFAULT_CALLBACK,
+ processed_cb: Callback = DEFAULT_CALLBACK,
+ generated_cb: Callback = DEFAULT_CALLBACK,
  ) -> None:
  n_workers = get_n_workers_from_arg(n_workers)
 
@@ -214,6 +214,8 @@ class UDFDispatcher:
  download_cb.relative_update(downloaded)
  if processed := result.get("processed"):
  processed_cb.relative_update(processed)
+ if generated := result.get("generated"):
+ generated_cb.relative_update(generated)
 
  status = result["status"]
  if status in (OK_STATUS, NOTIFY_STATUS):
@@ -260,46 +262,61 @@ class UDFDispatcher:
  p.join()
 
 
- class WorkerCallback(Callback):
- def __init__(self, queue: "multiprocess.Queue"):
+ class DownloadCallback(Callback):
+ def __init__(self, queue: "multiprocess.Queue") -> None:
  self.queue = queue
  super().__init__()
 
  def relative_update(self, inc: int = 1) -> None:
+ # This callback is used to notify the size of the downloaded files
+ pass
+
+ def increment_file_count(self, inc: int = 1) -> None:
  put_into_queue(self.queue, {"status": NOTIFY_STATUS, "downloaded": inc})
 
 
  class ProcessedCallback(Callback):
- def __init__(self):
- self.processed_rows: Optional[int] = None
+ def __init__(
+ self,
+ name: Literal["processed", "generated"],
+ queue: "multiprocess.Queue",
+ ) -> None:
+ self.name = name
+ self.queue = queue
  super().__init__()
 
  def relative_update(self, inc: int = 1) -> None:
- self.processed_rows = inc
+ put_into_queue(self.queue, {"status": NOTIFY_STATUS, self.name: inc})
 
 
- @attrs.define
  class UDFWorker:
- catalog: "Catalog"
- udf: "UDFAdapter"
- task_queue: "multiprocess.Queue"
- done_queue: "multiprocess.Queue"
- query: "Select"
- table: "Table"
- is_generator: bool
- is_batching: bool
- cache: bool
- udf_fields: Sequence[str]
- cb: Callback = attrs.field()
-
- @cb.default
- def _default_callback(self) -> WorkerCallback:
- return WorkerCallback(self.done_queue)
+ def __init__(
+ self,
+ catalog: "Catalog",
+ udf: "UDFAdapter",
+ task_queue: "multiprocess.Queue",
+ done_queue: "multiprocess.Queue",
+ query: "Select",
+ table: "Table",
+ is_batching: bool,
+ cache: bool,
+ udf_fields: Sequence[str],
+ ) -> None:
+ self.catalog = catalog
+ self.udf = udf
+ self.task_queue = task_queue
+ self.done_queue = done_queue
+ self.query = query
+ self.table = table
+ self.is_batching = is_batching
+ self.cache = cache
+ self.udf_fields = udf_fields
+
+ self.download_cb = DownloadCallback(self.done_queue)
+ self.processed_cb = ProcessedCallback("processed", self.done_queue)
+ self.generated_cb = ProcessedCallback("generated", self.done_queue)
 
  def run(self) -> None:
- processed_cb = ProcessedCallback()
- generated_cb = get_generated_callback(self.is_generator)
-
  prefetch = self.udf.prefetch
  with _get_cache(self.catalog.cache, prefetch, use_cache=self.cache) as _cache:
  catalog = clone_catalog_with_cache(self.catalog, _cache)
@@ -308,29 +325,22 @@ class UDFWorker:
  self.get_inputs(),
  catalog,
  self.cache,
- download_cb=self.cb,
- processed_cb=processed_cb,
+ download_cb=self.download_cb,
+ processed_cb=self.processed_cb,
  )
  with safe_closing(udf_results):
  process_udf_outputs(
  catalog.warehouse,
  self.table,
- self.notify_and_process(udf_results, processed_cb),
+ self.notify_and_process(udf_results),
  self.udf,
- cb=generated_cb,
+ cb=self.generated_cb,
  )
+ put_into_queue(self.done_queue, {"status": FINISHED_STATUS})
 
- put_into_queue(
- self.done_queue,
- {"status": FINISHED_STATUS, "processed": processed_cb.processed_rows},
- )
-
- def notify_and_process(self, udf_results, processed_cb):
+ def notify_and_process(self, udf_results):
  for row in udf_results:
- put_into_queue(
- self.done_queue,
- {"status": OK_STATUS, "processed": processed_cb.processed_rows},
- )
+ put_into_queue(self.done_queue, {"status": OK_STATUS})
  yield row
 
  def get_inputs(self):
datachain/query/session.py CHANGED
@@ -100,6 +100,10 @@ class Session:
  def get_temp_prefix(self) -> str:
  return f"{self.DATASET_PREFIX}{self.name}_"
 
+ @classmethod
+ def is_temp_dataset(cls, name) -> bool:
+ return name.startswith(cls.DATASET_PREFIX)
+
  def _cleanup_temp_datasets(self) -> None:
  prefix = self.get_temp_prefix()
  try:
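`Session.is_temp_dataset()` is just a prefix check against `Session.DATASET_PREFIX`, and the new `DatasetInfo.is_temp` property delegates to it; this is what lets `datasets()` hide session-temporary datasets by default. A small sketch (the dataset names are made up):

```python
from datachain.query.session import Session

Session.is_temp_dataset(f"{Session.DATASET_PREFIX}my_run_tmp")  # True
Session.is_temp_dataset("dogs_and_cats")                        # False
```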
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: datachain
- Version: 0.14.3
+ Version: 0.14.5
  Summary: Wrangle unstructured AI data at scale
  Author-email: Dmitry Petrov <support@dvc.org>
  License-Expression: Apache-2.0
@@ -171,7 +171,7 @@ high confidence scores.
 
  import datachain as dc
 
- meta = dc.read_json("gs://datachain-demo/dogs-and-cats/*json", object_name="meta", anon=True)
+ meta = dc.read_json("gs://datachain-demo/dogs-and-cats/*json", column="meta", anon=True)
  images = dc.read_storage("gs://datachain-demo/dogs-and-cats/*jpg", anon=True)
 
  images_id = images.map(id=lambda file: file.path.split('.')[-2])
@@ -213,7 +213,7 @@ Python code:
  return result.lower().startswith("success")
 
  chain = (
- dc.read_storage("gs://datachain-demo/chatbot-KiT/", object_name="file", anon=True)
+ dc.read_storage("gs://datachain-demo/chatbot-KiT/", column="file", anon=True)
  .settings(parallel=4, cache=True)
  .map(is_success=eval_dialogue)
  .save("mistral_files")
@@ -6,7 +6,7 @@ datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
  datachain/dataset.py,sha256=ZfgsGlddTXsSqCohNSRSChdH6Jjw7wrkso1Am166k-M,19391
  datachain/error.py,sha256=bxAAL32lSeMgzsQDEHbGTGORj-mPzzpCRvWDPueJNN4,1092
  datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
- datachain/listing.py,sha256=HNB-xeKA6aUA-HTWr--H22S6jVOxP2OVQ-3d07ISqAk,7109
+ datachain/listing.py,sha256=kNSCFYWo2iM1wWg1trwq4WpYZxYqz4RKxkTtsppEzAw,7079
  datachain/node.py,sha256=KWDT0ClYXB7FYI-QOvzAa-UDkLJErUI2eWm5FBteYuU,5577
  datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
  datachain/nodes_thread_pool.py,sha256=mdo0s-VybuSZkRUARcUO4Tjh8KFfZr9foHqmupx2SmM,3989
@@ -17,7 +17,7 @@ datachain/studio.py,sha256=9MEpFPLKI3gG4isKklcfD5BMLeNsSXhtOUboOjW4Fdc,10017
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
  datachain/utils.py,sha256=8Qz8lRrX0bUTGvwYd-OR-l6ElVRsQBdBO5QMvwt56T4,15190
  datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
- datachain/catalog/catalog.py,sha256=k-okQ4aqoyWrsNlDeCz6jP6TNRiZCUENbGV9Sz6EEtw,60729
+ datachain/catalog/catalog.py,sha256=05_JplTuoyqDWtxUeu324ogaHVqXGPSaPxtUXtuMljk,60682
  datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
  datachain/catalog/loader.py,sha256=wCOWeDwuFNKr_frZRkqTZhkCAiB0CBCRJio3LF2zKPA,5765
  datachain/cli/__init__.py,sha256=YPVkuQ7IezNhtzo5xrfca1hEIiZtFxOlJCOzAOEuxmA,8335
@@ -46,10 +46,10 @@ datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWT
  datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
  datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
  datachain/data_storage/metastore.py,sha256=19LP15xT2Fmz0aIZ1sIajq8i1-KnFgCBEZeU2Ka9-mc,37780
- datachain/data_storage/schema.py,sha256=qSukry2kINhVw8aj5lQrpe7N90DFeatKIKmDh6jAzR8,9515
+ datachain/data_storage/schema.py,sha256=asZYz1cg_WKfe2Q-k5W51E2z2CzHU5B4QEDZDMFr8yo,9346
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
- datachain/data_storage/sqlite.py,sha256=KJ8hI0Hrwv9eAA-nLUlw2AYCQxiAAZ12a-ftUBtroNQ,24545
- datachain/data_storage/warehouse.py,sha256=7awrmqBgcqKfJFuZG4WRpjZLdRCTBwbNDvtL3XakR8c,31145
+ datachain/data_storage/sqlite.py,sha256=f4tvq0gzYQP7aYGnfL3j4IBUNvctpBxI_ioFU-B1LFc,24540
+ datachain/data_storage/warehouse.py,sha256=0GQdf7BwGav783zhB-RBKwjUqkLf2JW2lS-KXTqimnQ,31083
  datachain/diff/__init__.py,sha256=YkGdiDbZIMhAZ2SJ4eSe00HU67VP1P6SL2L_t0ODYMs,9425
  datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
@@ -69,20 +69,20 @@ datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datachain/lib/arrow.py,sha256=9UBCF-lftQaz0yxdsjbLKbyzVSmrF_QSWdhp2oBDPqs,9486
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
  datachain/lib/data_model.py,sha256=ZwBXELtqROEdLL4DmxTipnwUZmhQvMz_UVDzyf7nQ9Y,2899
- datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
+ datachain/lib/dataset_info.py,sha256=Jnjy7vq4iNVkq1e-SYjqxdojlxIDXvZ352NCLLZg59k,2633
  datachain/lib/file.py,sha256=HLQXS_WULm7Y-fkHMy0WpibVAcrkLPRS6CrZy6rwFe0,30450
  datachain/lib/hf.py,sha256=gjxuStZBlKtNk3-4yYSlWZDv9zBGblOdvEy_Lwap5hA,5882
  datachain/lib/image.py,sha256=butvUY_33PVEYPKX2nVCPeJjJVcBaptZwsE9REQsTS8,3247
- datachain/lib/listing.py,sha256=O29s7H-2rqjHHGKWkKGNNXlo2zynv4pygVTKImpV8fo,7046
+ datachain/lib/listing.py,sha256=qmie3z8UR5bdXiZnMo3Qy5IpT76vFu5gn0x2ksMi-y8,7036
  datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
  datachain/lib/meta_formats.py,sha256=Epydbdch1g4CojK8wd_ePzmwmljC4fVWlJtZ16jsX-A,6349
  datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
  datachain/lib/pytorch.py,sha256=YS6yR13iVlrAXo5wzJswFFUHwWOql9KTdWIa86DXB-k,7712
  datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
- datachain/lib/signal_schema.py,sha256=DRatqSG7OVtCUCWyZvMXe4m7r7XFO6NCfzsJRDErMtg,35185
+ datachain/lib/signal_schema.py,sha256=uIBHYXtu_XpLbOUVC-kq-GduEOCfz9hQORi9ZG3JFqo,35820
  datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
  datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
- datachain/lib/udf.py,sha256=h38a457xg-4wO2XcxPs4pzDq8JxTmYm4N84iAf0HRzY,16168
+ datachain/lib/udf.py,sha256=JJwjvy41N65PtWGUAq7TYnhdOOR6RiMDUJEKl5xtwLc,16199
  datachain/lib/udf_signature.py,sha256=2EtsOPDNSPqcOlYwqbCdy6RF5MldI-7smii8aLy8p7Y,7543
  datachain/lib/utils.py,sha256=QrjVs_oLRXEotOPUYurBJypBFi_ReTJmxcnJeH4j2Uk,1596
  datachain/lib/video.py,sha256=suH_8Mi8VYk4-IVb1vjSduF_njs64ji1WGKHxDLnGYw,6629
@@ -95,18 +95,18 @@ datachain/lib/convert/sql_to_python.py,sha256=XXCBYDQFUXJIBNWkjEP944cnCfJ8GF2Tji
  datachain/lib/convert/unflatten.py,sha256=ysMkstwJzPMWUlnxn-Z-tXJR3wmhjHeSN_P-sDcLS6s,2010
  datachain/lib/convert/values_to_tuples.py,sha256=EFfIGBiVVltJQG8blzsQ1dGXneh4D3wdLfSUeoK10OI,3931
  datachain/lib/dc/__init__.py,sha256=6rKKHS6MA3mS6UJXiysrv4TURs4R_UWAQK2tJ2t1QMs,743
- datachain/lib/dc/csv.py,sha256=d0ULzpsTTeqp_eM-2jVHb1kYHQN2lJFf4O6LWd5tOJw,4401
- datachain/lib/dc/datachain.py,sha256=hwuAElfEhRLyh-Uvuc7YIpFx6nsI_B90xwnMqgkkgrI,76390
- datachain/lib/dc/datasets.py,sha256=hTzq18Ij9kpOAJOU-VN4-VyThTTxLSWLfVIk3bgzAPs,4329
- datachain/lib/dc/hf.py,sha256=I1vFNOa1C87lBuBj5FHENLY2jTaQ8erngiX0cyBmOp4,2170
- datachain/lib/dc/json.py,sha256=9ei9ZNzWVXZWD4HNGTfBhcoLPnXBBDywKV-3Wi1mT28,2725
- datachain/lib/dc/listings.py,sha256=qPy1DTvYkbNICT1ujo8LwezzMEW8E3dln1knw7Jwl0I,1044
- datachain/lib/dc/pandas.py,sha256=jJvgNPPjiSLAjdYlhI4fvGKNWRh-hbMgZyBlURS633E,1249
- datachain/lib/dc/parquet.py,sha256=lXCSr_S7bQsPUWq1pJ-Ur8R8RxArjyFpCpBXK-aorQw,1809
+ datachain/lib/dc/csv.py,sha256=asWPAxhMgIoLAdD2dObDlnGL8CTSD3TAuFuM4ci89bQ,4374
+ datachain/lib/dc/datachain.py,sha256=PDkB1fvmokJr-Tmyn0CuFGgZSxPn25FMjjUVHbrx6-c,76326
+ datachain/lib/dc/datasets.py,sha256=K-GCTZ6Ps_XNpzKz19my8VijXb-b0b3eZASoavKk1Uc,5157
+ datachain/lib/dc/hf.py,sha256=PJl2wiLjdRsMz0SYbLT-6H8b-D5i2WjeH7li8HHOk_0,2145
+ datachain/lib/dc/json.py,sha256=ZUThPDAaP2gBFIL5vsQTwKBcuN_dhvC_O44wdDv0jEc,2683
+ datachain/lib/dc/listings.py,sha256=2na9v63xO1vPUNaoBSzA-TSN49V7zQAb-4iS1wOPLFE,1029
+ datachain/lib/dc/pandas.py,sha256=mM2y44s1-3dwkxjVe6RdfT6PVoeRHS9OgsGaSz4YsqQ,1219
+ datachain/lib/dc/parquet.py,sha256=zYcSgrWwyEDW9UxGUSVdIVsCu15IGEf0xL8KfWQqK94,1782
  datachain/lib/dc/records.py,sha256=DOFkQV7A7kZnMiCS4mHOzee2ibWIhz-mWQpgVsU78SE,2524
- datachain/lib/dc/storage.py,sha256=kM3Ix2L0j01a4XcXPZpdDxvici9yu-Ks-Cd3uf_qESA,5327
+ datachain/lib/dc/storage.py,sha256=QLf3-xMV2Gmy3AA8qF9WqAsb7R8Rk87l4s5hBoiCH98,5285
  datachain/lib/dc/utils.py,sha256=Ct-0FqCaDhNWHx09gJFcCXJGPjMI-VZr4t-GJyqTi44,3984
- datachain/lib/dc/values.py,sha256=HaABQKmhgW-N1pcBn7CQuTIiOFXYVjU1H9LbupGM3WQ,1409
+ datachain/lib/dc/values.py,sha256=cBQubhmPNEDMJldUXzGh-UKbdim4P6O2B91Gp39roKw,1389
  datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
  datachain/model/bbox.py,sha256=cQNHuQuVsh6bW3n3Hj40F2Cc20cExQ9Lg_q7R2jxUMI,9324
  datachain/model/pose.py,sha256=rjquA6M-I-Y30Xm6YSkGv1OY52hJZmR2AuxbIpE5uD0,3865
@@ -119,12 +119,12 @@ datachain/model/ultralytics/segment.py,sha256=koq1HASo29isf0in6oSlzmU4IzsmOXe87F
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
  datachain/query/batch.py,sha256=6w8gzLTmLeylststu-gT5jIqEfi4-djS7_yTYyeo-fw,4190
  datachain/query/dataset.py,sha256=caUsFzaVZXOz8NmeTMeOdyRQLQP8KCnxYMxF-pG4yFQ,58712
- datachain/query/dispatch.py,sha256=T4vdJE0k3Ff1osaQzYTC_2gOOkT0mXwKzNy-5aZcrTE,12300
+ datachain/query/dispatch.py,sha256=ErdK-biHYhRLDsm7on6vAHSjX-hAHgEHsBRHmuMS_4E,12979
  datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
  datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
  datachain/query/schema.py,sha256=b_KnVy6B26Ol4nYG0LqNNpeQ1QYPk95YRGUjXfdaQWs,6606
- datachain/query/session.py,sha256=I1KG8jDIaxGAfRfDRucMx8DqsANf_VYWtwtXjeD19lI,6399
+ datachain/query/session.py,sha256=wNdOHAi4HrsEihfzdcTlfB5i1xyj0dw6rlUz84StOoU,6512
  datachain/query/udf.py,sha256=ljAYaF-J77t7iS4zc1-g1ssYd4c6Q-ccKGEc3VQQmeM,1322
  datachain/query/utils.py,sha256=u0A_BwG9PNs0DxoDcvSWgWLpj3ByTUv8CqH13CIuGag,1293
  datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -150,9 +150,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
- datachain-0.14.3.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
- datachain-0.14.3.dist-info/METADATA,sha256=hTVICGrF_sALHSr7uuAipFr5HVrgydiP2JcDlZ-Q_w0,11338
- datachain-0.14.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- datachain-0.14.3.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
- datachain-0.14.3.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
- datachain-0.14.3.dist-info/RECORD,,
+ datachain-0.14.5.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+ datachain-0.14.5.dist-info/METADATA,sha256=y6sL0tB9tFRXF_LnjkPLM7cmtBBhXWxTvtNWRnmgfb4,11328
+ datachain-0.14.5.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ datachain-0.14.5.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+ datachain-0.14.5.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+ datachain-0.14.5.dist-info/RECORD,,