datachain 0.8.9__py3-none-any.whl → 0.8.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (38)
  1. datachain/cache.py +4 -4
  2. datachain/catalog/__init__.py +0 -2
  3. datachain/catalog/catalog.py +102 -138
  4. datachain/cli/__init__.py +9 -9
  5. datachain/cli/parser/__init__.py +36 -20
  6. datachain/cli/parser/job.py +1 -1
  7. datachain/cli/parser/studio.py +35 -34
  8. datachain/cli/parser/utils.py +19 -1
  9. datachain/cli/utils.py +1 -1
  10. datachain/client/fsspec.py +11 -8
  11. datachain/client/local.py +4 -4
  12. datachain/data_storage/schema.py +1 -1
  13. datachain/data_storage/sqlite.py +38 -7
  14. datachain/data_storage/warehouse.py +2 -2
  15. datachain/dataset.py +1 -1
  16. datachain/error.py +12 -0
  17. datachain/func/__init__.py +2 -1
  18. datachain/func/conditional.py +67 -23
  19. datachain/func/func.py +17 -5
  20. datachain/lib/convert/python_to_sql.py +15 -3
  21. datachain/lib/dc.py +27 -5
  22. datachain/lib/file.py +16 -0
  23. datachain/lib/listing.py +30 -12
  24. datachain/lib/pytorch.py +1 -1
  25. datachain/lib/udf.py +1 -1
  26. datachain/listing.py +1 -13
  27. datachain/node.py +0 -15
  28. datachain/nodes_fetcher.py +2 -2
  29. datachain/query/dataset.py +8 -4
  30. datachain/remote/studio.py +3 -3
  31. datachain/sql/sqlite/base.py +35 -14
  32. datachain/studio.py +8 -8
  33. {datachain-0.8.9.dist-info → datachain-0.8.11.dist-info}/METADATA +3 -7
  34. {datachain-0.8.9.dist-info → datachain-0.8.11.dist-info}/RECORD +38 -38
  35. {datachain-0.8.9.dist-info → datachain-0.8.11.dist-info}/LICENSE +0 -0
  36. {datachain-0.8.9.dist-info → datachain-0.8.11.dist-info}/WHEEL +0 -0
  37. {datachain-0.8.9.dist-info → datachain-0.8.11.dist-info}/entry_points.txt +0 -0
  38. {datachain-0.8.9.dist-info → datachain-0.8.11.dist-info}/top_level.txt +0 -0
datachain/lib/dc.py CHANGED
@@ -25,6 +25,7 @@ from sqlalchemy.sql.functions import GenericFunction
  from sqlalchemy.sql.sqltypes import NullType
 
  from datachain.dataset import DatasetRecord
+ from datachain.func import literal
  from datachain.func.base import Function
  from datachain.func.func import Func
  from datachain.lib.convert.python_to_sql import python_to_sql
@@ -1129,8 +1130,12 @@ class DataChain:
          )
          ```
          """
+         primitives = (bool, str, int, float)
+
          for col_name, expr in kwargs.items():
-             if not isinstance(expr, (Column, Func)) and isinstance(expr.type, NullType):
+             if not isinstance(expr, (*primitives, Column, Func)) and isinstance(
+                 expr.type, NullType
+             ):
                  raise DataChainColumnError(
                      col_name, f"Cannot infer type with expression {expr}"
                  )
@@ -1145,6 +1150,11 @@ class DataChain:
              elif isinstance(value, Func):
                  # adding new signal
                  mutated[name] = value.get_column(schema)
+             elif isinstance(value, primitives):
+                 # adding simple python constant primitives like str, int, float, bool
+                 val = literal(value)
+                 val.type = python_to_sql(type(value))()
+                 mutated[name] = val  # type: ignore[assignment]
              else:
                  # adding new signal
                  mutated[name] = value
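
With this change, `mutate()` accepts plain Python constants (bool, str, int, float) in addition to column expressions and functions. A minimal usage sketch; signal names and values below are illustrative, not from the release:

```python
from datachain.lib.dc import DataChain

chain = DataChain.from_values(num=[1, 2, 3]).mutate(
    label="manual",   # str constant, wrapped in literal() and typed via python_to_sql()
    verified=True,    # bool constant
    weight=0.5,       # float constant
)
```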
@@ -1332,6 +1342,7 @@ class DataChain:
          on: Union[MergeColType, Sequence[MergeColType]],
          right_on: Optional[Union[MergeColType, Sequence[MergeColType]]] = None,
          inner=False,
+         full=False,
          rname="right_",
      ) -> "Self":
          """Merge two chains based on the specified criteria.
@@ -1345,6 +1356,7 @@ class DataChain:
              right_on: Optional predicate or list of Predicates for the `right_ds`
                  to join.
              inner (bool): Whether to run inner join or outer join.
+             full (bool): Whether to run full outer join.
              rname (str): Name prefix for conflicting signal names.
 
          Examples:
@@ -1419,7 +1431,7 @@ class DataChain:
          )
 
          query = self._query.join(
-             right_ds._query, sqlalchemy.and_(*ops), inner, rname + "{name}"
+             right_ds._query, sqlalchemy.and_(*ops), inner, full, rname + "{name}"
          )
          query.feature_schema = None
          ds = self._evolve(query=query)
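
`merge()` now exposes a full outer join via the new `full` flag, threaded down into the underlying query join. A hedged sketch of the new call; the chains and column names here are made up for illustration:

```python
from datachain.lib.dc import DataChain

left = DataChain.from_values(key=[1, 2, 3], name=["a", "b", "c"])
right = DataChain.from_values(key=[2, 3, 4], score=[0.2, 0.3, 0.4])

# keep rows from both sides even when there is no match on "key"
merged = left.merge(right, on="key", full=True)
```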
@@ -1940,7 +1952,7 @@ class DataChain:
      def from_csv(
          cls,
          path,
-         delimiter: str = ",",
+         delimiter: Optional[str] = None,
          header: bool = True,
          output: OutputType = None,
          object_name: str = "",
@@ -1950,6 +1962,7 @@ class DataChain:
          session: Optional[Session] = None,
          settings: Optional[dict] = None,
          column_types: Optional[dict[str, "Union[str, ArrowDataType]"]] = None,
+         parse_options: Optional[dict[str, "Union[str, Union[bool, Callable]]"]] = None,
          **kwargs,
      ) -> "DataChain":
          """Generate chain from csv files.
@@ -1957,7 +1970,8 @@ class DataChain:
          Parameters:
              path : Storage URI with directory. URI must start with storage prefix such
                  as `s3://`, `gs://`, `az://` or "file:///".
-             delimiter : Character for delimiting columns.
+             delimiter : Character for delimiting columns. Takes precedence if also
+                 specified in `parse_options`. Defaults to ",".
              header : Whether the files include a header row.
              output : Dictionary or feature class defining column names and their
                  corresponding types. List of column names is also accepted, in which
@@ -1971,6 +1985,8 @@ class DataChain:
              column_types : Dictionary of column names and their corresponding types.
                  It is passed to CSV reader and for each column specified type auto
                  inference is disabled.
+             parse_options: Tells the parser how to process lines.
+                 See https://arrow.apache.org/docs/python/generated/pyarrow.csv.ParseOptions.html
 
          Example:
              Reading a csv file:
@@ -1988,6 +2004,12 @@ class DataChain:
          from pyarrow.dataset import CsvFileFormat
          from pyarrow.lib import type_for_alias
 
+         parse_options = parse_options or {}
+         if "delimiter" not in parse_options:
+             parse_options["delimiter"] = ","
+         if delimiter:
+             parse_options["delimiter"] = delimiter
+
          if column_types:
              column_types = {
                  name: type_for_alias(typ) if isinstance(typ, str) else typ
@@ -2015,7 +2037,7 @@ class DataChain:
              msg = f"error parsing csv - incompatible output type {type(output)}"
              raise DatasetPrepareError(chain.name, msg)
 
-         parse_options = ParseOptions(delimiter=delimiter)
+         parse_options = ParseOptions(**parse_options)
          read_options = ReadOptions(column_names=column_names)
          convert_options = ConvertOptions(
              strings_can_be_null=True,
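
`from_csv()` now forwards arbitrary pyarrow `ParseOptions` fields, with an explicit `delimiter` argument still taking precedence. A hedged usage sketch; the bucket URI is a placeholder:

```python
from datachain.lib.dc import DataChain

chain = DataChain.from_csv(
    "s3://my-bucket/data/",  # placeholder URI
    delimiter=";",           # overrides parse_options["delimiter"] if both are given
    parse_options={"ignore_empty_lines": True, "quote_char": "'"},
)
```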
datachain/lib/file.py CHANGED
@@ -190,6 +190,22 @@ class File(DataModel):
          self._catalog = None
          self._caching_enabled: bool = False
 
+     @classmethod
+     def upload(
+         cls, data: bytes, path: str, catalog: Optional["Catalog"] = None
+     ) -> "File":
+         if catalog is None:
+             from datachain.catalog.loader import get_catalog
+
+             catalog = get_catalog()
+
+         parent, name = posixpath.split(path)
+
+         client = catalog.get_client(parent)
+         file = client.upload(data, name)
+         file._set_stream(catalog)
+         return file
+
      @classmethod
      def _from_row(cls, row: "RowDict") -> "Self":
          return cls(**{key: row[key] for key in cls._datachain_column_types})
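
The new `File.upload()` classmethod writes raw bytes to a storage location and returns a `File` bound to the default catalog. A hedged example; the bucket and key are placeholders:

```python
from datachain.lib.file import File

# uploads the payload and returns a File usable in downstream chains
file = File.upload(b"hello, datachain", "s3://my-bucket/examples/hello.txt")
print(file.path, file.size)
```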
datachain/lib/listing.py CHANGED
@@ -1,3 +1,5 @@
+ import logging
+ import os
  import posixpath
  from collections.abc import Iterator
  from typing import TYPE_CHECKING, Callable, Optional, TypeVar
@@ -7,6 +9,7 @@ from sqlalchemy.sql.expression import true
 
  from datachain.asyn import iter_over_async
  from datachain.client import Client
+ from datachain.error import REMOTE_ERRORS, ClientError
  from datachain.lib.file import File
  from datachain.query.schema import Column
  from datachain.sql.functions import path as pathfunc
@@ -22,6 +25,10 @@ LISTING_PREFIX = "lst__"  # listing datasets start with this name
 
  D = TypeVar("D", bound="DataChain")
 
+ # Disable warnings for remote errors in clients
+ logging.getLogger("aiobotocore.credentials").setLevel(logging.CRITICAL)
+ logging.getLogger("gcsfs").setLevel(logging.CRITICAL)
+
 
  def list_bucket(uri: str, cache, client_config=None) -> Callable:
      """
@@ -90,6 +97,15 @@ def _isfile(client: "Client", path: str) -> bool:
      Returns True if uri points to a file
      """
      try:
+         if "://" in path:
+             # This makes sure that the uppercase scheme is converted to lowercase
+             scheme, path = path.split("://", 1)
+             path = f"{scheme.lower()}://{path}"
+
+         if os.name == "nt" and "*" in path:
+             # On Windows, the glob pattern "*" is not supported
+             return False
+
          info = client.fs.info(path)
          name = info.get("name")
          # case for special simulated directories on some clouds
@@ -99,21 +115,21 @@ def _isfile(client: "Client", path: str) -> bool:
              return False
 
          return info["type"] == "file"
-     except:  # noqa: E722
+     except FileNotFoundError:
          return False
+     except REMOTE_ERRORS as e:
+         raise ClientError(
+             message=str(e),
+             error_code=getattr(e, "code", None),
+         ) from e
 
 
- def parse_listing_uri(uri: str, cache, client_config) -> tuple[Optional[str], str, str]:
+ def parse_listing_uri(uri: str, client_config) -> tuple[str, str, str]:
      """
      Parsing uri and returns listing dataset name, listing uri and listing path
      """
      client_config = client_config or {}
-     client = Client.get_client(uri, cache, **client_config)
      storage_uri, path = Client.parse_url(uri)
-     telemetry.log_param("client", client.PREFIX)
-
-     if not uri.endswith("/") and _isfile(client, uri):
-         return None, f"{storage_uri}/{path.lstrip('/')}", path
      if uses_glob(path):
          lst_uri_path = posixpath.dirname(path)
      else:
@@ -157,13 +173,15 @@ def get_listing(
      client_config = catalog.client_config
 
      client = Client.get_client(uri, cache, **client_config)
-     ds_name, list_uri, list_path = parse_listing_uri(uri, cache, client_config)
-     listing = None
+     telemetry.log_param("client", client.PREFIX)
 
-     # if we don't want to use cached dataset (e.g. for a single file listing)
-     if not ds_name:
-         return None, list_uri, list_path, False
+     # we don't want to use cached dataset (e.g. for a single file listing)
+     if not uri.endswith("/") and _isfile(client, uri):
+         storage_uri, path = Client.parse_url(uri)
+         return None, f"{storage_uri}/{path.lstrip('/')}", path, False
 
+     ds_name, list_uri, list_path = parse_listing_uri(uri, client_config)
+     listing = None
      listings = [
          ls for ls in catalog.listings() if not ls.is_expired and ls.contains(ds_name)
      ]
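
Remote failures during listing are now surfaced as `ClientError` (re-raised from the provider-specific exceptions in `REMOTE_ERRORS`) rather than being swallowed by a bare `except`. A hedged sketch of handling this from user code; the URI is a placeholder and the exact point where the error propagates may depend on when the listing runs:

```python
from datachain.lib.dc import DataChain
from datachain.error import ClientError

try:
    chain = DataChain.from_storage("s3://my-bucket/data/")
except ClientError as exc:
    # e.g. expired credentials or a missing bucket now raise
    # instead of silently producing an empty listing
    print(f"listing failed: {exc}")
```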
datachain/lib/pytorch.py CHANGED
@@ -23,7 +23,7 @@ from datachain.query.dataset import get_download_callback
  if TYPE_CHECKING:
      from torchvision.transforms.v2 import Transform
 
-     from datachain.cache import DataChainCache as Cache
+     from datachain.cache import Cache
 
 
  logger = logging.getLogger("datachain")
datachain/lib/udf.py CHANGED
@@ -32,7 +32,7 @@ if TYPE_CHECKING:
 
      from typing_extensions import Self
 
-     from datachain.cache import DataChainCache as Cache
+     from datachain.cache import Cache
      from datachain.catalog import Catalog
      from datachain.lib.signal_schema import SignalSchema
      from datachain.lib.udf_signature import UdfSignature
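
These import changes (together with `datachain/cache.py +4 -4` above) suggest the cache class is now exposed as `Cache` rather than `DataChainCache`. A sketch of the import that may need updating, under that assumption:

```python
# 0.8.9 (assumed old spelling):
# from datachain.cache import DataChainCache as Cache

# 0.8.11:
from datachain.cache import Cache
```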
datachain/listing.py CHANGED
@@ -2,7 +2,6 @@ import glob
  import os
  from collections.abc import Iterable, Iterator
  from functools import cached_property
- from itertools import zip_longest
  from typing import TYPE_CHECKING, Optional
 
  from sqlalchemy import Column
@@ -101,11 +100,8 @@ class Listing:
          copy_to_filename: Optional[str],
          recursive=False,
          copy_dir_contents=False,
-         relative_path=None,
-         from_edatachain=False,
          from_dataset=False,
      ) -> list[NodeWithPath]:
-         rel_path_elements = relative_path.split("/") if relative_path else []
          all_nodes: list[NodeWithPath] = []
          for src in sources:
              node = src.node
@@ -119,15 +115,7 @@ class Listing:
                  )
              else:
                  node_path = []
-                 if from_edatachain:
-                     for rpe, npe in zip_longest(
-                         rel_path_elements, node.path.split("/")
-                     ):
-                         if rpe == npe:
-                             continue
-                         if npe:
-                             node_path.append(npe)
-                 elif copy_to_filename:
+                 if copy_to_filename:
                      node_path = [os.path.basename(copy_to_filename)]
                  elif from_dataset:
                      node_path = [
datachain/node.py CHANGED
@@ -84,18 +84,6 @@ class Node:
              fd.write(f" size: {self.size}\n")
          return size
 
-     def get_metafile_data(self, path: str):
-         data: dict[str, Any] = {
-             "name": path,
-             "etag": self.etag,
-         }
-         version = self.version
-         if version:
-             data["version"] = version
-         data["last_modified"] = time_to_str(self.last_modified)
-         data["size"] = self.size
-         return data
-
      @property
      def full_path(self) -> str:
          if self.is_dir and self.path:
@@ -181,9 +169,6 @@ class NodeWithPath:
      def append_to_file(self, fd):
          return self.n.append_to_file(fd, "/".join(self.path))
 
-     def get_metafile_data(self):
-         return self.n.get_metafile_data("/".join(self.path))
-
      @property
      def full_path(self) -> str:
          path = "/".join(self.path)
datachain/nodes_fetcher.py CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING
  from datachain.nodes_thread_pool import NodesThreadPool
 
  if TYPE_CHECKING:
-     from datachain.cache import DataChainCache
+     from datachain.cache import Cache
      from datachain.client.fsspec import Client
      from datachain.node import Node
 
@@ -13,7 +13,7 @@ logger = logging.getLogger("datachain")
 
 
  class NodesFetcher(NodesThreadPool):
-     def __init__(self, client: "Client", max_threads: int, cache: "DataChainCache"):
+     def __init__(self, client: "Client", max_threads: int, cache: "Cache"):
          super().__init__(max_threads)
          self.client = client
          self.cache = cache
datachain/query/dataset.py CHANGED
@@ -875,6 +875,7 @@ class SQLJoin(Step):
      query2: "DatasetQuery"
      predicates: Union[JoinPredicateType, tuple[JoinPredicateType, ...]]
      inner: bool
+     full: bool
      rname: str
 
      def get_query(self, dq: "DatasetQuery", temp_tables: list[str]) -> sa.Subquery:
@@ -977,14 +978,14 @@ class SQLJoin(Step):
          self.validate_expression(join_expression, q1, q2)
 
          def q(*columns):
-             join_query = self.catalog.warehouse.join(
+             return self.catalog.warehouse.join(
                  q1,
                  q2,
                  join_expression,
                  inner=self.inner,
+                 full=self.full,
+                 columns=columns,
              )
-             return sqlalchemy.select(*columns).select_from(join_query)
-             # return sqlalchemy.select(*subquery.c).select_from(subquery)
 
          return step_result(
              q,
@@ -1489,6 +1490,7 @@ class DatasetQuery:
          dataset_query: "DatasetQuery",
          predicates: Union[JoinPredicateType, Sequence[JoinPredicateType]],
          inner=False,
+         full=False,
          rname="{name}_right",
      ) -> "Self":
          left = self.clone(new_table=False)
@@ -1504,7 +1506,9 @@ class DatasetQuery:
              if isinstance(predicates, (str, ColumnClause, ColumnElement))
              else tuple(predicates)
          )
-         new_query.steps = [SQLJoin(self.catalog, left, right, predicates, inner, rname)]
+         new_query.steps = [
+             SQLJoin(self.catalog, left, right, predicates, inner, full, rname)
+         ]
          return new_query
 
      @detach
datachain/remote/studio.py CHANGED
@@ -75,7 +75,7 @@ class StudioClient:
 
          if not token:
              raise DataChainError(
-                 "Studio token is not set. Use `datachain studio login` "
+                 "Studio token is not set. Use `datachain auth login` "
                  "or environment variable `DVC_STUDIO_TOKEN` to set it."
              )
 
@@ -105,7 +105,7 @@ class StudioClient:
          if not team:
              raise DataChainError(
                  "Studio team is not set. "
-                 "Use `datachain studio team <team_name>` "
+                 "Use `datachain auth team <team_name>` "
                  "or environment variable `DVC_STUDIO_TEAM` to set it."
                  "You can also set it in the config file as team under studio."
              )
@@ -375,7 +375,7 @@ class StudioClient:
              method="GET",
          )
 
-     def upload_file(self, file_name: str, content: bytes) -> Response[FileUploadData]:
+     def upload_file(self, content: bytes, file_name: str) -> Response[FileUploadData]:
          data = {
              "file_content": base64.b64encode(content).decode("utf-8"),
              "file_name": file_name,
datachain/sql/sqlite/base.py CHANGED
@@ -4,6 +4,7 @@ import sqlite3
  import warnings
  from collections.abc import Iterable
  from datetime import MAXYEAR, MINYEAR, datetime, timezone
+ from functools import cache
  from types import MappingProxyType
  from typing import Callable, Optional
 
@@ -526,24 +527,44 @@ def compile_collect(element, compiler, **kwargs):
      return compiler.process(func.json_group_array(*element.clauses.clauses), **kwargs)
 
 
- def load_usearch_extension(conn: sqlite3.Connection) -> bool:
+ @cache
+ def usearch_sqlite_path() -> Optional[str]:
      try:
-         # usearch is part of the vector optional dependencies
-         # we use the extension's cosine and euclidean distance functions
-         from usearch import sqlite_path
+         import usearch
+     except ImportError:
+         return None
 
-         conn.enable_load_extension(True)
+     with warnings.catch_warnings():
+         # usearch binary is not available for Windows, see: https://github.com/unum-cloud/usearch/issues/427.
+         # and, sometimes fail to download the binary in other platforms
+         # triggering UserWarning.
 
-         with warnings.catch_warnings():
-             # usearch binary is not available for Windows, see: https://github.com/unum-cloud/usearch/issues/427.
-             # and, sometimes fail to download the binary in other platforms
-             # triggering UserWarning.
+         warnings.filterwarnings("ignore", category=UserWarning, module="usearch")
 
-             warnings.filterwarnings("ignore", category=UserWarning, module="usearch")
-             conn.load_extension(sqlite_path())
+         try:
+             return usearch.sqlite_path()
+         except FileNotFoundError:
+             return None
 
-         conn.enable_load_extension(False)
-         return True
 
-     except Exception:  # noqa: BLE001
+ def load_usearch_extension(conn: sqlite3.Connection) -> bool:
+     # usearch is part of the vector optional dependencies
+     # we use the extension's cosine and euclidean distance functions
+     ext_path = usearch_sqlite_path()
+     if ext_path is None:
+         return False
+
+     try:
+         conn.enable_load_extension(True)
+     except AttributeError:
+         # sqlite3 module is not built with loadable extension support by default.
+         return False
+
+     try:
+         conn.load_extension(ext_path)
+     except sqlite3.OperationalError:
          return False
+     else:
+         return True
+     finally:
+         conn.enable_load_extension(False)
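
Extension loading is now split into a cached path lookup and a loader that degrades gracefully when usearch is missing or the sqlite3 build lacks loadable-extension support. A hedged usage sketch:

```python
import sqlite3

from datachain.sql.sqlite.base import load_usearch_extension

conn = sqlite3.connect(":memory:")
# True only if usearch is installed, its binary is present, and the
# sqlite3 module was built with loadable extension support
if load_usearch_extension(conn):
    print("usearch distance functions are available")
```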
datachain/studio.py CHANGED
@@ -47,7 +47,7 @@ def process_jobs_args(args: "Namespace"):
          raise DataChainError(f"Unknown command '{args.cmd}'.")
 
 
- def process_studio_cli_args(args: "Namespace"):
+ def process_auth_cli_args(args: "Namespace"):
      if args.cmd is None:
          print(
              f"Use 'datachain {args.command} --help' to see available options",
@@ -95,7 +95,7 @@ def login(args: "Namespace"):
          raise DataChainError(
              "Token already exists. "
              "To login with a different token, "
-             "logout using `datachain studio logout`."
+             "logout using `datachain auth logout`."
          )
 
      open_browser = not args.no_open
@@ -121,12 +121,12 @@ def logout():
      token = conf.get("studio", {}).get("token")
      if not token:
          raise DataChainError(
-             "Not logged in to Studio. Log in with 'datachain studio login'."
+             "Not logged in to Studio. Log in with 'datachain auth login'."
          )
 
      del conf["studio"]["token"]
 
-     print("Logged out from Studio. (you can log back in with 'datachain studio login')")
+     print("Logged out from Studio. (you can log back in with 'datachain auth login')")
 
 
  def token():
@@ -134,7 +134,7 @@ def token():
      token = config.get("token")
      if not token:
          raise DataChainError(
-             "Not logged in to Studio. Log in with 'datachain studio login'."
+             "Not logged in to Studio. Log in with 'datachain auth login'."
          )
 
      print(token)
@@ -282,7 +282,7 @@ def upload_files(client: StudioClient, files: list[str]) -> list[str]:
          file_name = os.path.basename(file)
          with open(file, "rb") as f:
              file_content = f.read()
-         response = client.upload_file(file_name, file_content)
+         response = client.upload_file(file_content, file_name)
          if not response.ok:
              raise_remote_error(response.message)
 
@@ -299,7 +299,7 @@ def cancel_job(job_id: str, team_name: Optional[str]):
      token = Config().read().get("studio", {}).get("token")
      if not token:
          raise DataChainError(
-             "Not logged in to Studio. Log in with 'datachain studio login'."
+             "Not logged in to Studio. Log in with 'datachain auth login'."
          )
 
      client = StudioClient(team=team_name)
@@ -314,7 +314,7 @@ def show_job_logs(job_id: str, team_name: Optional[str]):
      token = Config().read().get("studio", {}).get("token")
      if not token:
          raise DataChainError(
-             "Not logged in to Studio. Log in with 'datachain studio login'."
+             "Not logged in to Studio. Log in with 'datachain auth login'."
          )
 
      client = StudioClient(team=team_name)
{datachain-0.8.9.dist-info → datachain-0.8.11.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: datachain
- Version: 0.8.9
+ Version: 0.8.11
  Summary: Wrangle unstructured AI data at scale
  Author-email: Dmitry Petrov <support@dvc.org>
  License: Apache-2.0
@@ -78,7 +78,6 @@ Requires-Dist: pytest-xdist>=3.3.1; extra == "tests"
  Requires-Dist: virtualenv; extra == "tests"
  Requires-Dist: dulwich; extra == "tests"
  Requires-Dist: hypothesis; extra == "tests"
- Requires-Dist: open_clip_torch; extra == "tests"
  Requires-Dist: aiotools>=1.7.0; extra == "tests"
  Requires-Dist: requests-mock; extra == "tests"
  Requires-Dist: scipy; extra == "tests"
@@ -94,12 +93,9 @@ Provides-Extra: examples
  Requires-Dist: datachain[tests]; extra == "examples"
  Requires-Dist: defusedxml; extra == "examples"
  Requires-Dist: accelerate; extra == "examples"
- Requires-Dist: unstructured_ingest[embed-huggingface]; extra == "examples"
- Requires-Dist: unstructured[pdf]<0.16.12; extra == "examples"
- Requires-Dist: pdfplumber==0.11.5; extra == "examples"
  Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
- Requires-Dist: onnx==1.16.1; extra == "examples"
- Requires-Dist: ultralytics==8.3.61; extra == "examples"
+ Requires-Dist: ultralytics==8.3.68; extra == "examples"
+ Requires-Dist: open_clip_torch; extra == "examples"
 
  ================
  |logo| DataChain