datachain 0.8.9__py3-none-any.whl → 0.8.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

datachain/cli/__init__.py CHANGED
@@ -48,7 +48,6 @@ def main(argv: Optional[list[str]] = None) -> int:
48
48
  logger.setLevel(logging_level)
49
49
 
50
50
  client_config = {
51
- "aws_endpoint_url": args.aws_endpoint_url,
52
51
  "anon": args.anon,
53
52
  }
54
53
 
@@ -73,7 +72,7 @@ def main(argv: Optional[list[str]] = None) -> int:
73
72
 
74
73
  def handle_command(args, catalog, client_config) -> int:
75
74
  """Handle the different CLI commands."""
76
- from datachain.studio import process_jobs_args, process_studio_cli_args
75
+ from datachain.studio import process_auth_cli_args, process_jobs_args
77
76
 
78
77
  command_handlers = {
79
78
  "cp": lambda: handle_cp_command(args, catalog),
@@ -89,7 +88,7 @@ def handle_command(args, catalog, client_config) -> int:
89
88
  "query": lambda: handle_query_command(args, catalog),
90
89
  "clear-cache": lambda: clear_cache(catalog),
91
90
  "gc": lambda: garbage_collect(catalog),
92
- "studio": lambda: process_studio_cli_args(args),
91
+ "auth": lambda: process_auth_cli_args(args),
93
92
  "job": lambda: process_jobs_args(args),
94
93
  }
95
94
 
@@ -7,7 +7,7 @@ import shtab
7
7
  from datachain.cli.utils import BooleanOptionalAction, KeyValueArgs
8
8
 
9
9
  from .job import add_jobs_parser
10
- from .studio import add_studio_parser
10
+ from .studio import add_auth_parser
11
11
  from .utils import FIND_COLUMNS, add_show_args, add_sources_arg, find_columns_type
12
12
 
13
13
 
@@ -26,24 +26,25 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
26
26
 
27
27
  parent_parser = ArgumentParser(add_help=False)
28
28
  parent_parser.add_argument(
29
- "--aws-endpoint-url",
30
- type=str,
31
- help="AWS endpoint URL",
32
- )
33
- parent_parser.add_argument(
34
- "--anon",
35
- action="store_true",
36
- help="anon flag for remote storage (like awscli's --no-sign-request)",
29
+ "-v", "--verbose", action="count", default=0, help="Be verbose"
37
30
  )
38
31
  parent_parser.add_argument(
39
- "-u", "--update", action="count", default=0, help="Update cache"
32
+ "-q", "--quiet", action="count", default=0, help="Be quiet"
40
33
  )
34
+
41
35
  parent_parser.add_argument(
42
- "-v", "--verbose", action="count", default=0, help="Be verbose"
36
+ "--anon",
37
+ action="store_true",
38
+ help="Use anonymous access to storage",
43
39
  )
44
40
  parent_parser.add_argument(
45
- "-q", "--quiet", action="count", default=0, help="Be quiet"
41
+ "-u",
42
+ "--update",
43
+ action="count",
44
+ default=0,
45
+ help="Update cached list of files for the sources",
46
46
  )
47
+
47
48
  parent_parser.add_argument(
48
49
  "--debug-sql",
49
50
  action="store_true",
@@ -67,7 +68,9 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
67
68
  "cp", parents=[parent_parser], description="Copy data files from the cloud."
68
69
  )
69
70
  add_sources_arg(parse_cp).complete = shtab.DIR # type: ignore[attr-defined]
70
- parse_cp.add_argument("output", type=str, help="Output")
71
+ parse_cp.add_argument(
72
+ "output", type=str, help="Path to a directory or file to put data to"
73
+ )
71
74
  parse_cp.add_argument(
72
75
  "-f",
73
76
  "--force",
@@ -94,7 +97,9 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
94
97
  "clone", parents=[parent_parser], description="Copy data files from the cloud."
95
98
  )
96
99
  add_sources_arg(parse_clone).complete = shtab.DIR # type: ignore[attr-defined]
97
- parse_clone.add_argument("output", type=str, help="Output")
100
+ parse_clone.add_argument(
101
+ "output", type=str, help="Path to a directory or file to put data to"
102
+ )
98
103
  parse_clone.add_argument(
99
104
  "-f",
100
105
  "--force",
@@ -123,7 +128,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
123
128
  help="Do not copy files, just create a dataset",
124
129
  )
125
130
 
126
- add_studio_parser(subp, parent_parser)
131
+ add_auth_parser(subp, parent_parser)
127
132
  add_jobs_parser(subp, parent_parser)
128
133
 
129
134
  datasets_parser = subp.add_parser(
@@ -6,7 +6,7 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
6
6
  )
7
7
  jobs_subparser = jobs_parser.add_subparsers(
8
8
  dest="cmd",
9
- help="Use `datachain studio CMD --help` to display command-specific help",
9
+ help="Use `datachain auth CMD --help` to display command-specific help",
10
10
  )
11
11
 
12
12
  studio_run_help = "Run a job in Studio"
@@ -1,31 +1,31 @@
1
- def add_studio_parser(subparsers, parent_parser) -> None:
2
- studio_help = "Manage Studio authentication"
3
- studio_description = (
1
+ def add_auth_parser(subparsers, parent_parser) -> None:
2
+ auth_help = "Manage Studio authentication"
3
+ auth_description = (
4
4
  "Manage authentication and settings for Studio. "
5
5
  "Configure tokens for sharing datasets and using Studio features."
6
6
  )
7
7
 
8
- studio_parser = subparsers.add_parser(
9
- "studio",
8
+ auth_parser = subparsers.add_parser(
9
+ "auth",
10
10
  parents=[parent_parser],
11
- description=studio_description,
12
- help=studio_help,
11
+ description=auth_description,
12
+ help=auth_help,
13
13
  )
14
- studio_subparser = studio_parser.add_subparsers(
14
+ auth_subparser = auth_parser.add_subparsers(
15
15
  dest="cmd",
16
- help="Use `datachain studio CMD --help` to display command-specific help",
16
+ help="Use `datachain auth CMD --help` to display command-specific help",
17
17
  )
18
18
 
19
- studio_login_help = "Authenticate with Studio"
20
- studio_login_description = (
19
+ auth_login_help = "Authenticate with Studio"
20
+ auth_login_description = (
21
21
  "Authenticate with Studio using default scopes. "
22
22
  "A random name will be assigned as the token name if not specified."
23
23
  )
24
- login_parser = studio_subparser.add_parser(
24
+ login_parser = auth_subparser.add_parser(
25
25
  "login",
26
26
  parents=[parent_parser],
27
- description=studio_login_description,
28
- help=studio_login_help,
27
+ description=auth_login_description,
28
+ help=auth_login_help,
29
29
  )
30
30
 
31
31
  login_parser.add_argument(
@@ -58,26 +58,26 @@ def add_studio_parser(subparsers, parent_parser) -> None:
58
58
  help="Use code-based authentication without browser",
59
59
  )
60
60
 
61
- studio_logout_help = "Log out from Studio"
62
- studio_logout_description = (
61
+ auth_logout_help = "Log out from Studio"
62
+ auth_logout_description = (
63
63
  "Remove the Studio authentication token from global config."
64
64
  )
65
65
 
66
- studio_subparser.add_parser(
66
+ auth_subparser.add_parser(
67
67
  "logout",
68
68
  parents=[parent_parser],
69
- description=studio_logout_description,
70
- help=studio_logout_help,
69
+ description=auth_logout_description,
70
+ help=auth_logout_help,
71
71
  )
72
72
 
73
- studio_team_help = "Set default team for Studio operations"
74
- studio_team_description = "Set the default team for Studio operations."
73
+ auth_team_help = "Set default team for Studio operations"
74
+ auth_team_description = "Set the default team for Studio operations."
75
75
 
76
- team_parser = studio_subparser.add_parser(
76
+ team_parser = auth_subparser.add_parser(
77
77
  "team",
78
78
  parents=[parent_parser],
79
- description=studio_team_description,
80
- help=studio_team_help,
79
+ description=auth_team_description,
80
+ help=auth_team_help,
81
81
  )
82
82
  team_parser.add_argument(
83
83
  "team_name",
@@ -91,12 +91,12 @@ def add_studio_parser(subparsers, parent_parser) -> None:
91
91
  help="Set team globally for all projects",
92
92
  )
93
93
 
94
- studio_token_help = "View Studio authentication token" # noqa: S105
95
- studio_token_description = "Display the current authentication token for Studio." # noqa: S105
94
+ auth_token_help = "View Studio authentication token" # noqa: S105
95
+ auth_token_description = "Display the current authentication token for Studio." # noqa: S105
96
96
 
97
- studio_subparser.add_parser(
97
+ auth_subparser.add_parser(
98
98
  "token",
99
99
  parents=[parent_parser],
100
- description=studio_token_description,
101
- help=studio_token_help,
100
+ description=auth_token_description,
101
+ help=auth_token_help,
102
102
  )
@@ -30,7 +30,7 @@ def add_sources_arg(parser: ArgumentParser, nargs: Union[str, int] = "+") -> Act
30
30
  "sources",
31
31
  type=str,
32
32
  nargs=nargs,
33
- help="Data sources - paths to cloud storage directories",
33
+ help="Data sources - paths to source storage directories or files",
34
34
  )
35
35
 
36
36
 
datachain/cli/utils.py CHANGED
@@ -87,7 +87,7 @@ def get_logging_level(args: Namespace) -> int:
87
87
  def determine_flavors(studio: bool, local: bool, all: bool, token: Optional[str]):
88
88
  if studio and not token:
89
89
  raise DataChainError(
90
- "Not logged in to Studio. Log in with 'datachain studio login'."
90
+ "Not logged in to Studio. Log in with 'datachain auth login'."
91
91
  )
92
92
 
93
93
  if local or studio:
@@ -19,6 +19,7 @@ from sqlalchemy import MetaData, Table, UniqueConstraint, exists, select
19
19
  from sqlalchemy.dialects import sqlite
20
20
  from sqlalchemy.schema import CreateIndex, CreateTable, DropTable
21
21
  from sqlalchemy.sql import func
22
+ from sqlalchemy.sql.elements import BinaryExpression, BooleanClauseList
22
23
  from sqlalchemy.sql.expression import bindparam, cast
23
24
  from sqlalchemy.sql.selectable import Select
24
25
  from tqdm.auto import tqdm
@@ -40,7 +41,6 @@ if TYPE_CHECKING:
40
41
  from sqlalchemy.schema import SchemaItem
41
42
  from sqlalchemy.sql._typing import _FromClauseArgument, _OnClauseArgument
42
43
  from sqlalchemy.sql.elements import ColumnElement
43
- from sqlalchemy.sql.selectable import Join
44
44
  from sqlalchemy.types import TypeEngine
45
45
 
46
46
  from datachain.lib.file import File
@@ -654,16 +654,47 @@ class SQLiteWarehouse(AbstractWarehouse):
654
654
  right: "_FromClauseArgument",
655
655
  onclause: "_OnClauseArgument",
656
656
  inner: bool = True,
657
- ) -> "Join":
657
+ full: bool = False,
658
+ columns=None,
659
+ ) -> "Select":
658
660
  """
659
661
  Join two tables together.
660
662
  """
661
- return sqlalchemy.join(
662
- left,
663
- right,
664
- onclause,
665
- isouter=not inner,
663
+ if not full:
664
+ join_query = sqlalchemy.join(
665
+ left,
666
+ right,
667
+ onclause,
668
+ isouter=not inner,
669
+ )
670
+ return sqlalchemy.select(*columns).select_from(join_query)
671
+
672
+ left_right_join = sqlalchemy.select(*columns).select_from(
673
+ sqlalchemy.join(left, right, onclause, isouter=True)
666
674
  )
675
+ right_left_join = sqlalchemy.select(*columns).select_from(
676
+ sqlalchemy.join(right, left, onclause, isouter=True)
677
+ )
678
+
679
+ def add_left_rows_filter(exp: BinaryExpression):
680
+ """
681
+ Adds filter to right_left_join to remove unmatched left table rows by
682
+ getting column names that need to be NULL from BinaryExpressions in onclause
683
+ """
684
+ return right_left_join.where(
685
+ getattr(left.c, exp.left.name) == None # type: ignore[union-attr] # noqa: E711
686
+ )
687
+
688
+ if isinstance(onclause, BinaryExpression):
689
+ right_left_join = add_left_rows_filter(onclause)
690
+
691
+ if isinstance(onclause, BooleanClauseList):
692
+ for c in onclause.get_children():
693
+ if isinstance(c, BinaryExpression):
694
+ right_left_join = add_left_rows_filter(c)
695
+
696
+ union = sqlalchemy.union(left_right_join, right_left_join).subquery()
697
+ return sqlalchemy.select(*union.c).select_from(union)
667
698
 
668
699
  def create_pre_udf_table(self, query: "Select") -> "Table":
669
700
  """
@@ -31,7 +31,7 @@ if TYPE_CHECKING:
31
31
  _FromClauseArgument,
32
32
  _OnClauseArgument,
33
33
  )
34
- from sqlalchemy.sql.selectable import Join, Select
34
+ from sqlalchemy.sql.selectable import Select
35
35
  from sqlalchemy.types import TypeEngine
36
36
 
37
37
  from datachain.data_storage import schema
@@ -873,7 +873,7 @@ class AbstractWarehouse(ABC, Serializable):
873
873
  right: "_FromClauseArgument",
874
874
  onclause: "_OnClauseArgument",
875
875
  inner: bool = True,
876
- ) -> "Join":
876
+ ) -> "Select":
877
877
  """
878
878
  Join two tables together.
879
879
  """
@@ -52,15 +52,15 @@ def python_to_sql(typ): # noqa: PLR0911
52
52
 
53
53
  args = get_args(typ)
54
54
  if inspect.isclass(orig) and (issubclass(list, orig) or issubclass(tuple, orig)):
55
- if args is None or len(args) != 1:
55
+ if args is None:
56
56
  raise TypeError(f"Cannot resolve type '{typ}' for flattening features")
57
57
 
58
58
  args0 = args[0]
59
59
  if ModelStore.is_pydantic(args0):
60
60
  return Array(JSON())
61
61
 
62
- next_type = python_to_sql(args0)
63
- return Array(next_type)
62
+ list_type = list_of_args_to_type(args)
63
+ return Array(list_type)
64
64
 
65
65
  if orig is Annotated:
66
66
  # Ignoring annotations
@@ -82,6 +82,18 @@ def python_to_sql(typ): # noqa: PLR0911
82
82
  raise TypeError(f"Cannot recognize type {typ}")
83
83
 
84
84
 
85
+ def list_of_args_to_type(args) -> SQLType:
86
+ first_type = python_to_sql(args[0])
87
+ for next_arg in args[1:]:
88
+ try:
89
+ next_type = python_to_sql(next_arg)
90
+ if next_type != first_type:
91
+ return JSON()
92
+ except TypeError:
93
+ return JSON()
94
+ return first_type
95
+
96
+
85
97
  def _is_json_inside_union(orig, args) -> bool:
86
98
  if orig == Union and len(args) >= 2:
87
99
  # List in JSON: Union[dict, list[dict]]
datachain/lib/dc.py CHANGED
@@ -1332,6 +1332,7 @@ class DataChain:
1332
1332
  on: Union[MergeColType, Sequence[MergeColType]],
1333
1333
  right_on: Optional[Union[MergeColType, Sequence[MergeColType]]] = None,
1334
1334
  inner=False,
1335
+ full=False,
1335
1336
  rname="right_",
1336
1337
  ) -> "Self":
1337
1338
  """Merge two chains based on the specified criteria.
@@ -1345,6 +1346,7 @@ class DataChain:
1345
1346
  right_on: Optional predicate or list of Predicates for the `right_ds`
1346
1347
  to join.
1347
1348
  inner (bool): Whether to run inner join or outer join.
1349
+ full (bool): Whether to run full outer join.
1348
1350
  rname (str): Name prefix for conflicting signal names.
1349
1351
 
1350
1352
  Examples:
@@ -1419,7 +1421,7 @@ class DataChain:
1419
1421
  )
1420
1422
 
1421
1423
  query = self._query.join(
1422
- right_ds._query, sqlalchemy.and_(*ops), inner, rname + "{name}"
1424
+ right_ds._query, sqlalchemy.and_(*ops), inner, full, rname + "{name}"
1423
1425
  )
1424
1426
  query.feature_schema = None
1425
1427
  ds = self._evolve(query=query)
@@ -875,6 +875,7 @@ class SQLJoin(Step):
875
875
  query2: "DatasetQuery"
876
876
  predicates: Union[JoinPredicateType, tuple[JoinPredicateType, ...]]
877
877
  inner: bool
878
+ full: bool
878
879
  rname: str
879
880
 
880
881
  def get_query(self, dq: "DatasetQuery", temp_tables: list[str]) -> sa.Subquery:
@@ -977,14 +978,14 @@ class SQLJoin(Step):
977
978
  self.validate_expression(join_expression, q1, q2)
978
979
 
979
980
  def q(*columns):
980
- join_query = self.catalog.warehouse.join(
981
+ return self.catalog.warehouse.join(
981
982
  q1,
982
983
  q2,
983
984
  join_expression,
984
985
  inner=self.inner,
986
+ full=self.full,
987
+ columns=columns,
985
988
  )
986
- return sqlalchemy.select(*columns).select_from(join_query)
987
- # return sqlalchemy.select(*subquery.c).select_from(subquery)
988
989
 
989
990
  return step_result(
990
991
  q,
@@ -1489,6 +1490,7 @@ class DatasetQuery:
1489
1490
  dataset_query: "DatasetQuery",
1490
1491
  predicates: Union[JoinPredicateType, Sequence[JoinPredicateType]],
1491
1492
  inner=False,
1493
+ full=False,
1492
1494
  rname="{name}_right",
1493
1495
  ) -> "Self":
1494
1496
  left = self.clone(new_table=False)
@@ -1504,7 +1506,9 @@ class DatasetQuery:
1504
1506
  if isinstance(predicates, (str, ColumnClause, ColumnElement))
1505
1507
  else tuple(predicates)
1506
1508
  )
1507
- new_query.steps = [SQLJoin(self.catalog, left, right, predicates, inner, rname)]
1509
+ new_query.steps = [
1510
+ SQLJoin(self.catalog, left, right, predicates, inner, full, rname)
1511
+ ]
1508
1512
  return new_query
1509
1513
 
1510
1514
  @detach
@@ -75,7 +75,7 @@ class StudioClient:
75
75
 
76
76
  if not token:
77
77
  raise DataChainError(
78
- "Studio token is not set. Use `datachain studio login` "
78
+ "Studio token is not set. Use `datachain auth login` "
79
79
  "or environment variable `DVC_STUDIO_TOKEN` to set it."
80
80
  )
81
81
 
@@ -105,7 +105,7 @@ class StudioClient:
105
105
  if not team:
106
106
  raise DataChainError(
107
107
  "Studio team is not set. "
108
- "Use `datachain studio team <team_name>` "
108
+ "Use `datachain auth team <team_name>` "
109
109
  "or environment variable `DVC_STUDIO_TEAM` to set it."
110
110
  "You can also set it in the config file as team under studio."
111
111
  )
@@ -4,6 +4,7 @@ import sqlite3
4
4
  import warnings
5
5
  from collections.abc import Iterable
6
6
  from datetime import MAXYEAR, MINYEAR, datetime, timezone
7
+ from functools import cache
7
8
  from types import MappingProxyType
8
9
  from typing import Callable, Optional
9
10
 
@@ -526,24 +527,44 @@ def compile_collect(element, compiler, **kwargs):
526
527
  return compiler.process(func.json_group_array(*element.clauses.clauses), **kwargs)
527
528
 
528
529
 
529
- def load_usearch_extension(conn: sqlite3.Connection) -> bool:
530
+ @cache
531
+ def usearch_sqlite_path() -> Optional[str]:
530
532
  try:
531
- # usearch is part of the vector optional dependencies
532
- # we use the extension's cosine and euclidean distance functions
533
- from usearch import sqlite_path
533
+ import usearch
534
+ except ImportError:
535
+ return None
534
536
 
535
- conn.enable_load_extension(True)
537
+ with warnings.catch_warnings():
538
+ # usearch binary is not available for Windows, see: https://github.com/unum-cloud/usearch/issues/427.
539
+ # and, sometimes fail to download the binary in other platforms
540
+ # triggering UserWarning.
536
541
 
537
- with warnings.catch_warnings():
538
- # usearch binary is not available for Windows, see: https://github.com/unum-cloud/usearch/issues/427.
539
- # and, sometimes fail to download the binary in other platforms
540
- # triggering UserWarning.
542
+ warnings.filterwarnings("ignore", category=UserWarning, module="usearch")
541
543
 
542
- warnings.filterwarnings("ignore", category=UserWarning, module="usearch")
543
- conn.load_extension(sqlite_path())
544
+ try:
545
+ return usearch.sqlite_path()
546
+ except FileNotFoundError:
547
+ return None
544
548
 
545
- conn.enable_load_extension(False)
546
- return True
547
549
 
548
- except Exception: # noqa: BLE001
550
+ def load_usearch_extension(conn: sqlite3.Connection) -> bool:
551
+ # usearch is part of the vector optional dependencies
552
+ # we use the extension's cosine and euclidean distance functions
553
+ ext_path = usearch_sqlite_path()
554
+ if ext_path is None:
555
+ return False
556
+
557
+ try:
558
+ conn.enable_load_extension(True)
559
+ except AttributeError:
560
+ # sqlite3 module is not built with loadable extension support by default.
561
+ return False
562
+
563
+ try:
564
+ conn.load_extension(ext_path)
565
+ except sqlite3.OperationalError:
549
566
  return False
567
+ else:
568
+ return True
569
+ finally:
570
+ conn.enable_load_extension(False)
datachain/studio.py CHANGED
@@ -47,7 +47,7 @@ def process_jobs_args(args: "Namespace"):
47
47
  raise DataChainError(f"Unknown command '{args.cmd}'.")
48
48
 
49
49
 
50
- def process_studio_cli_args(args: "Namespace"):
50
+ def process_auth_cli_args(args: "Namespace"):
51
51
  if args.cmd is None:
52
52
  print(
53
53
  f"Use 'datachain {args.command} --help' to see available options",
@@ -95,7 +95,7 @@ def login(args: "Namespace"):
95
95
  raise DataChainError(
96
96
  "Token already exists. "
97
97
  "To login with a different token, "
98
- "logout using `datachain studio logout`."
98
+ "logout using `datachain auth logout`."
99
99
  )
100
100
 
101
101
  open_browser = not args.no_open
@@ -121,12 +121,12 @@ def logout():
121
121
  token = conf.get("studio", {}).get("token")
122
122
  if not token:
123
123
  raise DataChainError(
124
- "Not logged in to Studio. Log in with 'datachain studio login'."
124
+ "Not logged in to Studio. Log in with 'datachain auth login'."
125
125
  )
126
126
 
127
127
  del conf["studio"]["token"]
128
128
 
129
- print("Logged out from Studio. (you can log back in with 'datachain studio login')")
129
+ print("Logged out from Studio. (you can log back in with 'datachain auth login')")
130
130
 
131
131
 
132
132
  def token():
@@ -134,7 +134,7 @@ def token():
134
134
  token = config.get("token")
135
135
  if not token:
136
136
  raise DataChainError(
137
- "Not logged in to Studio. Log in with 'datachain studio login'."
137
+ "Not logged in to Studio. Log in with 'datachain auth login'."
138
138
  )
139
139
 
140
140
  print(token)
@@ -299,7 +299,7 @@ def cancel_job(job_id: str, team_name: Optional[str]):
299
299
  token = Config().read().get("studio", {}).get("token")
300
300
  if not token:
301
301
  raise DataChainError(
302
- "Not logged in to Studio. Log in with 'datachain studio login'."
302
+ "Not logged in to Studio. Log in with 'datachain auth login'."
303
303
  )
304
304
 
305
305
  client = StudioClient(team=team_name)
@@ -314,7 +314,7 @@ def show_job_logs(job_id: str, team_name: Optional[str]):
314
314
  token = Config().read().get("studio", {}).get("token")
315
315
  if not token:
316
316
  raise DataChainError(
317
- "Not logged in to Studio. Log in with 'datachain studio login'."
317
+ "Not logged in to Studio. Log in with 'datachain auth login'."
318
318
  )
319
319
 
320
320
  client = StudioClient(team=team_name)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: datachain
3
- Version: 0.8.9
3
+ Version: 0.8.10
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License: Apache-2.0
@@ -12,15 +12,15 @@ datachain/nodes_fetcher.py,sha256=ILMzUW5o4_6lUOVrLDC9gJPCXfcgKnMG68plrc7dAOA,11
12
12
  datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
13
13
  datachain/progress.py,sha256=lRzxoYP4Qv2XBwD78sOkmYRzHFpZ2ExVNJF8wAeICtY,770
14
14
  datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- datachain/studio.py,sha256=5LTzr7jNxapQk4aF4ob8ax9zNQ0ShZ26nQtOi4gKToc,9422
15
+ datachain/studio.py,sha256=k4QcFmzJ8rBVJIz27BfexmH6lxqpY6VFRgSTJ1Y90Gw,9408
16
16
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
17
17
  datachain/utils.py,sha256=LBeg-9n48saBTHSPk7u_j-kjJnPUAq5Oyps_peSaqlM,14128
18
18
  datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
19
19
  datachain/catalog/catalog.py,sha256=1jtwHVxCRQWJSTz1GjP6qvB2bDo2AosBjouQh3neKaM,60516
20
20
  datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
21
21
  datachain/catalog/loader.py,sha256=HA_mBC7q_My8j2WnSvIjUGuJpl6SIdg5vvy_lagxJlA,5733
22
- datachain/cli/__init__.py,sha256=jnmSov-UIvgz3p-YclkKEhVpvawPVtsd8o5uq7hrRfc,8489
23
- datachain/cli/utils.py,sha256=jEoqY0agU3AZ-VQBpyieDKIlk7j3sumtlHE3OgbAzdU,3038
22
+ datachain/cli/__init__.py,sha256=c5iTf37zCeyJfKbJeFyseVCH9tj-zxlYlCUZWWZCy3s,8432
23
+ datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
24
24
  datachain/cli/commands/__init__.py,sha256=uc77ggTRWrq-w1AVsH3Muy6v1ATkNsXUBPIRaOFgNus,533
25
25
  datachain/cli/commands/datasets.py,sha256=q1FkvFfeBCkuIuaA8pick0y51ZQuQK89ULUFse5xsu0,3583
26
26
  datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
@@ -29,10 +29,10 @@ datachain/cli/commands/ls.py,sha256=Wb8hXyBwyhb62Zk6ZhNFPFrj2lJhdbRcnBQQkgL_qyw,
29
29
  datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibVE,600
30
30
  datachain/cli/commands/query.py,sha256=2S7hQxialt1fkbocxi6JXZI6jS5QnFrD1aOjKgZkzfI,1471
31
31
  datachain/cli/commands/show.py,sha256=RVb_7Kjd1kzqTxRKYFvmD04LaJHOtrCc4FYMyc-ZEYw,1149
32
- datachain/cli/parser/__init__.py,sha256=xBvS6FDkD-0TrME9t56C00jCFHD5Ly4SIQLe9JGHlpE,14881
33
- datachain/cli/parser/job.py,sha256=m_w6DZBMvQu0pu5LHxBVnh9gENPf02jxhwtLqjfhEGU,3199
34
- datachain/cli/parser/studio.py,sha256=AxCK8Oz5psj3jtl6XFSkGjDoSXuek3nBpPttvNFov9U,2977
35
- datachain/cli/parser/utils.py,sha256=gDaRll8CugmdFdx9mhdSPVKW3oBoQOVlg6WmNeMgyd8,1597
32
+ datachain/cli/parser/__init__.py,sha256=dgTUS0SjkT01imPG-EcXP0k_juornFh360wsWhdT1fY,14893
33
+ datachain/cli/parser/job.py,sha256=Zpi_bEsMp71YCr8xay0I93Taz8zS0_jHbxtvvTzXj6c,3197
34
+ datachain/cli/parser/studio.py,sha256=NTymVNu0vnRgNDWSqE22fRsMDlCFNZLcWJ7SsLDs2sE,2917
35
+ datachain/cli/parser/utils.py,sha256=8dybCL0Tce9-3IQntCiEY0t_q2-YGBxZeMqnq_es8vI,1607
36
36
  datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
37
37
  datachain/client/azure.py,sha256=ma6fJcnveG8wpNy1PSrN5hgvmRdCj8Sf3RKjfd3qCyM,3221
38
38
  datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
@@ -47,8 +47,8 @@ datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s
47
47
  datachain/data_storage/metastore.py,sha256=hfTITcesE9XlUTxcCcdDyWGGep-QSjJL9DUxko5QCeI,37524
48
48
  datachain/data_storage/schema.py,sha256=8np_S6Ltq7WXfcqpoSeFPryPS7cipdbiSP6UnKJkAac,9516
49
49
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
50
- datachain/data_storage/sqlite.py,sha256=kSUvChn3bugyh5qUN8cEE6Rvornwh_6fd94ZKKTxLpk,23181
51
- datachain/data_storage/warehouse.py,sha256=ASKD0mMB8eTvXhMLoUroUypWpN1fVaynSB6k1NZX8lE,30828
50
+ datachain/data_storage/sqlite.py,sha256=KJ8hI0Hrwv9eAA-nLUlw2AYCQxiAAZ12a-ftUBtroNQ,24545
51
+ datachain/data_storage/warehouse.py,sha256=ovdH9LmOWLfCrvf0UvXnrNC-CrdAjns3EmXEgFdz4KM,30824
52
52
  datachain/diff/__init__.py,sha256=OapNRBsyGDOQHelefUEoXoFHRWCJuBnhvD0ibebKvBc,10486
53
53
  datachain/func/__init__.py,sha256=8WWvzWYtOzXmAC1fOMegyoJ-rFnpAca_5UW4gy8BVsk,1077
54
54
  datachain/func/aggregate.py,sha256=7_IPrIwb2XSs3zG4iOr1eTvzn6kNVe2mkzvNzjusDHk,10942
@@ -66,7 +66,7 @@ datachain/lib/arrow.py,sha256=sU6cbjz2W1UuTfez6tCYPfVPJXlmfMDbnaVWPhMu0XU,9906
66
66
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
67
67
  datachain/lib/data_model.py,sha256=zS4lmXHVBXc9ntcyea2a1CRLXGSAN_0glXcF88CohgY,2685
68
68
  datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
69
- datachain/lib/dc.py,sha256=673Mu2Pqu63o7wrpdvujOmWJhJNbXfnyCSWLS93mJpw,92315
69
+ datachain/lib/dc.py,sha256=l1iKnT6wplndIgfMsVMbbKL-etlj2TdHPKJ6kIwqK88,92398
70
70
  datachain/lib/file.py,sha256=7posvEFSb7gsLKAiid75dOJRyHTRKOmBAkmBw6RiZyg,16307
71
71
  datachain/lib/hf.py,sha256=DvoI8fv-WkL3FDEuIT80T9WrRs6fXesjbU0bmIDDsNE,5882
72
72
  datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
@@ -87,7 +87,7 @@ datachain/lib/webdataset.py,sha256=o7SHk5HOUWsZ5Ln04xOM04eQqiBHiJNO7xLgyVBrwo8,6
87
87
  datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0EVlUE,2525
88
88
  datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
89
89
  datachain/lib/convert/flatten.py,sha256=IZFiUYbgXSxXhPSG5Cqf5IjnJ4ZDZKXMr4o_yCR1NY4,1505
90
- datachain/lib/convert/python_to_sql.py,sha256=40SAOdoOgikZRhn8iomCPDRoxC3RFxjJLivEAA9MHDU,2880
90
+ datachain/lib/convert/python_to_sql.py,sha256=wg-O5FRKX3x3Wh8ZL1b9ntMlgf1zRO4djMP3t8CHJLo,3188
91
91
  datachain/lib/convert/sql_to_python.py,sha256=XXCBYDQFUXJIBNWkjEP944cnCfJ8GF2Tji0DLF3A_zQ,315
92
92
  datachain/lib/convert/unflatten.py,sha256=ysMkstwJzPMWUlnxn-Z-tXJR3wmhjHeSN_P-sDcLS6s,2010
93
93
  datachain/lib/convert/values_to_tuples.py,sha256=EFfIGBiVVltJQG8blzsQ1dGXneh4D3wdLfSUeoK10OI,3931
@@ -101,7 +101,7 @@ datachain/model/ultralytics/pose.py,sha256=71KBTcoST2wcEtsyGXqLVpvUtqbp9gwZGA15p
101
101
  datachain/model/ultralytics/segment.py,sha256=Z1ab0tZRJubSYNH4KkFlzhYeGNTfAyC71KmkQcToHDQ,2760
102
102
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
103
103
  datachain/query/batch.py,sha256=6w8gzLTmLeylststu-gT5jIqEfi4-djS7_yTYyeo-fw,4190
104
- datachain/query/dataset.py,sha256=iupWhXBxbBDuihqyidztXWhTAD7wAli3FLFkf8_neUA,56351
104
+ datachain/query/dataset.py,sha256=Su4axlm8ShxUzalYYA7UKU11Pm9cmYi88kUI9FKScew,56328
105
105
  datachain/query/dispatch.py,sha256=_1vjeQ1wjUoxlik55k0JkWqQCUfMjgVWmEOyWRkx0dU,12437
106
106
  datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
107
107
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -111,7 +111,7 @@ datachain/query/session.py,sha256=fQAtl5zRESRDfRS2d5J9KgrWauunCtrd96vP4Ns1KlE,59
111
111
  datachain/query/udf.py,sha256=GY8E9pnzPE7ZKl_jvetZpn9R2rlUtMlhoYj4UmrzFzw,594
112
112
  datachain/query/utils.py,sha256=u0A_BwG9PNs0DxoDcvSWgWLpj3ByTUv8CqH13CIuGag,1293
113
113
  datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
- datachain/remote/studio.py,sha256=3DlgESETzxm3dgb6zzjjGxsddSkacT2dARnteLAfMxQ,13366
114
+ datachain/remote/studio.py,sha256=DfW2Iej858tWkKQMg_fvp1cvsvgGX3Z07VXhv_sI3GY,13362
115
115
  datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
116
116
  datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
117
117
  datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
@@ -127,15 +127,15 @@ datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0
127
127
  datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
128
128
  datachain/sql/functions/string.py,sha256=E-T9OIzUR-GKaLgjZsEtg5CJrY_sLf1lt1awTvY7w2w,1426
129
129
  datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
130
- datachain/sql/sqlite/base.py,sha256=E2PK3hoGlHey1eEjcReXRrI-c_ASr3AmAXaNYKDY_o8,18634
130
+ datachain/sql/sqlite/base.py,sha256=bPrYfj2ZF9hFZFs0chgH7J5l_tdXI4VMZMgkuBjf7Ng,19070
131
131
  datachain/sql/sqlite/types.py,sha256=lPXS1XbkmUtlkkiRxy_A_UzsgpPv2VSkXYOD4zIHM4w,1734
132
132
  datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
133
133
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
134
134
  datachain/toolkit/split.py,sha256=z3zRJNzjWrpPuRw-zgFbCOBKInyYxJew8ygrYQRQLNc,2930
135
135
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
136
- datachain-0.8.9.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
137
- datachain-0.8.9.dist-info/METADATA,sha256=fXC2bFkpYNoMmWo8a1m4NYgdCsH4HfLy9ufR89u0PHY,11117
138
- datachain-0.8.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
139
- datachain-0.8.9.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
140
- datachain-0.8.9.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
141
- datachain-0.8.9.dist-info/RECORD,,
136
+ datachain-0.8.10.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
137
+ datachain-0.8.10.dist-info/METADATA,sha256=pOueDLv5huX86frgB9ihIuydK_1-t_PHDUvTpabvLuE,11118
138
+ datachain-0.8.10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
139
+ datachain-0.8.10.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
140
+ datachain-0.8.10.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
141
+ datachain-0.8.10.dist-info/RECORD,,