datachain 0.8.8__py3-none-any.whl → 0.8.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- datachain/cli/__init__.py +14 -7
- datachain/cli/commands/datasets.py +2 -3
- datachain/cli/parser/__init__.py +69 -82
- datachain/cli/parser/job.py +20 -25
- datachain/cli/parser/studio.py +41 -65
- datachain/cli/parser/utils.py +1 -1
- datachain/cli/utils.py +1 -1
- datachain/client/local.py +1 -1
- datachain/data_storage/sqlite.py +38 -7
- datachain/data_storage/warehouse.py +2 -2
- datachain/lib/arrow.py +1 -1
- datachain/lib/convert/python_to_sql.py +15 -3
- datachain/lib/convert/unflatten.py +1 -2
- datachain/lib/dc.py +26 -5
- datachain/lib/file.py +27 -4
- datachain/lib/listing.py +4 -4
- datachain/lib/pytorch.py +3 -1
- datachain/lib/udf.py +56 -20
- datachain/model/bbox.py +9 -9
- datachain/model/pose.py +9 -9
- datachain/model/segment.py +6 -6
- datachain/progress.py +0 -13
- datachain/query/dataset.py +20 -14
- datachain/remote/studio.py +2 -2
- datachain/sql/sqlite/base.py +35 -14
- datachain/studio.py +22 -16
- {datachain-0.8.8.dist-info → datachain-0.8.10.dist-info}/METADATA +4 -3
- {datachain-0.8.8.dist-info → datachain-0.8.10.dist-info}/RECORD +32 -32
- {datachain-0.8.8.dist-info → datachain-0.8.10.dist-info}/LICENSE +0 -0
- {datachain-0.8.8.dist-info → datachain-0.8.10.dist-info}/WHEEL +0 -0
- {datachain-0.8.8.dist-info → datachain-0.8.10.dist-info}/entry_points.txt +0 -0
- {datachain-0.8.8.dist-info → datachain-0.8.10.dist-info}/top_level.txt +0 -0
datachain/cli/__init__.py
CHANGED
@@ -39,12 +39,15 @@ def main(argv: Optional[list[str]] = None) -> int:
     if args.command in ("internal-run-udf", "internal-run-udf-worker"):
         return handle_udf(args.command)
 
+    if args.command is None:
+        datachain_parser.print_help(sys.stderr)
+        return 1
+
     logger.addHandler(logging.StreamHandler())
     logging_level = get_logging_level(args)
     logger.setLevel(logging_level)
 
     client_config = {
-        "aws_endpoint_url": args.aws_endpoint_url,
         "anon": args.anon,
     }
 
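The subparser setup in datachain/cli/parser/__init__.py below drops required=True, so a bare `datachain` invocation now reaches this new `args.command is None` branch instead of failing inside argparse; `handle_dataset_command` gains the same guard for a bare `datachain dataset`. A minimal standalone sketch of the pattern (the parser and names here are illustrative, not datachain's internals):

import sys
from argparse import ArgumentParser

parser = ArgumentParser(prog="demo")
subp = parser.add_subparsers(dest="command")  # note: no required=True
subp.add_parser("ls")

args = parser.parse_args([])       # simulates running `demo` with no arguments
if args.command is None:           # argparse leaves the dest as None
    parser.print_help(sys.stderr)  # help goes to stderr, not stdout
    raise SystemExit(1)            # and the process exits non-zero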
@@ -69,7 +72,7 @@ def main(argv: Optional[list[str]] = None) -> int:
 
 def handle_command(args, catalog, client_config) -> int:
     """Handle the different CLI commands."""
-    from datachain.studio import
+    from datachain.studio import process_auth_cli_args, process_jobs_args
 
     command_handlers = {
         "cp": lambda: handle_cp_command(args, catalog),
@@ -85,7 +88,7 @@ def handle_command(args, catalog, client_config) -> int:
         "query": lambda: handle_query_command(args, catalog),
         "clear-cache": lambda: clear_cache(catalog),
         "gc": lambda: garbage_collect(catalog),
-        "
+        "auth": lambda: process_auth_cli_args(args),
         "job": lambda: process_jobs_args(args),
     }
 
@@ -120,12 +123,17 @@ def handle_clone_command(args, catalog):
         recursive=bool(args.recursive),
         no_glob=args.no_glob,
         no_cp=args.no_cp,
-        edatachain=args.edatachain,
-        edatachain_file=args.edatachain_file,
     )
 
 
 def handle_dataset_command(args, catalog):
+    if args.datasets_cmd is None:
+        print(
+            f"Use 'datachain {args.command} --help' to see available options",
+            file=sys.stderr,
+        )
+        return 1
+
     dataset_commands = {
         "pull": lambda: catalog.pull_dataset(
             args.dataset,
@@ -134,8 +142,6 @@ def handle_dataset_command(args, catalog):
             local_ds_version=args.local_version,
             cp=args.cp,
             force=bool(args.force),
-            edatachain=args.edatachain,
-            edatachain_file=args.edatachain_file,
         ),
         "edit": lambda: edit_dataset(
             catalog,
@@ -187,6 +193,7 @@ def handle_dataset_command(args, catalog):
     handler = dataset_commands.get(args.datasets_cmd)
     if handler:
         return handler()
+
     raise Exception(f"Unexpected command {args.datasets_cmd}")
 
 
datachain/cli/commands/datasets.py
CHANGED
@@ -11,6 +11,7 @@ if TYPE_CHECKING:
 from datachain.cli.utils import determine_flavors
 from datachain.config import Config
 from datachain.error import DatasetNotFoundError
+from datachain.studio import list_datasets as list_datasets_studio
 
 
 def list_datasets(
@@ -20,14 +21,12 @@ def list_datasets(
     all: bool = True,
     team: Optional[str] = None,
 ):
-    from datachain.studio import list_datasets
-
     token = Config().read().get("studio", {}).get("token")
     all, local, studio = determine_flavors(studio, local, all, token)
 
     local_datasets = set(list_datasets_local(catalog)) if all or local else set()
     studio_datasets = (
-        set(
+        set(list_datasets_studio(team=team)) if (all or studio) and token else set()
     )
 
     rows = [
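A hedged sketch of what the rewritten listing above computes, with made-up dataset names standing in for the results of list_datasets_local and list_datasets_studio; the real token comes from Config():

token = "fake-token"    # stand-in for Config().read().get("studio", {}).get("token")
all_, local, studio = True, False, False  # the real code reuses its `all` parameter

local_datasets = {"cats", "dogs"} if all_ or local else set()
studio_datasets = {"dogs", "birds"} if (all_ or studio) and token else set()

for name in sorted(local_datasets | studio_datasets):
    print(f"{name}: local={name in local_datasets}, studio={name in studio_datasets}")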
datachain/cli/parser/__init__.py
CHANGED
@@ -1,3 +1,4 @@
+import argparse
 from argparse import ArgumentParser
 from importlib.metadata import PackageNotFoundError, version
 
@@ -6,7 +7,7 @@ import shtab
 from datachain.cli.utils import BooleanOptionalAction, KeyValueArgs
 
 from .job import add_jobs_parser
-from .studio import
+from .studio import add_auth_parser
 from .utils import FIND_COLUMNS, add_show_args, add_sources_arg, find_columns_type
 
 
@@ -18,61 +19,64 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     __version__ = "unknown"
 
     parser = ArgumentParser(
-        description="DataChain: Wrangle unstructured AI data at scale",
+        description="DataChain: Wrangle unstructured AI data at scale.",
+        prog="datachain",
     )
     parser.add_argument("-V", "--version", action="version", version=__version__)
 
     parent_parser = ArgumentParser(add_help=False)
     parent_parser.add_argument(
-        "--aws-endpoint-url",
-        type=str,
-        help="AWS endpoint URL",
-    )
-    parent_parser.add_argument(
-        "--anon",
-        action="store_true",
-        help="AWS anon (aka awscli's --no-sign-request)",
+        "-v", "--verbose", action="count", default=0, help="Be verbose"
     )
     parent_parser.add_argument(
-        "-
+        "-q", "--quiet", action="count", default=0, help="Be quiet"
     )
+
     parent_parser.add_argument(
-        "
+        "--anon",
+        action="store_true",
+        help="Use anonymous access to storage",
     )
     parent_parser.add_argument(
-        "-
+        "-u",
+        "--update",
+        action="count",
+        default=0,
+        help="Update cached list of files for the sources",
     )
+
     parent_parser.add_argument(
         "--debug-sql",
         action="store_true",
         default=False,
-        help=
+        help=argparse.SUPPRESS,
     )
     parent_parser.add_argument(
         "--pdb",
         action="store_true",
         default=False,
-        help=
+        help=argparse.SUPPRESS,
    )
 
     subp = parser.add_subparsers(
         title="Available Commands",
         metavar="command",
         dest="command",
-        help=f"Use `{parser.prog} command --help` for command-specific help
-        required=True,
+        help=f"Use `{parser.prog} command --help` for command-specific help",
     )
     parse_cp = subp.add_parser(
-        "cp", parents=[parent_parser], description="Copy data files from the cloud"
+        "cp", parents=[parent_parser], description="Copy data files from the cloud."
     )
     add_sources_arg(parse_cp).complete = shtab.DIR # type: ignore[attr-defined]
-    parse_cp.add_argument(
+    parse_cp.add_argument(
+        "output", type=str, help="Path to a directory or file to put data to"
+    )
     parse_cp.add_argument(
         "-f",
         "--force",
         default=False,
         action="store_true",
-        help="Force creating
+        help="Force creating files even if they already exist",
     )
     parse_cp.add_argument(
         "-r",
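Two details in this hunk are easy to miss: help=argparse.SUPPRESS keeps --debug-sql and --pdb parseable while hiding them from --help output, and dropping required=True from add_subparsers is what enables the no-command handling added in main() above. A small self-contained illustration (the names are stand-ins, not datachain's code):

import argparse

parser = argparse.ArgumentParser(prog="demo")
parser.add_argument("--anon", action="store_true",
                    help="Use anonymous access to storage")
parser.add_argument("--debug-sql", action="store_true", default=False,
                    help=argparse.SUPPRESS)   # accepted, but not listed

parser.print_help()                        # shows --anon only (plus -h)
args = parser.parse_args(["--debug-sql"])  # still parses fine
print(args.debug_sql)                      # True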
@@ -90,10 +94,12 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     )
 
     parse_clone = subp.add_parser(
-        "clone", parents=[parent_parser], description="Copy data files from the cloud"
+        "clone", parents=[parent_parser], description="Copy data files from the cloud."
     )
     add_sources_arg(parse_clone).complete = shtab.DIR # type: ignore[attr-defined]
-    parse_clone.add_argument(
+    parse_clone.add_argument(
+        "output", type=str, help="Path to a directory or file to put data to"
+    )
     parse_clone.add_argument(
         "-f",
         "--force",
@@ -121,40 +127,30 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         action="store_true",
         help="Do not copy files, just create a dataset",
     )
-    parse_clone.add_argument(
-        "--edatachain",
-        default=False,
-        action="store_true",
-        help="Create a .edatachain file",
-    )
-    parse_clone.add_argument(
-        "--edatachain-file",
-        help="Use a different filename for the resulting .edatachain file",
-    )
 
-
+    add_auth_parser(subp, parent_parser)
     add_jobs_parser(subp, parent_parser)
 
     datasets_parser = subp.add_parser(
         "dataset",
         aliases=["ds"],
         parents=[parent_parser],
-        description="Commands for managing
+        description="Commands for managing datasets.",
     )
     datasets_subparser = datasets_parser.add_subparsers(
         dest="datasets_cmd",
-        help="Use `datachain
+        help="Use `datachain dataset CMD --help` to display command-specific help",
     )
 
     parse_pull = datasets_subparser.add_parser(
         "pull",
         parents=[parent_parser],
-        description="Pull specific dataset version from
+        description="Pull specific dataset version from Studio.",
     )
     parse_pull.add_argument(
         "dataset",
         type=str,
-        help="Name and version of remote dataset created in
+        help="Name and version of remote dataset created in Studio",
     )
     parse_pull.add_argument("-o", "--output", type=str, help="Output")
     parse_pull.add_argument(
@@ -178,16 +174,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         action="store_true",
         help="Copy actual files after pulling remote dataset into local DB",
     )
-    parse_pull.add_argument(
-        "--edatachain",
-        default=False,
-        action="store_true",
-        help="Create .edatachain file",
-    )
-    parse_pull.add_argument(
-        "--edatachain-file",
-        help="Use a different filename for the resulting .edatachain file",
-    )
+
     parse_pull.add_argument(
         "--local-name",
         action="store",
@@ -202,7 +189,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     )
 
     parse_edit_dataset = datasets_subparser.add_parser(
-        "edit", parents=[parent_parser], description="Edit dataset metadata"
+        "edit", parents=[parent_parser], description="Edit dataset metadata."
     )
     parse_edit_dataset.add_argument("name", type=str, help="Dataset name")
     parse_edit_dataset.add_argument(
@@ -244,41 +231,41 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         "--team",
         action="store",
         default=None,
-        help="The team to edit a dataset. By default, it will use team from config
+        help="The team to edit a dataset. By default, it will use team from config",
     )
 
-
-        "ls", parents=[parent_parser], description="List datasets"
+    datasets_ls_parser = datasets_subparser.add_parser(
+        "ls", parents=[parent_parser], description="List datasets."
     )
-
+    datasets_ls_parser.add_argument(
         "--studio",
         action="store_true",
         default=False,
         help="List the files in the Studio",
     )
-
+    datasets_ls_parser.add_argument(
         "-L",
         "--local",
         action="store_true",
         default=False,
         help="List local files only",
     )
-
+    datasets_ls_parser.add_argument(
         "-a",
         "--all",
         action="store_true",
         default=True,
         help="List all files including hidden files",
     )
-
+    datasets_ls_parser.add_argument(
         "--team",
         action="store",
         default=None,
-        help="The team to list datasets for. By default, it will use team from config
+        help="The team to list datasets for. By default, it will use team from config",
     )
 
     rm_dataset_parser = datasets_subparser.add_parser(
-        "rm", parents=[parent_parser], description="
+        "rm", parents=[parent_parser], description="Remove dataset.", aliases=["remove"]
     )
     rm_dataset_parser.add_argument("name", type=str, help="Dataset name")
     rm_dataset_parser.add_argument(
@@ -292,7 +279,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         "--force",
         default=False,
         action=BooleanOptionalAction,
-        help="Force delete registered dataset with all of
+        help="Force delete registered dataset with all of its versions",
     )
     rm_dataset_parser.add_argument(
         "--studio",
@@ -318,13 +305,11 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         "--team",
         action="store",
         default=None,
-        help="The team to delete a dataset. By default, it will use team from config
+        help="The team to delete a dataset. By default, it will use team from config",
     )
 
     dataset_stats_parser = datasets_subparser.add_parser(
-        "stats",
-        parents=[parent_parser],
-        description="Shows basic dataset stats",
+        "stats", parents=[parent_parser], description="Show basic dataset statistics."
     )
     dataset_stats_parser.add_argument("name", type=str, help="Dataset name")
     dataset_stats_parser.add_argument(
@@ -349,7 +334,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     )
 
     parse_ls = subp.add_parser(
-        "ls", parents=[parent_parser], description="List storage contents"
+        "ls", parents=[parent_parser], description="List storage contents."
     )
     add_sources_arg(parse_ls, nargs="*")
     parse_ls.add_argument(
@@ -357,7 +342,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         "--long",
         action="count",
         default=0,
-        help="List files in
+        help="List files in long format",
     )
     parse_ls.add_argument(
         "--studio",
@@ -383,11 +368,11 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         "--team",
         action="store",
         default=None,
-        help="The team to list datasets for. By default, it will use team from config
+        help="The team to list datasets for. By default, it will use team from config",
     )
 
     parse_du = subp.add_parser(
-        "du", parents=[parent_parser], description="Display space usage"
+        "du", parents=[parent_parser], description="Display space usage."
     )
     add_sources_arg(parse_du)
     parse_du.add_argument(
@@ -405,8 +390,8 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         type=int,
         metavar="N",
         help=(
-            "Display sizes
-            "
+            "Display sizes up to N directory levels deep "
+            "(default: 0, summarize provided directory only)"
         ),
     )
     parse_du.add_argument(
@@ -417,32 +402,32 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     )
 
     parse_find = subp.add_parser(
-        "find", parents=[parent_parser], description="Search in a directory hierarchy"
+        "find", parents=[parent_parser], description="Search in a directory hierarchy."
     )
     add_sources_arg(parse_find)
     parse_find.add_argument(
         "--name",
         type=str,
         action="append",
-        help="
+        help="Match filename pattern",
     )
     parse_find.add_argument(
         "--iname",
         type=str,
         action="append",
-        help="
+        help="Match filename pattern (case insensitive)",
     )
     parse_find.add_argument(
         "--path",
         type=str,
         action="append",
-        help="Path to match pattern
+        help="Path to match pattern",
     )
     parse_find.add_argument(
         "--ipath",
         type=str,
         action="append",
-        help="Like -path but case insensitive
+        help="Like -path but case insensitive",
     )
     parse_find.add_argument(
         "--size",
@@ -450,7 +435,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         help=(
             "Filter by size (+ is greater or equal, - is less or equal). "
             "Specified size is in bytes, or use a suffix like K, M, G for "
-            "kilobytes, megabytes, gigabytes, etc
+            "kilobytes, megabytes, gigabytes, etc"
         ),
     )
     parse_find.add_argument(
@@ -470,14 +455,14 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     )
 
     parse_index = subp.add_parser(
-        "index", parents=[parent_parser], description="Index storage location"
+        "index", parents=[parent_parser], description="Index storage location."
     )
     add_sources_arg(parse_index)
 
     show_parser = subp.add_parser(
         "show",
         parents=[parent_parser],
-        description="Create a new dataset with a query script",
+        description="Create a new dataset with a query script.",
     )
     show_parser.add_argument("name", type=str, help="Dataset name")
     show_parser.add_argument(
@@ -493,7 +478,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     query_parser = subp.add_parser(
         "query",
         parents=[parent_parser],
-        description="Create a new dataset with a query script",
+        description="Create a new dataset with a query script.",
     )
     query_parser.add_argument(
         "script", metavar="<script.py>", type=str, help="Filepath for script"
@@ -507,7 +492,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         metavar="N",
         help=(
             "Use multiprocessing to run any query script UDFs with N worker processes. "
-            "N defaults to the CPU count
+            "N defaults to the CPU count"
         ),
     )
     query_parser.add_argument(
@@ -520,10 +505,12 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     )
 
     subp.add_parser(
-        "clear-cache",
+        "clear-cache",
+        parents=[parent_parser],
+        description="Clear the local file cache.",
     )
     subp.add_parser(
-        "gc", parents=[parent_parser], description="Garbage collect temporary tables"
+        "gc", parents=[parent_parser], description="Garbage collect temporary tables."
    )
 
     subp.add_parser("internal-run-udf", parents=[parent_parser])
@@ -536,12 +523,12 @@ def add_completion_parser(subparsers, parents):
     parser = subparsers.add_parser(
         "completion",
         parents=parents,
-        description="Output shell completion script",
+        description="Output shell completion script.",
     )
     parser.add_argument(
         "-s",
         "--shell",
-        help="Shell syntax for completions
+        help="Shell syntax for completions",
         default="bash",
         choices=shtab.SUPPORTED_SHELLS,
     )
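Nearly every subcommand above is built with parents=[parent_parser], argparse's mechanism for defining shared flags once; note the add_help=False on the parent, which avoids a duplicate -h. A compact sketch of how that inheritance behaves, with made-up commands:

from argparse import ArgumentParser

parent = ArgumentParser(add_help=False)
parent.add_argument("-v", "--verbose", action="count", default=0, help="Be verbose")

root = ArgumentParser(prog="demo")
subp = root.add_subparsers(dest="command")
subp.add_parser("ls", parents=[parent])  # `demo ls -v` now works
subp.add_parser("du", parents=[parent])  # ...and so does `demo du -v`

args = root.parse_args(["ls", "-v", "-v"])
print(args.command, args.verbose)        # ls 2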
datachain/cli/parser/job.py
CHANGED
@@ -1,19 +1,16 @@
 def add_jobs_parser(subparsers, parent_parser) -> None:
-    jobs_help = "
-    jobs_description =
-        "This will help us to run, cancel and view the status of the job in Studio. "
-    )
+    jobs_help = "Manage jobs in Studio"
+    jobs_description = "Commands to manage job execution in Studio."
     jobs_parser = subparsers.add_parser(
         "job", parents=[parent_parser], description=jobs_description, help=jobs_help
     )
     jobs_subparser = jobs_parser.add_subparsers(
         dest="cmd",
-        help="Use `
-        required=True,
+        help="Use `datachain auth CMD --help` to display command-specific help",
     )
 
     studio_run_help = "Run a job in Studio"
-    studio_run_description = "
+    studio_run_description = "Run a job in Studio."
 
     studio_run_parser = jobs_subparser.add_parser(
         "run",
@@ -25,56 +22,56 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
     studio_run_parser.add_argument(
         "query_file",
         action="store",
-        help="
+        help="Query file to run",
     )
 
     studio_run_parser.add_argument(
         "--team",
         action="store",
         default=None,
-        help="
+        help="Team to run job for (default: from config)",
     )
     studio_run_parser.add_argument(
         "--env-file",
         action="store",
-        help="File
+        help="File with environment variables for the job",
     )
 
     studio_run_parser.add_argument(
         "--env",
         nargs="+",
-        help="Environment
+        help="Environment variables in KEY=VALUE format",
     )
 
     studio_run_parser.add_argument(
         "--workers",
         type=int,
-        help="Number of workers
+        help="Number of workers for the job",
     )
     studio_run_parser.add_argument(
         "--files",
         nargs="+",
-        help="
+        help="Additional files to include in the job",
     )
     studio_run_parser.add_argument(
         "--python-version",
         action="store",
-        help="Python version
+        help="Python version for the job (e.g., 3.9, 3.10, 3.11)",
     )
     studio_run_parser.add_argument(
         "--req-file",
         action="store",
-        help="
+        help="Python requirements file",
     )
 
     studio_run_parser.add_argument(
         "--req",
         nargs="+",
-        help="Python package
+        help="Python package requirements",
     )
 
     studio_cancel_help = "Cancel a job in Studio"
-    studio_cancel_description = "
+    studio_cancel_description = "Cancel a running job in Studio."
 
     studio_cancel_parser = jobs_subparser.add_parser(
         "cancel",
@@ -86,19 +83,17 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
     studio_cancel_parser.add_argument(
         "job_id",
         action="store",
-        help="
+        help="Job ID to cancel",
     )
     studio_cancel_parser.add_argument(
         "--team",
         action="store",
         default=None,
-        help="
+        help="Team to cancel job for (default: from config)",
     )
 
-    studio_log_help = "Show
-    studio_log_description =
-        "This will display the logs and latest status of jobs in Studio"
-    )
+    studio_log_help = "Show job logs and status in Studio"
+    studio_log_description = "Display logs and current status of jobs in Studio."
 
     studio_log_parser = jobs_subparser.add_parser(
         "logs",
@@ -110,11 +105,11 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
     studio_log_parser.add_argument(
         "job_id",
         action="store",
-        help="
+        help="Job ID to show logs for",
     )
     studio_log_parser.add_argument(
         "--team",
         action="store",
         default=None,
-        help="
+        help="Team to check logs for (default: from config)",
     )
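A hedged sketch of how the repeated-value flags above behave: nargs="+" collects one or more tokens, so --env can take several KEY=VALUE pairs after a single flag. The dict-splitting step is illustrative only; it is not necessarily how datachain consumes args.env:

from argparse import ArgumentParser

p = ArgumentParser(prog="job-run-demo")  # stand-in, not datachain's parser
p.add_argument("query_file", help="Query file to run")
p.add_argument("--env", nargs="+", default=[],
               help="Environment variables in KEY=VALUE format")
p.add_argument("--workers", type=int, help="Number of workers for the job")

args = p.parse_args(["query.py", "--env", "A=1", "B=2", "--workers", "4"])
env = dict(pair.split("=", 1) for pair in args.env)
print(args.query_file, env, args.workers)  # query.py {'A': '1', 'B': '2'} 4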