PyPI - calkit-python - Versions diffs - 0.0.9__tar.gz → 0.1.0__tar.gz - Mend

calkit-python 0.0.9tar.gz → 0.1.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

{calkit_python-0.0.9 → calkit_python-0.1.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: calkit-python
-Version: 0.0.9
+Version: 0.1.0
 Summary: Reproducibility simplified.
 Project-URL: Homepage, https://github.com/calkit/calkit
 Project-URL: Issues, https://github.com/calkit/calkit/issues
@@ -107,3 +107,34 @@ what files should be considered datasets, figures, publications, etc.
 The Calkit cloud reads this database and registers the various entities
 as part of the entire ecosystem such that if a project is made public,
 other researchers can find and reuse your work to accelerate their own.
+## Design/UX principles
+1. Be opinionated. Users should not be forced to make unimportant decisions.
+   However, if they disagree, they should have the ability to change the
+   default behavior. The most common use case should be default.
+   Commands that are commonly executed as groups should be combined, but
+   still available to be run individually if desired.
+1. Commits should ideally be made automatically as part of actions that make
+   changes to the project repo. For
+   example, if a new object is added via the CLI, a commit should be made
+   right then unless otherwise specified. This saves the trouble of running
+   multiple commands and encourages atomic commits.
+1. Pushes should require explicit input from the user.
+   It is still TBD whether or not a pull should automatically be
+   made, though in general we want to encourage trunk-based development, i.e.,
+   only working on a single branch. One exception might be for local
+   experimentation that has a high likelihood of failure, in which case a
+   branch can be a nice way to throw those changes away.
+   Multiple branches should probably not live in the cloud, however, except
+   for small, quickly merged pull requests.
+1. Idempotency is always a good thing. Unnecessary state is bad. For example,
+   we should not encourage caching pipeline outputs for operations that are
+   cheap. Caching should happen either for state that is valuable on its
+   own, like a figure, or for an intermediate result that is expensive to
+   generate.
+1. There should be as few frequently used commands as possible, and they
+   should require as little memorization as possible to know how to
+   execute, e.g., a user should be able to keep running `calkit run` and
+   that's all they really need to do to make sure the project is
+   up-to-date.

{calkit_python-0.0.9 → calkit_python-0.1.0}/README.md RENAMED Viewed

@@ -79,3 +79,34 @@ what files should be considered datasets, figures, publications, etc.
 The Calkit cloud reads this database and registers the various entities
 as part of the entire ecosystem such that if a project is made public,
 other researchers can find and reuse your work to accelerate their own.
+## Design/UX principles
+1. Be opinionated. Users should not be forced to make unimportant decisions.
+   However, if they disagree, they should have the ability to change the
+   default behavior. The most common use case should be default.
+   Commands that are commonly executed as groups should be combined, but
+   still available to be run individually if desired.
+1. Commits should ideally be made automatically as part of actions that make
+   changes to the project repo. For
+   example, if a new object is added via the CLI, a commit should be made
+   right then unless otherwise specified. This saves the trouble of running
+   multiple commands and encourages atomic commits.
+1. Pushes should require explicit input from the user.
+   It is still TBD whether or not a pull should automatically be
+   made, though in general we want to encourage trunk-based development, i.e.,
+   only working on a single branch. One exception might be for local
+   experimentation that has a high likelihood of failure, in which case a
+   branch can be a nice way to throw those changes away.
+   Multiple branches should probably not live in the cloud, however, except
+   for small, quickly merged pull requests.
+1. Idempotency is always a good thing. Unnecessary state is bad. For example,
+   we should not encourage caching pipeline outputs for operations that are
+   cheap. Caching should happen either for state that is valuable on its
+   own, like a figure, or for an intermediate result that is expensive to
+   generate.
+1. There should be as few frequently used commands as possible, and they
+   should require as little memorization as possible to know how to
+   execute, e.g., a user should be able to keep running `calkit run` and
+   that's all they really need to do to make sure the project is
+   up-to-date.

{calkit_python-0.0.9 → calkit_python-0.1.0}/calkit/__init__.py RENAMED Viewed

@@ -1,4 +1,4 @@
-__version__ = "0.0.9"
+__version__ = "0.1.0"
 from .core import *
 from . import git
@@ -6,3 +6,4 @@ from . import dvc
 from . import cloud
 from . import jupyter
 from . import config
+from . import models

{calkit_python-0.0.9 → calkit_python-0.1.0}/calkit/cli/core.py RENAMED Viewed

@@ -1,7 +1,6 @@
 """Core CLI functionality."""
 import os
-import pty
 import subprocess
 import typer
@@ -19,4 +18,6 @@ def run_cmd(cmd: list[str]):
     if os.name == "nt":
         subprocess.call(cmd)
     else:
+        import pty
         pty.spawn(cmd, lambda fd: os.read(fd, 1024))

calkit_python-0.1.0/calkit/cli/import_.py ADDED Viewed

@@ -0,0 +1,122 @@
+"""CLI for importing objects."""
+from __future__ import annotations
+import os
+import subprocess
+from typing import Annotated
+import git
+import typer
+import calkit
+import_app = typer.Typer(no_args_is_help=True)
+@import_app.command(name="dataset")
+def import_dataset(
+    src_path: Annotated[
+        str,
+        typer.Argument(
+            help=(
+                "Location of dataset, including project owner and name, e.g., "
+                "someone/some-project/data/some-data.csv"
+            )
+        ),
+    ],
+    dest_path: Annotated[
+        str,
+        typer.Argument(help="Output path at which to save."),
+    ] = None,
+    overwrite: Annotated[
+        bool,
+        typer.Option(
+            "--overwrite",
+            "-f",
+            help="Force adding the dataset even if it already exists.",
+        ),
+    ] = False,
+):
+    """Import a dataset.
+    Currently only supports datasets kept in DVC, not Git.
+    """
+    # The source must look like "owner/project/path/in/project"; fail with a
+    # clear message instead of an IndexError or an empty dataset path
+    path_split = src_path.split("/", 2)
+    if len(path_split) < 3 or not all(path_split):
+        raise ValueError(
+            "Source path must be of the form owner/project/path/to/dataset"
+        )
+    owner_name, project_name, path = path_split
+    # By default the dataset keeps the same path it has in the source project
+    if dest_path is None:
+        dest_path = path
+    # Ensure we don't already have a dataset at this path
+    ck_info = calkit.load_calkit_info()
+    datasets = ck_info.get("datasets", [])
+    already_exists = dest_path in [ds["path"] for ds in datasets]
+    if already_exists and not overwrite:
+        raise ValueError(
+            "A dataset already exists in this project at this path"
+        )
+    if already_exists:
+        # Overwriting: drop the old entry so it is not listed twice
+        datasets = [ds for ds in datasets if ds["path"] != dest_path]
+    repo = git.Repo()
+    # Obtain, save, and commit the .dvc file for the dataset
+    typer.echo("Fetching import info")
+    resp = calkit.cloud.get(
+        f"/projects/{owner_name}/{project_name}/datasets/{path}"
+    )
+    if "dvc_import" not in resp:
+        raise ValueError("This file is not available to import with DVC")
+    dvc_fpath = dest_path + ".dvc"
+    dvc_dir = os.path.dirname(dvc_fpath)
+    # os.makedirs("") raises FileNotFoundError, so only create a directory
+    # when the destination is not directly in the current directory
+    if dvc_dir:
+        os.makedirs(dvc_dir, exist_ok=True)
+    # Update path in .dvc file if necessary; it is relative to the .dvc file
+    dvc_import = resp["dvc_import"]
+    dvc_import["outs"][0]["path"] = os.path.basename(dest_path)
+    typer.echo("Saving .dvc file")
+    with open(dvc_fpath, "w") as f:
+        calkit.ryaml.dump(dvc_import, f)
+    repo.git.add(dvc_fpath)
+    # Ensure we have a DVC remote corresponding to this project, and that we
+    # have a token set for that remote
+    typer.echo("Adding new DVC remote")
+    calkit.dvc.add_external_remote(
+        owner_name=owner_name, project_name=project_name
+    )
+    repo.git.add(".dvc/config")
+    # Add to .gitignore
+    typer.echo("Checking .gitignore")
+    if os.path.isfile(".gitignore"):
+        with open(".gitignore") as f:
+            gitignore = f.read()
+    else:
+        gitignore = ""
+    if dest_path not in gitignore.split("\n"):
+        typer.echo(f"Adding {dest_path} to .gitignore")
+        gitignore = gitignore.rstrip() + "\n" + dest_path + "\n"
+        with open(".gitignore", "w") as f:
+            f.write(gitignore)
+        repo.git.add(".gitignore")
+    # Add to datasets in calkit.yaml
+    typer.echo("Adding dataset to calkit.yaml")
+    new_ds = calkit.models.ImportedDataset(
+        path=dest_path,
+        title=resp.get("title"),
+        description=resp.get("description"),
+        stage=None,
+        imported_from=calkit.models._ImportedFromProject(
+            project=f"{owner_name}/{project_name}",
+            path=path,
+            git_rev=None,  # TODO?
+        ),
+    )
+    datasets.append(new_ds.model_dump())
+    ck_info["datasets"] = datasets
+    with open("calkit.yaml", "w") as f:
+        calkit.ryaml.dump(ck_info, f)
+    repo.git.add("calkit.yaml")
+    # Commit any necessary changes
+    typer.echo("Committing changes")
+    repo.git.commit(["-m", f"Import dataset {src_path}"])
+    # Run dvc pull
+    typer.echo("Running dvc pull")
+    subprocess.call(["dvc", "pull", dest_path])

{calkit_python-0.0.9 → calkit_python-0.1.0}/calkit/cli/list.py RENAMED Viewed

@@ -68,3 +68,12 @@ def list_publications():
 @list_app.command(name="references")
 def list_references():
     _list_objects("references")
+@list_app.command(name="environments")
+def list_environments():
+    """List the environments defined in calkit.yaml."""
+    # A key that is present but null yields None, so fall back to an empty dict
+    envs = calkit.load_calkit_info().get("environments") or {}
+    # Environments must map names to definitions; report a list cleanly
+    # rather than crashing on .items()
+    if isinstance(envs, list):
+        typer.echo(
+            "Error: Environments should be a dict, not a list", err=True
+        )
+        raise typer.Exit(1)
+    for name, env in envs.items():
+        typer.echo(name + ":")
+        for k, v in env.items():
+            typer.echo(f"    {k}: {v}")

{calkit_python-0.0.9 → calkit_python-0.1.0}/calkit/cli/main.py RENAMED Viewed

@@ -4,13 +4,16 @@ from __future__ import annotations
 import os
 import subprocess
+import sys
+import git
 import typer
 from typing_extensions import Annotated, Optional
 import calkit
 from calkit.cli import print_sep, run_cmd
 from calkit.cli.config import config_app
+from calkit.cli.import_ import import_app
 from calkit.cli.list import list_app
 from calkit.cli.new import new_app
 from calkit.cli.notebooks import notebooks_app
@@ -27,6 +30,7 @@ app.add_typer(
 )
 app.add_typer(notebooks_app, name="nb", help="Work with Jupyter notebooks.")
 app.add_typer(list_app, name="list", help="List Calkit objects.")
+app.add_typer(import_app, name="import", help="Import objects.")
 @app.callback()
@@ -37,7 +41,7 @@ def main(
     ] = False,
 ):
     if version:
-        typer.echo(calkit.__version__)
+        typer.echo(f"Calkit {calkit.__version__}")
         raise typer.Exit()
@@ -119,8 +123,16 @@ def add(
                 if os.path.isdir(path):
                     typer.echo("Cannot auto-add directories; use git or dvc")
                     raise typer.Exit(1)
+        repo = git.Repo()
         for path in paths:
             # Detect if this file should be tracked with Git or DVC
+            # First see if it's in Git
+            if repo.git.ls_files(path):
+                typer.echo(
+                    f"Adding {path} to Git since it's already in the repo"
+                )
+                subprocess.call(["git", "add", path])
+                continue
             if os.path.splitext(path)[-1] in dvc_extensions:
                 typer.echo(f"Adding {path} to DVC per its extension")
                 subprocess.call(["dvc", "add", path])
@@ -168,6 +180,50 @@ def commit(
         push()
+@app.command(name="save")
+def save(
+    paths: Annotated[
+        Optional[list[str]],
+        typer.Argument(
+            help=(
+                "Paths to add and commit. If not provided, will default to "
+                "any changed files that have been added previously."
+            ),
+        ),
+    ] = None,
+    all: Annotated[
+        Optional[bool],
+        typer.Option(
+            "--all", "-a", help="Automatically stage all changed files."
+        ),
+    ] = False,
+    message: Annotated[
+        Optional[str], typer.Option("--message", "-m", help="Commit message.")
+    ] = None,
+    to: Annotated[
+        str,
+        typer.Option(
+            "--to", "-t", help="System with which to add (git or dvc)."
+        ),
+    ] = None,
+    no_push: Annotated[
+        bool,
+        typer.Option(
+            "--no-push", help="Do not push to Git and DVC after committing."
+        ),
+    ] = False,
+):
+    """Save paths by committing and pushing.
+    This is essentially git/dvc add, commit, and push in one step.
+    """
+    # Explicitly listed paths are added first (to Git or DVC per ``to``)
+    if paths is not None:
+        add(paths, to=to)
+    # Stage every changed file when --all was requested or when no paths
+    # were given; previously the --all flag was accepted but ignored
+    commit(all=bool(all) or paths is None, message=message)
+    if not no_push:
+        push()
 @app.command(name="pull", help="Pull with both Git and DVC.")
 def pull():
     typer.echo("Git pulling")
@@ -354,3 +410,78 @@ def manual_step(
         )
     input(message + " (press enter to confirm): ")
     typer.echo("Done")
+@app.command(
+    name="run-env",
+    help="Run a command in an environment.",
+    context_settings={"ignore_unknown_options": True},
+)
+def run_in_env(
+    cmd: Annotated[
+        list[str], typer.Argument(help="Command to run in the environment.")
+    ],
+    env_name: Annotated[
+        str,
+        typer.Option(
+            "--name",
+            "-n",
+            help=(
+                "Environment name in which to run. "
+                "Only necessary if there are multiple in this project."
+            ),
+        ),
+    ] = None,
+    verbose: Annotated[
+        bool, typer.Option("--verbose", "-v", help="Print verbose output.")
+    ] = False,
+):
+    """Run ``cmd`` inside one of the environments from calkit.yaml.
+
+    Docker environments run the command with ``bash -c`` in a container with
+    the current directory mounted at the environment's ``wdir``; conda
+    environments use ``conda run``. Exits with status 1 on configuration
+    errors and with the command's own status if it fails.
+    """
+    # Local import: only this command needs shlex
+    import shlex
+
+    ck_info = calkit.load_calkit_info()
+    envs = ck_info.get("environments", {})
+    if not envs:
+        typer.echo("No environments defined in calkit.yaml", err=True)
+        raise typer.Exit(1)
+    if isinstance(envs, list):
+        typer.echo(
+            "Error: Environments should be a dict, not a list", err=True
+        )
+        raise typer.Exit(1)
+    if len(envs) > 1 and env_name is None:
+        typer.echo(
+            "Environment must be specified if there are multiple",
+            err=True,
+        )
+        raise typer.Exit(1)
+    if env_name is None:
+        # Exactly one environment is defined, so use it
+        env_name = next(iter(envs))
+    elif env_name not in envs:
+        # Report an unknown name cleanly instead of raising KeyError
+        typer.echo(
+            f"Environment '{env_name}' not found in calkit.yaml", err=True
+        )
+        raise typer.Exit(1)
+    env = envs[env_name]
+    kind = env.get("kind")
+    if kind == "docker":
+        image_name = env.get("image", env_name)
+        wdir = env.get("wdir", "/work")
+        # A single argument is passed through as a shell string, so callers
+        # can still use shell syntax such as "a && b"; multiple arguments
+        # are quoted so that spaces and quotes survive `bash -c`
+        shell_cmd = cmd[0] if len(cmd) == 1 else shlex.join(cmd)
+        full_cmd = [
+            "docker",
+            "run",
+            # Only allocate a TTY when one is actually attached
+            "-it" if sys.stdin.isatty() else "-i",
+            "--rm",
+            "-w",
+            wdir,
+            "-v",
+            f"{os.getcwd()}:{wdir}",
+            image_name,
+            "bash",
+            "-c",
+            shell_cmd,
+        ]
+    elif kind == "conda":
+        full_cmd = ["conda", "run", "-n", env_name] + cmd
+    else:
+        typer.echo("Environment kind not supported", err=True)
+        raise typer.Exit(1)
+    if verbose:
+        typer.echo(f"Running command: {full_cmd}")
+    # Propagate failure so scripts and pipelines can detect it
+    returncode = subprocess.call(full_cmd)
+    if returncode != 0:
+        raise typer.Exit(returncode)

calkit-python 0.0.9__tar.gz → 0.1.0__tar.gz

calkit-python 0.0.9tar.gz → 0.1.0tar.gz