calkit-python 0.3.3__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {calkit_python-0.3.3 → calkit_python-0.5.0}/PKG-INFO +13 -8
- {calkit_python-0.3.3 → calkit_python-0.5.0}/README.md +12 -7
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/__init__.py +2 -1
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/core.py +1 -1
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/list.py +14 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/main.py +175 -12
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/new.py +187 -0
- calkit_python-0.5.0/calkit/core.py +98 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/models.py +57 -0
- calkit_python-0.5.0/calkit/templates/__init__.py +1 -0
- calkit_python-0.5.0/calkit/templates/core.py +118 -0
- calkit_python-0.5.0/calkit/templates/latex/__init__.py +1 -0
- calkit_python-0.5.0/calkit/templates/latex/article/paper.tex +43 -0
- calkit_python-0.5.0/calkit/templates/latex/core.py +11 -0
- calkit_python-0.5.0/calkit/templates/latex/jfm/jfm.bst +1659 -0
- calkit_python-0.5.0/calkit/templates/latex/jfm/jfm.cls +1518 -0
- calkit_python-0.5.0/calkit/templates/latex/jfm/lineno-FLM.sty +113 -0
- calkit_python-0.5.0/calkit/templates/latex/jfm/paper.tex +468 -0
- calkit_python-0.5.0/calkit/templates/latex/jfm/upmath.sty +158 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/cli/test_list.py +4 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/cli/test_new.py +39 -0
- calkit_python-0.5.0/calkit/tests/test_templates.py +14 -0
- calkit_python-0.5.0/docs/tutorials/adding-latex-pub-docker.md +47 -0
- calkit_python-0.5.0/docs/tutorials/img/run-proc.png +0 -0
- calkit_python-0.5.0/docs/tutorials/procedures.md +120 -0
- calkit_python-0.3.3/calkit/core.py +0 -56
- {calkit_python-0.3.3 → calkit_python-0.5.0}/.github/FUNDING.yml +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/.github/workflows/publish-test.yml +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/.github/workflows/publish.yml +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/.gitignore +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/LICENSE +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/__init__.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/config.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/import_.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/notebooks.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/office.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cloud.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/config.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/data.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/docker.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/dvc.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/git.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/gui.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/jupyter.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/office.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/server.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/__init__.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/cli/__init__.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/cli/test_main.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/test_core.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/test_dvc.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/test_jupyter.py +0 -0
- {calkit_python-0.3.3 → calkit_python-0.5.0}/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: calkit-python
|
|
3
|
-
Version: 0.3.3
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Reproducibility simplified.
|
|
5
5
|
Project-URL: Homepage, https://github.com/calkit/calkit
|
|
6
6
|
Project-URL: Issues, https://github.com/calkit/calkit/issues
|
|
@@ -31,21 +31,25 @@ Description-Content-Type: text/markdown
|
|
|
31
31
|
|
|
32
32
|
# Calkit
|
|
33
33
|
|
|
34
|
-
[Calkit](https://calkit.io)
|
|
34
|
+
[Calkit](https://calkit.io) helps simplify reproducibility,
|
|
35
35
|
acting as a layer on top of
|
|
36
36
|
[Git](https://git-scm.com/), [DVC](https://dvc.org/),
|
|
37
|
-
[Docker](https://docker.com),
|
|
38
|
-
|
|
39
|
-
|
|
37
|
+
[Docker](https://docker.com),
|
|
38
|
+
and adds a domain-specific data model
|
|
39
|
+
such that all aspects of the research process can be fully described in a
|
|
40
|
+
single repository and therefore easily consumed by others.
|
|
40
41
|
|
|
41
42
|
## Tutorials
|
|
42
43
|
|
|
44
|
+
- [Defining and executing manual procedures](docs/tutorials/procedures.md)
|
|
45
|
+
- [Adding a new LaTeX-based publication with its own Docker build environment](docs/tutorials/adding-latex-pub-docker.md)
|
|
46
|
+
- [A reproducible workflow using Microsoft Office (Word and Excel)](https://petebachant.me/office-repro/)
|
|
43
47
|
- [Reproducible OpenFOAM simulations](https://petebachant.me/reproducible-openfoam/)
|
|
44
48
|
|
|
45
49
|
## Why does reproducibility matter?
|
|
46
50
|
|
|
47
51
|
If your work is reproducible, that means that someone else can "run" it and
|
|
48
|
-
|
|
52
|
+
calculate the same results or outputs.
|
|
49
53
|
This is a major step towards addressing
|
|
50
54
|
[the replication crisis](https://en.wikipedia.org/wiki/Replication_crisis)
|
|
51
55
|
and has some major benefits for both you as an individual and the research
|
|
@@ -63,7 +67,8 @@ community:
|
|
|
63
67
|
## Why another tool/platform?
|
|
64
68
|
|
|
65
69
|
Git, GitHub, DVC, Docker et al. are amazing tools/platforms, but their
|
|
66
|
-
use involves multiple fairly difficult learning curves
|
|
70
|
+
use involves multiple fairly difficult learning curves,
|
|
71
|
+
and tying them together might mean developing something new for each project.
|
|
67
72
|
Our goal is to provide a single tool and platform to unify all of these so
|
|
68
73
|
that there is a single, gentle learning curve.
|
|
69
74
|
However, it is not our goal to hide or replace these underlying components.
|
|
@@ -152,7 +157,7 @@ other researchers can find and reuse your work to accelerate their own.
|
|
|
152
157
|
own, like a figure, or for an intermediate result that is expensive to
|
|
153
158
|
generate.
|
|
154
159
|
1. There should be the smallest number of
|
|
155
|
-
frequently used commands as possible, and they should require
|
|
160
|
+
frequently used commands as possible, and they should require as little
|
|
156
161
|
memorization as possible to know how to execute, e.g., a user should be
|
|
157
162
|
able to keep running `calkit run` and that's all they really need to do
|
|
158
163
|
to make sure the project is up-to-date.
|
|
@@ -1,20 +1,24 @@
|
|
|
1
1
|
# Calkit
|
|
2
2
|
|
|
3
|
-
[Calkit](https://calkit.io)
|
|
3
|
+
[Calkit](https://calkit.io) helps simplify reproducibility,
|
|
4
4
|
acting as a layer on top of
|
|
5
5
|
[Git](https://git-scm.com/), [DVC](https://dvc.org/),
|
|
6
|
-
[Docker](https://docker.com),
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
[Docker](https://docker.com),
|
|
7
|
+
and adds a domain-specific data model
|
|
8
|
+
such that all aspects of the research process can be fully described in a
|
|
9
|
+
single repository and therefore easily consumed by others.
|
|
9
10
|
|
|
10
11
|
## Tutorials
|
|
11
12
|
|
|
13
|
+
- [Defining and executing manual procedures](docs/tutorials/procedures.md)
|
|
14
|
+
- [Adding a new LaTeX-based publication with its own Docker build environment](docs/tutorials/adding-latex-pub-docker.md)
|
|
15
|
+
- [A reproducible workflow using Microsoft Office (Word and Excel)](https://petebachant.me/office-repro/)
|
|
12
16
|
- [Reproducible OpenFOAM simulations](https://petebachant.me/reproducible-openfoam/)
|
|
13
17
|
|
|
14
18
|
## Why does reproducibility matter?
|
|
15
19
|
|
|
16
20
|
If your work is reproducible, that means that someone else can "run" it and
|
|
17
|
-
|
|
21
|
+
calculate the same results or outputs.
|
|
18
22
|
This is a major step towards addressing
|
|
19
23
|
[the replication crisis](https://en.wikipedia.org/wiki/Replication_crisis)
|
|
20
24
|
and has some major benefits for both you as an individual and the research
|
|
@@ -32,7 +36,8 @@ community:
|
|
|
32
36
|
## Why another tool/platform?
|
|
33
37
|
|
|
34
38
|
Git, GitHub, DVC, Docker et al. are amazing tools/platforms, but their
|
|
35
|
-
use involves multiple fairly difficult learning curves
|
|
39
|
+
use involves multiple fairly difficult learning curves,
|
|
40
|
+
and tying them together might mean developing something new for each project.
|
|
36
41
|
Our goal is to provide a single tool and platform to unify all of these so
|
|
37
42
|
that there is a single, gentle learning curve.
|
|
38
43
|
However, it is not our goal to hide or replace these underlying components.
|
|
@@ -121,7 +126,7 @@ other researchers can find and reuse your work to accelerate their own.
|
|
|
121
126
|
own, like a figure, or for an intermediate result that is expensive to
|
|
122
127
|
generate.
|
|
123
128
|
1. There should be the smallest number of
|
|
124
|
-
frequently used commands as possible, and they should require
|
|
129
|
+
frequently used commands as possible, and they should require as little
|
|
125
130
|
memorization as possible to know how to execute, e.g., a user should be
|
|
126
131
|
able to keep running `calkit run` and that's all they really need to do
|
|
127
132
|
to make sure the project is up-to-date.
|
|
@@ -77,3 +77,17 @@ def list_environments():
|
|
|
77
77
|
typer.echo(name + ":")
|
|
78
78
|
for k, v in env.items():
|
|
79
79
|
typer.echo(f" {k}: {v}")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@list_app.command(name="templates")
def list_templates():
    # Echo every entry of calkit.templates.TEMPLATES as "{kind}/{name}".
    # NOTE: documented with comments rather than a docstring, since a
    # docstring would be picked up as the command's CLI help text.
    labels = [
        f"{kind}/{name}"
        for kind, tpl_dict in calkit.templates.TEMPLATES.items()
        for name in tpl_dict
    ]
    for label in labels:
        typer.echo(label)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@list_app.command(name="procedures")
def list_procedures():
    # Echo the name of each procedure found under the "procedures" key of
    # the loaded Calkit project info; echoes nothing if the key is absent.
    # NOTE: documented with comments rather than a docstring, since a
    # docstring would be picked up as the command's CLI help text.
    procedures = calkit.load_calkit_info().get("procedures", {})
    for proc_name in procedures:
        typer.echo(proc_name)
|
|
@@ -2,11 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import csv
|
|
5
6
|
import hashlib
|
|
6
7
|
import json
|
|
7
8
|
import os
|
|
8
9
|
import subprocess
|
|
9
10
|
import sys
|
|
11
|
+
import time
|
|
12
|
+
from datetime import UTC, datetime, timedelta
|
|
10
13
|
|
|
11
14
|
import git
|
|
12
15
|
import typer
|
|
@@ -20,6 +23,7 @@ from calkit.cli.list import list_app
|
|
|
20
23
|
from calkit.cli.new import new_app
|
|
21
24
|
from calkit.cli.notebooks import notebooks_app
|
|
22
25
|
from calkit.cli.office import office_app
|
|
26
|
+
from calkit.models import Procedure
|
|
23
27
|
|
|
24
28
|
app = typer.Typer(
|
|
25
29
|
invoke_without_command=True,
|
|
@@ -481,11 +485,21 @@ def run_in_env(
|
|
|
481
485
|
),
|
|
482
486
|
),
|
|
483
487
|
] = None,
|
|
488
|
+
wdir: Annotated[
|
|
489
|
+
str,
|
|
490
|
+
typer.Option(
|
|
491
|
+
"--wdir",
|
|
492
|
+
help=(
|
|
493
|
+
"Working directory. "
|
|
494
|
+
"By default will run current working directory."
|
|
495
|
+
),
|
|
496
|
+
),
|
|
497
|
+
] = None,
|
|
484
498
|
verbose: Annotated[
|
|
485
499
|
bool, typer.Option("--verbose", "-v", help="Print verbose output.")
|
|
486
500
|
] = False,
|
|
487
501
|
):
|
|
488
|
-
ck_info = calkit.load_calkit_info()
|
|
502
|
+
ck_info = calkit.load_calkit_info(process_includes="environments")
|
|
489
503
|
envs = ck_info.get("environments", {})
|
|
490
504
|
if not envs:
|
|
491
505
|
raise_error("No environments defined in calkit.yaml")
|
|
@@ -507,33 +521,42 @@ def run_in_env(
|
|
|
507
521
|
if env_name is None:
|
|
508
522
|
raise_error("Environment must be specified if there are multiple")
|
|
509
523
|
env = envs[env_name]
|
|
510
|
-
|
|
524
|
+
if wdir is not None:
|
|
525
|
+
cwd = os.path.abspath(wdir)
|
|
526
|
+
else:
|
|
527
|
+
cwd = os.getcwd()
|
|
511
528
|
image_name = env.get("image", env_name)
|
|
512
|
-
|
|
529
|
+
docker_wdir = env.get("wdir", "/work")
|
|
530
|
+
shell = env.get("shell", "sh")
|
|
531
|
+
platform = env.get("platform")
|
|
513
532
|
if env["kind"] == "docker":
|
|
514
|
-
|
|
515
|
-
|
|
533
|
+
shell_cmd = " ".join(cmd)
|
|
534
|
+
docker_cmd = [
|
|
516
535
|
"docker",
|
|
517
536
|
"run",
|
|
537
|
+
]
|
|
538
|
+
if platform:
|
|
539
|
+
docker_cmd += ["--platform", platform]
|
|
540
|
+
docker_cmd += [
|
|
518
541
|
"-it" if sys.stdin.isatty() else "-i",
|
|
519
542
|
"--rm",
|
|
520
543
|
"-w",
|
|
521
|
-
|
|
544
|
+
docker_wdir,
|
|
522
545
|
"-v",
|
|
523
|
-
f"{cwd}:{
|
|
546
|
+
f"{cwd}:{docker_wdir}",
|
|
524
547
|
image_name,
|
|
525
|
-
|
|
548
|
+
shell,
|
|
526
549
|
"-c",
|
|
527
|
-
f"{
|
|
550
|
+
f"{shell_cmd}",
|
|
528
551
|
]
|
|
529
552
|
if verbose:
|
|
530
|
-
typer.echo(f"Running command: {
|
|
531
|
-
subprocess.call(
|
|
553
|
+
typer.echo(f"Running command: {docker_cmd}")
|
|
554
|
+
subprocess.call(docker_cmd, cwd=wdir)
|
|
532
555
|
elif env["kind"] == "conda":
|
|
533
556
|
cmd = ["conda", "run", "-n", env_name] + cmd
|
|
534
557
|
if verbose:
|
|
535
558
|
typer.echo(f"Running command: {cmd}")
|
|
536
|
-
subprocess.call(cmd)
|
|
559
|
+
subprocess.call(cmd, cwd=wdir)
|
|
537
560
|
else:
|
|
538
561
|
raise_error("Environment kind not supported")
|
|
539
562
|
|
|
@@ -621,3 +644,143 @@ def build_docker(
|
|
|
621
644
|
inspect[0]["DockerfileMD5"] = dockerfile_md5
|
|
622
645
|
with open(lock_fpath, "w") as f:
|
|
623
646
|
json.dump(inspect, f, indent=4)
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
@app.command(name="runproc", help="Run or execute a procedure.")
def run_procedure(
    name: Annotated[str, typer.Argument(help="The name of the procedure.")],
    no_commit: Annotated[
        bool,
        typer.Option("--no-commit", help="Do not commit after each action."),
    ] = False,
):
    """Interactively walk through a procedure and log each step to CSV.

    The procedure is looked up by ``name`` in the project's ``procedures``,
    validated against the ``Procedure`` model, and then executed step by
    step: the user is prompted for confirmation or for each declared input,
    and one row per step is appended to
    ``.calkit/procedure-runs/{name}/{start timestamp}.csv``. Unless
    ``no_commit`` is set, the log file is committed to Git after each step.

    The Git working tree must be clean before starting so the log
    corresponds to the committed version of the procedure.
    """

    def wait(seconds):
        """Count down ``seconds`` on a single terminal line."""
        typer.echo(f"Wait {seconds} seconds")
        dt = 0.1
        while seconds >= 0:
            mins, secs = divmod(seconds, 60)
            mins, secs = int(mins), int(secs)
            # Carriage return (no newline) redraws the countdown in place
            out = f"Time left: {mins:02d}:{secs:02d}\r"
            typer.echo(out, nl=False)
            time.sleep(dt)
            seconds -= dt
        typer.echo()

    def convert_value(value, dtype):
        """Convert typed-in text to ``dtype``; raise ValueError if invalid."""
        if dtype == "int":
            return int(value)
        elif dtype == "float":
            return float(value)
        elif dtype == "str":
            return str(value)
        elif dtype == "bool":
            # bool() of any non-empty string is True, so "false", "no" or
            # "0" must be parsed explicitly rather than passed to bool()
            normalized = str(value).strip().lower()
            if normalized in ("true", "t", "yes", "y", "1"):
                return True
            # Empty input keeps its previous meaning of False
            if normalized in ("false", "f", "no", "n", "0", ""):
                return False
            raise ValueError(f"Cannot interpret '{value}' as a bool")
        return value

    ck_info = calkit.load_calkit_info(process_includes="procedures")
    procs = ck_info.get("procedures", {})
    if name not in procs:
        raise_error(f"'{name}' is not defined as a procedure")
    try:
        proc = Procedure.model_validate(procs[name])
    except Exception as e:
        raise_error(f"Procedure '{name}' is invalid: {e}")
    git_repo = git.Repo()
    # Check to make sure the working tree is clean, so we know we ran the
    # committed version of the procedure
    git_status = git_repo.git.status()
    if "working tree clean" not in git_status:
        raise_error(
            f"Cannot execute procedures unless repo is clean:\n\n{git_status}"
        )
    t_start_overall = calkit.utcnow()
    # Formulate headers for CSV file, which must contain all inputs from all
    # steps
    headers = [
        "calkit_version",
        "procedure_name",
        "step",
        "start",
        "end",
    ]
    for step in proc.steps:
        if step.inputs:
            for iname in step.inputs:
                if iname not in headers:
                    headers.append(iname)
    # TODO: Add ability to process periodic logic
    # See if now falls between start and end, and if there is a run with a
    # timestamp corresponding to the period in which now falls
    # If so, exit
    # If not, continue
    # Create empty CSV if one doesn't exist
    t_start_overall_str = t_start_overall.isoformat(timespec="seconds")
    fpath = f".calkit/procedure-runs/{name}/{t_start_overall_str}.csv"
    os.makedirs(os.path.dirname(fpath), exist_ok=True)
    if not os.path.isfile(fpath):
        # newline="" lets the csv module control line endings, avoiding
        # blank rows on platforms that translate "\n"
        with open(fpath, "w", newline="") as f:
            csv.writer(f).writerow(headers)
    for n, step in enumerate(proc.steps):
        typer.echo(f"Starting step {n}")
        t_start = calkit.utcnow()
        if step.wait_before_s:
            wait(step.wait_before_s)
        # Execute the step
        inputs = step.inputs
        input_vals = {}
        if not inputs:
            input(f"{step.summary} and press enter when complete: ")
        else:
            typer.echo(step.summary)
            for input_name, i in inputs.items():
                msg = f"Enter {input_name}"
                if i.units:
                    msg += f" ({i.units})"
                msg += " and press enter: "
                # Keep prompting until the value converts to the declared
                # dtype; inputs without a dtype are accepted as typed
                success = False
                while not success:
                    val = input(msg)
                    if i.dtype:
                        try:
                            val = convert_value(val, i.dtype)
                            success = True
                        except ValueError:
                            typer.echo(
                                typer.style(
                                    f"Invalid {i.dtype} value", fg="red"
                                )
                            )
                    else:
                        success = True
                input_vals[input_name] = val
        t_end = calkit.utcnow()
        # Log step completion
        row = (
            dict(
                procedure_name=name,
                step=n,
                calkit_version=calkit.__version__,
                start=t_start.isoformat(),
                end=t_end.isoformat(),
            )
            | input_vals
        )
        # Order the values like the header row, blank where a step has no
        # value for a column
        row = {k: row.get(k, "") for k in headers}
        # Log this row to CSV
        with open(fpath, "a", newline="") as f:
            csv.writer(f).writerow(row.values())
        typer.echo(f"Logged step {n} to {fpath}")
        if not no_commit:
            typer.echo("Committing to Git repo")
            # Unstage anything else so the commit contains only the log file
            git_repo.git.reset()
            git_repo.git.add(fpath)
            git_repo.git.commit(
                [
                    "-m",
                    f"Execute procedure {name} step {n}",
                ]
            )
        if step.wait_after_s:
            wait(step.wait_after_s)
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
5
|
import os
|
|
6
|
+
import shutil
|
|
6
7
|
import subprocess
|
|
7
8
|
|
|
8
9
|
import git
|
|
@@ -472,3 +473,189 @@ def new_dataset(
|
|
|
472
473
|
repo.git.add("dvc.yaml")
|
|
473
474
|
if repo.git.diff("--staged"):
|
|
474
475
|
repo.git.commit(["-m", f"Add dataset {path}"])
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
@new_app.command(name="publication", help="Create a new publication.")
def new_publication(
    path: Annotated[
        str,
        typer.Argument(
            help=(
                "Path for the publication. "
                "If using a template, this could be a directory."
            )
        ),
    ],
    title: Annotated[
        str, typer.Option("--title", help="The title of the publication.")
    ],
    description: Annotated[
        str,
        typer.Option(
            "--description", help="A description of the publication."
        ),
    ],
    kind: Annotated[
        str,
        typer.Option(
            "--kind", help="Kind of the publication, e.g., 'journal-article'."
        ),
    ],
    stage_name: Annotated[
        str,
        typer.Option(
            "--stage",
            help="Name of the pipeline stage to build the output file.",
        ),
    ] = None,
    deps: Annotated[
        list[str], typer.Option("--dep", help="Path to stage dependency.")
    ] = [],
    outs_from_stage: Annotated[
        str,
        typer.Option(
            "--deps-from-stage-outs",
            help="Stage name from which to add outputs as dependencies.",
        ),
    ] = None,
    template: Annotated[
        str,
        typer.Option(
            "--template",
            help=(
                "Template with which to create the source files. "
                "Should be in the format {type}/{name}."
            ),
        ),
    ] = None,
    env_name: Annotated[
        str,
        typer.Option(
            "--environment",
            help="Name of the build environment to create, if desired.",
        ),
    ] = None,
    no_commit: Annotated[
        bool,
        typer.Option(
            "--no-commit", help="Do not commit resulting changes to the repo."
        ),
    ] = False,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite",
            "-f",
            help="Overwrite existing objects if they already exist.",
        ),
    ] = False,
):
    """Register a new publication in ``calkit.yaml``.

    Adds a publication entry to the project info and stages the change in
    Git. When a ``latex/...`` template is given, the template files are
    copied into ``path``; optionally a Docker build environment
    (``env_name``) and a DVC stage (``stage_name``) that runs ``latexmk``
    are created as well. Everything staged is committed unless
    ``no_commit`` is set.

    Without a template, ``path`` itself is recorded as the publication
    path and no files, environment or stage are created.
    """
    # Work on a copy so the shared default list (and any list passed by a
    # caller) is never mutated when stage outputs are appended below
    deps = list(deps or [])
    ck_info = calkit.load_calkit_info(process_includes=False)
    pubs = ck_info.get("publications", [])
    envs = ck_info.get("environments", {})
    pub_paths = [p.get("path") for p in pubs]
    # Only the part before the first "/" identifies the template type; a
    # malformed name is then reported by the template lookup below rather
    # than raising on tuple unpacking here
    template_type = None if template is None else template.partition("/")[0]
    # Check all of our inputs
    # The template is optional, so None must be accepted as a type here
    if template_type not in ["latex", None]:
        raise_error(f"Unknown template type '{template_type}'")
    if env_name is not None and template_type != "latex":
        raise_error("Environments can only be created for latex templates")
    if env_name is not None and env_name in envs and not overwrite:
        raise_error(f"Environment '{env_name}' already exists")
    if template_type is not None:
        try:
            template_obj = calkit.templates.get_template(template)
        except ValueError:
            raise_error(f"Template '{template}' does not exist")
    # Parse outs from stage if specified
    if outs_from_stage:
        pipeline = calkit.dvc.read_pipeline()
        stages = pipeline.get("stages", {})
        if outs_from_stage not in stages:
            raise_error(f"Stage {outs_from_stage} does not exist")
        stage = stages[outs_from_stage]
        if "foreach" in stage:
            # Expand the templated outputs once per foreach item
            for val in stage["foreach"]:
                for out in stage.get("do", {}).get("outs", []):
                    deps.append(out.replace("${item}", val))
        else:
            deps += stage.get("outs", [])
    # Create publication object
    if template_type == "latex":
        # The publication is the PDF built from the template's target file
        pub_fpath = os.path.join(
            path, template_obj.target.removesuffix(".tex") + ".pdf"
        )
    else:
        pub_fpath = path
    if not overwrite and pub_fpath in pub_paths:
        raise_error(f"Publication with path {pub_fpath} already exists")
    elif overwrite and pub_fpath in pub_paths:
        pubs = [p for p in pubs if p.get("path") != pub_fpath]
    pub = dict(
        path=pub_fpath,
        kind=kind,
        title=title,
        description=description,
        stage=stage_name,
    )
    pubs.append(pub)
    ck_info["publications"] = pubs
    repo = git.Repo()
    # Create environment if applicable
    if env_name is not None and template_type == "latex":
        env_path = f".calkit/environments/{env_name}.yaml"
        os.makedirs(".calkit/environments", exist_ok=True)
        # calkit.yaml only references the environment; its definition
        # lives in the included file written below
        env = {"_include": env_path}
        envs[env_name] = env
        env_remote = dict(
            kind="docker",
            image="kjarosh/latex:2024.4",
            description="TeXlive full from kjarosh.",
            platform="linux/amd64",
        )
        with open(env_path, "w") as f:
            calkit.ryaml.dump(env_remote, f)
        ck_info["environments"] = envs
        repo.git.add(env_path)
    with open("calkit.yaml", "w") as f:
        calkit.ryaml.dump(ck_info, f)
    repo.git.add("calkit.yaml")
    # Copy in template files if applicable
    if template_type == "latex":
        if overwrite and os.path.exists(path):
            shutil.rmtree(path)
        calkit.templates.use_template(
            name=template, dest_dir=path, title=title
        )
        repo.git.add(path)
    # Create stage if applicable
    if stage_name is not None and template_type == "latex":
        cmd = f"cd {path} && latexmk -pdf {template_obj.target}"
        if env_name is not None:
            cmd = f'calkit runenv -n {env_name} "{cmd}"'
        target_dep = os.path.join(path, template_obj.target)
        dvc_cmd = [
            "dvc",
            "stage",
            "add",
            "-n",
            stage_name,
            "-o",
            pub_fpath,
            "-d",
            target_dep,
        ]
        if env_name is not None:
            dvc_cmd += ["-d", env_path]
        for dep in deps:
            dvc_cmd += ["-d", dep]
        if overwrite:
            dvc_cmd.append("-f")
        dvc_cmd.append(cmd)
        subprocess.check_call(dvc_cmd)
        repo.git.add("dvc.yaml")
    if not no_commit and repo.git.diff("--staged"):
        repo.git.commit(["-m", f"Add new publication {pub_fpath}"])
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Core functionality."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import glob
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
from datetime import UTC, datetime
|
|
9
|
+
|
|
10
|
+
import ruamel.yaml
|
|
11
|
+
from git import Repo
|
|
12
|
+
from git.exc import InvalidGitRepositoryError
|
|
13
|
+
|
|
14
|
+
logging.basicConfig(level=logging.INFO)
|
|
15
|
+
logger = logging.getLogger(__package__)
|
|
16
|
+
|
|
17
|
+
ryaml = ruamel.yaml.YAML()
|
|
18
|
+
ryaml.indent(mapping=2, sequence=4, offset=2)
|
|
19
|
+
ryaml.preserve_quotes = True
|
|
20
|
+
ryaml.width = 70
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def find_project_dirs(relative=False, max_depth=3) -> list[str]:
    """Find all Calkit project directories.

    Looks for ``calkit.yaml`` files from one up to ``max_depth`` directory
    levels below the start directory, and the same number of levels below
    any ``*/GitHub`` folder. Only directories that open as a Git repo are
    kept.

    Parameters
    ----------
    relative : bool
        If true, search with relative patterns (i.e., from the current
        working directory); otherwise search from the user's home directory.
    max_depth : int
        Maximum number of wildcard directory levels to search.

    Returns
    -------
    list of str
        Project directories in discovery order, without duplicates.
    """
    start = "" if relative else os.path.expanduser("~")
    candidates = []
    for depth in range(1, max_depth + 1):
        wildcards = ["*"] * depth
        candidates += glob.glob(
            os.path.join(start, *wildcards, "calkit.yaml")
        )
        # Check GitHub documents for users who use GitHub Desktop
        candidates += glob.glob(
            os.path.join(start, "*", "GitHub", *wildcards, "calkit.yaml")
        )
    project_dirs = []
    # The plain and GitHub patterns overlap (e.g. */GitHub/*/calkit.yaml is
    # also matched by */*/*/calkit.yaml), so track what was already visited
    seen = set()
    for ck_fpath in candidates:
        path = os.path.dirname(ck_fpath)
        if path in seen:
            continue
        seen.add(path)
        # Make sure this path is a Git repo
        try:
            Repo(path)
        except InvalidGitRepositoryError:
            continue
        project_dirs.append(path)
    return project_dirs
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def load_calkit_info(
    wdir=None, process_includes: bool | str | list[str] = False
) -> dict:
    """Load Calkit project information.

    Parameters
    ----------
    wdir : str
        Working directory. Defaults to current working directory.
    process_includes: bool, string or list of strings
        Whether or not to process any '_include' keys for a given kind of
        object. If a string is passed, only process includes for that kind.
        Similarly, if a list of strings is passed, only process those kinds.
        If True, process all default kinds.

    Returns
    -------
    dict
        The contents of ``calkit.yaml``, or an empty dict if the file does
        not exist or is empty. For processed kinds, each object's
        ``_include`` key is replaced by the contents of the included file.
    """
    fpath = "calkit.yaml"
    if wdir is not None:
        fpath = os.path.join(wdir, fpath)
    info = {}
    if os.path.isfile(fpath):
        with open(fpath) as f:
            # An empty YAML document loads as None; always return a dict
            info = ryaml.load(f) or {}
    if not process_includes:
        return info
    # Check for any includes, i.e., entities with an _include key, for which
    # we should merge in another file
    if isinstance(process_includes, bool):
        includes_enabled = ["environments", "procedures"]
    elif isinstance(process_includes, str):
        includes_enabled = [process_includes]
    else:
        # Accept any iterable of kind names (list, tuple, ...)
        includes_enabled = list(process_includes)
    for kind in includes_enabled:
        for obj in (info.get(kind) or {}).values():
            if not isinstance(obj, dict) or "_include" not in obj:
                continue
            include_fpath = obj.pop("_include")
            if wdir is not None:
                # Include paths are resolved against the project directory,
                # like calkit.yaml itself; absolute paths pass through join
                include_fpath = os.path.join(wdir, include_fpath)
            with open(include_fpath) as f:
                include_data = ryaml.load(f)
            # Included values are merged into (and override) inline ones
            obj.update(include_data or {})
    return info
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def utcnow(remove_tz=True) -> datetime:
|
|
94
|
+
"""Return now in UTC, optionally stripping timezone information."""
|
|
95
|
+
dt = datetime.now(UTC)
|
|
96
|
+
if remove_tz:
|
|
97
|
+
dt = dt.replace(tzinfo=None)
|
|
98
|
+
return dt
|