calkit-python 0.3.3__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {calkit_python-0.3.3 → calkit_python-0.5.0}/PKG-INFO +13 -8
  2. {calkit_python-0.3.3 → calkit_python-0.5.0}/README.md +12 -7
  3. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/__init__.py +2 -1
  4. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/core.py +1 -1
  5. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/list.py +14 -0
  6. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/main.py +175 -12
  7. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/new.py +187 -0
  8. calkit_python-0.5.0/calkit/core.py +98 -0
  9. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/models.py +57 -0
  10. calkit_python-0.5.0/calkit/templates/__init__.py +1 -0
  11. calkit_python-0.5.0/calkit/templates/core.py +118 -0
  12. calkit_python-0.5.0/calkit/templates/latex/__init__.py +1 -0
  13. calkit_python-0.5.0/calkit/templates/latex/article/paper.tex +43 -0
  14. calkit_python-0.5.0/calkit/templates/latex/core.py +11 -0
  15. calkit_python-0.5.0/calkit/templates/latex/jfm/jfm.bst +1659 -0
  16. calkit_python-0.5.0/calkit/templates/latex/jfm/jfm.cls +1518 -0
  17. calkit_python-0.5.0/calkit/templates/latex/jfm/lineno-FLM.sty +113 -0
  18. calkit_python-0.5.0/calkit/templates/latex/jfm/paper.tex +468 -0
  19. calkit_python-0.5.0/calkit/templates/latex/jfm/upmath.sty +158 -0
  20. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/cli/test_list.py +4 -0
  21. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/cli/test_new.py +39 -0
  22. calkit_python-0.5.0/calkit/tests/test_templates.py +14 -0
  23. calkit_python-0.5.0/docs/tutorials/adding-latex-pub-docker.md +47 -0
  24. calkit_python-0.5.0/docs/tutorials/img/run-proc.png +0 -0
  25. calkit_python-0.5.0/docs/tutorials/procedures.md +120 -0
  26. calkit_python-0.3.3/calkit/core.py +0 -56
  27. {calkit_python-0.3.3 → calkit_python-0.5.0}/.github/FUNDING.yml +0 -0
  28. {calkit_python-0.3.3 → calkit_python-0.5.0}/.github/workflows/publish-test.yml +0 -0
  29. {calkit_python-0.3.3 → calkit_python-0.5.0}/.github/workflows/publish.yml +0 -0
  30. {calkit_python-0.3.3 → calkit_python-0.5.0}/.gitignore +0 -0
  31. {calkit_python-0.3.3 → calkit_python-0.5.0}/LICENSE +0 -0
  32. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/__init__.py +0 -0
  33. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/config.py +0 -0
  34. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/import_.py +0 -0
  35. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/notebooks.py +0 -0
  36. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cli/office.py +0 -0
  37. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/cloud.py +0 -0
  38. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/config.py +0 -0
  39. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/data.py +0 -0
  40. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/docker.py +0 -0
  41. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/dvc.py +0 -0
  42. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/git.py +0 -0
  43. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/gui.py +0 -0
  44. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/jupyter.py +0 -0
  45. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/office.py +0 -0
  46. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/server.py +0 -0
  47. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/__init__.py +0 -0
  48. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/cli/__init__.py +0 -0
  49. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/cli/test_main.py +0 -0
  50. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/test_core.py +0 -0
  51. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/test_dvc.py +0 -0
  52. {calkit_python-0.3.3 → calkit_python-0.5.0}/calkit/tests/test_jupyter.py +0 -0
  53. {calkit_python-0.3.3 → calkit_python-0.5.0}/pyproject.toml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: calkit-python
3
- Version: 0.3.3
3
+ Version: 0.5.0
4
4
  Summary: Reproducibility simplified.
5
5
  Project-URL: Homepage, https://github.com/calkit/calkit
6
6
  Project-URL: Issues, https://github.com/calkit/calkit/issues
@@ -31,21 +31,25 @@ Description-Content-Type: text/markdown
31
31
 
32
32
  # Calkit
33
33
 
34
- [Calkit](https://calkit.io) simplifies reproducibility,
34
+ [Calkit](https://calkit.io) helps simplify reproducibility,
35
35
  acting as a layer on top of
36
36
  [Git](https://git-scm.com/), [DVC](https://dvc.org/),
37
- [Docker](https://docker.com), and more,
38
- such that all all aspects of the research process can be fully described in a
39
- single repository.
37
+ [Docker](https://docker.com),
38
+ and adds a domain-specific data model
39
+ such that all aspects of the research process can be fully described in a
40
+ single repository and therefore easily consumed by others.
40
41
 
41
42
  ## Tutorials
42
43
 
44
+ - [Defining and executing manual procedures](docs/tutorials/procedures.md)
45
+ - [Adding a new LaTeX-based publication with its own Docker build environment](docs/tutorials/adding-latex-pub-docker.md)
46
+ - [A reproducible workflow using Microsoft Office (Word and Excel)](https://petebachant.me/office-repro/)
43
47
  - [Reproducible OpenFOAM simulations](https://petebachant.me/reproducible-openfoam/)
44
48
 
45
49
  ## Why does reproducibility matter?
46
50
 
47
51
  If your work is reproducible, that means that someone else can "run" it and
48
- get the same results or outputs.
52
+ calculate the same results or outputs.
49
53
  This is a major step towards addressing
50
54
  [the replication crisis](https://en.wikipedia.org/wiki/Replication_crisis)
51
55
  and has some major benefits for both you as an individual and the research
@@ -63,7 +67,8 @@ community:
63
67
  ## Why another tool/platform?
64
68
 
65
69
  Git, GitHub, DVC, Docker et al. are amazing tools/platforms, but their
66
- use involves multiple fairly difficult learning curves.
70
+ use involves multiple fairly difficult learning curves,
71
+ and tying them together might mean developing something new for each project.
67
72
  Our goal is to provide a single tool and platform to unify all of these so
68
73
  that there is a single, gentle learning curve.
69
74
  However, it is not our goal to hide or replace these underlying components.
@@ -152,7 +157,7 @@ other researchers can find and reuse your work to accelerate their own.
152
157
  own, like a figure, or for an intermediate result that is expensive to
153
158
  generate.
154
159
  1. There should be the smallest number of
155
- frequently used commands as possible, and they should require at little
160
+ frequently used commands as possible, and they should require as little
156
161
  memorization as possible to know how to execute, e.g., a user should be
157
162
  able to keep running `calkit run` and that's all they really need to do
158
163
  to make sure the project is up-to-date.
@@ -1,20 +1,24 @@
1
1
  # Calkit
2
2
 
3
- [Calkit](https://calkit.io) simplifies reproducibility,
3
+ [Calkit](https://calkit.io) helps simplify reproducibility,
4
4
  acting as a layer on top of
5
5
  [Git](https://git-scm.com/), [DVC](https://dvc.org/),
6
- [Docker](https://docker.com), and more,
7
- such that all all aspects of the research process can be fully described in a
8
- single repository.
6
+ [Docker](https://docker.com),
7
+ and adds a domain-specific data model
8
+ such that all aspects of the research process can be fully described in a
9
+ single repository and therefore easily consumed by others.
9
10
 
10
11
  ## Tutorials
11
12
 
13
+ - [Defining and executing manual procedures](docs/tutorials/procedures.md)
14
+ - [Adding a new LaTeX-based publication with its own Docker build environment](docs/tutorials/adding-latex-pub-docker.md)
15
+ - [A reproducible workflow using Microsoft Office (Word and Excel)](https://petebachant.me/office-repro/)
12
16
  - [Reproducible OpenFOAM simulations](https://petebachant.me/reproducible-openfoam/)
13
17
 
14
18
  ## Why does reproducibility matter?
15
19
 
16
20
  If your work is reproducible, that means that someone else can "run" it and
17
- get the same results or outputs.
21
+ calculate the same results or outputs.
18
22
  This is a major step towards addressing
19
23
  [the replication crisis](https://en.wikipedia.org/wiki/Replication_crisis)
20
24
  and has some major benefits for both you as an individual and the research
@@ -32,7 +36,8 @@ community:
32
36
  ## Why another tool/platform?
33
37
 
34
38
  Git, GitHub, DVC, Docker et al. are amazing tools/platforms, but their
35
- use involves multiple fairly difficult learning curves.
39
+ use involves multiple fairly difficult learning curves,
40
+ and tying them together might mean developing something new for each project.
36
41
  Our goal is to provide a single tool and platform to unify all of these so
37
42
  that there is a single, gentle learning curve.
38
43
  However, it is not our goal to hide or replace these underlying components.
@@ -121,7 +126,7 @@ other researchers can find and reuse your work to accelerate their own.
121
126
  own, like a figure, or for an intermediate result that is expensive to
122
127
  generate.
123
128
  1. There should be the smallest number of
124
- frequently used commands as possible, and they should require at little
129
+ frequently used commands as possible, and they should require as little
125
130
  memorization as possible to know how to execute, e.g., a user should be
126
131
  able to keep running `calkit run` and that's all they really need to do
127
132
  to make sure the project is up-to-date.
@@ -1,4 +1,4 @@
1
- __version__ = "0.3.3"
1
+ __version__ = "0.5.0"
2
2
 
3
3
  from .core import *
4
4
  from . import git
@@ -8,3 +8,4 @@ from . import jupyter
8
8
  from . import config
9
9
  from . import models
10
10
  from . import office
11
+ from . import templates
@@ -24,5 +24,5 @@ def run_cmd(cmd: list[str]):
24
24
 
25
25
 
26
26
  def raise_error(txt):
27
- typer.echo(txt, err=txt)
27
+ typer.echo(typer.style(txt, fg="red"), err=txt)
28
28
  raise typer.Exit(1)
@@ -77,3 +77,17 @@ def list_environments():
77
77
  typer.echo(name + ":")
78
78
  for k, v in env.items():
79
79
  typer.echo(f" {k}: {v}")
80
+
81
+
82
@list_app.command(name="templates")
def list_templates():
    # Print every known template as "{kind}/{name}", one per line.
    # (Documented with a comment rather than a docstring because Typer
    # would surface a docstring as the command's help text.)
    templates_by_kind = calkit.templates.TEMPLATES
    for kind in templates_by_kind:
        for tpl_name in templates_by_kind[kind]:
            typer.echo(f"{kind}/{tpl_name}")
87
+
88
+
89
@list_app.command(name="procedures")
def list_procedures():
    # Print the name of each entry under "procedures" in the loaded Calkit
    # project info, one per line.
    # (Documented with a comment rather than a docstring because Typer
    # would surface a docstring as the command's help text.)
    procedures = calkit.load_calkit_info().get("procedures", {})
    for proc_name in procedures:
        typer.echo(proc_name)
@@ -2,11 +2,14 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import csv
5
6
  import hashlib
6
7
  import json
7
8
  import os
8
9
  import subprocess
9
10
  import sys
11
+ import time
12
+ from datetime import UTC, datetime, timedelta
10
13
 
11
14
  import git
12
15
  import typer
@@ -20,6 +23,7 @@ from calkit.cli.list import list_app
20
23
  from calkit.cli.new import new_app
21
24
  from calkit.cli.notebooks import notebooks_app
22
25
  from calkit.cli.office import office_app
26
+ from calkit.models import Procedure
23
27
 
24
28
  app = typer.Typer(
25
29
  invoke_without_command=True,
@@ -481,11 +485,21 @@ def run_in_env(
481
485
  ),
482
486
  ),
483
487
  ] = None,
488
+ wdir: Annotated[
489
+ str,
490
+ typer.Option(
491
+ "--wdir",
492
+ help=(
493
+ "Working directory. "
494
+ "By default will run current working directory."
495
+ ),
496
+ ),
497
+ ] = None,
484
498
  verbose: Annotated[
485
499
  bool, typer.Option("--verbose", "-v", help="Print verbose output.")
486
500
  ] = False,
487
501
  ):
488
- ck_info = calkit.load_calkit_info()
502
+ ck_info = calkit.load_calkit_info(process_includes="environments")
489
503
  envs = ck_info.get("environments", {})
490
504
  if not envs:
491
505
  raise_error("No environments defined in calkit.yaml")
@@ -507,33 +521,42 @@ def run_in_env(
507
521
  if env_name is None:
508
522
  raise_error("Environment must be specified if there are multiple")
509
523
  env = envs[env_name]
510
- cwd = os.getcwd()
524
+ if wdir is not None:
525
+ cwd = os.path.abspath(wdir)
526
+ else:
527
+ cwd = os.getcwd()
511
528
  image_name = env.get("image", env_name)
512
- wdir = env.get("wdir", "/work")
529
+ docker_wdir = env.get("wdir", "/work")
530
+ shell = env.get("shell", "sh")
531
+ platform = env.get("platform")
513
532
  if env["kind"] == "docker":
514
- cmd = " ".join(cmd)
515
- cmd = [
533
+ shell_cmd = " ".join(cmd)
534
+ docker_cmd = [
516
535
  "docker",
517
536
  "run",
537
+ ]
538
+ if platform:
539
+ docker_cmd += ["--platform", platform]
540
+ docker_cmd += [
518
541
  "-it" if sys.stdin.isatty() else "-i",
519
542
  "--rm",
520
543
  "-w",
521
- wdir,
544
+ docker_wdir,
522
545
  "-v",
523
- f"{cwd}:{wdir}",
546
+ f"{cwd}:{docker_wdir}",
524
547
  image_name,
525
- "bash",
548
+ shell,
526
549
  "-c",
527
- f"{cmd}",
550
+ f"{shell_cmd}",
528
551
  ]
529
552
  if verbose:
530
- typer.echo(f"Running command: {cmd}")
531
- subprocess.call(cmd)
553
+ typer.echo(f"Running command: {docker_cmd}")
554
+ subprocess.call(docker_cmd, cwd=wdir)
532
555
  elif env["kind"] == "conda":
533
556
  cmd = ["conda", "run", "-n", env_name] + cmd
534
557
  if verbose:
535
558
  typer.echo(f"Running command: {cmd}")
536
- subprocess.call(cmd)
559
+ subprocess.call(cmd, cwd=wdir)
537
560
  else:
538
561
  raise_error("Environment kind not supported")
539
562
 
@@ -621,3 +644,143 @@ def build_docker(
621
644
  inspect[0]["DockerfileMD5"] = dockerfile_md5
622
645
  with open(lock_fpath, "w") as f:
623
646
  json.dump(inspect, f, indent=4)
647
+
648
+
649
@app.command(name="runproc", help="Run or execute a procedure.")
def run_procedure(
    name: Annotated[str, typer.Argument(help="The name of the procedure.")],
    no_commit: Annotated[
        bool,
        typer.Option("--no-commit", help="Do not commit after each action."),
    ] = False,
):
    """Interactively run a procedure and log every step to a CSV file.

    The procedure is looked up by ``name`` in the project's Calkit info and
    validated with the ``Procedure`` model. The Git working tree must be
    clean so the log corresponds to a committed procedure definition. Each
    step prompts the user (optionally collecting typed inputs), appends one
    row to ``.calkit/procedure-runs/{name}/{start time}.csv``, and, unless
    ``no_commit`` is set, commits that file.

    Parameters
    ----------
    name : str
        Name of the procedure to run.
    no_commit : bool
        If true, do not create a Git commit after each step.

    Raises
    ------
    typer.Exit
        Via ``raise_error`` if the procedure is unknown or invalid, or if
        the repository has uncommitted changes.
    """
    true_strings = {"true", "t", "yes", "y", "1"}
    false_strings = {"false", "f", "no", "n", "0"}

    def wait(seconds):
        """Show a countdown for ``seconds`` seconds."""
        typer.echo(f"Wait {seconds} seconds")
        # Track a monotonic deadline instead of repeatedly subtracting a
        # float step, which drifts and ignores time spent printing
        deadline = time.monotonic() + seconds
        while True:
            remaining = max(deadline - time.monotonic(), 0.0)
            mins, secs = divmod(int(remaining), 60)
            out = f"Time left: {mins:02d}:{secs:02d}\r"
            typer.echo(out, nl=False)
            if remaining <= 0:
                break
            time.sleep(min(0.1, remaining))
        typer.echo()

    def convert_value(value, dtype):
        """Convert raw input text to ``dtype``.

        Raises ``ValueError`` if the text cannot be converted, so the
        caller can prompt again. Unknown dtypes return the value unchanged.
        """
        if dtype == "int":
            return int(value)
        if dtype == "float":
            return float(value)
        if dtype == "str":
            return str(value)
        if dtype == "bool":
            # bool() of any non-empty string (including "false") is True,
            # so interpret the text explicitly
            text = str(value).strip().lower()
            if text in true_strings:
                return True
            if text in false_strings:
                return False
            raise ValueError(f"Cannot interpret {value!r} as a boolean")
        return value

    ck_info = calkit.load_calkit_info(process_includes="procedures")
    procs = ck_info.get("procedures", {})
    if name not in procs:
        raise_error(f"'{name}' is not defined as a procedure")
    try:
        proc = Procedure.model_validate(procs[name])
    except Exception as e:
        raise_error(f"Procedure '{name}' is invalid: {e}")
    git_repo = git.Repo()
    # Check to make sure the working tree is clean, so we know we ran the
    # committed version of the procedure
    git_status = git_repo.git.status()
    if "working tree clean" not in git_status:
        raise_error(
            f"Cannot execute procedures unless repo is clean:\n\n{git_status}"
        )
    t_start_overall = calkit.utcnow()
    # Formulate headers for CSV file, which must contain all inputs from all
    # steps
    headers = [
        "calkit_version",
        "procedure_name",
        "step",
        "start",
        "end",
    ]
    for step in proc.steps:
        if step.inputs:
            for iname in step.inputs:
                if iname not in headers:
                    headers.append(iname)
    # TODO: Add ability to process periodic logic
    # See if now falls between start and end, and if there is a run with a
    # timestamp corresponding to the period in which now falls
    # If so, exit
    # If not, continue
    # Create empty CSV if one doesn't exist
    # NOTE(review): the ISO timestamp contains ':' characters, which are not
    # allowed in Windows file names -- confirm whether Windows is a target
    t_start_overall_str = t_start_overall.isoformat(timespec="seconds")
    fpath = f".calkit/procedure-runs/{name}/{t_start_overall_str}.csv"
    os.makedirs(os.path.dirname(fpath), exist_ok=True)
    if not os.path.isfile(fpath):
        # The csv module expects files opened with newline="" so that it
        # controls line endings itself
        with open(fpath, "w", newline="") as f:
            csv.writer(f).writerow(headers)
    for n, step in enumerate(proc.steps):
        typer.echo(f"Starting step {n}")
        t_start = calkit.utcnow()
        if step.wait_before_s:
            wait(step.wait_before_s)
        # Execute the step
        inputs = step.inputs
        input_vals = {}
        if not inputs:
            input(f"{step.summary} and press enter when complete: ")
        else:
            typer.echo(step.summary)
            for input_name, i in inputs.items():
                msg = f"Enter {input_name}"
                if i.units:
                    msg += f" ({i.units})"
                msg += " and press enter: "
                # Keep prompting until the value converts to the declared
                # dtype (or no dtype is declared)
                while True:
                    val = input(msg)
                    if not i.dtype:
                        break
                    try:
                        val = convert_value(val, i.dtype)
                    except ValueError:
                        typer.echo(
                            typer.style(
                                f"Invalid {i.dtype} value", fg="red"
                            )
                        )
                    else:
                        break
                input_vals[input_name] = val
        t_end = calkit.utcnow()
        # Log step completion
        row = (
            dict(
                procedure_name=name,
                step=n,
                calkit_version=calkit.__version__,
                start=t_start.isoformat(),
                end=t_end.isoformat(),
            )
            | input_vals
        )
        # Order the values to match the header row; inputs belonging to
        # other steps are left blank
        row = {k: row.get(k, "") for k in headers}
        # Log this row to CSV
        with open(fpath, "a", newline="") as f:
            csv.writer(f).writerow(row.values())
        typer.echo(f"Logged step {n} to {fpath}")
        if not no_commit:
            typer.echo("Committing to Git repo")
            # Unstage anything else first so only the log file is committed
            git_repo.git.reset()
            git_repo.git.add(fpath)
            git_repo.git.commit(
                [
                    "-m",
                    f"Execute procedure {name} step {n}",
                ]
            )
        if step.wait_after_s:
            wait(step.wait_after_s)
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import os
6
+ import shutil
6
7
  import subprocess
7
8
 
8
9
  import git
@@ -472,3 +473,189 @@ def new_dataset(
472
473
  repo.git.add("dvc.yaml")
473
474
  if repo.git.diff("--staged"):
474
475
  repo.git.commit(["-m", f"Add dataset {path}"])
476
+
477
+
478
@new_app.command(name="publication", help="Create a new publication.")
def new_publication(
    path: Annotated[
        str,
        typer.Argument(
            help=(
                "Path for the publication. "
                "If using a template, this could be a directory."
            )
        ),
    ],
    title: Annotated[
        str, typer.Option("--title", help="The title of the publication.")
    ],
    description: Annotated[
        str,
        typer.Option(
            "--description", help="A description of the publication."
        ),
    ],
    kind: Annotated[
        str,
        typer.Option(
            "--kind", help="Kind of the publication, e.g., 'journal-article'."
        ),
    ],
    stage_name: Annotated[
        str,
        typer.Option(
            "--stage",
            help="Name of the pipeline stage to build the output file.",
        ),
    ] = None,
    deps: Annotated[
        list[str], typer.Option("--dep", help="Path to stage dependency.")
    ] = [],
    outs_from_stage: Annotated[
        str,
        typer.Option(
            "--deps-from-stage-outs",
            help="Stage name from which to add outputs as dependencies.",
        ),
    ] = None,
    template: Annotated[
        str,
        typer.Option(
            "--template",
            help=(
                "Template with which to create the source files. "
                "Should be in the format {type}/{name}."
            ),
        ),
    ] = None,
    env_name: Annotated[
        str,
        typer.Option(
            "--environment",
            help="Name of the build environment to create, if desired.",
        ),
    ] = None,
    no_commit: Annotated[
        bool,
        typer.Option(
            "--no-commit", help="Do not commit resulting changes to the repo."
        ),
    ] = False,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite",
            "-f",
            help="Overwrite existing objects if they already exist.",
        ),
    ] = False,
):
    """Register a new publication in ``calkit.yaml``.

    Optionally copies in a LaTeX template, creates a Docker build
    environment definition under ``.calkit/environments``, and adds a DVC
    stage that builds the PDF with ``latexmk``. All touched files are staged
    and, unless ``no_commit`` is set, committed.

    Raises
    ------
    typer.Exit
        Via ``raise_error`` when the template is malformed or unknown, the
        environment or publication already exists (without ``overwrite``),
        or ``outs_from_stage`` names a stage that does not exist.
    subprocess.CalledProcessError
        If ``dvc stage add`` fails.
    """
    # Work on a copy: the default list object is shared between calls and
    # would otherwise accumulate dependencies when mutated below
    deps = list(deps) if deps else []
    ck_info = calkit.load_calkit_info(process_includes=False)
    pubs = ck_info.get("publications", [])
    envs = ck_info.get("environments", {})
    pub_paths = [p.get("path") for p in pubs]
    if template is not None:
        template_parts = template.split("/")
        if len(template_parts) != 2:
            # Previously surfaced as an uncaught unpacking ValueError
            raise_error(
                f"Template '{template}' must be in the format {{type}}/{{name}}"
            )
        template_type = template_parts[0]
    else:
        template_type = None
    # Check all of our inputs
    # The template is optional, so None must be accepted here; otherwise the
    # non-template code paths below could never run
    if template_type not in ["latex", None]:
        raise_error(f"Unknown template type '{template_type}'")
    if env_name is not None and template_type != "latex":
        raise_error("Environments can only be created for latex templates")
    if env_name is not None and env_name in envs and not overwrite:
        raise_error(f"Environment '{env_name}' already exists")
    if template_type is not None:
        try:
            template_obj = calkit.templates.get_template(template)
        except ValueError:
            raise_error(f"Template '{template}' does not exist")
    # Parse outs from stage if specified
    if outs_from_stage:
        pipeline = calkit.dvc.read_pipeline()
        stages = pipeline.get("stages", {})
        if outs_from_stage not in stages:
            raise_error(f"Stage {outs_from_stage} does not exist")
        stage = stages[outs_from_stage]
        if "foreach" in stage:
            # Expand the "${item}" placeholder once per foreach value
            for val in stage["foreach"]:
                for out in stage.get("do", {}).get("outs", []):
                    deps.append(out.replace("${item}", val))
        else:
            deps += stage.get("outs", [])
    # Create publication object
    if template_type == "latex":
        # The publication is the PDF built from the template's target file
        pub_fpath = os.path.join(
            path, template_obj.target.removesuffix(".tex") + ".pdf"
        )
    else:
        pub_fpath = path
    if not overwrite and pub_fpath in pub_paths:
        raise_error(f"Publication with path {pub_fpath} already exists")
    elif overwrite and pub_fpath in pub_paths:
        pubs = [p for p in pubs if p.get("path") != pub_fpath]
    pub = dict(
        path=pub_fpath,
        kind=kind,
        title=title,
        description=description,
        stage=stage_name,
    )
    pubs.append(pub)
    ck_info["publications"] = pubs
    repo = git.Repo()
    # Create environment if applicable
    if env_name is not None and template_type == "latex":
        env_path = f".calkit/environments/{env_name}.yaml"
        os.makedirs(".calkit/environments", exist_ok=True)
        # calkit.yaml only references the environment file via "_include"
        env = {"_include": env_path}
        envs[env_name] = env
        env_remote = dict(
            kind="docker",
            image="kjarosh/latex:2024.4",
            description="TeXlive full from kjarosh.",
            platform="linux/amd64",
        )
        with open(env_path, "w") as f:
            calkit.ryaml.dump(env_remote, f)
        ck_info["environments"] = envs
        repo.git.add(env_path)
    with open("calkit.yaml", "w") as f:
        calkit.ryaml.dump(ck_info, f)
    repo.git.add("calkit.yaml")
    # Copy in template files if applicable
    if template_type == "latex":
        if overwrite and os.path.exists(path):
            shutil.rmtree(path)
        calkit.templates.use_template(
            name=template, dest_dir=path, title=title
        )
        repo.git.add(path)
    # Create stage if applicable
    if stage_name is not None and template_type == "latex":
        cmd = f"cd {path} && latexmk -pdf {template_obj.target}"
        if env_name is not None:
            cmd = f'calkit runenv -n {env_name} "{cmd}"'
        target_dep = os.path.join(path, template_obj.target)
        dvc_cmd = [
            "dvc",
            "stage",
            "add",
            "-n",
            stage_name,
            "-o",
            pub_fpath,
            "-d",
            target_dep,
        ]
        if env_name is not None:
            dvc_cmd += ["-d", env_path]
        for dep in deps:
            dvc_cmd += ["-d", dep]
        if overwrite:
            dvc_cmd.append("-f")
        dvc_cmd.append(cmd)
        subprocess.check_call(dvc_cmd)
        repo.git.add("dvc.yaml")
    if not no_commit and repo.git.diff("--staged"):
        repo.git.commit(["-m", f"Add new publication {pub_fpath}"])
@@ -0,0 +1,98 @@
1
+ """Core functionality."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import glob
6
+ import logging
7
+ import os
8
+ from datetime import UTC, datetime
9
+
10
+ import ruamel.yaml
11
+ from git import Repo
12
+ from git.exc import InvalidGitRepositoryError
13
+
14
+ logging.basicConfig(level=logging.INFO)
15
+ logger = logging.getLogger(__package__)
16
+
17
+ ryaml = ruamel.yaml.YAML()
18
+ ryaml.indent(mapping=2, sequence=4, offset=2)
19
+ ryaml.preserve_quotes = True
20
+ ryaml.width = 70
21
+
22
+
23
def find_project_dirs(relative=False, max_depth=3) -> list[str]:
    """Find all Calkit project directories.

    A project directory is one that contains a ``calkit.yaml`` file and
    that GitPython's ``Repo`` accepts as a Git repository.

    Parameters
    ----------
    relative : bool
        If true, search relative to the current working directory and
        return relative paths; otherwise search from the user's home
        directory.
    max_depth : int
        Maximum number of directory levels to descend when searching.

    Returns
    -------
    list of str
        Unique project directories, in the order they were found.
    """
    start = "" if relative else os.path.expanduser("~")
    candidates = []
    for depth in range(1, max_depth + 1):
        wildcards = ["*"] * depth
        candidates += glob.glob(
            os.path.join(start, *wildcards, "calkit.yaml")
        )
        # Check GitHub documents for users who use GitHub Desktop
        candidates += glob.glob(
            os.path.join(start, "*", "GitHub", *wildcards, "calkit.yaml")
        )
    project_dirs = []
    seen = set()
    for ck_fpath in candidates:
        path = os.path.dirname(ck_fpath)
        # The plain and GitHub patterns overlap at different depths, so the
        # same directory can be matched more than once
        if path in seen:
            continue
        seen.add(path)
        # Make sure this path is a Git repo
        try:
            Repo(path)
        except InvalidGitRepositoryError:
            continue
        project_dirs.append(path)
    return project_dirs
48
+
49
+
50
+ def load_calkit_info(
51
+ wdir=None, process_includes: bool | str | list[str] = False
52
+ ) -> dict:
53
+ """Load Calkit project information.
54
+
55
+ Parameters
56
+ ----------
57
+ wdir : str
58
+ Working directory. Defaults to current working directory.
59
+ process_includes: bool, string or list of strings
60
+ Whether or not to process any '_include' keys for a given kind of
61
+ object. If a string is passed, only process includes for that kind.
62
+ Similarly, if a list of strings is passed, only process those kinds.
63
+ If True, process all default kinds.
64
+ """
65
+ info = {}
66
+ fpath = "calkit.yaml"
67
+ if wdir is not None:
68
+ fpath = os.path.join(wdir, fpath)
69
+ if os.path.isfile(fpath):
70
+ with open(fpath) as f:
71
+ info = ryaml.load(f)
72
+ # Check for any includes, i.e., entities with an _include key, for which
73
+ # we should merge in another file
74
+ default_includes_enabled = ["environments", "procedures"]
75
+ if process_includes:
76
+ if isinstance(process_includes, bool):
77
+ includes_enabled = default_includes_enabled
78
+ elif isinstance(process_includes, str):
79
+ includes_enabled = [process_includes]
80
+ elif isinstance(process_includes, list):
81
+ includes_enabled = process_includes
82
+ for kind in includes_enabled:
83
+ if kind in info:
84
+ for obj_name, obj in info[kind].items():
85
+ if "_include" in obj:
86
+ include_fpath = obj.pop("_include")
87
+ with open(include_fpath) as f:
88
+ include_data = ryaml.load(f)
89
+ info[kind][obj_name] |= include_data
90
+ return info
91
+
92
+
93
+ def utcnow(remove_tz=True) -> datetime:
94
+ """Return now in UTC, optionally stripping timezone information."""
95
+ dt = datetime.now(UTC)
96
+ if remove_tz:
97
+ dt = dt.replace(tzinfo=None)
98
+ return dt