calkit-python 0.2.2__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. {calkit_python-0.2.2 → calkit_python-0.3.1}/PKG-INFO +11 -3
  2. {calkit_python-0.2.2 → calkit_python-0.3.1}/README.md +6 -2
  3. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/__init__.py +2 -1
  4. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/cli/__init__.py +1 -0
  5. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/cli/main.py +12 -2
  6. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/cli/new.py +108 -0
  7. calkit_python-0.3.1/calkit/cli/office.py +62 -0
  8. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/config.py +2 -2
  9. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/core.py +6 -3
  10. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/git.py +4 -1
  11. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/models.py +5 -0
  12. calkit_python-0.3.1/calkit/office.py +42 -0
  13. calkit_python-0.3.1/calkit/server.py +631 -0
  14. {calkit_python-0.2.2 → calkit_python-0.3.1}/pyproject.toml +4 -0
  15. calkit_python-0.2.2/calkit/server.py +0 -203
  16. {calkit_python-0.2.2 → calkit_python-0.3.1}/.github/FUNDING.yml +0 -0
  17. {calkit_python-0.2.2 → calkit_python-0.3.1}/.github/workflows/publish-test.yml +0 -0
  18. {calkit_python-0.2.2 → calkit_python-0.3.1}/.github/workflows/publish.yml +0 -0
  19. {calkit_python-0.2.2 → calkit_python-0.3.1}/.gitignore +0 -0
  20. {calkit_python-0.2.2 → calkit_python-0.3.1}/LICENSE +0 -0
  21. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/cli/config.py +0 -0
  22. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/cli/core.py +0 -0
  23. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/cli/import_.py +0 -0
  24. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/cli/list.py +0 -0
  25. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/cli/notebooks.py +0 -0
  26. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/cloud.py +0 -0
  27. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/data.py +0 -0
  28. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/docker.py +0 -0
  29. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/dvc.py +0 -0
  30. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/gui.py +0 -0
  31. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/jupyter.py +0 -0
  32. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/tests/__init__.py +0 -0
  33. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/tests/cli/__init__.py +0 -0
  34. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/tests/cli/test_list.py +0 -0
  35. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/tests/cli/test_main.py +0 -0
  36. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/tests/cli/test_new.py +0 -0
  37. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/tests/test_core.py +0 -0
  38. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/tests/test_dvc.py +0 -0
  39. {calkit_python-0.2.2 → calkit_python-0.3.1}/calkit/tests/test_jupyter.py +0 -0
  40. {calkit_python-0.2.2 → calkit_python-0.3.1}/examples/cfd-study/README.md +0 -0
  41. {calkit_python-0.2.2 → calkit_python-0.3.1}/examples/cfd-study/calkit.yaml +0 -0
  42. {calkit_python-0.2.2 → calkit_python-0.3.1}/examples/cfd-study/config/simulations/runs.csv +0 -0
  43. {calkit_python-0.2.2 → calkit_python-0.3.1}/examples/cfd-study/notebook.ipynb +0 -0
  44. {calkit_python-0.2.2 → calkit_python-0.3.1}/examples/ms-office/.gitignore +0 -0
  45. {calkit_python-0.2.2 → calkit_python-0.3.1}/examples/ms-office/README.md +0 -0
  46. {calkit_python-0.2.2 → calkit_python-0.3.1}/examples/ms-office/calkit.yaml +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: calkit-python
3
- Version: 0.2.2
3
+ Version: 0.3.1
4
4
  Summary: Reproducibility simplified.
5
5
  Project-URL: Homepage, https://github.com/calkit/calkit
6
6
  Project-URL: Issues, https://github.com/calkit/calkit/issues
@@ -10,17 +10,21 @@ Classifier: License :: OSI Approved :: MIT License
10
10
  Classifier: Operating System :: OS Independent
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Requires-Python: >=3.8
13
+ Requires-Dist: docx2pdf
13
14
  Requires-Dist: dvc
14
15
  Requires-Dist: eval-type-backport; python_version < '3.10'
15
16
  Requires-Dist: fastapi
16
17
  Requires-Dist: gitpython
17
18
  Requires-Dist: keyring
18
19
  Requires-Dist: nbconvert
20
+ Requires-Dist: pillow
19
21
  Requires-Dist: pydantic-settings
20
22
  Requires-Dist: pydantic[email]
21
23
  Requires-Dist: pyjwt
24
+ Requires-Dist: pywin32; platform_system == 'Windows'
22
25
  Requires-Dist: requests
23
26
  Requires-Dist: typer
27
+ Requires-Dist: uvicorn
24
28
  Provides-Extra: data
25
29
  Requires-Dist: pandas; extra == 'data'
26
30
  Requires-Dist: polars; extra == 'data'
@@ -31,10 +35,14 @@ Description-Content-Type: text/markdown
31
35
  [Calkit](https://calkit.io) simplifies reproducibility,
32
36
  acting as a layer on top of
33
37
  [Git](https://git-scm.com/), [DVC](https://dvc.org/),
34
- [Zenodo](https://zenodo.org), and more,
38
+ [Docker](https://docker.com), and more,
35
39
  such that all aspects of the research process can be fully described in a
36
40
  single repository.
37
41
 
42
+ ## Tutorials
43
+
44
+ - [Reproducible OpenFOAM simulations](https://petebachant.me/reproducible-openfoam/)
45
+
38
46
  ## Why does reproducibility matter?
39
47
 
40
48
  If your work is reproducible, that means that someone else can "run" it and
@@ -55,7 +63,7 @@ community:
55
63
 
56
64
  ## Why another tool/platform?
57
65
 
58
- Git, GitHub, DVC, Zenodo et al. are amazing tools/platforms, but their
66
+ Git, GitHub, DVC, Docker et al. are amazing tools/platforms, but their
59
67
  use involves multiple fairly difficult learning curves.
60
68
  Our goal is to provide a single tool and platform to unify all of these so
61
69
  that there is a single, gentle learning curve.
@@ -3,10 +3,14 @@
3
3
  [Calkit](https://calkit.io) simplifies reproducibility,
4
4
  acting as a layer on top of
5
5
  [Git](https://git-scm.com/), [DVC](https://dvc.org/),
6
- [Zenodo](https://zenodo.org), and more,
6
+ [Docker](https://docker.com), and more,
7
7
  such that all aspects of the research process can be fully described in a
8
8
  single repository.
9
9
 
10
+ ## Tutorials
11
+
12
+ - [Reproducible OpenFOAM simulations](https://petebachant.me/reproducible-openfoam/)
13
+
10
14
  ## Why does reproducibility matter?
11
15
 
12
16
  If your work is reproducible, that means that someone else can "run" it and
@@ -27,7 +31,7 @@ community:
27
31
 
28
32
  ## Why another tool/platform?
29
33
 
30
- Git, GitHub, DVC, Zenodo et al. are amazing tools/platforms, but their
34
+ Git, GitHub, DVC, Docker et al. are amazing tools/platforms, but their
31
35
  use involves multiple fairly difficult learning curves.
32
36
  Our goal is to provide a single tool and platform to unify all of these so
33
37
  that there is a single, gentle learning curve.
@@ -1,4 +1,4 @@
1
- __version__ = "0.2.2"
1
+ __version__ = "0.3.1"
2
2
 
3
3
  from .core import *
4
4
  from . import git
@@ -7,3 +7,4 @@ from . import cloud
7
7
  from . import jupyter
8
8
  from . import config
9
9
  from . import models
10
+ from . import office
@@ -3,4 +3,5 @@ from .core import *
3
3
 
4
4
  def run() -> None:
5
5
  from .main import app
6
+
6
7
  app()
@@ -19,6 +19,7 @@ from calkit.cli.import_ import import_app
19
19
  from calkit.cli.list import list_app
20
20
  from calkit.cli.new import new_app
21
21
  from calkit.cli.notebooks import notebooks_app
22
+ from calkit.cli.office import office_app
22
23
 
23
24
  app = typer.Typer(
24
25
  invoke_without_command=True,
@@ -36,6 +37,7 @@ app.add_typer(
36
37
  app.add_typer(notebooks_app, name="nb", help="Work with Jupyter notebooks.")
37
38
  app.add_typer(list_app, name="list", help="List Calkit objects.")
38
39
  app.add_typer(import_app, name="import", help="Import objects.")
40
+ app.add_typer(office_app, name="office", help="Work with Microsoft Office.")
39
41
 
40
42
 
41
43
  @app.callback()
@@ -157,6 +159,9 @@ def add(
157
159
  else:
158
160
  dvc_extensions = [
159
161
  ".png",
162
+ ".jpeg",
163
+ ".jpg",
164
+ ".gif",
160
165
  ".h5",
161
166
  ".parquet",
162
167
  ".pickle",
@@ -164,6 +169,10 @@ def add(
164
169
  ".avi",
165
170
  ".webm",
166
171
  ".pdf",
172
+ ".xlsx",
173
+ ".docx",
174
+ ".xls",
175
+ ".doc",
167
176
  ]
168
177
  dvc_size_thresh_bytes = 1_000_000
169
178
  if "." in paths and to is None:
@@ -298,7 +307,7 @@ def run_server():
298
307
  port=8866,
299
308
  host="localhost",
300
309
  reload=True,
301
- reload_dirs=[os.path.dirname(__file__)],
310
+ reload_dirs=[os.path.dirname(os.path.dirname(__file__))],
302
311
  )
303
312
 
304
313
 
@@ -368,7 +377,7 @@ def run_dvc_repro(
368
377
  args += ["--pipeline", pipeline]
369
378
  if downstream is not None:
370
379
  args += downstream
371
- subprocess.call(["dvc", "repro"] + args)
380
+ subprocess.check_call(["dvc", "repro"] + args)
372
381
  # Now parse stage metadata for calkit objects
373
382
  if not os.path.isfile("dvc.yaml"):
374
383
  raise_error("No dvc.yaml file found")
@@ -575,6 +584,7 @@ def build_docker(
575
584
  _ = out[0].pop("Id")
576
585
  _ = out[0].pop("RepoDigests")
577
586
  _ = out[0].pop("Metadata")
587
+ _ = out[0].pop("DockerVersion")
578
588
  return out
579
589
 
580
590
  typer.echo(f"Checking for existing image with tag {tag}")
@@ -364,3 +364,111 @@ def new_foreach_stage(
364
364
  repo.git.add("dvc.yaml")
365
365
  if not no_commit and repo.git.diff("--staged"):
366
366
  repo.git.commit(["-m", f"Add foreach stage {name}"])
367
+
368
+
369
@new_app.command(name="dataset")
def new_dataset(
    path: str,
    title: Annotated[str, typer.Option("--title")],
    description: Annotated[str, typer.Option("--description")],
    stage_name: Annotated[
        str,
        typer.Option(
            "--stage",
            help="Name of the pipeline stage that generates this dataset.",
        ),
    ] = None,
    cmd: Annotated[
        str,
        typer.Option(
            "--cmd", help="Command to add to the stage, if specified."
        ),
    ] = None,
    deps: Annotated[
        list[str], typer.Option("--dep", help="Path to stage dependency.")
    ] = [],
    outs: Annotated[
        list[str],
        typer.Option(
            "--out",
            help=(
                "Path to stage output. "
                "Dataset path will be added automatically."
            ),
        ),
    ] = [],
    outs_from_stage: Annotated[
        str,
        typer.Option(
            "--deps-from-stage-outs",
            help="Stage name from which to add outputs as dependencies.",
        ),
    ] = None,
    no_commit: Annotated[bool, typer.Option("--no-commit")] = False,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite",
            "-f",
            help="Overwrite existing dataset if one exists.",
        ),
    ] = False,
):
    """Create a new dataset.

    Registers a dataset entry (path, title, and optionally description
    and stage) in ``calkit.yaml``. When ``cmd`` is given, a DVC stage named
    ``stage_name`` is also created with ``dvc stage add``, using ``deps``
    as dependencies and ``outs`` (plus the dataset path itself) as outputs.
    When ``outs_from_stage`` is given, the outputs of that existing stage
    are appended to the dependencies. Unless ``no_commit`` is set, the
    changed files are staged and committed to Git.
    """
    # Work on copies so neither the caller's lists nor the shared default
    # list objects in the signature are mutated by the appends below
    deps = list(deps)
    outs = list(outs)
    ck_info = calkit.load_calkit_info()
    datasets = ck_info.get("datasets", [])
    existing_paths = [ds.get("path") for ds in datasets]
    if path in existing_paths:
        if not overwrite:
            raise_error(f"Dataset at path {path} already exists")
        # Drop the existing entry; the new one is appended further down
        datasets = [ds for ds in datasets if ds.get("path") != path]
    if cmd is not None and stage_name is None:
        raise_error("Stage name must be provided if command is specified")
    if (deps or outs or outs_from_stage) and not cmd:
        raise_error("Command must be provided")
    if (deps or outs or outs_from_stage) and not stage_name:
        raise_error("Stage name must be provided")
    obj = dict(path=path, title=title)
    if description is not None:
        obj["description"] = description
    if stage_name is not None:
        obj["stage"] = stage_name
    if cmd:
        if outs_from_stage:
            pipeline = calkit.dvc.read_pipeline()
            stages = pipeline.get("stages", {})
            if outs_from_stage not in stages:
                raise_error(f"Stage {outs_from_stage} does not exist")
            stage = stages[outs_from_stage]
            if "foreach" in stage:
                # Expand the templated outputs once per foreach value;
                # values parsed from YAML may be non-strings (e.g., ints),
                # so convert before substituting
                for val in stage["foreach"]:
                    for out in stage.get("do", {}).get("outs", []):
                        deps.append(out.replace("${item}", str(val)))
            else:
                deps += stage.get("outs", [])
        # The dataset itself is always an output of its generating stage
        if path not in outs:
            outs.append(path)
        deps_cmd = []
        for dep in deps:
            deps_cmd += ["-d", dep]
        outs_cmd = []
        for out in outs:
            outs_cmd += ["-o", out]
        subprocess.check_call(
            ["dvc", "stage", "add", "-n", stage_name]
            + (["-f"] if overwrite else [])
            + deps_cmd
            + outs_cmd
            + [cmd]
        )
    datasets.append(obj)
    ck_info["datasets"] = datasets
    with open("calkit.yaml", "w") as f:
        ryaml.dump(ck_info, f)
    if not no_commit:
        repo = git.Repo()
        repo.git.add("calkit.yaml")
        if cmd:
            # dvc.yaml only changes when a stage was added above
            repo.git.add("dvc.yaml")
        if repo.git.diff("--staged"):
            repo.git.commit(["-m", f"Add dataset {path}"])
@@ -0,0 +1,62 @@
1
+ """CLI for working with Office."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import platform
6
+
7
+ import docx2pdf
8
+ import typer
9
+ from typing_extensions import Annotated
10
+
11
+ import calkit
12
+ from calkit.cli import raise_error
13
+
14
+ office_app = typer.Typer(no_args_is_help=True)
15
+
16
+
17
@office_app.command(
    name="excel-chart-to-png",
    help="Extract a chart from Excel and save to PNG.",
)
def excel_chart_to_png(
    input_fpath: Annotated[str, typer.Argument(help="Input Excel file path.")],
    output_fpath: Annotated[str, typer.Argument(help="Output PNG file path.")],
    sheet: Annotated[
        int, typer.Option("--sheet", help="Sheet in workbook.")
    ] = 1,
    chart_index: Annotated[
        int, typer.Option("--chart-index", help="Chart index.")
    ] = 0,
):
    # CLI wrapper: checks the platform, reports what will be exported, and
    # delegates the actual export to ``calkit.office.excel_chart_to_png``
    running_on_windows = platform.system() == "Windows"
    if not running_on_windows:
        raise_error("This command is only available on Windows")
    message = (
        f"Exporting chart at index {chart_index} from sheet {sheet} "
        f"in {input_fpath} to {output_fpath}"
    )
    typer.echo(message)
    export_kwargs = dict(
        input_fpath=input_fpath,
        output_fpath=output_fpath,
        sheet=sheet,
        chart_index=chart_index,
    )
    calkit.office.excel_chart_to_png(**export_kwargs)
43
+
44
+
45
@office_app.command(name="word-to-pdf", help="Convert a Word document to PDF.")
def word_to_pdf(
    input_fpath: Annotated[
        str, typer.Argument(help="Input Word document file path.")
    ],
    output_fpath: Annotated[
        str,
        typer.Option(
            "-o",
            "--output",
            help=(
                "Output file path. If not specified, "
                "will be the same as input with a .pdf extension."
            ),
        ),
    ] = None,
):
    # Thin CLI wrapper: both paths are handed to docx2pdf unchanged, so a
    # missing output path is passed through as None
    conversion_args = {
        "input_path": input_fpath,
        "output_path": output_fpath,
    }
    docx2pdf.convert(**conversion_args)
@@ -7,7 +7,7 @@ from typing import Literal
7
7
 
8
8
  import keyring
9
9
  import yaml
10
- from pydantic import EmailStr, computed_field
10
+ from pydantic import computed_field
11
11
  from pydantic_settings import BaseSettings, SettingsConfigDict
12
12
 
13
13
 
@@ -40,7 +40,7 @@ class Settings(BaseSettings):
40
40
  ),
41
41
  extra="ignore",
42
42
  )
43
- username: EmailStr | None = None
43
+ username: str | None = None
44
44
  token: str | None = None
45
45
  dvc_token: str | None = None
46
46
  dataframe_engine: Literal["pandas", "polars"] = "pandas"
@@ -46,8 +46,11 @@ def find_project_dirs(relative=False, max_depth=3) -> list[str]:
46
46
  return final_res
47
47
 
48
48
 
49
- def load_calkit_info() -> dict:
50
- if os.path.isfile("calkit.yaml"):
51
- with open("calkit.yaml") as f:
49
+ def load_calkit_info(wdir=None) -> dict:
50
+ fpath = "calkit.yaml"
51
+ if wdir is not None:
52
+ fpath = os.path.join(wdir, fpath)
53
+ if os.path.isfile(fpath):
54
+ with open(fpath) as f:
52
55
  return ryaml.load(f)
53
56
  return {}
@@ -8,7 +8,10 @@ import git
8
8
  def detect_project_name(path=None) -> str:
9
9
  """Read the project owner and name from the remote.
10
10
 
11
- TODO: Currently only works with GitHub remotes.
11
+ TODO: Currently only works with GitHub remotes where the GitHub repo
12
+ name is identical to the Calkit project name, which is not guaranteed.
13
+ We should probably look inside ``calkit.yaml`` at ``name``
14
+ first, and fall back to the GitHub remote URL if we can't find that.
12
15
  """
13
16
  url = git.Repo(path=path).remote().url
14
17
  return url.split("github.com")[-1][1:].removesuffix(".git")
@@ -107,6 +107,11 @@ class ProjectInfo(BaseModel):
107
107
  distinguish what has been newly created here.
108
108
  """
109
109
 
110
+ title: str | None = None
111
+ owner: str | None = None
112
+ description: str | None = None
113
+ name: str | None = None
114
+ git_repo_url: str | None = None
110
115
  parent: str | None = None
111
116
  questions: list[str] = []
112
117
  datasets: list[Dataset] = []
@@ -0,0 +1,42 @@
1
+ """Functionality for working with Microsoft Office."""
2
+
3
+ import os
4
+
5
+ from PIL import ImageGrab
6
+
7
+
8
def excel_chart_to_png(
    input_fpath: str,
    output_fpath: str,
    sheet: int = 1,
    chart_index: int = 0,
):
    """Export a chart from an Excel sheet to PNG.

    The chart is copied to the clipboard through Excel's COM interface and
    then grabbed from the clipboard as an image, so this only works on
    Windows with Excel and ``pywin32`` installed.

    Parameters
    ----------
    input_fpath : str
        Path to the Excel workbook.
    output_fpath : str
        Path of the PNG file to write. Missing parent directories are
        created, and an existing file is overwritten.
    sheet : int
        Sheet index passed to the workbook's ``Sheets`` collection.
    chart_index : int
        Index into the sheet's ``Shapes`` collection.

    Raises
    ------
    RuntimeError
        If no image is available on the clipboard after copying the shape.
    """
    # Imported here so the module can still be imported on non-Windows
    # platforms where pywin32 is not installed
    import win32com.client

    # Scale factor applied to the grabbed image before saving
    factor = 1.0
    # Open the excel application using win32com
    excel = win32com.client.Dispatch("Excel.Application")
    try:
        # Disable alerts and visibility to the user
        excel.Visible = 0
        excel.DisplayAlerts = 0
        # Open workbook
        wb = excel.Workbooks.Open(os.path.abspath(input_fpath))
        try:
            # Look the sheet up on the workbook we just opened rather than
            # on whichever workbook happens to be active in the application
            worksheet = wb.Sheets(sheet)
            shape = worksheet.Shapes[chart_index]
            shape.Copy()
            image = ImageGrab.grabclipboard()
            # grabclipboard returns None when the clipboard holds no image
            # (and may return a list of file names instead of an image)
            if image is None or isinstance(image, list):
                raise RuntimeError(
                    f"No image found on clipboard after copying chart "
                    f"{chart_index} from sheet {sheet} in {input_fpath}"
                )
            width, height = image.size
            size = int(factor * width), int(factor * height)
            image_resize = image.resize(size)
            # Save the image to the PNG file, overwriting if it exists
            dirname = os.path.dirname(output_fpath)
            if dirname:
                os.makedirs(dirname, exist_ok=True)
            image_resize.save(
                os.path.abspath(output_fpath),
                "png",
                quality=95,
                dpi=(300, 300),
            )
        finally:
            # Always close the workbook, even if the export failed
            # NOTE(review): SaveChanges=True is kept from the original, but
            # a read-only export probably should not re-save the workbook
            wb.Close(True)
    finally:
        # Always shut Excel down so a failed export does not leave a hidden
        # Excel process holding the workbook open
        excel.Quit()