calkit-python 0.12.0__tar.gz → 0.14.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. {calkit_python-0.12.0 → calkit_python-0.14.0}/PKG-INFO +1 -1
  2. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/__init__.py +1 -1
  3. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/cli/main.py +37 -12
  4. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/cli/new.py +173 -3
  5. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/core.py +73 -0
  6. calkit_python-0.14.0/calkit/datasets.py +71 -0
  7. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/models.py +13 -1
  8. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/tests/cli/test_main.py +91 -9
  9. calkit_python-0.12.0/calkit/data.py +0 -60
  10. {calkit_python-0.12.0 → calkit_python-0.14.0}/.github/FUNDING.yml +0 -0
  11. {calkit_python-0.12.0 → calkit_python-0.14.0}/.github/workflows/publish-test.yml +0 -0
  12. {calkit_python-0.12.0 → calkit_python-0.14.0}/.github/workflows/publish.yml +0 -0
  13. {calkit_python-0.12.0 → calkit_python-0.14.0}/.gitignore +0 -0
  14. {calkit_python-0.12.0 → calkit_python-0.14.0}/LICENSE +0 -0
  15. {calkit_python-0.12.0 → calkit_python-0.14.0}/README.md +0 -0
  16. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/calc.py +0 -0
  17. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/check.py +0 -0
  18. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/cli/__init__.py +0 -0
  19. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/cli/check.py +0 -0
  20. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/cli/config.py +0 -0
  21. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/cli/core.py +0 -0
  22. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/cli/import_.py +0 -0
  23. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/cli/list.py +0 -0
  24. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/cli/notebooks.py +0 -0
  25. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/cli/office.py +0 -0
  26. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/cli/update.py +0 -0
  27. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/cloud.py +0 -0
  28. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/conda.py +0 -0
  29. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/config.py +0 -0
  30. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/docker.py +0 -0
  31. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/dvc.py +0 -0
  32. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/git.py +0 -0
  33. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/gui.py +0 -0
  34. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/jupyter.py +0 -0
  35. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/magics.py +0 -0
  36. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/office.py +0 -0
  37. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/server.py +0 -0
  38. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/templates/__init__.py +0 -0
  39. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/templates/core.py +0 -0
  40. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/templates/latex/__init__.py +0 -0
  41. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/templates/latex/article/paper.tex +0 -0
  42. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/templates/latex/core.py +0 -0
  43. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/templates/latex/jfm/jfm.bst +0 -0
  44. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/templates/latex/jfm/jfm.cls +0 -0
  45. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/templates/latex/jfm/lineno-FLM.sty +0 -0
  46. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/templates/latex/jfm/paper.tex +0 -0
  47. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/templates/latex/jfm/upmath.sty +0 -0
  48. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/tests/__init__.py +0 -0
  49. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/tests/cli/__init__.py +0 -0
  50. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/tests/cli/test_list.py +0 -0
  51. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/tests/cli/test_new.py +0 -0
  52. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/tests/test_calc.py +0 -0
  53. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/tests/test_check.py +0 -0
  54. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/tests/test_conda.py +0 -0
  55. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/tests/test_core.py +0 -0
  56. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/tests/test_dvc.py +0 -0
  57. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/tests/test_jupyter.py +0 -0
  58. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/tests/test_magics.py +0 -0
  59. {calkit_python-0.12.0 → calkit_python-0.14.0}/calkit/tests/test_templates.py +0 -0
  60. {calkit_python-0.12.0 → calkit_python-0.14.0}/docs/img/calkit-no-bg.png +0 -0
  61. {calkit_python-0.12.0 → calkit_python-0.14.0}/docs/tutorials/adding-latex-pub-docker.md +0 -0
  62. {calkit_python-0.12.0 → calkit_python-0.14.0}/docs/tutorials/conda-envs.md +0 -0
  63. {calkit_python-0.12.0 → calkit_python-0.14.0}/docs/tutorials/img/run-proc.png +0 -0
  64. {calkit_python-0.12.0 → calkit_python-0.14.0}/docs/tutorials/notebook-pipeline.md +0 -0
  65. {calkit_python-0.12.0 → calkit_python-0.14.0}/docs/tutorials/procedures.md +0 -0
  66. {calkit_python-0.12.0 → calkit_python-0.14.0}/pyproject.toml +0 -0
  67. {calkit_python-0.12.0 → calkit_python-0.14.0}/test/pipeline.ipynb +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: calkit-python
3
- Version: 0.12.0
3
+ Version: 0.14.0
4
4
  Summary: Reproducibility simplified.
5
5
  Project-URL: Homepage, https://github.com/calkit/calkit
6
6
  Project-URL: Issues, https://github.com/calkit/calkit/issues
@@ -1,4 +1,4 @@
1
- __version__ = "0.12.0"
1
+ __version__ = "0.14.0"
2
2
 
3
3
  from .core import *
4
4
  from . import git
@@ -8,6 +8,7 @@ import hashlib
8
8
  import json
9
9
  import os
10
10
  import platform as _platform
11
+ import shlex
11
12
  import subprocess
12
13
  import sys
13
14
  import time
@@ -671,23 +672,42 @@ def run_in_env(
671
672
  subprocess.check_call(cmd, cwd=wdir)
672
673
  except subprocess.CalledProcessError:
673
674
  raise_error(f"Failed to run in {env['kind']} environment")
674
- elif env["kind"] == "uv-venv":
675
+ elif (kind := env["kind"]) in ["uv-venv", "venv"]:
675
676
  if "prefix" not in env:
676
- raise_error("uv-venv environments require a prefix")
677
+ raise_error("venv environments require a prefix")
677
678
  if "path" not in env:
678
- raise_error("uv-venv environments require a path")
679
+ raise_error("venv environments require a path")
679
680
  prefix = env["prefix"]
680
681
  path = env["path"]
681
- shell_cmd = " ".join(cmd)
682
+ # Any parts of the raw command with whitespace in them need to be
683
+ # quoted
684
+ quoted_cmd = [shlex.quote(part) for part in cmd]
685
+ shell_cmd = " ".join(quoted_cmd)
686
+ if verbose:
687
+ typer.echo(f"Raw command: {cmd}")
688
+ typer.echo(f"Quoted command: {quoted_cmd}")
689
+ typer.echo(f"Shell command: {shell_cmd}")
690
+ create_cmd = (
691
+ ["uv", "venv"] if kind == "uv-venv" else ["python", "-m", "venv"]
692
+ )
693
+ pip_cmd = "pip" if kind == "venv" else "uv pip"
694
+ pip_install_args = "-q"
695
+ if "python" in env and kind == "uv-venv":
696
+ create_cmd += ["--python", env["python"]]
697
+ pip_install_args += f" --python {env['python']}"
682
698
  # Check environment
683
699
  if not no_check:
684
700
  if not os.path.isdir(prefix):
685
701
  if verbose:
686
- typer.echo(f"Creating uv-venv at {prefix}")
702
+ typer.echo(f"Creating {kind} at {prefix}")
687
703
  try:
688
- subprocess.check_call(["uv", "venv", prefix], cwd=wdir)
704
+ subprocess.check_call(create_cmd + [prefix], cwd=wdir)
689
705
  except subprocess.CalledProcessError:
690
- raise_error(f"Failed to create uv-venv at {prefix}")
706
+ raise_error(f"Failed to create {kind} at {prefix}")
707
+ # Put a gitignore file in the env dir if one doesn't exist
708
+ if not os.path.isfile(os.path.join(prefix, ".gitignore")):
709
+ with open(os.path.join(prefix, ".gitignore"), "w") as f:
710
+ f.write("*\n")
691
711
  fname, ext = os.path.splitext(path)
692
712
  lock_fpath = fname + "-lock" + ext
693
713
  if _platform.system() == "Windows":
@@ -696,16 +716,21 @@ def run_in_env(
696
716
  activate_cmd = f". {prefix}/bin/activate"
697
717
  check_cmd = (
698
718
  f"{activate_cmd} "
699
- f"&& uv pip install -q -r {path} "
700
- f"&& uv pip freeze > {lock_fpath} "
719
+ f"&& {pip_cmd} install {pip_install_args} -r {path} "
720
+ f"&& {pip_cmd} freeze > {lock_fpath} "
701
721
  "&& deactivate"
702
722
  )
703
723
  try:
704
724
  if verbose:
705
725
  typer.echo(f"Running command: {check_cmd}")
706
- subprocess.check_output(check_cmd, shell=True, cwd=wdir)
726
+ subprocess.check_output(
727
+ check_cmd,
728
+ shell=True,
729
+ cwd=wdir,
730
+ stderr=subprocess.STDOUT if not verbose else None,
731
+ )
707
732
  except subprocess.CalledProcessError:
708
- raise_error("Failed to check uv-venv")
733
+ raise_error(f"Failed to check {kind}")
709
734
  # Now run the command
710
735
  cmd = f"{activate_cmd} && {shell_cmd} && deactivate"
711
736
  if verbose:
@@ -713,7 +738,7 @@ def run_in_env(
713
738
  try:
714
739
  subprocess.check_call(cmd, shell=True, cwd=wdir)
715
740
  except subprocess.CalledProcessError:
716
- raise_error("Failed to run in uv-venv")
741
+ raise_error(f"Failed to run in {kind}")
717
742
  else:
718
743
  raise_error("Environment kind not supported")
719
744
 
@@ -465,9 +465,7 @@ def new_docker_env(
465
465
  ] = "Dockerfile",
466
466
  stage: Annotated[
467
467
  str,
468
- typer.Option(
469
- "--stage", help="DVC pipeline stage name, deprecated."
470
- ),
468
+ typer.Option("--stage", help="DVC pipeline stage name, deprecated."),
471
469
  ] = None,
472
470
  layers: Annotated[
473
471
  list[str],
@@ -1053,6 +1051,9 @@ def new_uv_venv(
1053
1051
  prefix: Annotated[
1054
1052
  str, typer.Option("--prefix", help="Prefix for environment location.")
1055
1053
  ] = ".venv",
1054
+ python_version: Annotated[
1055
+ str, typer.Option("--python", "-p", help="Python version.")
1056
+ ] = None,
1056
1057
  description: Annotated[
1057
1058
  str, typer.Option("--description", help="Description.")
1058
1059
  ] = None,
@@ -1084,6 +1085,14 @@ def new_uv_venv(
1084
1085
  f"Environment with name {name} already exists "
1085
1086
  "(use -f to overwrite)"
1086
1087
  )
1088
+ # Check prefixes
1089
+ if not overwrite:
1090
+ for env_name, env in envs.items():
1091
+ if env.get("prefix") == prefix:
1092
+ raise_error(
1093
+ f"Environment '{env_name}' already exists with "
1094
+ f"prefix '{prefix}'"
1095
+ )
1087
1096
  packages_txt = "\n".join(packages)
1088
1097
  # Write environment to path
1089
1098
  with open(path, "w") as f:
@@ -1091,6 +1100,8 @@ def new_uv_venv(
1091
1100
  repo.git.add(path)
1092
1101
  typer.echo("Adding environment to calkit.yaml")
1093
1102
  env = dict(path=path, kind="uv-venv", prefix=prefix)
1103
+ if python_version is not None:
1104
+ env["python"] = python_version
1094
1105
  if description is not None:
1095
1106
  env["description"] = description
1096
1107
  envs[name] = env
@@ -1100,3 +1111,162 @@ def new_uv_venv(
1100
1111
  repo.git.add("calkit.yaml")
1101
1112
  if not no_commit and repo.git.diff("--staged"):
1102
1113
  repo.git.commit(["-m", f"Add uv venv {name}"])
1114
+
1115
+
1116
@new_app.command("venv")
def new_venv(
    packages: Annotated[
        list[str],
        typer.Argument(help="Packages to include in the environment."),
    ],
    name: Annotated[
        str, typer.Option("--name", "-n", help="Environment name.")
    ],
    path: Annotated[
        str, typer.Option("--path", help="Path for requirements file.")
    ] = "requirements.txt",
    prefix: Annotated[
        str, typer.Option("--prefix", help="Prefix for environment location.")
    ] = ".venv",
    description: Annotated[
        str, typer.Option("--description", help="Description.")
    ] = None,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite",
            "-f",
            help="Overwrite any existing environment with this name.",
        ),
    ] = False,
    no_commit: Annotated[
        bool, typer.Option("--no-commit", help="Do not commit changes.")
    ] = False,
):
    """Create a new Python virtual environment with venv."""
    # Refuse to clobber an existing requirements file unless forced
    if not overwrite and os.path.isfile(path):
        raise_error("Output path already exists (use -f to overwrite)")
    repo = git.Repo()
    ck_info = calkit.load_calkit_info()
    environments = ck_info.get("environments", {})
    # Older metadata stored environments as a list of dicts with a "name"
    # key; normalize to the name -> definition mapping
    if isinstance(environments, list):
        typer.echo("Converting environments from list to dict")
        environments = {item.pop("name"): item for item in environments}
    if not overwrite:
        # The name must be unused ...
        if name in environments:
            raise_error(
                f"Environment with name {name} already exists "
                "(use -f to overwrite)"
            )
        # ... and so must the prefix
        for existing_name, existing in environments.items():
            if existing.get("prefix") == prefix:
                raise_error(
                    f"Environment '{existing_name}' already exists with "
                    f"prefix '{prefix}'"
                )
    # Write the requirements file, one package spec per line, and stage it
    with open(path, "w") as req_file:
        req_file.write("\n".join(packages))
    repo.git.add(path)
    typer.echo("Adding environment to calkit.yaml")
    new_env = {"path": path, "kind": "venv", "prefix": prefix}
    if description is not None:
        new_env["description"] = description
    environments[name] = new_env
    ck_info["environments"] = environments
    with open("calkit.yaml", "w") as info_file:
        ryaml.dump(ck_info, info_file)
    repo.git.add("calkit.yaml")
    # Only commit when requested and when something is actually staged
    if not no_commit and repo.git.diff("--staged"):
        repo.git.commit(["-m", f"Add venv {name}"])
1186
+
1187
+
1188
@new_app.command("pixi-env")
def new_pixi_env(
    packages: Annotated[
        list[str],
        typer.Argument(help="Packages to include in the environment."),
    ],
    name: Annotated[
        str, typer.Option("--name", "-n", help="Environment name.")
    ],
    description: Annotated[
        str, typer.Option("--description", help="Description.")
    ] = None,
    overwrite: Annotated[
        bool,
        typer.Option(
            "--overwrite",
            "-f",
            help="Overwrite any existing environment with this name.",
        ),
    ] = False,
    no_commit: Annotated[
        bool, typer.Option("--no-commit", help="Do not commit changes.")
    ] = False,
):
    """Create a new pixi virtual environment."""
    # NOTE: The docstring above doubles as the CLI help text, so the details
    # live in comments. This command initializes a pixi project in the
    # current directory if ``pixi.toml`` is missing, adds each package to a
    # pixi feature named after the environment, creates a pixi environment
    # from that feature, and records the result in calkit.yaml.

    def _run_pixi(args: list, failure_msg: str) -> None:
        # Run a pixi subcommand and abort on failure, so a half-created
        # environment is never written to calkit.yaml or committed
        try:
            subprocess.run(["pixi"] + args, check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            raise_error(failure_msg)

    repo = git.Repo()
    # Add environment to Calkit info
    ck_info = calkit.load_calkit_info()
    # If environments is a list instead of a dict, reformulate it
    envs = ck_info.get("environments", {})
    if isinstance(envs, list):
        typer.echo("Converting environments from list to dict")
        envs = {env.pop("name"): env for env in envs}
    if name in envs and not overwrite:
        raise_error(
            f"Environment with name {name} already exists "
            "(use -f to overwrite)"
        )
    # Create the pixi project now if there isn't one, targeting all of the
    # common platforms so the project is portable
    if not os.path.isfile("pixi.toml"):
        _run_pixi(
            [
                "init",
                ".",
                "--format",
                "pixi",
                "--platform",
                "win-64",
                "--platform",
                "linux-64",
                "--platform",
                "osx-64",
                "--platform",
                "osx-arm64",
            ],
            "Failed to initialize pixi project",
        )
    # Install the packages into a feature named after the environment
    for pkg in packages:
        _run_pixi(
            ["add", pkg, "--feature", name],
            f"Failed to add package '{pkg}' to pixi feature '{name}'",
        )
    # Create a pixi environment built from that feature
    _run_pixi(
        [
            "project",
            "environment",
            "add",
            name,
            "--feature",
            name,
            "--force",
        ],
        f"Failed to create pixi environment '{name}'",
    )
    typer.echo("Adding environment to calkit.yaml")
    env = dict(kind="pixi", path="pixi.toml", name=name)
    if description is not None:
        env["description"] = description
    envs[name] = env
    ck_info["environments"] = envs
    with open("calkit.yaml", "w") as f:
        ryaml.dump(ck_info, f)
    repo.git.add("pixi.toml")
    repo.git.add("calkit.yaml")
    if not no_commit and repo.git.diff("--staged"):
        repo.git.commit(["-m", f"Add pixi env {name}"])
@@ -2,6 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import base64
5
6
  import glob
6
7
  import json
7
8
  import logging
@@ -10,6 +11,8 @@ import pickle
10
11
  import re
11
12
  import subprocess
12
13
 
14
+ import requests
15
+
13
16
  try:
14
17
  from datetime import UTC
15
18
  except ImportError:
@@ -276,3 +279,73 @@ def check_system_deps(wdir: str | None = None) -> None:
276
279
  dep_name = re.split("[=<>]", dep)[0]
277
280
  if not check_dep_exists(dep_name):
278
281
  raise ValueError(f"{dep_name} not found")
282
+
283
+
284
def project_and_path_from_path(path: str) -> tuple:
    """Split a path into project and path, respecting the ``CALKIT_PROJECT``
    environmental variable if set.

    For example, a path like

        someone/some-project:some/path/to/file.png

    will return

        (someone/some-project, some/path/to/file.png)

    A path without a project prefix returns the value of ``CALKIT_PROJECT``
    (or ``None`` if unset) as the project. A path that starts with a Windows
    drive specifier, e.g. ``C:\\data\\file.csv``, is treated as a local path
    rather than as project ``C``.

    Returns
    -------
    tuple
        ``(project, path)`` where ``project`` is a string or ``None``.

    Raises
    ------
    ValueError
        If the path contains more than one project separator (``:``).
    """
    if re.match(r"[A-Za-z]:[\\/]", path):
        # The colon belongs to a drive letter, not a project identifier
        project = None
    else:
        path_split = path.split(":")
        if len(path_split) == 2:
            project, path = path_split
        elif len(path_split) == 1:
            project = None
        else:
            raise ValueError("Path has too many colons in it")
    # Fall back to the ambient project only when none was given explicitly
    if project is None:
        project = os.getenv("CALKIT_PROJECT")
    return project, path
307
+
308
+
309
def read_file(path: str, as_bytes: bool = None) -> str | bytes:
    """Read file content from path, which can optionally include a project
    identifier, which if specified will indicate we should read from the API.

    Parameters
    ----------
    path
        Local path, or ``owner/project:path`` to read from a project through
        the API. The project can also come from the ``CALKIT_PROJECT``
        environmental variable.
    as_bytes
        Whether to return bytes instead of text. If ``None``, this is
        inferred from the file extension (images, PDFs, and Office documents
        are read as bytes).

    Raises
    ------
    ValueError
        If the project identifier is not of the form ``owner/name``, or the
        API response has neither content nor a URL.
    """
    project, path = project_and_path_from_path(path)
    if as_bytes is None:
        _, ext = os.path.splitext(path)
        # Compare case-insensitively so e.g. ``figure.PNG`` is not decoded
        # as text
        as_bytes = ext.lower() in [
            ".png",
            ".jpg",
            ".gif",
            ".jpeg",
            ".pdf",
            ".xlsx",
            ".docx",
        ]
    if project is not None:
        import calkit.cloud

        if len(project.split("/")) != 2:
            raise ValueError("Invalid project identifier (too many slashes)")
        resp = calkit.cloud.get(f"/projects/{project}/contents/{path}")
        # If the response has a content key, that is a base64 encoded string
        if (content := resp.get("content")) is not None:
            content_bytes = base64.b64decode(content)
            return content_bytes if as_bytes else content_bytes.decode()
        # If the response has a URL, we can fetch from that directly
        elif (url := resp.get("url")) is not None:
            # Without a timeout a stalled server would hang the caller
            # forever; this bounds connect time and the wait between bytes,
            # not the total download time
            resp2 = requests.get(url, timeout=60)
            resp2.raise_for_status()
            return resp2.content if as_bytes else resp2.text
        else:
            raise ValueError("No content or URL returned from API")
    # Project is None, so let's just read a local file
    with open(path, mode="rb" if as_bytes else "r") as f:
        return f.read()
@@ -0,0 +1,71 @@
1
+ """Functionality for working with datasets."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import base64
6
+ import io
7
+ from typing import Literal
8
+
9
+ import calkit
10
+ import calkit.config
11
+
12
# Dataframe engine used when a caller does not pass one explicitly; read from
# the Calkit config once, when this module is imported
DEFAULT_ENGINE = calkit.config.read().dataframe_engine
13
+
14
+
15
+ def _get_df_lib(engine: str):
16
+ if engine == "pandas":
17
+ import pandas
18
+
19
+ return pandas
20
+ elif engine == "polars":
21
+ import polars
22
+
23
+ return polars
24
+ else:
25
+ raise ValueError("Unknown engine")
26
+
27
+
28
def list_datasets() -> list[dict]:
    """Return the datasets declared in the project's Calkit metadata.

    The metadata is loaded as plain dictionaries with includes processed; an
    empty list is returned when no ``datasets`` key is present.
    """
    project_info = calkit.load_calkit_info(
        process_includes=True, as_pydantic=False
    )
    datasets = project_info.get("datasets", [])
    return datasets
32
+
33
+
34
def read_dataset(
    path: str,
    engine: Literal["pandas", "polars"] = DEFAULT_ENGINE,
):
    """Read a dataset from a path.

    Path can include the project owner/name like

        someone/some-project:my-data-folder/data.csv

    When a project is set via the ``CALKIT_PROJECT`` environmental variable,
    we will use the API to fetch the data.

    Parameters
    ----------
    path
        Path to a ``.csv`` or ``.parquet`` file, optionally prefixed with
        ``owner/project:``.
    engine
        Dataframe library used to read the file.

    Returns
    -------
    The object returned by the selected library's ``read_csv`` or
    ``read_parquet``.

    Raises
    ------
    ValueError
        If the file type is unsupported, the project identifier is not of
        the form ``owner/name``, or the API response has neither content nor
        a URL.
    """

    def load_from_fobj(fobj, path: str):
        """Read from a filelike object or path."""
        # Match the extension case-insensitively (e.g. ``DATA.CSV``)
        lowered = path.lower()
        if lowered.endswith(".csv"):
            return _get_df_lib(engine).read_csv(fobj)
        if lowered.endswith(".parquet"):
            return _get_df_lib(engine).read_parquet(fobj)
        # Fail loudly rather than silently returning None
        raise ValueError(f"Unsupported dataset file type: {path}")

    project, path = calkit.project_and_path_from_path(path)
    if project is not None:
        # This module only imports ``calkit`` and ``calkit.config`` at the
        # top, so import the cloud client explicitly. It is imported under
        # an alias so the name ``calkit`` is not rebound as a local, which
        # would break the ``calkit.project_and_path_from_path`` call above.
        import calkit.cloud as ck_cloud

        if len(project.split("/")) != 2:
            raise ValueError("Invalid project identifier (too many slashes)")
        resp = ck_cloud.get(f"/projects/{project}/contents/{path}")
        # If the response has a content key, that is a base64 encoded string
        if (content := resp.get("content")) is not None:
            content_bytes = base64.b64decode(content)
            return load_from_fobj(io.BytesIO(content_bytes), path=path)
        # If the response has a URL, we can fetch from that directly
        elif (url := resp.get("url")) is not None:
            return load_from_fobj(url, path=path)
        else:
            raise ValueError("No content or URL returned from API")
    # Project is None, so let's just read a local file
    return load_from_fobj(path, path)
@@ -71,6 +71,7 @@ class Environment(BaseModel):
71
71
  "remote-ssh",
72
72
  "uv",
73
73
  "pixi",
74
+ "venv",
74
75
  "uv-venv",
75
76
  "renv",
76
77
  ]
@@ -80,11 +81,21 @@ class Environment(BaseModel):
80
81
  default: bool | None = None
81
82
 
82
83
 
84
class VenvEnvironment(Environment):
    # Environment of kind "venv" (created with ``python -m venv``)
    kind: Literal["venv"]
    # Directory in which the virtual environment is created (required)
    prefix: str
87
+
88
+
83
89
class UvVenvEnvironment(Environment):
    # Environment of kind "uv-venv" (created with ``uv venv``)
    kind: Literal["uv-venv"]
    # Directory in which the virtual environment is created (required)
    prefix: str
86
92
 
87
93
 
94
class PixiEnvironment(Environment):
    # Environment of kind "pixi"
    kind: Literal["pixi"]
    # Optional environment name; ``calkit new pixi-env`` stores the name it
    # was given here
    name: str | None = None
97
+
98
+
88
99
  class DockerEnvironment(Environment):
89
100
  kind: Literal["docker"]
90
101
  image: str
@@ -222,7 +233,8 @@ class ProjectInfo(BaseModel):
222
233
  publications: list[Publication] = []
223
234
  references: list[ReferenceCollection] = []
224
235
  environments: dict[
225
- str, Environment | DockerEnvironment | UvVenvEnvironment
236
+ str,
237
+ Environment | DockerEnvironment | VenvEnvironment | UvVenvEnvironment,
226
238
  ] = {}
227
239
  software: list[Software] = []
228
240
  notebooks: list[Notebook] = []
@@ -89,23 +89,105 @@ def test_run_in_env(tmp_dir):
89
89
  ck_info = calkit.load_calkit_info()
90
90
  env = ck_info["environments"]["py3.10"]
91
91
  assert env.get("path") is None
92
+
93
+
94
def test_run_in_venv(tmp_dir):
    """Create uv-venv, venv, and pixi environments and run Python in each.

    Every environment pins a package version, and the version reported from
    inside the environment must match the pin.
    """

    def version_in_env(env_name: str, code: str) -> str:
        # Run ``code`` with the environment's Python and return what it
        # printed, stripped of surrounding whitespace
        raw = subprocess.check_output(
            ["calkit", "xenv", "-n", env_name, "--", "python", "-c", code]
        )
        return raw.decode().strip()

    polars_code = "import polars; print(polars.__version__)"
    pandas_code = "import pandas; print(pandas.__version__)"
    subprocess.check_call("git init", shell=True)
    subprocess.check_call("dvc init", shell=True)
    # Test uv venv
    new_uv_venv_cmd = ["calkit", "new", "uv-venv", "-n", "uv1"]
    new_uv_venv_cmd += ["--python", "3.13", "--no-commit", "polars==1.18.0"]
    subprocess.check_call(new_uv_venv_cmd)
    assert version_in_env("uv1", polars_code) == "1.18.0"
    # Test regular venvs
    new_venv_cmd = ["calkit", "new", "venv", "-n", "venv1"]
    new_venv_cmd += ["--prefix", ".venv1", "--path", "reqs2.txt"]
    new_venv_cmd += ["polars==1.17.0"]
    subprocess.check_call(new_venv_cmd)
    assert version_in_env("venv1", polars_code) == "1.17.0"
    # Test pixi envs
    subprocess.check_call(
        ["calkit", "new", "pixi-env", "-n", "my-pixi", "pandas=2.0.0"]
    )
    # The metadata must load as pydantic models and contain the new env
    calkit.load_calkit_info(as_pydantic=True).environments["my-pixi"]
    assert version_in_env("my-pixi", pandas_code) == "2.0.0"
109
191
 
110
192
 
111
193
  def test_check_call():
@@ -1,60 +0,0 @@
1
- """Functionality for working with datasets.
2
-
3
- Since the dependencies here are optional, we need to ensure this isn't imported
4
- by default, or otherwise ensure ``import calkit`` works when the data
5
- dependencies are not installed.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- from typing import Literal, Union
11
-
12
- import pandas as pd
13
- import polars as pl
14
-
15
- import calkit.config
16
-
17
- DEFAULT_ENGINE = calkit.config.read().dataframe_engine
18
-
19
-
20
- def list_data():
21
- """Read the Calkit metadata file and list out our datasets."""
22
- pass
23
-
24
-
25
- def read_data(
26
- path: str, engine: Literal["pandas", "polars"] = DEFAULT_ENGINE
27
- ) -> Union[pd.DataFrame, pl.DataFrame]:
28
- """Read (tabular) data from dataset with path ``path`` and return a
29
- DataFrame.
30
-
31
- If the dataset doesn't exist locally, but is a DVC object, download it
32
- first.
33
-
34
- If the dataset path includes a user and project name, we add it to the
35
- project as an imported dataset, and therefore DVC import it?
36
-
37
- For example: someuser/someproject:data/somefile.parquet
38
-
39
- We can run a DVC import command if it needs to be imported. We will need to
40
- find the Git repo and path within it? Maybe we should require an explicit
41
- import of the data.
42
- """
43
- pass
44
-
45
-
46
- def write_data(
47
- data: Union[pd.DataFrame, pl.DataFrame],
48
- path: str,
49
- filename: str | None = None,
50
- commit=False,
51
- ):
52
- """Write ``data`` to the dataset with path ``path``.
53
-
54
- If the dataset path is a directory, the filename must be specified.
55
-
56
- If the path is not a Calkit dataset, it will be created.
57
-
58
- If ``commit`` is specified, create a commit for the dataset update.
59
- """
60
- pass
File without changes
File without changes