calkit-python 0.12.0__tar.gz → 0.13.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {calkit_python-0.12.0 → calkit_python-0.13.0}/PKG-INFO +1 -1
  2. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/__init__.py +1 -1
  3. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/core.py +73 -0
  4. calkit_python-0.13.0/calkit/datasets.py +71 -0
  5. calkit_python-0.12.0/calkit/data.py +0 -60
  6. {calkit_python-0.12.0 → calkit_python-0.13.0}/.github/FUNDING.yml +0 -0
  7. {calkit_python-0.12.0 → calkit_python-0.13.0}/.github/workflows/publish-test.yml +0 -0
  8. {calkit_python-0.12.0 → calkit_python-0.13.0}/.github/workflows/publish.yml +0 -0
  9. {calkit_python-0.12.0 → calkit_python-0.13.0}/.gitignore +0 -0
  10. {calkit_python-0.12.0 → calkit_python-0.13.0}/LICENSE +0 -0
  11. {calkit_python-0.12.0 → calkit_python-0.13.0}/README.md +0 -0
  12. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/calc.py +0 -0
  13. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/check.py +0 -0
  14. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/__init__.py +0 -0
  15. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/check.py +0 -0
  16. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/config.py +0 -0
  17. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/core.py +0 -0
  18. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/import_.py +0 -0
  19. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/list.py +0 -0
  20. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/main.py +0 -0
  21. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/new.py +0 -0
  22. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/notebooks.py +0 -0
  23. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/office.py +0 -0
  24. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/update.py +0 -0
  25. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cloud.py +0 -0
  26. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/conda.py +0 -0
  27. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/config.py +0 -0
  28. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/docker.py +0 -0
  29. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/dvc.py +0 -0
  30. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/git.py +0 -0
  31. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/gui.py +0 -0
  32. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/jupyter.py +0 -0
  33. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/magics.py +0 -0
  34. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/models.py +0 -0
  35. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/office.py +0 -0
  36. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/server.py +0 -0
  37. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/__init__.py +0 -0
  38. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/core.py +0 -0
  39. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/__init__.py +0 -0
  40. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/article/paper.tex +0 -0
  41. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/core.py +0 -0
  42. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/jfm/jfm.bst +0 -0
  43. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/jfm/jfm.cls +0 -0
  44. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/jfm/lineno-FLM.sty +0 -0
  45. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/jfm/paper.tex +0 -0
  46. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/jfm/upmath.sty +0 -0
  47. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/__init__.py +0 -0
  48. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/cli/__init__.py +0 -0
  49. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/cli/test_list.py +0 -0
  50. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/cli/test_main.py +0 -0
  51. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/cli/test_new.py +0 -0
  52. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_calc.py +0 -0
  53. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_check.py +0 -0
  54. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_conda.py +0 -0
  55. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_core.py +0 -0
  56. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_dvc.py +0 -0
  57. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_jupyter.py +0 -0
  58. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_magics.py +0 -0
  59. {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_templates.py +0 -0
  60. {calkit_python-0.12.0 → calkit_python-0.13.0}/docs/img/calkit-no-bg.png +0 -0
  61. {calkit_python-0.12.0 → calkit_python-0.13.0}/docs/tutorials/adding-latex-pub-docker.md +0 -0
  62. {calkit_python-0.12.0 → calkit_python-0.13.0}/docs/tutorials/conda-envs.md +0 -0
  63. {calkit_python-0.12.0 → calkit_python-0.13.0}/docs/tutorials/img/run-proc.png +0 -0
  64. {calkit_python-0.12.0 → calkit_python-0.13.0}/docs/tutorials/notebook-pipeline.md +0 -0
  65. {calkit_python-0.12.0 → calkit_python-0.13.0}/docs/tutorials/procedures.md +0 -0
  66. {calkit_python-0.12.0 → calkit_python-0.13.0}/pyproject.toml +0 -0
  67. {calkit_python-0.12.0 → calkit_python-0.13.0}/test/pipeline.ipynb +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: calkit-python
3
- Version: 0.12.0
3
+ Version: 0.13.0
4
4
  Summary: Reproducibility simplified.
5
5
  Project-URL: Homepage, https://github.com/calkit/calkit
6
6
  Project-URL: Issues, https://github.com/calkit/calkit/issues
@@ -1,4 +1,4 @@
1
- __version__ = "0.12.0"
1
+ __version__ = "0.13.0"
2
2
 
3
3
  from .core import *
4
4
  from . import git
@@ -2,6 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import base64
5
6
  import glob
6
7
  import json
7
8
  import logging
@@ -10,6 +11,8 @@ import pickle
10
11
  import re
11
12
  import subprocess
12
13
 
14
+ import requests
15
+
13
16
  try:
14
17
  from datetime import UTC
15
18
  except ImportError:
@@ -276,3 +279,73 @@ def check_system_deps(wdir: str | None = None) -> None:
276
279
  dep_name = re.split("[=<>]", dep)[0]
277
280
  if not check_dep_exists(dep_name):
278
281
  raise ValueError(f"{dep_name} not found")
282
+
283
+
284
def project_and_path_from_path(path: str) -> tuple[str | None, str]:
    """Split a path into project and path, respecting the ``CALKIT_PROJECT``
    environment variable if set.

    For example, a path like

        someone/some-project:some/path/to/file.png

    will return

        ("someone/some-project", "some/path/to/file.png")

    If the path has no project prefix, the project is taken from the
    ``CALKIT_PROJECT`` environment variable, and is ``None`` if that is not
    set.

    Windows drive paths (a single ASCII letter, a colon, then a slash or
    backslash) are treated as plain local paths rather than as a project
    named after the drive letter.

    Raises ``ValueError`` if the path contains more than one colon.
    """
    # A drive letter is not a project identifier; without this check the
    # colon in an absolute Windows path would be parsed as the
    # project/path separator
    is_drive_path = (
        len(path) >= 3
        and path[0].isascii()
        and path[0].isalpha()
        and path[1] == ":"
        and path[2] in "\\/"
    )
    project = None
    if not is_drive_path:
        prefix, sep, remainder = path.partition(":")
        if ":" in remainder:
            raise ValueError("Path has too many colons in it")
        if sep:
            project, path = prefix, remainder
    if project is None:
        project = os.getenv("CALKIT_PROJECT")
    return project, path
307
+
308
+
309
def read_file(path: str, as_bytes: bool | None = None) -> str | bytes:
    """Read file content from path, which can optionally include a project
    identifier, which if specified will indicate we should read from the API.

    ``path`` may look like ``someone/some-project:some/path/to/file.png``.
    If no project is given in the path, the ``CALKIT_PROJECT`` environment
    variable is used; if that is also unset, the file is read from the local
    filesystem.

    If ``as_bytes`` is ``None``, it is inferred from the file extension
    (case-insensitively): common image, PDF, and Office formats are returned
    as ``bytes`` and everything else as ``str``.

    Raises ``ValueError`` if the project identifier is not made of exactly
    two slash-separated parts, or if the API response contains neither
    content nor a URL.
    """
    binary_exts = (
        ".png",
        ".jpg",
        ".gif",
        ".jpeg",
        ".pdf",
        ".xlsx",
        ".docx",
    )
    # Seconds to wait on the download URL before giving up, so a stalled
    # connection cannot block the caller forever
    download_timeout = 30
    project, path = project_and_path_from_path(path)
    if as_bytes is None:
        _, ext = os.path.splitext(path)
        # Lowercase so that e.g. ``figure.PNG`` is still treated as binary
        as_bytes = ext.lower() in binary_exts
    if project is None:
        # No project, so let's just read a local file
        with open(path, mode="rb" if as_bytes else "r") as f:
            return f.read()
    # Imported lazily so purely local reads do not need the cloud module
    import calkit.cloud

    if len(project.split("/")) != 2:
        raise ValueError("Invalid project identifier (too many slashes)")
    resp = calkit.cloud.get(f"/projects/{project}/contents/{path}")
    # If the response has a content key, that is a base64 encoded string
    if (content := resp.get("content")) is not None:
        content_bytes = base64.b64decode(content)
        return content_bytes if as_bytes else content_bytes.decode()
    # If the response has a URL, we can fetch from that directly
    if (url := resp.get("url")) is not None:
        resp2 = requests.get(url, timeout=download_timeout)
        resp2.raise_for_status()
        return resp2.content if as_bytes else resp2.text
    raise ValueError("No content or URL returned from API")
@@ -0,0 +1,71 @@
1
+ """Functionality for working with datasets."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import base64
6
+ import io
7
+ from typing import Literal
8
+
9
+ import calkit
10
+ import calkit.config
11
+
12
+ DEFAULT_ENGINE = calkit.config.read().dataframe_engine
13
+
14
+
15
+ def _get_df_lib(engine: str):
16
+ if engine == "pandas":
17
+ import pandas
18
+
19
+ return pandas
20
+ elif engine == "polars":
21
+ import polars
22
+
23
+ return polars
24
+ else:
25
+ raise ValueError("Unknown engine")
26
+
27
+
28
def list_datasets() -> list[dict]:
    """List the datasets declared in the Calkit metadata file.

    Returns the value stored under the ``datasets`` key of the loaded
    project info, or an empty list if that key is absent.
    """
    project_info = calkit.load_calkit_info(
        process_includes=True,
        as_pydantic=False,
    )
    datasets = project_info.get("datasets", [])
    return datasets
32
+
33
+
34
def read_dataset(
    path: str,
    engine: Literal["pandas", "polars"] = DEFAULT_ENGINE,
):
    """Read a dataset from a path and return it as a DataFrame.

    Path can include the project owner/name like

        someone/some-project:my-data-folder/data.csv

    When a project is given in the path, or set via the ``CALKIT_PROJECT``
    environment variable, we will use the API to fetch the data. Otherwise
    the file is read from the local filesystem.

    Only ``.csv`` and ``.parquet`` files are supported; the extension is
    matched case-insensitively. The DataFrame is created with the library
    selected by ``engine``.

    Raises ``ValueError`` if the file type is unsupported, the engine is
    unknown, the project identifier is not made of exactly two
    slash-separated parts, or the API response contains neither content nor
    a URL.
    """
    project, path = calkit.project_and_path_from_path(path)
    # Pick the reader up front so an unsupported file type fails loudly, and
    # before any network request, instead of silently returning ``None``
    lowered = path.lower()
    if lowered.endswith(".csv"):
        reader_name = "read_csv"
    elif lowered.endswith(".parquet"):
        reader_name = "read_parquet"
    else:
        raise ValueError(
            f"Unsupported dataset file type for '{path}'; "
            "expected .csv or .parquet"
        )
    reader = getattr(_get_df_lib(engine), reader_name)
    if project is None:
        # No project, so let's just read a local file
        return reader(path)
    # Bind under an alias: a plain ``import calkit.cloud`` here would make
    # ``calkit`` a function-local name and break the lookup above
    import calkit.cloud as calkit_cloud

    if len(project.split("/")) != 2:
        raise ValueError("Invalid project identifier (too many slashes)")
    resp = calkit_cloud.get(f"/projects/{project}/contents/{path}")
    # If the response has a content key, that is a base64 encoded string
    if (content := resp.get("content")) is not None:
        return reader(io.BytesIO(base64.b64decode(content)))
    # If the response has a URL, the DataFrame library is handed the URL
    if (url := resp.get("url")) is not None:
        return reader(url)
    raise ValueError("No content or URL returned from API")
@@ -1,60 +0,0 @@
1
- """Functionality for working with datasets.
2
-
3
- Since the dependencies here are optional, we need to ensure this isn't imported
4
- by default, or otherwise ensure ``import calkit`` works when the data
5
- dependencies are not installed.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- from typing import Literal, Union
11
-
12
- import pandas as pd
13
- import polars as pl
14
-
15
- import calkit.config
16
-
17
- DEFAULT_ENGINE = calkit.config.read().dataframe_engine
18
-
19
-
20
- def list_data():
21
- """Read the Calkit metadata file and list out our datasets."""
22
- pass
23
-
24
-
25
- def read_data(
26
- path: str, engine: Literal["pandas", "polars"] = DEFAULT_ENGINE
27
- ) -> Union[pd.DataFrame, pl.DataFrame]:
28
- """Read (tabular) data from dataset with path ``path`` and return a
29
- DataFrame.
30
-
31
- If the dataset doesn't exist locally, but is a DVC object, download it
32
- first.
33
-
34
- If the dataset path includes a user and project name, we add it to the
35
- project as an imported dataset, and therefore DVC import it?
36
-
37
- For example: someuser/someproject:data/somefile.parquet
38
-
39
- We can run a DVC import command if it needs to be imported. We will need to
40
- find the Git repo and path within it? Maybe we should require an explicit
41
- import of the data.
42
- """
43
- pass
44
-
45
-
46
- def write_data(
47
- data: Union[pd.DataFrame, pl.DataFrame],
48
- path: str,
49
- filename: str | None = None,
50
- commit=False,
51
- ):
52
- """Write ``data`` to the dataset with path ``path``.
53
-
54
- If the dataset path is a directory, the filename must be specified.
55
-
56
- If the path is not a Calkit dataset, it will be created.
57
-
58
- If ``commit`` is specified, create a commit for the dataset update.
59
- """
60
- pass
File without changes
File without changes