calkit-python 0.12.0__tar.gz → 0.13.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {calkit_python-0.12.0 → calkit_python-0.13.0}/PKG-INFO +1 -1
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/__init__.py +1 -1
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/core.py +73 -0
- calkit_python-0.13.0/calkit/datasets.py +71 -0
- calkit_python-0.12.0/calkit/data.py +0 -60
- {calkit_python-0.12.0 → calkit_python-0.13.0}/.github/FUNDING.yml +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/.github/workflows/publish-test.yml +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/.github/workflows/publish.yml +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/.gitignore +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/LICENSE +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/README.md +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/calc.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/check.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/__init__.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/check.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/config.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/core.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/import_.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/list.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/main.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/new.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/notebooks.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/office.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cli/update.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/cloud.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/conda.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/config.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/docker.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/dvc.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/git.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/gui.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/jupyter.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/magics.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/models.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/office.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/server.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/__init__.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/core.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/__init__.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/article/paper.tex +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/core.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/jfm/jfm.bst +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/jfm/jfm.cls +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/jfm/lineno-FLM.sty +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/jfm/paper.tex +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/templates/latex/jfm/upmath.sty +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/__init__.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/cli/__init__.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/cli/test_list.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/cli/test_main.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/cli/test_new.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_calc.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_check.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_conda.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_core.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_dvc.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_jupyter.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_magics.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/calkit/tests/test_templates.py +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/docs/img/calkit-no-bg.png +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/docs/tutorials/adding-latex-pub-docker.md +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/docs/tutorials/conda-envs.md +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/docs/tutorials/img/run-proc.png +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/docs/tutorials/notebook-pipeline.md +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/docs/tutorials/procedures.md +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/pyproject.toml +0 -0
- {calkit_python-0.12.0 → calkit_python-0.13.0}/test/pipeline.ipynb +0 -0
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import base64
|
|
5
6
|
import glob
|
|
6
7
|
import json
|
|
7
8
|
import logging
|
|
@@ -10,6 +11,8 @@ import pickle
|
|
|
10
11
|
import re
|
|
11
12
|
import subprocess
|
|
12
13
|
|
|
14
|
+
import requests
|
|
15
|
+
|
|
13
16
|
try:
|
|
14
17
|
from datetime import UTC
|
|
15
18
|
except ImportError:
|
|
@@ -276,3 +279,73 @@ def check_system_deps(wdir: str | None = None) -> None:
|
|
|
276
279
|
dep_name = re.split("[=<>]", dep)[0]
|
|
277
280
|
if not check_dep_exists(dep_name):
|
|
278
281
|
raise ValueError(f"{dep_name} not found")
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def project_and_path_from_path(path: str) -> tuple:
    """Split a path into project and path, respecting the ``CALKIT_PROJECT``
    environment variable if set.

    For example, a path like

        someone/some-project:some/path/to/file.png

    will return

        (someone/some-project, some/path/to/file.png)

    If the path has no project prefix, the project is taken from the
    ``CALKIT_PROJECT`` environment variable, and is ``None`` when that
    variable is not set.

    A single-letter prefix followed by a slash or backslash, e.g.
    ``C:\\data\\file.csv`` or ``C:/data/file.csv``, is treated as a Windows
    drive, so the whole string is kept as the path and no project is
    extracted from it.

    Raises ``ValueError`` if the path contains more than one project
    separator (colon).
    """
    project = None
    head, sep, tail = path.partition(":")
    # A Windows drive looks like one ASCII letter, a colon, then a path
    # separator; that colon does not separate a project from a path
    is_drive = (
        bool(sep)
        and len(head) == 1
        and head.isascii()
        and head.isalpha()
        and tail[:1] in ("/", "\\")
    )
    if sep and not is_drive:
        if ":" in tail:
            raise ValueError("Path has too many colons in it")
        project, path = head, tail
    if project is None:
        project = os.getenv("CALKIT_PROJECT")
    return project, path
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def read_file(path: str, as_bytes: bool | None = None) -> str | bytes:
    """Read file content from path, which can optionally include a project
    identifier, which if specified will indicate we should read from the API.

    If ``as_bytes`` is ``None``, it is inferred from the file extension
    (compared case-insensitively): common image, PDF, and Office extensions
    are read as bytes and everything else is read as text.

    When a project is specified, either in the path or through the
    ``CALKIT_PROJECT`` environment variable, the content is requested from
    the API. The response is expected to contain either a base64-encoded
    ``content`` value or a ``url`` to download from.

    Raises ``ValueError`` if the project identifier is not of the form
    ``owner/name`` or if the API response has neither ``content`` nor
    ``url``.
    """
    project, path = project_and_path_from_path(path)
    if as_bytes is None:
        _, ext = os.path.splitext(path)
        # Lowercase so that e.g. ``figure.PNG`` is also treated as binary
        as_bytes = ext.lower() in {
            ".png",
            ".jpg",
            ".gif",
            ".jpeg",
            ".pdf",
            ".xlsx",
            ".docx",
        }
    if project is None:
        # No project, so let's just read a local file
        with open(path, mode="rb" if as_bytes else "r") as f:
            return f.read()
    import calkit.cloud

    if len(project.split("/")) != 2:
        raise ValueError(
            f"Invalid project identifier {project!r} (expected owner/name)"
        )
    resp = calkit.cloud.get(f"/projects/{project}/contents/{path}")
    # If the response has a content key, that is a base64 encoded string
    if (content := resp.get("content")) is not None:
        content_bytes = base64.b64decode(content)
        return content_bytes if as_bytes else content_bytes.decode()
    # If the response has a URL, we can fetch from that directly
    if (url := resp.get("url")) is not None:
        # Without a timeout, requests can wait forever on a stalled server
        resp2 = requests.get(url, timeout=60)
        resp2.raise_for_status()
        return resp2.content if as_bytes else resp2.text
    raise ValueError("No content or URL returned from API")
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Functionality for working with datasets."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
import io
|
|
7
|
+
from typing import Literal
|
|
8
|
+
|
|
9
|
+
import calkit
|
|
10
|
+
import calkit.config
|
|
11
|
+
|
|
12
|
+
# Default DataFrame engine, read from the Calkit config once when this module
# is imported; used as the default ``engine`` argument of ``read_dataset``
DEFAULT_ENGINE = calkit.config.read().dataframe_engine
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _get_df_lib(engine: str):
    """Return the DataFrame library module for ``engine``.

    ``engine`` must be ``"pandas"`` or ``"polars"``. Each library is
    imported inside its own branch, so a call only imports the library that
    was asked for.

    Raises ``ValueError`` for any other engine name.
    """
    if engine == "pandas":
        import pandas

        return pandas
    if engine == "polars":
        import polars

        return polars
    # Include the offending value so the caller can see what was passed
    raise ValueError(f"Unknown engine: {engine!r}")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def list_datasets() -> list[dict]:
    """Read the Calkit metadata file and list out our datasets.

    Returns an empty list when the metadata has no ``datasets`` key or when
    that key is present but empty.
    """
    ck_info = calkit.load_calkit_info(as_pydantic=False, process_includes=True)
    # ``or []`` also covers a ``datasets`` key whose value is None, which
    # ``.get("datasets", [])`` would pass through unchanged
    return ck_info.get("datasets") or []
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def read_dataset(
    path: str,
    engine: Literal["pandas", "polars"] = DEFAULT_ENGINE,
):
    """Read a dataset from a path.

    Path can include the project owner/name like

        someone/some-project:my-data-folder/data.csv

    When a project is set via the ``CALKIT_PROJECT`` environment variable,
    we will use the API to fetch the data.

    Files ending in ``.csv`` or ``.parquet`` (compared case-insensitively)
    are supported, and are read with the ``read_csv`` or ``read_parquet``
    function of the selected ``engine``.

    Raises ``ValueError`` if the file type is not supported, if the project
    identifier is not of the form ``owner/name``, or if the API response has
    neither ``content`` nor ``url``.
    """

    def load_from_fobj(fobj, path: str):
        """Read from a filelike object or path."""
        lowered = path.lower()
        if lowered.endswith(".csv"):
            return _get_df_lib(engine).read_csv(fobj)
        if lowered.endswith(".parquet"):
            return _get_df_lib(engine).read_parquet(fobj)
        # Fail loudly rather than silently returning None
        raise ValueError(f"Unsupported dataset file type: {path}")

    project, path = calkit.project_and_path_from_path(path)
    if project is None:
        # No project, so let's just read a local file
        return load_from_fobj(path, path)
    # Import here so the ``calkit.cloud`` attribute is guaranteed to exist
    import calkit.cloud

    if len(project.split("/")) != 2:
        raise ValueError(
            f"Invalid project identifier {project!r} (expected owner/name)"
        )
    resp = calkit.cloud.get(f"/projects/{project}/contents/{path}")
    # If the response has a content key, that is a base64 encoded string
    if (content := resp.get("content")) is not None:
        content_bytes = base64.b64decode(content)
        return load_from_fobj(io.BytesIO(content_bytes), path=path)
    # If the response has a URL, we can fetch from that directly
    if (url := resp.get("url")) is not None:
        return load_from_fobj(url, path=path)
    raise ValueError("No content or URL returned from API")
|
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
"""Functionality for working with datasets.
|
|
2
|
-
|
|
3
|
-
Since the dependencies here are optional, we need to ensure this isn't imported
|
|
4
|
-
by default, or otherwise ensure ``import calkit`` works when the data
|
|
5
|
-
dependencies are not installed.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
from __future__ import annotations
|
|
9
|
-
|
|
10
|
-
from typing import Literal, Union
|
|
11
|
-
|
|
12
|
-
import pandas as pd
|
|
13
|
-
import polars as pl
|
|
14
|
-
|
|
15
|
-
import calkit.config
|
|
16
|
-
|
|
17
|
-
DEFAULT_ENGINE = calkit.config.read().dataframe_engine
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def list_data():
|
|
21
|
-
"""Read the Calkit metadata file and list out our datasets."""
|
|
22
|
-
pass
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def read_data(
|
|
26
|
-
path: str, engine: Literal["pandas", "polars"] = DEFAULT_ENGINE
|
|
27
|
-
) -> Union[pd.DataFrame, pl.DataFrame]:
|
|
28
|
-
"""Read (tabular) data from dataset with path ``path`` and return a
|
|
29
|
-
DataFrame.
|
|
30
|
-
|
|
31
|
-
If the dataset doesn't exist locally, but is a DVC object, download it
|
|
32
|
-
first.
|
|
33
|
-
|
|
34
|
-
If the dataset path includes a user and project name, we add it to the
|
|
35
|
-
project as an imported dataset, and therefore DVC import it?
|
|
36
|
-
|
|
37
|
-
For example: someuser/someproject:data/somefile.parquet
|
|
38
|
-
|
|
39
|
-
We can run a DVC import command if it needs to be imported. We will need to
|
|
40
|
-
find the Git repo and path within it? Maybe we should require an explicit
|
|
41
|
-
import of the data.
|
|
42
|
-
"""
|
|
43
|
-
pass
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def write_data(
|
|
47
|
-
data: Union[pd.DataFrame, pl.DataFrame],
|
|
48
|
-
path: str,
|
|
49
|
-
filename: str | None = None,
|
|
50
|
-
commit=False,
|
|
51
|
-
):
|
|
52
|
-
"""Write ``data`` to the dataset with path ``path``.
|
|
53
|
-
|
|
54
|
-
If the dataset path is a directory, the filename must be specified.
|
|
55
|
-
|
|
56
|
-
If the path is not a Calkit dataset, it will be created.
|
|
57
|
-
|
|
58
|
-
If ``commit`` is specified, create a commit for the dataset update.
|
|
59
|
-
"""
|
|
60
|
-
pass
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|