lamin_cli 1.1.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamin_cli-1.3.0/.github/workflows/build.yml +82 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/.gitignore +1 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/.pre-commit-config.yaml +2 -2
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/PKG-INFO +1 -1
- lamin_cli-1.3.0/lamin_cli/__init__.py +3 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/lamin_cli/__main__.py +90 -32
- lamin_cli-1.3.0/lamin_cli/_load.py +192 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/lamin_cli/_save.py +29 -2
- lamin_cli-1.3.0/lamin_cli/compute/__init__.py +0 -0
- lamin_cli-1.3.0/lamin_cli/compute/modal.py +175 -0
- lamin_cli-1.3.0/noxfile.py +13 -0
- {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_load.py +18 -1
- {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_multi_process.py +1 -1
- {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_save_files.py +2 -2
- {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_save_notebooks.py +4 -6
- {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_save_r_code.py +1 -1
- {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_save_scripts.py +1 -1
- lamin_cli-1.3.0/tests/modal/test_modal.py +19 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/scripts/run-track-and-finish.py +0 -4
- lamin_cli-1.1.0/lamin_cli/__init__.py +0 -3
- lamin_cli-1.1.0/lamin_cli/_load.py +0 -169
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/.github/workflows/doc-changes.yml +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/LICENSE +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/README.md +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/lamin_cli/_cache.py +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/lamin_cli/_migration.py +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/lamin_cli/_settings.py +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/pyproject.toml +0 -0
- {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/conftest.py +0 -0
- {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_cli.py +0 -0
- {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_migrate.py +0 -0
- {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_parse_uid_from_code.py +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/notebooks/not-initialized.ipynb +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/notebooks/with-title-and-initialized-consecutive.ipynb +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/notebooks/with-title-and-initialized-non-consecutive.ipynb +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/scripts/merely-import-lamindb.py +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/scripts/run-track-and-finish-sync-git.py +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/scripts/run-track-with-params.py +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/scripts/run-track.R +0 -0
- {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/scripts/run-track.qmd +0 -0
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# CI workflow: decides which test groups to run, then runs the Modal tests.
name: build

on:
  push:
    branches: [release]
  pull_request:

jobs:
  # Computes the test matrix consumed by the `test` job.
  pre-filter:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # Path filtering is only evaluated for non-push events; pushes run everything.
      - uses: dorny/paths-filter@v3
        id: changes
        if: github.event_name != 'push'
        with:
          filters: |
            modal:
              - 'lamin_cli/compute/modal.py'
              - 'tests/modal/**'

      - id: set-matrix
        shell: bash
        run: |
          BASE_GROUPS=$(jq -n -c '[]')

          if [[ "${{ github.event_name }}" == "push" || "${{ steps.changes.outputs.modal }}" == "true" ]]; then
            # Run everything on push or when modal paths change
            MATRIX=$(jq -n -c --argjson groups "$BASE_GROUPS" '{group: ($groups + ["modal"])}')
          else
            # Otherwise only run base groups
            MATRIX=$(jq -n -c --argjson groups "$BASE_GROUPS" '{group: $groups}')
          fi

          # Output as single line for GitHub Actions
          echo "matrix=$(echo "$MATRIX" | jq -c .)" >> "$GITHUB_OUTPUT"

          # Pretty print for debugging
          echo "Generated matrix:"
          echo "$MATRIX" | jq .

  test:
    needs: pre-filter
    # An empty `group` vector is not a valid matrix and would fail the workflow,
    # so skip the job entirely when no group was selected.
    if: ${{ needs.pre-filter.outputs.matrix != '{"group":[]}' }}
    runs-on: ubuntu-latest
    env:
      LAMIN_API_KEY: ${{ secrets.LAMIN_API_KEY_TESTUSER1 }}
    strategy:
      fail-fast: false
      matrix: ${{fromJson(needs.pre-filter.outputs.matrix)}}
    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
          fetch-depth: 0

      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML never parses the version as a float.
          python-version: "3.12"

      - name: cache pre-commit
        uses: actions/cache@v4
        with:
          path: ~/.cache/pre-commit
          key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}

      - run: pip install git+https://github.com/laminlabs/laminci

      - run: uv pip install --system modal pytest

      # Secrets are passed through the environment instead of being expanded
      # into the command line by the workflow templating.
      - run: modal token set --token-id "$MODAL_DEV_TOKEN_ID" --token-secret "$MODAL_DEV_TOKEN_SECRET"
        env:
          MODAL_DEV_TOKEN_ID: ${{ secrets.MODAL_DEV_TOKEN_ID }}
          MODAL_DEV_TOKEN_SECRET: ${{ secrets.MODAL_DEV_TOKEN_SECRET }}

      - run: nox -s setup

      - run: lamin login

      - run: pytest tests/modal
|
|
@@ -16,8 +16,8 @@ repos:
|
|
|
16
16
|
- id: ruff
|
|
17
17
|
args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]
|
|
18
18
|
- id: ruff-format
|
|
19
|
-
- repo: https://github.com/
|
|
20
|
-
rev:
|
|
19
|
+
- repo: https://github.com/rbubley/mirrors-prettier
|
|
20
|
+
rev: v3.5.1
|
|
21
21
|
hooks:
|
|
22
22
|
- id: prettier
|
|
23
23
|
- repo: https://github.com/kynan/nbstripout
|
|
@@ -9,6 +9,13 @@ from functools import wraps
|
|
|
9
9
|
from importlib.metadata import PackageNotFoundError, version
|
|
10
10
|
from typing import TYPE_CHECKING
|
|
11
11
|
|
|
12
|
+
from lamindb_setup._init_instance import (
|
|
13
|
+
DOC_DB,
|
|
14
|
+
DOC_INSTANCE_NAME,
|
|
15
|
+
DOC_MODULES,
|
|
16
|
+
DOC_STORAGE_ARG,
|
|
17
|
+
)
|
|
18
|
+
|
|
12
19
|
if TYPE_CHECKING:
|
|
13
20
|
from collections.abc import Mapping
|
|
14
21
|
|
|
@@ -41,12 +48,7 @@ else:
|
|
|
41
48
|
"lamin": [
|
|
42
49
|
{
|
|
43
50
|
"name": "Connect to an instance",
|
|
44
|
-
"commands": [
|
|
45
|
-
"connect",
|
|
46
|
-
"disconnect",
|
|
47
|
-
"info",
|
|
48
|
-
"init",
|
|
49
|
-
],
|
|
51
|
+
"commands": ["connect", "disconnect", "info", "init", "run"],
|
|
50
52
|
},
|
|
51
53
|
{
|
|
52
54
|
"name": "Read & write data",
|
|
@@ -155,24 +157,20 @@ def schema_to_modules_callback(ctx, param, value):
|
|
|
155
157
|
|
|
156
158
|
# fmt: off
|
|
157
159
|
@main.command()
|
|
158
|
-
@click.option("--storage", type=str,
|
|
159
|
-
@click.option("--
|
|
160
|
-
@click.option("--
|
|
161
|
-
@click.option("--
|
|
162
|
-
@click.option("--schema", type=str, default=None, help="[DEPRECATED] Use --modules instead.", callback=schema_to_modules_callback)
|
|
160
|
+
@click.option("--storage", type=str, default = ".", help=DOC_STORAGE_ARG)
|
|
161
|
+
@click.option("--name", type=str, default=None, help=DOC_INSTANCE_NAME)
|
|
162
|
+
@click.option("--db", type=str, default=None, help=DOC_DB)
|
|
163
|
+
@click.option("--modules", type=str, default=None, help=DOC_MODULES)
|
|
163
164
|
# fmt: on
|
|
164
165
|
def init(
|
|
165
166
|
storage: str,
|
|
167
|
+
name: str | None,
|
|
166
168
|
db: str | None,
|
|
167
169
|
modules: str | None,
|
|
168
|
-
name: str | None,
|
|
169
|
-
schema: str | None,
|
|
170
170
|
):
|
|
171
171
|
"""Init an instance."""
|
|
172
172
|
from lamindb_setup._init_instance import init as init_
|
|
173
173
|
|
|
174
|
-
modules = modules if modules is not None else schema
|
|
175
|
-
|
|
176
174
|
return init_(storage=storage, db=db, modules=modules, name=name)
|
|
177
175
|
|
|
178
176
|
|
|
@@ -188,6 +186,8 @@ def connect(instance: str):
|
|
|
188
186
|
`lamin connect` switches
|
|
189
187
|
{attr}`~lamindb.setup.core.SetupSettings.auto_connect` to `True` so that you
|
|
190
188
|
auto-connect in a Python session upon importing `lamindb`.
|
|
189
|
+
|
|
190
|
+
For manually connecting in a Python session, use {func}`~lamindb.connect`.
|
|
191
191
|
"""
|
|
192
192
|
from lamindb_setup import connect as connect_
|
|
193
193
|
from lamindb_setup import settings as settings_
|
|
@@ -279,45 +279,103 @@ def load(entity: str, uid: str | None = None, key: str | None = None, with_env:
|
|
|
279
279
|
@click.argument("entity", type=str)
|
|
280
280
|
@click.option("--uid", help="The uid for the entity.")
|
|
281
281
|
@click.option("--key", help="The key for the entity.")
|
|
282
|
-
|
|
283
|
-
"--with-env", is_flag=True, help="Also return the environment for a tranform."
|
|
284
|
-
)
|
|
285
|
-
def get(entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False):
|
|
282
|
+
def get(entity: str, uid: str | None = None, key: str | None = None):
|
|
286
283
|
"""Query metadata about an entity.
|
|
287
284
|
|
|
288
|
-
Currently only works for artifact
|
|
285
|
+
Currently only works for artifact.
|
|
289
286
|
"""
|
|
290
|
-
|
|
287
|
+
import lamindb_setup as ln_setup
|
|
288
|
+
|
|
289
|
+
from ._load import decompose_url
|
|
290
|
+
|
|
291
|
+
if entity.startswith("https://") and "lamin" in entity:
|
|
292
|
+
url = entity
|
|
293
|
+
instance, entity, uid = decompose_url(url)
|
|
294
|
+
elif entity not in {"artifact"}:
|
|
295
|
+
raise SystemExit("Entity has to be a laminhub URL or 'artifact'")
|
|
296
|
+
else:
|
|
297
|
+
instance = ln_setup.settings.instance.slug
|
|
298
|
+
|
|
299
|
+
ln_setup.connect(instance)
|
|
300
|
+
import lamindb as ln
|
|
291
301
|
|
|
292
|
-
|
|
293
|
-
|
|
302
|
+
if uid is not None:
|
|
303
|
+
artifact = ln.Artifact.get(uid)
|
|
304
|
+
else:
|
|
305
|
+
artifact = ln.Artifact.get(key=key)
|
|
306
|
+
artifact.describe()
|
|
294
307
|
|
|
295
308
|
|
|
296
309
|
@main.command()
|
|
297
|
-
@click.argument("
|
|
310
|
+
@click.argument("path", type=click.Path(exists=True, dir_okay=True, file_okay=True))
|
|
298
311
|
@click.option("--key", type=str, default=None)
|
|
299
312
|
@click.option("--description", type=str, default=None)
|
|
313
|
+
@click.option("--stem-uid", type=str, default=None)
|
|
300
314
|
@click.option("--registry", type=str, default=None)
|
|
301
|
-
def save(
|
|
315
|
+
def save(path: str, key: str, description: str, stem_uid: str, registry: str):
|
|
302
316
|
"""Save a file or folder.
|
|
303
317
|
|
|
304
|
-
Defaults to saving `.py
|
|
305
|
-
other file types and folders as {class}`~lamindb.Artifact`.
|
|
306
|
-
|
|
307
|
-
You can save a `.py` or `.ipynb` file as an {class}`~lamindb.Artifact` by
|
|
308
|
-
passing `--registry artifact`.
|
|
318
|
+
Defaults to saving `.py`, `.ipynb`, `.R`, `.Rmd`, and `.qmd` as {class}`~lamindb.Transform` and
|
|
319
|
+
other file types and folders as {class}`~lamindb.Artifact`. You can save a `.py` or `.ipynb` file as
|
|
320
|
+
an {class}`~lamindb.Artifact` by passing `--registry artifact`.
|
|
309
321
|
"""
|
|
310
322
|
from lamin_cli._save import save_from_filepath_cli
|
|
311
323
|
|
|
312
|
-
if save_from_filepath_cli(
|
|
324
|
+
if save_from_filepath_cli(path, key, description, stem_uid, registry) is not None:
|
|
313
325
|
sys.exit(1)
|
|
314
326
|
|
|
315
327
|
|
|
328
|
+
@main.command()
@click.argument("filepath", type=click.Path(exists=True, dir_okay=False))
@click.option("--project", type=str, required=True, help="A valid project name or uid. When running on Modal, creates an app with the same name.")
@click.option("--image-url", type=str, default=None, help="A URL to the base docker image to use.")
@click.option("--packages", type=str, default="lamindb", help="A comma-separated list of additional packages to install.")
@click.option("--cpu", type=float, default=None, help="Configuration for the CPU.")
@click.option("--gpu", type=str, default=None, help="The type of GPU to use (only compatible with cuda images).")
def run(filepath: str, project: str, image_url: str | None, packages: str, cpu: float | None, gpu: str | None):
    """Run a compute job in the cloud.

    This is an EXPERIMENTAL feature that lets you run a script on Modal.

    Example: Given a valid project name "my_project".

    ```
    lamin run my_script.py --project my_project
    ```
    """
    import shutil
    from pathlib import Path

    from lamin_cli.compute.modal import Runner

    # The script is copied into a local directory that is handed to the runner
    # as its mount directory.
    default_mount_dir = Path("./modal_mount_dir")
    default_mount_dir.mkdir(parents=True, exist_ok=True)

    shutil.copy(filepath, default_mount_dir)
    filepath_in_mount_dir = default_mount_dir / Path(filepath).name

    # Drop blank entries such as those produced by "a,,b" or a trailing comma.
    package_list = []
    if packages:
        stripped_names = (package.strip() for package in packages.split(","))
        package_list = [name for name in stripped_names if name]

    runner = Runner(
        local_mount_dir=default_mount_dir,
        app_name=project,
        packages=package_list,
        image_url=image_url,
        cpu=cpu,
        gpu=gpu,
    )

    runner.run(filepath_in_mount_dir)
|
|
373
|
+
|
|
374
|
+
|
|
316
375
|
main.add_command(settings)
|
|
317
376
|
main.add_command(cache)
|
|
318
377
|
main.add_command(migrate)
|
|
319
378
|
|
|
320
|
-
|
|
321
379
|
# https://stackoverflow.com/questions/57810659/automatically-generate-all-help-documentation-for-click-commands
|
|
322
380
|
# https://claude.ai/chat/73c28487-bec3-4073-8110-50d1a2dd6b84
|
|
323
381
|
def _generate_help():
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import shutil
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from lamin_utils import logger
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def decompose_url(url: str) -> tuple[str, str, str]:
    """Split a hub URL into its instance slug, entity type and uid.

    The URL is expected to have the form
    ``https://<host>/<account>/<instance>/<entity>/<uid>`` where ``<entity>``
    is one of ``transform``, ``artifact`` or ``collection``.

    The entity is matched as a whole path segment located after the instance
    slug, so account or instance names that merely contain one of the entity
    words (e.g. ``transform-tests``) are not mistaken for the entity.

    Args:
        url: The URL to decompose.

    Returns:
        A tuple ``(instance_slug, entity, uid)`` in which ``instance_slug`` is
        ``"<account>/<instance>"`` and ``uid`` is everything that follows the
        entity segment.

    Raises:
        ValueError: If the URL has no entity segment followed by a uid.
    """
    entities = ("transform", "artifact", "collection")
    segments = url.split("/")
    # segments[0:3] are "https:", "" and the host; [3:5] are account and instance
    instance_slug = "/".join(segments[3:5])
    for index, segment in enumerate(segments[5:], start=5):
        if segment in entities:
            uid = "/".join(segments[index + 1 :])
            if uid:
                return instance_slug, segment, uid
            break
    raise ValueError(
        f"Cannot decompose URL '{url}': expected"
        " https://<host>/<account>/<instance>/<entity>/<uid> with entity being"
        " one of 'transform', 'artifact' or 'collection'"
    )
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def load(
|
|
21
|
+
entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False
|
|
22
|
+
):
|
|
23
|
+
import lamindb_setup as ln_setup
|
|
24
|
+
|
|
25
|
+
if entity.startswith("https://") and "lamin" in entity:
|
|
26
|
+
url = entity
|
|
27
|
+
instance, entity, uid = decompose_url(url)
|
|
28
|
+
elif entity not in {"artifact", "transform", "collection"}:
|
|
29
|
+
raise SystemExit(
|
|
30
|
+
"Entity has to be a laminhub URL or 'artifact', 'collection', or 'transform'"
|
|
31
|
+
)
|
|
32
|
+
else:
|
|
33
|
+
instance = ln_setup.settings.instance.slug
|
|
34
|
+
|
|
35
|
+
ln_setup.connect(instance)
|
|
36
|
+
import lamindb as ln
|
|
37
|
+
|
|
38
|
+
def script_to_notebook(
|
|
39
|
+
transform: ln.Transform, notebook_path: Path, bump_revision: bool = False
|
|
40
|
+
) -> None:
|
|
41
|
+
import jupytext
|
|
42
|
+
from lamin_utils._base62 import increment_base62
|
|
43
|
+
|
|
44
|
+
if notebook_path.suffix == ".ipynb":
|
|
45
|
+
# below is backward compat
|
|
46
|
+
if "# # transform.name" in transform.source_code:
|
|
47
|
+
new_content = transform.source_code.replace(
|
|
48
|
+
"# # transform.name", f"# # {transform.description}"
|
|
49
|
+
)
|
|
50
|
+
elif transform.source_code.startswith("# %% [markdown]"):
|
|
51
|
+
source_code_split = transform.source_code.split("\n")
|
|
52
|
+
if source_code_split[1] == "#":
|
|
53
|
+
source_code_split[1] = f"# # {transform.description}"
|
|
54
|
+
new_content = "\n".join(source_code_split)
|
|
55
|
+
else:
|
|
56
|
+
new_content = transform.source_code
|
|
57
|
+
else: # R notebook
|
|
58
|
+
# Pattern to match title only within YAML header section
|
|
59
|
+
title_pattern = r'^---\n.*?title:\s*"([^"]*)".*?---'
|
|
60
|
+
title_match = re.search(
|
|
61
|
+
title_pattern, transform.source_code, flags=re.DOTALL | re.MULTILINE
|
|
62
|
+
)
|
|
63
|
+
new_content = transform.source_code
|
|
64
|
+
if title_match:
|
|
65
|
+
current_title = title_match.group(1)
|
|
66
|
+
if current_title != transform.description:
|
|
67
|
+
pattern = r'^(---\n.*?title:\s*)"([^"]*)"(.*?---)'
|
|
68
|
+
replacement = f'\\1"{transform.description}"\\3'
|
|
69
|
+
new_content = re.sub(
|
|
70
|
+
pattern,
|
|
71
|
+
replacement,
|
|
72
|
+
new_content,
|
|
73
|
+
flags=re.DOTALL | re.MULTILINE,
|
|
74
|
+
)
|
|
75
|
+
logger.important(
|
|
76
|
+
f"updated title to match description: {current_title} →"
|
|
77
|
+
f" {transform.description}"
|
|
78
|
+
)
|
|
79
|
+
if bump_revision:
|
|
80
|
+
uid = transform.uid
|
|
81
|
+
if uid in new_content:
|
|
82
|
+
new_uid = f"{uid[:-4]}{increment_base62(uid[-4:])}"
|
|
83
|
+
new_content = new_content.replace(uid, new_uid)
|
|
84
|
+
logger.important(f"updated uid: {uid} → {new_uid}")
|
|
85
|
+
if notebook_path.suffix == ".ipynb":
|
|
86
|
+
notebook = jupytext.reads(new_content, fmt="py:percent")
|
|
87
|
+
jupytext.write(notebook, notebook_path)
|
|
88
|
+
else:
|
|
89
|
+
notebook_path.write_text(new_content)
|
|
90
|
+
|
|
91
|
+
query_by_uid = uid is not None
|
|
92
|
+
|
|
93
|
+
match entity:
|
|
94
|
+
case "transform":
|
|
95
|
+
if query_by_uid:
|
|
96
|
+
# we don't use .get here because DoesNotExist is hard to catch
|
|
97
|
+
# due to private django API
|
|
98
|
+
# here full uid is not expected anymore as before
|
|
99
|
+
# via ln.Transform.objects.get(uid=uid)
|
|
100
|
+
transforms = ln.Transform.objects.filter(uid__startswith=uid)
|
|
101
|
+
else:
|
|
102
|
+
# if below, we take is_latest=True as the criterion, we might get draft notebooks
|
|
103
|
+
# hence, we use source_code__isnull=False and order by created_at instead
|
|
104
|
+
transforms = ln.Transform.objects.filter(
|
|
105
|
+
key=key, source_code__isnull=False
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
if (n_transforms := len(transforms)) == 0:
|
|
109
|
+
err_msg = f"uid {uid}" if query_by_uid else f"key={key} and source_code"
|
|
110
|
+
raise SystemExit(f"Transform with {err_msg} does not exist.")
|
|
111
|
+
|
|
112
|
+
if n_transforms > 1:
|
|
113
|
+
transforms = transforms.order_by("-created_at")
|
|
114
|
+
transform = transforms.first()
|
|
115
|
+
|
|
116
|
+
target_relpath = Path(transform.key)
|
|
117
|
+
if len(target_relpath.parents) > 1:
|
|
118
|
+
logger.important(
|
|
119
|
+
"preserve the folder structure for versioning:"
|
|
120
|
+
f" {target_relpath.parent}/"
|
|
121
|
+
)
|
|
122
|
+
target_relpath.parent.mkdir(parents=True, exist_ok=True)
|
|
123
|
+
if target_relpath.exists():
|
|
124
|
+
response = input(f"! {target_relpath} exists: replace? (y/n)")
|
|
125
|
+
if response != "y":
|
|
126
|
+
raise SystemExit("Aborted.")
|
|
127
|
+
|
|
128
|
+
if transform.source_code is not None:
|
|
129
|
+
if target_relpath.suffix in (".ipynb", ".Rmd", ".qmd"):
|
|
130
|
+
script_to_notebook(transform, target_relpath, bump_revision=True)
|
|
131
|
+
else:
|
|
132
|
+
target_relpath.write_text(transform.source_code)
|
|
133
|
+
else:
|
|
134
|
+
raise SystemExit("No source code available for this transform.")
|
|
135
|
+
|
|
136
|
+
logger.important(f"{transform.type} is here: {target_relpath}")
|
|
137
|
+
|
|
138
|
+
if with_env:
|
|
139
|
+
ln.settings.track_run_inputs = False
|
|
140
|
+
if (
|
|
141
|
+
transform.latest_run is not None
|
|
142
|
+
and transform.latest_run.environment is not None
|
|
143
|
+
):
|
|
144
|
+
filepath_env_cache = transform.latest_run.environment.cache()
|
|
145
|
+
target_env_filename = (
|
|
146
|
+
target_relpath.parent
|
|
147
|
+
/ f"{target_relpath.stem}__requirements.txt"
|
|
148
|
+
)
|
|
149
|
+
shutil.move(filepath_env_cache, target_env_filename)
|
|
150
|
+
logger.important(f"environment is here: {target_env_filename}")
|
|
151
|
+
else:
|
|
152
|
+
logger.warning(
|
|
153
|
+
"latest transform run with environment doesn't exist"
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
return target_relpath
|
|
157
|
+
case "artifact" | "collection":
|
|
158
|
+
ln.settings.track_run_inputs = False
|
|
159
|
+
|
|
160
|
+
EntityClass = ln.Artifact if entity == "artifact" else ln.Collection
|
|
161
|
+
|
|
162
|
+
# we don't use .get here because DoesNotExist is hard to catch
|
|
163
|
+
# due to private django API
|
|
164
|
+
if query_by_uid:
|
|
165
|
+
entities = EntityClass.filter(uid__startswith=uid)
|
|
166
|
+
else:
|
|
167
|
+
entities = EntityClass.filter(key=key)
|
|
168
|
+
|
|
169
|
+
if (n_entities := len(entities)) == 0:
|
|
170
|
+
err_msg = f"uid={uid}" if query_by_uid else f"key={key}"
|
|
171
|
+
raise SystemExit(
|
|
172
|
+
f"{entity.capitalize()} with {err_msg} does not exist."
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
if n_entities > 1:
|
|
176
|
+
entities = entities.order_by("-created_at")
|
|
177
|
+
|
|
178
|
+
entity_obj = entities.first()
|
|
179
|
+
cache_path = entity_obj.cache()
|
|
180
|
+
|
|
181
|
+
# collection gives us a list of paths
|
|
182
|
+
if isinstance(cache_path, list):
|
|
183
|
+
logger.important(f"{entity} paths ({len(cache_path)} files):")
|
|
184
|
+
for i, path in enumerate(cache_path):
|
|
185
|
+
if i < 5 or i >= len(cache_path) - 5:
|
|
186
|
+
logger.important(f" [{i + 1}/{len(cache_path)}] {path}")
|
|
187
|
+
elif i == 5:
|
|
188
|
+
logger.important(f" ... {len(cache_path) - 10} more files ...")
|
|
189
|
+
else:
|
|
190
|
+
logger.important(f"{entity} is here: {cache_path}")
|
|
191
|
+
case _:
|
|
192
|
+
raise AssertionError(f"unknown entity {entity}")
|
|
@@ -46,6 +46,7 @@ def save_from_filepath_cli(
|
|
|
46
46
|
filepath: str | Path,
|
|
47
47
|
key: str | None,
|
|
48
48
|
description: str | None,
|
|
49
|
+
stem_uid: str | None,
|
|
49
50
|
registry: str | None,
|
|
50
51
|
) -> str | None:
|
|
51
52
|
import lamindb_setup as ln_setup
|
|
@@ -99,10 +100,21 @@ def save_from_filepath_cli(
|
|
|
99
100
|
|
|
100
101
|
if registry == "artifact":
|
|
101
102
|
ln.settings.creation.artifact_silence_missing_run_warning = True
|
|
102
|
-
|
|
103
|
+
revises = None
|
|
104
|
+
if stem_uid is not None:
|
|
105
|
+
revises = (
|
|
106
|
+
ln.Artifact.filter(uid__startswith=stem_uid)
|
|
107
|
+
.order_by("-created_at")
|
|
108
|
+
.first()
|
|
109
|
+
)
|
|
110
|
+
if revises is None:
|
|
111
|
+
raise ln.errors.InvalidArgument("The stem uid is not found.")
|
|
112
|
+
elif key is None and description is None:
|
|
103
113
|
logger.error("Please pass a key or description via --key or --description")
|
|
104
114
|
return "missing-key-or-description"
|
|
105
|
-
artifact = ln.Artifact(
|
|
115
|
+
artifact = ln.Artifact(
|
|
116
|
+
filepath, key=key, description=description, revises=revises
|
|
117
|
+
).save()
|
|
106
118
|
logger.important(f"saved: {artifact}")
|
|
107
119
|
logger.important(f"storage path: {artifact.path}")
|
|
108
120
|
if ln_setup.settings.storage.type == "s3":
|
|
@@ -125,6 +137,15 @@ def save_from_filepath_cli(
|
|
|
125
137
|
)
|
|
126
138
|
return "not-tracked-in-transform-registry"
|
|
127
139
|
else:
|
|
140
|
+
revises = None
|
|
141
|
+
if stem_uid is not None:
|
|
142
|
+
revises = (
|
|
143
|
+
ln.Transform.filter(uid__startswith=stem_uid)
|
|
144
|
+
.order_by("-created_at")
|
|
145
|
+
.first()
|
|
146
|
+
)
|
|
147
|
+
if revises is None:
|
|
148
|
+
raise ln.errors.InvalidArgument("The stem uid is not found.")
|
|
128
149
|
# TODO: build in the logic that queries for relative file paths
|
|
129
150
|
# we have in Context; add tests for multiple versions
|
|
130
151
|
transform = ln.Transform.filter(
|
|
@@ -135,6 +156,7 @@ def save_from_filepath_cli(
|
|
|
135
156
|
description=filepath.name,
|
|
136
157
|
key=filepath.name,
|
|
137
158
|
type="script" if filepath.suffix in {".R", ".py"} else "notebook",
|
|
159
|
+
revises=revises,
|
|
138
160
|
).save()
|
|
139
161
|
logger.important(f"created Transform('{transform.uid}')")
|
|
140
162
|
# latest run of this transform by user
|
|
@@ -147,6 +169,11 @@ def save_from_filepath_cli(
|
|
|
147
169
|
)
|
|
148
170
|
if response != "y":
|
|
149
171
|
return "aborted-save-notebook-created-by-different-user"
|
|
172
|
+
if run is None and transform.key.endswith(".ipynb"):
|
|
173
|
+
run = ln.Run(transform=transform).save()
|
|
174
|
+
logger.important(
|
|
175
|
+
f"found no run, creating Run('{run.uid}') to display the html"
|
|
176
|
+
)
|
|
150
177
|
return_code = save_context_core(
|
|
151
178
|
run=run,
|
|
152
179
|
transform=transform,
|
|
File without changes
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
import sys
|
|
4
|
+
import threading
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import lamindb_setup as ln_setup
|
|
8
|
+
import modal
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def run_script(path: Path) -> dict:
    """Run a Python script in a subprocess, echoing and capturing its output.

    The script is executed with the current interpreter. Lines written by the
    script to stdout and stderr are printed as they arrive and also collected.

    Args:
        path: Path to the script to execute.

    Returns:
        A dict with the keys ``"success"`` (``True`` if the script exited with
        return code 0), ``"output"`` (the captured stdout) and ``"error"``
        (empty on success; on failure the captured stderr, or a message with
        the return code if stderr was empty; if launching or monitoring the
        process raised, the exception text plus traceback).

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    result = {"success": False, "output": "", "error": ""}

    def stream_output(stream, capture_list):
        """Read from stream line by line and print in real-time while also capturing to a list."""
        try:
            for line in iter(stream.readline, ""):
                print(line, end="")  # Print in real-time
                capture_list.append(line)
        finally:
            # Close the pipe even if echoing a line fails.
            stream.close()

    if not path.exists():
        raise FileNotFoundError(f"Script file not found: {path}")

    try:
        stdout_lines: list[str] = []
        stderr_lines: list[str] = []

        # The context manager guarantees that pipes are closed and the child
        # is reaped even if an error occurs while monitoring it.
        with subprocess.Popen(
            [sys.executable, path.as_posix()],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=1,  # Line buffered
        ) as process:
            # One reader thread per pipe so that neither pipe can fill up and
            # block the child while the other one is being drained.
            readers = [
                threading.Thread(
                    target=stream_output, args=(process.stdout, stdout_lines)
                ),
                threading.Thread(
                    target=stream_output, args=(process.stderr, stderr_lines)
                ),
            ]
            for reader in readers:
                # Daemon threads do not keep the interpreter alive on exit
                reader.daemon = True
                reader.start()

            return_code = process.wait()

            for reader in readers:
                reader.join()

        # Keep stdout on failure as well: it is often needed to diagnose the error.
        result["output"] = "".join(stdout_lines)
        if return_code == 0:
            result["success"] = True
        else:
            # Never report a failure with an empty error message.
            result["error"] = (
                "".join(stderr_lines)
                or f"Script exited with return code {return_code}"
            )

    except Exception as e:
        import traceback

        result["error"] = str(e) + "\n" + traceback.format_exc()
    return result
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class Runner:
    """Runs a local script as a function of a Modal app.

    On construction, the app, the image (with the local mount directory added
    to it) and the Modal function wrapping ``run_script`` are created.
    """

    def __init__(
        self,
        app_name: str,
        local_mount_dir: str | Path = "./scripts",
        remote_mount_dir: str | Path = "/scripts",
        image_url: str | None = None,
        packages: list[str] | None = None,
        cpu: float | None = None,
        gpu: str | None = None,
    ):
        """Create the Modal app, image and function.

        Args:
            app_name: Name of the Modal app; also passed on as
                ``LAMIN_CURRENT_PROJECT``.
            local_mount_dir: Local directory added to the image.
            remote_mount_dir: Directory inside the image at which the local
                directory is made available.
            image_url: Registry image to start from; a Debian slim image is
                used if ``None``.
            packages: Packages to pip-install into the image.
            cpu: Passed through to the Modal function.
            gpu: Passed through to the Modal function.
        """
        self.app_name = app_name  # we use the LaminDB project name as the app name
        self.app = self.create_modal_app(app_name)

        self.local_mount_dir = local_mount_dir
        self.remote_mount_dir = remote_mount_dir

        # Mount at the same remote directory that local_to_remote_path()
        # resolves against; otherwise a custom remote_mount_dir would point to
        # a location the files were never added to.
        self.image = self.create_modal_image(
            local_dir=local_mount_dir,
            remote_dir=Path(remote_mount_dir).as_posix(),
            packages=packages,
            image_url=image_url,
        )

        local_secrets = self._configure_local_secrets()

        self.modal_function = self.app.function(
            image=self.image, cpu=cpu, gpu=gpu, secrets=[local_secrets]
        )(run_script)

    def run(self, script_local_path: Path) -> dict:
        """Run the script remotely and return the result of ``run_script``.

        Args:
            script_local_path: Path of the script inside the local mount directory.

        Returns:
            The dict returned by the remote ``run_script`` call, so that callers
            can detect a failed script instead of the result being dropped.

        Raises:
            ValueError: If the script is not inside the local mount directory.
        """
        script_remote_path = self.local_to_remote_path(str(script_local_path))
        with modal.enable_output(show_progress=True):  # Prints out modal logs
            with self.app.run():
                return self.modal_function.remote(Path(script_remote_path))

    def create_modal_app(self, app_name: str) -> modal.App:
        """Return a new Modal app with the given name."""
        return modal.App(app_name)

    def local_to_remote_path(self, local_path: str | Path) -> str:
        """Translate a path below the local mount dir to its remote counterpart.

        Args:
            local_path: Path located inside ``self.local_mount_dir``.

        Returns:
            The corresponding path below ``self.remote_mount_dir``, using
            forward slashes.

        Raises:
            ValueError: If ``local_path`` is not inside the local mount directory.
        """
        local_path = Path(local_path).absolute()
        local_mount_dir = Path(self.local_mount_dir).absolute()
        remote_mount_dir = Path(self.remote_mount_dir)

        # relative_to() raises ValueError if local_path is not inside local_mount_dir
        try:
            relative_path = local_path.relative_to(local_mount_dir)
        except ValueError as err:
            raise ValueError(
                f"Local path '{local_path}' is not inside the mount directory '{local_mount_dir}'"
            ) from err

        # Return as string with normalized separators
        return (remote_mount_dir / relative_path).as_posix()

    def _configure_local_secrets(self) -> modal.Secret:
        """Build the Modal secret carrying the Lamin credentials and context.

        Raises:
            ValueError: If no API key is available in the user settings.
        """
        if ln_setup.settings.user.api_key is None:
            raise ValueError("Please authenticate via: lamin login")

        all_env_variables = {
            "LAMIN_API_KEY": ln_setup.settings.user.api_key,
            "LAMIN_CURRENT_PROJECT": self.app_name,
            "LAMIN_CURRENT_INSTANCE": ln_setup.settings.instance.slug,
        }
        return modal.Secret.from_dict(all_env_variables)

    def create_modal_image(
        self,
        python_version: str = "3.12",
        packages: list[str] | None = None,
        local_dir: str | Path = "./scripts",
        remote_dir: str = "/scripts/",
        image_url: str | None = None,
        env_variables: dict | None = None,
    ) -> modal.Image:
        """Assemble the Modal image the script runs in.

        Args:
            python_version: Python version of the image.
            packages: Packages to pip-install; ``lamindb`` is added if missing.
                The passed list is not modified.
            local_dir: Local directory to add to the image.
            remote_dir: Location of ``local_dir`` inside the image.
            image_url: Registry image to start from; a Debian slim image is
                used if ``None``.
            env_variables: Environment variables to set in the image.

        Returns:
            The configured image.
        """
        if env_variables is None:
            env_variables = {}

        # Work on a copy so the caller's list is never mutated, and make sure
        # lamindb is requested exactly once.
        packages = [] if packages is None else list(packages)
        if "lamindb" not in packages:
            packages.append("lamindb")

        if image_url is None:
            image = modal.Image.debian_slim(python_version=python_version)
        else:
            image = modal.Image.from_registry(image_url, add_python=python_version)
        return (
            image.pip_install(packages)
            .env(env_variables)
            .add_local_python_source("lamindb", "lamindb_setup", copy=True)
            .run_commands("lamin settings set auto-connect true")
            .add_local_dir(local_dir, remote_dir)
        )
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import nox
|
|
4
|
+
from laminci.nox import install_lamindb
|
|
5
|
+
|
|
6
|
+
# Any GitHub event other than "push" counts as a pull request; this is also
# true when GITHUB_EVENT_NAME is not set at all.
IS_PR = os.environ.get("GITHUB_EVENT_NAME") != "push"
# Default venv backend for all sessions in this file.
nox.options.default_venv_backend = "none"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@nox.session
def setup(session):
    """Install lamindb into the session from the branch matching the CI event.

    The ``main`` branch is used when ``IS_PR`` is true, ``release`` otherwise.
    """
    if IS_PR:
        target_branch = "main"
    else:
        target_branch = "release"
    install_lamindb(session, branch=target_branch)
|
|
@@ -60,7 +60,15 @@ def test_load_transform():
|
|
|
60
60
|
path2.unlink()
|
|
61
61
|
|
|
62
62
|
|
|
63
|
-
def test_load_artifact():
|
|
63
|
+
def test_get_load_artifact():
|
|
64
|
+
result = subprocess.run(
|
|
65
|
+
"lamin get"
|
|
66
|
+
" 'https://lamin.ai/laminlabs/lamin-site-assets/artifact/e2G7k9EVul4JbfsEYAy5'",
|
|
67
|
+
shell=True,
|
|
68
|
+
capture_output=True,
|
|
69
|
+
)
|
|
70
|
+
assert result.returncode == 0
|
|
71
|
+
|
|
64
72
|
result = subprocess.run(
|
|
65
73
|
"lamin load"
|
|
66
74
|
" 'https://lamin.ai/laminlabs/lamin-site-assets/artifact/e2G7k9EVul4JbfsEYAy5'",
|
|
@@ -76,3 +84,12 @@ def test_load_artifact():
|
|
|
76
84
|
capture_output=True,
|
|
77
85
|
)
|
|
78
86
|
assert result.returncode == 0
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def test_load_collection():
    """`lamin load` with a collection URL exits with status 0."""
    command = (
        "lamin load 'https://lamin.ai/laminlabs/lamindata/collection/2wUs6V1OuGzp5Ll4'"
    )
    completed = subprocess.run(command, shell=True, capture_output=True)
    assert completed.returncode == 0
|
|
@@ -3,7 +3,7 @@ from pathlib import Path
|
|
|
3
3
|
|
|
4
4
|
import lamindb_setup as ln_setup
|
|
5
5
|
|
|
6
|
-
test_file = Path(__file__).parent.parent.resolve() / ".gitignore"
|
|
6
|
+
test_file = Path(__file__).parent.parent.parent.resolve() / ".gitignore"
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def test_save_file():
|
|
@@ -44,7 +44,7 @@ def test_save_file():
|
|
|
44
44
|
)
|
|
45
45
|
print(result.stdout.decode())
|
|
46
46
|
print(result.stderr.decode())
|
|
47
|
-
assert "
|
|
47
|
+
assert "returning existing artifact with same hash" in result.stdout.decode()
|
|
48
48
|
assert "key='mytest'" in result.stdout.decode()
|
|
49
49
|
assert "storage path:" in result.stdout.decode()
|
|
50
50
|
assert result.returncode == 0
|
|
@@ -184,9 +184,7 @@ print("my consecutive cell")
|
|
|
184
184
|
new_path = notebook_path.with_name("new_name.ipynb")
|
|
185
185
|
os.system(f"cp {notebook_path} {new_path}")
|
|
186
186
|
|
|
187
|
-
# upon re-running it, the
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
print(error.exconly())
|
|
192
|
-
assert "clashes with the existing key" in error.exconly()
|
|
187
|
+
# upon re-running it, the notebook name is updated
|
|
188
|
+
nbproject_test.execute_notebooks(new_path, print_outputs=True)
|
|
189
|
+
transform = ln.Transform.get("hlsFXswrJjtt0001")
|
|
190
|
+
assert "new_name.ipynb" in transform.key
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
# The "scripts" directory that sits next to this test file's parent directory.
scripts_dir = Path(__file__).parent.parent.resolve().joinpath("scripts")
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_run_on_modal():
    """Run the example script through `lamin run` and check its output."""
    script_path = scripts_dir / "run-track-and-finish.py"

    # Connecting must succeed (check=True) before the run is attempted.
    subprocess.run("lamin connect laminlabs/lamindata", shell=True, check=True)
    completed = subprocess.run(
        f"lamin run {script_path} --project 1QLbS6N7wwiL",
        shell=True,
        capture_output=True,
    )
    stdout_text = completed.stdout.decode()
    print(stdout_text)
    assert completed.returncode == 0
    for expected in ("hello!", "finished Run"):
        assert expected in stdout_text
|
|
@@ -1,169 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import re
|
|
4
|
-
import shutil
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
from lamin_utils import logger
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def decompose_url(url: str) -> tuple[str, str, str]:
    """Split a hub record URL into instance slug, entity and uid.

    The URL is expected to have the shape
    ``https://<host>/<account>/<instance>/<entity>/<uid>`` where ``<entity>``
    is ``transform`` or ``artifact``.

    The entity is matched as a whole path segment located after the instance
    slug. Matching it as a substring of the full URL picked the wrong entity,
    or failed with an ``IndexError``, when the account or instance name
    itself contained "transform" or "artifact".

    Args:
        url: The record URL.

    Returns:
        A tuple ``(instance_slug, entity, uid)`` where ``instance_slug`` is
        ``"<account>/<instance>"``.

    Raises:
        ValueError: If the URL has no ``transform``/``artifact`` path segment
            followed by a uid.
    """
    segments = url.split("/")
    # segments == ["https:", "", host, account, instance, entity, uid, ...]
    instance_slug = "/".join(segments[3:5])
    for position, segment in enumerate(segments[5:], start=5):
        if segment in ("transform", "artifact"):
            uid = "/".join(segments[position + 1 :])
            if uid:
                return instance_slug, segment, uid
            break
    raise ValueError(
        f"URL has to contain '/transform/<uid>' or '/artifact/<uid>': {url}"
    )
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def load(
    entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False
):
    """Load a transform's source code or an artifact onto the local machine.

    Args:
        entity: Either a hub URL (starts with ``https://`` and contains
            ``lamin``), or one of the literals ``"artifact"`` / ``"transform"``.
            For a URL, instance, entity and uid are taken from the URL.
        uid: Prefix of the record uid to look up. When ``None``, the record is
            looked up by ``key`` instead.
        key: Key of the record, used only when ``uid`` is ``None``.
        with_env: For transforms only: also place the environment file of the
            latest run next to the loaded source as
            ``<stem>__requirements.txt``.

    Returns:
        For a transform, the relative path the source was written to; for an
        artifact, the path returned by ``artifact.cache()``.

    Raises:
        SystemExit: If ``entity`` is not accepted, no matching record exists,
            the transform has no source code, or the user declines to
            overwrite an existing file.
    """
    import lamindb_setup as ln_setup

    if entity.startswith("https://") and "lamin" in entity:
        url = entity
        instance, entity, uid = decompose_url(url)
    elif entity not in {"artifact", "transform"}:
        raise SystemExit("Entity has to be a laminhub URL or 'artifact' or 'transform'")
    else:
        instance = ln_setup.settings.instance.slug

    ln_setup.connect(instance)
    # imported only after connecting to the instance
    import lamindb as ln

    def script_to_notebook(
        transform: ln.Transform, notebook_path: Path, bump_revision: bool = False
    ) -> None:
        """Write the transform's source code to ``notebook_path`` as a notebook.

        ``.ipynb`` targets are converted from py:percent format via jupytext;
        any other suffix is written as plain text (R notebooks). The title is
        aligned with ``transform.description`` and, if ``bump_revision`` is
        set, the last four characters of the uid found in the content are
        incremented.
        """
        import jupytext
        from lamin_utils._base62 import increment_base62

        if notebook_path.suffix == ".ipynb":
            # below is backward compat
            if "# # transform.name" in transform.source_code:
                new_content = transform.source_code.replace(
                    "# # transform.name", f"# # {transform.description}"
                )
            elif transform.source_code.startswith("# %% [markdown]"):
                source_code_split = transform.source_code.split("\n")
                if source_code_split[1] == "#":
                    source_code_split[1] = f"# # {transform.description}"
                new_content = "\n".join(source_code_split)
            else:
                new_content = transform.source_code
        else:  # R notebook
            # Pattern to match title only within YAML header section
            title_pattern = r'^---\n.*?title:\s*"([^"]*)".*?---'
            title_match = re.search(
                title_pattern, transform.source_code, flags=re.DOTALL | re.MULTILINE
            )
            new_content = transform.source_code
            if title_match:
                current_title = title_match.group(1)
                if current_title != transform.description:
                    pattern = r'^(---\n.*?title:\s*)"([^"]*)"(.*?---)'

                    def _with_new_title(match: re.Match) -> str:
                        # A replacement function inserts the description
                        # verbatim. Inside a replacement template, backslashes
                        # in the description would be processed as escapes and
                        # either raise or corrupt the title.
                        return (
                            f'{match.group(1)}"{transform.description}"'
                            f"{match.group(3)}"
                        )

                    new_content = re.sub(
                        pattern,
                        _with_new_title,
                        new_content,
                        flags=re.DOTALL | re.MULTILINE,
                    )
                    logger.important(
                        f"updated title to match description: {current_title} →"
                        f" {transform.description}"
                    )
        if bump_revision:
            old_uid = transform.uid
            if old_uid in new_content:
                new_uid = f"{old_uid[:-4]}{increment_base62(old_uid[-4:])}"
                new_content = new_content.replace(old_uid, new_uid)
                logger.important(f"updated uid: {old_uid} → {new_uid}")
        if notebook_path.suffix == ".ipynb":
            notebook = jupytext.reads(new_content, fmt="py:percent")
            jupytext.write(notebook, notebook_path)
        else:
            notebook_path.write_text(new_content)

    query_by_uid = uid is not None

    if entity == "transform":
        if query_by_uid:
            # we don't use .get here because DoesNotExist is hard to catch
            # due to private django API
            # here full uid is not expected anymore as before
            # via ln.Transform.objects.get(uid=uid)
            transforms = ln.Transform.objects.filter(uid__startswith=uid)
        else:
            # if below, we take is_latest=True as the criterion, we might get draft notebooks
            # hence, we use source_code__isnull=False and order by created_at instead
            transforms = ln.Transform.objects.filter(key=key, source_code__isnull=False)

        if (n_transforms := len(transforms)) == 0:
            err_msg = f"uid {uid}" if query_by_uid else f"key={key} and source_code"
            raise SystemExit(f"Transform with {err_msg} does not exist.")

        # with several matches, the most recently created one is used
        if n_transforms > 1:
            transforms = transforms.order_by("-created_at")
        transform = transforms.first()

        target_relpath = Path(transform.key)
        if len(target_relpath.parents) > 1:
            logger.important(
                "preserve the folder structure for versioning:"
                f" {target_relpath.parent}/"
            )
            target_relpath.parent.mkdir(parents=True, exist_ok=True)
        if target_relpath.exists():
            response = input(f"! {target_relpath} exists: replace? (y/n)")
            if response != "y":
                raise SystemExit("Aborted.")

        if transform.source_code is not None:
            if target_relpath.suffix in (".ipynb", ".Rmd", ".qmd"):
                script_to_notebook(transform, target_relpath, bump_revision=True)
            else:
                target_relpath.write_text(transform.source_code)
        else:
            raise SystemExit("No source code available for this transform.")

        logger.important(f"{transform.type} is here: {target_relpath}")

        if with_env:
            ln.settings.track_run_inputs = False
            if (
                transform.latest_run is not None
                and transform.latest_run.environment is not None
            ):
                filepath_env_cache = transform.latest_run.environment.cache()
                target_env_filename = (
                    target_relpath.parent / f"{target_relpath.stem}__requirements.txt"
                )
                # the cached file is moved (not copied) next to the source
                shutil.move(filepath_env_cache, target_env_filename)
                logger.important(f"environment is here: {target_env_filename}")
            else:
                logger.warning("latest transform run with environment doesn't exist")

        return target_relpath
    elif entity == "artifact":
        ln.settings.track_run_inputs = False

        if query_by_uid:
            # we don't use .get here because DoesNotExist is hard to catch
            # due to private django API
            artifacts = ln.Artifact.filter(uid__startswith=uid)
        else:
            artifacts = ln.Artifact.filter(key=key)

        if (n_artifacts := len(artifacts)) == 0:
            err_msg = f"uid={uid}" if query_by_uid else f"key={key}"
            raise SystemExit(f"Artifact with {err_msg} does not exist.")

        # with several matches, the most recently created one is used
        if n_artifacts > 1:
            artifacts = artifacts.order_by("-created_at")
        artifact = artifacts.first()

        cache_path = artifact.cache()
        logger.important(f"artifact is here: {cache_path}")
        return cache_path
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/notebooks/with-title-and-initialized-consecutive.ipynb
RENAMED
|
File without changes
|
{lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/notebooks/with-title-and-initialized-non-consecutive.ipynb
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|