lamin_cli 1.1.0__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. lamin_cli-1.3.0/.github/workflows/build.yml +82 -0
  2. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/.gitignore +1 -0
  3. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/.pre-commit-config.yaml +2 -2
  4. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/PKG-INFO +1 -1
  5. lamin_cli-1.3.0/lamin_cli/__init__.py +3 -0
  6. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/lamin_cli/__main__.py +90 -32
  7. lamin_cli-1.3.0/lamin_cli/_load.py +192 -0
  8. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/lamin_cli/_save.py +29 -2
  9. lamin_cli-1.3.0/lamin_cli/compute/__init__.py +0 -0
  10. lamin_cli-1.3.0/lamin_cli/compute/modal.py +175 -0
  11. lamin_cli-1.3.0/noxfile.py +13 -0
  12. {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_load.py +18 -1
  13. {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_multi_process.py +1 -1
  14. {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_save_files.py +2 -2
  15. {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_save_notebooks.py +4 -6
  16. {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_save_r_code.py +1 -1
  17. {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_save_scripts.py +1 -1
  18. lamin_cli-1.3.0/tests/modal/test_modal.py +19 -0
  19. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/scripts/run-track-and-finish.py +0 -4
  20. lamin_cli-1.1.0/lamin_cli/__init__.py +0 -3
  21. lamin_cli-1.1.0/lamin_cli/_load.py +0 -169
  22. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/.github/workflows/doc-changes.yml +0 -0
  23. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/LICENSE +0 -0
  24. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/README.md +0 -0
  25. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/lamin_cli/_cache.py +0 -0
  26. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/lamin_cli/_migration.py +0 -0
  27. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/lamin_cli/_settings.py +0 -0
  28. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/pyproject.toml +0 -0
  29. {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/conftest.py +0 -0
  30. {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_cli.py +0 -0
  31. {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_migrate.py +0 -0
  32. {lamin_cli-1.1.0/tests → lamin_cli-1.3.0/tests/core}/test_parse_uid_from_code.py +0 -0
  33. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/notebooks/not-initialized.ipynb +0 -0
  34. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/notebooks/with-title-and-initialized-consecutive.ipynb +0 -0
  35. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/notebooks/with-title-and-initialized-non-consecutive.ipynb +0 -0
  36. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/scripts/merely-import-lamindb.py +0 -0
  37. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/scripts/run-track-and-finish-sync-git.py +0 -0
  38. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/scripts/run-track-with-params.py +0 -0
  39. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/scripts/run-track.R +0 -0
  40. {lamin_cli-1.1.0 → lamin_cli-1.3.0}/tests/scripts/run-track.qmd +0 -0
@@ -0,0 +1,82 @@
1
+ name: build
2
+
3
+ on:
4
+ push:
5
+ branches: [release]
6
+ pull_request:
7
+
8
+ jobs:
9
+ pre-filter:
10
+ runs-on: ubuntu-latest
11
+ outputs:
12
+ matrix: ${{ steps.set-matrix.outputs.matrix }}
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+ with:
16
+ fetch-depth: 0
17
+
18
+ - uses: dorny/paths-filter@v3
19
+ id: changes
20
+ if: github.event_name != 'push'
21
+ with:
22
+ filters: |
23
+ modal:
24
+ - 'lamin_cli/compute/modal.py'
25
+ - 'tests/modal/**'
26
+
27
+ - id: set-matrix
28
+ shell: bash
29
+ run: |
30
+ BASE_GROUPS=$(jq -n -c '[]')
31
+
32
+ if [[ "${{ github.event_name }}" == "push" || "${{ steps.changes.outputs.modal }}" == "true" ]]; then
33
+ # Run everything on push or when modal paths change
34
+ MATRIX=$(jq -n -c --argjson groups "$BASE_GROUPS" '{group: ($groups + ["modal"])}')
35
+ else
36
+ # Otherwise only run base groups
37
+ MATRIX=$(jq -n -c --argjson groups "$BASE_GROUPS" '{group: $groups}')
38
+ fi
39
+
40
+ # Output as single line for GitHub Actions
41
+ echo "matrix=$(echo "$MATRIX" | jq -c .)" >> $GITHUB_OUTPUT
42
+
43
+ # Pretty print for debugging
44
+ echo "Generated matrix:"
45
+ echo "$MATRIX" | jq .
46
+
47
+ test:
48
+ needs: pre-filter
49
+ runs-on: ubuntu-latest
50
+ env:
51
+ LAMIN_API_KEY: ${{ secrets.LAMIN_API_KEY_TESTUSER1 }}
52
+ strategy:
53
+ fail-fast: false
54
+ matrix: ${{fromJson(needs.pre-filter.outputs.matrix)}}
55
+ timeout-minutes: 20
56
+ steps:
57
+ - uses: actions/checkout@v4
58
+ with:
59
+ submodules: recursive
60
+ fetch-depth: 0
61
+
62
+ - uses: actions/setup-python@v5
63
+ with:
64
+ python-version: 3.12
65
+
66
+ - name: cache pre-commit
67
+ uses: actions/cache@v4
68
+ with:
69
+ path: ~/.cache/pre-commit
70
+ key: pre-commit-${{ runner.os }}-${{ hashFiles('.pre-commit-config.yaml') }}
71
+
72
+ - run: pip install git+https://github.com/laminlabs/laminci
73
+
74
+ - run: uv pip install --system modal pytest
75
+
76
+ - run: modal token set --token-id ${{ secrets.MODAL_DEV_TOKEN_ID }} --token-secret ${{ secrets.MODAL_DEV_TOKEN_SECRET }}
77
+
78
+ - run: nox -s setup
79
+
80
+ - run: lamin login
81
+
82
+ - run: pytest tests/modal
@@ -1,4 +1,5 @@
1
1
  # LaminDB
2
+ modal_mount_dir/
2
3
  lamindb_docs/
3
4
  _build
4
5
  mydata/
@@ -16,8 +16,8 @@ repos:
16
16
  - id: ruff
17
17
  args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]
18
18
  - id: ruff-format
19
- - repo: https://github.com/pre-commit/mirrors-prettier
20
- rev: v4.0.0-alpha.8
19
+ - repo: https://github.com/rbubley/mirrors-prettier
20
+ rev: v3.5.1
21
21
  hooks:
22
22
  - id: prettier
23
23
  - repo: https://github.com/kynan/nbstripout
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lamin_cli
3
- Version: 1.1.0
3
+ Version: 1.3.0
4
4
  Summary: Lamin CLI.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Description-Content-Type: text/markdown
@@ -0,0 +1,3 @@
1
+ """Lamin CLI."""
2
+
3
+ __version__ = "1.3.0"
@@ -9,6 +9,13 @@ from functools import wraps
9
9
  from importlib.metadata import PackageNotFoundError, version
10
10
  from typing import TYPE_CHECKING
11
11
 
12
+ from lamindb_setup._init_instance import (
13
+ DOC_DB,
14
+ DOC_INSTANCE_NAME,
15
+ DOC_MODULES,
16
+ DOC_STORAGE_ARG,
17
+ )
18
+
12
19
  if TYPE_CHECKING:
13
20
  from collections.abc import Mapping
14
21
 
@@ -41,12 +48,7 @@ else:
41
48
  "lamin": [
42
49
  {
43
50
  "name": "Connect to an instance",
44
- "commands": [
45
- "connect",
46
- "disconnect",
47
- "info",
48
- "init",
49
- ],
51
+ "commands": ["connect", "disconnect", "info", "init", "run"],
50
52
  },
51
53
  {
52
54
  "name": "Read & write data",
@@ -155,24 +157,20 @@ def schema_to_modules_callback(ctx, param, value):
155
157
 
156
158
  # fmt: off
157
159
  @main.command()
158
- @click.option("--storage", type=str, help="Local directory, s3://bucket_name, gs://bucket_name.")
159
- @click.option("--db", type=str, default=None, help="Postgres database connection URL, do not pass for SQLite.")
160
- @click.option("--modules", type=str, default=None, help="Comma-separated string of schema modules.")
161
- @click.option("--name", type=str, default=None, help="The instance name.")
162
- @click.option("--schema", type=str, default=None, help="[DEPRECATED] Use --modules instead.", callback=schema_to_modules_callback)
160
+ @click.option("--storage", type=str, default = ".", help=DOC_STORAGE_ARG)
161
+ @click.option("--name", type=str, default=None, help=DOC_INSTANCE_NAME)
162
+ @click.option("--db", type=str, default=None, help=DOC_DB)
163
+ @click.option("--modules", type=str, default=None, help=DOC_MODULES)
163
164
  # fmt: on
164
165
  def init(
165
166
  storage: str,
167
+ name: str | None,
166
168
  db: str | None,
167
169
  modules: str | None,
168
- name: str | None,
169
- schema: str | None,
170
170
  ):
171
171
  """Init an instance."""
172
172
  from lamindb_setup._init_instance import init as init_
173
173
 
174
- modules = modules if modules is not None else schema
175
-
176
174
  return init_(storage=storage, db=db, modules=modules, name=name)
177
175
 
178
176
 
@@ -188,6 +186,8 @@ def connect(instance: str):
188
186
  `lamin connect` switches
189
187
  {attr}`~lamindb.setup.core.SetupSettings.auto_connect` to `True` so that you
190
188
  auto-connect in a Python session upon importing `lamindb`.
189
+
190
+ For manually connecting in a Python session, use {func}`~lamindb.connect`.
191
191
  """
192
192
  from lamindb_setup import connect as connect_
193
193
  from lamindb_setup import settings as settings_
@@ -279,45 +279,103 @@ def load(entity: str, uid: str | None = None, key: str | None = None, with_env:
279
279
  @click.argument("entity", type=str)
280
280
  @click.option("--uid", help="The uid for the entity.")
281
281
  @click.option("--key", help="The key for the entity.")
282
- @click.option(
283
- "--with-env", is_flag=True, help="Also return the environment for a tranform."
284
- )
285
- def get(entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False):
282
+ def get(entity: str, uid: str | None = None, key: str | None = None):
286
283
  """Query metadata about an entity.
287
284
 
288
- Currently only works for artifact & transform and behaves like `lamin load`.
285
+ Currently only works for artifact.
289
286
  """
290
- from lamin_cli._load import load as load_
287
+ import lamindb_setup as ln_setup
288
+
289
+ from ._load import decompose_url
290
+
291
+ if entity.startswith("https://") and "lamin" in entity:
292
+ url = entity
293
+ instance, entity, uid = decompose_url(url)
294
+ elif entity not in {"artifact"}:
295
+ raise SystemExit("Entity has to be a laminhub URL or 'artifact'")
296
+ else:
297
+ instance = ln_setup.settings.instance.slug
298
+
299
+ ln_setup.connect(instance)
300
+ import lamindb as ln
291
301
 
292
- click.echo(f"! to load a file or folder, please use: lamin load {entity}")
293
- return load_(entity, uid=uid, key=key, with_env=with_env)
302
+ if uid is not None:
303
+ artifact = ln.Artifact.get(uid)
304
+ else:
305
+ artifact = ln.Artifact.get(key=key)
306
+ artifact.describe()
294
307
 
295
308
 
296
309
  @main.command()
297
- @click.argument("filepath", type=click.Path(exists=True, dir_okay=True, file_okay=True))
310
+ @click.argument("path", type=click.Path(exists=True, dir_okay=True, file_okay=True))
298
311
  @click.option("--key", type=str, default=None)
299
312
  @click.option("--description", type=str, default=None)
313
+ @click.option("--stem-uid", type=str, default=None)
300
314
  @click.option("--registry", type=str, default=None)
301
- def save(filepath: str, key: str, description: str, registry: str):
315
+ def save(path: str, key: str, description: str, stem_uid: str, registry: str):
302
316
  """Save a file or folder.
303
317
 
304
- Defaults to saving `.py` and `.ipynb` as {class}`~lamindb.Transform` and
305
- other file types and folders as {class}`~lamindb.Artifact`.
306
-
307
- You can save a `.py` or `.ipynb` file as an {class}`~lamindb.Artifact` by
308
- passing `--registry artifact`.
318
+ Defaults to saving `.py`, `.ipynb`, `.R`, `.Rmd`, and `.qmd` as {class}`~lamindb.Transform` and
319
+ other file types and folders as {class}`~lamindb.Artifact`. You can save a `.py` or `.ipynb` file as
320
+ an {class}`~lamindb.Artifact` by passing `--registry artifact`.
309
321
  """
310
322
  from lamin_cli._save import save_from_filepath_cli
311
323
 
312
- if save_from_filepath_cli(filepath, key, description, registry) is not None:
324
+ if save_from_filepath_cli(path, key, description, stem_uid, registry) is not None:
313
325
  sys.exit(1)
314
326
 
315
327
 
328
+ @main.command()
329
+ @click.argument("filepath", type=str)
330
+ @click.option("--project", type=str, default=None, help="A valid project name or uid. When running on Modal, creates an app with the same name.", required=True)
331
+ @click.option("--image-url", type=str, default=None, help="A URL to the base docker image to use.")
332
+ @click.option("--packages", type=str, default="lamindb", help="A comma-separated list of additional packages to install.")
333
+ @click.option("--cpu", type=float, default=None, help="Configuration for the CPU.")
334
+ @click.option("--gpu", type=str, default=None, help="The type of GPU to use (only compatible with cuda images).")
335
+ def run(filepath: str, project: str, image_url: str, packages: str, cpu: int, gpu: str | None):
336
+ """Run a compute job in the cloud.
337
+
338
+ This is an EXPERIMENTAL feature that enables to run a script on Modal.
339
+
340
+ Example: Given a valid project name "my_project".
341
+
342
+ ```
343
+ lamin run my_script.py --project my_project
344
+ ```
345
+ """
346
+ import shutil
347
+ from pathlib import Path
348
+
349
+ from lamin_cli.compute.modal import Runner
350
+
351
+ default_mount_dir = Path('./modal_mount_dir')
352
+ if not default_mount_dir.is_dir():
353
+ default_mount_dir.mkdir(parents=True, exist_ok=True)
354
+
355
+ shutil.copy(filepath, default_mount_dir)
356
+
357
+ filepath_in_mount_dir = Path(default_mount_dir) / Path(filepath).name
358
+
359
+ package_list = []
360
+ if packages:
361
+ package_list = [package.strip() for package in packages.split(',')]
362
+
363
+ runner = Runner(
364
+ local_mount_dir=default_mount_dir,
365
+ app_name=project,
366
+ packages=package_list,
367
+ image_url=image_url,
368
+ cpu=cpu,
369
+ gpu=gpu
370
+ )
371
+
372
+ runner.run(filepath_in_mount_dir)
373
+
374
+
316
375
  main.add_command(settings)
317
376
  main.add_command(cache)
318
377
  main.add_command(migrate)
319
378
 
320
-
321
379
  # https://stackoverflow.com/questions/57810659/automatically-generate-all-help-documentation-for-click-commands
322
380
  # https://claude.ai/chat/73c28487-bec3-4073-8110-50d1a2dd6b84
323
381
  def _generate_help():
@@ -0,0 +1,192 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import shutil
5
+ from pathlib import Path
6
+
7
+ from lamin_utils import logger
8
+
9
+
10
+ def decompose_url(url: str) -> tuple[str, str, str]:
11
+ assert any(keyword in url for keyword in ["transform", "artifact", "collection"])
12
+ for entity in ["transform", "artifact", "collection"]:
13
+ if entity in url:
14
+ break
15
+ uid = url.split(f"{entity}/")[1]
16
+ instance_slug = "/".join(url.split("/")[3:5])
17
+ return instance_slug, entity, uid
18
+
19
+
20
+ def load(
21
+ entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False
22
+ ):
23
+ import lamindb_setup as ln_setup
24
+
25
+ if entity.startswith("https://") and "lamin" in entity:
26
+ url = entity
27
+ instance, entity, uid = decompose_url(url)
28
+ elif entity not in {"artifact", "transform", "collection"}:
29
+ raise SystemExit(
30
+ "Entity has to be a laminhub URL or 'artifact', 'collection', or 'transform'"
31
+ )
32
+ else:
33
+ instance = ln_setup.settings.instance.slug
34
+
35
+ ln_setup.connect(instance)
36
+ import lamindb as ln
37
+
38
+ def script_to_notebook(
39
+ transform: ln.Transform, notebook_path: Path, bump_revision: bool = False
40
+ ) -> None:
41
+ import jupytext
42
+ from lamin_utils._base62 import increment_base62
43
+
44
+ if notebook_path.suffix == ".ipynb":
45
+ # below is backward compat
46
+ if "# # transform.name" in transform.source_code:
47
+ new_content = transform.source_code.replace(
48
+ "# # transform.name", f"# # {transform.description}"
49
+ )
50
+ elif transform.source_code.startswith("# %% [markdown]"):
51
+ source_code_split = transform.source_code.split("\n")
52
+ if source_code_split[1] == "#":
53
+ source_code_split[1] = f"# # {transform.description}"
54
+ new_content = "\n".join(source_code_split)
55
+ else:
56
+ new_content = transform.source_code
57
+ else: # R notebook
58
+ # Pattern to match title only within YAML header section
59
+ title_pattern = r'^---\n.*?title:\s*"([^"]*)".*?---'
60
+ title_match = re.search(
61
+ title_pattern, transform.source_code, flags=re.DOTALL | re.MULTILINE
62
+ )
63
+ new_content = transform.source_code
64
+ if title_match:
65
+ current_title = title_match.group(1)
66
+ if current_title != transform.description:
67
+ pattern = r'^(---\n.*?title:\s*)"([^"]*)"(.*?---)'
68
+ replacement = f'\\1"{transform.description}"\\3'
69
+ new_content = re.sub(
70
+ pattern,
71
+ replacement,
72
+ new_content,
73
+ flags=re.DOTALL | re.MULTILINE,
74
+ )
75
+ logger.important(
76
+ f"updated title to match description: {current_title} →"
77
+ f" {transform.description}"
78
+ )
79
+ if bump_revision:
80
+ uid = transform.uid
81
+ if uid in new_content:
82
+ new_uid = f"{uid[:-4]}{increment_base62(uid[-4:])}"
83
+ new_content = new_content.replace(uid, new_uid)
84
+ logger.important(f"updated uid: {uid} → {new_uid}")
85
+ if notebook_path.suffix == ".ipynb":
86
+ notebook = jupytext.reads(new_content, fmt="py:percent")
87
+ jupytext.write(notebook, notebook_path)
88
+ else:
89
+ notebook_path.write_text(new_content)
90
+
91
+ query_by_uid = uid is not None
92
+
93
+ match entity:
94
+ case "transform":
95
+ if query_by_uid:
96
+ # we don't use .get here because DoesNotExist is hard to catch
97
+ # due to private django API
98
+ # here full uid is not expected anymore as before
99
+ # via ln.Transform.objects.get(uid=uid)
100
+ transforms = ln.Transform.objects.filter(uid__startswith=uid)
101
+ else:
102
+ # if below, we take is_latest=True as the criterion, we might get draft notebooks
103
+ # hence, we use source_code__isnull=False and order by created_at instead
104
+ transforms = ln.Transform.objects.filter(
105
+ key=key, source_code__isnull=False
106
+ )
107
+
108
+ if (n_transforms := len(transforms)) == 0:
109
+ err_msg = f"uid {uid}" if query_by_uid else f"key={key} and source_code"
110
+ raise SystemExit(f"Transform with {err_msg} does not exist.")
111
+
112
+ if n_transforms > 1:
113
+ transforms = transforms.order_by("-created_at")
114
+ transform = transforms.first()
115
+
116
+ target_relpath = Path(transform.key)
117
+ if len(target_relpath.parents) > 1:
118
+ logger.important(
119
+ "preserve the folder structure for versioning:"
120
+ f" {target_relpath.parent}/"
121
+ )
122
+ target_relpath.parent.mkdir(parents=True, exist_ok=True)
123
+ if target_relpath.exists():
124
+ response = input(f"! {target_relpath} exists: replace? (y/n)")
125
+ if response != "y":
126
+ raise SystemExit("Aborted.")
127
+
128
+ if transform.source_code is not None:
129
+ if target_relpath.suffix in (".ipynb", ".Rmd", ".qmd"):
130
+ script_to_notebook(transform, target_relpath, bump_revision=True)
131
+ else:
132
+ target_relpath.write_text(transform.source_code)
133
+ else:
134
+ raise SystemExit("No source code available for this transform.")
135
+
136
+ logger.important(f"{transform.type} is here: {target_relpath}")
137
+
138
+ if with_env:
139
+ ln.settings.track_run_inputs = False
140
+ if (
141
+ transform.latest_run is not None
142
+ and transform.latest_run.environment is not None
143
+ ):
144
+ filepath_env_cache = transform.latest_run.environment.cache()
145
+ target_env_filename = (
146
+ target_relpath.parent
147
+ / f"{target_relpath.stem}__requirements.txt"
148
+ )
149
+ shutil.move(filepath_env_cache, target_env_filename)
150
+ logger.important(f"environment is here: {target_env_filename}")
151
+ else:
152
+ logger.warning(
153
+ "latest transform run with environment doesn't exist"
154
+ )
155
+
156
+ return target_relpath
157
+ case "artifact" | "collection":
158
+ ln.settings.track_run_inputs = False
159
+
160
+ EntityClass = ln.Artifact if entity == "artifact" else ln.Collection
161
+
162
+ # we don't use .get here because DoesNotExist is hard to catch
163
+ # due to private django API
164
+ if query_by_uid:
165
+ entities = EntityClass.filter(uid__startswith=uid)
166
+ else:
167
+ entities = EntityClass.filter(key=key)
168
+
169
+ if (n_entities := len(entities)) == 0:
170
+ err_msg = f"uid={uid}" if query_by_uid else f"key={key}"
171
+ raise SystemExit(
172
+ f"{entity.capitalize()} with {err_msg} does not exist."
173
+ )
174
+
175
+ if n_entities > 1:
176
+ entities = entities.order_by("-created_at")
177
+
178
+ entity_obj = entities.first()
179
+ cache_path = entity_obj.cache()
180
+
181
+ # collection gives us a list of paths
182
+ if isinstance(cache_path, list):
183
+ logger.important(f"{entity} paths ({len(cache_path)} files):")
184
+ for i, path in enumerate(cache_path):
185
+ if i < 5 or i >= len(cache_path) - 5:
186
+ logger.important(f" [{i + 1}/{len(cache_path)}] {path}")
187
+ elif i == 5:
188
+ logger.important(f" ... {len(cache_path) - 10} more files ...")
189
+ else:
190
+ logger.important(f"{entity} is here: {cache_path}")
191
+ case _:
192
+ raise AssertionError(f"unknown entity {entity}")
@@ -46,6 +46,7 @@ def save_from_filepath_cli(
46
46
  filepath: str | Path,
47
47
  key: str | None,
48
48
  description: str | None,
49
+ stem_uid: str | None,
49
50
  registry: str | None,
50
51
  ) -> str | None:
51
52
  import lamindb_setup as ln_setup
@@ -99,10 +100,21 @@ def save_from_filepath_cli(
99
100
 
100
101
  if registry == "artifact":
101
102
  ln.settings.creation.artifact_silence_missing_run_warning = True
102
- if key is None and description is None:
103
+ revises = None
104
+ if stem_uid is not None:
105
+ revises = (
106
+ ln.Artifact.filter(uid__startswith=stem_uid)
107
+ .order_by("-created_at")
108
+ .first()
109
+ )
110
+ if revises is None:
111
+ raise ln.errors.InvalidArgument("The stem uid is not found.")
112
+ elif key is None and description is None:
103
113
  logger.error("Please pass a key or description via --key or --description")
104
114
  return "missing-key-or-description"
105
- artifact = ln.Artifact(filepath, key=key, description=description).save()
115
+ artifact = ln.Artifact(
116
+ filepath, key=key, description=description, revises=revises
117
+ ).save()
106
118
  logger.important(f"saved: {artifact}")
107
119
  logger.important(f"storage path: {artifact.path}")
108
120
  if ln_setup.settings.storage.type == "s3":
@@ -125,6 +137,15 @@ def save_from_filepath_cli(
125
137
  )
126
138
  return "not-tracked-in-transform-registry"
127
139
  else:
140
+ revises = None
141
+ if stem_uid is not None:
142
+ revises = (
143
+ ln.Transform.filter(uid__startswith=stem_uid)
144
+ .order_by("-created_at")
145
+ .first()
146
+ )
147
+ if revises is None:
148
+ raise ln.errors.InvalidArgument("The stem uid is not found.")
128
149
  # TODO: build in the logic that queries for relative file paths
129
150
  # we have in Context; add tests for multiple versions
130
151
  transform = ln.Transform.filter(
@@ -135,6 +156,7 @@ def save_from_filepath_cli(
135
156
  description=filepath.name,
136
157
  key=filepath.name,
137
158
  type="script" if filepath.suffix in {".R", ".py"} else "notebook",
159
+ revises=revises,
138
160
  ).save()
139
161
  logger.important(f"created Transform('{transform.uid}')")
140
162
  # latest run of this transform by user
@@ -147,6 +169,11 @@ def save_from_filepath_cli(
147
169
  )
148
170
  if response != "y":
149
171
  return "aborted-save-notebook-created-by-different-user"
172
+ if run is None and transform.key.endswith(".ipynb"):
173
+ run = ln.Run(transform=transform).save()
174
+ logger.important(
175
+ f"found no run, creating Run('{run.uid}') to display the html"
176
+ )
150
177
  return_code = save_context_core(
151
178
  run=run,
152
179
  transform=transform,
File without changes
@@ -0,0 +1,175 @@
1
+ import os
2
+ import subprocess
3
+ import sys
4
+ import threading
5
+ from pathlib import Path
6
+
7
+ import lamindb_setup as ln_setup
8
+ import modal
9
+
10
+
11
+ def run_script(path: Path) -> dict:
12
+ """Takes a path to a script for running it as a function through Modal."""
13
+ result = {"success": False, "output": "", "error": ""}
14
+
15
+ def stream_output(stream, capture_list):
16
+ """Read from stream line by line and print in real-time while also capturing to a list."""
17
+ for line in iter(stream.readline, ""):
18
+ print(line, end="") # Print in real-time
19
+ capture_list.append(line)
20
+ stream.close()
21
+
22
+ if not path.exists():
23
+ raise FileNotFoundError(f"Script file not found: {path}")
24
+
25
+ try:
26
+ # Run the script using subprocess
27
+ process = subprocess.Popen(
28
+ [sys.executable, path.as_posix()],
29
+ stdout=subprocess.PIPE,
30
+ stderr=subprocess.PIPE,
31
+ text=True,
32
+ bufsize=1, # Line buffered
33
+ )
34
+
35
+ # Capture output and error while streaming stdout in real-time
36
+ stdout_lines: list[str] = []
37
+ stderr_lines: list[str] = []
38
+
39
+ # Create threads to handle stdout and stderr streams
40
+ stdout_thread = threading.Thread(
41
+ target=stream_output, args=(process.stdout, stdout_lines)
42
+ )
43
+ stderr_thread = threading.Thread(
44
+ target=stream_output, args=(process.stderr, stderr_lines)
45
+ )
46
+
47
+ # Set as daemon threads so they exit when the main program exits
48
+ stdout_thread.daemon = True
49
+ stderr_thread.daemon = True
50
+
51
+ # Start the threads
52
+ stdout_thread.start()
53
+ stderr_thread.start()
54
+
55
+ # Wait for the process to complete
56
+ return_code = process.wait()
57
+
58
+ # Wait for the threads to finish
59
+ stdout_thread.join()
60
+ stderr_thread.join()
61
+
62
+ # Join the captured output
63
+ stdout_output = "".join(stdout_lines)
64
+ stderr_output = "".join(stderr_lines)
65
+
66
+ # Check return code
67
+ if return_code == 0:
68
+ result["success"] = True
69
+ result["output"] = stdout_output
70
+ else:
71
+ result["error"] = stderr_output
72
+
73
+ except Exception as e:
74
+ import traceback
75
+
76
+ result["error"] = str(e) + "\n" + traceback.format_exc()
77
+ return result
78
+
79
+
80
+ class Runner:
81
+ def __init__(
82
+ self,
83
+ app_name: str,
84
+ local_mount_dir: str | Path = "./scripts",
85
+ remote_mount_dir: str | Path = "/scripts",
86
+ image_url: str | None = None,
87
+ packages: list[str] | None = None,
88
+ cpu: float | None = None,
89
+ gpu: str | None = None,
90
+ ):
91
+ self.app_name = app_name # we use the LaminDB project name as the app name
92
+ self.app = self.create_modal_app(app_name)
93
+
94
+ self.local_mount_dir = local_mount_dir
95
+ self.remote_mount_dir = remote_mount_dir
96
+
97
+ self.image = self.create_modal_image(
98
+ local_dir=local_mount_dir, packages=packages, image_url=image_url
99
+ )
100
+
101
+ local_secrets = self._configure_local_secrets()
102
+
103
+ self.modal_function = self.app.function(
104
+ image=self.image, cpu=cpu, gpu=gpu, secrets=[local_secrets]
105
+ )(run_script)
106
+
107
+ def run(self, script_local_path: Path) -> None:
108
+ script_remote_path = self.local_to_remote_path(str(script_local_path))
109
+ with modal.enable_output(show_progress=True): # Prints out modal logs
110
+ with self.app.run():
111
+ self.modal_function.remote(Path(script_remote_path))
112
+
113
+ def create_modal_app(self, app_name: str) -> modal.App:
114
+ app = modal.App(app_name)
115
+ return app
116
+
117
+ def local_to_remote_path(self, local_path: str | Path) -> str:
118
+ local_path = Path(local_path).absolute()
119
+ local_mount_dir = Path(self.local_mount_dir).absolute()
120
+ remote_mount_dir = Path(self.remote_mount_dir)
121
+
122
+ # Check if local_path is inside local_mount_dir
123
+ try:
124
+ # This will raise ValueError if local_path is not relative to local_mount_dir
125
+ relative_path = local_path.relative_to(local_mount_dir)
126
+ except ValueError as err:
127
+ raise ValueError(
128
+ f"Local path '{local_path}' is not inside the mount directory '{local_mount_dir}'"
129
+ ) from err
130
+
131
+ # Join remote_mount_dir with the relative path
132
+ remote_path = remote_mount_dir / relative_path
133
+
134
+ # Return as string with normalized separators
135
+ return remote_path.as_posix()
136
+
137
+ def _configure_local_secrets(self) -> dict:
138
+ if ln_setup.settings.user.api_key is None:
139
+ raise ValueError("Please authenticate via: lamin login")
140
+
141
+ all_env_variables = {
142
+ "LAMIN_API_KEY": ln_setup.settings.user.api_key,
143
+ "LAMIN_CURRENT_PROJECT": self.app_name,
144
+ "LAMIN_CURRENT_INSTANCE": ln_setup.settings.instance.slug,
145
+ }
146
+ local_secrets = modal.Secret.from_dict(all_env_variables)
147
+ return local_secrets
148
+
149
+ def create_modal_image(
150
+ self,
151
+ python_version: str = "3.12",
152
+ packages: list[str] | None = None,
153
+ local_dir: str | Path = "./scripts",
154
+ remote_dir: str = "/scripts/",
155
+ image_url: str | None = None,
156
+ env_variables: dict | None = None,
157
+ ) -> modal.Image:
158
+ if env_variables is None:
159
+ env_variables = {}
160
+ if packages is None:
161
+ packages = ["lamindb"]
162
+ else:
163
+ packages.append("lamindb") # Append lamindb to the list of packages
164
+
165
+ if image_url is None:
166
+ image = modal.Image.debian_slim(python_version=python_version)
167
+ else:
168
+ image = modal.Image.from_registry(image_url, add_python=python_version)
169
+ return (
170
+ image.pip_install(packages)
171
+ .env(env_variables)
172
+ .add_local_python_source("lamindb", "lamindb_setup", copy=True)
173
+ .run_commands("lamin settings set auto-connect true")
174
+ .add_local_dir(local_dir, remote_dir)
175
+ )
@@ -0,0 +1,13 @@
1
+ import os
2
+
3
+ import nox
4
+ from laminci.nox import install_lamindb
5
+
6
+ IS_PR = os.getenv("GITHUB_EVENT_NAME") != "push"
7
+ nox.options.default_venv_backend = "none"
8
+
9
+
10
+ @nox.session
11
+ def setup(session):
12
+ branch = "main" if IS_PR else "release"
13
+ install_lamindb(session, branch=branch)
@@ -60,7 +60,15 @@ def test_load_transform():
60
60
  path2.unlink()
61
61
 
62
62
 
63
- def test_load_artifact():
63
+ def test_get_load_artifact():
64
+ result = subprocess.run(
65
+ "lamin get"
66
+ " 'https://lamin.ai/laminlabs/lamin-site-assets/artifact/e2G7k9EVul4JbfsEYAy5'",
67
+ shell=True,
68
+ capture_output=True,
69
+ )
70
+ assert result.returncode == 0
71
+
64
72
  result = subprocess.run(
65
73
  "lamin load"
66
74
  " 'https://lamin.ai/laminlabs/lamin-site-assets/artifact/e2G7k9EVul4JbfsEYAy5'",
@@ -76,3 +84,12 @@ def test_load_artifact():
76
84
  capture_output=True,
77
85
  )
78
86
  assert result.returncode == 0
87
+
88
+
89
+ def test_load_collection():
90
+ result = subprocess.run(
91
+ "lamin load 'https://lamin.ai/laminlabs/lamindata/collection/2wUs6V1OuGzp5Ll4'",
92
+ shell=True,
93
+ capture_output=True,
94
+ )
95
+ assert result.returncode == 0
@@ -2,7 +2,7 @@ import subprocess
2
2
  from multiprocessing import Process
3
3
  from pathlib import Path
4
4
 
5
- scripts_dir = Path(__file__).parent.resolve() / "scripts"
5
+ scripts_dir = Path(__file__).parent.parent.resolve() / "scripts"
6
6
 
7
7
 
8
8
  def run_script():
@@ -3,7 +3,7 @@ from pathlib import Path
3
3
 
4
4
  import lamindb_setup as ln_setup
5
5
 
6
- test_file = Path(__file__).parent.parent.resolve() / ".gitignore"
6
+ test_file = Path(__file__).parent.parent.parent.resolve() / ".gitignore"
7
7
 
8
8
 
9
9
  def test_save_file():
@@ -44,7 +44,7 @@ def test_save_file():
44
44
  )
45
45
  print(result.stdout.decode())
46
46
  print(result.stderr.decode())
47
- assert "found artifact with same hash" in result.stdout.decode()
47
+ assert "returning existing artifact with same hash" in result.stdout.decode()
48
48
  assert "key='mytest'" in result.stdout.decode()
49
49
  assert "storage path:" in result.stdout.decode()
50
50
  assert result.returncode == 0
@@ -184,9 +184,7 @@ print("my consecutive cell")
184
184
  new_path = notebook_path.with_name("new_name.ipynb")
185
185
  os.system(f"cp {notebook_path} {new_path}")
186
186
 
187
- # upon re-running it, the user is asked to create a new stem uid
188
- with pytest.raises(CellExecutionError) as error:
189
- nbproject_test.execute_notebooks(new_path, print_outputs=True)
190
-
191
- print(error.exconly())
192
- assert "clashes with the existing key" in error.exconly()
187
+ # upon re-running it, the notebook name is updated
188
+ nbproject_test.execute_notebooks(new_path, print_outputs=True)
189
+ transform = ln.Transform.get("hlsFXswrJjtt0001")
190
+ assert "new_name.ipynb" in transform.key
@@ -4,7 +4,7 @@ from pathlib import Path
4
4
 
5
5
  import lamindb as ln
6
6
 
7
- scripts_dir = Path(__file__).parent.resolve() / "scripts"
7
+ scripts_dir = Path(__file__).parent.parent.resolve() / "scripts"
8
8
 
9
9
 
10
10
  def test_run_save_cache():
@@ -5,7 +5,7 @@ from pathlib import Path
5
5
  import lamindb as ln
6
6
  from lamindb_setup import settings
7
7
 
8
- scripts_dir = Path(__file__).parent.resolve() / "scripts"
8
+ scripts_dir = Path(__file__).parent.parent.resolve() / "scripts"
9
9
 
10
10
 
11
11
  def test_save_without_uid():
@@ -0,0 +1,19 @@
1
+ import subprocess
2
+ from pathlib import Path
3
+
4
+ scripts_dir = Path(__file__).parent.parent.resolve() / "scripts"
5
+
6
+
7
+ def test_run_on_modal():
8
+ filepath = scripts_dir / "run-track-and-finish.py"
9
+
10
+ subprocess.run("lamin connect laminlabs/lamindata", shell=True, check=True)
11
+ result = subprocess.run(
12
+ f"lamin run {filepath} --project 1QLbS6N7wwiL",
13
+ shell=True,
14
+ capture_output=True,
15
+ )
16
+ print(result.stdout.decode())
17
+ assert result.returncode == 0
18
+ assert "hello!" in result.stdout.decode()
19
+ assert "finished Run" in result.stdout.decode()
@@ -1,9 +1,5 @@
1
1
  import lamindb as ln
2
2
 
3
- # purposefully do not pass uid
4
- ln.track()
5
-
6
-
7
3
  if __name__ == "__main__":
8
4
  # we're using new_run here to mock the notebook situation
9
5
  # and cover the look up of an existing run in the tests
@@ -1,3 +0,0 @@
1
- """Lamin CLI."""
2
-
3
- __version__ = "1.1.0"
@@ -1,169 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import re
4
- import shutil
5
- from pathlib import Path
6
-
7
- from lamin_utils import logger
8
-
9
-
10
- def decompose_url(url: str) -> tuple[str, str, str]:
11
- assert "transform" in url or "artifact" in url
12
- for entity in ["transform", "artifact"]:
13
- if entity in url:
14
- break
15
- uid = url.split(f"{entity}/")[1]
16
- instance_slug = "/".join(url.split("/")[3:5])
17
- return instance_slug, entity, uid
18
-
19
-
20
- def load(
21
- entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False
22
- ):
23
- import lamindb_setup as ln_setup
24
-
25
- if entity.startswith("https://") and "lamin" in entity:
26
- url = entity
27
- instance, entity, uid = decompose_url(url)
28
- elif entity not in {"artifact", "transform"}:
29
- raise SystemExit("Entity has to be a laminhub URL or 'artifact' or 'transform'")
30
- else:
31
- instance = ln_setup.settings.instance.slug
32
-
33
- ln_setup.connect(instance)
34
- import lamindb as ln
35
-
36
- def script_to_notebook(
37
- transform: ln.Transform, notebook_path: Path, bump_revision: bool = False
38
- ) -> None:
39
- import jupytext
40
- from lamin_utils._base62 import increment_base62
41
-
42
- if notebook_path.suffix == ".ipynb":
43
- # below is backward compat
44
- if "# # transform.name" in transform.source_code:
45
- new_content = transform.source_code.replace(
46
- "# # transform.name", f"# # {transform.description}"
47
- )
48
- elif transform.source_code.startswith("# %% [markdown]"):
49
- source_code_split = transform.source_code.split("\n")
50
- if source_code_split[1] == "#":
51
- source_code_split[1] = f"# # {transform.description}"
52
- new_content = "\n".join(source_code_split)
53
- else:
54
- new_content = transform.source_code
55
- else: # R notebook
56
- # Pattern to match title only within YAML header section
57
- title_pattern = r'^---\n.*?title:\s*"([^"]*)".*?---'
58
- title_match = re.search(
59
- title_pattern, transform.source_code, flags=re.DOTALL | re.MULTILINE
60
- )
61
- new_content = transform.source_code
62
- if title_match:
63
- current_title = title_match.group(1)
64
- if current_title != transform.description:
65
- pattern = r'^(---\n.*?title:\s*)"([^"]*)"(.*?---)'
66
- replacement = f'\\1"{transform.description}"\\3'
67
- new_content = re.sub(
68
- pattern,
69
- replacement,
70
- new_content,
71
- flags=re.DOTALL | re.MULTILINE,
72
- )
73
- logger.important(
74
- f"updated title to match description: {current_title} →"
75
- f" {transform.description}"
76
- )
77
- if bump_revision:
78
- uid = transform.uid
79
- if uid in new_content:
80
- new_uid = f"{uid[:-4]}{increment_base62(uid[-4:])}"
81
- new_content = new_content.replace(uid, new_uid)
82
- logger.important(f"updated uid: {uid} → {new_uid}")
83
- if notebook_path.suffix == ".ipynb":
84
- notebook = jupytext.reads(new_content, fmt="py:percent")
85
- jupytext.write(notebook, notebook_path)
86
- else:
87
- notebook_path.write_text(new_content)
88
-
89
- query_by_uid = uid is not None
90
-
91
- if entity == "transform":
92
- if query_by_uid:
93
- # we don't use .get here because DoesNotExist is hard to catch
94
- # due to private django API
95
- # here full uid is not expected anymore as before
96
- # via ln.Transform.objects.get(uid=uid)
97
- transforms = ln.Transform.objects.filter(uid__startswith=uid)
98
- else:
99
- # if below, we take is_latest=True as the criterion, we might get draft notebooks
100
- # hence, we use source_code__isnull=False and order by created_at instead
101
- transforms = ln.Transform.objects.filter(key=key, source_code__isnull=False)
102
-
103
- if (n_transforms := len(transforms)) == 0:
104
- err_msg = f"uid {uid}" if query_by_uid else f"key={key} and source_code"
105
- raise SystemExit(f"Transform with {err_msg} does not exist.")
106
-
107
- if n_transforms > 1:
108
- transforms = transforms.order_by("-created_at")
109
- transform = transforms.first()
110
-
111
- target_relpath = Path(transform.key)
112
- if len(target_relpath.parents) > 1:
113
- logger.important(
114
- "preserve the folder structure for versioning:"
115
- f" {target_relpath.parent}/"
116
- )
117
- target_relpath.parent.mkdir(parents=True, exist_ok=True)
118
- if target_relpath.exists():
119
- response = input(f"! {target_relpath} exists: replace? (y/n)")
120
- if response != "y":
121
- raise SystemExit("Aborted.")
122
-
123
- if transform.source_code is not None:
124
- if target_relpath.suffix in (".ipynb", ".Rmd", ".qmd"):
125
- script_to_notebook(transform, target_relpath, bump_revision=True)
126
- else:
127
- target_relpath.write_text(transform.source_code)
128
- else:
129
- raise SystemExit("No source code available for this transform.")
130
-
131
- logger.important(f"{transform.type} is here: {target_relpath}")
132
-
133
- if with_env:
134
- ln.settings.track_run_inputs = False
135
- if (
136
- transform.latest_run is not None
137
- and transform.latest_run.environment is not None
138
- ):
139
- filepath_env_cache = transform.latest_run.environment.cache()
140
- target_env_filename = (
141
- target_relpath.parent / f"{target_relpath.stem}__requirements.txt"
142
- )
143
- shutil.move(filepath_env_cache, target_env_filename)
144
- logger.important(f"environment is here: {target_env_filename}")
145
- else:
146
- logger.warning("latest transform run with environment doesn't exist")
147
-
148
- return target_relpath
149
- elif entity == "artifact":
150
- ln.settings.track_run_inputs = False
151
-
152
- if query_by_uid:
153
- # we don't use .get here because DoesNotExist is hard to catch
154
- # due to private django API
155
- artifacts = ln.Artifact.filter(uid__startswith=uid)
156
- else:
157
- artifacts = ln.Artifact.filter(key=key)
158
-
159
- if (n_artifacts := len(artifacts)) == 0:
160
- err_msg = f"uid={uid}" if query_by_uid else f"key={key}"
161
- raise SystemExit(f"Artifact with {err_msg} does not exist.")
162
-
163
- if n_artifacts > 1:
164
- artifacts = artifacts.order_by("-created_at")
165
- artifact = artifacts.first()
166
-
167
- cache_path = artifact.cache()
168
- logger.important(f"artifact is here: {cache_path}")
169
- return cache_path
File without changes
File without changes
File without changes
File without changes