lamin_cli 1.0.7__tar.gz → 1.2.0__tar.gz
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/.gitignore +2 -0
- lamin_cli-1.2.0/.pre-commit-config.yaml +40 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/PKG-INFO +1 -1
- lamin_cli-1.2.0/lamin_cli/__init__.py +3 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/__main__.py +53 -31
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/_cache.py +1 -0
- lamin_cli-1.2.0/lamin_cli/_load.py +192 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/_migration.py +4 -3
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/_save.py +12 -7
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/_settings.py +1 -0
- lamin_cli-1.2.0/pyproject.toml +114 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/scripts/merely-import-lamindb.py +1 -1
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/scripts/run-track-and-finish-sync-git.py +1 -1
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/scripts/run-track-with-params.py +1 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_cli.py +1 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_load.py +25 -7
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_parse_uid_from_code.py +3 -3
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_save_files.py +2 -1
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_save_notebooks.py +6 -5
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_save_r_code.py +3 -2
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_save_scripts.py +29 -8
- lamin_cli-1.0.7/.pre-commit-config.yaml +0 -64
- lamin_cli-1.0.7/lamin_cli/__init__.py +0 -3
- lamin_cli-1.0.7/lamin_cli/_load.py +0 -164
- lamin_cli-1.0.7/pyproject.toml +0 -21
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/.github/workflows/doc-changes.yml +0 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/LICENSE +0 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/README.md +0 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/conftest.py +0 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/notebooks/not-initialized.ipynb +0 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/notebooks/with-title-and-initialized-consecutive.ipynb +0 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/notebooks/with-title-and-initialized-non-consecutive.ipynb +0 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/scripts/run-track-and-finish.py +0 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/scripts/run-track.R +0 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/scripts/run-track.qmd +0 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_migrate.py +0 -0
- {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_multi_process.py +1 -1
lamin_cli-1.2.0/.pre-commit-config.yaml

@@ -0,0 +1,40 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+        exclude: |
+          (?x)(
+              .github/workflows/latest-changes.jinja2
+          )
+      - id: check-yaml
+      - id: check-added-large-files
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.2
+    hooks:
+      - id: ruff
+        args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]
+      - id: ruff-format
+  - repo: https://github.com/rbubley/mirrors-prettier
+    rev: v3.5.1
+    hooks:
+      - id: prettier
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.3.9
+    hooks:
+      - id: nbstripout
+        exclude: |
+          (?x)(
+              docs/examples/|
+              docs/notes/|
+              tests
+          )
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.14.1
+    hooks:
+      - id: mypy
+        exclude: |
+          (?x)(
+              tests/hub-local/conftest.py
+          )
{lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/__main__.py

@@ -1,12 +1,16 @@
 from __future__ import annotations
+
+import inspect
 import os
 import sys
+import warnings
 from collections import OrderedDict
-import inspect
-from importlib.metadata import PackageNotFoundError, version
-from typing import Optional, Mapping
 from functools import wraps
-import
+from importlib.metadata import PackageNotFoundError, version
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from collections.abc import Mapping
 
 # https://github.com/ewels/rich-click/issues/19
 # Otherwise rich-click takes over the formatting.
@@ -18,11 +22,11 @@ if os.environ.get("NO_RICH"):
 
     def __init__(
         self,
-        name:
-        commands:
+        name: str | None = None,
+        commands: Mapping[str, click.Command] | None = None,
         **kwargs,
     ):
-        super(
+        super().__init__(name, commands, **kwargs)
         self.commands = commands or OrderedDict()
 
     def list_commands(self, ctx: click.Context) -> Mapping[str, click.Command]:
@@ -77,12 +81,14 @@ else:
         return wrapper
 
 
-from click import Command, Context
 from lamindb_setup._silence_loggers import silence_loggers
 
-from lamin_cli._settings import settings
 from lamin_cli._cache import cache
 from lamin_cli._migration import migrate
+from lamin_cli._settings import settings
+
+if TYPE_CHECKING:
+    from click import Command, Context
 
 try:
     lamindb_version = version("lamindb")
@@ -100,7 +106,7 @@ def main():
 @main.command()
 @click.argument("user", type=str, default=None, required=False)
 @click.option("--key", type=str, default=None, help="The legacy API key.")
-def login(user: str, key:
+def login(user: str, key: str | None):
     """Log into LaminHub.
 
     `lamin login` prompts for your API key unless you set it via environment variable `LAMIN_API_KEY`.
@@ -142,24 +148,25 @@ def schema_to_modules_callback(ctx, param, value):
             "The --schema option is deprecated and will be removed in a future version."
             " Please use --modules instead.",
             DeprecationWarning,
+            stacklevel=2,
         )
     return value
 
 
 # fmt: off
 @main.command()
-@click.option("--storage", type=str, help="Local directory, s3://bucket_name, gs://bucket_name.")
-@click.option("--db", type=str, default=None, help="Postgres database connection URL, do not pass for SQLite.")
-@click.option("--modules", type=str, default=None, help="Comma-separated string of modules.")
+@click.option("--storage", type=str, help="Local directory, s3://bucket_name, gs://bucket_name.")
+@click.option("--db", type=str, default=None, help="Postgres database connection URL, do not pass for SQLite.")
+@click.option("--modules", type=str, default=None, help="Comma-separated string of schema modules.")
 @click.option("--name", type=str, default=None, help="The instance name.")
-@click.option("--schema", type=str, default=None, help="[DEPRECATED] Use --modules instead.", callback=schema_to_modules_callback)
+@click.option("--schema", type=str, default=None, help="[DEPRECATED] Use --modules instead.", callback=schema_to_modules_callback)
 # fmt: on
 def init(
     storage: str,
-    db:
-    modules:
-    name:
-    schema:
+    db: str | None,
+    modules: str | None,
+    name: str | None,
+    schema: str | None,
 ):
     """Init an instance."""
     from lamindb_setup._init_instance import init as init_
@@ -182,7 +189,8 @@ def connect(instance: str):
     {attr}`~lamindb.setup.core.SetupSettings.auto_connect` to `True` so that you
     auto-connect in a Python session upon importing `lamindb`.
     """
-    from lamindb_setup import
+    from lamindb_setup import connect as connect_
+    from lamindb_setup import settings as settings_
 
     settings_.auto_connect = True
     return connect_(instance, _reload_lamindb=False)
@@ -217,7 +225,7 @@ def info(schema: bool):
 # fmt: off
 @main.command()
 @click.argument("instance", type=str, default=None)
-@click.option("--force", is_flag=True, default=False, help="Do not ask for confirmation.")
+@click.option("--force", is_flag=True, default=False, help="Do not ask for confirmation.")
 # fmt: on
 def delete(instance: str, force: bool = False):
     """Delete an entity.
@@ -236,7 +244,7 @@ def delete(instance: str, force: bool = False):
 @click.option(
     "--with-env", is_flag=True, help="Also return the environment for a tranform."
 )
-def load(entity: str, uid: str = None, key: str = None, with_env: bool = False):
+def load(entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False):
     """Load a file or folder.
 
     Pass a URL, `artifact`, or `transform`. For example:
@@ -252,7 +260,8 @@ def load(entity: str, uid: str = None, key: str = None, with_env: bool = False):
     """
     is_slug = entity.count("/") == 1
     if is_slug:
-        from lamindb_setup import
+        from lamindb_setup import connect
+        from lamindb_setup import settings as settings_
 
         # can decide whether we want to actually deprecate
         # click.echo(
@@ -270,18 +279,31 @@ def load(entity: str, uid: str = None, key: str = None, with_env: bool = False):
 @click.argument("entity", type=str)
 @click.option("--uid", help="The uid for the entity.")
 @click.option("--key", help="The key for the entity.")
-@click.option(
-    "--with-env", is_flag=True, help="Also return the environment for a tranform."
-)
-def get(entity: str, uid: str = None, key: str = None, with_env: bool = False):
+def get(entity: str, uid: str | None = None, key: str | None = None):
     """Query metadata about an entity.
 
-    Currently only works for artifact
+    Currently only works for artifact.
     """
-
+    import lamindb_setup as ln_setup
 
-
-
+    from ._load import decompose_url
+
+    if entity.startswith("https://") and "lamin" in entity:
+        url = entity
+        instance, entity, uid = decompose_url(url)
+    elif entity not in {"artifact"}:
+        raise SystemExit("Entity has to be a laminhub URL or 'artifact'")
+    else:
+        instance = ln_setup.settings.instance.slug
+
+    ln_setup.connect(instance)
+    import lamindb as ln
+
+    if uid is not None:
+        artifact = ln.Artifact.get(uid)
+    else:
+        artifact = ln.Artifact.get(key=key)
+    artifact.describe()
 
 
 @main.command()
@@ -315,7 +337,7 @@ def _generate_help():
     out: dict[str, dict[str, str | None]] = {}
 
     def recursive_help(
-        cmd: Command, parent:
+        cmd: Command, parent: Context | None = None, name: tuple[str, ...] = ()
     ):
         ctx = click.Context(cmd, info_name=cmd.name, parent=parent)
         assert cmd.name
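The reworked `get` command above accepts either a LaminHub URL or the literal `artifact` with `--uid`/`--key`, connects to the resolved instance, and calls `Artifact.describe()`. Below is a minimal sketch of exercising it, mirroring the subprocess pattern used in tests/test_load.py (the URL is the one used in that test; this assumes a logged-in environment with access to the public instance):

import subprocess

# invoke the CLI the same way tests/test_load.py does
result = subprocess.run(
    "lamin get"
    " 'https://lamin.ai/laminlabs/lamin-site-assets/artifact/e2G7k9EVul4JbfsEYAy5'",
    shell=True,
    capture_output=True,
)
assert result.returncode == 0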
lamin_cli-1.2.0/lamin_cli/_load.py

@@ -0,0 +1,192 @@
+from __future__ import annotations
+
+import re
+import shutil
+from pathlib import Path
+
+from lamin_utils import logger
+
+
+def decompose_url(url: str) -> tuple[str, str, str]:
+    assert any(keyword in url for keyword in ["transform", "artifact", "collection"])
+    for entity in ["transform", "artifact", "collection"]:
+        if entity in url:
+            break
+    uid = url.split(f"{entity}/")[1]
+    instance_slug = "/".join(url.split("/")[3:5])
+    return instance_slug, entity, uid
+
+
+def load(
+    entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False
+):
+    import lamindb_setup as ln_setup
+
+    if entity.startswith("https://") and "lamin" in entity:
+        url = entity
+        instance, entity, uid = decompose_url(url)
+    elif entity not in {"artifact", "transform", "collection"}:
+        raise SystemExit(
+            "Entity has to be a laminhub URL or 'artifact', 'collection', or 'transform'"
+        )
+    else:
+        instance = ln_setup.settings.instance.slug
+
+    ln_setup.connect(instance)
+    import lamindb as ln
+
+    def script_to_notebook(
+        transform: ln.Transform, notebook_path: Path, bump_revision: bool = False
+    ) -> None:
+        import jupytext
+        from lamin_utils._base62 import increment_base62
+
+        if notebook_path.suffix == ".ipynb":
+            # below is backward compat
+            if "# # transform.name" in transform.source_code:
+                new_content = transform.source_code.replace(
+                    "# # transform.name", f"# # {transform.description}"
+                )
+            elif transform.source_code.startswith("# %% [markdown]"):
+                source_code_split = transform.source_code.split("\n")
+                if source_code_split[1] == "#":
+                    source_code_split[1] = f"# # {transform.description}"
+                new_content = "\n".join(source_code_split)
+            else:
+                new_content = transform.source_code
+        else:  # R notebook
+            # Pattern to match title only within YAML header section
+            title_pattern = r'^---\n.*?title:\s*"([^"]*)".*?---'
+            title_match = re.search(
+                title_pattern, transform.source_code, flags=re.DOTALL | re.MULTILINE
+            )
+            new_content = transform.source_code
+            if title_match:
+                current_title = title_match.group(1)
+                if current_title != transform.description:
+                    pattern = r'^(---\n.*?title:\s*)"([^"]*)"(.*?---)'
+                    replacement = f'\\1"{transform.description}"\\3'
+                    new_content = re.sub(
+                        pattern,
+                        replacement,
+                        new_content,
+                        flags=re.DOTALL | re.MULTILINE,
+                    )
+                    logger.important(
+                        f"updated title to match description: {current_title} →"
+                        f" {transform.description}"
+                    )
+        if bump_revision:
+            uid = transform.uid
+            if uid in new_content:
+                new_uid = f"{uid[:-4]}{increment_base62(uid[-4:])}"
+                new_content = new_content.replace(uid, new_uid)
+                logger.important(f"updated uid: {uid} → {new_uid}")
+        if notebook_path.suffix == ".ipynb":
+            notebook = jupytext.reads(new_content, fmt="py:percent")
+            jupytext.write(notebook, notebook_path)
+        else:
+            notebook_path.write_text(new_content)
+
+    query_by_uid = uid is not None
+
+    match entity:
+        case "transform":
+            if query_by_uid:
+                # we don't use .get here because DoesNotExist is hard to catch
+                # due to private django API
+                # here full uid is not expected anymore as before
+                # via ln.Transform.objects.get(uid=uid)
+                transforms = ln.Transform.objects.filter(uid__startswith=uid)
+            else:
+                # if below, we take is_latest=True as the criterion, we might get draft notebooks
+                # hence, we use source_code__isnull=False and order by created_at instead
+                transforms = ln.Transform.objects.filter(
+                    key=key, source_code__isnull=False
+                )
+
+            if (n_transforms := len(transforms)) == 0:
+                err_msg = f"uid {uid}" if query_by_uid else f"key={key} and source_code"
+                raise SystemExit(f"Transform with {err_msg} does not exist.")
+
+            if n_transforms > 1:
+                transforms = transforms.order_by("-created_at")
+            transform = transforms.first()
+
+            target_relpath = Path(transform.key)
+            if len(target_relpath.parents) > 1:
+                logger.important(
+                    "preserve the folder structure for versioning:"
+                    f" {target_relpath.parent}/"
+                )
+                target_relpath.parent.mkdir(parents=True, exist_ok=True)
+            if target_relpath.exists():
+                response = input(f"! {target_relpath} exists: replace? (y/n)")
+                if response != "y":
+                    raise SystemExit("Aborted.")
+
+            if transform.source_code is not None:
+                if target_relpath.suffix in (".ipynb", ".Rmd", ".qmd"):
+                    script_to_notebook(transform, target_relpath, bump_revision=True)
+                else:
+                    target_relpath.write_text(transform.source_code)
+            else:
+                raise SystemExit("No source code available for this transform.")
+
+            logger.important(f"{transform.type} is here: {target_relpath}")
+
+            if with_env:
+                ln.settings.track_run_inputs = False
+                if (
+                    transform.latest_run is not None
+                    and transform.latest_run.environment is not None
+                ):
+                    filepath_env_cache = transform.latest_run.environment.cache()
+                    target_env_filename = (
+                        target_relpath.parent
+                        / f"{target_relpath.stem}__requirements.txt"
+                    )
+                    shutil.move(filepath_env_cache, target_env_filename)
+                    logger.important(f"environment is here: {target_env_filename}")
+                else:
+                    logger.warning(
+                        "latest transform run with environment doesn't exist"
+                    )
+
+            return target_relpath
+        case "artifact" | "collection":
+            ln.settings.track_run_inputs = False
+
+            EntityClass = ln.Artifact if entity == "artifact" else ln.Collection
+
+            # we don't use .get here because DoesNotExist is hard to catch
+            # due to private django API
+            if query_by_uid:
+                entities = EntityClass.filter(uid__startswith=uid)
+            else:
+                entities = EntityClass.filter(key=key)
+
+            if (n_entities := len(entities)) == 0:
+                err_msg = f"uid={uid}" if query_by_uid else f"key={key}"
+                raise SystemExit(
+                    f"{entity.capitalize()} with {err_msg} does not exist."
+                )
+
+            if n_entities > 1:
+                entities = entities.order_by("-created_at")
+
+            entity_obj = entities.first()
+            cache_path = entity_obj.cache()
+
+            # collection gives us a list of paths
+            if isinstance(cache_path, list):
+                logger.important(f"{entity} paths ({len(cache_path)} files):")
+                for i, path in enumerate(cache_path):
+                    if i < 5 or i >= len(cache_path) - 5:
+                        logger.important(f"  [{i + 1}/{len(cache_path)}] {path}")
+                    elif i == 5:
+                        logger.important(f"  ... {len(cache_path) - 10} more files ...")
+            else:
+                logger.important(f"{entity} is here: {cache_path}")
+        case _:
+            raise AssertionError(f"unknown entity {entity}")
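As a worked example of the URL handling in the new `_load.py`: `decompose_url` splits a hub URL on the entity keyword and on `/`, returning an `(instance_slug, entity, uid)` tuple. The assertion below follows directly from the function body and one of the URLs used in tests/test_load.py:

from lamin_cli._load import decompose_url

url = "https://lamin.ai/laminlabs/arrayloader-benchmarks/transform/1GCKs8zLtkc85zKv"
# url.split("/")[3:5] -> ["laminlabs", "arrayloader-benchmarks"]
# url.split("transform/")[1] -> "1GCKs8zLtkc85zKv"
assert decompose_url(url) == (
    "laminlabs/arrayloader-benchmarks",
    "transform",
    "1GCKs8zLtkc85zKv",
)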
{lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/_cache.py

@@ -1,4 +1,5 @@
 from __future__ import annotations
+
 import os
 from typing import Optional
 
{lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/_migration.py

@@ -34,9 +35,9 @@ def deploy():
 @click.option("--end-number", type=str, default=None)
 @click.option("--start-number", type=str, default=None)
 def squash(
-    package_name:
-    end_number:
-    start_number:
+    package_name: str | None,
+    end_number: str | None,
+    start_number: str | None,
 ):
     """Squash migrations."""
     from lamindb_setup._migrate import migrate
{lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/_save.py

@@ -1,8 +1,10 @@
 from __future__ import annotations
+
+import re
+import sys
 from pathlib import Path
-
+
 from lamin_utils import logger
-import re
 
 
 def parse_uid_from_code(content: str, suffix: str) -> str | None:
@@ -41,7 +43,7 @@ def parse_uid_from_code(content: str, suffix: str) -> str | None:
 
 
 def save_from_filepath_cli(
-    filepath:
+    filepath: str | Path,
     key: str | None,
     description: str | None,
     registry: str | None,
@@ -57,13 +59,16 @@ def save_from_filepath_cli(
     ln_setup.settings.auto_connect = True
 
     import lamindb as ln
+
+    if not ln.setup.core.django.IS_SETUP:
+        sys.exit(-1)
     from lamindb._finish import save_context_core
 
     ln_setup.settings.auto_connect = auto_connect_state
 
     suffixes_transform = {
-        "py":
-        "R":
+        "py": {".py", ".ipynb"},
+        "R": {".R", ".qmd", ".Rmd"},
     }
 
     if filepath.suffix in {".qmd", ".Rmd"}:
@@ -80,8 +85,8 @@ def save_from_filepath_cli(
         and filepath.with_suffix(".nb.html").exists()
     ):
         raise SystemExit(
-            f
-            f
+            f"Please delete one of\n - {filepath.with_suffix('.html')}\n -"
+            f" {filepath.with_suffix('.nb.html')}"
         )
 
     if registry is None:
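The `suffixes_transform` table added to `save_from_filepath_cli` determines which file types are saved as Python versus R transforms. The helper below is a hypothetical illustration of that lookup, not code from `_save.py`; it only reuses the mapping shown in the hunk above:

from __future__ import annotations

from pathlib import Path

# mapping copied from the diff above; the helper itself is illustrative only
suffixes_transform = {
    "py": {".py", ".ipynb"},
    "R": {".R", ".qmd", ".Rmd"},
}


def infer_transform_language(filepath: Path) -> str | None:
    # return "py", "R", or None if the suffix is not a recognized transform type
    for language, suffixes in suffixes_transform.items():
        if filepath.suffix in suffixes:
            return language
    return None


assert infer_transform_language(Path("analysis.qmd")) == "R"
assert infer_transform_language(Path("notebook.ipynb")) == "py"
assert infer_transform_language(Path("data.csv")) is None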
lamin_cli-1.2.0/pyproject.toml

@@ -0,0 +1,114 @@
+[build-system]
+requires = ["flit_core >=3.2,<4"]
+build-backend = "flit_core.buildapi"
+
+[project]
+name = "lamin_cli"
+authors = [{name = "Lamin Labs", email = "open-source@lamin.ai"}]
+readme = "README.md"
+dynamic = ["version", "description"]
+dependencies = [
+    "rich-click>=1.7",
+]
+
+[project.urls]
+Home = "https://github.com/laminlabs/lamin-cli"
+
+[project.scripts]
+lamin = "lamin_cli.__main__:main"
+
+[tool.ruff]
+src = ["src"]
+line-length = 88
+lint.select = [
+    "F",  # Errors detected by Pyflakes
+    "E",  # Error detected by Pycodestyle
+    "W",  # Warning detected by Pycodestyle
+    "I",  # isort
+    "D",  # pydocstyle
+    "B",  # flake8-bugbear
+    "TID",  # flake8-tidy-imports
+    "C4",  # flake8-comprehensions
+    "BLE",  # flake8-blind-except
+    "UP",  # pyupgrade
+    "RUF100",  # Report unused noqa directives
+    "TCH",  # Typing imports
+    "NPY",  # Numpy specific rules
+    "PTH"  # Use pathlib
+]
+lint.ignore = [
+    # Do not catch blind exception: `Exception`
+    "BLE001",
+    # Errors from function calls in argument defaults. These are fine when the result is immutable.
+    "B008",
+    # line too long -> we accept long comment lines; black gets rid of long code lines
+    "E501",
+    # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient
+    "E731",
+    # allow I, O, l as variable names -> I is the identity matrix
+    "E741",
+    # Missing docstring in public module
+    "D100",
+    # undocumented-public-class
+    "D101",
+    # Missing docstring in public method
+    "D102",
+    # Missing docstring in public function
+    "D103",
+    # Missing docstring in public package
+    "D104",
+    # __magic__ methods are are often self-explanatory, allow missing docstrings
+    "D105",
+    # Missing docstring in public nested class
+    "D106",
+    # Missing docstring in __init__
+    "D107",
+    ## Disable one in each pair of mutually incompatible rules
+    # We don’t want a blank line before a class docstring
+    "D203",
+    # 1 blank line required after class docstring
+    "D204",
+    # first line should end with a period [Bug: doesn't work with single-line docstrings]
+    # We want docstrings to start immediately after the opening triple quote
+    "D213",
+    # Section underline is over-indented ("{name}")
+    "D215",
+    # First line should end with a period
+    "D400",
+    # First line should be in imperative mood; try rephrasing
+    "D401",
+    # First word of the first line should be capitalized: {} -> {}
+    "D403",
+    # First word of the docstring should not be "This"
+    "D404",
+    # Section name should end with a newline ("{name}")
+    "D406",
+    # Missing dashed underline after section ("{name}")
+    "D407",
+    # Section underline should be in the line following the section's name ("{name}")
+    "D408",
+    # Section underline should match the length of its name ("{name}")
+    "D409",
+    # No blank lines allowed between a section header and its content ("{name}")
+    "D412",
+    # Missing blank line after last section ("{name}")
+    "D413",
+    # Missing argument description
+    "D417",
+    # Imports unused
+    "F401",
+    # camcelcase imported as lowercase
+    "N813",
+    # module import not at top level of file
+    "E402",
+    # open()` should be replaced by `Path.open()
+    "PTH123",
+]
+
+[tool.ruff.lint.pydocstyle]
+convention = "google"
+
+[tool.ruff.lint.per-file-ignores]
+"docs/*" = ["I", "B018", "B017"]
+"tests/*" = ["D"]
+"*/__init__.py" = ["F401"]
{lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_load.py

@@ -1,12 +1,13 @@
-from lamin_cli._load import decompose_url
-from pathlib import Path
 import subprocess
+from pathlib import Path
+
+from lamin_cli._load import decompose_url
 
 
 def test_decompose_url():
     urls = [
-        "https://lamin.ai/laminlabs/arrayloader-benchmarks/transform/1GCKs8zLtkc85zKv",
-        "https://lamin.company.com/laminlabs/arrayloader-benchmarks/transform/1GCKs8zLtkc85zKv",
+        "https://lamin.ai/laminlabs/arrayloader-benchmarks/transform/1GCKs8zLtkc85zKv",
+        "https://lamin.company.com/laminlabs/arrayloader-benchmarks/transform/1GCKs8zLtkc85zKv",
     ]
     for url in urls:
         result = decompose_url(url)
@@ -23,7 +24,7 @@ def test_load_transform():
     result = subprocess.run(
         "lamin load"
         " 'https://lamin.ai/laminlabs/lamin-dev/transform/VFYCIuaw2GsX0000'"
-        " --with-env",
+        " --with-env",
         shell=True,
         capture_output=True,
     )
@@ -59,10 +60,18 @@ def test_load_transform():
     path2.unlink()
 
 
-def
+def test_get_load_artifact():
+    result = subprocess.run(
+        "lamin get"
+        " 'https://lamin.ai/laminlabs/lamin-site-assets/artifact/e2G7k9EVul4JbfsEYAy5'",
+        shell=True,
+        capture_output=True,
+    )
+    assert result.returncode == 0
+
     result = subprocess.run(
         "lamin load"
-        " 'https://lamin.ai/laminlabs/lamin-site-assets/artifact/e2G7k9EVul4JbfsEYAy5'",
+        " 'https://lamin.ai/laminlabs/lamin-site-assets/artifact/e2G7k9EVul4JbfsEYAy5'",
         shell=True,
         capture_output=True,
     )
@@ -75,3 +84,12 @@ def test_load_artifact():
         capture_output=True,
     )
     assert result.returncode == 0
+
+
+def test_load_collection():
+    result = subprocess.run(
+        "lamin load 'https://lamin.ai/laminlabs/lamindata/collection/2wUs6V1OuGzp5Ll4'",
+        shell=True,
+        capture_output=True,
+    )
+    assert result.returncode == 0
{lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_parse_uid_from_code.py

@@ -112,6 +112,6 @@ def test_r_track_pattern():
     for suffix in suffixes:
         for content, expected_uid in valid_cases:
             uid = parse_uid_from_code(content, suffix)
-            assert (
-
-            )
+            assert uid == expected_uid, (
+                f"Failed for valid content with {suffix}: {content}"
+            )
{lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_save_notebooks.py

@@ -1,12 +1,13 @@
+import json
 import os
 import subprocess
 from pathlib import Path
+
+import lamindb as ln
 import nbproject_test
 import pytest
-from nbproject.dev import read_notebook, write_notebook
 from nbclient.exceptions import CellExecutionError
-import
-import lamindb as ln
+from nbproject.dev import read_notebook, write_notebook
 
 notebook_dir = "./sub/lamin-cli/tests/notebooks/"
 
@@ -167,13 +168,13 @@ print("my consecutive cell")
     # get the the source code via command line
     result = subprocess.run(
         "yes | lamin load"
-        f" https://lamin.ai/{ln.setup.settings.user.handle}/laminci-unit-tests/transform/hlsFXswrJjtt0000",
+        f" https://lamin.ai/{ln.setup.settings.user.handle}/laminci-unit-tests/transform/hlsFXswrJjtt0000",
         shell=True,
         capture_output=True,
     )
     # print(result.stderr.decode())
     assert Path("./with-title-and-initialized-consecutive.ipynb").exists()
-    with open("./with-title-and-initialized-consecutive.ipynb"
+    with open("./with-title-and-initialized-consecutive.ipynb") as f:
         json_notebook = json.load(f)
     print(json_notebook["cells"][0])
     assert json_notebook["cells"][0]["source"] == ["# My test notebook (consecutive)"]
{lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_save_scripts.py

@@ -1,10 +1,10 @@
-from pathlib import Path
-import subprocess
 import os
+import subprocess
+from pathlib import Path
+
 import lamindb as ln
 from lamindb_setup import settings
 
-
 scripts_dir = Path(__file__).parent.resolve() / "scripts"
 
 
@@ -45,17 +45,38 @@ def test_run_save_cache_with_git_and_uid():
         shell=True,
         capture_output=True,
     )
-
-
+    print(result.stdout.decode())
+    print(result.stderr.decode())
     assert result.returncode == 0
     assert "created Transform" in result.stdout.decode()
    assert "m5uCHTTp" in result.stdout.decode()
     assert "started new Run" in result.stdout.decode()
 
     transform = ln.Transform.get("m5uCHTTpJnjQ")
-    assert transform.hash == "
+    assert transform.hash == "VC1oTPcaVSrzNrXUT9p4qw"
     assert transform.latest_run.environment.path.exists()
 
+    assert (
+        transform.source_code
+        == """import lamindb as ln
+
+ln.settings.sync_git_repo = "https://github.com/laminlabs/lamin-cli"
+ln.context.description = "My good script"
+ln.track("m5uCHTTpJnjQ0000")
+
+
+if __name__ == "__main__":
+    # we're using new_run here to mock the notebook situation
+    # and cover the look up of an existing run in the tests
+    # new_run = True is trivial
+    ln.track(new_run=False)
+
+    print("hello!")
+
+    ln.finish()
+"""
+    )
+
     # you can rerun the same script
     result = subprocess.run(
         f"python {filepath}",
@@ -160,7 +181,7 @@ def test_run_save_cache_with_git_and_uid():
     # try to get the the source code via command line
     result = subprocess.run(
         "yes | lamin load"
-        f" https://lamin.ai/{settings.user.handle}/laminci-unit-tests/transform/m5uCHTTpJnjQ0000",
+        f" https://lamin.ai/{settings.user.handle}/laminci-unit-tests/transform/m5uCHTTpJnjQ0000",
         shell=True,
         capture_output=True,
     )
@@ -168,7 +189,7 @@ def test_run_save_cache_with_git_and_uid():
     assert result.returncode == 0
 
     result = subprocess.run(
-        f"yes | lamin load transform --key {filepath.name}",
+        f"yes | lamin load transform --key {filepath.name}",
         shell=True,
         capture_output=True,
     )
lamin_cli-1.0.7/.pre-commit-config.yaml
DELETED

@@ -1,64 +0,0 @@
-repos:
-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.2.0
-    hooks:
-      - id: trailing-whitespace
-      - id: end-of-file-fixer
-        exclude: |
-          (?x)(
-              .github/workflows/latest-changes.jinja2
-          )
-      - id: check-yaml
-      - id: check-added-large-files
-  - repo: https://github.com/psf/black
-    rev: 22.6.0
-    hooks:
-      - id: black-jupyter
-  - repo: https://github.com/pycqa/flake8
-    rev: 4.0.1
-    hooks:
-      - id: flake8
-        additional_dependencies:
-          - flake8-black==0.3.3
-          - flake8-typing-imports==1.10.0
-        language_version: python3
-        args:
-          - --max-line-length=120
-          - --ignore=E203,W503,BLK100,TYP001
-        exclude: |
-          (?x)(
-              __init__.py
-          )
-  - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v2.6.2
-    hooks:
-      - id: prettier
-  - repo: https://github.com/kynan/nbstripout
-    rev: 0.3.9
-    hooks:
-      - id: nbstripout
-        exclude: |
-          (?x)(
-              docs/examples/|
-              docs/notes/|
-              tests
-          )
-  - repo: https://github.com/Lucas-C/pre-commit-hooks
-    rev: v1.1.9
-    hooks:
-      - id: forbid-crlf
-      - id: remove-crlf
-  - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.940
-    hooks:
-      - id: mypy
-        exclude: |
-          (?x)(
-              tests/hub-local/conftest.py
-          )
-  - repo: https://github.com/pycqa/pydocstyle
-    rev: 6.1.1
-    hooks:
-      - id: pydocstyle
-        args: # google style + __init__, see http://www.pydocstyle.org/en/stable/error_codes.html
-          - --ignore=D100,D101,D102,D103,D104,D106,D107,D203,D204,D213,D215,D400,D401,D403,D404,D406,D407,D408,D409,D412,D413
lamin_cli-1.0.7/lamin_cli/_load.py
DELETED

@@ -1,164 +0,0 @@
-from __future__ import annotations
-from typing import Tuple
-from lamin_utils import logger
-import shutil
-import re
-from pathlib import Path
-
-
-def decompose_url(url: str) -> Tuple[str, str, str]:
-    assert "transform" in url or "artifact" in url
-    for entity in ["transform", "artifact"]:
-        if entity in url:
-            break
-    uid = url.split(f"{entity}/")[1]
-    instance_slug = "/".join(url.split("/")[3:5])
-    return instance_slug, entity, uid
-
-
-def load(entity: str, uid: str = None, key: str = None, with_env: bool = False):
-    import lamindb_setup as ln_setup
-
-    if entity.startswith("https://") and "lamin" in entity:
-        url = entity
-        instance, entity, uid = decompose_url(url)
-    elif entity not in {"artifact", "transform"}:
-        raise SystemExit("Entity has to be a laminhub URL or 'artifact' or 'transform'")
-    else:
-        instance = ln_setup.settings.instance.slug
-
-    ln_setup.connect(instance)
-    import lamindb as ln
-
-    def script_to_notebook(
-        transform: ln.Transform, notebook_path: Path, bump_revision: bool = False
-    ) -> None:
-        import jupytext
-        from lamin_utils._base62 import increment_base62
-
-        if notebook_path.suffix == ".ipynb":
-            # below is backward compat
-            if "# # transform.name" in transform.source_code:
-                new_content = transform.source_code.replace(
-                    "# # transform.name", f"# # {transform.description}"
-                )
-            elif transform.source_code.startswith("# %% [markdown]\n#\n"):
-                new_content = transform.source_code.replace(
-                    "# %% [markdown]\n#\n",
-                    f"# %% [markdown]\n# # {transform.description}\n",
-                )
-        else:  # R notebook
-            # Pattern to match title only within YAML header section
-            title_pattern = r'^---\n.*?title:\s*"([^"]*)".*?---'
-            title_match = re.search(
-                title_pattern, transform.source_code, flags=re.DOTALL | re.MULTILINE
-            )
-            new_content = transform.source_code
-            if title_match:
-                current_title = title_match.group(1)
-                if current_title != transform.description:
-                    pattern = r'^(---\n.*?title:\s*)"([^"]*)"(.*?---)'
-                    replacement = f'\\1"{transform.description}"\\3'
-                    new_content = re.sub(
-                        pattern,
-                        replacement,
-                        new_content,
-                        flags=re.DOTALL | re.MULTILINE,
-                    )
-                    logger.important(
-                        f"updated title to match description: {current_title} →"
-                        f" {transform.description}"
-                    )
-        if bump_revision:
-            uid = transform.uid
-            if uid in new_content:
-                new_uid = f"{uid[:-4]}{increment_base62(uid[-4:])}"
-                new_content = new_content.replace(uid, new_uid)
-                logger.important(f"updated uid: {uid} → {new_uid}")
-        if notebook_path.suffix == ".ipynb":
-            notebook = jupytext.reads(new_content, fmt="py:percent")
-            jupytext.write(notebook, notebook_path)
-        else:
-            notebook_path.write_text(new_content)
-
-    query_by_uid = uid is not None
-
-    if entity == "transform":
-        if query_by_uid:
-            # we don't use .get here because DoesNotExist is hard to catch
-            # due to private django API
-            # here full uid is not expected anymore as before
-            # via ln.Transform.objects.get(uid=uid)
-            transforms = ln.Transform.objects.filter(uid__startswith=uid)
-        else:
-            # if below, we take is_latest=True as the criterion, we might get draft notebooks
-            # hence, we use source_code__isnull=False and order by created_at instead
-            transforms = ln.Transform.objects.filter(key=key, source_code__isnull=False)
-
-        if (n_transforms := len(transforms)) == 0:
-            err_msg = f"uid {uid}" if query_by_uid else f"key={key} and source_code"
-            raise SystemExit(f"Transform with {err_msg} does not exist.")
-
-        if n_transforms > 1:
-            transforms = transforms.order_by("-created_at")
-        transform = transforms.first()
-
-        target_relpath = Path(transform.key)
-        if len(target_relpath.parents) > 1:
-            logger.important(
-                "preserve the folder structure for versioning:"
-                f" {target_relpath.parent}/"
-            )
-            target_relpath.parent.mkdir(parents=True, exist_ok=True)
-        if target_relpath.exists():
-            response = input(f"! {target_relpath} exists: replace? (y/n)")
-            if response != "y":
-                raise SystemExit("Aborted.")
-
-        if transform.source_code is not None:
-            if target_relpath.suffix in (".ipynb", ".Rmd", ".qmd"):
-                script_to_notebook(transform, target_relpath, bump_revision=True)
-            else:
-                target_relpath.write_text(transform.source_code)
-        else:
-            raise SystemExit("No source code available for this transform.")
-
-        logger.important(f"{transform.type} is here: {target_relpath}")
-
-        if with_env:
-            ln.settings.track_run_inputs = False
-            if (
-                transform.latest_run is not None
-                and transform.latest_run.environment is not None
-            ):
-                filepath_env_cache = transform.latest_run.environment.cache()
-                target_env_filename = (
-                    target_relpath.parent / f"{target_relpath.stem}__requirements.txt"
-                )
-                shutil.move(filepath_env_cache, target_env_filename)
-                logger.important(f"environment is here: {target_env_filename}")
-            else:
-                logger.warning("latest transform run with environment doesn't exist")
-
-        return target_relpath
-    elif entity == "artifact":
-        ln.settings.track_run_inputs = False
-
-        if query_by_uid:
-            # we don't use .get here because DoesNotExist is hard to catch
-            # due to private django API
-            artifacts = ln.Artifact.filter(uid__startswith=uid)
-        else:
-            artifacts = ln.Artifact.filter(key=key)
-
-        if (n_artifacts := len(artifacts)) == 0:
-            err_msg = f"uid={uid}" if query_by_uid else f"key={key}"
-            raise SystemExit(f"Artifact with {err_msg} does not exist.")
-
-        if n_artifacts > 1:
-            artifacts = artifacts.order_by("-created_at")
-        artifact = artifacts.first()
-
-        cache_path = artifact.cache()
-        logger.important(f"artifact is here: {cache_path}")
-        return cache_path
lamin_cli-1.0.7/pyproject.toml
DELETED

@@ -1,21 +0,0 @@
-[build-system]
-requires = ["flit_core >=3.2,<4"]
-build-backend = "flit_core.buildapi"
-
-[project]
-name = "lamin_cli"
-authors = [{name = "Lamin Labs", email = "open-source@lamin.ai"}]
-readme = "README.md"
-dynamic = ["version", "description"]
-dependencies = [
-    "rich-click>=1.7",
-]
-
-[project.urls]
-Home = "https://github.com/laminlabs/lamin-cli"
-
-[project.scripts]
-lamin = "lamin_cli.__main__:main"
-
-[tool.black]
-preview = true