lamin_cli 1.9.0__py2.py3-none-any.whl → 1.11.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamin_cli/__init__.py CHANGED
@@ -2,10 +2,11 @@
2
2
 
3
3
  This is the command line interface for interacting with LaminDB & LaminHub.
4
4
 
5
- The interface is defined in `__main__.py`. The root API here is used by LaminR to replicate the CLI functionality.
5
+ The interface is defined in `__main__.py`.
6
+ The root API here is used by LaminR to replicate the CLI functionality.
6
7
  """
7
8
 
8
- __version__ = "1.9.0"
9
+ __version__ = "1.11.0"
9
10
 
10
11
  from lamindb_setup import disconnect, logout
11
12
  from lamindb_setup._connect_instance import _connect_cli as connect
@@ -15,4 +16,12 @@ from lamindb_setup._setup_user import login
15
16
  from ._delete import delete
16
17
  from ._save import save
17
18
 
18
- __all__ = ["save", "init", "connect", "delete", "login", "disconnect"]
19
+ __all__ = [
20
+ "save",
21
+ "init",
22
+ "connect",
23
+ "delete",
24
+ "login",
25
+ "logout",
26
+ "disconnect",
27
+ ]
lamin_cli/__main__.py CHANGED
@@ -42,13 +42,23 @@ COMMAND_GROUPS = {
42
42
  "name": "Load, save, create & delete data",
43
43
  "commands": ["load", "save", "create", "delete"],
44
44
  },
45
+ {
46
+ "name": "Tracking within shell scripts",
47
+ "commands": ["track", "finish"],
48
+ },
45
49
  {
46
50
  "name": "Describe, annotate & list data",
47
51
  "commands": ["describe", "annotate", "list"],
48
52
  },
49
53
  {
50
54
  "name": "Configure",
51
- "commands": ["checkout", "switch", "cache", "settings", "migrate"],
55
+ "commands": [
56
+ "checkout",
57
+ "switch",
58
+ "cache",
59
+ "settings",
60
+ "migrate",
61
+ ],
52
62
  },
53
63
  {
54
64
  "name": "Auth",
@@ -103,6 +113,7 @@ else:
103
113
  from lamindb_setup._silence_loggers import silence_loggers
104
114
 
105
115
  from lamin_cli._cache import cache
116
+ from lamin_cli._io import io
106
117
  from lamin_cli._migration import migrate
107
118
  from lamin_cli._settings import settings
108
119
 
@@ -294,54 +305,60 @@ def info(schema: bool):
294
305
  @click.option("--name", type=str, default=None)
295
306
  @click.option("--uid", type=str, default=None)
296
307
  @click.option("--slug", type=str, default=None)
308
+ @click.option("--permanent", is_flag=True, default=None, help="Permanently delete the entity where applicable, e.g., for artifact, transform, collection.")
297
309
  @click.option("--force", is_flag=True, default=False, help="Do not ask for confirmation (only relevant for instance).")
298
310
  # fmt: on
299
- def delete(entity: str, name: str | None = None, uid: str | None = None, slug: str | None = None, force: bool = False):
311
+ def delete(entity: str, name: str | None = None, uid: str | None = None, slug: str | None = None, permanent: bool | None = None, force: bool = False):
300
312
  """Delete an entity.
301
313
 
302
314
  Currently supported: `branch`, `artifact`, `transform`, `collection`, and `instance`. For example:
303
315
 
304
316
  ```
305
317
  lamin delete https://lamin.ai/account/instance/artifact/e2G7k9EVul4JbfsEYAy5
318
+ lamin delete https://lamin.ai/account/instance/artifact/e2G7k9EVul4JbfsEYAy5 --permanent
306
319
  lamin delete branch --name my_branch
307
320
  lamin delete instance --slug account/name
308
321
  ```
309
322
  """
310
323
  from lamin_cli._delete import delete as delete_
311
324
 
312
- return delete_(entity=entity, name=name, uid=uid, slug=slug, force=force)
325
+ return delete_(entity=entity, name=name, uid=uid, slug=slug, permanent=permanent, force=force)
313
326
 
314
327
 
315
328
  @main.command()
316
- @click.argument("entity", type=str)
329
+ @click.argument("entity", type=str, required=False)
317
330
  @click.option("--uid", help="The uid for the entity.")
318
331
  @click.option("--key", help="The key for the entity.")
319
332
  @click.option(
320
333
  "--with-env", is_flag=True, help="Also return the environment for a tranform."
321
334
  )
322
- def load(entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False):
335
+ def load(entity: str | None = None, uid: str | None = None, key: str | None = None, with_env: bool = False):
323
336
  """Load a file or folder into the cache or working directory.
324
337
 
325
- Pass a URL, `artifact`, or `transform`. For example:
338
+ Pass a URL or `--key`. For example:
326
339
 
327
340
  ```
328
341
  lamin load https://lamin.ai/account/instance/artifact/e2G7k9EVul4JbfsE
329
- lamin load artifact --key mydatasets/mytable.parquet
342
+ lamin load --key mydatasets/mytable.parquet
343
+ lamin load --key analysis.ipynb
344
+ lamin load --key myanalyses/analysis.ipynb --with-env
345
+ ```
346
+
347
+ You can also pass a uid and the entity type:
348
+
349
+ ```
330
350
  lamin load artifact --uid e2G7k9EVul4JbfsE
331
- lamin load transform --key analysis.ipynb
332
351
  lamin load transform --uid Vul4JbfsEYAy5
333
- lamin load transform --uid Vul4JbfsEYAy5 --with-env
334
352
  ```
335
353
  """
336
- is_slug = entity.count("/") == 1
337
- if is_slug:
338
- from lamindb_setup._connect_instance import _connect_cli
339
- # for backward compat and convenience, connect to the instance
340
- return _connect_cli(entity)
341
- else:
342
- from lamin_cli._load import load as load_
343
-
344
- return load_(entity, uid=uid, key=key, with_env=with_env)
354
+ from lamin_cli._load import load as load_
355
+ if entity is not None:
356
+ is_slug = entity.count("/") == 1
357
+ if is_slug:
358
+ from lamindb_setup._connect_instance import _connect_cli
359
+ # for backward compat
360
+ return _connect_cli(entity)
361
+ return load_(entity, uid=uid, key=key, with_env=with_env)
345
362
 
346
363
 
347
364
  def _describe(entity: str = "artifact", uid: str | None = None, key: str | None = None):
@@ -423,6 +440,41 @@ def save(path: str, key: str, description: str, stem_uid: str, project: str, spa
423
440
  if save_(path=path, key=key, description=description, stem_uid=stem_uid, project=project, space=space, branch=branch, registry=registry) is not None:
424
441
  sys.exit(1)
425
442
 
443
+ @main.command()
444
+ def track():
445
+ """Start tracking a run of a shell script.
446
+
447
+ This command works like {func}`~lamindb.track()` in a Python session. Here is an example script:
448
+
449
+ ```
450
+ # my_script.sh
451
+ set -e # exit on error
452
+ lamin track # initiate a tracked shell script run
453
+ lamin load --key raw/file1.txt
454
+ # do something
455
+ lamin save processed_file1.txt --key processed/file1.txt
456
+ lamin finish # mark the shell script run as finished
457
+ ```
458
+
459
+ If you run that script, it will track the run of the script, and save the input and output artifacts:
460
+
461
+ ```
462
+ sh my_script.sh
463
+ ```
464
+ """
465
+ from lamin_cli._context import track as track_
466
+ return track_()
467
+
468
+
469
+ @main.command()
470
+ def finish():
471
+ """Finish the current tracked run of a shell script.
472
+
473
+ This command works like {func}`~lamindb.finish()` in a Python session.
474
+ """
475
+ from lamin_cli._context import finish as finish_
476
+ return finish_()
477
+
426
478
 
427
479
  @main.command()
428
480
  @click.argument("entity", type=str, default=None, required=False)
@@ -539,6 +591,7 @@ def run(filepath: str, project: str, image_url: str, packages: str, cpu: int, gp
539
591
  main.add_command(settings)
540
592
  main.add_command(cache)
541
593
  main.add_command(migrate)
594
+ main.add_command(io)
542
595
 
543
596
  # https://stackoverflow.com/questions/57810659/automatically-generate-all-help-documentation-for-click-commands
544
597
  # https://claude.ai/chat/73c28487-bec3-4073-8110-50d1a2dd6b84
lamin_cli/_context.py ADDED
@@ -0,0 +1,76 @@
1
+ import os
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ import click
6
+ from lamin_utils import logger
7
+ from lamindb_setup.core._settings_store import settings_dir
8
+
9
+
10
+ def get_current_run_file() -> Path:
11
+ """Get the path to the file storing the current run UID."""
12
+ return settings_dir / "current_shell_run.txt"
13
+
14
+
15
+ def is_interactive_shell() -> bool:
16
+ """Check if running in an interactive terminal."""
17
+ return sys.stdin.isatty() and sys.stdout.isatty() and os.isatty(0)
18
+
19
+
20
+ def get_script_filename() -> Path:
21
+ """Try to get the filename of the calling shell script."""
22
+ import psutil
23
+
24
+ parent = psutil.Process(os.getppid())
25
+ cmdline = parent.cmdline()
26
+
27
+ # For shells like bash, sh, zsh
28
+ if parent.name() in ["bash", "sh", "zsh", "dash"]:
29
+ # cmdline is typically: ['/bin/bash', 'script.sh', ...]
30
+ if len(cmdline) > 1 and not cmdline[1].startswith("-"):
31
+ return Path(cmdline[1])
32
+ raise click.ClickException(
33
+ "Cannot determine script filename. Please run in an interactive shell."
34
+ )
35
+
36
+
37
+ def track():
38
+ import lamindb as ln
39
+
40
+ if not ln.setup.settings._instance_exists:
41
+ raise click.ClickException(
42
+ "Not connected to an instance. Please run: lamin connect account/name"
43
+ )
44
+ path = get_script_filename()
45
+ source_code = path.read_text()
46
+ transform = ln.Transform(
47
+ key=path.name, source_code=source_code, type="script"
48
+ ).save()
49
+ run = ln.Run(transform=transform).save()
50
+ current_run_file = get_current_run_file()
51
+ current_run_file.parent.mkdir(parents=True, exist_ok=True)
52
+ current_run_file.write_text(run.uid)
53
+ logger.important(f"started tracking shell run: {run.uid}")
54
+
55
+
56
+ def finish():
57
+ from datetime import datetime, timezone
58
+
59
+ import lamindb as ln
60
+
61
+ if not ln.setup.settings._instance_exists:
62
+ raise click.ClickException(
63
+ "Not connected to an instance. Please run: lamin connect account/name"
64
+ )
65
+
66
+ current_run_file = get_current_run_file()
67
+ if not current_run_file.exists():
68
+ raise click.ClickException(
69
+ "No active run to finish. Please run `lamin track` first."
70
+ )
71
+ run = ln.Run.get(uid=current_run_file.read_text().strip())
72
+ run._status_code = 0
73
+ run.finished_at = datetime.now(timezone.utc)
74
+ run.save()
75
+ current_run_file.unlink()
76
+ logger.important(f"finished tracking shell run: {run.uid}")
lamin_cli/_delete.py CHANGED
@@ -9,6 +9,7 @@ def delete(
9
9
  name: str | None = None,
10
10
  uid: str | None = None,
11
11
  slug: str | None = None,
12
+ permanent: bool | None = None,
12
13
  force: bool = False,
13
14
  ):
14
15
  # TODO: refactor to abstract getting and deleting across entities
@@ -21,22 +22,22 @@ def delete(
21
22
  assert name is not None, "You have to pass a name for deleting a branch."
22
23
  from lamindb import Branch
23
24
 
24
- Branch.get(name=name).delete()
25
+ Branch.get(name=name).delete(permanent=permanent)
25
26
  elif entity == "artifact":
26
27
  assert uid is not None, "You have to pass a uid for deleting an artifact."
27
28
  from lamindb import Artifact
28
29
 
29
- Artifact.get(uid).delete()
30
+ Artifact.get(uid).delete(permanent=permanent)
30
31
  elif entity == "transform":
31
32
  assert uid is not None, "You have to pass a uid for deleting an transform."
32
33
  from lamindb import Transform
33
34
 
34
- Transform.get(uid).delete()
35
+ Transform.get(uid).delete(permanent=permanent)
35
36
  elif entity == "collection":
36
37
  assert uid is not None, "You have to pass a uid for deleting an collection."
37
38
  from lamindb import Collection
38
39
 
39
- Collection.get(uid).delete()
40
+ Collection.get(uid).delete(permanent=permanent)
40
41
  elif entity == "instance":
41
42
  return delete_instance(slug, force=force)
42
43
  else: # backwards compatibility
lamin_cli/_io.py ADDED
@@ -0,0 +1,144 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import subprocess
6
+ import sys
7
+ import tempfile
8
+ from pathlib import Path
9
+
10
+ import lamindb_setup as ln_setup
11
+
12
+ from lamin_cli.clone._clone_verification import (
13
+ _count_instance_records,
14
+ )
15
+
16
+ if os.environ.get("NO_RICH"):
17
+ import click as click
18
+ else:
19
+ import rich_click as click
20
+
21
+
22
+ @click.group()
23
+ def io():
24
+ """Import and export instances."""
25
+
26
+
27
+ # fmt: off
28
+ @io.command("snapshot")
29
+ @click.option("--upload/--no-upload", is_flag=True, help="Whether to upload the snapshot.", default=True)
30
+ @click.option("--track/--no-track", is_flag=True, help="Whether to track snapshot generation.", default=True)
31
+ # fmt: on
32
+ def snapshot(upload: bool, track: bool) -> None:
33
+ """Create a SQLite snapshot of the connected instance."""
34
+ if not ln_setup.settings._instance_exists:
35
+ raise click.ClickException(
36
+ "Not connected to an instance. Please run: lamin connect account/name"
37
+ )
38
+
39
+ instance_owner = ln_setup.settings.instance.owner
40
+ instance_name = ln_setup.settings.instance.name
41
+
42
+ ln_setup.connect(f"{instance_owner}/{instance_name}", use_root_db_user=True)
43
+
44
+ import lamindb as ln
45
+
46
+ original_counts = _count_instance_records()
47
+
48
+ modules_without_lamindb = ln_setup.settings.instance.modules
49
+ modules_complete = modules_without_lamindb.copy()
50
+ modules_complete.add("lamindb")
51
+
52
+
53
+ with tempfile.TemporaryDirectory() as export_dir:
54
+ if track:
55
+ ln.track()
56
+ ln_setup.io.export_db(module_names=modules_complete, output_dir=export_dir)
57
+ if track:
58
+ ln.finish()
59
+
60
+ script_path = (
61
+ Path(__file__).parent / "clone" / "create_sqlite_clone_and_import_db.py"
62
+ )
63
+ result = subprocess.run(
64
+ [
65
+ sys.executable,
66
+ str(script_path),
67
+ "--instance-name",
68
+ instance_name,
69
+ "--export-dir",
70
+ export_dir,
71
+ "--modules",
72
+ ",".join(modules_without_lamindb),
73
+ "--original-counts",
74
+ json.dumps(original_counts),
75
+ ],
76
+ check=False,
77
+ stderr=subprocess.PIPE,
78
+ text=True,
79
+ cwd=Path.cwd(),
80
+ )
81
+ if result.returncode != 0:
82
+ try:
83
+ mismatches = json.loads(result.stderr.strip())
84
+ error_msg = "Record count mismatch detected:\n" + "\n".join(
85
+ [f" {table}: original={orig}, clone={clone}"
86
+ for table, (orig, clone) in mismatches.items()]
87
+ )
88
+ raise click.ClickException(error_msg)
89
+ except (json.JSONDecodeError, AttributeError, ValueError, TypeError):
90
+ raise click.ClickException(f"Clone verification failed:\n{result.stderr}") from None
91
+
92
+
93
+ ln_setup.connect(f"{instance_owner}/{instance_name}", use_root_db_user=True)
94
+ if upload:
95
+ ln_setup.core._clone.upload_sqlite_clone(
96
+ local_sqlite_path=f"{instance_name}-clone/.lamindb/lamin.db",
97
+ compress=True,
98
+ )
99
+
100
+ ln_setup.disconnect()
101
+
102
+
103
+ # fmt: off
104
+ @io.command("exportdb")
105
+ @click.option("--modules", type=str, default=None, help="Comma-separated list of modules to export (e.g., 'lamindb,bionty').",)
106
+ @click.option("--output-dir", type=str, help="Output directory for exported parquet files.")
107
+ @click.option("--max-workers", type=int, default=8, help="Number of parallel workers.")
108
+ @click.option("--chunk-size", type=int, default=500_000, help="Number of rows per chunk for large tables.")
109
+ # fmt: on
110
+ def exportdb(modules: str | None, output_dir: str, max_workers: int, chunk_size: int):
111
+ """Export registry tables to parquet files."""
112
+ if not ln_setup.settings._instance_exists:
113
+ raise click.ClickException(
114
+ "Not connected to an instance. Please run: lamin connect account/name"
115
+ )
116
+
117
+ module_list = modules.split(",") if modules else None
118
+ ln_setup.io.export_db(
119
+ module_names=module_list,
120
+ output_dir=output_dir,
121
+ max_workers=max_workers,
122
+ chunk_size=chunk_size,
123
+ )
124
+
125
+
126
+ # fmt: off
127
+ @io.command("importdb")
128
+ @click.option("--modules", type=str, default=None, help="Comma-separated list of modules to import (e.g., 'lamindb,bionty').")
129
+ @click.option("--input-dir", type=str, help="Input directory containing exported parquet files.")
130
+ @click.option("--if-exists", type=click.Choice(["fail", "replace", "append"]), default="replace", help="How to handle existing data.")
131
+ # fmt: on
132
+ def importdb(modules: str | None, input_dir: str, if_exists: str):
133
+ """Import registry tables from parquet files."""
134
+ if not ln_setup.settings._instance_exists:
135
+ raise click.ClickException(
136
+ "Not connected to an instance. Please run: lamin connect account/name"
137
+ )
138
+
139
+ module_list = modules.split(",") if modules else None
140
+ ln_setup.io.import_db(
141
+ module_names=module_list,
142
+ input_dir=input_dir,
143
+ if_exists=if_exists,
144
+ )
lamin_cli/_load.py CHANGED
@@ -6,15 +6,36 @@ from pathlib import Path
6
6
 
7
7
  from lamin_utils import logger
8
8
 
9
- from ._save import parse_title_r_notebook
9
+ from ._context import get_current_run_file
10
+ from ._save import infer_registry_from_path, parse_title_r_notebook
10
11
  from .urls import decompose_url
11
12
 
12
13
 
13
14
  def load(
14
- entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False
15
+ entity: str | None = None,
16
+ uid: str | None = None,
17
+ key: str | None = None,
18
+ with_env: bool = False,
15
19
  ):
20
+ """Load artifact, collection, or transform from LaminDB.
21
+
22
+ Args:
23
+ entity: URL containing 'lamin', or 'artifact', 'collection', or 'transform'; if omitted, it is inferred from `key`
24
+ uid: Unique identifier (prefix matching supported)
25
+ key: Key identifier
26
+ with_env: If True, also load environment requirements file for transforms
27
+
28
+ Returns:
29
+ Path to loaded transform, or None for artifacts/collections
30
+ """
16
31
  import lamindb_setup as ln_setup
17
32
 
33
+ if entity is None:
34
+ if key is None:
35
+ raise SystemExit("Either entity or key has to be provided.")
36
+ else:
37
+ entity = infer_registry_from_path(key)
38
+
18
39
  if entity.startswith("https://") and "lamin" in entity:
19
40
  url = entity
20
41
  instance, entity, uid = decompose_url(url)
@@ -28,6 +49,10 @@ def load(
28
49
  ln_setup.connect(instance)
29
50
  import lamindb as ln
30
51
 
52
+ current_run = None
53
+ if get_current_run_file().exists():
54
+ current_run = ln.Run.get(uid=get_current_run_file().read_text().strip())
55
+
31
56
  def script_to_notebook(
32
57
  transform: ln.Transform, notebook_path: Path, bump_revision: bool = False
33
58
  ) -> None:
@@ -102,27 +127,25 @@ def load(
102
127
  transforms = transforms.order_by("-created_at")
103
128
  transform = transforms.first()
104
129
 
105
- target_relpath = Path(transform.key)
106
- if len(target_relpath.parents) > 1:
107
- logger.important(
108
- "preserve the folder structure for versioning:"
109
- f" {target_relpath.parent}/"
110
- )
111
- target_relpath.parent.mkdir(parents=True, exist_ok=True)
112
- if target_relpath.exists():
113
- response = input(f"! {target_relpath} exists: replace? (y/n)")
130
+ target_path = Path(transform.key)
131
+ if ln_setup.settings.dev_dir is not None:
132
+ target_path = ln_setup.settings.dev_dir / target_path
133
+ if len(target_path.parents) > 1:
134
+ target_path.parent.mkdir(parents=True, exist_ok=True)
135
+ if target_path.exists():
136
+ response = input(f"! {target_path} exists: replace? (y/n)")
114
137
  if response != "y":
115
138
  raise SystemExit("Aborted.")
116
139
 
117
140
  if transform.source_code is not None:
118
- if target_relpath.suffix in (".ipynb", ".Rmd", ".qmd"):
119
- script_to_notebook(transform, target_relpath, bump_revision=True)
141
+ if target_path.suffix in (".ipynb", ".Rmd", ".qmd"):
142
+ script_to_notebook(transform, target_path, bump_revision=True)
120
143
  else:
121
- target_relpath.write_text(transform.source_code)
144
+ target_path.write_text(transform.source_code)
122
145
  else:
123
146
  raise SystemExit("No source code available for this transform.")
124
147
 
125
- logger.important(f"{transform.type} is here: {target_relpath}")
148
+ logger.important(f"{transform.type} is here: {target_path}")
126
149
 
127
150
  if with_env:
128
151
  ln.settings.track_run_inputs = False
@@ -132,8 +155,7 @@ def load(
132
155
  ):
133
156
  filepath_env_cache = transform.latest_run.environment.cache()
134
157
  target_env_filename = (
135
- target_relpath.parent
136
- / f"{target_relpath.stem}__requirements.txt"
158
+ target_path.parent / f"{target_path.stem}__requirements.txt"
137
159
  )
138
160
  shutil.move(filepath_env_cache, target_env_filename)
139
161
  logger.important(f"environment is here: {target_env_filename}")
@@ -142,14 +164,13 @@ def load(
142
164
  "latest transform run with environment doesn't exist"
143
165
  )
144
166
 
145
- return target_relpath
167
+ return target_path
146
168
  case "artifact" | "collection":
147
169
  ln.settings.track_run_inputs = False
148
170
 
149
171
  EntityClass = ln.Artifact if entity == "artifact" else ln.Collection
150
172
 
151
- # we don't use .get here because DoesNotExist is hard to catch
152
- # due to private django API
173
+ # we don't use .get here because DoesNotExist is hard to catch due to private django API
153
174
  # we use `.objects` here because we don't want to exclude kind = __lamindb_run__ artifacts
154
175
  if query_by_uid:
155
176
  entities = EntityClass.objects.filter(uid__startswith=uid)
@@ -166,7 +187,7 @@ def load(
166
187
  entities = entities.order_by("-created_at")
167
188
 
168
189
  entity_obj = entities.first()
169
- cache_path = entity_obj.cache()
190
+ cache_path = entity_obj.cache(is_run_input=current_run)
170
191
 
171
192
  # collection gives us a list of paths
172
193
  if isinstance(cache_path, list):
lamin_cli/_migration.py CHANGED
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import os
4
- from typing import Optional
5
4
 
6
5
  if os.environ.get("NO_RICH"):
7
6
  import click as click
lamin_cli/_save.py CHANGED
@@ -2,25 +2,30 @@ from __future__ import annotations
2
2
 
3
3
  import os
4
4
  import re
5
- import sys
6
5
  from pathlib import Path
7
- from typing import TYPE_CHECKING
8
6
 
9
7
  import click
8
+ import lamindb_setup as ln_setup
10
9
  from lamin_utils import logger
11
10
  from lamindb_setup.core.hashing import hash_file
12
11
 
12
+ from lamin_cli._context import get_current_run_file
13
+
13
14
 
14
15
  def infer_registry_from_path(path: Path | str) -> str:
15
16
  suffixes_transform = {
16
17
  "py": {".py", ".ipynb"},
17
18
  "R": {".R", ".qmd", ".Rmd"},
19
+ "sh": {".sh"},
18
20
  }
19
21
  if isinstance(path, str):
20
22
  path = Path(path)
21
23
  registry = (
22
24
  "transform"
23
- if path.suffix in suffixes_transform["py"].union(suffixes_transform["R"])
25
+ if path.suffix
26
+ in suffixes_transform["py"]
27
+ .union(suffixes_transform["R"])
28
+ .union(suffixes_transform["sh"])
24
29
  else "artifact"
25
30
  )
26
31
  return registry
@@ -43,9 +48,11 @@ def parse_uid_from_code(content: str, suffix: str) -> str | None:
43
48
  r'track\(\s*(?:transform\s*=\s*)?([\'"])([a-zA-Z0-9]{12,16})\1'
44
49
  )
45
50
  uid_pattern = None
51
+ elif suffix == ".sh":
52
+ return None
46
53
  else:
47
54
  raise SystemExit(
48
- "Only .py, .ipynb, .R, .qmd, .Rmd files are supported for saving"
55
+ "Only .py, .ipynb, .R, .qmd, .Rmd, .sh files are supported for saving"
49
56
  " transforms."
50
57
  )
51
58
 
@@ -83,18 +90,23 @@ def save(
83
90
  ) -> str | None:
84
91
  import lamindb as ln
85
92
  from lamindb._finish import save_context_core
93
+ from lamindb_setup.core._settings_store import settings_dir
86
94
  from lamindb_setup.core.upath import LocalPathClasses, UPath, create_path
87
95
 
96
+ current_run = None
97
+ if get_current_run_file().exists():
98
+ current_run = ln.Run.get(uid=get_current_run_file().read_text().strip())
99
+
88
100
  # this allows to have the correct treatment of credentials in case of cloud paths
89
- path = create_path(path)
101
+ ppath = create_path(path)
90
102
  # isinstance is needed to cast the type of path to UPath
91
103
  # to avoid mypy errors
92
- assert isinstance(path, UPath)
93
- if not path.exists():
94
- raise click.BadParameter(f"Path {path} does not exist", param_hint="path")
104
+ assert isinstance(ppath, UPath)
105
+ if not ppath.exists():
106
+ raise click.BadParameter(f"Path {ppath} does not exist", param_hint="path")
95
107
 
96
108
  if registry is None:
97
- registry = infer_registry_from_path(path)
109
+ registry = infer_registry_from_path(ppath)
98
110
 
99
111
  if project is not None:
100
112
  project_record = ln.Project.filter(
@@ -121,7 +133,7 @@ def save(
121
133
  f"Branch '{branch}' not found, either create it with `ln.Branch(name='...').save()` or fix typos."
122
134
  )
123
135
 
124
- is_cloud_path = not isinstance(path, LocalPathClasses)
136
+ is_cloud_path = not isinstance(ppath, LocalPathClasses)
125
137
 
126
138
  if registry == "artifact":
127
139
  ln.settings.creation.artifact_silence_missing_run_warning = True
@@ -144,12 +156,13 @@ def save(
144
156
  return "missing-key-or-description"
145
157
 
146
158
  artifact = ln.Artifact(
147
- path,
159
+ ppath,
148
160
  key=key,
149
161
  description=description,
150
162
  revises=revises,
151
163
  branch=branch_record,
152
164
  space=space_record,
165
+ run=current_run,
153
166
  ).save()
154
167
  logger.important(f"saved: {artifact}")
155
168
  logger.important(f"storage path: {artifact.path}")
@@ -167,34 +180,40 @@ def save(
167
180
  if registry == "transform":
168
181
  if key is not None:
169
182
  logger.warning(
170
- "key is ignored for transforms, the transform key is determined by the filename"
183
+ "key is ignored for transforms, the transform key is determined by the filename and the development directory (dev-dir)"
171
184
  )
172
185
  if is_cloud_path:
173
186
  logger.error("Can not register a transform from a cloud path")
174
187
  return "transform-with-cloud-path"
175
188
 
176
- if path.suffix in {".qmd", ".Rmd"}:
177
- html_file_exists = path.with_suffix(".html").exists()
178
- nb_html_file_exists = path.with_suffix(".nb.html").exists()
189
+ if ppath.suffix in {".qmd", ".Rmd"}:
190
+ html_file_exists = ppath.with_suffix(".html").exists()
191
+ nb_html_file_exists = ppath.with_suffix(".nb.html").exists()
179
192
 
180
193
  if not html_file_exists and not nb_html_file_exists:
181
194
  logger.error(
182
- f"Please export your {path.suffix} file as an html file here"
183
- f" {path.with_suffix('.html')}"
195
+ f"Please export your {ppath.suffix} file as an html file here"
196
+ f" {ppath.with_suffix('.html')}"
184
197
  )
185
198
  return "export-qmd-Rmd-as-html"
186
199
  elif html_file_exists and nb_html_file_exists:
187
200
  logger.error(
188
- f"Please delete one of\n - {path.with_suffix('.html')}\n -"
189
- f" {path.with_suffix('.nb.html')}"
201
+ f"Please delete one of\n - {ppath.with_suffix('.html')}\n -"
202
+ f" {ppath.with_suffix('.nb.html')}"
190
203
  )
191
204
  return "delete-html-or-nb-html"
192
205
 
193
- content = path.read_text()
194
- uid = parse_uid_from_code(content, path.suffix)
206
+ content = ppath.read_text()
207
+ uid = parse_uid_from_code(content, ppath.suffix)
208
+
209
+ ppath = ppath.resolve().expanduser()
210
+ if ln_setup.settings.dev_dir is not None:
211
+ key = ppath.relative_to(ln_setup.settings.dev_dir).as_posix()
212
+ else:
213
+ key = ppath.name
195
214
 
196
215
  if uid is not None:
197
- logger.important(f"mapped '{path.name}' on uid '{uid}'")
216
+ logger.important(f"mapped '{ppath.name}' on uid '{uid}'")
198
217
  if len(uid) == 16:
199
218
  # is full uid
200
219
  transform = ln.Transform.filter(uid=uid).one_or_none()
@@ -214,12 +233,16 @@ def save(
214
233
  if transform is None:
215
234
  uid = f"{stem_uid}0000"
216
235
  else:
217
- # TODO: account for folders as we do in ln.track()
218
- transform_hash, _ = hash_file(path)
219
- transform = ln.Transform.filter(key=path.name, is_latest=True).one_or_none()
236
+ _, transform_hash, _ = hash_file(ppath)
237
+ transform = ln.Transform.filter(hash=transform_hash).first()
220
238
  if transform is not None and transform.hash is not None:
221
239
  if transform.hash == transform_hash:
222
240
  if transform.type != "notebook":
241
+ logger.important(f"transform already saved: {transform}")
242
+ if transform.key != key:
243
+ transform.key = key
244
+ logger.important(f"updated key to '{key}'")
245
+ transform.save()
223
246
  return None
224
247
  if os.getenv("LAMIN_TESTING") == "true":
225
248
  response = "y"
@@ -245,21 +268,21 @@ def save(
245
268
  if revises is None:
246
269
  raise ln.errors.InvalidArgument("The stem uid is not found.")
247
270
  if transform is None:
248
- if path.suffix == ".ipynb":
271
+ if ppath.suffix == ".ipynb":
249
272
  from nbproject.dev import read_notebook
250
273
  from nbproject.dev._meta_live import get_title
251
274
 
252
- nb = read_notebook(path)
275
+ nb = read_notebook(ppath)
253
276
  description = get_title(nb)
254
- elif path.suffix in {".qmd", ".Rmd"}:
277
+ elif ppath.suffix in {".qmd", ".Rmd"}:
255
278
  description = parse_title_r_notebook(content)
256
279
  else:
257
280
  description = None
258
281
  transform = ln.Transform(
259
282
  uid=uid,
260
283
  description=description,
261
- key=path.name,
262
- type="script" if path.suffix in {".R", ".py"} else "notebook",
284
+ key=key,
285
+ type="script" if ppath.suffix in {".R", ".py", ".sh"} else "notebook",
263
286
  revises=revises,
264
287
  )
265
288
  if space is not None:
@@ -267,7 +290,9 @@ def save(
267
290
  if branch is not None:
268
291
  transform.branch = branch_record
269
292
  transform.save()
270
- logger.important(f"created Transform('{transform.uid}')")
293
+ logger.important(
294
+ f"created Transform('{transform.uid}', key='{transform.key}')"
295
+ )
271
296
  if project is not None:
272
297
  transform.projects.add(project_record)
273
298
  logger.important(f"labeled with project: {project_record.name}")
@@ -292,7 +317,7 @@ def save(
292
317
  return_code = save_context_core(
293
318
  run=run,
294
319
  transform=transform,
295
- filepath=path,
320
+ filepath=ppath,
296
321
  from_cli=True,
297
322
  )
298
323
  return return_code
File without changes
@@ -0,0 +1,56 @@
1
+ from concurrent.futures import ThreadPoolExecutor
2
+
3
+ from django.db import OperationalError, ProgrammingError
4
+ from lamin_utils import logger
5
+
6
+
7
+ def _count_instance_records() -> dict[str, int]:
8
+ """Count all records across SQLRecord registries in parallel.
9
+
10
+ Returns:
11
+ Dictionary mapping table names (format: "app_label.ModelName") to their record counts.
12
+
13
+ Example:
14
+ >>> counts = _count_instance_records()
15
+ >>> counts
16
+ {'lamindb.Artifact': 1523, 'lamindb.Collection': 42, 'bionty.Gene': 60000}
17
+ """
18
+ # Import here to ensure that models are loaded
19
+ from django.apps import apps
20
+ from lamindb.models import SQLRecord
21
+
22
+ def _count_model(model):
23
+ """Count records for a single model."""
24
+ table_name = f"{model._meta.app_label}.{model.__name__}"
25
+ try:
26
+ return (table_name, model.objects.count())
27
+ except (OperationalError, ProgrammingError) as e:
28
+ logger.warning(f"Could not count {table_name}: {e}")
29
+ return (table_name, 0)
30
+
31
+ models = [m for m in apps.get_models() if issubclass(m, SQLRecord)]
32
+
33
+ with ThreadPoolExecutor(max_workers=10) as executor:
34
+ results = executor.map(_count_model, models)
35
+
36
+ return dict(results)
37
+
38
+
39
+ def _compare_record_counts(
40
+ original: dict[str, int], clone: dict[str, int]
41
+ ) -> dict[str, tuple[int, int]]:
42
+ """Compare record counts and return mismatches."""
43
+ mismatches = {}
44
+
45
+ all_tables = set(original.keys()) | set(clone.keys())
46
+
47
+ for table in all_tables:
48
+ orig_count = original.get(table, 0)
49
+ clone_count = clone.get(table, 0)
50
+
51
+ # we allow a difference of 1 because of tracking
52
+ # new records during the cloning process
53
+ if abs(clone_count - orig_count) > 1:
54
+ mismatches[table] = (orig_count, clone_count)
55
+
56
+ return mismatches
@@ -0,0 +1,51 @@
1
+ import argparse
2
+ import json
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ import lamindb_setup as ln_setup
7
+
8
if __name__ == "__main__":
    # Script entry point: initialize a "<instance-name>-clone" instance, import
    # a previously exported database into it, and verify the clone by comparing
    # per-registry record counts against the counts passed in as JSON.
    # Exits with status 1 and prints the mismatches as JSON on stderr if the
    # counts differ by more than the tolerance of `_compare_record_counts`.

    # NOTE(review): this inserts the directory two levels above this file into
    # sys.path; if the intent is to make the `lamin_cli` package itself
    # importable from a source checkout, one more `.parent` may be needed --
    # confirm.
    sys.path.insert(0, str(Path(__file__).parent.parent))

    parser = argparse.ArgumentParser()
    parser.add_argument("--instance-name", required=True)
    parser.add_argument("--export-dir", required=True)
    # comma-separated module names; empty entries are ignored
    parser.add_argument("--modules", required=True)
    # JSON object mapping table names to record counts of the original instance
    parser.add_argument("--original-counts", required=True)
    args = parser.parse_args()

    instance_name = args.instance_name
    export_dir = args.export_dir
    # De-duplicate and sort so that the module order handed to init/import_db
    # is the same on every run instead of depending on set iteration order.
    modules_without_lamindb = sorted({m for m in args.modules.split(",") if m})
    # Same members as before (requested modules plus "lamindb"), with the core
    # "lamindb" module listed first.
    modules_complete = [
        "lamindb",
        *(m for m in modules_without_lamindb if m != "lamindb"),
    ]

    ln_setup.init(
        storage=f"{instance_name}-clone", modules=",".join(modules_without_lamindb)
    )

    ln_setup.io.import_db(
        module_names=modules_complete, input_dir=export_dir, if_exists="replace"
    )

    from django.db import connection, connections

    try:
        # Run a full WAL checkpoint on the SQLite clone before counting.
        with connection.cursor() as cursor:
            cursor.execute("PRAGMA wal_checkpoint(FULL)")

        from lamin_cli.clone._clone_verification import (
            _compare_record_counts,
            _count_instance_records,
        )

        clone_counts = _count_instance_records()
        original_counts = json.loads(args.original_counts)
        mismatches = _compare_record_counts(original_counts, clone_counts)
        if mismatches:
            print(json.dumps(mismatches), file=sys.stderr)
            sys.exit(1)
    finally:
        # Close this thread's database connections on every exit path,
        # including the mismatch exit above and unexpected errors.
        connections.close_all()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lamin_cli
3
- Version: 1.9.0
3
+ Version: 1.11.0
4
4
  Summary: Lamin CLI.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Description-Content-Type: text/markdown
@@ -0,0 +1,22 @@
1
+ lamin_cli/__init__.py,sha256=bd6la8_wxlClB7o8h3FNAjQ6z8HPcZ4QqxvVp4Cvycg,605
2
+ lamin_cli/__main__.py,sha256=DEifHuxyyffXHVpBjn9-LVpVUsuSnRhicxTfsuZ0YUw,20791
3
+ lamin_cli/_annotate.py,sha256=ZD76__K-mQt7UpYqyM1I2lKCs-DraTmnkjsByIHmD-g,1839
4
+ lamin_cli/_cache.py,sha256=oplwE8AcS_9PYptQUZxff2qTIdNFS81clGPkJNWk098,800
5
+ lamin_cli/_context.py,sha256=JLhv98isasX_M_o6tIDkeBWcNH8ryRrl3nk2rmwsuD4,2392
6
+ lamin_cli/_delete.py,sha256=1wp8LQdiWHJznRrm4abxEhApB9derBWEm25ufrzjvIg,1531
7
+ lamin_cli/_io.py,sha256=DboDoecZN5OLCHe5vJzOLuwztW8hxbdRffWj1kmJ9WI,5036
8
+ lamin_cli/_load.py,sha256=v7PCB7sYqGs6jF186aN191lTV2Mr5jdayHKSmdgsw_k,8445
9
+ lamin_cli/_migration.py,sha256=w-TLYC2q6sqcfNtIUewqRW_69_xy_71X3eSV7Ud58jk,1239
10
+ lamin_cli/_save.py,sha256=8dM4blz6caKJtbqND-6xJXUMlGIvo2FwvSu1CDcBPig,12517
11
+ lamin_cli/_settings.py,sha256=nqVM8FO9kjL0SXYqP8x-Xfww6Qth4I86HKKeTirsgc4,2657
12
+ lamin_cli/urls.py,sha256=gc72s4SpaAQA8J50CtCIWlr49DWOSL_a6OM9lXfPouM,367
13
+ lamin_cli/clone/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ lamin_cli/clone/_clone_verification.py,sha256=8hRFZt_1Z0i2wMqKBkrioq01RJEjwHy9cYmw6LV4PXI,1835
15
+ lamin_cli/clone/create_sqlite_clone_and_import_db.py,sha256=yv2mLkHdRUao_IkZO_4Swu5w4Nieh1gMrBwPbyn4c1o,1544
16
+ lamin_cli/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ lamin_cli/compute/modal.py,sha256=QnR7GyyvWWWkLnou95HxS9xxSQfw1k-SiefM_qRVnU0,6010
18
+ lamin_cli-1.11.0.dist-info/entry_points.txt,sha256=Qms85i9cZPlu-U7RnVZhFsF7vJ9gaLZUFkCjcGcXTpg,49
19
+ lamin_cli-1.11.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
20
+ lamin_cli-1.11.0.dist-info/WHEEL,sha256=ssQ84EZ5gH1pCOujd3iW7HClo_O_aDaClUbX4B8bjKY,100
21
+ lamin_cli-1.11.0.dist-info/METADATA,sha256=4TK6hWuABuBxr1PYF_iBpm7KP8xNH7g4LjMWUkHgDKE,338
22
+ lamin_cli-1.11.0.dist-info/RECORD,,
@@ -1,17 +0,0 @@
1
- lamin_cli/__init__.py,sha256=DFl_WGoPWYCrKssL6X5UCL9Via9gGqBVfd0j8UzEsL0,563
2
- lamin_cli/__main__.py,sha256=zuzvpfWDvrLxvcLNwAyGQHIGXiFvAra4dnPe7Nygu6M,19225
3
- lamin_cli/_annotate.py,sha256=ZD76__K-mQt7UpYqyM1I2lKCs-DraTmnkjsByIHmD-g,1839
4
- lamin_cli/_cache.py,sha256=oplwE8AcS_9PYptQUZxff2qTIdNFS81clGPkJNWk098,800
5
- lamin_cli/_delete.py,sha256=dxItUgf7At247iYGLagmJ2Ccp5nAWgodPL7c7zfZQ_0,1420
6
- lamin_cli/_load.py,sha256=OgTGqZQtoJ0GYOhuJihz-izt0F4jM4U_nSX_vhPoukg,7698
7
- lamin_cli/_migration.py,sha256=XUl_L9_3pTTk5jJoBiqbzf0Bd2LdKKtHa1zPZ4Rla5c,1267
8
- lamin_cli/_save.py,sha256=EPYsRpl0vD_Q52ksqVyP56ZOvPXBjgTFPeJWyGokN8I,11555
9
- lamin_cli/_settings.py,sha256=nqVM8FO9kjL0SXYqP8x-Xfww6Qth4I86HKKeTirsgc4,2657
10
- lamin_cli/urls.py,sha256=gc72s4SpaAQA8J50CtCIWlr49DWOSL_a6OM9lXfPouM,367
11
- lamin_cli/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- lamin_cli/compute/modal.py,sha256=QnR7GyyvWWWkLnou95HxS9xxSQfw1k-SiefM_qRVnU0,6010
13
- lamin_cli-1.9.0.dist-info/entry_points.txt,sha256=Qms85i9cZPlu-U7RnVZhFsF7vJ9gaLZUFkCjcGcXTpg,49
14
- lamin_cli-1.9.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
15
- lamin_cli-1.9.0.dist-info/WHEEL,sha256=ssQ84EZ5gH1pCOujd3iW7HClo_O_aDaClUbX4B8bjKY,100
16
- lamin_cli-1.9.0.dist-info/METADATA,sha256=YhZjJlaC6E_BDVHstvkFya-5p2qEn6lZCrPUzYkN_8w,337
17
- lamin_cli-1.9.0.dist-info/RECORD,,