lamin_cli 1.0.7__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/.gitignore +2 -0
  2. lamin_cli-1.2.0/.pre-commit-config.yaml +40 -0
  3. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/PKG-INFO +1 -1
  4. lamin_cli-1.2.0/lamin_cli/__init__.py +3 -0
  5. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/__main__.py +53 -31
  6. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/_cache.py +1 -0
  7. lamin_cli-1.2.0/lamin_cli/_load.py +192 -0
  8. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/_migration.py +4 -3
  9. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/_save.py +12 -7
  10. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/lamin_cli/_settings.py +1 -0
  11. lamin_cli-1.2.0/pyproject.toml +114 -0
  12. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/scripts/merely-import-lamindb.py +1 -1
  13. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/scripts/run-track-and-finish-sync-git.py +1 -1
  14. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/scripts/run-track-with-params.py +1 -0
  15. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_cli.py +1 -0
  16. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_load.py +25 -7
  17. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_parse_uid_from_code.py +3 -3
  18. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_save_files.py +2 -1
  19. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_save_notebooks.py +6 -5
  20. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_save_r_code.py +3 -2
  21. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_save_scripts.py +29 -8
  22. lamin_cli-1.0.7/.pre-commit-config.yaml +0 -64
  23. lamin_cli-1.0.7/lamin_cli/__init__.py +0 -3
  24. lamin_cli-1.0.7/lamin_cli/_load.py +0 -164
  25. lamin_cli-1.0.7/pyproject.toml +0 -21
  26. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/.github/workflows/doc-changes.yml +0 -0
  27. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/LICENSE +0 -0
  28. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/README.md +0 -0
  29. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/conftest.py +0 -0
  30. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/notebooks/not-initialized.ipynb +0 -0
  31. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/notebooks/with-title-and-initialized-consecutive.ipynb +0 -0
  32. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/notebooks/with-title-and-initialized-non-consecutive.ipynb +0 -0
  33. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/scripts/run-track-and-finish.py +0 -0
  34. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/scripts/run-track.R +0 -0
  35. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/scripts/run-track.qmd +0 -0
  36. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_migrate.py +0 -0
  37. {lamin_cli-1.0.7 → lamin_cli-1.2.0}/tests/test_multi_process.py +1 -1
@@ -48,6 +48,8 @@ test-search2
  test-search3
  test-search4
  test-search5
+ test.ipynb
+
  # General
  .DS_Store

@@ -0,0 +1,40 @@
+ repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.5.0
+ hooks:
+ - id: trailing-whitespace
+ - id: end-of-file-fixer
+ exclude: |
+ (?x)(
+ .github/workflows/latest-changes.jinja2
+ )
+ - id: check-yaml
+ - id: check-added-large-files
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.9.2
+ hooks:
+ - id: ruff
+ args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]
+ - id: ruff-format
+ - repo: https://github.com/rbubley/mirrors-prettier
+ rev: v3.5.1
+ hooks:
+ - id: prettier
+ - repo: https://github.com/kynan/nbstripout
+ rev: 0.3.9
+ hooks:
+ - id: nbstripout
+ exclude: |
+ (?x)(
+ docs/examples/|
+ docs/notes/|
+ tests
+ )
+ - repo: https://github.com/pre-commit/mirrors-mypy
+ rev: v1.14.1
+ hooks:
+ - id: mypy
+ exclude: |
+ (?x)(
+ tests/hub-local/conftest.py
+ )
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: lamin_cli
- Version: 1.0.7
+ Version: 1.2.0
  Summary: Lamin CLI.
  Author-email: Lamin Labs <open-source@lamin.ai>
  Description-Content-Type: text/markdown
@@ -0,0 +1,3 @@
+ """Lamin CLI."""
+
+ __version__ = "1.2.0"
@@ -1,12 +1,16 @@
  from __future__ import annotations
+
+ import inspect
  import os
  import sys
+ import warnings
  from collections import OrderedDict
- import inspect
- from importlib.metadata import PackageNotFoundError, version
- from typing import Optional, Mapping
  from functools import wraps
- import warnings
+ from importlib.metadata import PackageNotFoundError, version
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+ from collections.abc import Mapping

  # https://github.com/ewels/rich-click/issues/19
  # Otherwise rich-click takes over the formatting.
@@ -18,11 +22,11 @@ if os.environ.get("NO_RICH"):

  def __init__(
  self,
- name: Optional[str] = None,
- commands: Optional[Mapping[str, click.Command]] = None,
+ name: str | None = None,
+ commands: Mapping[str, click.Command] | None = None,
  **kwargs,
  ):
- super(OrderedGroup, self).__init__(name, commands, **kwargs)
+ super().__init__(name, commands, **kwargs)
  self.commands = commands or OrderedDict()

  def list_commands(self, ctx: click.Context) -> Mapping[str, click.Command]:
@@ -77,12 +81,14 @@ else:
  return wrapper


- from click import Command, Context
  from lamindb_setup._silence_loggers import silence_loggers

- from lamin_cli._settings import settings
  from lamin_cli._cache import cache
  from lamin_cli._migration import migrate
+ from lamin_cli._settings import settings
+
+ if TYPE_CHECKING:
+ from click import Command, Context

  try:
  lamindb_version = version("lamindb")
@@ -100,7 +106,7 @@ def main():
  @main.command()
  @click.argument("user", type=str, default=None, required=False)
  @click.option("--key", type=str, default=None, help="The legacy API key.")
- def login(user: str, key: Optional[str]):
+ def login(user: str, key: str | None):
  """Log into LaminHub.

  `lamin login` prompts for your API key unless you set it via environment variable `LAMIN_API_KEY`.
@@ -142,24 +148,25 @@ def schema_to_modules_callback(ctx, param, value):
  "The --schema option is deprecated and will be removed in a future version."
  " Please use --modules instead.",
  DeprecationWarning,
+ stacklevel=2,
  )
  return value


  # fmt: off
  @main.command()
- @click.option("--storage", type=str, help="Local directory, s3://bucket_name, gs://bucket_name.") # noqa: E501
- @click.option("--db", type=str, default=None, help="Postgres database connection URL, do not pass for SQLite.") # noqa: E501
- @click.option("--modules", type=str, default=None, help="Comma-separated string of modules.") # noqa: E501
+ @click.option("--storage", type=str, help="Local directory, s3://bucket_name, gs://bucket_name.")
+ @click.option("--db", type=str, default=None, help="Postgres database connection URL, do not pass for SQLite.")
+ @click.option("--modules", type=str, default=None, help="Comma-separated string of schema modules.")
  @click.option("--name", type=str, default=None, help="The instance name.")
- @click.option("--schema", type=str, default=None, help="[DEPRECATED] Use --modules instead.", callback=schema_to_modules_callback) # noqa: E501
+ @click.option("--schema", type=str, default=None, help="[DEPRECATED] Use --modules instead.", callback=schema_to_modules_callback)
  # fmt: on
  def init(
  storage: str,
- db: Optional[str],
- modules: Optional[str],
- name: Optional[str],
- schema: Optional[str],
+ db: str | None,
+ modules: str | None,
+ name: str | None,
+ schema: str | None,
  ):
  """Init an instance."""
  from lamindb_setup._init_instance import init as init_
@@ -182,7 +189,8 @@ def connect(instance: str):
  {attr}`~lamindb.setup.core.SetupSettings.auto_connect` to `True` so that you
  auto-connect in a Python session upon importing `lamindb`.
  """
- from lamindb_setup import settings as settings_, connect as connect_
+ from lamindb_setup import connect as connect_
+ from lamindb_setup import settings as settings_

  settings_.auto_connect = True
  return connect_(instance, _reload_lamindb=False)
@@ -217,7 +225,7 @@ def info(schema: bool):
  # fmt: off
  @main.command()
  @click.argument("instance", type=str, default=None)
- @click.option("--force", is_flag=True, default=False, help="Do not ask for confirmation.") # noqa: E501
+ @click.option("--force", is_flag=True, default=False, help="Do not ask for confirmation.")
  # fmt: on
  def delete(instance: str, force: bool = False):
  """Delete an entity.
@@ -236,7 +244,7 @@ def delete(instance: str, force: bool = False):
  @click.option(
  "--with-env", is_flag=True, help="Also return the environment for a tranform."
  )
- def load(entity: str, uid: str = None, key: str = None, with_env: bool = False):
+ def load(entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False):
  """Load a file or folder.

  Pass a URL, `artifact`, or `transform`. For example:
@@ -252,7 +260,8 @@ def load(entity: str, uid: str = None, key: str = None, with_env: bool = False):
  """
  is_slug = entity.count("/") == 1
  if is_slug:
- from lamindb_setup import settings as settings_, connect
+ from lamindb_setup import connect
+ from lamindb_setup import settings as settings_

  # can decide whether we want to actually deprecate
  # click.echo(
@@ -270,18 +279,31 @@ def load(entity: str, uid: str = None, key: str = None, with_env: bool = False):
  @click.argument("entity", type=str)
  @click.option("--uid", help="The uid for the entity.")
  @click.option("--key", help="The key for the entity.")
- @click.option(
- "--with-env", is_flag=True, help="Also return the environment for a tranform."
- )
- def get(entity: str, uid: str = None, key: str = None, with_env: bool = False):
+ def get(entity: str, uid: str | None = None, key: str | None = None):
  """Query metadata about an entity.

- Currently only works for artifact & transform and behaves like `lamin load`.
+ Currently only works for artifact.
  """
- from lamin_cli._load import load as load_
+ import lamindb_setup as ln_setup

- click.echo(f"! to load a file or folder, please use: lamin load {entity}")
- return load_(entity, uid=uid, key=key, with_env=with_env)
+ from ._load import decompose_url
+
+ if entity.startswith("https://") and "lamin" in entity:
+ url = entity
+ instance, entity, uid = decompose_url(url)
+ elif entity not in {"artifact"}:
+ raise SystemExit("Entity has to be a laminhub URL or 'artifact'")
+ else:
+ instance = ln_setup.settings.instance.slug
+
+ ln_setup.connect(instance)
+ import lamindb as ln
+
+ if uid is not None:
+ artifact = ln.Artifact.get(uid)
+ else:
+ artifact = ln.Artifact.get(key=key)
+ artifact.describe()


  @main.command()
@@ -315,7 +337,7 @@ def _generate_help():
  out: dict[str, dict[str, str | None]] = {}

  def recursive_help(
- cmd: Command, parent: Optional[Context] = None, name: tuple[str, ...] = ()
+ cmd: Command, parent: Context | None = None, name: tuple[str, ...] = ()
  ):
  ctx = click.Context(cmd, info_name=cmd.name, parent=parent)
  assert cmd.name
@@ -1,4 +1,5 @@
  from __future__ import annotations
+
  import os

  if os.environ.get("NO_RICH"):
@@ -0,0 +1,192 @@
+ from __future__ import annotations
+
+ import re
+ import shutil
+ from pathlib import Path
+
+ from lamin_utils import logger
+
+
+ def decompose_url(url: str) -> tuple[str, str, str]:
+ assert any(keyword in url for keyword in ["transform", "artifact", "collection"])
+ for entity in ["transform", "artifact", "collection"]:
+ if entity in url:
+ break
+ uid = url.split(f"{entity}/")[1]
+ instance_slug = "/".join(url.split("/")[3:5])
+ return instance_slug, entity, uid
+
+
+ def load(
+ entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False
+ ):
+ import lamindb_setup as ln_setup
+
+ if entity.startswith("https://") and "lamin" in entity:
+ url = entity
+ instance, entity, uid = decompose_url(url)
+ elif entity not in {"artifact", "transform", "collection"}:
+ raise SystemExit(
+ "Entity has to be a laminhub URL or 'artifact', 'collection', or 'transform'"
+ )
+ else:
+ instance = ln_setup.settings.instance.slug
+
+ ln_setup.connect(instance)
+ import lamindb as ln
+
+ def script_to_notebook(
+ transform: ln.Transform, notebook_path: Path, bump_revision: bool = False
+ ) -> None:
+ import jupytext
+ from lamin_utils._base62 import increment_base62
+
+ if notebook_path.suffix == ".ipynb":
+ # below is backward compat
+ if "# # transform.name" in transform.source_code:
+ new_content = transform.source_code.replace(
+ "# # transform.name", f"# # {transform.description}"
+ )
+ elif transform.source_code.startswith("# %% [markdown]"):
+ source_code_split = transform.source_code.split("\n")
+ if source_code_split[1] == "#":
+ source_code_split[1] = f"# # {transform.description}"
+ new_content = "\n".join(source_code_split)
+ else:
+ new_content = transform.source_code
+ else: # R notebook
+ # Pattern to match title only within YAML header section
+ title_pattern = r'^---\n.*?title:\s*"([^"]*)".*?---'
+ title_match = re.search(
+ title_pattern, transform.source_code, flags=re.DOTALL | re.MULTILINE
+ )
+ new_content = transform.source_code
+ if title_match:
+ current_title = title_match.group(1)
+ if current_title != transform.description:
+ pattern = r'^(---\n.*?title:\s*)"([^"]*)"(.*?---)'
+ replacement = f'\\1"{transform.description}"\\3'
+ new_content = re.sub(
+ pattern,
+ replacement,
+ new_content,
+ flags=re.DOTALL | re.MULTILINE,
+ )
+ logger.important(
+ f"updated title to match description: {current_title} →"
+ f" {transform.description}"
+ )
+ if bump_revision:
+ uid = transform.uid
+ if uid in new_content:
+ new_uid = f"{uid[:-4]}{increment_base62(uid[-4:])}"
+ new_content = new_content.replace(uid, new_uid)
+ logger.important(f"updated uid: {uid} → {new_uid}")
+ if notebook_path.suffix == ".ipynb":
+ notebook = jupytext.reads(new_content, fmt="py:percent")
+ jupytext.write(notebook, notebook_path)
+ else:
+ notebook_path.write_text(new_content)
+
+ query_by_uid = uid is not None
+
+ match entity:
+ case "transform":
+ if query_by_uid:
+ # we don't use .get here because DoesNotExist is hard to catch
+ # due to private django API
+ # here full uid is not expected anymore as before
+ # via ln.Transform.objects.get(uid=uid)
+ transforms = ln.Transform.objects.filter(uid__startswith=uid)
+ else:
+ # if below, we take is_latest=True as the criterion, we might get draft notebooks
+ # hence, we use source_code__isnull=False and order by created_at instead
+ transforms = ln.Transform.objects.filter(
+ key=key, source_code__isnull=False
+ )
+
+ if (n_transforms := len(transforms)) == 0:
+ err_msg = f"uid {uid}" if query_by_uid else f"key={key} and source_code"
+ raise SystemExit(f"Transform with {err_msg} does not exist.")
+
+ if n_transforms > 1:
+ transforms = transforms.order_by("-created_at")
+ transform = transforms.first()
+
+ target_relpath = Path(transform.key)
+ if len(target_relpath.parents) > 1:
+ logger.important(
+ "preserve the folder structure for versioning:"
+ f" {target_relpath.parent}/"
+ )
+ target_relpath.parent.mkdir(parents=True, exist_ok=True)
+ if target_relpath.exists():
+ response = input(f"! {target_relpath} exists: replace? (y/n)")
+ if response != "y":
+ raise SystemExit("Aborted.")
+
+ if transform.source_code is not None:
+ if target_relpath.suffix in (".ipynb", ".Rmd", ".qmd"):
+ script_to_notebook(transform, target_relpath, bump_revision=True)
+ else:
+ target_relpath.write_text(transform.source_code)
+ else:
+ raise SystemExit("No source code available for this transform.")
+
+ logger.important(f"{transform.type} is here: {target_relpath}")
+
+ if with_env:
+ ln.settings.track_run_inputs = False
+ if (
+ transform.latest_run is not None
+ and transform.latest_run.environment is not None
+ ):
+ filepath_env_cache = transform.latest_run.environment.cache()
+ target_env_filename = (
+ target_relpath.parent
+ / f"{target_relpath.stem}__requirements.txt"
+ )
+ shutil.move(filepath_env_cache, target_env_filename)
+ logger.important(f"environment is here: {target_env_filename}")
+ else:
+ logger.warning(
+ "latest transform run with environment doesn't exist"
+ )
+
+ return target_relpath
+ case "artifact" | "collection":
+ ln.settings.track_run_inputs = False
+
+ EntityClass = ln.Artifact if entity == "artifact" else ln.Collection
+
+ # we don't use .get here because DoesNotExist is hard to catch
+ # due to private django API
+ if query_by_uid:
+ entities = EntityClass.filter(uid__startswith=uid)
+ else:
+ entities = EntityClass.filter(key=key)
+
+ if (n_entities := len(entities)) == 0:
+ err_msg = f"uid={uid}" if query_by_uid else f"key={key}"
+ raise SystemExit(
+ f"{entity.capitalize()} with {err_msg} does not exist."
+ )
+
+ if n_entities > 1:
+ entities = entities.order_by("-created_at")
+
+ entity_obj = entities.first()
+ cache_path = entity_obj.cache()
+
+ # collection gives us a list of paths
+ if isinstance(cache_path, list):
+ logger.important(f"{entity} paths ({len(cache_path)} files):")
+ for i, path in enumerate(cache_path):
+ if i < 5 or i >= len(cache_path) - 5:
+ logger.important(f" [{i + 1}/{len(cache_path)}] {path}")
+ elif i == 5:
+ logger.important(f" ... {len(cache_path) - 10} more files ...")
+ else:
+ logger.important(f"{entity} is here: {cache_path}")
+ case _:
+ raise AssertionError(f"unknown entity {entity}")
@@ -1,4 +1,5 @@
  from __future__ import annotations
+
  import os
  from typing import Optional

@@ -34,9 +35,9 @@ def deploy():
  @click.option("--end-number", type=str, default=None)
  @click.option("--start-number", type=str, default=None)
  def squash(
- package_name: Optional[str],
- end_number: Optional[str],
- start_number: Optional[str],
+ package_name: str | None,
+ end_number: str | None,
+ start_number: str | None,
  ):
  """Squash migrations."""
  from lamindb_setup._migrate import migrate
@@ -1,8 +1,10 @@
  from __future__ import annotations
+
+ import re
+ import sys
  from pathlib import Path
- from typing import Union
+
  from lamin_utils import logger
- import re


  def parse_uid_from_code(content: str, suffix: str) -> str | None:
@@ -41,7 +43,7 @@ def parse_uid_from_code(content: str, suffix: str) -> str | None:


  def save_from_filepath_cli(
- filepath: Union[str, Path],
+ filepath: str | Path,
  key: str | None,
  description: str | None,
  registry: str | None,
@@ -57,13 +59,16 @@ def save_from_filepath_cli(
  ln_setup.settings.auto_connect = True

  import lamindb as ln
+
+ if not ln.setup.core.django.IS_SETUP:
+ sys.exit(-1)
  from lamindb._finish import save_context_core

  ln_setup.settings.auto_connect = auto_connect_state

  suffixes_transform = {
- "py": set([".py", ".ipynb"]),
- "R": set([".R", ".qmd", ".Rmd"]),
+ "py": {".py", ".ipynb"},
+ "R": {".R", ".qmd", ".Rmd"},
  }

  if filepath.suffix in {".qmd", ".Rmd"}:
@@ -80,8 +85,8 @@ def save_from_filepath_cli(
  and filepath.with_suffix(".nb.html").exists()
  ):
  raise SystemExit(
- f'Please delete one of\n - {filepath.with_suffix(".html")}\n -'
- f' {filepath.with_suffix(".nb.html")}'
+ f"Please delete one of\n - {filepath.with_suffix('.html')}\n -"
+ f" {filepath.with_suffix('.nb.html')}"
  )

  if registry is None:
@@ -1,4 +1,5 @@
  from __future__ import annotations
+
  import os

  if os.environ.get("NO_RICH"):
@@ -0,0 +1,114 @@
+ [build-system]
+ requires = ["flit_core >=3.2,<4"]
+ build-backend = "flit_core.buildapi"
+
+ [project]
+ name = "lamin_cli"
+ authors = [{name = "Lamin Labs", email = "open-source@lamin.ai"}]
+ readme = "README.md"
+ dynamic = ["version", "description"]
+ dependencies = [
+ "rich-click>=1.7",
+ ]
+
+ [project.urls]
+ Home = "https://github.com/laminlabs/lamin-cli"
+
+ [project.scripts]
+ lamin = "lamin_cli.__main__:main"
+
+ [tool.ruff]
+ src = ["src"]
+ line-length = 88
+ lint.select = [
+ "F", # Errors detected by Pyflakes
+ "E", # Error detected by Pycodestyle
+ "W", # Warning detected by Pycodestyle
+ "I", # isort
+ "D", # pydocstyle
+ "B", # flake8-bugbear
+ "TID", # flake8-tidy-imports
+ "C4", # flake8-comprehensions
+ "BLE", # flake8-blind-except
+ "UP", # pyupgrade
+ "RUF100", # Report unused noqa directives
+ "TCH", # Typing imports
+ "NPY", # Numpy specific rules
+ "PTH" # Use pathlib
+ ]
+ lint.ignore = [
+ # Do not catch blind exception: `Exception`
+ "BLE001",
+ # Errors from function calls in argument defaults. These are fine when the result is immutable.
+ "B008",
+ # line too long -> we accept long comment lines; black gets rid of long code lines
+ "E501",
+ # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient
+ "E731",
+ # allow I, O, l as variable names -> I is the identity matrix
+ "E741",
+ # Missing docstring in public module
+ "D100",
+ # undocumented-public-class
+ "D101",
+ # Missing docstring in public method
+ "D102",
+ # Missing docstring in public function
+ "D103",
+ # Missing docstring in public package
+ "D104",
+ # __magic__ methods are are often self-explanatory, allow missing docstrings
+ "D105",
+ # Missing docstring in public nested class
+ "D106",
+ # Missing docstring in __init__
+ "D107",
+ ## Disable one in each pair of mutually incompatible rules
+ # We don’t want a blank line before a class docstring
+ "D203",
+ # 1 blank line required after class docstring
+ "D204",
+ # first line should end with a period [Bug: doesn't work with single-line docstrings]
+ # We want docstrings to start immediately after the opening triple quote
+ "D213",
+ # Section underline is over-indented ("{name}")
+ "D215",
+ # First line should end with a period
+ "D400",
+ # First line should be in imperative mood; try rephrasing
+ "D401",
+ # First word of the first line should be capitalized: {} -> {}
+ "D403",
+ # First word of the docstring should not be "This"
+ "D404",
+ # Section name should end with a newline ("{name}")
+ "D406",
+ # Missing dashed underline after section ("{name}")
+ "D407",
+ # Section underline should be in the line following the section's name ("{name}")
+ "D408",
+ # Section underline should match the length of its name ("{name}")
+ "D409",
+ # No blank lines allowed between a section header and its content ("{name}")
+ "D412",
+ # Missing blank line after last section ("{name}")
+ "D413",
+ # Missing argument description
+ "D417",
+ # Imports unused
+ "F401",
+ # camcelcase imported as lowercase
+ "N813",
+ # module import not at top level of file
+ "E402",
+ # open()` should be replaced by `Path.open()
+ "PTH123",
+ ]
+
+ [tool.ruff.lint.pydocstyle]
+ convention = "google"
+
+ [tool.ruff.lint.per-file-ignores]
+ "docs/*" = ["I", "B018", "B017"]
+ "tests/*" = ["D"]
+ "*/__init__.py" = ["F401"]
@@ -1,4 +1,4 @@
- import lamindb as ln # noqa
+ import lamindb as ln

  if __name__ == "__main__":
  print("hello!")
@@ -1,7 +1,7 @@
  import lamindb as ln

  ln.settings.sync_git_repo = "https://github.com/laminlabs/lamin-cli"
- ln.context.name = "My good script"
+ ln.context.description = "My good script"
  ln.track("m5uCHTTpJnjQ0000")


@@ -1,4 +1,5 @@
  import argparse
+
  import lamindb as ln

  if __name__ == "__main__":
@@ -1,5 +1,6 @@
  import os
  from datetime import datetime, timedelta, timezone
+
  from lamindb_setup import settings
  from lamindb_setup.core._hub_client import connect_hub_with_auth
  from lamindb_setup.core._hub_core import create_api_key
@@ -1,12 +1,13 @@
- from lamin_cli._load import decompose_url
- from pathlib import Path
  import subprocess
+ from pathlib import Path
+
+ from lamin_cli._load import decompose_url


  def test_decompose_url():
  urls = [
- "https://lamin.ai/laminlabs/arrayloader-benchmarks/transform/1GCKs8zLtkc85zKv", # noqa
- "https://lamin.company.com/laminlabs/arrayloader-benchmarks/transform/1GCKs8zLtkc85zKv", # noqa
+ "https://lamin.ai/laminlabs/arrayloader-benchmarks/transform/1GCKs8zLtkc85zKv",
+ "https://lamin.company.com/laminlabs/arrayloader-benchmarks/transform/1GCKs8zLtkc85zKv",
  ]
  for url in urls:
  result = decompose_url(url)
@@ -23,7 +24,7 @@ def test_load_transform():
  result = subprocess.run(
  "lamin load"
  " 'https://lamin.ai/laminlabs/lamin-dev/transform/VFYCIuaw2GsX0000'"
- " --with-env", # noqa
+ " --with-env",
  shell=True,
  capture_output=True,
  )
@@ -59,10 +60,18 @@ def test_load_transform():
  path2.unlink()


- def test_load_artifact():
+ def test_get_load_artifact():
+ result = subprocess.run(
+ "lamin get"
+ " 'https://lamin.ai/laminlabs/lamin-site-assets/artifact/e2G7k9EVul4JbfsEYAy5'",
+ shell=True,
+ capture_output=True,
+ )
+ assert result.returncode == 0
+
  result = subprocess.run(
  "lamin load"
- " 'https://lamin.ai/laminlabs/lamin-site-assets/artifact/e2G7k9EVul4JbfsEYAy5'", # noqa
+ " 'https://lamin.ai/laminlabs/lamin-site-assets/artifact/e2G7k9EVul4JbfsEYAy5'",
  shell=True,
  capture_output=True,
  )
@@ -75,3 +84,12 @@ def test_load_artifact():
  capture_output=True,
  )
  assert result.returncode == 0
+
+
+ def test_load_collection():
+ result = subprocess.run(
+ "lamin load 'https://lamin.ai/laminlabs/lamindata/collection/2wUs6V1OuGzp5Ll4'",
+ shell=True,
+ capture_output=True,
+ )
+ assert result.returncode == 0
@@ -112,6 +112,6 @@ def test_r_track_pattern():
  for suffix in suffixes:
  for content, expected_uid in valid_cases:
  uid = parse_uid_from_code(content, suffix)
- assert (
- uid == expected_uid
- ), f"Failed for valid content with {suffix}: {content}"
+ assert uid == expected_uid, (
+ f"Failed for valid content with {suffix}: {content}"
+ )
@@ -1,7 +1,8 @@
  import subprocess
- import lamindb_setup as ln_setup
  from pathlib import Path

+ import lamindb_setup as ln_setup
+
  test_file = Path(__file__).parent.parent.resolve() / ".gitignore"


@@ -1,12 +1,13 @@
+ import json
  import os
  import subprocess
  from pathlib import Path
+
+ import lamindb as ln
  import nbproject_test
  import pytest
- from nbproject.dev import read_notebook, write_notebook
  from nbclient.exceptions import CellExecutionError
- import json
- import lamindb as ln
+ from nbproject.dev import read_notebook, write_notebook

  notebook_dir = "./sub/lamin-cli/tests/notebooks/"

@@ -167,13 +168,13 @@ print("my consecutive cell")
  # get the the source code via command line
  result = subprocess.run(
  "yes | lamin load"
- f" https://lamin.ai/{ln.setup.settings.user.handle}/laminci-unit-tests/transform/hlsFXswrJjtt0000", # noqa
+ f" https://lamin.ai/{ln.setup.settings.user.handle}/laminci-unit-tests/transform/hlsFXswrJjtt0000",
  shell=True,
  capture_output=True,
  )
  # print(result.stderr.decode())
  assert Path("./with-title-and-initialized-consecutive.ipynb").exists()
- with open("./with-title-and-initialized-consecutive.ipynb", "r") as f:
+ with open("./with-title-and-initialized-consecutive.ipynb") as f:
  json_notebook = json.load(f)
  print(json_notebook["cells"][0])
  assert json_notebook["cells"][0]["source"] == ["# My test notebook (consecutive)"]
@@ -1,6 +1,7 @@
- from pathlib import Path
- import subprocess
  import os
+ import subprocess
+ from pathlib import Path
+
  import lamindb as ln

  scripts_dir = Path(__file__).parent.resolve() / "scripts"
@@ -1,10 +1,10 @@
- from pathlib import Path
- import subprocess
  import os
+ import subprocess
+ from pathlib import Path
+
  import lamindb as ln
  from lamindb_setup import settings

-
  scripts_dir = Path(__file__).parent.resolve() / "scripts"


@@ -45,17 +45,38 @@ def test_run_save_cache_with_git_and_uid():
  shell=True,
  capture_output=True,
  )
- # print(result.stdout.decode())
- # print(result.stderr.decode())
+ print(result.stdout.decode())
+ print(result.stderr.decode())
  assert result.returncode == 0
  assert "created Transform" in result.stdout.decode()
  assert "m5uCHTTp" in result.stdout.decode()
  assert "started new Run" in result.stdout.decode()

  transform = ln.Transform.get("m5uCHTTpJnjQ")
- assert transform.hash == "MoIciBQ0lpVPCKQGofPX6g"
+ assert transform.hash == "VC1oTPcaVSrzNrXUT9p4qw"
  assert transform.latest_run.environment.path.exists()

+ assert (
+ transform.source_code
+ == """import lamindb as ln
+
+ ln.settings.sync_git_repo = "https://github.com/laminlabs/lamin-cli"
+ ln.context.description = "My good script"
+ ln.track("m5uCHTTpJnjQ0000")
+
+
+ if __name__ == "__main__":
+ # we're using new_run here to mock the notebook situation
+ # and cover the look up of an existing run in the tests
+ # new_run = True is trivial
+ ln.track(new_run=False)
+
+ print("hello!")
+
+ ln.finish()
+ """
+ )
+
  # you can rerun the same script
  result = subprocess.run(
  f"python {filepath}",
@@ -160,7 +181,7 @@ def test_run_save_cache_with_git_and_uid():
  # try to get the the source code via command line
  result = subprocess.run(
  "yes | lamin load"
- f" https://lamin.ai/{settings.user.handle}/laminci-unit-tests/transform/m5uCHTTpJnjQ0000", # noqa
+ f" https://lamin.ai/{settings.user.handle}/laminci-unit-tests/transform/m5uCHTTpJnjQ0000",
  shell=True,
  capture_output=True,
  )
@@ -168,7 +189,7 @@ def test_run_save_cache_with_git_and_uid():
  assert result.returncode == 0

  result = subprocess.run(
- f"yes | lamin load transform --key {filepath.name}", # noqa
+ f"yes | lamin load transform --key {filepath.name}",
  shell=True,
  capture_output=True,
  )
@@ -1,64 +0,0 @@
- repos:
- - repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v3.2.0
- hooks:
- - id: trailing-whitespace
- - id: end-of-file-fixer
- exclude: |
- (?x)(
- .github/workflows/latest-changes.jinja2
- )
- - id: check-yaml
- - id: check-added-large-files
- - repo: https://github.com/psf/black
- rev: 22.6.0
- hooks:
- - id: black-jupyter
- - repo: https://github.com/pycqa/flake8
- rev: 4.0.1
- hooks:
- - id: flake8
- additional_dependencies:
- - flake8-black==0.3.3
- - flake8-typing-imports==1.10.0
- language_version: python3
- args:
- - --max-line-length=120
- - --ignore=E203,W503,BLK100,TYP001
- exclude: |
- (?x)(
- __init__.py
- )
- - repo: https://github.com/pre-commit/mirrors-prettier
- rev: v2.6.2
- hooks:
- - id: prettier
- - repo: https://github.com/kynan/nbstripout
- rev: 0.3.9
- hooks:
- - id: nbstripout
- exclude: |
- (?x)(
- docs/examples/|
- docs/notes/|
- tests
- )
- - repo: https://github.com/Lucas-C/pre-commit-hooks
- rev: v1.1.9
- hooks:
- - id: forbid-crlf
- - id: remove-crlf
- - repo: https://github.com/pre-commit/mirrors-mypy
- rev: v0.940
- hooks:
- - id: mypy
- exclude: |
- (?x)(
- tests/hub-local/conftest.py
- )
- - repo: https://github.com/pycqa/pydocstyle
- rev: 6.1.1
- hooks:
- - id: pydocstyle
- args: # google style + __init__, see http://www.pydocstyle.org/en/stable/error_codes.html
- - --ignore=D100,D101,D102,D103,D104,D106,D107,D203,D204,D213,D215,D400,D401,D403,D404,D406,D407,D408,D409,D412,D413
@@ -1,3 +0,0 @@
- """Lamin CLI."""
-
- __version__ = "1.0.7"
@@ -1,164 +0,0 @@
- from __future__ import annotations
- from typing import Tuple
- from lamin_utils import logger
- import shutil
- import re
- from pathlib import Path
-
-
- def decompose_url(url: str) -> Tuple[str, str, str]:
- assert "transform" in url or "artifact" in url
- for entity in ["transform", "artifact"]:
- if entity in url:
- break
- uid = url.split(f"{entity}/")[1]
- instance_slug = "/".join(url.split("/")[3:5])
- return instance_slug, entity, uid
-
-
- def load(entity: str, uid: str = None, key: str = None, with_env: bool = False):
- import lamindb_setup as ln_setup
-
- if entity.startswith("https://") and "lamin" in entity:
- url = entity
- instance, entity, uid = decompose_url(url)
- elif entity not in {"artifact", "transform"}:
- raise SystemExit("Entity has to be a laminhub URL or 'artifact' or 'transform'")
- else:
- instance = ln_setup.settings.instance.slug
-
- ln_setup.connect(instance)
- import lamindb as ln
-
- def script_to_notebook(
- transform: ln.Transform, notebook_path: Path, bump_revision: bool = False
- ) -> None:
- import jupytext
- from lamin_utils._base62 import increment_base62
-
- if notebook_path.suffix == ".ipynb":
- # below is backward compat
- if "# # transform.name" in transform.source_code:
- new_content = transform.source_code.replace(
- "# # transform.name", f"# # {transform.description}"
- )
- elif transform.source_code.startswith("# %% [markdown]\n#\n"):
- new_content = transform.source_code.replace(
- "# %% [markdown]\n#\n",
- f"# %% [markdown]\n# # {transform.description}\n",
- )
- else: # R notebook
- # Pattern to match title only within YAML header section
- title_pattern = r'^---\n.*?title:\s*"([^"]*)".*?---'
- title_match = re.search(
- title_pattern, transform.source_code, flags=re.DOTALL | re.MULTILINE
- )
- new_content = transform.source_code
- if title_match:
- current_title = title_match.group(1)
- if current_title != transform.description:
- pattern = r'^(---\n.*?title:\s*)"([^"]*)"(.*?---)'
- replacement = f'\\1"{transform.description}"\\3'
- new_content = re.sub(
- pattern,
- replacement,
- new_content,
- flags=re.DOTALL | re.MULTILINE,
- )
- logger.important(
- f"updated title to match description: {current_title} →"
- f" {transform.description}"
- )
- if bump_revision:
- uid = transform.uid
- if uid in new_content:
- new_uid = f"{uid[:-4]}{increment_base62(uid[-4:])}"
- new_content = new_content.replace(uid, new_uid)
- logger.important(f"updated uid: {uid} → {new_uid}")
- if notebook_path.suffix == ".ipynb":
- notebook = jupytext.reads(new_content, fmt="py:percent")
- jupytext.write(notebook, notebook_path)
- else:
- notebook_path.write_text(new_content)
-
- query_by_uid = uid is not None
-
- if entity == "transform":
- if query_by_uid:
- # we don't use .get here because DoesNotExist is hard to catch
- # due to private django API
- # here full uid is not expected anymore as before
- # via ln.Transform.objects.get(uid=uid)
- transforms = ln.Transform.objects.filter(uid__startswith=uid)
- else:
- # if below, we take is_latest=True as the criterion, we might get draft notebooks
- # hence, we use source_code__isnull=False and order by created_at instead
- transforms = ln.Transform.objects.filter(key=key, source_code__isnull=False)
-
- if (n_transforms := len(transforms)) == 0:
- err_msg = f"uid {uid}" if query_by_uid else f"key={key} and source_code"
- raise SystemExit(f"Transform with {err_msg} does not exist.")
-
- if n_transforms > 1:
- transforms = transforms.order_by("-created_at")
- transform = transforms.first()
-
- target_relpath = Path(transform.key)
- if len(target_relpath.parents) > 1:
- logger.important(
- "preserve the folder structure for versioning:"
- f" {target_relpath.parent}/"
- )
- target_relpath.parent.mkdir(parents=True, exist_ok=True)
- if target_relpath.exists():
- response = input(f"! {target_relpath} exists: replace? (y/n)")
- if response != "y":
- raise SystemExit("Aborted.")
-
- if transform.source_code is not None:
- if target_relpath.suffix in (".ipynb", ".Rmd", ".qmd"):
- script_to_notebook(transform, target_relpath, bump_revision=True)
- else:
- target_relpath.write_text(transform.source_code)
- else:
- raise SystemExit("No source code available for this transform.")
-
- logger.important(f"{transform.type} is here: {target_relpath}")
-
- if with_env:
- ln.settings.track_run_inputs = False
- if (
- transform.latest_run is not None
- and transform.latest_run.environment is not None
- ):
- filepath_env_cache = transform.latest_run.environment.cache()
- target_env_filename = (
- target_relpath.parent / f"{target_relpath.stem}__requirements.txt"
- )
- shutil.move(filepath_env_cache, target_env_filename)
- logger.important(f"environment is here: {target_env_filename}")
- else:
- logger.warning("latest transform run with environment doesn't exist")
-
- return target_relpath
- elif entity == "artifact":
- ln.settings.track_run_inputs = False
-
- if query_by_uid:
- # we don't use .get here because DoesNotExist is hard to catch
- # due to private django API
- artifacts = ln.Artifact.filter(uid__startswith=uid)
- else:
- artifacts = ln.Artifact.filter(key=key)
-
- if (n_artifacts := len(artifacts)) == 0:
- err_msg = f"uid={uid}" if query_by_uid else f"key={key}"
- raise SystemExit(f"Artifact with {err_msg} does not exist.")
-
- if n_artifacts > 1:
- artifacts = artifacts.order_by("-created_at")
- artifact = artifacts.first()
-
- cache_path = artifact.cache()
- logger.important(f"artifact is here: {cache_path}")
- return cache_path
@@ -1,21 +0,0 @@
- [build-system]
- requires = ["flit_core >=3.2,<4"]
- build-backend = "flit_core.buildapi"
-
- [project]
- name = "lamin_cli"
- authors = [{name = "Lamin Labs", email = "open-source@lamin.ai"}]
- readme = "README.md"
- dynamic = ["version", "description"]
- dependencies = [
- "rich-click>=1.7",
- ]
-
- [project.urls]
- Home = "https://github.com/laminlabs/lamin-cli"
-
- [project.scripts]
- lamin = "lamin_cli.__main__:main"
-
- [tool.black]
- preview = true
@@ -1,6 +1,6 @@
+ import subprocess
  from multiprocessing import Process
  from pathlib import Path
- import subprocess

  scripts_dir = Path(__file__).parent.resolve() / "scripts"