lamin_cli 1.1.0__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/.pre-commit-config.yaml +2 -2
  2. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/PKG-INFO +1 -1
  3. lamin_cli-1.2.0/lamin_cli/__init__.py +3 -0
  4. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/lamin_cli/__main__.py +21 -8
  5. lamin_cli-1.2.0/lamin_cli/_load.py +192 -0
  6. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/test_load.py +18 -1
  7. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/test_save_files.py +1 -1
  8. lamin_cli-1.1.0/lamin_cli/__init__.py +0 -3
  9. lamin_cli-1.1.0/lamin_cli/_load.py +0 -169
  10. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/.github/workflows/doc-changes.yml +0 -0
  11. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/.gitignore +0 -0
  12. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/LICENSE +0 -0
  13. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/README.md +0 -0
  14. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/lamin_cli/_cache.py +0 -0
  15. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/lamin_cli/_migration.py +0 -0
  16. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/lamin_cli/_save.py +0 -0
  17. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/lamin_cli/_settings.py +0 -0
  18. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/pyproject.toml +0 -0
  19. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/conftest.py +0 -0
  20. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/notebooks/not-initialized.ipynb +0 -0
  21. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/notebooks/with-title-and-initialized-consecutive.ipynb +0 -0
  22. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/notebooks/with-title-and-initialized-non-consecutive.ipynb +0 -0
  23. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/scripts/merely-import-lamindb.py +0 -0
  24. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/scripts/run-track-and-finish-sync-git.py +0 -0
  25. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/scripts/run-track-and-finish.py +0 -0
  26. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/scripts/run-track-with-params.py +0 -0
  27. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/scripts/run-track.R +0 -0
  28. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/scripts/run-track.qmd +0 -0
  29. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/test_cli.py +0 -0
  30. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/test_migrate.py +0 -0
  31. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/test_multi_process.py +0 -0
  32. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/test_parse_uid_from_code.py +0 -0
  33. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/test_save_notebooks.py +0 -0
  34. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/test_save_r_code.py +0 -0
  35. {lamin_cli-1.1.0 → lamin_cli-1.2.0}/tests/test_save_scripts.py +0 -0
@@ -16,8 +16,8 @@ repos:
16
16
  - id: ruff
17
17
  args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]
18
18
  - id: ruff-format
19
- - repo: https://github.com/pre-commit/mirrors-prettier
20
- rev: v4.0.0-alpha.8
19
+ - repo: https://github.com/rbubley/mirrors-prettier
20
+ rev: v3.5.1
21
21
  hooks:
22
22
  - id: prettier
23
23
  - repo: https://github.com/kynan/nbstripout
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lamin_cli
3
- Version: 1.1.0
3
+ Version: 1.2.0
4
4
  Summary: Lamin CLI.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Description-Content-Type: text/markdown
@@ -0,0 +1,3 @@
1
+ """Lamin CLI."""
2
+
3
+ __version__ = "1.2.0"
@@ -279,18 +279,31 @@ def load(entity: str, uid: str | None = None, key: str | None = None, with_env:
279
279
  @click.argument("entity", type=str)
280
280
  @click.option("--uid", help="The uid for the entity.")
281
281
  @click.option("--key", help="The key for the entity.")
282
- @click.option(
283
- "--with-env", is_flag=True, help="Also return the environment for a tranform."
284
- )
285
- def get(entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False):
282
+ def get(entity: str, uid: str | None = None, key: str | None = None):
286
283
  """Query metadata about an entity.
287
284
 
288
- Currently only works for artifact & transform and behaves like `lamin load`.
285
+ Currently only works for artifact.
289
286
  """
290
- from lamin_cli._load import load as load_
287
+ import lamindb_setup as ln_setup
288
+
289
+ from ._load import decompose_url
291
290
 
292
- click.echo(f"! to load a file or folder, please use: lamin load {entity}")
293
- return load_(entity, uid=uid, key=key, with_env=with_env)
291
+ if entity.startswith("https://") and "lamin" in entity:
292
+ url = entity
293
+ instance, entity, uid = decompose_url(url)
294
+ elif entity not in {"artifact"}:
295
+ raise SystemExit("Entity has to be a laminhub URL or 'artifact'")
296
+ else:
297
+ instance = ln_setup.settings.instance.slug
298
+
299
+ ln_setup.connect(instance)
300
+ import lamindb as ln
301
+
302
+ if uid is not None:
303
+ artifact = ln.Artifact.get(uid)
304
+ else:
305
+ artifact = ln.Artifact.get(key=key)
306
+ artifact.describe()
294
307
 
295
308
 
296
309
  @main.command()
@@ -0,0 +1,192 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import shutil
5
+ from pathlib import Path
6
+
7
+ from lamin_utils import logger
8
+
9
+
10
+ def decompose_url(url: str) -> tuple[str, str, str]:
11
+ assert any(keyword in url for keyword in ["transform", "artifact", "collection"])
12
+ for entity in ["transform", "artifact", "collection"]:
13
+ if entity in url:
14
+ break
15
+ uid = url.split(f"{entity}/")[1]
16
+ instance_slug = "/".join(url.split("/")[3:5])
17
+ return instance_slug, entity, uid
18
+
19
+
20
+ def load(
21
+ entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False
22
+ ):
23
+ import lamindb_setup as ln_setup
24
+
25
+ if entity.startswith("https://") and "lamin" in entity:
26
+ url = entity
27
+ instance, entity, uid = decompose_url(url)
28
+ elif entity not in {"artifact", "transform", "collection"}:
29
+ raise SystemExit(
30
+ "Entity has to be a laminhub URL or 'artifact', 'collection', or 'transform'"
31
+ )
32
+ else:
33
+ instance = ln_setup.settings.instance.slug
34
+
35
+ ln_setup.connect(instance)
36
+ import lamindb as ln
37
+
38
+ def script_to_notebook(
39
+ transform: ln.Transform, notebook_path: Path, bump_revision: bool = False
40
+ ) -> None:
41
+ import jupytext
42
+ from lamin_utils._base62 import increment_base62
43
+
44
+ if notebook_path.suffix == ".ipynb":
45
+ # below is backward compat
46
+ if "# # transform.name" in transform.source_code:
47
+ new_content = transform.source_code.replace(
48
+ "# # transform.name", f"# # {transform.description}"
49
+ )
50
+ elif transform.source_code.startswith("# %% [markdown]"):
51
+ source_code_split = transform.source_code.split("\n")
52
+ if source_code_split[1] == "#":
53
+ source_code_split[1] = f"# # {transform.description}"
54
+ new_content = "\n".join(source_code_split)
55
+ else:
56
+ new_content = transform.source_code
57
+ else: # R notebook
58
+ # Pattern to match title only within YAML header section
59
+ title_pattern = r'^---\n.*?title:\s*"([^"]*)".*?---'
60
+ title_match = re.search(
61
+ title_pattern, transform.source_code, flags=re.DOTALL | re.MULTILINE
62
+ )
63
+ new_content = transform.source_code
64
+ if title_match:
65
+ current_title = title_match.group(1)
66
+ if current_title != transform.description:
67
+ pattern = r'^(---\n.*?title:\s*)"([^"]*)"(.*?---)'
68
+ replacement = f'\\1"{transform.description}"\\3'
69
+ new_content = re.sub(
70
+ pattern,
71
+ replacement,
72
+ new_content,
73
+ flags=re.DOTALL | re.MULTILINE,
74
+ )
75
+ logger.important(
76
+ f"updated title to match description: {current_title} →"
77
+ f" {transform.description}"
78
+ )
79
+ if bump_revision:
80
+ uid = transform.uid
81
+ if uid in new_content:
82
+ new_uid = f"{uid[:-4]}{increment_base62(uid[-4:])}"
83
+ new_content = new_content.replace(uid, new_uid)
84
+ logger.important(f"updated uid: {uid} → {new_uid}")
85
+ if notebook_path.suffix == ".ipynb":
86
+ notebook = jupytext.reads(new_content, fmt="py:percent")
87
+ jupytext.write(notebook, notebook_path)
88
+ else:
89
+ notebook_path.write_text(new_content)
90
+
91
+ query_by_uid = uid is not None
92
+
93
+ match entity:
94
+ case "transform":
95
+ if query_by_uid:
96
+ # we don't use .get here because DoesNotExist is hard to catch
97
+ # due to private django API
98
+ # here full uid is not expected anymore as before
99
+ # via ln.Transform.objects.get(uid=uid)
100
+ transforms = ln.Transform.objects.filter(uid__startswith=uid)
101
+ else:
102
+ # if below, we take is_latest=True as the criterion, we might get draft notebooks
103
+ # hence, we use source_code__isnull=False and order by created_at instead
104
+ transforms = ln.Transform.objects.filter(
105
+ key=key, source_code__isnull=False
106
+ )
107
+
108
+ if (n_transforms := len(transforms)) == 0:
109
+ err_msg = f"uid {uid}" if query_by_uid else f"key={key} and source_code"
110
+ raise SystemExit(f"Transform with {err_msg} does not exist.")
111
+
112
+ if n_transforms > 1:
113
+ transforms = transforms.order_by("-created_at")
114
+ transform = transforms.first()
115
+
116
+ target_relpath = Path(transform.key)
117
+ if len(target_relpath.parents) > 1:
118
+ logger.important(
119
+ "preserve the folder structure for versioning:"
120
+ f" {target_relpath.parent}/"
121
+ )
122
+ target_relpath.parent.mkdir(parents=True, exist_ok=True)
123
+ if target_relpath.exists():
124
+ response = input(f"! {target_relpath} exists: replace? (y/n)")
125
+ if response != "y":
126
+ raise SystemExit("Aborted.")
127
+
128
+ if transform.source_code is not None:
129
+ if target_relpath.suffix in (".ipynb", ".Rmd", ".qmd"):
130
+ script_to_notebook(transform, target_relpath, bump_revision=True)
131
+ else:
132
+ target_relpath.write_text(transform.source_code)
133
+ else:
134
+ raise SystemExit("No source code available for this transform.")
135
+
136
+ logger.important(f"{transform.type} is here: {target_relpath}")
137
+
138
+ if with_env:
139
+ ln.settings.track_run_inputs = False
140
+ if (
141
+ transform.latest_run is not None
142
+ and transform.latest_run.environment is not None
143
+ ):
144
+ filepath_env_cache = transform.latest_run.environment.cache()
145
+ target_env_filename = (
146
+ target_relpath.parent
147
+ / f"{target_relpath.stem}__requirements.txt"
148
+ )
149
+ shutil.move(filepath_env_cache, target_env_filename)
150
+ logger.important(f"environment is here: {target_env_filename}")
151
+ else:
152
+ logger.warning(
153
+ "latest transform run with environment doesn't exist"
154
+ )
155
+
156
+ return target_relpath
157
+ case "artifact" | "collection":
158
+ ln.settings.track_run_inputs = False
159
+
160
+ EntityClass = ln.Artifact if entity == "artifact" else ln.Collection
161
+
162
+ # we don't use .get here because DoesNotExist is hard to catch
163
+ # due to private django API
164
+ if query_by_uid:
165
+ entities = EntityClass.filter(uid__startswith=uid)
166
+ else:
167
+ entities = EntityClass.filter(key=key)
168
+
169
+ if (n_entities := len(entities)) == 0:
170
+ err_msg = f"uid={uid}" if query_by_uid else f"key={key}"
171
+ raise SystemExit(
172
+ f"{entity.capitalize()} with {err_msg} does not exist."
173
+ )
174
+
175
+ if n_entities > 1:
176
+ entities = entities.order_by("-created_at")
177
+
178
+ entity_obj = entities.first()
179
+ cache_path = entity_obj.cache()
180
+
181
+ # collection gives us a list of paths
182
+ if isinstance(cache_path, list):
183
+ logger.important(f"{entity} paths ({len(cache_path)} files):")
184
+ for i, path in enumerate(cache_path):
185
+ if i < 5 or i >= len(cache_path) - 5:
186
+ logger.important(f" [{i + 1}/{len(cache_path)}] {path}")
187
+ elif i == 5:
188
+ logger.important(f" ... {len(cache_path) - 10} more files ...")
189
+ else:
190
+ logger.important(f"{entity} is here: {cache_path}")
191
+ case _:
192
+ raise AssertionError(f"unknown entity {entity}")
@@ -60,7 +60,15 @@ def test_load_transform():
60
60
  path2.unlink()
61
61
 
62
62
 
63
- def test_load_artifact():
63
+ def test_get_load_artifact():
64
+ result = subprocess.run(
65
+ "lamin get"
66
+ " 'https://lamin.ai/laminlabs/lamin-site-assets/artifact/e2G7k9EVul4JbfsEYAy5'",
67
+ shell=True,
68
+ capture_output=True,
69
+ )
70
+ assert result.returncode == 0
71
+
64
72
  result = subprocess.run(
65
73
  "lamin load"
66
74
  " 'https://lamin.ai/laminlabs/lamin-site-assets/artifact/e2G7k9EVul4JbfsEYAy5'",
@@ -76,3 +84,12 @@ def test_load_artifact():
76
84
  capture_output=True,
77
85
  )
78
86
  assert result.returncode == 0
87
+
88
+
89
+ def test_load_collection():
90
+ result = subprocess.run(
91
+ "lamin load 'https://lamin.ai/laminlabs/lamindata/collection/2wUs6V1OuGzp5Ll4'",
92
+ shell=True,
93
+ capture_output=True,
94
+ )
95
+ assert result.returncode == 0
@@ -44,7 +44,7 @@ def test_save_file():
44
44
  )
45
45
  print(result.stdout.decode())
46
46
  print(result.stderr.decode())
47
- assert "found artifact with same hash" in result.stdout.decode()
47
+ assert "returning existing artifact with same hash" in result.stdout.decode()
48
48
  assert "key='mytest'" in result.stdout.decode()
49
49
  assert "storage path:" in result.stdout.decode()
50
50
  assert result.returncode == 0
@@ -1,3 +0,0 @@
1
- """Lamin CLI."""
2
-
3
- __version__ = "1.1.0"
@@ -1,169 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import re
4
- import shutil
5
- from pathlib import Path
6
-
7
- from lamin_utils import logger
8
-
9
-
10
- def decompose_url(url: str) -> tuple[str, str, str]:
11
- assert "transform" in url or "artifact" in url
12
- for entity in ["transform", "artifact"]:
13
- if entity in url:
14
- break
15
- uid = url.split(f"{entity}/")[1]
16
- instance_slug = "/".join(url.split("/")[3:5])
17
- return instance_slug, entity, uid
18
-
19
-
20
- def load(
21
- entity: str, uid: str | None = None, key: str | None = None, with_env: bool = False
22
- ):
23
- import lamindb_setup as ln_setup
24
-
25
- if entity.startswith("https://") and "lamin" in entity:
26
- url = entity
27
- instance, entity, uid = decompose_url(url)
28
- elif entity not in {"artifact", "transform"}:
29
- raise SystemExit("Entity has to be a laminhub URL or 'artifact' or 'transform'")
30
- else:
31
- instance = ln_setup.settings.instance.slug
32
-
33
- ln_setup.connect(instance)
34
- import lamindb as ln
35
-
36
- def script_to_notebook(
37
- transform: ln.Transform, notebook_path: Path, bump_revision: bool = False
38
- ) -> None:
39
- import jupytext
40
- from lamin_utils._base62 import increment_base62
41
-
42
- if notebook_path.suffix == ".ipynb":
43
- # below is backward compat
44
- if "# # transform.name" in transform.source_code:
45
- new_content = transform.source_code.replace(
46
- "# # transform.name", f"# # {transform.description}"
47
- )
48
- elif transform.source_code.startswith("# %% [markdown]"):
49
- source_code_split = transform.source_code.split("\n")
50
- if source_code_split[1] == "#":
51
- source_code_split[1] = f"# # {transform.description}"
52
- new_content = "\n".join(source_code_split)
53
- else:
54
- new_content = transform.source_code
55
- else: # R notebook
56
- # Pattern to match title only within YAML header section
57
- title_pattern = r'^---\n.*?title:\s*"([^"]*)".*?---'
58
- title_match = re.search(
59
- title_pattern, transform.source_code, flags=re.DOTALL | re.MULTILINE
60
- )
61
- new_content = transform.source_code
62
- if title_match:
63
- current_title = title_match.group(1)
64
- if current_title != transform.description:
65
- pattern = r'^(---\n.*?title:\s*)"([^"]*)"(.*?---)'
66
- replacement = f'\\1"{transform.description}"\\3'
67
- new_content = re.sub(
68
- pattern,
69
- replacement,
70
- new_content,
71
- flags=re.DOTALL | re.MULTILINE,
72
- )
73
- logger.important(
74
- f"updated title to match description: {current_title} →"
75
- f" {transform.description}"
76
- )
77
- if bump_revision:
78
- uid = transform.uid
79
- if uid in new_content:
80
- new_uid = f"{uid[:-4]}{increment_base62(uid[-4:])}"
81
- new_content = new_content.replace(uid, new_uid)
82
- logger.important(f"updated uid: {uid} → {new_uid}")
83
- if notebook_path.suffix == ".ipynb":
84
- notebook = jupytext.reads(new_content, fmt="py:percent")
85
- jupytext.write(notebook, notebook_path)
86
- else:
87
- notebook_path.write_text(new_content)
88
-
89
- query_by_uid = uid is not None
90
-
91
- if entity == "transform":
92
- if query_by_uid:
93
- # we don't use .get here because DoesNotExist is hard to catch
94
- # due to private django API
95
- # here full uid is not expected anymore as before
96
- # via ln.Transform.objects.get(uid=uid)
97
- transforms = ln.Transform.objects.filter(uid__startswith=uid)
98
- else:
99
- # if below, we take is_latest=True as the criterion, we might get draft notebooks
100
- # hence, we use source_code__isnull=False and order by created_at instead
101
- transforms = ln.Transform.objects.filter(key=key, source_code__isnull=False)
102
-
103
- if (n_transforms := len(transforms)) == 0:
104
- err_msg = f"uid {uid}" if query_by_uid else f"key={key} and source_code"
105
- raise SystemExit(f"Transform with {err_msg} does not exist.")
106
-
107
- if n_transforms > 1:
108
- transforms = transforms.order_by("-created_at")
109
- transform = transforms.first()
110
-
111
- target_relpath = Path(transform.key)
112
- if len(target_relpath.parents) > 1:
113
- logger.important(
114
- "preserve the folder structure for versioning:"
115
- f" {target_relpath.parent}/"
116
- )
117
- target_relpath.parent.mkdir(parents=True, exist_ok=True)
118
- if target_relpath.exists():
119
- response = input(f"! {target_relpath} exists: replace? (y/n)")
120
- if response != "y":
121
- raise SystemExit("Aborted.")
122
-
123
- if transform.source_code is not None:
124
- if target_relpath.suffix in (".ipynb", ".Rmd", ".qmd"):
125
- script_to_notebook(transform, target_relpath, bump_revision=True)
126
- else:
127
- target_relpath.write_text(transform.source_code)
128
- else:
129
- raise SystemExit("No source code available for this transform.")
130
-
131
- logger.important(f"{transform.type} is here: {target_relpath}")
132
-
133
- if with_env:
134
- ln.settings.track_run_inputs = False
135
- if (
136
- transform.latest_run is not None
137
- and transform.latest_run.environment is not None
138
- ):
139
- filepath_env_cache = transform.latest_run.environment.cache()
140
- target_env_filename = (
141
- target_relpath.parent / f"{target_relpath.stem}__requirements.txt"
142
- )
143
- shutil.move(filepath_env_cache, target_env_filename)
144
- logger.important(f"environment is here: {target_env_filename}")
145
- else:
146
- logger.warning("latest transform run with environment doesn't exist")
147
-
148
- return target_relpath
149
- elif entity == "artifact":
150
- ln.settings.track_run_inputs = False
151
-
152
- if query_by_uid:
153
- # we don't use .get here because DoesNotExist is hard to catch
154
- # due to private django API
155
- artifacts = ln.Artifact.filter(uid__startswith=uid)
156
- else:
157
- artifacts = ln.Artifact.filter(key=key)
158
-
159
- if (n_artifacts := len(artifacts)) == 0:
160
- err_msg = f"uid={uid}" if query_by_uid else f"key={key}"
161
- raise SystemExit(f"Artifact with {err_msg} does not exist.")
162
-
163
- if n_artifacts > 1:
164
- artifacts = artifacts.order_by("-created_at")
165
- artifact = artifacts.first()
166
-
167
- cache_path = artifact.cache()
168
- logger.important(f"artifact is here: {cache_path}")
169
- return cache_path
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes