lamindb 0.77.2__py3-none-any.whl → 1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. lamindb/__init__.py +39 -32
  2. lamindb/_artifact.py +95 -64
  3. lamindb/_can_curate.py +19 -10
  4. lamindb/_collection.py +51 -49
  5. lamindb/_feature.py +9 -9
  6. lamindb/_finish.py +99 -86
  7. lamindb/_from_values.py +20 -17
  8. lamindb/_is_versioned.py +2 -1
  9. lamindb/_parents.py +23 -16
  10. lamindb/_query_manager.py +3 -3
  11. lamindb/_query_set.py +85 -18
  12. lamindb/_record.py +121 -46
  13. lamindb/_run.py +3 -3
  14. lamindb/_save.py +14 -8
  15. lamindb/{_feature_set.py → _schema.py} +34 -31
  16. lamindb/_storage.py +2 -1
  17. lamindb/_transform.py +51 -23
  18. lamindb/_ulabel.py +17 -8
  19. lamindb/_view.py +15 -14
  20. lamindb/base/__init__.py +24 -0
  21. lamindb/base/fields.py +281 -0
  22. lamindb/base/ids.py +103 -0
  23. lamindb/base/types.py +51 -0
  24. lamindb/base/users.py +30 -0
  25. lamindb/base/validation.py +67 -0
  26. lamindb/core/__init__.py +19 -14
  27. lamindb/core/_context.py +297 -228
  28. lamindb/core/_data.py +44 -49
  29. lamindb/core/_describe.py +41 -31
  30. lamindb/core/_django.py +59 -44
  31. lamindb/core/_feature_manager.py +192 -168
  32. lamindb/core/_label_manager.py +22 -22
  33. lamindb/core/_mapped_collection.py +17 -14
  34. lamindb/core/_settings.py +1 -12
  35. lamindb/core/_sync_git.py +56 -9
  36. lamindb/core/_track_environment.py +1 -1
  37. lamindb/core/datasets/_core.py +5 -6
  38. lamindb/core/exceptions.py +0 -7
  39. lamindb/core/fields.py +1 -1
  40. lamindb/core/loaders.py +18 -2
  41. lamindb/core/{schema.py → relations.py} +22 -19
  42. lamindb/core/storage/_anndata_accessor.py +1 -2
  43. lamindb/core/storage/_backed_access.py +2 -1
  44. lamindb/core/storage/_tiledbsoma.py +40 -13
  45. lamindb/core/storage/objects.py +1 -1
  46. lamindb/core/storage/paths.py +13 -8
  47. lamindb/core/subsettings/__init__.py +0 -2
  48. lamindb/core/types.py +2 -23
  49. lamindb/core/versioning.py +11 -7
  50. lamindb/{_curate.py → curators/__init__.py} +700 -57
  51. lamindb/curators/_spatial.py +528 -0
  52. lamindb/integrations/_vitessce.py +1 -3
  53. lamindb/migrations/0052_squashed.py +1261 -0
  54. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +57 -0
  55. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +35 -0
  56. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +61 -0
  57. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +22 -0
  58. lamindb/migrations/0057_link_models_latest_report_and_others.py +356 -0
  59. lamindb/migrations/0058_artifact__actions_collection__actions.py +22 -0
  60. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +31 -0
  61. lamindb/migrations/0060_alter_artifact__actions.py +22 -0
  62. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +45 -0
  63. lamindb/migrations/0062_add_is_latest_field.py +32 -0
  64. lamindb/migrations/0063_populate_latest_field.py +45 -0
  65. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +33 -0
  66. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +22 -0
  67. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +352 -0
  68. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +20 -0
  69. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +20 -0
  70. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +1294 -0
  71. lamindb/migrations/0069_squashed.py +1770 -0
  72. lamindb/migrations/0070_lamindbv1_migrate_data.py +78 -0
  73. lamindb/migrations/0071_lamindbv1_migrate_schema.py +741 -0
  74. lamindb/migrations/0072_remove_user__branch_code_remove_user_aux_and_more.py +148 -0
  75. lamindb/migrations/0073_merge_ourprojects.py +945 -0
  76. lamindb/migrations/0074_lamindbv1_part4.py +374 -0
  77. lamindb/migrations/0075_lamindbv1_part5.py +276 -0
  78. lamindb/migrations/0076_lamindbv1_part6.py +621 -0
  79. lamindb/migrations/0077_lamindbv1_part6b.py +228 -0
  80. lamindb/migrations/0078_lamindbv1_part6c.py +468 -0
  81. lamindb/migrations/0079_alter_rundata_value_json_and_more.py +36 -0
  82. lamindb/migrations/__init__.py +0 -0
  83. lamindb/models.py +4064 -0
  84. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/METADATA +15 -20
  85. lamindb-1.0rc1.dist-info/RECORD +100 -0
  86. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/WHEEL +1 -1
  87. lamindb/core/subsettings/_transform_settings.py +0 -21
  88. lamindb-0.77.2.dist-info/RECORD +0 -63
  89. {lamindb-0.77.2.dist-info → lamindb-1.0rc1.dist-info}/LICENSE +0 -0
lamindb/_collection.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import warnings
3
4
  from collections import defaultdict
4
5
  from typing import (
5
6
  TYPE_CHECKING,
@@ -13,28 +14,27 @@ import pandas as pd
13
14
  from lamin_utils import logger
14
15
  from lamindb_setup.core._docs import doc_args
15
16
  from lamindb_setup.core.hashing import hash_set
16
- from lnschema_core.models import (
17
+
18
+ from lamindb.models import (
17
19
  Collection,
18
20
  CollectionArtifact,
19
- FeatureSet,
21
+ Schema,
20
22
  )
21
- from lnschema_core.types import VisibilityChoice
22
23
 
23
- from . import Artifact, Run
24
24
  from ._parents import view_lineage
25
25
  from ._record import init_self_from_db, update_attributes
26
26
  from ._utils import attach_func_to_class_method
27
27
  from .core._data import (
28
28
  _track_run_input,
29
- add_transform_to_kwargs,
30
29
  describe,
31
30
  get_run,
32
- save_feature_set_links,
33
- save_feature_sets,
31
+ save_schema_links,
32
+ save_staged__schemas_m2m,
34
33
  )
35
34
  from .core._mapped_collection import MappedCollection
36
35
  from .core._settings import settings
37
36
  from .core.versioning import process_revises
37
+ from .models import Artifact, Run
38
38
 
39
39
  if TYPE_CHECKING:
40
40
  from collections.abc import Iterable
@@ -50,31 +50,31 @@ class CollectionFeatureManager:
50
50
  def __init__(self, collection: Collection):
51
51
  self._collection = collection
52
52
 
53
- def get_feature_sets_union(self) -> dict[str, FeatureSet]:
54
- links_feature_set_artifact = Artifact.feature_sets.through.objects.filter(
53
+ def _get_staged__schemas_m2m_union(self) -> dict[str, Schema]:
54
+ links_schema_artifact = Artifact._schemas_m2m.through.objects.filter(
55
55
  artifact_id__in=self._collection.artifacts.values_list("id", flat=True)
56
56
  )
57
- feature_sets_by_slots = defaultdict(list)
58
- for link in links_feature_set_artifact:
59
- feature_sets_by_slots[link.slot].append(link.featureset_id)
60
- feature_sets_union = {}
61
- for slot, feature_set_ids_slot in feature_sets_by_slots.items():
62
- feature_set_1 = FeatureSet.get(id=feature_set_ids_slot[0])
63
- related_name = feature_set_1._get_related_name()
64
- features_registry = getattr(FeatureSet, related_name).field.model
57
+ _schemas_m2m_by_slots = defaultdict(list)
58
+ for link in links_schema_artifact:
59
+ _schemas_m2m_by_slots[link.slot].append(link.schema_id)
60
+ _schemas_m2m_union = {}
61
+ for slot, schema_ids_slot in _schemas_m2m_by_slots.items():
62
+ schema_1 = Schema.get(id=schema_ids_slot[0])
63
+ related_name = schema_1._get_related_name()
64
+ features_registry = getattr(Schema, related_name).field.model
65
65
  # this way of writing the __in statement turned out to be the fastest
66
66
  # evaluated on a link table with 16M entries connecting 500 feature sets with
67
67
  # 60k genes
68
68
  feature_ids = (
69
- features_registry.feature_sets.through.objects.filter(
70
- featureset_id__in=feature_set_ids_slot
69
+ features_registry.schemas.through.objects.filter(
70
+ schema_id__in=schema_ids_slot
71
71
  )
72
72
  .values(f"{features_registry.__name__.lower()}_id")
73
73
  .distinct()
74
74
  )
75
75
  features = features_registry.filter(id__in=feature_ids)
76
- feature_sets_union[slot] = FeatureSet(features, dtype=feature_set_1.dtype)
77
- return feature_sets_union
76
+ _schemas_m2m_union[slot] = Schema(features, dtype=schema_1.dtype)
77
+ return _schemas_m2m_union
78
78
 
79
79
 
80
80
  def __init__(
@@ -95,7 +95,7 @@ def __init__(
95
95
  meta_artifact: Artifact | None = (
96
96
  kwargs.pop("meta_artifact") if "meta_artifact" in kwargs else None
97
97
  )
98
- name: str | None = kwargs.pop("name") if "name" in kwargs else None
98
+ key: str | None = kwargs.pop("key") if "key" in kwargs else None
99
99
  description: str | None = (
100
100
  kwargs.pop("description") if "description" in kwargs else None
101
101
  )
@@ -106,27 +106,29 @@ def __init__(
106
106
  run: Run | None = kwargs.pop("run") if "run" in kwargs else None
107
107
  revises: Collection | None = kwargs.pop("revises") if "revises" in kwargs else None
108
108
  version: str | None = kwargs.pop("version") if "version" in kwargs else None
109
- visibility: int | None = (
110
- kwargs.pop("visibility")
111
- if "visibility" in kwargs
112
- else VisibilityChoice.default.value
109
+ _branch_code: int | None = (
110
+ kwargs.pop("_branch_code") if "_branch_code" in kwargs else 1
113
111
  )
114
- if "is_new_version_of" in kwargs:
115
- logger.warning("`is_new_version_of` will be removed soon, please use `revises`")
116
- revises = kwargs.pop("is_new_version_of")
112
+ if "name" in kwargs:
113
+ key = kwargs.pop("name")
114
+ warnings.warn(
115
+ f"argument `name` will be removed, please pass {key} to `key` instead",
116
+ FutureWarning,
117
+ stacklevel=2,
118
+ )
117
119
  if not len(kwargs) == 0:
118
120
  raise ValueError(
119
- f"Only artifacts, name, run, description, reference, reference_type, visibility can be passed, you passed: {kwargs}"
121
+ f"Only artifacts, key, run, description, reference, reference_type can be passed, you passed: {kwargs}"
120
122
  )
121
- provisional_uid, version, name, revises = process_revises(
122
- revises, version, name, Collection
123
+ provisional_uid, version, key, description, revises = process_revises(
124
+ revises, version, key, description, Collection
123
125
  )
124
126
  run = get_run(run)
125
127
  if isinstance(artifacts, Artifact):
126
128
  artifacts = [artifacts]
127
129
  else:
128
130
  if not hasattr(artifacts, "__getitem__"):
129
- raise ValueError("Artifact or List[Artifact] is allowed.")
131
+ raise ValueError("Artifact or list[Artifact] is allowed.")
130
132
  assert isinstance(artifacts[0], Artifact) # type: ignore # noqa: S101
131
133
  hash = from_artifacts(artifacts) # type: ignore
132
134
  if meta_artifact is not None:
@@ -144,7 +146,7 @@ def __init__(
144
146
  existing_collection = None
145
147
  if existing_collection is not None:
146
148
  logger.warning(
147
- f"returning existing collection with same hash: {existing_collection}"
149
+ f"returning existing collection with same hash: {existing_collection}; if you intended to query to track this collection as an input, use: ln.Collection.get()"
148
150
  )
149
151
  # update the run of the existing collection
150
152
  if run is not None:
@@ -157,18 +159,16 @@ def __init__(
157
159
  )
158
160
  # update the run of the collection with the latest run
159
161
  existing_collection.run = run
160
- existing_collection.transform = run.transform
161
162
  init_self_from_db(collection, existing_collection)
162
- update_attributes(collection, {"description": description, "name": name})
163
+ update_attributes(collection, {"description": description, "key": key})
163
164
  else:
164
165
  kwargs = {}
165
- add_transform_to_kwargs(kwargs, run)
166
166
  search_names_setting = settings.creation.search_names
167
- if revises is not None and name == revises.name:
167
+ if revises is not None and key == revises.key:
168
168
  settings.creation.search_names = False
169
169
  super(Collection, collection).__init__(
170
170
  uid=provisional_uid,
171
- name=name,
171
+ key=key,
172
172
  description=description,
173
173
  reference=reference,
174
174
  reference_type=reference_type,
@@ -176,7 +176,7 @@ def __init__(
176
176
  hash=hash,
177
177
  run=run,
178
178
  version=version,
179
- visibility=visibility,
179
+ _branch_code=_branch_code,
180
180
  revises=revises,
181
181
  **kwargs,
182
182
  )
@@ -224,7 +224,7 @@ def mapped(
224
224
  layers_keys: str | list[str] | None = None,
225
225
  obs_keys: str | list[str] | None = None,
226
226
  obsm_keys: str | list[str] | None = None,
227
- obs_filter: tuple[str, str | tuple[str, ...]] | None = None,
227
+ obs_filter: dict[str, str | tuple[str, ...]] | None = None,
228
228
  join: Literal["inner", "outer"] | None = "inner",
229
229
  encode_labels: bool | list[str] = True,
230
230
  unknown_label: str | dict[str, str] | None = None,
@@ -306,12 +306,14 @@ def load(
306
306
 
307
307
  # docstring handled through attach_func_to_class_method
308
308
  def delete(self, permanent: bool | None = None) -> None:
309
- # change visibility to trash
310
- trash_visibility = VisibilityChoice.trash.value
311
- if self.visibility > trash_visibility and permanent is not True:
312
- self.visibility = trash_visibility
309
+ # change _branch_code to trash
310
+ trash__branch_code = -1
311
+ if self._branch_code > trash__branch_code and permanent is not True:
312
+ self._branch_code = trash__branch_code
313
313
  self.save()
314
- logger.warning(f"moved collection to trash (visibility = {trash_visibility})")
314
+ logger.warning(
315
+ f"moved collection to trash (_branch_code = {trash__branch_code})"
316
+ )
315
317
  return
316
318
 
317
319
  # permanent delete
@@ -333,7 +335,7 @@ def save(self, using: str | None = None) -> Collection:
333
335
  if self.meta_artifact is not None:
334
336
  self.meta_artifact.save()
335
337
  # we don't need to save feature sets again
336
- save_feature_sets(self)
338
+ save_staged__schemas_m2m(self)
337
339
  super(Collection, self).save()
338
340
  # we don't allow updating the collection of artifacts
339
341
  # if users want to update the set of artifacts, they
@@ -348,7 +350,7 @@ def save(self, using: str | None = None) -> Collection:
348
350
  # merely using .artifacts.set(*...) doesn't achieve this
349
351
  # we need ignore_conflicts=True so that this won't error if links already exist
350
352
  CollectionArtifact.objects.bulk_create(links, ignore_conflicts=True)
351
- save_feature_set_links(self)
353
+ save_schema_links(self)
352
354
  if using is not None:
353
355
  logger.warning("using argument is ignored")
354
356
  return self
@@ -356,7 +358,7 @@ def save(self, using: str | None = None) -> Collection:
356
358
 
357
359
  # docstring handled through attach_func_to_class_method
358
360
  def restore(self) -> None:
359
- self.visibility = VisibilityChoice.default.value
361
+ self._branch_code = 1
360
362
  self.save()
361
363
 
362
364
 
lamindb/_feature.py CHANGED
@@ -1,28 +1,28 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import TYPE_CHECKING, Any, Literal, get_args
3
+ from typing import TYPE_CHECKING, Any, get_args
4
4
 
5
5
  import lamindb_setup as ln_setup
6
6
  import pandas as pd
7
7
  from lamin_utils import logger
8
8
  from lamindb_setup.core._docs import doc_args
9
- from lnschema_core.models import Artifact, Feature, Record
10
- from lnschema_core.types import FeatureDtype
11
9
  from pandas.api.types import CategoricalDtype, is_string_dtype
12
10
 
11
+ from lamindb.base.types import FeatureDtype
13
12
  from lamindb.core.exceptions import ValidationError
13
+ from lamindb.models import Artifact, Feature, Record
14
14
 
15
15
  from ._query_set import RecordList
16
16
  from ._utils import attach_func_to_class_method
17
- from .core._settings import settings
18
- from .core.schema import dict_schema_name_to_model_name
17
+ from .core.relations import dict_module_name_to_model_name
19
18
 
20
19
  if TYPE_CHECKING:
21
20
  from collections.abc import Iterable
22
21
 
23
- from lnschema_core.types import FieldAttr
24
22
  from pandas.core.dtypes.base import ExtensionDtype
25
23
 
24
+ from lamindb.base.types import FieldAttr
25
+
26
26
 
27
27
  FEATURE_DTYPES = set(get_args(FeatureDtype))
28
28
 
@@ -38,9 +38,9 @@ def get_dtype_str_from_dtype(dtype: Any) -> str:
38
38
  raise ValueError(error_message)
39
39
  registries_str = ""
40
40
  for registry in dtype:
41
- if not hasattr(registry, "__get_name_with_schema__"):
41
+ if not hasattr(registry, "__get_name_with_module__"):
42
42
  raise ValueError(error_message)
43
- registries_str += registry.__get_name_with_schema__() + "|"
43
+ registries_str += registry.__get_name_with_module__() + "|"
44
44
  dtype_str = f'cat[{registries_str.rstrip("|")}]'
45
45
  return dtype_str
46
46
 
@@ -89,7 +89,7 @@ def __init__(self, *args, **kwargs):
89
89
  if registries_str != "":
90
90
  registry_str_list = registries_str.split("|")
91
91
  for registry_str in registry_str_list:
92
- if registry_str not in dict_schema_name_to_model_name(Artifact):
92
+ if registry_str not in dict_module_name_to_model_name(Artifact):
93
93
  raise ValueError(
94
94
  f"'{registry_str}' is an invalid dtype, pass, e.g. `[ln.ULabel, bt.CellType]` or similar"
95
95
  )
lamindb/_finish.py CHANGED
@@ -9,17 +9,14 @@ from lamin_utils import logger
9
9
  from lamindb_setup.core.hashing import hash_file
10
10
 
11
11
  from lamindb.core.exceptions import NotebookNotSaved
12
+ from lamindb.models import Artifact, Run, Transform
12
13
 
13
14
  if TYPE_CHECKING:
14
15
  from pathlib import Path
15
16
 
16
- from lnschema_core import Run, Transform
17
17
 
18
- from ._query_set import QuerySet
19
-
20
-
21
- def get_r_save_notebook_message() -> str:
22
- return f"Please save the notebook in RStudio (shortcut `{get_shortcut()}`) within 2 sec before calling `db$finish()`"
18
+ def get_save_notebook_message() -> str:
19
+ return f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) within 2 sec before calling `finish()`"
23
20
 
24
21
 
25
22
  def get_shortcut() -> str:
@@ -32,13 +29,29 @@ def get_seconds_since_modified(filepath) -> float:
32
29
  return datetime.now().timestamp() - filepath.stat().st_mtime
33
30
 
34
31
 
32
+ def save_run_logs(run: Run, save_run: bool = False) -> None:
33
+ logs_path = ln_setup.settings.cache_dir / f"run_logs_{run.uid}.txt"
34
+ if logs_path.exists():
35
+ if run.report is not None:
36
+ logger.important("overwriting run.report")
37
+ artifact = Artifact(
38
+ logs_path,
39
+ description=f"log streams of run {run.uid}",
40
+ _branch_code=0,
41
+ run=False,
42
+ )
43
+ artifact.save(upload=True, print_progress=False)
44
+ run.report = artifact
45
+ if save_run: # defaults to false because is slow
46
+ run.save()
47
+
48
+
35
49
  # this is from the get_title function in nbproject
36
50
  # should be moved into lamindb sooner or later
37
51
  def prepare_notebook(
38
52
  nb,
39
53
  strip_title: bool = False,
40
54
  ) -> str | None:
41
- """Strip title from the notebook if requested."""
42
55
  title_found = False
43
56
  for cell in nb.cells:
44
57
  cell.metadata.clear() # strip cell metadata
@@ -85,8 +98,8 @@ def notebook_to_report(notebook_path: Path, output_path: Path) -> None:
85
98
 
86
99
 
87
100
  def notebook_to_script(
88
- transform: Transform, notebook_path: Path, script_path: Path
89
- ) -> None:
101
+ transform: Transform, notebook_path: Path, script_path: Path | None = None
102
+ ) -> None | str:
90
103
  import jupytext
91
104
 
92
105
  notebook = jupytext.read(notebook_path)
@@ -94,8 +107,11 @@ def notebook_to_script(
94
107
  # remove global metadata header
95
108
  py_content = re.sub(r"^# ---\n.*?# ---\n\n", "", py_content, flags=re.DOTALL)
96
109
  # replace title
97
- py_content = py_content.replace(f"# # {transform.name}", "# # transform.name")
98
- script_path.write_text(py_content)
110
+ py_content = py_content.replace(f"# # {transform.description}", "#")
111
+ if script_path is None:
112
+ return py_content
113
+ else:
114
+ script_path.write_text(py_content)
99
115
 
100
116
 
101
117
  # removes NotebookNotSaved error message from notebook html
@@ -114,7 +130,7 @@ def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]:
114
130
  cleaned_content = re.sub(pattern_title, "", cleaned_content)
115
131
  cleaned_content = re.sub(pattern_h1, "", cleaned_content)
116
132
  cleaned_content = cleaned_content.replace(
117
- f"NotebookNotSaved: {get_r_save_notebook_message()}", ""
133
+ f"NotebookNotSaved: {get_save_notebook_message()}", ""
118
134
  )
119
135
  cleaned_path = file_path.parent / (f"{file_path.stem}.cleaned{file_path.suffix}")
120
136
  cleaned_path.write_text(cleaned_content)
@@ -123,20 +139,19 @@ def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]:
123
139
 
124
140
  def save_context_core(
125
141
  *,
126
- run: Run,
142
+ run: Run | None,
127
143
  transform: Transform,
128
144
  filepath: Path,
129
145
  finished_at: bool = False,
130
146
  ignore_non_consecutive: bool | None = None,
131
147
  from_cli: bool = False,
132
148
  ) -> str | None:
133
- from lnschema_core.models import (
149
+ import lamindb as ln
150
+ from lamindb.models import (
134
151
  format_field_value, # needs to come after lamindb was imported because of CLI use
135
152
  )
136
153
 
137
- import lamindb as ln
138
-
139
- from .core._context import context, is_run_from_ipython
154
+ from .core._context import context
140
155
 
141
156
  ln.settings.verbosity = "success"
142
157
 
@@ -149,7 +164,7 @@ def save_context_core(
149
164
  # for notebooks, we need more work
150
165
  if is_ipynb:
151
166
  try:
152
- import jupytext
167
+ import jupytext # noqa: F401
153
168
  from nbproject.dev import (
154
169
  check_consecutiveness,
155
170
  read_notebook,
@@ -189,23 +204,19 @@ def save_context_core(
189
204
  logger.warning(
190
205
  f"no {filepath.with_suffix('.nb.html')} found, save your manually rendered .html report via the CLI: lamin save {filepath}"
191
206
  )
207
+ if report_path is not None and not from_cli:
208
+ if get_seconds_since_modified(report_path) > 2 and not ln_setup._TESTING:
209
+ # this can happen when auto-knitting an html with RStudio
210
+ raise NotebookNotSaved(get_save_notebook_message())
192
211
  ln.settings.creation.artifact_silence_missing_run_warning = True
193
212
  # track source code
194
213
  hash, _ = hash_file(source_code_path) # ignore hash_type for now
195
- if (
196
- transform._source_code_artifact_id is not None
197
- or transform.hash is not None # .hash is equivalent to .transform
198
- ):
214
+ if transform.hash is not None:
199
215
  # check if the hash of the transform source code matches
200
216
  # (for scripts, we already run the same logic in track() - we can deduplicate the call at some point)
201
- ref_hash = (
202
- transform.hash
203
- if transform.hash is not None
204
- else transform._source_code_artifact.hash
205
- )
206
- if hash != ref_hash:
217
+ if hash != transform.hash:
207
218
  response = input(
208
- f"You are about to overwrite existing source code (hash '{ref_hash}') for Transform('{transform.uid}')."
219
+ f"You are about to overwrite existing source code (hash '{transform.hash}') for Transform('{transform.uid}')."
209
220
  f" Proceed? (y/n)"
210
221
  )
211
222
  if response == "y":
@@ -221,75 +232,77 @@ def save_context_core(
221
232
  transform.hash = hash
222
233
 
223
234
  # track environment
224
- env_path = ln_setup.settings.cache_dir / f"run_env_pip_{run.uid}.txt"
225
- if env_path.exists():
226
- overwrite_env = True
227
- if run.environment_id is not None and from_cli:
228
- logger.important("run.environment is already saved")
229
- overwrite_env = False
230
- if overwrite_env:
231
- hash, _ = hash_file(env_path)
232
- artifact = ln.Artifact.filter(hash=hash, visibility=0).one_or_none()
233
- new_env_artifact = artifact is None
234
- if new_env_artifact:
235
- artifact = ln.Artifact(
236
- env_path,
237
- description="requirements.txt",
238
- visibility=0,
239
- run=False,
240
- )
241
- artifact.save(upload=True, print_progress=False)
242
- run.environment = artifact
243
- if new_env_artifact:
244
- logger.debug(f"saved run.environment: {run.environment}")
235
+ if run is not None:
236
+ env_path = ln_setup.settings.cache_dir / f"run_env_pip_{run.uid}.txt"
237
+ if env_path.exists():
238
+ overwrite_env = True
239
+ if run.environment_id is not None and from_cli:
240
+ logger.important("run.environment is already saved, ignoring")
241
+ overwrite_env = False
242
+ if overwrite_env:
243
+ hash, _ = hash_file(env_path)
244
+ artifact = ln.Artifact.filter(hash=hash, _branch_code=0).one_or_none()
245
+ new_env_artifact = artifact is None
246
+ if new_env_artifact:
247
+ artifact = ln.Artifact(
248
+ env_path,
249
+ description="requirements.txt",
250
+ _branch_code=0,
251
+ run=False,
252
+ )
253
+ artifact.save(upload=True, print_progress=False)
254
+ run.environment = artifact
255
+ if new_env_artifact:
256
+ logger.debug(f"saved run.environment: {run.environment}")
245
257
 
246
258
  # set finished_at
247
- if finished_at:
259
+ if finished_at and run is not None:
248
260
  run.finished_at = datetime.now(timezone.utc)
249
261
 
262
+ # track logs
263
+ if run is not None and not from_cli and not is_ipynb and not is_r_notebook:
264
+ save_run_logs(run)
265
+
250
266
  # track report and set is_consecutive
251
- if report_path is not None:
252
- if not from_cli:
253
- if get_seconds_since_modified(report_path) > 2 and not ln_setup._TESTING:
254
- # this can happen when auto-knitting an html with RStudio
255
- raise NotebookNotSaved(get_r_save_notebook_message())
256
- if is_r_notebook:
257
- title_text, report_path = clean_r_notebook_html(report_path)
258
- if title_text is not None:
259
- transform.name = title_text
260
- if run.report_id is not None:
261
- hash, _ = hash_file(report_path) # ignore hash_type for now
262
- if hash != run.report.hash:
263
- response = input(
264
- f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
265
- )
266
- if response == "y":
267
- run.report.replace(report_path)
268
- run.report.save(upload=True, print_progress=False)
267
+ if run is not None:
268
+ if report_path is not None:
269
+ if is_r_notebook:
270
+ title_text, report_path = clean_r_notebook_html(report_path)
271
+ if title_text is not None:
272
+ transform.description = title_text
273
+ if run.report_id is not None:
274
+ hash, _ = hash_file(report_path) # ignore hash_type for now
275
+ if hash != run.report.hash:
276
+ response = input(
277
+ f"You are about to overwrite an existing report (hash '{run.report.hash}') for Run('{run.uid}'). Proceed? (y/n)"
278
+ )
279
+ if response == "y":
280
+ run.report.replace(report_path)
281
+ run.report.save(upload=True, print_progress=False)
282
+ else:
283
+ logger.important("keeping old report")
269
284
  else:
270
- logger.important("keeping old report")
285
+ logger.important("report is already saved")
271
286
  else:
272
- logger.important("report is already saved")
273
- else:
274
- report_file = ln.Artifact(
275
- report_path,
276
- description=f"Report of run {run.uid}",
277
- visibility=0, # hidden file
278
- run=False,
287
+ report_file = ln.Artifact(
288
+ report_path,
289
+ description=f"Report of run {run.uid}",
290
+ _branch_code=0, # hidden file
291
+ run=False,
292
+ )
293
+ report_file.save(upload=True, print_progress=False)
294
+ run.report = report_file
295
+ logger.debug(
296
+ f"saved transform.latest_run.report: {transform.latest_run.report}"
279
297
  )
280
- report_file.save(upload=True, print_progress=False)
281
- run.report = report_file
282
- logger.debug(
283
- f"saved transform.latest_run.report: {transform.latest_run.report}"
284
- )
285
- run.is_consecutive = is_consecutive
298
+ run.is_consecutive = is_consecutive
286
299
 
287
- # save both run & transform records if we arrive here
288
- run.save()
300
+ # save both run & transform records if we arrive here
301
+ run.save()
289
302
  transform.save()
290
303
 
291
304
  # finalize
292
- if not from_cli:
305
+ if not from_cli and run is not None:
293
306
  run_time = run.finished_at - run.started_at
294
307
  days = run_time.days
295
308
  seconds = run_time.seconds