lamindb 0.76.14__py3-none-any.whl → 0.76.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/_finish.py CHANGED
@@ -103,10 +103,10 @@ def save_context_core(
103
103
 
104
104
  # for scripts, things are easy
105
105
  is_consecutive = True
106
- is_notebook = transform.type == "notebook"
106
+ is_ipynb = filepath.suffix == ".ipynb"
107
107
  source_code_path = filepath
108
108
  # for notebooks, we need more work
109
- if is_notebook:
109
+ if is_ipynb:
110
110
  try:
111
111
  import jupytext
112
112
  from nbproject.dev import (
@@ -198,7 +198,7 @@ def save_context_core(
198
198
  run.finished_at = datetime.now(timezone.utc)
199
199
 
200
200
  # track report and set is_consecutive
201
- if not is_notebook:
201
+ if not is_ipynb:
202
202
  run.is_consecutive = True
203
203
  run.save()
204
204
  else:
@@ -234,8 +234,15 @@ def save_context_core(
234
234
  # finalize
235
235
  if not from_cli:
236
236
  run_time = run.finished_at - run.started_at
237
+ days = run_time.days
238
+ seconds = run_time.seconds
239
+ hours = seconds // 3600
240
+ minutes = (seconds % 3600) // 60
241
+ secs = seconds % 60
242
+ formatted_run_time = f"{days}d {hours}h {minutes}m {secs}s"
243
+
237
244
  logger.important(
238
- f"finished Run('{run.uid[:8]}') after {run_time} at {format_field_value(run.finished_at)}"
245
+ f"finished Run('{run.uid[:8]}') after {formatted_run_time} at {format_field_value(run.finished_at)}"
239
246
  )
240
247
  if ln_setup.settings.instance.is_on_hub:
241
248
  identifier = ln_setup.settings.instance.slug
@@ -244,9 +251,7 @@ def save_context_core(
244
251
  )
245
252
  if not from_cli:
246
253
  thing, name = (
247
- ("notebook", "notebook.ipynb")
248
- if is_notebook
249
- else ("script", "script.py")
254
+ ("notebook", "notebook.ipynb") if is_ipynb else ("script", "script.py")
250
255
  )
251
256
  logger.important(
252
257
  f"if you want to update your {thing} without re-running it, use `lamin save {name}`"
lamindb/_is_versioned.py CHANGED
@@ -5,8 +5,7 @@ from lamin_utils import logger
5
5
  from lamindb_setup.core.upath import UPath
6
6
  from lnschema_core.models import IsVersioned
7
7
 
8
- from lamindb._utils import attach_func_to_class_method
9
-
8
+ from ._utils import attach_func_to_class_method
10
9
  from .core.versioning import create_uid, get_new_path_from_uid
11
10
 
12
11
 
lamindb/_parents.py CHANGED
@@ -8,13 +8,14 @@ from lamin_utils import logger
8
8
  from lnschema_core import Artifact, Collection, Record, Run, Transform
9
9
  from lnschema_core.models import HasParents, format_field_value
10
10
 
11
- from lamindb._utils import attach_func_to_class_method
12
-
13
11
  from ._record import get_name_field
12
+ from ._utils import attach_func_to_class_method
14
13
 
15
14
  if TYPE_CHECKING:
16
15
  from lnschema_core.types import StrField
17
16
 
17
+ from lamindb.core import QuerySet
18
+
18
19
  LAMIN_GREEN_LIGHTER = "#10b981"
19
20
  LAMIN_GREEN_DARKER = "#065f46"
20
21
  GREEN_FILL = "honeydew"
@@ -22,6 +23,30 @@ TRANSFORM_EMOJIS = {"notebook": "📔", "app": "🖥️", "pipeline": "🧩"}
22
23
  is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
23
24
 
24
25
 
26
+ # this is optimized to have fewer recursive calls
27
+ # also len of QuerySet can be costly at times
28
+ def _query_relatives(
29
+ records: QuerySet | list[Record],
30
+ kind: Literal["parents", "children"],
31
+ cls: type[HasParents],
32
+ ) -> QuerySet:
33
+ relatives = cls.objects.none()
34
+ if len(records) == 0:
35
+ return relatives
36
+ for record in records:
37
+ relatives = relatives.union(getattr(record, kind).all())
38
+ relatives = relatives.union(_query_relatives(relatives, kind, cls))
39
+ return relatives
40
+
41
+
42
+ def query_parents(self) -> QuerySet:
43
+ return _query_relatives([self], "parents", self.__class__)
44
+
45
+
46
+ def query_children(self) -> QuerySet:
47
+ return _query_relatives([self], "children", self.__class__)
48
+
49
+
25
50
  def _transform_emoji(transform: Transform):
26
51
  if transform is not None:
27
52
  return TRANSFORM_EMOJIS.get(transform.type, "💫")
@@ -474,9 +499,7 @@ def _df_edges_from_runs(df_values: list):
474
499
  return df
475
500
 
476
501
 
477
- METHOD_NAMES = [
478
- "view_parents",
479
- ]
502
+ METHOD_NAMES = ["view_parents", "query_parents", "query_children"]
480
503
 
481
504
  if ln_setup._TESTING: # type: ignore
482
505
  from inspect import signature
lamindb/_query_manager.py CHANGED
@@ -7,9 +7,8 @@ from lamin_utils import logger
7
7
  from lamindb_setup.core._docs import doc_args
8
8
  from lnschema_core.models import Record
9
9
 
10
- from lamindb.core._settings import settings
11
-
12
10
  from .core._feature_manager import get_feature_set_by_slot_
11
+ from .core._settings import settings
13
12
 
14
13
  if TYPE_CHECKING:
15
14
  from lnschema_core.types import StrField
lamindb/_query_set.py CHANGED
@@ -1,7 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from collections import UserList
4
- from typing import TYPE_CHECKING, NamedTuple
4
+ from collections.abc import Iterable
5
+ from collections.abc import Iterable as IterableType
6
+ from typing import TYPE_CHECKING, Any, NamedTuple
5
7
 
6
8
  import pandas as pd
7
9
  from django.db import models
@@ -20,7 +22,7 @@ from lnschema_core.models import (
20
22
  VisibilityChoice,
21
23
  )
22
24
 
23
- from lamindb.core.exceptions import DoesNotExist
25
+ from .core.exceptions import DoesNotExist
24
26
 
25
27
  if TYPE_CHECKING:
26
28
  from collections.abc import Iterable
@@ -69,8 +71,33 @@ def one_helper(self):
69
71
  return self[0]
70
72
 
71
73
 
72
- def process_expressions(registry: Registry, expressions: dict) -> dict:
73
- if registry in {Artifact, Collection}:
74
+ def process_expressions(queryset: QuerySet, expressions: dict) -> dict:
75
+ def _map_databases(value: Any, key: str, target_db: str) -> tuple[str, Any]:
76
+ if isinstance(value, Record):
77
+ if value._state.db != target_db:
78
+ logger.warning(
79
+ f"passing record from database {value._state.db} to query {target_db}, matching on uid '{value.uid}'"
80
+ )
81
+ return f"{key}__uid", value.uid
82
+ return key, value
83
+
84
+ if (
85
+ key.endswith("__in")
86
+ and isinstance(value, IterableType)
87
+ and not isinstance(value, str)
88
+ ):
89
+ if any(isinstance(v, Record) and v._state.db != target_db for v in value):
90
+ logger.warning(
91
+ f"passing records from another database to query {target_db}, matching on uids"
92
+ )
93
+ return key.replace("__in", "__uid__in"), [
94
+ v.uid if isinstance(v, Record) else v for v in value
95
+ ]
96
+ return key, value
97
+
98
+ return key, value
99
+
100
+ if queryset.model in {Artifact, Collection}:
74
101
  # visibility is set to 0 unless expressions contains id or uid equality
75
102
  if not (
76
103
  "id" in expressions
@@ -87,7 +114,17 @@ def process_expressions(registry: Registry, expressions: dict) -> dict:
87
114
  # sense for a non-NULLABLE column
88
115
  elif visibility in expressions and expressions[visibility] is None:
89
116
  expressions.pop(visibility)
90
- return expressions
117
+ if queryset._db is not None:
118
+ # only check for database mismatch if there is a defined database on the
119
+ # queryset
120
+ return dict(
121
+ (
122
+ _map_databases(value, key, queryset._db)
123
+ for key, value in expressions.items()
124
+ )
125
+ )
126
+ else:
127
+ return expressions
91
128
 
92
129
 
93
130
  def get(
@@ -114,7 +151,7 @@ def get(
114
151
  return qs.one()
115
152
  else:
116
153
  assert idlike is None # noqa: S101
117
- expressions = process_expressions(registry, expressions)
154
+ expressions = process_expressions(qs, expressions)
118
155
  return registry.objects.using(qs.db).get(**expressions)
119
156
 
120
157
 
@@ -282,6 +319,14 @@ class QuerySet(models.QuerySet):
282
319
  """Query a single record. Raises error if there are more or none."""
283
320
  return get(self, idlike, **expressions)
284
321
 
322
+ def filter(self, *queries, **expressions) -> QuerySet:
323
+ """Query a set of records."""
324
+ expressions = process_expressions(self, expressions)
325
+ if len(expressions) > 0:
326
+ return super().filter(*queries, **expressions)
327
+ else:
328
+ return self
329
+
285
330
  def one(self) -> Record:
286
331
  """Exactly one result. Raises error if there are more or none."""
287
332
  return one_helper(self)
lamindb/_record.py CHANGED
@@ -1,12 +1,16 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import builtins
4
+ from functools import reduce
4
5
  from typing import TYPE_CHECKING, NamedTuple
5
6
 
6
7
  import dj_database_url
7
8
  import lamindb_setup as ln_setup
9
+ from django.core.exceptions import FieldDoesNotExist
8
10
  from django.db import connections, transaction
9
- from django.db.models import IntegerField, Manager, Q, QuerySet, Value
11
+ from django.db.models import F, IntegerField, Manager, Q, QuerySet, TextField, Value
12
+ from django.db.models.functions import Cast, Coalesce
13
+ from django.db.models.lookups import Contains, Exact, IContains, IExact, IRegex, Regex
10
14
  from lamin_utils import colors, logger
11
15
  from lamin_utils._lookup import Lookup
12
16
  from lamindb_setup._connect_instance import (
@@ -17,11 +21,22 @@ from lamindb_setup._connect_instance import (
17
21
  from lamindb_setup.core._docs import doc_args
18
22
  from lamindb_setup.core._hub_core import connect_instance_hub
19
23
  from lamindb_setup.core._settings_store import instance_settings_file
20
- from lnschema_core.models import Artifact, Feature, IsVersioned, Record, Run, Transform
24
+ from lnschema_core.models import (
25
+ Artifact,
26
+ Collection,
27
+ Feature,
28
+ FeatureSet,
29
+ IsVersioned,
30
+ Param,
31
+ Record,
32
+ Run,
33
+ Transform,
34
+ ULabel,
35
+ )
21
36
 
22
- from lamindb._utils import attach_func_to_class_method
23
- from lamindb.core._settings import settings
24
- from lamindb.core.exceptions import RecordNameChangeIntegrityError
37
+ from ._utils import attach_func_to_class_method
38
+ from .core._settings import settings
39
+ from .core.exceptions import RecordNameChangeIntegrityError, ValidationError
25
40
 
26
41
  if TYPE_CHECKING:
27
42
  import pandas as pd
@@ -48,6 +63,7 @@ def update_attributes(record: Record, attributes: dict[str, str]):
48
63
 
49
64
 
50
65
  def validate_required_fields(record: Record, kwargs):
66
+ # a "required field" is a Django field that has `null=False, default=None`
51
67
  required_fields = {
52
68
  k.name for k in record._meta.fields if not k.null and k.default is None
53
69
  }
@@ -58,25 +74,47 @@ def validate_required_fields(record: Record, kwargs):
58
74
  ]
59
75
  if missing_fields:
60
76
  raise TypeError(f"{missing_fields} are required.")
77
+ # ensure the exact length of the internal uid for core entities
78
+ if "uid" in kwargs and record.__class__ in {
79
+ Artifact,
80
+ Collection,
81
+ Transform,
82
+ Run,
83
+ ULabel,
84
+ Feature,
85
+ FeatureSet,
86
+ Param,
87
+ }:
88
+ uid_max_length = record.__class__._meta.get_field(
89
+ "uid"
90
+ ).max_length # triggers FieldDoesNotExist
91
+ if len(kwargs["uid"]) != uid_max_length: # triggers KeyError
92
+ raise ValidationError(
93
+ f'`uid` must be exactly {uid_max_length} characters long, got {len(kwargs["uid"])}.'
94
+ )
61
95
 
62
96
 
63
- def suggest_records_with_similar_names(record: Record, kwargs) -> bool:
97
+ def suggest_records_with_similar_names(record: Record, name_field: str, kwargs) -> bool:
64
98
  """Returns True if found exact match, otherwise False.
65
99
 
66
100
  Logs similar matches if found.
67
101
  """
68
- if kwargs.get("name") is None:
102
+ if kwargs.get(name_field) is None or not isinstance(kwargs.get(name_field), str):
69
103
  return False
70
104
  queryset = _search(
71
- record.__class__, kwargs["name"], field="name", truncate_words=True, limit=3
105
+ record.__class__,
106
+ kwargs[name_field],
107
+ field=name_field,
108
+ truncate_string=True,
109
+ limit=3,
72
110
  )
73
111
  if not queryset.exists(): # empty queryset
74
112
  return False
75
113
  for alternative_record in queryset:
76
- if alternative_record.name == kwargs["name"]:
114
+ if getattr(alternative_record, name_field) == kwargs[name_field]:
77
115
  return True
78
116
  s, it, nots = ("", "it", "s") if len(queryset) == 1 else ("s", "one of them", "")
79
- msg = f"record{s} with similar name{s} exist{nots}! did you mean to load {it}?"
117
+ msg = f"record{s} with similar {name_field}{s} exist{nots}! did you mean to load {it}?"
80
118
  if IPYTHON:
81
119
  from IPython.display import display
82
120
 
@@ -98,13 +136,19 @@ def __init__(record: Record, *args, **kwargs):
98
136
  if "_has_consciously_provided_uid" in kwargs:
99
137
  has_consciously_provided_uid = kwargs.pop("_has_consciously_provided_uid")
100
138
  if settings.creation.search_names and not has_consciously_provided_uid:
101
- match = suggest_records_with_similar_names(record, kwargs)
139
+ name_field = (
140
+ "name" if not hasattr(record, "_name_field") else record._name_field
141
+ )
142
+ match = suggest_records_with_similar_names(record, name_field, kwargs)
102
143
  if match:
103
144
  if "version" in kwargs:
104
145
  if kwargs["version"] is not None:
105
146
  version_comment = " and version"
106
147
  existing_record = record.__class__.filter(
107
- name=kwargs["name"], version=kwargs["version"]
148
+ **{
149
+ name_field: kwargs[name_field],
150
+ "version": kwargs["version"],
151
+ }
108
152
  ).one_or_none()
109
153
  else:
110
154
  # for a versioned record, an exact name match is not a
@@ -115,12 +159,12 @@ def __init__(record: Record, *args, **kwargs):
115
159
  else:
116
160
  version_comment = ""
117
161
  existing_record = record.__class__.filter(
118
- name=kwargs["name"]
162
+ **{name_field: kwargs[name_field]}
119
163
  ).one_or_none()
120
164
  if existing_record is not None:
121
165
  logger.important(
122
166
  f"returning existing {record.__class__.__name__} record with same"
123
- f" name{version_comment}: '{kwargs['name']}'"
167
+ f" {name_field}{version_comment}: '{kwargs[name_field]}'"
124
168
  )
125
169
  init_self_from_db(record, existing_record)
126
170
  return None
@@ -137,9 +181,13 @@ def __init__(record: Record, *args, **kwargs):
137
181
  @doc_args(Record.filter.__doc__)
138
182
  def filter(cls, *queries, **expressions) -> QuerySet:
139
183
  """{}""" # noqa: D415
140
- from lamindb._filter import filter
184
+ from lamindb._query_set import QuerySet
141
185
 
142
- return filter(cls, *queries, **expressions)
186
+ _using_key = None
187
+ if "_using_key" in expressions:
188
+ _using_key = expressions.pop("_using_key")
189
+
190
+ return QuerySet(model=cls, using=_using_key).filter(*queries, **expressions)
143
191
 
144
192
 
145
193
  @classmethod # type:ignore
@@ -150,8 +198,6 @@ def get(
150
198
  **expressions,
151
199
  ) -> Record:
152
200
  """{}""" # noqa: D415
153
- # this is the only place in which we need the lamindb queryset
154
- # in this file; everywhere else it should be Django's
155
201
  from lamindb._query_set import QuerySet
156
202
 
157
203
  return QuerySet(model=cls).get(idlike, **expressions)
@@ -166,9 +212,7 @@ def df(
166
212
  limit: int = 100,
167
213
  ) -> pd.DataFrame:
168
214
  """{}""" # noqa: D415
169
- from lamindb._filter import filter
170
-
171
- query_set = filter(cls)
215
+ query_set = cls.filter()
172
216
  if hasattr(cls, "updated_at"):
173
217
  query_set = query_set.order_by("-updated_at")
174
218
  return query_set[:limit].df(include=include, join=join)
@@ -182,7 +226,7 @@ def _search(
182
226
  limit: int | None = 20,
183
227
  case_sensitive: bool = False,
184
228
  using_key: str | None = None,
185
- truncate_words: bool = False,
229
+ truncate_string: bool = False,
186
230
  ) -> QuerySet:
187
231
  input_queryset = _queryset(cls, using_key=using_key)
188
232
  registry = input_queryset.model
@@ -209,48 +253,67 @@ def _search(
209
253
  else:
210
254
  fields.append(field)
211
255
 
212
- # decompose search string
213
- def truncate_word(word) -> str:
214
- if len(word) > 5:
215
- n_80_pct = int(len(word) * 0.8)
216
- return word[:n_80_pct]
217
- elif len(word) > 3:
218
- return word[:3]
219
- else:
220
- return word
221
-
222
- decomposed_string = str(string).split()
223
- # add the entire string back
224
- decomposed_string += [string]
225
- for word in decomposed_string:
226
- # will not search against words with 3 or fewer characters
227
- if len(word) <= 3:
228
- decomposed_string.remove(word)
229
- if truncate_words:
230
- decomposed_string = [truncate_word(word) for word in decomposed_string]
231
- # construct the query
232
- expression = Q()
233
- case_sensitive_i = "" if case_sensitive else "i"
234
- for field in fields:
235
- for word in decomposed_string:
236
- query = {f"{field}__{case_sensitive_i}contains": word}
237
- expression |= Q(**query)
238
- output_queryset = input_queryset.filter(expression)
239
- # ensure exact matches are at the top
240
- narrow_expression = Q()
256
+ if truncate_string:
257
+ if (len_string := len(string)) > 5:
258
+ n_80_pct = int(len_string * 0.8)
259
+ string = string[:n_80_pct]
260
+
261
+ string = string.strip()
262
+
263
+ exact_lookup = Exact if case_sensitive else IExact
264
+ regex_lookup = Regex if case_sensitive else IRegex
265
+ contains_lookup = Contains if case_sensitive else IContains
266
+
267
+ ranks = []
268
+ contains_filters = []
241
269
  for field in fields:
242
- query = {f"{field}__{case_sensitive_i}contains": string}
243
- narrow_expression |= Q(**query)
244
- refined_output_queryset = output_queryset.filter(narrow_expression).annotate(
245
- ordering=Value(1, output_field=IntegerField())
246
- )
247
- remaining_output_queryset = output_queryset.exclude(narrow_expression).annotate(
248
- ordering=Value(2, output_field=IntegerField())
270
+ field_expr = Coalesce(
271
+ Cast(field, output_field=TextField()),
272
+ Value(""),
273
+ output_field=TextField(),
274
+ )
275
+ # exact rank
276
+ exact_expr = exact_lookup(field_expr, string)
277
+ exact_rank = Cast(exact_expr, output_field=IntegerField()) * 200
278
+ ranks.append(exact_rank)
279
+ # exact synonym
280
+ synonym_expr = regex_lookup(field_expr, rf"(?:^|.*\|){string}(?:\|.*|$)")
281
+ synonym_rank = Cast(synonym_expr, output_field=IntegerField()) * 200
282
+ ranks.append(synonym_rank)
283
+ # match as sub-phrase
284
+ sub_expr = regex_lookup(
285
+ field_expr, rf"(?:^|.*[ \|\.,;:]){string}(?:[ \|\.,;:].*|$)"
286
+ )
287
+ sub_rank = Cast(sub_expr, output_field=IntegerField()) * 10
288
+ ranks.append(sub_rank)
289
+ # startswith and avoid matching string with " " on the right
290
+ # mostly for truncated
291
+ startswith_expr = regex_lookup(field_expr, rf"(?:^|\|){string}[^ ]*(\||$)")
292
+ startswith_rank = Cast(startswith_expr, output_field=IntegerField()) * 8
293
+ ranks.append(startswith_rank)
294
+ # match as sub-phrase from the left, mostly for truncated
295
+ right_expr = regex_lookup(field_expr, rf"(?:^|.*[ \|]){string}.*")
296
+ right_rank = Cast(right_expr, output_field=IntegerField()) * 2
297
+ ranks.append(right_rank)
298
+ # match as sub-phrase from the right
299
+ left_expr = regex_lookup(field_expr, rf".*{string}(?:$|[ \|\.,;:].*)")
300
+ left_rank = Cast(left_expr, output_field=IntegerField()) * 2
301
+ ranks.append(left_rank)
302
+ # simple contains filter
303
+ contains_expr = contains_lookup(field_expr, string)
304
+ contains_filter = Q(contains_expr)
305
+ contains_filters.append(contains_filter)
306
+ # also rank by contains
307
+ contains_rank = Cast(contains_expr, output_field=IntegerField())
308
+ ranks.append(contains_rank)
309
+
310
+ ranked_queryset = (
311
+ input_queryset.filter(reduce(lambda a, b: a | b, contains_filters))
312
+ .alias(rank=sum(ranks))
313
+ .order_by("-rank")
249
314
  )
250
- combined_queryset = refined_output_queryset.union(
251
- remaining_output_queryset
252
- ).order_by("ordering")[:limit]
253
- return combined_queryset
315
+
316
+ return ranked_queryset[:limit]
254
317
 
255
318
 
256
319
  @classmethod # type: ignore
lamindb/_save.py CHANGED
@@ -15,8 +15,8 @@ from lamin_utils import logger
15
15
  from lamindb_setup.core.upath import LocalPathClasses
16
16
  from lnschema_core.models import Artifact, Record
17
17
 
18
- from lamindb.core._settings import settings
19
- from lamindb.core.storage.paths import (
18
+ from .core._settings import settings
19
+ from .core.storage.paths import (
20
20
  _cache_key_from_artifact_storage,
21
21
  attempt_accessing_path,
22
22
  auto_storage_key_from_artifact,
lamindb/_transform.py CHANGED
@@ -6,10 +6,9 @@ from lamin_utils import logger
6
6
  from lamindb_setup.core._docs import doc_args
7
7
  from lnschema_core.models import Run, Transform
8
8
 
9
- from lamindb.core.exceptions import InconsistentKey
10
-
11
9
  from ._parents import _view_parents
12
10
  from ._run import delete_run_artifacts
11
+ from .core.exceptions import InconsistentKey
13
12
  from .core.versioning import message_update_key_in_version_family, process_revises
14
13
 
15
14
  if TYPE_CHECKING:
lamindb/_ulabel.py CHANGED
@@ -6,7 +6,7 @@ import lamindb_setup as ln_setup
6
6
  from lamindb_setup.core._docs import doc_args
7
7
  from lnschema_core import ULabel
8
8
 
9
- from lamindb._utils import attach_func_to_class_method
9
+ from ._utils import attach_func_to_class_method
10
10
 
11
11
  if TYPE_CHECKING:
12
12
  from lnschema_core.types import ListLike