lamindb 1.0.5__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. lamindb/__init__.py +17 -6
  2. lamindb/_artifact.py +202 -87
  3. lamindb/_can_curate.py +27 -8
  4. lamindb/_collection.py +86 -52
  5. lamindb/_feature.py +177 -41
  6. lamindb/_finish.py +21 -7
  7. lamindb/_from_values.py +83 -98
  8. lamindb/_parents.py +4 -4
  9. lamindb/_query_set.py +78 -18
  10. lamindb/_record.py +170 -53
  11. lamindb/_run.py +4 -4
  12. lamindb/_save.py +42 -11
  13. lamindb/_schema.py +135 -38
  14. lamindb/_storage.py +1 -1
  15. lamindb/_tracked.py +129 -0
  16. lamindb/_transform.py +21 -8
  17. lamindb/_ulabel.py +5 -14
  18. lamindb/base/users.py +1 -4
  19. lamindb/base/validation.py +2 -6
  20. lamindb/core/__init__.py +13 -14
  21. lamindb/core/_context.py +14 -9
  22. lamindb/core/_data.py +29 -25
  23. lamindb/core/_describe.py +1 -1
  24. lamindb/core/_django.py +1 -1
  25. lamindb/core/_feature_manager.py +53 -43
  26. lamindb/core/_label_manager.py +4 -4
  27. lamindb/core/_mapped_collection.py +24 -9
  28. lamindb/core/_track_environment.py +2 -1
  29. lamindb/core/datasets/__init__.py +6 -1
  30. lamindb/core/datasets/_core.py +12 -11
  31. lamindb/core/datasets/_small.py +67 -21
  32. lamindb/core/exceptions.py +1 -90
  33. lamindb/core/loaders.py +21 -15
  34. lamindb/core/relations.py +6 -4
  35. lamindb/core/storage/_anndata_accessor.py +49 -3
  36. lamindb/core/storage/_backed_access.py +12 -7
  37. lamindb/core/storage/_pyarrow_dataset.py +40 -15
  38. lamindb/core/storage/_tiledbsoma.py +56 -12
  39. lamindb/core/storage/paths.py +30 -24
  40. lamindb/core/subsettings/_creation_settings.py +4 -16
  41. lamindb/curators/__init__.py +2193 -846
  42. lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
  43. lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
  44. lamindb/errors.py +96 -0
  45. lamindb/integrations/_vitessce.py +3 -3
  46. lamindb/migrations/0069_squashed.py +76 -75
  47. lamindb/migrations/0075_lamindbv1_part5.py +4 -5
  48. lamindb/migrations/0082_alter_feature_dtype.py +21 -0
  49. lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
  50. lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
  51. lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
  52. lamindb/migrations/0086_various.py +95 -0
  53. lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
  54. lamindb/migrations/0088_schema_components.py +273 -0
  55. lamindb/migrations/0088_squashed.py +4372 -0
  56. lamindb/models.py +475 -168
  57. {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/METADATA +9 -7
  58. lamindb-1.1.1.dist-info/RECORD +95 -0
  59. lamindb/curators/_spatial.py +0 -528
  60. lamindb/migrations/0052_squashed.py +0 -1261
  61. lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
  62. lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
  63. lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
  64. lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
  65. lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
  66. lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
  67. lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
  68. lamindb/migrations/0060_alter_artifact__actions.py +0 -22
  69. lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
  70. lamindb/migrations/0062_add_is_latest_field.py +0 -32
  71. lamindb/migrations/0063_populate_latest_field.py +0 -45
  72. lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
  73. lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
  74. lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
  75. lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
  76. lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
  77. lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
  78. lamindb-1.0.5.dist-info/RECORD +0 -102
  79. {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/LICENSE +0 -0
  80. {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/WHEEL +0 -0
lamindb/_finish.py CHANGED
@@ -96,7 +96,7 @@ def save_run_logs(run: Run, save_run: bool = False) -> None:
  if logs_path.exists():
  if run.report is not None:
  logger.important("overwriting run.report")
- artifact = Artifact(
+ artifact = Artifact( # type: ignore
  logs_path,
  description=f"log streams of run {run.uid}",
  _branch_code=0,
@@ -159,7 +159,7 @@ def notebook_to_report(notebook_path: Path, output_path: Path) -> None:
  output_path.write_text(html, encoding="utf-8")


- def notebook_to_script(
+ def notebook_to_script( # type: ignore
  transform: Transform, notebook_path: Path, script_path: Path | None = None
  ) -> None | str:
  import jupytext
@@ -207,8 +207,13 @@ def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]:


  def check_filepath_recently_saved(filepath: Path, is_finish_retry: bool) -> bool:
- recently_saved_time = 3 if not is_finish_retry else 20
+ # the recently_saved_time needs to be very low for the first check
+ # because an accidental save (e.g. via auto-save) might otherwise lead
+ # to upload of an outdated notebook
+ # also see implementation for R notebooks below
+ offset_saved_time = 0.3 if not is_finish_retry else 20
  for retry in range(30):
+ recently_saved_time = offset_saved_time + retry # sleep time is 1 sec
  if get_seconds_since_modified(filepath) > recently_saved_time:
  if retry == 0:
  prefix = f"{LEVEL_TO_COLORS[20]}{LEVEL_TO_ICONS[20]}{RESET_COLOR}"
@@ -316,7 +321,8 @@ def save_context_core(
  f"no html report found; to attach one, create an .html export for your {filepath.suffix} file and then run: lamin save {filepath}"
  )
  if report_path is not None and is_r_notebook and not from_cli: # R notebooks
- recently_saved_time = 3 if not is_retry else 20
+ # see comment above in check_filepath_recently_saved
+ recently_saved_time = 0.3 if not is_retry else 20
  if get_seconds_since_modified(report_path) > recently_saved_time:
  # the automated retry solution of Jupyter notebooks does not work in RStudio because the execution of the notebook cell
  # seems to block the event loop of the frontend
@@ -365,7 +371,7 @@ def save_context_core(
  artifact = ln.Artifact.filter(hash=hash, _branch_code=0).one_or_none()
  new_env_artifact = artifact is None
  if new_env_artifact:
- artifact = ln.Artifact(
+ artifact = ln.Artifact( # type: ignore
  env_path,
  description="requirements.txt",
  _branch_code=0,
@@ -411,7 +417,7 @@ def save_context_core(
  else:
  logger.important("report is already saved")
  else:
- report_file = ln.Artifact(
+ report_file = ln.Artifact( # type: ignore
  report_path,
  description=f"Report of run {run.uid}",
  _branch_code=0, # hidden file
@@ -430,7 +436,15 @@ def save_context_core(
  # save both run & transform records if we arrive here
  if run is not None:
  run.save()
- transform.save()
+ transform_id_prior_to_save = transform.id
+ transform.save() # this in-place updates the state of transform upon hash collision
+ if transform.id != transform_id_prior_to_save:
+ # the hash existed and we're actually back to the previous version
+ # hence, this was in fact a run of the previous transform rather than of
+ # the new transform
+ # this can happen in interactive notebooks if the user makes no change to the notebook
+ run.transform = transform
+ run.save()

  # finalize
  if not from_cli and run is not None:
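
The new retry loop in check_filepath_recently_saved starts with a very low modification-age threshold and relaxes it by one second per retry, so an accidentally auto-saved but outdated notebook is not uploaded on the first pass. A minimal standalone sketch of that backoff check, assuming a 1-second sleep between retries as the "# sleep time is 1 sec" comment suggests; file_age_seconds and wait_until_settled are hypothetical names, not lamindb helpers:

    import time
    from pathlib import Path


    def file_age_seconds(path: Path) -> float:
        """Seconds since the file was last modified."""
        return time.time() - path.stat().st_mtime


    def wait_until_settled(path: Path, is_retry: bool = False) -> bool:
        """Return True once the file has not been modified for long enough."""
        # very low threshold on the first pass so an accidental auto-save is caught
        offset = 0.3 if not is_retry else 20
        for retry in range(30):
            threshold = offset + retry  # grows by 1 s per retry, mirroring the diff above
            if file_age_seconds(path) > threshold:
                return True
            time.sleep(1)
        return False
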
lamindb/_from_values.py CHANGED
@@ -9,8 +9,6 @@ from lamin_utils import colors, logger
  from lamindb._query_set import RecordList
  from lamindb.models import Record

- from .core._settings import settings
-
  if TYPE_CHECKING:
  from collections.abc import Iterable

@@ -29,88 +27,72 @@ def get_or_create_records(
  mute: bool = False,
  ) -> RecordList:
  """Get or create records from iterables."""
- registry = field.field.model
+ registry = field.field.model # type: ignore
  if create:
- return RecordList([registry(**{field.field.name: value}) for value in iterable])
- creation_search_names = settings.creation.search_names
+ return RecordList([registry(**{field.field.name: value}) for value in iterable]) # type: ignore
  organism = _get_organism_record(field, organism)
- settings.creation.search_names = False
- try:
- iterable_idx = index_iterable(iterable)
-
- # returns existing records & non-existing values
- records, nonexist_values, msg = get_existing_records(
- iterable_idx=iterable_idx,
- field=field,
- organism=organism,
- mute=mute,
- )
+ iterable_idx = index_iterable(iterable)
+
+ # returns existing records & non-existing values
+ records, nonexist_values, msg = get_existing_records(
+ iterable_idx=iterable_idx,
+ field=field,
+ organism=organism,
+ mute=mute,
+ )

- # new records to be created based on new values
- if len(nonexist_values) > 0:
- source_record = None
- if from_source:
- if isinstance(source, Record):
- source_record = source
- if not source_record and hasattr(registry, "public"):
- if organism is None:
- organism = _ensembl_prefix(nonexist_values[0], field, organism)
- organism = _get_organism_record(field, organism, force=True)
-
- if source_record:
- from bionty.core._add_ontology import check_source_in_db
-
- check_source_in_db(registry=registry, source=source_record)
-
- from_source = not source_record.in_db
- elif hasattr(registry, "source_id"):
- from_source = True
- else:
- from_source = False
-
- if from_source:
- records_bionty, unmapped_values = create_records_from_source(
- iterable_idx=nonexist_values,
- field=field,
- organism=organism,
- source=source_record,
- msg=msg,
- mute=mute,
+ # new records to be created based on new values
+ if len(nonexist_values) > 0:
+ source_record = None
+ if from_source:
+ if isinstance(source, Record):
+ source_record = source
+ if not source_record and hasattr(registry, "public"):
+ if organism is None:
+ organism = _ensembl_prefix(nonexist_values[0], field, organism)
+ organism = _get_organism_record(field, organism, force=True)
+
+ if source_record:
+ from bionty.core._add_ontology import check_source_in_db
+
+ check_source_in_db(registry=registry, source=source_record)
+
+ from_source = not source_record.in_db
+ elif hasattr(registry, "source_id"):
+ from_source = True
+ else:
+ from_source = False
+
+ if from_source:
+ records_bionty, unmapped_values = create_records_from_source(
+ iterable_idx=nonexist_values,
+ field=field,
+ organism=organism,
+ source=source_record,
+ msg=msg,
+ mute=mute,
+ )
+ if len(records_bionty) > 0:
+ msg = ""
+ for record in records_bionty:
+ record._from_source = True
+ records += records_bionty
+ else:
+ unmapped_values = nonexist_values
+ # unmapped new_ids will NOT create records
+ if len(unmapped_values) > 0:
+ if len(msg) > 0 and not mute:
+ logger.success(msg)
+ s = "" if len(unmapped_values) == 1 else "s"
+ print_values = colors.yellow(_format_values(unmapped_values))
+ name = registry.__name__
+ n_nonval = colors.yellow(f"{len(unmapped_values)} non-validated")
+ if not mute:
+ logger.warning(
+ f"{colors.red('did not create')} {name} record{s} for "
+ f"{n_nonval} {colors.italic(f'{field.field.name}{s}')}: {print_values}" # type: ignore
  )
- if len(records_bionty) > 0:
- msg = ""
- for record in records_bionty:
- record._from_source = True
- records += records_bionty
- else:
- unmapped_values = nonexist_values
- # unmapped new_ids will NOT create records
- if len(unmapped_values) > 0:
- if len(msg) > 0 and not mute:
- logger.success(msg)
- s = "" if len(unmapped_values) == 1 else "s"
- print_values = colors.yellow(_format_values(unmapped_values))
- name = registry.__name__
- n_nonval = colors.yellow(f"{len(unmapped_values)} non-validated")
- if not mute:
- logger.warning(
- f"{colors.red('did not create')} {name} record{s} for "
- f"{n_nonval} {colors.italic(f'{field.field.name}{s}')}: {print_values}"
- )
- # if registry.__get_module_name__() == "bionty" or registry == ULabel:
- # if isinstance(iterable, pd.Series):
- # feature = iterable.name
- # feature_name = None
- # if isinstance(feature, str):
- # feature_name = feature
- # if feature_name is not None:
- # if feature_name is not None:
- # for record in records:
- # record._feature = feature_name
- # logger.debug(f"added default feature '{feature_name}'")
- return RecordList(records)
- finally:
- settings.creation.search_names = creation_search_names
+ return RecordList(records)


  def get_existing_records(
@@ -120,10 +102,10 @@ def get_existing_records(
  mute: bool = False,
  ):
  # NOTE: existing records matching is agnostic to the source
- model = field.field.model
- if organism is None and field.field.name == "ensembl_gene_id":
+ model = field.field.model # type: ignore
+ if organism is None and field.field.name == "ensembl_gene_id": # type: ignore
  if len(iterable_idx) > 0:
- organism = _ensembl_prefix(iterable_idx[0], field, organism)
+ organism = _ensembl_prefix(iterable_idx[0], field, organism) # type: ignore
  organism = _get_organism_record(field, organism, force=True)

  # standardize based on the DB reference
@@ -152,6 +134,7 @@
  is_validated = model.validate(
  iterable_idx, field=field, organism=organism, mute=True
  )
+
  if len(is_validated) > 0:
  validated = iterable_idx[is_validated]
  else:
@@ -165,7 +148,7 @@
  msg = (
  "loaded"
  f" {colors.green(f'{len(validated)} {model.__name__} record{s}')}"
- f" matching {colors.italic(f'{field.field.name}')}: {print_values}"
+ f" matching {colors.italic(f'{field.field.name}')}: {print_values}" # type: ignore
  )
  if len(syn_mapper) > 0:
  s = "" if len(syn_mapper) == 1 else "s"
@@ -189,7 +172,7 @@
  # get all existing records in the db
  # if necessary, create records for the values in kwargs
  # k:v -> k:v_record
- query = {f"{field.field.name}__in": iterable_idx.values}
+ query = {f"{field.field.name}__in": iterable_idx.values} # type: ignore
  if organism is not None:
  query["organism"] = organism
  records = model.filter(**query).list()
@@ -209,7 +192,7 @@ def create_records_from_source(
  msg: str = "",
  mute: bool = False,
  ):
- model = field.field.model
+ model = field.field.model # type: ignore
  records: list = []
  # populate additional fields from bionty
  from bionty._bionty import get_source_record
@@ -232,11 +215,11 @@
  # standardize in the bionty reference
  # do not inspect synonyms if the field is not name field
  inspect_synonyms = True
- if hasattr(model, "_name_field") and field.field.name != model._name_field:
+ if hasattr(model, "_name_field") and field.field.name != model._name_field: # type: ignore
  inspect_synonyms = False
  result = public_ontology.inspect(
  iterable_idx,
- field=field.field.name,
+ field=field.field.name, # type: ignore
  mute=True,
  inspect_synonyms=inspect_synonyms,
  )
@@ -257,12 +240,14 @@

  # create records for values that are found in the bionty reference
  # matching either field or synonyms
- mapped_values = iterable_idx.intersection(bionty_df[field.field.name])
+ mapped_values = iterable_idx.intersection(bionty_df[field.field.name]) # type: ignore

  multi_msg = ""
  if len(mapped_values) > 0:
  bionty_kwargs, multi_msg = _bulk_create_dicts_from_df(
- keys=mapped_values, column_name=field.field.name, df=bionty_df
+ keys=mapped_values,
+ column_name=field.field.name, # type: ignore
+ df=bionty_df,
  )

  if hasattr(model, "organism_id") and organism is None:
@@ -274,7 +259,7 @@
  else {"source": source}
  )
  for bk in bionty_kwargs:
- records.append(model(**bk, **create_kwargs))
+ records.append(model(**bk, **create_kwargs, _skip_validation=True))

  # number of records that matches field (not synonyms)
  validated = result.validated
@@ -288,7 +273,7 @@
  logger.success(
  "created"
  f" {colors.purple(f'{len(validated)} {model.__name__} record{s} from Bionty')}"
- f" matching {colors.italic(f'{field.field.name}')}: {print_values}"
+ f" matching {colors.italic(f'{field.field.name}')}: {print_values}" # type: ignore
  )

  # make sure that synonyms logging appears after the field logging
@@ -365,7 +350,7 @@ def _has_organism_field(registry: type[Record]) -> bool:
  return False


- def _get_organism_record(
+ def _get_organism_record( # type: ignore
  field: StrField, organism: str | Record, force: bool = False
  ) -> Record:
  """Get organism record.
@@ -375,10 +360,10 @@
  organism: the organism to get the record for
  force: whether to force fetching the organism record
  """
- registry = field.field.model
+ registry = field.field.model # type: ignore
  check = True
  if not force and hasattr(registry, "_ontology_id_field"):
- check = field.field.name != registry._ontology_id_field
+ check = field.field.name != registry._ontology_id_field # type: ignore
  # e.g. bionty.CellMarker has "name" as _ontology_id_field
  if not registry._ontology_id_field.endswith("id"):
  check = True
@@ -397,10 +382,10 @@


  def _ensembl_prefix(id: str, field: StrField, organism: Record | None) -> str | None:
- if field.field.name == "ensembl_gene_id" and organism is None:
+ if field.field.name == "ensembl_gene_id" and organism is None: # type: ignore
  if id.startswith("ENSG"):
- organism = "human"
+ organism = "human" # type: ignore
  elif id.startswith("ENSMUSG"):
- organism = "mouse"
+ organism = "mouse" # type: ignore

  return organism
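
The refactor above drops the settings.creation.search_names toggling and the surrounding try/finally: get_or_create_records now queries existing records directly and instantiates the remaining values, with Bionty-backed creation passing _skip_validation=True. A hedged sketch of that get-or-create split in plain Django ORM terms; registry, field_name, and values are placeholders, and source-based creation from a public ontology is omitted:

    def get_or_create_sketch(registry, field_name: str, values: list[str]) -> list:
        """Return existing records for known values plus unsaved records for the rest."""
        existing = list(registry.objects.filter(**{f"{field_name}__in": values}))
        found = {getattr(record, field_name) for record in existing}
        # values without a match become new, not-yet-saved records
        new = [registry(**{field_name: value}) for value in values if value not in found]
        return existing + new
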
lamindb/_parents.py CHANGED
@@ -44,7 +44,7 @@ def _query_relatives(
  kind: Literal["parents", "children"],
  cls: type[HasParents],
  ) -> QuerySet:
- relatives = cls.objects.none()
+ relatives = cls.objects.none() # type: ignore
  if len(records) == 0:
  return relatives
  for record in records:
@@ -350,9 +350,9 @@ def _record_label(record: Record, field: str | None = None):
  )
  elif isinstance(record, Run):
  if record.transform.description:
- name = f'{record.transform.description.replace("&", "&amp;")}'
+ name = f"{record.transform.description.replace('&', '&amp;')}"
  elif record.transform.key:
- name = f'{record.transform.key.replace("&", "&amp;")}'
+ name = f"{record.transform.key.replace('&', '&amp;')}"
  else:
  name = f"{record.transform.uid}"
  user_display = (
@@ -366,7 +366,7 @@ def _record_label(record: Record, field: str | None = None):
  rf" user={user_display}<BR/>run={format_field_value(record.started_at)}</FONT>>"
  )
  elif isinstance(record, Transform):
- name = f'{record.name.replace("&", "&amp;")}'
+ name = f"{record.name.replace('&', '&amp;')}"
  return (
  rf'<{TRANSFORM_EMOJIS.get(str(record.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
  rf' FACE="Monospace">uid={record.uid}<BR/>type={record.type},'
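
The quote-style change above keeps the behavior identical: "&" still has to be escaped as "&amp;" before a description, key, or name is embedded in a Graphviz HTML-like label. A tiny illustration; html_label is a hypothetical helper, not the lamindb function:

    def html_label(name: str) -> str:
        """Escape '&' so the string is valid inside a Graphviz HTML-like label."""
        return f"<{name.replace('&', '&amp;')}>"


    assert html_label("B & T cells") == "<B &amp; T cells>"
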
lamindb/_query_set.py CHANGED
@@ -8,6 +8,7 @@ from collections.abc import Iterable as IterableType
  from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar

  import pandas as pd
+ from django.core.exceptions import FieldError
  from django.db import models
  from django.db.models import F, ForeignKey, ManyToManyField
  from django.db.models.fields.related import ForeignObjectRel
@@ -26,7 +27,7 @@ from lamindb.models import (
  Transform,
  )

- from .core.exceptions import DoesNotExist
+ from .errors import DoesNotExist

  T = TypeVar("T")

@@ -91,14 +92,12 @@ def get_backward_compat_filter_kwargs(queryset, expressions):
  "n_objects": "n_files",
  "visibility": "_branch_code", # for convenience (and backward compat <1.0)
  "transform": "run__transform", # for convenience (and backward compat <1.0)
- "feature_sets": "_schemas_m2m",
  "type": "kind",
  "_accessor": "otype",
  }
  elif queryset.model == Schema:
  name_mappings = {
  "registry": "itype",
- "artifacts": "_artifacts_m2m", # will raise warning when we start to migrate over
  }
  else:
  return expressions
@@ -114,7 +113,6 @@ def get_backward_compat_filter_kwargs(queryset, expressions):
  if parts[0] not in {
  "transform",
  "visibility",
- "feature_sets",
  "schemas",
  "artifacts",
  }:
@@ -203,7 +201,7 @@ def get(
  qs = QuerySet(model=registry_or_queryset)
  registry = registry_or_queryset
  if isinstance(idlike, int):
- return super(QuerySet, qs).get(id=idlike)
+ return super(QuerySet, qs).get(id=idlike) # type: ignore
  elif isinstance(idlike, str):
  qs = qs.filter(uid__startswith=idlike)
  if issubclass(registry, IsVersioned):
@@ -216,7 +214,27 @@ def get(
  else:
  assert idlike is None # noqa: S101
  expressions = process_expressions(qs, expressions)
- return registry.objects.using(qs.db).get(**expressions)
+ # don't want _branch_code here in .get(), only in .filter()
+ expressions.pop("_branch_code", None)
+ # inject is_latest for consistency with idlike
+ is_latest_was_not_in_expressions = "is_latest" not in expressions
+ if issubclass(registry, IsVersioned) and is_latest_was_not_in_expressions:
+ expressions["is_latest"] = True
+ try:
+ return registry.objects.using(qs.db).get(**expressions)
+ except registry.DoesNotExist:
+ # handle the case in which the is_latest injection led to a missed query
+ if "is_latest" in expressions and is_latest_was_not_in_expressions:
+ expressions.pop("is_latest")
+ result = (
+ registry.objects.using(qs.db)
+ .filter(**expressions)
+ .order_by("-created_at")
+ .first()
+ )
+ if result is not None:
+ return result
+ raise registry.DoesNotExist from registry.DoesNotExist


  class RecordList(UserList, Generic[T]):
@@ -537,13 +555,13 @@ class QuerySet(models.QuerySet):
  elif isinstance(include, str):
  include = [include]
  include = get_backward_compat_filter_kwargs(self, include)
- field_names = get_basic_field_names(self, include, features)
+ field_names = get_basic_field_names(self, include, features) # type: ignore

  annotate_kwargs = {}
  if features:
  annotate_kwargs.update(get_feature_annotate_kwargs(features))
  if include:
- include = include.copy()[::-1]
+ include = include.copy()[::-1] # type: ignore
  include_kwargs = {s: F(s) for s in include if s not in field_names}
  annotate_kwargs.update(include_kwargs)
  if annotate_kwargs:
@@ -561,12 +579,6 @@ class QuerySet(models.QuerySet):
  pk_column_name = pk_name if pk_name in df.columns else f"{pk_name}_id"
  if pk_column_name in df_reshaped.columns:
  df_reshaped = df_reshaped.set_index(pk_column_name)
-
- # Compatibility code
- df_reshaped.columns = df_reshaped.columns.str.replace(
- r"_schemas_m2m", "feature_sets", regex=True
- )
-
  return df_reshaped

  def delete(self, *args, **kwargs):
@@ -601,17 +613,65 @@ class QuerySet(models.QuerySet):
  return None
  return self[0]

+ def _handle_unknown_field(self, error: FieldError) -> None:
+ """Suggest available fields if an unknown field was passed."""
+ if "Cannot resolve keyword" in str(error):
+ field = str(error).split("'")[1]
+ fields = ", ".join(
+ sorted(
+ f.name
+ for f in self.model._meta.get_fields()
+ if not f.name.startswith("_")
+ and not f.name.startswith("links_")
+ and not f.name.endswith("_id")
+ )
+ )
+ raise FieldError(
+ f"Unknown field '{field}'. Available fields: {fields}"
+ ) from None
+ raise error # pragma: no cover
+
  def get(self, idlike: int | str | None = None, **expressions) -> Record:
  """Query a single record. Raises error if there are more or none."""
- return get(self, idlike, **expressions)
+ try:
+ return get(self, idlike, **expressions)
+ except ValueError as e:
+ # Pass through original error for explicit id lookups
+ if "Field 'id' expected a number" in str(e):
+ if "id" in expressions:
+ raise
+ field = next(iter(expressions))
+ raise FieldError(
+ f"Invalid lookup '{expressions[field]}' for {field}. Did you mean {field}__name?"
+ ) from None
+ raise # pragma: no cover
+ except FieldError as e:
+ self._handle_unknown_field(e)
+ raise # pragma: no cover

  def filter(self, *queries, **expressions) -> QuerySet:
  """Query a set of records."""
+ # Suggest to use __name for related fields such as id when not passed
+ for field, value in expressions.items():
+ if (
+ isinstance(value, str)
+ and value.strip("-").isalpha()
+ and "__" not in field
+ and hasattr(self.model, field)
+ ):
+ field_attr = getattr(self.model, field)
+ if hasattr(field_attr, "field") and field_attr.field.related_model:
+ raise FieldError(
+ f"Invalid lookup '{value}' for {field}. Did you mean {field}__name?"
+ )
+
  expressions = process_expressions(self, expressions)
  if len(expressions) > 0:
- return super().filter(*queries, **expressions)
- else:
- return self
+ try:
+ return super().filter(*queries, **expressions)
+ except FieldError as e:
+ self._handle_unknown_field(e)
+ return self

  def one(self) -> Record:
  """Exactly one result. Raises error if there are more or none."""