lamindb 0.76.8__py3-none-any.whl → 0.76.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. lamindb/__init__.py +114 -113
  2. lamindb/_artifact.py +1206 -1205
  3. lamindb/_can_validate.py +621 -579
  4. lamindb/_collection.py +390 -387
  5. lamindb/_curate.py +1603 -1601
  6. lamindb/_feature.py +155 -155
  7. lamindb/_feature_set.py +244 -242
  8. lamindb/_filter.py +23 -23
  9. lamindb/_finish.py +250 -256
  10. lamindb/_from_values.py +403 -382
  11. lamindb/_is_versioned.py +40 -40
  12. lamindb/_parents.py +476 -476
  13. lamindb/_query_manager.py +125 -125
  14. lamindb/_query_set.py +364 -362
  15. lamindb/_record.py +668 -649
  16. lamindb/_run.py +60 -57
  17. lamindb/_save.py +310 -308
  18. lamindb/_storage.py +14 -14
  19. lamindb/_transform.py +130 -127
  20. lamindb/_ulabel.py +56 -56
  21. lamindb/_utils.py +9 -9
  22. lamindb/_view.py +72 -72
  23. lamindb/core/__init__.py +94 -94
  24. lamindb/core/_context.py +590 -574
  25. lamindb/core/_data.py +510 -438
  26. lamindb/core/_django.py +209 -0
  27. lamindb/core/_feature_manager.py +994 -867
  28. lamindb/core/_label_manager.py +289 -253
  29. lamindb/core/_mapped_collection.py +631 -597
  30. lamindb/core/_settings.py +188 -187
  31. lamindb/core/_sync_git.py +138 -138
  32. lamindb/core/_track_environment.py +27 -27
  33. lamindb/core/datasets/__init__.py +59 -59
  34. lamindb/core/datasets/_core.py +581 -571
  35. lamindb/core/datasets/_fake.py +36 -36
  36. lamindb/core/exceptions.py +90 -90
  37. lamindb/core/fields.py +12 -12
  38. lamindb/core/loaders.py +164 -164
  39. lamindb/core/schema.py +56 -56
  40. lamindb/core/storage/__init__.py +25 -25
  41. lamindb/core/storage/_anndata_accessor.py +741 -740
  42. lamindb/core/storage/_anndata_sizes.py +41 -41
  43. lamindb/core/storage/_backed_access.py +98 -98
  44. lamindb/core/storage/_tiledbsoma.py +204 -204
  45. lamindb/core/storage/_valid_suffixes.py +21 -21
  46. lamindb/core/storage/_zarr.py +110 -110
  47. lamindb/core/storage/objects.py +62 -62
  48. lamindb/core/storage/paths.py +172 -172
  49. lamindb/core/subsettings/__init__.py +12 -12
  50. lamindb/core/subsettings/_creation_settings.py +38 -38
  51. lamindb/core/subsettings/_transform_settings.py +21 -21
  52. lamindb/core/types.py +19 -19
  53. lamindb/core/versioning.py +146 -158
  54. lamindb/integrations/__init__.py +12 -12
  55. lamindb/integrations/_vitessce.py +107 -107
  56. lamindb/setup/__init__.py +14 -14
  57. lamindb/setup/core/__init__.py +4 -4
  58. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/LICENSE +201 -201
  59. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/METADATA +8 -8
  60. lamindb-0.76.10.dist-info/RECORD +61 -0
  61. {lamindb-0.76.8.dist-info → lamindb-0.76.10.dist-info}/WHEEL +1 -1
  62. lamindb-0.76.8.dist-info/RECORD +0 -60
lamindb/_parents.py CHANGED
@@ -1,476 +1,476 @@
1
- from __future__ import annotations
2
-
3
- import builtins
4
- from typing import TYPE_CHECKING, Literal
5
-
6
- import lamindb_setup as ln_setup
7
- from lamin_utils import logger
8
- from lnschema_core import Artifact, Collection, Record, Run, Transform
9
- from lnschema_core.models import HasParents, format_field_value
10
-
11
- from lamindb._utils import attach_func_to_class_method
12
-
13
- from ._record import get_name_field
14
-
15
- if TYPE_CHECKING:
16
- from lnschema_core.types import StrField
17
-
18
- LAMIN_GREEN_LIGHTER = "#10b981"
19
- LAMIN_GREEN_DARKER = "#065f46"
20
- GREEN_FILL = "honeydew"
21
- TRANSFORM_EMOJIS = {"notebook": "📔", "app": "🖥️", "pipeline": "🧩"}
22
- is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
23
-
24
-
25
- def _transform_emoji(transform: Transform):
26
- if transform is not None:
27
- return TRANSFORM_EMOJIS.get(transform.type, "💫")
28
- else:
29
- return TRANSFORM_EMOJIS["pipeline"]
30
-
31
-
32
- def _view(u):
33
- from graphviz.backend import ExecutableNotFound
34
-
35
- try:
36
- if is_run_from_ipython:
37
- from IPython import get_ipython
38
- from IPython.display import display
39
-
40
- # True if the code is running in a Jupyter Notebook or Lab environment
41
- if get_ipython().__class__.__name__ == "TerminalInteractiveShell":
42
- return u.view()
43
- else:
44
- # call u._repr_mimebundle_() manually that exception gets raised properly and not just printed by
45
- # call to display()
46
- display(u._repr_mimebundle_(), raw=True)
47
- else:
48
- return u
49
- except (FileNotFoundError, RuntimeError, ExecutableNotFound): # pragma: no cover
50
- logger.error(
51
- "please install the graphviz executable on your system:\n - Ubuntu: `sudo"
52
- " apt-get install graphviz`\n - Windows:"
53
- " https://graphviz.org/download/#windows\n - Mac: `brew install graphviz`"
54
- )
55
-
56
-
57
- def view_parents(
58
- self,
59
- field: StrField | None = None,
60
- with_children: bool = False,
61
- distance: int = 5,
62
- ):
63
- if field is None:
64
- field = get_name_field(self)
65
- if not isinstance(field, str):
66
- field = field.field.name
67
-
68
- return _view_parents(
69
- record=self, field=field, with_children=with_children, distance=distance
70
- )
71
-
72
-
73
- def view_lineage(data: Artifact | Collection, with_children: bool = True) -> None:
74
- """Graph of data flow.
75
-
76
- Notes:
77
- For more info, see use cases: :doc:`docs:data-flow`.
78
-
79
- Examples:
80
- >>> collection.view_lineage()
81
- >>> artifact.view_lineage()
82
- """
83
- import graphviz
84
-
85
- df_values = _get_all_parent_runs(data)
86
- if with_children:
87
- df_values += _get_all_child_runs(data)
88
- df_edges = _df_edges_from_runs(df_values)
89
-
90
- data_label = _record_label(data)
91
-
92
- def add_node(
93
- record: Run | Artifact | Collection,
94
- node_id: str,
95
- node_label: str,
96
- u: graphviz.Digraph,
97
- ):
98
- if isinstance(record, Run):
99
- fillcolor = "gainsboro"
100
- else:
101
- fillcolor = GREEN_FILL
102
- u.node(
103
- node_id,
104
- label=node_label,
105
- shape="box",
106
- style="rounded,filled",
107
- fillcolor=fillcolor,
108
- )
109
-
110
- u = graphviz.Digraph(
111
- f"{data._meta.model_name}_{data.uid}",
112
- node_attr={
113
- "fillcolor": GREEN_FILL,
114
- "color": LAMIN_GREEN_DARKER,
115
- "fontname": "Helvetica",
116
- "fontsize": "10",
117
- },
118
- edge_attr={"arrowsize": "0.5"},
119
- )
120
-
121
- for _, row in df_edges.iterrows():
122
- add_node(row["source_record"], row["source"], row["source_label"], u)
123
- if row["target_record"] not in df_edges["source_record"]:
124
- add_node(row["target_record"], row["target"], row["target_label"], u)
125
-
126
- u.edge(row["source"], row["target"], color="dimgrey")
127
- # label the searched file
128
- u.node(
129
- f"{data._meta.model_name}_{data.uid}",
130
- label=data_label,
131
- style="rounded,filled",
132
- fillcolor=LAMIN_GREEN_LIGHTER,
133
- shape="box",
134
- )
135
-
136
- _view(u)
137
-
138
-
139
- def _view_parents(
140
- record: Record,
141
- field: str,
142
- with_children: bool = False,
143
- distance: int = 100,
144
- attr_name: Literal["parents", "predecessors"] = "parents",
145
- ):
146
- """Graph of parents."""
147
- if not hasattr(record, attr_name):
148
- raise NotImplementedError(
149
- f"Parents view is not supported for {record.__class__.__name__}!"
150
- )
151
- import graphviz
152
- import pandas as pd
153
-
154
- df_edges = None
155
- df_edges_parents = _df_edges_from_parents(
156
- record=record, field=field, distance=distance, attr_name=attr_name
157
- )
158
- if df_edges_parents is not None:
159
- df_edges = df_edges_parents
160
- if with_children:
161
- df_edges_children = _df_edges_from_parents(
162
- record=record,
163
- field=field,
164
- distance=distance,
165
- children=True,
166
- attr_name=attr_name,
167
- )
168
- if df_edges_children is not None:
169
- if df_edges is not None:
170
- df_edges = pd.concat(
171
- [df_edges_parents, df_edges_children]
172
- ).drop_duplicates()
173
- else:
174
- df_edges = df_edges_children
175
-
176
- record_label = _record_label(record, field)
177
-
178
- u = graphviz.Digraph(
179
- record.uid,
180
- node_attr={
181
- "color": LAMIN_GREEN_DARKER,
182
- "fillcolor": GREEN_FILL,
183
- "shape": "box",
184
- "style": "rounded,filled",
185
- "fontname": "Helvetica",
186
- "fontsize": "10",
187
- },
188
- edge_attr={"arrowsize": "0.5"},
189
- )
190
- u.node(
191
- record.uid,
192
- label=(
193
- _record_label(record)
194
- if record.__class__.__name__ == "Transform"
195
- else _add_emoji(record, record_label)
196
- ),
197
- fillcolor=LAMIN_GREEN_LIGHTER,
198
- )
199
- if df_edges is not None:
200
- for _, row in df_edges.iterrows():
201
- u.node(row["source"], label=row["source_label"])
202
- u.node(row["target"], label=row["target_label"])
203
- u.edge(row["source"], row["target"], color="dimgrey")
204
-
205
- _view(u)
206
-
207
-
208
- def _get_parents(
209
- record: Record,
210
- field: str,
211
- distance: int,
212
- children: bool = False,
213
- attr_name: Literal["parents", "predecessors"] = "parents",
214
- ):
215
- """Recursively get parent records within a distance."""
216
- if children:
217
- key = attr_name
218
- else:
219
- key = "children" if attr_name == "parents" else "successors" # type: ignore
220
- model = record.__class__
221
- condition = f"{key}__{field}"
222
- results = model.filter(**{condition: record.__getattribute__(field)}).all()
223
- if distance < 2:
224
- return results
225
-
226
- d = 2
227
- while d < distance:
228
- condition = f"{key}__{condition}"
229
- records = model.filter(**{condition: record.__getattribute__(field)})
230
-
231
- try:
232
- if not records.exists():
233
- return results
234
-
235
- results = results | records.all()
236
- d += 1
237
- except Exception:
238
- # For OperationalError:
239
- # SQLite does not support joins containing more than 64 tables
240
- return results
241
- return results
242
-
243
-
244
- def _df_edges_from_parents(
245
- record: Record,
246
- field: str,
247
- distance: int,
248
- children: bool = False,
249
- attr_name: Literal["parents", "predecessors"] = "parents",
250
- ):
251
- """Construct a DataFrame of edges as the input of graphviz.Digraph."""
252
- if attr_name == "parents":
253
- key = "children" if children else "parents"
254
- else:
255
- key = "successors" if children else "predecessors"
256
- parents = _get_parents(
257
- record=record,
258
- field=field,
259
- distance=distance,
260
- children=children,
261
- attr_name=attr_name,
262
- )
263
- all = record.__class__.objects
264
- records = parents | all.filter(id=record.id)
265
- df = records.distinct().df(include=[f"{key}__id"])
266
- if f"{key}__id" not in df.columns:
267
- return None
268
- df_edges = df[[f"{key}__id"]]
269
- df_edges = df_edges.explode(f"{key}__id")
270
- df_edges.index.name = "target"
271
- df_edges = df_edges.reset_index()
272
- df_edges.dropna(axis=0, inplace=True)
273
- df_edges.rename(columns={f"{key}__id": "source"}, inplace=True)
274
- df_edges = df_edges.drop_duplicates()
275
-
276
- # colons messes with the node formatting:
277
- # https://graphviz.readthedocs.io/en/stable/node_ports.html
278
- df_edges["source_record"] = df_edges["source"].apply(lambda x: all.get(id=x))
279
- df_edges["target_record"] = df_edges["target"].apply(lambda x: all.get(id=x))
280
- if record.__class__.__name__ == "Transform":
281
- df_edges["source_label"] = df_edges["source_record"].apply(_record_label)
282
- df_edges["target_label"] = df_edges["target_record"].apply(_record_label)
283
- else:
284
- df_edges["source_label"] = df_edges["source_record"].apply(
285
- lambda x: _record_label(x, field)
286
- )
287
- df_edges["target_label"] = df_edges["target_record"].apply(
288
- lambda x: _record_label(x, field)
289
- )
290
- df_edges["source"] = df_edges["source_record"].apply(lambda x: x.uid)
291
- df_edges["target"] = df_edges["target_record"].apply(lambda x: x.uid)
292
- return df_edges
293
-
294
-
295
- def _record_label(record: Record, field: str | None = None):
296
- if isinstance(record, Artifact):
297
- if record.description is None:
298
- name = record.key
299
- else:
300
- name = record.description.replace("&", "&amp;")
301
-
302
- return (
303
- rf'<📄 {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
304
- rf' FACE="Monospace">uid={record.uid}<BR/>suffix={record.suffix}</FONT>>'
305
- )
306
- elif isinstance(record, Collection):
307
- name = record.name.replace("&", "&amp;")
308
- return (
309
- rf'<🍱 {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
310
- rf' FACE="Monospace">uid={record.uid}<BR/>version={record.version}</FONT>>'
311
- )
312
- elif isinstance(record, Run):
313
- name = f'{record.transform.name.replace("&", "&amp;")}'
314
- user_display = (
315
- record.created_by.handle
316
- if record.created_by.name is None
317
- else record.created_by.name
318
- )
319
- return (
320
- rf'<{TRANSFORM_EMOJIS.get(str(record.transform.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
321
- rf' FACE="Monospace">uid={record.transform.uid}<BR/>type={record.transform.type},'
322
- rf" user={user_display}<BR/>run={format_field_value(record.started_at)}</FONT>>"
323
- )
324
- elif isinstance(record, Transform):
325
- name = f'{record.name.replace("&", "&amp;")}'
326
- return (
327
- rf'<{TRANSFORM_EMOJIS.get(str(record.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
328
- rf' FACE="Monospace">uid={record.uid}<BR/>type={record.type},'
329
- rf" user={record.created_by.name}<BR/>updated_at={format_field_value(record.updated_at)}</FONT>>"
330
- )
331
- else:
332
- name = record.__getattribute__(field)
333
- return (
334
- rf'<{name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
335
- rf' FACE="Monospace">uid={record.uid}</FONT>>'
336
- )
337
-
338
-
339
- def _add_emoji(record: Record, label: str):
340
- if record.__class__.__name__ == "Transform":
341
- emoji = TRANSFORM_EMOJIS.get(record.type, "💫")
342
- elif record.__class__.__name__ == "Run":
343
- emoji = TRANSFORM_EMOJIS.get(record.transform.type, "💫")
344
- else:
345
- emoji = ""
346
- return f"{emoji} {label}"
347
-
348
-
349
- def _get_all_parent_runs(data: Artifact | Collection) -> list:
350
- """Get all input file/collection runs recursively."""
351
- name = data._meta.model_name
352
- run_inputs_outputs = []
353
-
354
- runs = [data.run] if data.run is not None else []
355
- while len(runs) > 0:
356
- inputs = []
357
- for r in runs:
358
- inputs_run = (
359
- r.__getattribute__(f"input_{name}s")
360
- .all()
361
- .filter(visibility__in=[0, 1])
362
- .list()
363
- )
364
- if name == "artifact":
365
- inputs_run += (
366
- r.input_collections.all().filter(visibility__in=[0, 1]).list()
367
- )
368
- run_inputs_outputs += [(inputs_run, r)]
369
- outputs_run = (
370
- r.__getattribute__(f"output_{name}s")
371
- .all()
372
- .filter(visibility__in=[0, 1])
373
- .list()
374
- )
375
- if name == "artifact":
376
- outputs_run += (
377
- r.output_collections.all().filter(visibility__in=[0, 1]).list()
378
- )
379
- run_inputs_outputs += [(r, outputs_run)]
380
- inputs += inputs_run
381
- runs = [f.run for f in inputs if f.run is not None]
382
- return run_inputs_outputs
383
-
384
-
385
- def _get_all_child_runs(data: Artifact | Collection) -> list:
386
- """Get all output file/collection runs recursively."""
387
- name = data._meta.model_name
388
- all_runs: set[Run] = set()
389
- run_inputs_outputs = []
390
-
391
- if data.run is not None:
392
- runs = {f.run for f in data.run.__getattribute__(f"output_{name}s").all()}
393
- else:
394
- runs = set()
395
- if name == "artifact" and data.run is not None:
396
- runs.update(
397
- {
398
- f.run
399
- for f in data.run.output_collections.all()
400
- .filter(visibility__in=[0, 1])
401
- .all()
402
- }
403
- )
404
- while runs.difference(all_runs):
405
- all_runs.update(runs)
406
- child_runs: set[Run] = set()
407
- for r in runs:
408
- inputs_run = (
409
- r.__getattribute__(f"input_{name}s")
410
- .all()
411
- .filter(visibility__in=[0, 1])
412
- .list()
413
- )
414
- if name == "artifact":
415
- inputs_run += (
416
- r.input_collections.all().filter(visibility__in=[0, 1]).list()
417
- )
418
- run_inputs_outputs += [(inputs_run, r)]
419
-
420
- outputs_run = (
421
- r.__getattribute__(f"output_{name}s")
422
- .all()
423
- .filter(visibility__in=[0, 1])
424
- .list()
425
- )
426
- if name == "artifact":
427
- outputs_run += (
428
- r.output_collections.all().filter(visibility__in=[0, 1]).list()
429
- )
430
- run_inputs_outputs += [(r, outputs_run)]
431
-
432
- child_runs.update(
433
- Run.filter(
434
- **{f"input_{name}s__uid__in": [i.uid for i in outputs_run]}
435
- ).list()
436
- )
437
- # for artifacts, also include collections in the lineage
438
- if name == "artifact":
439
- child_runs.update(
440
- Run.filter(
441
- input_collections__uid__in=[i.uid for i in outputs_run]
442
- ).list()
443
- )
444
- runs = child_runs
445
- return run_inputs_outputs
446
-
447
-
448
- def _df_edges_from_runs(df_values: list):
449
- import pandas as pd
450
-
451
- df = pd.DataFrame(df_values, columns=["source_record", "target_record"])
452
- df = df.explode("source_record")
453
- df = df.explode("target_record")
454
- df = df.drop_duplicates().dropna()
455
- df["source"] = [f"{i._meta.model_name}_{i.uid}" for i in df["source_record"]]
456
- df["target"] = [f"{i._meta.model_name}_{i.uid}" for i in df["target_record"]]
457
- df["source_label"] = df["source_record"].apply(_record_label)
458
- df["target_label"] = df["target_record"].apply(_record_label)
459
- return df
460
-
461
-
462
- METHOD_NAMES = [
463
- "view_parents",
464
- ]
465
-
466
- if ln_setup._TESTING: # type: ignore
467
- from inspect import signature
468
-
469
- SIGS = {
470
- name: signature(getattr(HasParents, name))
471
- for name in METHOD_NAMES
472
- if not name.startswith("__")
473
- }
474
-
475
- for name in METHOD_NAMES:
476
- attach_func_to_class_method(name, HasParents, globals())
1
+ from __future__ import annotations
2
+
3
+ import builtins
4
+ from typing import TYPE_CHECKING, Literal
5
+
6
+ import lamindb_setup as ln_setup
7
+ from lamin_utils import logger
8
+ from lnschema_core import Artifact, Collection, Record, Run, Transform
9
+ from lnschema_core.models import HasParents, format_field_value
10
+
11
+ from lamindb._utils import attach_func_to_class_method
12
+
13
+ from ._record import get_name_field
14
+
15
+ if TYPE_CHECKING:
16
+ from lnschema_core.types import StrField
17
+
# Colours used when rendering graphs with graphviz in this module.
LAMIN_GREEN_LIGHTER = "#10b981"  # fill colour of the highlighted (queried) node
LAMIN_GREEN_DARKER = "#065f46"  # value of the graph-wide `color` node attribute
GREEN_FILL = "honeydew"  # default fill colour of artifact/collection/record nodes
# Emoji prefix for node labels, keyed by transform type; callers fall back to "💫"
# for types that are not listed here.
TRANSFORM_EMOJIS = {"notebook": "📔", "app": "🖥️", "pipeline": "🧩"}
# Truthy only when `builtins.__IPYTHON__` exists (presumably set by IPython — confirm).
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
23
+
24
+
def _transform_emoji(transform: Transform):
    """Return the emoji that represents a transform in graph labels.

    Args:
        transform: The transform to look up, or `None`.

    Returns:
        The emoji registered for `transform.type`, "💫" when the type is not
        registered, or the pipeline emoji when `transform` is `None`.
    """
    if transform is None:
        return TRANSFORM_EMOJIS["pipeline"]
    return TRANSFORM_EMOJIS.get(transform.type, "💫")
30
+
31
+
def _view(u):
    """Show a graphviz graph in the way that suits the current session.

    Args:
        u: The graph object to show (callers in this module pass a
            `graphviz.Digraph`).

    Returns:
        `u` itself outside IPython, the result of `u.view()` in a terminal
        IPython shell, and `None` otherwise (the graph is handed to
        `IPython.display.display`, or an error was logged).
    """
    from graphviz.backend import ExecutableNotFound

    try:
        if not is_run_from_ipython:
            return u

        from IPython import get_ipython
        from IPython.display import display

        shell_name = get_ipython().__class__.__name__
        if shell_name == "TerminalInteractiveShell":
            # a terminal shell cannot render inline output
            return u.view()

        # Build the mime bundle ourselves so that a rendering failure is raised
        # here (and handled below) instead of merely being printed by display().
        bundle = u._repr_mimebundle_()
        display(bundle, raw=True)
    except (FileNotFoundError, RuntimeError, ExecutableNotFound):  # pragma: no cover
        logger.error(
            "please install the graphviz executable on your system:\n - Ubuntu: `sudo"
            " apt-get install graphviz`\n - Windows:"
            " https://graphviz.org/download/#windows\n - Mac: `brew install graphviz`"
        )
55
+
56
+
def view_parents(
    self,
    field: StrField | None = None,
    with_children: bool = False,
    distance: int = 5,
):
    """Draw the graph of this record's parents.

    Args:
        field: Field used to label the nodes, given either as a field name or
            as a field object exposing `.field.name`. Defaults to the result of
            `get_name_field(self)`.
        with_children: Whether to include children in the graph as well.
        distance: Passed through to `_view_parents` as the traversal distance.

    Returns:
        Whatever `_view_parents` returns.
    """
    if field is None:
        field = get_name_field(self)
    # normalise field objects to their plain name
    field_name = field if isinstance(field, str) else field.field.name

    return _view_parents(
        record=self,
        field=field_name,
        with_children=with_children,
        distance=distance,
    )
71
+
72
+
def view_lineage(data: Artifact | Collection, with_children: bool = True) -> None:
    """Graph of data flow.

    Args:
        data: The artifact or collection whose lineage is drawn; its node is
            highlighted in the graph.
        with_children: Whether to also include the runs found by
            `_get_all_child_runs`, in addition to the parent runs.

    Notes:
        For more info, see use cases: :doc:`docs:data-flow`.

    Examples:
        >>> collection.view_lineage()
        >>> artifact.view_lineage()
    """
    import graphviz

    df_values = _get_all_parent_runs(data)
    if with_children:
        df_values += _get_all_child_runs(data)
    df_edges = _df_edges_from_runs(df_values)

    data_label = _record_label(data)
    # node ids follow the "<model_name>_<uid>" scheme built in _df_edges_from_runs
    data_node_id = f"{data._meta.model_name}_{data.uid}"

    def add_node(
        record: Run | Artifact | Collection,
        node_id: str,
        node_label: str,
        u: graphviz.Digraph,
    ):
        # runs are drawn grey, data objects in the default green fill
        fillcolor = "gainsboro" if isinstance(record, Run) else GREEN_FILL
        u.node(
            node_id,
            label=node_label,
            shape="box",
            style="rounded,filled",
            fillcolor=fillcolor,
        )

    u = graphviz.Digraph(
        data_node_id,
        node_attr={
            "fillcolor": GREEN_FILL,
            "color": LAMIN_GREEN_DARKER,
            "fontname": "Helvetica",
            "fontsize": "10",
        },
        edge_attr={"arrowsize": "0.5"},
    )

    # `x in series` tests the Series *index*, not its values, so membership is
    # checked against an explicit set of source node ids instead. A target that
    # also occurs as a source gets its node from the row where it is the source.
    source_ids = set(df_edges["source"])
    for _, row in df_edges.iterrows():
        add_node(row["source_record"], row["source"], row["source_label"], u)
        if row["target"] not in source_ids:
            add_node(row["target_record"], row["target"], row["target_label"], u)

        u.edge(row["source"], row["target"], color="dimgrey")

    # (re-)declare the queried record last so that its highlight colour wins
    u.node(
        data_node_id,
        label=data_label,
        style="rounded,filled",
        fillcolor=LAMIN_GREEN_LIGHTER,
        shape="box",
    )

    _view(u)
137
+
138
+
def _view_parents(
    record: Record,
    field: str,
    with_children: bool = False,
    distance: int = 100,
    attr_name: Literal["parents", "predecessors"] = "parents",
):
    """Graph of parents.

    Args:
        record: The record at the centre of the graph.
        field: Name of the field used to label nodes.
        with_children: Whether to add the edges towards children as well.
        distance: Traversal distance handed to `_df_edges_from_parents`.
        attr_name: Name of the relation to follow on `record`.

    Raises:
        NotImplementedError: If `record` has no attribute named `attr_name`.
    """
    if not hasattr(record, attr_name):
        raise NotImplementedError(
            f"Parents view is not supported for {record.__class__.__name__}!"
        )
    import graphviz
    import pandas as pd

    parent_edges = _df_edges_from_parents(
        record=record, field=field, distance=distance, attr_name=attr_name
    )
    # stays None when neither direction yields an edge table
    df_edges = parent_edges
    if with_children:
        child_edges = _df_edges_from_parents(
            record=record,
            field=field,
            distance=distance,
            children=True,
            attr_name=attr_name,
        )
        if child_edges is not None:
            if parent_edges is None:
                df_edges = child_edges
            else:
                df_edges = pd.concat([parent_edges, child_edges]).drop_duplicates()

    record_label = _record_label(record, field)

    default_node_style = {
        "color": LAMIN_GREEN_DARKER,
        "fillcolor": GREEN_FILL,
        "shape": "box",
        "style": "rounded,filled",
        "fontname": "Helvetica",
        "fontsize": "10",
    }
    u = graphviz.Digraph(
        record.uid,
        node_attr=default_node_style,
        edge_attr={"arrowsize": "0.5"},
    )

    # transforms get their dedicated label; everything else the field-based one
    if record.__class__.__name__ == "Transform":
        root_label = _record_label(record)
    else:
        root_label = _add_emoji(record, record_label)
    u.node(record.uid, label=root_label, fillcolor=LAMIN_GREEN_LIGHTER)

    if df_edges is not None:
        for _, edge in df_edges.iterrows():
            u.node(edge["source"], label=edge["source_label"])
            u.node(edge["target"], label=edge["target_label"])
            u.edge(edge["source"], edge["target"], color="dimgrey")

    _view(u)
206
+
207
+
def _get_parents(
    record: Record,
    field: str,
    distance: int,
    children: bool = False,
    attr_name: Literal["parents", "predecessors"] = "parents",
):
    """Collect related records by chaining relation lookups up to a distance.

    The lookup key is the *inverse* of the direction being collected: parents
    are found by filtering on `children__<field>` (or `successors__<field>`),
    children by filtering on `<attr_name>__<field>`.

    Args:
        record: The record to start from.
        field: Name of the field whose value identifies `record` in the filter.
        distance: Upper bound on how many times the lookup is chained.
        children: Collect children instead of parents.
        attr_name: Name of the relation on the model.

    Returns:
        The union of the query sets found at each depth.
    """
    if children:
        key = attr_name
    elif attr_name == "parents":
        key = "children"
    else:
        key = "successors"  # type: ignore

    model = record.__class__
    value = getattr(record, field)

    condition = f"{key}__{field}"
    results = model.filter(**{condition: value}).all()

    # NOTE(review): the loop starts at depth 2 and stops *before* `distance`, so
    # `distance=1` and `distance=2` both return only the direct relations —
    # confirm whether `<=` was intended.
    depth = 2
    while depth < distance:
        condition = f"{key}__{condition}"
        candidates = model.filter(**{condition: value})
        try:
            if not candidates.exists():
                break
            results = results | candidates.all()
        except Exception:
            # For OperationalError:
            # SQLite does not support joins containing more than 64 tables
            break
        depth += 1
    return results
242
+
243
+
def _df_edges_from_parents(
    record: Record,
    field: str,
    distance: int,
    children: bool = False,
    attr_name: Literal["parents", "predecessors"] = "parents",
):
    """Construct a DataFrame of edges as the input of graphviz.Digraph.

    Args:
        record: The record at the centre of the graph.
        field: Name of the field used for node labels.
        distance: Traversal distance handed to `_get_parents`.
        children: Build the edges towards children instead of parents.
        attr_name: Name of the relation on the model.

    Returns:
        A DataFrame with the columns `target`, `source`, `source_record`,
        `target_record`, `source_label` and `target_label`, or `None` when the
        queried frame has no `<relation>__id` column.
    """
    if attr_name == "parents":
        key = "children" if children else "parents"
    else:
        key = "successors" if children else "predecessors"
    id_column = f"{key}__id"

    related = _get_parents(
        record=record,
        field=field,
        distance=distance,
        children=children,
        attr_name=attr_name,
    )
    manager = record.__class__.objects
    # include the record itself so that its own relations show up as edges
    records = related | manager.filter(id=record.id)
    df = records.distinct().df(include=[id_column])
    if id_column not in df.columns:
        return None

    # one row per (record id, related id) pair; the frame index holds the record id
    df_edges = (
        df[[id_column]]
        .explode(id_column)
        .rename_axis("target")
        .reset_index()
        .dropna(axis=0)
        .rename(columns={id_column: "source"})
        .drop_duplicates()
    )

    def fetch(pk):
        return manager.get(id=pk)

    if record.__class__.__name__ == "Transform":
        make_label = _record_label
    else:

        def make_label(rec):
            return _record_label(rec, field)

    df_edges["source_record"] = df_edges["source"].apply(fetch)
    df_edges["target_record"] = df_edges["target"].apply(fetch)
    df_edges["source_label"] = df_edges["source_record"].apply(make_label)
    df_edges["target_label"] = df_edges["target_record"].apply(make_label)

    # Node ids are the uids rather than the labels: colons mess with the node
    # formatting, see https://graphviz.readthedocs.io/en/stable/node_ports.html
    df_edges["source"] = df_edges["source_record"].apply(lambda rec: rec.uid)
    df_edges["target"] = df_edges["target_record"].apply(lambda rec: rec.uid)
    return df_edges
293
+
294
+
def _record_label(record: Record, field: str | None = None):
    """Build the Graphviz HTML-like label for a record.

    All text taken from the record is escaped for `&`, `<` and `>`, because the
    label is an HTML-like string in which these characters are markup.

    Args:
        record: An `Artifact`, `Collection`, `Run`, `Transform`, or any other
            record exposing `uid`.
        field: Name of the attribute used as the title for records that are
            none of the four types above; required in that case.

    Returns:
        The label string, wrapped in `<...>` as Graphviz expects for HTML-like
        labels.
    """
    from html import escape

    def text(value) -> str:
        # quote=False: quotes are harmless in label text, only &, <, > are markup
        return escape(str(value), quote=False)

    def user_display(user) -> str:
        # fall back to the handle for users without a display name
        return text(user.handle if user.name is None else user.name)

    if isinstance(record, Artifact):
        name = text(record.key if record.description is None else record.description)
        return (
            rf'<📄 {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
            rf' FACE="Monospace">uid={record.uid}<BR/>suffix={record.suffix}</FONT>>'
        )
    elif isinstance(record, Collection):
        name = text(record.name)
        return (
            rf'<🍱 {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
            rf' FACE="Monospace">uid={record.uid}<BR/>version={record.version}</FONT>>'
        )
    elif isinstance(record, Run):
        name = text(record.transform.name)
        user = user_display(record.created_by)
        return (
            rf'<{TRANSFORM_EMOJIS.get(str(record.transform.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
            rf' FACE="Monospace">uid={record.transform.uid}<BR/>type={record.transform.type},'
            rf" user={user}<BR/>run={format_field_value(record.started_at)}</FONT>>"
        )
    elif isinstance(record, Transform):
        name = text(record.name)
        user = user_display(record.created_by)
        return (
            rf'<{TRANSFORM_EMOJIS.get(str(record.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
            rf' FACE="Monospace">uid={record.uid}<BR/>type={record.type},'
            rf" user={user}<BR/>updated_at={format_field_value(record.updated_at)}</FONT>>"
        )
    else:
        name = text(getattr(record, field))
        return (
            rf'<{name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
            rf' FACE="Monospace">uid={record.uid}</FONT>>'
        )
337
+
338
+
def _add_emoji(record: Record, label: str):
    """Prefix a label with the emoji of the record's transform type.

    Args:
        record: The record the label belongs to.
        label: The label text.

    Returns:
        `"<emoji> <label>"` for `Transform` and `Run` records ("💫" for
        unregistered types); for every other record the emoji is empty, so the
        result is the label preceded by a single space.
    """
    class_name = record.__class__.__name__
    if class_name == "Transform":
        transform_type = record.type
    elif class_name == "Run":
        transform_type = record.transform.type
    else:
        return f" {label}"
    return f"{TRANSFORM_EMOJIS.get(transform_type, '💫')} {label}"
347
+
348
+
def _get_all_parent_runs(data: Artifact | Collection) -> list:
    """Get all input file/collection runs recursively.

    Starting from the run that created `data`, walks upstream through the runs
    that created each input. Every run is expanded at most once, so cyclic
    lineage terminates and shared ancestors are not queried repeatedly.

    Args:
        data: The artifact or collection to start from.

    Returns:
        A list of `(inputs, run)` and `(run, outputs)` tuples, where `inputs`
        and `outputs` are lists of records with visibility 0 or 1.
    """
    name = data._meta.model_name
    include_collections = name == "artifact"
    run_inputs_outputs = []
    seen_runs: set[Run] = set()

    runs = [data.run] if data.run is not None else []
    while runs:
        inputs = []
        for r in runs:
            if r in seen_runs:
                continue
            seen_runs.add(r)

            inputs_run = (
                getattr(r, f"input_{name}s")
                .all()
                .filter(visibility__in=[0, 1])
                .list()
            )
            if include_collections:
                # for artifacts, collections are part of the lineage as well
                inputs_run += (
                    r.input_collections.all().filter(visibility__in=[0, 1]).list()
                )
            run_inputs_outputs.append((inputs_run, r))

            outputs_run = (
                getattr(r, f"output_{name}s")
                .all()
                .filter(visibility__in=[0, 1])
                .list()
            )
            if include_collections:
                outputs_run += (
                    r.output_collections.all().filter(visibility__in=[0, 1]).list()
                )
            run_inputs_outputs.append((r, outputs_run))

            inputs += inputs_run
        # next level: the runs that created this level's inputs, minus visited ones
        runs = [
            f.run for f in inputs if f.run is not None and f.run not in seen_runs
        ]
    return run_inputs_outputs
383
+
384
+
def _get_all_child_runs(data: Artifact | Collection) -> list:
    """Get all output file/collection runs recursively.

    Args:
        data: The artifact or collection to start from.

    Returns:
        A list of `(inputs, run)` and `(run, outputs)` tuples, where `inputs`
        and `outputs` are lists of records with visibility 0 or 1.
    """
    name = data._meta.model_name
    input_attr = f"input_{name}s"
    output_attr = f"output_{name}s"
    is_artifact = name == "artifact"

    visited: set[Run] = set()
    run_inputs_outputs = []

    # seed with the runs attached to the outputs of the run that created `data`
    runs = set()
    if data.run is not None:
        runs = {f.run for f in data.run.__getattribute__(output_attr).all()}
        if is_artifact:
            runs.update(
                {
                    f.run
                    for f in data.run.output_collections.all()
                    .filter(visibility__in=[0, 1])
                    .all()
                }
            )

    # continue for as long as the current level contains a run not seen before
    while runs.difference(visited):
        visited.update(runs)
        child_runs: set[Run] = set()
        for r in runs:
            inputs_run = (
                r.__getattribute__(input_attr)
                .all()
                .filter(visibility__in=[0, 1])
                .list()
            )
            if is_artifact:
                inputs_run += (
                    r.input_collections.all().filter(visibility__in=[0, 1]).list()
                )
            run_inputs_outputs.append((inputs_run, r))

            outputs_run = (
                r.__getattribute__(output_attr)
                .all()
                .filter(visibility__in=[0, 1])
                .list()
            )
            if is_artifact:
                outputs_run += (
                    r.output_collections.all().filter(visibility__in=[0, 1]).list()
                )
            run_inputs_outputs.append((r, outputs_run))

            # runs that consume any of this run's outputs form the next level
            output_uids = [i.uid for i in outputs_run]
            child_runs.update(
                Run.filter(**{f"{input_attr}__uid__in": output_uids}).list()
            )
            # for artifacts, also include collections in the lineage
            if is_artifact:
                child_runs.update(
                    Run.filter(
                        input_collections__uid__in=[i.uid for i in outputs_run]
                    ).list()
                )
        runs = child_runs
    return run_inputs_outputs
446
+
447
+
def _df_edges_from_runs(df_values: list):
    """Turn (source, target) pairs into the edge table used by `view_lineage`.

    Args:
        df_values: List of 2-tuples whose elements are a record or a list of
            records; lists are expanded into one row per record.

    Returns:
        A DataFrame with the columns `source_record`, `target_record`,
        `source`, `target`, `source_label` and `target_label`.
    """
    import pandas as pd

    def node_id(rec):
        # graph node ids follow the "<model_name>_<uid>" scheme
        return f"{rec._meta.model_name}_{rec.uid}"

    df = (
        pd.DataFrame(df_values, columns=["source_record", "target_record"])
        .explode("source_record")
        .explode("target_record")
        .drop_duplicates()
        .dropna()
    )
    df["source"] = [node_id(rec) for rec in df["source_record"]]
    df["target"] = [node_id(rec) for rec in df["target_record"]]
    df["source_label"] = df["source_record"].apply(_record_label)
    df["target_label"] = df["target_record"].apply(_record_label)
    return df
460
+
461
+
# Names of the functions defined in this module that are attached to
# `HasParents` at import time (see the loop at the bottom).
METHOD_NAMES = [
    "view_parents",
]

if ln_setup._TESTING:  # type: ignore
    from inspect import signature

    # While testing, capture the signatures that `HasParents` declares for these
    # methods. This runs before the attach loop below.
    # NOTE(review): presumably compared against the implementations by the test
    # suite — confirm.
    SIGS = {
        name: signature(getattr(HasParents, name))
        for name in METHOD_NAMES
        if not name.startswith("__")
    }

# Hand each listed function, looked up by name in this module's globals, to
# `attach_func_to_class_method` together with `HasParents`.
for name in METHOD_NAMES:
    attach_func_to_class_method(name, HasParents, globals())