offwork 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. offwork/__init__.py +167 -0
  2. offwork/__main__.py +770 -0
  3. offwork/_venv.py +174 -0
  4. offwork/core/__init__.py +15 -0
  5. offwork/core/errors.py +83 -0
  6. offwork/core/models.py +174 -0
  7. offwork/core/pairing.py +389 -0
  8. offwork/core/progress.py +91 -0
  9. offwork/core/signing.py +91 -0
  10. offwork/core/task.py +520 -0
  11. offwork/core/token.py +184 -0
  12. offwork/core/version.py +10 -0
  13. offwork/graph/__init__.py +5 -0
  14. offwork/graph/analyzer.py +637 -0
  15. offwork/graph/decorator.py +87 -0
  16. offwork/graph/graph.py +995 -0
  17. offwork/graph/store.py +500 -0
  18. offwork/graph/tracing.py +429 -0
  19. offwork/py.typed +0 -0
  20. offwork/typing.py +48 -0
  21. offwork/worker/__init__.py +18 -0
  22. offwork/worker/backends/__init__.py +3 -0
  23. offwork/worker/backends/base.py +149 -0
  24. offwork/worker/backends/http.py +237 -0
  25. offwork/worker/backends/local.py +452 -0
  26. offwork/worker/backends/rabbitmq.py +410 -0
  27. offwork/worker/backends/redis.py +175 -0
  28. offwork/worker/deps.py +365 -0
  29. offwork/worker/remote.py +793 -0
  30. offwork/worker/result.py +276 -0
  31. offwork/worker/sandbox/Dockerfile +24 -0
  32. offwork/worker/sandbox/__init__.py +18 -0
  33. offwork/worker/sandbox/_protocol.py +50 -0
  34. offwork/worker/sandbox/docker.py +438 -0
  35. offwork/worker/sandbox/guest_agent.py +622 -0
  36. offwork/worker/schedule.py +26 -0
  37. offwork/worker/worker.py +263 -0
  38. offwork-0.4.0.dist-info/METADATA +143 -0
  39. offwork-0.4.0.dist-info/RECORD +42 -0
  40. offwork-0.4.0.dist-info/WHEEL +4 -0
  41. offwork-0.4.0.dist-info/entry_points.txt +3 -0
  42. offwork-0.4.0.dist-info/licenses/LICENSE +661 -0
offwork/graph/graph.py ADDED
@@ -0,0 +1,995 @@
1
+ """Dependency graph: function registration, auto-discovery, and serialization."""
2
+
3
+ import ast
4
+ import sys
5
+ import base64
6
+ import pickle
7
+ import inspect
8
+ import logging
9
+ import warnings
10
+ import threading
11
+ import collections
12
+ import contextvars
13
+ from typing import Any, Self
14
+ from dataclasses import dataclass
15
+ from collections.abc import Callable
16
+
17
+ from offwork.core.errors import Error
18
+ from offwork.core.models import ImportInfo, FunctionNode
19
+ from offwork.graph.store import Store
20
+ from offwork.graph.tracing import _BUILTIN_NAMES, TracingMixin, _is_user_class, _is_user_function
21
+ from offwork.graph.analyzer import (
22
+ filter_imports,
23
+ get_used_names,
24
+ has_super_call,
25
+ find_bare_calls,
26
+ find_self_calls,
27
+ get_class_attrs,
28
+ get_module_imports,
29
+ get_function_source,
30
+ _resolve_owner_class,
31
+ get_module_assignments,
32
+ detect_traced_dependencies,
33
+ get_class_bases_from_source,
34
+ )
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
@dataclass
class _AnalysisResult:
    """Bundle of facts gathered by :func:`_analyze_function` for one function."""

    # Source text as returned by ``get_function_source``.
    source: str
    # Module imports kept by ``filter_imports``, minus any whose bound name
    # is shadowed by a captured module-level variable.
    imports: list[ImportInfo]
    # Result of ``_resolve_owner_class`` on the function's ``__qualname__``;
    # ``None`` when no owning class was resolved.
    owner_class: str | None
    # Module-level assignment name -> assignment source, for names the
    # function uses.
    module_vars: dict[str, str]
47
+
48
+
49
def _analyze_function(func: Callable[..., object]) -> _AnalysisResult:
    """Extract source, imports, and module-level vars for a function.

    Shared logic between :meth:`Graph.register` and
    :meth:`Graph._auto_register`.

    Module-level assignments are captured transitively: when a captured
    assignment's own source uses another module-level assignment, that one
    is captured as well, so the captured set is self-contained.

    Args:
        func: The function to analyse.

    Returns:
        The function source, the imports it needs, its owner class (if any)
        and the module-level assignments it depends on, in the order
        ``get_module_assignments`` reported them.

    Raises:
        Whatever ``get_function_source`` / ``get_module_imports`` raise; the
        callers in this file handle ``OSError`` and ``TypeError``.
    """
    source = get_function_source(func)
    all_imports = get_module_imports(func)
    used_names = get_used_names(source)
    owner_class = _resolve_owner_class(func.__qualname__)

    try:
        all_assignments = get_module_assignments(func)
    except (OSError, TypeError):
        # Best effort: without module source there are no module vars.
        all_assignments = {}

    # Walk the assignment graph to a fixed point so that a variable which is
    # only referenced from another captured variable is not left behind.
    needed: set[str] = set()
    pending = [name for name in all_assignments if name in used_names]
    while pending:
        name = pending.pop()
        if name in needed:
            continue
        needed.add(name)
        var_names = get_used_names(all_assignments[name])
        # Names used by captured variables also decide which imports to keep.
        used_names |= var_names
        pending.extend(
            other
            for other in var_names
            if other in all_assignments and other not in needed
        )

    # Rebuild in the original assignment order.
    module_vars = {
        name: src
        for name, src in all_assignments.items()
        if name in needed
    }

    imports = filter_imports(all_imports, used_names)
    # A captured module variable shadows an import bound to the same name.
    imports = [imp for imp in imports if imp.bound_name not in module_vars]

    return _AnalysisResult(source, imports, owner_class, module_vars)
76
+
77
+
78
+ def _try_constructor_expr(value: object) -> str | None:
79
+ """Try to produce a valid Python expression for common stdlib types."""
80
+ if isinstance(value, collections.defaultdict):
81
+ factory = value.default_factory
82
+ if factory is None:
83
+ factory_repr = "None"
84
+ elif factory in (int, float, str, list, dict, set, tuple, bool, bytes):
85
+ factory_repr = factory.__name__
86
+ else:
87
+ return None
88
+ items_repr = repr(dict(value))
89
+ return f"__import__('collections').defaultdict({factory_repr}, {items_repr})"
90
+ if isinstance(value, collections.Counter):
91
+ return f"__import__('collections').Counter({repr(dict(value))})"
92
+ if isinstance(value, collections.deque):
93
+ if value.maxlen is not None:
94
+ return f"__import__('collections').deque({repr(list(value))}, maxlen={value.maxlen})"
95
+ return f"__import__('collections').deque({repr(list(value))})"
96
+ return None
97
+
98
+
99
+ def _try_pickle_fallback(value: object) -> str | None:
100
+ """Try to serialize a value via pickle+base64 into a self-contained expression."""
101
+ try:
102
+ pickled = pickle.dumps(value)
103
+ encoded = base64.b64encode(pickled).decode("ascii")
104
+ expr = f"__import__('pickle').loads(__import__('base64').b64decode('{encoded}'))"
105
+ ast.parse(expr, mode="eval")
106
+ return expr
107
+ except (pickle.PicklingError, TypeError, AttributeError, SyntaxError):
108
+ return None
109
+
110
+
111
+ def _try_get_lambda_source(func: Callable[..., object]) -> str | None:
112
+ """Try to extract the lambda expression source from a lambda function."""
113
+ try:
114
+ source = inspect.getsource(func).strip()
115
+ except (OSError, TypeError):
116
+ return None
117
+ if "lambda" not in source:
118
+ return None
119
+ try:
120
+ tree = ast.parse(source, mode="exec")
121
+ except SyntaxError:
122
+ return None
123
+ for node in ast.walk(tree):
124
+ if isinstance(node, ast.Lambda):
125
+ return ast.unparse(node)
126
+ return None
127
+
128
+
129
def _capture_closure(
    func: Callable[..., object],
) -> tuple[dict[str, str], dict[str, str], dict[str, Callable[..., object]], list[ImportInfo], dict[str, type]]:
    """Extract closure variables and traced function references from *func*.

    Each free variable is classified by the first rule that applies:

    1. its ``repr`` parses as an expression -> stored verbatim;
    2. it is a module -> recorded as an import statement;
    3. it carries ``__offwork_traced__`` -> recorded by qualified name;
    4. it is a lambda whose source can be recovered -> stored as source;
    5. it is a user function -> recorded by name and returned as an object;
    6. it is a user class -> returned for auto-registration;
    7. a constructor expression or a pickle expression can be built;
    8. otherwise a warning is emitted and the variable is dropped.

    Returns ``(closure_vars, closure_func_refs, closure_func_objects, closure_module_imports, closure_classes)``
    where *closure_vars* maps variable names to expression strings,
    *closure_func_refs* maps variable names to qualified names,
    *closure_func_objects* maps qualified names to the actual callable
    objects (for auto-registration of non-traced functions),
    *closure_module_imports* is a list of :class:`ImportInfo` for module
    objects found in the closure (from inline imports), and
    *closure_classes* maps variable names to user-defined class objects
    that need auto-registration.
    """
    closure_vars: dict[str, str] = {}
    closure_func_refs: dict[str, str] = {}
    closure_func_objects: dict[str, Callable[..., object]] = {}
    closure_module_imports: list[ImportInfo] = []
    closure_classes: dict[str, type] = {}
    captured = (
        closure_vars,
        closure_func_refs,
        closure_func_objects,
        closure_module_imports,
        closure_classes,
    )

    if not func.__code__.co_freevars:
        return captured

    try:
        closure_info = inspect.getclosurevars(func)
    except ValueError:
        return captured

    for name, value in closure_info.nonlocals.items():
        # Skip the implicit __class__ cell injected by Python for super() calls.
        # Reconstructed code uses explicit super(ClassName, self) instead.
        if name == "__class__" and inspect.isclass(value):
            continue

        try:
            repr_value = repr(value)
        except Exception:
            # A user-defined __repr__ may raise anything; warn and skip the
            # variable rather than abort the whole capture.
            warnings.warn(
                f"Cannot repr closure variable '{name}' in "
                f"'{func.__qualname__}'",
                stacklevel=3,
            )
            continue

        try:
            ast.parse(repr_value, mode="eval")
            closure_vars[name] = repr_value
            continue
        except SyntaxError:
            pass

        # Module objects from inline imports (e.g. `import time as _time`)
        if inspect.ismodule(value):
            mod_name = value.__name__
            # A bare ``import a.b`` binds the top-level package ``a``, not
            # ``a.b``. It is therefore only equivalent when the closure name
            # is the module's full (undotted) name; every other case needs
            # an explicit alias so the name refers to this exact module.
            if name == mod_name:
                stmt = f"import {mod_name}"
            else:
                stmt = f"import {mod_name} as {name}"
            closure_module_imports.append(ImportInfo(statement=stmt, bound_name=name))
            logger.debug("Closure var '%s' is module %s", name, mod_name)
            continue

        # Callables: prefer source-level capture over serialization
        if getattr(value, "__offwork_traced__", False):
            unwrapped = value
            while hasattr(unwrapped, "__wrapped__"):
                unwrapped = unwrapped.__wrapped__
            ref_qname = f"{unwrapped.__module__}.{unwrapped.__qualname__}"
            closure_func_refs[name] = ref_qname
            logger.debug("Closure var '%s' is traced function %s", name, ref_qname)
            continue
        if callable(value) and getattr(value, "__name__", "") == "<lambda>":
            lambda_src = _try_get_lambda_source(value)
            if lambda_src is not None:
                closure_vars[name] = lambda_src
                logger.debug("Closure var '%s' is lambda: %s", name, lambda_src)
                continue
        if callable(value) and _is_user_function(value):
            ref_qname = f"{value.__module__}.{value.__qualname__}"
            closure_func_refs[name] = ref_qname
            closure_func_objects[ref_qname] = value
            logger.debug("Closure var '%s' is untraced user function %s", name, ref_qname)
            continue

        # User-defined classes: auto-register all their methods
        if inspect.isclass(value) and _is_user_class(value):
            closure_classes[name] = value
            logger.debug("Closure var '%s' is user class %s", name, value.__qualname__)
            continue

        # Non-callable fallbacks
        ctor_expr = _try_constructor_expr(value)
        if ctor_expr is not None:
            closure_vars[name] = ctor_expr
            logger.debug("Closure var '%s' captured via constructor expression", name)
            continue

        pickle_expr = _try_pickle_fallback(value)
        if pickle_expr is not None:
            closure_vars[name] = pickle_expr
            logger.debug("Closure var '%s' captured via pickle fallback", name)
            continue

        warnings.warn(
            f"Closure variable '{name}' in "
            f"'{func.__qualname__}' (type: {type(value).__name__}) "
            f"cannot be serialized: repr is not valid Python "
            f"and not picklable",
            stacklevel=3,
        )

    return captured
242
+
243
+
244
def _mermaid_node_id(qname: str) -> str:
    """Turn a qualified name into a Mermaid-friendly node identifier.

    Dots are replaced with underscores, as are the angle brackets that
    Python puts into qualified names of nested functions and lambdas
    (``<locals>``, ``<lambda>``), which Mermaid would otherwise read as
    part of its own node/edge syntax.
    """
    return qname.translate(str.maketrans({".": "_", "<": "_", ">": "_"}))
246
+
247
+
248
def _render_mermaid(subgraph: dict[str, FunctionNode], direction: str) -> str:
    """Build the Mermaid flowchart text for the given nodes.

    Methods are grouped into one ``subgraph`` per owner class; functions
    without an owner class are listed afterwards. An edge is drawn for each
    dependency that is itself a key of *subgraph*.

    Args:
        subgraph: Nodes to render, keyed by qualified name.
        direction: Inserted after ``graph`` on the first line.

    Returns:
        The flowchart source, terminated by a newline.
    """
    by_class: dict[str, list[FunctionNode]] = {}
    free_nodes: list[FunctionNode] = []
    for fn_node in subgraph.values():
        if fn_node.owner_class is None:
            free_nodes.append(fn_node)
        else:
            by_class.setdefault(fn_node.owner_class, []).append(fn_node)

    out: list[str] = [f"graph {direction}"]

    for owner, methods in by_class.items():
        # Only the last dotted component is used as the subgraph title.
        title = owner.rsplit(".", 1)[-1]
        out.append(f" subgraph {title}")
        for fn_node in methods:
            ident = _mermaid_node_id(fn_node.qualified_name)
            out.append(f' {ident}["{fn_node.name}"]')
        out.append(" end")

    for fn_node in free_nodes:
        ident = _mermaid_node_id(fn_node.qualified_name)
        out.append(f' {ident}["{fn_node.name}"]')

    for fn_node in subgraph.values():
        origin = _mermaid_node_id(fn_node.qualified_name)
        out.extend(
            f" {origin} --> {_mermaid_node_id(target)}"
            for target in fn_node.dependencies
            if target in subgraph
        )

    return "\n".join(out) + "\n"
279
+
280
+
281
+ class Graph(TracingMixin):
282
+ """Dependency graph of traced functions."""
283
+
284
+ _default: "Graph | None" = None
285
+
286
def __init__(self) -> None:
    """Create an empty graph with nothing registered."""
    # Registered nodes and their callables, both keyed by qualified name.
    self._nodes: dict[str, FunctionNode] = {}
    self._funcs: dict[str, Callable[..., object]] = {}

    # Context-local call stack; not read in this part of the file.
    self._call_stack: contextvars.ContextVar[list[str]] = contextvars.ContextVar(
        "offwork_call_stack"
    )

    # Caller -> callees; read under ``_lock`` by ``_merge_runtime_deps``.
    self._runtime_deps: dict[str, set[str]] = {}
    self._lock: threading.Lock = threading.Lock()

    # Class keys currently inside ``_auto_register_class`` (re-entry guard).
    self._classes_in_progress: set[str] = set()

    # Extra edges merged into ``dependencies`` by ``refresh``.
    self._inclusion_deps: dict[str, set[str]] = {}
296
+
297
@classmethod
def default(cls) -> "Graph":
    """Return the shared default graph used by ``@trace``, creating it on first use."""
    shared = cls._default
    if shared is None:
        shared = cls._default = Graph()
    return shared
303
+
304
@classmethod
def reset_default(cls) -> None:
    """Drop the cached default graph.

    The next :meth:`default` call builds a new, empty one.
    """
    cls._default = None
308
+
309
@property
def nodes(self) -> dict[str, FunctionNode]:
    """A shallow copy of the registered nodes, keyed by qualified name.

    Changing the returned dict does not change the graph; the node objects
    themselves are shared.
    """
    return {**self._nodes}
313
+
314
+ # -- Registration ----------------------------------------------------------
315
+
316
@staticmethod
def _unwrap_func(func: Callable[..., object]) -> Callable[..., object]:
    """Follow ``__wrapped__`` links and return the innermost callable."""
    innermost = func
    while True:
        try:
            innermost = innermost.__wrapped__
        except AttributeError:
            # No further wrapper layer.
            return innermost
322
+
323
def _build_dependencies(
    self,
    analysis: _AnalysisResult,
    qualified_name: str,
    module: str,
    closure_func_refs: dict[str, str],
) -> list[str]:
    """Collect the dependency list for one function.

    Starts from what ``detect_traced_dependencies`` finds in the source,
    then appends closure-referenced functions that are not already listed.
    The function itself is never included.

    Args:
        analysis: Static analysis result for the function.
        qualified_name: The function's own qualified name.
        module: Module name passed on to the detector.
        closure_func_refs: Closure variable name -> qualified name.

    Returns:
        Qualified names of the dependencies, without duplicates from the
        closure pass.
    """
    detected = detect_traced_dependencies(
        analysis.source, module, self._nodes,
        owner_class=analysis.owner_class,
    )
    collected = [candidate for candidate in detected if candidate != qualified_name]
    for target in closure_func_refs.values():
        if target == qualified_name or target in collected:
            continue
        collected.append(target)
    return collected
342
+
343
def register(self, func: Callable[..., object]) -> None:
    """Add *func* to the graph so it can be traced and run remotely.

    The function is unwrapped, statically analysed, and its closure is
    captured. Untraced functions and user classes found in the closure are
    auto-registered first. Finally a :class:`FunctionNode` is stored and the
    whole graph is refreshed.

    Raises:
        Error: If analysis fails with ``OSError`` or ``TypeError`` (source
            code unavailable).
    """
    target = self._unwrap_func(func)
    module_name = target.__module__
    qualified_name = f"{module_name}.{target.__qualname__}"
    logger.info("Registering %s", qualified_name)

    try:
        analysis = _analyze_function(target)
    except (OSError, TypeError) as exc:
        logger.info("Cannot register %s: source unavailable", qualified_name)
        raise Error(
            f"Cannot trace function '{target.__qualname__}': source code "
            "unavailable. Functions must be defined in .py source files."
        ) from exc

    (
        closure_vars,
        closure_func_refs,
        closure_func_objects,
        closure_module_imports,
        closure_classes,
    ) = _capture_closure(target)

    # Pull in whatever the closure refers to before building this node.
    for ref_qname, helper in closure_func_objects.items():
        if ref_qname not in self._nodes:
            self._auto_register(helper)
    for closure_cls in closure_classes.values():
        self._auto_register_class(closure_cls)

    imports = analysis.imports
    bound = {imp.bound_name for imp in imports}

    # Add module imports from closures (inline imports like `import time as _time`)
    for imp in closure_module_imports:
        if imp.bound_name in bound:
            continue
        imports.append(imp)
        bound.add(imp.bound_name)

    # Captured closure expressions may themselves need module imports.
    if closure_vars:
        closure_names: set[str] = set()
        for expr in closure_vars.values():
            closure_names |= get_used_names(expr)
        if closure_names:
            for imp in get_module_imports(target):
                if imp.bound_name in closure_names and imp.bound_name not in bound:
                    imports.append(imp)
                    bound.add(imp.bound_name)

    dependencies = self._build_dependencies(
        analysis, qualified_name, module_name, closure_func_refs,
    )

    self._nodes[qualified_name] = FunctionNode(
        qualified_name=qualified_name,
        name=target.__name__,
        module=module_name,
        source=analysis.source,
        imports=imports,
        dependencies=dependencies,
        owner_class=analysis.owner_class,
        closure_vars=closure_vars,
        closure_func_refs=closure_func_refs,
        module_vars=analysis.module_vars,
    )
    self._funcs[qualified_name] = target

    logger.debug(
        "Registered %s: %d imports, %d deps, %d closure vars, "
        "%d closure func refs",
        qualified_name, len(imports), len(dependencies),
        len(closure_vars), len(closure_func_refs),
    )

    self.refresh()
412
+
413
+ # -- Auto-discovery --------------------------------------------------------
414
+
415
def _auto_register(self, func: Callable[..., object]) -> bool:
    """Auto-register an untraced function into the graph.

    Args:
        func: The function to add. It is used as given (not unwrapped).

    Returns:
        ``True`` when a new node was added. ``False`` when the function is
        already registered, is not a user function, or its source cannot be
        analysed; only the last case emits a warning so the user knows a
        dependency could not be captured.
    """
    qualified_name = f"{func.__module__}.{func.__qualname__}"
    if qualified_name in self._nodes:
        return False
    if not _is_user_function(func):
        return False

    try:
        analysis = _analyze_function(func)
    except (OSError, TypeError, SyntaxError):
        warnings.warn(
            f"Cannot auto-register dependency '{func.__qualname__}': "
            "source code unavailable. The reconstructed code may be "
            "incomplete.",
            stacklevel=2,
        )
        return False

    closure_vars, closure_func_refs, closure_func_objects, closure_module_imports, closure_classes = _capture_closure(func)

    # Add module imports from closures (inline imports)
    existing_names = {imp.bound_name for imp in analysis.imports}
    for imp in closure_module_imports:
        if imp.bound_name not in existing_names:
            analysis.imports.append(imp)
            existing_names.add(imp.bound_name)

    # Add imports needed by closure var expressions
    if closure_vars:
        closure_names: set[str] = set()
        for cv in closure_vars.values():
            closure_names |= get_used_names(cv)
        if closure_names:
            try:
                all_imports = get_module_imports(func)
                for imp in all_imports:
                    if imp.bound_name in closure_names and imp.bound_name not in existing_names:
                        analysis.imports.append(imp)
                        existing_names.add(imp.bound_name)
            except (OSError, TypeError):
                # Best effort: keep the imports gathered so far.
                pass

    # Same dependency rules as ``register``: share the helper.
    dependencies = self._build_dependencies(
        analysis, qualified_name, func.__module__, closure_func_refs,
    )

    node = FunctionNode(
        qualified_name=qualified_name,
        name=func.__name__,
        module=func.__module__,
        source=analysis.source,
        imports=analysis.imports,
        dependencies=dependencies,
        owner_class=analysis.owner_class,
        closure_vars=closure_vars,
        closure_func_refs=closure_func_refs,
        module_vars=analysis.module_vars,
    )
    self._nodes[qualified_name] = node
    self._funcs[qualified_name] = func
    logger.info("Auto-registered untraced dependency %s", qualified_name)

    # Auto-register closure function deps (after node is in self._nodes to prevent re-entry)
    for ref_qname, func_obj in closure_func_objects.items():
        if ref_qname not in self._nodes:
            self._auto_register(func_obj)

    for cls_obj in closure_classes.values():
        self._auto_register_class(cls_obj)

    self._discover_untraced_deps(func.__module__, node)
    return True
499
+
500
def _auto_register_class(self, cls: type) -> None:
    """Register every user-defined method found in ``cls.__dict__``.

    Afterwards class metadata and base classes are resolved. The class key
    is held in ``_classes_in_progress`` for the duration of the call, so a
    recursive call for the same class returns immediately.
    """
    module_name = cls.__module__
    class_name = cls.__name__
    in_progress = self._classes_in_progress
    cls_key = f"{module_name}.{cls.__qualname__}"
    if cls_key in in_progress:
        return

    in_progress.add(cls_key)
    try:
        for attr_name, raw in cls.__dict__.items():
            if isinstance(raw, (classmethod, staticmethod)):
                # Unwrap the descriptor to reach the underlying function.
                func = raw.__func__
            elif inspect.isfunction(raw):
                func = raw
            else:
                continue
            if _is_user_function(func) and f"{module_name}.{class_name}.{attr_name}" not in self._nodes:
                self._auto_register(func)

        self._set_class_metadata(cls)
        self._resolve_class_bases(cls)

        # Subclass registry pattern: a class that defines
        # ``__init_subclass__`` is populated by its subclasses being
        # defined, and the traced source may only reach them indirectly
        # (e.g. by name). Register every user-defined subclass so its
        # definition runs the parent hook again on reconstruction.
        if "__init_subclass__" in cls.__dict__:
            for child in cls.__subclasses__():
                if _is_user_class(child):
                    self._auto_register_class(child)
    finally:
        in_progress.discard(cls_key)
537
+
538
def _set_class_metadata(self, cls: type) -> None:
    """Copy class attributes and decorators onto the class's method nodes.

    Method nodes are those whose ``owner_class`` equals ``cls.__name__`` and
    whose ``module`` equals ``cls.__module__``. Each one also receives the
    module imports needed by names used in the attributes/decorators, plus
    inclusion edges to methods of user classes those names refer to.
    """
    attrs, decorators = get_class_attrs(cls)
    if not (attrs or decorators):
        return

    owner = cls.__name__
    module_name = cls.__module__

    extra_names: set[str] = set()
    for snippet in (*attrs, *decorators):
        extra_names |= get_used_names(snippet)

    # User classes referenced from class-body RHS (e.g. descriptors like
    # ``field = Doubler()``) are not visible to bare-call discovery on
    # function bodies; register them here so they survive reconstruction.
    ref_method_qnames = self._register_class_attr_refs(cls, extra_names)

    for node in self._nodes.values():
        if node.owner_class != owner or node.module != module_name:
            continue

        node.class_attrs = attrs
        node.class_decorators = decorators

        for ref_qname in ref_method_qnames:
            if ref_qname != node.qualified_name:
                self._inclusion_deps.setdefault(node.qualified_name, set()).add(ref_qname)

        if not extra_names:
            continue

        bound = {imp.bound_name for imp in node.imports}
        try:
            # Any registered function of the module gives access to the
            # module's imports.
            probe = next(
                candidate for candidate in self._funcs.values()
                if candidate.__module__ == module_name
            )
            for imp in get_module_imports(probe):
                if imp.bound_name in extra_names and imp.bound_name not in bound:
                    node.imports.append(imp)
                    bound.add(imp.bound_name)
        except StopIteration:
            # No registered function from this module: nothing to add.
            pass
579
+
580
def _register_class_attr_refs(
    self, cls: type, extra_names: set[str]
) -> list[str]:
    """Auto-register user classes that the class body refers to by name.

    Each name in *extra_names* is looked up on the module of *cls*; names
    that resolve to a user-defined class other than *cls* are registered.

    Returns:
        The qualified names of every registered method node belonging to
        those classes, so callers can add edges that keep them in the
        subgraph. Empty when there are no names or the module is not in
        ``sys.modules``.
    """
    module_obj = sys.modules.get(cls.__module__) if extra_names else None
    if module_obj is None:
        return []

    collected: list[str] = []
    for name in extra_names:
        if name in _BUILTIN_NAMES:
            continue
        target = getattr(module_obj, name, None)
        if target is None or target is cls:
            continue
        if not inspect.isclass(target) or not _is_user_class(target):
            continue
        self._auto_register_class(target)
        collected.extend(
            ref_node.qualified_name
            for ref_node in self._nodes.values()
            if ref_node.owner_class == target.__name__
            and ref_node.module == target.__module__
        )
    return collected
611
+
612
def _resolve_class_bases(self, cls: type) -> None:
    """Detect class bases and store them on method nodes.

    Steps:

    1. Store the bases and class keywords found in the source on every
       method node of *cls*, adding module imports needed by the keyword
       expressions.
    2. Auto-register every user-defined class in the MRO of *cls*.
    3. For each direct user-defined base, add inclusion edges from every
       method of *cls* to every registered method of that base. This does
       not depend on ``super()`` appearing in the subclass body.

    Nothing is done when the source shows neither bases nor keywords.
    """
    class_name = cls.__name__
    module_name = cls.__module__

    bases, keywords = get_class_bases_from_source(cls)
    if not bases and not keywords:
        return

    keyword_names: set[str] = set()
    for v in keywords.values():
        keyword_names |= get_used_names(v)

    for node in self._nodes.values():
        if node.owner_class == class_name and node.module == module_name:
            node.class_bases = bases
            node.class_keywords = keywords
            if keyword_names:
                existing_names = {imp.bound_name for imp in node.imports}
                try:
                    any_func = next(
                        f for f in self._funcs.values()
                        if f.__module__ == module_name
                    )
                    all_imports = get_module_imports(any_func)
                    for imp in all_imports:
                        if imp.bound_name in keyword_names and imp.bound_name not in existing_names:
                            node.imports.append(imp)
                            # Track the new name so a second import bound
                            # to the same name is not appended as well.
                            existing_names.add(imp.bound_name)
                except StopIteration:
                    # No registered function from this module: nothing to add.
                    pass

    for base_cls in cls.__mro__[1:]:
        if base_cls is object:
            continue
        if _is_user_class(base_cls):
            self._auto_register_class(base_cls)

    # Add ordering edges from child methods to every parent method per
    # direct user base, so the topological reconstruction emits parents
    # first when the subclass is included via the registry pattern
    # (without relying on ``super()`` being present in the subclass body).
    for base_cls in cls.__bases__:
        if base_cls is object or not _is_user_class(base_cls):
            continue
        parent_method_qnames = [
            parent_node.qualified_name
            for parent_node in self._nodes.values()
            if (
                parent_node.owner_class == base_cls.__name__
                and parent_node.module == base_cls.__module__
            )
        ]
        if not parent_method_qnames:
            continue
        for child_node in self._nodes.values():
            if (
                child_node.owner_class != class_name
                or child_node.module != module_name
            ):
                continue
            bucket = self._inclusion_deps.setdefault(
                child_node.qualified_name, set()
            )
            for parent_qname in parent_method_qnames:
                if parent_qname != child_node.qualified_name:
                    bucket.add(parent_qname)
682
+
683
def _discover_untraced_deps(
    self, module_name: str, node: FunctionNode
) -> None:
    """Run every dependency-discovery pass for *node*.

    The passes need the live module object from ``sys.modules``; when it is
    missing a warning is emitted and nothing is discovered. Self-call
    discovery only runs for nodes that have an owner class.
    """
    module_obj = sys.modules.get(module_name)
    if module_obj is not None:
        self._discover_bare_call_deps(node, module_obj)
        if node.owner_class:
            self._discover_self_call_deps(node, module_obj, module_name)
        self._discover_init_subclass_deps(node, module_obj)
        return

    warnings.warn(
        f"Cannot auto-discover dependencies for "
        f"'{node.qualified_name}': module '{module_name}' not found "
        "in sys.modules.",
        stacklevel=2,
    )
701
+
702
def _discover_bare_call_deps(
    self,
    node: FunctionNode,
    module_obj: object,
) -> None:
    """Auto-register functions and classes that *node* calls by bare name.

    Each called name is resolved on *module_obj*. User classes and plain
    functions are registered. When the resolved object lives in a different
    module than *node*, the node's import of that name is removed (for
    functions, only if the function actually ended up in the graph).
    """
    stale_imports: list[ImportInfo] = []

    def mark_import_stale(bound_name: str) -> None:
        # Queue every import of the node that binds this name.
        stale_imports.extend(
            imp for imp in node.imports if imp.bound_name == bound_name
        )

    for name in find_bare_calls(node.source):
        if name in _BUILTIN_NAMES:
            continue
        target = getattr(module_obj, name, None)
        if target is None:
            continue

        if inspect.isclass(target) and _is_user_class(target):
            self._auto_register_class(target)
            if target.__module__ != node.module:
                mark_import_stale(name)
            continue

        if not inspect.isfunction(target):
            continue
        if target.__name__ != name:
            continue  # Skip aliased imports to avoid name mismatch

        self._auto_register(target)
        target_qname = f"{target.__module__}.{target.__qualname__}"
        if target_qname in self._nodes and target.__module__ != node.module:
            mark_import_stale(name)

    if stale_imports:
        node.imports = [imp for imp in node.imports if imp not in stale_imports]
737
+
738
def _discover_self_call_deps(
    self,
    node: FunctionNode,
    module_obj: object,
    module_name: str,
) -> None:
    """Discover and auto-register self.method() / cls.method() dependencies.

    The owner class is looked up on *module_obj* by the last dotted
    component of ``node.owner_class``. If that attribute is missing or is
    not a class, nothing is done. Otherwise each method called through
    ``self``/``cls`` that is not registered yet is auto-registered, and the
    class metadata and bases are resolved.

    Args:
        node: A method node; ``node.owner_class`` must be set.
        module_obj: The live module the node belongs to.
        module_name: Module name used to build qualified method names.
    """
    assert node.owner_class is not None
    class_simple = node.owner_class.rsplit(".", 1)[-1]
    cls_obj = getattr(module_obj, class_simple, None)
    # The module attribute may be an unrelated object that merely shares the
    # class's simple name; probing its ``__dict__`` would be meaningless and
    # can raise for objects that have none.
    if cls_obj is None or not inspect.isclass(cls_obj):
        return

    for method_name in find_self_calls(node.source):
        method_qname = f"{module_name}.{class_simple}.{method_name}"
        if method_qname in self._nodes:
            continue
        raw = cls_obj.__dict__.get(method_name)
        if raw is not None and isinstance(raw, (classmethod, staticmethod)):
            # Unwrap the descriptor to reach the underlying function.
            self._auto_register(raw.__func__)
        else:
            # getattr also finds methods inherited from base classes.
            method_obj = getattr(cls_obj, method_name, None)
            if method_obj is not None and inspect.isfunction(method_obj):
                self._auto_register(method_obj)

    self._set_class_metadata(cls_obj)
    self._resolve_class_bases(cls_obj)
766
+
767
def _discover_init_subclass_deps(
    self,
    node: FunctionNode,
    module_obj: object,
) -> None:
    """Pull subclasses of registry-style parents into the caller's deps.

    A class that defines its own ``__init_subclass__`` is affected by its
    subclasses merely being defined, even when the traced source never
    names them. For every such class used in *node*'s source, each user
    subclass is auto-registered and inclusion edges are added from *node*
    to all registered methods of that subclass.
    """
    own_qname = node.qualified_name
    referenced = find_bare_calls(node.source) | get_used_names(node.source)

    for name in referenced:
        if name in _BUILTIN_NAMES:
            continue
        parent = getattr(module_obj, name, None)
        if parent is None or not inspect.isclass(parent):
            continue
        if "__init_subclass__" not in parent.__dict__:
            continue

        # Skip when this node is itself a method of the class or of one of
        # its ancestors; adding child-class edges from a parent method
        # causes cycles via the parent ordering edges added in
        # ``_resolve_class_bases``.
        if node.owner_class is not None:
            node_cls = getattr(module_obj, node.owner_class.rsplit(".", 1)[-1], None)
            if inspect.isclass(node_cls) and (
                node_cls is parent or issubclass(parent, node_cls)
            ):
                continue

        for child in parent.__subclasses__():
            if not _is_user_class(child):
                continue
            self._auto_register_class(child)
            for child_node in self._nodes.values():
                if child_node.owner_class != child.__name__:
                    continue
                if child_node.module != child.__module__:
                    continue
                if child_node.qualified_name == own_qname:
                    continue
                self._inclusion_deps.setdefault(own_qname, set()).add(
                    child_node.qualified_name
                )
811
+
812
+ # -- Refresh & dependency merging ------------------------------------------
813
+
814
def refresh(self) -> None:
    """Re-analyze all registered functions to update dependencies.

    First runs dependency discovery for every node (which may register new
    nodes), then rebuilds each node's ``dependencies`` list from, in order:
    statically detected dependencies, closure function references, and
    inclusion edges that point at registered nodes. The node itself is
    never listed and the previous list is replaced.
    """
    # Discovery can add nodes, so iterate over a snapshot.
    for node in list(self._nodes.values()):
        self._discover_untraced_deps(node.module, node)

    for qname, node in self._nodes.items():
        deps = [
            dep for dep in detect_traced_dependencies(
                node.source, node.module, self._nodes,
                owner_class=node.owner_class,
            )
            if dep != qname
        ]
        for ref_qname in node.closure_func_refs.values():
            if ref_qname != qname and ref_qname not in deps:
                deps.append(ref_qname)
        # Inclusion edges are kept in a set, whose iteration order is not
        # stable between runs; sort so the resulting list is reproducible.
        for incl_qname in sorted(self._inclusion_deps.get(qname, ())):
            if incl_qname in self._nodes and incl_qname not in deps:
                deps.append(incl_qname)
        node.dependencies = deps
834
+
835
+ def _add_super_deps(self) -> None:
836
+ """Add dependency edges from methods using super() to parent class methods."""
837
+ for qname, node in self._nodes.items():
838
+ if not node.owner_class or not node.class_bases:
839
+ continue
840
+ if not has_super_call(node.source):
841
+ continue
842
+ for parent_qname, parent_node in self._nodes.items():
843
+ if parent_node.owner_class is None:
844
+ continue
845
+ if parent_node.owner_class in node.class_bases and parent_qname not in node.dependencies:
846
+ node.dependencies.append(parent_qname)
847
+ logger.debug("Super dep: %s -> %s", qname, parent_qname)
848
+
849
+ def _merge_runtime_deps(self) -> None:
850
+ """Merge runtime-discovered dependencies into node dependency lists."""
851
+ with self._lock:
852
+ pending = dict(self._runtime_deps)
853
+ added = 0
854
+ for caller_qname, callees in pending.items():
855
+ node = self._nodes.get(caller_qname)
856
+ if node is None:
857
+ continue
858
+ existing = set(node.dependencies)
859
+ new_edges = {c for c in callees if c in self._nodes} - existing
860
+ if not new_edges:
861
+ continue
862
+ node.dependencies = sorted(existing | new_edges)
863
+ added += len(new_edges)
864
+ for dep in sorted(new_edges):
865
+ logger.debug("Runtime dep: %s -> %s", caller_qname, dep)
866
+ if added:
867
+ logger.info("Merged %d runtime dependency edges", added)
868
+
869
+ # -- Serialization ---------------------------------------------------------
870
+
871
+ def _resolve_name(self, name: str | Callable[..., object]) -> str:
872
+ if callable(name) and not isinstance(name, str):
873
+ unwrapped = inspect.unwrap(name)
874
+ return f"{unwrapped.__module__}.{unwrapped.__qualname__}"
875
+ for qname, node in self._nodes.items():
876
+ if qname == name or node.name == name:
877
+ return qname
878
+ raise KeyError(f"Function '{name}' not found in graph")
879
+
880
+ def _collect_subgraph(self, root_names: list[str]) -> dict[str, FunctionNode]:
881
+ collected: dict[str, FunctionNode] = {}
882
+ stack = list(root_names)
883
+ while stack:
884
+ qname = stack.pop()
885
+ if qname in collected:
886
+ continue
887
+ node = self._nodes[qname]
888
+ collected[qname] = node
889
+ stack.extend(node.dependencies)
890
+ return collected
891
+
892
def to_store(self, *funcs: Callable[..., object] | str) -> Store:
    """Build a :class:`Store` from this graph.

    Dependency lists are brought up to date first (static refresh,
    ``super()`` edges, runtime edges); then every selected node is put
    into a new store, referenced by its qualified name and linked to
    the hashes of its dependencies.

    Args:
        *funcs: If given, only include these functions and their
            transitive dependencies.  Otherwise the full graph.

    Returns:
        The populated store.  Dependencies that fall outside the
        selected nodes are omitted from the stored dependency lists.
    """
    self.refresh()
    self._add_super_deps()
    self._merge_runtime_deps()

    if not funcs:
        selected = dict(self._nodes)
        logger.info("Serializing full graph: %d nodes", len(selected))
    else:
        roots = [self._resolve_name(f) for f in funcs]
        selected = self._collect_subgraph(roots)
        logger.info(
            "Serializing subgraph: %d/%d nodes",
            len(selected), len(self._nodes),
        )

    store = Store()
    hashes: dict[str, str] = {}

    # First pass: store every node so that all hashes are known.
    for qname, node in selected.items():
        digest = store.put(node)
        hashes[qname] = digest
        store.set_ref(qname, digest)

    # Second pass: translate dependency names into hashes.
    for qname, node in selected.items():
        store.set_deps(
            hashes[qname],
            [hashes[dep] for dep in node.dependencies if dep in hashes],
        )

    return store
931
+
932
+ def serialize(self, *funcs: Callable[..., object] | str) -> str:
933
+ """Serialize the graph (or a subgraph) to a JSON string."""
934
+ return self.to_store(*funcs).to_json()
935
+
936
@classmethod
def deserialize_graph(cls, json_str: str) -> Self:
    """Reconstruct a Graph from a serialized JSON string.

    Args:
        json_str: JSON text accepted by ``Store.from_json``.

    Returns:
        A new graph holding one node per ref in the store.  Refs whose
        blob is missing are skipped.  Dependency and closure-reference
        hashes that have no ref are kept as the raw hash string.
    """
    store = Store.from_json(json_str)
    graph = cls()
    # Reverse index, used only to translate hashes back into names.
    hash_to_qname = {h: qn for qn, h in store.refs.items()}

    # Walk the refs themselves rather than the reverse index: if two
    # qualified names point at the same content hash, the reverse index
    # keeps only one of them and the other node would be lost.
    for qname, content_hash in store.refs.items():
        blob = store.get(content_hash)
        if blob is None:
            continue
        dep_qnames = [
            hash_to_qname.get(dep, dep) for dep in store.get_deps(content_hash)
        ]
        closure_func_refs = {
            var: hash_to_qname.get(ref_h, ref_h)
            for var, ref_h in blob.get("closure_func_refs", {}).items()
        }
        node = FunctionNode(
            qualified_name=qname,
            name=blob["name"],
            module=blob["module"],
            source=blob["source"],
            imports=[ImportInfo.from_dict(imp) for imp in blob["imports"]],
            dependencies=dep_qnames,
            owner_class=blob.get("owner_class"),
            closure_vars=blob.get("closure_vars", {}),
            closure_func_refs=closure_func_refs,
            module_vars=blob.get("module_vars", {}),
            class_bases=blob.get("class_bases", []),
            class_keywords=blob.get("class_keywords", {}),
            class_attrs=blob.get("class_attrs", []),
            class_decorators=blob.get("class_decorators", []),
        )
        graph._nodes[node.qualified_name] = node
    return graph
972
+
973
@staticmethod
def reconstruct(json_str: str, function_name: str) -> str:
    """Reconstruct executable Python source from serialized JSON.

    Args:
        json_str: JSON text accepted by ``Store.from_json``.
        function_name: Passed through to the store's ``reconstruct``.

    Returns:
        Whatever source text the store reconstructs for that function.
    """
    return Store.from_json(json_str).reconstruct(function_name)
978
+
979
+ # -- Visualization ---------------------------------------------------------
980
+
981
def to_mermaid(
    self,
    *funcs: Callable[..., object] | str,
    direction: str = "TD",
) -> str:
    """Render the dependency graph as a Mermaid flowchart.

    Runtime-discovered edges are merged in before rendering.

    Args:
        *funcs: If given, only these functions and their transitive
            dependencies are drawn.  Otherwise the full graph.
        direction: Passed through to the renderer; defaults to ``"TD"``.

    Returns:
        The text produced by ``_render_mermaid``.
    """
    self._merge_runtime_deps()

    selected = (
        self._collect_subgraph([self._resolve_name(f) for f in funcs])
        if funcs
        else dict(self._nodes)
    )
    return _render_mermaid(selected, direction)