query-cascade 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- query_cascade-0.1.1/PKG-INFO +153 -0
- query_cascade-0.1.1/README.md +146 -0
- query_cascade-0.1.1/pyproject.toml +25 -0
- query_cascade-0.1.1/setup.cfg +4 -0
- query_cascade-0.1.1/src/cascade/__init__.py +19 -0
- query_cascade-0.1.1/src/cascade/engine.py +699 -0
- query_cascade-0.1.1/src/query_cascade.egg-info/PKG-INFO +153 -0
- query_cascade-0.1.1/src/query_cascade.egg-info/SOURCES.txt +9 -0
- query_cascade-0.1.1/src/query_cascade.egg-info/dependency_links.txt +1 -0
- query_cascade-0.1.1/src/query_cascade.egg-info/top_level.txt +1 -0
- query_cascade-0.1.1/tests/test_engine.py +647 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: query-cascade
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Minimal demand-driven query framework for incremental computation.
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
|
|
8
|
+
# Cascade Query
|
|
9
|
+
|
|
10
|
+
`query-cascade` is a minimal, demand-driven incremental computation framework for Python.
|
|
11
|
+
|
|
12
|
+
It is designed for compiler-like workloads where you want:
|
|
13
|
+
|
|
14
|
+
- lazy pull-based evaluation
|
|
15
|
+
- precise dependency tracking
|
|
16
|
+
- red-green early bailout (backdating)
|
|
17
|
+
- query dedup across concurrent callers
|
|
18
|
+
- snapshot isolation for concurrent reads
|
|
19
|
+
- safe cancellation of obsolete background work
|
|
20
|
+
- side-effect replay on cache hits
|
|
21
|
+
- persistence + graph inspection
|
|
22
|
+
|
|
23
|
+
## Minimal API
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
from cascade import Engine
|
|
27
|
+
|
|
28
|
+
engine = Engine()
|
|
29
|
+
warnings = engine.accumulator("warnings")
|
|
30
|
+
|
|
31
|
+
@engine.input
|
|
32
|
+
def source(file_id: str) -> str:
|
|
33
|
+
return ""
|
|
34
|
+
|
|
35
|
+
@engine.query
|
|
36
|
+
def parse(file_id: str) -> tuple[str, ...]:
|
|
37
|
+
return tuple(line.strip() for line in source(file_id).splitlines() if line.strip())
|
|
38
|
+
|
|
39
|
+
@engine.query
|
|
40
|
+
def symbols(file_id: str) -> tuple[str, ...]:
|
|
41
|
+
return tuple(row.split("=")[0].strip() for row in parse(file_id))
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Primitives
|
|
45
|
+
|
|
46
|
+
- `engine.input(fn)`
|
|
47
|
+
Wraps mutable roots. Use `.set(...)` to create new revisions.
|
|
48
|
+
- `engine.query(fn)`
|
|
49
|
+
Wraps pure demand-driven queries with memoization and dependency capture.
|
|
50
|
+
- `engine.accumulator(name)`
|
|
51
|
+
Creates thread-safe side-effect channels replayed on cache hits.
|
|
52
|
+
- `engine.snapshot()`
|
|
53
|
+
Captures an immutable read view (`Snapshot`) for MVCC-like isolation.
|
|
54
|
+
- `engine.submit(query, *args, snapshot=...)`
|
|
55
|
+
Runs a query in the background with cancellation if inputs mutate.
|
|
56
|
+
- `engine.compute_many([(query, args), ...], workers=N)`
|
|
57
|
+
Multi-threaded execution with a work-stealing scheduler.
|
|
58
|
+
- `engine.inspect_graph()` / `engine.traces()`
|
|
59
|
+
Introspection hooks for diagnostics.
|
|
60
|
+
- `engine.save(path)` / `engine.load(path)`
|
|
61
|
+
Persist or recover graph/cached state from SQLite.
|
|
62
|
+
- `engine.prune(roots)`
|
|
63
|
+
Garbage-collect memoized subgraphs not reachable from roots.
|
|
64
|
+
|
|
65
|
+
## Design Notes
|
|
66
|
+
|
|
67
|
+
### What this framework guarantees
|
|
68
|
+
|
|
69
|
+
- **Smart recalculation**: only stale demand paths recompute.
|
|
70
|
+
- **Selective updates**: unchanged parents remain green after child backdating.
|
|
71
|
+
- **Query deduplication**: one in-flight compute serves all identical concurrent requests.
|
|
72
|
+
- **Cycle detection**: recursive query cycles raise `CycleError`.
|
|
73
|
+
- **Cancellation**: stale background queries raise `QueryCancelled`.
|
|
74
|
+
|
|
75
|
+
### Limitations and enforceability boundaries
|
|
76
|
+
|
|
77
|
+
- **CPython GIL and CPU-bound work**: `compute_many` and dedup execution are multi-threaded, but true CPU parallel speedup is only guaranteed when query bodies release the GIL (e.g. I/O waits, native extensions). For pure Python CPU-bound loops, overlap exists but throughput may remain effectively single-core.
|
|
78
|
+
- **Process-level durability model**: persistence is an explicit point-in-time snapshot (`save`/`load`), not a transactional WAL-backed MVCC store shared by multiple live processes.
|
|
79
|
+
- **Boundary of side-effect replay guarantees**: replay is guaranteed only for effects emitted through `Accumulator`; out-of-band side effects in query bodies (printing, network calls, filesystem writes) are intentionally not replayed.
|
|
80
|
+
- **Cycle handling scope**: direct and long-chain dynamic query cycles are detected and raised as `CycleError`; this engine does not implement fixed-point solvers for cyclic dataflow.
|
|
81
|
+
|
|
82
|
+
### What this framework intentionally does not include
|
|
83
|
+
|
|
84
|
+
To keep API surface minimal, this version does not include:
|
|
85
|
+
|
|
86
|
+
- nominal interning APIs (`@interned`) and tracked structs (`@tracked`)
|
|
87
|
+
- fixed-point cycle solvers
|
|
88
|
+
- distributed/shared cache protocols
|
|
89
|
+
|
|
90
|
+
Those can be layered on top without changing the core query model.
|
|
91
|
+
|
|
92
|
+
## Quickstart
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from cascade import Engine
|
|
96
|
+
|
|
97
|
+
engine = Engine()
|
|
98
|
+
|
|
99
|
+
@engine.input
|
|
100
|
+
def text() -> str:
|
|
101
|
+
return ""
|
|
102
|
+
|
|
103
|
+
@engine.query
|
|
104
|
+
def lint_count() -> int:
|
|
105
|
+
value = text()
|
|
106
|
+
return value.count("TODO")
|
|
107
|
+
|
|
108
|
+
text.set("TODO: one\nTODO: two")
|
|
109
|
+
assert lint_count() == 2
|
|
110
|
+
|
|
111
|
+
# No recompute needed if input did not semantically change.
|
|
112
|
+
text.set("TODO: one\nTODO: two")
|
|
113
|
+
assert lint_count() == 2
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Examples
|
|
117
|
+
|
|
118
|
+
- `examples/compiler_pipeline.py`
|
|
119
|
+
Tiny compiler pipeline (`source -> parse -> symbol_names -> typecheck`) with warnings accumulator.
|
|
120
|
+
- `examples/dynamic_macro_expansion.py`
|
|
121
|
+
Runtime macro-expansion query that dynamically changes downstream graph dependencies.
|
|
122
|
+
|
|
123
|
+
## Persistence and inspection
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
engine.save("state.db")
|
|
127
|
+
engine.load("state.db")
|
|
128
|
+
print(engine.inspect_graph())
|
|
129
|
+
for event in engine.traces():
|
|
130
|
+
print(event.event, event.key, event.detail)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## Design notes review
|
|
134
|
+
|
|
135
|
+
These design notes are largely directionally correct and align with state-of-the-art incremental systems:
|
|
136
|
+
|
|
137
|
+
- Correct: pull-based demand, red-green early bailout, dependency graph capture, dedup, MVCC snapshots, cancellation, side-effect replay, tracing, and persistence.
|
|
138
|
+
- Needs qualification in Python: true CPU-bound parallelism is constrained by the GIL unless query bodies release it (I/O/native extensions).
|
|
139
|
+
- Overreach for this minimal implementation: unsafe-pointer lifetime tricks, red/green syntax tree internals, and fixed-point cycle solving are advanced optimizations that are not required for a practical minimal API.
|
|
140
|
+
|
|
141
|
+
## Running tests
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
python -m pip install -e . pytest
|
|
145
|
+
pytest -q
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## CI best practices included
|
|
149
|
+
|
|
150
|
+
- GitHub Actions workflow at `.github/workflows/ci.yml`.
|
|
151
|
+
- Runs on both pushes and pull requests.
|
|
152
|
+
- Linting with `ruff` before tests.
|
|
153
|
+
- Separate package-build job (`python -m build`) to catch packaging regressions early.
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# Cascade Query
|
|
2
|
+
|
|
3
|
+
`query-cascade` is a minimal, demand-driven incremental computation framework for Python.
|
|
4
|
+
|
|
5
|
+
It is designed for compiler-like workloads where you want:
|
|
6
|
+
|
|
7
|
+
- lazy pull-based evaluation
|
|
8
|
+
- precise dependency tracking
|
|
9
|
+
- red-green early bailout (backdating)
|
|
10
|
+
- query dedup across concurrent callers
|
|
11
|
+
- snapshot isolation for concurrent reads
|
|
12
|
+
- safe cancellation of obsolete background work
|
|
13
|
+
- side-effect replay on cache hits
|
|
14
|
+
- persistence + graph inspection
|
|
15
|
+
|
|
16
|
+
## Minimal API
|
|
17
|
+
|
|
18
|
+
```python
|
|
19
|
+
from cascade import Engine
|
|
20
|
+
|
|
21
|
+
engine = Engine()
|
|
22
|
+
warnings = engine.accumulator("warnings")
|
|
23
|
+
|
|
24
|
+
@engine.input
|
|
25
|
+
def source(file_id: str) -> str:
|
|
26
|
+
return ""
|
|
27
|
+
|
|
28
|
+
@engine.query
|
|
29
|
+
def parse(file_id: str) -> tuple[str, ...]:
|
|
30
|
+
return tuple(line.strip() for line in source(file_id).splitlines() if line.strip())
|
|
31
|
+
|
|
32
|
+
@engine.query
|
|
33
|
+
def symbols(file_id: str) -> tuple[str, ...]:
|
|
34
|
+
return tuple(row.split("=")[0].strip() for row in parse(file_id))
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### Primitives
|
|
38
|
+
|
|
39
|
+
- `engine.input(fn)`
|
|
40
|
+
Wraps mutable roots. Use `.set(...)` to create new revisions.
|
|
41
|
+
- `engine.query(fn)`
|
|
42
|
+
Wraps pure demand-driven queries with memoization and dependency capture.
|
|
43
|
+
- `engine.accumulator(name)`
|
|
44
|
+
Creates thread-safe side-effect channels replayed on cache hits.
|
|
45
|
+
- `engine.snapshot()`
|
|
46
|
+
Captures an immutable read view (`Snapshot`) for MVCC-like isolation.
|
|
47
|
+
- `engine.submit(query, *args, snapshot=...)`
|
|
48
|
+
Runs a query in the background with cancellation if inputs mutate.
|
|
49
|
+
- `engine.compute_many([(query, args), ...], workers=N)`
|
|
50
|
+
Multi-threaded execution with a work-stealing scheduler.
|
|
51
|
+
- `engine.inspect_graph()` / `engine.traces()`
|
|
52
|
+
Introspection hooks for diagnostics.
|
|
53
|
+
- `engine.save(path)` / `engine.load(path)`
|
|
54
|
+
Persist or recover graph/cached state from SQLite.
|
|
55
|
+
- `engine.prune(roots)`
|
|
56
|
+
Garbage-collect memoized subgraphs not reachable from roots.
|
|
57
|
+
|
|
58
|
+
## Design Notes
|
|
59
|
+
|
|
60
|
+
### What this framework guarantees
|
|
61
|
+
|
|
62
|
+
- **Smart recalculation**: only stale demand paths recompute.
|
|
63
|
+
- **Selective updates**: unchanged parents remain green after child backdating.
|
|
64
|
+
- **Query deduplication**: one in-flight compute serves all identical concurrent requests.
|
|
65
|
+
- **Cycle detection**: recursive query cycles raise `CycleError`.
|
|
66
|
+
- **Cancellation**: stale background queries raise `QueryCancelled`.
|
|
67
|
+
|
|
68
|
+
### Limitations and enforceability boundaries
|
|
69
|
+
|
|
70
|
+
- **CPython GIL and CPU-bound work**: `compute_many` and dedup execution are multi-threaded, but true CPU parallel speedup is only guaranteed when query bodies release the GIL (e.g. I/O waits, native extensions). For pure Python CPU-bound loops, overlap exists but throughput may remain effectively single-core.
|
|
71
|
+
- **Process-level durability model**: persistence is an explicit point-in-time snapshot (`save`/`load`), not a transactional WAL-backed MVCC store shared by multiple live processes.
|
|
72
|
+
- **Boundary of side-effect replay guarantees**: replay is guaranteed only for effects emitted through `Accumulator`; out-of-band side effects in query bodies (printing, network calls, filesystem writes) are intentionally not replayed.
|
|
73
|
+
- **Cycle handling scope**: direct and long-chain dynamic query cycles are detected and raised as `CycleError`; this engine does not implement fixed-point solvers for cyclic dataflow.
|
|
74
|
+
|
|
75
|
+
### What this framework intentionally does not include
|
|
76
|
+
|
|
77
|
+
To keep API surface minimal, this version does not include:
|
|
78
|
+
|
|
79
|
+
- nominal interning APIs (`@interned`) and tracked structs (`@tracked`)
|
|
80
|
+
- fixed-point cycle solvers
|
|
81
|
+
- distributed/shared cache protocols
|
|
82
|
+
|
|
83
|
+
Those can be layered on top without changing the core query model.
|
|
84
|
+
|
|
85
|
+
## Quickstart
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
from cascade import Engine
|
|
89
|
+
|
|
90
|
+
engine = Engine()
|
|
91
|
+
|
|
92
|
+
@engine.input
|
|
93
|
+
def text() -> str:
|
|
94
|
+
return ""
|
|
95
|
+
|
|
96
|
+
@engine.query
|
|
97
|
+
def lint_count() -> int:
|
|
98
|
+
value = text()
|
|
99
|
+
return value.count("TODO")
|
|
100
|
+
|
|
101
|
+
text.set("TODO: one\nTODO: two")
|
|
102
|
+
assert lint_count() == 2
|
|
103
|
+
|
|
104
|
+
# No recompute needed if input did not semantically change.
|
|
105
|
+
text.set("TODO: one\nTODO: two")
|
|
106
|
+
assert lint_count() == 2
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Examples
|
|
110
|
+
|
|
111
|
+
- `examples/compiler_pipeline.py`
|
|
112
|
+
Tiny compiler pipeline (`source -> parse -> symbol_names -> typecheck`) with warnings accumulator.
|
|
113
|
+
- `examples/dynamic_macro_expansion.py`
|
|
114
|
+
Runtime macro-expansion query that dynamically changes downstream graph dependencies.
|
|
115
|
+
|
|
116
|
+
## Persistence and inspection
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
engine.save("state.db")
|
|
120
|
+
engine.load("state.db")
|
|
121
|
+
print(engine.inspect_graph())
|
|
122
|
+
for event in engine.traces():
|
|
123
|
+
print(event.event, event.key, event.detail)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Design notes review
|
|
127
|
+
|
|
128
|
+
These design notes are largely directionally correct and align with state-of-the-art incremental systems:
|
|
129
|
+
|
|
130
|
+
- Correct: pull-based demand, red-green early bailout, dependency graph capture, dedup, MVCC snapshots, cancellation, side-effect replay, tracing, and persistence.
|
|
131
|
+
- Needs qualification in Python: true CPU-bound parallelism is constrained by the GIL unless query bodies release it (I/O/native extensions).
|
|
132
|
+
- Overreach for this minimal implementation: unsafe-pointer lifetime tricks, red/green syntax tree internals, and fixed-point cycle solving are advanced optimizations that are not required for a practical minimal API.
|
|
133
|
+
|
|
134
|
+
## Running tests
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
python -m pip install -e . pytest
|
|
138
|
+
pytest -q
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## CI best practices included
|
|
142
|
+
|
|
143
|
+
- GitHub Actions workflow at `.github/workflows/ci.yml`.
|
|
144
|
+
- Runs on both pushes and pull requests.
|
|
145
|
+
- Linting with `ruff` before tests.
|
|
146
|
+
- Separate package-build job (`python -m build`) to catch packaging regressions early.
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "query-cascade"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "Minimal demand-driven query framework for incremental computation."
|
|
9
|
+
requires-python = ">=3.10"
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
|
|
12
|
+
[tool.setuptools.packages.find]
|
|
13
|
+
where = ["src"]
|
|
14
|
+
|
|
15
|
+
[tool.pytest.ini_options]
|
|
16
|
+
pythonpath = ["src"]
|
|
17
|
+
testpaths = ["tests"]
|
|
18
|
+
|
|
19
|
+
[tool.coverage.run]
|
|
20
|
+
branch = true
|
|
21
|
+
source = ["cascade"]
|
|
22
|
+
|
|
23
|
+
[tool.coverage.report]
|
|
24
|
+
skip_covered = false
|
|
25
|
+
show_missing = true
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Public package surface for ``cascade``.

Re-exports the primitives defined in :mod:`cascade.engine` so callers can
write ``from cascade import Engine`` instead of importing the submodule.
"""

from .engine import (
    Accumulator,
    CancellationError,
    CycleError,
    Engine,
    QueryCancelled,
    Snapshot,
    TraceEvent,
)

# Explicit public API; kept in sync with the re-export list above so that
# ``from cascade import *`` and documentation tools see the same names.
__all__ = [
    "Accumulator",
    "CancellationError",
    "CycleError",
    "Engine",
    "QueryCancelled",
    "Snapshot",
    "TraceEvent",
]
|