forward-qpop 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- forward_qpop-0.1.1/.gitignore +53 -0
- forward_qpop-0.1.1/LICENSE +21 -0
- forward_qpop-0.1.1/PKG-INFO +102 -0
- forward_qpop-0.1.1/README.md +186 -0
- forward_qpop-0.1.1/data/synthetic/README.md +15 -0
- forward_qpop-0.1.1/examples/ai_supply_chain/README.md +94 -0
- forward_qpop-0.1.1/examples/template_domain/README.md +34 -0
- forward_qpop-0.1.1/pyproject.toml +44 -0
- forward_qpop-0.1.1/repro/README.md +68 -0
- forward_qpop-0.1.1/schemas/README.md +38 -0
- forward_qpop-0.1.1/src/README.md +74 -0
- forward_qpop-0.1.1/src/forward_qpop/README.md +80 -0
- forward_qpop-0.1.1/src/forward_qpop/__init__.py +60 -0
- forward_qpop-0.1.1/src/forward_qpop/anchor.py +172 -0
- forward_qpop-0.1.1/src/forward_qpop/cli.py +156 -0
- forward_qpop-0.1.1/src/forward_qpop/ledger.py +242 -0
- forward_qpop-0.1.1/src/forward_qpop/py.typed +0 -0
- forward_qpop-0.1.1/tests/README.md +18 -0
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Secrets / credentials — NEVER commit
|
|
2
|
+
.env
|
|
3
|
+
.env.*
|
|
4
|
+
*.key
|
|
5
|
+
*_secret*
|
|
6
|
+
**/secrets/**
|
|
7
|
+
|
|
8
|
+
# Live / private artifacts — keep this repo methods-only
|
|
9
|
+
**/live/**
|
|
10
|
+
**/brokerage/**
|
|
11
|
+
**/paid_data/**
|
|
12
|
+
*_live_*.jsonl
|
|
13
|
+
*_trades_*.csv
|
|
14
|
+
|
|
15
|
+
# Python
|
|
16
|
+
__pycache__/
|
|
17
|
+
*.py[cod]
|
|
18
|
+
.venv/
|
|
19
|
+
venv/
|
|
20
|
+
.pytest_cache/
|
|
21
|
+
*.egg-info/
|
|
22
|
+
build/
|
|
23
|
+
dist/
|
|
24
|
+
|
|
25
|
+
# OS / editor
|
|
26
|
+
.DS_Store
|
|
27
|
+
Thumbs.db
|
|
28
|
+
.idea/
|
|
29
|
+
.vscode/
|
|
30
|
+
|
|
31
|
+
# Allow synthetic / sample data explicitly (it is non-sensitive)
|
|
32
|
+
!data/synthetic/**
|
|
33
|
+
!examples/**/*sample*
|
|
34
|
+
!examples/**/*template*
|
|
35
|
+
|
|
36
|
+
# LaTeX build artifacts (paper now lives under research/)
|
|
37
|
+
research/paper/*.aux
|
|
38
|
+
research/paper/*.log
|
|
39
|
+
research/paper/*.bbl
|
|
40
|
+
research/paper/*.blg
|
|
41
|
+
research/paper/*.out
|
|
42
|
+
research/paper/*.pdf
|
|
43
|
+
research/paper/*.toc
|
|
44
|
+
|
|
45
|
+
# session / local agent data (never publish)
|
|
46
|
+
.remember/
|
|
47
|
+
|
|
48
|
+
# track the built paper PDF for readers / citers
|
|
49
|
+
!research/paper/paper.pdf
|
|
50
|
+
|
|
51
|
+
# generated anchor artifacts (run `forward-qpop anchor`)
|
|
52
|
+
*.anchor.json
|
|
53
|
+
*.ots
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Yixing Zheng
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: forward-qpop
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Tamper-evident, append-only forward pre-registration ledger for auditable, leakage-resistant research (the Forward-QPOP protocol).
|
|
5
|
+
Project-URL: Homepage, https://github.com/yixingz3/qpop
|
|
6
|
+
Project-URL: Repository, https://github.com/yixingz3/qpop
|
|
7
|
+
Project-URL: Paper, https://github.com/yixingz3/qpop/tree/main/research/paper
|
|
8
|
+
Author-email: Yixing Zheng <yz11739@stern.nyu.edu>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: auditability,forward-validation,hash-chain,look-ahead-bias,pre-registration,reproducibility,research-integrity,trustworthy-ai
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Requires-Python: >=3.9
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
|
|
23
|
+
# forward-qpop
|
|
24
|
+
|
|
25
|
+
**A tamper-evident, append-only forward pre-registration ledger for auditable,
|
|
26
|
+
leakage-resistant research.**
|
|
27
|
+
|
|
28
|
+
Register a hypothesis — a claim, dated evidence, measurable exit triggers, and a prior —
|
|
29
|
+
*before* the evaluation window opens. Each entry is content-hashed over its frozen fields and
|
|
30
|
+
chained to its predecessor, so the record proves **what was predicted** and that no entry was later
|
|
31
|
+
edited, inserted, deleted, or reordered. Proving it was registered *before* an outcome (wall-clock
|
|
32
|
+
time) needs an external anchor — see [`anchor` / `verify-anchor`](#cli).
|
|
33
|
+
|
|
34
|
+
This is the domain-agnostic core of the **Forward-QPOP** protocol from the methods paper
|
|
35
|
+
*Forward-Registered, Auditable LLM-Assisted Research* — useful for any pre-registered
|
|
36
|
+
work (ML experiments, forecasts, studies), not only the finance testbed in the paper.
|
|
37
|
+
|
|
38
|
+
- **Zero dependencies** — pure Python standard library.
|
|
39
|
+
- **`entry_hash = sha256(content_hash ‖ prev_hash)`** — a real hash chain, not just a set
|
|
40
|
+
of independently-hashed rows.
|
|
41
|
+
- **CI-friendly** — `forward-qpop verify ledger.jsonl` exits non-zero on tampering.
|
|
42
|
+
|
|
43
|
+
## Install
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
# from source (not yet on PyPI):
|
|
47
|
+
pip install "git+https://github.com/yixingz3/qpop"
|
|
48
|
+
# once the planned PyPI release of `forward-qpop` is live: pip install forward-qpop
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Quickstart
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from forward_qpop import Ledger
|
|
55
|
+
|
|
56
|
+
led = Ledger("ledger.jsonl")
|
|
57
|
+
|
|
58
|
+
# 1. Pre-register BEFORE the evaluation window:
|
|
59
|
+
led.register(
|
|
60
|
+
"H-AI-01",
|
|
61
|
+
claim="Method X reduces silent production-ML failures vs the ungated baseline.",
|
|
62
|
+
mechanism="Deterministic gates reject low-evidence candidates before the expensive step.",
|
|
63
|
+
prior=0.5,
|
|
64
|
+
evidence=[{"summary": "pilot result", "tier": "primary", "date": "2026-06-24"}],
|
|
65
|
+
exit_triggers=[{"id": "no_effect", "metric": "failure-rate delta", "op": "~0",
|
|
66
|
+
"data_source": {"tier": "primary"}}],
|
|
67
|
+
fields={"domain": "ai-reliability"}, # any domain-specific payload, also hashed
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
# 2. Record belief updates as evidence arrives (tertiary-only is blocked by default):
|
|
71
|
+
led.update("H-AI-01", evidence=[{"summary": "replication", "tier": "secondary",
|
|
72
|
+
"date": "2026-09-01"}])
|
|
73
|
+
|
|
74
|
+
# 3. Close with a pre-committed outcome (supported / weakened / falsified):
|
|
75
|
+
led.close("H-AI-01", "supported", observed={"failure_rate_delta": -0.31})
|
|
76
|
+
|
|
77
|
+
# 4. Verify integrity at any time:
|
|
78
|
+
res = led.verify()
|
|
79
|
+
print(res.ok, res.n_entries, res.problems)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## CLI
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
forward-qpop verify ledger.jsonl # exit 0 if intact, 1 (with details) if tampered
|
|
86
|
+
forward-qpop show ledger.jsonl # list entries
|
|
87
|
+
forward-qpop anchor ledger.jsonl # manifest committing to the head (bind to a public commit / OpenTimestamps)
|
|
88
|
+
forward-qpop verify-anchor ledger.jsonl # detect any drift since anchoring
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Why forward, and why a chain
|
|
92
|
+
|
|
93
|
+
A backward test of an LLM-scored process is structurally invalid (the outcomes are
|
|
94
|
+
already in the model's training data). Pre-registration replaces "fit the past" with
|
|
95
|
+
"commit, then observe the future"; the hash chain makes that commitment **auditable** — an external
|
|
96
|
+
reviewer can confirm that no past entry was silently changed. Proving the entry existed *before* its
|
|
97
|
+
outcome additionally requires binding the ledger head to an external, publicly-dated record (a pushed
|
|
98
|
+
Git commit, or OpenTimestamps) via `anchor` / `verify-anchor`.
|
|
99
|
+
|
|
100
|
+
## License
|
|
101
|
+
|
|
102
|
+
MIT. Part of <https://github.com/yixingz3/qpop>.
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
3
|
+
# qpop
|
|
4
|
+
|
|
5
|
+
**Your AI agent will admit any plausible idea. `qpop` makes it earn each one — gated, pre-registered, and falsifiable.**
|
|
6
|
+
|
|
7
|
+
A Claude Code plugin that turns an over-eager LLM into a disciplined researcher. The headline result is
|
|
8
|
+
counterintuitive: a well-run agent **rejects most of what it surfaces** — and that restraint, recorded
|
|
9
|
+
in a tamper-evident ledger, is the point.
|
|
10
|
+
|
|
11
|
+

|
|
12
|
+

|
|
13
|
+

|
|
14
|
+
<!--  -->
|
|
15
|
+
|
|
16
|
+
</div>
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
> **Two audiences.** **Claude Code users:** [install the plugin](#install-claude-code) — that's the whole setup. **Researchers / citers:** see [the paper](#two-pillars). *One project, three names: the plugin & repo `qpop`, the Python package `forward-qpop`, and the method, **Forward-QPOP**.*
|
|
21
|
+
|
|
22
|
+
## The problem
|
|
23
|
+
|
|
24
|
+
LLMs are brilliant at *sourcing* ideas and unreliable as *decision* engines. Left ungated, an LLM
|
|
25
|
+
screener **over-admits by construction** — it is rewarded for *finding* ideas, not *refusing* them —
|
|
26
|
+
and it confabulates, agrees with itself (sycophancy), and, when "backtested," already knows the answer
|
|
27
|
+
(look-ahead leakage). The missing piece isn't better prompts. It's **auditable restraint**.
|
|
28
|
+
|
|
29
|
+
## What `qpop` does
|
|
30
|
+
|
|
31
|
+
It wraps your agent's research in a discipline and makes every decision auditable:
|
|
32
|
+
|
|
33
|
+
- **Rejects most candidates, for defensible reasons** — deterministic gates + a bear case written
|
|
34
|
+
*before* the recommendation.
|
|
35
|
+
- **Pre-registers the survivors** to a tamper-evident, hash-chained ledger — the claim, dated
|
|
36
|
+
evidence, and measurable exit triggers, committed *before* the outcome is known.
|
|
37
|
+
- **Validates forward**, not by a leaky backtest.
|
|
38
|
+
|
|
39
|
+
> **Pilot evidence** (the methods paper, [`research/paper/`](research/paper)): removing any single
|
|
40
|
+
> discipline pushed an LLM screener's admission rate from **0% to 66–100%**; a held-out **LLM-auditor**
|
|
41
|
+
> judged **93%** of the rejections justified (n=14; an LLM-on-LLM diagnostic, not a human audit);
|
|
42
|
+
> "no action" is the modal outcome. *These are pilot metrics —
|
|
43
|
+
> they validate process discipline, not investment performance.*
|
|
44
|
+
|
|
45
|
+
## Status — what's runnable today
|
|
46
|
+
|
|
47
|
+
`qpop` is **auditable research infrastructure**, not a turnkey trading engine. What ships in this repo:
|
|
48
|
+
|
|
49
|
+
| Area | Status |
|
|
50
|
+
|---|---|
|
|
51
|
+
| Claude Code plugin discipline (`auditable-research` + `/qpop:*`) | **Working — v0.1** |
|
|
52
|
+
| Hash-chained Python ledger (`forward_qpop`) + CLI | **Working** (21/21 tests) |
|
|
53
|
+
| External timestamp anchor (`anchor` / `verify-anchor`) | **Working** — manifest + drift-detection + git / OpenTimestamps |
|
|
54
|
+
| JSON Schemas for cards / entries / runs | **Included** ([`schemas/`](schemas)) |
|
|
55
|
+
| Synthetic fixtures + worked examples | **Included** |
|
|
56
|
+
| Methods paper — theory + pilot evidence | **Included** ([PDF](research/paper/paper.pdf)) |
|
|
57
|
+
| Full `SOURCE → GATE → EVALUATE` engine (AI-supply-chain) | **Specified** — interfaces/contracts in [`src/`](src); the reference implementation is private and being generalized |
|
|
58
|
+
| PyPI package (`forward-qpop`) | **Planned** — install from source today |
|
|
59
|
+
| Forward performance results | **Pending — not claimed** |
|
|
60
|
+
|
|
61
|
+
Nothing here is finance-specific: the discipline and the ledger are domain-agnostic, and finance is a
|
|
62
|
+
deliberately *adversarial* testbed. The same flow fits an agentic literature review, an ML-eval claim,
|
|
63
|
+
or any forecast you want to pre-register — and the metric the ablations measure, the **over-admission
|
|
64
|
+
rate** (how often an agent admits plausible-but-weak ideas), is itself a reusable reliability
|
|
65
|
+
benchmark. *Reviewers:* see [`repro/`](repro) for a one-command-per-claim reproduction path (tests,
|
|
66
|
+
tamper demo, schema validation, the anchor round-trip, and the paper build), each with expected output.
|
|
67
|
+
|
|
68
|
+
## Install (Claude Code)
|
|
69
|
+
|
|
70
|
+
```text
|
|
71
|
+
/plugin marketplace add yixingz3/qpop
|
|
72
|
+
/plugin install qpop@qpop
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
That's it. The `auditable-research` discipline activates when you screen ideas or act on a finding, and
|
|
76
|
+
the `/qpop:*` commands below are ready. *(Other agents: the discipline is portable markdown — see
|
|
77
|
+
[Other tools](#other-tools).)*
|
|
78
|
+
|
|
79
|
+
## 60-second demo
|
|
80
|
+
|
|
81
|
+
After installing, ask Claude Code:
|
|
82
|
+
|
|
83
|
+
> *"Screen these 3 research claims with qpop and pre-register only the survivors."*
|
|
84
|
+
|
|
85
|
+
A disciplined run looks like this (illustrative):
|
|
86
|
+
|
|
87
|
+
```text
|
|
88
|
+
3 candidates screened
|
|
89
|
+
2 rejected — tertiary-only evidence / fails replace-don't-stack
|
|
90
|
+
1 preregistered — H-001
|
|
91
|
+
entry_hash: sha256:9f3c… (chained to prev)
|
|
92
|
+
verify: OK — 1 entry, chain intact
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
"2 of 3 rejected" is the feature, not a bug — **no action** is the correct, modal outcome. Want to
|
|
96
|
+
feel the ledger without Claude Code? `make verify-sample` (or the Python snippet below).
|
|
97
|
+
|
|
98
|
+
## How it works — the ladder
|
|
99
|
+
|
|
100
|
+
Apply **in order**; stop at the first failure → **no action** (the correct, modal outcome):
|
|
101
|
+
|
|
102
|
+
1. **Story or claim?** State it as something falsifiable, with a mechanism — or reject.
|
|
103
|
+
2. **Deterministic gates** (no LLM): eligibility, and *replace-don't-stack* on duplicates.
|
|
104
|
+
3. **Bear case first** — the strongest case *against*, written before the recommendation.
|
|
105
|
+
4. **Source tiers** — primary > secondary > market-implied > tertiary; tertiary alone never moves a conclusion.
|
|
106
|
+
5. **Pre-register, forward** — claim + dated evidence + exit triggers, hash-chained *before* the window.
|
|
107
|
+
6. **Forward, not backtest** — a backward test of an LLM-scored process is *structurally invalid*.
|
|
108
|
+
7. **On balance** — after overlap, cost, and uncertainty, does admitting beat doing nothing?
|
|
109
|
+
|
|
110
|
+
## Commands
|
|
111
|
+
|
|
112
|
+
| Command | What it does |
|
|
113
|
+
|---|---|
|
|
114
|
+
| **`auditable-research`** | *(auto)* the full discipline, applied when you screen ideas or evaluate a finding |
|
|
115
|
+
| **`/qpop:preregister`** | register a hypothesis to the tamper-evident ledger *before* evaluating |
|
|
116
|
+
| **`/qpop:review`** | audit current claims / a diff for stories, leakage, over-admission, missing pre-registration |
|
|
117
|
+
| **`/qpop:verify`** | verify a ledger's integrity — detect any post-hoc edit, insertion, deletion, or reordering |
|
|
118
|
+
|
|
119
|
+
The ledger is a real hash chain (`entry_hash = sha256(content_hash ‖ prev_hash)`): edit a past entry,
|
|
120
|
+
insert one, delete one, or reorder them, and `verify` fails (and exits non-zero — drop it in CI).
|
|
121
|
+
|
|
122
|
+
## What the ledger proves (and what it doesn't)
|
|
123
|
+
|
|
124
|
+
The hash chain is **tamper-evidence, not a clock.** Be precise about the guarantee:
|
|
125
|
+
|
|
126
|
+
| Claim | Chain alone? | What closes the gap |
|
|
127
|
+
|---|---|---|
|
|
128
|
+
| A past entry was edited | ✅ detected | hash verification |
|
|
129
|
+
| An entry was inserted / deleted / reordered | ✅ detected | hash-chain verification |
|
|
130
|
+
| An entry existed *before* the outcome | ⚠️ partial | `anchor` + a public commit or OpenTimestamps |
|
|
131
|
+
| The LLM's reasoning was correct | ❌ no | human / source review |
|
|
132
|
+
| The strategy is profitable | ❌ no | a forward window + the validity checklist |
|
|
133
|
+
|
|
134
|
+
The "before the outcome" guarantee needs an **external anchor** — and `qpop` ships one:
|
|
135
|
+
`python scripts/qpop.py anchor <ledger>` writes a manifest committing to the ledger head, and
|
|
136
|
+
`verify-anchor` detects any drift since. Bind it to time by committing the manifest to a public repo (the public push date
|
|
137
|
+
*is* the anchor) or with `--ots` ([OpenTimestamps](https://opentimestamps.org)); a turnkey Sigstore/Rekor
|
|
138
|
+
backend is on the roadmap.
|
|
139
|
+
|
|
140
|
+
## Two pillars
|
|
141
|
+
|
|
142
|
+
- **The tool** — this plugin: the discipline + a runnable, tamper-evident ledger engine.
|
|
143
|
+
- **The paper** — *Forward-Registered, Auditable LLM-Assisted Research* — [**read the PDF**](research/paper/paper.pdf),
|
|
144
|
+
or the LaTeX source in [`research/`](research). The theory and pilot evidence behind the method; if you
|
|
145
|
+
build on it, please cite ([`CITATION.cff`](CITATION.cff)).
|
|
146
|
+
|
|
147
|
+
## Use the ledger without Claude Code (Python)
|
|
148
|
+
|
|
149
|
+
The pre-registration engine is a dependency-free package, bundled here and importable directly:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
python scripts/qpop.py verify ledger.jsonl # from a clone; no install needed
|
|
153
|
+
python scripts/qpop.py anchor ledger.jsonl # manifest committing to the head…
|
|
154
|
+
python scripts/qpop.py verify-anchor ledger.jsonl # …then detect any drift since
|
|
155
|
+
# (a PyPI release, `pip install forward-qpop`, is planned)
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
# pip install -e . (or set PYTHONPATH=src) so `forward_qpop` imports from a clone
|
|
160
|
+
from forward_qpop import Ledger
|
|
161
|
+
led = Ledger("ledger.jsonl")
|
|
162
|
+
led.register("H-01", claim="…", prior=0.5,
|
|
163
|
+
evidence=[{"summary": "…", "tier": "primary", "date": "2026-06-24"}])
|
|
164
|
+
led.verify() # ok=True — any later edit/insert/delete/reorder flips it to False
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Other tools
|
|
168
|
+
|
|
169
|
+
Claude Code is supported today. The discipline lives as portable markdown in [`skills/`](skills), so
|
|
170
|
+
Codex / other agents can adopt it by pointing at the skill files; first-class support for more agents
|
|
171
|
+
is planned.
|
|
172
|
+
|
|
173
|
+
**Roadmap:** first-class adapters for Codex and Cursor; an MCP server exposing `preregister` / `verify`;
|
|
174
|
+
a LangChain / LangGraph wrapper; a turnkey Sigstore/Rekor anchor backend (the git + OpenTimestamps
|
|
175
|
+
anchor ships today); and the `forward-qpop` PyPI release.
|
|
176
|
+
|
|
177
|
+
## What this is not
|
|
178
|
+
|
|
179
|
+
Not investment advice, not a stock-picker, not a claim to beat the market. The finance domain is a
|
|
180
|
+
deliberately *adversarial testbed* (markets punish wishful thinking); `qpop` is a research
|
|
181
|
+
**discipline**. See [`DISCLAIMER.md`](DISCLAIMER.md) and [`ETHICS.md`](ETHICS.md).
|
|
182
|
+
|
|
183
|
+
## License
|
|
184
|
+
|
|
185
|
+
[MIT](LICENSE). Implements the **Forward-QPOP** protocol — see [`research/`](research) and
|
|
186
|
+
[`CITATION.cff`](CITATION.cff).
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# data/synthetic
|
|
2
|
+
|
|
3
|
+
**Non-sensitive, illustrative fixtures only.** Use this directory for synthetic prices, sample
|
|
4
|
+
candidate cards, and sample QPOP ledger entries that exercise the engine for tests and demos.
|
|
5
|
+
|
|
6
|
+
Never place here (or anywhere in this repo): live broker logs, real trade records, paid-data
|
|
7
|
+
exports, or anything tied to an actual account. The forward paper-trading record, if released later,
|
|
8
|
+
will be published only as **aggregated** discipline/performance statistics — not as positions or trades.
|
|
9
|
+
|
|
10
|
+
Suggested contents:
|
|
11
|
+
- `prices_synthetic.csv` — deterministic synthetic price series for a handful of fake tickers.
|
|
12
|
+
- `candidate_cards_sample.jsonl` — example SOURCE output (the card schema).
|
|
13
|
+
- `qpop_ledger_sample.jsonl` — example pre-registration entries with content hashes (admission → belief-update → outcome).
|
|
14
|
+
- `run_manifest_sample.json` — example SOURCE→GATE→EVALUATE run manifest (funnel counts + admission rate).
|
|
15
|
+
- `scoreboard_sample.csv` — example discipline metrics (counts, admission rate) over a fake window.
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Worked example — AI supply chain
|
|
2
|
+
|
|
3
|
+
This is the **reference domain** that motivated the framework. It illustrates the *method*; it is
|
|
4
|
+
**not** a portfolio, a set of recommendations, or the live book. The live paper-trading
|
|
5
|
+
implementation (broker integration, the full bottleneck map, the actual QPOP ledger and positions)
|
|
6
|
+
is maintained **privately** — see `DISCLAIMER.md`.
|
|
7
|
+
|
|
8
|
+
## The chokepoints (illustrative)
|
|
9
|
+
|
|
10
|
+
The AI accelerator supply chain is a graph of physical bottlenecks. Representative nodes:
|
|
11
|
+
|
|
12
|
+
| Node | The binding physical constraint |
|
|
13
|
+
|---|---|
|
|
14
|
+
| HBM / high-bandwidth memory | few qualified makers, multi-year capacity lead time |
|
|
15
|
+
| Advanced packaging (CoWoS/SoIC) | interposer + chip-on-wafer capacity gates assembled accelerators |
|
|
16
|
+
| Leading-edge foundry | effectively one volume supplier at the frontier |
|
|
17
|
+
| EDA / litho / WFE | the tools that make the fabs (single-supplier EUV; EDA duopoly) |
|
|
18
|
+
| Power delivery (grid + board-level) | transformers/switchgear upstream; sub-1V point-of-load silicon at the die |
|
|
19
|
+
| Cooling / thermal | liquid cooling + CDUs as rack power density rises |
|
|
20
|
+
| Optical interconnect / connectivity | transceivers, retimers, and the PCIe/CXL signal-integrity fabric |
|
|
21
|
+
| Back-end test | tester machine-hours per AI part are far higher than for mobile SoCs |
|
|
22
|
+
|
|
23
|
+
Each node carries decomposed scores and measurable exit triggers, exactly as in
|
|
24
|
+
`../template_domain/bottleneck_map.template.yml`.
|
|
25
|
+
|
|
26
|
+
## What the example demonstrates (the methodological findings)
|
|
27
|
+
|
|
28
|
+
- **Discovery surfaces far more than it should admit.** Across runs, the funnel sourced and
|
|
29
|
+
evaluated dozens of candidates and admitted a small single-digit fraction; most cycles are "no
|
|
30
|
+
action." That restraint is the result.
|
|
31
|
+
- **The gate catches duplication mechanically** — candidates proposed into a node that already holds
|
|
32
|
+
a position, or highly correlated to the held book, are flagged before any expensive evaluation.
|
|
33
|
+
- **The evaluation catches what the gate cannot** — economic-substitute and competitive-displacement
|
|
34
|
+
relationships, structural-vs-cyclical traps (a name with the biggest recent move is often the
|
|
35
|
+
worst chokepoint), and fact-vs-announcement on policy.
|
|
36
|
+
- **Policy enters as evidence, not a multiplier** — a signed, dated, material award becomes durability
|
|
37
|
+
evidence + a reversal trigger on a real chokepoint; a name whose thesis is *only* policy goes to a
|
|
38
|
+
separate, capped sleeve.
|
|
39
|
+
|
|
40
|
+
## A worked candidate flow (real numbers)
|
|
41
|
+
|
|
42
|
+
One discovery round, end to end (counts are from a real run; tickers are sanitized where a name is
|
|
43
|
+
a live holding, named where it is a *reject* — a reject is not a position):
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
~40 candidate cards sourced across 12 lenses (per-node + gaps + social + policy + literature)
|
|
47
|
+
~10 NEW symbols the rest deduped against the persistent "seen" set (already-decided names)
|
|
48
|
+
~6 pass the GATE deterministic: foreign-OTC and sub-liquidity names rejected mechanically
|
|
49
|
+
~4 survive TRIAGE cheap model drops the obvious low-purity (conglomerate / commodity / pre-rev)
|
|
50
|
+
0 admitted Sonnet bear-case + Opus adjudication on survivors -> watchlist/reject
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Most rounds end at **0 admitted**. Across the build-out the funnel evaluated dozens of finalists and
|
|
54
|
+
admitted a **low single-digit fraction**, all at small satellite weight. "No action" is the modal
|
|
55
|
+
outcome, and it is the result, not a failure.
|
|
56
|
+
|
|
57
|
+
## One admitted, one rejected, one overlap-penalized
|
|
58
|
+
|
|
59
|
+
- **Admitted** (an earlier round): a critical-material permanent-magnet chokepoint — a genuinely
|
|
60
|
+
supplier-concentrated, hard-to-substitute input. Admitted at **half** the unconstrained satellite
|
|
61
|
+
weight, because a separate critical-inputs concentration cap bound it. *Restraint expressed as
|
|
62
|
+
sizing, not just selection.* (Vehicle sanitized — it is a live holding.)
|
|
63
|
+
- **Rejected — an integrated energy major pitched for a "helium chokepoint":** the helium scarcity is
|
|
64
|
+
real and the supply-share claim checks out, but helium is **< 1%** of a ~$330B oil major; the stock trades on crude, not
|
|
65
|
+
helium economics. The chokepoint is real, the *ticker* captures none of it. Canonical
|
|
66
|
+
rounding-error-in-a-conglomerate reject.
|
|
67
|
+
- **Overlap-penalized — a pure-play 800G optical-transceiver name:** high purity, real
|
|
68
|
+
chokepoint, but the optics sleeve was already **full** (at its node cap, holding the incumbent
|
|
69
|
+
it would duplicate). Routed to a **replace-not-stack** decision — a *future replacement candidate*
|
|
70
|
+
gated on the incumbent firing an exit trigger, not a standalone add. The gate caught the
|
|
71
|
+
duplication before any expensive evaluation.
|
|
72
|
+
|
|
73
|
+
## Ablation — each discipline contributes to restraint
|
|
74
|
+
|
|
75
|
+
The same **38-candidate batch**, run through baselines that each remove one discipline:
|
|
76
|
+
|
|
77
|
+
| Arm | Admitted | Rate |
|
|
78
|
+
|---|---|---|
|
|
79
|
+
| **Full pipeline** (gate + seen-set + triage + bear-case-first + overlap) | **0 / 38** | **0%** |
|
|
80
|
+
| − overlap penalty | 25 / 38 | 66% |
|
|
81
|
+
| − bear-case-first | 28 / 38 | 74% |
|
|
82
|
+
| **Ungated LLM screener** (no discipline) | **38 / 38** | **100%** |
|
|
83
|
+
|
|
84
|
+
An ungated LLM admits *literally everything*; removing any single discipline raises admission far
|
|
85
|
+
past a "beyond noise" threshold; the full stack drives 100% → 0%. Paired with the rejection-quality
|
|
86
|
+
audit (an independent LLM auditor judged the rejections **0.93 justified**, rising to **1.00** after a
|
|
87
|
+
capital-at-risk adjudication overturned the one flagged false-rejection), this is the evidence that
|
|
88
|
+
the restraint is *discipline*, not blanket conservatism. Full write-up: `../../research/docs/RESULTS_INITIAL.md`.
|
|
89
|
+
|
|
90
|
+
## Reproducing the method (not the positions)
|
|
91
|
+
|
|
92
|
+
Use `../template_domain/` to build your own map and run the funnel. The *engine and disciplines* are
|
|
93
|
+
the reusable artifact here; the live AI book's specific holdings and trades are intentionally not
|
|
94
|
+
published (it would read as "copy these trades," which this project explicitly is not).
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# template_domain — start a new bottleneck domain here
|
|
2
|
+
|
|
3
|
+
Copy this directory to `examples/<your_domain>/` and fill in the two configs. The *engine* does not
|
|
4
|
+
change — only the domain map and the benchmark. That is the flywheel: a new domain is a config, not
|
|
5
|
+
a codebase.
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
examples/<your_domain>/
|
|
9
|
+
bottleneck_map.yml # the thesis: nodes (chokepoints), edges, scores, exit triggers
|
|
10
|
+
benchmark_config.yml # the theme benchmark + scoreboard settings
|
|
11
|
+
candidate_process.md # (optional) domain-specific gate thresholds / notes
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Steps
|
|
15
|
+
|
|
16
|
+
1. **Name the chokepoints.** What are the *physical*, hard-to-bypass steps in this supply chain
|
|
17
|
+
(e.g. for rare earths: separation/refining capacity, magnet manufacturing, mining permits,
|
|
18
|
+
substitution)? Each becomes a node in `bottleneck_map.yml`.
|
|
19
|
+
2. **Score each node** on `bottleneck_dims` (physical_indispensability, substitutability,
|
|
20
|
+
capacity_lead_time, supplier_concentration, pricing_power) + `demand_score` +
|
|
21
|
+
`valuation_adjustment` + `crowding_adjustment`.
|
|
22
|
+
3. **Write measurable `exit_triggers`** for each node — each with a checkable `data_source`. A
|
|
23
|
+
trigger you cannot actually check is rejected.
|
|
24
|
+
4. **List candidate tickers** per node with `exposure_purity` + role. US-tradeable (or your venue)
|
|
25
|
+
only; foreign-only listings are context, not candidates.
|
|
26
|
+
5. **Set the benchmark** in `benchmark_config.yml` (the theme benchmark you measure alpha against).
|
|
27
|
+
6. **Run the funnel** (source → gate → evaluate), **pre-register** admissions in the QPOP ledger,
|
|
28
|
+
and **forward-validate**.
|
|
29
|
+
|
|
30
|
+
## Domains this schema fits
|
|
31
|
+
|
|
32
|
+
AI supply chain (the worked example) · rare earth / critical minerals · power grid / transformers ·
|
|
33
|
+
LNG / gas infrastructure · uranium / nuclear fuel cycle · copper / electrification · defense
|
|
34
|
+
industrial base · and others. Each is a different `bottleneck_map.yml` over the same engine.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "forward-qpop"
|
|
7
|
+
version = "0.1.1"
|
|
8
|
+
description = "Tamper-evident, append-only forward pre-registration ledger for auditable, leakage-resistant research (the Forward-QPOP protocol)."
|
|
9
|
+
readme = "src/forward_qpop/README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "Yixing Zheng", email = "yz11739@stern.nyu.edu" }]
|
|
13
|
+
keywords = [
|
|
14
|
+
"pre-registration", "reproducibility", "trustworthy-ai", "auditability",
|
|
15
|
+
"hash-chain", "research-integrity", "forward-validation", "look-ahead-bias",
|
|
16
|
+
]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Development Status :: 4 - Beta",
|
|
19
|
+
"Intended Audience :: Science/Research",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Operating System :: OS Independent",
|
|
22
|
+
"Programming Language :: Python :: 3",
|
|
23
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
24
|
+
"Topic :: Scientific/Engineering",
|
|
25
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
26
|
+
]
|
|
27
|
+
dependencies = []
|
|
28
|
+
|
|
29
|
+
[project.urls]
|
|
30
|
+
Homepage = "https://github.com/yixingz3/qpop"
|
|
31
|
+
Repository = "https://github.com/yixingz3/qpop"
|
|
32
|
+
Paper = "https://github.com/yixingz3/qpop/tree/main/research/paper"
|
|
33
|
+
|
|
34
|
+
[project.scripts]
|
|
35
|
+
forward-qpop = "forward_qpop.cli:main"
|
|
36
|
+
|
|
37
|
+
[tool.hatch.build.targets.wheel]
|
|
38
|
+
packages = ["src/forward_qpop"]
|
|
39
|
+
|
|
40
|
+
[tool.hatch.build.targets.sdist]
|
|
41
|
+
include = ["/src/forward_qpop", "/README.md", "/LICENSE"]
|
|
42
|
+
|
|
43
|
+
[tool.pytest.ini_options]
|
|
44
|
+
pythonpath = ["src"]
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Reproducing qpop's claims
|
|
2
|
+
|
|
3
|
+
Everything below runs from a **clean clone** with **Python 3.9+** (the ledger core has no
|
|
4
|
+
dependencies). Step 1 needs `pytest`, and two steps need an extra: schema validation needs `jsonschema`; the paper build
|
|
5
|
+
needs `pdflatex` + `bibtex`. Total time is under a minute, excluding the paper build.
|
|
6
|
+
|
|
7
|
+
`make` targets are shown first where one exists; the command after `# or:` does the same without `make`.
|
|
8
|
+
|
|
9
|
+
## 1. Ledger + anchor integrity — the core claim (~1s)
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
make test # or: python -m pytest -q
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Expected: **`21 passed`** — 9 ledger + 7 anchor + 5 schema tests covering field-tamper, insert,
|
|
16
|
+
delete, reorder, terminal re-registration, tertiary-only blocking, anchor drift-detection, and
|
|
17
|
+
schema/fixture validation (with `format: date` enforced).
|
|
18
|
+
|
|
19
|
+
## 2. Verify the shipped sample ledger (~1s)
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
make verify-sample # or: python scripts/qpop.py verify data/synthetic/qpop_ledger_sample.jsonl
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Expected: **`OK — 3 entries, integrity verified.`**
|
|
26
|
+
|
|
27
|
+
## 3. Watch tamper-evidence fire (~3s)
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
python repro/tamper_demo.py
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Expected: step 1 verifies clean; step 2 (after editing one frozen field) reports a
|
|
34
|
+
`content_hash mismatch`, and the script prints **`PASS`** (exit 0).
|
|
35
|
+
|
|
36
|
+
## 4. External timestamp anchor (~1s)
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
python scripts/qpop.py anchor data/synthetic/qpop_ledger_sample.jsonl
|
|
40
|
+
python scripts/qpop.py verify-anchor data/synthetic/qpop_ledger_sample.jsonl
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Expected: **`anchor OK`**. Now append or edit an entry and re-run `verify-anchor`: it reports
|
|
44
|
+
the head/digest drift and exits non-zero. The `.anchor.json` is gitignored; add `--ots` to
|
|
45
|
+
also OpenTimestamps-stamp it if the client is installed.
|
|
46
|
+
|
|
47
|
+
## 5. Fixtures match the published schemas (~1s, needs `pip install jsonschema`)
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
python repro/validate_samples.py
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Expected: **`ALL SAMPLES VALID`** — every row in `data/synthetic/` validates against
|
|
54
|
+
[`schemas/`](../schemas).
|
|
55
|
+
|
|
56
|
+
## 6. Build the paper (~30s, needs pdflatex + bibtex)
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
make paper # -> research/paper/paper.pdf
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Expected: a clean 25-page PDF, no undefined references.
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
**What is *not* reproducible here, by design:** forward performance (the evaluation window is
|
|
67
|
+
open; no return is claimed) and the live book (positions and paid feeds are never released).
|
|
68
|
+
See the paper's *Reproducibility and Release* table.
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# qpop schemas
|
|
2
|
+
|
|
3
|
+
Machine-readable [JSON Schema](https://json-schema.org/) (draft 2020-12) for the data structures the
|
|
4
|
+
discipline produces and consumes. They make `qpop` read as **infrastructure**, not a prompt pack: an
|
|
5
|
+
agent (or a CI gate) can validate a card, a ledger entry, or a run before trusting it.
|
|
6
|
+
|
|
7
|
+
| Schema | Validates | Notes |
|
|
8
|
+
|---|---|---|
|
|
9
|
+
| [`candidate_card.schema.json`](candidate_card.schema.json) | a SOURCE-stage candidate | the unit the gate + bear-case consume |
|
|
10
|
+
| [`qpop_entry.schema.json`](qpop_entry.schema.json) | one line of a ledger `.jsonl` | `oneOf` admission / belief_update / outcome, plus the hash-chain fields |
|
|
11
|
+
| [`evidence.schema.json`](evidence.schema.json) | a dated, tiered fact | the building block of cards + entries |
|
|
12
|
+
| [`exit_trigger.schema.json`](exit_trigger.schema.json) | a pre-committed exit condition | must name a checkable `data_source` |
|
|
13
|
+
| [`run_manifest.schema.json`](run_manifest.schema.json) | one `SOURCE → GATE → EVALUATE` run | provenance + funnel counts + admission rate |
|
|
14
|
+
|
|
15
|
+
**Self-contained on purpose.** `qpop_entry` and `candidate_card` embed `evidence` / `exit_trigger`
|
|
16
|
+
under `$defs` so they validate with no external `$ref` resolver (the standalone `evidence` and
|
|
17
|
+
`exit_trigger` files are the canonical, reusable copies). The hash fields follow the ledger
|
|
18
|
+
convention: `content_hash` is `sha256:<64-hex>`; `prev_hash` and `entry_hash` are bare `<64-hex>`
|
|
19
|
+
(64 zeros at genesis).
|
|
20
|
+
|
|
21
|
+
## Validate
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install jsonschema
|
|
25
|
+
python repro/validate_samples.py # checks data/synthetic/* against these schemas
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Or inline:
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import json
|
|
32
|
+
from jsonschema import Draft202012Validator
|
|
33
|
+
schema = json.load(open("schemas/qpop_entry.schema.json", encoding="utf-8"))
|
|
34
|
+
Draft202012Validator(schema).validate(my_entry)  # my_entry: one parsed ledger line (a dict); raises ValidationError if invalid
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
The synthetic fixtures in [`../data/synthetic/`](../data/synthetic) are kept valid against these
|
|
38
|
+
schemas by `repro/validate_samples.py`.
|