passiveworkers 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. passiveworkers-0.1.0/LICENSE +21 -0
  2. passiveworkers-0.1.0/PKG-INFO +269 -0
  3. passiveworkers-0.1.0/README.md +209 -0
  4. passiveworkers-0.1.0/council/__init__.py +1 -0
  5. passiveworkers-0.1.0/council/artifacts.py +161 -0
  6. passiveworkers-0.1.0/council/batch.py +84 -0
  7. passiveworkers-0.1.0/council/cli.py +54 -0
  8. passiveworkers-0.1.0/council/coordinator.py +133 -0
  9. passiveworkers-0.1.0/council/crypto.py +133 -0
  10. passiveworkers-0.1.0/council/fidelity.py +197 -0
  11. passiveworkers-0.1.0/council/judge.py +393 -0
  12. passiveworkers-0.1.0/council/ledger.py +230 -0
  13. passiveworkers-0.1.0/council/library.py +431 -0
  14. passiveworkers-0.1.0/council/local.py +228 -0
  15. passiveworkers-0.1.0/council/mcp_server.py +87 -0
  16. passiveworkers-0.1.0/council/net/__init__.py +1 -0
  17. passiveworkers-0.1.0/council/net/agent.py +231 -0
  18. passiveworkers-0.1.0/council/net/app.py +390 -0
  19. passiveworkers-0.1.0/council/net/baseline.py +86 -0
  20. passiveworkers-0.1.0/council/net/config.py +79 -0
  21. passiveworkers-0.1.0/council/net/coordinator_app.py +370 -0
  22. passiveworkers-0.1.0/council/net/dashboard.py +111 -0
  23. passiveworkers-0.1.0/council/net/store.py +964 -0
  24. passiveworkers-0.1.0/council/net/submit.py +102 -0
  25. passiveworkers-0.1.0/council/operator.py +412 -0
  26. passiveworkers-0.1.0/council/research.py +520 -0
  27. passiveworkers-0.1.0/council/researcher.py +300 -0
  28. passiveworkers-0.1.0/council/retrieval.py +80 -0
  29. passiveworkers-0.1.0/council/run_demo.py +175 -0
  30. passiveworkers-0.1.0/council/sanitize.py +78 -0
  31. passiveworkers-0.1.0/council/serve.py +183 -0
  32. passiveworkers-0.1.0/council/trust.py +168 -0
  33. passiveworkers-0.1.0/council/worker.py +123 -0
  34. passiveworkers-0.1.0/passiveworkers.egg-info/PKG-INFO +269 -0
  35. passiveworkers-0.1.0/passiveworkers.egg-info/SOURCES.txt +57 -0
  36. passiveworkers-0.1.0/passiveworkers.egg-info/dependency_links.txt +1 -0
  37. passiveworkers-0.1.0/passiveworkers.egg-info/entry_points.txt +2 -0
  38. passiveworkers-0.1.0/passiveworkers.egg-info/requires.txt +27 -0
  39. passiveworkers-0.1.0/passiveworkers.egg-info/top_level.txt +1 -0
  40. passiveworkers-0.1.0/pyproject.toml +50 -0
  41. passiveworkers-0.1.0/setup.cfg +4 -0
  42. passiveworkers-0.1.0/tests/test_artifacts.py +100 -0
  43. passiveworkers-0.1.0/tests/test_assisted.py +100 -0
  44. passiveworkers-0.1.0/tests/test_citations.py +27 -0
  45. passiveworkers-0.1.0/tests/test_crypto.py +84 -0
  46. passiveworkers-0.1.0/tests/test_currency_gap.py +98 -0
  47. passiveworkers-0.1.0/tests/test_digest.py +15 -0
  48. passiveworkers-0.1.0/tests/test_fidelity.py +222 -0
  49. passiveworkers-0.1.0/tests/test_freshness.py +106 -0
  50. passiveworkers-0.1.0/tests/test_hardening.py +183 -0
  51. passiveworkers-0.1.0/tests/test_judge_json.py +25 -0
  52. passiveworkers-0.1.0/tests/test_library.py +113 -0
  53. passiveworkers-0.1.0/tests/test_ratings.py +109 -0
  54. passiveworkers-0.1.0/tests/test_recency.py +197 -0
  55. passiveworkers-0.1.0/tests/test_research.py +41 -0
  56. passiveworkers-0.1.0/tests/test_retrieval.py +38 -0
  57. passiveworkers-0.1.0/tests/test_routing.py +164 -0
  58. passiveworkers-0.1.0/tests/test_sanitize.py +25 -0
  59. passiveworkers-0.1.0/tests/test_trust.py +195 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Passive Workers contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,269 @@
1
+ Metadata-Version: 2.4
2
+ Name: passiveworkers
3
+ Version: 0.1.0
4
+ Summary: Local-first deep research: your models, your connection, your disk. Multiple local LLMs research the live web as independent analysts; a blind editor compiles a cited report.
5
+ License: MIT License
6
+
7
+ Copyright (c) 2026 Passive Workers contributors
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in all
17
+ copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
26
+
27
+ Project-URL: Homepage, https://github.com/wikithoughts/passiveworkers
28
+ Project-URL: Repository, https://github.com/wikithoughts/passiveworkers
29
+ Keywords: deep-research,ollama,local-first,llm,rag,privacy,mcp
30
+ Classifier: License :: OSI Approved :: MIT License
31
+ Classifier: Programming Language :: Python :: 3
32
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
33
+ Classifier: Environment :: Console
34
+ Requires-Python: >=3.10
35
+ Description-Content-Type: text/markdown
36
+ License-File: LICENSE
37
+ Requires-Dist: requests>=2.32
38
+ Requires-Dist: ddgs>=9.0
39
+ Requires-Dist: fastapi>=0.115
40
+ Requires-Dist: uvicorn>=0.30
41
+ Requires-Dist: psutil>=7.0
42
+ Requires-Dist: pydantic>=2.0
43
+ Requires-Dist: numpy>=1.26
44
+ Provides-Extra: extract
45
+ Requires-Dist: trafilatura>=2.0; extra == "extract"
46
+ Provides-Extra: docs
47
+ Requires-Dist: pypdf>=4.0; extra == "docs"
48
+ Requires-Dist: python-docx>=1.1; extra == "docs"
49
+ Provides-Extra: mcp
50
+ Requires-Dist: mcp>=1.0; extra == "mcp"
51
+ Provides-Extra: crypto
52
+ Requires-Dist: pynacl>=1.5; extra == "crypto"
53
+ Provides-Extra: all
54
+ Requires-Dist: trafilatura>=2.0; extra == "all"
55
+ Requires-Dist: pypdf>=4.0; extra == "all"
56
+ Requires-Dist: python-docx>=1.1; extra == "all"
57
+ Requires-Dist: mcp>=1.0; extra == "all"
58
+ Requires-Dist: pynacl>=1.5; extra == "all"
59
+ Dynamic: license-file
60
+
61
+ # Passive Workers — local-first deep research
62
+
63
+ **Your models. Your connection. Your disk.** One command turns any computer with
64
+ [Ollama](https://ollama.com) into a deep-research engine: multiple local models research the
65
+ **live web** as independent analysts, and a blind editor compiles one **cited markdown report**
66
+ into `./reports/`.
67
+
68
+ ```bash
69
+ pip install '.[all]' # core + extraction + private-docs + MCP + crypto (PyPI release planned)
70
+ ollama pull qwen3:14b # any decent models you like — it auto-detects what you have
71
+ pw research "What changed in EU AI Act enforcement this quarter, and who has been fined?"
72
+ ```
73
+
74
+ ```
75
+ 🔬 Deep research (standard) — analysts: qwen3:14b, gemma3:12b, llama3.2 · editor: qwen3:14b
76
+ [1/3] qwen3:14b researching the live web…
77
+ 12 sources · 390 words · 41s
78
+ [2/3] gemma3:12b researching the live web…
79
+ ...
80
+ blind judge + editor compiling the report…
81
+ 📄 Report ready in 7.2 min · 1480 words · 31 sources → reports/2026-06-10-eu-ai-act….md
82
+ ```
83
+
84
+ Prefer a UI? **`pw serve`** → a single-user research desk at `http://127.0.0.1:8770` —
85
+ brief in, live progress, rendered report, history of everything you've researched.
86
+
87
+ ### Research your own documents too (private, local RAG)
88
+
89
+ ```bash
90
+ pw library add ~/Documents/contracts # index files or folders (PDF, Word, txt, md)
91
+ pw research "What are the renewal terms across my contracts?" --local # docs only
92
+ pw research "How do my notes compare to the latest guidance?" # docs + live web (default)
93
+ ```
94
+ Your files are chunked and embedded **locally** (Ollama `nomic-embed-text`) into
95
+ `~/.passiveworkers/library.db` — nothing is uploaded. Reports cite documents as `[L#]` and web
96
+ sources as `[S#]`, kept in separate sections.
97
+
98
+ Retrieval is state-of-the-art but lean: **hybrid** (dense embeddings ⊕ BM25 lexical, fused by
99
+ reciprocal rank fusion) so exact names/codes/numbers aren't missed; **structure-aware chunking**
100
+ that never straddles a section; **parent-window** expansion for grounding; and optional
101
+ **Contextual Retrieval** (`PW_CONTEXTUAL_CHUNKS=1`, Anthropic's technique — a small local model
102
+ situates each chunk before indexing) and **reranking** (`PW_RERANK=1`). Indexing is incremental
103
+ (unchanged files are skipped). Measure it on your own corpus with `python scripts/bench_rag.py` —
104
+ we publish what actually helps, not vendor numbers.
105
+
106
+ ### Use it from your own AI (MCP)
107
+
108
+ ```bash
109
+ pw mcp # run as an MCP server (stdio)
110
+ ```
111
+ Add to Claude Desktop's `claude_desktop_config.json` so your assistant can call the engine:
112
+ ```json
113
+ { "mcpServers": { "passive-workers": { "command": "pw", "args": ["mcp"] } } }
114
+ ```
115
+ Tools exposed: `research`, `library_search`, `library_add`. Your own agentic AI orchestrates;
116
+ our multi-model, live-web + private-library engine is the capability it reaches for.
117
+
118
+ **Recommended setup (avoids public-search rate limits, keeps queries private):**
119
+ ```bash
120
+ docker compose up -d searxng # self-hosted meta-search; pw auto-detects it
121
+ ```
122
+
123
+ ## Why this exists
124
+
125
+ - **Currency beats memory.** Frontier chatbots answer from training data that is months or
126
+ years old. This engine reads the web *now* and cites what it found. In our own blind trial,
127
+ live-web research was the only thing that beat a frontier model — both times currency mattered.
128
+ For time-sensitive questions it leads with the **freshest-dated** sources (so they survive the
129
+ cap and get read first), pins the **current year into the search query** so the engine returns
130
+ *this year's* results instead of an SEO-dominant old page, and **researches deeper on breaking
131
+ topics** — while leaving stable-fact questions in plain relevance order.
132
+ - **Private by construction.** No account, no server, no telemetry. By default, the only traffic that
132
+ leaves your machine is the web searches and public-page fetches themselves. Reports are files on your disk.
134
+ - **Plural by design.** Different model families make *different* mistakes. A planner discovers
135
+ distinct angles (STORM-style); each analyst researches its own angle with its own model and
136
+ drafts from **full page extracts**, and a blind editor **preserves disagreement**
137
+ (agree / differ / unique sections — never a forced consensus). Question diversity × model
138
+ diversity catches what any single model hallucinates.
139
+ - **Right source for the query.** Beyond live web, academic-looking queries also hit **arXiv** and
140
+ definitional ones **Wikipedia** (`PW_SOURCE_ROUTING=off` to disable). Models stay warm between
141
+ steps (`PW_OLLAMA_KEEP_ALIVE`, default `30m`; set `0` to unload immediately) so there are no
142
+ reload stalls mid-run.
143
+ - **Free forever.** It's your hardware.
144
+
145
+ ## Honesty section (when NOT to use this)
146
+
147
+ A frontier chatbot is better when the answer lives in stable knowledge — math, code,
148
+ explanations, anything where being current doesn't matter. We measured this bluntly: local
149
+ models lose that fight 0/10 (`docs/TRIAL_RESULTS.md`). This tool wins when the answer lives
150
+ **on today's web** — prices, regulations, releases, markets, anything where "as of when?"
151
+ decides usefulness. Optional `--editor api` brings your own OpenRouter key for a frontier
152
+ editor pass over locally-gathered findings — your choice; the default is fully local.
153
+
154
+ ## Benchmark (honest, small sample)
155
+
156
+ On a 25-question subset of OpenAI's SimpleQA, the engine scored **64%** (single `qwen2.5:14b`,
157
+ snippet-only search, LLM-graded — `scripts/bench_simpleqa.py`). Context, plainly: SimpleQA rewards
158
+ short factoid recall, which is the *opposite* of what this tool is built for (multi-source reports
159
+ where currency and citation matter); the leaders' ~95% figures use bigger models, deeper agentic
160
+ loops, and more sources. We publish the number — small sample and all — because the honest floor is
161
+ more useful to you than a cherry-picked one. Run it yourself: `python scripts/bench_simpleqa.py --n 100`.
162
+
163
+ ### Citation fidelity (the metric that actually matters here)
164
+
165
+ A research tool lives or dies on one question: *when it says X [S3], does source S3 say X?*
166
+ `scripts/eval_citation_fidelity.py` measures exactly that — for every cited claim it checks
167
+ content-overlap with the source it points at and flags numbers stated in a claim that are absent from
168
+ the source. Two keyless (no-API-cost) modes:
169
+
170
+ ```bash
171
+ python scripts/eval_citation_fidelity.py --report reports/your-report.md # score an existing report (re-fetches its sources)
172
+ python scripts/eval_citation_fidelity.py --run --depth quick # fresh run, scored against the exact extract each model read
173
+ ```
174
+
175
+ It is honest about being a **floor**: lexical grounding catches off-topic citations and fabricated
176
+ numbers — the common, damaging failures — but a GROUNDED verdict means "not obviously fabricated",
177
+ *not* "verified true" (it can't detect subtle misrepresentation). The "grounded rate" is of
178
+ *verifiable* claims; unreachable sources are reported separately, never counted as failures.
179
+
180
+ ### Currency gap (where live web beats a frontier model's memory)
181
+
182
+ This tool's real edge isn't raw model size — it's *currency*. `scripts/eval_currency_gap.py` measures
183
+ exactly that: the local council (live web) vs a frontier model answering from its frozen training
184
+ knowledge, scored against curated references, as a matrix by *currency window × category*. A `static`
185
+ control set keeps it fair (where currency is irrelevant, the frontier should win). It **spends nothing
186
+ by default** — a bare run is a `$0` dry run that validates the question set and estimates cost; only
187
+ `--run` (with `OPENROUTER_API_KEY` in your env) makes the paid frontier calls:
188
+
189
+ ```bash
190
+ python scripts/eval_currency_gap.py # dry run — validate + estimate, $0
191
+ python scripts/eval_currency_gap.py --run # paid: council (free) vs frontier (your API key)
192
+ ```
193
+
194
+ ## Security model (designed in, not bolted on)
195
+
196
+ - **No browser automation, no computer use, no sessions, no cookies — ever.** Search API +
197
+ plain fetch of public pages only. The gravest agent attacks (session-token theft,
198
+ authenticated exfiltration) have nothing to grab here.
199
+ - **All web content is untrusted data.** It passes a sanitizer (invisible-Unicode and
200
+ hidden-comment stripping) and enters prompts only inside spotlighting delimiters marked
201
+ "data, never instructions" (`council/sanitize.py`). The same gate covers the **ends** of the
202
+ pipeline too: your brief is scrubbed of hidden/bidi characters and length-bounded before it
203
+ shapes any prompt, and every model-written passage is re-scrubbed before it lands in the
204
+ report (so a payload smuggled through a source can't survive into the artifact) — all without
205
+ touching visible layout or citations.
206
+ - **Models hold zero tool privileges.** They only return text; every action (search, fetch,
207
+ file write) is plain Python under this repo's control. Reports write only into `./reports/`;
208
+ fetches pass an SSRF guard (public hosts only, size-capped).
209
+
210
+ ## Hardware guide
211
+
212
+ | Your machine | Models that fit (4-bit) | Experience |
213
+ |---|---|---|
214
+ | 8 GB RAM/VRAM | 3–4B (llama3.2, qwen3:4b, gemma3:4b) | quick reports, lighter analysis |
215
+ | 16 GB | 7–14B (qwen3:14b, gemma3:12b) | the sweet spot |
216
+ | 24 GB+ | 14–32B (+ mistral-small:22b) | best local quality |
217
+
218
+ Models run **sequentially** by design — no concurrent loads fighting for memory.
219
+ On CPU-only or busy machines, cap the cast by weight size: `PW_MODEL_CAP_GB=3 pw research …`
220
+ (big models on CPU crawl at 3–6 tok/s — a small model that fits is always faster than a large
221
+ one that spills).
222
+
223
+ Page evidence uses [trafilatura](https://github.com/adbar/trafilatura) for clean main-content
224
+ extraction (with a regex fallback); full credits in [docs/PRIOR_ART.md](docs/PRIOR_ART.md).
225
+
226
+ ## Federation (experimental) — the multiplayer mode
227
+
228
+ Everything above runs on one machine. The same repo contains the network layer
229
+ (`council/net/`): connect machines in **different countries** and reports gain genuinely
230
+ different windows on the web — each node researches from its own egress and returns its **own
231
+ cited findings** (never proxied traffic), an editor merges with per-country sections, and a
232
+ non-tradeable mutual-aid credit accounts for who helped whom. It already powers a live
233
+ two-country deployment, plus typed marketplace jobs (deep research, sharded batch work with
234
+ capability matching, and **assisted** human-in-the-loop tasks — `pw tasks` / `pw accept` /
235
+ `pw deliver`: an operator consents to a bounded brief and does it with their own AI or by hand,
236
+ never our autonomous code). The asker **rates** the result (`pw rate`), building operator
237
+ **reputation** that gates higher-trust offers — while newcomers can still take ungated work. Deliverables can be **real files**, moved as content-addressed,
238
+ integrity-verified chunks (`pw deliver <task> @file <job>` → `pw fetch <job> <dir>`) — a
239
+ corrupted or swapped chunk is detected, never written. With the `[crypto]` extra, deliverables are
240
+ **signed** (the asker verifies which operator produced them) and files can be **end-to-end
241
+ encrypted** to the asker (`pw keygen` → the coordinator relays ciphertext it cannot read). For
242
+ authenticity that holds even against a hostile coordinator, the asker **pins** an operator's signing
243
+ key out of band — `pw fingerprint` (operator) → `pw trust add` (asker), or trust-on-first-use — and
244
+ `pw fetch` verifies against the pinned key, refusing a swapped one. Two principles are absolute: **operators always see and consent to the
245
+ work their machine does** (never hidden tasks), and when a job needs a real computer driven, it
246
+ is **handed to the human operator** to do with their own AI under approval — our code never
247
+ automates anyone's machine. The long game is a commons of computers doing real work for each
248
+ other — **no token, no secondary market, money only ever at the edges.** See
249
+ [docs/FEDERATION_V2.md](docs/FEDERATION_V2.md).
250
+
251
+ ## Documentation
252
+
253
+ | Doc | What |
254
+ |---|---|
255
+ | [docs/CONTEXT.md](docs/CONTEXT.md) | The why, the history, the layered vision. |
256
+ | [docs/ROADMAP.md](docs/ROADMAP.md) | Milestones + pivots (living tracker). |
257
+ | [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) | Roles, local vs networked shape, trust/security. |
258
+ | [docs/DECISIONS.md](docs/DECISIONS.md) | Settled decisions + rationale (ADR-style, D1–D16). |
259
+ | [docs/ECONOMICS.md](docs/ECONOMICS.md) | Credit, give/take, score-weighted payouts, legal posture. |
260
+ | [docs/TRIAL_RESULTS.md](docs/TRIAL_RESULTS.md) | Our blind trial vs a frontier model — losses included. |
261
+ | [docs/GLOSSARY.md](docs/GLOSSARY.md) | Terms (Council, analyst, judge, lens, credit…). |
262
+ | [docs/CONTRIBUTE_COMPUTE.md](docs/CONTRIBUTE_COMPUTE.md) | Plug a machine into the federation — what it does, earns, and why it's safe. |
263
+ | [docs/RELEASING.md](docs/RELEASING.md) | How to publish to PyPI (verified build; needs your token). |
264
+
265
+ ## Status
266
+
267
+ Young software, honestly labeled: the single-player engine works and is verified end-to-end;
268
+ the federation layer is experimental. We publish our methodology and our losses, not just wins.
269
+ Issues and PRs welcome. MIT.
@@ -0,0 +1,209 @@
1
+ # Passive Workers — local-first deep research
2
+
3
+ **Your models. Your connection. Your disk.** One command turns any computer with
4
+ [Ollama](https://ollama.com) into a deep-research engine: multiple local models research the
5
+ **live web** as independent analysts, and a blind editor compiles one **cited markdown report**
6
+ into `./reports/`.
7
+
8
+ ```bash
9
+ pip install '.[all]' # core + extraction + private-docs + MCP + crypto (PyPI release planned)
10
+ ollama pull qwen3:14b # any decent models you like — it auto-detects what you have
11
+ pw research "What changed in EU AI Act enforcement this quarter, and who has been fined?"
12
+ ```
13
+
14
+ ```
15
+ 🔬 Deep research (standard) — analysts: qwen3:14b, gemma3:12b, llama3.2 · editor: qwen3:14b
16
+ [1/3] qwen3:14b researching the live web…
17
+ 12 sources · 390 words · 41s
18
+ [2/3] gemma3:12b researching the live web…
19
+ ...
20
+ blind judge + editor compiling the report…
21
+ 📄 Report ready in 7.2 min · 1480 words · 31 sources → reports/2026-06-10-eu-ai-act….md
22
+ ```
23
+
24
+ Prefer a UI? **`pw serve`** → a single-user research desk at `http://127.0.0.1:8770` —
25
+ brief in, live progress, rendered report, history of everything you've researched.
26
+
27
+ ### Research your own documents too (private, local RAG)
28
+
29
+ ```bash
30
+ pw library add ~/Documents/contracts # index files or folders (PDF, Word, txt, md)
31
+ pw research "What are the renewal terms across my contracts?" --local # docs only
32
+ pw research "How do my notes compare to the latest guidance?" # docs + live web (default)
33
+ ```
34
+ Your files are chunked and embedded **locally** (Ollama `nomic-embed-text`) into
35
+ `~/.passiveworkers/library.db` — nothing is uploaded. Reports cite documents as `[L#]` and web
36
+ sources as `[S#]`, kept in separate sections.
37
+
38
+ Retrieval is state-of-the-art but lean: **hybrid** (dense embeddings ⊕ BM25 lexical, fused by
39
+ reciprocal rank fusion) so exact names/codes/numbers aren't missed; **structure-aware chunking**
40
+ that never straddles a section; **parent-window** expansion for grounding; and optional
41
+ **Contextual Retrieval** (`PW_CONTEXTUAL_CHUNKS=1`, Anthropic's technique — a small local model
42
+ situates each chunk before indexing) and **reranking** (`PW_RERANK=1`). Indexing is incremental
43
+ (unchanged files are skipped). Measure it on your own corpus with `python scripts/bench_rag.py` —
44
+ we publish what actually helps, not vendor numbers.
45
+
46
+ ### Use it from your own AI (MCP)
47
+
48
+ ```bash
49
+ pw mcp # run as an MCP server (stdio)
50
+ ```
51
+ Add to Claude Desktop's `claude_desktop_config.json` so your assistant can call the engine:
52
+ ```json
53
+ { "mcpServers": { "passive-workers": { "command": "pw", "args": ["mcp"] } } }
54
+ ```
55
+ Tools exposed: `research`, `library_search`, `library_add`. Your own agentic AI orchestrates;
56
+ our multi-model, live-web + private-library engine is the capability it reaches for.
57
+
58
+ **Recommended setup (avoids public-search rate limits, keeps queries private):**
59
+ ```bash
60
+ docker compose up -d searxng # self-hosted meta-search; pw auto-detects it
61
+ ```
62
+
63
+ ## Why this exists
64
+
65
+ - **Currency beats memory.** Frontier chatbots answer from training data that is months or
66
+ years old. This engine reads the web *now* and cites what it found. In our own blind trial,
67
+ live-web research was the only thing that beat a frontier model — both times currency mattered.
68
+ For time-sensitive questions it leads with the **freshest-dated** sources (so they survive the
69
+ cap and get read first), pins the **current year into the search query** so the engine returns
70
+ *this year's* results instead of an SEO-dominant old page, and **researches deeper on breaking
71
+ topics** — while leaving stable-fact questions in plain relevance order.
72
+ - **Private by construction.** No account, no server, no telemetry. By default, the only traffic that
73
+ leaves your machine is the web searches and public-page fetches themselves. Reports are files on your disk.
74
+ - **Plural by design.** Different model families make *different* mistakes. A planner discovers
75
+ distinct angles (STORM-style); each analyst researches its own angle with its own model and
76
+ drafts from **full page extracts**, and a blind editor **preserves disagreement**
77
+ (agree / differ / unique sections — never a forced consensus). Question diversity × model
78
+ diversity catches what any single model hallucinates.
79
+ - **Right source for the query.** Beyond live web, academic-looking queries also hit **arXiv** and
80
+ definitional ones **Wikipedia** (`PW_SOURCE_ROUTING=off` to disable). Models stay warm between
81
+ steps (`PW_OLLAMA_KEEP_ALIVE`, default `30m`; set `0` to unload immediately) so there are no
82
+ reload stalls mid-run.
83
+ - **Free forever.** It's your hardware.
84
+
85
+ ## Honesty section (when NOT to use this)
86
+
87
+ A frontier chatbot is better when the answer lives in stable knowledge — math, code,
88
+ explanations, anything where being current doesn't matter. We measured this bluntly: local
89
+ models lose that fight 0/10 (`docs/TRIAL_RESULTS.md`). This tool wins when the answer lives
90
+ **on today's web** — prices, regulations, releases, markets, anything where "as of when?"
91
+ decides usefulness. Optional `--editor api` brings your own OpenRouter key for a frontier
92
+ editor pass over locally-gathered findings — your choice; the default is fully local.
93
+
94
+ ## Benchmark (honest, small sample)
95
+
96
+ On a 25-question subset of OpenAI's SimpleQA, the engine scored **64%** (single `qwen2.5:14b`,
97
+ snippet-only search, LLM-graded — `scripts/bench_simpleqa.py`). Context, plainly: SimpleQA rewards
98
+ short factoid recall, which is the *opposite* of what this tool is built for (multi-source reports
99
+ where currency and citation matter); the leaders' ~95% figures use bigger models, deeper agentic
100
+ loops, and more sources. We publish the number — small sample and all — because the honest floor is
101
+ more useful to you than a cherry-picked one. Run it yourself: `python scripts/bench_simpleqa.py --n 100`.
102
+
103
+ ### Citation fidelity (the metric that actually matters here)
104
+
105
+ A research tool lives or dies on one question: *when it says X [S3], does source S3 say X?*
106
+ `scripts/eval_citation_fidelity.py` measures exactly that — for every cited claim it checks
107
+ content-overlap with the source it points at and flags numbers stated in a claim that are absent from
108
+ the source. Two keyless (no-API-cost) modes:
109
+
110
+ ```bash
111
+ python scripts/eval_citation_fidelity.py --report reports/your-report.md # score an existing report (re-fetches its sources)
112
+ python scripts/eval_citation_fidelity.py --run --depth quick # fresh run, scored against the exact extract each model read
113
+ ```
114
+
115
+ It is honest about being a **floor**: lexical grounding catches off-topic citations and fabricated
116
+ numbers — the common, damaging failures — but a GROUNDED verdict means "not obviously fabricated",
117
+ *not* "verified true" (it can't detect subtle misrepresentation). The "grounded rate" is of
118
+ *verifiable* claims; unreachable sources are reported separately, never counted as failures.
119
+
120
+ ### Currency gap (where live web beats a frontier model's memory)
121
+
122
+ This tool's real edge isn't raw model size — it's *currency*. `scripts/eval_currency_gap.py` measures
123
+ exactly that: the local council (live web) vs a frontier model answering from its frozen training
124
+ knowledge, scored against curated references, as a matrix by *currency window × category*. A `static`
125
+ control set keeps it fair (where currency is irrelevant, the frontier should win). It **spends nothing
126
+ by default** — a bare run is a `$0` dry run that validates the question set and estimates cost; only
127
+ `--run` (with `OPENROUTER_API_KEY` in your env) makes the paid frontier calls:
128
+
129
+ ```bash
130
+ python scripts/eval_currency_gap.py # dry run — validate + estimate, $0
131
+ python scripts/eval_currency_gap.py --run # paid: council (free) vs frontier (your API key)
132
+ ```
133
+
134
+ ## Security model (designed in, not bolted on)
135
+
136
+ - **No browser automation, no computer use, no sessions, no cookies — ever.** Search API +
137
+ plain fetch of public pages only. The gravest agent attacks (session-token theft,
138
+ authenticated exfiltration) have nothing to grab here.
139
+ - **All web content is untrusted data.** It passes a sanitizer (invisible-Unicode and
140
+ hidden-comment stripping) and enters prompts only inside spotlighting delimiters marked
141
+ "data, never instructions" (`council/sanitize.py`). The same gate covers the **ends** of the
142
+ pipeline too: your brief is scrubbed of hidden/bidi characters and length-bounded before it
143
+ shapes any prompt, and every model-written passage is re-scrubbed before it lands in the
144
+ report (so a payload smuggled through a source can't survive into the artifact) — all without
145
+ touching visible layout or citations.
146
+ - **Models hold zero tool privileges.** They only return text; every action (search, fetch,
147
+ file write) is plain Python under this repo's control. Reports write only into `./reports/`;
148
+ fetches pass an SSRF guard (public hosts only, size-capped).
149
+
150
+ ## Hardware guide
151
+
152
+ | Your machine | Models that fit (4-bit) | Experience |
153
+ |---|---|---|
154
+ | 8 GB RAM/VRAM | 3–4B (llama3.2, qwen3:4b, gemma3:4b) | quick reports, lighter analysis |
155
+ | 16 GB | 7–14B (qwen3:14b, gemma3:12b) | the sweet spot |
156
+ | 24 GB+ | 14–32B (+ mistral-small:22b) | best local quality |
157
+
158
+ Models run **sequentially** by design — no concurrent loads fighting for memory.
159
+ On CPU-only or busy machines, cap the cast by weight size: `PW_MODEL_CAP_GB=3 pw research …`
160
+ (big models on CPU crawl at 3–6 tok/s — a small model that fits is always faster than a large
161
+ one that spills).
162
+
163
+ Page evidence uses [trafilatura](https://github.com/adbar/trafilatura) for clean main-content
164
+ extraction (with a regex fallback); full credits in [docs/PRIOR_ART.md](docs/PRIOR_ART.md).
165
+
166
+ ## Federation (experimental) — the multiplayer mode
167
+
168
+ Everything above runs on one machine. The same repo contains the network layer
169
+ (`council/net/`): connect machines in **different countries** and reports gain genuinely
170
+ different windows on the web — each node researches from its own egress and returns its **own
171
+ cited findings** (never proxied traffic), an editor merges with per-country sections, and a
172
+ non-tradeable mutual-aid credit accounts for who helped whom. It already powers a live
173
+ two-country deployment, plus typed marketplace jobs (deep research, sharded batch work with
174
+ capability matching, and **assisted** human-in-the-loop tasks — `pw tasks` / `pw accept` /
175
+ `pw deliver`: an operator consents to a bounded brief and does it with their own AI or by hand,
176
+ never our autonomous code). The asker **rates** the result (`pw rate`), building operator
177
+ **reputation** that gates higher-trust offers — while newcomers can still take ungated work. Deliverables can be **real files**, moved as content-addressed,
178
+ integrity-verified chunks (`pw deliver <task> @file <job>` → `pw fetch <job> <dir>`) — a
179
+ corrupted or swapped chunk is detected, never written. With the `[crypto]` extra, deliverables are
180
+ **signed** (the asker verifies which operator produced them) and files can be **end-to-end
181
+ encrypted** to the asker (`pw keygen` → the coordinator relays ciphertext it cannot read). For
182
+ authenticity that holds even against a hostile coordinator, the asker **pins** an operator's signing
183
+ key out of band — `pw fingerprint` (operator) → `pw trust add` (asker), or trust-on-first-use — and
184
+ `pw fetch` verifies against the pinned key, refusing a swapped one. Two principles are absolute: **operators always see and consent to the
185
+ work their machine does** (never hidden tasks), and when a job needs a real computer driven, it
186
+ is **handed to the human operator** to do with their own AI under approval — our code never
187
+ automates anyone's machine. The long game is a commons of computers doing real work for each
188
+ other — **no token, no secondary market, money only ever at the edges.** See
189
+ [docs/FEDERATION_V2.md](docs/FEDERATION_V2.md).
190
+
191
+ ## Documentation
192
+
193
+ | Doc | What |
194
+ |---|---|
195
+ | [docs/CONTEXT.md](docs/CONTEXT.md) | The why, the history, the layered vision. |
196
+ | [docs/ROADMAP.md](docs/ROADMAP.md) | Milestones + pivots (living tracker). |
197
+ | [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) | Roles, local vs networked shape, trust/security. |
198
+ | [docs/DECISIONS.md](docs/DECISIONS.md) | Settled decisions + rationale (ADR-style, D1–D16). |
199
+ | [docs/ECONOMICS.md](docs/ECONOMICS.md) | Credit, give/take, score-weighted payouts, legal posture. |
200
+ | [docs/TRIAL_RESULTS.md](docs/TRIAL_RESULTS.md) | Our blind trial vs a frontier model — losses included. |
201
+ | [docs/GLOSSARY.md](docs/GLOSSARY.md) | Terms (Council, analyst, judge, lens, credit…). |
202
+ | [docs/CONTRIBUTE_COMPUTE.md](docs/CONTRIBUTE_COMPUTE.md) | Plug a machine into the federation — what it does, earns, and why it's safe. |
203
+ | [docs/RELEASING.md](docs/RELEASING.md) | How to publish to PyPI (verified build; needs your token). |
204
+
205
+ ## Status
206
+
207
+ Young software, honestly labeled: the single-player engine works and is verified end-to-end;
208
+ the federation layer is experimental. We publish our methodology and our losses, not just wins.
209
+ Issues and PRs welcome. MIT.
@@ -0,0 +1 @@
1
+ """Passive Workers — the Council: mutual-aid collective-intelligence MVP."""
@@ -0,0 +1,161 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ council/artifacts.py — content-addressed, chunked, integrity-verified file delivery (D22)
4
+ =========================================================================================
5
+ Real marketplace work produces FILES, not just text. This is the lean, dependency-free
6
+ codec for moving them between machines safely (FEDERATION_V2 step 3):
7
+
8
+ • split a file into fixed-size chunks, hash each (sha256) → the chunk is its own address
9
+ • a manifest records {name, size, chunk_size, root, chunks:[hashes]}; `root` = sha256 of
10
+ the ordered chunk hashes (a flat Merkle root) — the tamper-evident fingerprint
11
+ • the coordinator stores chunks as OPAQUE content-addressed blobs (dedup for free)
12
+ • the receiver fetches each chunk by hash, verifies it against the manifest, and only
13
+ reassembles if every chunk and the root check out — a corrupted or swapped chunk is
14
+ detected, not written
15
+
16
+ Encryption + producer signatures layer on top of this later (the [crypto] extra); the
17
+ content-addressing here already gives integrity. Stdlib only.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import hashlib
23
+ import pathlib
24
+
25
# Number of bytes read per chunk by chunk_file / chunk_file_encrypted; also recorded in
# every manifest as "chunk_size".
CHUNK_SIZE = 256 * 1024 # 256 KiB chunks
# Upper bound on the on-disk size of a file that chunk_file / chunk_file_encrypted accept.
MAX_FILE_BYTES = 50 * 1024 * 1024 # 50 MiB per deliverable file (v1 cap)


# JSON key that marks a deliverable string as a file artifact (written by wrap_artifact,
# checked by read_artifact) rather than plain text.
_ARTIFACT_TAG = "__pw_artifact__"
30
+
31
+
32
def _h(data: bytes) -> str:
    """Return the lowercase hex SHA-256 digest of `data`.

    This digest is what the chunking functions in this module use as a chunk's address.
    """
    digest = hashlib.sha256()
    digest.update(data)
    return digest.hexdigest()
34
+
35
+
36
def wrap_artifact(manifest: dict) -> str:
    """Encode `manifest` as a tagged JSON file-deliverable string.

    The envelope carries the artifact tag alongside the manifest so that read_artifact
    can tell it apart from a text deliverable that merely happens to be JSON.
    """
    import json

    envelope = {_ARTIFACT_TAG: 1}
    envelope["manifest"] = manifest
    return json.dumps(envelope)
41
+
42
+
43
def read_artifact(deliverable: str):
    """Extract the manifest from a tagged file artifact.

    Returns the manifest dict when `deliverable` parses as JSON, is an object carrying the
    artifact tag with value 1, and holds a dict under "manifest". Returns None in every
    other case, meaning the deliverable should be treated as plain text.
    """
    import json

    try:
        parsed = json.loads(deliverable)
    except Exception:  # anything unparseable is simply a text deliverable
        return None
    if not isinstance(parsed, dict):
        return None
    if parsed.get(_ARTIFACT_TAG) != 1:
        return None
    manifest = parsed.get("manifest")
    return manifest if isinstance(manifest, dict) else None
53
+
54
+
55
def manifest_root(chunk_hashes: list[str]) -> str:
    """Compute the flat Merkle root of a manifest.

    The root is the hex SHA-256 of the chunk hashes concatenated in order, so reordering
    the chunks changes the root.
    """
    joined = "".join(chunk_hashes)
    hasher = hashlib.sha256()
    hasher.update(joined.encode())
    return hasher.hexdigest()
58
+
59
+
60
def chunk_file(path: str) -> tuple[dict, dict[str, bytes]]:
    """Split the file at `path` into content-addressed chunks.

    Returns (manifest, blobs): `blobs` maps each chunk's sha256 hex digest to its bytes,
    and `manifest` records the file name, its stat() size, the chunk size, the ordered
    chunk hashes, and the root over those hashes.

    Raises ValueError if `path` is not a regular file or is larger than MAX_FILE_BYTES.
    """
    src = pathlib.Path(path).expanduser().resolve()
    if not src.is_file():
        raise ValueError(f"not a file: {path}")
    size = src.stat().st_size
    if size > MAX_FILE_BYTES:
        raise ValueError(f"file too large ({size // 1_000_000} MB > {MAX_FILE_BYTES // 1_000_000} MB)")

    blobs: dict[str, bytes] = {}
    order: list[str] = []
    with src.open("rb") as fh:
        # read() returns b"" at EOF, which ends the loop
        while piece := fh.read(CHUNK_SIZE):
            digest = _h(piece)
            blobs[digest] = piece  # identical chunks collapse onto one entry
            order.append(digest)

    manifest = {
        "name": src.name,
        "size": size,
        "chunk_size": CHUNK_SIZE,
        "chunks": order,
        "root": manifest_root(order),
    }
    return manifest, blobs
81
+
82
+
83
# Exactly 64 lowercase hex characters, i.e. the shape of a sha256 hexdigest.
# Anchored with \Z rather than $: in Python, $ also matches just before a trailing
# newline, which would let "<64 hex chars>\n" pass as a well-formed hash.
_HEX64 = __import__("re").compile(r"^[0-9a-f]{64}\Z")
84
+
85
+
86
def chunk_file_encrypted(path: str, seal_fn) -> tuple[dict, dict[str, bytes]]:
    """Chunk the file at `path`, encrypting every chunk before it is hashed and stored.

    Each plaintext chunk is passed through seal_fn(bytes) -> bytes; the resulting
    CIPHERTEXT is what gets hashed (its sha256 is the blob address) and stored, so
    content-addressing and integrity checks operate on ciphertext. The manifest is
    flagged `encrypted` and its `size` is the PLAINTEXT size taken from stat(). The
    coordinator only ever stores ciphertext it cannot read (D23).

    Returns (manifest, {ciphertext_hash: ciphertext}).
    Raises ValueError if `path` is not a regular file or is larger than MAX_FILE_BYTES.
    """
    src = pathlib.Path(path).expanduser().resolve()
    if not src.is_file():
        raise ValueError(f"not a file: {path}")
    size = src.stat().st_size
    if size > MAX_FILE_BYTES:
        raise ValueError(f"file too large ({size // 1_000_000} MB > {MAX_FILE_BYTES // 1_000_000} MB)")

    blobs: dict[str, bytes] = {}
    order: list[str] = []
    with src.open("rb") as fh:
        # iterate fixed-size plaintext reads until read() yields b"" at EOF
        for plain in iter(lambda: fh.read(CHUNK_SIZE), b""):
            sealed = seal_fn(plain)
            address = _h(sealed)
            blobs[address] = sealed
            order.append(address)

    manifest = {
        "name": src.name,
        "size": size,
        "chunk_size": CHUNK_SIZE,
        "chunks": order,
        "root": manifest_root(order),
        "encrypted": True,
    }
    return manifest, blobs
111
+
112
+
113
def verify_manifest(manifest: dict) -> bool:
    """Check that a manifest is internally consistent before anything is fetched.

    Returns True only when:
      * `manifest` is a dict,
      * "chunks" is a list whose every entry is a string of exactly 64 lowercase hex
        characters,
      * "size" is a non-negative int (booleans are rejected), and
      * "root" equals manifest_root() of the ordered chunk list.

    An empty file (size 0, no chunks) is valid. Never raises for malformed input; a
    doctored or mis-typed manifest simply yields False.
    """
    if not isinstance(manifest, dict):
        return False
    chunks = manifest.get("chunks")
    size = manifest.get("size")
    if not isinstance(chunks, list):
        return False
    # bool is a subclass of int, so a JSON true/false would otherwise pass as a size
    if isinstance(size, bool) or not isinstance(size, int) or size < 0:
        return False
    # fullmatch (not match): with a `$` anchor, match() would also accept "<hash>\n"
    if not all(isinstance(h, str) and _HEX64.fullmatch(h) for h in chunks):
        return False
    return manifest.get("root") == manifest_root(chunks)
124
+
125
+
126
def reassemble(manifest: dict, fetch_chunk, out_dir: str, decrypt=None) -> pathlib.Path:
    """Fetch, verify and write the file described by `manifest` into `out_dir`.

    Each chunk is obtained via fetch_chunk(hash) -> bytes and VERIFIED against its hash
    before use. If the manifest is `encrypted`, `decrypt` (bytes -> bytes) is applied
    AFTER the ciphertext hash has been verified.

    The data is staged in a temporary file inside `out_dir` and moved onto the final
    name only once every chunk and the total size have checked out. On any failure the
    staging file is removed, so no partial or corrupted output is left behind and a
    file that already existed at the destination is not touched.

    Path-safe: the manifest name is reduced to a basename inside `out_dir`.

    Returns the path of the written file.
    Raises ValueError when the manifest is encrypted but `decrypt` is None, when the
    manifest fails verify_manifest, when the output path escapes `out_dir`, when a chunk
    is missing or corrupted, when decryption fails, or when the reassembled size differs
    from the manifest. Exceptions raised by `fetch_chunk` itself propagate unchanged.
    """
    import os
    import secrets

    if manifest.get("encrypted") and decrypt is None:
        raise ValueError("manifest is encrypted but no decrypt key provided")
    if not verify_manifest(manifest):
        raise ValueError("manifest root does not match its chunk list")
    out = pathlib.Path(out_dir).expanduser().resolve()
    out.mkdir(parents=True, exist_ok=True)
    name = pathlib.Path(str(manifest.get("name", ""))).name or "deliverable.bin"  # strip traversal
    dest = (out / name).resolve()
    # segment-aware containment (not a bare startswith, which sibling-prefixes could fool)
    if dest != out and out not in dest.parents:
        raise ValueError("unsafe output path")
    encrypted = bool(manifest.get("encrypted"))
    total = 0
    # Stage next to the destination so the final os.replace stays on one filesystem;
    # the random suffix plus "xb" guarantees we never overwrite someone else's file.
    partial = out / f".pw-{secrets.token_hex(8)}.part"
    try:
        with partial.open("xb") as f:
            for h in manifest["chunks"]:
                data = fetch_chunk(h)
                if data is None or _h(data) != h:  # verify the (cipher)text against its address
                    raise ValueError(f"chunk {h[:12]}… missing or corrupted — aborting")
                if encrypted:
                    try:
                        data = decrypt(data)  # decrypt AFTER integrity check
                    except Exception as exc:
                        raise ValueError("decryption failed (wrong key or tampered chunk)") from exc
                f.write(data)
                total += len(data)
        if manifest.get("size") is not None and total != manifest["size"]:
            raise ValueError("reassembled size mismatch")
        # the staging file is closed by now, so the rename also works on Windows
        os.replace(partial, dest)
    finally:
        # no-op after a successful replace; removes the staging file on every failure path
        partial.unlink(missing_ok=True)
    return dest