tryassay 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +553 -0
  3. package/dist/cli.d.ts +2 -0
  4. package/dist/cli.js +80 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/commands/assess.d.ts +6 -0
  7. package/dist/commands/assess.js +267 -0
  8. package/dist/commands/assess.js.map +1 -0
  9. package/dist/commands/describe.d.ts +3 -0
  10. package/dist/commands/describe.js +114 -0
  11. package/dist/commands/describe.js.map +1 -0
  12. package/dist/commands/extract.d.ts +4 -0
  13. package/dist/commands/extract.js +144 -0
  14. package/dist/commands/extract.js.map +1 -0
  15. package/dist/commands/hallucinate.d.ts +3 -0
  16. package/dist/commands/hallucinate.js +100 -0
  17. package/dist/commands/hallucinate.js.map +1 -0
  18. package/dist/commands/init.d.ts +1 -0
  19. package/dist/commands/init.js +39 -0
  20. package/dist/commands/init.js.map +1 -0
  21. package/dist/commands/regenerate.d.ts +3 -0
  22. package/dist/commands/regenerate.js +158 -0
  23. package/dist/commands/regenerate.js.map +1 -0
  24. package/dist/commands/remediate.d.ts +5 -0
  25. package/dist/commands/remediate.js +155 -0
  26. package/dist/commands/remediate.js.map +1 -0
  27. package/dist/commands/report.d.ts +3 -0
  28. package/dist/commands/report.js +84 -0
  29. package/dist/commands/report.js.map +1 -0
  30. package/dist/commands/reverse.d.ts +9 -0
  31. package/dist/commands/reverse.js +115 -0
  32. package/dist/commands/reverse.js.map +1 -0
  33. package/dist/commands/verify.d.ts +4 -0
  34. package/dist/commands/verify.js +112 -0
  35. package/dist/commands/verify.js.map +1 -0
  36. package/dist/lib/anthropic.d.ts +13 -0
  37. package/dist/lib/anthropic.js +60 -0
  38. package/dist/lib/anthropic.js.map +1 -0
  39. package/dist/lib/assessment-reporter.d.ts +5 -0
  40. package/dist/lib/assessment-reporter.js +266 -0
  41. package/dist/lib/assessment-reporter.js.map +1 -0
  42. package/dist/lib/claim-extractor.d.ts +6 -0
  43. package/dist/lib/claim-extractor.js +138 -0
  44. package/dist/lib/claim-extractor.js.map +1 -0
  45. package/dist/lib/code-verifier.d.ts +7 -0
  46. package/dist/lib/code-verifier.js +265 -0
  47. package/dist/lib/code-verifier.js.map +1 -0
  48. package/dist/lib/codebase-indexer.d.ts +15 -0
  49. package/dist/lib/codebase-indexer.js +156 -0
  50. package/dist/lib/codebase-indexer.js.map +1 -0
  51. package/dist/lib/config.d.ts +7 -0
  52. package/dist/lib/config.js +38 -0
  53. package/dist/lib/config.js.map +1 -0
  54. package/dist/lib/constraint-engine.d.ts +2 -0
  55. package/dist/lib/constraint-engine.js +337 -0
  56. package/dist/lib/constraint-engine.js.map +1 -0
  57. package/dist/lib/fs-utils.d.ts +1 -0
  58. package/dist/lib/fs-utils.js +11 -0
  59. package/dist/lib/fs-utils.js.map +1 -0
  60. package/dist/lib/guided-generator.d.ts +2 -0
  61. package/dist/lib/guided-generator.js +195 -0
  62. package/dist/lib/guided-generator.js.map +1 -0
  63. package/dist/lib/inventory-extractor.d.ts +7 -0
  64. package/dist/lib/inventory-extractor.js +238 -0
  65. package/dist/lib/inventory-extractor.js.map +1 -0
  66. package/dist/lib/prompts.d.ts +3 -0
  67. package/dist/lib/prompts.js +50 -0
  68. package/dist/lib/prompts.js.map +1 -0
  69. package/dist/lib/publisher.d.ts +2 -0
  70. package/dist/lib/publisher.js +71 -0
  71. package/dist/lib/publisher.js.map +1 -0
  72. package/dist/lib/remediation-generator.d.ts +2 -0
  73. package/dist/lib/remediation-generator.js +136 -0
  74. package/dist/lib/remediation-generator.js.map +1 -0
  75. package/dist/lib/remediator.d.ts +7 -0
  76. package/dist/lib/remediator.js +209 -0
  77. package/dist/lib/remediator.js.map +1 -0
  78. package/dist/lib/report-generator.d.ts +8 -0
  79. package/dist/lib/report-generator.js +190 -0
  80. package/dist/lib/report-generator.js.map +1 -0
  81. package/dist/lib/requirements-generator.d.ts +14 -0
  82. package/dist/lib/requirements-generator.js +311 -0
  83. package/dist/lib/requirements-generator.js.map +1 -0
  84. package/dist/lib/spec-synthesizer.d.ts +2 -0
  85. package/dist/lib/spec-synthesizer.js +136 -0
  86. package/dist/lib/spec-synthesizer.js.map +1 -0
  87. package/dist/lib/system-prompts.d.ts +12 -0
  88. package/dist/lib/system-prompts.js +254 -0
  89. package/dist/lib/system-prompts.js.map +1 -0
  90. package/dist/types.d.ts +243 -0
  91. package/dist/types.js +2 -0
  92. package/dist/types.js.map +1 -0
  93. package/package.json +49 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rock Steady Systems LLC
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,553 @@
1
+ <div align="center">
2
+
3
+ # Assay
4
+
5
+ ### AI Code Verification
6
+
7
+ **Find bugs that tests miss, linters ignore, and code review overlooks.**
8
+
9
+ *Built on the LUCID methodology — Leveraging Unverified Claims Into Deliverables.*
10
+
11
+ [![MIT License](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
12
+ [![GitHub stars](https://img.shields.io/github/stars/gtsbahamas/hallucination-reversing-system?style=social)](https://github.com/gtsbahamas/hallucination-reversing-system/stargazers)
13
+ [![Node.js 20+](https://img.shields.io/badge/Node.js-20%2B-339933?logo=node.js&logoColor=white)](https://nodejs.org/)
14
+ [![TypeScript](https://img.shields.io/badge/TypeScript-5.7-3178C6?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
15
+ [![Paper](https://img.shields.io/badge/Paper-PDF-red?logo=arxiv)](arxiv-submission/main.pdf)
16
+ [![DOI](https://img.shields.io/badge/DOI-10.5281%2Fzenodo.18522644-blue)](https://doi.org/10.5281/zenodo.18522644)
17
+ [![HumanEval](https://img.shields.io/badge/HumanEval-100%25%20pass%405-brightgreen)](results/)
18
+ [![SWE--bench](https://img.shields.io/badge/SWE--bench-30.3%25%20(%2B65%25)-brightgreen)](results/)
19
+
20
+ [Website](https://tryassay.ai) | [Paper](docs/paper.md) | [Methodology Guide](docs/methodology.md) | [Prior Art](docs/prior-art.md) | [CLI Reference](#cli-reference)
21
+
22
+ </div>
23
+
24
+ ---
25
+
26
+ > **Patent Notice:** The verification methodology implemented by Assay is the subject of U.S. Provisional Patent Application No. 63/980,048, filed February 11, 2026, assigned to Rock Steady Systems LLC. The software is licensed under the MIT License. Use of the software does not grant any patent license beyond the rights conveyed by the MIT License.
27
+
28
+ ---
29
+
30
+ ## Benchmark Results
31
+
32
+ Assay was evaluated on two standard code generation benchmarks. All results validated by running real test suites, not LLM judgment.
33
+
34
+ | Benchmark | Baseline | Assay | Improvement |
35
+ |-----------|----------|-------|-------------|
36
+ | **HumanEval** pass@1 | 86.6% | **98.8%** | +14.1% |
37
+ | **HumanEval** pass@5 | -- | **100%** (164/164) | All problems solved |
38
+ | **SWE-bench** resolve@1 | 18.3% | **25.0%** | +36.4% |
39
+ | **SWE-bench** best-of-5 | -- | **30.3%** (91/300) | +65.5% |
40
+
41
+ Key finding: LLM-as-judge verification actually **performs worse** at higher k values (97.2% vs 100% for Assay at k=5) because it hallucinates false positives. Structured claim extraction avoids this failure mode.
42
+
43
+ Full benchmark data: [`results/`](results/) | [Benchmark report](https://tryassay.ai/report)
44
+
45
+ ---
46
+
47
+ ## The Problem
48
+
49
+ Every AI development workflow treats hallucination as the enemy. Spec-Driven Development writes precise specs to prevent it. Prompt engineering constrains it. Guardrails filter it out.
50
+
51
+ But three independent formal proofs have established that **hallucination cannot be eliminated** from LLMs:
52
+
53
+ - **Xu et al. (2024)** -- learning theory proof that LLMs must hallucinate as general problem solvers
54
+ - **Banerjee et al. (2024)** -- Gödel's Incompleteness Theorem applied to LLM architecture
55
+ - **Karpowicz (2025)** -- impossibility theorem via mechanism design and transformer analysis
56
+
57
+ If hallucination is mathematically inevitable, suppressing it is fighting thermodynamics. **Assay harnesses it instead.**
58
+
59
+ ## The Insight
60
+
61
+ When you ask an AI to write Terms of Service for an application that doesn't exist, it doesn't say "this application doesn't exist." It **confabulates**. It invents specific capabilities, data handling procedures, user rights, performance guarantees, and limitations -- all in the authoritative, precise language that legal documents demand.
62
+
63
+ Every one of those hallucinated claims is a **testable requirement**.
64
+
65
+ A single hallucinated ToS produces 80--150 testable claims spanning functionality, security, data privacy, performance, operations, and legal compliance. No human requirements-gathering process generates this breadth in 30 seconds.
66
+
67
+ ---
68
+
69
+ ## How Assay Works
70
+
71
+ Assay implements the LUCID methodology -- a six-phase iterative cycle that converges hallucinated fiction toward verified reality:
72
+
73
+ ```
74
+ THE ASSAY CYCLE
75
+ +------------------------------------------------------+
76
+ | |
77
+ | +-----------+ +--------------+ +----------+ |
78
+ | | 1. DESCRIBE|-->|2. HALLUCINATE|-->|3. EXTRACT| |
79
+ | | | | | | | |
80
+ | | Loose idea | | AI writes | | Each | |
81
+ | | of the app | | ToS as if | | claim = | |
82
+ | | | | app is live | | testable | |
83
+ | +-----------+ +--------------+ | req | |
84
+ | +----+-----+ |
85
+ | | |
86
+ | +------------+ +-------------+ | |
87
+ | |5. CONVERGE |<---| 4. BUILD |<--------+ |
88
+ | | | | | |
89
+ | | Verify ToS | | Implement | |
90
+ | | vs reality | | until code | |
91
+ | | | | satisfies | |
92
+ | +-----+------+ +-------------+ |
93
+ | | |
94
+ | Gap found? |
95
+ | YES --> Fix --> Re-verify |
96
+ | NO --> Continue |
97
+ | | |
98
+ | +-----v--------+ |
99
+ | |6. REGENERATE | Feed verified reality back. |
100
+ | | | AI writes updated ToS. |
101
+ | | New ToS from | New hallucinations = new reqs. |
102
+ | | updated state|-----------------------------------+
103
+ | +--------------+ Loop to step 3
104
+ |
105
+ +-- EXIT: Delta between ToS and reality is acceptable
106
+ ```
107
+
108
+ ### Phase Details
109
+
110
+ | Phase | What Happens | Output |
111
+ |-------|-------------|--------|
112
+ | **1. Describe** | Give the AI a loose, intentionally incomplete description. The gaps are where hallucination does its best work. | Seed description |
113
+ | **2. Hallucinate** | AI writes Terms of Service as if the app is live in production with paying customers. Legal language forces precision -- no hedging allowed. | 400--600 lines of dense legal text |
114
+ | **3. Extract** | Parse every declarative statement into a structured, testable claim with category, severity, and traceability back to the ToS clause. | 80--150 categorized claims |
115
+ | **4. Build** | Implement the application using any methodology (TDD, agile, etc.). The ToS-derived claims are the acceptance criteria. | Working code |
116
+ | **5. Converge** | Verify every claim against the actual codebase. Assign verdicts: PASS, PARTIAL, FAIL, or N/A. Generate a gap report. | Compliance score + gap report |
117
+ | **6. Regenerate** | Feed verified reality back to the AI. It writes an updated ToS -- keeping what's real, revising what's partial, and hallucinating new features. | Next iteration's specification |
118
+
119
+ ### Convergence
120
+
121
+ With each iteration:
122
+ - The ratio of accurate-to-hallucinated claims increases
123
+ - New hallucinations become more contextually grounded
124
+ - The gap between spec and reality shrinks
125
+ - The application grows in directions the AI deems plausible for the domain
126
+
127
+ **Exit condition:** The team decides the delta is acceptable. This is a human judgment call, not an automated threshold.
128
+
129
+ ---
130
+
131
+ ## Empirical Results
132
+
133
+ Assay was applied to a production Next.js application (~30,000 lines of TypeScript, 200+ files):
134
+
135
+ | Iteration | Compliance | PASS | PARTIAL | FAIL | N/A |
136
+ |-----------|-----------|------|---------|------|-----|
137
+ | 1 | ~35% (est.) | -- | -- | -- | -- |
138
+ | 3 | 57.3% | 38 | 15 | 32 | 6 |
139
+ | 4 | 69.8% | 47 | 18 | 20 | 6 |
140
+ | 5 | 83.2% | 61 | 15 | 9 | 6 |
141
+ | **6** | **90.8%** | **68** | **12** | **5** | **6** |
142
+
143
+ ```
144
+ Compliance Over Iterations:
145
+
146
+ 100% |
147
+ 90% | * 90.8%
148
+ 80% | * 83.2%
149
+ 70% | * 69.8%
150
+ 60% | * 57.3%
151
+ 50% |
152
+ 40% |
153
+ 35% | * ~35%
154
+ +--+------+------+------+------+------+--
155
+ 1 2 3 4 5 6
156
+ Iteration
157
+ ```
158
+
159
+ **Total cost for 6 iterations: ~$17 in API tokens.**
160
+
161
+ The 5 remaining FAIL claims after convergence were all **genuine missing functionality** -- not false positives. The hallucinated ToS correctly identified requirements a production app should have.
162
+
163
+ ---
164
+
165
+ ## Why Terms of Service?
166
+
167
+ ToS is the ideal hallucination vehicle because the document format forces specificity across every dimension of a software product simultaneously:
168
+
169
+ | ToS Section | Produces | Example Claim |
170
+ |-------------|----------|---------------|
171
+ | Service Description | Feature requirements | "The Service allows batch processing of up to 10,000 records" |
172
+ | Acceptable Use | Input validation rules | "Users may not upload files exceeding 50MB" |
173
+ | Data Handling | Privacy & security requirements | "User data is encrypted at rest using AES-256" |
174
+ | Limitations | Performance boundaries | "The Service supports up to 10,000 concurrent users" |
175
+ | SLA / Uptime | Reliability requirements | "The Service maintains 99.9% uptime" |
176
+ | Termination | Account lifecycle requirements | "Data is retained for 30 days post-deletion" |
177
+ | Liability | Error handling requirements | "Graceful degradation on third-party API failure" |
178
+ | Modifications | Versioning requirements | "Users are notified 30 days before changes" |
179
+
180
+ Legal language cannot be vague. *"The Service may do things"* is not a valid legal clause. The format forces the AI to hallucinate **precisely**.
181
+
182
+ ---
183
+
184
+ ## Quick Start
185
+
186
+ ### Prerequisites
187
+
188
+ - Node.js 20+
189
+ - An [Anthropic API key](https://console.anthropic.com/) (Claude)
190
+
191
+ ### Installation
192
+
193
+ ```bash
194
+ # Clone the repository
195
+ git clone https://github.com/gtsbahamas/hallucination-reversing-system.git
196
+ cd hallucination-reversing-system
197
+
198
+ # Install dependencies
199
+ npm install
200
+
201
+ # Build the CLI
202
+ npm run build
203
+
204
+ # Set your API key
205
+ export ANTHROPIC_API_KEY="sk-ant-..."
206
+ ```
207
+
208
+ ### Run Your First Assay Cycle
209
+
210
+ ```bash
211
+ # 1. Initialize an Assay project
212
+ npx assay init
213
+
214
+ # 2. Generate a hallucinated Terms of Service
215
+ npx assay hallucinate
216
+
217
+ # 3. Extract testable claims from the hallucination
218
+ npx assay extract
219
+
220
+ # 4. Verify claims against your codebase
221
+ npx assay verify --repo /path/to/your/project
222
+
223
+ # 5. Generate a gap report
224
+ npx assay report
225
+
226
+ # 6. Generate remediation tasks for gaps
227
+ npx assay remediate --repo /path/to/your/project
228
+
229
+ # 7. After fixing gaps, regenerate for the next iteration
230
+ npx assay regenerate
231
+ ```
232
+
233
+ Each iteration stores artifacts in `.assay/iterations/{N}/`, maintaining a complete audit trail.
234
+
235
+ ---
236
+
237
+ ## MCP Server (Claude Code, Cursor, Windsurf)
238
+
239
+ Add Assay verification as a native tool in your AI editor with one config block.
240
+
241
+ ```bash
242
+ npm install -g assay-mcp
243
+ ```
244
+
245
+ **Claude Code** (`~/.claude/settings.json`):
246
+ ```json
247
+ {
248
+ "mcpServers": {
249
+ "assay": {
250
+ "command": "npx",
251
+ "args": ["-y", "assay-mcp"],
252
+ "env": { "ASSAY_API_KEY": "ak_live_your_key_here" }
253
+ }
254
+ }
255
+ }
256
+ ```
257
+
258
+ Then ask your AI assistant: *"Verify this file with Assay"* or *"Generate a verified function that parses CSV"*
259
+
260
+ Assay catches what the AI missed and shows you exactly what would have shipped without verification.
261
+
262
+ Get a free API key at [tryassay.ai](https://tryassay.ai). See [mcp-server/README.md](mcp-server/README.md) for full docs.
263
+
264
+ ---
265
+
266
+ ## GitHub Action
267
+
268
+ Add Assay verification to your CI/CD pipeline. Every PR gets a verification report as a comment.
269
+
270
+ ```yaml
271
+ - uses: gtsbahamas/hallucination-reversing-system/github-action@v0.1.1
272
+ with:
273
+ assay-api-key: ${{ secrets.ASSAY_API_KEY }}
274
+ ```
275
+
276
+ Two modes: **Assay API** (recommended, uses your Assay key) or **BYOK** (bring your own Anthropic key for self-hosted verification). See [github-action/README.md](github-action/README.md) for full docs.
277
+
278
+ ---
279
+
280
+ ## CLI Reference
281
+
282
+ | Command | Phase | Description |
283
+ |---------|-------|-------------|
284
+ | `assay init` | Setup | Initialize project configuration (name, description, tech stack, audience) |
285
+ | `assay hallucinate` | Phase 2 | Generate a hallucinated ToS/API docs/user manual from project config |
286
+ | `assay describe` | Alt. input | Fetch an existing ToS from a URL (verify an existing product) |
287
+ | `assay extract` | Phase 3 | Extract testable claims from a hallucinated or fetched document |
288
+ | `assay verify` | Phase 5 | Verify extracted claims against a codebase |
289
+ | `assay report` | Analysis | Generate a gap report from verification results |
290
+ | `assay remediate` | Convergence | Generate code-level fix tasks from gaps |
291
+ | `assay regenerate` | Phase 6 | Feed verified reality back, regenerate spec for next iteration |
292
+
293
+ ### Options
294
+
295
+ ```bash
296
+ assay hallucinate --type tos|api-docs|user-manual # Document type (default: tos)
297
+ assay extract --iteration 3 # Specify iteration (default: latest)
298
+ assay extract --source my-tos.md # Extract from a file in .assay/sources/
299
+ assay verify --repo /path/to/code --iteration 3 # Verify specific iteration
300
+ assay remediate --threshold 95 # Set compliance target (default: 95%)
301
+ assay regenerate --iteration 3 # Regenerate from specific iteration
302
+ ```
303
+
304
+ ---
305
+
306
+ ## Scoring Methodology
307
+
308
+ Assay assigns one of four verdicts to each claim:
309
+
310
+ | Verdict | Meaning | Score Weight |
311
+ |---------|---------|-------------|
312
+ | **PASS** | Code fully implements the claim | 1.0 |
313
+ | **PARTIAL** | Code partially implements (some aspects missing) | 0.5 |
314
+ | **FAIL** | Code does not implement or contradicts the claim | 0.0 |
315
+ | **N/A** | Cannot be verified from code (e.g., legal-only claims) | Excluded |
316
+
317
+ **Compliance score:**
318
+
319
+ ```
320
+ Score = (PASS + 0.5 * PARTIAL) / (Total - N/A) * 100
321
+ ```
322
+
323
+ Claims are categorized by type and severity:
324
+
325
+ | Category | Examples |
326
+ |----------|---------|
327
+ | `functionality` | Features, user workflows, UI components |
328
+ | `security` | Encryption, auth, access control |
329
+ | `data-privacy` | Data handling, retention, deletion |
330
+ | `operational` | Performance, uptime, monitoring |
331
+ | `legal` | Terms, disclaimers, compliance |
332
+
333
+ | Severity | Meaning |
334
+ |----------|---------|
335
+ | `critical` | Security breach or data loss if false |
336
+ | `high` | Core functionality broken if false |
337
+ | `medium` | Important but not showstopping |
338
+ | `low` | Nice-to-have or cosmetic |
339
+
340
+ ---
341
+
342
+ ## The Neuroscience Behind Assay
343
+
344
+ Assay is not an arbitrary methodology. It is grounded in three convergent lines of evidence from cognitive neuroscience:
345
+
346
+ ### 1. Transformers = Hippocampal Pattern Completion
347
+
348
+ Ramsauer et al. (2020) proved that transformer self-attention is **mathematically equivalent** to the update rule of Hopfield networks -- the same associative memory computation performed by the hippocampal CA3 network. When an LLM generates text about a nonexistent app, it performs pattern completion from partial cues, filling gaps with plausible details. This is identical to how human memory reconstructs events -- some accurate, some confabulated.
349
+
350
+ ### 2. Perception as Controlled Hallucination
351
+
352
+ The predictive processing framework (Friston, Clark, Seth) holds that the brain is a prediction machine. As Anil Seth states: *"We're all hallucinating all the time; when we agree about our hallucinations, we call it reality."* Hallucination and perception are the same generative process under different constraint levels. Assay deliberately operates unconstrained during the Hallucinate phase, then progressively introduces constraint through Converge and Regenerate.
353
+
354
+ ### 3. The REBUS Model (Relaxed Beliefs Under Psychedelics)
355
+
356
+ Carhart-Harris and Friston (2019) showed that psychedelics relax the brain's top-down constraints, enabling novel associations that rigid priors normally suppress. This maps directly to LLM temperature: higher temperature = more novel (and hallucination-prone) outputs. Assay exploits this by generating freely at "high temperature," then constraining iteratively -- just as the brain reintegrates psychedelic insights under normal conditions.
357
+
358
+ ### The Naming
359
+
360
+ The LUCID methodology is named for **lucid dreaming** -- the state where a dreamer becomes metacognitively aware they are dreaming while remaining in the dream. A lucid dreamer does not fight the dream. They participate with awareness, harvesting creative content while maintaining the ability to distinguish generated from real. Assay applies this principle to AI-generated code: harness the hallucination, don't suppress it.
361
+
362
+ ---
363
+
364
+ ## How Assay Differs From Traditional Approaches
365
+
366
+ | Approach | Hallucination Stance | Spec Source | Convergence Loop | Verification |
367
+ |----------|---------------------|-------------|------------------|-------------|
368
+ | **Spec-Driven Development** (GitHub, 2025) | Prevents | Human-written | No | Spec compliance |
369
+ | **Readme-Driven Development** (Preston-Werner, 2010) | N/A | Human-written | No | Manual |
370
+ | **Design Fiction** (Sterling, 2005) | Intentional (human) | Human fiction | Loose | Informal |
371
+ | **Vibe Coding** (Karpathy, 2025) | Tolerates | Human prompt | No | Ad hoc |
372
+ | **Protein Hallucination** (Baker, Nobel 2024) | Exploits | Neural network | Validate-only | Lab synthesis |
373
+ | **Assay** | **Exploits** | **AI-hallucinated ToS** | **Yes** | **Codebase verification** |
374
+
375
+ Assay is the only methodology that combines AI-generated specification, deliberate hallucination exploitation, and iterative convergence verification against a real codebase.
376
+
377
+ The closest analogue is David Baker's protein hallucination -- where neural network "dreams" serve as blueprints for novel biological structures. That insight earned the **2024 Nobel Prize in Chemistry**. Assay applies the identical principle to software engineering.
378
+
379
+ ---
380
+
381
+ ## Real-World Application
382
+
383
+ Assay was developed and dogfooded on production applications, including an event photography platform and an AI agent platform. The gap analysis from a real Assay iteration looks like this:
384
+
385
+ ```
386
+ Iteration 1: CrowdPics TV (112 claims extracted)
387
+ +----------------------------------+
388
+ | REAL 36 (32%) ==== |
389
+ | PARTIAL 13 (12%) == |
390
+ | HALLUCINATED 63 (56%) ====== |
391
+ +----------------------------------+
392
+
393
+ Each HALLUCINATED claim is a missing feature.
394
+ Each PARTIAL claim is incomplete work.
395
+ The gap IS the backlog.
396
+ ```
397
+
398
+ After iterative remediation and regeneration, compliance converges toward 90%+. The remaining gaps are genuine missing functionality that serves as a prioritized development roadmap.
399
+
400
+ ---
401
+
402
+ ## Project Structure
403
+
404
+ ```
405
+ hallucination-reversing-system/
406
+ ├── src/ # CLI source (TypeScript)
407
+ │ ├── cli.ts # Entry point (Commander.js)
408
+ │ ├── commands/ # One file per CLI command
409
+ │ │ ├── init.ts # Project initialization
410
+ │ │ ├── hallucinate.ts # ToS generation
411
+ │ │ ├── describe.ts # Fetch existing ToS from URL
412
+ │ │ ├── extract.ts # Claim extraction
413
+ │ │ ├── verify.ts # Codebase verification
414
+ │ │ ├── report.ts # Gap report generation
415
+ │ │ ├── remediate.ts # Fix task generation
416
+ │ │ └── regenerate.ts # Iterative regeneration
417
+ │ ├── lib/ # Core modules
418
+ │ │ ├── anthropic.ts # Claude SDK wrapper
419
+ │ │ ├── claim-extractor.ts # Claim parsing logic
420
+ │ │ ├── code-verifier.ts # Codebase verification engine
421
+ │ │ ├── codebase-indexer.ts # File tree indexing
422
+ │ │ ├── config.ts # Project configuration
423
+ │ │ ├── prompts.ts # LLM prompt templates
424
+ │ │ └── ...
425
+ │ └── types.ts # Type definitions
426
+ ├── docs/ # Documentation
427
+ │ ├── paper.md # Full research paper
428
+ │ ├── methodology.md # Methodology guide
429
+ │ └── prior-art.md # Prior art analysis
430
+ ├── applications/ # Real-world Assay applications
431
+ ├── arxiv-submission/ # Academic paper (LaTeX + PDF)
432
+ ├── chi-submission/ # CHI 2026 workshop submission
433
+ ├── index.html # Landing page (GitHub Pages)
434
+ └── .assay/ # Assay's own self-audit
435
+ └── iterations/
436
+ └── self-audit/ # Assay audited against itself
437
+ ```
438
+
439
+ ---
440
+
441
+ ## Publications
442
+
443
+ | Venue | Status | Link |
444
+ |-------|--------|------|
445
+ | **Zenodo** (peer-reviewed DOI) | Published | [10.5281/zenodo.18522644](https://doi.org/10.5281/zenodo.18522644) |
446
+ | **arXiv** | Submitted | [arxiv-submission/main.pdf](arxiv-submission/main.pdf) |
447
+ | **CHI 2026 Workshop** | In progress | [chi-submission/](chi-submission/) |
448
+
449
+ ---
450
+
451
+ ## Token Economics
452
+
453
+ Running a full Assay iteration is inexpensive:
454
+
455
+ | Phase | Input Tokens | Output Tokens | Cost (approx.) |
456
+ |-------|-------------|---------------|----------------|
457
+ | Hallucinate | ~2K | ~12K | $0.15 |
458
+ | Extract | ~15K | ~8K | $0.25 |
459
+ | Verify | ~80K | ~20K | $1.50 |
460
+ | Remediate | ~30K | ~15K | $0.60 |
461
+ | Regenerate | ~20K | ~12K | $0.40 |
462
+ | **Full iteration** | | | **~$2.90** |
463
+
464
+ A complete 6-iteration cycle that achieves 90%+ compliance costs approximately **$17 in API tokens** -- producing a verified specification with 91 claims, a gap report, and a prioritized remediation plan.
465
+
466
+ ---
467
+
468
+ ## Principles
469
+
470
+ 1. **Hallucination is signal, not noise.** The AI's confabulations reveal what a plausible version of the application looks like.
471
+ 2. **Legal language enforces precision.** ToS cannot be vague. The format forces the AI to hallucinate precisely.
472
+ 3. **The gap is the backlog.** The difference between what the ToS claims and what the code does is your task list.
473
+ 4. **Reality is the only test.** A claim is satisfied when verified against running code, not when code is written.
474
+ 5. **The loop is the methodology.** Assay is not one-shot generation. It is iterative convergence between fiction and reality.
475
+ 6. **Verification requires external ground truth.** LLMs cannot self-correct without external feedback (Huang et al., ICLR 2024). The codebase is the ground truth.
476
+
477
+ ---
478
+
479
+ ## Contributing
480
+
481
+ Contributions are welcome. Areas where help is particularly valuable:
482
+
483
+ - **Multi-document hallucination** -- Extending beyond ToS to API docs, user manuals, privacy policies, and compliance certifications simultaneously
484
+ - **Formal verification integration** -- Replacing LLM-based verification with property-based testing, model checking, or static analysis for specific claim categories
485
+ - **CI/CD integration** -- Running Assay in continuous integration pipelines for specification-drift detection
486
+ - **Language support** -- The CLI currently targets TypeScript/JavaScript codebases; other languages need codebase indexing adapters
487
+ - **Benchmarking** -- Comparing initial hallucination quality across different LLMs (Claude, GPT-4, Gemini, Llama)
488
+
489
+ ### Development
490
+
491
+ ```bash
492
+ git clone https://github.com/gtsbahamas/hallucination-reversing-system.git
493
+ cd hallucination-reversing-system
494
+ npm install
495
+ npm run dev # Watch mode (TypeScript compilation)
496
+ npm run build # Production build
497
+ ```
498
+
499
+ ---
500
+
501
+ ## FAQ
502
+
503
+ **Q: Isn't this just "make stuff up and hope for the best"?**
504
+
505
+ No. The hallucination is the *input*, not the output. Assay verifies every claim against the actual codebase. Unverified claims are surfaced as gaps. Nothing ships without evidence. The methodology is closer to the scientific method: hypothesize (hallucinate), test (verify), refine (regenerate).
506
+
507
+ **Q: Why not just write requirements manually?**
508
+
509
+ You can. But no human writes 91 testable requirements spanning functionality, security, data privacy, performance, operations, and legal compliance in 30 seconds. Assay generates comprehensive first-draft specifications at machine speed, then converges them toward reality through verification.
510
+
511
+ **Q: Does this actually work in production?**
512
+
513
+ Yes. Assay was developed while building production applications. The empirical results (compliance rising from 57% at iteration 3 to 91% at iteration 6, up from an estimated ~35% at iteration 1) come from a real codebase with 30,000+ lines of TypeScript. The remaining gaps were genuine missing functionality, not false positives.
514
+
515
+ **Q: How is this different from vibe coding?**
516
+
517
+ Vibe coding tolerates hallucination in the *code*. Assay exploits hallucination in the *specification* and then demands rigorous verification of the code against that specification. The verification loop is the critical difference -- vibe coding has no convergence mechanism.
518
+
519
+ **Q: What models does Assay support?**
520
+
521
+ The CLI currently uses Anthropic's Claude via the official SDK. The architecture is model-agnostic -- any LLM capable of generating structured legal text and performing code analysis can be substituted.
522
+
523
+ ---
524
+
525
+ ## Citation
526
+
527
+ ```bibtex
528
+ @article{wells2026lucid,
529
+ title={LUCID: Leveraging Unverified Claims Into Deliverables},
530
+ author={Wells, Ty},
531
+ year={2026},
532
+ doi={10.5281/zenodo.18522644},
533
+ url={https://github.com/gtsbahamas/hallucination-reversing-system}
534
+ }
535
+ ```
536
+
537
+ ---
538
+
539
+ ## License
540
+
541
+ [MIT](LICENSE) -- Use it, fork it, build on it.
542
+
543
+ Copyright (c) 2026 Rock Steady Systems LLC.
544
+
545
+ ---
546
+
547
+ <div align="center">
548
+
549
+ *"Normal specification is hallucination constrained by reality. Assay is the first development tool that uses this principle: generate freely, then constrain iteratively, just as the brain does."*
550
+
551
+ **Built by [Rock Steady Systems LLC](https://tryassay.ai)**
552
+
553
+ </div>
package/dist/cli.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
package/dist/cli.js ADDED
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env node
2
+ import { Command } from 'commander';
3
+ import { initCommand } from './commands/init.js';
4
+ import { hallucinateCommand } from './commands/hallucinate.js';
5
+ import { extractCommand } from './commands/extract.js';
6
+ import { describeCommand } from './commands/describe.js';
7
+ import { verifyCommand } from './commands/verify.js';
8
+ import { reportCommand } from './commands/report.js';
9
+ import { regenerateCommand } from './commands/regenerate.js';
10
+ import { remediateCommand } from './commands/remediate.js';
11
+ import { reverseCommand } from './commands/reverse.js';
12
+ import { assessCommand } from './commands/assess.js';
13
+ const program = new Command();
14
+ program
15
+ .name('assay')
16
+ .description('Assay — AI code verification using the LUCID methodology')
17
+ .version('0.1.0');
18
+ program
19
+ .command('init')
20
+ .description('Initialize an Assay project in the current directory')
21
+ .action(initCommand);
22
+ program
23
+ .command('hallucinate')
24
+ .description('Generate a hallucinated spec from your project config')
25
+ .option('-t, --type <type>', 'Document type: tos, api-docs, user-manual', 'tos')
26
+ .action(hallucinateCommand);
27
+ program
28
+ .command('describe')
29
+ .description('Fetch an existing ToS or Privacy Policy from a URL')
30
+ .option('-u, --url <url...>', 'URL(s) to fetch')
31
+ .action(describeCommand);
32
+ program
33
+ .command('extract')
34
+ .description('Extract testable claims from a hallucinated or described document')
35
+ .option('-i, --iteration <number>', 'Iteration number (defaults to latest)')
36
+ .option('-s, --source <filename>', 'Source file from .assay/sources/ instead of iteration')
37
+ .action(extractCommand);
38
+ program
39
+ .command('verify')
40
+ .description('Verify extracted claims against a codebase')
41
+ .option('-r, --repo <path>', 'Path to the codebase to verify against', '.')
42
+ .option('-i, --iteration <number>', 'Iteration number (defaults to latest)')
43
+ .action(verifyCommand);
44
+ program
45
+ .command('report')
46
+ .description('Generate a gap report from verification results')
47
+ .option('-i, --iteration <number>', 'Iteration number (defaults to latest)')
48
+ .action(reportCommand);
49
+ program
50
+ .command('regenerate')
51
+ .description('Regenerate a hallucinated spec from prior verification results (Phase 6)')
52
+ .option('-i, --iteration <number>', 'Source iteration to regenerate from (defaults to latest)')
53
+ .action(regenerateCommand);
54
+ program
55
+ .command('remediate')
56
+ .description('Generate code-level fix tasks from verification results (converge code toward spec)')
57
+ .option('-i, --iteration <number>', 'Iteration number (defaults to latest)')
58
+ .option('-r, --repo <path>', 'Path to the codebase to remediate', '.')
59
+ .option('-t, --threshold <number>', 'Compliance threshold (default: 95)', '95')
60
+ .action(remediateCommand);
61
+ program
62
+ .command('reverse')
63
+ .description('Reverse Assay — generate code with hallucination prevention')
64
+ .option('-t, --task <task>', 'Coding task description')
65
+ .option('-f, --task-file <path>', 'Read task from file')
66
+ .option('-l, --lang <language>', 'Target language', 'typescript')
67
+ .option('-o, --output <path>', 'Output file path')
68
+ .option('-v, --verbose', 'Show detailed progress')
69
+ .action(reverseCommand);
70
+ program
71
+ .command('assess')
72
+ .description('Run autonomous LVR Loop 1 assessment against a codebase')
73
+ .argument('<target>', 'Path to codebase or GitHub repo URL')
74
+ .option('--publish', 'Upload results to tryassay.ai')
75
+ .option('--domain <domains>', 'Comma-separated domains to assess (default: all)')
76
+ .option('--concurrency <n>', 'Max parallel routes to process', '5')
77
+ .option('--api-url <url>', 'API URL for publishing', 'https://tryassay.ai')
78
+ .action(assessCommand);
79
+ program.parseAsync().catch((err) => { console.error(err); process.exitCode = 1; }); // action handlers are async: wait for them and exit non-zero if one rejects
80
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AACjD,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAC/D,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AACzD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EAAE,cAAc,EAAE,MAAM,uBAAuB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAErD,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,OAAO,CAAC;KACb,WAAW,CAAC,0DAA0D,CAAC;KACvE,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,sDAAsD,CAAC;KACnE,MAAM,CAAC,WAAW,CAAC,CAAC;AAEvB,OAAO;KACJ,OAAO,CAAC,aAAa,CAAC;KACtB,WAAW,CAAC,uDAAuD,CAAC;KACpE,MAAM,CAAC,mBAAmB,EAAE,2CAA2C,EAAE,KAAK,CAAC;KAC/E,MAAM,CAAC,kBAAkB,CAAC,CAAC;AAE9B,OAAO;KACJ,OAAO,CAAC,UAAU,CAAC;KACnB,WAAW,CAAC,oDAAoD,CAAC;KACjE,MAAM,CAAC,oBAAoB,EAAE,iBAAiB,CAAC;KAC/C,MAAM,CAAC,eAAe,CAAC,CAAC;AAE3B,OAAO;KACJ,OAAO,CAAC,SAAS,CAAC;KAClB,WAAW,CAAC,mEAAmE,CAAC;KAChF,MAAM,CAAC,0BAA0B,EAAE,uCAAuC,CAAC;KAC3E,MAAM,CAAC,yBAAyB,EAAE,uDAAuD,CAAC;KAC1F,MAAM,CAAC,cAAc,CAAC,CAAC;AAE1B,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,4CAA4C,CAAC;KACzD,MAAM,CAAC,mBAAmB,EAAE,wCAAwC,EAAE,GAAG,CAAC;KAC1E,MAAM,CAAC,0BAA0B,EAAE,uCAAuC,CAAC;KAC3E,MAAM,CAAC,aAAa,CAAC,CAAC;AAEzB,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,iDAAiD,CAAC;KAC9D,MAAM,CAAC,0BAA0B,EAAE,uCAAuC,CAAC;KAC3E,MAAM,CAAC,aAAa,CAAC,CAAC;AAEzB,OAAO;KACJ,OAAO,CAAC,YAAY,CAAC;KACrB,WAAW,CAAC,0EAA0E,CAAC;KACvF,MAAM,CAAC,0BAA0B,EAAE,0DAA0D,CAAC;KAC9F,MAAM,CAAC,iBAAiB,CAAC,CAAC;AAE7B,OAAO;KACJ,OAAO,CAAC,WAAW,CAAC;KACpB,WAAW,CAAC,qFAAqF,CAAC;KAClG,MAAM,CAAC,0BAA0B,EAAE,uCAAuC,CAAC;KAC3E,MAAM,CAAC,mBAAmB,EAAE,mCAAmC,EAAE,GAAG,CAAC;KACrE,MAAM,CAAC,0BAA0B,EAAE,oCAAoC,EAAE,IAAI,CAAC;KAC9E,MAAM,CAAC,gBAAgB,CAAC,CAAC;AAE5B,OAAO;KACJ,OAAO,CAAC,SAAS,CAAC;KAClB,WAA
W,CAAC,6DAA6D,CAAC;KAC1E,MAAM,CAAC,mBAAmB,EAAE,yBAAyB,CAAC;KACtD,MAAM,CAAC,wBAAwB,EAAE,qBAAqB,CAAC;KACvD,MAAM,CAAC,uBAAuB,EAAE,iBAAiB,EAAE,YAAY,CAAC;KAChE,MAAM,CAAC,qBAAqB,EAAE,kBAAkB,CAAC;KACjD,MAAM,CAAC,eAAe,EAAE,wBAAwB,CAAC;KACjD,MAAM,CAAC,cAAc,CAAC,CAAC;AAE1B,OAAO;KACJ,OAAO,CAAC,QAAQ,CAAC;KACjB,WAAW,CAAC,yDAAyD,CAAC;KACtE,QAAQ,CAAC,UAAU,EAAE,qCAAqC,CAAC;KAC3D,MAAM,CAAC,WAAW,EAAE,+BAA+B,CAAC;KACpD,MAAM,CAAC,oBAAoB,EAAE,kDAAkD,CAAC;KAChF,MAAM,CAAC,mBAAmB,EAAE,gCAAgC,EAAE,GAAG,CAAC;KAClE,MAAM,CAAC,iBAAiB,EAAE,wBAAwB,EAAE,qBAAqB,CAAC;KAC1E,MAAM,CAAC,aAAa,CAAC,CAAC;AAEzB,OAAO,CAAC,KAAK,EAAE,CAAC"}
@@ -0,0 +1,6 @@
1
+ export declare function assessCommand(target: string, options: {
2
+ publish?: boolean;
3
+ domain?: string;
4
+ concurrency?: string;
5
+ apiUrl?: string;
6
+ }): Promise<void>;