start-vibing 4.4.2 → 4.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/template/.claude/agents/research-query.md +128 -0
- package/template/.claude/agents/research-scout.md +124 -0
- package/template/.claude/agents/research-synthesize.md +139 -0
- package/template/.claude/agents/research-verify.md +84 -0
- package/template/.claude/commands/research.md +18 -0
- package/template/.claude/hooks/format-on-edit.sh +26 -0
- package/template/.claude/hooks/git-context-session-start.sh +22 -0
- package/template/.claude/hooks/quality-gate-stop.sh +46 -0
- package/template/.claude/hooks/research-session-start.sh +4 -0
- package/template/.claude/settings.json +29 -0
- package/template/.claude/skills/research/SKILL.md +285 -0
- package/template/.claude/skills/research/references/domain-playbooks.md +604 -0
- package/template/.claude/skills/research/references/ontology-patterns.md +376 -0
- package/template/.claude/skills/research/references/research-methodology.md +794 -0
- package/template/.claude/skills/research/references/source-directory.md +280 -0
- package/template/.claude/skills/research/scripts/__pycache__/extract-claims.cpython-313.pyc +0 -0
- package/template/.claude/skills/research/scripts/check-cache.sh +129 -0
- package/template/.claude/skills/research/scripts/dedup-research.sh +80 -0
- package/template/.claude/skills/research/scripts/extract-claims.py +83 -0
- package/template/.claude/skills/research/scripts/update-index.sh +106 -0
- package/template/.claude/skills/research/scripts/verify-citations.sh +107 -0
- package/template/.claude/skills/research/templates/adr.md.tpl +66 -0
- package/template/.claude/skills/research/templates/index.md.tpl +25 -0
- package/template/.claude/skills/research/templates/moc.md.tpl +39 -0
- package/template/.claude/skills/research/templates/research-state.schema.json +64 -0
- package/template/.claude/skills/research/templates/research.md.tpl +117 -0
- package/template/.claude/agents/research-web.md +0 -164
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
# Source Directory — Reference
|
|
2
|
+
|
|
3
|
+
> Per-domain canonical sources, authority levels, authenticity checks, and trap patterns. Authority Level 5 = primary spec / authoritative dataset / standards body. Level 1 = aggregator / SEO content. Use the highest-authority source available for any claim.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## 1. Software / Web Engineering
|
|
8
|
+
|
|
9
|
+
| Source | Type | Authority | Auth check | Common Trap |
|
|
10
|
+
| ----------------------------------------------- | ------------------ | ----------------- | ------------------------------------------------------------------ | ---------------------------------------------------------------------- |
|
|
11
|
+
| WHATWG Living Standards (HTML, DOM, Fetch, URL) | Spec | 5 | URL must be `spec.whatwg.org`; "Living Standard" header present | Mid-2010s WHATWG/W3C HTML5 forks — verify which body's text is current |
|
|
12
|
+
| W3C Recommendations | Spec | 5 | URL `www.w3.org/TR/`; status "Recommendation" or "Living Standard" | Notes / WGs / Drafts cited as if Recommendations |
|
|
13
|
+
| ECMA-262 (TC39) | Spec | 5 | `tc39.es/ecma262/` (latest) or `ecma-international.org` (yearly) | Stage-2 proposals cited as language features |
|
|
14
|
+
| IETF RFC | Spec | 5 | `rfc-editor.org/rfc/rfc####` or `datatracker.ietf.org` | Obsoleted RFCs (check "Obsoleted by" header) |
|
|
15
|
+
| MDN Web Docs | Vendor docs | 5 | `developer.mozilla.org`; check Browser Compat Data | Cached translations lag English by months |
|
|
16
|
+
| Microsoft Learn | Vendor docs | 5 | `learn.microsoft.com`; published-date present | "Last updated" auto-bumped; verify content actually changed |
|
|
17
|
+
| AWS / GCP / Azure docs | Vendor docs | 5 | Canonical vendor host; not a third-party mirror | Pricing pages cached aggressively; recheck on the vendor's live pricing page |
|
|
18
|
+
| GitHub source repository (official) | Primary | 5 | Verified org badge; `package.json` `repository` field matches | Forks renamed to look official |
|
|
19
|
+
| npm / PyPI / crates.io / RubyGems | Registry | 4 | Provenance attestation (npm sigstore); maintainer badge | Typosquats; abandoned packages with new "maintainers" |
|
|
20
|
+
| caniuse.com | Compat data | 4 | Sources cited (BrowserStack, vendor) | Aggregation lag of 1–4 weeks |
|
|
21
|
+
| Can I email | Compat data | 4 | Cited sources | Same lag |
|
|
22
|
+
| MDN Browser Compat Data (BCD) | Compat data | 5 | `github.com/mdn/browser-compat-data` | Subfeatures sometimes incomplete |
|
|
23
|
+
| Stack Overflow | Q&A | 3 (vote-weighted) | Score, accepted-answer flag, last-edit | Outdated accepted answers; new wrong answers below |
|
|
24
|
+
| GitHub Issues / Discussions (official repo) | Primary discussion | 4 | In-repo, by maintainers | Closed-without-resolution; comment threads cherry-picked |
|
|
25
|
+
| RFC drafts (`draft-*`) | Working doc | 3 | `datatracker.ietf.org/doc/draft-*`; expiration date | Cited as if RFC after expiration |
|
|
26
|
+
| Vendor blog | Secondary | 3 | Author bio with role; date present | Marketing prose; future-tense roadmap as fact |
|
|
27
|
+
| Dev.to / Medium / Hashnode | Tertiary | 1–2 | Author profile, sources cited | AI-generated; unverified code samples |
|
|
28
|
+
|
|
29
|
+
### Republication networks in software
|
|
30
|
+
|
|
31
|
+
The largest republication network is the SEO tutorial farm: `geeksforgeeks.org`, `tutorialspoint.com`, `freecodecamp.org` (mostly OK but variable), `medium.com/@*`, `dev.to/*`, plus dozens of `*-tutorials.dev`, `learn-*.com` and `coding-*.io` shells. They republish identical content with light paraphrase; the _first_ publication is rarely on these sites — it is on the vendor's docs, the maintainer's GitHub, or a conference talk. Always trace upstream.
|
|
32
|
+
|
|
33
|
+
A second category is the **AI-generated framework explainer** (post-2023): "Top 10 React Server Components in 2025" articles that hallucinate APIs and conflate React versions. Detect via §10.
|
|
34
|
+
|
|
35
|
+
A third is the **vendor-funded comparison post**: "Why we picked X over Y" written by a partner / investor. Check the page footer / About for sponsorship.
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## 2. UX / Design
|
|
40
|
+
|
|
41
|
+
| Source | Type | Authority | Auth check | Common Trap |
|
|
42
|
+
| -------------------------------------- | ------------- | --------- | -------------------------------------------------------- | ------------------------------------------------------------------ |
|
|
43
|
+
| Baymard Institute | Research firm | 5 | `baymard.com`; benchmark report ID | Paywalled detail; cite teaser quote + report number |
|
|
44
|
+
| Nielsen Norman Group | Research firm | 5 | `nngroup.com`; author bio links to staff page | "Articles" 1995–2005 still cited; check if updated |
|
|
45
|
+
| Interaction Design Foundation (IxDF) | Education | 4 | `interaction-design.org`; primary citations in body | Encyclopedia-style entries can be too general |
|
|
46
|
+
| W3C WAI ARIA Authoring Practices (APG) | Spec/Pattern | 5 | `w3.org/WAI/ARIA/apg/`; matches ARIA 1.2/1.3 | APG patterns updated more frequently than ARIA spec — note version |
|
|
47
|
+
| WCAG (2.1 / 2.2) | Spec | 5 | `w3.org/TR/WCAG2*/`; Recommendation status | Quoting WCAG 2.0 when 2.1/2.2 supersede |
|
|
48
|
+
| Deque University / axe docs | Accessibility | 5 | `dequeuniversity.com`, `deque.com/axe` | Axe-core rule changes between versions |
|
|
49
|
+
| Material Design 3 | Design system | 5 | `m3.material.io`; component spec page | Material 2 examples mixed in; verify M3 |
|
|
50
|
+
| Apple HIG | Design system | 5 | `developer.apple.com/design/human-interface-guidelines/` | iOS-version-specific guidance |
|
|
51
|
+
| Microsoft Fluent 2 | Design system | 5 | `fluent2.microsoft.design` | Fluent 1 vs 2 terminology drift |
|
|
52
|
+
| Shopify Polaris | Design system | 5 | `polaris.shopify.com` | Internal-only patterns excluded from public |
|
|
53
|
+
| IBM Carbon | Design system | 5 | `carbondesignsystem.com` | React vs Vue vs vanilla parity gaps |
|
|
54
|
+
| Atlassian Design System | Design system | 4 | `atlassian.design` | Atlassian-product-specific guidance |
|
|
55
|
+
| Smashing Magazine | Editorial | 3 | Author bylines; references in body | Commercial sponsor disclosure variable |
|
|
56
|
+
| A List Apart | Editorial | 4 | Established editorial process | Older articles (pre-2018) often outdated |
|
|
57
|
+
| UX Planet / UX Collective (Medium) | Editorial | 1–2 | Author bio | Anyone can publish; no editorial gating |
|
|
58
|
+
| Dribbble / Behance | Visual ref | 2 | Designer profile | Aspirational mockups, not production patterns |
|
|
59
|
+
|
|
60
|
+
### Republication networks in UX
|
|
61
|
+
|
|
62
|
+
The "10 best UX patterns of 2025"-style article cluster on Medium publications (`UX Collective`, `UX Planet`, `Bootcamp`) — these are _editor-curated but not peer-reviewed_. Treat as inspiration, cross-check claims against NN/g or Baymard before citing.
|
|
63
|
+
|
|
64
|
+
Image-heavy boards (Dribbble, Behance, Pinterest) show one-off concept work, not validated production UI. Never cite a Dribbble shot as evidence of a pattern's effectiveness.
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## 3. Academic
|
|
69
|
+
|
|
70
|
+
| Source | Type | Authority | Auth check | Common Trap |
|
|
71
|
+
| ----------------------- | ------------ | --------- | ------------------------------------------------------------------------ | ---------------------------------------------------- |
|
|
72
|
+
| DOI via Crossref | Identifier | 5 | `GET https://api.crossref.org/works/{doi}` returns 200; metadata matches | Hallucinated DOIs; Crossref returns 404 |
|
|
73
|
+
| ORCID | Author ID | 5 | `https://orcid.org/{0000-0000-0000-0000}` resolves; affiliation matches | Multiple authors with same name; ORCID disambiguates |
|
|
74
|
+
| PubMed (NLM) | Index | 5 | PMID resolves; MEDLINE-indexed | "Indexed for MEDLINE" vs "as supplied by publisher" |
|
|
75
|
+
| arXiv | Preprint | 4 | `arxiv.org/abs/{id}`; check if peer-reviewed since | Withdrawn papers; v1 vs vN |
|
|
76
|
+
| bioRxiv / medRxiv | Preprint | 3 | URL resolves; "Peer review status" banner | Preprints contradicted in peer review |
|
|
77
|
+
| Semantic Scholar | Index | 4 | API at `api.semanticscholar.org/graph/v1` | Auto-extracted citations sometimes wrong |
|
|
78
|
+
| Google Scholar | Index | 3 | Search only; verify source | Includes predatory journals; no quality filter |
|
|
79
|
+
| Scopus / Web of Science | Index | 5 | Subscription DBs; CiteScore / Impact Factor | Paywalled; coverage gaps in newer fields |
|
|
80
|
+
| ResearchGate | Hosting | 2 | Not a peer-review venue | Self-uploaded copies — go to publisher of record |
|
|
81
|
+
| OSF Preregistration | Process | 5 | `osf.io/{id}`; date-stamped | Preregistration ≠ publication |
|
|
82
|
+
| Crossref Funders | Funding meta | 4 | API endpoint | Disclosure may be incomplete |
|
|
83
|
+
| Retraction Watch | Vigilance | 5 | `retractionwatch.com`; PubPeer link | Always check whether a citation has been retracted |
|
|
84
|
+
|
|
85
|
+
### DOI verification protocol
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
GET https://api.crossref.org/works/{doi}
|
|
89
|
+
→ 200 with .message.title matching cited title
|
|
90
|
+
→ 200 with .message.author array matching cited author(s)
|
|
91
|
+
→ 200 with .message.published.date-parts[0][0] matching cited year
|
|
92
|
+
→ .message.type indicates "journal-article" / "book-chapter" / etc.
|
|
93
|
+
|
|
94
|
+
If any mismatch → flag "citation-DOI mismatch"
|
|
95
|
+
If 404 → flag "fabricated-DOI" (high prior on hallucination)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
For non-DOI sources, fall back to Semantic Scholar's `paperId` lookup or a Google Scholar `"<exact title>"` search; if nothing returns, the citation is suspect.
|
|
99
|
+
|
|
100
|
+
### ORCID verification
|
|
101
|
+
|
|
102
|
+
`https://pub.orcid.org/v3.0/{orcid}/person` returns JSON with `name.given-names`, `name.family-name`, and `employments`. Match against cited author + affiliation.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## 4. Business / Market
|
|
107
|
+
|
|
108
|
+
| Source | Type | Authority | Auth check | Common Trap |
|
|
109
|
+
| ----------------------------------- | --------------- | --------- | ---------------------------------------- | ------------------------------------------------- |
|
|
110
|
+
| Gartner Magic Quadrant | Analyst report | 5 | Report ID; publication date | Quadrant positions shift annually — cite the year |
|
|
111
|
+
| Gartner Hype Cycle | Analyst report | 5 | Report ID; date | "Trough" position used as opinion vs methodology |
|
|
112
|
+
| Forrester Wave | Analyst report | 5 | Report title/year/segment | Vendor "Leader" status without scoring detail |
|
|
113
|
+
| IDC MarketScape | Analyst report | 5 | Report ID | Same as above |
|
|
114
|
+
| HBR / MIT Sloan / Strategy+Business | Editorial | 4 | Editorial process; peer commentary | Opinion pieces vs research-backed |
|
|
115
|
+
| McKinsey / BCG / Bain insights | Consultancy | 4 | On firm domain | Marketing-flavored; check for primary data |
|
|
116
|
+
| CB Insights | Market data | 4 | Subscription tier; author | Aggregated startup data has lag |
|
|
117
|
+
| PitchBook / Crunchbase | Data | 4 | Profile shows source citations | User-edited fields can be stale |
|
|
118
|
+
| a16z / Sequoia / Bessemer | VC research | 3 | On firm domain | Conflicts of interest with portfolio |
|
|
119
|
+
| Statista | Data aggregator | 3 | Cited sources at chart bottom | Aggregator — go to primary survey |
|
|
120
|
+
| Public 10-K / 10-Q (SEC) | Primary | 5 | SEC EDGAR `sec.gov/cgi-bin/browse-edgar` | Forward-looking statements not facts |
|
|
121
|
+
| Earnings call transcripts | Primary | 4 | Seeking Alpha / company IR | Unscripted commentary cherry-picked |
|
|
122
|
+
|
|
123
|
+
### Gartner / Forrester naming conventions
|
|
124
|
+
|
|
125
|
+
- **Gartner Magic Quadrant for {Category}, {Year}** — e.g., "Gartner Magic Quadrant for Cloud AI Developer Services, 2024".
|
|
126
|
+
- **Forrester Wave™: {Category}, Q{N} {Year}** — e.g., "The Forrester Wave™: Customer Data Platforms, Q3 2024".
|
|
127
|
+
|
|
128
|
+
When citing, always include the year/quarter; positions and methodology change.
|
|
129
|
+
|
|
130
|
+
### Republication networks in business
|
|
131
|
+
|
|
132
|
+
Press releases distributed via PR Newswire, BusinessWire, GlobeNewswire are republished by hundreds of outlets verbatim. A "story" appearing on 50 sites within an hour is a press release; trace to the issuer and treat as company-stated, not journalist-verified.
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## 5. News / Current Events
|
|
137
|
+
|
|
138
|
+
| Source | Type | Authority | Auth check | Common Trap |
|
|
139
|
+
| ----------------------------------- | ------------------- | --------- | --------------------------------------------- | ------------------------------------------------------------------- |
|
|
140
|
+
| Reuters | Wire | 5 | `reuters.com`; bylined | Paywall; check archive.org |
|
|
141
|
+
| Associated Press | Wire | 5 | `apnews.com` | Same |
|
|
142
|
+
| AFP | Wire | 5 | `afp.com` | French original > syndicated translations |
|
|
143
|
+
| The New York Times | Newspaper of record | 5 | `nytimes.com`; corrections page | Op-eds vs news section |
|
|
144
|
+
| Wall Street Journal | Newspaper | 5 | `wsj.com` | Editorial vs newsroom |
|
|
145
|
+
| Financial Times | Newspaper | 5 | `ft.com` | Lex column = opinion |
|
|
146
|
+
| The Economist | Magazine | 5 | `economist.com` | Anonymous bylines; institutional voice |
|
|
147
|
+
| The Washington Post | Newspaper | 5 | `washingtonpost.com` | Same as NYT |
|
|
148
|
+
| The Guardian | Newspaper | 4 | `theguardian.com`; Editor's Code of Practice | UK/US edition divergence |
|
|
149
|
+
| BBC News | Broadcaster | 4 | `bbc.com/news`; editorial guidelines | Older URLs sometimes redirect to summaries |
|
|
150
|
+
| Axios | Digital | 4 | Bylines | Smart Brevity favors summary over nuance |
|
|
151
|
+
| The Verge / Ars Technica | Tech press | 4 | `theverge.com`, `arstechnica.com` | Vendor relationships disclosed inconsistently |
|
|
152
|
+
| Wayback Machine | Archive | 5 | `web.archive.org/web/{ts}/{url}` | Some pages blocked from archiving |
|
|
153
|
+
| archive.today | Archive | 4 | `archive.ph` | Less complete than Wayback |
|
|
154
|
+
| Snopes / PolitiFact / FactCheck.org | Fact-check | 4 | IFCN signatory list | Politically scrutinized — cite for sourcing chain not verdict alone |
|
|
155
|
+
| AllSides | Bias rating | 3 | `allsides.com`; methodology page | Coarse left/center/right; one input among many |
|
|
156
|
+
| Ad Fontes Media | Bias rating | 3 | `adfontesmedia.com`; bias × reliability chart | Same |
|
|
157
|
+
|
|
158
|
+
### Wire-vs-primary chain
|
|
159
|
+
|
|
160
|
+
Newsroom story chain typically: event → wire (Reuters/AP) → newspaper of record → trade press → aggregator → social. Earlier in chain = closer to primary. When citing a fast-breaking story, prefer the wire source over downstream rewrites.
|
|
161
|
+
|
|
162
|
+
For fact-disputes: Snopes / PolitiFact / FactCheck cite their _sources_ — those are what to cite, not the fact-check verdict by itself.
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## 6. Technical Standards
|
|
167
|
+
|
|
168
|
+
| Source | Type | Authority | Auth check | Common Trap |
|
|
169
|
+
| ------------------ | ------------- | --------- | ---------------------------------------------------- | --------------------------------------------------------------------------- |
|
|
170
|
+
| ISO | Standards | 5 | `iso.org/standard/{id}.html`; status "Published" | Paywalled; cite by `ISO/IEC NNNN-N:YYYY`. Withdrawn standards still indexed |
|
|
171
|
+
| IEC | Standards | 5 | `webstore.iec.ch` | Joint ISO/IEC standards have dual numbers |
|
|
172
|
+
| IEEE | Standards | 5 | `standards.ieee.org`; status | "Active" vs "Superseded" |
|
|
173
|
+
| NIST | Pubs | 5 | `nvlpubs.nist.gov` (free); SP / FIPS / IR series | Withdrawn SPs (e.g., NIST SP 800-63 generations) |
|
|
174
|
+
| W3C | Web standards | 5 | `w3.org/TR/`; status header | Note vs Working Draft vs Recommendation |
|
|
175
|
+
| IETF | Internet | 5 | `datatracker.ietf.org`; RFC status | Obsoleted by / Updates by |
|
|
176
|
+
| OASIS | Standards | 5 | `oasis-open.org`; standards page | Consortium standards vs ISO-ratified |
|
|
177
|
+
| ECMA International | Standards | 5 | `ecma-international.org/publications-and-standards/` | ECMA-262 numbered yearly (ES2024 etc.) |
|
|
178
|
+
| Unicode Consortium | Char encoding | 5 | `unicode.org`; UAX number | Version pinning matters (UTS #39, etc.) |
|
|
179
|
+
|
|
180
|
+
### Citation format
|
|
181
|
+
|
|
182
|
+
`{Body} {Number}{:Year}, "{Title}", §{section}` — e.g., "ISO/IEC 27001:2022 §6.1.2" or "RFC 9110 §6.4".
|
|
183
|
+
|
|
184
|
+
To verify a number is _current_:
|
|
185
|
+
|
|
186
|
+
1. Visit the standard's landing page on the issuing body's site.
|
|
187
|
+
2. Check status: "Published" / "Active" / "Recommendation".
|
|
188
|
+
3. Look for "superseded by" / "obsoleted by" notices.
|
|
189
|
+
4. Cross-check the publication year matches the citation.
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## 7. Open Data
|
|
194
|
+
|
|
195
|
+
| Source | Type | Authority | Auth check | Common Trap |
|
|
196
|
+
| --------------------- | ------------ | --------- | -------------------------------------- | ----------------------------------------------- |
|
|
197
|
+
| data.gov (US) | Catalog | 5 | `data.gov`; agency-published | Catalog entries can lag underlying data |
|
|
198
|
+
| Eurostat | Statistical | 5 | `ec.europa.eu/eurostat` | EU-aggregate vs member-state divergence |
|
|
199
|
+
| World Bank Open Data | Statistical | 5 | `data.worldbank.org`; methodology link | Country classifications change |
|
|
200
|
+
| OECD | Statistical | 5 | `data.oecd.org` | Membership changes affect aggregates |
|
|
201
|
+
| UN Data | Statistical | 5 | `data.un.org` | Reporting countries vary |
|
|
202
|
+
| US BLS / BEA / Census | Statistical | 5 | `bls.gov`, `bea.gov`, `census.gov` | Series IDs change between methodology revisions |
|
|
203
|
+
| FRED (Fed St. Louis) | Series store | 5 | `fred.stlouisfed.org`; series ID | Discontinued series still indexed |
|
|
204
|
+
| GitHub awesome-lists | Aggregator | 2 | Last commit date | Often stale |
|
|
205
|
+
| Kaggle Datasets | Aggregator | 3 | Author + license | User-uploaded; verify provenance |
|
|
206
|
+
|
|
207
|
+
---
|
|
208
|
+
|
|
209
|
+
## 8. Patents
|
|
210
|
+
|
|
211
|
+
| Source | Type | Authority | Auth check | Common Trap |
|
|
212
|
+
| ---------------- | ------------- | --------- | ------------------------------------- | ---------------------------------- |
|
|
213
|
+
| USPTO | Patent office | 5 | `patents.uspto.gov`; Patent Number | Application vs grant; check status |
|
|
214
|
+
| EPO Espacenet | Patent search | 5 | `worldwide.espacenet.com` | Family vs single-jurisdiction |
|
|
215
|
+
| WIPO PATENTSCOPE | Patent search | 5 | `patentscope.wipo.int` | PCT publication ≠ national grant |
|
|
216
|
+
| Google Patents | Mirror | 4 | `patents.google.com` | OCR errors; check the official PDF |
|
|
217
|
+
| Lens.org | Index | 4 | `lens.org`; aggregates USPTO/EPO/WIPO | Indexing lag |
|
|
218
|
+
|
|
219
|
+
---
|
|
220
|
+
|
|
221
|
+
## 9. Legal
|
|
222
|
+
|
|
223
|
+
| Source | Type | Authority | Auth check | Common Trap |
|
|
224
|
+
| ------------------------------- | ------------ | --------- | ------------------------------------------ | --------------------------------------- |
|
|
225
|
+
| US: Federal Register | Regulations | 5 | `federalregister.gov` | Proposed vs final rules |
|
|
226
|
+
| US: Code of Federal Regulations | Codified | 5 | `ecfr.gov` | eCFR is current; printed CFR lags |
|
|
227
|
+
| US: CourtListener / RECAP | Court docs | 5 | `courtlistener.com` | District vs circuit precedential weight |
|
|
228
|
+
| US: PACER | Court docs | 5 | `pacer.uscourts.gov` (paid) | Sealed docs not visible |
|
|
229
|
+
| US: SCOTUS | Court | 5 | `supremecourt.gov`; slip opinion | Slip opinion vs final reporter |
|
|
230
|
+
| EU: EUR-Lex | EU law | 5 | `eur-lex.europa.eu`; CELEX number | Directives need national transposition |
|
|
231
|
+
| UK: Legislation.gov.uk | UK law | 5 | `legislation.gov.uk`; "in force" indicator | Pre-Brexit retained EU law |
|
|
232
|
+
| FindLaw / Justia | Mirror | 3 | Cite primary, link to mirror | Outdated annotations |
|
|
233
|
+
| Westlaw / LexisNexis | Subscription | 5 | Subscription DBs | Paywalled; cite primary |
|
|
234
|
+
|
|
235
|
+
---
|
|
236
|
+
|
|
237
|
+
## 10. AI Content Red Flags
|
|
238
|
+
|
|
239
|
+
Detectable signs that a "source" is LLM-generated and therefore not citable:
|
|
240
|
+
|
|
241
|
+
| Signal | Indicator |
|
|
242
|
+
| -------------------------------- | ------------------------------------------------------------------------------------------------------------ |
|
|
243
|
+
| Over-clean prose | No typos, no asides, no informalisms; uniformly mid-register |
|
|
244
|
+
| Hallucinated citations | Authors with no other publications; DOIs that don't resolve in Crossref; journals slightly mis-named |
|
|
245
|
+
| Perfectly balanced bullets | Every bullet identical word-count and structure; suspiciously parallel |
|
|
246
|
+
| Em-dash overuse | Em-dashes connecting clauses where humans would use commas or periods |
|
|
247
|
+
| "It's important to note that..." | This phrase, "delve into", "in today's fast-paced world", "navigate the complexities", "tapestry", "elevate" |
|
|
248
|
+
| Generic byline | "Staff writer", "Editorial Team", no headshot, no author archive |
|
|
249
|
+
| No primary quotes | No interviews, no transcripts, no direct quotation of a source person |
|
|
250
|
+
| No screenshots / figures | All-text article on a topic that should have visual evidence |
|
|
251
|
+
| Dates with no method | "Recent studies show..." with no citation |
|
|
252
|
+
| Rebranded paraphrase | Article structure matches another article exactly; words are swapped synonyms |
|
|
253
|
+
| No author expertise | Byline links to a profile with 200 articles across 30 unrelated topics in 6 months |
|
|
254
|
+
| Comments section disabled | Or filled with bot replies that match article style |
|
|
255
|
+
| Site infrastructure | Generic CMS theme, no About page with names, registration recent (`whois`) |
|
|
256
|
+
| Suspicious uniformity | All articles on the site published exactly weekly, same length, same tone |
|
|
257
|
+
|
|
258
|
+
When in doubt, run a distinctive 8–12-word string from the article through a quoted Google search. If it appears verbatim across many sites: republication. If it appears nowhere else: novel — but check author credibility before citing.
|
|
259
|
+
|
|
260
|
+
A useful heuristic: an LLM-generated article _summarizes_ but does not _report_. If the article cannot answer "where did this fact come from", it is not a source — it is an aggregation of sources, and you must trace upstream.
|
|
261
|
+
|
|
262
|
+
---
|
|
263
|
+
|
|
264
|
+
## 11. Authenticity Checks Cheat Sheet
|
|
265
|
+
|
|
266
|
+
```
|
|
267
|
+
URL resolves (HTTP 200/301) → curl -I {url}
|
|
268
|
+
DOI in Crossref → curl https://api.crossref.org/works/{doi}
|
|
269
|
+
ORCID + affiliation match → curl https://pub.orcid.org/v3.0/{orcid}/person
|
|
270
|
+
Quote-in-source → fetch page; grep -F "{quote}"
|
|
271
|
+
Wayback first-seen → https://web.archive.org/web/*/{url}
|
|
272
|
+
Author publication history → Semantic Scholar / Google Scholar by name
|
|
273
|
+
Publisher reputation → Beall's List archive; DOAJ for OA journals
|
|
274
|
+
Site WHOIS → whois {domain}; check creation date
|
|
275
|
+
Republication detection → quoted Google search of distinctive phrase
|
|
276
|
+
Image provenance → reverse image search (Google / TinEye)
|
|
277
|
+
Screenshot evidence → Playwright `browser_take_screenshot`
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
The research-verify agent runs the first four on every claim. The remainder are escalations when the first four are ambiguous.
|
|
Binary file
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# check-cache.sh — slugify a topic + scan /docs/research/ for existing fresh findings.
# Output: JSON to stdout (or just the slug when --slugify is the only arg).
# Compatible with bash on Windows git-bash and Linux.
set -euo pipefail

# Project root: honor CLAUDE_PROJECT_DIR when set, else the current directory.
ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}"
# Cached research docs live here as <topic-slug>.md files.
RESEARCH_DIR="${ROOT}/docs/research"
|
|
9
|
+
|
|
10
|
+
slugify() {
  # Turn free text into a filesystem-safe topic slug:
  # lowercase → collapse non-alphanumerics to "-" → strip edge hyphens →
  # drop filler words → cap at 60 chars.
  # Fix: re-strip trailing hyphens AFTER the 60-char cut — truncation can
  # land mid-word and leave a dangling "-" in the slug (and the filename).
  echo "$1" \
    | tr '[:upper:]' '[:lower:]' \
    | sed -E 's/[^a-z0-9]+/-/g; s/^-+//; s/-+$//' \
    | sed -E 's/-(a|an|the|of|in|on|at|to|for|and|or|with|how|do|does|is|are|what|why|vs|via)-/-/g' \
    | cut -c1-60 \
    | sed -E 's/-+$//'
}
|
|
17
|
+
|
|
18
|
+
classify_bucket() {
  # Map a research question onto a volatility bucket:
  # fast | medium | slow | permanent. First matching pattern wins;
  # anything unrecognized defaults to medium.
  local lowered
  lowered=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')
  case "$lowered" in
    *"pricing"*|*"version"*|*"latest"*|*"2026"*|*"this week"*|*"breaking change"*|*"sota"*|*"state of the art"*)
      echo fast ;;
    *"react"*|*"next.js"*|*"nextjs"*|*"vercel"*|*"openai"*|*"anthropic"*|*"llm"*|*"ai sdk"*)
      echo fast ;;
    *"theorem"*|*"law of"*|*"history of"*|*"definition of"*)
      echo permanent ;;
    *"prisma"*|*"cochrane"*|*"wcag"*|*"rfc"*|*"iso"*|*"ietf"*|*"w3c"*)
      echo slow ;;
    *)
      echo medium ;;
  esac
}
|
|
30
|
+
|
|
31
|
+
bucket_window_days() {
  # Freshness window in days for each content-type bucket.
  # Unknown buckets fall back to the medium window (90 days).
  local kind="$1"
  if [ "$kind" = fast ]; then
    echo 30
  elif [ "$kind" = medium ]; then
    echo 90
  elif [ "$kind" = slow ]; then
    echo 365
  elif [ "$kind" = permanent ]; then
    echo 1825
  else
    echo 90
  fi
}
|
|
40
|
+
|
|
41
|
+
age_status() {
  # Classify a doc's age (in days) against its bucket's thresholds:
  # fresh | aging | stale | outdated.
  # Note: an unrecognized bucket emits nothing; verdict_from maps the
  # resulting empty status to full-research.
  local days=$1 kind=$2
  case "$kind" in
    fast)
      if   [ "$days" -lt 30 ];  then echo fresh
      elif [ "$days" -lt 90 ];  then echo aging
      elif [ "$days" -lt 180 ]; then echo stale
      else                           echo outdated
      fi ;;
    medium)
      if   [ "$days" -lt 90 ];  then echo fresh
      elif [ "$days" -lt 180 ]; then echo aging
      elif [ "$days" -lt 365 ]; then echo stale
      else                           echo outdated
      fi ;;
    slow)
      if   [ "$days" -lt 365 ];  then echo fresh
      elif [ "$days" -lt 730 ];  then echo aging
      elif [ "$days" -lt 1825 ]; then echo stale
      else                            echo outdated
      fi ;;
    permanent)
      if   [ "$days" -lt 1825 ]; then echo fresh
      elif [ "$days" -lt 3650 ]; then echo aging
      else                            echo stale
      fi ;;
  esac
}
|
|
50
|
+
|
|
51
|
+
verdict_from() {
  # Translate a freshness status into the action the orchestrator takes:
  # reuse the cached doc, refresh it incrementally, or research from scratch.
  # Empty/unknown statuses deliberately fall through to full-research.
  case "$1" in
    fresh)       echo reuse ;;
    aging|stale) echo delta-update ;;
    *)           echo full-research ;;
  esac
}
|
|
60
|
+
|
|
61
|
+
# ---- arg parsing ----
TOPIC=""        # explicit slug (via --topic); derived from QUESTION when empty
QUESTION=""     # free-text research question
ONLY_SLUGIFY=0  # 1 → print the slug and exit without scanning the cache
while [ $# -gt 0 ]; do
  case "$1" in
    --slugify) ONLY_SLUGIFY=1; shift ;;
    --topic) TOPIC="${2:-}"; shift 2 ;;
    --question) QUESTION="${2:-}"; shift 2 ;;
    *) # treat positional as the question for slugify mode
      if [ -z "$QUESTION" ]; then QUESTION="$1"; fi; shift ;;
  esac
done

# Slug-only mode: emit the slug for the question (or topic) and stop.
if [ "$ONLY_SLUGIFY" -eq 1 ]; then
  slugify "${QUESTION:-$TOPIC}"
  exit 0
fi

# Derive the topic slug when --topic was not given.
# (A failing test inside a && list does not trip `set -e`.)
[ -z "$TOPIC" ] && TOPIC=$(slugify "$QUESTION")

DOC="${RESEARCH_DIR}/${TOPIC}.md"                # candidate cached doc
BUCKET=$(classify_bucket "${QUESTION:-$TOPIC}")  # volatility bucket
WINDOW=$(bucket_window_days "$BUCKET")           # freshness window (days)

if [ -f "$DOC" ]; then
  # Read frontmatter date if present, else file mtime.
  # NOTE(review): this matches the first `date:` line anywhere in the file,
  # not only inside the YAML frontmatter — assumed docs keep it there; confirm
  # against the research.md template.
  DATE=$(awk '/^date:/ { print $2; exit }' "$DOC" 2>/dev/null || true)
  if [ -z "$DATE" ]; then
    # Portability probe: GNU stat uses `-c %Y`, BSD/macOS stat uses `-f %m`.
    if stat -c %Y "$DOC" >/dev/null 2>&1; then
      MTIME=$(stat -c %Y "$DOC")
    else
      MTIME=$(stat -f %m "$DOC")
    fi
    # GNU date (`-d @epoch`) first; BSD date (`-r epoch`) as fallback.
    DATE=$(date -u -d "@$MTIME" +%Y-%m-%d 2>/dev/null || date -u -r "$MTIME" +%Y-%m-%d)
  fi
  NOW_EPOCH=$(date -u +%s)
  # Parse the doc date (GNU then BSD form); if both fail, treat the doc as
  # dated "now" so it classifies as fresh instead of crashing the hook.
  DOC_EPOCH=$(date -u -d "$DATE" +%s 2>/dev/null || date -u -j -f "%Y-%m-%d" "$DATE" +%s 2>/dev/null || echo "$NOW_EPOCH")
  AGE_DAYS=$(( (NOW_EPOCH - DOC_EPOCH) / 86400 ))
  STATUS=$(age_status "$AGE_DAYS" "$BUCKET")
  VERDICT=$(verdict_from "$STATUS")
  # Unquoted heredoc delimiter → variables interpolate into the JSON.
  # (Slugs are [a-z0-9-] only, so no JSON-escaping is needed for TOPIC.)
  cat <<JSON
{
  "topic_slug": "${TOPIC}",
  "existing_doc": "docs/research/${TOPIC}.md",
  "exists": true,
  "doc_date": "${DATE}",
  "age_days": ${AGE_DAYS},
  "content_type_bucket": "${BUCKET}",
  "freshness_window_days": ${WINDOW},
  "freshness_status": "${STATUS}",
  "verdict": "${VERDICT}"
}
JSON
else
  # No cached doc: report "missing" and request full research.
  cat <<JSON
{
  "topic_slug": "${TOPIC}",
  "existing_doc": null,
  "exists": false,
  "doc_date": null,
  "age_days": null,
  "content_type_bucket": "${BUCKET}",
  "freshness_window_days": ${WINDOW},
  "freshness_status": "missing",
  "verdict": "full-research"
}
JSON
fi
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# dedup-research.sh — detect topic overlap between /docs/research/*.md docs.
# Computes Jaccard similarity over (a) frontmatter `concepts:` arrays and
# (b) Sources URL sets. Reports pairs above thresholds with a suggested
# action (merge | cross-link | leave).
#
# Usage:
#   dedup-research.sh              # scan all
#   dedup-research.sh <doc.md>     # find dups for one doc
#
# Output: TSV to stdout (a, b, jaccard_concepts, jaccard_sources, action)
set -euo pipefail

# Project root: Claude-hook env var when running as a hook, else the cwd.
ROOT="${CLAUDE_PROJECT_DIR:-$(pwd)}"
DIR="${ROOT}/docs/research"
# Optional positional arg: restrict the scan to duplicates of one document.
TARGET="${1:-}"

# A missing research dir is not an error — there is simply nothing to scan.
[ -d "$DIR" ] || { echo "no docs/research/ yet" >&2; exit 0; }
|
20
|
+
# extract_concepts <file> — print the doc's frontmatter `concepts` entries,
# one per line, sorted and de-duplicated.
#
# The awk stage toggles `fm` on the `---` frontmatter fences; on the
# `concepts:` key it enters list mode and strips the key (so an inline
# `concepts: [a, b]` value falls through to the print rule), and leaves list
# mode at the next top-level key (a line starting with a letter that is
# neither `concepts:` nor a `- ` list item). The pipeline then flattens
# inline arrays (drop brackets, split on commas), trims `- ` bullets and
# surrounding whitespace, drops blanks, and uniquifies.
extract_concepts() {
  awk '
    /^---$/ { fm = !fm; next }
    fm && /^concepts:/ { incon = 1; sub(/^concepts:[[:space:]]*/, ""); }
    incon && /^[a-zA-Z]/ && !/^concepts:/ && !/^- / { incon = 0 }
    incon { print }
  ' "$1" \
    | tr -d '[]' \
    | tr ',' '\n' \
    | sed -E 's/^[[:space:]]*-?[[:space:]]*//; s/[[:space:]]*$//' \
    | grep -v '^$' \
    | sort -u
}
|
|
33
|
+
|
|
34
|
+
# extract_sources <file> — emit the sorted, unique set of http(s) URLs cited
# in the doc's "## Sources" section, with trailing punctuation stripped.
extract_sources() {
  local doc="$1"
  # insec flags "inside the Sources section"; any other ## heading ends it.
  awk '/^## Sources/{insec=1; next} /^## /{insec=0} insec' "$doc" |
    grep -oE 'https?://[^ )]+' |
    sed -E 's/[\)\.,]+$//' |
    sort -u
}
|
|
40
|
+
|
|
41
|
+
# jaccard <setfile_a> <setfile_b> — print the Jaccard index (|A∩B| / |A∪B|)
# of two sorted, newline-delimited set files, formatted to three decimals.
# Two empty sets are defined to have similarity 0 (no trailing newline, to
# match the awk printf in the non-degenerate path's style of output).
jaccard() {
  local size_a size_b inter union
  size_a=$(wc -l < "$1")
  size_b=$(wc -l < "$2")
  if [ "$size_a" -eq 0 ] && [ "$size_b" -eq 0 ]; then
    echo 0
    return
  fi
  # comm -12 keeps only lines common to both inputs (both already sorted -u).
  inter=$(comm -12 "$1" "$2" | wc -l)
  union=$(( size_a + size_b - inter ))
  if [ "$union" -eq 0 ]; then
    echo 0
    return
  fi
  awk -v i="$inter" -v u="$union" 'BEGIN { printf("%.3f", i/u) }'
}
|
|
51
|
+
|
|
52
|
+
# Build the list of docs to score (FILES) and the comparison pool (COMP).
if [ -n "$TARGET" ]; then
  FILES="$TARGET"
  COMP=$(find "$DIR" -maxdepth 1 -type f -name "*.md" ! -name "index.md" ! -path "$TARGET")
else
  FILES=$(find "$DIR" -maxdepth 1 -type f -name "*.md" ! -name "index.md" | sort)
  COMP="$FILES"
fi

TMP=$(mktemp -d); trap 'rm -rf "$TMP"' EXIT

printf 'a\tb\tjaccard_concepts\tjaccard_sources\taction\n'

# Pairwise scan (O(n^2) — fine for hundreds of docs).
# NOTE: relies on word splitting of $FILES/$COMP, so doc filenames must not
# contain whitespace.
for A in $FILES; do
  extract_concepts "$A" > "$TMP/a.concepts"
  extract_sources "$A" > "$TMP/a.sources"
  for B in $COMP; do
    [ "$A" = "$B" ] && continue
    # In scan-all mode each unordered pair appears twice, so keep only A < B.
    # In single-target mode every comparator must be checked: the previous
    # unconditional guard silently dropped every comparator that sorted
    # before the target, hiding half the potential duplicates.
    if [ -z "$TARGET" ]; then
      [ "$A" \> "$B" ] && continue # avoid duplicate pairs
    fi
    extract_concepts "$B" > "$TMP/b.concepts"
    extract_sources "$B" > "$TMP/b.sources"
    JC=$(jaccard "$TMP/a.concepts" "$TMP/b.concepts")
    JS=$(jaccard "$TMP/a.sources" "$TMP/b.sources")
    # Thresholds: >= 0.6 on either metric suggests a merge; >= 0.3 a cross-link.
    ACTION="leave"
    awk -v jc="$JC" -v js="$JS" 'BEGIN { exit !(jc >= 0.6 || js >= 0.6) }' && ACTION="merge"
    [ "$ACTION" = "leave" ] && awk -v jc="$JC" -v js="$JS" 'BEGIN { exit !(jc >= 0.3 || js >= 0.3) }' && ACTION="cross-link"
    printf '%s\t%s\t%s\t%s\t%s\n' "$A" "$B" "$JC" "$JS" "$ACTION"
  done
done
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
extract-claims.py — pull atomic claims with citations out of a rendered
|
|
4
|
+
/docs/research/<slug>.md into JSONL.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
python3 extract-claims.py <doc.md> # writes to stdout
|
|
8
|
+
python3 extract-claims.py <doc.md> -o out.jsonl
|
|
9
|
+
|
|
10
|
+
Heuristic parser. Looks for `### Finding ...` blocks and extracts:
|
|
11
|
+
- assertion (first sentence under the heading)
|
|
12
|
+
- confidence (line `Confidence: high|medium|low|conjecture`)
|
|
13
|
+
- source IDs (e.g. `[S-0007]` markers in evidence list)
|
|
14
|
+
- quote (text inside `> "..."` blockquotes following the assertion)
|
|
15
|
+
|
|
16
|
+
Round-trip rule: anything this script cannot parse is malformed. The
|
|
17
|
+
synthesize agent must keep its output parseable by this script.
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
import argparse, json, re, sys
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
FINDING_RE = re.compile(r"^###\s+Finding\s+(?P<id>F-\d{4})\s*[-:]\s*(?P<title>.+)$")
|
|
24
|
+
CONF_RE = re.compile(r"^\*?\*?Confidence\*?\*?\s*[::]\s*(?P<c>high|medium|low|conjecture)", re.I)
|
|
25
|
+
SOURCE_RE = re.compile(r"\[(S-\d{4})\]")
|
|
26
|
+
QUOTE_RE = re.compile(r'^>\s*"(?P<q>[^"]+)"\s*\[(?P<s>S-\d{4})\]')
|
|
27
|
+
|
|
28
|
+
def parse(doc: Path):
|
|
29
|
+
lines = doc.read_text(encoding="utf-8").splitlines()
|
|
30
|
+
claims = []
|
|
31
|
+
i = 0
|
|
32
|
+
while i < len(lines):
|
|
33
|
+
m = FINDING_RE.match(lines[i].strip())
|
|
34
|
+
if not m:
|
|
35
|
+
i += 1; continue
|
|
36
|
+
fid, title = m.group("id"), m.group("title").strip()
|
|
37
|
+
# body until next ### or ## or end
|
|
38
|
+
body, j = [], i + 1
|
|
39
|
+
while j < len(lines) and not lines[j].startswith("## ") and not lines[j].startswith("### Finding "):
|
|
40
|
+
body.append(lines[j]); j += 1
|
|
41
|
+
body_text = "\n".join(body).strip()
|
|
42
|
+
# assertion = first non-empty paragraph
|
|
43
|
+
assertion = ""
|
|
44
|
+
for chunk in body_text.split("\n\n"):
|
|
45
|
+
chunk = chunk.strip()
|
|
46
|
+
if chunk and not chunk.startswith(">") and not chunk.startswith("*"):
|
|
47
|
+
assertion = chunk.split("\n")[0].strip(); break
|
|
48
|
+
# confidence
|
|
49
|
+
conf = "unknown"
|
|
50
|
+
for ln in body:
|
|
51
|
+
cm = CONF_RE.match(ln.strip())
|
|
52
|
+
if cm: conf = cm.group("c").lower(); break
|
|
53
|
+
# quotes + source ids
|
|
54
|
+
evidence = []
|
|
55
|
+
for ln in body:
|
|
56
|
+
qm = QUOTE_RE.match(ln.strip())
|
|
57
|
+
if qm:
|
|
58
|
+
evidence.append({"quote": qm.group("q"), "source_id": qm.group("s")})
|
|
59
|
+
sources = sorted({s for s in SOURCE_RE.findall(body_text)})
|
|
60
|
+
claims.append({
|
|
61
|
+
"id": fid,
|
|
62
|
+
"title": title,
|
|
63
|
+
"assertion": assertion,
|
|
64
|
+
"confidence": conf,
|
|
65
|
+
"evidence": evidence,
|
|
66
|
+
"sources": sources,
|
|
67
|
+
})
|
|
68
|
+
i = j
|
|
69
|
+
return claims
|
|
70
|
+
|
|
71
|
+
def main() -> None:
    """CLI entry point: parse <doc> and emit one JSON object per claim (JSONL)."""
    ap = argparse.ArgumentParser()
    ap.add_argument("doc", help="path to a docs/research/<slug>.md document")
    ap.add_argument("-o", "--out", default=None,
                    help="write JSONL to this file instead of stdout")
    args = ap.parse_args()
    claims = parse(Path(args.doc))
    if args.out:
        # `with` guarantees the handle is closed even if a write raises;
        # the previous manual open()/close() leaked the handle on error.
        with open(args.out, "w", encoding="utf-8") as fh:
            _emit(claims, fh)
    else:
        _emit(claims, sys.stdout)


def _emit(claims, out) -> None:
    """Serialize *claims* to *out* as JSON Lines (one compact object per line)."""
    for c in claims:
        out.write(json.dumps(c, ensure_ascii=False) + "\n")


if __name__ == "__main__":
    main()