mindforge-cc 11.5.0 → 11.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/.agent/mindforge/skill-tdd.md +53 -0
  2. package/.agent/mindforge/skills-index.md +118 -0
  3. package/.agent/mindforge/systematic-debug.md +60 -0
  4. package/.agent/skills/1password-skill/SKILL.md +156 -0
  5. package/.agent/skills/1password-skill/references/cli-examples.md +31 -0
  6. package/.agent/skills/1password-skill/references/get-started.md +21 -0
  7. package/.agent/skills/article-illustrator/SKILL.md +199 -0
  8. package/.agent/skills/article-illustrator/references/prompt-construction.md +426 -0
  9. package/.agent/skills/article-illustrator/references/style-presets.md +80 -0
  10. package/.agent/skills/article-illustrator/references/styles.md +224 -0
  11. package/.agent/skills/article-illustrator/references/usage.md +50 -0
  12. package/.agent/skills/article-illustrator/references/workflow.md +332 -0
  13. package/.agent/skills/arxiv/SKILL.md +275 -0
  14. package/.agent/skills/blogwatcher/SKILL.md +130 -0
  15. package/.agent/skills/code-wiki/SKILL.md +438 -0
  16. package/.agent/skills/code-wiki/templates/README.md +31 -0
  17. package/.agent/skills/code-wiki/templates/architecture.md +30 -0
  18. package/.agent/skills/code-wiki/templates/getting-started.md +47 -0
  19. package/.agent/skills/code-wiki/templates/module.md +38 -0
  20. package/.agent/skills/codebase-inspection/SKILL.md +109 -0
  21. package/.agent/skills/comic-creator/SKILL.md +240 -0
  22. package/.agent/skills/comic-creator/references/analysis-framework.md +176 -0
  23. package/.agent/skills/comic-creator/references/auto-selection.md +71 -0
  24. package/.agent/skills/comic-creator/references/base-prompt.md +98 -0
  25. package/.agent/skills/comic-creator/references/character-template.md +180 -0
  26. package/.agent/skills/comic-creator/references/ohmsha-guide.md +85 -0
  27. package/.agent/skills/comic-creator/references/partial-workflows.md +106 -0
  28. package/.agent/skills/comic-creator/references/storyboard-template.md +143 -0
  29. package/.agent/skills/comic-creator/references/workflow.md +401 -0
  30. package/.agent/skills/concept-diagrams/SKILL.md +355 -0
  31. package/.agent/skills/concept-diagrams/references/dashboard-patterns.md +43 -0
  32. package/.agent/skills/concept-diagrams/references/infrastructure-patterns.md +144 -0
  33. package/.agent/skills/concept-diagrams/references/physical-shape-cookbook.md +42 -0
  34. package/.agent/skills/creative-ideation/SKILL.md +144 -0
  35. package/.agent/skills/creative-ideation/references/full-prompt-library.md +110 -0
  36. package/.agent/skills/devops-cli/SKILL.md +149 -0
  37. package/.agent/skills/devops-cli/references/app-discovery.md +112 -0
  38. package/.agent/skills/devops-cli/references/authentication.md +59 -0
  39. package/.agent/skills/devops-cli/references/cli-reference.md +104 -0
  40. package/.agent/skills/devops-cli/references/running-apps.md +171 -0
  41. package/.agent/skills/devops-watchers/SKILL.md +103 -0
  42. package/.agent/skills/docker-management/SKILL.md +273 -0
  43. package/.agent/skills/domain-intel/SKILL.md +96 -0
  44. package/.agent/skills/duckduckgo-search/SKILL.md +230 -0
  45. package/.agent/skills/github-auth/SKILL.md +240 -0
  46. package/.agent/skills/github-code-review/SKILL.md +474 -0
  47. package/.agent/skills/github-code-review/references/review-output-template.md +74 -0
  48. package/.agent/skills/github-issues/SKILL.md +363 -0
  49. package/.agent/skills/github-issues/templates/bug-report.md +35 -0
  50. package/.agent/skills/github-issues/templates/feature-request.md +31 -0
  51. package/.agent/skills/github-pr-workflow/SKILL.md +360 -0
  52. package/.agent/skills/github-pr-workflow/references/ci-troubleshooting.md +183 -0
  53. package/.agent/skills/github-pr-workflow/references/conventional-commits.md +71 -0
  54. package/.agent/skills/github-pr-workflow/templates/pr-body-bugfix.md +35 -0
  55. package/.agent/skills/github-pr-workflow/templates/pr-body-feature.md +33 -0
  56. package/.agent/skills/github-repo-management/SKILL.md +509 -0
  57. package/.agent/skills/github-repo-management/references/github-api-cheatsheet.md +161 -0
  58. package/.agent/skills/godmode/SKILL.md +396 -0
  59. package/.agent/skills/godmode/references/jailbreak-templates.md +128 -0
  60. package/.agent/skills/godmode/references/refusal-detection.md +142 -0
  61. package/.agent/skills/hyperframes/SKILL.md +182 -0
  62. package/.agent/skills/hyperframes/references/cli.md +185 -0
  63. package/.agent/skills/hyperframes/references/composition.md +129 -0
  64. package/.agent/skills/hyperframes/references/features.md +289 -0
  65. package/.agent/skills/hyperframes/references/gsap.md +136 -0
  66. package/.agent/skills/hyperframes/references/troubleshooting.md +137 -0
  67. package/.agent/skills/hyperframes/references/website-to-video.md +145 -0
  68. package/.agent/skills/jupyter-live-kernel/SKILL.md +160 -0
  69. package/.agent/skills/kanban-orchestrator/SKILL.md +209 -0
  70. package/.agent/skills/kanban-worker/SKILL.md +188 -0
  71. package/.agent/skills/llm-wiki/SKILL.md +499 -0
  72. package/.agent/skills/meme-generation/SKILL.md +122 -0
  73. package/.agent/skills/node-inspect-debugger/SKILL.md +312 -0
  74. package/.agent/skills/obsidian/SKILL.md +60 -0
  75. package/.agent/skills/osint-investigation/SKILL.md +269 -0
  76. package/.agent/skills/osint-investigation/templates/source-template.md +59 -0
  77. package/.agent/skills/oss-forensics/SKILL.md +422 -0
  78. package/.agent/skills/oss-forensics/references/evidence-types.md +89 -0
  79. package/.agent/skills/oss-forensics/references/github-archive-guide.md +184 -0
  80. package/.agent/skills/oss-forensics/references/investigation-templates.md +131 -0
  81. package/.agent/skills/oss-forensics/references/recovery-techniques.md +164 -0
  82. package/.agent/skills/oss-forensics/templates/forensic-report.md +151 -0
  83. package/.agent/skills/oss-forensics/templates/malicious-package-report.md +43 -0
  84. package/.agent/skills/parallel-cli/SKILL.md +384 -0
  85. package/.agent/skills/pinggy-tunnel/SKILL.md +302 -0
  86. package/.agent/skills/pixel-art/SKILL.md +209 -0
  87. package/.agent/skills/pixel-art/references/palettes.md +49 -0
  88. package/.agent/skills/plan/SKILL.md +331 -0
  89. package/.agent/skills/polymarket/SKILL.md +75 -0
  90. package/.agent/skills/polymarket/references/api-endpoints.md +220 -0
  91. package/.agent/skills/python-debugpy/SKILL.md +368 -0
  92. package/.agent/skills/requesting-code-review/SKILL.md +273 -0
  93. package/.agent/skills/research-paper-writing/SKILL.md +2367 -0
  94. package/.agent/skills/research-paper-writing/references/autoreason-methodology.md +394 -0
  95. package/.agent/skills/research-paper-writing/references/checklists.md +434 -0
  96. package/.agent/skills/research-paper-writing/references/citation-workflow.md +563 -0
  97. package/.agent/skills/research-paper-writing/references/experiment-patterns.md +728 -0
  98. package/.agent/skills/research-paper-writing/references/human-evaluation.md +476 -0
  99. package/.agent/skills/research-paper-writing/references/paper-types.md +481 -0
  100. package/.agent/skills/research-paper-writing/references/reviewer-guidelines.md +433 -0
  101. package/.agent/skills/research-paper-writing/references/sources.md +191 -0
  102. package/.agent/skills/research-paper-writing/references/writing-guide.md +474 -0
  103. package/.agent/skills/research-paper-writing/templates/README.md +251 -0
  104. package/.agent/skills/rest-graphql-debug/SKILL.md +507 -0
  105. package/.agent/skills/s6-container-supervision/SKILL.md +171 -0
  106. package/.agent/skills/scrapling/SKILL.md +328 -0
  107. package/.agent/skills/sherlock/SKILL.md +186 -0
  108. package/.agent/skills/simplify-code/SKILL.md +168 -0
  109. package/.agent/skills/skill-authoring/SKILL.md +158 -0
  110. package/.agent/skills/spike/SKILL.md +190 -0
  111. package/.agent/skills/subagent-driven-development/SKILL.md +345 -0
  112. package/.agent/skills/subagent-driven-development/references/context-budget-discipline.md +53 -0
  113. package/.agent/skills/subagent-driven-development/references/gates-taxonomy.md +93 -0
  114. package/.agent/skills/systematic-debugging/SKILL.md +360 -0
  115. package/.agent/skills/test-driven-development/SKILL.md +336 -0
  116. package/.agent/skills/video-orchestrator/SKILL.md +194 -0
  117. package/.agent/skills/video-orchestrator/references/examples.md +227 -0
  118. package/.agent/skills/video-orchestrator/references/intake.md +166 -0
  119. package/.agent/skills/video-orchestrator/references/kanban-setup.md +278 -0
  120. package/.agent/skills/video-orchestrator/references/monitoring.md +180 -0
  121. package/.agent/skills/video-orchestrator/references/role-archetypes.md +298 -0
  122. package/.agent/skills/video-orchestrator/references/tool-matrix.md +317 -0
  123. package/.agent/skills/web-pentest/SKILL.md +332 -0
  124. package/.agent/skills/web-pentest/references/bypass-techniques.md +133 -0
  125. package/.agent/skills/web-pentest/references/exploitation-techniques.md +204 -0
  126. package/.agent/skills/web-pentest/references/scope-enforcement.md +110 -0
  127. package/.agent/skills/web-pentest/references/vuln-taxonomy.md +81 -0
  128. package/.agent/skills/web-pentest/templates/authorization.md +69 -0
  129. package/.agent/skills/web-pentest/templates/pentest-report.md +178 -0
  130. package/.claude/commands/mindforge/skill-tdd.md +53 -0
  131. package/.claude/commands/mindforge/skills-index.md +118 -0
  132. package/.claude/commands/mindforge/systematic-debug.md +60 -0
  133. package/.mindforge/config.json +2 -2
  134. package/.mindforge/memory/sync-manifest.json +1 -1
  135. package/.mindforge/skills/arxiv/SKILL.md +294 -0
  136. package/.mindforge/skills/blogwatcher/SKILL.md +147 -0
  137. package/.mindforge/skills/code-wiki/SKILL.md +457 -0
  138. package/.mindforge/skills/codebase-inspection/SKILL.md +126 -0
  139. package/.mindforge/skills/concept-diagrams/SKILL.md +373 -0
  140. package/.mindforge/skills/creative-ideation/SKILL.md +162 -0
  141. package/.mindforge/skills/domain-intel/SKILL.md +116 -0
  142. package/.mindforge/skills/duckduckgo-search/SKILL.md +249 -0
  143. package/.mindforge/skills/github-code-review/SKILL.md +493 -0
  144. package/.mindforge/skills/github-issues/SKILL.md +382 -0
  145. package/.mindforge/skills/github-pr-workflow/SKILL.md +379 -0
  146. package/.mindforge/skills/jupyter-live-kernel/SKILL.md +179 -0
  147. package/.mindforge/skills/kanban-orchestrator/SKILL.md +227 -0
  148. package/.mindforge/skills/kanban-worker/SKILL.md +206 -0
  149. package/.mindforge/skills/meme-generation/SKILL.md +141 -0
  150. package/.mindforge/skills/obsidian/SKILL.md +80 -0
  151. package/.mindforge/skills/osint-investigation/SKILL.md +288 -0
  152. package/.mindforge/skills/oss-forensics/SKILL.md +421 -0
  153. package/.mindforge/skills/pixel-art/SKILL.md +228 -0
  154. package/.mindforge/skills/plan/SKILL.md +350 -0
  155. package/.mindforge/skills/requesting-code-review/SKILL.md +292 -0
  156. package/.mindforge/skills/research-paper-writing/SKILL.md +2384 -0
  157. package/.mindforge/skills/scrapling/SKILL.md +345 -0
  158. package/.mindforge/skills/sherlock/SKILL.md +203 -0
  159. package/.mindforge/skills/simplify-code/SKILL.md +187 -0
  160. package/.mindforge/skills/spike/SKILL.md +209 -0
  161. package/.mindforge/skills/subagent-driven-development/SKILL.md +364 -0
  162. package/.mindforge/skills/systematic-debugging/SKILL.md +379 -0
  163. package/.mindforge/skills/test-driven-development/SKILL.md +355 -0
  164. package/.mindforge/skills/web-pentest/SKILL.md +327 -0
  165. package/CHANGELOG.md +88 -0
  166. package/MINDFORGE.md +3 -3
  167. package/README.md +38 -3
  168. package/RELEASENOTES.md +100 -0
  169. package/bin/dashboard/api-router.js +10 -1
  170. package/bin/governance/approve.js +5 -1
  171. package/bin/memory/federated-sync.js +11 -2
  172. package/bin/memory/knowledge-capture.js +10 -1
  173. package/bin/memory/pillar-health-tracker.js +9 -1
  174. package/bin/review/ads-engine.js +2 -2
  175. package/bin/security/trust-boundaries.js +5 -0
  176. package/docs/getting-started.md +42 -5
  177. package/package.json +1 -1
@@ -0,0 +1,269 @@
1
+ ---
2
+ name: osint-investigation
3
+ description: Public-records OSINT investigation framework — SEC EDGAR filings, USAspending contracts, Senate lobbying, OFAC sanctions, ICIJ offshore leaks, NYC property records (ACRIS), OpenCorporates registries, CourtListener court records, Wayback Machine archives, Wikipedia + Wikidata, GDELT news monitoring. Entity resolution across sources, cross-link analysis, timing correlation, evidence chains. Python stdlib only.
4
+ version: 0.1.0
5
+ ---
6
+
7
+ # OSINT Investigation — Public Records Cross-Reference
8
+
9
+ Investigative framework for public-records OSINT: government contracts,
10
+ corporate filings, lobbying, sanctions, offshore leaks, property records,
11
+ court records, web archives, knowledge bases, and global news. Resolve
12
+ entities across heterogeneous sources, build cross-links with explicit
13
+ confidence, run statistical timing tests, and produce structured evidence
14
+ chains.
15
+
16
+ **Python stdlib only.** Zero install. Works on Linux, macOS, Windows. Most
17
+ sources work with no API key (the exception is OpenCorporates, whose fetch
18
+ script expects a free API token).
19
+
20
+ This skill extends the original tool to cover identity / property / litigation / archives / news sources that
21
+ the original didn't address.
22
+
23
+ ## When to use this skill
24
+
25
+ Use when the user asks for:
26
+
27
+ - "follow the money" — government contracts, lobbying → legislation, sanctions
28
+ - corporate due diligence — who controls company X, where are they
29
+ incorporated, who serves on their boards, what filings have they made
30
+ - sanctions screening — is entity X on OFAC SDN, ICIJ offshore leaks
31
+ - pay-to-play investigation — contractors with offshore ties, lobbying
32
+ clients winning awards
33
+ - property ownership — find recorded deeds/mortgages by name or address
34
+ (NYC; for other counties point users at the relevant recorder)
35
+ - litigation history — find federal + state court opinions and PACER dockets
36
+ - multi-source entity resolution where naming varies (LLC suffixes, abbreviations)
37
+ - evidence-chain construction with explicit confidence levels
38
+ - "what's been said about X" — international news (GDELT) + Wikipedia
39
+ narrative + Wayback Machine to recover dead URLs
40
+
41
+ Do NOT use this skill for:
42
+
43
+ - general web research → `web_search` / `web_extract`
44
+ - domain/infrastructure OSINT → `domain-intel` skill
45
+ - academic literature → `arxiv` skill
46
+ - social-media profile discovery → `sherlock` skill (optional)
47
+ - US **federal** campaign finance — FEC is intentionally NOT covered here
48
+ (the API is unreliable for ad-hoc contributor-name queries on the free
49
+ DEMO_KEY tier). For federal donations, point users at
50
+ https://www.fec.gov/data/ directly.
51
+
52
+ ## Workflow
53
+
54
+ The agent runs scripts via the `terminal` tool. `SKILL_DIR` is the directory
55
+ holding this SKILL.md.
56
+
57
+ ### 1. Identify which sources apply
58
+
59
+ Read the data-source wiki entries to plan the investigation:
60
+
61
+ ```
62
+ ls SKILL_DIR/references/sources/
63
+
64
+ # Federal financial / regulatory
65
+ cat SKILL_DIR/references/sources/sec-edgar.md # corporate filings
66
+ cat SKILL_DIR/references/sources/usaspending.md # federal contracts
67
+ cat SKILL_DIR/references/sources/senate-ld.md # lobbying
68
+ cat SKILL_DIR/references/sources/ofac-sdn.md # sanctions
69
+ cat SKILL_DIR/references/sources/icij-offshore.md # offshore leaks
70
+
71
+ # Identity / property / litigation / archives / news
72
+ cat SKILL_DIR/references/sources/nyc-acris.md # NYC property records
73
+ cat SKILL_DIR/references/sources/opencorporates.md # global corporate registry
74
+ cat SKILL_DIR/references/sources/courtlistener.md # court records (federal + state)
75
+ cat SKILL_DIR/references/sources/wayback.md # Wayback Machine archives
76
+ cat SKILL_DIR/references/sources/wikipedia.md # Wikipedia + Wikidata
77
+ cat SKILL_DIR/references/sources/gdelt.md # global news monitoring
78
+ ```
79
+
80
+ Each entry follows a 9-section template: summary, access, schema, coverage,
81
+ cross-reference potential, data quality, acquisition, legal, references.
82
+
83
+ The **cross-reference potential** section maps join keys between sources — read
84
+ those first to pick the right pair.
85
+
86
+ ### 2. Acquire data
87
+
88
+ Each source has a stdlib-only fetch script in `SKILL_DIR/scripts/`:
89
+
90
+ **Federal financial / regulatory**
91
+
92
+ ```bash
93
+ # SEC EDGAR filings (corporate disclosures)
94
+ python3 SKILL_DIR/scripts/fetch_sec_edgar.py --cik 0000320193 \
95
+ --types 10-K,10-Q --out data/edgar_filings.csv
96
+
97
+ # USAspending federal contracts
98
+ python3 SKILL_DIR/scripts/fetch_usaspending.py --recipient "EXAMPLE CORP" \
99
+ --fy 2024 --out data/contracts.csv
100
+
101
+ # Senate LD-1 / LD-2 lobbying disclosures
102
+ python3 SKILL_DIR/scripts/fetch_senate_ld.py --client "EXAMPLE CORP" \
103
+ --year 2024 --out data/lobbying.csv
104
+
105
+ # OFAC SDN sanctions list (full snapshot)
106
+ python3 SKILL_DIR/scripts/fetch_ofac_sdn.py --out data/ofac_sdn.csv
107
+
108
+ # ICIJ Offshore Leaks — downloads ~70 MB bulk CSV on first use,
109
+ # then searches it locally. Cached for 30 days under
110
+ # $HERMES_OSINT_CACHE/icij/ (default: ~/.cache/hermes-osint/icij/).
111
+ python3 SKILL_DIR/scripts/fetch_icij_offshore.py --entity "EXAMPLE CORP" \
112
+ --out data/icij.csv
113
+ ```
114
+
115
+ **Identity / property / litigation / archives / news**
116
+
117
+ ```bash
118
+ # NYC property records (deeds, mortgages, liens) — ACRIS via Socrata
119
+ python3 SKILL_DIR/scripts/fetch_nyc_acris.py --name "SMITH, JOHN" \
120
+ --out data/acris.csv
121
+ python3 SKILL_DIR/scripts/fetch_nyc_acris.py --address "571 HUDSON" \
122
+ --out data/acris_addr.csv
123
+
124
+ # OpenCorporates — 130+ jurisdiction corporate registry
125
+ # (free token required; set OPENCORPORATES_API_TOKEN or pass --token)
126
+ python3 SKILL_DIR/scripts/fetch_opencorporates.py --query "Example Corp" \
127
+ --jurisdiction us_ny --out data/opencorporates.csv
128
+
129
+ # CourtListener — federal + state court opinions, PACER dockets
130
+ python3 SKILL_DIR/scripts/fetch_courtlistener.py --query "Smith v. Example Corp" \
131
+ --type opinions --out data/courts.csv
132
+
133
+ # Wayback Machine — historical web captures
134
+ python3 SKILL_DIR/scripts/fetch_wayback.py --url "example.com" \
135
+ --match host --collapse digest --out data/wayback.csv
136
+
137
+ # Wikipedia + Wikidata — narrative bio + structured facts
138
+ # Set HERMES_OSINT_UA=your-app/1.0 (your@email) to identify yourself
139
+ python3 SKILL_DIR/scripts/fetch_wikipedia.py --query "Bill Gates" \
140
+ --out data/wp.csv
141
+
142
+ # GDELT — global news in 100+ languages, ~2015→present
143
+ python3 SKILL_DIR/scripts/fetch_gdelt.py --query '"Example Corp"' \
144
+ --timespan 1y --out data/gdelt.csv
145
+ ```
146
+
147
+ All outputs are normalized CSV with a header row. The scripts are idempotent, so re-running them is safe.
148
+
149
+ When a private individual won't be in a source (e.g. SEC EDGAR for a non-public-
150
+ company person, USAspending for someone who isn't a federal contractor, Senate
151
+ LDA for someone who isn't a lobbying client), the script returns 0 rows with a
152
+ clear warning rather than silently writing an empty CSV. EDGAR specifically
153
+ flags when the company-name resolver matched an individual Form 3/4/5 filer
154
+ rather than a corporate registrant.
155
+
156
+ Rate-limit notes are in each source's wiki entry. Default fetchers sleep
157
+ politely between paginated requests. **API keys raise rate limits** for
158
+ sources that support them (`SEC_USER_AGENT`, `SENATE_LDA_TOKEN`,
159
+ `OPENCORPORATES_API_TOKEN`, `COURTLISTENER_TOKEN`). All scripts surface
160
+ 429 responses immediately with the upstream's quota message so the user
161
+ knows to slow down or supply a key.
162
+
163
+ ### 3. Resolve entities across sources
164
+
165
+ Normalize names and find matches between two CSV files:
166
+
167
+ ```bash
168
+ # Match lobbying clients (Senate LDA) against contract recipients (USAspending)
169
+ python3 SKILL_DIR/scripts/entity_resolution.py \
170
+ --left data/lobbying.csv --left-name-col client_name \
171
+ --right data/contracts.csv --right-name-col recipient_name \
172
+ --out data/cross_links.csv
173
+ ```
174
+
175
+ Three matching tiers with explicit confidence:
176
+
177
+ | Tier | Method | Confidence |
178
+ |------|--------|------------|
179
+ | `exact` | Normalized strings equal after suffix/punctuation strip | high |
180
+ | `fuzzy` | Sorted-token equality (word-bag match) | medium |
181
+ | `token_overlap` | ≥60% token overlap, ≥2 shared tokens, tokens ≥4 chars | low |
182
+
183
+ Output `cross_links.csv` columns: `match_type, confidence, left_name,
184
+ right_name, left_normalized, right_normalized, left_row, right_row`.
185
+
186
+ ### 4. Statistical timing correlation (optional)
187
+
188
+ Test whether two time series cluster suspiciously close together — e.g.
189
+ lobbying filings near contract awards — using a permutation test:
190
+
191
+ ```bash
192
+ python3 SKILL_DIR/scripts/timing_analysis.py \
193
+ --donations data/lobbying.csv --donation-date-col filing_date \
194
+ --donation-amount-col income --donation-donor-col client_name \
195
+ --donation-recipient-col registrant_name \
196
+ --contracts data/contracts.csv --contract-date-col award_date \
197
+ --contract-vendor-col recipient_name \
198
+ --cross-links data/cross_links.csv \
199
+ --permutations 1000 \
200
+ --out data/timing.json
201
+ ```
202
+
203
+ The script's flag names keep their historical donation/contract wording — the original tool was
203
+ written for donations vs awards — but the analysis is generic: it works for any (event, payee) time
205
+ series joined through cross-links. Null hypothesis: event timing is
206
+ independent of award dates. One-tailed p-value = fraction of permutations
207
+ with mean nearest-award distance ≤ observed. Minimum 3 events per (payer,
208
+ vendor) pair to run the test.
209
+
210
+ ### 5. Build the findings JSON (evidence chain)
211
+
212
+ ```bash
213
+ python3 SKILL_DIR/scripts/build_findings.py \
214
+ --cross-links data/cross_links.csv \
215
+ --timing data/timing.json \
216
+ --out data/findings.json
217
+ ```
218
+
219
+ Every finding has `id, title, severity, confidence, summary, evidence[], sources[]`.
220
+ Each evidence item points back to a specific row in a source CSV. The user (or a
221
+ follow-up agent) can verify every claim against its source.
222
+
223
+ ## Confidence and evidence discipline
224
+
225
+ This is the load-bearing rule of the skill. Tell the user:
226
+
227
+ - Every claim must trace to a record. No naked assertions.
228
+ - Confidence tier travels with the claim. `match_type=fuzzy` is "probable",
229
+ not "confirmed."
230
+ - Entity resolution produces candidates, NOT conclusions. A `fuzzy` match
231
+ between "ACME LLC" and "Acme Holdings Group" is a lead, not a fact.
232
+ - Statistical significance ≠ wrongdoing. p < 0.05 means the timing pattern
233
+ is unlikely under the null. It does not establish corruption.
234
+ - All data sources here are public records. They may still contain
235
+ inaccuracies, stale info, or redactions (GDPR, sealed records).
236
+
237
+ ## Adding a new data source
238
+
239
+ Use the template:
240
+
241
+ ```bash
242
+ cp SKILL_DIR/templates/source-template.md \
243
+ SKILL_DIR/references/sources/<your-source>.md
244
+ ```
245
+
246
+ Fill in all 9 sections. Write a `fetch_<source>.py` script in `scripts/` that
247
+ uses stdlib only and writes a normalized CSV. Add the new entry to the source
248
+ listing in "1. Identify which sources apply" and, where relevant, to "When to use this skill" above.
249
+
250
+ ## Tools and their limits
251
+
252
+ - `entity_resolution.py` does NOT use external fuzzy libraries (no rapidfuzz,
253
+ no jellyfish). Token-bag matching is the upper bound here. If you need
254
+ Levenshtein, transliteration, or phonetic matching, pip-install a dedicated library (e.g. rapidfuzz or jellyfish) separately.
255
+ - `timing_analysis.py` uses Python's `random` for permutations. For
256
+ reproducibility, pass `--seed N`.
257
+ - `fetch_*.py` scripts use `urllib.request` and respect `Retry-After`. Heavy
258
+ bulk usage may still violate ToS — read each source's legal section first.
259
+
260
+ ## Legal note
261
+
262
+ All Phase-1 sources are public records. Bulk acquisition is permitted under
263
+ their respective access terms (FOIA, public records law, ICIJ explicit
264
+ publication, OFAC public data). However:
265
+
266
+ - Some sources rate-limit aggressively. Respect their headers.
267
+ - Some redact registrant info (GDPR on WHOIS, sealed filings).
268
+ - Cross-referencing public records to identify private individuals can have
269
+ ethical implications. The skill produces evidence chains, not accusations.
@@ -0,0 +1,59 @@
1
+ # <Source Name>
2
+
3
+ ## 1. Summary
4
+
5
+ What this data source is, who publishes it, why it matters for investigations.
6
+
7
+ ## 2. Access Methods
8
+
9
+ - API endpoint(s)
10
+ - Bulk download URLs
11
+ - Auth requirements (none / API key / OAuth)
12
+ - Rate limits
13
+
14
+ ## 3. Data Schema
15
+
16
+ Key fields, record types, table relationships. List the columns the fetch
17
+ script emits.
18
+
19
+ ## 4. Coverage
20
+
21
+ - Jurisdiction
22
+ - Time range
23
+ - Update frequency
24
+ - Data volume (rows / GB)
25
+
26
+ ## 5. Cross-Reference Potential
27
+
28
+ Which other sources can be joined and on what keys. Be explicit:
29
+
30
+ - `<source>` ↔ `<column>` (join key: <normalized entity name / EIN / CIK / etc.>)
31
+
32
+ ## 6. Data Quality
33
+
34
+ Known issues — formatting inconsistencies, missing fields, duplicates,
35
+ historical gaps, redaction.
36
+
37
+ ## 7. Acquisition Script
38
+
39
+ Path: `scripts/fetch_<source>.py`
40
+
41
+ Example:
42
+
43
+ ```bash
44
+ python3 SKILL_DIR/scripts/fetch_<source>.py --<filter> <value> --out data/<source>.csv
45
+ ```
46
+
47
+ Output CSV columns: `<col1>, <col2>, ...`
48
+
49
+ ## 8. Legal & Licensing
50
+
51
+ - Public records law / FOIA basis
52
+ - Terms of use / acceptable use
53
+ - Attribution requirements (if any)
54
+
55
+ ## 9. References
56
+
57
+ - Official docs: <url>
58
+ - Data dictionary: <url>
59
+ - Related coverage / journalism: <url>