ai-collab-open-system 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. package/.aict/START_HERE.md +127 -0
  2. package/.aict/WORKSPACE_MANIFEST.json +91 -0
  3. package/.aict/acceptance/EXAMPLE.synthetic.md +49 -0
  4. package/.aict/acceptance/FAILURE_MODES.md +40 -0
  5. package/.aict/acceptance/PROMPT.md +47 -0
  6. package/.aict/acceptance/README.md +44 -0
  7. package/.aict/acceptance/TEMPLATE.md +57 -0
  8. package/.aict/adapters/SHARED_CORE_CONTRACT.md +106 -0
  9. package/.aict/adapters/claude-code/ADAPTER.md +28 -0
  10. package/.aict/adapters/cline/ADAPTER.md +28 -0
  11. package/.aict/adapters/codex/ADAPTER.md +28 -0
  12. package/.aict/adapters/copilot/ADAPTER.md +28 -0
  13. package/.aict/adapters/cursor/ADAPTER.md +28 -0
  14. package/.aict/adapters/windsurf/ADAPTER.md +28 -0
  15. package/.aict/context/EXAMPLE.synthetic.md +53 -0
  16. package/.aict/context/FAILURE_MODES.md +40 -0
  17. package/.aict/context/PROMPT.md +47 -0
  18. package/.aict/context/README.md +44 -0
  19. package/.aict/context/TEMPLATE.md +63 -0
  20. package/.aict/cookbook/README.md +8 -0
  21. package/.aict/cookbook/bridge-to-a-second-family.md +103 -0
  22. package/.aict/cookbook/connect-a-tool.md +67 -0
  23. package/.aict/cookbook/review-a-half-product.md +79 -0
  24. package/.aict/cookbook/run-a-first-loop.md +81 -0
  25. package/.aict/examples/README.md +21 -0
  26. package/.aict/examples/ai-coding-long-task/CASE.md +161 -0
  27. package/.aict/examples/ai-coding-long-task/artifacts/acceptance-card.md +36 -0
  28. package/.aict/examples/ai-coding-long-task/artifacts/context-package.md +30 -0
  29. package/.aict/examples/ai-coding-long-task/artifacts/execution-prompt.md +30 -0
  30. package/.aict/examples/ai-coding-long-task/artifacts/first-ai-output.md +109 -0
  31. package/.aict/examples/ai-coding-long-task/artifacts/guard-review.md +40 -0
  32. package/.aict/examples/ai-coding-long-task/artifacts/handoff-note.md +28 -0
  33. package/.aict/examples/ai-coding-long-task/artifacts/harvest-seed.md +28 -0
  34. package/.aict/examples/ai-coding-long-task/artifacts/revised-output.md +62 -0
  35. package/.aict/examples/content-production-harvest/CASE.md +87 -0
  36. package/.aict/examples/content-production-harvest/artifacts/acceptance-card.md +28 -0
  37. package/.aict/examples/content-production-harvest/artifacts/context-package.md +28 -0
  38. package/.aict/examples/content-production-harvest/artifacts/execution-prompt.md +30 -0
  39. package/.aict/examples/content-production-harvest/artifacts/guard-review.md +28 -0
  40. package/.aict/examples/content-production-harvest/artifacts/handoff-note.md +28 -0
  41. package/.aict/examples/content-production-harvest/artifacts/harvest-seed.md +28 -0
  42. package/.aict/examples/multi-tool-collaboration/CASE.md +87 -0
  43. package/.aict/examples/multi-tool-collaboration/artifacts/acceptance-card.md +28 -0
  44. package/.aict/examples/multi-tool-collaboration/artifacts/context-package.md +28 -0
  45. package/.aict/examples/multi-tool-collaboration/artifacts/execution-prompt.md +30 -0
  46. package/.aict/examples/multi-tool-collaboration/artifacts/guard-review.md +28 -0
  47. package/.aict/examples/multi-tool-collaboration/artifacts/handoff-note.md +28 -0
  48. package/.aict/examples/multi-tool-collaboration/artifacts/harvest-seed.md +28 -0
  49. package/.aict/examples/personal-judgment-growth-assistant/CASE.md +87 -0
  50. package/.aict/examples/personal-judgment-growth-assistant/artifacts/acceptance-card.md +28 -0
  51. package/.aict/examples/personal-judgment-growth-assistant/artifacts/context-package.md +28 -0
  52. package/.aict/examples/personal-judgment-growth-assistant/artifacts/execution-prompt.md +30 -0
  53. package/.aict/examples/personal-judgment-growth-assistant/artifacts/guard-review.md +28 -0
  54. package/.aict/examples/personal-judgment-growth-assistant/artifacts/handoff-note.md +28 -0
  55. package/.aict/examples/personal-judgment-growth-assistant/artifacts/harvest-seed.md +28 -0
  56. package/.aict/examples/research-knowledge-synthesis/CASE.md +87 -0
  57. package/.aict/examples/research-knowledge-synthesis/artifacts/acceptance-card.md +28 -0
  58. package/.aict/examples/research-knowledge-synthesis/artifacts/context-package.md +28 -0
  59. package/.aict/examples/research-knowledge-synthesis/artifacts/execution-prompt.md +30 -0
  60. package/.aict/examples/research-knowledge-synthesis/artifacts/guard-review.md +28 -0
  61. package/.aict/examples/research-knowledge-synthesis/artifacts/handoff-note.md +28 -0
  62. package/.aict/examples/research-knowledge-synthesis/artifacts/harvest-seed.md +28 -0
  63. package/.aict/guard/EXAMPLE.synthetic.md +51 -0
  64. package/.aict/guard/FAILURE_MODES.md +40 -0
  65. package/.aict/guard/PROMPT.md +47 -0
  66. package/.aict/guard/README.md +44 -0
  67. package/.aict/guard/TEMPLATE.md +60 -0
  68. package/.aict/handoff/EXAMPLE.synthetic.md +51 -0
  69. package/.aict/handoff/FAILURE_MODES.md +40 -0
  70. package/.aict/handoff/PROMPT.md +47 -0
  71. package/.aict/handoff/README.md +44 -0
  72. package/.aict/handoff/TEMPLATE.md +60 -0
  73. package/.aict/harvest/EXAMPLE.synthetic.md +51 -0
  74. package/.aict/harvest/FAILURE_MODES.md +40 -0
  75. package/.aict/harvest/PROMPT.md +47 -0
  76. package/.aict/harvest/README.md +44 -0
  77. package/.aict/harvest/TEMPLATE.md +60 -0
  78. package/.aict/mechanisms/README.md +34 -0
  79. package/.aict/mechanisms/anti-drift-partner/EXAMPLE.synthetic.md +46 -0
  80. package/.aict/mechanisms/anti-drift-partner/FAILURE_MODES.md +25 -0
  81. package/.aict/mechanisms/anti-drift-partner/PROMPT.md +75 -0
  82. package/.aict/mechanisms/anti-drift-partner/README.md +82 -0
  83. package/.aict/mechanisms/anti-drift-partner/TEMPLATE.md +74 -0
  84. package/.aict/mechanisms/blind-spot-scan/EXAMPLE.synthetic.md +39 -0
  85. package/.aict/mechanisms/blind-spot-scan/FAILURE_MODES.md +25 -0
  86. package/.aict/mechanisms/blind-spot-scan/PROMPT.md +72 -0
  87. package/.aict/mechanisms/blind-spot-scan/README.md +79 -0
  88. package/.aict/mechanisms/blind-spot-scan/TEMPLATE.md +70 -0
  89. package/.aict/mechanisms/collaboration-coach/EXAMPLE.synthetic.md +40 -0
  90. package/.aict/mechanisms/collaboration-coach/FAILURE_MODES.md +25 -0
  91. package/.aict/mechanisms/collaboration-coach/PROMPT.md +72 -0
  92. package/.aict/mechanisms/collaboration-coach/README.md +79 -0
  93. package/.aict/mechanisms/collaboration-coach/TEMPLATE.md +61 -0
  94. package/.aict/mechanisms/do-not-handle-yet/EXAMPLE.synthetic.md +15 -0
  95. package/.aict/mechanisms/do-not-handle-yet/FAILURE_MODES.md +16 -0
  96. package/.aict/mechanisms/do-not-handle-yet/PROMPT.md +41 -0
  97. package/.aict/mechanisms/do-not-handle-yet/README.md +30 -0
  98. package/.aict/mechanisms/do-not-handle-yet/TEMPLATE.md +38 -0
  99. package/.aict/mechanisms/dual-guard/EXAMPLE.synthetic.md +54 -0
  100. package/.aict/mechanisms/dual-guard/FAILURE_MODES.md +25 -0
  101. package/.aict/mechanisms/dual-guard/PROMPT.md +76 -0
  102. package/.aict/mechanisms/dual-guard/README.md +81 -0
  103. package/.aict/mechanisms/dual-guard/TEMPLATE.md +73 -0
  104. package/.aict/mechanisms/feedback-absorption-ledger/EXAMPLE.synthetic.md +49 -0
  105. package/.aict/mechanisms/feedback-absorption-ledger/FAILURE_MODES.md +25 -0
  106. package/.aict/mechanisms/feedback-absorption-ledger/PROMPT.md +74 -0
  107. package/.aict/mechanisms/feedback-absorption-ledger/README.md +81 -0
  108. package/.aict/mechanisms/feedback-absorption-ledger/TEMPLATE.md +69 -0
  109. package/.aict/mechanisms/half-product-review/EXAMPLE.synthetic.md +15 -0
  110. package/.aict/mechanisms/half-product-review/FAILURE_MODES.md +16 -0
  111. package/.aict/mechanisms/half-product-review/PROMPT.md +41 -0
  112. package/.aict/mechanisms/half-product-review/README.md +30 -0
  113. package/.aict/mechanisms/half-product-review/TEMPLATE.md +38 -0
  114. package/.aict/mechanisms/handoff-abc/EXAMPLE.synthetic.md +47 -0
  115. package/.aict/mechanisms/handoff-abc/FAILURE_MODES.md +25 -0
  116. package/.aict/mechanisms/handoff-abc/PROMPT.md +75 -0
  117. package/.aict/mechanisms/handoff-abc/README.md +82 -0
  118. package/.aict/mechanisms/handoff-abc/TEMPLATE.md +60 -0
  119. package/.aict/mechanisms/harvest-and-erc/EXAMPLE.synthetic.md +43 -0
  120. package/.aict/mechanisms/harvest-and-erc/FAILURE_MODES.md +25 -0
  121. package/.aict/mechanisms/harvest-and-erc/PROMPT.md +74 -0
  122. package/.aict/mechanisms/harvest-and-erc/README.md +81 -0
  123. package/.aict/mechanisms/harvest-and-erc/TEMPLATE.md +60 -0
  124. package/.aict/mechanisms/honest-calibration/EXAMPLE.synthetic.md +43 -0
  125. package/.aict/mechanisms/honest-calibration/FAILURE_MODES.md +25 -0
  126. package/.aict/mechanisms/honest-calibration/PROMPT.md +74 -0
  127. package/.aict/mechanisms/honest-calibration/README.md +81 -0
  128. package/.aict/mechanisms/honest-calibration/TEMPLATE.md +66 -0
  129. package/.aict/mechanisms/one-click-dispatch/EXAMPLE.synthetic.md +15 -0
  130. package/.aict/mechanisms/one-click-dispatch/FAILURE_MODES.md +16 -0
  131. package/.aict/mechanisms/one-click-dispatch/PROMPT.md +41 -0
  132. package/.aict/mechanisms/one-click-dispatch/README.md +30 -0
  133. package/.aict/mechanisms/one-click-dispatch/TEMPLATE.md +38 -0
  134. package/.aict/mechanisms/plain-language-first-screen/EXAMPLE.synthetic.md +15 -0
  135. package/.aict/mechanisms/plain-language-first-screen/FAILURE_MODES.md +16 -0
  136. package/.aict/mechanisms/plain-language-first-screen/PROMPT.md +41 -0
  137. package/.aict/mechanisms/plain-language-first-screen/README.md +30 -0
  138. package/.aict/mechanisms/plain-language-first-screen/TEMPLATE.md +38 -0
  139. package/.aict/mechanisms/root-cause-brake/EXAMPLE.synthetic.md +55 -0
  140. package/.aict/mechanisms/root-cause-brake/FAILURE_MODES.md +25 -0
  141. package/.aict/mechanisms/root-cause-brake/PROMPT.md +73 -0
  142. package/.aict/mechanisms/root-cause-brake/README.md +79 -0
  143. package/.aict/mechanisms/root-cause-brake/TEMPLATE.md +74 -0
  144. package/.aict/mechanisms/scout-review-controller/EXAMPLE.synthetic.md +15 -0
  145. package/.aict/mechanisms/scout-review-controller/FAILURE_MODES.md +16 -0
  146. package/.aict/mechanisms/scout-review-controller/PROMPT.md +41 -0
  147. package/.aict/mechanisms/scout-review-controller/README.md +30 -0
  148. package/.aict/mechanisms/scout-review-controller/TEMPLATE.md +38 -0
  149. package/.aict/mechanisms/single-tool-guard/EXAMPLE.synthetic.md +54 -0
  150. package/.aict/mechanisms/single-tool-guard/FAILURE_MODES.md +25 -0
  151. package/.aict/mechanisms/single-tool-guard/PROMPT.md +76 -0
  152. package/.aict/mechanisms/single-tool-guard/README.md +83 -0
  153. package/.aict/mechanisms/single-tool-guard/TEMPLATE.md +75 -0
  154. package/.aict/mechanisms/task-splitting/EXAMPLE.synthetic.md +53 -0
  155. package/.aict/mechanisms/task-splitting/FAILURE_MODES.md +25 -0
  156. package/.aict/mechanisms/task-splitting/PROMPT.md +72 -0
  157. package/.aict/mechanisms/task-splitting/README.md +79 -0
  158. package/.aict/mechanisms/task-splitting/TEMPLATE.md +76 -0
  159. package/.aict/modes/README.md +11 -0
  160. package/.aict/modes/execute.md +31 -0
  161. package/.aict/modes/handoff.md +29 -0
  162. package/.aict/modes/harvest.md +30 -0
  163. package/.aict/modes/review.md +28 -0
  164. package/.aict/modes/shape.md +34 -0
  165. package/.aict/privacy/COMMERCIAL_BOUNDARY.md +34 -0
  166. package/.aict/privacy/PRIVACY.md +36 -0
  167. package/.aict/privacy/REDACTION_CHECKLIST.md +12 -0
  168. package/.aict/profile/CANDIDATES.md +44 -0
  169. package/.aict/profile/EXAMPLE.synthetic.md +49 -0
  170. package/.aict/profile/FAILURE_MODES.md +40 -0
  171. package/.aict/profile/PROMPT.md +47 -0
  172. package/.aict/profile/README.md +44 -0
  173. package/.aict/profile/TEMPLATE.md +57 -0
  174. package/.aict/prompts/acceptance-definition.md +109 -0
  175. package/.aict/prompts/guard-review.md +116 -0
  176. package/.aict/prompts/handoff-generation.md +110 -0
  177. package/.aict/prompts/harvest-extraction.md +110 -0
  178. package/.aict/prompts/mode-switching.md +66 -0
  179. package/.aict/prompts/profile-creation.md +66 -0
  180. package/.aict/prompts/profile-refinement.md +66 -0
  181. package/.aict/prompts/project-context-packaging.md +113 -0
  182. package/.aict/prompts/red-team-challenge.md +106 -0
  183. package/.aict/prompts/rule-update-proposal.md +114 -0
  184. package/.aict/prompts/workflow-reset.md +109 -0
  185. package/.aict/roles/README.md +18 -0
  186. package/.aict/roles/executor.md +34 -0
  187. package/.aict/roles/harvester.md +33 -0
  188. package/.aict/roles/owner-controller.md +38 -0
  189. package/.aict/roles/scout.md +33 -0
  190. package/.aict/roles/supervisor.md +34 -0
  191. package/.aict/roles/system-guardian.md +34 -0
  192. package/.aict/skills/acceptance/SKILL.md +43 -0
  193. package/.aict/skills/context/SKILL.md +44 -0
  194. package/.aict/skills/evidence-pack/SKILL.md +42 -0
  195. package/.aict/skills/guard/SKILL.md +46 -0
  196. package/.aict/skills/handoff/SKILL.md +44 -0
  197. package/.aict/skills/harvest/SKILL.md +44 -0
  198. package/.aict/skills/mode-switch/SKILL.md +42 -0
  199. package/.aict/skills/profile/SKILL.md +42 -0
  200. package/.aict/skills/red-team/SKILL.md +42 -0
  201. package/.aict/skills/single-tool-guard/SKILL.md +42 -0
  202. package/.aict/state/CURRENT_STATE.md +13 -0
  203. package/.aict/state/DECISIONS.md +7 -0
  204. package/.aict/state/TASK_LOG.md +7 -0
  205. package/.aict/state/evidence.jsonl +2 -0
  206. package/.aict/state/learning-ledger.jsonl +1 -0
  207. package/.aict/state/receipts.jsonl +1 -0
  208. package/.aict/state/runs.jsonl +1 -0
  209. package/.aict/state/tasks.jsonl +1 -0
  210. package/.aict/walkthroughs/10-minute-your-task.md +107 -0
  211. package/.aict/walkthroughs/10-minute.md +43 -0
  212. package/.aict/walkthroughs/30-minute.md +22 -0
  213. package/.aict/walkthroughs/60-minute.md +27 -0
  214. package/.aict/walkthroughs/synthetic-loop-transcript.md +43 -0
  215. package/CHANGELOG.md +23 -0
  216. package/CODE_OF_CONDUCT.md +20 -0
  217. package/CONTRIBUTING.md +30 -0
  218. package/KNOWN_LIMITATIONS.md +54 -0
  219. package/LICENSE +199 -0
  220. package/PRODUCT_CONTRACT.md +446 -0
  221. package/README.md +245 -0
  222. package/RELEASE_CHECKLIST.md +78 -0
  223. package/SECURITY.md +56 -0
  224. package/START_HERE.md +89 -0
  225. package/bin/ai-collab.js +2 -0
  226. package/docs/DOGFOOD.md +85 -0
  227. package/docs/FEEDBACK.md +61 -0
  228. package/docs/FIRST_EXPERIENCE_SPEC.md +32 -0
  229. package/docs/FREE_VS_PAID.md +53 -0
  230. package/docs/PUBLIC_BOUNDARY.md +36 -0
  231. package/docs/PUBLIC_MAPPING.md +178 -0
  232. package/docs/RELEASE_PRIORITY.md +23 -0
  233. package/docs/WHY_THIS_EXISTS.md +36 -0
  234. package/docs/open-system/00-start-here.md +60 -0
  235. package/docs/open-system/01-ai-collaboration-os.md +33 -0
  236. package/docs/open-system/02-six-layer-architecture.md +45 -0
  237. package/docs/open-system/03-role-system.md +33 -0
  238. package/docs/open-system/04-core-mechanisms.md +34 -0
  239. package/docs/open-system/05-failure-patterns.md +31 -0
  240. package/docs/open-system/06-how-to-adapt-to-your-workflow.md +31 -0
  241. package/package.json +69 -0
  242. package/privacy-manifest.json +78 -0
  243. package/privacy-scan.local.json.example +18 -0
  244. package/scripts/lib/forbidden-in-pack.js +55 -0
  245. package/scripts/pack-check.js +154 -0
  246. package/scripts/privacy-scan.js +487 -0
  247. package/scripts/validate-contract.js +160 -0
  248. package/src/adapters.js +590 -0
  249. package/src/bootstrap.js +1184 -0
  250. package/src/catalog.js +2723 -0
  251. package/src/cli.js +2899 -0
  252. package/src/dialogue.js +470 -0
  253. package/src/i18n.js +1034 -0
  254. package/src/ledger.js +2011 -0
  255. package/src/render.js +1381 -0
  256. package/src/sendmodel.js +452 -0
  257. package/src/validate.js +1307 -0
  258. package/src/workspace.js +1679 -0
  259. package/tests/contract.test.js +8514 -0
@@ -0,0 +1,470 @@
1
+ // === dialogue scan v1, LOCAL HALF (deterministic, zero-network, zero-cost) ===
2
+ //
3
+ // This module is the "local half" of semantic scanning. The PROBLEM it addresses:
4
+ // a user's real signals about an AI collaboration — "I said this was done", "I keep
5
+ // correcting the same thing" — live in their CHAT EXPORTS and SHELL LOGS, not in the
6
+ // .aict ledger. bootstrap (bootstrap.js) already turns the ledger + repo + git into
7
+ // five cards; this module lets it ALSO read a LOCAL export the user EXPLICITLY hands
8
+ // over (`--dialogue` / `--logs`) and extract DETERMINISTIC signals to enrich those
9
+ // cards — WITHOUT ever calling a model and WITHOUT sending anything anywhere.
10
+ //
11
+ // HONESTY IS THE WHOLE POINT (four red lines, enforced structurally here):
12
+ // 1. DETERMINISTIC ONLY. Every signal below is a word-table match + a normalized
13
+ // count + a set lookup against the ledger. There is NO model call, NO guess, NO
14
+ // ranking by "confidence we made up" — low-certainty signals are flagged
15
+ // `confidence: "low"` and that is the ceiling. The external-model pass is a
16
+ // LATER sub-batch (`--send-to-model`); it is deliberately NOT here.
17
+ // 2. A COMPLETION CLAIM IS NEVER "DONE". A "done"/"shipped"/"已完成" found in a chat
18
+ // is a CANDIDATE for the VERIFY card with the wording `claimed in dialogue ·
19
+ // not verified`. It is cross-referenced against the ledger; if the ledger has no
20
+ // accepted receipt AND no executed run that could back it, it stays a VERIFY
21
+ // finding. It is NEVER promoted to a task status and NEVER rendered as done.
22
+ // 3. CANDIDATES ARE PROPOSED. A repeated correction becomes a HARVEST *profile*
23
+ // candidate with `status: "proposed"`. This module returns plain data; it writes
24
+ // NOTHING to a profile, a ledger, or any long-term state. The caller (report-only
25
+ // bootstrap) writes nothing either.
26
+ // 4. OPT-IN, HIGH-PRIVACY-OFF-BY-DEFAULT. Nothing here runs unless the user names a
27
+ // file. A missing / unreadable file is SKIPPED with a note, never fatal. And a
28
+ // snippet is REDACTED (redactSnippet) before it is ever surfaced or recorded, so
29
+ // a secret/email/local-path pasted into a chat does not leak through the report.
30
+ //
31
+ // Pure + serializable: every export takes its inputs as arguments (the file READING
32
+ // is the one I/O boundary, parseDialogueExports, kept tiny and fail-soft) and returns
33
+ // plain objects, so the signal logic is trivially testable without a real chat export.
34
+
35
+ import { existsSync, readFileSync, statSync } from "node:fs";
36
+ import path from "node:path";
37
+
38
+ // --- A. Redaction (shared with the future external-model path) --------------
39
+ //
40
+ // `redactSnippet` masks the sensitive substrings that must NEVER be displayed or
41
+ // recorded out of a user's export. The pattern set MIRRORS the always-forbidden
42
+ // rules in scripts/privacy-scan.js (the release-safety scanner) + the spirit of
43
+ // scripts/lib/forbidden-in-pack.js: secret/key material, tokens, emails, phone
44
+ // numbers, and absolute local machine paths. Keeping the SHAPES aligned with the
45
+ // privacy scanner means "what the scanner would block from shipping" and "what the
46
+ // dialogue scan masks before showing" do not drift into two different ideas of
47
+ // "sensitive". This is exported so the LATER `--send-to-model` path reuses the exact
48
+ // same redaction before anything is sent — one definition, no second copy to rot.
49
+ //
50
+ // Each rule is [RegExp(global), placeholder]. Order matters: the most specific,
51
+ // highest-signal secrets run first so a key inside a longer string is masked as a
52
+ // key (not later half-caught by the generic path rule). All regexes are GLOBAL so
53
+ // every occurrence on a line is masked, not just the first.
54
/**
 * Ordered redaction table consumed by redactSnippet.
 *
 * Each entry is `[RegExp, placeholder]`. Every pattern carries the `g` flag so
 * all occurrences in a snippet are replaced, not just the first. Order matters:
 * the most specific secrets run first so that a key embedded in a longer string
 * is masked as a key before the generic path rules can take part of it.
 */
const REDACTION_RULES = [
  // Complete PEM private-key blocks (header + body + footer) are masked first,
  // while both the BEGIN and END markers are still intact.
  ...buildPemBodyRules(),
  // Fallback for a lone BEGIN marker with no matching END in the snippet.
  [/-----BEGIN (?:RSA |OPENSSH |EC |DSA |ENCRYPTED )?PRIVATE KEY-----/g, "[redacted:private-key]"],
  // Provider secret keys / tokens.
  [/\bgithub_pat_[A-Za-z0-9_]{30,}/g, "[redacted:token]"],
  [/\bgh[pousr]_[A-Za-z0-9_]{20,}/g, "[redacted:token]"],
  [/\bxox[baprs]-[A-Za-z0-9-]{20,}/g, "[redacted:token]"],
  [/\bBearer\s+[A-Za-z0-9._~+/=-]{24,}/gi, "[redacted:token]"],
  [/sk-[A-Za-z0-9_-]{20,}/g, "[redacted:secret-key]"],
  [/AKIA[0-9A-Z]{16}/g, "[redacted:aws-key]"],
  [/AIza[0-9A-Za-z_-]{20,}/g, "[redacted:api-key]"],
  // key/secret/token/password assignments with a QUOTED value. The key name may
  // itself be followed by a closing quote (`"token": "..."`, as in a JSON export)
  // and either quote may be backslash-escaped (`\"token\": \"...\"`, as in chat
  // text embedded inside a JSON string), because exports are scanned line-wise
  // without being JSON-decoded.
  [/\b(?:api[_-]?key|token|secret|password)(?:\\?["'])?\s*[:=]\s*\\?["'][^"']{8,}["']/gi, "[redacted:secret]"],
  // password=/passwd=/pwd= followed by an unquoted (or short quoted) value. Only
  // these three explicit key names are accepted; the value stops at whitespace
  // or a quote so the sentence after it is left intact. The same optional
  // (possibly escaped) quote after the key name is tolerated as above.
  [/\b(?:password|passwd|pwd)(?:\\?["'])?\s*[:=]\s*(?:\\?["'])?[^\s"']{4,}["']?/gi, "[redacted:secret]"],
  // Email addresses (any address).
  [/\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g, "[redacted:email]"],
  // Absolute local machine paths; the patterns are assembled at runtime by
  // buildLocalPathRules.
  ...buildLocalPathRules(),
  // Phone numbers: grouped 3-3-4 digits with an optional country prefix, then
  // an 11-digit number starting 13-19 that is not part of a longer digit run.
  [/(?<![A-Za-z0-9_])(?:\+?\d{1,3}[\s.-])?(?:\(?\d{3}\)?[\s.-])\d{3}[\s.-]\d{4}(?![A-Za-z0-9_])/g, "[redacted:phone]"],
  [/(?<![\d.\-])1[3-9]\d{9}(?![\d.\-])/g, "[redacted:phone]"]
];
87
+
88
+ // Build the absolute-local-path redaction rules. The POSIX home roots ("Users" on
89
+ // macOS, "home" on Linux) and the Windows "<Drive>:\Users\" prefix are assembled from
90
+ // these segment NAMES at runtime, so the literal path prefixes never appear verbatim
91
+ // in this source file — keeping the redaction module itself clean under the release
92
+ // privacy scan (which flags real-looking local paths). The compiled regexes match the
93
+ // exact same shapes as scripts/privacy-scan.js's path rules.
94
/**
 * Build the redaction rules for absolute local machine paths.
 *
 * The root segment names are interpolated into the patterns at runtime, so no
 * literal path prefix appears verbatim in this source.
 *
 * Rules, in order:
 *   1. `/<home-root>/<segment>[/...]` for each name in HOME_SEGMENTS.
 *   2. `/<system-root>/<segment>[/...]` for each name in SYSTEM_ROOTS. At least
 *      one child segment is required, so a bare "/etc" is left alone.
 *   3. `<Drive>:\<segment>[\...]` for any drive letter. The separator after the
 *      drive may be one OR MORE backslashes, so a JSON-escaped path
 *      (`C:\\Users\\name`, as it appears in a raw .json/.jsonl export line) is
 *      masked as well. A bare `C:\` with nothing after it is not matched.
 *
 * @returns {Array<[RegExp, string]>} `[global regex, "[redacted:path]"]` pairs.
 */
function buildLocalPathRules() {
  const HOME_SEGMENTS = ["Users", "home"];
  const SYSTEM_ROOTS = ["var", "etc", "opt", "srv", "usr"];
  const PLACEHOLDER = "[redacted:path]";
  // Optional further segments; they stop at whitespace, backtick, quotes or ")".
  const tail = "(?:/[^\\s`'\")]+)*";
  const winTail = "(?:\\\\[^\\s`'\")]+)*";

  const rules = [...HOME_SEGMENTS, ...SYSTEM_ROOTS].map((segment) => [
    new RegExp(`/${segment}/[^/\\s]+${tail}`, "g"),
    PLACEHOLDER
  ]);

  // `\\\\+` compiles to `\\+`: one or more literal backslashes after the drive
  // colon, which covers both the plain and the JSON-escaped spelling.
  rules.push([new RegExp(`[A-Za-z]:\\\\+[^\\\\\\s]+${winTail}`, "g"), PLACEHOLDER]);
  return rules;
}
119
+
120
+ // Build the PEM private-key BODY redaction rule. A PEM block is
121
+ // `-----BEGIN ... PRIVATE KEY-----` then base64 lines then `-----END ... PRIVATE KEY-----`.
122
+ // The BEGIN/END markers are masked by the rule above; this masks the base64 BODY in
123
+ // between (the actual secret), so a multi-line key pasted into a chat is fully redacted,
124
+ // not just its header. Matches the whole block (DOTALL via [\s\S]) non-greedily. The
125
+ // literal marker text is assembled from a fragment so this module stays clean under the
126
+ // release privacy scan (which would otherwise flag a verbatim PEM marker in source).
127
/**
 * Build the rule that masks a complete PEM private-key block: the BEGIN marker,
 * the base64 body and the END marker, all replaced by a single placeholder.
 *
 * The body is matched non-greedily with `[\s\S]*?`, so two keys pasted into one
 * snippet are masked separately. The marker text is assembled from a fragment
 * rather than written as one literal.
 *
 * Accepted labels: the bare "PRIVATE KEY" plus the RSA, OPENSSH, EC, DSA and
 * ENCRYPTED variants. ENCRYPTED is included so a passphrase-protected key is
 * masked like any other private key.
 *
 * @returns {Array<[RegExp, string]>} One `[global regex, placeholder]` pair.
 */
function buildPemBodyRules() {
  const KEY = "PRIVATE KEY";
  const label = `(?:RSA |OPENSSH |EC |DSA |ENCRYPTED )?${KEY}`;
  const wholeBlock = new RegExp(
    `-----BEGIN ${label}-----[\\s\\S]*?-----END ${label}-----`,
    "g"
  );
  return [[wholeBlock, "[redacted:private-key]"]];
}
135
+
136
+ // Mask every sensitive substring in `text`, returning the redacted string. A
137
+ // non-string input returns "" (a snippet that cannot be read is shown as empty, never
138
+ // thrown). Idempotent in practice: re-running over an already-redacted string only
139
+ // re-matches the literal "[redacted:*]" placeholders against nothing, so it is safe to
140
+ // call more than once. This is the ONLY way a raw export fragment becomes displayable.
141
/**
 * Apply every rule in REDACTION_RULES, in table order, to `text`.
 *
 * @param {unknown} text - Raw fragment to mask.
 * @returns {string} The masked string, or "" when `text` is not a string
 *   (a fragment that cannot be read is shown as empty rather than thrown on).
 */
export function redactSnippet(text) {
  if (typeof text !== "string") return "";
  // Fold the rule table over the text; each step replaces all matches of one rule.
  return REDACTION_RULES.reduce(
    (masked, [pattern, placeholder]) => masked.replace(pattern, placeholder),
    text
  );
}
149
+
150
+ // Trim a (already-redacted) snippet to a bounded length so a runaway log line cannot
151
+ // blow up the report. Whole-line by default; cut on a word boundary near the cap with
152
+ // an ellipsis. Pure string transform.
153
/**
 * Collapse whitespace and cap a snippet at roughly `max` UTF-16 code units.
 *
 * Runs of whitespace (including newlines) become a single space and the result
 * is trimmed. A string within the cap is returned as-is. A longer one is cut at
 * the last space inside the cap when that space lies beyond index 40, otherwise
 * cut hard at the cap, and an ellipsis is appended.
 *
 * A hard cut can land between the two halves of a surrogate pair (an emoji, for
 * example); the dangling high surrogate is dropped so the result never ends in
 * a malformed character.
 *
 * @param {unknown} text - Snippet to clamp; coerced with String().
 * @param {number} [max=160] - Cap applied before the ellipsis is appended.
 * @returns {string} The clamped snippet.
 */
function clampSnippet(text, max = 160) {
  const s = String(text).replace(/\s+/g, " ").trim();
  if (s.length <= max) return s;
  const cut = s.slice(0, max);
  const lastSpace = cut.lastIndexOf(" ");
  const kept = (lastSpace > 40 ? cut.slice(0, lastSpace) : cut).trim();
  // Drop a lone high surrogate left behind by slicing through a pair.
  return `${kept.replace(/[\uD800-\uDBFF]$/, "")}…`;
}
160
+
161
+ // --- B. File reading (the one I/O boundary; fail-soft, opt-in) --------------
162
+ //
163
+ // Read the user-EXPLICITLY-named export files. `paths` is the parsed list from
164
+ // `--dialogue` / `--logs` (the CLI splits comma-separated values). Each path is read
165
+ // individually; a missing / unreadable / over-large / wrong-extension file is SKIPPED
166
+ // with a recorded reason (never throws, never aborts the report — red line #4). Only
167
+ // plain-text shapes (.txt/.json/.md/.log/.jsonl/.csv/.html) are read; anything else is skipped so
168
+ // bootstrap never tries to parse a binary. Returns:
169
+ // { sources: [{ path, kind, bytes, lines, text }], skipped: [{ path, reason }] }
170
+ // where `text` is the RAW file content (redaction happens later, per-snippet, so the
171
+ // signal extractor can match on the original words but only redacted text is surfaced).
172
// Extensions accepted as text exports; any other extension is skipped as
// "unsupported_type".
const ALLOWED_EXT = new Set([".txt", ".json", ".md", ".log", ".jsonl", ".csv", ".html"]);
// Upper bound on a single export file (8 MB); larger files are skipped as "too_large".
const MAX_BYTES = 8 * 1024 * 1024;

/**
 * Read the export files the user explicitly named. Never throws: every path
 * that cannot be used is recorded in `skipped` with a reason and the loop moves on.
 *
 * Checks run in this order, and the first failure decides the reason:
 *   "not_found"        - the resolved path does not exist
 *   "unreadable"       - stat or read threw
 *   "not_a_file"       - exists but is not a regular file
 *   "too_large"        - size exceeds MAX_BYTES
 *   "unsupported_type" - extension (lowercased) is not in ALLOWED_EXT
 *
 * Non-string and blank entries are ignored without a `skipped` record.
 *
 * @param {unknown} paths - Array of path strings; anything else is treated as [].
 * @param {string} [kind="dialogue"] - Label copied onto every source entry.
 * @returns {{sources: Array<{path: string, kind: string, bytes: number, lines: number, text: string}>,
 *            skipped: Array<{path: string, reason: string}>}}
 *   `path` is the trimmed input string (not the resolved path), `bytes` is the
 *   stat size, and `text` is the raw UTF-8 content (no redaction happens here).
 *   `lines` counts text lines: 0 for an empty file, and a final newline
 *   terminates the last line instead of starting an extra empty one
 *   ("a\nb\n" is 2 lines).
 */
export function parseDialogueExports(paths, kind = "dialogue") {
  const list = Array.isArray(paths) ? paths : [];
  const sources = [];
  const skipped = [];

  for (const rawPath of list) {
    const p = typeof rawPath === "string" ? rawPath.trim() : "";
    if (p.length === 0) continue;

    const abs = path.resolve(p);
    if (!existsSync(abs)) {
      skipped.push({ path: p, reason: "not_found" });
      continue;
    }

    let st;
    try {
      st = statSync(abs);
    } catch {
      // The file can disappear or become inaccessible between the two calls.
      skipped.push({ path: p, reason: "unreadable" });
      continue;
    }
    if (!st.isFile()) {
      skipped.push({ path: p, reason: "not_a_file" });
      continue;
    }
    if (st.size > MAX_BYTES) {
      skipped.push({ path: p, reason: "too_large" });
      continue;
    }
    if (!ALLOWED_EXT.has(path.extname(abs).toLowerCase())) {
      skipped.push({ path: p, reason: "unsupported_type" });
      continue;
    }

    let text;
    try {
      text = readFileSync(abs, "utf8");
    } catch {
      skipped.push({ path: p, reason: "unreadable" });
      continue;
    }

    // split("\n") yields one extra empty piece when the text ends with a
    // newline; do not count that piece as a line.
    const lines =
      text.length === 0 ? 0 : text.split("\n").length - (text.endsWith("\n") ? 1 : 0);

    sources.push({ path: p, kind, bytes: st.size, lines, text });
  }

  return { sources, skipped };
}
224
+
225
+ // --- C. Signal extraction (deterministic word-table + counting) -------------
226
+
227
+ // The completion-claim word table. A line containing one of these (as a whole word /
228
+ // phrase, case-insensitive) is a COMPLETION CLAIM — the thing red line #2 must never
229
+ // let through as "done". English + Chinese, since the export may be in either. These
230
+ // are LITERAL markers, not an LLM intent classifier: a deterministic, auditable list.
231
// Literal completion-claim markers, English first and then Chinese. This is a
// fixed word table (not a classifier); it is compiled into COMPLETION_RE.
const COMPLETION_MARKERS = [
  // English
  "done",
  "complete",
  "completed",
  "shipped",
  "finished",
  "fixed",
  "resolved",
  "all set",
  "good to go",
  "ready to merge",
  "merged",
  "deployed",
  "wrapped up",
  // Chinese
  "已完成",
  "搞定",
  "做完了",
  "做好了",
  "完成了",
  "弄好了",
  "已搞定",
  "已经好了",
  "已修复",
  "修好了",
  "已部署",
  "上线了",
  "已上线",
  "齐活"
];
239
+
240
+ // A correction marker table: a line where the user is CORRECTING the AI (telling it
241
+ // it did the wrong thing / to redo it). Repeated corrections of the SAME kind are the
242
+ // "you keep telling the AI the same thing" signal -> a HARVEST profile candidate.
243
// Literal correction markers, English first and then Chinese: phrases that
// flag a line as correcting earlier output. Compiled into CORRECTION_RE.
const CORRECTION_MARKERS = [
  // English
  "no,",
  "not like that",
  "that's wrong",
  "thats wrong",
  "wrong again",
  "i said",
  "stop doing",
  "don't",
  "do not",
  "redo",
  "again,",
  "actually,",
  "incorrect",
  "you keep",
  "as i said",
  "like i said",
  "i told you",
  "revert",
  "undo that",
  // Chinese
  "不对",
  "不是这样",
  "错了",
  "又错了",
  "我说过",
  "别这样",
  "不要",
  "重做",
  "再说一遍",
  "跟你说过",
  "我说的是",
  "撤销",
  "改回去",
  "不是让你",
  "说了多少遍"
];
252
+
253
+ // Build a regex that matches ANY of a marker list as a case-insensitive substring,
254
+ // with word boundaries for the ASCII markers (so "done" does not fire inside
255
+ // "abandoned") and bare substring for CJK (no word boundary concept). Escapes each
256
+ // marker so a punctuation marker like "no," is matched literally.
257
/**
 * Compile a marker list into one case-insensitive alternation regex.
 *
 * - Pure-ASCII markers get a `\b` on each side whose edge character is a word
 *   character, so "done" does not fire inside "abandoned" while a marker that
 *   ends in punctuation ("no,") is still matched literally.
 * - Markers containing any non-ASCII character are matched as bare substrings.
 * - A straight apostrophe in an ASCII marker also matches the typographic
 *   apostrophe U+2019, so "don't" catches "don’t" as well.
 * - Non-string and empty markers are ignored. With no usable marker the result
 *   is a regex that never matches; an empty alternation would otherwise match
 *   every line.
 *
 * ASCII alternatives come first, then the non-ASCII ones, each group in list order.
 *
 * @param {string[]} markers - Literal marker phrases.
 * @returns {RegExp} Regex with only the `i` flag (no `g`, so `.test` is stateless).
 */
function buildMarkerRegex(markers) {
  const escape = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const isAscii = (m) => /^[\x00-\x7F]+$/.test(m);
  const usable = (Array.isArray(markers) ? markers : []).filter(
    (m) => typeof m === "string" && m.length > 0
  );

  const asciiParts = usable.filter(isAscii).map((m) => {
    // \b only makes sense next to a word character at that edge of the marker.
    const left = /^\w/.test(m) ? "\\b" : "";
    const right = /\w$/.test(m) ? "\\b" : "";
    const body = escape(m).replace(/'/g, "['\\u2019]");
    return `${left}${body}${right}`;
  });
  const otherParts = usable.filter((m) => !isAscii(m)).map(escape);

  const parts = [...asciiParts, ...otherParts];
  if (parts.length === 0) return /(?!)/i;
  return new RegExp(parts.join("|"), "i");
}
272
+
273
// Both marker tables are compiled exactly once, at module load, and the resulting
// matchers are shared by every scan below.
const [COMPLETION_RE, CORRECTION_RE] = [COMPLETION_MARKERS, CORRECTION_MARKERS].map(
  (markerList) => buildMarkerRegex(markerList)
);
275
+
276
// Normalize a line for DUP COUNTING of corrections, so two corrections that differ only
// in casing / timestamp / speaker prefix / trailing punctuation produce the SAME key
// (red line: a normalized COUNT, not an LLM "these mean the same thing" judgment).
// Steps, in order:
//   1. lowercase
//   2. strip one leading "[...]" bracket group (e.g. a timestamp)
//   3. strip one leading speaker label of up to 24 chars ending in ":" or ">"
//      ("user:", "me >", "assistant:")
//   4. collapse whitespace runs to a single space and trim
//   5. drop trailing punctuation AND any whitespace mixed into it
// Step 5 covers both ASCII and full-width CJK punctuation, so "不对！" and "不对" share a
// key, and "wrong again !" no longer keeps a dangling trailing space.
// `line` is coerced with String(); returns the normalized string (possibly empty).
function normalizeForCount(line) {
  return String(line)
    .toLowerCase()
    .replace(/^\s*\[[^\]]*\]\s*/, "")
    .replace(/^\s*[a-z0-9_一-鿿 .#-]{1,24}\s*[:>]\s*/i, "")
    .replace(/\s+/g, " ")
    .trim()
    // \u3002 = 。  \uFF01 = ！  \uFF1F = ？  \uFF0C = ，  \uFF1B = ；  (written as escapes so
    // the full-width forms cannot be silently folded to ASCII by tooling)
    .replace(/[\s.!?,;\u3002\uFF01\uFF1F\uFF0C\uFF1B]+$/, "");
}
290
+
291
// Split a raw export into candidate "lines". A .json/.jsonl chat export is still
// scanned line-wise (a deterministic, format-agnostic pass): no schema is assumed, so
// every line is read as plain text. This keeps the extractor independent of any one
// tool's export shape.
// `text` is coerced with String() and split on LF or CRLF. A missing value
// (null/undefined) yields NO lines, rather than being stringified into a bogus
// one-line document reading "null" / "undefined". An empty string still yields [""].
function toLines(text) {
  if (text == null) return [];
  return String(text).split(/\r?\n/);
}
298
+
299
// Pull the task-ish PHRASE out of a completion-claim line, for cross-referencing
// against the ledger. Heuristic but deterministic; the result is surfaced only as a
// redacted snippet and is never trusted as structured data.
// Steps:
//   1. normalizeForCount(): lowercase, drop timestamp / speaker prefix, tidy whitespace
//      and trailing punctuation
//   2. blank out the FIRST completion-marker match (COMPLETION_RE has no `g` flag), so
//      "auth refactor is done" -> "auth refactor is"
//   3. blank out English filler words, matched as whole words
//   4. blank out the CJK particles 的 / 了 / 是
//   5. collapse whitespace and trim
// Step 4 is deliberately separate and uses no `\b`: a word boundary only exists next to
// an ASCII word character, so a `\b`-wrapped CJK particle could match only when
// sandwiched between ASCII letters/digits and was otherwise never removed.
// Returns the remaining subject string, e.g. "auth refactor" (may be empty).
function claimSubject(line) {
  return normalizeForCount(line)
    .replace(COMPLETION_RE, " ")
    .replace(/\b(?:is|are|was|were|now|finally|already)\b/gi, " ")
    .replace(/[的了是]/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}
310
+
311
// Generic words that carry no task identity. An ASCII token listed here is never
// allowed to count as overlap between a dialogue claim and a ledger task title.
const STOP_TOKENS = new Set([
  "the", "a", "an", "to", "of", "and", "or", "for", "in", "on",
  "it", "this", "that", "task", "feature", "bug", "fix", "is",
  "was", "be", "my", "our", "all", "with"
]);

// Turn a string into a Set of lowercased tokens for a deterministic overlap test. No
// fuzzy/semantic matching is involved, only set membership on normalized tokens.
//   - ASCII alphanumeric runs: kept when at least 3 chars long and not a stop token.
//   - CJK runs: the whole run is kept AND every 2-character window of it, so that
//     "登录流程" overlaps "登录".
// Any other character acts purely as a separator. `text` is coerced with String().
function tokenSet(text) {
  const found = new Set();
  const pieces = String(text).toLowerCase().match(/[a-z0-9]+|[一-鿿]+/g) ?? [];

  pieces.forEach((piece) => {
    const isAsciiWord = /^[a-z0-9]+$/.test(piece);
    if (isAsciiWord) {
      const worthKeeping = piece.length >= 3 && !STOP_TOKENS.has(piece);
      if (worthKeeping) found.add(piece);
      return;
    }
    // CJK run: the run itself, then each sliding 2-gram.
    found.add(piece);
    for (let start = 0; start <= piece.length - 2; start += 1) {
      found.add(piece.slice(start, start + 2));
    }
  });

  return found;
}
334
+
335
// Cross-reference for red line #2: is there a ledger task that BOTH plausibly
// corresponds to the claim (its title shares at least one token with the claim subject)
// AND is actually backed? A completion claim is only left unflagged when such a task
// exists:
//   - claim with NO matching backed task -> false (caller flags: claimed, not verified)
//   - claim with a matching backed task  -> true  (the ledger backs it)
// `subjectTokens` is the Set produced by tokenSet() for the claim subject; an empty set
// can never be backed. `perTask` is an iterable of task summaries; the fields read here
// are isSeed, title, receipt.status, receipt.familyUnverified,
// authorMarkedDoneUnverified and runCount.
// "Backed" means either of:
//   - an accepted receipt that is neither author-marked-unverified nor
//     family-unverified, or
//   - at least one recorded run (runCount > 0).
// Seed tasks (the shipped example) never back a user's claim. Returns a boolean.
function claimIsBackedByLedger(subjectTokens, perTask) {
  if (subjectTokens.size === 0) return false;

  // True as soon as one subject token also appears in the task title.
  const sharesTokenWithTitle = (task) => {
    const titleTokens = tokenSet(task.title || "");
    for (const tok of subjectTokens) {
      if (titleTokens.has(tok)) return true;
    }
    return false;
  };

  // The ledger's own honesty signals, never a raw "status: done".
  const hasBacking = (task) => {
    const receiptBacked =
      task.receipt &&
      task.receipt.status === "accepted" &&
      task.authorMarkedDoneUnverified !== true &&
      task.receipt.familyUnverified !== true;
    if (receiptBacked) return true;
    return task.runCount > 0;
  };

  for (const task of perTask) {
    if (task.isSeed) continue;
    if (!sharesTokenWithTitle(task)) continue;
    if (hasBacking(task)) return true;
  }
  return false;
}
367
+
368
// Extract every deterministic dialogue signal from already-parsed sources and
// cross-reference completion claims against the ledger.
//
// Input:  { parsed, perTask }
//   parsed.sources : [{ path, kind, bytes, lines, text }]  (anything non-array -> [])
//   parsed.skipped : [{ path, reason }]                    (anything non-array -> [])
//   perTask        : task summaries handed to claimIsBackedByLedger (default [])
//
// Output:
//   {
//     used: boolean,                          // at least one source was supplied
//     sources: [{ path, kind, bytes, lines }],// metadata only, never the raw text
//     skipped: [...],                         // passed through unchanged
//     snippetCount: number,                   // both lists below, added together
//     suspectedFalseCompletions: [            // -> VERIFY card candidates (red line #2)
//       { source, line, snippet, subject, displayedAsDone: false, backed: false,
//         source_kind: "dialogue", confidence: "low" }
//     ],
//     repeatedCorrections: [                  // -> HARVEST profile candidates (red line #3)
//       { normalized, count, snippet, confidence: "low" }
//     ]
//   }
//
// Everything is a word-table match + a normalized count + a ledger SET lookup. Snippets
// go through redactSnippet() and clampSnippet() before they enter the result.
export function extractDialogueSignals({ parsed, perTask = [] }) {
  const sources = Array.isArray(parsed?.sources) ? parsed.sources : [];
  const skipped = Array.isArray(parsed?.skipped) ? parsed.skipped : [];

  const suspectedFalseCompletions = [];
  // normalized correction text -> { count, snippet } (snippet = first occurrence)
  const correctionTally = new Map();

  for (const src of sources) {
    const rawLines = toLines(src.text);

    for (let i = 0; i < rawLines.length; i += 1) {
      const line = rawLines[i].trim();
      if (line.length === 0) continue;

      // (1) Completion claim -> look it up in the ledger -> VERIFY candidate if unbacked.
      if (COMPLETION_RE.test(line)) {
        const subject = claimSubject(line);
        const isBacked = claimIsBackedByLedger(tokenSet(subject), perTask);
        if (!isBacked) {
          suspectedFalseCompletions.push({
            source: src.path,
            line: i + 1, // 1-based line number within this source
            snippet: clampSnippet(redactSnippet(line)),
            subject: clampSnippet(redactSnippet(subject), 80),
            // Honesty contract, carried explicitly so a consumer/test can assert it: a
            // dialogue completion is NEVER "done"; it is a claim that the ledger did not
            // back.
            displayedAsDone: false,
            backed: false,
            // NOTE(review): fixed to "dialogue" even when src.kind is "logs" - confirm
            // that is intended.
            source_kind: "dialogue",
            confidence: "low"
          });
        }
      }

      // (2) Correction -> tally by normalized text. Keys shorter than 4 chars are
      // ignored.
      if (!CORRECTION_RE.test(line)) continue;
      const key = normalizeForCount(line);
      if (key.length < 4) continue;

      const tally = correctionTally.get(key);
      if (tally) {
        tally.count += 1;
      } else {
        correctionTally.set(key, { count: 1, snippet: clampSnippet(redactSnippet(line)) });
      }
    }
  }

  // A repeated correction = the SAME normalized line seen at least twice, most frequent
  // first (ties keep first-seen order).
  const repeatedEntries = [];
  for (const entry of correctionTally) {
    if (entry[1].count >= 2) repeatedEntries.push(entry);
  }
  repeatedEntries.sort((left, right) => right[1].count - left[1].count);
  const repeatedCorrections = repeatedEntries.map(([normalized, tally]) => ({
    normalized: clampSnippet(normalized, 80),
    count: tally.count,
    snippet: tally.snippet,
    confidence: "low"
  }));

  return {
    used: sources.length > 0,
    sources: sources.map(({ path, kind, bytes, lines }) => ({ path, kind, bytes, lines })),
    skipped,
    snippetCount: suspectedFalseCompletions.length + repeatedCorrections.length,
    suspectedFalseCompletions,
    repeatedCorrections
  };
}
456
+
457
// Convenience entry point: read + extract in one call.
//   dialoguePaths / logPaths : lists of file paths (default [])
//   perTask                  : task summaries for the ledger cross-reference (default [])
// The two path lists are handed to parseDialogueExports() separately, tagged
// "dialogue" and "logs" respectively, and the two results are then concatenated
// (dialogue entries first) into one `parsed` object, both its sources and its skip
// list. Returns whatever extractDialogueSignals() returns for that merged input.
export function scanDialogueAndLogs({ dialoguePaths = [], logPaths = [], perTask = [] }) {
  const { sources: dialogueSources, skipped: dialogueSkipped } = parseDialogueExports(
    dialoguePaths,
    "dialogue"
  );
  const { sources: logSources, skipped: logSkipped } = parseDialogueExports(logPaths, "logs");

  return extractDialogueSignals({
    parsed: {
      sources: [...dialogueSources, ...logSources],
      skipped: [...dialogueSkipped, ...logSkipped]
    },
    perTask
  });
}