@ijfw/memory-server 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/bin/ijfw +27 -0
  2. package/bin/ijfw-dashboard +180 -0
  3. package/bin/ijfw-dispatch-plan +41 -0
  4. package/bin/ijfw-memorize +273 -0
  5. package/bin/ijfw-memory +51 -0
  6. package/fixtures/demo-target.js +28 -0
  7. package/package.json +53 -0
  8. package/src/api-client.js +190 -0
  9. package/src/audit-roster.js +315 -0
  10. package/src/caps.js +37 -0
  11. package/src/cold-scan-runner.mjs +37 -0
  12. package/src/compute/edges.js +155 -0
  13. package/src/compute/extract.js +560 -0
  14. package/src/compute/fts5.js +420 -0
  15. package/src/compute/graph-auto-index.js +191 -0
  16. package/src/compute/graph-lock.js +114 -0
  17. package/src/compute/index.js +18 -0
  18. package/src/compute/migration-runner.js +116 -0
  19. package/src/compute/migrations/001-initial.js +23 -0
  20. package/src/compute/migrations/002-porter-stemming-source.js +139 -0
  21. package/src/compute/migrations/003-tier-semantic.js +69 -0
  22. package/src/compute/migrations/004-kg-tables.js +83 -0
  23. package/src/compute/migrations/005-stale-candidate.js +72 -0
  24. package/src/compute/python-resolver.js +106 -0
  25. package/src/compute/runner-vm.js +185 -0
  26. package/src/compute/runner.js +416 -0
  27. package/src/compute/sandbox-detect.js +122 -0
  28. package/src/compute/sandbox-linux.js +164 -0
  29. package/src/compute/sandbox-macos.js +167 -0
  30. package/src/compute/sandbox-windows.js +63 -0
  31. package/src/compute/schema.sql +118 -0
  32. package/src/compute/staleness.js +239 -0
  33. package/src/compute/synonyms.js +367 -0
  34. package/src/compute/traverse.js +180 -0
  35. package/src/cost/aggregator.js +229 -0
  36. package/src/cost/pricing.js +134 -0
  37. package/src/cost/readers/claude.js +179 -0
  38. package/src/cost/readers/codex.js +131 -0
  39. package/src/cost/readers/gemini.js +111 -0
  40. package/src/cost/savings.js +243 -0
  41. package/src/cross-dispatcher.js +437 -0
  42. package/src/cross-orchestrator-cli.js +1885 -0
  43. package/src/cross-orchestrator.js +598 -0
  44. package/src/cross-project-search.js +114 -0
  45. package/src/dashboard-client.html +1180 -0
  46. package/src/dashboard-server.js +895 -0
  47. package/src/design-companion.js +81 -0
  48. package/src/dispatch/colon-syntax.js +732 -0
  49. package/src/dispatch-planner.js +235 -0
  50. package/src/dream/cooldown.js +105 -0
  51. package/src/dream/runner.mjs +373 -0
  52. package/src/dream/staleness-wiring.js +195 -0
  53. package/src/feedback-detector.js +57 -0
  54. package/src/hero-line.js +115 -0
  55. package/src/importers/claude-mem.js +152 -0
  56. package/src/importers/cli.js +311 -0
  57. package/src/importers/common.js +84 -0
  58. package/src/importers/discover.js +235 -0
  59. package/src/importers/rtk.js +107 -0
  60. package/src/intent-router.js +221 -0
  61. package/src/lib/atomic-io.js +201 -0
  62. package/src/lib/cache.js +33 -0
  63. package/src/lib/npm-view.js +104 -0
  64. package/src/lib/status-card.js +95 -0
  65. package/src/lib/token.js +85 -0
  66. package/src/memory/fts5.js +349 -0
  67. package/src/memory/migration-runner.js +116 -0
  68. package/src/memory/migrations/001-fts5-init.js +26 -0
  69. package/src/memory/migrations/002-tier-semantic.js +60 -0
  70. package/src/memory/migrations/003-stale-candidate.js +60 -0
  71. package/src/memory/reader.js +300 -0
  72. package/src/memory/recall-counter.js +76 -0
  73. package/src/memory/schema.sql +79 -0
  74. package/src/memory/search.js +431 -0
  75. package/src/memory/staleness.js +237 -0
  76. package/src/memory/tier-promotion.js +377 -0
  77. package/src/memory/tokenize.js +63 -0
  78. package/src/project-type-detector.js +866 -0
  79. package/src/prompt-check.js +171 -0
  80. package/src/ralph-allowlist.js +88 -0
  81. package/src/receipts.js +129 -0
  82. package/src/redactor.js +107 -0
  83. package/src/sandbox.js +275 -0
  84. package/src/sanitizer.js +69 -0
  85. package/src/scan-resume.js +167 -0
  86. package/src/schema.js +82 -0
  87. package/src/search-bm25.js +108 -0
  88. package/src/server.js +1414 -0
  89. package/src/swarm-config.js +80 -0
  90. package/src/trident/dispatch.js +211 -0
  91. package/src/trident/lens-health.js +253 -0
  92. package/src/update-apply.js +79 -0
  93. package/src/update-check.js +136 -0
  94. package/src/vectors.js +178 -0
  95. package/templates/design/bento-grid.md +84 -0
  96. package/templates/design/brutalist-luxe.md +82 -0
  97. package/templates/design/cinematic-dark.md +82 -0
  98. package/templates/design/data-dense-dashboard.md +88 -0
  99. package/templates/design/editorial-warm.md +81 -0
  100. package/templates/design/glassmorphic.md +84 -0
  101. package/templates/design/magazine-editorial.md +84 -0
  102. package/templates/design/maximalist-vibrant.md +85 -0
  103. package/templates/design/neo-swiss-tech.md +85 -0
  104. package/templates/design/swiss-minimal.md +80 -0
  105. package/templates/design/terminal-native.md +83 -0
  106. package/templates/design/warm-organic.md +84 -0
@@ -0,0 +1,866 @@
1
+ // IJFW v1.3.0 Alpha -- A3 project-type detection (Phase 3).
2
+ //
3
+ // Goal: classify a project as software / book / content / business / design /
4
+ // mixed / unknown. Result lands in <project>/.ijfw/project.type so downstream
5
+ // surfaces (ijfw-team, ijfw-workflow think-phase, AGENTS.md frontmatter via
6
+ // the P2-B2 hoist) can read it without re-scanning.
7
+ //
8
+ // V3 invariants honoured here:
9
+ // - V3-F2 cross-session checkpoint+resume via scan-resume.js (24h staleness
10
+ // + 3-attempt cap)
11
+ // - V3-F3 cold-scan async -- detect() can run with options.bg = true and
12
+ // return immediately while a child completes the work (the colon-syntax
13
+ // dispatcher honours --bg by spawning detect() as a detached child)
14
+ // - V3-F4 multi-type result shape -- primary_type + secondary_types[] from
15
+ // day one so Pillar B blackboard consumers don't have to re-shape
16
+ // - V3 dependency-flip fix -- when C9 / FTS5 is unavailable A3 must NOT
17
+ // halt. Falls back to the file-extension scan only, confidence capped at
18
+ // 0.7, fallback_reason: 'c9_unavailable'
19
+ // - File-tree hash + branch hash drive cache invalidation, NOT root mtime
20
+ // (which is unreliable as a stale-classification signal per V3 fix)
21
+ //
22
+ // Public surface:
23
+ // detect(projectRoot, options) -- main entry; returns full result
24
+ // loadProjectType(projectRoot) -- read cached .ijfw/project.type
25
+ // writeProjectType(projectRoot, result) -- atomic write to project.type
26
+ //
27
+ // Discipline:
28
+ // - ESM only.
29
+ // - ASCII only in strings (no smart quotes, no emojis).
30
+ // - Positive framing in any user-visible text -- this module emits machine
31
+ // JSON, not user copy, so the rule is "no negative-framed reasons" rather
32
+ // than "no errors at all".
33
+
34
+ import {
35
+ readFileSync,
36
+ writeFileSync,
37
+ existsSync,
38
+ readdirSync,
39
+ statSync,
40
+ renameSync,
41
+ mkdirSync,
42
+ unlinkSync,
43
+ realpathSync,
44
+ copyFileSync,
45
+ } from 'fs';
46
+ import { join, extname, isAbsolute, resolve as pathResolve, dirname } from 'path';
47
+ import { fileURLToPath } from 'url';
48
+ import { createHash } from 'crypto';
49
+ import {
50
+ loadScanState,
51
+ writeScanState,
52
+ shouldResume,
53
+ clearScanState,
54
+ acquireScanLock,
55
+ } from './scan-resume.js';
56
+
57
+ // --- Tunables --------------------------------------------------------------
58
+
59
// Every label a project can be classified as.
const DOMAINS = [
  'software',
  'book',
  'content',
  'business',
  'design',
  'mixed',
  'unknown',
];

// Walk guardrails. The design target is a 100k-file repository, so the file
// cap sits at twice that. Checkpoint state is flushed every CHECKPOINT_EVERY
// files so an interrupted walk loses at most that much progress.
const MAX_FILES = 200000;
const MAX_DEPTH = 12;
const CHECKPOINT_EVERY = 500;

// P3-M3: wall-clock guardrail. The walker polls Date.now() once per
// TIME_BUDGET_CHECK_EVERY entries and stops with scan_incomplete=true once
// the budget is spent. IJFW_DETECT_TIME_BUDGET_MS overrides the default.
const DEFAULT_TIME_BUDGET_MS = 5000;
const TIME_BUDGET_CHECK_EVERY = 1000;

// Directory names the walker never descends into (VCS metadata, tool state,
// dependency trees, build output, virtualenvs, caches, editor folders).
const SKIP_DIRS = new Set([
  '.git',
  '.hg',
  '.svn',
  '.ijfw',
  '.planning',
  '.cache',
  'node_modules',
  'dist',
  'build',
  'out',
  'target',
  '.next',
  '__pycache__',
  '.venv',
  'venv',
  'env',
  '.pytest_cache',
  '.mypy_cache',
  '.tox',
  '.gradle',
  '.idea',
  '.vscode',
  'vendor',
  'bower_components',
]);
84
+
85
// File-extension classifier: lower-cased extension -> domain label. Every
// extension maps to exactly one domain; the walker counts matches per domain
// and detect() turns the counts into file_extension_ratio signals.
// The table is assembled from per-domain lists so each group reads as a unit;
// key order is software, book, content, design, business.
const EXT_DOMAIN = Object.fromEntries(
  [
    // software
    ['software', [
      '.js', '.jsx', '.ts', '.tsx',
      '.mjs', '.cjs',
      '.py', '.rs', '.go',
      '.java', '.kt', '.scala',
      '.rb', '.php',
      '.c', '.cc', '.cpp', '.h',
      '.hpp', '.hh',
      '.swift', '.m', '.mm',
      '.cs', '.fs',
      '.lua', '.dart', '.zig',
    ]],
    // book / long-form prose
    ['book', ['.tex', '.bib', '.latex', '.epub', '.mobi']],
    // content / blog / docs / marketing
    ['content', ['.mdx', '.markdown', '.rst']],
    // design / assets
    ['design', [
      '.fig', '.sketch', '.xd',
      '.ai', '.psd', '.indd',
      '.svg', '.afdesign', '.afphoto',
    ]],
    // business / ops
    ['business', [
      '.xlsx', '.xls', '.csv',
      '.numbers', '.ods',
      '.pptx', '.ppt', '.key',
      '.docx', '.doc',
    ]],
  ].flatMap(([domain, extensions]) => extensions.map((ext) => [ext, domain])),
);
116
+
117
// Build/package manifests. The walker records one when it appears in a
// directory at depth <= 2; detect() treats any hit as a strong software vote.
const SOFTWARE_MANIFESTS = [
  'package.json',
  'Cargo.toml',
  'pyproject.toml',
  'setup.py',
  'Gemfile',
  'go.mod',
  'pom.xml',
  'build.gradle',
  'build.gradle.kts',
  'composer.json',
  'Package.swift',
  'mix.exs',
  'rebar.config',
  'pubspec.yaml',
  'CMakeLists.txt',
  'Makefile',
];

// Directory names (compared lower-cased) that vote for a domain when they
// sit near the project root.
const BOOK_DIRS = ['manuscripts', 'manuscript', 'drafts', 'draft', 'chapters', 'book'];
const CONTENT_DIRS = ['content', 'posts', 'articles', 'blog', 'newsletter', 'social'];
const BUSINESS_DIRS = [
  'strategy', 'financials', 'finance', 'ops',
  'runbooks', 'sop', 'sops', 'ops-runbooks',
];
const DESIGN_DIRS = ['designs', 'design', 'assets', 'mockups', 'wireframes', 'figma'];

// Basename patterns that boost a domain. Rows are [regex, domain, weight];
// the walker stops at the first pattern that matches a file name.
const FILENAME_PATTERNS = [
  [/^chapter[-_]?\d+/i, 'book', 0.4],
  [/^ch\d+/i, 'book', 0.3],
  [/^brand[-_]voice/i, 'content', 0.4],
  [/^seo[-_]/i, 'content', 0.2],
  [/^post[-_]/i, 'content', 0.2],
  [/^figma[-_]export/i, 'design', 0.4],
  [/^wireframe/i, 'design', 0.3],
].map(([re, domain, weight]) => ({ re, domain, weight }));
141
+
142
+ // --- Public API ------------------------------------------------------------
143
+
144
/**
 * detect(projectRoot, options) -> result
 *
 * Walks projectRoot honouring SKIP_DIRS, MAX_DEPTH, MAX_FILES. Reads existing
 * AGENTS.md frontmatter + .ijfw/memory/brief.md frontmatter for higher-trust
 * signals. Returns a multi-type result shape (V3-F4):
 *
 *   {
 *     primary_type, secondary_types: [], confidence,
 *     scan_incomplete, detected_at, signals: [...],
 *     fallback_reason: null | 'c9_unavailable',
 *     type,             // legacy "single label" alias = primary_type
 *     file_tree_hash,   // for cache invalidation
 *     branch_hash       // for cache invalidation
 *   }
 *
 * options:
 *   - explicitType  string   signal #1 (1.0 confidence); must be one of DOMAINS
 *   - c9Available   bool     when false, file-tree confidence caps at 0.7;
 *                            when omitted, isC9AvailableSync() decides
 *   - maxFiles      number   override walk cap (tests use small caps)
 *   - timeBudgetMs  number   override the walk's wall-clock budget
 *   - sessionId     string   threaded into scan-state for forensics
 *   - resume        bool     honour scan-resume; default true
 *
 * Side effects: on an incomplete walk the scan state is persisted (under the
 * scan lock); on a complete walk it is cleared. Both are best-effort.
 */
export function detect(projectRoot, options = {}) {
  const root = String(projectRoot || process.cwd());
  // P3-M2: when the caller doesn't pass c9Available explicitly, run the
  // sync availability probe so the confidence cap auto-engages on installs
  // without the C9 backend.
  const c9Available = options.c9Available === false
    ? false
    : (options.c9Available === true ? true : isC9AvailableSync());
  const maxFiles = Number.isFinite(options.maxFiles) && options.maxFiles > 0
    ? options.maxFiles
    : MAX_FILES;

  const signals = [];
  const fallbackReason = c9Available ? null : 'c9_unavailable';

  // --- Signal #1: explicit user declaration (1.0) -------------------------
  // Normalise once: the membership test and the emitted label must be the
  // same string, otherwise a non-string value that merely stringifies to a
  // domain (e.g. ['software']) would leak into primary_type as-is.
  const declaredType = options.explicitType ? String(options.explicitType) : '';
  if (declaredType && DOMAINS.includes(declaredType)) {
    signals.push({ kind: 'user_declaration', weight: 1.0, value: declaredType });
    return finalize({
      primary: declaredType,
      secondary: [],
      score: 1.0,
      signals,
      scanIncomplete: false,
      fallbackReason,
      treeHash: '',
      branchHash: branchHash(root),
    });
  }

  // --- Signal #2: AGENTS.md frontmatter (0.9) -----------------------------
  const fmAgents = readFrontmatterType(join(root, 'AGENTS.md'));
  if (fmAgents && DOMAINS.includes(fmAgents)) {
    signals.push({ kind: 'agents_md_frontmatter', weight: 0.9, value: fmAgents });
  }

  // --- Signal #3: brief.md frontmatter (0.8) ------------------------------
  const fmBrief = readFrontmatterType(join(root, '.ijfw', 'memory', 'brief.md'));
  if (fmBrief && DOMAINS.includes(fmBrief)) {
    signals.push({ kind: 'brief_md_frontmatter', weight: 0.8, value: fmBrief });
  }

  // --- Signal #4: file-tree walk ------------------------------------------
  const timeBudgetMs = resolveTimeBudgetMs(options);
  const walk = walkProject(root, { maxFiles, maxDepth: MAX_DEPTH, options, timeBudgetMs });
  const treeHash = fileTreeHash(walk.fingerprint);

  // Manifest votes -- presence is a strong software signal.
  if (walk.manifestsFound.length > 0) {
    signals.push({
      kind: 'manifest',
      weight: 0.9,
      manifests: walk.manifestsFound.slice(0, 6),
    });
  }

  // Directory-name votes.
  for (const d of walk.dirHits.book) signals.push({ kind: 'dir_book', weight: 0.4, name: d });
  for (const d of walk.dirHits.content) signals.push({ kind: 'dir_content', weight: 0.4, name: d });
  for (const d of walk.dirHits.business) signals.push({ kind: 'dir_business', weight: 0.4, name: d });
  for (const d of walk.dirHits.design) signals.push({ kind: 'dir_design', weight: 0.4, name: d });

  // File-extension ratio -- the workhorse fallback signal. Domains below a
  // 5% share of the classified files are not reported.
  const totals = walk.extTotals;
  const totalClassified = Object.values(totals).reduce((a, b) => a + b, 0);
  if (totalClassified > 0) {
    for (const [domain, count] of Object.entries(totals)) {
      const ratio = count / totalClassified;
      if (ratio >= 0.05) {
        signals.push({
          kind: 'file_extension_ratio',
          weight: 0.7,
          domain,
          ratio: Number(ratio.toFixed(3)),
          count,
        });
      }
    }
  }

  // Filename pattern boosts.
  for (const hit of walk.patternHits) {
    signals.push({ kind: 'filename_pattern', weight: hit.weight, domain: hit.domain, name: hit.name });
  }

  // --- Score reconciliation ----------------------------------------------
  const scoreboard = scoreSignals(signals);
  const ranked = rankDomains(scoreboard);

  let primary;
  let secondary = [];
  let confidence;

  if (ranked.length === 0) {
    primary = 'unknown';
    confidence = 0;
  } else {
    primary = ranked[0].domain;
    confidence = ranked[0].score;
    secondary = ranked
      .slice(1)
      .filter((r) => r.score >= 0.4 && r.domain !== primary)
      .map((r) => r.domain);
    // Two distinct strong domains -> "mixed" surfaces as primary, with the
    // two top contributors as secondary so consumers retain the detail.
    // Threshold: top score >= 0.55, second score >= 0.5, AND second is
    // within 25% of top. The third clause keeps a clearly-dominant primary
    // out of mixed (e.g. a software repo with light docs stays "software").
    if (
      ranked.length >= 2 &&
      ranked[0].score >= 0.55 &&
      ranked[1].score >= 0.5 &&
      ranked[1].score / ranked[0].score >= 0.75
    ) {
      const topTwo = [ranked[0].domain, ranked[1].domain];
      secondary = topTwo;
      primary = 'mixed';
      confidence = Math.min(0.85, (ranked[0].score + ranked[1].score) / 2);
    }
  }

  // V3 fallback cap: when C9 is unavailable, file-tree-only confidence caps
  // at 0.7 so downstream consumers see "ask the user" territory. High-trust
  // signals (user declaration, frontmatter) bypass the cap.
  const highTrust = signals.some(
    (s) =>
      s.kind === 'user_declaration' ||
      s.kind === 'agents_md_frontmatter' ||
      s.kind === 'brief_md_frontmatter',
  );
  if (!c9Available && !highTrust && confidence > 0.7) confidence = 0.7;

  // scan_incomplete flag -- if the walker tripped a guardrail, downstream
  // surfaces should prompt the user rather than silently trust the result.
  const scanIncomplete = walk.incomplete;

  // Persist scan state on incomplete walks so the next session can resume.
  // P3-H3: the accumulated partial state is stored alongside the sentinel.
  // P3-M6: the write happens under the scan-state lock so two concurrent
  // detect() calls never read-modify-write the attempts counter together.
  // If the lock is not granted the persist is skipped.
  if (scanIncomplete) {
    const lock = acquireScanLock(root);
    if (lock) {
      try {
        const prior = loadScanState(root) || {};
        writeScanState(root, {
          scan_id: prior.scan_id || newScanId(),
          started_at: prior.started_at || new Date().toISOString(),
          last_path_walked: walk.lastPathWalked,
          files_scanned: walk.filesScanned,
          total_estimate: walk.totalEstimate,
          attempts: (prior.attempts || 0) + 1,
          incomplete: true,
          session_id: options.sessionId || null,
          partial: snapshotPartial(walk),
        });
      } catch {
        /* best-effort; never throw from detect() */
      } finally {
        // Releasing the lock is best-effort as well: an error raised here
        // would escape detect() after the result has already been computed.
        // NOTE(review): confirm `released` is the release method exposed by
        // the handle acquireScanLock() returns (scan-resume.js is not
        // visible from this file).
        try { lock.released(); } catch { /* best-effort */ }
      }
    }
  } else {
    try { clearScanState(root); } catch { /* best-effort */ }
  }

  return finalize({
    primary,
    secondary,
    score: confidence,
    signals,
    scanIncomplete,
    fallbackReason,
    treeHash,
    branchHash: branchHash(root),
  });
}
345
+
346
/**
 * loadProjectType(projectRoot) -> object | null
 *
 * Reads <project>/.ijfw/project.type and returns the parsed object, or null
 * when the cache is absent, unreadable, not a JSON object, or no longer
 * trustworthy:
 *
 *   - P3-M1: a cached scan_incomplete=true result stays on disk for
 *     debugging but is never returned, so consumers don't silently trust a
 *     partial walk.
 *   - P3-H2: when the cache carries a non-empty file_tree_hash / branch_hash,
 *     the live value is recomputed (cheapTreeHash / branchHash) and a
 *     mismatch returns null so the caller forces a re-detect. A live hash
 *     that comes back empty is treated as "cannot tell" and does not
 *     invalidate. Errors during this comparison are swallowed and the cache
 *     is trusted.
 *
 * @param {string} projectRoot project directory
 * @returns {object|null} the cached detect() result, or null
 */
export function loadProjectType(projectRoot) {
  const root = String(projectRoot);
  const cachePath = join(root, '.ijfw', 'project.type');
  if (!existsSync(cachePath)) return null;

  let parsed = null;
  try {
    parsed = JSON.parse(readFileSync(cachePath, 'utf8'));
  } catch {
    return null;
  }
  // Only a plain JSON object is a valid cached result. Arrays also report
  // typeof 'object', so they are rejected explicitly.
  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return null;

  // P3-M1: incomplete walks are never surfaced as a fresh result.
  if (parsed.scan_incomplete === true) return null;

  // P3-H2: cheap-tier cache invalidation.
  try {
    if (typeof parsed.file_tree_hash === 'string' && parsed.file_tree_hash.length > 0) {
      const liveTree = cheapTreeHash(root);
      if (liveTree && liveTree !== parsed.file_tree_hash) return null;
    }
    if (typeof parsed.branch_hash === 'string' && parsed.branch_hash.length > 0) {
      const liveBranch = branchHash(root);
      if (liveBranch && liveBranch !== parsed.branch_hash) return null;
    }
  } catch { /* invalidation is best-effort; on error trust the cache */ }

  return parsed;
}
388
+
389
/**
 * writeProjectType(projectRoot, result) -> string (path)
 *
 * Serialises `result` as pretty-printed JSON and writes it to
 * <project>/.ijfw/project.type via tmp file + rename, creating .ijfw/ when
 * it is missing. POSIX rename(2) is atomic on the same filesystem, so a kill
 * mid-write leaves the prior file intact.
 *
 * If the write or the rename fails, the tmp file is removed before the error
 * is rethrown, so failed attempts never accumulate project.type.tmp.* files.
 *
 * @param {string} projectRoot project directory
 * @param {object} result detect() result to cache
 * @returns {string} path of the written project.type file
 * @throws whatever mkdirSync / writeFileSync / renameSync / copyFileSync raise
 */
export function writeProjectType(projectRoot, result) {
  const root = String(projectRoot);
  const dir = join(root, '.ijfw');
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
  const finalPath = join(dir, 'project.type');
  // pid + timestamp keeps concurrent writers from sharing a tmp file.
  const tmpPath = `${finalPath}.tmp.${process.pid}.${Date.now()}`;
  const json = JSON.stringify(result, null, 2) + '\n';
  try {
    writeFileSync(tmpPath, json, 'utf8');
    atomicRename(tmpPath, finalPath);
  } catch (err) {
    // The tmp file may be partially written or never renamed; drop it.
    try { unlinkSync(tmpPath); } catch { /* already gone or never created */ }
    throw err;
  }
  return finalPath;
}

// P3-H5: rename with an EXDEV fallback. Behaves like renameSync when source
// and target share a filesystem. When rename reports EXDEV (cross-device),
// the content is copied to finalPath and the tmp file is removed whether or
// not the copy succeeded. Any other rename error is rethrown untouched.
function atomicRename(tmpPath, finalPath) {
  try {
    renameSync(tmpPath, finalPath);
    return;
  } catch (err) {
    if (!err || err.code !== 'EXDEV') throw err;
  }
  try {
    copyFileSync(tmpPath, finalPath);
  } finally {
    try { unlinkSync(tmpPath); } catch { /* best-effort */ }
  }
}
425
+
426
+ // --- Internals -------------------------------------------------------------
427
+
428
// Assemble the public result object from the reconciled detection state.
// The score is clamped to [0, 1] and rounded to three decimals, a non-array
// `secondary` becomes [], and missing hashes become ''. detected_at is
// stamped with the current time.
function finalize({
  primary,
  secondary,
  score,
  signals,
  scanIncomplete,
  fallbackReason,
  treeHash,
  branchHash: branchDigest,
}) {
  const clamped = Math.min(Math.max(score, 0), 1);
  const secondaryTypes = Array.isArray(secondary) ? secondary : [];
  return {
    type: primary, // single-label alias for hoist
    primary_type: primary,
    secondary_types: secondaryTypes,
    confidence: Number(clamped.toFixed(3)),
    scan_incomplete: Boolean(scanIncomplete),
    detected_at: new Date().toISOString(),
    signals,
    fallback_reason: fallbackReason,
    file_tree_hash: treeHash || '',
    branch_hash: branchDigest || '',
  };
}
444
+
445
// Read the `type:` value from the leading frontmatter block of a file.
//
// The file must open with a `---` fence line and contain a closing `---`
// line; the first `type: <token>` line inside wins, with one surrounding
// quote character stripped from each end. Returns null when the file is
// missing, unreadable, has no frontmatter, or declares no type. The value is
// returned as written -- callers check it against DOMAINS.
//
// The opening fence accepts both LF and CRLF line endings and an optional
// leading UTF-8 byte-order mark, matching the \r?\n tolerance of the rest of
// the parser, so files saved by Windows editors are not silently ignored.
function readFrontmatterType(path) {
  if (!existsSync(path)) return null;
  let src;
  try { src = readFileSync(path, 'utf8'); } catch { return null; }

  const opening = src.match(/^\uFEFF?---\r?\n/);
  if (!opening) return null;
  const after = src.slice(opening[0].length);

  // Closing fence: a line that is `---` plus optional trailing whitespace.
  const closeIdx = after.search(/\n---\s*(?:\r?\n|$)/);
  if (closeIdx < 0) return null;

  const fm = after.slice(0, closeIdx);
  for (const ln of fm.split(/\r?\n/)) {
    const m = ln.match(/^type\s*:\s*(\S+)\s*$/);
    if (m) return m[1].replace(/^["']|["']$/g, '');
  }
  return null;
}
463
+
464
// Walk the project tree and collect the raw classification inputs.
//
// Iterative depth-first traversal (explicit stack, so deep trees cannot
// overflow the call stack). Each directory listing is sorted by name, which
// makes the visit order -- and therefore the fingerprint and the checkpoint
// sequence -- deterministic for an unchanged tree.
//
// Returns:
//   filesScanned, totalEstimate   counters (hydrated from a resumed scan)
//   incomplete                    true when maxFiles or the time budget stopped the walk
//   lastPathWalked                last entry that was fully processed
//   fingerprint                   up to 4096 root-relative file paths
//   manifestsFound, dirHits, extTotals, patternHits
//
// Resume (P3-H3): when options.resume !== false and shouldResume() accepts
// the stored scan state, the traversal stack is rebuilt for the stored
// sentinel path and the accumulated partial state is hydrated, so the walk
// continues right after the sentinel. The rebuild relies on the traversal
// order: at the moment the sentinel was processed, the pending stack held
// exactly (a) the not-yet-walked earlier siblings of every ancestor of the
// sentinel and (b) the sub-directories already seen in the sentinel's own
// directory. Later siblings of an ancestor had already been walked in full.
// If the sentinel can no longer be located, the prior partial state is
// discarded and a fresh full pass runs.
function walkProject(root, { maxFiles, maxDepth, options, timeBudgetMs }) {
  const out = {
    filesScanned: 0,
    totalEstimate: 0,
    incomplete: false,
    lastPathWalked: '',
    fingerprint: [],
    manifestsFound: [],
    dirHits: { book: [], content: [], business: [], design: [] },
    extTotals: {},
    patternHits: [],
  };

  const byName = (a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0);

  // Sorted directory listing, or null when the directory cannot be read.
  const listSorted = (dirPath) => {
    try {
      return readdirSync(dirPath, { withFileTypes: true }).sort(byName);
    } catch {
      return null;
    }
  };

  // P3-M4: queue a directory unless its real path was already queued.
  // Returns true when the directory was pushed. A directory whose real path
  // cannot be resolved is still pushed; the listing step skips it if it
  // turns out to be unreadable.
  const enqueueDir = (stack, visited, dirPath, depth) => {
    try {
      const real = realpathSync.native(dirPath);
      if (visited.has(real)) return false;
      visited.add(real);
    } catch { /* unreadable; let the readdir error path handle it */ }
    stack.push({ path: dirPath, depth });
    return true;
  };

  const freshVisited = () => {
    const visited = new Set();
    try {
      visited.add(realpathSync.native(root));
    } catch { /* root may not resolve in odd test setups; tolerate */ }
    return visited;
  };

  // Rebuild the pending stack for a resume at `sentinel`. Returns
  // { stack, visited } or null when the sentinel is not reachable from root
  // within maxDepth. Only the sentinel's ancestor chain is listed, so the
  // cost is proportional to the depth of the sentinel. Real paths of
  // directories walked before the checkpoint are not re-registered.
  const rebuildResumeStack = (sentinel) => {
    // Walked paths are built with join(), so the root as it appears in them
    // is the parent of any joined child.
    const rootKey = dirname(join(root, 'x'));
    const chain = [];
    let cursor = sentinel;
    for (;;) {
      chain.unshift(cursor);
      if (chain.length > maxDepth + 1) return null;
      const parent = dirname(cursor);
      if (parent === rootKey) break;
      if (parent === cursor) return null; // hit the filesystem root first
      cursor = parent;
    }

    const visited = freshVisited();
    const stack = [];
    let dirPath = root;
    for (let depth = 0; depth < chain.length; depth += 1) {
      const entries = listSorted(dirPath);
      if (!entries) return null;
      const target = chain[depth];

      if (depth === chain.length - 1) {
        // The sentinel's own directory: the main loop replays its listing up
        // to and including the sentinel, then resumes normal processing.
        if (!entries.some((e) => join(dirPath, e.name) === target)) return null;
        stack.push({ path: dirPath, depth, replayThrough: target });
        return { stack, visited };
      }

      // Ancestor level: earlier siblings are still pending; the ancestor
      // itself is being walked; later siblings were already completed.
      let found = false;
      for (const entry of entries) {
        if (!entry.isDirectory() || SKIP_DIRS.has(entry.name)) continue;
        const childPath = join(dirPath, entry.name);
        if (childPath === target) {
          try { visited.add(realpathSync.native(childPath)); } catch { /* tolerate */ }
          found = true;
          break;
        }
        enqueueDir(stack, visited, childPath, depth + 1);
      }
      if (!found) return null;
      dirPath = target;
    }
    return null;
  };

  // shouldResume() gates on incomplete + young + under the attempt cap.
  let priorState = null;
  if (options.resume !== false) {
    const state = loadScanState(root);
    if (state && shouldResume(state)) priorState = state;
  }
  const sentinel = priorState
    && typeof priorState.last_path_walked === 'string'
    && priorState.last_path_walked.length > 0
    ? priorState.last_path_walked
    : null;
  const rebuilt = sentinel ? rebuildResumeStack(sentinel) : null;

  let stack;
  let visitedDirs;
  if (rebuilt) {
    stack = rebuilt.stack;
    visitedDirs = rebuilt.visited;
    // If the walk stops before processing anything new, the sentinel must
    // stay where it was.
    out.lastPathWalked = sentinel;
    // Hydrate accumulated counters/lists so the resumed walk adds to them.
    const p = priorState.partial;
    if (p && typeof p === 'object') {
      out.filesScanned = Number.isFinite(p.files_scanned) ? p.files_scanned : 0;
      out.totalEstimate = Number.isFinite(p.total_estimate) ? p.total_estimate : out.filesScanned;
      if (Array.isArray(p.fingerprint)) out.fingerprint = p.fingerprint.slice(0, 4096);
      if (Array.isArray(p.manifestsFound)) out.manifestsFound = p.manifestsFound.slice();
      if (p.dirHits && typeof p.dirHits === 'object') {
        for (const k of ['book', 'content', 'business', 'design']) {
          if (Array.isArray(p.dirHits[k])) out.dirHits[k] = p.dirHits[k].slice();
        }
      }
      if (p.extTotals && typeof p.extTotals === 'object') out.extTotals = { ...p.extTotals };
      if (Array.isArray(p.patternHits)) out.patternHits = p.patternHits.slice();
    }
  } else {
    stack = [{ path: root, depth: 0 }];
    visitedDirs = freshVisited();
  }

  // Checkpoints keep the identity of the scan being continued. Rewriting
  // scan_id / started_at / attempts on every checkpoint would reset the
  // attempts counter mid-walk, so the attempt cap could never be reached.
  const scanId = (priorState && priorState.scan_id) || newScanId();
  const scanStartedAt = (priorState && priorState.started_at) || new Date().toISOString();
  const attemptNo = priorState && Number.isFinite(priorState.attempts) && priorState.attempts > 1
    ? priorState.attempts
    : 1;

  // P3-M3: time-budget guardrail.
  const startedAt = Date.now();
  const budget = Number.isFinite(timeBudgetMs) && timeBudgetMs > 0 ? timeBudgetMs : DEFAULT_TIME_BUDGET_MS;
  let entriesSinceTimeCheck = 0;

  while (stack.length > 0) {
    const frame = stack.pop();
    if (frame.depth > maxDepth) continue;

    const entries = listSorted(frame.path);
    if (!entries) continue;

    // Only the rebuilt frame for the sentinel's directory carries
    // replayThrough; every other frame starts in normal mode.
    let replaying = typeof frame.replayThrough === 'string';

    for (const entry of entries) {
      const childPath = join(frame.path, entry.name);

      if (replaying) {
        // Entries up to and including the sentinel were processed by the
        // earlier walk: re-queue their directories, record nothing.
        if (entry.isDirectory() && !SKIP_DIRS.has(entry.name)) {
          enqueueDir(stack, visitedDirs, childPath, frame.depth + 1);
        }
        if (childPath === frame.replayThrough) replaying = false;
        continue;
      }

      // P3-M3: poll the wall clock every TIME_BUDGET_CHECK_EVERY entries.
      // The check runs before lastPathWalked advances, so the stored
      // sentinel always names an entry that was fully processed.
      entriesSinceTimeCheck += 1;
      if (entriesSinceTimeCheck >= TIME_BUDGET_CHECK_EVERY) {
        entriesSinceTimeCheck = 0;
        if (Date.now() - startedAt > budget) {
          out.incomplete = true;
          return out;
        }
      }

      out.lastPathWalked = childPath;

      if (entry.isDirectory()) {
        if (SKIP_DIRS.has(entry.name)) continue;
        if (enqueueDir(stack, visitedDirs, childPath, frame.depth + 1)) {
          recordDirHit(out, entry.name, frame.depth);
        }
        continue;
      }

      if (!entry.isFile()) continue;

      out.filesScanned += 1;
      out.totalEstimate = Math.max(out.totalEstimate, out.filesScanned);

      // Fingerprint: cheap sample to keep the hash stable but cap memory.
      // Relative path only -- mtime is intentionally excluded.
      if (out.fingerprint.length < 4096) {
        out.fingerprint.push(childPath.slice(root.length + 1));
      }

      // Manifest detection (depth <= 2).
      if (frame.depth <= 2 && SOFTWARE_MANIFESTS.includes(entry.name)) {
        out.manifestsFound.push(entry.name);
      }

      // Filename pattern: first match wins.
      for (const p of FILENAME_PATTERNS) {
        if (p.re.test(entry.name)) {
          out.patternHits.push({ name: entry.name, domain: p.domain, weight: p.weight });
          break;
        }
      }

      // Extension classifier.
      const dom = EXT_DOMAIN[extname(entry.name).toLowerCase()];
      if (dom) {
        out.extTotals[dom] = (out.extTotals[dom] || 0) + 1;
      }

      // Periodic checkpoint so a crash never loses more than
      // CHECKPOINT_EVERY files of progress.
      if (out.filesScanned % CHECKPOINT_EVERY === 0) {
        try {
          writeScanState(root, {
            scan_id: scanId,
            started_at: scanStartedAt,
            last_path_walked: childPath,
            files_scanned: out.filesScanned,
            total_estimate: out.totalEstimate,
            attempts: attemptNo,
            incomplete: true,
            partial: snapshotPartial(out),
          });
        } catch { /* best-effort */ }
      }

      if (out.filesScanned >= maxFiles) {
        out.incomplete = true;
        return out;
      }
    }
  }
  return out;
}
628
+
629
// Build the serialisable "partial" record stored in the scan state: the two
// counters, shallow copies of the accumulated lists truncated to fixed
// maxima (fingerprint 4096, manifests 32, each dir-hit list 32, pattern
// hits 64) and a shallow copy of the extension totals.
function snapshotPartial(out) {
  const firstN = (list, limit) => list.slice(0, limit);
  const { dirHits } = out;
  const snapshot = {
    files_scanned: out.filesScanned,
    total_estimate: out.totalEstimate,
    fingerprint: firstN(out.fingerprint, 4096),
    manifestsFound: firstN(out.manifestsFound, 32),
    dirHits: {
      book: firstN(dirHits.book, 32),
      content: firstN(dirHits.content, 32),
      business: firstN(dirHits.business, 32),
      design: firstN(dirHits.design, 32),
    },
    extTotals: Object.assign({}, out.extTotals),
    patternHits: firstN(out.patternHits, 64),
  };
  return snapshot;
}
647
+
648
// Resolve the walk's time budget in milliseconds. Precedence:
//   1. options.timeBudgetMs, when it is a finite number greater than zero;
//   2. the IJFW_DETECT_TIME_BUDGET_MS environment variable, when it is set
//      and parses to a finite number greater than zero;
//   3. DEFAULT_TIME_BUDGET_MS.
function resolveTimeBudgetMs(options) {
  const isUsable = (value) => Number.isFinite(value) && value > 0;

  const explicit = options.timeBudgetMs;
  if (isUsable(explicit)) return explicit;

  const rawEnv = process.env.IJFW_DETECT_TIME_BUDGET_MS;
  // An unset or empty variable is skipped without attempting to parse it.
  const fromEnv = rawEnv ? Number(rawEnv) : NaN;
  return isUsable(fromEnv) ? fromEnv : DEFAULT_TIME_BUDGET_MS;
}
659
+
660
// Record a directory-name hit for every domain whose directory vocabulary
// contains the lower-cased name. Only directories at depth <= 2 count:
// deeper hits add noise without shifting domain confidence reliably.
// A name listed in several vocabularies is recorded once per domain.
function recordDirHit(out, name, depth) {
  if (depth > 2) return;

  const key = name.toLowerCase();
  const vocabularies = [
    ['book', BOOK_DIRS],
    ['content', CONTENT_DIRS],
    ['business', BUSINESS_DIRS],
    ['design', DESIGN_DIRS],
  ];
  for (const [domain, names] of vocabularies) {
    if (names.includes(key)) {
      out.dirHits[domain].push(key);
    }
  }
}
670
+
671
// Fold a list of detector signals into a raw per-domain scoreboard.
//
// `signals` is an iterable of objects with a `kind` plus kind-specific
// fields (`value`, `domain`, `ratio`, `weight`). Returns a plain object
// keyed by domain holding cumulative raw scores.
//
// Weights:
//   user_declaration       +1.0 to the declared domain
//   agents_md_frontmatter  +0.9 to the declared domain
//   brief_md_frontmatter   +0.8 to the declared domain
//   manifest               +0.9 to software
//   dir_<domain>           +0.4 per hit, capped at 0.6 total per domain
//   file_extension_ratio   +0.7 * ratio to the signal's domain
//   filename_pattern       +weight, capped at 0.8 total per domain
//
// Declared domains (the three declaration kinds) are counted only when the
// value is one of the board's own keys. An unrecognised value used to
// produce `undefined + weight` (NaN), and an inherited key such as
// "constructor" produced a string; either one poisoned every later
// normalisation step. Such declarations are now ignored.
function scoreSignals(signals) {
  const board = {
    software: 0, book: 0, content: 0, business: 0, design: 0, mixed: 0, unknown: 0,
  };
  // Per-domain cap on cumulative filename_pattern contribution. Without
  // this, a book with 8 "chapter-NN" files dominates over a same-sized
  // companion code tree even when the user clearly authored both. One or
  // two pattern hits is the signal; more is repetition.
  const patternBudget = { software: 0.8, book: 0.8, content: 0.8, business: 0.8, design: 0.8, mixed: 0.8, unknown: 0.8 };
  // Per-domain cap on cumulative dir-name contribution, so a repo with
  // both 'book' and 'manuscripts' does not count at full weight twice.
  const dirBudget = { software: 0.6, book: 0.6, content: 0.6, business: 0.6, design: 0.6, mixed: 0.6, unknown: 0.6 };

  const isKnownDomain = (domain) => Object.prototype.hasOwnProperty.call(board, domain);

  // Explicit declaration (user flag or frontmatter): fixed weight, known
  // domains only.
  const addDeclared = (domain, weight) => {
    if (domain && isKnownDomain(domain)) board[domain] += weight;
  };

  // Directory-name hit: 0.4 per hit until the domain's budget runs out.
  const addDirHit = (domain) => {
    const add = Math.min(0.4, dirBudget[domain]);
    board[domain] += add;
    dirBudget[domain] -= add;
  };

  for (const s of signals) {
    switch (s.kind) {
      case 'user_declaration':
        addDeclared(s.value, 1.0);
        break;
      case 'agents_md_frontmatter':
        addDeclared(s.value, 0.9);
        break;
      case 'brief_md_frontmatter':
        addDeclared(s.value, 0.8);
        break;
      case 'manifest':
        board.software += 0.9;
        break;
      case 'dir_book':
        addDirHit('book');
        break;
      case 'dir_content':
        addDirHit('content');
        break;
      case 'dir_business':
        addDirHit('business');
        break;
      case 'dir_design':
        addDirHit('design');
        break;
      case 'file_extension_ratio': {
        // Ratio acts as a multiplier so a 0.78 software-extension share
        // lands near the manifest weight without dwarfing it.
        const ratio = s.ratio || 0;
        board[s.domain] = (board[s.domain] || 0) + 0.7 * ratio;
        break;
      }
      case 'filename_pattern': {
        // A domain without a budget entry contributes nothing.
        const add = Math.min(s.weight, patternBudget[s.domain] || 0);
        if (add > 0) {
          board[s.domain] = (board[s.domain] || 0) + add;
          patternBudget[s.domain] -= add;
        }
        break;
      }
      default:
        // Unknown signal kinds are ignored.
        break;
    }
  }
  return board;
}
716
+
717
// Turn a raw scoreboard into a ranked list of `{ domain, raw, score }`
// entries, highest score first.
//
// - The 'mixed' and 'unknown' buckets never take part in ranking.
// - Entries whose raw value is not a finite number are dropped. A single
//   NaN would otherwise make Math.max return NaN, slip past the
//   `maxRaw <= 0` guard, turn every score into NaN and leave the sort
//   order undefined.
// - Returns [] when nothing is rankable or no domain has a positive raw.
// - Each score comes from anchor(raw, maxRaw), which scales a domain
//   relative to the strongest one and keeps a lone weak signal from
//   reading as full confidence.
function rankDomains(board) {
  const ranked = [];
  for (const [domain, raw] of Object.entries(board)) {
    if (domain === 'mixed' || domain === 'unknown') continue;
    if (!Number.isFinite(raw)) continue;
    ranked.push({ domain, raw });
  }
  if (ranked.length === 0) return [];

  const maxRaw = ranked.reduce((best, entry) => Math.max(best, entry.raw), 0);
  if (maxRaw <= 0) return [];

  for (const entry of ranked) {
    entry.score = anchor(entry.raw, maxRaw);
  }
  ranked.sort((a, b) => b.score - a.score);
  return ranked;
}
735
+
736
// Convert a raw domain score into a confidence-like value rounded to three
// decimals.
//
// The value is ceiling(raw) * (raw / maxRaw), where
// ceiling(raw) = min(0.95, 0.4 + 0.55 * tanh(raw)). For the top domain
// (raw === maxRaw) the result is the ceiling itself: roughly 0.61 at raw
// 0.4, 0.82 at raw 1.0, approaching 0.95 as raw grows. Lower-ranked
// domains are shrunk further by their share of the maximum.
// Non-positive raw scores map to 0. The caller is expected to pass a
// positive maxRaw.
function anchor(raw, maxRaw) {
  if (raw <= 0) return 0;

  const ceiling = Math.min(0.95, 0.4 + 0.55 * Math.tanh(raw));
  const shareOfMax = raw / maxRaw;
  const scaled = ceiling * shareOfMax;
  return Number(scaled.toFixed(3));
}
744
+
745
// Fingerprint a list of paths: SHA-256 over each path followed by a
// newline, in the order given, truncated to the first 16 hex characters.
// A missing or empty list yields the empty string.
function fileTreeHash(paths) {
  const hasPaths = Boolean(paths) && paths.length !== 0;
  if (!hasPaths) return '';

  const hasher = createHash('sha256');
  for (const relPath of paths) {
    hasher.update(relPath + '\n');
  }
  const hex = hasher.digest('hex');
  return hex.slice(0, 16);
}
751
+
752
// Best-effort fingerprint of the project's current git HEAD: the first 16
// hex characters of SHA-256 over the ref that HEAD points at, or over the
// raw HEAD contents when HEAD is not of the form `ref: <name>`.
//
// Returns '' when `.git` is missing, is neither a directory nor a file,
// has no readable HEAD, or anything throws along the way -- consumers
// treat empty as "no branch signal", not an error.
//
// P3-M5: in a worktree `.git` is a file containing a `gitdir: <path>`
// pointer to the real git dir; the pointer is followed (relative targets
// are resolved against `root`) so HEAD reads succeed there too.
function branchHash(root) {
  try {
    const gitEntry = join(root, '.git');
    if (!existsSync(gitEntry)) return '';

    let info;
    try {
      info = statSync(gitEntry);
    } catch {
      return '';
    }

    let gitDir;
    if (info.isDirectory()) {
      gitDir = gitEntry;
    } else if (info.isFile()) {
      const pointer = /^gitdir:\s*(.+?)\s*$/m.exec(readFileSync(gitEntry, 'utf8'));
      if (!pointer) return '';
      const target = pointer[1];
      gitDir = isAbsolute(target) ? target : pathResolve(root, target);
    } else {
      return '';
    }

    const headFile = join(gitDir, 'HEAD');
    if (!existsSync(headFile)) return '';

    const headText = readFileSync(headFile, 'utf8').trim();
    const symbolic = /^ref:\s*(.+)$/.exec(headText);
    const identity = symbolic ? symbolic[1] : headText;
    return createHash('sha256').update(identity).digest('hex').slice(0, 16);
  } catch {
    /* best-effort */
    return '';
  }
}
787
+
788
// Cheap-tier file-tree hash for cache invalidation (P3-H2). Collects up to
// 4096 root-relative file paths and feeds them to fileTreeHash().
//
// The walk is an iterative depth-first traversal driven by an explicit
// stack. It honours the same MAX_DEPTH and SKIP_DIRS bounds as the full
// walker. Entries are sorted by name within each directory, but the
// collected list as a whole is in traversal order, not globally sorted.
// Unreadable directories are skipped, and anything that is neither a
// directory nor a regular file is ignored.
// NOTE(review): the intent is to reproduce the fingerprint computed when
// the result was first written; that parity depends on the full walker's
// ordering, which is outside this block -- confirm before relying on it.
function cheapTreeHash(root) {
  const MAX_PATHS = 4096;
  const relPaths = [];
  const pending = [{ path: root, depth: 0 }];

  const byName = (a, b) => {
    if (a.name < b.name) return -1;
    if (a.name > b.name) return 1;
    return 0;
  };

  while (pending.length > 0 && relPaths.length < MAX_PATHS) {
    const frame = pending.pop();
    if (frame.depth > MAX_DEPTH) continue;

    let listing;
    try {
      listing = readdirSync(frame.path, { withFileTypes: true });
    } catch {
      continue;
    }
    listing.sort(byName);

    for (const dirent of listing) {
      const fullPath = join(frame.path, dirent.name);
      if (dirent.isDirectory()) {
        if (!SKIP_DIRS.has(dirent.name)) {
          pending.push({ path: fullPath, depth: frame.depth + 1 });
        }
      } else if (dirent.isFile()) {
        // Strip "<root><separator>" to get the root-relative path.
        relPaths.push(fullPath.slice(root.length + 1));
        if (relPaths.length >= MAX_PATHS) break;
      }
    }
  }
  return fileTreeHash(relPaths);
}
817
+
818
// P3-M2: lightweight C9 availability probe. The detector defaults to
// c9Available=true, but a missing FTS5 backend should auto-flip the flag
// so the caller doesn't silently get an unfiltered confidence score.
//
// The answer is memoised in `_c9AvailableCache` (null = not probed yet)
// for the lifetime of the process; the synchronous sibling probe shares
// the same cache.
let _c9AvailableCache = null;

// Resolves to true when './compute/fts5.js' can be dynamically imported,
// false when the import rejects for any reason. Never rejects.
export async function isC9Available() {
  if (_c9AvailableCache === null) {
    _c9AvailableCache = await import('./compute/fts5.js').then(
      () => true,
      () => false,
    );
  }
  return _c9AvailableCache;
}
834
+
835
// Sync sibling for the synchronous detect() entrypoint. Instead of
// importing the module it checks that `compute/fts5.js` exists next to
// this file -- a cheap proxy intended to match the dynamic-import outcome
// on the install layouts that ship. The result is stored in the shared
// `_c9AvailableCache`, so whichever probe runs first decides the answer
// for the rest of the process.
function isC9AvailableSync() {
  if (_c9AvailableCache === null) {
    let found;
    try {
      const moduleDir = dirname(fileURLToPath(import.meta.url));
      found = existsSync(join(moduleDir, 'compute', 'fts5.js'));
    } catch {
      // Any failure to resolve the module location counts as unavailable.
      found = false;
    }
    _c9AvailableCache = found;
  }
  return _c9AvailableCache;
}
849
+
850
// Generate a 12-hex-character scan identifier by hashing the process id,
// the current timestamp and a Math.random() draw. Meant to tell scans
// apart, not to be unguessable.
function newScanId() {
  const seed = `${process.pid}:${Date.now()}:${Math.random()}`;
  const digest = createHash('sha256').update(seed).digest('hex');
  return digest.slice(0, 12);
}
856
+
857
// Test-only export surface: internal helpers and lookup tables exposed
// under a single `__test` binding so unit tests can reach them without
// widening the public API.
const testOnlyExports = {
  scoreSignals,
  rankDomains,
  readFrontmatterType,
  walkProject,
  fileTreeHash,
  EXT_DOMAIN,
  SKIP_DIRS,
};
export { testOnlyExports as __test };