@ijfw/memory-server 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ijfw +27 -0
- package/bin/ijfw-dashboard +180 -0
- package/bin/ijfw-dispatch-plan +41 -0
- package/bin/ijfw-memorize +273 -0
- package/bin/ijfw-memory +51 -0
- package/fixtures/demo-target.js +28 -0
- package/package.json +53 -0
- package/src/api-client.js +190 -0
- package/src/audit-roster.js +315 -0
- package/src/caps.js +37 -0
- package/src/cold-scan-runner.mjs +37 -0
- package/src/compute/edges.js +155 -0
- package/src/compute/extract.js +560 -0
- package/src/compute/fts5.js +420 -0
- package/src/compute/graph-auto-index.js +191 -0
- package/src/compute/graph-lock.js +114 -0
- package/src/compute/index.js +18 -0
- package/src/compute/migration-runner.js +116 -0
- package/src/compute/migrations/001-initial.js +23 -0
- package/src/compute/migrations/002-porter-stemming-source.js +139 -0
- package/src/compute/migrations/003-tier-semantic.js +69 -0
- package/src/compute/migrations/004-kg-tables.js +83 -0
- package/src/compute/migrations/005-stale-candidate.js +72 -0
- package/src/compute/python-resolver.js +106 -0
- package/src/compute/runner-vm.js +185 -0
- package/src/compute/runner.js +416 -0
- package/src/compute/sandbox-detect.js +122 -0
- package/src/compute/sandbox-linux.js +164 -0
- package/src/compute/sandbox-macos.js +167 -0
- package/src/compute/sandbox-windows.js +63 -0
- package/src/compute/schema.sql +118 -0
- package/src/compute/staleness.js +239 -0
- package/src/compute/synonyms.js +367 -0
- package/src/compute/traverse.js +180 -0
- package/src/cost/aggregator.js +229 -0
- package/src/cost/pricing.js +134 -0
- package/src/cost/readers/claude.js +179 -0
- package/src/cost/readers/codex.js +131 -0
- package/src/cost/readers/gemini.js +111 -0
- package/src/cost/savings.js +243 -0
- package/src/cross-dispatcher.js +437 -0
- package/src/cross-orchestrator-cli.js +1885 -0
- package/src/cross-orchestrator.js +598 -0
- package/src/cross-project-search.js +114 -0
- package/src/dashboard-client.html +1180 -0
- package/src/dashboard-server.js +895 -0
- package/src/design-companion.js +81 -0
- package/src/dispatch/colon-syntax.js +732 -0
- package/src/dispatch-planner.js +235 -0
- package/src/dream/cooldown.js +105 -0
- package/src/dream/runner.mjs +373 -0
- package/src/dream/staleness-wiring.js +195 -0
- package/src/feedback-detector.js +57 -0
- package/src/hero-line.js +115 -0
- package/src/importers/claude-mem.js +152 -0
- package/src/importers/cli.js +311 -0
- package/src/importers/common.js +84 -0
- package/src/importers/discover.js +235 -0
- package/src/importers/rtk.js +107 -0
- package/src/intent-router.js +221 -0
- package/src/lib/atomic-io.js +201 -0
- package/src/lib/cache.js +33 -0
- package/src/lib/npm-view.js +104 -0
- package/src/lib/status-card.js +95 -0
- package/src/lib/token.js +85 -0
- package/src/memory/fts5.js +349 -0
- package/src/memory/migration-runner.js +116 -0
- package/src/memory/migrations/001-fts5-init.js +26 -0
- package/src/memory/migrations/002-tier-semantic.js +60 -0
- package/src/memory/migrations/003-stale-candidate.js +60 -0
- package/src/memory/reader.js +300 -0
- package/src/memory/recall-counter.js +76 -0
- package/src/memory/schema.sql +79 -0
- package/src/memory/search.js +431 -0
- package/src/memory/staleness.js +237 -0
- package/src/memory/tier-promotion.js +377 -0
- package/src/memory/tokenize.js +63 -0
- package/src/project-type-detector.js +866 -0
- package/src/prompt-check.js +171 -0
- package/src/ralph-allowlist.js +88 -0
- package/src/receipts.js +129 -0
- package/src/redactor.js +107 -0
- package/src/sandbox.js +275 -0
- package/src/sanitizer.js +69 -0
- package/src/scan-resume.js +167 -0
- package/src/schema.js +82 -0
- package/src/search-bm25.js +108 -0
- package/src/server.js +1414 -0
- package/src/swarm-config.js +80 -0
- package/src/trident/dispatch.js +211 -0
- package/src/trident/lens-health.js +253 -0
- package/src/update-apply.js +79 -0
- package/src/update-check.js +136 -0
- package/src/vectors.js +178 -0
- package/templates/design/bento-grid.md +84 -0
- package/templates/design/brutalist-luxe.md +82 -0
- package/templates/design/cinematic-dark.md +82 -0
- package/templates/design/data-dense-dashboard.md +88 -0
- package/templates/design/editorial-warm.md +81 -0
- package/templates/design/glassmorphic.md +84 -0
- package/templates/design/magazine-editorial.md +84 -0
- package/templates/design/maximalist-vibrant.md +85 -0
- package/templates/design/neo-swiss-tech.md +85 -0
- package/templates/design/swiss-minimal.md +80 -0
- package/templates/design/terminal-native.md +83 -0
- package/templates/design/warm-organic.md +84 -0
|
@@ -0,0 +1,866 @@
|
|
|
1
|
+
// IJFW v1.3.0 Alpha -- A3 project-type detection (Phase 3).
|
|
2
|
+
//
|
|
3
|
+
// Goal: classify a project as software / book / content / business / design /
|
|
4
|
+
// mixed / unknown. Result lands in <project>/.ijfw/project.type so downstream
|
|
5
|
+
// surfaces (ijfw-team, ijfw-workflow think-phase, AGENTS.md frontmatter via
|
|
6
|
+
// the P2-B2 hoist) can read it without re-scanning.
|
|
7
|
+
//
|
|
8
|
+
// V3 invariants honoured here:
|
|
9
|
+
// - V3-F2 cross-session checkpoint+resume via scan-resume.js (24h staleness
|
|
10
|
+
// + 3-attempt cap)
|
|
11
|
+
// - V3-F3 cold-scan async -- detect() can run with options.bg = true and
|
|
12
|
+
// return immediately while a child completes the work (the colon-syntax
|
|
13
|
+
// dispatcher honours --bg by spawning detect() as a detached child)
|
|
14
|
+
// - V3-F4 multi-type result shape -- primary_type + secondary_types[] from
|
|
15
|
+
// day one so Pillar B blackboard consumers don't have to re-shape
|
|
16
|
+
// - V3 dependency-flip fix -- when C9 / FTS5 is unavailable A3 must NOT
|
|
17
|
+
// halt. Falls back to the file-extension scan only, confidence capped at
|
|
18
|
+
// 0.7, fallback_reason: 'c9_unavailable'
|
|
19
|
+
// - File-tree hash + branch hash drive cache invalidation, NOT root mtime
|
|
20
|
+
// (which is unreliable as a stale-classification signal per V3 fix)
|
|
21
|
+
//
|
|
22
|
+
// Public surface:
|
|
23
|
+
// detect(projectRoot, options) -- main entry; returns full result
|
|
24
|
+
// loadProjectType(projectRoot) -- read cached .ijfw/project.type
|
|
25
|
+
// writeProjectType(projectRoot, result) -- atomic write to project.type
|
|
26
|
+
//
|
|
27
|
+
// Discipline:
|
|
28
|
+
// - ESM only.
|
|
29
|
+
// - ASCII only in strings (no smart quotes, no emojis).
|
|
30
|
+
// - Positive framing in any user-visible text -- this module emits machine
|
|
31
|
+
// JSON, not user copy, so the rule is "no negative-framed reasons" rather
|
|
32
|
+
// than "no errors at all".
|
|
33
|
+
|
|
34
|
+
import {
|
|
35
|
+
readFileSync,
|
|
36
|
+
writeFileSync,
|
|
37
|
+
existsSync,
|
|
38
|
+
readdirSync,
|
|
39
|
+
statSync,
|
|
40
|
+
renameSync,
|
|
41
|
+
mkdirSync,
|
|
42
|
+
unlinkSync,
|
|
43
|
+
realpathSync,
|
|
44
|
+
copyFileSync,
|
|
45
|
+
} from 'fs';
|
|
46
|
+
import { join, extname, isAbsolute, resolve as pathResolve, dirname } from 'path';
|
|
47
|
+
import { fileURLToPath } from 'url';
|
|
48
|
+
import { createHash } from 'crypto';
|
|
49
|
+
import {
|
|
50
|
+
loadScanState,
|
|
51
|
+
writeScanState,
|
|
52
|
+
shouldResume,
|
|
53
|
+
clearScanState,
|
|
54
|
+
acquireScanLock,
|
|
55
|
+
} from './scan-resume.js';
|
|
56
|
+
|
|
57
|
+
// --- Tunables --------------------------------------------------------------

// Closed vocabulary of classification labels. detect() only honours an
// explicit or frontmatter-declared type when it appears in this list.
const DOMAINS = [
  'software',
  'book',
  'content',
  'business',
  'design',
  'mixed',
  'unknown',
];

// Walk guardrails. The design target is a 100k-file repository, so the file
// cap sits at twice that. CHECKPOINT_EVERY is the number of scanned files
// between scan-state checkpoints, which bounds how much progress a crash can
// lose.
const MAX_FILES = 200000;
const MAX_DEPTH = 12;
const CHECKPOINT_EVERY = 500;

// P3-M3 wall-clock guardrail. The walker consults Date.now() once every
// TIME_BUDGET_CHECK_EVERY entries and stops with an incomplete result when
// the elapsed time exceeds the budget, so a later session can resume.
// DEFAULT_TIME_BUDGET_MS is the fallback budget (5000ms); per the original
// note it is overridable via IJFW_DETECT_TIME_BUDGET_MS.
const DEFAULT_TIME_BUDGET_MS = 5000;
const TIME_BUDGET_CHECK_EVERY = 1000;

// Directory names the walker never descends into: VCS metadata, tool state,
// dependency trees, build output and editor/cache folders. Skipping them
// removes noise without shifting the signal balance between domains.
const SKIP_DIRS = new Set([
  // version control, IJFW state, planning, generic cache
  '.git',
  '.hg',
  '.svn',
  '.ijfw',
  '.planning',
  '.cache',
  // dependencies and build output
  'node_modules',
  'dist',
  'build',
  'out',
  'target',
  '.next',
  // Python environments and bytecode
  '__pycache__',
  '.venv',
  'venv',
  'env',
  // Python tooling caches
  '.pytest_cache',
  '.mypy_cache',
  '.tox',
  // build tool and editor state
  '.gradle',
  '.idea',
  '.vscode',
  // vendored third-party code
  'vendor',
  'bower_components',
]);
|
|
84
|
+
|
|
85
|
+
// File-extension classifier: lower-cased extension (with leading dot) ->
// domain bucket. Every extension maps to exactly one domain; a matching file
// adds one to that bucket's count and the per-domain ratios feed the
// confidence score. Extensions absent from the table (plain '.md', images,
// configuration files, ...) are left unclassified.
//
// The table is assembled from per-domain lists so each group reads as a unit;
// the resulting key order is the same as listing every pair by hand.
const EXT_DOMAIN = Object.fromEntries(
  [
    // software (heavy)
    ['software', [
      '.js', '.jsx', '.ts', '.tsx',
      '.mjs', '.cjs',
      '.py', '.rs', '.go',
      '.java', '.kt', '.scala',
      '.rb', '.php',
      '.c', '.cc', '.cpp', '.h',
      '.hpp', '.hh',
      '.swift', '.m', '.mm',
      '.cs', '.fs',
      '.lua', '.dart', '.zig',
    ]],
    // book / long-form prose
    ['book', [
      '.tex', '.bib', '.latex',
      '.epub', '.mobi',
    ]],
    // content / blog / docs / marketing
    ['content', [
      '.mdx', '.markdown', '.rst',
    ]],
    // design / assets
    ['design', [
      '.fig', '.sketch', '.xd',
      '.ai', '.psd', '.indd',
      '.svg', '.afdesign', '.afphoto',
    ]],
    // business / ops
    ['business', [
      '.xlsx', '.xls', '.csv',
      '.numbers', '.ods',
      '.pptx', '.ppt', '.key',
      '.docx', '.doc',
    ]],
  ].flatMap(([domain, extensions]) => extensions.map((extension) => [extension, domain])),
);
|
|
116
|
+
|
|
117
|
+
// Build/package manifest filenames. The walker records one of these when it
// is found at depth <= 2, and any recorded manifest becomes a strong vote for
// the software domain.
const SOFTWARE_MANIFESTS = [
  'package.json',
  'Cargo.toml',
  'pyproject.toml',
  'setup.py',
  'Gemfile',
  'go.mod',
  'pom.xml',
  'build.gradle',
  'build.gradle.kts',
  'composer.json',
  'Package.swift',
  'mix.exs',
  'rebar.config',
  'pubspec.yaml',
  'CMakeLists.txt',
  'Makefile',
];

// Directory names that vote for a domain when they exist near the project
// root. One list per domain.
const BOOK_DIRS = [
  'manuscripts', 'manuscript',
  'drafts', 'draft',
  'chapters', 'book',
];
const CONTENT_DIRS = [
  'content', 'posts', 'articles',
  'blog', 'newsletter', 'social',
];
const BUSINESS_DIRS = [
  'strategy', 'financials', 'finance',
  'ops', 'runbooks',
  'sop', 'sops', 'ops-runbooks',
];
const DESIGN_DIRS = [
  'designs', 'design', 'assets',
  'mockups', 'wireframes', 'figma',
];

// Basename patterns that boost a domain. Each regex is tested against the
// file's basename; the walker stops at the first matching entry, so order
// matters when two patterns could both match.
const FILENAME_PATTERNS = [
  // book
  { re: /^chapter[-_]?\d+/i, domain: 'book', weight: 0.4 },
  { re: /^ch\d+/i, domain: 'book', weight: 0.3 },
  // content
  { re: /^brand[-_]voice/i, domain: 'content', weight: 0.4 },
  { re: /^seo[-_]/i, domain: 'content', weight: 0.2 },
  { re: /^post[-_]/i, domain: 'content', weight: 0.2 },
  // design
  { re: /^figma[-_]export/i, domain: 'design', weight: 0.4 },
  { re: /^wireframe/i, domain: 'design', weight: 0.3 },
];
|
|
141
|
+
|
|
142
|
+
// --- Public API ------------------------------------------------------------
|
|
143
|
+
|
|
144
|
+
/**
 * detect(projectRoot, options) -> result
 *
 * Classifies the project at projectRoot (process.cwd() when projectRoot is
 * falsy) and returns the finalize()d multi-type result (V3-F4):
 *
 *   {
 *     type,                 // single-label alias of primary_type
 *     primary_type, secondary_types: [], confidence,
 *     scan_incomplete, detected_at, signals: [...],
 *     fallback_reason: null | 'c9_unavailable',
 *     file_tree_hash,       // for cache invalidation
 *     branch_hash           // for cache invalidation
 *   }
 *
 * Signals are gathered in this order:
 *   1. options.explicitType (weight 1.0) -- returns immediately, no walk
 *   2. `type:` in AGENTS.md frontmatter (0.9)
 *   3. `type:` in .ijfw/memory/brief.md frontmatter (0.8)
 *   4. file-tree walk: manifests (0.9), directory names (0.4 each),
 *      extension ratios of at least 5% (0.7), filename patterns (own weight)
 *
 * options:
 *   - explicitType  string  user declaration; must be one of DOMAINS
 *   - c9Available   bool    true/false is taken as given; anything else
 *                           triggers isC9AvailableSync(). When false,
 *                           confidence caps at 0.7 unless a frontmatter
 *                           signal is present.
 *   - maxFiles      number  positive finite override of MAX_FILES
 *   - sessionId     string  stored in scan-state for forensics
 *   - resume        bool    forwarded to the walker (default: resume on)
 *
 * Side effects: an incomplete walk persists scan state under the scan lock;
 * a complete walk clears any stored scan state. Both are best-effort.
 */
export function detect(projectRoot, options = {}) {
  const root = String(projectRoot || process.cwd());

  // P3-M2: an explicit boolean from the caller wins; otherwise fall back to
  // the synchronous availability probe so the confidence cap engages on
  // installs that lack the C9 backend.
  let c9Available;
  if (options.c9Available === false) {
    c9Available = false;
  } else if (options.c9Available === true) {
    c9Available = true;
  } else {
    c9Available = isC9AvailableSync();
  }

  const callerCapIsUsable = Number.isFinite(options.maxFiles) && options.maxFiles > 0;
  const maxFiles = callerCapIsUsable ? options.maxFiles : MAX_FILES;

  const signals = [];
  const fallbackReason = c9Available ? null : 'c9_unavailable';

  // --- Signal #1: explicit user declaration (1.0) -------------------------
  // A valid declaration settles the question outright: no frontmatter reads,
  // no walk, and an empty tree hash.
  const declared = options.explicitType;
  if (declared && DOMAINS.includes(String(declared))) {
    signals.push({ kind: 'user_declaration', weight: 1.0, value: declared });
    return finalize({
      primary: declared,
      secondary: [],
      score: 1.0,
      signals,
      scanIncomplete: false,
      fallbackReason,
      treeHash: '',
      branchHash: branchHash(root),
    });
  }

  // --- Signals #2 and #3: frontmatter `type:` declarations ----------------
  // AGENTS.md (0.9) is consulted before .ijfw/memory/brief.md (0.8); values
  // outside DOMAINS are ignored.
  const frontmatterSources = [
    { kind: 'agents_md_frontmatter', weight: 0.9, file: join(root, 'AGENTS.md') },
    { kind: 'brief_md_frontmatter', weight: 0.8, file: join(root, '.ijfw', 'memory', 'brief.md') },
  ];
  for (const { kind, weight, file } of frontmatterSources) {
    const declaredInFile = readFrontmatterType(file);
    if (declaredInFile && DOMAINS.includes(declaredInFile)) {
      signals.push({ kind, weight, value: declaredInFile });
    }
  }

  // --- Signal #4: file-tree walk (0.6 - 0.75 raw; capped 0.7 in fallback) -
  const timeBudgetMs = resolveTimeBudgetMs(options);
  const walk = walkProject(root, { maxFiles, maxDepth: MAX_DEPTH, options, timeBudgetMs });
  const treeHash = fileTreeHash(walk.fingerprint);

  // Manifest vote: any manifest found is a strong software signal. Only the
  // first six names are attached to keep the signal payload small.
  if (walk.manifestsFound.length > 0) {
    signals.push({
      kind: 'manifest',
      weight: 0.9,
      manifests: walk.manifestsFound.slice(0, 6),
    });
  }

  // Directory-name votes, one signal per hit, emitted bucket by bucket.
  const dirVotes = [
    ['dir_book', walk.dirHits.book],
    ['dir_content', walk.dirHits.content],
    ['dir_business', walk.dirHits.business],
    ['dir_design', walk.dirHits.design],
  ];
  for (const [kind, names] of dirVotes) {
    for (const name of names) {
      signals.push({ kind, weight: 0.4, name });
    }
  }

  // File-extension ratio -- the workhorse fallback signal. Domains holding
  // less than 5% of the classified files are treated as noise.
  const totals = walk.extTotals;
  let totalClassified = 0;
  for (const bucketCount of Object.values(totals)) {
    totalClassified = totalClassified + bucketCount;
  }
  if (totalClassified > 0) {
    for (const [domain, count] of Object.entries(totals)) {
      const ratio = count / totalClassified;
      if (ratio < 0.05) continue;
      signals.push({
        kind: 'file_extension_ratio',
        weight: 0.7,
        domain,
        ratio: Number(ratio.toFixed(3)),
        count,
      });
    }
  }

  // Filename pattern boosts.
  for (const { weight, domain, name } of walk.patternHits) {
    signals.push({ kind: 'filename_pattern', weight, domain, name });
  }

  // --- Score reconciliation ----------------------------------------------
  const ranked = rankDomains(scoreSignals(signals));

  let primary = 'unknown';
  let secondary = [];
  let confidence = 0;

  if (ranked.length > 0) {
    const top = ranked[0];
    const runnerUp = ranked[1];

    primary = top.domain;
    confidence = top.score;
    secondary = ranked
      .slice(1)
      .filter((candidate) => candidate.score >= 0.4 && candidate.domain !== top.domain)
      .map((candidate) => candidate.domain);

    // Two distinct strong domains surface as "mixed", with both contributors
    // kept as secondary so consumers retain the detail. Requires top >= 0.55,
    // second >= 0.5, and second within 25% of top; the last clause keeps a
    // clearly dominant primary (software with light docs) out of "mixed".
    const looksMixed =
      ranked.length >= 2 &&
      top.score >= 0.55 &&
      runnerUp.score >= 0.5 &&
      runnerUp.score / top.score >= 0.75;
    if (looksMixed) {
      secondary = [top.domain, runnerUp.domain];
      primary = 'mixed';
      confidence = Math.min(0.85, (top.score + runnerUp.score) / 2);
    }
  }

  // V3 fallback cap: without C9, file-tree-only confidence tops out at 0.7
  // so downstream consumers land in "ask the user" territory. High-trust
  // signals (user declaration, frontmatter) bypass the cap.
  const highTrustKinds = ['user_declaration', 'agents_md_frontmatter', 'brief_md_frontmatter'];
  const highTrust = signals.some((signal) => highTrustKinds.includes(signal.kind));
  if (!c9Available && !highTrust && confidence > 0.7) {
    confidence = 0.7;
  }

  // scan_incomplete -- set when the walker tripped a guardrail, so downstream
  // surfaces prompt the user instead of silently trusting the result.
  const scanIncomplete = walk.incomplete;

  if (!scanIncomplete) {
    // A finished walk makes any stored resume state obsolete.
    try {
      clearScanState(root);
    } catch { /* best-effort */ }
  } else {
    // P3-H3: persist the accumulated partial state so a resumed walk keeps
    // adding to the counters and lists. P3-M6: hold the scan-state lock so
    // concurrent detect() calls never read-modify-write the attempts counter
    // unsafely. When no lock is handed back the persist is skipped.
    const lock = acquireScanLock(root);
    if (lock) {
      try {
        const prior = loadScanState(root) || {};
        writeScanState(root, {
          scan_id: prior.scan_id || newScanId(),
          started_at: prior.started_at || new Date().toISOString(),
          last_path_walked: walk.lastPathWalked,
          files_scanned: walk.filesScanned,
          total_estimate: walk.totalEstimate,
          attempts: (prior.attempts || 0) + 1,
          incomplete: true,
          session_id: options.sessionId || null,
          partial: snapshotPartial(walk),
        });
      } catch {
        /* best-effort; never throw from detect() */
      } finally {
        // NOTE(review): the handle method is spelled `released` here and
        // scan-resume.js is not visible from this block -- confirm it is not
        // meant to be `release`, since a throw here would escape detect().
        lock.released();
      }
    }
  }

  return finalize({
    primary,
    secondary,
    score: confidence,
    signals,
    scanIncomplete,
    fallbackReason,
    treeHash,
    branchHash: branchHash(root),
  });
}
|
|
345
|
+
|
|
346
|
+
/**
 * loadProjectType(projectRoot) -> object | null
 *
 * Reads <project>/.ijfw/project.type and returns the parsed result, or null
 * when the caller should run a fresh detect(). null is returned when:
 *   - the file is missing, unreadable, or not valid JSON
 *   - the JSON is not a plain object (primitives, null, and arrays are all
 *     rejected -- an array would otherwise pass a bare typeof check)
 *   - P3-M1: the cached result has scan_incomplete === true; the file stays
 *     on disk for inspection but is never served as a fresh result
 *   - P3-H2: a non-empty cached file_tree_hash or branch_hash differs from
 *     the value recomputed by cheapTreeHash() / branchHash()
 *
 * Hash comparison is best-effort: if recomputing throws, or yields a falsy
 * value, the cached result is trusted.
 *
 * @param {string} projectRoot  project root directory
 * @returns {object|null}       cached detection result, or null
 */
export function loadProjectType(projectRoot) {
  const root = String(projectRoot);
  const path = join(root, '.ijfw', 'project.type');
  if (!existsSync(path)) return null;

  let parsed = null;
  try {
    parsed = JSON.parse(readFileSync(path, 'utf8'));
  } catch {
    // Unreadable or corrupt cache: treat as absent so the caller re-detects.
    return null;
  }
  // typeof [] is 'object', so arrays need their own rejection.
  if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return null;

  // P3-M1: incomplete walks are never returned to consumers.
  if (parsed.scan_incomplete === true) return null;

  // P3-H2: cheap-tier cache invalidation. Empty cached hashes (for example
  // from an explicit-type result) skip the corresponding comparison.
  try {
    if (typeof parsed.file_tree_hash === 'string' && parsed.file_tree_hash.length > 0) {
      const liveTree = cheapTreeHash(root);
      if (liveTree && liveTree !== parsed.file_tree_hash) return null;
    }
    if (typeof parsed.branch_hash === 'string' && parsed.branch_hash.length > 0) {
      const liveBranch = branchHash(root);
      if (liveBranch && liveBranch !== parsed.branch_hash) return null;
    }
  } catch { /* invalidation is best-effort; on error trust the cache */ }

  return parsed;
}
|
|
388
|
+
|
|
389
|
+
/**
 * writeProjectType(projectRoot, result) -> string (path)
 *
 * Serialises result as pretty-printed JSON and publishes it at
 * <project>/.ijfw/project.type via tmp-file + rename, creating .ijfw/ when
 * it is missing. POSIX rename(2) is atomic on the same filesystem, so a kill
 * mid-write leaves the prior file intact.
 *
 * If writing the tmp file or publishing it fails, the tmp file is removed
 * (best-effort) before the original error is rethrown, so failed writes do
 * not accumulate project.type.tmp.* files in .ijfw/.
 *
 * @param {string} projectRoot  project root directory
 * @param {object} result       detection result to persist
 * @returns {string}            absolute-or-relative path of project.type
 * @throws whatever mkdirSync / writeFileSync / rename / copy raised
 */
export function writeProjectType(projectRoot, result) {
  const root = String(projectRoot);
  const dir = join(root, '.ijfw');
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
  const finalPath = join(dir, 'project.type');
  // pid + timestamp keeps concurrent writers from sharing a tmp file.
  const tmpPath = `${finalPath}.tmp.${process.pid}.${Date.now()}`;
  const json = JSON.stringify(result, null, 2) + '\n';
  try {
    writeFileSync(tmpPath, json, 'utf8');
    atomicRename(tmpPath, finalPath);
  } catch (err) {
    // Do not leak the tmp file; the unlink may itself fail if the file was
    // never created or was already cleaned up, which is fine.
    try { unlinkSync(tmpPath); } catch { /* best-effort */ }
    throw err;
  }
  return finalPath;
}

/**
 * P3-H5: shared rename-with-EXDEV-fallback helper.
 *
 * Behaves exactly like renameSync when the rename succeeds. When rename
 * raises EXDEV (source and target on different filesystems) it falls back to
 * copyFileSync followed by a best-effort unlink of the tmp file; the unlink
 * runs whether or not the copy succeeded. Any other rename error is rethrown
 * untouched and the tmp file is left for the caller to deal with.
 *
 * Note the fallback copy writes the final path in place, so it does not carry
 * the atomicity guarantee of the rename path.
 *
 * @param {string} tmpPath    fully written temporary file
 * @param {string} finalPath  destination path
 */
function atomicRename(tmpPath, finalPath) {
  try {
    renameSync(tmpPath, finalPath);
    return;
  } catch (err) {
    if (!err || err.code !== 'EXDEV') throw err;
  }
  try {
    copyFileSync(tmpPath, finalPath);
  } finally {
    try { unlinkSync(tmpPath); } catch { /* best-effort */ }
  }
}
|
|
425
|
+
|
|
426
|
+
// --- Internals -------------------------------------------------------------
|
|
427
|
+
|
|
428
|
+
/**
 * Shape the public detection result from the reconciled pieces.
 *
 * confidence is the score clamped to [0, 1] and rounded to three decimals.
 * A score that does not coerce to a number (undefined, NaN, non-numeric
 * string) becomes 0 instead of NaN, because NaN serialises to null in the
 * persisted project.type JSON.
 *
 * @param {object}  parts
 * @param {string}  parts.primary         winning domain label
 * @param {Array}   parts.secondary       secondary labels; non-arrays become []
 * @param {number}  parts.score           raw confidence before clamping
 * @param {Array}   parts.signals         collected signal records, passed through
 * @param {*}       parts.scanIncomplete  coerced to boolean
 * @param {string|null} parts.fallbackReason  passed through as fallback_reason
 * @param {string}  parts.treeHash        file-tree hash; falsy becomes ''
 * @param {string}  parts.branchHash      branch hash; falsy becomes ''
 * @returns {object} result with type, primary_type, secondary_types,
 *   confidence, scan_incomplete, detected_at, signals, fallback_reason,
 *   file_tree_hash and branch_hash
 */
function finalize({ primary, secondary, score, signals, scanIncomplete, fallbackReason, treeHash, branchHash: bh }) {
  const numericScore = Number(score);
  const clamped = Number.isNaN(numericScore) ? 0 : Math.max(0, Math.min(1, numericScore));
  const confidence = Number(clamped.toFixed(3));
  return {
    type: primary, // single-label alias for hoist
    primary_type: primary,
    secondary_types: Array.isArray(secondary) ? secondary : [],
    confidence,
    scan_incomplete: !!scanIncomplete,
    detected_at: new Date().toISOString(),
    signals,
    fallback_reason: fallbackReason,
    file_tree_hash: treeHash || '',
    branch_hash: bh || '',
  };
}
|
|
444
|
+
|
|
445
|
+
/**
 * Read the `type:` value from a file's leading frontmatter block.
 *
 * The file must start with a `---` fence line (LF or CRLF line endings; a
 * leading UTF-8 byte-order mark is ignored) and contain a closing `---`
 * fence. Within the block, the first line of the form `type: value` wins;
 * one leading and one trailing quote character are stripped from the value.
 * The value is returned as-is -- checking it against the known domains is
 * the caller's job.
 *
 * @param {string} path  file to inspect
 * @returns {string|null} the declared type, or null when the file is missing,
 *   unreadable, has no complete frontmatter block, or declares no type
 */
function readFrontmatterType(path) {
  if (!existsSync(path)) return null;
  let src;
  try { src = readFileSync(path, 'utf8'); } catch { return null; }

  // readFileSync('utf8') keeps a byte-order mark; drop it so the fence check
  // below still sees `---` at offset 0.
  if (src.charCodeAt(0) === 0xfeff) src = src.slice(1);

  // Accept both `---\n` and `---\r\n`, matching the CRLF tolerance of the
  // closing-fence and line-split patterns further down.
  const opening = src.match(/^---\r?\n/);
  if (!opening) return null;

  const after = src.slice(opening[0].length);
  const closeIdx = after.search(/\n---\s*(?:\r?\n|$)/);
  if (closeIdx < 0) return null;

  const fm = after.slice(0, closeIdx);
  for (const ln of fm.split(/\r?\n/)) {
    const m = ln.match(/^type\s*:\s*(\S+)\s*$/);
    if (m) return m[1].replace(/^["']|["']$/g, '');
  }
  return null;
}
|
|
463
|
+
|
|
464
|
+
function walkProject(root, { maxFiles, maxDepth, options, timeBudgetMs }) {
|
|
465
|
+
const out = {
|
|
466
|
+
filesScanned: 0,
|
|
467
|
+
totalEstimate: 0,
|
|
468
|
+
incomplete: false,
|
|
469
|
+
lastPathWalked: '',
|
|
470
|
+
fingerprint: [],
|
|
471
|
+
manifestsFound: [],
|
|
472
|
+
dirHits: { book: [], content: [], business: [], design: [] },
|
|
473
|
+
extTotals: {},
|
|
474
|
+
patternHits: [],
|
|
475
|
+
};
|
|
476
|
+
|
|
477
|
+
// P3-H3: resume merges accumulated state. shouldResume() gates on
|
|
478
|
+
// incomplete + young + under attempt cap; if the prior state has a
|
|
479
|
+
// sentinel that is no longer reachable, the walk simply never sees it
|
|
480
|
+
// and produces a fresh full pass (the "restart from scratch" branch).
|
|
481
|
+
let resumeFrom = null;
|
|
482
|
+
let priorState = null;
|
|
483
|
+
if (options.resume !== false) {
|
|
484
|
+
const state = loadScanState(root);
|
|
485
|
+
if (state && shouldResume(state)) {
|
|
486
|
+
resumeFrom = state.last_path_walked || null;
|
|
487
|
+
priorState = state;
|
|
488
|
+
}
|
|
489
|
+
}
|
|
490
|
+
// Hydrate accumulated counters/lists from the prior partial scan so the
|
|
491
|
+
// resumed walk continues to add to them rather than starting at zero.
|
|
492
|
+
if (priorState && priorState.partial && typeof priorState.partial === 'object') {
|
|
493
|
+
const p = priorState.partial;
|
|
494
|
+
out.filesScanned = Number.isFinite(p.files_scanned) ? p.files_scanned : 0;
|
|
495
|
+
out.totalEstimate = Number.isFinite(p.total_estimate) ? p.total_estimate : out.filesScanned;
|
|
496
|
+
if (Array.isArray(p.fingerprint)) out.fingerprint = p.fingerprint.slice(0, 4096);
|
|
497
|
+
if (Array.isArray(p.manifestsFound)) out.manifestsFound = p.manifestsFound.slice();
|
|
498
|
+
if (p.dirHits && typeof p.dirHits === 'object') {
|
|
499
|
+
for (const k of ['book', 'content', 'business', 'design']) {
|
|
500
|
+
if (Array.isArray(p.dirHits[k])) out.dirHits[k] = p.dirHits[k].slice();
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
if (p.extTotals && typeof p.extTotals === 'object') out.extTotals = { ...p.extTotals };
|
|
504
|
+
if (Array.isArray(p.patternHits)) out.patternHits = p.patternHits.slice();
|
|
505
|
+
}
|
|
506
|
+
let resumed = !resumeFrom;
|
|
507
|
+
|
|
508
|
+
// P3-M4: track visited real-paths so circular symlinks never loop.
|
|
509
|
+
const visitedDirs = new Set();
|
|
510
|
+
try {
|
|
511
|
+
visitedDirs.add(realpathSync.native(root));
|
|
512
|
+
} catch { /* root may not resolve in odd test setups; tolerate */ }
|
|
513
|
+
|
|
514
|
+
// P3-M3: time-budget guardrail.
|
|
515
|
+
const startedAt = Date.now();
|
|
516
|
+
const budget = Number.isFinite(timeBudgetMs) && timeBudgetMs > 0 ? timeBudgetMs : DEFAULT_TIME_BUDGET_MS;
|
|
517
|
+
let entriesSinceTimeCheck = 0;
|
|
518
|
+
|
|
519
|
+
// Iterative DFS so we don't blow the stack on deep trees.
|
|
520
|
+
const stack = [{ path: root, depth: 0 }];
|
|
521
|
+
while (stack.length > 0) {
|
|
522
|
+
const { path, depth } = stack.pop();
|
|
523
|
+
if (depth > maxDepth) continue;
|
|
524
|
+
|
|
525
|
+
let entries;
|
|
526
|
+
try {
|
|
527
|
+
entries = readdirSync(path, { withFileTypes: true });
|
|
528
|
+
} catch { continue; }
|
|
529
|
+
|
|
530
|
+
// Sort for deterministic walks -- same inputs produce identical
|
|
531
|
+
// file_tree_hash + same lastPathWalked checkpoint sequence.
|
|
532
|
+
entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
|
|
533
|
+
|
|
534
|
+
for (const entry of entries) {
|
|
535
|
+
const childPath = join(path, entry.name);
|
|
536
|
+
|
|
537
|
+
if (!resumed) {
|
|
538
|
+
if (childPath === resumeFrom) resumed = true;
|
|
539
|
+
// Skip ahead until we reach the resume sentinel; record the entry so
|
|
540
|
+
// we keep producing identical fingerprints across resumes.
|
|
541
|
+
continue;
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
out.lastPathWalked = childPath;
|
|
545
|
+
|
|
546
|
+
// P3-M3: poll the wall clock periodically. Every TIME_BUDGET_CHECK_EVERY
|
|
547
|
+
// entries we compare elapsed to budget; on overrun we mark incomplete
|
|
548
|
+
// and return. Persisting state happens up in detect().
|
|
549
|
+
entriesSinceTimeCheck += 1;
|
|
550
|
+
if (entriesSinceTimeCheck >= TIME_BUDGET_CHECK_EVERY) {
|
|
551
|
+
entriesSinceTimeCheck = 0;
|
|
552
|
+
if (Date.now() - startedAt > budget) {
|
|
553
|
+
out.incomplete = true;
|
|
554
|
+
return out;
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
if (entry.isDirectory()) {
|
|
559
|
+
if (SKIP_DIRS.has(entry.name)) continue;
|
|
560
|
+
// P3-M4: skip directories whose real-path we've already visited.
|
|
561
|
+
try {
|
|
562
|
+
const real = realpathSync.native(childPath);
|
|
563
|
+
if (visitedDirs.has(real)) continue;
|
|
564
|
+
visitedDirs.add(real);
|
|
565
|
+
} catch { /* unreadable; let the readdir error path handle it */ }
|
|
566
|
+
recordDirHit(out, entry.name, depth);
|
|
567
|
+
stack.push({ path: childPath, depth: depth + 1 });
|
|
568
|
+
continue;
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
if (!entry.isFile()) continue;
|
|
572
|
+
|
|
573
|
+
out.filesScanned += 1;
|
|
574
|
+
out.totalEstimate = Math.max(out.totalEstimate, out.filesScanned);
|
|
575
|
+
|
|
576
|
+
// Fingerprint: cheap sample to keep the hash stable but cap memory.
|
|
577
|
+
// We keep the relative path only -- mtime is intentionally excluded.
|
|
578
|
+
if (out.fingerprint.length < 4096) {
|
|
579
|
+
out.fingerprint.push(childPath.slice(root.length + 1));
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
// Manifest detection (depth <= 2).
|
|
583
|
+
if (depth <= 2 && SOFTWARE_MANIFESTS.includes(entry.name)) {
|
|
584
|
+
out.manifestsFound.push(entry.name);
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
// Filename pattern.
|
|
588
|
+
for (const p of FILENAME_PATTERNS) {
|
|
589
|
+
if (p.re.test(entry.name)) {
|
|
590
|
+
out.patternHits.push({ name: entry.name, domain: p.domain, weight: p.weight });
|
|
591
|
+
break;
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
// Extension classifier.
|
|
596
|
+
const ext = extname(entry.name).toLowerCase();
|
|
597
|
+
const dom = EXT_DOMAIN[ext];
|
|
598
|
+
if (dom) {
|
|
599
|
+
out.extTotals[dom] = (out.extTotals[dom] || 0) + 1;
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
// Periodic checkpoint write so a crash never loses more than
|
|
603
|
+
// CHECKPOINT_EVERY files of progress. P3-H3: persist accumulated
|
|
604
|
+
// partial state so the resumed walk can keep adding to it.
|
|
605
|
+
if (out.filesScanned % CHECKPOINT_EVERY === 0) {
|
|
606
|
+
try {
|
|
607
|
+
writeScanState(root, {
|
|
608
|
+
scan_id: newScanId(),
|
|
609
|
+
started_at: new Date().toISOString(),
|
|
610
|
+
last_path_walked: childPath,
|
|
611
|
+
files_scanned: out.filesScanned,
|
|
612
|
+
total_estimate: out.totalEstimate,
|
|
613
|
+
attempts: 1,
|
|
614
|
+
incomplete: true,
|
|
615
|
+
partial: snapshotPartial(out),
|
|
616
|
+
});
|
|
617
|
+
} catch { /* best-effort */ }
|
|
618
|
+
}
|
|
619
|
+
|
|
620
|
+
if (out.filesScanned >= maxFiles) {
|
|
621
|
+
out.incomplete = true;
|
|
622
|
+
return out;
|
|
623
|
+
}
|
|
624
|
+
}
|
|
625
|
+
}
|
|
626
|
+
return out;
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
/**
 * Build a compact, serialisable copy of the accumulated walk state for a
 * checkpoint write.
 *
 * Every list is copied and truncated (fingerprint to 4096 entries, manifests
 * and each dir-hit bucket to 32, pattern hits to 64) and `extTotals` is
 * shallow-copied, so the snapshot does not alias the live arrays/maps.
 *
 * @param {object} out - live walk accumulator.
 * @returns {object} plain-object snapshot of `out`.
 */
function snapshotPartial(out) {
  const head = (list, max) => list.slice(0, max);
  const { dirHits } = out;

  const dirHitsCopy = {
    book: head(dirHits.book, 32),
    content: head(dirHits.content, 32),
    business: head(dirHits.business, 32),
    design: head(dirHits.design, 32),
  };

  return {
    files_scanned: out.filesScanned,
    total_estimate: out.totalEstimate,
    fingerprint: head(out.fingerprint, 4096),
    manifestsFound: head(out.manifestsFound, 32),
    dirHits: dirHitsCopy,
    extTotals: { ...out.extTotals },
    patternHits: head(out.patternHits, 64),
  };
}
|
|
647
|
+
|
|
648
|
+
/**
 * Pick the walk's time budget in milliseconds.
 *
 * Precedence: `options.timeBudgetMs`, then the IJFW_DETECT_TIME_BUDGET_MS
 * environment variable, then DEFAULT_TIME_BUDGET_MS. A candidate is only
 * accepted when it is a finite number greater than zero.
 *
 * @param {object} options - caller options; `timeBudgetMs` is optional.
 * @returns {number} budget in milliseconds.
 */
function resolveTimeBudgetMs(options) {
  const isUsable = (value) => Number.isFinite(value) && value > 0;

  const { timeBudgetMs } = options;
  if (isUsable(timeBudgetMs)) return timeBudgetMs;

  // An unset or empty variable is skipped without being parsed.
  const rawEnv = process.env.IJFW_DETECT_TIME_BUDGET_MS;
  const fromEnv = rawEnv ? Number(rawEnv) : NaN;

  return isUsable(fromEnv) ? fromEnv : DEFAULT_TIME_BUDGET_MS;
}
|
|
659
|
+
|
|
660
|
+
/**
 * Record a directory name against every domain bucket whose name list
 * contains it (case-insensitively).
 *
 * Hits deeper than depth 2 are ignored. A name may land in more than one
 * bucket; the lower-cased name is what gets stored.
 *
 * @param {object} out - walk accumulator; `out.dirHits.<bucket>` arrays are appended to.
 * @param {string} name - directory base name as found on disk.
 * @param {number} depth - depth of the directory's parent in the walk.
 */
function recordDirHit(out, name, depth) {
  if (depth > 2) return;

  const key = name.toLowerCase();
  const buckets = [
    [BOOK_DIRS, 'book'],
    [CONTENT_DIRS, 'content'],
    [BUSINESS_DIRS, 'business'],
    [DESIGN_DIRS, 'design'],
  ];

  for (const [knownNames, bucket] of buckets) {
    // The bucket array is only looked up once a match is confirmed.
    if (knownNames.includes(key)) out.dirHits[bucket].push(key);
  }
}
|
|
670
|
+
|
|
671
|
+
/**
 * Fold a list of detection signals into a per-domain raw score board.
 *
 * Weights by signal kind:
 *   - user_declaration       +1.0 to `s.value`
 *   - agents_md_frontmatter  +0.9 to `s.value`
 *   - brief_md_frontmatter   +0.8 to `s.value`
 *   - manifest               +0.9 to `software`
 *   - dir_book / dir_content / dir_business / dir_design
 *                            +0.4 per hit, capped at 0.6 cumulative per domain
 *   - file_extension_ratio   +0.7 * `s.ratio` to `s.domain`
 *   - filename_pattern       +`s.weight` to `s.domain`, capped at 0.8
 *                            cumulative per domain
 *
 * Every increment goes through one helper that treats a missing board slot
 * as 0. Previously a declared domain that was not one of the predefined
 * keys produced `undefined + weight` = NaN on the board.
 *
 * @param {Iterable<object>} signals - signal records, each with a `kind`.
 * @returns {Object<string, number>} raw score per domain.
 */
function scoreSignals(signals) {
  const board = {
    software: 0, book: 0, content: 0, business: 0, design: 0, mixed: 0, unknown: 0,
  };
  // Per-domain cap on cumulative filename_pattern contribution. Without
  // this, a book with 8 "chapter-NN" files dominates over a same-sized
  // companion code tree. One or two pattern hits is the signal; more is
  // repetition.
  const patternBudget = { software: 0.8, book: 0.8, content: 0.8, business: 0.8, design: 0.8, mixed: 0.8, unknown: 0.8 };
  // Per-domain cap on cumulative dir-name contribution so a repo with both
  // 'book' and 'manuscripts' does not double-count at full weight.
  const dirBudget = { software: 0.6, book: 0.6, content: 0.6, business: 0.6, design: 0.6, mixed: 0.6, unknown: 0.6 };

  // Single place where the board is mutated: a slot that is absent (or not
  // a finite number) starts from 0 instead of poisoning the sum with NaN.
  const bump = (domain, amount) => {
    const current = board[domain];
    board[domain] = (Number.isFinite(current) ? current : 0) + amount;
  };

  // Spend up to 0.4 of the remaining dir budget for `domain`.
  const spendDir = (domain) => {
    const add = Math.min(0.4, dirBudget[domain]);
    bump(domain, add);
    dirBudget[domain] -= add;
  };

  for (const s of signals) {
    switch (s.kind) {
      case 'user_declaration':
        if (s.value) bump(s.value, 1.0);
        break;
      case 'agents_md_frontmatter':
        if (s.value) bump(s.value, 0.9);
        break;
      case 'brief_md_frontmatter':
        if (s.value) bump(s.value, 0.8);
        break;
      case 'manifest':
        bump('software', 0.9);
        break;
      case 'dir_book':
        spendDir('book');
        break;
      case 'dir_content':
        spendDir('content');
        break;
      case 'dir_business':
        spendDir('business');
        break;
      case 'dir_design':
        spendDir('design');
        break;
      case 'file_extension_ratio':
        // Ratio acts as a multiplier so a 0.78 software-extension share
        // lands near the manifest weight without dwarfing it.
        bump(s.domain, 0.7 * (s.ratio || 0));
        break;
      case 'filename_pattern': {
        // Domains without a pattern budget contribute nothing.
        const add = Math.min(s.weight, patternBudget[s.domain] || 0);
        if (add > 0) {
          bump(s.domain, add);
          patternBudget[s.domain] -= add;
        }
        break;
      }
      default:
        break;
    }
  }
  return board;
}
|
|
716
|
+
|
|
717
|
+
/**
 * Turn a raw score board into a list of `{ domain, raw, score }` entries
 * ordered by descending score.
 *
 * The `mixed` and `unknown` slots are never ranked. When no remaining
 * domain has a positive raw score the result is an empty array. Each score
 * is produced by `anchor(raw, maxRaw)`, where `maxRaw` is the largest raw
 * value among the ranked domains.
 *
 * @param {Object<string, number>} board - raw score per domain.
 * @returns {Array<{domain: string, raw: number, score: number}>}
 */
function rankDomains(board) {
  const ranked = [];
  for (const [domain, raw] of Object.entries(board)) {
    if (domain === 'mixed' || domain === 'unknown') continue;
    ranked.push({ domain, raw });
  }
  if (ranked.length === 0) return [];

  let maxRaw = 0;
  for (const { raw } of ranked) {
    maxRaw = Math.max(maxRaw, raw);
  }
  if (maxRaw <= 0) return [];

  ranked.forEach((entry) => {
    entry.score = anchor(entry.raw, maxRaw);
  });
  ranked.sort((left, right) => right.score - left.score);
  return ranked;
}
|
|
735
|
+
|
|
736
|
+
/**
 * Convert a raw domain score into a confidence-style value rounded to three
 * decimals.
 *
 * The result is `min(0.95, 0.4 + 0.55 * tanh(raw)) * (raw / maxRaw)`. The
 * first factor is computed from this entry's own `raw`, so the top entry
 * (raw === maxRaw) gets roughly 0.61 for raw 0.4 and roughly 0.82 for raw
 * 1.0, and never exceeds 0.95. Non-positive raw scores map to 0.
 *
 * @param {number} raw - this domain's raw score.
 * @param {number} maxRaw - largest raw score among the ranked domains.
 * @returns {number} anchored score.
 */
function anchor(raw, maxRaw) {
  if (raw <= 0) return 0;

  const ceiling = Math.min(0.95, 0.4 + 0.55 * Math.tanh(raw));
  const share = raw / maxRaw;
  const rounded = (ceiling * share).toFixed(3);
  return Number(rounded);
}
|
|
744
|
+
|
|
745
|
+
/**
 * Hash a list of paths into a short fingerprint.
 *
 * Each path is fed to SHA-256 followed by a newline, in the order given;
 * the first 16 hex characters of the digest are returned. A missing or
 * empty list yields the empty string.
 *
 * @param {string[]|null|undefined} paths - paths to fingerprint.
 * @returns {string} 16-character hex prefix, or '' when there is no input.
 */
function fileTreeHash(paths) {
  const hasInput = Boolean(paths) && paths.length !== 0;
  if (!hasInput) return '';

  const hasher = createHash('sha256');
  for (let i = 0; i < paths.length; i += 1) {
    hasher.update(paths[i] + '\n');
  }
  const hex = hasher.digest('hex');
  return hex.slice(0, 16);
}
|
|
751
|
+
|
|
752
|
+
/**
 * Best-effort fingerprint of the project's current git HEAD.
 *
 * Reads `<root>/.git`: when it is a directory, HEAD is read from inside it;
 * when it is a file, its `gitdir: <path>` line is followed (relative paths
 * resolve against `root`) and HEAD is read from that directory. If HEAD is
 * of the form `ref: <name>`, the ref name is hashed; otherwise the trimmed
 * HEAD content is hashed.
 *
 * Any failure (no `.git`, unreadable files, no `gitdir:` line, missing
 * HEAD) produces the empty string rather than an exception.
 *
 * @param {string} root - project root directory.
 * @returns {string} first 16 hex chars of SHA-256 over the ref/HEAD, or ''.
 */
function branchHash(root) {
  const shortDigest = (text) => createHash('sha256').update(text).digest('hex').slice(0, 16);

  try {
    const dotGit = join(root, '.git');
    if (!existsSync(dotGit)) return '';

    let info;
    try {
      info = statSync(dotGit);
    } catch {
      return '';
    }

    let gitDir;
    if (info.isDirectory()) {
      gitDir = dotGit;
    } else if (info.isFile()) {
      // `.git` is a pointer file: follow its gitdir line.
      const pointer = /^gitdir:\s*(.+?)\s*$/m.exec(readFileSync(dotGit, 'utf8'));
      if (!pointer) return '';
      const target = pointer[1];
      gitDir = isAbsolute(target) ? target : pathResolve(root, target);
    } else {
      return '';
    }

    const headPath = join(gitDir, 'HEAD');
    if (!existsSync(headPath)) return '';

    const head = readFileSync(headPath, 'utf8').trim();
    const symbolic = /^ref:\s*(.+)$/.exec(head);
    return shortDigest(symbolic ? symbolic[1] : head);
  } catch {
    /* best-effort */
    return '';
  }
}
|
|
787
|
+
|
|
788
|
+
// Cheap-tier file-tree hash for cache invalidation (P3-H2). Mirrors the
|
|
789
|
+
// input that fileTreeHash() consumed when the result was first written:
|
|
790
|
+
// up to 4096 sorted relative paths from a depth-bounded walk.
|
|
791
|
+
/**
 * Recompute a file-tree fingerprint for `root` without running the full
 * detector walk.
 *
 * Performs an iterative depth-first walk. Directories deeper than MAX_DEPTH
 * or named in SKIP_DIRS are not entered, entries are visited in name order,
 * and only regular files are collected (as paths relative to `root`). The
 * walk stops after 4096 files; the collected list is passed to
 * `fileTreeHash`. Unreadable directories are skipped silently.
 *
 * @param {string} root - project root directory.
 * @returns {string} value returned by `fileTreeHash` for the collected paths.
 */
function cheapTreeHash(root) {
  const limit = 4096;
  const relPaths = [];
  const byName = (a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0);
  const pending = [{ path: root, depth: 0 }];

  while (pending.length > 0 && relPaths.length < limit) {
    const { path: dir, depth } = pending.pop();
    if (depth > MAX_DEPTH) continue;

    let listing;
    try {
      listing = readdirSync(dir, { withFileTypes: true });
    } catch {
      continue;
    }
    listing.sort(byName);

    for (const item of listing) {
      const full = join(dir, item.name);
      if (item.isDirectory()) {
        if (!SKIP_DIRS.has(item.name)) {
          pending.push({ path: full, depth: depth + 1 });
        }
      } else if (item.isFile()) {
        relPaths.push(full.slice(root.length + 1));
        if (relPaths.length >= limit) break;
      }
    }
  }
  return fileTreeHash(relPaths);
}
|
|
817
|
+
|
|
818
|
+
// P3-M2: lightweight C9 availability probe. The detector defaults to
|
|
819
|
+
// c9Available=true, but a missing FTS5 backend should auto-flip the flag
|
|
820
|
+
// so the caller doesn't silently get an unfiltered confidence score.
|
|
821
|
+
// Cached for the session's lifetime -- the probe is best-effort and the
|
|
822
|
+
// answer doesn't change once a process is running.
|
|
823
|
+
// Shared memo for both availability probes: null until one of them has run,
// then the boolean answer it produced.
let _c9AvailableCache = null;

/**
 * Report whether the `./compute/fts5.js` module can be loaded.
 *
 * The first call attempts a dynamic import and stores the outcome (true on
 * success, false on any import error); later calls return the stored value
 * without importing again.
 *
 * @returns {Promise<boolean>} cached availability flag.
 */
export async function isC9Available() {
  if (_c9AvailableCache === null) {
    let loaded = true;
    try {
      await import('./compute/fts5.js');
    } catch {
      loaded = false;
    }
    _c9AvailableCache = loaded;
  }
  return _c9AvailableCache;
}
|
|
834
|
+
|
|
835
|
+
// Sync sibling for the synchronous detect() entrypoint -- existsSync of
|
|
836
|
+
// the compute/fts5.js module is a cheap proxy that matches the dynamic
|
|
837
|
+
// import outcome on every install layout we ship.
|
|
838
|
+
/**
 * Synchronous availability probe sharing `_c9AvailableCache` with the async
 * variant.
 *
 * When the cache is still null, checks whether `compute/fts5.js` exists next
 * to this module's own file and stores the result; any error while resolving
 * the path stores false. Later calls return the stored value.
 *
 * @returns {boolean} cached availability flag.
 */
function isC9AvailableSync() {
  if (_c9AvailableCache === null) {
    let present;
    try {
      const moduleDir = dirname(fileURLToPath(import.meta.url));
      present = existsSync(join(moduleDir, 'compute', 'fts5.js'));
    } catch {
      present = false;
    }
    _c9AvailableCache = present;
  }
  return _c9AvailableCache;
}
|
|
849
|
+
|
|
850
|
+
/**
 * Produce a short identifier for a scan.
 *
 * Hashes the process id, the current epoch milliseconds and a
 * `Math.random()` value with SHA-256 and keeps the first 12 hex characters.
 * Not derived from a cryptographically secure random source.
 *
 * @returns {string} 12-character lowercase hex string.
 */
function newScanId() {
  const seed = `${process.pid}:${Date.now()}:${Math.random()}`;
  const hex = createHash('sha256').update(seed).digest('hex');
  return hex.slice(0, 12);
}
|
|
856
|
+
|
|
857
|
+
// Test-only export surface.
|
|
858
|
+
/**
 * Test-only export surface: exposes module-private helpers and lookup
 * tables so unit tests can reach them without widening the public API.
 */
export const __test = {
  // Scoring helpers.
  scoreSignals,
  rankDomains,
  // Filesystem helpers.
  readFrontmatterType,
  walkProject,
  fileTreeHash,
  // Lookup tables.
  EXT_DOMAIN,
  SKIP_DIRS,
};
|