@ijfw/memory-server 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ijfw +27 -0
- package/bin/ijfw-dashboard +180 -0
- package/bin/ijfw-dispatch-plan +41 -0
- package/bin/ijfw-memorize +273 -0
- package/bin/ijfw-memory +51 -0
- package/fixtures/demo-target.js +28 -0
- package/package.json +53 -0
- package/src/api-client.js +190 -0
- package/src/audit-roster.js +315 -0
- package/src/caps.js +37 -0
- package/src/cold-scan-runner.mjs +37 -0
- package/src/compute/edges.js +155 -0
- package/src/compute/extract.js +560 -0
- package/src/compute/fts5.js +420 -0
- package/src/compute/graph-auto-index.js +191 -0
- package/src/compute/graph-lock.js +114 -0
- package/src/compute/index.js +18 -0
- package/src/compute/migration-runner.js +116 -0
- package/src/compute/migrations/001-initial.js +23 -0
- package/src/compute/migrations/002-porter-stemming-source.js +139 -0
- package/src/compute/migrations/003-tier-semantic.js +69 -0
- package/src/compute/migrations/004-kg-tables.js +83 -0
- package/src/compute/migrations/005-stale-candidate.js +72 -0
- package/src/compute/python-resolver.js +106 -0
- package/src/compute/runner-vm.js +185 -0
- package/src/compute/runner.js +416 -0
- package/src/compute/sandbox-detect.js +122 -0
- package/src/compute/sandbox-linux.js +164 -0
- package/src/compute/sandbox-macos.js +167 -0
- package/src/compute/sandbox-windows.js +63 -0
- package/src/compute/schema.sql +118 -0
- package/src/compute/staleness.js +239 -0
- package/src/compute/synonyms.js +367 -0
- package/src/compute/traverse.js +180 -0
- package/src/cost/aggregator.js +229 -0
- package/src/cost/pricing.js +134 -0
- package/src/cost/readers/claude.js +179 -0
- package/src/cost/readers/codex.js +131 -0
- package/src/cost/readers/gemini.js +111 -0
- package/src/cost/savings.js +243 -0
- package/src/cross-dispatcher.js +437 -0
- package/src/cross-orchestrator-cli.js +1885 -0
- package/src/cross-orchestrator.js +598 -0
- package/src/cross-project-search.js +114 -0
- package/src/dashboard-client.html +1180 -0
- package/src/dashboard-server.js +895 -0
- package/src/design-companion.js +81 -0
- package/src/dispatch/colon-syntax.js +732 -0
- package/src/dispatch-planner.js +235 -0
- package/src/dream/cooldown.js +105 -0
- package/src/dream/runner.mjs +373 -0
- package/src/dream/staleness-wiring.js +195 -0
- package/src/feedback-detector.js +57 -0
- package/src/hero-line.js +115 -0
- package/src/importers/claude-mem.js +152 -0
- package/src/importers/cli.js +311 -0
- package/src/importers/common.js +84 -0
- package/src/importers/discover.js +235 -0
- package/src/importers/rtk.js +107 -0
- package/src/intent-router.js +221 -0
- package/src/lib/atomic-io.js +201 -0
- package/src/lib/cache.js +33 -0
- package/src/lib/npm-view.js +104 -0
- package/src/lib/status-card.js +95 -0
- package/src/lib/token.js +85 -0
- package/src/memory/fts5.js +349 -0
- package/src/memory/migration-runner.js +116 -0
- package/src/memory/migrations/001-fts5-init.js +26 -0
- package/src/memory/migrations/002-tier-semantic.js +60 -0
- package/src/memory/migrations/003-stale-candidate.js +60 -0
- package/src/memory/reader.js +300 -0
- package/src/memory/recall-counter.js +76 -0
- package/src/memory/schema.sql +79 -0
- package/src/memory/search.js +431 -0
- package/src/memory/staleness.js +237 -0
- package/src/memory/tier-promotion.js +377 -0
- package/src/memory/tokenize.js +63 -0
- package/src/project-type-detector.js +866 -0
- package/src/prompt-check.js +171 -0
- package/src/ralph-allowlist.js +88 -0
- package/src/receipts.js +129 -0
- package/src/redactor.js +107 -0
- package/src/sandbox.js +275 -0
- package/src/sanitizer.js +69 -0
- package/src/scan-resume.js +167 -0
- package/src/schema.js +82 -0
- package/src/search-bm25.js +108 -0
- package/src/server.js +1414 -0
- package/src/swarm-config.js +80 -0
- package/src/trident/dispatch.js +211 -0
- package/src/trident/lens-health.js +253 -0
- package/src/update-apply.js +79 -0
- package/src/update-check.js +136 -0
- package/src/vectors.js +178 -0
- package/templates/design/bento-grid.md +84 -0
- package/templates/design/brutalist-luxe.md +82 -0
- package/templates/design/cinematic-dark.md +82 -0
- package/templates/design/data-dense-dashboard.md +88 -0
- package/templates/design/editorial-warm.md +81 -0
- package/templates/design/glassmorphic.md +84 -0
- package/templates/design/magazine-editorial.md +84 -0
- package/templates/design/maximalist-vibrant.md +85 -0
- package/templates/design/neo-swiss-tech.md +85 -0
- package/templates/design/swiss-minimal.md +80 -0
- package/templates/design/terminal-native.md +83 -0
- package/templates/design/warm-organic.md +84 -0
|
@@ -0,0 +1,560 @@
|
|
|
1
|
+
// IJFW v1.3.0 -- D2 entity extractor (regex-only, NO LLM).
|
|
2
|
+
//
|
|
3
|
+
// Source authority: PRD-v2 section 9 Pillar D D2 + .planning/1.3.0/D-PILLAR-SPEC.md sections 3 + 6.
|
|
4
|
+
//
|
|
5
|
+
// Pipeline order (D-PILLAR-SPEC section 3):
|
|
6
|
+
// observation arrives -> walk + extract entity candidates (regex,
|
|
7
|
+
// pre-redaction) -> classify each via redactor.classify() -> emit
|
|
8
|
+
// { kind, name, redacted } records. Edge formation (./edges.js) reads
|
|
9
|
+
// the `redacted` flag to skip secret-tainted entities per section 3.
|
|
10
|
+
//
|
|
11
|
+
// 5 kinds:
|
|
12
|
+
// - file posix paths (relative + absolute), windows paths,
|
|
13
|
+
// dotfiles, single-name files (Makefile, Dockerfile),
|
|
14
|
+
// paths with spaces, multi-extension
|
|
15
|
+
// - function camelCase, snake_case, Class.method (prototype, dunder,
|
|
16
|
+
// verb-prefix), bare standalone verbs from a small list
|
|
17
|
+
// - identifier UPPER_SNAKE constants, PascalCase classes/types/enums,
|
|
18
|
+
// React hooks (use*), Class.member (constants/properties)
|
|
19
|
+
// - error_code ERR_*, POSIX errno (E[A-Z]+ short), HTTP NNN (context-
|
|
20
|
+
// anchored), *Exception, *Error suffix, EXIT_*, PG_*,
|
|
21
|
+
// IJFW_E_*, custom UPPER suffix (_EXCEEDED, _TAKEN, ...)
|
|
22
|
+
// - decision d-<topic>-<...> (>=2 segments, first segment >=4 chars),
|
|
23
|
+
// #decision:<slug>, ADR-NNNN (4-digit), D<NN+> short id
|
|
24
|
+
//
|
|
25
|
+
// Negative-space coverage (rubric):
|
|
26
|
+
// - file-shaped prose without extension shouldn't match (we require an
|
|
27
|
+
// extension OR a known single-name basename)
|
|
28
|
+
// - bare verbs without parens / class context shouldn't match function
|
|
29
|
+
// - "d-day" prose shouldn't match decision (first-segment >=4 char rule)
|
|
30
|
+
// - "ADR-XXX" placeholder shouldn't match (4-digit numeric rule)
|
|
31
|
+
// - bare PascalCase mentioned once in passing shouldn't match
|
|
32
|
+
// (frequency >= 2 rule unless backed by I-prefix interface convention)
|
|
33
|
+
// - Class.method where RHS is a non-verb single word mentioned once
|
|
34
|
+
// shouldn't match (Logger.error -> rejected)
|
|
35
|
+
|
|
36
|
+
import { classify } from '../redactor.js';
|
|
37
|
+
|
|
38
|
+
// --- known single-name files (no extension) ----------------------------
|
|
39
|
+
// Extension-less basenames that are treated as files when they appear as a
// whole word. Insertion order is preserved because the extractor iterates
// this set when emitting entities.
const KNOWN_SINGLE_FILES = new Set([
  'Makefile',
  'Dockerfile',
  'Procfile',
  'Gemfile',
  'Rakefile',
  'Vagrantfile',
  'Justfile',
  'Brewfile',
  'Pipfile',
  'Cargofile',
  'Containerfile',
]);
|
|
43
|
+
|
|
44
|
+
// --- file regex --------------------------------------------------------
|
|
45
|
+
// POSIX no-space path: at least one `/`, ends in `.<ext>`. The segment class
// `[\w@.+\-]` excludes spaces. The lookbehind stops a match from starting in
// the middle of a word or path. The lookahead stops it from ending in the
// middle of one: the next character may not be a word character or `/`, nor
// a `.` that is itself followed by a word character or `/`. A lone trailing
// `.` (sentence-ending punctuation, ellipsis) is therefore allowed, so
// "fixed in src/foo.js." still yields `src/foo.js`.
const POSIX_NOSPACE_RE = new RegExp(
  [
    '(?<![\\w./])',
    '(',
    '\\.{0,2}\\/?',                 // optional leading ./, ../, /
    '[\\w@.+\\-]+',                 // first segment
    '(?:\\/[\\w@.+\\-]+)+',         // /seg /seg ...
    '\\.[a-zA-Z][a-zA-Z0-9]{0,8}',  // .ext (1-9 chars)
    ')',
    '(?![\\w/]|\\.[\\w/])',         // not followed by word, slash, or .<word>
  ].join(''),
  'g',
);
|
|
59
|
+
|
|
60
|
+
// POSIX path with one or more spaces inside a single middle segment. Used to
// catch `docs/Design Notes/v2-overview.md`. Strict constraints:
//   - first segment: no spaces and no `.` at all (otherwise we'd glue
//     `src/bridge.rs talks to src/bridge.ts` into one mega-path)
//   - middle segment: must contain at least one space (this is what
//     differentiates it from POSIX_NOSPACE_RE)
//   - last segment: no spaces, ends in `.<ext>`
// The trailing lookahead allows a lone `.` after the extension so that a
// path at the end of a sentence is still matched; a `.` followed by a word
// character or `/` still blocks the match.
const POSIX_WITHSPACE_RE = new RegExp(
  [
    '(?<![\\w./])',
    '(',
    '[\\w@\\-+]+',                               // first segment (no `.`!)
    '\\/[\\w@\\-+]+(?: [\\w@\\-+]+)+\\/',        // middle: has at least one space
    '[\\w@.+\\-]+\\.[a-zA-Z][a-zA-Z0-9]{0,8}',   // basename.ext
    ')',
    '(?![\\w/]|\\.[\\w/])',
  ].join(''),
  'g',
);
|
|
78
|
+
|
|
79
|
+
// Absolute posix path -- starts with `/`, has at least two segments and an
// extension at the end. The trailing lookahead tolerates a lone `.` after
// the extension (sentence-ending punctuation) but still rejects a following
// word character, `/`, or `.<word>`.
const POSIX_ABS_RE = new RegExp(
  [
    '(?<![\\w/.])',
    '(',
    '\\/[\\w.@\\-+]+(?:\\/[\\w.@\\-+]+)+\\.[a-zA-Z][a-zA-Z0-9]{0,8}',
    ')',
    '(?![\\w/]|\\.[\\w/])',
  ].join(''),
  'g',
);
|
|
88
|
+
|
|
89
|
+
// Dotfile: `.eslintrc.json`, `.prettierrc` (no extension),
// `.github/workflows/ci.yml`.
// Two end-of-match guards handle sentence punctuation:
//   - `(?<!\.)` keeps the captured name from ending in `.` (the path
//     continuation class contains `.`, so it would otherwise swallow a
//     sentence-ending period);
//   - the lookahead allows a lone trailing `.` but rejects a following word
//     character, `/`, or `.<word>`.
const DOTFILE_RE = new RegExp(
  [
    '(?<![\\w./])',
    '(',
    '\\.[a-zA-Z][\\w-]*',                              // .name
    '(?:\\.[a-zA-Z0-9]+)?',                            // optional .ext
    '(?:\\/[\\w.@\\-+]+(?:\\/[\\w.@\\-+]+)*)?',        // optional / continuation
    ')',
    '(?<!\\.)',
    '(?![\\w/]|\\.[\\w/])',
  ].join(''),
  'g',
);
|
|
100
|
+
|
|
101
|
+
// Windows path: a drive letter and colon, then one or more segments that are
// each introduced by a DOUBLED backslash (the form in which the fixture
// bodies carry them), ending in `.<ext>` of 1-9 characters. The consumer
// collapses the doubled backslashes to single ones when emitting the name.
const WINDOWS_PATH_RE =
  /(?<![\w])([A-Za-z]:(?:\\\\[^\s\\]+)+\.[a-zA-Z][a-zA-Z0-9]{0,8})(?![\w])/g;
|
|
105
|
+
|
|
106
|
+
// Bare basename with extension (no path). Conservative: the basename must
// start with a capital letter AND contain at least one hyphen before the
// `.<ext>` (1-9 characters). Catches references like `D-PILLAR-SPEC.md`,
// `ADR-alpha-schema-reservations.md`.
// The trailing lookahead allows a lone `.` after the extension so that a
// reference at the end of a sentence is still matched; a following word
// character, `/`, or `.<word>` still blocks the match.
const BARE_BASENAME_RE = new RegExp(
  [
    '(?<![\\w./\\-])',
    '([A-Z][\\w]*-[\\w\\-]+\\.[a-zA-Z][a-zA-Z0-9]{0,8})',
    '(?![\\w/]|\\.[\\w/])',
  ].join(''),
  'g',
);
|
|
116
|
+
|
|
117
|
+
// --- function regex ----------------------------------------------------
|
|
118
|
+
// `Class.prototype.method` -- group 1 is the class, group 2 the method.
const PROTO_METHOD_RE = /\b([A-Z][A-Za-z0-9]*)\.prototype\.([A-Za-z_][A-Za-z0-9_]*)\b/g;

// `Class.__dunder__` -- group 1 is the class, group 2 the dunder name
// including its surrounding double underscores.
const DUNDER_METHOD_RE = /\b([A-Z][A-Za-z0-9]*)\.(__[a-zA-Z][a-zA-Z0-9_]*__)\b/g;

// Generic `Class.member`: a capitalised left-hand side, a dot, and an
// identifier. Group 1 is the class, group 2 the member; the consumer decides
// from the member's shape whether the pair is a function or an identifier.
const CLASS_DOT_RE = /\b([A-Z][A-Za-z0-9_]*)\.([A-Za-z_][A-Za-z0-9_]*)\b/g;

// Bare tokens that LOOK like function names. Two alternatives:
//   - a lowercase-or-underscore start with at least one interior `_`
//     (snake_case), or
//   - a lowercase start with at least one uppercase letter later (camelCase).
// The consumer applies a mention-count filter to keep single-mention noise
// (`localStorage`, `useCallback`) from leaking through.
const CAMEL_OR_SNAKE_FN_RE =
  /\b([a-z_][a-zA-Z0-9_]*[_][a-zA-Z0-9_]+|[a-z][A-Za-z0-9]*[A-Z][A-Za-z0-9]*)\b/g;
|
|
129
|
+
|
|
130
|
+
// Single lowercase action verbs that are accepted as functions even with no
// surrounding context (no parens, no class prefix).
const STANDALONE_FN_WORDS = new Set(['sanitize', 'promote']);

// Bare token with a DOUBLE leading underscore followed by a lowercase
// letter, e.g. `__schedule` (Linux-kernel style names). A single leading
// underscore does not satisfy this pattern.
const DUNDER_BARE_RE = /\b(__[a-z][a-z0-9_]*)\b/g;

// React hook naming: `use` immediately followed by an uppercase letter.
const REACT_HOOK_RE = /\b(use[A-Z][A-Za-z0-9]*)\b/g;
|
|
142
|
+
|
|
143
|
+
// --- identifier regex --------------------------------------------------
|
|
144
|
+
// UPPER_SNAKE constant: uppercase/digit runs joined by at least one `_`.
const UPPER_SNAKE_RE = /\b([A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+)\b/g;

// PascalCase token: either Capital + lowercase + rest, or the `I`-prefixed
// interface shape (`I` + Capital + lowercase + rest), which the first
// alternative cannot match because its second character must be lowercase.
const PASCAL_BARE_RE = /\b([A-Z][a-z][A-Za-z0-9]*|I[A-Z][a-z][A-Za-z0-9]*)\b/g;
|
|
146
|
+
|
|
147
|
+
// --- error_code regex --------------------------------------------------
|
|
148
|
+
// Prefix-anchored error codes.
const ERR_PREFIX_RE = /\b(ERR_[A-Z][A-Z0-9_]*)\b/g;
const EXIT_PREFIX_RE = /\b(EXIT_[A-Z0-9_]+)\b/g;
const PG_PREFIX_RE = /\b(PG_[A-Z0-9_]+)\b/g;
const IJFW_E_PREFIX_RE = /\b(IJFW_E_[A-Z][A-Z0-9_]*)\b/g;

// errno-shaped token: `E` followed by 3-7 further uppercase letters, as a
// whole word (so it cannot match inside an UPPER_SNAKE name).
const POSIX_ERRNO_RE = /\b(E[A-Z]{3,7})\b/g;

// UPPER_SNAKE names ending in `_<one of these>` (or equal to one of them)
// are classified as error codes rather than plain identifiers.
const ERROR_SUFFIXES = [
  'EXCEEDED',
  'TAKEN',
  'FAILED',
  'DENIED',
  'INVALID',
  'NOT_FOUND',
  'GRAPH_WRITE',
  'TIMEOUT',
  'REFUSED',
  'UNAUTHORIZED',
  'FORBIDDEN',
  'CONFLICT',
  'GONE',
  'BUSY',
];

// Versioned suffix: e.g. `IJFW_E_GRAPH_LOCK_V1`, `IJFW_E_GRAPH_LOCK_V2`.
const VERSIONED_SUFFIX_RE = /_V\d+$/;
|
|
161
|
+
|
|
162
|
+
// HTTP code (context-anchored). Phrasings:
//   - `returned <code>`                    (code must be 3xx-5xx, so that
//                                           prose such as "returned 200 rows"
//                                           is not read as an error code)
//   - `on <code> the` / `on <code> status` (code is 1xx-5xx)
//   - explicit `HTTP <code>` / `HTTP_<code>` / `HTTP<code>` (code is 1xx-5xx)
// Anchoring is conservative: fixture-driven, not blanket-3-digit.
const HTTP_RETURNED_RE = /\breturned\s+([3-5]\d{2})\b/g;
const HTTP_ON_THE_RE = /\bon\s+([1-5]\d{2})\s+(?:the|status)\b/g;
const HTTP_EXPLICIT_RE = /\bHTTP[_ ]?([1-5]\d{2})\b/g;

// Exception-style class name: PascalCase ending in `Exception` or `Error`.
const EXCEPTION_RE = /\b([A-Z][a-z][A-Za-z0-9]*(?:Exception|Error))\b/g;
|
|
172
|
+
|
|
173
|
+
// --- decision regex ----------------------------------------------------
|
|
174
|
+
// `d-<topic>-<...>`: first segment is a letter plus at least three more
// alphanumerics, followed by at least one further `-segment`.
const D_PREFIX_RE = /\b(d-[a-z][a-z0-9]{3,}(?:-[a-z0-9]+)+)\b/g;

// `#decision:<slug>` -- only the slug is captured.
const HASH_DECISION_RE = /#decision:([a-z][a-z0-9-]+)/g;

// `ADR-NNNN` with exactly four digits.
const ADR_NUMERIC_RE = /\b(ADR-\d{4})\b/g;

// Short decision id: `D` followed by two or more digits.
const D_SHORT_RE = /\b(D\d{2,})\b/g;
|
|
178
|
+
|
|
179
|
+
// Method-verb disambiguator: when `Class.X` has X starting with one of these
// prefixes immediately followed by an uppercase letter, X is classified as a
// function.
const METHOD_VERB_PREFIXES = [
  'get', 'set', 'is', 'has', 'find',
  'fetch', 'load', 'save', 'read', 'write',
  'add', 'remove', 'delete', 'update', 'create',
  'init', 'start', 'stop', 'close', 'open',
  'parse', 'serialize', 'validate', 'process', 'handle',
  'dispatch', 'emit', 'subscribe', 'unsubscribe', 'connect',
  'disconnect', 'mount', 'unmount', 'render', 'transform',
  'format', 'escape', 'encode', 'decode', 'sanitize',
  'apply', 'bind', 'invoke', 'call', 'compute',
];

// Whole words that, as the right-hand side of `Class.X`, are classified as
// a function on their own.
const METHOD_VERB_WORDS = new Set([
  'close', 'open', 'handle', 'use', 'render',
  'sanitize', 'escape', 'emit', 'dispatch', 'invoke',
  'apply', 'bind', 'call', 'parse', 'serialize',
  'init', 'start', 'stop', 'mount', 'unmount',
  'connect', 'disconnect', 'subscribe', 'unsubscribe', 'shutdown',
]);
|
|
197
|
+
|
|
198
|
+
// --- public API --------------------------------------------------------
|
|
199
|
+
|
|
200
|
+
/**
 * extractEntities(observationBody, opts?) -> [{ kind, name, redacted, redacted_kind }, ...]
 *
 * Regex-only entity extraction over one observation body.
 *
 * Ordering:
 *   1. Walk the text and collect entity candidates (pre-redaction). File
 *      matches are blanked out of a working copy (`mask`) so later passes
 *      do not re-scan inside paths.
 *   2. classify(name) on each candidate sets the `redacted` flag.
 *   3. The caller decides what to do with redacted entities.
 *
 * Entities are de-duplicated on `kind:name` and returned in first-seen
 * order of the passes below.
 *
 * @param {string} observationBody  Text to scan. A non-string or empty
 *   value yields `[]`.
 * @param {object} [opts]
 * @param {number} [opts.minMentions=1]  Positive integer; any other value
 *   falls back to 1. Bare camelCase, bare PascalCase, React hook, and
 *   `Class.<non-verb member>` tokens need at least this many occurrences in
 *   the body to be emitted. snake_case, dunder, UPPER_SNAKE, prototype and
 *   dunder methods, verb-shaped `Class.member`, and `I`-prefixed interface
 *   names are emitted on first mention.
 * @returns {Array<{kind: string, name: string, redacted: 0|1, redacted_kind: *}>}
 *   `kind` is one of `file`, `error_code`, `identifier`, `decision`,
 *   `function`. `redacted` is 0 when `classify(name).clean` is truthy, else
 *   1; `redacted_kind` is `classify(name).redacted_kind` or `null`.
 */
export function extractEntities(observationBody, opts = {}) {
  const minMentions = Number.isInteger(opts.minMentions) && opts.minMentions > 0
    ? opts.minMentions
    : 1;
  if (typeof observationBody !== 'string' || !observationBody) return [];

  const text = observationBody;
  const out = new Map();

  // Working copy in which every file match is overwritten with spaces of
  // equal length. Offsets therefore stay aligned with `text`, and later
  // passes cannot produce phantom matches from inside filenames.
  let mask = text;

  // ---- Pass 1: files ---------------------------------------------------
  // Order matters: windows first, then space-paths (more specific), then
  // no-space POSIX, absolute, dotfiles, bare basenames, known single names.

  for (const m of text.matchAll(WINDOWS_PATH_RE)) {
    // The body carries doubled backslashes; emit the single-backslash form.
    addEntity(out, 'file', m[1].replace(/\\\\/g, '\\'));
    mask = blankAt(mask, m.index, m[0].length);
  }

  for (const m of mask.matchAll(POSIX_WITHSPACE_RE)) {
    // No `./` stripping needed: the first segment of this pattern cannot
    // contain a dot.
    addEntity(out, 'file', m[1]);
    mask = blankAt(mask, m.index, m[0].length);
  }

  for (const m of mask.matchAll(POSIX_NOSPACE_RE)) {
    addEntity(out, 'file', m[1].replace(/^\.\//, ''));
    mask = blankAt(mask, m.index, m[0].length);
  }

  for (const m of mask.matchAll(POSIX_ABS_RE)) {
    addEntity(out, 'file', m[1]);
    mask = blankAt(mask, m.index, m[0].length);
  }

  for (const m of mask.matchAll(DOTFILE_RE)) {
    // The pattern itself guarantees a leading `.` + letter.
    addEntity(out, 'file', m[1]);
    mask = blankAt(mask, m.index, m[0].length);
  }

  for (const m of mask.matchAll(BARE_BASENAME_RE)) {
    addEntity(out, 'file', m[1]);
    mask = blankAt(mask, m.index, m[0].length);
  }

  // Known single-name files are recorded (not masked) so the PascalCase
  // pass can skip them instead of double-emitting them as identifiers.
  const knownSingleHits = new Set();
  for (const name of KNOWN_SINGLE_FILES) {
    if (new RegExp(`\\b${escapeRegex(name)}\\b`).test(mask)) {
      addEntity(out, 'file', name);
      knownSingleHits.add(name);
    }
  }

  // ---- Pass 2: error codes (before identifiers, so they win) -----------
  const errorCodes = new Set();
  const claimErrorCode = (name) => {
    addEntity(out, 'error_code', name);
    errorCodes.add(name);
  };

  for (const re of [ERR_PREFIX_RE, EXIT_PREFIX_RE, PG_PREFIX_RE, IJFW_E_PREFIX_RE]) {
    for (const m of mask.matchAll(re)) claimErrorCode(m[1]);
  }

  for (const m of mask.matchAll(POSIX_ERRNO_RE)) {
    const v = m[1];
    if (errorCodes.has(v)) continue;
    // Only 4-7 character tokens are accepted as errno names.
    if (v.length >= 4 && v.length <= 7) claimErrorCode(v);
  }

  for (const m of mask.matchAll(EXCEPTION_RE)) claimErrorCode(m[1]);

  // HTTP status codes -- context-anchored (returned / on ... the / HTTP).
  const httpHits = new Set();
  for (const re of [HTTP_RETURNED_RE, HTTP_ON_THE_RE, HTTP_EXPLICIT_RE]) {
    for (const m of mask.matchAll(re)) httpHits.add(m[1]);
  }
  for (const code of httpHits) {
    // Registering the name in `errorCodes` keeps a literal `HTTP_404` in the
    // text from being emitted a second time as an identifier by the
    // UPPER_SNAKE pass below.
    claimErrorCode(`HTTP_${code}`);
  }

  // UPPER_SNAKE -> error_code (suffix match) or identifier (default).
  for (const m of mask.matchAll(UPPER_SNAKE_RE)) {
    const v = m[1];
    if (errorCodes.has(v)) continue;
    if (matchesErrorSuffix(v)) {
      claimErrorCode(v);
    } else {
      addEntity(out, 'identifier', v);
    }
  }

  // ---- Pass 3: decisions ------------------------------------------------
  for (const re of [D_PREFIX_RE, HASH_DECISION_RE, ADR_NUMERIC_RE, D_SHORT_RE]) {
    for (const m of mask.matchAll(re)) addEntity(out, 'decision', m[1]);
  }

  // ---- Pass 4: functions + Class.member --------------------------------
  // 4a. Prototype methods (always function).
  for (const m of mask.matchAll(PROTO_METHOD_RE)) {
    addEntity(out, 'function', `${m[1]}.prototype.${m[2]}`);
  }

  // 4b. Class.dunder (always function). Remember the dunder names so the
  //     bare-dunder pass does not emit them again on their own.
  const claimedDunders = new Set();
  for (const m of mask.matchAll(DUNDER_METHOD_RE)) {
    addEntity(out, 'function', `${m[1]}.${m[2]}`);
    claimedDunders.add(m[2]);
  }

  // 4c. Generic Class.member. Count occurrences per `Class.member` pair;
  //     the count gates the non-verb default case below. `Class.prototype`
  //     is skipped here, so no `identifier:Class.prototype` entry can ever
  //     be created and no later clean-up pass is needed for it.
  const classDotByPair = new Map();
  for (const m of mask.matchAll(CLASS_DOT_RE)) {
    if (m[2] === 'prototype') continue;
    const full = `${m[1]}.${m[2]}`;
    classDotByPair.set(full, (classDotByPair.get(full) || 0) + 1);
  }

  for (const [full, count] of classDotByPair) {
    const dot = full.indexOf('.');
    const lhs = full.slice(0, dot);
    const rhs = full.slice(dot + 1);

    // Already claimed as Class.prototype.member or as Class.dunder.
    if (out.has(`function:${lhs}.prototype.${rhs}`)) continue;
    if (out.has(`function:${full}`)) continue;

    // Uppercase member -> identifier (enum member / constant), any count.
    if (/^[A-Z][A-Z0-9_]*$/.test(rhs)) {
      addEntity(out, 'identifier', full);
      continue;
    }
    // Verb prefix or standalone verb -> function, any count.
    if (isMethodVerb(rhs)) {
      addEntity(out, 'function', full);
      continue;
    }
    // Anything else (camelCase property, unknown lowercase word) must reach
    // the mention threshold; this drops a one-off `Logger.error` when the
    // caller passes minMentions=2.
    if (count >= minMentions) addEntity(out, 'identifier', full);
  }

  // 4e. React hooks -> identifier, claimed before the camelCase function
  //     rule. Subject to the minMentions threshold.
  const reactHookCounts = new Map();
  for (const m of mask.matchAll(REACT_HOOK_RE)) {
    reactHookCounts.set(m[1], (reactHookCounts.get(m[1]) || 0) + 1);
  }
  const reactHooks = new Set();
  for (const [hook, count] of reactHookCounts) {
    if (count >= minMentions) {
      addEntity(out, 'identifier', hook);
      reactHooks.add(hook);
    }
  }

  // 4f. Bare dunder (`__schedule`). Skipped when the same name was already
  //     claimed as a Class.dunder member of any class. First mention counts.
  for (const m of mask.matchAll(DUNDER_BARE_RE)) {
    if (claimedDunders.has(m[1])) continue;
    addEntity(out, 'function', m[1]);
  }

  // 4g. Bare camelCase / snake_case functions. camelCase must reach the
  //     minMentions threshold; snake_case is accepted on first mention.
  const camelCounts = new Map();
  for (const m of mask.matchAll(CAMEL_OR_SNAKE_FN_RE)) {
    camelCounts.set(m[1], (camelCounts.get(m[1]) || 0) + 1);
  }
  for (const [tok, count] of camelCounts) {
    if (reactHooks.has(tok)) continue;
    // Skip tokens that already appear as the tail of a dotted entity name.
    if (isClaimedAsClassRhs(out, tok)) continue;
    // snake_case means an underscore between two lowercase/digit chars,
    // i.e. not merely a leading underscore.
    const isSnake = /[a-z0-9]_[a-z0-9]/.test(tok);
    if (count >= minMentions || isSnake) addEntity(out, 'function', tok);
  }

  // 4h. Standalone known-action-verb words.
  for (const w of STANDALONE_FN_WORDS) {
    if (new RegExp(`\\b${escapeRegex(w)}\\b`).test(mask)) {
      addEntity(out, 'function', w);
    }
  }

  // ---- Pass 5: PascalCase identifiers ----------------------------------
  // Must reach minMentions, except for the `I`-prefixed interface shape.
  const pascalCounts = new Map();
  for (const m of mask.matchAll(PASCAL_BARE_RE)) {
    pascalCounts.set(m[1], (pascalCounts.get(m[1]) || 0) + 1);
  }
  for (const [tok, count] of pascalCounts) {
    if (errorCodes.has(tok)) continue;
    // Already emitted as a `file:` entity (Makefile, Dockerfile, ...).
    if (knownSingleHits.has(tok)) continue;
    // Only ever used as the `Class` half of `Class.member`.
    if (alwaysFollowedByDot(mask, tok)) continue;
    const isInterface = /^I[A-Z][a-z]/.test(tok);
    if (count >= minMentions || isInterface) addEntity(out, 'identifier', tok);
  }

  // ---- Pass 6: redactor classification ---------------------------------
  const result = [];
  for (const ent of out.values()) {
    const cls = classify(ent.name);
    result.push({
      kind: ent.kind,
      name: ent.name,
      redacted: cls.clean ? 0 : 1,
      redacted_kind: cls.redacted_kind || null,
    });
  }
  return result;
}
|
|
483
|
+
|
|
484
|
+
// --- helpers -----------------------------------------------------------
|
|
485
|
+
|
|
486
|
+
/**
 * Record an entity in `map` under the key `kind:name`, keeping the first
 * entry when the key is already present. A falsy `name` is ignored.
 *
 * @param {Map<string, {kind: string, name: string}>} map  Mutated in place.
 * @param {string} kind
 * @param {string} name
 */
function addEntity(map, kind, name) {
  if (name) {
    const entryKey = `${kind}:${name}`;
    if (!map.has(entryKey)) {
      map.set(entryKey, { kind, name });
    }
  }
}
|
|
492
|
+
|
|
493
|
+
/**
 * Return a copy of `s` in which the `len` characters starting at `idx` are
 * replaced by `len` spaces. `s` is returned unchanged when `idx` is
 * null/undefined or `len` is not positive.
 *
 * @param {string} s
 * @param {number} idx  Start offset of the span to blank.
 * @param {number} len  Number of characters to blank.
 * @returns {string}
 */
function blankAt(s, idx, len) {
  const nothingToBlank = idx == null || len <= 0;
  if (nothingToBlank) return s;

  const head = s.slice(0, idx);
  const filler = ' '.repeat(len);
  const tail = s.slice(idx + len);
  return `${head}${filler}${tail}`;
}
|
|
497
|
+
|
|
498
|
+
/**
 * Decide whether the member part of `Class.member` reads as a method name.
 *
 * True when `rhs` is one of METHOD_VERB_WORDS, has the `__name__` dunder
 * shape, or starts with one of METHOD_VERB_PREFIXES immediately followed by
 * an uppercase A-Z letter (`getUser`, `isOpen`).
 *
 * @param {string} rhs
 * @returns {boolean}
 */
function isMethodVerb(rhs) {
  if (!rhs) return false;

  const isKnownVerb = METHOD_VERB_WORDS.has(rhs);
  if (isKnownVerb || /^__[a-zA-Z][a-zA-Z0-9_]*__$/.test(rhs)) return true;

  return METHOD_VERB_PREFIXES.some(
    (verb) =>
      rhs.length > verb.length &&
      rhs.startsWith(verb) &&
      /^[A-Z]$/.test(rhs.charAt(verb.length)),
  );
}
|
|
510
|
+
|
|
511
|
+
/**
 * Decide whether an UPPER_SNAKE token should be classified as an error code.
 *
 * True when `v` equals one of ERROR_SUFFIXES, ends with `_<suffix>` for one
 * of them, or ends with a versioned `_V<digits>` suffix.
 *
 * @param {string} v
 * @returns {boolean}
 */
function matchesErrorSuffix(v) {
  const hasKnownSuffix = ERROR_SUFFIXES.some(
    (suffix) => v.endsWith(`_${suffix}`) || v === suffix,
  );
  return hasKnownSuffix || VERSIONED_SUFFIX_RE.test(v);
}
|
|
518
|
+
|
|
519
|
+
/**
 * Report whether `tok` is already the trailing `.tok` of some recorded
 * entity name. Keys are `kind:name`; the part after the first `:` is the
 * name. Keys without a `:` are ignored.
 *
 * @param {Map<string, *>} map  Entity map keyed by `kind:name`.
 * @param {string} tok
 * @returns {boolean}
 */
function isClaimedAsClassRhs(map, tok) {
  const tail = `.${tok}`;
  return Array.from(map.keys()).some((entryKey) => {
    const sep = entryKey.indexOf(':');
    return sep >= 0 && entryKey.slice(sep + 1).endsWith(tail);
  });
}
|
|
528
|
+
|
|
529
|
+
/**
 * Returns true when `tok` occurs at least once in `s` as a whole word and
 * EVERY occurrence is the left-hand side of a member access, i.e. is
 * immediately followed by `.` and then a character that can start an
 * identifier (`[A-Za-z_]`). Used to suppress standalone PascalCase emission
 * when the token is only ever the `Class` half of `Class.member`.
 *
 * A `.` that is not followed by an identifier start (sentence-ending
 * period, ellipsis, end of text) does not count as a member access, so
 * "We use Redux." is treated as a standalone mention of `Redux`.
 *
 * @param {string} s    Text to scan.
 * @param {string} tok  Token to look for; regex metacharacters are escaped.
 * @returns {boolean}
 */
function alwaysFollowedByDot(s, tok) {
  const re = new RegExp(`\\b${escapeRegex(tok)}\\b`, 'g');
  let total = 0;
  for (const m of s.matchAll(re)) {
    total++;
    const end = m.index + m[0].length;
    // charAt yields '' past the end of the string, which fails both checks.
    const isMemberAccess = s.charAt(end) === '.' && /[A-Za-z_]/.test(s.charAt(end + 1));
    if (!isMemberAccess) return false;
  }
  return total > 0;
}
|
|
545
|
+
|
|
546
|
+
/**
 * Backslash-escape every regular-expression metacharacter in `s` so the
 * result can be embedded in a `new RegExp(...)` source as a literal.
 * The input is coerced with `String()` first.
 *
 * @param {*} s
 * @returns {string}
 */
function escapeRegex(s) {
  const literal = String(s);
  return literal.replace(/[.*+?^${}()|[\]\\]/g, (ch) => `\\${ch}`);
}
|
|
549
|
+
|
|
550
|
+
// Internals exposed under a single named export: the lookup tables and the
// pure helper predicates of this module.
export const __test = {
  // lookup tables
  KNOWN_SINGLE_FILES,
  METHOD_VERB_PREFIXES,
  METHOD_VERB_WORDS,
  ERROR_SUFFIXES,
  // helper predicates
  isMethodVerb,
  matchesErrorSuffix,
  alwaysFollowedByDot,
};

// Default export mirrors the named `extractEntities` export.
export default { extractEntities };
|