speclock 5.2.6 → 5.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +144 -24
- package/package.json +242 -67
- package/src/cli/index.js +137 -7
- package/src/core/auth.js +341 -341
- package/src/core/compliance.js +1 -1
- package/src/core/engine.js +63 -1
- package/src/core/lock-author.js +487 -487
- package/src/core/replay.js +236 -0
- package/src/core/rules-sync.js +548 -0
- package/src/core/templates.js +69 -0
- package/src/dashboard/index.html +2 -2
- package/src/mcp/http-server.js +3 -3
- package/src/mcp/server.js +130 -1
package/src/core/lock-author.js
CHANGED
|
@@ -1,487 +1,487 @@
|
|
|
1
|
-
// ===================================================================
|
|
2
|
-
// SpecLock Smart Lock Authoring Engine
|
|
3
|
-
// Auto-rewrites user locks to prevent verb contamination.
|
|
4
|
-
//
|
|
5
|
-
// Problem: "Never add authentication" causes false positives because
|
|
6
|
-
// the word "add" gets extracted as a prohibited signal and fires on
|
|
7
|
-
// every action containing "add" (like "Add dark mode").
|
|
8
|
-
//
|
|
9
|
-
// Solution: Extract the SUBJECT from the lock, rewrite to state
|
|
10
|
-
// what IS fixed/prohibited rather than what ACTION is prohibited.
|
|
11
|
-
// "Never add authentication" → "Authentication and login are prohibited"
|
|
12
|
-
//
|
|
13
|
-
// Developed by Sandeep Roy (https://github.com/sgroy10)
|
|
14
|
-
// ===================================================================
|
|
15
|
-
|
|
16
|
-
// Common action verbs that contaminate lock matching when present in lock text.
// Users naturally write these verbs in locks ("never ADD X", "don't CHANGE Y"),
// but the semantic engine then matches them against ALL actions that contain
// the same verbs.
const CONTAMINATING_VERBS = new Set([
  // Constructive
  "add", "create", "introduce", "insert", "implement", "build", "make",
  "include", "put", "set", "use", "install", "deploy", "attach", "connect",
  // Destructive
  "remove", "delete", "drop", "destroy", "kill", "purge", "wipe", "erase",
  "eliminate", "clear", "empty", "nuke",
  // Modification
  "change", "modify", "alter", "update", "mutate", "transform", "rewrite",
  "edit", "adjust", "tweak", "revise", "amend", "touch", "rework",
  // Movement
  "move", "migrate", "transfer", "shift", "relocate", "switch", "swap",
  "replace", "substitute", "exchange",
  // Toggle
  "enable", "disable", "activate", "deactivate", "start", "stop",
  "turn", "pause", "suspend", "halt",
  // General
  "push", "pull", "send", "expose", "leak", "reveal", "show",
  "allow", "permit", "let", "give", "grant", "open",
  // Informal
  "mess",
]);

// Prohibition prefixes — these phrases introduce the verb that follows.
// Each pattern is anchored at the start of the text and consumes its trailing
// whitespace, so `text.slice(match[0].length)` begins at the next word.
// Contractions accept a straight apostrophe, a typographic apostrophe (’),
// or none at all, because lock text is often pasted from rich-text sources.
const PROHIBITION_PATTERNS = [
  /^never\s+/i,
  /^must\s+not\s+/i,
  /^do\s+not\s+/i,
  /^don['’]?t\s+/i,
  /^cannot\s+/i,
  /^can['’]?t\s+/i,
  /^should\s+not\s+/i,
  /^shouldn['’]?t\s+/i,
  /^no\s+(?:one\s+(?:should|may|can)\s+)?/i,
  /^(?:it\s+is\s+)?(?:forbidden|prohibited|not\s+allowed)\s+to\s+/i,
  /^avoid\s+/i,
  /^prevent\s+/i,
  /^refrain\s+from\s+/i,
  /^stop\s+/i,
];

// Filler words that may sit between the verb and the subject.
const FILLER_WORDS = new Set([
  "the", "a", "an", "any", "another", "other", "new", "additional",
  "more", "extra", "further", "existing", "current", "old", "all",
  "our", "their", "user", "users", "to",
]);

// Domain subject → canonical prohibition phrasing.
// "{subject}" and "{alternative}" are placeholders inside the template text.
// NOTE(review): no function visible in this file reads this table — confirm
// whether it is still needed.
const SUBJECT_TEMPLATES = {
  // Auth/Security
  "authentication": "{subject} and login functionality are prohibited",
  "auth": "Authentication and login functionality are prohibited",
  "login": "Login and authentication functionality are prohibited",
  "signup": "Sign-up and registration functionality are prohibited",
  "user accounts": "User account creation and management are prohibited",
  "2fa": "Two-factor authentication changes are prohibited",
  "mfa": "Multi-factor authentication changes are prohibited",

  // Database
  "database": "External database services are prohibited — use {alternative} only",
  "supabase": "Supabase integration is prohibited",
  "firebase": "Firebase integration is prohibited",
  "mongodb": "MongoDB integration is prohibited",

  // Payment
  "payment": "Additional payment providers are prohibited — use {alternative} exclusively",
  "stripe": "Stripe modifications are prohibited",
  "razorpay": "Razorpay integration is prohibited",
  "paypal": "PayPal integration is prohibited",
};
|
|
91
|
-
|
|
92
|
-
/**
 * Extract the subject (noun phrase) from a lock text.
 *
 * Pipeline:
 *   1. strip a leading prohibition prefix ("Never", "Don't", ...)
 *   2. strip the leading contaminating verb, including compound forms
 *      ("touch or modify", "add, change, or remove")
 *   3. strip leading filler words and prepositions
 *   4. truncate trailing qualifier clauses
 *
 * Given:   "Never add user authentication or login functionality"
 * Returns: "authentication or login functionality"   ("user" is a filler word)
 *
 * @param {string} lockText - Raw lock text. Non-string values are coerced
 *   (null/undefined become "").
 * @returns {string} The extracted subject, or the (coerced) input when
 *   nothing is left after stripping.
 */
export function extractLockSubject(lockText) {
  // Coerce instead of throwing on .trim() for null/undefined/non-string input.
  const source = typeof lockText === "string"
    ? lockText
    : (lockText == null ? "" : String(lockText));
  const bareWord = (token) => token.toLowerCase().replace(/[^a-z]/g, "");
  const isVerb = (token) => CONTAMINATING_VERBS.has(bareWord(token));

  let remaining = source.trim();

  // Step 1: Strip prohibition prefix (first matching pattern wins).
  for (const pattern of PROHIBITION_PATTERNS) {
    const match = remaining.match(pattern);
    if (match) {
      remaining = remaining.slice(match[0].length).trim();
      break;
    }
  }

  // Step 2: Strip the first contaminating verb found within the first 3 words.
  // Anything before that verb is dropped together with it.
  const words = remaining.split(/\s+/);
  const scanLimit = Math.min(3, words.length);
  let verbIndex = -1;
  for (let i = 0; i < scanLimit; i++) {
    if (isVerb(words[i])) {
      verbIndex = i;
      break;
    }
  }

  if (verbIndex >= 0) {
    let endIdx = verbIndex + 1;
    // Consume compound verbs. The loop bound keeps at least one word after a
    // connector so "X or" never reads past the end.
    while (endIdx < words.length - 1) {
      const token = words[endIdx].toLowerCase();
      if (token === "or" || token === "and") {
        // "touch or modify", "change and update"
        if (!isVerb(words[endIdx + 1] || "")) break;
        endIdx += 2; // skip connector + verb
      } else if (words[endIdx - 1].endsWith(",") && isVerb(token)) {
        // "add, change, or remove": the previous verb ended with a comma and
        // this token is another verb in the same list.
        endIdx += 1;
      } else {
        break;
      }
    }
    remaining = words.slice(endIdx).join(" ").trim();
  }

  // Step 3: Strip leading filler words and prepositions like "from".
  // The last word is always kept so the subject never becomes empty here.
  const STRIP_LEADING = new Set([...FILLER_WORDS, "from", "on", "in", "at", "with"]);
  const remainingWords = remaining.split(/\s+/);
  let startIdx = 0;
  while (
    startIdx < remainingWords.length - 1 &&
    STRIP_LEADING.has(remainingWords[startIdx].toLowerCase())
  ) {
    startIdx++;
  }
  remaining = remainingWords.slice(startIdx).join(" ").trim();

  // Step 4: Truncate at em/en dash, semicolon, or qualifier phrases.
  // "authentication system — NextAuth config must not be changed" → "authentication system"
  remaining = remaining.split(/\s*[—–]\s*/)[0].trim();
  remaining = remaining.split(/\s*;\s*/)[0].trim();
  // Truncate at comma + pronoun/qualifier clause:
  // "KYC verification flow, it's SEC-compliant" → "KYC verification flow".
  // Plain comma-separated lists ("auth, authorization, and login") are preserved.
  remaining = remaining
    .replace(/,\s+(?:it['’]?s?|they|this|that|which|we|since|because|as)\b.*$/i, "")
    .trim();
  // Truncate at "must not", "should not", "can't" etc. — they start a qualifier.
  remaining = remaining
    .replace(/\s+(?:must|should|cannot|can['’]t|will)\s+(?:not\s+)?(?:be\s+)?.*$/i, "")
    .trim();
  // Truncate at "to/with any/another/other" — directional qualifier.
  remaining = remaining
    .replace(/\s+(?:to|with)\s+(?:any|another|other|a\s+different)\s+.*$/i, "")
    .trim();

  return remaining || source;
}
|
|
169
|
-
|
|
170
|
-
/**
 * Detect whether a lock text carries verb-contamination risk.
 *
 * A lock is flagged only when it starts with a prohibition prefix
 * (PROHIBITION_PATTERNS) AND the very next word is a contaminating verb
 * (CONTAMINATING_VERBS). Declarative locks such as "Use Stripe exclusively"
 * and prohibitions followed by any other word are reported as safe.
 *
 * @param {string} lockText - Raw lock text. Non-string input is reported as safe.
 * @returns {{hasRisk: boolean, verb: (string|null), subject: (string|null),
 *            suggestion: (string|null), original: *}}
 *   `original` echoes the input on every return path. `suggestion` is the
 *   output of rewriteLock() and may equal the input when rewriteLock()
 *   returns the text unchanged.
 */
export function detectVerbContamination(lockText) {
  // Fresh object per call so callers can mutate the result safely.
  const noRisk = () => ({
    hasRisk: false,
    verb: null,
    subject: null,
    suggestion: null,
    original: lockText,
  });

  if (typeof lockText !== "string") return noRisk();

  const lower = lockText.toLowerCase().trim();

  // Find the prohibition prefix, if any, and keep what follows it.
  let afterProhibition = null;
  for (const pattern of PROHIBITION_PATTERNS) {
    const match = lower.match(pattern);
    if (match) {
      afterProhibition = lower.slice(match[0].length).trim();
      break;
    }
  }

  // No prohibition prefix — declarative locks are already safe.
  if (afterProhibition === null) return noRisk();

  // Only the first word after the prefix is inspected; punctuation is ignored.
  const firstWord = (afterProhibition.split(/\s+/)[0] || "").replace(/[^a-z]/g, "");

  // Prohibition without a contaminating verb right after it — already safe.
  if (!CONTAMINATING_VERBS.has(firstWord)) return noRisk();

  const subject = extractLockSubject(lockText);

  return {
    hasRisk: true,
    verb: firstWord,
    subject,
    suggestion: rewriteLock(lockText, firstWord, subject),
    original: lockText,
  };
}
|
|
219
|
-
|
|
220
|
-
// Verb → rewrite category used by rewriteLock(). "turn" is deliberately
// absent: its meaning depends on the particle ("turn on" vs "turn off") and
// is resolved separately.
const REWRITE_VERB_CATEGORY = new Map();
for (const [category, verbs] of [
  ["constructive", ["add", "create", "introduce", "insert", "implement",
    "build", "make", "include", "install", "deploy", "attach", "connect",
    "put", "set", "use"]],
  ["destructive", ["remove", "delete", "drop", "destroy", "kill",
    "purge", "wipe", "erase", "eliminate", "clear", "empty", "nuke"]],
  ["modification", ["change", "modify", "alter", "update", "mutate",
    "transform", "rewrite", "edit", "adjust", "tweak", "revise", "amend",
    "rework", "touch"]],
  ["movement", ["move", "migrate", "transfer", "shift", "relocate",
    "switch", "swap", "replace", "substitute", "exchange"]],
  ["deactivating", ["disable", "deactivate", "stop", "pause", "suspend", "halt"]],
  ["activating", ["enable", "activate", "start"]],
]) {
  for (const v of verbs) REWRITE_VERB_CATEGORY.set(v, category);
}

// Returns "on" / "off" for a "turn" lock, or null when no particle is found.
function resolveTurnDirection(lockText, subject) {
  const direct = /\bturn(?:ing)?\s+(on|off)\b/i.exec(String(lockText));
  if (direct) return direct[1].toLowerCase();
  // "turn the firewall off" — the particle trails the subject.
  const trailing = /\s(on|off)[.!]?$/i.exec(subject);
  return trailing ? trailing[1].toLowerCase() : null;
}

// Removes a leading or trailing "on"/"off" particle (and a following article)
// from a subject.
function stripTurnParticle(subject) {
  const stripped = subject
    .replace(/^(?:on|off)\s+(?:(?:the|a|an)\s+)?/i, "")
    .replace(/\s+(?:on|off)(?=[.!]?$)/i, "");
  return stripped || subject;
}

/**
 * Rewrite a lock to eliminate verb contamination.
 * Transforms "Never add X" → a statement about X itself, chosen by the
 * category of the verb (constructive, destructive, modification, movement,
 * toggle), with a generic fallback for any other verb.
 *
 * Destructive, modification, movement and deactivating rewrites keep the
 * original verb in the output so euphemism matching can still find it
 * ("phase out" → "remove" needs "remove" in the lock text).
 *
 * @param {string} lockText - Original lock text; returned unchanged when no
 *   usable subject was extracted.
 * @param {string} verb - The contaminating verb (matched case-insensitively).
 * @param {string} subject - Subject extracted from the lock.
 * @returns {string} The rewritten lock, or `lockText` when `subject` is empty
 *   or identical to `lockText`.
 */
export function rewriteLock(lockText, verb, subject) {
  if (!subject || subject === lockText) return lockText;

  // Normalise case so "Add"/"ADD" hit the same category as "add".
  const action = String(verb).toLowerCase();
  const capitalize = (s) => s.charAt(0).toUpperCase() + s.slice(1);
  const cleanSubject = capitalize(subject);

  if (action === "turn") {
    // "turn on X" prohibits activation and "turn off X" prohibits
    // deactivation. Treating both as deactivation inverted "Never turn on X".
    const direction = resolveTurnDirection(lockText, subject);
    if (direction === "on") {
      return `${capitalize(stripTurnParticle(subject))} must remain disabled — do not activate.`;
    }
    if (direction === "off") {
      return `${capitalize(stripTurnParticle(subject))} must remain active and enabled — turn off is prohibited.`;
    }
    // No particle found: keep the previous wording.
    return `${cleanSubject} must remain active and enabled — turn is prohibited.`;
  }

  switch (REWRITE_VERB_CATEGORY.get(action)) {
    case "constructive":
      // "Never add X" → "X — prohibited. Must not be introduced or added."
      return `${cleanSubject} — prohibited. Must not be introduced or added.`;

    case "destructive": {
      // "Never delete X" → "X must be preserved — delete and remove operations are prohibited"
      const note = action === "remove"
        ? "remove and delete operations are prohibited"
        : `${action} and remove operations are prohibited`;
      return `${cleanSubject} must be preserved — ${note}.`;
    }

    case "modification": {
      // "Never modify X" → "X is frozen — modify and change operations are prohibited"
      const note = action === "change"
        ? "change operations are prohibited"
        : `${action} and change operations are prohibited`;
      return `${cleanSubject} is frozen — ${note}.`;
    }

    case "movement": {
      // "Never migrate X" → "X must remain unchanged — migrate and replace operations are prohibited"
      const note = action === "replace"
        ? "replace operations are prohibited"
        : `${action} and replace operations are prohibited`;
      return `${cleanSubject} must remain unchanged — ${note}.`;
    }

    case "deactivating":
      // "Never disable X" → "X must remain active and enabled — disable is prohibited"
      return `${cleanSubject} must remain active and enabled — ${action} is prohibited.`;

    case "activating":
      // "Never enable X" → "X must remain disabled"
      return `${cleanSubject} must remain disabled — do not activate.`;

    default:
      // Fallback: generic rewrite for verbs outside the categories above.
      return `${cleanSubject} — no ${action} operations allowed.`;
  }
}
|
|
290
|
-
|
|
291
|
-
/**
 * Smart lock normalizer. Takes raw user lock text and returns the best
 * version for the semantic engine.
 *
 * The lock counts as rewritten only when detectVerbContamination() flags a
 * risk AND its suggestion actually differs from the input. When no subject
 * can be extracted (e.g. "Never add"), the suggestion equals the original
 * text, and reporting that as a rewrite would attach a misleading reason.
 *
 * @param {string} lockText - Raw lock text.
 * @returns {{normalized: string, wasRewritten: boolean, original: string,
 *            reason: (string|null)}}
 *   normalized   — the rewritten lock (or the original if safe/unchanged)
 *   wasRewritten — whether the text was actually changed
 *   original     — the original lock text
 *   reason       — why it was rewritten, or null
 */
export function normalizeLock(lockText) {
  const contamination = detectVerbContamination(lockText);
  const suggestion = contamination.suggestion;

  const wasRewritten =
    Boolean(contamination.hasRisk) &&
    typeof suggestion === "string" &&
    suggestion.length > 0 &&
    suggestion !== lockText;

  if (!wasRewritten) {
    return {
      normalized: lockText,
      wasRewritten: false,
      original: lockText,
      reason: null,
    };
  }

  return {
    normalized: suggestion,
    wasRewritten: true,
    original: lockText,
    reason: `Verb "${contamination.verb}" in lock text causes false positives — ` +
      `rewritten to focus on the subject "${contamination.subject}"`,
  };
}
|
|
322
|
-
|
|
323
|
-
// Generic words too vague to establish subject identity on their own.
const GENERIC_SUBJECT_WORDS = new Set([
  "system", "service", "module", "component", "feature", "function",
  "method", "class", "model", "handler", "controller", "manager",
  "process", "workflow", "flow", "logic", "config", "configuration",
  "settings", "data", "information", "record", "records", "file",
  "files", "page", "section", "layer", "level", "part", "item",
  "code", "app", "application", "project",
]);

// List connectors: they separate subjects and are never subject tokens.
const SUBJECT_CONJUNCTIONS = new Set(["and", "or"]);

/**
 * Extract subject noun phrases from any text (lock or action).
 * This is the foundation for scope-aware matching.
 *
 * The result contains, in order and de-duplicated:
 *   1. the full noun phrase (after stripping prohibition prefix, leading verb
 *      and leading fillers)
 *   2. each list item when the phrase is a comma/"and"/"or" separated list
 *   3. individual significant words (longer than 3 chars, not a verb, not generic)
 *   4. adjacent bigrams of significant words within each list item
 *
 * Given:   "Update the WhatsApp message formatting logic"
 * Returns: ["whatsapp message formatting logic", "whatsapp", "message",
 *           "formatting", "whatsapp message", "message formatting",
 *           "formatting logic"]
 *
 * Given:   "Never modify auth, authorization, and login"
 * Returns: ["auth authorization and login", "auth", "authorization", "login"]
 *
 * @param {string} text - Lock or action text. Non-string input yields [].
 * @returns {string[]} Lower-cased subject phrases.
 */
export function extractSubjects(text) {
  if (typeof text !== "string") return [];

  // Step 1: Strip prohibition prefix.
  let content = text.toLowerCase().trim();
  for (const pattern of PROHIBITION_PATTERNS) {
    const match = content.match(pattern);
    if (match) {
      content = content.slice(match[0].length).trim();
      break;
    }
  }

  // Step 2: Strip a leading action verb (looked for in the first 2 words).
  const words = content.split(/\s+/);
  let startIdx = 0;
  for (let i = 0; i < Math.min(2, words.length); i++) {
    if (CONTAMINATING_VERBS.has(words[i].replace(/[^a-z]/g, ""))) {
      startIdx = i + 1;
      break;
    }
  }

  // Step 3: Skip fillers, always keeping the last word.
  while (
    startIdx < words.length - 1 &&
    FILLER_WORDS.has(words[startIdx].replace(/[^a-z]/g, ""))
  ) {
    startIdx++;
  }

  // Step 4: The remaining text is the subject noun phrase.
  const subjectWords = words.slice(startIdx);
  if (subjectWords.length === 0) return [];

  // Drop punctuation, then collapse the whitespace it leaves behind so that
  // "system — nextauth" becomes "system nextauth" (single space).
  const tidy = (s) => s.replace(/[^a-z0-9\s\-]/g, "").replace(/\s+/g, " ").trim();

  const rawPhrase = subjectWords.join(" ");
  const subjects = [];

  // Full noun phrase.
  const fullPhrase = tidy(rawPhrase);
  if (fullPhrase.length > 1) subjects.push(fullPhrase);

  // List items. The split runs on the RAW phrase: commas are removed by
  // tidy(), so splitting afterwards could never see them. ",\s+" (comma
  // followed by whitespace) avoids splitting inside numbers like "1,000".
  const parts = rawPhrase
    .split(/\s*,\s+(?:(?:and|or)\s+)?|\s+(?:and|or)\s+/)
    .map(tidy)
    .filter((part) => part.length > 1);
  if (parts.length > 1) subjects.push(...parts);

  // Significant words per list item. Conjunctions are excluded so they never
  // appear in a bigram ("and login"), which would make unrelated texts look
  // related.
  const significantByPart = parts.map((part) =>
    part.split(" ").filter(
      (w) => w.length > 2 && !FILLER_WORDS.has(w) && !SUBJECT_CONJUNCTIONS.has(w)
    )
  );

  // Individual significant words (proper nouns, domain terms) — skip generic.
  for (const partWords of significantByPart) {
    for (const w of partWords) {
      if (w.length > 3 && !CONTAMINATING_VERBS.has(w) && !GENERIC_SUBJECT_WORDS.has(w)) {
        subjects.push(w);
      }
    }
  }

  // Adjacent bigrams, never bridging two list items.
  for (const partWords of significantByPart) {
    for (let i = 0; i < partWords.length - 1; i++) {
      subjects.push(`${partWords[i]} ${partWords[i + 1]}`);
    }
  }

  // Set preserves first-insertion order while removing duplicates.
  return [...new Set(subjects)];
}
|
|
416
|
-
|
|
417
|
-
// Connector words that must never count as shared "significant" vocabulary
// between two phrases.
const OVERLAP_STOP_WORDS = new Set(["and", "or", "nor", "but", "for", "not"]);

// Escapes regex metacharacters so a phrase can be embedded in a RegExp.
function escapeSubjectForRegExp(phrase) {
  return phrase.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Compare subjects from action and lock to determine if they target
 * the same component. This is the scope-awareness engine.
 *
 * Every (lock subject, action subject) pair is checked, in this order:
 *   1. exact match                → recorded as the subject itself
 *   2. whole-word containment     → recorded as "<inner> ⊂ <outer>"
 *      ("patient records" inside "old patient records"; plain substrings such
 *      as "shipping" in "calculateshipping" do NOT count)
 *   3. word overlap between two multi-word phrases → "word overlap: a, b".
 *      At least one shared word is required and the shared count must reach
 *      40% of the shorter phrase's distinct-word count; fillers and
 *      connector words are ignored.
 *
 * @param {string} actionText - Text describing the proposed action.
 * @param {string} lockText - Text of the lock.
 * @returns {{overlaps: boolean, overlapScore: number, matchedSubjects: string[],
 *            lockSubjects: string[], actionSubjects: string[]}}
 *   `overlapScore` is in [0, 1]: distinct match entries divided by the number
 *   of lock subjects, capped at 1.
 */
export function compareSubjects(actionText, lockText) {
  const lockSubjects = extractSubjects(lockText);
  const actionSubjects = extractSubjects(actionText);

  if (lockSubjects.length === 0 || actionSubjects.length === 0) {
    return {
      overlaps: false,
      overlapScore: 0,
      matchedSubjects: [],
      lockSubjects,
      actionSubjects,
    };
  }

  // Pre-compute per-subject data once instead of once per pair.
  const describe = (phrase) => ({
    phrase,
    isMultiWord: phrase.includes(" "),
    words: new Set(phrase.split(/\s+/)),
    // Non-global regex: .test() keeps no state between calls.
    wholeWord: new RegExp(`\\b${escapeSubjectForRegExp(phrase)}\\b`),
  });
  const lockEntries = lockSubjects.map(describe);
  const actionEntries = actionSubjects.map(describe);

  const matched = [];

  for (const lock of lockEntries) {
    for (const action of actionEntries) {
      // 1. Exact match.
      if (lock.phrase === action.phrase) {
        matched.push(lock.phrase);
        continue;
      }

      // 2. Word-level containment, labelled "<inner> ⊂ <outer>".
      const actionInsideLock = action.wholeWord.test(lock.phrase);
      if (actionInsideLock || lock.wholeWord.test(action.phrase)) {
        matched.push(
          actionInsideLock
            ? `${action.phrase} ⊂ ${lock.phrase}`
            : `${lock.phrase} ⊂ ${action.phrase}`
        );
        continue;
      }

      // 3. Word-level overlap for multi-word phrases.
      if (lock.isMultiWord && action.isMultiWord) {
        const shared = [...lock.words].filter(
          (w) =>
            w.length > 2 &&
            action.words.has(w) &&
            !FILLER_WORDS.has(w) &&
            !OVERLAP_STOP_WORDS.has(w)
        );
        const threshold = Math.min(lock.words.size, action.words.size) * 0.4;
        if (shared.length >= 1 && shared.length >= threshold) {
          matched.push(`word overlap: ${shared.join(", ")}`);
        }
      }
    }
  }

  const uniqueMatched = [...new Set(matched)];
  // NOTE(review): this counts match entries (pairs), not distinct lock
  // subjects, so one lock subject matching several action subjects raises the
  // score. Left unchanged because callers may depend on current values.
  const overlapScore = uniqueMatched.length > 0
    ? Math.min(uniqueMatched.length / Math.max(lockSubjects.length, 1), 1.0)
    : 0;

  return {
    overlaps: uniqueMatched.length > 0,
    overlapScore,
    matchedSubjects: uniqueMatched,
    lockSubjects,
    actionSubjects,
  };
}
|
|
1
|
+
// ===================================================================
|
|
2
|
+
// SpecLock Smart Lock Authoring Engine
|
|
3
|
+
// Auto-rewrites user locks to prevent verb contamination.
|
|
4
|
+
//
|
|
5
|
+
// Problem: "Never add authentication" causes false positives because
|
|
6
|
+
// the word "add" gets extracted as a prohibited signal and fires on
|
|
7
|
+
// every action containing "add" (like "Add dark mode").
|
|
8
|
+
//
|
|
9
|
+
// Solution: Extract the SUBJECT from the lock, rewrite to state
|
|
10
|
+
// what IS fixed/prohibited rather than what ACTION is prohibited.
|
|
11
|
+
// "Never add authentication" → "Authentication and login are prohibited"
|
|
12
|
+
//
|
|
13
|
+
// Developed by Sandeep Roy (https://github.com/sgroy10)
|
|
14
|
+
// ===================================================================
|
|
15
|
+
|
|
16
|
+
// Common action verbs that contaminate lock matching when present in lock text.
// Users naturally write these verbs in locks ("never ADD X", "don't CHANGE Y"),
// but the semantic engine then matches them against ALL actions that contain
// the same verbs.
const CONTAMINATING_VERBS = new Set([
  // Constructive
  "add", "create", "introduce", "insert", "implement", "build", "make",
  "include", "put", "set", "use", "install", "deploy", "attach", "connect",
  // Destructive
  "remove", "delete", "drop", "destroy", "kill", "purge", "wipe", "erase",
  "eliminate", "clear", "empty", "nuke",
  // Modification
  "change", "modify", "alter", "update", "mutate", "transform", "rewrite",
  "edit", "adjust", "tweak", "revise", "amend", "touch", "rework",
  // Movement
  "move", "migrate", "transfer", "shift", "relocate", "switch", "swap",
  "replace", "substitute", "exchange",
  // Toggle
  "enable", "disable", "activate", "deactivate", "start", "stop",
  "turn", "pause", "suspend", "halt",
  // General
  "push", "pull", "send", "expose", "leak", "reveal", "show",
  "allow", "permit", "let", "give", "grant", "open",
  // Informal
  "mess",
]);

// Prohibition prefixes — these phrases introduce the verb that follows.
// Each pattern is anchored at the start of the text and consumes its trailing
// whitespace, so `text.slice(match[0].length)` begins at the next word.
// Contractions accept a straight apostrophe, a typographic apostrophe (’),
// or none at all, because lock text is often pasted from rich-text sources.
const PROHIBITION_PATTERNS = [
  /^never\s+/i,
  /^must\s+not\s+/i,
  /^do\s+not\s+/i,
  /^don['’]?t\s+/i,
  /^cannot\s+/i,
  /^can['’]?t\s+/i,
  /^should\s+not\s+/i,
  /^shouldn['’]?t\s+/i,
  /^no\s+(?:one\s+(?:should|may|can)\s+)?/i,
  /^(?:it\s+is\s+)?(?:forbidden|prohibited|not\s+allowed)\s+to\s+/i,
  /^avoid\s+/i,
  /^prevent\s+/i,
  /^refrain\s+from\s+/i,
  /^stop\s+/i,
];

// Filler words that may sit between the verb and the subject.
const FILLER_WORDS = new Set([
  "the", "a", "an", "any", "another", "other", "new", "additional",
  "more", "extra", "further", "existing", "current", "old", "all",
  "our", "their", "user", "users", "to",
]);

// Domain subject → canonical prohibition phrasing.
// "{subject}" and "{alternative}" are placeholders inside the template text.
// NOTE(review): no function visible in this file reads this table — confirm
// whether it is still needed.
const SUBJECT_TEMPLATES = {
  // Auth/Security
  "authentication": "{subject} and login functionality are prohibited",
  "auth": "Authentication and login functionality are prohibited",
  "login": "Login and authentication functionality are prohibited",
  "signup": "Sign-up and registration functionality are prohibited",
  "user accounts": "User account creation and management are prohibited",
  "2fa": "Two-factor authentication changes are prohibited",
  "mfa": "Multi-factor authentication changes are prohibited",

  // Database
  "database": "External database services are prohibited — use {alternative} only",
  "supabase": "Supabase integration is prohibited",
  "firebase": "Firebase integration is prohibited",
  "mongodb": "MongoDB integration is prohibited",

  // Payment
  "payment": "Additional payment providers are prohibited — use {alternative} exclusively",
  "stripe": "Stripe modifications are prohibited",
  "razorpay": "Razorpay integration is prohibited",
  "paypal": "PayPal integration is prohibited",
};
|
|
91
|
+
|
|
92
|
+
/**
 * Extract the subject (noun phrase) from a lock text.
 *
 * Pipeline:
 *   1. strip a leading prohibition prefix ("Never", "Don't", ...)
 *   2. strip the leading contaminating verb, including compound forms
 *      ("touch or modify", "add, change, or remove")
 *   3. strip leading filler words and prepositions
 *   4. truncate trailing qualifier clauses
 *
 * Given:   "Never add user authentication or login functionality"
 * Returns: "authentication or login functionality"   ("user" is a filler word)
 *
 * @param {string} lockText - Raw lock text. Non-string values are coerced
 *   (null/undefined become "").
 * @returns {string} The extracted subject, or the (coerced) input when
 *   nothing is left after stripping.
 */
export function extractLockSubject(lockText) {
  // Coerce instead of throwing on .trim() for null/undefined/non-string input.
  const source = typeof lockText === "string"
    ? lockText
    : (lockText == null ? "" : String(lockText));
  const bareWord = (token) => token.toLowerCase().replace(/[^a-z]/g, "");
  const isVerb = (token) => CONTAMINATING_VERBS.has(bareWord(token));

  let remaining = source.trim();

  // Step 1: Strip prohibition prefix (first matching pattern wins).
  for (const pattern of PROHIBITION_PATTERNS) {
    const match = remaining.match(pattern);
    if (match) {
      remaining = remaining.slice(match[0].length).trim();
      break;
    }
  }

  // Step 2: Strip the first contaminating verb found within the first 3 words.
  // Anything before that verb is dropped together with it.
  const words = remaining.split(/\s+/);
  const scanLimit = Math.min(3, words.length);
  let verbIndex = -1;
  for (let i = 0; i < scanLimit; i++) {
    if (isVerb(words[i])) {
      verbIndex = i;
      break;
    }
  }

  if (verbIndex >= 0) {
    let endIdx = verbIndex + 1;
    // Consume compound verbs. The loop bound keeps at least one word after a
    // connector so "X or" never reads past the end.
    while (endIdx < words.length - 1) {
      const token = words[endIdx].toLowerCase();
      if (token === "or" || token === "and") {
        // "touch or modify", "change and update"
        if (!isVerb(words[endIdx + 1] || "")) break;
        endIdx += 2; // skip connector + verb
      } else if (words[endIdx - 1].endsWith(",") && isVerb(token)) {
        // "add, change, or remove": the previous verb ended with a comma and
        // this token is another verb in the same list.
        endIdx += 1;
      } else {
        break;
      }
    }
    remaining = words.slice(endIdx).join(" ").trim();
  }

  // Step 3: Strip leading filler words and prepositions like "from".
  // The last word is always kept so the subject never becomes empty here.
  const STRIP_LEADING = new Set([...FILLER_WORDS, "from", "on", "in", "at", "with"]);
  const remainingWords = remaining.split(/\s+/);
  let startIdx = 0;
  while (
    startIdx < remainingWords.length - 1 &&
    STRIP_LEADING.has(remainingWords[startIdx].toLowerCase())
  ) {
    startIdx++;
  }
  remaining = remainingWords.slice(startIdx).join(" ").trim();

  // Step 4: Truncate at em/en dash, semicolon, or qualifier phrases.
  // "authentication system — NextAuth config must not be changed" → "authentication system"
  remaining = remaining.split(/\s*[—–]\s*/)[0].trim();
  remaining = remaining.split(/\s*;\s*/)[0].trim();
  // Truncate at comma + pronoun/qualifier clause:
  // "KYC verification flow, it's SEC-compliant" → "KYC verification flow".
  // Plain comma-separated lists ("auth, authorization, and login") are preserved.
  remaining = remaining
    .replace(/,\s+(?:it['’]?s?|they|this|that|which|we|since|because|as)\b.*$/i, "")
    .trim();
  // Truncate at "must not", "should not", "can't" etc. — they start a qualifier.
  remaining = remaining
    .replace(/\s+(?:must|should|cannot|can['’]t|will)\s+(?:not\s+)?(?:be\s+)?.*$/i, "")
    .trim();
  // Truncate at "to/with any/another/other" — directional qualifier.
  remaining = remaining
    .replace(/\s+(?:to|with)\s+(?:any|another|other|a\s+different)\s+.*$/i, "")
    .trim();

  return remaining || source;
}
|
|
169
|
+
|
|
170
|
+
/**
 * Detect whether a lock text carries verb-contamination risk.
 *
 * A lock is flagged only when it starts with a prohibition prefix
 * (PROHIBITION_PATTERNS) AND the very next word is a contaminating verb
 * (CONTAMINATING_VERBS). Declarative locks such as "Use Stripe exclusively"
 * and prohibitions followed by any other word are reported as safe.
 *
 * @param {string} lockText - Raw lock text. Non-string input is reported as safe.
 * @returns {{hasRisk: boolean, verb: (string|null), subject: (string|null),
 *            suggestion: (string|null), original: *}}
 *   `original` echoes the input on every return path. `suggestion` is the
 *   output of rewriteLock() and may equal the input when rewriteLock()
 *   returns the text unchanged.
 */
export function detectVerbContamination(lockText) {
  // Fresh object per call so callers can mutate the result safely.
  const noRisk = () => ({
    hasRisk: false,
    verb: null,
    subject: null,
    suggestion: null,
    original: lockText,
  });

  if (typeof lockText !== "string") return noRisk();

  const lower = lockText.toLowerCase().trim();

  // Find the prohibition prefix, if any, and keep what follows it.
  let afterProhibition = null;
  for (const pattern of PROHIBITION_PATTERNS) {
    const match = lower.match(pattern);
    if (match) {
      afterProhibition = lower.slice(match[0].length).trim();
      break;
    }
  }

  // No prohibition prefix — declarative locks are already safe.
  if (afterProhibition === null) return noRisk();

  // Only the first word after the prefix is inspected; punctuation is ignored.
  const firstWord = (afterProhibition.split(/\s+/)[0] || "").replace(/[^a-z]/g, "");

  // Prohibition without a contaminating verb right after it — already safe.
  if (!CONTAMINATING_VERBS.has(firstWord)) return noRisk();

  const subject = extractLockSubject(lockText);

  return {
    hasRisk: true,
    verb: firstWord,
    subject,
    suggestion: rewriteLock(lockText, firstWord, subject),
    original: lockText,
  };
}
|
|
219
|
+
|
|
220
|
+
// Verb families used by rewriteLock to choose a rewrite template. The
// families are mutually exclusive and are built once at module load instead
// of on every call.
const LOCK_REWRITE_VERB_FAMILIES = Object.freeze({
  constructive: new Set(["add", "create", "introduce", "insert", "implement",
    "build", "make", "include", "install", "deploy", "attach", "connect",
    "put", "set", "use"]),
  destructive: new Set(["remove", "delete", "drop", "destroy", "kill",
    "purge", "wipe", "erase", "eliminate", "clear", "empty", "nuke"]),
  modification: new Set(["change", "modify", "alter", "update", "mutate",
    "transform", "rewrite", "edit", "adjust", "tweak", "revise", "amend",
    "rework", "touch"]),
  movement: new Set(["move", "migrate", "transfer", "shift", "relocate",
    "switch", "swap", "replace", "substitute", "exchange"]),
  // Toggle verbs whose prohibition means "keep it running".
  toggleOff: new Set(["disable", "deactivate", "stop", "pause", "suspend",
    "halt", "turn"]),
  // Toggle verbs whose prohibition means "keep it off".
  toggleOn: new Set(["enable", "activate", "start"]),
});

/**
 * Rewrite a lock so that it states what is fixed or prohibited about the
 * subject instead of leading with the contaminating verb.
 *
 * Templates by verb family:
 *   constructive → "<Subject> — prohibited. Must not be introduced or added."
 *   destructive  → "<Subject> must be preserved — <verb> and remove operations are prohibited."
 *   modification → "<Subject> is frozen — <verb> and change operations are prohibited."
 *   movement     → "<Subject> must remain unchanged — <verb> and replace operations are prohibited."
 *   toggle (off) → "<Subject> must remain active and enabled — <verb> is prohibited."
 *   toggle (on)  → "<Subject> must remain disabled — do not activate."
 *   anything else→ "<Subject> — no <verb> operations allowed."
 *
 * @param {string} lockText - Original lock text.
 * @param {string} verb - Leading verb of the lock; matched case-insensitively.
 * @param {string} subject - Subject phrase extracted from the lock.
 * @returns {string} The rewritten lock, or `lockText` unchanged when there is
 *   no usable subject (empty, or identical to `lockText`) or no usable verb.
 */
export function rewriteLock(lockText, verb, subject) {
  if (!subject || subject === lockText) return lockText;

  // Family lookup is by lower-case verb; normalise so "Delete"/" delete "
  // select the same template as "delete".
  const normalizedVerb = typeof verb === "string" ? verb.trim().toLowerCase() : "";

  // Without a verb no template can be chosen safely; keep the lock as written
  // rather than emitting text like "no undefined operations allowed".
  if (!normalizedVerb) return lockText;

  // Capitalise the first character of the subject only.
  const cleanSubject = subject.charAt(0).toUpperCase() + subject.slice(1);
  const families = LOCK_REWRITE_VERB_FAMILIES;

  if (families.constructive.has(normalizedVerb)) {
    return `${cleanSubject} — prohibited. Must not be introduced or added.`;
  }

  if (families.destructive.has(normalizedVerb)) {
    // The original verb is kept in the rewritten text on purpose so that
    // euphemism matching ("phase out" → "remove") can still find it.
    const destNote = normalizedVerb === "remove"
      ? "remove and delete operations are prohibited"
      : `${normalizedVerb} and remove operations are prohibited`;
    return `${cleanSubject} must be preserved — ${destNote}.`;
  }

  if (families.modification.has(normalizedVerb)) {
    const modNote = normalizedVerb === "change"
      ? "change operations are prohibited"
      : `${normalizedVerb} and change operations are prohibited`;
    return `${cleanSubject} is frozen — ${modNote}.`;
  }

  if (families.movement.has(normalizedVerb)) {
    const moveNote = normalizedVerb === "replace"
      ? "replace operations are prohibited"
      : `${normalizedVerb} and replace operations are prohibited`;
    return `${cleanSubject} must remain unchanged — ${moveNote}.`;
  }

  if (families.toggleOff.has(normalizedVerb)) {
    return `${cleanSubject} must remain active and enabled — ${normalizedVerb} is prohibited.`;
  }

  if (families.toggleOn.has(normalizedVerb)) {
    return `${cleanSubject} must remain disabled — do not activate.`;
  }

  // Verb outside every known family: generic rewrite.
  return `${cleanSubject} — no ${normalizedVerb} operations allowed.`;
}
|
|
290
|
+
|
|
291
|
+
/**
 * Normalize a raw user lock into the form handed to the semantic engine.
 *
 * Delegates detection and rewriting to detectVerbContamination(); when no
 * risk is reported the lock is passed through untouched.
 *
 * @param {string} lockText - Raw lock text.
 * @returns {{normalized: string, wasRewritten: boolean, original: string,
 *   reason: (string|null)}}
 *   normalized   - the suggested rewrite, or the original text if safe
 *   wasRewritten - true when a rewrite was suggested
 *   original     - the text that was passed in
 *   reason       - explanation of the rewrite, or null when not rewritten
 */
export function normalizeLock(lockText) {
  const { hasRisk, verb, subject, suggestion } = detectVerbContamination(lockText);

  if (hasRisk) {
    const reason =
      `Verb "${verb}" in lock text causes false positives — rewritten to focus on the subject "${subject}"`;
    return {
      normalized: suggestion,
      wasRewritten: true,
      original: lockText,
      reason,
    };
  }

  // Safe as written: hand the lock back unchanged.
  return {
    normalized: lockText,
    wasRewritten: false,
    original: lockText,
    reason: null,
  };
}
|
|
322
|
+
|
|
323
|
+
/**
 * Extract candidate subject phrases from any text (lock or action).
 * compareSubjects() uses the result for scope-aware matching.
 *
 * The text is lower-cased, a leading prohibition prefix is removed, a
 * contaminating verb found in the first two words is skipped (together with
 * anything before it), and leading filler words are skipped. From what
 * remains the function collects, in order:
 *   1. the full phrase with punctuation removed,
 *   2. sub-phrases split on commas and on "and" / "or" (only when the split
 *      yields more than one piece),
 *   3. individual words longer than 3 characters that are neither
 *      contaminating verbs nor generic nouns ("system", "logic", ...),
 *   4. adjacent bigrams of the significant words.
 *
 * Example (assuming "update" is in CONTAMINATING_VERBS and "the" is in
 * FILLER_WORDS — both sets are defined elsewhere in this file):
 *   "Update the WhatsApp message formatting logic" →
 *   ["whatsapp message formatting logic", "whatsapp", "message", "formatting",
 *    "whatsapp message", "message formatting", "formatting logic"]
 *
 * @param {string} text - Lock or action text.
 * @returns {string[]} De-duplicated subject phrases; empty when nothing
 *   remains after the verb is stripped.
 */
export function extractSubjects(text) {
  const lower = text.toLowerCase().trim();
  const subjects = [];

  // Step 1: Strip the first matching prohibition prefix, if any
  let content = lower;
  for (const pattern of PROHIBITION_PATTERNS) {
    const match = content.match(pattern);
    if (match) {
      content = content.slice(match[0].length).trim();
      break;
    }
  }

  // Step 2: Strip a leading verb — only the first two words are inspected,
  // and everything up to and including the verb is dropped
  const words = content.split(/\s+/);
  let startIdx = 0;
  for (let i = 0; i < Math.min(2, words.length); i++) {
    const w = words[i].replace(/[^a-z]/g, "");
    if (CONTAMINATING_VERBS.has(w)) {
      startIdx = i + 1;
      break;
    }
  }

  // Step 3: Skip leading fillers, but always keep at least the last word
  while (startIdx < words.length - 1) {
    const w = words[startIdx].replace(/[^a-z]/g, "");
    if (!FILLER_WORDS.has(w)) break;
    startIdx++;
  }

  // Step 4: The remaining text is the subject noun phrase
  const subjectWords = words.slice(startIdx);
  if (subjectWords.length === 0) return subjects;

  const stripPunctuation = (s) => s.replace(/[^a-z0-9\s\-]/g, "").trim();

  // Full noun phrase (punctuation removed)
  const rawPhrase = subjectWords.join(" ");
  const fullPhrase = stripPunctuation(rawPhrase);
  if (fullPhrase.length > 1) subjects.push(fullPhrase);

  // Sub-phrases. Commas must be split on BEFORE punctuation is stripped:
  // once stripped they can no longer act as separators, which previously made
  // lists like "auth, authorization, and login" split only on "and".
  // For text without ", " this yields exactly the same pieces as splitting
  // the cleaned phrase on "and" / "or".
  const subPhrases = [];
  const segments = rawPhrase.split(/,\s+/);
  for (let i = 0; i < segments.length; i++) {
    let segment = stripPunctuation(segments[i]);
    // ", and login" leaves a leading conjunction on the segment after a comma
    if (i > 0) segment = segment.replace(/^(?:and|or)\s+/, "");
    for (const piece of segment.split(/\s+(?:and|or)\s+/)) {
      const trimmed = piece.trim();
      if (trimmed.length > 1) subPhrases.push(trimmed);
    }
  }
  if (subPhrases.length > 1) {
    for (const phrase of subPhrases) subjects.push(phrase);
  }

  // Words eligible for single-word subjects and bigrams
  const significantWords = subjectWords
    .map((w) => w.replace(/[^a-z0-9\-]/g, ""))
    .filter((w) => w.length > 2 && !FILLER_WORDS.has(w));

  // Generic words too vague to establish subject identity
  const GENERIC_WORDS = new Set([
    "system", "service", "module", "component", "feature", "function",
    "method", "class", "model", "handler", "controller", "manager",
    "process", "workflow", "flow", "logic", "config", "configuration",
    "settings", "data", "information", "record", "records", "file",
    "files", "page", "section", "layer", "level", "part", "item",
    "code", "app", "application", "project",
  ]);

  // Individual significant words (proper nouns, domain terms) — skip generic
  for (const w of significantWords) {
    if (!CONTAMINATING_VERBS.has(w) && !GENERIC_WORDS.has(w) && w.length > 3) {
      subjects.push(w);
    }
  }

  // Adjacent bigrams from significant words (generic words are allowed here)
  for (let i = 0; i < significantWords.length - 1; i++) {
    const bigram = `${significantWords[i]} ${significantWords[i + 1]}`;
    if (!subjects.includes(bigram)) {
      subjects.push(bigram);
    }
  }

  return [...new Set(subjects)];
}
|
|
416
|
+
|
|
417
|
+
/**
 * Compare the subjects of an action and a lock to decide whether they target
 * the same component.
 *
 * Every (lock subject, action subject) pair is checked in this order, and the
 * first check that succeeds records one entry in `matchedSubjects`:
 *   1. exact equality                      → "<subject>"
 *   2. whole-word containment either way   → "<action> ⊂ <lock>"
 *      (the action subject is always printed first, whichever side is the
 *      contained one)
 *   3. for two multi-word subjects, shared non-filler words longer than two
 *      characters covering at least 40% of the smaller word set
 *                                          → "word overlap: a, b"
 *
 * `overlapScore` is the number of distinct entries divided by the number of
 * lock subjects, capped at 1.
 *
 * @param {string} actionText - Description of the proposed action.
 * @param {string} lockText - Lock text to compare against.
 * @returns {{overlaps: boolean, overlapScore: number,
 *   matchedSubjects: string[], lockSubjects: string[],
 *   actionSubjects: string[]}}
 */
export function compareSubjects(actionText, lockText) {
  const lockSubjects = extractSubjects(lockText);
  const actionSubjects = extractSubjects(actionText);

  if (lockSubjects.length === 0 || actionSubjects.length === 0) {
    return {
      overlaps: false,
      overlapScore: 0,
      matchedSubjects: [],
      lockSubjects,
      actionSubjects,
    };
  }

  // Per-subject matching data is prepared once here instead of being rebuilt
  // for every pair inside the nested loop below.
  const escapeForRegex = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const prepare = (subject) => ({
    text: subject,
    // Whole-word matcher: "shipping" must NOT match "calculateshipping".
    // No g/y flag, so .test() is stateless and the regex can be reused.
    wholeWord: new RegExp(`\\b${escapeForRegex(subject)}\\b`),
    // Word set is only needed for multi-word subjects.
    words: subject.includes(" ") ? new Set(subject.split(/\s+/)) : null,
  });
  const lockEntries = lockSubjects.map(prepare);
  const actionEntries = actionSubjects.map(prepare);

  const matched = [];

  for (const lock of lockEntries) {
    for (const action of actionEntries) {
      // Exact match
      if (lock.text === action.text) {
        matched.push(lock.text);
        continue;
      }

      // Word-level containment — "patient records" inside "old patient records"
      if (action.wholeWord.test(lock.text) || lock.wholeWord.test(action.text)) {
        matched.push(`${action.text} ⊂ ${lock.text}`);
        continue;
      }

      // Word-level overlap, only for two multi-word phrases
      if (lock.words && action.words) {
        // Need significant overlap — more than just shared filler
        const shared = [...lock.words].filter(
          (w) => action.words.has(w) && w.length > 2 && !FILLER_WORDS.has(w)
        );
        const threshold = Math.min(lock.words.size, action.words.size) * 0.4;
        if (shared.length >= 1 && shared.length >= threshold) {
          matched.push(`word overlap: ${shared.join(", ")}`);
        }
      }
    }
  }

  const uniqueMatched = [...new Set(matched)];
  const overlapScore = uniqueMatched.length > 0
    ? Math.min(uniqueMatched.length / Math.max(lockSubjects.length, 1), 1.0)
    : 0;

  return {
    overlaps: uniqueMatched.length > 0,
    overlapScore,
    matchedSubjects: uniqueMatched,
    lockSubjects,
    actionSubjects,
  };
}
|