speclock 5.2.5 → 5.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,487 +1,487 @@
1
- // ===================================================================
2
- // SpecLock Smart Lock Authoring Engine
3
- // Auto-rewrites user locks to prevent verb contamination.
4
- //
5
- // Problem: "Never add authentication" causes false positives because
6
- // the word "add" gets extracted as a prohibited signal and fires on
7
- // every action containing "add" (like "Add dark mode").
8
- //
9
- // Solution: Extract the SUBJECT from the lock, rewrite to state
10
- // what IS fixed/prohibited rather than what ACTION is prohibited.
11
- // "Never add authentication" → "Authentication — prohibited. Must not be introduced or added."
12
- //
13
- // Developed by Sandeep Roy (https://github.com/sgroy10)
14
- // ===================================================================
15
-
16
// Action verbs that users naturally write in locks ("never ADD X",
// "don't CHANGE Y"). Left in the lock text they match every action that
// happens to use the same verb, so the functions in this module strip them.
const CONTAMINATING_VERBS = new Set([
  // Constructive
  "add", "create", "introduce", "insert", "implement", "build", "make",
  "include", "put", "set", "use", "install", "deploy", "attach", "connect",
  // Destructive
  "remove", "delete", "drop", "destroy", "kill", "purge", "wipe", "erase",
  "eliminate", "clear", "empty", "nuke",
  // Modification
  "change", "modify", "alter", "update", "mutate", "transform", "rewrite",
  "edit", "adjust", "tweak", "revise", "amend", "touch", "rework",
  // Movement
  "move", "migrate", "transfer", "shift", "relocate", "switch", "swap",
  "replace", "substitute", "exchange",
  // Toggle
  "enable", "disable", "activate", "deactivate", "start", "stop",
  "turn", "pause", "suspend", "halt",
  // General
  "push", "pull", "send", "expose", "leak", "reveal", "show",
  "allow", "permit", "let", "give", "grant", "open",
  // Informal
  "mess",
]);

// Prohibition prefixes — phrases that introduce the verb that follows.
// Every pattern is anchored at the start of the text, and the functions that
// consume this list stop at the first pattern that matches, so order matters.
// Contractions accept both the ASCII apostrophe (') and the typographic
// apostrophe (’) that phones and rich-text editors commonly substitute.
const PROHIBITION_PATTERNS = [
  /^never\s+/i,
  /^must\s+not\s+/i,
  /^do\s+not\s+/i,
  /^don['’]?t\s+/i,
  /^cannot\s+/i,
  /^can['’]?t\s+/i,
  /^should\s+not\s+/i,
  /^shouldn['’]?t\s+/i,
  /^no\s+(?:one\s+(?:should|may|can)\s+)?/i,
  /^(?:it\s+is\s+)?(?:forbidden|prohibited|not\s+allowed)\s+to\s+/i,
  /^avoid\s+/i,
  /^prevent\s+/i,
  /^refrain\s+from\s+/i,
  /^stop\s+/i,
];

// Filler words that may sit between the verb and the subject.
const FILLER_WORDS = new Set([
  "the", "a", "an", "any", "another", "other", "new", "additional",
  "more", "extra", "further", "existing", "current", "old", "all",
  "our", "their", "user", "users", "to",
]);

// Domain subject → canonical prohibition phrasing.
// NOTE(review): nothing in the visible part of this module reads this table,
// and the "{subject}" / "{alternative}" placeholders are not substituted
// anywhere visible — confirm whether it is still needed.
const SUBJECT_TEMPLATES = {
  // Auth/Security
  "authentication": "{subject} and login functionality are prohibited",
  "auth": "Authentication and login functionality are prohibited",
  "login": "Login and authentication functionality are prohibited",
  "signup": "Sign-up and registration functionality are prohibited",
  "user accounts": "User account creation and management are prohibited",
  "2fa": "Two-factor authentication changes are prohibited",
  "mfa": "Multi-factor authentication changes are prohibited",

  // Database
  "database": "External database services are prohibited — use {alternative} only",
  "supabase": "Supabase integration is prohibited",
  "firebase": "Firebase integration is prohibited",
  "mongodb": "MongoDB integration is prohibited",

  // Payment
  "payment": "Additional payment providers are prohibited — use {alternative} exclusively",
  "stripe": "Stripe modifications are prohibited",
  "razorpay": "Razorpay integration is prohibited",
  "paypal": "PayPal integration is prohibited",
};
91
-
92
/**
 * Pull the subject (noun phrase) out of a lock sentence.
 *
 * Pipeline: drop the prohibition prefix, drop the first contaminating verb
 * found among the first three words (plus any "or"/"and"-joined verbs that
 * follow it), drop leading filler words and prepositions, then cut the text
 * at the first dash, semicolon or trailing qualifier clause.
 *
 * Example: "Never add user authentication or login functionality"
 *   → "authentication or login functionality" ("user" is a filler word).
 *
 * @param {string} lockText - Raw lock text as written by the user.
 * @returns {string} The extracted subject, or `lockText` unchanged when
 *   nothing is left after stripping.
 */
export function extractLockSubject(lockText) {
  const bareWord = (token) => token.toLowerCase().replace(/[^a-z]/g, "");
  let text = lockText.trim();

  // 1. Prohibition prefix: only the first matching pattern is removed.
  let prefixHit = null;
  PROHIBITION_PATTERNS.some((re) => (prefixHit = re.exec(text)) !== null);
  if (prefixHit) {
    text = text.slice(prefixHit[0].length).trim();
  }

  // 2. Contaminating verb within the first three words.
  const tokens = text.split(/\s+/);
  const verbAt = tokens
    .slice(0, 3)
    .findIndex((token) => CONTAMINATING_VERBS.has(bareWord(token)));

  if (verbAt !== -1) {
    let cut = verbAt + 1;
    // Swallow compound verbs such as "touch or modify", "change and update".
    while (cut < tokens.length - 1) {
      const joiner = tokens[cut].toLowerCase();
      if (joiner !== "or" && joiner !== "and") break;
      if (!CONTAMINATING_VERBS.has(bareWord(tokens[cut + 1] || ""))) break;
      cut += 2; // connector + verb
    }
    text = tokens.slice(cut).join(" ").trim();
  }

  // 3. Leading fillers / prepositions; the final word is always kept.
  const rest = text.split(/\s+/);
  const skippable = new Set([...FILLER_WORDS, "from", "on", "in", "at", "with"]);
  let first = 0;
  while (first < rest.length - 1 && skippable.has(rest[first].toLowerCase())) {
    first += 1;
  }
  text = rest.slice(first).join(" ").trim();

  // 4. Keep only what precedes an em/en dash or a semicolon …
  text = text.split(/\s*[—–]\s*/)[0].trim();
  text = text.split(/\s*;\s*/)[0].trim();

  // … then drop trailing qualifier clauses, in this order:
  const trailingQualifiers = [
    // comma + pronoun/conjunction clause ("…, it's SEC-compliant");
    // plain comma-separated lists are left alone
    /,\s+(?:it'?s?|they|this|that|which|we|since|because|as)\b.*$/i,
    // modal qualifier ("… must not be changed")
    /\s+(?:must|should|cannot|can't|will)\s+(?:not\s+)?(?:be\s+)?.*$/i,
    // directional qualifier ("… to another provider")
    /\s+(?:to|with)\s+(?:any|another|other|a\s+different)\s+.*$/i,
  ];
  for (const qualifier of trailingQualifiers) {
    text = text.replace(qualifier, "").trim();
  }

  return text === "" ? lockText : text;
}
169
-
170
/**
 * Decide whether a lock is at risk of verb contamination.
 *
 * A lock is flagged only when it starts with a prohibition prefix AND the
 * very first word after that prefix is a contaminating verb. Locks without
 * a prohibition prefix (e.g. "Use Stripe exclusively") are reported as safe.
 *
 * @param {string} lockText - Raw lock text.
 * @returns {{hasRisk: boolean, verb: ?string, subject: ?string,
 *   suggestion: ?string, original?: string}} `original` is present only
 *   when `hasRisk` is true.
 */
export function detectVerbContamination(lockText) {
  const noRisk = () => ({ hasRisk: false, verb: null, subject: null, suggestion: null });
  const normalized = lockText.toLowerCase().trim();

  // First prohibition pattern that matches wins.
  let prefixHit = null;
  for (const re of PROHIBITION_PATTERNS) {
    prefixHit = re.exec(normalized);
    if (prefixHit) break;
  }
  if (!prefixHit) return noRisk();

  // Only the word immediately after the prefix is examined.
  const [leading = ""] = normalized.slice(prefixHit[0].length).trim().split(/\s+/);
  const candidate = leading.replace(/[^a-z]/g, "");
  if (!CONTAMINATING_VERBS.has(candidate)) return noRisk();

  const subject = extractLockSubject(lockText);
  return {
    hasRisk: true,
    verb: candidate,
    subject,
    suggestion: rewriteLock(lockText, candidate, subject),
    original: lockText,
  };
}
219
-
220
// Verb → rewrite category used by rewriteLock. Verbs that are not listed
// here fall through to the generic rewrite.
const REWRITE_VERB_CATEGORIES = new Map(
  Object.entries({
    destructive: ["remove", "delete", "drop", "destroy", "kill",
      "purge", "wipe", "erase", "eliminate", "clear", "empty", "nuke"],
    constructive: ["add", "create", "introduce", "insert", "implement",
      "build", "make", "include", "install", "deploy", "attach", "connect",
      "put", "set", "use"],
    modification: ["change", "modify", "alter", "update", "mutate",
      "transform", "rewrite", "edit", "adjust", "tweak", "revise", "amend",
      "rework", "touch"],
    movement: ["move", "migrate", "transfer", "shift", "relocate",
      "switch", "swap", "replace", "substitute", "exchange"],
    toggle: ["enable", "disable", "activate", "deactivate", "start",
      "stop", "turn", "pause", "suspend", "halt"],
  }).flatMap(([category, verbs]) => verbs.map((verb) => [verb, category])),
);

// Find the "on"/"off" particle of a "turn" lock ("turn on X", "turn X off")
// and remove that particle from the subject when it was left attached.
function resolveTurnParticle(lockText, subject) {
  const leading = /\bturn\s+(on|off)\b/i.exec(lockText);
  if (leading) {
    const trimmed = subject.replace(/^(?:on|off)\s+/i, "");
    return { particle: leading[1].toLowerCase(), subject: trimmed || subject };
  }
  const trailing = /\bturn\b.*\s(on|off)\W*$/i.exec(lockText);
  if (trailing) {
    const trimmed = subject.replace(/\s+(?:on|off)\W*$/i, "");
    return { particle: trailing[1].toLowerCase(), subject: trimmed || subject };
  }
  return { particle: null, subject };
}

/**
 * Rewrite a lock so it states what is fixed/prohibited instead of naming
 * the prohibited action, e.g. "Never delete X" →
 * "X must be preserved — delete and remove operations are prohibited."
 *
 * The wording depends on the verb category (constructive, destructive,
 * modification, movement, toggle); other verbs get a generic rewrite.
 * Destructive, modification and movement rewrites deliberately keep the
 * original verb in the text.
 *
 * "turn" is direction-sensitive: "turn on X" is handled like "enable X"
 * (X must remain disabled), while "turn off X" — and "turn" without a
 * recognisable particle — is handled like "disable X".
 *
 * @param {string} lockText - Original lock text.
 * @param {string} verb - Lower-case contaminating verb found in the lock.
 * @param {string} subject - Subject extracted from the lock.
 * @returns {string} The rewritten lock, or `lockText` unchanged when the
 *   subject is empty or identical to the lock text.
 */
export function rewriteLock(lockText, verb, subject) {
  if (!subject || subject === lockText) return lockText;

  let particle = null;
  let topic = subject;
  if (verb === "turn") {
    ({ particle, subject: topic } = resolveTurnParticle(lockText, subject));
  }

  // Clean subject — capitalize first letter
  const cleanSubject = topic.charAt(0).toUpperCase() + topic.slice(1);

  switch (REWRITE_VERB_CATEGORIES.get(verb)) {
    case "constructive":
      return `${cleanSubject} — prohibited. Must not be introduced or added.`;

    case "destructive": {
      // Keep the original verb in the text so that downstream matching on
      // that verb still finds it.
      const note = verb === "remove"
        ? "remove and delete operations are prohibited"
        : `${verb} and remove operations are prohibited`;
      return `${cleanSubject} must be preserved — ${note}.`;
    }

    case "modification": {
      const note = verb === "change"
        ? "change operations are prohibited"
        : `${verb} and change operations are prohibited`;
      return `${cleanSubject} is frozen — ${note}.`;
    }

    case "movement": {
      const note = verb === "replace"
        ? "replace operations are prohibited"
        : `${verb} and replace operations are prohibited`;
      return `${cleanSubject} must remain unchanged — ${note}.`;
    }

    case "toggle": {
      // Prohibiting an activating verb means the subject must stay off;
      // prohibiting a deactivating verb means it must stay on.
      const activates = verb === "turn"
        ? particle === "on"
        : verb === "enable" || verb === "activate" || verb === "start";
      return activates
        ? `${cleanSubject} must remain disabled — do not activate.`
        : `${cleanSubject} must remain active and enabled — ${verb} is prohibited.`;
    }

    default:
      // Fallback: generic rewrite
      return `${cleanSubject} — no ${verb} operations allowed.`;
  }
}
290
-
291
/**
 * Smart lock normalizer: returns the version of a lock that should be
 * handed to the semantic engine.
 *
 * @param {string} lockText - Raw lock text as written by the user.
 * @returns {{normalized: string, wasRewritten: boolean, original: string,
 *   reason: ?string}} `normalized` is the rewritten lock when verb
 *   contamination was detected, otherwise the original text; `reason`
 *   explains a rewrite and is null when nothing was rewritten.
 */
export function normalizeLock(lockText) {
  const { hasRisk, verb, subject, suggestion } = detectVerbContamination(lockText);

  if (hasRisk) {
    return {
      normalized: suggestion,
      wasRewritten: true,
      original: lockText,
      reason: `Verb "${verb}" in lock text causes false positives — rewritten to focus on the subject "${subject}"`,
    };
  }

  // Safe as written — pass the text through untouched.
  return {
    normalized: lockText,
    wasRewritten: false,
    original: lockText,
    reason: null,
  };
}
322
-
323
// Words too vague to establish subject identity on their own. They are
// never emitted as single-word subjects but may still appear in phrases.
const GENERIC_SUBJECT_WORDS = new Set([
  "system", "service", "module", "component", "feature", "function",
  "method", "class", "model", "handler", "controller", "manager",
  "process", "workflow", "flow", "logic", "config", "configuration",
  "settings", "data", "information", "record", "records", "file",
  "files", "page", "section", "layer", "level", "part", "item",
  "code", "app", "application", "project",
]);

/**
 * Extract subject noun phrases from any text (lock or action).
 * This is the foundation for scope-aware matching.
 *
 * The text is lower-cased, a prohibition prefix and a leading action verb
 * (within the first two words) are dropped, leading filler words are
 * skipped, and the remainder yields, in this order:
 *   1. the full phrase (punctuation removed),
 *   2. its list items when it is joined by commas, "and" or "or",
 *   3. single significant words (longer than 3 characters, not a
 *      contaminating verb, not a generic word),
 *   4. adjacent bigrams of the significant words.
 *
 * Given: "Update the WhatsApp message formatting logic"
 * Returns: ["whatsapp message formatting logic", "whatsapp", "message",
 *           "formatting", "whatsapp message", "message formatting",
 *           "formatting logic"]
 *
 * @param {string} text - Lock or action text.
 * @returns {string[]} De-duplicated subjects; empty when nothing follows
 *   the leading verb.
 */
export function extractSubjects(text) {
  const subjects = [];

  // Step 1: Strip prohibition prefix (first matching pattern only)
  let content = text.toLowerCase().trim();
  for (const pattern of PROHIBITION_PATTERNS) {
    const match = content.match(pattern);
    if (match) {
      content = content.slice(match[0].length).trim();
      break;
    }
  }

  // Step 2: Skip an action verb found in the first two words
  const words = content.split(/\s+/);
  let startIdx = 0;
  for (let i = 0; i < Math.min(2, words.length); i++) {
    if (CONTAMINATING_VERBS.has(words[i].replace(/[^a-z]/g, ""))) {
      startIdx = i + 1;
      break;
    }
  }

  // Step 3: Skip fillers, always keeping at least the last word
  while (
    startIdx < words.length - 1 &&
    FILLER_WORDS.has(words[startIdx].replace(/[^a-z]/g, ""))
  ) {
    startIdx++;
  }

  // Step 4: The remaining text is the subject noun phrase
  const subjectWords = words.slice(startIdx);
  if (subjectWords.length === 0) return subjects;

  // Commas are kept for now so that list items can be separated below;
  // the full phrase itself is stored without them.
  const listPhrase = subjectWords.join(" ").replace(/[^a-z0-9\s\-,]/g, "");
  const fullPhrase = listPhrase.replace(/,/g, "").trim();
  if (fullPhrase.length > 1) subjects.push(fullPhrase);

  // List items: "auth, authorization, and login" → auth / authorization / login
  const listItems = listPhrase
    .trim()
    .split(/\s*,\s*(?:(?:and|or)\s+)?|\s+(?:and|or)\s+/)
    .map((item) => item.trim())
    .filter((item) => item.length > 1);
  if (listItems.length > 1) subjects.push(...listItems);

  // Significant words feed both the single-word and the bigram subjects
  const significantWords = subjectWords
    .map((w) => w.replace(/[^a-z0-9\-]/g, ""))
    .filter((w) => w.length > 2 && !FILLER_WORDS.has(w));

  // Individual significant words (proper nouns, domain terms) — skip generic
  for (const w of significantWords) {
    if (!CONTAMINATING_VERBS.has(w) && !GENERIC_SUBJECT_WORDS.has(w) && w.length > 3) {
      subjects.push(w);
    }
  }

  // Adjacent bigrams from significant words
  for (let i = 0; i < significantWords.length - 1; i++) {
    subjects.push(`${significantWords[i]} ${significantWords[i + 1]}`);
  }

  // The Set removes duplicates while keeping first-seen order
  return [...new Set(subjects)];
}
416
-
417
/**
 * Compare the subjects of an action and a lock to decide whether they
 * target the same component. This is the scope-awareness engine.
 *
 * For every (lock subject, action subject) pair the first rule that applies
 * records one entry in `matchedSubjects`:
 *   - exact match                → the subject itself
 *   - whole-word containment     → "<action> ⊂ <lock>" when the action
 *     subject occurs inside the lock subject, "<action> ⊃ <lock>" when the
 *     lock subject occurs inside the action subject
 *   - multi-word phrase overlap  → "word overlap: w1, w2"
 *
 * Containment is tested on word boundaries, not raw substrings, so
 * "shipping" does not match "calculateshipping".
 *
 * @param {string} actionText - Text of the proposed action.
 * @param {string} lockText - Text of the lock.
 * @returns {{overlaps: boolean, overlapScore: number,
 *   matchedSubjects: string[], lockSubjects: string[],
 *   actionSubjects: string[]}} `overlapScore` is the number of unique
 *   matches divided by the number of lock subjects, capped at 1.
 */
export function compareSubjects(actionText, lockText) {
  const lockSubjects = extractSubjects(lockText);
  const actionSubjects = extractSubjects(actionText);

  if (lockSubjects.length === 0 || actionSubjects.length === 0) {
    return {
      overlaps: false,
      overlapScore: 0,
      matchedSubjects: [],
      lockSubjects,
      actionSubjects,
    };
  }

  // Regex matching `phrase` as whole words; built once per subject instead
  // of once per pair.
  const wholePhrase = (phrase) => {
    const escaped = phrase.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    return new RegExp(`\\b${escaped}\\b`);
  };
  const actionEntries = actionSubjects.map((phrase) => ({
    phrase,
    pattern: wholePhrase(phrase),
  }));

  const matched = [];

  for (const lockSubject of lockSubjects) {
    const lockPattern = wholePhrase(lockSubject);

    for (const { phrase: actionSubject, pattern: actionPattern } of actionEntries) {
      // Exact match
      if (lockSubject === actionSubject) {
        matched.push(lockSubject);
        continue;
      }

      // Word-level containment — "patient records" inside "old patient records"
      if (actionPattern.test(lockSubject)) {
        matched.push(`${actionSubject} ⊂ ${lockSubject}`);
        continue;
      }
      if (lockPattern.test(actionSubject)) {
        matched.push(`${actionSubject} ⊃ ${lockSubject}`);
        continue;
      }

      // Word-level overlap for multi-word phrases
      if (lockSubject.includes(" ") && actionSubject.includes(" ")) {
        const lockWords = new Set(lockSubject.split(/\s+/));
        const actionWords = new Set(actionSubject.split(/\s+/));
        // Shared words longer than two characters that are not fillers
        const shared = [...lockWords].filter(
          (w) => actionWords.has(w) && w.length > 2 && !FILLER_WORDS.has(w),
        );
        // At least one shared word, covering 40% of the shorter phrase
        const needed = Math.min(lockWords.size, actionWords.size) * 0.4;
        if (shared.length >= 1 && shared.length >= needed) {
          matched.push(`word overlap: ${shared.join(", ")}`);
        }
      }
    }
  }

  const uniqueMatched = [...new Set(matched)];
  const overlapScore = uniqueMatched.length > 0
    ? Math.min(uniqueMatched.length / Math.max(lockSubjects.length, 1), 1.0)
    : 0;

  return {
    overlaps: uniqueMatched.length > 0,
    overlapScore,
    matchedSubjects: uniqueMatched,
    lockSubjects,
    actionSubjects,
  };
}
1
+ // ===================================================================
2
+ // SpecLock Smart Lock Authoring Engine
3
+ // Auto-rewrites user locks to prevent verb contamination.
4
+ //
5
+ // Problem: "Never add authentication" causes false positives because
6
+ // the word "add" gets extracted as a prohibited signal and fires on
7
+ // every action containing "add" (like "Add dark mode").
8
+ //
9
+ // Solution: Extract the SUBJECT from the lock, rewrite to state
10
+ // what IS fixed/prohibited rather than what ACTION is prohibited.
11
+ // "Never add authentication" → "Authentication — prohibited. Must not be introduced or added."
12
+ //
13
+ // Developed by Sandeep Roy (https://github.com/sgroy10)
14
+ // ===================================================================
15
+
16
// Action verbs that users naturally write in locks ("never ADD X",
// "don't CHANGE Y"). Left in the lock text they match every action that
// happens to use the same verb, so the functions in this module strip them.
const CONTAMINATING_VERBS = new Set([
  // Constructive
  "add", "create", "introduce", "insert", "implement", "build", "make",
  "include", "put", "set", "use", "install", "deploy", "attach", "connect",
  // Destructive
  "remove", "delete", "drop", "destroy", "kill", "purge", "wipe", "erase",
  "eliminate", "clear", "empty", "nuke",
  // Modification
  "change", "modify", "alter", "update", "mutate", "transform", "rewrite",
  "edit", "adjust", "tweak", "revise", "amend", "touch", "rework",
  // Movement
  "move", "migrate", "transfer", "shift", "relocate", "switch", "swap",
  "replace", "substitute", "exchange",
  // Toggle
  "enable", "disable", "activate", "deactivate", "start", "stop",
  "turn", "pause", "suspend", "halt",
  // General
  "push", "pull", "send", "expose", "leak", "reveal", "show",
  "allow", "permit", "let", "give", "grant", "open",
  // Informal
  "mess",
]);

// Prohibition prefixes — phrases that introduce the verb that follows.
// Every pattern is anchored at the start of the text, and the functions that
// consume this list stop at the first pattern that matches, so order matters.
// Contractions accept both the ASCII apostrophe (') and the typographic
// apostrophe (’) that phones and rich-text editors commonly substitute.
const PROHIBITION_PATTERNS = [
  /^never\s+/i,
  /^must\s+not\s+/i,
  /^do\s+not\s+/i,
  /^don['’]?t\s+/i,
  /^cannot\s+/i,
  /^can['’]?t\s+/i,
  /^should\s+not\s+/i,
  /^shouldn['’]?t\s+/i,
  /^no\s+(?:one\s+(?:should|may|can)\s+)?/i,
  /^(?:it\s+is\s+)?(?:forbidden|prohibited|not\s+allowed)\s+to\s+/i,
  /^avoid\s+/i,
  /^prevent\s+/i,
  /^refrain\s+from\s+/i,
  /^stop\s+/i,
];

// Filler words that may sit between the verb and the subject.
const FILLER_WORDS = new Set([
  "the", "a", "an", "any", "another", "other", "new", "additional",
  "more", "extra", "further", "existing", "current", "old", "all",
  "our", "their", "user", "users", "to",
]);

// Domain subject → canonical prohibition phrasing.
// NOTE(review): nothing in the visible part of this module reads this table,
// and the "{subject}" / "{alternative}" placeholders are not substituted
// anywhere visible — confirm whether it is still needed.
const SUBJECT_TEMPLATES = {
  // Auth/Security
  "authentication": "{subject} and login functionality are prohibited",
  "auth": "Authentication and login functionality are prohibited",
  "login": "Login and authentication functionality are prohibited",
  "signup": "Sign-up and registration functionality are prohibited",
  "user accounts": "User account creation and management are prohibited",
  "2fa": "Two-factor authentication changes are prohibited",
  "mfa": "Multi-factor authentication changes are prohibited",

  // Database
  "database": "External database services are prohibited — use {alternative} only",
  "supabase": "Supabase integration is prohibited",
  "firebase": "Firebase integration is prohibited",
  "mongodb": "MongoDB integration is prohibited",

  // Payment
  "payment": "Additional payment providers are prohibited — use {alternative} exclusively",
  "stripe": "Stripe modifications are prohibited",
  "razorpay": "Razorpay integration is prohibited",
  "paypal": "PayPal integration is prohibited",
};
91
+
92
/**
 * Pull the subject (noun phrase) out of a lock sentence.
 *
 * Pipeline: drop the prohibition prefix, drop the first contaminating verb
 * found among the first three words (plus any "or"/"and"-joined verbs that
 * follow it), drop leading filler words and prepositions, then cut the text
 * at the first dash, semicolon or trailing qualifier clause.
 *
 * Example: "Never add user authentication or login functionality"
 *   → "authentication or login functionality" ("user" is a filler word).
 *
 * @param {string} lockText - Raw lock text as written by the user.
 * @returns {string} The extracted subject, or `lockText` unchanged when
 *   nothing is left after stripping.
 */
export function extractLockSubject(lockText) {
  const bareWord = (token) => token.toLowerCase().replace(/[^a-z]/g, "");
  let text = lockText.trim();

  // 1. Prohibition prefix: only the first matching pattern is removed.
  let prefixHit = null;
  PROHIBITION_PATTERNS.some((re) => (prefixHit = re.exec(text)) !== null);
  if (prefixHit) {
    text = text.slice(prefixHit[0].length).trim();
  }

  // 2. Contaminating verb within the first three words.
  const tokens = text.split(/\s+/);
  const verbAt = tokens
    .slice(0, 3)
    .findIndex((token) => CONTAMINATING_VERBS.has(bareWord(token)));

  if (verbAt !== -1) {
    let cut = verbAt + 1;
    // Swallow compound verbs such as "touch or modify", "change and update".
    while (cut < tokens.length - 1) {
      const joiner = tokens[cut].toLowerCase();
      if (joiner !== "or" && joiner !== "and") break;
      if (!CONTAMINATING_VERBS.has(bareWord(tokens[cut + 1] || ""))) break;
      cut += 2; // connector + verb
    }
    text = tokens.slice(cut).join(" ").trim();
  }

  // 3. Leading fillers / prepositions; the final word is always kept.
  const rest = text.split(/\s+/);
  const skippable = new Set([...FILLER_WORDS, "from", "on", "in", "at", "with"]);
  let first = 0;
  while (first < rest.length - 1 && skippable.has(rest[first].toLowerCase())) {
    first += 1;
  }
  text = rest.slice(first).join(" ").trim();

  // 4. Keep only what precedes an em/en dash or a semicolon …
  text = text.split(/\s*[—–]\s*/)[0].trim();
  text = text.split(/\s*;\s*/)[0].trim();

  // … then drop trailing qualifier clauses, in this order:
  const trailingQualifiers = [
    // comma + pronoun/conjunction clause ("…, it's SEC-compliant");
    // plain comma-separated lists are left alone
    /,\s+(?:it'?s?|they|this|that|which|we|since|because|as)\b.*$/i,
    // modal qualifier ("… must not be changed")
    /\s+(?:must|should|cannot|can't|will)\s+(?:not\s+)?(?:be\s+)?.*$/i,
    // directional qualifier ("… to another provider")
    /\s+(?:to|with)\s+(?:any|another|other|a\s+different)\s+.*$/i,
  ];
  for (const qualifier of trailingQualifiers) {
    text = text.replace(qualifier, "").trim();
  }

  return text === "" ? lockText : text;
}
169
+
170
/**
 * Decide whether a lock is at risk of verb contamination.
 *
 * A lock is flagged only when it starts with a prohibition prefix AND the
 * very first word after that prefix is a contaminating verb. Locks without
 * a prohibition prefix (e.g. "Use Stripe exclusively") are reported as safe.
 *
 * @param {string} lockText - Raw lock text.
 * @returns {{hasRisk: boolean, verb: ?string, subject: ?string,
 *   suggestion: ?string, original?: string}} `original` is present only
 *   when `hasRisk` is true.
 */
export function detectVerbContamination(lockText) {
  const noRisk = () => ({ hasRisk: false, verb: null, subject: null, suggestion: null });
  const normalized = lockText.toLowerCase().trim();

  // First prohibition pattern that matches wins.
  let prefixHit = null;
  for (const re of PROHIBITION_PATTERNS) {
    prefixHit = re.exec(normalized);
    if (prefixHit) break;
  }
  if (!prefixHit) return noRisk();

  // Only the word immediately after the prefix is examined.
  const [leading = ""] = normalized.slice(prefixHit[0].length).trim().split(/\s+/);
  const candidate = leading.replace(/[^a-z]/g, "");
  if (!CONTAMINATING_VERBS.has(candidate)) return noRisk();

  const subject = extractLockSubject(lockText);
  return {
    hasRisk: true,
    verb: candidate,
    subject,
    suggestion: rewriteLock(lockText, candidate, subject),
    original: lockText,
  };
}
219
+
220
// Verb → rewrite category used by rewriteLock. Verbs that are not listed
// here fall through to the generic rewrite.
const REWRITE_VERB_CATEGORIES = new Map(
  Object.entries({
    destructive: ["remove", "delete", "drop", "destroy", "kill",
      "purge", "wipe", "erase", "eliminate", "clear", "empty", "nuke"],
    constructive: ["add", "create", "introduce", "insert", "implement",
      "build", "make", "include", "install", "deploy", "attach", "connect",
      "put", "set", "use"],
    modification: ["change", "modify", "alter", "update", "mutate",
      "transform", "rewrite", "edit", "adjust", "tweak", "revise", "amend",
      "rework", "touch"],
    movement: ["move", "migrate", "transfer", "shift", "relocate",
      "switch", "swap", "replace", "substitute", "exchange"],
    toggle: ["enable", "disable", "activate", "deactivate", "start",
      "stop", "turn", "pause", "suspend", "halt"],
  }).flatMap(([category, verbs]) => verbs.map((verb) => [verb, category])),
);

// Find the "on"/"off" particle of a "turn" lock ("turn on X", "turn X off")
// and remove that particle from the subject when it was left attached.
function resolveTurnParticle(lockText, subject) {
  const leading = /\bturn\s+(on|off)\b/i.exec(lockText);
  if (leading) {
    const trimmed = subject.replace(/^(?:on|off)\s+/i, "");
    return { particle: leading[1].toLowerCase(), subject: trimmed || subject };
  }
  const trailing = /\bturn\b.*\s(on|off)\W*$/i.exec(lockText);
  if (trailing) {
    const trimmed = subject.replace(/\s+(?:on|off)\W*$/i, "");
    return { particle: trailing[1].toLowerCase(), subject: trimmed || subject };
  }
  return { particle: null, subject };
}

/**
 * Rewrite a lock so it states what is fixed/prohibited instead of naming
 * the prohibited action, e.g. "Never delete X" →
 * "X must be preserved — delete and remove operations are prohibited."
 *
 * The wording depends on the verb category (constructive, destructive,
 * modification, movement, toggle); other verbs get a generic rewrite.
 * Destructive, modification and movement rewrites deliberately keep the
 * original verb in the text.
 *
 * "turn" is direction-sensitive: "turn on X" is handled like "enable X"
 * (X must remain disabled), while "turn off X" — and "turn" without a
 * recognisable particle — is handled like "disable X".
 *
 * @param {string} lockText - Original lock text.
 * @param {string} verb - Lower-case contaminating verb found in the lock.
 * @param {string} subject - Subject extracted from the lock.
 * @returns {string} The rewritten lock, or `lockText` unchanged when the
 *   subject is empty or identical to the lock text.
 */
export function rewriteLock(lockText, verb, subject) {
  if (!subject || subject === lockText) return lockText;

  let particle = null;
  let topic = subject;
  if (verb === "turn") {
    ({ particle, subject: topic } = resolveTurnParticle(lockText, subject));
  }

  // Clean subject — capitalize first letter
  const cleanSubject = topic.charAt(0).toUpperCase() + topic.slice(1);

  switch (REWRITE_VERB_CATEGORIES.get(verb)) {
    case "constructive":
      return `${cleanSubject} — prohibited. Must not be introduced or added.`;

    case "destructive": {
      // Keep the original verb in the text so that downstream matching on
      // that verb still finds it.
      const note = verb === "remove"
        ? "remove and delete operations are prohibited"
        : `${verb} and remove operations are prohibited`;
      return `${cleanSubject} must be preserved — ${note}.`;
    }

    case "modification": {
      const note = verb === "change"
        ? "change operations are prohibited"
        : `${verb} and change operations are prohibited`;
      return `${cleanSubject} is frozen — ${note}.`;
    }

    case "movement": {
      const note = verb === "replace"
        ? "replace operations are prohibited"
        : `${verb} and replace operations are prohibited`;
      return `${cleanSubject} must remain unchanged — ${note}.`;
    }

    case "toggle": {
      // Prohibiting an activating verb means the subject must stay off;
      // prohibiting a deactivating verb means it must stay on.
      const activates = verb === "turn"
        ? particle === "on"
        : verb === "enable" || verb === "activate" || verb === "start";
      return activates
        ? `${cleanSubject} must remain disabled — do not activate.`
        : `${cleanSubject} must remain active and enabled — ${verb} is prohibited.`;
    }

    default:
      // Fallback: generic rewrite
      return `${cleanSubject} — no ${verb} operations allowed.`;
  }
}
290
+
291
/**
 * Smart lock normalizer: returns the version of a lock that should be
 * handed to the semantic engine.
 *
 * @param {string} lockText - Raw lock text as written by the user.
 * @returns {{normalized: string, wasRewritten: boolean, original: string,
 *   reason: ?string}} `normalized` is the rewritten lock when verb
 *   contamination was detected, otherwise the original text; `reason`
 *   explains a rewrite and is null when nothing was rewritten.
 */
export function normalizeLock(lockText) {
  const { hasRisk, verb, subject, suggestion } = detectVerbContamination(lockText);

  if (hasRisk) {
    return {
      normalized: suggestion,
      wasRewritten: true,
      original: lockText,
      reason: `Verb "${verb}" in lock text causes false positives — rewritten to focus on the subject "${subject}"`,
    };
  }

  // Safe as written — pass the text through untouched.
  return {
    normalized: lockText,
    wasRewritten: false,
    original: lockText,
    reason: null,
  };
}
322
+
323
/**
 * Extract subject noun phrases from any text (lock or action).
 * This is the foundation for scope-aware matching.
 *
 * The text is lower-cased, a leading prohibition prefix and a leading
 * contaminating verb are dropped, leading filler words are skipped, and the
 * remainder is expanded into candidate subjects, in this order:
 *   1. the full remaining phrase (punctuation stripped),
 *   2. the items of a list joined by commas and/or "and"/"or",
 *   3. individual significant words (longer than 3 chars, not generic,
 *      not a contaminating verb),
 *   4. adjacent bigrams of significant words.
 *
 * Illustrative (exact output depends on FILLER_WORDS / CONTAMINATING_VERBS):
 *   "Never modify the WhatsApp session handler"
 *     yields entries such as "whatsapp session handler", "whatsapp",
 *     "whatsapp session", "session handler".
 *   "Never delete patient medical records, billing data and audit logs"
 *     additionally yields "patient medical records", "billing data",
 *     "audit logs".
 *
 * @param {string} text - Lock or action text. Non-string input yields [].
 * @returns {string[]} De-duplicated candidate subjects in insertion order.
 */
export function extractSubjects(text) {
  // Nothing can be extracted from non-string input.
  if (typeof text !== "string") return [];

  const lower = text.toLowerCase().trim();
  const subjects = [];

  // Step 1: Strip prohibition prefix (only the first matching pattern).
  // NOTE(review): the slice drops match[0].length leading characters, which
  // presumes the patterns are anchored at the start — confirm against
  // PROHIBITION_PATTERNS.
  let content = lower;
  for (const pattern of PROHIBITION_PATTERNS) {
    const match = content.match(pattern);
    if (match) {
      content = content.slice(match[0].length).trim();
      break;
    }
  }

  // Step 2: Strip leading verb — only the first two words are inspected.
  const words = content.split(/\s+/);
  let startIdx = 0;
  for (let i = 0; i < Math.min(2, words.length); i++) {
    const w = words[i].replace(/[^a-z]/g, "");
    if (CONTAMINATING_VERBS.has(w)) {
      startIdx = i + 1;
      break;
    }
  }

  // Step 3: Skip fillers, but always leave at least the last word.
  while (startIdx < words.length - 1) {
    const w = words[startIdx].replace(/[^a-z]/g, "");
    if (!FILLER_WORDS.has(w)) break;
    startIdx++;
  }

  // Step 4: The remaining text is the subject noun phrase.
  const subjectWords = words.slice(startIdx);
  if (subjectWords.length === 0) return subjects;

  const rawPhrase = subjectWords.join(" ");

  // Full noun phrase with all punctuation removed.
  const fullPhrase = rawPhrase.replace(/[^a-z0-9\s\-]/g, "").trim();
  if (fullPhrase.length > 1) subjects.push(fullPhrase);

  // Split lists into their items. Commas are kept for this step only:
  // splitting the already-stripped phrase could never see them, so
  // "a b, c d and e f" was never broken at the comma. Handles ", and" /
  // ", or" as a single separator. For comma-free text this is identical to
  // splitting fullPhrase on " and " / " or ".
  const listItems = rawPhrase
    .replace(/[^a-z0-9\s\-,]/g, "")
    .trim()
    .split(/\s*,\s*(?:(?:and|or)\s+)?|\s+(?:and|or)\s+/)
    .map((s) => s.trim())
    .filter((s) => s.length > 1);
  if (listItems.length > 1) subjects.push(...listItems);

  // Significant words: longer than 2 chars after cleanup and not filler.
  const significantWords = subjectWords
    .map((w) => w.replace(/[^a-z0-9\-]/g, ""))
    .filter((w) => w.length > 2 && !FILLER_WORDS.has(w));

  // Generic words too vague to establish subject identity
  const GENERIC_WORDS = new Set([
    "system", "service", "module", "component", "feature", "function",
    "method", "class", "model", "handler", "controller", "manager",
    "process", "workflow", "flow", "logic", "config", "configuration",
    "settings", "data", "information", "record", "records", "file",
    "files", "page", "section", "layer", "level", "part", "item",
    "code", "app", "application", "project",
  ]);

  // Individual significant words (proper nouns, domain terms) — skip generic
  // terms, verbs, and anything of 3 characters or fewer.
  for (const w of significantWords) {
    if (!CONTAMINATING_VERBS.has(w) && !GENERIC_WORDS.has(w) && w.length > 3) {
      subjects.push(w);
    }
  }

  // Adjacent bigrams from significant words (generic words are allowed here,
  // since the pair carries more identity than the word alone).
  for (let i = 0; i < significantWords.length - 1; i++) {
    subjects.push(`${significantWords[i]} ${significantWords[i + 1]}`);
  }

  // The Set removes duplicates while preserving first-insertion order.
  return [...new Set(subjects)];
}
416
+
417
/**
 * Decide whether an action and a lock talk about the same component by
 * comparing the subjects `extractSubjects` yields for each text.
 *
 * Every (lock subject, action subject) pair is checked, first rule wins:
 *   1. identical strings,
 *   2. one appears inside the other on word boundaries
 *      ("patient records" inside "old patient records", but "shipping"
 *      does not match "calculateshipping"),
 *   3. both are multi-word and share enough non-filler words (at least one,
 *      and at least 40% of the smaller phrase's distinct word count).
 *
 * @param {string} actionText - Text describing the proposed action.
 * @param {string} lockText - Text of the lock being checked.
 * @returns {{
 *   overlaps: boolean,          // true when at least one pair matched
 *   overlapScore: number,       // distinct matches / lock subjects, capped at 1
 *   matchedSubjects: string[],  // de-duplicated match descriptions
 *   lockSubjects: string[],
 *   actionSubjects: string[]
 * }}
 */
export function compareSubjects(actionText, lockText) {
  const lockSubjects = extractSubjects(lockText);
  const actionSubjects = extractSubjects(actionText);

  // Without subjects on both sides there is nothing to compare.
  if (lockSubjects.length === 0 || actionSubjects.length === 0) {
    return {
      overlaps: false,
      overlapScore: 0,
      matchedSubjects: [],
      lockSubjects,
      actionSubjects,
    };
  }

  // Build a whole-word matcher for a phrase, escaping regex metacharacters.
  const wholeWordPattern = (phrase) => {
    const escaped = phrase.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    return new RegExp(`\\b${escaped}\\b`);
  };
  // Compile once per subject instead of once per pair; the patterns carry no
  // flags, so reusing them across `test` calls is stateless.
  const lockPatterns = lockSubjects.map(wholeWordPattern);
  const actionPatterns = actionSubjects.map(wholeWordPattern);

  const hits = [];

  lockSubjects.forEach((lockSubject, lockIdx) => {
    actionSubjects.forEach((actionSubject, actionIdx) => {
      // Rule 1: exact match
      if (lockSubject === actionSubject) {
        hits.push(lockSubject);
        return;
      }

      // Rule 2: word-level containment in either direction
      const contained =
        actionPatterns[actionIdx].test(lockSubject) ||
        lockPatterns[lockIdx].test(actionSubject);
      if (contained) {
        hits.push(`${actionSubject} ⊂ ${lockSubject}`);
        return;
      }

      // Rule 3: shared words — only meaningful for multi-word phrases
      if (!lockSubject.includes(" ") || !actionSubject.includes(" ")) return;

      const lockWords = new Set(lockSubject.split(/\s+/));
      const actionWords = new Set(actionSubject.split(/\s+/));
      const sharedWords = [...lockWords].filter(
        (word) => actionWords.has(word) && word.length > 2 && !FILLER_WORDS.has(word),
      );
      const threshold = Math.min(lockWords.size, actionWords.size) * 0.4;
      if (sharedWords.length >= 1 && sharedWords.length >= threshold) {
        hits.push(`word overlap: ${sharedWords.join(", ")}`);
      }
    });
  });

  const matchedSubjects = [...new Set(hits)];
  const hasOverlap = matchedSubjects.length > 0;
  const overlapScore = hasOverlap
    ? Math.min(matchedSubjects.length / Math.max(lockSubjects.length, 1), 1.0)
    : 0;

  return {
    overlaps: hasOverlap,
    overlapScore,
    matchedSubjects,
    lockSubjects,
    actionSubjects,
  };
}