great-cto 2.19.0 → 2.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/archetypes.js +39 -0
- package/dist/detect.js +166 -0
- package/dist/jurisdictions.js +94 -2
- package/package.json +1 -1
package/dist/archetypes.js
CHANGED
|
@@ -1068,13 +1068,52 @@ export function pickArchetype(d) {
|
|
|
1068
1068
|
break;
|
|
1069
1069
|
}
|
|
1070
1070
|
}
|
|
1071
|
+
// ── Pack hints for low/medium confidence or niche stacks ────────────────
|
|
1072
|
+
// Surfaces domain-specific packs that the archetype alone doesn't capture.
|
|
1073
|
+
const suggestedPacks = confidence !== "high"
|
|
1074
|
+
? inferPackHints(d)
|
|
1075
|
+
: inferPackHints(d).filter((p) => isNichePack(p)); // always surface niche packs
|
|
1071
1076
|
return {
|
|
1072
1077
|
primary: top.archetype,
|
|
1073
1078
|
confidence,
|
|
1074
1079
|
rationale: top.reason,
|
|
1075
1080
|
alternatives,
|
|
1081
|
+
...(suggestedPacks.length > 0 ? { suggestedPacks } : {}),
|
|
1076
1082
|
};
|
|
1077
1083
|
}
|
|
1084
|
+
/**
 * Tells whether a pack id is one of the niche packs that are surfaced even
 * when the archetype was picked with high confidence.
 *
 * @param {string} pack - Pack identifier, e.g. "robotics-pack".
 * @returns {boolean} True when `pack` is in the niche allow-list.
 */
function isNichePack(pack) {
    const nichePackIds = new Set([
        "robotics-pack",
        "climate-pack",
        "drug-discovery-pack",
        "clinical-trials-pack",
        "em-fintech-pack",
    ]);
    return nichePackIds.has(pack);
}
|
|
1089
|
+
/**
 * Suggests domain packs from the detection result's README and infra
 * keywords, for domains the archetype scorer has no dedicated rule for.
 *
 * A pack is suggested when at least one of its trigger terms is present as an
 * exact entry in the combined keyword set (set membership, not substring
 * search). Suggestions are returned in the fixed rule order below.
 *
 * @param {{ readmeKeywords: string[], infraKeywords?: string[] }} d
 *   Detection result; `infraKeywords` may be absent.
 * @returns {string[]} Pack ids, possibly empty.
 */
function inferPackHints(d) {
    const keywordPool = new Set([...d.readmeKeywords, ...(d.infraKeywords ?? [])]);
    // Ordered [packId, triggerTerms] rules; order defines the output order.
    const packRules = [
        ["robotics-pack", ["robot", "ros2", "ros 2", "cobot", "drone", "uav"]],
        ["climate-pack", ["carbon", "ghg", "mrv", "emission", "verra", "sbti"]],
        ["clinical-trials-pack", ["clinical", "ctms", "edc", "cdisc", "randomization", "irb"]],
        ["drug-discovery-pack", ["drug discovery", "binding affinity", "admet", "chembl", "alphafold"]],
        ["hr-ai-pack", ["recruit", "hiring", "candidate", "ats", "aedt"]],
        ["lending-pack", ["loan", "lending", "bnpl", "underwrit", "fcra"]],
        ["voice-pack", ["voice", "telephony", "ivr", "stt", "tts", "outbound call"]],
        ["em-fintech-pack", ["india", "upi", "rbi", "mpesa", "gcash", "pix", "cross-border", "remittance"]],
        ["api-platform-pack", ["public api", "api key", "developer portal", "openapi"]],
    ];
    return packRules
        .filter(([, triggers]) => triggers.some((term) => keywordPool.has(term)))
        .map(([packId]) => packId);
}
|
|
1078
1117
|
// Compliance hints — auto-suggested based on stack and README.
|
|
1079
1118
|
export function suggestCompliance(d, archetype) {
|
|
1080
1119
|
const c = new Set();
|
package/dist/detect.js
CHANGED
|
@@ -1004,6 +1004,10 @@ export function detect(dir) {
|
|
|
1004
1004
|
const readmeKeywords = mineReadmeKeywords(dir);
|
|
1005
1005
|
for (const kw of readmeKeywords)
|
|
1006
1006
|
sig(`readme:${kw}`, "README");
|
|
1007
|
+
// ── Infra signals (Wave 2b) — terraform/env/docker/homepage ──
|
|
1008
|
+
const infraKeywords = mineInfraKeywords(dir, pkg);
|
|
1009
|
+
for (const kw of infraKeywords)
|
|
1010
|
+
sig(`infra:${kw}`, "infra");
|
|
1007
1011
|
return {
|
|
1008
1012
|
stack: Array.from(stack).sort(),
|
|
1009
1013
|
languages: Array.from(languages).sort(),
|
|
@@ -1022,6 +1026,7 @@ export function detect(dir) {
|
|
|
1022
1026
|
scripts: scriptHints,
|
|
1023
1027
|
projectSize,
|
|
1024
1028
|
readmeKeywords,
|
|
1029
|
+
infraKeywords,
|
|
1025
1030
|
};
|
|
1026
1031
|
}
|
|
1027
1032
|
// ── helpers ──────────────────────────────────────────────────
|
|
@@ -1208,6 +1213,25 @@ function mineReadmeKeywords(dir) {
|
|
|
1208
1213
|
"pdpa", "pdpc", "singapore users", "singaporean users",
|
|
1209
1214
|
"mas guidelines", "mas tpm", "singpass", "myinfo",
|
|
1210
1215
|
"singapore data residency",
|
|
1216
|
+
// CA
|
|
1217
|
+
"pipeda", "quebec law 25", "bill 64", "opc canada", "casl",
|
|
1218
|
+
"canadian users", "canada users", "canadian customers", "canadian residents",
|
|
1219
|
+
"osfi", "fintrac", "ca-central", "ca-west", "canada-central",
|
|
1220
|
+
// JP
|
|
1221
|
+
"appi", "personal information protection commission", "ppc japan",
|
|
1222
|
+
"japan users", "japanese users", "japan customers",
|
|
1223
|
+
"fsa japan", "jfsa", "fisc",
|
|
1224
|
+
"ap-northeast-1", "ap-northeast-3", "japan east", "japan west",
|
|
1225
|
+
// CN
|
|
1226
|
+
"pipl", "personal information protection law", "data security law",
|
|
1227
|
+
"mlps", "classified protection", "cyberspace administration",
|
|
1228
|
+
"china users", "chinese users", "mainland china",
|
|
1229
|
+
"pboc", "cn-north", "cn-east", "cn-south", "china-east", "china-north",
|
|
1230
|
+
// KR
|
|
1231
|
+
"pipa korea", "pipa", "personal information protection act korea",
|
|
1232
|
+
"pipc", "isms-p", "kisa", "k-isms",
|
|
1233
|
+
"korea users", "korean users", "south korea users",
|
|
1234
|
+
"fsc korea", "ap-northeast-2", "korea central", "korea south",
|
|
1211
1235
|
];
|
|
1212
1236
|
for (const term of jurisdictionTerms) {
|
|
1213
1237
|
if (text.includes(term))
|
|
@@ -1215,6 +1239,148 @@ function mineReadmeKeywords(dir) {
|
|
|
1215
1239
|
}
|
|
1216
1240
|
return Array.from(kws).sort();
|
|
1217
1241
|
}
|
|
1242
|
+
/**
 * Mine infra-level jurisdiction signals that a README often omits:
 *   1. the TLD of the package.json `homepage` host (.de, .fr, .jp, .cn, …)
 *   2. `…region = <value>` assignments and `TZ=` values in .env samples and
 *      docker-compose files (first 8000 characters, lowercased)
 *   3. cloud region identifiers in .tf / .yaml / .yml / .json files found
 *      while walking the project tree (depth ≤ 4, files < 200 kB, first
 *      50 000 characters of each)
 *
 * All file access is best-effort: unreadable files and directories are
 * skipped rather than reported.
 *
 * @param {string} dir - Project root directory to scan.
 * @param {{ homepage?: unknown } | null | undefined} pkg - Parsed package.json
 *   (may be missing; a non-string `homepage` is ignored).
 * @returns {string[]} Sorted, de-duplicated canonical keywords such as
 *   "eu users" or "japan users".
 */
function mineInfraKeywords(dir, pkg) {
    const kws = new Set();
    // ── 1. package.json homepage TLD → jurisdiction keyword ──────────────────
    const homepageTldMap = {
        ".de": "german users", ".at": "austrian", ".fr": "french users",
        ".nl": "dutch users", ".es": "spanish users", ".it": "italian users",
        ".pl": "polish users", ".eu": "eu users",
        ".co.uk": "uk users", ".uk": "uk users",
        ".ca": "canadian users",
        ".jp": "japan users",
        ".cn": "china users",
        ".kr": "korea users",
        ".com.br": "brazil users", ".br": "brazil users",
        ".com.au": "australia users", ".au": "australia users",
        ".sg": "singapore users",
        ".in": "india users",
    };
    const rawHomepage = typeof pkg?.homepage === "string" ? pkg.homepage.trim().toLowerCase() : "";
    if (rawHomepage) {
        // Compare against the host name only. A substring test on the whole
        // URL would read ".dev" as ".de", ".info" as ".in", and would also
        // match path segments.
        let host = "";
        try {
            const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//.test(rawHomepage);
            host = new URL(hasScheme ? rawHomepage : `https://${rawHomepage}`).hostname;
        }
        catch { /* not a parseable URL: no TLD signal */ }
        host = host.replace(/\.$/, ""); // tolerate a fully-qualified trailing dot
        for (const [tld, kw] of Object.entries(homepageTldMap)) {
            if (host.endsWith(tld))
                kws.add(kw);
        }
    }
    // ── 2. .env / docker-compose region + TZ= → jurisdiction ─────────────────
    const tzRegionMap = [
        [/tz=europe\//i, "eu users"],
        [/tz=america\/toronto|tz=canada/i, "canadian users"],
        [/tz=asia\/tokyo/i, "japan users"],
        [/tz=asia\/shanghai|tz=asia\/beijing|tz=prc|tz=asia\/hong_kong/i, "china users"],
        [/tz=asia\/seoul/i, "korea users"],
        [/tz=asia\/kolkata|tz=asia\/calcutta/i, "india users"],
        [/tz=america\/sao_paulo/i, "brazil users"],
        [/tz=australia\//i, "australia users"],
        [/tz=asia\/singapore/i, "singapore users"],
        [/tz=europe\/london/i, "uk users"],
    ];
    const envFiles = [".env.example", ".env.sample", ".env.test", ".env.local.example",
        "docker-compose.yml", "docker-compose.yaml",
        "docker-compose.dev.yml", "docker-compose.prod.yml"];
    // Matches any `<something>region = value` assignment (aws_region, gcp_region,
    // region, …); the value is captured without its optional opening quote.
    const regionAssignment = /(?:aws_region|region)\s*=\s*["']?([a-z0-9-]+)/g;
    for (const f of envFiles) {
        const p = join(dir, f);
        if (!existsSync(p))
            continue;
        try {
            const txt = readFileSync(p, "utf-8").slice(0, 8000).toLowerCase();
            for (const [, val] of txt.matchAll(regionAssignment)) {
                // `includes("europe")` also covers Azure westeurope/northeurope,
                // in line with the region patterns used in step 3.
                if (/^eu-/.test(val) || val.includes("europe"))
                    kws.add("eu users");
                if (/^ca-/.test(val) || val.includes("canada"))
                    kws.add("canadian users");
                if (/^ap-northeast-1$|^ap-northeast-3$/.test(val))
                    kws.add("japan users");
                if (/^ap-northeast-2$/.test(val))
                    kws.add("korea users");
                if (/^cn-/.test(val) || val.includes("china"))
                    kws.add("china users");
                if (/^ap-south-1$/.test(val) || val.includes("india"))
                    kws.add("india users");
                if (/^ap-southeast-1$/.test(val))
                    kws.add("singapore users");
                if (/^ap-southeast-2$/.test(val))
                    kws.add("australia users");
                if (/^sa-east/.test(val))
                    kws.add("brazil users");
                if (/^us-/.test(val))
                    kws.add("us users");
                // Only GCP europe-west2 is London; other europe-west* regions
                // and Azure westeurope are on the continent and must not be
                // tagged as UK.
                if (val.includes("uk") || /^europe-west2(?![0-9])/.test(val))
                    kws.add("uk users");
            }
            for (const [re, kw] of tzRegionMap) {
                if (re.test(txt))
                    kws.add(kw);
            }
        }
        catch { /* unreadable: best-effort scan, skip this file */ }
    }
    // ── 3. Terraform / Pulumi / CloudFormation region strings ─────────────────
    // NOTE: the walker collects every .tf/.yaml/.yml/.json file it meets, not
    // only infrastructure-as-code sources.
    const MAX_SCAN_FILES = 40;
    const SKIP = new Set(["node_modules", ".git", "dist", ".terraform"]);
    const tfFiles = [];
    function collectTf(d, depth) {
        if (depth > 4)
            return;
        let entries;
        try {
            entries = readdirSync(d);
        }
        catch {
            return; // unreadable directory
        }
        for (const e of entries) {
            // Checked at the top of every iteration so the cap also stops parent
            // frames once a nested directory has filled the list.
            if (tfFiles.length > MAX_SCAN_FILES)
                return;
            if (SKIP.has(e))
                continue;
            const p = join(d, e);
            try {
                const st = statSync(p);
                if (st.isDirectory()) {
                    collectTf(p, depth + 1);
                    continue;
                }
                if (/\.(tf|yaml|yml|json)$/.test(e) && st.size < 200_000)
                    tfFiles.push(p);
            }
            catch { /* skip entries that cannot be stat'ed */ }
        }
    }
    collectTf(dir, 0);
    const regionPatterns = [
        [/\beu-west-\d|eu-central-\d|eu-north-\d|eu-south-\d|europe-west\d|europe-north\d|westeurope|northeurope|germanywestcentral|francecentral\b/i, "eu users"],
        [/\bca-central-\d|canadacentral|canadaeast\b/i, "canadian users"],
        [/\bap-northeast-1\b|\bjapan-east\b|\bjapaneast\b|\bjapanwest\b/i, "japan users"],
        [/\bap-northeast-2\b|\bkoreacentral\b|\bkoreasouth\b/i, "korea users"],
        [/\bcn-north-\d|\bcn-east-\d|\bcn-northwest-\d|\bchinanorth\b|\bchinaeast\b/i, "china users"],
        [/\bap-south-\d|\bcentralindia\b|\bsouthindia\b|\bwestindia\b/i, "india users"],
        [/\bap-southeast-1\b|\bsoutheastasia\b/i, "singapore users"],
        [/\bap-southeast-2\b|\baustralia\b|\baustraliasoutheast\b|\baustraliaeast\b/i, "australia users"],
        [/\bsa-east-\d|\bbrazilsouth\b|\bbrazilsoutheast\b/i, "brazil users"],
        [/\buksouth\b|\bukwest\b|\buk-south\b/i, "uk users"],
        [/\bus-east-\d|\bus-west-\d|\beastus\b|\bwestus\b|\bcentralus\b/i, "us users"],
    ];
    for (const p of tfFiles) {
        try {
            const txt = readFileSync(p, "utf-8").slice(0, 50_000);
            for (const [re, kw] of regionPatterns) {
                if (re.test(txt))
                    kws.add(kw);
            }
        }
        catch { /* unreadable: skip */ }
    }
    return Array.from(kws).sort();
}
|
|
1218
1384
|
function safeGlob(dir, pattern, kind = "file") {
|
|
1219
1385
|
try {
|
|
1220
1386
|
const entries = readdirSync(dir);
|
package/dist/jurisdictions.js
CHANGED
|
@@ -18,6 +18,10 @@ const JURISDICTION_REVIEWERS = {
|
|
|
18
18
|
"br": ["gdpr-reviewer"], // LGPD mirrors GDPR — same reviewer covers
|
|
19
19
|
"au": ["us-privacy-reviewer"], // Privacy Act 1988 — covered by privacy reviewer
|
|
20
20
|
"sg": ["us-privacy-reviewer"], // PDPA — covered by privacy reviewer
|
|
21
|
+
"ca": ["us-privacy-reviewer"], // PIPEDA + Quebec Law 25
|
|
22
|
+
"jp": ["us-privacy-reviewer"], // APPI — covered by privacy reviewer
|
|
23
|
+
"cn": ["gdpr-reviewer"], // PIPL structure mirrors GDPR concepts
|
|
24
|
+
"kr": ["us-privacy-reviewer"], // PIPA — covered by privacy reviewer
|
|
21
25
|
};
|
|
22
26
|
const JURISDICTION_GATES = {
|
|
23
27
|
"eu": ["gate:gdpr-dpia", "gate:eu-ai-act-classification"],
|
|
@@ -28,6 +32,10 @@ const JURISDICTION_GATES = {
|
|
|
28
32
|
"br": ["gate:lgpd-dpia"],
|
|
29
33
|
"au": ["gate:au-privacy-act-assessment"],
|
|
30
34
|
"sg": ["gate:pdpa-dpo"],
|
|
35
|
+
"ca": ["gate:pipeda-pia", "gate:quebec-law25-consent"],
|
|
36
|
+
"jp": ["gate:appi-third-party-transfer", "gate:appi-ppc-registration"],
|
|
37
|
+
"cn": ["gate:pipl-consent-framework", "gate:mlps-classification", "gate:pipl-data-localisation"],
|
|
38
|
+
"kr": ["gate:pipa-isms-p", "gate:pipa-consent-framework"],
|
|
31
39
|
};
|
|
32
40
|
const JURISDICTION_LAWS = {
|
|
33
41
|
"eu": ["GDPR (EU) 2016/679", "EU AI Act 2024/1689", "NIS2 Directive 2022/2555", "ePrivacy Directive"],
|
|
@@ -38,6 +46,10 @@ const JURISDICTION_LAWS = {
|
|
|
38
46
|
"br": ["LGPD (Lei 13.709/2018)", "ANPD resolutions", "Marco Civil da Internet"],
|
|
39
47
|
"au": ["Privacy Act 1988 (Cth)", "Australian Privacy Principles (APPs)", "CDR (if fintech)", "OAIC enforcement"],
|
|
40
48
|
"sg": ["PDPA 2012 (amended 2021)", "MAS TRM Guidelines (if fintech)", "PDPC Advisory Guidelines"],
|
|
49
|
+
"ca": ["PIPEDA (Personal Information Protection and Electronic Documents Act)", "Quebec Law 25 / Bill 64", "CASL (if email marketing)", "OSFI B-10 (if fintech)"],
|
|
50
|
+
"jp": ["APPI 2022 (Act on Protection of Personal Information)", "PPC Guidelines", "My Number Act (if govt-adjacent)", "FISC (if fintech)"],
|
|
51
|
+
"cn": ["PIPL 2021 (Personal Information Protection Law)", "DSL 2021 (Data Security Law)", "MLPS 2.0 (Cybersecurity Classified Protection)", "CBDT (cross-border data transfer rules)", "CAC regulations"],
|
|
52
|
+
"kr": ["PIPA (Personal Information Protection Act)", "ISMS-P certification (mandatory for large platforms)", "Network Act", "FSC regulations (if fintech)"],
|
|
41
53
|
};
|
|
42
54
|
// ── Signal dictionary ─────────────────────────────────────────────────────
|
|
43
55
|
export const JURISDICTION_SIGNALS = {
|
|
@@ -112,15 +124,95 @@ export const JURISDICTION_SIGNALS = {
|
|
|
112
124
|
"singapore data residency",
|
|
113
125
|
],
|
|
114
126
|
},
|
|
127
|
+
"ca": {
|
|
128
|
+
keywords: [
|
|
129
|
+
// Privacy law
|
|
130
|
+
"pipeda", "quebec law 25", "bill 64", "privacy commissioner",
|
|
131
|
+
"opc canada", "casl", "canadian users", "canada users",
|
|
132
|
+
"canadian customers", "canadian residents",
|
|
133
|
+
// Fintech
|
|
134
|
+
"osfi", "fintrac", "aml canada",
|
|
135
|
+
// Infra
|
|
136
|
+
"ca-central", "ca-west", "canada-central",
|
|
137
|
+
],
|
|
138
|
+
},
|
|
139
|
+
"jp": {
|
|
140
|
+
keywords: [
|
|
141
|
+
// Privacy law
|
|
142
|
+
"appi", "personal information protection commission", "ppc japan",
|
|
143
|
+
"japan users", "japanese users", "japan customers",
|
|
144
|
+
"my number", "individual number act",
|
|
145
|
+
// Fintech
|
|
146
|
+
"fsa japan", "jfsa", "fisc",
|
|
147
|
+
// Infra
|
|
148
|
+
"ap-northeast-1", "ap-northeast-3", "japan east", "japan west",
|
|
149
|
+
"japaneast", "japanwest",
|
|
150
|
+
],
|
|
151
|
+
},
|
|
152
|
+
"cn": {
|
|
153
|
+
keywords: [
|
|
154
|
+
// Privacy / data laws
|
|
155
|
+
"pipl", "personal information protection law", "data security law",
|
|
156
|
+
"dsl 2021", "mlps", "classified protection", "cybersecurity law",
|
|
157
|
+
"cac", "cyberspace administration", "cbdt",
|
|
158
|
+
"china users", "chinese users", "mainland china",
|
|
159
|
+
// Cross-border
|
|
160
|
+
"cross-border data transfer china", "security assessment cac",
|
|
161
|
+
"standard contract cac", "personal information export",
|
|
162
|
+
// Fintech
|
|
163
|
+
"pboc", "nfra", "cbirc", "csrc", "alipay", "wechatpay",
|
|
164
|
+
// Infra
|
|
165
|
+
"cn-north", "cn-northwest", "cn-east", "cn-south",
|
|
166
|
+
"china-east", "china-north", "chinaeast", "chinanorth",
|
|
167
|
+
],
|
|
168
|
+
},
|
|
169
|
+
"kr": {
|
|
170
|
+
keywords: [
|
|
171
|
+
// Privacy / data laws
|
|
172
|
+
"pipa korea", "pipa", "personal information protection act korea",
|
|
173
|
+
"pipc", "privacy commissioner korea",
|
|
174
|
+
"korea users", "korean users", "south korea users",
|
|
175
|
+
"isms-p", "kisa", "k-isms",
|
|
176
|
+
// Fintech
|
|
177
|
+
"fsc korea", "fss korea", "kftc",
|
|
178
|
+
// Infra
|
|
179
|
+
"ap-northeast-2", "korea central", "korea south",
|
|
180
|
+
"koreacentral", "koreasouth",
|
|
181
|
+
],
|
|
182
|
+
},
|
|
115
183
|
};
|
|
116
184
|
// ── Public API ─────────────────────────────────────────────────────────────
|
|
185
|
+
/**
 * Word-boundary aware keyword match.
 *
 * Both single tokens ("gdpr") and multi-word phrases ("eu ai act") must start
 * and end on a token boundary: the character before and after the match may
 * not be a lowercase letter, digit, underscore or hyphen. This prevents
 * "india" from matching "indiana", and "us users" from matching
 * "bonus users".
 *
 * The pattern has no case-insensitive flag; callers are expected to pass
 * lowercased `text` and `kw`.
 *
 * @param {string} text - Haystack (lowercased).
 * @param {string} kw - Keyword or phrase to look for (lowercased).
 * @returns {boolean} True when `kw` occurs in `text` as a whole token/phrase.
 */
function matchesKeyword(text, kw) {
    // An empty keyword would otherwise match at any boundary in any text.
    if (!kw)
        return false;
    const escaped = kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    try {
        return new RegExp(`(?<![a-z0-9_-])${escaped}(?![a-z0-9_-])`).test(text);
    }
    catch {
        // The RegExp constructor can throw on engines without lookbehind
        // support; fall back to a plain substring test there.
        return text.includes(kw);
    }
}
|
|
117
204
|
/** Return jurisdictions whose signals match the detection result. */
|
|
118
205
|
export function suggestJurisdictions(d) {
|
|
119
206
|
const matches = [];
|
|
120
|
-
|
|
207
|
+
// Combine README keywords + infra keywords (both already lowercased)
|
|
208
|
+
const allKeywords = [
|
|
209
|
+
...d.readmeKeywords.map((k) => k.toLowerCase()),
|
|
210
|
+
...(d.infraKeywords ?? []).map((k) => k.toLowerCase()),
|
|
211
|
+
];
|
|
212
|
+
const combined = allKeywords.join(" ");
|
|
121
213
|
for (const code of Object.keys(JURISDICTION_SIGNALS)) {
|
|
122
214
|
const { keywords } = JURISDICTION_SIGNALS[code];
|
|
123
|
-
const matchedKeywords = keywords.filter((kw) =>
|
|
215
|
+
const matchedKeywords = keywords.filter((kw) => matchesKeyword(combined, kw));
|
|
124
216
|
if (matchedKeywords.length === 0)
|
|
125
217
|
continue;
|
|
126
218
|
matches.push({
|
package/package.json
CHANGED