@runhalo/engine 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ast-engine.d.ts +60 -0
- package/dist/ast-engine.js +653 -0
- package/dist/ast-engine.js.map +1 -0
- package/dist/context-analyzer.d.ts +209 -0
- package/dist/context-analyzer.js +408 -0
- package/dist/context-analyzer.js.map +1 -0
- package/dist/data-flow-tracer.d.ts +106 -0
- package/dist/data-flow-tracer.js +506 -0
- package/dist/data-flow-tracer.js.map +1 -0
- package/dist/fp-patterns.d.ts +36 -0
- package/dist/fp-patterns.js +426 -0
- package/dist/fp-patterns.js.map +1 -0
- package/dist/frameworks/angular.d.ts +11 -0
- package/dist/frameworks/angular.js +41 -0
- package/dist/frameworks/angular.js.map +1 -0
- package/dist/frameworks/django.d.ts +11 -0
- package/dist/frameworks/django.js +57 -0
- package/dist/frameworks/django.js.map +1 -0
- package/dist/frameworks/index.d.ts +59 -0
- package/dist/frameworks/index.js +99 -0
- package/dist/frameworks/index.js.map +1 -0
- package/dist/frameworks/nextjs.d.ts +11 -0
- package/dist/frameworks/nextjs.js +59 -0
- package/dist/frameworks/nextjs.js.map +1 -0
- package/dist/frameworks/rails.d.ts +11 -0
- package/dist/frameworks/rails.js +58 -0
- package/dist/frameworks/rails.js.map +1 -0
- package/dist/frameworks/react.d.ts +13 -0
- package/dist/frameworks/react.js +36 -0
- package/dist/frameworks/react.js.map +1 -0
- package/dist/frameworks/types.d.ts +29 -0
- package/dist/frameworks/types.js +11 -0
- package/dist/frameworks/types.js.map +1 -0
- package/dist/frameworks/vue.d.ts +9 -0
- package/dist/frameworks/vue.js +39 -0
- package/dist/frameworks/vue.js.map +1 -0
- package/dist/graduation/fp-verdict-logger.d.ts +81 -0
- package/dist/graduation/fp-verdict-logger.js +130 -0
- package/dist/graduation/fp-verdict-logger.js.map +1 -0
- package/dist/graduation/graduation-codifier.d.ts +37 -0
- package/dist/graduation/graduation-codifier.js +205 -0
- package/dist/graduation/graduation-codifier.js.map +1 -0
- package/dist/graduation/graduation-validator.d.ts +73 -0
- package/dist/graduation/graduation-validator.js +204 -0
- package/dist/graduation/graduation-validator.js.map +1 -0
- package/dist/graduation/index.d.ts +71 -0
- package/dist/graduation/index.js +105 -0
- package/dist/graduation/index.js.map +1 -0
- package/dist/graduation/pattern-aggregator.d.ts +77 -0
- package/dist/graduation/pattern-aggregator.js +154 -0
- package/dist/graduation/pattern-aggregator.js.map +1 -0
- package/dist/index.d.ts +99 -0
- package/dist/index.js +718 -61
- package/dist/index.js.map +1 -1
- package/dist/review-board/two-agent-review.d.ts +152 -0
- package/dist/review-board/two-agent-review.js +463 -0
- package/dist/review-board/two-agent-review.js.map +1 -0
- package/dist/scope-analyzer.d.ts +91 -0
- package/dist/scope-analyzer.js +300 -0
- package/dist/scope-analyzer.js.map +1 -0
- package/package.json +9 -2
- package/rules/coppa-tier-1.yaml +17 -10
- package/rules/rules.json +2094 -99
- package/rules/validation-report.json +58 -0
package/dist/index.js
CHANGED
|
@@ -45,11 +45,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
45
45
|
};
|
|
46
46
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
47
47
|
exports.SCAFFOLD_REGISTRY = exports.detectFramework = exports.ScaffoldEngine = exports.ComplianceScoreEngine = exports.transformSetDefault = exports.transformSanitizeInput = exports.transformRemoveDefault = exports.transformUrlUpgrade = exports.FixEngine = exports.REMEDIATION_MAP = exports.HaloEngine = exports.AU_SBD_RULES = exports.AI_AUDIT_RULES = exports.ETHICAL_RULES = exports.COPPA_RULES = exports.treeSitterParser = exports.TreeSitterParser = void 0;
|
|
48
|
+
exports.classifyFile = classifyFile;
|
|
48
49
|
exports.loadRulesFromYAML = loadRulesFromYAML;
|
|
49
50
|
exports.loadRulesFromJSON = loadRulesFromJSON;
|
|
50
51
|
exports.loadRulesFromJSONByPack = loadRulesFromJSONByPack;
|
|
51
52
|
exports.compileRawRules = compileRawRules;
|
|
52
53
|
exports.parseHaloignore = parseHaloignore;
|
|
54
|
+
exports.isVendorPath = isVendorPath;
|
|
55
|
+
exports.isDocGeneratorPath = isDocGeneratorPath;
|
|
53
56
|
exports.shouldIgnoreFile = shouldIgnoreFile;
|
|
54
57
|
exports.shouldIgnoreViolation = shouldIgnoreViolation;
|
|
55
58
|
exports.getRemediation = getRemediation;
|
|
@@ -59,6 +62,9 @@ const tree_sitter_1 = __importDefault(require("tree-sitter"));
|
|
|
59
62
|
const tree_sitter_typescript_1 = __importDefault(require("tree-sitter-typescript"));
|
|
60
63
|
const tree_sitter_javascript_1 = __importDefault(require("tree-sitter-javascript"));
|
|
61
64
|
const yaml = __importStar(require("js-yaml"));
|
|
65
|
+
const ast_engine_1 = require("./ast-engine");
|
|
66
|
+
const frameworks_1 = require("./frameworks");
|
|
67
|
+
const context_analyzer_1 = require("./context-analyzer");
|
|
62
68
|
// Extract category from ruleId (e.g. "coppa-auth-001" → "auth", "ETHICAL-001" → "ethical", "AU-SBD-001" → "au-sbd")
|
|
63
69
|
function extractCategory(ruleId) {
|
|
64
70
|
if (ruleId.startsWith('ETHICAL'))
|
|
@@ -67,6 +73,22 @@ function extractCategory(ruleId) {
|
|
|
67
73
|
return 'ai-audit';
|
|
68
74
|
if (ruleId.startsWith('AU-SBD'))
|
|
69
75
|
return 'au-sbd';
|
|
76
|
+
if (ruleId.startsWith('AU-OSA'))
|
|
77
|
+
return 'au-osa';
|
|
78
|
+
if (ruleId.startsWith('caadca'))
|
|
79
|
+
return 'caadca';
|
|
80
|
+
if (ruleId.startsWith('AI-RISK'))
|
|
81
|
+
return 'ai-risk';
|
|
82
|
+
if (ruleId.startsWith('AI-TRANSPARENCY'))
|
|
83
|
+
return 'ai-transparency';
|
|
84
|
+
if (ruleId.startsWith('AI-GOVERNANCE'))
|
|
85
|
+
return 'ai-governance';
|
|
86
|
+
if (ruleId.startsWith('AI-OVERSIGHT'))
|
|
87
|
+
return 'ai-oversight';
|
|
88
|
+
if (ruleId.startsWith('AI-ACCURACY'))
|
|
89
|
+
return 'ai-accuracy';
|
|
90
|
+
if (ruleId.startsWith('CAI-'))
|
|
91
|
+
return 'constitutional-ai';
|
|
70
92
|
const match = ruleId.match(/^coppa-(\w+)-\d+$/);
|
|
71
93
|
return match ? match[1] : 'unknown';
|
|
72
94
|
}
|
|
@@ -83,6 +105,140 @@ function detectLanguage(filePath) {
|
|
|
83
105
|
};
|
|
84
106
|
return langMap[ext] || 'unknown';
|
|
85
107
|
}
|
|
108
|
+
/**
|
|
109
|
+
* Sprint 13a: Classify a file using deterministic heuristics.
|
|
110
|
+
* Returns a FileClassification object that the scan loop uses to skip
|
|
111
|
+
* files or suppress specific rules.
|
|
112
|
+
*
|
|
113
|
+
* @param filePath — normalized file path (forward slashes)
|
|
114
|
+
* @param contentPrefix — first 3000 chars of file content (for decorator/annotation detection)
|
|
115
|
+
*/
|
|
116
|
+
function classifyFile(filePath, contentPrefix = '') {
|
|
117
|
+
const normalized = filePath.replace(/\\/g, '/');
|
|
118
|
+
const language = detectLanguage(filePath);
|
|
119
|
+
const isVendorResult = isVendorPath(filePath);
|
|
120
|
+
const isDocGeneratorResult = isDocGeneratorPath(filePath);
|
|
121
|
+
// Test/spec/fixture detection (Sprint 8 + 11a, consolidated)
|
|
122
|
+
const isTest = /\.(test|spec)\.(ts|tsx|js|jsx|py|rb|java|go)$/i.test(normalized) ||
|
|
123
|
+
/(^|\/)__tests__\//.test(normalized) ||
|
|
124
|
+
/(^|\/)test\//.test(normalized) ||
|
|
125
|
+
/(^|\/)tests\//.test(normalized) ||
|
|
126
|
+
/(^|\/)spec\//.test(normalized) ||
|
|
127
|
+
/(^|\/)fixtures\//.test(normalized) ||
|
|
128
|
+
/\.(stories|story)\.(ts|tsx|js|jsx)$/i.test(normalized) ||
|
|
129
|
+
/(^|\/)cypress\//.test(normalized) ||
|
|
130
|
+
/(^|\/)e2e\//.test(normalized) ||
|
|
131
|
+
/jest\.config|vitest\.config|playwright\.config/i.test(normalized) ||
|
|
132
|
+
// Sprint 11a: Test environment configs
|
|
133
|
+
/(^|\/)envs\/test[^/]*\.(py|json|ya?ml|toml|cfg|ini)$/i.test(normalized) ||
|
|
134
|
+
/(^|\/)config\/test[^/]*\.(py|json|ya?ml|toml|cfg|ini|js|ts)$/i.test(normalized) ||
|
|
135
|
+
/(^|\/)settings\/test[^/]*\.(py|json|ya?ml|toml)$/i.test(normalized) ||
|
|
136
|
+
/(^|\/)conftest\.py$/i.test(normalized);
|
|
137
|
+
// Consent/privacy implementation files (Sprint 10b)
|
|
138
|
+
const CONSENT_PATH_PATTERNS = /(?:^|\/)(?:consent|cookie[_-]?(?:consent|banner|preferences|notice|policy)|privacy[_-]?(?:policy|notice|banner|settings)|gdpr|ccpa|compliance|data[_-]?(?:deletion|removal|protection))\b/i;
|
|
139
|
+
const isConsent = CONSENT_PATH_PATTERNS.test(normalized);
|
|
140
|
+
// Admin/instructor/staff backend paths (Sprint 11a, updated Sprint 13b)
|
|
141
|
+
// Matches admin directories AND admin.py/admin.rb files (Django/Rails admin registration modules)
|
|
142
|
+
const ADMIN_PATH_PATTERNS = /(?:^|\/)(?:admin|instructor|teacher|staff|management|backoffice|dashboard\/admin|cms|moderator|superuser)(?:\/|\.py|\.rb|\.php|$)/i;
|
|
143
|
+
const isAdmin = ADMIN_PATH_PATTERNS.test(normalized) ||
|
|
144
|
+
/(?:@staff_member_required|@permission_required|@user_passes_test|@login_required.*staff|@admin_required|is_staff|is_superuser)/i.test(contentPrefix);
|
|
145
|
+
// === Sprint 13a: New heuristic patterns ===
|
|
146
|
+
// Django migrations — auto-generated schema changes, no user-facing code
|
|
147
|
+
const isDjangoMigration = /(^|\/)migrations\/\d{4}_[a-zA-Z0-9_]+\.py$/i.test(normalized) ||
|
|
148
|
+
/(^|\/)migrations\/__init__\.py$/i.test(normalized);
|
|
149
|
+
// Rails fixture and seed files — test data, not production behavior
|
|
150
|
+
const isFixtureOrSeed = /(^|\/)fixtures\/[^/]+\.(ya?ml|json|csv)$/i.test(normalized) ||
|
|
151
|
+
/(^|\/)seeds?\//i.test(normalized) ||
|
|
152
|
+
/(^|\/)db\/seeds/i.test(normalized) ||
|
|
153
|
+
/(^|\/)factories?\//i.test(normalized) ||
|
|
154
|
+
/(^|\/)factory\.(ts|js|py|rb)$/i.test(normalized);
|
|
155
|
+
// Mock/factory files — test infrastructure
|
|
156
|
+
const isMockOrFactory = /(?:^|\/)(?:__mocks__|mocks?|fakes?|stubs?)(?:\/|$)/i.test(normalized) ||
|
|
157
|
+
/\.mock\.(ts|tsx|js|jsx|py)$/i.test(normalized) ||
|
|
158
|
+
/\.fake\.(ts|tsx|js|jsx|py)$/i.test(normalized) ||
|
|
159
|
+
/(?:^|\/)(?:mock|fake|stub)[_-]?\w+\.(ts|tsx|js|jsx|py)$/i.test(normalized) ||
|
|
160
|
+
/(?:^|\/)(?:\w+)?[_-](?:mock|fake|stub)\.(ts|tsx|js|jsx|py)$/i.test(normalized);
|
|
161
|
+
// CI/CD configuration files — pipeline definitions, not application code
|
|
162
|
+
const isCIConfig = /(^|\/)\.github\/workflows\//i.test(normalized) ||
|
|
163
|
+
/(^|\/)\.github\/actions\//i.test(normalized) ||
|
|
164
|
+
/(^|\/)\.circleci\//i.test(normalized) ||
|
|
165
|
+
/(^|\/)\.gitlab-ci/i.test(normalized) ||
|
|
166
|
+
/(^|\/)Jenkinsfile$/i.test(normalized) ||
|
|
167
|
+
/(^|\/)\.travis\.yml$/i.test(normalized) ||
|
|
168
|
+
/(^|\/)azure-pipelines/i.test(normalized) ||
|
|
169
|
+
/(^|\/)bitbucket-pipelines/i.test(normalized) ||
|
|
170
|
+
/(^|\/)\.buildkite\//i.test(normalized) ||
|
|
171
|
+
/(^|\/)Dockerfile$/i.test(normalized) ||
|
|
172
|
+
/(^|\/)docker-compose/i.test(normalized);
|
|
173
|
+
// Build output directories — generated code, not source
|
|
174
|
+
const isBuildOutput = /(^|\/)dist\//i.test(normalized) ||
|
|
175
|
+
/(^|\/)build\/(?!src)/i.test(normalized) || // build/ but not build/src/
|
|
176
|
+
/(^|\/)\.next\//i.test(normalized) ||
|
|
177
|
+
/(^|\/)\.nuxt\//i.test(normalized) ||
|
|
178
|
+
/(^|\/)\.svelte-kit\//i.test(normalized) ||
|
|
179
|
+
/(^|\/)out\//i.test(normalized) ||
|
|
180
|
+
/(^|\/)\.output\//i.test(normalized) ||
|
|
181
|
+
/(^|\/)coverage\//i.test(normalized) ||
|
|
182
|
+
/(^|\/)\.cache\//i.test(normalized) ||
|
|
183
|
+
/(^|\/)\.parcel-cache\//i.test(normalized) ||
|
|
184
|
+
/(^|\/)\.turbo\//i.test(normalized);
|
|
185
|
+
// Type definition files — no runtime behavior, only type annotations
|
|
186
|
+
const isTypeDefinition = /\.d\.ts$/i.test(normalized) ||
|
|
187
|
+
/\.pyi$/i.test(normalized) ||
|
|
188
|
+
/(^|\/)@types\//i.test(normalized);
|
|
189
|
+
// Storybook stories — UI component demos, not production code
|
|
190
|
+
const isStorybook = /\.(stories|story)\.(ts|tsx|js|jsx|mdx)$/i.test(normalized) ||
|
|
191
|
+
/(^|\/)\.storybook\//i.test(normalized);
|
|
192
|
+
// Determine if file should be completely skipped
|
|
193
|
+
// (vendor and doc generator are already handled at file-discovery level,
|
|
194
|
+
// but including here for completeness in the classification)
|
|
195
|
+
let shouldSkip = false;
|
|
196
|
+
let skipReason;
|
|
197
|
+
if (isVendorResult) {
|
|
198
|
+
shouldSkip = true;
|
|
199
|
+
skipReason = 'vendor-library';
|
|
200
|
+
}
|
|
201
|
+
else if (isDocGeneratorResult) {
|
|
202
|
+
shouldSkip = true;
|
|
203
|
+
skipReason = 'doc-generator';
|
|
204
|
+
}
|
|
205
|
+
else if (isDjangoMigration) {
|
|
206
|
+
shouldSkip = true;
|
|
207
|
+
skipReason = 'django-migration';
|
|
208
|
+
}
|
|
209
|
+
else if (isBuildOutput) {
|
|
210
|
+
shouldSkip = true;
|
|
211
|
+
skipReason = 'build-output';
|
|
212
|
+
}
|
|
213
|
+
else if (isTypeDefinition) {
|
|
214
|
+
shouldSkip = true;
|
|
215
|
+
skipReason = 'type-definition';
|
|
216
|
+
}
|
|
217
|
+
else if (isCIConfig) {
|
|
218
|
+
shouldSkip = true;
|
|
219
|
+
skipReason = 'ci-config';
|
|
220
|
+
}
|
|
221
|
+
// Note: test, consent, admin, mock, fixture, storybook files are NOT fully skipped
|
|
222
|
+
// They get per-rule suppression instead (some rules ARE valid in these files)
|
|
223
|
+
return {
|
|
224
|
+
method: 'heuristic',
|
|
225
|
+
language,
|
|
226
|
+
isVendor: isVendorResult,
|
|
227
|
+
isTest,
|
|
228
|
+
isConsent,
|
|
229
|
+
isAdmin,
|
|
230
|
+
isDocGenerator: isDocGeneratorResult,
|
|
231
|
+
isDjangoMigration,
|
|
232
|
+
isFixtureOrSeed,
|
|
233
|
+
isMockOrFactory,
|
|
234
|
+
isCIConfig,
|
|
235
|
+
isBuildOutput,
|
|
236
|
+
isTypeDefinition,
|
|
237
|
+
isStorybook,
|
|
238
|
+
shouldSkip,
|
|
239
|
+
skipReason,
|
|
240
|
+
};
|
|
241
|
+
}
|
|
86
242
|
// YAML Rule Loader - Load rules from coppa-tier-1.yaml
|
|
87
243
|
function loadRulesFromYAML(yamlPath) {
|
|
88
244
|
try {
|
|
@@ -365,7 +521,7 @@ exports.COPPA_RULES = [
|
|
|
365
521
|
/LoginManager\.getInstance\s*\(\s*\)\s*\.logIn/gi
|
|
366
522
|
],
|
|
367
523
|
fixSuggestion: 'Wrap the auth call in a conditional check for user.age >= 13 or use signInWithParentEmail() for children',
|
|
368
|
-
penalty: '$
|
|
524
|
+
penalty: '$53,088 per violation',
|
|
369
525
|
languages: ['typescript', 'javascript', 'python', 'go', 'java', 'kotlin', 'swift']
|
|
370
526
|
},
|
|
371
527
|
{
|
|
@@ -377,11 +533,19 @@ exports.COPPA_RULES = [
|
|
|
377
533
|
/(\?|&)(email|first_?name|last_?name|dob|phone|birthdate)=/gi,
|
|
378
534
|
/axios\.get\s*\(\s*[`'"]https?:\/\/[^\s]*\?[^`'"]*\$\{/gi,
|
|
379
535
|
/fetch\s*\(\s*[`'"]https?:\/\/[^\s]*\?[^`'"]*\$\{/gi,
|
|
380
|
-
/\?[^'"`\s]*\$\{[^}]*(?:\.email|\.firstName|\.lastName|\.dob|\.phone)[^}]*\}/gi
|
|
536
|
+
/\?[^'"`\s]*\$\{[^}]*(?:\.email|\.firstName|\.lastName|\.dob|\.phone)[^}]*\}/gi,
|
|
537
|
+
// Python — requests.get with PII query params
|
|
538
|
+
/requests\.get\s*\([^)]*params\s*=\s*\{[^}]*(?:email|name|phone|dob|birthdate)/gi,
|
|
539
|
+
// Python — Django/Flask redirect with PII in URL
|
|
540
|
+
/(?:redirect|HttpResponseRedirect)\s*\([^)]*\?[^)]*(?:email|name|phone)/gi,
|
|
541
|
+
// PHP — PII in $_GET superglobal
|
|
542
|
+
/\$_GET\s*\[\s*['"](?:email|first_?name|last_?name|dob|phone|birthdate)['"]\s*\]/gi,
|
|
543
|
+
// Ruby — params[] with PII in GET context
|
|
544
|
+
/request\.query_parameters\s*\[\s*:(?:email|name|phone|dob|birthdate)\s*\]/gi
|
|
381
545
|
],
|
|
382
546
|
fixSuggestion: 'Switch to POST method and move PII to request body',
|
|
383
|
-
penalty: '$
|
|
384
|
-
languages: ['typescript', 'javascript', 'python', 'java', 'swift']
|
|
547
|
+
penalty: '$53,088 per violation',
|
|
548
|
+
languages: ['typescript', 'javascript', 'python', 'java', 'swift', 'php', 'ruby']
|
|
385
549
|
},
|
|
386
550
|
{
|
|
387
551
|
id: 'coppa-tracking-003',
|
|
@@ -393,11 +557,23 @@ exports.COPPA_RULES = [
|
|
|
393
557
|
/ga\s*\(\s*['"]create['"]/gi,
|
|
394
558
|
/adsbygoogle/gi,
|
|
395
559
|
/gtag\s*\(\s*['"]config['"]/gi,
|
|
396
|
-
/google-analytics\.com\/analytics\.js/gi
|
|
560
|
+
/google-analytics\.com\/analytics\.js/gi,
|
|
561
|
+
// Python — Google Analytics measurement protocol
|
|
562
|
+
/(?:import|from)\s+(?:google\.analytics|pyga|universal_analytics)/gi,
|
|
563
|
+
// Python — Facebook pixel server-side
|
|
564
|
+
/FacebookAdsApi\.init|facebook_business\.adobjects/gi,
|
|
565
|
+
// PHP — Google Analytics server-side
|
|
566
|
+
/(?:TheIconic\\Tracking|Rize\\UriTemplate).*(?:Analytics|Measurement)/gi,
|
|
567
|
+
// PHP — wp_enqueue_script with GA/FB pixel
|
|
568
|
+
/wp_enqueue_script\s*\([^)]*(?:google-analytics|gtag|fbq|facebook-pixel)/gi,
|
|
569
|
+
// Ruby — Google Analytics gems
|
|
570
|
+
/(?:require|gem)\s+['"](?:staccato|google-analytics-rails|gabba)['"]/gi,
|
|
571
|
+
// Java/Kotlin — Firebase Analytics initialization
|
|
572
|
+
/FirebaseAnalytics\.getInstance\s*\(/gi
|
|
397
573
|
],
|
|
398
574
|
fixSuggestion: 'Add "child_directed_treatment": true or "restrictDataProcessing": true to SDK initialization',
|
|
399
|
-
penalty: '$
|
|
400
|
-
languages: ['typescript', 'javascript', 'html']
|
|
575
|
+
penalty: '$53,088 per violation',
|
|
576
|
+
languages: ['typescript', 'javascript', 'html', 'python', 'php', 'ruby', 'java', 'kotlin']
|
|
401
577
|
},
|
|
402
578
|
{
|
|
403
579
|
id: 'coppa-geo-004',
|
|
@@ -424,17 +600,24 @@ exports.COPPA_RULES = [
|
|
|
424
600
|
// Python — geopy geolocators
|
|
425
601
|
/(?:Nominatim|GoogleV3|Bing)\s*\([^)]*\)\s*\.(?:geocode|reverse)/gi,
|
|
426
602
|
// Android manifest — fine location permission
|
|
427
|
-
/android\.permission\.ACCESS_FINE_LOCATION/gi
|
|
603
|
+
/android\.permission\.ACCESS_FINE_LOCATION/gi,
|
|
604
|
+
// PHP — geolocation APIs
|
|
605
|
+
/(?:geoip_record_by_name|geoip_country_code_by_name|maxmind)\s*\(/gi,
|
|
606
|
+
// PHP — WordPress geolocation
|
|
607
|
+
/WC_Geolocation::geolocate_ip|wp_geolocate/gi,
|
|
608
|
+
// Ruby — Geocoder gem
|
|
609
|
+
/Geocoder\.search\s*\(|geocode_by\s+:/gi,
|
|
610
|
+
/reverse_geocoded_by\s+:/gi
|
|
428
611
|
],
|
|
429
612
|
fixSuggestion: 'Downgrade accuracy to kCLLocationAccuracyThreeKilometers or require parental consent',
|
|
430
|
-
penalty: '$
|
|
431
|
-
languages: ['typescript', 'javascript', 'swift', 'kotlin', 'java', 'python', 'xml']
|
|
613
|
+
penalty: '$53,088 per violation',
|
|
614
|
+
languages: ['typescript', 'javascript', 'swift', 'kotlin', 'java', 'python', 'xml', 'php', 'ruby']
|
|
432
615
|
},
|
|
433
616
|
{
|
|
434
617
|
id: 'coppa-retention-005',
|
|
435
618
|
name: 'Missing Data Retention Policy',
|
|
436
619
|
severity: 'medium',
|
|
437
|
-
description: '
|
|
620
|
+
description: 'COPPA 2025 explicitly prohibits indefinite retention of children\'s PI. Operators must retain data only as long as reasonably necessary for the purpose collected. Schemas with PII fields must define retention periods, deletion mechanisms, and purpose limitation.',
|
|
438
621
|
patterns: [
|
|
439
622
|
// JS/TS — Mongoose schemas
|
|
440
623
|
/new\s+Schema\s*\(\s*\{[^{}]*\}/gi,
|
|
@@ -447,11 +630,17 @@ exports.COPPA_RULES = [
|
|
|
447
630
|
// Java/Kotlin — JPA @Entity on user-related classes
|
|
448
631
|
/@Entity[\s\S]*?class\s+(?:User|Child|Student|Profile|Account|Member)/gi,
|
|
449
632
|
// Kotlin — data class for user models
|
|
450
|
-
/data\s+class\s+(?:User|Child|Student|Profile|Account|Member)\w*\s*\(/gi
|
|
633
|
+
/data\s+class\s+(?:User|Child|Student|Profile|Account|Member)\w*\s*\(/gi,
|
|
634
|
+
// PHP — Laravel/WordPress user models
|
|
635
|
+
/class\s+(?:User|Child|Student|Profile|Account|Member)\w*\s+extends\s+(?:Model|Authenticatable|WP_User)/gi,
|
|
636
|
+
// Ruby — ActiveRecord user models
|
|
637
|
+
/class\s+(?:User|Child|Student|Profile|Account|Member)\w*\s*<\s*(?:ApplicationRecord|ActiveRecord::Base)/gi,
|
|
638
|
+
// Android — SharedPreferences/Editor storing user PII
|
|
639
|
+
/(?:putString|putInt|putBoolean)\s*\(\s*['"](?:user_?(?:name|email|id|phone)|child_?(?:name|email|id|dob)|student_?(?:name|email|id)|email|phone|dob|birthdate)['"]/gi
|
|
451
640
|
],
|
|
452
|
-
fixSuggestion: 'Add
|
|
453
|
-
penalty: '
|
|
454
|
-
languages: ['typescript', 'javascript', 'python', 'go', 'java', 'kotlin', 'sql']
|
|
641
|
+
fixSuggestion: 'Add explicit retention period (retentionDays, expiresAt, or TTL index), deleted_at column, and document the purpose limitation for data collection per COPPA 2025 § 312.10',
|
|
642
|
+
penalty: '$53,088 per violation (COPPA 2025 indefinite retention prohibition)',
|
|
643
|
+
languages: ['typescript', 'javascript', 'python', 'go', 'java', 'kotlin', 'sql', 'php', 'ruby']
|
|
455
644
|
},
|
|
456
645
|
// ========== Rules 6-20 (Sprint 2) ==========
|
|
457
646
|
// Rule 6: Unencrypted PII Transmission
|
|
@@ -465,11 +654,18 @@ exports.COPPA_RULES = [
|
|
|
465
654
|
/http:\/\/localhost:[^\s]*(\/api\/)/gi,
|
|
466
655
|
/axios\.get\s*\(\s*['"]http:\/\//gi,
|
|
467
656
|
/fetch\s*\(\s*['"]http:\/\//gi,
|
|
468
|
-
/http:\/\/[^\s]*email[^\s]*/gi
|
|
657
|
+
/http:\/\/[^\s]*email[^\s]*/gi,
|
|
658
|
+
// Python — requests/urllib with HTTP
|
|
659
|
+
/requests\.(?:get|post)\s*\(\s*['"]http:\/\/(?!localhost)/gi,
|
|
660
|
+
/urllib\.request\.urlopen\s*\(\s*['"]http:\/\/(?!localhost)/gi,
|
|
661
|
+
// PHP — HTTP API calls
|
|
662
|
+
/(?:curl_setopt|file_get_contents|wp_remote_get)\s*\([^)]*['"]http:\/\/(?!localhost)/gi,
|
|
663
|
+
// Ruby — HTTP requests
|
|
664
|
+
/(?:Net::HTTP|HTTParty|Faraday)\.(?:get|post)\s*\([^)]*['"]http:\/\/(?!localhost)/gi
|
|
469
665
|
],
|
|
470
666
|
fixSuggestion: 'Replace http:// with https:// for all API endpoints and resources',
|
|
471
667
|
penalty: 'Security breach liability + COPPA penalties',
|
|
472
|
-
languages: ['typescript', 'javascript', 'python', 'java', 'swift']
|
|
668
|
+
languages: ['typescript', 'javascript', 'python', 'java', 'swift', 'php', 'ruby']
|
|
473
669
|
},
|
|
474
670
|
// Rule 7: Passive Audio Recording
|
|
475
671
|
// Fixed Sprint 4: Skip audio:false, skip AudioContext (playback only), skip import-only
|
|
@@ -485,11 +681,18 @@ exports.COPPA_RULES = [
|
|
|
485
681
|
/AVAudioSession\s*\.\s*sharedInstance/gi,
|
|
486
682
|
/AVAudioRecorder\s*\(/gi,
|
|
487
683
|
/new\s+AudioRecord\s*\(/gi,
|
|
488
|
-
/new\s+MediaRecorder\s*\(/gi
|
|
684
|
+
/new\s+MediaRecorder\s*\(/gi,
|
|
685
|
+
// Python — audio recording libraries
|
|
686
|
+
/(?:import|from)\s+(?:pyaudio|sounddevice|speech_recognition)/gi,
|
|
687
|
+
/sounddevice\.rec\s*\(/gi,
|
|
688
|
+
/Recognizer\(\)\.listen/gi,
|
|
689
|
+
// Java/Kotlin — Android AudioRecord
|
|
690
|
+
/AudioRecord\.Builder\s*\(\s*\)/gi,
|
|
691
|
+
/MediaRecorder\s*\(\s*\)\s*\.setAudioSource/gi
|
|
489
692
|
],
|
|
490
693
|
fixSuggestion: 'Wrap audio recording in click handler and add parental consent check',
|
|
491
|
-
penalty: '$
|
|
492
|
-
languages: ['typescript', 'javascript', 'swift', 'kotlin']
|
|
694
|
+
penalty: '$53,088 per violation',
|
|
695
|
+
languages: ['typescript', 'javascript', 'swift', 'kotlin', 'python', 'java']
|
|
493
696
|
},
|
|
494
697
|
// Rule 8: Missing Privacy Policy Link
|
|
495
698
|
// Fixed Sprint 4: Only flag forms with registration-related fields (email, password, name, DOB)
|
|
@@ -506,11 +709,17 @@ exports.COPPA_RULES = [
|
|
|
506
709
|
// kebab-case / snake_case: sign-up-form, register_form, create-account-form
|
|
507
710
|
/\b(?:sign[-_]?up|register|registration|create[-_]?account)[-_]form\b/gi,
|
|
508
711
|
// HTML form elements with registration-related ids/classes
|
|
509
|
-
/<form[^>]*(?:id|class|name)\s*=\s*["'][^"']*(?:register|signup|sign[-_]up|create[-_]account)[^"']*["']/gi
|
|
712
|
+
/<form[^>]*(?:id|class|name)\s*=\s*["'][^"']*(?:register|signup|sign[-_]up|create[-_]account)[^"']*["']/gi,
|
|
713
|
+
// Python — Django/Flask registration form classes
|
|
714
|
+
/class\s+(?:SignUp|Register|Registration|CreateAccount)Form\s*\(\s*(?:forms\.Form|ModelForm|FlaskForm)/gi,
|
|
715
|
+
// Ruby — Rails registration routes/controllers
|
|
716
|
+
/def\s+(?:sign_up|register|create_account)\b/gi,
|
|
717
|
+
// PHP — WordPress registration hooks
|
|
718
|
+
/(?:register_new_user|wp_create_user|user_register)\s*\(/gi
|
|
510
719
|
],
|
|
511
720
|
fixSuggestion: 'Add <a href="/privacy">Privacy Policy</a> link to registration form footer',
|
|
512
721
|
penalty: 'Compliance failure',
|
|
513
|
-
languages: ['typescript', 'javascript', 'html', 'tsx', 'jsx', 'php']
|
|
722
|
+
languages: ['typescript', 'javascript', 'html', 'tsx', 'jsx', 'php', 'python', 'ruby']
|
|
514
723
|
},
|
|
515
724
|
// Rule 9: Contact Info Collection Without Parent Email
|
|
516
725
|
{
|
|
@@ -520,15 +729,24 @@ exports.COPPA_RULES = [
|
|
|
520
729
|
description: 'Forms collecting child email/phone must also require parent email for consent verification',
|
|
521
730
|
patterns: [
|
|
522
731
|
/(child_email|student_email)\s*:\s*String/gi,
|
|
523
|
-
/(child_email|student_email|kid_email)\s*=/gi
|
|
732
|
+
/(child_email|student_email|kid_email)\s*=/gi,
|
|
733
|
+
// Python — Django model field for child contact
|
|
734
|
+
/(?:child_email|student_email|kid_email)\s*=\s*models\.(?:EmailField|CharField)/gi,
|
|
735
|
+
// PHP — child email in form processing
|
|
736
|
+
/\$(?:child_email|student_email|kid_email)\s*=\s*\$_(?:POST|GET|REQUEST)/gi,
|
|
737
|
+
// Ruby — child contact in params or model
|
|
738
|
+
/(?:child_email|student_email|kid_email)\s*=\s*params\[/gi,
|
|
739
|
+
// Java/Kotlin — child email field
|
|
740
|
+
/(?:private|var|val)\s+\w*\s*(?:childEmail|studentEmail|kidEmail)/gi
|
|
524
741
|
],
|
|
525
742
|
fixSuggestion: 'Make parent_email required when collecting child contact information',
|
|
526
|
-
penalty: '$
|
|
527
|
-
languages: ['typescript', 'javascript', 'python']
|
|
743
|
+
penalty: '$53,088 per violation',
|
|
744
|
+
languages: ['typescript', 'javascript', 'python', 'php', 'ruby', 'java', 'kotlin']
|
|
528
745
|
},
|
|
529
746
|
// Rule 10: Insecure Default Passwords
|
|
530
747
|
{
|
|
531
748
|
id: 'coppa-sec-010',
|
|
749
|
+
is_active: false, // Sprint 16 W1: 100% FP (0/3 TP) — all hits are test fixture passwords, not production defaults
|
|
532
750
|
name: 'Weak Default Student Passwords',
|
|
533
751
|
severity: 'medium',
|
|
534
752
|
description: 'Default passwords like "password", "123456", or "changeme" create security vulnerabilities',
|
|
@@ -557,50 +775,79 @@ exports.COPPA_RULES = [
|
|
|
557
775
|
/Freshdesk|FreshChat/gi
|
|
558
776
|
],
|
|
559
777
|
fixSuggestion: 'Disable chat widget for unauthenticated or under-13 users via conditional rendering',
|
|
560
|
-
penalty: '$
|
|
778
|
+
penalty: '$53,088 per violation',
|
|
561
779
|
languages: ['typescript', 'javascript', 'html']
|
|
562
780
|
},
|
|
563
781
|
// Rule 12: Biometric Data Collection
|
|
782
|
+
// Sprint 15: DISABLED — 0% TP precision (46 entries, ALL false positives).
|
|
783
|
+
// Pattern matches generic terms (FaceID, TouchID, FaceDetector) without
|
|
784
|
+
// distinguishing real biometric capture from SDK type definitions, AWS API
|
|
785
|
+
// schemas, and vendor library code. Rebuild requires AST-level context.
|
|
564
786
|
{
|
|
565
787
|
id: 'coppa-bio-012',
|
|
566
788
|
name: 'Biometric Data Collection',
|
|
567
789
|
severity: 'critical',
|
|
568
|
-
|
|
790
|
+
is_active: false,
|
|
791
|
+
description: 'COPPA 2025 explicitly adds biometric identifiers to the definition of PI. Face recognition, voice prints, gait analysis, behavioral biometrics (keystroke dynamics, mouse movement patterns), iris/pupil scanning, and health biometric APIs all require verifiable parental consent.',
|
|
569
792
|
patterns: [
|
|
570
793
|
/(?:import\s+.*from\s+['"]face-api\.js['"]|require\s*\(\s*['"]face-api\.js['"]\s*\))/gi,
|
|
571
794
|
/LocalAuthentication.*evaluatePolicy/gi,
|
|
572
|
-
/
|
|
573
|
-
/
|
|
574
|
-
/
|
|
575
|
-
/
|
|
576
|
-
|
|
795
|
+
/(?:biometricAuth|BiometricAuth|biometricPrompt|BiometricPrompt)/g,
|
|
796
|
+
/voicePrint|VoicePrint|voiceRecognition|VoiceRecognition|speakerVerification/g,
|
|
797
|
+
/livenessCheck|LivenessCheck|livenessDetection/g,
|
|
798
|
+
/FaceMatcher|FaceDetector|FaceRecognizer|FaceLandmarks/g,
|
|
799
|
+
// Behavioral biometrics (COPPA 2025 expansion)
|
|
800
|
+
/keystrokeDynamic|keystrokePattern|typingBiometric|keyPressAnalysis/g,
|
|
801
|
+
/gaitAnalysis|gaitDetect|gaitRecognition|motionBiometric/g,
|
|
802
|
+
/mouseMovementPattern|cursorTracking|behavioralBiometric/g,
|
|
803
|
+
/irisScann?|pupilDetect|eyeTracking|gazeTracking/gi,
|
|
804
|
+
// Health biometric APIs
|
|
805
|
+
/(?:HKHealthStore|HKQuantityType|HealthKit).*(?:heartRate|stepCount|workout|sleep)/gi,
|
|
806
|
+
/(?:GoogleFit|FitnessOptions|HistoryClient).*(?:heartRate|steps|calories|sleep)/gi,
|
|
807
|
+
// Face detection libraries
|
|
808
|
+
/(?:import|require).*(?:face-api|@mediapipe\/face|@tensorflow\/tfjs-models\/face|deepface|insightface)/gi
|
|
577
809
|
],
|
|
578
|
-
fixSuggestion: 'Ensure biometric data remains local-only (on-device) or obtain verifiable parental consent',
|
|
579
|
-
penalty: '$
|
|
580
|
-
languages: ['typescript', 'javascript', 'swift', 'kotlin']
|
|
810
|
+
fixSuggestion: 'Ensure biometric data remains local-only (on-device) or obtain verifiable parental consent per COPPA 2025. Do not transmit biometric identifiers to servers without separate parental consent.',
|
|
811
|
+
penalty: '$53,088 per violation',
|
|
812
|
+
languages: ['typescript', 'javascript', 'swift', 'kotlin', 'python', 'java']
|
|
581
813
|
},
|
|
582
814
|
// Rule 13: Push Notifications to Children
|
|
815
|
+
// Rebuilt Sprint 18: removed generic Notification constructor & requestPermission (94.4% FP).
|
|
816
|
+
// Now targets push subscription/registration APIs only.
|
|
583
817
|
{
|
|
584
818
|
id: 'coppa-notif-013',
|
|
585
819
|
name: 'Direct Push Notifications Without Consent',
|
|
586
|
-
severity: '
|
|
587
|
-
description: '
|
|
820
|
+
severity: 'low',
|
|
821
|
+
description: 'FTC declined to codify push notification restrictions in the 2025 final rule but stated it remains concerned about push notifications and engagement techniques. Best practice: gate push subscriptions behind parental consent. Maps to NGL Labs and Sendit enforcement patterns.',
|
|
588
822
|
patterns: [
|
|
589
|
-
/FirebaseMessaging\.subscribeToTopic/
|
|
590
|
-
/OneSignal\.promptForPushNotifications/
|
|
591
|
-
/sendPushNotification\s*\(/
|
|
592
|
-
/fcm\.send\s*\(/
|
|
593
|
-
/PushManager\.subscribe\s*\(/
|
|
594
|
-
/
|
|
595
|
-
/
|
|
823
|
+
/FirebaseMessaging\.subscribeToTopic/g,
|
|
824
|
+
/OneSignal\.(?:promptForPushNotifications|init)\s*\(/g,
|
|
825
|
+
/sendPushNotification\s*\(/g,
|
|
826
|
+
/fcm\.send\s*\(/g,
|
|
827
|
+
/PushManager\.subscribe\s*\(/g,
|
|
828
|
+
/pushManager\.subscribe\s*\(/g,
|
|
829
|
+
/messaging\(\)\.getToken\s*\(/g,
|
|
830
|
+
/registerForPushNotifications\s*\(/g,
|
|
831
|
+
/addEventListener\s*\(\s*['"]push['"]/g,
|
|
832
|
+
/expo-notifications/g,
|
|
833
|
+
/react-native-push-notification/g,
|
|
834
|
+
// Python — Django push notification libraries
|
|
835
|
+
/(?:import|from)\s+(?:webpush|pywebpush|push_notifications|django_push_notifications)/gi,
|
|
836
|
+
/webpush\.send\s*\(/gi,
|
|
837
|
+
// PHP — web-push-php library
|
|
838
|
+
/(?:new\s+)?WebPush\s*\(\s*\[/gi,
|
|
839
|
+
/\$webPush->sendOneNotification/gi,
|
|
840
|
+
// Ruby — web-push gem
|
|
841
|
+
/WebPush\.payload_send\s*\(/gi
|
|
596
842
|
],
|
|
597
843
|
fixSuggestion: 'Gate push notification subscription behind parental dashboard setting',
|
|
598
|
-
penalty: '$
|
|
599
|
-
languages: ['typescript', 'javascript', 'swift', 'kotlin']
|
|
844
|
+
penalty: '$53,088 per violation',
|
|
845
|
+
languages: ['typescript', 'javascript', 'swift', 'kotlin', 'python', 'php', 'ruby']
|
|
600
846
|
},
|
|
601
847
|
// Rule 14: Unfiltered User Generated Content
|
|
602
848
|
{
|
|
603
849
|
id: 'coppa-ugc-014',
|
|
850
|
+
is_active: false, // Sprint 16 W1: 100% FP (0/3 TP) — all hits are API model property assignments, not child UGC
|
|
604
851
|
name: 'UGC Upload Without PII Filter',
|
|
605
852
|
severity: 'high',
|
|
606
853
|
description: 'Text areas for "bio", "about me", or comments must pass through PII scrubbing before database storage',
|
|
@@ -613,7 +860,7 @@ exports.COPPA_RULES = [
|
|
|
613
860
|
/commentForm.*submit|handleCommentSubmit/gi
|
|
614
861
|
],
|
|
615
862
|
fixSuggestion: 'Add middleware hook for PII scrubbing (regex or AWS Comprehend) before database storage',
|
|
616
|
-
penalty: '$
|
|
863
|
+
penalty: '$53,088 per violation',
|
|
617
864
|
languages: ['typescript', 'javascript', 'python']
|
|
618
865
|
},
|
|
619
866
|
// Rule 15: XSS Vulnerabilities
|
|
@@ -628,11 +875,21 @@ exports.COPPA_RULES = [
|
|
|
628
875
|
/\.innerHTML\s*=\s*\$\{/gi,
|
|
629
876
|
/\.innerHTML\s*=\s*(?!['"]?\s*['"]?\s*;)(?!.*[Ll]ocal(?:ize|ization))(?!.*styleContent)[^;]*\b(?:user|input|query|param|req\.|request\.|body\.|data\.)\w*/gi,
|
|
630
877
|
/\.html\s*\(\s*(?:user|req\.|request\.|params?\.)/gi,
|
|
631
|
-
/v-html\s*=\s*["']?(?!.*sanitize)/gi
|
|
878
|
+
/v-html\s*=\s*["']?(?!.*sanitize)/gi,
|
|
879
|
+
// PHP — echo/print user input without escaping
|
|
880
|
+
/echo\s+\$_(?:GET|POST|REQUEST)\s*\[/gi,
|
|
881
|
+
// PHP — WordPress unescaped output
|
|
882
|
+
/<?php\s+echo\s+\$(?!esc_)/gi,
|
|
883
|
+
// Python — Django mark_safe with user input
|
|
884
|
+
/mark_safe\s*\([^)]*(?:request|user_input|params)/gi,
|
|
885
|
+
// Ruby — Rails raw() with user input
|
|
886
|
+
/raw\s*\(\s*(?:params|@\w*user|@\w*input)/gi,
|
|
887
|
+
// Ruby — html_safe on user input
|
|
888
|
+
/(?:params|request)\[.*\]\.html_safe/gi
|
|
632
889
|
],
|
|
633
890
|
fixSuggestion: 'Use standard JSX rendering or DOMPurify before setting HTML content',
|
|
634
891
|
penalty: 'Security failure',
|
|
635
|
-
languages: ['typescript', 'javascript', 'tsx', 'jsx', 'vue']
|
|
892
|
+
languages: ['typescript', 'javascript', 'tsx', 'jsx', 'vue', 'php', 'python', 'ruby']
|
|
636
893
|
},
|
|
637
894
|
// Rule 16: Missing Cookie Consent
|
|
638
895
|
// Fixed Sprint 4: Only flag tracking/PII cookies, not functional preferences (theme, view mode)
|
|
@@ -655,11 +912,17 @@ exports.COPPA_RULES = [
|
|
|
655
912
|
// Java/Kotlin — Spring ResponseCookie
|
|
656
913
|
/ResponseCookie\.from\s*\(/gi,
|
|
657
914
|
// Generic — any language setting cookies with PII field names
|
|
658
|
-
/(?:set_cookie|SetCookie|addCookie|add_cookie)\s*\([^)]*(?:user|email|token|session|track|auth|uid|analytics)/gi
|
|
915
|
+
/(?:set_cookie|SetCookie|addCookie|add_cookie)\s*\([^)]*(?:user|email|token|session|track|auth|uid|analytics)/gi,
|
|
916
|
+
// PHP — setcookie() with PII
|
|
917
|
+
/setcookie\s*\(\s*['"][^'"]*(?:user|email|token|track|auth|uid|analytics)[^'"]*['"]/gi,
|
|
918
|
+
// PHP — WordPress set_transient with PII
|
|
919
|
+
/set_transient\s*\(\s*['"][^'"]*(?:user|email|auth)[^'"]*['"]/gi,
|
|
920
|
+
// Ruby — Rails cookies[] with PII
|
|
921
|
+
/cookies\s*\[\s*:(?:user|email|token|session|track|auth|uid|analytics)\s*\]/gi
|
|
659
922
|
],
|
|
660
923
|
fixSuggestion: 'Add a cookie consent banner component before setting tracking or PII cookies',
|
|
661
924
|
penalty: 'Compliance warning',
|
|
662
|
-
languages: ['typescript', 'javascript', 'python', 'go', 'java', 'kotlin']
|
|
925
|
+
languages: ['typescript', 'javascript', 'python', 'go', 'java', 'kotlin', 'php', 'ruby']
|
|
663
926
|
},
|
|
664
927
|
// Rule 17: External Links to Non-Child-Safe Sites
|
|
665
928
|
// Fixed Sprint 4: Exclude privacy/TOS links, mailto, and common safe targets
|
|
@@ -705,13 +968,14 @@ exports.COPPA_RULES = [
|
|
|
705
968
|
/(?:setUserId|set_user_id)\s*\([^)]*(?:email|\.name|phone)/gi
|
|
706
969
|
],
|
|
707
970
|
fixSuggestion: 'Hash user ID and omit email/name from analytics payload',
|
|
708
|
-
penalty: '$
|
|
971
|
+
penalty: '$53,088 per violation',
|
|
709
972
|
languages: ['typescript', 'javascript', 'python', 'go', 'java', 'kotlin']
|
|
710
973
|
},
|
|
711
974
|
// Rule 19: School Official Consent Bypass
|
|
712
975
|
// Fixed Sprint 4: Tightened patterns to match actual auth/registration flows only
|
|
713
976
|
{
|
|
714
977
|
id: 'coppa-edu-019',
|
|
978
|
+
is_active: false,
|
|
715
979
|
name: 'Missing Teacher/School Verification',
|
|
716
980
|
severity: 'medium',
|
|
717
981
|
description: 'Teacher accounts using generic email (@gmail.com) bypass "School Official" consent exception',
|
|
@@ -728,6 +992,7 @@ exports.COPPA_RULES = [
|
|
|
728
992
|
// Rule 20: Default Privacy Settings Public
|
|
729
993
|
{
|
|
730
994
|
id: 'coppa-default-020',
|
|
995
|
+
is_active: false,
|
|
731
996
|
name: 'Default Public Profile Visibility',
|
|
732
997
|
severity: 'critical',
|
|
733
998
|
description: 'Default profile visibility must be private. COPPA 2.0 requires privacy by design.',
|
|
@@ -739,8 +1004,41 @@ exports.COPPA_RULES = [
|
|
|
739
1004
|
/profileVisibility\s*=\s*['"]?(?:public|Public)['"]?/gi
|
|
740
1005
|
],
|
|
741
1006
|
fixSuggestion: 'Change default visibility to "private" or false',
|
|
742
|
-
penalty: '$
|
|
1007
|
+
penalty: '$53,088 per violation',
|
|
743
1008
|
languages: ['typescript', 'javascript', 'python', 'swift']
|
|
1009
|
+
},
|
|
1010
|
+
// Rule 21: Targeted Advertising Without Separate Consent (Sprint 17 — COPPA 2025)
|
|
1011
|
+
{
|
|
1012
|
+
id: 'coppa-ads-021',
|
|
1013
|
+
name: 'Targeted Advertising Without Separate Consent',
|
|
1014
|
+
severity: 'critical',
|
|
1015
|
+
description: 'COPPA 2025 requires separate, specific opt-in consent before collecting children\'s PI for targeted advertising. Marketing consent cannot be bundled with general terms acceptance. Ad SDK initialization without a distinct consent flow is a violation.',
|
|
1016
|
+
patterns: [
|
|
1017
|
+
// Google AdMob
|
|
1018
|
+
/(?:import|require).*(?:google-mobile-ads|@react-native-firebase\/admob|react-native-admob)/gi,
|
|
1019
|
+
/(?:GADMobileAds|GADRequest|GADBannerView|GADInterstitial)\.\w+/gi,
|
|
1020
|
+
/MobileAds\.initialize|AdRequest\.Builder|AdView|InterstitialAd\.load/gi,
|
|
1021
|
+
// Meta Audience Network
|
|
1022
|
+
/(?:FBAudienceNetwork|FBAdView|FBInterstitialAd|FBNativeAd)/gi,
|
|
1023
|
+
/(?:import|require).*(?:react-native-fbads|@react-native-community\/fbads)/gi,
|
|
1024
|
+
// Unity Ads
|
|
1025
|
+
/UnityAds\.(?:initialize|show|load)|import\s+UnityAds/gi,
|
|
1026
|
+
// IronSource
|
|
1027
|
+
/IronSource\.(?:init|showRewardedVideo|loadInterstitial)|import\s+IronSource/gi,
|
|
1028
|
+
// AppLovin
|
|
1029
|
+
/AppLovin\.(?:initialize|showAd)|import.*AppLovinSDK/gi,
|
|
1030
|
+
// Chartboost
|
|
1031
|
+
/Chartboost\.(?:start|showInterstitial|cacheInterstitial)/gi,
|
|
1032
|
+
// AdColony
|
|
1033
|
+
/AdColony\.(?:configure|requestInterstitial)/gi,
|
|
1034
|
+
// Vungle
|
|
1035
|
+
/Vungle\.(?:init|playAd|loadAd)/gi,
|
|
1036
|
+
// MoPub
|
|
1037
|
+
/mopub\.(?:loadBanner|loadInterstitial)|MoPubInterstitial/gi
|
|
1038
|
+
],
|
|
1039
|
+
fixSuggestion: 'Implement a separate, specific opt-in consent flow for advertising before initializing ad SDKs. Marketing consent must NOT be bundled with general terms acceptance. Use age-gated ad experiences or contextual-only advertising for children under 13.',
|
|
1040
|
+
penalty: '$53,088 per violation (COPPA 2025 separate advertising consent requirement)',
|
|
1041
|
+
languages: ['typescript', 'javascript', 'swift', 'kotlin', 'java', 'python']
|
|
744
1042
|
}
|
|
745
1043
|
];
|
|
746
1044
|
// Ethical Design Rules (Sprint 5 Preview)
|
|
@@ -803,6 +1101,7 @@ exports.ETHICAL_RULES = [
|
|
|
803
1101
|
// ETHICAL-004: Manipulative Notifications
|
|
804
1102
|
{
|
|
805
1103
|
id: 'ETHICAL-004',
|
|
1104
|
+
is_active: false,
|
|
806
1105
|
name: 'Manipulative Notification Language',
|
|
807
1106
|
severity: 'medium',
|
|
808
1107
|
description: 'Notifications using urgency ("Hurry!", "Missing out") manipulate children\'s fear of social exclusion',
|
|
@@ -819,6 +1118,7 @@ exports.ETHICAL_RULES = [
|
|
|
819
1118
|
// ETHICAL-005: Artificial Scarcity
|
|
820
1119
|
{
|
|
821
1120
|
id: 'ETHICAL-005',
|
|
1121
|
+
is_active: false,
|
|
822
1122
|
name: 'Artificial Scarcity / Countdowns',
|
|
823
1123
|
severity: 'medium',
|
|
824
1124
|
description: 'Fake scarcity ("Only 2 left!") and countdown timers pressure children into impulsive decisions',
|
|
@@ -843,6 +1143,7 @@ exports.AI_AUDIT_RULES = [
|
|
|
843
1143
|
// AI-AUDIT-001: Placeholder Analytics
|
|
844
1144
|
{
|
|
845
1145
|
id: 'AI-AUDIT-001',
|
|
1146
|
+
is_active: false, // Sprint 15: Cut to proposed tier — zero GT
|
|
846
1147
|
name: 'Placeholder Analytics Script',
|
|
847
1148
|
severity: 'high',
|
|
848
1149
|
description: 'AI-generated code frequently includes placeholder analytics (UA-XXXXX, G-XXXXXX, fbq) copied from training data. These may activate real tracking without child_directed_treatment flags.',
|
|
@@ -877,6 +1178,7 @@ exports.AI_AUDIT_RULES = [
|
|
|
877
1178
|
// AI-AUDIT-003: Hallucinated URLs
|
|
878
1179
|
{
|
|
879
1180
|
id: 'AI-AUDIT-003',
|
|
1181
|
+
is_active: false, // Sprint 15: Cut to proposed tier — zero GT
|
|
880
1182
|
name: 'Hallucinated/Placeholder API URLs',
|
|
881
1183
|
severity: 'medium',
|
|
882
1184
|
description: 'AI models often generate fake API endpoints (api.example.com, jsonplaceholder, reqres.in) that may be replaced with real endpoints without proper review.',
|
|
@@ -892,6 +1194,7 @@ exports.AI_AUDIT_RULES = [
|
|
|
892
1194
|
// AI-AUDIT-004: Copy-Paste Tracking Boilerplate
|
|
893
1195
|
{
|
|
894
1196
|
id: 'AI-AUDIT-004',
|
|
1197
|
+
is_active: false, // Sprint 15: Cut to proposed tier — zero GT
|
|
895
1198
|
name: 'Copy-Paste Tracking Boilerplate',
|
|
896
1199
|
severity: 'high',
|
|
897
1200
|
description: 'AI assistants reproduce common analytics setup patterns from training data. These often include user identification, event tracking, and session recording without consent flows.',
|
|
@@ -931,6 +1234,7 @@ exports.AI_AUDIT_RULES = [
|
|
|
931
1234
|
// AI-AUDIT-006: TODO/FIXME Compliance Gaps
|
|
932
1235
|
{
|
|
933
1236
|
id: 'AI-AUDIT-006',
|
|
1237
|
+
is_active: false, // Sprint 15: Cut to proposed tier — zero GT
|
|
934
1238
|
name: 'Unresolved Compliance TODOs',
|
|
935
1239
|
severity: 'low',
|
|
936
1240
|
description: 'AI-generated code often includes TODO/FIXME comments for compliance-related features (consent, age verification, privacy policy) that may ship unimplemented.',
|
|
@@ -986,6 +1290,7 @@ exports.AU_SBD_RULES = [
|
|
|
986
1290
|
// AU-SBD-003: Unrestricted Direct Messaging
|
|
987
1291
|
{
|
|
988
1292
|
id: 'AU-SBD-003',
|
|
1293
|
+
is_active: false,
|
|
989
1294
|
name: 'Unrestricted Direct Messaging for Minors',
|
|
990
1295
|
severity: 'critical',
|
|
991
1296
|
description: 'Direct messaging or chat functionality without safety controls (contact restrictions, message filtering, or parental oversight). The AU Online Safety Act requires platforms to take reasonable steps to prevent child exploitation in private communications.',
|
|
@@ -1002,6 +1307,7 @@ exports.AU_SBD_RULES = [
|
|
|
1002
1307
|
// AU-SBD-004: Algorithmic Feeds Without Safety Guardrails
|
|
1003
1308
|
{
|
|
1004
1309
|
id: 'AU-SBD-004',
|
|
1310
|
+
is_active: false, // Sprint 15: Cut to proposed tier — zero GT
|
|
1005
1311
|
name: 'Recommendation Algorithm Without Safety Guardrails',
|
|
1006
1312
|
severity: 'high',
|
|
1007
1313
|
description: 'Content recommendation or feed algorithms detected without safety filtering, content classification, or age-appropriate guardrails. AU SbD requires platforms to assess and mitigate algorithmic harms, particularly for young users.',
|
|
@@ -1019,6 +1325,7 @@ exports.AU_SBD_RULES = [
|
|
|
1019
1325
|
// AU-SBD-005: Missing Digital Wellbeing / Screen Time Controls
|
|
1020
1326
|
{
|
|
1021
1327
|
id: 'AU-SBD-005',
|
|
1328
|
+
is_active: false, // Sprint 16 W1: 18.2% precision (2/11 TP). All 9 FPs are media player autoplay (jellyfin). Pattern too broad — rebuild needed.
|
|
1022
1329
|
name: 'Engagement Features Without Time Awareness',
|
|
1023
1330
|
severity: 'medium',
|
|
1024
1331
|
description: 'High-engagement features (autoplay, continuous scrolling, notifications) detected without corresponding digital wellbeing controls (screen time limits, break reminders, usage dashboards). AU SbD encourages platforms to build in digital wellbeing tools.',
|
|
@@ -1036,6 +1343,7 @@ exports.AU_SBD_RULES = [
|
|
|
1036
1343
|
// AU-SBD-006: Location Sharing Without Explicit Opt-In
|
|
1037
1344
|
{
|
|
1038
1345
|
id: 'AU-SBD-006',
|
|
1346
|
+
is_active: false,
|
|
1039
1347
|
name: 'Location Data Without Explicit Consent',
|
|
1040
1348
|
severity: 'critical',
|
|
1041
1349
|
description: 'Location data collection or sharing enabled without explicit, informed opt-in. AU SbD and the Privacy Act 1988 require data minimization, especially for children\'s geolocation data — location should never be collected by default.',
|
|
@@ -1097,6 +1405,48 @@ function parseHaloignore(content) {
|
|
|
1097
1405
|
}
|
|
1098
1406
|
return config;
|
|
1099
1407
|
}
|
|
1408
|
+
/**
 * Report whether a file path lies inside vendored / third-party code.
 *
 * Backslashes are converted to forward slashes first, so Windows-style
 * paths are matched the same way as POSIX ones. The path counts as vendored
 * when any pattern in the table below matches it.
 *
 * @param {string} filePath - Path of the file being considered.
 * @returns {boolean} true when at least one vendor pattern matches.
 */
function isVendorPath(filePath) {
    const posixPath = filePath.replace(/\\/g, '/');
    const vendorMarkers = [
        // Dependency / package-manager directories, anchored at a path segment start
        /(^|\/)node_modules\//,
        /(^|\/)vendor\//,
        /(^|\/)bower_components\//,
        /(^|\/)third[_-]?party\//,
        /(^|\/)\.bundle\//,
        /(^|\/)Pods\//,
        /(^|\/)external\//,
        /(^|\/)deps\//,
        /(^|\/)\.yarn\//,
        /(^|\/)\.pnpm\//,
        // Minified or bundled artifacts are treated as vendored/built output
        /[.\-]min\.(js|css)$/,
        /\.bundle\.js$/,
        // Well-known libraries checked in under lib/ (e.g. lib/google2-service/, lib/aws-sdk/)
        /(^|\/)lib\/(google[^/]*|aws[^/]*|yui[^/]*|php[^/]*|jquery[^/]*|bootstrap[^/]*|tinymce[^/]*|h5p[^/]*|firebase[^/]*|simplepie[^/]*|tcpdf[^/]*|guzzle[^/]*|psr[^/]*|font-?awesome[^/]*)\//i,
        // H5P libraries live under h5plib/ rather than lib/
        /(^|\/)h5plib\//,
    ];
    return vendorMarkers.some((marker) => marker.test(posixPath));
}
|
|
1432
|
+
/**
 * Report whether a file path belongs to documentation-generator templates,
 * configuration, or build output.
 *
 * Backslashes are converted to forward slashes before matching, and every
 * pattern is case-insensitive. The path qualifies when any pattern in the
 * table below matches it.
 *
 * @param {string} filePath - Path of the file being considered.
 * @returns {boolean} true when at least one doc-generator pattern matches.
 */
function isDocGeneratorPath(filePath) {
    const posixPath = filePath.replace(/\\/g, '/');
    const docMarkers = [
        // A path segment that starts with a generator name followed by "/" or "."
        /(^|\/)(?:jsdoc|typedoc|apidoc|javadoc|doxygen|sphinx|_build|_static)(?:\/|\.)/i,
        // Documentation template files
        /(?:^|\/)(?:jsdoc|typedoc|apidoc)\.(?:html|hbs|tmpl|ejs)$/i,
        // Generated API docs under doc/ or docs/
        /(^|\/)(?:docs?\/(?:api|generated|reference|build))\//i,
        // Sphinx HTML build output
        /(^|\/)_build\/html\//i,
        // Doc generator config files
        /(?:^|\/)\.jsdoc\.(?:json|js)$/i,
        /(?:^|\/)typedoc\.json$/i,
    ];
    return docMarkers.some((marker) => marker.test(posixPath));
}
|
|
1100
1450
|
/**
|
|
1101
1451
|
* Check if a file should be ignored based on .haloignore config
|
|
1102
1452
|
*/
|
|
@@ -1140,6 +1490,12 @@ class HaloEngine {
|
|
|
1140
1490
|
constructor(config = {}) {
|
|
1141
1491
|
this.config = config;
|
|
1142
1492
|
this.treeSitter = new TreeSitterParser();
|
|
1493
|
+
this.astEngine = new ast_engine_1.ASTRuleEngine();
|
|
1494
|
+
this.contextAnalyzer = new context_analyzer_1.ContextAnalyzer({
|
|
1495
|
+
framework: config.framework,
|
|
1496
|
+
historicalFPRates: config.historicalFPRates,
|
|
1497
|
+
suppressionRates: config.suppressionRates,
|
|
1498
|
+
});
|
|
1143
1499
|
// Rule loading priority chain:
|
|
1144
1500
|
// 1. config.loadedRules — pre-compiled rules from CLI API fetch
|
|
1145
1501
|
// 2. config.rulesPath — YAML file (legacy)
|
|
@@ -1175,6 +1531,35 @@ class HaloEngine {
|
|
|
1175
1531
|
this.rules = [...this.rules, ...exports.AU_SBD_RULES];
|
|
1176
1532
|
}
|
|
1177
1533
|
}
|
|
1534
|
+
// Sprint 15: Filter out disabled rules.
|
|
1535
|
+
// Static list ensures rules are disabled regardless of source (API, cache, bundled JSON, hardcoded).
|
|
1536
|
+
// is_active flag handles hardcoded rules; DISABLED_RULE_IDS handles all sources.
|
|
1537
|
+
// Sprint 15: Zero-GT rule actions
|
|
1538
|
+
// DISABLE: zero GT entries, cannot validate precision
|
|
1539
|
+
// CUT (to proposed tier): zero GT entries, pattern too broad for production
|
|
1540
|
+
const DISABLED_RULE_IDS = new Set([
|
|
1541
|
+
'coppa-bio-012', // 0% precision, all FP — rebuild needed
|
|
1542
|
+
// coppa-notif-013 removed — rebuilt Sprint 18 with push-only patterns
|
|
1543
|
+
'coppa-sec-010', // Sprint 16 W1: 100% FP (0/3 TP) — all hits wrong
|
|
1544
|
+
'coppa-ugc-014', // Sprint 16 W1: 100% FP (0/3 TP) — all hits wrong
|
|
1545
|
+
'coppa-edu-019', // Zero GT — teacher registration patterns too narrow to validate
|
|
1546
|
+
'coppa-default-020', // Zero GT — overlaps with AU-SBD-001 default public profiles
|
|
1547
|
+
'ETHICAL-004', // Zero GT — manipulative notification language too broad
|
|
1548
|
+
'ETHICAL-005', // Zero GT — artificial scarcity patterns too broad
|
|
1549
|
+
'AU-SBD-003', // Zero GT — DM detection patterns too broad
|
|
1550
|
+
'AU-SBD-005', // Sprint 16 W1: 18.2% precision — autoplay pattern fires on media player APIs
|
|
1551
|
+
'AU-SBD-006', // Zero GT — location sharing patterns too broad
|
|
1552
|
+
// CUT to proposed tier (zero GT, pattern too broad for production)
|
|
1553
|
+
'AI-AUDIT-001', // Placeholder analytics — low real-world signal
|
|
1554
|
+
'AI-AUDIT-003', // Hallucinated URLs — low real-world signal
|
|
1555
|
+
'AI-AUDIT-004', // Copy-paste tracking boilerplate — too broad
|
|
1556
|
+
'AI-AUDIT-006', // TODO/FIXME compliance — noise in real codebases
|
|
1557
|
+
'AU-SBD-004', // Algorithmic feeds — pattern too broad for production
|
|
1558
|
+
// Sprint 17 Day 0: Zero-GT rules validated against 5 repos — patterns don't match real-world code
|
|
1559
|
+
'ut-sb142-003', // Default DM access — patterns use naming conventions no real app uses (0 hits across Moodle, Discourse, Rocket.Chat, Element, Mastodon)
|
|
1560
|
+
'ut-sb142-004', // Missing parental tools — 2 hits across 5 repos, both FP. API/bundled pattern mismatch. Needs rebuild
|
|
1561
|
+
]);
|
|
1562
|
+
this.rules = this.rules.filter(r => r.is_active !== false && !DISABLED_RULE_IDS.has(r.id));
|
|
1178
1563
|
if (config.severityFilter) {
|
|
1179
1564
|
this.rules = this.rules.filter(r => config.severityFilter.includes(r.severity));
|
|
1180
1565
|
}
|
|
@@ -1207,6 +1592,8 @@ class HaloEngine {
|
|
|
1207
1592
|
packs.push('ai-audit');
|
|
1208
1593
|
if (config.sectorAuSbd)
|
|
1209
1594
|
packs.push('au-sbd');
|
|
1595
|
+
if (config.sectorAuOsa)
|
|
1596
|
+
packs.push('au-osa');
|
|
1210
1597
|
return packs;
|
|
1211
1598
|
}
|
|
1212
1599
|
/**
|
|
@@ -1220,18 +1607,17 @@ class HaloEngine {
|
|
|
1220
1607
|
*/
|
|
1221
1608
|
scanFileWithAST(filePath, content, language = 'typescript') {
|
|
1222
1609
|
// First get regex-based violations
|
|
1223
|
-
|
|
1224
|
-
//
|
|
1610
|
+
let violations = this.scanFile(filePath, content);
|
|
1611
|
+
// Sprint 8: Enhanced AST analysis with ASTRuleEngine + framework overrides + ContextAnalyzer
|
|
1225
1612
|
try {
|
|
1226
|
-
const
|
|
1613
|
+
const tree = this.treeSitter.parse(content, language);
|
|
1614
|
+
// Legacy Sprint 1: AST-based detection for social login (signInWithPopup)
|
|
1227
1615
|
const functionCalls = this.treeSitter.findFunctionCalls(content, 'signInWithPopup');
|
|
1228
|
-
// Add AST-based detection for social login
|
|
1229
1616
|
for (const call of functionCalls) {
|
|
1230
|
-
// Check if already detected by regex
|
|
1231
1617
|
const exists = violations.some(v => v.ruleId === 'coppa-auth-001' &&
|
|
1232
1618
|
v.line === call.line);
|
|
1233
1619
|
if (!exists) {
|
|
1234
|
-
const authRule = exports.COPPA_RULES.find(r => r.id === 'coppa-auth-001');
|
|
1620
|
+
const authRule = this.rules.find(r => r.id === 'coppa-auth-001') || exports.COPPA_RULES.find(r => r.id === 'coppa-auth-001');
|
|
1235
1621
|
if (authRule) {
|
|
1236
1622
|
violations.push({
|
|
1237
1623
|
ruleId: 'coppa-auth-001',
|
|
@@ -1253,10 +1639,63 @@ class HaloEngine {
|
|
|
1253
1639
|
}
|
|
1254
1640
|
}
|
|
1255
1641
|
}
|
|
1642
|
+
// Sprint 8: Run ASTRuleEngine analysis on every violation to classify FPs
|
|
1643
|
+
if (this.config.astAnalysis !== false) {
|
|
1644
|
+
for (const violation of violations) {
|
|
1645
|
+
try {
|
|
1646
|
+
const astResult = this.astEngine.analyzeViolationWithPath(violation.ruleId, filePath, content, {
|
|
1647
|
+
ruleId: violation.ruleId,
|
|
1648
|
+
line: violation.line,
|
|
1649
|
+
column: violation.column,
|
|
1650
|
+
codeSnippet: violation.codeSnippet,
|
|
1651
|
+
}, tree);
|
|
1652
|
+
violation.astVerdict = astResult.verdict;
|
|
1653
|
+
violation.astConfidence = astResult.confidence;
|
|
1654
|
+
violation.astReason = astResult.reason;
|
|
1655
|
+
// Update matchType to reflect AST involvement
|
|
1656
|
+
if (astResult.verdict !== 'regex_only') {
|
|
1657
|
+
violation.matchType = 'hybrid';
|
|
1658
|
+
}
|
|
1659
|
+
}
|
|
1660
|
+
catch (ruleError) {
|
|
1661
|
+
violation.astVerdict = 'regex_only';
|
|
1662
|
+
violation.astConfidence = 0;
|
|
1663
|
+
violation.astReason = 'AST analysis failed for this violation';
|
|
1664
|
+
}
|
|
1665
|
+
}
|
|
1666
|
+
}
|
|
1256
1667
|
}
|
|
1257
1668
|
catch (error) {
|
|
1258
|
-
// If AST parsing fails, fall back to regex-only
|
|
1669
|
+
// If AST parsing fails entirely, fall back to regex-only
|
|
1259
1670
|
console.warn('AST parsing failed, using regex-only mode:', error);
|
|
1671
|
+
for (const v of violations) {
|
|
1672
|
+
v.astVerdict = 'regex_only';
|
|
1673
|
+
v.astConfidence = 0;
|
|
1674
|
+
}
|
|
1675
|
+
}
|
|
1676
|
+
// Sprint 10: Framework overrides now applied in scanFile() for ALL file types.
|
|
1677
|
+
// No longer needed here — scanFile() already filtered/downgraded regex violations.
|
|
1678
|
+
// AST-added violations (e.g. signInWithPopup → auth-001) are not in any framework profile.
|
|
1679
|
+
// Sprint 8: ContextAnalyzer — compute confidence scores
|
|
1680
|
+
const violationInputs = violations.map(v => ({
|
|
1681
|
+
ruleId: v.ruleId,
|
|
1682
|
+
severity: v.severity,
|
|
1683
|
+
line: v.line,
|
|
1684
|
+
column: v.column,
|
|
1685
|
+
codeSnippet: v.codeSnippet,
|
|
1686
|
+
astVerdict: v.astVerdict,
|
|
1687
|
+
astConfidence: v.astConfidence,
|
|
1688
|
+
astReason: v.astReason,
|
|
1689
|
+
frameworkSuppressed: v.frameworkSuppressed,
|
|
1690
|
+
}));
|
|
1691
|
+
const confidenceResults = this.contextAnalyzer.analyzeFile(violationInputs, filePath, content);
|
|
1692
|
+
for (let i = 0; i < violations.length; i++) {
|
|
1693
|
+
const result = confidenceResults.get(i);
|
|
1694
|
+
if (result) {
|
|
1695
|
+
violations[i].confidence = result.confidence;
|
|
1696
|
+
violations[i].confidenceInterpretation = result.interpretation;
|
|
1697
|
+
violations[i].confidenceReason = result.reason;
|
|
1698
|
+
}
|
|
1260
1699
|
}
|
|
1261
1700
|
return violations;
|
|
1262
1701
|
}
|
|
@@ -1275,8 +1714,66 @@ class HaloEngine {
|
|
|
1275
1714
|
if (ignoreConfig && shouldIgnoreFile(filePath, ignoreConfig)) {
|
|
1276
1715
|
return [];
|
|
1277
1716
|
}
|
|
1278
|
-
|
|
1717
|
+
// Sprint 10: Skip vendored/third-party library files entirely
|
|
1718
|
+
// These produce massive false positives (84% FP rate on Moodle — all from lib/, vendor/ paths)
|
|
1719
|
+
if (isVendorPath(filePath)) {
|
|
1720
|
+
return [];
|
|
1721
|
+
}
|
|
1722
|
+
// Sprint 11a: Skip documentation generator output files
|
|
1723
|
+
// JSDoc templates, Sphinx output, TypeDoc pages — developer tools, not child-facing content
|
|
1724
|
+
if (isDocGeneratorPath(filePath)) {
|
|
1725
|
+
return [];
|
|
1726
|
+
}
|
|
1727
|
+
let violations = [];
|
|
1279
1728
|
const lines = content.split('\n');
|
|
1729
|
+
// Sprint 13a: Consolidated file classification (Pre-filter A+)
|
|
1730
|
+
// All heuristics are now in classifyFile() for consistency and future Option C upgrade
|
|
1731
|
+
const normalizedPath = filePath.replace(/\\/g, '/');
|
|
1732
|
+
const classification = classifyFile(filePath, content.substring(0, 3000));
|
|
1733
|
+
// Sprint 13a: Skip files that should never be scanned
|
|
1734
|
+
// (Django migrations, build output, type definitions, CI configs)
|
|
1735
|
+
if (classification.shouldSkip) {
|
|
1736
|
+
return [];
|
|
1737
|
+
}
|
|
1738
|
+
// Rules that commonly false-positive in test/fixture/mock files
|
|
1739
|
+
const TEST_FP_RULES = new Set([
|
|
1740
|
+
'coppa-sec-010', // Weak passwords in test fixtures
|
|
1741
|
+
'coppa-tracking-003', // Analytics snippets in test mocks
|
|
1742
|
+
'coppa-auth-001', // Auth patterns in test helpers
|
|
1743
|
+
'coppa-sec-015', // XSS patterns in security test cases
|
|
1744
|
+
'coppa-sec-006', // Sprint 11a: HTTP URLs in test config (e.g., http://example-storage.com in envs/test.py)
|
|
1745
|
+
]);
|
|
1746
|
+
// Rules that should be suppressed in consent/compliance implementation files
|
|
1747
|
+
// These rules flag patterns that are REQUIRED in consent implementations
|
|
1748
|
+
const CONSENT_SUPPRESSED_RULES = new Set([
|
|
1749
|
+
'coppa-cookies-016', // Cookie consent banners MUST set cookies to track consent state
|
|
1750
|
+
'coppa-tracking-003', // Consent management may reference tracking to gate it
|
|
1751
|
+
'coppa-data-002', // Consent flows may reference PII fields to declare collection scope
|
|
1752
|
+
]);
|
|
1753
|
+
// Rules that FP in admin/instructor code — these patterns exist for managing users, not collecting child data
|
|
1754
|
+
const ADMIN_FP_RULES = new Set([
|
|
1755
|
+
'coppa-flow-009', // Contact collection: admin reading existing user emails is not child contact flow
|
|
1756
|
+
'coppa-data-002', // PII in URLs: admin user lookup endpoints are internal tools
|
|
1757
|
+
'coppa-ui-008', // Registration forms: admin user management is not child registration
|
|
1758
|
+
'coppa-sec-006', // Sprint 16: HTTP URLs in admin/instructor views are internal tooling, not child-facing
|
|
1759
|
+
]);
|
|
1760
|
+
// ── Graduated Heuristics (Sprint 13b) ──────────────────────────────────
|
|
1761
|
+
// Auto-promoted from AI Review Board via the graduation pipeline.
|
|
1762
|
+
// Each pattern was dismissed consistently by the AI reviewer and passed
|
|
1763
|
+
// MVP validation criteria (min dismissals, min confidence, zero false confirmations).
|
|
1764
|
+
// These replace AI review calls with deterministic checks: zero cost, instant execution.
|
|
1765
|
+
// Graduated pattern: admin-path
|
|
1766
|
+
// 193 consistent dismissals | avg confidence 9.0/10 | 0 false confirmations
|
|
1767
|
+
// AI reviewer cost per check: ~$0.014 → now $0.00
|
|
1768
|
+
const GRADUATED_ADMIN_RULES = new Set([
|
|
1769
|
+
'ut-sb142-001', // UT SB-142 age verification: admin panels are not child-facing
|
|
1770
|
+
'ut-sb142-002', // UT SB-142 parental consent: admin interfaces require staff auth
|
|
1771
|
+
]);
|
|
1772
|
+
// Graduated pattern: test-file
|
|
1773
|
+
// 27 consistent dismissals | avg confidence 9.0/10 | 0 false confirmations
|
|
1774
|
+
const GRADUATED_TEST_RULES = new Set([
|
|
1775
|
+
'ut-sb142-001', // UT SB-142 age verification: test utilities are not production child-facing code
|
|
1776
|
+
]);
|
|
1280
1777
|
// Parse suppression comments
|
|
1281
1778
|
const suppressions = parseSuppressions(content);
|
|
1282
1779
|
// Track lines with global suppressions (at top of file)
|
|
@@ -1287,8 +1784,52 @@ class HaloEngine {
|
|
|
1287
1784
|
}
|
|
1288
1785
|
}
|
|
1289
1786
|
for (const rule of this.rules) {
|
|
1787
|
+
// Sprint 10: Skip rules that don't target this file's language
|
|
1788
|
+
if (rule.languages && rule.languages.length > 0 && classification.language !== 'unknown') {
|
|
1789
|
+
if (!rule.languages.includes(classification.language)) {
|
|
1790
|
+
continue;
|
|
1791
|
+
}
|
|
1792
|
+
}
|
|
1793
|
+
// Sprint 8+13a: Skip rules that commonly FP in test/fixture/mock/factory files
|
|
1794
|
+
if ((classification.isTest || classification.isMockOrFactory || classification.isFixtureOrSeed) && TEST_FP_RULES.has(rule.id)) {
|
|
1795
|
+
continue;
|
|
1796
|
+
}
|
|
1797
|
+
// Sprint 13a: Skip ALL rules in Storybook stories (UI demos, not production code)
|
|
1798
|
+
if (classification.isStorybook) {
|
|
1799
|
+
continue;
|
|
1800
|
+
}
|
|
1801
|
+
// Sprint 10b: Skip rules that FP in consent/compliance implementation files
|
|
1802
|
+
// Consent forms MUST set cookies, reference tracking, and handle PII — that's the solution, not the problem
|
|
1803
|
+
if (classification.isConsent && CONSENT_SUPPRESSED_RULES.has(rule.id)) {
|
|
1804
|
+
continue;
|
|
1805
|
+
}
|
|
1806
|
+
// Sprint 11a: Skip rules that FP in admin/instructor backend code
|
|
1807
|
+
// Admin functions managing existing user data are not child-facing contact collection flows
|
|
1808
|
+
if (classification.isAdmin && ADMIN_FP_RULES.has(rule.id)) {
|
|
1809
|
+
continue;
|
|
1810
|
+
}
|
|
1811
|
+
// Sprint 13b Graduated: admin-path — admin files are not child-facing
|
|
1812
|
+
// (Promoted from AI Review Board: 193 dismissals, confidence 9.0)
|
|
1813
|
+
if (classification.isAdmin && GRADUATED_ADMIN_RULES.has(rule.id)) {
|
|
1814
|
+
continue;
|
|
1815
|
+
}
|
|
1816
|
+
// Sprint 15: AU-SBD-002 fix — skip in admin/vendor code (85% FP rate from these contexts)
|
|
1817
|
+
if ((classification.isAdmin || classification.isVendor) && rule.id === 'AU-SBD-002') {
|
|
1818
|
+
continue;
|
|
1819
|
+
}
|
|
1820
|
+
// Sprint 13b Graduated: test-file — test/fixture files are not production code
|
|
1821
|
+
// (Promoted from AI Review Board: 27 dismissals, confidence 9.0)
|
|
1822
|
+
if ((classification.isTest || classification.isMockOrFactory || classification.isFixtureOrSeed) && GRADUATED_TEST_RULES.has(rule.id)) {
|
|
1823
|
+
continue;
|
|
1824
|
+
}
|
|
1290
1825
|
// Special handling for coppa-retention-005: skip if schema has retention fields
|
|
1291
1826
|
if (rule.id === 'coppa-retention-005') {
|
|
1827
|
+
// Sprint 11a: Skip Python models annotated with no_pii docstrings
|
|
1828
|
+
// OpenEdX convention: `.. no_pii:` in class docstring means model contains no PII
|
|
1829
|
+
// These models have User FKs but only store non-PII data (e.g., calendar sync preferences)
|
|
1830
|
+
if (classification.language === 'python' && /(?:\.\.\s*no_pii\s*:|#\s*no[_-]?pii\b|no_pii\s*=\s*True)/i.test(content)) {
|
|
1831
|
+
continue;
|
|
1832
|
+
}
|
|
1292
1833
|
// Check if the content has retention-related fields
|
|
1293
1834
|
const hasRetention = /deletedAt|deleted_at|expires|TTL|retention|paranoid|expiration/i.test(content);
|
|
1294
1835
|
if (!hasRetention) {
|
|
@@ -1342,6 +1883,82 @@ class HaloEngine {
|
|
|
1342
1883
|
if (isOwnDomain)
|
|
1343
1884
|
continue;
|
|
1344
1885
|
}
|
|
1886
|
+
// Sprint 11a: For coppa-ext-017: skip IE conditional comments
|
|
1887
|
+
// <!--[if lte IE 9]> ... <![endif]--> are deprecated browser banners, not child-facing links
|
|
1888
|
+
// Seen in: OpenEdX templates with Chrome/Firefox download links for IE users
|
|
1889
|
+
if (rule.id === 'coppa-ext-017') {
|
|
1890
|
+
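// Heuristic: treat the match as inside an IE conditional comment when an opener appears within 500 chars before it and an [endif] closer within 500 chars after it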
|
|
1891
|
+
const beforeMatch = content.substring(Math.max(0, match.index - 500), match.index);
|
|
1892
|
+
const afterMatch = content.substring(match.index, Math.min(content.length, match.index + 500));
|
|
1893
|
+
if (/<!--\s*\[if\s+(?:lt|lte|gt|gte|!)?\s*IE/i.test(beforeMatch) && /\[endif\]\s*-->/i.test(afterMatch)) {
|
|
1894
|
+
continue;
|
|
1895
|
+
}
|
|
1896
|
+
// Also skip if the line itself contains the IE conditional pattern
|
|
1897
|
+
if (/<!--\s*\[if\s+(?:lt|lte|gt|gte|!)?\s*IE/i.test(lineContent)) {
|
|
1898
|
+
continue;
|
|
1899
|
+
}
|
|
1900
|
+
}
|
|
1901
|
+
// Sprint 10+16: For coppa-sec-006: skip reserved/example/documentation/standards domains
|
|
1902
|
+
// These are IANA-reserved, standards bodies, or universally used in documentation and are never real endpoints
|
|
1903
|
+
// Require http:// before the domain to avoid matching domains in email addresses etc.
|
|
1904
|
+
if (rule.id === 'coppa-sec-006') {
|
|
1905
|
+
const checkText = (match[0] + ' ' + lineContent).toLowerCase();
|
|
1906
|
+
if (/http:\/\/(www\.)?(example\.(com|org|net)|localhost(:\d|\/|['"\s]|$)|127\.0\.0\.1|0\.0\.0\.0|\[::1\]|httpbin\.org|jsonplaceholder\.typicode\.com|testserver(\.com)?[\/\s'"]|imsglobal\.org|flickr\.com|w3\.org)/.test(checkText)) {
|
|
1907
|
+
continue;
|
|
1908
|
+
}
|
|
1909
|
+
// Sprint 16: Skip devstack/Docker service URLs (development-only, never production)
|
|
1910
|
+
if (/devstack/.test(checkText)) {
|
|
1911
|
+
continue;
|
|
1912
|
+
}
|
|
1913
|
+
// Sprint 16: Skip XML schema/namespace URLs (*.xsd, /xmlns/, /schema/)
|
|
1914
|
+
// These are namespace declarations, not API endpoints
|
|
1915
|
+
if (/\.xsd['"\s,)]|\/xmlns\/|\/schema\//.test(checkText)) {
|
|
1916
|
+
continue;
|
|
1917
|
+
}
|
|
1918
|
+
}
|
|
1919
|
+
// Sprint 10: For coppa-sec-015 (XSS): skip innerHTML assignments that are already sanitized
|
|
1920
|
+
// Y.Escape.html(), DOMPurify.sanitize(), etc. show the developer IS handling XSS
|
|
1921
|
+
if (rule.id === 'coppa-sec-015') {
|
|
1922
|
+
if (/(?:escape\.html|dompurify|sanitize|purify)\s*\(/i.test(lineContent)) {
|
|
1923
|
+
continue;
|
|
1924
|
+
}
|
|
1925
|
+
}
|
|
1926
|
+
// Sprint 10: For coppa-ui-008: skip admin tool registration (LTI cartridge, Brickfield, etc.)
|
|
1927
|
+
// These are admin/developer-facing forms, not child-facing registration
|
|
1928
|
+
if (rule.id === 'coppa-ui-008') {
|
|
1929
|
+
if (/cartridge[_-]?registration|brickfield|registersetting|tool_configure/i.test(lineContent) ||
|
|
1930
|
+
/cartridge[_-]?registration|brickfield|registersetting/i.test(normalizedPath)) {
|
|
1931
|
+
continue;
|
|
1932
|
+
}
|
|
1933
|
+
}
|
|
1934
|
+
// Sprint 10b: For coppa-cookies-016: skip consent implementations
|
|
1935
|
+
// Files/code implementing cookie consent are the solution, not the problem
|
|
1936
|
+
if (rule.id === 'coppa-cookies-016') {
|
|
1937
|
+
// File path patterns: cookie-consent.js, gdpr-banner.js, etc.
|
|
1938
|
+
if (/cookie[_-]?(consent|law|notice|banner|policy|popup|gdpr|preferences)/i.test(normalizedPath) ||
|
|
1939
|
+
/(?:consent|gdpr|ccpa|privacy)[_-]?(?:banner|popup|modal|notice|manager)/i.test(normalizedPath)) {
|
|
1940
|
+
continue;
|
|
1941
|
+
}
|
|
1942
|
+
// Line-level: function/variable names showing consent management intent
|
|
1943
|
+
if (/(?:handleConsent|acceptCookies|declineCookies|cookieBanner|consentManager|cookiePreferences|saveCookiePreferences|showCookieNotice|getCookieConsent|setCookieConsent)\s*[=(]/i.test(lineContent) ||
|
|
1944
|
+
/(?:accept|decline|preferences|banner|consent)\s*[=:]/i.test(lineContent) && /cookie/i.test(lineContent)) {
|
|
1945
|
+
continue;
|
|
1946
|
+
}
|
|
1947
|
+
// Import-level: known consent management libraries
|
|
1948
|
+
if (/(?:require|import).*(?:cookieconsent|react-cookie-consent|onetrust|cookiebot|osano|cookie-notice|gdpr-cookie)/i.test(content.substring(0, 2000))) {
|
|
1949
|
+
continue;
|
|
1950
|
+
}
|
|
1951
|
+
// Sprint 11a: Skip cookie DELETION patterns — e.g. setting expires to the Unix epoch or max-age to 0 or a negative value is cleanup, not tracking
|
|
1952
|
+
// Seen in: Moodle submit.js — code that removes cookies flagged as if setting them
|
|
1953
|
+
if (/max[_-]?age\s*[=:]\s*['"]?\s*(-\d+|0)\b/i.test(lineContent) ||
|
|
1954
|
+
/expires\s*[=:]\s*['"]?\s*(?:Thu,\s*01\s+Jan\s+1970|new\s+Date\s*\(\s*0\s*\))/i.test(lineContent) ||
|
|
1955
|
+
/new\s+Date\s*\(\s*0\s*\)/.test(lineContent) && /expires/i.test(lineContent) ||
|
|
1956
|
+
/=\s*['"]?\s*deleted\b/i.test(lineContent) ||
|
|
1957
|
+
/(?:delete|remove|clear|expire|destroy)[_-]?cookie/i.test(lineContent) ||
|
|
1958
|
+
/\.cookie\s*=\s*['"][^'"]*;\s*expires\s*=\s*['"]?\s*Thu,\s*01/i.test(lineContent)) {
|
|
1959
|
+
continue;
|
|
1960
|
+
}
|
|
1961
|
+
}
|
|
1345
1962
|
// Check if this violation already exists (avoid duplicates)
|
|
1346
1963
|
const exists = violations.some(v => v.ruleId === rule.id &&
|
|
1347
1964
|
v.line === lineNumber &&
|
|
@@ -1355,6 +1972,14 @@ class HaloEngine {
|
|
|
1355
1972
|
if (suppressed) {
|
|
1356
1973
|
suppressionComment = suppressions.get(lineNumber);
|
|
1357
1974
|
}
|
|
1975
|
+
// Sprint 11b: Extract surrounding code context (5 lines before + 5 after)
|
|
1976
|
+
const contextStart = Math.max(0, lineNumber - 6); // lineNumber is 1-indexed
|
|
1977
|
+
const contextEnd = Math.min(lines.length, lineNumber + 5);
|
|
1978
|
+
const surroundingLines = lines.slice(contextStart, contextEnd).map((l, i) => {
|
|
1979
|
+
const ln = contextStart + i + 1;
|
|
1980
|
+
const marker = ln === lineNumber ? '>>>' : ' ';
|
|
1981
|
+
return `${marker} ${ln}: ${l}`;
|
|
1982
|
+
});
|
|
1358
1983
|
violations.push({
|
|
1359
1984
|
ruleId: rule.id,
|
|
1360
1985
|
ruleName: rule.name,
|
|
@@ -1373,11 +1998,43 @@ class HaloEngine {
|
|
|
1373
1998
|
matchType: 'regex',
|
|
1374
1999
|
fixability: getRemediation(rule.id).fixability,
|
|
1375
2000
|
remediation: getRemediation(rule.id),
|
|
2001
|
+
// Sprint 11b: Enriched context for AI Review Board
|
|
2002
|
+
surroundingCode: surroundingLines.join('\n'),
|
|
2003
|
+
fileMetadata: {
|
|
2004
|
+
language: classification.language,
|
|
2005
|
+
isVendor: classification.isVendor,
|
|
2006
|
+
isTest: classification.isTest,
|
|
2007
|
+
isAdmin: classification.isAdmin,
|
|
2008
|
+
isConsent: classification.isConsent,
|
|
2009
|
+
isDocGenerator: classification.isDocGenerator,
|
|
2010
|
+
detectedFramework: this.config.framework,
|
|
2011
|
+
// Sprint 13a: Extended classification data
|
|
2012
|
+
isMock: classification.isMockOrFactory,
|
|
2013
|
+
isFixture: classification.isFixtureOrSeed,
|
|
2014
|
+
isCIConfig: classification.isCIConfig,
|
|
2015
|
+
isBuildOutput: classification.isBuildOutput,
|
|
2016
|
+
isTypeDefinition: classification.isTypeDefinition,
|
|
2017
|
+
isStorybook: classification.isStorybook,
|
|
2018
|
+
},
|
|
1376
2019
|
});
|
|
1377
2020
|
}
|
|
1378
2021
|
}
|
|
1379
2022
|
}
|
|
1380
2023
|
}
|
|
2024
|
+
// Sprint 10: Apply framework overrides to ALL file types (Python, PHP, HTML, etc.)
|
|
2025
|
+
// Previously this only ran inside scanFileWithAST() for JS/TS files.
|
|
2026
|
+
if (this.config.framework) {
|
|
2027
|
+
const result = (0, frameworks_1.applyFrameworkOverrides)(violations, this.config.framework);
|
|
2028
|
+
violations = result.violations;
|
|
2029
|
+
}
|
|
2030
|
+
// Sprint 12b: Dedup AI-GOVERNANCE-002 / AI-RISK-003 overlap
|
|
2031
|
+
// If both fire on the same file and line, remove AI-RISK-003 from the results (AI-GOVERNANCE-002 subsumes it)
|
|
2032
|
+
const govViolations = new Set(violations
|
|
2033
|
+
.filter(v => v.ruleId === 'AI-GOVERNANCE-002')
|
|
2034
|
+
.map(v => `${v.filePath}:${v.line}`));
|
|
2035
|
+
if (govViolations.size > 0) {
|
|
2036
|
+
violations = violations.filter(v => !(v.ruleId === 'AI-RISK-003' && govViolations.has(`${v.filePath}:${v.line}`)));
|
|
2037
|
+
}
|
|
1381
2038
|
// Filter suppressed if configured
|
|
1382
2039
|
if (this.config.suppressions?.enabled !== false && !this.config.includeSuppressed) {
|
|
1383
2040
|
const unsuppressed = violations.filter(v => !v.suppressed);
|