@runhalo/engine 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/dist/ast-engine.d.ts +60 -0
  2. package/dist/ast-engine.js +653 -0
  3. package/dist/ast-engine.js.map +1 -0
  4. package/dist/context-analyzer.d.ts +209 -0
  5. package/dist/context-analyzer.js +408 -0
  6. package/dist/context-analyzer.js.map +1 -0
  7. package/dist/data-flow-tracer.d.ts +106 -0
  8. package/dist/data-flow-tracer.js +506 -0
  9. package/dist/data-flow-tracer.js.map +1 -0
  10. package/dist/fp-patterns.d.ts +36 -0
  11. package/dist/fp-patterns.js +426 -0
  12. package/dist/fp-patterns.js.map +1 -0
  13. package/dist/frameworks/angular.d.ts +11 -0
  14. package/dist/frameworks/angular.js +41 -0
  15. package/dist/frameworks/angular.js.map +1 -0
  16. package/dist/frameworks/django.d.ts +11 -0
  17. package/dist/frameworks/django.js +57 -0
  18. package/dist/frameworks/django.js.map +1 -0
  19. package/dist/frameworks/index.d.ts +59 -0
  20. package/dist/frameworks/index.js +99 -0
  21. package/dist/frameworks/index.js.map +1 -0
  22. package/dist/frameworks/nextjs.d.ts +11 -0
  23. package/dist/frameworks/nextjs.js +59 -0
  24. package/dist/frameworks/nextjs.js.map +1 -0
  25. package/dist/frameworks/rails.d.ts +11 -0
  26. package/dist/frameworks/rails.js +58 -0
  27. package/dist/frameworks/rails.js.map +1 -0
  28. package/dist/frameworks/react.d.ts +13 -0
  29. package/dist/frameworks/react.js +36 -0
  30. package/dist/frameworks/react.js.map +1 -0
  31. package/dist/frameworks/types.d.ts +29 -0
  32. package/dist/frameworks/types.js +11 -0
  33. package/dist/frameworks/types.js.map +1 -0
  34. package/dist/frameworks/vue.d.ts +9 -0
  35. package/dist/frameworks/vue.js +39 -0
  36. package/dist/frameworks/vue.js.map +1 -0
  37. package/dist/graduation/fp-verdict-logger.d.ts +81 -0
  38. package/dist/graduation/fp-verdict-logger.js +130 -0
  39. package/dist/graduation/fp-verdict-logger.js.map +1 -0
  40. package/dist/graduation/graduation-codifier.d.ts +37 -0
  41. package/dist/graduation/graduation-codifier.js +205 -0
  42. package/dist/graduation/graduation-codifier.js.map +1 -0
  43. package/dist/graduation/graduation-validator.d.ts +73 -0
  44. package/dist/graduation/graduation-validator.js +204 -0
  45. package/dist/graduation/graduation-validator.js.map +1 -0
  46. package/dist/graduation/index.d.ts +71 -0
  47. package/dist/graduation/index.js +105 -0
  48. package/dist/graduation/index.js.map +1 -0
  49. package/dist/graduation/pattern-aggregator.d.ts +77 -0
  50. package/dist/graduation/pattern-aggregator.js +154 -0
  51. package/dist/graduation/pattern-aggregator.js.map +1 -0
  52. package/dist/index.d.ts +99 -0
  53. package/dist/index.js +718 -61
  54. package/dist/index.js.map +1 -1
  55. package/dist/review-board/two-agent-review.d.ts +152 -0
  56. package/dist/review-board/two-agent-review.js +463 -0
  57. package/dist/review-board/two-agent-review.js.map +1 -0
  58. package/dist/scope-analyzer.d.ts +91 -0
  59. package/dist/scope-analyzer.js +300 -0
  60. package/dist/scope-analyzer.js.map +1 -0
  61. package/package.json +9 -2
  62. package/rules/coppa-tier-1.yaml +17 -10
  63. package/rules/rules.json +2094 -99
  64. package/rules/validation-report.json +58 -0
package/dist/index.js CHANGED
@@ -45,11 +45,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
45
45
  };
46
46
  Object.defineProperty(exports, "__esModule", { value: true });
47
47
  exports.SCAFFOLD_REGISTRY = exports.detectFramework = exports.ScaffoldEngine = exports.ComplianceScoreEngine = exports.transformSetDefault = exports.transformSanitizeInput = exports.transformRemoveDefault = exports.transformUrlUpgrade = exports.FixEngine = exports.REMEDIATION_MAP = exports.HaloEngine = exports.AU_SBD_RULES = exports.AI_AUDIT_RULES = exports.ETHICAL_RULES = exports.COPPA_RULES = exports.treeSitterParser = exports.TreeSitterParser = void 0;
48
+ exports.classifyFile = classifyFile;
48
49
  exports.loadRulesFromYAML = loadRulesFromYAML;
49
50
  exports.loadRulesFromJSON = loadRulesFromJSON;
50
51
  exports.loadRulesFromJSONByPack = loadRulesFromJSONByPack;
51
52
  exports.compileRawRules = compileRawRules;
52
53
  exports.parseHaloignore = parseHaloignore;
54
+ exports.isVendorPath = isVendorPath;
55
+ exports.isDocGeneratorPath = isDocGeneratorPath;
53
56
  exports.shouldIgnoreFile = shouldIgnoreFile;
54
57
  exports.shouldIgnoreViolation = shouldIgnoreViolation;
55
58
  exports.getRemediation = getRemediation;
@@ -59,6 +62,9 @@ const tree_sitter_1 = __importDefault(require("tree-sitter"));
59
62
  const tree_sitter_typescript_1 = __importDefault(require("tree-sitter-typescript"));
60
63
  const tree_sitter_javascript_1 = __importDefault(require("tree-sitter-javascript"));
61
64
  const yaml = __importStar(require("js-yaml"));
65
+ const ast_engine_1 = require("./ast-engine");
66
+ const frameworks_1 = require("./frameworks");
67
+ const context_analyzer_1 = require("./context-analyzer");
62
68
  // Extract category from ruleId (e.g. "coppa-auth-001" → "auth", "ETHICAL-001" → "ethical", "AU-SBD-001" → "au-sbd")
63
69
  function extractCategory(ruleId) {
64
70
  if (ruleId.startsWith('ETHICAL'))
@@ -67,6 +73,22 @@ function extractCategory(ruleId) {
67
73
  return 'ai-audit';
68
74
  if (ruleId.startsWith('AU-SBD'))
69
75
  return 'au-sbd';
76
+ if (ruleId.startsWith('AU-OSA'))
77
+ return 'au-osa';
78
+ if (ruleId.startsWith('caadca'))
79
+ return 'caadca';
80
+ if (ruleId.startsWith('AI-RISK'))
81
+ return 'ai-risk';
82
+ if (ruleId.startsWith('AI-TRANSPARENCY'))
83
+ return 'ai-transparency';
84
+ if (ruleId.startsWith('AI-GOVERNANCE'))
85
+ return 'ai-governance';
86
+ if (ruleId.startsWith('AI-OVERSIGHT'))
87
+ return 'ai-oversight';
88
+ if (ruleId.startsWith('AI-ACCURACY'))
89
+ return 'ai-accuracy';
90
+ if (ruleId.startsWith('CAI-'))
91
+ return 'constitutional-ai';
70
92
  const match = ruleId.match(/^coppa-(\w+)-\d+$/);
71
93
  return match ? match[1] : 'unknown';
72
94
  }
@@ -83,6 +105,140 @@ function detectLanguage(filePath) {
83
105
  };
84
106
  return langMap[ext] || 'unknown';
85
107
  }
108
+ /**
109
+ * Sprint 13a: Classify a file using deterministic heuristics.
110
+ * Returns a FileClassification object that the scan loop uses to skip
111
+ * files or suppress specific rules.
112
+ *
113
+ * @param filePath — normalized file path (forward slashes)
114
+ * @param contentPrefix — first 3000 chars of file content (for decorator/annotation detection)
115
+ */
116
+ function classifyFile(filePath, contentPrefix = '') {
117
+ const normalized = filePath.replace(/\\/g, '/');
118
+ const language = detectLanguage(filePath);
119
+ const isVendorResult = isVendorPath(filePath);
120
+ const isDocGeneratorResult = isDocGeneratorPath(filePath);
121
+ // Test/spec/fixture detection (Sprint 8 + 11a, consolidated)
122
+ const isTest = /\.(test|spec)\.(ts|tsx|js|jsx|py|rb|java|go)$/i.test(normalized) ||
123
+ /(^|\/)__tests__\//.test(normalized) ||
124
+ /(^|\/)test\//.test(normalized) ||
125
+ /(^|\/)tests\//.test(normalized) ||
126
+ /(^|\/)spec\//.test(normalized) ||
127
+ /(^|\/)fixtures\//.test(normalized) ||
128
+ /\.(stories|story)\.(ts|tsx|js|jsx)$/i.test(normalized) ||
129
+ /(^|\/)cypress\//.test(normalized) ||
130
+ /(^|\/)e2e\//.test(normalized) ||
131
+ /jest\.config|vitest\.config|playwright\.config/i.test(normalized) ||
132
+ // Sprint 11a: Test environment configs
133
+ /(^|\/)envs\/test[^/]*\.(py|json|ya?ml|toml|cfg|ini)$/i.test(normalized) ||
134
+ /(^|\/)config\/test[^/]*\.(py|json|ya?ml|toml|cfg|ini|js|ts)$/i.test(normalized) ||
135
+ /(^|\/)settings\/test[^/]*\.(py|json|ya?ml|toml)$/i.test(normalized) ||
136
+ /(^|\/)conftest\.py$/i.test(normalized);
137
+ // Consent/privacy implementation files (Sprint 10b)
138
+ const CONSENT_PATH_PATTERNS = /(?:^|\/)(?:consent|cookie[_-]?(?:consent|banner|preferences|notice|policy)|privacy[_-]?(?:policy|notice|banner|settings)|gdpr|ccpa|compliance|data[_-]?(?:deletion|removal|protection))\b/i;
139
+ const isConsent = CONSENT_PATH_PATTERNS.test(normalized);
140
+ // Admin/instructor/staff backend paths (Sprint 11a, updated Sprint 13b)
141
+ // Matches admin directories AND admin.py/admin.rb files (Django/Rails admin registration modules)
142
+ const ADMIN_PATH_PATTERNS = /(?:^|\/)(?:admin|instructor|teacher|staff|management|backoffice|dashboard\/admin|cms|moderator|superuser)(?:\/|\.py|\.rb|\.php|$)/i;
143
+ const isAdmin = ADMIN_PATH_PATTERNS.test(normalized) ||
144
+ /(?:@staff_member_required|@permission_required|@user_passes_test|@login_required.*staff|@admin_required|is_staff|is_superuser)/i.test(contentPrefix);
145
+ // === Sprint 13a: New heuristic patterns ===
146
+ // Django migrations — auto-generated schema changes, no user-facing code
147
+ const isDjangoMigration = /(^|\/)migrations\/\d{4}_[a-zA-Z0-9_]+\.py$/i.test(normalized) ||
148
+ /(^|\/)migrations\/__init__\.py$/i.test(normalized);
149
+ // Fixture, seed, and factory files (fixtures/, seeds/, db/seeds, factories/) — test data, not production behavior
150
+ const isFixtureOrSeed = /(^|\/)fixtures\/[^/]+\.(ya?ml|json|csv)$/i.test(normalized) ||
151
+ /(^|\/)seeds?\//i.test(normalized) ||
152
+ /(^|\/)db\/seeds/i.test(normalized) ||
153
+ /(^|\/)factories?\//i.test(normalized) ||
154
+ /(^|\/)factory\.(ts|js|py|rb)$/i.test(normalized);
155
+ // Mock/fake/stub files and directories — test infrastructure
156
+ const isMockOrFactory = /(?:^|\/)(?:__mocks__|mocks?|fakes?|stubs?)(?:\/|$)/i.test(normalized) ||
157
+ /\.mock\.(ts|tsx|js|jsx|py)$/i.test(normalized) ||
158
+ /\.fake\.(ts|tsx|js|jsx|py)$/i.test(normalized) ||
159
+ /(?:^|\/)(?:mock|fake|stub)[_-]?\w+\.(ts|tsx|js|jsx|py)$/i.test(normalized) ||
160
+ /(?:^|\/)(?:\w+)?[_-](?:mock|fake|stub)\.(ts|tsx|js|jsx|py)$/i.test(normalized);
161
+ // CI/CD configuration files — pipeline definitions, not application code
162
+ const isCIConfig = /(^|\/)\.github\/workflows\//i.test(normalized) ||
163
+ /(^|\/)\.github\/actions\//i.test(normalized) ||
164
+ /(^|\/)\.circleci\//i.test(normalized) ||
165
+ /(^|\/)\.gitlab-ci/i.test(normalized) ||
166
+ /(^|\/)Jenkinsfile$/i.test(normalized) ||
167
+ /(^|\/)\.travis\.yml$/i.test(normalized) ||
168
+ /(^|\/)azure-pipelines/i.test(normalized) ||
169
+ /(^|\/)bitbucket-pipelines/i.test(normalized) ||
170
+ /(^|\/)\.buildkite\//i.test(normalized) ||
171
+ /(^|\/)Dockerfile$/i.test(normalized) ||
172
+ /(^|\/)docker-compose/i.test(normalized);
173
+ // Build output directories — generated code, not source
174
+ const isBuildOutput = /(^|\/)dist\//i.test(normalized) ||
175
+ /(^|\/)build\/(?!src)/i.test(normalized) || // build/ but not build/src/
176
+ /(^|\/)\.next\//i.test(normalized) ||
177
+ /(^|\/)\.nuxt\//i.test(normalized) ||
178
+ /(^|\/)\.svelte-kit\//i.test(normalized) ||
179
+ /(^|\/)out\//i.test(normalized) ||
180
+ /(^|\/)\.output\//i.test(normalized) ||
181
+ /(^|\/)coverage\//i.test(normalized) ||
182
+ /(^|\/)\.cache\//i.test(normalized) ||
183
+ /(^|\/)\.parcel-cache\//i.test(normalized) ||
184
+ /(^|\/)\.turbo\//i.test(normalized);
185
+ // Type definition files — no runtime behavior, only type annotations
186
+ const isTypeDefinition = /\.d\.ts$/i.test(normalized) ||
187
+ /\.pyi$/i.test(normalized) ||
188
+ /(^|\/)@types\//i.test(normalized);
189
+ // Storybook stories — UI component demos, not production code
190
+ const isStorybook = /\.(stories|story)\.(ts|tsx|js|jsx|mdx)$/i.test(normalized) ||
191
+ /(^|\/)\.storybook\//i.test(normalized);
192
+ // Determine if file should be completely skipped
193
+ // (vendor and doc generator are already handled at file-discovery level,
194
+ // but including here for completeness in the classification)
195
+ let shouldSkip = false;
196
+ let skipReason;
197
+ if (isVendorResult) {
198
+ shouldSkip = true;
199
+ skipReason = 'vendor-library';
200
+ }
201
+ else if (isDocGeneratorResult) {
202
+ shouldSkip = true;
203
+ skipReason = 'doc-generator';
204
+ }
205
+ else if (isDjangoMigration) {
206
+ shouldSkip = true;
207
+ skipReason = 'django-migration';
208
+ }
209
+ else if (isBuildOutput) {
210
+ shouldSkip = true;
211
+ skipReason = 'build-output';
212
+ }
213
+ else if (isTypeDefinition) {
214
+ shouldSkip = true;
215
+ skipReason = 'type-definition';
216
+ }
217
+ else if (isCIConfig) {
218
+ shouldSkip = true;
219
+ skipReason = 'ci-config';
220
+ }
221
+ // Note: test, consent, admin, mock, fixture, storybook files are NOT fully skipped
222
+ // They get per-rule suppression instead (some rules ARE valid in these files)
223
+ return {
224
+ method: 'heuristic',
225
+ language,
226
+ isVendor: isVendorResult,
227
+ isTest,
228
+ isConsent,
229
+ isAdmin,
230
+ isDocGenerator: isDocGeneratorResult,
231
+ isDjangoMigration,
232
+ isFixtureOrSeed,
233
+ isMockOrFactory,
234
+ isCIConfig,
235
+ isBuildOutput,
236
+ isTypeDefinition,
237
+ isStorybook,
238
+ shouldSkip,
239
+ skipReason,
240
+ };
241
+ }
86
242
  // YAML Rule Loader - Load rules from coppa-tier-1.yaml
87
243
  function loadRulesFromYAML(yamlPath) {
88
244
  try {
@@ -365,7 +521,7 @@ exports.COPPA_RULES = [
365
521
  /LoginManager\.getInstance\s*\(\s*\)\s*\.logIn/gi
366
522
  ],
367
523
  fixSuggestion: 'Wrap the auth call in a conditional check for user.age >= 13 or use signInWithParentEmail() for children',
368
- penalty: '$51,744 per violation',
524
+ penalty: '$53,088 per violation',
369
525
  languages: ['typescript', 'javascript', 'python', 'go', 'java', 'kotlin', 'swift']
370
526
  },
371
527
  {
@@ -377,11 +533,19 @@ exports.COPPA_RULES = [
377
533
  /(\?|&)(email|first_?name|last_?name|dob|phone|birthdate)=/gi,
378
534
  /axios\.get\s*\(\s*[`'"]https?:\/\/[^\s]*\?[^`'"]*\$\{/gi,
379
535
  /fetch\s*\(\s*[`'"]https?:\/\/[^\s]*\?[^`'"]*\$\{/gi,
380
- /\?[^'"`\s]*\$\{[^}]*(?:\.email|\.firstName|\.lastName|\.dob|\.phone)[^}]*\}/gi
536
+ /\?[^'"`\s]*\$\{[^}]*(?:\.email|\.firstName|\.lastName|\.dob|\.phone)[^}]*\}/gi,
537
+ // Python — requests.get with PII query params
538
+ /requests\.get\s*\([^)]*params\s*=\s*\{[^}]*(?:email|name|phone|dob|birthdate)/gi,
539
+ // Python — Django/Flask redirect with PII in URL
540
+ /(?:redirect|HttpResponseRedirect)\s*\([^)]*\?[^)]*(?:email|name|phone)/gi,
541
+ // PHP — PII in $_GET superglobal
542
+ /\$_GET\s*\[\s*['"](?:email|first_?name|last_?name|dob|phone|birthdate)['"]\s*\]/gi,
543
+ // Ruby — request.query_parameters[] lookups of PII keys (GET query string)
544
+ /request\.query_parameters\s*\[\s*:(?:email|name|phone|dob|birthdate)\s*\]/gi
381
545
  ],
382
546
  fixSuggestion: 'Switch to POST method and move PII to request body',
383
- penalty: '$51,744 per violation',
384
- languages: ['typescript', 'javascript', 'python', 'java', 'swift']
547
+ penalty: '$53,088 per violation',
548
+ languages: ['typescript', 'javascript', 'python', 'java', 'swift', 'php', 'ruby']
385
549
  },
386
550
  {
387
551
  id: 'coppa-tracking-003',
@@ -393,11 +557,23 @@ exports.COPPA_RULES = [
393
557
  /ga\s*\(\s*['"]create['"]/gi,
394
558
  /adsbygoogle/gi,
395
559
  /gtag\s*\(\s*['"]config['"]/gi,
396
- /google-analytics\.com\/analytics\.js/gi
560
+ /google-analytics\.com\/analytics\.js/gi,
561
+ // Python — Google Analytics measurement protocol
562
+ /(?:import|from)\s+(?:google\.analytics|pyga|universal_analytics)/gi,
563
+ // Python — Facebook pixel server-side
564
+ /FacebookAdsApi\.init|facebook_business\.adobjects/gi,
565
+ // PHP — Google Analytics server-side
566
+ /(?:TheIconic\\Tracking|Rize\\UriTemplate).*(?:Analytics|Measurement)/gi,
567
+ // PHP — wp_enqueue_script with GA/FB pixel
568
+ /wp_enqueue_script\s*\([^)]*(?:google-analytics|gtag|fbq|facebook-pixel)/gi,
569
+ // Ruby — Google Analytics gems
570
+ /(?:require|gem)\s+['"](?:staccato|google-analytics-rails|gabba)['"]/gi,
571
+ // Java/Kotlin — Firebase Analytics initialization
572
+ /FirebaseAnalytics\.getInstance\s*\(/gi
397
573
  ],
398
574
  fixSuggestion: 'Add "child_directed_treatment": true or "restrictDataProcessing": true to SDK initialization',
399
- penalty: '$51,744 per violation',
400
- languages: ['typescript', 'javascript', 'html']
575
+ penalty: '$53,088 per violation',
576
+ languages: ['typescript', 'javascript', 'html', 'python', 'php', 'ruby', 'java', 'kotlin']
401
577
  },
402
578
  {
403
579
  id: 'coppa-geo-004',
@@ -424,17 +600,24 @@ exports.COPPA_RULES = [
424
600
  // Python — geopy geolocators
425
601
  /(?:Nominatim|GoogleV3|Bing)\s*\([^)]*\)\s*\.(?:geocode|reverse)/gi,
426
602
  // Android manifest — fine location permission
427
- /android\.permission\.ACCESS_FINE_LOCATION/gi
603
+ /android\.permission\.ACCESS_FINE_LOCATION/gi,
604
+ // PHP — geolocation APIs
605
+ /(?:geoip_record_by_name|geoip_country_code_by_name|maxmind)\s*\(/gi,
606
+ // PHP — WordPress geolocation
607
+ /WC_Geolocation::geolocate_ip|wp_geolocate/gi,
608
+ // Ruby — Geocoder gem
609
+ /Geocoder\.search\s*\(|geocode_by\s+:/gi,
610
+ /reverse_geocoded_by\s+:/gi
428
611
  ],
429
612
  fixSuggestion: 'Downgrade accuracy to kCLLocationAccuracyThreeKilometers or require parental consent',
430
- penalty: '$51,744 per violation',
431
- languages: ['typescript', 'javascript', 'swift', 'kotlin', 'java', 'python', 'xml']
613
+ penalty: '$53,088 per violation',
614
+ languages: ['typescript', 'javascript', 'swift', 'kotlin', 'java', 'python', 'xml', 'php', 'ruby']
432
615
  },
433
616
  {
434
617
  id: 'coppa-retention-005',
435
618
  name: 'Missing Data Retention Policy',
436
619
  severity: 'medium',
437
- description: 'User schemas must have deleted_at, expiration_date, or TTL index for data retention',
620
+ description: 'COPPA 2025 explicitly prohibits indefinite retention of children\'s PI. Operators must retain data only as long as reasonably necessary for the purpose collected. Schemas with PII fields must define retention periods, deletion mechanisms, and purpose limitation.',
438
621
  patterns: [
439
622
  // JS/TS — Mongoose schemas
440
623
  /new\s+Schema\s*\(\s*\{[^{}]*\}/gi,
@@ -447,11 +630,17 @@ exports.COPPA_RULES = [
447
630
  // Java/Kotlin — JPA @Entity on user-related classes
448
631
  /@Entity[\s\S]*?class\s+(?:User|Child|Student|Profile|Account|Member)/gi,
449
632
  // Kotlin — data class for user models
450
- /data\s+class\s+(?:User|Child|Student|Profile|Account|Member)\w*\s*\(/gi
633
+ /data\s+class\s+(?:User|Child|Student|Profile|Account|Member)\w*\s*\(/gi,
634
+ // PHP — Laravel/WordPress user models
635
+ /class\s+(?:User|Child|Student|Profile|Account|Member)\w*\s+extends\s+(?:Model|Authenticatable|WP_User)/gi,
636
+ // Ruby — ActiveRecord user models
637
+ /class\s+(?:User|Child|Student|Profile|Account|Member)\w*\s*<\s*(?:ApplicationRecord|ActiveRecord::Base)/gi,
638
+ // Android — SharedPreferences/Editor storing user PII
639
+ /(?:putString|putInt|putBoolean)\s*\(\s*['"](?:user_?(?:name|email|id|phone)|child_?(?:name|email|id|dob)|student_?(?:name|email|id)|email|phone|dob|birthdate)['"]/gi
451
640
  ],
452
- fixSuggestion: 'Add deleted_at column, expiration_date field, or TTL index to database schema',
453
- penalty: 'Regulatory audit failure',
454
- languages: ['typescript', 'javascript', 'python', 'go', 'java', 'kotlin', 'sql']
641
+ fixSuggestion: 'Add explicit retention period (retentionDays, expiresAt, or TTL index), deleted_at column, and document the purpose limitation for data collection per COPPA 2025 § 312.10',
642
+ penalty: '$53,088 per violation (COPPA 2025 indefinite retention prohibition)',
643
+ languages: ['typescript', 'javascript', 'python', 'go', 'java', 'kotlin', 'sql', 'php', 'ruby']
455
644
  },
456
645
  // ========== Rules 6-20 (Sprint 2) ==========
457
646
  // Rule 6: Unencrypted PII Transmission
@@ -465,11 +654,18 @@ exports.COPPA_RULES = [
465
654
  /http:\/\/localhost:[^\s]*(\/api\/)/gi,
466
655
  /axios\.get\s*\(\s*['"]http:\/\//gi,
467
656
  /fetch\s*\(\s*['"]http:\/\//gi,
468
- /http:\/\/[^\s]*email[^\s]*/gi
657
+ /http:\/\/[^\s]*email[^\s]*/gi,
658
+ // Python — requests/urllib with HTTP
659
+ /requests\.(?:get|post)\s*\(\s*['"]http:\/\/(?!localhost)/gi,
660
+ /urllib\.request\.urlopen\s*\(\s*['"]http:\/\/(?!localhost)/gi,
661
+ // PHP — HTTP API calls
662
+ /(?:curl_setopt|file_get_contents|wp_remote_get)\s*\([^)]*['"]http:\/\/(?!localhost)/gi,
663
+ // Ruby — HTTP requests
664
+ /(?:Net::HTTP|HTTParty|Faraday)\.(?:get|post)\s*\([^)]*['"]http:\/\/(?!localhost)/gi
469
665
  ],
470
666
  fixSuggestion: 'Replace http:// with https:// for all API endpoints and resources',
471
667
  penalty: 'Security breach liability + COPPA penalties',
472
- languages: ['typescript', 'javascript', 'python', 'java', 'swift']
668
+ languages: ['typescript', 'javascript', 'python', 'java', 'swift', 'php', 'ruby']
473
669
  },
474
670
  // Rule 7: Passive Audio Recording
475
671
  // Fixed Sprint 4: Skip audio:false, skip AudioContext (playback only), skip import-only
@@ -485,11 +681,18 @@ exports.COPPA_RULES = [
485
681
  /AVAudioSession\s*\.\s*sharedInstance/gi,
486
682
  /AVAudioRecorder\s*\(/gi,
487
683
  /new\s+AudioRecord\s*\(/gi,
488
- /new\s+MediaRecorder\s*\(/gi
684
+ /new\s+MediaRecorder\s*\(/gi,
685
+ // Python — audio recording libraries
686
+ /(?:import|from)\s+(?:pyaudio|sounddevice|speech_recognition)/gi,
687
+ /sounddevice\.rec\s*\(/gi,
688
+ /Recognizer\(\)\.listen/gi,
689
+ // Java/Kotlin — Android AudioRecord
690
+ /AudioRecord\.Builder\s*\(\s*\)/gi,
691
+ /MediaRecorder\s*\(\s*\)\s*\.setAudioSource/gi
489
692
  ],
490
693
  fixSuggestion: 'Wrap audio recording in click handler and add parental consent check',
491
- penalty: '$51,744 per violation',
492
- languages: ['typescript', 'javascript', 'swift', 'kotlin']
694
+ penalty: '$53,088 per violation',
695
+ languages: ['typescript', 'javascript', 'swift', 'kotlin', 'python', 'java']
493
696
  },
494
697
  // Rule 8: Missing Privacy Policy Link
495
698
  // Fixed Sprint 4: Only flag forms with registration-related fields (email, password, name, DOB)
@@ -506,11 +709,17 @@ exports.COPPA_RULES = [
506
709
  // kebab-case / snake_case: sign-up-form, register_form, create-account-form
507
710
  /\b(?:sign[-_]?up|register|registration|create[-_]?account)[-_]form\b/gi,
508
711
  // HTML form elements with registration-related ids/classes
509
- /<form[^>]*(?:id|class|name)\s*=\s*["'][^"']*(?:register|signup|sign[-_]up|create[-_]account)[^"']*["']/gi
712
+ /<form[^>]*(?:id|class|name)\s*=\s*["'][^"']*(?:register|signup|sign[-_]up|create[-_]account)[^"']*["']/gi,
713
+ // Python — Django/Flask registration form classes
714
+ /class\s+(?:SignUp|Register|Registration|CreateAccount)Form\s*\(\s*(?:forms\.Form|ModelForm|FlaskForm)/gi,
715
+ // Ruby — Rails registration routes/controllers
716
+ /def\s+(?:sign_up|register|create_account)\b/gi,
717
+ // PHP — WordPress registration hooks
718
+ /(?:register_new_user|wp_create_user|user_register)\s*\(/gi
510
719
  ],
511
720
  fixSuggestion: 'Add <a href="/privacy">Privacy Policy</a> link to registration form footer',
512
721
  penalty: 'Compliance failure',
513
- languages: ['typescript', 'javascript', 'html', 'tsx', 'jsx', 'php']
722
+ languages: ['typescript', 'javascript', 'html', 'tsx', 'jsx', 'php', 'python', 'ruby']
514
723
  },
515
724
  // Rule 9: Contact Info Collection Without Parent Email
516
725
  {
@@ -520,15 +729,24 @@ exports.COPPA_RULES = [
520
729
  description: 'Forms collecting child email/phone must also require parent email for consent verification',
521
730
  patterns: [
522
731
  /(child_email|student_email)\s*:\s*String/gi,
523
- /(child_email|student_email|kid_email)\s*=/gi
732
+ /(child_email|student_email|kid_email)\s*=/gi,
733
+ // Python — Django model field for child contact
734
+ /(?:child_email|student_email|kid_email)\s*=\s*models\.(?:EmailField|CharField)/gi,
735
+ // PHP — child email in form processing
736
+ /\$(?:child_email|student_email|kid_email)\s*=\s*\$_(?:POST|GET|REQUEST)/gi,
737
+ // Ruby — child contact in params or model
738
+ /(?:child_email|student_email|kid_email)\s*=\s*params\[/gi,
739
+ // Java/Kotlin — child email field
740
+ /(?:private|var|val)\s+\w*\s*(?:childEmail|studentEmail|kidEmail)/gi
524
741
  ],
525
742
  fixSuggestion: 'Make parent_email required when collecting child contact information',
526
- penalty: '$51,744 per violation',
527
- languages: ['typescript', 'javascript', 'python']
743
+ penalty: '$53,088 per violation',
744
+ languages: ['typescript', 'javascript', 'python', 'php', 'ruby', 'java', 'kotlin']
528
745
  },
529
746
  // Rule 10: Insecure Default Passwords
530
747
  {
531
748
  id: 'coppa-sec-010',
749
+ is_active: false, // Sprint 16 W1: 100% FP (0/3 TP) — all hits are test fixture passwords, not production defaults
532
750
  name: 'Weak Default Student Passwords',
533
751
  severity: 'medium',
534
752
  description: 'Default passwords like "password", "123456", or "changeme" create security vulnerabilities',
@@ -557,50 +775,79 @@ exports.COPPA_RULES = [
557
775
  /Freshdesk|FreshChat/gi
558
776
  ],
559
777
  fixSuggestion: 'Disable chat widget for unauthenticated or under-13 users via conditional rendering',
560
- penalty: '$51,744 per violation',
778
+ penalty: '$53,088 per violation',
561
779
  languages: ['typescript', 'javascript', 'html']
562
780
  },
563
781
  // Rule 12: Biometric Data Collection
782
+ // Sprint 15: DISABLED — 0% TP precision (46 entries, ALL false positives).
783
+ // Pattern matches generic terms (FaceID, TouchID, FaceDetector) without
784
+ // distinguishing real biometric capture from SDK type definitions, AWS API
785
+ // schemas, and vendor library code. Rebuild requires AST-level context.
564
786
  {
565
787
  id: 'coppa-bio-012',
566
788
  name: 'Biometric Data Collection',
567
789
  severity: 'critical',
568
- description: 'Face recognition, voice prints, or gait analysis requires explicit parental consent. COPPA 2.0 explicitly classifies biometrics as PI.',
790
+ is_active: false,
791
+ description: 'COPPA 2025 explicitly adds biometric identifiers to the definition of PI. Face recognition, voice prints, gait analysis, behavioral biometrics (keystroke dynamics, mouse movement patterns), iris/pupil scanning, and health biometric APIs all require verifiable parental consent.',
569
792
  patterns: [
570
793
  /(?:import\s+.*from\s+['"]face-api\.js['"]|require\s*\(\s*['"]face-api\.js['"]\s*\))/gi,
571
794
  /LocalAuthentication.*evaluatePolicy/gi,
572
- /FaceID|TouchID/gi,
573
- /biometricAuth|BiometricAuth/g,
574
- /voicePrint|VoicePrint/g,
575
- /livenessCheck|LivenessCheck/g,
576
- /FaceMatcher|FaceDetector|FaceRecognizer/g
795
+ /(?:biometricAuth|BiometricAuth|biometricPrompt|BiometricPrompt)/g,
796
+ /voicePrint|VoicePrint|voiceRecognition|VoiceRecognition|speakerVerification/g,
797
+ /livenessCheck|LivenessCheck|livenessDetection/g,
798
+ /FaceMatcher|FaceDetector|FaceRecognizer|FaceLandmarks/g,
799
+ // Behavioral biometrics (COPPA 2025 expansion)
800
+ /keystrokeDynamic|keystrokePattern|typingBiometric|keyPressAnalysis/g,
801
+ /gaitAnalysis|gaitDetect|gaitRecognition|motionBiometric/g,
802
+ /mouseMovementPattern|cursorTracking|behavioralBiometric/g,
803
+ /irisScann?|pupilDetect|eyeTracking|gazeTracking/gi,
804
+ // Health biometric APIs
805
+ /(?:HKHealthStore|HKQuantityType|HealthKit).*(?:heartRate|stepCount|workout|sleep)/gi,
806
+ /(?:GoogleFit|FitnessOptions|HistoryClient).*(?:heartRate|steps|calories|sleep)/gi,
807
+ // Face detection libraries
808
+ /(?:import|require).*(?:face-api|@mediapipe\/face|@tensorflow\/tfjs-models\/face|deepface|insightface)/gi
577
809
  ],
578
- fixSuggestion: 'Ensure biometric data remains local-only (on-device) or obtain verifiable parental consent',
579
- penalty: '$51,744 per violation',
580
- languages: ['typescript', 'javascript', 'swift', 'kotlin']
810
+ fixSuggestion: 'Ensure biometric data remains local-only (on-device) or obtain verifiable parental consent per COPPA 2025. Do not transmit biometric identifiers to servers without separate parental consent.',
811
+ penalty: '$53,088 per violation',
812
+ languages: ['typescript', 'javascript', 'swift', 'kotlin', 'python', 'java']
581
813
  },
582
814
  // Rule 13: Push Notifications to Children
815
+ // Rebuilt Sprint 18: removed generic Notification constructor & requestPermission (94.4% FP).
816
+ // Now targets push subscription/registration APIs only.
583
817
  {
584
818
  id: 'coppa-notif-013',
585
819
  name: 'Direct Push Notifications Without Consent',
586
- severity: 'medium',
587
- description: 'Push notifications are "Online Contact Info" under COPPA 2.0. Direct notifications to children require parental consent.',
820
+ severity: 'low',
821
+ description: 'FTC declined to codify push notification restrictions in the 2025 final rule but stated it remains concerned about push notifications and engagement techniques. Best practice: gate push subscriptions behind parental consent. Maps to NGL Labs and Sendit enforcement patterns.',
588
822
  patterns: [
589
- /FirebaseMessaging\.subscribeToTopic/gi,
590
- /OneSignal\.promptForPushNotifications/gi,
591
- /sendPushNotification\s*\(/gi,
592
- /fcm\.send\s*\(/gi,
593
- /PushManager\.subscribe\s*\(/gi,
594
- /Notification\.requestPermission/gi,
595
- /new\s+Notification\s*\(/gi
823
+ /FirebaseMessaging\.subscribeToTopic/g,
824
+ /OneSignal\.(?:promptForPushNotifications|init)\s*\(/g,
825
+ /sendPushNotification\s*\(/g,
826
+ /fcm\.send\s*\(/g,
827
+ /PushManager\.subscribe\s*\(/g,
828
+ /pushManager\.subscribe\s*\(/g,
829
+ /messaging\(\)\.getToken\s*\(/g,
830
+ /registerForPushNotifications\s*\(/g,
831
+ /addEventListener\s*\(\s*['"]push['"]/g,
832
+ /expo-notifications/g,
833
+ /react-native-push-notification/g,
834
+ // Python — Django push notification libraries
835
+ /(?:import|from)\s+(?:webpush|pywebpush|push_notifications|django_push_notifications)/gi,
836
+ /webpush\.send\s*\(/gi,
837
+ // PHP — web-push-php library
838
+ /(?:new\s+)?WebPush\s*\(\s*\[/gi,
839
+ /\$webPush->sendOneNotification/gi,
840
+ // Ruby — web-push gem
841
+ /WebPush\.payload_send\s*\(/gi
596
842
  ],
597
843
  fixSuggestion: 'Gate push notification subscription behind parental dashboard setting',
598
- penalty: '$51,744 per violation',
599
- languages: ['typescript', 'javascript', 'swift', 'kotlin']
844
+ penalty: '$53,088 per violation',
845
+ languages: ['typescript', 'javascript', 'swift', 'kotlin', 'python', 'php', 'ruby']
600
846
  },
601
847
  // Rule 14: Unfiltered User Generated Content
602
848
  {
603
849
  id: 'coppa-ugc-014',
850
+ is_active: false, // Sprint 16 W1: 100% FP (0/3 TP) — all hits are API model property assignments, not child UGC
604
851
  name: 'UGC Upload Without PII Filter',
605
852
  severity: 'high',
606
853
  description: 'Text areas for "bio", "about me", or comments must pass through PII scrubbing before database storage',
@@ -613,7 +860,7 @@ exports.COPPA_RULES = [
613
860
  /commentForm.*submit|handleCommentSubmit/gi
614
861
  ],
615
862
  fixSuggestion: 'Add middleware hook for PII scrubbing (regex or AWS Comprehend) before database storage',
616
- penalty: '$51,744 per violation',
863
+ penalty: '$53,088 per violation',
617
864
  languages: ['typescript', 'javascript', 'python']
618
865
  },
619
866
  // Rule 15: XSS Vulnerabilities
@@ -628,11 +875,21 @@ exports.COPPA_RULES = [
628
875
  /\.innerHTML\s*=\s*\$\{/gi,
629
876
  /\.innerHTML\s*=\s*(?!['"]?\s*['"]?\s*;)(?!.*[Ll]ocal(?:ize|ization))(?!.*styleContent)[^;]*\b(?:user|input|query|param|req\.|request\.|body\.|data\.)\w*/gi,
630
877
  /\.html\s*\(\s*(?:user|req\.|request\.|params?\.)/gi,
631
- /v-html\s*=\s*["']?(?!.*sanitize)/gi
878
+ /v-html\s*=\s*["']?(?!.*sanitize)/gi,
879
+ // PHP — echo/print user input without escaping
880
+ /echo\s+\$_(?:GET|POST|REQUEST)\s*\[/gi,
881
+ // PHP — WordPress unescaped output
882
+ /<?php\s+echo\s+\$(?!esc_)/gi,
883
+ // Python — Django mark_safe with user input
884
+ /mark_safe\s*\([^)]*(?:request|user_input|params)/gi,
885
+ // Ruby — Rails raw() with user input
886
+ /raw\s*\(\s*(?:params|@\w*user|@\w*input)/gi,
887
+ // Ruby — html_safe on user input
888
+ /(?:params|request)\[.*\]\.html_safe/gi
632
889
  ],
633
890
  fixSuggestion: 'Use standard JSX rendering or DOMPurify before setting HTML content',
634
891
  penalty: 'Security failure',
635
- languages: ['typescript', 'javascript', 'tsx', 'jsx', 'vue']
892
+ languages: ['typescript', 'javascript', 'tsx', 'jsx', 'vue', 'php', 'python', 'ruby']
636
893
  },
637
894
  // Rule 16: Missing Cookie Consent
638
895
  // Fixed Sprint 4: Only flag tracking/PII cookies, not functional preferences (theme, view mode)
@@ -655,11 +912,17 @@ exports.COPPA_RULES = [
655
912
  // Java/Kotlin — Spring ResponseCookie
656
913
  /ResponseCookie\.from\s*\(/gi,
657
914
  // Generic — any language setting cookies with PII field names
658
- /(?:set_cookie|SetCookie|addCookie|add_cookie)\s*\([^)]*(?:user|email|token|session|track|auth|uid|analytics)/gi
915
+ /(?:set_cookie|SetCookie|addCookie|add_cookie)\s*\([^)]*(?:user|email|token|session|track|auth|uid|analytics)/gi,
916
+ // PHP — setcookie() with PII
917
+ /setcookie\s*\(\s*['"][^'"]*(?:user|email|token|track|auth|uid|analytics)[^'"]*['"]/gi,
918
+ // PHP — WordPress set_transient with PII
919
+ /set_transient\s*\(\s*['"][^'"]*(?:user|email|auth)[^'"]*['"]/gi,
920
+ // Ruby — Rails cookies[] with PII
921
+ /cookies\s*\[\s*:(?:user|email|token|session|track|auth|uid|analytics)\s*\]/gi
659
922
  ],
660
923
  fixSuggestion: 'Add a cookie consent banner component before setting tracking or PII cookies',
661
924
  penalty: 'Compliance warning',
662
- languages: ['typescript', 'javascript', 'python', 'go', 'java', 'kotlin']
925
+ languages: ['typescript', 'javascript', 'python', 'go', 'java', 'kotlin', 'php', 'ruby']
663
926
  },
664
927
  // Rule 17: External Links to Non-Child-Safe Sites
665
928
  // Fixed Sprint 4: Exclude privacy/TOS links, mailto, and common safe targets
@@ -705,13 +968,14 @@ exports.COPPA_RULES = [
705
968
  /(?:setUserId|set_user_id)\s*\([^)]*(?:email|\.name|phone)/gi
706
969
  ],
707
970
  fixSuggestion: 'Hash user ID and omit email/name from analytics payload',
708
- penalty: '$51,744 per violation',
971
+ penalty: '$53,088 per violation',
709
972
  languages: ['typescript', 'javascript', 'python', 'go', 'java', 'kotlin']
710
973
  },
711
974
  // Rule 19: School Official Consent Bypass
712
975
  // Fixed Sprint 4: Tightened patterns to match actual auth/registration flows only
713
976
  {
714
977
  id: 'coppa-edu-019',
978
+ is_active: false,
715
979
  name: 'Missing Teacher/School Verification',
716
980
  severity: 'medium',
717
981
  description: 'Teacher accounts using generic email (@gmail.com) bypass "School Official" consent exception',
@@ -728,6 +992,7 @@ exports.COPPA_RULES = [
728
992
  // Rule 20: Default Privacy Settings Public
729
993
  {
730
994
  id: 'coppa-default-020',
995
+ is_active: false,
731
996
  name: 'Default Public Profile Visibility',
732
997
  severity: 'critical',
733
998
  description: 'Default profile visibility must be private. COPPA 2.0 requires privacy by design.',
@@ -739,8 +1004,41 @@ exports.COPPA_RULES = [
739
1004
  /profileVisibility\s*=\s*['"]?(?:public|Public)['"]?/gi
740
1005
  ],
741
1006
  fixSuggestion: 'Change default visibility to "private" or false',
742
- penalty: '$51,744 per violation',
1007
+ penalty: '$53,088 per violation',
743
1008
  languages: ['typescript', 'javascript', 'python', 'swift']
1009
+ },
1010
+ // Rule 21: Targeted Advertising Without Separate Consent (Sprint 17 — COPPA 2025)
1011
+ {
1012
+ id: 'coppa-ads-021',
1013
+ name: 'Targeted Advertising Without Separate Consent',
1014
+ severity: 'critical',
1015
+ description: 'COPPA 2025 requires separate, specific opt-in consent before collecting children\'s PI for targeted advertising. Marketing consent cannot be bundled with general terms acceptance. Ad SDK initialization without a distinct consent flow is a violation.',
1016
+ patterns: [
1017
+ // Google AdMob
1018
+ /(?:import|require).*(?:google-mobile-ads|@react-native-firebase\/admob|react-native-admob)/gi,
1019
+ /(?:GADMobileAds|GADRequest|GADBannerView|GADInterstitial)\.\w+/gi,
1020
+ /MobileAds\.initialize|AdRequest\.Builder|AdView|InterstitialAd\.load/gi,
1021
+ // Meta Audience Network
1022
+ /(?:FBAudienceNetwork|FBAdView|FBInterstitialAd|FBNativeAd)/gi,
1023
+ /(?:import|require).*(?:react-native-fbads|@react-native-community\/fbads)/gi,
1024
+ // Unity Ads
1025
+ /UnityAds\.(?:initialize|show|load)|import\s+UnityAds/gi,
1026
+ // IronSource
1027
+ /IronSource\.(?:init|showRewardedVideo|loadInterstitial)|import\s+IronSource/gi,
1028
+ // AppLovin
1029
+ /AppLovin\.(?:initialize|showAd)|import.*AppLovinSDK/gi,
1030
+ // Chartboost
1031
+ /Chartboost\.(?:start|showInterstitial|cacheInterstitial)/gi,
1032
+ // AdColony
1033
+ /AdColony\.(?:configure|requestInterstitial)/gi,
1034
+ // Vungle
1035
+ /Vungle\.(?:init|playAd|loadAd)/gi,
1036
+ // MoPub
1037
+ /mopub\.(?:loadBanner|loadInterstitial)|MoPubInterstitial/gi
1038
+ ],
1039
+ fixSuggestion: 'Implement a separate, specific opt-in consent flow for advertising before initializing ad SDKs. Marketing consent must NOT be bundled with general terms acceptance. Use age-gated ad experiences or contextual-only advertising for children under 13.',
1040
+ penalty: '$53,088 per violation (COPPA 2025 separate advertising consent requirement)',
1041
+ languages: ['typescript', 'javascript', 'swift', 'kotlin', 'java', 'python']
744
1042
  }
745
1043
  ];
746
1044
  // Ethical Design Rules (Sprint 5 Preview)
@@ -803,6 +1101,7 @@ exports.ETHICAL_RULES = [
803
1101
  // ETHICAL-004: Manipulative Notifications
804
1102
  {
805
1103
  id: 'ETHICAL-004',
1104
+ is_active: false,
806
1105
  name: 'Manipulative Notification Language',
807
1106
  severity: 'medium',
808
1107
  description: 'Notifications using urgency ("Hurry!", "Missing out") manipulate children\'s fear of social exclusion',
@@ -819,6 +1118,7 @@ exports.ETHICAL_RULES = [
819
1118
  // ETHICAL-005: Artificial Scarcity
820
1119
  {
821
1120
  id: 'ETHICAL-005',
1121
+ is_active: false,
822
1122
  name: 'Artificial Scarcity / Countdowns',
823
1123
  severity: 'medium',
824
1124
  description: 'Fake scarcity ("Only 2 left!") and countdown timers pressure children into impulsive decisions',
@@ -843,6 +1143,7 @@ exports.AI_AUDIT_RULES = [
843
1143
  // AI-AUDIT-001: Placeholder Analytics
844
1144
  {
845
1145
  id: 'AI-AUDIT-001',
1146
+ is_active: false, // Sprint 15: Cut to proposed tier — zero GT
846
1147
  name: 'Placeholder Analytics Script',
847
1148
  severity: 'high',
848
1149
  description: 'AI-generated code frequently includes placeholder analytics (UA-XXXXX, G-XXXXXX, fbq) copied from training data. These may activate real tracking without child_directed_treatment flags.',
@@ -877,6 +1178,7 @@ exports.AI_AUDIT_RULES = [
877
1178
  // AI-AUDIT-003: Hallucinated URLs
878
1179
  {
879
1180
  id: 'AI-AUDIT-003',
1181
+ is_active: false, // Sprint 15: Cut to proposed tier — zero GT
880
1182
  name: 'Hallucinated/Placeholder API URLs',
881
1183
  severity: 'medium',
882
1184
  description: 'AI models often generate fake API endpoints (api.example.com, jsonplaceholder, reqres.in) that may be replaced with real endpoints without proper review.',
@@ -892,6 +1194,7 @@ exports.AI_AUDIT_RULES = [
892
1194
  // AI-AUDIT-004: Copy-Paste Tracking Boilerplate
893
1195
  {
894
1196
  id: 'AI-AUDIT-004',
1197
+ is_active: false, // Sprint 15: Cut to proposed tier — zero GT
895
1198
  name: 'Copy-Paste Tracking Boilerplate',
896
1199
  severity: 'high',
897
1200
  description: 'AI assistants reproduce common analytics setup patterns from training data. These often include user identification, event tracking, and session recording without consent flows.',
@@ -931,6 +1234,7 @@ exports.AI_AUDIT_RULES = [
931
1234
  // AI-AUDIT-006: TODO/FIXME Compliance Gaps
932
1235
  {
933
1236
  id: 'AI-AUDIT-006',
1237
+ is_active: false, // Sprint 15: Cut to proposed tier — zero GT
934
1238
  name: 'Unresolved Compliance TODOs',
935
1239
  severity: 'low',
936
1240
  description: 'AI-generated code often includes TODO/FIXME comments for compliance-related features (consent, age verification, privacy policy) that may ship unimplemented.',
@@ -986,6 +1290,7 @@ exports.AU_SBD_RULES = [
986
1290
  // AU-SBD-003: Unrestricted Direct Messaging
987
1291
  {
988
1292
  id: 'AU-SBD-003',
1293
+ is_active: false,
989
1294
  name: 'Unrestricted Direct Messaging for Minors',
990
1295
  severity: 'critical',
991
1296
  description: 'Direct messaging or chat functionality without safety controls (contact restrictions, message filtering, or parental oversight). The AU Online Safety Act requires platforms to take reasonable steps to prevent child exploitation in private communications.',
@@ -1002,6 +1307,7 @@ exports.AU_SBD_RULES = [
1002
1307
  // AU-SBD-004: Algorithmic Feeds Without Safety Guardrails
1003
1308
  {
1004
1309
  id: 'AU-SBD-004',
1310
+ is_active: false, // Sprint 15: Cut to proposed tier — zero GT
1005
1311
  name: 'Recommendation Algorithm Without Safety Guardrails',
1006
1312
  severity: 'high',
1007
1313
  description: 'Content recommendation or feed algorithms detected without safety filtering, content classification, or age-appropriate guardrails. AU SbD requires platforms to assess and mitigate algorithmic harms, particularly for young users.',
@@ -1019,6 +1325,7 @@ exports.AU_SBD_RULES = [
1019
1325
  // AU-SBD-005: Missing Digital Wellbeing / Screen Time Controls
1020
1326
  {
1021
1327
  id: 'AU-SBD-005',
1328
+ is_active: false, // Sprint 16 W1: 18.2% precision (2/11 TP). All 9 FPs are media player autoplay (jellyfin). Pattern too broad — rebuild needed.
1022
1329
  name: 'Engagement Features Without Time Awareness',
1023
1330
  severity: 'medium',
1024
1331
  description: 'High-engagement features (autoplay, continuous scrolling, notifications) detected without corresponding digital wellbeing controls (screen time limits, break reminders, usage dashboards). AU SbD encourages platforms to build in digital wellbeing tools.',
@@ -1036,6 +1343,7 @@ exports.AU_SBD_RULES = [
1036
1343
  // AU-SBD-006: Location Sharing Without Explicit Opt-In
1037
1344
  {
1038
1345
  id: 'AU-SBD-006',
1346
+ is_active: false,
1039
1347
  name: 'Location Data Without Explicit Consent',
1040
1348
  severity: 'critical',
1041
1349
  description: 'Location data collection or sharing enabled without explicit, informed opt-in. AU SbD and the Privacy Act 1988 require data minimization, especially for children\'s geolocation data — location should never be collected by default.',
@@ -1097,6 +1405,48 @@ function parseHaloignore(content) {
1097
1405
  }
1098
1406
  return config;
1099
1407
  }
1408
/**
 * Sprint 10: Check if a file path is in a vendored/third-party library directory.
 * Vendor files are auto-suppressed to eliminate false positives from code the project doesn't control.
 *
 * Backslashes are converted to forward slashes before matching, so Windows-style
 * paths are handled the same way as POSIX-style paths.
 *
 * @param {string} filePath - Path of the file being scanned (relative or absolute).
 * @returns {boolean} true when the path matches any vendored-code pattern;
 *   false otherwise, including when filePath is not a string.
 */
// Hoisted so the pattern table is built once rather than on every call.
// None of these use the /g or /y flag, so .test() is stateless and safe to reuse.
const VENDOR_PATH_PATTERNS = [
    /(^|\/)node_modules\//,
    /(^|\/)vendor\//,
    /(^|\/)bower_components\//,
    /(^|\/)third[_-]?party\//,
    /(^|\/)\.bundle\//,
    /(^|\/)Pods\//,
    /(^|\/)external\//,
    /(^|\/)deps\//,
    /(^|\/)\.yarn\//,
    /(^|\/)\.pnpm\//,
    // Minified files are almost always vendored/built
    /[.\-]min\.(js|css)$/,
    /\.bundle\.js$/,
    // Well-known vendored library directories (catches lib/google2-service/, lib/aws-sdk/, etc.)
    /(^|\/)lib\/(google[^/]*|aws[^/]*|yui[^/]*|php[^/]*|jquery[^/]*|bootstrap[^/]*|tinymce[^/]*|h5p[^/]*|firebase[^/]*|simplepie[^/]*|tcpdf[^/]*|guzzle[^/]*|psr[^/]*|font-?awesome[^/]*)\//i,
    // H5P vendored libraries (stored under h5p/h5plib/, not lib/)
    /(^|\/)h5plib\//,
];
function isVendorPath(filePath) {
    // A missing or non-string path cannot be classified; report "not vendored"
    // instead of throwing from .replace().
    if (typeof filePath !== 'string') {
        return false;
    }
    const normalized = filePath.replace(/\\/g, '/');
    return VENDOR_PATH_PATTERNS.some((pattern) => pattern.test(normalized));
}
1432
/**
 * Sprint 11a: Check if a file path is in a documentation generator output directory.
 * Doc generator templates and output contain external links, code examples, etc. that are
 * developer-facing, not child-facing content. Flagging these is a false positive.
 *
 * Backslashes are converted to forward slashes before matching, and every
 * pattern is case-insensitive.
 *
 * @param {string} filePath - Path of the file being scanned (relative or absolute).
 * @returns {boolean} true when the path matches any doc-generator pattern;
 *   false otherwise, including when filePath is not a string.
 */
// Hoisted so the pattern table is built once rather than on every call.
// None of these use the /g or /y flag, so .test() is stateless and safe to reuse.
const DOC_GENERATOR_PATH_PATTERNS = [
    // A path segment that starts with a generator name followed by "/" or "."
    /(^|\/)(?:jsdoc|typedoc|apidoc|javadoc|doxygen|sphinx|_build|_static)(?:\/|\.)/i,
    // Documentation template files
    /(?:^|\/)(?:jsdoc|typedoc|apidoc)\.(?:html|hbs|tmpl|ejs)$/i,
    // Generated API docs
    /(^|\/)(?:docs?\/(?:api|generated|reference|build))\//i,
    // Sphinx build output
    /(^|\/)_build\/html\//i,
    // Common doc generator config files with template content
    /(?:^|\/)\.jsdoc\.(?:json|js)$/i,
    /(?:^|\/)typedoc\.json$/i,
];
function isDocGeneratorPath(filePath) {
    // A missing or non-string path cannot be classified; report "not doc output"
    // instead of throwing from .replace().
    if (typeof filePath !== 'string') {
        return false;
    }
    const normalized = filePath.replace(/\\/g, '/');
    return DOC_GENERATOR_PATH_PATTERNS.some((pattern) => pattern.test(normalized));
}
1100
1450
  /**
1101
1451
  * Check if a file should be ignored based on .haloignore config
1102
1452
  */
@@ -1140,6 +1490,12 @@ class HaloEngine {
1140
1490
  constructor(config = {}) {
1141
1491
  this.config = config;
1142
1492
  this.treeSitter = new TreeSitterParser();
1493
+ this.astEngine = new ast_engine_1.ASTRuleEngine();
1494
+ this.contextAnalyzer = new context_analyzer_1.ContextAnalyzer({
1495
+ framework: config.framework,
1496
+ historicalFPRates: config.historicalFPRates,
1497
+ suppressionRates: config.suppressionRates,
1498
+ });
1143
1499
  // Rule loading priority chain:
1144
1500
  // 1. config.loadedRules — pre-compiled rules from CLI API fetch
1145
1501
  // 2. config.rulesPath — YAML file (legacy)
@@ -1175,6 +1531,35 @@ class HaloEngine {
1175
1531
  this.rules = [...this.rules, ...exports.AU_SBD_RULES];
1176
1532
  }
1177
1533
  }
1534
+ // Sprint 15: Filter out disabled rules.
1535
+ // Static list ensures rules are disabled regardless of source (API, cache, bundled JSON, hardcoded).
1536
+ // is_active flag handles hardcoded rules; DISABLED_RULE_IDS handles all sources.
1537
+ // Sprint 15: Zero-GT rule actions
1538
+ // DISABLE: zero GT entries, cannot validate precision
1539
+ // CUT (to proposed tier): zero GT entries, pattern too broad for production
1540
+ const DISABLED_RULE_IDS = new Set([
1541
+ 'coppa-bio-012', // 0% precision, all FP — rebuild needed
1542
+ // coppa-notif-013 removed — rebuilt Sprint 18 with push-only patterns
1543
+ 'coppa-sec-010', // Sprint 16 W1: 100% FP (0/3 TP) — all hits wrong
1544
+ 'coppa-ugc-014', // Sprint 16 W1: 100% FP (0/3 TP) — all hits wrong
1545
+ 'coppa-edu-019', // Zero GT — teacher registration patterns too narrow to validate
1546
+ 'coppa-default-020', // Zero GT — overlaps with AU-SBD-001 default public profiles
1547
+ 'ETHICAL-004', // Zero GT — manipulative notification language too broad
1548
+ 'ETHICAL-005', // Zero GT — artificial scarcity patterns too broad
1549
+ 'AU-SBD-003', // Zero GT — DM detection patterns too broad
1550
+ 'AU-SBD-005', // Sprint 16 W1: 18.2% precision — autoplay pattern fires on media player APIs
1551
+ 'AU-SBD-006', // Zero GT — location sharing patterns too broad
1552
+ // CUT to proposed tier (zero GT, pattern too broad for production)
1553
+ 'AI-AUDIT-001', // Placeholder analytics — low real-world signal
1554
+ 'AI-AUDIT-003', // Hallucinated URLs — low real-world signal
1555
+ 'AI-AUDIT-004', // Copy-paste tracking boilerplate — too broad
1556
+ 'AI-AUDIT-006', // TODO/FIXME compliance — noise in real codebases
1557
+ 'AU-SBD-004', // Algorithmic feeds — pattern too broad for production
1558
+ // Sprint 17 Day 0: Zero-GT rules validated against 5 repos — patterns don't match real-world code
1559
+ 'ut-sb142-003', // Default DM access — patterns use naming conventions no real app uses (0 hits across Moodle, Discourse, Rocket.Chat, Element, Mastodon)
1560
+ 'ut-sb142-004', // Missing parental tools — 2 hits across 5 repos, both FP. API/bundled pattern mismatch. Needs rebuild
1561
+ ]);
1562
+ this.rules = this.rules.filter(r => r.is_active !== false && !DISABLED_RULE_IDS.has(r.id));
1178
1563
  if (config.severityFilter) {
1179
1564
  this.rules = this.rules.filter(r => config.severityFilter.includes(r.severity));
1180
1565
  }
@@ -1207,6 +1592,8 @@ class HaloEngine {
1207
1592
  packs.push('ai-audit');
1208
1593
  if (config.sectorAuSbd)
1209
1594
  packs.push('au-sbd');
1595
+ if (config.sectorAuOsa)
1596
+ packs.push('au-osa');
1210
1597
  return packs;
1211
1598
  }
1212
1599
  /**
@@ -1220,18 +1607,17 @@ class HaloEngine {
1220
1607
  */
1221
1608
  scanFileWithAST(filePath, content, language = 'typescript') {
1222
1609
  // First get regex-based violations
1223
- const violations = this.scanFile(filePath, content);
1224
- // Then enhance with AST analysis
1610
+ let violations = this.scanFile(filePath, content);
1611
+ // Sprint 8: Enhanced AST analysis with ASTRuleEngine + framework overrides + ContextAnalyzer
1225
1612
  try {
1226
- const identifiers = this.treeSitter.extractIdentifiers(content);
1613
+ const tree = this.treeSitter.parse(content, language);
1614
+ // Legacy Sprint 1: AST-based detection for social login (signInWithPopup)
1227
1615
  const functionCalls = this.treeSitter.findFunctionCalls(content, 'signInWithPopup');
1228
- // Add AST-based detection for social login
1229
1616
  for (const call of functionCalls) {
1230
- // Check if already detected by regex
1231
1617
  const exists = violations.some(v => v.ruleId === 'coppa-auth-001' &&
1232
1618
  v.line === call.line);
1233
1619
  if (!exists) {
1234
- const authRule = exports.COPPA_RULES.find(r => r.id === 'coppa-auth-001');
1620
+ const authRule = this.rules.find(r => r.id === 'coppa-auth-001') || exports.COPPA_RULES.find(r => r.id === 'coppa-auth-001');
1235
1621
  if (authRule) {
1236
1622
  violations.push({
1237
1623
  ruleId: 'coppa-auth-001',
@@ -1253,10 +1639,63 @@ class HaloEngine {
1253
1639
  }
1254
1640
  }
1255
1641
  }
1642
+ // Sprint 8: Run ASTRuleEngine analysis on every violation to classify FPs
1643
+ if (this.config.astAnalysis !== false) {
1644
+ for (const violation of violations) {
1645
+ try {
1646
+ const astResult = this.astEngine.analyzeViolationWithPath(violation.ruleId, filePath, content, {
1647
+ ruleId: violation.ruleId,
1648
+ line: violation.line,
1649
+ column: violation.column,
1650
+ codeSnippet: violation.codeSnippet,
1651
+ }, tree);
1652
+ violation.astVerdict = astResult.verdict;
1653
+ violation.astConfidence = astResult.confidence;
1654
+ violation.astReason = astResult.reason;
1655
+ // Update matchType to reflect AST involvement
1656
+ if (astResult.verdict !== 'regex_only') {
1657
+ violation.matchType = 'hybrid';
1658
+ }
1659
+ }
1660
+ catch (ruleError) {
1661
+ violation.astVerdict = 'regex_only';
1662
+ violation.astConfidence = 0;
1663
+ violation.astReason = 'AST analysis failed for this violation';
1664
+ }
1665
+ }
1666
+ }
1256
1667
  }
1257
1668
  catch (error) {
1258
- // If AST parsing fails, fall back to regex-only
1669
+ // If AST parsing fails entirely, fall back to regex-only
1259
1670
  console.warn('AST parsing failed, using regex-only mode:', error);
1671
+ for (const v of violations) {
1672
+ v.astVerdict = 'regex_only';
1673
+ v.astConfidence = 0;
1674
+ }
1675
+ }
1676
+ // Sprint 10: Framework overrides now applied in scanFile() for ALL file types.
1677
+ // No longer needed here — scanFile() already filtered/downgraded regex violations.
1678
+ // AST-added violations (e.g. signInWithPopup → auth-001) are not in any framework profile.
1679
+ // Sprint 8: ContextAnalyzer — compute confidence scores
1680
+ const violationInputs = violations.map(v => ({
1681
+ ruleId: v.ruleId,
1682
+ severity: v.severity,
1683
+ line: v.line,
1684
+ column: v.column,
1685
+ codeSnippet: v.codeSnippet,
1686
+ astVerdict: v.astVerdict,
1687
+ astConfidence: v.astConfidence,
1688
+ astReason: v.astReason,
1689
+ frameworkSuppressed: v.frameworkSuppressed,
1690
+ }));
1691
+ const confidenceResults = this.contextAnalyzer.analyzeFile(violationInputs, filePath, content);
1692
+ for (let i = 0; i < violations.length; i++) {
1693
+ const result = confidenceResults.get(i);
1694
+ if (result) {
1695
+ violations[i].confidence = result.confidence;
1696
+ violations[i].confidenceInterpretation = result.interpretation;
1697
+ violations[i].confidenceReason = result.reason;
1698
+ }
1260
1699
  }
1261
1700
  return violations;
1262
1701
  }
@@ -1275,8 +1714,66 @@ class HaloEngine {
1275
1714
  if (ignoreConfig && shouldIgnoreFile(filePath, ignoreConfig)) {
1276
1715
  return [];
1277
1716
  }
1278
- const violations = [];
1717
+ // Sprint 10: Skip vendored/third-party library files entirely
1718
+ // These produce massive false positives (84% FP rate on Moodle — all from lib/, vendor/ paths)
1719
+ if (isVendorPath(filePath)) {
1720
+ return [];
1721
+ }
1722
+ // Sprint 11a: Skip documentation generator output files
1723
+ // JSDoc templates, Sphinx output, TypeDoc pages — developer tools, not child-facing content
1724
+ if (isDocGeneratorPath(filePath)) {
1725
+ return [];
1726
+ }
1727
+ let violations = [];
1279
1728
  const lines = content.split('\n');
1729
+ // Sprint 13a: Consolidated file classification (Pre-filter A+)
1730
+ // All heuristics are now in classifyFile() for consistency and future Option C upgrade
1731
+ const normalizedPath = filePath.replace(/\\/g, '/');
1732
+ const classification = classifyFile(filePath, content.substring(0, 3000));
1733
+ // Sprint 13a: Skip files that should never be scanned
1734
+ // (Django migrations, build output, type definitions, CI configs)
1735
+ if (classification.shouldSkip) {
1736
+ return [];
1737
+ }
1738
+ // Rules that commonly false-positive in test/fixture/mock files
1739
+ const TEST_FP_RULES = new Set([
1740
+ 'coppa-sec-010', // Weak passwords in test fixtures
1741
+ 'coppa-tracking-003', // Analytics snippets in test mocks
1742
+ 'coppa-auth-001', // Auth patterns in test helpers
1743
+ 'coppa-sec-015', // XSS patterns in security test cases
1744
+ 'coppa-sec-006', // Sprint 11a: HTTP URLs in test config (e.g., http://example-storage.com in envs/test.py)
1745
+ ]);
1746
+ // Rules that should be suppressed in consent/compliance implementation files
1747
+ // These rules flag patterns that are REQUIRED in consent implementations
1748
+ const CONSENT_SUPPRESSED_RULES = new Set([
1749
+ 'coppa-cookies-016', // Cookie consent banners MUST set cookies to track consent state
1750
+ 'coppa-tracking-003', // Consent management may reference tracking to gate it
1751
+ 'coppa-data-002', // Consent flows may reference PII fields to declare collection scope
1752
+ ]);
1753
+ // Rules that FP in admin/instructor code — these patterns exist for managing users, not collecting child data
1754
+ const ADMIN_FP_RULES = new Set([
1755
+ 'coppa-flow-009', // Contact collection: admin reading existing user emails is not child contact flow
1756
+ 'coppa-data-002', // PII in URLs: admin user lookup endpoints are internal tools
1757
+ 'coppa-ui-008', // Registration forms: admin user management is not child registration
1758
+ 'coppa-sec-006', // Sprint 16: HTTP URLs in admin/instructor views are internal tooling, not child-facing
1759
+ ]);
1760
+ // ── Graduated Heuristics (Sprint 13b) ──────────────────────────────────
1761
+ // Auto-promoted from AI Review Board via the graduation pipeline.
1762
+ // Each pattern was dismissed consistently by the AI reviewer and passed
1763
+ // MVP validation criteria (min dismissals, min confidence, zero false confirmations).
1764
+ // These replace AI review calls with deterministic checks: zero cost, instant execution.
1765
+ // Graduated pattern: admin-path
1766
+ // 193 consistent dismissals | avg confidence 9.0/10 | 0 false confirmations
1767
+ // AI reviewer cost per check: ~$0.014 → now $0.00
1768
+ const GRADUATED_ADMIN_RULES = new Set([
1769
+ 'ut-sb142-001', // UT SB-142 age verification: admin panels are not child-facing
1770
+ 'ut-sb142-002', // UT SB-142 parental consent: admin interfaces require staff auth
1771
+ ]);
1772
+ // Graduated pattern: test-file
1773
+ // 27 consistent dismissals | avg confidence 9.0/10 | 0 false confirmations
1774
+ const GRADUATED_TEST_RULES = new Set([
1775
+ 'ut-sb142-001', // UT SB-142 age verification: test utilities are not production child-facing code
1776
+ ]);
1280
1777
  // Parse suppression comments
1281
1778
  const suppressions = parseSuppressions(content);
1282
1779
  // Track lines with global suppressions (at top of file)
@@ -1287,8 +1784,52 @@ class HaloEngine {
1287
1784
  }
1288
1785
  }
1289
1786
  for (const rule of this.rules) {
1787
+ // Sprint 10: Skip rules that don't target this file's language
1788
+ if (rule.languages && rule.languages.length > 0 && classification.language !== 'unknown') {
1789
+ if (!rule.languages.includes(classification.language)) {
1790
+ continue;
1791
+ }
1792
+ }
1793
+ // Sprint 8+13a: Skip rules that commonly FP in test/fixture/mock/factory files
1794
+ if ((classification.isTest || classification.isMockOrFactory || classification.isFixtureOrSeed) && TEST_FP_RULES.has(rule.id)) {
1795
+ continue;
1796
+ }
1797
+ // Sprint 13a: Skip ALL rules in Storybook stories (UI demos, not production code)
1798
+ if (classification.isStorybook) {
1799
+ continue;
1800
+ }
1801
+ // Sprint 10b: Skip rules that FP in consent/compliance implementation files
1802
+ // Consent forms MUST set cookies, reference tracking, and handle PII — that's the solution, not the problem
1803
+ if (classification.isConsent && CONSENT_SUPPRESSED_RULES.has(rule.id)) {
1804
+ continue;
1805
+ }
1806
+ // Sprint 11a: Skip rules that FP in admin/instructor backend code
1807
+ // Admin functions managing existing user data are not child-facing contact collection flows
1808
+ if (classification.isAdmin && ADMIN_FP_RULES.has(rule.id)) {
1809
+ continue;
1810
+ }
1811
+ // Sprint 13b Graduated: admin-path — admin files are not child-facing
1812
+ // (Promoted from AI Review Board: 193 dismissals, confidence 9.0)
1813
+ if (classification.isAdmin && GRADUATED_ADMIN_RULES.has(rule.id)) {
1814
+ continue;
1815
+ }
1816
+ // Sprint 15: AU-SBD-002 fix — skip in admin/vendor code (85% FP rate from these contexts)
1817
+ if ((classification.isAdmin || classification.isVendor) && rule.id === 'AU-SBD-002') {
1818
+ continue;
1819
+ }
1820
+ // Sprint 13b Graduated: test-file — test/fixture files are not production code
1821
+ // (Promoted from AI Review Board: 27 dismissals, confidence 9.0)
1822
+ if ((classification.isTest || classification.isMockOrFactory || classification.isFixtureOrSeed) && GRADUATED_TEST_RULES.has(rule.id)) {
1823
+ continue;
1824
+ }
1290
1825
  // Special handling for coppa-retention-005: skip if schema has retention fields
1291
1826
  if (rule.id === 'coppa-retention-005') {
1827
+ // Sprint 11a: Skip Python models annotated with no_pii docstrings
1828
+ // OpenEdX convention: `.. no_pii:` in class docstring means model contains no PII
1829
+ // These models have User FKs but only store non-PII data (e.g., calendar sync preferences)
1830
+ if (classification.language === 'python' && /(?:\.\.\s*no_pii\s*:|#\s*no[_-]?pii\b|no_pii\s*=\s*True)/i.test(content)) {
1831
+ continue;
1832
+ }
1292
1833
  // Check if the content has retention-related fields
1293
1834
  const hasRetention = /deletedAt|deleted_at|expires|TTL|retention|paranoid|expiration/i.test(content);
1294
1835
  if (!hasRetention) {
@@ -1342,6 +1883,82 @@ class HaloEngine {
1342
1883
  if (isOwnDomain)
1343
1884
  continue;
1344
1885
  }
1886
+ // Sprint 11a: For coppa-ext-017: skip IE conditional comments
1887
+ // <!--[if lte IE 9]> ... <![endif]--> are deprecated browser banners, not child-facing links
1888
+ // Seen in: OpenEdX templates with Chrome/Firefox download links for IE users
1889
+ if (rule.id === 'coppa-ext-017') {
1890
+ // Check if we're inside an IE conditional comment block
1891
+ const beforeMatch = content.substring(Math.max(0, match.index - 500), match.index);
1892
+ const afterMatch = content.substring(match.index, Math.min(content.length, match.index + 500));
1893
+ if (/<!--\s*\[if\s+(?:lt|lte|gt|gte|!)?\s*IE/i.test(beforeMatch) && /\[endif\]\s*-->/i.test(afterMatch)) {
1894
+ continue;
1895
+ }
1896
+ // Also skip if the line itself contains the IE conditional pattern
1897
+ if (/<!--\s*\[if\s+(?:lt|lte|gt|gte|!)?\s*IE/i.test(lineContent)) {
1898
+ continue;
1899
+ }
1900
+ }
1901
+ // Sprint 10+16: For coppa-sec-006: skip reserved/example/documentation/standards domains
1902
+ // These are IANA-reserved, standards bodies, or universally used in documentation and are never real endpoints
1903
+ // Require http:// before the domain to avoid matching domains in email addresses etc.
1904
+ if (rule.id === 'coppa-sec-006') {
1905
+ const checkText = (match[0] + ' ' + lineContent).toLowerCase();
1906
+ if (/http:\/\/(www\.)?(example\.(com|org|net)|localhost(:\d|\/|['"\s]|$)|127\.0\.0\.1|0\.0\.0\.0|\[::1\]|httpbin\.org|jsonplaceholder\.typicode\.com|testserver(\.com)?[\/\s'"]|imsglobal\.org|flickr\.com|w3\.org)/.test(checkText)) {
1907
+ continue;
1908
+ }
1909
+ // Sprint 16: Skip devstack/Docker service URLs (development-only, never production)
1910
+ if (/devstack/.test(checkText)) {
1911
+ continue;
1912
+ }
1913
+ // Sprint 16: Skip XML schema/namespace URLs (*.xsd, /xmlns/, /schema/)
1914
+ // These are namespace declarations, not API endpoints
1915
+ if (/\.xsd['"\s,)]|\/xmlns\/|\/schema\//.test(checkText)) {
1916
+ continue;
1917
+ }
1918
+ }
1919
+ // Sprint 10: For coppa-sec-015 (XSS): skip innerHTML assignments that are already sanitized
1920
+ // Y.Escape.html(), DOMPurify.sanitize(), etc. show the developer IS handling XSS
1921
+ if (rule.id === 'coppa-sec-015') {
1922
+ if (/(?:escape\.html|dompurify|sanitize|purify)\s*\(/i.test(lineContent)) {
1923
+ continue;
1924
+ }
1925
+ }
1926
+ // Sprint 10: For coppa-ui-008: skip admin tool registration (LTI cartridge, Brickfield, etc.)
1927
+ // These are admin/developer-facing forms, not child-facing registration
1928
+ if (rule.id === 'coppa-ui-008') {
1929
+ if (/cartridge[_-]?registration|brickfield|registersetting|tool_configure/i.test(lineContent) ||
1930
+ /cartridge[_-]?registration|brickfield|registersetting/i.test(normalizedPath)) {
1931
+ continue;
1932
+ }
1933
+ }
1934
+ // Sprint 10b: For coppa-cookies-016: skip consent implementations
1935
+ // Files/code implementing cookie consent are the solution, not the problem
1936
+ if (rule.id === 'coppa-cookies-016') {
1937
+ // File path patterns: cookie-consent.js, gdpr-banner.js, etc.
1938
+ if (/cookie[_-]?(consent|law|notice|banner|policy|popup|gdpr|preferences)/i.test(normalizedPath) ||
1939
+ /(?:consent|gdpr|ccpa|privacy)[_-]?(?:banner|popup|modal|notice|manager)/i.test(normalizedPath)) {
1940
+ continue;
1941
+ }
1942
+ // Line-level: function/variable names showing consent management intent
1943
+ if (/(?:handleConsent|acceptCookies|declineCookies|cookieBanner|consentManager|cookiePreferences|saveCookiePreferences|showCookieNotice|getCookieConsent|setCookieConsent)\s*[=(]/i.test(lineContent) ||
1944
+ /(?:accept|decline|preferences|banner|consent)\s*[=:]/i.test(lineContent) && /cookie/i.test(lineContent)) {
1945
+ continue;
1946
+ }
1947
+ // Import-level: known consent management libraries
1948
+ if (/(?:require|import).*(?:cookieconsent|react-cookie-consent|onetrust|cookiebot|osano|cookie-notice|gdpr-cookie)/i.test(content.substring(0, 2000))) {
1949
+ continue;
1950
+ }
1951
+ // Sprint 11a: Skip cookie DELETION patterns — setting expires to past or max-age=0/-1 is cleanup, not tracking
1952
+ // Seen in: Moodle submit.js — code that removes cookies flagged as if setting them
1953
+ if (/max[_-]?age\s*[=:]\s*['"]?\s*(-\d+|0)\b/i.test(lineContent) ||
1954
+ /expires\s*[=:]\s*['"]?\s*(?:Thu,\s*01\s+Jan\s+1970|new\s+Date\s*\(\s*0\s*\))/i.test(lineContent) ||
1955
+ /new\s+Date\s*\(\s*0\s*\)/.test(lineContent) && /expires/i.test(lineContent) ||
1956
+ /=\s*['"]?\s*deleted\b/i.test(lineContent) ||
1957
+ /(?:delete|remove|clear|expire|destroy)[_-]?cookie/i.test(lineContent) ||
1958
+ /\.cookie\s*=\s*['"][^'"]*;\s*expires\s*=\s*['"]?\s*Thu,\s*01/i.test(lineContent)) {
1959
+ continue;
1960
+ }
1961
+ }
1345
1962
  // Check if this violation already exists (avoid duplicates)
1346
1963
  const exists = violations.some(v => v.ruleId === rule.id &&
1347
1964
  v.line === lineNumber &&
@@ -1355,6 +1972,14 @@ class HaloEngine {
1355
1972
  if (suppressed) {
1356
1973
  suppressionComment = suppressions.get(lineNumber);
1357
1974
  }
1975
+ // Sprint 11b: Extract surrounding code context (5 lines before + 5 after)
1976
+ const contextStart = Math.max(0, lineNumber - 6); // lineNumber is 1-indexed
1977
+ const contextEnd = Math.min(lines.length, lineNumber + 5);
1978
+ const surroundingLines = lines.slice(contextStart, contextEnd).map((l, i) => {
1979
+ const ln = contextStart + i + 1;
1980
+ const marker = ln === lineNumber ? '>>>' : ' ';
1981
+ return `${marker} ${ln}: ${l}`;
1982
+ });
1358
1983
  violations.push({
1359
1984
  ruleId: rule.id,
1360
1985
  ruleName: rule.name,
@@ -1373,11 +1998,43 @@ class HaloEngine {
1373
1998
  matchType: 'regex',
1374
1999
  fixability: getRemediation(rule.id).fixability,
1375
2000
  remediation: getRemediation(rule.id),
2001
+ // Sprint 11b: Enriched context for AI Review Board
2002
+ surroundingCode: surroundingLines.join('\n'),
2003
+ fileMetadata: {
2004
+ language: classification.language,
2005
+ isVendor: classification.isVendor,
2006
+ isTest: classification.isTest,
2007
+ isAdmin: classification.isAdmin,
2008
+ isConsent: classification.isConsent,
2009
+ isDocGenerator: classification.isDocGenerator,
2010
+ detectedFramework: this.config.framework,
2011
+ // Sprint 13a: Extended classification data
2012
+ isMock: classification.isMockOrFactory,
2013
+ isFixture: classification.isFixtureOrSeed,
2014
+ isCIConfig: classification.isCIConfig,
2015
+ isBuildOutput: classification.isBuildOutput,
2016
+ isTypeDefinition: classification.isTypeDefinition,
2017
+ isStorybook: classification.isStorybook,
2018
+ },
1376
2019
  });
1377
2020
  }
1378
2021
  }
1379
2022
  }
1380
2023
  }
2024
+ // Sprint 10: Apply framework overrides to ALL file types (Python, PHP, HTML, etc.)
2025
+ // Previously this only ran inside scanFileWithAST() for JS/TS files.
2026
+ if (this.config.framework) {
2027
+ const result = (0, frameworks_1.applyFrameworkOverrides)(violations, this.config.framework);
2028
+ violations = result.violations;
2029
+ }
2030
+ // Sprint 12b: Dedup AI-GOVERNANCE-002 / AI-RISK-003 overlap
2031
+ // If both fire on same file+line, suppress AI-RISK-003 (AI-GOVERNANCE-002 subsumes it)
2032
+ const govViolations = new Set(violations
2033
+ .filter(v => v.ruleId === 'AI-GOVERNANCE-002')
2034
+ .map(v => `${v.filePath}:${v.line}`));
2035
+ if (govViolations.size > 0) {
2036
+ violations = violations.filter(v => !(v.ruleId === 'AI-RISK-003' && govViolations.has(`${v.filePath}:${v.line}`)));
2037
+ }
1381
2038
  // Filter suppressed if configured
1382
2039
  if (this.config.suppressions?.enabled !== false && !this.config.includeSuppressed) {
1383
2040
  const unsuppressed = violations.filter(v => !v.suppressed);