circle-ir 3.27.1 → 3.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/analyzer.js CHANGED
@@ -51,7 +51,7 @@
51
51
  * – MissingGuardDomPass — false positives in framework-auth codebases (see pass file)
52
52
  * – FeatureEnvyPass — fires on legitimate delegation patterns (see pass file)
53
53
  */
54
- import { initParser, parse, extractMeta, extractTypes, extractCalls, extractImports, extractExports, buildCFG, buildDFG, collectAllNodes, } from './core/index.js';
54
+ import { initParser, parse, disposeTree, extractMeta, extractTypes, extractCalls, extractImports, extractExports, buildCFG, buildDFG, collectAllNodes, } from './core/index.js';
55
55
  import { analyzeTaint, getDefaultConfig, detectUnresolved, analyzeConstantPropagation, isFalsePositive, } from './analysis/index.js';
56
56
  import { registerBuiltinPlugins } from './languages/index.js';
57
57
  import { logger } from './utils/logger.js';
@@ -249,140 +249,147 @@ export async function analyze(code, filePath, language, options = {}) {
249
249
  return analyzeHtmlFile(code, filePath, options);
250
250
  }
251
251
  logger.debug('Analyzing file', { filePath, language, codeLength: code.length });
252
- // Parse the code
252
+ // Parse the code. The Tree holds tree-sitter WASM memory; we MUST dispose
253
+ // it before returning, otherwise the WASM heap grows unboundedly across
254
+ // many analyze() calls in the same process (issue #16).
253
255
  const tree = await parse(code, language);
254
- logger.trace('Parsed AST', { rootNodeType: tree.rootNode.type });
255
- // Collect all node types in a single traversal for better performance
256
- const nodeCache = collectAllNodes(tree.rootNode, getNodeTypesForLanguage(language));
257
- // Extract all IR components
258
- const meta = extractMeta(code, tree, filePath, language);
259
- const types = extractTypes(tree, nodeCache, language);
260
- const calls = extractCalls(tree, nodeCache, language);
261
- const imports = extractImports(tree, language);
262
- const exports = extractExports(types);
263
- const cfg = buildCFG(tree, language);
264
- const dfg = buildDFG(tree, nodeCache, language);
265
- // Build CodeGraph once shared across all passes.
266
- // Taint is empty at construction time; sources/sinks/sanitizers are populated by passes.
267
- const graph = new CodeGraph({
268
- meta, types, calls, cfg, dfg,
269
- taint: { sources: [], sinks: [], sanitizers: [] },
270
- imports, exports, unresolved: [], enriched: {},
271
- });
272
- const config = options.taintConfig ?? getDefaultConfig();
273
- // Build the analysis pipeline with configurable pass options
274
- const disabledPasses = new Set(options.disabledPasses ?? []);
275
- const passOpts = options.passOptions ?? {};
276
- const pipeline = new AnalysisPipeline();
277
- // Core taint analysis passes (always enabled)
278
- pipeline.add(new TaintMatcherPass());
279
- pipeline.add(new ConstantPropagationPass(tree));
280
- pipeline.add(new LanguageSourcesPass());
281
- pipeline.add(new SinkFilterPass());
282
- pipeline.add(new TaintPropagationPass());
283
- pipeline.add(new InterproceduralPass());
284
- // Secret scanner runs after LanguageSourcesPass so the legacy Bash
285
- // `hardcoded-credential` findings are already in the dedup buffer.
286
- if (!disabledPasses.has('scan-secrets'))
287
- pipeline.add(new ScanSecretsPass());
288
- // Optional passes — can be disabled via disabledPasses
289
- if (!disabledPasses.has('dead-code'))
290
- pipeline.add(new DeadCodePass());
291
- if (!disabledPasses.has('missing-await'))
292
- pipeline.add(new MissingAwaitPass());
293
- if (!disabledPasses.has('n-plus-one'))
294
- pipeline.add(new NPlusOnePass());
295
- if (!disabledPasses.has('missing-public-doc'))
296
- pipeline.add(new MissingPublicDocPass());
297
- if (!disabledPasses.has('todo-in-prod'))
298
- pipeline.add(new TodoInProdPass());
299
- if (!disabledPasses.has('string-concat-loop'))
300
- pipeline.add(new StringConcatLoopPass());
301
- if (!disabledPasses.has('sync-io-async'))
302
- pipeline.add(new SyncIoAsyncPass());
303
- if (!disabledPasses.has('unchecked-return'))
304
- pipeline.add(new UncheckedReturnPass());
305
- if (!disabledPasses.has('null-deref'))
306
- pipeline.add(new NullDerefPass());
307
- if (!disabledPasses.has('resource-leak'))
308
- pipeline.add(new ResourceLeakPass());
309
- if (!disabledPasses.has('variable-shadowing'))
310
- pipeline.add(new VariableShadowingPass());
311
- if (!disabledPasses.has('leaked-global'))
312
- pipeline.add(new LeakedGlobalPass());
313
- if (!disabledPasses.has('unused-variable'))
314
- pipeline.add(new UnusedVariablePass());
315
- if (!disabledPasses.has('dependency-fan-out'))
316
- pipeline.add(new DependencyFanOutPass(passOpts.dependencyFanOut));
317
- if (!disabledPasses.has('stale-doc-ref'))
318
- pipeline.add(new StaleDocRefPass());
319
- if (!disabledPasses.has('infinite-loop'))
320
- pipeline.add(new InfiniteLoopPass());
321
- if (!disabledPasses.has('deep-inheritance'))
322
- pipeline.add(new DeepInheritancePass());
323
- if (!disabledPasses.has('redundant-loop-computation'))
324
- pipeline.add(new RedundantLoopPass());
325
- if (!disabledPasses.has('unbounded-collection'))
326
- pipeline.add(new UnboundedCollectionPass(passOpts.unboundedCollection));
327
- if (!disabledPasses.has('serial-await'))
328
- pipeline.add(new SerialAwaitPass());
329
- if (!disabledPasses.has('react-inline-jsx'))
330
- pipeline.add(new ReactInlineJsxPass());
331
- if (!disabledPasses.has('swallowed-exception'))
332
- pipeline.add(new SwallowedExceptionPass());
333
- if (!disabledPasses.has('broad-catch'))
334
- pipeline.add(new BroadCatchPass());
335
- if (!disabledPasses.has('unhandled-exception'))
336
- pipeline.add(new UnhandledExceptionPass());
337
- if (!disabledPasses.has('double-close'))
338
- pipeline.add(new DoubleClosePass());
339
- if (!disabledPasses.has('use-after-close'))
340
- pipeline.add(new UseAfterClosePass());
341
- if (!disabledPasses.has('cleanup-verify'))
342
- pipeline.add(new CleanupVerifyPass());
343
- if (!disabledPasses.has('missing-override'))
344
- pipeline.add(new MissingOverridePass());
345
- if (!disabledPasses.has('unused-interface-method'))
346
- pipeline.add(new UnusedInterfaceMethodPass());
347
- if (!disabledPasses.has('blocking-main-thread'))
348
- pipeline.add(new BlockingMainThreadPass());
349
- if (!disabledPasses.has('excessive-allocation'))
350
- pipeline.add(new ExcessiveAllocationPass());
351
- if (!disabledPasses.has('missing-stream'))
352
- pipeline.add(new MissingStreamPass());
353
- if (!disabledPasses.has('god-class'))
354
- pipeline.add(new GodClassPass());
355
- if (!disabledPasses.has('naming-convention'))
356
- pipeline.add(new NamingConventionPass(passOpts.namingConvention));
357
- if (!disabledPasses.has('security-headers'))
358
- pipeline.add(new SecurityHeadersPass(passOpts.securityHeaders));
359
- // Run the pipeline
360
- const { results, findings } = pipeline.run(graph, code, language, config);
361
- const sinkFilter = results.get('sink-filter');
362
- const interProc = results.get('interprocedural');
363
- const taint = {
364
- sources: sinkFilter.sources,
365
- sinks: [...sinkFilter.sinks, ...interProc.additionalSinks],
366
- sanitizers: sinkFilter.sanitizers,
367
- flows: interProc.additionalFlows,
368
- interprocedural: interProc.interprocedural,
369
- };
370
- const unresolved = detectUnresolved(calls, types, dfg);
371
- const enriched = buildEnriched(types, calls, taint.sources, taint.sinks);
372
- // Compute software metrics (CK suite, Halstead, composite scores)
373
- const metricValues = new MetricRunner().run({ meta, types, calls, cfg, dfg, taint, imports, exports, unresolved, enriched }, code, language);
374
- logger.debug('Analysis complete', {
375
- filePath,
376
- finalSources: taint.sources.length,
377
- finalSinks: taint.sinks.length,
378
- flows: taint.flows?.length ?? 0,
379
- unresolvedItems: unresolved.length,
380
- });
381
- return {
382
- meta, types, calls, cfg, dfg, taint, imports, exports, unresolved, enriched,
383
- findings: findings.length > 0 ? findings : undefined,
384
- metrics: { file: filePath, metrics: metricValues },
385
- };
256
+ try {
257
+ logger.trace('Parsed AST', { rootNodeType: tree.rootNode.type });
258
+ // Collect all node types in a single traversal for better performance
259
+ const nodeCache = collectAllNodes(tree.rootNode, getNodeTypesForLanguage(language));
260
+ // Extract all IR components
261
+ const meta = extractMeta(code, tree, filePath, language);
262
+ const types = extractTypes(tree, nodeCache, language);
263
+ const calls = extractCalls(tree, nodeCache, language);
264
+ const imports = extractImports(tree, language);
265
+ const exports = extractExports(types);
266
+ const cfg = buildCFG(tree, language);
267
+ const dfg = buildDFG(tree, nodeCache, language);
268
+ // Build CodeGraph once shared across all passes.
269
+ // Taint is empty at construction time; sources/sinks/sanitizers are populated by passes.
270
+ const graph = new CodeGraph({
271
+ meta, types, calls, cfg, dfg,
272
+ taint: { sources: [], sinks: [], sanitizers: [] },
273
+ imports, exports, unresolved: [], enriched: {},
274
+ });
275
+ const config = options.taintConfig ?? getDefaultConfig();
276
+ // Build the analysis pipeline with configurable pass options
277
+ const disabledPasses = new Set(options.disabledPasses ?? []);
278
+ const passOpts = options.passOptions ?? {};
279
+ const pipeline = new AnalysisPipeline();
280
+ // Core taint analysis passes (always enabled)
281
+ pipeline.add(new TaintMatcherPass());
282
+ pipeline.add(new ConstantPropagationPass(tree));
283
+ pipeline.add(new LanguageSourcesPass());
284
+ pipeline.add(new SinkFilterPass());
285
+ pipeline.add(new TaintPropagationPass());
286
+ pipeline.add(new InterproceduralPass());
287
+ // Secret scanner runs after LanguageSourcesPass so the legacy Bash
288
+ // `hardcoded-credential` findings are already in the dedup buffer.
289
+ if (!disabledPasses.has('scan-secrets'))
290
+ pipeline.add(new ScanSecretsPass());
291
+ // Optional passes — can be disabled via disabledPasses
292
+ if (!disabledPasses.has('dead-code'))
293
+ pipeline.add(new DeadCodePass());
294
+ if (!disabledPasses.has('missing-await'))
295
+ pipeline.add(new MissingAwaitPass());
296
+ if (!disabledPasses.has('n-plus-one'))
297
+ pipeline.add(new NPlusOnePass());
298
+ if (!disabledPasses.has('missing-public-doc'))
299
+ pipeline.add(new MissingPublicDocPass());
300
+ if (!disabledPasses.has('todo-in-prod'))
301
+ pipeline.add(new TodoInProdPass());
302
+ if (!disabledPasses.has('string-concat-loop'))
303
+ pipeline.add(new StringConcatLoopPass());
304
+ if (!disabledPasses.has('sync-io-async'))
305
+ pipeline.add(new SyncIoAsyncPass());
306
+ if (!disabledPasses.has('unchecked-return'))
307
+ pipeline.add(new UncheckedReturnPass());
308
+ if (!disabledPasses.has('null-deref'))
309
+ pipeline.add(new NullDerefPass());
310
+ if (!disabledPasses.has('resource-leak'))
311
+ pipeline.add(new ResourceLeakPass());
312
+ if (!disabledPasses.has('variable-shadowing'))
313
+ pipeline.add(new VariableShadowingPass());
314
+ if (!disabledPasses.has('leaked-global'))
315
+ pipeline.add(new LeakedGlobalPass());
316
+ if (!disabledPasses.has('unused-variable'))
317
+ pipeline.add(new UnusedVariablePass());
318
+ if (!disabledPasses.has('dependency-fan-out'))
319
+ pipeline.add(new DependencyFanOutPass(passOpts.dependencyFanOut));
320
+ if (!disabledPasses.has('stale-doc-ref'))
321
+ pipeline.add(new StaleDocRefPass());
322
+ if (!disabledPasses.has('infinite-loop'))
323
+ pipeline.add(new InfiniteLoopPass());
324
+ if (!disabledPasses.has('deep-inheritance'))
325
+ pipeline.add(new DeepInheritancePass());
326
+ if (!disabledPasses.has('redundant-loop-computation'))
327
+ pipeline.add(new RedundantLoopPass());
328
+ if (!disabledPasses.has('unbounded-collection'))
329
+ pipeline.add(new UnboundedCollectionPass(passOpts.unboundedCollection));
330
+ if (!disabledPasses.has('serial-await'))
331
+ pipeline.add(new SerialAwaitPass());
332
+ if (!disabledPasses.has('react-inline-jsx'))
333
+ pipeline.add(new ReactInlineJsxPass());
334
+ if (!disabledPasses.has('swallowed-exception'))
335
+ pipeline.add(new SwallowedExceptionPass());
336
+ if (!disabledPasses.has('broad-catch'))
337
+ pipeline.add(new BroadCatchPass());
338
+ if (!disabledPasses.has('unhandled-exception'))
339
+ pipeline.add(new UnhandledExceptionPass());
340
+ if (!disabledPasses.has('double-close'))
341
+ pipeline.add(new DoubleClosePass());
342
+ if (!disabledPasses.has('use-after-close'))
343
+ pipeline.add(new UseAfterClosePass());
344
+ if (!disabledPasses.has('cleanup-verify'))
345
+ pipeline.add(new CleanupVerifyPass());
346
+ if (!disabledPasses.has('missing-override'))
347
+ pipeline.add(new MissingOverridePass());
348
+ if (!disabledPasses.has('unused-interface-method'))
349
+ pipeline.add(new UnusedInterfaceMethodPass());
350
+ if (!disabledPasses.has('blocking-main-thread'))
351
+ pipeline.add(new BlockingMainThreadPass());
352
+ if (!disabledPasses.has('excessive-allocation'))
353
+ pipeline.add(new ExcessiveAllocationPass());
354
+ if (!disabledPasses.has('missing-stream'))
355
+ pipeline.add(new MissingStreamPass());
356
+ if (!disabledPasses.has('god-class'))
357
+ pipeline.add(new GodClassPass());
358
+ if (!disabledPasses.has('naming-convention'))
359
+ pipeline.add(new NamingConventionPass(passOpts.namingConvention));
360
+ if (!disabledPasses.has('security-headers'))
361
+ pipeline.add(new SecurityHeadersPass(passOpts.securityHeaders));
362
+ // Run the pipeline
363
+ const { results, findings } = pipeline.run(graph, code, language, config);
364
+ const sinkFilter = results.get('sink-filter');
365
+ const interProc = results.get('interprocedural');
366
+ const taint = {
367
+ sources: sinkFilter.sources,
368
+ sinks: [...sinkFilter.sinks, ...interProc.additionalSinks],
369
+ sanitizers: sinkFilter.sanitizers,
370
+ flows: interProc.additionalFlows,
371
+ interprocedural: interProc.interprocedural,
372
+ };
373
+ const unresolved = detectUnresolved(calls, types, dfg);
374
+ const enriched = buildEnriched(types, calls, taint.sources, taint.sinks);
375
+ // Compute software metrics (CK suite, Halstead, composite scores)
376
+ const metricValues = new MetricRunner().run({ meta, types, calls, cfg, dfg, taint, imports, exports, unresolved, enriched }, code, language);
377
+ logger.debug('Analysis complete', {
378
+ filePath,
379
+ finalSources: taint.sources.length,
380
+ finalSinks: taint.sinks.length,
381
+ flows: taint.flows?.length ?? 0,
382
+ unresolvedItems: unresolved.length,
383
+ });
384
+ return {
385
+ meta, types, calls, cfg, dfg, taint, imports, exports, unresolved, enriched,
386
+ findings: findings.length > 0 ? findings : undefined,
387
+ metrics: { file: filePath, metrics: metricValues },
388
+ };
389
+ }
390
+ finally {
391
+ disposeTree(tree);
392
+ }
386
393
  }
387
394
  // ---------------------------------------------------------------------------
388
395
  // HTML preprocessor
@@ -396,64 +403,69 @@ async function analyzeHtmlFile(code, filePath, options) {
396
403
  logger.debug('Analyzing HTML file', { filePath, codeLength: code.length });
397
404
  // Parse HTML
398
405
  const tree = await parse(code, 'html');
399
- const meta = extractMeta(code, tree, filePath, 'html');
400
- // Extract script blocks and event handlers
401
- const { scriptBlocks, eventHandlers } = extractHtmlContent(tree.rootNode);
402
- logger.debug('HTML extraction', {
403
- filePath,
404
- inlineScripts: scriptBlocks.filter(b => b.kind === 'inline').length,
405
- externalScripts: scriptBlocks.filter(b => b.kind === 'external-src').length,
406
- eventHandlers: eventHandlers.length,
407
- });
408
- // Analyze each inline script block via standard JS pipeline
409
- const scriptResults = [];
410
- for (const block of scriptBlocks) {
411
- if (block.kind !== 'inline' || !block.code.trim())
412
- continue;
413
- // Determine script language from type/lang attribute
414
- const scriptLang = block.scriptType === 'ts' || block.scriptType === 'typescript' ||
415
- block.scriptType === 'text/typescript'
416
- ? 'typescript'
417
- : 'javascript';
418
- try {
419
- const ir = await analyze(block.code, filePath, scriptLang, options);
420
- scriptResults.push({ ir, lineOffset: block.lineOffset });
406
+ try {
407
+ const meta = extractMeta(code, tree, filePath, 'html');
408
+ // Extract script blocks and event handlers
409
+ const { scriptBlocks, eventHandlers } = extractHtmlContent(tree.rootNode);
410
+ logger.debug('HTML extraction', {
411
+ filePath,
412
+ inlineScripts: scriptBlocks.filter(b => b.kind === 'inline').length,
413
+ externalScripts: scriptBlocks.filter(b => b.kind === 'external-src').length,
414
+ eventHandlers: eventHandlers.length,
415
+ });
416
+ // Analyze each inline script block via standard JS pipeline
417
+ const scriptResults = [];
418
+ for (const block of scriptBlocks) {
419
+ if (block.kind !== 'inline' || !block.code.trim())
420
+ continue;
421
+ // Determine script language from type/lang attribute
422
+ const scriptLang = block.scriptType === 'ts' || block.scriptType === 'typescript' ||
423
+ block.scriptType === 'text/typescript'
424
+ ? 'typescript'
425
+ : 'javascript';
426
+ try {
427
+ const ir = await analyze(block.code, filePath, scriptLang, options);
428
+ scriptResults.push({ ir, lineOffset: block.lineOffset });
429
+ }
430
+ catch (e) {
431
+ logger.warn('Failed to analyze script block', {
432
+ filePath,
433
+ lineOffset: block.lineOffset,
434
+ error: e instanceof Error ? e.message : String(e),
435
+ });
436
+ }
421
437
  }
422
- catch (e) {
423
- logger.warn('Failed to analyze script block', {
424
- filePath,
425
- lineOffset: block.lineOffset,
426
- error: e instanceof Error ? e.message : String(e),
427
- });
438
+ // Analyze inline event handlers (wrap in synthetic function)
439
+ for (const handler of eventHandlers) {
440
+ const wrappedCode = `function __${handler.eventName}_handler() { ${handler.code} }`;
441
+ try {
442
+ const ir = await analyze(wrappedCode, filePath, 'javascript', options);
443
+ scriptResults.push({ ir, lineOffset: handler.line });
444
+ }
445
+ catch (e) {
446
+ logger.warn('Failed to analyze event handler', {
447
+ filePath,
448
+ eventName: handler.eventName,
449
+ line: handler.line,
450
+ error: e instanceof Error ? e.message : String(e),
451
+ });
452
+ }
428
453
  }
454
+ // Run attribute-level security checks
455
+ const attributeFindings = runHtmlAttributeSecurityChecks(tree.rootNode, filePath);
456
+ // Merge everything
457
+ const result = mergeHtmlResults(meta, scriptResults, attributeFindings);
458
+ logger.debug('HTML analysis complete', {
459
+ filePath,
460
+ scriptBlocks: scriptResults.length,
461
+ attributeFindings: attributeFindings.length,
462
+ totalFindings: result.findings?.length ?? 0,
463
+ });
464
+ return result;
429
465
  }
430
- // Analyze inline event handlers (wrap in synthetic function)
431
- for (const handler of eventHandlers) {
432
- const wrappedCode = `function __${handler.eventName}_handler() { ${handler.code} }`;
433
- try {
434
- const ir = await analyze(wrappedCode, filePath, 'javascript', options);
435
- scriptResults.push({ ir, lineOffset: handler.line });
436
- }
437
- catch (e) {
438
- logger.warn('Failed to analyze event handler', {
439
- filePath,
440
- eventName: handler.eventName,
441
- line: handler.line,
442
- error: e instanceof Error ? e.message : String(e),
443
- });
444
- }
466
+ finally {
467
+ disposeTree(tree);
445
468
  }
446
- // Run attribute-level security checks
447
- const attributeFindings = runHtmlAttributeSecurityChecks(tree.rootNode, filePath);
448
- // Merge everything
449
- const result = mergeHtmlResults(meta, scriptResults, attributeFindings);
450
- logger.debug('HTML analysis complete', {
451
- filePath,
452
- scriptBlocks: scriptResults.length,
453
- attributeFindings: attributeFindings.length,
454
- totalFindings: result.findings?.length ?? 0,
455
- });
456
- return result;
457
469
  }
458
470
  // ---------------------------------------------------------------------------
459
471
  // Simplified API response format
@@ -469,75 +481,80 @@ export async function analyzeForAPI(code, filePath, language, options = {}) {
469
481
  const parseStart = performance.now();
470
482
  const tree = await parse(code, language);
471
483
  const parseTime = performance.now() - parseStart;
472
- const analysisStart = performance.now();
473
- const nodeCache = collectAllNodes(tree.rootNode, getNodeTypesForLanguage(language));
474
- const types = extractTypes(tree, nodeCache, language);
475
- const calls = extractCalls(tree, nodeCache, language);
476
- // Run constant propagation
477
- const constPropResult = analyzeConstantPropagation(tree, code);
478
- const config = options.taintConfig ?? getDefaultConfig();
479
- const taint = analyzeTaint(calls, types, config);
480
- // Filter sinks in dead code
481
- let filteredSinks = taint.sinks.filter(sink => !constPropResult.unreachableLines.has(sink.line));
482
- // Filter sinks whose arguments are proven clean (string literals, constants, etc.)
483
- filteredSinks = filterCleanVariableSinks(filteredSinks, calls, constPropResult.tainted, constPropResult.symbols, undefined, constPropResult.sanitizedVars, constPropResult.synchronizedLines);
484
- // Filter sinks wrapped by sanitizers on the same line
485
- filteredSinks = filterSanitizedSinks(filteredSinks, taint.sanitizers ?? [], calls);
486
- // Python: reduce XPath false-positives using forward taint propagation +
487
- // apostrophe-guard sanitizer detection.
488
- let pythonTaintedVars = new Map();
489
- if (language === 'python') {
490
- pythonTaintedVars = buildPythonTaintedVars(code);
491
- const pythonSanitizedVars = buildPythonSanitizedVars(code, pythonTaintedVars);
492
- const sourceLines = code.split('\n');
493
- filteredSinks = filteredSinks.filter(sink => {
494
- if (sink.type !== 'xpath_injection')
484
+ try {
485
+ const analysisStart = performance.now();
486
+ const nodeCache = collectAllNodes(tree.rootNode, getNodeTypesForLanguage(language));
487
+ const types = extractTypes(tree, nodeCache, language);
488
+ const calls = extractCalls(tree, nodeCache, language);
489
+ // Run constant propagation
490
+ const constPropResult = analyzeConstantPropagation(tree, code);
491
+ const config = options.taintConfig ?? getDefaultConfig();
492
+ const taint = analyzeTaint(calls, types, config, undefined, language);
493
+ // Filter sinks in dead code
494
+ let filteredSinks = taint.sinks.filter(sink => !constPropResult.unreachableLines.has(sink.line));
495
+ // Filter sinks whose arguments are proven clean (string literals, constants, etc.)
496
+ filteredSinks = filterCleanVariableSinks(filteredSinks, calls, constPropResult.tainted, constPropResult.symbols, undefined, constPropResult.sanitizedVars, constPropResult.synchronizedLines);
497
+ // Filter sinks wrapped by sanitizers on the same line
498
+ filteredSinks = filterSanitizedSinks(filteredSinks, taint.sanitizers ?? [], calls);
499
+ // Python: reduce XPath false-positives using forward taint propagation +
500
+ // apostrophe-guard sanitizer detection.
501
+ let pythonTaintedVars = new Map();
502
+ if (language === 'python') {
503
+ pythonTaintedVars = buildPythonTaintedVars(code);
504
+ const pythonSanitizedVars = buildPythonSanitizedVars(code, pythonTaintedVars);
505
+ const sourceLines = code.split('\n');
506
+ filteredSinks = filteredSinks.filter(sink => {
507
+ if (sink.type !== 'xpath_injection')
508
+ return true;
509
+ const sinkLineText = sourceLines[sink.line - 1] ?? '';
510
+ const taintedVarOnLine = [...pythonTaintedVars.keys()].find(v => new RegExp(`\\b${v}\\b`).test(sinkLineText));
511
+ if (!taintedVarOnLine)
512
+ return false;
513
+ if (pythonSanitizedVars.has(taintedVarOnLine))
514
+ return false;
515
+ if (new RegExp(`\\.xpath\\s*\\([^)]*\\b\\w+\\s*=\\s*\\b${taintedVarOnLine}\\b`).test(sinkLineText))
516
+ return false;
495
517
  return true;
496
- const sinkLineText = sourceLines[sink.line - 1] ?? '';
497
- const taintedVarOnLine = [...pythonTaintedVars.keys()].find(v => new RegExp(`\\b${v}\\b`).test(sinkLineText));
498
- if (!taintedVarOnLine)
499
- return false;
500
- if (pythonSanitizedVars.has(taintedVarOnLine))
501
- return false;
502
- if (new RegExp(`\\.xpath\\s*\\([^)]*\\b\\w+\\s*=\\s*\\b${taintedVarOnLine}\\b`).test(sinkLineText))
503
- return false;
504
- return true;
505
- });
506
- }
507
- // Generate vulnerabilities from source-sink pairs
508
- const vulnerabilities = findVulnerabilities(taint.sources, filteredSinks, calls, constPropResult);
509
- // Python: detect trust boundary violations (flask.session[key] = taintedVal)
510
- if (language === 'python') {
511
- const trustViolations = findPythonTrustBoundaryViolations(code, pythonTaintedVars);
512
- for (const v of trustViolations) {
513
- const alreadyReported = vulnerabilities.some(existing => existing.sink.line === v.sinkLine && existing.type === 'trust_boundary');
514
- if (!alreadyReported) {
515
- vulnerabilities.push({
516
- type: 'trust_boundary',
517
- cwe: 'CWE-501',
518
- severity: 'medium',
519
- source: { line: v.sourceLine, type: 'http_param' },
520
- sink: { line: v.sinkLine, type: 'trust_boundary' },
521
- confidence: 0.85,
522
- });
518
+ });
519
+ }
520
+ // Generate vulnerabilities from source-sink pairs
521
+ const vulnerabilities = findVulnerabilities(taint.sources, filteredSinks, calls, constPropResult);
522
+ // Python: detect trust boundary violations (flask.session[key] = taintedVal)
523
+ if (language === 'python') {
524
+ const trustViolations = findPythonTrustBoundaryViolations(code, pythonTaintedVars);
525
+ for (const v of trustViolations) {
526
+ const alreadyReported = vulnerabilities.some(existing => existing.sink.line === v.sinkLine && existing.type === 'trust_boundary');
527
+ if (!alreadyReported) {
528
+ vulnerabilities.push({
529
+ type: 'trust_boundary',
530
+ cwe: 'CWE-501',
531
+ severity: 'medium',
532
+ source: { line: v.sourceLine, type: 'http_param' },
533
+ sink: { line: v.sinkLine, type: 'trust_boundary' },
534
+ confidence: 0.85,
535
+ });
536
+ }
523
537
  }
524
538
  }
539
+ const analysisTime = performance.now() - analysisStart;
540
+ const totalTime = performance.now() - startTime;
541
+ return {
542
+ success: true,
543
+ analysis: {
544
+ sources: taint.sources,
545
+ sinks: filteredSinks,
546
+ vulnerabilities,
547
+ },
548
+ meta: {
549
+ parseTimeMs: Math.round(parseTime),
550
+ analysisTimeMs: Math.round(analysisTime),
551
+ totalTimeMs: Math.round(totalTime),
552
+ },
553
+ };
554
+ }
555
+ finally {
556
+ disposeTree(tree);
525
557
  }
526
- const analysisTime = performance.now() - analysisStart;
527
- const totalTime = performance.now() - startTime;
528
- return {
529
- success: true,
530
- analysis: {
531
- sources: taint.sources,
532
- sinks: filteredSinks,
533
- vulnerabilities,
534
- },
535
- meta: {
536
- parseTimeMs: Math.round(parseTime),
537
- analysisTimeMs: Math.round(analysisTime),
538
- totalTimeMs: Math.round(totalTime),
539
- },
540
- };
541
558
  }
542
559
  // ---------------------------------------------------------------------------
543
560
  // Vulnerability matching (used by analyzeForAPI)