circle-ir 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. package/LICENSE +15 -0
  2. package/README.md +200 -0
  3. package/configs/sinks/code_injection.yaml +672 -0
  4. package/configs/sinks/command.yaml +917 -0
  5. package/configs/sinks/deserialization.yaml +105 -0
  6. package/configs/sinks/ldap.yaml +136 -0
  7. package/configs/sinks/nodejs.json +629 -0
  8. package/configs/sinks/path.yaml +715 -0
  9. package/configs/sinks/python.json +501 -0
  10. package/configs/sinks/rust.json +339 -0
  11. package/configs/sinks/sql.yaml +233 -0
  12. package/configs/sinks/ssrf.yaml +160 -0
  13. package/configs/sinks/xpath.yaml +121 -0
  14. package/configs/sinks/xss.yaml +727 -0
  15. package/configs/sources/db_sources.yaml +90 -0
  16. package/configs/sources/env_sources.yaml +94 -0
  17. package/configs/sources/express.json +197 -0
  18. package/configs/sources/file_sources.yaml +164 -0
  19. package/configs/sources/http_sources.yaml +379 -0
  20. package/configs/sources/io_sources.yaml +519 -0
  21. package/configs/sources/network_sources.yaml +99 -0
  22. package/configs/sources/python.json +230 -0
  23. package/configs/sources/rust.json +286 -0
  24. package/configs/sources/spring.yaml +70 -0
  25. package/dist/analysis/advisory-db.d.ts +86 -0
  26. package/dist/analysis/advisory-db.js +104 -0
  27. package/dist/analysis/advisory-db.js.map +1 -0
  28. package/dist/analysis/cargo-parser.d.ts +42 -0
  29. package/dist/analysis/cargo-parser.js +102 -0
  30. package/dist/analysis/cargo-parser.js.map +1 -0
  31. package/dist/analysis/config-loader.d.ts +37 -0
  32. package/dist/analysis/config-loader.js +1561 -0
  33. package/dist/analysis/config-loader.js.map +1 -0
  34. package/dist/analysis/constant-propagation/ast-utils.d.ts +25 -0
  35. package/dist/analysis/constant-propagation/ast-utils.js +34 -0
  36. package/dist/analysis/constant-propagation/ast-utils.js.map +1 -0
  37. package/dist/analysis/constant-propagation/evaluator.d.ts +32 -0
  38. package/dist/analysis/constant-propagation/evaluator.js +296 -0
  39. package/dist/analysis/constant-propagation/evaluator.js.map +1 -0
  40. package/dist/analysis/constant-propagation/index.d.ts +62 -0
  41. package/dist/analysis/constant-propagation/index.js +152 -0
  42. package/dist/analysis/constant-propagation/index.js.map +1 -0
  43. package/dist/analysis/constant-propagation/patterns.d.ts +8 -0
  44. package/dist/analysis/constant-propagation/patterns.js +126 -0
  45. package/dist/analysis/constant-propagation/patterns.js.map +1 -0
  46. package/dist/analysis/constant-propagation/propagator.d.ts +180 -0
  47. package/dist/analysis/constant-propagation/propagator.js +1985 -0
  48. package/dist/analysis/constant-propagation/propagator.js.map +1 -0
  49. package/dist/analysis/constant-propagation/types.d.ts +63 -0
  50. package/dist/analysis/constant-propagation/types.js +5 -0
  51. package/dist/analysis/constant-propagation/types.js.map +1 -0
  52. package/dist/analysis/constant-propagation.d.ts +9 -0
  53. package/dist/analysis/constant-propagation.js +18 -0
  54. package/dist/analysis/constant-propagation.js.map +1 -0
  55. package/dist/analysis/dependency-scanner.d.ts +79 -0
  56. package/dist/analysis/dependency-scanner.js +122 -0
  57. package/dist/analysis/dependency-scanner.js.map +1 -0
  58. package/dist/analysis/dfg-verifier.d.ts +116 -0
  59. package/dist/analysis/dfg-verifier.js +399 -0
  60. package/dist/analysis/dfg-verifier.js.map +1 -0
  61. package/dist/analysis/findings.d.ts +11 -0
  62. package/dist/analysis/findings.js +228 -0
  63. package/dist/analysis/findings.js.map +1 -0
  64. package/dist/analysis/index.d.ts +16 -0
  65. package/dist/analysis/index.js +18 -0
  66. package/dist/analysis/index.js.map +1 -0
  67. package/dist/analysis/interprocedural.d.ts +99 -0
  68. package/dist/analysis/interprocedural.js +526 -0
  69. package/dist/analysis/interprocedural.js.map +1 -0
  70. package/dist/analysis/path-finder.d.ts +133 -0
  71. package/dist/analysis/path-finder.js +354 -0
  72. package/dist/analysis/path-finder.js.map +1 -0
  73. package/dist/analysis/rules.d.ts +75 -0
  74. package/dist/analysis/rules.js +332 -0
  75. package/dist/analysis/rules.js.map +1 -0
  76. package/dist/analysis/semver.d.ts +27 -0
  77. package/dist/analysis/semver.js +127 -0
  78. package/dist/analysis/semver.js.map +1 -0
  79. package/dist/analysis/taint-matcher.d.ts +15 -0
  80. package/dist/analysis/taint-matcher.js +634 -0
  81. package/dist/analysis/taint-matcher.js.map +1 -0
  82. package/dist/analysis/taint-propagation.d.ts +67 -0
  83. package/dist/analysis/taint-propagation.js +298 -0
  84. package/dist/analysis/taint-propagation.js.map +1 -0
  85. package/dist/analysis/unresolved.d.ts +14 -0
  86. package/dist/analysis/unresolved.js +202 -0
  87. package/dist/analysis/unresolved.js.map +1 -0
  88. package/dist/analyzer.d.ts +43 -0
  89. package/dist/analyzer.js +1010 -0
  90. package/dist/analyzer.js.map +1 -0
  91. package/dist/browser/circle-ir.js +16576 -0
  92. package/dist/browser.d.ts +38 -0
  93. package/dist/browser.js +38 -0
  94. package/dist/browser.js.map +1 -0
  95. package/dist/core/circle-ir-core.cjs +13626 -0
  96. package/dist/core/circle-ir-core.d.ts +59 -0
  97. package/dist/core/circle-ir-core.js +13591 -0
  98. package/dist/core/extractors/calls.d.ts +13 -0
  99. package/dist/core/extractors/calls.js +1429 -0
  100. package/dist/core/extractors/calls.js.map +1 -0
  101. package/dist/core/extractors/cfg.d.ts +9 -0
  102. package/dist/core/extractors/cfg.js +519 -0
  103. package/dist/core/extractors/cfg.js.map +1 -0
  104. package/dist/core/extractors/dfg.d.ts +12 -0
  105. package/dist/core/extractors/dfg.js +1081 -0
  106. package/dist/core/extractors/dfg.js.map +1 -0
  107. package/dist/core/extractors/exports.d.ts +14 -0
  108. package/dist/core/extractors/exports.js +80 -0
  109. package/dist/core/extractors/exports.js.map +1 -0
  110. package/dist/core/extractors/imports.d.ts +9 -0
  111. package/dist/core/extractors/imports.js +739 -0
  112. package/dist/core/extractors/imports.js.map +1 -0
  113. package/dist/core/extractors/index.d.ts +10 -0
  114. package/dist/core/extractors/index.js +11 -0
  115. package/dist/core/extractors/index.js.map +1 -0
  116. package/dist/core/extractors/meta.d.ts +10 -0
  117. package/dist/core/extractors/meta.js +109 -0
  118. package/dist/core/extractors/meta.js.map +1 -0
  119. package/dist/core/extractors/types.d.ts +10 -0
  120. package/dist/core/extractors/types.js +1479 -0
  121. package/dist/core/extractors/types.js.map +1 -0
  122. package/dist/core/index.d.ts +5 -0
  123. package/dist/core/index.js +8 -0
  124. package/dist/core/index.js.map +1 -0
  125. package/dist/core/parser.d.ts +84 -0
  126. package/dist/core/parser.js +250 -0
  127. package/dist/core/parser.js.map +1 -0
  128. package/dist/core-lib.d.ts +59 -0
  129. package/dist/core-lib.js +62 -0
  130. package/dist/core-lib.js.map +1 -0
  131. package/dist/index.d.ts +15 -0
  132. package/dist/index.js +20 -0
  133. package/dist/index.js.map +1 -0
  134. package/dist/languages/index.d.ts +11 -0
  135. package/dist/languages/index.js +14 -0
  136. package/dist/languages/index.js.map +1 -0
  137. package/dist/languages/plugins/base.d.ts +44 -0
  138. package/dist/languages/plugins/base.js +82 -0
  139. package/dist/languages/plugins/base.js.map +1 -0
  140. package/dist/languages/plugins/index.d.ts +14 -0
  141. package/dist/languages/plugins/index.js +25 -0
  142. package/dist/languages/plugins/index.js.map +1 -0
  143. package/dist/languages/plugins/java.d.ts +49 -0
  144. package/dist/languages/plugins/java.js +402 -0
  145. package/dist/languages/plugins/java.js.map +1 -0
  146. package/dist/languages/plugins/javascript.d.ts +48 -0
  147. package/dist/languages/plugins/javascript.js +445 -0
  148. package/dist/languages/plugins/javascript.js.map +1 -0
  149. package/dist/languages/plugins/python.d.ts +47 -0
  150. package/dist/languages/plugins/python.js +480 -0
  151. package/dist/languages/plugins/python.js.map +1 -0
  152. package/dist/languages/plugins/rust.d.ts +47 -0
  153. package/dist/languages/plugins/rust.js +405 -0
  154. package/dist/languages/plugins/rust.js.map +1 -0
  155. package/dist/languages/registry.d.ts +30 -0
  156. package/dist/languages/registry.js +80 -0
  157. package/dist/languages/registry.js.map +1 -0
  158. package/dist/languages/types.d.ts +184 -0
  159. package/dist/languages/types.js +8 -0
  160. package/dist/languages/types.js.map +1 -0
  161. package/dist/resolution/cross-file.d.ts +146 -0
  162. package/dist/resolution/cross-file.js +439 -0
  163. package/dist/resolution/cross-file.js.map +1 -0
  164. package/dist/resolution/index.d.ts +12 -0
  165. package/dist/resolution/index.js +10 -0
  166. package/dist/resolution/index.js.map +1 -0
  167. package/dist/resolution/symbol-table.d.ts +136 -0
  168. package/dist/resolution/symbol-table.js +336 -0
  169. package/dist/resolution/symbol-table.js.map +1 -0
  170. package/dist/resolution/type-hierarchy.d.ts +124 -0
  171. package/dist/resolution/type-hierarchy.js +515 -0
  172. package/dist/resolution/type-hierarchy.js.map +1 -0
  173. package/dist/types/config.d.ts +45 -0
  174. package/dist/types/config.js +5 -0
  175. package/dist/types/config.js.map +1 -0
  176. package/dist/types/index.d.ts +392 -0
  177. package/dist/types/index.js +7 -0
  178. package/dist/types/index.js.map +1 -0
  179. package/dist/utils/logger.d.ts +85 -0
  180. package/dist/utils/logger.js +198 -0
  181. package/dist/utils/logger.js.map +1 -0
  182. package/dist/wasm/tree-sitter-java.wasm +0 -0
  183. package/dist/wasm/tree-sitter-javascript.wasm +0 -0
  184. package/dist/wasm/tree-sitter-python.wasm +0 -0
  185. package/dist/wasm/tree-sitter-rust.wasm +0 -0
  186. package/dist/wasm/web-tree-sitter.wasm +0 -0
  187. package/docs/SPEC.md +1021 -0
  188. package/examples/browser-example.html +610 -0
  189. package/examples/node-example.ts +215 -0
  190. package/package.json +107 -0
  191. package/wasm/tree-sitter-java.wasm +0 -0
  192. package/wasm/tree-sitter-javascript.wasm +0 -0
  193. package/wasm/tree-sitter-python.wasm +0 -0
  194. package/wasm/tree-sitter-rust.wasm +0 -0
package/docs/SPEC.md ADDED
@@ -0,0 +1,1021 @@
1
+ # Circle-IR 3.0 Specification
2
+
3
+ **Status**: Living Document
4
+ **Last Updated**: 2025-01-25
5
+ **Implementation**: Python (reference) → TypeScript (target)
6
+
7
+ ---
8
+
9
+ ## Implementation Status
10
+
11
+ | Section | Python | TypeScript | Status |
12
+ |---------|--------|------------|--------|
13
+ | Meta | ✅ | ✅ | Done |
14
+ | Types | ✅ | ✅ | Done |
15
+ | Methods | ✅ | ✅ | Done |
16
+ | CFG | ✅ | ✅ | Done |
17
+ | DFG | ✅ | ✅ | Done |
18
+ | Calls | ✅ | ✅ | Done |
19
+ | Taint Sources | ✅ | ✅ | Done |
20
+ | Taint Sinks | ✅ | ✅ | Done |
21
+ | Imports | ✅ | ✅ | Done |
22
+ | Exports | ⬜ | ✅ | Done |
23
+ | Sanitizers | ✅ (separate) | ✅ | Done |
24
+ | Constant Propagation | ✅ | ✅ | Done |
25
+ | Call Resolution | ⬜ | ✅ | Done |
26
+ | Unresolved | ⬜ | ✅ | Done |
27
+ | Enriched | ⬜ | ✅ | Done |
28
+ | DFG Chains | ⬜ | ✅ | Done |
29
+ | Project-Level | ⬜ | ✅ | Done |
30
+ | Findings | ⬜ | ✅ | Done |
31
+
32
+ ---
33
+
34
+ ## Output Format
35
+
36
+ Circle-IR produces JSON with this top-level structure:
37
+
38
+ ```json
39
+ {
40
+ "meta": { },
41
+ "types": [ ],
42
+ "calls": [ ],
43
+ "cfg": { },
44
+ "dfg": { },
45
+ "taint": { },
46
+ "imports": [ ],
47
+ "exports": [ ],
48
+ "unresolved": [ ],
49
+ "enriched": { }
50
+ }
51
+ ```
52
+
53
+ ---
54
+
55
+ ## 1. Meta
56
+
57
+ File metadata and version information.
58
+
59
+ ```typescript
60
+ interface Meta {
61
+ circle_ir: "3.0";
62
+ file: string;
63
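+ language: "java" | "c" | "cpp"; // NOTE(review): the package ships java, javascript, python, and rust language plugins — confirm this union is current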
64
+ loc: number;
65
+ hash: string; // SHA256 prefix (16 chars)
66
+ package?: string; // PENDING: Add to implementation
67
+ }
68
+ ```
69
+
70
+ **Example:**
71
+ ```json
72
+ {
73
+ "meta": {
74
+ "circle_ir": "3.0",
75
+ "file": "/path/to/UserController.java",
76
+ "language": "java",
77
+ "loc": 150,
78
+ "hash": "a1b2c3d4e5f67890"
79
+ }
80
+ }
81
+ ```
82
+
83
+ **Implementation**: `cpg_extractor.py:160-167`
84
+
85
+ ---
86
+
87
+ ## 2. Types
88
+
89
+ Class, interface, and enum definitions with nested methods.
90
+
91
+ ```typescript
92
+ interface TypeInfo {
93
+ name: string;
94
+ kind: "class" | "interface" | "enum";
95
+ package: string | null;
96
+ extends: string | null;
97
+ implements: string[];
98
+ annotations: string[];
99
+ methods: MethodInfo[];
100
+ fields: FieldInfo[];
101
+ start_line: number;
102
+ end_line: number;
103
+ }
104
+
105
+ interface MethodInfo {
106
+ name: string;
107
+ return_type: string | null;
108
+ parameters: ParameterInfo[];
109
+ annotations: string[];
110
+ modifiers: string[]; // ["public", "static", etc.]
111
+ start_line: number;
112
+ end_line: number;
113
+ }
114
+
115
+ interface ParameterInfo {
116
+ name: string;
117
+ type: string | null;
118
+ annotations: string[]; // ["RequestParam", "PathVariable", etc.]
119
+ }
120
+
121
+ interface FieldInfo {
122
+ name: string;
123
+ type: string | null;
124
+ modifiers: string[];
125
+ annotations: string[];
126
+ }
127
+ ```
128
+
129
+ **Example:**
130
+ ```json
131
+ {
132
+ "types": [
133
+ {
134
+ "name": "UserController",
135
+ "kind": "class",
136
+ "package": "com.example.controllers",
137
+ "extends": "BaseController",
138
+ "implements": ["Auditable"],
139
+ "annotations": ["RestController", "RequestMapping(\"/api/users\")"],
140
+ "methods": [
141
+ {
142
+ "name": "getUser",
143
+ "return_type": "User",
144
+ "parameters": [
145
+ {
146
+ "name": "id",
147
+ "type": "Long",
148
+ "annotations": ["PathVariable"]
149
+ }
150
+ ],
151
+ "annotations": ["GetMapping(\"/{id}\")"],
152
+ "modifiers": ["public"],
153
+ "start_line": 15,
154
+ "end_line": 20
155
+ }
156
+ ],
157
+ "fields": [
158
+ {
159
+ "name": "userService",
160
+ "type": "UserService",
161
+ "modifiers": ["private"],
162
+ "annotations": ["Autowired"]
163
+ }
164
+ ],
165
+ "start_line": 10,
166
+ "end_line": 50
167
+ }
168
+ ]
169
+ }
170
+ ```
171
+
172
+ **Implementation**: `type_extractor.py`
173
+
174
+ ---
175
+
176
+ ## 3. Calls
177
+
178
+ Method invocations with arguments and context.
179
+
180
+ ```typescript
181
+ interface CallInfo {
182
+ method_name: string;
183
+ receiver: string | null;
184
+ arguments: ArgumentInfo[];
185
+ location: {
186
+ line: number;
187
+ column: number;
188
+ };
189
+ in_method: string | null;
190
+ }
191
+
192
+ interface ArgumentInfo {
193
+ position: number; // 0-indexed
194
+ expression: string; // Full expression text
195
+ variable: string | null; // Variable name if simple reference
196
+ literal: string | null; // Literal value if constant
197
+ }
198
+ ```
199
+
200
+ **Example:**
201
+ ```json
202
+ {
203
+ "calls": [
204
+ {
205
+ "method_name": "getParameter",
206
+ "receiver": "request",
207
+ "arguments": [
208
+ {
209
+ "position": 0,
210
+ "expression": "\"id\"",
211
+ "variable": null,
212
+ "literal": "id"
213
+ }
214
+ ],
215
+ "location": {"line": 25, "column": 20},
216
+ "in_method": "handleRequest"
217
+ },
218
+ {
219
+ "method_name": "executeQuery",
220
+ "receiver": "stmt",
221
+ "arguments": [
222
+ {
223
+ "position": 0,
224
+ "expression": "\"SELECT * FROM users WHERE id = \" + id",
225
+ "variable": null,
226
+ "literal": null
227
+ }
228
+ ],
229
+ "location": {"line": 30, "column": 8},
230
+ "in_method": "handleRequest"
231
+ }
232
+ ]
233
+ }
234
+ ```
235
+
236
+ **Implementation**: `call_extractor.py`
237
+
238
+ ### PENDING: Call Resolution
239
+
240
+ ```typescript
241
+ // PENDING: Add to CallInfo
242
+ interface CallInfo {
243
+ // ... existing fields ...
244
+
245
+ resolved?: boolean;
246
+ resolution?: {
247
+ status: "resolved" | "external_method" | "interface_method" | "reflection";
248
+ target?: string; // Fully qualified method name
249
+ candidates?: string[]; // For interface/virtual dispatch
250
+ };
251
+ }
252
+ ```
253
+
254
+ **Example (PENDING):**
255
+ ```json
256
+ {
257
+ "method_name": "matches",
258
+ "receiver": "encoder",
259
+ "resolved": false,
260
+ "resolution": {
261
+ "status": "interface_method",
262
+ "candidates": ["BCryptPasswordEncoder.matches", "Argon2PasswordEncoder.matches"]
263
+ }
264
+ }
265
+ ```
266
+
267
+ ---
268
+
269
+ ## 4. CFG (Control Flow Graph)
270
+
271
+ Basic blocks and control flow edges.
272
+
273
+ ```typescript
274
+ interface CFG {
275
+ blocks: CFGBlock[];
276
+ edges: CFGEdge[];
277
+ }
278
+
279
+ interface CFGBlock {
280
+ id: number;
281
+ type: "entry" | "exit" | "normal" | "conditional" | "loop";
282
+ start_line: number;
283
+ end_line: number;
284
+ }
285
+
286
+ interface CFGEdge {
287
+ from: number;
288
+ to: number;
289
+ type: "sequential" | "true" | "false" | "exception" | "back" | "break" | "continue";
290
+ }
291
+ ```
292
+
293
+ **Example:**
294
+ ```json
295
+ {
296
+ "cfg": {
297
+ "blocks": [
298
+ {"id": 1, "type": "entry", "start_line": 15, "end_line": 15},
299
+ {"id": 2, "type": "conditional", "start_line": 16, "end_line": 16},
300
+ {"id": 3, "type": "normal", "start_line": 17, "end_line": 18},
301
+ {"id": 4, "type": "normal", "start_line": 20, "end_line": 21},
302
+ {"id": 5, "type": "exit", "start_line": 23, "end_line": 23}
303
+ ],
304
+ "edges": [
305
+ {"from": 1, "to": 2, "type": "sequential"},
306
+ {"from": 2, "to": 3, "type": "true"},
307
+ {"from": 2, "to": 4, "type": "false"},
308
+ {"from": 3, "to": 5, "type": "sequential"},
309
+ {"from": 4, "to": 5, "type": "sequential"}
310
+ ]
311
+ }
312
+ }
313
+ ```
314
+
315
+ **Implementation**: `cpg_extractor.py:181-395`
316
+
317
+ ---
318
+
319
+ ## 5. DFG (Data Flow Graph)
320
+
321
+ Variable definitions and uses.
322
+
323
+ ```typescript
324
+ interface DFG {
325
+ defs: DFGDef[];
326
+ uses: DFGUse[];
327
+ chains?: DFGChain[]; // PENDING
328
+ }
329
+
330
+ interface DFGDef {
331
+ id: number;
332
+ variable: string;
333
+ line: number;
334
+ kind: "param" | "local" | "field" | "return";
335
+ }
336
+
337
+ interface DFGUse {
338
+ id: number;
339
+ variable: string;
340
+ line: number;
341
+ def_id: number | null; // Reaching definition
342
+ }
343
+ ```
344
+
345
+ **Example:**
346
+ ```json
347
+ {
348
+ "dfg": {
349
+ "defs": [
350
+ {"id": 1, "variable": "id", "line": 15, "kind": "param"},
351
+ {"id": 2, "variable": "name", "line": 16, "kind": "local"},
352
+ {"id": 3, "variable": "sql", "line": 17, "kind": "local"}
353
+ ],
354
+ "uses": [
355
+ {"id": 1, "variable": "request", "line": 16, "def_id": null},
356
+ {"id": 2, "variable": "name", "line": 17, "def_id": 2},
357
+ {"id": 3, "variable": "sql", "line": 18, "def_id": 3}
358
+ ]
359
+ }
360
+ }
361
+ ```
362
+
363
+ **Implementation**: `cpg_extractor.py:398-463`
364
+
365
+ ### PENDING: DFG Chains
366
+
367
+ ```typescript
368
+ // PENDING: Add explicit chains linking each definition to the downstream definition it flows into
369
+ interface DFGChain {
370
+ from_def: number; // Definition ID
371
+ to_def: number; // Downstream definition ID
372
+ via: string; // Variable name
373
+ }
374
+ ```
375
+
376
+ **Example (PENDING):**
377
+ ```json
378
+ {
379
+ "dfg": {
380
+ "chains": [
381
+ {"from_def": 1, "to_def": 3, "via": "username"},
382
+ {"from_def": 2, "to_def": 4, "via": "password"}
383
+ ]
384
+ }
385
+ }
386
+ ```
387
+
388
+ ---
389
+
390
+ ## 6. Taint
391
+
392
+ Identified taint sources and sinks.
393
+
394
+ ```typescript
395
+ interface Taint {
396
+ sources: TaintSource[];
397
+ sinks: TaintSink[];
398
+ sanitizers?: TaintSanitizer[]; // PENDING: move to this section
399
+ }
400
+
401
+ interface TaintSource {
402
+ type: string; // See source types below
403
+ location: string; // Human-readable description
404
+ severity: "high" | "medium" | "low";
405
+ line: number;
406
+ confidence: number; // 0.0 - 1.0
407
+ }
408
+
409
+ interface TaintSink {
410
+ type: string; // See sink types below
411
+ cwe: string; // "CWE-89", etc.
412
+ location: string;
413
+ line: number;
414
+ confidence: number;
415
+ }
416
+ ```
417
+
418
+ ### Source Types
419
+
420
+ | Type | Description | Example |
421
+ |------|-------------|---------|
422
+ | `http_param` | HTTP request parameter | `request.getParameter()` |
423
+ | `http_body` | HTTP request body | `@RequestBody` |
424
+ | `http_header` | HTTP header value | `request.getHeader()` |
425
+ | `http_cookie` | HTTP cookie | `request.getCookies()` |
426
+ | `http_path` | URL path variable | `@PathVariable` |
427
+ | `io_input` | File/console input | `BufferedReader.readLine()` |
428
+ | `env_input` | Environment variable | `System.getenv()` |
429
+ | `db_input` | Database result | `ResultSet.getString()` |
430
+
431
+ ### Sink Types
432
+
433
+ | Type | CWE | Description |
434
+ |------|-----|-------------|
435
+ | `sql_injection` | CWE-89 | SQL query execution |
436
+ | `command_injection` | CWE-78 | OS command execution |
437
+ | `path_traversal` | CWE-22 | File path manipulation |
438
+ | `xss` | CWE-79 | Cross-site scripting |
439
+ | `xxe` | CWE-611 | XML external entity |
440
+ | `deserialization` | CWE-502 | Unsafe deserialization |
441
+ | `ldap_injection` | CWE-90 | LDAP injection |
442
+ | `xpath_injection` | CWE-643 | XPath injection |
443
+ | `ssrf` | CWE-918 | Server-side request forgery |
444
+
445
+ **Example:**
446
+ ```json
447
+ {
448
+ "taint": {
449
+ "sources": [
450
+ {
451
+ "type": "http_param",
452
+ "location": "@RequestParam id in getUser",
453
+ "severity": "high",
454
+ "line": 13,
455
+ "confidence": 1.0
456
+ }
457
+ ],
458
+ "sinks": [
459
+ {
460
+ "type": "sql_injection",
461
+ "cwe": "CWE-89",
462
+ "location": "executeQuery(sql) in getUser",
463
+ "line": 17,
464
+ "confidence": 1.0
465
+ }
466
+ ]
467
+ }
468
+ }
469
+ ```
470
+
471
+ **Implementation**: `taint_patterns.py`
472
+
473
+ ### PENDING: Sanitizers in Taint Section
474
+
475
+ ```typescript
476
+ // PENDING: Add to taint section
477
+ interface TaintSanitizer {
478
+ type: string;
479
+ method: string;
480
+ line: number;
481
+ sanitizes: string[]; // Which sink types it sanitizes
482
+ }
483
+ ```
484
+
485
+ **Note**: In the Python reference, sanitizers are currently implemented in a separate module, `taint/sanitizers.py`.
486
+
487
+ ---
488
+
489
+ ## 6.1. Constant Propagation
490
+
491
+ Static analysis engine that tracks constant values, detects dead code, and refines taint analysis by eliminating false positives when variables hold known constant values.
492
+
493
+ ```typescript
494
+ interface ConstantValue {
495
+ value: string | number | boolean | null;
496
+ type: 'string' | 'int' | 'float' | 'char' | 'bool' | 'null' | 'unknown';
497
+ sourceLine: number;
498
+ }
499
+
500
+ interface ConstantPropagationResult {
501
+ symbols: Map<string, ConstantValue>; // Variable → constant value
502
+ tainted: Set<string>; // Tainted variable names
503
+ unreachableLines: Set<number>; // Dead code lines
504
+ taintedCollections: Map<string, Set<string>>; // Collection → tainted keys
505
+ }
506
+ ```
507
+
508
+ ### Features
509
+
510
+ | Feature | Description |
511
+ |---------|-------------|
512
+ | **Variable Tracking** | Tracks assignments: `bar = "constant"` vs `bar = taintedParam` |
513
+ | **Expression Evaluation** | Evaluates arithmetic, comparisons, and string methods |
514
+ | **Dead Code Detection** | Evaluates if/switch/ternary conditions, marks unreachable branches |
515
+ | **Collection Taint Tracking** | Per-key tracking for map.put/map.get operations |
516
+ | **Iterative Refinement** | Second pass with fixpoint to re-evaluate taint with updated symbols |
517
+ | **Conditional Branch Handling** | Conservative taint preservation in unknown-condition branches |
518
+
519
+ ### Taint Source Detection
520
+
521
+ Recognizes HTTP request accessors and console/stream input methods as taint sources:
522
+ - `request.getParameter()`, `request.getHeader()`, `request.getCookies()`
523
+ - `request.getParameterMap()`, `request.getPathInfo()`, `request.getRequestURI()`
524
+ - Scanner/BufferedReader input methods
525
+
526
+ ### Dead Code Detection
527
+
528
+ Evaluates conditions to determine unreachable code:
529
+
530
+ ```java
531
+ if (false) {
532
+ // Marked as unreachable - sinks here are false positives
533
+ }
534
+
535
+ int x = 10;
536
+ if (x > 5) {
537
+ // Always executed
538
+ } else {
539
+ // Marked as unreachable
540
+ }
541
+ ```
542
+
543
+ ### False Positive Elimination
544
+
545
+ The `isFalsePositive()` helper identifies:
546
+ - `sink_in_dead_code` - Sink is in unreachable code block
547
+ - `variable_is_constant` - Variable has known constant value
548
+
549
+ **Implementation**: `src/analysis/constant-propagation.ts`
550
+
551
+ ---
552
+
553
+ ## 7. Imports
554
+
555
+ Import declarations for cross-file resolution.
556
+
557
+ ```typescript
558
+ interface ImportInfo {
559
+ imported_name: string;
560
+ from_package: string | null;
561
+ alias: string | null;
562
+ is_wildcard: boolean;
563
+ line_number: number | null;
564
+ }
565
+ ```
566
+
567
+ **Example:**
568
+ ```json
569
+ {
570
+ "imports": [
571
+ {
572
+ "imported_name": "ArrayList",
573
+ "from_package": "java.util",
574
+ "alias": null,
575
+ "is_wildcard": false,
576
+ "line_number": 3
577
+ },
578
+ {
579
+ "imported_name": "*",
580
+ "from_package": "javax.servlet.http",
581
+ "alias": null,
582
+ "is_wildcard": true,
583
+ "line_number": 4
584
+ }
585
+ ]
586
+ }
587
+ ```
588
+
589
+ **Implementation**: `cpg_extractor.py:465-517`
590
+
591
+ ---
592
+
593
+ ## 8. Exports (PENDING)
594
+
595
+ Symbols exported by this file.
596
+
597
+ ```typescript
598
+ // PENDING in the Python reference; implemented in the TypeScript target
599
+ interface ExportInfo {
600
+ symbol: string;
601
+ kind: "class" | "interface" | "method" | "field";
602
+ visibility: "public" | "protected" | "package";
603
+ }
604
+ ```
605
+
606
+ **Example (PENDING):**
607
+ ```json
608
+ {
609
+ "exports": [
610
+ {"symbol": "UserController", "kind": "class", "visibility": "public"},
611
+ {"symbol": "getUser", "kind": "method", "visibility": "public"}
612
+ ]
613
+ }
614
+ ```
615
+
616
+ ---
617
+
618
+ ## 9. Unresolved (PENDING)
619
+
620
+ Items that require LLM resolution.
621
+
622
+ ```typescript
623
+ // PENDING in the Python reference; implemented in the TypeScript target
624
+ interface UnresolvedItem {
625
+ type: "virtual_dispatch" | "taint_propagation" | "reflection" | "dynamic_call";
626
+ call_id?: number;
627
+ reason: string;
628
+ context: {
629
+ code: string;
630
+ line: number;
631
+ candidates?: string[];
632
+ };
633
+ llm_question: string;
634
+ }
635
+ ```
636
+
637
+ **Example (PENDING):**
638
+ ```json
639
+ {
640
+ "unresolved": [
641
+ {
642
+ "type": "virtual_dispatch",
643
+ "call_id": 5,
644
+ "reason": "interface_multiple_impls",
645
+ "context": {
646
+ "code": "encoder.matches(password, hash)",
647
+ "line": 25,
648
+ "candidates": ["BCryptPasswordEncoder.matches", "Argon2PasswordEncoder.matches"]
649
+ },
650
+ "llm_question": "Which PasswordEncoder implementation is used?"
651
+ },
652
+ {
653
+ "type": "taint_propagation",
654
+ "reason": "collection_uncertainty",
655
+ "context": {
656
+ "code": "users.add(user); ... users.get(0)",
657
+ "line": 30
658
+ },
659
+ "llm_question": "Does taint propagate through this collection?"
660
+ }
661
+ ]
662
+ }
663
+ ```
664
+
665
+ ---
666
+
667
+ ## 10. Enriched (PENDING)
668
+
669
+ LLM-enhanced metadata after enrichment pass.
670
+
671
+ ```typescript
672
+ // PENDING in the Python reference; implemented in the TypeScript target
673
+ interface Enriched {
674
+ functions?: EnrichedFunction[];
675
+ additional_sources?: TaintSource[];
676
+ additional_sinks?: TaintSink[];
677
+ resolved_calls?: ResolvedCall[];
678
+ }
679
+
680
+ interface EnrichedFunction {
681
+ method_name: string;
682
+ role: "controller" | "service" | "repository" | "utility";
683
+ risk: "high" | "medium" | "low";
684
+ trust_boundary: "entry_point" | "internal" | "external";
685
+ summary: string;
686
+ }
687
+
688
+ interface ResolvedCall {
689
+ call_id: number;
690
+ resolved_to: string;
691
+ confidence: number;
692
+ reason: string;
693
+ }
694
+ ```
695
+
696
+ **Example (PENDING):**
697
+ ```json
698
+ {
699
+ "enriched": {
700
+ "functions": [
701
+ {
702
+ "method_name": "authenticate",
703
+ "role": "service",
704
+ "risk": "high",
705
+ "trust_boundary": "entry_point",
706
+ "summary": "Validates user credentials against database"
707
+ }
708
+ ],
709
+ "additional_sources": [
710
+ {
711
+ "type": "deserialization",
712
+ "location": "ObjectInputStream.readObject at line 88",
713
+ "severity": "high",
714
+ "line": 88,
715
+ "confidence": 0.85
716
+ }
717
+ ],
718
+ "resolved_calls": [
719
+ {
720
+ "call_id": 5,
721
+ "resolved_to": "BCryptPasswordEncoder.matches",
722
+ "confidence": 0.85,
723
+ "reason": "BCrypt is Spring Security default"
724
+ }
725
+ ]
726
+ }
727
+ }
728
+ ```
729
+
730
+ ---
731
+
732
+ ## 11. Project-Level Schema (PENDING)
733
+
734
+ For multi-file analysis.
735
+
736
+ ### Project Metadata (PENDING)
737
+
738
+ ```typescript
739
+ interface ProjectMeta {
740
+ name: string;
741
+ root: string;
742
+ language: string;
743
+ framework: string;
744
+ framework_version: string;
745
+ build_tool: "maven" | "gradle" | "ant";
746
+ total_files: number;
747
+ total_loc: number;
748
+ }
749
+ ```
750
+
751
+ ### Cross-File Calls (PENDING)
752
+
753
+ ```typescript
754
+ interface CrossFileCall {
755
+ id: string;
756
+ from: {
757
+ file: string;
758
+ method: string;
759
+ line: number;
760
+ };
761
+ to: {
762
+ file: string;
763
+ method: string;
764
+ line: number;
765
+ };
766
+ args_mapping: ArgMapping[];
767
+ }
768
+
769
+ interface ArgMapping {
770
+ caller_arg: number;
771
+ callee_param: number;
772
+ taint_propagates: boolean;
773
+ }
774
+ ```
775
+
776
+ ### Type Hierarchy (PENDING)
777
+
778
+ ```typescript
779
+ interface TypeHierarchy {
780
+ classes: Record<string, {
781
+ extends: string | null;
782
+ implements: string[];
783
+ }>;
784
+ interfaces: Record<string, {
785
+ implementations: string[];
786
+ }>;
787
+ }
788
+ ```
789
+
790
+ ### Taint Paths (PENDING)
791
+
792
+ ```typescript
793
+ interface TaintPath {
794
+ id: string;
795
+ source: {
796
+ file: string;
797
+ line: number;
798
+ type: string;
799
+ };
800
+ sink: {
801
+ file: string;
802
+ line: number;
803
+ type: string;
804
+ cwe: string;
805
+ };
806
+ hops: TaintHop[];
807
+ sanitizers_in_path: string[];
808
+ path_exists: boolean;
809
+ confidence: number;
810
+ }
811
+
812
+ interface TaintHop {
813
+ file: string;
814
+ method: string;
815
+ line: number;
816
+ code: string;
817
+ variable: string;
818
+ }
819
+ ```
820
+
821
+ ---
822
+
823
+ ## 12. Findings (PENDING)
824
+
825
+ Final vulnerability reports.
826
+
827
+ ```typescript
828
+ // PENDING in the Python reference; implemented in the TypeScript target
829
+ interface Finding {
830
+ id: string;
831
+ type: string; // sql_injection, xss, etc.
832
+ cwe: string;
833
+ severity: "critical" | "high" | "medium" | "low";
834
+ confidence: number;
835
+ source: {
836
+ file: string;
837
+ line: number;
838
+ code: string;
839
+ };
840
+ sink: {
841
+ file: string;
842
+ line: number;
843
+ code: string;
844
+ };
845
+ path?: TaintHop[];
846
+ exploitable: boolean;
847
+ explanation: string;
848
+ remediation: string;
849
+ verification: {
850
+ graph_path_exists: boolean;
851
+ llm_verified: boolean;
852
+ llm_confidence: number;
853
+ };
854
+ }
855
+ ```
856
+
857
+ **Example (PENDING):**
858
+ ```json
859
+ {
860
+ "findings": [
861
+ {
862
+ "id": "vuln1",
863
+ "type": "sql_injection",
864
+ "cwe": "CWE-89",
865
+ "severity": "critical",
866
+ "confidence": 0.95,
867
+ "source": {
868
+ "file": "UserController.java",
869
+ "line": 35,
870
+ "code": "request.getParameter(\"id\")"
871
+ },
872
+ "sink": {
873
+ "file": "UserRepository.java",
874
+ "line": 45,
875
+ "code": "em.createNativeQuery(sql)"
876
+ },
877
+ "exploitable": true,
878
+ "explanation": "User input flows directly to SQL without sanitization",
879
+ "remediation": "Use parameterized query with setParameter()",
880
+ "verification": {
881
+ "graph_path_exists": true,
882
+ "llm_verified": true,
883
+ "llm_confidence": 0.95
884
+ }
885
+ }
886
+ ]
887
+ }
888
+ ```
889
+
890
+ ---
891
+
892
+ ## API Response Format
893
+
894
+ Response format returned by the Cloudflare (CF) Workers deployment.
895
+
896
+ ```typescript
897
+ interface AnalysisResponse {
898
+ success: boolean;
899
+ analysis: {
900
+ sources: TaintSource[];
901
+ sinks: TaintSink[];
902
+ vulnerabilities: Vulnerability[];
903
+ };
904
+ meta: {
905
+ parseTimeMs: number;
906
+ analysisTimeMs: number;
907
+ totalTimeMs: number;
908
+ };
909
+ }
910
+
911
+ interface Vulnerability {
912
+ type: string;
913
+ cwe: string;
914
+ severity: "critical" | "high" | "medium" | "low";
915
+ source: { line: number; type: string };
916
+ sink: { line: number; type: string };
917
+ confidence: number;
918
+ path?: string[];
919
+ }
920
+ ```
921
+
922
+ **Example:**
923
+ ```json
924
+ {
925
+ "success": true,
926
+ "analysis": {
927
+ "sources": [
928
+ {"line": 13, "type": "http_param", "location": "@RequestParam id"}
929
+ ],
930
+ "sinks": [
931
+ {"line": 17, "type": "sql_injection", "cwe": "CWE-89"}
932
+ ],
933
+ "vulnerabilities": [
934
+ {
935
+ "type": "sql_injection",
936
+ "cwe": "CWE-89",
937
+ "severity": "critical",
938
+ "source": {"line": 13, "type": "http_param"},
939
+ "sink": {"line": 17, "type": "sql_injection"},
940
+ "confidence": 0.95,
941
+ "path": ["id", "sql"]
942
+ }
943
+ ]
944
+ },
945
+ "meta": {
946
+ "parseTimeMs": 12,
947
+ "analysisTimeMs": 45,
948
+ "totalTimeMs": 57
949
+ }
950
+ }
951
+ ```
952
+
953
+ ---
954
+
955
+ ## TODO: TypeScript Implementation
956
+
957
+ ### Phase 1: Core (Must Have)
958
+ - [x] Meta extraction
959
+ - [x] Type extraction (classes, interfaces, methods, fields)
960
+ - [x] Call extraction (method invocations, arguments)
961
+ - [x] CFG construction (blocks, edges)
962
+ - [x] DFG construction (defs, uses)
963
+ - [x] Taint source matching
964
+ - [x] Taint sink matching
965
+ - [x] Import extraction
966
+ - [x] JSON serialization matching spec
967
+
968
+ ### Phase 2: Enhanced (Should Have)
969
+ - [x] Export extraction
970
+ - [x] Call resolution tracking
971
+ - [x] Sanitizer detection
972
+ - [x] DFG chains computation
973
+
974
+ ### Phase 3: LLM Integration (Nice to Have)
975
+ - [x] Unresolved section population
976
+ - [x] Enriched section from LLM
977
+ - [x] Finding generation
978
+
979
+ ### Phase 4: Project-Level (Done)
980
+ - [x] Cross-file call graph
981
+ - [x] Type hierarchy
982
+ - [x] Taint path enumeration
983
+ - [x] Multi-file analysis
984
+
985
+ ---
986
+
987
+ ## File Locations
988
+
989
+ ### Python Reference
990
+ | Component | File |
991
+ |-----------|------|
992
+ | CPG Extractor | `src/circle_ir/core/cpg_extractor.py` |
993
+ | Type Extractor | `src/circle_ir/core/type_extractor.py` |
994
+ | Call Extractor | `src/circle_ir/core/call_extractor.py` |
995
+ | Taint Patterns | `src/circle_ir/core/taint_patterns.py` |
996
+ | Constant Propagation | `src/circle_ir/core/constant_propagation.py` |
997
+ | Source Extractor | `src/circle_ir/core/source_extractor.py` |
998
+ | Sanitizers | `src/circle_ir/taint/sanitizers.py` |
999
+ | Path Finder | `src/circle_ir/taint/path_finder.py` |
1000
+ | DFG Verifier | `src/circle_ir/taint/dfg_verifier.py` |
1001
+
1002
+ ### TypeScript Implementation
1003
+ | Component | File |
1004
+ |-----------|------|
1005
+ | Parser | `src/core/parser.ts` |
1006
+ | Analyzer | `src/analyzer.ts` |
1007
+ | Constant Propagation | `src/analysis/constant-propagation.ts` |
1008
+ | Taint Propagation | `src/analysis/taint-propagation.ts` |
1009
+ | Taint Matcher | `src/analysis/taint-matcher.ts` |
1010
+ | Config Loader | `src/analysis/config-loader.ts` |
1011
+ | Benchmark Runner | `src/benchmark/runner.ts` |
1012
+
1013
+ ---
1014
+
1015
+ ## Version History
1016
+
1017
+ | Version | Date | Changes |
1018
+ |---------|------|---------|
1019
+ | 3.0.2 | 2025-01-25 | Added Constant Propagation engine documentation |
1020
+ | 3.0.1 | 2025-01-25 | Consolidated spec with implementation status |
1021
+ | 3.0.0 | 2025-01 | Initial spec, JSON format, three-step pipeline |