al-sem 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (231) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +361 -0
  3. package/package.json +64 -0
  4. package/scripts/d40-diff.ts +44 -0
  5. package/scripts/fetch-native-parser.ts +179 -0
  6. package/scripts/precision-sample.ts +99 -0
  7. package/scripts/precision-study.ts +42 -0
  8. package/scripts/precision-tabulate.ts +52 -0
  9. package/src/cli/baseline.ts +31 -0
  10. package/src/cli/diff.ts +199 -0
  11. package/src/cli/events-chains.ts +56 -0
  12. package/src/cli/events-fanout.ts +87 -0
  13. package/src/cli/exit-code.ts +30 -0
  14. package/src/cli/fingerprint-indexes.ts +130 -0
  15. package/src/cli/fingerprint-query.ts +543 -0
  16. package/src/cli/fingerprint-witness.ts +493 -0
  17. package/src/cli/fingerprint.ts +292 -0
  18. package/src/cli/format-compact-json.ts +45 -0
  19. package/src/cli/format-events.ts +77 -0
  20. package/src/cli/format-fingerprint.ts +295 -0
  21. package/src/cli/format-html.ts +503 -0
  22. package/src/cli/format-json.ts +13 -0
  23. package/src/cli/format-policy.ts +95 -0
  24. package/src/cli/format-sarif.ts +186 -0
  25. package/src/cli/format-terminal.ts +153 -0
  26. package/src/cli/index.ts +566 -0
  27. package/src/cli/policy.ts +204 -0
  28. package/src/config/roots-config.ts +302 -0
  29. package/src/deps/cache-versions.ts +74 -0
  30. package/src/deps/canonical-json.ts +27 -0
  31. package/src/deps/dependency-artifact.ts +144 -0
  32. package/src/deps/dependency-cache.ts +262 -0
  33. package/src/deps/dependency-dag.ts +128 -0
  34. package/src/deps/dependency-package-discovery.ts +85 -0
  35. package/src/deps/dependency-pipeline.ts +483 -0
  36. package/src/deps/dependency-projection.ts +211 -0
  37. package/src/deps/dependency-resolver.ts +154 -0
  38. package/src/deps/workspace-dependencies.ts +114 -0
  39. package/src/detectors/capability-query.ts +145 -0
  40. package/src/detectors/confidence.ts +52 -0
  41. package/src/detectors/d1-db-op-in-loop.ts +457 -0
  42. package/src/detectors/d10-self-modifying-loop.ts +114 -0
  43. package/src/detectors/d11-modify-without-get.ts +129 -0
  44. package/src/detectors/d12-dead-integration-event.ts +81 -0
  45. package/src/detectors/d13-cross-app-internal-call.ts +105 -0
  46. package/src/detectors/d14-dead-routine.ts +151 -0
  47. package/src/detectors/d16-obsolete-routine-call.ts +94 -0
  48. package/src/detectors/d17-min-version-drift.ts +157 -0
  49. package/src/detectors/d18-constant-filter-in-loop.ts +151 -0
  50. package/src/detectors/d19-unused-parameter.ts +116 -0
  51. package/src/detectors/d2-event-fanout-in-loop.ts +240 -0
  52. package/src/detectors/d20-unreachable-after-exit.ts +92 -0
  53. package/src/detectors/d21-read-without-load.ts +128 -0
  54. package/src/detectors/d22-flowfield-without-calcfields.ts +168 -0
  55. package/src/detectors/d29-subscriber-modify-on-event-record.ts +163 -0
  56. package/src/detectors/d3-load-state.ts +72 -0
  57. package/src/detectors/d3-missing-setloadfields.ts +234 -0
  58. package/src/detectors/d32-constant-boolean-parameter.ts +185 -0
  59. package/src/detectors/d33-unfiltered-bulk-write.ts +173 -0
  60. package/src/detectors/d34-commit-in-loop.ts +206 -0
  61. package/src/detectors/d35-commit-in-event-subscriber.ts +138 -0
  62. package/src/detectors/d36-late-setloadfields.ts +162 -0
  63. package/src/detectors/d37-validate-without-persist.ts +271 -0
  64. package/src/detectors/d38-subscriber-to-obsolete-event.ts +140 -0
  65. package/src/detectors/d39-record-left-dirty-across-chain.ts +165 -0
  66. package/src/detectors/d4-repeated-lookup-in-loop.ts +128 -0
  67. package/src/detectors/d40-transitive-load-missing.ts +217 -0
  68. package/src/detectors/d41-transitive-filter-loss.ts +200 -0
  69. package/src/detectors/d42-cross-call-wrong-setloadfields.ts +243 -0
  70. package/src/detectors/d43-event-ishandled-skip.ts +257 -0
  71. package/src/detectors/d44-event-multi-subscriber-overlap.ts +223 -0
  72. package/src/detectors/d45-event-transitive-table-exposure.ts +159 -0
  73. package/src/detectors/d5-set-based-opportunity.ts +162 -0
  74. package/src/detectors/d7-recursive-event-expansion.ts +151 -0
  75. package/src/detectors/d8-commit-in-transaction.ts +132 -0
  76. package/src/detectors/d9-transaction-span-summary.ts +107 -0
  77. package/src/detectors/detector-context.ts +121 -0
  78. package/src/detectors/finding-grouping.ts +61 -0
  79. package/src/detectors/path-merge.ts +174 -0
  80. package/src/detectors/registry.ts +176 -0
  81. package/src/detectors/table-display.ts +42 -0
  82. package/src/diff/diff-abi.ts +195 -0
  83. package/src/diff/diff-capabilities.ts +179 -0
  84. package/src/diff/diff-engine.ts +146 -0
  85. package/src/diff/diff-events.ts +323 -0
  86. package/src/diff/diff-identity.ts +73 -0
  87. package/src/diff/diff-indexes.ts +199 -0
  88. package/src/diff/diff-permissions.ts +260 -0
  89. package/src/diff/diff-policy.ts +101 -0
  90. package/src/diff/diff-preflight.ts +66 -0
  91. package/src/diff/diff-renames.ts +104 -0
  92. package/src/diff/diff-schema.ts +232 -0
  93. package/src/diff/format-diff.ts +148 -0
  94. package/src/engine/attribute-parser.ts +50 -0
  95. package/src/engine/capability-cone.ts +531 -0
  96. package/src/engine/combined-graph.ts +357 -0
  97. package/src/engine/control-flow-walker.ts +1317 -0
  98. package/src/engine/dispatch-sites.ts +199 -0
  99. package/src/engine/effect-lattice.ts +81 -0
  100. package/src/engine/entry-points.ts +57 -0
  101. package/src/engine/event-flow.ts +524 -0
  102. package/src/engine/event-relay.ts +92 -0
  103. package/src/engine/op-classification.ts +92 -0
  104. package/src/engine/path-walker.ts +189 -0
  105. package/src/engine/reverse-call-graph.ts +23 -0
  106. package/src/engine/root-classifier-overlay.ts +194 -0
  107. package/src/engine/root-classifier.ts +135 -0
  108. package/src/engine/scc.ts +110 -0
  109. package/src/engine/source-anchor.ts +25 -0
  110. package/src/engine/summary-context.ts +104 -0
  111. package/src/engine/summary-engine.ts +296 -0
  112. package/src/engine/summary-runner.ts +560 -0
  113. package/src/engine/transaction-spans.ts +112 -0
  114. package/src/engine/uncertainty-util.ts +54 -0
  115. package/src/hash.ts +31 -0
  116. package/src/index/attribute-from-node.ts +141 -0
  117. package/src/index/callee-from-node.ts +181 -0
  118. package/src/index/capability/background.ts +90 -0
  119. package/src/index/capability/commit.ts +44 -0
  120. package/src/index/capability/dispatch.ts +164 -0
  121. package/src/index/capability/events.ts +65 -0
  122. package/src/index/capability/extractor.ts +124 -0
  123. package/src/index/capability/file-blob.ts +137 -0
  124. package/src/index/capability/http.ts +159 -0
  125. package/src/index/capability/hyperlink.ts +60 -0
  126. package/src/index/capability/isolated-storage.ts +179 -0
  127. package/src/index/capability/table.ts +113 -0
  128. package/src/index/capability/telemetry.ts +84 -0
  129. package/src/index/capability/ui.ts +55 -0
  130. package/src/index/capability/value-source.ts +202 -0
  131. package/src/index/expression-from-node.ts +117 -0
  132. package/src/index/indexer.ts +102 -0
  133. package/src/index/intraprocedural-body.ts +1467 -0
  134. package/src/index/intraprocedural-ops.ts +253 -0
  135. package/src/index/intraprocedural-refs.ts +188 -0
  136. package/src/index/object-indexer.ts +279 -0
  137. package/src/index/routine-indexer.ts +282 -0
  138. package/src/index/routine-signature.ts +46 -0
  139. package/src/index/variable-indexer.ts +134 -0
  140. package/src/index/variable-initializer-extractor.ts +155 -0
  141. package/src/index/variable-type-normalizer.ts +83 -0
  142. package/src/index.ts +267 -0
  143. package/src/mcp/server.ts +72 -0
  144. package/src/mcp/session.ts +49 -0
  145. package/src/mcp/tools/explain-path.ts +75 -0
  146. package/src/mcp/tools/get-analysis-health.ts +62 -0
  147. package/src/mcp/tools/get-finding.ts +47 -0
  148. package/src/mcp/tools/get-routine-summary.ts +126 -0
  149. package/src/mcp/tools/list-findings.ts +85 -0
  150. package/src/mcp/tools/list-hotspots.ts +78 -0
  151. package/src/mcp/tools/list-rollups.ts +103 -0
  152. package/src/mcp/tools/validators.ts +25 -0
  153. package/src/model/attributes.ts +120 -0
  154. package/src/model/callee.ts +45 -0
  155. package/src/model/capability.ts +187 -0
  156. package/src/model/coverage.ts +85 -0
  157. package/src/model/entities.ts +628 -0
  158. package/src/model/expression.ts +98 -0
  159. package/src/model/finding.ts +110 -0
  160. package/src/model/graph-edge.ts +93 -0
  161. package/src/model/graph.ts +62 -0
  162. package/src/model/identity.ts +81 -0
  163. package/src/model/ids.ts +90 -0
  164. package/src/model/index.ts +13 -0
  165. package/src/model/model.ts +51 -0
  166. package/src/model/permission.ts +76 -0
  167. package/src/model/root-classification.ts +116 -0
  168. package/src/model/stable-identity.ts +102 -0
  169. package/src/model/summary.ts +96 -0
  170. package/src/parser/ast.ts +82 -0
  171. package/src/parser/native/ffi.ts +145 -0
  172. package/src/parser/native/parse-index-pool.ts +148 -0
  173. package/src/parser/native/parse-index-worker.ts +94 -0
  174. package/src/parser/native/wrapper.ts +353 -0
  175. package/src/parser/parser-init.ts +43 -0
  176. package/src/perf/profiler.ts +66 -0
  177. package/src/policy/policy-default.yaml +83 -0
  178. package/src/policy/policy-engine.ts +339 -0
  179. package/src/policy/policy-loader.ts +257 -0
  180. package/src/policy/policy-schema.json +379 -0
  181. package/src/policy/policy-types.ts +81 -0
  182. package/src/policy/predicate-compiler.ts +151 -0
  183. package/src/policy/predicate-evaluator.ts +267 -0
  184. package/src/policy/predicate-fields.ts +439 -0
  185. package/src/projection/actionable-anchor.ts +48 -0
  186. package/src/projection/finding-filters.ts +44 -0
  187. package/src/projection/finding-fingerprint.ts +54 -0
  188. package/src/projection/finding-groups.ts +41 -0
  189. package/src/projection/finding-summary.ts +110 -0
  190. package/src/projection/rollup-findings.ts +105 -0
  191. package/src/providers/discover.ts +88 -0
  192. package/src/providers/external.ts +46 -0
  193. package/src/providers/types.ts +36 -0
  194. package/src/providers/workspace.ts +117 -0
  195. package/src/resolve/call-resolver.ts +117 -0
  196. package/src/resolve/coverage.ts +61 -0
  197. package/src/resolve/event-graph.ts +166 -0
  198. package/src/resolve/implicit-edges.ts +53 -0
  199. package/src/resolve/record-types.ts +36 -0
  200. package/src/resolve/resolver.ts +23 -0
  201. package/src/resolve/semantic-graph.ts +29 -0
  202. package/src/resolve/symbol-table.ts +69 -0
  203. package/src/snapshot/app-snapshot.ts +74 -0
  204. package/src/snapshot/compose.ts +100 -0
  205. package/src/snapshot/derive/callsite-evidence.ts +76 -0
  206. package/src/snapshot/derive/capability-facts.ts +70 -0
  207. package/src/snapshot/derive/contracts.ts +131 -0
  208. package/src/snapshot/derive/coverage.ts +35 -0
  209. package/src/snapshot/derive/event-declarations.ts +140 -0
  210. package/src/snapshot/derive/identity-table.ts +58 -0
  211. package/src/snapshot/derive/inputs.ts +91 -0
  212. package/src/snapshot/derive/operation-evidence.ts +70 -0
  213. package/src/snapshot/derive/permissions.ts +186 -0
  214. package/src/snapshot/derive/root-classifications.ts +56 -0
  215. package/src/snapshot/derive/schema.ts +130 -0
  216. package/src/snapshot/derive/typed-edges.ts +60 -0
  217. package/src/snapshot/derive/workspace-fingerprint.ts +19 -0
  218. package/src/snapshot/deserialize.ts +40 -0
  219. package/src/snapshot/serialize-cbor-gz.ts +12 -0
  220. package/src/snapshot/serialize-cbor.ts +19 -0
  221. package/src/snapshot/serialize-json.ts +22 -0
  222. package/src/snapshot/shard.ts +134 -0
  223. package/src/snapshot/types.ts +181 -0
  224. package/src/symbols/app-manifest.ts +96 -0
  225. package/src/symbols/app-package-zip.ts +50 -0
  226. package/src/symbols/embedded-source-reader.ts +41 -0
  227. package/src/symbols/package-hash.ts +81 -0
  228. package/src/symbols/symbol-reader.ts +101 -0
  229. package/src/symbols/symbol-reference-parser.ts +378 -0
  230. package/src/symbols/symbol-reference-reader.ts +27 -0
  231. package/tsconfig.json +18 -0
@@ -0,0 +1,483 @@
1
+ import { buildCombinedGraph } from "../engine/combined-graph.ts";
2
+ import { runSummaries } from "../engine/summary-runner.ts";
3
+ import { sha256Hex } from "../hash.ts";
4
+ import { indexObjects } from "../index/object-indexer.ts";
5
+ import { indexRoutines } from "../index/routine-indexer.ts";
6
+ import type { App, Routine } from "../model/entities.ts";
7
+ import type { Diagnostic } from "../model/finding.ts";
8
+ import type { AppIdentity, ModelIdentity } from "../model/identity.ts";
9
+ import type { SemanticIndex, SemanticModel } from "../model/model.ts";
10
+ import { ParseIndexPool, defaultPoolSize } from "../parser/native/parse-index-pool.ts";
11
+ import { NativeParserUnavailableError, parseALSource } from "../parser/parser-init.ts";
12
+ import { makeLap } from "../perf/profiler.ts";
13
+ import { resolveSemanticGraph } from "../resolve/semantic-graph.ts";
14
+ import { iterateEmbeddedSource } from "../symbols/embedded-source-reader.ts";
15
+ import { parseSymbolReference } from "../symbols/symbol-reference-parser.ts";
16
+ import { readSymbolReferenceJson } from "../symbols/symbol-reference-reader.ts";
17
+ import { CACHE_VERSIONS, devFingerprint } from "./cache-versions.ts";
18
+ import {
19
+ DEPENDENCY_ARTIFACT_SCHEMA_VERSION,
20
+ type DependencyArtifact,
21
+ type DependencyPackageRef,
22
+ } from "./dependency-artifact.ts";
23
+ import { projectAbiToIndex } from "./dependency-projection.ts";
24
+
/**
 * Placeholder feature set assigned to routines that are written into a dependency
 * artifact: every collection is empty, the nesting depth is zero, there is no branching
 * and no statement tree. The same object is shared by every routine that uses it, so
 * treat it as read-only.
 */
const EMPTY_FEATURES: Routine["features"] = {
  loops: [],
  operationSites: [],
  recordOperations: [],
  callSites: [],
  fieldAccesses: [],
  recordVariables: [],
  nestingDepth: 0,
  unreachableStatements: [],
  hasBranching: false,
  statementTree: undefined,
  identifierReferences: [],
  varAssignments: [],
  conditionReferences: [],
};
41
+
/**
 * Upper bound on the number of embedded source files materialized for a single
 * dependency. Once this many files have been collected and another one is encountered,
 * ingestion falls back to the structural ABI only. Deterministic preflight resource
 * guard — see resourcePolicyVersion in cache-versions.ts.
 */
const MAX_DEP_SOURCE_FILES = 30_000;

/**
 * Smallest embedded-source file count at which parsing is handed to the worker pool
 * instead of running inline.
 *
 * Below this threshold the sequential path wins: starting N workers (each one dlopen's
 * the native shim, initializes its Parser, and pays structured-clone overhead for
 * results) costs more than the per-file work it saves. Empirically the pool starts
 * breaking even at ~100 files; at 7,634 files (Microsoft Base Application) it is
 * 2.5×–3× faster.
 */
const WORKER_POOL_MIN_FILES = 200;
54
+
/**
 * Creates a fresh coverage record with every counter at zero and every list empty.
 * A new object (with new arrays) is produced on each call, so callers may mutate the
 * result freely.
 */
function emptyCoverage(): SemanticModel["coverage"] {
  const blank: SemanticModel["coverage"] = {
    sourceUnitsTotal: 0,
    sourceUnitsParsed: 0,
    routinesTotal: 0,
    routinesBodyAvailable: 0,
    routinesParseIncomplete: [],
    opaqueApps: [],
    unresolvedCallsites: [],
    dynamicDispatchSites: [],
  };
  return blank;
}
67
+
/**
 * Builds the minimal ModelIdentity for a dependency model analysed in isolation.
 *
 * The identity lists exactly one app (taken from `ref`), stamps the analyzer / grammar /
 * symbol-reader versions from `CACHE_VERSIONS`, and uses fixed values for `createdAt`
 * (the Unix epoch) and `dependencyGraphHash` (empty string), so the result depends only
 * on `ref` and the cache versions. The real cache key travels in `modelInstanceId`,
 * not here.
 */
function depIdentity(ref: DependencyPackageRef): ModelIdentity {
  const { appGuid, publisher, name, version, includesSource } = ref;
  const soleApp: AppIdentity = {
    appGuid,
    publisher,
    name,
    version,
    sourceKind: includesSource ? "app-source" : "symbol-only",
  };
  const epoch = new Date(0).toISOString();
  return {
    schemaVersion: "1",
    analyzerVersion: CACHE_VERSIONS.analyzer,
    grammarVersion: CACHE_VERSIONS.grammar,
    symbolReaderVersion: CACHE_VERSIONS.symbolReader,
    createdAt: epoch,
    apps: [soleApp],
    dependencyGraphHash: "",
  };
}
87
+
/** One embedded source file pulled out of a dependency package. */
interface EmbeddedFile {
  /** Path of the file as reported by the embedded-source reader; used in source-unit ids and diagnostics. */
  relativePath: string;
  /** Full text of the file, handed to the parser as-is. */
  content: string;
}
92
+
/**
 * Parses and indexes the embedded files one after another on the calling thread.
 *
 * For each file the source is parsed, its objects (and tables) are indexed, and the
 * routines of every indexed object are appended to `index`; all entities are stamped
 * `analysisRole: "dependency"`. The syntax tree of each file is deleted before the next
 * file is processed.
 *
 * @param allFiles        Embedded files to process, in iteration order.
 * @param ref             Package the files belong to (supplies the app GUID and name).
 * @param modelInstanceId Model instance id forwarded to the indexers.
 * @param index           Index that receives the objects, tables and routines (mutated).
 * @param diagnostics     Sink for parse diagnostics (mutated).
 * @returns `true` as soon as the native parser turns out to be unavailable, so the
 *          caller can fall back to structural-only mode; `false` otherwise. An ordinary
 *          parse failure only records a warning and moves on to the next file.
 */
async function runFilesInline(
  allFiles: EmbeddedFile[],
  ref: DependencyPackageRef,
  modelInstanceId: string,
  index: SemanticIndex,
  diagnostics: Diagnostic[],
): Promise<boolean> {
  for (const { relativePath, content } of allFiles) {
    let tree: Awaited<ReturnType<typeof parseALSource>>;
    try {
      tree = await parseALSource(content);
    } catch (err) {
      if (!(err instanceof NativeParserUnavailableError)) {
        // Ordinary parse failure: note it and keep going with the remaining files.
        diagnostics.push({
          severity: "warning",
          stage: "parse",
          message: `[DEP021] ${ref.name}: failed to parse ${relativePath}`,
          sourceRef: relativePath,
        });
        continue;
      }
      // No native parser at all: nothing further can be parsed, so stop here.
      diagnostics.push({
        severity: "error",
        stage: "parse",
        message: `[DEP021-NATIVE] ${ref.name}: native parser unavailable — embedded source skipped, falling back to structural-only ABI`,
        sourceRef: ref.appGuid,
      });
      return true;
    }

    try {
      const sourceUnitId = `dep:${ref.appGuid}:${relativePath}`;
      const indexedObjects = indexObjects({
        tree,
        appGuid: ref.appGuid,
        sourceUnitId,
        modelInstanceId,
        sourceHash: sha256Hex(content),
      });
      for (const entry of indexedObjects) {
        const { object, objectNode, table } = entry;
        if (object === undefined || objectNode === undefined) continue;
        object.analysisRole = "dependency";
        index.objects.push(object);
        if (table !== undefined) index.tables.push(table);
        const objectRoutines = indexRoutines({
          objectNode,
          object,
          sourceUnitId,
          modelInstanceId,
        });
        for (const routine of objectRoutines) routine.analysisRole = "dependency";
        index.routines.push(...objectRoutines);
      }
    } finally {
      // Release the syntax tree even when indexing throws.
      tree.delete();
    }
  }
  return false;
}
155
+
/**
 * Parallel per-file pipeline. Creates a `ParseIndexPool` of `defaultPoolSize()` workers,
 * submits every file as an independent job with bounded concurrency, waits for all
 * results, then walks them in sorted-by-relativePath order so diagnostics and index
 * contents do not depend on worker completion order.
 *
 * If ANY result reports parser-unavailable, no source-derived entities are merged into
 * `index`: the emitted diagnostic states that embedded source is skipped and the caller
 * falls back to the structural ABI, so merging the files that happened to succeed would
 * leave the index with a mix of source-derived and ABI-projected entities.
 *
 * @param allFiles        Embedded files to process.
 * @param ref             Package the files belong to (supplies the app GUID and name).
 * @param modelInstanceId Model instance id forwarded to the workers.
 * @param index           Index that receives the objects, tables and routines (mutated).
 * @param diagnostics     Sink for parse diagnostics (mutated).
 * @returns `true` if any worker reports parser-unavailable, `false` otherwise.
 */
async function runFilesInPool(
  allFiles: EmbeddedFile[],
  ref: DependencyPackageRef,
  modelInstanceId: string,
  index: SemanticIndex,
  diagnostics: Diagnostic[],
): Promise<boolean> {
  const poolSize = defaultPoolSize();
  const pool = new ParseIndexPool(poolSize);
  let parserUnavailable = false;
  try {
    const jobs = allFiles.map((f) => ({
      relativePath: f.relativePath,
      content: f.content,
      appGuid: ref.appGuid,
      sourceUnitId: `dep:${ref.appGuid}:${f.relativePath}`,
      modelInstanceId,
    }));
    // Bounded concurrency: at most `poolSize * 2` jobs in flight. Without this,
    // Base Application (7,634 files × ~13 KB of source each) floods every worker's
    // mailbox with the full archive's worth of structured-cloned strings — peak RSS
    // hits multiple GB and the runtime OOMs.
    const resultsByPath = new Map<string, Awaited<ReturnType<typeof pool.submit>>>();
    await pool.mapBounded(jobs, poolSize * 2, (r) => {
      // Results are keyed by the relativePath they carry so they can be replayed in a
      // canonical order below.
      resultsByPath.set(r.relativePath, r);
    });

    // Canonical (sorted) order keeps diagnostics and the index byte-stable regardless
    // of worker completion order.
    const sortedPaths = [...resultsByPath.keys()].sort();

    // Pass 1 — diagnostics only. Also determines whether the parser was unavailable
    // for any file, which decides if anything may be merged at all.
    for (const rp of sortedPaths) {
      const r = resultsByPath.get(rp);
      if (!r || r.ok) continue;
      if (r.errorKind === "parser-unavailable") {
        if (!parserUnavailable) {
          parserUnavailable = true;
          diagnostics.push({
            severity: "error",
            stage: "parse",
            message: `[DEP021-NATIVE] ${ref.name}: native parser unavailable — embedded source skipped, falling back to structural-only ABI`,
            sourceRef: ref.appGuid,
          });
        }
        continue;
      }
      diagnostics.push({
        severity: "warning",
        stage: "parse",
        message: `[DEP021] ${ref.name}: failed to parse ${r.relativePath}`,
        sourceRef: r.relativePath,
      });
    }

    // Pass 2 — merge successful results, but only when the whole run had a parser.
    if (!parserUnavailable) {
      for (const rp of sortedPaths) {
        const r = resultsByPath.get(rp);
        if (!r || !r.ok) continue;
        for (const o of r.objects) {
          o.analysisRole = "dependency";
          index.objects.push(o);
        }
        for (const t of r.tables) index.tables.push(t);
        for (const rt of r.routines) rt.analysisRole = "dependency";
        index.routines.push(...r.routines);
      }
    }
  } finally {
    // Always shut the workers down, including when a job or the merge throws.
    pool.terminate();
  }
  return parserUnavailable;
}
232
+
/** Optional switches accepted by `buildAppModel` and `ingestDependencyApp`. */
export interface IngestDependencyAppOptions {
  /**
   * When set, embedded source is not parsed at all: the ABI is projected from
   * SymbolReference.json and every routine is left with `summary: undefined`.
   * Used by `--no-dep-summaries`, so that a cold run on Microsoft Base Application
   * does not spend wall-clock time parsing 7 k+ files only to have the summaries
   * stripped at the resolver layer.
   */
  structuralOnly?: boolean;
}
242
+
/** What `buildAppModel` hands back to its callers. */
export interface AppModelResult {
  /** The resolved and summarized model built for the app. */
  model: SemanticModel;
  /** Diagnostics collected while reading symbols, parsing, and summarizing. */
  diagnostics: Diagnostic[];
  /** Which summary mode the build ended up in (same value space as the artifact header). */
  summaryMode: DependencyArtifact["header"]["summaryMode"];
}
248
+
/**
 * Parse a single `.app` (embedded source, or ABI projection when symbol-only) into a FULL
 * SemanticModel: index → resolveSemanticGraph → buildCombinedGraph → runSummaries.
 * Shared by `ingestDependencyApp` (which reduces it to a cached artifact) and the
 * snapshot-from-.app path (which keeps the full model). Entities are stamped
 * `analysisRole: "dependency"`; callers wanting a primary subject remap afterward.
 *
 * The ABI from SymbolReference.json is projected into the index instead of parsed source
 * when any of the following holds: the package carries no source, `structuralOnly` is
 * set, the package exceeds `MAX_DEP_SOURCE_FILES`, or the native parser is unavailable.
 * In the parser-unavailable case any entities that were already indexed from source are
 * rolled back first, so the index never mixes source-derived and projected entities of
 * the same app.
 *
 * @param ref            The package to analyse.
 * @param lowerArtifacts Artifacts of lower dependencies; their entities are merged in
 *                       unchanged ahead of this app's own entities.
 * @param artifactKey    Cache key; becomes the model instance id as `dep:<key>`.
 * @param options        See `IngestDependencyAppOptions`.
 * @returns The model, the diagnostics collected on the way, and the summary mode.
 */
export async function buildAppModel(
  ref: DependencyPackageRef,
  lowerArtifacts: DependencyArtifact[],
  artifactKey: string,
  options: IngestDependencyAppOptions = {},
): Promise<AppModelResult> {
  const modelInstanceId = `dep:${artifactKey}`;
  const diagnostics: Diagnostic[] = [];

  // Phase profiler — stable labels (no per-run counts) so the benchmark harness can aggregate
  // the same phase across runs/commits. Routes through the structured profiler.
  const lap = makeLap(`ingest:${ref.name}:`);

  // --- structural ABI from SymbolReference.json (authoritative for visibility) ---
  const symJson = readSymbolReferenceJson(ref.appPath);
  const abi = symJson === null ? null : parseSymbolReference(symJson);
  lap("symbol-read+parse");
  if (abi === null) {
    // `abi` is null exactly when SymbolReference.json could not be read.
    diagnostics.push({
      severity: "warning",
      stage: "symbol-read",
      message: `[DEP020] ${ref.name}: SymbolReference.json missing — exported ABI unavailable, calls into this app stay opaque`,
      sourceRef: ref.appGuid,
    });
  } else if (abi.error !== undefined) {
    diagnostics.push({
      severity: "warning",
      stage: "symbol-read",
      message: `[DEP020] ${ref.name}: ${abi.error}`,
      sourceRef: ref.appGuid,
    });
  }

  // --- build the dependency SemanticIndex ---
  const index: SemanticIndex = {
    identity: depIdentity(ref),
    apps: [
      {
        appGuid: ref.appGuid,
        publisher: ref.publisher,
        name: ref.name,
        version: ref.version,
        analysisRole: "dependency",
      } satisfies App,
    ],
    objects: [],
    routines: [],
    tables: [],
  };

  // merge lower-dependency artifacts as fixed leaves: their routines already carry summaries,
  // so `runSummaries` will treat them as immutable leaves (default isLeaf = has a summary).
  for (const lower of lowerArtifacts) {
    index.objects.push(...lower.abi.objects);
    index.tables.push(...lower.abi.tables);
    index.routines.push(...lower.abi.routines);
  }
  lap("merge-lowers");

  // Everything past these lengths is contributed by THIS app's source parse; remembered
  // so a parse that ends in parser-unavailable can be undone before the ABI projection.
  const preParse = {
    objects: index.objects.length,
    tables: index.tables.length,
    routines: index.routines.length,
  };

  let summaryMode: DependencyArtifact["header"]["summaryMode"] = "full";
  let parserUnavailable = false;

  if (options.structuralOnly) {
    // --no-dep-summaries: skip embedded-source parsing entirely. The structural projection
    // below produces bodyAvailable:false routines; runSummaries then has nothing to compute
    // from a body, so summaries stay undefined.
    summaryMode = "structural-only-no-dep-summaries";
  } else if (ref.includesSource) {
    // --- embedded-source path ---
    // Materialize all .al entries up front (single bulk-unzip — see Phase P). Then either
    // dispatch parse+index to a worker pool (for large deps where the per-file work
    // dominates and parallelism pays off) or process inline (for small deps where worker
    // startup would dwarf the savings).
    const allFiles: EmbeddedFile[] = [];
    let resourceGuardTripped = false;
    for await (const file of iterateEmbeddedSource(ref.appPath)) {
      if (allFiles.length >= MAX_DEP_SOURCE_FILES) {
        resourceGuardTripped = true;
        summaryMode = "structural-only-resource-guard";
        diagnostics.push({
          severity: "info",
          stage: "symbol-read",
          message: `[DEP022] ${ref.name}: exceeds ${MAX_DEP_SOURCE_FILES} source files — behavioral summaries skipped (structural ABI only)`,
          sourceRef: ref.appGuid,
        });
        break;
      }
      allFiles.push(file);
    }
    lap("materialize-source");

    if (!resourceGuardTripped) {
      const usePool = allFiles.length >= WORKER_POOL_MIN_FILES;
      parserUnavailable = usePool
        ? await runFilesInPool(allFiles, ref, modelInstanceId, index, diagnostics)
        : await runFilesInline(allFiles, ref, modelInstanceId, index, diagnostics);
      if (parserUnavailable) {
        summaryMode = "structural-only-parser-unavailable";
        // Drop whatever was indexed from source before the parser gave up; the ABI
        // projection below re-creates this app's entities in structural form.
        index.objects.length = preParse.objects;
        index.tables.length = preParse.tables;
        index.routines.length = preParse.routines;
      }
      lap(usePool ? "pool-parse+index" : "inline-parse+index");
    }
  }

  const projectFromAbi =
    !ref.includesSource ||
    summaryMode === "structural-only-resource-guard" ||
    summaryMode === "structural-only-no-dep-summaries" ||
    parserUnavailable;
  if (projectFromAbi && abi !== null && abi.error === undefined) {
    // --- symbol-only path: project the ABI into bodyAvailable:false routines ---
    const projected = projectAbiToIndex(abi, ref, modelInstanceId);
    index.objects.push(...projected.objects);
    index.tables.push(...projected.tables);
    index.routines.push(...projected.routines);
  }

  // --- resolve + summarize the dependency model in isolation ---
  const { callGraph, eventGraph } = resolveSemanticGraph(index);
  lap("resolveSemanticGraph");
  const depModel: SemanticModel = {
    ...index,
    callGraph,
    eventGraph,
    coverage: emptyCoverage(),
    rootClassifications: [],
  };
  const graph = buildCombinedGraph(depModel);
  lap("buildCombinedGraph");
  runSummaries(depModel, graph, diagnostics); // leaves = routines that already have a summary
  lap("runSummaries");

  return { model: depModel, diagnostics, summaryMode };
}
392
+
/**
 * Ingest one dependency .app into a DependencyArtifact.
 *
 * Delegates the model construction to `buildAppModel` (lower-dependency artifacts are
 * merged in there as fixed leaves) and then reduces the resulting model to the compact
 * artifact form: only entities owned by `ref.appGuid` are kept, raw intraprocedural
 * features are replaced by `EMPTY_FEATURES`, and routine summaries are trimmed to their
 * direct effects. Objects, tables, routines and event publishers are sorted by `id`;
 * diagnostics are sorted by `stage|message`, with ties kept in emission order.
 *
 * When `structuralOnly` is set the embedded source is not parsed and every routine is
 * emitted without a summary (`summaryMode: "structural-only-no-dep-summaries"`). The
 * cache key is namespaced to this mode by the resolver, so a subsequent
 * `--no-dep-summaries` run is a cache hit and a flag-flipped non-no-summaries run does
 * not pick up the structural artifact.
 *
 * @param ref            The package to ingest.
 * @param lowerArtifacts Already-ingested artifacts of the package's dependencies.
 * @param artifactKey    Cache key recorded in the artifact header.
 * @param options        See `IngestDependencyAppOptions`.
 * @returns The artifact. `artifactContentHash`, `packageSemanticHash` and
 *          `directDependencies` are left blank here and filled in by later stages.
 */
export async function ingestDependencyApp(
  ref: DependencyPackageRef,
  lowerArtifacts: DependencyArtifact[],
  artifactKey: string,
  options: IngestDependencyAppOptions = {},
): Promise<DependencyArtifact> {
  const {
    model: depModel,
    diagnostics,
    summaryMode,
  } = await buildAppModel(ref, lowerArtifacts, artifactKey, options);
  const lap = makeLap(`ingest:${ref.name}:`);

  // --- project to the compact artifact: keep only THIS app's entities, strip raw features
  //     AND strip transitive arrays from each routine summary. A routine's summary stores
  //     `dbEffects` and `uncertainties` accumulated TRANSITIVELY across its outgoing call
  //     graph; on Microsoft Base Application (~97 k routines) that produces a 1.26 GB
  //     artifact when each routine carries hundreds of inherited effect records.
  //     `capabilityFactsDirect` + `capabilityFactsInherited` + `coverage` carry the Phase
  //     0b-β capability cone; detectors read them through the capability-query helpers
  //     (`touchesDbOf`, `mayCommit`, `writesTablesOf`, `publishesEventsOf`,
  //     `reachableCoverage`) — never directly. The pre-Phase-1c legacy boolean lattice
  //     (`touchesDb`, `commits`, `writesTables`, `publishesEvents`) is gone. ---
  const compareText = (a: string, b: string): number => (a < b ? -1 : a > b ? 1 : 0);
  const byId = <T extends { id: string }>(a: T, b: T): number => compareText(a.id, b.id);

  const ownObjects = depModel.objects.filter((o) => o.appGuid === ref.appGuid).sort(byId);
  const ownTables = depModel.tables.filter((t) => t.appGuid === ref.appGuid).sort(byId);
  const stripSummary = summaryMode === "structural-only-no-dep-summaries";
  const ownRoutines = depModel.routines
    .filter((r) => r.canonical.appGuid === ref.appGuid)
    .map((r) => {
      // `summary` is structurally absent from `base` (via spread, not set to undefined).
      // After the canonical-json fix (undefined keys are omitted), this is equivalent to
      // an undefined value; the explicit destructure keeps it unambiguous.
      const { summary: _orig, ...rest } = r;
      void _orig;
      const base = { ...rest, features: EMPTY_FEATURES, analysisRole: "dependency" as const };
      if (stripSummary || r.summary === undefined) return base;
      return {
        ...base,
        summary: {
          ...r.summary,
          // Keep only direct dbEffects (the routine's own ops); inherited effects are
          // reconstructable from the call graph at the consumer's compose step.
          dbEffects: r.summary.dbEffects.filter((e) => e.via === "direct"),
          // Drop transitive uncertainties; the consumer recomposes them from its
          // own edges into this routine if needed.
          uncertainties: [],
        },
      };
    })
    .sort(byId);

  // Set lookup instead of scanning `ownObjects` once per event.
  const ownObjectIds = new Set<string>(ownObjects.map((o) => o.id));
  const eventPublishers = [...depModel.eventGraph.events]
    .filter((e) => ownObjectIds.has(e.publisherObjectId))
    .sort(byId);
  lap("project-artifact");

  // The comparator returns 0 for equal keys so it is a consistent ordering; the sort is
  // stable, so diagnostics with the same stage and message stay in emission order.
  const sortedDiagnostics = [...diagnostics].sort((a, b) =>
    compareText(`${a.stage}|${a.message}`, `${b.stage}|${b.message}`),
  );

  return {
    header: {
      schemaVersion: DEPENDENCY_ARTIFACT_SCHEMA_VERSION,
      versions: { ...CACHE_VERSIONS, devFingerprint: devFingerprint() },
      artifactKey,
      artifactContentHash: "", // filled by dependency-cache.ts on write
      appIdentity: {
        appGuid: ref.appGuid,
        publisher: ref.publisher,
        name: ref.name,
        version: ref.version,
      },
      packageHash: ref.packageHash,
      packageSemanticHash: "", // filled by the orchestrator (later task) which already computed it
      directDependencies: [], // filled by the orchestrator (later task)
      summaryMode,
    },
    abi: { objects: ownObjects, tables: ownTables, routines: ownRoutines, eventPublishers },
    diagnostics: sortedDiagnostics,
  };
}
@@ -0,0 +1,211 @@
1
+ import { sha256Hex, sha256OfStrings } from "../hash.ts";
2
+ import { canonicalRoutineSignature } from "../index/routine-signature.ts";
3
+ import type {
4
+ Field,
5
+ Key,
6
+ ObjectDecl,
7
+ ParameterSymbol,
8
+ ProcedureAccessModifier,
9
+ Routine,
10
+ Table,
11
+ } from "../model/entities.ts";
12
+ import type { SourceAnchor } from "../model/identity.ts";
13
+ import {
14
+ type CanonicalRoutineKey,
15
+ encodeFieldId,
16
+ encodeKeyId,
17
+ encodeObjectId,
18
+ encodeRoutineId,
19
+ encodeTableId,
20
+ } from "../model/ids.ts";
21
+ import type { AbiRoutine, SymbolReferenceAbi } from "../symbols/symbol-reference-parser.ts";
22
+ import type { DependencyPackageRef } from "./dependency-artifact.ts";
23
+
24
/**
 * Build a placeholder source anchor for an entity that has no real source location.
 *
 * @param sourceUnitId - Identifier of the source unit the anchor is attributed to.
 * @returns A fresh anchor whose range is all zeros, whose enclosing routine id is
 *   the empty string, and whose syntax kind is `"synthetic"`.
 */
export function syntheticAnchor(sourceUnitId: string): SourceAnchor {
  // A new range object is created per call so callers never share mutable state.
  const zeroRange = { startLine: 0, startColumn: 0, endLine: 0, endColumn: 0 };
  const anchor: SourceAnchor = {
    sourceUnitId,
    range: zeroRange,
    enclosingRoutineId: "",
    syntaxKind: "synthetic",
  };
  return anchor;
}
33
+
34
/**
 * Convert one ABI parameter into the `ParameterSymbol` shape expected by the
 * canonical-signature helper.
 *
 * The ABI parameter carries no record information, and the signature helper
 * hashes types only, so `isRecord` is fixed to `false` and `tableName` is left
 * `undefined` rather than being derived by re-parsing `typeText`.
 *
 * @param p - The ABI parameter (name, type text, by-reference flag).
 * @param index - Zero-based position of the parameter in its routine.
 * @returns The structural parameter symbol.
 */
function abiParamToSymbol(p: AbiRoutine["parameters"][number], index: number): ParameterSymbol {
  const { name, typeText, isVar } = p;
  return { index, name, typeText, isVar, isRecord: false, tableName: undefined };
}
50
+
51
/**
 * Compute the normalized signature hash of an ABI routine.
 *
 * The hash input is the canonical signature text built from the routine name,
 * its parameters (projected through `abiParamToSymbol`) and its return type
 * text; `modelInstanceId` plays no part. The canonical form is shared with the
 * native indexer (`routine-signature.ts`) so that a routine declared in primary
 * source and in a `.app` symbol package mints the same `RoutineId`.
 *
 * @param r - The ABI routine to hash.
 * @returns Hex-encoded SHA-256 of the canonical signature.
 */
function abiSignatureHash(r: AbiRoutine): string {
  const { name, parameters, returnTypeText } = r;
  const parameterSymbols = parameters.map(abiParamToSymbol);
  const canonicalText = canonicalRoutineSignature(name, parameterSymbols, returnTypeText);
  return sha256Hex(canonicalText);
}
62
+
63
/** Matches a parameter type text that starts with the word `Record` (case-insensitive). */
const RECORD_TYPE_TEXT = /^Record\b/i;

/**
 * Project one ABI routine onto a model `Routine` with dependency-safe defaults.
 *
 * The result has no body (`bodyAvailable: false`), an empty feature set, a
 * synthetic source anchor and `analysisRole: "dependency"`.
 *
 * @param r - The ABI routine to project.
 * @param objectId - Id of the object that declares the routine.
 * @param appGuid - GUID of the app that owns the declaring object.
 * @param objectType - Type of the declaring object.
 * @param objectNumber - Number of the declaring object.
 * @param modelInstanceId - Model instance the routine id is minted for.
 * @param sourceUnitId - Source unit used for the synthetic anchor.
 * @returns The projected routine.
 */
function abiRoutineToRoutine(
  r: AbiRoutine,
  objectId: string,
  appGuid: string,
  objectType: string,
  objectNumber: number,
  modelInstanceId: string,
  sourceUnitId: string,
): Routine {
  // Hash the signature once: it feeds both the canonical key and `sourceHash`,
  // so reusing the value avoids a second SHA-256 and keeps the two in lockstep.
  const signatureHash = abiSignatureHash(r);

  const canonical: CanonicalRoutineKey = {
    appGuid,
    objectType,
    objectNumber,
    routineKind: r.kind,
    routineName: r.name,
    normalizedSignatureHash: signatureHash,
  };

  // Project ABI visibility flags onto `accessModifier`. The .app `IsInternal` flag is
  // the only signal of internal visibility for dependency routines — without this
  // projection, D13 (cross-app internal call) never fires for `.app`-only callees.
  // `IsLocal` is also projected for completeness; AL's public default stays undefined.
  // `isInternal` takes precedence when both flags are set.
  let accessModifier: ProcedureAccessModifier | undefined;
  if (r.isInternal) {
    accessModifier = "internal";
  } else if (r.isLocal) {
    accessModifier = "local";
  }

  return {
    id: encodeRoutineId(canonical, modelInstanceId),
    canonical,
    objectId,
    name: r.name,
    kind: r.kind,
    parameters: r.parameters.map((p, index) => ({
      index,
      name: p.name,
      typeText: p.typeText,
      isVar: p.isVar,
      isRecord: RECORD_TYPE_TEXT.test(p.typeText),
    })),
    attributes: r.attributes,
    attributesParsed: r.attributesParsed,
    // Omit the key entirely for public routines instead of storing `undefined`.
    ...(accessModifier !== undefined ? { accessModifier } : {}),
    bodyAvailable: false,
    parseIncomplete: false,
    sourceHash: signatureHash,
    sourceAnchor: syntheticAnchor(sourceUnitId),
    features: {
      loops: [],
      operationSites: [],
      recordOperations: [],
      callSites: [],
      fieldAccesses: [],
      recordVariables: [],
      nestingDepth: 0,
      unreachableStatements: [],
      hasBranching: false,
      statementTree: undefined,
      identifierReferences: [],
      variables: [],
      varAssignments: [],
      conditionReferences: [],
    },
    analysisRole: "dependency",
  };
}
129
+
130
/**
 * Model entities produced by projecting a symbol-reference ABI; this is the
 * return shape of `projectAbiToIndex`.
 */
export interface ProjectedAbi {
  /** Object declarations, one per entry of the ABI's `objects` list. */
  objects: ObjectDecl[];
  /** Tables built from the ABI's `tables` list, with their fields and keys. */
  tables: Table[];
  /** Routines collected from every ABI object. */
  routines: Routine[];
}
135
+
136
/**
 * Project a `SymbolReferenceAbi` into model entities with dependency-safe
 * synthetic anchors.
 *
 * @param abi - Parsed symbol-reference ABI of one dependency package.
 * @param ref - Package reference; its `appGuid` scopes every minted id.
 * @param modelInstanceId - Model instance used when minting routine ids.
 * @returns The projected objects, tables and routines.
 */
export function projectAbiToIndex(
  abi: SymbolReferenceAbi,
  ref: DependencyPackageRef,
  modelInstanceId: string,
): ProjectedAbi {
  const { appGuid } = ref;
  const sourceUnitId = `dep:${appGuid}:__symbols__`;

  // Tables are keyed by object number: a later table with the same number
  // replaces the earlier one.
  const tablesByNumber = new Map<number, Table>();
  for (const table of abi.tables) {
    const tableNumber = table.objectNumber;
    const tableId = encodeTableId(appGuid, tableNumber);
    const declaringObjectId = encodeObjectId(appGuid, "Table", tableNumber);

    const fields = table.fields.map(
      (f): Field => ({
        id: encodeFieldId(tableId, f.fieldNumber),
        physicalTableId: tableId,
        declaringObjectId,
        declaringAppId: appGuid,
        fieldNumber: f.fieldNumber,
        name: f.name,
        fieldClass: f.fieldClass,
        dataType: f.dataType,
        isBlobLike: f.isBlobLike,
      }),
    );

    // Case-insensitive name -> field id lookup; on a name clash the last field wins.
    const fieldIdByLowerName = new Map<string, Field["id"]>();
    for (const field of fields) {
      fieldIdByLowerName.set(field.name.toLowerCase(), field.id);
    }

    const keys = table.keys.map((k, index): Key => {
      const keyId = encodeKeyId(tableId, index);
      // Key field names that match no projected field are dropped.
      const keyFieldIds: string[] = [];
      for (const fieldName of k.fieldNames) {
        const fieldId = fieldIdByLowerName.get(fieldName.toLowerCase());
        if (fieldId !== undefined) {
          keyFieldIds.push(fieldId);
        }
      }
      return {
        id: keyId,
        physicalTableId: tableId,
        declaringObjectId,
        fields: keyFieldIds,
      };
    });

    const projectedTable: Table = {
      id: tableId,
      appGuid,
      tableNumber,
      name: table.name,
      fields,
      keys,
    };
    tablesByNumber.set(tableNumber, projectedTable);
  }

  const objects: ObjectDecl[] = [];
  const routines: Routine[] = [];
  for (const obj of abi.objects) {
    const { objectType, objectNumber } = obj;
    const objectId = encodeObjectId(appGuid, objectType, objectNumber);

    const declaration: ObjectDecl = {
      id: objectId,
      appGuid,
      objectType,
      objectNumber,
      name: obj.name,
      sourceUnitId,
      sourceHash: sha256OfStrings([appGuid, objectType, String(objectNumber)]),
      sourceAnchor: syntheticAnchor(sourceUnitId),
      analysisRole: "dependency",
      // Optional members are added only when the ABI actually supplies them.
      ...(obj.objectSubtype !== undefined ? { objectSubtype: obj.objectSubtype } : {}),
      ...(obj.pageType !== undefined ? { pageType: obj.pageType } : {}),
    };
    objects.push(declaration);

    for (const abiRoutine of obj.routines) {
      const projectedRoutine = abiRoutineToRoutine(
        abiRoutine,
        objectId,
        appGuid,
        objectType,
        objectNumber,
        modelInstanceId,
        sourceUnitId,
      );
      routines.push(projectedRoutine);
    }
  }

  return { objects, tables: Array.from(tablesByNumber.values()), routines };
}