npm - al-sem - Versions diffs - 0.0.1 - Mend

al-sem 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (231) hide show

package/LICENSE +21 -0
package/README.md +361 -0
package/package.json +64 -0
package/scripts/d40-diff.ts +44 -0
package/scripts/fetch-native-parser.ts +179 -0
package/scripts/precision-sample.ts +99 -0
package/scripts/precision-study.ts +42 -0
package/scripts/precision-tabulate.ts +52 -0
package/src/cli/baseline.ts +31 -0
package/src/cli/diff.ts +199 -0
package/src/cli/events-chains.ts +56 -0
package/src/cli/events-fanout.ts +87 -0
package/src/cli/exit-code.ts +30 -0
package/src/cli/fingerprint-indexes.ts +130 -0
package/src/cli/fingerprint-query.ts +543 -0
package/src/cli/fingerprint-witness.ts +493 -0
package/src/cli/fingerprint.ts +292 -0
package/src/cli/format-compact-json.ts +45 -0
package/src/cli/format-events.ts +77 -0
package/src/cli/format-fingerprint.ts +295 -0
package/src/cli/format-html.ts +503 -0
package/src/cli/format-json.ts +13 -0
package/src/cli/format-policy.ts +95 -0
package/src/cli/format-sarif.ts +186 -0
package/src/cli/format-terminal.ts +153 -0
package/src/cli/index.ts +566 -0
package/src/cli/policy.ts +204 -0
package/src/config/roots-config.ts +302 -0
package/src/deps/cache-versions.ts +74 -0
package/src/deps/canonical-json.ts +27 -0
package/src/deps/dependency-artifact.ts +144 -0
package/src/deps/dependency-cache.ts +262 -0
package/src/deps/dependency-dag.ts +128 -0
package/src/deps/dependency-package-discovery.ts +85 -0
package/src/deps/dependency-pipeline.ts +483 -0
package/src/deps/dependency-projection.ts +211 -0
package/src/deps/dependency-resolver.ts +154 -0
package/src/deps/workspace-dependencies.ts +114 -0
package/src/detectors/capability-query.ts +145 -0
package/src/detectors/confidence.ts +52 -0
package/src/detectors/d1-db-op-in-loop.ts +457 -0
package/src/detectors/d10-self-modifying-loop.ts +114 -0
package/src/detectors/d11-modify-without-get.ts +129 -0
package/src/detectors/d12-dead-integration-event.ts +81 -0
package/src/detectors/d13-cross-app-internal-call.ts +105 -0
package/src/detectors/d14-dead-routine.ts +151 -0
package/src/detectors/d16-obsolete-routine-call.ts +94 -0
package/src/detectors/d17-min-version-drift.ts +157 -0
package/src/detectors/d18-constant-filter-in-loop.ts +151 -0
package/src/detectors/d19-unused-parameter.ts +116 -0
package/src/detectors/d2-event-fanout-in-loop.ts +240 -0
package/src/detectors/d20-unreachable-after-exit.ts +92 -0
package/src/detectors/d21-read-without-load.ts +128 -0
package/src/detectors/d22-flowfield-without-calcfields.ts +168 -0
package/src/detectors/d29-subscriber-modify-on-event-record.ts +163 -0
package/src/detectors/d3-load-state.ts +72 -0
package/src/detectors/d3-missing-setloadfields.ts +234 -0
package/src/detectors/d32-constant-boolean-parameter.ts +185 -0
package/src/detectors/d33-unfiltered-bulk-write.ts +173 -0
package/src/detectors/d34-commit-in-loop.ts +206 -0
package/src/detectors/d35-commit-in-event-subscriber.ts +138 -0
package/src/detectors/d36-late-setloadfields.ts +162 -0
package/src/detectors/d37-validate-without-persist.ts +271 -0
package/src/detectors/d38-subscriber-to-obsolete-event.ts +140 -0
package/src/detectors/d39-record-left-dirty-across-chain.ts +165 -0
package/src/detectors/d4-repeated-lookup-in-loop.ts +128 -0
package/src/detectors/d40-transitive-load-missing.ts +217 -0
package/src/detectors/d41-transitive-filter-loss.ts +200 -0
package/src/detectors/d42-cross-call-wrong-setloadfields.ts +243 -0
package/src/detectors/d43-event-ishandled-skip.ts +257 -0
package/src/detectors/d44-event-multi-subscriber-overlap.ts +223 -0
package/src/detectors/d45-event-transitive-table-exposure.ts +159 -0
package/src/detectors/d5-set-based-opportunity.ts +162 -0
package/src/detectors/d7-recursive-event-expansion.ts +151 -0
package/src/detectors/d8-commit-in-transaction.ts +132 -0
package/src/detectors/d9-transaction-span-summary.ts +107 -0
package/src/detectors/detector-context.ts +121 -0
package/src/detectors/finding-grouping.ts +61 -0
package/src/detectors/path-merge.ts +174 -0
package/src/detectors/registry.ts +176 -0
package/src/detectors/table-display.ts +42 -0
package/src/diff/diff-abi.ts +195 -0
package/src/diff/diff-capabilities.ts +179 -0
package/src/diff/diff-engine.ts +146 -0
package/src/diff/diff-events.ts +323 -0
package/src/diff/diff-identity.ts +73 -0
package/src/diff/diff-indexes.ts +199 -0
package/src/diff/diff-permissions.ts +260 -0
package/src/diff/diff-policy.ts +101 -0
package/src/diff/diff-preflight.ts +66 -0
package/src/diff/diff-renames.ts +104 -0
package/src/diff/diff-schema.ts +232 -0
package/src/diff/format-diff.ts +148 -0
package/src/engine/attribute-parser.ts +50 -0
package/src/engine/capability-cone.ts +531 -0
package/src/engine/combined-graph.ts +357 -0
package/src/engine/control-flow-walker.ts +1317 -0
package/src/engine/dispatch-sites.ts +199 -0
package/src/engine/effect-lattice.ts +81 -0
package/src/engine/entry-points.ts +57 -0
package/src/engine/event-flow.ts +524 -0
package/src/engine/event-relay.ts +92 -0
package/src/engine/op-classification.ts +92 -0
package/src/engine/path-walker.ts +189 -0
package/src/engine/reverse-call-graph.ts +23 -0
package/src/engine/root-classifier-overlay.ts +194 -0
package/src/engine/root-classifier.ts +135 -0
package/src/engine/scc.ts +110 -0
package/src/engine/source-anchor.ts +25 -0
package/src/engine/summary-context.ts +104 -0
package/src/engine/summary-engine.ts +296 -0
package/src/engine/summary-runner.ts +560 -0
package/src/engine/transaction-spans.ts +112 -0
package/src/engine/uncertainty-util.ts +54 -0
package/src/hash.ts +31 -0
package/src/index/attribute-from-node.ts +141 -0
package/src/index/callee-from-node.ts +181 -0
package/src/index/capability/background.ts +90 -0
package/src/index/capability/commit.ts +44 -0
package/src/index/capability/dispatch.ts +164 -0
package/src/index/capability/events.ts +65 -0
package/src/index/capability/extractor.ts +124 -0
package/src/index/capability/file-blob.ts +137 -0
package/src/index/capability/http.ts +159 -0
package/src/index/capability/hyperlink.ts +60 -0
package/src/index/capability/isolated-storage.ts +179 -0
package/src/index/capability/table.ts +113 -0
package/src/index/capability/telemetry.ts +84 -0
package/src/index/capability/ui.ts +55 -0
package/src/index/capability/value-source.ts +202 -0
package/src/index/expression-from-node.ts +117 -0
package/src/index/indexer.ts +102 -0
package/src/index/intraprocedural-body.ts +1467 -0
package/src/index/intraprocedural-ops.ts +253 -0
package/src/index/intraprocedural-refs.ts +188 -0
package/src/index/object-indexer.ts +279 -0
package/src/index/routine-indexer.ts +282 -0
package/src/index/routine-signature.ts +46 -0
package/src/index/variable-indexer.ts +134 -0
package/src/index/variable-initializer-extractor.ts +155 -0
package/src/index/variable-type-normalizer.ts +83 -0
package/src/index.ts +267 -0
package/src/mcp/server.ts +72 -0
package/src/mcp/session.ts +49 -0
package/src/mcp/tools/explain-path.ts +75 -0
package/src/mcp/tools/get-analysis-health.ts +62 -0
package/src/mcp/tools/get-finding.ts +47 -0
package/src/mcp/tools/get-routine-summary.ts +126 -0
package/src/mcp/tools/list-findings.ts +85 -0
package/src/mcp/tools/list-hotspots.ts +78 -0
package/src/mcp/tools/list-rollups.ts +103 -0
package/src/mcp/tools/validators.ts +25 -0
package/src/model/attributes.ts +120 -0
package/src/model/callee.ts +45 -0
package/src/model/capability.ts +187 -0
package/src/model/coverage.ts +85 -0
package/src/model/entities.ts +628 -0
package/src/model/expression.ts +98 -0
package/src/model/finding.ts +110 -0
package/src/model/graph-edge.ts +93 -0
package/src/model/graph.ts +62 -0
package/src/model/identity.ts +81 -0
package/src/model/ids.ts +90 -0
package/src/model/index.ts +13 -0
package/src/model/model.ts +51 -0
package/src/model/permission.ts +76 -0
package/src/model/root-classification.ts +116 -0
package/src/model/stable-identity.ts +102 -0
package/src/model/summary.ts +96 -0
package/src/parser/ast.ts +82 -0
package/src/parser/native/ffi.ts +145 -0
package/src/parser/native/parse-index-pool.ts +148 -0
package/src/parser/native/parse-index-worker.ts +94 -0
package/src/parser/native/wrapper.ts +353 -0
package/src/parser/parser-init.ts +43 -0
package/src/perf/profiler.ts +66 -0
package/src/policy/policy-default.yaml +83 -0
package/src/policy/policy-engine.ts +339 -0
package/src/policy/policy-loader.ts +257 -0
package/src/policy/policy-schema.json +379 -0
package/src/policy/policy-types.ts +81 -0
package/src/policy/predicate-compiler.ts +151 -0
package/src/policy/predicate-evaluator.ts +267 -0
package/src/policy/predicate-fields.ts +439 -0
package/src/projection/actionable-anchor.ts +48 -0
package/src/projection/finding-filters.ts +44 -0
package/src/projection/finding-fingerprint.ts +54 -0
package/src/projection/finding-groups.ts +41 -0
package/src/projection/finding-summary.ts +110 -0
package/src/projection/rollup-findings.ts +105 -0
package/src/providers/discover.ts +88 -0
package/src/providers/external.ts +46 -0
package/src/providers/types.ts +36 -0
package/src/providers/workspace.ts +117 -0
package/src/resolve/call-resolver.ts +117 -0
package/src/resolve/coverage.ts +61 -0
package/src/resolve/event-graph.ts +166 -0
package/src/resolve/implicit-edges.ts +53 -0
package/src/resolve/record-types.ts +36 -0
package/src/resolve/resolver.ts +23 -0
package/src/resolve/semantic-graph.ts +29 -0
package/src/resolve/symbol-table.ts +69 -0
package/src/snapshot/app-snapshot.ts +74 -0
package/src/snapshot/compose.ts +100 -0
package/src/snapshot/derive/callsite-evidence.ts +76 -0
package/src/snapshot/derive/capability-facts.ts +70 -0
package/src/snapshot/derive/contracts.ts +131 -0
package/src/snapshot/derive/coverage.ts +35 -0
package/src/snapshot/derive/event-declarations.ts +140 -0
package/src/snapshot/derive/identity-table.ts +58 -0
package/src/snapshot/derive/inputs.ts +91 -0
package/src/snapshot/derive/operation-evidence.ts +70 -0
package/src/snapshot/derive/permissions.ts +186 -0
package/src/snapshot/derive/root-classifications.ts +56 -0
package/src/snapshot/derive/schema.ts +130 -0
package/src/snapshot/derive/typed-edges.ts +60 -0
package/src/snapshot/derive/workspace-fingerprint.ts +19 -0
package/src/snapshot/deserialize.ts +40 -0
package/src/snapshot/serialize-cbor-gz.ts +12 -0
package/src/snapshot/serialize-cbor.ts +19 -0
package/src/snapshot/serialize-json.ts +22 -0
package/src/snapshot/shard.ts +134 -0
package/src/snapshot/types.ts +181 -0
package/src/symbols/app-manifest.ts +96 -0
package/src/symbols/app-package-zip.ts +50 -0
package/src/symbols/embedded-source-reader.ts +41 -0
package/src/symbols/package-hash.ts +81 -0
package/src/symbols/symbol-reader.ts +101 -0
package/src/symbols/symbol-reference-parser.ts +378 -0
package/src/symbols/symbol-reference-reader.ts +27 -0
package/tsconfig.json +18 -0

package/src/deps/dependency-pipeline.ts ADDED Viewed

@@ -0,0 +1,483 @@
+import { buildCombinedGraph } from "../engine/combined-graph.ts";
+import { runSummaries } from "../engine/summary-runner.ts";
+import { sha256Hex } from "../hash.ts";
+import { indexObjects } from "../index/object-indexer.ts";
+import { indexRoutines } from "../index/routine-indexer.ts";
+import type { App, Routine } from "../model/entities.ts";
+import type { Diagnostic } from "../model/finding.ts";
+import type { AppIdentity, ModelIdentity } from "../model/identity.ts";
+import type { SemanticIndex, SemanticModel } from "../model/model.ts";
+import { ParseIndexPool, defaultPoolSize } from "../parser/native/parse-index-pool.ts";
+import { NativeParserUnavailableError, parseALSource } from "../parser/parser-init.ts";
+import { makeLap } from "../perf/profiler.ts";
+import { resolveSemanticGraph } from "../resolve/semantic-graph.ts";
+import { iterateEmbeddedSource } from "../symbols/embedded-source-reader.ts";
+import { parseSymbolReference } from "../symbols/symbol-reference-parser.ts";
+import { readSymbolReferenceJson } from "../symbols/symbol-reference-reader.ts";
+import { CACHE_VERSIONS, devFingerprint } from "./cache-versions.ts";
+import {
+	DEPENDENCY_ARTIFACT_SCHEMA_VERSION,
+	type DependencyArtifact,
+	type DependencyPackageRef,
+} from "./dependency-artifact.ts";
+import { projectAbiToIndex } from "./dependency-projection.ts";
+/**
+ * An empty IntraproceduralFeatures — dependency routines carry no raw features in the artifact.
+ *
+ * `ingestDependencyApp` assigns this single object as `features` on every routine it projects,
+ * so all projected routines share one instance (and therefore the same empty arrays).
+ * NOTE(review): consumers presumably treat `features` as read-only — confirm before mutating it.
+ */
+const EMPTY_FEATURES: Routine["features"] = {
+	loops: [],
+	operationSites: [],
+	recordOperations: [],
+	callSites: [],
+	fieldAccesses: [],
+	recordVariables: [],
+	nestingDepth: 0,
+	unreachableStatements: [],
+	hasBranching: false,
+	statementTree: undefined,
+	identifierReferences: [],
+	varAssignments: [],
+	conditionReferences: [],
+};
+/**
+ * Deterministic preflight resource guard — see resourcePolicyVersion in cache-versions.ts.
+ *
+ * `buildAppModel` stops materializing embedded source once this many files have been collected
+ * and another one is encountered; it then emits a DEP022 diagnostic and keeps only the
+ * structural ABI for that app.
+ */
+const MAX_DEP_SOURCE_FILES = 30_000;
+/**
+ * Minimum embedded-source file count before the worker pool is worth the startup cost.
+ * Below this threshold, the sequential path wins because spinning up N workers (each
+ * dlopen's the native shim, initializes its Parser, and pays structured-clone overhead
+ * for results) costs more than the per-file work saved. Empirically: at ~100 files the
+ * worker pool starts breaking even; at 7,634 files (Microsoft Base Application) it's
+ * 2.5×–3× faster.
+ *
+ * The check in `buildAppModel` is `allFiles.length >= WORKER_POOL_MIN_FILES`, so an app with
+ * exactly this many files already takes the pool path.
+ */
+const WORKER_POOL_MIN_FILES = 200;
+/**
+ * Builds a blank coverage record: every counter is zero and every list is a new empty array.
+ * A fresh object is produced on each call, so callers never share list instances.
+ */
+function emptyCoverage(): SemanticModel["coverage"] {
+	const blank: SemanticModel["coverage"] = {
+		sourceUnitsTotal: 0,
+		sourceUnitsParsed: 0,
+		routinesTotal: 0,
+		routinesBodyAvailable: 0,
+		routinesParseIncomplete: [],
+		opaqueApps: [],
+		unresolvedCallsites: [],
+		dynamicDispatchSites: [],
+	};
+	return blank;
+}
+/**
+ * Minimal ModelIdentity for a dependency model analyzed in isolation — `modelInstanceId`
+ * carries the real key.
+ *
+ * The identity lists exactly one app (copied from `ref`), takes its version fields from
+ * `CACHE_VERSIONS`, uses the Unix epoch as `createdAt` instead of the current time, and
+ * leaves `dependencyGraphHash` empty.
+ */
+function depIdentity(ref: DependencyPackageRef): ModelIdentity {
+	const { appGuid, publisher, name, version } = ref;
+	// Packages that ship embedded source are tagged differently from symbol-only ones.
+	const sourceKind: AppIdentity["sourceKind"] = ref.includesSource
+		? "app-source"
+		: "symbol-only";
+	const epochTimestamp = new Date(0).toISOString();
+	return {
+		schemaVersion: "1",
+		analyzerVersion: CACHE_VERSIONS.analyzer,
+		grammarVersion: CACHE_VERSIONS.grammar,
+		symbolReaderVersion: CACHE_VERSIONS.symbolReader,
+		createdAt: epochTimestamp,
+		apps: [{ appGuid, publisher, name, version, sourceKind } satisfies AppIdentity],
+		dependencyGraphHash: "",
+	};
+}
+interface EmbeddedFile { // one embedded source file handed to the per-file pipelines below
+	relativePath: string; // used to build `dep:<appGuid>:<relativePath>` source-unit ids and DEP021 messages
+	content: string; // source text that is parsed and, on the inline path, hashed with `sha256Hex`
+}
+/**
+ * Sequential per-file pipeline: parses and indexes each embedded file in turn on the calling
+ * thread, appending the resulting objects, tables and routines to `index`. Every object and
+ * routine is stamped `analysisRole: "dependency"`, and each syntax tree is released before the
+ * next file is parsed.
+ *
+ * A file that fails to parse for any reason other than a missing native parser produces a
+ * DEP021 warning and is skipped.
+ *
+ * @returns `true` as soon as the parser reports itself unavailable (so the caller can fall
+ * back to structural-only mode); `false` after every file has been visited.
+ */
+async function runFilesInline(
+	allFiles: EmbeddedFile[],
+	ref: DependencyPackageRef,
+	modelInstanceId: string,
+	index: SemanticIndex,
+	diagnostics: Diagnostic[],
+): Promise<boolean> {
+	for (const { relativePath, content } of allFiles) {
+		const sourceUnitId = `dep:${ref.appGuid}:${relativePath}`;
+		let syntaxTree: Awaited<ReturnType<typeof parseALSource>>;
+		try {
+			syntaxTree = await parseALSource(content);
+		} catch (cause) {
+			if (!(cause instanceof NativeParserUnavailableError)) {
+				// Ordinary parse failure: report it and move on to the next file.
+				diagnostics.push({
+					severity: "warning",
+					stage: "parse",
+					message: `[DEP021] ${ref.name}: failed to parse ${relativePath}`,
+					sourceRef: relativePath,
+				});
+				continue;
+			}
+			// No native parser at all: stop immediately and let the caller fall back.
+			diagnostics.push({
+				severity: "error",
+				stage: "parse",
+				message: `[DEP021-NATIVE] ${ref.name}: native parser unavailable — embedded source skipped, falling back to structural-only ABI`,
+				sourceRef: ref.appGuid,
+			});
+			return true;
+		}
+		try {
+			const indexedObjects = indexObjects({
+				tree: syntaxTree,
+				appGuid: ref.appGuid,
+				sourceUnitId,
+				modelInstanceId,
+				sourceHash: sha256Hex(content),
+			});
+			for (const entry of indexedObjects) {
+				if (entry.object === undefined || entry.objectNode === undefined) continue;
+				entry.object.analysisRole = "dependency";
+				index.objects.push(entry.object);
+				if (entry.table !== undefined) index.tables.push(entry.table);
+				const objectRoutines = indexRoutines({
+					objectNode: entry.objectNode,
+					object: entry.object,
+					sourceUnitId,
+					modelInstanceId,
+				});
+				for (const routine of objectRoutines) {
+					routine.analysisRole = "dependency";
+					index.routines.push(routine);
+				}
+			}
+		} finally {
+			// Release the tree whether or not indexing succeeded.
+			syntaxTree.delete();
+		}
+	}
+	return false;
+}
+/**
+ * Parallel per-file pipeline. Spins up a Worker pool (size = `defaultPoolSize()`), dispatches
+ * every file as an independent job, awaits all results, then merges them into the index in
+ * sorted-by-relativePath order (deterministic). Returns `true` if any worker reports
+ * parser-unavailable.
+ *
+ * Merged objects and routines are stamped `analysisRole: "dependency"`. A result with any
+ * other failure kind yields a DEP021 warning for that file; the DEP021-NATIVE error is pushed
+ * at most once. The pool is terminated in `finally`, whether or not the merge completes.
+ *
+ * NOTE(review): results from files that did parse are still merged when another file reports
+ * parser-unavailable, and the caller then also projects the structural ABI — confirm that a
+ * partially available parser cannot register the same entities twice.
+ */
+async function runFilesInPool(
+	allFiles: EmbeddedFile[],
+	ref: DependencyPackageRef,
+	modelInstanceId: string,
+	index: SemanticIndex,
+	diagnostics: Diagnostic[],
+): Promise<boolean> {
+	const poolSize = defaultPoolSize();
+	const pool = new ParseIndexPool(poolSize);
+	let parserUnavailable = false;
+	try {
+		const jobs = allFiles.map((f) => ({
+			relativePath: f.relativePath,
+			content: f.content,
+			appGuid: ref.appGuid,
+			sourceUnitId: `dep:${ref.appGuid}:${f.relativePath}`,
+			modelInstanceId,
+		}));
+		// Bounded concurrency: at most `poolSize * 2` jobs in flight. Without this,
+		// Base Application (7,634 files × ~13 KB of source each) floods every worker's
+		// mailbox with the full archive's worth of structured-cloned strings — peak RSS
+		// hits multiple GB and the runtime OOMs.
+		const resultsByPath = new Map<string, Awaited<ReturnType<typeof pool.submit>>>();
+		await pool.mapBounded(jobs, poolSize * 2, (r) => {
+			// Record each result as it arrives. The job's `content` string is NOT released
+			// here — `jobs` and `allFiles` still reference it while this function runs.
+			// `resultsByPath` keys on relativePath which the result carries.
+			resultsByPath.set(r.relativePath, r);
+		});
+		// Merge in canonical (sorted) order to keep the index byte-stable regardless of
+		// worker completion order.
+		const sortedPaths = [...resultsByPath.keys()].sort();
+		for (const rp of sortedPaths) {
+			const r = resultsByPath.get(rp);
+			if (!r) continue;
+			if (!r.ok) {
+				if (r.errorKind === "parser-unavailable") {
+					if (!parserUnavailable) {
+						parserUnavailable = true;
+						diagnostics.push({
+							severity: "error",
+							stage: "parse",
+							message: `[DEP021-NATIVE] ${ref.name}: native parser unavailable — embedded source skipped, falling back to structural-only ABI`,
+							sourceRef: ref.appGuid,
+						});
+					}
+					continue;
+				}
+				diagnostics.push({
+					severity: "warning",
+					stage: "parse",
+					message: `[DEP021] ${ref.name}: failed to parse ${r.relativePath}`,
+					sourceRef: r.relativePath,
+				});
+				continue;
+			}
+			for (const o of r.objects) {
+				o.analysisRole = "dependency";
+				index.objects.push(o);
+			}
+			for (const t of r.tables) index.tables.push(t);
+			for (const rt of r.routines) rt.analysisRole = "dependency";
+			index.routines.push(...r.routines);
+		}
+	} finally {
+		pool.terminate();
+	}
+	return parserUnavailable;
+}
+export interface IngestDependencyAppOptions { // options accepted by `buildAppModel` and `ingestDependencyApp`
+	/**
+	 * Skip embedded-source parsing entirely; project the ABI from SymbolReference.json and
+	 * leave every routine with `summary: undefined`. Used by `--no-dep-summaries` so a cold
+	 * run on Microsoft Base Application doesn't spend wall-clock parsing 7 k+ files only to
+	 * have the summaries stripped at the resolver layer.
+	 *
+	 * `buildAppModel` checks this flag before `ref.includesSource`, so it wins even for
+	 * packages that ship source, and the resulting `summaryMode` is
+	 * "structural-only-no-dep-summaries". Omitted or `false` means the normal path.
+	 */
+	structuralOnly?: boolean;
+}
+export interface AppModelResult { // value returned by `buildAppModel`
+	model: SemanticModel; // the resolved model, after `runSummaries` has been run over it
+	diagnostics: Diagnostic[]; // diagnostics collected during the build, in emission order (not sorted)
+	summaryMode: DependencyArtifact["header"]["summaryMode"]; // "full" unless one of the structural-only fallbacks was taken
+}
+/**
+ * Parse a single `.app` (embedded source, or ABI projection when symbol-only) into a FULL
+ * SemanticModel: index → resolveSemanticGraph → buildCombinedGraph (populates typedEdges) →
+ * runSummaries. Shared by `ingestDependencyApp` (which reduces it to a cached artifact) and the
+ * snapshot-from-.app path (which keeps the full model). Entities are stamped
+ * `analysisRole: "dependency"`; callers wanting a primary subject remap afterward.
+ *
+ * The structural ABI projection is used instead of (or, after a parser failure, in addition
+ * to whatever was already indexed from) embedded source when any of these hold: the package
+ * ships no source, `options.structuralOnly` is set, the file-count resource guard trips, or
+ * the native parser is unavailable. The projection is skipped when SymbolReference.json is
+ * missing or could not be parsed; a DEP020 warning is recorded in that case.
+ *
+ * @param ref The package to analyze; `ref.appPath` is read for both SymbolReference.json and
+ *   the embedded source.
+ * @param lowerArtifacts Artifacts of lower dependencies; their ABI objects, tables and
+ *   routines are pushed into the index ahead of this app's own entities.
+ * @param artifactKey Used to derive the model instance id (`dep:<artifactKey>`).
+ * @param options See `IngestDependencyAppOptions`; defaults to `{}`.
+ * @returns The model, the diagnostics in emission order, and the `summaryMode` reached:
+ *   "full", "structural-only-no-dep-summaries", "structural-only-resource-guard" or
+ *   "structural-only-parser-unavailable".
+ */
+export async function buildAppModel(
+	ref: DependencyPackageRef,
+	lowerArtifacts: DependencyArtifact[],
+	artifactKey: string,
+	options: IngestDependencyAppOptions = {},
+): Promise<AppModelResult> {
+	const modelInstanceId = `dep:${artifactKey}`;
+	const diagnostics: Diagnostic[] = [];
+	// Phase profiler — stable labels (no per-run counts) so the benchmark harness can aggregate
+	// the same phase across runs/commits. Routes through the structured profiler.
+	const lap = makeLap(`ingest:${ref.name}:`);
+	// --- structural ABI from SymbolReference.json (authoritative for visibility) ---
+	const symJson = readSymbolReferenceJson(ref.appPath);
+	const abi = symJson === null ? null : parseSymbolReference(symJson);
+	lap("symbol-read+parse");
+	if (symJson === null) {
+		diagnostics.push({
+			severity: "warning",
+			stage: "symbol-read",
+			message: `[DEP020] ${ref.name}: SymbolReference.json missing — exported ABI unavailable, calls into this app stay opaque`,
+			sourceRef: ref.appGuid,
+		});
+	} else if (abi !== null && abi.error !== undefined) {
+		diagnostics.push({
+			severity: "warning",
+			stage: "symbol-read",
+			message: `[DEP020] ${ref.name}: ${abi.error}`,
+			sourceRef: ref.appGuid,
+		});
+	}
+	// --- build the dependency SemanticIndex ---
+	const index: SemanticIndex = {
+		identity: depIdentity(ref),
+		apps: [
+			{
+				appGuid: ref.appGuid,
+				publisher: ref.publisher,
+				name: ref.name,
+				version: ref.version,
+				analysisRole: "dependency",
+			} satisfies App,
+		],
+		objects: [],
+		routines: [],
+		tables: [],
+	};
+	// merge lower-dependency artifacts as fixed leaves: their routines already carry summaries,
+	// so `runSummaries` will treat them as immutable leaves (default isLeaf = has a summary).
+	for (const lower of lowerArtifacts) {
+		index.objects.push(...lower.abi.objects);
+		index.tables.push(...lower.abi.tables);
+		index.routines.push(...lower.abi.routines);
+	}
+	lap("merge-lowers");
+	let summaryMode: DependencyArtifact["header"]["summaryMode"] = "full";
+	let parserUnavailable = false;
+	if (options.structuralOnly) {
+		// --no-dep-summaries: skip embedded-source parsing entirely. The structural projection
+		// below produces bodyAvailable:false routines; runSummaries then has nothing to compute
+		// from a body, so summaries stay undefined.
+		summaryMode = "structural-only-no-dep-summaries";
+	} else if (ref.includesSource) {
+		// --- embedded-source path ---
+		// Materialize all .al entries up front (single bulk-unzip — see Phase P). Then either
+		// dispatch parse+index to a worker pool (for large deps where the per-file work
+		// dominates and parallelism pays off) or process inline (for small deps where worker
+		// startup would dwarf the savings).
+		const allFiles: { relativePath: string; content: string }[] = [];
+		let resourceGuardTripped = false;
+		for await (const file of iterateEmbeddedSource(ref.appPath)) {
+			if (allFiles.length >= MAX_DEP_SOURCE_FILES) {
+				resourceGuardTripped = true;
+				summaryMode = "structural-only-resource-guard";
+				diagnostics.push({
+					severity: "info",
+					stage: "symbol-read",
+					message: `[DEP022] ${ref.name}: exceeds ${MAX_DEP_SOURCE_FILES} source files — behavioral summaries skipped (structural ABI only)`,
+					sourceRef: ref.appGuid,
+				});
+				break;
+			}
+			allFiles.push(file);
+		}
+		lap("materialize-source");
+		if (!resourceGuardTripped && allFiles.length >= WORKER_POOL_MIN_FILES) {
+			// --- parallel worker-pool path ---
+			parserUnavailable = await runFilesInPool(allFiles, ref, modelInstanceId, index, diagnostics);
+			if (parserUnavailable) summaryMode = "structural-only-parser-unavailable";
+			lap("pool-parse+index");
+		} else if (!resourceGuardTripped) {
+			// --- sequential inline path ---
+			parserUnavailable = await runFilesInline(allFiles, ref, modelInstanceId, index, diagnostics);
+			if (parserUnavailable) summaryMode = "structural-only-parser-unavailable";
+			lap("inline-parse+index");
+		}
+	}
+	if (
+		!ref.includesSource ||
+		summaryMode === "structural-only-resource-guard" ||
+		summaryMode === "structural-only-no-dep-summaries" ||
+		parserUnavailable
+	) {
+		// --- symbol-only path: project the ABI into bodyAvailable:false routines ---
+		if (abi !== null && abi.error === undefined) {
+			const projected = projectAbiToIndex(abi, ref, modelInstanceId);
+			index.objects.push(...projected.objects);
+			index.tables.push(...projected.tables);
+			index.routines.push(...projected.routines);
+		}
+	}
+	// --- resolve + summarize the dependency model in isolation ---
+	const { callGraph, eventGraph } = resolveSemanticGraph(index);
+	lap("resolveSemanticGraph");
+	const depModel: SemanticModel = {
+		...index,
+		callGraph,
+		eventGraph,
+		coverage: emptyCoverage(),
+		rootClassifications: [],
+	};
+	const graph = buildCombinedGraph(depModel);
+	lap("buildCombinedGraph");
+	runSummaries(depModel, graph, diagnostics); // leaves = routines that already have a summary
+	lap("runSummaries");
+	return { model: depModel, diagnostics, summaryMode };
+}
+/**
+ * Ingest one dependency .app into a DependencyArtifact. The heavy lifting (symbol read,
+ * embedded-source parse — inline or via the worker pool — resolve and summarize) is delegated
+ * to `buildAppModel`; this function only reduces the resulting model to the compact artifact.
+ * Lower-dependency artifacts are merged in as fixed leaves (their routines already carry
+ * summaries) and are filtered back out of the artifact, which keeps only THIS app's entities.
+ *
+ * When `structuralOnly` is set, the embedded-source parse is skipped and every routine is
+ * emitted without a summary, producing an ABI-only artifact (`summaryMode:
+ * "structural-only-no-dep-summaries"`). The cache key is namespaced to this mode by the
+ * resolver, so a subsequent `--no-dep-summaries` run is a cache hit and a flag-flipped
+ * non-no-summaries run does not pick up the structural artifact.
+ *
+ * Output ordering is deterministic: objects, tables, routines and event publishers are sorted
+ * by `id`; diagnostics are sorted by `stage|message`, with ties left in emission order.
+ *
+ * @param ref The dependency package to ingest.
+ * @param lowerArtifacts Already-built artifacts of this package's own dependencies.
+ * @param artifactKey Cache key recorded in the header and used for the model instance id.
+ * @param options See `IngestDependencyAppOptions`; defaults to `{}`.
+ * @returns The artifact. `artifactContentHash`, `packageSemanticHash` and
+ *   `directDependencies` are left empty here and filled in by later stages.
+ */
+export async function ingestDependencyApp(
+	ref: DependencyPackageRef,
+	lowerArtifacts: DependencyArtifact[],
+	artifactKey: string,
+	options: IngestDependencyAppOptions = {},
+): Promise<DependencyArtifact> {
+	const {
+		model: depModel,
+		diagnostics,
+		summaryMode,
+	} = await buildAppModel(ref, lowerArtifacts, artifactKey, options);
+	const lap = makeLap(`ingest:${ref.name}:`);
+	// --- project to the compact artifact: keep only THIS app's entities, strip raw features
+	// AND strip transitive arrays from each routine summary. A routine's summary stores
+	// `dbEffects` and `uncertainties` accumulated TRANSITIVELY across its outgoing call
+	// graph; on Microsoft Base Application (~97 k routines) that produces a 1.26 GB
+	// artifact when each routine carries hundreds of inherited effect records.
+	// `capabilityFactsDirect` + `capabilityFactsInherited` + `coverage` carry the Phase
+	// 0b-β capability cone; detectors read them through the capability-query helpers
+	// (`touchesDbOf`, `mayCommit`, `writesTablesOf`, `publishesEventsOf`,
+	// `reachableCoverage`) — never directly. The pre-Phase-1c legacy boolean lattice
+	// (`touchesDb`, `commits`, `writesTables`, `publishesEvents`) is gone. ---
+	const byId = <T extends { id: string }>(a: T, b: T): number =>
+		a.id < b.id ? -1 : a.id > b.id ? 1 : 0;
+	const ownObjects = depModel.objects.filter((o) => o.appGuid === ref.appGuid).sort(byId);
+	const ownTables = depModel.tables.filter((t) => t.appGuid === ref.appGuid).sort(byId);
+	const stripSummary = summaryMode === "structural-only-no-dep-summaries";
+	const ownRoutines = depModel.routines
+		.filter((r) => r.canonical.appGuid === ref.appGuid)
+		.map((r) => {
+			// `summary` is structurally absent from `base` (via spread, not set to undefined).
+			// After the canonical-json fix (undefined keys are omitted), this is equivalent to
+			// an undefined value; the explicit destructure keeps it unambiguous.
+			const { summary: _orig, ...rest } = r;
+			void _orig;
+			const base = { ...rest, features: EMPTY_FEATURES, analysisRole: "dependency" as const };
+			if (stripSummary || r.summary === undefined) return base;
+			return {
+				...base,
+				summary: {
+					...r.summary,
+					// Keep only direct dbEffects (the routine's own ops); inherited effects are
+					// reconstructable from the call graph at the consumer's compose step.
+					dbEffects: r.summary.dbEffects.filter((e) => e.via === "direct"),
+					// Drop transitive uncertainties; the consumer recomposes them from its
+					// own edges into this routine if needed.
+					uncertainties: [],
+				},
+			};
+		})
+		.sort(byId);
+	// Index this app's object ids once so the publisher filter is a constant-time lookup per
+	// event instead of a scan over every own object. Typed `unknown` so the lookup accepts
+	// whatever id type the event carries, exactly like the `===` comparison it replaces.
+	const ownObjectIds = new Set<unknown>(ownObjects.map((o) => o.id));
+	const eventPublishers = [...depModel.eventGraph.events]
+		.filter((e) => ownObjectIds.has(e.publisherObjectId))
+		.sort(byId);
+	// Sort a copy with a proper three-way comparator: returning 0 for equal keys keeps the
+	// comparator consistent, so ties stay in emission order (Array.prototype.sort is stable)
+	// rather than landing in an engine-dependent order.
+	const diagnosticKey = (d: Diagnostic): string => `${d.stage}|${d.message}`;
+	const sortedDiagnostics = [...diagnostics].sort((a, b) => {
+		const keyA = diagnosticKey(a);
+		const keyB = diagnosticKey(b);
+		return keyA < keyB ? -1 : keyA > keyB ? 1 : 0;
+	});
+	lap("project-artifact");
+	return {
+		header: {
+			schemaVersion: DEPENDENCY_ARTIFACT_SCHEMA_VERSION,
+			versions: { ...CACHE_VERSIONS, devFingerprint: devFingerprint() },
+			artifactKey,
+			artifactContentHash: "", // filled by dependency-cache.ts on write
+			appIdentity: {
+				appGuid: ref.appGuid,
+				publisher: ref.publisher,
+				name: ref.name,
+				version: ref.version,
+			},
+			packageHash: ref.packageHash,
+			packageSemanticHash: "", // filled by the orchestrator (later task) which already computed it
+			directDependencies: [], // filled by the orchestrator (later task)
+			summaryMode,
+		},
+		abi: { objects: ownObjects, tables: ownTables, routines: ownRoutines, eventPublishers },
+		diagnostics: sortedDiagnostics,
+	};
+}

package/src/deps/dependency-projection.ts ADDED Viewed

@@ -0,0 +1,211 @@
+import { sha256Hex, sha256OfStrings } from "../hash.ts";
+import { canonicalRoutineSignature } from "../index/routine-signature.ts";
+import type {
+	Field,
+	Key,
+	ObjectDecl,
+	ParameterSymbol,
+	ProcedureAccessModifier,
+	Routine,
+	Table,
+} from "../model/entities.ts";
+import type { SourceAnchor } from "../model/identity.ts";
+import {
+	type CanonicalRoutineKey,
+	encodeFieldId,
+	encodeKeyId,
+	encodeObjectId,
+	encodeRoutineId,
+	encodeTableId,
+} from "../model/ids.ts";
+import type { AbiRoutine, SymbolReferenceAbi } from "../symbols/symbol-reference-parser.ts";
+import type { DependencyPackageRef } from "./dependency-artifact.ts";
+/**
+ * Build a placeholder, dependency-safe `SourceAnchor` for an entity that has no real
+ * source location. Only `sourceUnitId` varies: the range is all zeros, the enclosing
+ * routine id is the empty string, and the syntax kind is the literal `"synthetic"`.
+ *
+ * @param sourceUnitId - Source unit id to stamp on the anchor.
+ * @returns A fresh anchor object on every call.
+ */
+export function syntheticAnchor(sourceUnitId: string): SourceAnchor {
+	const range = { startLine: 0, startColumn: 0, endLine: 0, endColumn: 0 };
+	const anchor: SourceAnchor = {
+		sourceUnitId,
+		range,
+		enclosingRoutineId: "",
+		syntaxKind: "synthetic",
+	};
+	return anchor;
+}
+/**
+ * Adapt one ABI parameter to the structural `ParameterSymbol` shape expected by the
+ * canonical-signature helper. The ABI shape carries no record information, and
+ * `canonicalRoutineSignature` hashes types only, so `isRecord` / `tableName` get
+ * fixed safe defaults (`false` / `undefined`) rather than being derived from `typeText`.
+ *
+ * @param p - The ABI parameter to adapt.
+ * @param index - Position of the parameter; copied onto the result.
+ */
+function abiParamToSymbol(p: AbiRoutine["parameters"][number], index: number): ParameterSymbol {
+	const { name, typeText, isVar } = p;
+	return { index, name, typeText, isVar, isRecord: false, tableName: undefined };
+}
+/**
+ * SHA-256 (hex) of an ABI routine's canonical signature. The hash does not depend on
+ * any modelInstanceId. It uses the same canonical form as the native indexer
+ * (`routine-signature.ts`), so a routine declared in primary source and the same
+ * routine read from a `.app` symbol package mint the same `RoutineId`.
+ *
+ * @param r - The ABI routine; its name, parameters and return type text are hashed.
+ * @returns The hex digest of the canonical signature string.
+ */
+function abiSignatureHash(r: AbiRoutine): string {
+	const { name, parameters, returnTypeText } = r;
+	const structuralParams = parameters.map(abiParamToSymbol);
+	const signatureText = canonicalRoutineSignature(name, structuralParams, returnTypeText);
+	return sha256Hex(signatureText);
+}
+/**
+ * Build a body-less `Routine` model entity from an ABI routine declaration.
+ *
+ * The result is marked `analysisRole: "dependency"`, has `bodyAvailable: false`,
+ * an all-empty `features` record and a synthetic source anchor, because a symbol
+ * package exposes signatures only.
+ *
+ * @param r - The ABI routine to project.
+ * @param objectId - Id of the declaring object; stored as the routine's `objectId`.
+ * @param appGuid - App GUID recorded in the canonical routine key.
+ * @param objectType - Declaring object's type, recorded in the canonical routine key.
+ * @param objectNumber - Declaring object's number, recorded in the canonical routine key.
+ * @param modelInstanceId - Passed to `encodeRoutineId` together with the canonical key.
+ * @param sourceUnitId - Source unit id used for the synthetic anchor.
+ * @returns The projected routine.
+ */
+function abiRoutineToRoutine(
+	r: AbiRoutine,
+	objectId: string,
+	appGuid: string,
+	objectType: string,
+	objectNumber: number,
+	modelInstanceId: string,
+	sourceUnitId: string,
+): Routine {
+	// Compute the signature hash once: it is used both as the canonical key's
+	// `normalizedSignatureHash` and as the routine's `sourceHash`.
+	const signatureHash = abiSignatureHash(r);
+	const canonical: CanonicalRoutineKey = {
+		appGuid,
+		objectType,
+		objectNumber,
+		routineKind: r.kind,
+		routineName: r.name,
+		normalizedSignatureHash: signatureHash,
+	};
+	// Project ABI visibility flags onto `accessModifier`. The .app `IsInternal` flag is
+	// the only signal of internal visibility for dependency routines — without this
+	// projection, D13 (cross-app internal call) never fires for `.app`-only callees.
+	// `IsLocal` is also projected for completeness; AL's public default stays undefined.
+	// `isInternal` takes precedence when both flags are set.
+	const accessModifier: ProcedureAccessModifier | undefined = r.isInternal
+		? "internal"
+		: r.isLocal
+			? "local"
+			: undefined;
+	// A parameter counts as a record when its type text starts with the word
+	// `Record` (case-insensitive). Hoisted so the pattern is built once per routine.
+	const recordTypePattern = /^Record\b/i;
+	return {
+		id: encodeRoutineId(canonical, modelInstanceId),
+		canonical,
+		objectId,
+		name: r.name,
+		kind: r.kind,
+		parameters: r.parameters.map((p, index) => ({
+			index,
+			name: p.name,
+			typeText: p.typeText,
+			isVar: p.isVar,
+			isRecord: recordTypePattern.test(p.typeText),
+		})),
+		attributes: r.attributes,
+		attributesParsed: r.attributesParsed,
+		// Only emit the key when a modifier applies, so the property is absent (not
+		// present-with-undefined) for public routines.
+		...(accessModifier !== undefined ? { accessModifier } : {}),
+		bodyAvailable: false,
+		parseIncomplete: false,
+		sourceHash: signatureHash,
+		sourceAnchor: syntheticAnchor(sourceUnitId),
+		features: {
+			loops: [],
+			operationSites: [],
+			recordOperations: [],
+			callSites: [],
+			fieldAccesses: [],
+			recordVariables: [],
+			nestingDepth: 0,
+			unreachableStatements: [],
+			hasBranching: false,
+			statementTree: undefined,
+			identifierReferences: [],
+			variables: [],
+			varAssignments: [],
+			conditionReferences: [],
+		},
+		analysisRole: "dependency",
+	};
+}
+/** The model entities returned by `projectAbiToIndex`. */
+export interface ProjectedAbi {
+	/** Object declarations, one per entry in the ABI's `objects` list. */
+	objects: ObjectDecl[];
+	/** Tables projected from the ABI's `tables` list. */
+	tables: Table[];
+	/** Routines projected from the routines of each ABI object. */
+	routines: Routine[];
+}
+/**
+ * Convert a parsed symbol-reference ABI into model entities for one dependency package.
+ *
+ * All objects and routines share a synthetic source unit id of the form
+ * `dep:<appGuid>:__symbols__` and carry a dependency-safe synthetic anchor. Tables are
+ * collected by object number, so a later table with the same number replaces an earlier
+ * one. Key field names are matched to fields case-insensitively; names with no matching
+ * field are left out of the key.
+ *
+ * @param abi - The ABI to project (its `tables` and `objects` are read).
+ * @param ref - The dependency package; only `appGuid` is read here.
+ * @param modelInstanceId - Forwarded to routine id encoding.
+ * @returns The projected objects, tables and routines.
+ */
+export function projectAbiToIndex(
+	abi: SymbolReferenceAbi,
+	ref: DependencyPackageRef,
+	modelInstanceId: string,
+): ProjectedAbi {
+	const { appGuid } = ref;
+	const sourceUnitId = `dep:${appGuid}:__symbols__`;
+
+	// --- Tables -----------------------------------------------------------------
+	const tablesByNumber = new Map<number, Table>();
+	for (const abiTable of abi.tables) {
+		const tableNumber = abiTable.objectNumber;
+		const tableId = encodeTableId(appGuid, tableNumber);
+		const declaringObjectId = encodeObjectId(appGuid, "Table", tableNumber);
+
+		const fields: Field[] = [];
+		for (const abiField of abiTable.fields) {
+			fields.push({
+				id: encodeFieldId(tableId, abiField.fieldNumber),
+				physicalTableId: tableId,
+				declaringObjectId,
+				declaringAppId: appGuid,
+				fieldNumber: abiField.fieldNumber,
+				name: abiField.name,
+				fieldClass: abiField.fieldClass,
+				dataType: abiField.dataType,
+				isBlobLike: abiField.isBlobLike,
+			});
+		}
+
+		// Lower-cased name lookup; a repeated name keeps the last field seen.
+		const fieldsByLowerName = new Map<string, Field>();
+		for (const field of fields) {
+			fieldsByLowerName.set(field.name.toLowerCase(), field);
+		}
+
+		const keys: Key[] = abiTable.keys.map((abiKey, keyIndex) => {
+			const keyId = encodeKeyId(tableId, keyIndex);
+			// Resolve each key field name to a field id, skipping unknown names.
+			const resolvedFieldIds = abiKey.fieldNames.flatMap((fieldName) => {
+				const match = fieldsByLowerName.get(fieldName.toLowerCase());
+				return match === undefined ? [] : [match.id];
+			});
+			return {
+				id: keyId,
+				physicalTableId: tableId,
+				declaringObjectId,
+				fields: resolvedFieldIds,
+			};
+		});
+
+		tablesByNumber.set(tableNumber, {
+			id: tableId,
+			appGuid,
+			tableNumber,
+			name: abiTable.name,
+			fields,
+			keys,
+		});
+	}
+
+	// --- Objects and their routines ---------------------------------------------
+	const objects: ObjectDecl[] = [];
+	const routines: Routine[] = [];
+	for (const abiObject of abi.objects) {
+		const { objectType, objectNumber, objectSubtype, pageType } = abiObject;
+		const objectId = encodeObjectId(appGuid, objectType, objectNumber);
+		const decl: ObjectDecl = {
+			id: objectId,
+			appGuid,
+			objectType,
+			objectNumber,
+			name: abiObject.name,
+			sourceUnitId,
+			sourceHash: sha256OfStrings([appGuid, objectType, String(objectNumber)]),
+			sourceAnchor: syntheticAnchor(sourceUnitId),
+			analysisRole: "dependency",
+			// Optional properties are added only when the ABI actually provides them.
+			...(objectSubtype !== undefined ? { objectSubtype } : {}),
+			...(pageType !== undefined ? { pageType } : {}),
+		};
+		objects.push(decl);
+
+		for (const abiRoutine of abiObject.routines) {
+			const projected = abiRoutineToRoutine(
+				abiRoutine,
+				objectId,
+				appGuid,
+				objectType,
+				objectNumber,
+				modelInstanceId,
+				sourceUnitId,
+			);
+			routines.push(projected);
+		}
+	}
+
+	return { objects, tables: Array.from(tablesByNumber.values()), routines };
+}