archbyte 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/archbyte.js +10 -2
- package/dist/agents/pipeline/agents/service-describer.js +4 -4
- package/dist/agents/pipeline/index.d.ts +1 -1
- package/dist/agents/pipeline/index.js +6 -4
- package/dist/agents/pipeline/merger.js +26 -34
- package/dist/agents/static/ignore.d.ts +12 -0
- package/dist/agents/static/ignore.js +140 -0
- package/dist/agents/static/index.d.ts +2 -1
- package/dist/agents/static/index.js +52 -4
- package/dist/agents/static/redactor.d.ts +12 -0
- package/dist/agents/static/redactor.js +206 -0
- package/dist/agents/static/utils.d.ts +3 -1
- package/dist/agents/static/utils.js +34 -11
- package/dist/cli/analyze.d.ts +1 -0
- package/dist/cli/analyze.js +46 -9
- package/dist/cli/run.d.ts +1 -0
- package/dist/cli/run.js +2 -1
- package/dist/cli/serve.d.ts +1 -0
- package/dist/cli/serve.js +1 -0
- package/dist/cli/transparency.d.ts +36 -0
- package/dist/cli/transparency.js +214 -0
- package/dist/cli/yaml-io.d.ts +14 -0
- package/dist/cli/yaml-io.js +15 -0
- package/dist/server/src/index.d.ts +1 -0
- package/dist/server/src/index.js +96 -4
- package/package.json +1 -1
- package/templates/archbyte.yaml +20 -0
- package/ui/dist/assets/index-CWGPRsWP.js +72 -0
- package/ui/dist/index.html +1 -1
- package/ui/dist/assets/index-QllGSFhe.js +0 -72
package/bin/archbyte.js
CHANGED
|
@@ -22,6 +22,11 @@ import { handleVersion, handleUpdate } from '../dist/cli/version.js';
|
|
|
22
22
|
import { requireLicense } from '../dist/cli/license-gate.js';
|
|
23
23
|
import { DEFAULT_PORT } from '../dist/cli/constants.js';
|
|
24
24
|
|
|
25
|
+
// When spawned by `archbyte serve` (internal), skip interactive license checks.
|
|
26
|
+
// The user already authenticated when they started the server.
|
|
27
|
+
const isInternal = process.env.ARCHBYTE_INTERNAL === '1';
|
|
28
|
+
const gate = isInternal ? async () => {} : requireLicense;
|
|
29
|
+
|
|
25
30
|
const require = createRequire(import.meta.url);
|
|
26
31
|
const { version: PKG_VERSION } = require('../package.json');
|
|
27
32
|
|
|
@@ -93,6 +98,7 @@ program
|
|
|
93
98
|
.option('-v, --verbose', 'Show detailed output')
|
|
94
99
|
.option('--force', 'Force full re-scan (skip incremental detection)')
|
|
95
100
|
.option('--dry-run', 'Preview without running')
|
|
101
|
+
.option('--debug', 'Show transparency report (what data is collected and sent)')
|
|
96
102
|
.action(async (options) => {
|
|
97
103
|
// handleRun manages login + setup + requireLicense internally
|
|
98
104
|
await handleRun(options);
|
|
@@ -110,8 +116,9 @@ program
|
|
|
110
116
|
.option('--skip-llm', 'Alias for --static')
|
|
111
117
|
.option('--force', 'Force full re-scan (skip incremental detection)')
|
|
112
118
|
.option('--dry-run', 'Preview without running')
|
|
119
|
+
.option('--debug', 'Show transparency report (what data is collected and sent)')
|
|
113
120
|
.action(async (options) => {
|
|
114
|
-
await requireLicense('analyze');
|
|
121
|
+
await gate('analyze');
|
|
115
122
|
await handleAnalyze(options);
|
|
116
123
|
});
|
|
117
124
|
|
|
@@ -122,7 +129,7 @@ program
|
|
|
122
129
|
.option('-o, --output <path>', 'Output diagram (default: .archbyte/architecture.json)')
|
|
123
130
|
.option('-v, --verbose', 'Show detailed output')
|
|
124
131
|
.action(async (options) => {
|
|
125
|
-
await requireLicense('generate');
|
|
132
|
+
await gate('generate');
|
|
126
133
|
await handleGenerate(options);
|
|
127
134
|
});
|
|
128
135
|
|
|
@@ -131,6 +138,7 @@ program
|
|
|
131
138
|
.description('Start the visualization UI server')
|
|
132
139
|
.option('-p, --port <number>', `Server port (default: ${DEFAULT_PORT})`, parseInt)
|
|
133
140
|
.option('-d, --diagram <path>', 'Path to architecture JSON (default: .archbyte/architecture.json)')
|
|
141
|
+
.option('--debug', 'Enable transparency endpoint (/api/transparency)')
|
|
134
142
|
.action(async (options) => {
|
|
135
143
|
await handleServe(options);
|
|
136
144
|
});
|
|
package/dist/agents/pipeline/agents/service-describer.js
CHANGED
|
@@ -33,13 +33,13 @@ export const serviceDescriber = {
|
|
|
33
33
|
parts.push(`Detected language: ${ctx.structure.language}`);
|
|
34
34
|
parts.push(`Languages: ${ctx.structure.languages.join(", ") || "none"}`);
|
|
35
35
|
parts.push(`Framework: ${ctx.structure.framework ?? "none"}`);
|
|
36
|
-
// Docs
|
|
36
|
+
// Docs — only project description, NOT externalDependencies.
|
|
37
|
+
// Doc-extracted dependency mentions prime the LLM to hallucinate phantom services
|
|
38
|
+
// (e.g., docs mention "MCP" → LLM creates "MCP Server" component).
|
|
39
|
+
// The LLM should discover services from actual code evidence only.
|
|
37
40
|
if (ctx.docs.projectDescription) {
|
|
38
41
|
parts.push(`\nFrom docs: ${ctx.docs.projectDescription}`);
|
|
39
42
|
}
|
|
40
|
-
if (ctx.docs.externalDependencies.length > 0) {
|
|
41
|
-
parts.push(`\nExternal dependencies mentioned: ${ctx.docs.externalDependencies.join(", ")}`);
|
|
42
|
-
}
|
|
43
43
|
// Docker services — only include if infra/config files changed (or full scan)
|
|
44
44
|
if (ctx.infra.docker.composeFile && (hasInfraChanges || hasConfigChanges)) {
|
|
45
45
|
const svcInfo = ctx.infra.docker.services.map((s) => {
|
|
package/dist/agents/pipeline/index.d.ts
CHANGED
|
@@ -6,7 +6,7 @@ import type { IncrementalContext } from "./types.js";
|
|
|
6
6
|
* Run the multi-agent pipeline: 3 parallel fast agents → 2 sequential agents.
|
|
7
7
|
* Each agent gets a single chat() call with pre-collected static context.
|
|
8
8
|
*/
|
|
9
|
-
export declare function runPipeline(ctx: StaticContext, provider: LLMProvider, config: ArchByteConfig, onProgress?: (msg: string) => void, incrementalContext?: IncrementalContext): Promise<StaticAnalysisResult & {
|
|
9
|
+
export declare function runPipeline(ctx: StaticContext, provider: LLMProvider, config: ArchByteConfig, onProgress?: (msg: string) => void, incrementalContext?: IncrementalContext, onDebug?: (agentId: string, model: string, system: string, user: string) => void): Promise<StaticAnalysisResult & {
|
|
10
10
|
tokenUsage?: {
|
|
11
11
|
input: number;
|
|
12
12
|
output: number;
|
|
package/dist/agents/pipeline/index.js
CHANGED
|
@@ -92,7 +92,7 @@ function getFallbackData(agentId, inc) {
|
|
|
92
92
|
* Run the multi-agent pipeline: 3 parallel fast agents → 2 sequential agents.
|
|
93
93
|
* Each agent gets a single chat() call with pre-collected static context.
|
|
94
94
|
*/
|
|
95
|
-
export async function runPipeline(ctx, provider, config, onProgress, incrementalContext) {
|
|
95
|
+
export async function runPipeline(ctx, provider, config, onProgress, incrementalContext, onDebug) {
|
|
96
96
|
const agentResults = {};
|
|
97
97
|
const agentMeta = [];
|
|
98
98
|
const skippedAgents = [];
|
|
@@ -118,7 +118,7 @@ export async function runPipeline(ctx, provider, config, onProgress, incremental
|
|
|
118
118
|
agentResults[agent.id] = fallback;
|
|
119
119
|
return Promise.resolve(null);
|
|
120
120
|
}
|
|
121
|
-
return runAgent(agent, ctx, provider, config, parallelPrior, onProgress);
|
|
121
|
+
return runAgent(agent, ctx, provider, config, parallelPrior, onProgress, onDebug);
|
|
122
122
|
}));
|
|
123
123
|
let authFailed = false;
|
|
124
124
|
for (let i = 0; i < parallelTasks.length; i++) {
|
|
@@ -156,7 +156,7 @@ export async function runPipeline(ctx, provider, config, onProgress, incremental
|
|
|
156
156
|
continue;
|
|
157
157
|
}
|
|
158
158
|
try {
|
|
159
|
-
const result = await runAgent(agent, ctx, provider, config, agentResults, onProgress);
|
|
159
|
+
const result = await runAgent(agent, ctx, provider, config, agentResults, onProgress, onDebug);
|
|
160
160
|
if (result) {
|
|
161
161
|
agentResults[agent.id] = result.data;
|
|
162
162
|
agentMeta.push(result);
|
|
@@ -214,10 +214,12 @@ const MAX_TOKENS = {
|
|
|
214
214
|
"flow-detector": 4096,
|
|
215
215
|
"validator": 4096,
|
|
216
216
|
};
|
|
217
|
-
async function runAgent(agent, ctx, provider, config, priorResults, onProgress) {
|
|
217
|
+
async function runAgent(agent, ctx, provider, config, priorResults, onProgress, onDebug) {
|
|
218
218
|
const start = Date.now();
|
|
219
219
|
const model = resolveModel(config.provider, agent.modelTier, config.modelOverrides, config.model);
|
|
220
220
|
const { system, user } = agent.buildPrompt(ctx, priorResults);
|
|
221
|
+
// Debug callback — report what data is being sent
|
|
222
|
+
onDebug?.(agent.id, model, system, user);
|
|
221
223
|
onProgress?.(` ${agent.name}: calling ${model}...`);
|
|
222
224
|
const maxTokens = MAX_TOKENS[agent.id] ?? 4096;
|
|
223
225
|
const response = await provider.chat({
|
|
package/dist/agents/pipeline/merger.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
// Pipeline — Merger
|
|
2
2
|
// Assembles all agent outputs into a StaticAnalysisResult
|
|
3
|
+
import { categorizeDep } from "../static/taxonomy.js";
|
|
3
4
|
function sanitize(s) {
|
|
4
5
|
if (!s)
|
|
5
6
|
return s;
|
|
@@ -9,21 +10,24 @@ function sanitize(s) {
|
|
|
9
10
|
* Build a set of "evidence tokens" from the static context — things that concretely
|
|
10
11
|
* exist in the codebase (dependencies, env vars, docker images/services).
|
|
11
12
|
* Used to gate LLM-generated databases/external services against hallucination.
|
|
13
|
+
*
|
|
14
|
+
* Uses the package taxonomy to resolve package names to their display names
|
|
15
|
+
* (e.g., "pg" → also adds "postgresql", "stripe" → also adds "stripe").
|
|
16
|
+
* This lets the LLM use human-readable names while still requiring code evidence.
|
|
12
17
|
*/
|
|
13
18
|
function buildEvidenceTokens(ctx) {
|
|
14
19
|
const tokens = new Set();
|
|
20
|
+
/** Add a dependency name + its taxonomy display name as tokens. */
|
|
21
|
+
function addDep(dep) {
|
|
22
|
+
tokens.add(dep.toLowerCase());
|
|
23
|
+
const cat = categorizeDep(dep);
|
|
24
|
+
if (cat)
|
|
25
|
+
tokens.add(cat.displayName.toLowerCase());
|
|
26
|
+
}
|
|
15
27
|
// Package dependencies from import map (codeSamples.importMap: file → imported modules)
|
|
16
28
|
for (const imports of Object.values(ctx.codeSamples.importMap)) {
|
|
17
|
-
for (const imp of imports)
|
|
18
|
-
|
|
19
|
-
// Also add short name for scoped packages: @aws-sdk/client-s3 → client-s3, aws-sdk
|
|
20
|
-
if (imp.startsWith("@")) {
|
|
21
|
-
const parts = imp.split("/");
|
|
22
|
-
if (parts[1])
|
|
23
|
-
tokens.add(parts[1].toLowerCase());
|
|
24
|
-
tokens.add(parts[0].slice(1).toLowerCase());
|
|
25
|
-
}
|
|
26
|
-
}
|
|
29
|
+
for (const imp of imports)
|
|
30
|
+
addDep(imp);
|
|
27
31
|
}
|
|
28
32
|
// Config files may contain dependency info (package.json deps etc.)
|
|
29
33
|
for (const cfg of ctx.codeSamples.configFiles) {
|
|
@@ -31,13 +35,7 @@ function buildEvidenceTokens(ctx) {
|
|
|
31
35
|
try {
|
|
32
36
|
const pkg = JSON.parse(cfg.content);
|
|
33
37
|
for (const dep of Object.keys({ ...pkg.dependencies, ...pkg.devDependencies })) {
|
|
34
|
-
|
|
35
|
-
if (dep.startsWith("@")) {
|
|
36
|
-
const parts = dep.split("/");
|
|
37
|
-
if (parts[1])
|
|
38
|
-
tokens.add(parts[1].toLowerCase());
|
|
39
|
-
tokens.add(parts[0].slice(1).toLowerCase());
|
|
40
|
-
}
|
|
38
|
+
addDep(dep);
|
|
41
39
|
}
|
|
42
40
|
}
|
|
43
41
|
catch { /* ignore parse errors */ }
|
|
@@ -59,35 +57,29 @@ function buildEvidenceTokens(ctx) {
|
|
|
59
57
|
for (const s of ctx.infra.cloud.services) {
|
|
60
58
|
tokens.add(s.toLowerCase());
|
|
61
59
|
}
|
|
62
|
-
//
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
60
|
+
// NOTE: ctx.docs.externalDependencies intentionally excluded.
|
|
61
|
+
// Doc mentions (from markdown/README) are not concrete code evidence and cause
|
|
62
|
+
// hallucination — the LLM sees "MCP" in docs and creates phantom components.
|
|
63
|
+
// Only code-level signals (imports, deps, env vars, Docker, cloud) count.
|
|
66
64
|
return tokens;
|
|
67
65
|
}
|
|
68
66
|
/**
|
|
69
|
-
* Check if a service/database
|
|
70
|
-
*
|
|
67
|
+
* Check if a service/database has concrete evidence in the static context.
|
|
68
|
+
* Strict exact-match only — no substring/regex fuzzy matching.
|
|
69
|
+
* The taxonomy enriches evidence tokens with display names (pg → PostgreSQL)
|
|
70
|
+
* so the LLM can use human-readable names and still match.
|
|
71
71
|
*/
|
|
72
72
|
function hasEvidence(id, name, type, evidenceTokens) {
|
|
73
|
-
// Build candidate keywords from the service
|
|
74
73
|
const candidates = [
|
|
75
74
|
id.toLowerCase(),
|
|
76
75
|
name.toLowerCase(),
|
|
77
76
|
type.toLowerCase(),
|
|
78
|
-
// Split hyphenated IDs: "aws-sqs" →
|
|
77
|
+
// Split hyphenated IDs: "aws-sqs" → also check "aws", "sqs"
|
|
79
78
|
...id.toLowerCase().split("-"),
|
|
80
79
|
].filter(Boolean);
|
|
81
80
|
for (const candidate of candidates) {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
if (token === candidate)
|
|
85
|
-
return true;
|
|
86
|
-
if (token.includes(candidate) && candidate.length >= 3)
|
|
87
|
-
return true;
|
|
88
|
-
if (candidate.includes(token) && token.length >= 3)
|
|
89
|
-
return true;
|
|
90
|
-
}
|
|
81
|
+
if (evidenceTokens.has(candidate))
|
|
82
|
+
return true;
|
|
91
83
|
}
|
|
92
84
|
return false;
|
|
93
85
|
}
|
|
package/dist/agents/static/ignore.d.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export interface IgnoreFilter {
|
|
2
|
+
/** Returns true if the relative path should be excluded from analysis */
|
|
3
|
+
isIgnored(relativePath: string): boolean;
|
|
4
|
+
/** Number of active patterns (excluding comments and blank lines) */
|
|
5
|
+
patternCount: number;
|
|
6
|
+
}
|
|
7
|
+
/**
|
|
8
|
+
* Load `.archbyteignore` from the project root.
|
|
9
|
+
* Returns an IgnoreFilter that matches paths against the patterns.
|
|
10
|
+
* If the file doesn't exist, returns a no-op filter that ignores nothing.
|
|
11
|
+
*/
|
|
12
|
+
export declare function loadIgnoreFile(projectRoot: string): IgnoreFilter;
|
|
package/dist/agents/static/ignore.js
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
// .archbyteignore — File exclusion filter
|
|
2
|
+
// Supports .gitignore-style patterns: # comments, ! negation, ** globstar, * wildcard
|
|
3
|
+
import * as fs from "fs";
|
|
4
|
+
import * as path from "path";
|
|
5
|
+
/**
|
|
6
|
+
* Load `.archbyteignore` from the project root.
|
|
7
|
+
* Returns an IgnoreFilter that matches paths against the patterns.
|
|
8
|
+
* If the file doesn't exist, returns a no-op filter that ignores nothing.
|
|
9
|
+
*/
|
|
10
|
+
export function loadIgnoreFile(projectRoot) {
|
|
11
|
+
const ignorePath = path.join(projectRoot, ".archbyteignore");
|
|
12
|
+
if (!fs.existsSync(ignorePath)) {
|
|
13
|
+
return { isIgnored: () => false, patternCount: 0 };
|
|
14
|
+
}
|
|
15
|
+
const content = fs.readFileSync(ignorePath, "utf-8");
|
|
16
|
+
const rules = parseIgnorePatterns(content);
|
|
17
|
+
return {
|
|
18
|
+
isIgnored(relativePath) {
|
|
19
|
+
// Normalize path separators
|
|
20
|
+
const normalized = relativePath.replace(/\\/g, "/").replace(/^\//, "");
|
|
21
|
+
let ignored = false;
|
|
22
|
+
for (const rule of rules) {
|
|
23
|
+
if (rule.pattern.test(normalized)) {
|
|
24
|
+
ignored = !rule.negated;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
return ignored;
|
|
28
|
+
},
|
|
29
|
+
patternCount: rules.length,
|
|
30
|
+
};
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Parse .gitignore-style content into an ordered list of rules.
|
|
34
|
+
*/
|
|
35
|
+
function parseIgnorePatterns(content) {
|
|
36
|
+
const rules = [];
|
|
37
|
+
for (const rawLine of content.split("\n")) {
|
|
38
|
+
const line = rawLine.trim();
|
|
39
|
+
// Skip blank lines and comments
|
|
40
|
+
if (!line || line.startsWith("#"))
|
|
41
|
+
continue;
|
|
42
|
+
let pattern = line;
|
|
43
|
+
let negated = false;
|
|
44
|
+
// Handle negation
|
|
45
|
+
if (pattern.startsWith("!")) {
|
|
46
|
+
negated = true;
|
|
47
|
+
pattern = pattern.slice(1);
|
|
48
|
+
}
|
|
49
|
+
// Remove trailing spaces (unless escaped)
|
|
50
|
+
pattern = pattern.replace(/(?<!\\)\s+$/, "");
|
|
51
|
+
if (!pattern)
|
|
52
|
+
continue;
|
|
53
|
+
const regex = patternToRegex(pattern);
|
|
54
|
+
rules.push({ pattern: regex, negated });
|
|
55
|
+
}
|
|
56
|
+
return rules;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Convert a .gitignore-style pattern to a RegExp.
|
|
60
|
+
* Supports: * (any non-slash), ** (any including slashes), ? (single char),
|
|
61
|
+
* trailing / (directory match), leading / (root-anchored).
|
|
62
|
+
*/
|
|
63
|
+
function patternToRegex(pattern) {
|
|
64
|
+
let anchored = false;
|
|
65
|
+
// Leading / means anchored to root
|
|
66
|
+
if (pattern.startsWith("/")) {
|
|
67
|
+
anchored = true;
|
|
68
|
+
pattern = pattern.slice(1);
|
|
69
|
+
}
|
|
70
|
+
// Trailing / means match directories — for our purposes, match the prefix
|
|
71
|
+
const dirOnly = pattern.endsWith("/");
|
|
72
|
+
if (dirOnly) {
|
|
73
|
+
pattern = pattern.slice(0, -1);
|
|
74
|
+
}
|
|
75
|
+
// Escape regex special chars, then convert glob patterns
|
|
76
|
+
let regex = "";
|
|
77
|
+
let i = 0;
|
|
78
|
+
while (i < pattern.length) {
|
|
79
|
+
const ch = pattern[i];
|
|
80
|
+
const next = pattern[i + 1];
|
|
81
|
+
if (ch === "*" && next === "*") {
|
|
82
|
+
// ** — match anything including path separators
|
|
83
|
+
if (pattern[i + 2] === "/") {
|
|
84
|
+
// **/ — match zero or more directories
|
|
85
|
+
regex += "(?:.*/)?";
|
|
86
|
+
i += 3;
|
|
87
|
+
}
|
|
88
|
+
else {
|
|
89
|
+
// ** at end or before non-slash
|
|
90
|
+
regex += ".*";
|
|
91
|
+
i += 2;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
else if (ch === "*") {
|
|
95
|
+
// * — match anything except /
|
|
96
|
+
regex += "[^/]*";
|
|
97
|
+
i++;
|
|
98
|
+
}
|
|
99
|
+
else if (ch === "?") {
|
|
100
|
+
// ? — match single non-slash char
|
|
101
|
+
regex += "[^/]";
|
|
102
|
+
i++;
|
|
103
|
+
}
|
|
104
|
+
else if (ch === "[") {
|
|
105
|
+
// Character class — pass through until ]
|
|
106
|
+
const closeBracket = pattern.indexOf("]", i + 1);
|
|
107
|
+
if (closeBracket !== -1) {
|
|
108
|
+
regex += pattern.slice(i, closeBracket + 1);
|
|
109
|
+
i = closeBracket + 1;
|
|
110
|
+
}
|
|
111
|
+
else {
|
|
112
|
+
regex += escapeRegex(ch);
|
|
113
|
+
i++;
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
else {
|
|
117
|
+
regex += escapeRegex(ch);
|
|
118
|
+
i++;
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
if (dirOnly) {
|
|
122
|
+
// Match the directory itself or anything under it
|
|
123
|
+
regex += "(?:/.*)?";
|
|
124
|
+
}
|
|
125
|
+
if (anchored) {
|
|
126
|
+
// Must match from the start
|
|
127
|
+
return new RegExp(`^${regex}$`);
|
|
128
|
+
}
|
|
129
|
+
// Unanchored: match if the pattern matches the full path
|
|
130
|
+
// or any suffix after a /
|
|
131
|
+
// If pattern contains /, it's implicitly anchored
|
|
132
|
+
if (pattern.includes("/")) {
|
|
133
|
+
return new RegExp(`^${regex}$`);
|
|
134
|
+
}
|
|
135
|
+
// No slash: match against the basename OR any path segment
|
|
136
|
+
return new RegExp(`(?:^|/)${regex}(?:/.*)?$`);
|
|
137
|
+
}
|
|
138
|
+
function escapeRegex(ch) {
|
|
139
|
+
return ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
140
|
+
}
|
|
package/dist/agents/static/index.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { StaticAnalysisResult, StaticContext } from "./types.js";
|
|
2
|
+
import type { PrivacyConfig } from "../../cli/yaml-io.js";
|
|
2
3
|
export type { StaticAnalysisResult, StaticContext } from "./types.js";
|
|
3
4
|
export { validateAnalysis } from "./validator.js";
|
|
4
5
|
/**
|
|
@@ -16,4 +17,4 @@ export declare function runStaticAnalysis(projectRoot: string, onProgress?: (msg
|
|
|
16
17
|
* This runs ONLY fact-collectors (no component-detector, connection-mapper, or validator).
|
|
17
18
|
* Output is consumed by the pipeline LLM agents.
|
|
18
19
|
*/
|
|
19
|
-
export declare function runStaticContextCollection(projectRoot: string, onProgress?: (msg: string) => void): Promise<StaticContext>;
|
|
20
|
+
export declare function runStaticContextCollection(projectRoot: string, onProgress?: (msg: string) => void, privacy?: Required<PrivacyConfig>): Promise<StaticContext>;
|
|
package/dist/agents/static/index.js
CHANGED
|
@@ -11,6 +11,8 @@ import { mapConnections } from "./connection-mapper.js";
|
|
|
11
11
|
import { validateAnalysis } from "./validator.js";
|
|
12
12
|
import { collectFileTree } from "./file-tree-collector.js";
|
|
13
13
|
import { collectCodeSamples } from "./code-sampler.js";
|
|
14
|
+
import { loadIgnoreFile } from "./ignore.js";
|
|
15
|
+
import { redactContext } from "./redactor.js";
|
|
14
16
|
export { validateAnalysis } from "./validator.js";
|
|
15
17
|
/**
|
|
16
18
|
* Run all static analysis scanners.
|
|
@@ -22,7 +24,11 @@ export { validateAnalysis } from "./validator.js";
|
|
|
22
24
|
* 4. Gap detection — identify what the LLM should resolve
|
|
23
25
|
*/
|
|
24
26
|
export async function runStaticAnalysis(projectRoot, onProgress) {
|
|
25
|
-
const tk = new StaticToolkit(projectRoot);
|
|
27
|
+
const ignoreFilter = loadIgnoreFile(projectRoot);
|
|
28
|
+
if (ignoreFilter.patternCount > 0) {
|
|
29
|
+
onProgress?.(`Loaded .archbyteignore: ${ignoreFilter.patternCount} pattern(s)`);
|
|
30
|
+
}
|
|
31
|
+
const tk = new StaticToolkit(projectRoot, ignoreFilter);
|
|
26
32
|
// Phase 1: parallel scanners (no dependencies)
|
|
27
33
|
onProgress?.("Running parallel scanners...");
|
|
28
34
|
const [structure, docs, infra, events, envs] = await Promise.all([
|
|
@@ -292,8 +298,12 @@ async function collectGaps(analysis, tk) {
|
|
|
292
298
|
* This runs ONLY fact-collectors (no component-detector, connection-mapper, or validator).
|
|
293
299
|
* Output is consumed by the pipeline LLM agents.
|
|
294
300
|
*/
|
|
295
|
-
export async function runStaticContextCollection(projectRoot, onProgress) {
|
|
296
|
-
const tk = new StaticToolkit(projectRoot);
|
|
301
|
+
export async function runStaticContextCollection(projectRoot, onProgress, privacy) {
|
|
302
|
+
const ignoreFilter = loadIgnoreFile(projectRoot);
|
|
303
|
+
if (ignoreFilter.patternCount > 0) {
|
|
304
|
+
onProgress?.(`Loaded .archbyteignore: ${ignoreFilter.patternCount} pattern(s)`);
|
|
305
|
+
}
|
|
306
|
+
const tk = new StaticToolkit(projectRoot, ignoreFilter);
|
|
297
307
|
onProgress?.("Collecting static context (7 scanners in parallel)...");
|
|
298
308
|
const [structure, docs, infra, events, envs, fileTree, codeSamples] = await Promise.all([
|
|
299
309
|
scanStructure(tk),
|
|
@@ -306,5 +316,43 @@ export async function runStaticContextCollection(projectRoot, onProgress) {
|
|
|
306
316
|
]);
|
|
307
317
|
onProgress?.(`Context: ${fileTree.totalFiles} files, ${fileTree.totalDirs} dirs, ${codeSamples.configFiles.length} configs, ${codeSamples.samples.length} samples`);
|
|
308
318
|
onProgress?.(`Detected: ${structure.language}, ${structure.framework ?? "no framework"}, monorepo=${structure.isMonorepo}`);
|
|
309
|
-
|
|
319
|
+
let ctx = { structure, docs, infra, events, envs, fileTree, codeSamples };
|
|
320
|
+
// Apply privacy controls — zero out disabled fields
|
|
321
|
+
if (privacy) {
|
|
322
|
+
if (!privacy.sendCodeSamples) {
|
|
323
|
+
ctx.codeSamples = { ...ctx.codeSamples, samples: [] };
|
|
324
|
+
onProgress?.("Privacy: code samples excluded");
|
|
325
|
+
}
|
|
326
|
+
if (!privacy.sendImportMap) {
|
|
327
|
+
ctx.codeSamples = { ...ctx.codeSamples, importMap: {} };
|
|
328
|
+
onProgress?.("Privacy: import map excluded");
|
|
329
|
+
}
|
|
330
|
+
if (!privacy.sendEnvNames) {
|
|
331
|
+
ctx.envs = { ...ctx.envs, environments: ctx.envs.environments.map((e) => ({ ...e, variables: [] })) };
|
|
332
|
+
onProgress?.("Privacy: env variable names excluded");
|
|
333
|
+
}
|
|
334
|
+
if (!privacy.sendDocs) {
|
|
335
|
+
ctx.docs = { projectDescription: "", architectureNotes: [], apiEndpoints: [], externalDependencies: [] };
|
|
336
|
+
onProgress?.("Privacy: documentation excluded");
|
|
337
|
+
}
|
|
338
|
+
if (!privacy.sendFileTree) {
|
|
339
|
+
ctx.fileTree = { tree: [], totalFiles: ctx.fileTree.totalFiles, totalDirs: ctx.fileTree.totalDirs };
|
|
340
|
+
onProgress?.("Privacy: file tree excluded");
|
|
341
|
+
}
|
|
342
|
+
if (!privacy.sendInfra) {
|
|
343
|
+
ctx.infra = {
|
|
344
|
+
docker: { services: [], composeFile: false },
|
|
345
|
+
kubernetes: { resources: [] },
|
|
346
|
+
cloud: { provider: null, services: [], iac: null },
|
|
347
|
+
ci: { platform: null, pipelines: [] },
|
|
348
|
+
};
|
|
349
|
+
onProgress?.("Privacy: infrastructure details excluded");
|
|
350
|
+
}
|
|
351
|
+
// Redaction — hash identifiers before returning
|
|
352
|
+
if (privacy.redact) {
|
|
353
|
+
ctx = redactContext(ctx);
|
|
354
|
+
onProgress?.("Privacy: redaction applied — identifiers hashed");
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
return ctx;
|
|
310
358
|
}
|
|
package/dist/agents/static/redactor.d.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { StaticContext } from "./types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Redact sensitive identifiers in a StaticContext.
|
|
4
|
+
* - File paths: hash each segment, preserve extensions and depth
|
|
5
|
+
* - Env var names: hash
|
|
6
|
+
* - Docker service names: hash
|
|
7
|
+
* - String literals in code samples: hash
|
|
8
|
+
* - Preserve: npm package names, language keywords, structural info
|
|
9
|
+
*
|
|
10
|
+
* Returns a deep copy — the original context is not modified.
|
|
11
|
+
*/
|
|
12
|
+
export declare function redactContext(ctx: StaticContext): StaticContext;
|