archbyte 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/archbyte.js +3 -0
- package/dist/agents/pipeline/index.d.ts +1 -1
- package/dist/agents/pipeline/index.js +6 -4
- package/dist/agents/static/ignore.d.ts +12 -0
- package/dist/agents/static/ignore.js +140 -0
- package/dist/agents/static/index.d.ts +2 -1
- package/dist/agents/static/index.js +52 -4
- package/dist/agents/static/redactor.d.ts +12 -0
- package/dist/agents/static/redactor.js +206 -0
- package/dist/agents/static/utils.d.ts +3 -1
- package/dist/agents/static/utils.js +34 -11
- package/dist/cli/analyze.d.ts +1 -0
- package/dist/cli/analyze.js +31 -4
- package/dist/cli/run.d.ts +1 -0
- package/dist/cli/run.js +2 -1
- package/dist/cli/serve.d.ts +1 -0
- package/dist/cli/serve.js +1 -0
- package/dist/cli/transparency.d.ts +36 -0
- package/dist/cli/transparency.js +214 -0
- package/dist/cli/yaml-io.d.ts +14 -0
- package/dist/cli/yaml-io.js +15 -0
- package/dist/server/src/index.d.ts +1 -0
- package/dist/server/src/index.js +71 -0
- package/package.json +1 -1
- package/templates/archbyte.yaml +20 -0
package/bin/archbyte.js
CHANGED
|
@@ -98,6 +98,7 @@ program
|
|
|
98
98
|
.option('-v, --verbose', 'Show detailed output')
|
|
99
99
|
.option('--force', 'Force full re-scan (skip incremental detection)')
|
|
100
100
|
.option('--dry-run', 'Preview without running')
|
|
101
|
+
.option('--debug', 'Show transparency report (what data is collected and sent)')
|
|
101
102
|
.action(async (options) => {
|
|
102
103
|
// handleRun manages login + setup + requireLicense internally
|
|
103
104
|
await handleRun(options);
|
|
@@ -115,6 +116,7 @@ program
|
|
|
115
116
|
.option('--skip-llm', 'Alias for --static')
|
|
116
117
|
.option('--force', 'Force full re-scan (skip incremental detection)')
|
|
117
118
|
.option('--dry-run', 'Preview without running')
|
|
119
|
+
.option('--debug', 'Show transparency report (what data is collected and sent)')
|
|
118
120
|
.action(async (options) => {
|
|
119
121
|
await gate('analyze');
|
|
120
122
|
await handleAnalyze(options);
|
|
@@ -136,6 +138,7 @@ program
|
|
|
136
138
|
.description('Start the visualization UI server')
|
|
137
139
|
.option('-p, --port <number>', `Server port (default: ${DEFAULT_PORT})`, parseInt)
|
|
138
140
|
.option('-d, --diagram <path>', 'Path to architecture JSON (default: .archbyte/architecture.json)')
|
|
141
|
+
.option('--debug', 'Enable transparency endpoint (/api/transparency)')
|
|
139
142
|
.action(async (options) => {
|
|
140
143
|
await handleServe(options);
|
|
141
144
|
});
|
|
@@ -6,7 +6,7 @@ import type { IncrementalContext } from "./types.js";
|
|
|
6
6
|
* Run the multi-agent pipeline: 3 parallel fast agents → 2 sequential agents.
|
|
7
7
|
* Each agent gets a single chat() call with pre-collected static context.
|
|
8
8
|
*/
|
|
9
|
-
export declare function runPipeline(ctx: StaticContext, provider: LLMProvider, config: ArchByteConfig, onProgress?: (msg: string) => void, incrementalContext?: IncrementalContext): Promise<StaticAnalysisResult & {
|
|
9
|
+
export declare function runPipeline(ctx: StaticContext, provider: LLMProvider, config: ArchByteConfig, onProgress?: (msg: string) => void, incrementalContext?: IncrementalContext, onDebug?: (agentId: string, model: string, system: string, user: string) => void): Promise<StaticAnalysisResult & {
|
|
10
10
|
tokenUsage?: {
|
|
11
11
|
input: number;
|
|
12
12
|
output: number;
|
|
@@ -92,7 +92,7 @@ function getFallbackData(agentId, inc) {
|
|
|
92
92
|
* Run the multi-agent pipeline: 3 parallel fast agents → 2 sequential agents.
|
|
93
93
|
* Each agent gets a single chat() call with pre-collected static context.
|
|
94
94
|
*/
|
|
95
|
-
export async function runPipeline(ctx, provider, config, onProgress, incrementalContext) {
|
|
95
|
+
export async function runPipeline(ctx, provider, config, onProgress, incrementalContext, onDebug) {
|
|
96
96
|
const agentResults = {};
|
|
97
97
|
const agentMeta = [];
|
|
98
98
|
const skippedAgents = [];
|
|
@@ -118,7 +118,7 @@ export async function runPipeline(ctx, provider, config, onProgress, incremental
|
|
|
118
118
|
agentResults[agent.id] = fallback;
|
|
119
119
|
return Promise.resolve(null);
|
|
120
120
|
}
|
|
121
|
-
return runAgent(agent, ctx, provider, config, parallelPrior, onProgress);
|
|
121
|
+
return runAgent(agent, ctx, provider, config, parallelPrior, onProgress, onDebug);
|
|
122
122
|
}));
|
|
123
123
|
let authFailed = false;
|
|
124
124
|
for (let i = 0; i < parallelTasks.length; i++) {
|
|
@@ -156,7 +156,7 @@ export async function runPipeline(ctx, provider, config, onProgress, incremental
|
|
|
156
156
|
continue;
|
|
157
157
|
}
|
|
158
158
|
try {
|
|
159
|
-
const result = await runAgent(agent, ctx, provider, config, agentResults, onProgress);
|
|
159
|
+
const result = await runAgent(agent, ctx, provider, config, agentResults, onProgress, onDebug);
|
|
160
160
|
if (result) {
|
|
161
161
|
agentResults[agent.id] = result.data;
|
|
162
162
|
agentMeta.push(result);
|
|
@@ -214,10 +214,12 @@ const MAX_TOKENS = {
|
|
|
214
214
|
"flow-detector": 4096,
|
|
215
215
|
"validator": 4096,
|
|
216
216
|
};
|
|
217
|
-
async function runAgent(agent, ctx, provider, config, priorResults, onProgress) {
|
|
217
|
+
async function runAgent(agent, ctx, provider, config, priorResults, onProgress, onDebug) {
|
|
218
218
|
const start = Date.now();
|
|
219
219
|
const model = resolveModel(config.provider, agent.modelTier, config.modelOverrides, config.model);
|
|
220
220
|
const { system, user } = agent.buildPrompt(ctx, priorResults);
|
|
221
|
+
// Debug callback — report what data is being sent
|
|
222
|
+
onDebug?.(agent.id, model, system, user);
|
|
221
223
|
onProgress?.(` ${agent.name}: calling ${model}...`);
|
|
222
224
|
const maxTokens = MAX_TOKENS[agent.id] ?? 4096;
|
|
223
225
|
const response = await provider.chat({
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * Path filter built from a project's `.archbyteignore` file.
 */
export interface IgnoreFilter {
    /**
     * Tells whether a path should be left out of analysis.
     *
     * @param relativePath Path relative to the project root.
     * @returns `true` when the path is excluded.
     */
    isIgnored(relativePath: string): boolean;
    /** Count of active patterns; comments and blank lines are not counted. */
    patternCount: number;
}
/**
 * Reads `.archbyteignore` from the project root and returns an
 * {@link IgnoreFilter} that matches paths against its patterns.
 * When the file does not exist the returned filter is a no-op that
 * ignores nothing.
 *
 * @param projectRoot Directory expected to contain `.archbyteignore`.
 */
export declare function loadIgnoreFile(projectRoot: string): IgnoreFilter;
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
// .archbyteignore — File exclusion filter
|
|
2
|
+
// Supports .gitignore-style patterns: # comments, ! negation, ** globstar, * wildcard
|
|
3
|
+
import * as fs from "fs";
|
|
4
|
+
import * as path from "path";
|
|
5
|
+
/**
 * Build an ignore filter from `<projectRoot>/.archbyteignore`.
 *
 * When the file is absent the result is a no-op filter (`isIgnored` always
 * returns false, `patternCount` is 0). Otherwise the file is read as UTF-8,
 * parsed into ordered rules, and every rule is evaluated in file order: the
 * last rule whose pattern matches decides the outcome, and a negated rule
 * (`!pattern`) flips the path back to "not ignored".
 *
 * @param projectRoot Directory expected to contain `.archbyteignore`.
 * @returns An object with `isIgnored(relativePath)` and `patternCount`.
 */
export function loadIgnoreFile(projectRoot) {
    const ignorePath = path.join(projectRoot, ".archbyteignore");
    const rules = fs.existsSync(ignorePath)
        ? parseIgnorePatterns(fs.readFileSync(ignorePath, "utf-8"))
        : null;
    if (rules === null) {
        return { isIgnored: () => false, patternCount: 0 };
    }
    const isIgnored = (relativePath) => {
        // Use forward slashes and drop one leading slash before matching.
        const normalized = relativePath.replace(/\\/g, "/").replace(/^\//, "");
        // Later rules override earlier ones, so fold over the whole list.
        return rules.reduce((ignored, rule) => (rule.pattern.test(normalized) ? !rule.negated : ignored), false);
    };
    return { isIgnored, patternCount: rules.length };
}
|
|
32
|
+
/**
 * Turn the text of an ignore file into an ordered rule list.
 *
 * Each line is trimmed; blank lines and lines starting with `#` are dropped.
 * A leading `!` marks the rule as negated and is removed from the pattern.
 * Rules are returned in file order as `{ pattern: RegExp, negated: boolean }`.
 *
 * @param content Raw file contents.
 * @returns The parsed rules.
 */
function parseIgnorePatterns(content) {
    return content.split("\n").flatMap((rawLine) => {
        const line = rawLine.trim();
        if (line === "" || line.startsWith("#")) {
            return [];
        }
        const negated = line.startsWith("!");
        // Strip unescaped trailing whitespace from what remains of the line.
        const body = (negated ? line.slice(1) : line).replace(/(?<!\\)\s+$/, "");
        if (body === "") {
            return [];
        }
        return [{ pattern: patternToRegex(body), negated }];
    });
}
|
|
58
|
+
/**
 * Convert a .gitignore-style pattern into a RegExp that is tested against a
 * forward-slash, project-relative path.
 *
 * Supported syntax:
 *   - `*`      any run of characters except `/`
 *   - `**`     any run of characters including `/`; a leading or embedded
 *              `**` followed by `/` matches zero or more whole directories
 *   - `?`      exactly one character except `/`
 *   - `[abc]`  character class; `[!abc]` is a negated class (never matches `/`)
 *   - `\x`     the character `x` taken literally
 *   - leading `/`  anchors the pattern to the project root
 *   - trailing `/` is accepted and stripped
 *
 * A pattern that contains a `/` (after stripping the leading/trailing one) is
 * matched from the root; otherwise it may match at any directory level.
 *
 * Every pattern also matches everything below the path it names. The filter
 * is applied to flat file lists (glob/grep results), so `src/foo` or `/build`
 * must exclude `src/foo/bar.ts` and `build/out.js`, not only the directory
 * entry itself.
 *
 * @param pattern A single pattern with any `!` prefix already removed.
 * @returns The compiled matcher.
 */
function patternToRegex(pattern) {
    // Leading "/" means the pattern is anchored to the root.
    const anchored = pattern.startsWith("/");
    if (anchored) {
        pattern = pattern.slice(1);
    }
    // A trailing "/" only marks a directory; descendants are matched for
    // every pattern below, so the marker itself can simply be dropped.
    if (pattern.endsWith("/")) {
        pattern = pattern.slice(0, -1);
    }
    let regex = "";
    let i = 0;
    while (i < pattern.length) {
        const ch = pattern[i];
        if (ch === "\\" && i + 1 < pattern.length) {
            // Backslash escape: the next character loses any glob meaning.
            regex += escapeRegex(pattern[i + 1]);
            i += 2;
        }
        else if (ch === "*" && pattern[i + 1] === "*") {
            if (pattern[i + 2] === "/") {
                // "**/" matches zero or more directories.
                regex += "(?:.*/)?";
                i += 3;
            }
            else {
                // "**" at the end or before a non-slash matches anything.
                regex += ".*";
                i += 2;
            }
        }
        else if (ch === "*") {
            regex += "[^/]*";
            i++;
        }
        else if (ch === "?") {
            regex += "[^/]";
            i++;
        }
        else if (ch === "[") {
            const closeBracket = pattern.indexOf("]", i + 1);
            if (closeBracket === -1) {
                // Unterminated class: treat "[" as a literal character.
                regex += escapeRegex(ch);
                i++;
            }
            else {
                const body = pattern.slice(i + 1, closeBracket);
                // Glob negation is "!", regex negation is "^". A negated
                // class must not match the path separator.
                const negated = body.length > 1 && body.startsWith("!");
                regex += negated ? `[^/${body.slice(1)}]` : `[${body}]`;
                i = closeBracket + 1;
            }
        }
        else {
            regex += escapeRegex(ch);
            i++;
        }
    }
    // Match the named path itself or anything underneath it.
    regex += "(?:/.*)?";
    // A pattern with a slash is implicitly anchored; otherwise it may start
    // at the beginning of the path or right after any "/".
    const prefix = anchored || pattern.includes("/") ? "^" : "(?:^|/)";
    return new RegExp(`${prefix}${regex}$`);
}
/**
 * Escape regular-expression metacharacters in `ch` so it matches literally.
 *
 * @param ch Text to escape (callers pass a single character).
 * @returns The escaped text.
 */
function escapeRegex(ch) {
    return ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { StaticAnalysisResult, StaticContext } from "./types.js";
|
|
2
|
+
import type { PrivacyConfig } from "../../cli/yaml-io.js";
|
|
2
3
|
export type { StaticAnalysisResult, StaticContext } from "./types.js";
|
|
3
4
|
export { validateAnalysis } from "./validator.js";
|
|
4
5
|
/**
|
|
@@ -16,4 +17,4 @@ export declare function runStaticAnalysis(projectRoot: string, onProgress?: (msg
|
|
|
16
17
|
* This runs ONLY fact-collectors (no component-detector, connection-mapper, or validator).
|
|
17
18
|
* Output is consumed by the pipeline LLM agents.
|
|
18
19
|
*/
|
|
19
|
-
export declare function runStaticContextCollection(projectRoot: string, onProgress?: (msg: string) => void): Promise<StaticContext>;
|
|
20
|
+
export declare function runStaticContextCollection(projectRoot: string, onProgress?: (msg: string) => void, privacy?: Required<PrivacyConfig>): Promise<StaticContext>;
|
|
@@ -11,6 +11,8 @@ import { mapConnections } from "./connection-mapper.js";
|
|
|
11
11
|
import { validateAnalysis } from "./validator.js";
|
|
12
12
|
import { collectFileTree } from "./file-tree-collector.js";
|
|
13
13
|
import { collectCodeSamples } from "./code-sampler.js";
|
|
14
|
+
import { loadIgnoreFile } from "./ignore.js";
|
|
15
|
+
import { redactContext } from "./redactor.js";
|
|
14
16
|
export { validateAnalysis } from "./validator.js";
|
|
15
17
|
/**
|
|
16
18
|
* Run all static analysis scanners.
|
|
@@ -22,7 +24,11 @@ export { validateAnalysis } from "./validator.js";
|
|
|
22
24
|
* 4. Gap detection — identify what the LLM should resolve
|
|
23
25
|
*/
|
|
24
26
|
export async function runStaticAnalysis(projectRoot, onProgress) {
|
|
25
|
-
const
|
|
27
|
+
const ignoreFilter = loadIgnoreFile(projectRoot);
|
|
28
|
+
if (ignoreFilter.patternCount > 0) {
|
|
29
|
+
onProgress?.(`Loaded .archbyteignore: ${ignoreFilter.patternCount} pattern(s)`);
|
|
30
|
+
}
|
|
31
|
+
const tk = new StaticToolkit(projectRoot, ignoreFilter);
|
|
26
32
|
// Phase 1: parallel scanners (no dependencies)
|
|
27
33
|
onProgress?.("Running parallel scanners...");
|
|
28
34
|
const [structure, docs, infra, events, envs] = await Promise.all([
|
|
@@ -292,8 +298,12 @@ async function collectGaps(analysis, tk) {
|
|
|
292
298
|
* This runs ONLY fact-collectors (no component-detector, connection-mapper, or validator).
|
|
293
299
|
* Output is consumed by the pipeline LLM agents.
|
|
294
300
|
*/
|
|
295
|
-
export async function runStaticContextCollection(projectRoot, onProgress) {
|
|
296
|
-
const
|
|
301
|
+
export async function runStaticContextCollection(projectRoot, onProgress, privacy) {
|
|
302
|
+
const ignoreFilter = loadIgnoreFile(projectRoot);
|
|
303
|
+
if (ignoreFilter.patternCount > 0) {
|
|
304
|
+
onProgress?.(`Loaded .archbyteignore: ${ignoreFilter.patternCount} pattern(s)`);
|
|
305
|
+
}
|
|
306
|
+
const tk = new StaticToolkit(projectRoot, ignoreFilter);
|
|
297
307
|
onProgress?.("Collecting static context (7 scanners in parallel)...");
|
|
298
308
|
const [structure, docs, infra, events, envs, fileTree, codeSamples] = await Promise.all([
|
|
299
309
|
scanStructure(tk),
|
|
@@ -306,5 +316,43 @@ export async function runStaticContextCollection(projectRoot, onProgress) {
|
|
|
306
316
|
]);
|
|
307
317
|
onProgress?.(`Context: ${fileTree.totalFiles} files, ${fileTree.totalDirs} dirs, ${codeSamples.configFiles.length} configs, ${codeSamples.samples.length} samples`);
|
|
308
318
|
onProgress?.(`Detected: ${structure.language}, ${structure.framework ?? "no framework"}, monorepo=${structure.isMonorepo}`);
|
|
309
|
-
|
|
319
|
+
let ctx = { structure, docs, infra, events, envs, fileTree, codeSamples };
|
|
320
|
+
// Apply privacy controls — zero out disabled fields
|
|
321
|
+
if (privacy) {
|
|
322
|
+
if (!privacy.sendCodeSamples) {
|
|
323
|
+
ctx.codeSamples = { ...ctx.codeSamples, samples: [] };
|
|
324
|
+
onProgress?.("Privacy: code samples excluded");
|
|
325
|
+
}
|
|
326
|
+
if (!privacy.sendImportMap) {
|
|
327
|
+
ctx.codeSamples = { ...ctx.codeSamples, importMap: {} };
|
|
328
|
+
onProgress?.("Privacy: import map excluded");
|
|
329
|
+
}
|
|
330
|
+
if (!privacy.sendEnvNames) {
|
|
331
|
+
ctx.envs = { ...ctx.envs, environments: ctx.envs.environments.map((e) => ({ ...e, variables: [] })) };
|
|
332
|
+
onProgress?.("Privacy: env variable names excluded");
|
|
333
|
+
}
|
|
334
|
+
if (!privacy.sendDocs) {
|
|
335
|
+
ctx.docs = { projectDescription: "", architectureNotes: [], apiEndpoints: [], externalDependencies: [] };
|
|
336
|
+
onProgress?.("Privacy: documentation excluded");
|
|
337
|
+
}
|
|
338
|
+
if (!privacy.sendFileTree) {
|
|
339
|
+
ctx.fileTree = { tree: [], totalFiles: ctx.fileTree.totalFiles, totalDirs: ctx.fileTree.totalDirs };
|
|
340
|
+
onProgress?.("Privacy: file tree excluded");
|
|
341
|
+
}
|
|
342
|
+
if (!privacy.sendInfra) {
|
|
343
|
+
ctx.infra = {
|
|
344
|
+
docker: { services: [], composeFile: false },
|
|
345
|
+
kubernetes: { resources: [] },
|
|
346
|
+
cloud: { provider: null, services: [], iac: null },
|
|
347
|
+
ci: { platform: null, pipelines: [] },
|
|
348
|
+
};
|
|
349
|
+
onProgress?.("Privacy: infrastructure details excluded");
|
|
350
|
+
}
|
|
351
|
+
// Redaction — hash identifiers before returning
|
|
352
|
+
if (privacy.redact) {
|
|
353
|
+
ctx = redactContext(ctx);
|
|
354
|
+
onProgress?.("Privacy: redaction applied — identifiers hashed");
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
return ctx;
|
|
310
358
|
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { StaticContext } from "./types.js";
|
|
2
|
+
/**
 * Produce a redacted copy of a StaticContext in which sensitive identifiers
 * are replaced by hashes:
 * - file paths are hashed segment by segment, keeping extensions and depth
 * - environment variable names are hashed
 * - Docker service names are hashed
 * - string literals inside code samples are hashed
 * - npm package names, language keywords and structural information are kept
 *
 * The input context is left untouched; the result is a new object.
 */
export declare function redactContext(ctx: StaticContext): StaticContext;
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
// Redaction Mode — Hash sensitive identifiers in StaticContext
|
|
2
|
+
// Preserves structure and public package names while hiding proprietary paths/names.
|
|
3
|
+
import { createHash } from "crypto";
|
|
4
|
+
/**
 * Produce a redacted copy of a StaticContext.
 *
 * Each section of the context is handed to its own redactor:
 * structure, docs, infra, events, envs, fileTree and codeSamples.
 * The result is assembled into a new object; `ctx` itself is not assigned to.
 *
 * @param ctx The collected static context.
 * @returns A new context object built from the redacted sections.
 */
export function redactContext(ctx) {
    const { structure, docs, infra, events, envs, fileTree, codeSamples } = ctx;
    return {
        structure: redactStructure(structure),
        docs: redactDocs(docs),
        infra: redactInfra(infra),
        events: redactEvents(events),
        envs: redactEnvs(envs),
        fileTree: redactFileTree(fileTree),
        codeSamples: redactCodeSamples(codeSamples),
    };
}
|
|
25
|
+
// --- Hashing ---
|
|
26
|
+
const hashCache = new Map();
|
|
27
|
+
function hashStr(value) {
|
|
28
|
+
const cached = hashCache.get(value);
|
|
29
|
+
if (cached)
|
|
30
|
+
return cached;
|
|
31
|
+
const hash = createHash("sha256").update(value).digest("hex").slice(0, 8);
|
|
32
|
+
const result = `redacted-${hash}`;
|
|
33
|
+
hashCache.set(value, result);
|
|
34
|
+
return result;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Hash each segment of a path, preserving the extension and directory depth.
|
|
38
|
+
* `src/auth/handler.ts` → `redacted-a1b2c3/redacted-d4e5f6/redacted-g7h8.ts`
|
|
39
|
+
*/
|
|
40
|
+
function redactPath(filePath) {
|
|
41
|
+
if (!filePath || filePath === ".")
|
|
42
|
+
return filePath;
|
|
43
|
+
const parts = filePath.split("/");
|
|
44
|
+
return parts
|
|
45
|
+
.map((part) => {
|
|
46
|
+
const dotIdx = part.lastIndexOf(".");
|
|
47
|
+
if (dotIdx > 0 && dotIdx < part.length - 1) {
|
|
48
|
+
const name = part.slice(0, dotIdx);
|
|
49
|
+
const ext = part.slice(dotIdx);
|
|
50
|
+
return `${hashStr(name)}${ext}`;
|
|
51
|
+
}
|
|
52
|
+
return hashStr(part);
|
|
53
|
+
})
|
|
54
|
+
.join("/");
|
|
55
|
+
}
|
|
56
|
+
// --- Structure ---
|
|
57
|
+
/**
 * Redact the structure section.
 *
 * All fields of `s` are copied; `projectName` is replaced by its hash,
 * every entry point path is redacted, and the keys of `directories` are
 * hashed while their values are carried over unchanged.
 *
 * @param s The structure section of the static context.
 * @returns A new structure object.
 */
function redactStructure(s) {
    const projectName = hashStr(s.projectName);
    const entryPoints = s.entryPoints.map((entry) => redactPath(entry));
    const hashedDirs = Object.entries(s.directories).map(([dir, isPresent]) => [hashStr(dir), isPresent]);
    return {
        ...s,
        projectName,
        entryPoints,
        directories: Object.fromEntries(hashedDirs),
    };
}
|
|
67
|
+
// --- Docs ---
|
|
68
|
+
/**
 * Redact the docs section.
 *
 * The project description, architecture notes and external dependency list
 * are passed through unchanged; only the `path` of each API endpoint is
 * rewritten via `redactApiPath`.
 *
 * @param d The docs section of the static context.
 * @returns A new docs object.
 */
function redactDocs(d) {
    const apiEndpoints = d.apiEndpoints.map((ep) => {
        return { ...ep, path: redactApiPath(ep.path) };
    });
    const { projectDescription, architectureNotes, externalDependencies } = d;
    return {
        projectDescription,
        architectureNotes,
        apiEndpoints,
        externalDependencies,
    };
}
|
|
80
|
+
/**
 * Hash the named segments of an API path while keeping its shape.
 *
 * A segment is redacted when it starts with a letter or underscore and is
 * not one of a few generic names (`/api`, `/v1`..`/v3`, `/health`,
 * `/status`, `/auth`). The whole segment, up to the next `/`, `?` or `#`,
 * is replaced by a single hash, so hyphenated or dotted names such as
 * `/user-profiles` do not leak their tail. Segments that start with any
 * other character (for example `/:id`, `/{id}` or `/123`) are left as they
 * are.
 *
 * @param apiPath The endpoint path.
 * @returns The path with named segments hashed.
 */
function redactApiPath(apiPath) {
    // Generic REST segments that reveal nothing project-specific.
    const common = new Set(["/api", "/v1", "/v2", "/v3", "/health", "/status", "/auth"]);
    return apiPath.replace(/\/[a-zA-Z_][^/?#]*/g, (segment) => {
        if (common.has(segment))
            return segment;
        return `/${hashStr(segment.slice(1))}`;
    });
}
|
|
90
|
+
// --- Infra ---
|
|
91
|
+
/**
 * Redact the infrastructure section.
 *
 * - Docker services: other fields are copied; `name` is hashed,
 *   `buildContext` is path-redacted when set, and every environment entry
 *   has its key hashed and its value replaced by `"***"`.
 * - The compose file path is path-redacted when set.
 * - Kubernetes resources: other fields are copied; `name` and (when set)
 *   `namespace` are hashed.
 * - `cloud` and `ci` are passed through unchanged.
 *
 * @param i The infra section of the static context.
 * @returns A new infra object.
 */
function redactInfra(i) {
    const maskEnvironment = (environment) => Object.fromEntries(Object.entries(environment).map(([key]) => [hashStr(key), "***"]));
    const redactService = (svc) => ({
        ...svc,
        name: hashStr(svc.name),
        buildContext: svc.buildContext ? redactPath(svc.buildContext) : undefined,
        environment: svc.environment ? maskEnvironment(svc.environment) : undefined,
    });
    const redactResource = (r) => ({
        ...r,
        name: hashStr(r.name),
        namespace: r.namespace ? hashStr(r.namespace) : undefined,
    });
    const docker = {
        services: i.docker.services.map((svc) => redactService(svc)),
        composeFile: i.docker.composeFile,
        composeFilePath: i.docker.composeFilePath ? redactPath(i.docker.composeFilePath) : undefined,
    };
    const kubernetes = {
        resources: i.kubernetes.resources.map((r) => redactResource(r)),
    };
    return {
        docker,
        kubernetes,
        cloud: i.cloud,
        ci: i.ci,
    };
}
|
|
116
|
+
// --- Events ---
|
|
117
|
+
/**
 * Redact the events section.
 *
 * `hasEDA` and `patterns` are passed through; each event keeps its fields
 * except `file`, which is path-redacted.
 *
 * @param e The events section of the static context.
 * @returns A new events object.
 */
function redactEvents(e) {
    const events = e.events.map((ev) => {
        return { ...ev, file: redactPath(ev.file) };
    });
    return {
        hasEDA: e.hasEDA,
        patterns: e.patterns,
        events,
    };
}
|
|
127
|
+
// --- Envs ---
|
|
128
|
+
/**
 * Redact the environments section.
 *
 * Each environment is reduced to its `name` (kept as is) and its
 * `variables`, every one of which is hashed. `configPattern` and
 * `hasSecrets` are passed through.
 *
 * @param e The envs section of the static context.
 * @returns A new envs object.
 */
function redactEnvs(e) {
    const environments = e.environments.map(({ name, variables }) => {
        return {
            name,
            variables: variables.map((variable) => hashStr(variable)),
        };
    });
    return {
        environments,
        configPattern: e.configPattern,
        hasSecrets: e.hasSecrets,
    };
}
|
|
138
|
+
// --- File Tree ---
|
|
139
|
+
/**
 * Redact the file tree section: every top-level entry is redacted
 * (recursively, through `redactTreeEntry`) while the file and directory
 * totals are copied unchanged.
 *
 * @param ft The fileTree section of the static context.
 * @returns A new fileTree object.
 */
function redactFileTree(ft) {
    const { tree, totalFiles, totalDirs } = ft;
    return {
        tree: tree.map((entry) => redactTreeEntry(entry)),
        totalFiles,
        totalDirs,
    };
}
|
|
146
|
+
/**
 * Redact one file-tree node: its path is path-redacted, its type is kept,
 * and its children (when present) are redacted recursively.
 *
 * @param entry A tree node with `path`, `type` and optional `children`.
 * @returns A new node; `children` is `undefined` when the input had none.
 */
function redactTreeEntry(entry) {
    const children = entry.children?.map((child) => redactTreeEntry(child));
    return {
        path: redactPath(entry.path),
        type: entry.type,
        children,
    };
}
|
|
153
|
+
// --- Code Samples ---
|
|
154
|
+
/**
 * Redact the code samples section.
 *
 * - samples: other fields are copied; `path` is path-redacted and `excerpt`
 *   goes through `redactCodeExcerpt`.
 * - importMap: every file key is path-redacted; an import that starts with
 *   `.` or `/` is path-redacted, any other import is kept verbatim.
 * - configFiles: reduced to a redacted `path` and redacted `content`.
 *
 * @param cs The codeSamples section of the static context.
 * @returns A new codeSamples object.
 */
function redactCodeSamples(cs) {
    const redactImport = (imp) => {
        const isLocal = imp.startsWith(".") || imp.startsWith("/");
        return isLocal ? redactPath(imp) : imp;
    };
    const samples = cs.samples.map((s) => {
        return { ...s, path: redactPath(s.path), excerpt: redactCodeExcerpt(s.excerpt) };
    });
    const importEntries = Object.entries(cs.importMap).map(([file, imports]) => {
        return [redactPath(file), imports.map((imp) => redactImport(imp))];
    });
    const configFiles = cs.configFiles.map((cf) => {
        return { path: redactPath(cf.path), content: redactConfigContent(cf.content) };
    });
    return {
        samples,
        importMap: Object.fromEntries(importEntries),
        configFiles,
    };
}
|
|
176
|
+
/**
 * Redact string literals in a code excerpt while preserving its structure.
 *
 * Every single- or double-quoted literal is replaced by a quoted hash, with
 * two exceptions:
 *   - literals of at most two characters are kept;
 *   - a literal in import position (directly after `from`, `import`,
 *     `import(` or `require(`) that does not start with `.` or `/` is kept,
 *     because it is treated as a package specifier.
 *
 * Keeping every space-free literal would leave API keys, URLs and host names
 * unredacted, so the package exemption is tied to the import position rather
 * than to the shape of the string.
 *
 * NOTE(review): quoted imports in other positions (e.g. inside a grouped
 * `import ( ... )` block) are not recognised and are hashed.
 *
 * @param code The source excerpt.
 * @returns The excerpt with string literals hashed.
 */
function redactCodeExcerpt(code) {
    // Text that immediately precedes a module specifier.
    const importContext = /(?:\bfrom|\bimport\s*\(?|\brequire\s*\()\s*$/;
    return code.replace(/(["'])([^"']*)\1/g, (literal, quote, content, offset) => {
        // Keep short common strings.
        if (content.length <= 2)
            return literal;
        const isLocal = content.startsWith(".") || content.startsWith("/");
        if (!isLocal && importContext.test(code.slice(Math.max(0, offset - 32), offset))) {
            // Package import: keep the specifier.
            return literal;
        }
        // Redact everything else.
        return `${quote}${hashStr(content)}${quote}`;
    });
}
|
|
195
|
+
/**
 * Redact values in `key=value` / `key: value` lines of a config file.
 *
 * A line is rewritten when it starts (after optional indentation) with a key
 * made of word characters, dots or hyphens, followed by `:` or `=` and a
 * value on the same line. Boolean, null/undefined and plain numeric values
 * are kept; any other value is replaced by the hash of its trimmed text.
 * Lines that do not match (for example a key with nothing after the
 * separator) are left untouched.
 *
 * Only spaces and tabs are accepted around the key and separator. Accepting
 * any whitespace would let the match run across a line break, so a parent
 * key with no value (`services:`) would take the next line's nested key as
 * its value and replace it with a hash.
 *
 * @param content The config file text.
 * @returns The text with values hashed.
 */
function redactConfigContent(content) {
    return content.replace(/^([ \t]*[\w.-]+[ \t]*[:=][ \t]*)([^\r\n]+)$/gm, (line, prefix, value) => {
        const trimmed = value.trim();
        // Nothing but whitespace after the separator: there is no value to hide.
        if (trimmed === "") {
            return line;
        }
        // Keep boolean, numeric, null values.
        if (/^(true|false|null|undefined|\d+(\.\d+)?)$/i.test(trimmed)) {
            return `${prefix}${value}`;
        }
        return `${prefix}${hashStr(trimmed)}`;
    });
}
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import type { GrepResult, DirEntry } from "../runtime/types.js";
|
|
2
|
+
import type { IgnoreFilter } from "./ignore.js";
|
|
2
3
|
/**
|
|
3
4
|
* Wraps LocalFSBackend with safe-read helpers for static scanners.
|
|
4
5
|
*/
|
|
5
6
|
export declare class StaticToolkit {
|
|
6
7
|
private fs;
|
|
7
|
-
|
|
8
|
+
private ignore;
|
|
9
|
+
constructor(projectRoot: string, ignoreFilter?: IgnoreFilter);
|
|
8
10
|
readFileSafe(path: string): Promise<string | null>;
|
|
9
11
|
globFiles(pattern: string, cwd?: string): Promise<string[]>;
|
|
10
12
|
grepFiles(pattern: string, searchPath?: string): Promise<GrepResult[]>;
|
|
@@ -6,10 +6,14 @@ import { LocalFSBackend } from "../tools/local-fs.js";
|
|
|
6
6
|
*/
|
|
7
7
|
export class StaticToolkit {
|
|
8
8
|
fs;
|
|
9
|
-
|
|
9
|
+
ignore;
|
|
10
|
+
constructor(projectRoot, ignoreFilter) {
|
|
10
11
|
this.fs = new LocalFSBackend(projectRoot);
|
|
12
|
+
this.ignore = ignoreFilter ?? null;
|
|
11
13
|
}
|
|
12
14
|
async readFileSafe(path) {
|
|
15
|
+
if (this.ignore?.isIgnored(path))
|
|
16
|
+
return null;
|
|
13
17
|
try {
|
|
14
18
|
return await this.fs.readFile(path);
|
|
15
19
|
}
|
|
@@ -21,12 +25,19 @@ export class StaticToolkit {
|
|
|
21
25
|
try {
|
|
22
26
|
// Expand brace patterns like *.{yml,yaml} → [*.yml, *.yaml]
|
|
23
27
|
const patterns = expandBraces(pattern);
|
|
28
|
+
let results;
|
|
24
29
|
if (patterns.length === 1) {
|
|
25
|
-
|
|
30
|
+
results = await this.fs.glob(patterns[0], cwd);
|
|
31
|
+
}
|
|
32
|
+
else {
|
|
33
|
+
const resultSets = await Promise.all(patterns.map((p) => this.fs.glob(p, cwd).catch(() => [])));
|
|
34
|
+
results = [...new Set(resultSets.flat())].sort();
|
|
35
|
+
}
|
|
36
|
+
// Apply ignore filter
|
|
37
|
+
if (this.ignore) {
|
|
38
|
+
results = results.filter((f) => !this.ignore.isIgnored(f));
|
|
26
39
|
}
|
|
27
|
-
|
|
28
|
-
// Deduplicate and sort
|
|
29
|
-
return [...new Set(resultSets.flat())].sort();
|
|
40
|
+
return results;
|
|
30
41
|
}
|
|
31
42
|
catch {
|
|
32
43
|
return [];
|
|
@@ -36,11 +47,16 @@ export class StaticToolkit {
|
|
|
36
47
|
try {
|
|
37
48
|
// Work around LocalFSBackend.grep glob bug with cwd:
|
|
38
49
|
// grep from root and filter by path prefix
|
|
39
|
-
|
|
40
|
-
if (
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
50
|
+
let results = await this.fs.grep(pattern);
|
|
51
|
+
if (searchPath) {
|
|
52
|
+
const prefix = searchPath.endsWith("/") ? searchPath : `${searchPath}/`;
|
|
53
|
+
results = results.filter((r) => r.file.startsWith(prefix));
|
|
54
|
+
}
|
|
55
|
+
// Apply ignore filter
|
|
56
|
+
if (this.ignore) {
|
|
57
|
+
results = results.filter((r) => !this.ignore.isIgnored(r.file));
|
|
58
|
+
}
|
|
59
|
+
return results;
|
|
44
60
|
}
|
|
45
61
|
catch {
|
|
46
62
|
return [];
|
|
@@ -48,7 +64,14 @@ export class StaticToolkit {
|
|
|
48
64
|
}
|
|
49
65
|
async listDir(dirPath) {
|
|
50
66
|
try {
|
|
51
|
-
|
|
67
|
+
const entries = await this.fs.listDir(dirPath);
|
|
68
|
+
if (this.ignore) {
|
|
69
|
+
return entries.filter((e) => {
|
|
70
|
+
const fullPath = dirPath === "." ? e.name : `${dirPath}/${e.name}`;
|
|
71
|
+
return !this.ignore.isIgnored(fullPath);
|
|
72
|
+
});
|
|
73
|
+
}
|
|
74
|
+
return entries;
|
|
52
75
|
}
|
|
53
76
|
catch {
|
|
54
77
|
return [];
|
package/dist/cli/analyze.d.ts
CHANGED
package/dist/cli/analyze.js
CHANGED
|
@@ -4,9 +4,10 @@ import { execSync } from "child_process";
|
|
|
4
4
|
import chalk from "chalk";
|
|
5
5
|
import { resolveConfig } from "./config.js";
|
|
6
6
|
import { recordUsage } from "./license-gate.js";
|
|
7
|
-
import { staticResultToSpec, writeSpec, writeMetadata, loadSpec, loadMetadata } from "./yaml-io.js";
|
|
7
|
+
import { staticResultToSpec, writeSpec, writeMetadata, loadSpec, loadMetadata, resolvePrivacy } from "./yaml-io.js";
|
|
8
8
|
import { getChangedFiles, mapFilesToComponents, shouldRunAgents, isGitAvailable, categorizeChanges, computeNeighbors, getCommitCount } from "./incremental.js";
|
|
9
9
|
import { progressBar, confirm } from "./ui.js";
|
|
10
|
+
import { buildCollectorReport, buildAgentReport, buildTransparencyReport, printTransparencyReport, saveTransparencyReport, } from "./transparency.js";
|
|
10
11
|
export async function handleAnalyze(options) {
|
|
11
12
|
const rootDir = options.dir ? path.resolve(options.dir) : process.cwd();
|
|
12
13
|
const isStaticOnly = options.static || options.skipLlm;
|
|
@@ -201,13 +202,17 @@ export async function handleAnalyze(options) {
|
|
|
201
202
|
console.log(chalk.yellow(`File tree grew from ${priorCount} to ${currentFileCount} files — running full scan.`));
|
|
202
203
|
}
|
|
203
204
|
}
|
|
204
|
-
// 4.
|
|
205
|
+
// 4. Load privacy config
|
|
206
|
+
const privacySpec = loadSpec(rootDir);
|
|
207
|
+
const privacy = resolvePrivacy(privacySpec);
|
|
208
|
+
const agentReports = [];
|
|
209
|
+
// 5. Run static context collection → LLM pipeline
|
|
205
210
|
const progress = progressBar(7);
|
|
206
211
|
progress.update(0, "Collecting static context...");
|
|
207
212
|
const { runStaticContextCollection } = await import("../agents/static/index.js");
|
|
208
213
|
const ctx = await runStaticContextCollection(rootDir, (msg) => {
|
|
209
214
|
progress.update(0, `Static context: ${msg}`);
|
|
210
|
-
});
|
|
215
|
+
}, privacy);
|
|
211
216
|
// Save static context for debugging / re-runs
|
|
212
217
|
const ctxPath = path.join(rootDir, ".archbyte", "static-context.json");
|
|
213
218
|
if (!fs.existsSync(path.dirname(ctxPath))) {
|
|
@@ -219,6 +224,12 @@ export async function handleAnalyze(options) {
|
|
|
219
224
|
const { runPipeline } = await import("../agents/pipeline/index.js");
|
|
220
225
|
let result;
|
|
221
226
|
let pipelineStep = 1;
|
|
227
|
+
// Debug callback — collect agent reports for transparency log
|
|
228
|
+
const onDebug = options.debug
|
|
229
|
+
? (agentId, model, system, user) => {
|
|
230
|
+
agentReports.push(buildAgentReport(agentId, model, system, user));
|
|
231
|
+
}
|
|
232
|
+
: undefined;
|
|
222
233
|
try {
|
|
223
234
|
result = await runPipeline(ctx, provider, config, (msg) => {
|
|
224
235
|
// Map pipeline progress messages to bar steps
|
|
@@ -249,7 +260,7 @@ export async function handleAnalyze(options) {
|
|
|
249
260
|
else {
|
|
250
261
|
progress.update(pipelineStep, msg.trim());
|
|
251
262
|
}
|
|
252
|
-
}, incrementalContext);
|
|
263
|
+
}, incrementalContext, onDebug);
|
|
253
264
|
}
|
|
254
265
|
catch (err) {
|
|
255
266
|
const errorMsg = err instanceof Error ? err.message : String(err);
|
|
@@ -311,6 +322,22 @@ export async function handleAnalyze(options) {
|
|
|
311
322
|
const spec = staticResultToSpec(result, rootDir, existingSpec?.rules);
|
|
312
323
|
writeSpec(rootDir, spec);
|
|
313
324
|
writeScanMetadata(rootDir, duration, "pipeline", ctx.fileTree.totalFiles, result.tokenUsage, incrementalContext ? true : undefined, result.skippedAgents);
|
|
325
|
+
// Transparency report (--debug)
|
|
326
|
+
if (options.debug) {
|
|
327
|
+
const collectors = buildCollectorReport(ctx);
|
|
328
|
+
const privacyControls = {
|
|
329
|
+
sendCodeSamples: privacy.sendCodeSamples,
|
|
330
|
+
sendImportMap: privacy.sendImportMap,
|
|
331
|
+
sendEnvNames: privacy.sendEnvNames,
|
|
332
|
+
sendDocs: privacy.sendDocs,
|
|
333
|
+
sendFileTree: privacy.sendFileTree,
|
|
334
|
+
sendInfra: privacy.sendInfra,
|
|
335
|
+
};
|
|
336
|
+
const ignoreFilter = (await import("../agents/static/ignore.js")).loadIgnoreFile(rootDir);
|
|
337
|
+
const report = buildTransparencyReport(collectors, agentReports, privacyControls, ignoreFilter.patternCount, privacy.redact);
|
|
338
|
+
printTransparencyReport(report);
|
|
339
|
+
saveTransparencyReport(rootDir, report);
|
|
340
|
+
}
|
|
314
341
|
progress.update(6, "Generating diagram...");
|
|
315
342
|
await autoGenerate(rootDir, options);
|
|
316
343
|
progress.done("Analysis complete");
|
package/dist/cli/run.d.ts
CHANGED
package/dist/cli/run.js
CHANGED
|
@@ -128,11 +128,12 @@ export async function handleRun(options) {
|
|
|
128
128
|
apiKey: options.apiKey,
|
|
129
129
|
force: options.force,
|
|
130
130
|
dryRun: options.dryRun,
|
|
131
|
+
debug: options.debug,
|
|
131
132
|
dir: options.dir,
|
|
132
133
|
skipServeHint: true,
|
|
133
134
|
});
|
|
134
135
|
if (options.dryRun)
|
|
135
136
|
return;
|
|
136
137
|
// 2. Serve the UI
|
|
137
|
-
await handleServe({ port });
|
|
138
|
+
await handleServe({ port, debug: options.debug });
|
|
138
139
|
}
|
package/dist/cli/serve.d.ts
CHANGED
package/dist/cli/serve.js
CHANGED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type { StaticContext } from "../agents/static/types.js";
|
|
2
|
+
/**
 * Summary of what a single static collector produced, as listed in the
 * "Static Collectors" section of the transparency report.
 */
export interface CollectorReport {
    /** Collector identifier, e.g. "structure-scanner" or "code-sampler". */
    name: string;
    /** Number of items the collector gathered. */
    itemCount: number;
    /** File paths attributed to this collector; empty when none are tracked. */
    filePaths: Array<string>;
    /** Estimated size of the collector's serialized output. */
    byteEstimate: number;
}
|
|
8
|
+
/**
 * Summary of one LLM agent invocation, derived from the prompts that were
 * sent to the model.
 */
export interface AgentReport {
    /** Identifier of the agent that issued the request. */
    agentId: string;
    /** Model name the request was sent to. */
    model: string;
    /** Data category labels detected in the prompt text, e.g. "file-tree". */
    dataCategories: Array<string>;
    /** File-like references found in the prompt text. */
    fileRefs: Array<string>;
    /** Rough prompt size in tokens. */
    promptTokenEstimate: number;
}
|
|
15
|
+
/**
 * Complete transparency report: what was collected, which agents ran, and
 * which privacy settings were in effect.
 */
export interface TransparencyReport {
    /** Time the report was built, as an ISO-8601 string. */
    timestamp: string;
    /** One entry per static collector. */
    collectors: Array<CollectorReport>;
    /** One entry per recorded LLM agent call. */
    agents: Array<AgentReport>;
    /** Privacy flag name mapped to whether that flag is enabled. */
    privacyControls: Record<string, boolean>;
    /** Number of ignore patterns that were in effect. */
    ignorePatterns: number;
    /** Whether identifier redaction was switched on. */
    redactionEnabled: boolean;
    /** Aggregates computed over `collectors` and `agents`. */
    totals: {
        collectorsRun: number;
        agentsRun: number;
        filesReferenced: number;
        estimatedPromptBytes: number;
    };
}
|
|
29
|
+
export declare function buildCollectorReport(ctx: StaticContext): CollectorReport[];
|
|
30
|
+
export declare function buildAgentReport(agentId: string, model: string, systemPrompt: string, userPrompt: string): AgentReport;
|
|
31
|
+
export declare function printTransparencyReport(report: TransparencyReport): void;
|
|
32
|
+
export declare function saveTransparencyReport(rootDir: string, report: TransparencyReport): void;
|
|
33
|
+
/**
|
|
34
|
+
* Build a complete TransparencyReport from collector reports and agent reports.
|
|
35
|
+
*/
|
|
36
|
+
export declare function buildTransparencyReport(collectors: CollectorReport[], agents: AgentReport[], privacy: Record<string, boolean>, ignorePatterns: number, redactionEnabled: boolean): TransparencyReport;
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
// Transparency Log — Debug output showing exactly what data is collected and sent
|
|
2
|
+
// Used with `archbyte analyze --debug`
|
|
3
|
+
import * as fs from "fs";
|
|
4
|
+
import * as path from "path";
|
|
5
|
+
import chalk from "chalk";
|
|
6
|
+
// --- Collector Report ---
|
|
7
|
+
/**
 * Summarise the output of every static collector stored in `ctx`.
 *
 * Entries are always returned in the same order: structure-scanner,
 * doc-parser, infra-analyzer, event-detector, env-detector,
 * file-tree-collector, code-sampler.
 *
 * `byteEstimate` is the UTF-8 byte size of the collector's JSON
 * serialization (not the string's character count), so non-ASCII content
 * is measured correctly when the value is later printed in B/KB/MB.
 *
 * @param ctx Static context; every collector section read below
 *            (`structure`, `docs`, `infra`, `events`, `envs`, `fileTree`,
 *            `codeSamples`) must be present.
 * @returns Array of `{ name, itemCount, filePaths, byteEstimate }` objects.
 */
export function buildCollectorReport(ctx) {
    // Total number of variables across all detected environments.
    const envVarCount = ctx.envs.environments.reduce((sum, e) => sum + e.variables.length, 0);
    return [
        {
            name: "structure-scanner",
            itemCount: ctx.structure.entryPoints.length + Object.keys(ctx.structure.directories).length,
            filePaths: ctx.structure.entryPoints,
            byteEstimate: Buffer.byteLength(JSON.stringify(ctx.structure), "utf8"),
        },
        {
            name: "doc-parser",
            itemCount: ctx.docs.apiEndpoints.length + ctx.docs.externalDependencies.length + ctx.docs.architectureNotes.length,
            filePaths: [],
            byteEstimate: Buffer.byteLength(JSON.stringify(ctx.docs), "utf8"),
        },
        {
            name: "infra-analyzer",
            itemCount: ctx.infra.docker.services.length + ctx.infra.kubernetes.resources.length + ctx.infra.cloud.services.length,
            // Only the compose file path is tracked for infra, when one exists.
            filePaths: ctx.infra.docker.composeFilePath ? [ctx.infra.docker.composeFilePath] : [],
            byteEstimate: Buffer.byteLength(JSON.stringify(ctx.infra), "utf8"),
        },
        {
            name: "event-detector",
            itemCount: ctx.events.events.length + ctx.events.patterns.length,
            // Several events can come from the same file; list each file once
            // so path counts shown to the user are not inflated.
            filePaths: [...new Set(ctx.events.events.map((e) => e.file))],
            byteEstimate: Buffer.byteLength(JSON.stringify(ctx.events), "utf8"),
        },
        {
            name: "env-detector",
            itemCount: ctx.envs.environments.length + envVarCount,
            filePaths: [],
            byteEstimate: Buffer.byteLength(JSON.stringify(ctx.envs), "utf8"),
        },
        {
            name: "file-tree-collector",
            itemCount: ctx.fileTree.totalFiles + ctx.fileTree.totalDirs,
            filePaths: [],
            byteEstimate: Buffer.byteLength(JSON.stringify(ctx.fileTree), "utf8"),
        },
        {
            name: "code-sampler",
            itemCount: ctx.codeSamples.samples.length + ctx.codeSamples.configFiles.length,
            filePaths: [
                ...ctx.codeSamples.samples.map((s) => s.path),
                ...ctx.codeSamples.configFiles.map((c) => c.path),
            ],
            byteEstimate: Buffer.byteLength(JSON.stringify(ctx.codeSamples), "utf8"),
        },
    ];
}
|
|
71
|
+
// --- Agent Report ---
|
|
72
|
+
/**
 * Build a summary of one LLM agent call from the prompts sent to the model.
 *
 * Data categories are detected heuristically by searching the prompt text
 * for known keywords; file references are quoted tokens that look like a
 * path ending in a 1-6 letter extension.
 *
 * @param agentId      Identifier of the agent.
 * @param model        Model name the prompts were sent to.
 * @param systemPrompt System prompt text; a missing value is treated as "".
 * @param userPrompt   User prompt text; a missing value is treated as "".
 * @returns `{ agentId, model, dataCategories, fileRefs, promptTokenEstimate }`
 *          where `fileRefs` is de-duplicated and capped at 50 entries.
 */
export function buildAgentReport(agentId, model, systemPrompt, userPrompt) {
    // Guard against absent prompts so the literal text "undefined"/"null"
    // never leaks into the scanned text or the token estimate.
    const combined = (systemPrompt ?? "") + (userPrompt ?? "");
    // Keyword matchers, in the order categories are reported.
    const categoryMatchers = [
        { category: "file-tree", pattern: /fileTree|file tree/ },
        { category: "code-samples", pattern: /codeSamples|code sample/ },
        { category: "import-map", pattern: /importMap|import map/ },
        // "structure" is also a substring of "infrastructure"; the lookbehind
        // stops infrastructure prompts being mislabelled as project-structure.
        { category: "project-structure", pattern: /(?<![Ii]nfra)structure/ },
        { category: "infrastructure", pattern: /infra/ },
        { category: "events", pattern: /events/ },
        { category: "environments", pattern: /envs|environment/ },
        { category: "documentation", pattern: /docs/ },
    ];
    const categories = categoryMatchers
        .filter((m) => m.pattern.test(combined))
        .map((m) => m.category);
    // Collect quoted file-like tokens; a Set keeps first-seen order and
    // removes duplicates without a linear scan per match.
    const fileRefPattern = /["']([a-zA-Z0-9_./-]+\.[a-zA-Z]{1,6})["']/g;
    const fileRefs = new Set();
    for (const match of combined.matchAll(fileRefPattern)) {
        fileRefs.add(match[1]);
    }
    // Rough token estimate (~4 chars per token)
    const promptTokenEstimate = Math.ceil(combined.length / 4);
    return {
        agentId,
        model,
        dataCategories: categories,
        fileRefs: [...fileRefs].slice(0, 50),
        promptTokenEstimate,
    };
}
|
|
111
|
+
// --- Print ---
|
|
112
|
+
/**
 * Write a human-readable rendering of a transparency report to stderr.
 *
 * Sections, in order: privacy controls, static collectors, LLM agents
 * (omitted when no agents were recorded), and totals.
 *
 * @param report Transparency report object to print.
 */
export function printTransparencyReport(report) {
    const { privacyControls, collectors, agents, totals } = report;
    console.error();
    console.error(chalk.bold.cyan("Transparency Report"));
    console.error(chalk.gray("What data was collected and sent to your LLM provider"));
    console.error();
    // Privacy controls: one line per flag, then the optional extras.
    console.error(chalk.bold("Privacy Controls"));
    Object.entries(privacyControls).forEach(([flag, enabled]) => {
        const label = enabled ? chalk.green("enabled") : chalk.gray("disabled");
        console.error(` ${flag}: ${label}`);
    });
    if (report.ignorePatterns > 0) {
        console.error(` .archbyteignore: ${chalk.yellow(`${report.ignorePatterns} pattern(s)`)}`);
    }
    if (report.redactionEnabled) {
        console.error(` redaction: ${chalk.yellow("enabled — paths/names hashed")}`);
    }
    console.error();
    // Collectors: up to five paths are listed in full; longer lists show the
    // first three followed by a count of the remainder.
    console.error(chalk.bold("Static Collectors"));
    for (const c of collectors) {
        const size = formatBytes(c.byteEstimate);
        console.error(` ${c.name.padEnd(22)} ${String(c.itemCount).padStart(4)} items ${size.padStart(8)}`);
        const truncated = c.filePaths.length > 5;
        const visiblePaths = truncated ? c.filePaths.slice(0, 3) : c.filePaths;
        for (const fp of visiblePaths) {
            console.error(chalk.gray(` ${fp}`));
        }
        if (truncated) {
            console.error(chalk.gray(` ... and ${c.filePaths.length - 3} more`));
        }
    }
    console.error();
    // Agents
    if (agents.length > 0) {
        console.error(chalk.bold("LLM Agents"));
        agents.forEach((a) => {
            console.error(` ${a.agentId.padEnd(22)} model=${a.model} ~${a.promptTokenEstimate} tokens`);
            if (a.dataCategories.length > 0) {
                console.error(chalk.gray(` data: ${a.dataCategories.join(", ")}`));
            }
            if (a.fileRefs.length > 0) {
                console.error(chalk.gray(` files: ${a.fileRefs.slice(0, 5).join(", ")}${a.fileRefs.length > 5 ? ` +${a.fileRefs.length - 5} more` : ""}`));
            }
        });
        console.error();
    }
    // Totals
    console.error(chalk.bold("Totals"));
    console.error(` Collectors: ${totals.collectorsRun}`);
    console.error(` Agents: ${totals.agentsRun}`);
    console.error(` Files referenced: ${totals.filesReferenced}`);
    console.error(` Est. prompt data: ${formatBytes(totals.estimatedPromptBytes)}`);
    console.error();
}
|
|
170
|
+
// --- Save ---
|
|
171
|
+
/**
 * Persist a transparency report as pretty-printed JSON at
 * `<rootDir>/.archbyte/transparency.json`, creating the `.archbyte`
 * directory first when it does not exist. An existing file is overwritten.
 *
 * @param rootDir Project root directory.
 * @param report  Report object to serialize.
 */
export function saveTransparencyReport(rootDir, report) {
    const outputDir = path.join(rootDir, ".archbyte");
    const dirMissing = !fs.existsSync(outputDir);
    if (dirMissing) {
        fs.mkdirSync(outputDir, { recursive: true });
    }
    const targetFile = path.join(outputDir, "transparency.json");
    const serialized = JSON.stringify(report, null, 2);
    fs.writeFileSync(targetFile, serialized, "utf-8");
}
|
|
179
|
+
// --- Helpers ---
|
|
180
|
+
/**
 * Format a byte count for display: plain bytes below 1024, otherwise KB or
 * MB with one decimal place (1024-based units).
 *
 * @param bytes Size in bytes.
 * @returns A string such as "512 B", "1.5 KB" or "2.0 MB".
 */
function formatBytes(bytes) {
    const KIB = 1024;
    const MIB = KIB * KIB;
    return bytes < KIB
        ? `${bytes} B`
        : bytes < MIB
            ? `${(bytes / KIB).toFixed(1)} KB`
            : `${(bytes / MIB).toFixed(1)} MB`;
}
|
|
187
|
+
/**
 * Assemble the full transparency report from its parts and compute totals.
 *
 * @param collectors       Collector summaries.
 * @param agents           Agent summaries.
 * @param privacy          Privacy flags, stored as `privacyControls`.
 * @param ignorePatterns   Number of ignore patterns in effect.
 * @param redactionEnabled Whether redaction was switched on.
 * @returns The report, stamped with the current time in ISO-8601 form.
 *          `totals.filesReferenced` counts distinct paths across collectors
 *          and agents; `totals.estimatedPromptBytes` sums the collectors'
 *          `byteEstimate` values.
 */
export function buildTransparencyReport(collectors, agents, privacy, ignorePatterns, redactionEnabled) {
    // Distinct file paths mentioned by any collector or agent.
    const referenced = new Set([
        ...collectors.flatMap((c) => c.filePaths),
        ...agents.flatMap((a) => a.fileRefs),
    ]);
    let promptBytes = 0;
    for (const c of collectors) {
        promptBytes += c.byteEstimate;
    }
    const totals = {
        collectorsRun: collectors.length,
        agentsRun: agents.length,
        filesReferenced: referenced.size,
        estimatedPromptBytes: promptBytes,
    };
    return {
        timestamp: new Date().toISOString(),
        collectors,
        agents,
        privacyControls: privacy,
        ignorePatterns,
        redactionEnabled,
        totals,
    };
}
|
package/dist/cli/yaml-io.d.ts
CHANGED
|
@@ -41,6 +41,15 @@ export interface ArchbyteSpecFlow {
|
|
|
41
41
|
category: string;
|
|
42
42
|
steps: ArchbyteSpecFlowStep[];
|
|
43
43
|
}
|
|
44
|
+
export interface PrivacyConfig {
|
|
45
|
+
sendCodeSamples?: boolean;
|
|
46
|
+
sendImportMap?: boolean;
|
|
47
|
+
sendEnvNames?: boolean;
|
|
48
|
+
sendDocs?: boolean;
|
|
49
|
+
sendFileTree?: boolean;
|
|
50
|
+
sendInfra?: boolean;
|
|
51
|
+
redact?: boolean;
|
|
52
|
+
}
|
|
44
53
|
export interface ArchbyteSpec {
|
|
45
54
|
version: number;
|
|
46
55
|
project: {
|
|
@@ -58,6 +67,7 @@ export interface ArchbyteSpec {
|
|
|
58
67
|
environments: ArchbyteSpecEnvironment[];
|
|
59
68
|
flows: ArchbyteSpecFlow[];
|
|
60
69
|
rules: Record<string, unknown>;
|
|
70
|
+
privacy?: PrivacyConfig;
|
|
61
71
|
}
|
|
62
72
|
export interface ScanMetadata {
|
|
63
73
|
analyzedAt: string;
|
|
@@ -83,6 +93,10 @@ export declare function writeMetadata(rootDir: string, meta: ScanMetadata): void
|
|
|
83
93
|
* Optionally preserves existing rules from a prior spec.
|
|
84
94
|
*/
|
|
85
95
|
export declare function staticResultToSpec(result: StaticAnalysisResult, rootDir: string, existingRules?: Record<string, unknown>): ArchbyteSpec;
|
|
96
|
+
/**
|
|
97
|
+
* Resolve privacy config with defaults. All fields default to enabled (true) except redact (false).
|
|
98
|
+
*/
|
|
99
|
+
export declare function resolvePrivacy(spec: ArchbyteSpec | null): Required<PrivacyConfig>;
|
|
86
100
|
/**
|
|
87
101
|
* Convert an ArchbyteSpec back to the AnalysisResult format expected by generate.ts.
|
|
88
102
|
* This is the inverse of staticResultToSpec → buildAnalysisFromStatic.
|
package/dist/cli/yaml-io.js
CHANGED
|
@@ -163,6 +163,21 @@ export function staticResultToSpec(result, rootDir, existingRules) {
|
|
|
163
163
|
rules,
|
|
164
164
|
};
|
|
165
165
|
}
|
|
166
|
+
/**
 * Produce a fully-populated privacy configuration from a spec.
 *
 * Any flag that is absent (or null/undefined) in `spec.privacy` takes its
 * default: every `send*` flag defaults to true, `redact` defaults to false.
 *
 * @param spec Parsed spec, or null/undefined when none is available.
 * @returns Object containing all seven privacy flags.
 */
export function resolvePrivacy(spec) {
    const configured = spec?.privacy ?? {};
    const defaults = {
        sendCodeSamples: true,
        sendImportMap: true,
        sendEnvNames: true,
        sendDocs: true,
        sendFileTree: true,
        sendInfra: true,
        redact: false,
    };
    // Only the known flags are copied; a configured value wins unless nullish.
    return Object.fromEntries(Object.entries(defaults).map(([flag, fallback]) => [flag, configured[flag] ?? fallback]));
}
|
|
166
181
|
/**
|
|
167
182
|
* Convert an ArchbyteSpec back to the AnalysisResult format expected by generate.ts.
|
|
168
183
|
* This is the inverse of staticResultToSpec → buildAnalysisFromStatic.
|
package/dist/server/src/index.js
CHANGED
|
@@ -662,6 +662,77 @@ function createHttpServer() {
|
|
|
662
662
|
}));
|
|
663
663
|
return;
|
|
664
664
|
}
|
|
665
|
+
// API: Transparency report (--debug)
|
|
666
|
+
if (url === "/api/transparency" && req.method === "GET") {
|
|
667
|
+
const transparencyPath = path.join(config.workspaceRoot, ".archbyte", "transparency.json");
|
|
668
|
+
if (existsSync(transparencyPath)) {
|
|
669
|
+
const content = readFileSync(transparencyPath, "utf-8");
|
|
670
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
671
|
+
res.end(content);
|
|
672
|
+
}
|
|
673
|
+
else {
|
|
674
|
+
res.writeHead(404, { "Content-Type": "application/json" });
|
|
675
|
+
res.end(JSON.stringify({ error: "No transparency report found. Run 'archbyte analyze --debug' to generate one." }));
|
|
676
|
+
}
|
|
677
|
+
return;
|
|
678
|
+
}
|
|
679
|
+
// API: Data flow documentation
|
|
680
|
+
if (url === "/api/data-flow" && req.method === "GET") {
|
|
681
|
+
const dataFlow = {
|
|
682
|
+
outboundConnections: [
|
|
683
|
+
{
|
|
684
|
+
destination: "Your LLM Provider (BYOK)",
|
|
685
|
+
protocol: "HTTPS",
|
|
686
|
+
data: [
|
|
687
|
+
"Project structure metadata (language, framework, directories)",
|
|
688
|
+
"File tree (paths only, no contents)",
|
|
689
|
+
"Code excerpts (first ~80 lines of key files)",
|
|
690
|
+
"Import relationships between files",
|
|
691
|
+
"Config file contents (package.json, docker-compose, etc.)",
|
|
692
|
+
"Environment variable names (never values)",
|
|
693
|
+
"Infrastructure details (Docker services, K8s resources)",
|
|
694
|
+
],
|
|
695
|
+
controlledBy: "archbyte.yaml privacy section + .archbyteignore",
|
|
696
|
+
},
|
|
697
|
+
{
|
|
698
|
+
destination: "ArchByte Cloud (api.heartbyte.io)",
|
|
699
|
+
protocol: "HTTPS",
|
|
700
|
+
data: ["Email address", "JWT token", "Scan count"],
|
|
701
|
+
controlledBy: "Required for license validation only",
|
|
702
|
+
},
|
|
703
|
+
],
|
|
704
|
+
neverSent: [
|
|
705
|
+
"Environment variable values",
|
|
706
|
+
"Full source code (only excerpts of key files)",
|
|
707
|
+
"Secrets, passwords, API keys",
|
|
708
|
+
"Git history, commits, diffs",
|
|
709
|
+
"Git credentials (SSH keys, tokens)",
|
|
710
|
+
"Binary files",
|
|
711
|
+
"node_modules / vendor dependencies",
|
|
712
|
+
"User home directory contents",
|
|
713
|
+
],
|
|
714
|
+
privacyControls: {
|
|
715
|
+
archbyteignore: "Exclude files/directories from all scanners (.gitignore syntax)",
|
|
716
|
+
sendCodeSamples: "Toggle code excerpt collection",
|
|
717
|
+
sendImportMap: "Toggle import relationship collection",
|
|
718
|
+
sendEnvNames: "Toggle env variable name collection",
|
|
719
|
+
sendDocs: "Toggle documentation extraction",
|
|
720
|
+
sendFileTree: "Toggle file tree collection",
|
|
721
|
+
sendInfra: "Toggle infrastructure detail collection",
|
|
722
|
+
redact: "Hash all identifiers (paths, names) before sending to LLM",
|
|
723
|
+
},
|
|
724
|
+
localOnly: [
|
|
725
|
+
".archbyte/archbyte.yaml — Architecture spec",
|
|
726
|
+
".archbyte/analysis.json — Full analysis output",
|
|
727
|
+
".archbyte/architecture.json — Diagram layout",
|
|
728
|
+
".archbyte/static-context.json — Raw scanner output",
|
|
729
|
+
".archbyte/transparency.json — Audit trail",
|
|
730
|
+
],
|
|
731
|
+
};
|
|
732
|
+
res.writeHead(200, { "Content-Type": "application/json" });
|
|
733
|
+
res.end(JSON.stringify(dataFlow, null, 2));
|
|
734
|
+
return;
|
|
735
|
+
}
|
|
665
736
|
// API: Health (deprecated - redirect to audit for backward compat)
|
|
666
737
|
if (url === "/api/health" && req.method === "GET") {
|
|
667
738
|
res.writeHead(307, { "Location": "/api/audit" });
|
package/package.json
CHANGED
package/templates/archbyte.yaml
CHANGED
|
@@ -80,6 +80,26 @@ environments: []
|
|
|
80
80
|
# label: "api-gateway -> user-service"
|
|
81
81
|
flows: []
|
|
82
82
|
|
|
83
|
+
# ── Privacy Controls ──
|
|
84
|
+
# Control what data is sent to your LLM provider during analysis.
|
|
85
|
+
# All send* options default to true (enabled); set one to false to exclude that data. redact defaults to false.
|
|
86
|
+
# See: docs/DATA_FLOW.md for full details on what each scanner collects.
|
|
87
|
+
#
|
|
88
|
+
# privacy:
|
|
89
|
+
# sendCodeSamples: true # Code excerpts (first ~80 lines of key files)
|
|
90
|
+
# sendImportMap: true # Import relationships between files
|
|
91
|
+
# sendEnvNames: true # Environment variable names (never values)
|
|
92
|
+
# sendDocs: true # Documentation extracts (README, etc.)
|
|
93
|
+
# sendFileTree: true # Directory structure (paths only)
|
|
94
|
+
# sendInfra: true # Infrastructure details (Docker, K8s, CI)
|
|
95
|
+
# redact: false # Hash all identifiers before sending to LLM
|
|
96
|
+
#
|
|
97
|
+
# Also create .archbyteignore in your project root to exclude specific
|
|
98
|
+
# files/directories from all scanners (.gitignore syntax):
|
|
99
|
+
# secrets/
|
|
100
|
+
# *.env
|
|
101
|
+
# *.pem
|
|
102
|
+
|
|
83
103
|
# ── Architecture Fitness Rules ──
|
|
84
104
|
# Levels: error (fail CI), warn (report), off (skip)
|
|
85
105
|
rules:
|