archbyte 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/archbyte.js CHANGED
@@ -98,6 +98,7 @@ program
98
98
  .option('-v, --verbose', 'Show detailed output')
99
99
  .option('--force', 'Force full re-scan (skip incremental detection)')
100
100
  .option('--dry-run', 'Preview without running')
101
+ .option('--debug', 'Show transparency report (what data is collected and sent)')
101
102
  .action(async (options) => {
102
103
  // handleRun manages login + setup + requireLicense internally
103
104
  await handleRun(options);
@@ -115,6 +116,7 @@ program
115
116
  .option('--skip-llm', 'Alias for --static')
116
117
  .option('--force', 'Force full re-scan (skip incremental detection)')
117
118
  .option('--dry-run', 'Preview without running')
119
+ .option('--debug', 'Show transparency report (what data is collected and sent)')
118
120
  .action(async (options) => {
119
121
  await gate('analyze');
120
122
  await handleAnalyze(options);
@@ -136,6 +138,7 @@ program
136
138
  .description('Start the visualization UI server')
137
139
  .option('-p, --port <number>', `Server port (default: ${DEFAULT_PORT})`, parseInt)
138
140
  .option('-d, --diagram <path>', 'Path to architecture JSON (default: .archbyte/architecture.json)')
141
+ .option('--debug', 'Enable transparency endpoint (/api/transparency)')
139
142
  .action(async (options) => {
140
143
  await handleServe(options);
141
144
  });
@@ -6,7 +6,7 @@ import type { IncrementalContext } from "./types.js";
6
6
  * Run the multi-agent pipeline: 3 parallel fast agents → 2 sequential agents.
7
7
  * Each agent gets a single chat() call with pre-collected static context.
8
8
  */
9
- export declare function runPipeline(ctx: StaticContext, provider: LLMProvider, config: ArchByteConfig, onProgress?: (msg: string) => void, incrementalContext?: IncrementalContext): Promise<StaticAnalysisResult & {
9
+ export declare function runPipeline(ctx: StaticContext, provider: LLMProvider, config: ArchByteConfig, onProgress?: (msg: string) => void, incrementalContext?: IncrementalContext, onDebug?: (agentId: string, model: string, system: string, user: string) => void): Promise<StaticAnalysisResult & {
10
10
  tokenUsage?: {
11
11
  input: number;
12
12
  output: number;
@@ -92,7 +92,7 @@ function getFallbackData(agentId, inc) {
92
92
  * Run the multi-agent pipeline: 3 parallel fast agents → 2 sequential agents.
93
93
  * Each agent gets a single chat() call with pre-collected static context.
94
94
  */
95
- export async function runPipeline(ctx, provider, config, onProgress, incrementalContext) {
95
+ export async function runPipeline(ctx, provider, config, onProgress, incrementalContext, onDebug) {
96
96
  const agentResults = {};
97
97
  const agentMeta = [];
98
98
  const skippedAgents = [];
@@ -118,7 +118,7 @@ export async function runPipeline(ctx, provider, config, onProgress, incremental
118
118
  agentResults[agent.id] = fallback;
119
119
  return Promise.resolve(null);
120
120
  }
121
- return runAgent(agent, ctx, provider, config, parallelPrior, onProgress);
121
+ return runAgent(agent, ctx, provider, config, parallelPrior, onProgress, onDebug);
122
122
  }));
123
123
  let authFailed = false;
124
124
  for (let i = 0; i < parallelTasks.length; i++) {
@@ -156,7 +156,7 @@ export async function runPipeline(ctx, provider, config, onProgress, incremental
156
156
  continue;
157
157
  }
158
158
  try {
159
- const result = await runAgent(agent, ctx, provider, config, agentResults, onProgress);
159
+ const result = await runAgent(agent, ctx, provider, config, agentResults, onProgress, onDebug);
160
160
  if (result) {
161
161
  agentResults[agent.id] = result.data;
162
162
  agentMeta.push(result);
@@ -214,10 +214,12 @@ const MAX_TOKENS = {
214
214
  "flow-detector": 4096,
215
215
  "validator": 4096,
216
216
  };
217
- async function runAgent(agent, ctx, provider, config, priorResults, onProgress) {
217
+ async function runAgent(agent, ctx, provider, config, priorResults, onProgress, onDebug) {
218
218
  const start = Date.now();
219
219
  const model = resolveModel(config.provider, agent.modelTier, config.modelOverrides, config.model);
220
220
  const { system, user } = agent.buildPrompt(ctx, priorResults);
221
+ // Debug callback — report what data is being sent
222
+ onDebug?.(agent.id, model, system, user);
221
223
  onProgress?.(` ${agent.name}: calling ${model}...`);
222
224
  const maxTokens = MAX_TOKENS[agent.id] ?? 4096;
223
225
  const response = await provider.chat({
@@ -0,0 +1,12 @@
1
+ export interface IgnoreFilter {
2
+ /** Returns true if the relative path should be excluded from analysis */
3
+ isIgnored(relativePath: string): boolean;
4
+ /** Number of active patterns (excluding comments and blank lines) */
5
+ patternCount: number;
6
+ }
7
+ /**
8
+ * Load `.archbyteignore` from the project root.
9
+ * Returns an IgnoreFilter that matches paths against the patterns.
10
+ * If the file doesn't exist, returns a no-op filter that ignores nothing.
11
+ */
12
+ export declare function loadIgnoreFile(projectRoot: string): IgnoreFilter;
@@ -0,0 +1,140 @@
1
+ // .archbyteignore — File exclusion filter
2
+ // Supports .gitignore-style patterns: # comments, ! negation, ** globstar, * wildcard
3
+ import * as fs from "fs";
4
+ import * as path from "path";
5
/**
 * Load `.archbyteignore` from the project root.
 *
 * Returns an IgnoreFilter whose `isIgnored(relativePath)` evaluates every rule
 * in file order; the last rule that matches decides the outcome, and a negated
 * (`!`) rule re-includes a path that an earlier rule excluded.
 * If the file doesn't exist, returns a no-op filter that ignores nothing.
 *
 * @param projectRoot Directory expected to contain `.archbyteignore`.
 * @returns `{ isIgnored, patternCount }`, where `patternCount` is the number of
 *          parsed rules (comments and blank lines are not counted).
 */
export function loadIgnoreFile(projectRoot) {
    const ignorePath = path.join(projectRoot, ".archbyteignore");
    if (!fs.existsSync(ignorePath)) {
        return { isIgnored: () => false, patternCount: 0 };
    }
    const content = fs.readFileSync(ignorePath, "utf-8");
    const rules = parseIgnorePatterns(content);
    return {
        isIgnored(relativePath) {
            // Normalize to a root-relative, forward-slash path. Leading "./"
            // prefixes are stripped as well as a leading "/", otherwise
            // "./src/a.ts" could never match an anchored pattern like "src/".
            const normalized = relativePath
                .replace(/\\/g, "/")
                .replace(/^(?:\.\/)+/, "")
                .replace(/^\//, "");
            let ignored = false;
            for (const rule of rules) {
                if (rule.pattern.test(normalized)) {
                    // Later rules override earlier ones.
                    ignored = !rule.negated;
                }
            }
            return ignored;
        },
        patternCount: rules.length,
    };
}
32
/**
 * Turn the text of an ignore file into an ordered list of rules.
 *
 * Each non-blank, non-comment line becomes `{ pattern: RegExp, negated: boolean }`.
 * A leading `!` marks the rule as negated and is removed before the remainder
 * is compiled. Lines that are empty after stripping are dropped.
 */
function parseIgnorePatterns(content) {
    return content
        .split("\n")
        .map((rawLine) => rawLine.trim())
        // Blank lines and "#" comments carry no rule.
        .filter((line) => line !== "" && !line.startsWith("#"))
        .flatMap((line) => {
            const negated = line.startsWith("!");
            const withoutBang = negated ? line.slice(1) : line;
            // Drop trailing whitespace unless the character before it is a backslash.
            const glob = withoutBang.replace(/(?<!\\)\s+$/, "");
            return glob ? [{ pattern: patternToRegex(glob), negated }] : [];
        });
}
58
/**
 * Convert a .gitignore-style pattern to a RegExp that is tested against a
 * root-relative, forward-slash path.
 *
 * Supported syntax:
 *  - `*`      any run of non-slash characters
 *  - `**`     anything, including slashes (a leading or embedded `**` followed by
 *             `/` matches zero or more directories)
 *  - `?`      a single non-slash character
 *  - `[abc]`  character class; `[!abc]` is a negated class
 *  - `\x`     the literal character `x` (e.g. `\#` or `\!`)
 *  - leading `/`, or any `/` inside the pattern: anchored to the project root
 *  - trailing `/`: directory pattern
 *
 * Because only path strings are available (not file types), every pattern also
 * matches anything *below* the matched entry. This mirrors gitignore, where an
 * excluded directory excludes its whole subtree — so `/build` or `src/generated`
 * exclude `build/out.js` and `src/generated/a.ts`, not just the bare directory name.
 *
 * @param pattern One ignore pattern, without a leading `!`.
 * @returns The compiled RegExp.
 */
function patternToRegex(pattern) {
    // Leading "/" anchors the pattern to the root.
    let anchored = false;
    if (pattern.startsWith("/")) {
        anchored = true;
        pattern = pattern.slice(1);
    }
    // A trailing "/" only marks "directory"; since descendants always match
    // (see the suffix below) it can simply be removed.
    if (pattern.endsWith("/")) {
        pattern = pattern.slice(0, -1);
    }
    let regex = "";
    let i = 0;
    while (i < pattern.length) {
        const ch = pattern[i];
        const next = pattern[i + 1];
        if (ch === "\\" && next !== undefined) {
            // Backslash escape: the next character is taken literally.
            regex += escapeRegex(next);
            i += 2;
        }
        else if (ch === "*" && next === "*") {
            if (pattern[i + 2] === "/") {
                // "**/" — zero or more whole directories
                regex += "(?:.*/)?";
                i += 3;
            }
            else {
                // "**" at the end or before a non-slash — anything at all
                regex += ".*";
                i += 2;
            }
        }
        else if (ch === "*") {
            regex += "[^/]*";
            i++;
        }
        else if (ch === "?") {
            regex += "[^/]";
            i++;
        }
        else if (ch === "[") {
            const closeBracket = pattern.indexOf("]", i + 1);
            if (closeBracket !== -1) {
                // Pass the class through, translating glob negation "[!...]"
                // into regex negation "[^...]".
                let classBody = pattern.slice(i + 1, closeBracket);
                if (classBody.startsWith("!")) {
                    classBody = `^${classBody.slice(1)}`;
                }
                regex += `[${classBody}]`;
                i = closeBracket + 1;
            }
            else {
                // Unterminated "[" is an ordinary character.
                regex += escapeRegex(ch);
                i++;
            }
        }
        else {
            regex += escapeRegex(ch);
            i++;
        }
    }
    // Match the entry itself or anything underneath it.
    const subtree = "(?:/.*)?$";
    // A pattern with a leading or embedded "/" is relative to the root;
    // otherwise it may match at any directory depth.
    if (anchored || pattern.includes("/")) {
        return new RegExp(`^${regex}${subtree}`);
    }
    return new RegExp(`(?:^|/)${regex}${subtree}`);
}
/** Escape every regex metacharacter in `ch` so it matches literally. */
function escapeRegex(ch) {
    return ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
@@ -1,4 +1,5 @@
1
1
  import type { StaticAnalysisResult, StaticContext } from "./types.js";
2
+ import type { PrivacyConfig } from "../../cli/yaml-io.js";
2
3
  export type { StaticAnalysisResult, StaticContext } from "./types.js";
3
4
  export { validateAnalysis } from "./validator.js";
4
5
  /**
@@ -16,4 +17,4 @@ export declare function runStaticAnalysis(projectRoot: string, onProgress?: (msg
16
17
  * This runs ONLY fact-collectors (no component-detector, connection-mapper, or validator).
17
18
  * Output is consumed by the pipeline LLM agents.
18
19
  */
19
- export declare function runStaticContextCollection(projectRoot: string, onProgress?: (msg: string) => void): Promise<StaticContext>;
20
+ export declare function runStaticContextCollection(projectRoot: string, onProgress?: (msg: string) => void, privacy?: Required<PrivacyConfig>): Promise<StaticContext>;
@@ -11,6 +11,8 @@ import { mapConnections } from "./connection-mapper.js";
11
11
  import { validateAnalysis } from "./validator.js";
12
12
  import { collectFileTree } from "./file-tree-collector.js";
13
13
  import { collectCodeSamples } from "./code-sampler.js";
14
+ import { loadIgnoreFile } from "./ignore.js";
15
+ import { redactContext } from "./redactor.js";
14
16
  export { validateAnalysis } from "./validator.js";
15
17
  /**
16
18
  * Run all static analysis scanners.
@@ -22,7 +24,11 @@ export { validateAnalysis } from "./validator.js";
22
24
  * 4. Gap detection — identify what the LLM should resolve
23
25
  */
24
26
  export async function runStaticAnalysis(projectRoot, onProgress) {
25
- const tk = new StaticToolkit(projectRoot);
27
+ const ignoreFilter = loadIgnoreFile(projectRoot);
28
+ if (ignoreFilter.patternCount > 0) {
29
+ onProgress?.(`Loaded .archbyteignore: ${ignoreFilter.patternCount} pattern(s)`);
30
+ }
31
+ const tk = new StaticToolkit(projectRoot, ignoreFilter);
26
32
  // Phase 1: parallel scanners (no dependencies)
27
33
  onProgress?.("Running parallel scanners...");
28
34
  const [structure, docs, infra, events, envs] = await Promise.all([
@@ -292,8 +298,12 @@ async function collectGaps(analysis, tk) {
292
298
  * This runs ONLY fact-collectors (no component-detector, connection-mapper, or validator).
293
299
  * Output is consumed by the pipeline LLM agents.
294
300
  */
295
- export async function runStaticContextCollection(projectRoot, onProgress) {
296
- const tk = new StaticToolkit(projectRoot);
301
+ export async function runStaticContextCollection(projectRoot, onProgress, privacy) {
302
+ const ignoreFilter = loadIgnoreFile(projectRoot);
303
+ if (ignoreFilter.patternCount > 0) {
304
+ onProgress?.(`Loaded .archbyteignore: ${ignoreFilter.patternCount} pattern(s)`);
305
+ }
306
+ const tk = new StaticToolkit(projectRoot, ignoreFilter);
297
307
  onProgress?.("Collecting static context (7 scanners in parallel)...");
298
308
  const [structure, docs, infra, events, envs, fileTree, codeSamples] = await Promise.all([
299
309
  scanStructure(tk),
@@ -306,5 +316,43 @@ export async function runStaticContextCollection(projectRoot, onProgress) {
306
316
  ]);
307
317
  onProgress?.(`Context: ${fileTree.totalFiles} files, ${fileTree.totalDirs} dirs, ${codeSamples.configFiles.length} configs, ${codeSamples.samples.length} samples`);
308
318
  onProgress?.(`Detected: ${structure.language}, ${structure.framework ?? "no framework"}, monorepo=${structure.isMonorepo}`);
309
- return { structure, docs, infra, events, envs, fileTree, codeSamples };
319
+ let ctx = { structure, docs, infra, events, envs, fileTree, codeSamples };
320
+ // Apply privacy controls — zero out disabled fields
321
+ if (privacy) {
322
+ if (!privacy.sendCodeSamples) {
323
+ ctx.codeSamples = { ...ctx.codeSamples, samples: [] };
324
+ onProgress?.("Privacy: code samples excluded");
325
+ }
326
+ if (!privacy.sendImportMap) {
327
+ ctx.codeSamples = { ...ctx.codeSamples, importMap: {} };
328
+ onProgress?.("Privacy: import map excluded");
329
+ }
330
+ if (!privacy.sendEnvNames) {
331
+ ctx.envs = { ...ctx.envs, environments: ctx.envs.environments.map((e) => ({ ...e, variables: [] })) };
332
+ onProgress?.("Privacy: env variable names excluded");
333
+ }
334
+ if (!privacy.sendDocs) {
335
+ ctx.docs = { projectDescription: "", architectureNotes: [], apiEndpoints: [], externalDependencies: [] };
336
+ onProgress?.("Privacy: documentation excluded");
337
+ }
338
+ if (!privacy.sendFileTree) {
339
+ ctx.fileTree = { tree: [], totalFiles: ctx.fileTree.totalFiles, totalDirs: ctx.fileTree.totalDirs };
340
+ onProgress?.("Privacy: file tree excluded");
341
+ }
342
+ if (!privacy.sendInfra) {
343
+ ctx.infra = {
344
+ docker: { services: [], composeFile: false },
345
+ kubernetes: { resources: [] },
346
+ cloud: { provider: null, services: [], iac: null },
347
+ ci: { platform: null, pipelines: [] },
348
+ };
349
+ onProgress?.("Privacy: infrastructure details excluded");
350
+ }
351
+ // Redaction — hash identifiers before returning
352
+ if (privacy.redact) {
353
+ ctx = redactContext(ctx);
354
+ onProgress?.("Privacy: redaction applied — identifiers hashed");
355
+ }
356
+ }
357
+ return ctx;
310
358
  }
@@ -0,0 +1,12 @@
1
+ import type { StaticContext } from "./types.js";
2
+ /**
3
+ * Redact sensitive identifiers in a StaticContext.
4
+ * - File paths: hash each segment, preserve extensions and depth
5
+ * - Env var names: hash
6
+ * - Docker service names: hash
7
+ * - String literals in code samples: hash
8
+ * - Preserve: npm package names, language keywords, structural info
9
+ *
10
+ * Returns a deep copy — the original context is not modified.
11
+ */
12
+ export declare function redactContext(ctx: StaticContext): StaticContext;
@@ -0,0 +1,206 @@
1
+ // Redaction Mode — Hash sensitive identifiers in StaticContext
2
+ // Preserves structure and public package names while hiding proprietary paths/names.
3
+ import { createHash } from "crypto";
4
/**
 * Produce a redacted version of a StaticContext.
 *
 * Each of the seven sections (structure, docs, infra, events, envs, fileTree,
 * codeSamples) is passed through its own redactor and collected into a new
 * top-level object; `ctx` itself is not mutated. Sections that a redactor
 * deliberately leaves untouched (for example `infra.cloud`, `infra.ci`,
 * `events.patterns`) are carried over by reference rather than cloned.
 */
export function redactContext(ctx) {
    const { structure, docs, infra, events, envs, fileTree, codeSamples } = ctx;
    return {
        structure: redactStructure(structure),
        docs: redactDocs(docs),
        infra: redactInfra(infra),
        events: redactEvents(events),
        envs: redactEnvs(envs),
        fileTree: redactFileTree(fileTree),
        codeSamples: redactCodeSamples(codeSamples),
    };
}
25
// --- Hashing ---
/** Memo of value → placeholder so repeated values are hashed only once. */
const hashCache = new Map();
/**
 * Replace a value with a stable placeholder of the form `redacted-xxxxxxxx`,
 * where `xxxxxxxx` is the first 8 hex characters of the value's SHA-256 digest.
 * Equal inputs always yield the same placeholder.
 */
function hashStr(value) {
    let placeholder = hashCache.get(value);
    if (placeholder === undefined) {
        const digest = createHash("sha256").update(value).digest("hex");
        placeholder = `redacted-${digest.slice(0, 8)}`;
        hashCache.set(value, placeholder);
    }
    return placeholder;
}
/**
 * Hash each segment of a "/"-separated path, preserving the file extension,
 * the directory depth and the path's shape.
 * `src/auth/handler.ts` → `redacted-1a2b3c4d/redacted-5e6f7a8b/redacted-9c0d1e2f.ts`
 *
 * Purely structural segments — the empty segment produced by a leading or
 * doubled "/", "." and ".." — are kept as-is: they reveal nothing proprietary,
 * and hashing them would turn `../auth/x.js` or `/etc/app.conf` into something
 * that no longer reads as a relative or absolute path.
 *
 * @param filePath Path to redact; empty values and "." are returned unchanged.
 */
function redactPath(filePath) {
    if (!filePath || filePath === ".")
        return filePath;
    return filePath
        .split("/")
        .map((part) => {
            if (part === "" || part === "." || part === "..")
                return part;
            // Keep the extension only when there is a non-empty stem before the
            // last dot and a non-empty suffix after it (so ".env" and "name."
            // are hashed whole).
            const dotIdx = part.lastIndexOf(".");
            if (dotIdx > 0 && dotIdx < part.length - 1) {
                return `${hashStr(part.slice(0, dotIdx))}${part.slice(dotIdx)}`;
            }
            return hashStr(part);
        })
        .join("/");
}
56
+ // --- Structure ---
57
/**
 * Redact the structure section: the project name is hashed, every entry point
 * is path-redacted, and each key of `directories` is hashed as a single string
 * (its value is kept). All other fields of `s` are copied through unchanged.
 */
function redactStructure(s) {
    const directories = {};
    for (const [dir, isPresent] of Object.entries(s.directories)) {
        directories[hashStr(dir)] = isPresent;
    }
    const redacted = { ...s };
    redacted.projectName = hashStr(s.projectName);
    redacted.entryPoints = s.entryPoints.map((entry) => redactPath(entry));
    redacted.directories = directories;
    return redacted;
}
67
+ // --- Docs ---
68
/**
 * Redact the docs section. Only the `path` of each API endpoint is rewritten;
 * the project description, architecture notes and external dependency list
 * are passed through as-is.
 */
function redactDocs(d) {
    const { projectDescription, architectureNotes, apiEndpoints, externalDependencies } = d;
    const redactedEndpoints = apiEndpoints.map((ep) => {
        const copy = { ...ep };
        copy.path = redactApiPath(ep.path);
        return copy;
    });
    return {
        projectDescription,
        architectureNotes,
        apiEndpoints: redactedEndpoints,
        externalDependencies,
    };
}
80
/**
 * Redact the static segments of an API path while keeping its shape.
 *
 * A segment is redacted when it starts with a letter or underscore; the whole
 * segment, up to the next "/", "?" or "#", is replaced by its hash. Matching
 * the whole segment matters: stopping at the first non-word character would
 * turn "/user-profiles" into "/redacted-xxxxxxxx-profiles" and leak the suffix.
 *
 * Left untouched:
 *  - a small allow-list of generic segments (/api, /v1, /v2, /v3, /health, /status, /auth)
 *  - segments that do not start with a letter or underscore, such as route
 *    parameters (":id", "{id}") and numeric segments.
 *
 * @param apiPath Endpoint path, e.g. "/api/v1/user-profiles/:id".
 */
function redactApiPath(apiPath) {
    const generic = new Set(["/api", "/v1", "/v2", "/v3", "/health", "/status", "/auth"]);
    return apiPath.replace(/\/[a-zA-Z_][^/?#]*/g, (segment) => {
        if (generic.has(segment))
            return segment;
        return `/${hashStr(segment.slice(1))}`;
    });
}
90
+ // --- Infra ---
91
/**
 * Redact the infra section.
 *
 * Docker services: `name` is hashed, `buildContext` is path-redacted, and each
 * `environment` entry has its key hashed and its value replaced by "***".
 * Kubernetes resources: `name` and `namespace` are hashed.
 * `cloud` and `ci` are passed through untouched.
 *
 * NOTE(review): every other field of a service / resource is copied through
 * unchanged by the spread — confirm none of them carry sensitive identifiers.
 */
function redactInfra(i) {
    const { docker, kubernetes, cloud, ci } = i;
    const redactService = (svc) => {
        let environment;
        if (svc.environment) {
            environment = Object.fromEntries(Object.keys(svc.environment).map((key) => [hashStr(key), "***"]));
        }
        return {
            ...svc,
            name: hashStr(svc.name),
            buildContext: svc.buildContext ? redactPath(svc.buildContext) : undefined,
            environment,
        };
    };
    const redactResource = (r) => ({
        ...r,
        name: hashStr(r.name),
        namespace: r.namespace ? hashStr(r.namespace) : undefined,
    });
    return {
        docker: {
            services: docker.services.map((svc) => redactService(svc)),
            composeFile: docker.composeFile,
            composeFilePath: docker.composeFilePath ? redactPath(docker.composeFilePath) : undefined,
        },
        kubernetes: {
            resources: kubernetes.resources.map((r) => redactResource(r)),
        },
        cloud,
        ci,
    };
}
116
+ // --- Events ---
117
/**
 * Redact the events section: only the `file` of each event is path-redacted.
 * `hasEDA`, `patterns` and the remaining event fields are kept as they are.
 */
function redactEvents(e) {
    const events = e.events.map((ev) => {
        const copy = { ...ev };
        copy.file = redactPath(ev.file);
        return copy;
    });
    return { hasEDA: e.hasEDA, patterns: e.patterns, events };
}
127
+ // --- Envs ---
128
/**
 * Redact the envs section: environment names are kept, every variable name is
 * hashed. Each output environment contains only `name` and `variables`.
 */
function redactEnvs(e) {
    const environments = [];
    for (const env of e.environments) {
        environments.push({
            name: env.name,
            variables: env.variables.map((variable) => hashStr(variable)),
        });
    }
    return {
        environments,
        configPattern: e.configPattern,
        hasSecrets: e.hasSecrets,
    };
}
138
+ // --- File Tree ---
139
/** Redact every entry of the file tree; the file and directory totals are kept. */
function redactFileTree(ft) {
    const { tree, totalFiles, totalDirs } = ft;
    const redactedTree = tree.map((node) => redactTreeEntry(node));
    return { tree: redactedTree, totalFiles, totalDirs };
}
146
/**
 * Redact one tree entry and, recursively, its children. The result always has
 * the keys `path`, `type` and `children`; `children` is undefined when the
 * entry has none.
 */
function redactTreeEntry(entry) {
    const kids = entry.children;
    const children = kids == null ? undefined : kids.map((child) => redactTreeEntry(child));
    return {
        path: redactPath(entry.path),
        type: entry.type,
        children,
    };
}
153
+ // --- Code Samples ---
154
/**
 * Redact the code-samples section.
 *  - samples: path is path-redacted, excerpt goes through redactCodeExcerpt
 *  - importMap: file keys are path-redacted; imports that start with "." or "/"
 *    are path-redacted, anything else (package-style specifiers) is kept
 *  - configFiles: path is path-redacted, content goes through redactConfigContent
 */
function redactCodeSamples(cs) {
    const redactImport = (imp) => {
        const isLocal = imp.startsWith(".") || imp.startsWith("/");
        return isLocal ? redactPath(imp) : imp;
    };
    const samples = cs.samples.map((sample) => {
        const copy = { ...sample };
        copy.path = redactPath(sample.path);
        copy.excerpt = redactCodeExcerpt(sample.excerpt);
        return copy;
    });
    const importMap = {};
    for (const [file, imports] of Object.entries(cs.importMap)) {
        importMap[redactPath(file)] = imports.map((imp) => redactImport(imp));
    }
    const configFiles = cs.configFiles.map((cf) => ({
        path: redactPath(cf.path),
        content: redactConfigContent(cf.content),
    }));
    return { samples, importMap, configFiles };
}
176
/**
 * Redact single- and double-quoted string literals in a code excerpt while
 * leaving the surrounding code untouched.
 *
 * A literal is kept verbatim only when:
 *  - it is at most 2 characters long, or
 *  - it is a package-style module specifier: it does not start with "." or "/",
 *    contains no space, AND it directly follows `from`, `import`, `import(`
 *    or `require(`.
 * Everything else is replaced by its hash inside the original quotes.
 *
 * The import-context requirement is deliberate: without it any space-free
 * literal (API keys, tokens, connection strings, hostnames) would be treated
 * as a "package name" and sent unredacted.
 *
 * Literals are matched on a single line and may contain backslash escapes and
 * the other quote character (e.g. "it's"). Template literals are not handled.
 *
 * NOTE(review): the import-context test targets JS/TS-style syntax; quoted
 * imports in other languages (e.g. multi-line Go import blocks) will be hashed.
 *
 * @param code Source excerpt.
 * @returns The excerpt with string literals redacted.
 */
function redactCodeExcerpt(code) {
    // Opening quote, then escape sequences or any character that is not the
    // same quote, a backslash or a line break, then the matching closing quote.
    const stringLiteral = /(["'])((?:\\.|(?!\1)[^\\\r\n])*)\1/g;
    // Text immediately preceding a module specifier.
    const importContext = /(?:\bfrom|\bimport\s*\(?|\brequire\s*\()\s*$/;
    return code.replace(stringLiteral, (_match, quote, content, offset, whole) => {
        // Keep short common strings
        if (content.length <= 2)
            return `${quote}${content}${quote}`;
        const looksLikePackage = !content.startsWith(".") && !content.startsWith("/") && !content.includes(" ");
        if (looksLikePackage) {
            // A short look-behind window is enough to see the import keyword.
            const before = whole.slice(Math.max(0, offset - 32), offset);
            if (importContext.test(before))
                return `${quote}${content}${quote}`;
        }
        // Redact everything else
        return `${quote}${hashStr(content)}${quote}`;
    });
}
195
/**
 * Redact values in `key=value` and `key: value` lines of a config file.
 *
 * Only lines whose key consists of word characters, "." or "-" are considered;
 * the key and separator are kept and the value is replaced by its hash.
 * Boolean, null/undefined and plain numeric values are left as they are.
 *
 * Key, separator and value must sit on the SAME line: only spaces and tabs are
 * allowed around the separator. Allowing arbitrary whitespace there would let
 * a bare YAML parent key ("services:") swallow the following line ("  web:")
 * as its value and destroy the document structure.
 *
 * @param content Raw config file text.
 * @returns The text with matching values redacted.
 */
function redactConfigContent(content) {
    return content.replace(/^([ \t]*[\w.-]+[ \t]*[:=][ \t]*)(.+)$/gm, (_match, prefix, value) => {
        const trimmed = value.trim();
        // Keep boolean, numeric, null values
        if (/^(true|false|null|undefined|\d+(\.\d+)?)$/i.test(trimmed)) {
            return `${prefix}${value}`;
        }
        return `${prefix}${hashStr(trimmed)}`;
    });
}
@@ -1,10 +1,12 @@
1
1
  import type { GrepResult, DirEntry } from "../runtime/types.js";
2
+ import type { IgnoreFilter } from "./ignore.js";
2
3
  /**
3
4
  * Wraps LocalFSBackend with safe-read helpers for static scanners.
4
5
  */
5
6
  export declare class StaticToolkit {
6
7
  private fs;
7
- constructor(projectRoot: string);
8
+ private ignore;
9
+ constructor(projectRoot: string, ignoreFilter?: IgnoreFilter);
8
10
  readFileSafe(path: string): Promise<string | null>;
9
11
  globFiles(pattern: string, cwd?: string): Promise<string[]>;
10
12
  grepFiles(pattern: string, searchPath?: string): Promise<GrepResult[]>;
@@ -6,10 +6,14 @@ import { LocalFSBackend } from "../tools/local-fs.js";
6
6
  */
7
7
  export class StaticToolkit {
8
8
  fs;
9
- constructor(projectRoot) {
9
+ ignore;
10
+ constructor(projectRoot, ignoreFilter) {
10
11
  this.fs = new LocalFSBackend(projectRoot);
12
+ this.ignore = ignoreFilter ?? null;
11
13
  }
12
14
  async readFileSafe(path) {
15
+ if (this.ignore?.isIgnored(path))
16
+ return null;
13
17
  try {
14
18
  return await this.fs.readFile(path);
15
19
  }
@@ -21,12 +25,19 @@ export class StaticToolkit {
21
25
  try {
22
26
  // Expand brace patterns like *.{yml,yaml} → [*.yml, *.yaml]
23
27
  const patterns = expandBraces(pattern);
28
+ let results;
24
29
  if (patterns.length === 1) {
25
- return await this.fs.glob(patterns[0], cwd);
30
+ results = await this.fs.glob(patterns[0], cwd);
31
+ }
32
+ else {
33
+ const resultSets = await Promise.all(patterns.map((p) => this.fs.glob(p, cwd).catch(() => [])));
34
+ results = [...new Set(resultSets.flat())].sort();
35
+ }
36
+ // Apply ignore filter
37
+ if (this.ignore) {
38
+ results = results.filter((f) => !this.ignore.isIgnored(f));
26
39
  }
27
- const resultSets = await Promise.all(patterns.map((p) => this.fs.glob(p, cwd).catch(() => [])));
28
- // Deduplicate and sort
29
- return [...new Set(resultSets.flat())].sort();
40
+ return results;
30
41
  }
31
42
  catch {
32
43
  return [];
@@ -36,11 +47,16 @@ export class StaticToolkit {
36
47
  try {
37
48
  // Work around LocalFSBackend.grep glob bug with cwd:
38
49
  // grep from root and filter by path prefix
39
- const results = await this.fs.grep(pattern);
40
- if (!searchPath)
41
- return results;
42
- const prefix = searchPath.endsWith("/") ? searchPath : `${searchPath}/`;
43
- return results.filter((r) => r.file.startsWith(prefix));
50
+ let results = await this.fs.grep(pattern);
51
+ if (searchPath) {
52
+ const prefix = searchPath.endsWith("/") ? searchPath : `${searchPath}/`;
53
+ results = results.filter((r) => r.file.startsWith(prefix));
54
+ }
55
+ // Apply ignore filter
56
+ if (this.ignore) {
57
+ results = results.filter((r) => !this.ignore.isIgnored(r.file));
58
+ }
59
+ return results;
44
60
  }
45
61
  catch {
46
62
  return [];
@@ -48,7 +64,14 @@ export class StaticToolkit {
48
64
  }
49
65
  async listDir(dirPath) {
50
66
  try {
51
- return await this.fs.listDir(dirPath);
67
+ const entries = await this.fs.listDir(dirPath);
68
+ if (this.ignore) {
69
+ return entries.filter((e) => {
70
+ const fullPath = dirPath === "." ? e.name : `${dirPath}/${e.name}`;
71
+ return !this.ignore.isIgnored(fullPath);
72
+ });
73
+ }
74
+ return entries;
52
75
  }
53
76
  catch {
54
77
  return [];
@@ -9,6 +9,7 @@ interface AnalyzeOptions {
9
9
  dryRun?: boolean;
10
10
  force?: boolean;
11
11
  dir?: string;
12
+ debug?: boolean;
12
13
  /** When true, skip "archbyte serve" in the Next steps (e.g. called from `archbyte run`) */
13
14
  skipServeHint?: boolean;
14
15
  }
@@ -4,9 +4,10 @@ import { execSync } from "child_process";
4
4
  import chalk from "chalk";
5
5
  import { resolveConfig } from "./config.js";
6
6
  import { recordUsage } from "./license-gate.js";
7
- import { staticResultToSpec, writeSpec, writeMetadata, loadSpec, loadMetadata } from "./yaml-io.js";
7
+ import { staticResultToSpec, writeSpec, writeMetadata, loadSpec, loadMetadata, resolvePrivacy } from "./yaml-io.js";
8
8
  import { getChangedFiles, mapFilesToComponents, shouldRunAgents, isGitAvailable, categorizeChanges, computeNeighbors, getCommitCount } from "./incremental.js";
9
9
  import { progressBar, confirm } from "./ui.js";
10
+ import { buildCollectorReport, buildAgentReport, buildTransparencyReport, printTransparencyReport, saveTransparencyReport, } from "./transparency.js";
10
11
  export async function handleAnalyze(options) {
11
12
  const rootDir = options.dir ? path.resolve(options.dir) : process.cwd();
12
13
  const isStaticOnly = options.static || options.skipLlm;
@@ -201,13 +202,17 @@ export async function handleAnalyze(options) {
201
202
  console.log(chalk.yellow(`File tree grew from ${priorCount} to ${currentFileCount} files — running full scan.`));
202
203
  }
203
204
  }
204
- // 4. Run static context collection → LLM pipeline
205
+ // 4. Load privacy config
206
+ const privacySpec = loadSpec(rootDir);
207
+ const privacy = resolvePrivacy(privacySpec);
208
+ const agentReports = [];
209
+ // 5. Run static context collection → LLM pipeline
205
210
  const progress = progressBar(7);
206
211
  progress.update(0, "Collecting static context...");
207
212
  const { runStaticContextCollection } = await import("../agents/static/index.js");
208
213
  const ctx = await runStaticContextCollection(rootDir, (msg) => {
209
214
  progress.update(0, `Static context: ${msg}`);
210
- });
215
+ }, privacy);
211
216
  // Save static context for debugging / re-runs
212
217
  const ctxPath = path.join(rootDir, ".archbyte", "static-context.json");
213
218
  if (!fs.existsSync(path.dirname(ctxPath))) {
@@ -219,6 +224,12 @@ export async function handleAnalyze(options) {
219
224
  const { runPipeline } = await import("../agents/pipeline/index.js");
220
225
  let result;
221
226
  let pipelineStep = 1;
227
+ // Debug callback — collect agent reports for transparency log
228
+ const onDebug = options.debug
229
+ ? (agentId, model, system, user) => {
230
+ agentReports.push(buildAgentReport(agentId, model, system, user));
231
+ }
232
+ : undefined;
222
233
  try {
223
234
  result = await runPipeline(ctx, provider, config, (msg) => {
224
235
  // Map pipeline progress messages to bar steps
@@ -249,7 +260,7 @@ export async function handleAnalyze(options) {
249
260
  else {
250
261
  progress.update(pipelineStep, msg.trim());
251
262
  }
252
- }, incrementalContext);
263
+ }, incrementalContext, onDebug);
253
264
  }
254
265
  catch (err) {
255
266
  const errorMsg = err instanceof Error ? err.message : String(err);
@@ -311,6 +322,22 @@ export async function handleAnalyze(options) {
311
322
  const spec = staticResultToSpec(result, rootDir, existingSpec?.rules);
312
323
  writeSpec(rootDir, spec);
313
324
  writeScanMetadata(rootDir, duration, "pipeline", ctx.fileTree.totalFiles, result.tokenUsage, incrementalContext ? true : undefined, result.skippedAgents);
325
+ // Transparency report (--debug)
326
+ if (options.debug) {
327
+ const collectors = buildCollectorReport(ctx);
328
+ const privacyControls = {
329
+ sendCodeSamples: privacy.sendCodeSamples,
330
+ sendImportMap: privacy.sendImportMap,
331
+ sendEnvNames: privacy.sendEnvNames,
332
+ sendDocs: privacy.sendDocs,
333
+ sendFileTree: privacy.sendFileTree,
334
+ sendInfra: privacy.sendInfra,
335
+ };
336
+ const ignoreFilter = (await import("../agents/static/ignore.js")).loadIgnoreFile(rootDir);
337
+ const report = buildTransparencyReport(collectors, agentReports, privacyControls, ignoreFilter.patternCount, privacy.redact);
338
+ printTransparencyReport(report);
339
+ saveTransparencyReport(rootDir, report);
340
+ }
314
341
  progress.update(6, "Generating diagram...");
315
342
  await autoGenerate(rootDir, options);
316
343
  progress.done("Analysis complete");
package/dist/cli/run.d.ts CHANGED
@@ -7,6 +7,7 @@ interface RunOptions {
7
7
  verbose?: boolean;
8
8
  force?: boolean;
9
9
  dryRun?: boolean;
10
+ debug?: boolean;
10
11
  dir?: string;
11
12
  }
12
13
  export declare function handleRun(options: RunOptions): Promise<void>;
package/dist/cli/run.js CHANGED
@@ -128,11 +128,12 @@ export async function handleRun(options) {
128
128
  apiKey: options.apiKey,
129
129
  force: options.force,
130
130
  dryRun: options.dryRun,
131
+ debug: options.debug,
131
132
  dir: options.dir,
132
133
  skipServeHint: true,
133
134
  });
134
135
  if (options.dryRun)
135
136
  return;
136
137
  // 2. Serve the UI
137
- await handleServe({ port });
138
+ await handleServe({ port, debug: options.debug });
138
139
  }
@@ -1,6 +1,7 @@
1
1
  interface ServeOptions {
2
2
  port?: number;
3
3
  diagram?: string;
4
+ debug?: boolean;
4
5
  }
5
6
  /**
6
7
  * Start the ArchByte UI server
package/dist/cli/serve.js CHANGED
@@ -84,6 +84,7 @@ export async function handleServe(options) {
84
84
  diagramPath,
85
85
  workspaceRoot: rootDir,
86
86
  port,
87
+ debug: options.debug,
87
88
  });
88
89
  return;
89
90
  }
@@ -0,0 +1,36 @@
1
+ import type { StaticContext } from "../agents/static/types.js";
2
+ export interface CollectorReport {
3
+ name: string;
4
+ itemCount: number;
5
+ filePaths: string[];
6
+ byteEstimate: number;
7
+ }
8
+ export interface AgentReport {
9
+ agentId: string;
10
+ model: string;
11
+ dataCategories: string[];
12
+ fileRefs: string[];
13
+ promptTokenEstimate: number;
14
+ }
15
+ export interface TransparencyReport {
16
+ timestamp: string;
17
+ collectors: CollectorReport[];
18
+ agents: AgentReport[];
19
+ privacyControls: Record<string, boolean>;
20
+ ignorePatterns: number;
21
+ redactionEnabled: boolean;
22
+ totals: {
23
+ collectorsRun: number;
24
+ agentsRun: number;
25
+ filesReferenced: number;
26
+ estimatedPromptBytes: number;
27
+ };
28
+ }
29
+ export declare function buildCollectorReport(ctx: StaticContext): CollectorReport[];
30
+ export declare function buildAgentReport(agentId: string, model: string, systemPrompt: string, userPrompt: string): AgentReport;
31
+ export declare function printTransparencyReport(report: TransparencyReport): void;
32
+ export declare function saveTransparencyReport(rootDir: string, report: TransparencyReport): void;
33
+ /**
34
+ * Build a complete TransparencyReport from collector reports and agent reports.
35
+ */
36
+ export declare function buildTransparencyReport(collectors: CollectorReport[], agents: AgentReport[], privacy: Record<string, boolean>, ignorePatterns: number, redactionEnabled: boolean): TransparencyReport;
@@ -0,0 +1,214 @@
1
+ // Transparency Log — Debug output showing exactly what data is collected and sent
2
+ // Used with `archbyte analyze --debug`
3
+ import * as fs from "fs";
4
+ import * as path from "path";
5
+ import chalk from "chalk";
6
+ // --- Collector Report ---
7
/**
 * Summarise what each static collector gathered.
 *
 * Produces one report per collector, in a fixed order: structure-scanner,
 * doc-parser, infra-analyzer, event-detector, env-detector,
 * file-tree-collector, code-sampler.
 *
 * @param ctx Static context holding the `structure`, `docs`, `infra`,
 *   `events`, `envs`, `fileTree` and `codeSamples` collector outputs.
 * @returns Array of `{ name, itemCount, filePaths, byteEstimate }` reports.
 */
export function buildCollectorReport(ctx) {
    // Size of the value's JSON form in UTF-8 bytes. String.length would count
    // UTF-16 code units and under-report any non-ASCII content.
    const jsonBytes = (value) => Buffer.byteLength(JSON.stringify(value) ?? "", "utf8");
    // Fresh, de-duplicated array (first-seen order) so a report never aliases
    // an array owned by ctx and never lists the same file twice.
    const uniquePaths = (paths) => [...new Set(paths)];
    const { structure, docs, infra, events, envs, fileTree, codeSamples } = ctx;
    const envVarCount = envs.environments.reduce((sum, env) => sum + env.variables.length, 0);
    const composeFile = infra.docker.composeFilePath;
    return [
        {
            name: "structure-scanner",
            itemCount: structure.entryPoints.length + Object.keys(structure.directories).length,
            filePaths: uniquePaths(structure.entryPoints),
            byteEstimate: jsonBytes(structure),
        },
        {
            name: "doc-parser",
            itemCount: docs.apiEndpoints.length + docs.externalDependencies.length + docs.architectureNotes.length,
            filePaths: [],
            byteEstimate: jsonBytes(docs),
        },
        {
            name: "infra-analyzer",
            itemCount: infra.docker.services.length + infra.kubernetes.resources.length + infra.cloud.services.length,
            // Only the compose file path is tracked for this collector.
            filePaths: composeFile ? [composeFile] : [],
            byteEstimate: jsonBytes(infra),
        },
        {
            name: "event-detector",
            itemCount: events.events.length + events.patterns.length,
            // Several events can originate from the same file.
            filePaths: uniquePaths(events.events.map((event) => event.file)),
            byteEstimate: jsonBytes(events),
        },
        {
            name: "env-detector",
            // Counts each environment plus every variable declared in it.
            itemCount: envs.environments.length + envVarCount,
            filePaths: [],
            byteEstimate: jsonBytes(envs),
        },
        {
            name: "file-tree-collector",
            itemCount: fileTree.totalFiles + fileTree.totalDirs,
            filePaths: [],
            byteEstimate: jsonBytes(fileTree),
        },
        {
            name: "code-sampler",
            itemCount: codeSamples.samples.length + codeSamples.configFiles.length,
            filePaths: uniquePaths([
                ...codeSamples.samples.map((sample) => sample.path),
                ...codeSamples.configFiles.map((configFile) => configFile.path),
            ]),
            byteEstimate: jsonBytes(codeSamples),
        },
    ];
}
71
+ // --- Agent Report ---
72
/**
 * Describe the prompt handed to one LLM agent.
 *
 * Data categories are detected by plain substring matching on the combined
 * prompt text, so they are a heuristic. File references are quoted strings
 * that look like `name.ext` (extension of 1-6 letters); at most 50 distinct
 * references are kept, in first-seen order.
 *
 * @param agentId Identifier of the agent.
 * @param model Model name the agent runs with.
 * @param systemPrompt System prompt text; a missing value counts as empty.
 * @param userPrompt User prompt text; a missing value counts as empty.
 * @returns `{ agentId, model, dataCategories, fileRefs, promptTokenEstimate }`.
 */
export function buildAgentReport(agentId, model, systemPrompt, userPrompt) {
    const MAX_FILE_REFS = 50;
    // A missing prompt must not be concatenated as the literal text "undefined".
    const combined = `${systemPrompt ?? ""}${userPrompt ?? ""}`;
    // Category label -> substrings whose presence in the prompt implies it.
    const categoryMarkers = [
        ["file-tree", ["fileTree", "file tree"]],
        ["code-samples", ["codeSamples", "code sample"]],
        ["import-map", ["importMap", "import map"]],
        ["project-structure", ["structure"]],
        ["infrastructure", ["infra"]],
        ["events", ["events"]],
        ["environments", ["envs", "environment"]],
        ["documentation", ["docs"]],
    ];
    const dataCategories = categoryMarkers
        .filter(([, markers]) => markers.some((marker) => combined.includes(marker)))
        .map(([category]) => category);
    // A Set gives O(1) de-duplication while preserving first-seen order.
    const fileRefs = new Set();
    const fileRefPattern = /["']([a-zA-Z0-9_./-]+\.[a-zA-Z]{1,6})["']/g;
    for (const match of combined.matchAll(fileRefPattern)) {
        fileRefs.add(match[1]);
        // Nothing past the cap is ever reported, so stop scanning.
        if (fileRefs.size >= MAX_FILE_REFS)
            break;
    }
    return {
        agentId,
        model,
        dataCategories,
        fileRefs: [...fileRefs],
        // Rough token estimate (~4 chars per token)
        promptTokenEstimate: Math.ceil(combined.length / 4),
    };
}
111
+ // --- Print ---
112
/**
 * Print a human-readable rendering of a transparency report to stderr.
 *
 * Sections, in order: privacy controls, static collectors, LLM agents
 * (omitted when there are none) and totals.
 *
 * @param report Report to print.
 */
export function printTransparencyReport(report) {
    const emit = (...parts) => console.error(...parts);
    const dim = (text) => emit(chalk.gray(text));
    const heading = (title) => emit(chalk.bold(title));
    emit();
    emit(chalk.bold.cyan("Transparency Report"));
    dim("What data was collected and sent to your LLM provider");
    emit();
    // Privacy controls
    heading("Privacy Controls");
    Object.entries(report.privacyControls).forEach(([control, isOn]) => {
        const label = isOn ? chalk.green("enabled") : chalk.gray("disabled");
        emit(` ${control}: ${label}`);
    });
    if (report.ignorePatterns > 0) {
        emit(` .archbyteignore: ${chalk.yellow(`${report.ignorePatterns} pattern(s)`)}`);
    }
    if (report.redactionEnabled) {
        emit(` redaction: ${chalk.yellow("enabled — paths/names hashed")}`);
    }
    emit();
    // Collectors
    heading("Static Collectors");
    for (const collector of report.collectors) {
        const sizeLabel = formatBytes(collector.byteEstimate);
        emit(` ${collector.name.padEnd(22)} ${String(collector.itemCount).padStart(4)} items ${sizeLabel.padStart(8)}`);
        // Up to five paths are listed in full; longer lists show three plus a count.
        const pathCount = collector.filePaths.length;
        const isLongList = pathCount > 5;
        const shown = isLongList ? collector.filePaths.slice(0, 3) : collector.filePaths;
        for (const filePath of shown) {
            dim(` ${filePath}`);
        }
        if (isLongList) {
            dim(` ... and ${pathCount - 3} more`);
        }
    }
    emit();
    // Agents
    if (report.agents.length > 0) {
        heading("LLM Agents");
        for (const agent of report.agents) {
            emit(` ${agent.agentId.padEnd(22)} model=${agent.model} ~${agent.promptTokenEstimate} tokens`);
            if (agent.dataCategories.length > 0) {
                dim(` data: ${agent.dataCategories.join(", ")}`);
            }
            if (agent.fileRefs.length > 0) {
                const overflow = agent.fileRefs.length > 5 ? ` +${agent.fileRefs.length - 5} more` : "";
                dim(` files: ${agent.fileRefs.slice(0, 5).join(", ")}${overflow}`);
            }
        }
        emit();
    }
    // Totals
    const { totals } = report;
    heading("Totals");
    emit(` Collectors: ${totals.collectorsRun}`);
    emit(` Agents: ${totals.agentsRun}`);
    emit(` Files referenced: ${totals.filesReferenced}`);
    emit(` Est. prompt data: ${formatBytes(totals.estimatedPromptBytes)}`);
    emit();
}
170
+ // --- Save ---
171
/**
 * Write the report as pretty-printed JSON to
 * `<rootDir>/.archbyte/transparency.json`, creating the `.archbyte`
 * directory when needed. An existing file is overwritten.
 *
 * @param rootDir Project root directory.
 * @param report Report to serialise.
 */
export function saveTransparencyReport(rootDir, report) {
    const dir = path.join(rootDir, ".archbyte");
    // A recursive mkdir succeeds when the directory already exists, so no
    // separate existence check (and no check-then-create race) is needed.
    fs.mkdirSync(dir, { recursive: true });
    const reportPath = path.join(dir, "transparency.json");
    fs.writeFileSync(reportPath, JSON.stringify(report, null, 2), "utf-8");
}
179
+ // --- Helpers ---
180
/**
 * Format a byte count as "N B", "N.N KB" or "N.N MB" (1 KB = 1024 B).
 *
 * @param bytes Byte count.
 * @returns Human-readable size string.
 */
function formatBytes(bytes) {
    if (bytes < 1024)
        return `${bytes} B`;
    const kilobytes = (bytes / 1024).toFixed(1);
    // Compare the rounded value: just below 1 MiB rounds up to "1024.0",
    // which must roll over to MB instead of printing "1024.0 KB".
    if (Number(kilobytes) < 1024)
        return `${kilobytes} KB`;
    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
}
187
/**
 * Assemble a complete transparency report.
 *
 * The inputs are stored as given; `totals` is derived from them:
 * `filesReferenced` counts distinct paths across all collectors and agents,
 * and `estimatedPromptBytes` sums the collectors' byte estimates.
 *
 * @param collectors Collector reports.
 * @param agents Agent reports.
 * @param privacy Privacy setting name mapped to whether it is enabled.
 * @param ignorePatterns Number of ignore patterns in effect.
 * @param redactionEnabled Whether redaction was enabled.
 * @returns The report, timestamped with the current time in ISO-8601 form.
 */
export function buildTransparencyReport(collectors, agents, privacy, ignorePatterns, redactionEnabled) {
    const distinctFiles = new Set([
        ...collectors.flatMap((collector) => [...collector.filePaths]),
        ...agents.flatMap((agent) => [...agent.fileRefs]),
    ]);
    const totals = {
        collectorsRun: collectors.length,
        agentsRun: agents.length,
        filesReferenced: distinctFiles.size,
        estimatedPromptBytes: collectors.reduce((total, collector) => total + collector.byteEstimate, 0),
    };
    return {
        timestamp: new Date().toISOString(),
        collectors,
        agents,
        privacyControls: privacy,
        ignorePatterns,
        redactionEnabled,
        totals,
    };
}
@@ -41,6 +41,15 @@ export interface ArchbyteSpecFlow {
41
41
  category: string;
42
42
  steps: ArchbyteSpecFlowStep[];
43
43
  }
44
/**
 * Optional `privacy` section of the spec: controls which categories of
 * collected data are sent to the LLM provider. Omitted flags are filled in
 * by `resolvePrivacy`.
 */
export interface PrivacyConfig {
  /** Send code excerpts. */
  sendCodeSamples?: boolean;
  /** Send import relationships between files. */
  sendImportMap?: boolean;
  /** Send environment variable names. */
  sendEnvNames?: boolean;
  /** Send documentation extracts. */
  sendDocs?: boolean;
  /** Send the file tree. */
  sendFileTree?: boolean;
  /** Send infrastructure details. */
  sendInfra?: boolean;
  /** Hash identifiers (paths, names) before sending. */
  redact?: boolean;
}
44
53
  export interface ArchbyteSpec {
45
54
  version: number;
46
55
  project: {
@@ -58,6 +67,7 @@ export interface ArchbyteSpec {
58
67
  environments: ArchbyteSpecEnvironment[];
59
68
  flows: ArchbyteSpecFlow[];
60
69
  rules: Record<string, unknown>;
70
+ privacy?: PrivacyConfig;
61
71
  }
62
72
  export interface ScanMetadata {
63
73
  analyzedAt: string;
@@ -83,6 +93,10 @@ export declare function writeMetadata(rootDir: string, meta: ScanMetadata): void
83
93
  * Optionally preserves existing rules from a prior spec.
84
94
  */
85
95
  export declare function staticResultToSpec(result: StaticAnalysisResult, rootDir: string, existingRules?: Record<string, unknown>): ArchbyteSpec;
96
/**
 * Fill in defaults for the spec's optional `privacy` section.
 * Every `send*` flag defaults to `true`; `redact` defaults to `false`.
 * A `null` spec yields the defaults.
 */
export declare function resolvePrivacy(spec: ArchbyteSpec | null): Required<PrivacyConfig>;
86
100
  /**
87
101
  * Convert an ArchbyteSpec back to the AnalysisResult format expected by generate.ts.
88
102
  * This is the inverse of staticResultToSpec → buildAnalysisFromStatic.
@@ -163,6 +163,21 @@ export function staticResultToSpec(result, rootDir, existingRules) {
163
163
  rules,
164
164
  };
165
165
  }
166
/**
 * Fill in defaults for the spec's optional `privacy` section.
 *
 * Every `send*` flag defaults to `true`; `redact` defaults to `false`.
 * A flag left `null` or `undefined` takes its default.
 *
 * @param spec Parsed spec, or `null`/`undefined` when none is available.
 * @returns Privacy config with every flag present.
 */
export function resolvePrivacy(spec) {
    const configured = spec?.privacy ?? {};
    // Flag name -> value used when the spec leaves the flag unset.
    const defaults = {
        sendCodeSamples: true,
        sendImportMap: true,
        sendEnvNames: true,
        sendDocs: true,
        sendFileTree: true,
        sendInfra: true,
        redact: false,
    };
    return Object.fromEntries(Object.entries(defaults).map(([flag, fallback]) => [flag, configured[flag] ?? fallback]));
}
166
181
  /**
167
182
  * Convert an ArchbyteSpec back to the AnalysisResult format expected by generate.ts.
168
183
  * This is the inverse of staticResultToSpec → buildAnalysisFromStatic.
@@ -4,5 +4,6 @@ export interface ServerConfig {
4
4
  diagramPath: string;
5
5
  workspaceRoot: string;
6
6
  port: number;
7
+ debug?: boolean;
7
8
  }
8
9
  export declare function startServer(cfg: ServerConfig): Promise<void>;
@@ -662,6 +662,77 @@ function createHttpServer() {
662
662
  }));
663
663
  return;
664
664
  }
665
+ // API: Transparency report (--debug)
666
+ if (url === "/api/transparency" && req.method === "GET") {
667
+ const transparencyPath = path.join(config.workspaceRoot, ".archbyte", "transparency.json");
668
+ if (existsSync(transparencyPath)) {
669
+ const content = readFileSync(transparencyPath, "utf-8");
670
+ res.writeHead(200, { "Content-Type": "application/json" });
671
+ res.end(content);
672
+ }
673
+ else {
674
+ res.writeHead(404, { "Content-Type": "application/json" });
675
+ res.end(JSON.stringify({ error: "No transparency report found. Run 'archbyte analyze --debug' to generate one." }));
676
+ }
677
+ return;
678
+ }
679
+ // API: Data flow documentation
680
+ if (url === "/api/data-flow" && req.method === "GET") {
681
+ const dataFlow = {
682
+ outboundConnections: [
683
+ {
684
+ destination: "Your LLM Provider (BYOK)",
685
+ protocol: "HTTPS",
686
+ data: [
687
+ "Project structure metadata (language, framework, directories)",
688
+ "File tree (paths only, no contents)",
689
+ "Code excerpts (first ~80 lines of key files)",
690
+ "Import relationships between files",
691
+ "Config file contents (package.json, docker-compose, etc.)",
692
+ "Environment variable names (never values)",
693
+ "Infrastructure details (Docker services, K8s resources)",
694
+ ],
695
+ controlledBy: "archbyte.yaml privacy section + .archbyteignore",
696
+ },
697
+ {
698
+ destination: "ArchByte Cloud (api.heartbyte.io)",
699
+ protocol: "HTTPS",
700
+ data: ["Email address", "JWT token", "Scan count"],
701
+ controlledBy: "Required for license validation only",
702
+ },
703
+ ],
704
+ neverSent: [
705
+ "Environment variable values",
706
+ "Full source code (only excerpts of key files)",
707
+ "Secrets, passwords, API keys",
708
+ "Git history, commits, diffs",
709
+ "Git credentials (SSH keys, tokens)",
710
+ "Binary files",
711
+ "node_modules / vendor dependencies",
712
+ "User home directory contents",
713
+ ],
714
+ privacyControls: {
715
+ archbyteignore: "Exclude files/directories from all scanners (.gitignore syntax)",
716
+ sendCodeSamples: "Toggle code excerpt collection",
717
+ sendImportMap: "Toggle import relationship collection",
718
+ sendEnvNames: "Toggle env variable name collection",
719
+ sendDocs: "Toggle documentation extraction",
720
+ sendFileTree: "Toggle file tree collection",
721
+ sendInfra: "Toggle infrastructure detail collection",
722
+ redact: "Hash all identifiers (paths, names) before sending to LLM",
723
+ },
724
+ localOnly: [
725
+ ".archbyte/archbyte.yaml — Architecture spec",
726
+ ".archbyte/analysis.json — Full analysis output",
727
+ ".archbyte/architecture.json — Diagram layout",
728
+ ".archbyte/static-context.json — Raw scanner output",
729
+ ".archbyte/transparency.json — Audit trail",
730
+ ],
731
+ };
732
+ res.writeHead(200, { "Content-Type": "application/json" });
733
+ res.end(JSON.stringify(dataFlow, null, 2));
734
+ return;
735
+ }
665
736
  // API: Health (deprecated - redirect to audit for backward compat)
666
737
  if (url === "/api/health" && req.method === "GET") {
667
738
  res.writeHead(307, { "Location": "/api/audit" });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "archbyte",
3
- "version": "0.5.2",
3
+ "version": "0.5.3",
4
4
  "description": "ArchByte - See what agents build. As they build it.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -80,6 +80,26 @@ environments: []
80
80
  # label: "api-gateway -> user-service"
81
81
  flows: []
82
82
 
83
+ # ── Privacy Controls ──
84
+ # Control what data is sent to your LLM provider during analysis.
85
+ # All options default to true (enabled). Set to false to exclude.
86
+ # See: docs/DATA_FLOW.md for full details on what each scanner collects.
87
+ #
88
+ # privacy:
89
+ # sendCodeSamples: true # Code excerpts (first ~80 lines of key files)
90
+ # sendImportMap: true # Import relationships between files
91
+ # sendEnvNames: true # Environment variable names (never values)
92
+ # sendDocs: true # Documentation extracts (README, etc.)
93
+ # sendFileTree: true # Directory structure (paths only)
94
+ # sendInfra: true # Infrastructure details (Docker, K8s, CI)
95
+ # redact: false # Hash all identifiers before sending to LLM
96
+ #
97
+ # Also create .archbyteignore in your project root to exclude specific
98
+ # files/directories from all scanners (.gitignore syntax):
99
+ # secrets/
100
+ # *.env
101
+ # *.pem
102
+
83
103
  # ── Architecture Fitness Rules ──
84
104
  # Levels: error (fail CI), warn (report), off (skip)
85
105
  rules: