archbyte 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,206 @@
1
+ // Redaction Mode — Hash sensitive identifiers in StaticContext
2
+ // Preserves structure and public package names while hiding proprietary paths/names.
3
+ import { createHash } from "crypto";
4
/**
 * Produce a redacted counterpart of a StaticContext.
 *
 * Each section is handed to its own redactor:
 * - file paths are hashed segment by segment (extension and depth kept)
 * - env var names, Docker service names and Kubernetes resource names are hashed
 * - string literals in code samples are hashed
 * - npm package names and other structural information are passed through
 *
 * The input context is not mutated; a new top-level object is returned.
 * Sections that are passed through unredacted may still be shared by
 * reference with the input.
 */
export function redactContext(ctx) {
    const structure = redactStructure(ctx.structure);
    const docs = redactDocs(ctx.docs);
    const infra = redactInfra(ctx.infra);
    const events = redactEvents(ctx.events);
    const envs = redactEnvs(ctx.envs);
    const fileTree = redactFileTree(ctx.fileTree);
    const codeSamples = redactCodeSamples(ctx.codeSamples);
    return { structure, docs, infra, events, envs, fileTree, codeSamples };
}
25
+ // --- Hashing ---
26
/** Memo of value → placeholder so repeated identifiers are hashed only once. */
const hashCache = new Map();
/**
 * Map a string to a stable placeholder of the form `redacted-xxxxxxxx`,
 * where the suffix is the first 8 hex characters of the value's SHA-256 digest.
 * The same input always yields the same placeholder.
 */
function hashStr(value) {
    const cached = hashCache.get(value);
    if (cached !== undefined)
        return cached;
    const digest = createHash("sha256").update(value).digest("hex");
    const token = `redacted-${digest.slice(0, 8)}`;
    hashCache.set(value, token);
    return token;
}
/**
 * Hash each named segment of a `/`-separated path while keeping its shape.
 *
 * - The final extension of a segment (text after the last dot) is preserved.
 * - Structural segments — empty (leading/trailing/double slash), `.` and `..` —
 *   are kept verbatim so absolute and relative paths stay recognisable and
 *   the directory depth is unchanged.
 * - An empty path and `.` are returned as-is.
 *
 * `./src/auth/handler.ts` → `./redacted-1a2b3c4d/redacted-5e6f7a8b/redacted-9c0d1e2f.ts`
 */
function redactPath(filePath) {
    if (!filePath || filePath === ".")
        return filePath;
    return filePath
        .split("/")
        .map((segment) => {
            // These carry no project information; hashing them would turn
            // "./x" or "/x" into something that looks like a bare package name.
            if (segment === "" || segment === "." || segment === "..")
                return segment;
            const dotIdx = segment.lastIndexOf(".");
            // A leading dot (dotfile) or a trailing dot is not an extension.
            const hasExtension = dotIdx > 0 && dotIdx < segment.length - 1;
            if (!hasExtension)
                return hashStr(segment);
            return `${hashStr(segment.slice(0, dotIdx))}${segment.slice(dotIdx)}`;
        })
        .join("/");
}
56
+ // --- Structure ---
57
/**
 * Redact the project structure section.
 * Every field is copied through unchanged except the project name and
 * directory names (hashed) and the entry point paths (hashed per segment).
 */
function redactStructure(s) {
    const redacted = { ...s };
    redacted.projectName = hashStr(s.projectName);
    redacted.entryPoints = s.entryPoints.map((entry) => redactPath(entry));
    const hashedDirs = {};
    for (const [dir, isPresent] of Object.entries(s.directories)) {
        hashedDirs[hashStr(dir)] = isPresent;
    }
    redacted.directories = hashedDirs;
    return redacted;
}
67
+ // --- Docs ---
68
/**
 * Redact the documentation section.
 * Description, architecture notes and external dependency names are passed
 * through; only the `path` of each API endpoint is redacted.
 */
function redactDocs(d) {
    const projectDescription = d.projectDescription;
    const architectureNotes = d.architectureNotes;
    const apiEndpoints = d.apiEndpoints.map((endpoint) => {
        const copy = { ...endpoint };
        copy.path = redactApiPath(endpoint.path);
        return copy;
    });
    // Dependency names are public package names, so they are kept as-is.
    const externalDependencies = d.externalDependencies;
    return { projectDescription, architectureNotes, apiEndpoints, externalDependencies };
}
80
+ function redactApiPath(apiPath) {
81
+ // Redact path parameters but keep HTTP structure
82
+ return apiPath.replace(/\/[a-zA-Z_]\w*/g, (segment) => {
83
+ // Keep common REST verbs/patterns
84
+ const common = ["/api", "/v1", "/v2", "/v3", "/health", "/status", "/auth"];
85
+ if (common.includes(segment))
86
+ return segment;
87
+ return `/${hashStr(segment.slice(1))}`;
88
+ });
89
+ }
90
+ // --- Infra ---
91
/**
 * Redact the infrastructure section.
 * Docker service names, build contexts, environment variable names and the
 * compose file path are hashed; environment values are replaced by "***".
 * Kubernetes resource names and namespaces are hashed. Cloud and CI
 * information is passed through unchanged.
 */
function redactInfra(i) {
    const redactService = (svc) => {
        const out = { ...svc };
        out.name = hashStr(svc.name);
        out.buildContext = svc.buildContext ? redactPath(svc.buildContext) : undefined;
        if (svc.environment) {
            // Only the variable names survive (hashed); values are masked.
            const maskedPairs = Object.entries(svc.environment).map(([key]) => [hashStr(key), "***"]);
            out.environment = Object.fromEntries(maskedPairs);
        }
        else {
            out.environment = undefined;
        }
        return out;
    };
    const redactResource = (resource) => {
        const out = { ...resource };
        out.name = hashStr(resource.name);
        out.namespace = resource.namespace ? hashStr(resource.namespace) : undefined;
        return out;
    };
    const docker = {
        services: i.docker.services.map((svc) => redactService(svc)),
        composeFile: i.docker.composeFile,
        composeFilePath: i.docker.composeFilePath ? redactPath(i.docker.composeFilePath) : undefined,
    };
    const kubernetes = {
        resources: i.kubernetes.resources.map((resource) => redactResource(resource)),
    };
    // Provider names, service types and CI platform names are treated as public.
    return { docker, kubernetes, cloud: i.cloud, ci: i.ci };
}
116
+ // --- Events ---
117
/**
 * Redact the events section.
 * The EDA flag and pattern list are passed through; each event keeps all of
 * its fields except `file`, whose path is hashed per segment.
 */
function redactEvents(e) {
    const hasEDA = e.hasEDA;
    const patterns = e.patterns;
    const events = e.events.map((event) => {
        const copy = { ...event };
        copy.file = redactPath(event.file);
        return copy;
    });
    return { hasEDA, patterns, events };
}
127
+ // --- Envs ---
128
/**
 * Redact the environments section.
 * Environment names are kept, variable names are hashed, and the config
 * pattern and secrets flag are passed through.
 */
function redactEnvs(e) {
    const environments = e.environments.map(({ name, variables }) => {
        const hashedVariables = variables.map((variable) => hashStr(variable));
        return { name, variables: hashedVariables };
    });
    return {
        environments,
        configPattern: e.configPattern,
        hasSecrets: e.hasSecrets,
    };
}
138
+ // --- File Tree ---
139
/**
 * Redact the file tree section: every entry is redacted recursively,
 * while the file and directory totals are carried over unchanged.
 */
function redactFileTree(ft) {
    const tree = ft.tree.map((node) => redactTreeEntry(node));
    const { totalFiles, totalDirs } = ft;
    return { tree, totalFiles, totalDirs };
}
146
/**
 * Redact one file tree entry and, recursively, its children.
 * The entry type is kept; `children` is undefined when the entry has none.
 */
function redactTreeEntry(entry) {
    const redacted = {
        path: redactPath(entry.path),
        type: entry.type,
        children: undefined,
    };
    if (entry.children != null) {
        redacted.children = entry.children.map((child) => redactTreeEntry(child));
    }
    return redacted;
}
153
+ // --- Code Samples ---
154
/**
 * Redact the code samples section.
 * Sample paths and excerpts, import map keys and relative/absolute import
 * targets, and config file paths and contents are all redacted. Import
 * specifiers that do not start with "." or "/" are kept verbatim.
 */
function redactCodeSamples(cs) {
    // Anything not starting with "." or "/" is treated as a package import.
    const isPackageImport = (specifier) => !specifier.startsWith(".") && !specifier.startsWith("/");
    const samples = cs.samples.map((sample) => {
        const copy = { ...sample };
        copy.path = redactPath(sample.path);
        copy.excerpt = redactCodeExcerpt(sample.excerpt);
        return copy;
    });
    const importEntries = Object.entries(cs.importMap).map(([file, imports]) => {
        const redactedImports = imports.map((specifier) => (isPackageImport(specifier) ? specifier : redactPath(specifier)));
        return [redactPath(file), redactedImports];
    });
    const importMap = Object.fromEntries(importEntries);
    const configFiles = cs.configFiles.map((configFile) => {
        const path = redactPath(configFile.path);
        const content = redactConfigContent(configFile.content);
        return { path, content };
    });
    return { samples, importMap, configFiles };
}
176
+ /**
177
+ * Redact string literals in code excerpts while preserving structure.
178
+ * Keeps language keywords, npm imports, and structural tokens.
179
+ */
180
+ function redactCodeExcerpt(code) {
181
+ // Replace string literals (single/double quoted) with hashed versions
182
+ // But preserve common patterns like import paths to npm packages
183
+ return code.replace(/(["'])([^"']*)\1/g, (_match, quote, content) => {
184
+ // Keep npm package imports
185
+ if (!content.startsWith(".") && !content.startsWith("/") && !content.includes(" ")) {
186
+ return `${quote}${content}${quote}`;
187
+ }
188
+ // Keep short common strings
189
+ if (content.length <= 2)
190
+ return `${quote}${content}${quote}`;
191
+ // Redact everything else
192
+ return `${quote}${hashStr(content)}${quote}`;
193
+ });
194
+ }
195
+ function redactConfigContent(content) {
196
+ // Redact values in key=value and key: value patterns
197
+ return content
198
+ .replace(/^(\s*[\w.-]+\s*[:=]\s*)(.+)$/gm, (_match, prefix, value) => {
199
+ const trimmed = value.trim();
200
+ // Keep boolean, numeric, null values
201
+ if (/^(true|false|null|undefined|\d+(\.\d+)?)$/i.test(trimmed)) {
202
+ return `${prefix}${value}`;
203
+ }
204
+ return `${prefix}${hashStr(trimmed)}`;
205
+ });
206
+ }
@@ -1,10 +1,12 @@
1
1
  import type { GrepResult, DirEntry } from "../runtime/types.js";
2
+ import type { IgnoreFilter } from "./ignore.js";
2
3
  /**
3
4
  * Wraps LocalFSBackend with safe-read helpers for static scanners.
4
5
  */
5
6
  export declare class StaticToolkit {
6
7
  private fs;
7
- constructor(projectRoot: string);
8
+ private ignore;
9
+ constructor(projectRoot: string, ignoreFilter?: IgnoreFilter);
8
10
  readFileSafe(path: string): Promise<string | null>;
9
11
  globFiles(pattern: string, cwd?: string): Promise<string[]>;
10
12
  grepFiles(pattern: string, searchPath?: string): Promise<GrepResult[]>;
@@ -6,10 +6,14 @@ import { LocalFSBackend } from "../tools/local-fs.js";
6
6
  */
7
7
  export class StaticToolkit {
8
8
  fs;
9
- constructor(projectRoot) {
9
+ ignore;
10
+ constructor(projectRoot, ignoreFilter) {
10
11
  this.fs = new LocalFSBackend(projectRoot);
12
+ this.ignore = ignoreFilter ?? null;
11
13
  }
12
14
  async readFileSafe(path) {
15
+ if (this.ignore?.isIgnored(path))
16
+ return null;
13
17
  try {
14
18
  return await this.fs.readFile(path);
15
19
  }
@@ -21,12 +25,19 @@ export class StaticToolkit {
21
25
  try {
22
26
  // Expand brace patterns like *.{yml,yaml} → [*.yml, *.yaml]
23
27
  const patterns = expandBraces(pattern);
28
+ let results;
24
29
  if (patterns.length === 1) {
25
- return await this.fs.glob(patterns[0], cwd);
30
+ results = await this.fs.glob(patterns[0], cwd);
31
+ }
32
+ else {
33
+ const resultSets = await Promise.all(patterns.map((p) => this.fs.glob(p, cwd).catch(() => [])));
34
+ results = [...new Set(resultSets.flat())].sort();
35
+ }
36
+ // Apply ignore filter
37
+ if (this.ignore) {
38
+ results = results.filter((f) => !this.ignore.isIgnored(f));
26
39
  }
27
- const resultSets = await Promise.all(patterns.map((p) => this.fs.glob(p, cwd).catch(() => [])));
28
- // Deduplicate and sort
29
- return [...new Set(resultSets.flat())].sort();
40
+ return results;
30
41
  }
31
42
  catch {
32
43
  return [];
@@ -36,11 +47,16 @@ export class StaticToolkit {
36
47
  try {
37
48
  // Work around LocalFSBackend.grep glob bug with cwd:
38
49
  // grep from root and filter by path prefix
39
- const results = await this.fs.grep(pattern);
40
- if (!searchPath)
41
- return results;
42
- const prefix = searchPath.endsWith("/") ? searchPath : `${searchPath}/`;
43
- return results.filter((r) => r.file.startsWith(prefix));
50
+ let results = await this.fs.grep(pattern);
51
+ if (searchPath) {
52
+ const prefix = searchPath.endsWith("/") ? searchPath : `${searchPath}/`;
53
+ results = results.filter((r) => r.file.startsWith(prefix));
54
+ }
55
+ // Apply ignore filter
56
+ if (this.ignore) {
57
+ results = results.filter((r) => !this.ignore.isIgnored(r.file));
58
+ }
59
+ return results;
44
60
  }
45
61
  catch {
46
62
  return [];
@@ -48,7 +64,14 @@ export class StaticToolkit {
48
64
  }
49
65
  async listDir(dirPath) {
50
66
  try {
51
- return await this.fs.listDir(dirPath);
67
+ const entries = await this.fs.listDir(dirPath);
68
+ if (this.ignore) {
69
+ return entries.filter((e) => {
70
+ const fullPath = dirPath === "." ? e.name : `${dirPath}/${e.name}`;
71
+ return !this.ignore.isIgnored(fullPath);
72
+ });
73
+ }
74
+ return entries;
52
75
  }
53
76
  catch {
54
77
  return [];
@@ -9,6 +9,7 @@ interface AnalyzeOptions {
9
9
  dryRun?: boolean;
10
10
  force?: boolean;
11
11
  dir?: string;
12
+ debug?: boolean;
12
13
  /** When true, skip "archbyte serve" in the Next steps (e.g. called from `archbyte run`) */
13
14
  skipServeHint?: boolean;
14
15
  }
@@ -4,9 +4,10 @@ import { execSync } from "child_process";
4
4
  import chalk from "chalk";
5
5
  import { resolveConfig } from "./config.js";
6
6
  import { recordUsage } from "./license-gate.js";
7
- import { staticResultToSpec, writeSpec, writeMetadata, loadSpec, loadMetadata } from "./yaml-io.js";
7
+ import { staticResultToSpec, writeSpec, writeMetadata, loadSpec, loadMetadata, resolvePrivacy } from "./yaml-io.js";
8
8
  import { getChangedFiles, mapFilesToComponents, shouldRunAgents, isGitAvailable, categorizeChanges, computeNeighbors, getCommitCount } from "./incremental.js";
9
9
  import { progressBar, confirm } from "./ui.js";
10
+ import { buildCollectorReport, buildAgentReport, buildTransparencyReport, printTransparencyReport, saveTransparencyReport, } from "./transparency.js";
10
11
  export async function handleAnalyze(options) {
11
12
  const rootDir = options.dir ? path.resolve(options.dir) : process.cwd();
12
13
  const isStaticOnly = options.static || options.skipLlm;
@@ -50,10 +51,11 @@ export async function handleAnalyze(options) {
50
51
  progress.update(1, "Building analysis...");
51
52
  const freshAnalysis = buildAnalysisFromStatic(result, rootDir);
52
53
  const duration = Date.now() - startTime;
53
- // Merge into existing analysis if it was produced by an agentic run,
54
+ // Merge into existing data if it was produced by an agentic run,
54
55
  // preserving LLM-generated components/connections while refreshing
55
56
  // static data (environments, metadata, project info).
56
57
  const existingAnalysis = loadExistingAnalysis(rootDir);
58
+ const existingSpec = loadSpec(rootDir);
57
59
  const wasAgentic = existingAnalysis && existingAnalysis.metadata?.mode !== "static";
58
60
  const analysis = wasAgentic ? mergeStaticIntoExisting(existingAnalysis, freshAnalysis) : freshAnalysis;
59
61
  // Stamp scan metadata on analysis.json (backward compat)
@@ -62,10 +64,19 @@ export async function handleAnalyze(options) {
62
64
  ameta.mode = wasAgentic ? "static-refresh" : "static";
63
65
  writeAnalysis(rootDir, analysis);
64
66
  // Dual-write: archbyte.yaml + metadata.json
65
- const existingSpec = loadSpec(rootDir);
66
- const spec = staticResultToSpec(result, rootDir, existingSpec?.rules);
67
- writeSpec(rootDir, spec);
68
- writeScanMetadata(rootDir, duration, "static");
67
+ // When prior data came from an agentic run, only refresh static fields
68
+ // (project info, environments) — never overwrite LLM components/connections.
69
+ if (wasAgentic && existingSpec) {
70
+ const freshSpec = staticResultToSpec(result, rootDir, existingSpec.rules);
71
+ existingSpec.project = freshSpec.project;
72
+ existingSpec.environments = freshSpec.environments;
73
+ writeSpec(rootDir, existingSpec);
74
+ }
75
+ else {
76
+ const spec = staticResultToSpec(result, rootDir, existingSpec?.rules);
77
+ writeSpec(rootDir, spec);
78
+ }
79
+ writeScanMetadata(rootDir, duration, wasAgentic ? "static-refresh" : "static");
69
80
  progress.update(2, "Generating diagram...");
70
81
  await autoGenerate(rootDir, options);
71
82
  progress.done("Analysis complete");
@@ -191,13 +202,17 @@ export async function handleAnalyze(options) {
191
202
  console.log(chalk.yellow(`File tree grew from ${priorCount} to ${currentFileCount} files — running full scan.`));
192
203
  }
193
204
  }
194
- // 4. Run static context collection → LLM pipeline
205
+ // 4. Load privacy config
206
+ const privacySpec = loadSpec(rootDir);
207
+ const privacy = resolvePrivacy(privacySpec);
208
+ const agentReports = [];
209
+ // 5. Run static context collection → LLM pipeline
195
210
  const progress = progressBar(7);
196
211
  progress.update(0, "Collecting static context...");
197
212
  const { runStaticContextCollection } = await import("../agents/static/index.js");
198
213
  const ctx = await runStaticContextCollection(rootDir, (msg) => {
199
214
  progress.update(0, `Static context: ${msg}`);
200
- });
215
+ }, privacy);
201
216
  // Save static context for debugging / re-runs
202
217
  const ctxPath = path.join(rootDir, ".archbyte", "static-context.json");
203
218
  if (!fs.existsSync(path.dirname(ctxPath))) {
@@ -209,6 +224,12 @@ export async function handleAnalyze(options) {
209
224
  const { runPipeline } = await import("../agents/pipeline/index.js");
210
225
  let result;
211
226
  let pipelineStep = 1;
227
+ // Debug callback — collect agent reports for transparency log
228
+ const onDebug = options.debug
229
+ ? (agentId, model, system, user) => {
230
+ agentReports.push(buildAgentReport(agentId, model, system, user));
231
+ }
232
+ : undefined;
212
233
  try {
213
234
  result = await runPipeline(ctx, provider, config, (msg) => {
214
235
  // Map pipeline progress messages to bar steps
@@ -239,7 +260,7 @@ export async function handleAnalyze(options) {
239
260
  else {
240
261
  progress.update(pipelineStep, msg.trim());
241
262
  }
242
- }, incrementalContext);
263
+ }, incrementalContext, onDebug);
243
264
  }
244
265
  catch (err) {
245
266
  const errorMsg = err instanceof Error ? err.message : String(err);
@@ -301,6 +322,22 @@ export async function handleAnalyze(options) {
301
322
  const spec = staticResultToSpec(result, rootDir, existingSpec?.rules);
302
323
  writeSpec(rootDir, spec);
303
324
  writeScanMetadata(rootDir, duration, "pipeline", ctx.fileTree.totalFiles, result.tokenUsage, incrementalContext ? true : undefined, result.skippedAgents);
325
+ // Transparency report (--debug)
326
+ if (options.debug) {
327
+ const collectors = buildCollectorReport(ctx);
328
+ const privacyControls = {
329
+ sendCodeSamples: privacy.sendCodeSamples,
330
+ sendImportMap: privacy.sendImportMap,
331
+ sendEnvNames: privacy.sendEnvNames,
332
+ sendDocs: privacy.sendDocs,
333
+ sendFileTree: privacy.sendFileTree,
334
+ sendInfra: privacy.sendInfra,
335
+ };
336
+ const ignoreFilter = (await import("../agents/static/ignore.js")).loadIgnoreFile(rootDir);
337
+ const report = buildTransparencyReport(collectors, agentReports, privacyControls, ignoreFilter.patternCount, privacy.redact);
338
+ printTransparencyReport(report);
339
+ saveTransparencyReport(rootDir, report);
340
+ }
304
341
  progress.update(6, "Generating diagram...");
305
342
  await autoGenerate(rootDir, options);
306
343
  progress.done("Analysis complete");
package/dist/cli/run.d.ts CHANGED
@@ -7,6 +7,7 @@ interface RunOptions {
7
7
  verbose?: boolean;
8
8
  force?: boolean;
9
9
  dryRun?: boolean;
10
+ debug?: boolean;
10
11
  dir?: string;
11
12
  }
12
13
  export declare function handleRun(options: RunOptions): Promise<void>;
package/dist/cli/run.js CHANGED
@@ -128,11 +128,12 @@ export async function handleRun(options) {
128
128
  apiKey: options.apiKey,
129
129
  force: options.force,
130
130
  dryRun: options.dryRun,
131
+ debug: options.debug,
131
132
  dir: options.dir,
132
133
  skipServeHint: true,
133
134
  });
134
135
  if (options.dryRun)
135
136
  return;
136
137
  // 2. Serve the UI
137
- await handleServe({ port });
138
+ await handleServe({ port, debug: options.debug });
138
139
  }
@@ -1,6 +1,7 @@
1
1
  interface ServeOptions {
2
2
  port?: number;
3
3
  diagram?: string;
4
+ debug?: boolean;
4
5
  }
5
6
  /**
6
7
  * Start the ArchByte UI server
package/dist/cli/serve.js CHANGED
@@ -84,6 +84,7 @@ export async function handleServe(options) {
84
84
  diagramPath,
85
85
  workspaceRoot: rootDir,
86
86
  port,
87
+ debug: options.debug,
87
88
  });
88
89
  return;
89
90
  }
@@ -0,0 +1,36 @@
1
+ import type { StaticContext } from "../agents/static/types.js";
2
+ export interface CollectorReport {
3
+ name: string;
4
+ itemCount: number;
5
+ filePaths: string[];
6
+ byteEstimate: number;
7
+ }
8
+ export interface AgentReport {
9
+ agentId: string;
10
+ model: string;
11
+ dataCategories: string[];
12
+ fileRefs: string[];
13
+ promptTokenEstimate: number;
14
+ }
15
+ export interface TransparencyReport {
16
+ timestamp: string;
17
+ collectors: CollectorReport[];
18
+ agents: AgentReport[];
19
+ privacyControls: Record<string, boolean>;
20
+ ignorePatterns: number;
21
+ redactionEnabled: boolean;
22
+ totals: {
23
+ collectorsRun: number;
24
+ agentsRun: number;
25
+ filesReferenced: number;
26
+ estimatedPromptBytes: number;
27
+ };
28
+ }
29
+ export declare function buildCollectorReport(ctx: StaticContext): CollectorReport[];
30
+ export declare function buildAgentReport(agentId: string, model: string, systemPrompt: string, userPrompt: string): AgentReport;
31
+ export declare function printTransparencyReport(report: TransparencyReport): void;
32
+ export declare function saveTransparencyReport(rootDir: string, report: TransparencyReport): void;
33
+ /**
34
+ * Build a complete TransparencyReport from collector reports and agent reports.
35
+ */
36
+ export declare function buildTransparencyReport(collectors: CollectorReport[], agents: AgentReport[], privacy: Record<string, boolean>, ignorePatterns: number, redactionEnabled: boolean): TransparencyReport;