archbyte 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,214 @@
1
+ // Transparency Log — Debug output showing exactly what data is collected and sent
2
+ // Used with `archbyte analyze --debug`
3
+ import * as fs from "fs";
4
+ import * as path from "path";
5
+ import chalk from "chalk";
6
+ // --- Collector Report ---
7
/**
 * Summarize what each static collector gathered, for the transparency log.
 *
 * Produces one entry per collector, in a fixed order: structure-scanner,
 * doc-parser, infra-analyzer, event-detector, env-detector,
 * file-tree-collector, code-sampler.
 *
 * @param {object} ctx - Static analysis context. Must provide `structure`,
 *   `docs`, `infra`, `events`, `envs`, `fileTree` and `codeSamples` with the
 *   fields read below; a missing section throws a TypeError.
 * @returns {Array<{name: string, itemCount: number, filePaths: string[], byteEstimate: number}>}
 *   `byteEstimate` is the UTF-8 size of the collector output serialized as JSON.
 */
export function buildCollectorReport(ctx) {
  const { structure, docs, infra, events, envs, fileTree, codeSamples } = ctx;

  // Measure real UTF-8 bytes: String#length counts UTF-16 code units, which
  // under-reports any output containing non-ASCII text.
  const jsonBytes = (value) => Buffer.byteLength(JSON.stringify(value) ?? "", "utf8");

  // Several events/samples can point at the same file; list each path once
  // (first-seen order) and drop entries that carry no path at all.
  const uniquePaths = (paths) => [...new Set(paths)].filter((p) => p != null);

  const envVarCount = envs.environments.reduce((sum, env) => sum + env.variables.length, 0);

  return [
    {
      name: "structure-scanner",
      itemCount: structure.entryPoints.length + Object.keys(structure.directories).length,
      filePaths: structure.entryPoints,
      byteEstimate: jsonBytes(structure),
    },
    {
      name: "doc-parser",
      itemCount: docs.apiEndpoints.length + docs.externalDependencies.length + docs.architectureNotes.length,
      filePaths: [],
      byteEstimate: jsonBytes(docs),
    },
    {
      name: "infra-analyzer",
      itemCount: infra.docker.services.length + infra.kubernetes.resources.length + infra.cloud.services.length,
      filePaths: infra.docker.composeFilePath ? [infra.docker.composeFilePath] : [],
      byteEstimate: jsonBytes(infra),
    },
    {
      name: "event-detector",
      itemCount: events.events.length + events.patterns.length,
      filePaths: uniquePaths(events.events.map((e) => e.file)),
      byteEstimate: jsonBytes(events),
    },
    {
      name: "env-detector",
      itemCount: envs.environments.length + envVarCount,
      filePaths: [],
      byteEstimate: jsonBytes(envs),
    },
    {
      name: "file-tree-collector",
      itemCount: fileTree.totalFiles + fileTree.totalDirs,
      filePaths: [],
      byteEstimate: jsonBytes(fileTree),
    },
    {
      name: "code-sampler",
      itemCount: codeSamples.samples.length + codeSamples.configFiles.length,
      filePaths: uniquePaths([
        ...codeSamples.samples.map((s) => s.path),
        ...codeSamples.configFiles.map((c) => c.path),
      ]),
      byteEstimate: jsonBytes(codeSamples),
    },
  ];
}
71
+ // --- Agent Report ---
72
/**
 * Describe what one LLM agent call was given, inferred from its prompts.
 *
 * The data categories are a substring heuristic over the combined prompt
 * text, not an exact record. Note that the "structure" marker also matches
 * inside the word "infrastructure".
 *
 * @param {string} agentId - Identifier of the agent.
 * @param {string} model - Model name the agent used.
 * @param {string} [systemPrompt] - System prompt text; treated as empty when missing.
 * @param {string} [userPrompt] - User prompt text; treated as empty when missing.
 * @returns {{agentId: string, model: string, dataCategories: string[], fileRefs: string[], promptTokenEstimate: number}}
 *   `fileRefs` holds at most 50 unique quoted file-like strings in first-seen order.
 */
export function buildAgentReport(agentId, model, systemPrompt, userPrompt) {
  // A missing prompt must count as empty; plain `+` would splice the literal
  // text "null"/"undefined" into the scan and the token estimate.
  const combined = `${systemPrompt ?? ""}${userPrompt ?? ""}`;

  // Category label -> prompt substrings that indicate the category is present.
  const categoryMarkers = [
    ["file-tree", ["fileTree", "file tree"]],
    ["code-samples", ["codeSamples", "code sample"]],
    ["import-map", ["importMap", "import map"]],
    ["project-structure", ["structure"]],
    ["infrastructure", ["infra"]],
    ["events", ["events"]],
    ["environments", ["envs", "environment"]],
    ["documentation", ["docs"]],
  ];
  const dataCategories = categoryMarkers
    .filter(([, markers]) => markers.some((marker) => combined.includes(marker)))
    .map(([label]) => label);

  // Quoted tokens that look like a path with a 1-6 letter extension.
  // A Set keeps first-seen order and avoids a linear rescan per match.
  const fileRefPattern = /["']([a-zA-Z0-9_./-]+\.[a-zA-Z]{1,6})["']/g;
  const uniqueRefs = new Set();
  for (const [, ref] of combined.matchAll(fileRefPattern)) {
    uniqueRefs.add(ref);
  }

  return {
    agentId,
    model,
    dataCategories,
    fileRefs: [...uniqueRefs].slice(0, 50),
    // Rough token estimate (~4 chars per token)
    promptTokenEstimate: Math.ceil(combined.length / 4),
  };
}
111
+ // --- Print ---
112
/**
 * Write a human-readable rendering of a transparency report to stderr.
 *
 * Sections, in order: privacy controls, static collectors (with a short
 * file list each), LLM agents (only when there are any), and totals.
 *
 * @param {object} report - Report with `privacyControls`, `ignorePatterns`,
 *   `redactionEnabled`, `collectors`, `agents` and `totals`.
 * @returns {void}
 */
export function printTransparencyReport(report) {
  const say = (...parts) => console.error(...parts);

  // Header
  say();
  say(chalk.bold.cyan("Transparency Report"));
  say(chalk.gray("What data was collected and sent to your LLM provider"));
  say();

  // Privacy controls: one line per flag, then the optional extras
  say(chalk.bold("Privacy Controls"));
  Object.entries(report.privacyControls).forEach(([key, value]) => {
    let status;
    if (value) {
      status = chalk.green("enabled");
    }
    else {
      status = chalk.gray("disabled");
    }
    say(` ${key}: ${status}`);
  });
  if (report.ignorePatterns > 0) {
    const patternCount = chalk.yellow(`${report.ignorePatterns} pattern(s)`);
    say(` .archbyteignore: ${patternCount}`);
  }
  if (report.redactionEnabled) {
    say(` redaction: ${chalk.yellow("enabled — paths/names hashed")}`);
  }
  say();

  // Collectors: summary line, then up to five paths (or three plus a remainder count)
  say(chalk.bold("Static Collectors"));
  for (const collector of report.collectors) {
    const { name, itemCount, filePaths } = collector;
    const size = formatBytes(collector.byteEstimate);
    say(` ${name.padEnd(22)} ${String(itemCount).padStart(4)} items ${size.padStart(8)}`);
    const pathCount = filePaths.length;
    if (pathCount > 5) {
      filePaths.slice(0, 3).forEach((fp) => say(chalk.gray(` ${fp}`)));
      say(chalk.gray(` ... and ${pathCount - 3} more`));
    }
    else if (pathCount > 0) {
      for (const fp of filePaths) {
        say(chalk.gray(` ${fp}`));
      }
    }
  }
  say();

  // Agents: the whole section is omitted when no agent ran
  if (report.agents.length > 0) {
    say(chalk.bold("LLM Agents"));
    report.agents.forEach((agent) => {
      say(` ${agent.agentId.padEnd(22)} model=${agent.model} ~${agent.promptTokenEstimate} tokens`);
      if (agent.dataCategories.length > 0) {
        say(chalk.gray(` data: ${agent.dataCategories.join(", ")}`));
      }
      if (agent.fileRefs.length > 0) {
        const shown = agent.fileRefs.slice(0, 5).join(", ");
        const overflow = agent.fileRefs.length > 5 ? ` +${agent.fileRefs.length - 5} more` : "";
        say(chalk.gray(` files: ${shown}${overflow}`));
      }
    });
    say();
  }

  // Totals
  const { totals } = report;
  say(chalk.bold("Totals"));
  say(` Collectors: ${totals.collectorsRun}`);
  say(` Agents: ${totals.agentsRun}`);
  say(` Files referenced: ${totals.filesReferenced}`);
  say(` Est. prompt data: ${formatBytes(totals.estimatedPromptBytes)}`);
  say();
}
170
+ // --- Save ---
171
/**
 * Persist a transparency report as pretty-printed JSON at
 * `<rootDir>/.archbyte/transparency.json`, creating the directory if needed.
 *
 * @param {string} rootDir - Project root under which `.archbyte` lives.
 * @param {object} report - Report object to serialize.
 * @returns {void}
 * @throws {Error} Propagates any filesystem error from creating the directory or writing the file.
 */
export function saveTransparencyReport(rootDir, report) {
  const dir = path.join(rootDir, ".archbyte");
  // With `recursive: true`, mkdirSync succeeds when the directory already
  // exists, so a separate existsSync pre-check is unnecessary.
  fs.mkdirSync(dir, { recursive: true });
  const reportPath = path.join(dir, "transparency.json");
  fs.writeFileSync(reportPath, JSON.stringify(report, null, 2), "utf-8");
}
179
+ // --- Helpers ---
180
/**
 * Render a byte count as a short human-readable string,
 * e.g. "512 B", "1.5 KB", "2.0 MB" (1 KB = 1024 bytes).
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} The value with a B, KB or MB unit; KB/MB use one decimal place.
 */
function formatBytes(bytes) {
  const KIB = 1024;
  const MIB = KIB * KIB;
  if (bytes < KIB)
    return `${bytes} B`;
  const kib = (bytes / KIB).toFixed(1);
  // Decide on the rounded value: just under 1 MB (e.g. 1048575 bytes) rounds
  // to "1024.0", which must be shown as "1.0 MB" rather than "1024.0 KB".
  if (Number(kib) < KIB)
    return `${kib} KB`;
  return `${(bytes / MIB).toFixed(1)} MB`;
}
187
/**
 * Assemble the complete transparency report from its parts.
 *
 * @param {Array<object>} collectors - Collector entries; each has `filePaths` and `byteEstimate`.
 * @param {Array<object>} agents - Agent entries; each has `fileRefs`.
 * @param {object} privacy - Privacy flags, stored as `privacyControls`.
 * @param {number} ignorePatterns - Passed through unchanged.
 * @param {boolean} redactionEnabled - Passed through unchanged.
 * @returns {object} Report with an ISO `timestamp`, the inputs, and `totals`
 *   (collector/agent counts, number of distinct files, summed collector byte estimates).
 */
export function buildTransparencyReport(collectors, agents, privacy, ignorePatterns, redactionEnabled) {
  // Distinct paths across collectors first, then agents.
  const distinctFiles = new Set();
  const pathLists = [
    ...collectors.map((collector) => collector.filePaths),
    ...agents.map((agent) => agent.fileRefs),
  ];
  for (const list of pathLists) {
    for (const filePath of list) {
      distinctFiles.add(filePath);
    }
  }

  let promptBytes = 0;
  for (const collector of collectors) {
    promptBytes = promptBytes + collector.byteEstimate;
  }

  const totals = {
    collectorsRun: collectors.length,
    agentsRun: agents.length,
    filesReferenced: distinctFiles.size,
    estimatedPromptBytes: promptBytes,
  };

  return {
    timestamp: new Date().toISOString(),
    collectors,
    agents,
    privacyControls: privacy,
    ignorePatterns,
    redactionEnabled,
    totals,
  };
}
@@ -41,6 +41,15 @@ export interface ArchbyteSpecFlow {
41
41
  category: string;
42
42
  steps: ArchbyteSpecFlowStep[];
43
43
  }
44
+ export interface PrivacyConfig { // Optional privacy switches; resolvePrivacy() defaults each send* field to true and redact to false.
45
+ sendCodeSamples?: boolean; // Toggle code excerpt collection
46
+ sendImportMap?: boolean; // Toggle import relationship collection
47
+ sendEnvNames?: boolean; // Toggle env variable name collection
48
+ sendDocs?: boolean; // Toggle documentation extraction
49
+ sendFileTree?: boolean; // Toggle file tree collection
50
+ sendInfra?: boolean; // Toggle infrastructure detail collection
51
+ redact?: boolean; // Hash all identifiers (paths, names) before sending to LLM
52
+ }
44
53
  export interface ArchbyteSpec {
45
54
  version: number;
46
55
  project: {
@@ -58,6 +67,7 @@ export interface ArchbyteSpec {
58
67
  environments: ArchbyteSpecEnvironment[];
59
68
  flows: ArchbyteSpecFlow[];
60
69
  rules: Record<string, unknown>;
70
+ privacy?: PrivacyConfig;
61
71
  }
62
72
  export interface ScanMetadata {
63
73
  analyzedAt: string;
@@ -83,6 +93,10 @@ export declare function writeMetadata(rootDir: string, meta: ScanMetadata): void
83
93
  * Optionally preserves existing rules from a prior spec.
84
94
  */
85
95
  export declare function staticResultToSpec(result: StaticAnalysisResult, rootDir: string, existingRules?: Record<string, unknown>): ArchbyteSpec;
96
+ /**
97
+ * Resolve privacy config with defaults. All fields default to enabled (true) except redact (false).
98
+ */
99
+ export declare function resolvePrivacy(spec: ArchbyteSpec | null): Required<PrivacyConfig>;
86
100
  /**
87
101
  * Convert an ArchbyteSpec back to the AnalysisResult format expected by generate.ts.
88
102
  * This is the inverse of staticResultToSpec → buildAnalysisFromStatic.
@@ -163,6 +163,21 @@ export function staticResultToSpec(result, rootDir, existingRules) {
163
163
  rules,
164
164
  };
165
165
  }
166
/**
 * Fill in defaults for a spec's optional `privacy` section.
 *
 * Every `send*` switch is on unless the spec sets it; `redact` is off unless
 * the spec sets it. A null/undefined spec, section or field falls back to the default.
 *
 * @param {object|null|undefined} spec - Spec that may carry a `privacy` object.
 * @returns {object} Object with all seven privacy fields present.
 */
export function resolvePrivacy(spec) {
  const defaults = {
    sendCodeSamples: true,
    sendImportMap: true,
    sendEnvNames: true,
    sendDocs: true,
    sendFileTree: true,
    sendInfra: true,
    redact: false,
  };
  const configured = spec?.privacy ?? {};
  const resolved = {};
  for (const [field, fallback] of Object.entries(defaults)) {
    resolved[field] = configured[field] ?? fallback;
  }
  return resolved;
}
166
181
  /**
167
182
  * Convert an ArchbyteSpec back to the AnalysisResult format expected by generate.ts.
168
183
  * This is the inverse of staticResultToSpec → buildAnalysisFromStatic.
@@ -4,5 +4,6 @@ export interface ServerConfig {
4
4
  diagramPath: string;
5
5
  workspaceRoot: string;
6
6
  port: number;
7
+ debug?: boolean;
7
8
  }
8
9
  export declare function startServer(cfg: ServerConfig): Promise<void>;
@@ -138,6 +138,11 @@ function createHttpServer() {
138
138
  return;
139
139
  }
140
140
  const url = req.url || "/";
141
+ // Log API actions to terminal (skip static files, SSE, health checks)
142
+ if (url.startsWith("/api/") && url !== "/api/health") {
143
+ const label = url.replace("/api/", "").split("?")[0];
144
+ console.error(`[archbyte] ${req.method} ${label}`);
145
+ }
141
146
  // SSE endpoint
142
147
  if (url === "/events") {
143
148
  res.writeHead(200, {
@@ -657,6 +662,77 @@ function createHttpServer() {
657
662
  }));
658
663
  return;
659
664
  }
665
+ // API: Transparency report (--debug)
666
+ if (url === "/api/transparency" && req.method === "GET") {
667
+ const transparencyPath = path.join(config.workspaceRoot, ".archbyte", "transparency.json");
668
+ if (existsSync(transparencyPath)) {
669
+ const content = readFileSync(transparencyPath, "utf-8");
670
+ res.writeHead(200, { "Content-Type": "application/json" });
671
+ res.end(content);
672
+ }
673
+ else {
674
+ res.writeHead(404, { "Content-Type": "application/json" });
675
+ res.end(JSON.stringify({ error: "No transparency report found. Run 'archbyte analyze --debug' to generate one." }));
676
+ }
677
+ return;
678
+ }
679
+ // API: Data flow documentation
680
+ if (url === "/api/data-flow" && req.method === "GET") {
681
+ const dataFlow = {
682
+ outboundConnections: [
683
+ {
684
+ destination: "Your LLM Provider (BYOK)",
685
+ protocol: "HTTPS",
686
+ data: [
687
+ "Project structure metadata (language, framework, directories)",
688
+ "File tree (paths only, no contents)",
689
+ "Code excerpts (first ~80 lines of key files)",
690
+ "Import relationships between files",
691
+ "Config file contents (package.json, docker-compose, etc.)",
692
+ "Environment variable names (never values)",
693
+ "Infrastructure details (Docker services, K8s resources)",
694
+ ],
695
+ controlledBy: "archbyte.yaml privacy section + .archbyteignore",
696
+ },
697
+ {
698
+ destination: "ArchByte Cloud (api.heartbyte.io)",
699
+ protocol: "HTTPS",
700
+ data: ["Email address", "JWT token", "Scan count"],
701
+ controlledBy: "Required for license validation only",
702
+ },
703
+ ],
704
+ neverSent: [
705
+ "Environment variable values",
706
+ "Full source code (only excerpts of key files)",
707
+ "Secrets, passwords, API keys",
708
+ "Git history, commits, diffs",
709
+ "Git credentials (SSH keys, tokens)",
710
+ "Binary files",
711
+ "node_modules / vendor dependencies",
712
+ "User home directory contents",
713
+ ],
714
+ privacyControls: {
715
+ archbyteignore: "Exclude files/directories from all scanners (.gitignore syntax)",
716
+ sendCodeSamples: "Toggle code excerpt collection",
717
+ sendImportMap: "Toggle import relationship collection",
718
+ sendEnvNames: "Toggle env variable name collection",
719
+ sendDocs: "Toggle documentation extraction",
720
+ sendFileTree: "Toggle file tree collection",
721
+ sendInfra: "Toggle infrastructure detail collection",
722
+ redact: "Hash all identifiers (paths, names) before sending to LLM",
723
+ },
724
+ localOnly: [
725
+ ".archbyte/archbyte.yaml — Architecture spec",
726
+ ".archbyte/analysis.json — Full analysis output",
727
+ ".archbyte/architecture.json — Diagram layout",
728
+ ".archbyte/static-context.json — Raw scanner output",
729
+ ".archbyte/transparency.json — Audit trail",
730
+ ],
731
+ };
732
+ res.writeHead(200, { "Content-Type": "application/json" });
733
+ res.end(JSON.stringify(dataFlow, null, 2));
734
+ return;
735
+ }
660
736
  // API: Health (deprecated - redirect to audit for backward compat)
661
737
  if (url === "/api/health" && req.method === "GET") {
662
738
  res.writeHead(307, { "Location": "/api/audit" });
@@ -1129,7 +1205,7 @@ function createHttpServer() {
1129
1205
  const child = spawn(process.execPath, [bin, "generate"], {
1130
1206
  cwd: config.workspaceRoot,
1131
1207
  stdio: ["ignore", "pipe", "pipe"],
1132
- env: { ...process.env, FORCE_COLOR: "0" },
1208
+ env: { ...process.env, FORCE_COLOR: "0", ARCHBYTE_INTERNAL: "1" },
1133
1209
  });
1134
1210
  runningWorkflows.set("__generate__", child);
1135
1211
  broadcastOpsEvent({ type: "generate:started" });
@@ -1221,7 +1297,7 @@ function createHttpServer() {
1221
1297
  const child = spawn(process.execPath, [bin, "workflow", "--run", id], {
1222
1298
  cwd: config.workspaceRoot,
1223
1299
  stdio: ["ignore", "pipe", "pipe"],
1224
- env: { ...process.env, FORCE_COLOR: "0" },
1300
+ env: { ...process.env, FORCE_COLOR: "0", ARCHBYTE_INTERNAL: "1" },
1225
1301
  });
1226
1302
  runningWorkflows.set(id, child);
1227
1303
  broadcastOpsEvent({ type: "workflow:started", id });
@@ -1452,6 +1528,22 @@ function createHttpServer() {
1452
1528
  }
1453
1529
  return;
1454
1530
  }
1531
+ // API: UI event logging — fire-and-forget from UI to server console
1532
+ if (url === "/api/ui-event" && req.method === "POST") {
1533
+ let body = "";
1534
+ req.on("data", (chunk) => { body += chunk.toString(); });
1535
+ req.on("end", () => {
1536
+ try {
1537
+ const { event, detail } = JSON.parse(body);
1538
+ const msg = detail ? `${event} — ${detail}` : event;
1539
+ console.error(`[archbyte] ui: ${msg}`);
1540
+ }
1541
+ catch { }
1542
+ res.writeHead(204);
1543
+ res.end();
1544
+ });
1545
+ return;
1546
+ }
1455
1547
  // API: Telemetry — record agent timing data
1456
1548
  if (url === "/api/telemetry" && req.method === "POST") {
1457
1549
  let body = "";
@@ -1717,7 +1809,7 @@ function runAnalyzePipeline(mode = "static", fileChanges) {
1717
1809
  const analyzeChild = spawn(process.execPath, analyzeArgs, {
1718
1810
  cwd: config.workspaceRoot,
1719
1811
  stdio: ["ignore", "pipe", "pipe"],
1720
- env: { ...process.env, FORCE_COLOR: "0" },
1812
+ env: { ...process.env, FORCE_COLOR: "0", ARCHBYTE_INTERNAL: "1" },
1721
1813
  });
1722
1814
  let analyzeStderr = "";
1723
1815
  analyzeChild.stdout?.on("data", (d) => process.stderr.write(`[analyze] ${d}`));
@@ -1733,7 +1825,7 @@ function runAnalyzePipeline(mode = "static", fileChanges) {
1733
1825
  const genChild = spawn(process.execPath, [bin, "generate"], {
1734
1826
  cwd: config.workspaceRoot,
1735
1827
  stdio: ["ignore", "pipe", "pipe"],
1736
- env: { ...process.env, FORCE_COLOR: "0" },
1828
+ env: { ...process.env, FORCE_COLOR: "0", ARCHBYTE_INTERNAL: "1" },
1737
1829
  });
1738
1830
  genChild.stdout?.on("data", (d) => process.stderr.write(`[generate] ${d}`));
1739
1831
  genChild.stderr?.on("data", (d) => process.stderr.write(`[generate] ${d}`));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "archbyte",
3
- "version": "0.5.1",
3
+ "version": "0.5.3",
4
4
  "description": "ArchByte - See what agents build. As they build it.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -80,6 +80,26 @@ environments: []
80
80
  # label: "api-gateway -> user-service"
81
81
  flows: []
82
82
 
83
+ # ── Privacy Controls ──
84
+ # Control what data is sent to your LLM provider during analysis.
85
+ # All send* options default to true (enabled); set one to false to exclude that data. redact defaults to false.
86
+ # See: docs/DATA_FLOW.md for full details on what each scanner collects.
87
+ #
88
+ # privacy:
89
+ # sendCodeSamples: true # Code excerpts (first ~80 lines of key files)
90
+ # sendImportMap: true # Import relationships between files
91
+ # sendEnvNames: true # Environment variable names (never values)
92
+ # sendDocs: true # Documentation extracts (README, etc.)
93
+ # sendFileTree: true # Directory structure (paths only)
94
+ # sendInfra: true # Infrastructure details (Docker, K8s, CI)
95
+ # redact: false # Hash all identifiers before sending to LLM
96
+ #
97
+ # Also create .archbyteignore in your project root to exclude specific
98
+ # files/directories from all scanners (.gitignore syntax):
99
+ # secrets/
100
+ # *.env
101
+ # *.pem
102
+
83
103
  # ── Architecture Fitness Rules ──
84
104
  # Levels: error (fail CI), warn (report), off (skip)
85
105
  rules: