archbyte 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/archbyte.js CHANGED
@@ -22,6 +22,11 @@ import { handleVersion, handleUpdate } from '../dist/cli/version.js';
22
22
  import { requireLicense } from '../dist/cli/license-gate.js';
23
23
  import { DEFAULT_PORT } from '../dist/cli/constants.js';
24
24
 
25
+ // When spawned by `archbyte serve` (internal), skip interactive license checks.
26
+ // The user already authenticated when they started the server.
27
+ const isInternal = process.env.ARCHBYTE_INTERNAL === '1';
28
+ const gate = isInternal ? async () => {} : requireLicense;
29
+
25
30
  const require = createRequire(import.meta.url);
26
31
  const { version: PKG_VERSION } = require('../package.json');
27
32
 
@@ -93,6 +98,7 @@ program
93
98
  .option('-v, --verbose', 'Show detailed output')
94
99
  .option('--force', 'Force full re-scan (skip incremental detection)')
95
100
  .option('--dry-run', 'Preview without running')
101
+ .option('--debug', 'Show transparency report (what data is collected and sent)')
96
102
  .action(async (options) => {
97
103
  // handleRun manages login + setup + requireLicense internally
98
104
  await handleRun(options);
@@ -110,8 +116,9 @@ program
110
116
  .option('--skip-llm', 'Alias for --static')
111
117
  .option('--force', 'Force full re-scan (skip incremental detection)')
112
118
  .option('--dry-run', 'Preview without running')
119
+ .option('--debug', 'Show transparency report (what data is collected and sent)')
113
120
  .action(async (options) => {
114
- await requireLicense('analyze');
121
+ await gate('analyze');
115
122
  await handleAnalyze(options);
116
123
  });
117
124
 
@@ -122,7 +129,7 @@ program
122
129
  .option('-o, --output <path>', 'Output diagram (default: .archbyte/architecture.json)')
123
130
  .option('-v, --verbose', 'Show detailed output')
124
131
  .action(async (options) => {
125
- await requireLicense('generate');
132
+ await gate('generate');
126
133
  await handleGenerate(options);
127
134
  });
128
135
 
@@ -131,6 +138,7 @@ program
131
138
  .description('Start the visualization UI server')
132
139
  .option('-p, --port <number>', `Server port (default: ${DEFAULT_PORT})`, parseInt)
133
140
  .option('-d, --diagram <path>', 'Path to architecture JSON (default: .archbyte/architecture.json)')
141
+ .option('--debug', 'Enable transparency endpoint (/api/transparency)')
134
142
  .action(async (options) => {
135
143
  await handleServe(options);
136
144
  });
@@ -33,13 +33,13 @@ export const serviceDescriber = {
33
33
  parts.push(`Detected language: ${ctx.structure.language}`);
34
34
  parts.push(`Languages: ${ctx.structure.languages.join(", ") || "none"}`);
35
35
  parts.push(`Framework: ${ctx.structure.framework ?? "none"}`);
36
- // Docs
36
+ // Docs — only project description, NOT externalDependencies.
37
+ // Doc-extracted dependency mentions prime the LLM to hallucinate phantom services
38
+ // (e.g., docs mention "MCP" → LLM creates "MCP Server" component).
39
+ // The LLM should discover services from actual code evidence only.
37
40
  if (ctx.docs.projectDescription) {
38
41
  parts.push(`\nFrom docs: ${ctx.docs.projectDescription}`);
39
42
  }
40
- if (ctx.docs.externalDependencies.length > 0) {
41
- parts.push(`\nExternal dependencies mentioned: ${ctx.docs.externalDependencies.join(", ")}`);
42
- }
43
43
  // Docker services — only include if infra/config files changed (or full scan)
44
44
  if (ctx.infra.docker.composeFile && (hasInfraChanges || hasConfigChanges)) {
45
45
  const svcInfo = ctx.infra.docker.services.map((s) => {
@@ -6,7 +6,7 @@ import type { IncrementalContext } from "./types.js";
6
6
  * Run the multi-agent pipeline: 3 parallel fast agents → 2 sequential agents.
7
7
  * Each agent gets a single chat() call with pre-collected static context.
8
8
  */
9
- export declare function runPipeline(ctx: StaticContext, provider: LLMProvider, config: ArchByteConfig, onProgress?: (msg: string) => void, incrementalContext?: IncrementalContext): Promise<StaticAnalysisResult & {
9
+ export declare function runPipeline(ctx: StaticContext, provider: LLMProvider, config: ArchByteConfig, onProgress?: (msg: string) => void, incrementalContext?: IncrementalContext, onDebug?: (agentId: string, model: string, system: string, user: string) => void): Promise<StaticAnalysisResult & {
10
10
  tokenUsage?: {
11
11
  input: number;
12
12
  output: number;
@@ -92,7 +92,7 @@ function getFallbackData(agentId, inc) {
92
92
  * Run the multi-agent pipeline: 3 parallel fast agents → 2 sequential agents.
93
93
  * Each agent gets a single chat() call with pre-collected static context.
94
94
  */
95
- export async function runPipeline(ctx, provider, config, onProgress, incrementalContext) {
95
+ export async function runPipeline(ctx, provider, config, onProgress, incrementalContext, onDebug) {
96
96
  const agentResults = {};
97
97
  const agentMeta = [];
98
98
  const skippedAgents = [];
@@ -118,7 +118,7 @@ export async function runPipeline(ctx, provider, config, onProgress, incremental
118
118
  agentResults[agent.id] = fallback;
119
119
  return Promise.resolve(null);
120
120
  }
121
- return runAgent(agent, ctx, provider, config, parallelPrior, onProgress);
121
+ return runAgent(agent, ctx, provider, config, parallelPrior, onProgress, onDebug);
122
122
  }));
123
123
  let authFailed = false;
124
124
  for (let i = 0; i < parallelTasks.length; i++) {
@@ -156,7 +156,7 @@ export async function runPipeline(ctx, provider, config, onProgress, incremental
156
156
  continue;
157
157
  }
158
158
  try {
159
- const result = await runAgent(agent, ctx, provider, config, agentResults, onProgress);
159
+ const result = await runAgent(agent, ctx, provider, config, agentResults, onProgress, onDebug);
160
160
  if (result) {
161
161
  agentResults[agent.id] = result.data;
162
162
  agentMeta.push(result);
@@ -214,10 +214,12 @@ const MAX_TOKENS = {
214
214
  "flow-detector": 4096,
215
215
  "validator": 4096,
216
216
  };
217
- async function runAgent(agent, ctx, provider, config, priorResults, onProgress) {
217
+ async function runAgent(agent, ctx, provider, config, priorResults, onProgress, onDebug) {
218
218
  const start = Date.now();
219
219
  const model = resolveModel(config.provider, agent.modelTier, config.modelOverrides, config.model);
220
220
  const { system, user } = agent.buildPrompt(ctx, priorResults);
221
+ // Debug callback — report what data is being sent
222
+ onDebug?.(agent.id, model, system, user);
221
223
  onProgress?.(` ${agent.name}: calling ${model}...`);
222
224
  const maxTokens = MAX_TOKENS[agent.id] ?? 4096;
223
225
  const response = await provider.chat({
@@ -1,5 +1,6 @@
1
1
  // Pipeline — Merger
2
2
  // Assembles all agent outputs into a StaticAnalysisResult
3
+ import { categorizeDep } from "../static/taxonomy.js";
3
4
  function sanitize(s) {
4
5
  if (!s)
5
6
  return s;
@@ -9,21 +10,24 @@ function sanitize(s) {
9
10
  * Build a set of "evidence tokens" from the static context — things that concretely
10
11
  * exist in the codebase (dependencies, env vars, docker images/services).
11
12
  * Used to gate LLM-generated databases/external services against hallucination.
13
+ *
14
+ * Uses the package taxonomy to resolve package names to their display names
15
+ * (e.g., "pg" → also adds "postgresql", "stripe" → also adds "stripe").
16
+ * This lets the LLM use human-readable names while still requiring code evidence.
12
17
  */
13
18
  function buildEvidenceTokens(ctx) {
14
19
  const tokens = new Set();
20
+ /** Add a dependency name + its taxonomy display name as tokens. */
21
+ function addDep(dep) {
22
+ tokens.add(dep.toLowerCase());
23
+ const cat = categorizeDep(dep);
24
+ if (cat)
25
+ tokens.add(cat.displayName.toLowerCase());
26
+ }
15
27
  // Package dependencies from import map (codeSamples.importMap: file → imported modules)
16
28
  for (const imports of Object.values(ctx.codeSamples.importMap)) {
17
- for (const imp of imports) {
18
- tokens.add(imp.toLowerCase());
19
- // Also add short name for scoped packages: @aws-sdk/client-s3 → client-s3, aws-sdk
20
- if (imp.startsWith("@")) {
21
- const parts = imp.split("/");
22
- if (parts[1])
23
- tokens.add(parts[1].toLowerCase());
24
- tokens.add(parts[0].slice(1).toLowerCase());
25
- }
26
- }
29
+ for (const imp of imports)
30
+ addDep(imp);
27
31
  }
28
32
  // Config files may contain dependency info (package.json deps etc.)
29
33
  for (const cfg of ctx.codeSamples.configFiles) {
@@ -31,13 +35,7 @@ function buildEvidenceTokens(ctx) {
31
35
  try {
32
36
  const pkg = JSON.parse(cfg.content);
33
37
  for (const dep of Object.keys({ ...pkg.dependencies, ...pkg.devDependencies })) {
34
- tokens.add(dep.toLowerCase());
35
- if (dep.startsWith("@")) {
36
- const parts = dep.split("/");
37
- if (parts[1])
38
- tokens.add(parts[1].toLowerCase());
39
- tokens.add(parts[0].slice(1).toLowerCase());
40
- }
38
+ addDep(dep);
41
39
  }
42
40
  }
43
41
  catch { /* ignore parse errors */ }
@@ -59,35 +57,29 @@ function buildEvidenceTokens(ctx) {
59
57
  for (const s of ctx.infra.cloud.services) {
60
58
  tokens.add(s.toLowerCase());
61
59
  }
62
- // External dependencies mentioned in docs
63
- for (const dep of ctx.docs.externalDependencies) {
64
- tokens.add(dep.toLowerCase());
65
- }
60
+ // NOTE: ctx.docs.externalDependencies intentionally excluded.
61
+ // Doc mentions (from markdown/README) are not concrete code evidence and cause
62
+ // hallucination — the LLM sees "MCP" in docs and creates phantom components.
63
+ // Only code-level signals (imports, deps, env vars, Docker, cloud) count.
66
64
  return tokens;
67
65
  }
68
66
  /**
69
- * Check if a service/database ID and type have concrete evidence in the static context.
70
- * Uses fuzzy matching: checks if any evidence token contains or is contained by the service keywords.
67
+ * Check if a service/database has concrete evidence in the static context.
68
+ * Strict exact-match only — no substring/regex fuzzy matching.
69
+ * The taxonomy enriches evidence tokens with display names (pg → PostgreSQL)
70
+ * so the LLM can use human-readable names and still match.
71
71
  */
72
72
  function hasEvidence(id, name, type, evidenceTokens) {
73
- // Build candidate keywords from the service
74
73
  const candidates = [
75
74
  id.toLowerCase(),
76
75
  name.toLowerCase(),
77
76
  type.toLowerCase(),
78
- // Split hyphenated IDs: "aws-sqs" → ["aws", "sqs"]
77
+ // Split hyphenated IDs: "aws-sqs" → also check "aws", "sqs"
79
78
  ...id.toLowerCase().split("-"),
80
79
  ].filter(Boolean);
81
80
  for (const candidate of candidates) {
82
- for (const token of evidenceTokens) {
83
- // Direct match or substring match (in both directions)
84
- if (token === candidate)
85
- return true;
86
- if (token.includes(candidate) && candidate.length >= 3)
87
- return true;
88
- if (candidate.includes(token) && token.length >= 3)
89
- return true;
90
- }
81
+ if (evidenceTokens.has(candidate))
82
+ return true;
91
83
  }
92
84
  return false;
93
85
  }
@@ -0,0 +1,12 @@
1
+ export interface IgnoreFilter {
2
+ /** Returns true if the relative path should be excluded from analysis */
3
+ isIgnored(relativePath: string): boolean;
4
+ /** Number of active patterns (excluding comments and blank lines) */
5
+ patternCount: number;
6
+ }
7
+ /**
8
+ * Load `.archbyteignore` from the project root.
9
+ * Returns an IgnoreFilter that matches paths against the patterns.
10
+ * If the file doesn't exist, returns a no-op filter that ignores nothing.
11
+ */
12
+ export declare function loadIgnoreFile(projectRoot: string): IgnoreFilter;
@@ -0,0 +1,140 @@
1
+ // .archbyteignore — File exclusion filter
2
+ // Supports .gitignore-style patterns: # comments, ! negation, ** globstar, * wildcard
3
+ import * as fs from "fs";
4
+ import * as path from "path";
5
+ /**
6
+ * Load `.archbyteignore` from the project root.
7
+ * Returns an IgnoreFilter that matches paths against the patterns.
8
+ * If the file doesn't exist, returns a no-op filter that ignores nothing.
9
+ */
10
+ export function loadIgnoreFile(projectRoot) {
11
+ const ignorePath = path.join(projectRoot, ".archbyteignore");
12
+ if (!fs.existsSync(ignorePath)) {
13
+ return { isIgnored: () => false, patternCount: 0 };
14
+ }
15
+ const content = fs.readFileSync(ignorePath, "utf-8");
16
+ const rules = parseIgnorePatterns(content);
17
+ return {
18
+ isIgnored(relativePath) {
19
+ // Normalize path separators
20
+ const normalized = relativePath.replace(/\\/g, "/").replace(/^\//, "");
21
+ let ignored = false;
22
+ for (const rule of rules) {
23
+ if (rule.pattern.test(normalized)) {
24
+ ignored = !rule.negated;
25
+ }
26
+ }
27
+ return ignored;
28
+ },
29
+ patternCount: rules.length,
30
+ };
31
+ }
32
+ /**
33
+ * Parse .gitignore-style content into an ordered list of rules.
34
+ */
35
+ function parseIgnorePatterns(content) {
36
+ const rules = [];
37
+ for (const rawLine of content.split("\n")) {
38
+ const line = rawLine.trim();
39
+ // Skip blank lines and comments
40
+ if (!line || line.startsWith("#"))
41
+ continue;
42
+ let pattern = line;
43
+ let negated = false;
44
+ // Handle negation
45
+ if (pattern.startsWith("!")) {
46
+ negated = true;
47
+ pattern = pattern.slice(1);
48
+ }
49
+ // Remove trailing spaces (unless escaped)
50
+ pattern = pattern.replace(/(?<!\\)\s+$/, "");
51
+ if (!pattern)
52
+ continue;
53
+ const regex = patternToRegex(pattern);
54
+ rules.push({ pattern: regex, negated });
55
+ }
56
+ return rules;
57
+ }
58
+ /**
59
+ * Convert a .gitignore-style pattern to a RegExp.
60
+ * Supports: * (any non-slash), ** (any including slashes), ? (single char),
61
+ * trailing / (directory match), leading / (root-anchored).
62
+ */
63
+ function patternToRegex(pattern) {
64
+ let anchored = false;
65
+ // Leading / means anchored to root
66
+ if (pattern.startsWith("/")) {
67
+ anchored = true;
68
+ pattern = pattern.slice(1);
69
+ }
70
+ // Trailing / means match directories — for our purposes, match the prefix
71
+ const dirOnly = pattern.endsWith("/");
72
+ if (dirOnly) {
73
+ pattern = pattern.slice(0, -1);
74
+ }
75
+ // Escape regex special chars, then convert glob patterns
76
+ let regex = "";
77
+ let i = 0;
78
+ while (i < pattern.length) {
79
+ const ch = pattern[i];
80
+ const next = pattern[i + 1];
81
+ if (ch === "*" && next === "*") {
82
+ // ** — match anything including path separators
83
+ if (pattern[i + 2] === "/") {
84
+ // **/ — match zero or more directories
85
+ regex += "(?:.*/)?";
86
+ i += 3;
87
+ }
88
+ else {
89
+ // ** at end or before non-slash
90
+ regex += ".*";
91
+ i += 2;
92
+ }
93
+ }
94
+ else if (ch === "*") {
95
+ // * — match anything except /
96
+ regex += "[^/]*";
97
+ i++;
98
+ }
99
+ else if (ch === "?") {
100
+ // ? — match single non-slash char
101
+ regex += "[^/]";
102
+ i++;
103
+ }
104
+ else if (ch === "[") {
105
+ // Character class — pass through until ]
106
+ const closeBracket = pattern.indexOf("]", i + 1);
107
+ if (closeBracket !== -1) {
108
+ regex += pattern.slice(i, closeBracket + 1);
109
+ i = closeBracket + 1;
110
+ }
111
+ else {
112
+ regex += escapeRegex(ch);
113
+ i++;
114
+ }
115
+ }
116
+ else {
117
+ regex += escapeRegex(ch);
118
+ i++;
119
+ }
120
+ }
121
+ if (dirOnly) {
122
+ // Match the directory itself or anything under it
123
+ regex += "(?:/.*)?";
124
+ }
125
+ if (anchored) {
126
+ // Must match from the start
127
+ return new RegExp(`^${regex}$`);
128
+ }
129
+ // Unanchored: match if the pattern matches the full path
130
+ // or any suffix after a /
131
+ // If pattern contains /, it's implicitly anchored
132
+ if (pattern.includes("/")) {
133
+ return new RegExp(`^${regex}$`);
134
+ }
135
+ // No slash: match against the basename OR any path segment
136
+ return new RegExp(`(?:^|/)${regex}(?:/.*)?$`);
137
+ }
138
+ function escapeRegex(ch) {
139
+ return ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
140
+ }
@@ -1,4 +1,5 @@
1
1
  import type { StaticAnalysisResult, StaticContext } from "./types.js";
2
+ import type { PrivacyConfig } from "../../cli/yaml-io.js";
2
3
  export type { StaticAnalysisResult, StaticContext } from "./types.js";
3
4
  export { validateAnalysis } from "./validator.js";
4
5
  /**
@@ -16,4 +17,4 @@ export declare function runStaticAnalysis(projectRoot: string, onProgress?: (msg
16
17
  * This runs ONLY fact-collectors (no component-detector, connection-mapper, or validator).
17
18
  * Output is consumed by the pipeline LLM agents.
18
19
  */
19
- export declare function runStaticContextCollection(projectRoot: string, onProgress?: (msg: string) => void): Promise<StaticContext>;
20
+ export declare function runStaticContextCollection(projectRoot: string, onProgress?: (msg: string) => void, privacy?: Required<PrivacyConfig>): Promise<StaticContext>;
@@ -11,6 +11,8 @@ import { mapConnections } from "./connection-mapper.js";
11
11
  import { validateAnalysis } from "./validator.js";
12
12
  import { collectFileTree } from "./file-tree-collector.js";
13
13
  import { collectCodeSamples } from "./code-sampler.js";
14
+ import { loadIgnoreFile } from "./ignore.js";
15
+ import { redactContext } from "./redactor.js";
14
16
  export { validateAnalysis } from "./validator.js";
15
17
  /**
16
18
  * Run all static analysis scanners.
@@ -22,7 +24,11 @@ export { validateAnalysis } from "./validator.js";
22
24
  * 4. Gap detection — identify what the LLM should resolve
23
25
  */
24
26
  export async function runStaticAnalysis(projectRoot, onProgress) {
25
- const tk = new StaticToolkit(projectRoot);
27
+ const ignoreFilter = loadIgnoreFile(projectRoot);
28
+ if (ignoreFilter.patternCount > 0) {
29
+ onProgress?.(`Loaded .archbyteignore: ${ignoreFilter.patternCount} pattern(s)`);
30
+ }
31
+ const tk = new StaticToolkit(projectRoot, ignoreFilter);
26
32
  // Phase 1: parallel scanners (no dependencies)
27
33
  onProgress?.("Running parallel scanners...");
28
34
  const [structure, docs, infra, events, envs] = await Promise.all([
@@ -292,8 +298,12 @@ async function collectGaps(analysis, tk) {
292
298
  * This runs ONLY fact-collectors (no component-detector, connection-mapper, or validator).
293
299
  * Output is consumed by the pipeline LLM agents.
294
300
  */
295
- export async function runStaticContextCollection(projectRoot, onProgress) {
296
- const tk = new StaticToolkit(projectRoot);
301
+ export async function runStaticContextCollection(projectRoot, onProgress, privacy) {
302
+ const ignoreFilter = loadIgnoreFile(projectRoot);
303
+ if (ignoreFilter.patternCount > 0) {
304
+ onProgress?.(`Loaded .archbyteignore: ${ignoreFilter.patternCount} pattern(s)`);
305
+ }
306
+ const tk = new StaticToolkit(projectRoot, ignoreFilter);
297
307
  onProgress?.("Collecting static context (7 scanners in parallel)...");
298
308
  const [structure, docs, infra, events, envs, fileTree, codeSamples] = await Promise.all([
299
309
  scanStructure(tk),
@@ -306,5 +316,43 @@ export async function runStaticContextCollection(projectRoot, onProgress) {
306
316
  ]);
307
317
  onProgress?.(`Context: ${fileTree.totalFiles} files, ${fileTree.totalDirs} dirs, ${codeSamples.configFiles.length} configs, ${codeSamples.samples.length} samples`);
308
318
  onProgress?.(`Detected: ${structure.language}, ${structure.framework ?? "no framework"}, monorepo=${structure.isMonorepo}`);
309
- return { structure, docs, infra, events, envs, fileTree, codeSamples };
319
+ let ctx = { structure, docs, infra, events, envs, fileTree, codeSamples };
320
+ // Apply privacy controls — zero out disabled fields
321
+ if (privacy) {
322
+ if (!privacy.sendCodeSamples) {
323
+ ctx.codeSamples = { ...ctx.codeSamples, samples: [] };
324
+ onProgress?.("Privacy: code samples excluded");
325
+ }
326
+ if (!privacy.sendImportMap) {
327
+ ctx.codeSamples = { ...ctx.codeSamples, importMap: {} };
328
+ onProgress?.("Privacy: import map excluded");
329
+ }
330
+ if (!privacy.sendEnvNames) {
331
+ ctx.envs = { ...ctx.envs, environments: ctx.envs.environments.map((e) => ({ ...e, variables: [] })) };
332
+ onProgress?.("Privacy: env variable names excluded");
333
+ }
334
+ if (!privacy.sendDocs) {
335
+ ctx.docs = { projectDescription: "", architectureNotes: [], apiEndpoints: [], externalDependencies: [] };
336
+ onProgress?.("Privacy: documentation excluded");
337
+ }
338
+ if (!privacy.sendFileTree) {
339
+ ctx.fileTree = { tree: [], totalFiles: ctx.fileTree.totalFiles, totalDirs: ctx.fileTree.totalDirs };
340
+ onProgress?.("Privacy: file tree excluded");
341
+ }
342
+ if (!privacy.sendInfra) {
343
+ ctx.infra = {
344
+ docker: { services: [], composeFile: false },
345
+ kubernetes: { resources: [] },
346
+ cloud: { provider: null, services: [], iac: null },
347
+ ci: { platform: null, pipelines: [] },
348
+ };
349
+ onProgress?.("Privacy: infrastructure details excluded");
350
+ }
351
+ // Redaction — hash identifiers before returning
352
+ if (privacy.redact) {
353
+ ctx = redactContext(ctx);
354
+ onProgress?.("Privacy: redaction applied — identifiers hashed");
355
+ }
356
+ }
357
+ return ctx;
310
358
  }
@@ -0,0 +1,12 @@
1
+ import type { StaticContext } from "./types.js";
2
+ /**
3
+ * Redact sensitive identifiers in a StaticContext.
4
+ * - File paths: hash each segment, preserve extensions and depth
5
+ * - Env var names: hash
6
+ * - Docker service names: hash
7
+ * - String literals in code samples: hash
8
+ * - Preserve: npm package names, language keywords, structural info
9
+ *
10
+ * Returns a deep copy — the original context is not modified.
11
+ */
12
+ export declare function redactContext(ctx: StaticContext): StaticContext;