archbyte 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/archbyte.js CHANGED
@@ -22,6 +22,11 @@ import { handleVersion, handleUpdate } from '../dist/cli/version.js';
22
22
  import { requireLicense } from '../dist/cli/license-gate.js';
23
23
  import { DEFAULT_PORT } from '../dist/cli/constants.js';
24
24
 
25
+ // When spawned by `archbyte serve` (internal), skip interactive license checks.
26
+ // The user already authenticated when they started the server.
27
+ const isInternal = process.env.ARCHBYTE_INTERNAL === '1';
28
+ const gate = isInternal ? async () => {} : requireLicense;
29
+
25
30
  const require = createRequire(import.meta.url);
26
31
  const { version: PKG_VERSION } = require('../package.json');
27
32
 
@@ -111,7 +116,7 @@ program
111
116
  .option('--force', 'Force full re-scan (skip incremental detection)')
112
117
  .option('--dry-run', 'Preview without running')
113
118
  .action(async (options) => {
114
- await requireLicense('analyze');
119
+ await gate('analyze');
115
120
  await handleAnalyze(options);
116
121
  });
117
122
 
@@ -122,7 +127,7 @@ program
122
127
  .option('-o, --output <path>', 'Output diagram (default: .archbyte/architecture.json)')
123
128
  .option('-v, --verbose', 'Show detailed output')
124
129
  .action(async (options) => {
125
- await requireLicense('generate');
130
+ await gate('generate');
126
131
  await handleGenerate(options);
127
132
  });
128
133
 
@@ -33,13 +33,13 @@ export const serviceDescriber = {
33
33
  parts.push(`Detected language: ${ctx.structure.language}`);
34
34
  parts.push(`Languages: ${ctx.structure.languages.join(", ") || "none"}`);
35
35
  parts.push(`Framework: ${ctx.structure.framework ?? "none"}`);
36
- // Docs
36
+ // Docs — only project description, NOT externalDependencies.
37
+ // Doc-extracted dependency mentions prime the LLM to hallucinate phantom services
38
+ // (e.g., docs mention "MCP" → LLM creates "MCP Server" component).
39
+ // The LLM should discover services from actual code evidence only.
37
40
  if (ctx.docs.projectDescription) {
38
41
  parts.push(`\nFrom docs: ${ctx.docs.projectDescription}`);
39
42
  }
40
- if (ctx.docs.externalDependencies.length > 0) {
41
- parts.push(`\nExternal dependencies mentioned: ${ctx.docs.externalDependencies.join(", ")}`);
42
- }
43
43
  // Docker services — only include if infra/config files changed (or full scan)
44
44
  if (ctx.infra.docker.composeFile && (hasInfraChanges || hasConfigChanges)) {
45
45
  const svcInfo = ctx.infra.docker.services.map((s) => {
@@ -1,5 +1,6 @@
1
1
  // Pipeline — Merger
2
2
  // Assembles all agent outputs into a StaticAnalysisResult
3
+ import { categorizeDep } from "../static/taxonomy.js";
3
4
  function sanitize(s) {
4
5
  if (!s)
5
6
  return s;
@@ -9,21 +10,24 @@ function sanitize(s) {
9
10
  * Build a set of "evidence tokens" from the static context — things that concretely
10
11
  * exist in the codebase (dependencies, env vars, docker images/services).
11
12
  * Used to gate LLM-generated databases/external services against hallucination.
13
+ *
14
+ * Uses the package taxonomy to resolve package names to their display names
15
+ * (e.g., "pg" → also adds "postgresql"; for "stripe" the display name lowercases to the same token, so nothing new is added).
16
+ * This lets the LLM use human-readable names while still requiring code evidence.
12
17
  */
13
18
  function buildEvidenceTokens(ctx) {
14
19
  const tokens = new Set();
20
+ /** Add a dependency name + its taxonomy display name as tokens. */
21
+ function addDep(dep) {
22
+ tokens.add(dep.toLowerCase());
23
+ const cat = categorizeDep(dep);
24
+ if (cat)
25
+ tokens.add(cat.displayName.toLowerCase());
26
+ }
15
27
  // Package dependencies from import map (codeSamples.importMap: file → imported modules)
16
28
  for (const imports of Object.values(ctx.codeSamples.importMap)) {
17
- for (const imp of imports) {
18
- tokens.add(imp.toLowerCase());
19
- // Also add short name for scoped packages: @aws-sdk/client-s3 → client-s3, aws-sdk
20
- if (imp.startsWith("@")) {
21
- const parts = imp.split("/");
22
- if (parts[1])
23
- tokens.add(parts[1].toLowerCase());
24
- tokens.add(parts[0].slice(1).toLowerCase());
25
- }
26
- }
29
+ for (const imp of imports)
30
+ addDep(imp);
27
31
  }
28
32
  // Config files may contain dependency info (package.json deps etc.)
29
33
  for (const cfg of ctx.codeSamples.configFiles) {
@@ -31,13 +35,7 @@ function buildEvidenceTokens(ctx) {
31
35
  try {
32
36
  const pkg = JSON.parse(cfg.content);
33
37
  for (const dep of Object.keys({ ...pkg.dependencies, ...pkg.devDependencies })) {
34
- tokens.add(dep.toLowerCase());
35
- if (dep.startsWith("@")) {
36
- const parts = dep.split("/");
37
- if (parts[1])
38
- tokens.add(parts[1].toLowerCase());
39
- tokens.add(parts[0].slice(1).toLowerCase());
40
- }
38
+ addDep(dep);
41
39
  }
42
40
  }
43
41
  catch { /* ignore parse errors */ }
@@ -59,35 +57,29 @@ function buildEvidenceTokens(ctx) {
59
57
  for (const s of ctx.infra.cloud.services) {
60
58
  tokens.add(s.toLowerCase());
61
59
  }
62
- // External dependencies mentioned in docs
63
- for (const dep of ctx.docs.externalDependencies) {
64
- tokens.add(dep.toLowerCase());
65
- }
60
+ // NOTE: ctx.docs.externalDependencies intentionally excluded.
61
+ // Doc mentions (from markdown/README) are not concrete code evidence and cause
62
+ // hallucination — the LLM sees "MCP" in docs and creates phantom components.
63
+ // Only code-level signals (imports, deps, env vars, Docker, cloud) count.
66
64
  return tokens;
67
65
  }
68
66
  /**
69
- * Check if a service/database ID and type have concrete evidence in the static context.
70
- * Uses fuzzy matching: checks if any evidence token contains or is contained by the service keywords.
67
+ * Check if a service/database has concrete evidence in the static context.
68
+ * Strict exact-match only — no substring/regex fuzzy matching.
69
+ * The taxonomy enriches evidence tokens with display names (pg → PostgreSQL)
70
+ * so the LLM can use human-readable names and still match.
71
71
  */
72
72
  function hasEvidence(id, name, type, evidenceTokens) {
73
- // Build candidate keywords from the service
74
73
  const candidates = [
75
74
  id.toLowerCase(),
76
75
  name.toLowerCase(),
77
76
  type.toLowerCase(),
78
- // Split hyphenated IDs: "aws-sqs" → ["aws", "sqs"]
77
+ // Split hyphenated IDs: "aws-sqs" → also check "aws", "sqs"
79
78
  ...id.toLowerCase().split("-"),
80
79
  ].filter(Boolean);
81
80
  for (const candidate of candidates) {
82
- for (const token of evidenceTokens) {
83
- // Direct match or substring match (in both directions)
84
- if (token === candidate)
85
- return true;
86
- if (token.includes(candidate) && candidate.length >= 3)
87
- return true;
88
- if (candidate.includes(token) && token.length >= 3)
89
- return true;
90
- }
81
+ if (evidenceTokens.has(candidate))
82
+ return true;
91
83
  }
92
84
  return false;
93
85
  }
@@ -1,75 +1,22 @@
1
1
  // Static Analysis — Component Detector
2
2
  // Detects project components via workspaces, conventional directories, build configs, or single-app fallback
3
3
  import { slugify, assignLayer } from "./utils.js";
4
- // Map well-known deps to architecturally significant tech names
5
- const TECH_MAP = {
6
- react: "React", "react-dom": "React", "next": "Next.js", "vue": "Vue", "nuxt": "Nuxt",
7
- svelte: "Svelte", angular: "Angular", "@angular/core": "Angular",
8
- express: "Express", fastify: "Fastify", "@nestjs/core": "NestJS", hono: "Hono", koa: "Koa",
9
- prisma: "Prisma", "@prisma/client": "Prisma", typeorm: "TypeORM", drizzle: "Drizzle",
10
- sequelize: "Sequelize", mongoose: "Mongoose", pg: "PostgreSQL", mysql2: "MySQL",
11
- redis: "Redis", ioredis: "Redis",
12
- graphql: "GraphQL", "@apollo/server": "Apollo GraphQL",
13
- "socket.io": "Socket.IO", ws: "WebSocket",
14
- tailwindcss: "Tailwind CSS", "@xyflow/react": "React Flow",
15
- commander: "Commander.js", yargs: "Yargs",
16
- vite: "Vite", webpack: "Webpack", esbuild: "esbuild",
17
- typescript: "TypeScript",
18
- kafkajs: "Kafka", amqplib: "RabbitMQ", bullmq: "BullMQ", bull: "Bull",
19
- stripe: "Stripe", "@stripe/stripe-js": "Stripe",
20
- playwright: "Playwright", "@playwright/test": "Playwright",
21
- jest: "Jest", vitest: "Vitest", mocha: "Mocha",
22
- flutter: "Flutter",
23
- };
24
- // Conventional directory → type mapping (hint-based, not exclusive)
25
- const DIR_TYPE_HINTS = {
26
- ui: { type: "frontend", label: "UI" },
27
- web: { type: "frontend", label: "Web App" },
28
- frontend: { type: "frontend", label: "Frontend" },
29
- client: { type: "frontend", label: "Client" },
30
- app: { type: "frontend", label: "App" },
31
- server: { type: "api", label: "Server" },
32
- api: { type: "api", label: "API" },
33
- backend: { type: "api", label: "Backend" },
34
- gateway: { type: "api", label: "Gateway" },
35
- cli: { type: "service", label: "CLI" },
36
- agents: { type: "library", label: "Agents" },
37
- lib: { type: "library", label: "Library" },
38
- packages: { type: "library", label: "Packages" },
39
- shared: { type: "library", label: "Shared" },
40
- common: { type: "library", label: "Common" },
41
- cloud: { type: "service", label: "Cloud" },
42
- infra: { type: "service", label: "Infrastructure" },
43
- deploy: { type: "service", label: "Deployment" },
44
- deployment: { type: "service", label: "Deployment" },
45
- workers: { type: "worker", label: "Workers" },
46
- jobs: { type: "worker", label: "Jobs" },
47
- scripts: { type: "service", label: "Scripts" },
48
- e2e: { type: "service", label: "E2E Tests" },
49
- tests: { type: "service", label: "Tests" },
50
- tools: { type: "service", label: "Tools" },
51
- homepage: { type: "frontend", label: "Homepage" },
52
- docs: { type: "service", label: "Docs" },
53
- };
54
- // Build config files that indicate a directory is a standalone component
55
- const BUILD_CONFIG_FILES = [
56
- "package.json",
57
- "Cargo.toml",
58
- "go.mod",
59
- "pyproject.toml",
60
- "requirements.txt",
61
- "setup.py",
62
- "pubspec.yaml",
63
- "build.gradle",
64
- "pom.xml",
65
- "Gemfile",
66
- "Makefile",
67
- "Dockerfile",
68
- "wrangler.toml",
69
- "tsconfig.json",
70
- ];
4
+ import { categorizeDep, categorizeAllDeps, ROLE_TO_TYPE, MANIFEST_FILES } from "./taxonomy.js";
71
5
  // Skip these dirs — not components
72
6
  import { EXCLUDED_DIRS as SKIP_DIRS } from "./excluded-dirs.js";
7
+ // Structural inference patterns — framework-agnostic grep queries
8
+ const STRUCTURAL_PROBES = [
9
+ // Frontend: renders UI to DOM / uses JSX components
10
+ { grep: "createRoot|ReactDOM|hydrateRoot|createApp.*mount|new Vue|bootstrapApplication", type: "frontend", label: "UI renderer" },
11
+ // API: defines HTTP route handlers
12
+ { grep: "app\\.(get|post|put|delete|use)\\(|router\\.|@(Get|Post|Put|Delete|Controller)|handle.*Request|Hono\\(", type: "api", label: "HTTP routes" },
13
+ // Worker: consumes from queue / processes jobs
14
+ { grep: "\\.process\\(|\\.consume\\(|new Worker\\(|@Processor|celery\\.task", type: "worker", label: "Job processor" },
15
+ // CLI: parses command-line args
16
+ { grep: "\\.command\\(|\\.parse\\(process\\.argv|argparse|@Command\\(", type: "service", label: "CLI" },
17
+ // Server: creates a network listener
18
+ { grep: "createServer\\(|listen\\(\\d|serve\\(|app\\.listen", type: "api", label: "Server" },
19
+ ];
73
20
  export async function detectComponents(tk, structure) {
74
21
  // Strategy 1: Monorepo workspaces
75
22
  if (structure.isMonorepo) {
@@ -157,8 +104,8 @@ async function resolveWorkspacePatterns(tk) {
157
104
  }
158
105
  /**
159
106
  * Scan every top-level directory. If it has a build config, README, Dockerfile,
160
- * or code files, treat it as a component. Use dir name hints + config file analysis
161
- * to determine type.
107
+ * or code files, treat it as a component. Use taxonomy + grep-based structural
108
+ * inference to determine type.
162
109
  */
163
110
  async function detectAllComponents(tk, structure) {
164
111
  const components = [];
@@ -171,7 +118,7 @@ async function detectAllComponents(tk, structure) {
171
118
  continue;
172
119
  const fileNames = entries.map((e) => e.name);
173
120
  // Check for build config files
174
- const hasBuildConfig = BUILD_CONFIG_FILES.some((f) => fileNames.includes(f));
121
+ const hasBuildConfig = MANIFEST_FILES.some((f) => fileNames.includes(f));
175
122
  const hasCode = entries.some((e) => e.type === "file" &&
176
123
  /\.(ts|js|tsx|jsx|py|go|rs|java|rb|dart|kt|swift|c|cpp|cs)$/.test(e.name));
177
124
  const hasSrcDir = entries.some((e) => e.type === "directory" && (e.name === "src" || e.name === "lib" || e.name === "app"));
@@ -181,10 +128,12 @@ async function detectAllComponents(tk, structure) {
181
128
  continue;
182
129
  // Detect language + type + tech stack from config files
183
130
  const detected = await detectFromBuildConfigs(tk, dir, fileNames);
184
- const hint = DIR_TYPE_HINTS[dir.toLowerCase()];
185
- const type = detected.type ?? hint?.type ?? "service";
186
- const label = hint?.label ?? capitalize(dir);
187
- const name = label;
131
+ // If no type from deps, try grep-based structural inference
132
+ if (!detected.type) {
133
+ detected.type = await inferTypeFromCode(tk, dir);
134
+ }
135
+ const type = detected.type ?? "service";
136
+ const name = capitalize(dir);
188
137
  // Read description from sub-README if available
189
138
  let description = detected.description;
190
139
  if (!description && hasReadme) {
@@ -205,6 +154,21 @@ async function detectAllComponents(tk, structure) {
205
154
  }
206
155
  return components;
207
156
  }
157
+ /**
158
+ * Infer component type by grepping actual code in the directory.
159
+ * Runs all probes in parallel — the first probe (in STRUCTURAL_PROBES order) with hits wins.
160
+ */
161
+ async function inferTypeFromCode(tk, dir) {
162
+ const results = await Promise.all(STRUCTURAL_PROBES.map(async (probe) => ({
163
+ ...probe,
164
+ hits: await tk.grepFiles(probe.grep, dir),
165
+ })));
166
+ for (const r of results) {
167
+ if (r.hits.length > 0)
168
+ return r.type;
169
+ }
170
+ return null;
171
+ }
208
172
  async function detectFromBuildConfigs(tk, dir, fileNames) {
209
173
  const info = {
210
174
  type: null,
@@ -335,57 +299,57 @@ async function detectFromBuildConfigs(tk, dir, fileNames) {
335
299
  info.technologies = [...new Set(info.technologies)].slice(0, 5);
336
300
  return info;
337
301
  }
338
- function buildSingleAppComponent(structure) {
339
- return {
340
- id: slugify(structure.projectName) ?? "app",
341
- name: structure.projectName || "Application",
342
- type: structure.framework
343
- ? (["React", "Vue", "Svelte", "Angular", "Next.js", "Nuxt"].includes(structure.framework)
344
- ? "frontend"
345
- : "api")
346
- : "service",
347
- layer: "application",
348
- path: ".",
349
- description: "",
350
- technologies: [structure.language, structure.framework].filter(Boolean),
351
- };
352
- }
302
+ /** Detect component type from package.json dependencies using taxonomy */
353
303
  function detectTypeFromDeps(pkg) {
354
304
  const deps = Object.keys({
355
305
  ...pkg.dependencies,
356
306
  ...pkg.devDependencies,
357
307
  });
358
- // Frontend indicators
359
- if (deps.some((d) => ["react", "react-dom", "vue", "svelte", "@angular/core", "next", "nuxt"].includes(d))) {
360
- return "frontend";
361
- }
362
- // API indicators
363
- if (deps.some((d) => ["express", "fastify", "@nestjs/core", "hono", "koa"].includes(d))) {
364
- return "api";
365
- }
366
- // Worker indicators
367
- if (deps.some((d) => ["bullmq", "bull", "bee-queue"].includes(d))) {
368
- return "worker";
369
- }
370
- // Test indicators
371
- if (deps.some((d) => ["@playwright/test", "playwright", "cypress"].includes(d))) {
372
- return "service";
308
+ // Priority: meta-framework > ui-framework > http-framework > queue-client > test-framework
309
+ const priorityCategories = [
310
+ "meta-framework", "ui-framework", "http-framework", "queue-client", "test-framework",
311
+ ];
312
+ for (const targetCat of priorityCategories) {
313
+ for (const dep of deps) {
314
+ const cat = categorizeDep(dep);
315
+ if (cat?.category === targetCat) {
316
+ return ROLE_TO_TYPE[cat.role] ?? "service";
317
+ }
318
+ }
373
319
  }
374
320
  return "service";
375
321
  }
322
+ /** Extract display-worthy tech stack names from deps using taxonomy */
376
323
  function extractTechStack(pkg) {
377
324
  const deps = Object.keys({
378
325
  ...pkg.dependencies,
379
326
  ...pkg.devDependencies,
380
327
  });
328
+ const categorized = categorizeAllDeps(deps);
329
+ const seen = new Set();
381
330
  const techs = [];
382
- for (const dep of deps) {
383
- if (TECH_MAP[dep] && !techs.includes(TECH_MAP[dep])) {
384
- techs.push(TECH_MAP[dep]);
331
+ for (const cat of categorized.values()) {
332
+ if (!seen.has(cat.displayName)) {
333
+ seen.add(cat.displayName);
334
+ techs.push(cat.displayName);
385
335
  }
386
336
  }
387
337
  return techs.slice(0, 5);
388
338
  }
339
+ function buildSingleAppComponent(structure) {
340
+ const type = structure.framework
341
+ ? (categorizeDep(structure.framework.toLowerCase())?.role === "frontend" ? "frontend" : "api")
342
+ : "service";
343
+ return {
344
+ id: slugify(structure.projectName) ?? "app",
345
+ name: structure.projectName || "Application",
346
+ type,
347
+ layer: "application",
348
+ path: ".",
349
+ description: "",
350
+ technologies: [structure.language, structure.framework].filter(Boolean),
351
+ };
352
+ }
389
353
  function capitalize(s) {
390
354
  return s.charAt(0).toUpperCase() + s.slice(1);
391
355
  }
@@ -2,6 +2,7 @@
2
2
  // Maps connections between components via imports, Docker, K8s, env vars, known SDKs
3
3
  // Only deterministic matches — ambiguous mappings are left as gaps for the LLM
4
4
  import { slugify } from "./utils.js";
5
+ import { categorizeDep } from "./taxonomy.js";
5
6
  export async function mapConnections(tk, components, infra, events, importMap) {
6
7
  const connections = [];
7
8
  const componentIds = new Set(components.map((c) => c.id));
@@ -194,34 +195,32 @@ async function detectServerServesUI(tk, components, connections) {
194
195
  }
195
196
  }
196
197
  async function detectKnownSDKConnections(components, connections) {
197
- const externalTechs = {
198
- Stripe: "stripe-api",
199
- Firebase: "firebase",
200
- Supabase: "supabase",
201
- Redis: "redis",
202
- Kafka: "kafka",
203
- RabbitMQ: "rabbitmq",
204
- "Socket.IO": "websocket",
205
- PostgreSQL: "database",
206
- MySQL: "database",
207
- MongoDB: "database",
198
+ // Role → connection type mapping
199
+ const roleToConnType = {
200
+ data: "database",
201
+ messaging: "queue",
202
+ external: "http",
208
203
  };
209
204
  for (const comp of components) {
210
205
  for (const tech of comp.technologies) {
211
- const externalId = externalTechs[tech];
212
- if (externalId) {
213
- const target = components.find((c) => c.id !== comp.id && (c.technologies.includes(tech) ||
214
- c.type === "database" && (externalId === "database")));
215
- if (target) {
216
- connections.push({
217
- from: comp.id,
218
- to: target.id,
219
- type: externalId === "database" ? "database" : "http",
220
- description: `${comp.name} uses ${tech}`,
221
- confidence: 95,
222
- async: false,
223
- });
224
- }
206
+ // Use taxonomy to determine the role/connection type of this tech
207
+ const cat = categorizeDep(tech.toLowerCase());
208
+ if (!cat)
209
+ continue;
210
+ const connType = roleToConnType[cat.role];
211
+ if (!connType)
212
+ continue;
213
+ const target = components.find((c) => c.id !== comp.id && (c.technologies.includes(tech) ||
214
+ (c.type === "database" && connType === "database")));
215
+ if (target) {
216
+ connections.push({
217
+ from: comp.id,
218
+ to: target.id,
219
+ type: connType,
220
+ description: `${comp.name} uses ${tech}`,
221
+ confidence: 95,
222
+ async: false,
223
+ });
225
224
  }
226
225
  }
227
226
  }
@@ -0,0 +1,72 @@
1
+ import type { TreeEntry } from "./types.js";
2
+ export interface DeepDrillResult {
3
+ componentId: string;
4
+ componentName: string;
5
+ componentPath: string;
6
+ scannedAt: string;
7
+ durationMs: number;
8
+ fileTree: TreeEntry[];
9
+ metrics: {
10
+ fileCount: number;
11
+ codeFileCount: number;
12
+ totalLines: number;
13
+ languages: Array<{
14
+ language: string;
15
+ files: number;
16
+ }>;
17
+ };
18
+ structure: {
19
+ entryPoints: string[];
20
+ keyFiles: Array<{
21
+ path: string;
22
+ role: string;
23
+ }>;
24
+ publicExports: string[];
25
+ directories: string[];
26
+ };
27
+ dependencies: {
28
+ manifest: string | null;
29
+ production: Array<{
30
+ name: string;
31
+ category: string | null;
32
+ displayName: string;
33
+ }>;
34
+ development: Array<{
35
+ name: string;
36
+ category: string | null;
37
+ displayName: string;
38
+ }>;
39
+ };
40
+ imports: {
41
+ internalImports: Array<{
42
+ targetPath: string;
43
+ sourceFiles: string[];
44
+ }>;
45
+ externalPackages: string[];
46
+ };
47
+ patterns: Array<{
48
+ type: string;
49
+ label: string;
50
+ matches: Array<{
51
+ file: string;
52
+ line: string;
53
+ content: string;
54
+ }>;
55
+ count: number;
56
+ }>;
57
+ connections: {
58
+ outgoing: Array<{
59
+ targetId: string;
60
+ targetName: string;
61
+ type: string;
62
+ description: string;
63
+ }>;
64
+ incoming: Array<{
65
+ sourceId: string;
66
+ sourceName: string;
67
+ type: string;
68
+ description: string;
69
+ }>;
70
+ };
71
+ }
72
+ export declare function runDeepDrill(projectRoot: string, componentId: string, componentName: string, componentPath: string): Promise<DeepDrillResult>;