docguard-cli 0.10.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/PHILOSOPHY.md +59 -106
  2. package/README.md +23 -1
  3. package/cli/commands/diagnose.mjs +157 -52
  4. package/cli/commands/fix.mjs +113 -1
  5. package/cli/commands/generate.mjs +91 -0
  6. package/cli/commands/hooks.mjs +40 -2
  7. package/cli/commands/score.mjs +22 -0
  8. package/cli/commands/sync.mjs +123 -0
  9. package/cli/docguard.mjs +22 -0
  10. package/cli/scanners/cdk.mjs +10 -0
  11. package/cli/scanners/frontend.mjs +438 -0
  12. package/cli/scanners/iac.mjs +235 -0
  13. package/cli/scanners/integrations.mjs +116 -0
  14. package/cli/scanners/memory-plan.mjs +242 -0
  15. package/cli/scanners/project-type.mjs +310 -0
  16. package/cli/scanners/routes.mjs +149 -0
  17. package/cli/scanners/schemas.mjs +174 -1
  18. package/cli/shared-ignore.mjs +29 -2
  19. package/cli/shared-source.mjs +2 -1
  20. package/cli/validators/api-surface.mjs +112 -37
  21. package/cli/validators/changelog.mjs +3 -2
  22. package/cli/validators/docs-coverage.mjs +125 -6
  23. package/cli/validators/docs-sync.mjs +49 -8
  24. package/cli/validators/metadata-sync.mjs +6 -1
  25. package/cli/validators/metrics-consistency.mjs +5 -2
  26. package/cli/validators/test-spec.mjs +129 -11
  27. package/cli/validators/todo-tracking.mjs +55 -2
  28. package/cli/writers/api-reference.mjs +101 -0
  29. package/cli/writers/mechanical.mjs +116 -0
  30. package/cli/writers/sections.mjs +148 -0
  31. package/commands/docguard.fix.md +19 -3
  32. package/docs/doc-sections.md +37 -0
  33. package/extensions/spec-kit-docguard/README.md +7 -4
  34. package/extensions/spec-kit-docguard/commands/fix.md +74 -0
  35. package/extensions/spec-kit-docguard/commands/generate.md +25 -2
  36. package/extensions/spec-kit-docguard/commands/sync.md +62 -0
  37. package/extensions/spec-kit-docguard/extension.yml +1 -1
  38. package/extensions/spec-kit-docguard/skills/docguard-fix/SKILL.md +13 -3
  39. package/extensions/spec-kit-docguard/skills/docguard-guard/SKILL.md +2 -2
  40. package/extensions/spec-kit-docguard/skills/docguard-review/SKILL.md +2 -2
  41. package/extensions/spec-kit-docguard/skills/docguard-score/SKILL.md +2 -2
  42. package/extensions/spec-kit-docguard/skills/docguard-sync/SKILL.md +111 -0
  43. package/package.json +1 -1
  44. package/templates/ARCHITECTURE.md.template +52 -0
@@ -0,0 +1,116 @@
1
+ /**
2
+ * External Integrations Scanner — what third-party services does this project talk to?
3
+ *
4
+ * Recognizes common SDKs/clients across all detected ecosystems (JS/TS, Python,
5
+ * Rust, Go, Java, Ruby, PHP, .NET) by name-matching dependencies against a
6
+ * curated registry. Output is the project's external-system surface — the kind
7
+ * of "this integrates with AWS S3, Stripe, OpenAI, Sentry" facts the AI agent
8
+ * uses to write INTEGRATIONS.md.
9
+ *
10
+ * Deterministic facts; the agent narrates. Zero NPM dependencies.
11
+ */
12
+
13
+ import { detectEcosystems } from './project-type.mjs';
14
+
15
/**
 * Curated integration registry.
 *
 * Each row below is `[name, category, patterns]` and is expanded into a
 * `{ name, category, patterns }` object. `patterns` are substrings that are
 * looked up (lowercased) inside dependency names. To recognise a new SDK,
 * append a row under the matching category.
 */
const REGISTRY = [
  // ── Cloud ──
  ['AWS', 'Cloud', ['@aws-sdk/', 'aws-sdk', 'boto3', 'aws-sdk-go', 'aws-sdk-rust', 'aws.sdk', 'amazon-aws']],
  ['Google Cloud', 'Cloud', ['@google-cloud/', 'google-cloud-', 'gcloud']],
  ['Azure', 'Cloud', ['@azure/', 'azure-', 'azure.sdk']],
  ['Cloudflare', 'Cloud', ['cloudflare', 'wrangler', '@cloudflare/']],
  ['Vercel', 'Cloud', ['@vercel/', 'vercel/']],
  // ── Databases / storage ──
  ['PostgreSQL', 'Database', ['pg', '@neondatabase/serverless', 'postgres', 'psycopg', 'sqlx', 'lib/pq']],
  ['MySQL', 'Database', ['mysql2', 'mysql-connector', 'pymysql']],
  ['MongoDB', 'Database', ['mongoose', 'mongodb', 'pymongo']],
  ['DynamoDB', 'Database', ['@aws-sdk/client-dynamodb', 'aws-sdk/dynamodb', 'boto3.dynamodb']],
  ['Redis', 'Database', ['redis', 'ioredis', 'redis-rs', 'go-redis']],
  ['Supabase', 'Database', ['@supabase/', 'supabase-py', 'supabase-go']],
  ['Firebase', 'Database', ['firebase', '@firebase/', 'firebase-admin', 'pyrebase']],
  // ── Payments ──
  ['Stripe', 'Payments', ['stripe', '@stripe/', 'stripe-go', 'stripe-java']],
  ['Braintree', 'Payments', ['braintree']],
  ['PayPal', 'Payments', ['@paypal/', 'paypal-checkout']],
  // ── Auth ──
  ['Auth0', 'Auth', ['@auth0/', 'auth0-']],
  ['Clerk', 'Auth', ['@clerk/']],
  ['NextAuth', 'Auth', ['next-auth', '@auth/']],
  ['Passport', 'Auth', ['passport', 'passport-']],
  ['Cognito', 'Auth', ['@aws-sdk/client-cognito-identity', 'amazon-cognito-identity-js', 'aws-amplify']],
  // ── AI ──
  ['OpenAI', 'AI', ['openai']],
  ['Anthropic', 'AI', ['@anthropic-ai/sdk', 'anthropic']],
  ['LangChain', 'AI', ['langchain', '@langchain/']],
  ['Hugging Face', 'AI', ['huggingface', '@huggingface/', 'transformers']],
  // ── Messaging / email ──
  ['Twilio', 'Messaging', ['twilio']],
  ['SendGrid', 'Messaging', ['@sendgrid/', 'sendgrid']],
  ['Mailgun', 'Messaging', ['mailgun', 'mailgun.js']],
  ['Resend', 'Messaging', ['resend']],
  ['Slack', 'Messaging', ['@slack/', 'slack-sdk', 'slack_sdk']],
  ['Bird (MessageBird)', 'Messaging', ['messagebird', 'bird-sdk', '@birdapp/']],
  // ── Observability ──
  ['Sentry', 'Observability', ['@sentry/', 'sentry-sdk', 'sentry-go']],
  ['Datadog', 'Observability', ['dd-trace', '@datadog/', 'datadog']],
  ['OpenTelemetry', 'Observability', ['@opentelemetry/', 'opentelemetry-']],
  ['Pino', 'Observability', ['pino']],
  // ── Search ──
  ['Algolia', 'Search', ['algoliasearch', '@algolia/', 'algolia']],
  ['Elasticsearch', 'Search', ['@elastic/elasticsearch', 'elasticsearch']],
  ['Meilisearch', 'Search', ['meilisearch']],
  ['Typesense', 'Search', ['typesense']],
  // ── Queues ──
  ['SQS', 'Queue', ['@aws-sdk/client-sqs']],
  ['RabbitMQ', 'Queue', ['amqplib', 'pika']],
  ['Kafka', 'Queue', ['kafkajs', 'sarama', 'confluent-kafka']],
  // ── Storage ──
  ['S3', 'Storage', ['@aws-sdk/client-s3', 'multer-s3', 'boto3.s3']],
].map(([name, category, patterns]) => ({ name, category, patterns }));
73
+
74
/**
 * List the dependency names of a `name → version` map, lowercased.
 * @param {Object<string, string>} deps
 * @returns {string[]}
 */
function depKeys(deps) {
  const names = [];
  for (const key of Object.keys(deps)) {
    names.push(key.toLowerCase());
  }
  return names;
}
77
+
78
/**
 * Collect the dependency names that contain at least one of the patterns.
 *
 * Matching is a plain substring test against the lowercased pattern; `keys`
 * are compared as given (callers pass names that are already lowercased).
 * Each matching key is reported once, even when several patterns hit it,
 * in the order of its first hit (patterns outer, keys inner).
 *
 * @param {string[]} keys      dependency names
 * @param {string[]} patterns  substrings to look for
 * @returns {string[]} unique matching keys (empty when nothing matches)
 */
function matches(keys, patterns) {
  const evidence = new Set();
  for (const pattern of patterns) {
    const needle = pattern.toLowerCase();
    for (const key of keys) {
      if (key.includes(needle)) evidence.add(key);
    }
  }
  return [...evidence];
}
88
+
89
/**
 * Detect external integrations across every ecosystem found in the project.
 *
 * For each ecosystem the dependency names are matched against REGISTRY; hits
 * for the same integration are merged across ecosystems. The result is sorted
 * by category, then by name.
 *
 * @param {string} projectDir
 * @param {object} [config]
 * @returns {Array<{ name, category, ecosystems: string[], evidence: string[] }>}
 */
export function detectIntegrations(projectDir, config = {}) {
  const byName = new Map(); // integration name -> { name, category, ecosystems:Set, evidence:Set }

  for (const { deps, language } of detectEcosystems(projectDir, config)) {
    const keys = depKeys(deps);
    if (keys.length === 0) continue;

    REGISTRY.forEach(({ name, category, patterns }) => {
      const hits = matches(keys, patterns);
      if (hits.length === 0) return;

      if (!byName.has(name)) {
        byName.set(name, { name, category, ecosystems: new Set(), evidence: new Set() });
      }
      const row = byName.get(name);
      row.ecosystems.add(language);
      hits.forEach(hit => row.evidence.add(hit));
    });
  }

  const rows = Array.from(byName.values(), row => ({
    name: row.name,
    category: row.category,
    ecosystems: [...row.ecosystems],
    evidence: [...row.evidence],
  }));
  rows.sort((a, b) => a.category.localeCompare(b.category) || a.name.localeCompare(b.name));
  return rows;
}
@@ -0,0 +1,242 @@
1
+ /**
2
+ * Memory Plan — the orchestration artifact behind AI-powered Generate.
3
+ *
4
+ * DocGuard's job (per the v2 vision) is to ORCHESTRATE: scan the codebase, build
5
+ * the code-truth skeleton in marked sections, and emit a structured **agent task
6
+ * manifest** telling the AI exactly what prose to write for each section,
7
+ * grounded in scanned facts. The agent then writes the content; DocGuard verifies.
8
+ *
9
+ * This is language-aware: the set of documents and sections depends on the
10
+ * detected project profile (a Rust CLI gets no Screens/API doc; a webapp does).
11
+ *
12
+ * Pure read-only assembly. Zero NPM dependencies.
13
+ */
14
+
15
+ import { detectProjectProfile } from './project-type.mjs';
16
+ import { detectDocTools } from './doc-tools.mjs';
17
+ import { scanRoutesDeep } from './routes.mjs';
18
+ import { scanSchemasDeep } from './schemas.mjs';
19
+ import { scanFrontend } from './frontend.mjs';
20
+ import { grepEnvUsage } from '../shared-source.mjs';
21
+ import { detectIntegrations } from './integrations.mjs';
22
+
23
/** Tiny Markdown helpers used to render the code-truth sections. */
const md = {
  /**
   * Render a pipe table: header row, `---` separator row, then one line per row.
   * @param {string[]} headers
   * @param {string[][]} rows
   * @returns {string}
   */
  table(headers, rows) {
    const toLine = (cells) => `| ${cells.join(' | ')} |`;
    const headerLine = toLine(headers);
    const separatorLine = toLine(headers.map(() => '---'));
    const bodyLines = rows.map(toLine).join('\n');
    return `${headerLine}\n${separatorLine}\n${bodyLines}`;
  },
};
31
+
32
/**
 * Build the full memory plan for a project.
 *
 * Scans the project (profile, routes, schemas, frontend, env vars,
 * integrations), then composes one entry per document. Each document holds
 * `source: 'code'` sections (Markdown tables rendered from scanned facts) and
 * `source: 'human'` sections (a prose task plus the facts that ground it).
 * Every human section is also pushed onto the flat `agentTasks` list.
 *
 * Documents are conditional on what was found: ARCHITECTURE and the three
 * docs-implementation files are always emitted; API-REFERENCE, DATA-MODEL,
 * SCREENS, INTEGRATIONS, FEATURES and ENVIRONMENT only when their surface is
 * non-empty.
 *
 * @param {string} projectDir
 * @param {object} [config]
 * @returns {{ profile, surface, docs, agentTasks }}
 *   docs[].sections[]: { id, source:'code', body } OR { id, source:'human', task, grounding }
 *   agentTasks: flattened prose tasks the AI must write.
 */
export function buildMemoryPlan(projectDir, config = {}) {
  const profile = detectProjectProfile(projectDir, config);
  const primaryFramework = profile.primary?.framework || profile.frameworks[0] || '';

  // ── Gather the code-truth surface ──
  const docTools = detectDocTools(projectDir);
  const routes = scanRoutesDeep(projectDir, { framework: profile.frameworks.join(' ') }, docTools, { config });
  const schemas = scanSchemasDeep(projectDir, { framework: primaryFramework }, docTools);
  const entities = schemas.entities || [];
  const isWebFrontend = profile.ecosystems.some(e => e.kind === 'webapp');
  // The frontend scan only runs for web apps; otherwise use an empty result of the same shape.
  const fe = isWebFrontend
    ? scanFrontend(projectDir, config)
    : { screens: [], components: [], stores: [], hooks: [], contexts: [], apiCalls: [],
        i18n: { usedKeys: [], locales: [], missing: [] },
        framework: null, stateLib: null, dataLib: null };
  const envVars = [...grepEnvUsage(projectDir, config)].sort();
  const integrations = detectIntegrations(projectDir, config);

  const surface = {
    profile,
    endpoints: routes.map(r => ({ method: r.method, path: r.path, auth: !!r.auth })),
    entities: entities.map(e => ({ name: e.name, fields: e.fields || [] })),
    screens: fe.screens,
    components: fe.components,
    envVars,
    integrations,
    stores: fe.stores,
    hooks: fe.hooks,
    contexts: fe.contexts,
    apiCalls: fe.apiCalls,
    i18n: fe.i18n,
    frontend: { framework: fe.framework, stateLib: fe.stateLib, dataLib: fe.dataLib },
  };

  // ── Compose documents + sections (language/kind-aware) ──
  const docs = [];
  const agentTasks = [];
  // Registers a prose task in the flat list and returns the matching 'human' section.
  const addTask = (doc, sectionId, instruction, grounding) => {
    agentTasks.push({ doc, sectionId, instruction, grounding });
    return { id: sectionId, source: 'human', task: instruction, grounding };
  };

  // ARCHITECTURE — always.
  {
    const sections = [];
    const stackRows = [
      ...profile.ecosystems.map(e => [e.dir, e.language, e.framework || '—', e.kind]),
    ];
    sections.push({
      id: 'tech-stack',
      source: 'code',
      body: md.table(['Path', 'Language', 'Framework', 'Kind'], stackRows),
    });
    sections.push(addTask('docs-canonical/ARCHITECTURE.md', 'overview',
      'Write a 2-3 sentence System Overview: what this project does and who uses it.',
      { languages: profile.languages, frameworks: profile.frameworks, kind: profile.kind }));
    sections.push(addTask('docs-canonical/ARCHITECTURE.md', 'components',
      'Describe the major components/modules and their responsibilities, using the real directories below.',
      { ecosystems: profile.ecosystems.map(e => ({ dir: e.dir, language: e.language, framework: e.framework })) }));

    // Frontend modules (stores/hooks/contexts) — code-truth section when present.
    const feCounts = surface.stores.length + surface.hooks.length + surface.contexts.length;
    if (feCounts > 0) {
      const feRows = [
        ['Stores', String(surface.stores.length), surface.stores.slice(0, 4).map(s => `\`${s.name}\``).join(', ') || '—'],
        ['Hooks', String(surface.hooks.length), surface.hooks.slice(0, 4).map(s => `\`${s.name}\``).join(', ') || '—'],
        ['Contexts', String(surface.contexts.length), surface.contexts.slice(0, 4).map(s => `\`${s.name}\``).join(', ') || '—'],
      ].filter(r => r[1] !== '0');
      sections.push({
        id: 'frontend-modules',
        source: 'code',
        body: md.table(['Kind', 'Count', 'Examples'], feRows),
      });
    }
    docs.push({ path: 'docs-canonical/ARCHITECTURE.md', sections });
  }

  // API-REFERENCE — only if there's an API surface.
  if (surface.endpoints.length > 0) {
    const rows = surface.endpoints.map(e => [`\`${e.method}\``, `\`${e.path}\``, e.auth ? '🔒' : '🔓']);
    const sections = [{
      id: 'endpoints',
      source: 'code',
      body: md.table(['Method', 'Path', 'Auth'], rows),
    }];
    sections.push(addTask('docs-canonical/API-REFERENCE.md', 'overview',
      `Write a short intro describing the API (${surface.endpoints.length} endpoints) and its auth model.`,
      { endpointCount: surface.endpoints.length, framework: primaryFramework }));
    docs.push({ path: 'docs-canonical/API-REFERENCE.md', sections });
  }

  // DATA-MODEL — only if entities detected.
  if (surface.entities.length > 0) {
    const rows = surface.entities.map(e => [`\`${e.name}\``, String((e.fields || []).length)]);
    const sections = [{
      id: 'entities',
      source: 'code',
      body: md.table(['Entity', 'Fields'], rows),
    }];
    sections.push(addTask('docs-canonical/DATA-MODEL.md', 'relationships',
      'Describe the relationships between the entities below and any key indexes.',
      { entities: surface.entities.map(e => e.name) }));
    docs.push({ path: 'docs-canonical/DATA-MODEL.md', sections });
  }

  // SCREENS — only for web frontends with screens.
  if (surface.screens.length > 0) {
    const rows = surface.screens.map(s => [`\`${s.path}\``, s.component || '—']);
    const sections = [{
      id: 'screens',
      source: 'code',
      body: md.table(['Route', 'Screen'], rows),
    }];
    sections.push(addTask('docs-canonical/SCREENS.md', 'flows',
      `Group the ${surface.screens.length} screens into features/user-flows and describe each flow.`,
      { screens: surface.screens.map(s => s.path), components: surface.components.length }));
    docs.push({ path: 'docs-canonical/SCREENS.md', sections });
  }

  // INTEGRATIONS — external services / SDKs detected from deps.
  if (surface.integrations.length > 0) {
    const rows = surface.integrations.map(i => [i.category, `**${i.name}**`, i.evidence.slice(0, 3).join(', ')]);
    const sections = [{
      id: 'integrations',
      source: 'code',
      body: md.table(['Category', 'Service', 'Evidence (SDK)'], rows),
    }];
    sections.push(addTask('docs-canonical/INTEGRATIONS.md', 'overview',
      `Describe each detected integration: what role it plays in this system, which module(s) use it, and any operational notes (auth, credentials, regions).`,
      { integrations: surface.integrations.map(i => ({ name: i.name, category: i.category })) }));
    docs.push({ path: 'docs-canonical/INTEGRATIONS.md', sections });
  }

  // FEATURES — derived from screens + endpoints when there's a UI surface.
  if (surface.screens.length > 0) {
    // Prototype-less map: first path segments are arbitrary route text, so a
    // segment such as "constructor" or "toString" must not resolve to an
    // inherited Object.prototype member (pushing onto it would throw).
    const groups = Object.create(null);
    for (const s of surface.screens) {
      const seg = (s.path.split('/').filter(Boolean)[0] || 'root');
      (groups[seg] ??= []).push(s);
    }
    const rows = Object.entries(groups)
      .sort(([a], [b]) => a.localeCompare(b))
      .map(([area, list]) => [`/${area === 'root' ? '' : area}`, String(list.length), list.slice(0, 4).map(s => s.component || s.path).join(', ')]);
    const sections = [{
      id: 'feature-areas',
      source: 'code',
      body: md.table(['Area', 'Screens', 'Examples'], rows),
    }];
    sections.push(addTask('docs-canonical/FEATURES.md', 'features',
      `Turn the candidate feature areas below into a clear feature inventory. For each area: what user job it serves, which screens belong to it, which endpoints back it (use the apiCalls map below as evidence), and the success criteria.`,
      {
        areas: Object.keys(groups),
        screenCount: surface.screens.length,
        endpointCount: surface.endpoints.length,
        apiCalls: surface.apiCalls.slice(0, 30).map(c => ({ method: c.method, path: c.path })),
        storeCount: surface.stores.length,
      }));
    docs.push({ path: 'docs-canonical/FEATURES.md', sections });
  }

  // ENVIRONMENT — env vars + setup.
  if (surface.envVars.length > 0) {
    const rows = surface.envVars.map(v => [`\`${v}\``, '<!-- describe -->']);
    const sections = [{
      id: 'env-vars',
      source: 'code',
      body: md.table(['Variable', 'Description'], rows),
    }];
    sections.push(addTask('docs-canonical/ENVIRONMENT.md', 'setup',
      'Write the Prerequisites and Setup Steps (clone → install → run) for this stack.',
      { languages: profile.languages, frameworks: profile.frameworks }));
    docs.push({ path: 'docs-canonical/ENVIRONMENT.md', sections });
  }

  // ── docs-implementation/ — tribal knowledge the AGENT writes (no code section) ──
  // These can't be derived from code; they capture lessons learned, current
  // state, and operational procedures. DocGuard emits guided prompts; the
  // agent reads git history / chat / human notes and writes them.
  docs.push({
    path: 'docs-implementation/KNOWN-GOTCHAS.md',
    sections: [
      addTask('docs-implementation/KNOWN-GOTCHAS.md', 'gotchas',
        'Document the non-obvious lessons that have bitten this team. For each: symptom → cause → fix. Mine git log (commit messages, revert/hotfix commits), recent PRs, and chat history. Keep entries terse and actionable.',
        { integrations: surface.integrations.map(i => i.name), primary: profile.primary?.framework }),
    ],
  });
  docs.push({
    path: 'docs-implementation/CURRENT-STATE.md',
    sections: [
      addTask('docs-implementation/CURRENT-STATE.md', 'state',
        'Snapshot of what is shipped vs in-flight vs planned. What is deployed (where, which versions), which features are behind flags, what is known tech debt. Mine CHANGELOG, deploy logs, feature-flag config, GitHub Issues/Projects.',
        { kind: profile.kind, languages: profile.languages }),
    ],
  });
  docs.push({
    path: 'docs-implementation/RUNBOOKS.md',
    sections: [
      addTask('docs-implementation/RUNBOOKS.md', 'runbooks',
        'Operational procedures for production: deploy, rollback, hot-fix, common incidents, on-call escalation. For each runbook: when to use, exact steps, and the verification check. Mine scripts/, .github/workflows, deploy docs, and chat history.',
        { integrations: surface.integrations.map(i => i.name) }),
    ],
  });

  return { profile, surface, docs, agentTasks };
}
@@ -0,0 +1,310 @@
1
+ /**
2
+ * Project-Type Detection — the language-agnostic spine.
3
+ *
4
+ * DocGuard documents ANY project, not just JS/web. This scanner identifies
5
+ * every ecosystem present (polyglot/monorepo-aware) from its manifest files and
6
+ * extracts deterministic facts (language, framework, kind, dependencies, entry
7
+ * points). The AI agent then writes language-appropriate prose grounded in
8
+ * these facts.
9
+ *
10
+ * Supported ecosystems:
11
+ * JS/TS · Python · Rust · Go · Java/Kotlin · Ruby · PHP · C#/.NET
12
+ *
13
+ * Zero NPM dependencies — minimal manifest parsing with Node.js built-ins.
14
+ */
15
+
16
+ import { existsSync, readFileSync, readdirSync, statSync } from 'node:fs';
17
+ import { resolve, join, relative, dirname, basename } from 'node:path';
18
+
19
/** Directory names that are never descended into while looking for manifests. */
const IGNORE_DIRS = new Set([
  // dependencies / VCS / build output
  'node_modules',
  '.git',
  '.next',
  'dist',
  'build',
  'coverage',
  'target',
  // caches, virtualenvs, vendored code, hosting artefacts
  '.cache',
  '__pycache__',
  '.venv',
  'venv',
  'vendor',
  '.turbo',
  '.vercel',
  // .NET / Gradle / IDE / CDK / tooling
  'bin',
  'obj',
  '.gradle',
  '.idea',
  'cdk.out',
  '.claude',
]);
24
+
25
// Known manifest filenames and the ecosystem language each one implies.
// Rows are `[file, lang]`, expanded to `{ file, lang }` objects.
const MANIFESTS = [
  ['package.json', 'JavaScript'],
  ['pyproject.toml', 'Python'],
  ['requirements.txt', 'Python'],
  ['setup.py', 'Python'],
  ['Pipfile', 'Python'],
  ['Cargo.toml', 'Rust'],
  ['go.mod', 'Go'],
  ['pom.xml', 'Java'],
  ['build.gradle', 'Java'],
  ['build.gradle.kts', 'Kotlin'],
  ['Gemfile', 'Ruby'],
  ['composer.json', 'PHP'],
].map(([file, lang]) => ({ file, lang }));
40
+
41
/** Read a file as UTF-8 text; any read failure yields '' (deliberately best-effort). */
function readSafe(p) {
  let text = '';
  try {
    text = readFileSync(p, 'utf-8');
  } catch {
    // unreadable or missing manifest: treated as empty
  }
  return text;
}
42
/** Read and parse a JSON file; a missing, unreadable or malformed file yields null. */
function readJson(p) {
  let parsed = null;
  try {
    parsed = JSON.parse(readFileSync(p, 'utf-8'));
  } catch {
    // best-effort: callers fall back to an empty manifest
  }
  return parsed;
}
43
+
44
/**
 * Walk the project tree and collect every known manifest file.
 *
 * Directories listed in IGNORE_DIRS, and any directory whose name starts with
 * a dot, are skipped. Recursion stops below `maxDepth` (the project root is
 * depth 0). Unreadable directories are silently skipped.
 *
 * @param {string} projectDir
 * @param {number} [maxDepth=4]
 * @returns {Array<{ absDir: string, file: string, lang: string }>}
 */
function findManifests(projectDir, maxDepth = 4) {
  const hits = [];

  function visit(currentDir, level) {
    if (level > maxDepth) return;

    let children;
    try {
      children = readdirSync(currentDir, { withFileTypes: true });
    } catch {
      return;
    }

    for (const child of children) {
      const { name } = child;

      if (child.isDirectory()) {
        const skipped = IGNORE_DIRS.has(name) || name.startsWith('.');
        if (!skipped) visit(join(currentDir, name), level + 1);
        continue;
      }
      if (!child.isFile()) continue;

      const known = MANIFESTS.find(x => x.file === name);
      if (known) {
        hits.push({ absDir: currentDir, file: name, lang: known.lang });
      } else if (name.endsWith('.csproj')) {
        hits.push({ absDir: currentDir, file: name, lang: 'C#' });
      }
    }
  }

  visit(resolve(projectDir), 0);
  return hits;
}
65
+
66
+ // ── Dependency extraction per ecosystem ──────────────────────────────────────
67
+
68
/**
 * Extract dependency names (and versions where stated) from TOML tables.
 *
 * This is a line-oriented reader, not a TOML parser. Inside any table listed
 * in `sections` it understands:
 *   name = "1.0"  /  name = '1.0'            string version (basic or literal)
 *   name = { version = "1.0", ... }          inline table ('*' when no version)
 *   "quoted-name" = "1.0"                    quoted keys
 *   name.workspace = true                    dotted workspace inheritance
 *   name.version = "1.0"                     dotted version key
 * and the sub-table form:
 *   [<section>.<name>]  followed by an optional  version = "1.0"
 *
 * Other value shapes (multi-line tables/arrays, bare booleans) are ignored.
 *
 * @param {string} content    manifest text
 * @param {string[]} sections table names to read, e.g. ['dependencies']
 * @returns {Object<string, string>} name → version ('*' when unknown/empty)
 */
function tomlSectionDeps(content, sections) {
  const deps = {};
  const stringValue = /^(?:"([^"]*)"|'([^']*)')/;
  let active = false;      // currently inside one of `sections`
  let subTableDep = null;  // currently inside `[<section>.<name>]` for this dep

  for (const raw of content.split('\n')) {
    const line = raw.trim();

    const sec = line.match(/^\[([^\]]+)\]/);
    if (sec) {
      const title = sec[1].trim();
      active = sections.includes(title);
      subTableDep = null;
      const parent = active ? undefined : sections.find(s => title.startsWith(`${s}.`));
      if (parent !== undefined) {
        const rest = title.slice(parent.length + 1).trim();
        const quoted = rest.match(/^(["'])(.+?)\1/);
        const name = quoted ? quoted[2] : rest.split('.')[0].trim();
        if (name) {
          subTableDep = name;
          deps[name] = '*'; // refined below if a `version = ...` line follows
        }
      }
      continue;
    }
    if (!line || line.startsWith('#')) continue;

    if (subTableDep !== null) {
      const ver = line.match(/^version\s*=\s*(?:"([^"]*)"|'([^']*)')/);
      if (ver) deps[subTableDep] = ver[1] || ver[2] || '*';
      continue;
    }
    if (!active) continue;

    const kv = line.match(/^(?:"([^"]+)"|'([^']+)'|([A-Za-z0-9_.\-]+))\s*=\s*(.*)$/);
    if (!kv) continue;
    const value = kv[4];
    const str = value.match(stringValue);

    // Bare dotted keys that address a dependency's own sub-key.
    const dotted = kv[3] ? kv[3].match(/^(.+)\.(workspace|version)$/) : null;
    if (dotted) {
      if (dotted[2] === 'version') {
        if (str) deps[dotted[1]] = str[1] || str[2] || '*';
      } else if (/^true\b/.test(value)) {
        deps[dotted[1]] = '*';
      }
      continue;
    }

    const name = kv[1] ?? kv[2] ?? kv[3];
    if (str) {
      deps[name] = str[1] || str[2] || '*';
    } else if (value.startsWith('{')) {
      const inline = value.match(/^\{[^}]*version\s*=\s*(?:"([^"]*)"|'([^']*)')/);
      deps[name] = (inline && (inline[1] || inline[2])) || '*';
    }
  }
  return deps;
}
83
+
84
/**
 * Dependencies declared in a pyproject.toml.
 *
 * Reads two forms and merges them (the Poetry table wins on name clashes):
 *   - the array form `dependencies = ["pkg>=1", "pkg[extra]", ...]`
 *   - the Poetry table `[tool.poetry.dependencies]`
 *
 * The array is scanned string-by-string, so a `]` inside an item (extras such
 * as `uvicorn[standard]`) does not end the list, and quoted text inside an
 * environment marker is not mistaken for a package. A key named exactly
 * `dependencies` is preferred; if there is none, the first `…dependencies = [`
 * anywhere in the file is used.
 *
 * @param {string} content pyproject.toml text
 * @returns {Object<string, string>} name → '*' for array items, or the Poetry version
 */
function pyprojectDeps(content) {
  const deps = {};

  const start = content.match(/^[ \t]*dependencies\s*=\s*\[/m)
    || content.match(/dependencies\s*=\s*\[/);
  if (start) {
    let quote = null; // the quote character of the string being read, if any
    let item = '';
    for (let i = start.index + start[0].length; i < content.length; i++) {
      const ch = content[i];
      if (quote !== null) {
        if (ch === '\\' && quote === '"') {
          // escaped character inside a basic string: keep it, never treat it as the closer
          item += content[i + 1] ?? '';
          i++;
        } else if (ch === quote) {
          const name = item.match(/^\s*([A-Za-z0-9_.\-]+)/);
          if (name) deps[name[1]] = '*';
          quote = null;
          item = '';
        } else {
          item += ch;
        }
      } else if (ch === '"' || ch === "'") {
        quote = ch;
      } else if (ch === '#') {
        // comment: skip to end of line
        const eol = content.indexOf('\n', i);
        i = eol === -1 ? content.length : eol;
      } else if (ch === ']') {
        break;
      }
    }
  }

  // Poetry table form
  Object.assign(deps, tomlSectionDeps(content, ['tool.poetry.dependencies']));
  return deps;
}
96
+
97
/**
 * Dependencies listed in a pip requirements file.
 *
 * Blank lines, comments and option lines (anything starting with `-`, e.g.
 * `-r other.txt`, `-e .`) are skipped. Direct references — URLs, VCS URLs and
 * local paths — are not package names; they are skipped unless they carry an
 * `#egg=<name>` fragment, in which case that name is recorded. Names are
 * lowercased.
 *
 * @param {string} content requirements.txt text
 * @returns {Object<string, string>} name → pinned/bounded version, or '*'
 */
function requirementsDeps(content) {
  const deps = {};
  const isUrl = /^[A-Za-z][A-Za-z0-9+.\-]*:\/\//;   // https://…, git+ssh://…
  const isPath = /^(?:\.{1,2}$|\.{0,2}[\\/])/;      // ".", "./x", "../x", "/x"

  for (const raw of content.split('\n')) {
    const line = raw.trim();
    if (!line || line.startsWith('#') || line.startsWith('-')) continue;

    if (isUrl.test(line) || isPath.test(line)) {
      const egg = line.match(/[#&]egg=([A-Za-z0-9_.\-]+)/);
      if (egg) deps[egg[1].toLowerCase()] = '*';
      continue;
    }

    const m = line.match(/^([A-Za-z0-9_.\-]+)\s*(?:[><=~!]=?\s*([0-9][\w.\-]*))?/);
    if (m) deps[m[1].toLowerCase()] = m[2] || '*';
  }
  return deps;
}
107
+
108
/**
 * Module requirements declared in a go.mod file.
 *
 * Reads every `require ( ... )` block (go.mod files commonly contain more than
 * one, e.g. a separate block for indirect requirements) as well as single-line
 * `require <path> <version>` directives.
 *
 * @param {string} content go.mod text
 * @returns {Object<string, string>} module path → version (with the leading `v`)
 */
function goModDeps(content) {
  const deps = {};
  const record = (m) => { deps[m[1]] = `v${m[2]}`; };

  for (const block of content.matchAll(/require\s*\(([\s\S]*?)\)/g)) {
    for (const m of block[1].matchAll(/^\s*([\w.\-/]+)\s+v([\w.\-]+)/gm)) record(m);
  }
  for (const m of content.matchAll(/^require\s+([\w.\-/]+)\s+v([\w.\-]+)/gm)) record(m);

  return deps;
}
119
+
120
/**
 * Dependencies declared in a Gradle build script (Groovy or Kotlin DSL).
 * Recognises `implementation`, `api`, `compile` and `testImplementation`
 * declarations written as a quoted `group:artifact[:version]` coordinate.
 *
 * @param {string} content build.gradle / build.gradle.kts text
 * @returns {Object<string, string>} `group:artifact` → version, or '*'
 */
function gradleDeps(content) {
  const declaration = /(?:implementation|api|compile|testImplementation)\s*[(\s]['"]([^'":]+):([^'":]+)(?::([^'"]+))?['"]/g;
  const pairs = Array.from(
    content.matchAll(declaration),
    ([, group, artifact, version]) => [`${group}:${artifact}`, version || '*'],
  );
  return Object.fromEntries(pairs);
}
127
+
128
/**
 * Dependencies declared in a Maven pom.xml.
 * Each `<dependency>` contributes `groupId:artifactId` (trimmed); versions are
 * not read, so every value is '*'.
 *
 * @param {string} content pom.xml text
 * @returns {Object<string, string>}
 */
function pomDeps(content) {
  const dependency = /<dependency>[\s\S]*?<groupId>([^<]+)<\/groupId>[\s\S]*?<artifactId>([^<]+)<\/artifactId>/g;
  const deps = {};
  let hit = dependency.exec(content);
  while (hit !== null) {
    const [, groupId, artifactId] = hit;
    deps[`${groupId.trim()}:${artifactId.trim()}`] = '*';
    hit = dependency.exec(content);
  }
  return deps;
}
135
+
136
/**
 * Gems declared in a Gemfile (`gem 'name'` / `gem "name"` at the start of a line).
 * Version constraints are not read; every value is '*'.
 *
 * @param {string} content Gemfile text
 * @returns {Object<string, string>}
 */
function gemfileDeps(content) {
  const gemLine = /^\s*gem\s+['"]([^'"]+)['"]/gm;
  return [...content.matchAll(gemLine)].reduce((deps, [, gemName]) => {
    deps[gemName] = '*';
    return deps;
  }, {});
}
141
+
142
/**
 * NuGet packages referenced by a .csproj file.
 * Reads `<PackageReference Include="..." Version="...">`; a reference without
 * an adjacent `Version` attribute gets '*'.
 *
 * @param {string} content .csproj text
 * @returns {Object<string, string>} package id → version, or '*'
 */
function csprojDeps(content) {
  const packageRef = /<PackageReference\s+Include="([^"]+)"(?:\s+Version="([^"]+)")?/g;
  const deps = {};
  for (const [, packageId, version] of content.matchAll(packageRef)) {
    deps[packageId] = version || '*';
  }
  return deps;
}
149
+
150
+ // ── Framework + kind classification per ecosystem ────────────────────────────
151
+
152
/**
 * True when any of `names` is present among the dependency keys.
 * Comparison is case-insensitive. A key matches a name when it equals it, or
 * ends with `/<name>` or `:<name>` (scoped, path-style or `group:artifact` keys).
 *
 * @param {Object<string, string>} deps
 * @param {...string} names
 * @returns {boolean}
 */
function has(deps, ...names) {
  const keys = Object.keys(deps).map(k => k.toLowerCase());
  for (const name of names) {
    const wanted = name.toLowerCase();
    const found = keys.some(
      k => k === wanted || k.endsWith(`/${wanted}`) || k.endsWith(`:${wanted}`),
    );
    if (found) return true;
  }
  return false;
}
156
+
157
/**
 * True when any .csproj directly inside `dir` declares the ASP.NET Core web SDK
 * (`Sdk="Microsoft.NET.Sdk.Web"`). Unreadable directories/files count as "no".
 */
function csprojUsesWebSdk(dir) {
  let names;
  try {
    names = readdirSync(dir);
  } catch {
    return false;
  }
  return names
    .filter(name => name.endsWith('.csproj'))
    .some((name) => {
      try {
        return /Sdk="Microsoft\.NET\.Sdk\.Web"/.test(readFileSync(join(dir, name), 'utf-8'));
      } catch {
        return false;
      }
    });
}

/**
 * Classify one ecosystem: which framework it uses and what kind of project it is.
 *
 * The decision is driven by dependency names (via `has`) plus a few marker
 * files in `dir` (manage.py, src/main.rs, main.go, cmd/, *.csproj).
 *
 * @param {string} lang  ecosystem language as detected from the manifest
 * @param {string} dir   absolute directory of the manifest
 * @param {Object<string, string>} deps dependency name → version
 * @returns {{ framework: string|null, kind: string }} kind defaults to 'library'
 */
function classify(lang, dir, deps) {
  let framework = null;
  let kind = 'library';

  if (lang === 'JavaScript' || lang === 'TypeScript') {
    if (has(deps, 'next')) {
      framework = 'Next.js';
      kind = 'webapp';
    } else if (has(deps, 'react', 'vue', '@angular/core', 'svelte', '@sveltejs/kit', 'nuxt')) {
      framework = has(deps, 'react') ? 'React' : has(deps, 'vue') ? 'Vue' : 'Frontend';
      kind = 'webapp';
    } else if (has(deps, 'express', 'fastify', 'hono', 'koa', '@nestjs/core')) {
      // Koa is the only remaining candidate once the others are ruled out.
      framework = has(deps, 'express') ? 'Express'
        : has(deps, 'fastify') ? 'Fastify'
        : has(deps, '@nestjs/core') ? 'NestJS'
        : has(deps, 'hono') ? 'Hono'
        : 'Koa';
      kind = 'api';
    }
  } else if (lang === 'Python') {
    if (has(deps, 'django') || existsSync(join(dir, 'manage.py'))) { framework = 'Django'; kind = 'webapp'; }
    else if (has(deps, 'fastapi')) { framework = 'FastAPI'; kind = 'api'; }
    else if (has(deps, 'flask')) { framework = 'Flask'; kind = 'api'; }
    else if (has(deps, 'starlette')) { framework = 'Starlette'; kind = 'api'; }
    else if (has(deps, 'click', 'typer')) { framework = has(deps, 'typer') ? 'Typer' : 'Click'; kind = 'cli'; }
  } else if (lang === 'Rust') {
    if (has(deps, 'actix-web')) { framework = 'Actix Web'; kind = 'service'; }
    else if (has(deps, 'axum')) { framework = 'Axum'; kind = 'service'; }
    else if (has(deps, 'rocket')) { framework = 'Rocket'; kind = 'service'; }
    else if (has(deps, 'warp', 'tide')) { framework = has(deps, 'warp') ? 'Warp' : 'Tide'; kind = 'service'; }
    else if (has(deps, 'clap', 'structopt')) { framework = 'Clap'; kind = 'cli'; }
    // A binary crate with no recognised framework is treated as a CLI.
    // (Without src/main.rs the kind is already 'library' whenever no framework matched.)
    if (kind === 'library' && existsSync(join(dir, 'src/main.rs'))) kind = 'cli';
  } else if (lang === 'Go') {
    // Module paths carry a major-version suffix from v2 on
    // (github.com/labstack/echo/v4); strip it so the name checks can match.
    const goDeps = Object.fromEntries(
      Object.entries(deps).map(([path, version]) => [path.replace(/\/v\d+$/, ''), version]),
    );
    if (has(goDeps, 'gin', 'gin-gonic/gin')) { framework = 'Gin'; kind = 'service'; }
    else if (has(goDeps, 'echo', 'labstack/echo')) { framework = 'Echo'; kind = 'service'; }
    else if (has(goDeps, 'chi', 'go-chi/chi')) { framework = 'Chi'; kind = 'service'; }
    else if (has(goDeps, 'fiber', 'gofiber/fiber')) { framework = 'Fiber'; kind = 'service'; }
    else if (existsSync(join(dir, 'main.go')) || existsSync(join(dir, 'cmd'))) kind = 'service';
  } else if (lang === 'Java' || lang === 'Kotlin') {
    if (has(deps, 'spring-boot-starter-web', 'spring-boot-starter', 'org.springframework.boot:spring-boot-starter-web')) { framework = 'Spring Boot'; kind = 'api'; }
  } else if (lang === 'Ruby') {
    if (has(deps, 'rails')) { framework = 'Rails'; kind = 'webapp'; }
    else if (has(deps, 'sinatra')) { framework = 'Sinatra'; kind = 'api'; }
  } else if (lang === 'PHP') {
    if (has(deps, 'laravel/framework')) { framework = 'Laravel'; kind = 'webapp'; }
    else if (has(deps, 'symfony/framework-bundle')) { framework = 'Symfony'; kind = 'webapp'; }
  } else if (lang === 'C#') {
    // Web projects usually declare the web SDK on <Project> rather than a package reference.
    if (has(deps, 'Microsoft.AspNetCore.App') || csprojUsesWebSdk(dir)) { framework = 'ASP.NET Core'; kind = 'api'; }
  }

  return { framework, kind };
}
199
+
200
+ // ── Per-manifest ecosystem builder ───────────────────────────────────────────
201
+
202
/**
 * Turn one discovered manifest into an ecosystem record.
 *
 * Dependencies are extracted with the parser that matches the manifest file;
 * package.json additionally decides JavaScript vs TypeScript and contributes
 * entry points (and kind 'cli' when it declares `bin`). Framework and kind
 * then come from `classify`; a kind declared by the manifest takes precedence.
 *
 * @param {string} projectDir project root (used to relativise paths)
 * @param {{ absDir: string, file: string, lang: string }} m manifest descriptor
 * @returns {{ language, manifest, dir, framework, kind, deps, entryPoints }}
 */
function buildEcosystem(projectDir, m) {
  const root = resolve(projectDir);
  const manifestPath = join(m.absDir, m.file);
  const text = readSafe(manifestPath);

  let deps = {};
  let language = m.lang;
  let declaredKind = null;
  let entryPoints = [];

  switch (m.file) {
    case 'package.json': {
      const pkg = readJson(manifestPath) || {};
      deps = { ...(pkg.dependencies || {}), ...(pkg.devDependencies || {}) };
      const usesTypeScript = deps.typescript || existsSync(join(m.absDir, 'tsconfig.json'));
      if (usesTypeScript) language = 'TypeScript';
      if (pkg.bin) {
        declaredKind = 'cli';
        entryPoints = typeof pkg.bin === 'string' ? [pkg.bin] : Object.values(pkg.bin);
      } else if (pkg.main || pkg.module || pkg.exports) {
        entryPoints = [pkg.main || pkg.module].filter(Boolean);
      }
      break;
    }
    case 'pyproject.toml':
      deps = pyprojectDeps(text);
      break;
    case 'requirements.txt':
      deps = requirementsDeps(text);
      break;
    case 'Pipfile':
      deps = tomlSectionDeps(text, ['packages']);
      break;
    case 'setup.py':
      // Every quoted token that looks like a requirement string is taken as a dependency.
      for (const hit of text.matchAll(/['"]([A-Za-z0-9_.\-]+)(?:[><=~!]=?[\w.\-]+)?['"]/g)) {
        deps[hit[1].toLowerCase()] = '*';
      }
      break;
    case 'Cargo.toml':
      deps = tomlSectionDeps(text, ['dependencies']);
      break;
    case 'go.mod':
      deps = goModDeps(text);
      break;
    case 'pom.xml':
      deps = pomDeps(text);
      break;
    case 'build.gradle':
    case 'build.gradle.kts':
      deps = gradleDeps(text);
      break;
    case 'Gemfile':
      deps = gemfileDeps(text);
      break;
    case 'composer.json': {
      const composer = readJson(manifestPath) || {};
      deps = { ...(composer.require || {}), ...(composer['require-dev'] || {}) };
      break;
    }
    default:
      if (m.file.endsWith('.csproj')) deps = csprojDeps(text);
  }

  const { framework, kind: classifiedKind } = classify(language, m.absDir, deps);

  return {
    language,
    manifest: relative(root, manifestPath) || m.file,
    dir: relative(root, m.absDir) || '.',
    framework,
    kind: declaredKind || classifiedKind || 'library',
    deps,
    entryPoints,
  };
}
252
+
253
/**
 * Detect every ecosystem present in the repo (polyglot-aware).
 *
 * One ecosystem is built per manifest; manifests that share a directory and a
 * language family are merged into a single record (TypeScript and JavaScript
 * count as one family, so e.g. several Python manifests in one directory
 * collapse into one ecosystem). The first manifest seen supplies the base
 * record; later ones add dependencies and fill in framework/kind.
 *
 * @param {string} projectDir
 * @param {object} [_config] currently unused
 * @returns {Array<{ language, manifest, dir, framework, kind, deps, entryPoints }>}
 */
export function detectEcosystems(projectDir, _config = {}) {
  const root = resolve(projectDir);
  const merged = new Map(); // `${dir}::${lang-family}` → ecosystem
  const familyOf = (lang) => (lang === 'TypeScript' ? 'JavaScript' : lang);

  for (const manifest of findManifests(projectDir)) {
    const eco = buildEcosystem(projectDir, manifest);
    const key = `${eco.dir}::${familyOf(eco.language)}`;

    const cur = merged.get(key);
    if (!cur) {
      merged.set(key, eco);
      continue;
    }

    // Merge deps; keep what the existing record already knows, fill the gaps.
    cur.deps = { ...cur.deps, ...eco.deps };
    if (!cur.framework && eco.framework) cur.framework = eco.framework;
    if (cur.kind === 'library' && eco.kind !== 'library') cur.kind = eco.kind;
    if (eco.language === 'TypeScript') cur.language = 'TypeScript';

    // Re-classify with the merged deps; only a still-missing framework is taken from it.
    const absDir = join(root, cur.dir === '.' ? '' : cur.dir);
    const reclassified = classify(cur.language, absDir, cur.deps);
    if (!cur.framework) cur.framework = reclassified.framework;
  }

  return Array.from(merged.values());
}
285
+
286
/**
 * Top-level project profile.
 *
 * The primary ecosystem is the one at the project root (dir '.'); when there
 * is none, the ecosystem with the most dependencies; `null` when no ecosystem
 * was detected at all (in which case `kind` is 'unknown').
 *
 * @param {string} projectDir
 * @param {object} [config]
 * @returns {{ ecosystems, primary, polyglot, languages, frameworks, kind }}
 */
export function detectProjectProfile(projectDir, config = {}) {
  const ecosystems = detectEcosystems(projectDir, config);

  let primary = ecosystems.find(e => e.dir === '.');
  if (!primary) {
    const depCount = (e) => Object.keys(e.deps).length;
    const ranked = ecosystems.slice().sort((a, b) => depCount(b) - depCount(a));
    primary = ranked[0] || null;
  }

  const languages = Array.from(new Set(ecosystems.map(e => e.language)));
  const detectedFrameworks = ecosystems.map(e => e.framework).filter(Boolean);
  const frameworks = Array.from(new Set(detectedFrameworks));
  const polyglot = languages.length > 1;
  const kind = (primary && primary.kind) || 'unknown';

  return { ecosystems, primary, polyglot, languages, frameworks, kind };
}