readme-gen-analyzer 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +4 -0
- package/README.md +77 -0
- package/dist/analyzers/ast-feature.detector.d.ts +10 -0
- package/dist/analyzers/ast-feature.detector.js +151 -0
- package/dist/analyzers/definition.extractor.d.ts +9 -0
- package/dist/analyzers/definition.extractor.js +141 -0
- package/dist/analyzers/dependency.analyzer.d.ts +14 -0
- package/dist/analyzers/dependency.analyzer.js +30 -0
- package/dist/analyzers/devops.analyzer.d.ts +3 -0
- package/dist/analyzers/devops.analyzer.js +42 -0
- package/dist/analyzers/env.extractor.d.ts +7 -0
- package/dist/analyzers/env.extractor.js +46 -0
- package/dist/analyzers/example.analyzer.d.ts +6 -0
- package/dist/analyzers/example.analyzer.js +84 -0
- package/dist/analyzers/feature.detector.d.ts +4 -0
- package/dist/analyzers/feature.detector.js +68 -0
- package/dist/analyzers/package.parser.d.ts +28 -0
- package/dist/analyzers/package.parser.js +341 -0
- package/dist/analyzers/polyglot.extractors.d.ts +18 -0
- package/dist/analyzers/polyglot.extractors.js +153 -0
- package/dist/analyzers/route.extractor.d.ts +10 -0
- package/dist/analyzers/route.extractor.js +41 -0
- package/dist/analyzers/schema.analyzer.d.ts +3 -0
- package/dist/analyzers/schema.analyzer.js +48 -0
- package/dist/analyzers/semantic.refiner.d.ts +16 -0
- package/dist/analyzers/semantic.refiner.js +154 -0
- package/dist/analyzers/structure.analyzer.d.ts +18 -0
- package/dist/analyzers/structure.analyzer.js +150 -0
- package/dist/analyzers/trace.analyzer.d.ts +10 -0
- package/dist/analyzers/trace.analyzer.js +75 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.js +44 -0
- package/dist/internal/analysis/chunker.d.ts +25 -0
- package/dist/internal/analysis/chunker.js +78 -0
- package/dist/internal/analysis/evidence.d.ts +17 -0
- package/dist/internal/analysis/evidence.js +130 -0
- package/dist/internal/analysis/techStack.d.ts +6 -0
- package/dist/internal/analysis/techStack.js +67 -0
- package/dist/internal/llm/llmClient.d.ts +69 -0
- package/dist/internal/llm/llmClient.js +204 -0
- package/dist/internal/pipeline/merge.d.ts +14 -0
- package/dist/internal/pipeline/merge.js +53 -0
- package/dist/internal/pipeline/persona.d.ts +7 -0
- package/dist/internal/pipeline/persona.js +28 -0
- package/dist/internal/pipeline/quality.d.ts +9 -0
- package/dist/internal/pipeline/quality.js +52 -0
- package/dist/internal/pipeline/readme.d.ts +3 -0
- package/dist/internal/pipeline/readme.js +80 -0
- package/dist/internal/pipeline/runPipeline.d.ts +57 -0
- package/dist/internal/pipeline/runPipeline.js +101 -0
- package/dist/internal/pipeline/stages.d.ts +5 -0
- package/dist/internal/pipeline/stages.js +85 -0
- package/dist/internal/pipeline/types.d.ts +98 -0
- package/dist/internal/pipeline/types.js +2 -0
- package/dist/types.d.ts +85 -0
- package/dist/types.js +2 -0
- package/dist/utils/scanner.d.ts +14 -0
- package/dist/utils/scanner.js +81 -0
- package/dist/utils/scriptsMarkdown.d.ts +9 -0
- package/dist/utils/scriptsMarkdown.js +131 -0
- package/package.json +19 -0
- package/src/analyzers/ast-feature.detector.ts +173 -0
- package/src/analyzers/definition.extractor.ts +156 -0
- package/src/analyzers/dependency.analyzer.ts +32 -0
- package/src/analyzers/devops.analyzer.ts +44 -0
- package/src/analyzers/env.extractor.ts +58 -0
- package/src/analyzers/example.analyzer.ts +96 -0
- package/src/analyzers/feature.detector.ts +65 -0
- package/src/analyzers/package.parser.ts +364 -0
- package/src/analyzers/polyglot.extractors.ts +169 -0
- package/src/analyzers/route.extractor.ts +54 -0
- package/src/analyzers/schema.analyzer.ts +50 -0
- package/src/analyzers/semantic.refiner.ts +163 -0
- package/src/analyzers/structure.analyzer.ts +156 -0
- package/src/analyzers/trace.analyzer.ts +75 -0
- package/src/index.ts +29 -0
- package/src/internal/analysis/chunker.ts +103 -0
- package/src/internal/analysis/evidence.ts +152 -0
- package/src/internal/analysis/techStack.ts +71 -0
- package/src/internal/llm/llmClient.ts +261 -0
- package/src/internal/pipeline/merge.ts +63 -0
- package/src/internal/pipeline/persona.ts +27 -0
- package/src/internal/pipeline/quality.ts +47 -0
- package/src/internal/pipeline/readme.ts +98 -0
- package/src/internal/pipeline/runPipeline.ts +153 -0
- package/src/internal/pipeline/stages.ts +89 -0
- package/src/internal/pipeline/types.ts +102 -0
- package/src/types.ts +100 -0
- package/src/utils/scanner.ts +48 -0
- package/src/utils/scriptsMarkdown.ts +140 -0
- package/test-local.ts +16 -0
- package/tsconfig.json +16 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { Project, SyntaxKind, CallExpression, PropertyAccessExpression } from 'ts-morph';
|
|
2
|
+
import { PolyglotExtractors } from './polyglot.extractors';
|
|
3
|
+
|
|
4
|
+
/** A single HTTP route discovered in the scanned sources. */
export interface Route {
  /** Upper-cased HTTP verb (GET, POST, ...) — includes USE for middleware mounts. */
  method: string;
  /** The route path literal as written in the source, with quotes removed. */
  path: string;
  /** Path of the source file the route was found in. */
  file: string;
  /** First 200 characters of the registering call expression, for context. */
  snippet?: string;
}
|
|
10
|
+
|
|
11
|
+
export class RouteExtractor {
|
|
12
|
+
private static METHODS = ['get', 'post', 'put', 'delete', 'patch', 'use'];
|
|
13
|
+
|
|
14
|
+
public static extract(files: Record<string, string>): Route[] {
|
|
15
|
+
const project = new Project({ useInMemoryFileSystem: true });
|
|
16
|
+
const routes: Route[] = [];
|
|
17
|
+
|
|
18
|
+
for (const [filePath, content] of Object.entries(files)) {
|
|
19
|
+
if (!filePath.endsWith('.ts') && !filePath.endsWith('.js') && !filePath.endsWith('.tsx') && !filePath.endsWith('.jsx')) {
|
|
20
|
+
continue;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
const sourceFile = project.createSourceFile(filePath, content);
|
|
24
|
+
|
|
25
|
+
// Look for call expressions like app.get('/', ...) or router.post('/', ...)
|
|
26
|
+
const callExpressions = sourceFile.getDescendantsOfKind(SyntaxKind.CallExpression);
|
|
27
|
+
|
|
28
|
+
for (const call of callExpressions) {
|
|
29
|
+
const expression = call.getExpression();
|
|
30
|
+
|
|
31
|
+
if (expression.getKind() === SyntaxKind.PropertyAccessExpression) {
|
|
32
|
+
const pae = expression as PropertyAccessExpression;
|
|
33
|
+
const methodName = pae.getName().toLowerCase();
|
|
34
|
+
|
|
35
|
+
if (this.METHODS.includes(methodName)) {
|
|
36
|
+
const args = call.getArguments();
|
|
37
|
+
if (args.length > 0 && args[0].getKind() === SyntaxKind.StringLiteral) {
|
|
38
|
+
const routePath = args[0].getText().replace(/['"]/g, '');
|
|
39
|
+
|
|
40
|
+
routes.push({
|
|
41
|
+
method: methodName.toUpperCase(),
|
|
42
|
+
path: routePath,
|
|
43
|
+
file: filePath,
|
|
44
|
+
snippet: call.getText().substring(0, 200) // snippet limited to 200 chars
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
return [...routes, ...PolyglotExtractors.extractRoutes(files)];
|
|
53
|
+
}
|
|
54
|
+
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { Project, SyntaxKind, ObjectLiteralExpression } from 'ts-morph';
|
|
2
|
+
|
|
3
|
+
export class SchemaAnalyzer {
|
|
4
|
+
public static analyze(files: Record<string, string>) {
|
|
5
|
+
const schemas: any[] = [];
|
|
6
|
+
const project = new Project({ useInMemoryFileSystem: true });
|
|
7
|
+
|
|
8
|
+
for (const [filePath, content] of Object.entries(files)) {
|
|
9
|
+
// 1. Prisma
|
|
10
|
+
if (filePath.endsWith('.prisma')) {
|
|
11
|
+
const models = [...content.matchAll(/model\s+([A-Za-z0-9_-]+)\s+\{([\s\S]+?)\}/g)];
|
|
12
|
+
models.forEach(m => {
|
|
13
|
+
const fields = m[2].trim().split('\n')
|
|
14
|
+
.map(l => l.trim())
|
|
15
|
+
.filter(l => l && !l.startsWith('//') && !l.startsWith('@@'))
|
|
16
|
+
.map(l => l.split(/\s+/)[0]);
|
|
17
|
+
schemas.push({ model: m[1], fields, file: filePath });
|
|
18
|
+
});
|
|
19
|
+
continue;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
// 2. Mongoose (ST)
|
|
23
|
+
if (filePath.match(/\.(ts|js|tsx|jsx)$/)) {
|
|
24
|
+
const sourceFile = project.createSourceFile(filePath, content);
|
|
25
|
+
const newExpressions = sourceFile.getDescendantsOfKind(SyntaxKind.NewExpression);
|
|
26
|
+
|
|
27
|
+
for (const newExpr of newExpressions) {
|
|
28
|
+
if (newExpr.getExpression().getText().includes('Schema')) {
|
|
29
|
+
const args = newExpr.getArguments();
|
|
30
|
+
if (args.length > 0 && Node.isObjectLiteralExpression(args[0])) {
|
|
31
|
+
const fields = (args[0] as ObjectLiteralExpression).getProperties().map(p => p.getText().split(':')[0].trim());
|
|
32
|
+
// Try to find model name from the same file
|
|
33
|
+
const modelName = content.match(/mongoose\.model\(['"]([^'"]+)['"]/)?.[1] || "UnknownModel";
|
|
34
|
+
schemas.push({ model: modelName, fields, file: filePath });
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
return schemas.length > 0 ? schemas : undefined;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
// Helper needed because Node is not imported in this scope correctly for isObjectLiteral
|
|
46
|
+
const Node = {
|
|
47
|
+
isObjectLiteralExpression: (node: any): node is ObjectLiteralExpression => {
|
|
48
|
+
return node.getKind() === SyntaxKind.ObjectLiteralExpression;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { ProjectAnalysis } from '../types';
|
|
2
|
+
|
|
3
|
+
export class SemanticRefiner {
|
|
4
|
+
/**
|
|
5
|
+
* Transforms raw analysis into a high-quality, semantically rich structure.
|
|
6
|
+
*/
|
|
7
|
+
public static refine(analysis: any): any {
|
|
8
|
+
const { dependencies = [], devDependencies = [], tree = [], framework, routes = [], envVars = [] } = analysis;
|
|
9
|
+
const allDeps = [...dependencies, ...devDependencies];
|
|
10
|
+
|
|
11
|
+
return {
|
|
12
|
+
projectName: analysis.name || 'Untitled Project',
|
|
13
|
+
description: analysis.description || 'No description provided.',
|
|
14
|
+
|
|
15
|
+
techStack: {
|
|
16
|
+
backend: this.detectBackend(allDeps, framework),
|
|
17
|
+
frontend: this.detectFrontend(allDeps, framework),
|
|
18
|
+
database: this.detectDatabase(allDeps),
|
|
19
|
+
auth: this.detectAuth(allDeps),
|
|
20
|
+
tooling: this.detectTooling(allDeps),
|
|
21
|
+
ai: this.detectAI(allDeps)
|
|
22
|
+
},
|
|
23
|
+
|
|
24
|
+
architecture: this.detectArchitecture(analysis),
|
|
25
|
+
|
|
26
|
+
features: this.enrichFeatures(analysis),
|
|
27
|
+
|
|
28
|
+
api: {
|
|
29
|
+
routes: routes.map((r: any) => ({
|
|
30
|
+
method: r.method,
|
|
31
|
+
path: r.path,
|
|
32
|
+
purpose: this.inferRoutePurpose(r)
|
|
33
|
+
})),
|
|
34
|
+
totalEndpoints: routes.length
|
|
35
|
+
},
|
|
36
|
+
|
|
37
|
+
flows: this.inferFlows(analysis)
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
private static detectBackend(deps: string[], framework?: any): string[] {
|
|
42
|
+
const map: Record<string, string> = {
|
|
43
|
+
'express': 'Express.js',
|
|
44
|
+
'fastify': 'Fastify',
|
|
45
|
+
'nestjs': 'NestJS',
|
|
46
|
+
'koa': 'Koa',
|
|
47
|
+
'hono': 'Hono',
|
|
48
|
+
'apollo-server': 'GraphQL (Apollo)',
|
|
49
|
+
'socket.io': 'Real-time (Socket.io)'
|
|
50
|
+
};
|
|
51
|
+
const found = deps.filter(d => map[d]).map(d => map[d]);
|
|
52
|
+
if (framework?.name === 'Express') found.push('Express.js');
|
|
53
|
+
return Array.from(new Set(found));
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
private static detectFrontend(deps: string[], framework?: any): string[] {
|
|
57
|
+
const map: Record<string, string> = {
|
|
58
|
+
'react': 'React',
|
|
59
|
+
'vue': 'Vue',
|
|
60
|
+
'svelte': 'Svelte',
|
|
61
|
+
'next': 'Next.js',
|
|
62
|
+
'nuxt': 'Nuxt.js',
|
|
63
|
+
'tailwindcss': 'Tailwind CSS',
|
|
64
|
+
'framer-motion': 'Animations (Framer Motion)',
|
|
65
|
+
'lucide-react': 'Icons (Lucide)'
|
|
66
|
+
};
|
|
67
|
+
const found = deps.filter(d => map[d]).map(d => map[d]);
|
|
68
|
+
return Array.from(new Set(found));
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
private static detectDatabase(deps: string[]): string[] {
|
|
72
|
+
const map: Record<string, string> = {
|
|
73
|
+
'prisma': 'Prisma ORM',
|
|
74
|
+
'mongoose': 'MongoDB (Mongoose)',
|
|
75
|
+
'sequelize': 'Sequelize ORM',
|
|
76
|
+
'typeorm': 'TypeORM',
|
|
77
|
+
'redis': 'Redis (Caching)',
|
|
78
|
+
'pg': 'PostgreSQL'
|
|
79
|
+
};
|
|
80
|
+
return deps.filter(d => map[d]).map(d => map[d]);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
private static detectAuth(deps: string[]): string[] {
|
|
84
|
+
const map: Record<string, string> = {
|
|
85
|
+
'passport': 'Passport.js',
|
|
86
|
+
'next-auth': 'NextAuth.js',
|
|
87
|
+
'jsonwebtoken': 'JWT Authentication',
|
|
88
|
+
'bcrypt': 'Password Hashing (bcrypt)',
|
|
89
|
+
'firebase': 'Firebase Auth'
|
|
90
|
+
};
|
|
91
|
+
return deps.filter(d => map[d]).map(d => map[d]);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
private static detectTooling(deps: string[]): string[] {
|
|
95
|
+
const map: Record<string, string> = {
|
|
96
|
+
'typescript': 'TypeScript',
|
|
97
|
+
'jest': 'Jest (Testing)',
|
|
98
|
+
'vitest': 'Vitest (Testing)',
|
|
99
|
+
'eslint': 'ESLint (Linting)',
|
|
100
|
+
'prettier': 'Prettier (Formatting)',
|
|
101
|
+
'turbo': 'Turborepo (Build system)'
|
|
102
|
+
};
|
|
103
|
+
return deps.filter(d => map[d]).map(d => map[d]);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
private static detectAI(deps: string[]): string[] {
|
|
107
|
+
const map: Record<string, string> = {
|
|
108
|
+
'openai': 'OpenAI SDK',
|
|
109
|
+
'langchain': 'LangChain (LLM Orchestration)',
|
|
110
|
+
'groq': 'Groq AI',
|
|
111
|
+
'@google/generative-ai': 'Google Gemini AI'
|
|
112
|
+
};
|
|
113
|
+
return deps.filter(d => map[d]).map(d => map[d]);
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
private static detectArchitecture(analysis: any): { type: string, summary: string } {
|
|
117
|
+
if (analysis.isMonorepo) {
|
|
118
|
+
return {
|
|
119
|
+
type: 'Monorepo',
|
|
120
|
+
summary: 'A multi-package repository managing distinct apps and shared packages using a build system like Turborepo.'
|
|
121
|
+
};
|
|
122
|
+
}
|
|
123
|
+
return {
|
|
124
|
+
type: 'Monolith',
|
|
125
|
+
summary: 'A single-package modular architecture centered around a core application structure.'
|
|
126
|
+
};
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
private static enrichFeatures(analysis: any): string[] {
|
|
130
|
+
const rawFeatures = analysis.features || [];
|
|
131
|
+
const featureMap: Record<string, string> = {
|
|
132
|
+
'Authentication': 'Secure User Authentication & Session Management',
|
|
133
|
+
'API Endpoints': 'RESTful API Interface for external communication',
|
|
134
|
+
'Database Integration': 'Persistent Data Storage with Schema Models',
|
|
135
|
+
'Environment Configuration': 'Robust configuration using environment variables',
|
|
136
|
+
'Real-time': 'Bi-directional real-time communication'
|
|
137
|
+
};
|
|
138
|
+
return rawFeatures.map((f: string) => featureMap[f] || f);
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
private static inferRoutePurpose(route: any): string {
|
|
142
|
+
const path = route.path.toLowerCase();
|
|
143
|
+
if (path.includes('auth') || path.includes('login') || path.includes('signup')) return 'Identity & Access Management';
|
|
144
|
+
if (path.includes('user') || path.includes('profile')) return 'User Profile Management';
|
|
145
|
+
if (path.includes('message') || path.includes('chat')) return 'Messaging logic';
|
|
146
|
+
if (path.includes('upload') || path.includes('cloudinary')) return 'Media handling';
|
|
147
|
+
return 'General business logic';
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
private static inferFlows(analysis: any): string[] {
|
|
151
|
+
const flows: string[] = [];
|
|
152
|
+
if (analysis.dependencies?.includes('mongoose')) {
|
|
153
|
+
flows.push('Request -> Controller -> Mongoose Model -> MongoDB');
|
|
154
|
+
}
|
|
155
|
+
if (analysis.dependencies?.includes('jsonwebtoken')) {
|
|
156
|
+
flows.push('Client -> JWT Middleware -> Protected Route -> Response');
|
|
157
|
+
}
|
|
158
|
+
if (analysis.tree?.some((f: string) => f.includes('Frontend') || f.includes('web'))) {
|
|
159
|
+
flows.push('Frontend (React) -> Axios -> API Endpoints (Backend)');
|
|
160
|
+
}
|
|
161
|
+
return flows;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import ignore from 'ignore';
|
|
2
|
+
|
|
3
|
+
/** A file discovered during a repository scan. */
export interface FileEntry {
  /** Path to the file. */
  path: string;
  /** File name. */
  name: string;
}
|
|
7
|
+
|
|
8
|
+
export class StructureAnalyzer {
|
|
9
|
+
private static ENTRY_BASE_NAMES = new Set([
|
|
10
|
+
'index.ts',
|
|
11
|
+
'index.js',
|
|
12
|
+
'main.py',
|
|
13
|
+
'app.ts',
|
|
14
|
+
'app.js',
|
|
15
|
+
'server.ts',
|
|
16
|
+
'server.js',
|
|
17
|
+
'__main__.py',
|
|
18
|
+
'manage.py',
|
|
19
|
+
'wsgi.py',
|
|
20
|
+
'asgi.py',
|
|
21
|
+
'app.py',
|
|
22
|
+
]);
|
|
23
|
+
|
|
24
|
+
public static async analyze(files: string[], gitignoreContent: string): Promise<{
|
|
25
|
+
entryPoints: string[];
|
|
26
|
+
keyDirectories: string[];
|
|
27
|
+
importantFiles: string[];
|
|
28
|
+
hasDocker: boolean;
|
|
29
|
+
isMonorepo: boolean;
|
|
30
|
+
tree: string[];
|
|
31
|
+
}> {
|
|
32
|
+
const ig = ignore().add(gitignoreContent || '');
|
|
33
|
+
ig.add(['node_modules', '.git', '.turbo', 'dist', 'build', '.next', '.vscode']);
|
|
34
|
+
|
|
35
|
+
const normalizedFiles = files.map(f => f.replace(/\\/g, '/'));
|
|
36
|
+
const filteredFiles = normalizedFiles.filter(f => !ig.ignores(f));
|
|
37
|
+
const entryPoints = filteredFiles.filter(f => this.isEntryPoint(f));
|
|
38
|
+
|
|
39
|
+
// Key directories detection
|
|
40
|
+
const directories = new Set<string>();
|
|
41
|
+
const keyPatterns = [
|
|
42
|
+
'src',
|
|
43
|
+
'lib',
|
|
44
|
+
'app',
|
|
45
|
+
'tests',
|
|
46
|
+
'docs',
|
|
47
|
+
'config',
|
|
48
|
+
'packages',
|
|
49
|
+
'apps',
|
|
50
|
+
'controllers',
|
|
51
|
+
'routes',
|
|
52
|
+
'services',
|
|
53
|
+
'cmd',
|
|
54
|
+
'internal',
|
|
55
|
+
'pkg',
|
|
56
|
+
'api',
|
|
57
|
+
'handlers',
|
|
58
|
+
'middleware',
|
|
59
|
+
];
|
|
60
|
+
|
|
61
|
+
filteredFiles.forEach(f => {
|
|
62
|
+
const parts = f.split('/');
|
|
63
|
+
parts.forEach(part => {
|
|
64
|
+
if (keyPatterns.includes(part.toLowerCase())) {
|
|
65
|
+
directories.add(part);
|
|
66
|
+
}
|
|
67
|
+
});
|
|
68
|
+
});
|
|
69
|
+
const keyDirs = Array.from(directories);
|
|
70
|
+
|
|
71
|
+
const hasDocker = filteredFiles.some(f => f.toLowerCase().includes('dockerfile') || f.toLowerCase().includes('docker-compose'));
|
|
72
|
+
|
|
73
|
+
// Monorepo detection
|
|
74
|
+
const isMonorepo = filteredFiles.some(f =>
|
|
75
|
+
f === 'pnpm-workspace.yaml' ||
|
|
76
|
+
f === 'lerna.json' ||
|
|
77
|
+
f === 'turbo.json' ||
|
|
78
|
+
f === 'go.work' ||
|
|
79
|
+
(f.endsWith('package.json') && filteredFiles.some(file => file.startsWith('packages/') || file.startsWith('apps/'))) ||
|
|
80
|
+
(f.endsWith('go.mod') && filteredFiles.some(file => file.startsWith('cmd/') || file.startsWith('internal/')))
|
|
81
|
+
);
|
|
82
|
+
|
|
83
|
+
// Scored files
|
|
84
|
+
const scoredFiles = filteredFiles.map(f => ({
|
|
85
|
+
path: f,
|
|
86
|
+
score: this.calculateImportance(f, entryPoints)
|
|
87
|
+
})).sort((a, b) => b.score - a.score);
|
|
88
|
+
|
|
89
|
+
return {
|
|
90
|
+
entryPoints,
|
|
91
|
+
keyDirectories: keyDirs,
|
|
92
|
+
importantFiles: scoredFiles.slice(0, 40).map(f => f.path),
|
|
93
|
+
hasDocker,
|
|
94
|
+
isMonorepo,
|
|
95
|
+
tree: this.generateTreeSnippet(filteredFiles, 5) // Increased depth to 5
|
|
96
|
+
};
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
private static calculateImportance(filePath: string, entryPoints: string[]): number {
|
|
100
|
+
let score = 0;
|
|
101
|
+
const name = filePath.split('/').pop() || '';
|
|
102
|
+
const lowerPath = filePath.toLowerCase();
|
|
103
|
+
|
|
104
|
+
if (entryPoints.includes(filePath)) score += 100; // Entry points are critical
|
|
105
|
+
|
|
106
|
+
// Core architecture folders
|
|
107
|
+
if (lowerPath.includes('controller')) score += 60;
|
|
108
|
+
if (lowerPath.includes('route')) score += 60;
|
|
109
|
+
if (lowerPath.includes('api/')) score += 50;
|
|
110
|
+
if (lowerPath.includes('service')) score += 40;
|
|
111
|
+
if (lowerPath.endsWith('.py') && (lowerPath.includes('handler') || lowerPath.includes('/api/'))) score += 45;
|
|
112
|
+
if (lowerPath.endsWith('.go') && (lowerPath.includes('handler') || lowerPath.includes('/http'))) score += 45;
|
|
113
|
+
if (lowerPath.includes('/cmd/')) score += 35;
|
|
114
|
+
if (lowerPath.includes('/internal/')) score += 25;
|
|
115
|
+
|
|
116
|
+
// Monorepo specific deep scoring
|
|
117
|
+
if (lowerPath.includes('apps/') && (lowerPath.includes('/src/') || lowerPath.includes('/app/') || lowerPath.includes('/pages/'))) score += 30;
|
|
118
|
+
if (lowerPath.includes('packages/') && lowerPath.includes('/src/')) score += 20;
|
|
119
|
+
|
|
120
|
+
// File-based Route Detection (Next.js / Nuxt)
|
|
121
|
+
if (lowerPath.includes('pages/api/') || lowerPath.includes('app/api/')) score += 60;
|
|
122
|
+
|
|
123
|
+
// Infrastructure
|
|
124
|
+
if (['package.json', 'go.mod', 'requirements.txt', '.env.example', 'pyproject.toml', 'turbo.json'].includes(name)) score += 60;
|
|
125
|
+
if (lowerPath.includes('docker')) score += 20;
|
|
126
|
+
|
|
127
|
+
// Negative weights
|
|
128
|
+
if (lowerPath.includes('test')) score -= 40;
|
|
129
|
+
if (lowerPath.includes('spec')) score -= 40;
|
|
130
|
+
if (lowerPath.includes('mock')) score -= 50;
|
|
131
|
+
if (lowerPath.includes('util')) score += 5; // Utils are okay but less critical than controllers
|
|
132
|
+
|
|
133
|
+
return score;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
private static generateTreeSnippet(files: string[], maxDepth: number): string[] {
|
|
137
|
+
const tree: string[] = [];
|
|
138
|
+
files.forEach(f => {
|
|
139
|
+
const parts = f.split('/');
|
|
140
|
+
if (parts.length <= maxDepth) {
|
|
141
|
+
tree.push(f);
|
|
142
|
+
}
|
|
143
|
+
});
|
|
144
|
+
return tree.slice(0, 500); // Increased count to 500
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
private static isEntryPoint(filePath: string): boolean {
|
|
148
|
+
const parts = filePath.split('/');
|
|
149
|
+
const base = parts.pop() || '';
|
|
150
|
+
if (this.ENTRY_BASE_NAMES.has(base)) return true;
|
|
151
|
+
if (base === 'main.go') return true;
|
|
152
|
+
if (parts[parts.length - 1] === 'cmd' && base === 'main.go') return true;
|
|
153
|
+
return false;
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { Project, SyntaxKind } from 'ts-morph';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
|
|
4
|
+
export class TraceAnalyzer {
|
|
5
|
+
public static analyze(entryPoints: string[], files: Record<string, string>) {
|
|
6
|
+
const project = new Project({ useInMemoryFileSystem: true });
|
|
7
|
+
const graph: Map<string, string[]> = new Map();
|
|
8
|
+
const scores: Map<string, number> = new Map();
|
|
9
|
+
|
|
10
|
+
// Populate project and scores
|
|
11
|
+
for (const [filePath, content] of Object.entries(files)) {
|
|
12
|
+
if (filePath.match(/\.(ts|js|tsx|jsx)$/)) {
|
|
13
|
+
project.createSourceFile(filePath, content);
|
|
14
|
+
scores.set(filePath, 0);
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
const visited = new Set<string>();
|
|
19
|
+
const stack = [...entryPoints];
|
|
20
|
+
|
|
21
|
+
while (stack.length > 0) {
|
|
22
|
+
const current = stack.pop()!;
|
|
23
|
+
if (visited.has(current)) continue;
|
|
24
|
+
visited.add(current);
|
|
25
|
+
|
|
26
|
+
const sourceFile = project.getSourceFile(current);
|
|
27
|
+
if (!sourceFile) continue;
|
|
28
|
+
|
|
29
|
+
const imports = sourceFile.getImportDeclarations();
|
|
30
|
+
const currentDeps: string[] = [];
|
|
31
|
+
|
|
32
|
+
imports.forEach(imp => {
|
|
33
|
+
const moduleSpecifier = imp.getModuleSpecifierValue();
|
|
34
|
+
if (moduleSpecifier.startsWith('.')) {
|
|
35
|
+
// Resolve relative path
|
|
36
|
+
const dir = path.dirname(current);
|
|
37
|
+
const resolved = this.resolvePath(dir, moduleSpecifier, Object.keys(files));
|
|
38
|
+
if (resolved) {
|
|
39
|
+
currentDeps.push(resolved);
|
|
40
|
+
scores.set(resolved, (scores.get(resolved) || 0) + 1);
|
|
41
|
+
stack.push(resolved);
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
});
|
|
45
|
+
graph.set(current, currentDeps);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// Top 10 most imported files
|
|
49
|
+
const topFiles = Array.from(scores.entries())
|
|
50
|
+
.sort((a, b) => b[1] - a[1])
|
|
51
|
+
.filter(f => f[1] > 0)
|
|
52
|
+
.slice(0, 10)
|
|
53
|
+
.map(f => f[0]);
|
|
54
|
+
|
|
55
|
+
return {
|
|
56
|
+
topFiles,
|
|
57
|
+
entryPoints,
|
|
58
|
+
graph: Object.fromEntries(graph)
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
private static resolvePath(dir: string, specifier: string, allFiles: string[]): string | null {
|
|
63
|
+
const fullPath = path.posix.join(dir, specifier);
|
|
64
|
+
const extensions = ['', '.ts', '.js', '.tsx', '.jsx', '/index.ts', '/index.js'];
|
|
65
|
+
|
|
66
|
+
for (const ext of extensions) {
|
|
67
|
+
const candidate = `${fullPath}${ext}`;
|
|
68
|
+
if (allFiles.includes(candidate)) return candidate;
|
|
69
|
+
// Also check with leading slash if needed
|
|
70
|
+
const absoluteCandidate = candidate.startsWith('/') ? candidate : `/${candidate}`;
|
|
71
|
+
if (allFiles.includes(absoluteCandidate)) return absoluteCandidate;
|
|
72
|
+
}
|
|
73
|
+
return null;
|
|
74
|
+
}
|
|
75
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
// Static analyzers: structure, dependencies, routes, env, manifests, AST features
export * from './analyzers/structure.analyzer';
export * from './analyzers/dependency.analyzer';
export * from './analyzers/route.extractor';
export * from './analyzers/env.extractor';
export * from './analyzers/package.parser';
export * from './analyzers/ast-feature.detector';
export * from './analyzers/definition.extractor';
export * from './analyzers/schema.analyzer';
export * from './analyzers/trace.analyzer';
export * from './analyzers/example.analyzer';
export * from './analyzers/devops.analyzer';
export * from './analyzers/polyglot.extractors';
export * from './analyzers/semantic.refiner';

// Shared types and utilities
export * from './types';
export * from './utils/scanner';
export * from './utils/scriptsMarkdown';

// Production-grade semantic README pipeline
export * from './internal/analysis/chunker';
export * from './internal/analysis/evidence';
export * from './internal/analysis/techStack';
export * from './internal/llm/llmClient';
export * from './internal/pipeline/types';
export * from './internal/pipeline/stages';
export * from './internal/pipeline/merge';
export * from './internal/pipeline/readme';
export * from './internal/pipeline/runPipeline';
export * from './internal/pipeline/quality';
export * from './internal/pipeline/persona';
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
/** Tuning knobs for chunkEvidenceBlocks. */
export interface ChunkerOptions {
  /**
   * ~6k tokens max per call. We approximate 1 token ~= 4 chars.
   * Default 24k chars.
   */
  maxCharsPerChunk?: number;
  /**
   * Avoid tiny tail chunks. Default: 40% of maxCharsPerChunk.
   */
  minCharsPerChunk?: number;
  /**
   * Prefix each chunk with a header that helps grounding.
   */
  chunkHeader?: string;
}
|
|
16
|
+
|
|
17
|
+
/** One budget-sized slice of the concatenated evidence blocks. */
export interface TextChunk {
  /** Stable identifier of the form `chunk_<n>` (1-based). */
  id: string;
  /** The chunk text, including the optional header prefix. */
  text: string;
  /** Rough token count (~4 characters per token, minimum 1). */
  approxTokens: number;
}
|
|
22
|
+
|
|
23
|
+
function approxTokens(text: string): number {
|
|
24
|
+
return Math.max(1, Math.ceil(text.length / 4));
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Intelligently chunk already-extracted evidence blocks (not raw code generation).
|
|
29
|
+
* Keeps blocks intact when possible; falls back to line-splitting for oversized blocks.
|
|
30
|
+
*/
|
|
31
|
+
export function chunkEvidenceBlocks(blocks: string[], options: ChunkerOptions = {}): TextChunk[] {
|
|
32
|
+
const maxChars = options.maxCharsPerChunk ?? 24_000;
|
|
33
|
+
const minChars = options.minCharsPerChunk ?? Math.floor(maxChars * 0.4);
|
|
34
|
+
const header = options.chunkHeader ? `${options.chunkHeader.trim()}\n\n` : '';
|
|
35
|
+
|
|
36
|
+
const normalizedBlocks = blocks
|
|
37
|
+
.map((b) => b.trim())
|
|
38
|
+
.filter(Boolean);
|
|
39
|
+
|
|
40
|
+
const chunks: string[] = [];
|
|
41
|
+
let current = header;
|
|
42
|
+
|
|
43
|
+
const pushCurrent = () => {
|
|
44
|
+
const text = current.trim();
|
|
45
|
+
if (text.length > 0) chunks.push(text + '\n');
|
|
46
|
+
current = header;
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
const pushOversizedBlock = (block: string) => {
|
|
50
|
+
// Split oversized blocks by lines, preserving ordering and some context.
|
|
51
|
+
const lines = block.split('\n');
|
|
52
|
+
let buf = header;
|
|
53
|
+
for (const line of lines) {
|
|
54
|
+
const next = (buf ? buf + '\n' : '') + line;
|
|
55
|
+
if (next.length > maxChars) {
|
|
56
|
+
const trimmed = buf.trim();
|
|
57
|
+
if (trimmed.length > 0) chunks.push(trimmed + '\n');
|
|
58
|
+
buf = header + line;
|
|
59
|
+
} else {
|
|
60
|
+
buf = next;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
const trimmed = buf.trim();
|
|
64
|
+
if (trimmed.length > 0) chunks.push(trimmed + '\n');
|
|
65
|
+
};
|
|
66
|
+
|
|
67
|
+
for (const block of normalizedBlocks) {
|
|
68
|
+
if (block.length > maxChars) {
|
|
69
|
+
// Flush anything accumulated so far, then split this block.
|
|
70
|
+
if (current.trim().length > 0) pushCurrent();
|
|
71
|
+
pushOversizedBlock(block);
|
|
72
|
+
continue;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
const candidate = current.trim().length === 0 ? header + block : `${current.trim()}\n\n${block}`;
|
|
76
|
+
if (candidate.length <= maxChars) {
|
|
77
|
+
current = candidate;
|
|
78
|
+
continue;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Candidate would overflow.
|
|
82
|
+
pushCurrent();
|
|
83
|
+
current = header + block;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
if (current.trim().length > 0) pushCurrent();
|
|
87
|
+
|
|
88
|
+
// Merge last tiny chunk into previous if possible.
|
|
89
|
+
if (chunks.length >= 2) {
|
|
90
|
+
const last = chunks[chunks.length - 1]!;
|
|
91
|
+
const prev = chunks[chunks.length - 2]!;
|
|
92
|
+
if (last.length < minChars && (prev.length + 2 + last.length) <= maxChars) {
|
|
93
|
+
chunks.splice(chunks.length - 2, 2, `${prev.trim()}\n\n${last.trim()}\n`);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
return chunks.map((text, idx) => ({
|
|
98
|
+
id: `chunk_${idx + 1}`,
|
|
99
|
+
text,
|
|
100
|
+
approxTokens: approxTokens(text),
|
|
101
|
+
}));
|
|
102
|
+
}
|
|
103
|
+
|