@maintainabilityai/research-runner 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +82 -0
- package/bin/research-runner.js +2 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +209 -0
- package/dist/llm/anthropic-client.d.ts +39 -0
- package/dist/llm/anthropic-client.js +74 -0
- package/dist/llm/github-models-client.d.ts +46 -0
- package/dist/llm/github-models-client.js +78 -0
- package/dist/llm/llm-router.d.ts +46 -0
- package/dist/llm/llm-router.js +60 -0
- package/dist/mesh/get-mesh-sha.d.ts +1 -0
- package/dist/mesh/get-mesh-sha.js +27 -0
- package/dist/mesh/mesh-reader.d.ts +14 -0
- package/dist/mesh/mesh-reader.js +392 -0
- package/dist/mesh/prompt-loader.d.ts +22 -0
- package/dist/mesh/prompt-loader.js +119 -0
- package/dist/mesh/threat-model-reader.d.ts +33 -0
- package/dist/mesh/threat-model-reader.js +123 -0
- package/dist/runner/archeologist.d.ts +39 -0
- package/dist/runner/archeologist.js +620 -0
- package/dist/runner/audit-emitter.d.ts +62 -0
- package/dist/runner/audit-emitter.js +210 -0
- package/dist/runner/hatters-tag-builder.d.ts +52 -0
- package/dist/runner/hatters-tag-builder.js +40 -0
- package/dist/runner/nodes/analyze-architecture.d.ts +10 -0
- package/dist/runner/nodes/analyze-architecture.js +447 -0
- package/dist/runner/nodes/arxiv-search.d.ts +12 -0
- package/dist/runner/nodes/arxiv-search.js +52 -0
- package/dist/runner/nodes/clone-and-index.d.ts +32 -0
- package/dist/runner/nodes/clone-and-index.js +158 -0
- package/dist/runner/nodes/dedupe-and-rank.d.ts +27 -0
- package/dist/runner/nodes/dedupe-and-rank.js +98 -0
- package/dist/runner/nodes/deterministic-review.d.ts +55 -0
- package/dist/runner/nodes/deterministic-review.js +206 -0
- package/dist/runner/nodes/expert-review.d.ts +68 -0
- package/dist/runner/nodes/expert-review.js +197 -0
- package/dist/runner/nodes/gap-analysis.d.ts +48 -0
- package/dist/runner/nodes/gap-analysis.js +153 -0
- package/dist/runner/nodes/generate-prd-manifest.d.ts +53 -0
- package/dist/runner/nodes/generate-prd-manifest.js +209 -0
- package/dist/runner/nodes/hackernews-search.d.ts +12 -0
- package/dist/runner/nodes/hackernews-search.js +63 -0
- package/dist/runner/nodes/identify-gaps.d.ts +33 -0
- package/dist/runner/nodes/identify-gaps.js +185 -0
- package/dist/runner/nodes/plan-queries.d.ts +28 -0
- package/dist/runner/nodes/plan-queries.js +120 -0
- package/dist/runner/nodes/prd-validator.d.ts +51 -0
- package/dist/runner/nodes/prd-validator.js +203 -0
- package/dist/runner/nodes/synthesis-archaeology-validator.d.ts +22 -0
- package/dist/runner/nodes/synthesis-archaeology-validator.js +131 -0
- package/dist/runner/nodes/synthesis-validator.d.ts +51 -0
- package/dist/runner/nodes/synthesis-validator.js +185 -0
- package/dist/runner/nodes/synthesize-prd.d.ts +84 -0
- package/dist/runner/nodes/synthesize-prd.js +202 -0
- package/dist/runner/nodes/synthesize-report.d.ts +53 -0
- package/dist/runner/nodes/synthesize-report.js +188 -0
- package/dist/runner/nodes/tavily-search.d.ts +21 -0
- package/dist/runner/nodes/tavily-search.js +57 -0
- package/dist/runner/nodes/uspto-search.d.ts +13 -0
- package/dist/runner/nodes/uspto-search.js +62 -0
- package/dist/runner/nodes/verify-grounding.d.ts +54 -0
- package/dist/runner/nodes/verify-grounding.js +134 -0
- package/dist/runner/prd.d.ts +28 -0
- package/dist/runner/prd.js +494 -0
- package/dist/schemas/audit-event.d.ts +1151 -0
- package/dist/schemas/audit-event.js +141 -0
- package/dist/schemas/index.d.ts +17 -0
- package/dist/schemas/index.js +33 -0
- package/dist/schemas/mesh-context.d.ts +415 -0
- package/dist/schemas/mesh-context.js +95 -0
- package/dist/schemas/observed-architecture.d.ts +262 -0
- package/dist/schemas/observed-architecture.js +90 -0
- package/dist/schemas/prd-brief.d.ts +111 -0
- package/dist/schemas/prd-brief.js +37 -0
- package/dist/schemas/prd-doc.d.ts +249 -0
- package/dist/schemas/prd-doc.js +42 -0
- package/dist/schemas/prd-manifest.d.ts +171 -0
- package/dist/schemas/prd-manifest.js +73 -0
- package/dist/schemas/primitives.d.ts +47 -0
- package/dist/schemas/primitives.js +41 -0
- package/dist/schemas/query-plan.d.ts +33 -0
- package/dist/schemas/query-plan.js +25 -0
- package/dist/schemas/ranked-source.d.ts +82 -0
- package/dist/schemas/ranked-source.js +29 -0
- package/dist/schemas/research-brief.d.ts +114 -0
- package/dist/schemas/research-brief.js +49 -0
- package/dist/schemas/research-doc.d.ts +104 -0
- package/dist/schemas/research-doc.js +37 -0
- package/dist/search/arxiv-client.d.ts +41 -0
- package/dist/search/arxiv-client.js +88 -0
- package/dist/search/hackernews-client.d.ts +33 -0
- package/dist/search/hackernews-client.js +44 -0
- package/dist/search/provider-result.d.ts +25 -0
- package/dist/search/provider-result.js +2 -0
- package/dist/search/tavily-client.d.ts +38 -0
- package/dist/search/tavily-client.js +53 -0
- package/dist/search/uspto-client.d.ts +50 -0
- package/dist/search/uspto-client.js +112 -0
- package/dist/utils/run-id.d.ts +2 -0
- package/dist/utils/run-id.js +22 -0
- package/package.json +53 -0
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
+
}) : function(o, v) {
|
|
16
|
+
o["default"] = v;
|
|
17
|
+
});
|
|
18
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
+
var ownKeys = function(o) {
|
|
20
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
+
var ar = [];
|
|
22
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
+
return ar;
|
|
24
|
+
};
|
|
25
|
+
return ownKeys(o);
|
|
26
|
+
};
|
|
27
|
+
return function (mod) {
|
|
28
|
+
if (mod && mod.__esModule) return mod;
|
|
29
|
+
var result = {};
|
|
30
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
+
__setModuleDefault(result, mod);
|
|
32
|
+
return result;
|
|
33
|
+
};
|
|
34
|
+
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.ANALYZER_VERSION = void 0;
|
|
37
|
+
exports.analyzeArchitecture = analyzeArchitecture;
|
|
38
|
+
/**
|
|
39
|
+
* analyze_architecture — pure node (archaeology path), file-based variant.
|
|
40
|
+
*
|
|
41
|
+
* Phase 3a — no tree-sitter. Extracts:
|
|
42
|
+
* - RepositoryProfile (languages, frameworks, manifests, totals)
|
|
43
|
+
* - Modules (top-level src/ subdirectories or root dirs not in SKIP_DIRS),
|
|
44
|
+
* each tagged with an inferred layer (api / web / data / worker / shared)
|
|
45
|
+
* - Endpoints (regex-detected route handlers across the four most common
|
|
46
|
+
* frameworks: Express, FastAPI, Flask, Spring)
|
|
47
|
+
* - Dependencies (parsed from package.json, pyproject.toml, requirements.txt,
|
|
48
|
+
* Cargo.toml [dependencies], go.mod require block, pom.xml, Gemfile)
|
|
49
|
+
*
|
|
50
|
+
* Heuristic, not authoritative. Tree-sitter integration (phase 3b) will
|
|
51
|
+
* give us symbol-level accuracy; for now the synthesis prompt knows this
|
|
52
|
+
* is a file-based pass and the audit log records `analyzer_version` so
|
|
53
|
+
* an auditor can re-run with a deeper analyzer later.
|
|
54
|
+
*/
|
|
55
|
+
const fs = __importStar(require("node:fs"));
|
|
56
|
+
const path = __importStar(require("node:path"));
|
|
57
|
+
exports.ANALYZER_VERSION = 'file-based-v1';
|
|
58
|
+
/**
 * Directory-name hints used to guess a module's architectural layer.
 *
 * Every entry in `names` is a regex fragment that is compiled into an
 * anchored, case-insensitive pattern (`^fragment$`), so only a whole
 * directory name can match. Entries are listed in lookup order.
 */
const LAYER_HINTS = [
    { layer: 'api', names: ['api', 'server', 'backend', 'routes', 'controllers', 'handlers', 'endpoints'] },
    { layer: 'web', names: ['web', 'frontend', 'client', 'ui', 'app', 'components', 'pages', 'views'] },
    { layer: 'data', names: ['db', 'database', 'models', 'entities', 'migrations', 'prisma', 'schemas?'] },
    { layer: 'worker', names: ['workers?', 'jobs', 'cron', 'queue', 'tasks?', 'consumers?'] },
    { layer: 'shared', names: ['lib', 'libs', 'common', 'shared', 'utils?', 'helpers', 'core', 'types'] },
].map(({ layer, names }) => ({
    layer,
    patterns: names.map(fragment => new RegExp(`^${fragment}$`, 'i')),
}));
|
|
65
|
+
/** Lower-cased file extensions that are eligible for endpoint scanning. */
const ENDPOINT_FILE_EXTS = new Set('.ts .tsx .js .jsx .mjs .py .java .kt'.split(' '));
/** Maximum number of source files read while scanning for endpoints — keeps the audit cheap. */
const MAX_ENDPOINT_SCAN_FILES = 200;
/** Maximum number of endpoints returned (very chatty Express files can blow up). */
const MAX_ENDPOINTS = 80;
|
|
70
|
+
/**
 * Produces the file-based architecture snapshot for a cloned repository.
 *
 * Runs language detection, manifest parsing, module detection and endpoint
 * scanning, then stamps every module with the number of endpoints found
 * beneath it.
 *
 * @param opts `cloneDir` (clone location on disk), `targetRepo` (used as the
 *             profile slug), `cloneSha`, and `inventory` (file inventory of
 *             the clone).
 * @returns `{ profile, modules, endpoints, dependencies, deviations }`;
 *          `deviations` is always an empty array in this analyzer.
 */
function analyzeArchitecture(opts) {
    const inventory = opts.inventory;
    const languages = detectLanguages(inventory);
    const manifestInfo = readManifests(opts.cloneDir, inventory.rootManifests);
    const modules = detectModules(opts.cloneDir, inventory);
    const endpoints = scanEndpoints(opts.cloneDir);
    // Best-effort ownership: an endpoint belongs to the module with the longest
    // name that prefixes its file path (directly or under `src/`). On equal
    // lengths the module listed first wins.
    const tally = new Map();
    for (const endpoint of endpoints) {
        let owner;
        for (const mod of modules) {
            const candidate = mod.name;
            const owns = endpoint.file === candidate
                || endpoint.file.startsWith(`${candidate}/`)
                || endpoint.file.startsWith(`src/${candidate}/`);
            if (owns && (owner === undefined || candidate.length > owner.length)) {
                owner = candidate;
            }
        }
        if (owner) {
            tally.set(owner, (tally.get(owner) ?? 0) + 1);
        }
    }
    for (const mod of modules) {
        mod.endpointCount = tally.get(mod.name) ?? 0;
    }
    return {
        profile: {
            slug: opts.targetRepo,
            cloneSha: opts.cloneSha,
            totalFiles: inventory.totalFiles,
            totalBytes: inventory.totalBytes,
            byExtension: inventory.byExtension,
            languages,
            frameworks: manifestInfo.frameworks,
            manifests: inventory.rootManifests,
        },
        modules,
        endpoints,
        dependencies: manifestInfo.dependencies,
        deviations: [],
    };
}
|
|
102
|
+
// ============================================================================
|
|
103
|
+
// Language + framework detection
|
|
104
|
+
// ============================================================================
|
|
105
|
+
/**
 * Derives the list of programming languages present in a repository from
 * its per-extension file counts.
 *
 * @param inventory Object whose `byExtension` maps a dotted extension
 *                  (e.g. `.ts`) to a file count.
 * @returns Language names with at least one matching file, sorted ascending.
 */
function detectLanguages(inventory) {
    const extensionsByLanguage = {
        TypeScript: ['.ts', '.tsx'],
        JavaScript: ['.js', '.jsx', '.mjs', '.cjs'],
        Python: ['.py'],
        Java: ['.java'],
        Kotlin: ['.kt', '.kts'],
        Go: ['.go'],
        Rust: ['.rs'],
        Ruby: ['.rb'],
        PHP: ['.php'],
        'C#': ['.cs'],
        Swift: ['.swift'],
        Elixir: ['.ex', '.exs'],
    };
    const tally = inventory.byExtension;
    return Object.entries(extensionsByLanguage)
        // A language is present when its extensions' counts sum to more than zero.
        .filter(([, exts]) => exts.map(ext => tally[ext] ?? 0).reduce((sum, n) => sum + n) > 0)
        .map(([language]) => language)
        .sort();
}
|
|
146
|
+
/**
 * Reads the given root manifests and collects declared dependencies plus any
 * recognised frameworks.
 *
 * Manifest names may arrive lower-cased while the file on disk keeps its
 * original casing (`Gemfile`, `Cargo.toml`). On a case-sensitive filesystem
 * the lower-cased path does not exist, so when the direct read fails the
 * directory listing is consulted for a case-insensitive match.
 *
 * Unreadable or unrecognised manifests are skipped (best-effort scan).
 *
 * @param cloneDir      Directory that contains the manifests.
 * @param manifestNames Manifest file names relative to `cloneDir`.
 * @returns `frameworks` (sorted, at most 30 entries) and `dependencies`
 *          (sorted, at most 60 entries).
 */
function readManifests(cloneDir, manifestNames) {
    const frameworks = new Set();
    const dependencies = new Set();
    const parsers = [
        { pattern: /^package\.json$/i, parse: parsePackageJson },
        { pattern: /^pyproject\.toml$/i, parse: parsePyproject },
        { pattern: /^requirements\.txt$/i, parse: parseRequirementsTxt },
        { pattern: /^cargo\.toml$/i, parse: parseCargoToml },
        { pattern: /^go\.mod$/i, parse: parseGoMod },
        { pattern: /^pom\.xml$/i, parse: parsePomXml },
        { pattern: /^gemfile$/i, parse: parseGemfile },
    ];
    // Directory listing of cloneDir, loaded lazily and only if a direct read fails.
    let rootEntries;
    const readManifest = (name) => {
        try {
            return fs.readFileSync(path.join(cloneDir, name), 'utf8');
        }
        catch {
            // Fall through to the case-insensitive lookup below.
        }
        if (rootEntries === undefined) {
            try {
                rootEntries = fs.readdirSync(cloneDir);
            }
            catch {
                rootEntries = [];
            }
        }
        const wanted = name.toLowerCase();
        const onDisk = rootEntries.find(entry => entry !== name && entry.toLowerCase() === wanted);
        if (onDisk === undefined) {
            return undefined;
        }
        try {
            return fs.readFileSync(path.join(cloneDir, onDisk), 'utf8');
        }
        catch {
            return undefined;
        }
    };
    for (const name of manifestNames) {
        const parser = parsers.find(candidate => candidate.pattern.test(name));
        if (!parser) {
            continue; // no parser for this manifest type — nothing to extract
        }
        const raw = readManifest(name);
        if (raw === undefined) {
            continue;
        }
        parser.parse(raw, frameworks, dependencies);
    }
    return {
        frameworks: [...frameworks].sort().slice(0, 30),
        dependencies: [...dependencies].sort().slice(0, 60),
    };
}
|
|
185
|
+
/** Maps a lower-cased package / module identifier to a framework label. */
const KNOWN_FRAMEWORK_PACKAGES = {
    // JS/TS
    express: 'express', '@nestjs/core': 'nestjs', fastify: 'fastify',
    next: 'next.js', react: 'react', vue: 'vue', '@angular/core': 'angular',
    svelte: 'svelte', remix: 'remix', '@remix-run/server-runtime': 'remix',
    koa: 'koa', hapi: 'hapi',
    // Python
    fastapi: 'fastapi', flask: 'flask', django: 'django', starlette: 'starlette',
    // Java/Kotlin
    'spring-boot-starter-web': 'spring-boot', 'spring-web': 'spring',
    // Go
    'github.com/gin-gonic/gin': 'gin', 'github.com/labstack/echo': 'echo',
    'github.com/gofiber/fiber': 'fiber',
    // Rust
    actix: 'actix-web', 'actix-web': 'actix-web', axum: 'axum', rocket: 'rocket',
    // Ruby
    rails: 'rails', sinatra: 'sinatra',
};
/**
 * Adds the framework label for `dep` to `frameworks` when the dependency is
 * a known framework package; otherwise does nothing.
 *
 * @param dep        Dependency name (matched case-insensitively).
 * @param frameworks Set that receives the framework label.
 */
function classifyDep(dep, frameworks) {
    const key = dep.toLowerCase();
    // Go modules at major version 2+ carry a `/vN` path suffix
    // (e.g. `github.com/labstack/echo/v4`); also try the path without it.
    const candidates = [key, key.replace(/\/v\d+$/, '')];
    for (const candidate of candidates) {
        // Own-property check: a bare `table[key]` lookup would also resolve
        // inherited members such as `constructor` or `__proto__`.
        if (Object.prototype.hasOwnProperty.call(KNOWN_FRAMEWORK_PACKAGES, candidate)) {
            frameworks.add(KNOWN_FRAMEWORK_PACKAGES[candidate]);
            return;
        }
    }
}
|
|
209
|
+
/**
 * Extracts the keys of the `dependencies` object from a package.json.
 *
 * @param raw        Raw package.json text.
 * @param frameworks Set receiving recognised framework labels.
 * @param deps       Set receiving every dependency name (casing preserved).
 */
function parsePackageJson(raw, frameworks, deps) {
    try {
        const manifest = JSON.parse(raw);
        Object.keys(manifest.dependencies ?? {}).forEach(pkg => {
            deps.add(pkg);
            classifyDep(pkg, frameworks);
        });
    }
    catch {
        // Malformed package.json — skipped on purpose (best-effort scan).
    }
}
|
|
220
|
+
/**
 * Lightweight pyproject.toml scan. Picks up
 *   - the first `dependencies = ["fastapi >=0.100", …]` array, and
 *   - the keys of the `[tool.poetry.dependencies]` table (except `python`).
 *
 * Names are lower-cased before being recorded.
 *
 * @param raw        Raw pyproject.toml text.
 * @param frameworks Set receiving recognised framework labels.
 * @param deps       Set receiving every dependency name.
 */
function parsePyproject(raw, frameworks, deps) {
    const record = (rawName) => {
        const name = rawName.toLowerCase();
        deps.add(name);
        classifyDep(name, frameworks);
    };
    // The array body may contain one level of nested brackets: extras such as
    // "uvicorn[standard]>=0.20". Stopping at the first `]` would cut the list
    // short there. If the bracket-aware pattern finds nothing (e.g. unbalanced
    // brackets), fall back to the plain first-`]` match.
    const arrayMatch = raw.match(/dependencies\s*=\s*\[((?:[^\[\]]|\[[^\[\]]*\])*)\]/)
        ?? raw.match(/dependencies\s*=\s*\[([\s\S]*?)\]/);
    if (arrayMatch) {
        // Each requirement string starts with the package name right after the quote.
        for (const m of arrayMatch[1].matchAll(/"\s*([a-zA-Z0-9_.-]+)/g)) {
            record(m[1]);
        }
    }
    // Poetry table: everything up to the next `[section]` header or end of file.
    const poetrySection = raw.match(/\[tool\.poetry\.dependencies\]([\s\S]*?)(?:\n\[|$)/);
    if (poetrySection) {
        for (const m of poetrySection[1].matchAll(/^([a-zA-Z0-9_.-]+)\s*=/gm)) {
            if (m[1].toLowerCase() === 'python') {
                continue; // interpreter constraint, not a package
            }
            record(m[1]);
        }
    }
}
|
|
242
|
+
/**
 * Collects package names from a requirements.txt. Blank lines, `#` comments
 * and lines starting with `-` (pip options) are ignored; the leading name
 * token of every other line is recorded in lower case.
 *
 * @param raw        Raw requirements.txt text.
 * @param frameworks Set receiving recognised framework labels.
 * @param deps       Set receiving every dependency name.
 */
function parseRequirementsTxt(raw, frameworks, deps) {
    raw.split('\n')
        .map(line => line.trim())
        .filter(entry => entry !== '' && !entry.startsWith('#') && !entry.startsWith('-'))
        .forEach(entry => {
            const lead = /^([a-zA-Z0-9_.-]+)/.exec(entry);
            if (lead === null) {
                return;
            }
            const pkg = lead[1].toLowerCase();
            deps.add(pkg);
            classifyDep(pkg, frameworks);
        });
}
|
|
256
|
+
/**
 * Collects crate names from the `[dependencies]` table of a Cargo.toml.
 * The table is taken to run until the next `[section]` header or end of file.
 *
 * @param raw        Raw Cargo.toml text.
 * @param frameworks Set receiving recognised framework labels.
 * @param deps       Set receiving every crate name (lower-cased).
 */
function parseCargoToml(raw, frameworks, deps) {
    const table = /\[dependencies\]([\s\S]*?)(?:\n\[|$)/.exec(raw);
    if (table === null) {
        return;
    }
    const crates = Array.from(table[1].matchAll(/^([a-zA-Z0-9_.-]+)\s*=/gm), hit => hit[1].toLowerCase());
    for (const crate of crates) {
        deps.add(crate);
        classifyDep(crate, frameworks);
    }
}
|
|
267
|
+
/**
 * Collects module paths from a go.mod file.
 *
 * Handles every `require ( … )` block (go.mod files commonly keep direct and
 * indirect requirements in separate blocks) as well as single-line
 * `require <path> <version>` directives. When the file has no `require`
 * block at all, the whole text is scanned for `<path> v<version>` lines.
 *
 * @param raw        Raw go.mod text.
 * @param frameworks Set receiving recognised framework labels.
 * @param deps       Set receiving every module path (lower-cased).
 */
function parseGoMod(raw, frameworks, deps) {
    const sections = Array.from(raw.matchAll(/require\s*\(([\s\S]*?)\)/g), block => block[1]);
    if (sections.length === 0) {
        sections.push(raw);
    }
    // Single-line directives: strip the `require` keyword so that the entry
    // pattern below (which expects the module path first) can match.
    for (const single of raw.matchAll(/^[ \t]*require[ \t]+([^\s(][^\r\n]*)/gm)) {
        sections.push(single[1]);
    }
    for (const section of sections) {
        for (const m of section.matchAll(/^\s*([\w./-]+)\s+v[\d.+\-a-z]+/gm)) {
            const name = m[1].toLowerCase();
            deps.add(name);
            classifyDep(name, frameworks);
        }
    }
}
|
|
276
|
+
/**
 * Collects the text of every `<artifactId>` element in a pom.xml.
 *
 * @param raw        Raw pom.xml text.
 * @param frameworks Set receiving recognised framework labels.
 * @param deps       Set receiving every artifact id (lower-cased).
 */
function parsePomXml(raw, frameworks, deps) {
    const artifactIds = Array.from(raw.matchAll(/<artifactId>([\w.-]+)<\/artifactId>/gi), hit => hit[1].toLowerCase());
    artifactIds.forEach(artifact => {
        deps.add(artifact);
        classifyDep(artifact, frameworks);
    });
}
|
|
283
|
+
/**
 * Collects gem names from `gem "name"` / `gem 'name'` declarations.
 *
 * @param raw        Raw Gemfile text.
 * @param frameworks Set receiving recognised framework labels.
 * @param deps       Set receiving every gem name (lower-cased).
 */
function parseGemfile(raw, frameworks, deps) {
    const gems = Array.from(raw.matchAll(/gem\s+["']([a-zA-Z0-9_.-]+)/g), hit => hit[1].toLowerCase());
    gems.forEach(gem => {
        deps.add(gem);
        classifyDep(gem, frameworks);
    });
}
|
|
290
|
+
// ============================================================================
|
|
291
|
+
// Module / layer detection
|
|
292
|
+
// ============================================================================
|
|
293
|
+
/**
 * Builds the module list for a clone: every top-level directory from the
 * inventory plus every direct sub-directory of `src/`, each with a guessed
 * layer and a recursive file count. Directories containing no files are
 * dropped.
 *
 * @param cloneDir  Root of the cloned repository.
 * @param inventory Inventory whose `topLevelEntries` lists depth-1 names.
 * @returns Modules ordered by descending `fileCount`; `endpointCount` is
 *          initialised to 0 and filled in by the caller after the endpoint scan.
 */
function detectModules(cloneDir, inventory) {
    const candidates = new Set();
    // Top-level directories (entries that cannot be stat'ed are ignored).
    inventory.topLevelEntries.forEach(entry => {
        let isDir = false;
        try {
            isDir = fs.statSync(path.join(cloneDir, entry)).isDirectory();
        }
        catch { /* skip */ }
        if (isDir) {
            candidates.add(entry);
        }
    });
    // Many JS/TS repos keep their real structure one level down, in src/*.
    const srcRoot = path.join(cloneDir, 'src');
    if (fs.existsSync(srcRoot) && fs.statSync(srcRoot).isDirectory()) {
        try {
            fs.readdirSync(srcRoot, { withFileTypes: true })
                .filter(child => child.isDirectory())
                .forEach(child => candidates.add(`src/${child.name}`));
        }
        catch { /* skip */ }
    }
    return [...candidates]
        .map(name => ({ name, fileCount: countFiles(path.join(cloneDir, name)) }))
        .filter(entry => entry.fileCount !== 0)
        .map(({ name, fileCount }) => ({
            name,
            layer: classifyLayer(name),
            fileCount,
            endpointCount: 0, // populated after endpoint scan
        }))
        .sort((a, b) => b.fileCount - a.fileCount);
}
|
|
333
|
+
/**
 * Guesses the architectural layer of a module from the last segment of its
 * `/`-separated path.
 *
 * @param modulePath Module path such as `src/api`.
 * @returns The layer of the first matching entry in `LAYER_HINTS`, or
 *          `'unknown'` when nothing matches.
 */
function classifyLayer(modulePath) {
    const segments = modulePath.split('/');
    const leaf = segments[segments.length - 1].toLowerCase();
    const match = LAYER_HINTS.find(({ patterns }) => patterns.some(re => re.test(leaf)));
    return match ? match.layer : 'unknown';
}
|
|
342
|
+
/**
 * Counts the regular files beneath `dir`, descending into sub-directories.
 * Directories that cannot be listed contribute nothing; entries that are
 * neither a directory nor a regular file are ignored.
 *
 * @param dir Directory to walk.
 * @returns Number of regular files found.
 */
function countFiles(dir) {
    let total = 0;
    // Explicit work list instead of recursion.
    const pending = [dir];
    while (pending.length > 0) {
        const current = pending.pop();
        let children;
        try {
            children = fs.readdirSync(current, { withFileTypes: true });
        }
        catch {
            continue;
        }
        for (const child of children) {
            if (child.isDirectory()) {
                pending.push(path.join(current, child.name));
            }
            else if (child.isFile()) {
                total += 1;
            }
        }
    }
    return total;
}
|
|
364
|
+
/**
 * Regex heuristics for route declarations. Unless `methodFromDecoratorPos`
 * is -1, capture group 1 is the HTTP method and group 2 the route path;
 * with -1, group 1 is the route path and no method is captured.
 */
const ENDPOINT_PATTERNS = [
    // Express / Koa / Fastify shape: `app.get('/users/:id', ...)`.
    // The `(?<!@)` guard keeps this from also firing on decorator syntax
    // (`@app.get("/x")`), which the fastapi pattern below already reports —
    // without it every such route would be listed twice.
    { framework: 'express', re: /(?<!@)\b(?:app|router|fastify|server)\.(get|post|put|patch|delete|options|head)\s*\(\s*['"`]([^'"`]+)/gi },
    // FastAPI / Flask: `@app.get("/users/{id}")` / `@app.route("/x")`
    { framework: 'fastapi', re: /@\w+\.(get|post|put|patch|delete)\s*\(\s*['"]([^'"]+)/gi },
    { framework: 'flask', re: /@\w+\.route\s*\(\s*['"]([^'"]+)/gi, methodFromDecoratorPos: -1 },
    // Spring (Java/Kotlin): `@GetMapping("/users")`
    { framework: 'spring', re: /@(Get|Post|Put|Patch|Delete)Mapping\s*\(\s*['"]([^'"]+)/g },
];
|
|
373
|
+
/**
 * Walks the clone and collects route declarations matched by
 * `ENDPOINT_PATTERNS`.
 *
 * Limits: at most `MAX_ENDPOINT_SCAN_FILES` files (with an extension in
 * `ENDPOINT_FILE_EXTS`) are read and at most `MAX_ENDPOINTS` endpoints are
 * returned. The walk stops as soon as either limit is hit. The directories
 * `.git`, `node_modules`, `dist` and `build` are never entered.
 *
 * @param cloneDir Root of the cloned repository.
 * @returns Endpoint records `{ method, path, file, framework }`, where `file`
 *          is relative to `cloneDir` and always uses `/` separators.
 */
function scanEndpoints(cloneDir) {
    const found = [];
    let scanned = 0;
    // Once either cap is reached nothing further can be added, so there is
    // no point in traversing the rest of the tree.
    const exhausted = () => found.length >= MAX_ENDPOINTS || scanned >= MAX_ENDPOINT_SCAN_FILES;
    function walk(dir) {
        if (exhausted()) {
            return;
        }
        let entries;
        try {
            entries = fs.readdirSync(dir, { withFileTypes: true });
        }
        catch {
            return;
        }
        for (const ent of entries) {
            if (exhausted()) {
                return;
            }
            const full = path.join(dir, ent.name);
            if (ent.isDirectory()) {
                if (ent.name === '.git' || ent.name === 'node_modules' || ent.name === 'dist' || ent.name === 'build') {
                    continue;
                }
                walk(full);
                continue;
            }
            if (!ent.isFile()) {
                continue;
            }
            if (!ENDPOINT_FILE_EXTS.has(path.extname(ent.name).toLowerCase())) {
                continue;
            }
            scanned += 1;
            let content;
            try {
                content = fs.readFileSync(full, 'utf8');
            }
            catch {
                continue;
            }
            // Normalise to forward slashes: callers attribute endpoints to modules
            // by `/`-separated prefixes, which would never match Windows paths.
            const relFile = path.relative(cloneDir, full).split(path.sep).join('/');
            for (const pat of ENDPOINT_PATTERNS) {
                // matchAll clones the regex, so the shared global patterns carry
                // no lastIndex state from one file to the next.
                for (const m of content.matchAll(pat.re)) {
                    let method;
                    let routePath;
                    if (pat.methodFromDecoratorPos === -1) {
                        // Flask: `@app.route("/x")` — method defaults to GET unless `methods=` is set
                        method = 'GET';
                        routePath = m[1];
                    }
                    else {
                        method = (m[1] || '').toUpperCase();
                        routePath = m[2];
                    }
                    if (!routePath) {
                        continue;
                    }
                    found.push({ method, path: routePath, file: relFile, framework: pat.framework });
                    if (found.length >= MAX_ENDPOINTS) {
                        break;
                    }
                }
                if (found.length >= MAX_ENDPOINTS) {
                    break;
                }
            }
        }
    }
    walk(cloneDir);
    return found;
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { ProviderResult } from '../../search/provider-result';
|
|
2
|
+
import type { QueryEnvelope } from './tavily-search';
|
|
3
|
+
/** Options accepted by `runArxivSearch`. */
export interface ArxivSearchNodeOpts {
    /** Search strings; one arXiv request is issued per entry. */
    queries: Array<string>;
    /** Result cap per query (the implementation falls back to 5 when omitted). */
    maxResultsPerQuery?: number;
    /** Optional `fetch` replacement, forwarded unchanged to the arXiv client. */
    fetchImpl?: typeof fetch;
}
|
|
8
|
+
/** Outcome of `runArxivSearch`. */
export interface ArxivSearchNodeResult {
    /** One envelope per input query, in input order — including failed queries. */
    envelopes: Array<QueryEnvelope>;
    /** Results of all successful queries, concatenated in query order. */
    results: Array<ProviderResult>;
}
|
|
12
|
+
/**
 * Runs every query in `opts.queries` against arXiv and gathers the outcomes.
 *
 * @param opts Queries plus optional per-query result cap and `fetch` override.
 * @returns A promise for the per-query envelopes and the combined result list.
 */
export declare function runArxivSearch(opts: ArxivSearchNodeOpts): Promise<ArxivSearchNodeResult>;
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.runArxivSearch = runArxivSearch;
|
|
4
|
+
/**
|
|
5
|
+
* arxiv_search — pure_api node.
|
|
6
|
+
*
|
|
7
|
+
* Runs each arxiv query against export.arxiv.org in parallel. arXiv's
|
|
8
|
+
* Atom API doesn't return a relevance score; we synthesise one from
|
|
9
|
+
* result position (top result → 0.9 → linear decay to 0.5).
|
|
10
|
+
*/
|
|
11
|
+
const arxiv_client_1 = require("../../search/arxiv-client");
|
|
12
|
+
/**
 * Issues all arXiv queries concurrently and waits for every one to settle.
 *
 * A fulfilled query contributes an envelope (status, byte count, hit count)
 * and one result per hit. A rejected query contributes only an envelope with
 * zeroed counters and the error text, so one failure never aborts the rest.
 *
 * @param opts `queries`, optional `maxResultsPerQuery` (default 5) and
 *             optional `fetchImpl`.
 * @returns `{ envelopes, results }`.
 */
async function runArxivSearch(opts) {
    const { queries } = opts;
    const pending = queries.map(query => (0, arxiv_client_1.arxivSearch)({
        query,
        maxResults: opts.maxResultsPerQuery ?? 5,
        fetchImpl: opts.fetchImpl,
    }));
    const settled = await Promise.allSettled(pending);
    const envelopes = [];
    const results = [];
    settled.forEach((outcome, index) => {
        const query = queries[index];
        if (outcome.status !== 'fulfilled') {
            const reason = outcome.reason;
            const error = reason instanceof Error ? reason.message : String(reason);
            envelopes.push({ query, httpStatus: 0, responseBytes: 0, resultCount: 0, error });
            return;
        }
        const response = outcome.value;
        const hits = response.results;
        envelopes.push({
            query,
            httpStatus: response.httpStatus,
            responseBytes: response.responseBytes,
            resultCount: hits.length,
        });
        for (const [rank, hit] of hits.entries()) {
            results.push({
                provider: 'arxiv',
                fromQuery: query,
                title: hit.title,
                url: hit.abstractUrl,
                content: hit.summary.slice(0, 500),
                // Score synthesised from result position: 0.9 for the first
                // hit, minus 0.1 per rank, never below 0.5.
                score: Math.max(0.5, 0.9 - rank * 0.1),
                publishedDate: hit.published || undefined,
                authors: hit.authors,
            });
        }
    });
    return { envelopes, results };
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/** Options for `cloneAndIndex`. */
export interface CloneAndIndexOpts {
    /** Repository as `owner/repo`; feeds both the clone URL and the `slug` in RepositoryProfile. */
    targetRepo: string;
    /** Parent directory for the clone (test override). Falls back to `os.tmpdir()`. */
    parentDir?: string;
    /** Branch, tag or SHA to check out once cloned. Falls back to the repo's default branch. */
    ref?: string;
    /**
     * Test hook: alternative clone source (for example a `file://` path to a
     * local repo). When provided it is used instead of the default
     * `https://github.com/<targetRepo>.git`.
     */
    originUrl?: string;
}
|
|
15
|
+
/** Summary of the files found in a cloned repository. */
export interface FileInventory {
    totalFiles: number;
    totalBytes: number;
    byExtension: Record<string, number>;
    /** Lower-cased names of the manifest files found at the repo root. */
    rootManifests: Array<string>;
    /** Depth-1 entries of the repo — both directories and files. */
    topLevelEntries: Array<string>;
    /** Source files from a light, capped walk — not an exhaustive listing. */
    sourceFiles: Array<string>;
}
|
|
26
|
+
/** What `cloneAndIndex` hands back. */
export interface CloneAndIndexResult {
    /** Absolute on-disk path of the clone. Cleaning it up is the caller's job. */
    cloneDir: string;
    cloneSha: string;
    inventory: FileInventory;
}
|
|
32
|
+
/**
 * Clones the target repository and returns its location, SHA and file inventory.
 * Synchronous: the result is returned directly, not wrapped in a Promise.
 * The clone directory is not removed here — see `CloneAndIndexResult.cloneDir`.
 */
export declare function cloneAndIndex(opts: CloneAndIndexOpts): CloneAndIndexResult;
|