@devport-kr/portki 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +11 -0
- package/AGENTS.md +205 -0
- package/CLAUDE.md +174 -0
- package/README.md +144 -0
- package/dist/cli.cjs +6 -0
- package/dist/cli.js +4465 -0
- package/package.json +60 -0
package/dist/cli.js
ADDED
|
@@ -0,0 +1,4465 @@
|
|
|
1
|
+
// src/agent.ts
|
|
2
|
+
import path13 from "node:path";
|
|
3
|
+
import { mkdir as mkdir2, writeFile as writeFile2, readFile as readFile2 } from "node:fs/promises";
|
|
4
|
+
import { pathToFileURL } from "node:url";
|
|
5
|
+
|
|
6
|
+
// src/shared/load-env.ts
|
|
7
|
+
import { readFileSync } from "node:fs";
|
|
8
|
+
import path from "node:path";
|
|
9
|
+
// Env files probed by loadEnvFiles when the caller supplies no `files` option,
// in iteration order. loadEnvFiles never overwrites a variable that is already
// set, so a key found in an earlier file wins over the same key in a later one.
var DEFAULT_ENV_FILES = [".env.local", ".env"];
|
|
10
|
+
/**
 * Load KEY=VALUE pairs from env files into `process.env`.
 *
 * Files that cannot be read are skipped silently. A variable that is already
 * defined in `process.env` is never overwritten, so the existing environment
 * and earlier files take precedence over later ones.
 *
 * @param {{ cwd?: string, files?: string[] }} [options] `cwd` is the directory
 *   file names are resolved against (defaults to `process.cwd()`); `files` is
 *   the list of files to probe (defaults to a copy of DEFAULT_ENV_FILES).
 * @returns {{ loadedFiles: string[], loadedKeys: string[] }} Absolute paths of
 *   the files that were read, and the keys that were newly set.
 */
function loadEnvFiles(options = {}) {
  const baseDir = options.cwd ?? process.cwd();
  const candidates = options.files ?? [...DEFAULT_ENV_FILES];
  const summary = { loadedFiles: [], loadedKeys: [] };

  for (const candidate of candidates) {
    const resolvedPath = path.resolve(baseDir, candidate);

    let contents;
    try {
      contents = readFileSync(resolvedPath, "utf8");
    } catch {
      // Missing or unreadable files are expected; move on to the next one.
      continue;
    }

    for (const rawLine of contents.split(/\r?\n/)) {
      const parsed = parseEnvLine(rawLine);
      if (parsed && process.env[parsed.key] === undefined) {
        process.env[parsed.key] = parsed.value;
        summary.loadedKeys.push(parsed.key);
      }
    }

    summary.loadedFiles.push(resolvedPath);
  }

  return summary;
}
|
|
41
|
+
/**
 * Parse one line of an env file.
 *
 * Blank lines, `#` comments, lines without a key before `=`, and keys that are
 * not valid identifiers all yield `null`. A leading `export ` is ignored.
 *
 * @param {string} line Raw line, without its line terminator.
 * @returns {{ key: string, value: string } | null} The parsed pair, with the
 *   value passed through `unquote`, or `null` when the line is not an assignment.
 */
function parseEnvLine(line) {
  const text = line.trim();
  if (text === "" || text.startsWith("#")) {
    return null;
  }

  const exportPrefix = "export ";
  const assignment = text.startsWith(exportPrefix)
    ? text.slice(exportPrefix.length).trim()
    : text;

  // An index of 0 means the key is empty; -1 means there is no `=` at all.
  const equalsAt = assignment.indexOf("=");
  if (equalsAt < 1) {
    return null;
  }

  const key = assignment.slice(0, equalsAt).trim();
  const isValidKey = /^[A-Za-z_][A-Za-z0-9_]*$/.test(key);
  if (!isValidKey) {
    return null;
  }

  return { key, value: unquote(assignment.slice(equalsAt + 1).trim()) };
}
|
|
59
|
+
/**
 * Strip one pair of matching surrounding quotes from an env value.
 *
 * Inside double quotes the two-character sequences `\n` and `\r` are expanded
 * to a newline and a carriage return; single-quoted content is returned
 * verbatim. Values that are not fully quoted are returned unchanged.
 *
 * @param {string} value Trimmed raw value.
 * @returns {string} The unquoted value.
 */
function unquote(value) {
  if (value.length < 2) {
    return value;
  }

  const opening = value[0];
  const closing = value[value.length - 1];
  const inner = value.slice(1, -1);

  if (opening === '"' && closing === '"') {
    return inner.replaceAll("\\n", "\n").replaceAll("\\r", "\r");
  }
  if (opening === "'" && closing === "'") {
    return inner;
  }
  return value;
}
|
|
68
|
+
|
|
69
|
+
// src/ingestion/run.ts
|
|
70
|
+
import { createHash as createHash4 } from "node:crypto";
|
|
71
|
+
|
|
72
|
+
// src/ingestion/types.ts
|
|
73
|
+
import { z } from "zod";
|
|
74
|
+
// Caller-supplied repository reference: a required non-empty `repo` string and
// an optional non-empty `ref`. Both are trimmed; unknown keys are rejected.
var repoRefInputSchema = z
  .object({
    repo: z.string().trim().min(1),
    ref: z.string().trim().min(1).optional()
  })
  .strict();
|
|
78
|
+
// Shape of a repository reference after resolution to a concrete commit.
// Unknown keys are rejected.
var resolvedIngestRefSchema = (() => {
  // Trimmed string with at least one character; a fresh schema per call.
  const text = () => z.string().trim().min(1);

  return z
    .object({
      repo_full_name: text(),
      owner: text(),
      repo: text(),
      // null when the caller did not ask for a specific ref.
      requested_ref: text().nullable(),
      requested_ref_type: z.enum(["branch", "sha", "default"]),
      resolved_ref: text(),
      // At least four characters after trimming.
      commit_sha: z.string().trim().min(4),
      resolved_via: text(),
      source_default_branch: text(),
      resolved_at: z.string().datetime()
    })
    .strict();
})();
|
|
90
|
+
// Options accepted by an ingestion run. Unknown keys are rejected.
var ingestRunInputSchema = z
  .object({
    repo_ref: repoRefInputSchema,
    // Defaults to false when omitted.
    force_rebuild: z.boolean().default(false),
    // Defaults to "devport-output/snapshots" when omitted.
    snapshot_root: z.string().trim().min(1).default("devport-output/snapshots"),
    // Optional zero-argument function returning a string.
    now: z.function().args().returns(z.string()).optional(),
    // Optional string of at least four characters after trimming.
    fixture_commit: z.string().trim().min(4).optional()
  })
  .strict();
|
|
97
|
+
// Aggregate counts describing a file tree. Every number is a non-negative
// integer; unknown keys are rejected.
var treeSummarySchema = (() => {
  const count = () => z.number().int().nonnegative();

  return z
    .object({
      total_files: count(),
      total_directories: count(),
      max_depth: count(),
      // String key -> non-negative integer.
      by_extension: z.record(z.string(), count())
    })
    .strict();
})();
|
|
103
|
+
// Record keyed by a non-empty trimmed string with non-negative numeric values;
// resolves to {} when the value is absent.
// NOTE(review): presumably language name -> byte count, as produced by
// deriveLanguageMix — confirm against consumers.
var languageMixSchema = z.record(z.string().trim().min(1), z.number().nonnegative()).default({});
|
|
104
|
+
// Metadata extracted from a snapshot: tree summary, language mix, key paths and
// size totals. Unknown keys are rejected.
var ingestMetadataSchema = (() => {
  const text = () => z.string().trim().min(1);
  const count = () => z.number().int().nonnegative();

  return z
    .object({
      tree_summary: treeSummarySchema,
      language_mix: languageMixSchema,
      key_paths: z.array(text()),
      files_scanned: count(),
      total_bytes: count(),
      manifest_signature: text()
    })
    .strict();
})();
|
|
112
|
+
// Full record describing one ingestion run, including optional trend and
// official-docs sub-records. Unknown keys are rejected at every object level
// defined here.
var ingestRunArtifactSchema = (() => {
  // Small builders; each call returns a fresh schema instance.
  const text = () => z.string().trim().min(1);
  const count = () => z.number().int().nonnegative();
  const timestamp = () => z.string().datetime();

  const trendArtifacts = z
    .object({
      window_days: z.number().int().positive(),
      releases_path: text(),
      tags_path: text(),
      changelog_summary_path: text(),
      release_count: count(),
      tag_count: count()
    })
    .strict();

  const officialDocs = z
    .object({
      index_path: text(),
      discovered_count: count(),
      mirrored_count: count()
    })
    .strict();

  return z
    .object({
      ingest_run_id: text(),
      repo_ref: text(),
      requested_ref: text().nullable(),
      resolved_ref: text(),
      commit_sha: z.string().trim().min(4),
      snapshot_path: text(),
      snapshot_id: text(),
      manifest_signature: text(),
      files_scanned: count(),
      idempotent_hit: z.boolean(),
      metadata: ingestMetadataSchema,
      trend_artifacts: trendArtifacts.optional(),
      official_docs: officialDocs.optional(),
      created_at: timestamp(),
      completed_at: timestamp(),
      ingest_ms: count()
    })
    .strict();
})();
|
|
141
|
+
|
|
142
|
+
// src/ingestion/snapshot.ts
|
|
143
|
+
import { createHash as createHash2 } from "node:crypto";
|
|
144
|
+
import { promises as fs } from "node:fs";
|
|
145
|
+
import { existsSync } from "node:fs";
|
|
146
|
+
import { spawn } from "node:child_process";
|
|
147
|
+
import path2 from "node:path";
|
|
148
|
+
|
|
149
|
+
// src/ingestion/ref.ts
|
|
150
|
+
import { createHash } from "node:crypto";
|
|
151
|
+
// Characters accepted in a single owner or repository name segment:
// ASCII letters, digits, dot, underscore and hyphen (one or more).
var ALLOWED_SEGMENT = /^(?:[a-zA-Z0-9._-]+)$/;

// Hexadecimal strings of exactly 7 and exactly 40 characters, used by
// isLikelyCommitSha to recognise commit SHAs.
var SHA_7 = /^[a-fA-F0-9]{7}$/;
var SHA_40 = /^[a-fA-F0-9]{40}$/;
|
|
154
|
+
/**
 * Parse an `owner/repo` or `owner/repo@ref` string.
 *
 * Owner and repo are lower-cased in the result. The segments `.` and `..` are
 * rejected even though they consist of allowed characters: other code in this
 * file joins owner and repo into filesystem paths under the snapshot root, so
 * dot segments would let a reference escape that directory.
 *
 * @param {string} input Repository reference, optionally suffixed with `@ref`.
 * @returns {{ repo_full_name: string, owner: string, repo: string, requested_ref: string | null }}
 *   `requested_ref` is the trimmed ref, or `null` when none (or an empty one) was given.
 * @throws {Error} When the reference has more than one `@`, lacks a repository
 *   part, is not exactly `owner/repo`, or contains a disallowed segment.
 */
function parseRepoRef(input) {
  const trimmed = input.trim();
  const atSplit = trimmed.split("@");
  if (atSplit.length > 2) {
    throw new Error("Invalid repository reference format");
  }

  const [repoPart, refPart] = atSplit;
  if (!repoPart) {
    throw new Error("Missing repository path");
  }

  const repoSegments = repoPart.split("/");
  if (repoSegments.length !== 2) {
    throw new Error("Repository must be in owner/repo format");
  }

  const [owner, repo] = repoSegments;
  const isDotSegment = (segment) => segment === "." || segment === "..";
  if (
    isDotSegment(owner) ||
    isDotSegment(repo) ||
    !ALLOWED_SEGMENT.test(owner) ||
    !ALLOWED_SEGMENT.test(repo)
  ) {
    throw new Error("Invalid owner/repo characters");
  }

  const normalizedOwner = owner.toLowerCase();
  const normalizedRepo = repo.toLowerCase();
  const requestedRef = refPart ? refPart.trim() : "";

  return {
    repo_full_name: `${normalizedOwner}/${normalizedRepo}`,
    owner: normalizedOwner,
    repo: normalizedRepo,
    requested_ref: requestedRef.length ? requestedRef : null
  };
}
|
|
180
|
+
/**
 * Normalize a user-supplied git ref.
 *
 * @param {string | null | undefined} raw Ref as given by the caller.
 * @returns {string | null} The trimmed ref with one leading `refs/heads/`
 *   removed, or `null` when the input is missing or blank.
 */
function normalizeRef(raw) {
  const headsPrefix = "refs/heads/";
  const cleaned = raw ? raw.trim() : "";

  if (cleaned.length === 0) {
    return null;
  }
  return cleaned.startsWith(headsPrefix) ? cleaned.slice(headsPrefix.length) : cleaned;
}
|
|
193
|
+
/**
 * Heuristic check for a commit SHA.
 *
 * @param {string} value Candidate ref.
 * @returns {boolean} True when the value is exactly 7 or exactly 40 hex digits.
 */
function isLikelyCommitSha(value) {
  const matchers = [SHA_7, SHA_40];
  return matchers.some((matcher) => matcher.test(value));
}
|
|
196
|
+
/**
 * Classify a normalized ref.
 *
 * @param {string | null} normalizedRef Ref after normalization.
 * @returns {"default" | "sha" | "branch"} "default" when no ref was given,
 *   "sha" when it looks like a commit SHA, otherwise "branch".
 */
function inferRefType(normalizedRef) {
  if (normalizedRef && isLikelyCommitSha(normalizedRef)) {
    return "sha";
  }
  return normalizedRef ? "branch" : "default";
}
|
|
202
|
+
/**
 * Derive a stable identifier for an (owner, repo, commit) triple.
 *
 * @param {{ owner: string, repo: string, commitSha: string }} key
 * @returns {string} Hex SHA-1 of the lower-cased parts joined with "-".
 */
function formatIngestKey({ owner, repo, commitSha }) {
  const lowered = [owner, repo, commitSha].map((part) => part.toLowerCase());
  return createHash("sha1").update(lowered.join("-")).digest("hex");
}
|
|
206
|
+
|
|
207
|
+
// src/ingestion/snapshot.ts
|
|
208
|
+
// File name of the manifest written into each snapshot directory; the walker
// and digest code exclude files with this name.
var MANIFEST_FILE_NAME = "snapshot-manifest.json";

// Directory name skipped while walking a snapshot.
var GIT_DIRECTORY = ".git";
|
|
210
|
+
/**
 * Express a path relative to the current directory using forward slashes.
 *
 * @param {string} raw Path to normalize.
 * @returns {string} The path relative to ".", with backslashes turned into
 *   "/" and any leading "./" removed ("." itself becomes "").
 */
function normalizeRelativePath(raw) {
  const relativeToHere = path2.relative(".", raw);
  const forwardSlashed = relativeToHere.replace(/\\/g, "/");
  return forwardSlashed.replace(/^\.\//, "");
}
|
|
213
|
+
/**
 * Tell whether a path points at a snapshot manifest.
 *
 * @param {string} entryPath Path to inspect; only its last segment matters.
 * @returns {boolean} True when the base name equals MANIFEST_FILE_NAME.
 */
function isManifestFile(entryPath) {
  const fileName = path2.basename(entryPath);
  return fileName === MANIFEST_FILE_NAME;
}
|
|
216
|
+
/**
 * Return a copy of the entries ordered by `path` using `localeCompare`.
 * The input array is left untouched.
 *
 * @param {Array<{ path: string }>} entries
 * @returns {Array<{ path: string }>} New, sorted array holding the same objects.
 */
function sortEntries(entries) {
  const byPath = (left, right) => left.path.localeCompare(right.path);
  const copy = Array.from(entries);
  copy.sort(byPath);
  return copy;
}
|
|
219
|
+
/**
 * Compute the SHA-1 of a file's contents.
 *
 * @param {string} filePath File to read in full.
 * @returns {Promise<string>} Hex digest; rejects when the file cannot be read.
 */
async function hashFile(filePath) {
  const bytes = await fs.readFile(filePath);
  const digest = createHash2("sha1");
  digest.update(bytes);
  return digest.digest("hex");
}
|
|
224
|
+
/**
 * Recursively collect regular files below `root`/`current` into `entries`.
 *
 * Symbolic links and anything named GIT_DIRECTORY are skipped. A `current`
 * that itself names the manifest file is skipped; manifest files met while
 * listing a directory are still recorded, and callers filter them afterwards.
 * Directory listings are visited in `localeCompare` order of entry names.
 *
 * @param {string} root Base directory of the walk.
 * @param {string} current Path relative to `root` to visit.
 * @param {Array<{ path: string, bytes: number, hash: string }>} entries
 *   Output array, appended to in place.
 * @returns {Promise<void>}
 */
async function walkFiles(root, current, entries) {
  const here = path2.join(root, current);
  const relativeHere = normalizeRelativePath(current);
  if (isManifestFile(relativeHere)) {
    return;
  }

  const info = await fs.lstat(here);
  if (info.isSymbolicLink()) {
    return;
  }

  // `current` may point directly at a file rather than a directory.
  if (info.isFile()) {
    entries.push({
      path: path2.relative(root, here).replace(/\\/g, "/"),
      bytes: info.size,
      hash: await hashFile(here)
    });
    return;
  }

  if (!info.isDirectory()) {
    return;
  }
  if (relativeHere === GIT_DIRECTORY || path2.basename(here) === GIT_DIRECTORY) {
    return;
  }

  const children = await fs.readdir(here, { withFileTypes: true });
  children.sort((left, right) => left.name.localeCompare(right.name));

  for (const child of children) {
    if (child.name === GIT_DIRECTORY) {
      continue;
    }

    const childRelative = path2.join(current, child.name);
    if (child.isDirectory()) {
      await walkFiles(root, childRelative, entries);
      continue;
    }
    if (!child.isFile()) {
      // Symlinks and other special entries are ignored.
      continue;
    }

    const childAbsolute = path2.join(root, current, child.name);
    const childInfo = await fs.stat(childAbsolute);
    entries.push({
      path: normalizeRelativePath(childRelative),
      bytes: childInfo.size,
      hash: await hashFile(childAbsolute)
    });
  }
}
|
|
271
|
+
/**
 * Compute a signature over a set of file entries.
 *
 * Entries are ordered with sortEntries, then each contributes the line
 * `<path>:<bytes>:<hash>\n` to a SHA-1 digest.
 *
 * @param {Array<{ path: string, bytes: number, hash: string }>} entries
 * @returns {string} Hex SHA-1 signature.
 */
function manifestDigest(entries) {
  const digest = createHash2("sha1");
  for (const { path: entryPath, bytes, hash: contentHash } of sortEntries(entries)) {
    digest.update(`${entryPath}:${bytes}:${contentHash}\n`);
  }
  return digest.digest("hex");
}
|
|
282
|
+
/**
 * Run `git` with the given arguments and wait for it to finish.
 *
 * stdin and stdout are ignored. stderr is captured (bounded to its last few
 * kilobytes) so that a failure reports why git failed, not just that it did.
 *
 * @param {string[]} args Arguments passed to `git`.
 * @param {string} cwd Working directory for the child process.
 * @returns {Promise<void>} Resolves when git exits with code 0.
 * @throws {Error} The spawn error when git cannot be started; otherwise an
 *   Error whose message starts with `git command failed: git <args>` followed
 *   by the exit code or signal and any captured stderr output.
 */
async function runGitCommand(args, cwd) {
  const maxStderrChars = 4096;

  await new Promise((resolve, reject) => {
    const child = spawn("git", args, {
      cwd,
      stdio: ["ignore", "ignore", "pipe"]
    });

    let stderrTail = "";
    // Optional chaining: the stream may be absent if the spawn itself failed.
    child.stderr?.setEncoding("utf8");
    child.stderr?.on("data", (chunk) => {
      stderrTail = (stderrTail + chunk).slice(-maxStderrChars);
    });

    child.once("error", reject);
    // "close" fires after the stdio streams have ended, so stderrTail is complete.
    child.once("close", (code, signal) => {
      if (code === 0) {
        resolve();
        return;
      }
      const reason = signal ? `signal ${signal}` : `exit code ${code}`;
      const detail = stderrTail.trim();
      const suffix = detail ? `\n${detail}` : "";
      reject(new Error(`git command failed: git ${args.join(" ")} (${reason})${suffix}`));
    });
  });
}
|
|
298
|
+
/**
 * Default snapshot materializer: fetches repository contents from GitHub.
 */
var defaultGitShell = {
  /**
   * Populate `snapshotPath` with the repository at `commitSha`.
   *
   * When `commitSha` is a full 40-character SHA, exactly that commit is
   * fetched (shallow) and checked out, so the working tree matches the commit
   * the caller records for the snapshot. A plain shallow clone would instead
   * yield whatever the default branch points at when the clone runs. For any
   * other `commitSha` value (missing or abbreviated) the shallow clone of the
   * default branch is used as a fallback.
   *
   * @param {{ repoFullName: string, commitSha?: string, snapshotPath: string }} request
   * @returns {Promise<void>} Rejects when any git command fails.
   */
  async materialize({ repoFullName, commitSha, snapshotPath }) {
    const repoUrl = `https://github.com/${repoFullName}.git`;
    const hasFullSha = typeof commitSha === "string" && /^[a-fA-F0-9]{40}$/.test(commitSha);

    if (!hasFullSha) {
      await runGitCommand(["clone", "--depth", "1", "--quiet", repoUrl, snapshotPath], process.cwd());
      return;
    }

    await fs.mkdir(snapshotPath, { recursive: true });
    await runGitCommand(["init", "--quiet"], snapshotPath);
    await runGitCommand(["fetch", "--depth", "1", "--quiet", repoUrl, commitSha], snapshotPath);
    await runGitCommand(["checkout", "--quiet", "--detach", "FETCH_HEAD"], snapshotPath);
  }
};
|
|
304
|
+
/**
 * Snapshot materializer that copies an existing local directory instead of
 * fetching from a remote.
 */
var LocalSourceShell = class {
  /**
   * @param {string} sourcePath Directory to copy from.
   */
  constructor(sourcePath) {
    this.sourcePath = sourcePath;
  }

  /**
   * Replace `snapshotPath` with a recursive copy of `sourcePath`.
   *
   * @param {{ snapshotPath: string }} request Other request fields are ignored.
   * @returns {Promise<void>}
   */
  async materialize({ snapshotPath }) {
    const removeOptions = { recursive: true, force: true };
    const copyOptions = { recursive: true, force: true };
    // Clear the destination first so files from an earlier copy do not linger.
    await fs.rm(snapshotPath, removeOptions);
    await fs.cp(this.sourcePath, snapshotPath, copyOptions);
  }
};
|
|
313
|
+
/**
 * Read and parse the manifest stored inside a snapshot directory.
 *
 * @param {string} snapshotPath Snapshot directory.
 * @returns {Promise<any | null>} The parsed JSON, or `null` when the file is
 *   missing, unreadable, or not valid JSON.
 */
async function readManifest(snapshotPath) {
  const manifestPath = path2.join(snapshotPath, MANIFEST_FILE_NAME);
  try {
    const text = await fs.readFile(manifestPath, "utf8");
    return JSON.parse(text);
  } catch {
    // Every failure is treated the same way: there is no usable manifest.
    return null;
  }
}
|
|
325
|
+
/**
 * Walk a materialized snapshot and build its manifest record.
 *
 * @param {object} request Snapshot request; reads `repoFullName`, `owner`,
 *   `repo`, `commitSha`, `resolvedRef`, `sourceRef` and `sourceDefaultBranch`.
 * @param {() => string} now Clock used for `created_at`; called once, after the walk.
 * @param {string} snapshotPath Directory to scan.
 * @returns {Promise<object>} Manifest with identity fields, file count, total
 *   bytes and the content signature. Manifest files are excluded from all totals.
 */
async function buildManifest(request, now, snapshotPath) {
  const walked = [];
  await walkFiles(snapshotPath, ".", walked);

  const tracked = walked.filter((entry) => !isManifestFile(entry.path));
  const signature = manifestDigest(tracked);

  let byteTotal = 0;
  for (const { bytes } of tracked) {
    byteTotal += bytes;
  }

  return {
    repo_full_name: request.repoFullName,
    owner: request.owner,
    repo: request.repo,
    commit_sha: request.commitSha,
    resolved_ref: request.resolvedRef,
    // Falls back to the resolved ref when no source ref was supplied.
    source_ref: request.sourceRef || request.resolvedRef,
    source_default_branch: request.sourceDefaultBranch,
    file_count: tracked.length,
    total_bytes: byteTotal,
    manifest_signature: signature,
    created_at: now()
  };
}
|
|
346
|
+
/**
 * Delete every sub-directory of `repoDir` except the one named `keepId`.
 *
 * Does nothing when `repoDir` cannot be listed. Non-directory entries are left
 * alone. One line is written to stderr per removed directory.
 *
 * @param {string} repoDir Directory holding one sub-directory per snapshot.
 * @param {string} keepId Name of the sub-directory to preserve.
 * @returns {Promise<void>}
 */
async function pruneOldSnapshots(repoDir, keepId) {
  const listing = await fs.readdir(repoDir, { withFileTypes: true }).catch(() => null);
  if (listing === null) {
    return;
  }

  const stale = listing.filter((item) => item.isDirectory() && item.name !== keepId);
  for (const { name } of stale) {
    await fs.rm(path2.join(repoDir, name), { recursive: true, force: true });
    process.stderr.write(` [snapshot] pruned old snapshot: ${name}\n`);
  }
}
|
|
361
|
+
/**
 * Creates and reuses on-disk snapshots of a repository at a given commit.
 */
var RepoSnapshotManager = class {
  // Clock returning the timestamp string recorded as a manifest's created_at.
  now;
  // When true, an existing snapshot is never reused.
  forceRebuild;
  // Absolute root directory under which snapshots are stored.
  rootPath;
  // Object whose `materialize` method populates a snapshot directory.
  gitShell;

  /**
   * @param {object} config Reads `now`, `forceRebuild`, `snapshotRoot`,
   *   `gitShell` and `sourcePath`. Without `gitShell`, a `sourcePath` selects a
   *   LocalSourceShell; otherwise defaultGitShell is used.
   */
  constructor(config) {
    this.config = config;
    this.now = config.now || (() => (/* @__PURE__ */ new Date()).toISOString());
    this.forceRebuild = Boolean(config.forceRebuild);
    this.rootPath = path2.resolve(config.snapshotRoot || "devport-output/snapshots");

    const fallbackShell = config.sourcePath
      ? new LocalSourceShell(config.sourcePath)
      : defaultGitShell;
    this.gitShell = config.gitShell || fallbackShell;
  }

  /**
   * Ensure a snapshot exists for the requested commit.
   *
   * Other snapshot directories of the same repository are pruned first. An
   * existing snapshot is reused when rebuilds are not forced, its manifest
   * names the same commit, and its files still hash to the recorded signature.
   * Otherwise the directory is removed, re-materialized and given a new manifest.
   *
   * @param {object} request Reads `owner`, `repo`, `commitSha` and
   *   `repoFullName`; the whole object is also passed to buildManifest.
   * @returns {Promise<{ snapshotPath: string, snapshotId: string, manifest: object, idempotentHit: boolean }>}
   */
  async createSnapshot(request) {
    const { owner, repo, commitSha, repoFullName } = request;

    const snapshotId = formatIngestKey({ owner, repo, commitSha });
    const repoDir = path2.join(this.rootPath, owner, repo);
    await pruneOldSnapshots(repoDir, snapshotId);

    const snapshotPath = path2.join(repoDir, snapshotId);
    const cached = existsSync(snapshotPath) ? await readManifest(snapshotPath) : null;

    if (cached && !this.forceRebuild) {
      const onDisk = [];
      await walkFiles(snapshotPath, ".", onDisk);
      const signature = manifestDigest(onDisk.filter((entry) => !isManifestFile(entry.path)));

      const unchanged = cached.commit_sha === commitSha && cached.manifest_signature === signature;
      if (unchanged) {
        return { snapshotPath, snapshotId, manifest: cached, idempotentHit: true };
      }
    }

    await fs.rm(snapshotPath, { recursive: true, force: true });
    await this.gitShell.materialize({ repoFullName, commitSha, snapshotPath });

    const manifest = await buildManifest(request, this.now, snapshotPath);
    const manifestPath = path2.join(snapshotPath, MANIFEST_FILE_NAME);
    await fs.writeFile(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`, "utf8");

    return { snapshotPath, snapshotId, manifest, idempotentHit: false };
  }
};
|
|
415
|
+
|
|
416
|
+
// src/ingestion/metadata.ts
|
|
417
|
+
import { promises as fs2 } from "node:fs";
|
|
418
|
+
import path3 from "node:path";
|
|
419
|
+
// Default caps applied by extractMetadata when the caller gives no limit.
var DEFAULT_MAX_KEY_PATHS = 120;
var DEFAULT_MAX_EXTENSION_BUCKETS = 64;
var DEFAULT_MAX_LANGUAGE_BUCKETS = 32;

// Bucket label used for files whose name has no extension.
var NO_EXTENSION_LABEL = "[no_ext]";
|
|
423
|
+
// Case-insensitive patterns, tested against a file's base name, that mark the
// file as a "key path" in buildKeyPaths. Each literal is copied into a fresh
// RegExp instance.
var KEY_PATH_HINTS = [
  // Project documentation and legal files.
  /^readme/i,
  /^license/i,
  /^changelog/i,
  /^dockerfile/i,
  // JavaScript / TypeScript tooling.
  /^package\.json$/i,
  /^pnpm-lock\.yaml$/i,
  /^yarn\.lock$/i,
  /^package-lock\.json$/i,
  /^tsconfig\.json$/i,
  /^vite\.config\./i,
  /^jest\.config\./i,
  /^next\.config\./i,
  /^turbo\.json$/i,
  // Other ecosystems' build and dependency manifests.
  /^go\.mod$/i,
  /^pyproject\.toml$/i,
  /^Cargo\.toml$/i,
  /^requirements\.txt$/i,
  /^Pipfile$/i,
  /^pom\.xml$/i,
  /^build\.gradle$/i,
  /^gradle\.properties$/i,
  /^docker-compose\.ya?ml$/i,
  /^mkdocs\.ya?ml$/i,
  /^Makefile$/i
].map((entry) => new RegExp(entry));
|
|
449
|
+
/**
 * Convert every backslash in a path to a forward slash.
 *
 * @param {string} relativePath
 * @returns {string}
 */
function normalizePath(relativePath) {
  return relativePath.replaceAll("\\", "/");
}
|
|
452
|
+
/**
 * Get a path's lower-cased extension without the leading dot.
 *
 * @param {string} rawPath
 * @returns {string} The extension, or NO_EXTENSION_LABEL when there is none.
 */
function normalizeExtension(rawPath) {
  const dotted = path3.extname(rawPath).toLowerCase();
  const bare = dotted.startsWith(".") ? dotted.slice(1) : dotted;
  return bare === "" ? NO_EXTENSION_LABEL : bare;
}
|
|
456
|
+
/**
 * Copy at most `limit` properties of `input`, choosing keys in sorted order.
 *
 * @param {Record<string, any>} input
 * @param {number} limit Maximum number of keys to keep; negative values act as 0.
 * @returns {Record<string, any>} New object holding the first `limit` sorted keys.
 */
function truncateSortedObject(input, limit) {
  const keep = Math.max(0, limit);
  const orderedKeys = Object.keys(input).sort();
  const output = {};
  for (const key of orderedKeys.slice(0, keep)) {
    output[key] = input[key];
  }
  return output;
}
|
|
464
|
+
/**
 * Map a file extension (without the dot) to a display language name.
 *
 * The lookup only consults the table's own keys. A plain `table[ext]` read
 * would also find inherited members of Object.prototype, so an extension such
 * as `constructor` or `__proto__` would yield a function or object instead of
 * a language name.
 *
 * @param {string} extension Extension in any letter case.
 * @returns {string} The language name, or "Other" for unknown extensions.
 */
function extensionToLanguage(extension) {
  const ext = extension.toLowerCase();
  const languageByExtension = {
    ts: "TypeScript",
    tsx: "TypeScript",
    js: "JavaScript",
    jsx: "JavaScript",
    mjs: "JavaScript",
    cjs: "JavaScript",
    py: "Python",
    rb: "Ruby",
    rs: "Rust",
    go: "Go",
    java: "Java",
    kt: "Kotlin",
    kts: "Kotlin",
    cs: "C#",
    cpp: "C++",
    cc: "C++",
    cxx: "C++",
    h: "C",
    c: "C",
    md: "Markdown",
    mdx: "Markdown",
    yaml: "YAML",
    yml: "YAML",
    toml: "TOML",
    json: "JSON",
    xml: "XML",
    sh: "Shell",
    bash: "Shell",
    dockerfile: "Dockerfile",
    php: "PHP",
    swift: "Swift",
    scala: "Scala",
    dart: "Dart"
  };
  return Object.hasOwn(languageByExtension, ext) ? languageByExtension[ext] : "Other";
}
|
|
503
|
+
/**
 * List the regular files under `rootPath` with their sizes.
 *
 * Entries named "snapshot-manifest.json" and symbolic links are skipped, as
 * are directories whose lower-cased name is one of: .git, .husky,
 * node_modules, dist, .next, target. Each directory is visited in
 * `localeCompare` order of entry names.
 *
 * @param {string} rootPath Directory to scan.
 * @returns {Promise<Array<{ path: string, bytes: number }>>} Files with
 *   forward-slash paths relative to `rootPath`.
 */
async function collectTree(rootPath) {
  const collected = [];
  const ignoredDirectories = /* @__PURE__ */ new Set([".git", ".husky", "node_modules", "dist", ".next", "target"]);

  const visit = async (relativeDir) => {
    const listing = await fs2.readdir(path3.join(rootPath, relativeDir), { withFileTypes: true });
    listing.sort((left, right) => left.name.localeCompare(right.name));

    for (const item of listing) {
      if (item.name === "snapshot-manifest.json" || item.isSymbolicLink()) {
        continue;
      }

      const relativePath = path3.join(relativeDir, item.name);
      if (item.isDirectory()) {
        if (!ignoredDirectories.has(item.name.toLowerCase())) {
          await visit(relativePath);
        }
      } else if (item.isFile()) {
        const { size } = await fs2.stat(path3.join(rootPath, relativePath));
        collected.push({ path: normalizePath(relativePath), bytes: size });
      }
    }
  };

  await visit(".");
  return collected;
}
|
|
533
|
+
/**
 * Summarize a flat list of files.
 *
 * `total_directories` is the number of distinct directories that appear as an
 * ancestor of at least one listed file. Summing each file's own depth instead
 * would count a directory once per file it contains. Directories holding no
 * listed file cannot be seen from a file list and are not counted.
 *
 * @param {Array<{ path: string }>} entries Files with "/"-separated relative paths.
 * @param {number} extensionBucketLimit Maximum number of extension buckets to keep.
 * @returns {{ total_files: number, total_directories: number, max_depth: number, by_extension: Record<string, number> }}
 *   `max_depth` is the largest number of path segments of any file, counting
 *   the file name itself.
 */
function calculateSummary(entries, extensionBucketLimit) {
  const byExtension = {};
  const directories = /* @__PURE__ */ new Set();
  let maxDepth = 0;

  for (const entry of entries) {
    const extension = normalizeExtension(entry.path);
    byExtension[extension] = (byExtension[extension] ?? 0) + 1;

    const segments = entry.path.split("/");
    // Record every ancestor directory: "a/b/c.txt" contributes "a" and "a/b".
    for (let depth = 1; depth < segments.length; depth += 1) {
      directories.add(segments.slice(0, depth).join("/"));
    }
    maxDepth = Math.max(maxDepth, segments.length);
  }

  return {
    total_files: entries.length,
    total_directories: directories.size,
    max_depth: maxDepth,
    by_extension: truncateSortedObject(byExtension, extensionBucketLimit)
  };
}
|
|
552
|
+
/**
 * Pick the paths worth highlighting in a repository.
 *
 * A file is selected when its base name matches one of KEY_PATH_HINTS. A
 * well-known top-level directory name is selected when some considered file
 * lives beneath it. Files whose base name starts with "." are ignored for both
 * purposes.
 *
 * @param {Array<{ path: string }>} entries Files with "/"-separated relative paths.
 * @param {number} maxCount Maximum number of paths to return.
 * @returns {string[]} Sorted selection, truncated to `maxCount`.
 */
function buildKeyPaths(entries, maxCount) {
  const wellKnownDirectories = ["src", "packages", "apps", "docs", "examples", "test", "tests", "scripts", "tools", "config"];
  const selected = /* @__PURE__ */ new Set();
  const topLevelDirectories = /* @__PURE__ */ new Set();

  for (const { path: entryPath } of entries) {
    const fileName = path3.basename(entryPath);
    if (fileName === "" || fileName.startsWith(".")) {
      continue;
    }

    const firstSlash = entryPath.indexOf("/");
    if (firstSlash !== -1) {
      topLevelDirectories.add(entryPath.slice(0, firstSlash));
    }

    const isHinted = KEY_PATH_HINTS.some((pattern) => pattern.test(fileName));
    if (isHinted) {
      selected.add(entryPath);
    }
  }

  for (const directory of wellKnownDirectories) {
    if (topLevelDirectories.has(directory)) {
      selected.add(directory);
    }
  }

  return [...selected].sort().slice(0, maxCount);
}
|
|
575
|
+
/**
 * Produce a language -> bytes mapping for a snapshot.
 *
 * A non-empty `manifestLanguageMix` is used after dropping entries with a
 * blank name or a value that is not a finite, non-negative number. Otherwise
 * the mix is computed from the files by mapping each extension to a language
 * and summing byte sizes. Both results go through truncateSortedObject.
 *
 * @param {Array<{ path: string, bytes: number }>} entries Files in the snapshot.
 * @param {Record<string, number> | null | undefined} manifestLanguageMix Externally supplied mix.
 * @param {number} languageBucketLimit Maximum number of languages to keep.
 * @returns {Record<string, number>}
 */
function deriveLanguageMix(entries, manifestLanguageMix, languageBucketLimit) {
  const hasSuppliedMix = Boolean(manifestLanguageMix) && Object.keys(manifestLanguageMix).length > 0;

  if (hasSuppliedMix) {
    const cleaned = {};
    for (const [name, bytes] of Object.entries(manifestLanguageMix)) {
      const nameIsUsable = typeof name === "string" && name.trim().length > 0;
      const bytesAreUsable = Number.isFinite(bytes) && bytes >= 0;
      if (nameIsUsable && bytesAreUsable) {
        cleaned[name] = bytes;
      }
    }
    return truncateSortedObject(cleaned, languageBucketLimit);
  }

  const totals = {};
  for (const { path: entryPath, bytes } of entries) {
    const language = extensionToLanguage(normalizeExtension(entryPath));
    totals[language] = (totals[language] ?? 0) + bytes;
  }
  return truncateSortedObject(totals, languageBucketLimit);
}
|
|
596
|
+
/**
 * Sum the `bytes` field over all entries.
 *
 * @param {Array<{ bytes: number }>} entries
 * @returns {number} Total size; 0 for an empty list.
 */
function totalBytes(entries) {
  let total = 0;
  for (const { bytes } of entries) {
    total += bytes;
  }
  return total;
}
|
|
599
|
+
/**
 * Scan a snapshot directory and assemble its metadata record.
 *
 * @param {string} snapshotPath Directory to scan.
 * @param {object} options Reads `extensionBucketLimit`, `keyPathLimit` and
 *   `languageBucketLimit` (each falling back to its DEFAULT_MAX_* constant when
 *   null or undefined), plus `languageMix` and `manifestSignature`.
 * @returns {Promise<object>} Record with `tree_summary`, `language_mix`,
 *   `key_paths`, `files_scanned`, `total_bytes` and `manifest_signature`.
 */
async function extractMetadata(snapshotPath, options) {
  const extensionLimit = options.extensionBucketLimit ?? DEFAULT_MAX_EXTENSION_BUCKETS;
  const keyPathLimit = options.keyPathLimit ?? DEFAULT_MAX_KEY_PATHS;
  const languageLimit = options.languageBucketLimit ?? DEFAULT_MAX_LANGUAGE_BUCKETS;

  const files = await collectTree(snapshotPath);
  const { total_files, total_directories, max_depth, by_extension } = calculateSummary(files, extensionLimit);
  const keyPaths = buildKeyPaths(files, keyPathLimit);
  const languageMix = deriveLanguageMix(files, options.languageMix, languageLimit);

  return {
    tree_summary: { total_files, total_directories, max_depth, by_extension },
    language_mix: languageMix,
    key_paths: keyPaths,
    files_scanned: files.length,
    total_bytes: totalBytes(files),
    manifest_signature: options.manifestSignature
  };
}
|
|
625
|
+
|
|
626
|
+
// src/ingestion/github.ts
|
|
627
|
+
import { Octokit } from "@octokit/rest";
|
|
628
|
+
/**
 * Error raised while resolving repositories or refs. It carries a
 * machine-readable `code` and an optional `detail` value.
 */
var IngestResolverError = class extends Error {
  /**
   * @param {string} message Human-readable description.
   * @param {string} code Error code such as "REPO_NOT_FOUND".
   * @param {unknown} [detail] Extra context, typically the underlying error.
   */
  constructor(message, code, detail) {
    super(message);
    Object.assign(this, { code, detail, name: "IngestResolverError" });
  }
};
|
|
636
|
+
/**
 * Parse a bare `owner/repo` reference into the identity used by the resolver.
 *
 * @param {string} rawRepoRef Repository reference without an `@ref` suffix.
 * @returns {{ owner: string, repo: string, repo_ref: string }} Parsed owner and
 *   repo, plus the full `owner/repo` name as `repo_ref`.
 * @throws {Error} When parsing fails or the reference includes a ref.
 */
function normalizeGitHubRepoIdentityForResolver(rawRepoRef) {
  const { owner, repo, repo_full_name: repoRef, requested_ref: requestedRef } = parseRepoRef(rawRepoRef);
  if (requestedRef !== null) {
    throw new Error("repo_ref must not include @ref for freshness detection");
  }
  return { owner, repo, repo_ref: repoRef };
}
|
|
647
|
+
/**
 * Resolver that answers repository questions through the GitHub REST API
 * using an Octokit client.
 */
var OctokitGitHubResolver = class {
  // Octokit client used for every request.
  octokit;

  /**
   * @param {string} [token] GitHub token; when omitted the client is created
   *   without an `auth` option.
   */
  constructor(token) {
    this.octokit = new Octokit(token ? { auth: token } : void 0);
  }

  /**
   * Load basic repository metadata.
   *
   * @param {{ owner: string, repo: string, repo_full_name?: string }} repo
   * @returns {Promise<{ owner: string, repo: string, full_name: string, default_branch: string, homepage_url: string | null }>}
   *   Owner, repo and full name are lower-cased.
   * @throws {IngestResolverError} "REPO_NOT_FOUND" on HTTP 404, otherwise "INVALID_REPO".
   */
  async getRepositoryMeta(repo) {
    try {
      const { data } = await this.octokit.rest.repos.get({
        owner: repo.owner,
        repo: repo.repo
      });
      return {
        owner: data.owner.login.toLowerCase(),
        repo: data.name.toLowerCase(),
        full_name: data.full_name.toLowerCase(),
        default_branch: data.default_branch,
        homepage_url: data.homepage ?? null
      };
    } catch (error) {
      if (isHttpError(error, 404)) {
        throw new IngestResolverError(
          `Repository not found: ${repo.repo_full_name}`,
          "REPO_NOT_FOUND",
          error
        );
      }
      throw new IngestResolverError(
        `Failed to load repository metadata: ${repo.repo_full_name}`,
        "INVALID_REPO",
        error
      );
    }
  }

  /**
   * Resolve a branch name or SHA to a commit SHA.
   *
   * @param {{ owner: string, repo: string, repo_full_name?: string }} repo
   * @param {string} ref Ref to resolve.
   * @returns {Promise<string>} The commit SHA reported by the API.
   * @throws {IngestResolverError} "REF_NOT_FOUND" on HTTP 404, otherwise "UNRESOLVED_REF".
   */
  async resolveRefToCommit(repo, ref) {
    try {
      const resolved = await this.octokit.rest.repos.getCommit({
        owner: repo.owner,
        repo: repo.repo,
        ref
      });
      return resolved.data.sha;
    } catch (error) {
      if (isHttpError(error, 404)) {
        throw new IngestResolverError(
          `Unable to resolve ref '${ref}' for ${repo.repo_full_name}`,
          "REF_NOT_FOUND",
          error
        );
      }
      throw new IngestResolverError(
        `Failed to resolve ref '${ref}' for ${repo.repo_full_name}`,
        "UNRESOLVED_REF",
        error
      );
    }
  }

  /**
   * Fetch the repository's language breakdown.
   *
   * @param {{ owner: string, repo: string }} repo
   * @returns {Promise<object | null>} The API payload, or `null` when the
   *   request fails for any reason (best-effort lookup).
   */
  async getRepositoryLanguages(repo) {
    try {
      const response = await this.octokit.rest.repos.listLanguages({
        owner: repo.owner,
        repo: repo.repo
      });
      return response.data;
    } catch {
      return null;
    }
  }

  /**
   * List releases, newest page first as returned by the API.
   *
   * Releases whose `published_at` sorts before `input.since` are skipped;
   * releases without a publish date are kept. `input` may be omitted, in which
   * case no date filter is applied and default paging limits are used.
   *
   * @param {{ owner: string, repo: string }} repo
   * @param {{ since?: string, per_page?: number, max_items?: number }} [input]
   *   `per_page` is clamped to 1..100 (default 100) and `max_items` to 1..500
   *   (default 200).
   * @returns {Promise<Array<{ id: number, tag_name: string, name: string | null, published_at: string | null, prerelease: boolean, draft: boolean, html_url: string | null }>>}
   */
  async getRepositoryReleases(repo, input = {}) {
    return this.#collectPages(
      (paging) => this.octokit.rest.repos.listReleases({ owner: repo.owner, repo: repo.repo, ...paging }),
      input,
      (release) => {
        const publishedAt = release.published_at ?? null;
        if (publishedAt && publishedAt < input.since) {
          return null;
        }
        return {
          id: release.id,
          tag_name: release.tag_name,
          name: release.name ?? null,
          published_at: publishedAt,
          prerelease: Boolean(release.prerelease),
          draft: Boolean(release.draft),
          html_url: release.html_url ?? null
        };
      }
    );
  }

  /**
   * List tags in the order returned by the API.
   *
   * @param {{ owner: string, repo: string }} repo
   * @param {{ per_page?: number, max_items?: number }} [input] Same clamping as
   *   getRepositoryReleases.
   * @returns {Promise<Array<{ name: string, commit_sha: string | null, tarball_url: string | null, zipball_url: string | null }>>}
   */
  async getRepositoryTags(repo, input = {}) {
    return this.#collectPages(
      (paging) => this.octokit.rest.repos.listTags({ owner: repo.owner, repo: repo.repo, ...paging }),
      input,
      (tag) => ({
        name: tag.name,
        commit_sha: tag.commit?.sha ?? null,
        tarball_url: tag.tarball_url ?? null,
        zipball_url: tag.zipball_url ?? null
      })
    );
  }

  /**
   * Shared page-by-page collection loop for list endpoints.
   *
   * Requests pages starting at 1 until `max_items` results are collected, a
   * page comes back empty, or a page is shorter than `per_page`.
   *
   * @param {(paging: { per_page: number, page: number }) => Promise<{ data?: any[] }>} fetchPage
   * @param {{ per_page?: number, max_items?: number }} limits
   * @param {(raw: any) => any | null} toItem Maps a raw API item; returning
   *   `null` drops the item.
   * @returns {Promise<any[]>}
   */
  async #collectPages(fetchPage, limits, toItem) {
    const perPage = Math.max(1, Math.min(100, limits.per_page ?? 100));
    const maxItems = Math.max(1, Math.min(500, limits.max_items ?? 200));
    const collected = [];

    for (let page = 1; collected.length < maxItems; page += 1) {
      const response = await fetchPage({ per_page: perPage, page });
      const list = response.data ?? [];
      if (list.length === 0) {
        break;
      }

      for (const raw of list) {
        const item = toItem(raw);
        if (item === null) {
          continue;
        }
        collected.push(item);
        if (collected.length >= maxItems) {
          break;
        }
      }

      // A short page means there is nothing further to request.
      if (list.length < perPage) {
        break;
      }
    }

    return collected;
  }
};
|
|
794
|
+
/**
 * Resolve a parsed repository reference to a concrete commit.
 *
 * Repository metadata is loaded first. When no ref was requested, the
 * repository's default branch is resolved instead.
 *
 * @param {{ owner: string, repo: string, repo_full_name: string, requested_ref: string | null }} repo
 * @param {{ getRepositoryMeta: Function, resolveRefToCommit: Function }} resolver
 * @returns {Promise<{ requested_ref: string | null, requested_ref_type: string, resolved_ref: string, commit_sha: string, source_default_branch: string }>}
 *   `requested_ref` is echoed as given; `commit_sha` is lower-cased.
 * @throws {Error} Any Error thrown while loading metadata is rethrown as is;
 *   a non-Error throw there becomes an IngestResolverError "INVALID_REPO".
 * @throws {IngestResolverError} Resolver errors from ref resolution are
 *   rethrown; any other failure there is wrapped as "UNRESOLVED_REF".
 */
async function resolveToCommitSha(repo, resolver) {
  const normalizedRef = normalizeRef(repo.requested_ref);

  let metadata;
  try {
    metadata = await resolver.getRepositoryMeta(repo);
  } catch (error) {
    if (!(error instanceof Error)) {
      throw new IngestResolverError("Unable to load repository metadata", "INVALID_REPO");
    }
    throw error;
  }

  const refType = inferRefType(normalizedRef);
  const refToResolve = refType === "default" ? metadata.default_branch : normalizedRef;

  try {
    const repoWithNormalizedRef = { ...repo, requested_ref: normalizedRef };
    const commitSha = await resolver.resolveRefToCommit(repoWithNormalizedRef, refToResolve);
    return {
      requested_ref: repo.requested_ref,
      requested_ref_type: refType,
      resolved_ref: refToResolve,
      commit_sha: commitSha.toLowerCase(),
      source_default_branch: metadata.default_branch
    };
  } catch (error) {
    if (error instanceof IngestResolverError) {
      throw error;
    }
    throw new IngestResolverError(
      `Failed to resolve ref for ${repo.repo_full_name}`,
      "UNRESOLVED_REF",
      error
    );
  }
}
|
|
836
|
+
/**
 * Tells whether a caught value is an object carrying the given HTTP status.
 *
 * @param {*} error - Any caught value.
 * @param {number} status - Status code to compare against `error.status`.
 * @returns {boolean} True only for a non-null object whose `status` property
 *   strictly equals `status`.
 */
function isHttpError(error, status) {
  if (!error || typeof error !== "object") {
    return false;
  }
  return "status" in error && error.status === status;
}
|
|
839
|
+
|
|
840
|
+
// src/ingestion/official-docs.ts
|
|
841
|
+
import { promises as fs4 } from "node:fs";
|
|
842
|
+
import path5 from "node:path";
|
|
843
|
+
|
|
844
|
+
// src/ingestion/trends.ts
|
|
845
|
+
import { createHash as createHash3 } from "node:crypto";
|
|
846
|
+
import { promises as fs3 } from "node:fs";
|
|
847
|
+
import path4 from "node:path";
|
|
848
|
+
// File name of the JSON manifest kept at the root of a snapshot directory.
var MANIFEST_FILE_NAME2 = "snapshot-manifest.json";
// Snapshot-relative directory that receives the trend artifacts.
var TRENDS_DIR = "__devport__/trends";
// Window size (in days) used when no valid override is supplied.
var DEFAULT_TREND_WINDOW_DAYS = 180;
// Upper bounds on how many releases and tags are collected per run.
var MAX_RELEASE_ITEMS = 120;
var MAX_TAG_ITEMS = 120;
|
|
853
|
+
/**
 * Converts a path to forward-slash form: backslashes become "/", one leading
 * "./" is removed, and runs of slashes collapse to a single "/".
 *
 * @param {string} value - Path to normalize.
 * @returns {string} The normalized path.
 */
function normalizePath2(value) {
  const forwardSlashed = value.replace(/\\/g, "/");
  const withoutDotPrefix = forwardSlashed.replace(/^\.\//, "");
  return withoutDotPrefix.replace(/\/+/g, "/");
}
|
|
856
|
+
/**
 * Coerces a raw trend-window setting into a whole number of days.
 *
 * @param {*} raw - Number, or anything `Number()` can coerce (e.g. an env string).
 * @returns {number} `DEFAULT_TREND_WINDOW_DAYS` when the value is not a finite
 *   positive number; otherwise the value floored and clamped to 1..3650.
 */
function normalizeWindowDays(raw) {
  const days = typeof raw === "number" ? raw : Number(raw);
  const usable = Number.isFinite(days) && days > 0;
  if (!usable) {
    return DEFAULT_TREND_WINDOW_DAYS;
  }
  const wholeDays = Math.floor(days);
  // Fractions below one day floor to 0, so the lower bound still matters.
  return Math.min(3650, Math.max(1, wholeDays));
}
|
|
863
|
+
/**
 * Computes the ISO-8601 timestamp lying `windowDays` days before `now`.
 *
 * @param {*} now - Anything accepted by `new Date(...)`; when it does not parse
 *   to a finite time the current system time is used instead.
 * @param {number} windowDays - Length of the window in days.
 * @returns {string} Cutoff instant as produced by `Date.prototype.toISOString`.
 */
function cutoffIso(now, windowDays) {
  const windowMs = windowDays * 24 * 60 * 60 * 1e3;
  const parsed = new Date(now).getTime();
  const anchor = Number.isFinite(parsed) ? parsed : Date.now();
  return new Date(anchor - windowMs).toISOString();
}
|
|
870
|
+
/**
 * Fetches releases through the resolver and returns those inside the window,
 * newest first.
 *
 * Releases without `published_at` are kept and sort last. Ties on the
 * publication timestamp are broken by `tag_name` using numeric-aware,
 * case-insensitive ("en", base sensitivity) comparison. Timestamps are
 * compared as strings.
 *
 * @param {object} repo - Repository descriptor forwarded to the resolver.
 * @param {object} resolver - May implement `getRepositoryReleases(repo, opts)`.
 * @param {string} since - ISO timestamp marking the start of the window.
 * @returns {Promise<Array<object>>} At most `MAX_RELEASE_ITEMS` releases; an
 *   empty array when the resolver lacks the method or the call fails
 *   (best-effort: errors are deliberately swallowed).
 */
async function collectReleases(repo, resolver, since) {
  if (typeof resolver.getRepositoryReleases !== "function") {
    return [];
  }
  const publishedOf = (release) => release.published_at ?? "";
  const newestFirstThenTag = (left, right) => {
    const byPublishedDesc = publishedOf(right).localeCompare(publishedOf(left));
    if (byPublishedDesc !== 0) {
      return byPublishedDesc;
    }
    return (left.tag_name ?? "").localeCompare(right.tag_name ?? "", "en", {
      numeric: true,
      sensitivity: "base"
    });
  };
  try {
    const fetched = await resolver.getRepositoryReleases(repo, {
      since,
      per_page: 100,
      max_items: MAX_RELEASE_ITEMS
    });
    const inWindow = (fetched ?? []).filter((release) => {
      const publishedAt = publishedOf(release);
      return publishedAt.length === 0 || publishedAt >= since;
    });
    inWindow.sort(newestFirstThenTag);
    return inWindow.slice(0, MAX_RELEASE_ITEMS);
  } catch {
    return [];
  }
}
|
|
899
|
+
/**
 * Fetches tags through the resolver and returns them sorted by name using
 * numeric-aware, case-insensitive ("en", base sensitivity) comparison.
 *
 * The array returned by the resolver is sorted in place before being sliced.
 *
 * @param {object} repo - Repository descriptor forwarded to the resolver.
 * @param {object} resolver - May implement `getRepositoryTags(repo, opts)`.
 * @returns {Promise<Array<object>>} At most `MAX_TAG_ITEMS` tags in ascending
 *   name order; an empty array when the resolver lacks the method or the call
 *   fails (best-effort: errors are deliberately swallowed).
 */
async function collectTags(repo, resolver) {
  if (typeof resolver.getRepositoryTags !== "function") {
    return [];
  }
  const byNameAscending = (left, right) => left.name.localeCompare(right.name, "en", {
    numeric: true,
    sensitivity: "base"
  });
  try {
    const fetched = await resolver.getRepositoryTags(repo, {
      per_page: 100,
      max_items: MAX_TAG_ITEMS
    });
    const ordered = (fetched ?? []).sort(byNameAscending);
    return ordered.slice(0, MAX_TAG_ITEMS);
  } catch {
    return [];
  }
}
|
|
918
|
+
/**
 * Writes `payload` as 2-space-indented JSON followed by a newline, creating
 * the parent directory first when needed.
 *
 * @param {string} filePath - Destination file path.
 * @param {*} payload - Value passed to `JSON.stringify`.
 * @returns {Promise<void>}
 */
async function writeJsonFile(filePath, payload) {
  const parentDir = path4.dirname(filePath);
  await fs3.mkdir(parentDir, { recursive: true });
  const body = `${JSON.stringify(payload, null, 2)}\n`;
  await fs3.writeFile(filePath, body, "utf8");
}
|
|
923
|
+
/**
 * Reads and parses the manifest file (`MANIFEST_FILE_NAME2`) located directly
 * inside a snapshot directory.
 *
 * @param {string} snapshotPath - Snapshot root directory.
 * @returns {Promise<*>} The parsed JSON content.
 * @throws Rejects when the file cannot be read or is not valid JSON.
 */
async function readSnapshotManifest(snapshotPath) {
  const manifestText = await fs3.readFile(path4.join(snapshotPath, MANIFEST_FILE_NAME2), "utf8");
  return JSON.parse(manifestText);
}
|
|
928
|
+
/**
 * Computes the hex-encoded SHA-1 digest of a file's full contents.
 *
 * @param {string} filePath - File to hash; it is read into memory in one go.
 * @returns {Promise<string>} Hex SHA-1 digest.
 */
async function hashFile2(filePath) {
  const bytes = await fs3.readFile(filePath);
  const hasher = createHash3("sha1");
  hasher.update(bytes);
  return hasher.digest("hex");
}
|
|
932
|
+
/**
 * Recursively walks a snapshot directory and appends one record per regular
 * file to `entries`.
 *
 * Symbolic links, the root-level `.git` directory, any entry whose base name
 * equals `MANIFEST_FILE_NAME2`, and anything that is neither a directory nor a
 * regular file are skipped. Directory listings are visited in `localeCompare`
 * name order.
 *
 * @param {string} rootPath - Snapshot root directory.
 * @param {string} relativeDir - Directory to scan, relative to `rootPath`.
 * @param {Array<{path: string, bytes: number, hash: string}>} entries -
 *   Accumulator mutated in place; `path` is root-relative with forward slashes.
 * @returns {Promise<void>}
 */
async function walkSnapshotFiles(rootPath, relativeDir, entries) {
  const listing = await fs3.readdir(path4.join(rootPath, relativeDir), { withFileTypes: true });
  listing.sort((left, right) => left.name.localeCompare(right.name));
  for (const item of listing) {
    if (item.isSymbolicLink()) {
      continue;
    }
    const relativePath = normalizePath2(path4.join(relativeDir, item.name));
    const insideGitDir = relativePath === ".git" || relativePath.startsWith(".git/");
    const isManifest = path4.basename(relativePath) === MANIFEST_FILE_NAME2;
    if (insideGitDir || isManifest) {
      continue;
    }
    if (item.isDirectory()) {
      await walkSnapshotFiles(rootPath, relativePath, entries);
      continue;
    }
    if (!item.isFile()) {
      continue;
    }
    const absoluteFile = path4.join(rootPath, relativePath);
    const { size } = await fs3.stat(absoluteFile);
    const hash = await hashFile2(absoluteFile);
    entries.push({ path: relativePath, bytes: size, hash });
  }
}
|
|
963
|
+
/**
 * Derives a SHA-1 signature from a list of file records.
 *
 * Records are ordered by `path` (via `localeCompare`, on a copy of the input)
 * and each contributes the line `<path>:<bytes>:<hash>\n` to the digest.
 *
 * NOTE(review): `localeCompare` is called without an explicit locale, so the
 * ordering follows the runtime's default locale — confirm this is acceptable
 * for signatures compared across machines.
 *
 * @param {Array<{path: string, bytes: number, hash: string}>} entries - File records.
 * @returns {string} Hex SHA-1 digest.
 */
function buildManifestSignature(entries) {
  const digest = createHash3("sha1");
  const ordered = [...entries].sort((left, right) => left.path.localeCompare(right.path));
  ordered.forEach((entry) => {
    digest.update(`${entry.path}:${String(entry.bytes)}:${entry.hash}\n`);
  });
  return digest.digest("hex");
}
|
|
974
|
+
/**
 * Rescans a snapshot directory and rewrites its manifest with fresh totals.
 *
 * The existing manifest is loaded, every tracked file is re-walked, and the
 * manifest is written back with `file_count`, `total_bytes` and
 * `manifest_signature` replaced; all other manifest keys are preserved.
 *
 * @param {string} snapshotPath - Snapshot root directory.
 * @returns {Promise<string>} The newly computed manifest signature.
 */
async function refreshSnapshotManifest(snapshotPath) {
  const manifestPath = path4.join(snapshotPath, MANIFEST_FILE_NAME2);
  const previous = await readSnapshotManifest(snapshotPath);
  const files = [];
  await walkSnapshotFiles(snapshotPath, ".", files);
  const signature = buildManifestSignature(files);
  let byteTotal = 0;
  for (const file of files) {
    byteTotal += file.bytes;
  }
  const refreshed = {
    ...previous,
    file_count: files.length,
    total_bytes: byteTotal,
    manifest_signature: signature
  };
  await fs3.writeFile(manifestPath, `${JSON.stringify(refreshed, null, 2)}\n`, "utf8");
  return signature;
}
|
|
992
|
+
/**
 * Collects release/tag trend data for a repository, writes it under the
 * snapshot's trends directory, and refreshes the snapshot manifest.
 *
 * Three files are written: `releases.json`, `tags.json` and
 * `changelog-summary.json`. The window length comes from
 * `input.trendWindowDays`, falling back to the `DEVPORT_TREND_WINDOW_DAYS`
 * environment variable, and is normalized by `normalizeWindowDays`.
 *
 * @param {object} input
 * @param {string} input.snapshotPath - Snapshot root directory.
 * @param {object} input.repo - Repository descriptor (`repo_full_name` is recorded).
 * @param {object} input.resolver - Resolver handed to the collectors.
 * @param {string} input.commitSha - Commit recorded in each artifact.
 * @param {() => string} [input.now] - Clock returning an ISO timestamp;
 *   defaults to the system clock.
 * @param {number|string} [input.trendWindowDays] - Window override.
 * @returns {Promise<{window_days: number, releases_path: string, tags_path: string, changelog_summary_path: string, release_count: number, tag_count: number, manifest_signature: string}>}
 *   Paths are snapshot-relative with forward slashes.
 */
async function persistTrendArtifacts(input) {
  const now = (input.now ?? (() => (/* @__PURE__ */ new Date()).toISOString()))();
  const windowDays = normalizeWindowDays(
    input.trendWindowDays ?? process.env.DEVPORT_TREND_WINDOW_DAYS
  );
  const since = cutoffIso(now, windowDays);
  // The two lookups are independent, so issue them together.
  const [releases, tags] = await Promise.all([
    collectReleases(input.repo, input.resolver, since),
    collectTags(input.repo, input.resolver)
  ]);
  const trendDir = path4.join(input.snapshotPath, TRENDS_DIR);
  const releasesPath = path4.join(trendDir, "releases.json");
  const tagsPath = path4.join(trendDir, "tags.json");
  const changelogSummaryPath = path4.join(trendDir, "changelog-summary.json");
  const header = {
    repo_ref: input.repo.repo_full_name,
    commit_sha: input.commitSha,
    window_days: windowDays
  };
  await writeJsonFile(releasesPath, { ...header, since, releases });
  await writeJsonFile(tagsPath, { ...header, tags });
  await writeJsonFile(changelogSummaryPath, {
    ...header,
    release_count: releases.length,
    tag_count: tags.length,
    // Releases arrive newest-first, so the head of the list is the latest.
    latest_release_tags: releases.slice(0, 10).map((release) => release.tag_name),
    // Tags arrive in ascending name order, so the latest ones sit at the tail;
    // take the last ten and list the highest first.
    latest_tags: tags.slice(-10).reverse().map((tag) => tag.name)
  });
  const manifestSignature = await refreshSnapshotManifest(input.snapshotPath);
  const toSnapshotRelative = (absolutePath) => normalizePath2(path4.relative(input.snapshotPath, absolutePath));
  return {
    window_days: windowDays,
    releases_path: toSnapshotRelative(releasesPath),
    tags_path: toSnapshotRelative(tagsPath),
    changelog_summary_path: toSnapshotRelative(changelogSummaryPath),
    release_count: releases.length,
    tag_count: tags.length,
    manifest_signature: manifestSignature
  };
}
|
|
1037
|
+
|
|
1038
|
+
// src/ingestion/official-docs.ts
|
|
1039
|
+
// Snapshot-relative directory that receives mirrored documentation and its index.
var OFFICIAL_DOCS_DIR = "__devport__/official-docs";
// Maximum number of discovered URLs kept per run.
var MAX_DISCOVERED_URLS = 20;
// Size cap applied to each mirrored document body.
var MAX_MIRRORED_BYTES = 256e3;
// Markdown inline link `[label](http(s)://target)`; capture group 1 is the target.
var MARKDOWN_LINK_PATTERN = /\[[^\]]+\]\((https?:\/\/[^)\s]+)\)/giu;
// Bare http(s) URL running up to whitespace, `<`, `>` or `"`.
var RAW_URL_PATTERN = /\bhttps?:\/\/[^\s<>"]+/giu;
|
|
1044
|
+
/**
 * Converts a path to forward-slash form: backslashes become "/", one leading
 * "./" is removed, and runs of slashes collapse to a single "/".
 *
 * @param {string} value - Path to normalize.
 * @returns {string} The normalized path.
 */
function normalizePath3(value) {
  const forwardSlashed = value.replace(/\\/g, "/");
  const withoutDotPrefix = forwardSlashed.replace(/^\.\//, "");
  return withoutDotPrefix.replace(/\/+/g, "/");
}
|
|
1047
|
+
/**
 * Parses a string as an absolute http(s) URL and returns its serialized form
 * with the fragment removed.
 *
 * @param {string} raw - Candidate URL; surrounding whitespace is ignored.
 * @returns {string|null} The serialized URL, or null when the input does not
 *   parse or uses a protocol other than http/https.
 */
function normalizeUrl(raw) {
  let parsed;
  try {
    parsed = new URL(raw.trim());
  } catch {
    return null;
  }
  const isWebProtocol = parsed.protocol === "http:" || parsed.protocol === "https:";
  if (!isWebProtocol) {
    return null;
  }
  parsed.hash = "";
  return parsed.toString();
}
|
|
1059
|
+
/**
 * Returns the text of the first readable root-level README in a snapshot.
 *
 * The names README.md, README.MD, Readme.md and readme.md are tried in that
 * order; a name that cannot be read is skipped.
 *
 * @param {string} snapshotPath - Snapshot root directory.
 * @returns {Promise<string>} README contents, or "" when none can be read.
 */
async function loadRootReadme(snapshotPath) {
  const readmeNames = ["README.md", "README.MD", "Readme.md", "readme.md"];
  for (const name of readmeNames) {
    const readmePath = path5.join(snapshotPath, name);
    try {
      return await fs4.readFile(readmePath, "utf8");
    } catch {
      // Unreadable or missing: fall through to the next spelling.
    }
  }
  return "";
}
|
|
1069
|
+
/**
 * Extracts the distinct http(s) URLs mentioned in a block of text.
 *
 * Markdown inline links are read first (`MARKDOWN_LINK_PATTERN`, capture
 * group 1). Those link spans are then blanked out before the bare-URL scan
 * (`RAW_URL_PATTERN`), so a link target is not matched a second time together
 * with its closing parenthesis. Bare matches also have trailing sentence
 * punctuation and unbalanced closing brackets removed. Every candidate goes
 * through `normalizeUrl`; rejected candidates are dropped.
 *
 * @param {string} text - Source text, e.g. README contents.
 * @returns {string[]} Unique normalized URLs sorted case-insensitively ("en",
 *   base sensitivity) and limited to `MAX_DISCOVERED_URLS` entries.
 */
function extractHttpUrls(text) {
  const urls = /* @__PURE__ */ new Set();
  const addCandidate = (candidate) => {
    const normalized = normalizeUrl(candidate);
    if (normalized) {
      urls.add(normalized);
    }
  };
  // Drops characters that belong to the surrounding prose/markup, not the URL.
  const trimTrailingNoise = (candidate) => {
    let end = candidate.length;
    while (end > 0) {
      const last = candidate[end - 1];
      if (".,;:!?'*`".includes(last)) {
        end -= 1;
        continue;
      }
      if (last === ")" || last === "]") {
        // Keep a closer only while it is balanced by an opener inside the URL
        // (e.g. ".../Foo_(bar)").
        const opener = last === ")" ? "(" : "[";
        const head = candidate.slice(0, end);
        const openCount = head.split(opener).length - 1;
        const closeCount = head.split(last).length - 1;
        if (closeCount > openCount) {
          end -= 1;
          continue;
        }
      }
      break;
    }
    return candidate.slice(0, end);
  };
  for (const match of text.matchAll(MARKDOWN_LINK_PATTERN)) {
    addCandidate(match[1]);
  }
  const withoutMarkdownLinks = text.replace(MARKDOWN_LINK_PATTERN, " ");
  for (const match of withoutMarkdownLinks.matchAll(RAW_URL_PATTERN)) {
    addCandidate(trimTrailingNoise(match[0]));
  }
  return [...urls].sort((left, right) => left.localeCompare(right, "en", { sensitivity: "base" })).slice(0, MAX_DISCOVERED_URLS);
}
|
|
1085
|
+
/**
 * Chooses the fetch implementation to use for mirroring.
 *
 * @param {Function} [fetchImpl] - Caller-supplied implementation; wins when truthy.
 * @returns {Function|undefined} The supplied implementation, otherwise
 *   `globalThis.fetch` when that is a function, otherwise undefined.
 */
function resolveFetchImpl(fetchImpl) {
  if (fetchImpl) {
    return fetchImpl;
  }
  const ambientFetch = globalThis.fetch;
  return typeof ambientFetch === "function" ? ambientFetch : void 0;
}
|
|
1095
|
+
/**
 * Downloads one URL and stores its body as a markdown file inside the snapshot.
 *
 * The file is written to `<OFFICIAL_DOCS_DIR>/doc-NN.md` (NN = index + 1,
 * zero-padded to two digits) and consists of a `# <sourceUrl>` heading, a
 * blank line, and the response text. The text is limited to
 * `MAX_MIRRORED_BYTES` of UTF-8, cut on a code-point boundary so a multi-byte
 * character is never split.
 *
 * NOTE(review): the request carries no timeout or abort signal, and the whole
 * body is read before truncation — consider bounding both.
 *
 * @param {string} snapshotPath - Snapshot root directory.
 * @param {string} sourceUrl - URL to fetch.
 * @param {number} index - Zero-based position used to name the mirror file.
 * @param {Function} [fetchImpl] - fetch-compatible function; when absent
 *   nothing is fetched.
 * @returns {Promise<{source_url: string, mirror_path: string|null, status: "mirrored"|"fetch-failed"|"fetch-unavailable"}>}
 *   Never rejects: a non-OK response and any thrown error both yield
 *   "fetch-failed".
 */
async function mirrorUrlToSnapshot(snapshotPath, sourceUrl, index, fetchImpl) {
  const outcome = (status, mirrorPath = null) => ({
    source_url: sourceUrl,
    mirror_path: mirrorPath,
    status
  });
  if (!fetchImpl) {
    return outcome("fetch-unavailable");
  }
  try {
    const response = await fetchImpl(sourceUrl, { redirect: "follow" });
    if (!response.ok) {
      return outcome("fetch-failed");
    }
    const rawText = await response.text();
    let text = rawText;
    if (Buffer.byteLength(rawText, "utf8") > MAX_MIRRORED_BYTES) {
      const encoded = Buffer.from(rawText, "utf8");
      let end = MAX_MIRRORED_BYTES;
      // A continuation byte (10xxxxxx) at the cut means the cut falls inside a
      // character; back up to the start of that character.
      while (end > 0 && (encoded[end] & 192) === 128) {
        end -= 1;
      }
      text = encoded.subarray(0, end).toString("utf8");
    }
    const mirrorRelativePath = normalizePath3(
      path5.join(OFFICIAL_DOCS_DIR, `doc-${String(index + 1).padStart(2, "0")}.md`)
    );
    const mirrorAbsolutePath = path5.join(snapshotPath, mirrorRelativePath);
    await fs4.mkdir(path5.dirname(mirrorAbsolutePath), { recursive: true });
    await fs4.writeFile(mirrorAbsolutePath, `# ${sourceUrl}\n\n${text}\n`, "utf8");
    return outcome("mirrored", mirrorRelativePath);
  } catch {
    return outcome("fetch-failed");
  }
}
|
|
1140
|
+
/**
 * Discovers documentation URLs for a snapshot, mirrors them into the snapshot,
 * writes an `index.json` describing the outcome, and refreshes the manifest.
 *
 * Discovery is controlled by the `DEVPORT_OFFICIAL_DOC_DISCOVERY` environment
 * variable (trimmed, lower-cased, default "auto"); the value "off" disables
 * URL extraction, the homepage lookup and fetching. Otherwise URLs come from
 * the root README plus the repository's `homepage_url` reported by
 * `input.resolver.getRepositoryMeta`; a failing metadata lookup is ignored.
 * URLs are mirrored one at a time, in sorted order.
 *
 * @param {object} input
 * @param {string} input.snapshotPath - Snapshot root directory.
 * @param {object} input.repo - Repository descriptor (`repo_full_name` is recorded).
 * @param {object} input.resolver - Provides `getRepositoryMeta(repo)`.
 * @param {string} input.commitSha - Commit recorded in the index.
 * @param {Function} [input.fetchImpl] - Optional fetch implementation.
 * @returns {Promise<{index_path: string, discovered_count: number, mirrored_count: number, manifest_signature: string}>}
 */
async function persistOfficialDocsArtifacts(input) {
  const discoveryMode = (process.env.DEVPORT_OFFICIAL_DOC_DISCOVERY ?? "auto").trim().toLowerCase();
  const discoveryEnabled = discoveryMode !== "off";
  const readmeContent = await loadRootReadme(input.snapshotPath);
  const candidates = new Set(discoveryEnabled ? extractHttpUrls(readmeContent) : []);
  if (discoveryEnabled) {
    let homepageUrl = null;
    try {
      const meta = await input.resolver.getRepositoryMeta(input.repo);
      homepageUrl = normalizeUrl(meta.homepage_url ?? "");
    } catch {
      homepageUrl = null;
    }
    if (homepageUrl) {
      candidates.add(homepageUrl);
    }
  }
  const byUrl = (left, right) => left.localeCompare(right, "en", { sensitivity: "base" });
  const finalDiscoveredUrls = [...candidates].sort(byUrl).slice(0, MAX_DISCOVERED_URLS);
  const fetchImpl = discoveryEnabled ? resolveFetchImpl(input.fetchImpl) : void 0;
  const docs = [];
  for (const [index, url] of finalDiscoveredUrls.entries()) {
    docs.push(await mirrorUrlToSnapshot(input.snapshotPath, url, index, fetchImpl));
  }
  const mirroredCount = docs.filter((entry) => entry.status === "mirrored").length;
  const indexRelativePath = normalizePath3(path5.join(OFFICIAL_DOCS_DIR, "index.json"));
  const indexAbsolutePath = path5.join(input.snapshotPath, indexRelativePath);
  const indexPayload = {
    repo_ref: input.repo.repo_full_name,
    commit_sha: input.commitSha,
    discovery_mode: discoveryMode,
    discovered_count: finalDiscoveredUrls.length,
    mirrored_count: mirroredCount,
    docs
  };
  await fs4.mkdir(path5.dirname(indexAbsolutePath), { recursive: true });
  await fs4.writeFile(indexAbsolutePath, `${JSON.stringify(indexPayload, null, 2)}\n`, "utf8");
  const manifestSignature = await refreshSnapshotManifest(input.snapshotPath);
  return {
    index_path: indexRelativePath,
    discovered_count: finalDiscoveredUrls.length,
    mirrored_count: mirroredCount,
    manifest_signature: manifestSignature
  };
}
|
|
1198
|
+
|
|
1199
|
+
// src/ingestion/run.ts
|
|
1200
|
+
/**
 * Invokes a clock function and returns its result.
 *
 * @param {() => string} [now] - Clock to call; defaults to one returning the
 *   current time as an ISO-8601 string.
 * @returns {string} Whatever the clock returns.
 */
function safeNow(now = () => (/* @__PURE__ */ new Date()).toISOString()) {
  const timestamp = now();
  return timestamp;
}
|
|
1203
|
+
/**
 * Derives the ingest run identifier: the hex SHA-1 of
 * `<repoFullName>|<snapshotId>|<manifestSignature>`.
 *
 * @param {string} repoFullName - Repository full name.
 * @param {string} snapshotId - Snapshot identifier.
 * @param {string} manifestSignature - Manifest signature of the snapshot.
 * @returns {string} Hex SHA-1 digest.
 */
function buildIngestRunId(repoFullName, snapshotId, manifestSignature) {
  const seed = [repoFullName, snapshotId, manifestSignature].map((part) => `${part}`).join("|");
  const hasher = createHash4("sha1");
  hasher.update(seed);
  return hasher.digest("hex");
}
|
|
1206
|
+
/**
 * Produces the ref resolution record for an ingest run.
 *
 * With a fixture commit the commit is taken as given and no ref lookup
 * happens: when a ref was also requested, the ref doubles as the reported
 * default branch and repository metadata is not loaded; otherwise the
 * default branch from `resolver.getRepositoryMeta` is used. Without a
 * fixture commit, repository metadata is loaded here and the work is then
 * delegated to `resolveToCommitSha`.
 *
 * @param {object} repo - Repository descriptor.
 * @param {string} requestedRef - Normalized requested ref (may be empty).
 * @param {object} resolver - Provides `getRepositoryMeta(repo)`.
 * @param {string} [fixtureCommit] - Commit SHA that bypasses ref resolution.
 * @returns {Promise<{requested_ref: string, requested_ref_type: string, resolved_ref: string, commit_sha: string, source_default_branch: string}>}
 *   `commit_sha` is lower-cased.
 */
async function resolveReference(repo, requestedRef, resolver, fixtureCommit) {
  const fromFixture = (resolvedRef, defaultBranch) => ({
    requested_ref: requestedRef,
    requested_ref_type: inferRefType(requestedRef),
    resolved_ref: resolvedRef,
    commit_sha: fixtureCommit.toLowerCase(),
    source_default_branch: defaultBranch
  });
  if (fixtureCommit && requestedRef) {
    return fromFixture(requestedRef, requestedRef);
  }
  const metadata = await resolver.getRepositoryMeta(repo);
  const sourceDefaultBranch = metadata.default_branch;
  if (fixtureCommit) {
    return fromFixture(requestedRef || sourceDefaultBranch, sourceDefaultBranch);
  }
  const resolution = await resolveToCommitSha({ ...repo, requested_ref: requestedRef }, resolver);
  return {
    requested_ref: requestedRef,
    requested_ref_type: resolution.requested_ref_type,
    resolved_ref: resolution.resolved_ref,
    commit_sha: resolution.commit_sha.toLowerCase(),
    source_default_branch: resolution.source_default_branch
  };
}
|
|
1243
|
+
/**
 * Asks the snapshot manager for a snapshot of the resolved commit.
 *
 * @param {object} snapshotManager - Provides `createSnapshot(request)`.
 * @param {object} repo - Supplies `repo_full_name`, `owner` and `repo`.
 * @param {object} resolved - Ref resolution record supplying `commit_sha`,
 *   `resolved_ref`, `requested_ref` and `source_default_branch`.
 * @returns {Promise<*>} Whatever `createSnapshot` resolves to.
 */
async function createOrReuseSnapshot(snapshotManager, repo, resolved) {
  const { repo_full_name: repoFullName, owner, repo: repoName } = repo;
  return snapshotManager.createSnapshot({
    repoFullName,
    owner,
    repo: repoName,
    commitSha: resolved.commit_sha,
    resolvedRef: resolved.resolved_ref,
    sourceRef: resolved.requested_ref,
    sourceDefaultBranch: resolved.source_default_branch
  });
}
|
|
1255
|
+
/**
 * Determines the language mix for a run and extracts snapshot metadata.
 *
 * The language mix comes from `languageResolver` when one is supplied;
 * otherwise from the resolver (via `loadLanguageMixFromResolver`) when that
 * lookup is allowed; otherwise it is null.
 *
 * @param {string} snapshotPath - Snapshot root directory.
 * @param {string} manifestSignature - Signature passed on to `extractMetadata`.
 * @param {object} resolver - Resolver used for the fallback language lookup.
 * @param {Function} [languageResolver] - Custom lookup called with `languageArgs`.
 * @param {boolean} allowResolverLanguageLookup - Enables the resolver fallback.
 * @param {object} languageArgs - Arguments describing the repository/commit.
 * @returns {Promise<*>} Result of `extractMetadata` (key path limit 120).
 */
async function extractRunMetadata(snapshotPath, manifestSignature, resolver, languageResolver, allowResolverLanguageLookup, languageArgs) {
  let languageMix = null;
  if (languageResolver) {
    languageMix = await languageResolver(languageArgs);
  } else if (allowResolverLanguageLookup) {
    languageMix = await loadLanguageMixFromResolver(resolver, languageArgs);
  }
  const options = {
    manifestSignature,
    keyPathLimit: 120,
    languageMix
  };
  return extractMetadata(snapshotPath, options);
}
|
|
1263
|
+
/**
 * Looks up the repository language mix through the resolver, if it supports it.
 *
 * @param {object} resolver - May implement `getRepositoryLanguages(repoRef)`.
 * @param {{owner: string, repo: string, fullName: string, commitSha: string}} languageArgs -
 *   Mapped to `{owner, repo, repo_full_name, requested_ref}` for the resolver.
 * @returns {Promise<*|null>} The resolver's answer, or null when the method is
 *   missing or the call fails (best-effort: errors are deliberately swallowed).
 */
async function loadLanguageMixFromResolver(resolver, languageArgs) {
  const supported = typeof resolver.getRepositoryLanguages === "function";
  if (!supported) {
    return null;
  }
  try {
    const { owner, repo, fullName, commitSha } = languageArgs;
    return await resolver.getRepositoryLanguages({
      owner,
      repo,
      repo_full_name: fullName,
      requested_ref: commitSha
    });
  } catch {
    return null;
  }
}
|
|
1278
|
+
/**
 * Appends a finished stage record to the stage history.
 *
 * @param {Array<object>} stageHistory - History list, mutated in place.
 * @param {string} stage - Stage name.
 * @param {string} startedAt - Timestamp captured when the stage began.
 * @param {string} [status="ok"] - Stage outcome.
 * @param {() => string} [now] - Clock used for `ended_at`. Pass the same
 *   injected clock that produced `startedAt` so both ends of the record share
 *   one time source; when omitted, `safeNow` falls back to its default clock.
 * @returns {void}
 */
function pushStage(stageHistory, stage, startedAt, status = "ok", now) {
  stageHistory.push({
    stage,
    started_at: startedAt,
    ended_at: safeNow(now),
    status
  });
}
|
|
1286
|
+
/**
 * Runs one ingest: resolve the requested ref, create (or reuse) a snapshot,
 * persist trend and official-docs artifacts, extract metadata, and return the
 * validated run artifact.
 *
 * Each stage ("resolve_ref", "snapshot", "metadata", "finalize") is recorded
 * in a local stage history with an "ok" or "failed" status; a failing stage
 * rethrows its error. The stage history is not part of the returned artifact.
 *
 * @param {*} rawInput - Parsed with `ingestRunInputSchema`.
 * @param {object} [config] - Optional overrides.
 * @param {() => string} [config.now] - Clock returning ISO timestamps.
 * @param {object} [config.resolver] - Defaults to an `OctokitGitHubResolver`
 *   built from `process.env.GITHUB_TOKEN`.
 * @param {object} [config.snapshotManager] - Defaults to a `RepoSnapshotManager`.
 * @param {string} [config.sourcePath] - Forwarded to the default snapshot
 *   manager; when set, no git shell is passed to it.
 * @param {string} [config.fixtureCommit] - Commit that bypasses ref resolution
 *   (falls back to `fixture_commit` from the input) and disables the
 *   resolver-based language lookup.
 * @param {Function} [config.languageResolver] - Custom language-mix lookup.
 * @returns {Promise<*>} The artifact as parsed by `ingestRunArtifactSchema`.
 */
async function runIngest(rawInput, config = {}) {
  const now = config.now || (() => (/* @__PURE__ */ new Date()).toISOString());
  const parsedInput = ingestRunInputSchema.parse(rawInput);
  const startedAt = safeNow(now);
  const startedMs = Date.now();
  const stageHistory = [];
  const repo = parseRepoRef(parsedInput.repo_ref.repo);
  const normalizedRequestedRef = normalizeRef(parsedInput.repo_ref.ref);
  const resolver = config.resolver || new OctokitGitHubResolver(process.env.GITHUB_TOKEN);
  const gitShell = config.sourcePath ? void 0 : defaultGitShell;
  const snapshotManager = config.snapshotManager || new RepoSnapshotManager({
    snapshotRoot: parsedInput.snapshot_root,
    now,
    forceRebuild: parsedInput.force_rebuild,
    sourcePath: config.sourcePath,
    gitShell
  });
  const fixtureCommit = config.fixtureCommit || parsedInput.fixture_commit;
  // Times a stage, records its outcome, and lets failures propagate.
  const runStage = async (stage, work) => {
    const stageStart = safeNow(now);
    try {
      const value = await work();
      pushStage(stageHistory, stage, stageStart);
      return value;
    } catch (error) {
      pushStage(stageHistory, stage, stageStart, "failed");
      throw error;
    }
  };
  const resolved = await runStage(
    "resolve_ref",
    () => resolveReference(repo, normalizedRequestedRef, resolver, fixtureCommit)
  );
  const snapshot = await runStage(
    "snapshot",
    () => createOrReuseSnapshot(snapshotManager, repo, resolved)
  );
  // Writing artifacts into the snapshot changes its contents, so the in-memory
  // manifest adopts each refreshed signature.
  const adoptSignature = (signature) => {
    if (signature !== snapshot.manifest.manifest_signature) {
      snapshot.manifest.manifest_signature = signature;
    }
  };
  const { metadata, trendArtifacts, officialDocsArtifacts } = await runStage("metadata", async () => {
    const persistedTrends = await persistTrendArtifacts({
      snapshotPath: snapshot.snapshotPath,
      repo,
      resolver,
      commitSha: resolved.commit_sha,
      now
    });
    const trends = {
      window_days: persistedTrends.window_days,
      releases_path: persistedTrends.releases_path,
      tags_path: persistedTrends.tags_path,
      changelog_summary_path: persistedTrends.changelog_summary_path,
      release_count: persistedTrends.release_count,
      tag_count: persistedTrends.tag_count
    };
    adoptSignature(persistedTrends.manifest_signature);
    const persistedDocs = await persistOfficialDocsArtifacts({
      snapshotPath: snapshot.snapshotPath,
      repo,
      resolver,
      commitSha: resolved.commit_sha
    });
    const docs = {
      index_path: persistedDocs.index_path,
      discovered_count: persistedDocs.discovered_count,
      mirrored_count: persistedDocs.mirrored_count
    };
    adoptSignature(persistedDocs.manifest_signature);
    const extracted = await extractRunMetadata(
      snapshot.snapshotPath,
      snapshot.manifest.manifest_signature,
      resolver,
      config.languageResolver,
      !fixtureCommit,
      {
        owner: repo.owner,
        repo: repo.repo,
        fullName: repo.repo_full_name,
        commitSha: resolved.commit_sha
      }
    );
    return { metadata: extracted, trendArtifacts: trends, officialDocsArtifacts: docs };
  });
  const finalizeStart = safeNow(now);
  const ingestMs = Math.max(0, Date.now() - startedMs);
  const completedAt = safeNow(now);
  pushStage(stageHistory, "finalize", finalizeStart);
  const manifestSignature = snapshot.manifest.manifest_signature;
  return ingestRunArtifactSchema.parse({
    ingest_run_id: buildIngestRunId(repo.repo_full_name, snapshot.snapshotId, manifestSignature),
    repo_ref: repo.repo_full_name,
    requested_ref: resolved.requested_ref,
    resolved_ref: resolved.resolved_ref,
    commit_sha: resolved.commit_sha,
    snapshot_path: snapshot.snapshotPath,
    snapshot_id: snapshot.snapshotId,
    manifest_signature: manifestSignature,
    files_scanned: metadata.files_scanned,
    idempotent_hit: snapshot.idempotentHit,
    metadata,
    trend_artifacts: trendArtifacts,
    official_docs: officialDocsArtifacts,
    created_at: startedAt,
    completed_at: completedAt,
    ingest_ms: ingestMs
  });
}
|
|
1409
|
+
|
|
1410
|
+
// src/contracts/wiki-delivery.ts
|
|
1411
|
+
import { z as z2 } from "zod";
|
|
1412
|
+
// Lowercase hexadecimal commit SHA, 7 to 40 characters long.
var COMMIT_SHA_PATTERN = /^[a-f0-9]{7,40}$/;
// Lowercase "owner/name" made of letters, digits, "_", "." and "-".
var REPO_FULL_NAME_PATTERN = /^[a-z0-9_.-]+\/[a-z0-9_.-]+$/;
|
|
1414
|
+
/**
 * Turns free text into an anchor slug.
 *
 * The text is trimmed and lower-cased, whitespace runs become "-", every
 * character other than a-z, 0-9, Hangul syllables (가-힣) and "-" is removed,
 * repeated "-" collapse to one, and leading/trailing "-" are stripped.
 *
 * @param {string} value - Text to slugify.
 * @returns {string} The slug; may be empty when nothing survives filtering.
 */
function normalizeAnchor(value) {
  const lowered = value.trim().toLowerCase();
  const hyphenated = lowered.replace(/\s+/g, "-");
  const filtered = hyphenated.replace(/[^a-z0-9가-힣-]/g, "");
  const collapsed = filtered.replace(/-+/g, "-");
  return collapsed.replace(/^-+|-+$/g, "");
}
|
|
1418
|
+
// Text that must be non-empty once trimmed; used for mandatory section fields.
var RequiredSectionTextSchema = z2.string().trim().min(1);
// Identifies the project a delivery artifact belongs to. Unknown keys are
// rejected (`strict`); `ingestRunId` is optional.
var DeliveryProjectIdentitySchema = z2.object({
  repoFullName: z2.string().trim().regex(REPO_FULL_NAME_PATTERN),
  commitSha: z2.string().trim().regex(COMMIT_SHA_PATTERN),
  ingestRunId: z2.string().trim().min(1).optional()
}).strict();
|
|
1424
|
+
// One wiki section as delivered. After validation the section is normalized:
// `heading` falls back to `sectionId`, `anchor` is slugified with
// `normalizeAnchor` (from the given anchor, else from `sectionId`; an anchor
// that slugifies to "" is recomputed from `sectionId`), and every subsection
// id is trimmed.
var DeliverySectionSchema = z2.object({
  sectionId: RequiredSectionTextSchema,
  heading: z2.string().trim().min(1).optional(),
  anchor: z2.string().trim().min(1).optional(),
  summary: RequiredSectionTextSchema,
  deepDiveMarkdown: RequiredSectionTextSchema,
  order: z2.number().int().min(0),
  subsectionIds: z2.array(z2.string().trim().min(1)).min(1)
}).strict().transform((section) => {
  const { sectionId } = section;
  const preferredAnchor = normalizeAnchor(section.anchor ?? sectionId);
  return {
    ...section,
    heading: section.heading?.trim() ?? sectionId,
    anchor: preferredAnchor.length > 0 ? preferredAnchor : normalizeAnchor(sectionId),
    subsectionIds: section.subsectionIds.map((id) => id.trim())
  };
});
|
|
1442
|
+
// Provenance attached to a delivery artifact: when and from which commit it
// was generated, aggregate counters, and the identifiers of the producing run.
// Every object level rejects unknown keys.
var DeliveryProvenanceSchema = (() => {
  const nonEmptyText = () => z2.string().trim().min(1);
  const atLeast = (minimum) => z2.number().int().min(minimum);
  const counters = z2.object({
    sectionCount: atLeast(1),
    subsectionCount: atLeast(1),
    claimCount: atLeast(0),
    citationCount: atLeast(0)
  }).strict();
  const run = z2.object({
    generationRunId: nonEmptyText(),
    ingestRunId: nonEmptyText().optional(),
    modelId: nonEmptyText()
  }).strict();
  return z2.object({
    generatedAt: z2.string().datetime(),
    commitSha: z2.string().trim().regex(COMMIT_SHA_PATTERN),
    counters,
    run
  }).strict();
})();
|
|
1457
|
+
// Envelope metadata of a delivery artifact: fixed type/version literals,
// generation time, section totals, the declared ordering rules, and the
// provenance record. Unknown keys are rejected.
var DeliveryMetadataSchema = (() => {
  const positiveCount = () => z2.number().int().min(1);
  const deterministicOrdering = z2.object({
    sections: z2.literal("sectionId:asc"),
    subsections: z2.literal("subsectionId:asc")
  }).strict();
  return z2.object({
    artifactType: z2.literal("wiki-delivery"),
    sourceArtifactType: z2.literal("wiki-draft"),
    contractVersion: z2.literal("out-01.v1"),
    generatedAt: z2.string().datetime(),
    sectionCount: positiveCount(),
    subsectionCount: positiveCount(),
    deterministicOrdering,
    provenance: DeliveryProvenanceSchema
  }).strict();
})();
|
|
1470
|
+
// Complete delivery artifact. Beyond the per-field schemas, cross-field
// invariants are enforced:
//   - metadata and provenance counters agree with the actual sections,
//   - metadata.generatedAt and project.commitSha match their provenance twins,
//   - each section's `order` equals its array index,
//   - section ids and anchors are unique.
// Every violated invariant is reported as its own custom issue.
var DeliveryWikiArtifactSchema = z2.object({
  project: DeliveryProjectIdentitySchema,
  sections: z2.array(DeliverySectionSchema).min(1),
  metadata: DeliveryMetadataSchema
}).strict().superRefine((value, ctx) => {
  const report = (message, path) => {
    ctx.addIssue({
      code: z2.ZodIssueCode.custom,
      message,
      path
    });
  };
  const { project, sections, metadata } = value;
  const { provenance } = metadata;
  const subsectionCount = sections.reduce((count, section) => count + section.subsectionIds.length, 0);
  // [invariant holds, message, issue path], checked in this order.
  const invariants = [
    [
      metadata.sectionCount === sections.length,
      "metadata.sectionCount must equal sections length",
      ["metadata", "sectionCount"]
    ],
    [
      metadata.subsectionCount === subsectionCount,
      "metadata.subsectionCount must equal subsectionIds aggregate",
      ["metadata", "subsectionCount"]
    ],
    [
      metadata.generatedAt === provenance.generatedAt,
      "metadata.generatedAt must equal provenance.generatedAt",
      ["metadata", "generatedAt"]
    ],
    [
      project.commitSha === provenance.commitSha,
      "provenance.commitSha must equal project.commitSha",
      ["metadata", "provenance", "commitSha"]
    ],
    [
      provenance.counters.sectionCount === metadata.sectionCount,
      "provenance.counters.sectionCount must equal metadata.sectionCount",
      ["metadata", "provenance", "counters", "sectionCount"]
    ],
    [
      provenance.counters.subsectionCount === metadata.subsectionCount,
      "provenance.counters.subsectionCount must equal metadata.subsectionCount",
      ["metadata", "provenance", "counters", "subsectionCount"]
    ]
  ];
  for (const [holds, message, path] of invariants) {
    if (!holds) {
      report(message, path);
    }
  }
  const seenSectionIds = /* @__PURE__ */ new Set();
  const seenAnchors = /* @__PURE__ */ new Set();
  sections.forEach((section, index) => {
    if (section.order !== index) {
      report("sections must use contiguous deterministic order values starting at 0", ["sections", index, "order"]);
    }
    if (seenSectionIds.has(section.sectionId)) {
      report(`duplicate sectionId: ${section.sectionId}`, ["sections", index, "sectionId"]);
    }
    seenSectionIds.add(section.sectionId);
    if (seenAnchors.has(section.anchor)) {
      report(`duplicate anchor: ${section.anchor}`, ["sections", index, "anchor"]);
    }
    seenAnchors.add(section.anchor);
  });
});
|
|
1547
|
+
|
|
1548
|
+
// src/packaging/provenance.ts
|
|
1549
|
+
// Fallback identifiers used when the caller supplies no usable run/model id.
var DEFAULT_GENERATION_RUN_ID = "delivery-packaging";
var DEFAULT_MODEL_ID = "unknown-model";

/**
 * Assembles the provenance record for a delivery artifact.
 *
 * @param {object} input
 * @param {Array<{subsectionIds: string[]}>} input.sections - Delivery sections; only their count and `subsectionIds` lengths are read.
 * @param {string} input.generatedAt - Copied through unchanged.
 * @param {string} input.commitSha - Copied through unchanged.
 * @param {number} input.claimCount - Copied into `counters`.
 * @param {number} input.citationCount - Copied into `counters`.
 * @param {string} [input.generationRunId] - Trimmed; falls back to the default run id when missing or blank.
 * @param {string} [input.ingestRunId] - Trimmed; becomes `undefined` when missing or blank.
 * @param {string} [input.modelId] - Trimmed; falls back to the default model id when missing or blank.
 * @returns {{generatedAt: string, commitSha: string, counters: object, run: object}}
 */
function buildDeliveryProvenance(input) {
  const { sections, generatedAt, commitSha, claimCount, citationCount } = input;

  let subsectionTotal = 0;
  for (const { subsectionIds } of sections) {
    subsectionTotal += subsectionIds.length;
  }

  // Blank or whitespace-only ids are treated the same as missing ones.
  const trimmedOr = (raw, fallback) => raw?.trim() || fallback;

  return {
    generatedAt,
    commitSha,
    counters: {
      sectionCount: sections.length,
      subsectionCount: subsectionTotal,
      claimCount,
      citationCount
    },
    run: {
      generationRunId: trimmedOr(input.generationRunId, DEFAULT_GENERATION_RUN_ID),
      ingestRunId: trimmedOr(input.ingestRunId, void 0),
      modelId: trimmedOr(input.modelId, DEFAULT_MODEL_ID)
    }
  };
}
|
|
1569
|
+
|
|
1570
|
+
// src/packaging/adapter.ts
|
|
1571
|
+
// Ordering labels written into metadata.deterministicOrdering by
// adaptWikiDraftToDelivery.
var SECTION_SORT = "sectionId:asc";
var SUBSECTION_SORT = "subsectionId:asc";
|
|
1573
|
+
/**
 * Converts an accepted output into a delivery artifact by delegating to
 * adaptWikiDraftToDelivery with the accepted output's draft.
 *
 * Both `ingestRunId` and `generationRunId` are taken from
 * `acceptedOutput.ingest_run_id`, overriding any values of those names in
 * `options`.
 *
 * @param {{draft: object, ingest_run_id: string}} acceptedOutput
 * @param {object} [options] - Extra adapter options (spread before the run ids).
 * @returns {object} Result of adaptWikiDraftToDelivery.
 */
function adaptAcceptedOutputToDelivery(acceptedOutput, options = {}) {
  const { draft } = acceptedOutput;
  const runId = acceptedOutput.ingest_run_id;
  const adapterOptions = {
    ...options,
    ingestRunId: runId,
    generationRunId: runId
  };
  return adaptWikiDraftToDelivery(draft, adapterOptions);
}
|
|
1580
|
+
/**
 * Maps a wiki draft onto the delivery artifact shape and validates the result
 * with DeliveryWikiArtifactSchema.
 *
 * Sections and their subsections are copied and sorted by id (numeric-aware,
 * case-insensitive) before mapping, so `order` follows the sorted position.
 *
 * @param {object} draft - Wiki draft; `claims` and `citations` may be absent.
 * @param {object} [options]
 * @param {string} [options.generatedAt] - Timestamp to stamp; defaults to the current time in ISO form.
 * @param {string} [options.ingestRunId]
 * @param {string} [options.generationRunId]
 * @param {string} [options.modelId]
 * @returns {object} The parsed delivery artifact.
 * @throws {Error} When a summary, title, or body is blank (via requireNonEmpty),
 *   or when the assembled artifact fails schema parsing.
 */
function adaptWikiDraftToDelivery(draft, options = {}) {
  const sections = draft.sections
    .slice()
    .sort((left, right) => compareByDeterministicId(left.sectionId, right.sectionId))
    .map((section, index) => {
      const subsections = section.subsections
        .slice()
        .sort((left, right) => compareByDeterministicId(left.subsectionId, right.subsectionId));
      const summary = requireNonEmpty(section.summaryKo, `summaryKo:${section.sectionId}`);
      const deepDiveMarkdown = buildDeepDiveMarkdown(subsections, section.sectionId);
      return {
        sectionId: section.sectionId,
        heading: section.titleKo,
        // The section id doubles as the anchor.
        anchor: section.sectionId,
        summary,
        deepDiveMarkdown,
        order: index,
        subsectionIds: subsections.map((subsection) => subsection.subsectionId)
      };
    });
  const subsectionCount = sections.reduce((count, section) => count + section.subsectionIds.length, 0);
  const generatedAt = options.generatedAt ?? (/* @__PURE__ */ new Date()).toISOString();
  // claims/citations are optional on the wiki-draft schema; count them as zero
  // instead of throwing a TypeError when they are missing.
  const claimCount = (draft.claims ?? []).length;
  const citationCount = (draft.citations ?? []).length;
  const provenance = buildDeliveryProvenance({
    sections,
    commitSha: draft.commitSha,
    generatedAt,
    claimCount,
    citationCount,
    ingestRunId: options.ingestRunId,
    generationRunId: options.generationRunId,
    modelId: options.modelId
  });
  return DeliveryWikiArtifactSchema.parse({
    project: {
      repoFullName: draft.repoFullName,
      commitSha: draft.commitSha,
      ingestRunId: options.ingestRunId
    },
    sections,
    metadata: {
      artifactType: "wiki-delivery",
      sourceArtifactType: draft.artifactType,
      contractVersion: "out-01.v1",
      generatedAt,
      sectionCount: sections.length,
      subsectionCount,
      deterministicOrdering: {
        sections: SECTION_SORT,
        subsections: SUBSECTION_SORT
      },
      provenance
    }
  });
}
|
|
1630
|
+
/**
 * Renders a section's subsections as one markdown string: each subsection
 * becomes a `## title` heading, a blank line, then its body; blocks are
 * separated by a blank line.
 *
 * @param {Array<{subsectionId: string, titleKo: string, bodyKo: string}>} subsections
 * @param {string} sectionId - Used only to label validation errors.
 * @returns {string} The trimmed, non-empty markdown.
 * @throws {Error} When any title/body, or the combined markdown, is blank.
 */
function buildDeepDiveMarkdown(subsections, sectionId) {
  const rendered = [];
  for (const subsection of subsections) {
    const label = `${sectionId}:${subsection.subsectionId}`;
    const heading = requireNonEmpty(subsection.titleKo, `titleKo:${label}`);
    const content = requireNonEmpty(subsection.bodyKo, `bodyKo:${label}`);
    rendered.push(`## ${heading}\n\n${content}`);
  }
  const markdown = rendered.join("\n\n");
  return requireNonEmpty(markdown, `deepDiveMarkdown:${sectionId}`);
}
|
|
1640
|
+
/**
 * Comparator for ids: English collation, digit runs compared numerically,
 * case and accent differences ignored.
 *
 * @param {string} left
 * @param {string} right
 * @returns {number} Negative, zero, or positive, as for Array.prototype.sort.
 */
function compareByDeterministicId(left, right) {
  const collation = { numeric: true, sensitivity: "base" };
  return left.localeCompare(right, "en", collation);
}
|
|
1643
|
+
/**
 * Trims `value` and returns it, rejecting blank results.
 *
 * @param {string} value - Text to check.
 * @param {string} field - Label included in the error message.
 * @returns {string} The trimmed text.
 * @throws {Error} When the trimmed text is empty.
 */
function requireNonEmpty(value, field) {
  const trimmed = value.trim();
  if (trimmed.length !== 0) {
    return trimmed;
  }
  throw new Error(`OUT-01 mapping violation: ${field} must be non-empty`);
}
|
|
1650
|
+
|
|
1651
|
+
// src/packaging/glossary.ts
|
|
1652
|
+
import { z as z3 } from "zod";
|
|
1653
|
+
// Whole-string, case-insensitive match for filler values such as "n/a", "na",
// "none", "null", "tbd", "todo", "미정", "없음", "-" and "_".
var PLACEHOLDER_PATTERN = /^(?:n\/?a|na|none|null|tbd|todo|미정|없음|-|_)$/i;
// "<Hangul-led phrase> (<latin-led phrase>)": group 1 is the Korean side,
// group 2 the parenthesized English side.
var KOREAN_ENGLISH_PAIR_PATTERN = /([가-힣][가-힣a-z0-9\s\-_/]{0,80}?)\s*\(\s*([a-z][a-z0-9\s\-_/]{1,80})\s*\)/gi;
// "<latin-led phrase> (<Hangul-led phrase>)": group 1 is the English side,
// group 2 the parenthesized Korean side.
var ENGLISH_KOREAN_PAIR_PATTERN = /([a-z][a-z0-9\s\-_/]{1,80})\s*\(\s*([가-힣][가-힣a-z0-9\s\-_/]{0,80}?)\s*\)/gi;
// A glossary entry: trimmed non-empty Korean and English terms plus a trimmed
// definition of at least 10 characters; unknown keys are rejected.
var GlossaryEntrySchema = z3.object({
  termKo: z3.string().trim().min(1),
  termEn: z3.string().trim().min(1),
  definition: z3.string().trim().min(10)
}).strict();
|
|
1661
|
+
/**
 * Builds the glossary for an accepted output from its draft.
 *
 * @param {{draft: object}} acceptedOutput
 * @returns {Array<object>} Result of buildGlossaryFromDraft for the draft.
 */
function buildGlossaryFromAcceptedOutput(acceptedOutput) {
  const { draft } = acceptedOutput;
  return buildGlossaryFromDraft(draft);
}
|
|
1664
|
+
/**
 * Extracts glossary candidates from every text field collected from the
 * draft, then normalizes, de-duplicates and sorts them.
 *
 * @param {object} draft
 * @returns {Array<object>} Normalized glossary entries.
 */
function buildGlossaryFromDraft(draft) {
  const candidates = collectDraftText(draft).flatMap(
    (text) => extractGlossaryCandidatesFromText(text)
  );
  return normalizeGlossaryEntries(candidates);
}
|
|
1671
|
+
/**
 * Normalizes raw glossary candidates into a sorted, de-duplicated list.
 *
 * Each candidate is normalized and validated against GlossaryEntrySchema;
 * invalid candidates and ones with a placeholder field are dropped. Survivors
 * are keyed by the canonical English term; when a key repeats, each field of
 * the merged entry is picked independently (stable term choice, preferred
 * definition).
 *
 * @param {Array<{termKo: string, termEn: string, definition: string}>} candidates
 * @returns {Array<{termKo: string, termEn: string, definition: string}>}
 */
function normalizeGlossaryEntries(candidates) {
  const byCanonicalKey = /* @__PURE__ */ new Map();
  for (const candidate of candidates) {
    const result = GlossaryEntrySchema.safeParse({
      termKo: normalizeTermKo(candidate.termKo),
      termEn: normalizeTermEn(candidate.termEn),
      definition: normalizeWhitespace(candidate.definition)
    });
    if (!result.success) {
      continue;
    }
    const entry = result.data;
    const fields = [entry.termKo, entry.termEn, entry.definition];
    if (fields.some((field) => isPlaceholder(field))) {
      continue;
    }
    const key = toCanonicalEn(entry.termEn);
    const previous = byCanonicalKey.get(key);
    const merged = previous
      ? {
          termKo: pickLexicographicallyStable(previous.termKo, entry.termKo),
          termEn: pickLexicographicallyStable(previous.termEn, entry.termEn),
          definition: pickPreferredDefinition(previous.definition, entry.definition)
        }
      : entry;
    byCanonicalKey.set(key, merged);
  }
  return Array.from(byCanonicalKey.values()).sort(compareGlossaryEntry);
}
|
|
1700
|
+
/**
 * Gathers the free-text fields of a draft in document order: the overview,
 * each section's title and summary followed by its subsections' titles and
 * bodies, then claim statements, then citation rationales (when present).
 *
 * @param {object} draft - Wiki draft; `claims` and `citations` may be absent.
 * @returns {string[]} Collected text fields.
 */
function collectDraftText(draft) {
  const texts = [draft.overviewKo];
  for (const section of draft.sections) {
    texts.push(section.titleKo, section.summaryKo);
    for (const subsection of section.subsections) {
      texts.push(subsection.titleKo, subsection.bodyKo);
    }
  }
  // claims/citations are optional on the wiki-draft schema; treat a missing
  // list as empty rather than throwing "not iterable".
  for (const claim of draft.claims ?? []) {
    texts.push(claim.statementKo);
  }
  for (const citation of draft.citations ?? []) {
    if (citation.rationale) {
      texts.push(citation.rationale);
    }
  }
  return texts;
}
|
|
1718
|
+
/**
 * Splits text into sentences and, for each sentence, collects
 * Korean(English) pairs followed by English(Korean) pairs.
 *
 * @param {string} text
 * @returns {Array<{termKo: string, termEn: string, definition: string}>}
 */
function extractGlossaryCandidatesFromText(text) {
  return splitSentences(text).flatMap((sentence) => [
    ...extractPairMatches(sentence, KOREAN_ENGLISH_PAIR_PATTERN),
    ...extractPairMatches(sentence, ENGLISH_KOREAN_PAIR_PATTERN, true)
  ]);
}
|
|
1727
|
+
/**
 * Finds every match of `pattern` in `sentence` and turns its two capture
 * groups into a glossary candidate whose definition is the sentence itself.
 *
 * @param {string} sentence
 * @param {RegExp} pattern - Pattern with two capture groups.
 * @param {boolean} [reverse=false] - When true, group 1 is the English term
 *   and group 2 the Korean term; otherwise the other way round.
 * @returns {Array<{termKo: string, termEn: string, definition: string}>}
 */
function extractPairMatches(sentence, pattern, reverse = false) {
  // Work on a fresh copy so the caller's regex keeps its own lastIndex.
  const scanner = new RegExp(pattern.source, pattern.flags);
  const found = [];
  for (let hit = scanner.exec(sentence); hit; hit = scanner.exec(sentence)) {
    const first = normalizeWhitespace(hit[1]);
    const second = normalizeWhitespace(hit[2]);
    if (reverse) {
      found.push({ termKo: second, termEn: first, definition: sentence });
    } else {
      found.push({ termKo: first, termEn: second, definition: sentence });
    }
  }
  return found;
}
|
|
1741
|
+
/**
 * Splits text on line breaks and on whitespace that follows ".", "!", "?" or
 * "다", whitespace-normalizes each piece, and drops empty pieces.
 *
 * @param {string} text
 * @returns {string[]}
 */
function splitSentences(text) {
  const sentences = [];
  for (const piece of text.split(/[\n\r]+|(?<=[.!?다])\s+/u)) {
    const cleaned = normalizeWhitespace(piece);
    if (cleaned.length > 0) {
      sentences.push(cleaned);
    }
  }
  return sentences;
}
|
|
1744
|
+
/**
 * Derives the de-duplication key for an English term: lowercase, keep only
 * a-z, 0-9 and whitespace, collapse whitespace runs to one space, trim.
 *
 * Punctuation is removed BEFORE whitespace is collapsed, so a term such as
 * "foo - bar" yields "foo bar" rather than a key containing a double space.
 *
 * @param {string} value
 * @returns {string} Canonical key.
 */
function toCanonicalEn(value) {
  return value
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, "")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
1747
|
+
/**
 * Chooses between two definitions: the longer one wins; equal lengths are
 * settled by pickLexicographicallyStable.
 *
 * @param {string} current
 * @param {string} incoming
 * @returns {string}
 */
function pickPreferredDefinition(current, incoming) {
  const lengthDelta = incoming.length - current.length;
  if (lengthDelta > 0) {
    return incoming;
  }
  if (lengthDelta < 0) {
    return current;
  }
  return pickLexicographicallyStable(current, incoming);
}
|
|
1756
|
+
/**
 * Returns whichever string sorts first under English, case/accent-insensitive
 * collation; on a tie the left argument is kept.
 *
 * @param {string} left
 * @param {string} right
 * @returns {string}
 */
function pickLexicographicallyStable(left, right) {
  const ordering = left.localeCompare(right, "en", { sensitivity: "base" });
  return ordering > 0 ? right : left;
}
|
|
1759
|
+
/**
 * Sort comparator for glossary entries: by English term (English collation),
 * then by Korean term (Korean collation). Both comparisons are numeric-aware
 * and ignore case/accent differences.
 *
 * @param {{termEn: string, termKo: string}} left
 * @param {{termEn: string, termKo: string}} right
 * @returns {number} Negative, zero, or positive.
 */
function compareGlossaryEntry(left, right) {
  const collation = { sensitivity: "base", numeric: true };
  const englishOrder = left.termEn.localeCompare(right.termEn, "en", collation);
  return englishOrder !== 0
    ? englishOrder
    : left.termKo.localeCompare(right.termKo, "ko", collation);
}
|
|
1772
|
+
/**
 * Collapses every whitespace run to a single space and trims the ends.
 * `null`/`undefined` are treated as the empty string.
 *
 * @param {string | null | undefined} value
 * @returns {string}
 */
function normalizeWhitespace(value) {
  const text = value ?? "";
  const collapsed = text.replace(/\s+/g, " ");
  return collapsed.trim();
}
|
|
1775
|
+
/**
 * Normalizes a captured Korean term:
 *   1. whitespace-normalize; an empty result is returned as is;
 *   2. keep at most the last three space-separated tokens;
 *   3. drop leading tokens that are exactly one of 와/과/및/그리고/이/그/해당;
 *   4. strip one trailing particle (은, 는, 이, 가, 을, 를, 와, 과, 의, 에,
 *      에서, 로, 으로) from the final token.
 *
 * @param {string} value
 * @returns {string} The normalized term; may be empty.
 */
function normalizeTermKo(value) {
  const collapsed = normalizeWhitespace(value);
  if (collapsed.length === 0) {
    return collapsed;
  }
  // slice(-3) keeps everything when there are three or fewer tokens.
  const words = collapsed.split(" ").filter(Boolean).slice(-3);
  const leadingFiller = /^(?:와|과|및|그리고|이|그|해당)$/u;
  let start = 0;
  while (start < words.length && leadingFiller.test(words[start])) {
    start += 1;
  }
  const kept = words.slice(start);
  if (kept.length === 0) {
    return "";
  }
  const last = kept.length - 1;
  kept[last] = kept[last].replace(/(은|는|이|가|을|를|와|과|의|에|에서|로|으로)$/u, "");
  return kept.join(" ").trim();
}
|
|
1794
|
+
/**
 * Whitespace-normalizes an English term and strips trailing ".", ",", ";" and
 * ":" characters.
 *
 * @param {string} value
 * @returns {string}
 */
function normalizeTermEn(value) {
  const collapsed = normalizeWhitespace(value);
  return collapsed.replace(/[.,;:]+$/g, "");
}
|
|
1797
|
+
/**
 * Reports whether the trimmed value matches PLACEHOLDER_PATTERN as a whole.
 *
 * @param {string} value
 * @returns {boolean}
 */
function isPlaceholder(value) {
  const candidate = value.trim();
  return PLACEHOLDER_PATTERN.test(candidate);
}
|
|
1800
|
+
|
|
1801
|
+
// src/packaging/validate.ts
|
|
1802
|
+
import { z as z5 } from "zod";
|
|
1803
|
+
|
|
1804
|
+
// src/contracts/wiki-generation.ts
|
|
1805
|
+
import { z as z4 } from "zod";
|
|
1806
|
+
// 7 to 40 lowercase hexadecimal characters.
var COMMIT_SHA_PATTERN2 = /^[a-f0-9]{7,40}$/;
// Non-empty path that does not start with "/", contains no "..", and does not
// end with whitespace.
var REPO_PATH_PATTERN = /^(?!\/)(?!.*\.\.)(?!.*\s$).+/;
// Line range with integer bounds >= 1; `end` must not precede `start`.
var LineRangeSchema = z4.object({
  start: z4.number().int().min(1),
  end: z4.number().int().min(1)
}).superRefine((value, ctx) => {
  if (value.end < value.start) {
    ctx.addIssue({
      code: z4.ZodIssueCode.custom,
      message: "line range end must be greater than or equal to start",
      path: ["end"]
    });
  }
});
|
|
1820
|
+
// A citation: ids, a repo path + line range, a commit sha, and optional
// permalink/rationale. Unknown keys are rejected.
var CitationSchema = z4.object({
  citationId: z4.string().min(1),
  evidenceId: z4.string().min(1),
  repoPath: z4.string().regex(REPO_PATH_PATTERN),
  lineRange: LineRangeSchema,
  commitSha: z4.string().regex(COMMIT_SHA_PATTERN2),
  permalink: z4.string().url().optional(),
  rationale: z4.string().min(1).optional()
}).strict();
// A claim placed in a section/subsection, with a statement of at least 20
// characters and at least one citation id.
var ClaimSchema = z4.object({
  claimId: z4.string().min(1),
  sectionId: z4.string().min(1),
  subsectionId: z4.string().min(1),
  statementKo: z4.string().min(20),
  citationIds: z4.array(z4.string().min(1)).min(1)
}).strict();
// An evidence chunk: repo path + line range + snippet, optional language and
// symbol, and two scores constrained to [0, 1].
var EvidenceChunkSchema = z4.object({
  evidenceId: z4.string().min(1),
  repoPath: z4.string().regex(REPO_PATH_PATTERN),
  lineRange: LineRangeSchema,
  snippet: z4.string().min(1),
  language: z4.string().min(1).optional(),
  symbol: z4.string().min(1).optional(),
  lexicalScore: z4.number().min(0).max(1),
  semanticScore: z4.number().min(0).max(1)
}).strict();
|
|
1846
|
+
// Planned subsection: ids, title (>= 3 chars), objective (>= 10 chars), at
// least one target evidence kind, and a target character count of >= 700.
var SectionSubsectionPlanSchema = z4.object({
  sectionId: z4.string().min(1),
  subsectionId: z4.string().min(1),
  titleKo: z4.string().min(3),
  objectiveKo: z4.string().min(10),
  targetEvidenceKinds: z4.array(z4.enum(["code", "config", "tests", "docs"])).min(1),
  targetCharacterCount: z4.number().int().min(700)
}).strict();
// Planned section: requires at least three planned subsections.
var SectionPlanSectionSchema = z4.object({
  sectionId: z4.string().min(1),
  titleKo: z4.string().min(3),
  summaryKo: z4.string().min(10),
  subsections: z4.array(SectionSubsectionPlanSchema).min(3)
}).strict();
// Directed relation between two section ids.
var SectionCrossReferenceSchema = z4.object({
  fromSectionId: z4.string().min(1),
  toSectionId: z4.string().min(1),
  relation: z4.string().min(3)
}).strict();
|
|
1865
|
+
// Section plan artifact. Beyond the field constraints, the refinement reports:
//   - repeated section ids,
//   - subsections whose sectionId differs from their parent section,
//   - repeated "<sectionId>:<subsectionId>" keys,
//   - cross references whose endpoints are not known section ids.
var SectionPlanSchema = z4.object({
  artifactType: z4.literal("section-plan"),
  repoFullName: z4.string().regex(/^[a-z0-9_.-]+\/[a-z0-9_.-]+$/),
  commitSha: z4.string().regex(COMMIT_SHA_PATTERN2),
  generatedAt: z4.string().datetime(),
  overviewKo: z4.string().min(30),
  sections: z4.array(SectionPlanSectionSchema).min(6),
  crossReferences: z4.array(SectionCrossReferenceSchema)
}).strict().superRefine((plan, ctx) => {
  const report = (message, path) => {
    ctx.addIssue({ code: z4.ZodIssueCode.custom, message, path });
  };
  const seenSectionIds = /* @__PURE__ */ new Set();
  const seenSubsectionKeys = /* @__PURE__ */ new Set();
  plan.sections.forEach((section) => {
    if (seenSectionIds.has(section.sectionId)) {
      report(`duplicate sectionId: ${section.sectionId}`, ["sections"]);
    }
    seenSectionIds.add(section.sectionId);
    section.subsections.forEach((subsection) => {
      if (subsection.sectionId !== section.sectionId) {
        report("subsection.sectionId must match parent sectionId", ["sections"]);
      }
      const key = `${subsection.sectionId}:${subsection.subsectionId}`;
      if (seenSubsectionKeys.has(key)) {
        report(`duplicate subsection key: ${key}`, ["sections"]);
      }
      seenSubsectionKeys.add(key);
    });
  });
  plan.crossReferences.forEach(({ fromSectionId, toSectionId }) => {
    const bothKnown = seenSectionIds.has(fromSectionId) && seenSectionIds.has(toSectionId);
    if (!bothKnown) {
      report("cross reference must target existing section ids", ["crossReferences"]);
    }
  });
});
|
|
1914
|
+
// Drafted subsection: title of >= 3 chars and body of >= 80 chars.
var DraftSubsectionSchema = z4.object({
  sectionId: z4.string().min(1),
  subsectionId: z4.string().min(1),
  titleKo: z4.string().min(3),
  bodyKo: z4.string().min(80)
}).strict();
// Drafted section: summary of >= 20 chars and at least three subsections.
var DraftSectionSchema = z4.object({
  sectionId: z4.string().min(1),
  titleKo: z4.string().min(3),
  summaryKo: z4.string().min(20),
  subsections: z4.array(DraftSubsectionSchema).min(3)
}).strict();
// Source document reference: id plus a path matching REPO_PATH_PATTERN.
var SourceDocSchema = z4.object({
  sourceId: z4.string().min(1),
  path: z4.string().regex(REPO_PATH_PATTERN)
}).strict();
// Trend fact: id, category, and a summary of >= 10 chars.
var TrendFactSchema = z4.object({
  factId: z4.string().min(1),
  category: z4.string().min(1),
  summaryKo: z4.string().min(10)
}).strict();
|
|
1935
|
+
// One grounding issue: a code from the fixed list below, a message, and
// optional claim/citation ids.
var GroundingIssueSchema = z4.object({
  code: z4.enum([
    "MISSING_CITATION",
    "INVALID_PATH",
    "INVALID_LINE_RANGE",
    "UNRESOLVABLE_EVIDENCE",
    "SEMANTIC_MISMATCH",
    "LOW_SIGNAL_CITATION",
    "ANTI_TEMPLATE",
    "DUPLICATE_GENERIC_CITATION",
    "UNKNOWN"
  ]),
  message: z4.string().min(1),
  claimId: z4.string().min(1).optional(),
  citationId: z4.string().min(1).optional()
}).strict();
// Terminology diagnostics: two ratios in [0, 1], a non-negative integer token
// count, and a list of non-empty notes.
var TerminologyDiagnosticsSchema = z4.object({
  consistencyScore: z4.number().min(0).max(1),
  untranslatedTokenRatio: z4.number().min(0).max(1),
  analyzedTokenCount: z4.number().int().min(0),
  notes: z4.array(z4.string().min(1))
}).strict();
// Quality scorecard: five scores, each constrained to [0, 1].
var QualityScorecardSchema = z4.object({
  semanticFaithfulness: z4.number().min(0).max(1),
  conceptualDepth: z4.number().min(0).max(1),
  operationalClarity: z4.number().min(0).max(1),
  citationQuality: z4.number().min(0).max(1),
  novelty: z4.number().min(0).max(1)
}).strict();
|
|
1964
|
+
// Grounding report artifact. The refinement enforces that:
//   - claimsWithCitations does not exceed totalClaims,
//   - citationCoverage is 0 when there are no claims,
//   - otherwise citationCoverage equals claimsWithCitations / totalClaims
//     when both are rounded to six decimal places.
var GroundingReportSchema = z4.object({
  artifactType: z4.literal("grounding-report"),
  gateId: z4.enum(["GND-01", "GND-03", "GND-04"]),
  checkedAt: z4.string().datetime(),
  passed: z4.boolean(),
  totalClaims: z4.number().int().min(0),
  claimsWithCitations: z4.number().int().min(0),
  citationCoverage: z4.number().min(0).max(1),
  issues: z4.array(GroundingIssueSchema),
  diagnostics: TerminologyDiagnosticsSchema.optional()
}).strict().superRefine((report, ctx) => {
  const flag = (message, path) => {
    ctx.addIssue({ code: z4.ZodIssueCode.custom, message, path });
  };
  const { totalClaims, claimsWithCitations, citationCoverage } = report;
  if (claimsWithCitations > totalClaims) {
    flag("claimsWithCitations cannot exceed totalClaims", ["claimsWithCitations"]);
  }
  if (totalClaims === 0 && citationCoverage !== 0) {
    flag("citationCoverage must be 0 when totalClaims is 0", ["citationCoverage"]);
  }
  if (!(totalClaims > 0)) {
    return;
  }
  // Compare at six decimal places so float noise does not trip the check.
  const expectedCoverage = Number((claimsWithCitations / totalClaims).toFixed(6));
  const reportedCoverage = Number(citationCoverage.toFixed(6));
  if (expectedCoverage !== reportedCoverage) {
    flag("citationCoverage must match claimsWithCitations / totalClaims", ["citationCoverage"]);
  }
});
|
|
2001
|
+
// Wiki draft artifact. `trendFacts`, `claims`, `citations` and
// `groundingReport` are optional. The refinement reports:
//   - subsections whose sectionId differs from their parent section,
//   - repeated citation ids,
//   - claims that reference an unknown section, an unknown
//     "<sectionId>:<subsectionId>" key, or an unknown citation id.
var WikiDraftArtifactSchema = z4.object({
  artifactType: z4.literal("wiki-draft"),
  repoFullName: z4.string().regex(/^[a-z0-9_.-]+\/[a-z0-9_.-]+$/),
  commitSha: z4.string().regex(COMMIT_SHA_PATTERN2),
  generatedAt: z4.string().datetime(),
  overviewKo: z4.string().min(50),
  sections: z4.array(DraftSectionSchema).min(6),
  sourceDocs: z4.array(SourceDocSchema).min(1),
  trendFacts: z4.array(TrendFactSchema).optional(),
  claims: z4.array(ClaimSchema).min(1).optional(),
  citations: z4.array(CitationSchema).min(1).optional(),
  groundingReport: GroundingReportSchema.optional()
}).strict().superRefine((draft, ctx) => {
  const report = (message, path) => {
    ctx.addIssue({ code: z4.ZodIssueCode.custom, message, path });
  };
  const knownSectionIds = new Set(draft.sections.map((section) => section.sectionId));
  const knownSubsectionKeys = /* @__PURE__ */ new Set();
  draft.sections.forEach((section) => {
    section.subsections.forEach((subsection) => {
      if (subsection.sectionId !== section.sectionId) {
        report("draft subsection sectionId must match parent sectionId", ["sections"]);
      }
      knownSubsectionKeys.add(`${subsection.sectionId}:${subsection.subsectionId}`);
    });
  });
  const citations = draft.citations ?? [];
  const claims = draft.claims ?? [];
  const knownCitationIds = /* @__PURE__ */ new Set();
  citations.forEach(({ citationId }) => {
    if (knownCitationIds.has(citationId)) {
      report(`duplicate citationId: ${citationId}`, ["citations"]);
    }
    knownCitationIds.add(citationId);
  });
  claims.forEach((claim) => {
    if (!knownSectionIds.has(claim.sectionId)) {
      report(`claim references unknown sectionId: ${claim.sectionId}`, ["claims"]);
    }
    const subsectionKey = `${claim.sectionId}:${claim.subsectionId}`;
    if (!knownSubsectionKeys.has(subsectionKey)) {
      report(`claim references unknown subsection: ${subsectionKey}`, ["claims"]);
    }
    claim.citationIds.forEach((citationId) => {
      if (!knownCitationIds.has(citationId)) {
        report(`claim references unknown citationId: ${citationId}`, ["claims"]);
      }
    });
  });
});
|
|
2068
|
+
|
|
2069
|
+
// src/packaging/validate.ts
|
|
2070
|
+
// Literal values the envelope schemas below require.
var SOURCE_ARTIFACT_TYPE = "wiki-draft";
var CONTRACT_VERSION = "out-04.v1";
// Source summary: trimmed non-empty ingest run id and two non-negative
// integer counts.
var DeliverySourceSchema = z5.object({
  ingestRunId: z5.string().trim().min(1),
  sourceDocCount: z5.number().int().min(0),
  trendFactCount: z5.number().int().min(0)
}).strict();
// Packaging metadata: timestamp, the three fixed ordering labels, the source
// artifact type literal, and a quality scorecard.
var DeliveryPackagingMetadataSchema = z5.object({
  generatedAt: z5.string().datetime(),
  deterministicOrdering: z5.object({
    sections: z5.literal("sectionId:asc"),
    subsections: z5.literal("subsectionId:asc"),
    glossary: z5.literal("termEn:asc,termKo:asc")
  }).strict(),
  sourceArtifactType: z5.literal(SOURCE_ARTIFACT_TYPE),
  qualityScorecard: QualityScorecardSchema
}).strict();
|
|
2087
|
+
// Delivery envelope. The refinement:
//   1. rebuilds a wiki-delivery artifact from the envelope's fields and
//      re-validates it with DeliveryWikiArtifactSchema, forwarding each issue;
//   2. checks that commit sha, generatedAt and ingest run id agree across
//      project / provenance / metadata / source;
//   3. rejects glossaries with repeated termEn values (compared lowercased
//      with whitespace collapsed) or that are not sorted.
var DeliveryArtifactEnvelopeSchema = z5.object({
  contractVersion: z5.literal(CONTRACT_VERSION),
  project: z5.object({
    repoRef: z5.string().trim().regex(/^[a-z0-9_.-]+\/[a-z0-9_.-]+$/),
    commitSha: z5.string().trim().regex(/^[a-f0-9]{7,40}$/),
    ingestRunId: z5.string().trim().min(1)
  }).strict(),
  sections: z5.array(DeliverySectionSchema).min(1),
  provenance: DeliveryProvenanceSchema,
  glossary: z5.array(GlossaryEntrySchema).min(1),
  source: DeliverySourceSchema,
  metadata: DeliveryPackagingMetadataSchema
}).strict().superRefine((envelope, ctx) => {
  const report = (message, path) => {
    ctx.addIssue({ code: z5.ZodIssueCode.custom, message, path });
  };
  const { project, sections, provenance, glossary, source, metadata } = envelope;

  let subsectionTotal = 0;
  for (const section of sections) {
    subsectionTotal += section.subsectionIds.length;
  }
  const deliveryCheck = DeliveryWikiArtifactSchema.safeParse({
    project: {
      repoFullName: project.repoRef,
      commitSha: project.commitSha,
      ingestRunId: project.ingestRunId
    },
    sections,
    metadata: {
      artifactType: "wiki-delivery",
      sourceArtifactType: metadata.sourceArtifactType,
      contractVersion: "out-01.v1",
      generatedAt: metadata.generatedAt,
      sectionCount: sections.length,
      subsectionCount: subsectionTotal,
      deterministicOrdering: {
        sections: metadata.deterministicOrdering.sections,
        subsections: metadata.deterministicOrdering.subsections
      },
      provenance
    }
  });
  if (!deliveryCheck.success) {
    // NOTE(review): every forwarded path is prefixed with "sections", even for
    // issues that are not about sections — confirm this is intended.
    deliveryCheck.error.issues.forEach((issue) => {
      report(issue.message, ["sections", ...issue.path]);
    });
  }

  if (project.commitSha !== provenance.commitSha) {
    report("provenance.commitSha must equal project.commitSha", ["provenance", "commitSha"]);
  }
  if (metadata.generatedAt !== provenance.generatedAt) {
    report("metadata.generatedAt must equal provenance.generatedAt", ["metadata", "generatedAt"]);
  }
  if (source.ingestRunId !== project.ingestRunId) {
    report("source.ingestRunId must equal project.ingestRunId", ["source", "ingestRunId"]);
  }

  const seenTerms = /* @__PURE__ */ new Set();
  const repeatedTerms = /* @__PURE__ */ new Set();
  for (const { termEn } of glossary) {
    const canonical = termEn.toLowerCase().replace(/\s+/g, " ").trim();
    if (seenTerms.has(canonical)) {
      repeatedTerms.add(canonical);
    }
    seenTerms.add(canonical);
  }
  if (repeatedTerms.size > 0) {
    report(
      `glossary contains duplicate canonical termEn keys: ${Array.from(repeatedTerms).join(", ")}`,
      ["glossary"]
    );
  }
  if (!isGlossaryDeterministic(glossary)) {
    report("glossary must be sorted deterministically by termEn then termKo", ["glossary"]);
  }
});
|
|
2175
|
+
/**
 * Validates `input` against DeliveryArtifactEnvelopeSchema without throwing.
 *
 * @param {unknown} input
 * @returns {{ok: true, issues: [], envelope: object} | {ok: false, issues: Array<{path: string, message: string}>}}
 *   On failure each issue path is dot-joined, or "<root>" when empty.
 */
function validateDeliveryEnvelope(input) {
  const outcome = DeliveryArtifactEnvelopeSchema.safeParse(input);
  if (!outcome.success) {
    const issues = outcome.error.issues.map(({ path, message }) => ({
      path: path.length > 0 ? path.join(".") : "<root>",
      message
    }));
    return { ok: false, issues };
  }
  return {
    ok: true,
    issues: [],
    envelope: outcome.data
  };
}
|
|
2192
|
+
/**
 * Validates `input` and returns the parsed envelope, throwing on failure.
 *
 * @param {unknown} input
 * @returns {object} The parsed envelope.
 * @throws {Error} "OUT-04 validation failed: ..." listing the numbered issues
 *   as "<n>) <path>: <message>" joined by "; ".
 */
function assertValidDeliveryEnvelope(input) {
  const { ok, envelope, issues } = validateDeliveryEnvelope(input);
  if (ok && envelope) {
    return envelope;
  }
  const numbered = (issues || []).map(
    (issue, index) => `${index + 1}) ${issue.path}: ${issue.message}`
  );
  const reason = numbered.join("; ");
  throw new Error(`OUT-04 validation failed: ${reason || "unknown validation error"}`);
}
|
|
2200
|
+
/**
 * Checks that adjacent glossary entries are in non-decreasing order by English
 * term (English collation) and, for equal English terms, by Korean term
 * (Korean collation). Comparisons are numeric-aware and ignore case/accents.
 *
 * @param {Array<{termEn: string, termKo: string}>} glossary
 * @returns {boolean} True when no adjacent pair is out of order.
 */
function isGlossaryDeterministic(glossary) {
  const collation = { sensitivity: "base", numeric: true };
  for (let i = 0; i + 1 < glossary.length; i += 1) {
    const earlier = glossary[i];
    const later = glossary[i + 1];
    const englishOrder = earlier.termEn.localeCompare(later.termEn, "en", collation);
    if (englishOrder > 0) {
      return false;
    }
    if (englishOrder < 0) {
      continue;
    }
    // Same English term: fall back to the Korean term.
    if (earlier.termKo.localeCompare(later.termKo, "ko", collation) > 0) {
      return false;
    }
  }
  return true;
}
|
|
2223
|
+
|
|
2224
|
+
// src/quality/scorecard.ts
|
|
2225
|
+
// Case-insensitive match for paths whose file name starts with "readme",
// "license" or "changelog" (followed by "." or end of string), or that start
// with "docs/vendor/".
var LOW_SIGNAL_PATH_PATTERN = /(\/|^)(readme|license|changelog)(\.|$)|^docs\/vendor\//i;
|
|
2226
|
+
function buildQualityScorecard(acceptedOutput) {
|
|
2227
|
+
const draft = acceptedOutput.draft;
|
|
2228
|
+
const claims = draft.claims ?? [];
|
|
2229
|
+
const citations = draft.citations ?? [];
|
|
2230
|
+
const trendFacts = draft.trendFacts ?? [];
|
|
2231
|
+
const subsectionEntries = draft.sections.flatMap(
|
|
2232
|
+
(section) => section.subsections.map((subsection) => ({
|
|
2233
|
+
sectionId: section.sectionId,
|
|
2234
|
+
subsectionId: subsection.subsectionId,
|
|
2235
|
+
bodyKo: subsection.bodyKo
|
|
2236
|
+
}))
|
|
2237
|
+
);
|
|
2238
|
+
const totalClaims = Math.max(1, claims.length);
|
|
2239
|
+
const totalCitations = Math.max(1, citations.length);
|
|
2240
|
+
const totalSubsections = Math.max(1, subsectionEntries.length);
|
|
2241
|
+
const readabilityScore = calculateReadabilityScore(subsectionEntries.map((entry) => entry.bodyKo));
|
|
2242
|
+
const semanticFaithfulness = readabilityScore;
|
|
2243
|
+
const averageBodyChars = subsectionEntries.reduce((sum, subsection) => sum + subsection.bodyKo.length, 0) / totalSubsections;
|
|
2244
|
+
const conceptualDepth = clamp01(averageBodyChars / 3200);
|
|
2245
|
+
const citationsById = new Map(citations.map((citation) => [citation.citationId, citation]));
|
|
2246
|
+
const claimsBySubsection = /* @__PURE__ */ new Map();
|
|
2247
|
+
for (const claim of claims) {
|
|
2248
|
+
const key = `${claim.sectionId}:${claim.subsectionId}`;
|
|
2249
|
+
const list = claimsBySubsection.get(key) ?? [];
|
|
2250
|
+
list.push(claim);
|
|
2251
|
+
claimsBySubsection.set(key, list);
|
|
2252
|
+
}
|
|
2253
|
+
let anchoredSubsectionCount = 0;
|
|
2254
|
+
for (const subsection of subsectionEntries) {
|
|
2255
|
+
const key = `${subsection.sectionId}:${subsection.subsectionId}`;
|
|
2256
|
+
const claims2 = claimsBySubsection.get(key) ?? [];
|
|
2257
|
+
const citedPaths = claims2.flatMap((claim) => claim.citationIds).map((citationId) => citationsById.get(citationId)?.repoPath).filter((repoPath) => Boolean(repoPath));
|
|
2258
|
+
if (citedPaths.some((repoPath) => subsection.bodyKo.includes(repoPath))) {
|
|
2259
|
+
anchoredSubsectionCount += 1;
|
|
2260
|
+
}
|
|
2261
|
+
}
|
|
2262
|
+
const operationalClarity = clamp01(anchoredSubsectionCount / totalSubsections);
|
|
2263
|
+
const lowSignalCitationCount = citations.filter(
|
|
2264
|
+
(citation) => LOW_SIGNAL_PATH_PATTERN.test(citation.repoPath)
|
|
2265
|
+
).length;
|
|
2266
|
+
const trendMentionCount = subsectionEntries.filter((entry) => /트렌드|릴리스|release|tag|changelog/iu.test(entry.bodyKo)).length;
|
|
2267
|
+
const trendCoverage = clamp01(
|
|
2268
|
+
(trendFacts.length > 0 ? 0.5 : 0) + trendMentionCount / totalSubsections * 0.5
|
|
2269
|
+
);
|
|
2270
|
+
const citationQuality = clamp01(1 - lowSignalCitationCount / totalCitations);
|
|
2271
|
+
const canonicalBodies = subsectionEntries.map(
|
|
2272
|
+
(subsection) => subsection.bodyKo.normalize("NFKC").replace(/\s+/g, " ").replace(/[.,:;!?()\[\]{}"'`-]/g, "").trim()
|
|
2273
|
+
);
|
|
2274
|
+
const uniqueBodyRatio = new Set(canonicalBodies).size / totalSubsections;
|
|
2275
|
+
const novelty = clamp01(uniqueBodyRatio * 0.7 + trendCoverage * 0.3);
|
|
2276
|
+
return QualityScorecardSchema.parse({
|
|
2277
|
+
semanticFaithfulness: round4(semanticFaithfulness),
|
|
2278
|
+
conceptualDepth: round4(conceptualDepth),
|
|
2279
|
+
operationalClarity: round4(operationalClarity),
|
|
2280
|
+
citationQuality: round4((citationQuality + trendCoverage) / 2),
|
|
2281
|
+
novelty: round4(novelty)
|
|
2282
|
+
});
|
|
2283
|
+
}
|
|
2284
|
+
/**
 * Averages a per-body readability heuristic over all bodies.
 *
 * Each non-empty body scores 70% on how close its average sentence length is
 * to 95 characters and 30% on a path bonus (1 when the text contains an
 * `a/b`-style token, otherwise 0.5). Whitespace-only bodies score 0.
 *
 * @param {string[]} bodies
 * @returns {number} Mean score in [0, 1]; 0 for an empty list.
 */
function calculateReadabilityScore(bodies) {
  if (bodies.length === 0) {
    return 0;
  }

  const scoreBody = (body) => {
    const flattened = body.replace(/\s+/g, " ").trim();
    if (flattened.length === 0) {
      return 0;
    }
    // Sentences end at ., !, ? or at "다" followed by whitespace.
    const fragments = flattened.split(/[.!?]|다\s+/u).filter((part) => part.trim().length > 0);
    const sentenceCount = Math.max(1, fragments.length);
    const averageSentenceLength = flattened.length / sentenceCount;
    const sentenceBandScore = clamp01(1 - Math.abs(averageSentenceLength - 95) / 95);
    const pathBonus = /[a-z0-9_.-]+\/[a-z0-9_.-]+/i.test(flattened) ? 1 : 0.5;
    return clamp01(sentenceBandScore * 0.7 + pathBonus * 0.3);
  };

  const scores = bodies.map((body) => scoreBody(body));
  let total = 0;
  for (const score of scores) {
    total += score;
  }
  return clamp01(total / scores.length);
}
|
|
2301
|
+
/**
 * Clamps a number into the closed interval [0, 1].
 *
 * @param {number} value
 * @returns {number} 0 for NaN, ±Infinity and negatives; 1 for values above 1;
 *   otherwise the value itself.
 */
function clamp01(value) {
  if (!Number.isFinite(value) || value < 0) {
    return 0;
  }
  return value > 1 ? 1 : value;
}
|
|
2313
|
+
/**
 * Rounds a number to at most four decimal places.
 *
 * @param {number} value
 * @returns {number} `value.toFixed(4)` converted back to a number.
 */
function round4(value) {
  const fixed = value.toFixed(4);
  return Number(fixed);
}
|
|
2316
|
+
|
|
2317
|
+
// src/orchestration/package-delivery.ts
|
|
2318
|
+
/**
 * Looks for repetition inside each individual subsection body.
 *
 * Two checks run per body: (1) the first sentence longer than 50 characters
 * that occurs twice, and (2) a repeated long chunk as reported by
 * findRepeatedChunkWithinText (240-char windows, stride 40, minimum 200 chars).
 * Each check contributes at most one message per subsection.
 *
 * @param {Array<{sectionId: string, subsectionId: string, bodyKo: string}>} bodies
 * @returns {string[]} Human-readable findings; empty when nothing repeats.
 */
function detectBodyKoRepetitionInBodies(bodies) {
  const errors = [];
  for (const { sectionId, subsectionId, bodyKo } of bodies) {
    const label = `${subsectionId} (${sectionId})`;

    const seenSentences = /* @__PURE__ */ new Set();
    const duplicate = splitKoreanSentences(bodyKo).find((sentence) => {
      if (sentence.length <= 50) {
        return false;
      }
      if (seenSentences.has(sentence)) {
        return true;
      }
      seenSentences.add(sentence);
      return false;
    });
    if (duplicate !== void 0) {
      errors.push(`${label}: repeated sentence detected \u2014 "${duplicate.slice(0, 60)}..."`);
    }

    const repeatedChunk = findRepeatedChunkWithinText(bodyKo, {
      windowChars: 240,
      strideChars: 40,
      minimumRepeatChars: 200,
    });
    if (repeatedChunk) {
      errors.push(`${label}: repeated long chunk detected \u2014 "${repeatedChunk.slice(0, 60)}..."`);
    }
  }
  return errors;
}
|
|
2345
|
+
/**
 * Flattens every subsection of the accepted output's draft into
 * `{sectionId, subsectionId, bodyKo}` records and runs the per-body
 * repetition detector on them.
 *
 * @param {object} acceptedOutput - Object with `draft.sections[].subsections[]`.
 * @returns {string[]} Findings from detectBodyKoRepetitionInBodies.
 */
function detectBodyKoRepetition(acceptedOutput) {
  const bodies = acceptedOutput.draft.sections.flatMap((section) =>
    section.subsections.map((sub) => ({
      sectionId: section.sectionId,
      subsectionId: sub.subsectionId,
      bodyKo: sub.bodyKo,
    }))
  );
  return detectBodyKoRepetitionInBodies(bodies);
}
|
|
2354
|
+
/**
 * Looks for text shared between different subsections.
 *
 * Pass 1 slides a 320-character window (stride 60) over every body, buckets
 * the windows by FNV-1a hash, then re-groups each bucket by exact text so hash
 * collisions cannot produce a finding. A finding is emitted when the same
 * chunk appears in at least two distinct subsections (up to four locations
 * are listed).
 *
 * Pass 2 compares every pair of bodies by Jaccard similarity of their shingle
 * sets (80-char shingles, step 40, bodies under 1800 chars are skipped) and
 * reports pairs at or above 0.9.
 *
 * @param {Array<{sectionId: string, subsectionId: string, bodyKo: string}>} bodies
 * @returns {string[]} Human-readable findings; empty when nothing is shared.
 */
function detectCrossSubsectionRepetitionInBodies(bodies) {
  const errors = [];

  // Pass 1: identical long chunks.
  const occurrencesByHash = /* @__PURE__ */ new Map();
  for (const { sectionId, subsectionId, bodyKo } of bodies) {
    for (const chunk of chunkText(bodyKo, { windowChars: 320, strideChars: 60 })) {
      if (chunk.length < 300) {
        continue;
      }
      const canonical = normalizeForRepetition(chunk);
      if (canonical.length < 280) {
        continue;
      }
      const hash = fnv1a32(canonical);
      if (!occurrencesByHash.has(hash)) {
        occurrencesByHash.set(hash, []);
      }
      occurrencesByHash.get(hash).push({ sectionId, subsectionId, chunk: canonical });
    }
  }

  for (const sameHash of occurrencesByHash.values()) {
    if (sameHash.length < 2) {
      continue;
    }
    // Re-group by the chunk text itself: equal hashes do not imply equal text.
    const byText = /* @__PURE__ */ new Map();
    for (const occurrence of sameHash) {
      if (!byText.has(occurrence.chunk)) {
        byText.set(occurrence.chunk, []);
      }
      byText.get(occurrence.chunk).push(occurrence);
    }
    for (const bucket of byText.values()) {
      if (bucket.length < 2) {
        continue;
      }
      const perSubsection = /* @__PURE__ */ new Map();
      for (const occurrence of bucket) {
        perSubsection.set(`${occurrence.sectionId}:${occurrence.subsectionId}`, occurrence);
      }
      if (perSubsection.size < 2) {
        continue;
      }
      const locations = [...perSubsection.values()]
        .slice(0, 4)
        .map((o) => `${o.subsectionId} (${o.sectionId})`)
        .join(", ");
      const excerpt = bucket[0].chunk.slice(0, 80);
      errors.push(`cross-subsection repeated chunk detected in: ${locations} \u2014 "${excerpt}..."`);
    }
  }

  // Pass 2: near-duplicate bodies.
  const shingleOptions = { shingleSize: 80, step: 40, minimumChars: 1800 };
  const fingerprints = bodies.map((sub) => ({
    sectionId: sub.sectionId,
    subsectionId: sub.subsectionId,
    shingles: buildShingleSet(sub.bodyKo, { ...shingleOptions }),
  }));
  fingerprints.forEach((left, leftIndex) => {
    for (const right of fingerprints.slice(leftIndex + 1)) {
      if (left.shingles.size === 0 || right.shingles.size === 0) {
        continue;
      }
      const similarity = jaccard(left.shingles, right.shingles);
      if (similarity >= 0.9) {
        errors.push(
          `near-duplicate bodyKo detected (${similarity.toFixed(3)} Jaccard): ${left.subsectionId} (${left.sectionId}) \u2194 ${right.subsectionId} (${right.sectionId})`
        );
      }
    }
  });
  return errors;
}
|
|
2429
|
+
/**
 * Collects every subsection body of the accepted output's draft and runs the
 * cross-subsection repetition detector on them.
 *
 * @param {object} acceptedOutput - Object with `draft.sections[].subsections[]`.
 * @returns {string[]} Findings from detectCrossSubsectionRepetitionInBodies.
 */
function detectCrossSubsectionBodyKoRepetition(acceptedOutput) {
  const bodies = [];
  acceptedOutput.draft.sections.forEach((section) => {
    section.subsections.forEach((sub) => {
      bodies.push({
        sectionId: section.sectionId,
        subsectionId: sub.subsectionId,
        bodyKo: sub.bodyKo,
      });
    });
  });
  return detectCrossSubsectionRepetitionInBodies(bodies);
}
|
|
2438
|
+
/**
 * Splits text into trimmed, non-empty sentences.
 *
 * The text is first canonicalised with normalizeForRepetition, then split at
 * line breaks and at whitespace that follows `.`, `!`, `?` or the character "다".
 *
 * @param {string} text
 * @returns {string[]}
 */
function splitKoreanSentences(text) {
  const sentences = [];
  for (const piece of normalizeForRepetition(text).split(/[\n\r]+|(?<=[.!?]|다)\s+/u)) {
    const trimmed = piece.trim();
    if (trimmed.length > 0) {
      sentences.push(trimmed);
    }
  }
  return sentences;
}
|
|
2441
|
+
/**
 * Canonicalises text for repetition checks: NFKC-normalises it, removes
 * `[deterministic-prompt-hash:...]` and `[reference:...]` markers, collapses
 * every whitespace run to a single space and trims the ends.
 *
 * Markers are removed BEFORE whitespace is collapsed. Doing it the other way
 * round leaves a double space wherever a marker sat between two words, which
 * makes the function non-idempotent and gives the same prose a different
 * canonical form depending on whether it carried markers.
 *
 * @param {string | null | undefined} text - Nullish input is treated as "".
 * @returns {string} Canonical single-line form of the text.
 */
function normalizeForRepetition(text) {
  return (text ?? "")
    .normalize("NFKC")
    .replace(/\[deterministic-prompt-hash:[^\]]+\]/g, "")
    .replace(/\[reference:[^\]]+\]/g, "")
    .replace(/\s+/g, " ")
    .trim();
}
|
|
2444
|
+
/**
 * Cuts the canonicalised text into overlapping fixed-size windows.
 *
 * @param {string} text - Canonicalised with normalizeForRepetition first.
 * @param {{windowChars: number, strideChars: number}} input - Window size
 *   (floored at 50) and distance between window starts (floored at 1).
 * @returns {string[]} Full-size windows only; empty when the text is shorter
 *   than one window.
 */
function chunkText(text, input) {
  const normalized = normalizeForRepetition(text);
  const windowChars = Math.max(50, input.windowChars);
  const strideChars = Math.max(1, input.strideChars);
  const chunks = [];
  let start = 0;
  // The loop never runs when the text is shorter than one window.
  while (start + windowChars <= normalized.length) {
    chunks.push(normalized.slice(start, start + windowChars));
    start += strideChars;
  }
  return chunks;
}
|
|
2457
|
+
/**
 * Returns the first window of the text whose canonical form has already been
 * seen in an earlier window, or `null` when no window repeats.
 *
 * Windows are compared by their exact canonical text rather than by a 32-bit
 * hash, so two different chunks can never be reported as a repeat because of a
 * hash collision.
 *
 * @param {string} text
 * @param {{windowChars: number, strideChars: number, minimumRepeatChars: number}} input
 *   Window size and stride passed to chunkText, plus the minimum canonical
 *   length a window needs to be considered.
 * @returns {string | null} The repeated canonical chunk, or `null`.
 */
function findRepeatedChunkWithinText(text, input) {
  const seen = /* @__PURE__ */ new Set();
  const chunks = chunkText(text, { windowChars: input.windowChars, strideChars: input.strideChars });
  for (const chunk of chunks) {
    const canonical = normalizeForRepetition(chunk);
    if (canonical.length < input.minimumRepeatChars) {
      continue;
    }
    if (seen.has(canonical)) {
      return canonical;
    }
    seen.add(canonical);
  }
  return null;
}
|
|
2477
|
+
/**
 * Builds the set of FNV-1a hashes of fixed-size shingles taken from the
 * canonicalised text.
 *
 * @param {string} text - Canonicalised with normalizeForRepetition first.
 * @param {{shingleSize: number, step: number, minimumChars: number}} input
 *   Shingle width (floored at 20), distance between shingle starts (floored
 *   at 1), and the minimum canonical length below which no shingles are built.
 * @returns {Set<string>} Hashes of all full-size shingles; empty when the text
 *   is shorter than `minimumChars` or than one shingle.
 */
function buildShingleSet(text, input) {
  const normalized = normalizeForRepetition(text);
  const hashes = /* @__PURE__ */ new Set();
  if (normalized.length < input.minimumChars) {
    return hashes;
  }
  const width = Math.max(20, input.shingleSize);
  const stride = Math.max(1, input.step);
  let start = 0;
  while (start + width <= normalized.length) {
    hashes.add(fnv1a32(normalized.slice(start, start + width)));
    start += stride;
  }
  return hashes;
}
|
|
2494
|
+
/**
 * Jaccard similarity |A ∩ B| / |A ∪ B| of two sets.
 *
 * @param {Set<unknown>} left
 * @param {Set<unknown>} right
 * @returns {number} Value in [0, 1]; 0 when both sets are empty.
 */
function jaccard(left, right) {
  // Walk the smaller set and probe the larger one.
  const leftIsSmaller = left.size <= right.size;
  const probe = leftIsSmaller ? left : right;
  const lookup = leftIsSmaller ? right : left;
  const intersection = [...probe].filter((token) => lookup.has(token)).length;
  const union = left.size + right.size - intersection;
  if (union === 0) {
    return 0;
  }
  return intersection / union;
}
|
|
2505
|
+
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of a string.
 *
 * @param {string} value
 * @returns {string} The hash as eight lowercase hexadecimal digits.
 */
function fnv1a32(value) {
  const FNV_PRIME = 16777619;
  let state = 2166136261; // FNV-1a 32-bit offset basis
  let position = 0;
  while (position < value.length) {
    // Math.imul keeps the multiplication within 32 bits.
    state = Math.imul(state ^ value.charCodeAt(position), FNV_PRIME);
    position += 1;
  }
  // >>> 0 reinterprets the signed 32-bit result as unsigned before formatting.
  return (state >>> 0).toString(16).padStart(8, "0");
}
|
|
2513
|
+
// Contract version stamped on every delivery envelope built below.
var DELIVERY_CONTRACT_VERSION = "out-04.v1";

/**
 * Turns accepted outputs into validated delivery envelopes.
 *
 * Outputs are processed in compareAcceptedOutputs order. For each one the
 * function rejects repeated content (within and across subsections), applies
 * the strict source-document gate when requested, computes the quality
 * scorecard, adapts the output to the delivery shape and validates the final
 * envelope. Every per-output failure is collected; if any exist, a single
 * error listing all of them is thrown and nothing is returned.
 *
 * @param {object[]} acceptedOutputs
 * @param {{qualityGateLevel?: string, generatedAt?: unknown, modelId?: unknown}} [options]
 *   `qualityGateLevel` defaults to "standard"; "strict" additionally requires
 *   a positive source document count. `generatedAt` and `modelId` are passed
 *   through to adaptAcceptedOutputToDelivery.
 * @returns {{artifacts: object[], summary: {attempted: number, packaged: number, blocked: number}}}
 * @throws {Error} "OUT-04 packaging blocked (...)" when at least one output fails.
 */
function packageAcceptedOutputsForDelivery(acceptedOutputs, options = {}) {
  // Builds one envelope; any rejection surfaces as a thrown Error.
  const buildEnvelope = (acceptedOutput) => {
    const qualityGateLevel = options.qualityGateLevel ?? "standard";
    const normalized = normalizeAcceptedOutputForPackaging(acceptedOutput);
    const { draft } = normalized;

    // Fallback chain: explicit count, then sourceDocs length, then distinct cited paths.
    const distinctCitedPathCount = new Set(draft.citations.map((citation) => citation.repoPath)).size;
    const sourceDocCount = normalized.source_doc_count ?? draft.sourceDocs?.length ?? distinctCitedPathCount;
    const trendFactCount = normalized.trend_fact_count ?? draft.trendFacts?.length ?? 0;

    const repetitionErrors = detectBodyKoRepetition(normalized);
    if (repetitionErrors.length > 0) {
      throw new Error(
        `OUT-04 validation failed: bodyKo repetition detected (${repetitionErrors.length} subsection(s)):\n` +
          repetitionErrors.map((e) => ` - ${e}`).join("\n") +
          "\n Do not repeat sentences to pad character count. Write unique content for each subsection."
      );
    }

    const crossRepetitionErrors = detectCrossSubsectionBodyKoRepetition(normalized);
    if (crossRepetitionErrors.length > 0) {
      throw new Error(
        `OUT-04 validation failed: cross-subsection bodyKo repetition detected (${crossRepetitionErrors.length} finding(s)):\n` +
          crossRepetitionErrors.map((e) => ` - ${e}`).join("\n") +
          "\n Do not paste the same long padding block into multiple subsections."
      );
    }

    if (qualityGateLevel === "strict" && sourceDocCount <= 0) {
      throw new Error("strict quality gate failed: source_doc_count must be greater than 0");
    }

    const qualityScorecard = buildQualityScorecard(normalized);
    const delivery = adaptAcceptedOutputToDelivery(normalized, {
      generatedAt: options.generatedAt,
      modelId: options.modelId,
    });
    const { metadata } = delivery;

    return assertValidDeliveryEnvelope({
      contractVersion: DELIVERY_CONTRACT_VERSION,
      project: {
        repoRef: acceptedOutput.repo_ref,
        commitSha: acceptedOutput.commit_sha,
        ingestRunId: acceptedOutput.ingest_run_id,
      },
      sections: delivery.sections,
      provenance: metadata.provenance,
      glossary: buildGlossaryFromAcceptedOutput(normalized),
      source: {
        ingestRunId: normalized.ingest_run_id,
        sourceDocCount,
        trendFactCount,
      },
      metadata: {
        generatedAt: metadata.generatedAt,
        deterministicOrdering: {
          sections: metadata.deterministicOrdering.sections,
          subsections: metadata.deterministicOrdering.subsections,
          glossary: "termEn:asc,termKo:asc",
        },
        sourceArtifactType: metadata.sourceArtifactType,
        qualityScorecard,
      },
    });
  };

  const sortedOutputs = [...acceptedOutputs].sort(compareAcceptedOutputs);
  const artifacts = [];
  const failures = [];
  for (const acceptedOutput of sortedOutputs) {
    try {
      artifacts.push(buildEnvelope(acceptedOutput));
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      failures.push(`${acceptedOutput.repo_ref}@${acceptedOutput.commit_sha.slice(0, 12)}: ${message}`);
    }
  }

  if (failures.length > 0) {
    throw new Error(
      `OUT-04 packaging blocked (${failures.length}/${sortedOutputs.length}): ${failures.join(" | ")}`
    );
  }

  const attempted = sortedOutputs.length;
  const packaged = artifacts.length;
  return {
    artifacts,
    summary: { attempted, packaged, blocked: attempted - packaged },
  };
}
|
|
2595
|
+
/**
 * Returns a shallow copy of the accepted output whose draft always has
 * `claims`, `citations` and `trendFacts` arrays (nullish values become `[]`).
 * The input object is not modified.
 *
 * @param {object} acceptedOutput - Object with a `draft` property.
 * @returns {object} Copy with the three draft arrays defaulted.
 */
function normalizeAcceptedOutputForPackaging(acceptedOutput) {
  const { draft } = acceptedOutput;
  const { claims, citations, trendFacts } = draft;
  const normalizedDraft = {
    ...draft,
    claims: claims ?? [],
    citations: citations ?? [],
    trendFacts: trendFacts ?? [],
  };
  return { ...acceptedOutput, draft: normalizedDraft };
}
|
|
2606
|
+
/**
 * Sort comparator for accepted outputs. Orders by lower-cased `repo_ref`,
 * then lower-cased `commit_sha`, then `ingest_run_id`, compared as one
 * "|"-joined key with case-insensitive, digit-aware English collation.
 *
 * @param {{repo_ref: string, commit_sha: string, ingest_run_id: unknown}} left
 * @param {{repo_ref: string, commit_sha: string, ingest_run_id: unknown}} right
 * @returns {number} Negative, zero or positive, as `Array.prototype.sort` expects.
 */
function compareAcceptedOutputs(left, right) {
  const sortKey = (output) =>
    `${output.repo_ref.toLowerCase()}|${output.commit_sha.toLowerCase()}|${output.ingest_run_id}`;
  const leftKey = sortKey(left);
  const rightKey = sortKey(right);
  return leftKey.localeCompare(rightKey, "en", { sensitivity: "base", numeric: true });
}
|
|
2611
|
+
|
|
2612
|
+
// src/freshness/detect.ts
|
|
2613
|
+
import { Octokit as Octokit2 } from "@octokit/rest";
|
|
2614
|
+
|
|
2615
|
+
// src/contracts/wiki-freshness.ts
|
|
2616
|
+
import { z as z6 } from "zod";
|
|
2617
|
+
// Lowercase hexadecimal commit SHA, 7 to 40 characters.
var COMMIT_SHA_PATTERN3 = /^[a-f0-9]{7,40}$/;
// Lowercase "owner/repo" reference.
var REPO_REF_PATTERN = /^[a-z0-9_.-]+\/[a-z0-9_.-]+$/;
// Non-empty path with no leading "/", no ".." anywhere and no trailing whitespace.
var REPO_PATH_PATTERN2 = /^(?!\/)(?!.*\.\.)(?!.*\s$).+/;

var FreshnessRepoRefSchema = z6.string().trim().regex(REPO_REF_PATTERN);

// One section together with the repo paths recorded as its evidence (at least one).
var FreshnessSectionEvidenceSchema = z6
  .object({
    sectionId: z6.string().trim().min(1),
    repoPaths: z6.array(z6.string().trim().regex(REPO_PATH_PATTERN2)).min(1),
  })
  .strict();

// Per-repository baseline: last delivered commit, optional etag, section evidence index.
var FreshnessBaselineSchema = z6
  .object({
    repo_ref: FreshnessRepoRefSchema,
    last_delivery_commit: z6.string().trim().regex(COMMIT_SHA_PATTERN3),
    etag: z6.string().trim().min(1).optional(),
    sectionEvidenceIndex: z6.array(FreshnessSectionEvidenceSchema).default([]),
  })
  .strict();

// State file: schema version 1 plus a map of repo_ref -> baseline, where each
// map key must equal the `repo_ref` stored inside its baseline.
var FreshnessStateFileSchema = z6
  .object({
    schema_version: z6.literal(1),
    repos: z6.record(FreshnessRepoRefSchema, FreshnessBaselineSchema),
  })
  .strict()
  .superRefine((value, ctx) => {
    Object.entries(value.repos).forEach(([repoKey, baseline]) => {
      if (repoKey === baseline.repo_ref) {
        return;
      }
      ctx.addIssue({
        code: z6.ZodIssueCode.custom,
        message: "repo map key must match baseline.repo_ref",
        path: ["repos", repoKey, "repo_ref"],
      });
    });
  });

// File statuses accepted for a changed-file entry.
var ChangedFileStatusSchema = z6.enum([
  "added",
  "modified",
  "removed",
  "renamed",
  "copied",
  "changed",
  "unchanged",
]);

var FreshnessChangedFileSchema = z6
  .object({
    path: z6.string().trim().min(1),
    status: ChangedFileStatusSchema,
    previous_path: z6.string().trim().min(1).optional(),
  })
  .strict();

// Reasons for which a compare result is treated as ambiguous.
var FreshnessAmbiguityReasonSchema = z6.enum([
  "COMPARE_FILE_LIST_MISSING",
  "COMPARE_PAGINATED",
  "COMPARE_FILE_CAP_REACHED",
  "COMPARE_STATUS_DIVERGED",
  "COMPARE_STATUS_UNKNOWN",
  "COMPARE_FILE_ENTRY_INVALID",
]);

// Fields shared by every detector outcome.
var DetectorBaseSchema = z6
  .object({
    repo_ref: FreshnessRepoRefSchema,
    base_commit: z6.string().trim().regex(COMMIT_SHA_PATTERN3),
    head_commit: z6.string().trim().regex(COMMIT_SHA_PATTERN3),
    changed_paths: z6.array(z6.string().trim().min(1)),
  })
  .strict();

// "noop": no changed files and no ambiguity reasons.
var FreshnessNoopOutcomeSchema = DetectorBaseSchema.extend({
  mode: z6.literal("noop"),
  changed_files: z6.array(FreshnessChangedFileSchema).length(0),
  ambiguity_reasons: z6.array(FreshnessAmbiguityReasonSchema).length(0),
});

// "incremental-candidate": at least one changed file and no ambiguity reasons.
var FreshnessIncrementalCandidateOutcomeSchema = DetectorBaseSchema.extend({
  mode: z6.literal("incremental-candidate"),
  changed_files: z6.array(FreshnessChangedFileSchema).min(1),
  ambiguity_reasons: z6.array(FreshnessAmbiguityReasonSchema).length(0),
});

// "full-rebuild-required": at least one ambiguity reason.
var FreshnessFullRebuildRequiredOutcomeSchema = DetectorBaseSchema.extend({
  mode: z6.literal("full-rebuild-required"),
  changed_files: z6.array(FreshnessChangedFileSchema),
  ambiguity_reasons: z6.array(FreshnessAmbiguityReasonSchema).min(1),
});

var FreshnessDetectorOutcomeSchema = z6.union([
  FreshnessNoopOutcomeSchema,
  FreshnessIncrementalCandidateOutcomeSchema,
  FreshnessFullRebuildRequiredOutcomeSchema,
]);
|
|
2693
|
+
|
|
2694
|
+
// src/freshness/detect.ts
|
|
2695
|
+
/**
 * Creates an Octokit client, passing `{ auth: token }` when a token is given
 * and no options otherwise.
 *
 * @param {string} [token]
 * @returns {Octokit2}
 */
function createClient(token) {
  if (token) {
    return new Octokit2({ auth: token });
  }
  return new Octokit2(void 0);
}
|
|
2698
|
+
/**
 * Normalises a changed-file path: trims it, converts backslashes to forward
 * slashes, drops one leading "./" and collapses repeated slashes.
 *
 * @param {string} value
 * @returns {string}
 */
function normalizeChangedPath(value) {
  const forwardSlashed = value.trim().replace(/\\/g, "/");
  const withoutDotPrefix = forwardSlashed.replace(/^\.\//, "");
  return withoutDotPrefix.replace(/\/+/g, "/");
}
|
|
2701
|
+
/**
 * Converts the `files` array of a compare response into validated,
 * deterministically sorted changed-file records.
 *
 * Entries without a string `filename` or `status`, and entries rejected by
 * FreshnessChangedFileSchema, are dropped and flagged with
 * "COMPARE_FILE_ENTRY_INVALID". For renamed files the previous path is also
 * added to `changed_paths`.
 *
 * @param {unknown} files - Expected to be an array of file entries.
 * @returns {{changed_files: object[], changed_paths: string[], ambiguity_reasons: string[]}}
 *   When `files` is not an array, both lists are empty and the only reason is
 *   "COMPARE_FILE_LIST_MISSING".
 */
function normalizeChangedFiles(files) {
  if (!Array.isArray(files)) {
    return {
      changed_files: [],
      changed_paths: [],
      ambiguity_reasons: ["COMPARE_FILE_LIST_MISSING"],
    };
  }

  const accepted = [];
  const touchedPaths = /* @__PURE__ */ new Set();
  const problems = /* @__PURE__ */ new Set();

  for (const file of files) {
    const { status: rawStatus, filename: rawFilename, previous_filename: rawPrevious } = file;
    const status = typeof rawStatus === "string" ? rawStatus : "";
    const filename = typeof rawFilename === "string" ? normalizeChangedPath(rawFilename) : "";
    const previousFilename =
      typeof rawPrevious === "string" && rawPrevious ? normalizeChangedPath(rawPrevious) : void 0;

    if (!filename || !status) {
      problems.add("COMPARE_FILE_ENTRY_INVALID");
      continue;
    }

    let parsed;
    try {
      parsed = FreshnessChangedFileSchema.parse({
        path: filename,
        status,
        previous_path: previousFilename,
      });
    } catch {
      // A schema rejection marks the compare result as ambiguous; it is not fatal.
      problems.add("COMPARE_FILE_ENTRY_INVALID");
      continue;
    }

    accepted.push(parsed);
    touchedPaths.add(filename);
    if (status === "renamed" && previousFilename) {
      touchedPaths.add(previousFilename);
    }
  }

  const byLocale = (left, right) => left.localeCompare(right);
  // Sort by path, then previous path, then status.
  const byFile = (left, right) =>
    left.path.localeCompare(right.path) ||
    (left.previous_path ?? "").localeCompare(right.previous_path ?? "") ||
    left.status.localeCompare(right.status);

  return {
    changed_files: [...accepted].sort(byFile),
    changed_paths: [...touchedPaths].sort(byLocale),
    ambiguity_reasons: [...problems].sort(byLocale),
  };
}
|
|
2753
|
+
/**
 * Inspects a compare response and lists the reasons its file list cannot be
 * trusted as complete.
 *
 * @param {{data: {status?: unknown, files?: unknown}, headers?: {link?: string}}} compare
 * @returns {string[]} Sorted reason codes:
 *   - COMPARE_FILE_LIST_MISSING: `data.files` is not an array
 *   - COMPARE_PAGINATED: the `link` header contains `rel="next"`
 *   - COMPARE_FILE_CAP_REACHED: 300 or more files were returned
 *   - COMPARE_STATUS_DIVERGED: `data.status` is "diverged"
 *   - COMPARE_STATUS_UNKNOWN: `data.status` is not a non-empty string
 */
function collectAmbiguityReasons(compare) {
  const { status, files } = compare.data;
  const hasFileList = Array.isArray(files);
  const linkHeader = compare.headers?.link ?? "";

  const checks = [
    ["COMPARE_FILE_LIST_MISSING", !hasFileList],
    ["COMPARE_PAGINATED", linkHeader.includes('rel="next"')],
    ["COMPARE_FILE_CAP_REACHED", hasFileList && files.length >= 300],
    ["COMPARE_STATUS_DIVERGED", status === "diverged"],
    ["COMPARE_STATUS_UNKNOWN", typeof status !== "string" || status.length === 0],
  ];

  return checks
    .filter(([, applies]) => applies)
    .map(([reason]) => reason)
    .sort((left, right) => left.localeCompare(right));
}
|
|
2775
|
+
/**
 * Compares a repository's default-branch head with the last delivered commit
 * and classifies the difference.
 *
 * Outcomes (validated by FreshnessDetectorOutcomeSchema):
 * - "noop": head equals the baseline commit.
 * - "full-rebuild-required": the compare response produced at least one
 *   ambiguity reason.
 * - "incremental-candidate": otherwise.
 *
 * @param {{repo_ref: string, baseline: object}} input - Repository reference
 *   and its stored baseline; the baseline's `repo_ref` must normalise to the
 *   same repository.
 * @param {{client?: object, token?: string}} [options] - A ready client, or a
 *   token from which one is created.
 * @returns {Promise<object>} The parsed detector outcome.
 * @throws {Error} When the baseline belongs to a different repository, when
 *   schema validation fails, or when a client request rejects.
 */
async function detectRepoFreshness(input, options = {}) {
  const normalizedRepo = normalizeGitHubRepoIdentityForResolver(input.repo_ref);
  const baseline = FreshnessBaselineSchema.parse({
    ...input.baseline,
    repo_ref: normalizeGitHubRepoIdentityForResolver(input.baseline.repo_ref).repo_ref,
    last_delivery_commit: input.baseline.last_delivery_commit.toLowerCase(),
  });
  if (baseline.repo_ref !== normalizedRepo.repo_ref) {
    throw new Error("baseline.repo_ref must match detection repo_ref");
  }

  const client = options.client ?? createClient(options.token);
  const { owner, repo } = normalizedRepo;

  // Resolve the default branch first, then its current head commit.
  const repository = await client.rest.repos.get({ owner, repo });
  const branch = await client.rest.repos.getBranch({
    owner,
    repo,
    branch: repository.data.default_branch,
  });

  const headCommit = branch.data.commit.sha.toLowerCase();
  const baseCommit = baseline.last_delivery_commit.toLowerCase();
  const commitRange = {
    repo_ref: normalizedRepo.repo_ref,
    base_commit: baseCommit,
    head_commit: headCommit,
  };

  if (headCommit === baseCommit) {
    return FreshnessDetectorOutcomeSchema.parse({
      mode: "noop",
      ...commitRange,
      changed_paths: [],
      changed_files: [],
      ambiguity_reasons: [],
    });
  }

  const compare = await client.rest.repos.compareCommits({
    owner,
    repo,
    base: baseCommit,
    head: headCommit,
    per_page: 100,
    page: 1,
  });

  const normalizedFiles = normalizeChangedFiles(compare.data.files);
  const reasons = new Set([
    ...collectAmbiguityReasons(compare),
    ...normalizedFiles.ambiguity_reasons,
  ]);
  const ambiguityReasons = [...reasons].sort((left, right) => left.localeCompare(right));

  // Any ambiguity means the file list cannot drive an incremental update.
  const mode = ambiguityReasons.length > 0 ? "full-rebuild-required" : "incremental-candidate";
  return FreshnessDetectorOutcomeSchema.parse({
    mode,
    ...commitRange,
    changed_paths: normalizedFiles.changed_paths,
    changed_files: normalizedFiles.changed_files,
    ambiguity_reasons: ambiguityReasons,
  });
}
|
|
2841
|
+
|
|
2842
|
+
// src/freshness/impact-map.ts
|
|
2843
|
+
/**
 * Normalises a repository path: trims it, converts backslashes to forward
 * slashes, drops one leading "./" and collapses repeated slashes.
 *
 * @param {string} value
 * @returns {string}
 */
function normalizeRepoPath(value) {
  let normalized = value.trim();
  normalized = normalized.replace(/\\/g, "/");
  normalized = normalized.replace(/^\.\//, "");
  normalized = normalized.replace(/\/+/g, "/");
  return normalized;
}
|
|
2846
|
+
/**
 * String comparator using English collation that ignores case and compares
 * digit runs numerically.
 *
 * @param {string} left
 * @param {string} right
 * @returns {number} Negative, zero or positive.
 */
function compareDeterministic(left, right) {
  const collation = { numeric: true, sensitivity: "base" };
  return left.localeCompare(right, "en", collation);
}
|
|
2849
|
+
/**
 * Normalises every path, drops empty results and duplicates, and returns the
 * remainder sorted with compareDeterministic.
 *
 * @param {Iterable<string>} paths
 * @returns {string[]}
 */
function toOrderedUniquePaths(paths) {
  const cleaned = Array.from(paths, (entry) => normalizeRepoPath(entry)).filter(
    (entry) => entry.length > 0
  );
  return [...new Set(cleaned)].sort(compareDeterministic);
}
|
|
2859
|
+
/**
 * Maps changed repository paths onto the sections that list them as evidence.
 *
 * A section is impacted when one of its normalised `repoPaths` equals a
 * normalised changed path exactly.
 *
 * @param {{changed_paths: string[], sectionEvidenceIndex: Array<{sectionId: string, repoPaths: string[]}>}} input
 * @returns {{mode: string, impacted_section_ids: string[], unmatched_changed_paths: string[]}}
 *   `mode` is "impact-mapped" when there are no changed paths or at least one
 *   section is impacted, and "full-rebuild-required" when paths changed but no
 *   section matched any of them.
 */
function mapChangedPathsToImpactedSections(input) {
  const changedPaths = toOrderedUniquePaths(input.changed_paths);
  if (changedPaths.length === 0) {
    return { mode: "impact-mapped", impacted_section_ids: [], unmatched_changed_paths: [] };
  }

  const evidenceBySection = input.sectionEvidenceIndex.map(({ sectionId, repoPaths }) => ({
    sectionId,
    repoPaths: new Set(toOrderedUniquePaths(repoPaths)),
  }));

  const matchedPaths = /* @__PURE__ */ new Set();
  const impacted = /* @__PURE__ */ new Set();
  for (const { sectionId, repoPaths } of evidenceBySection) {
    for (const changedPath of changedPaths) {
      if (!repoPaths.has(changedPath)) {
        continue;
      }
      impacted.add(sectionId);
      matchedPaths.add(changedPath);
    }
  }

  const impacted_section_ids = [...impacted].sort(compareDeterministic);
  const unmatched_changed_paths = changedPaths.filter((changedPath) => !matchedPaths.has(changedPath));
  // Changes that touch no known evidence cannot be scoped to sections.
  const mode = impacted_section_ids.length === 0 ? "full-rebuild-required" : "impact-mapped";
  return { mode, impacted_section_ids, unmatched_changed_paths };
}
|
|
2898
|
+
|
|
2899
|
+
// src/freshness/section-evidence.ts
|
|
2900
|
+
/**
 * String comparator using English collation, case-insensitive and digit-aware.
 *
 * @param {string} left
 * @param {string} right
 * @returns {number} Negative, zero or positive.
 */
function compareDeterministic2(left, right) {
  const options = { numeric: true, sensitivity: "base" };
  const order = left.localeCompare(right, "en", options);
  return order;
}
|
|
2903
|
+
/**
 * Normalises a repository path: trims it, converts backslashes to forward
 * slashes, drops one leading "./" and collapses repeated slashes.
 *
 * @param {string} value
 * @returns {string}
 */
function normalizeRepoPath2(value) {
  const trimmed = value.trim();
  const steps = [
    [/\\/g, "/"],
    [/^\.\//, ""],
    [/\/+/g, "/"],
  ];
  return steps.reduce((current, [pattern, replacement]) => current.replace(pattern, replacement), trimmed);
}
|
|
2906
|
+
/**
 * Builds the per-section list of repository paths that back each section of
 * an accepted draft.
 *
 * For every section the paths are taken from, in order of preference:
 *   1. the section's own `sourcePaths`;
 *   2. the `repoPath` of citations referenced by the section's claims;
 *   3. the draft-wide `sourceDocs` paths.
 * A later source is consulted only when the earlier ones produced nothing.
 *
 * @param {object} acceptedOutput object exposing `draft.sections` and, optionally,
 *   `draft.claims`, `draft.citations` and `draft.sourceDocs`.
 * @param {string[]} [sectionIds] when non-empty, restricts the result to these ids.
 * @returns {{sectionId: string, repoPaths: string[]}[]} entries sorted by section id,
 *   each with sorted paths.
 * @throws {Error} when a section that must be emitted has no evidence paths.
 */
function buildSectionEvidenceFromAcceptedOutput(acceptedOutput, sectionIds) {
  let allowed;
  if (sectionIds && sectionIds.length > 0) {
    const requested = sectionIds.map((sectionId) => sectionId.trim()).filter(Boolean);
    allowed = new Set(requested.sort(compareDeterministic2));
  }

  const draft = acceptedOutput.draft;
  const claims = draft.claims ?? [];
  const citations = draft.citations ?? [];
  const citationById = new Map();
  for (const citation of citations) {
    citationById.set(citation.citationId, citation);
  }

  // Adds the normalized form of rawPath to bucket unless it normalizes to "".
  const collect = (bucket, rawPath) => {
    const normalized = normalizeRepoPath2(rawPath);
    if (normalized.length > 0) {
      bucket.add(normalized);
    }
  };

  const pathsBySection = new Map();
  for (const section of draft.sections) {
    const sectionId = section.sectionId.trim();
    if (!sectionId) {
      continue;
    }
    if (allowed && !allowed.has(sectionId)) {
      continue;
    }
    const bucket = pathsBySection.get(sectionId) ?? new Set();

    for (const sourcePath of section.sourcePaths ?? []) {
      collect(bucket, sourcePath);
    }

    // Fallback 1: paths cited by this section's claims.
    if (bucket.size === 0 && claims.length > 0 && citations.length > 0) {
      const citedPaths = claims
        .filter((claim) => claim.sectionId.trim() === sectionId)
        .flatMap((claim) => claim.citationIds)
        .map((citationId) => citationById.get(citationId)?.repoPath ?? "");
      for (const citedPath of citedPaths) {
        collect(bucket, citedPath);
      }
    }

    // Fallback 2: draft-wide source documents.
    if (bucket.size === 0) {
      for (const sourceDoc of draft.sourceDocs ?? []) {
        collect(bucket, sourceDoc.path);
      }
    }

    if (bucket.size > 0) {
      pathsBySection.set(sectionId, bucket);
    }
  }

  const sectionIdsToEmit = allowed
    ? [...allowed]
    : [...new Set(draft.sections.map((section) => section.sectionId.trim()).filter(Boolean))];
  sectionIdsToEmit.sort(compareDeterministic2);

  return sectionIdsToEmit.map((sectionId) => {
    const bucket = pathsBySection.get(sectionId);
    if (!bucket || bucket.size === 0) {
      throw new Error(`UPDT-02 regeneration blocked: missing section evidence paths (${sectionId})`);
    }
    const repoPaths = [...bucket].sort(compareDeterministic2);
    return { sectionId, repoPaths };
  });
}
|
|
2958
|
+
/**
 * Public alias for buildSectionEvidenceFromAcceptedOutput; forwards both
 * arguments unchanged and returns its result.
 *
 * @param {object} acceptedOutput
 * @param {string[]} [sectionIds]
 * @returns {{sectionId: string, repoPaths: string[]}[]}
 */
function extractSectionEvidenceFromAcceptedOutput(acceptedOutput, sectionIds) {
  const evidence = buildSectionEvidenceFromAcceptedOutput(acceptedOutput, sectionIds);
  return evidence;
}
|
|
2961
|
+
|
|
2962
|
+
// src/freshness/state.ts
|
|
2963
|
+
import { promises as fs5 } from "node:fs";
|
|
2964
|
+
import path6 from "node:path";
|
|
2965
|
+
// State used when no freshness state file exists yet: schema version 1, no repos.
var EMPTY_STATE = { schema_version: 1, repos: {} };
|
|
2969
|
+
/**
 * Parses a repo reference for use as a freshness baseline key.
 *
 * @param {string} repoRef reference handed to parseRepoRef.
 * @returns {string} the parsed `repo_full_name`.
 * @throws {Error} when the parsed reference carries a non-null `requested_ref`.
 */
function normalizeFreshnessRepoRef(repoRef) {
  const { requested_ref: requestedRef, repo_full_name: repoFullName } = parseRepoRef(repoRef);
  if (requestedRef === null) {
    return repoFullName;
  }
  throw new Error("freshness baseline repo_ref must not include @ref");
}
|
|
2976
|
+
/**
 * Trims every path, removes duplicates and returns the result sorted with the
 * default `localeCompare` ordering.
 *
 * @param {string[]} paths
 * @returns {string[]} a new array; the input is not modified.
 */
function normalizePathList(paths) {
  const trimmed = paths.map((entry) => entry.trim());
  const deduped = Array.from(new Set(trimmed));
  deduped.sort((left, right) => left.localeCompare(right));
  return deduped;
}
|
|
2980
|
+
/**
 * Validates a freshness baseline and returns it in canonical form:
 * repo_ref normalized, commit lower-cased, section ids trimmed, each section's
 * paths de-duplicated and sorted, and sections ordered by id (ties broken by
 * their joined path list).
 *
 * @param {object} input baseline candidate.
 * @returns {object} the parsed baseline with a sorted `sectionEvidenceIndex`.
 * @throws whatever FreshnessBaselineSchema.parse or normalizeFreshnessRepoRef throw.
 */
function canonicalizeBaseline(input) {
  const candidate = {
    ...input,
    repo_ref: normalizeFreshnessRepoRef(input.repo_ref),
    last_delivery_commit: input.last_delivery_commit.toLowerCase(),
    sectionEvidenceIndex: input.sectionEvidenceIndex.map((section) => ({
      sectionId: section.sectionId.trim(),
      repoPaths: normalizePathList(section.repoPaths)
    }))
  };
  const parsed = FreshnessBaselineSchema.parse(candidate);

  // Order by section id first; only when ids tie, compare the path signatures.
  const bySectionThenPaths = (left, right) =>
    left.sectionId.localeCompare(right.sectionId) ||
    left.repoPaths.join("\n").localeCompare(right.repoPaths.join("\n"));

  return {
    ...parsed,
    sectionEvidenceIndex: [...parsed.sectionEvidenceIndex].sort(bySectionThenPaths)
  };
}
|
|
3004
|
+
/**
 * Validates a freshness state object and rebuilds it with repo keys visited in
 * sorted order, each key normalized and each baseline canonicalized.
 *
 * @param {unknown} input candidate state.
 * @returns {{schema_version: 1, repos: Object<string, object>}}
 * @throws whatever the schema parse or the per-repo canonicalization throw.
 */
function canonicalizeState(input) {
  const { repos: parsedRepos } = FreshnessStateFileSchema.parse(input);
  const orderedKeys = Object.keys(parsedRepos).sort((left, right) => left.localeCompare(right));
  const repos = {};
  orderedKeys.forEach((repoKey) => {
    repos[normalizeFreshnessRepoRef(repoKey)] = canonicalizeBaseline(parsedRepos[repoKey]);
  });
  return { schema_version: 1, repos };
}
|
|
3017
|
+
/**
 * Parses the textual contents of a freshness state file.
 *
 * @param {string} raw JSON text.
 * @returns {object} the canonicalized state.
 * @throws {Error} "Invalid freshness state JSON payload" when `raw` is not JSON,
 *   or "Invalid freshness state schema" when canonicalization rejects it. In
 *   both cases the underlying error is preserved as `error.cause` so the real
 *   reason (syntax position, failing field) is not lost.
 */
function parseFreshnessState(raw) {
  let payload;
  try {
    payload = JSON.parse(raw);
  } catch (cause) {
    throw new Error("Invalid freshness state JSON payload", { cause });
  }
  try {
    return canonicalizeState(payload);
  } catch (cause) {
    throw new Error("Invalid freshness state schema", { cause });
  }
}
|
|
3030
|
+
/**
 * Loads and parses the freshness state file.
 *
 * @param {string} statePath path to the state file (resolved against the cwd).
 * @returns {Promise<object>} the parsed state, or a fresh empty state
 *   (`{ schema_version: 1, repos: {} }`) when the file does not exist.
 * @throws any read error other than ENOENT, and any parse/validation error.
 */
async function loadFreshnessState(statePath) {
  const absolute = path6.resolve(statePath);
  let raw;
  try {
    raw = await fs5.readFile(absolute, "utf8");
  } catch (error) {
    if (error?.code === "ENOENT") {
      // Return a new object on every call: handing out one shared module-level
      // object would let a caller that adds repos to the result leak those
      // entries into every later "missing file" load.
      return { schema_version: 1, repos: {} };
    }
    throw error;
  }
  return parseFreshnessState(raw);
}
|
|
3043
|
+
/**
 * Serializes a freshness state as canonical, 2-space-indented JSON followed by
 * a single trailing newline.
 *
 * @param {object} state
 * @returns {string}
 */
function serializeFreshnessState(state) {
  const pretty = JSON.stringify(canonicalizeState(state), null, 2);
  return pretty + "\n";
}
|
|
3048
|
+
/**
 * Writes the serialized freshness state to disk, creating the parent
 * directory first when it is missing.
 *
 * @param {string} statePath destination file (resolved against the cwd).
 * @param {object} state state to serialize.
 * @returns {Promise<void>}
 */
async function saveFreshnessState(statePath, state) {
  const target = path6.resolve(statePath);
  const parentDir = path6.dirname(target);
  await fs5.mkdir(parentDir, { recursive: true });
  const contents = serializeFreshnessState(state);
  await fs5.writeFile(target, contents, "utf8");
}
|
|
3053
|
+
|
|
3054
|
+
// src/chunked/plan-sections.ts
|
|
3055
|
+
import { promises as fs6 } from "node:fs";
|
|
3056
|
+
import path7 from "node:path";
|
|
3057
|
+
|
|
3058
|
+
// src/contracts/chunked-generation.ts
|
|
3059
|
+
import { z as z7 } from "zod";
|
|
3060
|
+
// 7 to 40 lowercase hexadecimal characters.
var COMMIT_SHA_PATTERN4 = /^[a-f0-9]{7,40}$/;
// Lowercase "owner/repo" made of letters, digits, "_", "." and "-".
var REPO_REF_PATTERN2 = /^[a-z0-9_.-]+\/[a-z0-9_.-]+$/;

// Repository profile embedded in a plan context.
var PlanContextProfileSchema = z7
  .object({
    repoName: z7.string().min(1),
    primaryLanguage: z7.string().min(1),
    projectType: z7.string().min(1),
    domainHint: z7.string().min(1),
    topLevelDirs: z7.array(z7.string().min(1)),
    filesScanned: z7.number().int().nonnegative()
  })
  .strict();

// One directory of the file tree with its listed files and byte total.
var PlanContextFileTreeEntrySchema = z7
  .object({
    dir: z7.string().min(1),
    files: z7.array(z7.string().min(1)),
    totalBytes: z7.number().int().nonnegative()
  })
  .strict();

// Constraints communicated alongside the plan context.
var PlanContextConstraintsSchema = z7
  .object({
    minSections: z7.number().int().min(1),
    maxSections: z7.number().int().min(1),
    minSubsectionsPerSection: z7.number().int().min(1),
    minBodyKoChars: z7.number().int().min(1),
    requiredElements: z7.array(z7.string().min(1)),
    sectionIdPattern: z7.string().min(1),
    subsectionIdPattern: z7.string().min(1)
  })
  .strict();

// The complete "plan-context" artifact; unknown keys are rejected.
var PlanContextSchema = z7
  .object({
    artifactType: z7.literal("plan-context"),
    repoFullName: z7.string().regex(REPO_REF_PATTERN2),
    commitSha: z7.string().regex(COMMIT_SHA_PATTERN4),
    ingestRunId: z7.string().min(1),
    snapshotPath: z7.string().min(1),
    generatedAt: z7.string().datetime(),
    profile: PlanContextProfileSchema,
    readmeExcerpt: z7.string(),
    keyPaths: z7.array(z7.string().min(1)),
    fileTree: z7.array(PlanContextFileTreeEntrySchema),
    constraints: PlanContextConstraintsSchema
  })
  .strict();
|
|
3097
|
+
// A planned subsection: id, Korean title/objective, at least one evidence kind
// and a target length of at least 700 characters.
var ChunkedSubsectionPlanSchema = z7
  .object({
    subsectionId: z7.string().min(1),
    titleKo: z7.string().min(3),
    objectiveKo: z7.string().min(10),
    targetEvidenceKinds: z7.array(z7.enum(["code", "config", "tests", "docs"])).min(1),
    targetCharacterCount: z7.number().int().min(700)
  })
  .strict();

// A planned section with at least three subsections.
var ChunkedSectionPlanEntrySchema = z7
  .object({
    sectionId: z7.string().min(1),
    titleKo: z7.string().min(3),
    summaryKo: z7.string().min(10),
    focusPaths: z7.array(z7.string().min(1)),
    subsectionCount: z7.number().int().min(3),
    subsections: z7.array(ChunkedSubsectionPlanSchema).min(3)
  })
  .strict();

// A directed relation between two planned sections.
var SectionPlanCrossReferenceSchema = z7
  .object({
    fromSectionId: z7.string().min(1),
    toSectionId: z7.string().min(1),
    relation: z7.string().min(3)
  })
  .strict();
|
|
3117
|
+
// The "chunked-section-plan" artifact. Beyond field shapes (4 to 6 sections) it
// checks that totalSections and each subsectionCount match the actual array
// lengths, that section ids are unique, that subsection ids are unique within
// their section, and that cross references only name existing section ids.
var SectionPlanOutputSchema = z7
  .object({
    artifactType: z7.literal("chunked-section-plan"),
    repoFullName: z7.string().regex(REPO_REF_PATTERN2),
    commitSha: z7.string().regex(COMMIT_SHA_PATTERN4),
    ingestRunId: z7.string().min(1),
    snapshotPath: z7.string().min(1),
    generatedAt: z7.string().datetime(),
    overviewKo: z7.string().min(30),
    totalSections: z7.number().int().min(4).max(6),
    sections: z7.array(ChunkedSectionPlanEntrySchema).min(4).max(6),
    crossReferences: z7.array(SectionPlanCrossReferenceSchema)
  })
  .strict()
  .superRefine((value, ctx) => {
    // Records one custom issue at the given path.
    const report = (message, issuePath) => {
      ctx.addIssue({ code: z7.ZodIssueCode.custom, message, path: issuePath });
    };

    if (value.totalSections !== value.sections.length) {
      report(
        `totalSections (${value.totalSections}) must match sections array length (${value.sections.length})`,
        ["totalSections"]
      );
    }

    const sectionIds = /* @__PURE__ */ new Set();
    for (const { sectionId, subsectionCount, subsections } of value.sections) {
      if (sectionIds.has(sectionId)) {
        report(`duplicate sectionId: ${sectionId}`, ["sections"]);
      }
      sectionIds.add(sectionId);

      if (subsectionCount !== subsections.length) {
        report(
          `section ${sectionId}: subsectionCount (${subsectionCount}) must match subsections length (${subsections.length})`,
          ["sections"]
        );
      }

      const subsectionIds = /* @__PURE__ */ new Set();
      for (const { subsectionId } of subsections) {
        if (subsectionIds.has(subsectionId)) {
          report(`duplicate subsectionId: ${subsectionId} in section ${sectionId}`, ["sections"]);
        }
        subsectionIds.add(subsectionId);
      }
    }

    for (const { fromSectionId, toSectionId } of value.crossReferences) {
      const bothKnown = sectionIds.has(fromSectionId) && sectionIds.has(toSectionId);
      if (!bothKnown) {
        report(`cross reference targets non-existent section id`, ["crossReferences"]);
      }
    }
  });
|
|
3175
|
+
// A written subsection; the body must be at least 80 characters.
var SectionOutputSubsectionSchema = z7
  .object({
    sectionId: z7.string().min(1),
    subsectionId: z7.string().min(1),
    titleKo: z7.string().min(3),
    bodyKo: z7.string().min(80)
  })
  .strict();

// A claim tied to a subsection; it must reference at least one citation id.
var SectionOutputClaimSchema = z7
  .object({
    claimId: z7.string().min(1),
    sectionId: z7.string().min(1),
    subsectionId: z7.string().min(1),
    statementKo: z7.string().min(20),
    citationIds: z7.array(z7.string().min(1)).min(1)
  })
  .strict();

// A citation into the repository: path, 1-based line range with end >= start,
// commit sha, permalink URL and a rationale.
var SectionOutputCitationSchema = z7
  .object({
    citationId: z7.string().min(1),
    evidenceId: z7.string().min(1),
    repoPath: z7.string().min(1),
    lineRange: z7
      .object({
        start: z7.number().int().min(1),
        end: z7.number().int().min(1)
      })
      .strict()
      .refine((v) => v.end >= v.start, { message: "lineRange.end must be >= start" }),
    commitSha: z7.string().regex(COMMIT_SHA_PATTERN4),
    permalink: z7.string().url(),
    rationale: z7.string().min(1)
  })
  .strict();
|
|
3200
|
+
// A written section. Besides the field shapes it checks that every subsection
// carries the parent's sectionId and that sourcePaths has no duplicates.
var SectionOutputSchema = z7
  .object({
    sectionId: z7.string().min(1),
    titleKo: z7.string().min(3),
    summaryKo: z7.string().min(20),
    sourcePaths: z7.array(z7.string().min(1)).min(1),
    subsections: z7.array(SectionOutputSubsectionSchema).min(3)
  })
  .strict()
  .superRefine((value, ctx) => {
    // Records one custom issue at the given path.
    const report = (message, issuePath) => {
      ctx.addIssue({ code: z7.ZodIssueCode.custom, message, path: issuePath });
    };

    value.subsections
      .filter((sub) => sub.sectionId !== value.sectionId)
      .forEach((sub) => {
        report(
          `subsection ${sub.subsectionId} sectionId "${sub.sectionId}" must match parent "${value.sectionId}"`,
          ["subsections"]
        );
      });

    const seenSourcePaths = /* @__PURE__ */ new Set();
    value.sourcePaths.forEach((sourcePath) => {
      if (seenSourcePaths.has(sourcePath)) {
        report(`duplicate sourcePath: ${sourcePath}`, ["sourcePaths"]);
      }
      seenSourcePaths.add(sourcePath);
    });
  });
|
|
3228
|
+
// Per-section progress record: a status plus optional persistence details.
var ChunkedSectionStatusSchema = z7
  .object({
    status: z7.enum(["pending", "persisted"]),
    sectionOutputPath: z7.string().min(1).optional(),
    persistedAt: z7.string().datetime().optional(),
    chunksInserted: z7.number().int().nonnegative().optional(),
    claimCount: z7.number().int().nonnegative().optional(),
    citationCount: z7.number().int().nonnegative().optional(),
    subsectionCount: z7.number().int().nonnegative().optional(),
    koreanChars: z7.number().int().nonnegative().optional()
  })
  .strict();

// A chunked generation session; `sections` maps a string key to its status record.
var ChunkedSessionSchema = z7
  .object({
    sessionId: z7.string().min(1),
    repoFullName: z7.string().regex(REPO_REF_PATTERN2),
    commitSha: z7.string().regex(COMMIT_SHA_PATTERN4),
    ingestRunId: z7.string().min(1),
    planPath: z7.string().min(1),
    startedAt: z7.string().datetime(),
    sections: z7.record(z7.string(), ChunkedSectionStatusSchema)
  })
  .strict();
|
|
3247
|
+
|
|
3248
|
+
// src/chunked/plan-sections.ts
|
|
3249
|
+
// Maximum number of README characters kept in the plan context.
var MAX_README_CHARS = 3000;
// Maximum number of file names listed per top-level directory.
var MAX_FILES_PER_DIR = 30;
// Directory names (compared lower-cased) that the snapshot walk never enters.
var SKIP_DIRS = /* @__PURE__ */ new Set([
  ".git",
  ".husky",
  "node_modules",
  "dist",
  ".next",
  "target"
]);
// Substring patterns paired with the Korean domain label they suggest.
var DOMAIN_HINTS = [
  [["model", "inference", "llm", "ai", "ml", "neural", "torch", "tensorflow"], "AI/ML \uBAA8\uB378 \uCD94\uB860"],
  [["runner", "scheduler", "executor", "orchestrat"], "\uC791\uC5C5 \uC2E4\uD589 \uBC0F \uC2A4\uCF00\uC904\uB9C1"],
  [["database", "db", "storage", "redis", "mongo", "sql", "cache", "kv"], "\uB370\uC774\uD130 \uC800\uC7A5\uC18C"],
  [["network", "http", "grpc", "rpc", "protocol", "proxy", "gateway"], "\uB124\uD2B8\uC6CC\uD06C \uD1B5\uC2E0"],
  [["auth", "oauth", "credential", "token", "session"], "\uC778\uC99D \uBC0F \uBCF4\uC548"],
  [["container", "docker", "kubernetes", "k8s", "pod"], "\uCEE8\uD14C\uC774\uB108 \uC624\uCF00\uC2A4\uD2B8\uB808\uC774\uC158"],
  [["compiler", "parser", "ast", "lexer", "syntax", "lang"], "\uC5B8\uC5B4 \uCC98\uB9AC \uBC0F \uCEF4\uD30C\uC77C"],
  [["render", "ui", "component", "widget", "canvas", "graphics"], "UI \uB80C\uB354\uB9C1"],
  [["crypto", "encrypt", "hash", "cipher", "tls", "ssl"], "\uC554\uD638\uD654 \uBC0F \uBCF4\uC548"],
  [["stream", "event", "queue", "message", "pubsub", "kafka"], "\uC774\uBCA4\uD2B8/\uC2A4\uD2B8\uB9BC \uCC98\uB9AC"],
  [["api", "rest", "endpoint", "route", "handler"], "API \uC11C\uBE44\uC2A4"],
  [["agent", "tool", "prompt", "chain", "rag", "embed"], "AI \uC5D0\uC774\uC804\uD2B8"],
  [["build", "bundle", "webpack", "vite", "rollup", "esbuild"], "\uBE4C\uB4DC \uB3C4\uAD6C"],
  [["plugin", "extension", "addon", "module", "hook"], "\uD50C\uB7EC\uADF8\uC778 \uC2DC\uC2A4\uD15C"],
  [["cli", "cmd", "command", "terminal", "shell"], "\uBA85\uB839\uC904 \uC778\uD130\uD398\uC774\uC2A4"]
].map(([patterns, hintKo]) => ({ patterns, hintKo }));
|
|
3269
|
+
/**
 * Picks the language with the largest weight from a language mix.
 *
 * @param {Object<string, number>|null|undefined} languageMix language name to weight.
 * @returns {string} the heaviest language, or "Unknown" when the mix is empty
 *   or missing. A missing mix used to raise a TypeError from Object.entries;
 *   it now takes the same fallback as an empty one.
 */
function detectPrimaryLanguage(languageMix) {
  const ranked = Object.entries(languageMix ?? {}).sort((a, b) => b[1] - a[1]);
  const [top] = ranked;
  return top === undefined ? "Unknown" : top[0];
}
|
|
3274
|
+
/**
 * Classifies a repository from its top-level directory names (matched
 * case-insensitively) and its primary language.
 *
 * Precedence: CLI (with or without a server) > server > web app >
 * native system > library > generic "project".
 *
 * @param {string[]} topDirs top-level directory names.
 * @param {string} primaryLang primary language name.
 * @returns {"cli-server"|"cli-tool"|"web-app"|"server"|"native-system"|"library"|"project"}
 */
function detectProjectType(topDirs, primaryLang) {
  const dirSet = new Set(topDirs.map((d) => d.toLowerCase()));
  const hasAny = (...names) => names.some((name) => dirSet.has(name));

  const hasCli = hasAny("cmd", "cli", "bin");
  const hasServer = hasAny("server", "api", "routes");
  const hasWebApp = hasAny("app", "pages", "components", "views");
  const hasLib = hasAny("lib", "pkg", "crate", "packages");
  const isNative = ["C", "C++", "Zig", "Rust"].includes(primaryLang) && hasAny("deps", "vendor");

  if (hasCli) {
    return hasServer ? "cli-server" : "cli-tool";
  }
  if (hasServer) {
    return hasWebApp ? "web-app" : "server";
  }
  if (hasWebApp) {
    return "web-app";
  }
  if (isNative) {
    return "native-system";
  }
  return hasLib ? "library" : "project";
}
|
|
3290
|
+
/**
 * Guesses a Korean domain label for the repository.
 *
 * Tokens are the lower-cased top-level directory names plus every "/"-separated
 * segment of the lower-cased key paths. Each DOMAIN_HINTS entry scores one
 * point per pattern that occurs as a substring of at least one token; the
 * first entry with the strictly highest score wins.
 *
 * @param {string[]} topDirs
 * @param {string[]} keyPaths
 * @returns {string} the winning hint, or the default label when nothing scores.
 */
function detectDomainHint(topDirs, keyPaths) {
  const tokens = topDirs
    .map((d) => d.toLowerCase())
    .concat(keyPaths.flatMap((p) => p.toLowerCase().split("/")));

  const scoreOf = (patterns) =>
    patterns.reduce(
      (hits, pattern) => (tokens.some((token) => token.includes(pattern)) ? hits + 1 : hits),
      0
    );

  let best = { hintKo: "\uD575\uC2EC \uAE30\uB2A5", score: 0 };
  for (const { patterns, hintKo } of DOMAIN_HINTS) {
    const score = scoreOf(patterns);
    if (score > best.score) {
      best = { hintKo, score };
    }
  }
  return best.hintKo;
}
|
|
3306
|
+
/**
 * Normalizes a repository-relative path: trims whitespace, converts
 * backslashes to forward slashes, drops one leading "./" and collapses runs
 * of slashes into a single slash.
 *
 * @param {string} value raw path
 * @returns {string} normalized path (may be empty)
 */
function normalizeRepoPath3(value) {
  const steps = [
    [/\\/g, "/"],
    [/^\.\//, ""],
    [/\/+/g, "/"]
  ];
  return steps.reduce(
    (current, [pattern, replacement]) => current.replace(pattern, replacement),
    value.trim()
  );
}
|
|
3309
|
+
/**
 * Recursively lists the regular files under `rootPath`.
 *
 * Directory entries are visited in case-insensitive name order. Symbolic
 * links, entries named "snapshot-manifest.json" and directories listed in
 * SKIP_DIRS are ignored. Directories that cannot be read and files that
 * cannot be stat'ed are skipped silently.
 *
 * @param {string} rootPath directory to walk.
 * @returns {Promise<{relativePath: string, bytes: number}[]>} normalized,
 *   root-relative paths with their sizes, in visit order.
 */
async function collectFiles(rootPath) {
  const entries = [];
  const byName = (a, b) => a.name.localeCompare(b.name, "en", { sensitivity: "base" });

  async function visit(relativeDir) {
    let dirents;
    try {
      dirents = await fs6.readdir(path7.join(rootPath, relativeDir), { withFileTypes: true });
    } catch {
      // Unreadable directory: contribute nothing.
      return;
    }
    dirents.sort(byName);

    for (const dirent of dirents) {
      const { name } = dirent;
      if (name === "snapshot-manifest.json" || dirent.isSymbolicLink()) {
        continue;
      }
      if (dirent.isDirectory()) {
        if (!SKIP_DIRS.has(name.toLowerCase())) {
          await visit(path7.join(relativeDir, name));
        }
        continue;
      }
      if (!dirent.isFile()) {
        continue;
      }
      const relativePath = normalizeRepoPath3(path7.join(relativeDir, name));
      try {
        const { size } = await fs6.stat(path7.join(rootPath, relativePath));
        entries.push({ relativePath, bytes: size });
      } catch {
        // Best effort: a file that cannot be stat'ed is left out.
      }
    }
  }

  await visit(".");
  return entries;
}
|
|
3345
|
+
/**
 * Groups files by their first path segment ("." for files without a "/").
 *
 * Each group accumulates the byte total of all its files but lists at most
 * MAX_FILES_PER_DIR of them. Groups are returned largest-first.
 *
 * @param {{relativePath: string, bytes: number}[]} files
 * @returns {{dir: string, files: string[], totalBytes: number}[]}
 */
function buildFileTree(files) {
  const byDir = /* @__PURE__ */ new Map();
  for (const { relativePath, bytes } of files) {
    const slashAt = relativePath.indexOf("/");
    const dir = slashAt === -1 ? "." : relativePath.slice(0, slashAt);

    let bucket = byDir.get(dir);
    if (!bucket) {
      bucket = { files: [], totalBytes: 0 };
      byDir.set(dir, bucket);
    }
    bucket.totalBytes += bytes;
    if (bucket.files.length < MAX_FILES_PER_DIR) {
      bucket.files.push(relativePath);
    }
  }

  const tree = [];
  for (const [dir, { files: listed, totalBytes }] of byDir) {
    tree.push({ dir, files: listed, totalBytes });
  }
  return tree.sort((a, b) => b.totalBytes - a.totalBytes);
}
|
|
3359
|
+
/**
 * Returns the first MAX_README_CHARS characters of the first README variant
 * that can be read from the snapshot root, trying the candidate names in order.
 *
 * @param {string} snapshotPath snapshot root directory.
 * @returns {Promise<string>} the excerpt, or "" when no candidate is readable.
 */
async function readReadmeExcerpt(snapshotPath) {
  const candidates = ["README.md", "readme.md", "Readme.md", "README.rst", "README.txt", "README"];

  // Resolves to the file contents, or null when this candidate cannot be read.
  const tryRead = async (name) => {
    try {
      return await fs6.readFile(path7.join(snapshotPath, name), "utf8");
    } catch {
      return null;
    }
  };

  for (const name of candidates) {
    const content = await tryRead(name);
    if (content !== null) {
      return content.slice(0, MAX_README_CHARS);
    }
  }
  return "";
}
|
|
3370
|
+
/**
 * Comparator for stable ordering of paths and directory names. Collation is
 * pinned to "en" so the result does not depend on the host's default locale.
 *
 * @param {string} left
 * @param {string} right
 * @returns {number} negative, zero or positive, as expected by Array#sort.
 */
function compareDeterministic3(left, right) {
  // numeric: digit runs compare by value; base sensitivity: case/accents ignored.
  const collationOptions = { numeric: true, sensitivity: "base" };
  const ordering = left.localeCompare(right, "en", collationOptions);
  return ordering;
}
|
|
3373
|
+
/**
 * Assembles and validates the "plan-context" artifact for an ingested snapshot.
 *
 * It walks the snapshot, derives the top-level directories, primary language,
 * project type and domain hint, reads a README excerpt, builds the file tree
 * and attaches the fixed planning constraints.
 *
 * @param {object} artifact ingest artifact; reads `snapshot_path`, `repo_ref`,
 *   `commit_sha`, `ingest_run_id`, `files_scanned`, `metadata.key_paths` and
 *   `metadata.language_mix`.
 * @returns {Promise<object>} the context as parsed by PlanContextSchema.
 * @throws when PlanContextSchema rejects the assembled context.
 */
async function planContext(artifact) {
  const files = await collectFiles(artifact.snapshot_path);

  const keyPaths = [];
  for (const rawKeyPath of artifact.metadata.key_paths ?? []) {
    const normalized = normalizeRepoPath3(rawKeyPath);
    if (normalized.length > 0) {
      keyPaths.push(normalized);
    }
  }
  keyPaths.sort(compareDeterministic3);

  // A path contributes a top-level dir only when it has a non-empty first segment
  // followed by "/".
  const topDirSet = /* @__PURE__ */ new Set();
  for (const { relativePath } of files) {
    const slashAt = relativePath.indexOf("/");
    if (slashAt > 0) {
      topDirSet.add(relativePath.slice(0, slashAt));
    }
  }
  const topDirs = [...topDirSet].sort(compareDeterministic3);

  const primaryLanguage = detectPrimaryLanguage(artifact.metadata.language_mix);
  const projectType = detectProjectType(topDirs, primaryLanguage);
  const domainHint = detectDomainHint(topDirs, keyPaths);

  // Display name: the part after "owner/", with its first character upper-cased.
  const repoRef = artifact.repo_ref;
  const repoName = repoRef.includes("/") ? repoRef.split("/")[1] : repoRef;
  const displayName = `${repoName.charAt(0).toUpperCase()}${repoName.slice(1)}`;

  const readmeExcerpt = await readReadmeExcerpt(artifact.snapshot_path);
  const fileTree = buildFileTree(files);

  const profile = {
    repoName: displayName,
    primaryLanguage,
    projectType,
    domainHint,
    topLevelDirs: topDirs,
    filesScanned: artifact.files_scanned
  };
  const constraints = {
    minSections: 4,
    maxSections: 6,
    minSubsectionsPerSection: 3,
    minBodyKoChars: 3000,
    requiredElements: ["mermaid-architecture"],
    sectionIdPattern: "sec-{N} where N starts at 1",
    subsectionIdPattern: "sub-{sectionN}-{subN} (e.g. sub-1-1, sub-1-2, sub-2-1)"
  };

  return PlanContextSchema.parse({
    artifactType: "plan-context",
    repoFullName: artifact.repo_ref.toLowerCase(),
    commitSha: artifact.commit_sha,
    ingestRunId: artifact.ingest_run_id,
    snapshotPath: artifact.snapshot_path,
    generatedAt: (/* @__PURE__ */ new Date()).toISOString(),
    profile,
    readmeExcerpt,
    keyPaths,
    fileTree,
    constraints
  });
}
|
|
3422
|
+
|
|
3423
|
+
// src/chunked/validate-plan.ts
|
|
3424
|
+
import { existsSync as existsSync2 } from "node:fs";
|
|
3425
|
+
import path8 from "node:path";
|
|
3426
|
+
/**
 * Validates a section plan against its schema and against the snapshot.
 *
 * After SectionPlanOutputSchema parsing it checks, in this order:
 *   1. every focusPath exists inside the snapshot directory;
 *   2. section titles are unique (trimmed, case-insensitive);
 *   3. section ids are "sec-1", "sec-2", ... in array order;
 *   4. subsection ids are "sub-{section}-{n}" in array order.
 *
 * A focusPath that resolves outside the snapshot root (for example through
 * ".." segments) is reported as missing even if a file exists at that
 * location. Previously such a path passed the existence check although it is
 * not part of the snapshot.
 *
 * @param {unknown} raw plan candidate.
 * @param {{snapshotPath: string}} options snapshot root directory.
 * @returns {object} the parsed plan.
 * @throws {Error} listing every issue found, or the schema's own error.
 */
function validatePlan(raw, options) {
  const parsed = SectionPlanOutputSchema.parse(raw);
  const errors = [];

  // True when `candidate` is located outside the snapshot root.
  const escapesSnapshot = (candidate) => {
    const relative = path8.relative(options.snapshotPath, candidate);
    return relative === ".." || relative.startsWith(`..${path8.sep}`) || path8.isAbsolute(relative);
  };

  for (const section of parsed.sections) {
    for (const focusPath of section.focusPaths) {
      const absolute = path8.join(options.snapshotPath, focusPath);
      if (escapesSnapshot(absolute) || !existsSync2(absolute)) {
        errors.push(
          `${section.sectionId}: focusPath "${focusPath}" does not exist in snapshot`
        );
      }
    }
  }

  const seenTitles = /* @__PURE__ */ new Set();
  for (const section of parsed.sections) {
    const normalized = section.titleKo.trim().toLowerCase();
    if (seenTitles.has(normalized)) {
      errors.push(
        `Duplicate section title: "${section.titleKo}"`
      );
    }
    seenTitles.add(normalized);
  }

  parsed.sections.forEach((section, i) => {
    const expected = `sec-${i + 1}`;
    if (section.sectionId !== expected) {
      errors.push(
        `Section at index ${i} has sectionId "${section.sectionId}", expected "${expected}"`
      );
    }
  });

  for (const section of parsed.sections) {
    const sectionNum = section.sectionId.replace("sec-", "");
    section.subsections.forEach((subsection, i) => {
      const expected = `sub-${sectionNum}-${i + 1}`;
      if (subsection.subsectionId !== expected) {
        errors.push(
          `${section.sectionId}: subsection at index ${i} has id "${subsection.subsectionId}", expected "${expected}"`
        );
      }
    });
  }

  if (errors.length > 0) {
    throw new Error(
      `Plan validation failed (${errors.length} issue(s)):
` + errors.map((e) => ` - ${e}`).join("\n")
    );
  }
  return parsed;
}
|
|
3476
|
+
|
|
3477
|
+
// src/chunked/validate-section.ts
|
|
3478
|
+
import { existsSync as existsSync3 } from "node:fs";
|
|
3479
|
+
import path9 from "node:path";
|
|
3480
|
+
/**
 * Counts sentence endings of the form "다." that are NOT the formal-polite
 * "-ㅂ니다." ending.
 *
 * An ending is treated as formal-polite when "다." is preceded by "니" and the
 * syllable before that carries the final consonant ㅂ (합니다, 습니다, 입니다,
 * 됩니다, ...). Checking only 합/습/입 misclassified endings such as "됩니다."
 * as plain-form.
 *
 * @param {string} prose text with code blocks already removed.
 * @returns {number}
 */
function countPlainFormDaEndings(prose) {
  const HANGUL_FIRST = 0xac00;
  const HANGUL_LAST = 0xd7a3;
  const FINALS_PER_BLOCK = 28;
  const FINAL_BIEUP = 17; // index of ㅂ among the final consonants of a Hangul syllable
  let count = 0;
  for (const match of prose.matchAll(/다\./g)) {
    const at = match.index;
    let isFormalPolite = false;
    if (at >= 2 && prose[at - 1] === "니") {
      const stem = prose.charCodeAt(at - 2);
      isFormalPolite =
        stem >= HANGUL_FIRST &&
        stem <= HANGUL_LAST &&
        (stem - HANGUL_FIRST) % FINALS_PER_BLOCK === FINAL_BIEUP;
    }
    if (!isFormalPolite) {
      count += 1;
    }
  }
  return count;
}

/**
 * Runs the quality checks for one written section and returns every problem
 * found as a human-readable message (an empty array means the section passed).
 *
 * Checks, in order: subsection count (>= 3), subsection/parent id agreement,
 * body length (>= 3,000 chars), cross-body repetition (delegated to
 * detectBodyKoRepetitionInBodies), sourcePaths present and located inside the
 * snapshot, a mermaid block in at least one subsection, repeated line
 * prefixes, plain-form (해라체) sentence endings, and double-escaped newline
 * sequences.
 *
 * A sourcePath that resolves outside the snapshot root is reported as missing.
 *
 * @param {object} section section with `sectionId`, `sourcePaths`, `subsections`.
 * @param {{snapshotPath: string, qualityGateLevel?: string}} options
 * @returns {string[]} error messages.
 */
function validateSection(section, options) {
  const errors = [];
  const MIN_BODY_KO_CHARS = 3000;

  if (section.subsections.length < 3) {
    errors.push(
      `${section.sectionId}: must have at least 3 subsections, got ${section.subsections.length}`
    );
  }
  for (const sub of section.subsections) {
    if (sub.sectionId !== section.sectionId) {
      errors.push(
        `${sub.subsectionId}: sectionId "${sub.sectionId}" must match parent "${section.sectionId}"`
      );
    }
  }
  for (const sub of section.subsections) {
    if (sub.bodyKo.length < MIN_BODY_KO_CHARS) {
      errors.push(
        `${sub.subsectionId} (${section.sectionId}): bodyKo is ${sub.bodyKo.length} chars, minimum is 3,000`
      );
    }
  }

  const bodies = section.subsections.map((sub) => ({
    sectionId: section.sectionId,
    subsectionId: sub.subsectionId,
    bodyKo: sub.bodyKo
  }));
  errors.push(...detectBodyKoRepetitionInBodies(bodies));

  if (section.sourcePaths.length === 0) {
    errors.push(`${section.sectionId}: sourcePaths must contain at least one path`);
  }
  for (const sourcePath of section.sourcePaths) {
    const fullPath = path9.join(options.snapshotPath, sourcePath);
    const relative = path9.relative(options.snapshotPath, fullPath);
    const escapesSnapshot =
      relative === ".." || relative.startsWith(`..${path9.sep}`) || path9.isAbsolute(relative);
    if (escapesSnapshot || !existsSync3(fullPath)) {
      errors.push(
        `${section.sectionId}: sourcePath "${sourcePath}" does not exist in snapshot`
      );
    }
  }

  const hasArchitectureMermaid = section.subsections.some(
    (sub) => /```mermaid[\s\S]*?```/i.test(sub.bodyKo)
  );
  if (!hasArchitectureMermaid) {
    errors.push(
      `${section.sectionId}: must include at least one architecture mermaid block (\`\`\`mermaid ... \`\`\`)`
    );
  }
  // Strict mode reports the missing diagram a second time with its own message.
  if (options.qualityGateLevel === "strict") {
    if (!hasArchitectureMermaid) {
      errors.push(
        `${section.sectionId}: strict quality requires architecture mermaid block for beginner/trend output`
      );
    }
  }

  // Padding detection: many lines sharing the same 20-character prefix.
  const PREFIX_LENGTH = 20;
  const PREFIX_REPEAT_THRESHOLD = 5;
  for (const sub of section.subsections) {
    const lines = sub.bodyKo
      .split(/[\n\r]+/)
      .map((line) => line.trim())
      .filter((line) => line.length >= PREFIX_LENGTH);
    const prefixCounts = /* @__PURE__ */ new Map();
    for (const line of lines) {
      const prefix = line.slice(0, PREFIX_LENGTH);
      prefixCounts.set(prefix, (prefixCounts.get(prefix) ?? 0) + 1);
    }
    for (const [prefix, count] of prefixCounts) {
      if (count >= PREFIX_REPEAT_THRESHOLD) {
        errors.push(
          `${sub.subsectionId} (${section.sectionId}): padding detected \u2014 line prefix "${prefix}..." repeated ${count} times. Write unique content instead of filler lines.`
        );
        break; // one report per subsection is enough
      }
    }
  }

  // Register check: more than 30% plain-form endings in prose (code blocks excluded).
  const CODEBLOCK_RE = /```[\s\S]*?```/g;
  for (const sub of section.subsections) {
    const proseOnly = sub.bodyKo.replace(CODEBLOCK_RE, "");
    const haeraCnt = countPlainFormDaEndings(proseOnly);
    const totalSentences = (proseOnly.match(/[.!?]\s/g) ?? []).length || 1;
    if (haeraCnt / totalSentences > 0.3) {
      errors.push(
        `${sub.subsectionId} (${section.sectionId}): bodyKo uses \uD574\uB77C\uCCB4 endings (${haeraCnt} occurrences out of ~${totalSentences} sentences). Use \uD569\uB2C8\uB2E4\uCCB4 (formal polite): ~\uD569\uB2C8\uB2E4, ~\uB429\uB2C8\uB2E4, ~\uC788\uC2B5\uB2C8\uB2E4.`
      );
    }
  }

  for (const sub of section.subsections) {
    const escapedNewlineCount = (sub.bodyKo.match(/\\\\n/g) ?? []).length;
    if (escapedNewlineCount >= 5) {
      errors.push(
        `${sub.subsectionId} (${section.sectionId}): bodyKo contains ${escapedNewlineCount} escaped newline sequences (\\\\n). Use real line breaks and write genuine content.`
      );
    }
  }
  return errors;
}
|
|
3573
|
+
|
|
3574
|
+
// src/chunked/session.ts
|
|
3575
|
+
import { promises as fs7 } from "node:fs";
|
|
3576
|
+
import path10 from "node:path";
|
|
3577
|
+
import crypto from "node:crypto";
|
|
3578
|
+
/**
 * Resolve the absolute path of the chunked-session file for a repository.
 *
 * @param {string} repoFullName - Repository reference in `owner/repo` form.
 * @param {string} [rootDir="devport-output/chunked"] - Directory holding the per-repo session folders.
 * @returns {string} Absolute path `<rootDir>/<owner>/<repo>/session.json`.
 * @throws {Error} When `repoFullName` is not exactly two non-empty segments,
 *   or when a segment is `.` or `..`.
 */
function sessionPathForRepo(repoFullName, rootDir = "devport-output/chunked") {
  const segments = repoFullName.split("/");
  const [owner, repo] = segments;
  // Extra segments used to be dropped silently, and "." / ".." segments let
  // the resolved path climb out of rootDir, so both are rejected up front.
  const isUnusable = (segment) => !segment || segment === "." || segment === "..";
  if (segments.length !== 2 || isUnusable(owner) || isUnusable(repo)) {
    throw new Error(`Invalid repoFullName: ${repoFullName}`);
  }
  return path10.resolve(rootDir, owner, repo, "session.json");
}
|
|
3585
|
+
/**
 * Read and validate a chunked session from disk.
 *
 * @param {string} sessionPath - Path to the session JSON file (resolved against the cwd).
 * @returns {Promise<object|null>} The session as returned by `ChunkedSessionSchema.parse`,
 *   or `null` when the file does not exist.
 * @throws Any read error other than `ENOENT`, JSON syntax errors, and schema errors.
 */
async function loadSession(sessionPath) {
  const sessionFile = path10.resolve(sessionPath);
  let serialized;
  try {
    serialized = await fs7.readFile(sessionFile, "utf8");
  } catch (readError) {
    // A missing file just means no session has been started yet.
    if (readError.code !== "ENOENT") {
      throw readError;
    }
    return null;
  }
  return ChunkedSessionSchema.parse(JSON.parse(serialized));
}
|
|
3599
|
+
/**
 * Build a fresh session object in which every planned section is pending.
 *
 * @param {object} plan - Section plan; reads `sections[].sectionId`, `repoFullName`,
 *   `commitSha` and `ingestRunId`.
 * @param {string} planPath - Path of the plan file; stored as an absolute path.
 * @returns {object} New session with a random UUID and an ISO start timestamp.
 */
function initSession(plan, planPath) {
  const pendingById = {};
  plan.sections.forEach((planned) => {
    pendingById[planned.sectionId] = { status: "pending" };
  });
  const sessionId = crypto.randomUUID();
  const { repoFullName, commitSha, ingestRunId } = plan;
  return {
    sessionId,
    repoFullName,
    commitSha,
    ingestRunId,
    planPath: path10.resolve(planPath),
    startedAt: new Date().toISOString(),
    sections: pendingById
  };
}
|
|
3614
|
+
/**
 * Validate a session and write it to disk as pretty-printed JSON.
 *
 * @param {string} sessionPath - Destination file; parent directories are created.
 * @param {object} session - Session to persist; must satisfy `ChunkedSessionSchema`.
 * @returns {Promise<void>}
 */
async function saveSession(sessionPath, session) {
  // Validate first so an invalid session never touches the filesystem.
  const checked = ChunkedSessionSchema.parse(session);
  const target = path10.resolve(sessionPath);
  const parentDir = path10.dirname(target);
  await fs7.mkdir(parentDir, { recursive: true });
  const payload = `${JSON.stringify(checked, null, 2)}\n`;
  await fs7.writeFile(target, payload, "utf8");
}
|
|
3621
|
+
/**
 * Return a copy of the session with one section marked as persisted.
 * The input session is not mutated; the section's previous entry is replaced.
 *
 * @param {object} session - Current session.
 * @param {string} sectionId - Section to mark; must already exist in `session.sections`.
 * @param {object} details - Reads `sectionOutputPath`, `chunksInserted`, `claimCount`,
 *   `citationCount`, `subsectionCount` and `koreanChars`.
 * @returns {object} New session object.
 * @throws {Error} When the section is not part of the session.
 */
function markSectionPersisted(session, sectionId, details) {
  if (!session.sections[sectionId]) {
    throw new Error(`Section ${sectionId} not found in session`);
  }
  const {
    sectionOutputPath,
    chunksInserted,
    claimCount,
    citationCount,
    subsectionCount,
    koreanChars
  } = details;
  const persistedEntry = {
    status: "persisted",
    sectionOutputPath,
    persistedAt: new Date().toISOString(),
    chunksInserted,
    claimCount,
    citationCount,
    subsectionCount,
    koreanChars
  };
  const nextSections = { ...session.sections, [sectionId]: persistedEntry };
  return { ...session, sections: nextSections };
}
|
|
3643
|
+
|
|
3644
|
+
// src/chunked/finalize.ts
|
|
3645
|
+
import { readFile, rm } from "node:fs/promises";
|
|
3646
|
+
import path12 from "node:path";
|
|
3647
|
+
|
|
3648
|
+
// src/output/markdown.ts
|
|
3649
|
+
import path11 from "node:path";
|
|
3650
|
+
import { mkdir, writeFile } from "node:fs/promises";
|
|
3651
|
+
/**
 * Comparator for sorting strings with English collation, numeric-aware and
 * ignoring case/accent differences.
 *
 * @param {string} left
 * @param {string} right
 * @returns {number} Negative, zero or positive, as `localeCompare` returns.
 */
function compareDeterministic4(left, right) {
  const collation = { numeric: true, sensitivity: "base" };
  return left.localeCompare(right, "en", collation);
}
|
|
3654
|
+
/**
 * Normalize a repository-relative path: trim it, convert backslashes to
 * forward slashes, drop one leading `./`, then collapse repeated slashes.
 *
 * @param {string} value - Raw path.
 * @returns {string} Normalized path (may be empty).
 */
function normalizeRepoPath4(value) {
  const forwardSlashed = value.trim().replace(/\\/g, "/");
  // The "./" prefix is removed before slash runs are collapsed.
  const withoutDotPrefix = forwardSlashed.replace(/^\.\//, "");
  return withoutDotPrefix.replace(/\/+/g, "/");
}
|
|
3657
|
+
/**
 * Resolve the absolute markdown output directory for a repository.
 *
 * @param {string} rootDir - Root directory for generated wikis.
 * @param {string} repoRef - Repository reference in `owner/repo` form.
 * @returns {string} Absolute path `<rootDir>/<owner>/<repo>`.
 * @throws {Error} When `repoRef` is not exactly two non-empty segments,
 *   or when a segment is `.` or `..`.
 */
function buildOutputDir(rootDir, repoRef) {
  const segments = repoRef.split("/");
  const [owner, repo] = segments;
  // Extra segments used to be dropped silently, and "." / ".." segments let
  // the resolved directory climb out of rootDir, so both are rejected here.
  const isUnusable = (segment) => !segment || segment === "." || segment === "..";
  if (segments.length !== 2 || isUnusable(owner) || isUnusable(repo)) {
    throw new Error(`repo_ref must be owner/repo, got: ${repoRef}`);
  }
  return path11.resolve(rootDir, owner, repo);
}
|
|
3664
|
+
/**
 * Build the markdown file name for a section: a 1-based, zero-padded
 * two-digit ordinal followed by the section id.
 *
 * @param {string} sectionId
 * @param {number} sectionIndex - Zero-based position of the section.
 * @returns {string} File name such as `01-sec-1.md`.
 */
function buildSectionFileName(sectionId, sectionIndex) {
  const ordinal = String(sectionIndex + 1).padStart(2, "0");
  return `${ordinal}-${sectionId}.md`;
}
|
|
3667
|
+
/**
 * Normalize, de-duplicate and sort a list of repository paths.
 * Paths that normalize to an empty string are dropped.
 *
 * @param {string[]} paths - Raw paths.
 * @returns {string[]} New sorted array of unique normalized paths.
 */
function uniquePaths(paths) {
  const seen = new Set();
  paths.forEach((candidate) => {
    const normalized = normalizeRepoPath4(candidate);
    if (normalized.length > 0) {
      seen.add(normalized);
    }
  });
  return [...seen].sort(compareDeterministic4);
}
|
|
3674
|
+
/**
 * Determine which source paths to list for a section.
 *
 * Paths declared on the section win. Otherwise the paths are derived from
 * the citations referenced by the section's claims in the draft.
 *
 * @param {object} section - Reads `sectionId` and the optional `sourcePaths` array.
 * @param {object} acceptedOutput - Reads `draft.claims` and `draft.citations` (both optional).
 * @returns {string[]} Normalized, unique, sorted paths; empty when none are known.
 */
function resolveSectionSourcePaths(section, acceptedOutput) {
  const declaredPaths = Array.isArray(section.sourcePaths) ? uniquePaths(section.sourcePaths) : [];
  if (declaredPaths.length > 0) {
    return declaredPaths;
  }
  // Index citations once instead of scanning the whole list for every
  // citation id. The first citation with a given id wins, which matches
  // what a linear `find` would return.
  const repoPathByCitationId = new Map();
  for (const citation of acceptedOutput.draft.citations ?? []) {
    if (!repoPathByCitationId.has(citation.citationId)) {
      repoPathByCitationId.set(citation.citationId, citation.repoPath ?? "");
    }
  }
  const citedPaths = (acceptedOutput.draft.claims ?? [])
    .filter((claim) => claim.sectionId === section.sectionId)
    .flatMap((claim) => claim.citationIds)
    // Unknown ids map to "", which uniquePaths discards.
    .map((citationId) => repoPathByCitationId.get(citationId) ?? "");
  return uniquePaths(citedPaths);
}
|
|
3689
|
+
/**
 * Render a markdown bullet list of source paths under a heading.
 *
 * @param {string[]} paths - Paths to list; each is wrapped in backticks.
 * @param {string} [heading] - Markdown heading line placed above the list.
 * @returns {string[]} Lines (heading, blank, bullets, blank), or `[]` when `paths` is empty.
 */
function renderSourceList(paths, heading = "## \uCC38\uACE0 \uC18C\uC2A4") {
  const bullets = paths.map((sourcePath) => `- \`${sourcePath}\``);
  return bullets.length === 0 ? [] : [heading, "", ...bullets, ""];
}
|
|
3700
|
+
/**
 * Render the README markdown for a generated wiki: metadata bullets, the
 * overview text, an ordered reading list linking each section file, the
 * list of source documents and, when present, a trend summary.
 *
 * @param {object} acceptedOutput - Reads `repo_ref`, `commit_sha`, `section_count`,
 *   `subsection_count` and `draft.{generatedAt, overviewKo, sourceDocs, trendFacts}`.
 * @param {Array<{titleKo: string, summaryKo: string, fileName: string}>} sectionFiles
 * @returns {string} Markdown joined with `\n`.
 */
function renderOverviewMarkdown(acceptedOutput, sectionFiles) {
  const { draft } = acceptedOutput;
  const trendFacts = draft.trendFacts ?? [];
  const sourceDocs = uniquePaths(draft.sourceDocs.map((sourceDoc) => sourceDoc.path));

  const readingOrder = sectionFiles.map((section, index) => {
    const summary = section.summaryKo.trim();
    const suffix = summary.length > 0 ? ` - ${summary}` : "";
    return `${index + 1}. [${section.titleKo}](${section.fileName})${suffix}`;
  });

  const lines = [
    `# ${acceptedOutput.repo_ref}`,
    "",
    `- Commit: \`${acceptedOutput.commit_sha}\``,
    `- Generated: \`${draft.generatedAt}\``,
    `- Sections: ${acceptedOutput.section_count}`,
    `- Subsections: ${acceptedOutput.subsection_count}`,
    "",
    draft.overviewKo.trim(),
    "",
    "## \uC77D\uAE30 \uC21C\uC11C",
    "",
    ...readingOrder,
    "",
    ...renderSourceList(sourceDocs, "## \uC804\uCCB4 \uC18C\uC2A4 \uBB38\uC11C")
  ];

  // The trend summary block is emitted only when there is at least one fact.
  if (trendFacts.length > 0) {
    lines.push(
      "## \uD2B8\uB80C\uB4DC \uC694\uC57D",
      "",
      ...trendFacts.map((fact) => `- ${fact.summaryKo.trim()}`),
      ""
    );
  }
  return lines.join("\n");
}
|
|
3728
|
+
/**
 * Render one section as markdown: numbered title, summary, source list,
 * then each subsection as a level-2 heading followed by its body.
 *
 * @param {object} section - Reads `titleKo`, `summaryKo` and `subsections[].{titleKo, bodyKo}`.
 * @param {number} sectionIndex - Zero-based index; shown 1-based in the title.
 * @param {string[]} sourcePaths - Paths listed under the source heading.
 * @returns {string} Markdown with trailing whitespace removed.
 */
function renderSectionMarkdown(section, sectionIndex, sourcePaths) {
  const header = [
    `# ${sectionIndex + 1}. ${section.titleKo}`,
    "",
    section.summaryKo.trim(),
    "",
    ...renderSourceList(sourcePaths)
  ];
  const body = section.subsections.flatMap((subsection) => [
    `## ${subsection.titleKo}`,
    "",
    subsection.bodyKo.trim(),
    ""
  ]);
  return [...header, ...body].join("\n").trimEnd();
}
|
|
3744
|
+
/**
 * Write the wiki markdown bundle: a README.md overview plus one file per
 * section, written sequentially in section order.
 *
 * @param {object} acceptedOutput - Reads `repo_ref` and `draft.sections`.
 * @param {{outDir?: string}} [options] - `outDir` defaults to `devport-output/wiki`.
 * @returns {Promise<{outputDir: string, files: Array<{relativePath: string, absolutePath: string}>}>}
 */
async function writeMarkdownBundle(acceptedOutput, options = {}) {
  const outputDir = buildOutputDir(options.outDir ?? "devport-output/wiki", acceptedOutput.repo_ref);
  await mkdir(outputDir, { recursive: true });

  const { sections } = acceptedOutput.draft;
  const sectionFiles = sections.map(({ sectionId, titleKo, summaryKo }, index) => ({
    sectionId,
    titleKo,
    summaryKo,
    fileName: buildSectionFileName(sectionId, index)
  }));

  const readmePath = path11.join(outputDir, "README.md");
  const overview = renderOverviewMarkdown(acceptedOutput, sectionFiles);
  await writeFile(readmePath, `${overview}\n`, "utf8");
  const files = [{ relativePath: "README.md", absolutePath: readmePath }];

  for (const [index, section] of sections.entries()) {
    const sourcePaths = resolveSectionSourcePaths(section, acceptedOutput);
    const fileName = buildSectionFileName(section.sectionId, index);
    const absolutePath = path11.join(outputDir, fileName);
    const markdown = renderSectionMarkdown(section, index, sourcePaths);
    await writeFile(absolutePath, `${markdown}\n`, "utf8");
    files.push({ relativePath: fileName, absolutePath });
  }

  return { outputDir, files };
}
|
|
3779
|
+
|
|
3780
|
+
// src/chunked/finalize.ts
|
|
3781
|
+
/**
 * Load and validate the output file of every section in a session.
 *
 * @param {object} session - Reads `sections`, a map of section id to status entry.
 * @returns {Promise<object[]>} Parsed section outputs sorted by `sectionId` (numeric-aware).
 * @throws {Error} When a section is not persisted or has no `sectionOutputPath`;
 *   read, JSON and schema errors also propagate.
 */
async function loadAllSectionOutputs(session) {
  const loaded = [];
  for (const [sectionId, entry] of Object.entries(session.sections)) {
    const { status, sectionOutputPath } = entry;
    if (status !== "persisted") {
      throw new Error(`Section ${sectionId} is not persisted (status: ${status})`);
    }
    if (!sectionOutputPath) {
      throw new Error(`Section ${sectionId} has no sectionOutputPath`);
    }
    const contents = await readFile(path12.resolve(sectionOutputPath), "utf8");
    loaded.push(SectionOutputSchema.parse(JSON.parse(contents)));
  }
  return loaded.sort((first, second) =>
    first.sectionId.localeCompare(second.sectionId, "en", { numeric: true })
  );
}
|
|
3797
|
+
/**
 * Run the cross-subsection repetition check over every subsection body of
 * every section.
 *
 * @param {object[]} sections - Each provides `sectionId` and
 *   `subsections[].{subsectionId, bodyKo}`.
 * @returns {string[]} Errors reported by `detectCrossSubsectionRepetitionInBodies`.
 */
function crossSectionValidation(sections) {
  const allBodies = sections.flatMap((section) =>
    section.subsections.map((sub) => ({
      sectionId: section.sectionId,
      subsectionId: sub.subsectionId,
      bodyKo: sub.bodyKo
    }))
  );
  // Copy into a fresh array so the caller never shares the detector's result.
  return [...detectCrossSubsectionRepetitionInBodies(allBodies)];
}
|
|
3813
|
+
/**
 * Combine a section plan and its section outputs into the accepted-output
 * envelope consumed by the markdown writer.
 *
 * @param {object} plan - Reads `ingestRunId`, `repoFullName`, `commitSha`, `overviewKo`.
 * @param {object[]} sections - Each provides `sectionId`, `titleKo`, `summaryKo`,
 *   `sourcePaths` and `subsections[].{sectionId, subsectionId, titleKo, bodyKo}`.
 * @returns {object} Envelope with counters and a `wiki-draft`. `total_korean_chars`
 *   is the summed `.length` of the overview, section summaries and subsection
 *   bodies; trend facts are always empty.
 */
function assembleAcceptedOutput(plan, sections) {
  const byPath = (left, right) =>
    left.localeCompare(right, "en", { numeric: true, sensitivity: "base" });
  const cleanPaths = (paths) =>
    paths.map((sourcePath) => sourcePath.trim()).filter((sourcePath) => sourcePath.length > 0);

  // Unique across all sections; per-section lists below are sorted but not de-duplicated.
  const allSourcePaths = [
    ...new Set(sections.flatMap((section) => cleanPaths(section.sourcePaths)))
  ].sort(byPath);

  const draftSections = sections.map((section) => ({
    sectionId: section.sectionId,
    titleKo: section.titleKo,
    summaryKo: section.summaryKo,
    sourcePaths: cleanPaths(section.sourcePaths).sort(byPath),
    subsections: section.subsections.map(({ sectionId, subsectionId, titleKo, bodyKo }) => ({
      sectionId,
      subsectionId,
      titleKo,
      bodyKo
    }))
  }));

  let subsectionCount = 0;
  let totalKoreanChars = plan.overviewKo.length;
  for (const { summaryKo, subsections } of sections) {
    subsectionCount += subsections.length;
    totalKoreanChars += summaryKo.length;
    for (const { bodyKo } of subsections) {
      totalKoreanChars += bodyKo.length;
    }
  }

  const generatedAt = new Date().toISOString();
  const sourceDocs = allSourcePaths.map((sourcePath, index) => ({
    sourceId: `src-${index + 1}`,
    path: sourcePath
  }));

  return {
    ingest_run_id: plan.ingestRunId,
    repo_ref: plan.repoFullName,
    commit_sha: plan.commitSha,
    section_count: sections.length,
    subsection_count: subsectionCount,
    total_korean_chars: totalKoreanChars,
    source_doc_count: allSourcePaths.length,
    trend_fact_count: 0,
    draft: {
      artifactType: "wiki-draft",
      repoFullName: plan.repoFullName,
      commitSha: plan.commitSha,
      generatedAt,
      overviewKo: plan.overviewKo,
      sections: draftSections,
      sourceDocs,
      trendFacts: []
    }
  };
}
|
|
3866
|
+
/**
 * Assemble all persisted sections of a session into the final wiki.
 *
 * Steps: verify every section is persisted, load the section outputs, run
 * cross-section validation, write the markdown bundle, optionally advance
 * the freshness baseline, and optionally delete the snapshot directory.
 * Baseline and snapshot failures are reported on stderr and do not abort.
 *
 * @param {object} session - Reads `sections`, `repoFullName`, `commitSha`.
 * @param {object} plan - Section plan; `snapshotPath` is used when deleting the snapshot.
 * @param {{advanceBaseline?: boolean, statePath?: string, deleteSnapshot?: boolean, outDir?: string}} options
 * @returns {Promise<object>} Summary counters, the output directory and the written file names.
 * @throws {Error} When sections are still unpersisted or cross-section validation fails.
 */
async function finalize(session, plan, options) {
  const { advanceBaseline, statePath } = options;

  const pendingSections = [];
  for (const [id, entry] of Object.entries(session.sections)) {
    if (entry.status !== "persisted") {
      pendingSections.push(id);
    }
  }
  if (pendingSections.length > 0) {
    throw new Error(
      `Cannot finalize: sections not yet persisted: ${pendingSections.join(", ")}`
    );
  }

  const sectionOutputs = await loadAllSectionOutputs(session);
  const crossErrors = crossSectionValidation(sectionOutputs);
  if (crossErrors.length > 0) {
    const issueList = crossErrors.map((e) => ` - ${e}`).join("\n");
    throw new Error(
      `Cross-section validation failed (${crossErrors.length} issue(s)):\n` + issueList
    );
  }

  const acceptedOutput = assembleAcceptedOutput(plan, sectionOutputs);
  const markdownBundle = await writeMarkdownBundle(acceptedOutput, { outDir: options.outDir });

  if (advanceBaseline) {
    // Best effort: the markdown is already on disk, so a baseline failure is only reported.
    try {
      const evidence = extractSectionEvidenceFromAcceptedOutput(acceptedOutput);
      const state = await loadFreshnessState(statePath);
      const repoRef = session.repoFullName.toLowerCase();
      const baselineEntry = {
        repo_ref: repoRef,
        last_delivery_commit: session.commitSha,
        sectionEvidenceIndex: evidence
      };
      const nextState = { ...state, repos: { ...state.repos, [repoRef]: baselineEntry } };
      await saveFreshnessState(statePath, nextState);
      process.stderr.write(` \u2713 freshness baseline \u2192 ${session.commitSha.slice(0, 7)}\n`);
    } catch (err) {
      process.stderr.write(
        ` \u26A0 freshness baseline not saved: ${String(err)}\nRe-run finalize --advance_baseline after fixing source paths\n`
      );
    }
  }

  let totalSubsections = 0;
  for (const section of sectionOutputs) {
    totalSubsections += section.subsections.length;
  }
  const result = {
    sectionsAssembled: sectionOutputs.length,
    totalSubsections,
    totalSourceDocs: acceptedOutput.source_doc_count,
    totalTrendFacts: acceptedOutput.trend_fact_count,
    totalKoreanChars: acceptedOutput.total_korean_chars,
    outputDir: markdownBundle.outputDir,
    filesWritten: markdownBundle.files.map((file) => file.relativePath)
  };

  if (options.deleteSnapshot) {
    try {
      await rm(plan.snapshotPath, { recursive: true, force: true });
      process.stderr.write(` \u2713 snapshot deleted \u2192 ${plan.snapshotPath}\n`);
    } catch (err) {
      process.stderr.write(` \u26A0 snapshot delete failed: ${String(err)}\n`);
    }
  }
  return result;
}
|
|
3935
|
+
|
|
3936
|
+
// src/agent.ts
|
|
3937
|
+
/**
 * Read the default quality gate level from an environment map.
 *
 * @param {object} env - Environment; reads `DEVPORT_QUALITY_GATE_LEVEL`.
 * @returns {"standard"|"strict"} `"standard"` only when explicitly configured, otherwise `"strict"`.
 */
function getQualityGateLevel(env) {
  const configured = env.DEVPORT_QUALITY_GATE_LEVEL;
  if (configured === "standard") {
    return "standard";
  }
  // Unset, "strict" and unrecognized values all resolve to strict.
  return "strict";
}
|
|
3941
|
+
/**
 * Parse `--key=value`, `--key value` and bare `--key` arguments into a map.
 * Arguments that do not start with `--` are ignored unless they are consumed
 * as the value of the preceding flag. A bare flag gets the string `"true"`.
 *
 * @param {string[]} argv - Command-line arguments.
 * @returns {Object<string, string>} Flag name to string value.
 */
function parseFlags(argv) {
  const flags = {};
  let cursor = 0;
  while (cursor < argv.length) {
    const token = argv[cursor];
    cursor += 1;
    if (!token.startsWith("--")) {
      continue;
    }
    const body = token.slice(2);
    const separator = body.indexOf("=");
    if (separator !== -1) {
      // Only the first "=" splits; later ones belong to the value.
      flags[body.slice(0, separator)] = body.slice(separator + 1);
      continue;
    }
    const candidate = argv[cursor];
    const hasValue = Boolean(candidate) && !candidate.startsWith("--");
    flags[body] = hasValue ? candidate : "true";
    if (hasValue) {
      cursor += 1;
    }
  }
  return flags;
}
|
|
3962
|
+
/**
 * Split a `repo[@ref]` argument into repo and ref.
 *
 * @param {string} repoFlag - Repository, optionally suffixed with `@ref`.
 * @param {string} [refFlag] - Ref used when `repoFlag` contains no `@`.
 * @returns {{repo: string, ref: (string|undefined)}}
 */
function parseRepo(repoFlag, refFlag) {
  const separator = repoFlag.indexOf("@");
  if (separator === -1) {
    return { repo: repoFlag, ref: refFlag };
  }
  // An inline "@ref" takes precedence over the separate ref flag.
  return { repo: repoFlag.slice(0, separator), ref: repoFlag.slice(separator + 1) };
}
|
|
3969
|
+
/**
 * Return the value of a mandatory flag.
 *
 * @param {Object<string, string>} flags - Parsed flags.
 * @param {string} name - Flag name without the leading dashes.
 * @returns {string} The flag value.
 * @throws {Error} When the flag is missing or empty.
 */
function requireFlag(flags, name) {
  const value = flags[name];
  if (value) {
    return value;
  }
  throw new Error(`--${name} is required`);
}
|
|
3974
|
+
/**
 * Format a number with en-US grouping separators for log output.
 *
 * @param {number} n
 * @returns {string} For example `1,234,567`.
 */
function fmtNum(n) {
  const locale = "en-US";
  return n.toLocaleString(locale);
}
|
|
3977
|
+
/**
 * Resolve the quality gate level from the `quality_gate_level` flag,
 * falling back to a default when the flag is absent or empty.
 *
 * @param {Object<string, string>} flags - Parsed flags.
 * @param {string} defaultLevel - Level returned when the flag is not given.
 * @returns {string} `"standard"`, `"strict"`, or `defaultLevel`.
 * @throws {Error} When the flag holds any other value.
 */
function resolveQualityGateLevel(flags, defaultLevel) {
  const requested = flags["quality_gate_level"];
  if (!requested) {
    return defaultLevel;
  }
  const allowed = ["standard", "strict"];
  if (!allowed.includes(requested)) {
    throw new Error(`--quality_gate_level must be standard or strict, got: ${requested}`);
  }
  return requested;
}
|
|
3985
|
+
/**
 * `ingest` command: run an ingest for `--repo` (optionally `repo@ref` or
 * `--ref`) and emit the resulting artifact as JSON.
 *
 * Progress goes to stderr. The artifact is written to `--out` when given,
 * otherwise to stdout.
 *
 * @param {Object<string, string>} flags - Reads `repo`, `ref`, `snapshot_root`,
 *   `out`, `force_rebuild`.
 * @returns {Promise<void>}
 */
async function ingestCommand(flags) {
  const { repo, ref } = parseRepo(requireFlag(flags, "repo"), flags["ref"]);
  const snapshotRoot = flags["snapshot_root"] ?? "devport-output/snapshots";
  const outFile = flags["out"];
  const refSuffix = ref ? `@${ref}` : "";
  process.stderr.write(`[devport-agent] ingest: ${repo}${refSuffix}\n`);

  // The ref key is included only when a ref was actually given.
  const repoRef = ref ? { repo, ref } : { repo };
  const artifact = await runIngest({
    repo_ref: repoRef,
    snapshot_root: path13.resolve(snapshotRoot),
    force_rebuild: flags["force_rebuild"] === "true"
  });

  const cacheLabel = artifact.idempotent_hit ? "cache hit" : "downloaded";
  const shortSha = artifact.commit_sha.slice(0, 7);
  process.stderr.write(
    ` \u2713 ${shortSha} \u2014 ${fmtNum(artifact.files_scanned)} files (${cacheLabel})\n`
  );
  process.stderr.write(` snapshot_path: ${artifact.snapshot_path}\n`);

  const json = `${JSON.stringify(artifact, null, 2)}\n`;
  if (!outFile) {
    process.stdout.write(json);
    return;
  }
  const outPath = path13.resolve(outFile);
  await mkdir2(path13.dirname(outPath), { recursive: true });
  await writeFile2(outPath, json, "utf8");
  process.stderr.write(` artifact \u2192 ${outPath}\n`);
}
|
|
4015
|
+
/**
 * `detect` command: compare a repository against its stored freshness
 * baseline and print a JSON verdict on stdout.
 *
 * The verdict `status` is `full-rebuild` (no baseline, or a rebuild is
 * required), `noop` (nothing changed) or `incremental`. Progress goes to stderr.
 *
 * @param {Object<string, string>} flags - Reads `repo` (required, `owner/repo`) and `state_path`.
 * @returns {Promise<void>}
 * @throws {Error} When `--repo` is not exactly `owner/repo`.
 */
async function detectCommand(flags) {
  const log = (text) => process.stderr.write(text);
  const emitJson = (payload) => process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);

  const repoFlag = requireFlag(flags, "repo");
  const parts = repoFlag.toLowerCase().split("/");
  const [owner, name] = parts;
  if (parts.length !== 2 || !owner || !name) {
    throw new Error(`--repo must be owner/repo, got: ${repoFlag}`);
  }
  // Baselines are keyed by the lower-cased repo reference.
  const repoRef = `${owner}/${name}`;
  const statePath = flags["state_path"] ?? "devport-output/freshness/state.json";
  log(`[devport-agent] detect: ${repoRef}\n`);

  const state = await loadFreshnessState(statePath);
  const baseline = state.repos[repoRef];
  if (!baseline) {
    log(` \u2192 no baseline \u2014 full rebuild required\n`);
    log(
      ` (run \`package --advance_baseline\` or \`finalize --advance_baseline\` after first generation)\n`
    );
    emitJson({
      status: "full-rebuild",
      reason: "BASELINE_MISSING",
      repo_ref: repoRef,
      changed_paths: [],
      impacted_section_ids: []
    });
    return;
  }

  log(` base: ${baseline.last_delivery_commit.slice(0, 7)}\n`);
  const detection = await detectRepoFreshness(
    { repo_ref: repoRef, baseline },
    { token: process.env["GITHUB_TOKEN"] }
  );

  if (detection.mode === "noop") {
    log(` \u2713 no changes \u2014 delivery is current at ${detection.head_commit.slice(0, 7)}\n`);
    emitJson({
      status: "noop",
      repo_ref: repoRef,
      base_commit: detection.base_commit,
      head_commit: detection.head_commit,
      changed_paths: [],
      impacted_section_ids: []
    });
    return;
  }

  const mapped = mapChangedPathsToImpactedSections({
    changed_paths: detection.changed_paths,
    sectionEvidenceIndex: baseline.sectionEvidenceIndex
  });
  const needsFullRebuild = [mapped.mode, detection.mode].includes("full-rebuild-required");
  const status = needsFullRebuild ? "full-rebuild" : "incremental";
  log(
    ` \u2192 ${status}: ${detection.changed_paths.length} paths changed, ${mapped.impacted_section_ids.length} sections impacted\n`
  );
  if (mapped.impacted_section_ids.length > 0) {
    log(` sections: ${mapped.impacted_section_ids.join(", ")}\n`);
  }
  emitJson({
    status,
    repo_ref: repoRef,
    base_commit: detection.base_commit,
    head_commit: detection.head_commit,
    changed_paths: detection.changed_paths,
    impacted_section_ids: mapped.impacted_section_ids
  });
}
|
|
4092
|
+
/**
 * `package` command: validate an accepted output (read from `--input` or
 * stdin), write the markdown bundle, and optionally advance the freshness
 * baseline. A baseline failure is reported on stderr and does not abort.
 *
 * @param {Object<string, string>} flags - Reads `out_dir`, `input`,
 *   `advance_baseline`, `state_path`, `quality_gate_level`.
 * @returns {Promise<void>}
 */
async function packageCommand(flags) {
  const outDir = flags["out_dir"] ?? "devport-output/wiki";
  const inputFile = flags["input"];
  const advanceBaseline = flags["advance_baseline"] === "true";
  const statePath = flags["state_path"] ?? "devport-output/freshness/state.json";
  const qualityGateLevel = resolveQualityGateLevel(flags, getQualityGateLevel(process.env));

  const raw = inputFile
    ? await readFile2(path13.resolve(inputFile), "utf8")
    : await readStdin();
  const acceptedOutput = JSON.parse(raw);
  const shortSha = acceptedOutput.commit_sha.slice(0, 7);
  process.stderr.write(`[devport-agent] package: ${acceptedOutput.repo_ref}@${shortSha}\n`);

  const packaged = packageAcceptedOutputsForDelivery([acceptedOutput], { qualityGateLevel });
  const [envelope] = packaged.artifacts;
  const glossaryCount = Array.isArray(envelope.glossary) ? envelope.glossary.length : 0;
  const sectionCount = Array.isArray(envelope.sections) ? envelope.sections.length : 0;

  const markdownBundle = await writeMarkdownBundle(acceptedOutput, { outDir });
  process.stderr.write(
    ` \u2713 ${sectionCount} sections validated, glossary: ${glossaryCount} terms\n`
  );
  process.stderr.write(` markdown \u2192 ${markdownBundle.outputDir}\n`);

  if (!advanceBaseline) {
    return;
  }
  // Best effort: the markdown is already written, so failures here are only reported.
  try {
    const evidence = extractSectionEvidenceFromAcceptedOutput(acceptedOutput);
    const state = await loadFreshnessState(statePath);
    const { project } = envelope;
    const repoRef = project.repoRef.toLowerCase();
    const baselineEntry = {
      repo_ref: repoRef,
      last_delivery_commit: project.commitSha,
      sectionEvidenceIndex: evidence
    };
    const nextState = { ...state, repos: { ...state.repos, [repoRef]: baselineEntry } };
    await saveFreshnessState(statePath, nextState);
    process.stderr.write(` \u2713 freshness baseline \u2192 ${project.commitSha.slice(0, 7)}\n`);
  } catch (err) {
    process.stderr.write(
      ` \u26A0 freshness baseline not saved: ${String(err)}\nmarkdown wiki is written; re-run package --advance_baseline after fixing section evidence paths\n`
    );
  }
}
|
|
4150
|
+
/**
 * `plan-sections` command: read an ingest artifact, build the planning
 * context for it and emit that context as JSON (to `--out` or stdout).
 *
 * @param {Object<string, string>} flags - Reads `artifact` (required) and `out`.
 * @returns {Promise<void>}
 */
async function planSectionsCommand(flags) {
  const artifactFile = requireFlag(flags, "artifact");
  const outFile = flags["out"];
  const raw = await readFile2(path13.resolve(artifactFile), "utf8");
  const artifact = ingestRunArtifactSchema.parse(JSON.parse(raw));
  process.stderr.write(
    `[devport-agent] plan-sections: ${artifact.repo_ref} (${fmtNum(artifact.files_scanned)} files)\n`
  );

  const context = await planContext(artifact);
  const { profile } = context;
  const summaryLines = [
    ` \u2713 plan context generated for ${profile.repoName}`,
    `type: ${profile.projectType}, lang: ${profile.primaryLanguage}, domain: ${profile.domainHint}`,
    `${context.fileTree.length} directory groups, ${context.keyPaths.length} key paths`,
    `README excerpt: ${context.readmeExcerpt.length} chars`
  ];
  process.stderr.write(`${summaryLines.join("\n")}\n`);

  const json = `${JSON.stringify(context, null, 2)}\n`;
  if (!outFile) {
    process.stdout.write(json);
    return;
  }
  const outPath = path13.resolve(outFile);
  await mkdir2(path13.dirname(outPath), { recursive: true });
  await writeFile2(outPath, json, "utf8");
  process.stderr.write(` context \u2192 ${outPath}\n`);
}
|
|
4179
|
+
/**
 * `validate-plan` command: validate a section plan against the snapshot
 * named in the plan context and emit the validated plan as JSON
 * (to `--out` or stdout).
 *
 * @param {Object<string, string>} flags - Reads `input` and `context` (both required) and `out`.
 * @returns {Promise<void>}
 */
async function validatePlanCommand(flags) {
  const inputFile = requireFlag(flags, "input");
  const contextFile = requireFlag(flags, "context");
  const outFile = flags["out"];

  const contextRaw = await readFile2(path13.resolve(contextFile), "utf8");
  const context = PlanContextSchema.parse(JSON.parse(contextRaw));
  const planRaw = await readFile2(path13.resolve(inputFile), "utf8");
  const planJson = JSON.parse(planRaw);
  process.stderr.write(`[devport-agent] validate-plan: ${context.repoFullName}\n`);

  const validated = validatePlan(planJson, { snapshotPath: context.snapshotPath });
  process.stderr.write(` \u2713 plan validated: ${validated.totalSections} sections\n`);
  validated.sections.forEach((section) => {
    process.stderr.write(
      ` ${section.sectionId}: ${section.titleKo} (${section.focusPaths.length} focus files, ${section.subsectionCount} subsections)\n`
    );
  });

  const json = `${JSON.stringify(validated, null, 2)}\n`;
  if (!outFile) {
    process.stdout.write(json);
    return;
  }
  const outPath = path13.resolve(outFile);
  await mkdir2(path13.dirname(outPath), { recursive: true });
  await writeFile2(outPath, json, "utf8");
  process.stderr.write(` validated plan \u2192 ${outPath}\n`);
}
|
|
4216
|
+
/**
 * `persist-section` command: validate one section output against the plan
 * and record it as persisted in the session file, creating the session if
 * none exists yet.
 *
 * @param {Object<string, string>} flags - Reads `plan`, `section`, `input`
 *   (all required), `session` and `quality_gate_level`.
 * @returns {Promise<void>}
 * @throws {Error} When the input's `sectionId` differs from `--section`, or
 *   when section validation reports issues.
 */
async function persistSectionCommand(flags) {
  const planFile = requireFlag(flags, "plan");
  const sectionId = requireFlag(flags, "section");
  const inputFile = requireFlag(flags, "input");
  const sessionFile = flags["session"];

  const planRaw = await readFile2(path13.resolve(planFile), "utf8");
  const plan = SectionPlanOutputSchema.parse(JSON.parse(planRaw));
  const sectionRaw = await readFile2(path13.resolve(inputFile), "utf8");
  const sectionOutput = SectionOutputSchema.parse(JSON.parse(sectionRaw));
  if (sectionOutput.sectionId !== sectionId) {
    throw new Error(
      `Section ID mismatch: --section ${sectionId} but input has sectionId "${sectionOutput.sectionId}"`
    );
  }
  process.stderr.write(`[devport-agent] persist-section: ${plan.repoFullName} / ${sectionId}\n`);

  const qualityGateLevel = resolveQualityGateLevel(flags, getQualityGateLevel(process.env));
  const validationErrors = validateSection(sectionOutput, {
    snapshotPath: plan.snapshotPath,
    qualityGateLevel
  });
  if (validationErrors.length > 0) {
    const issueList = validationErrors.map((e) => ` - ${e}`).join("\n");
    throw new Error(
      `Section validation failed for ${sectionId} (${validationErrors.length} issue(s)):\n` + issueList
    );
  }
  process.stderr.write(` \u2713 section validation passed\n`);

  const sessionPath = sessionFile
    ? path13.resolve(sessionFile)
    : sessionPathForRepo(plan.repoFullName);
  let session = await loadSession(sessionPath);
  if (!session) {
    session = initSession(plan, planFile);
    process.stderr.write(` created new session: ${session.sessionId}\n`);
  }

  // Character count is the summed string length of the summary and all subsection bodies.
  const koreanChars = sectionOutput.subsections.reduce(
    (total, sub) => total + sub.bodyKo.length,
    sectionOutput.summaryKo.length
  );
  session = markSectionPersisted(session, sectionId, {
    sectionOutputPath: path13.resolve(inputFile),
    chunksInserted: 0,
    claimCount: 0,
    citationCount: 0,
    subsectionCount: sectionOutput.subsections.length,
    koreanChars
  });
  await saveSession(sessionPath, session);

  process.stderr.write(
    ` \u2713 ${sectionId}: locally validated, ${sectionOutput.sourcePaths.length} source paths\n`
  );
  process.stderr.write(` session \u2192 ${sessionPath}\n`);
  const entries = Object.values(session.sections);
  const totalSections = entries.length;
  const persistedCount = entries.filter((s) => s.status === "persisted").length;
  process.stderr.write(` progress: ${persistedCount}/${totalSections} sections ready\n`);
}
|
|
4278
|
+
/**
 * CLI handler for the `finalize` command.
 *
 * Reads the plan file named by `--plan`, parses it with
 * `SectionPlanOutputSchema`, loads the session (from `--session` when given,
 * otherwise from the path derived from the plan's repository name), hands
 * both to `finalize`, and reports the outcome on stderr.
 *
 * Flags read: `plan`, `session`, `advance_baseline`, `state_path`,
 * `delete_snapshot`, `out_dir`.
 *
 * @param {object} flags Parsed command-line flags keyed by flag name.
 * @returns {Promise<void>}
 * @throws {Error} When no session can be loaded from the resolved session path.
 */
async function finalizeCommand(flags) {
  const planFile = requireFlag(flags, "plan");
  const explicitSessionFile = flags["session"];

  // Boolean switches are only enabled by the exact string "true".
  const finalizeOptions = {
    advanceBaseline: flags["advance_baseline"] === "true",
    statePath: flags["state_path"] ?? "devport-output/freshness/state.json",
    deleteSnapshot: flags["delete_snapshot"] === "true",
    outDir: flags["out_dir"] ?? "devport-output/wiki"
  };

  const planJson = JSON.parse(await readFile2(path13.resolve(planFile), "utf8"));
  const plan = SectionPlanOutputSchema.parse(planJson);

  let sessionPath;
  if (explicitSessionFile) {
    sessionPath = path13.resolve(explicitSessionFile);
  } else {
    sessionPath = sessionPathForRepo(plan.repoFullName);
  }

  const session = await loadSession(sessionPath);
  if (!session) {
    throw new Error(
      `No session found at ${sessionPath}. Run persist-section for at least one section first.`
    );
  }

  process.stderr.write(
    `[devport-agent] finalize: ${plan.repoFullName} (session ${session.sessionId})
`
  );

  const result = await finalize(session, plan, finalizeOptions);

  const summaryLine = ` \u2713 finalized: ${result.sectionsAssembled} sections, ${result.totalSubsections} subsections, ${result.totalSourceDocs} source docs, ${result.totalTrendFacts} trend facts, ${fmtNum(result.totalKoreanChars)} Korean chars
`;
  process.stderr.write(summaryLine);

  const outputLine = ` markdown \u2192 ${result.outputDir} (${result.filesWritten.length} files)
`;
  process.stderr.write(outputLine);
}
|
|
4311
|
+
/**
 * Collects everything written to standard input and returns it as text.
 *
 * @returns {Promise<string>} Resolves with the concatenated stdin chunks
 *   decoded as UTF-8 once the stream ends. Rejects with an Error when stdin
 *   is a TTY (nothing was piped in), or with the stream's own error.
 */
function readStdin() {
  return new Promise((resolve, reject) => {
    const { stdin } = process;

    // An interactive terminal means no data was piped; fail fast instead of waiting.
    if (stdin.isTTY) {
      reject(new Error("No input provided. Pipe JSON or use --input <file>"));
      return;
    }

    const received = [];
    stdin.on("data", (piece) => {
      received.push(piece);
    });
    stdin.on("end", () => {
      resolve(Buffer.concat(received).toString("utf8"));
    });
    stdin.on("error", reject);
  });
}
|
|
4323
|
+
/**
 * Prints the CLI usage text (command list, per-command flags, and the
 * example workflows) to stderr in a single write. Lines are joined with
 * "\n"; the text begins and ends with an empty line.
 *
 * @returns {void}
 */
function printHelp() {
  const helpLines = [
    "",
    "portki \u2014 public CLI for generating Korean Markdown wikis from GitHub repositories",
    "Run as an installed package with `portki <command>` or from source with `npx tsx src/agent.ts <command>`.",
    "",
    "Commands:",
    " ingest Snapshot a GitHub repo and emit the ingest artifact",
    " --repo owner/repo (required)",
    " --ref branch|sha (optional, uses default branch if omitted)",
    " --out artifact.json (optional, prints to stdout if omitted)",
    " --snapshot_root (default: devport-output/snapshots)",
    " --force_rebuild (re-download even if cache is valid)",
    "",
    " detect Detect what changed since the last delivery",
    " --repo owner/repo (required)",
    " --state_path (default: devport-output/freshness/state.json)",
    " stdout: { status, changed_paths, impacted_section_ids, ... }",
    " status values: noop | incremental | full-rebuild",
    "",
    " package Validate AI-generated GroundedAcceptedOutput, write markdown wiki files",
    " --input accepted-output.json (optional, reads stdin if omitted)",
    " --out_dir (default: devport-output/wiki)",
    " --quality_gate_level standard|strict (default from DEVPORT_QUALITY_GATE_LEVEL)",
    " --advance_baseline save freshness state for future detect",
    " --state_path (default: devport-output/freshness/state.json)",
    "",
    " plan-sections Analyze repo and produce planning context for AI section generation",
    " --artifact artifact.json (required)",
    " --out plan-context.json (optional, prints to stdout if omitted)",
    "",
    " validate-plan Validate an AI-generated section plan against the schema",
    " --input section-plan.json (required)",
    " --context plan-context.json (required)",
    " --out section-plan.json (optional, prints to stdout if omitted)",
    "",
    " persist-section Validate a single section and register it in the local session",
    " --plan section-plan.json (required)",
    " --section sec-1 (required)",
    " --input section-1.json (required)",
    " --quality_gate_level standard|strict (default from DEVPORT_QUALITY_GATE_LEVEL)",
    " --session session.json (optional, auto-derived from repo name)",
    "",
    " finalize Cross-validate all sections and write the final markdown wiki bundle",
    " --plan section-plan.json (required)",
    " --session session.json (optional, auto-derived from repo name)",
    " --out_dir (default: devport-output/wiki)",
    " --advance_baseline save freshness state for future detect",
    " --state_path (default: devport-output/freshness/state.json)",
    " --delete_snapshot delete snapshot directory after successful finalize",
    "",
    "First-run workflow (monolithic):",
    " 1. portki ingest --repo owner/repo --out artifact.json",
    " 2. AI reads artifact.json + files under snapshot_path, generates GroundedAcceptedOutput",
    " 3. portki package --input accepted-output.json --advance_baseline",
    "",
    "Chunked workflow (higher quality, section-at-a-time):",
    " 1. portki ingest --repo owner/repo --out artifact.json",
    " 2. portki plan-sections --artifact artifact.json --out plan-context.json",
    " 3. AI reads plan-context.json + README + code, generates section-plan.json",
    " 4. portki validate-plan --input section-plan.json --context plan-context.json --out section-plan.json",
    " 5. For each section: AI reads focus files, writes section-N.json",
    " portki persist-section --plan section-plan.json --section sec-N --input section-N.json",
    " 6. portki finalize --plan section-plan.json --advance_baseline",
    " \u2192 writes README.md + section markdown files under devport-output/wiki/{owner}/{repo}/",
    "",
    "Incremental update workflow:",
    " 1. portki detect --repo owner/repo",
    " \u2192 noop: done. incremental/full-rebuild: continue below",
    " 2. portki ingest --repo owner/repo --out artifact.json",
    " 3. AI regenerates (all or only impacted sections) \u2192 accepted-output.json",
    " 4. portki package --input accepted-output.json --advance_baseline",
    ""
  ];
  const helpText = helpLines.join("\n");
  process.stderr.write(helpText);
}
|
|
4400
|
+
/**
 * CLI entry point: loads env files, reads the command name from the first
 * argument after the script path, and dispatches to the matching command
 * handler with the flags parsed from the remaining arguments.
 *
 * Exit codes (set via `process.exitCode`):
 *  - explicit help request (`help`, `--help`, `-h`): 0
 *  - no command given: help is printed and the code is 1
 *  - unknown command: a notice and the help are printed and the code is 1
 *
 * @returns {Promise<void>}
 */
async function main() {
  loadEnvFiles();

  const [command, ...rest] = process.argv.slice(2);
  const helpRequested = command === "help" || command === "--help" || command === "-h";
  if (!command || helpRequested) {
    printHelp();
    // A bare invocation is treated as a usage error; asking for help is not.
    process.exitCode = command ? 0 : 1;
    return;
  }

  // Flags are parsed before dispatch, so this also runs for unknown commands.
  const flags = parseFlags(rest);

  switch (command) {
    case "ingest":
      await ingestCommand(flags);
      return;
    case "detect":
      await detectCommand(flags);
      return;
    case "package":
      await packageCommand(flags);
      return;
    case "plan-sections":
      await planSectionsCommand(flags);
      return;
    case "validate-plan":
      await validatePlanCommand(flags);
      return;
    case "persist-section":
      await persistSectionCommand(flags);
      return;
    case "finalize":
      await finalizeCommand(flags);
      return;
    default:
      process.stderr.write(`[devport-agent] unknown command: ${command}
`);
      printHelp();
      process.exitCode = 1;
  }
}
|
|
4443
|
+
/**
 * Reports whether this module's URL equals the file URL of the script path
 * in `process.argv[1]`.
 *
 * @returns {boolean} `false` when `process.argv[1]` is missing or empty,
 *   otherwise the result of comparing `import.meta.url` with that path's
 *   file URL.
 */
function isDirectExecution() {
  const [, entry] = process.argv;
  return entry ? import.meta.url === pathToFileURL(entry).href : false;
}
|
|
4450
|
+
// src/cli.ts
// Single start-up site for the bundle: main() is started exactly once.
// Previously a guarded call (taken when isDirectExecution() returned true)
// sat in front of this unconditional call, so running this file directly
// started main() twice and the selected command executed twice concurrently.
// Any rejection from main() is reported on stderr and turned into exit code 1.
main().catch((error) => {
  process.stderr.write(`
[portki] error: ${String(error)}
`);
  process.exitCode = 1;
});
|