sdtk-wiki-kit 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -23
- package/package.json +1 -1
- package/src/commands/enrich.js +51 -0
- package/src/commands/help.js +16 -11
- package/src/commands/lint.js +1 -0
- package/src/commands/operations.js +6 -5
- package/src/commands/search.js +5 -4
- package/src/commands/wiki.js +8 -7
- package/src/index.js +4 -0
- package/src/lib/wiki-compile.js +1201 -68
- package/src/lib/wiki-enrich.js +264 -0
- package/src/lib/wiki-extract.js +685 -9
- package/src/lib/wiki-lint.js +293 -11
- package/src/lib/wiki-paths.js +55 -0
- package/src/lib/wiki-search.js +17 -10
package/src/lib/wiki-extract.js
CHANGED
|
@@ -173,6 +173,39 @@ function collectMarkdownFiles(sourceRoot) {
|
|
|
173
173
|
return { files: files.sort((a, b) => toPosix(a).localeCompare(toPosix(b))), skipped, scanned };
|
|
174
174
|
}
|
|
175
175
|
|
|
176
|
+
/**
 * Collect every `.json` file (extension matched case-insensitively) reachable
 * from `sourceRoot`.
 *
 * When `sourceRoot` is itself a regular file it is accepted directly (no
 * exclusion check is applied). Otherwise the tree is walked recursively;
 * directories and files for which `isExcluded(path, sourceRoot)` is truthy are
 * skipped. Excluded JSON files still count towards `scanned`.
 *
 * @param {string} sourceRoot - File or directory to scan.
 * @returns {{ files: string[], scanned: number }} Matching paths sorted by
 *   their `toPosix` form, and the number of JSON files encountered.
 */
function collectJsonFiles(sourceRoot) {
  const jsonPattern = /\.json$/i;
  const found = [];
  let scanned = 0;

  const walk = (entryPath) => {
    const entryStat = fs.statSync(entryPath);

    if (entryStat.isDirectory()) {
      if (isExcluded(entryPath, sourceRoot)) return;
      // Children are visited in sorted name order.
      fs.readdirSync(entryPath)
        .sort()
        .forEach((childName) => walk(path.join(entryPath, childName)));
      return;
    }

    if (!entryStat.isFile() || !jsonPattern.test(entryPath)) return;

    // Counted before the exclusion check, so excluded JSON files are still "scanned".
    scanned += 1;
    if (!isExcluded(entryPath, sourceRoot)) found.push(entryPath);
  };

  if (fs.statSync(sourceRoot).isFile()) {
    if (jsonPattern.test(sourceRoot)) {
      scanned += 1;
      found.push(sourceRoot);
    }
  } else {
    walk(sourceRoot);
  }

  found.sort((left, right) => toPosix(left).localeCompare(toPosix(right)));
  return { files: found, scanned };
}
|
|
208
|
+
|
|
176
209
|
function parseFrontmatterTitle(text) {
|
|
177
210
|
const lines = text.split(/\r?\n/);
|
|
178
211
|
if (!lines.length || lines[0].trim() !== "---") return "";
|
|
@@ -224,6 +257,20 @@ function extractGithubRepos(text) {
|
|
|
224
257
|
return repos;
|
|
225
258
|
}
|
|
226
259
|
|
|
260
|
+
/**
 * Parse a GitHub repository URL (scheme and `www.` optional) into its parts.
 *
 * Trailing punctuation and a trailing `.git` are removed from the repository
 * segment. Values that do not match, or whose repository segment is empty or
 * contains `...` after cleanup, yield `null`.
 *
 * @param {*} value - Candidate URL; falsy values are treated as an empty string.
 * @returns {{ owner: string, repo: string, github_url: string, key: string } | null}
 *   `github_url` is rebuilt as `https://github.com/<owner>/<repo>` and `key`
 *   is the lower-cased `<owner>/<repo>` pair.
 */
function parseGithubRepoUrl(value) {
  const repoUrlPattern = /^(?:https?:\/\/)?(?:www\.)?github\.com\/([A-Za-z0-9](?:[A-Za-z0-9-]{0,38}))\/([A-Za-z0-9._-]+)(?:\.git)?(?:[/?#].*)?$/i;
  const parts = repoUrlPattern.exec(String(value || ""));
  if (parts === null) return null;

  const [, owner, rawRepo] = parts;
  const withoutPunctuation = rawRepo.replace(/[).,;:]+$/g, "");
  const repo = withoutPunctuation.replace(/\.git$/i, "");

  // An empty name or an elided ("...") name is not a usable repository.
  if (repo === "" || repo.includes("...")) return null;

  const github_url = `https://github.com/${owner}/${repo}`;
  const key = `${owner.toLowerCase()}/${repo.toLowerCase()}`;
  return { owner, repo, github_url, key };
}
|
|
273
|
+
|
|
227
274
|
function extractUnsupportedGithubItems(text) {
|
|
228
275
|
const items = [];
|
|
229
276
|
const invalidRegex = /github\.com\/(?:\.\.\.|[^\s)]+\.{3}[^\s)]*)/gi;
|
|
@@ -234,11 +281,39 @@ function extractUnsupportedGithubItems(text) {
|
|
|
234
281
|
return [...new Set(items)];
|
|
235
282
|
}
|
|
236
283
|
|
|
284
|
+
/**
 * Turn a topic label into a slug: underscores become spaces, then the result
 * is passed to `safeSlug` with `"topic"` as its second argument
 * (presumably the fallback slug — confirm against `safeSlug`).
 *
 * @param {*} value - Topic label; falsy values are treated as an empty string.
 * @returns {string} Whatever `safeSlug` returns for the spaced label.
 */
function normalizeTopic(value) {
  const spacedLabel = String(value || "").replace(/_/g, " ");
  return safeSlug(spacedLabel, "topic");
}
|
|
287
|
+
|
|
288
|
+
/**
 * Build a concept record from a single topic label.
 *
 * @param {*} topic - Topic label; falsy values are treated as an empty string.
 * @returns {object | null} A concept record with medium confidence (0.6) and
 *   empty reference lists, or `null` when the normalized slug is empty or is
 *   the literal `"topic"`.
 */
function conceptFromTopic(topic) {
  const label = String(topic || "").trim();
  const slug = normalizeTopic(label);
  const isPlaceholderSlug = !slug || slug === "topic";
  if (isPlaceholderSlug) return null;

  // Human-readable form: runs of underscores/hyphens collapse to one space.
  const readableName = label.replace(/[_-]+/g, " ");
  const conceptId = `concept_topic_${slug.replace(/-/g, "_")}`;

  return {
    concept_id: conceptId,
    name: readableName,
    aliases: [label],
    definition: `Local structured sources include topic evidence for ${readableName}.`,
    related_entities: [],
    source_refs: [],
    provenance_refs: [],
    confidence: 0.6,
    confidence_tier: "medium",
    target_page_path: `wiki/concepts/${slug}.md`,
  };
}
|
|
305
|
+
|
|
237
306
|
/**
 * Select the entries of `CONCEPT_RULES` that have at least one keyword
 * occurring (case-insensitively, as a substring) in `text`.
 *
 * @param {string} text - Text to scan; must be a string.
 * @returns {Array<object>} Matching rule objects, in `CONCEPT_RULES` order.
 */
function inferConcepts(text) {
  const haystack = text.toLowerCase();
  const mentionsKeyword = (keyword) => haystack.includes(keyword.toLowerCase());
  const ruleMatches = (rule) => rule.keywords.some(mentionsKeyword);
  return CONCEPT_RULES.filter(ruleMatches);
}
|
|
241
310
|
|
|
311
|
+
/**
 * Convert a list of topic labels into concept records via `conceptFromTopic`,
 * dropping topics that do not produce a concept.
 *
 * @param {*} topics - Expected to be an array of labels; any non-array value
 *   is treated as an empty list.
 * @returns {Array<object>} Concept records in the order of the input topics.
 */
function inferConceptsFromTopics(topics) {
  const topicList = Array.isArray(topics) ? topics : [];
  const concepts = topicList.map(conceptFromTopic);
  return concepts.filter(Boolean);
}
|
|
316
|
+
|
|
242
317
|
function categoryForSource(text, concepts) {
|
|
243
318
|
if (concepts.length > 0) return concepts[0].category;
|
|
244
319
|
const lower = text.toLowerCase();
|
|
@@ -246,6 +321,54 @@ function categoryForSource(text, concepts) {
|
|
|
246
321
|
return "uncategorized";
|
|
247
322
|
}
|
|
248
323
|
|
|
324
|
+
/**
 * Map a raw confidence value onto a number in the range [0, 1].
 *
 * Finite numbers are clamped to [0, 1]. Otherwise the value is stringified,
 * trimmed and lower-cased, and the labels `high`, `medium`, `low` and
 * `unsupported` map to 0.85, 0.65, 0.35 and 0.1 respectively. Anything else
 * (including numeric strings) yields 0.5.
 *
 * @param {*} value - Raw confidence (number, label, or anything else).
 * @returns {number} Normalized confidence.
 */
function confidenceNumber(value) {
  if (typeof value === "number" && Number.isFinite(value)) {
    return Math.min(1, Math.max(0, value));
  }

  const label = String(value || "").trim().toLowerCase();
  switch (label) {
    case "high":
      return 0.85;
    case "medium":
      return 0.65;
    case "low":
      return 0.35;
    case "unsupported":
      return 0.1;
    default:
      return 0.5;
  }
}
|
|
333
|
+
|
|
334
|
+
/**
 * Find the list of records inside a parsed JSON value.
 *
 * - An array is returned as-is.
 * - A non-object (or `null`) yields an empty array.
 * - An object is searched for the first array-valued property among
 *   `records`, `repos`, `repositories`, `items`, `data` (in that order).
 * - An object with none of those is wrapped as a single-element array.
 *
 * @param {*} value - Parsed JSON value.
 * @returns {Array} The located or synthesized record list.
 */
function firstArray(value) {
  if (Array.isArray(value)) return value;
  if (value === null || typeof value !== "object") return [];

  const wrapperKeys = ["records", "repos", "repositories", "items", "data"];
  const matchedKey = wrapperKeys.find((key) => Array.isArray(value[key]));
  return matchedKey === undefined ? [value] : value[matchedKey];
}
|
|
342
|
+
|
|
343
|
+
/**
 * Normalize one raw JSON record into the repository-record shape used by the
 * JSON extraction path.
 *
 * Field aliases read from `raw`:
 * - URL: `repo_url`, `github_url`, `url`
 * - repository name: `repo_name`, `name`, `repo` (falling back to the parsed URL)
 * - snippet: `message_text_snippet`, `snippet`, `description`, `summary`
 * - source link: `source_link`, `source_url`, `link`
 *
 * @param {*} raw - Candidate record; must be a non-array object.
 * @returns {object | null} Normalized record (the original object is kept
 *   under `raw`), or `null` when `raw` is not a plain record or carries none
 *   of the recognized fields.
 */
function normalizeJsonRepoRecord(raw) {
  const isRecordObject = Boolean(raw) && typeof raw === "object" && !Array.isArray(raw);
  if (!isRecordObject) return null;

  // Coerce any candidate to a trimmed string; falsy candidates become "".
  const asTrimmedText = (candidate) => String(candidate || "").trim();

  const repoUrl = asTrimmedText(raw.repo_url || raw.github_url || raw.url);
  const parsedRepo = parseGithubRepoUrl(repoUrl);
  const owner = asTrimmedText(raw.owner || parsedRepo?.owner);
  const repoName = asTrimmedText(raw.repo_name || raw.name || raw.repo || parsedRepo?.repo);
  const snippet = asTrimmedText(raw.message_text_snippet || raw.snippet || raw.description || raw.summary);
  const sourceLink = asTrimmedText(raw.source_link || raw.source_url || raw.link);

  let topics = [];
  if (Array.isArray(raw.topics)) {
    topics = raw.topics.map((topic) => String(topic).trim()).filter(Boolean);
  }

  const hasAnyField = [repoUrl, owner, repoName, snippet, sourceLink].some(Boolean) || topics.length > 0;
  if (!hasAnyField) return null;

  // When the URL did not parse, fall back to explicit owner/name fields if both exist.
  let repoIdentity = parsedRepo;
  if (!repoIdentity) {
    repoIdentity = owner && repoName
      ? {
        owner,
        repo: repoName,
        github_url: repoUrl || `https://github.com/${owner}/${repoName}`,
        key: `${owner.toLowerCase()}/${repoName.toLowerCase()}`,
      }
      : null;
  }

  return {
    repo_url: parsedRepo ? parsedRepo.github_url : repoUrl || null,
    owner,
    repo_name: repoName,
    message_text_snippet: snippet,
    source_link: sourceLink || null,
    topics,
    confidence_raw: raw.confidence ?? null,
    confidence: confidenceNumber(raw.confidence),
    parsed_repo: repoIdentity,
    raw,
  };
}
|
|
371
|
+
|
|
249
372
|
function confidenceTier(confidence) {
|
|
250
373
|
if (confidence >= 0.8) return "high";
|
|
251
374
|
if (confidence >= 0.5) return "medium";
|
|
@@ -292,10 +415,67 @@ function lineOf(text, needle) {
|
|
|
292
415
|
return text.slice(0, idx).split(/\r?\n/).length;
|
|
293
416
|
}
|
|
294
417
|
|
|
418
|
+
/**
 * Collapse whitespace in `value` and cap the result at `maxLength` UTF-16
 * code units.
 *
 * Runs of whitespace become a single space and the text is trimmed. Text that
 * already fits is returned unchanged. Longer text is cut and suffixed with
 * `"..."`; the suffix is counted against the limit so the result never
 * exceeds `maxLength`.
 *
 * @param {*} value - Text to bound; falsy values are treated as an empty string.
 * @param {number} [maxLength=700] - Maximum length of the returned string.
 * @returns {string} The normalized, possibly truncated text.
 */
function boundedText(value, maxLength = 700) {
  const ellipsis = "...";
  const text = String(value || "").replace(/\s+/g, " ").trim();
  if (text.length <= maxLength) return text;

  // Limits too small to hold any text plus the ellipsis get a plain hard cut.
  if (maxLength <= ellipsis.length) return text.slice(0, Math.max(0, maxLength));

  let head = text.slice(0, maxLength - ellipsis.length);
  // Do not leave half of a surrogate pair at the cut point.
  if (/[\uD800-\uDBFF]$/.test(head)) head = head.slice(0, -1);

  return `${head.trim()}${ellipsis}`;
}
|
|
423
|
+
|
|
424
|
+
/**
 * Return `value` when it is an array, otherwise a new empty array.
 *
 * @param {*} value - Any value.
 * @returns {Array} The same array instance, or `[]`.
 */
function asArray(value) {
  if (Array.isArray(value)) {
    return value;
  }
  return [];
}
|
|
427
|
+
|
|
428
|
+
/**
 * Extract a short snippet of `text` around the first line that contains
 * `repoUrl`.
 *
 * The snippet spans from two lines before the matching line to three lines
 * after it, joined with spaces and passed through `boundedText`. When no line
 * contains `repoUrl`, the whole text is bounded to 500 instead.
 *
 * @param {*} text - Source text; falsy values are treated as an empty string.
 * @param {string} repoUrl - Substring to look for on each line.
 * @returns {string} The bounded snippet.
 */
function localRepoSnippet(text, repoUrl) {
  const allLines = String(text || "").split(/\r?\n/);
  const hitIndex = allLines.findIndex((line) => line.includes(repoUrl));
  if (hitIndex === -1) {
    return boundedText(text, 500);
  }

  const windowStart = hitIndex < 2 ? 0 : hitIndex - 2;
  const windowEnd = Math.min(allLines.length, hitIndex + 4);
  const context = allLines.slice(windowStart, windowEnd).join(" ");
  return boundedText(context);
}
|
|
436
|
+
|
|
437
|
+
/**
 * Append `value` to `array` unless it is falsy or an element with the same
 * key is already present. Keys are compared with `===`.
 *
 * @param {Array} array - Array to mutate in place.
 * @param {*} value - Candidate element; falsy values are ignored.
 * @param {Function} [keyFn] - Maps an element to its identity key; defaults
 *   to the element itself.
 * @returns {void}
 */
function pushUnique(array, value, keyFn = (item) => item) {
  if (!value) return;

  const candidateKey = keyFn(value);
  const isNew = array.every((existing) => keyFn(existing) !== candidateKey);
  if (isNew) {
    array.push(value);
  }
}
|
|
442
|
+
|
|
443
|
+
/**
 * List the distinct, non-empty source references of an entity as strings.
 *
 * @param {object | null | undefined} entity - Entity whose `source_refs` is
 *   read; a missing entity or non-array `source_refs` yields an empty list.
 * @returns {string[]} Unique references in first-seen order.
 */
function sourceRefsForEntity(entity) {
  const uniqueRefs = new Set();
  for (const ref of asArray(entity && entity.source_refs)) {
    if (ref) uniqueRefs.add(String(ref));
  }
  return [...uniqueRefs];
}
|
|
446
|
+
|
|
447
|
+
/**
 * Describe, as short phrases, what local evidence speaks in favour of an
 * entity.
 *
 * @param {object} entity - Entity record; `source_refs`, `topics`, `category`
 *   and `summary` are read.
 * @param {string} conceptName - Concept name used in the fallback phrase.
 * @returns {string[]} One phrase per detected strength, or a single generic
 *   phrase mentioning `conceptName` when none was detected.
 */
function decisionStrengthsForEntity(entity, conceptName) {
  const reasons = [];

  if (sourceRefsForEntity(entity).length > 1) {
    reasons.push("mentioned by multiple local source records");
  }

  const topics = asArray(entity.topics);
  if (topics.length > 0) {
    // Only the first three topics are listed.
    reasons.push(`topic fit: ${topics.slice(0, 3).join(", ")}`);
  }

  if (entity.category && entity.category !== "uncategorized") {
    reasons.push(`category fit: ${entity.category}`);
  }

  if (entity.summary) {
    reasons.push("has local snippet evidence");
  }

  if (reasons.length === 0) {
    return [`local evidence connects this repository to ${conceptName}`];
  }
  return reasons;
}
|
|
455
|
+
|
|
456
|
+
/**
 * Describe, as short phrases, the caveats that apply to an entity.
 *
 * The list always starts with the "local evidence only" caveat. Further
 * caveats are added for a missing `github_url`, a `low`/`unsupported`
 * `confidence_tier`, and having at most one distinct source reference.
 *
 * @param {object} entity - Entity record to inspect.
 * @returns {string[]} Caveat phrases (never empty).
 */
function decisionCaveatsForEntity(entity) {
  const warnings = ["local evidence only; verify externally before adoption"];

  if (!entity.github_url) {
    warnings.push("missing canonical repository URL");
  }

  const tier = entity.confidence_tier;
  if (tier === "low" || tier === "unsupported") {
    warnings.push("low extraction confidence");
  }

  const refCount = sourceRefsForEntity(entity).length;
  if (refCount <= 1) {
    warnings.push("single local source reference");
  }

  return warnings;
}
|
|
463
|
+
|
|
464
|
+
/**
 * Count records per confidence tier.
 *
 * Each record's tier is its `confidence_tier`, else the result of
 * `confidenceTier(record.confidence)`, else `"unknown"`; it is lower-cased
 * before counting. Tiers outside the known set are counted as `unknown`.
 *
 * @param {*} records - Expected to be an array of records; non-arrays count
 *   as empty.
 * @returns {{ high: number, medium: number, low: number, unsupported: number, unknown: number }}
 */
function confidenceSummary(records) {
  const tally = { high: 0, medium: 0, low: 0, unsupported: 0, unknown: 0 };

  asArray(records).forEach((record) => {
    const rawTier = record.confidence_tier || confidenceTier(record.confidence) || "unknown";
    const tier = String(rawTier).toLowerCase();
    // Own-property check so labels such as "constructor" cannot hit the prototype.
    const bucket = Object.prototype.hasOwnProperty.call(tally, tier) ? tier : "unknown";
    tally[bucket] += 1;
  });

  return tally;
}
|
|
473
|
+
|
|
295
474
|
function buildExtraction({ projectPath, sourceRoot }) {
|
|
296
475
|
const generatedAt = new Date().toISOString();
|
|
297
476
|
const sourceRootRef = makeSourceRootRef(sourceRoot);
|
|
298
477
|
const collected = collectMarkdownFiles(sourceRoot);
|
|
478
|
+
const collectedJson = collectJsonFiles(sourceRoot);
|
|
299
479
|
const sources = [];
|
|
300
480
|
const toolEntitiesById = new Map();
|
|
301
481
|
const conceptsById = new Map();
|
|
@@ -367,7 +547,7 @@ function buildExtraction({ projectPath, sourceRoot }) {
|
|
|
367
547
|
notes: qualityNotes,
|
|
368
548
|
},
|
|
369
549
|
provenance_refs: [],
|
|
370
|
-
target_page_path:
|
|
550
|
+
target_page_path: `wiki/sources/${sourceSlug}.md`,
|
|
371
551
|
};
|
|
372
552
|
|
|
373
553
|
sources.push(sourceRecord);
|
|
@@ -435,12 +615,37 @@ function buildExtraction({ projectPath, sourceRoot }) {
|
|
|
435
615
|
confidence_tier: "high",
|
|
436
616
|
source_refs: [],
|
|
437
617
|
provenance_refs: [],
|
|
438
|
-
|
|
618
|
+
topics: concepts.map((concept) => concept.name),
|
|
619
|
+
source_links: [],
|
|
620
|
+
evidence_snippets: [],
|
|
621
|
+
discovery_sources: [],
|
|
622
|
+
evidence_records: [],
|
|
623
|
+
target_page_path: `wiki/entities/tools/${safeSlug(repo.repo, "tool")}--${entityId}.md`,
|
|
439
624
|
});
|
|
440
625
|
}
|
|
441
626
|
const entity = toolEntitiesById.get(entityId);
|
|
442
627
|
if (!entity.source_refs.includes(sourceId)) entity.source_refs.push(sourceId);
|
|
443
628
|
if (!entity.provenance_refs.includes(prov.provenance_id)) entity.provenance_refs.push(prov.provenance_id);
|
|
629
|
+
for (const concept of concepts) {
|
|
630
|
+
if (!entity.topics) entity.topics = [];
|
|
631
|
+
pushUnique(entity.topics, concept.name);
|
|
632
|
+
}
|
|
633
|
+
const snippet = localRepoSnippet(text, repo.github_url);
|
|
634
|
+
if (!entity.evidence_snippets) entity.evidence_snippets = [];
|
|
635
|
+
pushUnique(entity.evidence_snippets, snippet);
|
|
636
|
+
if (!entity.discovery_sources) entity.discovery_sources = [];
|
|
637
|
+
pushUnique(entity.discovery_sources, sourceLogicalPath);
|
|
638
|
+
if (!entity.evidence_records) entity.evidence_records = [];
|
|
639
|
+
pushUnique(entity.evidence_records, {
|
|
640
|
+
source_id: sourceId,
|
|
641
|
+
source_logical_path: sourceLogicalPath,
|
|
642
|
+
source_link: sourceUrl || null,
|
|
643
|
+
snippet,
|
|
644
|
+
topics: concepts.map((concept) => concept.name),
|
|
645
|
+
provenance_refs: [prov.provenance_id],
|
|
646
|
+
confidence: 0.9,
|
|
647
|
+
confidence_tier: "high",
|
|
648
|
+
}, (record) => `${record.source_id}:${record.source_link || ""}:${record.snippet}`);
|
|
444
649
|
|
|
445
650
|
claims.push({
|
|
446
651
|
claim_id: `claim_${sourceId}_${String(claims.length + 1).padStart(3, "0")}`,
|
|
@@ -478,7 +683,7 @@ function buildExtraction({ projectPath, sourceRoot }) {
|
|
|
478
683
|
provenance_refs: [],
|
|
479
684
|
confidence: 0.65,
|
|
480
685
|
confidence_tier: "medium",
|
|
481
|
-
target_page_path:
|
|
686
|
+
target_page_path: `wiki/concepts/${safeSlug(conceptRule.name, "concept")}.md`,
|
|
482
687
|
});
|
|
483
688
|
}
|
|
484
689
|
const concept = conceptsById.get(conceptRule.concept_id);
|
|
@@ -502,6 +707,374 @@ function buildExtraction({ projectPath, sourceRoot }) {
|
|
|
502
707
|
}
|
|
503
708
|
}
|
|
504
709
|
|
|
710
|
+
for (const filePath of collectedJson.files) {
|
|
711
|
+
const sourceRelativePath = toPosix(path.relative(sourceRoot, filePath));
|
|
712
|
+
const sourceDisplayPath = toPosix(filePath);
|
|
713
|
+
const sourceLogicalFilePath = `${sourceRootRef.source_root_label}/${sourceRelativePath}`;
|
|
714
|
+
const fileBytes = fs.readFileSync(filePath);
|
|
715
|
+
const fileHash = sha256(fileBytes);
|
|
716
|
+
const stats = fs.statSync(filePath);
|
|
717
|
+
let parsed;
|
|
718
|
+
|
|
719
|
+
try {
|
|
720
|
+
parsed = JSON.parse(fileBytes.toString("utf-8"));
|
|
721
|
+
} catch (error) {
|
|
722
|
+
const sourceId = `src_${sha256(`local-json-file:v1:${sourceRootRef.source_root_id}:${sourceRelativePath.toLowerCase()}`).slice(0, 16)}`;
|
|
723
|
+
const sourceRecord = {
|
|
724
|
+
source_id: sourceId,
|
|
725
|
+
source_root_id: sourceRootRef.source_root_id,
|
|
726
|
+
source_relative_path: sourceRelativePath,
|
|
727
|
+
source_logical_path: sourceLogicalFilePath,
|
|
728
|
+
source_display_path: sourceDisplayPath,
|
|
729
|
+
source_type: "json",
|
|
730
|
+
title: path.basename(filePath),
|
|
731
|
+
source_url: null,
|
|
732
|
+
source_hash: fileHash,
|
|
733
|
+
size_bytes: stats.size,
|
|
734
|
+
modified_time: stats.mtime.toISOString(),
|
|
735
|
+
encoding_quality: "unknown",
|
|
736
|
+
source_quality: {
|
|
737
|
+
has_mojibake: false,
|
|
738
|
+
mojibake_score: 0,
|
|
739
|
+
has_source_url: false,
|
|
740
|
+
weak_title: false,
|
|
741
|
+
duplicate_candidate: false,
|
|
742
|
+
duplicate_group_id: null,
|
|
743
|
+
low_confidence_extraction: true,
|
|
744
|
+
quality_flags: ["invalid_json"],
|
|
745
|
+
notes: [`Invalid JSON could not be parsed: ${error.message}`],
|
|
746
|
+
},
|
|
747
|
+
provenance_refs: [],
|
|
748
|
+
target_page_path: `wiki/sources/${safeSlug(path.basename(filePath), "json-source")}--${sourceId.slice(0, 8)}.md`,
|
|
749
|
+
};
|
|
750
|
+
sources.push(sourceRecord);
|
|
751
|
+
sourceQualityFindings.push({
|
|
752
|
+
finding_id: `sq_${sourceId}`,
|
|
753
|
+
source_id: sourceId,
|
|
754
|
+
source_relative_path: sourceRelativePath,
|
|
755
|
+
source_logical_path: sourceLogicalFilePath,
|
|
756
|
+
quality_flags: ["invalid_json"],
|
|
757
|
+
confidence: 0.1,
|
|
758
|
+
confidence_tier: "unsupported",
|
|
759
|
+
notes: sourceRecord.source_quality.notes,
|
|
760
|
+
});
|
|
761
|
+
unsupportedItems.push({
|
|
762
|
+
record_type: "unsupported_item",
|
|
763
|
+
item_id: `unsupported_${sourceId}_001`,
|
|
764
|
+
source_id: sourceId,
|
|
765
|
+
reason: "invalid_json",
|
|
766
|
+
raw_observation_summary: `Invalid JSON file: ${sourceRelativePath}`,
|
|
767
|
+
confidence: 0.1,
|
|
768
|
+
confidence_tier: "unsupported",
|
|
769
|
+
provenance_refs: [],
|
|
770
|
+
});
|
|
771
|
+
continue;
|
|
772
|
+
}
|
|
773
|
+
|
|
774
|
+
const rawRecords = firstArray(parsed);
|
|
775
|
+
const normalizedRecords = rawRecords.map(normalizeJsonRepoRecord).filter(Boolean);
|
|
776
|
+
if (normalizedRecords.length === 0) {
|
|
777
|
+
const sourceId = `src_${sha256(`local-json-empty:v1:${sourceRootRef.source_root_id}:${sourceRelativePath.toLowerCase()}`).slice(0, 16)}`;
|
|
778
|
+
const sourceRecord = {
|
|
779
|
+
source_id: sourceId,
|
|
780
|
+
source_root_id: sourceRootRef.source_root_id,
|
|
781
|
+
source_relative_path: sourceRelativePath,
|
|
782
|
+
source_logical_path: sourceLogicalFilePath,
|
|
783
|
+
source_display_path: sourceDisplayPath,
|
|
784
|
+
source_type: "json",
|
|
785
|
+
title: path.basename(filePath),
|
|
786
|
+
source_url: null,
|
|
787
|
+
source_hash: fileHash,
|
|
788
|
+
size_bytes: stats.size,
|
|
789
|
+
modified_time: stats.mtime.toISOString(),
|
|
790
|
+
encoding_quality: "clean",
|
|
791
|
+
source_quality: {
|
|
792
|
+
has_mojibake: false,
|
|
793
|
+
mojibake_score: 0,
|
|
794
|
+
has_source_url: false,
|
|
795
|
+
weak_title: false,
|
|
796
|
+
duplicate_candidate: false,
|
|
797
|
+
duplicate_group_id: null,
|
|
798
|
+
low_confidence_extraction: true,
|
|
799
|
+
quality_flags: ["empty_json_records"],
|
|
800
|
+
notes: ["JSON parsed successfully but contained no supported repository records."],
|
|
801
|
+
},
|
|
802
|
+
provenance_refs: [],
|
|
803
|
+
target_page_path: `wiki/sources/${safeSlug(path.basename(filePath), "json-source")}--${sourceId.slice(0, 8)}.md`,
|
|
804
|
+
};
|
|
805
|
+
sources.push(sourceRecord);
|
|
806
|
+
sourceQualityFindings.push({
|
|
807
|
+
finding_id: `sq_${sourceId}`,
|
|
808
|
+
source_id: sourceId,
|
|
809
|
+
source_relative_path: sourceRelativePath,
|
|
810
|
+
source_logical_path: sourceLogicalFilePath,
|
|
811
|
+
quality_flags: ["empty_json_records"],
|
|
812
|
+
confidence: 0.2,
|
|
813
|
+
confidence_tier: "unsupported",
|
|
814
|
+
notes: sourceRecord.source_quality.notes,
|
|
815
|
+
});
|
|
816
|
+
continue;
|
|
817
|
+
}
|
|
818
|
+
|
|
819
|
+
normalizedRecords.forEach((record, index) => {
|
|
820
|
+
const recordRef = `record-${String(index + 1).padStart(3, "0")}`;
|
|
821
|
+
const repoKey = record.repo_url || `${record.owner}/${record.repo_name}` || sha256(JSON.stringify(record.raw));
|
|
822
|
+
const sourceId = `src_${sha256(`local-json:v1:${sourceRootRef.source_root_id}:${sourceRelativePath.toLowerCase()}:${repoKey.toLowerCase()}`).slice(0, 16)}`;
|
|
823
|
+
const sourceHash = sha256(`${fileHash}:${sha256(JSON.stringify(record.raw))}`);
|
|
824
|
+
const title = record.repo_name || record.owner || `${path.basename(filePath)} ${recordRef}`;
|
|
825
|
+
const sourceLogicalPath = `${sourceLogicalFilePath}#${recordRef}`;
|
|
826
|
+
const sourceDisplayRecordPath = `${sourceDisplayPath}#${recordRef}`;
|
|
827
|
+
const mojibake = detectMojibake(record.message_text_snippet);
|
|
828
|
+
const weakTitle = title.length < 3;
|
|
829
|
+
const qualityFlags = [];
|
|
830
|
+
const qualityNotes = [];
|
|
831
|
+
const sourceUrl = record.repo_url || record.source_link || null;
|
|
832
|
+
const confidence = record.confidence;
|
|
833
|
+
const confidenceBand = confidenceTier(confidence);
|
|
834
|
+
|
|
835
|
+
if (mojibake.hasMojibake) {
|
|
836
|
+
qualityFlags.push("mojibake_detected");
|
|
837
|
+
qualityNotes.push("Potential mojibake or replacement characters detected in JSON snippet.");
|
|
838
|
+
}
|
|
839
|
+
if (!record.parsed_repo) {
|
|
840
|
+
qualityFlags.push("missing_repo_url");
|
|
841
|
+
qualityNotes.push("JSON record does not include a supported GitHub repository URL.");
|
|
842
|
+
}
|
|
843
|
+
if (!record.source_link) {
|
|
844
|
+
qualityFlags.push("missing_source_link");
|
|
845
|
+
qualityNotes.push("JSON record does not include a source_link.");
|
|
846
|
+
}
|
|
847
|
+
if (weakTitle) {
|
|
848
|
+
qualityFlags.push("weak_title");
|
|
849
|
+
qualityNotes.push("Repository title is missing or very short.");
|
|
850
|
+
}
|
|
851
|
+
if (confidence < 0.5) {
|
|
852
|
+
qualityFlags.push("low_confidence_extraction");
|
|
853
|
+
}
|
|
854
|
+
|
|
855
|
+
const sourceRecord = {
|
|
856
|
+
source_id: sourceId,
|
|
857
|
+
source_root_id: sourceRootRef.source_root_id,
|
|
858
|
+
source_relative_path: sourceRelativePath,
|
|
859
|
+
source_logical_path: sourceLogicalPath,
|
|
860
|
+
source_display_path: sourceDisplayRecordPath,
|
|
861
|
+
source_type: "json_record",
|
|
862
|
+
title,
|
|
863
|
+
source_url: sourceUrl,
|
|
864
|
+
source_hash: sourceHash,
|
|
865
|
+
size_bytes: stats.size,
|
|
866
|
+
modified_time: stats.mtime.toISOString(),
|
|
867
|
+
encoding_quality: mojibake.hasMojibake ? "suspect" : "clean",
|
|
868
|
+
source_record_locator: {
|
|
869
|
+
type: "json_record",
|
|
870
|
+
record_index: index,
|
|
871
|
+
record_ref: recordRef,
|
|
872
|
+
record_pointer: `/${index}`,
|
|
873
|
+
},
|
|
874
|
+
structured_fields: {
|
|
875
|
+
repo_url: record.repo_url,
|
|
876
|
+
owner: record.owner,
|
|
877
|
+
repo_name: record.repo_name,
|
|
878
|
+
message_text_snippet: record.message_text_snippet,
|
|
879
|
+
source_link: record.source_link,
|
|
880
|
+
topics: record.topics,
|
|
881
|
+
confidence: record.confidence_raw,
|
|
882
|
+
},
|
|
883
|
+
source_quality: {
|
|
884
|
+
has_mojibake: mojibake.hasMojibake,
|
|
885
|
+
mojibake_score: Number(mojibake.score.toFixed(3)),
|
|
886
|
+
has_source_url: Boolean(sourceUrl),
|
|
887
|
+
weak_title: weakTitle,
|
|
888
|
+
duplicate_candidate: false,
|
|
889
|
+
duplicate_group_id: null,
|
|
890
|
+
low_confidence_extraction: confidence < 0.5,
|
|
891
|
+
quality_flags: qualityFlags,
|
|
892
|
+
notes: qualityNotes,
|
|
893
|
+
},
|
|
894
|
+
provenance_refs: [],
|
|
895
|
+
target_page_path: `wiki/sources/${safeSlug(title || sourceRelativePath, "source")}--${sourceId.slice(0, 8)}.md`,
|
|
896
|
+
};
|
|
897
|
+
|
|
898
|
+
sources.push(sourceRecord);
|
|
899
|
+
if (record.repo_url) {
|
|
900
|
+
const existing = sourceUrlUsage.get(record.repo_url) || [];
|
|
901
|
+
existing.push(sourceRecord);
|
|
902
|
+
sourceUrlUsage.set(record.repo_url, existing);
|
|
903
|
+
}
|
|
904
|
+
|
|
905
|
+
if (qualityFlags.length > 0) {
|
|
906
|
+
sourceQualityFindings.push({
|
|
907
|
+
finding_id: `sq_${sourceId}`,
|
|
908
|
+
source_id: sourceId,
|
|
909
|
+
source_relative_path: sourceRelativePath,
|
|
910
|
+
source_logical_path: sourceLogicalPath,
|
|
911
|
+
quality_flags: [...qualityFlags],
|
|
912
|
+
confidence,
|
|
913
|
+
confidence_tier: confidenceBand,
|
|
914
|
+
notes: qualityNotes,
|
|
915
|
+
});
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
const concepts = [
|
|
919
|
+
...inferConcepts(`${title}\n${record.message_text_snippet}\n${record.topics.join("\n")}`),
|
|
920
|
+
...inferConceptsFromTopics(record.topics),
|
|
921
|
+
];
|
|
922
|
+
const category = record.topics[0] ? normalizeTopic(record.topics[0]) : categoryForSource(record.message_text_snippet, concepts);
|
|
923
|
+
|
|
924
|
+
if (!record.parsed_repo) {
|
|
925
|
+
unsupportedItems.push({
|
|
926
|
+
record_type: "unsupported_item",
|
|
927
|
+
item_id: `unsupported_${sourceId}_001`,
|
|
928
|
+
source_id: sourceId,
|
|
929
|
+
reason: "missing_repo_url",
|
|
930
|
+
raw_observation_summary: `${sourceRelativePath}#${recordRef}`,
|
|
931
|
+
confidence: 0.2,
|
|
932
|
+
confidence_tier: "unsupported",
|
|
933
|
+
provenance_refs: [],
|
|
934
|
+
});
|
|
935
|
+
}
|
|
936
|
+
|
|
937
|
+
if (record.parsed_repo) {
|
|
938
|
+
const repo = record.parsed_repo;
|
|
939
|
+
const entityId = `tool_github_${safeSlug(repo.owner, "owner")}_${safeSlug(repo.repo, "repo")}`;
|
|
940
|
+
const prov = {
|
|
941
|
+
provenance_id: `prov_${sourceId}_${recordRef}`,
|
|
942
|
+
source_id: sourceId,
|
|
943
|
+
source_hash: sourceHash,
|
|
944
|
+
source_relative_path: sourceRelativePath,
|
|
945
|
+
source_logical_path: sourceLogicalPath,
|
|
946
|
+
locator: {
|
|
947
|
+
type: "json_record",
|
|
948
|
+
record_index: index,
|
|
949
|
+
record_ref: recordRef,
|
|
950
|
+
record_pointer: `/${index}`,
|
|
951
|
+
field: "repo_url",
|
|
952
|
+
},
|
|
953
|
+
evidence_quote_hash: sha256(`${sourceId}:${recordRef}:${repo.github_url}`),
|
|
954
|
+
extractor: "sdtk-wiki.semantic-extract",
|
|
955
|
+
extractor_version: "bk140-json-records",
|
|
956
|
+
generated_at: generatedAt,
|
|
957
|
+
confidence,
|
|
958
|
+
};
|
|
959
|
+
provenance.push(prov);
|
|
960
|
+
sourceRecord.provenance_refs.push(prov.provenance_id);
|
|
961
|
+
|
|
962
|
+
if (!toolEntitiesById.has(entityId)) {
|
|
963
|
+
toolEntitiesById.set(entityId, {
|
|
964
|
+
entity_id: entityId,
|
|
965
|
+
entity_type: "tool_entity",
|
|
966
|
+
name: repo.repo,
|
|
967
|
+
repo_owner: repo.owner,
|
|
968
|
+
repo_name: repo.repo,
|
|
969
|
+
github_url: repo.github_url,
|
|
970
|
+
category,
|
|
971
|
+
summary: record.message_text_snippet || `${repo.repo} is a locally sourced GitHub tool candidate in category ${category}.`,
|
|
972
|
+
confidence,
|
|
973
|
+
confidence_tier: confidenceBand,
|
|
974
|
+
source_refs: [],
|
|
975
|
+
provenance_refs: [],
|
|
976
|
+
topics: [...record.topics],
|
|
977
|
+
source_links: record.source_link ? [record.source_link] : [],
|
|
978
|
+
evidence_snippets: record.message_text_snippet ? [boundedText(record.message_text_snippet)] : [],
|
|
979
|
+
discovery_sources: [record.source_link, sourceLogicalPath].filter(Boolean),
|
|
980
|
+
evidence_records: [{
|
|
981
|
+
source_id: sourceId,
|
|
982
|
+
source_logical_path: sourceLogicalPath,
|
|
983
|
+
source_link: record.source_link || null,
|
|
984
|
+
snippet: boundedText(record.message_text_snippet),
|
|
985
|
+
topics: [...record.topics],
|
|
986
|
+
provenance_refs: [prov.provenance_id],
|
|
987
|
+
confidence,
|
|
988
|
+
confidence_tier: confidenceBand,
|
|
989
|
+
}],
|
|
990
|
+
target_page_path: `wiki/entities/tools/${safeSlug(repo.repo, "tool")}--${entityId}.md`,
|
|
991
|
+
});
|
|
992
|
+
}
|
|
993
|
+
const entity = toolEntitiesById.get(entityId);
|
|
994
|
+
if (!entity.source_refs.includes(sourceId)) entity.source_refs.push(sourceId);
|
|
995
|
+
if (!entity.provenance_refs.includes(prov.provenance_id)) entity.provenance_refs.push(prov.provenance_id);
|
|
996
|
+
for (const topic of record.topics) {
|
|
997
|
+
if (!entity.topics) entity.topics = [];
|
|
998
|
+
if (!entity.topics.includes(topic)) entity.topics.push(topic);
|
|
999
|
+
}
|
|
1000
|
+
if (record.source_link) {
|
|
1001
|
+
if (!entity.source_links) entity.source_links = [];
|
|
1002
|
+
if (!entity.source_links.includes(record.source_link)) entity.source_links.push(record.source_link);
|
|
1003
|
+
}
|
|
1004
|
+
if (record.message_text_snippet) {
|
|
1005
|
+
if (!entity.evidence_snippets) entity.evidence_snippets = [];
|
|
1006
|
+
pushUnique(entity.evidence_snippets, boundedText(record.message_text_snippet));
|
|
1007
|
+
}
|
|
1008
|
+
if (!entity.discovery_sources) entity.discovery_sources = [];
|
|
1009
|
+
pushUnique(entity.discovery_sources, record.source_link);
|
|
1010
|
+
pushUnique(entity.discovery_sources, sourceLogicalPath);
|
|
1011
|
+
if (!entity.evidence_records) entity.evidence_records = [];
|
|
1012
|
+
pushUnique(entity.evidence_records, {
|
|
1013
|
+
source_id: sourceId,
|
|
1014
|
+
source_logical_path: sourceLogicalPath,
|
|
1015
|
+
source_link: record.source_link || null,
|
|
1016
|
+
snippet: boundedText(record.message_text_snippet),
|
|
1017
|
+
topics: [...record.topics],
|
|
1018
|
+
provenance_refs: [prov.provenance_id],
|
|
1019
|
+
confidence,
|
|
1020
|
+
confidence_tier: confidenceBand,
|
|
1021
|
+
}, (item) => `${item.source_id}:${item.source_link || ""}:${item.snippet || ""}`);
|
|
1022
|
+
|
|
1023
|
+
claims.push({
|
|
1024
|
+
claim_id: `claim_${sourceId}_${String(claims.length + 1).padStart(3, "0")}`,
|
|
1025
|
+
text: `The local JSON record presents ${repo.repo} as a ${category} repository candidate.`,
|
|
1026
|
+
subject_entity_id: entityId,
|
|
1027
|
+
source_refs: [sourceId],
|
|
1028
|
+
provenance_refs: [prov.provenance_id],
|
|
1029
|
+
confidence,
|
|
1030
|
+
confidence_tier: confidenceBand,
|
|
1031
|
+
contested: false,
|
|
1032
|
+
});
|
|
1033
|
+
|
|
1034
|
+
relations.push({
|
|
1035
|
+
relation_id: `rel_${sourceId}_${String(relations.length + 1).padStart(3, "0")}`,
|
|
1036
|
+
source_id: sourceId,
|
|
1037
|
+
target_id: entityId,
|
|
1038
|
+
relation_type: "source_mentions_entity",
|
|
1039
|
+
evidence: "The local JSON record includes a GitHub repository URL.",
|
|
1040
|
+
source_refs: [sourceId],
|
|
1041
|
+
provenance_refs: [prov.provenance_id],
|
|
1042
|
+
confidence,
|
|
1043
|
+
confidence_tier: confidenceBand,
|
|
1044
|
+
});
|
|
1045
|
+
|
|
1046
|
+
for (const conceptRule of concepts) {
|
|
1047
|
+
if (!conceptsById.has(conceptRule.concept_id)) {
|
|
1048
|
+
conceptsById.set(conceptRule.concept_id, {
|
|
1049
|
+
...conceptRule,
|
|
1050
|
+
related_entities: conceptRule.related_entities || [],
|
|
1051
|
+
source_refs: conceptRule.source_refs || [],
|
|
1052
|
+
provenance_refs: conceptRule.provenance_refs || [],
|
|
1053
|
+
confidence: conceptRule.confidence || 0.6,
|
|
1054
|
+
confidence_tier: conceptRule.confidence_tier || "medium",
|
|
1055
|
+
target_page_path: conceptRule.target_page_path || `wiki/concepts/${safeSlug(conceptRule.name, "concept")}.md`,
|
|
1056
|
+
});
|
|
1057
|
+
}
|
|
1058
|
+
const concept = conceptsById.get(conceptRule.concept_id);
|
|
1059
|
+
if (!concept.source_refs.includes(sourceId)) concept.source_refs.push(sourceId);
|
|
1060
|
+
if (!concept.provenance_refs.includes(prov.provenance_id)) concept.provenance_refs.push(prov.provenance_id);
|
|
1061
|
+
if (!concept.related_entities.includes(entityId)) concept.related_entities.push(entityId);
|
|
1062
|
+
relations.push({
|
|
1063
|
+
relation_id: `rel_${sourceId}_${String(relations.length + 1).padStart(3, "0")}`,
|
|
1064
|
+
source_id: entityId,
|
|
1065
|
+
target_id: conceptRule.concept_id,
|
|
1066
|
+
relation_type: "entity_implements_concept",
|
|
1067
|
+
evidence: "The local JSON record includes matching topics or semantic keywords.",
|
|
1068
|
+
source_refs: [sourceId],
|
|
1069
|
+
provenance_refs: [prov.provenance_id],
|
|
1070
|
+
confidence: Math.min(0.8, confidence + 0.05),
|
|
1071
|
+
confidence_tier: confidenceTier(Math.min(0.8, confidence + 0.05)),
|
|
1072
|
+
});
|
|
1073
|
+
}
|
|
1074
|
+
}
|
|
1075
|
+
});
|
|
1076
|
+
}
|
|
1077
|
+
|
|
505
1078
|
for (const [sourceUrl, sourceRecords] of sourceUrlUsage.entries()) {
|
|
506
1079
|
if (sourceRecords.length < 2) continue;
|
|
507
1080
|
const duplicateGroupId = `dup_${sha256(sourceUrl).slice(0, 12)}`;
|
|
@@ -525,6 +1098,67 @@ function buildExtraction({ projectPath, sourceRoot }) {
|
|
|
525
1098
|
}
|
|
526
1099
|
}
|
|
527
1100
|
|
|
1101
|
+
const toolEntityValues = [...toolEntitiesById.values()];
|
|
1102
|
+
for (const entity of toolEntityValues) {
|
|
1103
|
+
const entityTopics = new Set(asArray(entity.topics).map((topic) => String(topic).toLowerCase()));
|
|
1104
|
+
const related = [];
|
|
1105
|
+
for (const candidate of toolEntityValues) {
|
|
1106
|
+
if (candidate.entity_id === entity.entity_id) continue;
|
|
1107
|
+
const sharedTopics = asArray(candidate.topics).filter((topic) => entityTopics.has(String(topic).toLowerCase()));
|
|
1108
|
+
const sameCategory = entity.category && candidate.category && entity.category === candidate.category;
|
|
1109
|
+
if (sharedTopics.length === 0 && !sameCategory) continue;
|
|
1110
|
+
related.push({
|
|
1111
|
+
entity_id: candidate.entity_id,
|
|
1112
|
+
name: candidate.name || candidate.repo_name || candidate.entity_id,
|
|
1113
|
+
github_url: candidate.github_url || null,
|
|
1114
|
+
shared_topics: sharedTopics,
|
|
1115
|
+
relation_hint: sharedTopics.length > 0 ? "shared_topic" : "same_category",
|
|
1116
|
+
});
|
|
1117
|
+
}
|
|
1118
|
+
entity.related_repos = related
|
|
1119
|
+
.sort((a, b) => b.shared_topics.length - a.shared_topics.length || a.entity_id.localeCompare(b.entity_id))
|
|
1120
|
+
.slice(0, 8);
|
|
1121
|
+
}
|
|
1122
|
+
|
|
1123
|
+
for (const concept of conceptsById.values()) {
|
|
1124
|
+
const relatedDetails = [];
|
|
1125
|
+
const axisCounts = new Map();
|
|
1126
|
+
for (const entityId of asArray(concept.related_entities)) {
|
|
1127
|
+
const entity = toolEntitiesById.get(entityId);
|
|
1128
|
+
if (!entity) continue;
|
|
1129
|
+
for (const topic of asArray(entity.topics)) {
|
|
1130
|
+
const key = String(topic || "").trim();
|
|
1131
|
+
if (key) axisCounts.set(key, (axisCounts.get(key) || 0) + 1);
|
|
1132
|
+
}
|
|
1133
|
+
if (entity.category) axisCounts.set(entity.category, (axisCounts.get(entity.category) || 0) + 1);
|
|
1134
|
+
relatedDetails.push({
|
|
1135
|
+
entity_id: entity.entity_id,
|
|
1136
|
+
name: entity.name || entity.repo_name || entity.entity_id,
|
|
1137
|
+
repo_owner: entity.repo_owner || null,
|
|
1138
|
+
repo_name: entity.repo_name || null,
|
|
1139
|
+
github_url: entity.github_url || null,
|
|
1140
|
+
category: entity.category || "uncategorized",
|
|
1141
|
+
topics: asArray(entity.topics).slice(0, 8),
|
|
1142
|
+
summary: entity.summary || "",
|
|
1143
|
+
target_page_path: entity.target_page_path || null,
|
|
1144
|
+
source_refs: sourceRefsForEntity(entity),
|
|
1145
|
+
confidence: entity.confidence,
|
|
1146
|
+
confidence_tier: entity.confidence_tier || confidenceTier(entity.confidence),
|
|
1147
|
+
});
|
|
1148
|
+
}
|
|
1149
|
+
concept.related_entity_details = relatedDetails
|
|
1150
|
+
.sort((a, b) => (b.source_refs.length - a.source_refs.length) || a.name.localeCompare(b.name))
|
|
1151
|
+
.slice(0, 12);
|
|
1152
|
+
concept.key_axes = [...axisCounts.entries()]
|
|
1153
|
+
.sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0]))
|
|
1154
|
+
.slice(0, 8)
|
|
1155
|
+
.map(([name, count]) => ({ name, evidence_count: count }));
|
|
1156
|
+
concept.patterns = concept.key_axes.slice(0, 5).map((axis) => ({
|
|
1157
|
+
pattern: axis.name,
|
|
1158
|
+
evidence: `${axis.evidence_count} locally extracted tool candidate(s) are associated with this axis.`,
|
|
1159
|
+
}));
|
|
1160
|
+
}
|
|
1161
|
+
|
|
528
1162
|
const toolEntities = [...toolEntitiesById.values()].sort((a, b) => a.entity_id.localeCompare(b.entity_id));
|
|
529
1163
|
const concepts = [...conceptsById.values()].sort((a, b) => a.concept_id.localeCompare(b.concept_id));
|
|
530
1164
|
const comparisons = [];
|
|
@@ -533,27 +1167,69 @@ function buildExtraction({ projectPath, sourceRoot }) {
|
|
|
533
1167
|
for (const concept of concepts) {
|
|
534
1168
|
if (concept.related_entities.length < 2) continue;
|
|
535
1169
|
const topicSlug = safeSlug(concept.name, "topic");
|
|
1170
|
+
const comparedDetails = asArray(concept.related_entity_details).slice(0, 8);
|
|
1171
|
+
const decisionAxes = asArray(concept.key_axes).slice(0, 6);
|
|
1172
|
+
const matrixRows = comparedDetails.map((entity) => ({
|
|
1173
|
+
entity_id: entity.entity_id,
|
|
1174
|
+
name: entity.repo_owner && entity.repo_name ? `${entity.repo_owner}/${entity.repo_name}` : entity.name,
|
|
1175
|
+
github_url: entity.github_url || null,
|
|
1176
|
+
category: entity.category || "uncategorized",
|
|
1177
|
+
topics: asArray(entity.topics).slice(0, 6),
|
|
1178
|
+
strengths: decisionStrengthsForEntity(entity, concept.name),
|
|
1179
|
+
caveats: decisionCaveatsForEntity(entity),
|
|
1180
|
+
source_ref_count: sourceRefsForEntity(entity).length,
|
|
1181
|
+
source_confidence: entity.confidence_tier || confidenceTier(entity.confidence),
|
|
1182
|
+
local_recommendation: sourceRefsForEntity(entity).length > 1
|
|
1183
|
+
? "shortlist for human review"
|
|
1184
|
+
: "keep as a candidate until more local evidence is available",
|
|
1185
|
+
}));
|
|
1186
|
+
const summary = `Local sources mention ${concept.related_entities.length} tool candidate(s) related to ${concept.name}. This page compares candidates for review, not verified ranking.`;
|
|
1187
|
+
const recommendations = matrixRows.slice(0, 3).map((row) => `${row.name}: ${row.local_recommendation}; strengths: ${row.strengths.slice(0, 2).join("; ")}.`);
|
|
536
1188
|
comparisons.push({
|
|
537
1189
|
comparison_id: `comparison_${topicSlug}_${sha256(concept.related_entities.join("|")).slice(0, 8)}`,
|
|
538
1190
|
topic: concept.name,
|
|
539
1191
|
compared_entities: concept.related_entities.slice(0, 8),
|
|
540
|
-
|
|
1192
|
+
compared_entity_details: comparedDetails,
|
|
1193
|
+
decision_axes: decisionAxes,
|
|
1194
|
+
matrix_rows: matrixRows,
|
|
1195
|
+
criteria: ["local evidence", "category/topic fit", "source confidence", "review caveats"],
|
|
1196
|
+
summary,
|
|
1197
|
+
recommendations,
|
|
1198
|
+
caveats: [
|
|
1199
|
+
"The comparison uses local source evidence only.",
|
|
1200
|
+
"Do not treat ordering as an external quality ranking.",
|
|
1201
|
+
"Verify license, maintenance, security, and ecosystem fit before adoption.",
|
|
1202
|
+
],
|
|
1203
|
+
source_confidence_summary: confidenceSummary(matrixRows),
|
|
541
1204
|
source_refs: concept.source_refs,
|
|
542
1205
|
provenance_refs: concept.provenance_refs,
|
|
543
1206
|
confidence: 0.55,
|
|
544
1207
|
confidence_tier: "medium",
|
|
545
|
-
target_page_path:
|
|
1208
|
+
target_page_path: `wiki/comparisons/${topicSlug}.md`,
|
|
546
1209
|
});
|
|
547
1210
|
syntheses.push({
|
|
548
1211
|
synthesis_id: `synthesis_${topicSlug}_${sha256(concept.source_refs.join("|")).slice(0, 8)}`,
|
|
549
1212
|
topic: concept.name,
|
|
550
|
-
summary
|
|
551
|
-
|
|
1213
|
+
summary,
|
|
1214
|
+
landscape_axes: decisionAxes,
|
|
1215
|
+
candidate_tools: matrixRows,
|
|
1216
|
+
patterns: asArray(concept.patterns),
|
|
1217
|
+
related_comparison_path: `wiki/comparisons/${topicSlug}.md`,
|
|
1218
|
+
source_confidence_summary: confidenceSummary(matrixRows),
|
|
1219
|
+
recommendations: [
|
|
1220
|
+
...recommendations,
|
|
1221
|
+
"Use this synthesis to select review candidates; defer adoption until external verification is complete.",
|
|
1222
|
+
],
|
|
1223
|
+
caveats: [
|
|
1224
|
+
"Local extraction can include stale or incomplete source snippets.",
|
|
1225
|
+
"No web verification, GitHub API data, stars, licenses, or release cadence are claimed here.",
|
|
1226
|
+
"Human review should resolve topic fit and source-quality warnings before product decisions.",
|
|
1227
|
+
],
|
|
552
1228
|
source_refs: concept.source_refs,
|
|
553
1229
|
provenance_refs: concept.provenance_refs,
|
|
554
1230
|
confidence: 0.55,
|
|
555
1231
|
confidence_tier: "medium",
|
|
556
|
-
target_page_path:
|
|
1232
|
+
target_page_path: `wiki/syntheses/${topicSlug}.md`,
|
|
557
1233
|
});
|
|
558
1234
|
}
|
|
559
1235
|
|
|
@@ -566,7 +1242,7 @@ function buildExtraction({ projectPath, sourceRoot }) {
|
|
|
566
1242
|
project_path: projectPath,
|
|
567
1243
|
source_root_refs: [sourceRootRef],
|
|
568
1244
|
source_counts: {
|
|
569
|
-
scanned: collected.scanned,
|
|
1245
|
+
scanned: collected.scanned + collectedJson.scanned,
|
|
570
1246
|
indexed: sources.length,
|
|
571
1247
|
extracted: toolEntities.length,
|
|
572
1248
|
skipped: collected.skipped.length,
|