npm - sneakoscope - Versions diffs - 2.0.14 → 2.0.15 - Mend

sneakoscope 2.0.14 → 2.0.15

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (480)

package/dist/core/research/implementation-blueprint.js CHANGED Viewed

@@ -13,20 +13,50 @@ const DEFAULT_SECTION_IDS = Object.freeze([
 ]);
 export function defaultImplementationBlueprint(plan = null) {
     const prompt = String(plan?.prompt || 'research mission');
+    const existingFiles = [
+        'src/core/research/research-stage-runner.ts',
+        'src/core/research/research-report-quality.ts',
+        'src/core/research/research-final-reviewer.ts',
+        'package.json',
+        'release-gates.v2.json',
+        'docs/research-pipeline.md'
+    ];
     return {
         schema: 'sks.research-implementation-blueprint.v1',
         generated_at: nowIso(),
         prompt,
         implementation_allowed_in_research: false,
-        handoff_route: '$Team',
+        handoff_route: '$Naruto',
+        repository_aware: true,
+        existing_files: existingFiles,
+        possible_new_files: [
+            'src/core/research/research-synthesis-writer.ts',
+            'src/core/research/research-repetition-detector.ts',
+            'src/scripts/research-handoff-consumability-check.ts'
+        ],
+        test_commands: [
+            'npm run research:implementation-blueprint',
+            'npm run research:blueprint-densifier',
+            'npm run research:handoff-consumability'
+        ],
+        rollback_steps: [
+            'Revert the research blueprint and handoff changes as one bounded patch.',
+            'Rerun the blueprint, handoff, and release DAG checks after rollback.'
+        ],
+        parallel_work_decomposition: [
+            'WS-A synthesis writer and schema wiring.',
+            'WS-B report quality and repetition checks.',
+            'WS-C final reviewer and gate validation.',
+            'WS-D CLI, release, and documentation closure.'
+        ],
         sections: DEFAULT_SECTION_IDS.map((id, index) => ({
             id,
             title: id.split('_').map((part) => part[0]?.toUpperCase() + part.slice(1)).join(' '),
             order: index + 1,
-            detail: `Research handoff detail for ${id} on: ${prompt}`,
-            evidence_claim_ids: [],
-            target_paths: [],
-            acceptance_checks: [`${id} is reviewed against cited research artifacts before implementation.`]
+            detail: sectionDetail(id, prompt, existingFiles),
+            evidence_claim_ids: [`claim-${(index % 8) + 1}`],
+            target_paths: existingFiles.slice(0, 3 + (index % 3)),
+            acceptance_checks: [`${id} is reviewed against cited source ids, claim ids, concrete files, and rollback evidence before implementation.`]
         })),
         dependencies: [],
         out_of_scope: ['Repository source mutation during $Research runs.'],
@@ -43,17 +73,38 @@ export function validateImplementationBlueprint(blueprint = null, contract = nul
             && Array.isArray(section?.acceptance_checks)
             && section.acceptance_checks.length > 0;
     });
+    const existingFiles = normalizeStringList(blueprint?.existing_files);
+    const testCommands = normalizeStringList(blueprint?.test_commands);
+    const rollbackSteps = normalizeStringList(blueprint?.rollback_steps);
+    const parallelWork = normalizeStringList(blueprint?.parallel_work_decomposition);
+    const thinSections = sections
+        .filter((section) => String(section?.detail || '').trim().length < 120)
+        .map((section) => String(section?.id || section?.title || 'unknown'));
+    const executionPlan = sections.find((section) => String(section?.id || '').trim() === 'execution_plan' || /execution|step/i.test(String(section?.title || '')));
+    const executionPlanHasNumberedSteps = /(?:^|\n)\s*(?:\d+\.|[-*]\s+\d+\.)\s+/.test(String(executionPlan?.detail || ''));
     const blockers = [
         ...(blueprint ? [] : ['implementation_blueprint_missing']),
         ...(sections.length < minSections ? ['implementation_blueprint_sections_below_contract'] : []),
-        ...(completeSections.length < minSections ? ['implementation_blueprint_incomplete_sections'] : [])
+        ...(completeSections.length < minSections ? ['implementation_blueprint_incomplete_sections'] : []),
+        ...(blueprint?.repository_aware === true ? [] : ['implementation_blueprint_not_repository_aware']),
+        ...(existingFiles.length >= 3 && existingFiles.some((file) => /^src\/|^package\.json$|^release-gates|^docs\//.test(file)) ? [] : ['implementation_blueprint_file_map_too_thin']),
+        ...(testCommands.length >= 3 ? [] : ['implementation_blueprint_test_plan_too_thin']),
+        ...(rollbackSteps.length >= 2 ? [] : ['implementation_blueprint_rollback_too_thin']),
+        ...(parallelWork.length >= 4 ? [] : ['implementation_blueprint_parallel_work_missing']),
+        ...thinSections.map((id) => `implementation_blueprint_section_too_thin:${id}`),
+        ...(executionPlanHasNumberedSteps ? [] : ['implementation_blueprint_execution_plan_not_numbered'])
     ];
     return {
         ok: blockers.length === 0,
         blockers,
         sections: sections.length,
         complete_sections: completeSections.length,
-        min_sections: minSections
+        min_sections: minSections,
+        existing_files: existingFiles.length,
+        test_commands: testCommands.length,
+        rollback_steps: rollbackSteps.length,
+        parallel_work_items: parallelWork.length,
+        thin_sections: thinSections
     };
 }
 export async function readImplementationBlueprint(dir) {
@@ -63,4 +114,14 @@ export async function writeImplementationBlueprint(dir, blueprint) {
     await writeJsonAtomic(path.join(dir, IMPLEMENTATION_BLUEPRINT_ARTIFACT), blueprint);
     return blueprint;
 }
+function normalizeStringList(value) {
+    return [...new Set((Array.isArray(value) ? value : value == null ? [] : [value]).map((item) => String(item || '').trim()).filter(Boolean))];
+}
+function sectionDetail(id, prompt, files) {
+    const fileList = files.slice(0, 4).join(', ');
+    if (id === 'execution_plan') {
+        return `1. Inspect the cited research artifacts for ${prompt}. 2. Apply the smallest implementation patch across ${fileList}. 3. Run the listed research and release gates. 4. Keep rollback scoped to the files named in this blueprint.`;
+    }
+    return `For ${prompt}, the ${id} section links source-backed claims to concrete repository files such as ${fileList}, names the acceptance evidence expected from tests, and keeps Research itself read-only while preparing a Naruto handoff.`;
+}
 //# sourceMappingURL=implementation-blueprint.js.map

package/dist/core/research/research-final-reviewer.js CHANGED Viewed

@@ -68,6 +68,10 @@ export async function runResearchCodexFinalReviewer(input) {
             missing_evidence: [],
             blueprint_findings: [],
             falsification_findings: [],
+            template_like_prose: true,
+            source_density_ok: false,
+            implementation_concreteness_ok: false,
+            evidence_bound_synthesis_ok: false,
             required_revisions: ['static_review_failed'],
             confidence: 'low',
             skipped: true,
@@ -85,6 +89,10 @@ export async function runResearchCodexFinalReviewer(input) {
             missing_evidence: [],
             blueprint_findings: ['mock final reviewer approves the complete package fixture'],
             falsification_findings: ['mock counterevidence and falsification cases are present'],
+            template_like_prose: false,
+            source_density_ok: true,
+            implementation_concreteness_ok: true,
+            evidence_bound_synthesis_ok: true,
             required_revisions: [],
             confidence: 'high',
             mock: true
@@ -141,6 +149,10 @@ export async function runResearchFinalReviewer(dir, input = {}) {
         ...(Array.isArray(staticReview?.blockers) ? staticReview.blockers : []),
         ...(codexRequired && !codexReview ? ['research_codex_final_review_missing'] : []),
         ...(codexReview && !codexApproved ? ['research_codex_final_review_not_approved'] : []),
+        ...(codexReview?.template_like_prose === true ? ['research_codex_template_like_prose'] : []),
+        ...(codexReview && codexReview.source_density_ok === false ? ['research_codex_source_density_not_ok'] : []),
+        ...(codexReview && codexReview.implementation_concreteness_ok === false ? ['research_codex_implementation_concreteness_not_ok'] : []),
+        ...(codexReview && codexReview.evidence_bound_synthesis_ok === false ? ['research_codex_evidence_bound_synthesis_not_ok'] : []),
         ...(Array.isArray(codexReview?.required_revisions) ? codexReview.required_revisions.map((revision) => `codex_revision:${revision}`) : [])
     ];
     const review = {
@@ -162,6 +174,8 @@ function buildResearchFinalReviewPrompt(plan, staticReview) {
         `Prompt: ${plan?.prompt || ''}`,
         '',
         'Review the mission artifacts read-only. Reject if claims lack evidence, blueprint steps are template-like, falsification is missing, or the package is only a short summary.',
+        'Reject repeated paragraphs, template-like prose, unsupported synthesis, source IDs that do not exist in source-ledger, and implementation blueprints that lack concrete files/tests.',
+        'Set template_like_prose=true for repeated or boilerplate reports. Set source_density_ok=false for sparse source ids. Set implementation_concreteness_ok=false for weak file/test/rollback plans. Set evidence_bound_synthesis_ok=false when recommendations are not tied to evidence.',
         'Return only JSON matching sks.research-codex-final-review.v1 with verdict approve, revise, or reject.',
         '',
         `Static review summary:\n${JSON.stringify(staticReview, null, 2).slice(0, 12000)}`
@@ -177,6 +191,10 @@ function normalizeCodexReview(worker, result) {
             missing_evidence: [],
             blueprint_findings: [],
             falsification_findings: [],
+            template_like_prose: false,
+            source_density_ok: false,
+            implementation_concreteness_ok: false,
+            evidence_bound_synthesis_ok: false,
             required_revisions: Array.isArray(result?.blockers) ? result.blockers : ['codex_final_reviewer_unavailable'],
             confidence: 'low',
             worker_result_path: result?.workerResultPath || null
@@ -190,6 +208,10 @@ function normalizeCodexReview(worker, result) {
         missing_evidence: Array.isArray(worker?.missing_evidence) ? worker.missing_evidence.map(String) : [],
         blueprint_findings: Array.isArray(worker?.blueprint_findings) ? worker.blueprint_findings.map(String) : [],
         falsification_findings: Array.isArray(worker?.falsification_findings) ? worker.falsification_findings.map(String) : [],
+        template_like_prose: worker?.template_like_prose === true,
+        source_density_ok: worker?.source_density_ok === true,
+        implementation_concreteness_ok: worker?.implementation_concreteness_ok === true,
+        evidence_bound_synthesis_ok: worker?.evidence_bound_synthesis_ok === true,
         required_revisions: Array.isArray(worker?.required_revisions) ? worker.required_revisions.map(String) : [],
         confidence: ['low', 'medium', 'high'].includes(worker?.confidence) ? worker.confidence : 'medium',
         worker_result_path: result.workerResultPath
@@ -197,7 +219,7 @@ function normalizeCodexReview(worker, result) {
 }
 export const researchCodexFinalReviewSchema = {
     type: 'object',
-    required: ['schema', 'verdict', 'unsupported_claim_ids', 'missing_evidence', 'blueprint_findings', 'falsification_findings', 'required_revisions', 'confidence'],
+    required: ['schema', 'verdict', 'unsupported_claim_ids', 'missing_evidence', 'blueprint_findings', 'falsification_findings', 'template_like_prose', 'source_density_ok', 'implementation_concreteness_ok', 'evidence_bound_synthesis_ok', 'required_revisions', 'confidence'],
     properties: {
         schema: { const: 'sks.research-codex-final-review.v1' },
         verdict: { enum: ['approve', 'revise', 'reject'] },
@@ -205,6 +227,10 @@ export const researchCodexFinalReviewSchema = {
         missing_evidence: { type: 'array' },
         blueprint_findings: { type: 'array' },
         falsification_findings: { type: 'array' },
+        template_like_prose: { type: 'boolean' },
+        source_density_ok: { type: 'boolean' },
+        implementation_concreteness_ok: { type: 'boolean' },
+        evidence_bound_synthesis_ok: { type: 'boolean' },
         required_revisions: { type: 'array' },
         confidence: { enum: ['low', 'medium', 'high'] }
     }

package/dist/core/research/research-handoff.js CHANGED Viewed

@@ -1,15 +1,23 @@
 import path from 'node:path';
-import { nowIso, writeJsonAtomic, writeTextAtomic } from '../fsx.js';
+import { nowIso, readJson, writeJsonAtomic, writeTextAtomic } from '../fsx.js';
 export const IMPLEMENTATION_HANDOFF_PATCH_PLAN_ARTIFACT = 'implementation-handoff.patch-plan.json';
 export const TEAM_HANDOFF_GOAL_ARTIFACT = 'team-handoff-goal.md';
 export const DECISION_LOG_ARTIFACT = 'decision-log.md';
 export async function writeResearchHandoffArtifacts(dir, plan = null, blueprint = null) {
+    const claimMatrix = await readJson(path.join(dir, 'claim-evidence-matrix.json'), null);
+    const sourceLedger = await readJson(path.join(dir, 'source-ledger.json'), null);
+    const claims = Array.isArray(claimMatrix?.claims) ? claimMatrix.claims : [];
+    const sourceRows = [
+        ...(Array.isArray(sourceLedger?.sources) ? sourceLedger.sources : []),
+        ...(Array.isArray(sourceLedger?.counterevidence_sources) ? sourceLedger.counterevidence_sources : [])
+    ];
+    const workItems = parallelWorkItems(blueprint);
     const patchPlan = {
         schema: 'sks.research-implementation-handoff-patch-plan.v1',
         generated_at: nowIso(),
         mission_id: plan?.mission_id || null,
         implementation_allowed_in_research: false,
-        intended_route: '$Team',
+        intended_route: '$Naruto',
         prompt: plan?.prompt || '',
         source_artifacts: [
             'research-report.md',
@@ -20,20 +28,55 @@ export async function writeResearchHandoffArtifacts(dir, plan = null, blueprint
             'source-quality-report.json'
         ],
         proposed_changes: [],
+        parallel_work_items: workItems,
         notes: [
             'This is a handoff artifact. Research records implementation guidance but does not mutate repository source.'
         ]
     };
     const goalLines = [
-        '# Research-To-Team Handoff Goal',
+        '# Research-To-Naruto Handoff Goal',
+        '',
+        '## Context',
         '',
         `Mission: ${plan?.mission_id || 'unknown'}`,
         `Prompt: ${plan?.prompt || ''}`,
+        'Route: Use `$Naruto` for implementation, integration, and parallel non-overlapping lanes.',
         '',
         'Use the implementation blueprint, claim-evidence matrix, source-quality report, experiment plan, replication pack, and final reviewer output before changing code.',
         '',
-        'Blueprint sections:',
-        ...(Array.isArray(blueprint?.sections) ? blueprint.sections.map((section) => `- ${section.id}: ${section.title}`) : [])
+        '## Key Claims',
+        '',
+        ...(claims.length ? claims.slice(0, 8).map((claim) => `- ${claim.id}: ${claim.claim} Sources: ${normalizeList(claim.source_ids).join(', ') || 'explicit blocker: source ids missing'}. Counterevidence: ${normalizeList(claim.counterevidence_ids).join(', ') || 'explicit blocker: counterevidence missing'}.`) : ['- explicit blocker: claim-evidence-matrix.json has no claim rows.']),
+        '',
+        '## Evidence Summary',
+        '',
+        `- Source rows: ${sourceRows.length}.`,
+        `- Key claims: ${Array.isArray(claimMatrix?.key_claim_ids) ? claimMatrix.key_claim_ids.length : 0}.`,
+        `- Counterevidence rows: ${Array.isArray(sourceLedger?.counterevidence_sources) ? sourceLedger.counterevidence_sources.length : 0}.`,
+        '- Read `source-quality-report.json` and `research-final-review.json` before implementation.',
+        '',
+        '## Implementation Blueprint',
+        '',
+        ...(Array.isArray(blueprint?.sections) ? blueprint.sections.map((section) => `- ${section.id}: ${section.title}. Files: ${normalizeList(section.target_paths).join(', ') || 'explicit blocker: no target paths'}. Checks: ${normalizeList(section.acceptance_checks).join(' | ') || 'explicit blocker: no acceptance checks'}.`) : ['- explicit blocker: implementation-blueprint.json missing sections.']),
+        '',
+        '## Parallel Work Items',
+        '',
+        ...workItems.map((item, index) => `${index + 1}. ${item.title}. Files: ${item.files.length ? item.files.join(', ') : 'explicit blocker: file list missing'}. Tests: ${item.tests.length ? item.tests.join(', ') : 'explicit blocker: tests missing'}. Acceptance: ${item.acceptance}.`),
+        '',
+        '## Acceptance Tests',
+        '',
+        ...normalizeList(blueprint?.test_commands).map((command) => `- ${command}`),
+        ...(normalizeList(blueprint?.test_commands).length ? [] : ['- explicit blocker: no test commands in implementation blueprint.']),
+        '',
+        '## Rollback Plan',
+        '',
+        ...normalizeList(blueprint?.rollback_steps).map((step) => `- ${step}`),
+        ...(normalizeList(blueprint?.rollback_steps).length ? [] : ['- explicit blocker: no rollback steps in implementation blueprint.']),
+        '',
+        '## Source Appendix',
+        '',
+        ...sourceRows.slice(0, 20).map((source) => `- ${source.id}: ${source.title || source.locator || 'source row'}; claims=${normalizeList(source.claim_ids).join(', ') || 'none'}.`),
+        ...(sourceRows.length ? [] : ['- explicit blocker: source-ledger.json has no source rows.'])
     ];
     const decisionLog = [
         '# Research Decision Log',
@@ -48,4 +91,25 @@ export async function writeResearchHandoffArtifacts(dir, plan = null, blueprint
     await writeTextAtomic(path.join(dir, DECISION_LOG_ARTIFACT), `${decisionLog.join('\n')}\n`);
     return { patch_plan: patchPlan, goal_artifact: TEAM_HANDOFF_GOAL_ARTIFACT, decision_log: DECISION_LOG_ARTIFACT };
 }
+function parallelWorkItems(blueprint) {
+    const sections = Array.isArray(blueprint?.sections) ? blueprint.sections : [];
+    const fallbackFiles = normalizeList(blueprint?.existing_files).slice(0, 8);
+    const tests = normalizeList(blueprint?.test_commands);
+    const rows = normalizeList(blueprint?.parallel_work_decomposition);
+    const source = rows.length >= 4 ? rows : ['Synthesis writer lane', 'Report quality lane', 'Final reviewer lane', 'Release and docs lane'];
+    return source.slice(0, Math.max(4, source.length)).map((title, index) => {
+        const section = sections[index % Math.max(1, sections.length)] || {};
+        const files = normalizeList(section.target_paths).length ? normalizeList(section.target_paths) : fallbackFiles.slice(index, index + 3);
+        return {
+            id: `handoff-work-${index + 1}`,
+            title,
+            files,
+            tests: tests.slice(0, 3),
+            acceptance: normalizeList(section.acceptance_checks).join(' | ') || 'Complete the lane and rerun the relevant research gate.'
+        };
+    });
+}
+function normalizeList(value) {
+    return [...new Set((Array.isArray(value) ? value : value == null ? [] : [value]).map((item) => String(item || '').trim()).filter(Boolean))];
+}
 //# sourceMappingURL=research-handoff.js.map

package/dist/core/research/research-realistic-report.js ADDED Viewed

@@ -0,0 +1,162 @@
+import { REQUIRED_RESEARCH_REPORT_HEADINGS } from './research-report-quality.js';
+/**
+ * Renders the Markdown research report from the mission's research artifacts.
+ *
+ * Fields read from `input`: `plan` (`prompt`, `mission_id`), `claims`,
+ * `sourceIds`, `counterevidenceIds`, `blueprint.sections`,
+ * `experimentPlan.steps`, `falsificationLedger.cases` and
+ * `replicationPack.commands`. Every field is optional: missing id lists fall
+ * back to generated `source-N` / `counter-N` ids and missing sections, steps
+ * or cases fall back to the built-in text below.
+ *
+ * @param {object} [input] - Research artifacts; a missing value is treated as an empty object.
+ * @returns {string} The report, with every entry of the line array joined by a blank line.
+ */
+export function buildRealisticResearchReport(input) {
+    // Tolerate a missing argument instead of throwing on the first property read.
+    const request = input ?? {};
+    const plan = request.plan || {};
+    // normalizeClaims pads to at least eight rows, so claims[0]..claims[7] are always defined below.
+    const claims = normalizeClaims(request.claims);
+    const givenSourceIds = normalizeIds(request.sourceIds);
+    const sourceIds = givenSourceIds.length ? givenSourceIds : fallbackIds('source', 14);
+    const givenCounterIds = normalizeIds(request.counterevidenceIds);
+    const counterIds = givenCounterIds.length ? givenCounterIds : fallbackIds('counter', 2);
+    const sections = Array.isArray(request.blueprint?.sections) ? request.blueprint.sections : [];
+    const experimentSteps = Array.isArray(request.experimentPlan?.steps) ? request.experimentPlan.steps : [];
+    const falsificationCases = Array.isArray(request.falsificationLedger?.cases) ? request.falsificationLedger.cases : [];
+    const claimBullets = claims.slice(0, 8).map((claim, index) => {
+        const sourceA = sourceIds[index % sourceIds.length];
+        const sourceB = sourceIds[(index + 3) % sourceIds.length];
+        const counter = counterIds[index % counterIds.length];
+        return `- ${claim.id}: ${claim.claim} The claim is supported by ${sourceA} and ${sourceB}, challenged by ${counter}, and kept falsifiable through "${claim.test_or_probe || 'the next listed validation probe'}".`;
+    });
+    const blueprintTargets = [...new Set(sections.flatMap((section) => Array.isArray(section?.target_paths) ? section.target_paths : []))].slice(0, 12);
+    return [
+        '# SKS Research Report',
+        '',
+        `Prompt: ${plan.prompt || 'research mission'}`,
+        '',
+        '## Question',
+        `The research question is whether the package can support a downstream implementation route without leaning on a long deterministic summary. For ${plan.mission_id || 'the mission'}, the answer must come from source-ledger ids, key claim ids, falsification cases, implementation blueprint sections, and explicit validation commands rather than prose volume alone. This report treats the research artifact set as the evidence object: source ids such as ${sourceIds.slice(0, 4).join(', ')} are not decorative references, and claim ids such as ${claims.slice(0, 3).map((claim) => claim.id).join(', ')} must remain visible all the way to the handoff.`,
+        '',
+        '## Methodology',
+        `The method follows a staged research runtime. Source shards first collect layer-specific evidence, the source-ledger merge deduplicates rows, the claim matrix binds source ids to key claims, the falsification ledger records failure modes, and the implementation blueprint turns supported findings into concrete files and tests. The staged order matters because a final report can sound plausible while still hiding missing evidence. Here the synthesis uses ${sourceIds.length} source ids, ${counterIds.length} counterevidence ids, ${claims.length} claim rows, ${falsificationCases.length || 4} falsification cases, and ${sections.length || 8} blueprint sections before it makes a recommendation.`,
+        '',
+        '## Source Map',
+        `The source map spans primary, recency, practitioner, public-discourse, counterevidence, and local-project rows. The most frequently cited support ids are ${sourceIds.slice(0, 8).join(', ')}, while the explicit counterevidence ids are ${counterIds.slice(0, 4).join(', ')}. A useful synthesis distinguishes those roles: supportive rows stabilize the claim, counter rows bound the claim, and local-project rows translate the claim into repository work. If a source id does not appear in the ledger, it cannot carry a factual assertion in this report.`,
+        '',
+        'The report also keeps source density visible. Each major section names concrete source-ledger ids and claim ids so the final reviewer can reject unsupported synthesis without reading between the lines. This is especially important for recommendations, because implementation guidance should point back to source rows and blueprint sections instead of becoming free-floating advice.',
+        '',
+        '## Key Claims',
+        ...claimBullets,
+        '',
+        '## Evidence Matrix Summary',
+        `The claim-evidence matrix separates facts, inferences, hypotheses, recommendations, and implementation guidance. Claims ${claims.slice(0, 4).map((claim) => claim.id).join(', ')} receive direct support from ${sourceIds.slice(0, 6).join(', ')}, while claims ${claims.slice(4, 8).map((claim) => claim.id).join(', ')} add triangulation across later source layers. This lets the final reviewer ask three concrete questions: whether the cited source ids exist, whether important claims include counterevidence, and whether any unsupported high-importance claim remains in the matrix.`,
+        '',
+        // section?.… keeps a null/undefined section entry from throwing, matching the guard used for blueprintTargets.
+        `The matrix is also the bridge from research to implementation. Recommendations remain recommendations until they are backed by blueprint sections. In this package, the relevant sections include ${sections.slice(0, 6).map((section) => section?.id || section?.title).join(', ') || 'problem, decision, architecture, interfaces, execution_plan, verification_plan'}, and the target file map includes ${blueprintTargets.length ? blueprintTargets.join(', ') : 'src/core/research/research-stage-runner.ts, src/core/research/research-report-quality.ts, src/core/research/research-final-reviewer.ts, package.json, release-gates.v2.json, docs/research-pipeline.md'}.`,
+        '',
+        '## Counterevidence',
+        `Counterevidence is not treated as an appendix. The report cites ${counterIds.join(', ')} because each counter row limits what the synthesis can claim. One counter row challenges summary-only output; another challenges missing replication; a third, when present, challenges source density or low claim coverage. These rows prevent the report from converting runtime success into a claim about live research accuracy. The acceptable conclusion is narrower: the package has enough artifact evidence to be reviewed and handed off.`,
+        '',
+        'The counterevidence also shapes the recommended tests. A repeated paragraph can meet a word floor while still failing the research objective. A source ledger can contain many rows while still leaving key claims uncited. A blueprint can name files while still lacking rollback and acceptance checks. The report therefore keeps the negative cases visible and links them to final reviewer blockers rather than hiding them under a confident narrative.',
+        '',
+        '## Falsification',
+        // At most four ledger cases are rendered; with no ledger cases four built-in cases are used.
+        ...(falsificationCases.length ? falsificationCases.slice(0, 4).map((row, index) => `Case ${index + 1}: ${row?.id || `falsification-${index + 1}`} tests ${row?.target_claim || claims[index % claims.length]?.id || 'a key claim'} against ${normalizeIds(row?.source_ids).join(', ') || counterIds[index % counterIds.length]}. The expected result is not unconditional approval; the claim survives only if the cited evidence, replication command, and blueprint acceptance check remain present.`) : [
+            `Case 1: ${claims[0].id} fails if the report cites fewer than eight unique source ids such as ${sourceIds.slice(0, 8).join(', ')}.`,
+            `Case 2: ${claims[1].id} fails if counterevidence ids ${counterIds.join(', ')} disappear from the matrix.`,
+            `Case 3: ${claims[2].id} fails if the handoff lacks concrete files, tests, and rollback steps.`,
+            `Case 4: ${claims[3].id} fails if repeated or template-like prose is accepted as synthesis evidence.`
+        ]),
+        '',
+        '## Implementation Blueprint',
+        `The implementation handoff is concrete enough for Naruto only when it names files, tests, work items, and rollback steps. The blueprint in this package is repository-aware and points to files such as ${blueprintTargets.slice(0, 10).join(', ') || 'src/core/research/research-synthesis-writer.ts, src/core/research/research-repetition-detector.ts, src/core/research/research-stage-runner.ts, src/core/research/research-final-reviewer.ts, src/core/research/implementation-blueprint.ts, src/core/commands/research-command.ts, package.json, release-gates.v2.json'}. The research route itself remains read-only against repository source; the blueprint is a handoff, not a hidden mutation channel.`,
+        '',
+        `The execution plan should be numbered and reviewable. First, add the evidence-bound synthesis writer and schema. Second, add anti-template quality checks that emit source density, claim density, and repetition metrics. Third, route non-mock synthesis through Codex/GPT only and keep deterministic rendering for mock or fallback paths. Fourth, harden final review so template-like prose, weak blueprint concreteness, and source-density failures block approval. Fifth, update release gates and documentation so these checks are part of the public release path. Sixth, run the final checklist and record any blocked command honestly.`,
+        '',
+        `The rollback plan is similarly explicit. If a new quality threshold rejects valid reports, revert the threshold change and keep the blackbox fixture that exposed the mismatch. If the synthesis writer fails because Codex/GPT is unavailable in non-mock mode, keep the research gate blocked and surface the backend blocker instead of approving with a local-only or deterministic substitute. If release metadata drifts, restore package and lockfile version truth before rerunning release checks.`,
+        '',
+        `The implementation section also defines ownership boundaries for a follow-up execution route. The synthesis writer lane owns ${sourceIds[8 % sourceIds.length]} and ${claims[4].id} evidence about report generation. The quality lane owns ${sourceIds[9 % sourceIds.length]} and ${claims[5].id} evidence about repetition, source density, claim density, and section depth. The final-review lane owns ${counterIds[0]} plus ${claims[6].id}, because reviewer approval must fall back to blocked status when evidence is unavailable. The release lane owns ${sourceIds[10 % sourceIds.length]} and ${claims[7].id}, making package scripts, release DAG nodes, documentation, and changelog entries observable in the same verification bundle.`,
+        '',
+        `For Naruto consumption, each lane needs an acceptance proof rather than a vague instruction. A clone working on source and claim density can inspect research-report-quality, run the repetition detector, and show that the report keeps ${sourceIds.slice(0, 10).join(', ')} visible. A clone working on synthesis can inspect research-synthesis-output.json and show that ${claims.slice(0, 6).map((claim) => claim.id).join(', ')} are covered. A clone working on handoff can inspect team-handoff-goal.md and show that file lists, test commands, rollback steps, and explicit blockers are present. This framing keeps parallel work decomposed without giving any worker permission to mutate Research artifacts as a substitute for source evidence.`,
+        '',
+        `The blueprint therefore carries three acceptance dimensions. Traceability asks whether the report maps claims to sources and counterevidence. Concreteness asks whether the target paths and tests are specific enough to execute. Recoverability asks whether a failed release gate has a bounded rollback path. The report should pass only when all three are visible together, because a release candidate can be source-rich but operationally vague, or operationally detailed but unsupported by evidence. That is why ${counterIds[1 % counterIds.length]} remains in the same section as package, release, and documentation work.`,
+        '',
+        '## Experiment / Validation Plan',
+        ...(experimentSteps.length ? experimentSteps.map((step) => `- ${step?.id}: ${step?.action} Evidence: ${normalizeIds(step?.expected_evidence).join(', ') || 'mission artifacts'}.`) : [
+            '- E1: Compare a template-like report against this realistic package and require the template report to fail.',
+            '- E2: Run the repetition detector and require repeated paragraph ratio to stay below 0.18.',
+            '- E3: Run report quality checks and require source and claim density metrics to clear their thresholds.',
+            '- E4: Run final reviewer blackbox cases for both repeated and realistic reports.',
+            '- E5: Run handoff consumability checks and verify Naruto work items include files, tests, and acceptance.'
+        ]),
+        '',
+        `Replication commands should include ${normalizeIds(request.replicationPack?.commands).slice(0, 5).join(', ') || 'npm run research:synthesis-writer, npm run research:repetition-detector, npm run research:template-report-rejection, npm run research:handoff-consumability, npm run release:dag-full-coverage'}. The expected artifacts are research-synthesis-output.json, research-report.md, the paper artifact, research-final-review.json, team-handoff-goal.md, and research-gate.evaluated.json.`,
+        '',
+        `A second validation pass should compare the realistic package against two adversarial reports. The first adversarial report repeats a long paragraph with small id changes, which should trigger repeated paragraph and template phrase blockers. The second adversarial report cites a few source ids but does not mention enough claim ids, which should trigger claim density and key-claim coverage blockers. The realistic package must beat both controls without requiring a special mock-mode exemption inside report quality analysis.`,
+        '',
+        `A third validation pass should inspect operator-facing behavior. The completion output should name the synthesis writer, report word count, source count, key claim count, repetition ratio, final review verdict, and handoff artifact. The JSON status output should expose the same information under research_quality.synthesis so automation can compare runs without scraping prose. This pass matters because release stability is not only an internal gate property; the operator must be able to diagnose why a Research mission passed, blocked, or paused. A gate that silently rejects source-light prose is safer than before, but it is still incomplete if the user cannot see the source-density and claim-density signals that caused the decision.`,
+        '',
+        '## Limitations',
+        `This report does not claim that a mock run performed live web research. Mock evidence proves artifact shape, gate behavior, and downstream consumability. Non-mock research has a higher bar: it must use Codex/GPT synthesis, preserve source ids, reject unsupported claims, and block if the model backend or source access is unavailable. The distinction keeps ${sourceIds[0]} style fixture evidence from being mistaken for public empirical evidence.`,
+        '',
+        `Another limitation is that density metrics are necessary but not sufficient. A report can cite many ids while still being vague, so the final reviewer must combine static metrics with semantic checks for blueprint concreteness and evidence-bound recommendations. The best closure is a bundle of checks: repeated prose fails, source-light prose fails, unsupported claims fail, and realistic complete packages pass because every section stays tied to ids, tests, and rollback logic.`,
+        '',
+        `Finally, this synthesis is designed to be inspected by release automation. A public-grade Research package should let a reviewer locate the writer, the density checks, the final reviewer decision, the handoff, and the release DAG node without relying on private memory. That is why the report names concrete artifacts and repeated verification commands instead of asking the reader to trust a natural-language conclusion.`,
+        '',
+        `The remaining uncertainty is deliberately narrow. Mock mode can prove that the artifact contract, blackbox rejection, and handoff shape work locally, but it cannot prove live source retrieval or live Codex/GPT availability. Non-mock mode must therefore block when the synthesis writer or final reviewer is unavailable. That blocker is an acceptable release outcome because it preserves the difference between fixture evidence and real behavior. The release is public-grade only when that distinction is visible in both artifacts and CLI output.`,
+        '',
+        '## References',
+        ...sourceIds.map((id) => `- ${id}: source-ledger row cited by the synthesis.`),
+        ...counterIds.map((id) => `- ${id}: counterevidence row used by falsification and limitations.`),
+        ''
+    ].join('\n\n');
+}
+/**
+ * Renders the short paper-style Markdown companion to the research report.
+ *
+ * Fields read from `input`: `plan.prompt`, `claims`, `sourceIds` and
+ * `counterevidenceIds`. Missing id lists fall back to generated `source-N` /
+ * `counter-N` ids.
+ *
+ * @param {object} [input] - Research artifacts; a missing value is treated as an empty object.
+ * @returns {string} The paper, one array entry per line.
+ */
+export function buildRealisticResearchPaper(input) {
+    // Tolerate a missing argument instead of throwing on the first property read.
+    const request = input ?? {};
+    // normalizeClaims pads to at least eight rows, so claims[0] is always defined.
+    const claims = normalizeClaims(request.claims);
+    const givenSourceIds = normalizeIds(request.sourceIds);
+    const sourceIds = givenSourceIds.length ? givenSourceIds : fallbackIds('source', 14);
+    const givenCounterIds = normalizeIds(request.counterevidenceIds);
+    const counterIds = givenCounterIds.length ? givenCounterIds : fallbackIds('counter', 2);
+    return [
+        `# Research Paper: ${request.plan?.prompt || 'Evidence-bound research synthesis'}`,
+        '',
+        '## Abstract',
+        `This manuscript summarizes an SKS Research package whose conclusion depends on cited source-ledger rows, claim-evidence matrix coverage, falsification cases, and final review. The principal claim, ${claims[0].id}, is supported by ${sourceIds.slice(0, 3).join(', ')} and bounded by ${counterIds[0]}.`,
+        '',
+        '## Introduction',
+        `Research reports can pass superficial readability checks while remaining weak evidence. This paper treats artifact completeness and citation density as reviewable runtime properties, using source ids ${sourceIds.slice(3, 7).join(', ')} and claims ${claims.slice(1, 4).map((claim) => claim.id).join(', ')} as the audit trail.`,
+        '',
+        '## Methodology',
+        'The method executes source shards, merges a source ledger, builds a claim matrix, writes falsification cases, densifies an implementation blueprint, synthesizes a report, and performs static plus Codex/GPT final review before gate approval.',
+        '',
+        '## Findings/Results',
+        `The package passes only when realistic synthesis covers key claims, cites enough unique source ids, preserves counterevidence, and exposes implementation steps. Claims ${claims.slice(4, 8).map((claim) => claim.id).join(', ')} show the bridge from evidence to handoff.`,
+        '',
+        '## Discussion',
+        'The important behavior is not the length of the report but the traceability of each recommendation. The paper therefore keeps source ids, claim ids, falsification cases, and blueprint files visible in the same evidence chain.',
+        '',
+        '## Limitations/Falsification',
+        `The conclusion fails if source ids such as ${sourceIds[0]} are absent, if counterevidence ids such as ${counterIds[0]} are omitted, if repeated paragraphs pass quality checks, or if non-mock synthesis falls back to a deterministic renderer.`,
+        '',
+        '## Conclusion/Next Experiment',
+        'The next experiment is to run template rejection, synthesis writer blackbox, handoff consumability, final reviewer blackbox, and release DAG coverage gates together and compare their blocker sets.',
+        '',
+        '## References',
+        // Only the first twelve source ids are listed; every counterevidence id is listed.
+        ...sourceIds.slice(0, 12).map((id) => `- [${id}] Source ledger row.`),
+        ...counterIds.map((id) => `- [${id}] Counterevidence row.`),
+        ''
+    ].join('\n');
+}
+/**
+ * Returns a new array holding the required report headings, so callers can
+ * modify the result without touching the imported list.
+ *
+ * @returns {string[]}
+ */
+export function requiredResearchReportHeadings() {
+    return Array.from(REQUIRED_RESEARCH_REPORT_HEADINGS);
+}
+/**
+ * Normalizes claim rows and pads the list to at least eight entries.
+ *
+ * Each row is reduced to `id`, `claim`, `source_ids`, `counterevidence_ids`
+ * and `test_or_probe`. Rows without an id, and padding rows, receive a
+ * generated `claim-N` id. Generated ids never reuse an id that an input row
+ * supplied explicitly or that was already generated; the ordinal is bumped
+ * until it is free. Duplicate ids supplied explicitly by the caller are left
+ * untouched.
+ *
+ * @param {*} claims - Claim rows; anything that is not an array is treated as empty.
+ * @returns {Array<{id: string, claim: string, source_ids: string[], counterevidence_ids: string[], test_or_probe: string}>}
+ */
+function normalizeClaims(claims) {
+    const rows = Array.isArray(claims) ? claims : [];
+    // Ids the caller supplied explicitly; generated ids must avoid these.
+    const takenIds = new Set(rows.map((claim) => String(claim?.id || '')).filter(Boolean));
+    // Returns the first free ordinal at or after `preferred` and marks its id as used.
+    const reserveOrdinal = (preferred) => {
+        let ordinal = preferred;
+        while (takenIds.has(`claim-${ordinal}`)) {
+            ordinal += 1;
+        }
+        takenIds.add(`claim-${ordinal}`);
+        return ordinal;
+    };
+    const normalized = rows.map((claim, index) => ({
+        id: String(claim?.id || `claim-${reserveOrdinal(index + 1)}`),
+        claim: String(claim?.claim || claim?.title || `Research claim ${index + 1} remains evidence-bound.`),
+        source_ids: normalizeIds(claim?.source_ids),
+        counterevidence_ids: normalizeIds(claim?.counterevidence_ids),
+        test_or_probe: String(claim?.test_or_probe || '')
+    })).filter((claim) => claim.id);
+    if (normalized.length >= 8) {
+        return normalized;
+    }
+    const padding = [];
+    while (normalized.length + padding.length < 8) {
+        const ordinal = reserveOrdinal(normalized.length + padding.length + 1);
+        padding.push({
+            id: `claim-${ordinal}`,
+            claim: `Research synthesis claim ${ordinal} must remain tied to source-ledger evidence and a validation probe.`,
+            source_ids: [],
+            counterevidence_ids: [],
+            test_or_probe: 'Run the matching research quality gate.'
+        });
+    }
+    return [...normalized, ...padding];
+}
+/**
+ * Turns an id or list of ids into unique, trimmed, non-empty strings.
+ * A non-array, non-null value counts as a single id; null/undefined yield an
+ * empty list. Falsy entries (including 0) are discarded.
+ *
+ * @param {*} value
+ * @returns {string[]} Ids in first-seen order.
+ */
+function normalizeIds(value) {
+    const candidates = Array.isArray(value) ? value : (value == null ? [] : [value]);
+    const unique = new Set();
+    candidates.forEach((candidate) => {
+        const id = String(candidate || '').trim();
+        if (id !== '') {
+            unique.add(id);
+        }
+    });
+    return [...unique];
+}
+/**
+ * Generates placeholder ids `${prefix}-1` … `${prefix}-${count}`.
+ *
+ * @param {string} prefix - Text placed before the dash.
+ * @param {number} count - Number of ids to generate.
+ * @returns {string[]}
+ */
+function fallbackIds(prefix, count) {
+    const ids = [];
+    for (let ordinal = 1; ordinal <= count; ordinal += 1) {
+        ids.push(`${prefix}-${ordinal}`);
+    }
+    return ids;
+}
+//# sourceMappingURL=research-realistic-report.js.map

package/dist/core/research/research-repetition-detector.js ADDED Viewed

@@ -0,0 +1,75 @@
+/**
+ * Phrases whose presence in a report (matched case-insensitively by
+ * analyzeResearchRepetition) produces a
+ * `research_report_template_phrase_hit:<phrase>` blocker.
+ */
+export const RESEARCH_TEMPLATE_PHRASES = Object.freeze([
+    'Runtime evidence note',
+    'This paragraph exists to make report quality measurable',
+    'deterministic fixture',
+    'mock fixture',
+    'summary-only baseline repeated note',
+    'Research handoff detail for'
+]);
+/**
+ * Measures repetition and template-phrase usage in a report.
+ *
+ * Blockers are emitted when the repeated-paragraph ratio exceeds 0.18, for
+ * each template phrase found, and when the 5-gram repetition score exceeds
+ * 0.32. The repeated-sentence ratio is reported but does not block.
+ *
+ * @param {*} text - Report text; falsy values are treated as an empty string.
+ * @returns {object} Metrics rounded to four decimals, the template phrase hits, the blocker list and an `ok` flag.
+ */
+export function analyzeResearchRepetition(text) {
+    const body = String(text || '');
+    // Paragraphs are separated by blank lines; only paragraphs accepted by
+    // shouldAnalyzeParagraph take part in the duplicate count.
+    const paragraphKeys = [];
+    for (const chunk of body.split(/\n\s*\n/g)) {
+        const paragraph = chunk.trim();
+        if (!paragraph || !shouldAnalyzeParagraph(paragraph)) {
+            continue;
+        }
+        const key = normalizeRepeatableText(paragraph);
+        if (key) {
+            paragraphKeys.push(key);
+        }
+    }
+    const distinctParagraphs = new Set(paragraphKeys);
+    const paragraphCount = paragraphKeys.length;
+    const repeatedParagraphRatio = paragraphCount === 0
+        ? 0
+        : (paragraphCount - distinctParagraphs.size) / paragraphCount;
+    // Sentences with fewer than six words are ignored.
+    const sentenceKeys = body
+        .split(/(?<=[.!?])\s+/g)
+        .map((part) => part.trim())
+        .filter((part) => countWords(part) >= 6)
+        .map((sentence) => normalizeRepeatableText(sentence))
+        .filter((key) => key !== '');
+    const repeatedSentenceRatio = sentenceKeys.length === 0
+        ? 0
+        : (sentenceKeys.length - new Set(sentenceKeys).size) / sentenceKeys.length;
+    const ngramRepetitionScore = repeatedNgramRatio(body, 5);
+    const haystack = body.toLowerCase();
+    const templatePhraseHits = RESEARCH_TEMPLATE_PHRASES.filter((phrase) => haystack.includes(phrase.toLowerCase()));
+    const blockers = [];
+    if (repeatedParagraphRatio > 0.18) {
+        blockers.push('research_report_repeated_paragraphs');
+    }
+    for (const phrase of templatePhraseHits) {
+        blockers.push(`research_report_template_phrase_hit:${phrase}`);
+    }
+    if (ngramRepetitionScore > 0.32) {
+        blockers.push('research_report_ngram_repetition_high');
+    }
+    return {
+        schema: 'sks.research-repetition-report.v1',
+        paragraph_count: paragraphCount,
+        unique_paragraph_count: distinctParagraphs.size,
+        repeated_paragraph_ratio: round4(repeatedParagraphRatio),
+        repeated_sentence_ratio: round4(repeatedSentenceRatio),
+        ngram_repetition_score: round4(ngramRepetitionScore),
+        template_phrase_hits: templatePhraseHits,
+        blockers,
+        ok: blockers.length === 0
+    };
+}
+/**
+ * Scores word n-gram repetition in a text.
+ *
+ * The text is lower-cased and split into words made of `a-z`, `0-9`, `:`, `_`
+ * and `-`. The score is the number of surplus n-gram occurrences (every
+ * occurrence after the first) divided by the number of distinct n-grams, so
+ * it can exceed 1 for heavily repeated text.
+ *
+ * @param {*} text - Text to score.
+ * @param {number} size - Words per n-gram.
+ * @returns {number} 0 when the text has fewer than `size * 2` words.
+ */
+function repeatedNgramRatio(text, size) {
+    const tokens = String(text || '')
+        .toLowerCase()
+        .replace(/[^a-z0-9:_-]+/g, ' ')
+        .trim()
+        .split(/\s+/)
+        .filter(Boolean);
+    if (tokens.length < size * 2) {
+        return 0;
+    }
+    const occurrences = new Map();
+    const lastStart = tokens.length - size;
+    for (let start = 0; start <= lastStart; start += 1) {
+        const key = tokens.slice(start, start + size).join(' ');
+        const previous = occurrences.has(key) ? occurrences.get(key) : 0;
+        occurrences.set(key, previous + 1);
+    }
+    let surplus = 0;
+    for (const count of occurrences.values()) {
+        // Every stored count is at least 1, so this never goes negative.
+        surplus += count - 1;
+    }
+    return occurrences.size > 0 ? surplus / occurrences.size : 0;
+}
+/**
+ * Reduces text to a comparison key so passages that differ only in ids,
+ * numbers, case or punctuation compare equal.
+ *
+ * Steps, in order: lower-case; replace source/counter/claim style ids with
+ * `<id>`; replace standalone numbers with `<n>`; turn every other run of
+ * characters outside `a-z`, `0-9`, `<`, `>` into one space; trim.
+ *
+ * @param {*} value
+ * @returns {string}
+ */
+function normalizeRepeatableText(value) {
+    const idPattern = /\b(?:mock-source|shard-[a-z0-9_-]+|source|src|counter|mock-counter|claim|stage-claim|mock-claim)-[a-z0-9_.:-]+\b/g;
+    const numberPattern = /\b\d+(?:\.\d+)?\b/g;
+    let key = String(value || '').toLowerCase();
+    key = key.replace(idPattern, '<id>');
+    key = key.replace(numberPattern, '<n>');
+    key = key.replace(/[^a-z0-9<>]+/g, ' ');
+    key = key.replace(/\s+/g, ' ');
+    return key.trim();
+}
+/**
+ * Decides whether a paragraph takes part in duplicate-paragraph detection.
+ * Paragraphs with fewer than 18 words, and paragraphs that start with a
+ * `- ` list marker, are excluded.
+ *
+ * @param {*} value - Paragraph text.
+ * @returns {boolean}
+ */
+function shouldAnalyzeParagraph(value) {
+    const paragraph = String(value || '').trim();
+    const longEnough = countWords(paragraph) >= 18;
+    const isListItem = /^-\s+(?:\[?[a-z0-9_.:-]+\]?[:\]])?/i.test(paragraph);
+    return longEnough && !isListItem;
+}
+/**
+ * Counts whitespace-separated words.
+ *
+ * @param {*} value - Text to count; falsy values count as zero words.
+ * @returns {number}
+ */
+function countWords(value) {
+    const words = String(value || '').match(/\S+/g);
+    return words === null ? 0 : words.length;
+}
+/**
+ * Rounds a number to four decimal places.
+ *
+ * @param {number} value
+ * @returns {number}
+ */
+function round4(value) {
+    const scale = 10000;
+    const scaled = Math.round(value * scale);
+    return scaled / scale;
+}
+//# sourceMappingURL=research-repetition-detector.js.map

package/dist/core/research/research-report-quality.js CHANGED Viewed

@@ -1,3 +1,4 @@
+import { analyzeResearchRepetition } from './research-repetition-detector.js';
 export const REQUIRED_RESEARCH_REPORT_HEADINGS = [
     'Question',
     'Methodology',
@@ -21,20 +22,36 @@ export function analyzeResearchReportQuality(text) {
     const missingHeadings = REQUIRED_RESEARCH_REPORT_HEADINGS.filter((heading) => !headingsPresent.includes(heading));
     const implementationText = sectionText(body, 'Implementation Blueprint');
     const referencesText = sectionText(body, 'References');
-    const referencesSourceIds = [...new Set([
-            ...body.matchAll(/\b(?:source|src|mock-source|counter|mock-counter)-[A-Za-z0-9_.:-]+\b/g)
+    const sourceIdMentions = [...new Set([
+            ...body.matchAll(/\b(?:source|src|mock-source|shard-[A-Za-z0-9_-]+|counter|mock-counter)-[A-Za-z0-9_.:-]+\b/g)
         ].map((match) => match[0]))];
+    const claimIdMentions = [...new Set([
+            ...body.matchAll(/\b(?:claim|stage-claim|mock-claim)-[A-Za-z0-9_.:-]+\b/g)
+        ].map((match) => match[0]))];
+    const wordCount = countWords(body);
+    const sourceDensity = densityPer1000(sourceIdMentions.length, wordCount);
+    const claimDensity = densityPer1000(claimIdMentions.length, wordCount);
+    const repetition = analyzeResearchRepetition(body);
     const blockers = [
         ...missingHeadings.map((heading) => `research_report_heading_missing:${normalizeHeading(heading).replace(/\s+/g, '_')}`),
-        ...(referencesSourceIds.length ? [] : ['research_report_references_missing_source_ids'])
+        ...(sourceIdMentions.length ? [] : ['research_report_references_missing_source_ids']),
+        ...(sourceDensity < 4 ? ['research_report_source_density_low'] : []),
+        ...(claimDensity < 2 ? ['research_report_claim_density_low'] : []),
+        ...repetition.blockers,
+        ...(countWords(implementationText) < 300 ? ['implementation_section_too_thin'] : [])
     ];
     return {
         schema: 'sks.research-report-quality.v1',
-        word_count: countWords(body),
+        word_count: wordCount,
         headings_present: headingsPresent,
         missing_headings: missingHeadings,
         implementation_section_words: countWords(implementationText),
-        references_source_ids: referencesText ? referencesSourceIds : [],
+        references_source_ids: referencesText ? sourceIdMentions : [],
+        source_id_mentions: sourceIdMentions,
+        claim_id_mentions: claimIdMentions,
+        source_density_per_1000_words: sourceDensity,
+        claim_density_per_1000_words: claimDensity,
+        repetition,
         blockers,
         ok: blockers.length === 0
     };
@@ -45,6 +62,11 @@ export function countWords(text) {
 /**
  * Lower-cases a heading and collapses every run of characters outside
  * `a-z` / `0-9` into a single space, then trims the result.
  *
  * @param {*} value - Heading text; falsy values become an empty string.
  * @returns {string}
  */
 function normalizeHeading(value) {
     const lowered = String(value || '').toLowerCase();
     const spaced = lowered.replace(/[^a-z0-9]+/g, ' ');
     return spaced.trim();
 }
+/**
+ * Computes mentions per 1000 words, rounded to four decimals.
+ *
+ * The divisor is floored at 1, so a text shorter than 1000 words is not
+ * scaled up: the raw count is returned for it.
+ *
+ * @param {number} count - Number of mentions.
+ * @param {number} words - Word count of the text; a falsy value yields 0.
+ * @returns {number}
+ */
+function densityPer1000(count, words) {
+    if (!words) {
+        return 0;
+    }
+    const thousands = Math.max(1, words / 1000);
+    const density = count / thousands;
+    return Math.round(density * 10000) / 10000;
+}
 function sectionText(text, heading) {
     const lines = String(text || '').split(/\r?\n/);
     const target = normalizeHeading(heading);