metame-cli 1.6.0 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,224 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * wiki-synthesis.js — Evidence synthesis engines for Tier 2 cluster pages
5
+ *
6
+ * Single responsibility: DB queries + pure computation to produce structured
7
+ * intermediate artifacts. Zero LLM calls. All functions are synchronous and
8
+ * take a DatabaseSync instance + array of doc_source ids.
9
+ *
10
+ * Exports:
11
+ * buildComparisonMatrix(db, docSourceIds) → string (markdown table)
12
+ * buildTimeline(db, docSourceIds) → string (markdown list)
13
+ * detectContradictions(db, docSourceIds) → object[]
14
+ * buildCoverageReport(db, docSourceIds) → string (markdown list)
15
+ */
16
+
17
// Fact types each paper is checked against in buildCoverageReport.
// Frozen so that no function in this module can accidentally mutate the
// shared list (it is only ever read via filter/map).
const EXPECTED_TYPES = Object.freeze(['problem', 'method', 'result', 'dataset', 'limitation']);
const MAX_COLS = 8; // max papers in comparison table before truncation
const MAX_ROWS = 20; // max predicate groups in comparison table
const TRUNCATE_TITLE = 28; // char limit for column headers
21
+
22
+ // ── helpers ───────────────────────────────────────────────────────────────────
23
+
24
/**
 * Produce the comma-separated `?` placeholder list for a SQL `IN (...)` clause.
 *
 * @param {Array} ids - values that will be bound later, one per placeholder
 * @returns {string} e.g. `?,?,?` for three ids; empty string for an empty array
 */
function ph(ids) {
  const toMarker = () => '?';
  const markers = ids.map(toMarker);
  return markers.join(',');
}
27
+
28
/**
 * Shorten a title so it fits a table header or list entry.
 *
 * Length is measured and cut in Unicode code points rather than UTF-16 code
 * units, so an emoji or other astral character is never split into a lone
 * surrogate. A truncated title keeps `len - 1` characters plus a trailing `…`.
 *
 * @param {string} t - title to shorten; any falsy value yields `'?'`
 * @param {number} [len=TRUNCATE_TITLE] - maximum length of the result
 * @returns {string} the title unchanged when it fits, otherwise the cut title
 */
function shortTitle(t, len = TRUNCATE_TITLE) {
  if (!t) return '?';
  const text = String(t);
  const chars = Array.from(text); // iterates by code point
  if (chars.length <= len) return text;
  // Clamp so len <= 0 cannot turn into a negative-index slice.
  const keep = Math.max(len - 1, 0);
  return chars.slice(0, keep).join('') + '…';
}
32
+
33
+ // ── buildComparisonMatrix ─────────────────────────────────────────────────────
34
+
35
/**
 * Build a markdown comparison table of results/metrics across papers.
 *
 * Rows are predicates (at most MAX_ROWS, in query order); columns are papers
 * (at most MAX_COLS, in first-seen order). Each cell shows the first fact found
 * for that (predicate, paper) pair as `subject — object — value unit — *(context)*`,
 * omitting missing parts; pairs without a fact show `—`.
 *
 * @param {object} db - DatabaseSync
 * @param {number[]} docSourceIds
 * @returns {string} markdown table, or empty string if no result facts
 */
function buildComparisonMatrix(db, docSourceIds) {
  if (docSourceIds.length === 0) return '';

  const rows = db.prepare(`
    SELECT pf.predicate, pf.subject, pf.object, pf.value, pf.unit, pf.context,
           ds.id as doc_id, ds.title
    FROM paper_facts pf
    JOIN doc_sources ds ON ds.id = pf.doc_source_id
    WHERE pf.doc_source_id IN (${ph(docSourceIds)})
      AND pf.fact_type IN ('result','metric','baseline')
      AND pf.predicate IS NOT NULL
    ORDER BY pf.predicate, ds.id
  `).all(...docSourceIds);

  if (rows.length === 0) return '';

  // A raw pipe or line break inside a cell would break the markdown table.
  const cellText = (v) => String(v).replace(/\s*[\r\n]+\s*/g, ' ').replace(/\|/g, '\\|');

  // Collect unique papers in first-seen order. Membership is tracked by id,
  // not by title truthiness, so an untitled paper still yields one column.
  const paperTitles = new Map();
  for (const r of rows) {
    if (!paperTitles.has(r.doc_id)) paperTitles.set(r.doc_id, r.title);
  }
  const papers = [...paperTitles.keys()].slice(0, MAX_COLS);
  const shown = new Set(papers);

  // Group by predicate → (docId → cell text). Maps keep insertion order for
  // every key and cannot be hijacked by predicates such as "__proto__".
  const groups = new Map();
  for (const r of rows) {
    if (!shown.has(r.doc_id)) continue;
    let cells = groups.get(r.predicate);
    if (!cells) {
      cells = new Map();
      groups.set(r.predicate, cells);
    }
    const parts = [r.subject, r.object].filter(Boolean);
    // Explicit null/empty check: a numeric value of 0 is a real measurement.
    if (r.value != null && r.value !== '') {
      const unit = r.unit ? ` ${r.unit}` : '';
      parts.push(`${r.value}${unit}`);
    }
    if (r.context) parts.push(`*(${r.context})*`);
    // Keep first non-empty occurrence per (predicate, docId)
    if (!cells.get(r.doc_id)) cells.set(r.doc_id, parts.join(' — '));
  }

  const predicates = [...groups.keys()].slice(0, MAX_ROWS);
  if (predicates.length === 0) return '';

  // Build table
  const header = [
    'Metric / Result',
    ...papers.map((id) => cellText(shortTitle(paperTitles.get(id)))),
  ];
  const separator = header.map(() => '---');
  const tableRows = predicates.map((pred) => {
    const cells = groups.get(pred);
    const rendered = papers.map((id) => {
      const text = cells.get(id);
      return text ? cellText(text) : '—';
    });
    return [cellText(pred), ...rendered];
  });

  const fmt = (row) => `| ${row.join(' | ')} |`;
  return [fmt(header), fmt(separator), ...tableRows.map(fmt)].join('\n');
}
97
+
98
+ // ── buildTimeline ─────────────────────────────────────────────────────────────
99
+
100
/**
 * Build a chronological timeline of core method contributions per paper.
 *
 * Each paper contributes one list item: its year (or "year unknown"), a wiki
 * link to its slug (or its shortened title when it has no slug), and the
 * highest-confidence method/claim fact, shown as "subject predicate object"
 * or, failing that, a quoted excerpt of the evidence text. Papers without
 * such a fact are still listed, without a claim.
 *
 * @param {object} db
 * @param {number[]} docSourceIds
 * @returns {string} markdown list, one line per paper
 */
function buildTimeline(db, docSourceIds) {
  if (docSourceIds.length === 0) return '';

  // Get method/claim facts per paper, best (highest confidence) first
  const rows = db.prepare(`
    SELECT ds.year, ds.title, ds.slug, ds.id as doc_id,
           pf.subject, pf.predicate, pf.object, pf.evidence_text, pf.confidence
    FROM doc_sources ds
    LEFT JOIN paper_facts pf ON pf.doc_source_id = ds.id
      AND pf.fact_type IN ('method','claim')
    WHERE ds.id IN (${ph(docSourceIds)})
    ORDER BY ds.year ASC NULLS LAST, ds.id ASC, pf.confidence DESC
  `).all(...docSourceIds);

  if (rows.length === 0) return '';

  // Deduplicate: one entry per doc (keep first = highest confidence)
  const firstPerDoc = new Map();
  for (const r of rows) {
    if (!firstPerDoc.has(r.doc_id)) firstPerDoc.set(r.doc_id, r);
  }

  // A list item must stay on one line: collapse every whitespace run
  // (including line breaks) into a single space.
  const oneLine = (v) => String(v).replace(/\s+/g, ' ').trim();

  return [...firstPerDoc.values()].map((r) => {
    const year = r.year ? `**${r.year}**` : '**year unknown**';
    const slug = r.slug ? `[[${r.slug}]]` : shortTitle(r.title);
    let claim = '';
    if (r.subject && r.predicate && r.object) {
      claim = ` — ${oneLine(`${r.subject} ${r.predicate} ${r.object}`)}`;
    } else if (r.evidence_text) {
      // Cut by code point so the excerpt never ends in a lone surrogate.
      const excerpt = Array.from(oneLine(r.evidence_text)).slice(0, 120).join('');
      if (excerpt) claim = ` — "${excerpt}"`;
    }
    return `- ${year} ${slug}${claim}`;
  }).join('\n');
}
144
+
145
+ // ── detectContradictions ──────────────────────────────────────────────────────
146
+
147
/**
 * Detect fact pairs where same (subject, predicate) yields different objects
 * across different papers.
 *
 * Only result/claim/metric facts are compared, and each pair of papers is
 * reported in one direction only (lower doc_source id first). At most 20
 * pairs are returned; the explicit ordering (higher-confidence facts first,
 * fact ids as tie-breaker) makes that cut-off deterministic instead of
 * leaving the choice of rows to the query planner.
 *
 * @param {object} db
 * @param {number[]} docSourceIds
 * @returns {{ slugA, titleA, factA, slugB, titleB, factB }[]}
 */
function detectContradictions(db, docSourceIds) {
  // A contradiction needs at least two papers.
  if (docSourceIds.length < 2) return [];

  const placeholders = ph(docSourceIds);
  const rows = db.prepare(`
    SELECT
      a.id as id_a, a.subject, a.predicate, a.object as object_a,
      a.evidence_text as ev_a, a.confidence as conf_a,
      b.id as id_b, b.object as object_b,
      b.evidence_text as ev_b, b.confidence as conf_b,
      ds_a.slug as slug_a, ds_a.title as title_a,
      ds_b.slug as slug_b, ds_b.title as title_b
    FROM paper_facts a
    JOIN paper_facts b ON (
      a.subject IS NOT NULL AND a.subject = b.subject AND
      a.predicate IS NOT NULL AND a.predicate = b.predicate AND
      a.object IS NOT NULL AND b.object IS NOT NULL AND
      a.object != b.object AND
      a.doc_source_id < b.doc_source_id
    )
    JOIN doc_sources ds_a ON ds_a.id = a.doc_source_id
    JOIN doc_sources ds_b ON ds_b.id = b.doc_source_id
    WHERE a.doc_source_id IN (${placeholders})
      AND b.doc_source_id IN (${placeholders})
      AND a.fact_type IN ('result','claim','metric')
      AND b.fact_type IN ('result','claim','metric')
    ORDER BY a.confidence DESC, b.confidence DESC, a.id ASC, b.id ASC
    LIMIT 20
  `).all(...docSourceIds, ...docSourceIds); // ids are bound once per IN clause

  return rows.map((r) => {
    const { subject, predicate } = r;
    return {
      slugA: r.slug_a,
      titleA: r.title_a,
      factA: { subject, predicate, object: r.object_a, evidence: r.ev_a },
      slugB: r.slug_b,
      titleB: r.title_b,
      factB: { subject, predicate, object: r.object_b, evidence: r.ev_b },
    };
  });
}
190
+
191
+ // ── buildCoverageReport ───────────────────────────────────────────────────────
192
+
193
/**
 * Report which fact types are covered per paper, highlighting gaps.
 *
 * Each paper yields one list item: a wiki link to its slug (or, as in
 * buildTimeline, its shortened title when it has no slug), a ✓/✗ mark for
 * every entry of EXPECTED_TYPES, and either "complete" or the list of
 * missing types. Fact types outside EXPECTED_TYPES are ignored.
 *
 * @param {object} db
 * @param {number[]} docSourceIds
 * @returns {string} markdown list, one line per paper
 */
function buildCoverageReport(db, docSourceIds) {
  if (docSourceIds.length === 0) return '';

  const rows = db.prepare(`
    SELECT ds.id, ds.title, ds.slug,
           GROUP_CONCAT(DISTINCT pf.fact_type) as covered_types
    FROM doc_sources ds
    LEFT JOIN paper_facts pf ON pf.doc_source_id = ds.id
    WHERE ds.id IN (${ph(docSourceIds)})
    GROUP BY ds.id
    ORDER BY ds.id
  `).all(...docSourceIds);

  const lines = rows.map((r) => {
    // covered_types is NULL for a paper without any facts (LEFT JOIN).
    const covered = new Set((r.covered_types || '').split(',').filter(Boolean));
    const missing = EXPECTED_TYPES.filter((t) => !covered.has(t));
    const covStr = EXPECTED_TYPES.map((t) => (covered.has(t) ? `✓${t}` : `✗${t}`)).join(' ');
    const gapNote = missing.length ? ` — **gaps: ${missing.join(', ')}**` : ' — complete';
    // Fall back to the title instead of emitting a "[[?]]" link to nowhere.
    const label = r.slug ? `[[${r.slug}]]` : shortTitle(r.title);
    return `- ${label} ${covStr}${gapNote}`;
  });

  return lines.join('\n');
}
223
+
224
+ module.exports = { buildComparisonMatrix, buildTimeline, detectContradictions, buildCoverageReport };