@maintainabilityai/research-runner 0.1.11 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -30,6 +30,19 @@ function canonicalizeUrl(rawUrl) {
|
|
|
30
30
|
return rawUrl.trim().toLowerCase();
|
|
31
31
|
}
|
|
32
32
|
}
|
|
33
|
+
/**
|
|
34
|
+
* Per-provider quota for the top-N output. Without these floors, Tavily
|
|
35
|
+
* (normalized scores 0.9–1.0) crushes every other provider in pure
|
|
36
|
+
* global ranking — synth would see zero HN signal and zero patent
|
|
37
|
+
* coverage. Quotas sum to topN's default (20). Any unused slack
|
|
38
|
+
* spills over to the highest-scoring non-quota entries across providers.
|
|
39
|
+
*/
|
|
40
|
+
const PROVIDER_QUOTA = {
|
|
41
|
+
tavily: 8,
|
|
42
|
+
arxiv: 5,
|
|
43
|
+
uspto: 4,
|
|
44
|
+
hackernews: 3,
|
|
45
|
+
};
|
|
33
46
|
function dedupeAndRank(opts) {
|
|
34
47
|
const topN = opts.topN ?? 20;
|
|
35
48
|
const retrievedAt = opts.retrievedAt ?? new Date().toISOString();
|
|
@@ -72,12 +85,46 @@ function dedupeAndRank(opts) {
|
|
|
72
85
|
});
|
|
73
86
|
}
|
|
74
87
|
}
|
|
75
|
-
const ranked = [...bucket.values()]
|
|
76
|
-
.map(a => {
|
|
88
|
+
const allEntries = [...bucket.values()].map(a => {
|
|
77
89
|
const recall = 1 + 0.15 * (a.queries.size - 1);
|
|
78
90
|
const composite = Math.min(1, a.scoreSum * recall / Math.max(1, a.occurrences));
|
|
79
91
|
return { aggregated: a, composite };
|
|
80
|
-
})
|
|
92
|
+
});
|
|
93
|
+
// Phase 1 — per-provider quota: take each provider's top-K (K from PROVIDER_QUOTA).
|
|
94
|
+
// Phase 2 — spillover: fill the remaining budget with the next-highest entries
|
|
95
|
+
// from anywhere, including providers that have already filled their quota.
|
|
96
|
+
// Phase 3 — re-sort the combined set by composite score for stable display order.
|
|
97
|
+
const used = new Set();
|
|
98
|
+
const picks = [];
|
|
99
|
+
for (const provider of Object.keys(PROVIDER_QUOTA)) {
|
|
100
|
+
const k = PROVIDER_QUOTA[provider];
|
|
101
|
+
if (k === 0) {
|
|
102
|
+
continue;
|
|
103
|
+
}
|
|
104
|
+
const fromProvider = allEntries
|
|
105
|
+
.filter(e => e.aggregated.provider === provider)
|
|
106
|
+
.sort((a, b) => b.composite - a.composite)
|
|
107
|
+
.slice(0, k);
|
|
108
|
+
for (const e of fromProvider) {
|
|
109
|
+
if (used.has(e.aggregated.canonicalUrl)) {
|
|
110
|
+
continue;
|
|
111
|
+
}
|
|
112
|
+
picks.push(e);
|
|
113
|
+
used.add(e.aggregated.canonicalUrl);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
const remainingBudget = Math.max(0, topN - picks.length);
|
|
117
|
+
if (remainingBudget > 0) {
|
|
118
|
+
const spillover = allEntries
|
|
119
|
+
.filter(e => !used.has(e.aggregated.canonicalUrl))
|
|
120
|
+
.sort((a, b) => b.composite - a.composite)
|
|
121
|
+
.slice(0, remainingBudget);
|
|
122
|
+
for (const e of spillover) {
|
|
123
|
+
picks.push(e);
|
|
124
|
+
used.add(e.aggregated.canonicalUrl);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
const ranked = picks
|
|
81
128
|
.sort((a, b) => b.composite - a.composite)
|
|
82
129
|
.slice(0, topN);
|
|
83
130
|
return ranked.map((entry, i) => ({
|
|
@@ -46,7 +46,11 @@ function providerSection(label, emoji, provider, sources, totalCount) {
|
|
|
46
46
|
for (const s of sources) {
|
|
47
47
|
const authors = s.authors && s.authors.length > 0 ? ` — _${s.authors.slice(0, 3).join(', ')}${s.authors.length > 3 ? ' et al.' : ''}_` : '';
|
|
48
48
|
const date = s.published_at ? ` _(${s.published_at.slice(0, 10)})_` : '';
|
|
49
|
-
|
|
49
|
+
// Render the citation id as standalone inline code so the synth
|
|
50
|
+
// agent (and a downstream PRD agent) can grep `\bS\d+\b` cleanly.
|
|
51
|
+
// The earlier form **[`S1`] [Title](url)** broke GitHub's markdown
|
|
52
|
+
// parser (it tried to interpret the brackets as a reference link).
|
|
53
|
+
lines.push(`- \`${s.id}\` **[${s.title}](${s.url})** — score ${s.salience_score.toFixed(2)}${date}${authors}`);
|
|
50
54
|
lines.push(` > ${shortExcerpt(s.excerpt)}`);
|
|
51
55
|
}
|
|
52
56
|
lines.push('');
|
|
@@ -159,6 +163,6 @@ function formatForHuman(opts) {
|
|
|
159
163
|
lines.push('');
|
|
160
164
|
lines.push('---');
|
|
161
165
|
lines.push('');
|
|
162
|
-
lines.push(`🤖 Generated by \`research-runner archeologist
|
|
166
|
+
lines.push(`🤖 Generated by \`research-runner archeologist\`. Run id: \`${runId}\` (see the Hatter's Tag for agent version + audit chain).`);
|
|
163
167
|
return { body: lines.join('\n') };
|
|
164
168
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@maintainabilityai/research-runner",
|
|
3
|
-
"version": "0.1.11",
|
|
3
|
+
"version": "0.1.12",
|
|
4
4
|
"description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "MaintainabilityAI",
|