cawdex 1.35.63 → 1.35.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -819,6 +819,12 @@ export interface SourceResearchCoverage {
|
|
|
819
819
|
resultSources: string[];
|
|
820
820
|
topUrls: string[];
|
|
821
821
|
recentDays: number[];
|
|
822
|
+
datedHitCount: number;
|
|
823
|
+
freshHitCount: number;
|
|
824
|
+
staleHitCount: number;
|
|
825
|
+
unknownDateHitCount: number;
|
|
826
|
+
newestDate: string | null;
|
|
827
|
+
oldestDate: string | null;
|
|
822
828
|
freshTargetedCoverage: boolean;
|
|
823
829
|
kaggleCompetitionsSkipped: boolean;
|
|
824
830
|
coverageNotes: string[];
|
package/dist/benchmark-trace.js
CHANGED
|
@@ -2032,9 +2032,17 @@ export function buildBenchmarkTrajectoryQuality(events, usage = emptyBenchmarkUs
|
|
|
2032
2032
|
if (sourceResearchUsed && !sourceResearchCoverage.completeTargetedCoverage) {
|
|
2033
2033
|
warnings.push('source research was partial; targeted benchmark research should cover arXiv, GitHub github_kind:"all", Hugging Face kind:"all", and Kaggle kaggle_kind:"both" when external research is relevant.');
|
|
2034
2034
|
}
|
|
2035
|
-
if (sourceResearchUsed && sourceResearchCoverage.completeTargetedCoverage &&
|
|
2035
|
+
if (sourceResearchUsed && sourceResearchCoverage.completeTargetedCoverage && sourceResearchCoverage.recentDays.length === 0) {
|
|
2036
2036
|
warnings.push('targeted source research omitted recent_days; newest-science and leaderboard work should bound arXiv, GitHub, and Hugging Face recency before relying on external evidence.');
|
|
2037
2037
|
}
|
|
2038
|
+
if (sourceResearchUsed &&
|
|
2039
|
+
sourceResearchCoverage.completeTargetedCoverage &&
|
|
2040
|
+
sourceResearchCoverage.recentDays.length > 0 &&
|
|
2041
|
+
sourceResearchCoverage.sourceHitCount > 0 &&
|
|
2042
|
+
sourceResearchHasFreshnessAccounting(sourceResearchCoverage) &&
|
|
2043
|
+
sourceResearchCoverage.freshHitCount === 0) {
|
|
2044
|
+
warnings.push(`targeted source research requested recent_days but produced no dated fresh hits (${formatSourceCoverage(sourceResearchCoverage)}). Inspect stale or undated source evidence before relying on newest-science claims.`);
|
|
2045
|
+
}
|
|
2038
2046
|
if (sourceResearchUsed && sourceResearchCoverage.sourceHitCount === 0) {
|
|
2039
2047
|
warnings.push('source research produced no parsed source hits; broaden the query or verify endpoint/auth failures before relying on it.');
|
|
2040
2048
|
}
|
|
@@ -2859,9 +2867,17 @@ function buildBenchmarkProcessDefects(input) {
|
|
|
2859
2867
|
if (input.sourceResearchUsed && !input.sourceResearchCoverage.completeTargetedCoverage) {
|
|
2860
2868
|
add('partial_source_research', 'source_research', 'medium', null, 'Source research was used but did not satisfy targeted arXiv/GitHub/Hugging Face/Kaggle coverage.', formatSourceCoverage(input.sourceResearchCoverage));
|
|
2861
2869
|
}
|
|
2862
|
-
if (input.sourceResearchUsed && input.sourceResearchCoverage.completeTargetedCoverage &&
|
|
2870
|
+
if (input.sourceResearchUsed && input.sourceResearchCoverage.completeTargetedCoverage && input.sourceResearchCoverage.recentDays.length === 0) {
|
|
2863
2871
|
add('source_research_missing_recency', 'source_research', 'medium', null, 'Targeted source research did not include a recency window for newest-science evidence.', formatSourceCoverage(input.sourceResearchCoverage));
|
|
2864
2872
|
}
|
|
2873
|
+
if (input.sourceResearchUsed &&
|
|
2874
|
+
input.sourceResearchCoverage.completeTargetedCoverage &&
|
|
2875
|
+
input.sourceResearchCoverage.recentDays.length > 0 &&
|
|
2876
|
+
input.sourceResearchCoverage.sourceHitCount > 0 &&
|
|
2877
|
+
sourceResearchHasFreshnessAccounting(input.sourceResearchCoverage) &&
|
|
2878
|
+
input.sourceResearchCoverage.freshHitCount === 0) {
|
|
2879
|
+
add('source_research_no_fresh_hits', 'source_research', 'medium', null, 'Targeted source research requested a recency window but produced no dated fresh hits.', formatSourceCoverage(input.sourceResearchCoverage));
|
|
2880
|
+
}
|
|
2865
2881
|
if (input.sourceResearchUsed && input.sourceResearchCoverage.sourceHitCount === 0) {
|
|
2866
2882
|
add('source_research_no_hits', 'source_research', 'medium', null, 'Source research produced no parsed source hits.', formatSourceCoverage(input.sourceResearchCoverage));
|
|
2867
2883
|
}
|
|
@@ -3135,6 +3151,12 @@ export function buildSourceResearchCoverage(events) {
|
|
|
3135
3151
|
resultSources: [],
|
|
3136
3152
|
topUrls: [],
|
|
3137
3153
|
recentDays: [],
|
|
3154
|
+
datedHitCount: 0,
|
|
3155
|
+
freshHitCount: 0,
|
|
3156
|
+
staleHitCount: 0,
|
|
3157
|
+
unknownDateHitCount: 0,
|
|
3158
|
+
newestDate: null,
|
|
3159
|
+
oldestDate: null,
|
|
3138
3160
|
freshTargetedCoverage: false,
|
|
3139
3161
|
kaggleCompetitionsSkipped: false,
|
|
3140
3162
|
coverageNotes: [],
|
|
@@ -3194,9 +3216,32 @@ export function buildSourceResearchCoverage(events) {
|
|
|
3194
3216
|
coverage.huggingFaceKinds.includes('all') &&
|
|
3195
3217
|
coverage.kaggleKinds.includes('both') &&
|
|
3196
3218
|
!coverage.kaggleCompetitionsSkipped;
|
|
3197
|
-
coverage.freshTargetedCoverage =
|
|
3219
|
+
coverage.freshTargetedCoverage =
|
|
3220
|
+
coverage.completeTargetedCoverage &&
|
|
3221
|
+
coverage.recentDays.length > 0 &&
|
|
3222
|
+
sourceResearchHasFreshEvidence(coverage);
|
|
3198
3223
|
return coverage;
|
|
3199
3224
|
}
|
|
3225
|
+
/**
 * Adds together the four per-hit date counters stored on a coverage object.
 *
 * The counters are not mutually exclusive (a hit with a parsable date can be
 * counted as "dated" and also as "fresh" or "stale"), so the total is a
 * "was any date accounting recorded at all" signal rather than a hit count.
 *
 * @param coverage Coverage object carrying datedHitCount, freshHitCount,
 *   staleHitCount and unknownDateHitCount.
 * @returns The sum of the four counters.
 */
function sourceResearchFreshnessAccountingCount(coverage) {
    const { datedHitCount, freshHitCount, staleHitCount, unknownDateHitCount } = coverage;
    return datedHitCount + freshHitCount + staleHitCount + unknownDateHitCount;
}
|
|
3231
|
+
/**
 * Reports whether any per-hit date accounting was recorded on the coverage
 * object, i.e. whether at least one of the dated/fresh/stale/unknown-date
 * counters is non-zero.
 *
 * @param coverage Coverage object with the four date counters.
 * @returns true when the combined counter total is greater than zero.
 */
function sourceResearchHasFreshnessAccounting(coverage) {
    const recordedTotal = sourceResearchFreshnessAccountingCount(coverage);
    return recordedTotal > 0;
}
|
|
3234
|
+
/**
 * Decides whether the recorded source hits count as fresh evidence.
 *
 * - No source hits at all: never fresh.
 * - Hits but no date accounting: treated as fresh (see comment below).
 * - Date accounting present: fresh only when at least one hit was counted
 *   as fresh.
 *
 * @param coverage Coverage object with sourceHitCount, freshHitCount and the
 *   other date counters.
 * @returns true when the hits may be relied on as fresh evidence.
 */
function sourceResearchHasFreshEvidence(coverage) {
    if (coverage.sourceHitCount <= 0) {
        return false;
    }
    // Traces written before per-hit date accounting existed carry no
    // counters. For compatibility they keep their previous "fresh" status;
    // only traces that do carry accounting can be downgraded, and then only
    // when that accounting shows zero fresh hits.
    const lacksDateAccounting = !sourceResearchHasFreshnessAccounting(coverage);
    return lacksDateAccounting || coverage.freshHitCount > 0;
}
|
|
3200
3245
|
export function buildBenchmarkSourceMiningCoverage(events) {
|
|
3201
3246
|
const coverage = {
|
|
3202
3247
|
catalogCallCount: 0,
|
|
@@ -6622,6 +6667,19 @@ function collectSourceResearchJsonEvidence(coverage, packet) {
|
|
|
6622
6667
|
coverage.sourceHitCount += digestHitCount;
|
|
6623
6668
|
if (digestErrorCount != null)
|
|
6624
6669
|
coverage.sourceErrorCount += digestErrorCount;
|
|
6670
|
+
const digestDatedCount = numberFromUnknown(digest?.datedHitCount);
|
|
6671
|
+
const digestFreshCount = numberFromUnknown(digest?.freshHitCount);
|
|
6672
|
+
const digestStaleCount = numberFromUnknown(digest?.staleHitCount);
|
|
6673
|
+
const digestUnknownDateCount = numberFromUnknown(digest?.unknownDateHitCount);
|
|
6674
|
+
if (digestDatedCount != null)
|
|
6675
|
+
coverage.datedHitCount += digestDatedCount;
|
|
6676
|
+
if (digestFreshCount != null)
|
|
6677
|
+
coverage.freshHitCount += digestFreshCount;
|
|
6678
|
+
if (digestStaleCount != null)
|
|
6679
|
+
coverage.staleHitCount += digestStaleCount;
|
|
6680
|
+
if (digestUnknownDateCount != null)
|
|
6681
|
+
coverage.unknownDateHitCount += digestUnknownDateCount;
|
|
6682
|
+
mergeSourceResearchDateRange(coverage, stringFromUnknown(digest?.oldestDate), stringFromUnknown(digest?.newestDate));
|
|
6625
6683
|
const digestSources = objectFromUnknown(digest?.sources);
|
|
6626
6684
|
if (digestSources) {
|
|
6627
6685
|
for (const source of Object.keys(digestSources)) {
|
|
@@ -6639,14 +6697,54 @@ function collectSourceResearchJsonEvidence(coverage, packet) {
|
|
|
6639
6697
|
const record = objectFromUnknown(hit);
|
|
6640
6698
|
const source = typeof record?.source === 'string' ? record.source : '';
|
|
6641
6699
|
const url = typeof record?.url === 'string' ? record.url : '';
|
|
6700
|
+
const date = typeof record?.date === 'string' ? record.date : '';
|
|
6642
6701
|
if (source)
|
|
6643
6702
|
pushUnique(coverage.resultSources, normalizeResearchSourceLabel(source));
|
|
6644
6703
|
if (url && coverage.topUrls.length < 12)
|
|
6645
6704
|
pushUnique(coverage.topUrls, url.replace(/[),.;]+$/, ''));
|
|
6705
|
+
if (digestDatedCount == null && digestFreshCount == null && digestStaleCount == null && digestUnknownDateCount == null) {
|
|
6706
|
+
collectSourceResearchHitFreshness(coverage, date, recentDays);
|
|
6707
|
+
}
|
|
6646
6708
|
}
|
|
6647
6709
|
if (digestErrorCount == null)
|
|
6648
6710
|
coverage.sourceErrorCount += stringArrayFromUnknown(packet.errors).length;
|
|
6649
6711
|
}
|
|
6712
|
+
/**
 * Classifies a single source hit by its date and updates the coverage
 * counters in place.
 *
 * - Unparsable or missing date: increments unknownDateHitCount and stops.
 * - Parsable date: increments datedHitCount and widens the coverage
 *   oldest/newest date range.
 * - Additionally, when `recentDays` is a finite positive number, the hit is
 *   counted as fresh if its (day-normalized) date is on or after
 *   `nowMs - floor(recentDays) days`, otherwise as stale. Without a valid
 *   window the hit is neither fresh nor stale.
 *
 * @param coverage Coverage object whose counters and date range are mutated.
 * @param date Raw date string taken from the hit ('' when absent).
 * @param recentDays Recency window in days requested for the query.
 * @param nowMs Reference instant (epoch milliseconds) the window is measured
 *   from. Defaults to the current wall clock, which is the previous
 *   behaviour; pass the trace's own timestamp to get a classification that
 *   does not change when a stored trace is re-analysed later.
 */
function collectSourceResearchHitFreshness(coverage, date, recentDays, nowMs = Date.now()) {
    const normalized = normalizeTraceIsoDate(date);
    if (!normalized) {
        coverage.unknownDateHitCount++;
        return;
    }
    coverage.datedHitCount++;
    mergeSourceResearchDateRange(coverage, normalized, normalized);
    // Without a usable recency window there is nothing to compare against.
    if (!Number.isFinite(recentDays) || recentDays <= 0) {
        return;
    }
    const MS_PER_DAY = 24 * 60 * 60 * 1000;
    // Guard against a caller passing NaN/undefined-derived values explicitly.
    const referenceMs = Number.isFinite(nowMs) ? nowMs : Date.now();
    const cutoffMs = referenceMs - Math.floor(recentDays) * MS_PER_DAY;
    // `normalized` is a YYYY-MM-DD string, so this is UTC midnight of that day.
    const hitMs = Date.parse(normalized);
    if (Number.isFinite(hitMs) && hitMs >= cutoffMs) {
        coverage.freshHitCount++;
    }
    else {
        coverage.staleHitCount++;
    }
}
|
|
6730
|
+
/**
 * Widens the coverage date range so that it includes the given bounds.
 *
 * Both inputs are normalized to YYYY-MM-DD first; a bound that does not
 * normalize is ignored. Dates are compared as strings, which orders
 * YYYY-MM-DD values chronologically.
 *
 * @param coverage Coverage object whose oldestDate/newestDate are updated.
 * @param oldest Candidate lower bound (raw date string, may be undefined).
 * @param newest Candidate upper bound (raw date string, may be undefined).
 */
function mergeSourceResearchDateRange(coverage, oldest, newest) {
    const lowerBound = normalizeTraceIsoDate(oldest);
    const upperBound = normalizeTraceIsoDate(newest);
    const extendsOlder = lowerBound && (!coverage.oldestDate || lowerBound < coverage.oldestDate);
    if (extendsOlder) {
        coverage.oldestDate = lowerBound;
    }
    const extendsNewer = upperBound && (!coverage.newestDate || upperBound > coverage.newestDate);
    if (extendsNewer) {
        coverage.newestDate = upperBound;
    }
}
|
|
6740
|
+
/**
 * Normalizes a date-like string to its UTC calendar day (YYYY-MM-DD).
 *
 * Date-time strings that carry no timezone designator (for example
 * "2024-01-15T23:30:00" or "2024-01-15 00:10") are interpreted as UTC.
 * Left to Date.parse they would be read as host-local time, which makes the
 * resulting day depend on the timezone of the machine analysing the trace.
 * Strings with an explicit offset or "Z", and date-only strings, keep their
 * standard Date.parse interpretation.
 *
 * @param value Raw date value; falsy or unparsable input yields undefined.
 * @returns The YYYY-MM-DD day in UTC, or undefined when no date can be read.
 */
function normalizeTraceIsoDate(value) {
    if (!value)
        return undefined;
    const text = String(value).trim();
    if (!text)
        return undefined;
    // YYYY-MM-DD followed by a time of day and nothing else: no offset given.
    const offsetless = text.match(/^(\d{4}-\d{2}-\d{2})[T ](\d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?)$/);
    const ms = Date.parse(offsetless ? `${offsetless[1]}T${offsetless[2]}Z` : text);
    if (!Number.isFinite(ms))
        return undefined;
    return new Date(ms).toISOString().slice(0, 10);
}
|
|
6650
6748
|
function applySourceCoverageNote(coverage, note) {
|
|
6651
6749
|
const normalized = note.replace(/\s+/g, ' ').trim();
|
|
6652
6750
|
if (!normalized)
|
|
@@ -6762,6 +6860,22 @@ function collectSourceResearchEvidence(coverage, output) {
|
|
|
6762
6860
|
if (authLine) {
|
|
6763
6861
|
applySourceAuthPreview(coverage, authLine[1]);
|
|
6764
6862
|
}
|
|
6863
|
+
const datedHits = line.match(/^-\s+dated_hits:\s+(\d+)/i);
|
|
6864
|
+
if (datedHits)
|
|
6865
|
+
coverage.datedHitCount += Number(datedHits[1]);
|
|
6866
|
+
const freshHits = line.match(/^-\s+fresh_hits:\s+(\d+)/i);
|
|
6867
|
+
if (freshHits)
|
|
6868
|
+
coverage.freshHitCount += Number(freshHits[1]);
|
|
6869
|
+
const staleHits = line.match(/^-\s+stale_hits:\s+(\d+)/i);
|
|
6870
|
+
if (staleHits)
|
|
6871
|
+
coverage.staleHitCount += Number(staleHits[1]);
|
|
6872
|
+
const unknownDateHits = line.match(/^-\s+unknown_date_hits:\s+(\d+)/i);
|
|
6873
|
+
if (unknownDateHits)
|
|
6874
|
+
coverage.unknownDateHitCount += Number(unknownDateHits[1]);
|
|
6875
|
+
const dateRange = line.match(/^-\s+date_range:\s+(\d{4}-\d{2}-\d{2})\.\.(\d{4}-\d{2}-\d{2})/i);
|
|
6876
|
+
if (dateRange) {
|
|
6877
|
+
mergeSourceResearchDateRange(coverage, dateRange[1], dateRange[2]);
|
|
6878
|
+
}
|
|
6765
6879
|
const url = line.match(/^https?:\/\/\S+/i)?.[0];
|
|
6766
6880
|
if (url && coverage.topUrls.length < 12) {
|
|
6767
6881
|
pushUnique(coverage.topUrls, url.replace(/[),.;]+$/, ''));
|
|
@@ -6814,6 +6928,7 @@ export function buildBenchmarkCompletionReminder(events, usageEvents = [], messa
|
|
|
6814
6928
|
|| warning.includes('multiple full skill prompts')
|
|
6815
6929
|
|| warning.includes('source research was partial')
|
|
6816
6930
|
|| warning.includes('targeted source research omitted recent_days')
|
|
6931
|
+
|| warning.includes('targeted source research requested recent_days but produced no dated fresh hits')
|
|
6817
6932
|
|| warning.includes('source research produced no parsed source hits')
|
|
6818
6933
|
|| warning.includes('source research reported')
|
|
6819
6934
|
|| warning.includes('Kaggle competition research')
|
|
@@ -7438,6 +7553,9 @@ function numberFromUnknown(value) {
|
|
|
7438
7553
|
const n = Number(value);
|
|
7439
7554
|
return Number.isFinite(n) ? n : null;
|
|
7440
7555
|
}
|
|
7556
|
+
/**
 * Narrows an arbitrary value to a string.
 *
 * @param value Any value, typically a field read from parsed JSON.
 * @returns The value itself when it is a string (including ''), otherwise
 *   undefined.
 */
function stringFromUnknown(value) {
    if (typeof value === 'string') {
        return value;
    }
    return undefined;
}
|
|
7441
7559
|
/**
 * Narrows an arbitrary value to a boolean.
 *
 * @param value Any value, typically a field read from parsed JSON.
 * @returns The value itself when it is exactly true or false, otherwise
 *   null. Truthy/falsy non-booleans (1, 0, '', 'true') are not coerced.
 */
function booleanFromUnknown(value) {
    if (value === true || value === false) {
        return value;
    }
    return null;
}
|
|
@@ -7489,6 +7607,11 @@ function formatSourceCoverage(coverage) {
|
|
|
7489
7607
|
huggingFace,
|
|
7490
7608
|
kaggle,
|
|
7491
7609
|
coverage.recentDays.length ? `recent_days:${coverage.recentDays.join('|')}` : 'recent_days:none',
|
|
7610
|
+
`dated_hits:${coverage.datedHitCount}`,
|
|
7611
|
+
`fresh_hits:${coverage.freshHitCount}`,
|
|
7612
|
+
`stale_hits:${coverage.staleHitCount}`,
|
|
7613
|
+
`unknown_date_hits:${coverage.unknownDateHitCount}`,
|
|
7614
|
+
coverage.oldestDate && coverage.newestDate ? `date_range:${coverage.oldestDate}..${coverage.newestDate}` : null,
|
|
7492
7615
|
coverage.resultSources.length ? `result_sources:${coverage.resultSources.slice(0, 8).join('|')}` : null,
|
|
7493
7616
|
authSignals ? `auth:${authSignals}` : null,
|
|
7494
7617
|
coverage.kaggleCompetitionsSkipped ? 'kaggle_competitions:skipped' : null,
|
|
@@ -8092,6 +8215,11 @@ function formatResearchSourcesJsonPreviewLines(packet) {
|
|
|
8092
8215
|
`- sources: ${sourceSummary}`,
|
|
8093
8216
|
authPreview ? `- auth: ${authPreview}` : null,
|
|
8094
8217
|
`- top_urls: ${topUrls.length ? topUrls.join(' | ') : 'none'}`,
|
|
8218
|
+
`- dated_hits: ${numberFromUnknown(digest?.datedHitCount) ?? 0}`,
|
|
8219
|
+
`- fresh_hits: ${numberFromUnknown(digest?.freshHitCount) ?? 0}`,
|
|
8220
|
+
`- stale_hits: ${numberFromUnknown(digest?.staleHitCount) ?? 0}`,
|
|
8221
|
+
`- unknown_date_hits: ${numberFromUnknown(digest?.unknownDateHitCount) ?? 0}`,
|
|
8222
|
+
`- date_range: ${stringFromUnknown(digest?.oldestDate) && stringFromUnknown(digest?.newestDate) ? `${stringFromUnknown(digest?.oldestDate)}..${stringFromUnknown(digest?.newestDate)}` : 'none'}`,
|
|
8095
8223
|
].filter((line) => line != null);
|
|
8096
8224
|
lines.push('', ...digestLines);
|
|
8097
8225
|
for (const hit of arrayFromUnknown(packet.hits)) {
|
|
@@ -8106,6 +8234,9 @@ function formatResearchSourcesJsonPreviewLines(packet) {
|
|
|
8106
8234
|
lines.push('', `## ${source}: ${title}`);
|
|
8107
8235
|
if (url)
|
|
8108
8236
|
lines.push(url);
|
|
8237
|
+
if (typeof record.date === 'string' && record.date.trim()) {
|
|
8238
|
+
lines.push(`date ${record.date.trim()}`);
|
|
8239
|
+
}
|
|
8109
8240
|
}
|
|
8110
8241
|
const errors = stringArrayFromUnknown(packet.errors);
|
|
8111
8242
|
if (errors.length > 0) {
|