cawdex 1.35.64 → 1.35.66
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/benchmark-trace.js
CHANGED
|
@@ -2032,9 +2032,17 @@ export function buildBenchmarkTrajectoryQuality(events, usage = emptyBenchmarkUs
|
|
|
2032
2032
|
if (sourceResearchUsed && !sourceResearchCoverage.completeTargetedCoverage) {
|
|
2033
2033
|
warnings.push('source research was partial; targeted benchmark research should cover arXiv, GitHub github_kind:"all", Hugging Face kind:"all", and Kaggle kaggle_kind:"both" when external research is relevant.');
|
|
2034
2034
|
}
|
|
2035
|
-
if (sourceResearchUsed && sourceResearchCoverage.completeTargetedCoverage &&
|
|
2035
|
+
if (sourceResearchUsed && sourceResearchCoverage.completeTargetedCoverage && sourceResearchCoverage.recentDays.length === 0) {
|
|
2036
2036
|
warnings.push('targeted source research omitted recent_days; newest-science and leaderboard work should bound arXiv, GitHub, and Hugging Face recency before relying on external evidence.');
|
|
2037
2037
|
}
|
|
2038
|
+
if (sourceResearchUsed &&
|
|
2039
|
+
sourceResearchCoverage.completeTargetedCoverage &&
|
|
2040
|
+
sourceResearchCoverage.recentDays.length > 0 &&
|
|
2041
|
+
sourceResearchCoverage.sourceHitCount > 0 &&
|
|
2042
|
+
sourceResearchHasFreshnessAccounting(sourceResearchCoverage) &&
|
|
2043
|
+
sourceResearchCoverage.freshHitCount === 0) {
|
|
2044
|
+
warnings.push(`targeted source research requested recent_days but produced no dated fresh hits (${formatSourceCoverage(sourceResearchCoverage)}). Inspect stale or undated source evidence before relying on newest-science claims.`);
|
|
2045
|
+
}
|
|
2038
2046
|
if (sourceResearchUsed && sourceResearchCoverage.sourceHitCount === 0) {
|
|
2039
2047
|
warnings.push('source research produced no parsed source hits; broaden the query or verify endpoint/auth failures before relying on it.');
|
|
2040
2048
|
}
|
|
@@ -2859,9 +2867,17 @@ function buildBenchmarkProcessDefects(input) {
|
|
|
2859
2867
|
if (input.sourceResearchUsed && !input.sourceResearchCoverage.completeTargetedCoverage) {
|
|
2860
2868
|
add('partial_source_research', 'source_research', 'medium', null, 'Source research was used but did not satisfy targeted arXiv/GitHub/Hugging Face/Kaggle coverage.', formatSourceCoverage(input.sourceResearchCoverage));
|
|
2861
2869
|
}
|
|
2862
|
-
if (input.sourceResearchUsed && input.sourceResearchCoverage.completeTargetedCoverage &&
|
|
2870
|
+
if (input.sourceResearchUsed && input.sourceResearchCoverage.completeTargetedCoverage && input.sourceResearchCoverage.recentDays.length === 0) {
|
|
2863
2871
|
add('source_research_missing_recency', 'source_research', 'medium', null, 'Targeted source research did not include a recency window for newest-science evidence.', formatSourceCoverage(input.sourceResearchCoverage));
|
|
2864
2872
|
}
|
|
2873
|
+
if (input.sourceResearchUsed &&
|
|
2874
|
+
input.sourceResearchCoverage.completeTargetedCoverage &&
|
|
2875
|
+
input.sourceResearchCoverage.recentDays.length > 0 &&
|
|
2876
|
+
input.sourceResearchCoverage.sourceHitCount > 0 &&
|
|
2877
|
+
sourceResearchHasFreshnessAccounting(input.sourceResearchCoverage) &&
|
|
2878
|
+
input.sourceResearchCoverage.freshHitCount === 0) {
|
|
2879
|
+
add('source_research_no_fresh_hits', 'source_research', 'medium', null, 'Targeted source research requested a recency window but produced no dated fresh hits.', formatSourceCoverage(input.sourceResearchCoverage));
|
|
2880
|
+
}
|
|
2865
2881
|
if (input.sourceResearchUsed && input.sourceResearchCoverage.sourceHitCount === 0) {
|
|
2866
2882
|
add('source_research_no_hits', 'source_research', 'medium', null, 'Source research produced no parsed source hits.', formatSourceCoverage(input.sourceResearchCoverage));
|
|
2867
2883
|
}
|
|
@@ -3200,9 +3216,32 @@ export function buildSourceResearchCoverage(events) {
|
|
|
3200
3216
|
coverage.huggingFaceKinds.includes('all') &&
|
|
3201
3217
|
coverage.kaggleKinds.includes('both') &&
|
|
3202
3218
|
!coverage.kaggleCompetitionsSkipped;
|
|
3203
|
-
coverage.freshTargetedCoverage =
|
|
3219
|
+
coverage.freshTargetedCoverage =
|
|
3220
|
+
coverage.completeTargetedCoverage &&
|
|
3221
|
+
coverage.recentDays.length > 0 &&
|
|
3222
|
+
sourceResearchHasFreshEvidence(coverage);
|
|
3204
3223
|
return coverage;
|
|
3205
3224
|
}
|
|
3225
|
+
// Adds together the four date-related counters read from `coverage`:
// datedHitCount, freshHitCount, staleHitCount and unknownDateHitCount.
// The only visible consumer compares the result against zero, so the sum acts
// as a "was any per-hit date accounting recorded" signal.
// NOTE(review): the counters may overlap (a fresh hit is presumably also a
// dated hit), so do not read the sum as a total number of hits -- confirm
// against the code that increments these counters.
// NOTE(review): assumes all four fields are numbers; a missing field would
// turn the sum into NaN -- confirm the coverage builder initializes them.
function sourceResearchFreshnessAccountingCount(coverage) {
|
|
3226
|
+
return coverage.datedHitCount +
|
|
3227
|
+
coverage.freshHitCount +
|
|
3228
|
+
coverage.staleHitCount +
|
|
3229
|
+
coverage.unknownDateHitCount;
|
|
3230
|
+
}
|
|
3231
|
+
// Returns true when sourceResearchFreshnessAccountingCount(coverage) is greater
// than zero, i.e. at least one of the date counters on `coverage` is non-zero.
// The "no dated fresh hits" warning and process defect both require this to be
// true before they test freshHitCount === 0, so coverage that carries no date
// accounting at all is not flagged by those checks.
function sourceResearchHasFreshnessAccounting(coverage) {
|
|
3232
|
+
return sourceResearchFreshnessAccountingCount(coverage) > 0;
|
|
3233
|
+
}
|
|
3234
|
+
// Decides whether `coverage` counts as having fresh evidence; the result is
// one of the three conditions ANDed into coverage.freshTargetedCoverage.
// Returns, in order of evaluation:
//   - false when coverage.sourceHitCount is zero or negative;
//   - true when no freshness accounting was recorded (compatibility path,
//     explained by the inline comment below);
//   - otherwise, whether coverage.freshHitCount is greater than zero.
function sourceResearchHasFreshEvidence(coverage) {
|
|
3235
|
+
if (coverage.sourceHitCount <= 0)
|
|
3236
|
+
return false;
|
|
3237
|
+
if (!sourceResearchHasFreshnessAccounting(coverage)) {
|
|
3238
|
+
// Older traces predate per-hit date accounting. Preserve compatibility:
|
|
3239
|
+
// a recency-bounded targeted query remains "fresh" unless newer evidence
|
|
3240
|
+
// explicitly proves every hit is stale or undated.
|
|
3241
|
+
return true;
|
|
3242
|
+
}
|
|
3243
|
+
return coverage.freshHitCount > 0;
|
|
3244
|
+
}
|
|
3206
3245
|
export function buildBenchmarkSourceMiningCoverage(events) {
|
|
3207
3246
|
const coverage = {
|
|
3208
3247
|
catalogCallCount: 0,
|
|
@@ -6889,6 +6928,7 @@ export function buildBenchmarkCompletionReminder(events, usageEvents = [], messa
|
|
|
6889
6928
|
|| warning.includes('multiple full skill prompts')
|
|
6890
6929
|
|| warning.includes('source research was partial')
|
|
6891
6930
|
|| warning.includes('targeted source research omitted recent_days')
|
|
6931
|
+
|| warning.includes('targeted source research requested recent_days but produced no dated fresh hits')
|
|
6892
6932
|
|| warning.includes('source research produced no parsed source hits')
|
|
6893
6933
|
|| warning.includes('source research reported')
|
|
6894
6934
|
|| warning.includes('Kaggle competition research')
|