med-pdf-nmo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +298 -0
- package/README.ru.md +298 -0
- package/dist/bm25.d.ts +47 -0
- package/dist/bm25.js +86 -0
- package/dist/browser-shims/buffer.d.ts +30 -0
- package/dist/browser-shims/buffer.js +31 -0
- package/dist/browser-shims/crypto.d.ts +33 -0
- package/dist/browser-shims/crypto.js +45 -0
- package/dist/browser-shims/fs-promises.d.ts +13 -0
- package/dist/browser-shims/fs-promises.js +25 -0
- package/dist/browser-shims/fs.d.ts +14 -0
- package/dist/browser-shims/fs.js +24 -0
- package/dist/browser-shims/globals.d.ts +9 -0
- package/dist/browser-shims/globals.js +23 -0
- package/dist/browser-shims/path.d.ts +57 -0
- package/dist/browser-shims/path.js +65 -0
- package/dist/browser-shims/process.d.ts +22 -0
- package/dist/browser-shims/process.js +27 -0
- package/dist/browser.d.ts +9 -0
- package/dist/browser.js +12 -0
- package/dist/chunk.d.ts +15 -0
- package/dist/chunk.js +76 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +87 -0
- package/dist/index.d.ts +82 -0
- package/dist/index.js +51 -0
- package/dist/med-pdf-nmo.browser.js +40413 -0
- package/dist/med-pdf-nmo.browser.mjs +40395 -0
- package/dist/normalize.d.ts +73 -0
- package/dist/normalize.js +477 -0
- package/dist/pdf.d.ts +35 -0
- package/dist/pdf.js +396 -0
- package/dist/predictor/config.d.ts +28 -0
- package/dist/predictor/config.js +26 -0
- package/dist/predictor/constants.d.ts +3 -0
- package/dist/predictor/constants.js +59 -0
- package/dist/predictor/runtime.d.ts +15 -0
- package/dist/predictor/runtime.js +59 -0
- package/dist/predictor/scorers/biomedical-symbols.d.ts +36 -0
- package/dist/predictor/scorers/biomedical-symbols.js +347 -0
- package/dist/predictor/scorers/coordinate-table.d.ts +82 -0
- package/dist/predictor/scorers/coordinate-table.js +1210 -0
- package/dist/predictor/scorers/direction.d.ts +71 -0
- package/dist/predictor/scorers/direction.js +345 -0
- package/dist/predictor/scorers/drug-dose.d.ts +6 -0
- package/dist/predictor/scorers/drug-dose.js +221 -0
- package/dist/predictor/scorers/exact-answer.d.ts +10 -0
- package/dist/predictor/scorers/exact-answer.js +75 -0
- package/dist/predictor/scorers/fibrosis-stage.d.ts +6 -0
- package/dist/predictor/scorers/fibrosis-stage.js +103 -0
- package/dist/predictor/scorers/focused.d.ts +40 -0
- package/dist/predictor/scorers/focused.js +204 -0
- package/dist/predictor/scorers/frequency.d.ts +10 -0
- package/dist/predictor/scorers/frequency.js +203 -0
- package/dist/predictor/scorers/numeric.d.ts +77 -0
- package/dist/predictor/scorers/numeric.js +1161 -0
- package/dist/predictor/scorers/recommendation-item.d.ts +27 -0
- package/dist/predictor/scorers/recommendation-item.js +469 -0
- package/dist/predictor/scorers/search.d.ts +41 -0
- package/dist/predictor/scorers/search.js +515 -0
- package/dist/predictor/selection.d.ts +30 -0
- package/dist/predictor/selection.js +370 -0
- package/dist/predictor/text-utils.d.ts +49 -0
- package/dist/predictor/text-utils.js +497 -0
- package/dist/predictor/types.d.ts +23 -0
- package/dist/predictor/types.js +1 -0
- package/dist/predictor.d.ts +52 -0
- package/dist/predictor.js +3834 -0
- package/package.json +82 -0
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
import { normalizeForSearch, tokenize } from "../normalize.js";
|
|
2
|
+
/**
 * Weight assigned to each structural evidence kind.
 *
 * Lookups in this module fall back to 0 for kinds that are not listed here,
 * which makes them non-structural.
 */
const STRUCTURAL_EVIDENCE_WEIGHTS = new Map([
    ["coordinate_table_row", 1.25],
    ["coordinate_table_group", 1.25],
    ["coordinate_table_group_inverse", 1.25],
    ["coordinate_table_multicell_row", 1.25],
    ["coordinate_table_membership", 1.15],
    ["parenthetical_group_segment", 1.05],
    ["preceding_question_label", 1.05],
    ["question_continuation_list", 1.05],
    ["exact_numeric_option_segment", 1.05],
    ["exact_hour_alias_segment", 1.05],
    ["short_medical_alias_segment", 0.9],
    ["visual_table_column", 1.2],
    ["exact_short_label_visual_row", 1.15],
    ["short_label_visual_row", 1.05],
    ["answer_ordinal_row", 1.05],
    ["fibrosis_stage_row", 1.2],
    ["gene_sentence_segment", 1.1],
    ["clinical_feature_segment", 1.0],
    ["mkb_class_exclusion_absent", 1.0],
    ["classification_code_segment", 1.15],
    ["label_number_proximity", 1.0],
    ["label_definition_segment", 1.0],
    ["row_label_segment", 0.95],
    ["bounded_list_segment", 0.95],
    ["ordinal_list_segment", 0.9],
    ["drug_dose_segment", 0.9],
    ["recommendation_item_segment", 0.85],
    ["explicit_recommendation_target_segment", 0.85],
    ["numeric_condition_less_than", 0.85],
    ["numeric_condition_more_than", 0.85],
    ["numeric_condition_equal", 0.85],
    ["conditioned_number_segment", 0.8],
    ["cloze_gap_local", 0.8],
]);

/** Evidence kinds counted as "broad" when an option's evidence is summarized. */
const BROAD_EVIDENCE_KINDS = new Set([
    "bm25_question_answer",
    "question_chunk_answer",
    "answer_chunk_question",
    "answer_window",
    "focused_answer_window",
    "shared_multi_segment",
]);

/** Subset of broad kinds counted separately as "noisy shared" evidence. */
const NOISY_SHARED_EVIDENCE_KINDS = new Set([
    "question_chunk_answer",
    "bm25_question_answer",
    "shared_multi_segment",
]);
|
|
46
|
+
/**
 * Converts the raw option scores into relative, confidence-like scores.
 *
 * Every returned item is a copy of its input with two extra fields:
 * - `relative`: position of `raw` between the lowest raw (capped at 0) and
 *   the highest raw (floored at 0.0001);
 * - `score`: the larger of a softmax share (temperature 2.2) and
 *   `relative * 0.88`, rounded with `round4`.
 *
 * @param answerScores Scored options, each exposing a numeric `raw`.
 * @returns A new array in the same order as the input.
 */
export function calibrateScores(answerScores) {
    const TEMPERATURE = 2.2;
    const raws = answerScores.map(({ raw }) => raw);
    const ceiling = Math.max(...raws, 0.0001);
    const floor = Math.min(...raws, 0);
    const range = Math.max(0.0001, ceiling - floor);
    // Softmax weights are shifted by the ceiling so every exponent is <= 0.
    const weights = [];
    let total = 0;
    for (const raw of raws) {
        const weight = Math.exp((raw - ceiling) / TEMPERATURE);
        weights.push(weight);
        total += weight;
    }
    // Falls back to 1 when the sum is 0 (or NaN) to avoid dividing by it.
    const denominator = total || 1;
    return answerScores.map((item, position) => {
        const relative = (item.raw - floor) / range;
        const share = weights[position] / denominator;
        const score = round4(Math.max(share, relative * 0.88));
        return { ...item, score, relative };
    });
}
|
|
63
|
+
/**
 * Applies the frozen post-scoring adjustments that were kept after the
 * validation runs.
 *
 * This is still deterministic, non-LLM logic: it relies only on the
 * score/evidence features produced by the heuristic scorers.
 *
 * @param answerScores Scored options; each has a numeric `raw` and may carry `evidence`.
 * @param mode Question mode; the pruning multipliers apply only for `"multi"`.
 * @param config Flags read here: `frozenFeatureRanker`, `pairwiseContrastRanker`,
 *   `structuralClusterAdjustments`.
 * @param context Optional extras; only `context.question` is read.
 * @returns The input array itself when the ranker is disabled, otherwise the adjusted scores.
 */
export function applyFrozenFeatureRanker(answerScores, mode, config, context = {}) {
    if (!config.frozenFeatureRanker) {
        return answerScores;
    }
    const allowMultiPruning = mode === "multi" && multiPruningAllowed(context.question ?? "");
    const rescore = (item) => {
        const { bestStructuralScore, structuralWeight, broadOnly, noisySharedOnly } = summarizeEvidence(item);
        // The penalties are gated on the original raw score, not the boosted one.
        const prunable = allowMultiPruning && item.raw >= 8;
        let raw = item.raw;
        if (bestStructuralScore >= 10) {
            // Structural bonus, capped at 1.1.
            raw += Math.min(1.1, bestStructuralScore * 0.035 * structuralWeight);
        }
        if (prunable && broadOnly) {
            raw *= 0.985;
        }
        if (prunable && noisySharedOnly) {
            raw *= 0.975;
        }
        return { ...item, raw };
    };
    let adjusted = answerScores.map((item) => rescore(item));
    if (config.pairwiseContrastRanker) {
        adjusted = applyPairwiseContrast(adjusted, mode);
    }
    if (config.structuralClusterAdjustments) {
        adjusted = applyStructuralClusterAdjustments(adjusted, mode, allowMultiPruning);
    }
    return adjusted;
}
|
|
91
|
+
/**
 * Selects the final answer ids from the calibrated scores for single or
 * multi mode.
 *
 * In multi mode the ids are returned in the order the options appear in
 * `scored`; if nothing survives the filters, the top-scoring option is used.
 *
 * @param scored Scored options (`raw`, `answer.id`, `answer.text`, optional `evidence`).
 * @param mode `"single"` delegates to the single-answer selector; anything else is multi.
 * @param config Thresholds and feature flags for the selection steps.
 * @returns Array of selected answer ids.
 */
export function selectAnswers(scored, mode, config) {
    const byRawDesc = scored.slice().sort((left, right) => right.raw - left.raw);
    if (mode === "single") {
        return selectSingleAnswer(byRawDesc, config);
    }
    const idsOf = (items) => items.map((entry) => entry.answer.id);
    const best = byRawDesc[0]?.raw ?? 0;
    const worst = byRawDesc[byRawDesc.length - 1]?.raw ?? 0;
    const range = Math.max(0.0001, best - worst);
    const cutoff = Math.max(config.multiAbsoluteThreshold, best * config.multiRelativeThreshold);

    // Base pick: above the cutoff, or high enough within the raw range.
    let picked = idsOf(byRawDesc.filter((entry) => {
        const positionInRange = (entry.raw - worst) / range;
        return entry.raw >= cutoff || positionInRange >= config.multiGapThreshold;
    }));

    // Enforce the configured minimum number of answers when enough options exist.
    const minCount = config.multiMinAnswers;
    if (minCount > 1 && picked.length < minCount && byRawDesc.length >= minCount) {
        picked = idsOf(byRawDesc.slice(0, minCount));
    }

    // Promote the third option when it sits close behind the second.
    const thirdIsClose = picked.length === 2 &&
        byRawDesc.length >= 3 &&
        byRawDesc[1].raw - byRawDesc[2].raw <= config.multiThirdGapThreshold &&
        byRawDesc[2].raw >= best * config.multiThirdRelativeThreshold;
    if (thirdIsClose) {
        picked = idsOf(byRawDesc.slice(0, 3));
    }

    if (config.multiAllOptionsGuard) {
        picked = applyMultiAllOptionsGuard(byRawDesc, picked, scored);
    }
    if (config.multiCrowdedTailGuard) {
        picked = applyMultiCrowdedTailGuard(byRawDesc, picked);
    }
    if (config.multiCardinalityModel) {
        // Both cardinality steps are controlled by the same flag.
        picked = applyMultiCardinalityModel(byRawDesc, picked, scored);
        picked = applyStructuralEvidenceGroupCompletion(byRawDesc, picked, scored);
    }

    picked = dedupeSelectedByAnswerText(picked, byRawDesc);
    if (picked.length === 0 && byRawDesc.length > 0) {
        picked = [byRawDesc[0].answer.id];
    }

    const originalIndex = (id) => scored.findIndex((entry) => entry.answer.id === id);
    return picked.sort((a, b) => originalIndex(a) - originalIndex(b));
}
|
|
134
|
+
/**
 * Picks one answer id from options already sorted by descending `raw`.
 *
 * The top option wins unless the specificity tie-break is enabled, the
 * runner-up is close enough (raw gap and raw ratio limits from `config`),
 * and the runner-up's specificity score exceeds the top's by at least
 * `config.singleTieSpecificityGap`.
 *
 * @returns A one-element array, or an empty array when there are no options.
 */
function selectSingleAnswer(sorted, config) {
    const [top, second] = sorted;
    if (!top) {
        return [];
    }
    const keepTop = [top.answer.id];
    if (!config.singleSpecificityTieBreak || !second) {
        return keepTop;
    }
    const tooFarApart = top.raw - second.raw > config.singleTieMaxRawGap;
    const tooWeak = second.raw / Math.max(0.001, top.raw) < config.singleTieMinRawRatio;
    if (tooFarApart || tooWeak) {
        return keepTop;
    }
    const specificityGap = answerSpecificityScore(second.answer.text) - answerSpecificityScore(top.answer.text);
    return specificityGap >= config.singleTieSpecificityGap ? [second.answer.id] : keepTop;
}
|
|
150
|
+
/** Negation cues (Russian word fragments), normalized once at module load. */
const SINGLE_TIE_NEGATION_CUES = [
    "\u043d\u0435",
    "\u043e\u0442\u0441\u0443\u0442\u0441\u0442",
    "\u043d\u0435\u0432\u044b\u043f",
].map((item) => normalizeForSearch(item));

/**
 * Scores how specific an answer text is: 0.02 per character of the
 * normalized text, 0.4 per token, plus 0.5 when a negation cue is present.
 *
 * NOTE(review): cues are matched as plain substrings of the normalized text,
 * so the two-letter cue can also match inside longer words. Confirm this is
 * intended before changing it.
 */
function answerSpecificityScore(answerText) {
    const searchable = normalizeForSearch(answerText);
    let negationBonus = 0;
    for (const cue of SINGLE_TIE_NEGATION_CUES) {
        if (searchable.includes(cue)) {
            negationBonus = 0.5;
            break;
        }
    }
    const tokenCount = tokenize(answerText).length;
    return searchable.length * 0.02 + tokenCount * 0.4 + negationBonus;
}
|
|
156
|
+
/**
 * Condenses an option's evidence list into the features used by the rankers.
 *
 * @param item Scored option; `item.evidence` may be missing.
 * @returns Best overall kind/score, best structural score, highest structural
 *   weight seen, broad/noisy counts, and three derived flags.
 */
function summarizeEvidence(item) {
    const totals = {
        bestKind: "",
        bestScore: 0,
        bestStructuralScore: 0,
        structuralWeight: 0,
        broadCount: 0,
        noisySharedCount: 0,
    };
    for (const entry of item.evidence ?? []) {
        const entryScore = entry.score ?? 0;
        // Strict comparison: the first entry reaching the maximum keeps the title.
        if (entryScore > totals.bestScore) {
            totals.bestScore = entryScore;
            totals.bestKind = entry.kind;
        }
        const kindWeight = STRUCTURAL_EVIDENCE_WEIGHTS.get(entry.kind) ?? 0;
        if (kindWeight > 0) {
            // Score and weight maxima are tracked independently of each other.
            totals.bestStructuralScore = Math.max(totals.bestStructuralScore, entryScore);
            totals.structuralWeight = Math.max(totals.structuralWeight, kindWeight);
        }
        if (BROAD_EVIDENCE_KINDS.has(entry.kind)) {
            totals.broadCount += 1;
        }
        if (NOISY_SHARED_EVIDENCE_KINDS.has(entry.kind)) {
            totals.noisySharedCount += 1;
        }
    }
    const { bestStructuralScore, broadCount, noisySharedCount } = totals;
    return {
        ...totals,
        hasStructural: bestStructuralScore > 0,
        broadOnly: broadCount > 0 && bestStructuralScore <= 0,
        noisySharedOnly: noisySharedCount > 0 && bestStructuralScore <= 0,
    };
}
|
|
191
|
+
/**
 * Single-mode contrast between the two best options: when they are nearly
 * tied on `raw` but the runner-up has clearly stronger structural evidence,
 * the runner-up's `raw` gets a bonus of at most 0.7.
 *
 * @returns The input array itself when nothing changes, otherwise a mapped copy.
 */
function applyPairwiseContrast(answerScores, mode) {
    if (mode !== "single" || answerScores.length < 2) {
        return answerScores;
    }
    const [top, second] = [...answerScores].sort((left, right) => right.raw - left.raw);
    const rawGap = top.raw - second.raw;
    const rawRatio = second.raw / Math.max(0.001, top.raw);
    if (rawGap > 0.85 || rawRatio < 0.965) {
        return answerScores;
    }
    // Structural strength = best structural score times its weight (at least 0.75).
    const strength = (summary) => summary.bestStructuralScore * Math.max(0.75, summary.structuralWeight);
    const topSummary = summarizeEvidence(top);
    const secondSummary = summarizeEvidence(second);
    const structuralAdvantage = strength(secondSummary) - strength(topSummary);
    if (structuralAdvantage < 3.8 || secondSummary.bestStructuralScore < 9) {
        return answerScores;
    }
    const bonus = Math.min(0.7, structuralAdvantage * 0.08);
    const boostedId = second.answer.id;
    return answerScores.map((item) => {
        if (item.answer.id !== boostedId) {
            return item;
        }
        return { ...item, raw: item.raw + bonus };
    });
}
|
|
209
|
+
/**
 * Multi-mode penalty for options that share the same long noisy evidence.
 *
 * Options are clustered by the page, kind and normalized text prefix of their
 * first noisy-shared evidence entry (text of 60+ characters). In clusters of
 * three or more, every member after the top two that has no structural
 * evidence and scores at least 92% of the cluster leader loses 3.5% of `raw`.
 *
 * @returns The input array itself when nothing is penalized, otherwise a mapped copy.
 */
function applyStructuralClusterAdjustments(answerScores, mode, allowMultiPruning) {
    if (mode !== "multi") {
        return answerScores;
    }
    if (!allowMultiPruning) {
        return answerScores;
    }
    if (answerScores.length < 4) {
        return answerScores;
    }
    const PENALTY = 0.035;
    const clusters = new Map();
    for (const item of answerScores) {
        const anchor = (item.evidence ?? []).find((entry) => NOISY_SHARED_EVIDENCE_KINDS.has(entry.kind) && (entry.text?.length ?? 0) >= 60);
        if (!anchor) {
            continue;
        }
        const key = `${anchor.page}:${anchor.kind}:${normalizeForSearch(anchor.text).slice(0, 180)}`;
        const members = clusters.get(key);
        if (members) {
            members.push(item);
        }
        else {
            clusters.set(key, [item]);
        }
    }
    const penalties = new Map();
    for (const members of clusters.values()) {
        if (members.length < 3) {
            continue;
        }
        const ranked = [...members].sort((left, right) => right.raw - left.raw);
        const leaderRaw = ranked[0]?.raw ?? 0;
        // The two strongest members of a cluster are never penalized.
        for (const item of ranked.slice(2)) {
            if (summarizeEvidence(item).hasStructural) {
                continue;
            }
            const belowLeaderBand = item.raw < leaderRaw * 0.92;
            if (belowLeaderBand) {
                continue;
            }
            const id = item.answer.id;
            penalties.set(id, Math.max(penalties.get(id) ?? 0, PENALTY));
        }
    }
    if (penalties.size === 0) {
        return answerScores;
    }
    return answerScores.map((item) => {
        const penalty = penalties.get(item.answer.id) ?? 0;
        if (!penalty) {
            return item;
        }
        return { ...item, raw: item.raw * (1 - penalty) };
    });
}
|
|
244
|
+
/**
 * Decides whether multi-answer pruning may be applied to a question.
 *
 * Pruning is allowed only when the normalized question contains one of the
 * recommendation-style cues and none of the broad-list cues. All cues are
 * Russian word fragments stored as unicode escapes.
 *
 * @param question Question text.
 * @returns `true` when pruning is allowed.
 */
function multiPruningAllowed(question) {
    const normalized = normalizeForSearch(question);
    const mentions = (cue) => normalized.includes(normalizeForSearch(cue));
    const recommendationLike = mentions("\u0440\u0435\u043a\u043e\u043c\u0435\u043d\u0434") ||
        mentions("\u043d\u0430\u0437\u043d\u0430\u0447") ||
        mentions("\u043f\u043e\u043a\u0430\u0437\u0430\u043d");
    if (!recommendationLike) {
        return false;
    }
    const broadListCues = [
        "\u0432\u0441\u0435\u043c \u043f\u0430\u0446\u0438\u0435\u043d\u0442",
        "\u0432\u0441\u0435\u0445 \u043f\u0430\u0446\u0438\u0435\u043d\u0442",
        "\u0434\u0435\u043b\u044f\u0442\u0441\u044f",
        "\u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a",
        "\u0440\u0435\u0436\u0438\u043c \u0434\u043e\u0437\u0438\u0440\u043e\u0432\u0430\u043d",
        "\u0434\u0438\u0444\u0444\u0435\u0440\u0435\u043d\u0446\u0438\u0430\u043b\u044c\u043d\u043e\u0439 \u0434\u0438\u0430\u0433\u043d\u043e\u0441\u0442",
        "\u0438\u0441\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u043d\u0438\u0435 \u0443\u0440\u043e\u0432\u043d\u044f",
    ].map((item) => normalizeForSearch(item));
    // Any broad-list cue vetoes pruning.
    for (const cue of broadListCues) {
        if (normalized.includes(cue)) {
            return false;
        }
    }
    return true;
}
|
|
262
|
+
/**
 * Adjusts how many answers are selected in multi mode.
 *
 * With three or more selected, the weakest selected option is dropped when it
 * has no structural evidence, scores below 74% of the best selected option
 * and trails the previous selected option by more than 0.32. With exactly two
 * selected, the third-ranked option is added when its best structural score
 * is at least 11, its raw is at least 46% of the top raw and it trails the
 * second by at most 1.2.
 *
 * @param sorted Options sorted by descending `raw`.
 * @param selectedIds Currently selected answer ids (not mutated).
 * @param scored Options in their original order, used for the output order.
 * @returns A new id array ordered as in `scored`.
 */
function applyMultiCardinalityModel(sorted, selectedIds, scored) {
    const idOf = (item) => item.answer.id;
    let selected = [...selectedIds];
    if (selected.length >= 3) {
        const chosen = new Set(selected);
        const chosenByRaw = sorted.filter((item) => chosen.has(idOf(item)));
        const weakest = chosenByRaw[chosenByRaw.length - 1];
        const previous = chosenByRaw[chosenByRaw.length - 2];
        if (weakest && previous) {
            const lacksStructure = !summarizeEvidence(weakest).hasStructural;
            const leaderRaw = chosenByRaw[0]?.raw ?? 0;
            const farBelowLeader = weakest.raw < leaderRaw * 0.74;
            const detached = previous.raw - weakest.raw > 0.32;
            if (lacksStructure && farBelowLeader && detached) {
                selected = selected.filter((id) => id !== idOf(weakest));
            }
        }
    }
    if (selected.length === 2 && sorted.length >= 3) {
        const third = sorted[2];
        const thirdStructural = summarizeEvidence(third).bestStructuralScore;
        const leaderRaw = sorted[0]?.raw ?? 0;
        const promote = thirdStructural >= 11 &&
            third.raw >= leaderRaw * 0.46 &&
            sorted[1].raw - third.raw <= 1.2;
        if (promote) {
            selected = sorted.slice(0, 3).map(idOf);
        }
    }
    const originalIndex = (id) => scored.findIndex((item) => idOf(item) === id);
    return selected.sort((a, b) => originalIndex(a) - originalIndex(b));
}
|
|
288
|
+
/** Evidence kinds that can form a structural group for completion. */
const STRUCTURAL_GROUP_COMPLETION_KINDS = new Set([
    "coordinate_table_multicell_row",
    "coordinate_table_membership",
]);

/**
 * Builds the grouping key of an evidence entry, or `""` when the entry does
 * not qualify (wrong kind, score under 18 for membership / 24 otherwise, or
 * text shorter than 80 characters).
 *
 * The key combines kind, page and the first 520 characters of the
 * normalized text.
 */
function structuralGroupEvidenceKey(evidence) {
    const { kind } = evidence;
    if (!STRUCTURAL_GROUP_COMPLETION_KINDS.has(kind)) {
        return "";
    }
    const requiredScore = kind === "coordinate_table_membership" ? 18 : 24;
    const score = evidence.score ?? 0;
    const textLength = evidence.text?.length ?? 0;
    if (score < requiredScore || textLength < 80) {
        return "";
    }
    return `${kind}:${evidence.page}:${normalizeForSearch(evidence.text).slice(0, 520)}`;
}
|
|
297
|
+
/**
 * Completes a structural evidence group: when at least two selected options
 * share the same structural group key, up to two unselected options from
 * such a group are added, provided their raw score is at least
 * `max(12, topRaw * ratio)` (ratio 0.35 for membership evidence, 0.42 otherwise).
 *
 * @param sorted Options sorted by descending `raw`.
 * @param selectedIds Currently selected answer ids.
 * @param scored Options in their original order, used for the output order.
 * @returns `selectedIds` itself when nothing is added, otherwise a new array
 *   ordered as in `scored`.
 */
function applyStructuralEvidenceGroupCompletion(sorted, selectedIds, scored) {
    if (selectedIds.length < 2 || sorted.length < 3) {
        return selectedIds;
    }
    const chosen = new Set(selectedIds);

    // Count, per group key, how many selected options carry it (once per option).
    const groupHits = new Map();
    for (const item of sorted) {
        if (!chosen.has(item.answer.id)) {
            continue;
        }
        const keys = new Set();
        for (const entry of item.evidence ?? []) {
            const key = structuralGroupEvidenceKey(entry);
            if (key) {
                keys.add(key);
            }
        }
        for (const key of keys) {
            groupHits.set(key, (groupHits.get(key) ?? 0) + 1);
        }
    }
    const strongGroups = new Set();
    for (const [key, hits] of groupHits) {
        if (hits >= 2) {
            strongGroups.add(key);
        }
    }
    if (strongGroups.size === 0) {
        return selectedIds;
    }

    const topRaw = sorted[0]?.raw ?? 0;
    const additions = [];
    for (const item of sorted) {
        if (chosen.has(item.answer.id)) {
            continue;
        }
        const match = (item.evidence ?? []).find((entry) => strongGroups.has(structuralGroupEvidenceKey(entry)));
        if (!match) {
            continue;
        }
        const ratio = match.kind === "coordinate_table_membership" ? 0.35 : 0.42;
        if (item.raw < Math.max(12, topRaw * ratio)) {
            continue;
        }
        additions.push(item.answer.id);
        // At most two options are added.
        if (additions.length >= 2) {
            break;
        }
    }
    if (additions.length === 0) {
        return selectedIds;
    }
    const originalIndex = (id) => scored.findIndex((item) => item.answer.id === id);
    return [...selectedIds, ...additions].sort((a, b) => originalIndex(a) - originalIndex(b));
}
|
|
331
|
+
/**
 * Guard against selecting every option: when a question has 3 or 4 options
 * and all of them are selected, only the two best-scoring ones are kept.
 *
 * @param sorted Options sorted by descending `raw`.
 * @param selectedIds Currently selected answer ids.
 * @param scored All scored options; only its length is used.
 * @returns `selectedIds` itself when the guard does not fire.
 */
function applyMultiAllOptionsGuard(sorted, selectedIds, scored) {
    const optionCount = scored.length;
    const everythingSelected = selectedIds.length === optionCount;
    const outsideGuardedSize = optionCount < 3 || optionCount > 4;
    if (!everythingSelected || outsideGuardedSize) {
        return selectedIds;
    }
    const [first, second] = sorted.slice(0, 2);
    return [first, second].map((item) => item.answer.id);
}
|
|
338
|
+
/**
 * Guard for four-option questions with three selected answers: when the top
 * option leads the second and the third and fourth options are less than
 * 0.3 apart, only the two best-scoring options are kept.
 *
 * @param sorted Options sorted by descending `raw`.
 * @param selectedIds Currently selected answer ids.
 * @returns `selectedIds` itself when the guard does not fire.
 */
function applyMultiCrowdedTailGuard(sorted, selectedIds) {
    if (sorted.length !== 4 || selectedIds.length !== 3) {
        return selectedIds;
    }
    const [first, second, third, fourth] = sorted;
    const topGap = first.raw - second.raw;
    const tailGap = third.raw - fourth.raw;
    const guardApplies = !(topGap <= 0 || tailGap >= 0.3);
    if (!guardApplies) {
        return selectedIds;
    }
    return [first.answer.id, second.answer.id];
}
|
|
347
|
+
/**
 * Removes selected answers whose normalized text duplicates a higher-ranked
 * selected answer. Answers whose normalized text is empty are always kept.
 *
 * @param selectedIds Currently selected answer ids.
 * @param sorted Options sorted by descending `raw`; defines which duplicate wins.
 * @returns Kept ids in `sorted` order, or `selectedIds` itself when fewer than
 *   two ids are selected or nothing would be kept.
 */
function dedupeSelectedByAnswerText(selectedIds, sorted) {
    if (selectedIds.length < 2) {
        return selectedIds;
    }
    const wanted = new Set(selectedIds);
    const seenTexts = new Set();
    const kept = sorted
        .filter((item) => {
            if (!wanted.has(item.answer.id)) {
                return false;
            }
            const textKey = normalizeForSearch(item.answer.text);
            if (!textKey) {
                return true;
            }
            if (seenTexts.has(textKey)) {
                return false;
            }
            seenTexts.add(textKey);
            return true;
        })
        .map((item) => item.answer.id);
    return kept.length > 0 ? kept : selectedIds;
}
|
|
365
|
+
/**
 * Rounds a numeric score to four decimal places.
 *
 * Anything that is not a finite number (NaN, Infinity, non-numbers) is
 * treated as 0.
 */
export function round4(value) {
    const SCALE = 10000;
    const finite = Number.isFinite(value) ? value : 0;
    return Math.round(finite * SCALE) / SCALE;
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
 * Returns the name of the cue group closest to the center of the local window.
 *
 * Shared helper: used by the temporal/condition scorers to choose which of
 * several mutually exclusive cue sets (for example, day/night or statuses)
 * is closer to the focus.
 */
export declare function nearestCueName(local: any, entries: any): any;
export declare function rawTokens(text: any): [] | RegExpMatchArray;
/**
 * Checks whether a token occurs delimited by spaces or the string edges.
 *
 * Shared helper: keeps a short token from matching as a substring inside
 * another word (for example, `i` inside `ii`).
 */
export declare function tokenBoundaryIncludes(normalizedText: any, normalizedToken: any): boolean;
export declare function findPhraseOccurrences(text: any, phrase: any, { textIsNormalized }?: {
    textIsNormalized?: boolean;
}): any[];
export declare function hasSearchBoundaries(text: any, index: any, length: any): boolean;
export declare function answerSearchPhrases(answerText: any): any[];
export declare function focusedAnswerSearchPhrases(answerText: any): unknown[];
export declare function containsPhrase(haystack: any, needle: any): boolean;
export declare function containsNormalizedPhrase(normalizedHaystack: any, needle: any): boolean;
export declare function tokenizeNormalized(text: any): any[];
export declare function tokenSequenceIncludes(haystackTokens: any, needleTokens: any): boolean;
export declare function rawSoftCoverage(queryTokens: any, documentTokens: any): number;
export declare function escapeRegExp(value: any): string;
export declare function softCoverage(queryTokens: string[], documentTokens: string[]): number;
export declare function strictSoftCoverage(queryTokens: string[], documentTokens: string[]): number;
export declare function tokenHitCount(queryTokens: any, documentTokens: any): number;
export declare function evidenceFromChunk(answerIdValue: any, chunk: any, score: any, kind: any): {
    answerId: any;
    page: any;
    text: any;
    score: any;
    kind: any;
};
export declare function betterEvidence(left: any, right: any): any;
export declare function evidenceSnippet(pageText: any, ...needles: any[]): string;
export declare function numberCoverage(answer: any, text: any): number;
export declare function expandNumberToken(token: any): any[];
export declare function tokenProximity(questionTokens: any, answerTokens: any, documentTokens: any): number;
export declare function cachedPageTokens(page: any): any;
export declare function lineWindowSegments(page: any, radius?: number): any[];
export declare function cachedLineWindowSegments(page: any): any;
export declare function pageWindow(page: any, center: any, radius?: number): any;
export declare function proximityBonus(distance: any, radius: any): number;
export declare function answerHasQuestionNumbers(answer: any, question: any): boolean;
|