med-pdf-nmo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70):
  1. package/LICENSE +21 -0
  2. package/README.md +298 -0
  3. package/README.ru.md +298 -0
  4. package/dist/bm25.d.ts +47 -0
  5. package/dist/bm25.js +86 -0
  6. package/dist/browser-shims/buffer.d.ts +30 -0
  7. package/dist/browser-shims/buffer.js +31 -0
  8. package/dist/browser-shims/crypto.d.ts +33 -0
  9. package/dist/browser-shims/crypto.js +45 -0
  10. package/dist/browser-shims/fs-promises.d.ts +13 -0
  11. package/dist/browser-shims/fs-promises.js +25 -0
  12. package/dist/browser-shims/fs.d.ts +14 -0
  13. package/dist/browser-shims/fs.js +24 -0
  14. package/dist/browser-shims/globals.d.ts +9 -0
  15. package/dist/browser-shims/globals.js +23 -0
  16. package/dist/browser-shims/path.d.ts +57 -0
  17. package/dist/browser-shims/path.js +65 -0
  18. package/dist/browser-shims/process.d.ts +22 -0
  19. package/dist/browser-shims/process.js +27 -0
  20. package/dist/browser.d.ts +9 -0
  21. package/dist/browser.js +12 -0
  22. package/dist/chunk.d.ts +15 -0
  23. package/dist/chunk.js +76 -0
  24. package/dist/cli.d.ts +2 -0
  25. package/dist/cli.js +87 -0
  26. package/dist/index.d.ts +82 -0
  27. package/dist/index.js +51 -0
  28. package/dist/med-pdf-nmo.browser.js +40413 -0
  29. package/dist/med-pdf-nmo.browser.mjs +40395 -0
  30. package/dist/normalize.d.ts +73 -0
  31. package/dist/normalize.js +477 -0
  32. package/dist/pdf.d.ts +35 -0
  33. package/dist/pdf.js +396 -0
  34. package/dist/predictor/config.d.ts +28 -0
  35. package/dist/predictor/config.js +26 -0
  36. package/dist/predictor/constants.d.ts +3 -0
  37. package/dist/predictor/constants.js +59 -0
  38. package/dist/predictor/runtime.d.ts +15 -0
  39. package/dist/predictor/runtime.js +59 -0
  40. package/dist/predictor/scorers/biomedical-symbols.d.ts +36 -0
  41. package/dist/predictor/scorers/biomedical-symbols.js +347 -0
  42. package/dist/predictor/scorers/coordinate-table.d.ts +82 -0
  43. package/dist/predictor/scorers/coordinate-table.js +1210 -0
  44. package/dist/predictor/scorers/direction.d.ts +71 -0
  45. package/dist/predictor/scorers/direction.js +345 -0
  46. package/dist/predictor/scorers/drug-dose.d.ts +6 -0
  47. package/dist/predictor/scorers/drug-dose.js +221 -0
  48. package/dist/predictor/scorers/exact-answer.d.ts +10 -0
  49. package/dist/predictor/scorers/exact-answer.js +75 -0
  50. package/dist/predictor/scorers/fibrosis-stage.d.ts +6 -0
  51. package/dist/predictor/scorers/fibrosis-stage.js +103 -0
  52. package/dist/predictor/scorers/focused.d.ts +40 -0
  53. package/dist/predictor/scorers/focused.js +204 -0
  54. package/dist/predictor/scorers/frequency.d.ts +10 -0
  55. package/dist/predictor/scorers/frequency.js +203 -0
  56. package/dist/predictor/scorers/numeric.d.ts +77 -0
  57. package/dist/predictor/scorers/numeric.js +1161 -0
  58. package/dist/predictor/scorers/recommendation-item.d.ts +27 -0
  59. package/dist/predictor/scorers/recommendation-item.js +469 -0
  60. package/dist/predictor/scorers/search.d.ts +41 -0
  61. package/dist/predictor/scorers/search.js +515 -0
  62. package/dist/predictor/selection.d.ts +30 -0
  63. package/dist/predictor/selection.js +370 -0
  64. package/dist/predictor/text-utils.d.ts +49 -0
  65. package/dist/predictor/text-utils.js +497 -0
  66. package/dist/predictor/types.d.ts +23 -0
  67. package/dist/predictor/types.js +1 -0
  68. package/dist/predictor.d.ts +52 -0
  69. package/dist/predictor.js +3834 -0
  70. package/package.json +82 -0
@@ -0,0 +1,1210 @@
1
+ import { coverage, extractNumbers, normalizeForSearch, tokenize, uniqueTokens } from "../../normalize.js";
2
+ import { FOCUS_STOPWORDS } from "../constants.js";
3
+ import { answerSearchPhrases, betterEvidence, containsNormalizedPhrase, expandNumberToken, numberCoverage, strictSoftCoverage, tokenizeNormalized, tokenHitCount, } from "../text-utils.js";
4
// Token vocabularies used by the coordinate-table scorers. Each set is built by
// running every space-separated Russian phrase through uniqueTokens and
// collecting the resulting tokens.

// Generic table wording: "table", "according to", "indicator", "value",
// "criterion", "sign", "classification", "gradation", "indications", "amounts to".
const COORDINATE_TABLE_GENERIC_TOKENS = new Set(
    [
        "\u0442\u0430\u0431\u043b\u0438\u0446\u0430 \u0442\u0430\u0431\u043b\u0438\u0446\u0435 \u0442\u0430\u0431\u043b\u0438\u0447\u043d\u044b\u0439 \u0441\u043e\u0433\u043b\u0430\u0441\u043d\u043e",
        "\u043f\u043e\u043a\u0430\u0437\u0430\u0442\u0435\u043b\u044c \u043f\u043e\u043a\u0430\u0437\u0430\u0442\u0435\u043b\u0438 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044f",
        "\u043a\u0440\u0438\u0442\u0435\u0440\u0438\u0439 \u043a\u0440\u0438\u0442\u0435\u0440\u0438\u0438 \u043f\u0440\u0438\u0437\u043d\u0430\u043a \u043f\u0440\u0438\u0437\u043d\u0430\u043a\u0438",
        "\u043a\u043b\u0430\u0441\u0441\u0438\u0444\u0438\u043a\u0430\u0446\u0438\u044f \u043a\u043b\u0430\u0441\u0441\u0438\u0444\u0438\u043a\u0430\u0446\u0438\u0438 \u0433\u0440\u0430\u0434\u0430\u0446\u0438\u044f",
        "\u043f\u043e\u043a\u0430\u0437\u0430\u043d\u0438\u044f \u043f\u043e\u043a\u0430\u0437\u0430\u043d\u0438\u0435 \u0441\u043e\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u0442 \u0441\u043e\u0441\u0442\u0430\u0432\u043b\u044f\u044e\u0442",
    ].flatMap((phrase) => uniqueTokens(phrase)),
);

// Cue wording that marks a question or line as table-like: "table", "scale",
// "classification", "degree", "stage", "class", "category", "indicator",
// "indications", "criterion", "gradation", "absolute", "relative".
const COORDINATE_TABLE_CUE_TOKENS = new Set(
    [
        "\u0442\u0430\u0431\u043b\u0438\u0446\u0430 \u0442\u0430\u0431\u043b\u0438\u0446\u0435 \u0448\u043a\u0430\u043b\u0430 \u0448\u043a\u0430\u043b\u0435 \u043a\u043b\u0430\u0441\u0441\u0438\u0444\u0438\u043a\u0430\u0446\u0438\u044f \u043a\u043b\u0430\u0441\u0441\u0438\u0444\u0438\u043a\u0430\u0446\u0438\u0438",
        "\u0441\u0442\u0435\u043f\u0435\u043d\u044c \u0441\u0442\u0435\u043f\u0435\u043d\u0438 \u0441\u0442\u0430\u0434\u0438\u044f \u0441\u0442\u0430\u0434\u0438\u0438 \u043a\u043b\u0430\u0441\u0441 \u043a\u043b\u0430\u0441\u0441\u0430 \u043a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u044f \u043a\u0430\u0442\u0435\u0433\u043e\u0440\u0438\u0438",
        "\u043f\u043e\u043a\u0430\u0437\u0430\u0442\u0435\u043b\u044c \u043f\u043e\u043a\u0430\u0437\u0430\u0442\u0435\u043b\u0438 \u043f\u043e\u043a\u0430\u0437\u0430\u043d\u0438\u044f \u043f\u043e\u043a\u0430\u0437\u0430\u043d\u0438\u0435",
        "\u043a\u0440\u0438\u0442\u0435\u0440\u0438\u0439 \u043a\u0440\u0438\u0442\u0435\u0440\u0438\u0438 \u0433\u0440\u0430\u0434\u0430\u0446\u0438\u044f \u0430\u0431\u0441\u043e\u043b\u044e\u0442\u043d\u044b\u0435 \u043e\u0442\u043d\u043e\u0441\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u0435",
    ].flatMap((phrase) => uniqueTokens(phrase)),
);

// Generic drug / group / therapy wording ("drug", "group", "agent", "therapy",
// "treatment", "is used", "topically", "ointment", "suppository", "goal",
// "risk", "reduction", ...).
const COORDINATE_TABLE_MEMBERSHIP_GENERIC_TOKENS = new Set(
    [
        "препарат препараты лекарственный лекарственные лекарственных",
        "группа группы групп средство средства терапия терапии лечение лечения",
        "применяют применяется применяются используются назначают назначение",
        "местно виде мазь мазей суппозиторий суппозиториев",
        "цель цели достижение достижения риск риска снижение снижения",
    ].flatMap((phrase) => uniqueTokens(phrase)),
);
24
/**
 * Checks that the span `[index, index + length)` of `normalizedText` is not
 * glued to a Latin letter, a Cyrillic letter or a digit on either side.
 *
 * The start and the end of the string count as boundaries.
 *
 * @param {string} normalizedText Text in which the span was found.
 * @param {number} index Start offset of the span.
 * @param {number} length Length of the span.
 * @returns {boolean} True when neither neighbouring character is a letter or digit.
 */
function numericSearchBoundary(normalizedText, index, length) {
    // "ё"/"Ё" sit outside the contiguous а-я range (U+0430–U+044F), so they are
    // listed explicitly; otherwise a span touching "ё" would look standalone.
    const wordCharacter = /[a-zа-яё0-9]/iu;
    const before = index > 0 ? normalizedText[index - 1] : "";
    const after = index + length < normalizedText.length ? normalizedText[index + length] : "";
    return !wordCharacter.test(before) && !wordCharacter.test(after);
}
29
/**
 * Fast gate for the coordinate-table scorers: enables them only when the
 * question looks like it is about a table, scale, classification, degree,
 * stage or numeric criterion.
 *
 * @param {*} question Question text; nullish values are treated as "" for the substring check.
 * @param {Iterable<string>} [focusTokens] Extra tokens checked against the cue-token set.
 * @returns {boolean} True when a cue stem or cue token is present.
 */
export function hasCoordinateTableCue(question, focusTokens) {
    const loweredQuestion = String(question ?? "").toLowerCase();
    const cueStems = [
        "\u0442\u0430\u0431\u043b\u0438\u0446",
        "\u0448\u043a\u0430\u043b",
        "\u043a\u043b\u0430\u0441\u0441\u0438\u0444",
        "\u0441\u0442\u0435\u043f\u0435\u043d",
        "\u0441\u0442\u0430\u0434",
        "\u043a\u043b\u0430\u0441\u0441",
        "\u043a\u0430\u0442\u0435\u0433\u043e\u0440",
        "\u043f\u043e\u043a\u0430\u0437\u0430\u0442\u0435\u043b",
        "\u043f\u043e\u043a\u0430\u0437\u0430\u043d",
        "\u043a\u0440\u0438\u0442\u0435\u0440",
        "\u0433\u0440\u0430\u0434\u0430\u0446",
        "\u0430\u0431\u0441\u043e\u043b\u044e\u0442",
        "\u043e\u0442\u043d\u043e\u0441\u0438\u0442\u0435\u043b",
    ];
    // Cheap path first: a plain substring hit on the lower-cased question.
    for (const stem of cueStems) {
        if (loweredQuestion.includes(stem)) {
            return true;
        }
    }
    // Fallback: exact membership of any focus or question token in the cue set.
    const candidateTokens = new Set([...(focusTokens ?? []), ...uniqueTokens(question)]);
    for (const token of candidateTokens) {
        if (COORDINATE_TABLE_CUE_TOKENS.has(token)) {
            return true;
        }
    }
    return false;
}
55
/**
 * Broader gate for multi-answer groups: besides explicit table questions it
 * also admits list-like questions and wording about groups, composition or
 * combinations, where the answers often live in a single table row.
 *
 * @param {*} question Question text.
 * @param {Iterable<string>} [focusTokens] Extra tokens checked against the cue tokens.
 * @param {{ listLike?: boolean }} [intent] Question intent; a truthy `listLike` opens the gate.
 * @returns {boolean} True when the group scorers should run.
 */
export function hasCoordinateTableGroupCue(question, focusTokens, intent) {
    if (hasCoordinateTableCue(question, focusTokens) || intent?.listLike) {
        return true;
    }
    const normalizedQuestion = normalizeForSearch(question);
    // Stems: "group", "belong/relate", "include", "composition", "represent", "combination".
    const normalizedStems = [
        "\u0433\u0440\u0443\u043f\u043f",
        "\u043e\u0442\u043d\u043e\u0441",
        "\u0432\u043a\u043b\u044e\u0447",
        "\u0441\u043e\u0441\u0442\u0430\u0432",
        "\u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432",
        "\u043a\u043e\u043c\u0431\u0438\u043d\u0430\u0446",
    ].map((stem) => normalizeForSearch(stem));
    for (const stem of normalizedStems) {
        if (containsNormalizedPhrase(normalizedQuestion, stem)) {
            return true;
        }
    }
    const candidateTokens = [...new Set([...(focusTokens ?? []), ...uniqueTokens(question)])];
    const cueTokens = [...COORDINATE_TABLE_CUE_TOKENS];
    return tokenHitCount(cueTokens, candidateTokens) > 0;
}
79
/**
 * Returns the cell's text with whitespace runs collapsed to single spaces and
 * the ends trimmed; a missing cell or missing text yields "".
 *
 * @param {{ text?: * }} [cell] Cell object.
 * @returns {string} Cleaned cell text.
 */
function coordinateCellText(cell) {
    const rawText = cell?.text ?? "";
    const collapsed = String(rawText).replace(/\s+/g, " ");
    return collapsed.trim();
}
82
/**
 * Splits one PDF line into cells by horizontal position.
 *
 * Items with non-blank text are processed left to right. An item opens a new
 * cell only when it is both more than 18 units past the previous cell's right
 * edge and more than 34 units past its left edge; otherwise its text is merged
 * into the previous cell.
 *
 * @param {{ items?: Iterable<object>, y?: number }} [line] Line whose `items` carry `text`, `x`, and optionally `width` / `y`.
 * @returns {Array<{ text: string, x: number, endX: number, y: number, itemCount: number }>} Cells ordered by x.
 */
function coordinateLineCells(line) {
    const orderedItems = [...(line?.items ?? [])]
        .filter((item) => String(item?.text ?? "").trim())
        .sort((left, right) => (left.x ?? 0) - (right.x ?? 0));
    const cells = [];
    for (const item of orderedItems) {
        const text = String(item.text ?? "").replace(/\s+/g, " ").trim();
        const x = item.x ?? 0;
        // Without a measured width, estimate one from the text length.
        const width = item.width ?? Math.max(8, text.length * 4.2);
        const endX = x + Math.max(width, 4);
        const lastCell = cells[cells.length - 1];
        const opensNewCell = !lastCell || (x - lastCell.endX > 18 && x - lastCell.x > 34);
        if (opensNewCell) {
            cells.push({ text, x, endX, y: item.y ?? line?.y ?? 0, itemCount: 1 });
        }
        else {
            lastCell.text = `${lastCell.text} ${text}`.replace(/\s+/g, " ").trim();
            lastCell.endX = Math.max(lastCell.endX, endX);
            lastCell.itemCount += 1;
        }
    }
    return cells.filter((cell) => coordinateCellText(cell));
}
110
/**
 * Variant of the line-to-cells split used for label/value groups.
 *
 * It splits more eagerly than coordinateLineCells: an item opens a new cell
 * when it is more than 18 units past the previous cell's right edge OR more
 * than 64 units past its left edge.
 *
 * @param {{ items?: Iterable<object>, y?: number }} [line] Line whose `items` carry `text`, `x`, and optionally `width` / `y`.
 * @returns {Array<{ text: string, x: number, endX: number, y: number, itemCount: number }>} Cells ordered by x.
 */
function coordinateGroupLineCells(line) {
    const byX = (left, right) => (left.x ?? 0) - (right.x ?? 0);
    const hasText = (item) => String(item?.text ?? "").trim();
    const orderedItems = [...(line?.items ?? [])].filter(hasText).sort(byX);
    const cells = [];
    for (const item of orderedItems) {
        const text = String(item.text ?? "").replace(/\s+/g, " ").trim();
        const x = item.x ?? 0;
        // Without a measured width, estimate one from the text length.
        const width = item.width ?? Math.max(8, text.length * 4.2);
        const endX = x + Math.max(width, 4);
        const lastCell = cells[cells.length - 1];
        if (lastCell && !(x - lastCell.endX > 18 || x - lastCell.x > 64)) {
            lastCell.text = `${lastCell.text} ${text}`.replace(/\s+/g, " ").trim();
            lastCell.endX = Math.max(lastCell.endX, endX);
            lastCell.itemCount += 1;
            continue;
        }
        cells.push({ text, x, endX, y: item.y ?? line?.y ?? 0, itemCount: 1 });
    }
    return cells.filter((cell) => coordinateCellText(cell));
}
138
/**
 * Horizontal extent covered by the cells: the largest `endX` minus the
 * smallest `x`. Fewer than two cells give 0.
 *
 * @param {Array<{ x: number, endX: number }>} cells Cells of one line.
 * @returns {number} Width of the span covered by the cells.
 */
function coordinateCellsSpread(cells) {
    if (cells.length < 2) {
        return 0;
    }
    let rightEdge = -Infinity;
    let leftEdge = Infinity;
    for (const cell of cells) {
        rightEdge = Math.max(rightEdge, cell.endX);
        leftEdge = Math.min(leftEdge, cell.x);
    }
    return rightEdge - leftEdge;
}
143
/**
 * Tells whether at least one cell carries a number (per extractNumbers) or a
 * comparison sign (`<`, `>`, `≤`, `≥`, `=`).
 *
 * @param {Array<{ text?: string }>} cells Cells of one line.
 * @returns {boolean} True when a numeric-looking cell exists.
 */
function coordinateCellsHaveNumericValue(cells) {
    const comparisonSign = /[<>≤≥=]/u;
    for (const cell of cells) {
        if (extractNumbers(cell.text).length > 0) {
            return true;
        }
        if (comparisonSign.test(String(cell.text ?? ""))) {
            return true;
        }
    }
    return false;
}
146
/**
 * Decides whether a line is laid out like a table row.
 *
 * A line qualifies when its text is at most 340 characters and either it has
 * three or more cells spread over at least 135 units, or it has two or more
 * cells spread over at least 190 units with a numeric value among them.
 *
 * @param {{ text?: string }} [line] Source line.
 * @param {Array<object>} [cells] Pre-computed cells; defaults to coordinateLineCells(line).
 * @returns {boolean} True for table-like lines.
 */
function isCoordinateTableLine(line, cells = coordinateLineCells(line)) {
    if (cells.length === 0) {
        return false;
    }
    const lineText = String(line?.text ?? "").replace(/\s+/g, " ").trim();
    const spread = coordinateCellsSpread(cells);
    if (lineText.length > 340) {
        return false;
    }
    const wideMultiColumn = cells.length >= 3 && spread >= 135;
    if (wideMultiColumn) {
        return true;
    }
    return cells.length >= 2 && spread >= 190 && coordinateCellsHaveNumericValue(cells);
}
159
/**
 * Tells whether the line's text shares at least one token with the table cue
 * vocabulary, as counted by tokenHitCount.
 *
 * @param {{ text?: string }} [line] Source line.
 * @returns {boolean} True when a cue token is hit.
 */
function coordinateLineHasHeaderCue(line) {
    const lineTokens = tokenize(line?.text ?? "");
    const cueTokens = [...COORDINATE_TABLE_CUE_TOKENS];
    const hits = tokenHitCount(cueTokens, lineTokens);
    return hits > 0;
}
163
/**
 * Detects caption-like text mentioning a table, scale, gradation or
 * classification. Text containing "согласно таблиц…" ("according to the
 * table") is rejected outright.
 *
 * @param {string} text Candidate caption text.
 * @returns {*} Falsy when rejected or nothing matched; otherwise the truthy result of the first matching containsNormalizedPhrase call.
 */
function coordinateTextHasTableCaption(text) {
    const normalized = normalizeForSearch(text);
    if (containsNormalizedPhrase(normalized, "\u0441\u043e\u0433\u043b\u0430\u0441\u043d\u043e \u0442\u0430\u0431\u043b\u0438\u0446")) {
        return false;
    }
    const captionStems = [
        "\u0442\u0430\u0431\u043b\u0438\u0446",
        "\u0448\u043a\u0430\u043b",
        "\u0433\u0440\u0430\u0434\u0430\u0446",
        "\u043a\u043b\u0430\u0441\u0441\u0438\u0444",
    ];
    // Mirrors an `a || b || c || d` chain: stop at the first truthy result,
    // otherwise hand back the last (falsy) one.
    let matched = false;
    for (const stem of captionStems) {
        matched = containsNormalizedPhrase(normalized, stem);
        if (matched) {
            break;
        }
    }
    return matched;
}
172
/**
 * Stricter caption check: only the "таблиц…" ("table") stem counts, and text
 * containing "согласно таблиц…" ("according to the table") is rejected.
 *
 * @param {string} text Candidate caption text.
 * @returns {*} `false` when rejected; otherwise the result of the "таблиц" phrase check.
 */
function coordinateTextHasExplicitTableCaption(text) {
    const normalized = normalizeForSearch(text);
    const isCrossReference = containsNormalizedPhrase(normalized, "\u0441\u043e\u0433\u043b\u0430\u0441\u043d\u043e \u0442\u0430\u0431\u043b\u0438\u0446");
    return isCrossReference
        ? false
        : containsNormalizedPhrase(normalized, "\u0442\u0430\u0431\u043b\u0438\u0446");
}
178
/**
 * Recognises guideline boilerplate: text that mentions the strength of a
 * recommendation, the certainty of evidence, a recommendation or a comment.
 *
 * The lower-cased raw text is tried first with substring checks; the
 * normalized text is only consulted when none of those hit.
 *
 * @param {*} text Text to inspect.
 * @returns {*} `true` on a raw hit; otherwise the result of the normalized phrase checks.
 */
function coordinateTextIsRecommendationMeta(text) {
    const lowered = String(text ?? "").toLowerCase();
    const rawMarkers = [
        "\u0443\u0440\u043e\u0432\u0435\u043d\u044c \u0443\u0431\u0435\u0434\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442",
        "\u0434\u043e\u0441\u0442\u043e\u0432\u0435\u0440\u043d\u043e\u0441\u0442",
        "\u0440\u0435\u043a\u043e\u043c\u0435\u043d\u0434",
        "\u043a\u043e\u043c\u043c\u0435\u043d\u0442",
    ];
    if (rawMarkers.some((marker) => lowered.includes(marker))) {
        return true;
    }
    const normalized = normalizeForSearch(text);
    const normalizedMarkers = [
        "\u0443\u0440\u043e\u0432\u0435\u043d\u044c \u0443\u0431\u0435\u0434\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442",
        "\u0434\u043e\u0441\u0442\u043e\u0432\u0435\u0440\u043d\u043e\u0441\u0442\u0438 \u0434\u043e\u043a\u0430\u0437",
        "\u0440\u0435\u043a\u043e\u043c\u0435\u043d\u0434",
        "\u043a\u043e\u043c\u043c\u0435\u043d\u0442\u0430\u0440",
    ];
    // Mirrors an `a || b || c || d` chain over the normalized text.
    let matched = false;
    for (const marker of normalizedMarkers) {
        matched = containsNormalizedPhrase(normalized, marker);
        if (matched) {
            break;
        }
    }
    return matched;
}
192
/**
 * Heuristic for "this line is a table data row, not a header".
 *
 * Lines with no cells, or lines that hit a header cue token, are never data
 * rows. Otherwise a line is a data row when its first cell is a bare Roman or
 * Arabic ordinal (optionally followed by "." or ")"), when its first two cells
 * satisfy severityCue, or when it has three or more cells with a numeric value
 * and no "таблиц…" ("table") wording.
 *
 * NOTE(review): severityCue is neither imported nor defined in the portion of
 * the file visible here — confirm it is declared elsewhere in this module.
 *
 * @param {{ text?: string }} [line] Source line.
 * @param {Array<{ text?: string }>} [cells] Pre-computed cells; defaults to coordinateLineCells(line).
 * @returns {boolean} True when the line looks like a data row.
 */
function coordinateLineLooksLikeDataRow(line, cells = coordinateLineCells(line)) {
    if (cells.length === 0) {
        return false;
    }
    if (coordinateLineHasHeaderCue(line)) {
        return false;
    }
    const leadingCell = normalizeForSearch(cells[0]?.text ?? "");
    const startsWithOrdinal = /^(?:[ivxlcdm]+|\d+(?:[.)])?)$/iu.test(leadingCell);
    if (startsWithOrdinal) {
        return true;
    }
    const leadingPairText = cells
        .slice(0, 2)
        .map((cell) => cell.text)
        .join(" ");
    if (severityCue(leadingPairText)) {
        return true;
    }
    if (cells.length < 3 || !coordinateCellsHaveNumericValue(cells)) {
        return false;
    }
    const normalizedLine = normalizeForSearch(line?.text ?? "");
    return !containsNormalizedPhrase(normalizedLine, "\u0442\u0430\u0431\u043b\u0438\u0446");
}
210
/**
 * Counts how many of six severity stems occur in the normalized text:
 * "крайн" (extreme), "среднетяж" (moderately severe), "средн" (medium),
 * "умерен" (moderate), "тяжел" (severe), "легк" (mild).
 *
 * @param {string} text Text to inspect.
 * @returns {number} Number of distinct stems found (0–6).
 */
function coordinateSeverityCueCount(text) {
    const normalized = normalizeForSearch(text);
    const severityStems = [
        "\u043a\u0440\u0430\u0439\u043d",
        "\u0441\u0440\u0435\u0434\u043d\u0435\u0442\u044f\u0436",
        "\u0441\u0440\u0435\u0434\u043d",
        "\u0443\u043c\u0435\u0440\u0435\u043d",
        "\u0442\u044f\u0436\u0435\u043b",
        "\u043b\u0435\u0433\u043a",
    ];
    return severityStems.filter((stem) => containsNormalizedPhrase(normalized, stem)).length;
}
227
/**
 * Decides whether a coordinate row sits in a real table context.
 *
 * A row is "structural" when it has three or more cells and either its first
 * cell is a bare Roman/Arabic ordinal or its first two cells satisfy
 * severityCue. Recommendation boilerplate is rejected unless the row is
 * structural; otherwise the row is accepted when its header text contains a
 * table caption or when it is structural.
 *
 * NOTE(review): severityCue is not defined in the portion of the file visible
 * here — confirm it is declared elsewhere in this module.
 *
 * @param {{ cells?: Array<{ text?: string }>, sourceText?: string, text?: string, headerText?: string }} row Row to inspect.
 * @returns {boolean} True when the row has table context.
 */
function coordinateRowHasTableContext(row) {
    const cells = row.cells ?? [];
    const leadingCell = normalizeForSearch(cells[0]?.text ?? "");
    const leadingPairText = cells
        .slice(0, 2)
        .map((cell) => cell.text)
        .join(" ");
    const hasEnoughColumns = cells.length >= 3;
    const structuralFirstCell = hasEnoughColumns &&
        (/^(?:[ivxlcdm]+|\d+(?:[.)])?)$/iu.test(leadingCell) || Boolean(severityCue(leadingPairText)));
    if (coordinateTextIsRecommendationMeta(row.sourceText || row.text) && !structuralFirstCell) {
        return false;
    }
    return Boolean(coordinateTextHasTableCaption(row.headerText)) || structuralFirstCell;
}
243
/**
 * Tells whether the question mentions "фиброз" (fibrosis) or "metavir".
 *
 * @param {string} question Question text.
 * @returns {*} Truthy when either phrase is found in the normalized question.
 */
function coordinateTableQuestionBlocked(question) {
    const normalized = normalizeForSearch(question);
    const mentionsFibrosis = containsNormalizedPhrase(normalized, "\u0444\u0438\u0431\u0440\u043e\u0437");
    return mentionsFibrosis || containsNormalizedPhrase(normalized, "metavir");
}
247
+ function nearestCoordinateCell(cells, x) {
248
+ let best = null;
249
+ let bestDistance = Infinity;
250
+ for (const cell of cells) {
251
+ const center = (cell.x + cell.endX) / 2;
252
+ const distance = Math.min(Math.abs(cell.x - x), Math.abs(center - x));
253
+ if (distance < bestDistance) {
254
+ best = cell;
255
+ bestDistance = distance;
256
+ }
257
+ }
258
+ return bestDistance <= 54 ? best : null;
259
+ }
260
/**
 * Merges the cells of a continuation line into the cells of a base row.
 *
 * Each continuation cell is appended to the base cell nearest to its `x`;
 * continuation cells with no base cell near enough are dropped. The base
 * cells are mutated in place.
 *
 * @param {Array<{ text: string, x: number, endX: number, itemCount: number }>} baseCells Cells of the row being extended.
 * @param {Array<{ text: string, x: number, endX: number, itemCount?: number }>} continuationCells Cells of the following line.
 * @returns {boolean} True when at least one continuation cell was merged.
 */
function appendCoordinateContinuation(baseCells, continuationCells) {
    let mergedAny = false;
    for (const fragment of continuationCells) {
        const host = nearestCoordinateCell(baseCells, fragment.x);
        if (host) {
            host.text = `${host.text} ${fragment.text}`.replace(/\s+/g, " ").trim();
            host.endX = Math.max(host.endX, fragment.endX);
            host.itemCount += fragment.itemCount ?? 1;
            mergedAny = true;
        }
    }
    return mergedAny;
}
273
/**
 * Collects header text from the lines directly above `index`.
 *
 * The scan walks upward, skips blank lines and gathers at most five lines. It
 * stops at recommendation boilerplate, at a line that looks like a data row,
 * or at a line that is not header-like. A line is header-like when it is
 * table-shaped, mentions "таблиц…" ("table"), or is short (≤ 140 characters)
 * with at most two cells or a spread below 180 units.
 *
 * @param {Array<{ text?: string }>} lines Lines of the page.
 * @param {number} index Index of the row whose header is wanted.
 * @returns {string} Collected header lines in page order, joined by spaces.
 */
function coordinateHeaderText(lines, index) {
    const collected = [];
    let cursor = index - 1;
    while (cursor >= 0 && collected.length < 5) {
        const line = lines[cursor];
        cursor -= 1;
        const text = String(line?.text ?? "").replace(/\s+/g, " ").trim();
        if (!text) {
            continue;
        }
        if (coordinateTextIsRecommendationMeta(text)) {
            break;
        }
        const cells = coordinateLineCells(line);
        if (coordinateLineLooksLikeDataRow(line, cells)) {
            break;
        }
        const normalized = normalizeForSearch(text);
        const headerLike = isCoordinateTableLine(line, cells) ||
            containsNormalizedPhrase(normalized, "\u0442\u0430\u0431\u043b\u0438\u0446") ||
            (text.length <= 140 && (cells.length <= 2 || coordinateCellsSpread(cells) < 180));
        if (!headerLike) {
            break;
        }
        // Walking upward, so prepend to keep the lines in page order.
        collected.unshift(text);
    }
    return collected.join(" ").replace(/\s+/g, " ").trim();
}
295
/**
 * Returns header context for the row at `index`.
 *
 * The local header is used as-is when it already contains a table caption.
 * Otherwise up to 24 preceding lines are searched for the nearest non-blank
 * line with a table caption, which is placed in front of the local header.
 *
 * @param {Array<{ text?: string }>} lines Lines of the page.
 * @param {number} index Index of the row.
 * @returns {string} Caption (if found) plus local header, whitespace-collapsed.
 */
function coordinateNearbyTableContext(lines, index) {
    const localHeader = coordinateHeaderText(lines, index);
    if (coordinateTextHasTableCaption(localHeader)) {
        return localHeader;
    }
    const lowestIndex = Math.max(0, index - 24);
    let nearestCaption = "";
    for (let cursor = index - 1; cursor >= lowestIndex; cursor -= 1) {
        const text = String(lines[cursor]?.text ?? "").replace(/\s+/g, " ").trim();
        if (text && coordinateTextHasTableCaption(text)) {
            nearestCaption = text;
            break;
        }
    }
    return `${nearestCaption} ${localHeader}`.replace(/\s+/g, " ").trim();
}
312
/**
 * Builds (and caches on the page) the coordinate rows of one page.
 *
 * Every table-shaped line starts a row. Up to four following lines are glued
 * onto it as continuations while they stay within 25 vertical units of the
 * previous line, do not look like a new aligned row, and at least one of their
 * cells lands near a cell of the row. Rows whose text is shorter than eight
 * characters are dropped.
 *
 * The result is stored on the page as the non-enumerable property
 * `__coordinateTableRows` and returned directly on later calls.
 *
 * @param {{ page?: *, lineItems?: Array<object> }} page Page with its extracted lines.
 * @returns {Array<object>} Rows with `page`, `index`, `y`, `headerText`, `text`, `sourceText` and `cells`.
 */
function coordinateTableRows(page) {
    if (page.__coordinateTableRows) {
        return page.__coordinateTableRows;
    }
    const lines = page.lineItems ?? [];
    const rows = [];
    for (let index = 0; index < lines.length; index += 1) {
        const line = lines[index];
        // Copy the cells: continuation merging mutates them.
        const rowCells = coordinateLineCells(line).map((cell) => ({ ...cell }));
        if (!isCoordinateTableLine(line, rowCells)) {
            continue;
        }
        const mergedLineTexts = [line.text];
        const lastCandidate = Math.min(lines.length - 1, index + 4);
        let lastY = line?.y ?? 0;
        for (let follow = index + 1; follow <= lastCandidate; follow += 1) {
            const followLine = lines[follow];
            const followY = followLine?.y ?? lastY;
            if (Math.abs(followY - lastY) > 25) {
                break;
            }
            const followCells = coordinateLineCells(followLine);
            if (followCells.length === 0) {
                break;
            }
            // A table-shaped line with a similar cell count whose first cell
            // starts at about the same x is the next row, not a continuation.
            const startsNextRow = isCoordinateTableLine(followLine, followCells) &&
                followCells.length >= Math.max(2, rowCells.length - 1) &&
                Math.abs((followCells[0]?.x ?? 0) - (rowCells[0]?.x ?? 0)) <= 32;
            if (startsNextRow) {
                break;
            }
            if (!appendCoordinateContinuation(rowCells, followCells)) {
                break;
            }
            mergedLineTexts.push(followLine.text);
            lastY = followY;
        }
        const text = rowCells.map((cell) => cell.text).join(" ").replace(/\s+/g, " ").trim();
        if (text.length < 8) {
            continue;
        }
        const headerText = coordinateHeaderText(lines, index);
        const sourceText = mergedLineTexts.join(" ").replace(/\s+/g, " ").trim();
        const cells = rowCells.map((cell, cellIndex) => ({
            ...cell,
            index: cellIndex,
            normalized: normalizeForSearch(cell.text),
            tokens: tokenize(cell.text),
        }));
        rows.push({
            page: page.page,
            index,
            y: line?.y ?? 0,
            headerText,
            text,
            sourceText,
            cells,
        });
    }
    Object.defineProperty(page, "__coordinateTableRows", {
        value: rows,
        enumerable: false,
    });
    return rows;
}
367
/**
 * Builds regular coordinate rows per page: each PDF line is split into
 * x-position cells, and nearby continuation lines are glued onto the base row.
 *
 * When `topQuestionPages` is non-empty, only pages whose number, or whose
 * previous or next number, is in that set are processed. Pages without rows
 * are left out of the result.
 *
 * @param {Iterable<{ page: number }>} pages Pages to scan.
 * @param {Set<number>} [topQuestionPages] Page numbers to stay close to.
 * @returns {Map<number, Array<object>>} Rows keyed by page number.
 */
export function buildCoordinateTableRowsByPage(pages, topQuestionPages) {
    const rowsByPage = new Map();
    const restrictToNeighbours = Boolean(topQuestionPages?.size);
    for (const page of pages) {
        if (restrictToNeighbours) {
            const pageNumber = page.page;
            const nearTopPage = topQuestionPages.has(pageNumber) ||
                topQuestionPages.has(pageNumber - 1) ||
                topQuestionPages.has(pageNumber + 1);
            if (!nearTopPage) {
                continue;
            }
        }
        const rows = coordinateTableRows(page);
        if (rows.length) {
            rowsByPage.set(page.page, rows);
        }
    }
    return rowsByPage;
}
383
/**
 * Tells whether the cells of a line can start a label/value group: at least
 * two cells, an overall spread of at least 115 units, and at least 85 units
 * between the left edges of the first and the last cell.
 *
 * @param {Array<{ x?: number, endX?: number }>} cells Cells of one line, ordered by x.
 * @returns {boolean} True when the line can start a group.
 */
function coordinateGroupLineLooksLikeStart(cells) {
    const hasTwoColumns = cells.length >= 2;
    if (!hasTwoColumns || coordinateCellsSpread(cells) < 115) {
        return false;
    }
    const leftmostX = cells[0]?.x ?? 0;
    const rightmostX = cells[cells.length - 1]?.x ?? leftmostX;
    const columnGap = rightmostX - leftmostX;
    return columnGap >= 85;
}
393
/**
 * Tells whether a line ends the table region currently being collected.
 *
 * Boundaries are: a blank line, a numbered heading ("1. ", "1.2. ", "1.2.3. "),
 * or a short line (≤ 90 characters) that starts with the word "рисунок"
 * (figure), "список" (list) or "приложение" (appendix). A line that mentions
 * "таблиц…" (table) anywhere other than at its very start is never a boundary.
 *
 * @param {{ text?: string }} [line] Line to inspect.
 * @returns {boolean} True when the line is a boundary.
 */
function coordinateLooksLikeTableBoundary(line) {
    const text = String(line?.text ?? "").replace(/\s+/g, " ").trim();
    if (!text)
        return true;
    const normalized = normalizeForSearch(text);
    if (containsNormalizedPhrase(normalized, "\u0442\u0430\u0431\u043b\u0438\u0446") && !/^\s*\u0442\u0430\u0431\u043b\u0438\u0446/u.test(text.toLowerCase()))
        return false;
    if (/^\s*(?:\d+\.){1,3}\s+/u.test(text))
        return true;
    // `\b` is defined on ASCII word characters only, so after a Cyrillic
    // keyword it never matches before a space or the end of the string
    // ("Рисунок 1" was not recognised). A "not followed by a letter" lookahead
    // gives the intended whole-word match and still accepts "Рисунок1".
    if (text.length <= 90 && /^(?:\u0440\u0438\u0441\u0443\u043d\u043e\u043a|\u0441\u043f\u0438\u0441\u043e\u043a|\u043f\u0440\u0438\u043b\u043e\u0436\u0435\u043d\u0438\u0435)(?!\p{L})/iu.test(text))
        return true;
    return false;
}
406
/**
 * Recognises short code-like values such as abbreviations or marked drug names.
 *
 * After whitespace collapsing the value must be non-empty, at most 44
 * characters long and free of any run of three or more lowercase Latin or
 * Cyrillic letters. It is then code-like when it contains a parenthesis
 * together with two consecutive uppercase letters or digits, when it contains
 * "**", or when it consists of one to three tokens made only of uppercase
 * letters, digits and `. / + -` (two or more characters each).
 *
 * @param {*} text Value to inspect.
 * @returns {boolean} True when the value looks like a short code.
 */
function coordinateShortCodeLike(text) {
    const value = String(text ?? "").replace(/\s+/g, " ").trim();
    if (!value || value.length > 44) {
        return false;
    }
    const hasLowercaseWord = /[a-z\u0430-\u044f]{3,}/u.test(value);
    if (hasLowercaseWord) {
        return false;
    }
    const hasParenthesis = /[()]/u.test(value);
    if (hasParenthesis && /[A-Z\u0410-\u042f0-9]{2,}/u.test(value)) {
        return true;
    }
    const hasDoubleAsterisk = /\*\*/u.test(value);
    return hasDoubleAsterisk ||
        /^[A-Z\u0410-\u042f0-9./+-]{2,}(?:\s+[A-Z\u0410-\u042f0-9./+-]{2,}){0,2}$/u.test(value);
}
420
/**
 * Judges whether a following line that looks like a new group start is more
 * likely a continuation of the current label.
 *
 * Both labels must produce at least one token. The line is then treated as a
 * continuation when its value part is a short code, or when the current label
 * is at most 48 characters and the next label contains `(`, `)` or `/`.
 *
 * @param {string} labelText Label collected so far.
 * @param {string} nextLabelText Label part of the following line.
 * @param {string} nextValueText Value part of the following line.
 * @returns {boolean} True when the following line should be merged.
 */
function coordinateLabelContinuationLikely(labelText, nextLabelText, nextValueText) {
    const currentTokens = uniqueTokens(labelText);
    const followingTokens = uniqueTokens(nextLabelText);
    if (currentTokens.length === 0 || followingTokens.length === 0) {
        return false;
    }
    if (coordinateShortCodeLike(nextValueText)) {
        return true;
    }
    const labelIsShort = String(labelText ?? "").length <= 48;
    return labelIsShort && /[()/]/u.test(String(nextLabelText ?? ""));
}
431
/**
 * Tells whether a line's cells form a column-header row: the joined text hits
 * at least two of nine column-title stems ("class", "group", "drug",
 * "indicator", "value", "criterion", "sign", "category", "effect") and every
 * cell is at most 70 characters long.
 *
 * @param {Array<{ text?: string }>} cells Cells of one line.
 * @returns {boolean} True for header rows.
 */
function coordinateGroupHeaderCells(cells) {
    const joinedText = cells
        .map((cell) => cell.text)
        .join(" ")
        .replace(/\s+/g, " ")
        .trim();
    const normalized = normalizeForSearch(joinedText);
    const normalizedStems = [
        "\u043a\u043b\u0430\u0441\u0441",
        "\u0433\u0440\u0443\u043f\u043f",
        "\u043f\u0440\u0435\u043f\u0430\u0440\u0430\u0442",
        "\u043f\u043e\u043a\u0430\u0437\u0430\u0442",
        "\u0437\u043d\u0430\u0447\u0435\u043d",
        "\u043a\u0440\u0438\u0442\u0435\u0440",
        "\u043f\u0440\u0438\u0437\u043d\u0430\u043a",
        "\u043a\u0430\u0442\u0435\u0433\u043e\u0440",
        "\u044d\u0444\u0444\u0435\u043a\u0442",
    ].map((stem) => normalizeForSearch(stem));
    let columnCueCount = 0;
    for (const stem of normalizedStems) {
        if (containsNormalizedPhrase(normalized, stem)) {
            columnCueCount += 1;
        }
    }
    if (columnCueCount < 2) {
        return false;
    }
    return cells.every((cell) => coordinateCellText(cell).length <= 70);
}
453
/**
 * Partitions cells into label cells and value cells around the value column.
 *
 * A cell is a label when its centre lies more than 28 units to the left of
 * `valueX`; every other cell is a value. A missing `x` counts as 0 and a
 * missing `endX` as `x`.
 *
 * @param {Array<{ x?: number, endX?: number }>} cells Cells of one line.
 * @param {number} valueX Left edge of the value column.
 * @returns {{ labelCells: Array<object>, valueCells: Array<object> }} The two partitions, input order preserved.
 */
function coordinateSplitGroupCells(cells, valueX) {
    const labelLimit = valueX - 28;
    const isLabelCell = (cell) => {
        const left = cell.x ?? 0;
        const right = cell.endX ?? left;
        return (left + right) / 2 < labelLimit;
    };
    return {
        labelCells: cells.filter((cell) => isLabelCell(cell)),
        valueCells: cells.filter((cell) => !isLabelCell(cell)),
    };
}
466
/**
 * Appends the cleaned, non-empty text of each cell to `parts`, in order.
 *
 * @param {string[]} parts Accumulator that is mutated in place.
 * @param {Array<{ text?: string }>} cells Cells whose text should be added.
 * @returns {void}
 */
function coordinateAppendGroupText(parts, cells) {
    for (const cell of cells) {
        const cleaned = coordinateCellText(cell);
        if (!cleaned) {
            continue;
        }
        parts.push(cleaned);
    }
}
473
/**
 * Builds (and caches on the page) label/value groups for one page.
 *
 * A line starts a group when its cells can start one, it is not a column
 * header, and splitting it at the x of its last cell yields both label and
 * value cells. Up to nine following lines are merged into the group while they
 * stay within 28 vertical units of the previous line, are not a table
 * boundary, and align with the label or the value column. A line that itself
 * looks like an aligned new start ends the group unless its label is likely a
 * continuation. Groups with too little text are dropped.
 *
 * The result is stored on the page as the non-enumerable property
 * `__coordinateTableGroups` and returned directly on later calls.
 *
 * @param {{ page?: *, lineItems?: Array<object> }} page Page with its extracted lines.
 * @returns {Array<object>} Groups with label/value text, token lists and positions.
 */
function coordinateTableGroups(page) {
    if (page.__coordinateTableGroups) {
        return page.__coordinateTableGroups;
    }
    const squash = (fragments) => fragments.join(" ").replace(/\s+/g, " ").trim();
    const joinCellTexts = (cellList) => squash(cellList.map((cell) => cell.text));
    const lines = page.lineItems ?? [];
    const groups = [];
    for (let index = 0; index < lines.length; index += 1) {
        const line = lines[index];
        const startCells = coordinateGroupLineCells(line).map((cell) => ({ ...cell }));
        if (!coordinateGroupLineLooksLikeStart(startCells) || coordinateGroupHeaderCells(startCells)) {
            continue;
        }
        // The last cell fixes the value column; the first cell fixes the label column.
        const valueX = startCells[startCells.length - 1]?.x ?? 0;
        const labelX = startCells[0]?.x ?? 0;
        const startSplit = coordinateSplitGroupCells(startCells, valueX);
        if (startSplit.labelCells.length === 0 || startSplit.valueCells.length === 0) {
            continue;
        }
        const labelParts = [];
        const valueParts = [];
        const mergedLineTexts = [line.text];
        coordinateAppendGroupText(labelParts, startSplit.labelCells);
        coordinateAppendGroupText(valueParts, startSplit.valueCells);
        const lastCandidate = Math.min(lines.length - 1, index + 9);
        let lastY = line?.y ?? 0;
        for (let follow = index + 1; follow <= lastCandidate; follow += 1) {
            const followLine = lines[follow];
            const followY = followLine?.y ?? lastY;
            if (Math.abs(followY - lastY) > 28) {
                break;
            }
            if (coordinateLooksLikeTableBoundary(followLine)) {
                break;
            }
            const followCells = coordinateGroupLineCells(followLine).map((cell) => ({ ...cell }));
            if (followCells.length === 0) {
                break;
            }
            const followSplit = coordinateSplitGroupCells(followCells, valueX);
            const nextLabelText = joinCellTexts(followSplit.labelCells);
            const nextValueText = joinCellTexts(followSplit.valueCells);
            const labelAligned = followSplit.labelCells.some((cell) => Math.abs((cell.x ?? 0) - labelX) <= 34);
            const valueAligned = followSplit.valueCells.some((cell) => Math.abs((cell.x ?? 0) - valueX) <= 58);
            const startsNewGroup = coordinateGroupLineLooksLikeStart(followCells) && labelAligned && valueAligned;
            if (startsNewGroup &&
                !coordinateLabelContinuationLikely(labelParts.join(" "), nextLabelText, nextValueText)) {
                break;
            }
            if (!valueAligned && !labelAligned) {
                break;
            }
            coordinateAppendGroupText(labelParts, followSplit.labelCells);
            coordinateAppendGroupText(valueParts, followSplit.valueCells);
            mergedLineTexts.push(followLine.text);
            lastY = followY;
        }
        const labelText = squash(labelParts);
        const valueText = squash(valueParts);
        const text = `${labelText} ${valueText}`.replace(/\s+/g, " ").trim();
        const tooShort = labelText.length < 3 || valueText.length < 3 || text.length < 12;
        if (tooShort) {
            continue;
        }
        groups.push({
            page: page.page,
            index,
            y: line?.y ?? 0,
            headerText: coordinateNearbyTableContext(lines, index),
            labelText,
            valueText,
            text,
            sourceText: squash(mergedLineTexts),
            valueX,
            labelX,
            labelTokens: uniqueTokens(labelText),
            valueTokens: uniqueTokens(valueText),
        });
    }
    Object.defineProperty(page, "__coordinateTableGroups", {
        value: groups,
        enumerable: false,
    });
    return groups;
}
549
/**
 * Builds groups of the form `left label -> right values` for multi-answer
 * questions, where several correct options are listed in one row or in its
 * continuation lines.
 *
 * When `topQuestionPages` is non-empty, only pages whose number, or whose
 * previous or next number, is in that set are processed. Only groups whose
 * header text contains an explicit table caption are kept, and pages left
 * without groups are omitted.
 *
 * @param {Iterable<{ page: number }>} pages Pages to scan.
 * @param {Set<number>} [topQuestionPages] Page numbers to stay close to.
 * @returns {Map<number, Array<object>>} Groups keyed by page number.
 */
export function buildCoordinateTableGroupsByPage(pages, topQuestionPages) {
    const groupsByPage = new Map();
    const restrictToNeighbours = Boolean(topQuestionPages?.size);
    for (const page of pages) {
        if (restrictToNeighbours) {
            const pageNumber = page.page;
            const nearTopPage = topQuestionPages.has(pageNumber) ||
                topQuestionPages.has(pageNumber - 1) ||
                topQuestionPages.has(pageNumber + 1);
            if (!nearTopPage) {
                continue;
            }
        }
        const captionedGroups = coordinateTableGroups(page)
            .filter((group) => coordinateTextHasExplicitTableCaption(group.headerText));
        if (captionedGroups.length) {
            groupsByPage.set(page.page, captionedGroups);
        }
    }
    return groupsByPage;
}
565
/**
 * Detects a header row of a multi-column table: the first cell names a
 * grouping ("степен" degree, "стад" stage, "класс" class, "категор" category,
 * "групп" group) and the remaining cells name what is described ("клинич"
 * clinical, "признак" sign, "объем" volume, "значен" value, "показ" indicator).
 *
 * @param {Array<{ text?: string }>} cells Cells of one line.
 * @returns {*} Truthy when both the first cell and the rest match.
 */
function coordinateMultiCellHeaderRow(cells) {
    // Mirrors an `a || b || ...` chain: first truthy result, else the last falsy one.
    const firstMatch = (haystack, stems) => {
        let matched = false;
        for (const stem of stems) {
            matched = containsNormalizedPhrase(haystack, stem);
            if (matched) {
                break;
            }
        }
        return matched;
    };
    const first = normalizeForSearch(cells[0]?.text ?? "");
    const restText = cells
        .slice(1)
        .map((cell) => cell.text)
        .join(" ");
    const rest = normalizeForSearch(restText);
    const firstHeader = firstMatch(first, [
        "\u0441\u0442\u0435\u043f\u0435\u043d",
        "\u0441\u0442\u0430\u0434",
        "\u043a\u043b\u0430\u0441\u0441",
        "\u043a\u0430\u0442\u0435\u0433\u043e\u0440",
        "\u0433\u0440\u0443\u043f\u043f",
    ]);
    const restHeader = firstMatch(rest, [
        "\u043a\u043b\u0438\u043d\u0438\u0447",
        "\u043f\u0440\u0438\u0437\u043d\u0430\u043a",
        "\u043e\u0431\u044a\u0435\u043c",
        "\u0437\u043d\u0430\u0447\u0435\u043d",
        "\u043f\u043e\u043a\u0430\u0437",
    ]);
    return firstHeader && restHeader;
}
583
/**
 * Tells whether a label is a generic column title rather than a real row
 * label: "эффект" (effect), "группа" (group), "признак" (sign), "показатель"
 * (indicator), "значение" (value), "препараты" (drugs) or "способ" (method).
 *
 * @param {string} text Label text.
 * @returns {boolean} True when any generic title is found in the normalized text.
 */
function coordinateMultiCellGenericLabel(text) {
    const normalized = normalizeForSearch(text);
    const genericTitles = [
        "\u044d\u0444\u0444\u0435\u043a\u0442",
        "\u0433\u0440\u0443\u043f\u043f\u0430",
        "\u043f\u0440\u0438\u0437\u043d\u0430\u043a",
        "\u043f\u043e\u043a\u0430\u0437\u0430\u0442\u0435\u043b\u044c",
        "\u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435",
        "\u043f\u0440\u0435\u043f\u0430\u0440\u0430\u0442\u044b",
        "\u0441\u043f\u043e\u0441\u043e\u0431",
    ];
    for (const title of genericTitles) {
        if (containsNormalizedPhrase(normalized, title)) {
            return true;
        }
    }
    return false;
}
595
/**
 * Tells whether value text is a generic column title: "группа" (group),
 * "препараты" (drugs) or "способ применения" (method of administration).
 *
 * @param {string} text Joined value text.
 * @returns {boolean} True when any generic title is found in the normalized text.
 */
function coordinateMultiCellGenericValue(text) {
    const normalized = normalizeForSearch(text);
    const genericTitles = [
        "\u0433\u0440\u0443\u043f\u043f\u0430",
        "\u043f\u0440\u0435\u043f\u0430\u0440\u0430\u0442\u044b",
        "\u0441\u043f\u043e\u0441\u043e\u0431 \u043f\u0440\u0438\u043c\u0435\u043d\u0435\u043d\u0438\u044f",
    ];
    for (const title of genericTitles) {
        if (containsNormalizedPhrase(normalized, title)) {
            return true;
        }
    }
    return false;
}
603
/**
 * Extracts label/value rows from a page's positioned lines and memoises the
 * result on the page object under a non-enumerable `__coordinateMultiCellRows`
 * property.
 *
 * A line opens a row when it looks like a row start, is not a header row, its
 * nearby context carries an explicit table caption, and its first cell holds a
 * 3-90 character label. Up to 12 following lines are folded into the value as
 * continuations.
 *
 * @param {object} page - Page record; reads `page.lineItems` and `page.page`.
 * @returns {Array<object>} Row records; later calls return the same array.
 */
function coordinateMultiCellRows(page) {
    const memoised = page.__coordinateMultiCellRows;
    if (memoised)
        return memoised;
    const MAX_CONTINUATION_LINES = 12;
    const MAX_VERTICAL_GAP = 28;
    const SAME_LABEL_COLUMN_TOLERANCE = 36;
    const VALUE_COLUMN_MIN_OFFSET = 48;
    const squash = (value) => value.replace(/\s+/g, " ").trim();
    const copiedCells = (source) => coordinateGroupLineCells(source).map((cell) => ({ ...cell }));
    const lines = page.lineItems ?? [];
    const rows = [];
    for (let index = 0; index < lines.length; index += 1) {
        const line = lines[index];
        const cells = copiedCells(line);
        if (!coordinateGroupLineLooksLikeStart(cells))
            continue;
        if (coordinateGroupHeaderCells(cells) || coordinateMultiCellHeaderRow(cells))
            continue;
        const headerText = coordinateNearbyTableContext(lines, index);
        if (!coordinateTextHasExplicitTableCaption(headerText))
            continue;
        const [labelCell, ...valueCells] = cells;
        const labelText = coordinateCellText(labelCell);
        if (labelText.length < 3 || labelText.length > 90)
            continue;
        // A generic label next to generic values is a column-title line, not data.
        if (coordinateMultiCellGenericLabel(labelText) && coordinateMultiCellGenericValue(valueCells.map((cell) => cell.text).join(" ")))
            continue;
        const labelX = labelCell.x ?? 0;
        const valueParts = valueCells.map((cell) => coordinateCellText(cell)).filter(Boolean);
        if (!valueParts.length)
            continue;
        const rowLineTexts = [line.text];
        let previousY = line?.y ?? 0;
        const lastCandidate = index + MAX_CONTINUATION_LINES;
        for (let nextIndex = index + 1; nextIndex < lines.length && nextIndex <= lastCandidate; nextIndex += 1) {
            const nextLine = lines[nextIndex];
            const nextY = nextLine?.y ?? previousY;
            if (Math.abs(nextY - previousY) > MAX_VERTICAL_GAP)
                break;
            if (coordinateLooksLikeTableBoundary(nextLine))
                break;
            const nextCells = copiedCells(nextLine);
            if (!nextCells.length)
                break;
            // A start-like line whose first cell sits in the label column opens the next row.
            const opensNextRow = coordinateGroupLineLooksLikeStart(nextCells) &&
                Math.abs((nextCells[0]?.x ?? 0) - labelX) <= SAME_LABEL_COLUMN_TOLERANCE &&
                coordinateCellText(nextCells[0]).length >= 3;
            if (opensNextRow)
                break;
            // Only cells clearly to the right of the label column extend the value.
            const continuation = nextCells
                .filter((cell) => (cell.x ?? 0) > labelX + VALUE_COLUMN_MIN_OFFSET)
                .map((cell) => coordinateCellText(cell))
                .filter(Boolean);
            if (!continuation.length)
                break;
            valueParts.push(...continuation);
            rowLineTexts.push(nextLine.text);
            previousY = nextY;
        }
        const valueText = squash(valueParts.join(" "));
        const text = squash(`${labelText} ${valueText}`);
        if (valueText.length < 8 || text.length < 14)
            continue;
        rows.push({
            page: page.page,
            index,
            y: line?.y ?? 0,
            headerText,
            labelText,
            valueText,
            text,
            sourceText: squash(rowLineTexts.join(" ")),
            labelX,
            labelTokens: uniqueTokens(labelText),
            valueTokens: uniqueTokens(valueText),
        });
    }
    Object.defineProperty(page, "__coordinateMultiCellRows", {
        value: rows,
        enumerable: false,
    });
    return rows;
}
679
/**
 * Builds multi-cell rows for tables in which a single row holds several
 * independent values/candidates tied together by a shared label and header.
 *
 * Pages are limited to those in `topQuestionPages` or directly adjacent to one
 * of them; when `topQuestionPages` is missing or empty, every page is used.
 *
 * @param {Iterable<object>} pages - Page records (each read via `page.page`).
 * @param {{size: number, has: Function}} [topQuestionPages] - Page numbers to stay close to.
 * @returns {Map<*, Array<object>>} Page number -> non-empty list of rows.
 */
export function buildCoordinateMultiCellRowsByPage(pages, topQuestionPages) {
    const unrestricted = !topQuestionPages?.size;
    const rowsByPage = new Map();
    for (const page of pages) {
        const pageNumber = page.page;
        const nearTopPage = unrestricted ||
            [pageNumber, pageNumber - 1, pageNumber + 1].some((candidate) => topQuestionPages.has(candidate));
        if (!nearTopPage)
            continue;
        const pageRows = coordinateMultiCellRows(page);
        if (pageRows.length)
            rowsByPage.set(page.page, pageRows);
    }
    return rowsByPage;
}
695
/**
 * Collects one text block per explicit table caption found in `page.lines`.
 *
 * A caption is either a line that is itself an explicit caption of at least 12
 * characters, or the previous line when only the concatenation
 * "previous + current" is recognised as a caption. The block starts one line
 * above the caption and runs for at most 72 lines after it, stopping early at
 * the next explicit caption found more than two lines below.
 *
 * Each caption index is processed once. Without that guard the line directly
 * after a caption re-detects the same caption through the two-line fallback
 * and an identical block is emitted a second time.
 *
 * @param {object} page - Page record; reads `page.lines` and `page.page`.
 * @returns {Array<object>} Blocks with `page`, `headerText`, `text`,
 *   `normalized`, `headerTokens` and `tokens`; blocks shorter than 80
 *   characters are dropped.
 */
function coordinateTableMembershipBlocks(page) {
    const lines = page.lines ?? [];
    const blocks = [];
    const handledCaptions = new Set();
    for (let index = 0; index < lines.length; index += 1) {
        const line = lines[index];
        const captionIndex = coordinateTextHasExplicitTableCaption(line) && String(line ?? "").length >= 12
            ? index
            : index > 0 && coordinateTextHasExplicitTableCaption(`${lines[index - 1]} ${line}`)
                ? index - 1
                : -1;
        if (captionIndex < 0)
            continue;
        // The same caption is reachable from two consecutive lines; build its block once.
        if (handledCaptions.has(captionIndex))
            continue;
        handledCaptions.add(captionIndex);
        const headerStart = Math.max(0, captionIndex - 1);
        const parts = [];
        for (let cursor = headerStart; cursor < lines.length && cursor <= captionIndex + 72; cursor += 1) {
            const text = String(lines[cursor] ?? "").replace(/\s+/g, " ").trim();
            if (!text)
                continue;
            // A further caption below the header area marks the start of the next table.
            if (cursor > captionIndex + 2 && coordinateTextHasExplicitTableCaption(text))
                break;
            parts.push(text);
        }
        const text = parts.join(" ").replace(/\s+/g, " ").trim();
        if (text.length < 80)
            continue;
        const headerText = parts.slice(0, Math.min(parts.length, 4)).join(" ");
        blocks.push({
            page: page.page,
            headerText,
            text,
            normalized: normalizeForSearch(text),
            headerTokens: uniqueTokens(headerText),
            tokens: tokenize(text),
        });
    }
    return blocks;
}
732
/**
 * Builds membership blocks for explicit tables. Unlike the row scorers, this
 * layer answers the question "does the option appear anywhere in the relevant
 * table as a whole".
 *
 * Pages are limited to those in `topQuestionPages` or directly adjacent to one
 * of them; when `topQuestionPages` is missing or empty, every page is used.
 *
 * @param {Iterable<object>} pages - Page records (each read via `page.page`).
 * @param {{size: number, has: Function}} [topQuestionPages] - Page numbers to stay close to.
 * @returns {Map<*, Array<object>>} Page number -> non-empty list of blocks.
 */
export function buildCoordinateTableMembershipsByPage(pages, topQuestionPages) {
    const unrestricted = !topQuestionPages?.size;
    const blocksByPage = new Map();
    for (const page of pages) {
        const pageNumber = page.page;
        const nearTopPage = unrestricted ||
            [pageNumber, pageNumber - 1, pageNumber + 1].some((candidate) => topQuestionPages.has(candidate));
        if (!nearTopPage)
            continue;
        const pageBlocks = coordinateTableMembershipBlocks(page);
        if (pageBlocks.length)
            blocksByPage.set(page.page, pageBlocks);
    }
    return blocksByPage;
}
748
/**
 * Narrows the table focus tokens for membership matching: keeps tokens that
 * are at least 4 characters long or start with a digit, and drops those listed
 * in COORDINATE_TABLE_MEMBERSHIP_GENERIC_TOKENS.
 *
 * @param {string} question - Question text.
 * @param {string[]} [focusTokens] - Precomputed focus tokens.
 * @param {string[]} [answerTokens] - Tokens of the answer option.
 * @returns {string[]} Filtered focus tokens in their original order.
 */
function coordinateTableMembershipFocus(question, focusTokens, answerTokens) {
    const candidates = coordinateTableFocusTokens(question, focusTokens, answerTokens);
    return candidates.filter((token) => {
        const informative = token.length >= 4 || /^\d/u.test(token);
        return informative && !COORDINATE_TABLE_MEMBERSHIP_GENERIC_TOKENS.has(token);
    });
}
751
/**
 * Checks whether any search phrase derived from the answer text occurs in the
 * block's normalised text. Phrases shorter than 3 characters after
 * normalisation are ignored.
 *
 * @param {{normalized: string}} block - Membership block.
 * @param {string} answerText - Answer option text.
 * @returns {boolean} True on the first phrase found in the block.
 */
function coordinateTableMembershipAnswerHit(block, answerText) {
    for (const phrase of answerSearchPhrases(answerText)) {
        const needle = normalizeForSearch(phrase);
        if (needle.length >= 3 && containsNormalizedPhrase(block.normalized, needle))
            return true;
    }
    return false;
}
757
/**
 * Detects an option set that contains both a "short-acting" and a
 * "long-acting" option (Russian stems "короткодейств" / "длительнодейств").
 *
 * @param {Array<{text: string}>} [answers] - All answer options of the question.
 * @returns {boolean} True when both stems occur somewhere among the options.
 */
function coordinateTableMembershipHasOpposingActionDuration(answers) {
    const optionTexts = (answers ?? []).map((answer) => normalizeForSearch(answer.text));
    const anyOptionMentions = (stem) => optionTexts.some((text) => containsNormalizedPhrase(text, stem));
    const mentionsShortActing = anyOptionMentions("короткодейств");
    const mentionsLongActing = anyOptionMentions("длительнодейств");
    return mentionsShortActing && mentionsLongActing;
}
763
/**
 * Looks for a multi-select option inside the body of an explicit table whose
 * caption/header matches the question focus. This helps full table-list
 * questions where several correct answers sit in different rows of one table.
 *
 * Returns null unless `mode` is "multi", the intent is neither negative nor an
 * exception, membership blocks exist, the options do not mix short- and
 * long-acting cues, the question contains none of the excluded cues, and at
 * least two focus tokens remain.
 *
 * @param {object} params
 * @param {string} params.mode - Question mode; only "multi" is scored.
 * @param {string} params.question - Question text.
 * @param {{id: *, text: string}} params.answer - Option being scored.
 * @param {Array<{text: string}>} [params.answers] - All options of the question.
 * @param {string[]} params.answerTokens - Tokens of the scored option.
 * @param {string[]} [params.focusTokens] - Precomputed question focus tokens.
 * @param {{negative?: *, exception?: *}} [params.intent] - Question intent flags.
 * @param {Map<*, Array<object>>} [params.coordinateTableMembershipsByPage] - Blocks per page.
 * @returns {object|null} Best evidence as selected by `betterEvidence`, or null.
 */
export function bestCoordinateTableMembershipSupport({ mode, question, answer, answers, answerTokens, focusTokens, intent, coordinateTableMembershipsByPage, }) {
    const eligible = mode === "multi" &&
        !intent?.negative &&
        !intent?.exception &&
        Boolean(coordinateTableMembershipsByPage?.size);
    if (!eligible)
        return null;
    if (coordinateTableMembershipHasOpposingActionDuration(answers))
        return null;
    const normalizedQuestion = normalizeForSearch(question);
    // Russian cues: "местно" (topically), "в виде" (in the form of),
    // "маз" (ointment stem), "суппозитор" (suppository stem).
    const excludedQuestionCues = ["местно", "в виде", "маз", "суппозитор"];
    if (excludedQuestionCues.some((cue) => containsNormalizedPhrase(normalizedQuestion, cue))) {
        return null;
    }
    const tableFocus = coordinateTableMembershipFocus(question, focusTokens, answerTokens);
    if (tableFocus.length < 2)
        return null;
    const answerPhrases = answerSearchPhrases(answer.text).slice(0, 10);
    const optionCount = answers?.length ?? 0;
    let best = null;
    for (const pageBlocks of coordinateTableMembershipsByPage.values()) {
        for (const block of pageBlocks) {
            const headerCoverage = coverage(tableFocus, block.headerTokens);
            const headerHits = tokenHitCount(tableFocus, block.headerTokens);
            const blockCoverage = coverage(tableFocus, block.tokens);
            const headerIsRelevant = headerHits >= 2 || headerCoverage >= 0.22 || (headerHits >= 1 && blockCoverage >= 0.34);
            if (!headerIsRelevant)
                continue;
            const matchedOptions = (answers ?? []).filter((candidate) => coordinateTableMembershipAnswerHit(block, candidate.text)).length;
            // For 3-4 option questions, a block that contains every option cannot tell them apart.
            const everyOptionMatches = optionCount >= 3 && optionCount <= 4 && matchedOptions >= optionCount;
            if (everyOptionMatches)
                continue;
            const phraseHit = answerPhrases.some((phrase) => coordinateTableMembershipAnswerHit(block, phrase));
            const lexicalSupport = answerTokens.length ? strictSoftCoverage(answerTokens, block.tokens) : 0;
            if (!phraseHit && lexicalSupport < 0.94)
                continue;
            const score = 11.8 +
                Math.min(0.6, headerCoverage) * 6.0 +
                Math.min(4, headerHits) * 0.85 +
                Math.min(0.45, blockCoverage) * 2.2 +
                (phraseHit ? 3.4 : 0) +
                lexicalSupport * 1.2;
            const evidence = {
                answerId: answer.id,
                page: block.page,
                text: block.text.slice(0, 1200),
                score,
                kind: "coordinate_table_membership",
            };
            best = betterEvidence(best, evidence);
        }
    }
    return best;
}
817
/**
 * Derives up to 12 distinct focus tokens for table matching from the supplied
 * focus tokens followed by the question's own tokens.
 *
 * Dropped: empty tokens, tokens shorter than 3 characters, tokens in
 * FOCUS_STOPWORDS or COORDINATE_TABLE_GENERIC_TOKENS, and tokens that also
 * occur in the answer unless they start with a digit.
 *
 * @param {string} question - Question text.
 * @param {string[]} [focusTokens] - Precomputed focus tokens (take precedence in order).
 * @param {string[]} [answerTokens] - Tokens of the answer option.
 * @returns {string[]} First-seen-ordered focus tokens, at most 12.
 */
function coordinateTableFocusTokens(question, focusTokens, answerTokens) {
    const answerSet = new Set(answerTokens ?? []);
    // A Set keeps first-insertion order, so it deduplicates without reordering.
    const kept = new Set();
    const candidates = [...(focusTokens ?? []), ...uniqueTokens(question)];
    for (const token of candidates) {
        if (!token || token.length < 3)
            continue;
        if (FOCUS_STOPWORDS.has(token) || COORDINATE_TABLE_GENERIC_TOKENS.has(token))
            continue;
        const sharedWithAnswer = answerSet.has(token) && !/^\d/u.test(token);
        if (sharedWithAnswer)
            continue;
        kept.add(token);
    }
    return [...kept].slice(0, 12);
}
832
/**
 * Checks compound focus tokens (those containing "+" or "/") against a label.
 *
 * Passes when there are no compound tokens at all, or when at least one
 * compound token is either present verbatim among the label tokens or splits
 * into two or more parts (each at least 2 characters) that are all present.
 *
 * @param {string[]} tableFocus - Focus tokens of the question.
 * @param {string[]} [labelTokens] - Tokens of the row/group label.
 * @returns {boolean} Whether the compound focus is satisfied by the label.
 */
function coordinateCompoundFocusMatches(tableFocus, labelTokens) {
    const compoundTokens = tableFocus.filter((token) => /[+/]/u.test(token));
    if (compoundTokens.length === 0)
        return true;
    const labels = new Set(labelTokens ?? []);
    return compoundTokens.some((token) => {
        if (labels.has(token))
            return true;
        const pieces = token
            .split(/[+/]+/u)
            .map((piece) => piece.trim())
            .filter((piece) => piece.length >= 2);
        return pieces.length >= 2 && pieces.every((piece) => labels.has(piece));
    });
}
849
/**
 * Gives fixed support when the answer and the cell both mention the same route
 * of administration, possibly through different synonyms of one group.
 *
 * @param {string} answerText - Answer option text.
 * @param {string} cellText - Table cell (plus header) text.
 * @returns {number} 0.96 when some synonym group is hit on both sides, else 0.
 */
function coordinateRouteSynonymSupport(answerText, cellText) {
    const normalizedAnswer = normalizeForSearch(answerText);
    const normalizedCell = normalizeForSearch(cellText);
    const routeGroups = [
        // oral: "перорал", "внутрь", plus Latin spellings
        ["\u043f\u0435\u0440\u043e\u0440\u0430\u043b", "\u0432\u043d\u0443\u0442\u0440\u044c", "per os", "peros", "p o"],
        // intravenous: "внутрив", "в/в"
        ["\u0432\u043d\u0443\u0442\u0440\u0438\u0432", "\u0432/\u0432"],
        // intramuscular: "внутримыш", "в/м"
        ["\u0432\u043d\u0443\u0442\u0440\u0438\u043c\u044b\u0448", "\u0432/\u043c"],
        // subcutaneous: "подкож", "п/к"
        ["\u043f\u043e\u0434\u043a\u043e\u0436", "\u043f/\u043a"],
    ];
    const mentionsAny = (haystack, cues) => cues.some((cue) => containsNormalizedPhrase(haystack, cue));
    const sharedRoute = routeGroups.some((cues) => mentionsAny(normalizedAnswer, cues) && mentionsAny(normalizedCell, cues));
    return sharedRoute ? 0.96 : 0;
}
868
/**
 * Classifies text by the first severity rule it satisfies. The rules are
 * ordered, so "moderate" stems are checked before the plain "severe" stem.
 *
 * @param {string} text - Text to classify.
 * @returns {"very_severe"|"moderate"|"severe"|"mild"|null} Matched severity, or null.
 */
function severityCue(text) {
    const normalized = normalizeForSearch(text);
    const mentions = (stem) => containsNormalizedPhrase(normalized, stem);
    const orderedRules = [
        // "крайн" (extremely) together with "тяж" (severe stem)
        ["very_severe", () => mentions("\u043a\u0440\u0430\u0439\u043d") && mentions("\u0442\u044f\u0436")],
        // "среднетяж", "средн" (medium), "умерен" (moderate)
        ["moderate", () => mentions("\u0441\u0440\u0435\u0434\u043d\u0435\u0442\u044f\u0436") ||
                mentions("\u0441\u0440\u0435\u0434\u043d") ||
                mentions("\u0443\u043c\u0435\u0440\u0435\u043d")],
        // "тяжел" (severe)
        ["severe", () => mentions("\u0442\u044f\u0436\u0435\u043b")],
        // "легк" (mild)
        ["mild", () => mentions("\u043b\u0435\u0433\u043a")],
    ];
    for (const [label, applies] of orderedRules) {
        if (applies())
            return label;
    }
    return null;
}
883
/**
 * Collects comparison directions ("gt" / "lt") worded near each occurrence of
 * a number in already-normalised text.
 *
 * Every expanded form of the number is searched; each occurrence accepted by
 * `numericSearchBoundary` is inspected in a window of 32 characters before and
 * 20 characters after it. Negated forms flip the direction: "не более" removes
 * "gt" and adds "lt", "не менее" does the opposite.
 *
 * @param {string} normalizedText - Text already passed through normalisation.
 * @param {string} number - Number token to look for.
 * @returns {Set<string>} Subset of {"gt", "lt"} accumulated over all occurrences.
 */
function coordinateDirectionCuesAroundNumber(normalizedText, number) {
    const LOOK_BEHIND = 32;
    const LOOK_AHEAD = 20;
    const normalizedForms = expandNumberToken(number)
        .map((item) => normalizeForSearch(item))
        .filter(Boolean);
    const directions = new Set();
    for (const form of new Set(normalizedForms)) {
        const advance = Math.max(1, form.length);
        let searchFrom = 0;
        while (searchFrom < normalizedText.length) {
            const hitAt = normalizedText.indexOf(form, searchFrom);
            if (hitAt < 0)
                break;
            searchFrom = hitAt + advance;
            if (!numericSearchBoundary(normalizedText, hitAt, form.length))
                continue;
            const windowStart = Math.max(0, hitAt - LOOK_BEHIND);
            const windowEnd = Math.min(normalizedText.length, hitAt + form.length + LOOK_AHEAD);
            const local = normalizedText.slice(windowStart, windowEnd);
            const near = (cue) => containsNormalizedPhrase(local, cue);
            // "более" (more than), "больше" (greater), "выше" (above)
            const saysGreater = near("\u0431\u043e\u043b\u0435\u0435") ||
                near("\u0431\u043e\u043b\u044c\u0448\u0435") ||
                near("\u0432\u044b\u0448\u0435") ||
                />|>=/u.test(local);
            if (saysGreater) {
                directions.add("gt");
            }
            // "менее" (less than), "меньше" (smaller), "ниже" (below), "до " (up to)
            const saysLess = near("\u043c\u0435\u043d\u0435\u0435") ||
                near("\u043c\u0435\u043d\u044c\u0448\u0435") ||
                near("\u043d\u0438\u0436\u0435") ||
                near("\u0434\u043e ") ||
                /<|<=/u.test(local);
            if (saysLess) {
                directions.add("lt");
            }
            // "не более" / "неболее" (no more than) overrides the plain "более" hit.
            const saysNoMore = near("\u043d\u0435 \u0431\u043e\u043b\u0435\u0435") ||
                near("\u043d\u0435\u0431\u043e\u043b\u0435\u0435");
            if (saysNoMore) {
                directions.delete("gt");
                directions.add("lt");
            }
            // "не менее" / "неменее" (no less than) overrides the plain "менее" hit.
            const saysNoLess = near("\u043d\u0435 \u043c\u0435\u043d\u0435\u0435") ||
                near("\u043d\u0435\u043c\u0435\u043d\u0435\u0435");
            if (saysNoLess) {
                directions.delete("lt");
                directions.add("gt");
            }
        }
    }
    return directions;
}
925
/**
 * Rejects a cell whose comparison direction around a number contradicts the
 * answer's direction around the same number.
 *
 * A number only causes a rejection when both the answer and the cell word a
 * direction for it and the two direction sets have nothing in common.
 *
 * @param {string} cellText - Table cell text.
 * @param {string} answerText - Answer option text.
 * @param {string[]} answerNumbers - Numbers extracted from the answer.
 * @returns {boolean} False on the first contradicting number, otherwise true.
 */
function coordinateNumericDirectionCompatible(cellText, answerText, answerNumbers) {
    if (!answerNumbers.length)
        return true;
    const normalizedCell = normalizeForSearch(cellText);
    const normalizedAnswer = normalizeForSearch(answerText);
    for (const number of answerNumbers) {
        const wanted = coordinateDirectionCuesAroundNumber(normalizedAnswer, number);
        if (!wanted.size)
            continue;
        const found = coordinateDirectionCuesAroundNumber(normalizedCell, number);
        if (!found.size)
            continue;
        let agrees = false;
        for (const direction of wanted) {
            if (found.has(direction)) {
                agrees = true;
                break;
            }
        }
        if (!agrees)
            return false;
    }
    return true;
}
943
/**
 * Measures how well one cell supports an answer option.
 *
 * Support is the maximum of token coverage, a phrase hit (counted as 1) and
 * numeric coverage. When the answer has numbers and numeric coverage is below
 * the required level (0.82 if the numbers expand to more than one distinct
 * form, otherwise 0.5), support is capped at 70% of the numeric coverage.
 *
 * @param {object} cell - Cell whose text is read through `coordinateCellText`.
 * @param {{text: string}} answer - Answer option.
 * @param {string[]} answerTokens - Tokens of the answer.
 * @param {string[]} answerPhrases - Search phrases of the answer.
 * @param {string[]} answerNumbers - Numbers extracted from the answer.
 * @returns {{support: number, numericCoverage: number, phraseHit: boolean, tokens: Array, normalized: string}}
 */
function coordinateCellAnswerSupport(cell, answer, answerTokens, answerPhrases, answerNumbers) {
    const normalized = normalizeForSearch(coordinateCellText(cell));
    const tokens = tokenizeNormalized(normalized);
    const numericCoverage = numberCoverage(answer.text, normalized);
    const phraseHit = answerPhrases.some((phrase) => containsNormalizedPhrase(normalized, phrase));
    const tokenSupport = answerTokens.length ? strictSoftCoverage(answerTokens, tokens) : 0;
    let support = Math.max(tokenSupport, phraseHit ? 1 : 0, numericCoverage);
    if (answerNumbers.length) {
        const distinctForms = new Set(answerNumbers.flatMap(expandNumberToken));
        const requiredCoverage = distinctForms.size > 1 ? 0.82 : 0.5;
        if (numericCoverage < requiredCoverage) {
            // Words alone must not carry an answer whose numbers are missing from the cell.
            support = Math.min(support, numericCoverage * 0.7);
        }
    }
    return { support, numericCoverage, phraseHit, tokens, normalized };
}
959
/**
 * Score adjustment based on how well the cells to the left of the matched cell
 * reflect the question focus.
 *
 * Returns -0.35 when the row has no cells, no cell was matched, the match came
 * from the whole row, or the matched cell has a negative index. Otherwise the
 * last two cells with a smaller index act as the label; a caption-like header
 * adds 0.25 and numeric coverage of at least 0.82 adds 0.35. The result is
 * capped at 1.4.
 *
 * @param {object} row - Row with `cells` and `headerText`.
 * @param {{index?: number}} bestCell - Cell that matched the answer.
 * @param {string[]} tableFocus - Focus tokens of the question.
 * @param {{numericCoverage?: number}} bestCellSupport - Support record of the matched cell.
 * @param {boolean} wholeRowAnswerMatch - True when the match came from the whole row.
 * @returns {number} Bonus (or penalty) to add to the row score.
 */
function coordinateRowContrastBonus(row, bestCell, tableFocus, bestCellSupport, wholeRowAnswerMatch) {
    const NO_CONTRAST_PENALTY = -0.35;
    if (!row?.cells?.length || !bestCell || wholeRowAnswerMatch)
        return NO_CONTRAST_PENALTY;
    const cellIndex = bestCell.index ?? -1;
    if (cellIndex < 0)
        return NO_CONTRAST_PENALTY;
    const cellsToTheLeft = row.cells.filter((cell) => (cell.index ?? 0) < cellIndex);
    const labelText = cellsToTheLeft
        .slice(-2)
        .map((cell) => cell.text)
        .join(" ");
    const labelTokens = tokenize(labelText);
    const leftFocusHits = tokenHitCount(tableFocus, labelTokens);
    const leftFocusCoverage = tableFocus.length ? coverage(tableFocus, labelTokens) : 0;
    const headerCue = coordinateTextHasTableCaption(row.headerText) ? 0.25 : 0;
    const numericSpecificity = bestCellSupport?.numericCoverage >= 0.82 ? 0.35 : 0;
    const labelIgnoresFocus = leftFocusHits <= 0 && leftFocusCoverage < 0.18;
    if (labelIgnoresFocus)
        return headerCue + numericSpecificity - 0.2;
    return Math.min(1.4, leftFocusHits * 0.35 + leftFocusCoverage * 1.6 + headerCue + numericSpecificity);
}
979
/**
 * Scores single-answer support from a coordinate row: the option must match a
 * specific cell, and the neighbouring cells/header must account for the
 * question focus.
 *
 * When no single cell reaches the minimum support and the answer contains
 * numbers, the header plus the whole row text is tried as one pseudo-cell.
 *
 * @param {object} params
 * @param {string} params.mode - Question mode; only "single" is scored.
 * @param {string} params.question - Question text.
 * @param {{id: *, text: string}} params.answer - Option being scored.
 * @param {string[]} params.answerTokens - Tokens of the option.
 * @param {string[]} [params.focusTokens] - Precomputed question focus tokens.
 * @param {Map<*, Array<object>>} [params.coordinateTableRowsByPage] - Rows per page.
 * @returns {object|null} Best evidence as selected by `betterEvidence`, or null.
 */
export function bestCoordinateTableRowSupport({ mode, question, answer, answerTokens, focusTokens, coordinateTableRowsByPage, }) {
    if (!coordinateTableRowsByPage || mode !== "single")
        return null;
    if (coordinateTableQuestionBlocked(question))
        return null;
    const answerNumbers = extractNumbers(answer.text);
    const answerPhrases = answerSearchPhrases(answer.text).slice(0, 12);
    const tableFocus = coordinateTableFocusTokens(question, focusTokens, answerTokens);
    if (!tableFocus.length && !answerNumbers.length)
        return null;
    const questionSeverity = severityCue(question);
    const minAnswerSupport = answerNumbers.length ? 0.5 : 0.64;
    const squash = (value) => value.replace(/\s+/g, " ").trim();
    const supportOf = (cell) => coordinateCellAnswerSupport(cell, answer, answerTokens, answerPhrases, answerNumbers);
    let best = null;
    for (const pageRows of coordinateTableRowsByPage.values()) {
        for (const row of pageRows) {
            if (!row.cells?.length)
                continue;
            if (!coordinateRowHasTableContext(row))
                continue;
            // A row mentioning several severities cannot answer a severity-specific question.
            if (questionSeverity && coordinateSeverityCueCount(row.text) > 1)
                continue;
            let bestCell = null;
            let bestCellSupport = null;
            for (const cell of row.cells) {
                const candidate = supportOf(cell);
                if (!bestCellSupport || candidate.support > bestCellSupport.support) {
                    bestCell = cell;
                    bestCellSupport = candidate;
                }
            }
            let wholeRowAnswerMatch = false;
            const cellFallsShort = !bestCellSupport || bestCellSupport.support < minAnswerSupport;
            if (cellFallsShort && answerNumbers.length) {
                const rowSupport = supportOf({ text: squash(`${row.headerText} ${row.text}`), index: -1 });
                if (rowSupport.support >= minAnswerSupport) {
                    bestCell = { text: "", index: -1 };
                    bestCellSupport = rowSupport;
                    wholeRowAnswerMatch = true;
                }
            }
            if (!bestCell || !bestCellSupport || bestCellSupport.support < minAnswerSupport)
                continue;
            const otherCellsText = row.cells
                .filter((cell) => wholeRowAnswerMatch || cell.index !== bestCell.index)
                .map((cell) => cell.text)
                .join(" ");
            const rowSpecificTokens = tokenize(otherCellsText);
            const rowSpecificCoverage = tableFocus.length ? coverage(tableFocus, rowSpecificTokens) : 0;
            const rowSpecificHits = tokenHitCount(tableFocus, rowSpecificTokens);
            const headerTokens = tokenize(row.headerText);
            const headerCoverage = tableFocus.length ? coverage(tableFocus, headerTokens) : 0;
            if (tableFocus.length && rowSpecificCoverage < 0.16 && rowSpecificHits < 1)
                continue;
            const rowLabelText = row.cells
                .filter((cell) => cell.index !== bestCell.index)
                .slice(0, 2)
                .map((cell) => cell.text)
                .join(" ");
            const rowSeverity = severityCue(rowLabelText || otherCellsText);
            if (questionSeverity && rowSeverity !== questionSeverity)
                continue;
            const score = 13.4 +
                Math.min(1, bestCellSupport.support) * 8.4 +
                Math.min(0.75, rowSpecificCoverage) * 7.0 +
                Math.min(3, rowSpecificHits) * 1.2 +
                Math.min(0.45, headerCoverage) * 2.4 +
                bestCellSupport.numericCoverage * 2.6 +
                (bestCellSupport.phraseHit ? 1.1 : 0) +
                (row.cells.length >= 3 ? 1.3 : 0) +
                coordinateRowContrastBonus(row, bestCell, tableFocus, bestCellSupport, wholeRowAnswerMatch);
            const evidence = {
                answerId: answer.id,
                page: row.page,
                text: squash(`${row.headerText} ${row.sourceText || row.text}`),
                score,
                kind: "coordinate_table_row",
            };
            best = betterEvidence(best, evidence);
        }
    }
    return best;
}
1065
/**
 * Scores multi-answer support from an explicit table group and allows the
 * reverse `value -> label` binding when the value appears in the question.
 *
 * Each group is tried in two directions: forward (answer in the value, focus
 * in the label/header) and inverse (answer in the label, focus in the value).
 * Both may contribute evidence; `betterEvidence` keeps the winner.
 *
 * @param {object} params
 * @param {string} params.mode - Question mode; only "multi" is scored.
 * @param {string} params.question - Question text.
 * @param {{id: *, text: string}} params.answer - Option being scored.
 * @param {string[]} params.answerTokens - Tokens of the option.
 * @param {string[]} [params.focusTokens] - Precomputed question focus tokens.
 * @param {Map<*, Array<object>>} [params.coordinateTableGroupsByPage] - Groups per page.
 * @returns {object|null} Best evidence as selected by `betterEvidence`, or null.
 */
export function bestCoordinateTableGroupSupport({ mode, question, answer, answerTokens, focusTokens, coordinateTableGroupsByPage, }) {
    if (mode !== "multi" || !coordinateTableGroupsByPage)
        return null;
    const answerNumbers = extractNumbers(answer.text);
    const answerPhrases = answerSearchPhrases(answer.text).slice(0, 12);
    const tableFocus = coordinateTableFocusTokens(question, focusTokens, answerTokens);
    if (tableFocus.length < 2 && !answerNumbers.length)
        return null;
    const minAnswerSupport = answerNumbers.length ? 0.5 : 0.58;
    // Number of focus hits that counts as specific: a quarter of the focus, clamped to 2..3.
    const focusHitThreshold = Math.min(3, Math.max(2, Math.ceil(tableFocus.length * 0.25)));
    const squash = (value) => value.replace(/\s+/g, " ").trim();
    const supportOf = (text, index) => coordinateCellAnswerSupport({ text, index }, answer, answerTokens, answerPhrases, answerNumbers);
    const lexicalSupportOf = (tokens) => (answerTokens.length ? strictSoftCoverage(answerTokens, tokens) : 0);
    const headerCoverageOf = (group) => (tableFocus.length ? coverage(tableFocus, uniqueTokens(group.headerText)) : 0);
    let best = null;
    for (const pageGroups of coordinateTableGroupsByPage.values()) {
        for (const group of pageGroups) {
            // Forward direction: the answer sits in the value, the focus in the label/header.
            const answerSupport = supportOf(group.valueText, 1);
            const synonymSupport = coordinateRouteSynonymSupport(answer.text, `${group.valueText} ${group.headerText}`);
            const effectiveAnswerSupport = Math.max(answerSupport.support, synonymSupport);
            const lexicalAnswerSupport = lexicalSupportOf(answerSupport.tokens);
            if (effectiveAnswerSupport >= minAnswerSupport && (answerSupport.phraseHit || synonymSupport > 0 || lexicalAnswerSupport >= 0.42)) {
                const labelCoverage = tableFocus.length ? coverage(tableFocus, group.labelTokens) : 0;
                const labelHits = tokenHitCount(tableFocus, group.labelTokens);
                const headerCoverage = headerCoverageOf(group);
                const hasSpecificLabel = labelCoverage >= 0.22 || labelHits >= focusHitThreshold;
                if ((hasSpecificLabel || headerCoverage >= 0.42) && coordinateCompoundFocusMatches(tableFocus, group.labelTokens)) {
                    const score = 14.6 +
                        Math.min(1, effectiveAnswerSupport) * 8.6 +
                        Math.min(0.78, labelCoverage) * 8.2 +
                        Math.min(4, labelHits) * 1.45 +
                        Math.min(0.5, headerCoverage) * 2.0 +
                        (answerSupport.phraseHit ? 1.4 : 0) +
                        synonymSupport * 1.4 +
                        lexicalAnswerSupport * 2.0 +
                        answerSupport.numericCoverage * 2.2;
                    best = betterEvidence(best, {
                        answerId: answer.id,
                        page: group.page,
                        text: squash(`${group.headerText} | ${group.labelText} -> ${group.valueText}`),
                        score,
                        kind: "coordinate_table_group",
                    });
                }
            }
            // Inverse direction: the focus sits in the value, the answer in the label.
            const inverseFocusCoverage = tableFocus.length ? coverage(tableFocus, group.valueTokens) : 0;
            const inverseFocusHits = tokenHitCount(tableFocus, group.valueTokens);
            const inverseHeaderCoverage = headerCoverageOf(group);
            const inverseFocusSupported = inverseFocusCoverage >= 0.28 ||
                inverseFocusHits >= focusHitThreshold ||
                (inverseHeaderCoverage >= 0.42 && inverseFocusHits >= 1);
            if (!inverseFocusSupported)
                continue;
            const inverseAnswerSupport = supportOf(group.labelText, 0);
            const inverseSynonymSupport = coordinateRouteSynonymSupport(answer.text, `${group.labelText} ${group.headerText}`);
            const inverseEffectiveAnswerSupport = Math.max(inverseAnswerSupport.support, inverseSynonymSupport);
            if (inverseEffectiveAnswerSupport < minAnswerSupport)
                continue;
            const inverseLexicalAnswerSupport = lexicalSupportOf(inverseAnswerSupport.tokens);
            if (!inverseAnswerSupport.phraseHit && inverseSynonymSupport <= 0 && inverseLexicalAnswerSupport < 0.42)
                continue;
            const inverseScore = 14.4 +
                Math.min(1, inverseEffectiveAnswerSupport) * 8.2 +
                Math.min(0.78, inverseFocusCoverage) * 8.0 +
                Math.min(4, inverseFocusHits) * 1.35 +
                Math.min(0.5, inverseHeaderCoverage) * 1.6 +
                (inverseAnswerSupport.phraseHit ? 1.2 : 0) +
                inverseSynonymSupport * 1.2 +
                inverseLexicalAnswerSupport * 1.8 +
                inverseAnswerSupport.numericCoverage * 2.0;
            best = betterEvidence(best, {
                answerId: answer.id,
                page: group.page,
                text: squash(`${group.headerText} | ${group.valueText} <- ${group.labelText}`),
                score: inverseScore,
                kind: "coordinate_table_group_inverse",
            });
        }
    }
    return best;
}
1148
/**
 * Scores a multi-cell row where the correct answer may sit in any cell of the
 * row, but the row must be tied to the question focus and the header.
 *
 * Rows are skipped when their label severity differs from the question's, when
 * neither label nor header reflects the focus, when the comparison direction
 * around a number contradicts the answer, or when the answer support is below
 * the minimum. Long list answers (6+ tokens with 4+ hits) get a relaxed
 * support threshold and a stricter lexical one.
 *
 * @param {object} params
 * @param {string} params.mode - Question mode; only "multi" is scored.
 * @param {string} params.question - Question text.
 * @param {{id: *, text: string}} params.answer - Option being scored.
 * @param {string[]} params.answerTokens - Tokens of the option.
 * @param {string[]} [params.focusTokens] - Precomputed question focus tokens.
 * @param {Map<*, Array<object>>} [params.coordinateMultiCellRowsByPage] - Rows per page.
 * @returns {object|null} Best evidence as selected by `betterEvidence`, or null.
 */
export function bestCoordinateMultiCellRowSupport({ mode, question, answer, answerTokens, focusTokens, coordinateMultiCellRowsByPage, }) {
    if (mode !== "multi" || !coordinateMultiCellRowsByPage)
        return null;
    const answerNumbers = extractNumbers(answer.text);
    const answerPhrases = answerSearchPhrases(answer.text).slice(0, 12);
    const tableFocus = coordinateTableFocusTokens(question, focusTokens, answerTokens);
    if (tableFocus.length < 1 && !answerNumbers.length)
        return null;
    const questionSeverity = severityCue(question);
    const squash = (value) => value.replace(/\s+/g, " ").trim();
    let best = null;
    for (const pageRows of coordinateMultiCellRowsByPage.values()) {
        for (const row of pageRows) {
            const rowSeverity = severityCue(row.labelText);
            if (questionSeverity && rowSeverity !== questionSeverity)
                continue;
            const labelCoverage = tableFocus.length ? coverage(tableFocus, row.labelTokens) : 0;
            const labelHits = tokenHitCount(tableFocus, row.labelTokens);
            const headerCoverage = tableFocus.length ? coverage(tableFocus, uniqueTokens(row.headerText)) : 0;
            // A matching severity already ties the label to the question.
            const labelSupported = questionSeverity || labelCoverage >= 0.18 || labelHits >= 1;
            if (!labelSupported && headerCoverage < 0.38)
                continue;
            const answerSupport = coordinateCellAnswerSupport({ text: row.valueText, index: 1 }, answer, answerTokens, answerPhrases, answerNumbers);
            if (!coordinateNumericDirectionCompatible(row.valueText, answer.text, answerNumbers))
                continue;
            const synonymSupport = coordinateRouteSynonymSupport(answer.text, `${row.valueText} ${row.headerText}`);
            const effectiveAnswerSupport = Math.max(answerSupport.support, synonymSupport);
            const answerTokenHits = tokenHitCount(answerTokens, answerSupport.tokens);
            const longListSupport = answerTokens.length >= 6 && answerTokenHits >= 4 && answerSupport.support >= 0.52;
            let minAnswerSupport = 0.58;
            if (longListSupport) {
                minAnswerSupport = 0.52;
            }
            else if (answerNumbers.length) {
                minAnswerSupport = 0.5;
            }
            if (effectiveAnswerSupport < minAnswerSupport)
                continue;
            const lexicalAnswerSupport = answerTokens.length ? strictSoftCoverage(answerTokens, answerSupport.tokens) : 0;
            const minLexicalSupport = longListSupport ? 0.5 : 0.38;
            if (!answerSupport.phraseHit && synonymSupport <= 0 && lexicalAnswerSupport < minLexicalSupport)
                continue;
            const score = 14.2 +
                Math.min(1, effectiveAnswerSupport) * 8.3 +
                Math.min(0.75, labelCoverage) * 7.4 +
                Math.min(3, labelHits) * 1.4 +
                (questionSeverity ? 2.2 : 0) +
                Math.min(0.5, headerCoverage) * 2.0 +
                (answerSupport.phraseHit ? 1.4 : 0) +
                synonymSupport * 1.3 +
                lexicalAnswerSupport * 1.8 +
                answerSupport.numericCoverage * 2.0 +
                (longListSupport ? 1.2 : 0);
            const evidence = {
                answerId: answer.id,
                page: row.page,
                text: squash(`${row.headerText} | ${row.labelText} -> ${row.valueText}`),
                score,
                kind: "coordinate_table_multicell_row",
            };
            best = betterEvidence(best, evidence);
        }
    }
    return best;
}