med-pdf-nmo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +298 -0
- package/README.ru.md +298 -0
- package/dist/bm25.d.ts +47 -0
- package/dist/bm25.js +86 -0
- package/dist/browser-shims/buffer.d.ts +30 -0
- package/dist/browser-shims/buffer.js +31 -0
- package/dist/browser-shims/crypto.d.ts +33 -0
- package/dist/browser-shims/crypto.js +45 -0
- package/dist/browser-shims/fs-promises.d.ts +13 -0
- package/dist/browser-shims/fs-promises.js +25 -0
- package/dist/browser-shims/fs.d.ts +14 -0
- package/dist/browser-shims/fs.js +24 -0
- package/dist/browser-shims/globals.d.ts +9 -0
- package/dist/browser-shims/globals.js +23 -0
- package/dist/browser-shims/path.d.ts +57 -0
- package/dist/browser-shims/path.js +65 -0
- package/dist/browser-shims/process.d.ts +22 -0
- package/dist/browser-shims/process.js +27 -0
- package/dist/browser.d.ts +9 -0
- package/dist/browser.js +12 -0
- package/dist/chunk.d.ts +15 -0
- package/dist/chunk.js +76 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +87 -0
- package/dist/index.d.ts +82 -0
- package/dist/index.js +51 -0
- package/dist/med-pdf-nmo.browser.js +40413 -0
- package/dist/med-pdf-nmo.browser.mjs +40395 -0
- package/dist/normalize.d.ts +73 -0
- package/dist/normalize.js +477 -0
- package/dist/pdf.d.ts +35 -0
- package/dist/pdf.js +396 -0
- package/dist/predictor/config.d.ts +28 -0
- package/dist/predictor/config.js +26 -0
- package/dist/predictor/constants.d.ts +3 -0
- package/dist/predictor/constants.js +59 -0
- package/dist/predictor/runtime.d.ts +15 -0
- package/dist/predictor/runtime.js +59 -0
- package/dist/predictor/scorers/biomedical-symbols.d.ts +36 -0
- package/dist/predictor/scorers/biomedical-symbols.js +347 -0
- package/dist/predictor/scorers/coordinate-table.d.ts +82 -0
- package/dist/predictor/scorers/coordinate-table.js +1210 -0
- package/dist/predictor/scorers/direction.d.ts +71 -0
- package/dist/predictor/scorers/direction.js +345 -0
- package/dist/predictor/scorers/drug-dose.d.ts +6 -0
- package/dist/predictor/scorers/drug-dose.js +221 -0
- package/dist/predictor/scorers/exact-answer.d.ts +10 -0
- package/dist/predictor/scorers/exact-answer.js +75 -0
- package/dist/predictor/scorers/fibrosis-stage.d.ts +6 -0
- package/dist/predictor/scorers/fibrosis-stage.js +103 -0
- package/dist/predictor/scorers/focused.d.ts +40 -0
- package/dist/predictor/scorers/focused.js +204 -0
- package/dist/predictor/scorers/frequency.d.ts +10 -0
- package/dist/predictor/scorers/frequency.js +203 -0
- package/dist/predictor/scorers/numeric.d.ts +77 -0
- package/dist/predictor/scorers/numeric.js +1161 -0
- package/dist/predictor/scorers/recommendation-item.d.ts +27 -0
- package/dist/predictor/scorers/recommendation-item.js +469 -0
- package/dist/predictor/scorers/search.d.ts +41 -0
- package/dist/predictor/scorers/search.js +515 -0
- package/dist/predictor/selection.d.ts +30 -0
- package/dist/predictor/selection.js +370 -0
- package/dist/predictor/text-utils.d.ts +49 -0
- package/dist/predictor/text-utils.js +497 -0
- package/dist/predictor/types.d.ts +23 -0
- package/dist/predictor/types.js +1 -0
- package/dist/predictor.d.ts +52 -0
- package/dist/predictor.js +3834 -0
- package/package.json +82 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
 * Looks for support for an answer option inside the recommendation block that
 * relates to the drug/intervention named in a question of the form
 * "administration of X is recommended". If the option is confidently found
 * only in a neighbouring recommendation about a different X, a soft penalty is
 * returned instead of support.
 *
 * Per the compiled implementation: a neutral result
 * (`support: null, adjustment: 0, evidence: null`) is returned when `mode` is
 * not `"multi"` or when no target tokens can be extracted from the question.
 * On success `support` holds the best evidence record and `adjustment` is 0;
 * on a confident mismatch `adjustment` is negative and `evidence` holds the
 * mismatching segment.
 *
 * The two members of the return union are structurally identical; the
 * declaration is emitted this way by the compiler.
 */
|
|
6
|
+
export declare function explicitRecommendationTargetAdjustment({ mode, pages, question, answer, answers, answerTokens }: {
|
|
7
|
+
mode: any;
|
|
8
|
+
pages: any;
|
|
9
|
+
question: any;
|
|
10
|
+
answer: any;
|
|
11
|
+
answers: any;
|
|
12
|
+
answerTokens: any;
|
|
13
|
+
}): {
|
|
14
|
+
support: any;
|
|
15
|
+
adjustment: number;
|
|
16
|
+
evidence: any;
|
|
17
|
+
} | {
|
|
18
|
+
support: any;
|
|
19
|
+
adjustment: number;
|
|
20
|
+
evidence: any;
|
|
21
|
+
};
|
|
22
|
+
/**
 * Scores one answer option against recommendation segments extracted from the
 * document pages.
 *
 * Per the compiled implementation, the result is `null` when the question does
 * not match a supported recommendation-item pattern, when no informative
 * question tokens remain, or when no segment passes the filters. Otherwise it
 * is the best evidence record (`answerId`, `page`, `text`, `score`,
 * `kind: "recommendation_item_segment"`).
 */
export declare function bestRecommendationItemSupport({ pages, question, answer, answerTokens }: {
|
|
23
|
+
pages: any;
|
|
24
|
+
question: any;
|
|
25
|
+
answer: any;
|
|
26
|
+
answerTokens: any;
|
|
27
|
+
}): any;
|
|
@@ -0,0 +1,469 @@
|
|
|
1
|
+
import { extractNumbers, normalizeForSearch, normalizeText, uniqueTokens } from "../../normalize.js";
|
|
2
|
+
import { FOCUS_STOPWORDS } from "../constants.js";
|
|
3
|
+
import { answerSearchPhrases, betterEvidence, containsNormalizedPhrase, numberCoverage, strictSoftCoverage, tokenizeNormalized } from "../text-utils.js";
|
|
4
|
+
/**
 * Tokens that are too generic to identify what a recommendation question is
 * about (e.g. "рекомендуется", "пациентам", "при", "первой", "линии").
 * Every entry is passed through `uniqueTokens` so the set stores the same
 * token form that question tokens are later compared against.
 */
const RECOMMENDATION_QUESTION_GENERIC = (() => {
    const genericWords = [
        "\u0440\u0435\u043a\u043e\u043c\u0435\u043d\u0434\u0443\u0435\u0442\u0441\u044f",
        "\u0440\u0435\u043a\u043e\u043c\u0435\u043d\u0434\u043e\u0432\u0430\u043d\u043e",
        "\u043f\u0430\u0446\u0438\u0435\u043d\u0442\u0430\u043c",
        "\u043f\u0430\u0446\u0438\u0435\u043d\u0442\u043e\u0432",
        "\u043f\u0430\u0446\u0438\u0435\u043d\u0442\u044b",
        "\u043f\u0440\u0438",
        "\u0434\u043b\u044f",
        "\u0441",
        "\u0438",
        "\u0443",
        "\u044f\u0432\u043b\u044f\u044e\u0442\u0441\u044f",
        "\u043f\u0440\u0435\u043f\u0430\u0440\u0430\u0442\u0430\u043c\u0438",
        "\u043f\u0435\u0440\u0432\u043e\u0439",
        "\u043b\u0438\u043d\u0438\u0438",
    ];
    const tokens = new Set();
    for (const word of genericWords) {
        for (const token of uniqueTokens(word)) {
            tokens.add(token);
        }
    }
    return tokens;
})();
|
|
20
|
+
/**
 * Generic words and stems ("назначение", "провед", "пациент", "препарат", ...)
 * that must not be treated as the target or context of an
 * "administration of X is recommended" question. Entries are tokenized with
 * `uniqueTokens` before being stored.
 */
const RECOMMENDATION_TARGET_GENERIC = [
    "\u043d\u0430\u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435",
    "\u043d\u0430\u0437\u043d\u0430\u0447",
    "\u043f\u0440\u043e\u0432\u0435\u0434",
    "\u043f\u0440\u043e\u0432\u043e\u0434",
    "\u0432\u044b\u043f\u043e\u043b\u043d",
    "\u0440\u0435\u043a\u043e\u043c\u0435\u043d\u0434",
    "\u043f\u0430\u0446\u0438\u0435\u043d\u0442",
    "\u043f\u0430\u0446\u0438\u0435\u043d\u0442\u0430\u043c",
    "\u043f\u0430\u0446\u0438\u0435\u043d\u0442\u043e\u0432",
    "\u043f\u0440\u0435\u043f\u0430\u0440\u0430\u0442",
    "\u043b\u0435\u043a\u0430\u0440\u0441\u0442\u0432",
    "\u0441",
    "\u043f\u0440\u0438",
    "\u0434\u043b\u044f",
    "\u0438",
].reduce((tokens, word) => {
    uniqueTokens(word).forEach((token) => tokens.add(token));
    return tokens;
}, new Set());
|
|
37
|
+
/**
 * Tells whether the question matches one of the recommendation-item patterns
 * handled by this scorer. After normalization the question must contain:
 *  - "первой линии" (first line), or
 *  - "протез" and "клап" together with "биолог" or "механ", or
 *  - "всем пациент" together with either ("первич" and "стад") or
 *    ("динамич" and "эффект").
 *
 * @param question Raw question text.
 * @returns Truthy when any pattern matches.
 */
function recommendationItemQuestion(question) {
    const normalized = normalizeForSearch(question);
    const has = (stem) => containsNormalizedPhrase(normalized, stem);
    return (has("\u043f\u0435\u0440\u0432\u043e\u0439 \u043b\u0438\u043d\u0438\u0438") ||
        (has("\u043f\u0440\u043e\u0442\u0435\u0437") &&
            has("\u043a\u043b\u0430\u043f") &&
            (has("\u0431\u0438\u043e\u043b\u043e\u0433") || has("\u043c\u0435\u0445\u0430\u043d"))) ||
        (has("\u0432\u0441\u0435\u043c \u043f\u0430\u0446\u0438\u0435\u043d\u0442") &&
            ((has("\u043f\u0435\u0440\u0432\u0438\u0447") && has("\u0441\u0442\u0430\u0434")) ||
                (has("\u0434\u0438\u043d\u0430\u043c\u0438\u0447") && has("\u044d\u0444\u0444\u0435\u043a\u0442")))));
}
|
|
48
|
+
/**
 * Extracts the informative tokens of a recommendation question: unique tokens
 * of at least four characters that are neither focus stop-words nor generic
 * recommendation vocabulary.
 *
 * @param question Raw question text.
 * @returns Filtered tokens in the order produced by `uniqueTokens`.
 */
function recommendationQuestionTokens(question) {
    const informative = [];
    for (const token of uniqueTokens(question)) {
        if (token.length < 4)
            continue;
        if (FOCUS_STOPWORDS.has(token) || RECOMMENDATION_QUESTION_GENERIC.has(token))
            continue;
        informative.push(token);
    }
    return informative;
}
|
|
51
|
+
/**
 * Checks whether a line consists solely of digits, ignoring surrounding
 * whitespace (i.e. looks like a bare page number).
 *
 * @param line Line text; `null`/`undefined` are treated as an empty string.
 * @returns `true` when the trimmed line is one or more digits.
 */
function isPageNumberOnly(line) {
    const trimmed = String(line ?? "").trim();
    return /^\d+$/u.test(trimmed);
}
|
|
54
|
+
/**
 * Checks whether the first non-whitespace character of a line is a bullet
 * marker: "•", "*" or "-".
 *
 * @param line Line text; `null`/`undefined` are treated as an empty string.
 * @returns `true` when the line starts with a bullet marker.
 */
function startsBullet(line) {
    const marker = String(line ?? "").trimStart().charAt(0);
    return marker === "•" || marker === "*" || marker === "-";
}
|
|
57
|
+
/**
 * Decides whether a line can open a recommendation segment: it is not a bare
 * page number and it either starts with a bullet or contains "рекоменд" or
 * "первой линии" after normalization.
 *
 * @param line Raw page line.
 * @returns Truthy when a segment may start on this line.
 */
function recommendationLineStart(line) {
    if (isPageNumberOnly(line)) {
        return false;
    }
    if (startsBullet(line)) {
        return true;
    }
    const normalized = normalizeForSearch(line);
    return (containsNormalizedPhrase(normalized, "\u0440\u0435\u043a\u043e\u043c\u0435\u043d\u0434") ||
        containsNormalizedPhrase(normalized, "\u043f\u0435\u0440\u0432\u043e\u0439 \u043b\u0438\u043d\u0438\u0438"));
}
|
|
65
|
+
/**
 * Decides whether a line terminates the recommendation segment being
 * collected. A line is a boundary when it is a bare page number, when it is a
 * bullet that is not the segment's own first line, or when its normalized form
 * starts with an "eok"-like prefix or "ypobeh", or contains "уур" / "удд".
 *
 * NOTE(review): "eok" and "ypobeh" are Latin-letter strings; presumably
 * `normalizeForSearch` folds look-alike Cyrillic letters to Latin — confirm.
 *
 * @param line Raw page line.
 * @param isFirstLine Whether this is the line the segment starts on.
 * @returns Truthy when collection must stop at (or skip) this line.
 */
function recommendationBoundaryLine(line, isFirstLine) {
    const pageNumber = isPageNumberOnly(line);
    if (pageNumber || (!isFirstLine && startsBullet(line))) {
        return true;
    }
    const normalized = normalizeForSearch(line);
    const gradePrefix = /^e\s*o?k\b/iu.test(normalized) ||
        normalized.startsWith("eok") ||
        normalized.startsWith("ypobeh");
    if (gradePrefix) {
        return true;
    }
    return (containsNormalizedPhrase(normalized, "\u0443\u0443\u0440") ||
        containsNormalizedPhrase(normalized, "\u0443\u0434\u0434"));
}
|
|
77
|
+
/**
 * Collects the text of one recommendation segment, starting at the given line
 * and running until a boundary line, a limit of 12 collected lines, or the end
 * of the following page. Bare page-number lines are skipped rather than
 * treated as the end of the segment. Collection continues onto the next page
 * only when that page has lines and its first line is not a bullet.
 *
 * @param pages Page objects; each may carry a `lines` array.
 * @param pageIndex Index of the page on which the segment starts.
 * @param lineIndex Index of the starting line within that page.
 * @returns Collected lines joined with single spaces (possibly empty).
 */
function collectRecommendationSegment(pages, pageIndex, lineIndex) {
    const MAX_LINES = 12;
    const collected = [];
    const pageLimit = Math.min(pages.length, pageIndex + 2);
    for (let current = pageIndex; current < pageLimit; current += 1) {
        const pageLines = pages[current].lines ?? [];
        const onStartPage = current === pageIndex;
        let cursor = onStartPage ? lineIndex : 0;
        while (cursor < pageLines.length && collected.length < MAX_LINES) {
            const line = pageLines[cursor];
            const isStartLine = onStartPage && cursor === lineIndex;
            cursor += 1;
            // Page numbers are always boundaries, but they only get skipped.
            if (isPageNumberOnly(line)) {
                continue;
            }
            if (recommendationBoundaryLine(line, isStartLine)) {
                return collected.join(" ");
            }
            collected.push(line);
        }
        if (collected.length >= MAX_LINES) {
            break;
        }
        const following = pages[current + 1];
        const canContinue = Boolean(following?.lines?.length) && !startsBullet(following.lines[0]);
        if (!canContinue) {
            break;
        }
    }
    return collected.join(" ");
}
|
|
100
|
+
/**
 * Builds the list of recommendation segments for a document. Every line that
 * can open a recommendation starts a segment; segments whose
 * whitespace-collapsed text is shorter than 24 characters are dropped.
 *
 * @param pages Page objects with `page` and optional `lines`.
 * @returns Array of `{ page, text, normalized }` records.
 */
function recommendationSegments(pages) {
    const MIN_SEGMENT_LENGTH = 24;
    const found = [];
    for (let p = 0; p < pages.length; p += 1) {
        const currentPage = pages[p];
        const pageLines = currentPage.lines ?? [];
        for (let l = 0; l < pageLines.length; l += 1) {
            if (recommendationLineStart(pageLines[l])) {
                const raw = collectRecommendationSegment(pages, p, l);
                const text = raw.replace(/\s+/gu, " ").trim();
                if (text.length >= MIN_SEGMENT_LENGTH) {
                    found.push({
                        page: currentPage.page,
                        text,
                        normalized: normalizeForSearch(text),
                    });
                }
            }
        }
    }
    return found;
}
|
|
120
|
+
/**
 * Decides whether a line opens an explicit recommendation statement: it is not
 * a bare page number and, after normalization, contains "рекомендова" or
 * "рекоменду".
 *
 * @param line Raw page line.
 * @returns Truthy when the line starts an explicit recommendation.
 */
function explicitRecommendationLineStart(line) {
    if (isPageNumberOnly(line)) {
        return false;
    }
    const normalized = normalizeForSearch(line);
    const has = (stem) => containsNormalizedPhrase(normalized, stem);
    return has("\u0440\u0435\u043a\u043e\u043c\u0435\u043d\u0434\u043e\u0432\u0430") || has("\u0440\u0435\u043a\u043e\u043c\u0435\u043d\u0434\u0443");
}
|
|
126
|
+
/**
 * Collects the text of an explicit recommendation block: from the starting
 * line up to (not including) the next line that opens another explicit
 * recommendation, limited to 22 collected lines and to the starting page plus
 * the one after it. Bare page-number lines are skipped.
 *
 * @param pages Page objects; each may carry a `lines` array.
 * @param pageIndex Index of the page on which the block starts.
 * @param lineIndex Index of the starting line within that page.
 * @returns Collected lines joined with single spaces.
 */
function collectExplicitRecommendationBlock(pages, pageIndex, lineIndex) {
    const MAX_LINES = 22;
    const collected = [];
    const pageLimit = Math.min(pages.length, pageIndex + 2);
    for (let current = pageIndex; current < pageLimit; current += 1) {
        const pageLines = pages[current].lines ?? [];
        const onStartPage = current === pageIndex;
        let cursor = onStartPage ? lineIndex : 0;
        while (cursor < pageLines.length && collected.length < MAX_LINES) {
            const line = pageLines[cursor];
            const isStartLine = onStartPage && cursor === lineIndex;
            cursor += 1;
            if (isPageNumberOnly(line)) {
                continue;
            }
            // The next explicit recommendation ends the current block.
            if (!isStartLine && explicitRecommendationLineStart(line)) {
                return collected.join(" ");
            }
            collected.push(line);
        }
        if (collected.length >= MAX_LINES) {
            break;
        }
    }
    return collected.join(" ");
}
|
|
146
|
+
/**
 * Builds the list of explicit recommendation blocks for a document. A block
 * starts on every line recognised by `explicitRecommendationLineStart`; blocks
 * whose whitespace-collapsed text is shorter than 24 characters are dropped.
 *
 * @param pages Page objects with `page` and optional `lines`.
 * @returns Array of `{ page, text, normalized }` records.
 */
function explicitRecommendationSegments(pages) {
    const MIN_SEGMENT_LENGTH = 24;
    const found = [];
    for (let p = 0; p < pages.length; p += 1) {
        const currentPage = pages[p];
        const pageLines = currentPage.lines ?? [];
        for (let l = 0; l < pageLines.length; l += 1) {
            if (explicitRecommendationLineStart(pageLines[l])) {
                const raw = collectExplicitRecommendationBlock(pages, p, l);
                const text = raw.replace(/\s+/gu, " ").trim();
                if (text.length >= MIN_SEGMENT_LENGTH) {
                    found.push({
                        page: currentPage.page,
                        text,
                        normalized: normalizeForSearch(text),
                    });
                }
            }
        }
    }
    return found;
}
|
|
166
|
+
/**
 * Checks that a segment talks about the same subject as the question. For each
 * of the qualifiers "биолог", "механ" and "первой линии": if the question
 * mentions it, the segment must mention it too.
 *
 * @param questionNorm Normalized question text.
 * @param segmentNorm Normalized segment text.
 * @returns `false` when the question names a qualifier the segment lacks.
 */
function recommendationSubjectCompatible(questionNorm, segmentNorm) {
    const requiredQualifiers = [
        "\u0431\u0438\u043e\u043b\u043e\u0433",
        "\u043c\u0435\u0445\u0430\u043d",
        "\u043f\u0435\u0440\u0432\u043e\u0439 \u043b\u0438\u043d\u0438\u0438",
    ];
    return requiredQualifiers.every((qualifier) => !containsNormalizedPhrase(questionNorm, qualifier) ||
        Boolean(containsNormalizedPhrase(segmentNorm, qualifier)));
}
|
|
184
|
+
/**
 * Measures how well a segment covers the question tokens. The base value is
 * `strictSoftCoverage` of the question tokens over the segment tokens. When
 * the question mentions "протез", "аорт" and "клап" and the segment contains
 * "ПАК", the coverage is raised to at least 0.58.
 *
 * @param questionNorm Normalized question text.
 * @param questionTokens Informative question tokens.
 * @param segmentNorm Normalized segment text.
 * @returns Coverage score.
 */
function recommendationQuestionCoverage(questionNorm, questionTokens, segmentNorm) {
    const baseCoverage = strictSoftCoverage(questionTokens, tokenizeNormalized(segmentNorm));
    const aorticValveProsthesis = [
        "\u043f\u0440\u043e\u0442\u0435\u0437",
        "\u0430\u043e\u0440\u0442",
        "\u043a\u043b\u0430\u043f",
    ].every((stem) => containsNormalizedPhrase(questionNorm, stem));
    if (aorticValveProsthesis && containsNormalizedPhrase(segmentNorm, "\u041f\u0410\u041a")) {
        return Math.max(baseCoverage, 0.58);
    }
    return baseCoverage;
}
|
|
194
|
+
/**
 * Selects the part of a segment in which the answer should be searched. When
 * the question mentions "дилатац" and the segment contains "без дилатац"
 * beyond position 80, only the text before that phrase is kept; otherwise the
 * whole segment is returned.
 *
 * @param questionNorm Normalized question text.
 * @param segmentNorm Normalized segment text.
 * @returns The (possibly truncated) normalized segment.
 */
function recommendationAnswerWindow(questionNorm, segmentNorm) {
    if (!containsNormalizedPhrase(questionNorm, "\u0434\u0438\u043b\u0430\u0442\u0430\u0446")) {
        return segmentNorm;
    }
    const cut = segmentNorm.indexOf(normalizeForSearch("\u0431\u0435\u0437 \u0434\u0438\u043b\u0430\u0442\u0430\u0446"));
    return cut > 80 ? segmentNorm.slice(0, cut) : segmentNorm;
}
|
|
202
|
+
/**
 * Returns a support value for answers that are spelled out in full while the
 * segment uses a short form:
 *  - answer contains "ингиб" and "апф", segment contains "иАПФ" -> 0.98;
 *  - answer contains "бета", "адрено" and "блок", segment contains "бета" -> 0.96.
 * When both rules fire the larger value (0.98) wins; otherwise 0.
 *
 * @param answerText Raw answer text.
 * @param segmentNorm Normalized segment (or answer window) text.
 * @returns Alias support in the range [0, 0.98].
 */
function recommendationAliasSupport(answerText, segmentNorm) {
    const answerNorm = normalizeForSearch(answerText);
    const answerHas = (stem) => containsNormalizedPhrase(answerNorm, stem);
    const aceInhibitorAlias = answerHas("\u0438\u043d\u0433\u0438\u0431") &&
        answerHas("\u0430\u043f\u0444") &&
        containsNormalizedPhrase(segmentNorm, "\u0438\u0410\u041f\u0424");
    if (aceInhibitorAlias) {
        return 0.98;
    }
    const betaBlockerAlias = answerHas("\u0431\u0435\u0442\u0430") &&
        answerHas("\u0430\u0434\u0440\u0435\u043d\u043e") &&
        answerHas("\u0431\u043b\u043e\u043a") &&
        containsNormalizedPhrase(segmentNorm, "\u0431\u0435\u0442\u0430");
    return betaBlockerAlias ? 0.96 : 0;
}
|
|
218
|
+
/**
 * Determines whether a text about anticoagulation speaks of the absence or the
 * presence of contraindications. Only texts containing "антикоаг" are
 * analysed. For every occurrence of "противопоказ" the 58 characters before it
 * are inspected: "отсут" yields "absence", "налич" yields "presence"
 * ("отсут" is checked first). The first occurrence that decides wins.
 *
 * @param normalized Normalized text.
 * @returns "absence", "presence", or `null` when undetermined.
 */
function anticoagulationContraPolarity(normalized) {
    if (!containsNormalizedPhrase(normalized, "\u0430\u043d\u0442\u0438\u043a\u043e\u0430\u0433")) {
        return null;
    }
    const LOOKBEHIND = 58;
    const marker = normalizeForSearch("\u043f\u0440\u043e\u0442\u0438\u0432\u043e\u043f\u043e\u043a\u0430\u0437");
    for (let from = 0; from < normalized.length;) {
        const at = normalized.indexOf(marker, from);
        if (at < 0) {
            return null;
        }
        const context = normalized.slice(Math.max(0, at - LOOKBEHIND), at);
        if (containsNormalizedPhrase(context, "\u043e\u0442\u0441\u0443\u0442")) {
            return "absence";
        }
        if (containsNormalizedPhrase(context, "\u043d\u0430\u043b\u0438\u0447")) {
            return "presence";
        }
        from = at + marker.length;
    }
    return null;
}
|
|
236
|
+
/**
 * Detects answers whose qualifier contradicts, or is missing from, the
 * segment. A mismatch is reported when:
 *  - both texts have a determined anticoagulation contraindication polarity
 *    and the polarities differ;
 *  - the answer says "оптим" but the segment does not;
 *  - the answer says "меньш" or "ниже" but the segment says neither;
 *  - "противопоказ" occurs in either text and the answer speaks of absence
 *    ("отсутст") while the segment speaks only of presence ("налич"), or the
 *    other way round.
 *
 * @param answerText Raw answer text.
 * @param segmentNorm Normalized segment (or answer window) text.
 * @returns `true` when the answer conflicts with the segment.
 */
function recommendationPresenceMismatch(answerText, segmentNorm) {
    const answerNorm = normalizeForSearch(answerText);
    const inAnswer = (stem) => containsNormalizedPhrase(answerNorm, stem);
    const inSegment = (stem) => containsNormalizedPhrase(segmentNorm, stem);
    const answerPolarity = anticoagulationContraPolarity(answerNorm);
    const segmentPolarity = anticoagulationContraPolarity(segmentNorm);
    if (answerPolarity && segmentPolarity && answerPolarity !== segmentPolarity) {
        return true;
    }
    if (inAnswer("\u043e\u043f\u0442\u0438\u043c") && !inSegment("\u043e\u043f\u0442\u0438\u043c")) {
        return true;
    }
    const answerSaysLower = inAnswer("\u043c\u0435\u043d\u044c\u0448") || inAnswer("\u043d\u0438\u0436\u0435");
    if (answerSaysLower && !inSegment("\u043c\u0435\u043d\u044c\u0448") && !inSegment("\u043d\u0438\u0436\u0435")) {
        return true;
    }
    const aboutContraindications = inAnswer("\u043f\u0440\u043e\u0442\u0438\u0432\u043e\u043f\u043e\u043a\u0430\u0437") ||
        inSegment("\u043f\u0440\u043e\u0442\u0438\u0432\u043e\u043f\u043e\u043a\u0430\u0437");
    if (!aboutContraindications) {
        return false;
    }
    const segmentAbsence = inSegment("\u043e\u0442\u0441\u0443\u0442\u0441\u0442");
    const segmentPresence = inSegment("\u043d\u0430\u043b\u0438\u0447");
    if (inAnswer("\u043e\u0442\u0441\u0443\u0442\u0441\u0442") && segmentPresence && !segmentAbsence) {
        return true;
    }
    if (inAnswer("\u043d\u0430\u043b\u0438\u0447") && segmentAbsence && !segmentPresence) {
        return true;
    }
    return false;
}
|
|
260
|
+
/**
 * Extracts the tokens naming what a question recommends administering or
 * performing. The text after the earliest cue word ("назначение",
 * "проведение", "проводить", "выполнение") is cut at the first boundary phrase
 * ("с целью", "в доз", "в качеств", "при налич", "пациент") that does not
 * stand at the very beginning of that tail. The remaining text is tokenized;
 * tokens shorter than four characters, focus stop-words and generic
 * recommendation vocabulary are removed, and at most seven tokens are kept.
 *
 * @param question Raw question text.
 * @returns Target tokens, or an empty array when no cue word is present.
 */
function appointmentTargetTokens(question) {
    const normalized = normalizeText(question);
    const cueWords = [
        "\u043d\u0430\u0437\u043d\u0430\u0447\u0435\u043d\u0438\u0435",
        "\u043f\u0440\u043e\u0432\u0435\u0434\u0435\u043d\u0438\u0435",
        "\u043f\u0440\u043e\u0432\u043e\u0434\u0438\u0442\u044c",
        "\u0432\u044b\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0435",
    ].map((item) => normalizeText(item));
    // Earliest cue wins; on equal positions the cue listed first is kept.
    let earliest = null;
    for (const cueWord of cueWords) {
        const at = normalized.indexOf(cueWord);
        if (at < 0) {
            continue;
        }
        if (earliest === null || at < earliest.at) {
            earliest = { at, word: cueWord };
        }
    }
    if (earliest === null) {
        return [];
    }
    const tail = normalized.slice(earliest.at + earliest.word.length).trim();
    const boundaryPositions = [
        "\u0441 \u0446\u0435\u043b\u044c\u044e",
        "\u0432 \u0434\u043e\u0437",
        "\u0432 \u043a\u0430\u0447\u0435\u0441\u0442\u0432",
        "\u043f\u0440\u0438 \u043d\u0430\u043b\u0438\u0447",
        "\u043f\u0430\u0446\u0438\u0435\u043d\u0442",
    ]
        .map((item) => tail.indexOf(normalizeText(item)))
        // A boundary at position 0 is ignored so the target never becomes empty by it.
        .filter((position) => position > 0);
    const end = Math.min(tail.length, ...boundaryPositions);
    const isInformative = (token) => token.length >= 4 && !FOCUS_STOPWORDS.has(token) && !RECOMMENDATION_TARGET_GENERIC.has(token);
    return uniqueTokens(tail.slice(0, end)).filter(isInformative).slice(0, 7);
}
|
|
297
|
+
/**
 * Coverage of the target tokens by a segment; 0 when there are no target
 * tokens.
 *
 * @param targetTokens Tokens naming the recommended drug/intervention.
 * @param segmentTokens Tokens of the segment.
 * @returns Coverage score.
 */
function targetCoverage(targetTokens, segmentTokens) {
    return targetTokens.length ? strictSoftCoverage(targetTokens, segmentTokens) : 0;
}
|
|
302
|
+
/**
 * Extracts the context tokens of a question: unique tokens of at least four
 * characters that are not target tokens, focus stop-words or generic
 * recommendation vocabulary. At most eight tokens are returned.
 *
 * @param question Raw question text.
 * @param targetTokens Tokens already identified as the target.
 * @returns Context tokens.
 */
function appointmentContextTokens(question, targetTokens) {
    const excluded = new Set(targetTokens);
    const context = [];
    for (const token of uniqueTokens(question)) {
        const tooShort = token.length < 4;
        if (tooShort || excluded.has(token) || FOCUS_STOPWORDS.has(token) || RECOMMENDATION_TARGET_GENERIC.has(token)) {
            continue;
        }
        context.push(token);
    }
    return context.slice(0, 8);
}
|
|
308
|
+
/**
 * Coverage of the context tokens by a segment. With fewer than two context
 * tokens the context is considered fully covered (1).
 *
 * @param contextTokens Context tokens of the question.
 * @param segmentTokens Tokens of the segment.
 * @returns Coverage score.
 */
function contextCoverage(contextTokens, segmentTokens) {
    const enoughContext = contextTokens.length >= 2;
    return enoughContext ? strictSoftCoverage(contextTokens, segmentTokens) : 1;
}
|
|
313
|
+
/**
 * Evaluates how an answer option matches a recommendation segment.
 *
 * Signals computed:
 *  - `strongPhraseHit`: the normalized answer, the answer without
 *    parenthesised parts, or the answer with hyphens/dashes replaced by spaces
 *    (only variants of 8+ characters) occurs in the segment;
 *  - `phraseHit`: any of the first 18 `answerSearchPhrases` occurs in the segment;
 *  - `answerCoverage`: `strictSoftCoverage` of answer tokens over segment tokens;
 *  - `numericCoverage`: `numberCoverage` when the answer contains numbers, else 0;
 *  - `supportHit`: strong phrase hit, or the coverage thresholds for
 *    numeric / long (5+ tokens) / short answers are met;
 *  - `mismatchHit`: phrase hit, or the weaker thresholds used to recognise the
 *    answer inside an unrelated recommendation.
 *
 * @param answer Answer option; `answer.text` is read.
 * @param answerTokens Tokens of the answer.
 * @param segmentNorm Normalized segment text.
 * @param segmentTokens Tokens of the segment.
 * @returns The six signals listed above.
 */
function recommendationSegmentAnswerHit(answer, answerTokens, segmentNorm, segmentTokens) {
    const answerNorm = normalizeForSearch(answer.text);
    const inSegment = (phrase) => containsNormalizedPhrase(segmentNorm, phrase);
    // Candidate spellings of the whole answer; the Set removes duplicates.
    const variants = new Set([answerNorm]);
    const bare = answerNorm.replace(/\([^)]*\)/g, " ").replace(/\s+/g, " ").trim();
    if (bare) {
        variants.add(bare);
    }
    const dehyphenated = normalizeForSearch(String(answer.text ?? "").replace(/\s*[-\u2010-\u2015]\s*/g, " "));
    if (dehyphenated) {
        variants.add(dehyphenated);
    }
    let strongPhraseHit = false;
    for (const variant of variants) {
        if (variant.length >= 8 && inSegment(variant)) {
            strongPhraseHit = true;
            break;
        }
    }
    const phraseHit = answerSearchPhrases(answer.text).slice(0, 18).some(inSegment);
    const answerCoverage = strictSoftCoverage(answerTokens, segmentTokens);
    const numeric = extractNumbers(answer.text).length > 0;
    const numericCoverage = numeric ? numberCoverage(answer.text, segmentNorm) : 0;
    let supportHit;
    if (strongPhraseHit) {
        supportHit = true;
    }
    else if (numeric) {
        supportHit = (phraseHit && answerCoverage >= 0.74 && numericCoverage >= 0.72) ||
            (answerCoverage >= 0.8 && numericCoverage >= 0.9);
    }
    else if (answerTokens.length >= 5) {
        // Long textual answers need near-complete token coverage.
        supportHit = answerCoverage >= 0.9;
    }
    else {
        supportHit = phraseHit || answerCoverage >= 0.62;
    }
    const coverageMismatch = numeric
        ? answerCoverage >= 0.62 && numericCoverage >= 0.45
        : answerCoverage >= 0.58;
    const mismatchHit = phraseHit || coverageMismatch;
    return { phraseHit, strongPhraseHit, answerCoverage, numericCoverage, supportHit, mismatchHit };
}
|
|
335
|
+
/**
 * Recognises answers that address an unrestricted population, i.e. whose
 * normalized text starts with "всем пациент", "все пациент", "всем пострад",
 * "все пострад", "всем больн" or "все больн".
 *
 * @param answerText Raw answer text.
 * @returns `true` when the answer starts with one of these phrases.
 */
function genericPopulationAnswerText(answerText) {
    const normalized = normalizeForSearch(answerText);
    const openings = [
        "\u0432\u0441\u0435\u043c \u043f\u0430\u0446\u0438\u0435\u043d\u0442",
        "\u0432\u0441\u0435 \u043f\u0430\u0446\u0438\u0435\u043d\u0442",
        "\u0432\u0441\u0435\u043c \u043f\u043e\u0441\u0442\u0440\u0430\u0434",
        "\u0432\u0441\u0435 \u043f\u043e\u0441\u0442\u0440\u0430\u0434",
        "\u0432\u0441\u0435\u043c \u0431\u043e\u043b\u044c\u043d",
        "\u0432\u0441\u0435 \u0431\u043e\u043b\u044c\u043d",
    ];
    return openings.some((opening) => normalized.startsWith(normalizeForSearch(opening)));
}
|
|
344
|
+
/**
 * Finds the first token of the answer that names a population, i.e. starts
 * with (the first eight characters of) the normalized stem "пациент",
 * "пострад" or "больн".
 *
 * @param answerText Raw answer text.
 * @returns The matching token, or `null` when none is found.
 */
function populationStem(answerText) {
    const prefixes = [
        "\u043f\u0430\u0446\u0438\u0435\u043d\u0442",
        "\u043f\u043e\u0441\u0442\u0440\u0430\u0434",
        "\u0431\u043e\u043b\u044c\u043d",
    ].map((item) => normalizeForSearch(item).slice(0, 8));
    for (const token of uniqueTokens(answerText)) {
        if (prefixes.some((prefix) => token.startsWith(prefix))) {
            return token;
        }
    }
    return null;
}
|
|
348
|
+
/**
 * Checks whether another answer option narrows the population named by a
 * generic answer. Such an option uses the same population word (compared by
 * the first eight characters of the generic answer's population token) and
 * contains one of the qualifiers "средн", "тяжел", "степен", "при налич" or
 * "с налич".
 *
 * @param answers All answer options; may be `null`/`undefined`.
 * @param genericAnswer The generic answer being evaluated (`id`, `text`).
 * @returns `true` when a more specific alternative exists.
 */
function hasSpecificPopulationAlternative(answers, genericAnswer) {
    const stem = populationStem(genericAnswer.text);
    if (!stem) {
        return false;
    }
    const stemPrefix = stem.slice(0, 8);
    const qualifiers = [
        "\u0441\u0440\u0435\u0434\u043d",
        "\u0442\u044f\u0436\u0435\u043b",
        "\u0441\u0442\u0435\u043f\u0435\u043d",
        "\u043f\u0440\u0438 \u043d\u0430\u043b\u0438\u0447",
        "\u0441 \u043d\u0430\u043b\u0438\u0447",
    ];
    const isSpecificAlternative = (candidate) => {
        if (candidate.id === genericAnswer.id) {
            return false;
        }
        const normalized = normalizeForSearch(candidate.text);
        const samePopulation = uniqueTokens(candidate.text).some((token) => token.startsWith(stemPrefix));
        if (!samePopulation) {
            return false;
        }
        return qualifiers.some((qualifier) => containsNormalizedPhrase(normalized, qualifier));
    };
    return (answers ?? []).some(isSpecificAlternative);
}
|
|
366
|
+
/**
 * Recognises answers that state a follow-up frequency: the text contains at
 * least one number, a cadence word ("кажд" or "раз в") and a time unit
 * ("месяц", "год", "лет" or "недел").
 *
 * @param answerText Raw answer text.
 * @returns Truthy when the answer looks like a frequency.
 */
function followUpFrequencyAnswer(answerText) {
    const normalized = normalizeForSearch(answerText);
    if (extractNumbers(answerText).length === 0) {
        return false;
    }
    const has = (stem) => containsNormalizedPhrase(normalized, stem);
    const cadence = has("\u043a\u0430\u0436\u0434") || has("\u0440\u0430\u0437 \u0432");
    return (cadence &&
        (has("\u043c\u0435\u0441\u044f\u0446") ||
            has("\u0433\u043e\u0434") ||
            has("\u043b\u0435\u0442") ||
            has("\u043d\u0435\u0434\u0435\u043b")));
}
|
|
375
|
+
/**
 * Looks for support for an answer option inside the recommendation block that
 * relates to the drug/intervention named in a question of the form
 * "administration of X is recommended". If the option is confidently found
 * only in a neighbouring recommendation about a different X, a soft penalty is
 * returned instead of support.
 *
 * A segment is a "target segment" when it covers the target tokens (>= 0.72)
 * and the context tokens (>= 0.45). Support can only come from target
 * segments; mismatch evidence can only come from segments that are clearly not
 * about the target.
 *
 * @param mode Question mode; anything other than "multi" yields a neutral result.
 * @param pages Document pages.
 * @param question Raw question text.
 * @param answer Answer option being scored (`id`, `text`).
 * @param answers All answer options, used to detect a generic-vs-specific
 *   population conflict.
 * @param answerTokens Tokens of `answer`.
 * @returns `{ support, adjustment, evidence }`: support evidence with
 *   adjustment 0; or adjustment -3.8 with the mismatch evidence; or a neutral
 *   result with all fields empty.
 */
export function explicitRecommendationTargetAdjustment({ mode, pages, question, answer, answers, answerTokens }) {
    const neutral = () => ({ support: null, adjustment: 0, evidence: null });
    if (mode !== "multi")
        return neutral();
    const targetTokens = appointmentTargetTokens(question);
    if (!targetTokens.length)
        return neutral();
    const contextTokens = appointmentContextTokens(question, targetTokens);
    // Depends only on the answer set, not on the segment: evaluate it at most
    // once, and only if some target segment is actually encountered.
    let genericSpecificConflict;
    const hasGenericSpecificConflict = () => {
        if (genericSpecificConflict === undefined) {
            genericSpecificConflict = Boolean(genericPopulationAnswerText(answer.text) && hasSpecificPopulationAlternative(answers, answer));
        }
        return genericSpecificConflict;
    };
    let bestSupport = null;
    let bestMismatch = null;
    let targetSegmentCount = 0;
    for (const segment of explicitRecommendationSegments(pages)) {
        const segmentTokens = tokenizeNormalized(segment.normalized);
        const segmentTargetCoverage = targetCoverage(targetTokens, segmentTokens);
        const segmentContextCoverage = contextCoverage(contextTokens, segmentTokens);
        const answerHit = recommendationSegmentAnswerHit(answer, answerTokens, segment.normalized, segmentTokens);
        if (segmentTargetCoverage >= 0.72 && segmentContextCoverage >= 0.45) {
            targetSegmentCount += 1;
            const conflict = hasGenericSpecificConflict();
            // A generic "all patients" answer is not supported when a more
            // specific population option is on offer.
            if (!answerHit.supportHit || conflict)
                continue;
            const score = 12.8 +
                segmentTargetCoverage * 4.4 +
                Math.min(1, segmentContextCoverage) * 1.6 +
                answerHit.answerCoverage * 4.2 +
                answerHit.numericCoverage * 1.8 +
                (answerHit.strongPhraseHit ? 2.8 : answerHit.phraseHit ? 1.4 : 0);
            bestSupport = betterEvidence(bestSupport, {
                answerId: answer.id,
                page: segment.page,
                text: segment.text,
                score,
                kind: "explicit_recommendation_target_segment",
            });
            continue;
        }
        // Segments that are partly about the target are too ambiguous to count
        // as a mismatch; frequency answers are exempt from the penalty.
        const partlyAboutTarget = segmentTargetCoverage > 0.35 && segmentContextCoverage >= 0.45;
        if (!answerHit.mismatchHit || followUpFrequencyAnswer(answer.text) || partlyAboutTarget)
            continue;
        const mismatchScore = 9.4 + answerHit.answerCoverage * 3.1 + answerHit.numericCoverage * 1.6 + (answerHit.phraseHit ? 2.0 : 0);
        bestMismatch = betterEvidence(bestMismatch, {
            answerId: answer.id,
            page: segment.page,
            text: segment.text,
            score: mismatchScore,
            kind: "explicit_recommendation_target_mismatch",
        });
    }
    if (bestSupport)
        return { support: bestSupport, adjustment: 0, evidence: null };
    // Penalise only when the target recommendation exists somewhere and the
    // answer was found confidently elsewhere.
    if (targetSegmentCount > 0 && bestMismatch && bestMismatch.score >= 11.2) {
        return { support: null, adjustment: -3.8, evidence: bestMismatch };
    }
    return neutral();
}
|
|
433
|
+
/**
 * Finds the best recommendation segment supporting an answer option for
 * recommendation-item questions (see `recommendationItemQuestion`).
 *
 * A segment qualifies when it is subject-compatible with the question, covers
 * the question tokens (>= 0.34), does not contradict the answer's qualifier,
 * and contains the answer either as a phrase or with token/alias coverage of
 * at least 0.62.
 *
 * @param pages Document pages.
 * @param question Raw question text.
 * @param answer Answer option being scored (`id`, `text`).
 * @param answerTokens Tokens of `answer`.
 * @returns The best evidence record (`kind: "recommendation_item_segment"`),
 *   or `null` when the question is out of scope or nothing qualifies.
 */
export function bestRecommendationItemSupport({ pages, question, answer, answerTokens }) {
    if (!recommendationItemQuestion(question))
        return null;
    const questionNorm = normalizeForSearch(question);
    const qTokens = recommendationQuestionTokens(question);
    if (!qTokens.length)
        return null;
    const segments = recommendationSegments(pages);
    if (!segments.length)
        return null;
    // An "optimal ..." answer is only eligible when the question itself asks
    // about the optimum. This does not depend on the segment, so decide once.
    const answerNorm = normalizeForSearch(answer.text);
    if (containsNormalizedPhrase(answerNorm, "\u043e\u043f\u0442\u0438\u043c") && !containsNormalizedPhrase(questionNorm, "\u043e\u043f\u0442\u0438\u043c"))
        return null;
    // Search phrases depend only on the answer; build them on first use.
    let answerPhrases = null;
    let best = null;
    for (const segment of segments) {
        if (!recommendationSubjectCompatible(questionNorm, segment.normalized))
            continue;
        const qCoverage = recommendationQuestionCoverage(questionNorm, qTokens, segment.normalized);
        if (qCoverage < 0.34)
            continue;
        const answerWindow = recommendationAnswerWindow(questionNorm, segment.normalized);
        if (recommendationPresenceMismatch(answer.text, answerWindow))
            continue;
        const tokens = tokenizeNormalized(answerWindow);
        if (answerPhrases === null)
            answerPhrases = answerSearchPhrases(answer.text);
        const phraseHit = answerPhrases.some((phrase) => containsNormalizedPhrase(answerWindow, phrase));
        const alias = recommendationAliasSupport(answer.text, answerWindow);
        const answerCoverage = Math.max(strictSoftCoverage(answerTokens, tokens), alias);
        if (!phraseHit && answerCoverage < 0.62)
            continue;
        const score = 15.8 + qCoverage * 4.0 + answerCoverage * 6.2 + (phraseHit ? 2.4 : 0) + alias * 2.0;
        best = betterEvidence(best, {
            answerId: answer.id,
            page: segment.page,
            text: segment.text,
            score,
            kind: "recommendation_item_segment",
        });
    }
    return best;
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
 * Generated declaration: takes the document pages and the question and
 * returns an array. The implementation is not part of this declaration, so
 * the element shape is unknown here.
 * NOTE(review): presumably the result is what `bestAnchorSupport` receives as
 * `anchorSegments` — confirm against the implementation.
 */
export declare function findAnchorSegments(pages: any, question: any): any[];
|
|
2
|
+
/**
 * Generated declaration: takes the document pages and the question and
 * returns an array. The implementation is not part of this declaration, so
 * the element shape is unknown here.
 * NOTE(review): presumably the result is what `bestSectionSupport` receives
 * as `sectionSegments` — confirm against the implementation.
 */
export declare function findSectionSegments(pages: any, question: any): any[];
|
|
3
|
+
/**
 * Generated declaration of a scorer that takes a single options object with
 * the pages, the question, one answer option, the question and answer tokens
 * and an `intent` value. All fields and the result are typed `any`.
 * NOTE(review): the result shape and the meaning of `intent` are not visible
 * from this declaration — confirm against the implementation.
 */
export declare function bestPhraseSupport({ pages, question, answer, questionTokens, answerTokens, intent }: {
|
|
4
|
+
pages: any;
|
|
5
|
+
question: any;
|
|
6
|
+
answer: any;
|
|
7
|
+
questionTokens: any;
|
|
8
|
+
answerTokens: any;
|
|
9
|
+
intent: any;
|
|
10
|
+
}): any;
|
|
11
|
+
/**
 * Finds answers that are headings placed before a long description quoted in
 * the question.
 *
 * Guidelines often use the form `Title: description...`, while the question
 * quotes only the description verbatim. A regular search after the question
 * text may then pick the next heading, so this scorer looks for the answer
 * option immediately before the matched question fragment.
 */
|
|
19
|
+
export declare function bestPrecedingQuestionLabelSupport({ mode, pages, question, answer, answerTokens }: {
|
|
20
|
+
mode: any;
|
|
21
|
+
pages: any;
|
|
22
|
+
question: any;
|
|
23
|
+
answer: any;
|
|
24
|
+
answerTokens: any;
|
|
25
|
+
}): any;
|
|
26
|
+
/**
 * Generated declaration of a scorer that takes previously collected
 * `anchorSegments`, one answer option and its tokens. The result is typed
 * `any`.
 * NOTE(review): presumably `anchorSegments` comes from `findAnchorSegments` —
 * confirm against the caller.
 */
export declare function bestAnchorSupport({ anchorSegments, answer, answerTokens }: {
|
|
27
|
+
anchorSegments: any;
|
|
28
|
+
answer: any;
|
|
29
|
+
answerTokens: any;
|
|
30
|
+
}): any;
|
|
31
|
+
/**
 * Generated declaration of a scorer that takes previously collected
 * `sectionSegments`, one answer option and its tokens. The result is typed
 * `any`.
 * NOTE(review): presumably `sectionSegments` comes from `findSectionSegments`
 * — confirm against the caller.
 */
export declare function bestSectionSupport({ sectionSegments, answer, answerTokens }: {
|
|
32
|
+
sectionSegments: any;
|
|
33
|
+
answer: any;
|
|
34
|
+
answerTokens: any;
|
|
35
|
+
}): any;
|
|
36
|
+
/**
 * Generated declaration: takes the document pages, the question and
 * `topQuestionPages`, and returns an array. The implementation is not part of
 * this declaration, so the element shape is unknown here.
 * NOTE(review): presumably the result is what `bestRowLabelSupport` receives
 * as `rowSegments` — confirm against the implementation.
 */
export declare function findRowSegments(pages: any, question: any, topQuestionPages: any): any[];
|
|
37
|
+
/**
 * Generated declaration of a scorer that takes previously collected
 * `rowSegments`, one answer option and its tokens. The result is typed `any`.
 * NOTE(review): presumably `rowSegments` comes from `findRowSegments` —
 * confirm against the caller.
 */
export declare function bestRowLabelSupport({ rowSegments, answer, answerTokens }: {
|
|
38
|
+
rowSegments: any;
|
|
39
|
+
answer: any;
|
|
40
|
+
answerTokens: any;
|
|
41
|
+
}): any;
|