med-pdf-nmo 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/README.ru.md +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +1 -1
- package/dist/med-pdf-nmo.browser.js +9 -3
- package/dist/med-pdf-nmo.browser.mjs +9 -3
- package/dist/pdf.d.ts +1 -1
- package/dist/pdf.js +7 -1
- package/dist/predictor.js +1 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -157,6 +157,7 @@ const result = await answerQuestion(pdf, {
|
|
|
157
157
|
- `mode`: alias for `type`.
|
|
158
158
|
- `cacheKey`: optional PDF text cache key.
|
|
159
159
|
- `pdfjsLib`: optional explicit PDF.js module override.
|
|
160
|
+
- `pdfVerbosity`: optional PDF.js logging level. By default only PDF.js errors are shown, so non-fatal font warnings such as `TT: undefined function` are suppressed.
|
|
160
161
|
|
|
161
162
|
Variants can be plain strings:
|
|
162
163
|
|
package/README.ru.md
CHANGED
|
@@ -157,6 +157,7 @@ const result = await answerQuestion(pdf, {
|
|
|
157
157
|
- `mode`: алиас для `type`.
|
|
158
158
|
- `cacheKey`: необязательный ключ кеша для текста PDF.
|
|
159
159
|
- `pdfjsLib`: необязательная явная передача PDF.js модуля.
|
|
160
|
+
- `pdfVerbosity`: необязательный уровень логирования PDF.js. По умолчанию показываются только ошибки PDF.js, поэтому нефатальные font warnings вроде `TT: undefined function` подавляются.
|
|
160
161
|
|
|
161
162
|
Варианты можно передавать строками:
|
|
162
163
|
|
package/dist/index.d.ts
CHANGED
|
@@ -41,6 +41,8 @@ export interface AnswerQuestionOptions {
|
|
|
41
41
|
cacheKey?: string;
|
|
42
42
|
/** Явно переданный модуль PDF.js, полезно для браузерного окружения. */
|
|
43
43
|
pdfjsLib?: any;
|
|
44
|
+
/** Уровень логирования PDF.js. По умолчанию показываются только ошибки. */
|
|
45
|
+
pdfVerbosity?: number;
|
|
44
46
|
}
|
|
45
47
|
/**
|
|
46
48
|
* Высокоуровневый результат, который возвращает {@link answerQuestion}.
|
package/dist/index.js
CHANGED
|
@@ -29,7 +29,7 @@ export async function answerQuestion(pdf, options = { question: "" }) {
|
|
|
29
29
|
question: options.question,
|
|
30
30
|
answers,
|
|
31
31
|
mode: options.type ?? options.mode ?? "single",
|
|
32
|
-
}, { pdfjsLib: options.pdfjsLib });
|
|
32
|
+
}, { pdfjsLib: options.pdfjsLib, pdfVerbosity: options.pdfVerbosity });
|
|
33
33
|
const selectedAnswers = output.selected
|
|
34
34
|
.map((id) => answers.find((answer) => answer.id === id))
|
|
35
35
|
.filter(Boolean);
|
|
@@ -32079,6 +32079,10 @@ fn fs_main(in : VertexOutput) -> @location(0) vec4<f32> {
|
|
|
32079
32079
|
);
|
|
32080
32080
|
}
|
|
32081
32081
|
}
|
|
32082
|
+
function pdfVerbosity(pdfjs, options = {}) {
|
|
32083
|
+
if (typeof options.pdfVerbosity === "number") return options.pdfVerbosity;
|
|
32084
|
+
return pdfjs.VerbosityLevel?.ERRORS ?? 0;
|
|
32085
|
+
}
|
|
32082
32086
|
async function toUint8Array(input) {
|
|
32083
32087
|
if (input instanceof Uint8Array) {
|
|
32084
32088
|
return new Uint8Array(input.buffer.slice(input.byteOffset, input.byteOffset + input.byteLength));
|
|
@@ -32277,7 +32281,8 @@ fn fs_main(in : VertexOutput) -> @location(0) vec4<f32> {
|
|
|
32277
32281
|
data,
|
|
32278
32282
|
disableWorker: true,
|
|
32279
32283
|
useSystemFonts: true,
|
|
32280
|
-
isEvalSupported: false
|
|
32284
|
+
isEvalSupported: false,
|
|
32285
|
+
verbosity: pdfVerbosity(pdfjs, options)
|
|
32281
32286
|
});
|
|
32282
32287
|
const pdf = await loadingTask.promise;
|
|
32283
32288
|
const pages = [];
|
|
@@ -40143,7 +40148,8 @@ ${nextPage.text}`;
|
|
|
40143
40148
|
if (!answers.length) throw new Error("predict input requires answers");
|
|
40144
40149
|
const runtime = await getPdfRuntime(pdfInput, {
|
|
40145
40150
|
cacheKey: input.cacheKey ?? input.pdfPath ?? input.pdfUrl ?? input.url,
|
|
40146
|
-
pdfjsLib: options.pdfjsLib
|
|
40151
|
+
pdfjsLib: options.pdfjsLib,
|
|
40152
|
+
pdfVerbosity: options.pdfVerbosity
|
|
40147
40153
|
});
|
|
40148
40154
|
const question = String(input.question ?? "");
|
|
40149
40155
|
const questionTokens = uniqueTokens(question);
|
|
@@ -40387,7 +40393,7 @@ ${nextPage.text}`;
|
|
|
40387
40393
|
answers,
|
|
40388
40394
|
mode: options.type ?? options.mode ?? "single"
|
|
40389
40395
|
},
|
|
40390
|
-
{ pdfjsLib: options.pdfjsLib }
|
|
40396
|
+
{ pdfjsLib: options.pdfjsLib, pdfVerbosity: options.pdfVerbosity }
|
|
40391
40397
|
);
|
|
40392
40398
|
const selectedAnswers = output.selected.map((id) => answers.find((answer) => answer.id === id)).filter(Boolean);
|
|
40393
40399
|
return {
|
|
@@ -32057,6 +32057,10 @@ async function resolvePdfJs(options = {}) {
|
|
|
32057
32057
|
);
|
|
32058
32058
|
}
|
|
32059
32059
|
}
|
|
32060
|
+
function pdfVerbosity(pdfjs, options = {}) {
|
|
32061
|
+
if (typeof options.pdfVerbosity === "number") return options.pdfVerbosity;
|
|
32062
|
+
return pdfjs.VerbosityLevel?.ERRORS ?? 0;
|
|
32063
|
+
}
|
|
32060
32064
|
async function toUint8Array(input) {
|
|
32061
32065
|
if (input instanceof Uint8Array) {
|
|
32062
32066
|
return new Uint8Array(input.buffer.slice(input.byteOffset, input.byteOffset + input.byteLength));
|
|
@@ -32255,7 +32259,8 @@ async function extractPdfText(pdfInput, options = {}) {
|
|
|
32255
32259
|
data,
|
|
32256
32260
|
disableWorker: true,
|
|
32257
32261
|
useSystemFonts: true,
|
|
32258
|
-
isEvalSupported: false
|
|
32262
|
+
isEvalSupported: false,
|
|
32263
|
+
verbosity: pdfVerbosity(pdfjs, options)
|
|
32259
32264
|
});
|
|
32260
32265
|
const pdf = await loadingTask.promise;
|
|
32261
32266
|
const pages = [];
|
|
@@ -40121,7 +40126,8 @@ async function predict(input, options = {}) {
|
|
|
40121
40126
|
if (!answers.length) throw new Error("predict input requires answers");
|
|
40122
40127
|
const runtime = await getPdfRuntime(pdfInput, {
|
|
40123
40128
|
cacheKey: input.cacheKey ?? input.pdfPath ?? input.pdfUrl ?? input.url,
|
|
40124
|
-
pdfjsLib: options.pdfjsLib
|
|
40129
|
+
pdfjsLib: options.pdfjsLib,
|
|
40130
|
+
pdfVerbosity: options.pdfVerbosity
|
|
40125
40131
|
});
|
|
40126
40132
|
const question = String(input.question ?? "");
|
|
40127
40133
|
const questionTokens = uniqueTokens(question);
|
|
@@ -40365,7 +40371,7 @@ async function answerQuestion(pdf, options = { question: "" }) {
|
|
|
40365
40371
|
answers,
|
|
40366
40372
|
mode: options.type ?? options.mode ?? "single"
|
|
40367
40373
|
},
|
|
40368
|
-
{ pdfjsLib: options.pdfjsLib }
|
|
40374
|
+
{ pdfjsLib: options.pdfjsLib, pdfVerbosity: options.pdfVerbosity }
|
|
40369
40375
|
);
|
|
40370
40376
|
const selectedAnswers = output.selected.map((id) => answers.find((answer) => answer.id === id)).filter(Boolean);
|
|
40371
40377
|
return {
|
package/dist/pdf.d.ts
CHANGED
|
@@ -15,7 +15,7 @@ export declare function setPdfJsLib(pdfjsLib: any): void;
|
|
|
15
15
|
* текст и флаг `ocrNeeded`, если в PDF найдено подозрительно мало текста.
|
|
16
16
|
*
|
|
17
17
|
* @param pdfInput Байты PDF, File/Blob, ArrayBuffer, Uint8Array или URL.
|
|
18
|
-
* @param options Необязательный `cacheKey
|
|
18
|
+
* @param options Необязательный `cacheKey`, явно переданный `pdfjsLib` и уровень логирования PDF.js.
|
|
19
19
|
* @returns Текст страниц и метаданные, которые использует predictor.
|
|
20
20
|
*/
|
|
21
21
|
export declare function extractPdfText(pdfInput: any, options?: any): Promise<{
|
package/dist/pdf.js
CHANGED
|
@@ -26,6 +26,11 @@ async function resolvePdfJs(options = {}) {
|
|
|
26
26
|
throw new Error("PDF.js is not available. In the browser, include pdf.js before this library or call setPdfJsLib(pdfjsLib).");
|
|
27
27
|
}
|
|
28
28
|
}
|
|
29
|
+
function pdfVerbosity(pdfjs, options = {}) {
|
|
30
|
+
if (typeof options.pdfVerbosity === "number")
|
|
31
|
+
return options.pdfVerbosity;
|
|
32
|
+
return pdfjs.VerbosityLevel?.ERRORS ?? 0;
|
|
33
|
+
}
|
|
29
34
|
async function toUint8Array(input) {
|
|
30
35
|
if (input instanceof Uint8Array) {
|
|
31
36
|
return new Uint8Array(input.buffer.slice(input.byteOffset, input.byteOffset + input.byteLength));
|
|
@@ -339,7 +344,7 @@ function removeFrontMatterAppendixList(pages) {
|
|
|
339
344
|
* текст и флаг `ocrNeeded`, если в PDF найдено подозрительно мало текста.
|
|
340
345
|
*
|
|
341
346
|
* @param pdfInput Байты PDF, File/Blob, ArrayBuffer, Uint8Array или URL.
|
|
342
|
-
* @param options Необязательный `cacheKey
|
|
347
|
+
* @param options Необязательный `cacheKey`, явно переданный `pdfjsLib` и уровень логирования PDF.js.
|
|
343
348
|
* @returns Текст страниц и метаданные, которые использует predictor.
|
|
344
349
|
*/
|
|
345
350
|
export async function extractPdfText(pdfInput, options = {}) {
|
|
@@ -350,6 +355,7 @@ export async function extractPdfText(pdfInput, options = {}) {
|
|
|
350
355
|
disableWorker: true,
|
|
351
356
|
useSystemFonts: true,
|
|
352
357
|
isEvalSupported: false,
|
|
358
|
+
verbosity: pdfVerbosity(pdfjs, options),
|
|
353
359
|
});
|
|
354
360
|
const pdf = await loadingTask.promise;
|
|
355
361
|
const pages = [];
|
package/dist/predictor.js
CHANGED
|
@@ -3576,6 +3576,7 @@ export async function predict(input, options = {}) {
|
|
|
3576
3576
|
const runtime = await getPdfRuntime(pdfInput, {
|
|
3577
3577
|
cacheKey: input.cacheKey ?? input.pdfPath ?? input.pdfUrl ?? input.url,
|
|
3578
3578
|
pdfjsLib: options.pdfjsLib,
|
|
3579
|
+
pdfVerbosity: options.pdfVerbosity,
|
|
3579
3580
|
});
|
|
3580
3581
|
const question = String(input.question ?? "");
|
|
3581
3582
|
const questionTokens = uniqueTokens(question);
|