@internetarchive/bookreader 5.0.0-96 → 5.0.0-98
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BookReader/474.js +2 -0
- package/BookReader/474.js.map +1 -0
- package/BookReader/BookReader.css +39 -34
- package/BookReader/BookReader.js +1 -1
- package/BookReader/BookReader.js.map +1 -1
- package/BookReader/bergamot-translator-worker.js +2966 -0
- package/BookReader/bergamot-translator-worker.wasm +0 -0
- package/BookReader/ia-bookreader-bundle.js +1 -1
- package/BookReader/ia-bookreader-bundle.js.map +1 -1
- package/BookReader/images/icon_experiment.svg +1 -0
- package/BookReader/images/translate.svg +1 -0
- package/BookReader/plugins/plugin.experiments.js +1 -1
- package/BookReader/plugins/plugin.experiments.js.map +1 -1
- package/BookReader/plugins/plugin.text_selection.js +1 -1
- package/BookReader/plugins/plugin.text_selection.js.map +1 -1
- package/BookReader/plugins/plugin.translate.js +3 -0
- package/BookReader/plugins/plugin.translate.js.LICENSE.txt +1 -0
- package/BookReader/plugins/plugin.translate.js.map +1 -0
- package/BookReader/plugins/plugin.tts.js +1 -1
- package/BookReader/plugins/plugin.tts.js.map +1 -1
- package/BookReader/plugins/translator-worker.js +2 -0
- package/BookReader/plugins/translator-worker.js.map +1 -0
- package/BookReader/silence.mp3 +0 -0
- package/BookReader/translator-worker.js +475 -0
- package/package.json +6 -3
- package/src/BookNavigator/book-navigator.js +1 -0
- package/src/BookReader/Mode1UpLit.js +6 -1
- package/src/BookReader/Mode2UpLit.js +11 -1
- package/src/BookReader/Navbar/Navbar.js +61 -0
- package/src/BookReader/options.js +12 -8
- package/src/BookReader.js +67 -140
- package/src/assets/images/icon_experiment.svg +1 -0
- package/src/assets/images/translate.svg +1 -0
- package/src/assets/silence.mp3 +0 -0
- package/src/css/_BRnav.scss +0 -24
- package/src/css/_BRsearch.scss +1 -5
- package/src/css/_TextSelection.scss +38 -9
- package/src/plugins/plugin.experiments.js +34 -9
- package/src/plugins/plugin.text_selection.js +17 -20
- package/src/plugins/translate/TranslationManager.js +170 -0
- package/src/plugins/translate/plugin.translate.js +489 -0
- package/src/plugins/tts/AbstractTTSEngine.js +3 -4
- package/src/plugins/tts/PageChunk.js +28 -9
- package/src/plugins/tts/WebTTSEngine.js +5 -7
- package/src/plugins/tts/plugin.tts.js +40 -4
- package/src/plugins/tts/utils.js +21 -22
- package/src/util/cache.js +20 -0
|
@@ -3,31 +3,13 @@ import { createDIVPageLayer } from '../BookReader/PageContainer.js';
|
|
|
3
3
|
import { SelectionObserver } from '../BookReader/utils/SelectionObserver.js';
|
|
4
4
|
import { BookReaderPlugin } from '../BookReaderPlugin.js';
|
|
5
5
|
import { applyVariables } from '../util/strings.js';
|
|
6
|
+
import { Cache } from '../util/cache.js';
|
|
7
|
+
import { toISO6391 } from './tts/utils.js';
|
|
6
8
|
/** @typedef {import('../util/strings.js').StringWithVars} StringWithVars */
|
|
7
9
|
/** @typedef {import('../BookReader/PageContainer.js').PageContainer} PageContainer */
|
|
8
10
|
|
|
9
11
|
const BookReader = /** @type {typeof import('../BookReader').default} */(window.BookReader);
|
|
10
12
|
|
|
11
|
-
/**
|
|
12
|
-
* @template T
|
|
13
|
-
*/
|
|
14
|
-
export class Cache {
|
|
15
|
-
constructor(maxSize = 10) {
|
|
16
|
-
this.maxSize = maxSize;
|
|
17
|
-
/** @type {T[]} */
|
|
18
|
-
this.entries = [];
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
/**
|
|
22
|
-
* @param {T} entry
|
|
23
|
-
*/
|
|
24
|
-
add(entry) {
|
|
25
|
-
if (this.entries.length >= this.maxSize) {
|
|
26
|
-
this.entries.shift();
|
|
27
|
-
}
|
|
28
|
-
this.entries.push(entry);
|
|
29
|
-
}
|
|
30
|
-
}
|
|
31
13
|
|
|
32
14
|
export class TextSelectionPlugin extends BookReaderPlugin {
|
|
33
15
|
options = {
|
|
@@ -327,9 +309,16 @@ export class TextSelectionPlugin extends BookReaderPlugin {
|
|
|
327
309
|
}
|
|
328
310
|
|
|
329
311
|
const textLayer = createDIVPageLayer(pageContainer.page, 'BRtextLayer');
|
|
312
|
+
// Have to wait to make sure the page container is actually rendered,
|
|
313
|
+
// otherwise width/height are unset after a mode change.
|
|
314
|
+
await Promise.resolve();
|
|
330
315
|
const ratioW = parseFloat(pageContainer.$container[0].style.width) / pageContainer.page.width;
|
|
331
316
|
const ratioH = parseFloat(pageContainer.$container[0].style.height) / pageContainer.page.height;
|
|
332
317
|
textLayer.style.transform = `scale(${ratioW}, ${ratioH})`;
|
|
318
|
+
const bookLangCode = toISO6391(this.br.options.bookLanguage);
|
|
319
|
+
if (bookLangCode) {
|
|
320
|
+
textLayer.setAttribute("lang", bookLangCode);
|
|
321
|
+
}
|
|
333
322
|
textLayer.setAttribute("dir", this.rtl ? "rtl" : "ltr");
|
|
334
323
|
|
|
335
324
|
const ocrParagraphs = $(XMLpage).find("PARAGRAPH[coords]").toArray();
|
|
@@ -356,6 +345,10 @@ export class TextSelectionPlugin extends BookReaderPlugin {
|
|
|
356
345
|
}
|
|
357
346
|
$container.append(textLayer);
|
|
358
347
|
this.stopPageFlip($container);
|
|
348
|
+
this.br.trigger('textLayerRendered', {
|
|
349
|
+
pageIndex,
|
|
350
|
+
pageContainer,
|
|
351
|
+
});
|
|
359
352
|
}
|
|
360
353
|
|
|
361
354
|
/**
|
|
@@ -365,6 +358,10 @@ export class TextSelectionPlugin extends BookReaderPlugin {
|
|
|
365
358
|
renderParagraph(ocrParagraph) {
|
|
366
359
|
const paragEl = document.createElement('p');
|
|
367
360
|
paragEl.classList.add('BRparagraphElement');
|
|
361
|
+
if (ocrParagraph.getAttribute("x-role")) {
|
|
362
|
+
paragEl.classList.add('ocr-role-header-footer');
|
|
363
|
+
paragEl.ariaHidden = "true";
|
|
364
|
+
}
|
|
368
365
|
const [paragLeft, paragBottom, paragRight, paragTop] = $(ocrParagraph).attr("coords").split(",").map(parseFloat);
|
|
369
366
|
const wordHeightArr = [];
|
|
370
367
|
const lines = $(ocrParagraph).find("LINE[coords]").toArray();
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
// @ts-check
|
|
2
|
+
import { Cache } from '../../util/cache.js';
|
|
3
|
+
import { BatchTranslator } from '@internetarchive/bergamot-translator/translator.js';
|
|
4
|
+
import { toISO6391 } from '../tts/utils.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Human-readable English display names, keyed by two-letter language code.
 *
 * The type annotation lives on the declaration: an inline `@type` comment
 * placed before an unparenthesized object literal is not a valid JSDoc cast
 * and is ignored by `// @ts-check`.
 *
 * @type {{[lang: string]: string}}
 */
export const langs = {
  "bg": "Bulgarian",
  "ca": "Catalan",
  "cs": "Czech",
  "nl": "Dutch",
  "en": "English",
  "et": "Estonian",
  "de": "German",
  "fr": "French",
  "is": "Icelandic",
  "it": "Italian",
  "nb": "Norwegian Bokmål",
  "nn": "Norwegian Nynorsk",
  "fa": "Persian",
  "pl": "Polish",
  "pt": "Portuguese",
  "ru": "Russian",
  "es": "Spanish",
  "uk": "Ukrainian",
};
|
|
26
|
+
|
|
27
|
+
/**
 * Coordinates paragraph translation through a `BatchTranslator`.
 *
 * Keeps a bounded cache of finished translations, de-duplicates requests that
 * are still in flight, and exposes the language lists discovered from the
 * model registry once `initWorker()` has completed.
 */
export class TranslationManager {
  /** @type {Cache<{index: string, response: string}>} */
  alreadyTranslated = new Cache(100);

  /**
   * @typedef {Object} genericModelInfo
   * @property {string} name
   * @property {number} size
   * @property {number} estimatedCompressedSize
   * @property {any} [qualityModel]
   * @property {string} [expectedSha256Hash]
   * @property {string} [modelType]
   */
  /**
   * Model files keyed by `${from}${to}` language pair; filled in by `initWorker()`.
   * @type { {[langPair: string] : {model: genericModelInfo, lex: genericModelInfo, vocab: genericModelInfo, quality?: genericModelInfo}} }
   */
  modelRegistry = {};

  /**
   * Requests that have been handed to the translator but have not settled yet,
   * keyed by the same key used for the cache.
   * @type {Record<string, {promise: Promise<string>, resolve: function, reject: function}>}
   */
  currentlyTranslating = {};

  /** @type {Record<string, string>[]} */
  fromLanguages = [];
  /** @type {Record<string, string>[]} */
  toLanguages = [];

  /** @type {boolean} */
  active = false;

  publicPath = '';

  /**
   * Created by `initWorker()`; undefined until then.
   * @type {BatchTranslator}
   */
  translator;

  /**
   * Pending or settled initialization; cleared again if initialization fails.
   * @type {Promise<any[]> | undefined}
   */
  initPromise;

  constructor() {
    //TODO Should default to the book language as the first element
    const enModel = {code: "en", name: "English", type: "prod"};
    this.fromLanguages.push(enModel);
    this.toLanguages.push(enModel);
  }

  /**
   * Downloads the model registry, creates the `BatchTranslator` and records the
   * available language pairs. Concurrent and repeated calls share one promise.
   *
   * If any step fails the shared promise is rejected and then forgotten, so a
   * later call can retry instead of waiting on a promise that never settles.
   *
   * @return {Promise<any[]>} resolves to a one-element array holding `modelRegistry`
   */
  async initWorker() {
    if (this.initPromise) return this.initPromise;
    const initPromise = new Promise((resolve, reject) => {
      this._initResolve = resolve;
      this._initReject = reject;
    });
    this.initPromise = initPromise;

    try {
      const registryUrl = "https://cors.archive.org/cors/mozilla-translate-models/firefox_models/";
      const registryResponse = await fetch(registryUrl + "registry.json");
      if (!registryResponse.ok) {
        throw new Error(`Failed to load translation model registry (HTTP ${registryResponse.status})`);
      }
      const registryJson = await registryResponse.json();
      // The registry lists file names relative to its own location; make them absolute
      // because the registry is handed to the translator as a data: URL below.
      for (const language of Object.values(registryJson)) {
        for (const file of Object.values(language)) {
          file.name = registryUrl + file.name;
        }
      }

      // Worker count is an arbitrary choice; batchSize was lowered from 8 to 4 for improved performance
      const translator = new BatchTranslator({
        registryUrl: `data:application/json,${encodeURIComponent(JSON.stringify(registryJson))}`,
        workers: 2,
        batchSize: 4,
        workerUrl: this.publicPath + '/translator-worker.js',
      });
      this.translator = translator;

      /**
       * Adds a language once; English is seeded by the constructor and never re-added here.
       * @param {Record<string, string>[]} list
       * @param {string} code
       */
      const addLanguage = (list, code) => {
        if (code === "en" || list.some((lang) => lang.code === code)) return;
        list.push({code, name: toISO6391(code, true), type: "prod"});
      };

      const models = await translator.backing.registry;
      for (const model of Object.values(models)) {
        const firstLang = model['from'];
        const secondLang = model['to'];
        // Record the files on the instance so the resolved registry is not always empty.
        this.modelRegistry[`${firstLang}${secondLang}`] = model['files'];
        // Assuming that all of the languages loaded from the registryUrl inside @internetarchive/bergamot-translator/translator.js are prod
        // List of dev models found here https://github.com/mozilla/firefox-translations-models/tree/main/models/base
        // There are also differences between the model types in the repo above here: https://github.com/mozilla/firefox-translations-models?tab=readme-ov-file#firefox-translations-models
        addLanguage(this.fromLanguages, firstLang);
        addLanguage(this.toLanguages, secondLang);
      }
      this._initResolve([this.modelRegistry]);
    } catch (err) {
      // Forget the failed attempt first so that the next call starts over.
      this.initPromise = undefined;
      this._initReject(err);
    }
    return initPromise;
  }

  /**
   * Translates one paragraph of a page's text layer from `fromLang` to `toLang`.
   *
   * Results are cached per language pair, page and paragraph, and a request
   * that is already in flight for the same key is shared rather than repeated.
   * `priority` is forwarded to the translator unchanged; callers are expected
   * to derive it from the page and paragraph position.
   *
   * @param {string} fromLang
   * @param {string} toLang
   * @param {string} pageIndex
   * @param {number} paragraphIndex
   * @param {string} text
   * @param {number} priority
   * @return {Promise<string>} translated text; resolves to undefined when either
   *   language is missing or both are the same. Rejects with the string
   *   "No text was provided" for empty text, or with the translator's error.
   */
  getTranslation = async (fromLang, toLang, pageIndex, paragraphIndex, text, priority) => {
    this.active = true;
    if (!fromLang || !toLang || fromLang === toLang) {
      return;
    }
    const key = `${fromLang}${toLang}-${pageIndex}:${paragraphIndex}`;
    const cachedEntry = this.alreadyTranslated.entries.find((x) => x.index === key);
    if (cachedEntry) {
      return cachedEntry.response;
    }

    const inFlight = this.currentlyTranslating[key];
    if (inFlight) {
      return inFlight.promise;
    }

    if (!text) {
      // Same rejection value as before (kept for callers), but nothing is registered
      // under the key, so no permanently rejected entry is left behind.
      return Promise.reject("No text was provided");
    }

    let settleOk = null;
    let settleFail = null;
    const promise = new Promise((res, rej) => {
      settleOk = res;
      settleFail = rej;
    });
    const entry = {promise, resolve: settleOk, reject: settleFail};
    this.currentlyTranslating[key] = entry;

    // Only remove the registration if it is still ours; it may have been
    // cleared or replaced while the translation was running.
    const release = () => {
      if (this.currentlyTranslating[key] === entry) {
        delete this.currentlyTranslating[key];
      }
    };

    let request;
    try {
      // Submitted synchronously so requests reach the translator in call order.
      request = this.translator.translate({
        to: toLang,
        from: fromLang,
        text: text,
        html: false,
        priority: priority,
      });
    } catch (err) {
      // e.g. the translator has not been created yet
      release();
      entry.reject(err);
      return promise;
    }

    Promise.resolve(request).then((resp) => {
      const translated = resp.target.text;
      this.alreadyTranslated.add({index: key, response: translated});
      release();
      entry.resolve(translated);
    }).catch((err) => {
      // Without this, a failed translation left the promise pending forever
      // and blocked every later request for the same key.
      release();
      entry.reject(err);
    });

    return promise;
  }
}
|