@internetarchive/bookreader 5.0.0-96 → 5.0.0-98

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/BookReader/474.js +2 -0
  2. package/BookReader/474.js.map +1 -0
  3. package/BookReader/BookReader.css +39 -34
  4. package/BookReader/BookReader.js +1 -1
  5. package/BookReader/BookReader.js.map +1 -1
  6. package/BookReader/bergamot-translator-worker.js +2966 -0
  7. package/BookReader/bergamot-translator-worker.wasm +0 -0
  8. package/BookReader/ia-bookreader-bundle.js +1 -1
  9. package/BookReader/ia-bookreader-bundle.js.map +1 -1
  10. package/BookReader/images/icon_experiment.svg +1 -0
  11. package/BookReader/images/translate.svg +1 -0
  12. package/BookReader/plugins/plugin.experiments.js +1 -1
  13. package/BookReader/plugins/plugin.experiments.js.map +1 -1
  14. package/BookReader/plugins/plugin.text_selection.js +1 -1
  15. package/BookReader/plugins/plugin.text_selection.js.map +1 -1
  16. package/BookReader/plugins/plugin.translate.js +3 -0
  17. package/BookReader/plugins/plugin.translate.js.LICENSE.txt +1 -0
  18. package/BookReader/plugins/plugin.translate.js.map +1 -0
  19. package/BookReader/plugins/plugin.tts.js +1 -1
  20. package/BookReader/plugins/plugin.tts.js.map +1 -1
  21. package/BookReader/plugins/translator-worker.js +2 -0
  22. package/BookReader/plugins/translator-worker.js.map +1 -0
  23. package/BookReader/silence.mp3 +0 -0
  24. package/BookReader/translator-worker.js +475 -0
  25. package/package.json +6 -3
  26. package/src/BookNavigator/book-navigator.js +1 -0
  27. package/src/BookReader/Mode1UpLit.js +6 -1
  28. package/src/BookReader/Mode2UpLit.js +11 -1
  29. package/src/BookReader/Navbar/Navbar.js +61 -0
  30. package/src/BookReader/options.js +12 -8
  31. package/src/BookReader.js +67 -140
  32. package/src/assets/images/icon_experiment.svg +1 -0
  33. package/src/assets/images/translate.svg +1 -0
  34. package/src/assets/silence.mp3 +0 -0
  35. package/src/css/_BRnav.scss +0 -24
  36. package/src/css/_BRsearch.scss +1 -5
  37. package/src/css/_TextSelection.scss +38 -9
  38. package/src/plugins/plugin.experiments.js +34 -9
  39. package/src/plugins/plugin.text_selection.js +17 -20
  40. package/src/plugins/translate/TranslationManager.js +170 -0
  41. package/src/plugins/translate/plugin.translate.js +489 -0
  42. package/src/plugins/tts/AbstractTTSEngine.js +3 -4
  43. package/src/plugins/tts/PageChunk.js +28 -9
  44. package/src/plugins/tts/WebTTSEngine.js +5 -7
  45. package/src/plugins/tts/plugin.tts.js +40 -4
  46. package/src/plugins/tts/utils.js +21 -22
  47. package/src/util/cache.js +20 -0
@@ -0,0 +1,2 @@
1
+ !function(){"use strict";var e={};"undefined"==typeof self&&(global.Module=e,global.self=new class{#e;constructor(){const{parentPort:e}=require("node:worker_threads");this.#e=e}addEventListener(e,t){this.#e.on(e,(e=>t({data:e})))}postMessage(e){this.#e.postMessage(e)}importScripts(...e){const{readFileSync:t}=require("node:fs"),{join:n}=require("node:path");for(let r of e){const e=t(n(__dirname,r),{encoding:"utf-8"});eval.call(global,e)}}async fetch(e,t){if("file:"===e.protocol){const{readFile:t}=require("node:fs/promises"),n=await t(e.pathname),r=new Blob([n]);return new Response(r,{status:200,statusText:"OK",headers:{"Content-Type":"application/wasm","Content-Length":r.size.toString()}})}return await fetch(e,t)}get location(){return new URL(`file://${__filename}`)}});class t{static parse(e){const t={};return e.split("\n").reduce(((e,n,r)=>{let s;if(s=n.match(/^\s*-\s+(.+?)$/))Array.isArray(t[e])||(t[e]=t[e].trim()?[t[e]]:[]),t[e].push(s[1].trim());else if(s=n.match(/^\s*([A-Za-z0-9_][A-Za-z0-9_-]*):\s*(.*)$/))e=s[1],t[e]=s[2].trim();else if(n.trim())throw Error(`Could not parse line ${r+1}: "${n}"`);return e}),null),t}static stringify(e){return Object.entries(e).reduce(((e,[t,n])=>{let r="";return r=Array.isArray(n)?n.map((e=>`\n - ${e}`)).join(""):("number"==typeof n||"boolean"==typeof n||n.match(/^\d*(\.\d+)?$/),`${n}`),`${e}${t}: ${r}\n`}),"")}}class n{static GEMM_TO_FALLBACK_FUNCTIONS_MAP={int8_prepare_a:"int8PrepareAFallback",int8_prepare_b:"int8PrepareBFallback",int8_prepare_b_from_transposed:"int8PrepareBFromTransposedFallback",int8_prepare_b_from_quantized_transposed:"int8PrepareBFromQuantizedTransposedFallback",int8_prepare_bias:"int8PrepareBiasFallback",int8_multiply_and_add_bias:"int8MultiplyAndAddBiasFallback",int8_select_columns_of_b:"int8SelectColumnsOfBFallback"};static NATIVE_INT_GEMM="mozIntGemm";constructor(e){}async initialize(e){this.options=e||{},this.models=new Map,this.module=await this.loadModule(),this.service=await 
this.loadTranslationService()}linkNativeIntGemm(e){if(!WebAssembly.mozIntGemm)return console.warn("Native gemm requested but not available, falling back to embedded gemm"),this.linkFallbackIntGemm(e);const t=new WebAssembly.Instance(WebAssembly.mozIntGemm(),{"":{memory:e.env.memory}});return Array.from(Object.keys(n.GEMM_TO_FALLBACK_FUNCTIONS_MAP)).every((e=>t.exports[e]))?t.exports:(console.warn("Native gemm is missing expected functions, falling back to embedded gemm"),this.linkFallbackIntGemm(e))}linkFallbackIntGemm(t){const r=Object.entries(n.GEMM_TO_FALLBACK_FUNCTIONS_MAP).map((([t,n])=>[t,(...t)=>e.asm[n](...t)]));return Object.fromEntries(r)}loadModule(){return new Promise((async(t,n)=>{try{const r=await self.fetch(new URL("./bergamot-translator-worker.wasm",self.location));Object.assign(e,{instantiateWasm:(e,t)=>{try{WebAssembly.instantiateStreaming(r,{...e,wasm_gemm:this.options.useNativeIntGemm?this.linkNativeIntGemm(e):this.linkFallbackIntGemm(e)}).then((({instance:e})=>t(e))).catch(n)}catch(e){n(e)}return{}},onRuntimeInitialized:()=>{t(e)}}),self.Module=e,self.importScripts("bergamot-translator-worker.js")}catch(e){n(e)}}))}loadTranslationService(){return new this.module.BlockingService({cacheSize:Math.max(this.options.cacheSize||0,0)})}hasTranslationModel({from:e,to:t}){const n=JSON.stringify({from:e,to:t});return this.models.has(n)}loadTranslationModel({from:e,to:n},r){const s=r.vocabs.filter(((e,t,n)=>!n.slice(0,t).includes(e))),[a,i,o,...l]=[this.prepareAlignedMemoryFromBuffer(r.model,256),this.prepareAlignedMemoryFromBuffer(r.shortlist,64),r.qualityModel?this.prepareAlignedMemoryFromBuffer(r.qualityModel,64):null,...s.map((e=>this.prepareAlignedMemoryFromBuffer(e,64)))],m=new this.module.AlignedMemoryList;l.forEach((e=>m.push_back(e)));let c=t.parse("\n beam-size: 1\n normalize: 1.0\n word-penalty: 0\n cpu-threads: 0\n gemm-precision: int8shiftAlphaAll\n skip-cost: true\n 
");r.config&&Object.assign(c,r.config),"int8"===c["gemm-precision"]&&(c["gemm-precision"]="int8shiftAll"),Object.assign(c,t.parse("\n alignment: soft\n quiet: true\n quiet-translation: true\n max-length-break: 128\n mini-batch-words: 1024\n workspace: 128\n max-length-factor: 2.0\n "));const d=JSON.stringify({from:e,to:n});this.models.set(d,new this.module.TranslationModel(t.stringify(c),a,i,m,o))}freeTranslationModel({from:e,to:t}){const n=JSON.stringify({from:e,to:t});if(!this.models.has(n))return;const r=this.models.get(n);this.models.delete(n),r.delete()}prepareAlignedMemoryFromBuffer(e,t){const n=new Int8Array(e),r=new this.module.AlignedMemory(n.byteLength,t);return r.getByteArrayView().set(n),r}translate({models:e,texts:t}){let n=new this.module.VectorString;t.forEach((({text:e})=>n.push_back(e)));let r=new this.module.VectorResponseOptions;t.forEach((({html:e,qualityScores:t})=>r.push_back({alignment:!1,html:e,qualityScores:t})));const s=e.map((({from:e,to:t})=>{const n=JSON.stringify({from:e,to:t});return this.models.get(n)})),a=e.length>1?this.service.translateViaPivoting(...s,n,r):this.service.translate(...s,n,r);n.delete(),r.delete();const i=t.map(((e,t)=>({target:{text:a.get(t).getTranslatedText()}})));return a.delete(),i}}function r(e){return{name:e.name,message:e.message,stack:e.stack}}const s=new n;self.addEventListener("message",(async function({data:{id:e,name:t,args:n}}){e||console.error("Received message without id",arguments[0]);try{if("function"!=typeof s[t])throw TypeError(`worker[${t}] is not a function`);const r=await Promise.resolve(Reflect.apply(s[t],s,n));self.postMessage({id:e,result:r})}catch(t){self.postMessage({id:e,error:r(t)})}}))}();
2
+ //# sourceMappingURL=translator-worker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"plugins/translator-worker.js","mappings":"yBAKA,IAAIA,EAAS,CAAC,EAMM,oBAATC,OACPC,OAAOF,OAASA,EAEhBE,OAAOD,KAAO,IAAI,MAEd,GAEA,WAAAE,GACI,MAAM,WAACC,GAAcC,QAAkC,uBACvDC,MAAK,EAAQF,CACjB,CAOA,gBAAAG,CAAiBC,EAAWC,GACxBH,MAAK,EAAMI,GAAGF,GAAYG,GAASF,EAAS,CAACE,UACjD,CAMA,WAAAC,CAAYC,GACRP,MAAK,EAAMM,YAAYC,EAC3B,CAKA,aAAAC,IAAiBC,GACb,MAAM,aAACC,GAAgBX,QAAkC,YACnD,KAACY,GAAQZ,QAAkC,aACjD,IAAK,IAAIa,KAAYH,EAAS,CAC1B,MAAMI,EAASH,EAAaC,EAAKG,UAAWF,GAAW,CAACG,SAAU,UAClEC,KAAKC,KAAKrB,OAAQiB,EACtB,CACJ,CASA,WAAMK,CAAMC,EAAKC,GACb,GAAqB,UAAjBD,EAAIE,SAAsB,CAC1B,MAAM,SAACC,GAAYvB,QAAkC,oBAC/CwB,QAAeD,EAASH,EAAIP,UAC5BY,EAAO,IAAIC,KAAK,CAACF,IACvB,OAAO,IAAIG,SAASF,EAAM,CACtBG,OAAQ,IACRC,WAAY,KACZC,QAAS,CACL,eAAgB,mBAChB,iBAAkBL,EAAKM,KAAKC,aAGxC,CAEA,aAAab,MAAMC,EAAKC,EAC5B,CAEA,YAAIY,GACA,OAAO,IAAIC,IAAI,UAAUC,aAC7B,IAIR,MAAMC,EAQF,YAAOC,CAAMC,GACT,MAAMC,EAAM,CAAC,EAsBb,OApBAD,EAAKE,MAAM,MAAMC,QAAO,CAACC,EAAKC,EAAMC,KAChC,IAAIC,EACJ,GAAIA,EAAQF,EAAKE,MAAM,kBACdC,MAAMC,QAAQR,EAAIG,MACnBH,EAAIG,GAAOH,EAAIG,GAAKM,OAAS,CAACT,EAAIG,IAAQ,IAC9CH,EAAIG,GAAKO,KAAKJ,EAAM,GAAGG,aAEtB,GAAIH,EAAQF,EAAKE,MAAM,6CACxBH,EAAMG,EAAM,GACZN,EAAIG,GAAOG,EAAM,GAAGG,YAEnB,GAAKL,EAAKK,OAIX,MAAME,MAAM,wBAAwBN,EAAE,OAAOD,MAEjD,OAAOD,CAAG,GACX,MAEIH,CACX,CAQA,gBAAOY,CAAU7C,GACb,OAAO8C,OAAOC,QAAQ/C,GAAMmC,QAAO,CAACa,GAAMZ,EAAKa,MAC3C,IAAIC,EAAS,GAQb,OANIA,EADAV,MAAMC,QAAQQ,GACLA,EAAME,KAAIC,GAAO,SAASA,MAAO9C,KAAK,KACzB,iBAAV2C,GAAuC,kBAAVA,GAAuBA,EAAMV,MAAM,iBACnE,GAAGU,KAIT,GAAGD,IAAMZ,MAAQc,KAAU,GACnC,GACP,EAOJ,MAAMG,EAMFC,sCAAwC,CACpC,eAAkB,uBAClB,eAAkB,uBAClB,+BAAkC,qCAClC,yCAA4C,8CAC5C,kBAAqB,0BACrB,2BAA8B,iCAC9B,yBAA4B,gCAOhCA,uBAAyB,aAMzB,WAAA9D,CAAYuB,GAAU,CAiBtB,gBAAMwC,CAAWxC,GACbpB,KAAKoB,QAAUA,GAAW,CAAC,EAC3BpB,KAAK6D,OAAS,IAAIC,IAClB9D,KAAK+D,aAAe/D,KAAKgE,aACzBhE,KAAKiE,cAAgBjE,KAAKkE,wBAC9B,CAUA,iBAAAC,CAAkBC,GACd,IAAKC,YAAwB,WAEzB,OADAC,QAAQC,KAAK,0EACNvE,KAAKwE,oBAAoBJ,GAGpC,MAAMK,EAAW,IAAIJ,YAAYK,SAASL,YAAwB,aAAK,CACnE,GAAI,CAACM,OAAQP,EAAU,IAAU,UAGrC,OAAKvB,MAAM+B
,KAAKzB,OAAO0B,KAAKnB,EAAyBoB,iCAAiCC,OAAMC,GAAOP,EAASQ,QAAQD,KAK7GP,EAASQ,SAJZX,QAAQC,KAAK,4EACNvE,KAAKwE,oBAAoBJ,GAIxC,CASA,mBAAAI,CAAoBJ,GAChB,MAAMc,EAAU/B,OAAOC,QAAQM,EAAyBoB,gCAAgCtB,KAAI,EAAEf,EAAK0C,KACxF,CAAC1C,EAAK,IAAI2C,IAAS1F,EAAY,IAAEyF,MAASC,MAGrD,OAAOjC,OAAOkC,YAAYH,EAC9B,CAQA,UAAAlB,GACI,OAAO,IAAIsB,SAAQC,MAAOC,EAASC,KAC/B,IACI,MAAMC,QAAiB/F,KAAKuB,MAAM,IAAIe,IAAI,oCAAqCtC,KAAKqC,WAEpFmB,OAAOwC,OAAOjG,EAAQ,CAClBkG,gBAAiB,CAACxB,EAAMyB,KACpB,IACIxB,YAAYyB,qBAAqBJ,EAAU,IACpCtB,EACH,UAAapE,KAAKoB,QAAQ2E,iBACpB/F,KAAKmE,kBAAkBC,GACvBpE,KAAKwE,oBAAoBJ,KAChC4B,MAAK,EAAEvB,cAAcoB,EAAOpB,KAAWwB,MAAMR,EACpD,CAAE,MAAOS,GACLT,EAAOS,EACX,CACA,MAAO,CAAC,CAAC,EAEbC,qBAAsB,KAClBX,EAAQ9F,EAAO,IAKvBC,KAAKD,OAASA,EACdC,KAAKa,cAAc,gCACvB,CAAE,MAAO0F,GACLT,EAAOS,EACX,IAER,CAMA,sBAAAhC,GACI,OAAO,IAAIlE,KAAK+D,OAAOqC,gBAAgB,CACnCC,UAAWC,KAAKC,IAAIvG,KAAKoB,QAAQiF,WAAa,EAAG,IAEzD,CAQA,mBAAAG,EAAoB,KAAC5B,EAAI,GAAC6B,IACtB,MAAMhE,EAAMiE,KAAKxD,UAAU,CAAC0B,OAAK6B,OACjC,OAAOzG,KAAK6D,OAAO8C,IAAIlE,EAC3B,CAiBA,oBAAAmE,EAAqB,KAAChC,EAAI,GAAE6B,GAAKI,GAE7B,MAAMC,EAAeD,EAAQE,OAAOC,QAAO,CAACC,EAAOC,EAAOH,KAC9CA,EAAOI,MAAM,EAAGD,GAAOE,SAASH,MAGrCI,EAAaC,EAAiBC,KAAiBC,GAAe,CACjExH,KAAKyH,+BAA+BZ,EAAQa,MAAO,KACnD1H,KAAKyH,+BAA+BZ,EAAQc,UAAW,IACvDd,EAAQU,aACFvH,KAAKyH,+BAA+BZ,EAAQU,aAAc,IAC1D,QACHT,EAAatD,KAAIyD,GAASjH,KAAKyH,+BAA+BR,EAAO,OAGtEF,EAAS,IAAI/G,KAAK+D,OAAO6D,kBAC/BJ,EAAYK,SAAQZ,GAASF,EAAOe,UAAUb,KAG9C,IAAIc,EAAc5F,EAAKC,MAAM,yMASzByE,EAAQmB,QACR7E,OAAOwC,OAAOoC,EAAalB,EAAQmB,QAGD,SAAlCD,EAAY,oBACZA,EAAY,kBAAoB,gBAGpC5E,OAAOwC,OAAOoC,EAAa5F,EAAKC,MAAM,iPAUtC,MAAMK,EAAMiE,KAAKxD,UAAU,CAAC0B,OAAK6B,OACjCzG,KAAK6D,OAAOoE,IAAIxF,EAAK,IAAIzC,KAAK+D,OAAOmE,iBAAiB/F,EAAKe,UAAU6E,GAAcV,EAAaC,EAAiBP,EAAQQ,GAC7H,CAOA,oBAAAY,EAAqB,KAACvD,EAAI,GAAE6B,IACxB,MAAMhE,EAAMiE,KAAKxD,UAAU,CAAC0B,OAAK6B,OAEjC,IAAKzG,KAAK6D,OAAO8C,IAAIlE,GACjB,OAEJ,MAAMiF,EAAQ1H,KAAK6D,OAAOuE,IAAI3F,GAC9BzC,KAAK6D,OAAOwE,OAAO5F,GAEnBiF,EAAMW,QACV,CASA,8BAAAZ,CAA+BlG,EAAQ+G,GACnC,MAAMC,EAAQ,IAAIC,UAAUjH,GACtBoD
,EAAS,IAAI3E,KAAK+D,OAAO0E,cAAcF,EAAMG,WAAYJ,GAE/D,OADA3D,EAAOgE,mBAAmBV,IAAIM,GACvB5D,CACX,CASA,SAAAiE,EAAU,OAAC/E,EAAM,MAAEgF,IAEf,IAAIC,EAAQ,IAAI9I,KAAK+D,OAAOgF,aAC5BF,EAAMhB,SAAQ,EAAEmB,UAAUF,EAAMhB,UAAUkB,KAG1C,IAAI5H,EAAU,IAAIpB,KAAK+D,OAAOkF,sBAC9BJ,EAAMhB,SAAQ,EAAEqB,OAAMC,mBAAmB/H,EAAQ0G,UAAU,CAACsB,WAAW,EAAOF,OAAMC,oBAGpF,MAAME,EAAoBxF,EAAOL,KAAI,EAAEoB,OAAK6B,SACxC,MAAMhE,EAAMiE,KAAKxD,UAAU,CAAC0B,OAAK6B,OACjC,OAAOzG,KAAK6D,OAAOuE,IAAI3F,EAAI,IAIzB6G,EAAYzF,EAAO0F,OAAS,EAC5BvJ,KAAKiE,QAAQuF,wBAAwBH,EAAmBP,EAAO1H,GAC/DpB,KAAKiE,QAAQ2E,aAAaS,EAAmBP,EAAO1H,GAE1D0H,EAAMT,SACNjH,EAAQiH,SAKR,MAAMoB,EAAeZ,EAAMrF,KAAI,CAACkG,EAAG/G,KAAM,CACrCgH,OAAQ,CACJX,KAAMM,EAAUlB,IAAIzF,GAAGiH,yBAM/B,OAFAN,EAAUjB,SAEHoB,CACX,EAaJ,SAASI,EAAWC,GAChB,MAAO,CACH3E,KAAM2E,EAAM3E,KACZ5E,QAASuJ,EAAMvJ,QACfwJ,MAAOD,EAAMC,MAErB,CAIA,MAAMC,EAAS,IAAItG,EAEnB/D,KAAKM,iBAAiB,WAAWsF,gBAAgBlF,MAAM,GAAC4J,EAAE,KAAE9E,EAAI,KAAEC,KACzD6E,GACD3F,QAAQwF,MAAM,8BAA+BI,UAAU,IAE3D,IACI,GAA4B,mBAAjBF,EAAO7E,GACd,MAAMgF,UAAU,UAAUhF,wBAI9B,MAAMiF,QAAe9E,QAAQE,QAAQ6E,QAAQC,MAAMN,EAAO7E,GAAO6E,EAAQ5E,IACzEzF,KAAKW,YAAY,CAAC2J,KAAIG,UAC1B,CAAE,MAAON,GACLnK,KAAKW,YAAY,CACb2J,KACAH,MAAOD,EAAWC,IAE1B,CACJ,G","sources":["webpack://@internetarchive/bookreader/./node_modules/@internetarchive/bergamot-translator/worker/translator-worker.js"],"sourcesContent":["/**\n * Wrapper around the dirty bits of Bergamot's WASM bindings.\n */\n\n// Global because importScripts is global.\nvar Module = {};\n\n/**\n * node.js compatibility: Fake GlobalWorkerScope that emulates being inside a\n * WebWorker\n */\nif (typeof self === 'undefined') {\n global.Module = Module;\n\n global.self = new class GlobalWorkerScope {\n /** @type {import(\"node:worker_threads\").MessagePort} */\n #port;\n\n constructor() {\n const {parentPort} = require(/* webpackIgnore: true */ 'node:worker_threads');\n this.#port = parentPort;\n }\n\n /**\n * Add event listener to listen for messages posted to the worker.\n * @param {string} eventName\n * 
@param {(object)} callback\n */\n addEventListener(eventName, callback) {\n this.#port.on(eventName, (data) => callback({data}));\n }\n\n /**\n * Post message outside, to the owner of the Worker.\n * @param {any} message\n */\n postMessage(message) {\n this.#port.postMessage(message);\n }\n\n /**\n * @param {...string} scripts - Paths to scripts to import in that order\n */\n importScripts(...scripts) {\n const {readFileSync} = require(/* webpackIgnore: true */ 'node:fs');\n const {join} = require(/* webpackIgnore: true */ 'node:path');\n for (let pathname of scripts) {\n const script = readFileSync(join(__dirname, pathname), {encoding: 'utf-8'});\n eval.call(global, script);\n }\n }\n\n /**\n * Adds support for local file urls. Assumes anything that doesn't start\n * with \"http\" to be a local path.\n * @param {string} url - path or url\n * @param {object?} options - See `fetch()` options\n * @return {Promise<Response>}\n */\n async fetch(url, options) {\n if (url.protocol === 'file:') {\n const {readFile} = require(/* webpackIgnore: true */ 'node:fs/promises');\n const buffer = await readFile(url.pathname);\n const blob = new Blob([buffer]);\n return new Response(blob, {\n status: 200,\n statusText: 'OK',\n headers: {\n 'Content-Type': 'application/wasm',\n 'Content-Length': blob.size.toString()\n }\n });\n }\n\n return await fetch(url, options);\n }\n\n get location() {\n return new URL(`file://${__filename}`);\n }\n }\n}\n\nclass YAML {\n /**\n * Parses YAML into dictionary. Does not interpret types, all values are a\n * string or a list of strings. No support for objects other than the top\n * level.\n * @param {string} yaml\n * @return {{[string]: string | string[]}}\n */\n static parse(yaml) {\n const out = {};\n\n yaml.split('\\n').reduce((key, line, i) => {\n let match;\n if (match = line.match(/^\\s*-\\s+(.+?)$/)) {\n if (!Array.isArray(out[key]))\n out[key] = out[key].trim() ? 
[out[key]] : [];\n out[key].push(match[1].trim());\n }\n else if (match = line.match(/^\\s*([A-Za-z0-9_][A-Za-z0-9_-]*):\\s*(.*)$/)) {\n key = match[1];\n out[key] = match[2].trim();\n }\n else if (!line.trim()) {\n // whitespace, ignore\n }\n else {\n throw Error(`Could not parse line ${i+1}: \"${line}\"`);\n }\n return key;\n }, null);\n\n return out;\n }\n\n /**\n * Turns an object into a YAML string. No support for objects, only simple\n * types and lists of simple types.\n * @param {{[string]: string | number | boolean | string[]}} data\n * @return {string}\n */\n static stringify(data) {\n return Object.entries(data).reduce((str, [key, value]) => {\n let valstr = '';\n if (Array.isArray(value))\n valstr = value.map(val => `\\n - ${val}`).join('');\n else if (typeof value === 'number' || typeof value === 'boolean' || value.match(/^\\d*(\\.\\d+)?$/))\n valstr = `${value}`;\n else\n valstr = `${value}`; // Quote?\n\n return `${str}${key}: ${valstr}\\n`;\n }, '');\n }\n}\n\n/**\n * Wrapper around the bergamot-translator exported module that hides the need\n * of working with C++ style data structures and does model management.\n */\nclass BergamotTranslatorWorker {\n /**\n * Map of expected symbol -> name of fallback symbol for functions that can\n * be swizzled for a faster implementation. 
Firefox Nightly makes use of\n * this.\n */\n static GEMM_TO_FALLBACK_FUNCTIONS_MAP = {\n 'int8_prepare_a': 'int8PrepareAFallback',\n 'int8_prepare_b': 'int8PrepareBFallback',\n 'int8_prepare_b_from_transposed': 'int8PrepareBFromTransposedFallback',\n 'int8_prepare_b_from_quantized_transposed': 'int8PrepareBFromQuantizedTransposedFallback',\n 'int8_prepare_bias': 'int8PrepareBiasFallback',\n 'int8_multiply_and_add_bias': 'int8MultiplyAndAddBiasFallback',\n 'int8_select_columns_of_b': 'int8SelectColumnsOfBFallback'\n };\n\n /**\n * Name of module exported by Firefox Nightly that exports an optimised\n * implementation of the symbols mentioned above.\n */\n static NATIVE_INT_GEMM = 'mozIntGemm';\n\n /**\n * Empty because we can't do async constructors yet. It is the\n * responsibility of whoever owns this WebWorker to call `initialize()`.\n */\n constructor(options) {}\n\n /**\n * Instantiates a new translation worker with optional options object.\n * If this call succeeds, the WASM runtime is loaded and ready.\n * \n * Available options are:\n * useNativeIntGemm: {true | false} defaults to false. If true, it will\n * attempt to link to the intgemm module available in\n * Firefox Nightly which makes translations much faster.\n * cacheSize: {Number} defaults to 0 which disables translation\n * cache entirely. Note that this is a theoretical\n * upper bound. In practice it will use about 1/3th of\n * the cache specified here. 2^14 is not a bad starting\n * value.\n * @param {{useNativeIntGemm: boolean, cacheSize: number}} options\n */\n async initialize(options) {\n this.options = options || {};\n this.models = new Map(); // Map<str,Promise<TranslationModel>>\n this.module = await this.loadModule();\n this.service = await this.loadTranslationService();\n }\n\n /**\n * Tries to load native IntGEMM module for bergamot-translator. 
If that\n * fails because it or any of the expected functions is not available, it\n * falls back to using the naive implementations that come with the wasm\n * binary itself through `linkFallbackIntGemm()`.\n * @param {{env: {memory: WebAssembly.Memory}}} info\n * @return {{[method:string]: (...any) => any}}\n */\n linkNativeIntGemm(info) {\n if (!WebAssembly['mozIntGemm']) {\n console.warn('Native gemm requested but not available, falling back to embedded gemm');\n return this.linkFallbackIntGemm(info);\n }\n\n const instance = new WebAssembly.Instance(WebAssembly['mozIntGemm'](), {\n '': {memory: info['env']['memory']}\n });\n\n if (!Array.from(Object.keys(BergamotTranslatorWorker.GEMM_TO_FALLBACK_FUNCTIONS_MAP)).every(fun => instance.exports[fun])) {\n console.warn('Native gemm is missing expected functions, falling back to embedded gemm');\n return this.linkFallbackIntGemm(info);\n }\n\n return instance.exports;\n }\n\n /**\n * Links intgemm functions that are already available in the wasm binary,\n * but just exports them under the name that is expected by\n * bergamot-translator.\n * @param {{env: {memory: WebAssembly.Memory}}} info\n * @return {{[method:string]: (...any) => any}}\n */\n linkFallbackIntGemm(info) {\n const mapping = Object.entries(BergamotTranslatorWorker.GEMM_TO_FALLBACK_FUNCTIONS_MAP).map(([key, name]) => {\n return [key, (...args) => Module['asm'][name](...args)]\n });\n\n return Object.fromEntries(mapping);\n }\n\n /**\n * Internal method. Reads and instantiates the WASM binary. 
Returns a\n * promise for the exported Module object that contains all the classes\n * and functions exported by bergamot-translator.\n * @return {Promise<BergamotTranslator>}\n */\n loadModule() {\n return new Promise(async (resolve, reject) => {\n try {\n const response = await self.fetch(new URL('./bergamot-translator-worker.wasm', self.location));\n\n Object.assign(Module, {\n instantiateWasm: (info, accept) => {\n try {\n WebAssembly.instantiateStreaming(response, {\n ...info,\n 'wasm_gemm': this.options.useNativeIntGemm\n ? this.linkNativeIntGemm(info)\n : this.linkFallbackIntGemm(info)\n }).then(({instance}) => accept(instance)).catch(reject);\n } catch (err) {\n reject(err);\n }\n return {};\n },\n onRuntimeInitialized: () => {\n resolve(Module);\n }\n });\n\n // Emscripten glue code. Webpack et al. should not mangle the `Module` property name!\n self.Module = Module;\n self.importScripts('bergamot-translator-worker.js');\n } catch (err) {\n reject(err);\n }\n });\n }\n\n /**\n * Internal method. Instantiates a BlockingService()\n * @return {BergamotTranslator.BlockingService}\n */\n loadTranslationService() {\n return new this.module.BlockingService({\n cacheSize: Math.max(this.options.cacheSize || 0, 0)\n });\n }\n\n /**\n * Returns whether a model has already been loaded in this worker. Marked\n * async because the message passing interface we use expects async methods.\n * @param {{from:string, to:string}}\n * @return boolean\n */ \n hasTranslationModel({from,to}) {\n const key = JSON.stringify({from,to});\n return this.models.has(key);\n }\n\n /**\n * Loads a translation model from a set of file buffers. 
After this, the\n * model is available to translate with and `hasTranslationModel()` will\n * return true for this pair.\n * @param {{from:string, to:string}}\n * @param {{\n * model: ArrayBuffer,\n * shortlist: ArrayBuffer,\n * vocabs: ArrayBuffer[],\n * qualityModel: ArrayBuffer?,\n * config?: {\n * [key:string]: string\n * }\n * }} buffers\n */ \n loadTranslationModel({from, to}, buffers) {\n // This because service_bindings.cpp:prepareVocabsSmartMemories :(\n const uniqueVocabs = buffers.vocabs.filter((vocab, index, vocabs) => {\n return !vocabs.slice(0, index).includes(vocab);\n });\n\n const [modelMemory, shortlistMemory, qualityModel, ...vocabMemory] = [\n this.prepareAlignedMemoryFromBuffer(buffers.model, 256),\n this.prepareAlignedMemoryFromBuffer(buffers.shortlist, 64),\n buffers.qualityModel // optional quality model\n ? this.prepareAlignedMemoryFromBuffer(buffers.qualityModel, 64)\n : null,\n ...uniqueVocabs.map(vocab => this.prepareAlignedMemoryFromBuffer(vocab, 64))\n ];\n\n const vocabs = new this.module.AlignedMemoryList();\n vocabMemory.forEach(vocab => vocabs.push_back(vocab));\n\n // Defaults\n let modelConfig = YAML.parse(`\n beam-size: 1\n normalize: 1.0\n word-penalty: 0\n cpu-threads: 0\n gemm-precision: int8shiftAlphaAll\n skip-cost: true\n `);\n\n if (buffers.config)\n Object.assign(modelConfig, buffers.config);\n\n // WASM marian is only compiled with support for shiftedAll.\n if (modelConfig['gemm-precision'] === 'int8')\n modelConfig['gemm-precision'] = 'int8shiftAll';\n\n // Override these\n Object.assign(modelConfig, YAML.parse(`\n alignment: soft\n quiet: true\n quiet-translation: true\n max-length-break: 128\n mini-batch-words: 1024\n workspace: 128\n max-length-factor: 2.0\n `));\n\n const key = JSON.stringify({from,to});\n this.models.set(key, new this.module.TranslationModel(YAML.stringify(modelConfig), modelMemory, shortlistMemory, vocabs, qualityModel));\n }\n\n /**\n * Frees up memory used by old translation model. 
Does nothing if model is\n * already deleted.\n * @param {{from:string, to:string}}\n */\n freeTranslationModel({from, to}) {\n const key = JSON.stringify({from,to});\n \n if (!this.models.has(key))\n return;\n \n const model = this.models.get(key);\n this.models.delete(key);\n\n model.delete();\n }\n\n /**\n * Internal function. Copies the data from an ArrayBuffer into memory that\n * can be used inside the WASM vm by Marian.\n * @param {{ArrayBuffer}} buffer\n * @param {number} alignmentSize\n * @return {BergamotTranslator.AlignedMemory}\n */\n prepareAlignedMemoryFromBuffer(buffer, alignmentSize) {\n const bytes = new Int8Array(buffer);\n const memory = new this.module.AlignedMemory(bytes.byteLength, alignmentSize);\n memory.getByteArrayView().set(bytes);\n return memory;\n }\n\n /**\n * Public. Does actual translation work. You have to make sure that the\n * models necessary for translating text are already loaded before calling\n * this method. Returns a promise with translation responses.\n * @param {{models: {from:string, to:string}[], texts: {text: string, html: boolean}[]}}\n * @return {Promise<{target: {text: string}}[]>}\n */\n translate({models, texts}) {\n // Convert texts array into a std::vector<std::string>.\n let input = new this.module.VectorString();\n texts.forEach(({text}) => input.push_back(text));\n\n // Extracts the texts[].html options into ResponseOption objects\n let options = new this.module.VectorResponseOptions();\n texts.forEach(({html, qualityScores}) => options.push_back({alignment: false, html, qualityScores}));\n\n // Turn our model names into a list of TranslationModel pointers\n const translationModels = models.map(({from,to}) => {\n const key = JSON.stringify({from,to});\n return this.models.get(key);\n });\n\n // translate the input, which is a vector<String>; the result is a vector<Response>\n const responses = models.length > 1\n ? 
this.service.translateViaPivoting(...translationModels, input, options)\n : this.service.translate(...translationModels, input, options);\n \n input.delete();\n options.delete();\n\n // Convert the Response WASM wrappers into native JavaScript types we\n // can send over the 'wire' (message passing) in the same format as we\n // use in bergamot-translator.\n const translations = texts.map((_, i) => ({\n target: {\n text: responses.get(i).getTranslatedText()\n }\n }));\n\n responses.delete();\n\n return translations;\n }\n}\n\n/**\n * Because you can't put an Error object in a message. But you can post a\n * generic object!\n * @param {Error} error\n * @return {{\n * name: string?,\n * message: string?,\n * stack: string?\n * }}\n */\nfunction cloneError(error) {\n return {\n name: error.name,\n message: error.message,\n stack: error.stack\n };\n}\n\n// (Constructor doesn't really do anything, we need to call `initialize()`\n// first before using it. That happens from outside the worker.)\nconst worker = new BergamotTranslatorWorker();\n\nself.addEventListener('message', async function({data: {id, name, args}}) {\n if (!id)\n console.error('Received message without id', arguments[0]);\n\n try {\n if (typeof worker[name] !== 'function')\n throw TypeError(`worker[${name}] is not a function`);\n\n // Using `Promise.resolve` to await any promises that worker[name]\n // possibly returns.\n const result = await Promise.resolve(Reflect.apply(worker[name], worker, args));\n self.postMessage({id, result});\n } catch (error) {\n self.postMessage({\n id,\n error: cloneError(error)\n })\n 
}\n});\n"],"names":["Module","self","global","constructor","parentPort","require","this","addEventListener","eventName","callback","on","data","postMessage","message","importScripts","scripts","readFileSync","join","pathname","script","__dirname","encoding","eval","call","fetch","url","options","protocol","readFile","buffer","blob","Blob","Response","status","statusText","headers","size","toString","location","URL","__filename","YAML","parse","yaml","out","split","reduce","key","line","i","match","Array","isArray","trim","push","Error","stringify","Object","entries","str","value","valstr","map","val","BergamotTranslatorWorker","static","initialize","models","Map","module","loadModule","service","loadTranslationService","linkNativeIntGemm","info","WebAssembly","console","warn","linkFallbackIntGemm","instance","Instance","memory","from","keys","GEMM_TO_FALLBACK_FUNCTIONS_MAP","every","fun","exports","mapping","name","args","fromEntries","Promise","async","resolve","reject","response","assign","instantiateWasm","accept","instantiateStreaming","useNativeIntGemm","then","catch","err","onRuntimeInitialized","BlockingService","cacheSize","Math","max","hasTranslationModel","to","JSON","has","loadTranslationModel","buffers","uniqueVocabs","vocabs","filter","vocab","index","slice","includes","modelMemory","shortlistMemory","qualityModel","vocabMemory","prepareAlignedMemoryFromBuffer","model","shortlist","AlignedMemoryList","forEach","push_back","modelConfig","config","set","TranslationModel","freeTranslationModel","get","delete","alignmentSize","bytes","Int8Array","AlignedMemory","byteLength","getByteArrayView","translate","texts","input","VectorString","text","VectorResponseOptions","html","qualityScores","alignment","translationModels","responses","length","translateViaPivoting","translations","_","target","getTranslatedText","cloneError","error","stack","worker","id","arguments","TypeError","result","Reflect","apply"],"sourceRoot":""}
Binary file
@@ -0,0 +1,475 @@
1
+ /**
2
+ * Wrapper around the dirty bits of Bergamot's WASM bindings.
3
+ */
4
+
5
// Global because importScripts is global: the Emscripten glue code loaded
// through `importScripts()` looks up `Module` in the global scope.
var Module = {};

/**
 * node.js compatibility: Fake GlobalWorkerScope that emulates being inside a
 * WebWorker. Only installed when there is no global `self`.
 */
if (typeof self === 'undefined') {
  global.Module = Module;

  global.self = new class GlobalWorkerScope {
    /** @type {import("node:worker_threads").MessagePort} */
    #port;

    constructor() {
      const {parentPort} = require(/* webpackIgnore: true */ 'node:worker_threads');
      this.#port = parentPort;
    }

    /**
     * Add event listener to listen for messages posted to the worker.
     * The payload received from the port is wrapped as `{data}` so that the
     * callback sees the same shape as a browser `MessageEvent`.
     * @param {string} eventName
     * @param {(event: {data: any}) => any} callback
     */
    addEventListener(eventName, callback) {
      this.#port.on(eventName, (data) => callback({data}));
    }

    /**
     * Post message outside, to the owner of the Worker.
     * @param {any} message
     */
    postMessage(message) {
      this.#port.postMessage(message);
    }

    /**
     * Synchronously loads and evaluates scripts, resolved relative to the
     * directory of this file.
     * @param {...string} scripts - Paths to scripts to import in that order
     */
    importScripts(...scripts) {
      const {readFileSync} = require(/* webpackIgnore: true */ 'node:fs');
      const {join} = require(/* webpackIgnore: true */ 'node:path');
      for (const pathname of scripts) {
        const script = readFileSync(join(__dirname, pathname), {encoding: 'utf-8'});
        // Indirect eval on purpose: the script has to run in the global scope,
        // like a real `importScripts()`. Only ever pass trusted, local paths.
        eval.call(global, script);
      }
    }

    /**
     * Adds support for local file urls. A `file:` URL (either a `URL` object
     * or a string starting with "file:") is read from disk and wrapped in a
     * `Response`; everything else is passed on to the global `fetch()`.
     * @param {string|URL} url - file url or remote url
     * @param {object?} options - See `fetch()` options
     * @return {Promise<Response>}
     */
    async fetch(url, options) {
      const fileUrl = typeof url === 'string' && url.startsWith('file:')
        ? new URL(url)
        : url;

      if (fileUrl instanceof URL && fileUrl.protocol === 'file:') {
        const {readFile} = require(/* webpackIgnore: true */ 'node:fs/promises');
        // Hand the URL object itself to readFile instead of `url.pathname`:
        // the pathname is still percent-encoded (spaces, non-ASCII) and has a
        // leading slash in front of Windows drive letters.
        const buffer = await readFile(fileUrl);
        const blob = new Blob([buffer]);
        return new Response(blob, {
          status: 200,
          statusText: 'OK',
          headers: {
            'Content-Type': 'application/wasm',
            'Content-Length': blob.size.toString()
          }
        });
      }

      return await fetch(url, options);
    }

    /**
     * Location of this script as a `file:` URL, used as base for resolving
     * relative urls.
     * @return {URL}
     */
    get location() {
      const {pathToFileURL} = require(/* webpackIgnore: true */ 'node:url');
      // pathToFileURL escapes characters such as '#', '?' and '%' and handles
      // Windows paths, which plain `file://${__filename}` does not.
      return pathToFileURL(__filename);
    }
  }
}
83
+
84
/**
 * Minimal YAML reader/writer for flat Marian-style configuration: top-level
 * `key: value` pairs and `- item` lists only.
 */
class YAML {
  /**
   * Parses YAML into dictionary. Does not interpret types, all values are a
   * string or a list of strings. No support for objects other than the top
   * level. Both LF and CRLF line endings are accepted.
   * @param {string} yaml
   * @return {{[string]: string | string[]}}
   * @throws {Error} if a line is neither `key: value`, `- item` nor blank, or
   *   if a list item appears before any key.
   */
  static parse(yaml) {
    const out = {};
    // Key that subsequent `- item` lines are appended to.
    let key = null;

    yaml.split(/\r?\n/).forEach((line, i) => {
      let match;
      if ((match = line.match(/^\s*-\s+(.+?)$/))) {
        // A list item without a preceding key has nowhere to go; report it as
        // a parse error instead of failing on `undefined.trim()`.
        if (key === null)
          throw Error(`Could not parse line ${i+1}: "${line}"`);
        // First list item: turn the scalar into a list, keeping a non-empty
        // inline value as its first element.
        if (!Array.isArray(out[key]))
          out[key] = out[key].trim() ? [out[key]] : [];
        out[key].push(match[1].trim());
      }
      else if ((match = line.match(/^\s*([A-Za-z0-9_][A-Za-z0-9_-]*):\s*(.*)$/))) {
        key = match[1];
        out[key] = match[2].trim();
      }
      else if (line.trim()) {
        // Anything that is not blank and did not match above is unsupported.
        throw Error(`Could not parse line ${i+1}: "${line}"`);
      }
    });

    return out;
  }

  /**
   * Turns an object into a YAML string. No support for objects, only simple
   * types and lists of simple types. Values are written unquoted, so they
   * must not contain newlines.
   * @param {{[string]: string | number | boolean | string[]}} data
   * @return {string}
   */
  static stringify(data) {
    return Object.entries(data).reduce((str, [key, value]) => {
      // Scalars of any type are written via their string form; this also
      // keeps non-string scalars such as `null` from crashing the writer.
      const valstr = Array.isArray(value)
        ? value.map(val => `\n - ${val}`).join('')
        : `${value}`;

      return `${str}${key}: ${valstr}\n`;
    }, '');
  }
}
138
+
139
/**
 * Wrapper around the bergamot-translator exported module that hides the need
 * of working with C++ style data structures and does model management.
 */
class BergamotTranslatorWorker {
  /**
   * Map of expected symbol -> name of fallback symbol for functions that can
   * be swizzled for a faster implementation. Firefox Nightly makes use of
   * this.
   */
  static GEMM_TO_FALLBACK_FUNCTIONS_MAP = {
    'int8_prepare_a': 'int8PrepareAFallback',
    'int8_prepare_b': 'int8PrepareBFallback',
    'int8_prepare_b_from_transposed': 'int8PrepareBFromTransposedFallback',
    'int8_prepare_b_from_quantized_transposed': 'int8PrepareBFromQuantizedTransposedFallback',
    'int8_prepare_bias': 'int8PrepareBiasFallback',
    'int8_multiply_and_add_bias': 'int8MultiplyAndAddBiasFallback',
    'int8_select_columns_of_b': 'int8SelectColumnsOfBFallback'
  };

  /**
   * Name of module exported by Firefox Nightly that exports an optimised
   * implementation of the symbols mentioned above.
   */
  static NATIVE_INT_GEMM = 'mozIntGemm';

  /**
   * Empty because we can't do async constructors yet. It is the
   * responsibility of whoever owns this WebWorker to call `initialize()`.
   */
  constructor(options) {}

  /**
   * Instantiates a new translation worker with optional options object.
   * If this call succeeds, the WASM runtime is loaded and ready.
   *
   * Available options are:
   *   useNativeIntGemm: {true | false} defaults to false. If true, it will
   *                     attempt to link to the intgemm module available in
   *                     Firefox Nightly which makes translations much faster.
   *          cacheSize: {Number} defaults to 0 which disables translation
   *                     cache entirely. Note that this is a theoretical
   *                     upper bound. In practice it will use about 1/3th of
   *                     the cache specified here. 2^14 is not a bad starting
   *                     value.
   * @param {{useNativeIntGemm: boolean, cacheSize: number}} options
   */
  async initialize(options) {
    this.options = options || {};
    this.models = new Map(); // Map<str,TranslationModel>, keyed by JSON of {from,to}
    this.module = await this.loadModule();
    this.service = await this.loadTranslationService();
  }

  /**
   * Tries to load native IntGEMM module for bergamot-translator. If that
   * fails because it or any of the expected functions is not available, it
   * falls back to using the naive implementations that come with the wasm
   * binary itself through `linkFallbackIntGemm()`.
   * @param {{env: {memory: WebAssembly.Memory}}} info
   * @return {{[method:string]: (...any) => any}}
   */
  linkNativeIntGemm(info) {
    const nativeModule = WebAssembly[BergamotTranslatorWorker.NATIVE_INT_GEMM];

    if (!nativeModule) {
      console.warn('Native gemm requested but not available, falling back to embedded gemm');
      return this.linkFallbackIntGemm(info);
    }

    // The native module operates on the same memory as the translator itself.
    const instance = new WebAssembly.Instance(nativeModule(), {
      '': {memory: info['env']['memory']}
    });

    const expected = Object.keys(BergamotTranslatorWorker.GEMM_TO_FALLBACK_FUNCTIONS_MAP);
    if (!expected.every(fun => instance.exports[fun])) {
      console.warn('Native gemm is missing expected functions, falling back to embedded gemm');
      return this.linkFallbackIntGemm(info);
    }

    return instance.exports;
  }

  /**
   * Links intgemm functions that are already available in the wasm binary,
   * but just exports them under the name that is expected by
   * bergamot-translator.
   * @param {{env: {memory: WebAssembly.Memory}}} info
   * @return {{[method:string]: (...any) => any}}
   */
  linkFallbackIntGemm(info) {
    const mapping = Object.entries(BergamotTranslatorWorker.GEMM_TO_FALLBACK_FUNCTIONS_MAP).map(([key, name]) => {
      // Looked up lazily at call time: `Module['asm']` is not read until one
      // of these functions is actually invoked.
      return [key, (...args) => Module['asm'][name](...args)];
    });

    return Object.fromEntries(mapping);
  }

  /**
   * Internal method. Reads and instantiates the WASM binary. Returns a
   * promise for the exported Module object that contains all the classes
   * and functions exported by bergamot-translator.
   * @return {Promise<BergamotTranslator>}
   */
  loadModule() {
    // Explicit promise because resolution happens from the Emscripten
    // `onRuntimeInitialized` callback, not from the end of this function.
    return new Promise(async (resolve, reject) => {
      try {
        const response = await self.fetch(new URL('./bergamot-translator-worker.wasm', self.location));

        Object.assign(Module, {
          instantiateWasm: (info, accept) => {
            try {
              WebAssembly.instantiateStreaming(response, {
                ...info,
                'wasm_gemm': this.options.useNativeIntGemm
                  ? this.linkNativeIntGemm(info)
                  : this.linkFallbackIntGemm(info)
              }).then(({instance}) => accept(instance)).catch(reject);
            } catch (err) {
              reject(err);
            }
            // Instantiation continues asynchronously; `accept` hands over the
            // instance once it is ready.
            return {};
          },
          onRuntimeInitialized: () => {
            resolve(Module);
          }
        });

        // Emscripten glue code. Webpack et al. should not mangle the `Module` property name!
        self.Module = Module;
        self.importScripts('bergamot-translator-worker.js');
      } catch (err) {
        reject(err);
      }
    });
  }

  /**
   * Internal method. Instantiates a BlockingService()
   * @return {BergamotTranslator.BlockingService}
   */
  loadTranslationService() {
    return new this.module.BlockingService({
      // Negative or missing cache sizes are clamped to 0 (cache disabled).
      cacheSize: Math.max(this.options.cacheSize || 0, 0)
    });
  }

  /**
   * Returns whether a model has already been loaded in this worker.
   * Synchronous; the message handler awaits the result either way.
   * @param {{from:string, to:string}}
   * @return {boolean}
   */
  hasTranslationModel({from,to}) {
    const key = JSON.stringify({from,to});
    return this.models.has(key);
  }

  /**
   * Loads a translation model from a set of file buffers. After this, the
   * model is available to translate with and `hasTranslationModel()` will
   * return true for this pair. If a model was already loaded for this pair it
   * is replaced and the old one is freed.
   * @param {{from:string, to:string}}
   * @param {{
   *   model: ArrayBuffer,
   *   shortlist: ArrayBuffer,
   *   vocabs: ArrayBuffer[],
   *   qualityModel: ArrayBuffer?,
   *   config?: {
   *     [key:string]: string
   *   }
   * }} buffers
   */
  loadTranslationModel({from, to}, buffers) {
    // This because service_bindings.cpp:prepareVocabsSmartMemories :(
    // Drop repeated references to the same vocab buffer, keeping first use.
    const uniqueVocabs = buffers.vocabs.filter((vocab, index, vocabs) => {
      return !vocabs.slice(0, index).includes(vocab);
    });

    const [modelMemory, shortlistMemory, qualityModel, ...vocabMemory] = [
      this.prepareAlignedMemoryFromBuffer(buffers.model, 256),
      this.prepareAlignedMemoryFromBuffer(buffers.shortlist, 64),
      buffers.qualityModel // optional quality model
        ? this.prepareAlignedMemoryFromBuffer(buffers.qualityModel, 64)
        : null,
      ...uniqueVocabs.map(vocab => this.prepareAlignedMemoryFromBuffer(vocab, 64))
    ];

    const vocabs = new this.module.AlignedMemoryList();
    vocabMemory.forEach(vocab => vocabs.push_back(vocab));

    // Defaults
    let modelConfig = YAML.parse(`
      beam-size: 1
      normalize: 1.0
      word-penalty: 0
      cpu-threads: 0
      gemm-precision: int8shiftAlphaAll
      skip-cost: true
    `);

    if (buffers.config)
      Object.assign(modelConfig, buffers.config);

    // WASM marian is only compiled with support for shiftedAll.
    if (modelConfig['gemm-precision'] === 'int8')
      modelConfig['gemm-precision'] = 'int8shiftAll';

    // Override these
    Object.assign(modelConfig, YAML.parse(`
      alignment: soft
      quiet: true
      quiet-translation: true
      max-length-break: 128
      mini-batch-words: 1024
      workspace: 128
      max-length-factor: 2.0
    `));

    const key = JSON.stringify({from,to});
    const model = new this.module.TranslationModel(YAML.stringify(modelConfig), modelMemory, shortlistMemory, vocabs, qualityModel);

    // Objects handed out by the WASM bindings are not garbage collected;
    // free a model that is being replaced, otherwise it would leak.
    const previous = this.models.get(key);
    this.models.set(key, model);
    if (previous)
      previous.delete();
  }

  /**
   * Frees up memory used by old translation model. Does nothing if model is
   * already deleted.
   * @param {{from:string, to:string}}
   */
  freeTranslationModel({from, to}) {
    const key = JSON.stringify({from,to});

    if (!this.models.has(key))
      return;

    const model = this.models.get(key);
    this.models.delete(key);

    model.delete();
  }

  /**
   * Internal function. Copies the data from an ArrayBuffer into memory that
   * can be used inside the WASM vm by Marian.
   * @param {ArrayBuffer} buffer
   * @param {number} alignmentSize
   * @return {BergamotTranslator.AlignedMemory}
   */
  prepareAlignedMemoryFromBuffer(buffer, alignmentSize) {
    const bytes = new Int8Array(buffer);
    const memory = new this.module.AlignedMemory(bytes.byteLength, alignmentSize);
    memory.getByteArrayView().set(bytes);
    return memory;
  }

  /**
   * Public. Does actual translation work. You have to make sure that the
   * models necessary for translating text are already loaded before calling
   * this method. Returns the translation responses, in the order of `texts`.
   * More than one model means pivoting through the intermediate language.
   * @param {{models: {from:string, to:string}[], texts: {text: string, html: boolean}[]}}
   * @return {{target: {text: string}}[]}
   * @throws {Error} if `models` is empty or names a pair that is not loaded.
   */
  translate({models, texts}) {
    // Turn our model names into a list of TranslationModel pointers. Done
    // before anything is allocated so a missing model fails fast and clearly
    // instead of passing `undefined` into the WASM bindings.
    const translationModels = models.map(({from,to}) => {
      const model = this.models.get(JSON.stringify({from,to}));
      if (!model)
        throw new Error(`No translation model loaded for ${from} -> ${to}`);
      return model;
    });

    if (translationModels.length === 0)
      throw new Error('No translation models specified');

    let input = null;
    let options = null;
    let responses = null;

    try {
      // Convert texts array into a std::vector<std::string>.
      input = new this.module.VectorString();
      texts.forEach(({text}) => input.push_back(text));

      // Extracts the texts[].html options into ResponseOption objects
      options = new this.module.VectorResponseOptions();
      texts.forEach(({html, qualityScores}) => options.push_back({alignment: false, html, qualityScores}));

      // translate the input, which is a vector<String>; the result is a vector<Response>
      responses = translationModels.length > 1
        ? this.service.translateViaPivoting(...translationModels, input, options)
        : this.service.translate(...translationModels, input, options);

      // Convert the Response WASM wrappers into native JavaScript types we
      // can send over the 'wire' (message passing) in the same format as we
      // use in bergamot-translator.
      return texts.map((_, i) => ({
        target: {
          text: responses.get(i).getTranslatedText()
        }
      }));
    } finally {
      // The vectors have to be released explicitly, also when translation
      // throws half-way through.
      if (input)
        input.delete();
      if (options)
        options.delete();
      if (responses)
        responses.delete();
    }
  }
}
434
+
435
/**
 * Because you can't put an Error object in a message. But you can post a
 * generic object!
 *
 * Anything can be thrown in JavaScript, so values that are not objects
 * (strings, numbers, `null`, `undefined`, ...) are reported as a generic
 * 'Error' with the stringified value as message. This function runs inside
 * the catch handler that reports failures, so it must not throw itself.
 * @param {Error|any} error
 * @return {{
 *  name: string?,
 *  message: string?,
 *  stack: string?
 * }}
 */
function cloneError(error) {
  if (error === null || typeof error !== 'object') {
    return {
      name: 'Error',
      message: String(error),
      stack: undefined
    };
  }

  return {
    name: error.name,
    message: error.message,
    stack: error.stack
  };
}
452
+
453
// Constructing the worker sets nothing up: the owner of this worker has to
// send an `initialize` message before calling any other method.
const worker = new BergamotTranslatorWorker();

/**
 * Message dispatcher. Each message `{id, name, args}` invokes the method
 * `worker[name]` with `args`; the outcome is posted back as `{id, result}`,
 * or as `{id, error}` when the call fails.
 */
self.addEventListener('message', async function(event) {
  const {data: {id, name, args}} = event;

  // A missing id is only logged; the call is still carried out.
  if (!id)
    console.error('Received message without id', event);

  try {
    if (typeof worker[name] !== 'function')
      throw TypeError(`worker[${name}] is not a function`);

    // Methods may be synchronous or return a promise; wrapping the return
    // value in `Promise.resolve` lets both be awaited the same way.
    const pending = Promise.resolve(Reflect.apply(worker[name], worker, args));
    const result = await pending;
    self.postMessage({id, result});
  } catch (error) {
    const failure = cloneError(error);
    self.postMessage({
      id,
      error: failure
    });
  }
});
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@internetarchive/bookreader",
3
- "version": "5.0.0-96",
3
+ "version": "5.0.0-98",
4
4
  "description": "The Internet Archive BookReader.",
5
5
  "type": "module",
6
6
  "files": [
@@ -31,6 +31,7 @@
31
31
  "homepage": "https://github.com/internetarchive/bookreader#readme",
32
32
  "private": false,
33
33
  "dependencies": {
34
+ "@internetarchive/bergamot-translator": "^0.4.9-ia.1",
34
35
  "@internetarchive/ia-activity-indicator": "^0.0.4",
35
36
  "@internetarchive/ia-item-navigator": "^2.1.2",
36
37
  "@internetarchive/icon-bookmark": "^1.3.4",
@@ -66,7 +67,7 @@
66
67
  "http-server": "14.1.1",
67
68
  "hypothesis": "^1.1627.0",
68
69
  "interactjs": "^1.10.18",
69
- "iso-language-codes": "1.1.0",
70
+ "iso-language-codes": "2.0.0",
70
71
  "jest": "29.7.0",
71
72
  "jest-environment-jsdom": "^29.7.0",
72
73
  "jquery": "3.6.1",
@@ -106,9 +107,11 @@
106
107
  "preversion": "npm run test && node scripts/preversion.js",
107
108
  "version": "node scripts/version.js",
108
109
  "postversion": "node scripts/postversion.js",
109
- "build": "npm run clean && npx concurrently --group npm:build-js npm:build-css npm:build-assets npm:build-hypothesis",
110
+ "build": "npm run clean && npx concurrently --group npm:build-js npm:build-css npm:build-assets npm:build-hypothesis npm:build-bergamot",
110
111
  "build-assets": "npx cpx \"src/assets/**/*\" BookReader && npx svgo -f BookReader/icons && npx svgo -f BookReader/images",
111
112
  "build-assets:watch": "npx cpx --watch --verbose \"src/assets/**/*\" BookReader",
113
+ "build-bergamot": "npx cpx \"node_modules/@internetarchive/bergamot-translator/worker/*\" BookReader/",
114
+ "build-bergamot:watch": "npx cpx --watch \"node_modules/@internetarchive/bergamot-translator/worker/*\" BookReader/",
112
115
  "build-hypothesis": "npx cpx \"node_modules/hypothesis/**/*\" BookReader/hypothesis",
113
116
  "build-hypothesis:watch": "npx cpx --watch --verbose \"node_modules/hypothesis/**/*\" BookReader/hypothesis",
114
117
  "build-js": "npx webpack",
@@ -86,6 +86,7 @@ export class BookNavigator extends LitElement {
86
86
  'volumes',
87
87
  'chapters',
88
88
  'search',
89
+ 'translate',
89
90
  'bookmarks',
90
91
  'downloads',
91
92
  'visualAdjustments',
@@ -283,7 +283,12 @@ export class Mode1UpLit extends LitElement {
283
283
  }).$container[0];
284
284
 
285
285
  pageContainerEl.style.transform = transform;
286
- pageContainerEl.classList.toggle('BRpage-visible', this.visiblePages.includes(page));
286
+ // Prevent trigger pageVisible when scrolling outside of BookReader
287
+ const wasVisible = pageContainerEl.classList.contains('BRpage-visible');
288
+ const visibleStatus = pageContainerEl.classList.toggle('BRpage-visible', this.visiblePages.includes(page));
289
+ if (visibleStatus && !wasVisible) {
290
+ this.br.trigger('pageVisible', { pageContainerEl });
291
+ }
287
292
  return pageContainerEl;
288
293
  }
289
294
 
@@ -337,7 +337,12 @@ export class Mode2UpLit extends LitElement {
337
337
  reduce: page.width / wToV(this.computePageWidth(page)),
338
338
  }).$container[0];
339
339
 
340
- pageContainerEl.classList.toggle('BRpage-visible', isVisible);
340
+ // Should keep for initial render of the page
341
+ const wasVisible = pageContainerEl.classList.contains('BRpage-visible');
342
+ const visibleStatus = pageContainerEl.classList.toggle('BRpage-visible', isVisible);
343
+ if (visibleStatus && !wasVisible) {
344
+ this.br.trigger('pageVisible', { pageContainerEl });
345
+ }
341
346
  return pageContainerEl;
342
347
  }
343
348
 
@@ -622,6 +627,11 @@ export class Mode2UpLit extends LitElement {
622
627
  this.visiblePages = (
623
628
  progression == 'lr' ? [nextSpread.left, nextSpread.right] : [nextSpread.right, nextSpread.left]
624
629
  ).filter(x => x);
630
+ nextPageContainers.forEach(c => {
631
+ this.br.trigger('pageVisible', {
632
+ pageContainerEl: c.$container[0],
633
+ });
634
+ });
625
635
  }
626
636
 
627
637
  /**