shamela 1.3.1 → 1.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +362 -406
- package/dist/index.d.ts +34 -27
- package/dist/index.js +9 -7
- package/dist/index.js.map +1 -1
- package/package.json +11 -11
package/dist/index.d.ts
CHANGED
|
@@ -398,45 +398,52 @@ declare const configure: (config: ConfigureOptions) => void;
|
|
|
398
398
|
* Clears runtime configuration overrides and restores the default logger.
|
|
399
399
|
*/
|
|
400
400
|
declare const resetConfig: () => void;
|
|
401
|
-
/**
|
|
402
|
-
* Creates a default configuration for Node.js environments.
|
|
403
|
-
* Automatically sets the correct sqlJsWasmUrl path for bundled environments.
|
|
404
|
-
*
|
|
405
|
-
* This helper is optional - the library will auto-detect the WASM file location
|
|
406
|
-
* in most cases. Use this if you want explicit control or are experiencing issues.
|
|
407
|
-
*
|
|
408
|
-
* @param config - Your API configuration
|
|
409
|
-
* @returns Complete configuration with sqlJsWasmUrl set for Node.js
|
|
410
|
-
*
|
|
411
|
-
* @example
|
|
412
|
-
* ```typescript
|
|
413
|
-
* import { configure, createNodeConfig } from 'shamela';
|
|
414
|
-
*
|
|
415
|
-
* configure(createNodeConfig({
|
|
416
|
-
* apiKey: process.env.SHAMELA_API_KEY,
|
|
417
|
-
* booksEndpoint: process.env.SHAMELA_BOOKS_ENDPOINT,
|
|
418
|
-
* masterPatchEndpoint: process.env.SHAMELA_MASTER_ENDPOINT,
|
|
419
|
-
* }));
|
|
420
|
-
* ```
|
|
421
|
-
*/
|
|
422
|
-
declare const createNodeConfig: (config: Omit<ShamelaConfig, "sqlJsWasmUrl">) => ShamelaConfig;
|
|
423
401
|
//#endregion
|
|
424
402
|
//#region src/content.d.ts
|
|
425
403
|
type Line = {
|
|
426
404
|
id?: string;
|
|
427
405
|
text: string;
|
|
428
406
|
};
|
|
407
|
+
/**
|
|
408
|
+
* Parses Shamela HTML content into structured lines while preserving headings.
|
|
409
|
+
*
|
|
410
|
+
* @param content - The raw HTML markup representing a page
|
|
411
|
+
* @returns An array of {@link Line} objects containing text and optional IDs
|
|
412
|
+
*/
|
|
429
413
|
declare const parseContentRobust: (content: string) => Line[];
|
|
430
414
|
/**
|
|
431
|
-
*
|
|
432
|
-
*
|
|
433
|
-
* @param
|
|
434
|
-
* @
|
|
415
|
+
* Sanitises page content by applying regex replacement rules.
|
|
416
|
+
*
|
|
417
|
+
* @param text - The text to clean
|
|
418
|
+
* @param rules - Optional custom replacements, defaults to {@link DEFAULT_SANITIZATION_RULES}
|
|
419
|
+
* @returns The sanitised content
|
|
435
420
|
*/
|
|
436
421
|
declare const sanitizePageContent: (text: string, rules?: Record<string, string>) => string;
|
|
422
|
+
/**
|
|
423
|
+
* Splits a page body from its trailing footnotes using a marker string.
|
|
424
|
+
*
|
|
425
|
+
* @param content - Combined body and footnote text
|
|
426
|
+
* @param footnoteMarker - Marker indicating the start of footnotes
|
|
427
|
+
* @returns A tuple containing the page body followed by the footnote section
|
|
428
|
+
*/
|
|
437
429
|
declare const splitPageBodyFromFooter: (content: string, footnoteMarker?: string) => readonly [string, string];
|
|
430
|
+
/**
|
|
431
|
+
* Removes Arabic numeral page markers enclosed in turtle ⦗ ⦘ brackets.
|
|
432
|
+
* Replaces the marker along with up to two preceding whitespace characters
|
|
433
|
+
* (space or carriage return) and up to one following whitespace character
|
|
434
|
+
* with a single space.
|
|
435
|
+
*
|
|
436
|
+
* @param text - Text potentially containing page markers
|
|
437
|
+
* @returns The text with numeric markers replaced by a single space
|
|
438
|
+
*/
|
|
438
439
|
declare const removeArabicNumericPageMarkers: (text: string) => string;
|
|
440
|
+
/**
|
|
441
|
+
* Removes anchor and hadeeth tags from the content while preserving spans.
|
|
442
|
+
*
|
|
443
|
+
* @param content - HTML string containing various tags
|
|
444
|
+
* @returns The content with only span tags retained
|
|
445
|
+
*/
|
|
439
446
|
declare const removeTagsExceptSpan: (content: string) => string;
|
|
440
447
|
//#endregion
|
|
441
|
-
export { Author, Book, BookData, Category, type ConfigureOptions, DownloadBookOptions, DownloadMasterOptions, GetBookMetadataOptions, GetBookMetadataResponsePayload, GetMasterMetadataResponsePayload, Line, type Logger, MasterData, OutputOptions, Page, ShamelaConfig, ShamelaConfigKey, Title, configure,
|
|
448
|
+
export { Author, Book, BookData, Category, type ConfigureOptions, DownloadBookOptions, DownloadMasterOptions, GetBookMetadataOptions, GetBookMetadataResponsePayload, GetMasterMetadataResponsePayload, Line, type Logger, MasterData, OutputOptions, Page, ShamelaConfig, ShamelaConfigKey, Title, configure, downloadBook, downloadMasterDatabase, getBook, getBookMetadata, getCoverUrl, getMaster, getMasterMetadata, parseContentRobust, removeArabicNumericPageMarkers, removeTagsExceptSpan, resetConfig, sanitizePageContent, splitPageBodyFromFooter };
|
|
442
449
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import e from"sql.js";import{unzipSync as t}from"fflate";var n=(e=>typeof require<`u`?require:typeof Proxy<`u`?new Proxy(e,{get:(e,t)=>(typeof require<`u`?require:e)[t]}):e)(function(e){if(typeof require<`u`)return require.apply(this,arguments);throw Error('Calling `require` for "'+e+"\" in an environment that doesn't expose the `require` function.")});const r=Object.freeze({debug:()=>{},error:()=>{},info:()=>{},warn:()=>{}});let i=r;const a=e=>{if(!e){i=r;return}let t=[`debug`,`error`,`info`,`warn`].find(t=>typeof e[t]!=`function`);if(t)throw Error(`Logger must implement debug, error, info, and warn methods. Missing: ${String(t)}`);i=e},o=()=>i,s=()=>{i=r};var c=new Proxy({},{get:(e,t)=>{let n=o(),r=n[t];return typeof r==`function`?(...e)=>r.apply(n,e):r}});let l={};const u={apiKey:`SHAMELA_API_KEY`,booksEndpoint:`SHAMELA_API_BOOKS_ENDPOINT`,masterPatchEndpoint:`SHAMELA_API_MASTER_PATCH_ENDPOINT`,sqlJsWasmUrl:`SHAMELA_SQLJS_WASM_URL`},ee=typeof process<`u`&&!!process?.env,d=e=>{let t=l[e];if(t!==void 0)return t;let n=u[e];if(ee)return process.env[n]},te=e=>{let{logger:t,...n}=e;`logger`in e&&a(t),l={...l,...n}},f=e=>e===`fetchImplementation`?l.fetchImplementation:d(e),p=()=>({apiKey:d(`apiKey`),booksEndpoint:d(`booksEndpoint`),fetchImplementation:l.fetchImplementation,masterPatchEndpoint:d(`masterPatchEndpoint`),sqlJsWasmUrl:d(`sqlJsWasmUrl`)}),m=e=>{if(e===`fetchImplementation`)throw Error(`fetchImplementation must be provided via configure().`);let t=f(e);if(!t)throw Error(`${u[e]} environment variable not set`);return t},ne=()=>{l={},s()}
|
|
1
|
+
import e from"sql.js";import{unzipSync as t}from"fflate";var n=(e=>typeof require<`u`?require:typeof Proxy<`u`?new Proxy(e,{get:(e,t)=>(typeof require<`u`?require:e)[t]}):e)(function(e){if(typeof require<`u`)return require.apply(this,arguments);throw Error('Calling `require` for "'+e+"\" in an environment that doesn't expose the `require` function.")});const r=Object.freeze({debug:()=>{},error:()=>{},info:()=>{},warn:()=>{}});let i=r;const a=e=>{if(!e){i=r;return}let t=[`debug`,`error`,`info`,`warn`].find(t=>typeof e[t]!=`function`);if(t)throw Error(`Logger must implement debug, error, info, and warn methods. Missing: ${String(t)}`);i=e},o=()=>i,s=()=>{i=r};var c=new Proxy({},{get:(e,t)=>{let n=o(),r=n[t];return typeof r==`function`?(...e)=>r.apply(n,e):r}});let l={};const u={apiKey:`SHAMELA_API_KEY`,booksEndpoint:`SHAMELA_API_BOOKS_ENDPOINT`,masterPatchEndpoint:`SHAMELA_API_MASTER_PATCH_ENDPOINT`,sqlJsWasmUrl:`SHAMELA_SQLJS_WASM_URL`},ee=typeof process<`u`&&!!process?.env,d=e=>{let t=l[e];if(t!==void 0)return t;let n=u[e];if(ee)return process.env[n]},te=e=>{let{logger:t,...n}=e;`logger`in e&&a(t),l={...l,...n}},f=e=>e===`fetchImplementation`?l.fetchImplementation:d(e),p=()=>({apiKey:d(`apiKey`),booksEndpoint:d(`booksEndpoint`),fetchImplementation:l.fetchImplementation,masterPatchEndpoint:d(`masterPatchEndpoint`),sqlJsWasmUrl:d(`sqlJsWasmUrl`)}),m=e=>{if(e===`fetchImplementation`)throw Error(`fetchImplementation must be provided via configure().`);let t=f(e);if(!t)throw Error(`${u[e]} environment variable not set`);return t},ne=()=>{l={},s()};let h=function(e){return e.Authors=`author`,e.Books=`book`,e.Categories=`category`,e.Page=`page`,e.Title=`title`,e}({});const g=(e,t)=>e.query(`PRAGMA table_info(${t})`).all(),_=(e,t)=>!!e.query(`SELECT name FROM sqlite_master WHERE type='table' AND name = ?1`).get(t),v=(e,t)=>_(e,t)?e.query(`SELECT * FROM ${t}`).all():[],y=e=>String(e.is_deleted)===`1`,b=(e,t,n)=>{let r={};for(let i of n){if(i===`id`){r.id=(t??e)?.id??null;continue}if(t&&i in t){let e=t[i];if(e!==`#`&&e!=null){r[i]=e;continue}}if(e&&i in e){r[i]=e[i];continue}r[i]=null}return r},re=(e,t,n)=>{let r=new Set,i=new Map;for(let t of e)r.add(String(t.id));for(let e of t)i.set(String(e.id),e);let a=[];for(let t of e){let e=i.get(String(t.id));e&&y(e)||a.push(b(t,e,n))}for(let e of t){let t=String(e.id);r.has(t)||y(e)||a.push(b(void 0,e,n))}return a},ie=(e,t,n,r)=>{if(r.length===0)return;let i=n.map(()=>`?`).join(`,`),a=e.prepare(`INSERT INTO ${t} (${n.join(`,`)}) VALUES (${i})`);r.forEach(e=>{let t=n.map(t=>t in e?e[t]:null);a.run(...t)}),a.finalize()},ae=(e,t,n)=>{let r=t.query(`SELECT sql FROM sqlite_master WHERE type='table' AND name = ?1`).get(n);return r?.sql?(e.run(`DROP TABLE IF EXISTS ${n}`),e.run(r.sql),!0):(c.warn(`${n} table definition missing in source database`),!1)},x=(e,t,n,r)=>{if(!_(t,r)){c.warn(`${r} table missing in source database`);return}if(!ae(e,t,r))return;let i=g(t,r),a=n&&_(n,r)?g(n,r):[],o=i.map(e=>e.name);for(let t of a)if(!o.includes(t.name)){let n=t.type&&t.type.length>0?t.type:`TEXT`;e.run(`ALTER TABLE ${r} ADD COLUMN ${t.name} ${n}`),o.push(t.name)}ie(e,r,o,re(v(t,r),n?v(n,r):[],o))},oe=(e,t,n)=>{e.transaction(()=>{x(e,t,n,h.Page),x(e,t,n,h.Title)})()},se=(e,t)=>{e.transaction(()=>{x(e,t,null,h.Page),x(e,t,null,h.Title)})()},ce=e=>{e.run(`CREATE TABLE ${h.Page} (
|
|
2
2
|
id INTEGER,
|
|
3
3
|
content TEXT,
|
|
4
4
|
part TEXT,
|
|
@@ -12,8 +12,8 @@ import e from"sql.js";import{unzipSync as t}from"fflate";var n=(e=>typeof requir
|
|
|
12
12
|
page INTEGER,
|
|
13
13
|
parent INTEGER,
|
|
14
14
|
is_deleted TEXT
|
|
15
|
-
)`)},
|
|
16
|
-
`);throw Error(e)}}else O=`https://cdn.jsdelivr.net/npm/sql.js@1.13.0/dist/sql-wasm.wasm`}return O},k=()=>(D||=e({locateFile:()=>
|
|
15
|
+
)`)},S=e=>e.query(`SELECT * FROM ${h.Page}`).all(),C=e=>e.query(`SELECT * FROM ${h.Title}`).all(),w=e=>({pages:S(e),titles:C(e)}),T=e=>{try{return n(`node:fs`).existsSync(e)}catch{return!1}},le=()=>{if(n!==void 0&&n.resolve!==void 0)try{let e=n.resolve(`sql.js`),t=n(`node:path`),r=t.dirname(e),i=t.join(r,`dist`,`sql-wasm.wasm`);if(T(i))return i}catch{}if(typeof process<`u`&&process.cwd)try{let e=n(`node:path`),t=process.cwd(),r=[e.join(t,`node_modules`,`sql.js`,`dist`,`sql-wasm.wasm`),e.join(t,`..`,`node_modules`,`sql.js`,`dist`,`sql-wasm.wasm`),e.join(t,`../..`,`node_modules`,`sql.js`,`dist`,`sql-wasm.wasm`),e.join(t,`.next`,`server`,`node_modules`,`sql.js`,`dist`,`sql-wasm.wasm`)];for(let e of r)if(T(e))return e}catch{}if(n!==void 0&&n.resolve!==void 0&&n.resolve.paths)try{let e=n(`node:path`),t=n.resolve.paths(`sql.js`)||[];for(let n of t){let t=e.join(n,`sql.js`,`dist`,`sql-wasm.wasm`);if(T(t))return t}}catch{}try{if(import.meta.url){let e=new URL(`../../node_modules/sql.js/dist/sql-wasm.wasm`,import.meta.url),t=decodeURIComponent(e.pathname),n=process.platform===`win32`&&t.startsWith(`/`)?t.slice(1):t;if(T(n))return n}}catch{}return null};var ue=class{constructor(e){this.statement=e}run=(...e)=>{e.length>0&&this.statement.bind(e),this.statement.step(),this.statement.reset()};finalize=()=>{this.statement.free()}},E=class{constructor(e){this.db=e}run=(e,t=[])=>{this.db.run(e,t)};prepare=e=>new ue(this.db.prepare(e));query=e=>({all:(...t)=>this.all(e,t),get:(...t)=>this.get(e,t)});transaction=e=>()=>{this.db.run(`BEGIN TRANSACTION`);try{e(),this.db.run(`COMMIT`)}catch(e){throw this.db.run(`ROLLBACK`),e}};close=()=>{this.db.close()};export=()=>this.db.export();all=(e,t)=>{let n=this.db.prepare(e);try{t.length>0&&n.bind(t);let e=[];for(;n.step();)e.push(n.getAsObject());return e}finally{n.free()}};get=(e,t)=>this.all(e,t)[0]};let D=null,O=null;const de=typeof process<`u`&&!!process?.versions?.node,fe=()=>{if(!O){let e=f(`sqlJsWasmUrl`);if(e)O=e;else if(de){let e=le();if(e)O=e;else{let e=[`Unable to automatically locate sql-wasm.wasm file.`,`This can happen in bundled environments (Next.js, webpack, etc.).`,``,`Quick fix - add this to your code before using shamela:`,``,` import { configure, createNodeConfig } from "shamela";`,` configure(createNodeConfig({`,` apiKey: process.env.SHAMELA_API_KEY,`,` booksEndpoint: process.env.SHAMELA_BOOKS_ENDPOINT,`,` masterPatchEndpoint: process.env.SHAMELA_MASTER_ENDPOINT,`,` }));`,``,`Or manually specify the path:`,``,` import { configure } from "shamela";`,` import { join } from "node:path";`,` configure({`,` sqlJsWasmUrl: join(process.cwd(), "node_modules", "sql.js", "dist", "sql-wasm.wasm")`,` });`].join(`
|
|
16
|
+
`);throw Error(e)}}else O=`https://cdn.jsdelivr.net/npm/sql.js@1.13.0/dist/sql-wasm.wasm`}return O},k=()=>(D||=e({locateFile:()=>fe()}),D),A=async()=>new E(new(await(k())).Database),j=async e=>new E(new(await(k())).Database(e)),pe=(e,t,n)=>{let r=t.query(`SELECT sql FROM sqlite_master WHERE type='table' AND name = ?1`).get(n);if(!r?.sql)throw Error(`Missing table definition for ${n} in source database`);e.run(`DROP TABLE IF EXISTS ${n}`),e.run(r.sql)},me=async(e,t)=>{let n={author:h.Authors,book:h.Books,category:h.Categories},r={};for(let e of t){let t=n[(e.name.split(`/`).pop()?.split(`\\`).pop()??e.name).replace(/\.(sqlite|db)$/i,``).toLowerCase()];t&&(r[t]=await j(e.data))}try{let t=Object.entries(r);e.transaction(()=>{for(let[n,r]of t){pe(e,r,n);let t=r.query(`PRAGMA table_info(${n})`).all().map(e=>e.name);if(t.length===0)continue;let i=r.query(`SELECT * FROM ${n}`).all();if(i.length===0)continue;let a=t.map(()=>`?`).join(`,`),o=t.map(e=>e===`order`?`"order"`:e),s=e.prepare(`INSERT INTO ${n} (${o.join(`,`)}) VALUES (${a})`);try{for(let e of i){let n=t.map(t=>t in e?e[t]:null);s.run(...n)}}finally{s.finalize()}}})()}finally{Object.values(r).forEach(e=>e?.close())}},M=(e,t,n)=>{e.run(`DROP VIEW IF EXISTS ${t}`),e.run(`CREATE VIEW ${t} AS SELECT * FROM ${n}`)},he=e=>{e.run(`CREATE TABLE ${h.Authors} (
|
|
17
17
|
id INTEGER,
|
|
18
18
|
is_deleted TEXT,
|
|
19
19
|
name TEXT,
|
|
@@ -40,9 +40,11 @@ import e from"sql.js";import{unzipSync as t}from"fflate";var n=(e=>typeof requir
|
|
|
40
40
|
is_deleted TEXT,
|
|
41
41
|
"order" TEXT,
|
|
42
42
|
name TEXT
|
|
43
|
-
)`),M(e,`authors`,h.Authors),M(e,`books`,h.Books),M(e,`categories`,h.Categories)},
|
|
43
|
+
)`),M(e,`authors`,h.Authors),M(e,`books`,h.Books),M(e,`categories`,h.Categories)},ge=e=>e.query(`SELECT * FROM ${h.Authors}`).all(),_e=e=>e.query(`SELECT * FROM ${h.Books}`).all(),ve=e=>e.query(`SELECT * FROM ${h.Categories}`).all(),N=(e,t)=>({authors:ge(e),books:_e(e),categories:ve(e),version:t}),P=(e,t=[`api_key`,`token`,`password`,`secret`,`auth`])=>{let n=typeof e==`string`?new URL(e):new URL(e.toString());return t.forEach(e=>{let t=n.searchParams.get(e);if(t&&t.length>6){let r=`${t.slice(0,3)}***${t.slice(-3)}`;n.searchParams.set(e,r)}else t&&n.searchParams.set(e,`***`)}),n.toString()},F=e=>({content:e.content,id:e.id,...e.number&&{number:e.number},...e.page&&{page:Number(e.page)},...e.part&&{part:e.part}}),ye=e=>{let t=Number(e.parent);return{content:e.content,id:e.id,page:Number(e.page),...t&&{parent:t}}},I={"<img[^>]*>>":``,舄:``,"﵀":`رَحِمَهُ ٱللَّٰهُ`,"﵁":`رضي الله عنه`,"﵂":`رَضِيَ ٱللَّٰهُ عَنْهَا`,"﵃":`رَضِيَ اللَّهُ عَنْهُمْ`,"﵄":`رَضِيَ ٱللَّٰهُ عَنْهُمَا`,"﵅":`رَضِيَ اللَّهُ عَنْهُنَّ`,"﵇":`عَلَيْهِ ٱلسَّلَٰمُ`,"﵈":`عَلَيْهِمُ السَّلامُ`,"﵌":`صلى الله عليه وآله وسلم`,"﵎":`تبارك وتعالى`,"﵏":`رَحِمَهُمُ ٱللَّٰهُ`,"﷽":``,"﷿":`عَزَّ وَجَلَّ`},L=e=>{let t=new URL(e);return t.protocol=`https`,t.toString()},R=e=>/\.(sqlite|db)$/i.test(e.name),z=e=>e.find(R),B=e=>{let t=/\.([^.]+)$/.exec(e);return t?`.${t[1].toLowerCase()}`:``},V=(e,t,n=!0)=>{let r=new URL(e),i=new URLSearchParams;return Object.entries(t).forEach(([e,t])=>{i.append(e,t.toString())}),n&&i.append(`api_key`,m(`apiKey`)),r.search=i.toString(),r},H=async(e,t={})=>{let n=typeof e==`string`?e:e.toString(),r=await(t.fetchImpl??p().fetchImplementation??fetch)(n);if(!r.ok)throw Error(`Error making request: ${r.status} ${r.statusText}`);if((r.headers.get(`content-type`)??``).includes(`application/json`))return await r.json();let i=await r.arrayBuffer();return new Uint8Array(i)},be=typeof process<`u`&&!!process?.versions?.node,xe=async()=>{if(!be)throw Error(`File system operations are only supported in Node.js environments`);return import(`node:fs/promises`)},Se=async e=>{let[t,n]=await Promise.all([xe(),import(`node:path`)]),r=n.dirname(e);return await t.mkdir(r,{recursive:!0}),t},U=async e=>{let n=await H(e),r=n instanceof Uint8Array?n.length:n&&typeof n.byteLength==`number`?n.byteLength:0;return c.debug(`unzipFromUrl:bytes`,r),new Promise((e,r)=>{let i=n instanceof Uint8Array?n:new Uint8Array(n);try{let n=t(i),r=Object.entries(n).map(([e,t])=>({data:t,name:e}));c.debug(`unzipFromUrl:entries`,r.map(e=>e.name)),e(r)}catch(e){r(Error(`Error processing URL: ${e.message}`))}})},W=async(e,t)=>{if(e.writer){await e.writer(t);return}if(!e.path)throw Error(`Output options must include either a writer or a path`);let n=await Se(e.path);typeof t==`string`?await n.writeFile(e.path,t,`utf-8`):await n.writeFile(e.path,t)},Ce=[`author.sqlite`,`book.sqlite`,`category.sqlite`],G=()=>{let{apiKey:e,booksEndpoint:t,masterPatchEndpoint:n}=p(),r=[[`apiKey`,e],[`booksEndpoint`,t],[`masterPatchEndpoint`,n]].filter(([,e])=>!e).map(([e])=>e);if(r.length)throw Error(`${r.join(`, `)} environment variables not set`)},we=e=>{let t=new Set(e.map(e=>e.match(/[^\\/]+$/)?.[0]??e).map(e=>e.toLowerCase()));return Ce.every(e=>t.has(e.toLowerCase()))},K=async(e,t)=>{c.info(`Setting up book database for ${e}`);let n=t||await J(e),r=n.minorReleaseUrl?U(n.minorReleaseUrl):Promise.resolve([]),[i,a]=await Promise.all([U(n.majorReleaseUrl),r]),o=z(i);if(!o)throw Error(`Unable to locate book database in archive`);let s=await A();try{c.info(`Creating tables`),ce(s);let e=await j(o.data);try{let t=z(a);if(t){c.info(`Applying patches from ${t.name} to ${o.name}`);let n=await j(t.data);try{oe(s,e,n)}finally{n.close()}}else c.info(`Copying table data from ${o.name}`),se(s,e)}finally{e.close()}return{cleanup:async()=>{s.close()},client:s}}catch(e){throw s.close(),e}},q=async e=>{c.info(`Setting up master database`);let t=e||await Y(0);c.info(`Downloading master database ${t.version} from: ${P(t.url)}`);let n=await U(L(t.url));if(c.debug?.(`sourceTables downloaded: ${n.map(e=>e.name).toString()}`),!we(n.map(e=>e.name)))throw c.error(`Some source tables were not found: ${n.map(e=>e.name).toString()}`),Error(`Expected tables not found!`);let r=await A();try{return c.info(`Creating master tables`),he(r),c.info(`Copying data to master table`),await me(r,n.filter(R)),{cleanup:async()=>{r.close()},client:r,version:t.version}}catch(e){throw r.close(),e}},J=async(e,t)=>{G();let n=V(`${m(`booksEndpoint`)}/${e}`,{major_release:(t?.majorVersion||0).toString(),minor_release:(t?.minorVersion||0).toString()});c.info(`Fetching shamela.ws book link: ${P(n)}`);try{let e=await H(n);return{majorRelease:e.major_release,majorReleaseUrl:L(e.major_release_url),...e.minor_release_url&&{minorReleaseUrl:L(e.minor_release_url)},...e.minor_release_url&&{minorRelease:e.minor_release}}}catch(e){throw Error(`Error fetching book metadata: ${e.message}`)}},Te=async(e,t)=>{if(c.info(`downloadBook ${e} ${JSON.stringify(t)}`),!t.outputFile.path)throw Error(`outputFile.path must be provided to determine output format`);let n=B(t.outputFile.path).toLowerCase(),{client:r,cleanup:i}=await K(e,t?.bookMetadata);try{if(n===`.json`){let e=await w(r);await W(t.outputFile,JSON.stringify(e,null,2))}else if(n===`.db`||n===`.sqlite`){let e=r.export();await W(t.outputFile,e)}else throw Error(`Unsupported output extension: ${n}`)}finally{await i()}return t.outputFile.path},Y=async(e=0)=>{G();let t=V(m(`masterPatchEndpoint`),{version:e.toString()});c.info(`Fetching shamela.ws master database patch link: ${P(t)}`);try{let e=await H(t);return{url:e.patch_url,version:e.version}}catch(e){throw Error(`Error fetching master patch: ${e.message}`)}},Ee=e=>{let t=m(`masterPatchEndpoint`),{origin:n}=new URL(t);return`${n}/covers/${e}.jpg`},De=async e=>{if(c.info(`downloadMasterDatabase ${JSON.stringify(e)}`),!e.outputFile.path)throw Error(`outputFile.path must be provided to determine output format`);let t=B(e.outputFile.path),{client:n,cleanup:r,version:i}=await q(e.masterMetadata);try{if(t===`.json`){let t=N(n,i);await W(e.outputFile,JSON.stringify(t,null,2))}else if(t===`.db`||t===`.sqlite`)await W(e.outputFile,n.export());else throw Error(`Unsupported output extension: ${t}`)}finally{await r()}return e.outputFile.path},Oe=async e=>{c.info(`getBook ${e}`);let{client:t,cleanup:n}=await K(e);try{let e=await w(t);return{pages:e.pages.map(F),titles:e.titles.map(ye)}}finally{await n()}},ke=async()=>{c.info(`getMaster`);let{client:e,cleanup:t,version:n}=await q();try{return N(e,n)}finally{await t()}},Ae=/^[)\]\u00BB"”'’.,?!:\u061B\u060C\u061F\u06D4\u2026]+$/,X=e=>{let t=[];for(let n of e){let e=t[t.length-1];e&&Ae.test(n.text)?e.text+=n.text:t.push(n)}return t},je=e=>e.replace(/\r\n/g,`
|
|
44
44
|
`).replace(/\r/g,`
|
|
45
|
-
`)
|
|
46
|
-
`)),t.split(`
|
|
47
|
-
`)
|
|
45
|
+
`).split(`
|
|
46
|
+
`).map(e=>e.trim()).filter(Boolean),Me=e=>je(e).map(e=>({text:e})),Z=(e,t)=>{let n=RegExp(`${t}\\s*=\\s*("([^"]*)"|'([^']*)'|([^s>]+))`,`i`),r=e.match(n);if(r)return r[2]??r[3]??r[4]},Ne=e=>{let t=[],n=/<[^>]+>/g,r=0,i;for(i=n.exec(e);i;){i.index>r&&t.push({type:`text`,value:e.slice(r,i.index)});let a=i[0],o=/^<\//.test(a),s=a.match(/^<\/?\s*([a-zA-Z0-9:-]+)/),c=s?s[1].toLowerCase():``;if(o)t.push({name:c,type:`end`});else{let e={};e.id=Z(a,`id`),e[`data-type`]=Z(a,`data-type`),t.push({attributes:e,name:c,type:`start`})}r=n.lastIndex,i=n.exec(e)}return r<e.length&&t.push({type:`text`,value:e.slice(r)}),t},Q=(e,t)=>{let n=e.trim();return n?t?{id:t,text:n}:{text:n}:null},Pe=e=>{for(let t=e.length-1;t>=0;t--){let n=e[t];if(n.isTitle&&n.id)return n.id}},Fe=(e,t)=>{if(!e)return;let n=e.split(`
|
|
47
|
+
`);for(let e=0;e<n.length;e++){if(e>0){let e=Q(t.currentText,t.currentId);e&&t.result.push(e),t.currentText=``,t.currentId=Pe(t.spanStack)||void 0}n[e]&&(t.currentText+=n[e])}},Ie=(e,t)=>{let n=e.attributes[`data-type`]===`title`,r;n&&(r=(e.attributes.id??``).replace(/^toc-/,``)),t.spanStack.push({id:r,isTitle:n}),n&&r&&!t.currentId&&(t.currentId=r)},Le=e=>{if(e=e.replace(/\r\n/g,`
|
|
48
|
+
`).replace(/\r/g,`
|
|
49
|
+
`),!/<span[^>]*>/i.test(e))return X(Me(e));let t=Ne(`<root>${e}</root>`),n={currentId:void 0,currentText:``,result:[],spanStack:[]};for(let e of t)e.type===`text`?Fe(e.value,n):e.type===`start`&&e.name===`span`?Ie(e,n):e.type===`end`&&e.name===`span`&&n.spanStack.pop();let r=Q(n.currentText,n.currentId);return r&&n.result.push(r),X(n.result).filter(e=>e.text.length>0)},$=Object.entries(I).map(([e,t])=>({regex:new RegExp(e,`g`),replacement:t})),Re=e=>{if(e===I)return $;let t=[];for(let n in e)t.push({regex:new RegExp(n,`g`),replacement:e[n]});return t},ze=(e,t=I)=>{let n=Re(t),r=e;for(let e=0;e<n.length;e++){let{regex:t,replacement:i}=n[e];r=r.replace(t,i)}return r},Be=(e,t=`_________`)=>{let n=``,r=e.lastIndexOf(t);return r>=0&&(n=e.slice(r+t.length),e=e.slice(0,r)),[e,n]},Ve=e=>e.replace(/(?: |\r){0,2}⦗[\u0660-\u0669]+⦘(?: |\r)?/g,` `),He=e=>(e=e.replace(/<a[^>]*>(.*?)<\/a>/gs,`$1`),e=e.replace(/<hadeeth[^>]*>|<\/hadeeth>|<hadeeth-\d+>/gs,``),e);export{te as configure,Te as downloadBook,De as downloadMasterDatabase,Oe as getBook,J as getBookMetadata,Ee as getCoverUrl,ke as getMaster,Y as getMasterMetadata,Le as parseContentRobust,Ve as removeArabicNumericPageMarkers,He as removeTagsExceptSpan,ne as resetConfig,ze as sanitizePageContent,Be as splitPageBodyFromFooter};
|
|
48
50
|
//# sourceMappingURL=index.js.map
|