@abfcode/spine 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/dist/index.d.ts +225 -0
- package/dist/index.js +1397 -0
- package/package.json +36 -0
package/README.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# @abfcode/spine
|
|
2
|
+
|
|
3
|
+
An in-browser EPUB parser — a TypeScript port of [Spine](https://github.com/ABFCode/Spine) (Go).
|
|
4
|
+
|
|
5
|
+
Runs on `fflate` + the platform `DOMParser` (native in browsers; provide one via
|
|
6
|
+
`globalThis.DOMParser` in Node, e.g. from `jsdom`). Produces structured content
|
|
7
|
+
blocks, a normalized TOC, metadata, chunks, and anchor resolution — validated
|
|
8
|
+
as an exact or near-exact match against the Go implementation on a corpus of real books.
|
|
9
|
+
|
|
10
|
+
## Usage
|
|
11
|
+
|
|
12
|
+
```ts
|
|
13
|
+
import { parse } from '@abfcode/spine'
|
|
14
|
+
|
|
15
|
+
const book = parse(epubBytes) // epubBytes: Uint8Array
|
|
16
|
+
book.metadata.title
|
|
17
|
+
book.toc // normalized table of contents
|
|
18
|
+
book.blocks(0) // structured blocks for spine item 0
|
|
19
|
+
book.chunks({ mode: 'size', maxChars: 2000 })
|
|
20
|
+
book.resolveAnchor('OEBPS/ch1.xhtml#frag')
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
In Node, assign a `DOMParser` to `globalThis` before calling `parse`:
|
|
24
|
+
|
|
25
|
+
```ts
|
|
26
|
+
import { JSDOM } from 'jsdom'
|
|
27
|
+
globalThis.DOMParser = new JSDOM('').window.DOMParser
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Scripts
|
|
31
|
+
|
|
32
|
+
- `npm run build` — emit `dist/` (JS + `.d.ts`)
|
|
33
|
+
- `npm test` — run the vitest suite (jsdom)
|
|
34
|
+
|
|
35
|
+
## Status
|
|
36
|
+
|
|
37
|
+
Extracted from the Librium reader. The API may still change before 1.0.
|
|
38
|
+
|
|
39
|
+
## License
|
|
40
|
+
|
|
41
|
+
Apache-2.0
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
/**
 * In-memory EPUB archive with clean-path lookup (mirrors Go's indexZipFiles).
 *
 * Constructed from the raw archive bytes. `has`, `bytes` and `text` each take
 * a path; `bytes` and `text` are typed as possibly `undefined`.
 * NOTE(review): presumably `undefined` means "no such entry" and `names` holds
 * the archive's entry names; the path normalization done by the private `key`
 * helper and the decoding used by `text` are not visible in this declaration.
 */
|
|
2
|
+
declare class Archive {
|
|
3
|
+
private byClean;
|
|
4
|
+
readonly names: string[];
|
|
5
|
+
constructor(bytes: Uint8Array);
|
|
6
|
+
private key;
|
|
7
|
+
has(p: string): boolean;
|
|
8
|
+
bytes(p: string): Uint8Array | undefined;
|
|
9
|
+
text(p: string): string | undefined;
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
/**
 * Book-level metadata, exposed as `Book.metadata`.
 *
 * Every member is required by the type (none is optional). Several values are
 * declared both as a single string and as a list (`title`/`titles`,
 * `language`/`languages`, `publisher`/`publishers`, `pubDate`/`dates`), and
 * `authors` is a plain string list next to the richer `creators` list.
 * `seriesIndex` is a string, not a number.
 * NOTE(review): how each single-string field is derived from its list, and
 * what value marks "absent", are not visible in this declaration.
 */
interface Metadata {
|
|
13
|
+
title: string;
|
|
14
|
+
titles: MetaValue[];
|
|
15
|
+
authors: string[];
|
|
16
|
+
creators: MetaValue[];
|
|
17
|
+
contributors: MetaValue[];
|
|
18
|
+
language: string;
|
|
19
|
+
languages: string[];
|
|
20
|
+
identifiers: Identifier[];
|
|
21
|
+
publisher: string;
|
|
22
|
+
publishers: MetaValue[];
|
|
23
|
+
pubDate: string;
|
|
24
|
+
dates: MetaValue[];
|
|
25
|
+
series: string;
|
|
26
|
+
seriesIndex: string;
|
|
27
|
+
subjects: MetaValue[];
|
|
28
|
+
rights: MetaValue[];
|
|
29
|
+
descriptions: MetaValue[];
|
|
30
|
+
modified: string;
|
|
31
|
+
collections: Collection[];
|
|
32
|
+
coverHref: string;
|
|
33
|
+
}
|
|
34
|
+
/**
 * The href and content type of a cover, without the image data itself.
 * `Cover` extends this interface with the bytes.
 */
interface CoverInfo {
|
|
35
|
+
href: string;
|
|
36
|
+
contentType: string;
|
|
37
|
+
}
|
|
38
|
+
/**
 * A `CoverInfo` plus the cover's raw bytes. This is the value type of
 * `Book.cover()`, which is typed as possibly `undefined`.
 */
interface Cover extends CoverInfo {
|
|
39
|
+
bytes: Uint8Array;
|
|
40
|
+
}
|
|
41
|
+
/**
 * One entry of `Metadata.identifiers`. All four members are required strings.
 * NOTE(review): presumably populated from the package document's identifier
 * elements, with empty strings for missing attributes — confirm against the
 * implementation.
 */
interface Identifier {
|
|
42
|
+
id: string;
|
|
43
|
+
scheme: string;
|
|
44
|
+
value: string;
|
|
45
|
+
type: string;
|
|
46
|
+
}
|
|
47
|
+
/**
 * A metadata value together with its descriptive attributes. Used as the
 * element type of the list-valued `Metadata` fields (`titles`, `creators`,
 * `contributors`, `publishers`, `dates`, `subjects`, `rights`,
 * `descriptions`).
 *
 * All members are required; `displaySeq` is numeric and `refinements` maps
 * string keys to string values.
 * NOTE(review): presumably `fileAs`, `role` and `displaySeq` come from EPUB
 * metadata refinements and are empty/zero when absent — confirm.
 */
interface MetaValue {
|
|
48
|
+
id: string;
|
|
49
|
+
value: string;
|
|
50
|
+
language: string;
|
|
51
|
+
scheme: string;
|
|
52
|
+
fileAs: string;
|
|
53
|
+
role: string;
|
|
54
|
+
displaySeq: number;
|
|
55
|
+
refinements: Record<string, string>;
|
|
56
|
+
}
|
|
57
|
+
/**
 * One entry of `Metadata.collections`. Note that `position` is declared as a
 * string, not a number.
 * NOTE(review): the set of values `type` may take is not visible here.
 */
interface Collection {
|
|
58
|
+
id: string;
|
|
59
|
+
name: string;
|
|
60
|
+
type: string;
|
|
61
|
+
position: string;
|
|
62
|
+
}
|
|
63
|
+
/**
 * Value type of `Book.manifest` (`Map<string, ManifestItem>`).
 * NOTE(review): the item carries both `href` and `path`; presumably `href` is
 * the value as written in the package document and `path` is that href
 * resolved to an archive path — confirm. The map's key is likewise not
 * specified by this declaration.
 */
interface ManifestItem {
|
|
64
|
+
id: string;
|
|
65
|
+
href: string;
|
|
66
|
+
mediaType: string;
|
|
67
|
+
properties: string[];
|
|
68
|
+
path: string;
|
|
69
|
+
}
|
|
70
|
+
/**
 * Element type of `Book.spine`.
 * NOTE(review): presumably `idRef` names a `ManifestItem.id` and `linear`
 * mirrors the spine itemref's `linear` attribute — confirm.
 */
interface SpineItem {
|
|
71
|
+
idRef: string;
|
|
72
|
+
href: string;
|
|
73
|
+
linear: boolean;
|
|
74
|
+
properties: string[];
|
|
75
|
+
}
|
|
76
|
+
/**
 * A node of a navigation tree. Used for `Book.toc`, `Book.landmarks` and
 * `Book.pageList`, and returned by `Book.deriveTocFromHeadings()`.
 *
 * `children` nests further items; `target` is an optional `AnchorRef`.
 * NOTE(review): presumably `target` is omitted when `href` could not be
 * resolved — confirm.
 */
interface TOCItem {
|
|
77
|
+
id: string;
|
|
78
|
+
label: string;
|
|
79
|
+
href: string;
|
|
80
|
+
children: TOCItem[];
|
|
81
|
+
target?: AnchorRef;
|
|
82
|
+
}
|
|
83
|
+
/**
 * Flattened form of `TOCItem`, as returned by `flattenToc`. The nested
 * `children` array is replaced by the numeric `depth` and `parent` members.
 * NOTE(review): presumably `parent` is the index of the parent entry within
 * the flat list, with a sentinel value for top-level entries — confirm.
 */
interface FlatTOCItem {
|
|
84
|
+
id: string;
|
|
85
|
+
label: string;
|
|
86
|
+
href: string;
|
|
87
|
+
depth: number;
|
|
88
|
+
parent: number;
|
|
89
|
+
target?: AnchorRef;
|
|
90
|
+
}
|
|
91
|
+
/**
 * A resolved location inside the book. Returned by `Book.resolveAnchor`,
 * stored as the values of `Book.anchors`, and attached to TOC items as
 * `target`.
 * NOTE(review): the unit of `offset` and the format of `chunkId` are not
 * visible in this declaration.
 */
interface AnchorRef {
|
|
92
|
+
spineIndex: number;
|
|
93
|
+
blockIndex: number;
|
|
94
|
+
chunkId: string;
|
|
95
|
+
offset: number;
|
|
96
|
+
href: string;
|
|
97
|
+
}
|
|
98
|
+
/**
 * Element type of `Book.warnings`. `Book.warn(code, message, path?)` takes the
 * same three values; `path` is optional there but required here.
 */
interface Warning {
|
|
99
|
+
code: string;
|
|
100
|
+
message: string;
|
|
101
|
+
path: string;
|
|
102
|
+
}
|
|
103
|
+
/** The closed set of string literals allowed as `Block.kind`. */
type BlockKind = 'paragraph' | 'heading' | 'list_item' | 'blockquote' | 'pre' | 'hr' | 'table' | 'figure';
|
|
104
|
+
/** The closed set of string literals allowed as `Inline.kind`. */
type InlineKind = 'text' | 'emphasis' | 'strong' | 'link' | 'image' | 'code';
|
|
105
|
+
/**
 * A structured content block. Returned by `Book.blocks` and
 * `Book.blocksByHref`, carried in `Chunk.blocks`, and accepted by
 * `blockToText`.
 *
 * Only `kind` is required; every other member is optional and the type does
 * not tie any member to a particular kind.
 * NOTE(review): presumably `level` applies to headings, `ordered`/`listIndex`
 * to list items, and `table`/`figure` to blocks of those kinds — confirm.
 */
interface Block {
|
|
106
|
+
kind: BlockKind;
|
|
107
|
+
level?: number;
|
|
108
|
+
ordered?: boolean;
|
|
109
|
+
listIndex?: number;
|
|
110
|
+
table?: Table;
|
|
111
|
+
figure?: Figure;
|
|
112
|
+
inlines?: Inline[];
|
|
113
|
+
anchors?: string[];
|
|
114
|
+
}
|
|
115
|
+
/**
 * An inline run. Used in `Block.inlines`, `TableCell.inlines`,
 * `Figure.images` and `Figure.caption`.
 *
 * Only `kind` is required. The boolean `emph`/`strong` members exist in
 * addition to the `'emphasis'`/`'strong'` kinds.
 * NOTE(review): presumably `href` applies to links and `src`/`alt` to images;
 * how the `emph`/`strong` flags relate to `kind` is not visible here.
 */
interface Inline {
|
|
116
|
+
kind: InlineKind;
|
|
117
|
+
text?: string;
|
|
118
|
+
href?: string;
|
|
119
|
+
src?: string;
|
|
120
|
+
alt?: string;
|
|
121
|
+
emph?: boolean;
|
|
122
|
+
strong?: boolean;
|
|
123
|
+
}
|
|
124
|
+
/** Table payload of a `Block` (`Block.table`): a list of rows. */
interface Table {
|
|
125
|
+
rows: TableRow[];
|
|
126
|
+
}
|
|
127
|
+
/** One row of a `Table`: a list of cells. */
interface TableRow {
|
|
128
|
+
cells: TableCell[];
|
|
129
|
+
}
|
|
130
|
+
/**
 * One cell of a `TableRow`: its inline content plus an optional `header` flag.
 * NOTE(review): presumably `header` is set for `<th>` cells — confirm.
 */
interface TableCell {
|
|
131
|
+
inlines: Inline[];
|
|
132
|
+
header?: boolean;
|
|
133
|
+
}
|
|
134
|
+
/**
 * Figure payload of a `Block` (`Block.figure`). Both members are required
 * `Inline` lists: the figure's images and its caption.
 */
interface Figure {
|
|
135
|
+
images: Inline[];
|
|
136
|
+
caption: Inline[];
|
|
137
|
+
}
|
|
138
|
+
/** The chunking strategies accepted as `ChunkingOptions.mode`. */
type ChunkingMode = 'paragraph' | 'size';
|
|
139
|
+
/**
 * Options for `Book.chunks` and for `Config.chunking`. Both members are
 * required, e.g. `{ mode: 'size', maxChars: 2000 }`.
 * NOTE(review): whether `maxChars` is consulted in `'paragraph'` mode is not
 * visible in this declaration.
 */
interface ChunkingOptions {
|
|
140
|
+
mode: ChunkingMode;
|
|
141
|
+
maxChars: number;
|
|
142
|
+
}
|
|
143
|
+
/**
 * Element type of the array returned by `Book.chunks`.
 *
 * A chunk carries its text, the spine item it belongs to (`spineIndex`,
 * `href`), a block range (`blockIndexFrom`/`blockIndexTo`), a pair of offsets,
 * and the anchors and blocks it contains.
 * NOTE(review): whether `blockIndexTo` and `endOffset` are inclusive, and the
 * unit of the offsets, are not visible in this declaration.
 */
interface Chunk {
|
|
144
|
+
id: string;
|
|
145
|
+
text: string;
|
|
146
|
+
href: string;
|
|
147
|
+
spineIndex: number;
|
|
148
|
+
blockIndexFrom: number;
|
|
149
|
+
blockIndexTo: number;
|
|
150
|
+
startOffset: number;
|
|
151
|
+
endOffset: number;
|
|
152
|
+
anchors: string[];
|
|
153
|
+
blocks: Block[];
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
/**
 * Four required boolean switches, supplied as `Config.fallbacks`.
 * NOTE(review): presumably each flag enables one recovery strategy for
 * incomplete or malformed EPUBs (scanning the archive, inferring the spine,
 * generating a TOC, generating metadata) — confirm; the defaults are not
 * visible in this declaration.
 */
interface FallbackPolicy {
|
|
157
|
+
scanArchive: boolean;
|
|
158
|
+
inferSpine: boolean;
|
|
159
|
+
generateTOC: boolean;
|
|
160
|
+
generateMetadata: boolean;
|
|
161
|
+
}
|
|
162
|
+
/**
 * Parser configuration, taken by the `Book` constructor and (as
 * `Partial<Config>`) by `parse`.
 *
 * `Partial` is shallow: callers of `parse` may omit `chunking` or `fallbacks`
 * entirely, but if they supply one it must be a complete object.
 * `rootfilePath` is optional.
 * NOTE(review): presumably `rootfilePath` overrides the package document
 * located via the container file — confirm.
 */
interface Config {
|
|
163
|
+
chunking: ChunkingOptions;
|
|
164
|
+
rootfilePath?: string;
|
|
165
|
+
fallbacks: FallbackPolicy;
|
|
166
|
+
}
|
|
167
|
+
/**
 * A parsed EPUB, normally obtained from `parse(bytes)`.
 *
 * Public data members: `metadata`, the three navigation trees (`toc`,
 * `landmarks`, `pageList`), `manifest`, `spine`, accumulated `warnings`,
 * the `anchors` map, and the `rootfilePath`/`basePath` strings. None of them
 * is declared `readonly`.
 *
 * Methods:
 * - `warn` records a warning; `path` is optional.
 * - `openResource`, `documentFor`, `cover` and `resolveAnchor` are typed as
 *   possibly `undefined`.
 * - `blocks(spineIndex)` returns the structured blocks for one spine item;
 *   `blocksByHref` takes an href instead.
 * - `chunks(opts?)` returns `Chunk[]`; `opts` is optional.
 *
 * `documentFor` returns a DOM `Document`, so a `DOMParser` must be available
 * (native in browsers; assigned to `globalThis.DOMParser` in Node).
 * NOTE(review): presumably `chunksCache`/`blockCache` memoize results, the
 * `undefined` returns mean "not found", and `chunks()` without `opts` falls
 * back to the configured chunking — none of this is visible here.
 */
declare class Book {
|
|
168
|
+
metadata: Metadata;
|
|
169
|
+
toc: TOCItem[];
|
|
170
|
+
landmarks: TOCItem[];
|
|
171
|
+
pageList: TOCItem[];
|
|
172
|
+
manifest: Map<string, ManifestItem>;
|
|
173
|
+
spine: SpineItem[];
|
|
174
|
+
warnings: Warning[];
|
|
175
|
+
anchors: Map<string, AnchorRef>;
|
|
176
|
+
rootfilePath: string;
|
|
177
|
+
basePath: string;
|
|
178
|
+
private anchorsById;
|
|
179
|
+
private archive;
|
|
180
|
+
private cfg;
|
|
181
|
+
private chunksCache;
|
|
182
|
+
private blockCache;
|
|
183
|
+
constructor(archive: Archive, cfg: Config);
|
|
184
|
+
warn(code: string, message: string, path?: string): void;
|
|
185
|
+
private cleanHref;
|
|
186
|
+
openResource(href: string): Uint8Array | undefined;
|
|
187
|
+
documentFor(href: string): Document | undefined;
|
|
188
|
+
blocks(spineIndex: number): Block[];
|
|
189
|
+
blocksByHref(href: string): Block[];
|
|
190
|
+
cover(): Cover | undefined;
|
|
191
|
+
private manifestItemByPath;
|
|
192
|
+
chunks(opts?: ChunkingOptions): Chunk[];
|
|
193
|
+
resolveAnchor(href: string): AnchorRef | undefined;
|
|
194
|
+
/** Port of deriveTOCFromHeadings — build a TOC from heading blocks. */
|
|
195
|
+
deriveTocFromHeadings(): TOCItem[];
|
|
196
|
+
/** Port of fillMissingMetadata. */
|
|
197
|
+
fillMissingMetadata(): void;
|
|
198
|
+
}
|
|
199
|
+
/**
 * Port of parse.go's openWithConfig — parse an EPUB from bytes into a Book.
 *
 * The call is synchronous: it returns a `Book`, not a promise.
 *
 * @param bytes The EPUB file contents.
 * @param opts Optional configuration overrides. `Partial<Config>` is shallow,
 *   so a supplied `chunking` or `fallbacks` must be a complete object.
 * @returns The parsed book.
 */
|
|
200
|
+
declare function parse(bytes: Uint8Array, opts?: Partial<Config>): Book;
|
|
201
|
+
|
|
202
|
+
/**
 * Port of FlattenTOC.
 *
 * @param items A tree of TOC items (e.g. `Book.toc`).
 * @returns The same entries as a flat list, each with numeric `depth` and
 *   `parent` members in place of `children`.
 */
|
|
203
|
+
declare function flattenToc(items: TOCItem[]): FlatTOCItem[];
|
|
204
|
+
|
|
205
|
+
/**
 * Options for `blockToText`. All five flags are required whenever an options
 * object is passed.
 * NOTE(review): the defaults `blockToText` uses when `opts` is omitted are not
 * visible in this declaration.
 */
interface TextOptions {
|
|
206
|
+
preserveLineBreaks: boolean;
|
|
207
|
+
includeHeadings: boolean;
|
|
208
|
+
headingMarkers: boolean;
|
|
209
|
+
includeListMarkers: boolean;
|
|
210
|
+
includeHorizontalRules: boolean;
|
|
211
|
+
}
|
|
212
|
+
/**
 * Port of blockToText (Spine's internal text extraction).
 *
 * @param block The block to render as text.
 * @param opts Optional rendering flags; if given, every flag must be set.
 * @returns The block's text.
 */
|
|
213
|
+
declare function blockToText(block: Block, opts?: TextOptions): string;
|
|
214
|
+
|
|
215
|
+
/**
 * Go path.Clean — lexical cleanup of a slash path.
 *
 * @param p The slash-separated path to clean.
 * @returns The cleaned path.
 */
|
|
216
|
+
declare function posixClean(p: string): string;
|
|
217
|
+
/**
 * Go path.Join — join non-empty parts then Clean.
 *
 * @param parts The path elements to join; empty elements are skipped.
 * @returns The joined, cleaned path.
 */
|
|
218
|
+
declare function posixJoin(...parts: string[]): string;
|
|
219
|
+
/**
 * Go path.Dir.
 *
 * @param p A slash-separated path.
 * @returns The directory portion of `p`, as Go's path.Dir defines it.
 */
|
|
220
|
+
declare function posixDir(p: string): string;
|
|
221
|
+
/**
 * Port of resolvePath(base, href) — resolves an href against a base dir, keeping any #fragment.
 *
 * @param base The base directory to resolve against.
 * @param href The href to resolve; it may carry a `#fragment`.
 * @returns The resolved path, with the fragment (if any) preserved.
 */
|
|
222
|
+
declare function resolvePath(base: string, href: string): string;
|
|
223
|
+
/**
 * Returns a content-type string for the given href.
 * NOTE(review): presumably keyed on the href's file extension; the value
 * returned for an unrecognized extension is not visible in this declaration.
 */
declare function contentTypeFromHref(href: string): string;
|
|
224
|
+
|
|
225
|
+
export { type AnchorRef, type Block, type BlockKind, Book, type Chunk, type ChunkingMode, type ChunkingOptions, type Collection, type Config, type Cover, type CoverInfo, type FallbackPolicy, type Figure, type FlatTOCItem, type Identifier, type Inline, type InlineKind, type ManifestItem, type MetaValue, type Metadata, type SpineItem, type TOCItem, type Table, type TableCell, type TableRow, type Warning, blockToText, contentTypeFromHref, flattenToc, parse, posixClean, posixDir, posixJoin, resolvePath };
|