@strav/pdf 0.4.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +79 -0
  2. package/package.json +51 -0
  3. package/src/color/cie.ts +61 -0
  4. package/src/color/color.ts +77 -0
  5. package/src/color/conversion.ts +26 -0
  6. package/src/color/device.ts +37 -0
  7. package/src/color/devicen.ts +74 -0
  8. package/src/color/icc.ts +103 -0
  9. package/src/color/index.ts +15 -0
  10. package/src/color/separation.ts +94 -0
  11. package/src/color/space.ts +47 -0
  12. package/src/content/content_stream.ts +373 -0
  13. package/src/content/graphics_state.ts +64 -0
  14. package/src/content/index.ts +16 -0
  15. package/src/content/operators.ts +70 -0
  16. package/src/content/path.ts +51 -0
  17. package/src/content/resources.ts +119 -0
  18. package/src/content/text_object.ts +140 -0
  19. package/src/document/catalog.ts +16 -0
  20. package/src/document/index.ts +13 -0
  21. package/src/document/object_table.ts +67 -0
  22. package/src/document/page.ts +74 -0
  23. package/src/document/page_tree.ts +78 -0
  24. package/src/document/pdf_document.ts +310 -0
  25. package/src/document/types.ts +65 -0
  26. package/src/document/xref.ts +68 -0
  27. package/src/ext-gstate/ext_gstate.ts +69 -0
  28. package/src/ext-gstate/index.ts +2 -0
  29. package/src/fonts/cff.ts +123 -0
  30. package/src/fonts/cid_encoding.ts +45 -0
  31. package/src/fonts/cmap_table.ts +180 -0
  32. package/src/fonts/font.ts +342 -0
  33. package/src/fonts/glyf.ts +59 -0
  34. package/src/fonts/hmtx.ts +21 -0
  35. package/src/fonts/index.ts +20 -0
  36. package/src/fonts/name_table.ts +50 -0
  37. package/src/fonts/os2.ts +41 -0
  38. package/src/fonts/sfnt.ts +224 -0
  39. package/src/fonts/standard_14.ts +132 -0
  40. package/src/fonts/subset.ts +221 -0
  41. package/src/fonts/to_unicode.ts +82 -0
  42. package/src/fonts/win_ansi.ts +69 -0
  43. package/src/images/image.ts +111 -0
  44. package/src/images/index.ts +6 -0
  45. package/src/images/jpeg.ts +103 -0
  46. package/src/images/png.ts +239 -0
  47. package/src/images/smask.ts +24 -0
  48. package/src/index.ts +57 -0
  49. package/src/metadata/index.ts +3 -0
  50. package/src/metadata/info_dict.ts +28 -0
  51. package/src/metadata/xmp.ts +110 -0
  52. package/src/objects/encode.ts +77 -0
  53. package/src/objects/index.ts +43 -0
  54. package/src/objects/indirect_ref.ts +17 -0
  55. package/src/objects/name.ts +50 -0
  56. package/src/objects/number.ts +43 -0
  57. package/src/objects/string.ts +136 -0
  58. package/src/objects/types.ts +86 -0
  59. package/src/output/buffer_sink.ts +40 -0
  60. package/src/output/byte_sink.ts +12 -0
  61. package/src/output/index.ts +3 -0
  62. package/src/output/stream_sink.ts +62 -0
  63. package/src/patterns/index.ts +10 -0
  64. package/src/patterns/shading.ts +162 -0
  65. package/src/patterns/tiling_pattern.ts +68 -0
  66. package/src/standards/context.ts +10 -0
  67. package/src/standards/index.ts +23 -0
  68. package/src/standards/pdf_a.ts +23 -0
  69. package/src/standards/pdf_x.ts +31 -0
  70. package/src/streams/ascii85.ts +61 -0
  71. package/src/streams/ascii_hex.ts +33 -0
  72. package/src/streams/flate.ts +17 -0
  73. package/src/streams/index.ts +9 -0
  74. package/src/streams/stream.ts +66 -0
  75. package/src/util/ascii.ts +63 -0
  76. package/src/util/binary.ts +71 -0
  77. package/src/util/errors.ts +61 -0
  78. package/src/util/index.ts +10 -0
  79. package/src/util/units.ts +24 -0
  80. package/tsconfig.json +5 -0
@@ -0,0 +1,224 @@
1
+ /**
2
+ * SFNT container parsing (spec §10.2). Reads the table directory and the
3
+ * `head` / `hhea` / `maxp` tables. Both TrueType (`glyf`) and OpenType/CFF
4
+ * (`OTTO`) fonts share this container and are accepted here; see `isCFF`.
5
+ *
6
+ * Supports a single-face `.ttf`/`.otf` and a `.ttc` collection (face picked by
7
+ * index). For a `.ttc` face the referenced tables are re-serialized into a
8
+ * standalone SFNT so it can be embedded as a `FontFile2`.
9
+ */
10
+
11
+ import { BinaryReader } from '../util/binary.ts'
12
+ import { UnsupportedFontError, PdfGenError } from '../util/errors.ts'
13
+
14
/** sfnt version written as the 16.16 fixed-point value 1.0. */
const TTF_TRUE = 0x00010000

// Four-letter container tags, each the big-endian uint32 of its ASCII bytes.
/** `'true'` */
const TTF_TAG_true = 0x74727565
/** `'ttcf'` */
const TTF_TAG_ttcf = 0x74746366
/** `'OTTO'` */
const TTF_TAG_OTTO = 0x4f54544f
18
+
19
/** One entry of the SFNT table directory. */
export interface TableRecord {
  /** Four-character table tag, e.g. `head` or `CFF `. */
  tag: string
  /** Byte offset of the table within the font buffer it was read from. */
  offset: number
  /** Table length in bytes as stored in the directory (excludes padding). */
  length: number
}
24
+
25
/** The fields of the `head` table that `SfntFont.parseHead` extracts. */
export interface HeadTable {
  /** Design units per em (uint16 at table offset 18). */
  unitsPerEm: number
  /** 0 = short loca (offsets ×2), 1 = long loca. */
  indexToLocFormat: 0 | 1
  /** Font bounding box, read as four int16 values starting at table offset 36. */
  xMin: number
  yMin: number
  xMax: number
  yMax: number
  /** Raw `macStyle` bit field (uint16 following the bounding box). */
  macStyle: number
}
35
+
36
/** The fields of the `hhea` table that `SfntFont.parseHhea` extracts. */
export interface HheaTable {
  /** Three consecutive int16 values read from table offset 4. */
  ascent: number
  descent: number
  lineGap: number
  /** Count of full (advance, lsb) records in `hmtx` (uint16 at table offset 34). */
  numberOfHMetrics: number
}
42
+
43
/** Decode a big-endian uint32 tag into its four-character string form. */
function tagToStr(n: number): string {
  const codes: number[] = []
  for (let shift = 24; shift >= 0; shift -= 8) {
    codes.push((n >>> shift) & 0xff)
  }
  return String.fromCharCode(...codes)
}
46
+
47
/**
 * A parsed SFNT font face: its table directory plus the `head`, `hhea` and
 * `maxp` values needed downstream.
 *
 * A single-face file is kept verbatim in `programBytes`. A face picked out of
 * a `ttcf` collection is re-serialized into a standalone SFNT, after which
 * every offset in `tables` refers to that rebuilt buffer.
 */
export class SfntFont {
  readonly tables = new Map<string, TableRecord>()
  readonly head: HeadTable
  readonly hhea: HheaTable
  readonly numGlyphs: number

  /** The standalone SFNT bytes to embed (verbatim, or rebuilt for a TTC). */
  readonly programBytes: Uint8Array

  /**
   * @param bytes     Raw font file: a single-face SFNT or a `ttcf` collection.
   * @param faceIndex Face to load from a collection; unused for single-face files.
   * @throws PdfGenError if `faceIndex` is not an integer inside the collection.
   * @throws UnsupportedFontError for an unrecognized container or face version,
   *         or when `head`, `hhea` or `maxp` is missing.
   */
  constructor(bytes: Uint8Array, faceIndex = 0) {
    const r = new BinaryReader(bytes)
    const sfntVersion = r.u32()

    if (sfntVersion === TTF_TAG_ttcf) {
      r.u32() // major/minor version
      const numFonts = r.u32()
      // A fractional index would otherwise pass the range test and then select
      // no offset at all.
      if (!Number.isInteger(faceIndex) || faceIndex < 0 || faceIndex >= numFonts) {
        throw new PdfGenError(
          'PDF_UNSUPPORTED_FONT',
          `TTC face index ${faceIndex} out of range (0..${numFonts - 1})`
        )
      }
      // Only the chosen face's offset is needed; skip the ones before it.
      for (let i = 0; i < faceIndex; i++) r.u32()
      const faceOffset = r.u32()
      // The face carries its own sfnt version (TrueType or OTTO); keep it so
      // the rebuilt font advertises the right outline format.
      const faceVersion = this.readDirectory(bytes, faceOffset)
      this.programBytes = this.rebuildSfnt(bytes, faceVersion)
    } else {
      // readDirectory rejects anything that is not a known sfnt version.
      this.readDirectory(bytes, 0)
      this.programBytes = bytes // single face — embed verbatim
    }

    this.head = this.parseHead()
    this.hhea = this.parseHhea()
    this.numGlyphs = this.parseMaxpNumGlyphs()
  }

  /** True for an OpenType/CFF font (`CFF ` outlines, no `glyf`/`loca`). */
  get isCFF(): boolean {
    return this.tables.has('CFF ')
  }

  /** Whether `version` is an sfnt version this class can read. */
  private static isSupportedVersion(version: number): boolean {
    return version === TTF_TRUE || version === TTF_TAG_true || version === TTF_TAG_OTTO
  }

  /**
   * Read the table directory at `dirOffset` into `this.tables`.
   *
   * @returns the sfnt version stored at the head of the directory.
   * @throws UnsupportedFontError if that version is not recognized.
   */
  private readDirectory(bytes: Uint8Array, dirOffset: number): number {
    const r = new BinaryReader(bytes, dirOffset)
    const version = r.u32()
    if (!SfntFont.isSupportedVersion(version)) {
      throw new UnsupportedFontError(
        `Unrecognized font format (sfnt version 0x${version.toString(16)})`
      )
    }
    const numTables = r.u16()
    r.u16() // searchRange
    r.u16() // entrySelector
    r.u16() // rangeShift
    for (let i = 0; i < numTables; i++) {
      const tag = tagToStr(r.u32())
      r.u32() // checksum
      const offset = r.u32()
      const length = r.u32()
      this.tables.set(tag, { tag, offset, length })
    }
    return version
  }

  /** Look up a directory entry, throwing when the table is absent. */
  private require(tag: string): TableRecord {
    const t = this.tables.get(tag)
    if (!t) {
      throw new UnsupportedFontError(`Font is missing the required '${tag}' table`)
    }
    return t
  }

  /** Raw bytes of a table (subarray, no copy), or undefined if absent. */
  table(tag: string): Uint8Array | undefined {
    const t = this.tables.get(tag)
    return t ? this.bytes.subarray(t.offset, t.offset + t.length) : undefined
  }

  /** The buffer that the offsets in `tables` index into. */
  private get bytes(): Uint8Array {
    return this.programBytes
  }

  /** Read unitsPerEm, the bounding box, macStyle and indexToLocFormat from `head`. */
  private parseHead(): HeadTable {
    const t = this.require('head')
    const r = new BinaryReader(this.bytes, t.offset)
    r.seek(t.offset + 18)
    const unitsPerEm = r.u16()
    r.seek(t.offset + 36)
    const xMin = r.i16()
    const yMin = r.i16()
    const xMax = r.i16()
    const yMax = r.i16()
    const macStyle = r.u16()
    r.seek(t.offset + 50)
    // NOTE(review): the stored value is trusted to be 0 or 1; it is not validated.
    const indexToLocFormat = r.i16() as 0 | 1
    return { unitsPerEm, indexToLocFormat, xMin, yMin, xMax, yMax, macStyle }
  }

  /** Read ascent/descent/lineGap and numberOfHMetrics from `hhea`. */
  private parseHhea(): HheaTable {
    const t = this.require('hhea')
    const r = new BinaryReader(this.bytes, t.offset)
    r.seek(t.offset + 4)
    const ascent = r.i16()
    const descent = r.i16()
    const lineGap = r.i16()
    r.seek(t.offset + 34)
    const numberOfHMetrics = r.u16()
    return { ascent, descent, lineGap, numberOfHMetrics }
  }

  /** Read `maxp.numGlyphs` (uint16 at table offset 4). */
  private parseMaxpNumGlyphs(): number {
    const t = this.require('maxp')
    const r = new BinaryReader(this.bytes, t.offset + 4)
    return r.u16()
  }

  /**
   * Rebuild a standalone, 4-byte-aligned SFNT from this face's tables and
   * repoint `this.tables` at the new layout.
   *
   * @param src         The collection the tables currently live in.
   * @param sfntVersion Version to write in the new header (the face's own).
   */
  private rebuildSfnt(src: Uint8Array, sfntVersion: number): Uint8Array {
    const entries = [...this.tables.values()].sort((a, b) =>
      a.tag < b.tag ? -1 : a.tag > b.tag ? 1 : 0
    )
    const numTables = entries.length
    const headerLen = 12 + numTables * 16

    const blocks: { rec: TableRecord; data: Uint8Array; offset: number }[] = []
    let cursor = headerLen
    for (const rec of entries) {
      let data = src.subarray(rec.offset, rec.offset + rec.length)
      if (rec.tag === 'head' && data.length >= 12) {
        // Both the head table checksum and the whole-file checksum are defined
        // with checkSumAdjustment = 0, so drop the source font's stale value.
        // Work on a copy: `src` belongs to the caller.
        data = data.slice()
        data.fill(0, 8, 12)
      }
      blocks.push({ rec, data, offset: cursor })
      cursor += (rec.length + 3) & ~3 // pad each table to 4 bytes
    }

    const out = new Uint8Array(cursor)
    const dv = new DataView(out.buffer)
    dv.setUint32(0, sfntVersion)
    dv.setUint16(4, numTables)
    const maxPow = Math.floor(Math.log2(numTables))
    const searchRange = 16 * 2 ** maxPow
    dv.setUint16(6, searchRange)
    dv.setUint16(8, maxPow)
    dv.setUint16(10, numTables * 16 - searchRange)

    let p = 12
    for (const b of blocks) {
      for (let i = 0; i < 4; i++) out[p + i] = b.rec.tag.charCodeAt(i)
      dv.setUint32(p + 4, tableChecksum(b.data))
      dv.setUint32(p + 8, b.offset)
      dv.setUint32(p + 12, b.rec.length)
      out.set(b.data, b.offset)
      p += 16
    }

    // Patch head.checkSumAdjustment (still zero in `out` at this point), then
    // rewrite this.tables to the new layout.
    const head = blocks.find(b => b.rec.tag === 'head')
    if (head && head.data.length >= 12) {
      const adj = (0xb1b0afba - tableChecksum(out)) >>> 0
      dv.setUint32(head.offset + 8, adj)
    }
    this.tables.clear()
    for (const b of blocks) {
      this.tables.set(b.rec.tag, { tag: b.rec.tag, offset: b.offset, length: b.rec.length })
    }
    return out
  }
}
211
+
212
/**
 * SFNT table checksum: the sum, modulo 2^32, of the data read as big-endian
 * uint32 words. A trailing partial word is treated as zero-padded.
 */
export function tableChecksum(data: Uint8Array): number {
  const paddedLength = (data.length + 3) & ~3
  let total = 0
  for (let pos = 0; pos < paddedLength; pos += 4) {
    // Assemble one big-endian word; bytes past the end count as 0.
    let word = 0
    for (let k = 0; k < 4; k++) {
      word = ((word << 8) | (data[pos + k] ?? 0)) >>> 0
    }
    total = (total + word) >>> 0
  }
  return total
}
@@ -0,0 +1,132 @@
1
+ /**
2
+ * The Standard-14 fonts (spec §10.1). Referenced only — never embedded; the
3
+ * viewer is assumed to have them. Under a conformance mode they are rejected
4
+ * (enforced in `PdfDocument`); embedded fonts arrive in milestone 5.
5
+ *
6
+ * Width tables are the canonical Adobe Core-14 AFM metrics (units per 1000 em)
7
+ * for the printable ASCII range (codes 32–126). We do **not** emit a `/Widths`
8
+ * array for Standard-14 fonts (the viewer supplies metrics), so these are used
9
+ * only by the optional `PdfFont.widthOfText()` helper. Metrics for the
10
+ * non-ASCII WinAnsi range are approximated by a per-font default and refined
11
+ * in a later milestone — rendering is unaffected.
12
+ */
13
+
14
/** Base-font names of the Standard-14 fonts, grouped by family. */
export type StandardFontName =
  // Helvetica family
  | 'Helvetica'
  | 'Helvetica-Bold'
  | 'Helvetica-Oblique'
  | 'Helvetica-BoldOblique'
  // Times family
  | 'Times-Roman'
  | 'Times-Bold'
  | 'Times-Italic'
  | 'Times-BoldItalic'
  // Courier family
  | 'Courier'
  | 'Courier-Bold'
  | 'Courier-Oblique'
  | 'Courier-BoldOblique'
  // Symbolic fonts
  | 'Symbol'
  | 'ZapfDingbats'
29
+
30
+ // Widths for codes 32..126 (index = code - 32).
31
+ const HELVETICA = [
32
+ 278, 278, 355, 556, 556, 889, 667, 191, 333, 333, 389, 584, 278, 333, 278,
33
+ 278, 556, 556, 556, 556, 556, 556, 556, 556, 556, 556, 278, 278, 584, 584,
34
+ 584, 556, 1015, 667, 667, 722, 722, 667, 611, 778, 722, 278, 500, 667, 556,
35
+ 833, 722, 778, 667, 778, 722, 667, 611, 722, 667, 944, 667, 667, 611, 278,
36
+ 278, 278, 469, 556, 333, 556, 556, 500, 556, 556, 278, 556, 556, 222, 222,
37
+ 500, 222, 833, 556, 556, 556, 556, 333, 500, 278, 556, 500, 722, 500, 500,
38
+ 500, 334, 260, 334, 584,
39
+ ]
40
+
41
+ const HELVETICA_BOLD = [
42
+ 278, 333, 474, 556, 556, 889, 722, 238, 333, 333, 389, 584, 278, 333, 278,
43
+ 278, 556, 556, 556, 556, 556, 556, 556, 556, 556, 556, 333, 333, 584, 584,
44
+ 584, 611, 975, 722, 722, 722, 722, 667, 611, 778, 722, 278, 556, 722, 611,
45
+ 833, 722, 778, 667, 778, 722, 667, 611, 722, 667, 944, 667, 667, 611, 333,
46
+ 278, 333, 584, 556, 333, 556, 611, 556, 611, 556, 333, 611, 611, 278, 278,
47
+ 556, 278, 889, 611, 611, 611, 611, 389, 556, 333, 611, 556, 778, 556, 556,
48
+ 500, 389, 280, 389, 584,
49
+ ]
50
+
51
+ const TIMES_ROMAN = [
52
+ 250, 333, 408, 500, 500, 833, 778, 180, 333, 333, 500, 564, 250, 333, 250,
53
+ 278, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 278, 278, 564, 564,
54
+ 564, 444, 921, 722, 667, 667, 722, 611, 556, 722, 722, 333, 389, 722, 611,
55
+ 889, 722, 722, 556, 722, 667, 556, 611, 722, 722, 944, 722, 722, 611, 333,
56
+ 278, 333, 469, 500, 333, 444, 500, 444, 500, 444, 333, 500, 500, 278, 278,
57
+ 500, 278, 778, 500, 500, 500, 500, 333, 389, 278, 500, 500, 722, 500, 500,
58
+ 444, 480, 200, 480, 541,
59
+ ]
60
+
61
+ const TIMES_BOLD = [
62
+ 250, 333, 555, 500, 500, 1000, 833, 278, 333, 333, 500, 570, 250, 333, 250,
63
+ 278, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 333, 333, 570, 570,
64
+ 570, 500, 930, 722, 667, 722, 722, 667, 611, 778, 778, 389, 500, 778, 667,
65
+ 944, 722, 778, 611, 778, 722, 556, 667, 722, 722, 1000, 722, 722, 667, 333,
66
+ 278, 333, 581, 500, 333, 500, 556, 444, 556, 444, 333, 500, 556, 278, 333,
67
+ 556, 278, 833, 556, 500, 556, 556, 444, 389, 333, 556, 500, 722, 500, 500,
68
+ 444, 394, 220, 394, 520,
69
+ ]
70
+
71
+ const TIMES_ITALIC = [
72
+ 250, 333, 420, 500, 500, 833, 778, 214, 333, 333, 500, 675, 250, 333, 250,
73
+ 278, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 333, 333, 675, 675,
74
+ 675, 500, 920, 611, 611, 667, 722, 611, 611, 722, 722, 333, 444, 667, 556,
75
+ 833, 667, 722, 611, 722, 611, 500, 556, 722, 611, 833, 611, 556, 556, 389,
76
+ 278, 389, 422, 500, 333, 500, 500, 444, 500, 444, 278, 500, 500, 278, 278,
77
+ 444, 278, 722, 500, 500, 500, 500, 389, 389, 278, 500, 444, 667, 444, 444,
78
+ 389, 400, 275, 400, 541,
79
+ ]
80
+
81
+ const TIMES_BOLD_ITALIC = [
82
+ 250, 389, 555, 500, 500, 833, 778, 278, 333, 333, 500, 570, 250, 333, 250,
83
+ 278, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 333, 333, 570, 570,
84
+ 570, 500, 832, 667, 667, 667, 722, 667, 667, 722, 778, 389, 500, 667, 611,
85
+ 889, 722, 722, 611, 722, 667, 556, 611, 722, 667, 889, 667, 611, 611, 333,
86
+ 278, 333, 570, 500, 333, 500, 500, 444, 500, 444, 333, 500, 556, 278, 278,
87
+ 500, 278, 778, 556, 500, 500, 500, 389, 389, 278, 556, 444, 667, 500, 444,
88
+ 389, 348, 220, 348, 570,
89
+ ]
90
+
91
+ const COURIER_WIDTH = 600 // monospaced — every glyph
92
+
93
/** Per-font metric data backing `standardGlyphWidth` and `usesWinAnsi`. */
interface FontMeta {
  /** Widths for codes 32..126 (index = code - 32); null when no table is kept. */
  ascii: number[] | null
  /** Width returned for any code outside 32..126, or for every code when `ascii` is null. */
  fallback: number
  /** True for text fonts using WinAnsiEncoding; false for Symbol/ZapfDingbats. */
  winAnsi: boolean
}
101
+
102
/** Entry for a WinAnsi text font with an ASCII width table. */
const proportionalFont = (ascii: number[], fallback: number): FontMeta => ({
  ascii,
  fallback,
  winAnsi: true,
})

/** Entry for a Courier variant: every glyph has the same width. */
const courierFont = (): FontMeta => ({ ascii: null, fallback: COURIER_WIDTH, winAnsi: true })

/** Entry for a symbolic font, which uses its built-in encoding. */
const symbolicFont = (): FontMeta => ({ ascii: null, fallback: 600, winAnsi: false })

/** Metric data for each Standard-14 font; oblique faces reuse the upright widths. */
const META: Record<StandardFontName, FontMeta> = {
  Helvetica: proportionalFont(HELVETICA, 556),
  'Helvetica-Oblique': proportionalFont(HELVETICA, 556),
  'Helvetica-Bold': proportionalFont(HELVETICA_BOLD, 611),
  'Helvetica-BoldOblique': proportionalFont(HELVETICA_BOLD, 611),
  'Times-Roman': proportionalFont(TIMES_ROMAN, 500),
  'Times-Bold': proportionalFont(TIMES_BOLD, 500),
  'Times-Italic': proportionalFont(TIMES_ITALIC, 500),
  'Times-BoldItalic': proportionalFont(TIMES_BOLD_ITALIC, 500),
  Courier: courierFont(),
  'Courier-Bold': courierFont(),
  'Courier-Oblique': courierFont(),
  'Courier-BoldOblique': courierFont(),
  Symbol: symbolicFont(),
  ZapfDingbats: symbolicFont(),
}
118
+
119
/**
 * Type guard: true when `name` is one of the Standard-14 font names.
 *
 * Uses an own-property check rather than `in`, which also matches names
 * inherited from `Object.prototype` such as `constructor` or `toString`.
 */
export function isStandardFontName(name: string): name is StandardFontName {
  return Object.prototype.hasOwnProperty.call(META, name)
}
122
+
123
/** Whether the font is a text font using WinAnsiEncoding (false for Symbol/ZapfDingbats). */
export function usesWinAnsi(name: StandardFontName): boolean {
  const { winAnsi } = META[name]
  return winAnsi
}
126
+
127
/**
 * Glyph width in 1000-em units for a WinAnsi byte (0–255).
 *
 * Codes 32..126 are looked up in the font's ASCII table when it has one;
 * every other code, and every code of a font with no table, gets the font's
 * fallback width.
 */
export function standardGlyphWidth(name: StandardFontName, byte: number): number {
  const { ascii, fallback } = META[name]
  const inAsciiTable = byte >= 32 && byte <= 126
  if (ascii === null || !inAsciiTable) return fallback
  return ascii[byte - 32]!
}
@@ -0,0 +1,221 @@
1
+ /**
2
+ * TrueType glyph subsetting (spec §10.3).
3
+ *
4
+ * Strategy (per spec): keep the original glyph indices — unused glyphs become
5
+ * zero-length, trailing unused glyphs are dropped (numGlyphs = maxUsedGid+1).
6
+ * Because indices are preserved, the content stream's Identity-H codes, the
7
+ * CIDFont `/W` array, `ToUnicode` and `CIDToGIDMap /Identity` need no rewrite.
8
+ *
9
+ * Tables rewritten: `glyf`, `loca` (long), `hmtx`, `maxp.numGlyphs`,
10
+ * `hhea.numberOfHMetrics`, `head` (indexToLocFormat + checkSumAdjustment).
11
+ * Every other table is passed through unchanged.
12
+ *
13
+ * The 6-letter subset tag is derived from the subset content (sorted glyph
14
+ * set + source numGlyphs), so identical input → identical font (determinism).
15
+ */
16
+
17
+ import { tableChecksum, type SfntFont } from './sfnt.ts'
18
+ import { GlyfTable } from './glyf.ts'
19
+
20
/** What `subsetTrueType` returns. */
export interface SubsetResult {
  /** The assembled subset font as a standalone SFNT. */
  bytes: Uint8Array
  /** Six uppercase letters, e.g. `ABCDEF` (caller forms `ABCDEF+Name`). */
  tag: string
}
25
+
26
/** Round `n` up to the next multiple of 4. */
function pad4(n: number): number {
  // Shifting the low two bits out and back in clears them.
  return ((n + 3) >> 2) << 2
}
29
+
30
/**
 * Derive the 6-letter subset tag (A–Z) from the source glyph count and the
 * sorted glyph ids, so the same inputs always give the same tag.
 *
 * The low 16 bits of each value are fed, low byte first, through a 32-bit
 * FNV-1a hash; the hash is then spread over six letters.
 */
function subsetTag(sortedGids: number[], srcNumGlyphs: number): string {
  const FNV_OFFSET_BASIS = 0x811c9dc5
  const FNV_PRIME = 0x01000193

  let hash = FNV_OFFSET_BASIS
  for (const value of [srcNumGlyphs, ...sortedGids]) {
    for (const byte of [value & 0xff, (value >> 8) & 0xff]) {
      hash = Math.imul(hash ^ byte, FNV_PRIME) >>> 0
    }
  }

  const letters: string[] = []
  let state = hash
  while (letters.length < 6) {
    letters.push(String.fromCharCode(65 + (state % 26)))
    // re-stir so all 6 letters vary
    state = (Math.floor(state / 26) + 0x9e3779b1) >>> 0
  }
  return letters.join('')
}
52
+
53
/**
 * Read (advance, lsb) for glyphs 0..count-1 from the original `hmtx`.
 *
 * The first `numberOfHMetrics` glyphs have a full 4-byte record. Later glyphs
 * store only a 2-byte lsb and reuse the last advance that was read. Bytes
 * past the end of the table read as 0.
 */
function readMetrics(
  hmtx: Uint8Array,
  numberOfHMetrics: number,
  count: number
): { adv: number; lsb: number }[] {
  const u16At = (pos: number): number => ((hmtx[pos] ?? 0) << 8) | (hmtx[pos + 1] ?? 0)
  // Sign-extend the 16-bit value.
  const i16At = (pos: number): number => (u16At(pos) << 16) >> 16

  const lsbArrayStart = numberOfHMetrics * 4
  const metrics: { adv: number; lsb: number }[] = []
  let advance = 0
  for (let gid = 0; gid < count; gid++) {
    let lsbPos: number
    if (gid < numberOfHMetrics) {
      const record = gid * 4
      advance = u16At(record)
      lsbPos = record + 2
    } else {
      lsbPos = lsbArrayStart + (gid - numberOfHMetrics) * 2
    }
    metrics.push({ adv: advance, lsb: i16At(lsbPos) })
  }
  return metrics
}
75
+
76
/**
 * Serialize `tables` into a standalone SFNT: a version-1.0 header, a
 * directory sorted by tag, then each table padded to a 4-byte boundary.
 * When a `head` table is present its checkSumAdjustment is patched last.
 */
function assembleSfnt(tables: Map<string, Uint8Array>): Uint8Array {
  const tags = [...tables.keys()].sort()
  const numTables = tags.length

  // Assign each table its offset in the output.
  let end = 12 + numTables * 16
  const layout = tags.map(tag => {
    const data = tables.get(tag)!
    const entry = { tag, data, offset: end, length: data.length }
    end += pad4(data.length)
    return entry
  })

  const out = new Uint8Array(end)
  const view = new DataView(out.buffer)

  // Offset-table header.
  const entrySelector = Math.floor(Math.log2(numTables))
  const searchRange = 16 * 2 ** entrySelector
  view.setUint32(0, 0x00010000)
  view.setUint16(4, numTables)
  view.setUint16(6, searchRange)
  view.setUint16(8, entrySelector)
  view.setUint16(10, numTables * 16 - searchRange)

  // One 16-byte directory record per table, plus the table data itself.
  layout.forEach((entry, index) => {
    const record = 12 + index * 16
    for (let i = 0; i < 4; i++) out[record + i] = entry.tag.charCodeAt(i)
    view.setUint32(record + 4, tableChecksum(entry.data))
    view.setUint32(record + 8, entry.offset)
    view.setUint32(record + 12, entry.length)
    out.set(entry.data, entry.offset)
  })

  const headEntry = layout.find(entry => entry.tag === 'head')
  if (headEntry) {
    const adjustmentPos = headEntry.offset + 8
    view.setUint32(adjustmentPos, 0) // zero before computing
    view.setUint32(adjustmentPos, (0xb1b0afba - tableChecksum(out)) >>> 0)
  }
  return out
}
118
+
119
/**
 * Subset a glyf-based TrueType font to the glyphs in `used`.
 *
 * Glyph indices are preserved: unused glyphs below the highest kept id become
 * empty, and glyphs above it are dropped. `glyf`, `loca` (long format),
 * `hmtx`, `head`, `hhea` and `maxp` are rewritten; every other table is
 * copied through unchanged.
 *
 * @param sfnt The parsed source font.
 * @param used Glyph ids to keep. Ids that are not integers in
 *             0..numGlyphs-1 are ignored; glyph 0 (.notdef) and the components
 *             of kept composite glyphs are always included.
 * @returns The subset font bytes and a 6-letter tag derived from the kept set.
 * @throws Error if the font has no `glyf`/`loca`, or lacks `head`, `hhea`,
 *         `hmtx` or `maxp`.
 */
export function subsetTrueType(
  sfnt: SfntFont,
  used: Iterable<number>
): SubsetResult {
  const loca = sfnt.table('loca')
  const glyf = sfnt.table('glyf')
  if (!loca || !glyf) {
    throw new Error('subsetTrueType requires a glyf-based TrueType font')
  }
  // Fetch every table that gets rewritten up front, so a font missing one
  // fails with a clear message rather than a TypeError further down.
  const srcHmtx = sfnt.table('hmtx')
  const srcHead = sfnt.table('head')
  const srcHhea = sfnt.table('hhea')
  const srcMaxp = sfnt.table('maxp')
  if (!srcHmtx || !srcHead || !srcHhea || !srcMaxp) {
    throw new Error('subsetTrueType requires the head, hhea, hmtx and maxp tables')
  }

  const srcN = sfnt.numGlyphs
  const glyfTable = new GlyfTable(loca, glyf, srcN, sfnt.head.indexToLocFormat === 1)

  // Used set + transitive composite-component closure; .notdef always in.
  const keep = new Set<number>([0])
  const stack: number[] = [0]
  const visit = (g: number): void => {
    // A fractional id would end up in newN and break the loca/hmtx layout.
    if (Number.isInteger(g) && g >= 0 && g < srcN && !keep.has(g)) {
      keep.add(g)
      stack.push(g)
    }
  }
  for (const g of used) visit(g)
  for (let g = stack.pop(); g !== undefined; g = stack.pop()) {
    for (const comp of glyfTable.componentGids(g)) visit(comp)
  }

  const sortedGids = [...keep].sort((a, b) => a - b)
  const maxGid = sortedGids[sortedGids.length - 1]!
  const newN = maxGid + 1

  // Rebuild glyf + long loca, keeping original indices (empty if unused).
  const glyphs: Uint8Array[] = []
  const locaOffsets: number[] = []
  let off = 0
  for (let gid = 0; gid < newN; gid++) {
    locaOffsets.push(off)
    if (keep.has(gid)) {
      const data = glyfTable.glyphData(gid)
      const buf = new Uint8Array(pad4(data.length))
      buf.set(data)
      glyphs.push(buf)
      off += buf.length
    }
  }
  locaOffsets.push(off)

  const newGlyf = new Uint8Array(off)
  {
    let o = 0
    for (const g of glyphs) {
      newGlyf.set(g, o)
      o += g.length
    }
  }
  const newLoca = new Uint8Array((newN + 1) * 4)
  {
    const dv = new DataView(newLoca.buffer)
    for (let i = 0; i <= newN; i++) dv.setUint32(i * 4, locaOffsets[i]!)
  }

  // Fresh hmtx: a full (advance, lsb) pair for every glyph 0..newN-1.
  const metrics = readMetrics(srcHmtx, sfnt.hhea.numberOfHMetrics, newN)
  const newHmtx = new Uint8Array(newN * 4)
  {
    const dv = new DataView(newHmtx.buffer)
    for (let g = 0; g < newN; g++) {
      dv.setUint16(g * 4, metrics[g]!.adv)
      dv.setInt16(g * 4 + 2, metrics[g]!.lsb)
    }
  }

  // Patched head / hhea / maxp (copies of the originals).
  const head = Uint8Array.from(srcHead)
  const headView = new DataView(head.buffer, head.byteOffset)
  headView.setUint32(8, 0) // checkSumAdjustment
  headView.setInt16(50, 1) // indexToLocFormat=long

  const hhea = Uint8Array.from(srcHhea)
  new DataView(hhea.buffer, hhea.byteOffset).setUint16(34, newN) // numberOfHMetrics

  const maxp = Uint8Array.from(srcMaxp)
  new DataView(maxp.buffer, maxp.byteOffset).setUint16(4, newN) // numGlyphs

  // Start from every source table, then overwrite the rewritten ones.
  const out = new Map<string, Uint8Array>()
  for (const tag of sfnt.tables.keys()) {
    const data = sfnt.table(tag)
    if (data) out.set(tag, data)
  }
  out.set('glyf', newGlyf)
  out.set('loca', newLoca)
  out.set('hmtx', newHmtx)
  out.set('head', head)
  out.set('hhea', hhea)
  out.set('maxp', maxp)

  return { bytes: assembleSfnt(out), tag: subsetTag(sortedGids, srcN) }
}
@@ -0,0 +1,82 @@
1
+ /**
2
+ * ToUnicode CMap writer (spec §10.6). Maps each used CID/GID back to its
3
+ * Unicode code point(s) so text is searchable and copy/pasteable (mandatory
4
+ * for PDF/A). `bfrange` is used for consecutive runs, `bfchar` otherwise.
5
+ */
6
+
7
/** Format `n` as uppercase hex, zero-padded to at least four digits. */
function hex4(n: number): string {
  return n.toString(16).toUpperCase().padStart(4, '0')
}

/** UTF-16BE hex for one code point (surrogate pair when astral). */
function utf16beHex(cp: number): string {
  if (cp <= 0xffff) return hex4(cp)
  const v = cp - 0x10000
  return hex4(0xd800 + (v >> 10)) + hex4(0xdc00 + (v & 0x3ff))
}

const HEADER =
  '/CIDInit /ProcSet findresource begin\n' +
  '12 dict begin\n' +
  'begincmap\n' +
  '/CIDSystemInfo << /Registry (Adobe) /Ordering (UCS) /Supplement 0 >> def\n' +
  '/CMapName /Adobe-Identity-UCS def\n' +
  '/CMapType 2 def\n' +
  '1 begincodespacerange\n<0000> <FFFF>\nendcodespacerange\n'

const FOOTER = 'endcmap\nCMapName currentdict /CMap defineresource pop\nend\nend\n'

/** Entries written per `beginbfchar` / `beginbfrange` section. */
const MAX_SECTION_ENTRIES = 100

/**
 * Build the ToUnicode CMap stream content from a `gid → code point(s)` map.
 *
 * Single-code-point entries whose GIDs and code points both increase by one
 * collapse into a `bfrange`; everything else, including multi-code-point
 * entries, is written as `bfchar`. All `bfchar` sections come before the
 * `bfrange` sections.
 *
 * A range is cut short so that it never
 *  - crosses a change in the high byte of the source code, nor
 *  - needs the last byte of the destination string to pass 0xFF,
 * because a reader expands a range by incrementing only that last byte.
 *
 * @param map Glyph id → the Unicode code points that glyph stands for.
 * @returns The complete CMap program text.
 */
export function buildToUnicode(map: Map<number, number[]>): string {
  const gids = [...map.keys()].sort((a, b) => a - b)

  const ranges: { start: number; end: number; cp: number }[] = []
  const chars: { gid: number; cps: number[] }[] = []

  let i = 0
  while (i < gids.length) {
    const gid = gids[i]!
    const cps = map.get(gid)!
    let j = i
    if (cps.length === 1) {
      const firstCp = cps[0]!
      while (j + 1 < gids.length) {
        const nextGid = gids[j + 1]!
        const nextCps = map.get(nextGid)!
        const contiguous =
          nextGid === gids[j]! + 1 &&
          nextCps.length === 1 &&
          nextCps[0]! === firstCp + (j + 1 - i)
        if (!contiguous) break
        // The low byte of the UTF-16BE destination equals `cp & 0xff`, for
        // BMP and astral code points alike. A zero low byte on either side
        // means the previous entry was the last one that fits in this range.
        const wraps = (nextGid & 0xff) === 0 || (nextCps[0]! & 0xff) === 0
        if (wraps) break
        j++
      }
    }
    if (j > i) {
      ranges.push({ start: gid, end: gids[j]!, cp: cps[0]! })
    } else {
      chars.push({ gid, cps })
    }
    i = j + 1
  }

  const sections: string[] = []
  for (let k = 0; k < chars.length; k += MAX_SECTION_ENTRIES) {
    const slice = chars.slice(k, k + MAX_SECTION_ENTRIES)
    sections.push(`${slice.length} beginbfchar\n`)
    for (const { gid, cps } of slice) {
      sections.push(`<${hex4(gid)}> <${cps.map(utf16beHex).join('')}>\n`)
    }
    sections.push('endbfchar\n')
  }
  for (let k = 0; k < ranges.length; k += MAX_SECTION_ENTRIES) {
    const slice = ranges.slice(k, k + MAX_SECTION_ENTRIES)
    sections.push(`${slice.length} beginbfrange\n`)
    for (const { start, end, cp } of slice) {
      sections.push(`<${hex4(start)}> <${hex4(end)}> <${utf16beHex(cp)}>\n`)
    }
    sections.push('endbfrange\n')
  }

  return HEADER + sections.join('') + FOOTER
}