@strav/pdf 0.4.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -0
- package/package.json +51 -0
- package/src/color/cie.ts +61 -0
- package/src/color/color.ts +77 -0
- package/src/color/conversion.ts +26 -0
- package/src/color/device.ts +37 -0
- package/src/color/devicen.ts +74 -0
- package/src/color/icc.ts +103 -0
- package/src/color/index.ts +15 -0
- package/src/color/separation.ts +94 -0
- package/src/color/space.ts +47 -0
- package/src/content/content_stream.ts +373 -0
- package/src/content/graphics_state.ts +64 -0
- package/src/content/index.ts +16 -0
- package/src/content/operators.ts +70 -0
- package/src/content/path.ts +51 -0
- package/src/content/resources.ts +119 -0
- package/src/content/text_object.ts +140 -0
- package/src/document/catalog.ts +16 -0
- package/src/document/index.ts +13 -0
- package/src/document/object_table.ts +67 -0
- package/src/document/page.ts +74 -0
- package/src/document/page_tree.ts +78 -0
- package/src/document/pdf_document.ts +310 -0
- package/src/document/types.ts +65 -0
- package/src/document/xref.ts +68 -0
- package/src/ext-gstate/ext_gstate.ts +69 -0
- package/src/ext-gstate/index.ts +2 -0
- package/src/fonts/cff.ts +123 -0
- package/src/fonts/cid_encoding.ts +45 -0
- package/src/fonts/cmap_table.ts +180 -0
- package/src/fonts/font.ts +342 -0
- package/src/fonts/glyf.ts +59 -0
- package/src/fonts/hmtx.ts +21 -0
- package/src/fonts/index.ts +20 -0
- package/src/fonts/name_table.ts +50 -0
- package/src/fonts/os2.ts +41 -0
- package/src/fonts/sfnt.ts +224 -0
- package/src/fonts/standard_14.ts +132 -0
- package/src/fonts/subset.ts +221 -0
- package/src/fonts/to_unicode.ts +82 -0
- package/src/fonts/win_ansi.ts +69 -0
- package/src/images/image.ts +111 -0
- package/src/images/index.ts +6 -0
- package/src/images/jpeg.ts +103 -0
- package/src/images/png.ts +239 -0
- package/src/images/smask.ts +24 -0
- package/src/index.ts +57 -0
- package/src/metadata/index.ts +3 -0
- package/src/metadata/info_dict.ts +28 -0
- package/src/metadata/xmp.ts +110 -0
- package/src/objects/encode.ts +77 -0
- package/src/objects/index.ts +43 -0
- package/src/objects/indirect_ref.ts +17 -0
- package/src/objects/name.ts +50 -0
- package/src/objects/number.ts +43 -0
- package/src/objects/string.ts +136 -0
- package/src/objects/types.ts +86 -0
- package/src/output/buffer_sink.ts +40 -0
- package/src/output/byte_sink.ts +12 -0
- package/src/output/index.ts +3 -0
- package/src/output/stream_sink.ts +62 -0
- package/src/patterns/index.ts +10 -0
- package/src/patterns/shading.ts +162 -0
- package/src/patterns/tiling_pattern.ts +68 -0
- package/src/standards/context.ts +10 -0
- package/src/standards/index.ts +23 -0
- package/src/standards/pdf_a.ts +23 -0
- package/src/standards/pdf_x.ts +31 -0
- package/src/streams/ascii85.ts +61 -0
- package/src/streams/ascii_hex.ts +33 -0
- package/src/streams/flate.ts +17 -0
- package/src/streams/index.ts +9 -0
- package/src/streams/stream.ts +66 -0
- package/src/util/ascii.ts +63 -0
- package/src/util/binary.ts +71 -0
- package/src/util/errors.ts +61 -0
- package/src/util/index.ts +10 -0
- package/src/util/units.ts +24 -0
- package/tsconfig.json +5 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* XMP metadata packet (spec §14.2). The authoritative metadata in PDF 1.7+
|
|
3
|
+
* and required for PDF/A-2b and PDF/X-4. Emitted as an **uncompressed**
|
|
4
|
+
* `/Metadata` stream (PDF/A forensic-readability requirement); the document
|
|
5
|
+
* Info dictionary carries the same values.
|
|
6
|
+
*
|
|
7
|
+
* `bfrange`-free, deterministic: with a fixed creation date the bytes are
|
|
8
|
+
* byte-identical across runs (the xpacket id is a fixed constant).
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { utf8 } from '../util/ascii.ts'
|
|
12
|
+
import { name } from '../objects/types.ts'
|
|
13
|
+
import type { PdfStream } from '../objects/types.ts'
|
|
14
|
+
import { makeStream } from '../streams/stream.ts'
|
|
15
|
+
import type { DocumentInfo } from '../document/types.ts'
|
|
16
|
+
import type { ConformanceLevel } from '../document/types.ts'
|
|
17
|
+
|
|
18
|
+
// Value of the `id` attribute in the `<?xpacket begin ... ?>` header. Kept as a
// fixed constant so the packet bytes are reproducible across runs.
const XPACKET_ID = 'W5M0MpCehiHzreSzNTczkc9d'
|
|
19
|
+
|
|
20
|
+
/**
 * Escape a string so it can be embedded as XML character data or inside a
 * double-quoted attribute value.
 *
 * `&` is replaced first so the entities produced for `<`, `>` and `"` are not
 * escaped a second time.
 *
 * @param s Raw text.
 * @returns The text with `&`, `<`, `>` and `"` replaced by XML entities.
 */
function esc(s: string): string {
  return s
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
}
|
|
27
|
+
|
|
28
|
+
/**
 * Format a date as ISO 8601 in UTC, e.g. `2026-05-18T09:30:00+00:00`.
 *
 * The year is zero-padded to four digits so dates before year 1000 still
 * produce a well-formed timestamp. Years outside 0–9999 are not given any
 * special treatment.
 *
 * @param d The instant to format; only its UTC fields are read.
 * @returns `YYYY-MM-DDTHH:mm:ss+00:00`.
 */
function isoDate(d: Date): string {
  const pad = (n: number, width = 2) => String(n).padStart(width, '0')
  const day = `${pad(d.getUTCFullYear(), 4)}-${pad(d.getUTCMonth() + 1)}-${pad(d.getUTCDate())}`
  const time = `${pad(d.getUTCHours())}:${pad(d.getUTCMinutes())}:${pad(d.getUTCSeconds())}`
  return `${day}T${time}+00:00`
}
|
|
36
|
+
|
|
37
|
+
/** Inputs for building the XMP metadata packet. */
export interface XmpOptions {
  /**
   * Document information fields. The packet builder reads `title`, `author`,
   * `subject`, `keywords` and `creator`, skipping any that are falsy.
   */
  info: DocumentInfo
  /** Written as `xmp:CreateDate`, `xmp:ModifyDate` and `xmp:MetadataDate`. */
  creationDate: Date
  /** Written as `pdf:Producer`. */
  producer: string
  /**
   * Target conformance level. `'PDF/A-2b'` and `'PDF/X-4'` add their
   * identification namespace and properties to the packet.
   */
  conformance: ConformanceLevel
}
|
|
43
|
+
|
|
44
|
+
/**
 * Build the XMP packet text for the document metadata.
 *
 * Optional properties (title, author, subject, keywords, creator tool) are
 * emitted only when the corresponding `info` field is truthy. The format,
 * the three date properties and the producer are always emitted. All three
 * dates carry the same value, derived from `o.creationDate`.
 *
 * Keywords are split on `,` or `;`. Each keyword is trimmed and empty entries
 * are dropped before they are listed in the `dc:subject` bag, so input such as
 * `"a ; b"` does not yield items with stray whitespace. The bag is omitted when
 * no keyword remains. `pdf:Keywords` keeps the original, unsplit string.
 *
 * @param o Metadata values and the conformance level.
 * @returns The packet as a string, from `<?xpacket begin…?>` to
 *   `<?xpacket end="w"?>`.
 */
export function buildXmpPacket(o: XmpOptions): string {
  const { info, conformance } = o
  const stamp = isoDate(o.creationDate)
  const isPdfA = conformance === 'PDF/A-2b'
  const isPdfX = conformance === 'PDF/X-4'

  const namespaces = [
    'xmlns:dc="http://purl.org/dc/elements/1.1/"',
    'xmlns:xmp="http://ns.adobe.com/xap/1.0/"',
    'xmlns:pdf="http://ns.adobe.com/pdf/1.3/"',
  ]
  if (isPdfA) namespaces.push('xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/"')
  if (isPdfX) namespaces.push('xmlns:pdfxid="http://www.npes.org/pdfx/ns/id/"')

  const props: string[] = []

  if (info.title) {
    props.push(
      `<dc:title><rdf:Alt><rdf:li xml:lang="x-default">${esc(info.title)}</rdf:li></rdf:Alt></dc:title>`
    )
  }
  if (info.author) {
    props.push(
      `<dc:creator><rdf:Seq><rdf:li>${esc(info.author)}</rdf:li></rdf:Seq></dc:creator>`
    )
  }
  if (info.subject) {
    props.push(
      `<dc:description><rdf:Alt><rdf:li xml:lang="x-default">${esc(info.subject)}</rdf:li></rdf:Alt></dc:description>`
    )
  }
  if (info.keywords) {
    // Trim each keyword and drop blanks so the bag holds clean items only.
    const keywords = info.keywords
      .split(/[,;]/)
      .map(k => k.trim())
      .filter(k => k !== '')
    if (keywords.length > 0) {
      const bag = keywords.map(k => `<rdf:li>${esc(k)}</rdf:li>`).join('')
      props.push(`<dc:subject><rdf:Bag>${bag}</rdf:Bag></dc:subject>`)
    }
    props.push(`<pdf:Keywords>${esc(info.keywords)}</pdf:Keywords>`)
  }

  props.push('<dc:format>application/pdf</dc:format>')
  props.push(`<xmp:CreateDate>${stamp}</xmp:CreateDate>`)
  props.push(`<xmp:ModifyDate>${stamp}</xmp:ModifyDate>`)
  props.push(`<xmp:MetadataDate>${stamp}</xmp:MetadataDate>`)
  if (info.creator) props.push(`<xmp:CreatorTool>${esc(info.creator)}</xmp:CreatorTool>`)
  props.push(`<pdf:Producer>${esc(o.producer)}</pdf:Producer>`)

  if (isPdfA) {
    props.push('<pdfaid:part>2</pdfaid:part>')
    props.push('<pdfaid:conformance>B</pdfaid:conformance>')
  }
  if (isPdfX) {
    props.push('<pdfxid:GTS_PDFXVersion>PDF/X-4</pdfxid:GTS_PDFXVersion>')
  }

  // `props` is never empty (format, dates and producer are unconditional), so
  // joining every line with '\n' yields one property per line between the
  // opening and closing wrapper elements.
  return [
    `<?xpacket begin="" id="${XPACKET_ID}"?>`,
    '<x:xmpmeta xmlns:x="adobe:ns:meta/">',
    '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">',
    `<rdf:Description rdf:about="" ${namespaces.join(' ')}>`,
    ...props.map(p => ' ' + p),
    '</rdf:Description>',
    '</rdf:RDF>',
    '</x:xmpmeta>',
    '<?xpacket end="w"?>',
  ].join('\n')
}
|
|
103
|
+
|
|
104
|
+
/**
 * Wrap the XMP packet in a `/Metadata` stream object.
 *
 * The packet is UTF-8 encoded and passed to `makeStream` with
 * `filter: 'none'`, plus `/Type /Metadata` and `/Subtype /XML` entries.
 */
export function buildXmpStream(o: XmpOptions): PdfStream {
  const packetBytes = utf8(buildXmpPacket(o))
  return makeStream(packetBytes, {
    filter: 'none',
    extra: { Type: name('Metadata'), Subtype: name('XML') },
  })
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Object → bytes (spec §5.1).
|
|
3
|
+
*
|
|
4
|
+
* Pure and deterministic: the same {@link PdfObject} always encodes to the
|
|
5
|
+
* same bytes. Dictionary keys are emitted in `Map` insertion order.
|
|
6
|
+
*
|
|
7
|
+
* This encodes the *value* of an object. Wrapping a value in `N G obj … endobj`
|
|
8
|
+
* is the document serializer's job (document/xref.ts).
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { ascii, concatBytes, SPACE, LF } from '../util/ascii.ts'
|
|
12
|
+
import { PdfGenError } from '../util/errors.ts'
|
|
13
|
+
import type { PdfObject, PdfDictionary } from './types.ts'
|
|
14
|
+
import { formatNumber } from './number.ts'
|
|
15
|
+
import { encodeName } from './name.ts'
|
|
16
|
+
import { encodeLiteral, encodeHex } from './string.ts'
|
|
17
|
+
import { refToken } from './indirect_ref.ts'
|
|
18
|
+
|
|
19
|
+
// Pre-encoded keyword tokens.
const TRUE = ascii('true')
const FALSE = ascii('false')
const NULL = ascii('null')

// Dictionary delimiters.
const DICT_OPEN = ascii('<<')
const DICT_CLOSE = ascii('>>')

// Stream framing: the keyword line before the data, and the line break plus
// keyword after it.
const STREAM_KW = ascii('stream\n')
const ENDSTREAM_KW = ascii('\nendstream')
|
|
26
|
+
|
|
27
|
+
/**
 * Encode the value of a PDF object to bytes.
 *
 * The input is never modified. For streams, `/Length` is set on a copy of the
 * stream dictionary, so encoding has no effect on the caller's object. The
 * copy preserves entry order, so the emitted bytes are unchanged.
 *
 * For `null` and booleans the returned array is a shared module-level
 * constant; callers must treat the result as read-only.
 *
 * @param o The object to encode.
 * @returns The encoded bytes (without any `N G obj … endobj` wrapper).
 * @throws PdfGenError `PDF_INVALID_OBJECT` if `o.kind` is not a known variant.
 */
export function encodeObject(o: PdfObject): Uint8Array {
  switch (o.kind) {
    case 'null':
      return NULL
    case 'bool':
      return o.value ? TRUE : FALSE
    case 'num':
      return ascii(formatNumber(o.value))
    case 'str':
      return o.encoding === 'hex' ? encodeHex(o.value) : encodeLiteral(o.value)
    case 'name':
      return encodeName(o.value)
    case 'ref':
      return ascii(refToken(o))
    case 'arr': {
      // `[` item (SPACE item)* `]`
      const parts: Uint8Array[] = [Uint8Array.of(0x5b)]
      for (const [index, item] of o.items.entries()) {
        if (index > 0) parts.push(Uint8Array.of(SPACE))
        parts.push(encodeObject(item))
      }
      parts.push(Uint8Array.of(0x5d))
      return concatBytes(parts)
    }
    case 'dict':
      return encodeDict(o)
    case 'stream': {
      // /Length must equal the post-filter data length (spec §5.1). Work on a
      // copy: `Map` copying keeps insertion order, and `set` on an existing key
      // keeps its position, so the output matches an in-place update.
      const withLength: PdfDictionary = {
        kind: 'dict',
        entries: new Map(o.dict.entries).set('Length', { kind: 'num', value: o.data.length }),
      }
      return concatBytes([
        encodeDict(withLength),
        Uint8Array.of(LF),
        STREAM_KW,
        o.data,
        ENDSTREAM_KW,
      ])
    }
    default: {
      // Compile-time exhaustiveness check; reachable only with untyped input.
      const _exhaustive: never = o
      throw new PdfGenError('PDF_INVALID_OBJECT', `Unknown object: ${_exhaustive}`)
    }
  }
}
|
|
69
|
+
|
|
70
|
+
/**
 * Encode a dictionary as `<<` followed by `/Key SPACE value SPACE` for every
 * entry, in the map's iteration order, and a closing `>>`.
 */
function encodeDict(d: PdfDictionary): Uint8Array {
  const chunks: Uint8Array[] = [DICT_OPEN]
  d.entries.forEach((value, key) => {
    chunks.push(encodeName(key))
    chunks.push(Uint8Array.of(SPACE))
    chunks.push(encodeObject(value))
    chunks.push(Uint8Array.of(SPACE))
  })
  chunks.push(DICT_CLOSE)
  return concatBytes(chunks)
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
export type {
|
|
2
|
+
PdfObject,
|
|
3
|
+
PdfNull,
|
|
4
|
+
PdfBoolean,
|
|
5
|
+
PdfNumber,
|
|
6
|
+
PdfString,
|
|
7
|
+
PdfName,
|
|
8
|
+
PdfArray,
|
|
9
|
+
PdfDictionary,
|
|
10
|
+
PdfStream,
|
|
11
|
+
IndirectRef,
|
|
12
|
+
} from './types.ts'
|
|
13
|
+
export {
|
|
14
|
+
NULL,
|
|
15
|
+
bool,
|
|
16
|
+
num,
|
|
17
|
+
name,
|
|
18
|
+
arr,
|
|
19
|
+
dict,
|
|
20
|
+
ref,
|
|
21
|
+
dictSet,
|
|
22
|
+
isNull,
|
|
23
|
+
isBool,
|
|
24
|
+
isNum,
|
|
25
|
+
isStr,
|
|
26
|
+
isName,
|
|
27
|
+
isArr,
|
|
28
|
+
isDict,
|
|
29
|
+
isStream,
|
|
30
|
+
isRef,
|
|
31
|
+
} from './types.ts'
|
|
32
|
+
export { refToken, objHeader } from './indirect_ref.ts'
|
|
33
|
+
export { formatNumber } from './number.ts'
|
|
34
|
+
export { encodeName } from './name.ts'
|
|
35
|
+
export {
|
|
36
|
+
literalBytes,
|
|
37
|
+
hexBytes,
|
|
38
|
+
textString,
|
|
39
|
+
dateString,
|
|
40
|
+
encodeLiteral,
|
|
41
|
+
encodeHex,
|
|
42
|
+
} from './string.ts'
|
|
43
|
+
export { encodeObject } from './encode.ts'
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Indirect reference helpers (spec §5.2). The {@link IndirectRef} type itself
|
|
3
|
+
* lives in objects/types.ts; this module holds formatting used by both the
|
|
4
|
+
* object encoder and the xref writer.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { IndirectRef } from './types.ts'
|
|
8
|
+
|
|
9
|
+
/**
 * Format the in-body reference token for an indirect reference:
 * object number, generation number and `R`, separated by single spaces.
 */
export function refToken(r: IndirectRef): string {
  return String(r.num) + ' ' + String(r.gen) + ' R'
}
|
|
13
|
+
|
|
14
|
+
/**
 * Format the header that opens an indirect object definition:
 * object number, generation number and `obj`, separated by single spaces.
 */
export function objHeader(num: number, gen: number): string {
  return String(num) + ' ' + String(gen) + ' obj'
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Name token escaping (spec §5.1).
|
|
3
|
+
*
|
|
4
|
+
* A name is written as `/` followed by its characters. Any byte outside the
|
|
5
|
+
* regular range `! (0x21) … ~ (0x7e)`, plus the delimiters and `#` itself,
|
|
6
|
+
* is written as `#XX` (two uppercase hex digits).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { ascii, concatBytes } from '../util/ascii.ts'
|
|
10
|
+
|
|
11
|
+
// Uppercase hex digits, indexed by nibble value, for `#XX` name escapes.
const HEX = '0123456789ABCDEF'
|
|
12
|
+
|
|
13
|
+
// Bytes inside the regular range `!`…`~` that must still be written as `#XX`:
// `#` itself, the delimiters ( ) < > [ ] { } /, and `%`.
const ESCAPED_REGULAR_BYTES: ReadonlySet<number> = new Set([
  0x23, // #
  0x25, // %
  0x28, // (
  0x29, // )
  0x2f, // /
  0x3c, // <
  0x3e, // >
  0x5b, // [
  0x5d, // ]
  0x7b, // {
  0x7d, // }
])

/** Whether byte `b` has to be written as a `#XX` escape inside a name. */
function needsEscape(b: number): boolean {
  const outsideRegularRange = b < 0x21 || b > 0x7e
  return outsideRegularRange || ESCAPED_REGULAR_BYTES.has(b)
}
|
|
32
|
+
|
|
33
|
+
/**
 * Encode a name token: a leading `/` followed by the name's bytes, with every
 * byte flagged by `needsEscape` written as `#` plus two uppercase hex digits.
 *
 * @param value The name without its leading slash. It is converted to bytes
 *   with `ascii()`; how that helper treats non-ASCII characters is not visible
 *   here.
 */
export function encodeName(value: string): Uint8Array {
  const bytes = ascii(value)
  const chunks: Uint8Array[] = [Uint8Array.from([0x2f])] // leading '/'
  let pending = 0 // start of the run of bytes not yet copied out

  bytes.forEach((byte, at) => {
    if (!needsEscape(byte)) return
    // Flush the unescaped run before this byte, then emit the escape.
    if (at > pending) chunks.push(bytes.subarray(pending, at))
    chunks.push(Uint8Array.from([0x23, HEX.charCodeAt(byte >> 4), HEX.charCodeAt(byte & 0x0f)]))
    pending = at + 1
  })
  if (pending < bytes.length) chunks.push(bytes.subarray(pending))

  return concatBytes(chunks)
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDF number formatting (spec §5.1).
|
|
3
|
+
*
|
|
4
|
+
* - Integers serialized without a decimal point.
|
|
5
|
+
* - Reals serialized with up to 6 decimal places; trailing zeros stripped;
|
|
6
|
+
* never in exponential notation.
|
|
7
|
+
* - Infinity, NaN, and -0 throw at the boundary.
|
|
8
|
+
*
|
|
9
|
+
* The same formatter is used for object encoding AND content-stream operands
|
|
10
|
+
* (spec §8.5) so numeric output is byte-identical everywhere.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { PdfGenError } from '../util/errors.ts'
|
|
14
|
+
|
|
15
|
+
const MAX_DECIMALS = 6

/**
 * Format a number as a PDF numeric token.
 *
 * - Integers are written without a decimal point. Integers with a magnitude
 *   of 1e21 or more are expanded through `BigInt`, because `String(value)`
 *   switches to exponent notation (`1e+21`) at that size.
 * - Other values are rounded to at most six decimals, with trailing zeros and
 *   a dangling decimal point removed. A value that rounds to negative zero is
 *   written as `0`.
 *
 * @param value The number to serialize.
 * @returns The decimal text, never in exponent notation.
 * @throws PdfGenError `PDF_INVALID_NUMBER` for `NaN`, `±Infinity` and `-0`.
 */
export function formatNumber(value: number): string {
  if (!Number.isFinite(value)) {
    throw new PdfGenError(
      'PDF_INVALID_NUMBER',
      `Cannot serialize non-finite number: ${value}`
    )
  }
  // Reject negative zero explicitly: Object.is distinguishes -0 from 0.
  if (Object.is(value, -0)) {
    throw new PdfGenError('PDF_INVALID_NUMBER', 'Cannot serialize negative zero')
  }

  if (Number.isInteger(value)) {
    // Number-to-string conversion uses exponent notation from 1e21 upwards;
    // BigInt prints every digit of the (exactly representable) integer.
    return Math.abs(value) < 1e21 ? String(value) : BigInt(value).toString()
  }

  // A non-integer double is always below 2^53 in magnitude, so toFixed never
  // falls back to exponent notation here.
  const fixed = value.toFixed(MAX_DECIMALS)

  // Strip trailing zeros, then a trailing decimal point if all decimals went.
  const trimmed = fixed.replace(/(\.\d*?)0+$/, '$1').replace(/\.$/, '')

  // Tiny negative values round to "-0.000000", which trims to "-0".
  return trimmed === '-0' ? '0' : trimmed
}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDF strings (spec §5.1).
|
|
3
|
+
*
|
|
4
|
+
* A {@link PdfString} stores raw bytes plus a serialization hint. This module
|
|
5
|
+
* provides:
|
|
6
|
+
* - constructors that turn JS strings / dates into the right byte sequence
|
|
7
|
+
* (UTF-16BE with BOM by default for text strings, PDFDocEncoding opt-in);
|
|
8
|
+
* - the literal and hex serializers used by objects/encode.ts.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { PdfGenError } from '../util/errors.ts'
|
|
12
|
+
import { ascii } from '../util/ascii.ts'
|
|
13
|
+
import type { PdfString } from './types.ts'
|
|
14
|
+
|
|
15
|
+
// ── Constructors ──────────────────────────────────────────────────────────
|
|
16
|
+
|
|
17
|
+
/** Wrap raw bytes as a string object tagged for literal `( )` serialization. */
export function literalBytes(value: Uint8Array): PdfString {
  const literal: PdfString = { kind: 'str', value, encoding: 'literal' }
  return literal
}
|
|
21
|
+
|
|
22
|
+
/** Wrap raw bytes as a string object tagged for hex `< >` serialization. */
export function hexBytes(value: Uint8Array): PdfString {
  const hex: PdfString = { kind: 'str', value, encoding: 'hex' }
  return hex
}
|
|
26
|
+
|
|
27
|
+
/**
 * Build a human-readable "text string".
 *
 * By default the result is UTF-16BE preceded by the `FE FF` byte-order mark
 * and tagged for hex serialization. With `encoding: 'pdfdoc'` each UTF-16
 * code unit must fit in one byte; it is written verbatim and the result is
 * tagged for literal serialization.
 *
 * @param s The text to encode.
 * @param opts `encoding` selects `'utf16be'` (default) or `'pdfdoc'`.
 * @throws PdfGenError `PDF_INVALID_STRING` in `'pdfdoc'` mode when a code unit
 *   is above 0xFF.
 */
export function textString(
  s: string,
  opts: { encoding?: 'utf16be' | 'pdfdoc' } = {}
): PdfString {
  if (opts.encoding !== 'pdfdoc') {
    // JS strings are already sequences of UTF-16 code units, so astral
    // characters arrive as surrogate pairs; each unit is written high byte
    // first after the two-byte BOM.
    const encoded: number[] = [0xfe, 0xff]
    for (let at = 0; at < s.length; at++) {
      const unit = s.charCodeAt(at)
      encoded.push(unit >> 8, unit & 0xff)
    }
    return { kind: 'str', value: Uint8Array.from(encoded), encoding: 'hex' }
  }

  const single = new Uint8Array(s.length)
  for (let at = 0; at < s.length; at++) {
    const c = s.charCodeAt(at)
    if (c > 0xff) {
      throw new PdfGenError(
        'PDF_INVALID_STRING',
        `Character U+${c.toString(16)} not representable in PDFDocEncoding; use UTF-16BE`
      )
    }
    single[at] = c
  }
  return { kind: 'str', value: single, encoding: 'literal' }
}
|
|
61
|
+
|
|
62
|
+
/**
 * Build a PDF date string: `D:YYYYMMDDHHmmSSOHH'mm'` (ISO 32000-1 §7.9.4).
 *
 * Always rendered in UTC with the offset written as `+00'00'`, so the output
 * depends only on the instant. The year is zero-padded to four digits so
 * dates before year 1000 keep the fixed-width layout.
 *
 * @param date The instant to format; only its UTC fields are read.
 * @returns A literal-encoded string object holding the date text.
 */
export function dateString(date: Date): PdfString {
  const pad = (n: number, width = 2) => String(n).padStart(width, '0')
  const ymd =
    pad(date.getUTCFullYear(), 4) + pad(date.getUTCMonth() + 1) + pad(date.getUTCDate())
  const hms =
    pad(date.getUTCHours()) + pad(date.getUTCMinutes()) + pad(date.getUTCSeconds())
  return { kind: 'str', value: ascii(`D:${ymd}${hms}+00'00'`), encoding: 'literal' }
}
|
|
73
|
+
|
|
74
|
+
// ── Serializers ───────────────────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
// Uppercase hex digits, indexed by nibble value, for hex string output.
const HEX = '0123456789ABCDEF'
|
|
77
|
+
|
|
78
|
+
// Bytes that are written as a backslash followed by one character, mapped to
// that character: \\ \( \) \n \r \t \b \f.
const LITERAL_ESCAPES: ReadonlyMap<number, number> = new Map([
  [0x5c, 0x5c], // backslash
  [0x28, 0x28], // (
  [0x29, 0x29], // )
  [0x0a, 0x6e], // \n
  [0x0d, 0x72], // \r
  [0x09, 0x74], // \t
  [0x08, 0x62], // \b
  [0x0c, 0x66], // \f
])

/**
 * Serialize bytes as a literal string `( ... )`.
 *
 * Backslash, parentheses and the common control characters use their
 * two-character escapes. Any other byte below 0x20 or above 0x7e is written
 * as a three-digit octal escape `\ddd`. Everything else is copied as is.
 */
export function encodeLiteral(value: Uint8Array): Uint8Array {
  const out: number[] = [0x28] // (
  for (const byte of value) {
    const shortEscape = LITERAL_ESCAPES.get(byte)
    if (shortEscape !== undefined) {
      out.push(0x5c, shortEscape)
    } else if (byte >= 0x20 && byte <= 0x7e) {
      out.push(byte)
    } else {
      out.push(
        0x5c,
        0x30 + ((byte >> 6) & 0x7),
        0x30 + ((byte >> 3) & 0x7),
        0x30 + (byte & 0x7)
      )
    }
  }
  out.push(0x29) // )
  return Uint8Array.from(out)
}
|
|
124
|
+
|
|
125
|
+
/**
 * Serialize bytes as a hex string: `<`, two uppercase hex digits per byte,
 * then `>`.
 */
export function encodeHex(value: Uint8Array): Uint8Array {
  const out = new Uint8Array(value.length * 2 + 2)
  out[0] = 0x3c // <
  value.forEach((byte, index) => {
    const at = 1 + index * 2
    out[at] = HEX.charCodeAt(byte >> 4)
    out[at + 1] = HEX.charCodeAt(byte & 0x0f)
  })
  out[out.length - 1] = 0x3e // >
  return out
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The PDF object model (spec §5). The eight basic PDF object types plus the
|
|
3
|
+
* indirect reference, modelled as a discriminated union on `kind`.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/** Any PDF object value; the `kind` field selects the variant. */
export type PdfObject =
  | PdfNull
  | PdfBoolean
  | PdfNumber
  | PdfString
  | PdfName
  | PdfArray
  | PdfDictionary
  | PdfStream
  | IndirectRef

/** The null object. */
export type PdfNull = {
  kind: 'null'
}

/** A boolean object. */
export type PdfBoolean = {
  kind: 'bool'
  value: boolean
}

/** Integer or real. Serialization decides formatting (objects/number.ts). */
export type PdfNumber = {
  kind: 'num'
  value: number
}

/** A string object: bytes plus the form they are serialized in. */
export type PdfString = {
  kind: 'str'
  /** Raw bytes — already in the final on-disk byte sequence. */
  value: Uint8Array
  /** Serialization hint: literal `( )` or hex `< >`. */
  encoding: 'literal' | 'hex'
}

/** A name object. `value` is unescaped: no leading slash, `#XX` decoded. */
export type PdfName = {
  kind: 'name'
  value: string
}

/** An array object holding its items in order. */
export type PdfArray = {
  kind: 'arr'
  items: Array<PdfObject>
}

/** A dictionary object; keys are name values without the leading slash. */
export type PdfDictionary = {
  kind: 'dict'
  entries: Map<string, PdfObject>
}

/** A stream object: a dictionary plus its data bytes. */
export type PdfStream = {
  kind: 'stream'
  /** Includes /Length and /Filter. */
  dict: PdfDictionary
  /** Already-filtered bytes (final on-disk data). */
  data: Uint8Array
}

/** A reference to an indirect object by object and generation number. */
export type IndirectRef = {
  kind: 'ref'
  num: number
  gen: number
}
|
|
40
|
+
|
|
41
|
+
// ── Constructors ──────────────────────────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
/** A ready-made null object; exported as a single shared instance. */
export const NULL: PdfNull = { kind: 'null' }
|
|
44
|
+
|
|
45
|
+
/** Wrap a JS boolean as a PDF boolean object. */
export function bool(value: boolean): PdfBoolean {
  const node: PdfBoolean = { kind: 'bool', value }
  return node
}
|
|
48
|
+
|
|
49
|
+
/** Wrap a JS number as a PDF number object; no validation happens here. */
export function num(value: number): PdfNumber {
  const node: PdfNumber = { kind: 'num', value }
  return node
}
|
|
52
|
+
|
|
53
|
+
/** Wrap a string as a PDF name object; `value` is stored unchanged. */
export function name(value: string): PdfName {
  const node: PdfName = { kind: 'name', value }
  return node
}
|
|
56
|
+
|
|
57
|
+
/**
 * Wrap a list of objects as a PDF array object. The given array is stored by
 * reference, not copied; omitting it yields a fresh empty array.
 */
export function arr(items: PdfObject[] = []): PdfArray {
  const node: PdfArray = { kind: 'arr', items }
  return node
}
|
|
60
|
+
|
|
61
|
+
/**
 * Build a dictionary object from a plain object. The object's own enumerable
 * entries are copied into a new `Map` in their enumeration order.
 */
export function dict(entries: Record<string, PdfObject> = {}): PdfDictionary {
  const map = new Map<string, PdfObject>()
  for (const [key, value] of Object.entries(entries)) {
    map.set(key, value)
  }
  return { kind: 'dict', entries: map }
}
|
|
65
|
+
|
|
66
|
+
/** Build an indirect reference; the generation number defaults to 0. */
export function ref(num: number, gen = 0): IndirectRef {
  const node: IndirectRef = { kind: 'ref', num, gen }
  return node
}
|
|
69
|
+
|
|
70
|
+
// ── Type guards ───────────────────────────────────────────────────────────
|
|
71
|
+
|
|
72
|
+
/** True when `o` is the null object. */
export function isNull(o: PdfObject): o is PdfNull {
  return o.kind === 'null'
}

/** True when `o` is a boolean object. */
export function isBool(o: PdfObject): o is PdfBoolean {
  return o.kind === 'bool'
}

/** True when `o` is a number object. */
export function isNum(o: PdfObject): o is PdfNumber {
  return o.kind === 'num'
}

/** True when `o` is a string object. */
export function isStr(o: PdfObject): o is PdfString {
  return o.kind === 'str'
}

/** True when `o` is a name object. */
export function isName(o: PdfObject): o is PdfName {
  return o.kind === 'name'
}

/** True when `o` is an array object. */
export function isArr(o: PdfObject): o is PdfArray {
  return o.kind === 'arr'
}

/** True when `o` is a dictionary object. */
export function isDict(o: PdfObject): o is PdfDictionary {
  return o.kind === 'dict'
}

/** True when `o` is a stream object. */
export function isStream(o: PdfObject): o is PdfStream {
  return o.kind === 'stream'
}

/** True when `o` is an indirect reference. */
export function isRef(o: PdfObject): o is IndirectRef {
  return o.kind === 'ref'
}
|
|
81
|
+
|
|
82
|
+
/**
 * Store `value` under `key` in the dictionary's entry map (in place) and hand
 * back the same dictionary so calls can be chained.
 */
export function dictSet(d: PdfDictionary, key: string, value: PdfObject): PdfDictionary {
  const { entries } = d
  entries.set(key, value)
  return d
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A {@link ByteSink} that accumulates everything into a single Uint8Array,
|
|
3
|
+
* returned by `PdfDocument.save()` (spec §3.3).
|
|
4
|
+
*
|
|
5
|
+
* Uses a geometrically-growing backing buffer to keep appends amortized O(1)
|
|
6
|
+
* and peak memory bounded (spec §18.6).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type { ByteSink } from './byte_sink.ts'
|
|
10
|
+
|
|
11
|
+
/**
 * A byte sink that accumulates everything written to it in one growable
 * in-memory buffer.
 */
export class BufferSink implements ByteSink {
  /** Backing storage; only the first `len` bytes hold written data. */
  private buf: Uint8Array
  /** Number of bytes written so far. */
  private len = 0

  /**
   * @param initialCapacity Size of the initial backing buffer in bytes. Zero
   *   is allowed; the buffer is allocated on the first write.
   */
  constructor(initialCapacity = 64 * 1024) {
    this.buf = new Uint8Array(initialCapacity)
  }

  /** Total number of bytes written so far. */
  get length(): number {
    return this.len
  }

  /** Append `bytes`, growing the backing buffer when it is too small. */
  write(bytes: Uint8Array): void {
    const needed = this.len + bytes.length
    if (needed > this.buf.length) this.grow(needed)
    this.buf.set(bytes, this.len)
    this.len = needed
  }

  /** Return the exact written bytes (a copy sized to `length`). */
  toBytes(): Uint8Array {
    return this.buf.slice(0, this.len)
  }

  /**
   * Replace the backing buffer with one of at least `needed` bytes, doubling
   * the capacity until it fits. Doubling starts from at least 1 so that a
   * zero-capacity buffer can grow instead of looping forever on `0 * 2`.
   */
  private grow(needed: number): void {
    let cap = Math.max(this.buf.length, 1)
    while (cap < needed) cap *= 2
    const next = new Uint8Array(cap)
    next.set(this.buf.subarray(0, this.len))
    this.buf = next
  }
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The serialize pass writes through a {@link ByteSink} (spec §3.3). This keeps
|
|
3
|
+
* the document serializer agnostic to whether output is buffered in memory or
|
|
4
|
+
* streamed to a Writable. M1–M3 ship the buffer sink; StreamSink is M12.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/** Destination for serialized bytes. */
export interface ByteSink {
  /** Total number of bytes written so far (used for xref offsets). */
  readonly length: number

  /** Append bytes. Implementations must not retain the passed array. */
  write(bytes: Uint8Array): void
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A {@link ByteSink} that streams to a Node `Writable` (spec §3.3) — for
|
|
3
|
+
* HTTP responses or files without buffering the whole PDF in memory.
|
|
4
|
+
*
|
|
5
|
+
* The serialize pass calls `write()` synchronously many times; chunks are
|
|
6
|
+
* handed straight to the stream (Node applies its own backpressure buffering).
|
|
7
|
+
* `done()` ends the stream and resolves once it has fully flushed; a stream
|
|
8
|
+
* error at any point (including during the synchronous write burst, which
|
|
9
|
+
* surfaces asynchronously) makes `done()` reject.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type { Writable } from 'node:stream'
|
|
13
|
+
import type { ByteSink } from './byte_sink.ts'
|
|
14
|
+
|
|
15
|
+
/**
 * A byte sink that forwards every write to a Node `Writable`.
 *
 * The first error seen on the stream (via its `'error'` event, a write
 * callback, or a synchronous throw from `write`) is remembered. Later writes
 * are then skipped and `done()` rejects with that error.
 */
export class StreamSink implements ByteSink {
  /** Bytes accepted by `write()` so far. */
  private len = 0
  /** First error observed on the stream, if any. */
  private err?: Error
  /** Result of the first `done()` call, reused by later calls. */
  private finished?: Promise<void>

  constructor(private readonly out: Writable) {
    // Capture errors that occur during the write burst, before done().
    out.on('error', (e: Error) => {
      this.err ??= e
    })
  }

  /** Total number of bytes passed to `write()` before any error was seen. */
  get length(): number {
    return this.len
  }

  /**
   * Hand a copy of `bytes` to the stream. Does nothing once an error has been
   * recorded. The stream's backpressure signal is not consulted.
   */
  write(bytes: Uint8Array): void {
    if (this.err) return
    try {
      // Copy: the serializer may reuse buffers; Node keeps the chunk async.
      // The per-write callback reports failures even when no 'error' event
      // is emitted (some runtimes don't emit one for a failed write).
      this.out.write(Buffer.from(bytes), e => {
        if (e) this.err ??= e
      })
    } catch (e: unknown) {
      this.err ??= e instanceof Error ? e : new Error(String(e))
    }
    this.len += bytes.length
  }

  /**
   * End the stream; resolves on flush, rejects on any stream error.
   *
   * Safe to call more than once: every call returns the promise created by the
   * first call. Calling `end()` again on a finished stream emits no further
   * `'finish'` event, so a fresh promise per call would never settle.
   */
  done(): Promise<void> {
    this.finished ??= new Promise<void>((resolve, reject) => {
      if (this.err) {
        reject(this.err)
        return
      }
      const onError = (e?: Error) => {
        this.out.off('finish', onFinish)
        reject(this.err ?? e ?? new Error('stream error'))
      }
      const onFinish = () => {
        this.out.off('error', onError)
        if (this.err) reject(this.err)
        else resolve()
      }
      this.out.once('error', onError)
      this.out.once('finish', onFinish)
      this.out.end()
    })
    return this.finished
  }
}
|