@file-type/pdf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE.txt ADDED
@@ -0,0 +1,9 @@
+ The MIT License (MIT)
+
+ Copyright © 2025 Borewit
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,50 @@
+ [![NPM version](https://img.shields.io/npm/v/@file-type/pdf.svg)](https://npmjs.org/package/@file-type/pdf)
+ [![Node.js CI](https://github.com/Borewit/file-type-pdf/actions/workflows/nodejs-ci.yml/badge.svg)](https://github.com/Borewit/file-type-pdf/actions/workflows/nodejs-ci.yml)
+ [![npm downloads](http://img.shields.io/npm/dm/@file-type/pdf.svg)](https://npmcharts.com/compare/@file-type/pdf?start=365)
+
+ # @file-type/pdf
+
+ Detector plugin for [file-type](https://github.com/sindresorhus/file-type) that identifies
+ [PDF (Portable Document Format)](https://en.wikipedia.org/wiki/PDF) files and selected PDF-based subtypes.
+
+ This plugin goes beyond simple magic-number detection and can inspect the internal PDF
+ structure to distinguish between generic PDF files and specific producer formats such as
+ **Adobe Illustrator (.ai)**.
+
+ The detector is designed to be:
+ - tokenizer-based (using `strtok3`)
+ - streaming-friendly
+ - safe to chain with other `file-type` detectors
+ - compatible with both Node.js and browser environments
+
+ ## Installation
+
+ ```bash
+ npm install @file-type/pdf
+ ```
+
+ ## Usage
+
+ The following example shows how to add the PDF detector to [file-type](https://github.com/sindresorhus/file-type):
+
+ ```js
+ import { FileTypeParser } from 'file-type';
+ import { detectPdf } from '@file-type/pdf';
+
+ const parser = new FileTypeParser({
+   customDetectors: [detectPdf],
+ });
+
+ const fileType = await parser.fromFile('example.pdf');
+ console.log(fileType);
+ ```
+
+ ## Supported file formats
+
+ - `.ai` / `application/illustrator`: Adobe Illustrator
+ - `.pdf` / `application/pdf`: Generic Portable Document Format files
+
+ ## License
+
+ This project is licensed under the [MIT License](LICENSE.txt).
+ Feel free to use, modify, and distribute it as needed.
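To illustrate the subtype detection the README describes, here is a minimal sketch building on the usage example above; the `drawing.ai` and `report.pdf` file names are placeholders, and the result shapes follow the `PDF_TYPE` and `AI_TYPE` constants in `lib/index.js`:

```js
import { FileTypeParser } from 'file-type';
import { detectPdf } from '@file-type/pdf';

const parser = new FileTypeParser({ customDetectors: [detectPdf] });

// A PDF saved by Adobe Illustrator is reported as the more specific subtype.
console.log(await parser.fromFile('drawing.ai'));
// => { ext: 'ai', mime: 'application/illustrator' }

// Any other PDF falls back to the generic result.
console.log(await parser.fromFile('report.pdf'));
// => { ext: 'pdf', mime: 'application/pdf' }
```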
package/lib/PdfTokenizerReader.d.ts ADDED
@@ -0,0 +1,36 @@
+ import type { ITokenizer } from "strtok3";
+ export type PdfTokenizerReaderOptions = {
+     chunkSize?: number;
+     debug?: boolean;
+ };
+ export declare class PdfTokenizerReader {
+     private tokenizer;
+     private buf;
+     private pos;
+     private chunkSize;
+     private eof;
+     private debug;
+     constructor(tokenizer: ITokenizer, opts?: PdfTokenizerReaderOptions);
+     private log;
+     /**
+      * Logical file position of the next byte that will be consumed by the reader.
+      */
+     getPosition(): number;
+     private peekMayBeLess;
+     private readMayBeLess;
+     private compactBuffer;
+     private fill;
+     /**
+      * Reads a line terminated by '\n' (supports '\r\n').
+      * Returns the line (latin1) without line ending, or null at EOF.
+      */
+     readLine(): Promise<string | null>;
+     /**
+      * Reads exactly n bytes, or returns null if EOF occurs before n bytes are available.
+      */
+     readBytes(n: number): Promise<Buffer | null>;
+     /**
+      * Consume exactly one EOL after the 'stream' keyword if present.
+      */
+     consumeStreamEol(): Promise<void>;
+ }
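`PdfTokenizerReader` is an internal helper rather than part of the package's documented entry point, but the declaration above maps directly onto how `lib/index.js` uses it. A minimal sketch of the reader API, assuming strtok3's `fromBuffer` helper; the subpath import is illustrative only, since the package's exports map does not expose this module:

```js
import { fromBuffer } from 'strtok3';
// Illustrative path import; not an officially exported subpath of @file-type/pdf.
import { PdfTokenizerReader } from '@file-type/pdf/lib/PdfTokenizerReader.js';

// Tiny in-memory PDF fragment, just enough to exercise the reader.
const bytes = Buffer.from('%PDF-1.7\n1 0 obj\n<< /Length 2 >>\nstream\nAB\nendstream\nendobj\n', 'latin1');
const reader = new PdfTokenizerReader(fromBuffer(bytes));

console.log(await reader.readLine());   // '%PDF-1.7'
console.log(await reader.readLine());   // '1 0 obj'
console.log(await reader.readLine());   // '<< /Length 2 >>'
console.log(await reader.readLine());   // 'stream'
await reader.consumeStreamEol();        // no-op here: readLine() already consumed the EOL
console.log(await reader.readBytes(2)); // <Buffer 41 42> ('AB')
console.log(reader.getPosition());      // 42, the offset of the next unread byte
```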
package/lib/PdfTokenizerReader.js ADDED
@@ -0,0 +1,145 @@
+ export class PdfTokenizerReader {
+     constructor(tokenizer, opts = {}) {
+         this.buf = Buffer.alloc(0);
+         this.pos = 0;
+         this.eof = false;
+         this.tokenizer = tokenizer;
+         this.chunkSize = opts.chunkSize ?? 64 * 1024;
+         this.debug = !!opts.debug;
+     }
+     log(msg) {
+         if (this.debug)
+             console.log(msg);
+     }
+     /**
+      * Logical file position of the next byte that will be consumed by the reader.
+      */
+     getPosition() {
+         const bufferedRemaining = this.buf.length - this.pos;
+         return this.tokenizer.position - bufferedRemaining;
+     }
+     async peekMayBeLess(target, length) {
+         const opts = { length, mayBeLess: true };
+         try {
+             return await this.tokenizer.peekBuffer(target, opts);
+         }
+         catch (e) {
+             if (isEndOfStreamError(e))
+                 return 0;
+             throw e;
+         }
+     }
+     async readMayBeLess(target, length) {
+         const opts = { length, mayBeLess: true };
+         try {
+             return await this.tokenizer.readBuffer(target, opts);
+         }
+         catch (e) {
+             if (isEndOfStreamError(e))
+                 return 0;
+             throw e;
+         }
+     }
+     compactBuffer() {
+         if (this.pos > 0) {
+             this.buf = this.buf.subarray(this.pos);
+             this.pos = 0;
+         }
+     }
+     async fill(minBytes = 1) {
+         if (this.eof)
+             return;
+         while (!this.eof && (this.buf.length - this.pos) < minBytes) {
+             this.compactBuffer();
+             // Peek first, then read exactly what we peeked
+             const peekBuf = Buffer.alloc(this.chunkSize);
+             const peeked = await this.peekMayBeLess(peekBuf, peekBuf.length);
+             if (!peeked) {
+                 this.eof = true;
+                 this.log(`[READER] EOF @${this.getPosition()} (peekBuffer returned 0)`);
+                 break;
+             }
+             const readBuf = Buffer.alloc(peeked);
+             const read = await this.readMayBeLess(readBuf, readBuf.length);
+             if (!read) {
+                 this.eof = true;
+                 this.log(`[READER] EOF @${this.getPosition()} (readBuffer returned 0)`);
+                 break;
+             }
+             const slice = readBuf.subarray(0, read);
+             this.buf = this.buf.length ? Buffer.concat([this.buf, slice]) : slice;
+         }
+     }
+     /**
+      * Reads a line terminated by '\n' (supports '\r\n').
+      * Returns the line (latin1) without line ending, or null at EOF.
+      */
+     async readLine() {
+         while (true) {
+             const idx = this.buf.indexOf(0x0a, this.pos); // '\n'
+             if (idx !== -1) {
+                 let lineBuf = this.buf.subarray(this.pos, idx);
+                 if (lineBuf.length && lineBuf[lineBuf.length - 1] === 0x0d) {
+                     lineBuf = lineBuf.subarray(0, lineBuf.length - 1); // drop '\r'
+                 }
+                 this.pos = idx + 1;
+                 return lineBuf.toString("latin1");
+             }
+             const before = this.buf.length - this.pos;
+             await this.fill(before + 1);
+             const after = this.buf.length - this.pos;
+             if (after === before && this.eof) {
+                 if (before === 0)
+                     return null;
+                 const tail = this.buf.subarray(this.pos);
+                 this.pos = this.buf.length;
+                 return tail.toString("latin1");
+             }
+         }
+     }
+     /**
+      * Reads exactly n bytes, or returns null if EOF occurs before n bytes are available.
+      */
+     async readBytes(n) {
+         if (n < 0)
+             throw new Error("readBytes(n): n must be >= 0");
+         if (n === 0)
+             return Buffer.alloc(0);
+         await this.fill(n);
+         const avail = this.buf.length - this.pos;
+         if (avail < n)
+             return null;
+         const out = this.buf.subarray(this.pos, this.pos + n);
+         this.pos += n;
+         return out;
+     }
+     /**
+      * Consume exactly one EOL after the 'stream' keyword if present.
+      */
+     async consumeStreamEol() {
+         await this.fill(1);
+         const avail = this.buf.length - this.pos;
+         if (avail <= 0)
+             return;
+         const b0 = this.buf[this.pos];
+         if (b0 === 0x0d) {
+             await this.fill(2);
+             const avail2 = this.buf.length - this.pos;
+             if (avail2 >= 2 && this.buf[this.pos + 1] === 0x0a)
+                 this.pos += 2; // \r\n
+             else
+                 this.pos += 1; // \r
+         }
+         else if (b0 === 0x0a) {
+             this.pos += 1; // \n
+         }
+     }
+ }
+ function isEndOfStreamError(e) {
+     if (!e || typeof e !== "object")
+         return false;
+     const anyE = e;
+     const name = typeof anyE.name === "string" ? anyE.name : "";
+     const message = typeof anyE.message === "string" ? anyE.message : "";
+     return name === "EndOfStreamError" || message.includes("End-Of-Stream");
+ }
package/lib/index.d.ts ADDED
@@ -0,0 +1,2 @@
+ import type { Detector } from 'file-type';
+ export declare const detectPdf: Detector;
package/lib/index.js ADDED
@@ -0,0 +1,348 @@
+ import sax from "sax";
+ import { PdfTokenizerReader } from "./PdfTokenizerReader.js";
+ const OBJ_REGEX = /^\s*(\d+)\s+(\d+)\s+obj\b/;
+ const PDF_TYPE = Object.freeze({ ext: "pdf", mime: "application/pdf" });
+ const AI_TYPE = Object.freeze({ ext: "ai", mime: "application/illustrator" });
+ const encoder = new TextEncoder();
+ const utf8Decoder = new TextDecoder("utf-8");
+ function indexOfBytes(hay, needle) {
+     if (needle.length === 0)
+         return 0;
+     outer: for (let i = 0; i <= hay.length - needle.length; i++) {
+         for (let j = 0; j < needle.length; j++) {
+             if (hay[i + j] !== needle[j])
+                 continue outer;
+         }
+         return i;
+     }
+     return -1;
+ }
+ async function peekPdfHeader(tokenizer) {
+     const buf = new Uint8Array(1024);
+     let n = 0;
+     try {
+         const opts = { length: buf.length, mayBeLess: true };
+         n = await tokenizer.peekBuffer(buf, opts);
+     }
+     catch {
+         return { isPdf: false, headerOffset: -1 };
+     }
+     if (!n)
+         return { isPdf: false, headerOffset: -1 };
+     const hay = buf.subarray(0, n);
+     const idx = indexOfBytes(hay, encoder.encode("%PDF-"));
+     if (idx === -1)
+         return { isPdf: false, headerOffset: -1 };
+     return { isPdf: true, headerOffset: idx };
+ }
+ async function skipBytes(tokenizer, n) {
+     if (n <= 0)
+         return;
+     const tmp = new Uint8Array(Math.min(64 * 1024, n));
+     let left = n;
+     while (left > 0) {
+         const len = Math.min(tmp.length, left);
+         const opts = { length: len };
+         const read = await tokenizer.readBuffer(tmp, opts);
+         if (!read)
+             throw new Error("Unexpected EOF while skipping bytes");
+         left -= read;
+     }
+ }
+ function parseDictFromRaw(raw) {
+     const dictRegex = /\/(\w+)(?:\s+([^/>\n\r]+))?/g;
+     const info = {};
+     let match = dictRegex.exec(raw);
+     while (match !== null) {
+         const key = match[1];
+         const value = match[2] ? match[2].trim() : true;
+         info[key] = value;
+         match = dictRegex.exec(raw);
+     }
+     return info;
+ }
+ function normalizeFilters(filterValue) {
+     if (!filterValue || filterValue === true)
+         return [];
+     const names = String(filterValue).match(/FlateDecode|ASCII85Decode|LZWDecode|RunLengthDecode/g);
+     return names ? [...new Set(names)] : [];
+ }
+ async function inflateFlateDecode(data) {
+     try {
+         return await inflateWithFormat("deflate", data);
+     }
+     catch {
+         return await inflateWithFormat("deflate-raw", data);
+     }
+ }
+ async function inflateWithFormat(format, data) {
+     // Normalize input so TS sees an ArrayBuffer-backed Uint8Array (not ArrayBufferLike/SharedArrayBuffer).
+     const normalized = new Uint8Array(data.byteLength);
+     normalized.set(data);
+     const ds = new DecompressionStream(format);
+     // Use the most permissive stream element type and cast pipeThrough to avoid DOM lib generic friction.
+     const input = new ReadableStream({
+         start(controller) {
+             controller.enqueue(normalized); // Uint8Array is a valid chunk at runtime
+             controller.close();
+         },
+     });
+     const out = input.pipeThrough(ds);
+     const ab = await new Response(out).arrayBuffer();
+     return new Uint8Array(ab);
+ }
+ async function decodeStreamBytes(objectInfo, rawBytes) {
+     const filters = normalizeFilters(objectInfo.Filter);
+     if (!filters.length)
+         return rawBytes;
+     let out = rawBytes;
+     for (const f of filters) {
+         if (f === "FlateDecode") {
+             out = await inflateFlateDecode(out);
+         }
+         else {
+             // Unsupported filters, return raw stream
+             return rawBytes;
+         }
+     }
+     return out;
+ }
+ async function readDictionaryBlock(reader, firstLine) {
+     let raw = firstLine;
+     while (!raw.includes(">>")) {
+         const next = await reader.readLine();
+         if (next === null)
+             break;
+         raw += `\n${next}`;
+     }
+     const start = raw.indexOf("<<");
+     const end = raw.indexOf(">>", start + 2);
+     if (start === -1 || end === -1)
+         return { dictText: null, streamInline: false };
+     const dictText = raw.slice(start + 2, end).trim();
+     const after = raw.slice(end + 2).trim();
+     const streamInline = after === "stream" || after.startsWith("stream ");
+     return { dictText, streamInline };
+ }
+ class XmlHandler {
+     constructor(opts = {}) {
+         this.readingCreatorTool = false;
+         this.onCreatorTool = opts.onCreatorTool;
+         this.saxParser = sax.parser(true, { xmlns: true });
+         this.saxParser.onerror = (e) => {
+             if (e.message.startsWith("Invalid character entity")) {
+                 this.saxParser.error = null;
+                 this.saxParser.resume();
+                 return;
+             }
+             throw e;
+         };
+         this.saxParser.onopentag = (node) => {
+             const tag = node;
+             const isCreatorTool = tag.uri === "http://ns.adobe.com/xap/1.0/" && tag.local === "CreatorTool";
+             // Fallback by name, in case xmlns typing/runtime differs
+             const nameMatch = typeof tag.name === "string" &&
+                 (tag.name === "xap:CreatorTool" ||
+                 tag.name.endsWith(":CreatorTool") ||
+                 tag.name === "CreatorTool");
+             this.readingCreatorTool = isCreatorTool || nameMatch;
+         };
+         this.saxParser.ontext = (text) => {
+             if (!this.readingCreatorTool)
+                 return;
+             this.onCreatorTool?.(text);
+             this.readingCreatorTool = false;
+         };
+         this.saxParser.onclosetag = () => {
+             this.readingCreatorTool = false;
+         };
+     }
+     write(text) {
+         this.saxParser.write(text);
+     }
+     close() {
+         this.saxParser.close();
+     }
+ }
+ function createIllustratorProbe() {
+     return {
+         name: "adobe-illustrator",
+         onDict: (_ctx, dictText, dict) => {
+             if (dict.Illustrator === true)
+                 return AI_TYPE;
+             if (dictText.includes("/Illustrator"))
+                 return AI_TYPE;
+             const creator = dict.Creator;
+             const producer = dict.Producer;
+             if (creator && creator !== true && String(creator).includes("Illustrator"))
+                 return AI_TYPE;
+             if (producer && producer !== true && String(producer).includes("Illustrator"))
+                 return AI_TYPE;
+             if (dictText.includes("Adobe Illustrator"))
+                 return AI_TYPE;
+             return undefined;
+         },
+         onCreatorTool: (_ctx, creatorTool) => {
+             if (creatorTool.toLowerCase().includes("illustrator"))
+                 return AI_TYPE;
+             return undefined;
+         },
+         onStreamText: (_ctx, streamText) => {
+             if (streamText.includes("Adobe Illustrator"))
+                 return AI_TYPE;
+             return undefined;
+         },
+     };
+ }
+ const subtypeProbes = [createIllustratorProbe()];
+ /**
+  * File-type detector plugin:
+  * - returns undefined if NOT a PDF (and does not advance tokenizer.position in that case)
+  * - returns PDF_TYPE for PDF
+  * - returns subtype result when a probe matches (e.g. AI_TYPE)
+  */
+ async function _detectPdf(tokenizer, opts = {}) {
+     const debug = !!opts.debug;
+     const maxScanLines = opts.maxScanLines ?? 50000;
+     const log = (...args) => {
+         if (debug)
+             console.log(...args);
+     };
+     const ctx = { debug, log };
+     // NOT PDF => PEEK ONLY, do not advance
+     const { isPdf, headerOffset } = await peekPdfHeader(tokenizer);
+     if (!isPdf)
+         return undefined;
+     // Confirmed PDF => ok to advance
+     log(`[PDF] Detected %PDF- header at +${headerOffset} (abs=${tokenizer.position + headerOffset})`);
+     if (headerOffset > 0)
+         await skipBytes(tokenizer, headerOffset);
+     const reader = new PdfTokenizerReader(tokenizer, { debug });
+     // pushback so we don't lose a line when probing for "stream"
+     let pendingLine = null;
+     const readLine = async () => {
+         if (pendingLine !== null) {
+             const l = pendingLine;
+             pendingLine = null;
+             return l;
+         }
+         return await reader.readLine();
+     };
+     const creatorToolListeners = subtypeProbes
+         .map(p => p.onCreatorTool)
+         .filter((fn) => typeof fn === "function");
+     log("[ROOT] Start parsing (PDF)");
+     let state = 0; // ROOT=0, OBJ=10
+     let scannedLines = 0;
+     while (scannedLines++ < maxScanLines) {
+         const line = await readLine();
+         if (line === null)
+             break;
+         if (state === 0) {
+             const m = OBJ_REGEX.exec(line);
+             if (m) {
+                 log(`Found object: ${m[1]} Generation: ${m[2]}`);
+                 state = 10;
+             }
+             continue;
+         }
+         if (state === 10) {
+             if (line.trim() === "endobj") {
+                 log("[OBJ] => [ROOT]");
+                 state = 0;
+                 continue;
+             }
+             if (!line.includes("<<"))
+                 continue;
+             const { dictText, streamInline } = await readDictionaryBlock(reader, line);
+             if (!dictText)
+                 continue;
+             log(`[OBJ] Dictionary content: ${dictText.replace(/\s+/g, " ")}`);
+             log(streamInline ? "[OBJ] Stream keyword detected: stream" : "[OBJ] No stream keyword present on this line.");
+             const objectInfo = parseDictFromRaw(dictText);
+             // Dict probes
+             for (const probe of subtypeProbes) {
+                 if (!probe.onDict)
+                     continue;
+                 const hit = probe.onDict(ctx, dictText, objectInfo);
+                 if (hit)
+                     return hit;
+             }
+             // Stream check with pushback
+             let hasStream = streamInline;
+             if (!hasStream) {
+                 const nextLine = await readLine();
+                 if (nextLine === null)
+                     break;
+                 if (nextLine.trim() === "stream") {
+                     hasStream = true;
+                 }
+                 else {
+                     pendingLine = nextLine;
+                 }
+             }
+             if (!hasStream)
+                 continue;
+             // Length may be indirect like "12 0 R", skip if not numeric
+             const lenVal = objectInfo.Length;
+             if (!lenVal || lenVal === true)
+                 continue;
+             const streamLength = parseInt(lenVal, 10);
+             if (!Number.isFinite(streamLength) || streamLength < 0)
+                 continue;
+             log(`[OBJ] => [STREAM] Start read stream of ${streamLength} bytes`);
+             await reader.consumeStreamEol();
+             const rawBytes = await reader.readBytes(streamLength);
+             if (!rawBytes)
+                 break;
+             const decodedBytes = await decodeStreamBytes(objectInfo, rawBytes);
+             const streamText = utf8Decoder.decode(decodedBytes);
+             // Stream probes
+             for (const probe of subtypeProbes) {
+                 if (!probe.onStreamText)
+                     continue;
+                 const hit = probe.onStreamText(ctx, streamText, objectInfo);
+                 if (hit)
+                     return hit;
+             }
+             // XMP CreatorTool
+             const looksLikeXmp = objectInfo.Type === "Metadata" ||
+                 objectInfo.Type === "/Metadata" ||
+                 objectInfo.Subtype === "XML" ||
+                 objectInfo.Subtype === "/XML" ||
+                 objectInfo.XML === true;
+             if (looksLikeXmp && creatorToolListeners.length) {
+                 log("[STREAM] XML metadata detected, feeding SAX");
+                 const xml = new XmlHandler({
+                     onCreatorTool: (v) => {
+                         log(`CreatorTool=${v}`);
+                         for (const fn of creatorToolListeners) {
+                             const hit = fn(ctx, v);
+                             if (hit)
+                                 throw hit;
+                         }
+                     },
+                 });
+                 try {
+                     xml.write(streamText);
+                     xml.close();
+                 }
+                 catch (e) {
+                     if (e && typeof e === "object" && "ext" in e && "mime" in e) {
+                         return e;
+                     }
+                     throw e;
+                 }
+             }
+             log("[STREAM] => [OBJ]");
+         }
+     }
+     log("[ROOT] Done parsing (PDF)");
+     return PDF_TYPE;
+ }
+ export const detectPdf = {
+     id: 'pdf',
+     detect: async (tokenizer) => {
+         return _detectPdf(tokenizer);
+     }
+ };
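Because `detectPdf` is a plain `{ id, detect }` object, the contract documented above `_detectPdf` can also be exercised directly against a strtok3 tokenizer, outside of `FileTypeParser`. A minimal sketch, assuming strtok3's `fromBuffer` helper; the sample buffers are placeholders:

```js
import { fromBuffer } from 'strtok3';
import { detectPdf } from '@file-type/pdf';

// Non-PDF input: the detector only peeks, so it returns undefined and leaves
// the tokenizer position untouched for the next detector in the chain.
const png = fromBuffer(Buffer.from('89504e470d0a1a0a', 'hex'));
console.log(await detectPdf.detect(png)); // undefined
console.log(png.position);                // 0

// Minimal PDF input without any Illustrator markers: generic PDF result.
const pdf = fromBuffer(Buffer.from('%PDF-1.4\n%%EOF\n', 'latin1'));
console.log(await detectPdf.detect(pdf)); // { ext: 'pdf', mime: 'application/pdf' }
```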
package/package.json ADDED
@@ -0,0 +1,52 @@
+ {
+   "name": "@file-type/pdf",
+   "version": "0.1.0",
+   "description": "file-type plugin to parse PDF files",
+   "type": "module",
+   "exports": {
+     "default": "./lib/index.js"
+   },
+   "files": [
+     "lib/**/*.js",
+     "lib/**/*.d.ts"
+   ],
+   "scripts": {
+     "clean": "del-cli lib/**/*.js lib/**/*.js.map lib/**/*.d.ts src/**/*.d.ts",
+     "compile": "tsc -p lib",
+     "build": "npm run clean && npm run compile",
+     "lint:ts": "biome check",
+     "test": "mocha",
+     "prepublishOnly": "npm run build",
+     "update-biome": "npm add -D --exact @biomejs/biome && npx @biomejs/biome migrate --write"
+   },
+   "repository": {
+     "type": "git",
+     "url": "git+https://github.com/Borewit/file-type-pdf.git"
+   },
+   "keywords": [
+     "file-type",
+     "PDF",
+     "XMP"
+   ],
+   "author": "Borewit",
+   "license": "MIT",
+   "bugs": {
+     "url": "https://github.com/Borewit/file-type-pdf/issues"
+   },
+   "homepage": "https://github.com/Borewit/file-type-pdf#readme",
+   "dependencies": {
+     "read-next-line": "^0.5.0",
+     "sax": "^1.4.1"
+   },
+   "devDependencies": {
+     "@biomejs/biome": "^2.3.10",
+     "@types/node": "^25.0.3",
+     "@types/sax": "^1.2.7",
+     "chai": "^6.2.2",
+     "del-cli": "^7.0.0",
+     "file-type": "^21.2.0",
+     "mocha": "^11.7.5",
+     "strtok3": "^10.3.4",
+     "typescript": "^5.9.3"
+   }
+ }