@likecoin/epubcheck-ts 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,7 +1,420 @@
1
1
  import { XmlDocument } from 'libxml2-wasm';
2
+ import { parse, walk } from 'css-tree';
2
3
  import { unzipSync, strFromU8, gunzipSync } from 'fflate';
3
4
 
4
5
  // src/content/validator.ts
6
/**
 * Media types accepted for font resources referenced from `@font-face` rules.
 * `CSSValidator.checkFontType` reports CSS-007 for anything outside this set.
 */
var BLESSED_FONT_TYPES = /* @__PURE__ */ new Set([
  "application/font-woff",
  "application/font-woff2",
  "font/woff",
  "font/woff2",
  "font/otf",
  "font/ttf",
  "application/vnd.ms-opentype",
  "application/font-sfnt",
  "application/x-font-ttf",
  "application/x-font-opentype",
  "application/x-font-truetype"
]);
/**
 * Media type implied by a font file extension.
 * Keys are lower-case and include the leading dot.
 */
var FONT_EXTENSION_TO_TYPE = {
  ".woff": "font/woff",
  ".woff2": "font/woff2",
  ".otf": "font/otf",
  ".ttf": "font/ttf"
};
/**
 * Validates one CSS style sheet: reports parse errors and EPUB-specific
 * problems into `context.messages`, and collects the `@import` / `@font-face`
 * references and declared font families found in it.
 */
var CSSValidator = class {
  /**
   * Validate CSS content and extract references.
   *
   * @param context - Validation context; messages are appended to `context.messages`.
   * @param css - CSS source text.
   * @param resourcePath - Path of the style sheet, used in message locations.
   * @returns `{ references, fontFamilies }` collected from the style sheet.
   *          Both arrays are empty when parsing throws.
   */
  validate(context, css, resourcePath) {
    const result = {
      references: [],
      fontFamilies: []
    };
    let ast;
    try {
      ast = parse(css, {
        positions: true,
        // Recoverable parse errors are reported one by one; parsing continues.
        onParseError: (error) => {
          context.messages.push({
            id: "CSS-008",
            severity: "error",
            message: `CSS parse error: ${error.formattedMessage}`,
            location: this.buildLocation(resourcePath, error)
          });
        }
      });
    } catch (error) {
      context.messages.push({
        id: "CSS-008",
        severity: "error",
        message: `CSS parse error: ${error instanceof Error ? error.message : "Unknown error"}`,
        location: { path: resourcePath }
      });
      return result;
    }
    this.checkDiscouragedProperties(context, ast, resourcePath);
    this.checkAtRules(context, ast, resourcePath, result);
    this.checkMediaOverlayClasses(context, ast, resourcePath);
    return result;
  }
  /**
   * Build a message location for `resourcePath`.
   *
   * @param resourcePath - Path of the style sheet.
   * @param position - Optional object carrying `line` / `column` (an AST
   *                   `loc.start` or a parse error). Each field is copied only
   *                   when it is not `undefined`.
   * @returns A fresh `{ path, line?, column? }` object.
   */
  buildLocation(resourcePath, position) {
    const location = { path: resourcePath };
    if (position?.line !== void 0) location.line = position.line;
    if (position?.column !== void 0) location.column = position.column;
    return location;
  }
  /**
   * Check every declaration for forbidden and discouraged CSS properties.
   */
  checkDiscouragedProperties(context, ast, resourcePath) {
    walk(ast, (node) => {
      if (node.type !== "Declaration") return;
      this.checkForbiddenProperties(context, node, resourcePath);
      this.checkPositionProperty(context, node, resourcePath);
    });
  }
  /**
   * Report CSS-001 for the `direction` and `unicode-bidi` properties, which
   * must not be used in an EPUB style sheet. Property names are compared
   * case-insensitively.
   */
  checkForbiddenProperties(context, node, resourcePath) {
    const property = node.property.toLowerCase();
    if (property !== "direction" && property !== "unicode-bidi") return;
    context.messages.push({
      id: "CSS-001",
      severity: "error",
      message: `CSS property "${property}" must not be included in an EPUB Style Sheet`,
      location: this.buildLocation(resourcePath, node.loc?.start)
    });
  }
  /**
   * Report discouraged `position` values: CSS-006 for `fixed`, CSS-019 for
   * `absolute`. Other values and other properties are ignored.
   */
  checkPositionProperty(context, node, resourcePath) {
    if (node.property.toLowerCase() !== "position") return;
    const value = this.getDeclarationValue(node);
    if (value === "fixed") {
      context.messages.push({
        id: "CSS-006",
        severity: "warning",
        message: 'CSS property "position: fixed" is discouraged in EPUB',
        location: this.buildLocation(resourcePath, node.loc?.start)
      });
    } else if (value === "absolute") {
      context.messages.push({
        id: "CSS-019",
        severity: "warning",
        message: 'CSS property "position: absolute" should be used with caution in EPUB',
        location: this.buildLocation(resourcePath, node.loc?.start)
      });
    }
  }
  /**
   * Return the lower-cased name of the first value token of a declaration
   * when that token is an identifier, otherwise an empty string.
   */
  getDeclarationValue(node) {
    const value = node.value;
    if (value.type === "Value") {
      const first = value.children.first;
      if (first?.type === "Identifier") {
        return first.name.toLowerCase();
      }
    }
    return "";
  }
  /**
   * Dispatch `@import` and `@font-face` at-rules to their checks.
   * At-rule names are compared case-insensitively.
   */
  checkAtRules(context, ast, resourcePath, result) {
    walk(ast, (node) => {
      if (node.type !== "Atrule") return;
      const ruleName = node.name.toLowerCase();
      if (ruleName === "import") {
        this.checkImport(context, node, resourcePath, result);
      } else if (ruleName === "font-face") {
        this.checkFontFace(context, node, resourcePath, result);
      }
    });
  }
  /**
   * Check an `@import` rule: report CSS-002 when it has no usable URL,
   * otherwise record the imported URL in `result.references`.
   */
  checkImport(context, atRule, resourcePath, result) {
    const start = atRule.loc?.start;
    const location = this.buildLocation(resourcePath, start);
    if (!atRule.prelude) {
      context.messages.push({
        id: "CSS-002",
        severity: "error",
        message: "Empty @import rule",
        location
      });
      return;
    }
    // The first url(...) or string token in the prelude is the import target.
    let importUrl = "";
    walk(atRule.prelude, (node) => {
      if (importUrl) return;
      if (node.type === "Url") {
        importUrl = this.extractUrlValue(node);
      } else if (node.type === "String") {
        importUrl = node.value;
      }
    });
    if (!importUrl || importUrl.trim() === "") {
      context.messages.push({
        id: "CSS-002",
        severity: "error",
        message: "Empty or NULL reference found in @import",
        location
      });
      return;
    }
    result.references.push({
      url: importUrl,
      type: "import",
      line: start?.line,
      column: start?.column
    });
  }
  /**
   * Check an `@font-face` rule.
   *
   * Emits CSS-028 (usage) when `context.options.includeUsage` is set, CSS-019
   * when the rule has no declarations or no `src`, validates each `src`
   * declaration, and records the declared `font-family` in
   * `result.fontFamilies`.
   */
  checkFontFace(context, atRule, resourcePath, result) {
    const location = this.buildLocation(resourcePath, atRule.loc?.start);
    if (context.options.includeUsage) {
      context.messages.push({
        id: "CSS-028",
        severity: "usage",
        message: "Use of @font-face declaration",
        location
      });
    }
    if (!atRule.block || atRule.block.children.isEmpty) {
      context.messages.push({
        id: "CSS-019",
        severity: "warning",
        message: "@font-face declaration has no attributes",
        location
      });
      return;
    }
    let hasSrc = false;
    let fontFamily = null;
    walk(atRule.block, (node) => {
      if (node.type !== "Declaration") return;
      const propName = node.property.toLowerCase();
      if (propName === "font-family") {
        fontFamily = this.extractFontFamily(node);
      } else if (propName === "src") {
        hasSrc = true;
        this.checkFontFaceSrc(context, node, resourcePath, result);
      }
    });
    if (fontFamily) {
      result.fontFamilies.push(fontFamily);
    }
    if (!hasSrc) {
      context.messages.push({
        id: "CSS-019",
        severity: "warning",
        message: "@font-face declaration is missing src property",
        location
      });
    }
  }
  /**
   * Check the `src` declaration of an `@font-face` rule.
   *
   * Every `url(...)` in the value is examined: empty URLs raise CSS-002,
   * `data:` URLs and fragment-only URLs are ignored, and all others are
   * recorded as font references and passed to `checkFontType`.
   */
  checkFontFaceSrc(context, decl, resourcePath, result) {
    const start = decl.loc?.start;
    const location = this.buildLocation(resourcePath, start);
    walk(decl.value, (node) => {
      if (node.type !== "Url") return;
      const urlValue = this.extractUrlValue(node);
      if (!urlValue || urlValue.trim() === "") {
        context.messages.push({
          id: "CSS-002",
          severity: "error",
          message: "Empty or NULL reference found in @font-face src",
          location
        });
        return;
      }
      if (urlValue.startsWith("data:") || urlValue.startsWith("#")) {
        return;
      }
      result.references.push({
        url: urlValue,
        type: "font",
        line: start?.line,
        column: start?.column
      });
      this.checkFontType(context, urlValue, resourcePath, location);
    });
  }
  /**
   * Report CSS-007 when a font reference does not use a blessed font type.
   *
   * Two checks are made: one on the type implied by the file extension and
   * one on the media type declared for the resource in the package manifest.
   * Both operate on the URL with its query and fragment removed, so that
   * references such as `font.woff?v=2` or `font.woff#id` are still matched
   * against their manifest item. Nothing is reported when the URL path has no
   * file extension.
   *
   * @param context - Validation context (`messages`, optional `packageDocument`).
   * @param fontUrl - URL exactly as written in the style sheet (used in messages).
   * @param resourcePath - Path of the style sheet containing the reference.
   * @param location - Location attached to any message produced.
   */
  checkFontType(context, fontUrl, resourcePath, location) {
    // Query and fragment are not part of the resource path.
    const urlPath = fontUrl.split(/[?#]/, 1)[0] ?? fontUrl;
    const extMatch = /\.[a-zA-Z0-9]+$/.exec(urlPath);
    if (!extMatch) return;
    const ext = extMatch[0].toLowerCase();
    // Every type currently listed in FONT_EXTENSION_TO_TYPE is blessed, so
    // this check only fires if that table gains a non-blessed entry.
    const mimeType = FONT_EXTENSION_TO_TYPE[ext];
    if (mimeType && !BLESSED_FONT_TYPES.has(mimeType)) {
      context.messages.push({
        id: "CSS-007",
        severity: "error",
        message: `Font-face reference "${fontUrl}" refers to non-standard font type "${mimeType}"`,
        location
      });
    }
    const packageDoc = context.packageDocument;
    if (!packageDoc) return;
    const cssDir = resourcePath.includes("/") ? resourcePath.substring(0, resourcePath.lastIndexOf("/")) : "";
    const resolvedPath = this.resolvePath(cssDir, urlPath);
    const manifestItem = packageDoc.manifest.find((item) => item.href === resolvedPath);
    if (manifestItem && !BLESSED_FONT_TYPES.has(manifestItem.mediaType)) {
      context.messages.push({
        id: "CSS-007",
        severity: "error",
        message: `Font-face reference "${fontUrl}" has non-standard media type "${manifestItem.mediaType}" in manifest`,
        location
      });
    }
  }
  /**
   * Return the string value of a `Url` node, or an empty string when the
   * node's value is not a string.
   */
  extractUrlValue(urlNode) {
    const value = urlNode.value;
    return typeof value === "string" ? value : "";
  }
  /**
   * Return the first family named by a `font-family` declaration (quoted
   * string or identifier, original casing preserved), or `null` when the
   * first value token is neither.
   */
  extractFontFamily(decl) {
    const value = decl.value;
    if (value.type === "Value") {
      const first = value.children.first;
      if (first?.type === "String") {
        return first.value;
      }
      if (first?.type === "Identifier") {
        return first.name;
      }
    }
    return null;
  }
  /**
   * Resolve `relativePath` against the directory `basePath`.
   *
   * A leading "/" makes the path root-relative (the slash is dropped);
   * "." and empty segments are skipped and ".." removes the previous segment.
   *
   * @returns The resolved path, without a leading slash.
   */
  resolvePath(basePath, relativePath) {
    if (relativePath.startsWith("/")) {
      return relativePath.substring(1);
    }
    const resultSegments = basePath.split("/").filter(Boolean);
    for (const segment of relativePath.split("/")) {
      if (segment === "..") {
        resultSegments.pop();
      } else if (segment !== "." && segment !== "") {
        resultSegments.push(segment);
      }
    }
    return resultSegments.join("/");
  }
  /**
   * Check class selectors against the media overlay class names.
   *
   * CSS-029 is reported for the four reserved names and CSS-030 for any class
   * starting with "-epub-media-overlay-"; a class can trigger both. Class
   * names are compared case-insensitively.
   */
  checkMediaOverlayClasses(context, ast, resourcePath) {
    const reservedClassNames = /* @__PURE__ */ new Set([
      "-epub-media-overlay-active",
      "media-overlay-active",
      "-epub-media-overlay-playing",
      "media-overlay-playing"
    ]);
    walk(ast, (node) => {
      if (node.type !== "ClassSelector") return;
      const className = node.name.toLowerCase();
      if (reservedClassNames.has(className)) {
        context.messages.push({
          id: "CSS-029",
          severity: "error",
          message: `Class name "${className}" is reserved for media overlays`,
          location: this.buildLocation(resourcePath, node.loc?.start)
        });
      }
      if (className.startsWith("-epub-media-overlay-")) {
        context.messages.push({
          id: "CSS-030",
          severity: "warning",
          message: `Class names starting with "-epub-media-overlay-" are reserved for future use`,
          location: this.buildLocation(resourcePath, node.loc?.start)
        });
      }
    });
  }
};
5
418
 
6
419
  // src/references/types.ts
7
420
  function isPublicationResourceReference(type) {
@@ -43,6 +456,8 @@ var ContentValidator = class {
43
456
  return;
44
457
  }
45
458
  const cssContent = new TextDecoder().decode(cssData);
459
+ const cssValidator = new CSSValidator();
460
+ cssValidator.validate(context, cssContent, path);
46
461
  this.extractCSSImports(path, cssContent, opfDir, refValidator);
47
462
  }
48
463
  validateXHTMLDocument(context, path, itemId, opfDir, registry, refValidator) {
@@ -177,6 +592,7 @@ var ContentValidator = class {
177
592
  this.extractAndRegisterHyperlinks(path, root, opfDir, refValidator);
178
593
  this.extractAndRegisterStylesheets(path, root, opfDir, refValidator);
179
594
  this.extractAndRegisterImages(path, root, opfDir, refValidator);
595
+ this.extractAndRegisterCiteAttributes(path, root, opfDir, refValidator);
180
596
  }
181
597
  } finally {
182
598
  doc.dispose();
@@ -273,10 +689,18 @@ var ContentValidator = class {
273
689
  }
274
690
  }
275
691
  detectScripts(_context, _path, root) {
276
- const htmlScript = root.get(".//html:script", { html: "http://www.w3.org/1999/xhtml" });
277
- if (htmlScript) return true;
278
- const svgScript = root.get(".//svg:script", { svg: "http://www.w3.org/2000/svg" });
279
- if (svgScript) return true;
692
+ const htmlScripts = root.find(".//html:script", { html: "http://www.w3.org/1999/xhtml" });
693
+ for (const script of htmlScripts) {
694
+ if (this.isScriptType(this.getAttribute(script, "type"))) {
695
+ return true;
696
+ }
697
+ }
698
+ const svgScripts = root.find(".//svg:script", { svg: "http://www.w3.org/2000/svg" });
699
+ for (const script of svgScripts) {
700
+ if (this.isScriptType(this.getAttribute(script, "type"))) {
701
+ return true;
702
+ }
703
+ }
280
704
  const form = root.get(".//html:form", { html: "http://www.w3.org/1999/xhtml" });
281
705
  if (form) return true;
282
706
  const elementsWithEvents = root.find(
@@ -285,6 +709,35 @@ var ContentValidator = class {
285
709
  if (elementsWithEvents.length > 0) return true;
286
710
  return false;
287
711
  }
712
+ /**
713
+ * Check if the script type is a JavaScript type that requires "scripted" property.
714
+ * Per EPUB spec and Java EPUBCheck, only JavaScript types require it.
715
+ * Data block types like application/ld+json, application/json do NOT require it.
716
+ */
717
+ isScriptType(type) {
718
+ if (!type || type.trim() === "") return true;
719
+ const jsTypes = /* @__PURE__ */ new Set([
720
+ "application/javascript",
721
+ "text/javascript",
722
+ "application/ecmascript",
723
+ "application/x-ecmascript",
724
+ "application/x-javascript",
725
+ "text/ecmascript",
726
+ "text/javascript1.0",
727
+ "text/javascript1.1",
728
+ "text/javascript1.2",
729
+ "text/javascript1.3",
730
+ "text/javascript1.4",
731
+ "text/javascript1.5",
732
+ "text/jscript",
733
+ "text/livescript",
734
+ "text/x-ecmascript",
735
+ "text/x-javascript",
736
+ "module"
737
+ // ES modules
738
+ ]);
739
+ return jsTypes.has(type.toLowerCase());
740
+ }
288
741
  detectMathML(_context, _path, root) {
289
742
  const mathMLElements = root.find(".//math:*", { math: "http://www.w3.org/1998/Math/MathML" });
290
743
  return mathMLElements.length > 0;
@@ -859,6 +1312,53 @@ var ContentValidator = class {
859
1312
  });
860
1313
  }
861
1314
  }
1315
+ /**
1316
+ * Extract cite attribute references from blockquote, q, ins, del elements
1317
+ * These need to be validated as RSC-007 if the referenced resource is missing
1318
+ */
1319
+ extractAndRegisterCiteAttributes(path, root, opfDir, refValidator) {
1320
+ const docDir = path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "";
1321
+ const citeElements = [
1322
+ ...root.find(".//html:blockquote[@cite]", { html: "http://www.w3.org/1999/xhtml" }),
1323
+ ...root.find(".//html:q[@cite]", { html: "http://www.w3.org/1999/xhtml" }),
1324
+ ...root.find(".//html:ins[@cite]", { html: "http://www.w3.org/1999/xhtml" }),
1325
+ ...root.find(".//html:del[@cite]", { html: "http://www.w3.org/1999/xhtml" })
1326
+ ];
1327
+ for (const elem of citeElements) {
1328
+ const cite = this.getAttribute(elem, "cite");
1329
+ if (!cite) continue;
1330
+ const line = elem.line;
1331
+ if (cite.startsWith("http://") || cite.startsWith("https://")) {
1332
+ continue;
1333
+ }
1334
+ if (cite.startsWith("#")) {
1335
+ const targetResource2 = path;
1336
+ const fragment2 = cite.slice(1);
1337
+ refValidator.addReference({
1338
+ url: cite,
1339
+ targetResource: targetResource2,
1340
+ fragment: fragment2,
1341
+ type: "hyperlink" /* HYPERLINK */,
1342
+ location: { path, line }
1343
+ });
1344
+ continue;
1345
+ }
1346
+ const resolvedPath = this.resolveRelativePath(docDir, cite, opfDir);
1347
+ const hashIndex = resolvedPath.indexOf("#");
1348
+ const targetResource = hashIndex >= 0 ? resolvedPath.slice(0, hashIndex) : resolvedPath;
1349
+ const fragment = hashIndex >= 0 ? resolvedPath.slice(hashIndex + 1) : void 0;
1350
+ const ref = {
1351
+ url: cite,
1352
+ targetResource,
1353
+ type: "hyperlink" /* HYPERLINK */,
1354
+ location: { path, line }
1355
+ };
1356
+ if (fragment) {
1357
+ ref.fragment = fragment;
1358
+ }
1359
+ refValidator.addReference(ref);
1360
+ }
1361
+ }
862
1362
  resolveRelativePath(docDir, href, _opfDir) {
863
1363
  const hrefWithoutFragment = href.split("#")[0] ?? href;
864
1364
  const fragment = href.includes("#") ? href.split("#")[1] : "";
@@ -1177,6 +1677,93 @@ var ZipReader = class _ZipReader {
1177
1677
  const prefix = dirPath.endsWith("/") ? dirPath : `${dirPath}/`;
1178
1678
  return this._paths.filter((p) => p.startsWith(prefix));
1179
1679
  }
1680
+ /**
1681
+ * Check for filenames that are not valid UTF-8 by parsing raw ZIP data
1682
+ *
1683
+ * ZIP files store filenames as bytes. The EPUB spec requires filenames to be UTF-8.
1684
+ * This method parses the ZIP central directory to find filenames with invalid UTF-8.
1685
+ *
1686
+ * @returns Array of filenames with invalid UTF-8 encoding
1687
+ */
1688
+ getInvalidUtf8Filenames() {
1689
+ const invalid = [];
1690
+ const data = this._rawData;
1691
+ let eocdOffset = -1;
1692
+ for (let i = data.length - 22; i >= 0; i--) {
1693
+ if (data[i] === 80 && data[i + 1] === 75 && data[i + 2] === 5 && data[i + 3] === 6) {
1694
+ eocdOffset = i;
1695
+ break;
1696
+ }
1697
+ }
1698
+ if (eocdOffset === -1) {
1699
+ return invalid;
1700
+ }
1701
+ const cdOffset = (data[eocdOffset + 16] ?? 0) | (data[eocdOffset + 17] ?? 0) << 8 | (data[eocdOffset + 18] ?? 0) << 16 | (data[eocdOffset + 19] ?? 0) << 24;
1702
+ let offset = cdOffset;
1703
+ while (offset < eocdOffset) {
1704
+ if (data[offset] !== 80 || data[offset + 1] !== 75 || data[offset + 2] !== 1 || data[offset + 3] !== 2) {
1705
+ break;
1706
+ }
1707
+ const filenameLength = (data[offset + 28] ?? 0) | (data[offset + 29] ?? 0) << 8;
1708
+ const extraLength = (data[offset + 30] ?? 0) | (data[offset + 31] ?? 0) << 8;
1709
+ const commentLength = (data[offset + 32] ?? 0) | (data[offset + 33] ?? 0) << 8;
1710
+ const filenameBytes = data.slice(offset + 46, offset + 46 + filenameLength);
1711
+ const utf8Error = this.validateUtf8(filenameBytes);
1712
+ if (utf8Error) {
1713
+ const filename = strFromU8(filenameBytes);
1714
+ invalid.push({ filename, reason: utf8Error });
1715
+ }
1716
+ offset += 46 + filenameLength + extraLength + commentLength;
1717
+ }
1718
+ return invalid;
1719
+ }
1720
+ /**
1721
+ * Validate that bytes form a valid UTF-8 sequence
1722
+ *
1723
+ * @returns Error description if invalid, undefined if valid
1724
+ */
1725
+ validateUtf8(bytes) {
1726
+ let i = 0;
1727
+ while (i < bytes.length) {
1728
+ const byte = bytes[i] ?? 0;
1729
+ if (byte <= 127) {
1730
+ i++;
1731
+ } else if ((byte & 224) === 192) {
1732
+ if (byte < 194) {
1733
+ return `Overlong encoding at byte ${String(i)}`;
1734
+ }
1735
+ if (i + 1 >= bytes.length || ((bytes[i + 1] ?? 0) & 192) !== 128) {
1736
+ return `Invalid continuation byte at position ${String(i + 1)}`;
1737
+ }
1738
+ i += 2;
1739
+ } else if ((byte & 240) === 224) {
1740
+ if (i + 2 >= bytes.length || ((bytes[i + 1] ?? 0) & 192) !== 128 || ((bytes[i + 2] ?? 0) & 192) !== 128) {
1741
+ return `Invalid continuation byte in 3-byte sequence at position ${String(i)}`;
1742
+ }
1743
+ if (byte === 224 && (bytes[i + 1] ?? 0) < 160) {
1744
+ return `Overlong 3-byte encoding at byte ${String(i)}`;
1745
+ }
1746
+ if (byte === 237 && (bytes[i + 1] ?? 0) >= 160) {
1747
+ return `UTF-16 surrogate at byte ${String(i)}`;
1748
+ }
1749
+ i += 3;
1750
+ } else if ((byte & 248) === 240) {
1751
+ if (i + 3 >= bytes.length || ((bytes[i + 1] ?? 0) & 192) !== 128 || ((bytes[i + 2] ?? 0) & 192) !== 128 || ((bytes[i + 3] ?? 0) & 192) !== 128) {
1752
+ return `Invalid continuation byte in 4-byte sequence at position ${String(i)}`;
1753
+ }
1754
+ if (byte === 240 && (bytes[i + 1] ?? 0) < 144) {
1755
+ return `Overlong 4-byte encoding at byte ${String(i)}`;
1756
+ }
1757
+ if (byte > 244 || byte === 244 && (bytes[i + 1] ?? 0) > 143) {
1758
+ return `Code point exceeds U+10FFFF at byte ${String(i)}`;
1759
+ }
1760
+ i += 4;
1761
+ } else {
1762
+ return `Invalid UTF-8 start byte 0x${byte.toString(16).toUpperCase()} at position ${String(i)}`;
1763
+ }
1764
+ }
1765
+ return void 0;
1766
+ }
1180
1767
  };
1181
1768
 
1182
1769
  // src/ocf/validator.ts
@@ -1207,6 +1794,8 @@ var OCFValidator = class {
1207
1794
  this.validateContainer(zip, context);
1208
1795
  this.validateMetaInf(zip, context.messages);
1209
1796
  this.validateFilenames(zip, context.messages);
1797
+ this.validateDuplicateFilenames(zip, context.messages);
1798
+ this.validateUtf8Filenames(zip, context.messages);
1210
1799
  this.validateEmptyDirectories(zip, context.messages);
1211
1800
  }
1212
1801
  /**
@@ -1273,20 +1862,11 @@ var OCFValidator = class {
1273
1862
  });
1274
1863
  return;
1275
1864
  }
1276
- const trimmed = content.trim();
1277
- if (trimmed !== EPUB_MIMETYPE) {
1865
+ if (content !== EPUB_MIMETYPE) {
1278
1866
  messages.push({
1279
1867
  id: "PKG-007",
1280
1868
  severity: "error",
1281
- message: `Mimetype file must contain "${EPUB_MIMETYPE}", found "${trimmed}"`,
1282
- location: { path: "mimetype" }
1283
- });
1284
- }
1285
- if (content !== EPUB_MIMETYPE) {
1286
- messages.push({
1287
- id: "PKG-008",
1288
- severity: "warning",
1289
- message: "Mimetype file should not contain leading/trailing whitespace or newlines",
1869
+ message: `Mimetype file must contain exactly "${EPUB_MIMETYPE}"`,
1290
1870
  location: { path: "mimetype" }
1291
1871
  });
1292
1872
  }
@@ -1298,9 +1878,9 @@ var OCFValidator = class {
1298
1878
  const containerPath = "META-INF/container.xml";
1299
1879
  if (!zip.has(containerPath)) {
1300
1880
  context.messages.push({
1301
- id: "PKG-003",
1881
+ id: "RSC-002",
1302
1882
  severity: "fatal",
1303
- message: "Missing META-INF/container.xml",
1883
+ message: "Required file META-INF/container.xml was not found",
1304
1884
  location: { path: containerPath }
1305
1885
  });
1306
1886
  return;
@@ -1308,7 +1888,7 @@ var OCFValidator = class {
1308
1888
  const content = zip.readText(containerPath);
1309
1889
  if (!content) {
1310
1890
  context.messages.push({
1311
- id: "PKG-003",
1891
+ id: "RSC-002",
1312
1892
  severity: "fatal",
1313
1893
  message: "Could not read META-INF/container.xml",
1314
1894
  location: { path: containerPath }
@@ -1382,8 +1962,15 @@ var OCFValidator = class {
1382
1962
  }
1383
1963
  /**
1384
1964
  * Validate filenames for invalid characters
1965
+ *
1966
+ * Per EPUB 3.3 spec and Java EPUBCheck:
1967
+ * - PKG-009: Disallowed characters (ASCII special chars, control chars, private use, etc.)
1968
+ * - PKG-010: Whitespace characters (warning)
1969
+ * - PKG-011: Filename ends with period
1970
+ * - PKG-012: Non-ASCII characters (usage info)
1385
1971
  */
1386
1972
  validateFilenames(zip, messages) {
1973
+ const DISALLOWED_ASCII = /* @__PURE__ */ new Set([34, 42, 58, 60, 62, 63, 92, 124]);
1387
1974
  for (const path of zip.paths) {
1388
1975
  if (path === "mimetype") continue;
1389
1976
  if (path.endsWith("/")) continue;
@@ -1397,30 +1984,164 @@ var OCFValidator = class {
1397
1984
  });
1398
1985
  continue;
1399
1986
  }
1987
+ const disallowed = [];
1988
+ let hasSpaces = false;
1400
1989
  for (let i = 0; i < filename.length; i++) {
1401
1990
  const code = filename.charCodeAt(i);
1402
- if (code < 32 || code === 127 || code >= 128 && code <= 159) {
1403
- messages.push({
1404
- id: "PKG-010",
1405
- severity: "error",
1406
- message: `Filename contains control character: "${path}"`,
1407
- location: { path }
1408
- });
1409
- break;
1991
+ if (DISALLOWED_ASCII.has(code)) {
1992
+ const char = filename[i] ?? "";
1993
+ disallowed.push(`U+${code.toString(16).toUpperCase().padStart(4, "0")} (${char})`);
1994
+ } else if (code <= 31 || code === 127 || code >= 128 && code <= 159) {
1995
+ disallowed.push(`U+${code.toString(16).toUpperCase().padStart(4, "0")} (CONTROL)`);
1996
+ } else if (code >= 57344 && code <= 63743) {
1997
+ disallowed.push(`U+${code.toString(16).toUpperCase().padStart(4, "0")} (PRIVATE USE)`);
1998
+ } else if (code >= 65520 && code <= 65535) {
1999
+ disallowed.push(`U+${code.toString(16).toUpperCase().padStart(4, "0")} (SPECIALS)`);
1410
2000
  }
1411
- }
1412
- const specialChars = '<>:"|?*';
1413
- for (const char of specialChars) {
1414
- if (filename.includes(char)) {
1415
- messages.push({
1416
- id: "PKG-011",
1417
- severity: "error",
1418
- message: `Filename contains special character: "${path}"`,
1419
- location: { path }
1420
- });
1421
- break;
2001
+ if (code === 32 || code === 9 || code === 10 || code === 13) {
2002
+ hasSpaces = true;
1422
2003
  }
1423
2004
  }
2005
+ if (filename.endsWith(".")) {
2006
+ messages.push({
2007
+ id: "PKG-011",
2008
+ severity: "error",
2009
+ message: `Filename must not end with a period: "${path}"`,
2010
+ location: { path }
2011
+ });
2012
+ }
2013
+ if (disallowed.length > 0) {
2014
+ messages.push({
2015
+ id: "PKG-009",
2016
+ severity: "error",
2017
+ message: `Filename "${path}" contains disallowed characters: ${disallowed.join(", ")}`,
2018
+ location: { path }
2019
+ });
2020
+ }
2021
+ if (hasSpaces) {
2022
+ messages.push({
2023
+ id: "PKG-010",
2024
+ severity: "warning",
2025
+ message: `Filename "${path}" contains spaces`,
2026
+ location: { path }
2027
+ });
2028
+ }
2029
+ }
2030
+ }
2031
+ /**
2032
+ * Check for duplicate filenames after Unicode normalization and case folding
2033
+ *
2034
+ * Per EPUB spec, filenames must be unique after applying:
2035
+ * - Unicode Canonical Case Fold Normalization (NFD + case folding)
2036
+ *
2037
+ * OPF-060: Duplicate filename after normalization
2038
+ */
2039
+ validateDuplicateFilenames(zip, messages) {
2040
+ const seenPaths = /* @__PURE__ */ new Set();
2041
+ const normalizedPaths = /* @__PURE__ */ new Map();
2042
+ for (const path of zip.paths) {
2043
+ if (path.endsWith("/")) continue;
2044
+ if (seenPaths.has(path)) {
2045
+ messages.push({
2046
+ id: "OPF-060",
2047
+ severity: "error",
2048
+ message: `Duplicate ZIP entry: "${path}"`,
2049
+ location: { path }
2050
+ });
2051
+ continue;
2052
+ }
2053
+ seenPaths.add(path);
2054
+ const normalized = this.canonicalCaseFold(path);
2055
+ const existing = normalizedPaths.get(normalized);
2056
+ if (existing !== void 0) {
2057
+ messages.push({
2058
+ id: "OPF-060",
2059
+ severity: "error",
2060
+ message: `Duplicate filename after Unicode normalization: "${path}" conflicts with "${existing}"`,
2061
+ location: { path }
2062
+ });
2063
+ } else {
2064
+ normalizedPaths.set(normalized, path);
2065
+ }
2066
+ }
2067
+ }
2068
+ /**
2069
+ * Apply Unicode Canonical Case Fold Normalization
2070
+ *
2071
+ * This applies:
2072
+ * 1. NFD (Canonical Decomposition) - decomposes combined characters
2073
+ * 2. Full Unicode case folding
2074
+ *
2075
+ * Based on Unicode case folding rules for filename comparison.
2076
+ */
2077
+ canonicalCaseFold(str) {
2078
+ let result = str.normalize("NFD");
2079
+ result = this.unicodeCaseFold(result);
2080
+ return result;
2081
+ }
2082
+ /**
2083
+ * Perform Unicode full case folding
2084
+ *
2085
+ * Handles special Unicode case folding rules beyond simple toLowerCase:
2086
+ * - ß (U+00DF) -> ss
2087
+ * - ẞ (U+1E9E) -> ss (capital sharp s)
2088
+ * - fi (U+FB01) -> fi
2089
+ * - fl (U+FB02) -> fl
2090
+ * - ff (U+FB00) -> ff
2091
+ * - ffi (U+FB03) -> ffi
2092
+ * - ffl (U+FB04) -> ffl
2093
+ * - ſt (U+FB05) -> st
2094
+ * - st (U+FB06) -> st
2095
+ * And other Unicode case folding rules
2096
+ */
2097
+ unicodeCaseFold(str) {
2098
+ const caseFoldMap = {
2099
+ "\xDF": "ss",
2100
+ // ß -> ss
2101
+ "\u1E9E": "ss",
2102
+ // ẞ -> ss (capital sharp s)
2103
+ "\uFB00": "ff",
2104
+ // ff -> ff
2105
+ "\uFB01": "fi",
2106
+ // fi -> fi
2107
+ "\uFB02": "fl",
2108
+ // fl -> fl
2109
+ "\uFB03": "ffi",
2110
+ // ffi -> ffi
2111
+ "\uFB04": "ffl",
2112
+ // ffl -> ffl
2113
+ "\uFB05": "st",
2114
+ // ſt -> st
2115
+ "\uFB06": "st",
2116
+ // st -> st
2117
+ "\u0130": "i\u0307"
2118
+ // İ -> i + combining dot above
2119
+ };
2120
+ let result = "";
2121
+ for (const char of str) {
2122
+ const folded = caseFoldMap[char];
2123
+ if (folded !== void 0) {
2124
+ result += folded;
2125
+ } else {
2126
+ result += char.toLowerCase();
2127
+ }
2128
+ }
2129
+ return result;
2130
+ }
2131
+ /**
2132
+ * Validate that filenames are encoded as UTF-8
2133
+ *
2134
+ * PKG-027: Filenames in EPUB ZIP archives must be UTF-8 encoded
2135
+ */
2136
+ validateUtf8Filenames(zip, messages) {
2137
+ const invalidFilenames = zip.getInvalidUtf8Filenames();
2138
+ for (const { filename, reason } of invalidFilenames) {
2139
+ messages.push({
2140
+ id: "PKG-027",
2141
+ severity: "fatal",
2142
+ message: `Filename is not valid UTF-8: "${filename}" (${reason})`,
2143
+ location: { path: filename }
2144
+ });
1424
2145
  }
1425
2146
  }
1426
2147
  /**
@@ -2115,9 +2836,9 @@ var OPFValidator = class {
2115
2836
  const fullPath = resolvePath(opfPath, item.href);
2116
2837
  if (!context.files.has(fullPath) && !item.href.startsWith("http")) {
2117
2838
  context.messages.push({
2118
- id: "OPF-010",
2839
+ id: "RSC-001",
2119
2840
  severity: "error",
2120
- message: `Manifest item "${item.id}" references missing file: ${item.href}`,
2841
+ message: `Referenced resource "${item.href}" could not be found in the EPUB`,
2121
2842
  location: { path: opfPath }
2122
2843
  });
2123
2844
  }
@@ -2204,31 +2925,6 @@ var OPFValidator = class {
2204
2925
  });
2205
2926
  }
2206
2927
  }
2207
- this.checkUndeclaredResources(context, opfPath);
2208
- }
2209
- /**
2210
- * Check for files in container that are not declared in manifest
2211
- */
2212
- checkUndeclaredResources(context, opfPath) {
2213
- if (!this.packageDoc) return;
2214
- const declaredPaths = /* @__PURE__ */ new Set();
2215
- for (const item of this.packageDoc.manifest) {
2216
- const fullPath = resolvePath(opfPath, item.href);
2217
- declaredPaths.add(fullPath);
2218
- }
2219
- declaredPaths.add(opfPath);
2220
- for (const filePath of context.files.keys()) {
2221
- if (filePath.endsWith("/")) continue;
2222
- if (filePath.startsWith("META-INF/")) continue;
2223
- if (filePath === "mimetype") continue;
2224
- if (declaredPaths.has(filePath)) continue;
2225
- context.messages.push({
2226
- id: "RSC-008",
2227
- severity: "error",
2228
- message: `File in container is not declared in manifest: ${filePath}`,
2229
- location: { path: filePath }
2230
- });
2231
- }
2232
2928
  }
2233
2929
  /**
2234
2930
  * Validate spine section
@@ -2253,32 +2949,30 @@ var OPFValidator = class {
2253
2949
  location: { path: opfPath }
2254
2950
  });
2255
2951
  }
2256
- if (this.packageDoc.version === "2.0") {
2257
- const ncxId = this.packageDoc.spineToc;
2258
- if (!ncxId) {
2952
+ const ncxId = this.packageDoc.spineToc;
2953
+ if (this.packageDoc.version === "2.0" && !ncxId) {
2954
+ context.messages.push({
2955
+ id: "OPF-050",
2956
+ severity: "warning",
2957
+ message: "EPUB 2 spine should have a toc attribute referencing the NCX",
2958
+ location: { path: opfPath }
2959
+ });
2960
+ } else if (ncxId) {
2961
+ const ncxItem = this.manifestById.get(ncxId);
2962
+ if (!ncxItem) {
2963
+ context.messages.push({
2964
+ id: "OPF-049",
2965
+ severity: "error",
2966
+ message: `Spine toc attribute references non-existent item: "${ncxId}"`,
2967
+ location: { path: opfPath }
2968
+ });
2969
+ } else if (ncxItem.mediaType !== "application/x-dtbncx+xml") {
2259
2970
  context.messages.push({
2260
2971
  id: "OPF-050",
2261
- severity: "warning",
2262
- message: "EPUB 2 spine should have a toc attribute referencing the NCX",
2972
+ severity: "error",
2973
+ message: `Spine toc attribute must reference an NCX document (media-type "application/x-dtbncx+xml"), found: "${ncxItem.mediaType}"`,
2263
2974
  location: { path: opfPath }
2264
2975
  });
2265
- } else {
2266
- const ncxItem = this.manifestById.get(ncxId);
2267
- if (!ncxItem) {
2268
- context.messages.push({
2269
- id: "OPF-049",
2270
- severity: "error",
2271
- message: `Spine toc attribute references non-existent item: "${ncxId}"`,
2272
- location: { path: opfPath }
2273
- });
2274
- } else if (ncxItem.mediaType !== "application/x-dtbncx+xml") {
2275
- context.messages.push({
2276
- id: "OPF-050",
2277
- severity: "error",
2278
- message: `NCX item must have media-type "application/x-dtbncx+xml", found: "${ncxItem.mediaType}"`,
2279
- location: { path: opfPath }
2280
- });
2281
- }
2282
2976
  }
2283
2977
  }
2284
2978
  const seenIdrefs = /* @__PURE__ */ new Set();
@@ -2293,7 +2987,7 @@ var OPFValidator = class {
2293
2987
  });
2294
2988
  continue;
2295
2989
  }
2296
- if (this.packageDoc.version === "2.0" && seenIdrefs.has(itemref.idref)) {
2990
+ if (seenIdrefs.has(itemref.idref)) {
2297
2991
  context.messages.push({
2298
2992
  id: "OPF-034",
2299
2993
  severity: "error",
@@ -2785,7 +3479,18 @@ var ReferenceValidator = class {
2785
3479
  }
2786
3480
  if (!this.registry.hasResource(resourcePath)) {
2787
3481
  const fileExistsInContainer = context.files.has(resourcePath);
2788
- if (!fileExistsInContainer) {
3482
+ if (fileExistsInContainer) {
3483
+ if (!context.referencedUndeclaredResources?.has(resourcePath)) {
3484
+ context.messages.push({
3485
+ id: "RSC-008",
3486
+ severity: "error",
3487
+ message: `Referenced resource "${resourcePath}" is not declared in the OPF manifest`,
3488
+ location: reference.location
3489
+ });
3490
+ context.referencedUndeclaredResources ??= /* @__PURE__ */ new Set();
3491
+ context.referencedUndeclaredResources.add(resourcePath);
3492
+ }
3493
+ } else {
2789
3494
  const isLinkRef = reference.type === "link" /* LINK */;
2790
3495
  context.messages.push({
2791
3496
  id: isLinkRef ? "RSC-007w" : "RSC-007",