@likecoin/epubcheck-ts 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1,9 +1,422 @@
1
1
  'use strict';
2
2
 
3
3
  var libxml2Wasm = require('libxml2-wasm');
4
+ var cssTree = require('css-tree');
4
5
  var fflate = require('fflate');
5
6
 
6
7
  // src/content/validator.ts
8
/** Media types accepted for fonts referenced from @font-face rules (anything else is CSS-007). */
var BLESSED_FONT_TYPES = /* @__PURE__ */ new Set([
  "application/font-woff",
  "application/font-woff2",
  "font/woff",
  "font/woff2",
  "font/otf",
  "font/ttf",
  "application/vnd.ms-opentype",
  "application/font-sfnt",
  "application/x-font-ttf",
  "application/x-font-opentype",
  "application/x-font-truetype"
]);
/** Maps a lower-cased font file extension (including the dot) to a media type. */
var FONT_EXTENSION_TO_TYPE = {
  ".woff": "font/woff",
  ".woff2": "font/woff2",
  ".otf": "font/otf",
  ".ttf": "font/ttf"
};
/**
 * Validates a CSS style sheet: reports parse errors and EPUB-specific
 * restrictions into `context.messages` and collects the URLs the sheet refers to.
 */
var CSSValidator = class {
  /**
   * Validate CSS content and extract references.
   *
   * @param context - validation context; messages are appended to `context.messages`
   * @param css - style sheet source text
   * @param resourcePath - path of the style sheet, used in message locations
   * @returns object with `references` (from @import / @font-face src) and
   *   `fontFamilies` (names declared in @font-face rules)
   */
  validate(context, css, resourcePath) {
    const result = {
      references: [],
      fontFamilies: []
    };
    let ast;
    try {
      ast = cssTree.parse(css, {
        positions: true,
        onParseError: (error) => {
          const location = { path: resourcePath };
          if (error.line !== void 0) location.line = error.line;
          if (error.column !== void 0) location.column = error.column;
          context.messages.push({
            id: "CSS-008",
            severity: "error",
            message: `CSS parse error: ${error.formattedMessage}`,
            location
          });
        }
      });
    } catch (error) {
      // Parsing failed outright: report it and return the (empty) result.
      context.messages.push({
        id: "CSS-008",
        severity: "error",
        message: `CSS parse error: ${error instanceof Error ? error.message : "Unknown error"}`,
        location: { path: resourcePath }
      });
      return result;
    }
    this.checkDiscouragedProperties(context, ast, resourcePath);
    this.checkAtRules(context, ast, resourcePath, result);
    this.checkMediaOverlayClasses(context, ast, resourcePath);
    return result;
  }
  /**
   * Build a message location for an AST node.
   * `line` and `column` are only set when the node carries position info.
   */
  buildLocation(resourcePath, node) {
    const location = { path: resourcePath };
    const start = node.loc?.start;
    if (start) {
      location.line = start.line;
      location.column = start.column;
    }
    return location;
  }
  /**
   * Run the per-declaration checks (forbidden properties, position values)
   * on every Declaration node in the style sheet.
   */
  checkDiscouragedProperties(context, ast, resourcePath) {
    cssTree.walk(ast, (node) => {
      if (node.type === "Declaration") {
        this.checkForbiddenProperties(context, node, resourcePath);
        this.checkPositionProperty(context, node, resourcePath);
      }
    });
  }
  /**
   * Report CSS-001 for the `direction` and `unicode-bidi` properties
   * (property names are compared case-insensitively).
   */
  checkForbiddenProperties(context, node, resourcePath) {
    const property = node.property.toLowerCase();
    const forbiddenProperties = ["direction", "unicode-bidi"];
    if (!forbiddenProperties.includes(property)) return;
    context.messages.push({
      id: "CSS-001",
      severity: "error",
      message: `CSS property "${property}" must not be included in an EPUB Style Sheet`,
      location: this.buildLocation(resourcePath, node)
    });
  }
  /**
   * Report CSS-006 for `position: fixed` and CSS-019 for `position: absolute`.
   */
  checkPositionProperty(context, node, resourcePath) {
    if (node.property.toLowerCase() !== "position") return;
    const value = this.getDeclarationValue(node);
    const location = this.buildLocation(resourcePath, node);
    if (value === "fixed") {
      context.messages.push({
        id: "CSS-006",
        severity: "warning",
        message: 'CSS property "position: fixed" is discouraged in EPUB',
        location
      });
    }
    if (value === "absolute") {
      context.messages.push({
        id: "CSS-019",
        severity: "warning",
        message: 'CSS property "position: absolute" should be used with caution in EPUB',
        location
      });
    }
  }
  /**
   * Return the lower-cased first identifier of a declaration value,
   * or "" when the value does not start with an identifier.
   */
  getDeclarationValue(node) {
    const value = node.value;
    if (value.type === "Value") {
      const first = value.children.first;
      if (first?.type === "Identifier") {
        return first.name.toLowerCase();
      }
    }
    return "";
  }
  /**
   * Dispatch @import and @font-face at-rules to their dedicated checks.
   */
  checkAtRules(context, ast, resourcePath, result) {
    cssTree.walk(ast, (node) => {
      if (node.type !== "Atrule") return;
      const ruleName = node.name.toLowerCase();
      if (ruleName === "import") {
        this.checkImport(context, node, resourcePath, result);
      } else if (ruleName === "font-face") {
        this.checkFontFace(context, node, resourcePath, result);
      }
    });
  }
  /**
   * Check an @import rule: report CSS-002 when it has no usable URL,
   * otherwise record the URL in `result.references` with type "import".
   */
  checkImport(context, atRule, resourcePath, result) {
    const start = atRule.loc?.start;
    const location = this.buildLocation(resourcePath, atRule);
    if (!atRule.prelude) {
      context.messages.push({
        id: "CSS-002",
        severity: "error",
        message: "Empty @import rule",
        location
      });
      return;
    }
    // Take the first url(...) or string found in the prelude.
    let importUrl = "";
    cssTree.walk(atRule.prelude, (node) => {
      if (importUrl) return;
      if (node.type === "Url") {
        importUrl = this.extractUrlValue(node);
      } else if (node.type === "String") {
        importUrl = node.value;
      }
    });
    if (!importUrl || importUrl.trim() === "") {
      context.messages.push({
        id: "CSS-002",
        severity: "error",
        message: "Empty or NULL reference found in @import",
        location
      });
      return;
    }
    result.references.push({
      url: importUrl,
      type: "import",
      line: start?.line,
      column: start?.column
    });
  }
  /**
   * Check an @font-face rule: optional CSS-028 usage note, CSS-019 for an
   * empty rule or a missing `src`, and collection of the declared font family.
   */
  checkFontFace(context, atRule, resourcePath, result) {
    const location = this.buildLocation(resourcePath, atRule);
    if (context.options.includeUsage) {
      context.messages.push({
        id: "CSS-028",
        severity: "usage",
        message: "Use of @font-face declaration",
        location
      });
    }
    if (!atRule.block || atRule.block.children.isEmpty) {
      context.messages.push({
        id: "CSS-019",
        severity: "warning",
        message: "@font-face declaration has no attributes",
        location
      });
      return;
    }
    const state = { hasSrc: false, fontFamily: null };
    cssTree.walk(atRule.block, (node) => {
      if (node.type !== "Declaration") return;
      const propName = node.property.toLowerCase();
      if (propName === "font-family") {
        state.fontFamily = this.extractFontFamily(node);
      } else if (propName === "src") {
        state.hasSrc = true;
        this.checkFontFaceSrc(context, node, resourcePath, result);
      }
    });
    if (state.fontFamily) {
      result.fontFamilies.push(state.fontFamily);
    }
    if (!state.hasSrc) {
      context.messages.push({
        id: "CSS-019",
        severity: "warning",
        message: "@font-face declaration is missing src property",
        location
      });
    }
  }
  /**
   * Check every url(...) inside an @font-face `src` declaration.
   * Empty URLs are reported as CSS-002; `data:` and fragment-only URLs are
   * ignored; everything else is recorded as a "font" reference and type-checked.
   */
  checkFontFaceSrc(context, decl, resourcePath, result) {
    const start = decl.loc?.start;
    const location = this.buildLocation(resourcePath, decl);
    cssTree.walk(decl.value, (node) => {
      if (node.type !== "Url") return;
      const urlValue = this.extractUrlValue(node);
      if (!urlValue || urlValue.trim() === "") {
        context.messages.push({
          id: "CSS-002",
          severity: "error",
          message: "Empty or NULL reference found in @font-face src",
          location
        });
        return;
      }
      if (urlValue.startsWith("data:") || urlValue.startsWith("#")) {
        return;
      }
      result.references.push({
        url: urlValue,
        type: "font",
        line: start?.line,
        column: start?.column
      });
      this.checkFontType(context, urlValue, resourcePath, location);
    });
  }
  /**
   * Report CSS-007 when a font reference does not use a blessed font type,
   * judged by its file extension and by the media type of the matching
   * manifest item (when `context.packageDocument` is available).
   *
   * The query string and fragment are removed before the extension is read and
   * before the path is resolved, so `a.woff?#iefix` is treated as `a.woff`.
   * The manifest check runs even when the URL has no recognizable extension.
   */
  checkFontType(context, fontUrl, resourcePath, location) {
    const urlPath = fontUrl.split(/[?#]/)[0] ?? fontUrl;
    const extMatch = /\.[a-zA-Z0-9]+$/.exec(urlPath);
    if (extMatch) {
      const mimeType = FONT_EXTENSION_TO_TYPE[extMatch[0].toLowerCase()];
      if (mimeType && !BLESSED_FONT_TYPES.has(mimeType)) {
        context.messages.push({
          id: "CSS-007",
          severity: "error",
          message: `Font-face reference "${fontUrl}" refers to non-standard font type "${mimeType}"`,
          location
        });
      }
    }
    const packageDoc = context.packageDocument;
    if (!packageDoc) return;
    const cssDir = resourcePath.includes("/") ? resourcePath.substring(0, resourcePath.lastIndexOf("/")) : "";
    const resolvedPath = this.resolvePath(cssDir, urlPath);
    // NOTE(review): the resolved path is compared to manifest hrefs verbatim;
    // confirm hrefs are container-rooted here rather than OPF-relative.
    const manifestItem = packageDoc.manifest.find((item) => item.href === resolvedPath);
    if (manifestItem && !BLESSED_FONT_TYPES.has(manifestItem.mediaType)) {
      context.messages.push({
        id: "CSS-007",
        severity: "error",
        message: `Font-face reference "${fontUrl}" has non-standard media type "${manifestItem.mediaType}" in manifest`,
        location
      });
    }
  }
  /**
   * Return the string value of a Url node, or "" when it is not a string.
   */
  extractUrlValue(urlNode) {
    const value = urlNode.value;
    return typeof value === "string" ? value : "";
  }
  /**
   * Return the first family name of a font-family declaration
   * (a quoted string or a bare identifier), or null if neither is present.
   */
  extractFontFamily(decl) {
    const value = decl.value;
    if (value.type === "Value") {
      const first = value.children.first;
      if (first?.type === "String") {
        return first.value;
      }
      if (first?.type === "Identifier") {
        return first.name;
      }
    }
    return null;
  }
  /**
   * Resolve `relativePath` against the directory `basePath`.
   * A leading "/" makes the path root-relative; "." and empty segments are
   * dropped and ".." removes the previous segment.
   */
  resolvePath(basePath, relativePath) {
    if (relativePath.startsWith("/")) {
      return relativePath.substring(1);
    }
    const resultSegments = basePath.split("/").filter(Boolean);
    for (const segment of relativePath.split("/")) {
      if (segment === "..") {
        resultSegments.pop();
      } else if (segment !== "." && segment !== "") {
        resultSegments.push(segment);
      }
    }
    return resultSegments.join("/");
  }
  /**
   * Report class selectors that use media-overlay names: CSS-029 for the four
   * reserved names and CSS-030 for any name starting with "-epub-media-overlay-".
   * A name can trigger both messages.
   */
  checkMediaOverlayClasses(context, ast, resourcePath) {
    const reservedClassNames = /* @__PURE__ */ new Set([
      "-epub-media-overlay-active",
      "media-overlay-active",
      "-epub-media-overlay-playing",
      "media-overlay-playing"
    ]);
    cssTree.walk(ast, (node) => {
      if (node.type !== "ClassSelector") return;
      const className = node.name.toLowerCase();
      if (reservedClassNames.has(className)) {
        context.messages.push({
          id: "CSS-029",
          severity: "error",
          message: `Class name "${className}" is reserved for media overlays`,
          location: this.buildLocation(resourcePath, node)
        });
      }
      if (className.startsWith("-epub-media-overlay-")) {
        context.messages.push({
          id: "CSS-030",
          severity: "warning",
          message: `Class names starting with "-epub-media-overlay-" are reserved for future use`,
          location: this.buildLocation(resourcePath, node)
        });
      }
    });
  }
};
7
420
 
8
421
  // src/references/types.ts
9
422
  function isPublicationResourceReference(type) {
@@ -45,6 +458,8 @@ var ContentValidator = class {
45
458
  return;
46
459
  }
47
460
  const cssContent = new TextDecoder().decode(cssData);
461
+ const cssValidator = new CSSValidator();
462
+ cssValidator.validate(context, cssContent, path);
48
463
  this.extractCSSImports(path, cssContent, opfDir, refValidator);
49
464
  }
50
465
  validateXHTMLDocument(context, path, itemId, opfDir, registry, refValidator) {
@@ -179,6 +594,7 @@ var ContentValidator = class {
179
594
  this.extractAndRegisterHyperlinks(path, root, opfDir, refValidator);
180
595
  this.extractAndRegisterStylesheets(path, root, opfDir, refValidator);
181
596
  this.extractAndRegisterImages(path, root, opfDir, refValidator);
597
+ this.extractAndRegisterCiteAttributes(path, root, opfDir, refValidator);
182
598
  }
183
599
  } finally {
184
600
  doc.dispose();
@@ -275,10 +691,18 @@ var ContentValidator = class {
275
691
  }
276
692
  }
277
693
  detectScripts(_context, _path, root) {
278
- const htmlScript = root.get(".//html:script", { html: "http://www.w3.org/1999/xhtml" });
279
- if (htmlScript) return true;
280
- const svgScript = root.get(".//svg:script", { svg: "http://www.w3.org/2000/svg" });
281
- if (svgScript) return true;
694
+ const htmlScripts = root.find(".//html:script", { html: "http://www.w3.org/1999/xhtml" });
695
+ for (const script of htmlScripts) {
696
+ if (this.isScriptType(this.getAttribute(script, "type"))) {
697
+ return true;
698
+ }
699
+ }
700
+ const svgScripts = root.find(".//svg:script", { svg: "http://www.w3.org/2000/svg" });
701
+ for (const script of svgScripts) {
702
+ if (this.isScriptType(this.getAttribute(script, "type"))) {
703
+ return true;
704
+ }
705
+ }
282
706
  const form = root.get(".//html:form", { html: "http://www.w3.org/1999/xhtml" });
283
707
  if (form) return true;
284
708
  const elementsWithEvents = root.find(
@@ -287,6 +711,35 @@ var ContentValidator = class {
287
711
  if (elementsWithEvents.length > 0) return true;
288
712
  return false;
289
713
  }
714
+ /**
715
+ * Check if the script type is a JavaScript type that requires "scripted" property.
716
+ * Per EPUB spec and Java EPUBCheck, only JavaScript types require it.
717
+ * Data block types like application/ld+json, application/json do NOT require it.
718
+ */
719
+ isScriptType(type) {
720
+ if (!type || type.trim() === "") return true;
721
+ const jsTypes = /* @__PURE__ */ new Set([
722
+ "application/javascript",
723
+ "text/javascript",
724
+ "application/ecmascript",
725
+ "application/x-ecmascript",
726
+ "application/x-javascript",
727
+ "text/ecmascript",
728
+ "text/javascript1.0",
729
+ "text/javascript1.1",
730
+ "text/javascript1.2",
731
+ "text/javascript1.3",
732
+ "text/javascript1.4",
733
+ "text/javascript1.5",
734
+ "text/jscript",
735
+ "text/livescript",
736
+ "text/x-ecmascript",
737
+ "text/x-javascript",
738
+ "module"
739
+ // ES modules
740
+ ]);
741
+ return jsTypes.has(type.toLowerCase());
742
+ }
290
743
  detectMathML(_context, _path, root) {
291
744
  const mathMLElements = root.find(".//math:*", { math: "http://www.w3.org/1998/Math/MathML" });
292
745
  return mathMLElements.length > 0;
@@ -861,6 +1314,53 @@ var ContentValidator = class {
861
1314
  });
862
1315
  }
863
1316
  }
1317
+ /**
1318
+ * Extract cite attribute references from blockquote, q, ins, del elements
1319
+ * These need to be validated as RSC-007 if the referenced resource is missing
1320
+ */
1321
+ extractAndRegisterCiteAttributes(path, root, opfDir, refValidator) {
1322
+ const docDir = path.includes("/") ? path.substring(0, path.lastIndexOf("/")) : "";
1323
+ const citeElements = [
1324
+ ...root.find(".//html:blockquote[@cite]", { html: "http://www.w3.org/1999/xhtml" }),
1325
+ ...root.find(".//html:q[@cite]", { html: "http://www.w3.org/1999/xhtml" }),
1326
+ ...root.find(".//html:ins[@cite]", { html: "http://www.w3.org/1999/xhtml" }),
1327
+ ...root.find(".//html:del[@cite]", { html: "http://www.w3.org/1999/xhtml" })
1328
+ ];
1329
+ for (const elem of citeElements) {
1330
+ const cite = this.getAttribute(elem, "cite");
1331
+ if (!cite) continue;
1332
+ const line = elem.line;
1333
+ if (cite.startsWith("http://") || cite.startsWith("https://")) {
1334
+ continue;
1335
+ }
1336
+ if (cite.startsWith("#")) {
1337
+ const targetResource2 = path;
1338
+ const fragment2 = cite.slice(1);
1339
+ refValidator.addReference({
1340
+ url: cite,
1341
+ targetResource: targetResource2,
1342
+ fragment: fragment2,
1343
+ type: "hyperlink" /* HYPERLINK */,
1344
+ location: { path, line }
1345
+ });
1346
+ continue;
1347
+ }
1348
+ const resolvedPath = this.resolveRelativePath(docDir, cite, opfDir);
1349
+ const hashIndex = resolvedPath.indexOf("#");
1350
+ const targetResource = hashIndex >= 0 ? resolvedPath.slice(0, hashIndex) : resolvedPath;
1351
+ const fragment = hashIndex >= 0 ? resolvedPath.slice(hashIndex + 1) : void 0;
1352
+ const ref = {
1353
+ url: cite,
1354
+ targetResource,
1355
+ type: "hyperlink" /* HYPERLINK */,
1356
+ location: { path, line }
1357
+ };
1358
+ if (fragment) {
1359
+ ref.fragment = fragment;
1360
+ }
1361
+ refValidator.addReference(ref);
1362
+ }
1363
+ }
864
1364
  resolveRelativePath(docDir, href, _opfDir) {
865
1365
  const hrefWithoutFragment = href.split("#")[0] ?? href;
866
1366
  const fragment = href.includes("#") ? href.split("#")[1] : "";
@@ -1179,6 +1679,93 @@ var ZipReader = class _ZipReader {
1179
1679
  const prefix = dirPath.endsWith("/") ? dirPath : `${dirPath}/`;
1180
1680
  return this._paths.filter((p) => p.startsWith(prefix));
1181
1681
  }
1682
+ /**
1683
+ * Check for filenames that are not valid UTF-8 by parsing raw ZIP data
1684
+ *
1685
+ * ZIP files store filenames as bytes. The EPUB spec requires filenames to be UTF-8.
1686
+ * This method parses the ZIP central directory to find filenames with invalid UTF-8.
1687
+ *
1688
+ * @returns Array of filenames with invalid UTF-8 encoding
1689
+ */
1690
+ getInvalidUtf8Filenames() {
1691
+ const invalid = [];
1692
+ const data = this._rawData;
1693
+ let eocdOffset = -1;
1694
+ for (let i = data.length - 22; i >= 0; i--) {
1695
+ if (data[i] === 80 && data[i + 1] === 75 && data[i + 2] === 5 && data[i + 3] === 6) {
1696
+ eocdOffset = i;
1697
+ break;
1698
+ }
1699
+ }
1700
+ if (eocdOffset === -1) {
1701
+ return invalid;
1702
+ }
1703
+ const cdOffset = (data[eocdOffset + 16] ?? 0) | (data[eocdOffset + 17] ?? 0) << 8 | (data[eocdOffset + 18] ?? 0) << 16 | (data[eocdOffset + 19] ?? 0) << 24;
1704
+ let offset = cdOffset;
1705
+ while (offset < eocdOffset) {
1706
+ if (data[offset] !== 80 || data[offset + 1] !== 75 || data[offset + 2] !== 1 || data[offset + 3] !== 2) {
1707
+ break;
1708
+ }
1709
+ const filenameLength = (data[offset + 28] ?? 0) | (data[offset + 29] ?? 0) << 8;
1710
+ const extraLength = (data[offset + 30] ?? 0) | (data[offset + 31] ?? 0) << 8;
1711
+ const commentLength = (data[offset + 32] ?? 0) | (data[offset + 33] ?? 0) << 8;
1712
+ const filenameBytes = data.slice(offset + 46, offset + 46 + filenameLength);
1713
+ const utf8Error = this.validateUtf8(filenameBytes);
1714
+ if (utf8Error) {
1715
+ const filename = fflate.strFromU8(filenameBytes);
1716
+ invalid.push({ filename, reason: utf8Error });
1717
+ }
1718
+ offset += 46 + filenameLength + extraLength + commentLength;
1719
+ }
1720
+ return invalid;
1721
+ }
1722
+ /**
1723
+ * Validate that bytes form a valid UTF-8 sequence
1724
+ *
1725
+ * @returns Error description if invalid, undefined if valid
1726
+ */
1727
+ validateUtf8(bytes) {
1728
+ let i = 0;
1729
+ while (i < bytes.length) {
1730
+ const byte = bytes[i] ?? 0;
1731
+ if (byte <= 127) {
1732
+ i++;
1733
+ } else if ((byte & 224) === 192) {
1734
+ if (byte < 194) {
1735
+ return `Overlong encoding at byte ${String(i)}`;
1736
+ }
1737
+ if (i + 1 >= bytes.length || ((bytes[i + 1] ?? 0) & 192) !== 128) {
1738
+ return `Invalid continuation byte at position ${String(i + 1)}`;
1739
+ }
1740
+ i += 2;
1741
+ } else if ((byte & 240) === 224) {
1742
+ if (i + 2 >= bytes.length || ((bytes[i + 1] ?? 0) & 192) !== 128 || ((bytes[i + 2] ?? 0) & 192) !== 128) {
1743
+ return `Invalid continuation byte in 3-byte sequence at position ${String(i)}`;
1744
+ }
1745
+ if (byte === 224 && (bytes[i + 1] ?? 0) < 160) {
1746
+ return `Overlong 3-byte encoding at byte ${String(i)}`;
1747
+ }
1748
+ if (byte === 237 && (bytes[i + 1] ?? 0) >= 160) {
1749
+ return `UTF-16 surrogate at byte ${String(i)}`;
1750
+ }
1751
+ i += 3;
1752
+ } else if ((byte & 248) === 240) {
1753
+ if (i + 3 >= bytes.length || ((bytes[i + 1] ?? 0) & 192) !== 128 || ((bytes[i + 2] ?? 0) & 192) !== 128 || ((bytes[i + 3] ?? 0) & 192) !== 128) {
1754
+ return `Invalid continuation byte in 4-byte sequence at position ${String(i)}`;
1755
+ }
1756
+ if (byte === 240 && (bytes[i + 1] ?? 0) < 144) {
1757
+ return `Overlong 4-byte encoding at byte ${String(i)}`;
1758
+ }
1759
+ if (byte > 244 || byte === 244 && (bytes[i + 1] ?? 0) > 143) {
1760
+ return `Code point exceeds U+10FFFF at byte ${String(i)}`;
1761
+ }
1762
+ i += 4;
1763
+ } else {
1764
+ return `Invalid UTF-8 start byte 0x${byte.toString(16).toUpperCase()} at position ${String(i)}`;
1765
+ }
1766
+ }
1767
+ return void 0;
1768
+ }
1182
1769
  };
1183
1770
 
1184
1771
  // src/ocf/validator.ts
@@ -1209,6 +1796,8 @@ var OCFValidator = class {
1209
1796
  this.validateContainer(zip, context);
1210
1797
  this.validateMetaInf(zip, context.messages);
1211
1798
  this.validateFilenames(zip, context.messages);
1799
+ this.validateDuplicateFilenames(zip, context.messages);
1800
+ this.validateUtf8Filenames(zip, context.messages);
1212
1801
  this.validateEmptyDirectories(zip, context.messages);
1213
1802
  }
1214
1803
  /**
@@ -1275,20 +1864,11 @@ var OCFValidator = class {
1275
1864
  });
1276
1865
  return;
1277
1866
  }
1278
- const trimmed = content.trim();
1279
- if (trimmed !== EPUB_MIMETYPE) {
1867
+ if (content !== EPUB_MIMETYPE) {
1280
1868
  messages.push({
1281
1869
  id: "PKG-007",
1282
1870
  severity: "error",
1283
- message: `Mimetype file must contain "${EPUB_MIMETYPE}", found "${trimmed}"`,
1284
- location: { path: "mimetype" }
1285
- });
1286
- }
1287
- if (content !== EPUB_MIMETYPE) {
1288
- messages.push({
1289
- id: "PKG-008",
1290
- severity: "warning",
1291
- message: "Mimetype file should not contain leading/trailing whitespace or newlines",
1871
+ message: `Mimetype file must contain exactly "${EPUB_MIMETYPE}"`,
1292
1872
  location: { path: "mimetype" }
1293
1873
  });
1294
1874
  }
@@ -1300,9 +1880,9 @@ var OCFValidator = class {
1300
1880
  const containerPath = "META-INF/container.xml";
1301
1881
  if (!zip.has(containerPath)) {
1302
1882
  context.messages.push({
1303
- id: "PKG-003",
1883
+ id: "RSC-002",
1304
1884
  severity: "fatal",
1305
- message: "Missing META-INF/container.xml",
1885
+ message: "Required file META-INF/container.xml was not found",
1306
1886
  location: { path: containerPath }
1307
1887
  });
1308
1888
  return;
@@ -1310,7 +1890,7 @@ var OCFValidator = class {
1310
1890
  const content = zip.readText(containerPath);
1311
1891
  if (!content) {
1312
1892
  context.messages.push({
1313
- id: "PKG-003",
1893
+ id: "RSC-002",
1314
1894
  severity: "fatal",
1315
1895
  message: "Could not read META-INF/container.xml",
1316
1896
  location: { path: containerPath }
@@ -1384,8 +1964,15 @@ var OCFValidator = class {
1384
1964
  }
1385
1965
  /**
1386
1966
  * Validate filenames for invalid characters
1967
+ *
1968
+ * Per EPUB 3.3 spec and Java EPUBCheck:
1969
+ * - PKG-009: Disallowed characters (ASCII special chars, control chars, private use, etc.)
1970
+ * - PKG-010: Whitespace characters (warning)
1971
+ * - PKG-011: Filename ends with period
1972
+ * - PKG-012: Non-ASCII characters (usage info)
1387
1973
  */
1388
1974
  validateFilenames(zip, messages) {
1975
+ const DISALLOWED_ASCII = /* @__PURE__ */ new Set([34, 42, 58, 60, 62, 63, 92, 124]);
1389
1976
  for (const path of zip.paths) {
1390
1977
  if (path === "mimetype") continue;
1391
1978
  if (path.endsWith("/")) continue;
@@ -1399,30 +1986,164 @@ var OCFValidator = class {
1399
1986
  });
1400
1987
  continue;
1401
1988
  }
1989
+ const disallowed = [];
1990
+ let hasSpaces = false;
1402
1991
  for (let i = 0; i < filename.length; i++) {
1403
1992
  const code = filename.charCodeAt(i);
1404
- if (code < 32 || code === 127 || code >= 128 && code <= 159) {
1405
- messages.push({
1406
- id: "PKG-010",
1407
- severity: "error",
1408
- message: `Filename contains control character: "${path}"`,
1409
- location: { path }
1410
- });
1411
- break;
1993
+ if (DISALLOWED_ASCII.has(code)) {
1994
+ const char = filename[i] ?? "";
1995
+ disallowed.push(`U+${code.toString(16).toUpperCase().padStart(4, "0")} (${char})`);
1996
+ } else if (code <= 31 || code === 127 || code >= 128 && code <= 159) {
1997
+ disallowed.push(`U+${code.toString(16).toUpperCase().padStart(4, "0")} (CONTROL)`);
1998
+ } else if (code >= 57344 && code <= 63743) {
1999
+ disallowed.push(`U+${code.toString(16).toUpperCase().padStart(4, "0")} (PRIVATE USE)`);
2000
+ } else if (code >= 65520 && code <= 65535) {
2001
+ disallowed.push(`U+${code.toString(16).toUpperCase().padStart(4, "0")} (SPECIALS)`);
1412
2002
  }
1413
- }
1414
- const specialChars = '<>:"|?*';
1415
- for (const char of specialChars) {
1416
- if (filename.includes(char)) {
1417
- messages.push({
1418
- id: "PKG-011",
1419
- severity: "error",
1420
- message: `Filename contains special character: "${path}"`,
1421
- location: { path }
1422
- });
1423
- break;
2003
+ if (code === 32 || code === 9 || code === 10 || code === 13) {
2004
+ hasSpaces = true;
1424
2005
  }
1425
2006
  }
2007
+ if (filename.endsWith(".")) {
2008
+ messages.push({
2009
+ id: "PKG-011",
2010
+ severity: "error",
2011
+ message: `Filename must not end with a period: "${path}"`,
2012
+ location: { path }
2013
+ });
2014
+ }
2015
+ if (disallowed.length > 0) {
2016
+ messages.push({
2017
+ id: "PKG-009",
2018
+ severity: "error",
2019
+ message: `Filename "${path}" contains disallowed characters: ${disallowed.join(", ")}`,
2020
+ location: { path }
2021
+ });
2022
+ }
2023
+ if (hasSpaces) {
2024
+ messages.push({
2025
+ id: "PKG-010",
2026
+ severity: "warning",
2027
+ message: `Filename "${path}" contains spaces`,
2028
+ location: { path }
2029
+ });
2030
+ }
2031
+ }
2032
+ }
2033
+ /**
2034
+ * Check for duplicate filenames after Unicode normalization and case folding
2035
+ *
2036
+ * Per EPUB spec, filenames must be unique after applying:
2037
+ * - Unicode Canonical Case Fold Normalization (NFD + case folding)
2038
+ *
2039
+ * OPF-060: Duplicate filename after normalization
2040
+ */
2041
+ validateDuplicateFilenames(zip, messages) {
2042
+ const seenPaths = /* @__PURE__ */ new Set();
2043
+ const normalizedPaths = /* @__PURE__ */ new Map();
2044
+ for (const path of zip.paths) {
2045
+ if (path.endsWith("/")) continue;
2046
+ if (seenPaths.has(path)) {
2047
+ messages.push({
2048
+ id: "OPF-060",
2049
+ severity: "error",
2050
+ message: `Duplicate ZIP entry: "${path}"`,
2051
+ location: { path }
2052
+ });
2053
+ continue;
2054
+ }
2055
+ seenPaths.add(path);
2056
+ const normalized = this.canonicalCaseFold(path);
2057
+ const existing = normalizedPaths.get(normalized);
2058
+ if (existing !== void 0) {
2059
+ messages.push({
2060
+ id: "OPF-060",
2061
+ severity: "error",
2062
+ message: `Duplicate filename after Unicode normalization: "${path}" conflicts with "${existing}"`,
2063
+ location: { path }
2064
+ });
2065
+ } else {
2066
+ normalizedPaths.set(normalized, path);
2067
+ }
2068
+ }
2069
+ }
2070
+ /**
2071
+ * Apply Unicode Canonical Case Fold Normalization
2072
+ *
2073
+ * This applies:
2074
+ * 1. NFD (Canonical Decomposition) - decomposes combined characters
2075
+ * 2. Full Unicode case folding
2076
+ *
2077
+ * Based on Unicode case folding rules for filename comparison.
2078
+ */
2079
+ canonicalCaseFold(str) {
2080
+ let result = str.normalize("NFD");
2081
+ result = this.unicodeCaseFold(result);
2082
+ return result;
2083
+ }
2084
+ /**
2085
+ * Perform Unicode full case folding
2086
+ *
2087
+ * Handles special Unicode case folding rules beyond simple toLowerCase:
2088
+ * - ß (U+00DF) -> ss
2089
+ * - ẞ (U+1E9E) -> ss (capital sharp s)
2090
+ * - fi (U+FB01) -> fi
2091
+ * - fl (U+FB02) -> fl
2092
+ * - ff (U+FB00) -> ff
2093
+ * - ffi (U+FB03) -> ffi
2094
+ * - ffl (U+FB04) -> ffl
2095
+ * - ſt (U+FB05) -> st
2096
+ * - st (U+FB06) -> st
2097
+ * And other Unicode case folding rules
2098
+ */
2099
+ unicodeCaseFold(str) {
2100
+ const caseFoldMap = {
2101
+ "\xDF": "ss",
2102
+ // ß -> ss
2103
+ "\u1E9E": "ss",
2104
+ // ẞ -> ss (capital sharp s)
2105
+ "\uFB00": "ff",
2106
+ // ff -> ff
2107
+ "\uFB01": "fi",
2108
+ // fi -> fi
2109
+ "\uFB02": "fl",
2110
+ // fl -> fl
2111
+ "\uFB03": "ffi",
2112
+ // ffi -> ffi
2113
+ "\uFB04": "ffl",
2114
+ // ffl -> ffl
2115
+ "\uFB05": "st",
2116
+ // ſt -> st
2117
+ "\uFB06": "st",
2118
+ // st -> st
2119
+ "\u0130": "i\u0307"
2120
+ // İ -> i + combining dot above
2121
+ };
2122
+ let result = "";
2123
+ for (const char of str) {
2124
+ const folded = caseFoldMap[char];
2125
+ if (folded !== void 0) {
2126
+ result += folded;
2127
+ } else {
2128
+ result += char.toLowerCase();
2129
+ }
2130
+ }
2131
+ return result;
2132
+ }
2133
+ /**
2134
+ * Validate that filenames are encoded as UTF-8
2135
+ *
2136
+ * PKG-027: Filenames in EPUB ZIP archives must be UTF-8 encoded
2137
+ */
2138
+ validateUtf8Filenames(zip, messages) {
2139
+ const invalidFilenames = zip.getInvalidUtf8Filenames();
2140
+ for (const { filename, reason } of invalidFilenames) {
2141
+ messages.push({
2142
+ id: "PKG-027",
2143
+ severity: "fatal",
2144
+ message: `Filename is not valid UTF-8: "${filename}" (${reason})`,
2145
+ location: { path: filename }
2146
+ });
1426
2147
  }
1427
2148
  }
1428
2149
  /**
@@ -2117,9 +2838,9 @@ var OPFValidator = class {
2117
2838
  const fullPath = resolvePath(opfPath, item.href);
2118
2839
  if (!context.files.has(fullPath) && !item.href.startsWith("http")) {
2119
2840
  context.messages.push({
2120
- id: "OPF-010",
2841
+ id: "RSC-001",
2121
2842
  severity: "error",
2122
- message: `Manifest item "${item.id}" references missing file: ${item.href}`,
2843
+ message: `Referenced resource "${item.href}" could not be found in the EPUB`,
2123
2844
  location: { path: opfPath }
2124
2845
  });
2125
2846
  }
@@ -2206,31 +2927,6 @@ var OPFValidator = class {
2206
2927
  });
2207
2928
  }
2208
2929
  }
2209
- this.checkUndeclaredResources(context, opfPath);
2210
- }
2211
- /**
2212
- * Check for files in container that are not declared in manifest
2213
- */
2214
- checkUndeclaredResources(context, opfPath) {
2215
- if (!this.packageDoc) return;
2216
- const declaredPaths = /* @__PURE__ */ new Set();
2217
- for (const item of this.packageDoc.manifest) {
2218
- const fullPath = resolvePath(opfPath, item.href);
2219
- declaredPaths.add(fullPath);
2220
- }
2221
- declaredPaths.add(opfPath);
2222
- for (const filePath of context.files.keys()) {
2223
- if (filePath.endsWith("/")) continue;
2224
- if (filePath.startsWith("META-INF/")) continue;
2225
- if (filePath === "mimetype") continue;
2226
- if (declaredPaths.has(filePath)) continue;
2227
- context.messages.push({
2228
- id: "RSC-008",
2229
- severity: "error",
2230
- message: `File in container is not declared in manifest: ${filePath}`,
2231
- location: { path: filePath }
2232
- });
2233
- }
2234
2930
  }
2235
2931
  /**
2236
2932
  * Validate spine section
@@ -2255,32 +2951,30 @@ var OPFValidator = class {
2255
2951
  location: { path: opfPath }
2256
2952
  });
2257
2953
  }
2258
- if (this.packageDoc.version === "2.0") {
2259
- const ncxId = this.packageDoc.spineToc;
2260
- if (!ncxId) {
2954
+ const ncxId = this.packageDoc.spineToc;
2955
+ if (this.packageDoc.version === "2.0" && !ncxId) {
2956
+ context.messages.push({
2957
+ id: "OPF-050",
2958
+ severity: "warning",
2959
+ message: "EPUB 2 spine should have a toc attribute referencing the NCX",
2960
+ location: { path: opfPath }
2961
+ });
2962
+ } else if (ncxId) {
2963
+ const ncxItem = this.manifestById.get(ncxId);
2964
+ if (!ncxItem) {
2965
+ context.messages.push({
2966
+ id: "OPF-049",
2967
+ severity: "error",
2968
+ message: `Spine toc attribute references non-existent item: "${ncxId}"`,
2969
+ location: { path: opfPath }
2970
+ });
2971
+ } else if (ncxItem.mediaType !== "application/x-dtbncx+xml") {
2261
2972
  context.messages.push({
2262
2973
  id: "OPF-050",
2263
- severity: "warning",
2264
- message: "EPUB 2 spine should have a toc attribute referencing the NCX",
2974
+ severity: "error",
2975
+ message: `Spine toc attribute must reference an NCX document (media-type "application/x-dtbncx+xml"), found: "${ncxItem.mediaType}"`,
2265
2976
  location: { path: opfPath }
2266
2977
  });
2267
- } else {
2268
- const ncxItem = this.manifestById.get(ncxId);
2269
- if (!ncxItem) {
2270
- context.messages.push({
2271
- id: "OPF-049",
2272
- severity: "error",
2273
- message: `Spine toc attribute references non-existent item: "${ncxId}"`,
2274
- location: { path: opfPath }
2275
- });
2276
- } else if (ncxItem.mediaType !== "application/x-dtbncx+xml") {
2277
- context.messages.push({
2278
- id: "OPF-050",
2279
- severity: "error",
2280
- message: `NCX item must have media-type "application/x-dtbncx+xml", found: "${ncxItem.mediaType}"`,
2281
- location: { path: opfPath }
2282
- });
2283
- }
2284
2978
  }
2285
2979
  }
2286
2980
  const seenIdrefs = /* @__PURE__ */ new Set();
@@ -2295,7 +2989,7 @@ var OPFValidator = class {
2295
2989
  });
2296
2990
  continue;
2297
2991
  }
2298
- if (this.packageDoc.version === "2.0" && seenIdrefs.has(itemref.idref)) {
2992
+ if (seenIdrefs.has(itemref.idref)) {
2299
2993
  context.messages.push({
2300
2994
  id: "OPF-034",
2301
2995
  severity: "error",
@@ -2787,7 +3481,18 @@ var ReferenceValidator = class {
2787
3481
  }
2788
3482
  if (!this.registry.hasResource(resourcePath)) {
2789
3483
  const fileExistsInContainer = context.files.has(resourcePath);
2790
- if (!fileExistsInContainer) {
3484
+ if (fileExistsInContainer) {
3485
+ if (!context.referencedUndeclaredResources?.has(resourcePath)) {
3486
+ context.messages.push({
3487
+ id: "RSC-008",
3488
+ severity: "error",
3489
+ message: `Referenced resource "${resourcePath}" is not declared in the OPF manifest`,
3490
+ location: reference.location
3491
+ });
3492
+ context.referencedUndeclaredResources ??= /* @__PURE__ */ new Set();
3493
+ context.referencedUndeclaredResources.add(resourcePath);
3494
+ }
3495
+ } else {
2791
3496
  const isLinkRef = reference.type === "link" /* LINK */;
2792
3497
  context.messages.push({
2793
3498
  id: isLinkRef ? "RSC-007w" : "RSC-007",