aurochs 0.6.2 → 0.6.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -3,5 +3,5 @@
3
3
  *
4
4
  * Exports types for PDF document structure.
5
5
  */
6
- export type { PdfElement, PdfPage, PdfDocument, PdfEmbeddedFont } from './types';
6
+ export type { PdfElement, PdfPage, PdfDocument, PdfEmbeddedFont, PdfFontToUnicode, PdfFontMetrics } from './types';
7
7
  export { PDF_UNITS, isPdfPath, isPdfText, isPdfImage } from './types';
@@ -1,6 +1,7 @@
1
1
  import { PdfImage } from '../image';
2
2
  import { PdfPath } from '../path';
3
3
  import { PdfText } from '../text';
4
+ import { CIDOrdering } from '../font';
4
5
  export type PdfElement = PdfPath | PdfText | PdfImage;
5
6
  /**
6
7
  * Represents a parsed PDF page.
@@ -45,6 +46,29 @@ export type PdfPage = {
45
46
  */
46
47
  readonly elements: readonly PdfElement[];
47
48
  };
49
+ /**
50
+ * ToUnicode mapping data for round-trip preservation.
51
+ * Used to reconstruct ToUnicode CMap when writing PDF.
52
+ */
53
+ export type PdfFontToUnicode = {
54
+ /** Maps source code bytes to Unicode strings. Each key is the source bytes written as uppercase hex (e.g., "8140" → "ア"). */
55
+ readonly byteMapping: ReadonlyMap<string, string>;
56
+ /** Byte lengths of the source character codes, taken from the codespace ranges (in descending order). */
57
+ readonly sourceCodeByteLengths: readonly number[];
58
+ };
59
+ /**
60
+ * Font metrics for PDF writing.
61
+ */
62
+ export type PdfFontMetrics = {
63
+ /** Ascender height in 1/1000 em units */
64
+ readonly ascender: number;
65
+ /** Descender depth in 1/1000 em units (negative) */
66
+ readonly descender: number;
67
+ /** Glyph widths: character code → width in 1/1000 em units */
68
+ readonly widths: ReadonlyMap<number, number>;
69
+ /** Default glyph width when not found in widths */
70
+ readonly defaultWidth: number;
71
+ };
48
72
  /**
49
73
  * Embedded font data extracted from PDF.
50
74
  *
@@ -59,6 +83,16 @@ export type PdfEmbeddedFont = {
59
83
  readonly data: Uint8Array;
60
84
  /** MIME type */
61
85
  readonly mimeType: string;
86
+ /** Original BaseFont name from PDF (e.g., "/ZRDQJE+Hiragino-Sans"). Includes subset prefix. */
87
+ readonly baseFontName?: string;
88
+ /** ToUnicode CMap information for round-trip preservation. */
89
+ readonly toUnicode?: PdfFontToUnicode;
90
+ /** Font metrics for accurate text layout. */
91
+ readonly metrics?: PdfFontMetrics;
92
+ /** CID ordering (Japan1, GB1, CNS1, Korea1, Identity). */
93
+ readonly ordering?: CIDOrdering;
94
+ /** Number of bytes per character code (1 for single-byte, 2 for CID fonts). */
95
+ readonly codeByteWidth?: 1 | 2;
62
96
  };
63
97
  export type PdfDocument = {
64
98
  readonly pages: readonly PdfPage[];
@@ -1,4 +1,5 @@
1
1
  import { PdfText } from '../../domain/text';
2
+ import { PdfEmbeddedFont } from '../../domain/document';
2
3
  /**
3
4
  * Context for text serialization.
4
5
  */
@@ -8,6 +9,11 @@ export type TextSerializationContext = {
8
9
  * e.g., "Helvetica" -> "F1"
9
10
  */
10
11
  readonly fontNameToResource: ReadonlyMap<string, string>;
12
+ /**
13
+ * Embedded fonts for CID font detection.
14
+ * When present, CID fonts will use hex string output.
15
+ */
16
+ readonly embeddedFonts?: readonly PdfEmbeddedFont[];
11
17
  };
12
18
  /**
13
19
  * Serialize a PdfText element to PDF content stream operators.
@@ -20,6 +20,22 @@ export declare function buildType1Font(fontName: string, tracker: PdfObjectTrack
20
20
  * @returns The font dictionary object number
21
21
  */
22
22
  export declare function buildEmbeddedFont(font: PdfEmbeddedFont, tracker: PdfObjectTracker): number;
23
+ /**
24
+ * Build a Type0 CID font with Identity-H encoding.
25
+ *
26
+ * Structure:
27
+ * - Type0 font dictionary (top-level)
28
+ * - CIDFont dictionary (descendant)
29
+ * - FontDescriptor
30
+ * - CIDToGIDMap (Identity)
31
+ * - ToUnicode CMap stream
32
+ * - Embedded font file
33
+ *
34
+ * @param font - The embedded font data with CID information
35
+ * @param tracker - Object tracker for allocation
36
+ * @returns The Type0 font dictionary object number
37
+ */
38
+ export declare function buildType0Font(font: PdfEmbeddedFont, tracker: PdfObjectTracker): number;
23
39
  /**
24
40
  * Build fonts for a document.
25
41
  * Returns a map of font name to object number.
@@ -3,6 +3,7 @@
3
3
  */
4
4
  export { PdfObjectTracker, type PdfObjectEntry, } from './object-tracker';
5
5
  export { buildResourceDict, buildEmptyResourceDict, type ResourceRefs, } from './resource-builder';
6
- export { buildType1Font, buildEmbeddedFont, buildFonts, } from './font-builder';
6
+ export { buildType1Font, buildEmbeddedFont, buildType0Font, buildFonts, } from './font-builder';
7
+ export { generateToUnicodeStream, } from './tounicode-writer';
7
8
  export { buildImageXObject, buildImages, } from './image-builder';
8
9
  export { buildPage, type PageBuildResult, type BuildPageOptions, } from './page-builder';
@@ -1,4 +1,4 @@
1
- import { PdfPage } from '../../domain/document';
1
+ import { PdfPage, PdfEmbeddedFont } from '../../domain/document';
2
2
  import { PdfImage } from '../../domain/image';
3
3
  import { ResourceRefs } from './resource-builder';
4
4
  import { PdfObjectTracker } from './object-tracker';
@@ -23,6 +23,8 @@ export type BuildPageOptions = {
23
23
  readonly fontObjMap: ReadonlyMap<string, number>;
24
24
  readonly imageObjMap: ReadonlyMap<number, number>;
25
25
  readonly tracker: PdfObjectTracker;
26
+ /** Embedded fonts for CID font text serialization. */
27
+ readonly embeddedFonts?: readonly PdfEmbeddedFont[];
26
28
  };
27
29
  /**
28
30
  * Build a page object.
@@ -0,0 +1,8 @@
1
+ import { PdfFontToUnicode } from '../../domain/document';
2
+ /**
3
+ * Generate a ToUnicode CMap stream.
4
+ *
5
+ * @param toUnicode - The ToUnicode mapping data
6
+ * @returns CMap stream content as UTF-8 encoded bytes
7
+ */
8
+ export declare function generateToUnicodeStream(toUnicode: PdfFontToUnicode): Uint8Array;