fontisan 0.2.16 → 0.2.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 356bd0157b272462910d8822c6b77a6c768e12a45f4c56b2646d6bc683413239
4
- data.tar.gz: 981dca0c122d3d3e2dab683c6a7962bc2bcc1be35fb5c0818857d6c55ff7bbbf
3
+ metadata.gz: 42bca448396aff910f6d3c92ac31bfa68afdbb85b6f9217d996d73cdbb62af5c
4
+ data.tar.gz: 8e3a5ad55f4c4ba9ac4430caa9447b07ef317840559b84b936cc57791470ee3b
5
5
  SHA512:
6
- metadata.gz: '0852e6bc36559c125cab91cf4fe4d65256628df2e8c873bc52b4f291b2adc944126ed2a0e26d6daafbe7d568934a9788677cb47c8b70982097320addbc93365a'
7
- data.tar.gz: dc9d8fdacf95fc0761bb62cfec0f56e7a7da1a78986915db76243461cf2434d3f8142a0547e153d6746e242aa4a3a5dc33b5b505e8ac073e467e9b9c269d97bf
6
+ metadata.gz: de88ca7106b4934b37c0060f76f41b7972c0092a798c860cbc37795357ffdbce11089ae556f461c490c1fdd43ab48bc97ba3f1d90687abfb95fbea14d42259a7
7
+ data.tar.gz: 2aa4ccdca9901b1ce95eb442238530d86f24a52eae0e3eebfbd23470f3c614858b905adac85fa198fd6d3e8b66f3b6b762015575adff30bafbccd0c5c99115c7
data/Gemfile CHANGED
@@ -5,12 +5,12 @@ source "https://rubygems.org"
5
5
  # Specify your gem's dependencies in fontisan.gemspec
6
6
  gemspec
7
7
 
8
- gem "canon", "~> 0.1.3"
9
- gem "get_process_mem", "~> 0.2"
8
+ gem "benchmark"
10
9
  # bigdecimal is required by get_process_mem for Ruby 3.4+ compatibility
11
10
  gem "bigdecimal"
11
+ gem "canon", "~> 0.1.3"
12
+ gem "get_process_mem", "~> 0.2"
12
13
  gem "openssl", "~> 3.0"
13
- # sys-proctable is required by get_process_mem on Windows
14
14
  gem "rake"
15
15
  gem "rspec"
16
16
  gem "rubocop"
@@ -18,4 +18,7 @@ gem "rubocop-performance"
18
18
  gem "rubocop-rake"
19
19
  gem "rubocop-rspec"
20
20
  gem "rubyzip"
21
+ # sys-proctable is required by get_process_mem on Windows
21
22
  gem "sys-proctable", platforms: %i[mswin mingw mswin64]
23
+ # win32ole was a default gem until Ruby 4.0 — declare it explicitly for Windows runners
24
+ gem "win32ole", platforms: %i[mswin mingw mswin64]
@@ -40,9 +40,11 @@ font = Fontisan::FontLoader.load(File.read('font.ttf', mode: 'rb'))
40
40
 
41
41
  **Raises:** Fontisan::FormatError if format is unsupported
42
42
 
43
- ### detect_format(source)
43
+ ### detect_format(path)
44
44
 
45
- Detect font format without loading.
45
+ Detect a font's on-disk format from its content (magic bytes). The file
46
+ extension is ignored — a `.ttc` that actually contains a single OpenType-CFF
47
+ font is reported as `:otf`.
46
48
 
47
49
  ```ruby
48
50
  format = Fontisan::FontLoader.detect_format('font.ttf')
@@ -52,24 +54,28 @@ format = Fontisan::FontLoader.detect_format('font.otf')
52
54
  # => :otf
53
55
 
54
56
  format = Fontisan::FontLoader.detect_format('font.pfb')
55
- # => :type1
57
+ # => :pfb
58
+
59
+ format = Fontisan::FontLoader.detect_format('font.pfa')
60
+ # => :pfa
56
61
  ```
57
62
 
58
- **Returns:** Symbol or nil
63
+ **Returns:** Symbol (`:ttf`, `:otf`, `:ttc`, `:otc`, `:woff`, `:woff2`,
64
+ `:dfont`, `:pfa`, `:pfb`) or `nil` if the format is not recognised.
59
65
 
60
66
  ## Supported Formats
61
67
 
62
- | Format | Detection | Notes |
63
- |--------|-----------|-------|
64
- | TTF | Magic number | TrueType |
65
- | OTF | Magic number | OpenType/CFF |
66
- | TTC | Magic number | TrueType Collection |
67
- | OTC | Magic number | OpenType Collection |
68
- | WOFF | Magic number | Web Open Font Format |
69
- | WOFF2 | Magic number | Web Open Font Format 2 |
70
- | PFB | Marker byte | Adobe Type 1 Binary |
71
- | PFA | Text | Adobe Type 1 ASCII |
72
- | dfont | Magic number | Apple Data Fork |
68
+ | Symbol | Detection | Notes |
69
+ |---------|-------------|--------------------------------|
70
+ | `:ttf` | Magic bytes | TrueType |
71
+ | `:otf` | Magic bytes | OpenType / CFF |
72
+ | `:ttc` | Magic bytes | TrueType Collection |
73
+ | `:otc` | Magic bytes | OpenType Collection |
74
+ | `:woff` | Magic bytes | Web Open Font Format |
75
+ | `:woff2` | Magic bytes | Web Open Font Format 2 |
76
+ | `:pfb` | Marker byte | Adobe Type 1 Binary |
77
+ | `:pfa` | Text header | Adobe Type 1 ASCII |
78
+ | `:dfont` | Magic bytes | Apple Data Fork Font (Mac font suitcase resources) |
73
79
 
74
80
  ## Examples
75
81
 
data/fontisan.gemspec CHANGED
@@ -41,6 +41,7 @@ Gem::Specification.new do |spec|
41
41
  spec.add_dependency "base64"
42
42
  spec.add_dependency "bindata", "~> 2.5"
43
43
  spec.add_dependency "brotli", "~> 0.5"
44
+ spec.add_dependency "logger"
44
45
  spec.add_dependency "lutaml-model", "~> 0.8"
45
46
  spec.add_dependency "nokogiri", "~> 1.16"
46
47
  spec.add_dependency "thor", "~> 1.3"
@@ -9,7 +9,7 @@ module Fontisan
9
9
  module Constants
10
10
  # TrueType Collection file signature tag.
11
11
  # All valid TTC files must begin with this 4-byte tag.
12
- TTC_TAG = "ttcf"
12
+ TTC_TAG = "ttcf".b.freeze
13
13
 
14
14
  # TrueType Collection Version 1.0 identifier.
15
15
  # Represents the original TTC format version.
@@ -25,11 +25,23 @@ module Fontisan
25
25
  # SFNT version for OpenType fonts with CFF outlines ('OTTO')
26
26
  SFNT_VERSION_OTTO = 0x4F54544F
27
27
 
28
- # Apple 'true' TrueType signature (alternate to 0x00010000)
29
- SFNT_VERSION_TRUE = 0x74727965 # 'true' in ASCII
30
-
31
- # dfont resource fork signatures
32
- DFONT_RESOURCE_HEADER = "\x00\x00\x01\x00"
28
+ # Apple 'true' TrueType signature (alternate to 0x00010000).
29
+ # Bytes: 0x74 ('t') 0x72 ('r') 0x75 ('u') 0x65 ('e').
30
+ SFNT_VERSION_TRUE = 0x74727565
31
+
32
+ # Four-byte file signatures used for content-based format detection.
33
+ # Pre-packed once here so format detection doesn't repack on every call.
34
+ SFNT_TRUETYPE_MAGIC = "\x00\x01\x00\x00".b.freeze # packed SFNT_VERSION_TRUETYPE
35
+ SFNT_TRUE_MAGIC = "true".b.freeze # Apple legacy TrueType
36
+ SFNT_OTTO_MAGIC = "OTTO".b.freeze # OpenType / CFF
37
+ WOFF_MAGIC = "wOFF".b.freeze
38
+ WOFF2_MAGIC = "wOF2".b.freeze
39
+
40
+ # dfont resource fork signatures.
41
+ # Note: bytes differ from SFNT_TRUETYPE_MAGIC despite the visual similarity —
42
+ # dfont is "\x00\x00\x01\x00" (resource-data offset 256, big-endian),
43
+ # SFNT_TRUETYPE_MAGIC is "\x00\x01\x00\x00" (sfnt version 0x00010000).
44
+ DFONT_RESOURCE_HEADER = "\x00\x00\x01\x00".b.freeze
33
45
  SFNT_RESOURCE_TYPE = "sfnt"
34
46
  FOND_RESOURCE_TYPE = "FOND"
35
47
 
@@ -173,5 +185,16 @@ module Fontisan
173
185
  def self.intern_string(str)
174
186
  STRING_POOL[str] || str.freeze
175
187
  end
188
+
189
+ # Classify a 4-byte SFNT-style signature read from a font file's magic.
190
+ #
191
+ # @param signature [String, nil]
192
+ # @return [Symbol, nil] :ttf, :otf, or nil for unrecognised signatures
193
+ def self.sfnt_format_for(signature)
194
+ case signature
195
+ when SFNT_TRUETYPE_MAGIC, SFNT_TRUE_MAGIC then :ttf
196
+ when SFNT_OTTO_MAGIC then :otf
197
+ end
198
+ end
176
199
  end
177
200
  end
@@ -1,22 +1,31 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "stringio"
3
4
  require_relative "constants"
4
5
  require_relative "loading_modes"
5
6
  require_relative "true_type_font"
6
7
  require_relative "open_type_font"
7
8
  require_relative "true_type_collection"
8
9
  require_relative "open_type_collection"
10
+ require_relative "dfont_collection"
9
11
  require_relative "woff_font"
10
12
  require_relative "woff2_font"
11
13
  require_relative "type1_font"
14
+ require_relative "parsers/dfont_parser"
12
15
  require_relative "error"
13
16
 
14
17
  module Fontisan
15
- # FontLoader provides unified font loading with automatic format detection.
18
+ # FontLoader provides unified font loading with content-based format detection.
16
19
  #
17
20
  # This class is the primary entry point for loading fonts in Fontisan.
18
- # It automatically detects the font format and returns the appropriate
19
- # domain object (TrueTypeFont, OpenTypeFont, Type1Font, TrueTypeCollection, or OpenTypeCollection).
21
+ # It inspects each file's magic bytes to determine the on-disk format and
22
+ # returns the appropriate domain object (TrueTypeFont, OpenTypeFont, WoffFont,
23
+ # Woff2Font, Type1Font, TrueTypeCollection, OpenTypeCollection, or DfontCollection).
24
+ #
25
+ # Detection is purely content-based — the file extension is ignored. This
26
+ # matters because vendors occasionally ship files with a misleading
27
+ # extension (e.g. Apple ships a single OpenType-CFF font as `.ttc` in
28
+ # macOS's private FontServices framework).
20
29
  #
21
30
  # @example Load any font type
22
31
  # font = FontLoader.load("font.ttf") # => TrueTypeFont
@@ -34,7 +43,28 @@ module Fontisan
34
43
  # font = FontLoader.load("font.ttf", lazy: true) # Tables loaded on-demand
35
44
  # font = FontLoader.load("font.ttf", lazy: false) # All tables loaded upfront
36
45
  class FontLoader
37
- # Load a font from file with automatic format detection
46
+ # Number of bytes read from the start of a file to identify its format.
47
+ # 100 bytes is enough to comfortably contain the Adobe Type 1 PFA header
48
+ # plus its leading whitespace, and far more than the 4 bytes needed for
49
+ # any SFNT-style or dfont magic.
50
+ PFA_PROBE_LENGTH = 100
51
+ private_constant :PFA_PROBE_LENGTH
52
+
53
+ # Map of collection format symbols to the class that loads them. Single
54
+ # source of truth for "what counts as a collection"; both {.collection?}
55
+ # and {.load_collection} dispatch off this table.
56
+ COLLECTION_CLASSES = {
57
+ ttc: TrueTypeCollection,
58
+ otc: OpenTypeCollection,
59
+ dfont: DfontCollection,
60
+ }.freeze
61
+ private_constant :COLLECTION_CLASSES
62
+
63
+ # Load a font from file with content-based format detection.
64
+ #
65
+ # The file's bytes determine its format; the extension is ignored. See
66
+ # {.detect_format} for the full list of recognised formats and how they
67
+ # are detected.
38
68
  #
39
69
  # @param path [String] Path to the font file
40
70
  # @param font_index [Integer] Index of font in collection (0-based, default: 0)
@@ -45,125 +75,87 @@ module Fontisan
45
75
  # @raise [UnsupportedFormatError] for unsupported formats
46
76
  # @raise [InvalidFontError] for corrupted or unknown formats
47
77
  def self.load(path, font_index: 0, mode: nil, lazy: nil)
48
- raise Errno::ENOENT, "File not found: #{path}" unless File.exist?(path)
49
-
50
- # Resolve mode and lazy parameters with environment variables
51
78
  resolved_mode = mode || env_mode || LoadingModes::FULL
52
79
  resolved_lazy = if lazy.nil?
53
80
  env_lazy.nil? ? false : env_lazy
54
81
  else
55
82
  lazy
56
83
  end
57
-
58
- # Validate mode
59
84
  LoadingModes.validate_mode!(resolved_mode)
60
85
 
61
- # Check for Type 1 format first (PFB/PFA have different signatures)
62
- if type1_font?(path)
63
- return Type1Font.from_file(path, mode: resolved_mode)
64
- end
65
-
66
- File.open(path, "rb") do |io|
67
- signature = io.read(4)
68
- io.rewind
69
-
70
- case signature
71
- when Constants::TTC_TAG
72
- load_from_collection(io, path, font_index, mode: resolved_mode,
73
- lazy: resolved_lazy)
74
- when pack_uint32(Constants::SFNT_VERSION_TRUETYPE), "true"
75
- TrueTypeFont.from_file(path, mode: resolved_mode, lazy: resolved_lazy)
76
- when "OTTO"
77
- OpenTypeFont.from_file(path, mode: resolved_mode, lazy: resolved_lazy)
78
- when "wOFF"
79
- WoffFont.from_file(path, mode: resolved_mode, lazy: resolved_lazy)
80
- when "wOF2"
81
- Woff2Font.from_file(path, mode: resolved_mode, lazy: resolved_lazy)
82
- when Constants::DFONT_RESOURCE_HEADER
83
- extract_and_load_dfont(io, path, font_index, resolved_mode,
84
- resolved_lazy)
85
- else
86
- raise InvalidFontError,
87
- "Unknown font format. Expected TTF, OTF, TTC, OTC, WOFF, WOFF2, PFB, or PFA file."
88
- end
86
+ format = detect(path)
87
+ case format
88
+ when :ttf then TrueTypeFont.from_file(path, mode: resolved_mode, lazy: resolved_lazy)
89
+ when :otf then OpenTypeFont.from_file(path, mode: resolved_mode, lazy: resolved_lazy)
90
+ when :woff then WoffFont.from_file(path, mode: resolved_mode, lazy: resolved_lazy)
91
+ when :woff2 then Woff2Font.from_file(path, mode: resolved_mode, lazy: resolved_lazy)
92
+ when :ttc, :otc then load_from_collection(path, format, font_index, mode: resolved_mode)
93
+ when :dfont then load_dfont(path, font_index: font_index, mode: resolved_mode)
94
+ when :pfa, :pfb then Type1Font.from_file(path, mode: resolved_mode)
95
+ else
96
+ raise InvalidFontError,
97
+ "Unknown font format. Expected TTF, OTF, TTC, OTC, WOFF, WOFF2, PFB, or PFA file."
89
98
  end
90
99
  end
91
100
 
92
- # Check if a file is a collection (TTC or OTC)
101
+ # Check if a file is a collection (TTC, OTC, or dfont).
102
+ #
103
+ # Returns `false` for a ttcf-headed file whose inner fonts can't be
104
+ # classified (truncated header, offsets past EOF, unrecognised inner
105
+ # SFNT versions). Such a file is structurally invalid as a collection
106
+ # and would fail to load, so reporting it as "not a collection" matches
107
+ # what callers can actually do with it.
93
108
  #
94
109
  # @param path [String] Path to the font file
95
- # @return [Boolean] true if file is a TTC/OTC collection
110
+ # @return [Boolean] true if file is a loadable collection
96
111
  # @raise [Errno::ENOENT] if file does not exist
97
112
  #
98
113
  # @example Check if file is collection
99
114
  # FontLoader.collection?("fonts.ttc") # => true
100
115
  # FontLoader.collection?("font.ttf") # => false
101
- def self.collection?(path)
102
- raise Errno::ENOENT, "File not found: #{path}" unless File.exist?(path)
103
-
104
- File.open(path, "rb") do |io|
105
- signature = io.read(4)
106
- io.rewind
107
-
108
- # Check for TTC/OTC signature
109
- return true if signature == Constants::TTC_TAG
110
-
111
- # Check for dfont - dfont is a collection format even if it contains only one font
112
- if signature == Constants::DFONT_RESOURCE_HEADER
113
- require_relative "parsers/dfont_parser"
114
- return Parsers::DfontParser.dfont?(io)
115
- end
116
-
117
- false
118
- end
119
- end
116
+ def self.collection?(path) = COLLECTION_CLASSES.key?(detect(path))
120
117
 
121
- # Load a collection object without extracting fonts
122
- #
123
- # Returns the collection object (TrueTypeCollection, OpenTypeCollection, or DfontCollection)
124
- # without extracting individual fonts. Useful for inspecting collection
125
- # metadata and structure.
118
+ # Identify a font file by inspecting its magic bytes (content-based detection).
126
119
  #
127
- # = Collection Format Understanding
120
+ # Returns the actual on-disk format regardless of the file extension. This
121
+ # is the authoritative way to determine how a file should be parsed,
122
+ # because vendors occasionally ship files with a misleading extension
123
+ # (for example, Apple ships a single OpenType-CFF font as `.ttc` in
124
+ # macOS's private FontServices framework).
128
125
  #
129
- # Both TTC (TrueType Collection) and OTC (OpenType Collection) files use
130
- # the same "ttcf" signature. The distinction between TTC and OTC is NOT
131
- # in the collection format itself, but in the fonts contained within:
126
+ # Collections are distinguished by scanning the inner fonts: if any inner
127
+ # font is OpenType (CFF), the file is reported as `:otc`; otherwise (all
128
+ # inner fonts are TrueType) it is reported as `:ttc`. A ttcf-headed file
129
+ # whose inner fonts can't be classified (truncated header, offsets past
130
+ # EOF, unrecognised inner SFNT versions) returns `nil`. dfont detection
131
+ # uses the canonical resource-data-offset (256) magic only; non-canonical
132
+ # but structurally valid dfonts are accepted by {.load_collection} as a
133
+ # fallback but not reported here.
132
134
  #
133
- # - TTC typically contains TrueType fonts (glyf outlines)
134
- # - OTC typically contains OpenType fonts (CFF/CFF2 outlines)
135
- # - Mixed collections are possible (both TTF and OTF in same collection)
136
- #
137
- # dfont (Data Fork Font) is an Apple-specific format that contains Mac
138
- # font suitcase resources. It can contain multiple SFNT fonts (TrueType
139
- # or OpenType).
140
- #
141
- # Each collection can contain multiple SFNT-format font files, with table
142
- # deduplication to save space. Individual fonts within a collection are
143
- # stored at different offsets within the file, each with their own table
144
- # directory and data tables.
145
- #
146
- # = Detection Strategy
135
+ # @param path [String] Path to the font file
136
+ # @return [Symbol, nil] One of `:ttf`, `:otf`, `:ttc`, `:otc`, `:woff`,
137
+ # `:woff2`, `:dfont`, `:pfa`, `:pfb`, or `nil` when the format is not
138
+ # recognised.
139
+ # @raise [Errno::ENOENT] if the file does not exist
147
140
  #
148
- # This method scans ALL fonts in the collection to determine the collection
149
- # type accurately:
141
+ # @example Detect a real collection
142
+ # FontLoader.detect_format("fonts.ttc") # => :ttc
150
143
  #
151
- # 1. Reads all font offsets from the collection header
152
- # 2. Examines the sfnt_version of each font in the collection
153
- # 3. Counts TrueType fonts (0x00010000 or 0x74727565 "true") vs OpenType fonts (0x4F54544F "OTTO")
154
- # 4. If ANY font is OpenType (CFF), returns OpenTypeCollection
155
- # 5. Only returns TrueTypeCollection if ALL fonts are TrueType
144
+ # @example Detect a single OTF mislabeled as .ttc
145
+ # FontLoader.detect_format("SauberScript.ttc") # => :otf
146
+ def self.detect_format(path) = detect(path)
147
+
148
+ # Load a collection object without extracting fonts
156
149
  #
157
- # For dfont files, returns DfontCollection.
150
+ # Returns the collection object (TrueTypeCollection, OpenTypeCollection,
151
+ # or DfontCollection) without extracting individual fonts. Useful for
152
+ # inspecting collection metadata and structure.
158
153
  #
159
- # This approach correctly handles:
160
- # - Homogeneous collections (all TTF or all OTF)
161
- # - Mixed collections (both TTF and OTF fonts) - uses OpenTypeCollection
162
- # - Large collections with many fonts (like NotoSerifCJK.ttc with 35 fonts)
163
- # - dfont suitcases (Apple-specific)
154
+ # The TTC vs. OTC distinction is resolved by {.detect_format}, which
155
+ # scans the inner fonts; see that method for details.
164
156
  #
165
157
  # @param path [String] Path to the collection file
166
- # @return [TrueTypeCollection, OpenTypeCollection, DfontCollection] The collection object
158
+ # @return [TrueTypeCollection, OpenTypeCollection, DfontCollection]
167
159
  # @raise [Errno::ENOENT] if file does not exist
168
160
  # @raise [InvalidFontError] if file is not a collection or type cannot be determined
169
161
  #
@@ -171,69 +163,88 @@ module Fontisan
171
163
  # collection = FontLoader.load_collection("fonts.ttc")
172
164
  # puts "Collection has #{collection.num_fonts} fonts"
173
165
  def self.load_collection(path)
174
- raise Errno::ENOENT, "File not found: #{path}" unless File.exist?(path)
166
+ format = detect(path)
167
+ return COLLECTION_CLASSES.fetch(format).from_file(path) if COLLECTION_CLASSES.key?(format)
175
168
 
169
+ # Lenient fallback: a dfont whose resource-data offset isn't the
170
+ # canonical 256 fails the strict magic test in {.detect} but may still
171
+ # be structurally valid; try the structural check before giving up.
176
172
  File.open(path, "rb") do |io|
177
- signature = io.read(4)
178
- io.rewind
179
-
180
- # Check for dfont
181
- if signature == Constants::DFONT_RESOURCE_HEADER || dfont_signature?(io)
182
- require_relative "dfont_collection"
183
- return DfontCollection.from_file(path)
184
- end
185
-
186
- # Check for TTC/OTC
187
- unless signature == Constants::TTC_TAG
188
- raise InvalidFontError,
189
- "File is not a collection (TTC/OTC/dfont). Use FontLoader.load instead."
190
- end
191
-
192
- # Read version and num_fonts
193
- io.seek(8) # Skip tag (4) + version (4)
194
- num_fonts = io.read(4).unpack1("N")
195
-
196
- # Read all font offsets
197
- font_offsets = Array.new(num_fonts) { io.read(4).unpack1("N") }
173
+ return DfontCollection.from_file(path) if Parsers::DfontParser.dfont?(io)
174
+ end
175
+ raise InvalidFontError,
176
+ "File is not a collection (TTC/OTC/dfont). Use FontLoader.load instead."
177
+ end
198
178
 
199
- # Scan all fonts to determine collection type (not just first)
200
- truetype_count = 0
201
- opentype_count = 0
179
+ # Content-based detection. Reads 4 bytes first (covers every SFNT-style
180
+ # and canonical dfont magic), then tops up to {PFA_PROBE_LENGTH} for
181
+ # Type 1 only on an SFNT miss.
182
+ def self.detect(path)
183
+ raise Errno::ENOENT, "File not found: #{path}" unless File.exist?(path)
202
184
 
203
- font_offsets.each do |offset|
204
- io.rewind
205
- io.seek(offset)
206
- sfnt_version = io.read(4).unpack1("N")
185
+ File.open(path, "rb") do |io|
186
+ head4 = io.read(4)
187
+ return nil if head4.nil? || head4.empty?
188
+
189
+ sfnt = case head4
190
+ when Constants::TTC_TAG then scan_collection(io)
191
+ when Constants::SFNT_OTTO_MAGIC then :otf
192
+ when Constants::SFNT_TRUETYPE_MAGIC, Constants::SFNT_TRUE_MAGIC then :ttf
193
+ when Constants::WOFF_MAGIC then :woff
194
+ when Constants::WOFF2_MAGIC then :woff2
195
+ when Constants::DFONT_RESOURCE_HEADER
196
+ io.rewind
197
+ Parsers::DfontParser.dfont?(io) ? :dfont : nil
198
+ end
199
+ return sfnt if sfnt
200
+
201
+ rest = head4.bytesize < PFA_PROBE_LENGTH ? io.read(PFA_PROBE_LENGTH - head4.bytesize) : nil
202
+ type1_format_from_header(rest ? head4 + rest : head4)
203
+ end
204
+ end
207
205
 
208
- case sfnt_version
209
- when Constants::SFNT_VERSION_TRUETYPE, 0x74727565 # 0x74727565 = 'true'
210
- truetype_count += 1
211
- when Constants::SFNT_VERSION_OTTO
212
- opentype_count += 1
213
- else
214
- raise InvalidFontError,
215
- "Unknown font type in collection at offset #{offset} (sfnt version: 0x#{sfnt_version.to_s(16)})"
216
- end
206
+ # Identify the Type 1 sub-format (`:pfa` or `:pfb`) from a probe of the
207
+ # file's leading bytes. Returns nil if the bytes don't match Type 1.
208
+ def self.type1_format_from_header(header)
209
+ if header.bytesize >= 2
210
+ marker = (header.getbyte(0) << 8) | header.getbyte(1)
211
+ if [Constants::PFB_ASCII_CHUNK, Constants::PFB_BINARY_CHUNK].include?(marker)
212
+ return :pfb
217
213
  end
214
+ end
215
+
216
+ # PFA is plain text — the Adobe Type 1 header must appear at the very
217
+ # start (allowing only leading ASCII whitespace), not anywhere in the
218
+ # probe. Using start_with? avoids matching a non-Type-1 PostScript file
219
+ # that happens to mention the signature in a comment.
220
+ stripped = header.lstrip
221
+ if stripped.start_with?(Constants::PFA_SIGNATURE_ADOBE_1_0, Constants::PFA_SIGNATURE_ADOBE_3_0)
222
+ return :pfa
223
+ end
218
224
 
219
- io.rewind
225
+ nil
226
+ end
220
227
 
221
- # Determine collection type based on what fonts are inside
222
- # If ANY font is OpenType, use OpenTypeCollection (more general format)
223
- # Only use TrueTypeCollection if ALL fonts are TrueType
224
- if opentype_count.positive?
225
- OpenTypeCollection.from_file(path)
226
- else
227
- # All fonts are TrueType
228
- TrueTypeCollection.from_file(path)
228
+ # Walk a ttcf-headed file via BaseCollection. Returns `:ttc`, `:otc`, or
229
+ # nil for any truncation, unreadable offset, or unrecognised inner magic.
230
+ def self.scan_collection(io)
231
+ io.rewind
232
+ header = BaseCollection.read(io)
233
+ has_otf = false
234
+ header.font_offsets.each do |offset|
235
+ io.seek(offset)
236
+ case Constants.sfnt_format_for(io.read(4))
237
+ when :otf then has_otf = true
238
+ when :ttf then next
239
+ else return nil
229
240
  end
230
241
  end
242
+ has_otf ? :otc : :ttc
243
+ rescue BinData::ValidityError, IOError
244
+ nil
231
245
  end
232
246
 
233
- # Get mode from environment variable
234
- #
235
- # @return [Symbol, nil] Mode from FONTISAN_MODE or nil
236
- # @api private
247
+ # Mode override from FONTISAN_MODE env var, or nil.
237
248
  def self.env_mode
238
249
  env_value = ENV["FONTISAN_MODE"]
239
250
  return nil unless env_value
@@ -242,10 +253,7 @@ module Fontisan
242
253
  LoadingModes.valid_mode?(mode) ? mode : nil
243
254
  end
244
255
 
245
- # Get lazy setting from environment variable
246
- #
247
- # @return [Boolean, nil] Lazy setting from FONTISAN_LAZY or nil if not set
248
- # @api private
256
+ # Lazy override from FONTISAN_LAZY env var, or nil.
249
257
  def self.env_lazy
250
258
  env_value = ENV["FONTISAN_LAZY"]
251
259
  return nil unless env_value
@@ -253,182 +261,40 @@ module Fontisan
253
261
  env_value.downcase == "true"
254
262
  end
255
263
 
256
- # Load from a collection file (TTC or OTC)
257
- #
258
- # This is the internal method that handles loading individual fonts from
259
- # collection files. It reads the collection header to determine the type
260
- # (TTC vs OTC) and extracts the requested font.
261
- #
262
- # = Collection Header Structure
263
- #
264
- # TTC/OTC files start with:
265
- # - Bytes 0-3: "ttcf" tag (4 bytes)
266
- # - Bytes 4-7: version (2 bytes major + 2 bytes minor)
267
- # - Bytes 8-11: num_fonts (4 bytes, big-endian uint32)
268
- # - Bytes 12+: font offset array (4 bytes per font, big-endian uint32)
269
- #
270
- # CRITICAL: The method seeks to position 8 (after tag and version) to read
271
- # num_fonts, NOT position 12 which is where the offset array starts. This
272
- # was a bug that caused "Unknown font type" errors when the first offset
273
- # was misread as num_fonts.
274
- #
275
- # @param io [IO] Open file handle
276
- # @param path [String] Path to the collection file
277
- # @param font_index [Integer] Index of font to extract
278
- # @param mode [Symbol] Loading mode (:metadata or :full)
279
- # @param lazy [Boolean] If true, load tables on demand
280
- # @return [TrueTypeFont, OpenTypeFont] The loaded font object
281
- # @raise [InvalidFontError] if collection type cannot be determined
282
- def self.load_from_collection(io, path, font_index,
283
- mode: LoadingModes::FULL, lazy: true)
284
- # Read collection header to get font offsets
285
- io.seek(8) # Skip tag (4) + version (4)
286
- num_fonts = io.read(4).unpack1("N")
287
-
288
- if font_index >= num_fonts
264
+ # Load a single font from a TTC/OTC collection. `format` is the detected
265
+ # symbol routed from `.load`'s case statement, so no second magic read.
266
+ def self.load_from_collection(path, format, font_index, mode:)
267
+ collection = COLLECTION_CLASSES.fetch(format).from_file(path)
268
+ if font_index >= collection.num_fonts
289
269
  raise InvalidFontError,
290
- "Font index #{font_index} out of range (collection has #{num_fonts} fonts)"
291
- end
292
-
293
- # Read all font offsets
294
- font_offsets = Array.new(num_fonts) { io.read(4).unpack1("N") }
295
-
296
- # Scan all fonts to determine collection type (not just first)
297
- truetype_count = 0
298
- opentype_count = 0
299
-
300
- font_offsets.each do |offset|
301
- io.rewind
302
- io.seek(offset)
303
- sfnt_version = io.read(4).unpack1("N")
304
-
305
- case sfnt_version
306
- when Constants::SFNT_VERSION_TRUETYPE, 0x74727565 # 0x74727565 = 'true'
307
- truetype_count += 1
308
- when Constants::SFNT_VERSION_OTTO
309
- opentype_count += 1
310
- else
311
- raise InvalidFontError,
312
- "Unknown font type in collection at offset #{offset} (sfnt version: 0x#{sfnt_version.to_s(16)})"
313
- end
270
+ "Font index #{font_index} out of range (collection has #{collection.num_fonts} fonts)"
314
271
  end
315
272
 
316
- io.rewind
317
-
318
- # If ANY font is OpenType, use OpenTypeCollection (more general format)
319
- # Only use TrueTypeCollection if ALL fonts are TrueType
320
- if opentype_count.positive?
321
- # OpenType Collection
322
- otc = OpenTypeCollection.from_file(path)
323
- File.open(path, "rb") { |f| otc.font(font_index, f, mode: mode) }
324
- else
325
- # TrueType Collection (all fonts are TrueType)
326
- ttc = TrueTypeCollection.from_file(path)
327
- File.open(path, "rb") { |f| ttc.font(font_index, f, mode: mode) }
328
- end
273
+ File.open(path, "rb") { |io| collection.font(font_index, io, mode: mode) }
329
274
  end
330
275
 
331
- # Extract and load font from dfont resource fork
332
- #
333
- # @param io [IO] Open file handle
334
- # @param path [String] Path to dfont file
335
- # @param font_index [Integer] Font index in suitcase
336
- # @param mode [Symbol] Loading mode
337
- # @param lazy [Boolean] Lazy loading flag
338
- # @return [TrueTypeFont, OpenTypeFont] Loaded font
339
- # @api private
340
- def self.extract_and_load_dfont(io, _path, font_index, mode, lazy)
341
- require_relative "parsers/dfont_parser"
342
-
343
- # Extract SFNT data from resource fork
344
- sfnt_data = Parsers::DfontParser.extract_sfnt(io, index: font_index)
345
-
346
- # Create StringIO with SFNT data
347
- sfnt_io = StringIO.new(sfnt_data)
348
-
349
- # Detect SFNT signature
350
- signature = sfnt_io.read(4)
351
- sfnt_io.rewind
352
-
353
- # Read and setup font based on signature
354
- case signature
355
- when pack_uint32(Constants::SFNT_VERSION_TRUETYPE), "true"
356
- font = TrueTypeFont.read(sfnt_io)
357
- font.initialize_storage
358
- font.loading_mode = mode
359
- font.lazy_load_enabled = lazy
360
- font.read_table_data(sfnt_io) unless lazy
361
- font
362
- when "OTTO"
363
- font = OpenTypeFont.read(sfnt_io)
364
- font.initialize_storage
365
- font.loading_mode = mode
366
- font.lazy_load_enabled = lazy
367
- font.read_table_data(sfnt_io) unless lazy
368
- font
369
- else
370
- raise InvalidFontError,
371
- "Invalid SFNT data in dfont resource (signature: #{signature.inspect})"
372
- end
373
- end
374
-
375
- # Pack uint32 value to big-endian bytes
376
- #
377
- # @param value [Integer] The uint32 value
378
- # @return [String] 4-byte binary string
379
- # @api private
380
- def self.pack_uint32(value)
381
- [value].pack("N")
382
- end
383
-
384
- private_class_method :load_from_collection, :pack_uint32, :env_mode,
385
- :env_lazy, :extract_and_load_dfont
386
-
387
- # Check if file has dfont signature
388
- #
389
- # @param io [IO] Open file handle
390
- # @return [Boolean] true if dfont
391
- # @api private
392
- def self.dfont_signature?(io)
393
- require_relative "parsers/dfont_parser"
394
- Parsers::DfontParser.dfont?(io)
395
- end
396
-
397
- private_class_method :dfont_signature?
398
-
399
- # Check if file is a Type 1 font (PFB or PFA)
400
- #
401
- # Type 1 fonts come in two formats:
402
- # - PFB (Printer Font Binary): Binary format with chunk markers
403
- # - PFA (Printer Font ASCII): ASCII text format with hex encoding
404
- #
405
- # @param path [String] Path to the font file
406
- # @return [Boolean] true if Type 1 font
407
- # @api private
408
- def self.type1_font?(path)
409
- # Check file extension first (quick check)
410
- ext = File.extname(path).downcase
411
- return true if [".pfb", ".pfa", ".ps"].include?(ext)
412
-
413
- # Check PFB signature (first byte should be 0x80 or 0x81)
276
+ # Extract an SFNT from a dfont resource fork into memory and load it via
277
+ # `SfntFont.from_collection` so the loading-mode handling matches the
278
+ # TTC/OTC path. Lazy loading is a no-op for in-memory StringIO so the
279
+ # public `lazy:` flag is not threaded through this path.
280
+ def self.load_dfont(path, font_index:, mode:)
414
281
  File.open(path, "rb") do |io|
415
- first_byte = io.getbyte
416
- return true if [Constants::PFB_ASCII_CHUNK, Constants::PFB_BINARY_CHUNK].include?(first_byte)
282
+ sfnt_io = StringIO.new(Parsers::DfontParser.extract_sfnt(io, index: font_index))
283
+ klass = case Constants.sfnt_format_for(sfnt_io.read(4))
284
+ when :ttf then TrueTypeFont
285
+ when :otf then OpenTypeFont
286
+ else raise InvalidFontError, "Invalid SFNT in dfont resource"
287
+ end
288
+ klass.from_collection(sfnt_io, 0, mode: mode)
417
289
  end
418
-
419
- # Check PFA signature (text file with Adobe header)
420
- File.open(path, "rb") do |io|
421
- # Read first 100 bytes to check for PFA signature
422
- header = io.read(100)
423
- return true if header.include?(Constants::PFA_SIGNATURE_ADOBE_1_0) ||
424
- header.include?(Constants::PFA_SIGNATURE_ADOBE_3_0)
425
- end
426
-
427
- false
428
- rescue IOError, Errno::ENOENT
429
- false
430
290
  end
431
291
 
432
- private_class_method :type1_font?
292
+ private_class_method :detect,
293
+ :type1_format_from_header,
294
+ :scan_collection,
295
+ :env_mode,
296
+ :env_lazy,
297
+ :load_from_collection,
298
+ :load_dfont
433
299
  end
434
300
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Fontisan
4
- VERSION = "0.2.16"
4
+ VERSION = "0.2.17"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fontisan
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.16
4
+ version: 0.2.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-05-02 00:00:00.000000000 Z
11
+ date: 2026-06-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.5'
55
+ - !ruby/object:Gem::Dependency
56
+ name: logger
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: lutaml-model
57
71
  requirement: !ruby/object:Gem::Requirement