fontisan 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +529 -65
  3. data/Gemfile +1 -0
  4. data/LICENSE +5 -1
  5. data/README.adoc +1301 -275
  6. data/Rakefile +27 -2
  7. data/benchmark/variation_quick_bench.rb +47 -0
  8. data/docs/EXTRACT_TTC_MIGRATION.md +549 -0
  9. data/fontisan.gemspec +4 -1
  10. data/lib/fontisan/binary/base_record.rb +22 -1
  11. data/lib/fontisan/cli.rb +309 -0
  12. data/lib/fontisan/collection/builder.rb +260 -0
  13. data/lib/fontisan/collection/offset_calculator.rb +227 -0
  14. data/lib/fontisan/collection/table_analyzer.rb +204 -0
  15. data/lib/fontisan/collection/table_deduplicator.rb +241 -0
  16. data/lib/fontisan/collection/writer.rb +306 -0
  17. data/lib/fontisan/commands/base_command.rb +8 -1
  18. data/lib/fontisan/commands/convert_command.rb +291 -0
  19. data/lib/fontisan/commands/export_command.rb +161 -0
  20. data/lib/fontisan/commands/info_command.rb +40 -6
  21. data/lib/fontisan/commands/instance_command.rb +295 -0
  22. data/lib/fontisan/commands/ls_command.rb +113 -0
  23. data/lib/fontisan/commands/pack_command.rb +241 -0
  24. data/lib/fontisan/commands/subset_command.rb +245 -0
  25. data/lib/fontisan/commands/unpack_command.rb +338 -0
  26. data/lib/fontisan/commands/validate_command.rb +178 -0
  27. data/lib/fontisan/commands/variable_command.rb +30 -1
  28. data/lib/fontisan/config/collection_settings.yml +56 -0
  29. data/lib/fontisan/config/conversion_matrix.yml +212 -0
  30. data/lib/fontisan/config/export_settings.yml +66 -0
  31. data/lib/fontisan/config/subset_profiles.yml +100 -0
  32. data/lib/fontisan/config/svg_settings.yml +60 -0
  33. data/lib/fontisan/config/validation_rules.yml +149 -0
  34. data/lib/fontisan/config/variable_settings.yml +99 -0
  35. data/lib/fontisan/config/woff2_settings.yml +77 -0
  36. data/lib/fontisan/constants.rb +69 -0
  37. data/lib/fontisan/converters/conversion_strategy.rb +96 -0
  38. data/lib/fontisan/converters/format_converter.rb +259 -0
  39. data/lib/fontisan/converters/outline_converter.rb +936 -0
  40. data/lib/fontisan/converters/svg_generator.rb +244 -0
  41. data/lib/fontisan/converters/table_copier.rb +117 -0
  42. data/lib/fontisan/converters/woff2_encoder.rb +416 -0
  43. data/lib/fontisan/converters/woff_writer.rb +391 -0
  44. data/lib/fontisan/error.rb +203 -0
  45. data/lib/fontisan/export/exporter.rb +262 -0
  46. data/lib/fontisan/export/table_serializer.rb +255 -0
  47. data/lib/fontisan/export/transformers/font_to_ttx.rb +172 -0
  48. data/lib/fontisan/export/transformers/head_transformer.rb +96 -0
  49. data/lib/fontisan/export/transformers/hhea_transformer.rb +59 -0
  50. data/lib/fontisan/export/transformers/maxp_transformer.rb +63 -0
  51. data/lib/fontisan/export/transformers/name_transformer.rb +63 -0
  52. data/lib/fontisan/export/transformers/os2_transformer.rb +121 -0
  53. data/lib/fontisan/export/transformers/post_transformer.rb +51 -0
  54. data/lib/fontisan/export/ttx_generator.rb +527 -0
  55. data/lib/fontisan/export/ttx_parser.rb +300 -0
  56. data/lib/fontisan/font_loader.rb +121 -12
  57. data/lib/fontisan/font_writer.rb +301 -0
  58. data/lib/fontisan/formatters/text_formatter.rb +102 -0
  59. data/lib/fontisan/glyph_accessor.rb +503 -0
  60. data/lib/fontisan/hints/hint_converter.rb +177 -0
  61. data/lib/fontisan/hints/postscript_hint_applier.rb +185 -0
  62. data/lib/fontisan/hints/postscript_hint_extractor.rb +254 -0
  63. data/lib/fontisan/hints/truetype_hint_applier.rb +71 -0
  64. data/lib/fontisan/hints/truetype_hint_extractor.rb +162 -0
  65. data/lib/fontisan/loading_modes.rb +113 -0
  66. data/lib/fontisan/metrics_calculator.rb +277 -0
  67. data/lib/fontisan/models/collection_font_summary.rb +52 -0
  68. data/lib/fontisan/models/collection_info.rb +76 -0
  69. data/lib/fontisan/models/collection_list_info.rb +37 -0
  70. data/lib/fontisan/models/font_export.rb +158 -0
  71. data/lib/fontisan/models/font_summary.rb +48 -0
  72. data/lib/fontisan/models/glyph_outline.rb +343 -0
  73. data/lib/fontisan/models/hint.rb +233 -0
  74. data/lib/fontisan/models/outline.rb +664 -0
  75. data/lib/fontisan/models/table_sharing_info.rb +40 -0
  76. data/lib/fontisan/models/ttx/glyph_order.rb +31 -0
  77. data/lib/fontisan/models/ttx/tables/binary_table.rb +67 -0
  78. data/lib/fontisan/models/ttx/tables/head_table.rb +74 -0
  79. data/lib/fontisan/models/ttx/tables/hhea_table.rb +74 -0
  80. data/lib/fontisan/models/ttx/tables/maxp_table.rb +55 -0
  81. data/lib/fontisan/models/ttx/tables/name_table.rb +45 -0
  82. data/lib/fontisan/models/ttx/tables/os2_table.rb +157 -0
  83. data/lib/fontisan/models/ttx/tables/post_table.rb +50 -0
  84. data/lib/fontisan/models/ttx/ttfont.rb +49 -0
  85. data/lib/fontisan/models/validation_report.rb +203 -0
  86. data/lib/fontisan/open_type_collection.rb +156 -2
  87. data/lib/fontisan/open_type_font.rb +296 -10
  88. data/lib/fontisan/optimizers/charstring_rewriter.rb +161 -0
  89. data/lib/fontisan/optimizers/pattern_analyzer.rb +308 -0
  90. data/lib/fontisan/optimizers/stack_tracker.rb +246 -0
  91. data/lib/fontisan/optimizers/subroutine_builder.rb +134 -0
  92. data/lib/fontisan/optimizers/subroutine_generator.rb +207 -0
  93. data/lib/fontisan/optimizers/subroutine_optimizer.rb +107 -0
  94. data/lib/fontisan/outline_extractor.rb +423 -0
  95. data/lib/fontisan/subset/builder.rb +268 -0
  96. data/lib/fontisan/subset/glyph_mapping.rb +215 -0
  97. data/lib/fontisan/subset/options.rb +142 -0
  98. data/lib/fontisan/subset/profile.rb +152 -0
  99. data/lib/fontisan/subset/table_subsetter.rb +461 -0
  100. data/lib/fontisan/svg/font_face_generator.rb +278 -0
  101. data/lib/fontisan/svg/font_generator.rb +264 -0
  102. data/lib/fontisan/svg/glyph_generator.rb +168 -0
  103. data/lib/fontisan/svg/view_box_calculator.rb +137 -0
  104. data/lib/fontisan/tables/cff/cff_glyph.rb +176 -0
  105. data/lib/fontisan/tables/cff/charset.rb +282 -0
  106. data/lib/fontisan/tables/cff/charstring.rb +905 -0
  107. data/lib/fontisan/tables/cff/charstring_builder.rb +322 -0
  108. data/lib/fontisan/tables/cff/charstrings_index.rb +162 -0
  109. data/lib/fontisan/tables/cff/dict.rb +351 -0
  110. data/lib/fontisan/tables/cff/dict_builder.rb +242 -0
  111. data/lib/fontisan/tables/cff/encoding.rb +274 -0
  112. data/lib/fontisan/tables/cff/header.rb +102 -0
  113. data/lib/fontisan/tables/cff/index.rb +237 -0
  114. data/lib/fontisan/tables/cff/index_builder.rb +170 -0
  115. data/lib/fontisan/tables/cff/private_dict.rb +284 -0
  116. data/lib/fontisan/tables/cff/top_dict.rb +236 -0
  117. data/lib/fontisan/tables/cff.rb +487 -0
  118. data/lib/fontisan/tables/cff2/blend_operator.rb +240 -0
  119. data/lib/fontisan/tables/cff2/charstring_parser.rb +591 -0
  120. data/lib/fontisan/tables/cff2/operand_stack.rb +232 -0
  121. data/lib/fontisan/tables/cff2.rb +341 -0
  122. data/lib/fontisan/tables/cvar.rb +242 -0
  123. data/lib/fontisan/tables/fvar.rb +2 -2
  124. data/lib/fontisan/tables/glyf/compound_glyph.rb +483 -0
  125. data/lib/fontisan/tables/glyf/compound_glyph_resolver.rb +136 -0
  126. data/lib/fontisan/tables/glyf/curve_converter.rb +343 -0
  127. data/lib/fontisan/tables/glyf/glyph_builder.rb +450 -0
  128. data/lib/fontisan/tables/glyf/simple_glyph.rb +382 -0
  129. data/lib/fontisan/tables/glyf.rb +235 -0
  130. data/lib/fontisan/tables/gvar.rb +270 -0
  131. data/lib/fontisan/tables/hhea.rb +124 -0
  132. data/lib/fontisan/tables/hmtx.rb +287 -0
  133. data/lib/fontisan/tables/hvar.rb +191 -0
  134. data/lib/fontisan/tables/loca.rb +322 -0
  135. data/lib/fontisan/tables/maxp.rb +192 -0
  136. data/lib/fontisan/tables/mvar.rb +185 -0
  137. data/lib/fontisan/tables/name.rb +99 -30
  138. data/lib/fontisan/tables/variation_common.rb +346 -0
  139. data/lib/fontisan/tables/vvar.rb +234 -0
  140. data/lib/fontisan/true_type_collection.rb +156 -2
  141. data/lib/fontisan/true_type_font.rb +297 -11
  142. data/lib/fontisan/utilities/brotli_wrapper.rb +159 -0
  143. data/lib/fontisan/utilities/checksum_calculator.rb +18 -0
  144. data/lib/fontisan/utils/thread_pool.rb +134 -0
  145. data/lib/fontisan/validation/checksum_validator.rb +170 -0
  146. data/lib/fontisan/validation/consistency_validator.rb +197 -0
  147. data/lib/fontisan/validation/structure_validator.rb +198 -0
  148. data/lib/fontisan/validation/table_validator.rb +158 -0
  149. data/lib/fontisan/validation/validator.rb +152 -0
  150. data/lib/fontisan/variable/axis_normalizer.rb +215 -0
  151. data/lib/fontisan/variable/delta_applicator.rb +313 -0
  152. data/lib/fontisan/variable/glyph_delta_processor.rb +218 -0
  153. data/lib/fontisan/variable/instancer.rb +344 -0
  154. data/lib/fontisan/variable/metric_delta_processor.rb +282 -0
  155. data/lib/fontisan/variable/region_matcher.rb +208 -0
  156. data/lib/fontisan/variable/static_font_builder.rb +213 -0
  157. data/lib/fontisan/variable/table_updater.rb +219 -0
  158. data/lib/fontisan/variation/blend_applier.rb +199 -0
  159. data/lib/fontisan/variation/cache.rb +298 -0
  160. data/lib/fontisan/variation/cache_key_builder.rb +162 -0
  161. data/lib/fontisan/variation/converter.rb +268 -0
  162. data/lib/fontisan/variation/data_extractor.rb +86 -0
  163. data/lib/fontisan/variation/delta_applier.rb +266 -0
  164. data/lib/fontisan/variation/delta_parser.rb +228 -0
  165. data/lib/fontisan/variation/inspector.rb +275 -0
  166. data/lib/fontisan/variation/instance_generator.rb +273 -0
  167. data/lib/fontisan/variation/interpolator.rb +231 -0
  168. data/lib/fontisan/variation/metrics_adjuster.rb +318 -0
  169. data/lib/fontisan/variation/optimizer.rb +418 -0
  170. data/lib/fontisan/variation/parallel_generator.rb +150 -0
  171. data/lib/fontisan/variation/region_matcher.rb +221 -0
  172. data/lib/fontisan/variation/subsetter.rb +463 -0
  173. data/lib/fontisan/variation/table_accessor.rb +105 -0
  174. data/lib/fontisan/variation/validator.rb +345 -0
  175. data/lib/fontisan/variation/variation_context.rb +211 -0
  176. data/lib/fontisan/version.rb +1 -1
  177. data/lib/fontisan/woff2/directory.rb +257 -0
  178. data/lib/fontisan/woff2/header.rb +101 -0
  179. data/lib/fontisan/woff2/table_transformer.rb +163 -0
  180. data/lib/fontisan/woff2_font.rb +712 -0
  181. data/lib/fontisan/woff_font.rb +483 -0
  182. data/lib/fontisan.rb +120 -0
  183. data/scripts/compare_stack_aware.rb +187 -0
  184. data/scripts/measure_optimization.rb +141 -0
  185. metadata +205 -4
@@ -0,0 +1,308 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "stringio"
4
+ require_relative "stack_tracker"
5
+
6
module Fontisan
  module Optimizers
    # Analyzes CharString byte sequences across glyphs to find repeated
    # sequences that are candidates for subroutinization.
    #
    # Candidates are collected by slicing each CharString at a handful of
    # discrete lengths (preferably between operator boundaries) and counting
    # identical slices in a Hash; no suffix tree is involved.
    #
    # With `stack_aware: true` every candidate span must additionally be
    # reported as stack-neutral by a StackTracker for the owning glyph.
    #
    # @example Basic usage
    #   analyzer = PatternAnalyzer.new(min_length: 10)
    #   charstrings = { 0 => "\x01\x02...", 1 => "\x01\x02..." }
    #   patterns = analyzer.analyze(charstrings)
    #
    # @example Stack-aware analysis
    #   analyzer = PatternAnalyzer.new(min_length: 10, stack_aware: true)
    #   patterns = analyzer.analyze(charstrings)
    #
    # @see docs/SUBROUTINE_ARCHITECTURE.md
    class PatternAnalyzer
      # Pattern data structure representing a repeated CharString sequence
      Pattern = Struct.new(
        :bytes,         # String: pattern byte sequence
        :length,        # Integer: byte length
        :glyphs,        # Array<Integer>: glyph IDs containing pattern
        :frequency,     # Integer: number of occurrences
        :savings,       # Integer: total byte savings
        :positions,     # Hash<Integer, Array<Integer>>: glyph_id => [positions]
        :stack_neutral, # Boolean: whether pattern is stack-neutral
      ) do
        # Calculate overhead for calling this pattern as a subroutine
        #
        # NOTE(review): the operand size is estimated from `frequency`, not
        # from the (not yet assigned) subroutine index - confirm this
        # approximation is intended.
        #
        # @return [Integer] byte overhead (callsubr + number + return)
        def call_overhead
          1 + number_size(frequency) + 1 # callsubr + number + return
        end

        # Calculate CFF integer encoding size
        # @param num [Integer] number to encode
        # @return [Integer] byte size of encoded number
        def number_size(num)
          return 1 if num >= -107 && num <= 107
          return 2 if num >= -1131 && num <= 1131
          return 3 if num >= -32768 && num <= 32767

          5
        end
      end

      # Extra candidate lengths (in bytes) tried on top of min_length
      LENGTH_STEPS = [0, 5, 10, 15, 20].freeze

      # Candidates longer than min_length + MAX_EXTRA_LENGTH are discarded
      MAX_EXTRA_LENGTH = 25

      # Initialize pattern analyzer
      # @param min_length [Integer] minimum pattern length in bytes
      # @param stack_aware [Boolean] whether to enforce stack-neutral patterns
      def initialize(min_length: 10, stack_aware: false)
        @min_length = min_length
        @stack_aware = stack_aware
        @patterns = {}
        @stack_trackers = {} # Cache StackTracker instances per glyph
      end

      # Analyze CharStrings to find repeated patterns
      #
      # Every call starts from a clean state, so one analyzer instance can be
      # reused for several fonts without results leaking between calls.
      #
      # @param charstrings [Hash<Integer, String>] glyph_id => charstring_bytes
      # @return [Array<Pattern>] patterns sorted by savings (descending)
      # @raise [ArgumentError] if charstrings is empty
      def analyze(charstrings)
        raise ArgumentError, "No CharStrings provided" if charstrings.empty?

        # Drop results of any previous run; otherwise stale patterns and
        # trackers from an earlier font would be mixed into this result.
        @patterns = {}
        @stack_trackers = {}

        # Build stack trackers if stack-aware mode enabled
        build_stack_trackers(charstrings) if @stack_aware

        # Extract all byte sequences and build pattern candidates
        extract_patterns(charstrings)

        # Calculate savings for each pattern
        calculate_savings

        # Filter patterns by minimum length and positive savings
        filter_patterns

        # Sort by savings (descending) and return
        @patterns.values.sort_by { |p| -p.savings }
      end

      private

      # Find operator boundaries in CharString
      #
      # Returns positions where operators end, which are valid pattern
      # boundaries. The hint mask bytes that follow hintmask/cntrmask are
      # treated as part of the operator; their count is derived from the number
      # of stem hints declared so far (operands are counted in pairs, an odd
      # leading width operand is ignored by the integer division).
      #
      # NOTE(review): stems declared inside already-existing subroutines are
      # not visible here, so the mask length can be underestimated for
      # CharStrings that still contain callsubr/callgsubr.
      #
      # @param charstring [String] CharString bytes
      # @return [Array<Integer>] byte positions of boundaries
      def find_operator_boundaries(charstring)
        io = StringIO.new(charstring)
        boundaries = [0] # Start is always a boundary
        pending_operands = 0 # numbers seen since the previous operator
        stem_count = 0

        until io.eof?
          byte = io.getbyte

          if byte <= 31 && byte != 28
            # Operator byte (28 is a number encoding prefix)
            case byte
            when 12
              # Two-byte operator
              io.getbyte
            when 1, 3, 18, 23
              # hstem / vstem / hstemhm / vstemhm declare one stem per pair
              stem_count += pending_operands / 2
            when 19, 20
              # hintmask / cntrmask: leftover operands are implicit vstems,
              # then one mask bit per stem follows, padded to whole bytes
              stem_count += pending_operands / 2
              io.read((stem_count + 7) / 8)
            end
            pending_operands = 0
            # Mark position after operator as boundary
            boundaries << io.pos
          else
            # Number - skip it
            io.pos -= 1
            skip_number(io)
            pending_operands += 1
          end
        end

        boundaries
      end

      # Skip over a number without decoding
      # Handles the Type 2 CharString number encodings
      # @param io [StringIO] input stream
      def skip_number(io)
        byte = io.getbyte
        return if byte.nil?

        case byte
        when 28
          # 3-byte signed integer
          io.read(2)
        when 32..246
          # Single byte integer - already consumed
        when 247..254
          # 2-byte integer
          io.getbyte
        when 255
          # 5-byte number
          io.read(4)
        end
      end

      # Build stack trackers for all CharStrings (if stack-aware)
      # @param charstrings [Hash<Integer, String>] glyph_id => charstring_bytes
      def build_stack_trackers(charstrings)
        charstrings.each do |glyph_id, charstring|
          tracker = StackTracker.new(charstring)
          tracker.track
          @stack_trackers[glyph_id] = tracker
        end
      end

      # Extract patterns from all CharStrings
      #
      # Uses operator boundaries so that patterns are syntactically valid, and
      # falls back to a plain sliding window when no sampled glyph yields more
      # than two boundaries. Large fonts are sampled and only a few discrete
      # lengths are tried to keep the amount of work bounded.
      #
      # @param charstrings [Hash<Integer, String>] glyph_id => charstring_bytes
      def extract_patterns(charstrings)
        pattern_occurrences = Hash.new { |h, k| h[k] = [] }

        # For large fonts (1000+ glyphs) analyze a random 30% sample; smaller
        # fonts are processed completely and in their original key order so
        # that the result is deterministic.
        sampled_glyphs = if charstrings.length > 1000
                           charstrings.keys.sample((charstrings.length * 0.3).to_i)
                         else
                           charstrings.keys
                         end

        # Pre-compute boundaries for sampled glyphs and decide whether the
        # data looks like real CharStrings (some glyph has > 2 boundaries)
        glyph_boundaries = {}
        use_boundaries = false
        sampled_glyphs.each do |glyph_id|
          boundaries = find_operator_boundaries(charstrings[glyph_id])
          glyph_boundaries[glyph_id] = boundaries
          use_boundaries = true if boundaries.length > 2
        end

        # Discrete pattern lengths instead of a continuous range
        pattern_lengths = LENGTH_STEPS.map { |step| @min_length + step }

        sampled_glyphs.each do |glyph_id|
          charstring = charstrings[glyph_id]
          next if charstring.length < @min_length

          if use_boundaries
            collect_boundary_occurrences(
              glyph_id, charstring, glyph_boundaries[glyph_id],
              pattern_lengths, pattern_occurrences
            )
          else
            collect_window_occurrences(
              glyph_id, charstring, pattern_lengths, pattern_occurrences
            )
          end
        end

        build_patterns(pattern_occurrences)
      end

      # Record candidate spans that start and end on operator boundaries
      #
      # For every start boundary the nearest end boundary at or beyond each
      # target length is looked up. Several target lengths can resolve to the
      # same end boundary; each resulting span is recorded only once so that
      # frequency and positions are not inflated.
      def collect_boundary_occurrences(glyph_id, charstring, boundaries,
                                       pattern_lengths, pattern_occurrences)
        boundaries.each do |start_pos|
          # boundaries is ascending, so a binary search finds the first
          # boundary that gives at least the target length
          end_positions = pattern_lengths.map do |target_length|
            boundaries.bsearch { |b| b >= start_pos + target_length }
          end.compact.uniq

          end_positions.each do |end_pos|
            actual_length = end_pos - start_pos
            next if actual_length < @min_length
            next if actual_length > @min_length + MAX_EXTRA_LENGTH
            next unless span_allowed?(glyph_id, start_pos, end_pos)

            # Record occurrence: pattern => [[glyph_id, position], ...]
            pattern_occurrences[charstring[start_pos, actual_length]] <<
              [glyph_id, start_pos]
          end
        end
      end

      # Record candidate spans using a sliding window (fallback for data that
      # does not parse into operator boundaries, e.g. synthetic test data)
      def collect_window_occurrences(glyph_id, charstring, pattern_lengths,
                                     pattern_occurrences)
        pattern_lengths.each do |length|
          break if length > charstring.length

          (0..charstring.length - length).each do |start_pos|
            next unless span_allowed?(glyph_id, start_pos, start_pos + length)

            # Record occurrence: pattern => [[glyph_id, position], ...]
            pattern_occurrences[charstring[start_pos, length]] <<
              [glyph_id, start_pos]
          end
        end
      end

      # Whether a span may be used as a pattern. Always true unless
      # stack-aware mode is on, in which case the glyph's tracker must exist
      # and report the span as stack-neutral.
      def span_allowed?(glyph_id, start_pos, end_pos)
        return true unless @stack_aware

        tracker = @stack_trackers[glyph_id]
        return false unless tracker

        tracker.stack_neutral?(start_pos, end_pos)
      end

      # Convert collected occurrences into Pattern objects stored in @patterns
      # @param pattern_occurrences [Hash<String, Array<Array(Integer, Integer)>>]
      def build_patterns(pattern_occurrences)
        pattern_occurrences.each do |bytes, occurrences|
          # A pattern must occur at least twice ...
          next if occurrences.length < 2

          # ... and in at least two different glyphs
          by_glyph = occurrences.group_by(&:first)
          next if by_glyph.keys.length < 2

          # Build positions hash
          positions = {}
          by_glyph.each do |glyph_id, glyph_occurrences|
            positions[glyph_id] = glyph_occurrences.map(&:last)
          end

          @patterns[bytes] = Pattern.new(
            bytes,
            bytes.length,
            by_glyph.keys,
            occurrences.length,
            0, # Will be calculated later
            positions,
            @stack_aware, # Mark if validated as stack-neutral
          )
        end
      end

      # Calculate byte savings for each pattern
      def calculate_savings
        @patterns.each_value do |pattern|
          # Savings = (pattern_length - overhead) * (frequency - 1)
          # -1 because we keep one occurrence in a subroutine
          overhead = pattern.call_overhead
          savings_per_use = pattern.length - overhead

          # Total savings across all uses (minus the subroutine definition)
          pattern.savings = if savings_per_use.positive?
                              savings_per_use * (pattern.frequency - 1)
                            else
                              0
                            end
        end
      end

      # Filter patterns by criteria
      def filter_patterns
        @patterns.select! do |_bytes, pattern|
          # Must meet minimum length
          next false if pattern.length < @min_length

          # Must have positive savings
          next false if pattern.savings <= 0

          # Must appear in at least 2 glyphs
          next false if pattern.glyphs.length < 2

          true
        end
      end

      # Find maximal patterns (not contained in larger patterns)
      # TODO: Implement in optimization phase
      def find_maximal_patterns
        # For now, keep all patterns
        # Future: remove patterns that are substrings of larger patterns
        # with same or higher frequency
      end
    end
  end
end
@@ -0,0 +1,246 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "stringio"
4
+
5
module Fontisan
  module Optimizers
    # Tracks operand stack depth during CharString execution without full
    # interpretation. Used to identify stack-neutral patterns suitable for
    # subroutinization.
    #
    # A stack-neutral pattern is one where the stack depth is the same before
    # and after the pattern executes. This ensures that replacing the pattern
    # with a subroutine call won't cause stack underflow/overflow.
    #
    # @example Basic usage
    #   tracker = StackTracker.new(charstring_bytes)
    #   stack_map = tracker.track
    #   start_depth = stack_map[start_pos]
    #   end_depth = stack_map[end_pos]
    #   is_neutral = (start_depth == end_depth)
    #
    # @see docs/SUBROUTINE_ARCHITECTURE.md
    class StackTracker
      # Type 2 CharString operator stack effects
      # Maps operator => [operands_consumed, operands_produced]
      # A consumed count of -1 means "consumes everything on the stack".
      #
      # NOTE(review): the Type 2 specification lists the stem, mask and moveto
      # operators as stack-clearing as well; here they are modelled with a
      # fixed operand count - confirm this approximation is intended.
      OPERATOR_STACK_EFFECTS = {
        # Path construction operators
        hstem: [2, 0], # y dy hstem
        vstem: [2, 0], # x dx vstem
        vmoveto: [1, 0], # dy vmoveto
        rlineto: [-1, 0], # {dxa dya}+ (variable, pairs)
        hlineto: [-1, 0], # dx1 {dya dxb}* (variable, alternating)
        vlineto: [-1, 0], # dy1 {dxb dya}* (variable, alternating)
        rrcurveto: [-1, 0], # {dxa dya dxb dyb dxc dyc}+ (variable, 6-tuples)
        callsubr: [1, 0], # subr# callsubr (note: subr may affect stack)
        return: [0, 0], # return
        endchar: [0, 0], # endchar
        hstemhm: [2, 0], # y dy hstemhm
        hintmask: [0, 0], # hintmask
        cntrmask: [0, 0], # cntrmask
        rmoveto: [2, 0], # dx dy rmoveto
        hmoveto: [1, 0], # dx hmoveto
        vstemhm: [2, 0], # x dx vstemhm
        rcurveline: [-1, 0], # {dxa dya dxb dyb dxc dyc}+ dxd dyd (variable)
        rlinecurve: [-1, 0], # {dxa dya}+ dxb dyb dxc dyc dxd dyd (variable)
        vvcurveto: [-1, 0], # dx1? {dya dxb dyb dyc}+ (variable)
        hhcurveto: [-1, 0], # dy1? {dxa dxb dyb dxc}+ (variable)
        shortint: [0, 1], # (16-bit number)
        callgsubr: [1, 0], # subr# callgsubr
        vhcurveto: [-1, 0], # dy1 dx2 dy2 dx3 {dxa dxb dyb dyc dyd dxe dye dxf}* (variable)
        hvcurveto: [-1, 0], # dx1 dx2 dy2 dy3 {dya dxb dyb dxc dxd dxe dye dyf}* (variable)

        # Arithmetic operators (12 prefix)
        and: [2, 1], # num1 num2 and
        or: [2, 1], # num1 num2 or
        not: [1, 1], # num1 not
        abs: [1, 1], # num abs
        add: [2, 1], # num1 num2 add
        sub: [2, 1], # num1 num2 sub
        div: [2, 1], # num1 num2 div
        neg: [1, 1], # num neg
        eq: [2, 1], # num1 num2 eq
        drop: [1, 0], # any drop
        put: [2, 0], # val i put
        get: [1, 1], # i get
        ifelse: [4, 1], # v1 v2 s1 s2 ifelse
        random: [0, 1], # random
        mul: [2, 1], # num1 num2 mul
        sqrt: [1, 1], # num sqrt
        dup: [1, 2], # any dup
        exch: [2, 2], # any1 any2 exch
        index: [1, 1], # i index (actually [i+1, i+1])
        roll: [2, 0], # N J roll (rotates top N elements)

        # Flex operators (12 prefix)
        hflex: [7, 0], # dx1 dx2 dy2 dx3 dx4 dx5 dx6 hflex
        flex: [13, 0], # dx1 dy1 dx2 dy2 dx3 dy3 dx4 dy4 dx5 dy5 dx6 dy6 fd flex
        hflex1: [9, 0], # dx1 dy1 dx2 dy2 dx3 dx4 dx5 dy5 dx6 hflex1
        flex1: [11, 0], # dx1 dy1 dx2 dy2 dx3 dy3 dx4 dy4 dx5 dy5 d6 flex1
      }.freeze

      # Type 2 CharString operator codes
      # One-byte operators are keyed by Integer, escaped operators by
      # [12, second_byte].
      OPERATORS = {
        1 => :hstem,
        3 => :vstem,
        4 => :vmoveto,
        5 => :rlineto,
        6 => :hlineto,
        7 => :vlineto,
        8 => :rrcurveto,
        10 => :callsubr,
        11 => :return,
        14 => :endchar,
        18 => :hstemhm,
        19 => :hintmask,
        20 => :cntrmask,
        21 => :rmoveto,
        22 => :hmoveto,
        23 => :vstemhm,
        24 => :rcurveline,
        25 => :rlinecurve,
        26 => :vvcurveto,
        27 => :hhcurveto,
        28 => :shortint,
        29 => :callgsubr,
        30 => :vhcurveto,
        31 => :hvcurveto,
        [12, 3] => :and,
        [12, 4] => :or,
        [12, 5] => :not,
        [12, 9] => :abs,
        [12, 10] => :add,
        [12, 11] => :sub,
        [12, 12] => :div,
        [12, 14] => :neg,
        [12, 15] => :eq,
        [12, 18] => :drop,
        [12, 20] => :put,
        [12, 21] => :get,
        [12, 22] => :ifelse,
        [12, 23] => :random,
        [12, 24] => :mul,
        [12, 26] => :sqrt,
        [12, 27] => :dup,
        [12, 28] => :exch,
        [12, 29] => :index,
        [12, 30] => :roll,
        [12, 34] => :hflex,
        [12, 35] => :flex,
        [12, 36] => :hflex1,
        [12, 37] => :flex1,
      }.freeze

      # Operators that declare stem hints (one stem per operand pair)
      STEM_OPERATORS = %i[hstem vstem hstemhm vstemhm].freeze

      # Operators that are followed by hint mask bytes
      MASK_OPERATORS = %i[hintmask cntrmask].freeze

      # Initialize stack tracker
      # @param charstring [String] CharString bytes to track
      def initialize(charstring)
        @charstring = charstring
        @stack_depth_map = {}
      end

      # Track stack depth at each byte position
      #
      # The map gets one entry for position 0 and one entry for the position
      # directly after every number or operator. The mask bytes that follow
      # hintmask/cntrmask are consumed together with the operator, so no entry
      # is recorded inside a mask.
      #
      # NOTE(review): the mask length is derived from the stems declared in
      # this CharString only; stems declared inside called subroutines are
      # not visible here.
      #
      # @return [Hash<Integer, Integer>] position => stack_depth
      def track
        io = StringIO.new(@charstring)
        depth = 0
        pending_operands = 0 # numbers seen since the previous operator
        stem_count = 0

        # Record initial depth
        @stack_depth_map[0] = depth

        until io.eof?
          byte = io.getbyte

          if byte <= 31 && byte != 28
            # Operator
            operator = read_operator(io, byte)

            if STEM_OPERATORS.include?(operator)
              # Integer division drops an optional leading width operand
              stem_count += pending_operands / 2
            elsif MASK_OPERATORS.include?(operator)
              # Operands left before a mask operator are implicit vstem hints;
              # the mask holds one bit per stem, padded to whole bytes
              stem_count += pending_operands / 2
              io.read((stem_count + 7) / 8)
            end

            depth = apply_operator_effect(operator, depth)
            pending_operands = 0
          else
            # Number - pushes one value
            io.pos -= 1
            skip_number(io)
            depth += 1
            pending_operands += 1
          end

          # Record depth after processing this element
          @stack_depth_map[io.pos] = depth
        end

        @stack_depth_map
      end

      # Check if a pattern is stack-neutral
      # @param start_pos [Integer] pattern start position
      # @param end_pos [Integer] pattern end position (exclusive)
      # @return [Boolean] true if both positions were tracked and the stack
      #   depth is the same at start and end
      def stack_neutral?(start_pos, end_pos)
        return false unless @stack_depth_map.key?(start_pos)
        return false unless @stack_depth_map.key?(end_pos)

        @stack_depth_map[start_pos] == @stack_depth_map[end_pos]
      end

      # Get stack depth at a position
      # @param position [Integer] byte position
      # @return [Integer, nil] stack depth or nil if not tracked
      def depth_at(position)
        @stack_depth_map[position]
      end

      private

      # Read operator from CharString
      # @param io [StringIO] stream positioned after first_byte
      # @param first_byte [Integer] operator byte already consumed
      # @return [Symbol] operator name, or :unknown
      def read_operator(io, first_byte)
        if first_byte == 12
          # Escaped (two-byte) operator
          second_byte = io.getbyte
          return :unknown if second_byte.nil?

          operator_key = [first_byte, second_byte]
          OPERATORS[operator_key] || :unknown
        else
          OPERATORS[first_byte] || :unknown
        end
      end

      # Skip over a number without reading its value
      # @param io [StringIO] stream positioned at the number's first byte
      def skip_number(io)
        byte = io.getbyte
        return if byte.nil?

        case byte
        when 28
          # 3-byte signed integer
          io.read(2)
        when 32..246
          # Single byte integer
        when 247..254
          # 2-byte integer
          io.getbyte
        when 255
          # 5-byte number
          io.read(4)
        end
      end

      # Apply operator's stack effect
      # @param operator [Symbol] operator name from OPERATORS
      # @param current_depth [Integer] depth before the operator
      # @return [Integer] depth after the operator (never negative)
      def apply_operator_effect(operator, current_depth)
        effect = OPERATOR_STACK_EFFECTS[operator]
        return current_depth if effect.nil? # Unknown operator

        consumed, produced = effect

        if consumed == -1
          # Variable consumption - these operators consume all available
          # operands, so the depth afterwards is just what they produce
          produced
        else
          # Ensure depth doesn't go negative
          [current_depth - consumed + produced, 0].max
        end
      end
    end
  end
end
@@ -0,0 +1,134 @@
1
+ # frozen_string_literal: true
2
+
3
module Fontisan
  module Optimizers
    # Builds CFF subroutines from analyzed patterns. Converts pattern byte
    # sequences into CharStrings terminated by a return operator, calculates
    # the subroutine bias, and generates callsubr sequences for pattern
    # replacement.
    #
    # @example Basic usage
    #   patterns = analyzer.analyze(charstrings)
    #   builder = SubroutineBuilder.new(patterns, type: :local)
    #   subroutines = builder.build
    #   bias = builder.bias
    #   call = builder.create_call(0) # Call first subroutine
    #
    # @see docs/SUBROUTINE_ARCHITECTURE.md
    class SubroutineBuilder
      # CFF return operator
      RETURN_OPERATOR = "\x0b"

      # CFF callsubr operator
      CALLSUBR_OPERATOR = "\x0a"

      # Initialize subroutine builder
      # @param patterns [Array<Pattern>] patterns to convert to subroutines
      #   (each must respond to #bytes)
      # @param type [Symbol] subroutine type (:local or :global); stored but
      #   not otherwise used by this class
      def initialize(patterns, type: :local)
        @patterns = patterns
        @type = type
        @subroutines = []
      end

      # Build subroutines from patterns
      # Each subroutine consists of the pattern bytes followed by a return
      # operator. The order matches the pattern array order.
      #
      # @return [Array<String>] subroutine CharStrings
      def build
        @subroutines = @patterns.map do |pattern|
          build_subroutine_charstring(pattern)
        end
        @subroutines
      end

      # Calculate CFF bias for current subroutine count
      # Bias values defined by CFF specification:
      # - 107 for count < 1240
      # - 1131 for count < 33900
      # - 32768 for count >= 33900
      #
      # The count is that of the subroutines produced by the last #build call
      # (zero before #build has been called).
      #
      # @return [Integer] bias value
      def bias
        calculate_bias(@subroutines.length)
      end

      # Create callsubr operator for a subroutine
      # Encodes the biased subroutine ID as a Type 2 CharString integer
      # followed by the callsubr operator.
      #
      # @param subroutine_id [Integer] zero-based subroutine index
      # @return [String] encoded operand + callsubr operator
      def create_call(subroutine_id)
        biased_id = subroutine_id - bias
        encode_integer(biased_id) + CALLSUBR_OPERATOR
      end

      private

      # Build a subroutine CharString from a pattern
      # @param pattern [Pattern] pattern to convert
      # @return [String] subroutine CharString (pattern + return)
      def build_subroutine_charstring(pattern)
        pattern.bytes + RETURN_OPERATOR
      end

      # Calculate bias based on subroutine count
      # @param count [Integer] number of subroutines
      # @return [Integer] bias value
      def calculate_bias(count)
        return 107 if count < 1240
        return 1131 if count < 33_900

        32_768
      end

      # Encode an integer using the Type 2 CharString number encoding
      #
      # Formats (b0 is the first byte, v the decoded value):
      # - -107..107:     one byte,   b0 = v + 139
      # - 108..1131:     two bytes,  v = (b0 - 247) * 256 + b1 + 108
      # - -1131..-108:   two bytes,  v = -(b0 - 251) * 256 - b1 - 108
      # - -32768..32767: three bytes, 28 prefix + big-endian 16-bit value
      # - otherwise:     five bytes, 255 prefix + big-endian 32-bit value
      #
      # NOTE(review): inside a Type 2 CharString the 255 prefix is read as a
      # 16.16 fixed-point number, so the five-byte form does not round-trip
      # as an integer. Biased subroutine numbers of a valid font always fit
      # into the three-byte form, so this branch should be unreachable for
      # callsubr operands; it is kept for backward compatibility.
      #
      # @param num [Integer] integer to encode
      # @return [String] encoded bytes (binary)
      def encode_integer(num)
        if num.between?(-107, 107)
          [num + 139].pack("C")
        elsif num.between?(108, 1131)
          offset = num - 108
          [247 + (offset >> 8), offset & 0xff].pack("C*")
        elsif num.between?(-1131, -108)
          # Work on the positive magnitude; shifting the negative value would
          # round towards -infinity and pick the wrong prefix byte
          offset = -num - 108
          [251 + (offset >> 8), offset & 0xff].pack("C*")
        elsif num.between?(-32_768, 32_767)
          # 28 is the shortint prefix (29 would be the callgsubr operator)
          [28, (num >> 8) & 0xff, num & 0xff].pack("C*")
        else
          [
            255,
            (num >> 24) & 0xff,
            (num >> 16) & 0xff,
            (num >> 8) & 0xff,
            num & 0xff,
          ].pack("C*")
        end
      end
    end
  end
end