fontisan 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +529 -65
  3. data/Gemfile +1 -0
  4. data/LICENSE +5 -1
  5. data/README.adoc +1301 -275
  6. data/Rakefile +27 -2
  7. data/benchmark/variation_quick_bench.rb +47 -0
  8. data/docs/EXTRACT_TTC_MIGRATION.md +549 -0
  9. data/fontisan.gemspec +4 -1
  10. data/lib/fontisan/binary/base_record.rb +22 -1
  11. data/lib/fontisan/cli.rb +309 -0
  12. data/lib/fontisan/collection/builder.rb +260 -0
  13. data/lib/fontisan/collection/offset_calculator.rb +227 -0
  14. data/lib/fontisan/collection/table_analyzer.rb +204 -0
  15. data/lib/fontisan/collection/table_deduplicator.rb +241 -0
  16. data/lib/fontisan/collection/writer.rb +306 -0
  17. data/lib/fontisan/commands/base_command.rb +8 -1
  18. data/lib/fontisan/commands/convert_command.rb +291 -0
  19. data/lib/fontisan/commands/export_command.rb +161 -0
  20. data/lib/fontisan/commands/info_command.rb +40 -6
  21. data/lib/fontisan/commands/instance_command.rb +295 -0
  22. data/lib/fontisan/commands/ls_command.rb +113 -0
  23. data/lib/fontisan/commands/pack_command.rb +241 -0
  24. data/lib/fontisan/commands/subset_command.rb +245 -0
  25. data/lib/fontisan/commands/unpack_command.rb +338 -0
  26. data/lib/fontisan/commands/validate_command.rb +178 -0
  27. data/lib/fontisan/commands/variable_command.rb +30 -1
  28. data/lib/fontisan/config/collection_settings.yml +56 -0
  29. data/lib/fontisan/config/conversion_matrix.yml +212 -0
  30. data/lib/fontisan/config/export_settings.yml +66 -0
  31. data/lib/fontisan/config/subset_profiles.yml +100 -0
  32. data/lib/fontisan/config/svg_settings.yml +60 -0
  33. data/lib/fontisan/config/validation_rules.yml +149 -0
  34. data/lib/fontisan/config/variable_settings.yml +99 -0
  35. data/lib/fontisan/config/woff2_settings.yml +77 -0
  36. data/lib/fontisan/constants.rb +69 -0
  37. data/lib/fontisan/converters/conversion_strategy.rb +96 -0
  38. data/lib/fontisan/converters/format_converter.rb +259 -0
  39. data/lib/fontisan/converters/outline_converter.rb +936 -0
  40. data/lib/fontisan/converters/svg_generator.rb +244 -0
  41. data/lib/fontisan/converters/table_copier.rb +117 -0
  42. data/lib/fontisan/converters/woff2_encoder.rb +416 -0
  43. data/lib/fontisan/converters/woff_writer.rb +391 -0
  44. data/lib/fontisan/error.rb +203 -0
  45. data/lib/fontisan/export/exporter.rb +262 -0
  46. data/lib/fontisan/export/table_serializer.rb +255 -0
  47. data/lib/fontisan/export/transformers/font_to_ttx.rb +172 -0
  48. data/lib/fontisan/export/transformers/head_transformer.rb +96 -0
  49. data/lib/fontisan/export/transformers/hhea_transformer.rb +59 -0
  50. data/lib/fontisan/export/transformers/maxp_transformer.rb +63 -0
  51. data/lib/fontisan/export/transformers/name_transformer.rb +63 -0
  52. data/lib/fontisan/export/transformers/os2_transformer.rb +121 -0
  53. data/lib/fontisan/export/transformers/post_transformer.rb +51 -0
  54. data/lib/fontisan/export/ttx_generator.rb +527 -0
  55. data/lib/fontisan/export/ttx_parser.rb +300 -0
  56. data/lib/fontisan/font_loader.rb +121 -12
  57. data/lib/fontisan/font_writer.rb +301 -0
  58. data/lib/fontisan/formatters/text_formatter.rb +102 -0
  59. data/lib/fontisan/glyph_accessor.rb +503 -0
  60. data/lib/fontisan/hints/hint_converter.rb +177 -0
  61. data/lib/fontisan/hints/postscript_hint_applier.rb +185 -0
  62. data/lib/fontisan/hints/postscript_hint_extractor.rb +254 -0
  63. data/lib/fontisan/hints/truetype_hint_applier.rb +71 -0
  64. data/lib/fontisan/hints/truetype_hint_extractor.rb +162 -0
  65. data/lib/fontisan/loading_modes.rb +113 -0
  66. data/lib/fontisan/metrics_calculator.rb +277 -0
  67. data/lib/fontisan/models/collection_font_summary.rb +52 -0
  68. data/lib/fontisan/models/collection_info.rb +76 -0
  69. data/lib/fontisan/models/collection_list_info.rb +37 -0
  70. data/lib/fontisan/models/font_export.rb +158 -0
  71. data/lib/fontisan/models/font_summary.rb +48 -0
  72. data/lib/fontisan/models/glyph_outline.rb +343 -0
  73. data/lib/fontisan/models/hint.rb +233 -0
  74. data/lib/fontisan/models/outline.rb +664 -0
  75. data/lib/fontisan/models/table_sharing_info.rb +40 -0
  76. data/lib/fontisan/models/ttx/glyph_order.rb +31 -0
  77. data/lib/fontisan/models/ttx/tables/binary_table.rb +67 -0
  78. data/lib/fontisan/models/ttx/tables/head_table.rb +74 -0
  79. data/lib/fontisan/models/ttx/tables/hhea_table.rb +74 -0
  80. data/lib/fontisan/models/ttx/tables/maxp_table.rb +55 -0
  81. data/lib/fontisan/models/ttx/tables/name_table.rb +45 -0
  82. data/lib/fontisan/models/ttx/tables/os2_table.rb +157 -0
  83. data/lib/fontisan/models/ttx/tables/post_table.rb +50 -0
  84. data/lib/fontisan/models/ttx/ttfont.rb +49 -0
  85. data/lib/fontisan/models/validation_report.rb +203 -0
  86. data/lib/fontisan/open_type_collection.rb +156 -2
  87. data/lib/fontisan/open_type_font.rb +296 -10
  88. data/lib/fontisan/optimizers/charstring_rewriter.rb +161 -0
  89. data/lib/fontisan/optimizers/pattern_analyzer.rb +308 -0
  90. data/lib/fontisan/optimizers/stack_tracker.rb +246 -0
  91. data/lib/fontisan/optimizers/subroutine_builder.rb +134 -0
  92. data/lib/fontisan/optimizers/subroutine_generator.rb +207 -0
  93. data/lib/fontisan/optimizers/subroutine_optimizer.rb +107 -0
  94. data/lib/fontisan/outline_extractor.rb +423 -0
  95. data/lib/fontisan/subset/builder.rb +268 -0
  96. data/lib/fontisan/subset/glyph_mapping.rb +215 -0
  97. data/lib/fontisan/subset/options.rb +142 -0
  98. data/lib/fontisan/subset/profile.rb +152 -0
  99. data/lib/fontisan/subset/table_subsetter.rb +461 -0
  100. data/lib/fontisan/svg/font_face_generator.rb +278 -0
  101. data/lib/fontisan/svg/font_generator.rb +264 -0
  102. data/lib/fontisan/svg/glyph_generator.rb +168 -0
  103. data/lib/fontisan/svg/view_box_calculator.rb +137 -0
  104. data/lib/fontisan/tables/cff/cff_glyph.rb +176 -0
  105. data/lib/fontisan/tables/cff/charset.rb +282 -0
  106. data/lib/fontisan/tables/cff/charstring.rb +905 -0
  107. data/lib/fontisan/tables/cff/charstring_builder.rb +322 -0
  108. data/lib/fontisan/tables/cff/charstrings_index.rb +162 -0
  109. data/lib/fontisan/tables/cff/dict.rb +351 -0
  110. data/lib/fontisan/tables/cff/dict_builder.rb +242 -0
  111. data/lib/fontisan/tables/cff/encoding.rb +274 -0
  112. data/lib/fontisan/tables/cff/header.rb +102 -0
  113. data/lib/fontisan/tables/cff/index.rb +237 -0
  114. data/lib/fontisan/tables/cff/index_builder.rb +170 -0
  115. data/lib/fontisan/tables/cff/private_dict.rb +284 -0
  116. data/lib/fontisan/tables/cff/top_dict.rb +236 -0
  117. data/lib/fontisan/tables/cff.rb +487 -0
  118. data/lib/fontisan/tables/cff2/blend_operator.rb +240 -0
  119. data/lib/fontisan/tables/cff2/charstring_parser.rb +591 -0
  120. data/lib/fontisan/tables/cff2/operand_stack.rb +232 -0
  121. data/lib/fontisan/tables/cff2.rb +341 -0
  122. data/lib/fontisan/tables/cvar.rb +242 -0
  123. data/lib/fontisan/tables/fvar.rb +2 -2
  124. data/lib/fontisan/tables/glyf/compound_glyph.rb +483 -0
  125. data/lib/fontisan/tables/glyf/compound_glyph_resolver.rb +136 -0
  126. data/lib/fontisan/tables/glyf/curve_converter.rb +343 -0
  127. data/lib/fontisan/tables/glyf/glyph_builder.rb +450 -0
  128. data/lib/fontisan/tables/glyf/simple_glyph.rb +382 -0
  129. data/lib/fontisan/tables/glyf.rb +235 -0
  130. data/lib/fontisan/tables/gvar.rb +270 -0
  131. data/lib/fontisan/tables/hhea.rb +124 -0
  132. data/lib/fontisan/tables/hmtx.rb +287 -0
  133. data/lib/fontisan/tables/hvar.rb +191 -0
  134. data/lib/fontisan/tables/loca.rb +322 -0
  135. data/lib/fontisan/tables/maxp.rb +192 -0
  136. data/lib/fontisan/tables/mvar.rb +185 -0
  137. data/lib/fontisan/tables/name.rb +99 -30
  138. data/lib/fontisan/tables/variation_common.rb +346 -0
  139. data/lib/fontisan/tables/vvar.rb +234 -0
  140. data/lib/fontisan/true_type_collection.rb +156 -2
  141. data/lib/fontisan/true_type_font.rb +297 -11
  142. data/lib/fontisan/utilities/brotli_wrapper.rb +159 -0
  143. data/lib/fontisan/utilities/checksum_calculator.rb +18 -0
  144. data/lib/fontisan/utils/thread_pool.rb +134 -0
  145. data/lib/fontisan/validation/checksum_validator.rb +170 -0
  146. data/lib/fontisan/validation/consistency_validator.rb +197 -0
  147. data/lib/fontisan/validation/structure_validator.rb +198 -0
  148. data/lib/fontisan/validation/table_validator.rb +158 -0
  149. data/lib/fontisan/validation/validator.rb +152 -0
  150. data/lib/fontisan/variable/axis_normalizer.rb +215 -0
  151. data/lib/fontisan/variable/delta_applicator.rb +313 -0
  152. data/lib/fontisan/variable/glyph_delta_processor.rb +218 -0
  153. data/lib/fontisan/variable/instancer.rb +344 -0
  154. data/lib/fontisan/variable/metric_delta_processor.rb +282 -0
  155. data/lib/fontisan/variable/region_matcher.rb +208 -0
  156. data/lib/fontisan/variable/static_font_builder.rb +213 -0
  157. data/lib/fontisan/variable/table_updater.rb +219 -0
  158. data/lib/fontisan/variation/blend_applier.rb +199 -0
  159. data/lib/fontisan/variation/cache.rb +298 -0
  160. data/lib/fontisan/variation/cache_key_builder.rb +162 -0
  161. data/lib/fontisan/variation/converter.rb +268 -0
  162. data/lib/fontisan/variation/data_extractor.rb +86 -0
  163. data/lib/fontisan/variation/delta_applier.rb +266 -0
  164. data/lib/fontisan/variation/delta_parser.rb +228 -0
  165. data/lib/fontisan/variation/inspector.rb +275 -0
  166. data/lib/fontisan/variation/instance_generator.rb +273 -0
  167. data/lib/fontisan/variation/interpolator.rb +231 -0
  168. data/lib/fontisan/variation/metrics_adjuster.rb +318 -0
  169. data/lib/fontisan/variation/optimizer.rb +418 -0
  170. data/lib/fontisan/variation/parallel_generator.rb +150 -0
  171. data/lib/fontisan/variation/region_matcher.rb +221 -0
  172. data/lib/fontisan/variation/subsetter.rb +463 -0
  173. data/lib/fontisan/variation/table_accessor.rb +105 -0
  174. data/lib/fontisan/variation/validator.rb +345 -0
  175. data/lib/fontisan/variation/variation_context.rb +211 -0
  176. data/lib/fontisan/version.rb +1 -1
  177. data/lib/fontisan/woff2/directory.rb +257 -0
  178. data/lib/fontisan/woff2/header.rb +101 -0
  179. data/lib/fontisan/woff2/table_transformer.rb +163 -0
  180. data/lib/fontisan/woff2_font.rb +712 -0
  181. data/lib/fontisan/woff_font.rb +483 -0
  182. data/lib/fontisan.rb +120 -0
  183. data/scripts/compare_stack_aware.rb +187 -0
  184. data/scripts/measure_optimization.rb +141 -0
  185. metadata +205 -4
@@ -0,0 +1,227 @@
1
+ # frozen_string_literal: true
2
+
3
module Fontisan
  module Collection
    # OffsetCalculator calculates file offsets for a TTC/OTC structure.
    #
    # Single responsibility: calculate every file offset of the collection
    # layout (TTC header, offset table, font directories and table data)
    # while honouring the 4-byte alignment requirement.
    #
    # TTC/OTC structure:
    # - TTC Header (12 bytes)
    # - Offset Table (4 bytes per font)
    # - Font 0 Table Directory
    # - Font 1 Table Directory
    # - ...
    # - Shared Tables
    # - Unique Tables
    #
    # @example Calculate offsets
    #   calculator = OffsetCalculator.new(sharing_map, fonts)
    #   offsets = calculator.calculate
    #   header_offset = offsets[:header_offset]
    #   font_directory_offsets = offsets[:font_directory_offsets]
    class OffsetCalculator
      # Alignment requirement for tables (4 bytes)
      TABLE_ALIGNMENT = 4

      # TTC header size (12 bytes)
      TTC_HEADER_SIZE = 12

      # Size of each font offset entry (4 bytes)
      FONT_OFFSET_SIZE = 4

      # Size of font directory header (12 bytes: sfnt_version, num_tables,
      # searchRange, entrySelector, rangeShift)
      FONT_DIRECTORY_HEADER_SIZE = 12

      # Size of each table directory entry (16 bytes: tag, checksum, offset, length)
      TABLE_DIRECTORY_ENTRY_SIZE = 16

      # Initialize calculator
      #
      # @param sharing_map [Hash] Sharing map from TableDeduplicator; each
      #   per-font entry maps a tag to a hash that provides :canonical_id,
      #   :size and :shared
      # @param fonts [Array<TrueTypeFont, OpenTypeFont>] Source fonts; each must
      #   respond to #table_names
      # @raise [ArgumentError] if sharing_map is nil or fonts is nil/empty
      def initialize(sharing_map, fonts)
        raise ArgumentError, "sharing_map cannot be nil" if sharing_map.nil?
        raise ArgumentError, "fonts cannot be nil or empty" if fonts.nil? || fonts.empty?

        @sharing_map = sharing_map
        @fonts = fonts
        @offsets = {}
      end

      # Calculate all offsets for the collection
      #
      # Every call rebuilds the offset map from scratch.
      #
      # @return [Hash] Complete offset map with:
      #   - :header_offset [Integer] - TTC header offset (always 0)
      #   - :offset_table_offset [Integer] - Offset table offset (always 12)
      #   - :font_directory_offsets [Array<Integer>] - Offset to each font's directory
      #   - :table_offsets [Hash] - Map of canonical_id to file offset
      #   - :font_table_directories [Hash] - Per-font table directory info
      #     (:offset, :size, :num_tables, :table_tags)
      def calculate
        @offsets = {
          header_offset: 0,
          offset_table_offset: TTC_HEADER_SIZE,
          font_directory_offsets: [],
          table_offsets: {},
          font_table_directories: {},
        }

        # Font directories start right after the TTC header and offset table
        directories_start = TTC_HEADER_SIZE + (@fonts.size * FONT_OFFSET_SIZE)
        calculate_font_directory_offsets(directories_start)

        # Table data follows the last font directory
        calculate_table_data_offsets

        @offsets
      end

      # Get offset for a specific font's directory
      #
      # Runs #calculate first when no directory offsets are available yet.
      #
      # @param font_index [Integer] Font index
      # @return [Integer, nil] Offset, or nil if font_index is out of range
      def font_directory_offset(font_index)
        calculate if @offsets.fetch(:font_directory_offsets, []).empty?
        @offsets[:font_directory_offsets][font_index]
      end

      # Get offset for a specific table
      #
      # Runs #calculate first when no table offsets are available yet.
      #
      # @param canonical_id [String] Canonical table ID
      # @return [Integer, nil] Offset, or nil if the ID is unknown
      def table_offset(canonical_id)
        calculate if @offsets.fetch(:table_offsets, {}).empty?
        @offsets[:table_offsets][canonical_id]
      end

      private

      # Calculate offsets for each font's table directory
      #
      # Each font directory contains:
      # - Font directory header (12 bytes)
      # - Table directory entries (16 bytes each)
      #
      # Also records where the table data begins (first aligned offset after
      # the last directory) in @table_data_start_offset.
      #
      # @param start_offset [Integer] Starting offset
      # @return [void]
      def calculate_font_directory_offsets(start_offset)
        current_offset = start_offset

        @fonts.each_with_index do |font, font_index|
          table_tags = font.table_names
          num_tables = table_tags.size
          directory_size = FONT_DIRECTORY_HEADER_SIZE + (num_tables * TABLE_DIRECTORY_ENTRY_SIZE)

          @offsets[:font_directory_offsets] << current_offset
          @offsets[:font_table_directories][font_index] = {
            offset: current_offset,
            size: directory_size,
            num_tables: num_tables,
            table_tags: table_tags,
          }

          # Next directory starts at the following aligned offset
          current_offset = align_offset(current_offset + directory_size)
        end

        @table_data_start_offset = current_offset
      end

      # Calculate offsets for all table data
      #
      # Tables are laid out in two groups, each in first-seen order:
      # 1. Shared tables (stored once)
      # 2. Unique tables
      #
      # @return [void]
      def calculate_table_data_offsets
        # Collect each canonical table once; the first occurrence wins
        canonical_tables = {}
        @sharing_map.each_value do |tables|
          tables.each do |tag, info|
            canonical_tables[info[:canonical_id]] ||= {
              tag: tag,
              size: info[:size],
              shared: info[:shared],
            }
          end
        end

        shared, unique = canonical_tables.partition { |_id, info| info[:shared] }

        current_offset = @table_data_start_offset
        (shared + unique).each do |canonical_id, info|
          @offsets[:table_offsets][canonical_id] = current_offset
          current_offset = align_offset(current_offset + info[:size])
        end
      end

      # Align offset to TABLE_ALIGNMENT boundary
      #
      # @param offset [Integer] Unaligned offset
      # @return [Integer] Smallest multiple of TABLE_ALIGNMENT that is >= offset
      def align_offset(offset)
        offset + (-offset % TABLE_ALIGNMENT)
      end

      # Calculate search range parameters for font directory header
      #
      # These values are used in the font directory header for binary search:
      # - searchRange: (max power of 2 <= numTables) * 16
      # - entrySelector: log2(max power of 2 <= numTables)
      # - rangeShift: numTables * 16 - searchRange
      #
      # For num_tables < 1 no power of two fits, so the exponent falls back to
      # 0 and rangeShift is clamped to 0 instead of going negative.
      #
      # @param num_tables [Integer] Number of tables
      # @return [Hash] Search parameters (:search_range, :entry_selector, :range_shift)
      def calculate_search_params(num_tables)
        # bit_length - 1 == floor(log2(n)) for positive integers
        max_power = num_tables.positive? ? num_tables.bit_length - 1 : 0

        search_range = (1 << max_power) * TABLE_DIRECTORY_ENTRY_SIZE
        range_shift = [(num_tables * TABLE_DIRECTORY_ENTRY_SIZE) - search_range, 0].max

        {
          search_range: search_range,
          entry_selector: max_power,
          range_shift: range_shift,
        }
      end
    end
  end
end
@@ -0,0 +1,204 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest/sha2"
4
+
5
module Fontisan
  module Collection
    # TableAnalyzer analyzes tables across multiple fonts to identify sharing opportunities
    #
    # Single responsibility: Analyze tables across fonts to identify identical tables
    # that can be shared in a font collection. Uses SHA256 checksums for reliable
    # content comparison.
    #
    # @example Analyze tables across fonts
    #   analyzer = TableAnalyzer.new([font1, font2, font3])
    #   report = analyzer.analyze
    #   puts "Potential savings: #{report[:space_savings]} bytes"
    #   puts "Shared tables: #{report[:shared_tables].keys.join(', ')}"
    class TableAnalyzer
      # Analysis report structure
      # @return [Hash, nil] Analysis results, nil until #analyze has run
      attr_reader :report

      # Initialize analyzer with fonts
      #
      # The Array check runs before the emptiness check so that arguments
      # without #empty? (e.g. an Integer) raise ArgumentError rather than
      # NoMethodError.
      #
      # @param fonts [Array<TrueTypeFont, OpenTypeFont>] Fonts to analyze; each
      #   must respond to #table_names and #table_data
      # @raise [ArgumentError] if fonts is nil, not an Array, or empty
      def initialize(fonts)
        raise ArgumentError, "fonts cannot be nil or empty" if fonts.nil?
        raise ArgumentError, "fonts must be an array" unless fonts.is_a?(Array)
        raise ArgumentError, "fonts cannot be nil or empty" if fonts.empty?

        @fonts = fonts
        @report = nil
      end

      # Analyze tables across all fonts
      #
      # Identifies tables that are identical across fonts based on content checksum.
      # Returns a comprehensive analysis report with sharing opportunities and
      # potential space savings. Every call rebuilds the report.
      #
      # @return [Hash] Analysis report with:
      #   - :total_fonts [Integer] Number of fonts analyzed
      #   - :table_checksums [Hash<String, Hash>] Map of tag to checksum to font indices
      #   - :shared_tables [Hash<String, Array<Hash>>] Map of tag to sharing groups,
      #     each group being { checksum:, font_indices:, count: }
      #   - :unique_tables [Hash<String, Array<Hash>>] Map of tag to entries
      #     { checksum:, font_index: } for content found in only one font
      #   - :space_savings [Integer] Potential bytes saved by sharing
      #   - :sharing_percentage [Float] Percentage of tables that can be shared
      def analyze
        @report = {
          total_fonts: @fonts.size,
          table_checksums: {},
          shared_tables: {},
          unique_tables: {},
          space_savings: 0,
          sharing_percentage: 0.0,
        }

        collect_table_checksums
        identify_shared_tables
        calculate_space_savings

        @report
      end

      # Get tables that can be shared
      #
      # Runs #analyze first when no report exists yet.
      #
      # @return [Hash<String, Array<Hash>>] Map of table tag to sharing groups
      #   ({ checksum:, font_indices:, count: })
      def shared_tables
        analyze unless @report
        @report[:shared_tables]
      end

      # Get potential space savings in bytes
      #
      # Runs #analyze first when no report exists yet.
      #
      # @return [Integer] Bytes that can be saved by sharing
      def space_savings
        analyze unless @report
        @report[:space_savings]
      end

      # Get sharing percentage
      #
      # Runs #analyze first when no report exists yet.
      #
      # @return [Float] Percentage of tables that can be shared (0.0-100.0)
      def sharing_percentage
        analyze unless @report
        @report[:sharing_percentage]
      end

      private

      # Collect checksums for all tables in all fonts
      #
      # Builds a map of: tag -> checksum -> array of font indices
      # This allows quick identification of which fonts share identical tables.
      # Tags whose table data is nil are skipped.
      #
      # @return [void]
      def collect_table_checksums
        checksums_by_tag = @report[:table_checksums]

        @fonts.each_with_index do |font, font_index|
          font.table_names.each do |tag|
            table_data = font.table_data[tag]
            next unless table_data

            checksum = calculate_checksum(table_data)
            ((checksums_by_tag[tag] ||= {})[checksum] ||= []) << font_index
          end
        end
      end

      # Identify which tables are shared across fonts
      #
      # A table is considered shared if 2 or more fonts have identical content
      # (same checksum) for that table.
      #
      # @return [void]
      def identify_shared_tables
        @report[:table_checksums].each do |tag, checksums|
          checksums.each do |checksum, font_indices|
            if font_indices.size > 1
              # Same content found in several fonts
              (@report[:shared_tables][tag] ||= []) << {
                checksum: checksum,
                font_indices: font_indices,
                count: font_indices.size,
              }
            else
              # Content found in exactly one font
              (@report[:unique_tables][tag] ||= []) << {
                checksum: checksum,
                font_index: font_indices.first,
              }
            end
          end
        end
      end

      # Calculate potential space savings from table sharing
      #
      # Space is saved when N fonts share a table - we only need to store it once
      # instead of N times. Savings = (N-1) * table_size
      #
      # The sharing percentage is the number of shared table instances divided
      # by the total number of table names listed by all fonts.
      #
      # @return [void]
      def calculate_space_savings
        total_savings = 0
        shared_table_instances = 0

        @report[:shared_tables].each do |tag, sharing_groups|
          sharing_groups.each do |group|
            font_indices = group[:font_indices]
            count = font_indices.size

            # All fonts in the group hold identical data; measure the first
            table_size = @fonts[font_indices.first].table_data[tag].bytesize

            total_savings += (count - 1) * table_size
            shared_table_instances += count
          end
        end

        total_table_instances = @fonts.sum { |font| font.table_names.size }

        @report[:space_savings] = total_savings
        return unless total_table_instances.positive?

        @report[:sharing_percentage] =
          (shared_table_instances.to_f / total_table_instances * 100).round(2)
      end

      # Calculate SHA256 checksum for table data
      #
      # @param data [String] Binary table data
      # @return [String] Hexadecimal checksum
      def calculate_checksum(data)
        Digest::SHA256.hexdigest(data)
      end
    end
  end
end
@@ -0,0 +1,241 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest/sha2"
4
+
5
module Fontisan
  module Collection
    # TableDeduplicator deduplicates identical tables across fonts
    #
    # Single responsibility: Group identical tables and create a canonical mapping
    # for shared table references. Ensures that each unique table content is stored
    # only once in the collection.
    #
    # @example Deduplicate tables
    #   deduplicator = TableDeduplicator.new([font1, font2, font3])
    #   sharing_map = deduplicator.build_sharing_map
    #   canonical_tables = deduplicator.canonical_tables
    class TableDeduplicator
      # Canonical tables (unique table data)
      # @return [Hash<String, Hash>] Map of table tag to canonical versions
      attr_reader :canonical_tables

      # Sharing map (font -> table -> canonical reference)
      # @return [Hash<Integer, Hash<String, Hash>>] Sharing map
      attr_reader :sharing_map

      # Initialize deduplicator with fonts
      #
      # The Array check runs before the emptiness check so that arguments
      # without #empty? (e.g. an Integer) raise ArgumentError rather than
      # NoMethodError.
      #
      # @param fonts [Array<TrueTypeFont, OpenTypeFont>] Fonts to process; each
      #   must respond to #table_names and #table_data
      # @raise [ArgumentError] if fonts is nil, not an Array, or empty
      def initialize(fonts)
        raise ArgumentError, "fonts cannot be nil or empty" if fonts.nil?
        raise ArgumentError, "fonts must be an array" unless fonts.is_a?(Array)
        raise ArgumentError, "fonts cannot be nil or empty" if fonts.empty?

        @fonts = fonts
        reset_state
      end

      # Build sharing map for all fonts
      #
      # Creates a map structure that indicates which canonical table each font
      # should reference for each table tag. This enables efficient table sharing
      # in the final collection.
      #
      # Each call rebuilds the result from scratch, so calling it repeatedly
      # yields the same map instead of accumulating duplicate font indices
      # (which would wrongly flag every table as shared).
      #
      # @return [Hash<Integer, Hash<String, Hash>>] Sharing map with structure:
      #   {
      #     font_index => {
      #       table_tag => {
      #         canonical_id: unique_id,
      #         checksum: sha256_checksum,
      #         data: table_data,
      #         size: table_size_in_bytes,
      #         shared: true/false,
      #         shared_with: [font_indices]
      #       }
      #     }
      #   }
      def build_sharing_map
        reset_state

        # First pass: collect all unique tables and remember each font's choice
        assignments = collect_canonical_tables

        # Second pass: build sharing map for each font
        build_font_sharing_references(assignments)

        @sharing_map
      end

      # Get canonical table data for a specific table
      #
      # @param tag [String] Table tag
      # @param canonical_id [String] Canonical table identifier
      # @return [String, nil] Binary table data, nil if tag or ID is unknown
      def canonical_table_data(tag, canonical_id)
        @canonical_tables.dig(tag, canonical_id, :data)
      end

      # Get all canonical tables for a specific tag
      #
      # @param tag [String] Table tag
      # @return [Hash<String, Hash>, nil] Map of canonical_id to table info
      def canonical_tables_for_tag(tag)
        @canonical_tables[tag]
      end

      # Get sharing statistics
      #
      # Counts are taken from the current sharing map, so they are all zero
      # until #build_sharing_map has run.
      #
      # @return [Hash] Statistics about table sharing (:total_tables,
      #   :shared_tables, :unique_tables, :sharing_percentage, :canonical_count)
      def statistics
        entries = @sharing_map.each_value.flat_map(&:values)
        total_tables = entries.size
        shared_tables = entries.count { |info| info[:shared] }
        percentage =
          total_tables.positive? ? (shared_tables.to_f / total_tables * 100).round(2) : 0.0

        {
          total_tables: total_tables,
          shared_tables: shared_tables,
          unique_tables: total_tables - shared_tables,
          sharing_percentage: percentage,
          canonical_count: @canonical_tables.values.sum(&:size),
        }
      end

      private

      # Drop all derived state so a build starts from a clean slate
      #
      # @return [void]
      def reset_state
        @canonical_tables = {}
        @sharing_map = {}
        @checksum_to_canonical = {}
      end

      # Collect all unique (canonical) tables across all fonts
      #
      # Identifies unique table content based on checksum and stores one
      # canonical version of each unique table. Tags whose table data is nil
      # are skipped.
      #
      # @return [Array<Hash<String, String>>] Per font (in font order), a map
      #   of table tag to the canonical ID chosen for it
      def collect_canonical_tables
        assignments = @fonts.each_with_index.map do |font, font_index|
          font.table_names.each_with_object({}) do |tag, refs|
            table_data = font.table_data[tag]
            next unless table_data

            checksum = calculate_checksum(table_data)
            canonical_id = find_or_create_canonical(tag, checksum, table_data,
                                                    font_index)

            # Track which fonts use this canonical table
            @canonical_tables[tag][canonical_id][:font_indices] << font_index
            refs[tag] = canonical_id
          end
        end

        mark_shared_tables
        assignments
      end

      # Find existing canonical table or create new one
      #
      # @param tag [String] Table tag
      # @param checksum [String] Table checksum
      # @param data [String] Table data
      # @param _font_index [Integer] Font index (currently unused)
      # @return [String] Canonical table ID
      def find_or_create_canonical(tag, checksum, data, _font_index)
        versions = (@canonical_tables[tag] ||= {})
        known_ids = (@checksum_to_canonical[tag] ||= {})

        # Reuse the canonical table when this exact content was seen before
        existing_id = known_ids[checksum]
        return existing_id if existing_id

        canonical_id = generate_canonical_id(tag, checksum)
        known_ids[checksum] = canonical_id
        versions[canonical_id] = {
          checksum: checksum,
          data: data,
          size: data.bytesize,
          font_indices: [],
          shared: false,
        }

        canonical_id
      end

      # Generate unique canonical ID for a table
      #
      # @param tag [String] Table tag
      # @param checksum [String] Table checksum
      # @return [String] Canonical ID
      def generate_canonical_id(tag, checksum)
        # Use first 12 characters of checksum for brevity
        "#{tag}_#{checksum[0...12]}"
      end

      # Mark tables that are shared across multiple fonts
      #
      # @return [void]
      def mark_shared_tables
        @canonical_tables.each_value do |canonical_versions|
          canonical_versions.each_value do |info|
            info[:shared] = info[:font_indices].size > 1
            info[:shared_with] = info[:font_indices].dup if info[:shared]
          end
        end
      end

      # Build sharing references for each font
      #
      # Creates a map for each font indicating which canonical table it should
      # reference for each tag. Fonts without any table data get an empty map.
      #
      # @param assignments [Array<Hash<String, String>>] Per-font tag to
      #   canonical ID maps as returned by #collect_canonical_tables
      # @return [void]
      def build_font_sharing_references(assignments)
        assignments.each_with_index do |refs, font_index|
          @sharing_map[font_index] =
            refs.each_with_object({}) do |(tag, canonical_id), font_map|
              canonical_info = @canonical_tables[tag][canonical_id]
              font_map[tag] = {
                canonical_id: canonical_id,
                checksum: canonical_info[:checksum],
                data: canonical_info[:data],
                size: canonical_info[:size],
                shared: canonical_info[:shared],
                shared_with: canonical_info[:shared_with] || [],
              }
            end
        end
      end

      # Calculate SHA256 checksum for table data
      #
      # @param data [String] Binary table data
      # @return [String] Hexadecimal checksum
      def calculate_checksum(data)
        Digest::SHA256.hexdigest(data)
      end
    end
  end
end
+ end