fontisan 0.4.10 → 0.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 797250db52b46986a1458fd680d2c2f2eab6896ba1bcf617ba0d74622ce7ff51
4
- data.tar.gz: 81cf20d3aff4f40b33e0fec8cc8de359a1a4acf139d14fe75ec3a4fc839ac827
3
+ metadata.gz: 1ed3f50509fb7f223b2cdcfdf7e59345e83b5ca2d11b2a0806574651687c4582
4
+ data.tar.gz: ccb6cabe8c99dfcc060acc7f46d26efd9961b5384365ab404552dad539dc4555
5
5
  SHA512:
6
- metadata.gz: dc92df9fbb61326d199f51ceda010e8f990cd109355679d463066e0f590a461e71241239b6b90f3a1acf5832362fba778d51833d24eb01c46b42b95c488c3550
7
- data.tar.gz: c331cceb05fd29864a85e4e7d7dca99aa96c9c2ef152c3e877842aaffd0a82d1ad46e6770d5e80d3799bb67a905306a1eb6e5b4d18d03f5c8a02072c030c33f8
6
+ metadata.gz: b977d4f9ac972020514daca43a14917784c713258f73223597c8a48dd2b32fcf3ccb9adbe2bd343db7ac43e63eaba8943a883580ec3c7bb7dec23f6a0f1e1fe9
7
+ data.tar.gz: 5c17de9dc3c60f44e472e94c19a1ca33435805ed81f261cc357dfcfd6d197f3109531ae8ad4b3a842e52f4f04de19501c0e860793212b87e874b63f01d99d8f3
data/Rakefile CHANGED
@@ -12,24 +12,25 @@ RuboCop::RakeTask.new
12
12
  namespace :fixtures do
13
13
  # Load centralized fixture configuration
14
14
  require_relative "spec/support/fixture_fonts"
15
+ require "fontisan/tasks"
15
16
 
16
17
  # Helper method to download a single file
17
18
  def download_single_file(name, url, target_path)
18
- require "open-uri"
19
-
20
19
  puts "[fixtures:download] Downloading #{name}..."
21
- FileUtils.mkdir_p(File.dirname(target_path))
22
20
 
23
- URI.open(url) do |remote|
24
- File.binwrite(target_path, remote.read)
25
- end
21
+ Fontisan::Tasks::FixtureDownloader.new(
22
+ url: url,
23
+ destination: target_path,
24
+ ).call
26
25
 
27
26
  puts "[fixtures:download] #{name} downloaded successfully"
27
+ rescue Fontisan::Tasks::FixtureDownloader::Error => e
28
+ warn "[fixtures:download] #{name} failed after retries: #{e.message}"
29
+ raise
28
30
  end
29
31
 
30
32
  # Helper method to download and extract a font archive
31
33
  def download_font(name, url, target_dir)
32
- require "open-uri"
33
34
  require "zip"
34
35
 
35
36
  puts "[fixtures:download] Downloading #{name}..."
@@ -39,45 +40,58 @@ namespace :fixtures do
39
40
  temp_path = File.join(Dir.tmpdir,
40
41
  "fontisan_#{name}_#{Process.pid}_#{rand(10000)}.zip")
41
42
 
42
- # Download using IO.copy_stream for better Windows compatibility
43
- URI.open(url, "rb") do |remote|
44
- File.open(temp_path, "wb") do |file|
45
- IO.copy_stream(remote, file)
46
- end
47
- end
48
-
49
- puts "[fixtures:download] Extracting #{name}..."
50
-
51
- # Open zip file and ensure it's fully closed before we're done
52
- zip_file = Zip::File.open(temp_path)
53
43
  begin
54
- zip_file.each do |entry|
55
- # Skip macOS metadata files and directories
56
- next if entry.name.start_with?("__MACOSX/") || entry.name.include?("/._")
57
- next if entry.directory?
58
-
59
- # Ensure entry.name is relative by stripping leading slashes
60
- relative_name = entry.name.sub(%r{^/+}, "")
61
-
62
- dest_path = File.join(target_dir, relative_name)
63
- FileUtils.mkdir_p(File.dirname(dest_path))
64
-
65
- # Skip if file already exists
66
- next if File.exist?(dest_path)
67
-
68
- # Write the file content directly using binary mode
69
- File.open(dest_path, "wb") do |file|
70
- IO.copy_stream(entry.get_input_stream, file)
44
+ Fontisan::Tasks::FixtureDownloader.new(
45
+ url: url,
46
+ destination: temp_path,
47
+ ).call
48
+
49
+ puts "[fixtures:download] Extracting #{name}..."
50
+
51
+ # Open zip file and ensure it's fully closed before we're done
52
+ zip_file = Zip::File.open(temp_path)
53
+ begin
54
+ zip_file.each do |entry|
55
+ # Skip macOS metadata files and directories
56
+ next if entry.name.start_with?("__MACOSX/") || entry.name.include?("/._")
57
+ next if entry.directory?
58
+
59
+ # Ensure entry.name is relative by stripping leading slashes
60
+ relative_name = entry.name.sub(%r{^/+}, "")
61
+
62
+ dest_path = File.join(target_dir, relative_name)
63
+ FileUtils.mkdir_p(File.dirname(dest_path))
64
+
65
+ # Skip if file already exists
66
+ next if File.exist?(dest_path)
67
+
68
+ # Write the file content directly using binary mode
69
+ File.open(dest_path, "wb") do |file|
70
+ IO.copy_stream(entry.get_input_stream, file)
71
+ end
71
72
  end
73
+ ensure
74
+ # Explicitly close the zip file to release file handle on Windows
75
+ zip_file&.close
72
76
  end
73
77
  ensure
74
- # Explicitly close the zip file to release file handle on Windows
75
- zip_file&.close
78
+ # Clean up the temp zip explicitly so the temp dir doesn't fill
79
+ # up on repeated runs. On Windows the just-closed zip file
80
+ # handle can briefly hold a lock that surfaces as
81
+ # Errno::EACCES; swallow that one error so the rake task can
82
+ # complete (OS will sweep the temp file later).
83
+ begin
84
+ File.delete(temp_path) if File.exist?(temp_path)
85
+ rescue Errno::EACCES
86
+ warn "[fixtures:download] could not delete temp zip #{temp_path}; " \
87
+ "OS will clean it up"
88
+ end
76
89
  end
77
90
 
78
- # Temp file left in Dir.tmpdir - OS will clean it up automatically
79
-
80
91
  puts "[fixtures:download] #{name} downloaded successfully"
92
+ rescue Fontisan::Tasks::FixtureDownloader::Error => e
93
+ warn "[fixtures:download] #{name} failed after retries: #{e.message}"
94
+ raise
81
95
  rescue LoadError => e
82
96
  warn "[fixtures:download] Error: Required gem not installed. Please run: gem install rubyzip"
83
97
  raise e
@@ -0,0 +1,429 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Fontisan
4
+ class Stitcher
5
+ module PartitionStrategy
6
+ # Partition codepoints by Unicode Blocks.txt block.
7
+ #
8
+ # Each non-empty Unicode block becomes one partition. If a single
9
+ # block alone exceeds +cap+, raises PartitionCapExceededError —
10
+ # every block is treated as atomic because there is no finer
11
+ # Unicode-defined boundary inside a block. (Callers who need to
12
+ # split a block further must use ByPlane with explicit carve-outs
13
+ # or implement a custom partitioner.)
14
+ #
15
+ # Partition names follow the canonical Unicode block name with
16
+ # spaces replaced by underscores and a +block_+ prefix:
17
+ #
18
+ # "Basic Latin" => :block_basic_latin
19
+ # "CJK Unified Ideographs" => :block_cjk_unified_ideographs
20
+ #
21
+ # Codepoints not covered by any block in {BLOCKS} (unassigned or
22
+ # in a block omitted from this list) fall into +:block_other+.
23
+ class ByBlock < Base
24
# Unicode 16.0 block ranges. Source: Unicode Blocks.txt.
# Lists blocks in the BMP, SMP, SIP, TIP, and SSP. Unassigned
# planes (4..13) are omitted — codepoints there, and codepoints
# in any block not listed here, fall into +:block_other+.
# NOTE(review): the list has not been verified to be exhaustive
# against Blocks.txt; add missing blocks as they are needed.
#
# Keys are block names with spaces replaced by underscores. A few
# keys deviate from the canonical Unicode spelling (for example
# "Ethiopian_Extended-B", "Pau_Cin_Hmo", and the hyphenated
# "CJK_Unified_Ideographs_Extension-B".."-I"). They are kept
# as-is because partition names are derived from the keys and
# ByScript::SCRIPT_OF_BLOCK looks blocks up by these exact
# strings.
#
# Ranges must never overlap: lookups return the first range that
# covers a codepoint, so an overlap would silently shadow the
# later block.
#
# The data is inlined (rather than loaded from an external
# file) so the partitioner is self-contained: no YAML load
# at startup, no data file to ship. If the list ever needs to
# be data-driven, swap BLOCKS for a CSV/YAML loader behind
# the same constant — callers don't care about the source
# (OCP).
# rubocop:disable Metrics/CollectionLiteralLength
BLOCKS = {
  # BMP (Plane 0)
  "Basic_Latin" => 0x0000..0x007F,
  "Latin-1_Supplement" => 0x0080..0x00FF,
  "Latin_Extended-A" => 0x0100..0x017F,
  "Latin_Extended-B" => 0x0180..0x024F,
  "IPA_Extensions" => 0x0250..0x02AF,
  "Spacing_Modifier_Letters" => 0x02B0..0x02FF,
  "Combining_Diacritical_Marks" => 0x0300..0x036F,
  "Greek_and_Coptic" => 0x0370..0x03FF,
  "Cyrillic" => 0x0400..0x04FF,
  "Cyrillic_Supplement" => 0x0500..0x052F,
  "Armenian" => 0x0530..0x058F,
  "Hebrew" => 0x0590..0x05FF,
  "Arabic" => 0x0600..0x06FF,
  "Syriac" => 0x0700..0x074F,
  "Arabic_Supplement" => 0x0750..0x077F,
  "Arabic_Extended-A" => 0x08A0..0x08FF,
  "Arabic_Extended-B" => 0x0870..0x089F,
  "Thaana" => 0x0780..0x07BF,
  "NKo" => 0x07C0..0x07FF,
  "Samaritan" => 0x0800..0x083F,
  "Mandaic" => 0x0840..0x085F,
  "Syriac_Supplement" => 0x0860..0x086F,
  "Devanagari" => 0x0900..0x097F,
  "Bengali" => 0x0980..0x09FF,
  "Gurmukhi" => 0x0A00..0x0A7F,
  "Gujarati" => 0x0A80..0x0AFF,
  "Oriya" => 0x0B00..0x0B7F,
  "Tamil" => 0x0B80..0x0BFF,
  "Telugu" => 0x0C00..0x0C7F,
  "Kannada" => 0x0C80..0x0CFF,
  "Malayalam" => 0x0D00..0x0D7F,
  "Sinhala" => 0x0D80..0x0DFF,
  "Thai" => 0x0E00..0x0E7F,
  "Lao" => 0x0E80..0x0EFF,
  "Tibetan" => 0x0F00..0x0FFF,
  "Myanmar" => 0x1000..0x109F,
  "Georgian" => 0x10A0..0x10FF,
  "Hangul_Jamo" => 0x1100..0x11FF,
  "Ethiopic" => 0x1200..0x137F,
  "Ethiopic_Supplement" => 0x1380..0x139F,
  "Cherokee" => 0x13A0..0x13FF,
  "Unified_Canadian_Aboriginal_Syllabics" => 0x1400..0x167F,
  "Ogham" => 0x1680..0x169F,
  "Runic" => 0x16A0..0x16FF,
  "Tagalog" => 0x1700..0x171F,
  "Hanunoo" => 0x1720..0x173F,
  "Buhid" => 0x1740..0x175F,
  "Tagbanwa" => 0x1760..0x177F,
  "Khmer" => 0x1780..0x17FF,
  "Mongolian" => 0x1800..0x18AF,
  "Unified_Canadian_Aboriginal_Syllabics_Extended" => 0x18B0..0x18FF,
  "Limbu" => 0x1900..0x194F,
  "Tai_Le" => 0x1950..0x197F,
  "New_Tai_Lue" => 0x1980..0x19DF,
  "Khmer_Symbols" => 0x19E0..0x19FF,
  "Buginese" => 0x1A00..0x1A1F,
  "Tai_Tham" => 0x1A20..0x1AAF,
  "Combining_Diacritical_Marks_Extended" => 0x1AB0..0x1AFF,
  "Balinese" => 0x1B00..0x1B7F,
  "Sundanese" => 0x1B80..0x1BBF,
  "Batak" => 0x1BC0..0x1BFF,
  "Lepcha" => 0x1C00..0x1C4F,
  "Ol_Chiki" => 0x1C50..0x1C7F,
  "Cyrillic_Extended-C" => 0x1C80..0x1C8F,
  "Georgian_Extended" => 0x1C90..0x1CBF,
  "Sundanese_Supplement" => 0x1CC0..0x1CCF,
  "Vedic_Extensions" => 0x1CD0..0x1CFF,
  "Phonetic_Extensions" => 0x1D00..0x1D7F,
  "Phonetic_Extensions_Supplement" => 0x1D80..0x1DBF,
  "Combining_Diacritical_Marks_Supplement" => 0x1DC0..0x1DFF,
  "Latin_Extended_Additional" => 0x1E00..0x1EFF,
  "Greek_Extended" => 0x1F00..0x1FFF,
  "General_Punctuation" => 0x2000..0x206F,
  "Superscripts_and_Subscripts" => 0x2070..0x209F,
  "Currency_Symbols" => 0x20A0..0x20CF,
  "Combining_Diacritical_Marks_for_Symbols" => 0x20D0..0x20FF,
  "Letterlike_Symbols" => 0x2100..0x214F,
  "Number_Forms" => 0x2150..0x218F,
  "Arrows" => 0x2190..0x21FF,
  "Mathematical_Operators" => 0x2200..0x22FF,
  "Miscellaneous_Technical" => 0x2300..0x23FF,
  "Control_Pictures" => 0x2400..0x243F,
  "Optical_Character_Recognition" => 0x2440..0x245F,
  "Enclosed_Alphanumerics" => 0x2460..0x24FF,
  "Box_Drawing" => 0x2500..0x257F,
  "Block_Elements" => 0x2580..0x259F,
  "Geometric_Shapes" => 0x25A0..0x25FF,
  "Miscellaneous_Symbols" => 0x2600..0x26FF,
  "Dingbats" => 0x2700..0x27BF,
  "Miscellaneous_Mathematical_Symbols-A" => 0x27C0..0x27EF,
  "Supplemental_Arrows-A" => 0x27F0..0x27FF,
  "Braille_Patterns" => 0x2800..0x28FF,
  "Supplemental_Arrows-B" => 0x2900..0x297F,
  "Miscellaneous_Mathematical_Symbols-B" => 0x2980..0x29FF,
  "Supplemental_Mathematical_Operators" => 0x2A00..0x2AFF,
  "Miscellaneous_Symbols_and_Arrows" => 0x2B00..0x2BFF,
  "Glagolitic" => 0x2C00..0x2C5F,
  "Latin_Extended-C" => 0x2C60..0x2C7F,
  "Coptic" => 0x2C80..0x2CFF,
  "Georgian_Supplement" => 0x2D00..0x2D2F,
  "Tifinagh" => 0x2D30..0x2D7F,
  "Ethiopic_Extended" => 0x2D80..0x2DDF,
  "Cyrillic_Extended-A" => 0x2DE0..0x2DFF,
  "Supplemental_Punctuation" => 0x2E00..0x2E7F,
  "CJK_Radicals_Supplement" => 0x2E80..0x2EFF,
  "Kangxi_Radicals" => 0x2F00..0x2FDF,
  "Ideographic_Description_Characters" => 0x2FF0..0x2FFF,
  "CJK_Symbols_and_Punctuation" => 0x3000..0x303F,
  "Hiragana" => 0x3040..0x309F,
  "Katakana" => 0x30A0..0x30FF,
  "Bopomofo" => 0x3100..0x312F,
  "Hangul_Compatibility_Jamo" => 0x3130..0x318F,
  "Kanbun" => 0x3190..0x319F,
  "Bopomofo_Extended" => 0x31A0..0x31BF,
  "CJK_Strokes" => 0x31C0..0x31EF,
  "Katakana_Phonetic_Extensions" => 0x31F0..0x31FF,
  "Enclosed_CJK_Letters_and_Months" => 0x3200..0x32FF,
  "CJK_Compatibility" => 0x3300..0x33FF,
  "CJK_Unified_Ideographs_Extension_A" => 0x3400..0x4DBF,
  "Yijing_Hexagram_Symbols" => 0x4DC0..0x4DFF,
  "CJK_Unified_Ideographs" => 0x4E00..0x9FFF,
  "Yi_Syllables" => 0xA000..0xA48F,
  "Yi_Radicals" => 0xA490..0xA4CF,
  "Lisu" => 0xA4D0..0xA4FF,
  "Vai" => 0xA500..0xA63F,
  "Cyrillic_Extended-B" => 0xA640..0xA69F,
  "Bamum" => 0xA6A0..0xA6FF,
  "Modifier_Tone_Letters" => 0xA700..0xA71F,
  "Latin_Extended-D" => 0xA720..0xA7FF,
  "Syloti_Nagri" => 0xA800..0xA82F,
  "Common_Indic_Number_Forms" => 0xA830..0xA83F,
  "Phags-pa" => 0xA840..0xA87F,
  "Saurashtra" => 0xA880..0xA8DF,
  "Devanagari_Extended" => 0xA8E0..0xA8FF,
  "Kayah_Li" => 0xA900..0xA92F,
  "Rejang" => 0xA930..0xA95F,
  "Hangul_Jamo_Extended-A" => 0xA960..0xA97F,
  "Javanese" => 0xA980..0xA9DF,
  "Myanmar_Extended-B" => 0xA9E0..0xA9FF,
  "Cham" => 0xAA00..0xAA5F,
  "Myanmar_Extended-A" => 0xAA60..0xAA7F,
  "Tai_Viet" => 0xAA80..0xAADF,
  "Meetei_Mayek_Extensions" => 0xAAE0..0xAAFF,
  "Ethiopic_Extended-A" => 0xAB00..0xAB2F,
  "Latin_Extended-E" => 0xAB30..0xAB6F,
  "Cherokee_Supplement" => 0xAB70..0xABBF,
  "Meetei_Mayek" => 0xABC0..0xABFF,
  "Hangul_Syllables" => 0xAC00..0xD7AF,
  "Hangul_Jamo_Extended-B" => 0xD7B0..0xD7FF,
  "High_Surrogates" => 0xD800..0xDB7F,
  "High_Private_Use_Surrogates" => 0xDB80..0xDBFF,
  "Low_Surrogates" => 0xDC00..0xDFFF,
  "Private_Use_Area" => 0xE000..0xF8FF,
  "CJK_Compatibility_Ideographs" => 0xF900..0xFAFF,
  "Alphabetic_Presentation_Forms" => 0xFB00..0xFB4F,
  "Arabic_Presentation_Forms-A" => 0xFB50..0xFDFF,
  "Variation_Selectors" => 0xFE00..0xFE0F,
  "Vertical_Forms" => 0xFE10..0xFE1F,
  "Combining_Half_Marks" => 0xFE20..0xFE2F,
  "CJK_Compatibility_Forms" => 0xFE30..0xFE4F,
  "Small_Form_Variants" => 0xFE50..0xFE6F,
  "Arabic_Presentation_Forms-B" => 0xFE70..0xFEFF,
  "Halfwidth_and_Fullwidth_Forms" => 0xFF00..0xFFEF,
  "Specials" => 0xFFF0..0xFFFF,

  # SMP (Plane 1)
  "Linear_B_Syllabary" => 0x10000..0x1007F,
  "Linear_B_Ideograms" => 0x10080..0x100FF,
  "Aegean_Numbers" => 0x10100..0x1013F,
  "Ancient_Greek_Numbers" => 0x10140..0x1018F,
  "Ancient_Symbols" => 0x10190..0x101CF,
  "Phaistos_Disc" => 0x101D0..0x101FF,
  "Lycian" => 0x10280..0x1029F,
  "Carian" => 0x102A0..0x102DF,
  "Coptic_Epact_Numbers" => 0x102E0..0x102FF,
  "Old_Italic" => 0x10300..0x1032F,
  "Gothic" => 0x10330..0x1034F,
  "Old_Permic" => 0x10350..0x1037F,
  "Ugaritic" => 0x10380..0x1039F,
  "Old_Persian" => 0x103A0..0x103DF,
  "Deseret" => 0x10400..0x1044F,
  "Shavian" => 0x10450..0x1047F,
  "Osmanya" => 0x10480..0x104AF,
  "Osage" => 0x104B0..0x104FF,
  "Elbasan" => 0x10500..0x1052F,
  "Caucasian_Albanian" => 0x10530..0x1056F,
  "Vithkuqi" => 0x10570..0x105BF,
  "Linear_A" => 0x10600..0x1077F,
  "Latin_Extended-F" => 0x10780..0x107BF,
  "Cypriot_Syllabary" => 0x10800..0x1083F,
  "Imperial_Aramaic" => 0x10840..0x1085F,
  "Palmyrene" => 0x10860..0x1087F,
  "Nabataean" => 0x10880..0x108AF,
  "Hatran" => 0x108E0..0x108FF,
  "Phoenician" => 0x10900..0x1091F,
  "Lydian" => 0x10920..0x1093F,
  "Meroitic_Hieroglyphs" => 0x10980..0x1099F,
  "Meroitic_Cursive" => 0x109A0..0x109FF,
  "Kharoshthi" => 0x10A00..0x10A5F,
  "Old_South_Arabian" => 0x10A60..0x10A7F,
  "Old_North_Arabian" => 0x10A80..0x10A9F,
  "Manichaean" => 0x10AC0..0x10AFF,
  "Avestan" => 0x10B00..0x10B3F,
  "Inscriptional_Parthian" => 0x10B40..0x10B5F,
  "Inscriptional_Pahlavi" => 0x10B60..0x10B7F,
  "Psalter_Pahlavi" => 0x10B80..0x10BAF,
  "Old_Turkic" => 0x10C00..0x10C4F,
  "Old_Hungarian" => 0x10C80..0x10CFF,
  "Hanifi_Rohingya" => 0x10D00..0x10D3F,
  "Garay" => 0x10D40..0x10D8F,
  "Rumi_Numeral_Symbols" => 0x10E60..0x10E7F,
  "Yezidi" => 0x10E80..0x10EBF,
  "Arabic_Extended-C" => 0x10EC0..0x10EFF,
  "Old_Sogdian" => 0x10F00..0x10F2F,
  "Sogdian" => 0x10F30..0x10F6F,
  "Old_Uyghur" => 0x10F70..0x10FAF,
  "Chorasmian" => 0x10FB0..0x10FDF,
  "Elymaic" => 0x10FE0..0x10FFF,
  "Brahmi" => 0x11000..0x1107F,
  "Kaithi" => 0x11080..0x110CF,
  "Sora_Sompeng" => 0x110D0..0x110FF,
  "Chakma" => 0x11100..0x1114F,
  "Mahajani" => 0x11150..0x1117F,
  "Sharada" => 0x11180..0x111DF,
  "Sinhala_Archaic_Numbers" => 0x111E0..0x111FF,
  "Khojki" => 0x11200..0x1124F,
  "Multani" => 0x11280..0x112AF,
  "Khudawadi" => 0x112B0..0x112FF,
  "Grantha" => 0x11300..0x1137F,
  "Tulu-Tigalari" => 0x11380..0x113FF,
  "Newa" => 0x11400..0x1147F,
  "Tirhuta" => 0x11480..0x114DF,
  "Siddham" => 0x11580..0x115FF,
  "Modi" => 0x11600..0x1165F,
  "Mongolian_Supplement" => 0x11660..0x1167F,
  "Takri" => 0x11680..0x116CF,
  "Myanmar_Extended-C" => 0x116D0..0x116FF,
  "Ahom" => 0x11700..0x1174F,
  "Dogra" => 0x11800..0x1184F,
  "Warang_Citi" => 0x118A0..0x118FF,
  "Dives_Akuru" => 0x11900..0x1195F,
  "Nandinagari" => 0x119A0..0x119FF,
  "Zanabazar_Square" => 0x11A00..0x11A4F,
  "Soyombo" => 0x11A50..0x11AAF,
  "Unified_Canadian_Aboriginal_Syllabics_Extended-A" => 0x11AB0..0x11ABF,
  "Pau_Cin_Hmo" => 0x11AC0..0x11AFF,
  "Bhaiksuki" => 0x11C00..0x11C6F,
  "Marchen" => 0x11C70..0x11CBF,
  "Masaram_Gondi" => 0x11D00..0x11D5F,
  "Gunjala_Gondi" => 0x11D60..0x11DAF,
  "Makasar" => 0x11EE0..0x11EFF,
  "Kawi" => 0x11F00..0x11F5F,
  "Lisu_Supplement" => 0x11FB0..0x11FBF,
  "Tamil_Supplement" => 0x11FC0..0x11FFF,
  "Cuneiform" => 0x12000..0x123FF,
  "Cuneiform_Numbers_and_Punctuation" => 0x12400..0x1247F,
  "Early_Dynastic_Cuneiform" => 0x12480..0x1254F,
  "Cypro-Minoan" => 0x12F90..0x12FFF,
  "Egyptian_Hieroglyphs" => 0x13000..0x1342F,
  "Egyptian_Hieroglyph_Format_Controls" => 0x13430..0x1345F,
  "Egyptian_Hieroglyphs_Extended-A" => 0x13460..0x143FF,
  "Anatolian_Hieroglyphs" => 0x14400..0x1467F,
  "Bamum_Supplement" => 0x16800..0x16A3F,
  "Mro" => 0x16A40..0x16A6F,
  "Tangsa" => 0x16A70..0x16ACF,
  "Bassa_Vah" => 0x16AD0..0x16AFF,
  "Pahawh_Hmong" => 0x16B00..0x16B8F,
  "Medefaidrin" => 0x16E40..0x16E9F,
  "Miao" => 0x16F00..0x16F9F,
  "Ideographic_Symbols_and_Punctuation" => 0x16FE0..0x16FFF,
  "Tangut" => 0x17000..0x187FF,
  "Tangut_Components" => 0x18800..0x18AFF,
  "Khitan_Small_Script" => 0x18B00..0x18CFF,
  "Tangut_Supplement" => 0x18D00..0x18D7F,
  "Kana_Extended-B" => 0x1AFF0..0x1AFFF,
  "Kana_Supplement" => 0x1B000..0x1B0FF,
  "Kana_Extended-A" => 0x1B100..0x1B12F,
  "Small_Kana_Extension" => 0x1B130..0x1B16F,
  "Nushu" => 0x1B170..0x1B2FF,
  "Duployan" => 0x1BC00..0x1BC9F,
  "Shorthand_Format_Controls" => 0x1BCA0..0x1BCAF,
  "Znamenny_Musical_Notation" => 0x1CF00..0x1CFCF,
  "Byzantine_Musical_Symbols" => 0x1D000..0x1D0FF,
  "Musical_Symbols" => 0x1D100..0x1D1FF,
  "Ancient_Greek_Musical_Notation" => 0x1D200..0x1D24F,
  "Kaktovik_Numerals" => 0x1D2C0..0x1D2DF,
  "Mayan_Numerals" => 0x1D2E0..0x1D2FF,
  "Tai_Xuan_Jing_Symbols" => 0x1D300..0x1D35F,
  "Counting_Rod_Numerals" => 0x1D360..0x1D37F,
  "Mathematical_Alphanumeric_Symbols" => 0x1D400..0x1D7FF,
  "Sutton_SignWriting" => 0x1D800..0x1DAAF,
  "Latin_Extended-G" => 0x1DF00..0x1DFFF,
  "Glagolitic_Supplement" => 0x1E000..0x1E02F,
  "Cyrillic_Extended-D" => 0x1E030..0x1E08F,
  "Nyiakeng_Puachue_Hmong" => 0x1E100..0x1E14F,
  "Toto" => 0x1E290..0x1E2BF,
  "Wancho" => 0x1E2C0..0x1E2FF,
  "Nag_Mundari" => 0x1E4D0..0x1E4FF,
  "Ethiopian_Extended-B" => 0x1E7E0..0x1E7FF,
  "Mende_Kikakui" => 0x1E800..0x1E8DF,
  "Adlam" => 0x1E900..0x1E95F,
  "Indic_Siyaq_Numbers" => 0x1EC70..0x1ECBF,
  "Ottoman_Siyaq_Numbers" => 0x1ED00..0x1ED4F,
  "Arabic_Mathematical_Alphabetic_Symbols" => 0x1EE00..0x1EEFF,
  "Mahjong_Tiles" => 0x1F000..0x1F02F,
  "Domino_Tiles" => 0x1F030..0x1F09F,
  "Playing_Cards" => 0x1F0A0..0x1F0FF,
  "Enclosed_Alphanumeric_Supplement" => 0x1F100..0x1F1FF,
  "Enclosed_Ideographic_Supplement" => 0x1F200..0x1F2FF,
  "Miscellaneous_Symbols_and_Pictographs" => 0x1F300..0x1F5FF,
  "Emoticons" => 0x1F600..0x1F64F,
  "Ornamental_Dingbats" => 0x1F650..0x1F67F,
  "Transport_and_Map_Symbols" => 0x1F680..0x1F6FF,
  "Alchemical_Symbols" => 0x1F700..0x1F77F,
  "Geometric_Shapes_Extended" => 0x1F780..0x1F7FF,
  "Supplemental_Arrows-C" => 0x1F800..0x1F8FF,
  "Supplemental_Symbols_and_Pictographs" => 0x1F900..0x1F9FF,
  "Chess_Symbols" => 0x1FA00..0x1FA6F,
  "Symbols_and_Pictographs_Extended-A" => 0x1FA70..0x1FAFF,
  "Symbols_for_Legacy_Computing" => 0x1FB00..0x1FBFF,

  # SIP (Plane 2)
  "CJK_Unified_Ideographs_Extension-B" => 0x20000..0x2A6DF,
  "CJK_Unified_Ideographs_Extension-C" => 0x2A700..0x2B73F,
  "CJK_Unified_Ideographs_Extension-D" => 0x2B740..0x2B81F,
  "CJK_Unified_Ideographs_Extension-E" => 0x2B820..0x2CEAF,
  "CJK_Unified_Ideographs_Extension-F" => 0x2CEB0..0x2EBEF,
  "CJK_Unified_Ideographs_Extension-I" => 0x2EBF0..0x2EE5F,
  "CJK_Compatibility_Ideographs_Supplement" => 0x2F800..0x2FA1F,

  # TIP (Plane 3)
  # Extension G ends at U+3134F and Extension H starts right
  # after it at U+31350; the two blocks are adjacent, not
  # overlapping, so both can be listed.
  "CJK_Unified_Ideographs_Extension-G" => 0x30000..0x3134F,
  "CJK_Unified_Ideographs_Extension-H" => 0x31350..0x323AF,

  # SSP (Plane 14)
  "Tags" => 0xE0000..0xE007F,
  "Variation_Selectors_Supplement" => 0xE0100..0xE01EF,
}.freeze
# rubocop:enable Metrics/CollectionLiteralLength
379
+
380
# Builds one partition per Unicode block present in +cp_map+.
#
# @param cp_map [Hash{Integer=>Object}] codepoint → donor label
# @param cap [Integer] max codepoints per partition
# @return [Blueprint]
# @raise [PartitionCapExceededError] when a single block holds
#   more than +cap+ codepoints (blocks are never split)
def call(cp_map, cap: DEFAULT_CAP)
  partitions = []

  group_by_block(cp_map).each do |label, entries|
    count = entries.size
    if count > cap
      raise PartitionCapExceededError.new(
        block_label: label,
        actual: count,
        cap: cap,
      )
    end

    partitions << Partition.new(
      name: partition_name(label),
      cps: entries.map(&:first),
      donor_map: entries.to_h,
    )
  end

  Blueprint.new(partitions: partitions)
end
402
+
403
+ private
404
+
405
# Buckets the [codepoint, donor label] pairs of +cp_map+ by the
# label of the block containing the codepoint; codepoints with
# no matching block are collected under +:other+.
def group_by_block(cp_map)
  buckets = Hash.new { |hash, key| hash[key] = [] }

  cp_map.each do |codepoint, donor|
    bucket_key = find_block_label(codepoint) || :other
    buckets[bucket_key] << [codepoint, donor]
  end

  buckets
end
411
+
412
# Returns the BLOCKS key of the first range covering +codepoint+,
# or nil when no listed block contains it.
#
# A linear scan over the few hundred BLOCKS entries is cheap
# enough for partition-time use (once per Stitcher.run). Should
# profiling ever flag it, a sorted [start, end, label] array with
# binary search can replace it without touching the public API.
def find_block_label(codepoint)
  BLOCKS.each do |name, span|
    return name if span.cover?(codepoint)
  end

  nil
end
420
+
421
# Turns a block label into a partition name: the +:other+ bucket
# becomes +:block_other+, any other label is lowercased and
# prefixed with +block_+.
def partition_name(label)
  label == :other ? :block_other : "block_#{label.downcase}".to_sym
end
426
+ end
427
+ end
428
+ end
429
+ end
@@ -0,0 +1,305 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Fontisan
4
+ class Stitcher
5
+ module PartitionStrategy
6
+ # Partition codepoints by Unicode script property.
7
+ #
8
+ # Each non-empty script becomes one or more partitions. Scripts
9
+ # that overflow +cap+ are chunked (Han has 80k+ codepoints) —
10
+ # chunks are named +:script_<name>_a+, +:script_<name>_b+, etc.,
11
+ # matching ByPlane's sub-split convention.
12
+ #
13
+ # Codepoints whose script is unknown (unassigned blocks) fall
14
+ # into +:script_other+. Codepoints in the +Common+ script
15
+ # (digits, punctuation shared across scripts) and +Inherited+
16
+ # script (combining marks) get their own buckets — they're
17
+ # typically useful as standalone subfonts.
18
+ #
19
+ # The script data is derived from {ByBlock::BLOCKS}: each Unicode
20
+ # block maps to exactly one primary script (with a few exceptions
21
+ # that we resolve explicitly). This keeps the data DRY — the
22
+ # authoritative block list lives in one place.
23
+ class ByScript < Base
24
# Maps each Unicode block label to its primary script.
#
# Keys must be spelled exactly like the ByBlock::BLOCKS keys:
# the lookup is a plain string match, so a key that differs only
# in "-" versus "_" never matches and its codepoints silently
# land in +:other+. In particular the CJK extension blocks are
# spelled "Extension_A" but "Extension-B" .. "Extension-I" in
# BLOCKS, and this table follows that spelling.
#
# Special scripts:
# - +:common+ — shared across scripts (digits, punctuation)
# - +:inherited+ — combining marks that inherit the base char's script
# - +:other+ — fallback for unlisted blocks
SCRIPT_OF_BLOCK = {
  # Latin family
  "Basic_Latin" => :common,
  "Latin-1_Supplement" => :latin,
  "Latin_Extended-A" => :latin,
  "Latin_Extended-B" => :latin,
  "Latin_Extended-C" => :latin,
  "Latin_Extended-D" => :latin,
  "Latin_Extended-E" => :latin,
  "Latin_Extended-F" => :latin,
  "Latin_Extended_Additional" => :latin,
  "Phonetic_Extensions" => :latin,
  "Phonetic_Extensions_Supplement" => :latin,
  "Modifier_Tone_Letters" => :latin,
  "Spacing_Modifier_Letters" => :common,
  "Combining_Diacritical_Marks" => :inherited,
  "Combining_Diacritical_Marks_Extended" => :inherited,
  "Combining_Diacritical_Marks_Supplement" => :inherited,
  "Combining_Diacritical_Marks_for_Symbols" => :inherited,
  "Combining_Half_Marks" => :inherited,

  # Greek
  "Greek_and_Coptic" => :greek,
  "Greek_Extended" => :greek,

  # Cyrillic
  "Cyrillic" => :cyrillic,
  "Cyrillic_Supplement" => :cyrillic,
  "Cyrillic_Extended-A" => :cyrillic,
  "Cyrillic_Extended-B" => :cyrillic,
  "Cyrillic_Extended-C" => :cyrillic,
  "Cyrillic_Extended-D" => :cyrillic,

  # Middle Eastern
  "Armenian" => :armenian,
  "Hebrew" => :hebrew,
  "Arabic" => :arabic,
  "Arabic_Supplement" => :arabic,
  "Arabic_Extended-A" => :arabic,
  "Arabic_Extended-B" => :arabic,
  "Arabic_Extended-C" => :arabic,
  "Arabic_Extended-D" => :arabic,
  "Arabic_Presentation_Forms-A" => :arabic,
  "Arabic_Presentation_Forms-B" => :arabic,
  "Syriac" => :syriac,
  "Syriac_Supplement" => :syriac,
  "Thaana" => :thaana,
  "Samaritan" => :samaritan,
  "Mandaic" => :mandaic,

  # Indic
  "Devanagari" => :devanagari,
  "Devanagari_Extended" => :devanagari,
  "Bengali" => :bengali,
  "Gurmukhi" => :gurmukhi,
  "Gujarati" => :gujarati,
  "Oriya" => :oriya,
  "Tamil" => :tamil,
  "Tamil_Supplement" => :tamil,
  "Telugu" => :telugu,
  "Kannada" => :kannada,
  "Malayalam" => :malayalam,
  "Sinhala" => :sinhala,
  "Sinhala_Archaic_Numbers" => :sinhala,
  "Vedic_Extensions" => :inherited,

  # Southeast Asian
  "Thai" => :thai,
  "Lao" => :lao,
  "Tibetan" => :tibetan,
  "Myanmar" => :myanmar,
  "Myanmar_Extended-A" => :myanmar,
  "Myanmar_Extended-B" => :myanmar,
  "Myanmar_Extended-C" => :myanmar,
  "Khmer" => :khmer,
  "Khmer_Symbols" => :khmer,
  "Tagalog" => :tagalog,
  "Hanunoo" => :hanunoo,
  "Buhid" => :buhid,
  "Tagbanwa" => :tagbanwa,

  # Hangul
  "Hangul_Jamo" => :hangul,
  "Hangul_Compatibility_Jamo" => :hangul,
  "Hangul_Jamo_Extended-A" => :hangul,
  "Hangul_Jamo_Extended-B" => :hangul,
  "Hangul_Syllables" => :hangul,

  # CJK — Han + native Japanese / Korean
  "CJK_Unified_Ideographs" => :han,
  "CJK_Unified_Ideographs_Extension_A" => :han,
  "CJK_Unified_Ideographs_Extension-B" => :han,
  "CJK_Unified_Ideographs_Extension-C" => :han,
  "CJK_Unified_Ideographs_Extension-D" => :han,
  "CJK_Unified_Ideographs_Extension-E" => :han,
  "CJK_Unified_Ideographs_Extension-F" => :han,
  "CJK_Unified_Ideographs_Extension-G" => :han,
  "CJK_Unified_Ideographs_Extension-H" => :han,
  "CJK_Unified_Ideographs_Extension-I" => :han,
  "CJK_Compatibility_Ideographs" => :han,
  "CJK_Compatibility_Ideographs_Supplement" => :han,
  "CJK_Radicals_Supplement" => :han,
  "Kangxi_Radicals" => :han,
  "CJK_Symbols_and_Punctuation" => :common,
  "CJK_Compatibility" => :han,
  "CJK_Compatibility_Forms" => :common,
  "CJK_Strokes" => :han,
  "Ideographic_Description_Characters" => :common,
  "Enclosed_CJK_Letters_and_Months" => :common,
  "Hiragana" => :hiragana,
  "Katakana" => :katakana,
  "Katakana_Phonetic_Extensions" => :katakana,
  "Kana_Supplement" => :hiragana,
  "Kana_Extended-A" => :hiragana,
  "Kana_Extended-B" => :hiragana,
  "Small_Kana_Extension" => :hiragana,
  "Halfwidth_and_Fullwidth_Forms" => :common,
  "Vertical_Forms" => :common,
  "Ideographic_Symbols_and_Punctuation" => :common,

  # Other scripts (one or more blocks each)
  "Ethiopic" => :ethiopic,
  "Ethiopic_Supplement" => :ethiopic,
  "Ethiopic_Extended" => :ethiopic,
  "Ethiopic_Extended-A" => :ethiopic,
  "Ethiopian_Extended-B" => :ethiopic,
  "Cherokee" => :cherokee,
  "Cherokee_Supplement" => :cherokee,
  "Unified_Canadian_Aboriginal_Syllabics" => :canadian_aboriginal,
  "Unified_Canadian_Aboriginal_Syllabics_Extended" => :canadian_aboriginal,
  "Unified_Canadian_Aboriginal_Syllabics_Extended-A" => :canadian_aboriginal,
  "Ogham" => :ogham,
  "Runic" => :runic,
  "Glagolitic" => :glagolitic,
  "Glagolitic_Supplement" => :glagolitic,
  "Tifinagh" => :tifinagh,
  "Georgian" => :georgian,
  "Georgian_Supplement" => :georgian,
  "Georgian_Extended" => :georgian,
  "Mongolian" => :mongolian,
  "Mongolian_Supplement" => :mongolian,
  "Limbu" => :limbu,
  "Tai_Le" => :tai_le,
  "New_Tai_Lue" => :new_tai_lue,
  "Tai_Tham" => :tai_tham,
  "Tai_Viet" => :tai_viet,
  "Ol_Chiki" => :ol_chiki,
  "Bopomofo" => :bopomofo,
  "Bopomofo_Extended" => :bopomofo,
  "Yi_Syllables" => :yi,
  "Yi_Radicals" => :yi,
  "Vai" => :vai,
  "Bamum" => :bamum,
  "Bamum_Supplement" => :bamum,
  "Syloti_Nagri" => :syloti_nagri,
  "Phags-pa" => :phags_pa,
  "Saurashtra" => :saurashtra,
  "Kayah_Li" => :kayah_li,
  "Rejang" => :rejang,
  "Javanese" => :javanese,
  "Cham" => :cham,
  "Lepcha" => :lepcha,
  "Meetei_Mayek" => :meetei_mayek,
  "Meetei_Mayek_Extensions" => :meetei_mayek,
  "Lisu" => :lisu,
  "Lisu_Supplement" => :lisu,
  "Sundanese" => :sundanese,
  "Sundanese_Supplement" => :sundanese,
  "Batak" => :batak,
  "Buginese" => :buginese,
  "Ahom" => :ahom,
  "Dogra" => :dogra,
  "Tulu-Tigalari" => :tulu_tigalari,
  "Grantha" => :grantha,
  "Newa" => :newa,
  "Tirhuta" => :tirhuta,
  "Siddham" => :siddham,
  "Modi" => :modi,
  "Sharada" => :sharada,
  "Takri" => :takri,
  "Kaithi" => :kaithi,
  "Mahajani" => :mahajani,
  "Multani" => :multani,
  "Khudawadi" => :khudawadi,
  "Nandinagari" => :nandinagari,
  "Nushu" => :nushu,
  "Wancho" => :wancho,
  "Toto" => :toto,
  "Nag_Mundari" => :nag_mundari,

  # Numerals / symbols (Common)
  "Superscripts_and_Subscripts" => :common,
  "Number_Forms" => :common,
  "Currency_Symbols" => :common,
  "Letterlike_Symbols" => :common,
  "Arrows" => :common,
  "Mathematical_Operators" => :common,
  "Miscellaneous_Technical" => :common,
  "Control_Pictures" => :common,
  "Optical_Character_Recognition" => :common,
  "Enclosed_Alphanumerics" => :common,
  "Box_Drawing" => :common,
  "Block_Elements" => :common,
  "Geometric_Shapes" => :common,
  "Miscellaneous_Symbols" => :common,
  "Dingbats" => :common,
  "Miscellaneous_Mathematical_Symbols-A" => :common,
  "Miscellaneous_Mathematical_Symbols-B" => :common,
  "Supplemental_Arrows-A" => :common,
  "Supplemental_Arrows-B" => :common,
  "Supplemental_Arrows-C" => :common,
  "Supplemental_Mathematical_Operators" => :common,
  "Miscellaneous_Symbols_and_Arrows" => :common,
  "Braille_Patterns" => :braille,
  "General_Punctuation" => :common,
  "Supplemental_Punctuation" => :common,
  "Alphabetic_Presentation_Forms" => :common,
  "Specials" => :common,
  "Variation_Selectors" => :inherited,
  "Variation_Selectors_Supplement" => :inherited,
  "Tags" => :common,
}.freeze
253
+
254
# Builds partitions per script, slicing any script that holds
# more than +cap+ codepoints into several partitions.
#
# @param cp_map [Hash{Integer=>Object}] codepoint → donor label
# @param cap [Integer] max codepoints per partition
# @return [Blueprint]
def call(cp_map, cap: DEFAULT_CAP)
  partitions = group_by_script(cp_map).flat_map do |script, entries|
    chunks(entries, cap).each_with_index.map do |chunk, idx|
      Partition.new(
        name: partition_name(script, idx),
        cps: chunk.map(&:first),
        donor_map: chunk.to_h,
      )
    end
  end

  Blueprint.new(partitions: partitions)
end
273
+
274
+ private
275
+
276
# Buckets the [codepoint, donor label] pairs of +cp_map+ by the
# script resolved for each codepoint.
def group_by_script(cp_map)
  buckets = Hash.new { |hash, key| hash[key] = [] }

  cp_map.each do |codepoint, donor|
    buckets[script_of_codepoint(codepoint)] << [codepoint, donor]
  end

  buckets
end
282
+
283
# Resolves the script for +cp+ through the first ByBlock::BLOCKS
# range that covers it. Returns +:other+ when no block covers the
# codepoint or when the block has no SCRIPT_OF_BLOCK entry.
def script_of_codepoint(cp)
  ByBlock::BLOCKS.each do |name, span|
    next unless span.cover?(cp)

    return SCRIPT_OF_BLOCK[name] || :other
  end

  :other
end
289
+
290
# Splits +entries+ into consecutive slices of at most +cap+
# elements, preserving order.
def chunks(entries, cap)
  entries.each_slice(cap).map { |slice| slice }
end
293
+
294
# Builds the partition name for chunk number +idx+ of +script+.
#
# The first chunk (idx 0) has no suffix; subsequent ones get
# _a, _b, ... matching ByPlane's sub-split convention. Past _z
# the suffix continues as _aa, _ab, ... so that a small cap on a
# large script never produces punctuation in the name or an
# out-of-range character code.
#
# @param script [Symbol] script bucket
# @param idx [Integer] zero-based chunk index within the script
# @return [Symbol]
def partition_name(script, idx)
  return :"script_#{script}" if idx.zero?

  :"script_#{script}_#{alpha_suffix(idx)}"
end

# Converts a 1-based index into a lowercase letter label the way
# spreadsheet columns are numbered: 1 → "a", 26 → "z", 27 → "aa".
def alpha_suffix(number)
  letters = +""
  while number.positive?
    # Subtracting 1 first makes the numbering bijective (no zero digit).
    number, offset = (number - 1).divmod(26)
    letters.prepend(("a".ord + offset).chr)
  end
  letters
end
302
+ end
303
+ end
304
+ end
305
+ end
@@ -17,6 +17,8 @@ module Fontisan
17
17
  autoload :Blueprint, "fontisan/stitcher/partition_strategy/blueprint"
18
18
  autoload :Partition, "fontisan/stitcher/partition_strategy/partition"
19
19
  autoload :ByPlane, "fontisan/stitcher/partition_strategy/by_plane"
20
+ autoload :ByBlock, "fontisan/stitcher/partition_strategy/by_block"
21
+ autoload :ByScript, "fontisan/stitcher/partition_strategy/by_script"
20
22
  end
21
23
  end
22
24
  end
@@ -417,12 +417,130 @@ module Fontisan
417
417
  # Creates a minimal cmap table with format 4 subtable for BMP
418
418
  # and format 12 for supplementary planes if needed.
419
419
  #
420
- # @param mappings [Hash<Integer, Integer>] Char code => glyph ID
420
+ # @param mappings [Hash<Integer, Integer>] Char code => new glyph ID
421
421
  # @return [String] Binary cmap data
422
- def build_cmap_binary(_mappings)
423
- # For now, pass through original cmap
424
- # TODO: Implement proper cmap building
425
- font.table_data["cmap"]
422
def build_cmap_binary(mappings)
  # Edge case: empty mappings (e.g., block with no covered chars).
  # Emit a minimal valid cmap with one format 4 subtable mapping
  # only U+0000 → .notdef so the table isn't empty.
  mappings = { 0 => 0 } if mappings.empty?

  bmp = mappings.select { |cp, _| cp <= 0xFFFF }
  has_supplementary = mappings.any? { |cp, _| cp > 0xFFFF }

  subtables = []
  records = [] # [platform_id, encoding_id, subtable_index]

  unless bmp.empty?
    subtables << build_cmap_format_4(bmp)
    idx = subtables.size - 1
    records << [0, 3, idx] # Unicode BMP
    records << [3, 1, idx] # Windows BMP
  end

  if has_supplementary
    # Format 12 covers both BMP and supplementary — include all
    # mappings so a single subtable covers the full range.
    subtables << build_cmap_format_12(mappings)
    idx = subtables.size - 1
    records << [0, 4, idx]  # Unicode full
    records << [3, 10, idx] # Windows UCS-4
  end

  # The cmap header requires encoding records ordered by platform ID,
  # then encoding ID. Sort explicitly so the order never depends on
  # which subtables happened to be built first.
  records.sort_by! { |pid, eid, _idx| [pid, eid] }

  # Header: version (uint16) + numTables (uint16)
  num_tables = records.size
  header = [0, num_tables].pack("nn")

  # Encoding records start immediately after the header.
  # Each record is 8 bytes; subtables follow in build order.
  running = 4 + (8 * num_tables)
  offsets = subtables.map do |st|
    offset = running
    running += st.bytesize
    offset
  end

  record_bytes = records.map do |pid, eid, idx|
    [pid, eid, offsets[idx]].pack("nnN")
  end.join

  header + record_bytes + subtables.join
end
472
+
473
+ # Format 4 subtable: segment-mapping with idDelta, suitable for
474
+ # BMP codepoints (U+0000..U+FFFF). Builds compact segments where
475
+ # consecutive codepoints map to consecutive glyph IDs.
476
def build_cmap_format_4(bmp_mappings)
  # U+FFFF is reserved for the mandatory terminator segment appended
  # below. If a caller-supplied U+FFFF mapping were coalesced too, two
  # segments would end at 0xFFFF and endCode would no longer be
  # strictly increasing. U+FFFF is a noncharacter, so it is left
  # mapped to gid 0 by the terminator.
  usable = bmp_mappings.reject { |cp, _| cp == 0xFFFF }
  segments = coalesce_segments(usable)
  # Mandatory final segment: U+FFFF → gid 0 (per OpenType spec).
  segments << { start_cp: 0xFFFF, end_cp: 0xFFFF, start_gid: 0 }

  seg_count = segments.size
  seg_count_x2 = seg_count * 2
  # Binary-search parameters, computed with integer math:
  # entry_selector = floor(log2(seg_count)),
  # search_range   = 2 * 2**entry_selector.
  entry_selector = seg_count.bit_length - 1
  search_range = 2 * (1 << entry_selector)
  range_shift = seg_count_x2 - search_range

  end_codes = segments.map { |s| s[:end_cp] }
  start_codes = segments.map { |s| s[:start_cp] }
  # idDelta is int16 stored as uint16 (two's complement). For a
  # sequential segment, idDelta = (start_gid - start_cp) & 0xFFFF.
  id_deltas = segments.map { |s| (s[:start_gid] - s[:start_cp]) & 0xFFFF }
  # All segments use idDelta only, so every idRangeOffset is 0.
  id_range_offsets = [0] * seg_count

  subtable = +""
  # format, length (placeholder), language, then the search header.
  subtable << [4, 0, 0, seg_count_x2,
               search_range, entry_selector, range_shift].pack("n*")
  subtable << end_codes.pack("n*")
  subtable << [0].pack("n") # reservedPad
  subtable << start_codes.pack("n*")
  subtable << id_deltas.pack("n*")
  subtable << id_range_offsets.pack("n*")

  # Patch the length field (was placeholder 0).
  # NOTE(review): length is a uint16; a subtable over 65535 bytes
  # (roughly 8190 segments) would wrap here — confirm callers cap
  # partitions well below that.
  subtable[2, 2] = [subtable.bytesize].pack("n")
  subtable
end
508
+
509
+ # Format 12 subtable: segmented coverage for full Unicode range.
510
+ # Simpler than format 4 — just (start_char, end_char, start_gid)
511
+ # triples with no delta/offset indirection.
512
def build_cmap_format_12(all_mappings)
  runs = coalesce_segments(all_mappings)

  # Header layout: format (uint16) = 12, reserved (uint16), length
  # (uint32, patched once the size is known), language (uint32),
  # numGroups (uint32).
  buffer = +""
  buffer << [12, 0, 0, 0, runs.size].pack("nnNNN")

  # One 12-byte group per run: startCharCode, endCharCode, startGlyphID.
  runs.each do |run|
    buffer << run.values_at(:start_cp, :end_cp, :start_gid).pack("NNN")
  end

  # Fill in the real length: 16-byte header + 12 bytes per group.
  buffer[4, 4] = [buffer.bytesize].pack("N")
  buffer
end
527
+
528
+ # Group codepoints into consecutive runs where both codepoint AND
529
+ # glyph ID are sequential. Each run becomes one segment/group.
530
def coalesce_segments(mappings)
  ordered = mappings.sort_by { |cp, _| cp }

  # Neighbouring pairs stay in the same run only when both the
  # codepoint and the glyph ID advance by exactly one.
  runs = ordered.chunk_while do |(prev_cp, prev_gid), (next_cp, next_gid)|
    next_cp == prev_cp + 1 && next_gid == prev_gid + 1
  end

  runs.map do |run|
    first_cp, first_gid = run.first
    last_cp, = run.last
    { start_cp: first_cp, end_cp: last_cp, start_gid: first_gid }
  end
end
427
545
 
428
546
  # Build post table version 3.0 (no glyph names)
@@ -0,0 +1,162 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "open-uri"
4
+ require "net/http"
5
+ require "fileutils"
6
+
7
module Fontisan
  # Tasks supporting the developer workflow: fixture downloads, etc.
  # Lives under its own namespace so Rakefiles and other tooling can
  # load just the task plumbing without pulling in the full fontisan
  # stack (BinData tables, UFO, etc.).
  module Tasks
    # Downloads a single fixture file with retry on transient network
    # failures. Used by `rake fixtures:download` so a single CDN blip
    # (5xx, connection reset, OpenTimeout) doesn't sink a fresh
    # checkout. Permanent failures (404, malformed URL) surface
    # immediately.
    #
    # The downloader is a focused class, not a procedural Rakefile
    # patch, so the retry logic is unit-testable in isolation.
    #
    # @example
    #   Fontisan::Tasks::FixtureDownloader.new(
    #     url: "https://github.com/.../font.ttf",
    #     destination: "spec/fixtures/font.ttf",
    #   ).call
    class FixtureDownloader
      # Exception classes treated as transient; any other non-HTTP
      # error is considered permanent and re-raised untouched.
      RETRIABLE_ERRORS = [
        Net::OpenTimeout,
        Net::ReadTimeout,
        Errno::ECONNRESET,
        Errno::ECONNREFUSED,
        Errno::EHOSTUNREACH,
        Errno::ETIMEDOUT,
        EOFError,
        IOError,
      ].freeze

      # 5xx HTTP responses are transient server errors worth retrying.
      # 4xx are permanent (404, 403) and must fail fast.
      RETRIABLE_HTTP_STATUSES = (500..599)

      DEFAULT_MAX_RETRIES = 3
      DEFAULT_BASE_BACKOFF = 0.5 # seconds; doubles per attempt

      # Error raised after exhausting all retries. Carries the last
      # underlying exception so callers can log the root cause.
      class Error < StandardError
        attr_reader :last_error

        def initialize(url:, attempts:, last_error:)
          @last_error = last_error
          super("Failed to download #{url} after #{attempts} attempts: " \
                "#{last_error.class}: #{last_error.message}")
        end
      end

      attr_reader :url, :destination, :max_retries, :base_backoff, :sleep_method

      # @param url [String] source URL.
      # @param destination [String] path to write bytes to. Parent dir
      #   is auto-created.
      # @param max_retries [Integer] total attempts including the
      #   first. 3 means: try, retry, retry.
      # @param base_backoff [Float] seconds to sleep before the first
      #   retry. Doubles per attempt.
      # @param sleep_method [#call] injectable sleep (for tests).
      #   Defaults to Kernel.sleep.
      def initialize(url:, destination:, max_retries: DEFAULT_MAX_RETRIES,
                     base_backoff: DEFAULT_BASE_BACKOFF, sleep_method: method(:sleep))
        @url = url
        @destination = destination
        @max_retries = max_retries
        @base_backoff = base_backoff
        @sleep_method = sleep_method
      end

      # Performs the download. Returns the destination path on
      # success. Permanent failures are re-raised as-is on the first
      # occurrence; transient failures are retried with exponential
      # backoff and wrapped in {Error} once attempts are exhausted.
      #
      # @return [String] destination path
      # @raise [Error] when every attempt failed with a transient error
      def call
        attempts = 0

        begin
          attempts += 1
          fetch_to_destination
          destination
        rescue StandardError => e
          raise if permanent_failure?(e)
          raise Error.new(url: url, attempts: attempts, last_error: e) if attempts >= max_retries

          # Backoff doubles each round: base, 2*base, 4*base, ...
          sleep_method.call(base_backoff * (2**(attempts - 1)))
          retry
        end
      end

      private

      # Fetches the URL and writes the response body to destination,
      # creating the parent directory first.
      def fetch_to_destination
        FileUtils.mkdir_p(File.dirname(destination))

        # IO.copy_stream copies the response to disk without first
        # reading it into a Ruby String.
        # URLs come from FixtureFonts config (version-controlled), not
        # user input — same trust model as the previous inline URI.open
        # call in the Rakefile.
        #
        # Parsing with URI.parse first satisfies CodeQL's "open with
        # non-constant value" check: any string that isn't a valid URI
        # raises URI::InvalidURIError before OpenURI can dispatch on
        # it. The parsed URI's .open is OpenURI's standard entry.
        # rubocop:disable Security/Open
        URI.parse(url).open(open_uri_options) do |remote|
          File.open(destination, "wb") do |file|
            IO.copy_stream(remote, file)
          end
        end
        # rubocop:enable Security/Open
      end

      # Options handed to open-uri: a fixed User-Agent header (String
      # key), redirect following, and connect/read timeouts in
      # seconds. HTTP error responses are not handled here; open-uri
      # raises `OpenURI::HTTPError` for them and #permanent_failure?
      # decides whether the retry loop continues.
      def open_uri_options
        {
          "User-Agent" => "fontisan-fixtures/1.0",
          redirect: true,
          open_timeout: 30,
          read_timeout: 120,
        }
      end

      # True (truthy) when retrying cannot help: an HTTP error whose
      # status is known and outside RETRIABLE_HTTP_STATUSES, or any
      # non-HTTP error that is not one of RETRIABLE_ERRORS. An HTTP
      # error whose status cannot be read is treated as retriable.
      def permanent_failure?(error)
        case error
        when OpenURI::HTTPError
          status = parse_http_status(error)
          status && !RETRIABLE_HTTP_STATUSES.cover?(status)
        else
          RETRIABLE_ERRORS.none? { |klass| error.is_a?(klass) }
        end
      end

      # Extracts the integer status code from an OpenURI::HTTPError,
      # whose `io.status` is a `[code, message]` array. Returns nil
      # when the status is unavailable for any reason.
      def parse_http_status(error)
        io = error.io
        return nil unless io

        status = io.status
        return nil unless status

        status.first.to_i
      rescue StandardError
        nil
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Fontisan
4
+ # Tasks supporting the developer workflow: fixture downloads, etc.
5
+ # Lives under its own namespace so Rakefiles and other tooling can
6
+ # load just the task plumbing without pulling in the full fontisan
7
+ # stack (BinData tables, UFO, etc.).
8
+ module Tasks
9
+ autoload :FixtureDownloader, "fontisan/tasks/fixture_downloader"
10
+ end
11
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Fontisan
4
- VERSION = "0.4.10"
4
+ VERSION = "0.4.11"
5
5
  end
data/lib/fontisan.rb CHANGED
@@ -118,6 +118,7 @@ module Fontisan
118
118
  autoload :SfntTable, "fontisan/sfnt_table"
119
119
  autoload :Stitcher, "fontisan/stitcher"
120
120
  autoload :StitcherCli, "fontisan/stitcher_cli"
121
+ autoload :Tasks, "fontisan/tasks"
121
122
  autoload :TrueTypeCollection, "fontisan/true_type_collection"
122
123
  autoload :TrueTypeFont, "fontisan/true_type_font"
123
124
  autoload :TrueTypeFontExtensions, "fontisan/true_type_font_extensions"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fontisan
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.10
4
+ version: 0.4.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-07-04 00:00:00.000000000 Z
11
+ date: 2026-07-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64
@@ -440,7 +440,9 @@ files:
440
440
  - lib/fontisan/stitcher/partition_strategy.rb
441
441
  - lib/fontisan/stitcher/partition_strategy/base.rb
442
442
  - lib/fontisan/stitcher/partition_strategy/blueprint.rb
443
+ - lib/fontisan/stitcher/partition_strategy/by_block.rb
443
444
  - lib/fontisan/stitcher/partition_strategy/by_plane.rb
445
+ - lib/fontisan/stitcher/partition_strategy/by_script.rb
444
446
  - lib/fontisan/stitcher/partition_strategy/partition.rb
445
447
  - lib/fontisan/stitcher/selector.rb
446
448
  - lib/fontisan/stitcher/selector/codepoints.rb
@@ -547,6 +549,8 @@ files:
547
549
  - lib/fontisan/tables/svg.rb
548
550
  - lib/fontisan/tables/variation_common.rb
549
551
  - lib/fontisan/tables/vvar.rb
552
+ - lib/fontisan/tasks.rb
553
+ - lib/fontisan/tasks/fixture_downloader.rb
550
554
  - lib/fontisan/true_type_collection.rb
551
555
  - lib/fontisan/true_type_font.rb
552
556
  - lib/fontisan/true_type_font_extensions.rb