fontisan 0.4.10 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +53 -39
- data/lib/fontisan/stitcher/partition_strategy/by_block.rb +429 -0
- data/lib/fontisan/stitcher/partition_strategy/by_script.rb +305 -0
- data/lib/fontisan/stitcher/partition_strategy.rb +2 -0
- data/lib/fontisan/subset/table_subsetter.rb +123 -5
- data/lib/fontisan/tasks/fixture_downloader.rb +162 -0
- data/lib/fontisan/tasks.rb +11 -0
- data/lib/fontisan/version.rb +1 -1
- data/lib/fontisan.rb +1 -0
- metadata +6 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1ed3f50509fb7f223b2cdcfdf7e59345e83b5ca2d11b2a0806574651687c4582
|
|
4
|
+
data.tar.gz: ccb6cabe8c99dfcc060acc7f46d26efd9961b5384365ab404552dad539dc4555
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b977d4f9ac972020514daca43a14917784c713258f73223597c8a48dd2b32fcf3ccb9adbe2bd343db7ac43e63eaba8943a883580ec3c7bb7dec23f6a0f1e1fe9
|
|
7
|
+
data.tar.gz: 5c17de9dc3c60f44e472e94c19a1ca33435805ed81f261cc357dfcfd6d197f3109531ae8ad4b3a842e52f4f04de19501c0e860793212b87e874b63f01d99d8f3
|
data/Rakefile
CHANGED
|
@@ -12,24 +12,25 @@ RuboCop::RakeTask.new
|
|
|
12
12
|
namespace :fixtures do
|
|
13
13
|
# Load centralized fixture configuration
|
|
14
14
|
require_relative "spec/support/fixture_fonts"
|
|
15
|
+
require "fontisan/tasks"
|
|
15
16
|
|
|
16
17
|
# Helper method to download a single file
|
|
17
18
|
def download_single_file(name, url, target_path)
|
|
18
|
-
require "open-uri"
|
|
19
|
-
|
|
20
19
|
puts "[fixtures:download] Downloading #{name}..."
|
|
21
|
-
FileUtils.mkdir_p(File.dirname(target_path))
|
|
22
20
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
21
|
+
Fontisan::Tasks::FixtureDownloader.new(
|
|
22
|
+
url: url,
|
|
23
|
+
destination: target_path,
|
|
24
|
+
).call
|
|
26
25
|
|
|
27
26
|
puts "[fixtures:download] #{name} downloaded successfully"
|
|
27
|
+
rescue Fontisan::Tasks::FixtureDownloader::Error => e
|
|
28
|
+
warn "[fixtures:download] #{name} failed after retries: #{e.message}"
|
|
29
|
+
raise
|
|
28
30
|
end
|
|
29
31
|
|
|
30
32
|
# Helper method to download and extract a font archive
|
|
31
33
|
def download_font(name, url, target_dir)
|
|
32
|
-
require "open-uri"
|
|
33
34
|
require "zip"
|
|
34
35
|
|
|
35
36
|
puts "[fixtures:download] Downloading #{name}..."
|
|
@@ -39,45 +40,58 @@ namespace :fixtures do
|
|
|
39
40
|
temp_path = File.join(Dir.tmpdir,
|
|
40
41
|
"fontisan_#{name}_#{Process.pid}_#{rand(10000)}.zip")
|
|
41
42
|
|
|
42
|
-
# Download using IO.copy_stream for better Windows compatibility
|
|
43
|
-
URI.open(url, "rb") do |remote|
|
|
44
|
-
File.open(temp_path, "wb") do |file|
|
|
45
|
-
IO.copy_stream(remote, file)
|
|
46
|
-
end
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
puts "[fixtures:download] Extracting #{name}..."
|
|
50
|
-
|
|
51
|
-
# Open zip file and ensure it's fully closed before we're done
|
|
52
|
-
zip_file = Zip::File.open(temp_path)
|
|
53
43
|
begin
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
44
|
+
Fontisan::Tasks::FixtureDownloader.new(
|
|
45
|
+
url: url,
|
|
46
|
+
destination: temp_path,
|
|
47
|
+
).call
|
|
48
|
+
|
|
49
|
+
puts "[fixtures:download] Extracting #{name}..."
|
|
50
|
+
|
|
51
|
+
# Open zip file and ensure it's fully closed before we're done
|
|
52
|
+
zip_file = Zip::File.open(temp_path)
|
|
53
|
+
begin
|
|
54
|
+
zip_file.each do |entry|
|
|
55
|
+
# Skip macOS metadata files and directories
|
|
56
|
+
next if entry.name.start_with?("__MACOSX/") || entry.name.include?("/._")
|
|
57
|
+
next if entry.directory?
|
|
58
|
+
|
|
59
|
+
# Ensure entry.name is relative by stripping leading slashes
|
|
60
|
+
relative_name = entry.name.sub(%r{^/+}, "")
|
|
61
|
+
|
|
62
|
+
dest_path = File.join(target_dir, relative_name)
|
|
63
|
+
FileUtils.mkdir_p(File.dirname(dest_path))
|
|
64
|
+
|
|
65
|
+
# Skip if file already exists
|
|
66
|
+
next if File.exist?(dest_path)
|
|
67
|
+
|
|
68
|
+
# Write the file content directly using binary mode
|
|
69
|
+
File.open(dest_path, "wb") do |file|
|
|
70
|
+
IO.copy_stream(entry.get_input_stream, file)
|
|
71
|
+
end
|
|
71
72
|
end
|
|
73
|
+
ensure
|
|
74
|
+
# Explicitly close the zip file to release file handle on Windows
|
|
75
|
+
zip_file&.close
|
|
72
76
|
end
|
|
73
77
|
ensure
|
|
74
|
-
#
|
|
75
|
-
|
|
78
|
+
# Clean up the temp zip explicitly so the temp dir doesn't fill
|
|
79
|
+
# up on repeated runs. On Windows the just-closed zip file
|
|
80
|
+
# handle can briefly hold a lock that surfaces as
|
|
81
|
+
# Errno::EACCES; swallow that one error so the rake task can
|
|
82
|
+
# complete (OS will sweep the temp file later).
|
|
83
|
+
begin
|
|
84
|
+
File.delete(temp_path) if File.exist?(temp_path)
|
|
85
|
+
rescue Errno::EACCES
|
|
86
|
+
warn "[fixtures:download] could not delete temp zip #{temp_path}; " \
|
|
87
|
+
"OS will clean it up"
|
|
88
|
+
end
|
|
76
89
|
end
|
|
77
90
|
|
|
78
|
-
# Temp file left in Dir.tmpdir - OS will clean it up automatically
|
|
79
|
-
|
|
80
91
|
puts "[fixtures:download] #{name} downloaded successfully"
|
|
92
|
+
rescue Fontisan::Tasks::FixtureDownloader::Error => e
|
|
93
|
+
warn "[fixtures:download] #{name} failed after retries: #{e.message}"
|
|
94
|
+
raise
|
|
81
95
|
rescue LoadError => e
|
|
82
96
|
warn "[fixtures:download] Error: Required gem not installed. Please run: gem install rubyzip"
|
|
83
97
|
raise e
|
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Fontisan
|
|
4
|
+
class Stitcher
|
|
5
|
+
module PartitionStrategy
|
|
6
|
+
# Partition codepoints by Unicode Blocks.txt block.
|
|
7
|
+
#
|
|
8
|
+
# Each non-empty Unicode block becomes one partition. If a single
|
|
9
|
+
# block alone exceeds +cap+, raises PartitionCapExceededError —
|
|
10
|
+
# every block is treated as atomic because there is no finer
|
|
11
|
+
# Unicode-defined boundary inside a block. (Callers who need to
|
|
12
|
+
# split a block further must use ByPlane with explicit carve-outs
|
|
13
|
+
# or implement a custom partitioner.)
|
|
14
|
+
#
|
|
15
|
+
# Partition names are the {BLOCKS} key, lowercased, with a
|
|
16
|
+
# +block_+ prefix (underscores stand in for spaces; hyphens are kept):
|
|
17
|
+
#
|
|
18
|
+
# "Basic Latin" => :block_basic_latin
|
|
19
|
+
# "CJK Unified Ideographs" => :block_cjk_unified_ideographs
|
|
20
|
+
#
|
|
21
|
+
# Codepoints not covered by any block in {BLOCKS} (unassigned or
|
|
22
|
+
# in a block omitted from this list) fall into +:block_other+.
|
|
23
|
+
class ByBlock < Base
|
|
24
|
+
# Unicode 16.0 block ranges. Source: Unicode Blocks.txt.
|
|
25
|
+
# Covers most assigned blocks in BMP, SMP, SIP, TIP, and SSP (not exhaustive: e.g. Cyrillic Extended-A, U+2DE0..U+2DFF, is absent).
|
|
26
|
+
# Unassigned planes (4..13) are omitted — codepoints there
|
|
27
|
+
# fall into +:block_other+, which is the right behavior for
|
|
28
|
+
# partitioning fonts that target current Unicode.
|
|
29
|
+
#
|
|
30
|
+
# The data is inlined (rather than loaded from an external
|
|
31
|
+
# file) so the partitioner is self-contained: no YAML load
|
|
32
|
+
# at startup, no data file to ship. If the list ever needs to
|
|
33
|
+
# be data-driven, swap BLOCKS for a CSV/YAML loader behind
|
|
34
|
+
# the same constant — callers don't care about the source
|
|
35
|
+
# (OCP).
|
|
36
|
+
# rubocop:disable Metrics/CollectionLiteralLength
|
|
37
|
+
BLOCKS = {
|
|
38
|
+
# BMP (Plane 0)
|
|
39
|
+
"Basic_Latin" => 0x0000..0x007F,
|
|
40
|
+
"Latin-1_Supplement" => 0x0080..0x00FF,
|
|
41
|
+
"Latin_Extended-A" => 0x0100..0x017F,
|
|
42
|
+
"Latin_Extended-B" => 0x0180..0x024F,
|
|
43
|
+
"IPA_Extensions" => 0x0250..0x02AF,
|
|
44
|
+
"Spacing_Modifier_Letters" => 0x02B0..0x02FF,
|
|
45
|
+
"Combining_Diacritical_Marks" => 0x0300..0x036F,
|
|
46
|
+
"Greek_and_Coptic" => 0x0370..0x03FF,
|
|
47
|
+
"Cyrillic" => 0x0400..0x04FF,
|
|
48
|
+
"Cyrillic_Supplement" => 0x0500..0x052F,
|
|
49
|
+
"Armenian" => 0x0530..0x058F,
|
|
50
|
+
"Hebrew" => 0x0590..0x05FF,
|
|
51
|
+
"Arabic" => 0x0600..0x06FF,
|
|
52
|
+
"Syriac" => 0x0700..0x074F,
|
|
53
|
+
"Arabic_Supplement" => 0x0750..0x077F,
|
|
54
|
+
"Arabic_Extended-A" => 0x08A0..0x08FF,
|
|
55
|
+
"Arabic_Extended-B" => 0x0870..0x089F,
|
|
56
|
+
"Thaana" => 0x0780..0x07BF,
|
|
57
|
+
"NKo" => 0x07C0..0x07FF,
|
|
58
|
+
"Samaritan" => 0x0800..0x083F,
|
|
59
|
+
"Mandaic" => 0x0840..0x085F,
|
|
60
|
+
"Syriac_Supplement" => 0x0860..0x086F,
|
|
61
|
+
"Devanagari" => 0x0900..0x097F,
|
|
62
|
+
"Bengali" => 0x0980..0x09FF,
|
|
63
|
+
"Gurmukhi" => 0x0A00..0x0A7F,
|
|
64
|
+
"Gujarati" => 0x0A80..0x0AFF,
|
|
65
|
+
"Oriya" => 0x0B00..0x0B7F,
|
|
66
|
+
"Tamil" => 0x0B80..0x0BFF,
|
|
67
|
+
"Telugu" => 0x0C00..0x0C7F,
|
|
68
|
+
"Kannada" => 0x0C80..0x0CFF,
|
|
69
|
+
"Malayalam" => 0x0D00..0x0D7F,
|
|
70
|
+
"Sinhala" => 0x0D80..0x0DFF,
|
|
71
|
+
"Thai" => 0x0E00..0x0E7F,
|
|
72
|
+
"Lao" => 0x0E80..0x0EFF,
|
|
73
|
+
"Tibetan" => 0x0F00..0x0FFF,
|
|
74
|
+
"Myanmar" => 0x1000..0x109F,
|
|
75
|
+
"Georgian" => 0x10A0..0x10FF,
|
|
76
|
+
"Hangul_Jamo" => 0x1100..0x11FF,
|
|
77
|
+
"Ethiopic" => 0x1200..0x137F,
|
|
78
|
+
"Ethiopic_Supplement" => 0x1380..0x139F,
|
|
79
|
+
"Cherokee" => 0x13A0..0x13FF,
|
|
80
|
+
"Unified_Canadian_Aboriginal_Syllabics" => 0x1400..0x167F,
|
|
81
|
+
"Ogham" => 0x1680..0x169F,
|
|
82
|
+
"Runic" => 0x16A0..0x16FF,
|
|
83
|
+
"Tagalog" => 0x1700..0x171F,
|
|
84
|
+
"Hanunoo" => 0x1720..0x173F,
|
|
85
|
+
"Buhid" => 0x1740..0x175F,
|
|
86
|
+
"Tagbanwa" => 0x1760..0x177F,
|
|
87
|
+
"Khmer" => 0x1780..0x17FF,
|
|
88
|
+
"Mongolian" => 0x1800..0x18AF,
|
|
89
|
+
"Unified_Canadian_Aboriginal_Syllabics_Extended" => 0x18B0..0x18FF,
|
|
90
|
+
"Limbu" => 0x1900..0x194F,
|
|
91
|
+
"Tai_Le" => 0x1950..0x197F,
|
|
92
|
+
"New_Tai_Lue" => 0x1980..0x19DF,
|
|
93
|
+
"Khmer_Symbols" => 0x19E0..0x19FF,
|
|
94
|
+
"Buginese" => 0x1A00..0x1A1F,
|
|
95
|
+
"Tai_Tham" => 0x1A20..0x1AAF,
|
|
96
|
+
"Combining_Diacritical_Marks_Extended" => 0x1AB0..0x1AFF,
|
|
97
|
+
"Balinese" => 0x1B00..0x1B7F,
|
|
98
|
+
"Sundanese" => 0x1B80..0x1BBF,
|
|
99
|
+
"Batak" => 0x1BC0..0x1BFF,
|
|
100
|
+
"Lepcha" => 0x1C00..0x1C4F,
|
|
101
|
+
"Ol_Chiki" => 0x1C50..0x1C7F,
|
|
102
|
+
"Cyrillic_Extended-C" => 0x1C80..0x1C8F,
|
|
103
|
+
"Georgian_Extended" => 0x1C90..0x1CBF,
|
|
104
|
+
"Sundanese_Supplement" => 0x1CC0..0x1CCF,
|
|
105
|
+
"Vedic_Extensions" => 0x1CD0..0x1CFF,
|
|
106
|
+
"Phonetic_Extensions" => 0x1D00..0x1D7F,
|
|
107
|
+
"Phonetic_Extensions_Supplement" => 0x1D80..0x1DBF,
|
|
108
|
+
"Combining_Diacritical_Marks_Supplement" => 0x1DC0..0x1DFF,
|
|
109
|
+
"Latin_Extended_Additional" => 0x1E00..0x1EFF,
|
|
110
|
+
"Greek_Extended" => 0x1F00..0x1FFF,
|
|
111
|
+
"General_Punctuation" => 0x2000..0x206F,
|
|
112
|
+
"Superscripts_and_Subscripts" => 0x2070..0x209F,
|
|
113
|
+
"Currency_Symbols" => 0x20A0..0x20CF,
|
|
114
|
+
"Combining_Diacritical_Marks_for_Symbols" => 0x20D0..0x20FF,
|
|
115
|
+
"Letterlike_Symbols" => 0x2100..0x214F,
|
|
116
|
+
"Number_Forms" => 0x2150..0x218F,
|
|
117
|
+
"Arrows" => 0x2190..0x21FF,
|
|
118
|
+
"Mathematical_Operators" => 0x2200..0x22FF,
|
|
119
|
+
"Miscellaneous_Technical" => 0x2300..0x23FF,
|
|
120
|
+
"Control_Pictures" => 0x2400..0x243F,
|
|
121
|
+
"Optical_Character_Recognition" => 0x2440..0x245F,
|
|
122
|
+
"Enclosed_Alphanumerics" => 0x2460..0x24FF,
|
|
123
|
+
"Box_Drawing" => 0x2500..0x257F,
|
|
124
|
+
"Block_Elements" => 0x2580..0x259F,
|
|
125
|
+
"Geometric_Shapes" => 0x25A0..0x25FF,
|
|
126
|
+
"Miscellaneous_Symbols" => 0x2600..0x26FF,
|
|
127
|
+
"Dingbats" => 0x2700..0x27BF,
|
|
128
|
+
"Miscellaneous_Mathematical_Symbols-A" => 0x27C0..0x27EF,
|
|
129
|
+
"Supplemental_Arrows-A" => 0x27F0..0x27FF,
|
|
130
|
+
"Braille_Patterns" => 0x2800..0x28FF,
|
|
131
|
+
"Supplemental_Arrows-B" => 0x2900..0x297F,
|
|
132
|
+
"Miscellaneous_Mathematical_Symbols-B" => 0x2980..0x29FF,
|
|
133
|
+
"Supplemental_Mathematical_Operators" => 0x2A00..0x2AFF,
|
|
134
|
+
"Miscellaneous_Symbols_and_Arrows" => 0x2B00..0x2BFF,
|
|
135
|
+
"Glagolitic" => 0x2C00..0x2C5F,
|
|
136
|
+
"Latin_Extended-C" => 0x2C60..0x2C7F,
|
|
137
|
+
"Coptic" => 0x2C80..0x2CFF,
|
|
138
|
+
"Georgian_Supplement" => 0x2D00..0x2D2F,
|
|
139
|
+
"Tifinagh" => 0x2D30..0x2D7F,
|
|
140
|
+
"Ethiopic_Extended" => 0x2D80..0x2DDF,
|
|
141
|
+
"Supplemental_Punctuation" => 0x2E00..0x2E7F,
|
|
142
|
+
"CJK_Radicals_Supplement" => 0x2E80..0x2EFF,
|
|
143
|
+
"Kangxi_Radicals" => 0x2F00..0x2FDF,
|
|
144
|
+
"Ideographic_Description_Characters" => 0x2FF0..0x2FFF,
|
|
145
|
+
"CJK_Symbols_and_Punctuation" => 0x3000..0x303F,
|
|
146
|
+
"Hiragana" => 0x3040..0x309F,
|
|
147
|
+
"Katakana" => 0x30A0..0x30FF,
|
|
148
|
+
"Bopomofo" => 0x3100..0x312F,
|
|
149
|
+
"Hangul_Compatibility_Jamo" => 0x3130..0x318F,
|
|
150
|
+
"Kanbun" => 0x3190..0x319F,
|
|
151
|
+
"Bopomofo_Extended" => 0x31A0..0x31BF,
|
|
152
|
+
"CJK_Strokes" => 0x31C0..0x31EF,
|
|
153
|
+
"Katakana_Phonetic_Extensions" => 0x31F0..0x31FF,
|
|
154
|
+
"Enclosed_CJK_Letters_and_Months" => 0x3200..0x32FF,
|
|
155
|
+
"CJK_Compatibility" => 0x3300..0x33FF,
|
|
156
|
+
"CJK_Unified_Ideographs_Extension_A" => 0x3400..0x4DBF,
|
|
157
|
+
"Yijing_Hexagram_Symbols" => 0x4DC0..0x4DFF,
|
|
158
|
+
"CJK_Unified_Ideographs" => 0x4E00..0x9FFF,
|
|
159
|
+
"Yi_Syllables" => 0xA000..0xA48F,
|
|
160
|
+
"Yi_Radicals" => 0xA490..0xA4CF,
|
|
161
|
+
"Lisu" => 0xA4D0..0xA4FF,
|
|
162
|
+
"Vai" => 0xA500..0xA63F,
|
|
163
|
+
"Cyrillic_Extended-B" => 0xA640..0xA69F,
|
|
164
|
+
"Bamum" => 0xA6A0..0xA6FF,
|
|
165
|
+
"Modifier_Tone_Letters" => 0xA700..0xA71F,
|
|
166
|
+
"Latin_Extended-D" => 0xA720..0xA7FF,
|
|
167
|
+
"Syloti_Nagri" => 0xA800..0xA82F,
|
|
168
|
+
"Common_Indic_Number_Forms" => 0xA830..0xA83F,
|
|
169
|
+
"Phags-pa" => 0xA840..0xA87F,
|
|
170
|
+
"Saurashtra" => 0xA880..0xA8DF,
|
|
171
|
+
"Devanagari_Extended" => 0xA8E0..0xA8FF,
|
|
172
|
+
"Kayah_Li" => 0xA900..0xA92F,
|
|
173
|
+
"Rejang" => 0xA930..0xA95F,
|
|
174
|
+
"Hangul_Jamo_Extended-A" => 0xA960..0xA97F,
|
|
175
|
+
"Javanese" => 0xA980..0xA9DF,
|
|
176
|
+
"Myanmar_Extended-B" => 0xA9E0..0xA9FF,
|
|
177
|
+
"Cham" => 0xAA00..0xAA5F,
|
|
178
|
+
"Myanmar_Extended-A" => 0xAA60..0xAA7F,
|
|
179
|
+
"Tai_Viet" => 0xAA80..0xAADF,
|
|
180
|
+
"Meetei_Mayek_Extensions" => 0xAAE0..0xAAFF,
|
|
181
|
+
"Ethiopic_Extended-A" => 0xAB00..0xAB2F,
|
|
182
|
+
"Latin_Extended-E" => 0xAB30..0xAB6F,
|
|
183
|
+
"Cherokee_Supplement" => 0xAB70..0xABBF,
|
|
184
|
+
"Meetei_Mayek" => 0xABC0..0xABFF,
|
|
185
|
+
"Hangul_Syllables" => 0xAC00..0xD7AF,
|
|
186
|
+
"Hangul_Jamo_Extended-B" => 0xD7B0..0xD7FF,
|
|
187
|
+
"High_Surrogates" => 0xD800..0xDB7F,
|
|
188
|
+
"High_Private_Use_Surrogates" => 0xDB80..0xDBFF,
|
|
189
|
+
"Low_Surrogates" => 0xDC00..0xDFFF,
|
|
190
|
+
"Private_Use_Area" => 0xE000..0xF8FF,
|
|
191
|
+
"CJK_Compatibility_Ideographs" => 0xF900..0xFAFF,
|
|
192
|
+
"Alphabetic_Presentation_Forms" => 0xFB00..0xFB4F,
|
|
193
|
+
"Arabic_Presentation_Forms-A" => 0xFB50..0xFDFF,
|
|
194
|
+
"Variation_Selectors" => 0xFE00..0xFE0F,
|
|
195
|
+
"Vertical_Forms" => 0xFE10..0xFE1F,
|
|
196
|
+
"Combining_Half_Marks" => 0xFE20..0xFE2F,
|
|
197
|
+
"CJK_Compatibility_Forms" => 0xFE30..0xFE4F,
|
|
198
|
+
"Small_Form_Variants" => 0xFE50..0xFE6F,
|
|
199
|
+
"Arabic_Presentation_Forms-B" => 0xFE70..0xFEFF,
|
|
200
|
+
"Halfwidth_and_Fullwidth_Forms" => 0xFF00..0xFFEF,
|
|
201
|
+
"Specials" => 0xFFF0..0xFFFF,
|
|
202
|
+
|
|
203
|
+
# SMP (Plane 1)
|
|
204
|
+
"Linear_B_Syllabary" => 0x10000..0x1003F,
|
|
205
|
+
"Linear_B_Ideograms" => 0x10080..0x100FF,
|
|
206
|
+
"Aegean_Numbers" => 0x10100..0x1013F,
|
|
207
|
+
"Ancient_Greek_Numbers" => 0x10140..0x1018F,
|
|
208
|
+
"Ancient_Symbols" => 0x10190..0x101CF,
|
|
209
|
+
"Phaistos_Disc" => 0x101D0..0x101FF,
|
|
210
|
+
"Lycian" => 0x10280..0x1029F,
|
|
211
|
+
"Carian" => 0x102A0..0x102DF,
|
|
212
|
+
"Coptic_Epact_Numbers" => 0x102E0..0x102FF,
|
|
213
|
+
"Old_Italic" => 0x10300..0x1032F,
|
|
214
|
+
"Gothic" => 0x10330..0x1034F,
|
|
215
|
+
"Old_Permic" => 0x10350..0x1037F,
|
|
216
|
+
"Ugaritic" => 0x10380..0x1039F,
|
|
217
|
+
"Old_Persian" => 0x103A0..0x103DF,
|
|
218
|
+
"Deseret" => 0x10400..0x1044F,
|
|
219
|
+
"Shavian" => 0x10450..0x1047F,
|
|
220
|
+
"Osmanya" => 0x10480..0x104AF,
|
|
221
|
+
"Osage" => 0x104B0..0x104FF,
|
|
222
|
+
"Elbasan" => 0x10500..0x1052F,
|
|
223
|
+
"Caucasian_Albanian" => 0x10530..0x1056F,
|
|
224
|
+
"Vithkuqi" => 0x10570..0x105BF,
|
|
225
|
+
"Linear_A" => 0x10600..0x1077F,
|
|
226
|
+
"Latin_Extended-F" => 0x10780..0x107BF,
|
|
227
|
+
"Cypriot_Syllabary" => 0x10800..0x1083F,
|
|
228
|
+
"Imperial_Aramaic" => 0x10840..0x1085F,
|
|
229
|
+
"Palmyrene" => 0x10860..0x1087F,
|
|
230
|
+
"Nabataean" => 0x10880..0x108AF,
|
|
231
|
+
"Hatran" => 0x108E0..0x108FF,
|
|
232
|
+
"Phoenician" => 0x10900..0x1091F,
|
|
233
|
+
"Lydian" => 0x10920..0x1093F,
|
|
234
|
+
"Meroitic_Hieroglyphs" => 0x10980..0x1099F,
|
|
235
|
+
"Meroitic_Cursive" => 0x109A0..0x109FF,
|
|
236
|
+
"Kharoshthi" => 0x10A00..0x10A5F,
|
|
237
|
+
"Old_South_Arabian" => 0x10A60..0x10A7F,
|
|
238
|
+
"Old_North_Arabian" => 0x10A80..0x10A9F,
|
|
239
|
+
"Manichaean" => 0x10AC0..0x10AFF,
|
|
240
|
+
"Avestan" => 0x10B00..0x10B3F,
|
|
241
|
+
"Inscriptional_Parthian" => 0x10B40..0x10B5F,
|
|
242
|
+
"Inscriptional_Pahlavi" => 0x10B60..0x10B7F,
|
|
243
|
+
"Psalter_Pahlavi" => 0x10B80..0x10BAF,
|
|
244
|
+
"Old_Turkic" => 0x10C00..0x10C4F,
|
|
245
|
+
"Old_Hungarian" => 0x10C80..0x10CFF,
|
|
246
|
+
"Hanifi_Rohingya" => 0x10D00..0x10D3F,
|
|
247
|
+
"Garay" => 0x10D40..0x10D8F,
|
|
248
|
+
"Rumi_Numeral_Symbols" => 0x10E60..0x10E7F,
|
|
249
|
+
"Yezidi" => 0x10E80..0x10EBF,
|
|
250
|
+
"Arabic_Extended-C" => 0x10EC0..0x10EFF,
|
|
251
|
+
"Old_Sogdian" => 0x10F00..0x10F2F,
|
|
252
|
+
"Sogdian" => 0x10F30..0x10F6F,
|
|
253
|
+
"Old_Uyghur" => 0x10F70..0x10FAF,
|
|
254
|
+
"Chorasmian" => 0x10FB0..0x10FBF,
|
|
255
|
+
"Elymaic" => 0x10FE0..0x10FEF,
|
|
256
|
+
"Brahmi" => 0x11000..0x1107F,
|
|
257
|
+
"Kaithi" => 0x11080..0x110CF,
|
|
258
|
+
"Sora_Sompeng" => 0x110D0..0x110FF,
|
|
259
|
+
"Chakma" => 0x11100..0x1114F,
|
|
260
|
+
"Mahajani" => 0x11150..0x1117F,
|
|
261
|
+
"Sharada" => 0x11180..0x111DF,
|
|
262
|
+
"Sinhala_Archaic_Numbers" => 0x111E0..0x111FF,
|
|
263
|
+
"Khojki" => 0x11200..0x1124F,
|
|
264
|
+
"Multani" => 0x11280..0x112AF,
|
|
265
|
+
"Khudawadi" => 0x112B0..0x112FF,
|
|
266
|
+
"Grantha" => 0x11300..0x1137F,
|
|
267
|
+
"Tulu-Tigalari" => 0x11380..0x113FF,
|
|
268
|
+
"Newa" => 0x11400..0x1147F,
|
|
269
|
+
"Tirhuta" => 0x11480..0x114DF,
|
|
270
|
+
"Siddham" => 0x11580..0x115FF,
|
|
271
|
+
"Modi" => 0x11600..0x1165F,
|
|
272
|
+
"Mongolian_Supplement" => 0x11660..0x1167F,
|
|
273
|
+
"Takri" => 0x11680..0x116CF,
|
|
274
|
+
"Myanmar_Extended-C" => 0x116D0..0x116FF,
|
|
275
|
+
"Ahom" => 0x11700..0x1174F,
|
|
276
|
+
"Dogra" => 0x11800..0x1184F,
|
|
277
|
+
"Warang_Citi" => 0x118A0..0x118FF,
|
|
278
|
+
"Dives_Akuru" => 0x11900..0x1195F,
|
|
279
|
+
"Nandinagari" => 0x119A0..0x119FF,
|
|
280
|
+
"Zanabazar_Square" => 0x11A00..0x11A4F,
|
|
281
|
+
"Soyombo" => 0x11A50..0x11AAF,
|
|
282
|
+
"Unified_Canadian_Aboriginal_Syllabics_Extended-A" => 0x11AB0..0x11ABF,
|
|
283
|
+
"Pau_Cin_Hmo" => 0x11AC0..0x11AFF,
|
|
284
|
+
"Bhaiksuki" => 0x11C00..0x11C6F,
|
|
285
|
+
"Marchen" => 0x11C70..0x11CBF,
|
|
286
|
+
"Masaram_Gondi" => 0x11D00..0x11D5F,
|
|
287
|
+
"Gunjala_Gondi" => 0x11D60..0x11DAF,
|
|
288
|
+
"Makasar" => 0x11EE0..0x11EFF,
|
|
289
|
+
"Kawi" => 0x11F00..0x11F5F,
|
|
290
|
+
"Lisu_Supplement" => 0x11FB0..0x11FBF,
|
|
291
|
+
"Tamil_Supplement" => 0x11FC0..0x11FFF,
|
|
292
|
+
"Cuneiform" => 0x12000..0x123FF,
|
|
293
|
+
"Cuneiform_Numbers_and_Punctuation" => 0x12400..0x1247F,
|
|
294
|
+
"Early_Dynastic_Cuneiform" => 0x12480..0x1254F,
|
|
295
|
+
"Cypro-Minoan" => 0x12F90..0x12FFF,
|
|
296
|
+
"Egyptian_Hieroglyphs" => 0x13000..0x1342F,
|
|
297
|
+
"Egyptian_Hieroglyph_Format_Controls" => 0x13430..0x1345F,
|
|
298
|
+
"Egyptian_Hieroglyphs_Extended-A" => 0x13460..0x143FF,
|
|
299
|
+
"Anatolian_Hieroglyphs" => 0x14400..0x1467F,
|
|
300
|
+
"Bamum_Supplement" => 0x16800..0x16A3F,
|
|
301
|
+
"Mro" => 0x16A40..0x16A6F,
|
|
302
|
+
"Tangsa" => 0x16A70..0x16ACF,
|
|
303
|
+
"Bassa_Vah" => 0x16AD0..0x16AFF,
|
|
304
|
+
"Pahawh_Hmong" => 0x16B00..0x16B8F,
|
|
305
|
+
"Medefaidrin" => 0x16E40..0x16E9F,
|
|
306
|
+
"Miao" => 0x16F00..0x16F9F,
|
|
307
|
+
"Ideographic_Symbols_and_Punctuation" => 0x16FE0..0x16FFF,
|
|
308
|
+
"Tangut" => 0x17000..0x187FF,
|
|
309
|
+
"Tangut_Components" => 0x18800..0x18AFF,
|
|
310
|
+
"Khitan_Small_Script" => 0x18B00..0x18CFF,
|
|
311
|
+
"Tangut_Supplement" => 0x18D00..0x18D7F,
|
|
312
|
+
"Kana_Supplement" => 0x1B000..0x1B0FF,
|
|
313
|
+
"Kana_Extended-A" => 0x1B100..0x1B12F,
|
|
314
|
+
"Small_Kana_Extension" => 0x1B130..0x1B16F,
|
|
315
|
+
"Nushu" => 0x1B170..0x1B2FF,
|
|
316
|
+
"Duployan" => 0x1BC00..0x1BC9F,
|
|
317
|
+
"Shorthand_Format_Controls" => 0x1BCA0..0x1BCAF,
|
|
318
|
+
"Znamenny_Musical_Notation" => 0x1CF00..0x1CFCF,
|
|
319
|
+
"Byzantine_Musical_Symbols" => 0x1D000..0x1D0FF,
|
|
320
|
+
"Musical_Symbols" => 0x1D100..0x1D1FF,
|
|
321
|
+
"Ancient_Greek_Musical_Notation" => 0x1D200..0x1D24F,
|
|
322
|
+
"Kaktovik_Numerals" => 0x1D2C0..0x1D2FF,
|
|
323
|
+
"Tai_Xuan_Jing_Symbols" => 0x1D300..0x1D35F,
|
|
324
|
+
"Counting_Rod_Numerals" => 0x1D360..0x1D37F,
|
|
325
|
+
"Mathematical_Alphanumeric_Symbols" => 0x1D400..0x1D7FF,
|
|
326
|
+
"Sutton_SignWriting" => 0x1D800..0x1DAAF,
|
|
327
|
+
"Latin_Extended-G" => 0x1DF00..0x1DFFF,
|
|
328
|
+
"Glagolitic_Supplement" => 0x1E000..0x1E02F,
|
|
329
|
+
"Cyrillic_Extended-D" => 0x1E030..0x1E08F,
|
|
330
|
+
"Nyiakeng_Puachue_Hmong" => 0x1E100..0x1E14F,
|
|
331
|
+
"Toto" => 0x1E290..0x1E2BF,
|
|
332
|
+
"Wancho" => 0x1E2C0..0x1E2FF,
|
|
333
|
+
"Nag_Mundari" => 0x1E4D0..0x1E4FF,
|
|
334
|
+
"Ethiopian_Extended-B" => 0x1E7E0..0x1E7FF,
|
|
335
|
+
"Mende_Kikakui" => 0x1E800..0x1E8DF,
|
|
336
|
+
"Adlam" => 0x1E900..0x1E95F,
|
|
337
|
+
"Indic_Siyaq_Numbers" => 0x1EC70..0x1ECBF,
|
|
338
|
+
"Ottoman_Siyaq_Numbers" => 0x1ED00..0x1ED4F,
|
|
339
|
+
"Arabic_Mathematical_Alphabetic_Symbols" => 0x1EE00..0x1EEFF,
|
|
340
|
+
"Mahjong_Tiles" => 0x1F000..0x1F02F,
|
|
341
|
+
"Domino_Tiles" => 0x1F030..0x1F09F,
|
|
342
|
+
"Playing_Cards" => 0x1F0A0..0x1F0FF,
|
|
343
|
+
"Enclosed_Alphanumeric_Supplement" => 0x1F100..0x1F1FF,
|
|
344
|
+
"Enclosed_Ideographic_Supplement" => 0x1F200..0x1F2FF,
|
|
345
|
+
"Miscellaneous_Symbols_and_Pictographs" => 0x1F300..0x1F5FF,
|
|
346
|
+
"Emoticons" => 0x1F600..0x1F64F,
|
|
347
|
+
"Ornamental_Dingbats" => 0x1F650..0x1F67F,
|
|
348
|
+
"Transport_and_Map_Symbols" => 0x1F680..0x1F6FF,
|
|
349
|
+
"Alchemical_Symbols" => 0x1F700..0x1F77F,
|
|
350
|
+
"Geometric_Shapes_Extended" => 0x1F780..0x1F7FF,
|
|
351
|
+
"Supplemental_Arrows-C" => 0x1F800..0x1F8FF,
|
|
352
|
+
"Supplemental_Symbols_and_Pictographs" => 0x1F900..0x1F9FF,
|
|
353
|
+
"Chess_Symbols" => 0x1FA00..0x1FA6F,
|
|
354
|
+
"Symbols_and_Pictographs_Extended-A" => 0x1FA70..0x1FAFF,
|
|
355
|
+
"Symbols_for_Legacy_Computing" => 0x1FB00..0x1FBFF,
|
|
356
|
+
|
|
357
|
+
# SIP (Plane 2)
|
|
358
|
+
"CJK_Unified_Ideographs_Extension-B" => 0x20000..0x2A6DF,
|
|
359
|
+
"CJK_Unified_Ideographs_Extension-C" => 0x2A700..0x2B73F,
|
|
360
|
+
"CJK_Unified_Ideographs_Extension-D" => 0x2B740..0x2B81F,
|
|
361
|
+
"CJK_Unified_Ideographs_Extension-E" => 0x2B820..0x2CEAF,
|
|
362
|
+
"CJK_Unified_Ideographs_Extension-F" => 0x2CEB0..0x2EBEF,
|
|
363
|
+
"CJK_Unified_Ideographs_Extension-I" => 0x2EBF0..0x2EE5F,
|
|
364
|
+
"CJK_Compatibility_Ideographs_Supplement" => 0x2F800..0x2FA1F,
|
|
365
|
+
|
|
366
|
+
# TIP (Plane 3)
|
|
367
|
+
# FIXME: Blocks.txt gives Extension G as U+30000..U+3134F and
|
|
368
|
+
# Extension H as U+31350..U+323AF; the two do not overlap. The
|
|
369
|
+
# range below runs past G's real end and Extension H is omitted,
|
|
370
|
+
# so U+31350..U+313AF are labeled Extension-G and the rest of
|
|
371
|
+
# Extension H (U+313B0..U+323AF) falls into +:block_other+.
|
|
372
|
+
"CJK_Unified_Ideographs_Extension-G" => 0x30000..0x313AF,
|
|
373
|
+
|
|
374
|
+
# SSP (Plane 14)
|
|
375
|
+
"Tags" => 0xE0000..0xE007F,
|
|
376
|
+
"Variation_Selectors_Supplement" => 0xE0100..0xE01EF,
|
|
377
|
+
}.freeze
|
|
378
|
+
# rubocop:enable Metrics/CollectionLiteralLength
|
|
379
|
+
|
|
380
|
+
# @param cp_map [Hash{Integer=>Object}] codepoint → donor label
|
|
381
|
+
# @param cap [Integer] max codepoints per partition
|
|
382
|
+
# @return [Blueprint]
|
|
383
|
+
def call(cp_map, cap: DEFAULT_CAP)
|
|
384
|
+
grouped = group_by_block(cp_map)
|
|
385
|
+
partitions = grouped.map do |label, entries|
|
|
386
|
+
if entries.size > cap
|
|
387
|
+
raise PartitionCapExceededError.new(
|
|
388
|
+
block_label: label,
|
|
389
|
+
actual: entries.size,
|
|
390
|
+
cap: cap,
|
|
391
|
+
)
|
|
392
|
+
end
|
|
393
|
+
|
|
394
|
+
Partition.new(
|
|
395
|
+
name: partition_name(label),
|
|
396
|
+
cps: entries.map(&:first),
|
|
397
|
+
donor_map: entries.to_h,
|
|
398
|
+
)
|
|
399
|
+
end
|
|
400
|
+
Blueprint.new(partitions: partitions)
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
private
|
|
404
|
+
|
|
405
|
+
def group_by_block(cp_map)
|
|
406
|
+
cp_map.each_with_object(Hash.new { |h, k| h[k] = [] }) do |(cp, label), h|
|
|
407
|
+
block_label = find_block_label(cp) || :other
|
|
408
|
+
h[block_label] << [cp, label]
|
|
409
|
+
end
|
|
410
|
+
end
|
|
411
|
+
|
|
412
|
+
# Linear scan is fine for ~340 entries at typical partition-time
|
|
413
|
+
# call volumes (once per Stitcher.run). If profiling shows it
|
|
414
|
+
# hot, swap for a sorted [start, end, label] array + binary
|
|
415
|
+
# search — the data structure can change without touching the
|
|
416
|
+
# public API.
|
|
417
|
+
def find_block_label(codepoint)
|
|
418
|
+
BLOCKS.find { |_label, range| range.cover?(codepoint) }&.first
|
|
419
|
+
end
|
|
420
|
+
|
|
421
|
+
def partition_name(label)
|
|
422
|
+
return :block_other if label == :other
|
|
423
|
+
|
|
424
|
+
:"block_#{label.downcase}"
|
|
425
|
+
end
|
|
426
|
+
end
|
|
427
|
+
end
|
|
428
|
+
end
|
|
429
|
+
end
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Fontisan
|
|
4
|
+
class Stitcher
|
|
5
|
+
module PartitionStrategy
|
|
6
|
+
# Partition codepoints by Unicode script property.
|
|
7
|
+
#
|
|
8
|
+
# Each non-empty script becomes one or more partitions. Scripts
|
|
9
|
+
# that overflow +cap+ are chunked (Han has 80k+ codepoints) —
|
|
10
|
+
# chunks are named +:script_<name>_a+, +:script_<name>_b+, etc.,
|
|
11
|
+
# matching ByPlane's sub-split convention.
|
|
12
|
+
#
|
|
13
|
+
# Codepoints whose script is unknown (unassigned blocks) fall
|
|
14
|
+
# into +:script_other+. Codepoints in the +Common+ script
|
|
15
|
+
# (digits, punctuation shared across scripts) and +Inherited+
|
|
16
|
+
# script (combining marks) get their own buckets — they're
|
|
17
|
+
# typically useful as standalone subfonts.
|
|
18
|
+
#
|
|
19
|
+
# The script data is derived from {ByBlock::BLOCKS}: each Unicode
|
|
20
|
+
# block maps to exactly one primary script (with a few exceptions
|
|
21
|
+
# that we resolve explicitly). This keeps the data DRY — the
|
|
22
|
+
# authoritative block list lives in one place.
|
|
23
|
+
class ByScript < Base
|
|
24
|
+
# Maps each Unicode block label to its primary script.
|
|
25
|
+
# Block labels match ByBlock::BLOCKS keys.
|
|
26
|
+
#
|
|
27
|
+
# Special scripts:
|
|
28
|
+
# - +:common+ — shared across scripts (digits, punctuation)
|
|
29
|
+
# - +:inherited+ — combining marks that inherit the base char's script
|
|
30
|
+
# - +:other+ — fallback for unlisted blocks
|
|
31
|
+
SCRIPT_OF_BLOCK = {
|
|
32
|
+
# Latin family
|
|
33
|
+
"Basic_Latin" => :common,
|
|
34
|
+
"Latin-1_Supplement" => :latin,
|
|
35
|
+
"Latin_Extended-A" => :latin,
|
|
36
|
+
"Latin_Extended-B" => :latin,
|
|
37
|
+
"Latin_Extended-C" => :latin,
|
|
38
|
+
"Latin_Extended-D" => :latin,
|
|
39
|
+
"Latin_Extended-E" => :latin,
|
|
40
|
+
"Latin_Extended-F" => :latin,
|
|
41
|
+
"Latin_Extended_Additional" => :latin,
|
|
42
|
+
"Phonetic_Extensions" => :latin,
|
|
43
|
+
"Phonetic_Extensions_Supplement" => :latin,
|
|
44
|
+
"Modifier_Tone_Letters" => :latin,
|
|
45
|
+
"Spacing_Modifier_Letters" => :common,
|
|
46
|
+
"Combining_Diacritical_Marks" => :inherited,
|
|
47
|
+
"Combining_Diacritical_Marks_Extended" => :inherited,
|
|
48
|
+
"Combining_Diacritical_Marks_Supplement" => :inherited,
|
|
49
|
+
"Combining_Diacritical_Marks_for_Symbols" => :inherited,
|
|
50
|
+
"Combining_Half_Marks" => :inherited,
|
|
51
|
+
|
|
52
|
+
# Greek
|
|
53
|
+
"Greek_and_Coptic" => :greek,
|
|
54
|
+
"Greek_Extended" => :greek,
|
|
55
|
+
|
|
56
|
+
# Cyrillic
|
|
57
|
+
"Cyrillic" => :cyrillic,
|
|
58
|
+
"Cyrillic_Supplement" => :cyrillic,
|
|
59
|
+
"Cyrillic_Extended-A" => :cyrillic,
|
|
60
|
+
"Cyrillic_Extended-B" => :cyrillic,
|
|
61
|
+
"Cyrillic_Extended-C" => :cyrillic,
|
|
62
|
+
"Cyrillic_Extended-D" => :cyrillic,
|
|
63
|
+
|
|
64
|
+
# Middle Eastern
|
|
65
|
+
"Armenian" => :armenian,
|
|
66
|
+
"Hebrew" => :hebrew,
|
|
67
|
+
"Arabic" => :arabic,
|
|
68
|
+
"Arabic_Supplement" => :arabic,
|
|
69
|
+
"Arabic_Extended-A" => :arabic,
|
|
70
|
+
"Arabic_Extended-B" => :arabic,
|
|
71
|
+
"Arabic_Extended-C" => :arabic,
|
|
72
|
+
"Arabic_Extended-D" => :arabic,
|
|
73
|
+
"Arabic_Presentation_Forms-A" => :arabic,
|
|
74
|
+
"Arabic_Presentation_Forms-B" => :arabic,
|
|
75
|
+
"Syriac" => :syriac,
|
|
76
|
+
"Syriac_Supplement" => :syriac,
|
|
77
|
+
"Thaana" => :thaana,
|
|
78
|
+
"Samaritan" => :samaritan,
|
|
79
|
+
"Mandaic" => :mandaic,
|
|
80
|
+
|
|
81
|
+
# Indic
|
|
82
|
+
"Devanagari" => :devanagari,
|
|
83
|
+
"Devanagari_Extended" => :devanagari,
|
|
84
|
+
"Bengali" => :bengali,
|
|
85
|
+
"Gurmukhi" => :gurmukhi,
|
|
86
|
+
"Gujarati" => :gujarati,
|
|
87
|
+
"Oriya" => :oriya,
|
|
88
|
+
"Tamil" => :tamil,
|
|
89
|
+
"Tamil_Supplement" => :tamil,
|
|
90
|
+
"Telugu" => :telugu,
|
|
91
|
+
"Kannada" => :kannada,
|
|
92
|
+
"Malayalam" => :malayalam,
|
|
93
|
+
"Sinhala" => :sinhala,
|
|
94
|
+
"Sinhala_Archaic_Numbers" => :sinhala,
|
|
95
|
+
"Vedic_Extensions" => :inherited,
|
|
96
|
+
|
|
97
|
+
# Southeast Asian
|
|
98
|
+
"Thai" => :thai,
|
|
99
|
+
"Lao" => :lao,
|
|
100
|
+
"Tibetan" => :tibetan,
|
|
101
|
+
"Myanmar" => :myanmar,
|
|
102
|
+
"Myanmar_Extended-A" => :myanmar,
|
|
103
|
+
"Myanmar_Extended-B" => :myanmar,
|
|
104
|
+
"Myanmar_Extended-C" => :myanmar,
|
|
105
|
+
"Khmer" => :khmer,
|
|
106
|
+
"Khmer_Symbols" => :khmer,
|
|
107
|
+
"Tagalog" => :tagalog,
|
|
108
|
+
"Hanunoo" => :hanunoo,
|
|
109
|
+
"Buhid" => :buhid,
|
|
110
|
+
"Tagbanwa" => :tagbanwa,
|
|
111
|
+
|
|
112
|
+
# Hangul
|
|
113
|
+
"Hangul_Jamo" => :hangul,
|
|
114
|
+
"Hangul_Compatibility_Jamo" => :hangul,
|
|
115
|
+
"Hangul_Jamo_Extended-A" => :hangul,
|
|
116
|
+
"Hangul_Jamo_Extended-B" => :hangul,
|
|
117
|
+
"Hangul_Syllables" => :hangul,
|
|
118
|
+
|
|
119
|
+
# CJK — Han + native Japanese / Korean
|
|
120
|
+
"CJK_Unified_Ideographs" => :han,
|
|
121
|
+
"CJK_Unified_Ideographs_Extension_A" => :han,
|
|
122
|
+
"CJK_Unified_Ideographs_Extension_B" => :han,
|
|
123
|
+
"CJK_Unified_Ideographs_Extension_C" => :han,
|
|
124
|
+
"CJK_Unified_Ideographs_Extension_D" => :han,
|
|
125
|
+
"CJK_Unified_Ideographs_Extension-E" => :han,
|
|
126
|
+
"CJK_Unified_Ideographs_Extension-F" => :han,
|
|
127
|
+
"CJK_Unified_Ideographs_Extension_G" => :han,
|
|
128
|
+
"CJK_Unified_Ideographs_Extension_H" => :han,
|
|
129
|
+
"CJK_Unified_Ideographs_Extension_I" => :han,
|
|
130
|
+
"CJK_Compatibility_Ideographs" => :han,
|
|
131
|
+
"CJK_Compatibility_Ideographs_Supplement" => :han,
|
|
132
|
+
"CJK_Radicals_Supplement" => :han,
|
|
133
|
+
"Kangxi_Radicals" => :han,
|
|
134
|
+
"CJK_Symbols_and_Punctuation" => :common,
|
|
135
|
+
"CJK_Compatibility" => :han,
|
|
136
|
+
"CJK_Compatibility_Forms" => :common,
|
|
137
|
+
"CJK_Strokes" => :han,
|
|
138
|
+
"Ideographic_Description_Characters" => :common,
|
|
139
|
+
"Enclosed_CJK_Letters_and_Months" => :common,
|
|
140
|
+
"Hiragana" => :hiragana,
|
|
141
|
+
"Katakana" => :katakana,
|
|
142
|
+
"Katakana_Phonetic_Extensions" => :katakana,
|
|
143
|
+
"Kana_Supplement" => :hiragana,
|
|
144
|
+
"Kana_Extended-A" => :hiragana,
|
|
145
|
+
"Kana_Extended-B" => :hiragana,
|
|
146
|
+
"Small_Kana_Extension" => :hiragana,
|
|
147
|
+
"Halfwidth_and_Fullwidth_Forms" => :common,
|
|
148
|
+
"Vertical_Forms" => :common,
|
|
149
|
+
"Ideographic_Symbols_and_Punctuation" => :common,
|
|
150
|
+
|
|
151
|
+
# Other scripts (one or more blocks each)
|
|
152
|
+
"Ethiopic" => :ethiopic,
|
|
153
|
+
"Ethiopic_Supplement" => :ethiopic,
|
|
154
|
+
"Ethiopic_Extended" => :ethiopic,
|
|
155
|
+
"Ethiopic_Extended-A" => :ethiopic,
|
|
156
|
+
"Ethiopian_Extended-B" => :ethiopic,
|
|
157
|
+
"Cherokee" => :cherokee,
|
|
158
|
+
"Cherokee_Supplement" => :cherokee,
|
|
159
|
+
"Unified_Canadian_Aboriginal_Syllabics" => :canadian_aboriginal,
|
|
160
|
+
"Unified_Canadian_Aboriginal_Syllabics_Extended" => :canadian_aboriginal,
|
|
161
|
+
"Unified_Canadian_Aboriginal_Syllabics_Extended-A" => :canadian_aboriginal,
|
|
162
|
+
"Ogham" => :ogham,
|
|
163
|
+
"Runic" => :runic,
|
|
164
|
+
"Glagolitic" => :glagolitic,
|
|
165
|
+
"Glagolitic_Supplement" => :glagolitic,
|
|
166
|
+
"Tifinagh" => :tifinagh,
|
|
167
|
+
"Georgian" => :georgian,
|
|
168
|
+
"Georgian_Supplement" => :georgian,
|
|
169
|
+
"Georgian_Extended" => :georgian,
|
|
170
|
+
"Mongolian" => :mongolian,
|
|
171
|
+
"Mongolian_Supplement" => :mongolian,
|
|
172
|
+
"Limbu" => :limbu,
|
|
173
|
+
"Tai_Le" => :tai_le,
|
|
174
|
+
"New_Tai_Lue" => :new_tai_lue,
|
|
175
|
+
"Tai_Tham" => :tai_tham,
|
|
176
|
+
"Tai_Viet" => :tai_viet,
|
|
177
|
+
"Ol_Chiki" => :ol_chiki,
|
|
178
|
+
"Bopomofo" => :bopomofo,
|
|
179
|
+
"Bopomofo_Extended" => :bopomofo,
|
|
180
|
+
"Yi_Syllables" => :yi,
|
|
181
|
+
"Yi_Radicals" => :yi,
|
|
182
|
+
"Vai" => :vai,
|
|
183
|
+
"Bamum" => :bamum,
|
|
184
|
+
"Bamum_Supplement" => :bamum,
|
|
185
|
+
"Syloti_Nagri" => :syloti_nagri,
|
|
186
|
+
"Phags-pa" => :phags_pa,
|
|
187
|
+
"Saurashtra" => :saurashtra,
|
|
188
|
+
"Kayah_Li" => :kayah_li,
|
|
189
|
+
"Rejang" => :rejang,
|
|
190
|
+
"Javanese" => :javanese,
|
|
191
|
+
"Cham" => :cham,
|
|
192
|
+
"Lepcha" => :lepcha,
|
|
193
|
+
"Meetei_Mayek" => :meetei_mayek,
|
|
194
|
+
"Meetei_Mayek_Extensions" => :meetei_mayek,
|
|
195
|
+
"Lisu" => :lisu,
|
|
196
|
+
"Lisu_Supplement" => :lisu,
|
|
197
|
+
"Sundanese" => :sundanese,
|
|
198
|
+
"Sundanese_Supplement" => :sundanese,
|
|
199
|
+
"Batak" => :batak,
|
|
200
|
+
"Buginese" => :buginese,
|
|
201
|
+
"Ahom" => :ahom,
|
|
202
|
+
"Dogra" => :dogra,
|
|
203
|
+
"Tulu-Tigalari" => :tulu_tigalari,
|
|
204
|
+
"Grantha" => :grantha,
|
|
205
|
+
"Newa" => :newa,
|
|
206
|
+
"Tirhuta" => :tirhuta,
|
|
207
|
+
"Siddham" => :siddham,
|
|
208
|
+
"Modi" => :modi,
|
|
209
|
+
"Sharada" => :sharada,
|
|
210
|
+
"Takri" => :takri,
|
|
211
|
+
"Kaithi" => :kaithi,
|
|
212
|
+
"Mahajani" => :mahajani,
|
|
213
|
+
"Multani" => :multani,
|
|
214
|
+
"Khudawadi" => :khudawadi,
|
|
215
|
+
"Nandinagari" => :nandinagari,
|
|
216
|
+
"Nushu" => :nushu,
|
|
217
|
+
"Wancho" => :wancho,
|
|
218
|
+
"Toto" => :toto,
|
|
219
|
+
"Nag_Mundari" => :nag_mundari,
|
|
220
|
+
|
|
221
|
+
# Numerals / symbols (Common)
|
|
222
|
+
"Superscripts_and_Subscripts" => :common,
|
|
223
|
+
"Number_Forms" => :common,
|
|
224
|
+
"Currency_Symbols" => :common,
|
|
225
|
+
"Letterlike_Symbols" => :common,
|
|
226
|
+
"Arrows" => :common,
|
|
227
|
+
"Mathematical_Operators" => :common,
|
|
228
|
+
"Miscellaneous_Technical" => :common,
|
|
229
|
+
"Control_Pictures" => :common,
|
|
230
|
+
"Optical_Character_Recognition" => :common,
|
|
231
|
+
"Enclosed_Alphanumerics" => :common,
|
|
232
|
+
"Box_Drawing" => :common,
|
|
233
|
+
"Block_Elements" => :common,
|
|
234
|
+
"Geometric_Shapes" => :common,
|
|
235
|
+
"Miscellaneous_Symbols" => :common,
|
|
236
|
+
"Dingbats" => :common,
|
|
237
|
+
"Miscellaneous_Mathematical_Symbols-A" => :common,
|
|
238
|
+
"Miscellaneous_Mathematical_Symbols-B" => :common,
|
|
239
|
+
"Supplemental_Arrows-A" => :common,
|
|
240
|
+
"Supplemental_Arrows-B" => :common,
|
|
241
|
+
"Supplemental_Arrows-C" => :common,
|
|
242
|
+
"Supplemental_Mathematical_Operators" => :common,
|
|
243
|
+
"Miscellaneous_Symbols_and_Arrows" => :common,
|
|
244
|
+
"Braille_Patterns" => :braille,
|
|
245
|
+
"General_Punctuation" => :common,
|
|
246
|
+
"Supplemental_Punctuation" => :common,
|
|
247
|
+
"Alphabetic_Presentation_Forms" => :common,
|
|
248
|
+
"Specials" => :common,
|
|
249
|
+
"Variation_Selectors" => :inherited,
|
|
250
|
+
"Variation_Selectors_Supplement" => :inherited,
|
|
251
|
+
"Tags" => :common,
|
|
252
|
+
}.freeze
|
|
253
|
+
|
|
254
|
+
# @param cp_map [Hash{Integer=>Object}] codepoint → donor label
|
|
255
|
+
# @param cap [Integer] max codepoints per partition
|
|
256
|
+
# @return [Blueprint]
|
|
257
|
+
# Groups the codepoints by script, splits each group into chunks of
# at most +cap+ entries, and wraps every chunk in a Partition.
#
# @param cp_map [Hash{Integer=>Object}] codepoint => donor label
# @param cap [Integer] max codepoints per partition
# @return [Blueprint]
def call(cp_map, cap: DEFAULT_CAP)
  partitions = group_by_script(cp_map).flat_map do |script, entries|
    chunks(entries, cap).each_with_index.map do |chunk, idx|
      Partition.new(
        name: partition_name(script, idx),
        cps: chunk.map(&:first),
        donor_map: chunk.to_h,
      )
    end
  end

  Blueprint.new(partitions: partitions)
end
|
|
273
|
+
|
|
274
|
+
private
|
|
275
|
+
|
|
276
|
+
# Buckets the [codepoint, label] pairs of +cp_map+ by the script
# that script_of_codepoint reports for each codepoint.
#
# @param cp_map [Hash{Integer=>Object}]
# @return [Hash{Symbol=>Array<Array>}] script => list of [cp, label]
def group_by_script(cp_map)
  buckets = Hash.new { |hash, key| hash[key] = [] }
  cp_map.each do |cp, label|
    buckets[script_of_codepoint(cp)] << [cp, label]
  end
  buckets
end
|
|
282
|
+
|
|
283
|
+
# Resolves a codepoint to a script symbol: finds the first
# ByBlock::BLOCKS entry covering +cp+, then looks that block label
# up in SCRIPT_OF_BLOCK. Falls back to +:other+ when no block covers
# the codepoint or the block has no script entry.
#
# @param cp [Integer]
# @return [Symbol]
def script_of_codepoint(cp)
  ByBlock::BLOCKS.each do |label, span|
    next unless span.cover?(cp)

    return SCRIPT_OF_BLOCK[label] || :other
  end

  :other
end
|
|
289
|
+
|
|
290
|
+
# Splits +entries+ into consecutive slices of at most +cap+ items.
#
# @param entries [Array]
# @param cap [Integer] slice size
# @return [Array<Array>]
def chunks(entries, cap)
  slices = []
  entries.each_slice(cap) { |slice| slices << slice }
  slices
end
|
|
293
|
+
|
|
294
|
+
# First partition per script has no suffix; subsequent ones
|
|
295
|
+
# get _a, _b, ... matching ByPlane's sub-split convention.
|
|
296
|
+
# Builds the partition name for the +idx+-th chunk of +script+.
#
# Chunk 0 is unsuffixed (+:script_<name>+). Later chunks get a
# letter suffix: _a .. _z for chunks 1..26, then _aa, _ab, ... so
# the suffix stays alphabetic no matter how many chunks a script is
# split into (a plain "a".ord + idx - 1 runs past "z" into
# punctuation from the 28th chunk onward).
#
# @param script [Symbol, String] script identifier
# @param idx [Integer] zero-based chunk index within the script
# @return [Symbol]
def partition_name(script, idx)
  return :"script_#{script}" if idx.zero?

  # Bijective base-26 encoding of idx (1 => "a", 26 => "z", 27 => "aa").
  suffix = +""
  remaining = idx
  while remaining.positive?
    remaining -= 1
    suffix.prepend(("a".ord + (remaining % 26)).chr)
    remaining /= 26
  end

  :"script_#{script}_#{suffix}"
end
|
|
302
|
+
end
|
|
303
|
+
end
|
|
304
|
+
end
|
|
305
|
+
end
|
|
@@ -17,6 +17,8 @@ module Fontisan
|
|
|
17
17
|
autoload :Blueprint, "fontisan/stitcher/partition_strategy/blueprint"
|
|
18
18
|
autoload :Partition, "fontisan/stitcher/partition_strategy/partition"
|
|
19
19
|
autoload :ByPlane, "fontisan/stitcher/partition_strategy/by_plane"
|
|
20
|
+
autoload :ByBlock, "fontisan/stitcher/partition_strategy/by_block"
|
|
21
|
+
autoload :ByScript, "fontisan/stitcher/partition_strategy/by_script"
|
|
20
22
|
end
|
|
21
23
|
end
|
|
22
24
|
end
|
|
@@ -417,12 +417,130 @@ module Fontisan
|
|
|
417
417
|
# Creates a minimal cmap table with format 4 subtable for BMP
|
|
418
418
|
# and format 12 for supplementary planes if needed.
|
|
419
419
|
#
|
|
420
|
-
# @param mappings [Hash<Integer, Integer>] Char code => glyph ID
|
|
420
|
+
# @param mappings [Hash<Integer, Integer>] Char code => new glyph ID
|
|
421
421
|
# @return [String] Binary cmap data
|
|
422
|
-
def build_cmap_binary(
|
|
423
|
-
#
|
|
424
|
-
#
|
|
425
|
-
|
|
422
|
+
# Serializes a complete cmap table: header, encoding records, then
# the subtables. A format 4 subtable is emitted for the BMP
# mappings; a format 12 subtable (covering every mapping) is added
# when any codepoint lies above U+FFFF.
#
# @param mappings [Hash<Integer, Integer>] char code => new glyph ID
# @return [String] binary cmap data
def build_cmap_binary(mappings)
  # Edge case: empty mappings (e.g., block with no covered chars).
  # Emit a minimal valid cmap with one format 4 subtable mapping
  # only U+0000 → .notdef so the table isn't empty.
  mappings = { 0 => 0 } if mappings.empty?

  bmp = mappings.select { |cp, _gid| cp <= 0xFFFF }
  has_supplementary = mappings.any? { |cp, _gid| cp > 0xFFFF }

  subtables = []
  records = [] # [platform_id, encoding_id, subtable_index]

  unless bmp.empty?
    subtables << build_cmap_format_4(bmp)
    idx = subtables.size - 1
    records << [0, 3, idx] # Unicode BMP
    records << [3, 1, idx] # Windows BMP
  end

  if has_supplementary
    # Format 12 covers both BMP and supplementary — include all
    # mappings so a single subtable covers the full range.
    subtables << build_cmap_format_12(mappings)
    idx = subtables.size - 1
    records << [0, 4, idx]  # Unicode full
    records << [3, 10, idx] # Windows UCS-4
  end

  # The cmap header requires encoding records sorted by platform ID,
  # then by encoding ID; font sanitizers may reject tables that are not.
  records.sort_by! { |pid, eid, _idx| [pid, eid] }

  # Header is 4 bytes (version + numTables); each encoding record is
  # 8 bytes; subtables follow back to back.
  offsets = []
  subtables.reduce(4 + (8 * records.size)) do |offset, subtable|
    offsets << offset
    offset + subtable.bytesize
  end

  header = [0, records.size].pack("nn")
  record_bytes = records.map { |pid, eid, idx| [pid, eid, offsets[idx]].pack("nnN") }.join

  header + record_bytes + subtables.join
end
|
|
472
|
+
|
|
473
|
+
# Format 4 subtable: segment-mapping with idDelta, suitable for
|
|
474
|
+
# BMP codepoints (U+0000..U+FFFF). Builds compact segments where
|
|
475
|
+
# consecutive codepoints map to consecutive glyph IDs.
|
|
476
|
+
# Serializes a format 4 (segment mapping to delta values) cmap
# subtable. Every segment is encoded with idDelta only
# (idRangeOffset is always 0), so each segment must map consecutive
# codepoints to consecutive glyph IDs.
#
# @param bmp_mappings [Hash<Integer, Integer>] BMP char code => glyph ID
# @return [String] binary subtable
def build_cmap_format_4(bmp_mappings)
  segments = coalesce_segments(bmp_mappings)

  # The final segment must end at 0xFFFF. Append the U+FFFF → gid 0
  # terminator unless the mappings already produced a segment ending
  # there; adding it twice would duplicate the last endCode.
  ends_at_ffff = segments.any? && segments.last[:end_cp] == 0xFFFF
  segments << { start_cp: 0xFFFF, end_cp: 0xFFFF, start_gid: 0 } unless ends_at_ffff

  seg_count = segments.size
  # Binary-search helper fields, computed with exact integer math:
  # entry_selector = floor(log2(seg_count)), search_range = 2 * 2**entry_selector.
  entry_selector = seg_count.bit_length - 1
  search_range = 2 << entry_selector
  range_shift = (seg_count * 2) - search_range

  end_codes = segments.map { |seg| seg[:end_cp] }
  start_codes = segments.map { |seg| seg[:start_cp] }
  # idDelta is int16 stored as uint16 (two's complement). For a
  # sequential segment, idDelta = (start_gid - start_cp) & 0xFFFF.
  id_deltas = segments.map { |seg| (seg[:start_gid] - seg[:start_cp]) & 0xFFFF }
  id_range_offsets = [0] * seg_count

  # Field order: format, length (patched below), language,
  # segCountX2, searchRange, entrySelector, rangeShift, endCode[],
  # reservedPad, startCode[], idDelta[], idRangeOffset[].
  words = [4, 0, 0, seg_count * 2, search_range, entry_selector, range_shift]
  words.concat(end_codes)
  words << 0
  words.concat(start_codes, id_deltas, id_range_offsets)
  words[1] = words.size * 2 # length in bytes; every field is a uint16

  words.pack("n*")
end
|
|
508
|
+
|
|
509
|
+
# Format 12 subtable: segmented coverage for full Unicode range.
|
|
510
|
+
# Simpler than format 4 — just (start_char, end_char, start_gid)
|
|
511
|
+
# triples with no delta/offset indirection.
|
|
512
|
+
# Serializes a format 12 (segmented coverage) cmap subtable: a
# 16-byte header followed by one 12-byte
# (start_char, end_char, start_gid) group per coalesced run.
#
# @param all_mappings [Hash<Integer, Integer>] char code => glyph ID
# @return [String] binary subtable
def build_cmap_format_12(all_mappings)
  groups = coalesce_segments(all_mappings)
  payload = groups.map do |grp|
    grp.values_at(:start_cp, :end_cp, :start_gid).pack("NNN")
  end.join

  # format, reserved, length (header + groups), language, numGroups
  [12, 0, 16 + payload.bytesize, 0, groups.size].pack("nnNNN") + payload
end
|
|
527
|
+
|
|
528
|
+
# Group codepoints into consecutive runs where both codepoint AND
|
|
529
|
+
# glyph ID are sequential. Each run becomes one segment/group.
|
|
530
|
+
# Sorts the mappings by codepoint and collapses them into runs in
# which each step advances both the codepoint and the glyph ID by
# exactly one. Each run is reported as its first codepoint, last
# codepoint, and first glyph ID.
#
# @param mappings [Hash<Integer, Integer>] char code => glyph ID
# @return [Array<Hash>] hashes with :start_cp, :end_cp, :start_gid
def coalesce_segments(mappings)
  ordered = mappings.sort_by { |cp, _gid| cp }

  runs = ordered.slice_when do |(prev_cp, prev_gid), (cp, gid)|
    cp != prev_cp + 1 || gid != prev_gid + 1
  end

  runs.map do |run|
    first_cp, first_gid = run.first
    { start_cp: first_cp, end_cp: run.last.first, start_gid: first_gid }
  end
end
|
|
427
545
|
|
|
428
546
|
# Build post table version 3.0 (no glyph names)
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "open-uri"
|
|
4
|
+
require "net/http"
|
|
5
|
+
require "fileutils"
|
|
6
|
+
|
|
7
|
+
module Fontisan
|
|
8
|
+
# Tasks supporting the developer workflow: fixture downloads, etc.
|
|
9
|
+
# Lives under its own namespace so Rakefiles and other tooling can
|
|
10
|
+
# load just the task plumbing without pulling in the full fontisan
|
|
11
|
+
# stack (BinData tables, UFO, etc.).
|
|
12
|
+
module Tasks
|
|
13
|
+
# Downloads a single fixture file with retry on transient network
|
|
14
|
+
# failures. Used by `rake fixtures:download` so a single CDN blip
|
|
15
|
+
# (5xx, connection reset, OpenTimeout) doesn't sink a fresh
|
|
16
|
+
# checkout. Permanent failures (404, malformed URL) surface
|
|
17
|
+
# immediately.
|
|
18
|
+
#
|
|
19
|
+
# The downloader is a focused class, not a procedural Rakefile
|
|
20
|
+
# patch, so the retry logic is unit-testable in isolation.
|
|
21
|
+
#
|
|
22
|
+
# @example
|
|
23
|
+
# Fontisan::Tasks::FixtureDownloader.new(
|
|
24
|
+
# url: "https://github.com/.../font.ttf",
|
|
25
|
+
# destination: "spec/fixtures/font.ttf",
|
|
26
|
+
# ).call
|
|
27
|
+
# Downloads one URL to a local path, retrying transient failures
# with exponential backoff and failing fast on permanent ones.
class FixtureDownloader
  RETRIABLE_ERRORS = [
    Net::OpenTimeout,
    Net::ReadTimeout,
    Errno::ECONNRESET,
    Errno::ECONNREFUSED,
    Errno::EHOSTUNREACH,
    Errno::ETIMEDOUT,
    EOFError,
    IOError,
  ].freeze

  # 5xx HTTP responses are transient server errors worth retrying.
  # Any other HTTP error status (e.g. 404, 403) fails fast.
  RETRIABLE_HTTP_STATUSES = (500..599)

  DEFAULT_MAX_RETRIES = 3
  DEFAULT_BASE_BACKOFF = 0.5 # seconds; doubles per attempt

  # Error raised after exhausting all retries. Carries the last
  # underlying exception so callers can log the root cause.
  class Error < StandardError
    attr_reader :last_error

    def initialize(url:, attempts:, last_error:)
      @last_error = last_error
      super("Failed to download #{url} after #{attempts} attempts: " \
            "#{last_error.class}: #{last_error.message}")
    end
  end

  attr_reader :url, :destination, :max_retries, :base_backoff, :sleep_method

  # @param url [String] source URL.
  # @param destination [String] path to write bytes to. Parent dir
  #   is auto-created.
  # @param max_retries [Integer] total attempts including the
  #   first. 3 means: try, retry, retry.
  # @param base_backoff [Float] seconds to sleep before the first
  #   retry. Doubles per attempt.
  # @param sleep_method [#call] injectable sleep (for tests).
  #   Defaults to Kernel.sleep.
  def initialize(url:, destination:, max_retries: DEFAULT_MAX_RETRIES,
                 base_backoff: DEFAULT_BASE_BACKOFF, sleep_method: method(:sleep))
    @url = url
    @destination = destination
    @max_retries = max_retries
    @base_backoff = base_backoff
    @sleep_method = sleep_method
  end

  # Performs the download. Returns the destination path on success.
  # Permanent failures are re-raised unchanged on the attempt that
  # hit them; retriable failures raise {Error} once +max_retries+
  # attempts have been made.
  #
  # @return [String] destination path
  # @raise [Error] when every attempt failed with a retriable error
  def call
    attempts = 0

    begin
      attempts += 1
      fetch_to_destination
      destination
    rescue StandardError => e
      raise if permanent_failure?(e)
      raise Error.new(url: url, attempts: attempts, last_error: e) if attempts >= max_retries

      # Exponential backoff: base, 2*base, 4*base, ...
      sleep_method.call(base_backoff * (2**(attempts - 1)))
      retry
    end
  end

  private

  # Streams the response into "<destination>.part" and moves it onto
  # +destination+ only after the copy finished, so a failed or
  # interrupted attempt never leaves a truncated file at the final
  # path (and never clobbers a previously downloaded copy).
  def fetch_to_destination
    FileUtils.mkdir_p(File.dirname(destination))
    partial = "#{destination}.part"

    # IO.copy_stream avoids loading the whole response into memory.
    # URLs come from FixtureFonts config (version-controlled), not
    # user input. Parsing with URI.parse first means a string that
    # isn't a valid URI raises URI::InvalidURIError before OpenURI
    # can dispatch on it.
    # rubocop:disable Security/Open
    URI.parse(url).open(open_uri_options) do |remote|
      File.open(partial, "wb") do |file|
        IO.copy_stream(remote, file)
      end
    end
    # rubocop:enable Security/Open

    FileUtils.mv(partial, destination)
  ensure
    # No-op after a successful move; removes leftovers after a failure.
    FileUtils.rm_f(partial) if partial
  end

  # Options handed to OpenURI: a User-Agent request header plus
  # redirect-following and connect/read timeouts (in seconds).
  def open_uri_options
    {
      "User-Agent" => "fontisan-fixtures/1.0",
      redirect: true,
      open_timeout: 30,
      read_timeout: 120,
    }
  end

  # Decides whether +error+ should abort the retry loop immediately.
  # HTTP errors are permanent unless their status is in
  # RETRIABLE_HTTP_STATUSES (an unreadable status is treated as
  # retriable); any other error is permanent unless it is one of
  # RETRIABLE_ERRORS.
  def permanent_failure?(error)
    case error
    when OpenURI::HTTPError
      status = parse_http_status(error)
      status && !RETRIABLE_HTTP_STATUSES.cover?(status)
    else
      RETRIABLE_ERRORS.none? { |klass| error.is_a?(klass) }
    end
  end

  # Extracts the numeric HTTP status from an OpenURI::HTTPError via
  # the first element of +error.io.status+. Returns nil when the
  # status is unavailable or reading it raises.
  def parse_http_status(error)
    status = error.io&.status
    return nil unless status

    status.first.to_i
  rescue StandardError
    nil
  end
end
|
|
161
|
+
end
|
|
162
|
+
end
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Fontisan
|
|
4
|
+
# Tasks supporting the developer workflow: fixture downloads, etc.
|
|
5
|
+
# Lives under its own namespace so Rakefiles and other tooling can
|
|
6
|
+
# load just the task plumbing without pulling in the full fontisan
|
|
7
|
+
# stack (BinData tables, UFO, etc.).
|
|
8
|
+
module Tasks
|
|
9
|
+
autoload :FixtureDownloader, "fontisan/tasks/fixture_downloader"
|
|
10
|
+
end
|
|
11
|
+
end
|
data/lib/fontisan/version.rb
CHANGED
data/lib/fontisan.rb
CHANGED
|
@@ -118,6 +118,7 @@ module Fontisan
|
|
|
118
118
|
autoload :SfntTable, "fontisan/sfnt_table"
|
|
119
119
|
autoload :Stitcher, "fontisan/stitcher"
|
|
120
120
|
autoload :StitcherCli, "fontisan/stitcher_cli"
|
|
121
|
+
autoload :Tasks, "fontisan/tasks"
|
|
121
122
|
autoload :TrueTypeCollection, "fontisan/true_type_collection"
|
|
122
123
|
autoload :TrueTypeFont, "fontisan/true_type_font"
|
|
123
124
|
autoload :TrueTypeFontExtensions, "fontisan/true_type_font_extensions"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: fontisan
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.
|
|
4
|
+
version: 0.4.11
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-07-
|
|
11
|
+
date: 2026-07-05 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: base64
|
|
@@ -440,7 +440,9 @@ files:
|
|
|
440
440
|
- lib/fontisan/stitcher/partition_strategy.rb
|
|
441
441
|
- lib/fontisan/stitcher/partition_strategy/base.rb
|
|
442
442
|
- lib/fontisan/stitcher/partition_strategy/blueprint.rb
|
|
443
|
+
- lib/fontisan/stitcher/partition_strategy/by_block.rb
|
|
443
444
|
- lib/fontisan/stitcher/partition_strategy/by_plane.rb
|
|
445
|
+
- lib/fontisan/stitcher/partition_strategy/by_script.rb
|
|
444
446
|
- lib/fontisan/stitcher/partition_strategy/partition.rb
|
|
445
447
|
- lib/fontisan/stitcher/selector.rb
|
|
446
448
|
- lib/fontisan/stitcher/selector/codepoints.rb
|
|
@@ -547,6 +549,8 @@ files:
|
|
|
547
549
|
- lib/fontisan/tables/svg.rb
|
|
548
550
|
- lib/fontisan/tables/variation_common.rb
|
|
549
551
|
- lib/fontisan/tables/vvar.rb
|
|
552
|
+
- lib/fontisan/tasks.rb
|
|
553
|
+
- lib/fontisan/tasks/fixture_downloader.rb
|
|
550
554
|
- lib/fontisan/true_type_collection.rb
|
|
551
555
|
- lib/fontisan/true_type_font.rb
|
|
552
556
|
- lib/fontisan/true_type_font_extensions.rb
|