ucode 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/unicode17_universal_glyph_set.yml +1 -1
- data/lib/ucode/cli.rb +12 -0
- data/lib/ucode/commands/emit_metadata.rb +62 -0
- data/lib/ucode/commands.rb +1 -0
- data/lib/ucode/error.rb +5 -0
- data/lib/ucode/unicode/block.rb +28 -0
- data/lib/ucode/unicode/catalog.rb +135 -0
- data/lib/ucode/unicode/metadata/v15_0_0.rb +354 -0
- data/lib/ucode/unicode/metadata/v15_1_0.rb +355 -0
- data/lib/ucode/unicode/metadata/v16_0_0.rb +365 -0
- data/lib/ucode/unicode/metadata/v17_0_0.rb +374 -0
- data/lib/ucode/unicode/metadata_writer.rb +136 -0
- data/lib/ucode/unicode/plane.rb +23 -0
- data/lib/ucode/unicode.rb +85 -0
- data/lib/ucode/version.rb +1 -1
- data/lib/ucode.rb +2 -0
- metadata +11 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 39b510b33a2215d748cbf04e9794149209ae500fae5e9fdce455703c08b3bc7c
|
|
4
|
+
data.tar.gz: 7a4c6ac03258db64ac786c5fd324d5232205ca8cda18865fbc083e55eb091709
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7a03ec1efe4bb8396e3b7405cafacbc674fdb6efe4cfa01c9b41e621a30a823893af287eb73c6d7b1fce450d302700b5a7147824116ecd4c8b2f5544ad30fa3f
|
|
7
|
+
data.tar.gz: e68b9a8a08ad174b4b38f8ab03c73a32e1fb16a37caff25164a8ea91f160c25a8e1272e288c00993396b3db5d24b4d8551dd5ef0ea3526e8c878dbc98550a5f0
|
data/lib/ucode/cli.rb
CHANGED
|
@@ -651,6 +651,18 @@ module Ucode
|
|
|
651
651
|
puts JSON.pretty_generate(result.to_h)
|
|
652
652
|
end
|
|
653
653
|
|
|
654
|
+
desc "emit-metadata [VERSION]", "Generate frozen Ruby metadata module from UCD data"
|
|
655
|
+
option :gem_root, type: :string, default: nil,
|
|
656
|
+
desc: "Gem root for output path (default: auto-detect)"
|
|
657
|
+
def emit_metadata(version = nil)
|
|
658
|
+
version_str = VersionResolver.resolve(version)
|
|
659
|
+
result = Commands::EmitMetadataCommand.new.call(
|
|
660
|
+
version_str,
|
|
661
|
+
gem_root: options[:gem_root],
|
|
662
|
+
)
|
|
663
|
+
puts JSON.pretty_generate(result)
|
|
664
|
+
end
|
|
665
|
+
|
|
654
666
|
private
|
|
655
667
|
|
|
656
668
|
def result_to_h(result)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Commands
|
|
7
|
+
# `ucode emit-metadata` — generates frozen Ruby metadata modules
|
|
8
|
+
# from cached UCD text files.
|
|
9
|
+
#
|
|
10
|
+
# Run after `ucode fetch ucd <version>` to produce the metadata
|
|
11
|
+
# module that ships with the gem. The output is written to
|
|
12
|
+
# `lib/ucode/unicode/metadata/<vXX_Y_Z>.rb` and must be committed.
|
|
13
|
+
class EmitMetadataCommand
|
|
14
|
+
# @param version [String] e.g. "17.0.0"
|
|
15
|
+
# @param gem_root [String, Pathname, nil] gem root for output path
|
|
16
|
+
# resolution; defaults to the conventional location.
|
|
17
|
+
# @return [Hash] { version:, path:, bytes:, blocks:, assigned_count: }
|
|
18
|
+
def call(version, gem_root: nil)
|
|
19
|
+
ucd_dir = Cache.ucd_dir(version)
|
|
20
|
+
raise Ucode::Error, "UCD not cached for #{version}. Run: ucode fetch ucd #{version}" unless ucd_dir.exist?
|
|
21
|
+
|
|
22
|
+
source = Ucode::Unicode::MetadataWriter.generate(
|
|
23
|
+
ucd_dir: ucd_dir, version: version,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
out_path = resolve_output_path(version, gem_root)
|
|
27
|
+
write_atomic(out_path, source)
|
|
28
|
+
|
|
29
|
+
metadata = Ucode::Unicode::MetadataWriter
|
|
30
|
+
metadata.version_to_module(version)
|
|
31
|
+
{
|
|
32
|
+
version: version,
|
|
33
|
+
path: out_path.to_s,
|
|
34
|
+
bytes: source.bytesize,
|
|
35
|
+
}
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
private
|
|
39
|
+
|
|
40
|
+
def resolve_output_path(version, gem_root)
|
|
41
|
+
filename = Ucode::Unicode::MetadataWriter.version_to_filename(version)
|
|
42
|
+
base = gem_root ? Pathname.new(gem_root) : default_gem_root
|
|
43
|
+
dir = base.join("lib", "ucode", "unicode", "metadata")
|
|
44
|
+
dir.mkpath unless dir.exist?
|
|
45
|
+
dir.join("#{filename}.rb")
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
def default_gem_root
|
|
49
|
+
Pathname.new(__dir__).join("..", "..", "..")
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def write_atomic(path, content)
|
|
53
|
+
return if path.exist? && path.read == content
|
|
54
|
+
|
|
55
|
+
path.dirname.mkpath
|
|
56
|
+
tmp = path.sub_ext(".rb.tmp")
|
|
57
|
+
tmp.write(content)
|
|
58
|
+
tmp.rename(path.to_s)
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
data/lib/ucode/commands.rb
CHANGED
|
@@ -13,6 +13,7 @@ module Ucode
|
|
|
13
13
|
autoload :LookupCommand, "ucode/commands/lookup"
|
|
14
14
|
autoload :CacheCommand, "ucode/commands/cache"
|
|
15
15
|
autoload :BuildCommand, "ucode/commands/build"
|
|
16
|
+
autoload :EmitMetadataCommand, "ucode/commands/emit_metadata"
|
|
16
17
|
autoload :CanonicalBuildCommand, "ucode/commands/canonical_build"
|
|
17
18
|
autoload :FontCoverageCommand, "ucode/commands/font_coverage"
|
|
18
19
|
autoload :UniversalSet, "ucode/commands/universal_set"
|
data/lib/ucode/error.rb
CHANGED
|
@@ -124,4 +124,9 @@ module Ucode
|
|
|
124
124
|
# just PDFs: source config schema, font file presence, coverage
|
|
125
125
|
# assertion.
|
|
126
126
|
class UniversalSetPreBuildError < GlyphError; end
|
|
127
|
+
|
|
128
|
+
# The requested Unicode version has no metadata module shipped with
|
|
129
|
+
# this gem. The context carries the requested version and the list of
|
|
130
|
+
# supported versions.
|
|
131
|
+
class UnknownUnicodeVersionError < LookupError; end
|
|
127
132
|
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
|
|
4
|
+
module Unicode
|
|
5
|
+
# A Unicode block — a contiguous range of codepoints with a name.
|
|
6
|
+
# There are ~346 blocks in Unicode 17.0.0.
|
|
7
|
+
#
|
|
8
|
+
# Pure value object like {Plane}. The +id+ field uses the underscore
|
|
9
|
+
# form (e.g., +"Basic_Latin"+) for filesystem/JSON key compatibility;
|
|
10
|
+
# the +name+ field preserves the original Unicode spelling.
|
|
11
|
+
Block = Struct.new(
|
|
12
|
+
:id,
|
|
13
|
+
:name,
|
|
14
|
+
:first_cp,
|
|
15
|
+
:last_cp,
|
|
16
|
+
:plane_number,
|
|
17
|
+
keyword_init: true,
|
|
18
|
+
) do
|
|
19
|
+
def range
|
|
20
|
+
(first_cp..last_cp)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def cover?(codepoint)
|
|
24
|
+
range.cover?(codepoint)
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
|
|
4
|
+
module Unicode
|
|
5
|
+
# Version-specific query interface for Unicode metadata.
|
|
6
|
+
#
|
|
7
|
+
# Deep module: small interface (10 public methods), large frozen
|
|
8
|
+
# dataset behind it (~346 blocks + 17 planes + counts). Constructed
|
|
9
|
+
# once per version; all lookups are O(1) or O(log N).
|
|
10
|
+
#
|
|
11
|
+
# Thread-safe: all internal structures are frozen at construction.
|
|
12
|
+
# No mutation after +initialize+. No locks needed.
|
|
13
|
+
#
|
|
14
|
+
# Construct via {Unicode.for_version} — do not call +new+ directly
|
|
15
|
+
# unless you have a pre-normalized version string.
|
|
16
|
+
class Catalog
|
|
17
|
+
attr_reader :version
|
|
18
|
+
|
|
19
|
+
def initialize(version:)
|
|
20
|
+
@version = version
|
|
21
|
+
metadata = load_metadata(version)
|
|
22
|
+
build_indexes(metadata)
|
|
23
|
+
freeze
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def assigned_count
|
|
27
|
+
@assigned_count
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
def assigned_in_plane(plane_number)
|
|
31
|
+
@assigned_by_plane[plane_number] || 0
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def find_plane(plane_number)
|
|
35
|
+
@planes_by_number[plane_number]
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def find_plane_by_codepoint(codepoint)
|
|
39
|
+
find_plane(codepoint >> 16)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def find_block(block_id)
|
|
43
|
+
@blocks_by_id[block_id]
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def find_block_by_codepoint(codepoint)
|
|
47
|
+
idx = @block_ranges.bsearch_index do |(_first, last, _block)|
|
|
48
|
+
if codepoint < _first
|
|
49
|
+
-1
|
|
50
|
+
elsif codepoint > last
|
|
51
|
+
1
|
|
52
|
+
else
|
|
53
|
+
0
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
idx.nil? ? nil : @block_ranges[idx][2]
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def blocks_in_plane(plane_number)
|
|
60
|
+
@blocks_by_plane[plane_number] || EMPTY_BLOCKS
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def all_blocks
|
|
64
|
+
@all_blocks
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def all_planes
|
|
68
|
+
@all_planes
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
private
|
|
72
|
+
|
|
73
|
+
EMPTY_BLOCKS = [].freeze
|
|
74
|
+
private_constant :EMPTY_BLOCKS
|
|
75
|
+
|
|
76
|
+
def load_metadata(version)
|
|
77
|
+
module_name = "V#{version.tr('.', '_')}"
|
|
78
|
+
Metadata.const_get(module_name)
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def build_indexes(metadata)
|
|
82
|
+
@assigned_count = metadata::ASSIGNED_COUNT
|
|
83
|
+
@assigned_by_plane = metadata::ASSIGNED_BY_PLANE.freeze
|
|
84
|
+
|
|
85
|
+
build_plane_indexes(metadata)
|
|
86
|
+
build_block_indexes(metadata)
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def build_plane_indexes(_metadata)
|
|
90
|
+
@planes_by_number = {}
|
|
91
|
+
@all_planes = []
|
|
92
|
+
17.times do |n|
|
|
93
|
+
names = PLANE_NAMES[n] || { short_name: nil, display_name: "Plane #{n}" }
|
|
94
|
+
plane = Plane.new(
|
|
95
|
+
number: n,
|
|
96
|
+
range: (n << 16)..((n << 16) | 0xFFFF),
|
|
97
|
+
short_name: names[:short_name],
|
|
98
|
+
display_name: names[:display_name],
|
|
99
|
+
assigned_count: @assigned_by_plane[n] || 0,
|
|
100
|
+
).freeze
|
|
101
|
+
@planes_by_number[n] = plane
|
|
102
|
+
@all_planes << plane
|
|
103
|
+
end
|
|
104
|
+
@all_planes.freeze
|
|
105
|
+
@planes_by_number.freeze
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
def build_block_indexes(metadata)
|
|
109
|
+
@blocks_by_id = {}
|
|
110
|
+
blocks_by_plane_temp = Hash.new { |h, k| h[k] = [] }
|
|
111
|
+
@block_ranges = []
|
|
112
|
+
@all_blocks = []
|
|
113
|
+
|
|
114
|
+
metadata::BLOCKS.each do |entry|
|
|
115
|
+
block = Block.new(
|
|
116
|
+
id: entry[:id],
|
|
117
|
+
name: entry[:name],
|
|
118
|
+
first_cp: entry[:first_cp],
|
|
119
|
+
last_cp: entry[:last_cp],
|
|
120
|
+
plane_number: entry[:first_cp] >> 16,
|
|
121
|
+
).freeze
|
|
122
|
+
@blocks_by_id[block.id] = block
|
|
123
|
+
blocks_by_plane_temp[block.plane_number] << block
|
|
124
|
+
@block_ranges << [block.first_cp, block.last_cp, block]
|
|
125
|
+
@all_blocks << block
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
@blocks_by_id.freeze
|
|
129
|
+
@blocks_by_plane = blocks_by_plane_temp.transform_values(&:freeze).freeze
|
|
130
|
+
@block_ranges.sort_by!(&:first).freeze
|
|
131
|
+
@all_blocks.freeze
|
|
132
|
+
end
|
|
133
|
+
end
|
|
134
|
+
end
|
|
135
|
+
end
|