ucode 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +10 -2
- data/README.md +66 -20
- data/Rakefile +19 -8
- data/config/unicode17_universal_glyph_set.yml +1 -1
- data/lib/ucode/audit/reference_factory.rb +1 -1
- data/lib/ucode/cli.rb +9 -7
- data/lib/ucode/code_chart/extractor.rb +3 -5
- data/lib/ucode/code_chart/provenance.rb +6 -5
- data/lib/ucode/code_chart/sidecar.rb +2 -2
- data/lib/ucode/code_chart/writer.rb +1 -1
- data/lib/ucode/code_chart.rb +1 -1
- data/lib/ucode/commands/fetch.rb +1 -1
- data/lib/ucode/commands/glyphs.rb +1 -1
- data/lib/ucode/commands/lookup.rb +1 -1
- data/lib/ucode/commands/parse.rb +1 -1
- data/lib/ucode/coordinator/indices.rb +2 -2
- data/lib/ucode/fetch/code_charts.rb +2 -3
- data/lib/ucode/fetch/http.rb +12 -14
- data/lib/ucode/glyphs/page_renderer.rb +15 -2
- data/lib/ucode/glyphs/pipeline.rb +1 -2
- data/lib/ucode/repo/aggregate_writer.rb +1 -1
- data/lib/ucode/repo/writers/blocks_writer.rb +13 -13
- data/lib/ucode/repo/writers/enums_writer.rb +2 -2
- data/lib/ucode/repo/writers/indexes_writer.rb +4 -4
- data/lib/ucode/repo/writers/manifest_writer.rb +4 -4
- data/lib/ucode/repo/writers/named_sequences_writer.rb +1 -1
- data/lib/ucode/repo/writers/planes_writer.rb +17 -17
- data/lib/ucode/repo/writers/relationships_writer.rb +1 -1
- data/lib/ucode/repo/writers/scripts_writer.rb +6 -6
- data/lib/ucode/repo/writers.rb +1 -1
- data/lib/ucode/version.rb +1 -1
- data/ucode.gemspec +6 -1
- metadata +6 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e2e60f073662cc78885f8ee6a0333a2307a8b894ddb9789b1e206f6ae63d25e3
|
|
4
|
+
data.tar.gz: 50b3984de26589d0aab193250e9c8dd3f56fd0d10a4a77a21ffdcb236c38c737
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 88d9ef3df0f99af9b3897cd429666092d48a373d9c60d3c83a6e06a59404e0523c736a8d971fbb6d6d29c9b4b80d9610cad54a64cda439326e19a80670d67ba9
|
|
7
|
+
data.tar.gz: 8146f6984defddb5b45204fe7a8852cd3051bf1474584eddc7489e5ec9fd4900d48452ac5462a1c6c2ac9b8213823ace68cebe6c5b5176dd0c25c922f8f1d4ba
|
data/Gemfile
CHANGED
|
@@ -4,8 +4,13 @@ source "https://rubygems.org"
|
|
|
4
4
|
|
|
5
5
|
gemspec
|
|
6
6
|
|
|
7
|
+
# Rake must be in the default group (not :development) because the
|
|
8
|
+
# GHA release workflow runs `bundle exec rake release` to publish the
|
|
9
|
+
# gem. The release runner installs with `--without development`, so
|
|
10
|
+
# gems in the :development group are excluded.
|
|
11
|
+
gem "rake"
|
|
12
|
+
|
|
7
13
|
group :development do
|
|
8
|
-
gem "rake"
|
|
9
14
|
gem "rspec"
|
|
10
15
|
gem "rubocop"
|
|
11
16
|
gem "rubocop-performance"
|
|
@@ -19,4 +24,7 @@ end
|
|
|
19
24
|
# local sibling checkout, set FONTISAN_PATH before running bundle.
|
|
20
25
|
# FONTISAN_PATH=../fontisan bundle install
|
|
21
26
|
gem "fontisan", path: ENV["FONTISAN_PATH"] if ENV["FONTISAN_PATH"]
|
|
22
|
-
|
|
27
|
+
# Pin fontisan to 0.2.22 — 0.2.23+ removed
|
|
28
|
+
# `Fontisan::Commands::AuditCommand` and 0.4.x removed the Audit
|
|
29
|
+
# subsystem entirely. See ucode.gemspec for the rationale.
|
|
30
|
+
gem "fontisan", "= 0.2.22" unless ENV["FONTISAN_PATH"]
|
data/README.md
CHANGED
|
@@ -2,14 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
`ucode` is a Ruby toolkit for the Unicode Character Database (UCD). It turns the
|
|
4
4
|
official UCD text files into a structured, browsable dataset: one JSON document
|
|
5
|
-
per assigned codepoint, plus a Vitepress site for navigation.
|
|
5
|
+
per assigned codepoint, plus a Vitepress site for navigation. It also extracts
|
|
6
|
+
per-codepoint SVG glyphs from the Unicode Code Charts PDFs and audits font
|
|
7
|
+
coverage against the Unicode baseline.
|
|
6
8
|
|
|
7
|
-
> **Status (v0.1).** The JSON dataset, lookup index,
|
|
8
|
-
>
|
|
9
|
-
>
|
|
10
|
-
>
|
|
9
|
+
> **Status (v0.2.1).** The JSON dataset, lookup index, Vitepress site, and
|
|
10
|
+
> 4-tier glyph extraction pipeline are production-ready. The `ucode code-chart`
|
|
11
|
+
> subcommand extracts standalone SVGs + provenance sidecars from per-block
|
|
12
|
+
> Code Charts PDFs. Font coverage auditing (`ucode audit`) is production-ready.
|
|
11
13
|
|
|
12
|
-
## What you get (v0.
|
|
14
|
+
## What you get (v0.2)
|
|
13
15
|
|
|
14
16
|
- **Per-codepoint JSON** at `output/blocks/<BLOCK>/<U+XXXX>/index.json` with
|
|
15
17
|
full UCD properties, the human-curated relationships from `NamesList.txt`
|
|
@@ -21,6 +23,14 @@ per assigned codepoint, plus a Vitepress site for navigation.
|
|
|
21
23
|
relationships, named sequences, manifest.
|
|
22
24
|
- **SQLite lookup index** for fast codepoint → block/script/char queries.
|
|
23
25
|
- **Vitepress site** at `site/` for browsing Plane → Block → Character.
|
|
26
|
+
- **4-tier glyph extraction** — per-codepoint `glyph.svg` sourced from real
|
|
27
|
+
fonts (Tier 1), PDF-embedded fonts (Pillars 1+2), or Last Resort UFO
|
|
28
|
+
(Pillar 3).
|
|
29
|
+
- **Per-block Code Chart extraction** — `ucode code-chart extract` produces
|
|
30
|
+
standalone SVG + provenance JSON for every codepoint in a block.
|
|
31
|
+
- **Font coverage audit** — `ucode audit` compares a font's cmap against the
|
|
32
|
+
Unicode baseline and reports per-block coverage, missing codepoints, and
|
|
33
|
+
optional HTML browsers.
|
|
24
34
|
|
|
25
35
|
## Install
|
|
26
36
|
|
|
@@ -31,7 +41,7 @@ gem install ucode
|
|
|
31
41
|
Or in a Gemfile:
|
|
32
42
|
|
|
33
43
|
```ruby
|
|
34
|
-
gem "ucode", "~> 0.
|
|
44
|
+
gem "ucode", "~> 0.2"
|
|
35
45
|
```
|
|
36
46
|
|
|
37
47
|
## Quick start
|
|
@@ -107,7 +117,39 @@ Then:
|
|
|
107
117
|
cd site && npm install && npm run dev
|
|
108
118
|
```
|
|
109
119
|
|
|
110
|
-
##
|
|
120
|
+
## Code Chart extraction
|
|
121
|
+
|
|
122
|
+
Extract per-codepoint SVG glyphs from a Unicode Code Charts PDF, with
|
|
123
|
+
provenance sidecar JSON:
|
|
124
|
+
|
|
125
|
+
```sh
|
|
126
|
+
# Download the Code Charts PDF for a block
|
|
127
|
+
ucode code-chart fetch --block Sidetic
|
|
128
|
+
|
|
129
|
+
# Extract every codepoint as SVG + provenance JSON
|
|
130
|
+
ucode code-chart extract --block Sidetic --to /tmp/sidetic/
|
|
131
|
+
|
|
132
|
+
# List cached Code Charts PDFs
|
|
133
|
+
ucode code-chart list
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Output layout:
|
|
137
|
+
|
|
138
|
+
```
|
|
139
|
+
/tmp/sidetic/Sidetic/
|
|
140
|
+
U+10920.svg # vector glyph outline
|
|
141
|
+
U+10920.json # provenance: source PDF, sha256, version, timestamp
|
|
142
|
+
U+10921.svg
|
|
143
|
+
U+10921.json
|
|
144
|
+
...
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
The extractor uses the existing 4-tier glyph sourcing pipeline (Tier 1 →
|
|
148
|
+
Pillar 1 → Pillar 2 → Pillar 3). No new extraction logic — the
|
|
149
|
+
`ucode code-chart` subcommand is a thin CLI wrapper over `Ucode::CodeChart::Writer`,
|
|
150
|
+
which orchestrates `Ucode::Glyphs::Resolver` for each codepoint.
|
|
151
|
+
|
|
152
|
+
## Glyph extraction (4-tier pipeline)
|
|
111
153
|
|
|
112
154
|
The `ucode glyphs` command and the `--include-glyphs` flag on `ucode build`
|
|
113
155
|
are **opt-in and experimental in v0.1**. They emit per-codepoint `glyph.svg`
|
|
@@ -436,29 +478,33 @@ from the dataset.
|
|
|
436
478
|
|
|
437
479
|
## Architecture
|
|
438
480
|
|
|
439
|
-
|
|
481
|
+
Seven concerns, each isolated:
|
|
440
482
|
|
|
441
483
|
1. **`Ucode::Models`** — `lutaml-model` classes for every UCD aggregate.
|
|
442
484
|
2. **`Ucode::Parsers`** — one streaming parser per UCD text file.
|
|
443
485
|
3. **`Ucode::Coordinator`** — single-pass enrichment that merges indices
|
|
444
486
|
into each `CodePoint` as it streams.
|
|
445
|
-
4. **`Ucode::Repo`** — atomic, idempotent writers for the output tree
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
487
|
+
4. **`Ucode::Repo`** — atomic, idempotent writers for the output tree
|
|
488
|
+
(per-concern writers under `Repo::Writers::*`).
|
|
489
|
+
5. **`Ucode::Glyphs`** — 4-tier vector glyph extraction from Code Charts PDFs
|
|
490
|
+
(RealFonts, EmbeddedFonts, LastResort + Resolver).
|
|
491
|
+
6. **`Ucode::CodeChart`** — per-block SVG extraction + provenance sidecar
|
|
492
|
+
for the "Code Chart donor" use case.
|
|
493
|
+
7. **`Ucode::Site`** — Vitepress scaffold + config/page generator.
|
|
449
494
|
|
|
450
495
|
CLI is thin Thor dispatch over `Ucode::Commands::*`. Each command class
|
|
451
|
-
is a pure, in-process testable unit.
|
|
496
|
+
is a pure, in-process testable unit. Version resolution happens once
|
|
497
|
+
per CLI invocation and threads through to all sub-commands.
|
|
452
498
|
|
|
453
|
-
See `
|
|
454
|
-
`docs/
|
|
499
|
+
See `docs/architecture.md` for the canonical architecture reference and
|
|
500
|
+
`docs/adr/` for Architecture Decision Records.
|
|
455
501
|
|
|
456
502
|
## Authoritative source
|
|
457
503
|
|
|
458
|
-
ucode parses the **UCD text files** (
|
|
459
|
-
`
|
|
460
|
-
the human-curated relationship data
|
|
461
|
-
|
|
504
|
+
ucode parses the **UCD text files** (`UnicodeData.txt`, `NamesList.txt`,
|
|
505
|
+
`Blocks.txt`, etc.) per UAX #44. It never uses the flat XML dump — the text
|
|
506
|
+
files carry the human-curated relationship data that makes this project
|
|
507
|
+
valuable.
|
|
462
508
|
|
|
463
509
|
## License
|
|
464
510
|
|
data/Rakefile
CHANGED
|
@@ -4,15 +4,26 @@ require "rubygems"
|
|
|
4
4
|
require "rake"
|
|
5
5
|
require "bundler/gem_tasks"
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
begin
|
|
8
|
+
require "rspec/core/rake_task"
|
|
9
|
+
RSpec::Core::RakeTask.new(:spec)
|
|
10
|
+
rescue LoadError
|
|
11
|
+
# rspec is in the :development group; not available in the
|
|
12
|
+
# release runner (`bundle install --without development`).
|
|
13
|
+
end
|
|
9
14
|
|
|
10
|
-
|
|
11
|
-
|
|
15
|
+
begin
|
|
16
|
+
require "rubocop/rake_task"
|
|
17
|
+
RuboCop::RakeTask.new
|
|
18
|
+
rescue LoadError
|
|
19
|
+
end
|
|
12
20
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
21
|
+
begin
|
|
22
|
+
require "yard"
|
|
23
|
+
YARD::Rake::YardocTask.new do |t|
|
|
24
|
+
t.options = ["--output-dir", "docs/api"]
|
|
25
|
+
end
|
|
26
|
+
rescue LoadError
|
|
16
27
|
end
|
|
17
28
|
|
|
18
|
-
task default: %i[spec rubocop]
|
|
29
|
+
task default: %i[spec rubocop]
|
data/lib/ucode/cli.rb
CHANGED
|
@@ -164,7 +164,7 @@ module Ucode
|
|
|
164
164
|
class CodeChartCmd < Thor
|
|
165
165
|
desc "fetch --block BLOCK [VERSION]", "Download the Code Charts PDF for a block"
|
|
166
166
|
option :block, type: :string, required: true,
|
|
167
|
-
|
|
167
|
+
desc: "Block identifier (e.g. Sidetic, Basic_Latin)"
|
|
168
168
|
def fetch(version = nil)
|
|
169
169
|
with_codechart_errors do
|
|
170
170
|
block_first_cp = resolve_block_first_cp!(options[:block], version)
|
|
@@ -179,7 +179,7 @@ module Ucode
|
|
|
179
179
|
desc "extract --block BLOCK --to DIR [VERSION]",
|
|
180
180
|
"Extract per-codepoint SVG + provenance sidecars from a Code Charts PDF"
|
|
181
181
|
option :block, type: :string, required: true,
|
|
182
|
-
|
|
182
|
+
desc: "Block identifier (e.g. Sidetic)"
|
|
183
183
|
option :to, type: :string, required: true,
|
|
184
184
|
desc: "Output directory (will contain <block_id>/<U+XXXX>.svg + .json)"
|
|
185
185
|
def extract(version = nil)
|
|
@@ -193,10 +193,12 @@ module Ucode
|
|
|
193
193
|
|
|
194
194
|
pdf = Ucode::Glyphs::PdfFetcher.new(version_str)
|
|
195
195
|
.fetch(block_first_cp: block_first_cp)
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
196
|
+
unless pdf
|
|
197
|
+
raise Ucode::CodeChartNotFoundError.new(
|
|
198
|
+
"Code Charts PDF unavailable for block #{block.id.inspect}",
|
|
199
|
+
context: { block_id: block.id, version: version_str },
|
|
200
|
+
)
|
|
201
|
+
end
|
|
200
202
|
|
|
201
203
|
writer = Ucode::CodeChart::Writer.new(
|
|
202
204
|
output_root: Pathname.new(options[:to]),
|
|
@@ -218,7 +220,7 @@ module Ucode
|
|
|
218
220
|
return
|
|
219
221
|
end
|
|
220
222
|
files.each do |f|
|
|
221
|
-
puts f.basename
|
|
223
|
+
puts f.basename
|
|
222
224
|
end
|
|
223
225
|
end
|
|
224
226
|
|
|
@@ -93,12 +93,10 @@ module Ucode
|
|
|
93
93
|
# injected, only assigned codepoints (those the embedded
|
|
94
94
|
# font actually covers) yield Results; the rest are silently
|
|
95
95
|
# skipped, satisfying the REQ's "skip unassigned codepoints".
|
|
96
|
-
def each_codepoint
|
|
96
|
+
def each_codepoint(&)
|
|
97
97
|
return enum_for(:each_codepoint) unless block_given?
|
|
98
98
|
|
|
99
|
-
(@block.range_first..@block.range_last).each
|
|
100
|
-
yield cp
|
|
101
|
-
end
|
|
99
|
+
(@block.range_first..@block.range_last).each(&)
|
|
102
100
|
end
|
|
103
101
|
|
|
104
102
|
def build_resolver
|
|
@@ -119,4 +117,4 @@ module Ucode
|
|
|
119
117
|
end
|
|
120
118
|
end
|
|
121
119
|
end
|
|
122
|
-
end
|
|
120
|
+
end
|
|
@@ -33,14 +33,15 @@ module Ucode
|
|
|
33
33
|
|
|
34
34
|
# Computes the source PDF's URL from a block name and first
|
|
35
35
|
# codepoint. Mirrors the per-block URL convention in
|
|
36
|
-
# {Ucode::Fetch::CodeCharts}:
|
|
37
|
-
#
|
|
36
|
+
# {Ucode::Fetch::CodeCharts}: the hex representation of the
|
|
37
|
+
# codepoint, zero-padded to a minimum of 4 digits (e.g.
|
|
38
|
+
# `U0000.pdf` for BMP, `U10920.pdf` for Plane 1,
|
|
39
|
+
# `U100000.pdf` for Plane 16 SPUA-B).
|
|
38
40
|
#
|
|
39
41
|
# @param block_first_cp [Integer]
|
|
40
42
|
# @return [String]
|
|
41
43
|
def self.code_chart_url(block_first_cp)
|
|
42
|
-
|
|
43
|
-
slug = block_first_cp.to_s(16).upcase.rjust(width, "0")
|
|
44
|
+
slug = block_first_cp.to_s(16).upcase.rjust(4, "0")
|
|
44
45
|
"#{Ucode.configuration.charts_base_url}/U#{slug}.pdf"
|
|
45
46
|
end
|
|
46
47
|
|
|
@@ -78,4 +79,4 @@ module Ucode
|
|
|
78
79
|
Digest::SHA256.file(path).hexdigest
|
|
79
80
|
end
|
|
80
81
|
end
|
|
81
|
-
end
|
|
82
|
+
end
|
|
@@ -31,7 +31,7 @@ module Ucode
|
|
|
31
31
|
# @return [Pathname] the written sidecar path
|
|
32
32
|
def write(provenance)
|
|
33
33
|
path = path_for(provenance)
|
|
34
|
-
payload = JSON.pretty_generate(provenance.to_h)
|
|
34
|
+
payload = "#{JSON.pretty_generate(provenance.to_h)}\n"
|
|
35
35
|
write_atomic(path, payload)
|
|
36
36
|
path
|
|
37
37
|
end
|
|
@@ -49,4 +49,4 @@ module Ucode
|
|
|
49
49
|
end
|
|
50
50
|
end
|
|
51
51
|
end
|
|
52
|
-
end
|
|
52
|
+
end
|
data/lib/ucode/code_chart.rb
CHANGED
data/lib/ucode/commands/fetch.rb
CHANGED
data/lib/ucode/commands/parse.rb
CHANGED
|
@@ -69,7 +69,7 @@ module Ucode
|
|
|
69
69
|
# `output/relationships/`
|
|
70
70
|
# @yieldparam records [Hash<Integer|String, Record|Array<Record>>]
|
|
71
71
|
# @return [Enumerator] when no block is given
|
|
72
|
-
def each_relationship(&
|
|
72
|
+
def each_relationship(&)
|
|
73
73
|
return enum_for(:each_relationship) unless block_given?
|
|
74
74
|
|
|
75
75
|
RELATIONSHIPS.each do |slug, field|
|
|
@@ -78,4 +78,4 @@ module Ucode
|
|
|
78
78
|
end
|
|
79
79
|
end
|
|
80
80
|
end
|
|
81
|
-
end
|
|
81
|
+
end
|
|
@@ -6,7 +6,7 @@ module Ucode
|
|
|
6
6
|
#
|
|
7
7
|
# URL pattern: `https://www.unicode.org/charts/PDF/U<XXXX>.pdf`
|
|
8
8
|
# where `XXXX` is the block's first codepoint zero-padded to 4 digits
|
|
9
|
-
# (5
|
|
9
|
+
# (5 or 6 digits for planes > 0, e.g. `U10920.pdf`, `U100000.pdf`).
|
|
10
10
|
module CodeCharts
|
|
11
11
|
class << self
|
|
12
12
|
# @param version [String] used as the on-disk path namespace; PDFs
|
|
@@ -48,8 +48,7 @@ module Ucode
|
|
|
48
48
|
private
|
|
49
49
|
|
|
50
50
|
def hex_pad(codepoint)
|
|
51
|
-
|
|
52
|
-
codepoint.to_s(16).upcase.rjust(width, "0")
|
|
51
|
+
codepoint.to_s(16).upcase.rjust(4, "0")
|
|
53
52
|
end
|
|
54
53
|
end
|
|
55
54
|
end
|
data/lib/ucode/fetch/http.rb
CHANGED
|
@@ -44,21 +44,19 @@ module Ucode
|
|
|
44
44
|
|
|
45
45
|
last_error = nil
|
|
46
46
|
(attempts + 1).times do |attempt|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
"
|
|
58
|
-
"#{e.class}: #{e.message}; retrying in #{sleep_for}s"
|
|
59
|
-
end
|
|
60
|
-
sleep(sleep_for)
|
|
47
|
+
response = stream_to(uri, destination, read_timeout)
|
|
48
|
+
validate_response!(validate, response, destination) if validate
|
|
49
|
+
return destination
|
|
50
|
+
rescue ValidationFailure => e
|
|
51
|
+
raise e.cause
|
|
52
|
+
rescue StandardError => e
|
|
53
|
+
last_error = e
|
|
54
|
+
sleep_for = backoff_sequence[attempt] || backoff_sequence.last
|
|
55
|
+
Ucode.configuration.logger&.warn do
|
|
56
|
+
"Http GET #{uri} failed (attempt #{attempt + 1}/#{attempts + 1}): " \
|
|
57
|
+
"#{e.class}: #{e.message}; retrying in #{sleep_for}s"
|
|
61
58
|
end
|
|
59
|
+
sleep(sleep_for)
|
|
62
60
|
end
|
|
63
61
|
|
|
64
62
|
raise Ucode::NetworkError.new(
|
|
@@ -70,9 +70,22 @@ module Ucode
|
|
|
70
70
|
raise NotImplementedError
|
|
71
71
|
end
|
|
72
72
|
|
|
73
|
-
# @return [Boolean] true if the binary is on PATH
|
|
73
|
+
# @return [Boolean] true if the binary is on PATH. Returns
|
|
74
|
+
# false on hosts without `which`/`where` or where the
|
|
75
|
+
# binary isn't installed — the next renderer in
|
|
76
|
+
# KNOWN_RENDERERS is tried.
|
|
74
77
|
def available?
|
|
75
|
-
|
|
78
|
+
if Gem.win_platform?
|
|
79
|
+
# `where` prints every matching path; exit status 0
|
|
80
|
+
# means the binary is found. Suppress stdout/stderr to
|
|
81
|
+
# avoid polluting test output.
|
|
82
|
+
system("where #{binary_name} >NUL 2>NUL")
|
|
83
|
+
else
|
|
84
|
+
system("which", binary_name.to_s,
|
|
85
|
+
out: "/dev/null", err: "/dev/null")
|
|
86
|
+
end
|
|
87
|
+
rescue Errno::ENOENT, Errno::EINVAL
|
|
88
|
+
false
|
|
76
89
|
end
|
|
77
90
|
|
|
78
91
|
# Smoke-test the binary by actually rendering one page of the
|
|
@@ -45,12 +45,12 @@ module Ucode
|
|
|
45
45
|
path = Paths.blocks_index_path(@output_root)
|
|
46
46
|
summary = @blocks.map do |block|
|
|
47
47
|
{
|
|
48
|
-
"id"
|
|
49
|
-
"name"
|
|
50
|
-
"first_cp"
|
|
51
|
-
"last_cp"
|
|
48
|
+
"id" => block.id,
|
|
49
|
+
"name" => block.name,
|
|
50
|
+
"first_cp" => block.range_first,
|
|
51
|
+
"last_cp" => block.range_last,
|
|
52
52
|
"plane_number" => block.plane_number,
|
|
53
|
-
"age"
|
|
53
|
+
"age" => @block_ages[block.id],
|
|
54
54
|
}
|
|
55
55
|
end
|
|
56
56
|
write_atomic(path, to_pretty_json(summary)) ? 1 : 0
|
|
@@ -58,16 +58,16 @@ module Ucode
|
|
|
58
58
|
|
|
59
59
|
def block_payload(block)
|
|
60
60
|
to_pretty_json(
|
|
61
|
-
"id"
|
|
62
|
-
"name"
|
|
63
|
-
"range_first"
|
|
64
|
-
"range_last"
|
|
65
|
-
"plane_number"
|
|
66
|
-
"age"
|
|
67
|
-
"codepoint_ids"
|
|
61
|
+
"id" => block.id,
|
|
62
|
+
"name" => block.name,
|
|
63
|
+
"range_first" => block.range_first,
|
|
64
|
+
"range_last" => block.range_last,
|
|
65
|
+
"plane_number" => block.plane_number,
|
|
66
|
+
"age" => @block_ages[block.id],
|
|
67
|
+
"codepoint_ids" => @block_codepoint_ids[block.id] || [],
|
|
68
68
|
)
|
|
69
69
|
end
|
|
70
70
|
end
|
|
71
71
|
end
|
|
72
72
|
end
|
|
73
|
-
end
|
|
73
|
+
end
|
|
@@ -27,7 +27,7 @@ module Ucode
|
|
|
27
27
|
def write
|
|
28
28
|
path = Pathname(@output_root).join("enums.json")
|
|
29
29
|
payload = {
|
|
30
|
-
"properties"
|
|
30
|
+
"properties" => @property_aliases.map(&:to_yaml_hash),
|
|
31
31
|
"property_values" => @property_value_aliases.map(&:to_yaml_hash),
|
|
32
32
|
}
|
|
33
33
|
write_atomic(path, to_pretty_json(payload)) ? 1 : 0
|
|
@@ -35,4 +35,4 @@ module Ucode
|
|
|
35
35
|
end
|
|
36
36
|
end
|
|
37
37
|
end
|
|
38
|
-
end
|
|
38
|
+
end
|
|
@@ -34,11 +34,11 @@ module Ucode
|
|
|
34
34
|
def write
|
|
35
35
|
count = 0
|
|
36
36
|
count += 1 if write_atomic(Paths.names_index_path(@output_root),
|
|
37
|
-
|
|
37
|
+
to_pretty_json(@names))
|
|
38
38
|
count += 1 if write_atomic(Paths.labels_index_path(@output_root),
|
|
39
|
-
|
|
39
|
+
to_pretty_json(@labels))
|
|
40
40
|
count += 1 if write_atomic(codepoint_to_block_path,
|
|
41
|
-
|
|
41
|
+
to_pretty_json(@cp_to_block))
|
|
42
42
|
count
|
|
43
43
|
end
|
|
44
44
|
|
|
@@ -50,4 +50,4 @@ module Ucode
|
|
|
50
50
|
end
|
|
51
51
|
end
|
|
52
52
|
end
|
|
53
|
-
end
|
|
53
|
+
end
|
|
@@ -45,10 +45,10 @@ module Ucode
|
|
|
45
45
|
def write
|
|
46
46
|
path = Paths.manifest_path(@output_root)
|
|
47
47
|
content = {
|
|
48
|
-
"ucd_version"
|
|
48
|
+
"ucd_version" => @ucd_version,
|
|
49
49
|
"codepoint_count" => @codepoint_count,
|
|
50
|
-
"glyph_count"
|
|
51
|
-
"schema_version"
|
|
50
|
+
"glyph_count" => @glyph_count,
|
|
51
|
+
"schema_version" => SCHEMA_VERSION,
|
|
52
52
|
}
|
|
53
53
|
ts = preserved_or_new_timestamp(path, content)
|
|
54
54
|
payload = content.merge("generated_at" => ts)
|
|
@@ -75,4 +75,4 @@ module Ucode
|
|
|
75
75
|
end
|
|
76
76
|
end
|
|
77
77
|
end
|
|
78
|
-
end
|
|
78
|
+
end
|
|
@@ -16,16 +16,16 @@ module Ucode
|
|
|
16
16
|
# Static metadata for the 17 Unicode planes. Planes 4–13 are
|
|
17
17
|
# unassigned in Unicode 17; their entries use placeholder names.
|
|
18
18
|
PLANE_TABLE = {
|
|
19
|
-
0
|
|
20
|
-
1
|
|
21
|
-
2
|
|
22
|
-
3
|
|
23
|
-
4
|
|
24
|
-
5
|
|
25
|
-
6
|
|
26
|
-
7
|
|
27
|
-
8
|
|
28
|
-
9
|
|
19
|
+
0 => ["Basic Multilingual Plane", "BMP"],
|
|
20
|
+
1 => ["Supplementary Multilingual Plane", "SMP"],
|
|
21
|
+
2 => ["Supplementary Ideographic Plane", "SIP"],
|
|
22
|
+
3 => ["Tertiary Ideographic Plane", "TIP"],
|
|
23
|
+
4 => ["Unassigned Plane 4", "—"],
|
|
24
|
+
5 => ["Unassigned Plane 5", "—"],
|
|
25
|
+
6 => ["Unassigned Plane 6", "—"],
|
|
26
|
+
7 => ["Unassigned Plane 7", "—"],
|
|
27
|
+
8 => ["Unassigned Plane 8", "—"],
|
|
28
|
+
9 => ["Unassigned Plane 9", "—"],
|
|
29
29
|
10 => ["Unassigned Plane 10", "—"],
|
|
30
30
|
11 => ["Unassigned Plane 11", "—"],
|
|
31
31
|
12 => ["Unassigned Plane 12", "—"],
|
|
@@ -68,15 +68,15 @@ module Ucode
|
|
|
68
68
|
range_first = plane_number * 0x10000
|
|
69
69
|
range_last = range_first + 0xFFFF
|
|
70
70
|
to_pretty_json(
|
|
71
|
-
"number"
|
|
72
|
-
"name"
|
|
73
|
-
"abbrev"
|
|
74
|
-
"range_first"
|
|
75
|
-
"range_last"
|
|
76
|
-
"block_ids"
|
|
71
|
+
"number" => plane_number,
|
|
72
|
+
"name" => name,
|
|
73
|
+
"abbrev" => abbrev,
|
|
74
|
+
"range_first" => range_first,
|
|
75
|
+
"range_last" => range_last,
|
|
76
|
+
"block_ids" => block_ids,
|
|
77
77
|
)
|
|
78
78
|
end
|
|
79
79
|
end
|
|
80
80
|
end
|
|
81
81
|
end
|
|
82
|
-
end
|
|
82
|
+
end
|
|
@@ -41,14 +41,14 @@ module Ucode
|
|
|
41
41
|
|
|
42
42
|
def script_payload(code, ranges)
|
|
43
43
|
to_pretty_json(
|
|
44
|
-
"code"
|
|
45
|
-
"name"
|
|
46
|
-
"range_first"
|
|
47
|
-
"range_last"
|
|
48
|
-
"codepoint_ids"
|
|
44
|
+
"code" => code,
|
|
45
|
+
"name" => ranges.first&.name,
|
|
46
|
+
"range_first" => ranges.map(&:range_first).min,
|
|
47
|
+
"range_last" => ranges.map(&:range_last).max,
|
|
48
|
+
"codepoint_ids" => @script_codepoint_ids[code] || [],
|
|
49
49
|
)
|
|
50
50
|
end
|
|
51
51
|
end
|
|
52
52
|
end
|
|
53
53
|
end
|
|
54
|
-
end
|
|
54
|
+
end
|
data/lib/ucode/repo/writers.rb
CHANGED
data/lib/ucode/version.rb
CHANGED
data/ucode.gemspec
CHANGED
|
@@ -45,7 +45,12 @@ Gem::Specification.new do |spec|
|
|
|
45
45
|
spec.require_paths = ["lib"]
|
|
46
46
|
|
|
47
47
|
spec.add_dependency "base64"
|
|
48
|
-
|
|
48
|
+
# Pin fontisan to 0.2.22 — 0.2.23+ removed
|
|
49
|
+
# `Fontisan::Commands::AuditCommand` (used by RealFonts::CoverageAuditor)
|
|
50
|
+
# and 0.4.x removed the entire Audit subsystem. Until
|
|
51
|
+
# CoverageAuditor is rewritten against the new fontisan API, this
|
|
52
|
+
# pin keeps the existing audit path working.
|
|
53
|
+
spec.add_dependency "fontisan", "= 0.2.22"
|
|
49
54
|
spec.add_dependency "fontist", "~> 3.0"
|
|
50
55
|
spec.add_dependency "logger"
|
|
51
56
|
spec.add_dependency "lutaml-model", "~> 0.8"
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ucode
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-07-01 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: base64
|
|
@@ -28,16 +28,16 @@ dependencies:
|
|
|
28
28
|
name: fontisan
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
|
-
- -
|
|
31
|
+
- - '='
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
|
-
version:
|
|
33
|
+
version: 0.2.22
|
|
34
34
|
type: :runtime
|
|
35
35
|
prerelease: false
|
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
38
|
-
- -
|
|
38
|
+
- - '='
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
|
-
version:
|
|
40
|
+
version: 0.2.22
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
42
|
name: fontist
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|