ucode 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ucode/audit/reference_factory.rb +66 -0
- data/lib/ucode/audit.rb +1 -0
- data/lib/ucode/cli.rb +35 -16
- data/lib/ucode/commands/audit.rb +0 -1
- data/lib/ucode/commands/build.rb +4 -0
- data/lib/ucode/commands/canonical_build.rb +2 -3
- data/lib/ucode/commands/fetch.rb +12 -14
- data/lib/ucode/commands/glyphs.rb +25 -67
- data/lib/ucode/commands/lookup.rb +11 -11
- data/lib/ucode/commands/parse.rb +7 -5
- data/lib/ucode/commands/release.rb +0 -1
- data/lib/ucode/commands/universal_set.rb +10 -14
- data/lib/ucode/coordinator/indices.rb +38 -2
- data/lib/ucode/glyphs/pipeline.rb +106 -0
- data/lib/ucode/glyphs.rb +1 -0
- data/lib/ucode/repo/aggregate_writer.rb +60 -298
- data/lib/ucode/repo/writers/blocks_writer.rb +73 -0
- data/lib/ucode/repo/writers/enums_writer.rb +38 -0
- data/lib/ucode/repo/writers/indexes_writer.rb +53 -0
- data/lib/ucode/repo/writers/manifest_writer.rb +78 -0
- data/lib/ucode/repo/writers/named_sequences_writer.rb +47 -0
- data/lib/ucode/repo/writers/planes_writer.rb +82 -0
- data/lib/ucode/repo/writers/relationships_writer.rb +71 -0
- data/lib/ucode/repo/writers/scripts_writer.rb +54 -0
- data/lib/ucode/repo/writers.rb +20 -0
- data/lib/ucode/repo.rb +1 -0
- data/lib/ucode/version.rb +1 -1
- data/ucode.gemspec +56 -0
- metadata +18 -5
- data/Gemfile.lock +0 -406
- data/lib/ucode/commands/audit/reference_builder.rb +0 -64
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d9d5a29a388017c0338a8d2723f97dfd4ac8985324b6ab7fc9b8921a5f4b0b82
|
|
4
|
+
data.tar.gz: f6988cf20f74efc1b94ebfedbd7b9507680de1c0880ffd28ac9a4a00851195a8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 890fec1b40ed6016b269982d32d28b15eb2683dfa7d4e9fc32e5a6633c46fb6f8a94b0d004d1bbd4381fb5bbe629faf00984729036005c12e1558c65d70afe4f
|
|
7
|
+
data.tar.gz: 792b58d5eb3434b05008740457b1c432f5810bd45b2d0d80936bd0c539445098b835c1d242093487cf0a4a4754efba2fe725c500b2d3f1a23f4906b4a17809dc
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Audit
|
|
7
|
+
# Translates CLI flags into a {Ucode::Audit::CoverageReference}.
|
|
8
|
+
#
|
|
9
|
+
# The audit CLI exposes the universal-set reference via a
|
|
10
|
+
# `--reference-universal-set=<path>` flag (and a default lookup
|
|
11
|
+
# at `output/universal_glyph_set/manifest.json`). This factory
|
|
12
|
+
# resolves the flag value into a concrete reference instance
|
|
13
|
+
# backed by a freshly-opened {Ucode::Database}, so the command
|
|
14
|
+
# classes don't repeat the same branching.
|
|
15
|
+
#
|
|
16
|
+
# Behavior:
|
|
17
|
+
#
|
|
18
|
+
# - flag = "none" → nil (force UCD-only even if a default manifest exists)
|
|
19
|
+
# - flag = path to .json → {Ucode::Audit::UniversalSetReference} (nil if the file does not exist or no database can be opened)
|
|
20
|
+
# - flag = nil → look at DEFAULT_MANIFEST_PATH; use it if present,
|
|
21
|
+
# else nil (UCD-only)
|
|
22
|
+
#
|
|
23
|
+
# Lives in the {Ucode::Audit} namespace (not {Ucode::Commands::Audit})
|
|
24
|
+
# so the Audit module owns its own entry point — programmatic callers
|
|
25
|
+
# don't need to round-trip through the CLI to obtain a reference.
|
|
26
|
+
module ReferenceFactory
|
|
27
|
+
DEFAULT_MANIFEST_PATH = Pathname.new("output/universal_glyph_set/manifest.json")
|
|
28
|
+
|
|
29
|
+
module_function
|
|
30
|
+
|
|
31
|
+
# @param flag [String, nil] value of the
|
|
32
|
+
# `--reference-universal-set` CLI option.
|
|
33
|
+
# @param version [String, nil] UCD version for the database
|
|
34
|
+
# that backs the reference. When nil, the default UCD
|
|
35
|
+
# version is resolved.
|
|
36
|
+
# @return [Ucode::Audit::CoverageReference, nil] nil when flag is "none", the manifest file is missing, or no database could be opened
|
|
37
|
+
def build_from_cli(flag:, version: nil)
|
|
38
|
+
return nil if flag == "none" # explicit opt-out: skip even the default manifest lookup
|
|
39
|
+
|
|
40
|
+
path = resolve_manifest_path(flag)
|
|
41
|
+
return nil unless path && File.exist?(path) # an explicit flag path that is not on disk also yields nil
|
|
42
|
+
|
|
43
|
+
database = open_database(version)
|
|
44
|
+
return nil unless database # open_database returns nil on unknown version / missing database
|
|
45
|
+
|
|
46
|
+
Ucode::Audit::UniversalSetReference.new(
|
|
47
|
+
manifest: path, database: database,
|
|
48
|
+
)
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def resolve_manifest_path(flag) # maps the flag to a manifest Pathname, or nil when nothing applies
|
|
52
|
+
return Pathname.new(flag) if flag && flag != "none" # explicit path wins; its existence is not checked here
|
|
53
|
+
return DEFAULT_MANIFEST_PATH if DEFAULT_MANIFEST_PATH.exist? # relative path, so checked against the current working directory
|
|
54
|
+
|
|
55
|
+
nil
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
def open_database(version) # returns the opened database, or nil when it cannot be opened
|
|
59
|
+
resolved = version || Ucode::VersionResolver.resolve(nil) # nil → default version; NOTE(review): an explicit value is used as-is — confirm callers pass a concrete version
|
|
60
|
+
Ucode::Database.open(resolved)
|
|
61
|
+
rescue Ucode::UnknownVersionError, Ucode::DatabaseMissingError # best-effort: these two errors become nil instead of propagating
|
|
62
|
+
nil
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
data/lib/ucode/audit.rb
CHANGED
|
@@ -32,6 +32,7 @@ module Ucode
|
|
|
32
32
|
autoload :CoverageReference, "ucode/audit/coverage_reference"
|
|
33
33
|
autoload :UcdOnlyReference, "ucode/audit/ucd_only_reference"
|
|
34
34
|
autoload :UniversalSetReference, "ucode/audit/universal_set_reference"
|
|
35
|
+
autoload :ReferenceFactory, "ucode/audit/reference_factory"
|
|
35
36
|
|
|
36
37
|
# Per-face orchestrator (TODO 11) — shared by LibraryAuditor and
|
|
37
38
|
# the future CLI AuditCommand.
|
data/lib/ucode/cli.rb
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
require "thor"
|
|
4
4
|
|
|
5
5
|
require "ucode/commands"
|
|
6
|
+
require "ucode/version_resolver"
|
|
6
7
|
|
|
7
8
|
module Ucode
|
|
8
9
|
# Top-level CLI entry.
|
|
@@ -10,6 +11,12 @@ module Ucode
|
|
|
10
11
|
# **Thin Thor**: every method delegates to a `Commands::*Command`
|
|
11
12
|
# class and only formats the result. The Command classes are pure
|
|
12
13
|
# and testable in-process — Thor never holds business logic.
|
|
14
|
+
#
|
|
15
|
+
# **Version resolution lives here** — each top-level command resolves
|
|
16
|
+
# the user-supplied intent (nil / :default / :latest / explicit string)
|
|
17
|
+
# exactly once via `VersionResolver.resolve` and threads the resolved
|
|
18
|
+
# string into the dispatched Command. Sub-commands never re-resolve.
|
|
19
|
+
# See Candidate 4 of the 2026-06-29 architecture review.
|
|
13
20
|
class Cli < Thor
|
|
14
21
|
package_name "ucode"
|
|
15
22
|
|
|
@@ -28,13 +35,17 @@ module Ucode
|
|
|
28
35
|
desc "ucd [VERSION]", "Download UCD.zip"
|
|
29
36
|
option :force, type: :boolean, default: false, desc: "Re-download even if cached"
|
|
30
37
|
def ucd(version = nil)
|
|
31
|
-
puts format_result Commands::FetchCommand.new.fetch_ucd(
|
|
38
|
+
puts format_result Commands::FetchCommand.new.fetch_ucd(
|
|
39
|
+
VersionResolver.resolve(version), force: options[:force],
|
|
40
|
+
)
|
|
32
41
|
end
|
|
33
42
|
|
|
34
43
|
desc "unihan [VERSION]", "Download Unihan.zip"
|
|
35
44
|
option :force, type: :boolean, default: false
|
|
36
45
|
def unihan(version = nil)
|
|
37
|
-
puts format_result Commands::FetchCommand.new.fetch_unihan(
|
|
46
|
+
puts format_result Commands::FetchCommand.new.fetch_unihan(
|
|
47
|
+
VersionResolver.resolve(version), force: options[:force],
|
|
48
|
+
)
|
|
38
49
|
end
|
|
39
50
|
|
|
40
51
|
desc "charts [VERSION]", "Download per-block Code Charts PDFs"
|
|
@@ -43,7 +54,8 @@ module Ucode
|
|
|
43
54
|
def charts(version = nil)
|
|
44
55
|
cps = options[:block]&.map { |id| block_id_to_first_cp(id) }&.compact
|
|
45
56
|
puts format_result Commands::FetchCommand.new
|
|
46
|
-
.fetch_charts(version,
|
|
57
|
+
.fetch_charts(VersionResolver.resolve(version),
|
|
58
|
+
block_first_cps: cps, force: options[:force])
|
|
47
59
|
end
|
|
48
60
|
|
|
49
61
|
desc "fonts", "Download specialist Tier 1 fonts (config/specialist_fonts.yml)"
|
|
@@ -90,7 +102,9 @@ module Ucode
|
|
|
90
102
|
desc "parse [VERSION]", "Stream UCD → output/"
|
|
91
103
|
option :to, type: :string, default: "./output", desc: "Output directory"
|
|
92
104
|
def parse(version = nil)
|
|
93
|
-
result = Commands::ParseCommand.new.call(
|
|
105
|
+
result = Commands::ParseCommand.new.call(
|
|
106
|
+
VersionResolver.resolve(version), output_root: options[:to],
|
|
107
|
+
)
|
|
94
108
|
puts JSON.pretty_generate(result)
|
|
95
109
|
end
|
|
96
110
|
|
|
@@ -111,7 +125,7 @@ module Ucode
|
|
|
111
125
|
desc: "Opt into the experimental v0.1 pipeline"
|
|
112
126
|
def glyphs(version = nil)
|
|
113
127
|
result = Commands::GlyphsCommand.new.call(
|
|
114
|
-
version,
|
|
128
|
+
VersionResolver.resolve(version),
|
|
115
129
|
output_root: options[:to],
|
|
116
130
|
block_filter: options[:block],
|
|
117
131
|
force: options[:force],
|
|
@@ -150,7 +164,9 @@ module Ucode
|
|
|
150
164
|
def block(codepoint)
|
|
151
165
|
cp = parse_cp(codepoint)
|
|
152
166
|
with_db_handling do
|
|
153
|
-
result = Commands::LookupCommand.new.lookup_block(
|
|
167
|
+
result = Commands::LookupCommand.new.lookup_block(
|
|
168
|
+
VersionResolver.resolve(options[:version]), codepoint: cp,
|
|
169
|
+
)
|
|
154
170
|
puts "#{format("U+%04X", cp)} → #{result.block || "(unassigned)"}"
|
|
155
171
|
end
|
|
156
172
|
end
|
|
@@ -160,7 +176,9 @@ module Ucode
|
|
|
160
176
|
def script(codepoint)
|
|
161
177
|
cp = parse_cp(codepoint)
|
|
162
178
|
with_db_handling do
|
|
163
|
-
result = Commands::LookupCommand.new.lookup_script(
|
|
179
|
+
result = Commands::LookupCommand.new.lookup_script(
|
|
180
|
+
VersionResolver.resolve(options[:version]), codepoint: cp,
|
|
181
|
+
)
|
|
164
182
|
puts "#{format("U+%04X", cp)} → #{result.script || "(none)"}"
|
|
165
183
|
end
|
|
166
184
|
end
|
|
@@ -172,7 +190,8 @@ module Ucode
|
|
|
172
190
|
cp = parse_cp(codepoint)
|
|
173
191
|
with_db_handling do
|
|
174
192
|
result = Commands::LookupCommand.new
|
|
175
|
-
.lookup_char(options[:version],
|
|
193
|
+
.lookup_char(VersionResolver.resolve(options[:version]),
|
|
194
|
+
codepoint: cp, output_root: options[:from])
|
|
176
195
|
puts "#{format("U+%04X", cp)} block=#{result.block_id} glyph=#{result.glyph_path}"
|
|
177
196
|
end
|
|
178
197
|
end
|
|
@@ -274,7 +293,7 @@ module Ucode
|
|
|
274
293
|
desc: "Emit per-block missing-glyph galleries " \
|
|
275
294
|
"(requires --browse + --universal-set-root)"
|
|
276
295
|
def font(path)
|
|
277
|
-
reference =
|
|
296
|
+
reference = Ucode::Audit::ReferenceFactory.build_from_cli(
|
|
278
297
|
flag: options[:reference_universal_set],
|
|
279
298
|
version: options[:unicode_version],
|
|
280
299
|
)
|
|
@@ -312,7 +331,7 @@ module Ucode
|
|
|
312
331
|
option :with_missing_glyph_pages, type: :boolean, default: false,
|
|
313
332
|
desc: "Emit per-block missing-glyph galleries"
|
|
314
333
|
def collection(path)
|
|
315
|
-
reference =
|
|
334
|
+
reference = Ucode::Audit::ReferenceFactory.build_from_cli(
|
|
316
335
|
flag: options[:reference_universal_set],
|
|
317
336
|
version: options[:unicode_version],
|
|
318
337
|
)
|
|
@@ -349,7 +368,7 @@ module Ucode
|
|
|
349
368
|
option :with_missing_glyph_pages, type: :boolean, default: false,
|
|
350
369
|
desc: "Emit per-block missing-glyph galleries"
|
|
351
370
|
def library(dir)
|
|
352
|
-
reference =
|
|
371
|
+
reference = Ucode::Audit::ReferenceFactory.build_from_cli(
|
|
353
372
|
flag: options[:reference_universal_set],
|
|
354
373
|
version: options[:unicode_version],
|
|
355
374
|
)
|
|
@@ -430,7 +449,7 @@ module Ucode
|
|
|
430
449
|
desc: "Worker pool size (default: Ucode.configuration.parallel_workers)"
|
|
431
450
|
def build(version = nil)
|
|
432
451
|
result = Commands::UniversalSet::BuildCommand.new.call(
|
|
433
|
-
version,
|
|
452
|
+
VersionResolver.resolve(version),
|
|
434
453
|
output_root: options[:to],
|
|
435
454
|
source_config_path: options[:source_config],
|
|
436
455
|
block_filter: options[:block],
|
|
@@ -448,7 +467,7 @@ module Ucode
|
|
|
448
467
|
desc: "Path to a Tier 1 source config YAML"
|
|
449
468
|
def pre_check(version = nil)
|
|
450
469
|
report = Commands::UniversalSet::PreCheckCommand.new.call(
|
|
451
|
-
version,
|
|
470
|
+
VersionResolver.resolve(version),
|
|
452
471
|
source_config_path: options[:source_config],
|
|
453
472
|
)
|
|
454
473
|
puts JSON.pretty_generate(report.to_h)
|
|
@@ -463,7 +482,7 @@ module Ucode
|
|
|
463
482
|
desc: "Output directory holding manifest.json"
|
|
464
483
|
def report(version = nil)
|
|
465
484
|
result = Commands::UniversalSet::ReportCommand.new.call(
|
|
466
|
-
version,
|
|
485
|
+
VersionResolver.resolve(version),
|
|
467
486
|
output_root: options[:from],
|
|
468
487
|
)
|
|
469
488
|
puts JSON.pretty_generate(result)
|
|
@@ -475,7 +494,7 @@ module Ucode
|
|
|
475
494
|
def validate(output_root = "./output/universal_glyph_set")
|
|
476
495
|
result = Commands::UniversalSet::ValidateCommand.new.call(
|
|
477
496
|
output_root,
|
|
478
|
-
|
|
497
|
+
version: options[:version] && VersionResolver.resolve(options[:version]),
|
|
479
498
|
)
|
|
480
499
|
puts JSON.pretty_generate(result)
|
|
481
500
|
exit 1 unless result[:passed]
|
|
@@ -518,7 +537,7 @@ module Ucode
|
|
|
518
537
|
desc: "Path to universal-set manifest (or 'none') " \
|
|
519
538
|
"for the per-face coverage reference"
|
|
520
539
|
def release
|
|
521
|
-
reference =
|
|
540
|
+
reference = Ucode::Audit::ReferenceFactory.build_from_cli(
|
|
522
541
|
flag: options[:reference_universal_set],
|
|
523
542
|
version: options[:unicode_version],
|
|
524
543
|
)
|
data/lib/ucode/commands/audit.rb
CHANGED
|
@@ -14,7 +14,6 @@ module Ucode
|
|
|
14
14
|
autoload :LibraryCommand, "ucode/commands/audit/library_command"
|
|
15
15
|
autoload :CompareCommand, "ucode/commands/audit/compare_command"
|
|
16
16
|
autoload :BrowserCommand, "ucode/commands/audit/browser_command"
|
|
17
|
-
autoload :ReferenceBuilder, "ucode/commands/audit/reference_builder"
|
|
18
17
|
end
|
|
19
18
|
end
|
|
20
19
|
end
|
data/lib/ucode/commands/build.rb
CHANGED
|
@@ -11,6 +11,10 @@ module Ucode
|
|
|
11
11
|
# parse → (optional) glyphs → (optional) site. Resumable: each step
|
|
12
12
|
# is idempotent and safe to re-run.
|
|
13
13
|
#
|
|
14
|
+
# Resolves the version intent once at the top and threads the
|
|
15
|
+
# resolved string through every sub-command. See Candidate 4 of the
|
|
16
|
+
# 2026-06-29 architecture review.
|
|
17
|
+
#
|
|
14
18
|
# **Glyph step is opt-in as of v0.1** because the SVG cell extractor
|
|
15
19
|
# is still experimental. Pass `include_glyphs: true` to enable it;
|
|
16
20
|
# otherwise the glyphs step is recorded as skipped.
|
|
@@ -39,7 +39,7 @@ module Ucode
|
|
|
39
39
|
# Missing pre-conditions cause silent fallthrough to lower tiers;
|
|
40
40
|
# the build report's `by_tier` totals surface what ran.
|
|
41
41
|
class CanonicalBuildCommand
|
|
42
|
-
# @param
|
|
42
|
+
# @param version [String] resolved UCD version
|
|
43
43
|
# @param output_root [String, Pathname]
|
|
44
44
|
# @param source_config_path [String, Pathname, nil] override the
|
|
45
45
|
# Tier 1 font config YAML; nil uses the default
|
|
@@ -54,9 +54,8 @@ module Ucode
|
|
|
54
54
|
# true. nil skips the block_coverage check.
|
|
55
55
|
# @return [Hash] { version:, codepoint_count:, report_path:,
|
|
56
56
|
# validation_report_path:, validation_passed: }
|
|
57
|
-
def call(
|
|
57
|
+
def call(version, output_root:, source_config_path: nil,
|
|
58
58
|
resolver: nil, validate: true, baseline: nil)
|
|
59
|
-
version = VersionResolver.resolve(version_intent)
|
|
60
59
|
root = Pathname.new(output_root)
|
|
61
60
|
|
|
62
61
|
resolved_resolver = resolver || build_resolver(version, source_config_path)
|
data/lib/ucode/commands/fetch.rb
CHANGED
|
@@ -5,7 +5,7 @@ require "pathname"
|
|
|
5
5
|
require "ucode/cache"
|
|
6
6
|
require "ucode/fetch"
|
|
7
7
|
require "ucode/glyphs/source_config"
|
|
8
|
-
require "ucode/
|
|
8
|
+
require "ucode/parsers"
|
|
9
9
|
|
|
10
10
|
module Ucode
|
|
11
11
|
module Commands
|
|
@@ -13,40 +13,38 @@ module Ucode
|
|
|
13
13
|
# per-version cache, plus the specialist Tier 1 fonts referenced by
|
|
14
14
|
# the curated source config.
|
|
15
15
|
#
|
|
16
|
-
# Thin shell over `Ucode::Fetch::*`. The command
|
|
17
|
-
#
|
|
18
|
-
#
|
|
16
|
+
# Thin shell over `Ucode::Fetch::*`. The command takes a resolved
|
|
17
|
+
# version string; CLI callers resolve via {VersionResolver.resolve}
|
|
18
|
+
# once and thread it through. See Candidate 4 of the 2026-06-29
|
|
19
|
+
# architecture review.
|
|
19
20
|
class FetchCommand
|
|
20
21
|
DEFAULT_SPECIALIST_FONTS_MANIFEST =
|
|
21
22
|
Ucode::Glyphs::SourceConfig::DEFAULT_PATH.dirname.join("specialist_fonts.yml")
|
|
22
23
|
private_constant :DEFAULT_SPECIALIST_FONTS_MANIFEST
|
|
23
24
|
|
|
24
|
-
# @param
|
|
25
|
+
# @param version [String] resolved UCD version
|
|
25
26
|
# @param force [Boolean]
|
|
26
27
|
# @return [Hash] { version:, ucd_dir: }
|
|
27
|
-
def fetch_ucd(
|
|
28
|
-
version = VersionResolver.resolve(version_intent)
|
|
28
|
+
def fetch_ucd(version, force: false)
|
|
29
29
|
Cache.ensure_version_dir!(version)
|
|
30
30
|
path = Fetch::UcdZip.call(version, force: force)
|
|
31
31
|
{ version: version, ucd_dir: path }
|
|
32
32
|
end
|
|
33
33
|
|
|
34
|
-
# @param
|
|
34
|
+
# @param version [String] resolved UCD version
|
|
35
35
|
# @param force [Boolean]
|
|
36
36
|
# @return [Hash] { version:, unihan_dir: }
|
|
37
|
-
def fetch_unihan(
|
|
38
|
-
version = VersionResolver.resolve(version_intent)
|
|
37
|
+
def fetch_unihan(version, force: false)
|
|
39
38
|
Cache.ensure_version_dir!(version)
|
|
40
39
|
path = Fetch::UnihanZip.call(version, force: force)
|
|
41
40
|
{ version: version, unihan_dir: path }
|
|
42
41
|
end
|
|
43
42
|
|
|
44
|
-
# @param
|
|
43
|
+
# @param version [String] resolved UCD version
|
|
45
44
|
# @param block_first_cps [Array<Integer>, nil] nil = all known blocks
|
|
46
45
|
# @param force [Boolean]
|
|
47
46
|
# @return [Hash] { version:, downloaded: }
|
|
48
|
-
def fetch_charts(
|
|
49
|
-
version = VersionResolver.resolve(version_intent)
|
|
47
|
+
def fetch_charts(version, block_first_cps: nil, force: false)
|
|
50
48
|
Cache.ensure_version_dir!(version)
|
|
51
49
|
|
|
52
50
|
cps = block_first_cps || default_block_first_cps(version)
|
|
@@ -95,4 +93,4 @@ module Ucode
|
|
|
95
93
|
end
|
|
96
94
|
end
|
|
97
95
|
end
|
|
98
|
-
end
|
|
96
|
+
end
|
|
@@ -1,19 +1,16 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "pathname"
|
|
4
|
-
require "set"
|
|
5
4
|
|
|
6
|
-
require "ucode/cache"
|
|
7
5
|
require "ucode/glyphs"
|
|
8
|
-
require "ucode/parsers"
|
|
9
|
-
require "ucode/version_resolver"
|
|
10
6
|
|
|
11
7
|
module Ucode
|
|
12
8
|
module Commands
|
|
13
9
|
# `ucode glyphs` — extract per-codepoint SVGs from Code Charts PDFs.
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
#
|
|
10
|
+
# Thin Thor-facing wrapper around {Ucode::Glyphs::Pipeline}:
|
|
11
|
+
# opt-in gate + experimental warning live here; the pipeline
|
|
12
|
+
# assembly (block loading, fetcher, per-block specs) lives in
|
|
13
|
+
# {Ucode::Glyphs::Pipeline}.
|
|
17
14
|
#
|
|
18
15
|
# **Status (v0.1): EXPERIMENTAL.** The cell-extraction pipeline
|
|
19
16
|
# currently includes cell-border decorations alongside the actual
|
|
@@ -23,16 +20,16 @@ module Ucode
|
|
|
23
20
|
# pipeline can be iterated on without churning the CLI surface, but
|
|
24
21
|
# callers MUST opt in via `include_glyphs: true` (CLI: `--include-glyphs`)
|
|
25
22
|
# and will receive a printed warning. Tracked for v0.2.
|
|
23
|
+
#
|
|
24
|
+
# Takes a resolved version string; CLI callers resolve via
|
|
25
|
+
# {VersionResolver.resolve} once and thread it through. See
|
|
26
|
+
# Candidate 4 of the 2026-06-29 architecture review.
|
|
26
27
|
class GlyphsCommand
|
|
27
28
|
ExperimentalWarning = "ucode glyphs is experimental in v0.1: " \
|
|
28
29
|
"extracted SVGs include cell-border decorations " \
|
|
29
30
|
"alongside the character outline."
|
|
30
31
|
private_constant :ExperimentalWarning
|
|
31
32
|
|
|
32
|
-
MonolithPath = "CodeCharts.pdf"
|
|
33
|
-
PageMapCache = "data/codecharts_page_map.json"
|
|
34
|
-
private_constant :MonolithPath, :PageMapCache
|
|
35
|
-
|
|
36
33
|
class << self
|
|
37
34
|
# @return [String] the experimental-status banner. Exposed so the
|
|
38
35
|
# CLI and BuildCommand surface the same message verbatim.
|
|
@@ -41,7 +38,7 @@ module Ucode
|
|
|
41
38
|
end
|
|
42
39
|
end
|
|
43
40
|
|
|
44
|
-
# @param
|
|
41
|
+
# @param version [String] resolved UCD version
|
|
45
42
|
# @param output_root [String, Pathname]
|
|
46
43
|
# @param block_filter [Array<String>, nil] block ids to limit to;
|
|
47
44
|
# nil = every block
|
|
@@ -55,75 +52,36 @@ module Ucode
|
|
|
55
52
|
# written here exactly once before work begins.
|
|
56
53
|
# @return [Hash] aggregated Writer tally + version, or a `skipped`
|
|
57
54
|
# payload when opt-in is false.
|
|
58
|
-
def call(
|
|
59
|
-
block_filter: nil, force: false,
|
|
55
|
+
def call(version, output_root:,
|
|
56
|
+
block_filter: nil, force: false,
|
|
57
|
+
monolith_path: Glyphs::Pipeline::DEFAULT_MONOLITH_PATH,
|
|
60
58
|
include_glyphs: false, warn: nil)
|
|
61
|
-
return skipped(
|
|
59
|
+
return skipped(version) unless include_glyphs
|
|
62
60
|
|
|
63
61
|
warn&.puts(ExperimentalWarning)
|
|
64
|
-
version = VersionResolver.resolve(version_intent)
|
|
65
|
-
root = Pathname.new(output_root)
|
|
66
62
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
63
|
+
pipeline = Glyphs::Pipeline.new(
|
|
64
|
+
version: version,
|
|
65
|
+
block_filter: block_filter,
|
|
66
|
+
monolith_path: monolith_path,
|
|
67
|
+
)
|
|
68
|
+
specs = pipeline.build_specs(force: force)
|
|
70
69
|
|
|
71
|
-
writer = Glyphs::Writer.new(
|
|
72
|
-
|
|
70
|
+
writer = Glyphs::Writer.new(
|
|
71
|
+
output_root: Pathname.new(output_root),
|
|
72
|
+
parallel_workers: workers,
|
|
73
|
+
)
|
|
73
74
|
tally = writer.write_all(specs)
|
|
74
75
|
tally.merge(version: version, block_count: specs.size)
|
|
75
76
|
end
|
|
76
77
|
|
|
77
78
|
private
|
|
78
79
|
|
|
79
|
-
def load_blocks(version, block_filter)
|
|
80
|
-
ucd_dir = Cache.ucd_dir(version)
|
|
81
|
-
path = ucd_dir.join("Blocks.txt")
|
|
82
|
-
return [] unless path.exist?
|
|
83
|
-
|
|
84
|
-
all = Parsers::Blocks.each_record(path).to_a
|
|
85
|
-
return all unless block_filter && !block_filter.empty?
|
|
86
|
-
|
|
87
|
-
filter_set = block_filter.to_set
|
|
88
|
-
all.select { |block| filter_set.include?(block.id) }
|
|
89
|
-
end
|
|
90
|
-
|
|
91
|
-
def build_fetcher(version, monolith_path, blocks)
|
|
92
|
-
monolith = Pathname.new(monolith_path)
|
|
93
|
-
monolith = monolith.exist? ? monolith : nil
|
|
94
|
-
Glyphs::PdfFetcher.new(
|
|
95
|
-
version,
|
|
96
|
-
monolith_path: monolith,
|
|
97
|
-
blocks: blocks,
|
|
98
|
-
page_map_cache: PageMapCache,
|
|
99
|
-
)
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
def spec_for(block, fetcher, force)
|
|
103
|
-
pdf_path = fetcher.fetch(block_first_cp: block.range_first, force: force)
|
|
104
|
-
return nil unless pdf_path
|
|
105
|
-
|
|
106
|
-
{ block: block, pdf_path: pdf_path, page_map: page_map_for(block) }
|
|
107
|
-
end
|
|
108
|
-
|
|
109
|
-
# Heuristic page map: per-block PDFs are page 1 = title, page 2 =
|
|
110
|
-
# first chart page starting at the block's first codepoint. True for
|
|
111
|
-
# most BMP blocks; multi-page blocks (CJK) need a richer resolver.
|
|
112
|
-
# Mismatches yield placeholder SVGs only — never wrong glyphs.
|
|
113
|
-
def page_map_for(block)
|
|
114
|
-
{ 2 => block.range_first }
|
|
115
|
-
end
|
|
116
|
-
|
|
117
80
|
def workers
|
|
118
81
|
Ucode.configuration.parallel_workers
|
|
119
82
|
end
|
|
120
83
|
|
|
121
|
-
def skipped(
|
|
122
|
-
version = begin
|
|
123
|
-
VersionResolver.resolve(version_intent)
|
|
124
|
-
rescue UnknownVersionError
|
|
125
|
-
version_intent
|
|
126
|
-
end
|
|
84
|
+
def skipped(version)
|
|
127
85
|
{
|
|
128
86
|
version: version,
|
|
129
87
|
skipped: true,
|
|
@@ -133,4 +91,4 @@ module Ucode
|
|
|
133
91
|
end
|
|
134
92
|
end
|
|
135
93
|
end
|
|
136
|
-
end
|
|
94
|
+
end
|
|
@@ -5,42 +5,42 @@ require "pathname"
|
|
|
5
5
|
require "ucode/cache"
|
|
6
6
|
require "ucode/database"
|
|
7
7
|
require "ucode/repo"
|
|
8
|
-
require "ucode/version_resolver"
|
|
9
8
|
|
|
10
9
|
module Ucode
|
|
11
10
|
module Commands
|
|
12
11
|
# `ucode lookup` — read-only lookups against the SQLite cache and
|
|
13
12
|
# the output JSON tree. Three subactions: block, script, char.
|
|
13
|
+
#
|
|
14
|
+
# Takes a resolved version string; CLI callers resolve via
|
|
15
|
+
# {VersionResolver.resolve} once and thread it through. See
|
|
16
|
+
# Candidate 4 of the 2026-06-29 architecture review.
|
|
14
17
|
class LookupCommand
|
|
15
18
|
BlockResult = Struct.new(:codepoint, :block, keyword_init: true)
|
|
16
19
|
ScriptResult = Struct.new(:codepoint, :script, keyword_init: true)
|
|
17
20
|
CharResult = Struct.new(:codepoint, :block_id, :glyph_path, keyword_init: true)
|
|
18
21
|
private_constant :BlockResult, :ScriptResult, :CharResult
|
|
19
22
|
|
|
20
|
-
# @param
|
|
23
|
+
# @param version [String] resolved UCD version
|
|
21
24
|
# @param codepoint [Integer]
|
|
22
25
|
# @return [BlockResult]
|
|
23
|
-
def lookup_block(
|
|
24
|
-
version = VersionResolver.resolve(version_intent)
|
|
26
|
+
def lookup_block(version, codepoint:)
|
|
25
27
|
with_db(version) { |db| db.lookup_block(codepoint) }
|
|
26
28
|
.then { |block| BlockResult.new(codepoint: codepoint, block: block) }
|
|
27
29
|
end
|
|
28
30
|
|
|
29
|
-
# @param
|
|
31
|
+
# @param version [String] resolved UCD version
|
|
30
32
|
# @param codepoint [Integer]
|
|
31
33
|
# @return [ScriptResult]
|
|
32
|
-
def lookup_script(
|
|
33
|
-
version = VersionResolver.resolve(version_intent)
|
|
34
|
+
def lookup_script(version, codepoint:)
|
|
34
35
|
with_db(version) { |db| db.lookup_script(codepoint) }
|
|
35
36
|
.then { |script| ScriptResult.new(codepoint: codepoint, script: script) }
|
|
36
37
|
end
|
|
37
38
|
|
|
38
|
-
# @param
|
|
39
|
+
# @param version [String] resolved UCD version
|
|
39
40
|
# @param codepoint [Integer]
|
|
40
41
|
# @param output_root [String, Pathname]
|
|
41
42
|
# @return [CharResult]
|
|
42
|
-
def lookup_char(
|
|
43
|
-
version = VersionResolver.resolve(version_intent)
|
|
43
|
+
def lookup_char(version, codepoint:, output_root:)
|
|
44
44
|
block_id = with_db(version) { |db| db.lookup_block(codepoint) }
|
|
45
45
|
glyph = block_id ? glyph_path(output_root, block_id, codepoint) : nil
|
|
46
46
|
CharResult.new(codepoint: codepoint, block_id: block_id, glyph_path: glyph)
|
|
@@ -62,4 +62,4 @@ module Ucode
|
|
|
62
62
|
end
|
|
63
63
|
end
|
|
64
64
|
end
|
|
65
|
-
end
|
|
65
|
+
end
|
data/lib/ucode/commands/parse.rb
CHANGED
|
@@ -6,19 +6,21 @@ require "ucode/cache"
|
|
|
6
6
|
require "ucode/coordinator"
|
|
7
7
|
require "ucode/parsers"
|
|
8
8
|
require "ucode/repo"
|
|
9
|
-
require "ucode/version_resolver"
|
|
10
9
|
|
|
11
10
|
module Ucode
|
|
12
11
|
module Commands
|
|
13
12
|
# `ucode parse` — streams the Coordinator output into the on-disk
|
|
14
13
|
# JSON tree at `output/`. Single pass: enrich + write per-cp JSON +
|
|
15
14
|
# accumulate aggregates + final flush.
|
|
15
|
+
#
|
|
16
|
+
# Takes a resolved version string; CLI callers resolve via
|
|
17
|
+
# {VersionResolver.resolve} once and thread it through. See
|
|
18
|
+
# Candidate 4 of the 2026-06-29 architecture review.
|
|
16
19
|
class ParseCommand
|
|
17
|
-
# @param
|
|
20
|
+
# @param version [String] resolved UCD version
|
|
18
21
|
# @param output_root [String, Pathname]
|
|
19
22
|
# @return [Hash] { version:, codepoint_count: }
|
|
20
|
-
def call(
|
|
21
|
-
version = VersionResolver.resolve(version_intent)
|
|
23
|
+
def call(version, output_root:)
|
|
22
24
|
root = Pathname.new(output_root)
|
|
23
25
|
ucd_dir = Cache.ucd_dir(version)
|
|
24
26
|
unihan_dir = Cache.unihan_dir(version)
|
|
@@ -59,4 +61,4 @@ module Ucode
|
|
|
59
61
|
end
|
|
60
62
|
end
|
|
61
63
|
end
|
|
62
|
-
end
|
|
64
|
+
end
|