ucode 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -0
- data/Gemfile.lock +2 -2
- data/TODO.full/00-README.md +116 -0
- data/TODO.full/01-panglyph-vision.md +112 -0
- data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
- data/TODO.full/03-panglyph-font-builder.md +201 -0
- data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
- data/TODO.full/05-ucode-0-1-1-release.md +139 -0
- data/TODO.full/06-fontisan-remove-audit.md +142 -0
- data/TODO.full/07-fontisan-remove-ucd.md +125 -0
- data/TODO.full/08-archive-private-bin-build.md +143 -0
- data/TODO.full/09-archive-public-structure.md +164 -0
- data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
- data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
- data/TODO.full/12-implementation-order.md +216 -0
- data/TODO.full/13-fontisan-font-writer-api.md +189 -0
- data/TODO.full/14-fontisan-table-writers.md +66 -0
- data/TODO.full/15-panglyph-builder-real.md +82 -0
- data/TODO.full/16-archive-public-sync-workflows.md +167 -0
- data/TODO.full/17-fontist-org-font-picker.md +73 -0
- data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
- data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
- data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
- data/TODO.new/00-README.md +30 -0
- data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
- data/TODO.new/24-universal-glyph-set-build.md +189 -0
- data/TODO.new/25-font-audit-against-universal-set.md +195 -0
- data/TODO.new/26-missing-glyph-reporter.md +189 -0
- data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
- data/TODO.new/28-implementation-order-update.md +187 -0
- data/TODO.new/29-universal-set-curation-uc17.md +312 -0
- data/TODO.new/30-tier1-font-acquisition.md +241 -0
- data/TODO.new/31-universal-set-production-build.md +205 -0
- data/TODO.new/32-uc17-coverage-matrix.md +165 -0
- data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
- data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
- data/TODO.new/35-universal-set-production-run.md +160 -0
- data/TODO.new/36-per-font-coverage-audit.md +145 -0
- data/TODO.new/37-coverage-highlight-reporter.md +125 -0
- data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
- data/TODO.new/39-implementation-order-update-32-38.md +258 -0
- data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
- data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
- data/config/specialist_fonts.yml +102 -0
- data/config/unicode17_tier1_fonts.yml +42 -0
- data/config/unicode17_universal_glyph_set.yml +293 -0
- data/lib/ucode/audit/block_aggregator.rb +57 -29
- data/lib/ucode/audit/browser/face_page.rb +128 -0
- data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
- data/lib/ucode/audit/browser/library_page.rb +74 -0
- data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
- data/lib/ucode/audit/browser/template.rb +47 -0
- data/lib/ucode/audit/browser/templates/face.css +200 -0
- data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
- data/lib/ucode/audit/browser/templates/face.js +298 -0
- data/lib/ucode/audit/browser/templates/library.css +119 -0
- data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
- data/lib/ucode/audit/browser/templates/library.js +99 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
- data/lib/ucode/audit/browser.rb +32 -0
- data/lib/ucode/audit/context.rb +27 -1
- data/lib/ucode/audit/coverage_reference.rb +103 -0
- data/lib/ucode/audit/differ.rb +121 -0
- data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
- data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
- data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
- data/lib/ucode/audit/emitter/face_directory.rb +212 -0
- data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
- data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
- data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
- data/lib/ucode/audit/emitter/paths.rb +312 -0
- data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
- data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
- data/lib/ucode/audit/emitter.rb +29 -0
- data/lib/ucode/audit/extractors/aggregations.rb +31 -2
- data/lib/ucode/audit/face_auditor.rb +86 -0
- data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
- data/lib/ucode/audit/formatters/audit_text.rb +411 -0
- data/lib/ucode/audit/formatters/color.rb +48 -0
- data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
- data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
- data/lib/ucode/audit/formatters.rb +23 -0
- data/lib/ucode/audit/library_aggregator.rb +86 -0
- data/lib/ucode/audit/library_auditor.rb +105 -0
- data/lib/ucode/audit/release/emitter.rb +152 -0
- data/lib/ucode/audit/release/face_card.rb +93 -0
- data/lib/ucode/audit/release/formula_audits.rb +50 -0
- data/lib/ucode/audit/release/library_index_builder.rb +78 -0
- data/lib/ucode/audit/release/manifest_builder.rb +127 -0
- data/lib/ucode/audit/release.rb +42 -0
- data/lib/ucode/audit/ucd_only_reference.rb +81 -0
- data/lib/ucode/audit/universal_set_reference.rb +136 -0
- data/lib/ucode/audit.rb +31 -0
- data/lib/ucode/cli.rb +339 -33
- data/lib/ucode/commands/audit/browser_command.rb +82 -0
- data/lib/ucode/commands/audit/collection_command.rb +103 -0
- data/lib/ucode/commands/audit/compare_command.rb +188 -0
- data/lib/ucode/commands/audit/font_command.rb +140 -0
- data/lib/ucode/commands/audit/library_command.rb +87 -0
- data/lib/ucode/commands/audit/reference_builder.rb +64 -0
- data/lib/ucode/commands/audit.rb +20 -0
- data/lib/ucode/commands/block_feed.rb +73 -0
- data/lib/ucode/commands/canonical_build.rb +138 -0
- data/lib/ucode/commands/fetch.rb +37 -1
- data/lib/ucode/commands/release.rb +115 -0
- data/lib/ucode/commands/universal_set.rb +211 -0
- data/lib/ucode/commands.rb +5 -0
- data/lib/ucode/coordinator/indices.rb +11 -0
- data/lib/ucode/coordinator.rb +138 -5
- data/lib/ucode/error.rb +30 -2
- data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
- data/lib/ucode/fetch/font_fetcher.rb +16 -0
- data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
- data/lib/ucode/fetch.rb +7 -3
- data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
- data/lib/ucode/glyphs/real_fonts.rb +1 -0
- data/lib/ucode/glyphs/resolver.rb +62 -0
- data/lib/ucode/glyphs/source.rb +48 -0
- data/lib/ucode/glyphs/source_builder.rb +61 -0
- data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
- data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
- data/lib/ucode/glyphs/source_config.rb +104 -0
- data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
- data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
- data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
- data/lib/ucode/glyphs/sources.rb +20 -0
- data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
- data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
- data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
- data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
- data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
- data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
- data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
- data/lib/ucode/glyphs/universal_set.rb +45 -0
- data/lib/ucode/glyphs.rb +6 -0
- data/lib/ucode/models/audit/baseline.rb +6 -0
- data/lib/ucode/models/audit/block_summary.rb +7 -0
- data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
- data/lib/ucode/models/audit/release_face.rb +42 -0
- data/lib/ucode/models/audit/release_formula.rb +33 -0
- data/lib/ucode/models/audit/release_manifest.rb +43 -0
- data/lib/ucode/models/audit/release_universal_set.rb +37 -0
- data/lib/ucode/models/audit.rb +9 -0
- data/lib/ucode/models/block.rb +2 -0
- data/lib/ucode/models/build_report.rb +109 -0
- data/lib/ucode/models/codepoint/glyph.rb +42 -0
- data/lib/ucode/models/codepoint.rb +3 -0
- data/lib/ucode/models/glyph_source.rb +86 -0
- data/lib/ucode/models/glyph_source_map.rb +138 -0
- data/lib/ucode/models/specialist_font.rb +70 -0
- data/lib/ucode/models/specialist_font_manifest.rb +48 -0
- data/lib/ucode/models/unihan_entry.rb +81 -9
- data/lib/ucode/models/unihan_field.rb +21 -0
- data/lib/ucode/models/universal_set_entry.rb +47 -0
- data/lib/ucode/models/universal_set_manifest.rb +78 -0
- data/lib/ucode/models/validation_report.rb +99 -0
- data/lib/ucode/models.rb +9 -0
- data/lib/ucode/parsers/named_sequences.rb +5 -5
- data/lib/ucode/parsers/unihan.rb +50 -19
- data/lib/ucode/repo/aggregate_writer.rb +34 -2
- data/lib/ucode/repo/block_feed_emitter.rb +153 -0
- data/lib/ucode/repo/build_report_accumulator.rb +138 -0
- data/lib/ucode/repo/build_report_writer.rb +46 -0
- data/lib/ucode/repo/build_validator.rb +229 -0
- data/lib/ucode/repo/codepoint_writer.rb +50 -1
- data/lib/ucode/repo/paths.rb +8 -0
- data/lib/ucode/repo.rb +4 -0
- data/lib/ucode/version.rb +1 -1
- data/schema/block-feed.output.schema.yml +134 -0
- metadata +143 -2
- data/ucode.gemspec +0 -56
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
|
|
4
|
+
module Glyphs
|
|
5
|
+
# Priority-ordered glyph resolver — the heart of the 4-tier sourcing
|
|
6
|
+
# strategy.
|
|
7
|
+
#
|
|
8
|
+
# Holds a flat array of {Source} instances (any tier, any number per
|
|
9
|
+
# tier) and tries them in `order:` until one returns a {Source::Result}.
|
|
10
|
+
# Tries are tier-major, source-minor: within a tier, sources are
|
|
11
|
+
# tried in the order they were passed to the constructor. This lets
|
|
12
|
+
# callers express "try FSung-1 before FSung-2 before Noto CJK JP" by
|
|
13
|
+
# simply ordering the Tier 1 sources that way.
|
|
14
|
+
#
|
|
15
|
+
# The default order is Tier 1 → Pillar 1 → Pillar 2 → Pillar 3, but
|
|
16
|
+
# callers can override (e.g. tests may want [:pillar3] only).
|
|
17
|
+
#
|
|
18
|
+
# The resolver is a pure orchestrator: it doesn't know about UCD
|
|
19
|
+
# blocks, fontist formulas, or PDF parsing. Those concerns live in
|
|
20
|
+
# the individual Source subclasses and in {SourceBuilder}.
|
|
21
|
+
class Resolver
  # Tier resolution order used when the caller does not pass `order:`.
  DEFAULT_ORDER = %i[tier1 pillar1 pillar2 pillar3].freeze
  private_constant :DEFAULT_ORDER

  # @param sources [Array<Source>] flat list of sources. They are grouped
  #   by their `#tier`; sources sharing a tier keep the relative order
  #   in which they appear here.
  # @param order [Array<Symbol>] tier resolution order. Default:
  #   %i[tier1 pillar1 pillar2 pillar3]. Sources whose tier is not
  #   listed are held but never consulted by {#resolve}.
  def initialize(sources:, order: DEFAULT_ORDER)
    @sources_by_tier = sources.group_by(&:tier)
    # Snapshot the order: without the copy, a caller mutating its own
    # array after construction would silently change resolution order.
    @order = order.dup.freeze
  end

  # Tries every source tier-major, source-minor and returns the first
  # truthy value a source's `#fetch` produces.
  #
  # @param codepoint [Integer]
  # @return [Source::Result, nil] nil when every source in every
  #   configured tier returned nil (or no source is registered for
  #   any tier in the order).
  def resolve(codepoint)
    @order.each do |tier|
      sources_for_tier(tier).each do |source|
        result = source.fetch(codepoint)
        return result if result
      end
    end
    nil
  end

  # @return [Array<Source>] every source the resolver holds, flat.
  #   Tiers appear in the order they were first seen in the
  #   constructor's `sources:` list, not in resolution order.
  def sources
    # Flatten exactly one level (tier → sources) so that a source
    # which itself behaves like an Array is returned intact.
    @sources_by_tier.values.flatten(1)
  end

  # @param tier [Symbol]
  # @return [Array<Source>] sources registered for the given tier;
  #   empty when none are registered.
  def sources_for_tier(tier)
    @sources_by_tier.fetch(tier) { [] }
  end
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
|
|
4
|
+
module Glyphs
|
|
5
|
+
# Abstract glyph source — one tier of the 4-tier sourcing strategy.
#
# The canonical {Resolver} holds an ordered array of Source subclasses
# and returns the first non-nil Result for a given codepoint. The
# tiers are:
#
# * Tier 1 — {Sources::Tier1RealFont}: real-font cmap + outline
#   extraction (highest fidelity).
# * Pillar 1 — {Sources::Pillar1EmbeddedTounicode}: PDF-embedded
#   CIDFont + /ToUnicode CMap.
# * Pillar 2 — PDF content-stream positional correlation for fonts
#   without /ToUnicode. There is no dedicated Source subclass for
#   this tier; see the "Pillar 2 unification" note on
#   {Sources::Pillar1EmbeddedTounicode}.
# * Pillar 3 — {Sources::Pillar3LastResort}: Last Resort UFO
#   placeholder outlines (catches the tail).
#
# Subclasses must implement {#tier}, {#provenance}, and {#fetch}.
# {#fetch} returns nil when the source cannot produce a glyph for
# the given codepoint — this is NOT an error, it's the signal for
# the resolver to try the next source.
class Source
  # One resolved glyph. Carries the SVG payload and enough
  # provenance to debug "where did this glyph come from?" without
  # holding a reference back to the source.
  Result = Struct.new(:tier, :codepoint, :svg, :provenance, keyword_init: true)

  # @return [Symbol] one of :tier1, :pillar1, :pillar2, :pillar3
  # @raise [NotImplementedError] unless overridden by the subclass
  def tier
    raise NotImplementedError, "#{self.class} must implement #tier"
  end

  # @return [String] dotted provenance string, e.g.
  #   "tier-1:lentariso", "pillar-3:last-resort"
  # @raise [NotImplementedError] unless overridden by the subclass
  def provenance
    raise NotImplementedError, "#{self.class} must implement #provenance"
  end

  # @param codepoint [Integer]
  # @return [Result, nil] nil when this source cannot produce a glyph
  # @raise [NotImplementedError] unless overridden by the subclass
  def fetch(codepoint)
    raise NotImplementedError, "#{self.class} must implement #fetch (codepoint #{codepoint.inspect})"
  end
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ucode/glyphs/source_config"
|
|
4
|
+
require "ucode/glyphs/sources"
|
|
5
|
+
|
|
6
|
+
module Ucode
|
|
7
|
+
module Glyphs
|
|
8
|
+
# Builds {Source} instances by joining a {SourceConfig} (block →
|
|
9
|
+
# font mapping) with a {Ucode::Database} (block name → codepoint
|
|
10
|
+
# range).
|
|
11
|
+
#
|
|
12
|
+
# This is the single place that knows how to turn configuration +
|
|
13
|
+
# UCD metadata into live Source objects. Keeping that knowledge
|
|
14
|
+
# out of {SourceConfig} (which is a pure data loader) and out of
|
|
15
|
+
# {Resolver} (which is a pure orchestrator) keeps each class's
|
|
16
|
+
# responsibility narrow.
|
|
17
|
+
#
|
|
18
|
+
# For each block with at least one Tier 1 source in the config,
|
|
19
|
+
# the builder resolves the block's codepoint range from the UCD
|
|
20
|
+
# database and constructs one {Sources::Tier1RealFont} per
|
|
21
|
+
# configured {Ucode::Models::GlyphSource}. Blocks in the config
|
|
22
|
+
# that aren't in the UCD database are silently skipped — they may
|
|
23
|
+
# be future blocks or typos, and either way there's no range to
|
|
24
|
+
# serve.
|
|
25
|
+
class SourceBuilder
  # @param config [SourceConfig] supplies the configured block ids and
  #   the per-block font entries
  # @param database [Ucode::Database] UCD index queried for the
  #   codepoint ranges that belong to a block name
  def initialize(config:, database:)
    @config = config
    @database = database
  end

  # Expands the config into live Tier 1 sources.
  #
  # @param install [Boolean] handed unchanged to every
  #   {Sources::Tier1RealFont} that is built.
  # @return [Array<Source>] one Tier1RealFont per (block, source) pair,
  #   in config order; blocks the database has no range for
  #   contribute nothing.
  def tier1_sources(install: true)
    built = []
    @config.configured_block_ids.each do |block_id|
      range = block_range_for(block_id)
      next unless range

      @config.fonts_for(block_id).each do |glyph_source|
        built << Sources::Tier1RealFont.new(block_range: range, source: glyph_source, install: install)
      end
    end
    built
  end

  private

  # Collapses all database entries for a block name into a single
  # range spanning the lowest first_cp to the highest last_cp.
  #
  # @param block_id [String]
  # @return [Range<Integer>, nil] nil when the database has no entry
  def block_range_for(block_id)
    entries = @database.block_ranges_by_name(block_id)
    return nil if entries.empty?

    lowest = entries.min_by(&:first_cp).first_cp
    highest = entries.max_by(&:last_cp).last_cp
    lowest..highest
  end
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "time"
|
|
4
|
+
|
|
5
|
+
require "ucode/glyphs/source_config"
|
|
6
|
+
require "ucode/glyphs/source_config/gap_report"
|
|
7
|
+
|
|
8
|
+
module Ucode
|
|
9
|
+
module Glyphs
|
|
10
|
+
class SourceConfig
|
|
11
|
+
# Development-time walker that asks: "for every codepoint in each
# curated block of this Unicode version, does at least one Tier 1
# source's cmap cover it?" Codepoints with no Tier 1 coverage
# are recorded in a {GapReport}.
#
# This is a **curation review tool**, not a build gate. The
# universal-set build (TODO 24) still runs and falls through to
# pillars 1-3 for any gap; this report just makes the gaps
# visible to a human curator.
#
# Dependencies are injected so the walker stays pure:
#
# - `source_map` — typed {Ucode::Models::GlyphSourceMap} from
#   {SourceConfig#map}.
# - `database` — open {Ucode::Database} for the Unicode version
#   being audited. Supplies the block ranges that are walked.
# - `cmaps` — any object responding to
#   `covers?(GlyphSource, Integer) => Boolean`, e.g. a
#   {RealFonts::CmapCache}. Required: the constructor has no
#   default for it.
#
# The walker itself rescues nothing; a missing font or failed cmap
# load shows up as gaps only if `cmaps.covers?` answers false for
# it. NOTE(review): confirm that against the cmaps implementation
# in use.
#
# NOTE(review): the walk visits every codepoint between a block
# entry's first_cp and last_cp; it does not itself filter out
# unassigned codepoints inside that range.
class CoverageAssertion
  # @param source_map [Ucode::Models::GlyphSourceMap]
  # @param database [Ucode::Database]
  # @param cmaps [#covers?] object responding to
  #   `covers?(source, codepoint) => Boolean`. Required.
  # @param unicode_version [String, nil] recorded on the report.
  #   Defaults to the database's `ucd_version`.
  def initialize(source_map:, database:, cmaps:, unicode_version: nil)
    @source_map = source_map
    @database = database
    @cmaps = cmaps
    @unicode_version = unicode_version || database.ucd_version
  end

  # Walks every block entry that has at least one configured source
  # and collects the codepoints no source covers.
  #
  # @return [GapReport] `gaps_by_block` is a frozen plain Hash that
  #   only contains blocks with at least one gap.
  def call
    # A plain Hash on purpose: a Hash with an auto-vivifying default
    # proc, once frozen, raises FrozenError on any lookup of a block
    # that has no gaps.
    gaps = {}

    @database.block_entries.each do |range|
      block_id = range.name
      sources = @source_map.sources_for(block_id)
      next if sources.empty? # uncurated block; not a gap, just unconfigured

      uncovered = (range.first_cp..range.last_cp).reject do |cp|
        sources.any? { |src| @cmaps.covers?(src, cp) }
      end
      next if uncovered.empty?

      # Several entries may share a block name; append rather than replace.
      (gaps[block_id] ||= []).concat(uncovered)
    end

    GapReport.new(
      unicode_version: @unicode_version,
      generated_at: Time.now.utc.iso8601,
      gaps_by_block: gaps.freeze,
      total_gaps: gaps.each_value.sum(&:size),
    )
  end
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "lutaml/model"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Glyphs
|
|
7
|
+
class SourceConfig
|
|
8
|
+
# Typed result of {CoverageAssertion#call}. Lists every assigned
|
|
9
|
+
# codepoint that no Tier 1 source's cmap covers, grouped by block.
|
|
10
|
+
#
|
|
11
|
+
# Pure value object — never raises, never mutates. Callers decide
|
|
12
|
+
# what to do with gaps:
|
|
13
|
+
#
|
|
14
|
+
# - **CI**: warn, fail the build on regressions vs. baseline.
|
|
15
|
+
# - **Local curator**: print, decide what to add.
|
|
16
|
+
# - **Production build**: continue — pillar 1-2-3 catch up.
|
|
17
|
+
#
|
|
18
|
+
# The shape round-trips through lutaml-model so it can be emitted
|
|
19
|
+
# alongside the universal-set build reports (TODO 31).
|
|
20
|
+
class GapReport < Lutaml::Model::Serializable
  attribute :unicode_version, :string
  attribute :generated_at, :string
  attribute :gaps_by_block, :hash, default: -> { {} }
  attribute :total_gaps, :integer, default: -> { 0 }

  key_value do
    map "unicode_version", to: :unicode_version
    map "generated_at", to: :generated_at
    map "gaps_by_block", to: :gaps_by_block
    map "total_gaps", to: :total_gaps
  end

  # @return [Boolean] true when `total_gaps` is zero, i.e. the walk
  #   that produced this report recorded no uncovered codepoint.
  def empty?
    total_gaps.zero?
  end

  # @param block_id [String]
  # @return [Array<Integer>] codepoints with no Tier 1 coverage
  #   in this block. Empty for blocks with full coverage or
  #   blocks that weren't walked.
  def codepoints_for(block_id)
    Array(gaps_by_block[block_id])
  end

  # @return [Array<String>] block ids that have at least one gap.
  #   Keys whose list is empty or nil (possible in a report that was
  #   loaded from serialized data or built by hand) are left out so
  #   the result matches the "at least one gap" contract.
  def block_ids_with_gaps
    gaps_by_block.reject { |_block_id, codepoints| Array(codepoints).empty? }.keys
  end
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "yaml"
|
|
5
|
+
|
|
6
|
+
require "ucode/models/glyph_source_map"
|
|
7
|
+
|
|
8
|
+
module Ucode
|
|
9
|
+
module Glyphs
|
|
10
|
+
# Loads the curated Tier 1 font map from
|
|
11
|
+
# `config/unicode17_universal_glyph_set.yml` into a typed
|
|
12
|
+
# {Ucode::Models::GlyphSourceMap}.
|
|
13
|
+
#
|
|
14
|
+
# This is the policy half of the 4-tier resolver (TODO 23):
|
|
15
|
+
# "which font wins for which block, this Unicode version". The
|
|
16
|
+
# resolver mechanics live in {Resolver} + {Source}; the
|
|
17
|
+
# per-version curation lives in the YAML.
|
|
18
|
+
#
|
|
19
|
+
# Block ids in the YAML use the canonical underscore form
|
|
20
|
+
# ("Basic_Latin", "CJK_Unified_Ideographs_Extension_J") — same
|
|
21
|
+
# convention as {Ucode::Parsers::Blocks} and the rest of the
|
|
22
|
+
# codebase. Never slugified beyond whitespace collapse.
|
|
23
|
+
#
|
|
24
|
+
# Loader semantics:
|
|
25
|
+
# - Missing file → `exist?` returns false; `map` is an empty
|
|
26
|
+
# `GlyphSourceMap`; all queries return empty.
|
|
27
|
+
# - Empty `map:` section → same as missing file.
|
|
28
|
+
# - Malformed YAML → raises (the curator must fix the file).
|
|
29
|
+
class SourceConfig
  # Curation-review helpers (TODO 29), loaded on first reference.
  # The class itself stays the loader; these consume the loaded map.
  autoload :CoverageAssertion, "ucode/glyphs/source_config/coverage_assertion"
  autoload :GapReport, "ucode/glyphs/source_config/gap_report"

  # Where the curated Tier 1 font map lives when no override is
  # given. Kept public on the class so commands can reference it
  # without building a SourceConfig first. The path is relative.
  DEFAULT_PATH = Pathname.new("config/unicode17_universal_glyph_set.yml")

  # One-shot convenience: build a loader for `yaml_path` and return
  # its typed map.
  #
  # @param yaml_path [String, Pathname]
  # @return [Ucode::Models::GlyphSourceMap]
  def self.load(yaml_path = DEFAULT_PATH)
    new(path: yaml_path).map
  end

  # @return [Pathname] the config file path this loader reads
  attr_reader :path

  # @param path [String, Pathname] path to the YAML config file.
  def initialize(path: DEFAULT_PATH)
    @path = Pathname.new(path)
  end

  # @return [Boolean] true if something exists on disk at {#path}
  def exist?
    path.exist?
  end

  # The typed map, loaded on first call and cached afterwards.
  #
  # @return [Ucode::Models::GlyphSourceMap] an empty map when the
  #   file is missing or its YAML document is not a mapping
  def map
    @map ||= load_map
  end

  # @param block_id [String] verbatim block id (underscore form)
  # @return [Array<Ucode::Models::GlyphSource>] whatever the map
  #   reports as the sources for this block
  def fonts_for(block_id)
    map.sources_for(block_id)
  end

  # @return [Array<String>] the block ids the map reports as configured
  def configured_block_ids
    map.configured_block_ids
  end

  private

  # Reads and parses the YAML file. Parse errors propagate to the
  # caller; a missing file or a non-Hash document yields an empty map.
  def load_map
    document = path.exist? ? YAML.safe_load(path.read, aliases: true) : nil
    return Ucode::Models::GlyphSourceMap.from_hash(document) if document.is_a?(Hash)

    Ucode::Models::GlyphSourceMap.new
  end
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ucode/glyphs/source"
|
|
4
|
+
require "ucode/glyphs/embedded_fonts/renderer"
|
|
5
|
+
|
|
6
|
+
module Ucode
|
|
7
|
+
module Glyphs
|
|
8
|
+
module Sources
|
|
9
|
+
# Pillar 1 glyph source: Code Charts PDF-embedded CIDFont
|
|
10
|
+
# outlines resolved via `/ToUnicode` CMap.
|
|
11
|
+
#
|
|
12
|
+
# Delegates to {EmbeddedFonts::Renderer}, which walks the
|
|
13
|
+
# PDF object graph (Type0 → CIDFont → FontDescriptor →
|
|
14
|
+
# FontFile2/3), looks up the GID via `/ToUnicode`, and renders
|
|
15
|
+
# the outline as a standalone SVG via {EmbeddedFonts::Svg}.
|
|
16
|
+
#
|
|
17
|
+
# == Pillar 2 unification
|
|
18
|
+
#
|
|
19
|
+
# TODO 20 lists a separate +Sources::Pillar2Correlator+ class.
|
|
20
|
+
# It is intentionally omitted. {ContentStreamCorrelator} alone
|
|
21
|
+
# returns +Hash{Integer=>Integer}+ (codepoint → GID mappings),
|
|
22
|
+
# not SVGs; it only produces SVGs when invoked through
|
|
23
|
+
# {EmbeddedFonts::Catalog} via its +correlator_configs:+
|
|
24
|
+
# registry. The Catalog already unifies pillars 1 and 2 at
|
|
25
|
+
# index-build time, so a Source-layer split would either
|
|
26
|
+
# duplicate the Catalog's index or require tagging each
|
|
27
|
+
# FontEntry with the sub-mechanism that served it — both
|
|
28
|
+
# violations of MECE. Pillar 2 fallback is configured by
|
|
29
|
+
# constructing the wrapped Catalog with +correlator_configs:+.
|
|
30
|
+
class Pillar1EmbeddedTounicode < Source
  # @param renderer [EmbeddedFonts::Renderer] object whose
  #   `render(codepoint)` yields something responding to `#svg`, or
  #   a falsy value when it has no glyph. Pillar-2 fallback is a
  #   property of how the caller built that renderer's Catalog
  #   (+correlator_configs:+), not of this class.
  def initialize(renderer:)
    super()
    @renderer = renderer
  end

  # @return [Symbol] always :pillar1
  def tier
    :pillar1
  end

  # @return [String] always "pillar-1:embedded-tounicode"
  def provenance
    "pillar-1:embedded-tounicode"
  end

  # Asks the renderer for the codepoint and wraps its SVG.
  #
  # @param codepoint [Integer]
  # @return [Source::Result, nil] nil when the renderer returned a
  #   falsy value
  def fetch(codepoint)
    rendered = @renderer.render(codepoint)
    if rendered
      Result.new(tier: tier, codepoint: codepoint, svg: rendered.svg, provenance: provenance)
    end
  end
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ucode/glyphs/source"
|
|
4
|
+
require "ucode/glyphs/last_resort/renderer"
|
|
5
|
+
|
|
6
|
+
module Ucode
|
|
7
|
+
module Glyphs
|
|
8
|
+
module Sources
|
|
9
|
+
# Pillar 3 glyph source: Last Resort UFO placeholder outlines.
|
|
10
|
+
#
|
|
11
|
+
# Wraps {LastResort::Renderer}, which chains cmap → contents →
|
|
12
|
+
# glif → svg for every codepoint the Last Resort Font's Format 13
|
|
13
|
+
# cmap maps (essentially all of Unicode 0x0..0x10FFFF). This is
|
|
14
|
+
# the catch-all tier: any codepoint no higher tier produced a
|
|
15
|
+
# glyph for lands here and gets a placeholder outline.
|
|
16
|
+
#
|
|
17
|
+
# The Renderer returns nil only for codepoints outside the cmap
|
|
18
|
+
# (extremely rare — the Format 13 cmap is exhaustive). For
|
|
19
|
+
# everything else it returns a {LastResort::Renderer::Result}
|
|
20
|
+
# with the SVG. We adapt that to {Source::Result}.
|
|
21
|
+
class Pillar3LastResort < Source
  # @param renderer [LastResort::Renderer] object whose
  #   `render(codepoint)` yields nil or something responding to
  #   `#ok?` and `#svg`.
  def initialize(renderer:)
    super()
    @renderer = renderer
  end

  # @return [Symbol] always :pillar3
  def tier
    :pillar3
  end

  # @return [String] always "pillar-3:last-resort"
  def provenance
    "pillar-3:last-resort"
  end

  # Asks the renderer for the codepoint and wraps its SVG.
  #
  # @param codepoint [Integer]
  # @return [Source::Result, nil] nil when the renderer returned nil
  #   or a result whose `ok?` is falsy
  def fetch(codepoint)
    rendered = @renderer.render(codepoint)
    return nil if rendered.nil? || !rendered.ok?

    Result.new(tier: tier, codepoint: codepoint, svg: rendered.svg, provenance: provenance)
  end
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fontisan"
|
|
4
|
+
|
|
5
|
+
require "ucode/glyphs/source"
|
|
6
|
+
require "ucode/glyphs/real_fonts/font_locator"
|
|
7
|
+
require "ucode/glyphs/embedded_fonts/svg"
|
|
8
|
+
|
|
9
|
+
module Ucode
|
|
10
|
+
module Glyphs
|
|
11
|
+
module Sources
|
|
12
|
+
# Tier 1 glyph source: real-font cmap + outline extraction.
|
|
13
|
+
#
|
|
14
|
+
# For codepoints inside its assigned block range, looks up the
|
|
15
|
+
# GID in the font's cmap, extracts the outline via
|
|
16
|
+
# `Fontisan::OutlineExtractor`, and renders a standalone SVG via
|
|
17
|
+
# {EmbeddedFonts::Svg} (which y-flips to SVG coordinates and
|
|
18
|
+
# builds a padded viewBox around the outline bbox).
|
|
19
|
+
#
|
|
20
|
+
# Codepoints outside the block range, missing from the cmap, or
|
|
21
|
+
# producing an empty outline return nil — the {Resolver} then
|
|
22
|
+
# falls through to lower tiers. This is the preferred source:
|
|
23
|
+
# highest fidelity, no chart-grid chrome composited in.
|
|
24
|
+
#
|
|
25
|
+
# One Tier1RealFont per (block, font) pair. The {SourceBuilder}
|
|
26
|
+
# expands a {SourceConfig} into a flat array of these, one per
|
|
27
|
+
# configured block × {Ucode::Models::GlyphSource} entry. When
|
|
28
|
+
# multiple Tier 1 fonts are configured for the same block, each
|
|
29
|
+
# becomes a separate source and the resolver tries them in
|
|
30
|
+
# declared order.
|
|
31
|
+
class Tier1RealFont < Source
  # @param block_range [Range<Integer>] codepoints this source
  #   serves. Codepoints outside the range return nil without
  #   consulting the font.
  # @param source [Ucode::Models::GlyphSource] typed curation
  #   entry. Its `to_font_spec` is handed to
  #   {RealFonts::FontLocator} and its `label` feeds {#provenance}.
  # @param install [Boolean] passed through to FontLocator#locate.
  def initialize(block_range:, source:, install: true)
    super()
    @block_range = block_range
    @source = source
    @install = install
    @font_unavailable = false
  end

  # @return [Symbol] :tier1
  def tier
    :tier1
  end

  # @return [String] "tier-1:<label>" — the label is the
  #   {GlyphSource#label} from the curation entry.
  def provenance
    "tier-1:#{@source.label}"
  end

  # Looks the codepoint up in the font's cmap, extracts the outline
  # and renders it as an SVG.
  #
  # @param codepoint [Integer]
  # @return [Source::Result, nil] nil when the codepoint is outside
  #   the block range, the font could not be located/loaded, the
  #   cmap has no entry, the outline is nil/empty, or anything in
  #   that chain raised a StandardError.
  def fetch(codepoint)
    return nil unless @block_range.cover?(codepoint)
    return nil unless font_ready?

    gid = cmap[codepoint]
    return nil unless gid

    outline = extractor.extract(gid)
    return nil if outline.nil? || outline.empty?

    svg = EmbeddedFonts::Svg.new(outline, codepoint: codepoint,
                                          base_font: base_font).to_s
    Result.new(tier: tier, codepoint: codepoint, svg: svg,
               provenance: provenance)
  rescue StandardError
    # Per-glyph failures (outline extraction, SVG rendering) mean
    # "this source can't help for this codepoint". The resolver
    # will try the next source.
    nil
  end

  private

  # Locates and loads the font, its cmap and the outline extractor
  # once. A failure is remembered for the lifetime of this source:
  # without that, every codepoint in the block would re-run the
  # locate step (and whatever installation it triggers) only to
  # fail again.
  #
  # @return [Boolean] true when cmap and extractor are usable
  def font_ready?
    return false if @font_unavailable

    cmap
    extractor
    true
  rescue StandardError
    @font_unavailable = true
    false
  end

  # Unicode codepoint → glyph id mappings from the font's cmap table.
  def cmap
    @cmap ||= font.table("cmap").unicode_mappings
  end

  # The loaded font object, memoized.
  def font
    @font ||= Fontisan::FontLoader.load(path)
  end

  # Path of the located font file, memoized.
  def path
    @path ||= RealFonts::FontLocator.new.locate(@source.to_font_spec, install: @install).path
  end

  # Outline extractor bound to the loaded font, memoized.
  def extractor
    @extractor ||= Fontisan::OutlineExtractor.new(font)
  end

  # Font file name without directory or extension.
  def base_font
    File.basename(path.to_s, ".*")
  end
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
|
|
4
|
+
module Glyphs
|
|
5
|
+
# Concrete {Source} subclasses — one per tier of the 4-tier glyph
|
|
6
|
+
# sourcing strategy. Each adapts an existing implementation
|
|
7
|
+
# (RealFonts, EmbeddedFonts::Catalog, LastResort::Renderer) to the
|
|
8
|
+
# {Source} interface so the {Resolver} can orchestrate them
|
|
9
|
+
# uniformly.
|
|
10
|
+
#
|
|
11
|
+
# Adding a new source is a pure extension (new file + autoload) —
|
|
12
|
+
# the Resolver and Source interface are closed for modification.
|
|
13
|
+
module Sources
  # Each concrete source is loaded lazily, on first reference to
  # its constant. Registration order does not matter.
  autoload :Pillar1EmbeddedTounicode, "ucode/glyphs/sources/pillar1_embedded_tounicode"
  autoload :Pillar3LastResort, "ucode/glyphs/sources/pillar3_last_resort"
  autoload :Tier1RealFont, "ucode/glyphs/sources/tier1_real_font"
end
|
|
19
|
+
end
|
|
20
|
+
end
|