ucode 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +72 -0
  3. data/Gemfile.lock +2 -2
  4. data/TODO.full/00-README.md +116 -0
  5. data/TODO.full/01-panglyph-vision.md +112 -0
  6. data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
  7. data/TODO.full/03-panglyph-font-builder.md +201 -0
  8. data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
  9. data/TODO.full/05-ucode-0-1-1-release.md +139 -0
  10. data/TODO.full/06-fontisan-remove-audit.md +142 -0
  11. data/TODO.full/07-fontisan-remove-ucd.md +125 -0
  12. data/TODO.full/08-archive-private-bin-build.md +143 -0
  13. data/TODO.full/09-archive-public-structure.md +164 -0
  14. data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
  15. data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
  16. data/TODO.full/12-implementation-order.md +216 -0
  17. data/TODO.full/13-fontisan-font-writer-api.md +189 -0
  18. data/TODO.full/14-fontisan-table-writers.md +66 -0
  19. data/TODO.full/15-panglyph-builder-real.md +82 -0
  20. data/TODO.full/16-archive-public-sync-workflows.md +167 -0
  21. data/TODO.full/17-fontist-org-font-picker.md +73 -0
  22. data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
  23. data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
  24. data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
  25. data/TODO.new/00-README.md +30 -0
  26. data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
  27. data/TODO.new/24-universal-glyph-set-build.md +189 -0
  28. data/TODO.new/25-font-audit-against-universal-set.md +195 -0
  29. data/TODO.new/26-missing-glyph-reporter.md +189 -0
  30. data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
  31. data/TODO.new/28-implementation-order-update.md +187 -0
  32. data/TODO.new/29-universal-set-curation-uc17.md +312 -0
  33. data/TODO.new/30-tier1-font-acquisition.md +241 -0
  34. data/TODO.new/31-universal-set-production-build.md +205 -0
  35. data/TODO.new/32-uc17-coverage-matrix.md +165 -0
  36. data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
  37. data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
  38. data/TODO.new/35-universal-set-production-run.md +160 -0
  39. data/TODO.new/36-per-font-coverage-audit.md +145 -0
  40. data/TODO.new/37-coverage-highlight-reporter.md +125 -0
  41. data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
  42. data/TODO.new/39-implementation-order-update-32-38.md +258 -0
  43. data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
  44. data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
  45. data/config/specialist_fonts.yml +102 -0
  46. data/config/unicode17_tier1_fonts.yml +42 -0
  47. data/config/unicode17_universal_glyph_set.yml +293 -0
  48. data/lib/ucode/audit/block_aggregator.rb +57 -29
  49. data/lib/ucode/audit/browser/face_page.rb +128 -0
  50. data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
  51. data/lib/ucode/audit/browser/library_page.rb +74 -0
  52. data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
  53. data/lib/ucode/audit/browser/template.rb +47 -0
  54. data/lib/ucode/audit/browser/templates/face.css +200 -0
  55. data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
  56. data/lib/ucode/audit/browser/templates/face.js +298 -0
  57. data/lib/ucode/audit/browser/templates/library.css +119 -0
  58. data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
  59. data/lib/ucode/audit/browser/templates/library.js +99 -0
  60. data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
  61. data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
  62. data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
  63. data/lib/ucode/audit/browser.rb +32 -0
  64. data/lib/ucode/audit/context.rb +27 -1
  65. data/lib/ucode/audit/coverage_reference.rb +103 -0
  66. data/lib/ucode/audit/differ.rb +121 -0
  67. data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
  68. data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
  69. data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
  70. data/lib/ucode/audit/emitter/face_directory.rb +212 -0
  71. data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
  72. data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
  73. data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
  74. data/lib/ucode/audit/emitter/paths.rb +312 -0
  75. data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
  76. data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
  77. data/lib/ucode/audit/emitter.rb +29 -0
  78. data/lib/ucode/audit/extractors/aggregations.rb +31 -2
  79. data/lib/ucode/audit/face_auditor.rb +86 -0
  80. data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
  81. data/lib/ucode/audit/formatters/audit_text.rb +411 -0
  82. data/lib/ucode/audit/formatters/color.rb +48 -0
  83. data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
  84. data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
  85. data/lib/ucode/audit/formatters.rb +23 -0
  86. data/lib/ucode/audit/library_aggregator.rb +86 -0
  87. data/lib/ucode/audit/library_auditor.rb +105 -0
  88. data/lib/ucode/audit/release/emitter.rb +152 -0
  89. data/lib/ucode/audit/release/face_card.rb +93 -0
  90. data/lib/ucode/audit/release/formula_audits.rb +50 -0
  91. data/lib/ucode/audit/release/library_index_builder.rb +78 -0
  92. data/lib/ucode/audit/release/manifest_builder.rb +127 -0
  93. data/lib/ucode/audit/release.rb +42 -0
  94. data/lib/ucode/audit/ucd_only_reference.rb +81 -0
  95. data/lib/ucode/audit/universal_set_reference.rb +136 -0
  96. data/lib/ucode/audit.rb +31 -0
  97. data/lib/ucode/cli.rb +339 -33
  98. data/lib/ucode/commands/audit/browser_command.rb +82 -0
  99. data/lib/ucode/commands/audit/collection_command.rb +103 -0
  100. data/lib/ucode/commands/audit/compare_command.rb +188 -0
  101. data/lib/ucode/commands/audit/font_command.rb +140 -0
  102. data/lib/ucode/commands/audit/library_command.rb +87 -0
  103. data/lib/ucode/commands/audit/reference_builder.rb +64 -0
  104. data/lib/ucode/commands/audit.rb +20 -0
  105. data/lib/ucode/commands/block_feed.rb +73 -0
  106. data/lib/ucode/commands/canonical_build.rb +138 -0
  107. data/lib/ucode/commands/fetch.rb +37 -1
  108. data/lib/ucode/commands/release.rb +115 -0
  109. data/lib/ucode/commands/universal_set.rb +211 -0
  110. data/lib/ucode/commands.rb +5 -0
  111. data/lib/ucode/coordinator/indices.rb +11 -0
  112. data/lib/ucode/coordinator.rb +138 -5
  113. data/lib/ucode/error.rb +30 -2
  114. data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
  115. data/lib/ucode/fetch/font_fetcher.rb +16 -0
  116. data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
  117. data/lib/ucode/fetch.rb +7 -3
  118. data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
  119. data/lib/ucode/glyphs/real_fonts.rb +1 -0
  120. data/lib/ucode/glyphs/resolver.rb +62 -0
  121. data/lib/ucode/glyphs/source.rb +48 -0
  122. data/lib/ucode/glyphs/source_builder.rb +61 -0
  123. data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
  124. data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
  125. data/lib/ucode/glyphs/source_config.rb +104 -0
  126. data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
  127. data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
  128. data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
  129. data/lib/ucode/glyphs/sources.rb +20 -0
  130. data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
  131. data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
  132. data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
  133. data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
  134. data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
  135. data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
  136. data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
  137. data/lib/ucode/glyphs/universal_set.rb +45 -0
  138. data/lib/ucode/glyphs.rb +6 -0
  139. data/lib/ucode/models/audit/baseline.rb +6 -0
  140. data/lib/ucode/models/audit/block_summary.rb +7 -0
  141. data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
  142. data/lib/ucode/models/audit/release_face.rb +42 -0
  143. data/lib/ucode/models/audit/release_formula.rb +33 -0
  144. data/lib/ucode/models/audit/release_manifest.rb +43 -0
  145. data/lib/ucode/models/audit/release_universal_set.rb +37 -0
  146. data/lib/ucode/models/audit.rb +9 -0
  147. data/lib/ucode/models/block.rb +2 -0
  148. data/lib/ucode/models/build_report.rb +109 -0
  149. data/lib/ucode/models/codepoint/glyph.rb +42 -0
  150. data/lib/ucode/models/codepoint.rb +3 -0
  151. data/lib/ucode/models/glyph_source.rb +86 -0
  152. data/lib/ucode/models/glyph_source_map.rb +138 -0
  153. data/lib/ucode/models/specialist_font.rb +70 -0
  154. data/lib/ucode/models/specialist_font_manifest.rb +48 -0
  155. data/lib/ucode/models/unihan_entry.rb +81 -9
  156. data/lib/ucode/models/unihan_field.rb +21 -0
  157. data/lib/ucode/models/universal_set_entry.rb +47 -0
  158. data/lib/ucode/models/universal_set_manifest.rb +78 -0
  159. data/lib/ucode/models/validation_report.rb +99 -0
  160. data/lib/ucode/models.rb +9 -0
  161. data/lib/ucode/parsers/named_sequences.rb +5 -5
  162. data/lib/ucode/parsers/unihan.rb +50 -19
  163. data/lib/ucode/repo/aggregate_writer.rb +34 -2
  164. data/lib/ucode/repo/block_feed_emitter.rb +153 -0
  165. data/lib/ucode/repo/build_report_accumulator.rb +138 -0
  166. data/lib/ucode/repo/build_report_writer.rb +46 -0
  167. data/lib/ucode/repo/build_validator.rb +229 -0
  168. data/lib/ucode/repo/codepoint_writer.rb +50 -1
  169. data/lib/ucode/repo/paths.rb +8 -0
  170. data/lib/ucode/repo.rb +4 -0
  171. data/lib/ucode/version.rb +1 -1
  172. data/schema/block-feed.output.schema.yml +134 -0
  173. metadata +143 -2
  174. data/ucode.gemspec +0 -56
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ucode
4
+ module Glyphs
5
+ # Priority-ordered glyph resolver — the heart of the 4-tier sourcing
6
+ # strategy.
7
+ #
8
+ # Holds a flat array of {Source} instances (any tier, any number per
9
+ # tier) and tries them in `order:` until one returns a {Source::Result}.
10
+ # Tries are tier-major, source-minor: within a tier, sources are
11
+ # tried in the order they were passed to the constructor. This lets
12
+ # callers express "try FSung-1 before FSung-2 before Noto CJK JP" by
13
+ # simply ordering the Tier 1 sources that way.
14
+ #
15
+ # The default order is Tier 1 → Pillar 1 → Pillar 2 → Pillar 3, but
16
+ # callers can override (e.g. tests may want [:pillar3] only).
17
+ #
18
+ # The resolver is a pure orchestrator: it doesn't know about UCD
19
+ # blocks, fontist formulas, or PDF parsing. Those concerns live in
20
+ # the individual Source subclasses and in {SourceBuilder}.
21
+ class Resolver
22
+ DEFAULT_ORDER = %i[tier1 pillar1 pillar2 pillar3].freeze
23
+ private_constant :DEFAULT_ORDER
24
+
25
+ # @param sources [Array<Source>] flat list; grouped by tier
26
+ # internally. Sources with the same tier are tried in the order
27
+ # they appear here.
28
+ # @param order [Array<Symbol>] tier resolution order. Default:
29
+ # %i[tier1 pillar1 pillar2 pillar3].
30
+ def initialize(sources:, order: DEFAULT_ORDER)
31
+ @sources_by_tier = sources.group_by(&:tier)
32
+ @order = order
33
+ end
34
+
35
+ # @param codepoint [Integer]
36
+ # @return [Source::Result, nil] nil only when every source in
37
+ # every configured tier returned nil. With a Pillar 3 source
38
+ # configured, this should be unreachable for assigned
39
+ # codepoints — Pillar 3 catches the tail.
40
+ def resolve(codepoint)
41
+ @order.each do |tier|
42
+ Array(@sources_by_tier[tier]).each do |source|
43
+ result = source.fetch(codepoint)
44
+ return result if result
45
+ end
46
+ end
47
+ nil
48
+ end
49
+
50
+ # @return [Array<Source>] every source the resolver holds, flat.
51
+ def sources
52
+ @sources_by_tier.values.flatten
53
+ end
54
+
55
+ # @param tier [Symbol]
56
+ # @return [Array<Source>] sources registered for the given tier
57
+ def sources_for_tier(tier)
58
+ Array(@sources_by_tier[tier])
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ucode
4
+ module Glyphs
5
+ # Abstract glyph source — one tier of the 4-tier sourcing strategy.
6
+ #
7
+ # The canonical {Resolver} holds an ordered array of Source subclasses
8
+ # and returns the first non-nil Result for a given codepoint. Each
9
+ # tier is one subclass:
10
+ #
11
+ # * Tier 1 — {Sources::Tier1RealFont}: real-font cmap + outline
12
+ # extraction (highest fidelity).
13
+ # * Pillar 1 — {Sources::Pillar1EmbeddedTounicode}: PDF-embedded
14
+ # CIDFont + /ToUnicode CMap.
15
+ # * Pillar 2 — PDF content-stream positional correlation for fonts
16
+ # without /ToUnicode; no separate class, see {Sources::Pillar1EmbeddedTounicode}.
17
+ # * Pillar 3 — {Sources::Pillar3LastResort}: Last Resort UFO
18
+ # placeholder outlines (catches the tail).
19
+ #
20
+ # Subclasses must implement {#tier}, {#provenance}, and {#fetch}.
21
+ # {#fetch} returns nil when the source cannot produce a glyph for
22
+ # the given codepoint — this is NOT an error, it's the signal for
23
+ # the resolver to try the next source.
24
class Source
  # One resolved glyph. Carries the SVG payload and enough
  # provenance to debug "where did this glyph come from?" without
  # holding a reference back to the source.
  Result = Struct.new(:tier, :codepoint, :svg, :provenance, keyword_init: true)

  # NOTE: the abstract methods below raise NotImplementedError, which
  # in Ruby descends from ScriptError rather than StandardError — a
  # plain `rescue` / `rescue StandardError` in a caller will not
  # swallow a forgotten override. The message names the offending
  # subclass and method so the gap is easy to locate.

  # @return [Symbol] one of :tier1, :pillar1, :pillar2, :pillar3
  # @raise [NotImplementedError] when the subclass does not override it
  def tier
    raise NotImplementedError, "#{self.class} must implement #tier"
  end

  # @return [String] dotted provenance string, e.g.
  #   "tier-1:lentariso", "pillar-3:last-resort"
  # @raise [NotImplementedError] when the subclass does not override it
  def provenance
    raise NotImplementedError, "#{self.class} must implement #provenance"
  end

  # @param codepoint [Integer]
  # @return [Result, nil] nil when this source cannot produce a glyph
  # @raise [NotImplementedError] when the subclass does not override it
  def fetch(codepoint)
    raise NotImplementedError, "#{self.class} must implement #fetch (codepoint: #{codepoint.inspect})"
  end
end
47
+ end
48
+ end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ucode/glyphs/source_config"
4
+ require "ucode/glyphs/sources"
5
+
6
+ module Ucode
7
+ module Glyphs
8
+ # Builds {Source} instances by joining a {SourceConfig} (block →
9
+ # font mapping) with a {Ucode::Database} (block name → codepoint
10
+ # range).
11
+ #
12
+ # This is the single place that knows how to turn configuration +
13
+ # UCD metadata into live Source objects. Keeping that knowledge
14
+ # out of {SourceConfig} (which is a pure data loader) and out of
15
+ # {Resolver} (which is a pure orchestrator) keeps each class's
16
+ # responsibility narrow.
17
+ #
18
+ # For each block with at least one Tier 1 source in the config,
19
+ # the builder resolves the block's codepoint range from the UCD
20
+ # database and constructs one {Sources::Tier1RealFont} per
21
+ # configured {Ucode::Models::GlyphSource}. Blocks in the config
22
+ # that aren't in the UCD database are silently skipped — they may
23
+ # be future blocks or typos, and either way there's no range to
24
+ # serve.
25
class SourceBuilder
  # @param config [SourceConfig]
  # @param database [Ucode::Database] UCD index used to resolve
  #   block names to codepoint ranges
  def initialize(config:, database:)
    @config = config
    @database = database
  end

  # Expands the config into live Tier 1 sources: for every configured
  # block that the database can resolve to a range, one
  # {Sources::Tier1RealFont} per configured font, in config order.
  #
  # @param install [Boolean] forwarded to {Sources::Tier1RealFont}.
  #   Tests pass false to suppress fontist downloads.
  # @return [Array<Source>] one Tier1RealFont per (block, source)
  #   pair in the config whose block exists in the UCD database
  def tier1_sources(install: true)
    @config.configured_block_ids.flat_map do |block_id|
      range = block_range_for(block_id)
      next [] unless range # block unknown to the database: nothing to serve

      @config.fonts_for(block_id).map do |source|
        Sources::Tier1RealFont.new(block_range: range, source: source, install: install)
      end
    end
  end

  private

  # Resolves a block id to the smallest range spanning all of its
  # database entries.
  #
  # @param block_id [String]
  # @return [Range<Integer>, nil] nil when the database has no entry
  #   for the block (whether it reports that as nil or as an empty
  #   collection), so unknown blocks are skipped rather than raising.
  def block_range_for(block_id)
    entries = @database.block_ranges_by_name(block_id)
    return nil if entries.nil? || entries.empty?

    lowest = entries.map(&:first_cp).min
    highest = entries.map(&:last_cp).max
    lowest..highest
  end
end
60
+ end
61
+ end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "time"
4
+
5
+ require "ucode/glyphs/source_config"
6
+ require "ucode/glyphs/source_config/gap_report"
7
+
8
+ module Ucode
9
+ module Glyphs
10
+ class SourceConfig
11
+ # Development-time walker that asks: "for every assigned
12
+ # codepoint in this Unicode version, does at least one Tier 1
13
+ # source's cmap cover it?" Codepoints with no Tier 1 coverage
14
+ # are recorded in a {GapReport}.
15
+ #
16
+ # This is a **curation review tool**, not a build gate. The
17
+ # universal-set build (TODO 24) still runs and falls through to
18
+ # pillars 1-3 for any gap; this report just makes the gaps
19
+ # visible to a human curator.
20
+ #
21
+ # Dependencies are injected so the walker stays pure:
22
+ #
23
+ # - `source_map` — typed {Ucode::Models::GlyphSourceMap} from
24
+ # {SourceConfig#map}.
25
+ # - `database` — open {Ucode::Database} for the Unicode version
26
+ # being audited. Supplies the assigned-codepoint ranges.
27
+ # - `cmaps` — any object responding to
28
+ # `covers?(GlyphSource, Integer) => Boolean`. Required; callers
29
+ # typically pass a {RealFonts::CmapCache}, which lazily loads each
30
+ # referenced font's cmap via fontisan.
31
+ #
32
+ # The walker never raises for a missing font or a failed cmap
33
+ # load — those codepoints are recorded as gaps. A missing font
34
+ # is itself a curation finding.
35
class CoverageAssertion
  # @param source_map [Ucode::Models::GlyphSourceMap]
  # @param database [Ucode::Database]
  # @param cmaps [#covers?] object responding to
  #   `covers?(source, codepoint) => Boolean`. Required — there is no
  #   default; callers typically pass a {RealFonts::CmapCache}.
  # @param unicode_version [String, nil] recorded on the report.
  #   Defaults to the database's `ucd_version`.
  def initialize(source_map:, database:, cmaps:,
                 unicode_version: nil)
    @source_map = source_map
    @database = database
    @cmaps = cmaps
    @unicode_version = unicode_version || database.ucd_version
  end

  # Walks every codepoint in the `first_cp..last_cp` range of each
  # database block entry that has at least one configured source, and
  # records the codepoints no source's cmap covers.
  #
  # @return [GapReport] `gaps_by_block` holds only blocks with at
  #   least one gap and is a plain frozen Hash: looking up a block
  #   without gaps yields nil and never mutates or raises.
  def call
    # Deliberately a plain Hash. A `Hash.new { |h, k| h[k] = [] }`
    # would keep its default proc after `freeze`, so a later lookup of
    # a gap-free block on the report would try to write into the
    # frozen hash and raise FrozenError.
    gaps = {}

    @database.block_entries.each do |range|
      block_id = range.name
      sources = @source_map.sources_for(block_id)
      next if sources.empty? # uncurated block; not a gap, just unconfigured

      uncovered = (range.first_cp..range.last_cp).reject do |cp|
        sources.any? { |src| @cmaps.covers?(src, cp) }
      end
      next if uncovered.empty?

      # Several entries may share a block name; accumulate per name.
      (gaps[block_id] ||= []).concat(uncovered)
    end

    GapReport.new(
      unicode_version: @unicode_version,
      generated_at: Time.now.utc.iso8601,
      gaps_by_block: gaps.freeze,
      total_gaps: gaps.each_value.sum(&:size),
    )
  end
end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Glyphs
7
+ class SourceConfig
8
+ # Typed result of {CoverageAssertion#call}. Lists every assigned
9
+ # codepoint that no Tier 1 source's cmap covers, grouped by block.
10
+ #
11
+ # Pure value object — never raises, never mutates. Callers decide
12
+ # what to do with gaps:
13
+ #
14
+ # - **CI**: warn, fail the build on regressions vs. baseline.
15
+ # - **Local curator**: print, decide what to add.
16
+ # - **Production build**: continue — pillar 1-2-3 catch up.
17
+ #
18
+ # The shape round-trips through lutaml-model so it can be emitted
19
+ # alongside the universal-set build reports (TODO 31).
20
class GapReport < Lutaml::Model::Serializable
  attribute :unicode_version, :string
  attribute :generated_at, :string
  attribute :gaps_by_block, :hash, default: -> { {} }
  attribute :total_gaps, :integer, default: -> { 0 }

  key_value do
    map "unicode_version", to: :unicode_version
    map "generated_at", to: :generated_at
    map "gaps_by_block", to: :gaps_by_block
    map "total_gaps", to: :total_gaps
  end

  # @return [Boolean] true when `total_gaps` is zero, i.e. no gap was
  #   recorded for any walked block.
  def empty?
    total_gaps.zero?
  end

  # @param block_id [String]
  # @return [Array<Integer>] codepoints with no Tier 1 coverage
  #   in this block. Empty for blocks with full coverage or
  #   blocks that weren't walked.
  def codepoints_for(block_id)
    # `fetch` with a fallback never runs a Hash default proc, unlike
    # `[]`. That keeps this a pure read even if the stored hash was
    # built with an auto-vivifying default: no key is inserted, and a
    # frozen hash cannot raise FrozenError on a miss.
    Array(gaps_by_block.fetch(block_id, nil))
  end

  # @return [Array<String>] the keys of `gaps_by_block` — the block
  #   ids for which gaps were recorded.
  def block_ids_with_gaps
    gaps_by_block.keys
  end
end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+ require "yaml"
5
+
6
+ require "ucode/models/glyph_source_map"
7
+
8
+ module Ucode
9
+ module Glyphs
10
+ # Loads the curated Tier 1 font map from
11
+ # `config/unicode17_universal_glyph_set.yml` into a typed
12
+ # {Ucode::Models::GlyphSourceMap}.
13
+ #
14
+ # This is the policy half of the 4-tier resolver (TODO 23):
15
+ # "which font wins for which block, this Unicode version". The
16
+ # resolver mechanics live in {Resolver} + {Source}; the
17
+ # per-version curation lives in the YAML.
18
+ #
19
+ # Block ids in the YAML use the canonical underscore form
20
+ # ("Basic_Latin", "CJK_Unified_Ideographs_Extension_J") — same
21
+ # convention as {Ucode::Parsers::Blocks} and the rest of the
22
+ # codebase. Never slugified beyond whitespace collapse.
23
+ #
24
+ # Loader semantics:
25
+ # - Missing file → `exist?` returns false; `map` is an empty
26
+ # `GlyphSourceMap`; all queries return empty.
27
+ # - Empty `map:` section → same as missing file.
28
+ # - Malformed YAML → raises (the curator must fix the file).
29
class SourceConfig
  # Curation-review helpers (TODO 29), loaded on first reference. The
  # class itself stays a loader; these consume the map it produces.
  autoload :CoverageAssertion, "ucode/glyphs/source_config/coverage_assertion"
  autoload :GapReport, "ucode/glyphs/source_config/gap_report"

  # Where the curated Tier 1 font map lives when no override is given.
  # Exposed as a constant so callers can reference it without first
  # building a SourceConfig.
  DEFAULT_PATH = Pathname.new("config/unicode17_universal_glyph_set.yml")

  # @return [Pathname] the config file path this loader reads
  attr_reader :path

  # One-shot convenience: build a loader and hand back its typed map.
  #
  # @param yaml_path [String, Pathname]
  # @return [Ucode::Models::GlyphSourceMap]
  def self.load(yaml_path = DEFAULT_PATH)
    new(path: yaml_path).map
  end

  # @param path [String, Pathname] path to the YAML config file.
  def initialize(path: DEFAULT_PATH)
    @path = Pathname.new(path)
  end

  # @return [Boolean] whether the config path exists on disk
  def exist?
    path.exist?
  end

  # The typed map, loaded on first call and cached afterwards.
  #
  # @return [Ucode::Models::GlyphSourceMap] an empty map when the file
  #   is absent or its YAML document is not a mapping
  def map
    @map ||= load_map
  end

  # @param block_id [String] verbatim block id (underscore form)
  # @return [Array<Ucode::Models::GlyphSource>] whatever the map
  #   returns from `sources_for` for this block
  def fonts_for(block_id)
    map.sources_for(block_id)
  end

  # @return [Array<String>] the map's `configured_block_ids`
  def configured_block_ids
    map.configured_block_ids
  end

  private

  # Parses the YAML file (aliases permitted) into a typed map. A
  # missing file or a non-Hash document yields a fresh empty map;
  # YAML syntax errors propagate to the caller.
  def load_map
    document = path.exist? ? YAML.safe_load(path.read, aliases: true) : nil

    if document.is_a?(Hash)
      Ucode::Models::GlyphSourceMap.from_hash(document)
    else
      Ucode::Models::GlyphSourceMap.new
    end
  end
end
103
+ end
104
+ end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ucode/glyphs/source"
4
+ require "ucode/glyphs/embedded_fonts/renderer"
5
+
6
+ module Ucode
7
+ module Glyphs
8
+ module Sources
9
+ # Pillar 1 glyph source: Code Charts PDF-embedded CIDFont
10
+ # outlines resolved via `/ToUnicode` CMap.
11
+ #
12
+ # Delegates to {EmbeddedFonts::Renderer}, which walks the
13
+ # PDF object graph (Type0 → CIDFont → FontDescriptor →
14
+ # FontFile2/3), looks up the GID via `/ToUnicode`, and renders
15
+ # the outline as a standalone SVG via {EmbeddedFonts::Svg}.
16
+ #
17
+ # == Pillar 2 unification
18
+ #
19
+ # TODO 20 lists a separate +Sources::Pillar2Correlator+ class.
20
+ # It is intentionally omitted. {ContentStreamCorrelator} alone
21
+ # returns +Hash{Integer=>Integer}+ (codepoint → GID mappings),
22
+ # not SVGs; it only produces SVGs when invoked through
23
+ # {EmbeddedFonts::Catalog} via its +correlator_configs:+
24
+ # registry. The Catalog already unifies pillars 1 and 2 at
25
+ # index-build time, so a Source-layer split would either
26
+ # duplicate the Catalog's index or require tagging each
27
+ # FontEntry with the sub-mechanism that served it — both
28
+ # violations of MECE. Pillar 2 fallback is configured by
29
+ # constructing the wrapped Catalog with +correlator_configs:+.
30
class Pillar1EmbeddedTounicode < Source
  # @param renderer [EmbeddedFonts::Renderer] object whose
  #   `render(codepoint)` does the actual work. Callers typically
  #   build it around the {EmbeddedFonts::Catalog} for the resolved
  #   Code Charts {EmbeddedFonts::Source}; pillar-2 fallback requires
  #   that Catalog to be constructed with +correlator_configs:+.
  def initialize(renderer:)
    super()
    @renderer = renderer
  end

  # @return [Symbol] always :pillar1
  def tier
    :pillar1
  end

  # @return [String] always "pillar-1:embedded-tounicode"
  def provenance
    "pillar-1:embedded-tounicode"
  end

  # Asks the renderer for the codepoint and, when it yields something,
  # repackages its SVG as a {Source::Result}.
  #
  # @param codepoint [Integer]
  # @return [Result, nil] nil when the renderer returned nothing
  def fetch(codepoint)
    rendered = @renderer.render(codepoint)

    if rendered
      Result.new(
        tier: tier,
        codepoint: codepoint,
        svg: rendered.svg,
        provenance: provenance
      )
    end
  end
end
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ucode/glyphs/source"
4
+ require "ucode/glyphs/last_resort/renderer"
5
+
6
+ module Ucode
7
+ module Glyphs
8
+ module Sources
9
+ # Pillar 3 glyph source: Last Resort UFO placeholder outlines.
10
+ #
11
+ # Wraps {LastResort::Renderer}, which chains cmap → contents →
12
+ # glif → svg for every codepoint the Last Resort Font's Format 13
13
+ # cmap maps (essentially all of Unicode 0x0..0x10FFFF). This is
14
+ # the catch-all tier: any codepoint no higher tier produced a
15
+ # glyph for lands here and gets a placeholder outline.
16
+ #
17
+ # The Renderer returns nil only for codepoints outside the cmap
18
+ # (extremely rare — the Format 13 cmap is exhaustive). For
19
+ # everything else it returns a {LastResort::Renderer::Result}
20
+ # with the SVG. We adapt that to {Source::Result}.
21
class Pillar3LastResort < Source
  # @param renderer [LastResort::Renderer] object whose
  #   `render(codepoint)` does the actual work. Callers typically
  #   build it from the resolved {LastResort::Source}.
  def initialize(renderer:)
    super()
    @renderer = renderer
  end

  # @return [Symbol] always :pillar3
  def tier
    :pillar3
  end

  # @return [String] always "pillar-3:last-resort"
  def provenance
    "pillar-3:last-resort"
  end

  # Renders the placeholder for the codepoint and adapts a successful
  # renderer result to a {Source::Result}.
  #
  # @param codepoint [Integer]
  # @return [Result, nil] nil when the renderer returned nil or a
  #   result whose `ok?` is falsy
  def fetch(codepoint)
    placeholder = @renderer.render(codepoint)
    return nil if placeholder.nil? || !placeholder.ok?

    Result.new(
      tier: tier,
      codepoint: codepoint,
      svg: placeholder.svg,
      provenance: provenance
    )
  end
end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fontisan"
4
+
5
+ require "ucode/glyphs/source"
6
+ require "ucode/glyphs/real_fonts/font_locator"
7
+ require "ucode/glyphs/embedded_fonts/svg"
8
+
9
+ module Ucode
10
+ module Glyphs
11
+ module Sources
12
+ # Tier 1 glyph source: real-font cmap + outline extraction.
13
+ #
14
+ # For codepoints inside its assigned block range, looks up the
15
+ # GID in the font's cmap, extracts the outline via
16
+ # `Fontisan::OutlineExtractor`, and renders a standalone SVG via
17
+ # {EmbeddedFonts::Svg} (which y-flips to SVG coordinates and
18
+ # builds a padded viewBox around the outline bbox).
19
+ #
20
+ # Codepoints outside the block range, missing from the cmap, or
21
+ # producing an empty outline return nil — the {Resolver} then
22
+ # falls through to lower tiers. This is the preferred source:
23
+ # highest fidelity, no chart-grid chrome composited in.
24
+ #
25
+ # One Tier1RealFont per (block, font) pair. The {SourceBuilder}
26
+ # expands a {SourceConfig} into a flat array of these, one per
27
+ # configured block × {Ucode::Models::GlyphSource} entry. When
28
+ # multiple Tier 1 fonts are configured for the same block, each
29
+ # becomes a separate source and the resolver tries them in
30
+ # declared order.
31
class Tier1RealFont < Source
  # @param block_range [Range<Integer>] codepoints this source
  #   serves. Codepoints outside the range return nil without
  #   consulting the font.
  # @param source [Ucode::Models::GlyphSource] typed curation
  #   entry. Drives font resolution via
  #   {RealFonts::FontLocator} through {GlyphSource#to_font_spec}.
  # @param install [Boolean] passed through to FontLocator. When
  #   true (default), fontist downloads missing fonts. Tests
  #   disable this to avoid network calls.
  def initialize(block_range:, source:, install: true)
    super()
    @block_range = block_range
    @source = source
    @install = install
    @font_usable = nil # tri-state: nil = not probed yet, then true/false
  end

  # @return [Symbol] :tier1
  def tier
    :tier1
  end

  # @return [String] "tier-1:<label>" — the label is the
  #   {GlyphSource#label} from the curation entry.
  def provenance
    "tier-1:#{@source.label}"
  end

  # Looks the codepoint up in the font's cmap, extracts its outline
  # and renders it as SVG.
  #
  # @param codepoint [Integer]
  # @return [Result, nil] nil when the codepoint is outside the block
  #   range, the font could not be located/loaded, the cmap has no
  #   entry, the outline is nil or empty, or any StandardError occurs
  #   along the way. The resolver then tries the next source.
  def fetch(codepoint)
    return nil unless @block_range.cover?(codepoint)
    # Probe the font once. Without this, a font that cannot be
    # located or loaded would be re-located (including any install
    # attempt) for every single codepoint in the block.
    return nil unless font_usable?

    gid = cmap[codepoint]
    return nil unless gid

    outline = extractor.extract(gid)
    return nil if outline.nil? || outline.empty?

    svg = EmbeddedFonts::Svg.new(outline, codepoint: codepoint,
                                          base_font: base_font).to_s
    Result.new(tier: tier, codepoint: codepoint, svg: svg,
               provenance: provenance)
  rescue StandardError
    # Outline extraction errors etc. — all translate to "this source
    # can't help" for this codepoint. The resolver will try the next
    # tier.
    nil
  end

  private

  # Forces the locate → load → cmap chain once and remembers whether
  # it produced a cmap. A failure is cached for the lifetime of this
  # source, so later fetches return nil immediately.
  #
  # @return [Boolean]
  def font_usable?
    return @font_usable unless @font_usable.nil?

    @font_usable = !cmap.nil?
  rescue StandardError
    @font_usable = false
  end

  def cmap
    @cmap ||= font.table("cmap").unicode_mappings
  end

  def font
    @font ||= Fontisan::FontLoader.load(path)
  end

  def path
    @path ||= RealFonts::FontLocator.new.locate(@source.to_font_spec, install: @install).path
  end

  def extractor
    @extractor ||= Fontisan::OutlineExtractor.new(font)
  end

  # File name of the resolved font without its extension.
  def base_font
    File.basename(path.to_s, ".*")
  end
end
102
+ end
103
+ end
104
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ucode
4
+ module Glyphs
5
+ # Concrete {Source} subclasses — one per tier of the 4-tier glyph
6
+ # sourcing strategy. Each adapts an existing implementation
7
+ # (RealFonts, EmbeddedFonts::Catalog, LastResort::Renderer) to the
8
+ # {Source} interface so the {Resolver} can orchestrate them
9
+ # uniformly.
10
+ #
11
+ # Adding a new source is a pure extension (new file + autoload) —
12
+ # the Resolver and Source interface are closed for modification.
13
module Sources
  # Constant name → feature path. Each is registered with `autoload`,
  # so the defining file is only required on first reference.
  {
    Tier1RealFont: "ucode/glyphs/sources/tier1_real_font",
    Pillar1EmbeddedTounicode: "ucode/glyphs/sources/pillar1_embedded_tounicode",
    Pillar3LastResort: "ucode/glyphs/sources/pillar3_last_resort"
  }.each do |const_name, feature|
    autoload const_name, feature
  end
end
19
+ end
20
+ end