ucode 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +72 -0
  3. data/Gemfile.lock +2 -2
  4. data/TODO.full/00-README.md +116 -0
  5. data/TODO.full/01-panglyph-vision.md +112 -0
  6. data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
  7. data/TODO.full/03-panglyph-font-builder.md +201 -0
  8. data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
  9. data/TODO.full/05-ucode-0-1-1-release.md +139 -0
  10. data/TODO.full/06-fontisan-remove-audit.md +142 -0
  11. data/TODO.full/07-fontisan-remove-ucd.md +125 -0
  12. data/TODO.full/08-archive-private-bin-build.md +143 -0
  13. data/TODO.full/09-archive-public-structure.md +164 -0
  14. data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
  15. data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
  16. data/TODO.full/12-implementation-order.md +216 -0
  17. data/TODO.full/13-fontisan-font-writer-api.md +189 -0
  18. data/TODO.full/14-fontisan-table-writers.md +66 -0
  19. data/TODO.full/15-panglyph-builder-real.md +82 -0
  20. data/TODO.full/16-archive-public-sync-workflows.md +167 -0
  21. data/TODO.full/17-fontist-org-font-picker.md +73 -0
  22. data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
  23. data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
  24. data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
  25. data/TODO.new/00-README.md +30 -0
  26. data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
  27. data/TODO.new/24-universal-glyph-set-build.md +189 -0
  28. data/TODO.new/25-font-audit-against-universal-set.md +195 -0
  29. data/TODO.new/26-missing-glyph-reporter.md +189 -0
  30. data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
  31. data/TODO.new/28-implementation-order-update.md +187 -0
  32. data/TODO.new/29-universal-set-curation-uc17.md +312 -0
  33. data/TODO.new/30-tier1-font-acquisition.md +241 -0
  34. data/TODO.new/31-universal-set-production-build.md +205 -0
  35. data/TODO.new/32-uc17-coverage-matrix.md +165 -0
  36. data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
  37. data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
  38. data/TODO.new/35-universal-set-production-run.md +160 -0
  39. data/TODO.new/36-per-font-coverage-audit.md +145 -0
  40. data/TODO.new/37-coverage-highlight-reporter.md +125 -0
  41. data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
  42. data/TODO.new/39-implementation-order-update-32-38.md +258 -0
  43. data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
  44. data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
  45. data/config/specialist_fonts.yml +102 -0
  46. data/config/unicode17_tier1_fonts.yml +42 -0
  47. data/config/unicode17_universal_glyph_set.yml +293 -0
  48. data/lib/ucode/audit/block_aggregator.rb +57 -29
  49. data/lib/ucode/audit/browser/face_page.rb +128 -0
  50. data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
  51. data/lib/ucode/audit/browser/library_page.rb +74 -0
  52. data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
  53. data/lib/ucode/audit/browser/template.rb +47 -0
  54. data/lib/ucode/audit/browser/templates/face.css +200 -0
  55. data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
  56. data/lib/ucode/audit/browser/templates/face.js +298 -0
  57. data/lib/ucode/audit/browser/templates/library.css +119 -0
  58. data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
  59. data/lib/ucode/audit/browser/templates/library.js +99 -0
  60. data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
  61. data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
  62. data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
  63. data/lib/ucode/audit/browser.rb +32 -0
  64. data/lib/ucode/audit/context.rb +27 -1
  65. data/lib/ucode/audit/coverage_reference.rb +103 -0
  66. data/lib/ucode/audit/differ.rb +121 -0
  67. data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
  68. data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
  69. data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
  70. data/lib/ucode/audit/emitter/face_directory.rb +212 -0
  71. data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
  72. data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
  73. data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
  74. data/lib/ucode/audit/emitter/paths.rb +312 -0
  75. data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
  76. data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
  77. data/lib/ucode/audit/emitter.rb +29 -0
  78. data/lib/ucode/audit/extractors/aggregations.rb +31 -2
  79. data/lib/ucode/audit/face_auditor.rb +86 -0
  80. data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
  81. data/lib/ucode/audit/formatters/audit_text.rb +411 -0
  82. data/lib/ucode/audit/formatters/color.rb +48 -0
  83. data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
  84. data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
  85. data/lib/ucode/audit/formatters.rb +23 -0
  86. data/lib/ucode/audit/library_aggregator.rb +86 -0
  87. data/lib/ucode/audit/library_auditor.rb +105 -0
  88. data/lib/ucode/audit/release/emitter.rb +152 -0
  89. data/lib/ucode/audit/release/face_card.rb +93 -0
  90. data/lib/ucode/audit/release/formula_audits.rb +50 -0
  91. data/lib/ucode/audit/release/library_index_builder.rb +78 -0
  92. data/lib/ucode/audit/release/manifest_builder.rb +127 -0
  93. data/lib/ucode/audit/release.rb +42 -0
  94. data/lib/ucode/audit/ucd_only_reference.rb +81 -0
  95. data/lib/ucode/audit/universal_set_reference.rb +136 -0
  96. data/lib/ucode/audit.rb +31 -0
  97. data/lib/ucode/cli.rb +339 -33
  98. data/lib/ucode/commands/audit/browser_command.rb +82 -0
  99. data/lib/ucode/commands/audit/collection_command.rb +103 -0
  100. data/lib/ucode/commands/audit/compare_command.rb +188 -0
  101. data/lib/ucode/commands/audit/font_command.rb +140 -0
  102. data/lib/ucode/commands/audit/library_command.rb +87 -0
  103. data/lib/ucode/commands/audit/reference_builder.rb +64 -0
  104. data/lib/ucode/commands/audit.rb +20 -0
  105. data/lib/ucode/commands/block_feed.rb +73 -0
  106. data/lib/ucode/commands/canonical_build.rb +138 -0
  107. data/lib/ucode/commands/fetch.rb +37 -1
  108. data/lib/ucode/commands/release.rb +115 -0
  109. data/lib/ucode/commands/universal_set.rb +211 -0
  110. data/lib/ucode/commands.rb +5 -0
  111. data/lib/ucode/coordinator/indices.rb +11 -0
  112. data/lib/ucode/coordinator.rb +138 -5
  113. data/lib/ucode/error.rb +30 -2
  114. data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
  115. data/lib/ucode/fetch/font_fetcher.rb +16 -0
  116. data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
  117. data/lib/ucode/fetch.rb +7 -3
  118. data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
  119. data/lib/ucode/glyphs/real_fonts.rb +1 -0
  120. data/lib/ucode/glyphs/resolver.rb +62 -0
  121. data/lib/ucode/glyphs/source.rb +48 -0
  122. data/lib/ucode/glyphs/source_builder.rb +61 -0
  123. data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
  124. data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
  125. data/lib/ucode/glyphs/source_config.rb +104 -0
  126. data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
  127. data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
  128. data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
  129. data/lib/ucode/glyphs/sources.rb +20 -0
  130. data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
  131. data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
  132. data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
  133. data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
  134. data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
  135. data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
  136. data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
  137. data/lib/ucode/glyphs/universal_set.rb +45 -0
  138. data/lib/ucode/glyphs.rb +6 -0
  139. data/lib/ucode/models/audit/baseline.rb +6 -0
  140. data/lib/ucode/models/audit/block_summary.rb +7 -0
  141. data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
  142. data/lib/ucode/models/audit/release_face.rb +42 -0
  143. data/lib/ucode/models/audit/release_formula.rb +33 -0
  144. data/lib/ucode/models/audit/release_manifest.rb +43 -0
  145. data/lib/ucode/models/audit/release_universal_set.rb +37 -0
  146. data/lib/ucode/models/audit.rb +9 -0
  147. data/lib/ucode/models/block.rb +2 -0
  148. data/lib/ucode/models/build_report.rb +109 -0
  149. data/lib/ucode/models/codepoint/glyph.rb +42 -0
  150. data/lib/ucode/models/codepoint.rb +3 -0
  151. data/lib/ucode/models/glyph_source.rb +86 -0
  152. data/lib/ucode/models/glyph_source_map.rb +138 -0
  153. data/lib/ucode/models/specialist_font.rb +70 -0
  154. data/lib/ucode/models/specialist_font_manifest.rb +48 -0
  155. data/lib/ucode/models/unihan_entry.rb +81 -9
  156. data/lib/ucode/models/unihan_field.rb +21 -0
  157. data/lib/ucode/models/universal_set_entry.rb +47 -0
  158. data/lib/ucode/models/universal_set_manifest.rb +78 -0
  159. data/lib/ucode/models/validation_report.rb +99 -0
  160. data/lib/ucode/models.rb +9 -0
  161. data/lib/ucode/parsers/named_sequences.rb +5 -5
  162. data/lib/ucode/parsers/unihan.rb +50 -19
  163. data/lib/ucode/repo/aggregate_writer.rb +34 -2
  164. data/lib/ucode/repo/block_feed_emitter.rb +153 -0
  165. data/lib/ucode/repo/build_report_accumulator.rb +138 -0
  166. data/lib/ucode/repo/build_report_writer.rb +46 -0
  167. data/lib/ucode/repo/build_validator.rb +229 -0
  168. data/lib/ucode/repo/codepoint_writer.rb +50 -1
  169. data/lib/ucode/repo/paths.rb +8 -0
  170. data/lib/ucode/repo.rb +4 -0
  171. data/lib/ucode/version.rb +1 -1
  172. data/schema/block-feed.output.schema.yml +134 -0
  173. metadata +143 -2
  174. data/ucode.gemspec +0 -56
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ module Audit
8
# Describes the universal-set section of a {ReleaseManifest}.
#
# States whether a co-located universal glyph set (TODO 24) ships
# with the release tree and, if so, where it lives. fontist.org reads
# this to choose between rendering missing-codepoint thumbnails from
# the universal set and falling back to text-only chips.
#
# If `available` is false, `reason` holds a brief diagnostic string
# (e.g. "universal-set directory not found at <path>") and the
# remaining fields are left unset (`totals` falls back to its
# empty-hash default).
class ReleaseUniversalSet < Lutaml::Model::Serializable
  # Presence flag, followed by the location and version of the set.
  attribute :available, Lutaml::Model::Type::Boolean
  attribute :manifest_path, :string
  attribute :glyphs_dir, :string
  attribute :unicode_version, :string
  # Free-form counters; stored as a raw hash.
  attribute :totals, :hash, default: -> { {} }
  # Diagnostic text explaining an unavailable set.
  attribute :reason, :string

  # Wire keys are identical to the attribute names.
  key_value do
    map "available", to: :available
    map "manifest_path", to: :manifest_path
    map "glyphs_dir", to: :glyphs_dir
    map "unicode_version", to: :unicode_version
    map "totals", to: :totals
    map "reason", to: :reason
  end
end
35
+ end
36
+ end
37
+ end
@@ -23,6 +23,7 @@ module Ucode
23
23
  autoload :PlaneSummary, "ucode/models/audit/plane_summary"
24
24
  autoload :Discrepancy, "ucode/models/audit/discrepancy"
25
25
  autoload :CodepointDetail, "ucode/models/audit/codepoint_detail"
26
+ autoload :CodepointProvenance, "ucode/models/audit/codepoint_provenance"
26
27
 
27
28
  # Ported from fontisan (namespace swap + minor renames)
28
29
  autoload :AuditReport, "ucode/models/audit/audit_report"
@@ -45,6 +46,14 @@ module Ucode
45
46
  autoload :DuplicateGroup, "ucode/models/audit/duplicate_group"
46
47
  autoload :LibrarySummary, "ucode/models/audit/library_summary"
47
48
  autoload :AuditDiff, "ucode/models/audit/audit_diff"
49
+
50
+ # Release-tree models (TODO 27) — fontist.org-consumable
51
+ # artifact manifest. ReleaseManifest is the top-level shape;
52
+ # the others are nested entries.
53
+ autoload :ReleaseManifest, "ucode/models/audit/release_manifest"
54
+ autoload :ReleaseFormulaEntry, "ucode/models/audit/release_formula"
55
+ autoload :ReleaseFaceEntry, "ucode/models/audit/release_face"
56
+ autoload :ReleaseUniversalSet, "ucode/models/audit/release_universal_set"
48
57
  end
49
58
  end
50
59
  end
@@ -13,6 +13,7 @@ module Ucode
13
13
  attribute :range_first, :integer
14
14
  attribute :range_last, :integer
15
15
  attribute :plane_number, :integer
16
+ attribute :age, :string
16
17
  attribute :codepoint_ids, :string, collection: true, default: -> { [] }
17
18
 
18
19
  key_value do
@@ -21,6 +22,7 @@ module Ucode
21
22
  map "range_first", to: :range_first
22
23
  map "range_last", to: :range_last
23
24
  map "plane_number", to: :plane_number
25
+ map "age", to: :age
24
26
  map "codepoint_ids", to: :codepoint_ids
25
27
  end
26
28
 
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
# Report describing a single canonical Unicode dataset build.
#
# This is the TODO 21 deliverable: a Mode 1 build writes it last, as
# `output/build-report.json`, to summarize what was built, through
# which tier, per block, and what failed.
#
# The class is a passive data shape. Counting and accumulation happen
# in {Ucode::Repo::BuildReportAccumulator}; this model only declares
# the wire format and relies on lutaml-model for (de)serialization.
#
# Wire format (see TODO 21):
#
#   {
#     "unicode_version": "17.0.0",
#     "ucode_version": "0.2.0",
#     "generated_at": "2026-07-01T12:00:00Z",
#     "totals": { "assigned": 150012, "built": 150012,
#                 "skipped": 0, "failed": 0 },
#     "by_tier": { "tier-1": 150012, "pillar-1": 3000, ... },
#     "by_block": [
#       { "name": "Basic Latin", "assigned": 128, "built": 128,
#         "tier_breakdown": { "tier-1": 128 } },
#       ...
#     ],
#     "failures": []
#   }
#
# `by_tier` counts may overlap: a codepoint attempted through Tier 1
# that falls through to Pillar 1 is counted under both tiers. A
# codepoint's `built` attribution is the tier that actually produced
# its glyph.
class BuildReport < Lutaml::Model::Serializable
  # Run-wide counters.
  class Totals < Lutaml::Model::Serializable
    attribute :assigned, :integer, default: 0
    attribute :built, :integer, default: 0
    attribute :skipped, :integer, default: 0
    attribute :failed, :integer, default: 0

    key_value do
      map "assigned", to: :assigned
      map "built", to: :built
      map "skipped", to: :skipped
      map "failed", to: :failed
    end
  end

  # Rollup for one Unicode block; the report carries one per block
  # in the run.
  class BlockSummary < Lutaml::Model::Serializable
    attribute :name, :string
    attribute :assigned, :integer, default: 0
    attribute :built, :integer, default: 0
    attribute :tier_breakdown, :hash, default: -> { {} }

    key_value do
      map "name", to: :name
      map "assigned", to: :assigned
      map "built", to: :built
      map "tier_breakdown", to: :tier_breakdown
    end
  end

  # A single failure record.
  #
  # `codepoint` is the integer codepoint, or nil for a structural
  # failure; `tier` names the resolver tier that raised, or nil;
  # `error_class` and `message` describe the exception; `backtrace`
  # is optional.
  class Failure < Lutaml::Model::Serializable
    attribute :codepoint, :integer
    attribute :block_name, :string
    attribute :tier, :string
    attribute :error_class, :string
    attribute :message, :string
    attribute :backtrace, :string, collection: true, default: -> { [] }

    key_value do
      map "codepoint", to: :codepoint
      map "block_name", to: :block_name
      map "tier", to: :tier
      map "error_class", to: :error_class
      map "message", to: :message
      map "backtrace", to: :backtrace
    end
  end

  # Envelope metadata.
  attribute :unicode_version, :string
  attribute :ucode_version, :string
  attribute :generated_at, :string
  # Aggregates and per-block / per-failure detail.
  attribute :totals, Totals
  attribute :by_tier, :hash, default: -> { {} }
  attribute :by_block, BlockSummary, collection: true, default: -> { [] }
  attribute :failures, Failure, collection: true, default: -> { [] }

  key_value do
    map "unicode_version", to: :unicode_version
    map "ucode_version", to: :ucode_version
    map "generated_at", to: :generated_at
    map "totals", to: :totals
    map "by_tier", to: :by_tier
    map "by_block", to: :by_block
    map "failures", to: :failures
  end
end
108
+ end
109
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
# Glyph record attached to one codepoint: the on-disk location of its
# SVG plus the resolver tier that generated it.
#
# `svg_path` is expressed relative to the codepoint's own directory
# and is always "glyph.svg" (the layout is fixed in
# {Ucode::Repo::Paths}). `source` holds the resolver tier name and a
# provenance string so that every glyph in a build can be traced back
# to where it came from (real font, embedded ToUnicode, correlator,
# or Last Resort).
class Glyph < Lutaml::Model::Serializable
  # Provenance of a glyph: which tier of the 4-tier resolver
  # produced it. The class is called `Source` to match the wire
  # field name; it has nothing to do with the
  # {Ucode::Glyphs::Source} abstract base.
  class Source < Lutaml::Model::Serializable
    attribute :tier, :string
    attribute :provenance, :string

    key_value do
      map "tier", to: :tier
      map "provenance", to: :provenance
    end
  end

  # Defaults to the fixed file name used by the repo layout.
  attribute :svg_path, :string, default: -> { "glyph.svg" }
  attribute :source, Source

  key_value do
    map "svg_path", to: :svg_path
    map "source", to: :source
  end
end
40
+ end
41
+ end
42
+ end
@@ -26,6 +26,7 @@ module Ucode
26
26
  autoload :Emoji, "ucode/models/codepoint/emoji"
27
27
  autoload :Identifier, "ucode/models/codepoint/identifier"
28
28
  autoload :Normalization, "ucode/models/codepoint/normalization"
29
+ autoload :Glyph, "ucode/models/codepoint/glyph"
29
30
 
30
31
  # Identity + scalar attributes
31
32
  attribute :cp, :integer
@@ -74,6 +75,7 @@ module Ucode
74
75
  collection: true, default: -> { [] }
75
76
  attribute :unihan, "Ucode::Models::UnihanEntry"
76
77
  attribute :names_list, "Ucode::Models::NamesListEntry"
78
+ attribute :glyph, Glyph
77
79
 
78
80
  key_value do
79
81
  map "codepoint", to: :cp
@@ -116,6 +118,7 @@ module Ucode
116
118
  map "standardized_variants", to: :standardized_variants
117
119
  map "unihan", to: :unihan
118
120
  map "names_list", to: :names_list
121
+ map "glyph", to: :glyph
119
122
  end
120
123
  end
121
124
  end
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
# One Tier 1 font entry inside a {GlyphSourceMap}. Corresponds to one
# `- kind: …` item under a block's `sources:` list in
# `config/unicode17_universal_glyph_set.yml`.
#
# This is the typed representation of a font curation choice. The
# {Ucode::Glyphs::Sources::Tier1RealFont} source consumes it to
# resolve and load the font; the resolver treats each entry as an
# independent tier-1 attempt.
#
# Wire shape (YAML / JSON identical):
#
#   kind: fontist                   # one of: fontist, path, system
#   label: noto-sans                # human + provenance key
#   priority: 1                     # lower wins; resolver tries in order
#   license: OFL                    # optional; OFL / PROPRIETARY / etc.
#   provenance: "Google Noto Sans"  # optional citation
#   path: "/abs/font.ttf"           # required when kind == :path
#
# `kind` is stored as a plain string on the wire (lutaml-model has no
# Symbol adapter for key_value); the {#kind_sym} reader casts it for
# internal dispatch.
class GlyphSource < Lutaml::Model::Serializable
  KIND_FONTIST = "fontist"
  KIND_PATH = "path"
  KIND_SYSTEM = "system"
  KINDS = [KIND_FONTIST, KIND_PATH, KIND_SYSTEM].freeze
  private_constant :KIND_FONTIST, :KIND_PATH, :KIND_SYSTEM, :KINDS

  attribute :kind, :string
  attribute :label, :string
  attribute :priority, :integer, default: -> { 100 }
  attribute :license, :string
  attribute :provenance, :string
  attribute :path, :string

  key_value do
    map "kind", to: :kind
    map "label", to: :label
    map "priority", to: :priority
    map "license", to: :license
    map "provenance", to: :provenance
    map "path", to: :path
  end

  # Casts the wire-level `kind` string to a symbol for dispatch.
  #
  # @return [Symbol] normally :fontist, :path or :system (the value is
  #   not checked against the known kinds here)
  # @raise [ArgumentError] if `kind` is nil or empty — every entry
  #   must declare its kind
  def kind_sym
    raise ArgumentError, "GlyphSource#kind is required" if kind.nil? || kind.empty?

    kind.to_sym
  end

  # @return [Boolean] true when this entry requires a `path` field
  #   (kind == :path). Used by the loader to validate structure.
  # @raise [ArgumentError] if `kind` is blank (see {#kind_sym})
  def requires_path?
    kind_sym == :path
  end

  # Renders this source as the legacy font-spec string consumed by
  # {Ucode::Glyphs::RealFonts::FontLocator}: `label=/path/to/font`
  # for kind=path, or the bare `label` (the fontist formula name) for
  # kind=fontist and kind=system. The locator's `locate` understands
  # both shapes.
  #
  # This is the one adapter method that lets the typed model
  # integrate with the existing locator without rewriting it.
  #
  # @return [String]
  # @raise [ArgumentError] if `kind` is blank, if kind=path but `path`
  #   is blank, or if `kind` is not one of the known kinds
  def to_font_spec
    case kind_sym
    when :path
      # The space before the label matters: without it the label is
      # interpolated straight onto the class name in the message.
      raise ArgumentError, "GlyphSource #{label} has kind=path but no path" if path.nil? || path.empty?

      "#{label}=#{path}"
    when :fontist, :system
      label
    else
      # Fail loudly instead of returning nil for a mistyped kind.
      known = KINDS.join(", ")
      raise ArgumentError, "GlyphSource #{label} has unknown kind #{kind.inspect} (expected one of: #{known})"
    end
  end
end
85
+ end
86
+ end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ require "ucode/models/glyph_source"
6
+
7
+ module Ucode
8
+ module Models
9
# Top-level shape of `config/unicode17_universal_glyph_set.yml`.
# Pairs envelope metadata (Unicode + ucode version, generated_at)
# with the block→sources map itself.
#
# Block keys are the verbatim Unicode block name with runs of
# whitespace collapsed to a single underscore — the canonical block
# id used everywhere else in this codebase (see
# {Ucode::Parsers::Blocks}): "Basic_Latin", "Greek_and_Coptic",
# "CJK_Unified_Ideographs_Extension_J". Never slugified beyond
# whitespace collapsing.
#
# Wire shape (note: `map:` is a hash keyed by block id, not an
# array):
#
#   unicode_version: "17.0.0"
#   ucode_version: "0.2.0"
#   generated_at: "2026-06-28T00:00:00Z"
#   default_sources:   # applies when a block's sources are absent/empty
#     - kind: fontist
#       label: noto-sans
#       priority: 1
#       license: OFL
#   map:
#     Basic_Latin:
#       sources:
#         - kind: fontist
#           label: noto-sans
#           priority: 1
#     Sidetic:
#       sources: []
#
# An entry with `sources: []` (or omitted) is valid: it declares
# "no block-specific Tier 1 font; fall back to `default_sources`,
# then to Pillars 1-3". The fallback chain is implemented in
# {#sources_for}; the raw map is left untouched.
#
# The hash is stored as a raw `:hash` attribute (lutaml-model
# collection semantics don't pair cleanly with a hash-keyed wire
# shape); the typed accessors wrap each entry's raw hashes in
# {GlyphSource} instances on demand.
class GlyphSourceMap < Lutaml::Model::Serializable
  attribute :unicode_version, :string
  attribute :ucode_version, :string
  attribute :generated_at, :string
  attribute :default_sources_raw, :hash, collection: true, default: -> { [] }
  attribute :block_sources, :hash, default: -> { {} }

  key_value do
    map "unicode_version", to: :unicode_version
    map "ucode_version", to: :ucode_version
    map "generated_at", to: :generated_at
    map "default_sources", to: :default_sources_raw
    map "map", to: :block_sources
  end

  # @param block_id [String] verbatim block id (underscore form)
  # @return [Array<GlyphSource>] sources for the block, in priority
  #   order (ascending; equal priorities keep their declared order).
  #   Falls through block-specific → `default_sources` → empty.
  def sources_for(block_id)
    list = extract_sources_list(block_sources[block_id])
    list = default_sources_list if list.empty?
    build_sources(list)
  end

  # @return [Array<GlyphSource>] the top-level default sources, typed
  #   and priority-sorted. Empty when not declared.
  def default_sources
    build_sources(default_sources_list)
  end

  # @param block_id [String]
  # @return [Boolean] true if the block has any entry in the map
  #   (even with empty sources). Does not consider `default_sources`.
  def has_block?(block_id)
    block_sources.key?(block_id)
  end

  # @return [Array<String>] every block_id that appears in the map
  #   (regardless of whether it has sources).
  def block_ids
    block_sources.keys
  end

  # @return [Array<String>] block_ids whose own `sources:` list has
  #   at least one entry. Blocks relying on `default_sources` are
  #   excluded — they have no block-specific policy.
  def configured_block_ids
    block_sources.each_with_object([]) do |(block_id, raw), acc|
      acc << block_id if any_sources?(raw)
    end
  end

  private

  # Wraps raw source hashes in {GlyphSource} instances and orders them
  # by ascending priority.
  #
  # `sort_by` alone is not a stable sort, so the original index is
  # used as a tie-breaker: entries sharing a priority (for instance
  # several entries left on the default) keep their declared order
  # and the result is deterministic from run to run.
  #
  # @param raw_list [Array<Hash>] source hashes with string or symbol keys
  # @return [Array<GlyphSource>]
  def build_sources(raw_list)
    raw_list
      .map { |entry| GlyphSource.from_hash(entry.transform_keys(&:to_s)) }
      .sort_by.with_index { |source, index| [source.priority, index] }
  end

  # Each block's value in the YAML is either:
  #   - `{sources: [...]}` (canonical form, string or symbol key), or
  #   - `[...]` (shorthand: the sources list directly).
  # Returns the sources array in both cases; empty for `nil` or any
  # other shape.
  def extract_sources_list(raw)
    case raw
    when Array then raw
    when Hash then Array(raw.key?("sources") ? raw["sources"] : raw[:sources])
    else []
    end
  end

  # @return [Boolean] true when the block's raw value (canonical or
  #   shorthand form) carries at least one source entry. Delegates to
  #   {#extract_sources_list} so both helpers agree on what counts as
  #   a block's own sources.
  def any_sources?(raw)
    extract_sources_list(raw).any?
  end

  # `default_sources` on the wire is a list of source hashes. Older
  # configs may omit it; treat absence as an empty list.
  def default_sources_list
    Array(default_sources_raw)
  end
end
137
+ end
138
+ end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
# A single entry of `config/specialist_fonts.yml`: a Tier 1 font that
# is absent from fontist's formula index (academic sites, SIL
# downloads, GitHub releases). The fetcher iterates over a list of
# these entries and materializes each one's `path` on disk.
#
# Wire shape (YAML):
#
#   label: Lentariso
#   version: "1.033"
#   license: OFL
#   url: "https://github.com/.../Lentariso.otf"
#   sha256: "<hex>"        # null until first successful fetch
#   path: "data/fonts/Lentariso.otf"
#   extract: false
#   extract_member: null   # required when extract: true
#   provenance: "Imperial Aramaic / Phoenician / Sidetic coverage"
#
# An entry with `url: null` is local-only: the user places the file
# at `path` (which may contain `~` and shell globs) and the fetcher
# never tries to download it.
class SpecialistFont < Lutaml::Model::Serializable
  LICENSE_OFL = "OFL"
  private_constant :LICENSE_OFL

  attribute :label, :string
  attribute :version, :string
  attribute :license, :string, default: -> { LICENSE_OFL }
  attribute :url, :string
  attribute :sha256, :string
  attribute :path, :string
  attribute :extract, :boolean, default: -> { false }
  attribute :extract_member, :string
  attribute :provenance, :string

  key_value do
    map "label", to: :label
    map "version", to: :version
    map "license", to: :license
    map "url", to: :url
    map "sha256", to: :sha256
    map "path", to: :path
    map "extract", to: :extract
    map "extract_member", to: :extract_member
    map "provenance", to: :provenance
  end

  # @return [Boolean] true when no download URL is set (nil or empty)
  def local_only?
    return true if url.nil?

    url.empty?
  end

  # @return [Boolean] true when the license is exactly "OFL"
  def ofl?
    LICENSE_OFL == license
  end

  # @return [Boolean] true when a non-empty SHA256 has been recorded
  def hash_known?
    !(sha256.nil? || sha256.empty?)
  end

  # @return [Boolean] true only when `extract` is literally true
  def extract?
    true == extract
  end
end
69
+ end
70
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ require "ucode/models/specialist_font"
6
+
7
+ module Ucode
8
+ module Models
9
# Typed view over `config/specialist_fonts.yml`. Carries the full
# list of {SpecialistFont} entries; provides lookup by label so the
# fetcher can honor `--label Lentariso` without scanning the array
# itself.
#
# The manifest is pure data — it does not know the path it was loaded
# from. Persistence of computed SHA256 hashes back to disk is the
# responsibility of {Ucode::Fetch::SpecialistFontFetcher}, which owns
# the file path and writes atomically after a run.
class SpecialistFontManifest < Lutaml::Model::Serializable
  # NOTE(review): unlike the other collections in these models this
  # one declares no `default: -> { [] }`, so it may be nil when the
  # YAML has no `fonts:` key — confirm against lutaml-model. The
  # readers below go through `Array(fonts)` so they are safe either
  # way.
  attribute :fonts, SpecialistFont, collection: true

  key_value do
    map "fonts", to: :fonts
  end

  # @param label [String] exact label match
  # @return [SpecialistFont, nil] the first entry with that label, or
  #   nil when there is none (including when no fonts are declared)
  def find_by_label(label)
    Array(fonts).find { |font| font.label == label }
  end

  # @return [Array<String>] labels of every entry, in declared order;
  #   empty when no fonts are declared
  def labels
    Array(fonts).map(&:label)
  end

  # @param label [String]
  # @return [SpecialistFontManifest] a new manifest containing only
  #   the matching font. Returns self unchanged if the label is
  #   unknown (the fetcher reports it as a separate failure).
  def only(label)
    match = find_by_label(label)
    return self if match.nil?

    self.class.new(fonts: [match])
  end
end
47
+ end
48
+ end