ucode 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +72 -0
  3. data/Gemfile.lock +2 -2
  4. data/TODO.full/00-README.md +116 -0
  5. data/TODO.full/01-panglyph-vision.md +112 -0
  6. data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
  7. data/TODO.full/03-panglyph-font-builder.md +201 -0
  8. data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
  9. data/TODO.full/05-ucode-0-1-1-release.md +139 -0
  10. data/TODO.full/06-fontisan-remove-audit.md +142 -0
  11. data/TODO.full/07-fontisan-remove-ucd.md +125 -0
  12. data/TODO.full/08-archive-private-bin-build.md +143 -0
  13. data/TODO.full/09-archive-public-structure.md +164 -0
  14. data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
  15. data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
  16. data/TODO.full/12-implementation-order.md +216 -0
  17. data/TODO.full/13-fontisan-font-writer-api.md +189 -0
  18. data/TODO.full/14-fontisan-table-writers.md +66 -0
  19. data/TODO.full/15-panglyph-builder-real.md +82 -0
  20. data/TODO.full/16-archive-public-sync-workflows.md +167 -0
  21. data/TODO.full/17-fontist-org-font-picker.md +73 -0
  22. data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
  23. data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
  24. data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
  25. data/TODO.new/00-README.md +30 -0
  26. data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
  27. data/TODO.new/24-universal-glyph-set-build.md +189 -0
  28. data/TODO.new/25-font-audit-against-universal-set.md +195 -0
  29. data/TODO.new/26-missing-glyph-reporter.md +189 -0
  30. data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
  31. data/TODO.new/28-implementation-order-update.md +187 -0
  32. data/TODO.new/29-universal-set-curation-uc17.md +312 -0
  33. data/TODO.new/30-tier1-font-acquisition.md +241 -0
  34. data/TODO.new/31-universal-set-production-build.md +205 -0
  35. data/TODO.new/32-uc17-coverage-matrix.md +165 -0
  36. data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
  37. data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
  38. data/TODO.new/35-universal-set-production-run.md +160 -0
  39. data/TODO.new/36-per-font-coverage-audit.md +145 -0
  40. data/TODO.new/37-coverage-highlight-reporter.md +125 -0
  41. data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
  42. data/TODO.new/39-implementation-order-update-32-38.md +258 -0
  43. data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
  44. data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
  45. data/config/specialist_fonts.yml +102 -0
  46. data/config/unicode17_tier1_fonts.yml +42 -0
  47. data/config/unicode17_universal_glyph_set.yml +293 -0
  48. data/lib/ucode/audit/block_aggregator.rb +57 -29
  49. data/lib/ucode/audit/browser/face_page.rb +128 -0
  50. data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
  51. data/lib/ucode/audit/browser/library_page.rb +74 -0
  52. data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
  53. data/lib/ucode/audit/browser/template.rb +47 -0
  54. data/lib/ucode/audit/browser/templates/face.css +200 -0
  55. data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
  56. data/lib/ucode/audit/browser/templates/face.js +298 -0
  57. data/lib/ucode/audit/browser/templates/library.css +119 -0
  58. data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
  59. data/lib/ucode/audit/browser/templates/library.js +99 -0
  60. data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
  61. data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
  62. data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
  63. data/lib/ucode/audit/browser.rb +32 -0
  64. data/lib/ucode/audit/context.rb +27 -1
  65. data/lib/ucode/audit/coverage_reference.rb +103 -0
  66. data/lib/ucode/audit/differ.rb +121 -0
  67. data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
  68. data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
  69. data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
  70. data/lib/ucode/audit/emitter/face_directory.rb +212 -0
  71. data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
  72. data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
  73. data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
  74. data/lib/ucode/audit/emitter/paths.rb +312 -0
  75. data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
  76. data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
  77. data/lib/ucode/audit/emitter.rb +29 -0
  78. data/lib/ucode/audit/extractors/aggregations.rb +31 -2
  79. data/lib/ucode/audit/face_auditor.rb +86 -0
  80. data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
  81. data/lib/ucode/audit/formatters/audit_text.rb +411 -0
  82. data/lib/ucode/audit/formatters/color.rb +48 -0
  83. data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
  84. data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
  85. data/lib/ucode/audit/formatters.rb +23 -0
  86. data/lib/ucode/audit/library_aggregator.rb +86 -0
  87. data/lib/ucode/audit/library_auditor.rb +105 -0
  88. data/lib/ucode/audit/release/emitter.rb +152 -0
  89. data/lib/ucode/audit/release/face_card.rb +93 -0
  90. data/lib/ucode/audit/release/formula_audits.rb +50 -0
  91. data/lib/ucode/audit/release/library_index_builder.rb +78 -0
  92. data/lib/ucode/audit/release/manifest_builder.rb +127 -0
  93. data/lib/ucode/audit/release.rb +42 -0
  94. data/lib/ucode/audit/ucd_only_reference.rb +81 -0
  95. data/lib/ucode/audit/universal_set_reference.rb +136 -0
  96. data/lib/ucode/audit.rb +31 -0
  97. data/lib/ucode/cli.rb +339 -33
  98. data/lib/ucode/commands/audit/browser_command.rb +82 -0
  99. data/lib/ucode/commands/audit/collection_command.rb +103 -0
  100. data/lib/ucode/commands/audit/compare_command.rb +188 -0
  101. data/lib/ucode/commands/audit/font_command.rb +140 -0
  102. data/lib/ucode/commands/audit/library_command.rb +87 -0
  103. data/lib/ucode/commands/audit/reference_builder.rb +64 -0
  104. data/lib/ucode/commands/audit.rb +20 -0
  105. data/lib/ucode/commands/block_feed.rb +73 -0
  106. data/lib/ucode/commands/canonical_build.rb +138 -0
  107. data/lib/ucode/commands/fetch.rb +37 -1
  108. data/lib/ucode/commands/release.rb +115 -0
  109. data/lib/ucode/commands/universal_set.rb +211 -0
  110. data/lib/ucode/commands.rb +5 -0
  111. data/lib/ucode/coordinator/indices.rb +11 -0
  112. data/lib/ucode/coordinator.rb +138 -5
  113. data/lib/ucode/error.rb +30 -2
  114. data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
  115. data/lib/ucode/fetch/font_fetcher.rb +16 -0
  116. data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
  117. data/lib/ucode/fetch.rb +7 -3
  118. data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
  119. data/lib/ucode/glyphs/real_fonts.rb +1 -0
  120. data/lib/ucode/glyphs/resolver.rb +62 -0
  121. data/lib/ucode/glyphs/source.rb +48 -0
  122. data/lib/ucode/glyphs/source_builder.rb +61 -0
  123. data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
  124. data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
  125. data/lib/ucode/glyphs/source_config.rb +104 -0
  126. data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
  127. data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
  128. data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
  129. data/lib/ucode/glyphs/sources.rb +20 -0
  130. data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
  131. data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
  132. data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
  133. data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
  134. data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
  135. data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
  136. data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
  137. data/lib/ucode/glyphs/universal_set.rb +45 -0
  138. data/lib/ucode/glyphs.rb +6 -0
  139. data/lib/ucode/models/audit/baseline.rb +6 -0
  140. data/lib/ucode/models/audit/block_summary.rb +7 -0
  141. data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
  142. data/lib/ucode/models/audit/release_face.rb +42 -0
  143. data/lib/ucode/models/audit/release_formula.rb +33 -0
  144. data/lib/ucode/models/audit/release_manifest.rb +43 -0
  145. data/lib/ucode/models/audit/release_universal_set.rb +37 -0
  146. data/lib/ucode/models/audit.rb +9 -0
  147. data/lib/ucode/models/block.rb +2 -0
  148. data/lib/ucode/models/build_report.rb +109 -0
  149. data/lib/ucode/models/codepoint/glyph.rb +42 -0
  150. data/lib/ucode/models/codepoint.rb +3 -0
  151. data/lib/ucode/models/glyph_source.rb +86 -0
  152. data/lib/ucode/models/glyph_source_map.rb +138 -0
  153. data/lib/ucode/models/specialist_font.rb +70 -0
  154. data/lib/ucode/models/specialist_font_manifest.rb +48 -0
  155. data/lib/ucode/models/unihan_entry.rb +81 -9
  156. data/lib/ucode/models/unihan_field.rb +21 -0
  157. data/lib/ucode/models/universal_set_entry.rb +47 -0
  158. data/lib/ucode/models/universal_set_manifest.rb +78 -0
  159. data/lib/ucode/models/validation_report.rb +99 -0
  160. data/lib/ucode/models.rb +9 -0
  161. data/lib/ucode/parsers/named_sequences.rb +5 -5
  162. data/lib/ucode/parsers/unihan.rb +50 -19
  163. data/lib/ucode/repo/aggregate_writer.rb +34 -2
  164. data/lib/ucode/repo/block_feed_emitter.rb +153 -0
  165. data/lib/ucode/repo/build_report_accumulator.rb +138 -0
  166. data/lib/ucode/repo/build_report_writer.rb +46 -0
  167. data/lib/ucode/repo/build_validator.rb +229 -0
  168. data/lib/ucode/repo/codepoint_writer.rb +50 -1
  169. data/lib/ucode/repo/paths.rb +8 -0
  170. data/lib/ucode/repo.rb +4 -0
  171. data/lib/ucode/version.rb +1 -1
  172. data/schema/block-feed.output.schema.yml +134 -0
  173. metadata +143 -2
  174. data/ucode.gemspec +0 -56
@@ -0,0 +1,197 @@
1
+ # frozen_string_literal: true
2
+
3
require "pathname"
require "time"

require "ucode/error"
require "ucode/glyphs/real_fonts/font_locator"
require "ucode/glyphs/source_config"
require "ucode/glyphs/source_config/coverage_assertion"
require "ucode/glyphs/source_config/gap_report"
require "ucode/models/glyph_source_map"
11
+
12
+ module Ucode
13
+ module Glyphs
14
+ module UniversalSet
15
# Findings gathered by {PreBuildCheck}. The struct only records what
# was observed; the decision to abort is made by the check itself, so
# a caller can read a report without having to rescue anything.
PreBuildReport = Struct.new(
  :source_config_path,
  :unicode_version,
  :config_loaded,
  :missing_fonts,
  :coverage_gaps,
  keyword_init: true,
) do
  # Truthy when the config loaded and no font findings were recorded.
  # `coverage_gaps` is not consulted.
  def ok?
    return config_loaded unless config_loaded

    missing_fonts.empty?
  end
end
26
+
27
# Pre-flight validation for a universal-set build. Three checks run,
# in this order:
#
#   1. **Source config loads cleanly.** `SourceConfig.new(path:)`
#      must not raise and must report `exist?`.
#   2. **All fonts present.** Every distinct source in the map must
#      resolve: `path` sources via a filesystem glob, `fontist` and
#      `system` sources via the font locator with `install: false`.
#      Entries whose `kind` is missing or unrecognised are reported
#      as findings as well.
#   3. **Coverage assertion runs.** The result of
#      `SourceConfig::CoverageAssertion` is attached to the report as
#      `coverage_gaps`; gaps never abort the check.
#
# {#call} raises {Ucode::UniversalSetPreBuildError} when the config
# did not load or when any font finding was recorded, so a build is
# not started with known-bad inputs.
class PreBuildCheck
  # @param source_config_path [String, Pathname]
  # @param database [Ucode::Database] open database for the target
  #   Unicode version. Passed to CoverageAssertion and read for
  #   `ucd_version`.
  # @param cmaps [#covers?] defaults to RealFonts::CmapCache.
  #   Injectable for testing.
  # @param font_locator [#locate] defaults to a fresh FontLocator.
  #   Injectable for testing.
  def initialize(source_config_path:, database:, cmaps: nil,
                 font_locator: RealFonts::FontLocator.new)
    @source_config_path = Pathname.new(source_config_path)
    @database = database
    @cmaps = cmaps || RealFonts::CmapCache.new
    @font_locator = font_locator
  end

  # Run every check and hand back the report.
  #
  # @raise [Ucode::UniversalSetPreBuildError] when missing_fonts is
  #   non-empty or the source config fails to load.
  # @return [PreBuildReport]
  def call
    report = build_report
    return report if report.ok?

    raise Ucode::UniversalSetPreBuildError.new(
      "pre-build validation failed",
      context: {
        source_config_path: @source_config_path.to_s,
        missing_fonts: report.missing_fonts,
        config_loaded: report.config_loaded,
      },
    )
  end

  private

  # Assemble the report. When the config did not load, the font and
  # coverage checks are skipped and empty placeholders are recorded.
  def build_report
    config, loaded = load_config
    source_map = config.map if loaded

    PreBuildReport.new(
      source_config_path: @source_config_path.to_s,
      unicode_version: @database.ucd_version,
      config_loaded: loaded,
      missing_fonts: loaded ? collect_missing_fonts(source_map) : [],
      coverage_gaps: loaded ? run_coverage_assertion(source_map) : empty_gap_report,
    )
  end

  # @return [Array(SourceConfig, Boolean), Array(nil, false)] the
  #   config plus whether it exists; `[nil, false]` when construction
  #   raised (the error is logged, not propagated).
  def load_config
    config = SourceConfig.new(path: @source_config_path)
    [config, config.exist?]
  rescue StandardError => e
    warn_with(e)
    [nil, false]
  end

  # Findings for every distinct source in the map, flattened.
  def collect_missing_fonts(source_map)
    unique_sources(source_map).flat_map { |src| findings_for(src) }
  end

  # All distinct sources referenced by the map: block-specific
  # sources plus the top-level defaults. Deduplicated by
  # (kind, label, path) so a font referenced by N blocks is only
  # checked once.
  def unique_sources(source_map)
    block_sources = source_map.block_ids.flat_map do |block_id|
      source_map.sources_for(block_id)
    end
    (block_sources + source_map.default_sources).uniq do |src|
      [src.kind, src.label, src.path]
    end
  end

  # Resolve one source against the filesystem / font locator.
  # Always returns an array of findings (empty when the source is
  # OK). An unrecognised kind yields a finding rather than nil, so
  # callers can concatenate the result unconditionally.
  def findings_for(src)
    kind = safe_kind(src)
    case kind
    when nil
      [malformed_entry(src)]
    when :path
      path_resolves?(src.path) ? [] : [missing_path(src)]
    when :fontist, :system
      fontist_resolves?(src.label) ? [] : [missing_fontist(src, kind)]
    else
      [unsupported_kind(src, kind)]
    end
  end

  # Returns the source's kind as a symbol, or nil when the entry is
  # malformed (nil, empty or whitespace-only `kind`). A nil kind is
  # itself a finding: every entry must declare its kind.
  def safe_kind(src)
    kind = src.kind.to_s.strip
    kind.empty? ? nil : kind.to_sym
  end

  # True when the (glob-expanded) path matches at least one regular
  # file.
  def path_resolves?(raw_path)
    return false if raw_path.nil? || raw_path.empty?

    expanded = File.expand_path(raw_path)
    Dir.glob(expanded).any? { |p| File.file?(p) }
  end

  # True when the locator returns a result carrying a path. Lookup
  # is done with `install: false`; any locator error counts as
  # "not resolved" (deliberate best-effort).
  def fontist_resolves?(label)
    return false if label.nil? || label.empty?

    result = @font_locator.locate(label, install: false)
    !result.nil? && !result.path.nil?
  rescue StandardError
    false
  end

  def missing_path(src)
    { kind: "path", label: src.label, spec: src.path,
      reason: "file not found at #{src.path.inspect}" }
  end

  def missing_fontist(src, kind)
    { kind: kind.to_s, label: src.label, spec: src.label,
      reason: "fontist could not resolve formula #{src.label.inspect}" }
  end

  def malformed_entry(src)
    { kind: "(missing)", label: src.label,
      spec: src.path || src.label,
      reason: "source entry has no `kind` field — must be fontist, path, or system" }
  end

  # Finding for an entry whose `kind` is present but not one of the
  # kinds this check knows how to resolve.
  def unsupported_kind(src, kind)
    { kind: kind.to_s, label: src.label,
      spec: src.path || src.label,
      reason: "unsupported source kind #{kind.to_s.inspect} — must be fontist, path, or system" }
  end

  def run_coverage_assertion(source_map)
    SourceConfig::CoverageAssertion.new(
      source_map: source_map, database: @database, cmaps: @cmaps,
    ).call
  end

  # Placeholder gap report used when the config never loaded.
  # `Time#iso8601` needs the `time` stdlib on Rubies before 3.4.
  def empty_gap_report
    SourceConfig::GapReport.new(
      unicode_version: @database.ucd_version,
      generated_at: Time.now.utc.iso8601,
      gaps_by_block: {}.freeze,
      total_gaps: 0,
    )
  end

  def warn_with(error)
    Ucode.configuration.logger&.warn do
      "pre-build: source config failed to load: #{error.class}: #{error.message}"
    end
  end
end
195
+ end
196
+ end
197
+ end
@@ -0,0 +1,204 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "pathname"
5
+ require "time"
6
+
7
+ require "ucode/models"
8
+ require "ucode/glyphs/universal_set/idempotency"
9
+
10
+ module Ucode
11
+ module Glyphs
12
+ module UniversalSet
13
# Post-build validator for a universal-set build (TODO 31 §Post-
# build validation). Walks the manifest + glyphs directory and runs
# four structural checks:
#
#   1. `manifest_loadable` — `manifest.json` parses via
#      `Ucode::Models::UniversalSetManifest.from_hash`.
#   2. `glyph_files_present` — every entry has a corresponding
#      glyph file on disk.
#   3. `totals_reconcile` — `totals.codepoints_built` equals the
#      number of manifest entries. A manifest with no `totals`
#      section fails this check instead of crashing the validator.
#   4. `provenance_complete` — every entry has non-empty `tier`
#      and `source`.
#
# Tofu (pillar-3) investigation and per-tier / per-block breakdowns
# live in {CoverageReport}. The idempotency check (TODO 31 §5) is
# exercised by re-running the build, not by reading on-disk state.
#
# One call to {#validate} loads the manifest, builds a
# {Ucode::Models::ValidationReport}, and writes it to
# `<output_root>/reports/validation.json`.
#
# NOTE(review): `manifest_path`, `glyph_path`, `write_atomic` and
# `REPORTS_DIR` are not defined here; presumably they come from the
# included {Idempotency} module — confirm.
class Validator
  include Idempotency

  CHECK_MANIFEST = "manifest_loadable"
  CHECK_GLYPHS = "glyph_files_present"
  CHECK_TOTALS = "totals_reconcile"
  CHECK_PROVENANCE = "provenance_complete"
  ALL_CHECKS = [
    CHECK_MANIFEST, CHECK_GLYPHS, CHECK_TOTALS, CHECK_PROVENANCE
  ].freeze
  private_constant :ALL_CHECKS

  # @param output_root [String, Pathname] directory holding
  #   `manifest.json` + `glyphs/` + `reports/`.
  # @param unicode_version [String, nil] stamped onto the report;
  #   nil falls back to the manifest's recorded version.
  def initialize(output_root, unicode_version: nil)
    @output_root = Pathname.new(output_root)
    @unicode_version = unicode_version
  end

  # Run all checks and emit `reports/validation.json`.
  #
  # @return [Hash] { report:, report_path:, passed:, manifest_loaded: }
  def validate
    manifest, manifest_failures = load_manifest
    # A manifest that deserialized without an entries list is
    # treated as having zero entries.
    entries = manifest&.entries || []

    findings = manifest_failures.dup
    if manifest
      findings.concat(check_glyph_files(entries))
      findings.concat(check_totals(manifest, entries))
      findings.concat(check_provenance(entries))
    end

    report = build_report(entries, findings, manifest)
    report_path = write_report(report)
    {
      report: report,
      report_path: report_path,
      passed: report.totals.failures.zero?,
      manifest_loaded: !manifest.nil?,
    }
  end

  private

  # @return [Array] `[manifest, []]` on success, `[nil, [failure]]`
  #   when the file is absent, is not valid JSON, or cannot be
  #   deserialized.
  def load_manifest
    path = manifest_path(@output_root)
    unless path.exist?
      return [nil, [make_failure(CHECK_MANIFEST,
                                 "manifest.json not found at #{path}")]]
    end

    hash = JSON.parse(path.read)
    model = Ucode::Models::UniversalSetManifest.from_hash(hash)
    [model, []]
  rescue JSON::ParserError => e
    [nil, [make_failure(CHECK_MANIFEST,
                        "manifest JSON parse failed: #{e.message}")]]
  rescue StandardError => e
    [nil, [make_failure(CHECK_MANIFEST,
                        "manifest deserialization failed: #{e.class}: #{e.message}")]]
  end

  # One failure per entry whose glyph path does not exist.
  def check_glyph_files(entries)
    entries.each_with_object([]) do |entry, acc|
      path = glyph_path(@output_root, entry.id)
      next if path.exist?

      acc << make_failure(CHECK_GLYPHS,
                          "missing glyph file at #{path}",
                          codepoint: entry.codepoint)
    end
  end

  # Compare the recorded built-count against the entry count. A
  # missing `totals` section yields a nil count, which never equals
  # the entry count and is therefore reported as a failure.
  def check_totals(manifest, entries)
    entries_size = entries.size
    built = manifest.totals&.codepoints_built
    return [] if built == entries_size

    [make_failure(CHECK_TOTALS,
                  "totals.codepoints_built=#{built.inspect} but entries.size=#{entries_size}")]
  end

  def check_provenance(entries)
    entries.flat_map { |entry| provenance_findings_for(entry) }
  end

  # Up to two failures per entry: one for a blank tier, one for a
  # blank source.
  def provenance_findings_for(entry)
    findings = []
    if entry.tier.nil? || entry.tier.empty?
      findings << make_failure(CHECK_PROVENANCE, "entry has no tier",
                               codepoint: entry.codepoint)
    end
    if entry.source.nil? || entry.source.empty?
      findings << make_failure(CHECK_PROVENANCE, "entry has no source",
                               codepoint: entry.codepoint)
    end
    findings
  end

  def build_report(entries, findings, manifest)
    checks = ALL_CHECKS.map do |name|
      build_check_summary(name, findings, entries.size, manifest)
    end

    Ucode::Models::ValidationReport.new(
      unicode_version: (@unicode_version || manifest&.unicode_version).to_s,
      generated_at: Time.now.utc.iso8601,
      totals: Ucode::Models::ValidationReport::Totals.new(
        codepoints_checked: entries.size,
        failures: findings.length,
        checks_run: checks.count { |c| c.status != "skipped" },
        checks_passed: checks.count { |c| c.status == "passed" },
      ),
      checks: checks,
      failures: findings,
    )
  end

  def build_check_summary(name, findings, entries_size, manifest)
    count = findings.count { |f| f.check == name }

    Ucode::Models::ValidationReport::CheckSummary.new(
      name: name,
      status: check_status(name, count, manifest),
      total: check_total(name, entries_size, manifest),
      failures: count,
    )
  end

  # Every check other than `manifest_loadable` is "skipped" when the
  # manifest could not be loaded.
  def check_status(name, count, manifest)
    return "skipped" if manifest.nil? && name != CHECK_MANIFEST

    count.zero? ? "passed" : "failed"
  end

  # Number of items a check examined: 1 for the manifest check, the
  # entry count otherwise (0 when there is no manifest).
  def check_total(name, entries_size, manifest)
    return 1 if name == CHECK_MANIFEST
    return 0 if manifest.nil?

    entries_size
  end

  def write_report(report)
    path = @output_root.join(REPORTS_DIR, "validation.json")
    path.dirname.mkpath
    write_atomic(path, report.to_json(pretty: true))
    path
  end

  def make_failure(check, message, codepoint: nil)
    Ucode::Models::ValidationReport::Failure.new(
      codepoint: codepoint,
      block: nil,
      check: check,
      message: message,
    )
  end
end
202
+ end
203
+ end
204
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ucode
4
+ module Glyphs
5
# Universal glyph set — one SVG per assigned Unicode codepoint,
# sourced via the 4-tier resolver using the curated Tier 1 config
# from TODO 23. It serves as the canonical reference for "what
# Unicode 17 looks like": each codepoint has exactly one glyph with
# documented provenance, stored in a flat layout meant for fast
# lookup by audits (TODO 25) and the fontist.org consumer (TODO 27).
#
# Output layout (per TODO 24):
#
#   output/universal_glyph_set/
#   ├── manifest.json          # one entry per codepoint + totals
#   ├── glyphs/
#   │   ├── U+0000.svg
#   │   └── ...
#   └── reports/
#       ├── by_tier.json       # tier-1: N1, pillar-1: N2, ...
#       ├── by_block.json      # per-block tier breakdown
#       └── gaps.json          # assigned codepoints with no glyph
#
# Autoloaded components (alphabetical):
#
# - {Builder} drains a codepoint stream through the resolver and
#   writes glyphs + manifest atomically.
# - {CoverageReport} — coverage-side reporting (see its own file).
# - {Idempotency} wraps {Ucode::Repo::AtomicWrites} with the
#   "skip if SVG unchanged" semantic documented in TODO 24.
# - {ManifestAccumulator} is the thread-safe tally that produces
#   the final {Ucode::Models::UniversalSetManifest}.
# - {ManifestWriter} emits the manifest and per-tier / per-block /
#   gaps reports under the output root.
# - {PreBuildCheck} — pre-flight validation run before a build.
# - {Validator} — post-build structural validation.
module UniversalSet
  autoload :Builder, "ucode/glyphs/universal_set/builder"
  autoload :CoverageReport, "ucode/glyphs/universal_set/coverage_report"
  autoload :Idempotency, "ucode/glyphs/universal_set/idempotency"
  autoload :ManifestAccumulator, "ucode/glyphs/universal_set/manifest_accumulator"
  autoload :ManifestWriter, "ucode/glyphs/universal_set/manifest_writer"
  autoload :PreBuildCheck, "ucode/glyphs/universal_set/pre_build_check"
  autoload :Validator, "ucode/glyphs/universal_set/validator"
end
44
+ end
45
+ end
data/lib/ucode/glyphs.rb CHANGED
@@ -23,5 +23,11 @@ module Ucode
23
23
  autoload :LastResort, "ucode/glyphs/last_resort"
24
24
  autoload :EmbeddedFonts, "ucode/glyphs/embedded_fonts"
25
25
  autoload :RealFonts, "ucode/glyphs/real_fonts"
26
+ autoload :Source, "ucode/glyphs/source"
27
+ autoload :Resolver, "ucode/glyphs/resolver"
28
+ autoload :SourceConfig, "ucode/glyphs/source_config"
29
+ autoload :SourceBuilder, "ucode/glyphs/source_builder"
30
+ autoload :Sources, "ucode/glyphs/sources"
31
+ autoload :UniversalSet, "ucode/glyphs/universal_set"
26
32
  end
27
33
  end
@@ -18,6 +18,11 @@ module Ucode
18
18
  attribute :fontisan_version, :string
19
19
  attribute :source, :string
20
20
  attribute :generated_at, :string
21
+ # Which CoverageReference produced the per-block counts.
22
+ # "ucd" for UcdOnlyReference (default), "universal-set" for
23
+ # UniversalSetReference (TODO 25). nil on legacy reports —
24
+ # consumers should treat nil as "ucd".
25
+ attribute :reference_kind, :string
21
26
 
22
27
  key_value do
23
28
  map "unicode_version", to: :unicode_version
@@ -25,6 +30,7 @@ module Ucode
25
30
  map "fontisan_version", to: :fontisan_version
26
31
  map "source", to: :source
27
32
  map "generated_at", to: :generated_at
33
+ map "reference_kind", to: :reference_kind
28
34
  end
29
35
  end
30
36
  end
@@ -30,6 +30,12 @@ module Ucode
30
30
  attribute :status, :string
31
31
  attribute :missing_codepoints, :integer, collection: true, default: -> { [] }
32
32
  attribute :covered_codepoints, :integer, collection: true, default: -> { [] }
33
+ # Per-codepoint provenance for the missing set. Populated only
34
+ # when the audit ran against a UniversalSetReference (TODO 25).
35
+ # Empty for UCD-only audits — the field serializes as [] and
36
+ # consumers treat that as "no provenance available".
37
+ attribute :missing_codepoint_provenance, CodepointProvenance,
38
+ collection: true, default: -> { [] }
33
39
 
34
40
  key_value do
35
41
  map "name", to: :name
@@ -44,6 +50,7 @@ module Ucode
44
50
  map "status", to: :status
45
51
  map "missing_codepoints", to: :missing_codepoints
46
52
  map "covered_codepoints", to: :covered_codepoints
53
+ map "missing_codepoint_provenance", to: :missing_codepoint_provenance
47
54
  end
48
55
 
49
56
  # Derive the canonical status string for a block given its
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ module Audit
8
# One provenance row for a missing codepoint, stored in a
# {BlockSummary}'s `missing_codepoint_provenance` collection.
#
# Rows are populated only when the audit used a
# {Ucode::Audit::UniversalSetReference}.
# NOTE(review): this file previously stated that UCD-only audits
# omit the field entirely, while the comment on BlockSummary says
# the collection serializes as [] — confirm which is accurate.
#
# Wire shape (one entry per missing codepoint):
#
#   {
#     "codepoint": 10981,
#     "tier": "tier-1",
#     "source": "lentariso"
#   }
#
# `tier` and `source` mirror the universal-set manifest
# ({UniversalSetEntry}) so downstream renderers (TODO 26) can show
# the missing glyph together with its provenance.
class CodepointProvenance < Lutaml::Model::Serializable
  attribute :codepoint, :integer
  attribute :tier, :string
  attribute :source, :string

  # Each wire key maps to the attribute of the same name.
  key_value do
    %w[codepoint tier source].each do |key|
      map key, to: key.to_sym
    end
  end
end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ module Audit
8
# Per-face entry in a {ReleaseFormulaEntry}.
#
# A compact card derived from a per-face {AuditReport}: identity,
# coverage totals, and relative paths into the release tree
# (`index_path`, `html_path`). Per the original design note, the
# fontist.org renderer iterates these cards to build its font index
# and fetches the per-face audit subtree at `index_path`.
class ReleaseFaceEntry < Lutaml::Model::Serializable
  # Attribute name => type, in declaration (and wire) order.
  {
    postscript_name: :string,
    family_name: :string,
    weight_class: :integer,
    total_codepoints: :integer,
    covered_codepoints: :integer,
    blocks_complete: :integer,
    blocks_partial: :integer,
    source_sha256: :string,
    index_path: :string,
    html_path: :string,
  }.each do |field, type|
    attribute field, type
  end

  # Each wire key maps to the attribute of the same name.
  key_value do
    %w[
      postscript_name family_name weight_class total_codepoints
      covered_codepoints blocks_complete blocks_partial
      source_sha256 index_path html_path
    ].each do |key|
      map key, to: key.to_sym
    end
  end
end
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ module Audit
8
# Per-formula entry in a {ReleaseManifest}.
#
# A formula is a single fontist formula (one installable unit) and
# contributes one or more face entries to the release tree. `slug`
# is the formula's URL-safe identifier, used as the directory name
# under `<release_root>/audit/<slug>/`.
#
# `source_path` records where the original library audit ran, so a
# manifest reader can trace the audit back to its input directory.
class ReleaseFormulaEntry < Lutaml::Model::Serializable
  attribute :slug, :string
  attribute :source_path, :string
  attribute :faces_total, :integer
  attribute :faces, ReleaseFaceEntry, collection: true, default: -> { [] }

  # Each wire key maps to the attribute of the same name.
  key_value do
    %w[slug source_path faces_total faces].each do |key|
      map key, to: key.to_sym
    end
  end
end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ module Audit
8
# Top-level release manifest for the fontist.org-consumable
# artifact (TODO 27).
#
# There is one manifest per release tree, at
# `<release_root>/manifest.json`. It records the ucode and Unicode
# versions, an optional source-config sha256 (Tier 1 curation
# provenance), aggregate formula/face counts, the universal-set
# reference section, and the per-formula face index.
#
# Per the original design note, fontist.org's
# `scripts/fetch-data.sh` reads this manifest first to decide
# whether to fetch the universal-set zip and which per-formula
# audit subtrees to pull.
class ReleaseManifest < Lutaml::Model::Serializable
  # Scalar attributes: name => type, in declaration order.
  {
    ucode_version: :string,
    unicode_version: :string,
    generated_at: :string,
    source_config_sha256: :string,
    formulas_total: :integer,
    faces_total: :integer,
  }.each do |field, type|
    attribute field, type
  end
  attribute :universal_set, ReleaseUniversalSet
  attribute :formulas, ReleaseFormulaEntry, collection: true, default: -> { [] }

  # Each wire key maps to the attribute of the same name.
  key_value do
    %w[
      ucode_version unicode_version generated_at source_config_sha256
      formulas_total faces_total universal_set formulas
    ].each do |key|
      map key, to: key.to_sym
    end
  end
end
41
+ end
42
+ end
43
+ end