ucode 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -0
- data/Gemfile.lock +2 -2
- data/TODO.full/00-README.md +116 -0
- data/TODO.full/01-panglyph-vision.md +112 -0
- data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
- data/TODO.full/03-panglyph-font-builder.md +201 -0
- data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
- data/TODO.full/05-ucode-0-1-1-release.md +139 -0
- data/TODO.full/06-fontisan-remove-audit.md +142 -0
- data/TODO.full/07-fontisan-remove-ucd.md +125 -0
- data/TODO.full/08-archive-private-bin-build.md +143 -0
- data/TODO.full/09-archive-public-structure.md +164 -0
- data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
- data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
- data/TODO.full/12-implementation-order.md +216 -0
- data/TODO.full/13-fontisan-font-writer-api.md +189 -0
- data/TODO.full/14-fontisan-table-writers.md +66 -0
- data/TODO.full/15-panglyph-builder-real.md +82 -0
- data/TODO.full/16-archive-public-sync-workflows.md +167 -0
- data/TODO.full/17-fontist-org-font-picker.md +73 -0
- data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
- data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
- data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
- data/TODO.new/00-README.md +30 -0
- data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
- data/TODO.new/24-universal-glyph-set-build.md +189 -0
- data/TODO.new/25-font-audit-against-universal-set.md +195 -0
- data/TODO.new/26-missing-glyph-reporter.md +189 -0
- data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
- data/TODO.new/28-implementation-order-update.md +187 -0
- data/TODO.new/29-universal-set-curation-uc17.md +312 -0
- data/TODO.new/30-tier1-font-acquisition.md +241 -0
- data/TODO.new/31-universal-set-production-build.md +205 -0
- data/TODO.new/32-uc17-coverage-matrix.md +165 -0
- data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
- data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
- data/TODO.new/35-universal-set-production-run.md +160 -0
- data/TODO.new/36-per-font-coverage-audit.md +145 -0
- data/TODO.new/37-coverage-highlight-reporter.md +125 -0
- data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
- data/TODO.new/39-implementation-order-update-32-38.md +258 -0
- data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
- data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
- data/config/specialist_fonts.yml +102 -0
- data/config/unicode17_tier1_fonts.yml +42 -0
- data/config/unicode17_universal_glyph_set.yml +293 -0
- data/lib/ucode/audit/block_aggregator.rb +57 -29
- data/lib/ucode/audit/browser/face_page.rb +128 -0
- data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
- data/lib/ucode/audit/browser/library_page.rb +74 -0
- data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
- data/lib/ucode/audit/browser/template.rb +47 -0
- data/lib/ucode/audit/browser/templates/face.css +200 -0
- data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
- data/lib/ucode/audit/browser/templates/face.js +298 -0
- data/lib/ucode/audit/browser/templates/library.css +119 -0
- data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
- data/lib/ucode/audit/browser/templates/library.js +99 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
- data/lib/ucode/audit/browser.rb +32 -0
- data/lib/ucode/audit/context.rb +27 -1
- data/lib/ucode/audit/coverage_reference.rb +103 -0
- data/lib/ucode/audit/differ.rb +121 -0
- data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
- data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
- data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
- data/lib/ucode/audit/emitter/face_directory.rb +212 -0
- data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
- data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
- data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
- data/lib/ucode/audit/emitter/paths.rb +312 -0
- data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
- data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
- data/lib/ucode/audit/emitter.rb +29 -0
- data/lib/ucode/audit/extractors/aggregations.rb +31 -2
- data/lib/ucode/audit/face_auditor.rb +86 -0
- data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
- data/lib/ucode/audit/formatters/audit_text.rb +411 -0
- data/lib/ucode/audit/formatters/color.rb +48 -0
- data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
- data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
- data/lib/ucode/audit/formatters.rb +23 -0
- data/lib/ucode/audit/library_aggregator.rb +86 -0
- data/lib/ucode/audit/library_auditor.rb +105 -0
- data/lib/ucode/audit/release/emitter.rb +152 -0
- data/lib/ucode/audit/release/face_card.rb +93 -0
- data/lib/ucode/audit/release/formula_audits.rb +50 -0
- data/lib/ucode/audit/release/library_index_builder.rb +78 -0
- data/lib/ucode/audit/release/manifest_builder.rb +127 -0
- data/lib/ucode/audit/release.rb +42 -0
- data/lib/ucode/audit/ucd_only_reference.rb +81 -0
- data/lib/ucode/audit/universal_set_reference.rb +136 -0
- data/lib/ucode/audit.rb +31 -0
- data/lib/ucode/cli.rb +339 -33
- data/lib/ucode/commands/audit/browser_command.rb +82 -0
- data/lib/ucode/commands/audit/collection_command.rb +103 -0
- data/lib/ucode/commands/audit/compare_command.rb +188 -0
- data/lib/ucode/commands/audit/font_command.rb +140 -0
- data/lib/ucode/commands/audit/library_command.rb +87 -0
- data/lib/ucode/commands/audit/reference_builder.rb +64 -0
- data/lib/ucode/commands/audit.rb +20 -0
- data/lib/ucode/commands/block_feed.rb +73 -0
- data/lib/ucode/commands/canonical_build.rb +138 -0
- data/lib/ucode/commands/fetch.rb +37 -1
- data/lib/ucode/commands/release.rb +115 -0
- data/lib/ucode/commands/universal_set.rb +211 -0
- data/lib/ucode/commands.rb +5 -0
- data/lib/ucode/coordinator/indices.rb +11 -0
- data/lib/ucode/coordinator.rb +138 -5
- data/lib/ucode/error.rb +30 -2
- data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
- data/lib/ucode/fetch/font_fetcher.rb +16 -0
- data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
- data/lib/ucode/fetch.rb +7 -3
- data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
- data/lib/ucode/glyphs/real_fonts.rb +1 -0
- data/lib/ucode/glyphs/resolver.rb +62 -0
- data/lib/ucode/glyphs/source.rb +48 -0
- data/lib/ucode/glyphs/source_builder.rb +61 -0
- data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
- data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
- data/lib/ucode/glyphs/source_config.rb +104 -0
- data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
- data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
- data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
- data/lib/ucode/glyphs/sources.rb +20 -0
- data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
- data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
- data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
- data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
- data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
- data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
- data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
- data/lib/ucode/glyphs/universal_set.rb +45 -0
- data/lib/ucode/glyphs.rb +6 -0
- data/lib/ucode/models/audit/baseline.rb +6 -0
- data/lib/ucode/models/audit/block_summary.rb +7 -0
- data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
- data/lib/ucode/models/audit/release_face.rb +42 -0
- data/lib/ucode/models/audit/release_formula.rb +33 -0
- data/lib/ucode/models/audit/release_manifest.rb +43 -0
- data/lib/ucode/models/audit/release_universal_set.rb +37 -0
- data/lib/ucode/models/audit.rb +9 -0
- data/lib/ucode/models/block.rb +2 -0
- data/lib/ucode/models/build_report.rb +109 -0
- data/lib/ucode/models/codepoint/glyph.rb +42 -0
- data/lib/ucode/models/codepoint.rb +3 -0
- data/lib/ucode/models/glyph_source.rb +86 -0
- data/lib/ucode/models/glyph_source_map.rb +138 -0
- data/lib/ucode/models/specialist_font.rb +70 -0
- data/lib/ucode/models/specialist_font_manifest.rb +48 -0
- data/lib/ucode/models/unihan_entry.rb +81 -9
- data/lib/ucode/models/unihan_field.rb +21 -0
- data/lib/ucode/models/universal_set_entry.rb +47 -0
- data/lib/ucode/models/universal_set_manifest.rb +78 -0
- data/lib/ucode/models/validation_report.rb +99 -0
- data/lib/ucode/models.rb +9 -0
- data/lib/ucode/parsers/named_sequences.rb +5 -5
- data/lib/ucode/parsers/unihan.rb +50 -19
- data/lib/ucode/repo/aggregate_writer.rb +34 -2
- data/lib/ucode/repo/block_feed_emitter.rb +153 -0
- data/lib/ucode/repo/build_report_accumulator.rb +138 -0
- data/lib/ucode/repo/build_report_writer.rb +46 -0
- data/lib/ucode/repo/build_validator.rb +229 -0
- data/lib/ucode/repo/codepoint_writer.rb +50 -1
- data/lib/ucode/repo/paths.rb +8 -0
- data/lib/ucode/repo.rb +4 -0
- data/lib/ucode/version.rb +1 -1
- data/schema/block-feed.output.schema.yml +134 -0
- metadata +143 -2
- data/ucode.gemspec +0 -56
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require "ucode/error"
|
|
6
|
+
require "ucode/glyphs/real_fonts/font_locator"
|
|
7
|
+
require "ucode/glyphs/source_config"
|
|
8
|
+
require "ucode/glyphs/source_config/coverage_assertion"
|
|
9
|
+
require "ucode/glyphs/source_config/gap_report"
|
|
10
|
+
require "ucode/models/glyph_source_map"
|
|
11
|
+
|
|
12
|
+
module Ucode
|
|
13
|
+
module Glyphs
|
|
14
|
+
module UniversalSet
|
|
15
|
+
# Plain value object returned by {PreBuildCheck}. It only records what
# the check found; whether those findings abort the build is decided by
# the check, so callers can read a report without rescuing anything.
PreBuildReport = Struct.new(
  :source_config_path,
  :unicode_version,
  :config_loaded,
  :missing_fonts,
  :coverage_gaps,
  keyword_init: true,
) do
  # Truthy when the config loaded and no font is missing. Coverage gaps
  # are not consulted here.
  #
  # @return [Boolean, nil] the falsy `config_loaded` value itself when
  #   the config did not load, otherwise whether `missing_fonts` is empty.
  def ok?
    return config_loaded unless config_loaded

    missing_fonts.empty?
  end
end
|
|
26
|
+
|
|
27
|
+
# Pre-flight validation for a universal-set build. Runs the
|
|
28
|
+
# three checks TODO 31 specifies:
|
|
29
|
+
#
|
|
30
|
+
# 1. **Source config loads cleanly.** `SourceConfig.new(path:)`
|
|
31
|
+
# returns a map without raising, and the file exists.
|
|
32
|
+
# 2. **All fonts present.** Every source in the map resolves to
|
|
33
|
+
# a file on disk (kind=path) or via fontist's index
|
|
34
|
+
# (kind=fontist, install: false). Missing fonts are listed.
|
|
35
|
+
# 3. **Coverage assertion runs.** TODO 29's CoverageAssertion
|
|
36
|
+
# walks every assigned codepoint; gaps are surfaced but do
|
|
37
|
+
# not abort (expected for residual curation cases).
|
|
38
|
+
#
|
|
39
|
+
# The check raises {Ucode::UniversalSetPreBuildError} when
|
|
40
|
+
# `missing_fonts` is non-empty or the config fails to load. The
|
|
41
|
+
# CLI catches this and renders the failing checks; the build
|
|
42
|
+
# never starts with known-bad inputs.
|
|
43
|
+
class PreBuildCheck
|
|
44
|
+
# @param source_config_path [String, Pathname]
|
|
45
|
+
# @param database [Ucode::Database] open database for the
|
|
46
|
+
# target Unicode version. Used by CoverageAssertion.
|
|
47
|
+
# @param cmaps [#covers?] defaults to RealFonts::CmapCache.
|
|
48
|
+
# Injectable for testing (e.g. StaticCmaps).
|
|
49
|
+
# @param font_locator [#locate] defaults to a fresh
|
|
50
|
+
# FontLocator. Injectable for testing.
|
|
51
|
+
def initialize(source_config_path:, database:, cmaps: nil,
|
|
52
|
+
font_locator: RealFonts::FontLocator.new)
|
|
53
|
+
@source_config_path = Pathname.new(source_config_path)
|
|
54
|
+
@database = database
|
|
55
|
+
@cmaps = cmaps || RealFonts::CmapCache.new
|
|
56
|
+
@font_locator = font_locator
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
# @raise [Ucode::UniversalSetPreBuildError] when missing_fonts
|
|
60
|
+
# is non-empty or the source config fails to load.
|
|
61
|
+
# @return [PreBuildReport]
|
|
62
|
+
def call
|
|
63
|
+
report = build_report
|
|
64
|
+
unless report.ok?
|
|
65
|
+
raise Ucode::UniversalSetPreBuildError.new(
|
|
66
|
+
"pre-build validation failed",
|
|
67
|
+
context: {
|
|
68
|
+
source_config_path: @source_config_path.to_s,
|
|
69
|
+
missing_fonts: report.missing_fonts,
|
|
70
|
+
config_loaded: report.config_loaded,
|
|
71
|
+
},
|
|
72
|
+
)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
report
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
private
|
|
79
|
+
|
|
80
|
+
def build_report
|
|
81
|
+
config, loaded = load_config
|
|
82
|
+
missing = loaded ? collect_missing_fonts(config.map) : []
|
|
83
|
+
gaps = loaded ? run_coverage_assertion(config.map) : empty_gap_report
|
|
84
|
+
|
|
85
|
+
PreBuildReport.new(
|
|
86
|
+
source_config_path: @source_config_path.to_s,
|
|
87
|
+
unicode_version: @database.ucd_version,
|
|
88
|
+
config_loaded: loaded,
|
|
89
|
+
missing_fonts: missing,
|
|
90
|
+
coverage_gaps: gaps,
|
|
91
|
+
)
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
def load_config
|
|
95
|
+
config = SourceConfig.new(path: @source_config_path)
|
|
96
|
+
[config, config.exist?]
|
|
97
|
+
rescue StandardError => e
|
|
98
|
+
warn_with(e)
|
|
99
|
+
[nil, false]
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def collect_missing_fonts(source_map)
|
|
103
|
+
unique_sources(source_map).each_with_object([]) do |src, acc|
|
|
104
|
+
acc.concat(findings_for(src))
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Gathers every source the map mentions: the per-block sources first,
# then the top-level defaults. Two sources with the same
# (kind, label, path) triple count as one, so a font shared by many
# blocks is only checked once. The first occurrence is the one kept.
#
# @param source_map [#block_ids, #sources_for, #default_sources]
# @return [Array] de-duplicated sources in first-seen order.
def unique_sources(source_map)
  per_block = source_map.block_ids.flat_map { |id| source_map.sources_for(id) }
  candidates = per_block + source_map.default_sources
  candidates.uniq { |source| [source.kind, source.label, source.path] }
end
|
|
120
|
+
|
|
121
|
+
# Resolve one source against the filesystem / fontist index.
#
# @param src [#kind, #label, #path] one source entry from the map.
# @return [Array<Hash>] findings for this source; empty when it
#   resolves. An entry with no kind, or with a kind this check does not
#   recognise, yields a finding instead of being skipped.
def findings_for(src)
  kind = safe_kind(src)
  case kind
  when :path
    path_resolves?(src.path) ? [] : [missing_path(src)]
  when :fontist, :system
    fontist_resolves?(src.label) ? [] : [missing_fontist(src, kind)]
  when nil
    [malformed_entry(src)]
  else
    # Without this branch the `case` evaluates to nil for an unknown
    # kind, and the caller's `acc.concat(nil)` raises TypeError.
    [{ kind: kind.to_s, label: src.label, spec: src.path || src.label,
       reason: "unsupported source kind #{kind.to_s.inspect} — must be fontist, path, or system" }]
  end
end
|
|
134
|
+
|
|
135
|
+
# Normalises a source's declared kind.
#
# A missing or empty `kind` is reported as nil, which the caller treats
# as a finding in its own right: every entry has to declare a kind.
#
# @param src [#kind]
# @return [Symbol, nil] the kind as a symbol, or nil when undeclared.
def safe_kind(src)
  declared = src.kind
  return nil if declared.nil? || declared.empty?

  declared.to_sym
end
|
|
141
|
+
|
|
142
|
+
# Whether a `kind=path` source points at at least one regular file.
#
# The expanded path is tried literally first, so an existing file whose
# name contains glob metacharacters (`[`, `{`, `*`, `?`) is found; only
# then is it interpreted as a glob pattern.
# NOTE(review): confirm the build-time loader also accepts literal paths
# containing such characters.
#
# @param raw_path [String, nil]
# @return [Boolean] false for nil/empty input or when nothing matches.
def path_resolves?(raw_path)
  return false if raw_path.nil? || raw_path.empty?

  expanded = File.expand_path(raw_path)
  File.file?(expanded) || Dir.glob(expanded).any? { |p| File.file?(p) }
end
|
|
148
|
+
|
|
149
|
+
def fontist_resolves?(label)
|
|
150
|
+
return false if label.nil? || label.empty?
|
|
151
|
+
|
|
152
|
+
result = @font_locator.locate(label, install: false)
|
|
153
|
+
!result.nil? && !result.path.nil?
|
|
154
|
+
rescue StandardError
|
|
155
|
+
false
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
def missing_path(src)
|
|
159
|
+
{ kind: "path", label: src.label, spec: src.path,
|
|
160
|
+
reason: "file not found at #{src.path.inspect}" }
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
def missing_fontist(src, kind)
|
|
164
|
+
{ kind: kind.to_s, label: src.label, spec: src.label,
|
|
165
|
+
reason: "fontist could not resolve formula #{src.label.inspect}" }
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
def malformed_entry(src)
|
|
169
|
+
{ kind: "(missing)", label: src.label,
|
|
170
|
+
spec: src.path || src.label,
|
|
171
|
+
reason: "source entry has no `kind` field — must be fontist, path, or system" }
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
def run_coverage_assertion(source_map)
|
|
175
|
+
SourceConfig::CoverageAssertion.new(
|
|
176
|
+
source_map: source_map, database: @database, cmaps: @cmaps,
|
|
177
|
+
).call
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
# Placeholder gap report used when the source config did not load, so
# the coverage assertion could not run.
#
# @return [SourceConfig::GapReport] zero gaps, stamped with the
#   database's UCD version and the current UTC time.
def empty_gap_report
  SourceConfig::GapReport.new(
    unicode_version: @database.ucd_version,
    # Same text Time#iso8601 produces for a UTC time, but it works
    # without `require "time"`, which this file does not load itself.
    generated_at: Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ"),
    gaps_by_block: {}.freeze,
    total_gaps: 0,
  )
end
|
|
188
|
+
|
|
189
|
+
def warn_with(error)
|
|
190
|
+
Ucode.configuration.logger&.warn do
|
|
191
|
+
"pre-build: source config failed to load: #{error.class}: #{error.message}"
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
end
|
|
197
|
+
end
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "pathname"
|
|
5
|
+
require "time"
|
|
6
|
+
|
|
7
|
+
require "ucode/models"
|
|
8
|
+
require "ucode/glyphs/universal_set/idempotency"
|
|
9
|
+
|
|
10
|
+
module Ucode
|
|
11
|
+
module Glyphs
|
|
12
|
+
module UniversalSet
|
|
13
|
+
# Post-build validator for a universal-set build (TODO 31 §Post-
|
|
14
|
+
# build validation). Walks the manifest + glyphs directory and
|
|
15
|
+
# runs the four structural checks:
|
|
16
|
+
#
|
|
17
|
+
# 1. `manifest_loadable` — `manifest.json` parses via
|
|
18
|
+
# `Ucode::Models::UniversalSetManifest.from_hash`.
|
|
19
|
+
# 2. `glyph_files_present` — every entry has a corresponding
|
|
20
|
+
# `glyphs/<id>.svg` on disk.
|
|
21
|
+
# 3. `totals_reconcile` — manifest totals match the actual
|
|
22
|
+
# entry counts (`built == entries.size`).
|
|
23
|
+
# 4. `provenance_complete` — every entry has non-empty `tier`
|
|
24
|
+
# and `source`.
|
|
25
|
+
#
|
|
26
|
+
# Tofu (pillar-3) investigation and per-tier / per-block
|
|
27
|
+
# breakdowns live in {CoverageReport} — those are coverage
|
|
28
|
+
# questions, not structural ones. The idempotency check (TODO 31
|
|
29
|
+
# §5) is exercised by re-running the build, not by reading
|
|
30
|
+
# on-disk state.
|
|
31
|
+
#
|
|
32
|
+
# The validator is stateless from the outside: one call to
|
|
33
|
+
# {#validate} walks the manifest, builds a
|
|
34
|
+
# {Ucode::Models::ValidationReport}, and writes it atomically to
|
|
35
|
+
# `<output_root>/reports/validation.json`. Safe to re-run.
|
|
36
|
+
class Validator
|
|
37
|
+
include Idempotency
|
|
38
|
+
|
|
39
|
+
CHECK_MANIFEST = "manifest_loadable"
|
|
40
|
+
CHECK_GLYPHS = "glyph_files_present"
|
|
41
|
+
CHECK_TOTALS = "totals_reconcile"
|
|
42
|
+
CHECK_PROVENANCE = "provenance_complete"
|
|
43
|
+
ALL_CHECKS = [
|
|
44
|
+
CHECK_MANIFEST, CHECK_GLYPHS, CHECK_TOTALS, CHECK_PROVENANCE
|
|
45
|
+
].freeze
|
|
46
|
+
private_constant :ALL_CHECKS
|
|
47
|
+
|
|
48
|
+
# @param output_root [String, Pathname] directory holding
|
|
49
|
+
# `manifest.json` + `glyphs/` + `reports/`.
|
|
50
|
+
# @param unicode_version [String, nil] stamped onto the report;
|
|
51
|
+
# nil falls back to the manifest's recorded version.
|
|
52
|
+
def initialize(output_root, unicode_version: nil)
|
|
53
|
+
@output_root = Pathname.new(output_root)
|
|
54
|
+
@unicode_version = unicode_version
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Walk the manifest + glyphs dir, run all checks, emit
|
|
58
|
+
# `reports/validation.json`. Returns the structured outcome.
|
|
59
|
+
#
|
|
60
|
+
# @return [Hash] { report:, report_path:, passed:, manifest_loaded: }
|
|
61
|
+
def validate
|
|
62
|
+
manifest, manifest_failures = load_manifest
|
|
63
|
+
entries = manifest ? manifest.entries : []
|
|
64
|
+
|
|
65
|
+
findings = manifest_failures.dup
|
|
66
|
+
if manifest
|
|
67
|
+
findings.concat(check_glyph_files(entries))
|
|
68
|
+
findings.concat(check_totals(manifest))
|
|
69
|
+
findings.concat(check_provenance(entries))
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
report = build_report(entries, findings, manifest)
|
|
73
|
+
report_path = write_report(report)
|
|
74
|
+
{
|
|
75
|
+
report: report,
|
|
76
|
+
report_path: report_path,
|
|
77
|
+
passed: report.totals.failures.zero?,
|
|
78
|
+
manifest_loaded: !manifest.nil?,
|
|
79
|
+
}
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
private
|
|
83
|
+
|
|
84
|
+
def load_manifest
|
|
85
|
+
path = manifest_path(@output_root)
|
|
86
|
+
unless path.exist?
|
|
87
|
+
return [nil, [make_failure(CHECK_MANIFEST,
|
|
88
|
+
"manifest.json not found at #{path}")]]
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
hash = JSON.parse(path.read)
|
|
92
|
+
model = Ucode::Models::UniversalSetManifest.from_hash(hash)
|
|
93
|
+
[model, []]
|
|
94
|
+
rescue JSON::ParserError => e
|
|
95
|
+
[nil, [make_failure(CHECK_MANIFEST,
|
|
96
|
+
"manifest JSON parse failed: #{e.message}")]]
|
|
97
|
+
rescue StandardError => e
|
|
98
|
+
[nil, [make_failure(CHECK_MANIFEST,
|
|
99
|
+
"manifest deserialization failed: #{e.class}: #{e.message}")]]
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
# `glyph_files_present` check: every manifest entry must have its glyph
# on disk as a regular file.
#
# @param entries [Array<#id, #codepoint>] manifest entries.
# @return [Array<Ucode::Models::ValidationReport::Failure>] one failure
#   per entry whose glyph file is absent.
def check_glyph_files(entries)
  entries.each_with_object([]) do |entry, acc|
    path = glyph_path(@output_root, entry.id)
    # A regular file is required: a directory sitting at the glyph path
    # would satisfy a bare existence test but is not a glyph.
    next if File.file?(path)

    acc << make_failure(CHECK_GLYPHS,
                        "missing glyph file at #{path}",
                        codepoint: entry.codepoint)
  end
end
|
|
112
|
+
|
|
113
|
+
# `totals_reconcile` check: the manifest's recorded
# `totals.codepoints_built` must equal the number of entries.
#
# A manifest that parsed but has no `totals` (or nil `entries`) is
# reported as a failure here instead of raising NoMethodError.
#
# @param manifest [Ucode::Models::UniversalSetManifest]
# @return [Array<Ucode::Models::ValidationReport::Failure>] empty when
#   the counts agree, otherwise a single failure.
def check_totals(manifest)
  entries_size = Array(manifest.entries).size
  built = manifest.totals&.codepoints_built
  return [] if built == entries_size

  # `inspect` keeps integers unchanged and renders a missing value as "nil".
  [make_failure(CHECK_TOTALS,
                "totals.codepoints_built=#{built.inspect} but entries.size=#{entries_size}")]
end
|
|
121
|
+
|
|
122
|
+
def check_provenance(entries)
|
|
123
|
+
entries.each_with_object([]) do |entry, acc|
|
|
124
|
+
acc.concat(provenance_findings_for(entry))
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
# `provenance_complete` check for a single entry: both `tier` and
# `source` must be present and non-empty.
#
# @param entry [#tier, #source, #codepoint]
# @return [Array<Ucode::Models::ValidationReport::Failure>] zero, one or
#   two failures, tier first.
def provenance_findings_for(entry)
  required = [
    [entry.tier, "entry has no tier"],
    [entry.source, "entry has no source"],
  ]

  blank = required.select { |value, _message| value.nil? || value.empty? }
  blank.map do |_value, message|
    make_failure(CHECK_PROVENANCE, message, codepoint: entry.codepoint)
  end
end
|
|
140
|
+
|
|
141
|
+
def build_report(entries, findings, manifest)
|
|
142
|
+
checks = ALL_CHECKS.map do |name|
|
|
143
|
+
build_check_summary(name, findings, entries.size, manifest)
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
Ucode::Models::ValidationReport.new(
|
|
147
|
+
unicode_version: (@unicode_version || manifest&.unicode_version).to_s,
|
|
148
|
+
generated_at: Time.now.utc.iso8601,
|
|
149
|
+
totals: Ucode::Models::ValidationReport::Totals.new(
|
|
150
|
+
codepoints_checked: entries.size,
|
|
151
|
+
failures: findings.length,
|
|
152
|
+
checks_run: checks.count { |c| c.status != "skipped" },
|
|
153
|
+
checks_passed: checks.count { |c| c.status == "passed" },
|
|
154
|
+
),
|
|
155
|
+
checks: checks,
|
|
156
|
+
failures: findings,
|
|
157
|
+
)
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def build_check_summary(name, findings, entries_size, manifest)
|
|
161
|
+
count = findings.count { |f| f.check == name }
|
|
162
|
+
status = check_status(name, count, manifest)
|
|
163
|
+
total = check_total(name, entries_size, manifest)
|
|
164
|
+
|
|
165
|
+
Ucode::Models::ValidationReport::CheckSummary.new(
|
|
166
|
+
name: name,
|
|
167
|
+
status: status,
|
|
168
|
+
total: total,
|
|
169
|
+
failures: count,
|
|
170
|
+
)
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
# Status label for one check in the report.
#
# When no manifest could be loaded, every check other than the manifest
# check is "skipped"; otherwise the status follows the failure count.
#
# @param name [String] check name.
# @param count [Integer] failures recorded for that check.
# @param manifest [Object, nil] the loaded manifest, nil if loading failed.
# @return [String] "skipped", "passed" or "failed".
def check_status(name, count, manifest)
  if manifest.nil? && name != CHECK_MANIFEST
    "skipped"
  elsif count.zero?
    "passed"
  else
    "failed"
  end
end
|
|
178
|
+
|
|
179
|
+
# Number of items a check is reported as covering.
#
# The manifest check always counts as 1. Every other check counts 0
# when no manifest was loaded, and the number of entries otherwise.
#
# @param name [String] check name.
# @param entries_size [Integer] number of manifest entries.
# @param manifest [Object, nil] the loaded manifest, nil if loading failed.
# @return [Integer]
def check_total(name, entries_size, manifest)
  if name == CHECK_MANIFEST
    1
  elsif manifest.nil?
    0
  else
    entries_size
  end
end
|
|
185
|
+
|
|
186
|
+
def write_report(report)
|
|
187
|
+
path = @output_root.join(REPORTS_DIR, "validation.json")
|
|
188
|
+
path.dirname.mkpath
|
|
189
|
+
write_atomic(path, report.to_json(pretty: true))
|
|
190
|
+
path
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
def make_failure(check, message, codepoint: nil)
|
|
194
|
+
Ucode::Models::ValidationReport::Failure.new(
|
|
195
|
+
codepoint: codepoint,
|
|
196
|
+
block: nil,
|
|
197
|
+
check: check,
|
|
198
|
+
message: message,
|
|
199
|
+
)
|
|
200
|
+
end
|
|
201
|
+
end
|
|
202
|
+
end
|
|
203
|
+
end
|
|
204
|
+
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
|
|
4
|
+
module Glyphs
|
|
5
|
+
# Universal glyph set — one SVG per assigned Unicode codepoint,
|
|
6
|
+
# sourced via the 4-tier resolver using the curated Tier 1 config
|
|
7
|
+
# from TODO 23. The set is the canonical reference for "what
|
|
8
|
+
# Unicode 17 looks like" — every codepoint has exactly one glyph,
|
|
9
|
+
# with documented provenance, in a flat layout designed for fast
|
|
10
|
+
# lookup by audits (TODO 25) and the fontist.org consumer
|
|
11
|
+
# (TODO 27).
|
|
12
|
+
#
|
|
13
|
+
# Output layout (per TODO 24):
|
|
14
|
+
#
|
|
15
|
+
# output/universal_glyph_set/
|
|
16
|
+
# ├── manifest.json # one entry per codepoint + totals
|
|
17
|
+
# ├── glyphs/
|
|
18
|
+
# │ ├── U+0000.svg
|
|
19
|
+
# │ └── ...
|
|
20
|
+
# └── reports/
|
|
21
|
+
# ├── by_tier.json # tier-1: N1, pillar-1: N2, ...
|
|
22
|
+
# ├── by_block.json # per-block tier breakdown
|
|
23
|
+
# └── gaps.json # assigned codepoints with no glyph
|
|
24
|
+
#
|
|
25
|
+
# Components:
|
|
26
|
+
#
|
|
27
|
+
# - {Builder} drains a codepoint stream through the resolver and
|
|
28
|
+
# writes glyphs + manifest atomically.
|
|
29
|
+
# - {ManifestAccumulator} is the thread-safe tally that produces
|
|
30
|
+
# the final {Ucode::Models::UniversalSetManifest}.
|
|
31
|
+
# - {ManifestWriter} emits the manifest and per-tier / per-block /
|
|
32
|
+
# gaps reports under the output root.
|
|
33
|
+
# - {Idempotency} wraps {Ucode::Repo::AtomicWrites} with the
|
|
34
|
+
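#   "skip if SVG unchanged" semantic documented in TODO 24.
# - {PreBuildCheck} validates the source config and font availability
#   before a build starts.
# - {Validator} runs the post-build structural checks and writes
#   `reports/validation.json`.
# - {CoverageReport} holds the per-tier / per-block coverage breakdowns.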
|
|
35
|
+
module UniversalSet
|
|
36
|
+
autoload :Builder, "ucode/glyphs/universal_set/builder"
|
|
37
|
+
autoload :ManifestAccumulator, "ucode/glyphs/universal_set/manifest_accumulator"
|
|
38
|
+
autoload :ManifestWriter, "ucode/glyphs/universal_set/manifest_writer"
|
|
39
|
+
autoload :Idempotency, "ucode/glyphs/universal_set/idempotency"
|
|
40
|
+
autoload :PreBuildCheck, "ucode/glyphs/universal_set/pre_build_check"
|
|
41
|
+
autoload :Validator, "ucode/glyphs/universal_set/validator"
|
|
42
|
+
autoload :CoverageReport, "ucode/glyphs/universal_set/coverage_report"
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
data/lib/ucode/glyphs.rb
CHANGED
|
@@ -23,5 +23,11 @@ module Ucode
|
|
|
23
23
|
autoload :LastResort, "ucode/glyphs/last_resort"
|
|
24
24
|
autoload :EmbeddedFonts, "ucode/glyphs/embedded_fonts"
|
|
25
25
|
autoload :RealFonts, "ucode/glyphs/real_fonts"
|
|
26
|
+
autoload :Source, "ucode/glyphs/source"
|
|
27
|
+
autoload :Resolver, "ucode/glyphs/resolver"
|
|
28
|
+
autoload :SourceConfig, "ucode/glyphs/source_config"
|
|
29
|
+
autoload :SourceBuilder, "ucode/glyphs/source_builder"
|
|
30
|
+
autoload :Sources, "ucode/glyphs/sources"
|
|
31
|
+
autoload :UniversalSet, "ucode/glyphs/universal_set"
|
|
26
32
|
end
|
|
27
33
|
end
|
|
@@ -18,6 +18,11 @@ module Ucode
|
|
|
18
18
|
attribute :fontisan_version, :string
|
|
19
19
|
attribute :source, :string
|
|
20
20
|
attribute :generated_at, :string
|
|
21
|
+
# Which CoverageReference produced the per-block counts.
|
|
22
|
+
# "ucd" for UcdOnlyReference (default), "universal-set" for
|
|
23
|
+
# UniversalSetReference (TODO 25). nil on legacy reports —
|
|
24
|
+
# consumers should treat nil as "ucd".
|
|
25
|
+
attribute :reference_kind, :string
|
|
21
26
|
|
|
22
27
|
key_value do
|
|
23
28
|
map "unicode_version", to: :unicode_version
|
|
@@ -25,6 +30,7 @@ module Ucode
|
|
|
25
30
|
map "fontisan_version", to: :fontisan_version
|
|
26
31
|
map "source", to: :source
|
|
27
32
|
map "generated_at", to: :generated_at
|
|
33
|
+
map "reference_kind", to: :reference_kind
|
|
28
34
|
end
|
|
29
35
|
end
|
|
30
36
|
end
|
|
@@ -30,6 +30,12 @@ module Ucode
|
|
|
30
30
|
attribute :status, :string
|
|
31
31
|
attribute :missing_codepoints, :integer, collection: true, default: -> { [] }
|
|
32
32
|
attribute :covered_codepoints, :integer, collection: true, default: -> { [] }
|
|
33
|
+
# Per-codepoint provenance for the missing set. Populated only
|
|
34
|
+
# when the audit ran against a UniversalSetReference (TODO 25).
|
|
35
|
+
# Empty for UCD-only audits — the field serializes as [] and
|
|
36
|
+
# consumers treat that as "no provenance available".
|
|
37
|
+
attribute :missing_codepoint_provenance, CodepointProvenance,
|
|
38
|
+
collection: true, default: -> { [] }
|
|
33
39
|
|
|
34
40
|
key_value do
|
|
35
41
|
map "name", to: :name
|
|
@@ -44,6 +50,7 @@ module Ucode
|
|
|
44
50
|
map "status", to: :status
|
|
45
51
|
map "missing_codepoints", to: :missing_codepoints
|
|
46
52
|
map "covered_codepoints", to: :covered_codepoints
|
|
53
|
+
map "missing_codepoint_provenance", to: :missing_codepoint_provenance
|
|
47
54
|
end
|
|
48
55
|
|
|
49
56
|
# Derive the canonical status string for a block given its
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "lutaml/model"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Models
|
|
7
|
+
module Audit
|
|
8
|
+
# Per-codepoint provenance row attached to a {BlockSummary}'s
|
|
9
|
+
# `missing_codepoint_provenance` collection.
|
|
10
|
+
#
|
|
11
|
+
# Populated only when the audit used a
|
|
12
|
+
# {Ucode::Audit::UniversalSetReference}. UCD-only audits omit
|
|
13
|
+
# the field entirely — preserving the legacy wire shape.
|
|
14
|
+
#
|
|
15
|
+
# Wire shape (one entry per missing codepoint):
|
|
16
|
+
#
|
|
17
|
+
# {
|
|
18
|
+
# "codepoint": 10981,
|
|
19
|
+
# "tier": "tier-1",
|
|
20
|
+
# "source": "lentariso"
|
|
21
|
+
# }
|
|
22
|
+
#
|
|
23
|
+
# `tier` and `source` mirror the universal-set manifest
|
|
24
|
+
# ({UniversalSetEntry}) and let downstream renderers (TODO 26)
|
|
25
|
+
# display the missing glyph + its provenance next to each row.
|
|
26
|
+
class CodepointProvenance < Lutaml::Model::Serializable
|
|
27
|
+
attribute :codepoint, :integer
|
|
28
|
+
attribute :tier, :string
|
|
29
|
+
attribute :source, :string
|
|
30
|
+
|
|
31
|
+
key_value do
|
|
32
|
+
map "codepoint", to: :codepoint
|
|
33
|
+
map "tier", to: :tier
|
|
34
|
+
map "source", to: :source
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "lutaml/model"
|
|
4
|
+
|
|
5
|
+
module Ucode
  module Models
    module Audit
      # A single font face listed inside a {ReleaseFormulaEntry}.
      #
      # This is the compact "card" form of a per-face {AuditReport}: it
      # holds the face identity, its coverage totals, and relative paths
      # into the release tree. The fontist.org renderer walks these cards
      # to build its font index; clicking a card fetches the per-face
      # audit subtree found at `index_path`.
      class ReleaseFaceEntry < Lutaml::Model::Serializable
        # Attribute name => lutaml type, in declaration order.
        {
          postscript_name: :string,
          family_name: :string,
          weight_class: :integer,
          total_codepoints: :integer,
          covered_codepoints: :integer,
          blocks_complete: :integer,
          blocks_partial: :integer,
          source_sha256: :string,
          index_path: :string,
          html_path: :string,
        }.each { |field, type| attribute field, type }

        key_value do
          # Every wire key is spelled exactly like its attribute.
          %w[
            postscript_name
            family_name
            weight_class
            total_codepoints
            covered_codepoints
            blocks_complete
            blocks_partial
            source_sha256
            index_path
            html_path
          ].each { |key| map key, to: key.to_sym }
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "lutaml/model"
|
|
4
|
+
|
|
5
|
+
module Ucode
  module Models
    module Audit
      # A single formula listed inside a {ReleaseManifest}.
      #
      # "Formula" means one fontist formula, i.e. one installable unit.
      # Every formula adds one or more face entries to the release tree.
      # Its `slug` is the URL-safe identifier of the formula and doubles
      # as the directory name under `<release_root>/audit/<slug>/`.
      #
      # `source_path` notes where the original library audit was run, so
      # that anyone reading the manifest can follow the audit back to its
      # input directory.
      class ReleaseFormulaEntry < Lutaml::Model::Serializable
        # Scalar attributes: name => lutaml type, in declaration order.
        {
          slug: :string,
          source_path: :string,
          faces_total: :integer,
        }.each { |field, type| attribute field, type }

        # Nested face cards; the collection defaults to an empty list.
        attribute :faces, ReleaseFaceEntry, collection: true, default: -> { [] }

        key_value do
          # Every wire key is spelled exactly like its attribute.
          %w[slug source_path faces_total faces].each do |key|
            map key, to: key.to_sym
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "lutaml/model"
|
|
4
|
+
|
|
5
|
+
module Ucode
  module Models
    module Audit
      # Root manifest of the artifact consumed by fontist.org (TODO 27).
      #
      # Each release tree has exactly one, at
      # `<release_root>/manifest.json`. It records:
      #
      # * the ucode and unicode versions,
      # * an optional source-config sha256 (Tier 1 curation provenance),
      # * aggregate formula and face counts,
      # * the universal-set reference section,
      # * the per-formula face index.
      #
      # fontist.org's `scripts/fetch-data.sh` reads this manifest before
      # anything else, to decide whether the universal-set zip has to be
      # fetched and which per-formula audit subtrees to pull.
      class ReleaseManifest < Lutaml::Model::Serializable
        # Scalar attributes: name => lutaml type, in declaration order.
        {
          ucode_version: :string,
          unicode_version: :string,
          generated_at: :string,
          source_config_sha256: :string,
          formulas_total: :integer,
          faces_total: :integer,
        }.each { |field, type| attribute field, type }

        # Nested models: the universal-set section and the formula index
        # (the latter defaults to an empty list).
        attribute :universal_set, ReleaseUniversalSet
        attribute :formulas, ReleaseFormulaEntry, collection: true, default: -> { [] }

        key_value do
          # Every wire key is spelled exactly like its attribute.
          %w[
            ucode_version
            unicode_version
            generated_at
            source_config_sha256
            formulas_total
            faces_total
            universal_set
            formulas
          ].each { |key| map key, to: key.to_sym }
        end
      end
    end
  end
end
|