ucode 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -0
- data/Gemfile.lock +2 -2
- data/TODO.full/00-README.md +116 -0
- data/TODO.full/01-panglyph-vision.md +112 -0
- data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
- data/TODO.full/03-panglyph-font-builder.md +201 -0
- data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
- data/TODO.full/05-ucode-0-1-1-release.md +139 -0
- data/TODO.full/06-fontisan-remove-audit.md +142 -0
- data/TODO.full/07-fontisan-remove-ucd.md +125 -0
- data/TODO.full/08-archive-private-bin-build.md +143 -0
- data/TODO.full/09-archive-public-structure.md +164 -0
- data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
- data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
- data/TODO.full/12-implementation-order.md +216 -0
- data/TODO.full/13-fontisan-font-writer-api.md +189 -0
- data/TODO.full/14-fontisan-table-writers.md +66 -0
- data/TODO.full/15-panglyph-builder-real.md +82 -0
- data/TODO.full/16-archive-public-sync-workflows.md +167 -0
- data/TODO.full/17-fontist-org-font-picker.md +73 -0
- data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
- data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
- data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
- data/TODO.new/00-README.md +30 -0
- data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
- data/TODO.new/24-universal-glyph-set-build.md +189 -0
- data/TODO.new/25-font-audit-against-universal-set.md +195 -0
- data/TODO.new/26-missing-glyph-reporter.md +189 -0
- data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
- data/TODO.new/28-implementation-order-update.md +187 -0
- data/TODO.new/29-universal-set-curation-uc17.md +312 -0
- data/TODO.new/30-tier1-font-acquisition.md +241 -0
- data/TODO.new/31-universal-set-production-build.md +205 -0
- data/TODO.new/32-uc17-coverage-matrix.md +165 -0
- data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
- data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
- data/TODO.new/35-universal-set-production-run.md +160 -0
- data/TODO.new/36-per-font-coverage-audit.md +145 -0
- data/TODO.new/37-coverage-highlight-reporter.md +125 -0
- data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
- data/TODO.new/39-implementation-order-update-32-38.md +258 -0
- data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
- data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
- data/config/specialist_fonts.yml +102 -0
- data/config/unicode17_tier1_fonts.yml +42 -0
- data/config/unicode17_universal_glyph_set.yml +293 -0
- data/lib/ucode/audit/block_aggregator.rb +57 -29
- data/lib/ucode/audit/browser/face_page.rb +128 -0
- data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
- data/lib/ucode/audit/browser/library_page.rb +74 -0
- data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
- data/lib/ucode/audit/browser/template.rb +47 -0
- data/lib/ucode/audit/browser/templates/face.css +200 -0
- data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
- data/lib/ucode/audit/browser/templates/face.js +298 -0
- data/lib/ucode/audit/browser/templates/library.css +119 -0
- data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
- data/lib/ucode/audit/browser/templates/library.js +99 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
- data/lib/ucode/audit/browser.rb +32 -0
- data/lib/ucode/audit/context.rb +27 -1
- data/lib/ucode/audit/coverage_reference.rb +103 -0
- data/lib/ucode/audit/differ.rb +121 -0
- data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
- data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
- data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
- data/lib/ucode/audit/emitter/face_directory.rb +212 -0
- data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
- data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
- data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
- data/lib/ucode/audit/emitter/paths.rb +312 -0
- data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
- data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
- data/lib/ucode/audit/emitter.rb +29 -0
- data/lib/ucode/audit/extractors/aggregations.rb +31 -2
- data/lib/ucode/audit/face_auditor.rb +86 -0
- data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
- data/lib/ucode/audit/formatters/audit_text.rb +411 -0
- data/lib/ucode/audit/formatters/color.rb +48 -0
- data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
- data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
- data/lib/ucode/audit/formatters.rb +23 -0
- data/lib/ucode/audit/library_aggregator.rb +86 -0
- data/lib/ucode/audit/library_auditor.rb +105 -0
- data/lib/ucode/audit/release/emitter.rb +152 -0
- data/lib/ucode/audit/release/face_card.rb +93 -0
- data/lib/ucode/audit/release/formula_audits.rb +50 -0
- data/lib/ucode/audit/release/library_index_builder.rb +78 -0
- data/lib/ucode/audit/release/manifest_builder.rb +127 -0
- data/lib/ucode/audit/release.rb +42 -0
- data/lib/ucode/audit/ucd_only_reference.rb +81 -0
- data/lib/ucode/audit/universal_set_reference.rb +136 -0
- data/lib/ucode/audit.rb +31 -0
- data/lib/ucode/cli.rb +339 -33
- data/lib/ucode/commands/audit/browser_command.rb +82 -0
- data/lib/ucode/commands/audit/collection_command.rb +103 -0
- data/lib/ucode/commands/audit/compare_command.rb +188 -0
- data/lib/ucode/commands/audit/font_command.rb +140 -0
- data/lib/ucode/commands/audit/library_command.rb +87 -0
- data/lib/ucode/commands/audit/reference_builder.rb +64 -0
- data/lib/ucode/commands/audit.rb +20 -0
- data/lib/ucode/commands/block_feed.rb +73 -0
- data/lib/ucode/commands/canonical_build.rb +138 -0
- data/lib/ucode/commands/fetch.rb +37 -1
- data/lib/ucode/commands/release.rb +115 -0
- data/lib/ucode/commands/universal_set.rb +211 -0
- data/lib/ucode/commands.rb +5 -0
- data/lib/ucode/coordinator/indices.rb +11 -0
- data/lib/ucode/coordinator.rb +138 -5
- data/lib/ucode/error.rb +30 -2
- data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
- data/lib/ucode/fetch/font_fetcher.rb +16 -0
- data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
- data/lib/ucode/fetch.rb +7 -3
- data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
- data/lib/ucode/glyphs/real_fonts.rb +1 -0
- data/lib/ucode/glyphs/resolver.rb +62 -0
- data/lib/ucode/glyphs/source.rb +48 -0
- data/lib/ucode/glyphs/source_builder.rb +61 -0
- data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
- data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
- data/lib/ucode/glyphs/source_config.rb +104 -0
- data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
- data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
- data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
- data/lib/ucode/glyphs/sources.rb +20 -0
- data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
- data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
- data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
- data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
- data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
- data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
- data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
- data/lib/ucode/glyphs/universal_set.rb +45 -0
- data/lib/ucode/glyphs.rb +6 -0
- data/lib/ucode/models/audit/baseline.rb +6 -0
- data/lib/ucode/models/audit/block_summary.rb +7 -0
- data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
- data/lib/ucode/models/audit/release_face.rb +42 -0
- data/lib/ucode/models/audit/release_formula.rb +33 -0
- data/lib/ucode/models/audit/release_manifest.rb +43 -0
- data/lib/ucode/models/audit/release_universal_set.rb +37 -0
- data/lib/ucode/models/audit.rb +9 -0
- data/lib/ucode/models/block.rb +2 -0
- data/lib/ucode/models/build_report.rb +109 -0
- data/lib/ucode/models/codepoint/glyph.rb +42 -0
- data/lib/ucode/models/codepoint.rb +3 -0
- data/lib/ucode/models/glyph_source.rb +86 -0
- data/lib/ucode/models/glyph_source_map.rb +138 -0
- data/lib/ucode/models/specialist_font.rb +70 -0
- data/lib/ucode/models/specialist_font_manifest.rb +48 -0
- data/lib/ucode/models/unihan_entry.rb +81 -9
- data/lib/ucode/models/unihan_field.rb +21 -0
- data/lib/ucode/models/universal_set_entry.rb +47 -0
- data/lib/ucode/models/universal_set_manifest.rb +78 -0
- data/lib/ucode/models/validation_report.rb +99 -0
- data/lib/ucode/models.rb +9 -0
- data/lib/ucode/parsers/named_sequences.rb +5 -5
- data/lib/ucode/parsers/unihan.rb +50 -19
- data/lib/ucode/repo/aggregate_writer.rb +34 -2
- data/lib/ucode/repo/block_feed_emitter.rb +153 -0
- data/lib/ucode/repo/build_report_accumulator.rb +138 -0
- data/lib/ucode/repo/build_report_writer.rb +46 -0
- data/lib/ucode/repo/build_validator.rb +229 -0
- data/lib/ucode/repo/codepoint_writer.rb +50 -1
- data/lib/ucode/repo/paths.rb +8 -0
- data/lib/ucode/repo.rb +4 -0
- data/lib/ucode/version.rb +1 -1
- data/schema/block-feed.output.schema.yml +134 -0
- metadata +143 -2
- data/ucode.gemspec +0 -56
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require "ucode/glyphs/universal_set/idempotency"
|
|
6
|
+
require "ucode/glyphs/universal_set/manifest_accumulator"
|
|
7
|
+
require "ucode/glyphs/universal_set/manifest_writer"
|
|
8
|
+
|
|
9
|
+
module Ucode
|
|
10
|
+
module Glyphs
|
|
11
|
+
module UniversalSet
|
|
12
|
+
# Drains a codepoint stream through the 4-tier {Resolver} and
|
|
13
|
+
# produces the universal glyph set: one SVG per codepoint +
|
|
14
|
+
# manifest.json + reports.
|
|
15
|
+
#
|
|
16
|
+
# This is the orchestrator described by TODO 24. It owns three
|
|
17
|
+
# concerns and only three:
|
|
18
|
+
#
|
|
19
|
+
# 1. Iterate the codepoint stream (single-threaded or worker
|
|
20
|
+
# pool, depending on `parallel_workers:`).
|
|
21
|
+
# 2. For each codepoint: resolve via the {Resolver}, write
|
|
22
|
+
# the SVG via {Idempotency}, route the outcome to the
|
|
23
|
+
# {ManifestAccumulator}.
|
|
24
|
+
# 3. After the drain: hand the manifest + per-block breakdown
|
|
25
|
+
# to the {ManifestWriter} for atomic emission.
|
|
26
|
+
#
|
|
27
|
+
# The Builder is intentionally agnostic of how the codepoint
|
|
28
|
+
# stream is produced. The CLI command (TODO 24) constructs a
|
|
29
|
+
# {Ucode::Coordinator} enumerator; tests construct a small
|
|
30
|
+
# Array. The Builder doesn't know about UCD text files, fontist,
|
|
31
|
+
# or PDFs — those live behind the {Resolver}.
|
|
32
|
+
#
|
|
33
|
+
# == Idempotency
|
|
34
|
+
#
|
|
35
|
+
# SVG writes go through {Idempotency#write_glyph}, which uses
|
|
36
|
+
# {Ucode::Repo::AtomicWrites#write_atomic} for byte-level
|
|
37
|
+
# idempotency. Re-running with the same resolver + SVG payloads
|
|
38
|
+
# produces zero file writes. The manifest is regenerated each
|
|
39
|
+
# run; its `generated_at` updates but its entries remain stable
|
|
40
|
+
# when content is unchanged.
|
|
41
|
+
class Builder
|
|
42
|
+
include Idempotency
|
|
43
|
+
|
|
44
|
+
# @param output_root [String, Pathname] directory that will hold
|
|
45
|
+
# `manifest.json`, `glyphs/`, `reports/`.
|
|
46
|
+
# @param resolver [Ucode::Glyphs::Resolver]
|
|
47
|
+
# @param unicode_version [String]
|
|
48
|
+
# @param ucode_version [String]
|
|
49
|
+
# @param source_config_sha256 [String] hex digest of the YAML
|
|
50
|
+
# config that produced this build (recorded in the manifest
|
|
51
|
+
# so audits can detect drift).
|
|
52
|
+
# @param parallel_workers [Integer] size of the worker pool.
|
|
53
|
+
# Set to 1 (or less) for inline mode — used in tests.
|
|
54
|
+
# @param block_filter [String, nil] only build codepoints whose
|
|
55
|
+
# `block_id` matches this verbatim (canonical underscore form).
|
|
56
|
+
def initialize(output_root:, resolver:, unicode_version:,
               ucode_version:, source_config_sha256:,
               parallel_workers: 1, block_filter: nil)
  # Destination directory (normalised to a Pathname) and the glyph resolver.
  @output_root = Pathname.new(output_root)
  @resolver = resolver

  # Build identity; #build passes these to the ManifestAccumulator unchanged.
  @unicode_version, @ucode_version, @source_config_sha256 =
    unicode_version, ucode_version, source_config_sha256

  # Run-shaping options: worker-pool size and optional single-block filter.
  @parallel_workers, @block_filter = parallel_workers, block_filter
end
|
|
67
|
+
|
|
68
|
+
# Drain `codepoints` through the resolver and emit the
|
|
69
|
+
# manifest + reports. Returns the path to the written manifest.
|
|
70
|
+
#
|
|
71
|
+
# @param codepoints [Enumerable<Ucode::Models::CodePoint>]
|
|
72
|
+
# @return [Pathname] path to the written manifest.json
|
|
73
|
+
def build(codepoints)
  # A fresh accumulator per run, seeded with this build's identity.
  run_metadata = {
    unicode_version: @unicode_version,
    ucode_version: @ucode_version,
    source_config_sha256: @source_config_sha256,
  }
  tally = ManifestAccumulator.new(**run_metadata)

  drain(codepoints, tally)
  write_outputs(tally)
end
|
|
82
|
+
|
|
83
|
+
private
|
|
84
|
+
|
|
85
|
+
def drain(codepoints, accumulator)
  # One worker (or fewer) means no pool: process on the calling thread.
  if @parallel_workers <= 1
    drain_inline(codepoints, accumulator)
  else
    drain_threaded(codepoints, accumulator)
  end
end
|
|
90
|
+
|
|
91
|
+
def drain_inline(codepoints, accumulator)
  # Sequential path: handle every codepoint on the caller's thread, in stream order.
  codepoints.each { |codepoint| build_one(codepoint, accumulator) }
end
|
|
96
|
+
|
|
97
|
+
def drain_threaded(codepoints, accumulator)
  # Fan the codepoint stream out to a pool of @parallel_workers threads.
  #
  # A private sentinel object (not nil) marks end-of-stream, so a nil
  # element in the stream reaches build_one like any other item, matching
  # inline mode, instead of stopping a worker early.
  stop = Object.new
  queue = Queue.new
  workers = Array.new(@parallel_workers) do
    Thread.new do
      loop do
        item = queue.pop
        break if stop.equal?(item)

        build_one(item, accumulator)
      end
    end
  end

  begin
    codepoints.each { |cp| queue << cp }
  ensure
    # Always release and reap the workers, even when the producer raises;
    # otherwise they would stay blocked on queue.pop forever.
    @parallel_workers.times { queue << stop }
    workers.each(&:join)
  end

  workers
end
|
|
116
|
+
|
|
117
|
+
# Resolve one codepoint, write its SVG (if any), and route
|
|
118
|
+
# the outcome to the accumulator. Exceptions are caught here
|
|
119
|
+
# so a single bad codepoint doesn't abort the run.
|
|
120
|
+
#
|
|
121
|
+
# @param cp [Ucode::Models::CodePoint]
|
|
122
|
+
# @param accumulator [ManifestAccumulator]
|
|
123
|
+
def build_one(cp, accumulator)
  return unless matches_filter?(cp)

  outcome = @resolver.resolve(cp.cp)
  if outcome.nil?
    # No tier produced a glyph: count it as a skip, write nothing.
    accumulator.record_skip(cp)
    nil
  else
    payload = outcome.svg
    write_glyph(@output_root, cp_id(cp), payload)
    accumulator.record_build(cp, outcome, svg: payload)
  end
rescue StandardError => e
  # Any error for this codepoint is logged as a failure so the run continues.
  accumulator.record_failure(cp, e)
end
|
|
138
|
+
|
|
139
|
+
def matches_filter?(cp)
  # Without a filter every codepoint qualifies; otherwise the block id must match exactly.
  @block_filter.nil? || cp.block_id == @block_filter
end
|
|
144
|
+
|
|
145
|
+
def cp_id(cp)
  # Identifier formatting is delegated to Repo::Paths.
  value = cp.cp
  Ucode::Repo::Paths.cp_id(value)
end
|
|
148
|
+
|
|
149
|
+
def write_outputs(accumulator)
  # Snapshot the manifest first, then hand it and the report data to the writer.
  snapshot = accumulator.to_manifest
  writer = ManifestWriter.new(@output_root)
  writer.write(snapshot,
               by_block: accumulator.by_block,
               gaps: accumulator.gaps,
               failures: accumulator.failures)
end
|
|
158
|
+
end
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
end
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "pathname"
|
|
5
|
+
|
|
6
|
+
require "ucode/glyphs/universal_set/idempotency"
|
|
7
|
+
|
|
8
|
+
module Ucode
|
|
9
|
+
module Glyphs
|
|
10
|
+
module UniversalSet
|
|
11
|
+
# Standalone emitter for the universal-set coverage reports
|
|
12
|
+
# described by TODO 31 §Per-tier / §Per-block / §Gap
|
|
13
|
+
# investigation. Reads a {Ucode::Models::UniversalSetManifest}
|
|
14
|
+
# and produces three JSON files under `<output_root>/reports/`:
|
|
15
|
+
#
|
|
16
|
+
# - `by_tier.json` — manifest.by_tier verbatim (`tier-1` => N,
|
|
17
|
+
# `pillar-1` => N, ...). Quick "how much of the set is real
|
|
18
|
+
# fonts vs. tofu?" answer.
|
|
19
|
+
# - `by_block.json` — per-block per-tier breakdown:
|
|
20
|
+
#
|
|
21
|
+
# { "Sidetic": { "assigned": 26, "tier-1": 26,
|
|
22
|
+
# "pillar-1": 0, "pillar-2": 0, "pillar-3": 0 } }
|
|
23
|
+
#
|
|
24
|
+
# `assigned` is the count of manifest entries whose codepoint
|
|
25
|
+
# falls in this block (via {Ucode::Database#lookup_block}).
|
|
26
|
+
# Each tier key counts the entries that resolved at that tier.
|
|
27
|
+
# - `gaps.json` — array of `{ codepoint, block, reason }` for
|
|
28
|
+
# every manifest entry at `pillar-3`. These are the "tofu
|
|
29
|
+
# leaks" TODO 31 calls out as actionable curation follow-ups
|
|
30
|
+
# (excluding the documented-residual cases: unassigned, PUA,
|
|
31
|
+
# noncharacter — those are correctly Last Resort).
|
|
32
|
+
#
|
|
33
|
+
# An optional `failures:` payload (from
|
|
34
|
+
# {ManifestAccumulator#failures}) writes a fourth file,
|
|
35
|
+
# `failures.json`, with per-codepoint exception log. Kept
|
|
36
|
+
# separate from `gaps.json` so the two concepts (tofu vs.
|
|
37
|
+
# crash) don't collide.
|
|
38
|
+
#
|
|
39
|
+
# All writes are atomic via {Idempotency}. Re-running on an
|
|
40
|
+
# unchanged manifest is a complete no-op — JSON output is
|
|
41
|
+
# stable (sorted keys, deterministic ordering).
|
|
42
|
+
class CoverageReport
|
|
43
|
+
include Idempotency
|
|
44
|
+
|
|
45
|
+
# Reason stamped on every pillar-3 gap entry. The detailed
|
|
46
|
+
# "why did this fall through?" path is in the manifest entry's
|
|
47
|
+
# `source` field; this string is the high-level category.
|
|
48
|
+
TOFU_REASON = "resolved to pillar-3 (Last Resort placeholder)"
|
|
49
|
+
private_constant :TOFU_REASON
|
|
50
|
+
|
|
51
|
+
# @param output_root [String, Pathname] directory holding
|
|
52
|
+
# `manifest.json` + `reports/`.
|
|
53
|
+
# @param database [Ucode::Database] used for codepoint → block
|
|
54
|
+
# lookup. The `report` CLI command opens one for the target
|
|
55
|
+
# Unicode version; tests pass a small in-memory database.
|
|
56
|
+
def initialize(output_root, database:)
  # Keep the block-lookup database and normalise the root to a Pathname.
  @database = database
  @output_root = Pathname.new(output_root)
end
|
|
60
|
+
|
|
61
|
+
# Write the three coverage reports. Returns the structured
|
|
62
|
+
# payload so callers (CLI) can render a summary without
|
|
63
|
+
# re-reading the files.
|
|
64
|
+
#
|
|
65
|
+
# @param manifest [Ucode::Models::UniversalSetManifest]
|
|
66
|
+
# @param failures [Array<Hash>] optional per-codepoint
|
|
67
|
+
# exception log from {ManifestAccumulator#failures}. When
|
|
68
|
+
# non-empty, also writes `reports/failures.json`.
|
|
69
|
+
# @return [Hash] { by_tier:, by_block:, gaps:, failures:,
|
|
70
|
+
# by_tier_path:, by_block_path:, gaps_path:, failures_path: }
|
|
71
|
+
def emit(manifest, failures: [])
  reports = {
    by_tier: manifest.by_tier,
    by_block: build_by_block(manifest),
    gaps: build_gaps(manifest),
  }
  paths = {
    by_tier_path: by_tier_report_path(@output_root),
    by_block_path: by_block_report_path(@output_root),
    gaps_path: gaps_report_path(@output_root),
  }

  # Written in a fixed order: by_tier, by_block, gaps, then failures.
  reports.each do |name, payload|
    write_atomic(paths.fetch(:"#{name}_path"), to_pretty_json(payload))
  end
  failures_path = write_failures(failures)

  { **reports, failures: failures, **paths, failures_path: failures_path }
end
|
|
95
|
+
|
|
96
|
+
private
|
|
97
|
+
|
|
98
|
+
def build_by_block(manifest)
  counts = {}

  manifest.entries.each do |entry|
    block = @database.lookup_block(entry.codepoint)
    next unless block # entries the database cannot place are left out

    row = (counts[block] ||= { "assigned" => 0, "tier-1" => 0, "pillar-1" => 0,
                               "pillar-2" => 0, "pillar-3" => 0 })
    row["assigned"] += 1
    row[entry.tier] = row.fetch(entry.tier, 0) + 1
  end

  # Ordered by block key so the same manifest always yields the same hash.
  counts.sort.to_h
end
|
|
116
|
+
|
|
117
|
+
def build_gaps(manifest)
  # Only entries that resolved at pillar-3 are reported, in manifest order.
  tofu = manifest.entries.select { |entry| entry.tier == "pillar-3" }
  tofu.map do |entry|
    { "codepoint" => entry.codepoint,
      "block" => @database.lookup_block(entry.codepoint),
      "reason" => TOFU_REASON }
  end
end
|
|
128
|
+
|
|
129
|
+
def write_failures(failures)
  # Nothing to log means no file is written and no path is returned.
  return if failures.empty?

  @output_root.join(REPORTS_DIR, "failures.json").tap do |target|
    write_atomic(target, to_pretty_json(failures))
  end
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require "ucode/repo/atomic_writes"
|
|
6
|
+
|
|
7
|
+
module Ucode
|
|
8
|
+
module Glyphs
|
|
9
|
+
module UniversalSet
|
|
10
|
+
# Idempotency + path helpers for the universal set build.
|
|
11
|
+
#
|
|
12
|
+
# TODO 24 specifies "a codepoint whose source font mtime +
|
|
13
|
+
# content hash are unchanged is skipped." The content-hash half
|
|
14
|
+
# is exactly {Ucode::Repo::AtomicWrites#write_atomic} —
|
|
15
|
+
# byte-identical payloads are a no-op. The mtime half is a
|
|
16
|
+
# future optimization (skip the resolver call entirely when the
|
|
17
|
+
# font hasn't changed); for now, byte-comparison gives semantic
|
|
18
|
+
# correctness, which is the load-bearing property.
|
|
19
|
+
#
|
|
20
|
+
# This module centralizes the universal-set write semantic so
|
|
21
|
+
# future mtime-based short-circuiting lands in one place. The
|
|
22
|
+
# {Builder} and {ManifestWriter} mix this in.
|
|
23
|
+
module Idempotency
|
|
24
|
+
include Ucode::Repo::AtomicWrites
|
|
25
|
+
|
|
26
|
+
# Directory under the output root that holds the per-codepoint SVGs.
|
|
27
|
+
GLYPHS_DIR = "glyphs"
|
|
28
|
+
# Directory under the output root that holds the by-tier / by-block /
|
|
29
|
+
# gaps reports emitted alongside the manifest.
|
|
30
|
+
REPORTS_DIR = "reports"
|
|
31
|
+
# The manifest filename at the output root.
|
|
32
|
+
MANIFEST_FILENAME = "manifest.json"
|
|
33
|
+
# Report filenames.
|
|
34
|
+
BY_TIER_REPORT = "by_tier.json"
|
|
35
|
+
BY_BLOCK_REPORT = "by_block.json"
|
|
36
|
+
GAPS_REPORT = "gaps.json"
|
|
37
|
+
|
|
38
|
+
private_constant :GLYPHS_DIR, :REPORTS_DIR, :MANIFEST_FILENAME,
|
|
39
|
+
:BY_TIER_REPORT, :BY_BLOCK_REPORT, :GAPS_REPORT
|
|
40
|
+
|
|
41
|
+
# Write the SVG payload to the canonical `glyphs/<id>.svg`
|
|
42
|
+
# path if-and-only-if the content changed. Returns true when
|
|
43
|
+
# the file was written; false when skipped (byte-identical).
|
|
44
|
+
#
|
|
45
|
+
# @param output_root [Pathname]
|
|
46
|
+
# @param cp_id [String] e.g. "U+0041"
|
|
47
|
+
# @param svg [String]
|
|
48
|
+
# @return [Boolean]
|
|
49
|
+
def write_glyph(output_root, cp_id, svg)
  # Resolve the glyph's path, then delegate the write to write_atomic.
  destination = glyph_path(output_root, cp_id)
  write_atomic(destination, svg)
end
|
|
52
|
+
|
|
53
|
+
# @param output_root [Pathname]
|
|
54
|
+
# @param cp_id [String]
|
|
55
|
+
# @return [Pathname] <output_root>/glyphs/<cp_id>.svg
|
|
56
|
+
def glyph_path(output_root, cp_id)
  # <output_root>/glyphs/<cp_id>.svg
  file_name = "#{cp_id}.svg"
  Pathname(output_root).join(GLYPHS_DIR, file_name)
end
|
|
59
|
+
|
|
60
|
+
# @param output_root [Pathname]
|
|
61
|
+
# @return [Pathname]
|
|
62
|
+
def manifest_path(output_root) = Pathname(output_root).join(MANIFEST_FILENAME)
|
|
65
|
+
|
|
66
|
+
# @param output_root [Pathname]
|
|
67
|
+
# @return [Pathname]
|
|
68
|
+
def by_tier_report_path(output_root) = Pathname(output_root).join(REPORTS_DIR, BY_TIER_REPORT)
|
|
71
|
+
|
|
72
|
+
# @param output_root [Pathname]
|
|
73
|
+
# @return [Pathname]
|
|
74
|
+
def by_block_report_path(output_root) = Pathname(output_root).join(REPORTS_DIR, BY_BLOCK_REPORT)
|
|
77
|
+
|
|
78
|
+
# @param output_root [Pathname]
|
|
79
|
+
# @return [Pathname]
|
|
80
|
+
def gaps_report_path(output_root) = Pathname(output_root).join(REPORTS_DIR, GAPS_REPORT)
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
require "time"
|
|
5
|
+
|
|
6
|
+
require "ucode/models"
|
|
7
|
+
require "ucode/repo/paths"
|
|
8
|
+
|
|
9
|
+
module Ucode
|
|
10
|
+
module Glyphs
|
|
11
|
+
module UniversalSet
|
|
12
|
+
# Thread-safe accumulator that observes the {Builder}'s per-
|
|
13
|
+
# codepoint attempts and produces the final
|
|
14
|
+
# {Ucode::Models::UniversalSetManifest} plus the per-block
|
|
15
|
+
# breakdown the {ManifestWriter} emits as `reports/by_block.json`.
|
|
16
|
+
#
|
|
17
|
+
# Mirrors the {Ucode::Repo::BuildReportAccumulator} pattern from
|
|
18
|
+
# Mode 1: the orchestrating command passes this instance to the
|
|
19
|
+
# builder, which calls {#record_build} (or {#record_skip}) from
|
|
20
|
+
# inside its worker pool. After the drain completes,
|
|
21
|
+
# {#to_manifest} returns the immutable snapshot.
|
|
22
|
+
#
|
|
23
|
+
# == Semantics
|
|
24
|
+
#
|
|
25
|
+
# - `codepoints_assigned` counts every codepoint the builder
|
|
26
|
+
# attempted (passed the block_filter guard).
|
|
27
|
+
# - `codepoints_built` counts codepoints whose resolver returned
|
|
28
|
+
# a glyph.
|
|
29
|
+
# - `codepoints_skipped` counts codepoints that resolved to nil
|
|
30
|
+
# (no tier produced a glyph) — these are the "gaps" the gaps
|
|
31
|
+
# report enumerates.
|
|
32
|
+
# - `codepoints_failed` counts exceptions recorded via
|
|
33
|
+
# {#record_failure}.
|
|
34
|
+
#
|
|
35
|
+
# `by_tier` counts the winning tier per codepoint (one increment
|
|
36
|
+
# per built codepoint). The map uses the wire form ("tier-1",
|
|
37
|
+
# "pillar-1", ...) so the manifest is stable across Ruby symbol
|
|
38
|
+
# changes.
|
|
39
|
+
#
|
|
40
|
+
# `by_block` is a hash keyed by block_id, with built / skipped /
|
|
41
|
+
# failed counters per block. Computed from the codepoint stream
|
|
42
|
+
# the Builder drains — the accumulator reads {CodePoint#block_id}
|
|
43
|
+
# directly. Block ids follow the canonical underscore form.
|
|
44
|
+
class ManifestAccumulator
|
|
45
|
+
TIER_TO_WIRE = {
|
|
46
|
+
tier1: "tier-1",
|
|
47
|
+
pillar1: "pillar-1",
|
|
48
|
+
pillar2: "pillar-2",
|
|
49
|
+
pillar3: "pillar-3",
|
|
50
|
+
}.freeze
|
|
51
|
+
private_constant :TIER_TO_WIRE
|
|
52
|
+
|
|
53
|
+
# @param unicode_version [String]
|
|
54
|
+
# @param ucode_version [String]
|
|
55
|
+
# @param source_config_sha256 [String]
|
|
56
|
+
def initialize(unicode_version:, ucode_version:, source_config_sha256:)
  @unicode_version = unicode_version
  @ucode_version = ucode_version
  @source_config_sha256 = source_config_sha256

  # Run-wide counters, all starting at zero.
  @totals = %i[codepoints_assigned codepoints_built
               codepoints_skipped codepoints_failed].to_h { |counter| [counter, 0] }
  @by_tier = Hash.new(0)
  # Each block gets its own counter hash the first time it is touched.
  @by_block = Hash.new do |blocks, block_id|
    blocks[block_id] = { built: 0, skipped: 0, failed: 0 }
  end
  @entries, @gaps, @failures = [], [], []
  # Guards every read and write of the state above.
  @mutex = Mutex.new
end
|
|
71
|
+
|
|
72
|
+
# Observer entry — the builder calls this for every codepoint
|
|
73
|
+
# the resolver produced a glyph for. Records the entry and
|
|
74
|
+
# bumps the built counter + per-tier + per-block rollups.
|
|
75
|
+
#
|
|
76
|
+
# @param codepoint [Ucode::Models::CodePoint]
|
|
77
|
+
# @param result [Ucode::Glyphs::Source::Result] non-nil
|
|
78
|
+
# @param svg [String] the SVG bytes that were written
|
|
79
|
+
# @return [void]
|
|
80
|
+
def record_build(codepoint, result, svg:)
  # Build the entry and tier label before taking the lock.
  new_entry = build_entry(codepoint.cp, result, svg)
  tier_key = wire_tier(result.tier)

  synchronize do
    %i[codepoints_assigned codepoints_built].each { |counter| @totals[counter] += 1 }
    @by_tier[tier_key] += 1
    @by_block[codepoint.block_id][:built] += 1
    @entries.push(new_entry)
  end
end
|
|
91
|
+
|
|
92
|
+
# Observer entry — the builder calls this when the resolver
|
|
93
|
+
# returned nil for a codepoint. Counts the attempt and adds
|
|
94
|
+
# it to the gaps list for the gaps report.
|
|
95
|
+
#
|
|
96
|
+
# @param codepoint [Ucode::Models::CodePoint]
|
|
97
|
+
# @return [void]
|
|
98
|
+
def record_skip(codepoint)
  synchronize do
    # A skip still counts as an attempted codepoint.
    %i[codepoints_assigned codepoints_skipped].each { |counter| @totals[counter] += 1 }
    @by_block[codepoint.block_id][:skipped] += 1
    @gaps.push(codepoint.cp)
  end
end
|
|
106
|
+
|
|
107
|
+
# Record an exception. The builder rescues per-codepoint
|
|
108
|
+
# errors and routes them here so one bad codepoint doesn't
|
|
109
|
+
# abort the run.
|
|
110
|
+
#
|
|
111
|
+
# @param codepoint [Ucode::Models::CodePoint, nil]
|
|
112
|
+
# @param error [StandardError]
|
|
113
|
+
# @return [void]
|
|
114
|
+
def record_failure(codepoint, error)
  synchronize do
    # With no codepoint, only the failed total and the log entry are recorded.
    known = !codepoint.nil?

    @totals[:codepoints_assigned] += 1 if known
    @totals[:codepoints_failed] += 1
    @by_block[codepoint.block_id][:failed] += 1 if known
    @failures.push(
      codepoint: codepoint&.cp,
      block_id: codepoint&.block_id,
      error_class: error.class.name,
      message: error.message,
    )
  end
end
|
|
125
|
+
|
|
126
|
+
# @return [Ucode::Models::UniversalSetManifest] immutable snapshot
|
|
127
|
+
def to_manifest
  synchronize do
    Ucode::Models::UniversalSetManifest.new(
      unicode_version: @unicode_version,
      ucode_version: @ucode_version,
      generated_at: Time.now.utc.iso8601,
      source_config_sha256: @source_config_sha256,
      totals: Ucode::Models::UniversalSetManifest::Totals.new(@totals),
      # With a worker pool, entries and tier keys arrive in completion
      # order, which varies between runs. Sorting makes the snapshot
      # depend only on what was built, not on thread scheduling.
      # The by_tier copy keeps its 0 default for absent tiers.
      by_tier: Hash.new(0).merge!(@by_tier.sort.to_h),
      entries: @entries.sort_by(&:codepoint),
    )
  end
end
|
|
140
|
+
|
|
141
|
+
# @return [Hash{String=>Hash}] per-block built/skipped/failed
|
|
142
|
+
# counts, deep-copied so callers can't mutate accumulator state.
|
|
143
|
+
def by_block
  synchronize do
    # Copy each per-block counter hash so callers never hold live state.
    @by_block.to_h { |block_id, counters| [block_id, counters.dup] }
  end
end
|
|
148
|
+
|
|
149
|
+
# @return [Array<Integer>] codepoints that resolved to nil, sorted
|
|
150
|
+
def gaps
  synchronize do
    # Returns a new, ascending array; the internal list keeps arrival order.
    @gaps.sort_by(&:itself)
  end
end
|
|
153
|
+
|
|
154
|
+
# @return [Array<Hash>] recorded failures (each with codepoint,
|
|
155
|
+
# block_id, error_class, message)
|
|
156
|
+
def failures
  synchronize do
    # Shallow copy: a new array holding the same failure hashes.
    [*@failures]
  end
end
|
|
159
|
+
|
|
160
|
+
private
|
|
161
|
+
|
|
162
|
+
def build_entry(codepoint, result, svg)
  # Collect the entry's fields first, then construct the model in one call.
  fields = {
    codepoint: codepoint,
    id: Ucode::Repo::Paths.cp_id(codepoint),
    tier: wire_tier(result.tier),
    source: source_label(result.provenance),
    svg_sha256: sha256(svg),
    svg_size_bytes: svg.bytesize,
  }
  Ucode::Models::UniversalSetEntry.new(**fields)
end
|
|
172
|
+
|
|
173
|
+
# Extract the source identifier from a colon-separated provenance
|
|
174
|
+
# string ("tier-1:noto-sans" -> "noto-sans"). When there's no
|
|
175
|
+
# `:` separator, returns the input verbatim — defensive
|
|
176
|
+
# against malformed provenance.
|
|
177
|
+
def source_label(provenance)
  # Split at the first ":" only; the label is everything after it.
  head, separator, tail = provenance.to_s.partition(":")
  # With no separator, the whole string is the label.
  separator.empty? ? head : tail
end
|
|
180
|
+
|
|
181
|
+
def sha256(payload)
  # Lowercase hex SHA-256 of the payload bytes.
  Digest::SHA256.digest(payload).unpack1("H*")
end
|
|
184
|
+
|
|
185
|
+
def wire_tier(symbol)
  # Tiers missing from the table fall back to their plain string form.
  TIER_TO_WIRE.fetch(symbol) { symbol.to_s }
end
|
|
188
|
+
|
|
189
|
+
def synchronize(&block)
  # Run the block while holding this accumulator's mutex.
  @mutex.synchronize(&block)
end
|
|
192
|
+
end
|
|
193
|
+
end
|
|
194
|
+
end
|
|
195
|
+
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "json"
|
|
5
|
+
|
|
6
|
+
require "ucode/glyphs/universal_set/idempotency"
|
|
7
|
+
|
|
8
|
+
module Ucode
|
|
9
|
+
module Glyphs
|
|
10
|
+
module UniversalSet
|
|
11
|
+
# Writes the final manifest + reports under the output root.
|
|
12
|
+
#
|
|
13
|
+
# One manifest, three reports:
|
|
14
|
+
#
|
|
15
|
+
# - `manifest.json` — full {Ucode::Models::UniversalSetManifest}.
|
|
16
|
+
# - `reports/by_tier.json` — `by_tier` counts alone (small file
|
|
17
|
+
# for quick "how much of the set is tier 1?" inspection).
|
|
18
|
+
# - `reports/by_block.json` — per-block built/skipped totals,
|
|
19
|
+
# computed from the manifest's entries + the codepoint's
|
|
20
|
+
# block_id (resolved by the Builder).
|
|
21
|
+
# - `reports/gaps.json` — object with `gaps` (codepoint integers that
|
|
22
|
+
# resolved to nil; should be empty for a healthy run) and `failures`.
|
|
23
|
+
#
|
|
24
|
+
# All writes are atomic via {Idempotency} (which includes
|
|
25
|
+
# {Ucode::Repo::AtomicWrites}). Re-running on an unchanged
|
|
26
|
+
# manifest is a no-op modulo `generated_at`.
|
|
27
|
+
class ManifestWriter
|
|
28
|
+
include Idempotency
|
|
29
|
+
|
|
30
|
+
# @param output_root [String, Pathname]
|
|
31
|
+
def initialize(output_root)
  # Accepts a String or Pathname; stored as a Pathname.
  root = Pathname.new(output_root)
  @output_root = root
end
|
|
34
|
+
|
|
35
|
+
# Write the manifest + reports atomically.
|
|
36
|
+
#
|
|
37
|
+
# @param manifest [Ucode::Models::UniversalSetManifest]
|
|
38
|
+
# @param by_block [Hash{String=>Hash}] per-block breakdown:
|
|
39
|
+
# `{ "Basic_Latin" => { built: 64, skipped: 0, failed: 0 } }`.
|
|
40
|
+
# Computed by the {Builder}; this writer just serializes it.
|
|
41
|
+
# @param gaps [Array<Integer>] codepoints with no glyph
|
|
42
|
+
# @param failures [Array<Hash>] per-codepoint failures
|
|
43
|
+
# @return [Pathname] path to the written manifest
|
|
44
|
+
def write(manifest, by_block:, gaps:, failures:)
  root = @output_root

  # The manifest goes first, then the three reports in a fixed order.
  manifest_file = manifest_path(root)
  write_atomic(manifest_file, manifest_to_json(manifest))

  write_atomic(by_tier_report_path(root), to_pretty_json(manifest.by_tier))
  write_atomic(by_block_report_path(root), to_pretty_json(by_block))
  # gaps.json carries both the gap list and the failure log.
  write_atomic(gaps_report_path(root), to_pretty_json(gaps: gaps, failures: failures))

  manifest_path(root)
end
|
|
52
|
+
|
|
53
|
+
private
|
|
54
|
+
|
|
55
|
+
def manifest_to_json(manifest) = manifest.to_json(pretty: true)
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
end
|