ucode 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -0
- data/Gemfile.lock +2 -2
- data/TODO.full/00-README.md +116 -0
- data/TODO.full/01-panglyph-vision.md +112 -0
- data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
- data/TODO.full/03-panglyph-font-builder.md +201 -0
- data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
- data/TODO.full/05-ucode-0-1-1-release.md +139 -0
- data/TODO.full/06-fontisan-remove-audit.md +142 -0
- data/TODO.full/07-fontisan-remove-ucd.md +125 -0
- data/TODO.full/08-archive-private-bin-build.md +143 -0
- data/TODO.full/09-archive-public-structure.md +164 -0
- data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
- data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
- data/TODO.full/12-implementation-order.md +216 -0
- data/TODO.full/13-fontisan-font-writer-api.md +189 -0
- data/TODO.full/14-fontisan-table-writers.md +66 -0
- data/TODO.full/15-panglyph-builder-real.md +82 -0
- data/TODO.full/16-archive-public-sync-workflows.md +167 -0
- data/TODO.full/17-fontist-org-font-picker.md +73 -0
- data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
- data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
- data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
- data/TODO.new/00-README.md +30 -0
- data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
- data/TODO.new/24-universal-glyph-set-build.md +189 -0
- data/TODO.new/25-font-audit-against-universal-set.md +195 -0
- data/TODO.new/26-missing-glyph-reporter.md +189 -0
- data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
- data/TODO.new/28-implementation-order-update.md +187 -0
- data/TODO.new/29-universal-set-curation-uc17.md +312 -0
- data/TODO.new/30-tier1-font-acquisition.md +241 -0
- data/TODO.new/31-universal-set-production-build.md +205 -0
- data/TODO.new/32-uc17-coverage-matrix.md +165 -0
- data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
- data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
- data/TODO.new/35-universal-set-production-run.md +160 -0
- data/TODO.new/36-per-font-coverage-audit.md +145 -0
- data/TODO.new/37-coverage-highlight-reporter.md +125 -0
- data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
- data/TODO.new/39-implementation-order-update-32-38.md +258 -0
- data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
- data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
- data/config/specialist_fonts.yml +102 -0
- data/config/unicode17_tier1_fonts.yml +42 -0
- data/config/unicode17_universal_glyph_set.yml +293 -0
- data/lib/ucode/audit/block_aggregator.rb +57 -29
- data/lib/ucode/audit/browser/face_page.rb +128 -0
- data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
- data/lib/ucode/audit/browser/library_page.rb +74 -0
- data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
- data/lib/ucode/audit/browser/template.rb +47 -0
- data/lib/ucode/audit/browser/templates/face.css +200 -0
- data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
- data/lib/ucode/audit/browser/templates/face.js +298 -0
- data/lib/ucode/audit/browser/templates/library.css +119 -0
- data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
- data/lib/ucode/audit/browser/templates/library.js +99 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
- data/lib/ucode/audit/browser.rb +32 -0
- data/lib/ucode/audit/context.rb +27 -1
- data/lib/ucode/audit/coverage_reference.rb +103 -0
- data/lib/ucode/audit/differ.rb +121 -0
- data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
- data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
- data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
- data/lib/ucode/audit/emitter/face_directory.rb +212 -0
- data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
- data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
- data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
- data/lib/ucode/audit/emitter/paths.rb +312 -0
- data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
- data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
- data/lib/ucode/audit/emitter.rb +29 -0
- data/lib/ucode/audit/extractors/aggregations.rb +31 -2
- data/lib/ucode/audit/face_auditor.rb +86 -0
- data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
- data/lib/ucode/audit/formatters/audit_text.rb +411 -0
- data/lib/ucode/audit/formatters/color.rb +48 -0
- data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
- data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
- data/lib/ucode/audit/formatters.rb +23 -0
- data/lib/ucode/audit/library_aggregator.rb +86 -0
- data/lib/ucode/audit/library_auditor.rb +105 -0
- data/lib/ucode/audit/release/emitter.rb +152 -0
- data/lib/ucode/audit/release/face_card.rb +93 -0
- data/lib/ucode/audit/release/formula_audits.rb +50 -0
- data/lib/ucode/audit/release/library_index_builder.rb +78 -0
- data/lib/ucode/audit/release/manifest_builder.rb +127 -0
- data/lib/ucode/audit/release.rb +42 -0
- data/lib/ucode/audit/ucd_only_reference.rb +81 -0
- data/lib/ucode/audit/universal_set_reference.rb +136 -0
- data/lib/ucode/audit.rb +31 -0
- data/lib/ucode/cli.rb +339 -33
- data/lib/ucode/commands/audit/browser_command.rb +82 -0
- data/lib/ucode/commands/audit/collection_command.rb +103 -0
- data/lib/ucode/commands/audit/compare_command.rb +188 -0
- data/lib/ucode/commands/audit/font_command.rb +140 -0
- data/lib/ucode/commands/audit/library_command.rb +87 -0
- data/lib/ucode/commands/audit/reference_builder.rb +64 -0
- data/lib/ucode/commands/audit.rb +20 -0
- data/lib/ucode/commands/block_feed.rb +73 -0
- data/lib/ucode/commands/canonical_build.rb +138 -0
- data/lib/ucode/commands/fetch.rb +37 -1
- data/lib/ucode/commands/release.rb +115 -0
- data/lib/ucode/commands/universal_set.rb +211 -0
- data/lib/ucode/commands.rb +5 -0
- data/lib/ucode/coordinator/indices.rb +11 -0
- data/lib/ucode/coordinator.rb +138 -5
- data/lib/ucode/error.rb +30 -2
- data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
- data/lib/ucode/fetch/font_fetcher.rb +16 -0
- data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
- data/lib/ucode/fetch.rb +7 -3
- data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
- data/lib/ucode/glyphs/real_fonts.rb +1 -0
- data/lib/ucode/glyphs/resolver.rb +62 -0
- data/lib/ucode/glyphs/source.rb +48 -0
- data/lib/ucode/glyphs/source_builder.rb +61 -0
- data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
- data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
- data/lib/ucode/glyphs/source_config.rb +104 -0
- data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
- data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
- data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
- data/lib/ucode/glyphs/sources.rb +20 -0
- data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
- data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
- data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
- data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
- data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
- data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
- data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
- data/lib/ucode/glyphs/universal_set.rb +45 -0
- data/lib/ucode/glyphs.rb +6 -0
- data/lib/ucode/models/audit/baseline.rb +6 -0
- data/lib/ucode/models/audit/block_summary.rb +7 -0
- data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
- data/lib/ucode/models/audit/release_face.rb +42 -0
- data/lib/ucode/models/audit/release_formula.rb +33 -0
- data/lib/ucode/models/audit/release_manifest.rb +43 -0
- data/lib/ucode/models/audit/release_universal_set.rb +37 -0
- data/lib/ucode/models/audit.rb +9 -0
- data/lib/ucode/models/block.rb +2 -0
- data/lib/ucode/models/build_report.rb +109 -0
- data/lib/ucode/models/codepoint/glyph.rb +42 -0
- data/lib/ucode/models/codepoint.rb +3 -0
- data/lib/ucode/models/glyph_source.rb +86 -0
- data/lib/ucode/models/glyph_source_map.rb +138 -0
- data/lib/ucode/models/specialist_font.rb +70 -0
- data/lib/ucode/models/specialist_font_manifest.rb +48 -0
- data/lib/ucode/models/unihan_entry.rb +81 -9
- data/lib/ucode/models/unihan_field.rb +21 -0
- data/lib/ucode/models/universal_set_entry.rb +47 -0
- data/lib/ucode/models/universal_set_manifest.rb +78 -0
- data/lib/ucode/models/validation_report.rb +99 -0
- data/lib/ucode/models.rb +9 -0
- data/lib/ucode/parsers/named_sequences.rb +5 -5
- data/lib/ucode/parsers/unihan.rb +50 -19
- data/lib/ucode/repo/aggregate_writer.rb +34 -2
- data/lib/ucode/repo/block_feed_emitter.rb +153 -0
- data/lib/ucode/repo/build_report_accumulator.rb +138 -0
- data/lib/ucode/repo/build_report_writer.rb +46 -0
- data/lib/ucode/repo/build_validator.rb +229 -0
- data/lib/ucode/repo/codepoint_writer.rb +50 -1
- data/lib/ucode/repo/paths.rb +8 -0
- data/lib/ucode/repo.rb +4 -0
- data/lib/ucode/version.rb +1 -1
- data/schema/block-feed.output.schema.yml +134 -0
- metadata +143 -2
- data/ucode.gemspec +0 -56
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "pathname"
|
|
5
|
+
require "time"
|
|
6
|
+
|
|
7
|
+
require "ucode/repo/atomic_writes"
|
|
8
|
+
|
|
9
|
+
module Ucode
|
|
10
|
+
module Repo
|
|
11
|
+
# Emits a flat, per-block Unicode data feed from ucode's canonical
|
|
12
|
+
# output tree. The feed is a denormalized shape: each block file
|
|
13
|
+
# inlines all its codepoints (no joins needed at read time).
|
|
14
|
+
#
|
|
15
|
+
# Three files are emitted under `output_root`:
|
|
16
|
+
#
|
|
17
|
+
# unicode-blocks.json
|
|
18
|
+
# [{ start, end, name, unicode_version }, ...]
|
|
19
|
+
#
|
|
20
|
+
# unicode/blocks/<slug>.json
|
|
21
|
+
# { chars: [{ cp, n, c, s, cc?, bc?, mir? }, ...] }
|
|
22
|
+
#
|
|
23
|
+
# unicode-version.json
|
|
24
|
+
# { version, blockCount, charCount, generatedAt }
|
|
25
|
+
#
|
|
26
|
+
# This emitter reads ucode's canonical output (blocks/index.json,
|
|
27
|
+
# blocks/<ID>/index.json, index/labels.json) and translates shapes.
|
|
28
|
+
# ucode stays canonical; the feed is one-way derived.
|
|
29
|
+
#
|
|
30
|
+
# Block slug algorithm (matches common practice; no consumer
|
|
31
|
+
# assumptions baked in):
|
|
32
|
+
#
|
|
33
|
+
# name.downcase.gsub(/[^a-z0-9]+/, "-").gsub(/^-|-$/, "")
|
|
34
|
+
#
|
|
35
|
+
# Block display name uses Unicode's verbatim spacing (e.g.
|
|
36
|
+
# "Basic Latin", "Greek and Coptic") from ucode's canonical name.
|
|
37
|
+
#
|
|
38
|
+
# The shape of this feed is documented in
|
|
39
|
+
# schema/block-feed.output.schema.yml — that YAML is the canonical
|
|
40
|
+
# contract for any consumer of the feed.
|
|
41
|
+
class BlockFeedEmitter
|
|
42
|
+
include AtomicWrites
|
|
43
|
+
|
|
44
|
+
# @param ucode_output_root [String, Pathname] ucode's `output/`
|
|
45
|
+
# @param output_root [String, Pathname] target directory;
|
|
46
|
+
# `unicode-blocks.json`, `unicode-version.json`, and `unicode/`
|
|
47
|
+
# are written here.
|
|
48
|
+
def initialize(ucode_output_root, output_root)
|
|
49
|
+
@ucode_root = Pathname.new(ucode_output_root)
|
|
50
|
+
@output_root = Pathname.new(output_root)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# @param ucd_version [String] e.g. "17.0.0"
|
|
54
|
+
# @return [Hash] { blocks_written:, codepoints_written:,
|
|
55
|
+
# unicode_blocks_path:, unicode_version_path:, version: }
|
|
56
|
+
def emit(ucd_version:)
|
|
57
|
+
labels = load_json(ucode_path("index", "labels.json"))
|
|
58
|
+
blocks_index = load_json(ucode_path("blocks", "index.json"))
|
|
59
|
+
|
|
60
|
+
per_block = blocks_index.map do |entry|
|
|
61
|
+
emit_block(entry, labels)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
write_unicode_blocks(per_block)
|
|
65
|
+
version_payload = write_unicode_version(ucd_version, per_block)
|
|
66
|
+
|
|
67
|
+
{
|
|
68
|
+
blocks_written: per_block.length,
|
|
69
|
+
codepoints_written: per_block.sum { |b| b[:char_count] },
|
|
70
|
+
unicode_blocks_path: @output_root.join("unicode-blocks.json"),
|
|
71
|
+
unicode_version_path: @output_root.join("unicode-version.json"),
|
|
72
|
+
version: version_payload,
|
|
73
|
+
}
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
private
|
|
77
|
+
|
|
78
|
+
def emit_block(entry, labels)
|
|
79
|
+
block_id = entry["id"]
|
|
80
|
+
block_file = load_json(ucode_path("blocks", block_id, "index.json"))
|
|
81
|
+
chars = chars_for(block_file["codepoint_ids"] || [], labels)
|
|
82
|
+
slug = block_slug(entry["name"])
|
|
83
|
+
|
|
84
|
+
write_block_file(slug, chars)
|
|
85
|
+
|
|
86
|
+
{
|
|
87
|
+
slug: slug,
|
|
88
|
+
char_count: chars.length,
|
|
89
|
+
summary: {
|
|
90
|
+
"start" => entry["first_cp"],
|
|
91
|
+
"end" => entry["last_cp"],
|
|
92
|
+
"name" => entry["name"],
|
|
93
|
+
"unicode_version" => entry["age"] || block_file["age"] || "1.1",
|
|
94
|
+
},
|
|
95
|
+
}
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def chars_for(codepoint_ids, labels)
|
|
99
|
+
codepoint_ids.map do |cp_id|
|
|
100
|
+
label = labels[cp_id] || {}
|
|
101
|
+
{
|
|
102
|
+
"cp" => cp_id_to_i(cp_id),
|
|
103
|
+
"n" => label["name"],
|
|
104
|
+
"c" => label["gc"],
|
|
105
|
+
"s" => label["sc"],
|
|
106
|
+
"cc" => label["cc"],
|
|
107
|
+
"bc" => label["bc"],
|
|
108
|
+
"mir" => label["mir"],
|
|
109
|
+
}.reject { |_, v| v.nil? || v == "" }
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def write_block_file(slug, chars)
|
|
114
|
+
path = @output_root.join("unicode", "blocks", "#{slug}.json")
|
|
115
|
+
write_atomic(path, to_pretty_json("chars" => chars))
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def write_unicode_blocks(per_block)
|
|
119
|
+
path = @output_root.join("unicode-blocks.json")
|
|
120
|
+
summaries = per_block.map { |b| b[:summary] }
|
|
121
|
+
write_atomic(path, to_pretty_json(summaries))
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
def write_unicode_version(ucd_version, per_block)
|
|
125
|
+
payload = {
|
|
126
|
+
"version" => ucd_version,
|
|
127
|
+
"blockCount" => per_block.length,
|
|
128
|
+
"charCount" => per_block.sum { |b| b[:char_count] },
|
|
129
|
+
"generatedAt" => Time.now.utc.iso8601,
|
|
130
|
+
}
|
|
131
|
+
path = @output_root.join("unicode-version.json")
|
|
132
|
+
write_atomic(path, to_pretty_json(payload))
|
|
133
|
+
payload
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
def block_slug(name)
|
|
137
|
+
name.downcase.gsub(/[^a-z0-9]+/, "-").gsub(/^-|-$/, "")
|
|
138
|
+
end
|
|
139
|
+
|
|
140
|
+
def cp_id_to_i(cp_id)
|
|
141
|
+
cp_id.to_s.sub(/^U\+/i, "").to_i(16)
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
def ucode_path(*parts)
|
|
145
|
+
@ucode_root.join(*parts)
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
def load_json(path)
|
|
149
|
+
JSON.parse(path.read)
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
end
|
|
153
|
+
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "time"
|
|
4
|
+
|
|
5
|
+
require "ucode/models/build_report"
|
|
6
|
+
|
|
7
|
+
module Ucode
|
|
8
|
+
module Repo
|
|
9
|
+
# Observes {CodepointWriter} and tallies per-tier + per-block
|
|
10
|
+
# statistics for the canonical build report (TODO 21).
|
|
11
|
+
#
|
|
12
|
+
# Wire as the `observer:` kwarg on {CodepointWriter}:
|
|
13
|
+
#
|
|
14
|
+
# accumulator = BuildReportAccumulator.new(unicode_version: "17.0.0", ucode_version: "0.1.1")
|
|
15
|
+
# writer = CodepointWriter.new(root, resolver: resolver, observer: accumulator)
|
|
16
|
+
# coordinator.each_codepoint { |cp| writer.write(cp) }
|
|
17
|
+
# report = accumulator.to_report
|
|
18
|
+
#
|
|
19
|
+
# The accumulator is thread-safe — the writer's worker pool calls
|
|
20
|
+
# `#call` from multiple threads.
|
|
21
|
+
#
|
|
22
|
+
# == Semantics
|
|
23
|
+
#
|
|
24
|
+
# `assigned` counts every codepoint the writer attempted (passed
|
|
25
|
+
# the block_id guard). `built` counts codepoints whose resolver
|
|
26
|
+
# returned a glyph. `skipped` counts codepoints that resolved to
|
|
27
|
+
# nil (no tier produced a glyph). `failed` counts exceptions
|
|
28
|
+
# recorded via {#record_failure} (the writer rescues nothing;
|
|
29
|
+
# the orchestrating command decides what to surface).
|
|
30
|
+
#
|
|
31
|
+
# `by_tier` counts ONLY the winning tier per codepoint (not the
|
|
32
|
+
# overlap semantics mentioned in TODO 21's example). TODO 21
|
|
33
|
+
# notes the overlap counts as descriptive; the per-codepoint
|
|
34
|
+
# winning tier is the load-bearing number for validation.
|
|
35
|
+
class BuildReportAccumulator
|
|
36
|
+
TIER_TO_WIRE = {
|
|
37
|
+
tier1: "tier-1",
|
|
38
|
+
pillar1: "pillar-1",
|
|
39
|
+
pillar2: "pillar-2",
|
|
40
|
+
pillar3: "pillar-3",
|
|
41
|
+
}.freeze
|
|
42
|
+
private_constant :TIER_TO_WIRE
|
|
43
|
+
|
|
44
|
+
# @param unicode_version [String]
|
|
45
|
+
# @param ucode_version [String]
|
|
46
|
+
def initialize(unicode_version:, ucode_version:)
|
|
47
|
+
@unicode_version = unicode_version
|
|
48
|
+
@ucode_version = ucode_version
|
|
49
|
+
@totals = { assigned: 0, built: 0, skipped: 0, failed: 0 }
|
|
50
|
+
@by_tier = Hash.new(0)
|
|
51
|
+
@by_block = Hash.new do |h, name|
|
|
52
|
+
h[name] = { assigned: 0, built: 0, tier_breakdown: Hash.new(0) }
|
|
53
|
+
end
|
|
54
|
+
@failures = []
|
|
55
|
+
@mutex = Mutex.new
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Observer entry point — invoked by {CodepointWriter#write} as
|
|
59
|
+
# `observer.call(codepoint, result)`. Records one attempt.
|
|
60
|
+
#
|
|
61
|
+
# @param codepoint [Ucode::Models::CodePoint]
|
|
62
|
+
# @param result [Ucode::Glyphs::Source::Result, nil]
|
|
63
|
+
# @return [void]
|
|
64
|
+
def call(codepoint, result)
  # CodepointWriter#write notifies the observer *before* its own
  # `block_id.nil?` guard, so codepoints without a block reach this
  # method. The writer never attempts them; counting them would
  # inflate `assigned`/`skipped` and create a nil-named block entry.
  block_id = codepoint.block_id
  return if block_id.nil?

  synchronize do
    @totals[:assigned] += 1
    block_stats = @by_block[block_id]
    block_stats[:assigned] += 1

    if result
      # Only the winning tier is tallied, once globally and once for
      # the codepoint's block.
      tier = wire_tier(result.tier)
      @totals[:built] += 1
      @by_tier[tier] += 1
      block_stats[:built] += 1
      block_stats[:tier_breakdown][tier] += 1
    else
      # No tier produced a glyph for this codepoint.
      @totals[:skipped] += 1
    end
  end
end
|
|
81
|
+
|
|
82
|
+
# Record an exception encountered while building a codepoint.
|
|
83
|
+
# The orchestrating command calls this when rescuing around
|
|
84
|
+
# writer.write; the writer itself does not rescue.
|
|
85
|
+
#
|
|
86
|
+
# @param codepoint [Ucode::Models::CodePoint, nil]
|
|
87
|
+
# @param error [StandardError]
|
|
88
|
+
# @param tier [Symbol, nil] resolver tier that raised, if known
|
|
89
|
+
# @return [void]
|
|
90
|
+
def record_failure(codepoint, error, tier: nil)
|
|
91
|
+
synchronize do
|
|
92
|
+
@totals[:failed] += 1
|
|
93
|
+
@failures << Ucode::Models::BuildReport::Failure.new(
|
|
94
|
+
codepoint: codepoint&.cp,
|
|
95
|
+
block_name: codepoint&.block_id,
|
|
96
|
+
tier: tier&.to_s,
|
|
97
|
+
error_class: error.class.name,
|
|
98
|
+
message: error.message,
|
|
99
|
+
backtrace: Array(error.backtrace).first(10),
|
|
100
|
+
)
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
# Build the immutable {Ucode::Models::BuildReport} snapshot.
|
|
105
|
+
# @return [Ucode::Models::BuildReport]
|
|
106
|
+
def to_report
|
|
107
|
+
synchronize do
|
|
108
|
+
Ucode::Models::BuildReport.new(
|
|
109
|
+
unicode_version: @unicode_version,
|
|
110
|
+
ucode_version: @ucode_version,
|
|
111
|
+
generated_at: Time.now.utc.iso8601,
|
|
112
|
+
totals: Ucode::Models::BuildReport::Totals.new(@totals),
|
|
113
|
+
by_tier: @by_tier.dup,
|
|
114
|
+
by_block: @by_block.map do |name, stats|
|
|
115
|
+
Ucode::Models::BuildReport::BlockSummary.new(
|
|
116
|
+
name: name,
|
|
117
|
+
assigned: stats[:assigned],
|
|
118
|
+
built: stats[:built],
|
|
119
|
+
tier_breakdown: stats[:tier_breakdown].dup,
|
|
120
|
+
)
|
|
121
|
+
end,
|
|
122
|
+
failures: @failures.dup,
|
|
123
|
+
)
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
|
|
127
|
+
private
|
|
128
|
+
|
|
129
|
+
def wire_tier(symbol)
|
|
130
|
+
TIER_TO_WIRE.fetch(symbol, symbol.to_s)
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def synchronize(&)
|
|
134
|
+
@mutex.synchronize(&)
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require "ucode/repo/atomic_writes"
|
|
6
|
+
require "ucode/repo/paths"
|
|
7
|
+
|
|
8
|
+
module Ucode
|
|
9
|
+
module Repo
|
|
10
|
+
# Writes the canonical build report (TODO 21) to
|
|
11
|
+
# `output/build-report.json` atomically and idempotently.
|
|
12
|
+
#
|
|
13
|
+
# Re-running a build with no changed stats produces zero file
|
|
14
|
+
# writes — the existing build-report.json is byte-compared to the
|
|
15
|
+
# new payload before writing.
|
|
16
|
+
#
|
|
17
|
+
# The `generated_at` field is the only non-deterministic part of
|
|
18
|
+
# the report; callers wanting strict idempotency can override the
|
|
19
|
+
# accumulator's `to_report` to use a fixed timestamp.
|
|
20
|
+
class BuildReportWriter
|
|
21
|
+
include AtomicWrites
|
|
22
|
+
|
|
23
|
+
# @param output_root [String, Pathname]
|
|
24
|
+
def initialize(output_root)
|
|
25
|
+
@output_root = Pathname.new(output_root)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# @param report [Ucode::Models::BuildReport]
|
|
29
|
+
# @return [Pathname, nil] the path written, or nil if the
|
|
30
|
+
# existing file was byte-identical (no-op).
|
|
31
|
+
def write(report)
|
|
32
|
+
path = @output_root.join("build-report.json")
|
|
33
|
+
payload = serialize(report)
|
|
34
|
+
return nil unless write_atomic(path, payload)
|
|
35
|
+
|
|
36
|
+
path
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
private
|
|
40
|
+
|
|
41
|
+
def serialize(report)
|
|
42
|
+
report.to_json(pretty: true)
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "pathname"
|
|
5
|
+
require "time"
|
|
6
|
+
|
|
7
|
+
require "ucode/models"
|
|
8
|
+
require "ucode/repo/atomic_writes"
|
|
9
|
+
|
|
10
|
+
module Ucode
|
|
11
|
+
module Repo
|
|
12
|
+
# Walks an output tree produced by {CanonicalBuildCommand} and
|
|
13
|
+
# runs the four automated validation checks from TODO 21
|
|
14
|
+
# §Validation:
|
|
15
|
+
#
|
|
16
|
+
# 1. `completeness` — every codepoint folder has both
|
|
17
|
+
# `index.json` and `glyph.svg`.
|
|
18
|
+
# 2. `schema` — every `index.json` deserializes via
|
|
19
|
+
# `Ucode::Models::CodePoint.from_hash`.
|
|
20
|
+
# 3. `provenance_sanity` — every deserialized CodePoint carries
|
|
21
|
+
# a non-nil `glyph.source.tier`.
|
|
22
|
+
# 4. `block_coverage` — per-block built count matches the
|
|
23
|
+
# supplied baseline (status is `skipped` when no baseline).
|
|
24
|
+
#
|
|
25
|
+
# Sample inspection (check 5 in TODO 21) is manual and out of
|
|
26
|
+
# scope.
|
|
27
|
+
#
|
|
28
|
+
# The validator is stateless from the outside: one call to
|
|
29
|
+
# {#validate} walks the tree, builds a {ValidationReport}, and
|
|
30
|
+
# writes it atomically to `output/validation-report.json`. Safe
|
|
31
|
+
# to re-run on the same tree — idempotent via {AtomicWrites}.
|
|
32
|
+
#
|
|
33
|
+
# == Baseline shape
|
|
34
|
+
#
|
|
35
|
+
# `baseline:` is a `Hash{String block_name => Integer expected}`
|
|
36
|
+
# — the per-block built count expected from TODO 05's audit.
|
|
37
|
+
# Missing blocks in the baseline are ignored; blocks present in
|
|
38
|
+
# the output but absent from the baseline are not flagged (the
|
|
39
|
+
# baseline is authoritative only for what it covers).
|
|
40
|
+
class BuildValidator
|
|
41
|
+
include AtomicWrites
|
|
42
|
+
|
|
43
|
+
CHECK_COMPLETENESS = "completeness"
|
|
44
|
+
CHECK_SCHEMA = "schema"
|
|
45
|
+
CHECK_PROVENANCE = "provenance_sanity"
|
|
46
|
+
CHECK_BLOCK_COVERAGE = "block_coverage"
|
|
47
|
+
ALL_CHECKS = [
|
|
48
|
+
CHECK_COMPLETENESS, CHECK_SCHEMA, CHECK_PROVENANCE, CHECK_BLOCK_COVERAGE
|
|
49
|
+
].freeze
|
|
50
|
+
private_constant :ALL_CHECKS
|
|
51
|
+
|
|
52
|
+
# @param output_root [String, Pathname]
|
|
53
|
+
# @param unicode_version [String, nil] stamped onto the report;
|
|
54
|
+
# nil leaves the field blank (callers usually know the version).
|
|
55
|
+
# @param baseline [Hash{String=>Integer}, nil] per-block expected
|
|
56
|
+
# built counts; when nil, the block_coverage check is skipped.
|
|
57
|
+
def initialize(output_root, unicode_version: nil, baseline: nil)
|
|
58
|
+
@output_root = Pathname.new(output_root)
|
|
59
|
+
@unicode_version = unicode_version
|
|
60
|
+
@baseline = baseline
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Walk the tree, run all checks, emit validation-report.json.
|
|
64
|
+
# @return [Hash] { report:, report_path:, passed: }
|
|
65
|
+
def validate
|
|
66
|
+
failures = []
|
|
67
|
+
per_block_counts = Hash.new(0)
|
|
68
|
+
|
|
69
|
+
each_codepoint_dir do |block_name, cp_id, cp_dir|
|
|
70
|
+
per_block_counts[block_name] += 1
|
|
71
|
+
validate_codepoint(block_name, cp_id, cp_dir, failures)
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
validate_block_coverage(per_block_counts, failures)
|
|
75
|
+
|
|
76
|
+
report = build_report(failures, per_block_counts)
|
|
77
|
+
report_path = write_report(report)
|
|
78
|
+
{
|
|
79
|
+
report: report,
|
|
80
|
+
report_path: report_path,
|
|
81
|
+
passed: report.totals.failures.zero?,
|
|
82
|
+
}
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
private
|
|
86
|
+
|
|
87
|
+
# Yields `(block_name, cp_id, cp_dir)` for every codepoint directory
# found at `<output_root>/blocks/<block>/<codepoint>`, in sorted path
# order. Non-directory entries at either level are ignored, and
# nothing is yielded when the `blocks` directory does not exist.
#
# `Pathname#children` returns entries in whatever order the
# filesystem reports them, so both levels are sorted to keep the
# order of recorded failures stable across runs and platforms.
def each_codepoint_dir
  blocks_path = @output_root.join("blocks")
  return unless blocks_path.exist?

  blocks_path.children.select(&:directory?).sort.each do |block_dir|
    block_name = block_dir.basename.to_s
    block_dir.children.select(&:directory?).sort.each do |cp_dir|
      yield block_name, cp_dir.basename.to_s, cp_dir
    end
  end
end
|
|
98
|
+
|
|
99
|
+
def validate_codepoint(block_name, cp_id, cp_dir, failures)
|
|
100
|
+
index_path = cp_dir.join("index.json")
|
|
101
|
+
glyph_path = cp_dir.join(Paths.glyph_filename)
|
|
102
|
+
cp_int = parse_cp_int(cp_id)
|
|
103
|
+
|
|
104
|
+
unless index_path.exist?
|
|
105
|
+
failures << make_failure(cp_int, block_name, CHECK_COMPLETENESS,
|
|
106
|
+
"missing index.json")
|
|
107
|
+
return
|
|
108
|
+
end
|
|
109
|
+
unless glyph_path.exist?
|
|
110
|
+
failures << make_failure(cp_int, block_name, CHECK_COMPLETENESS,
|
|
111
|
+
"missing glyph.svg")
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
parsed = parse_index(index_path, cp_int, block_name, failures)
|
|
115
|
+
return unless parsed
|
|
116
|
+
|
|
117
|
+
check_provenance(parsed, cp_int, block_name, failures)
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def parse_index(index_path, cp_int, block_name, failures)
|
|
121
|
+
hash = parse_json(index_path.read, cp_int, block_name, failures)
|
|
122
|
+
return nil unless hash
|
|
123
|
+
|
|
124
|
+
begin
|
|
125
|
+
Ucode::Models::CodePoint.from_hash(hash)
|
|
126
|
+
rescue StandardError => e
|
|
127
|
+
failures << make_failure(cp_int, block_name, CHECK_SCHEMA,
|
|
128
|
+
"deserialization failed: #{e.class}: #{e.message}")
|
|
129
|
+
nil
|
|
130
|
+
end
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def parse_json(body, cp_int, block_name, failures)
|
|
134
|
+
JSON.parse(body)
|
|
135
|
+
rescue JSON::ParserError => e
|
|
136
|
+
failures << make_failure(cp_int, block_name, CHECK_SCHEMA,
|
|
137
|
+
"JSON parse failed: #{e.message}")
|
|
138
|
+
nil
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
def check_provenance(model, cp_int, block_name, failures)
|
|
142
|
+
return if model.glyph&.source&.tier
|
|
143
|
+
|
|
144
|
+
failures << make_failure(cp_int, block_name, CHECK_PROVENANCE,
|
|
145
|
+
"glyph.source.tier is missing")
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
def validate_block_coverage(per_block_counts, failures)
|
|
149
|
+
return if @baseline.nil?
|
|
150
|
+
|
|
151
|
+
@baseline.each do |block_name, expected|
|
|
152
|
+
actual = per_block_counts[block_name]
|
|
153
|
+
next if actual == expected
|
|
154
|
+
|
|
155
|
+
failures << make_failure(nil, block_name, CHECK_BLOCK_COVERAGE,
|
|
156
|
+
"expected #{expected} built, found #{actual}")
|
|
157
|
+
end
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def build_report(failures, per_block_counts)
|
|
161
|
+
checks = ALL_CHECKS.map do |name|
|
|
162
|
+
build_check_summary(name, failures, per_block_counts)
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
Ucode::Models::ValidationReport.new(
|
|
166
|
+
unicode_version: @unicode_version.to_s,
|
|
167
|
+
generated_at: Time.now.utc.iso8601,
|
|
168
|
+
totals: Ucode::Models::ValidationReport::Totals.new(
|
|
169
|
+
codepoints_checked: per_block_counts.values.sum,
|
|
170
|
+
failures: failures.length,
|
|
171
|
+
checks_run: checks.count { |c| c.status != "skipped" },
|
|
172
|
+
checks_passed: checks.count { |c| c.status == "passed" },
|
|
173
|
+
),
|
|
174
|
+
checks: checks,
|
|
175
|
+
failures: failures,
|
|
176
|
+
)
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
def build_check_summary(name, failures, per_block_counts)
|
|
180
|
+
count = failures.count { |f| f.check == name }
|
|
181
|
+
total = total_for_check(name, per_block_counts)
|
|
182
|
+
|
|
183
|
+
status = if name == CHECK_BLOCK_COVERAGE && @baseline.nil?
|
|
184
|
+
"skipped"
|
|
185
|
+
elsif count.zero?
|
|
186
|
+
"passed"
|
|
187
|
+
else
|
|
188
|
+
"failed"
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
Ucode::Models::ValidationReport::CheckSummary.new(
|
|
192
|
+
name: name,
|
|
193
|
+
status: status,
|
|
194
|
+
total: total,
|
|
195
|
+
failures: count,
|
|
196
|
+
)
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
def total_for_check(name, per_block_counts)
|
|
200
|
+
return @baseline&.length || 0 if name == CHECK_BLOCK_COVERAGE
|
|
201
|
+
|
|
202
|
+
per_block_counts.values.sum
|
|
203
|
+
end
|
|
204
|
+
|
|
205
|
+
def write_report(report)
|
|
206
|
+
path = @output_root.join("validation-report.json")
|
|
207
|
+
write_atomic(path, report.to_json(pretty: true))
|
|
208
|
+
path
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
def make_failure(cp_int, block_name, check, message)
|
|
212
|
+
Ucode::Models::ValidationReport::Failure.new(
|
|
213
|
+
codepoint: cp_int,
|
|
214
|
+
block: block_name,
|
|
215
|
+
check: check,
|
|
216
|
+
message: message,
|
|
217
|
+
)
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
# Converts a codepoint directory name such as "U+0041" into its
# integer value. Returns nil for any name that is not exactly "U+"
# followed by one or more hexadecimal digits.
#
# The match is anchored on the whole string and allows hex digits
# only, so stray directory names that `Integer()` would tolerate
# (underscore separators, trailing whitespace) are not mistaken for
# real codepoints when a failure is recorded.
def parse_cp_int(cp_id)
  return nil unless cp_id.match?(/\AU\+\h+\z/)

  cp_id[2..].to_i(16)
end
|
|
227
|
+
end
|
|
228
|
+
end
|
|
229
|
+
end
|
|
@@ -22,15 +22,38 @@ module Ucode
|
|
|
22
22
|
# - **Atomic**: writes go to `<path>.tmp`, then rename. A crash
|
|
23
23
|
# mid-write leaves either the old file or no file, never a
|
|
24
24
|
# truncated one.
|
|
25
|
+
#
|
|
26
|
+
# When a {Ucode::Glyphs::Resolver} is supplied via `resolver:`, each
|
|
27
|
+
# write also resolves the codepoint's glyph, writes `glyph.svg`
|
|
28
|
+
# alongside `index.json` (same atomic + idempotent semantics), and
|
|
29
|
+
# records the resolver tier + provenance on the codepoint's `glyph`
|
|
30
|
+
# attribute so it lands in the serialized JSON. When `resolver:` is
|
|
31
|
+
# nil (default), the writer is glyph-agnostic and only writes
|
|
32
|
+
# `index.json` — preserving backward compatibility.
|
|
25
33
|
class CodepointWriter
|
|
26
34
|
include AtomicWrites
|
|
27
35
|
|
|
28
36
|
# @param output_root [String, Pathname]
|
|
29
37
|
# @param parallel_workers [Integer] size of the worker pool. Set to
|
|
30
38
|
# 1 (or less) to run synchronously — useful in tests.
|
|
31
|
-
|
|
39
|
+
# @param resolver [Ucode::Glyphs::Resolver, nil] when non-nil, each
|
|
40
|
+
# write resolves the codepoint's glyph via this resolver and
|
|
41
|
+
# writes `glyph.svg` next to `index.json`. Sources inside the
|
|
42
|
+
# resolver must be safe for concurrent access — the worker pool
|
|
43
|
+
# calls into them from multiple threads.
|
|
44
|
+
# @param observer [#call, nil] when non-nil, invoked as
|
|
45
|
+
# `observer.call(codepoint, result)` after each resolve attempt
|
|
46
|
+
# (and before the JSON write). `result` is the
|
|
47
|
+
# {Ucode::Glyphs::Source::Result} when a tier produced a glyph,
|
|
48
|
+
# or nil when `block_id` is nil / no resolver is configured / no tier matched. Used
|
|
49
|
+
# by {Ucode::Repo::BuildReportAccumulator} to tally per-tier
|
|
50
|
+
# stats. The observer must be thread-safe.
|
|
51
|
+
def initialize(output_root, parallel_workers: 8, resolver: nil,
|
|
52
|
+
observer: nil)
|
|
32
53
|
@output_root = Pathname.new(output_root)
|
|
33
54
|
@parallel_workers = parallel_workers
|
|
55
|
+
@resolver = resolver
|
|
56
|
+
@observer = observer
|
|
34
57
|
end
|
|
35
58
|
|
|
36
59
|
# Write one codepoint synchronously.
|
|
@@ -38,6 +61,8 @@ module Ucode
|
|
|
38
61
|
# @return [Pathname, nil] the path written, or nil if skipped
|
|
39
62
|
# (missing block_id or content-identical to existing file)
|
|
40
63
|
def write(codepoint)
|
|
64
|
+
result = codepoint.block_id.nil? ? nil : resolve_glyph(codepoint)
|
|
65
|
+
@observer&.call(codepoint, result)
|
|
41
66
|
return nil if codepoint.block_id.nil?
|
|
42
67
|
|
|
43
68
|
path = Paths.codepoint_json_path(@output_root, codepoint.block_id, codepoint.id)
|
|
@@ -91,6 +116,30 @@ module Ucode
|
|
|
91
116
|
def serialize(codepoint)
|
|
92
117
|
codepoint.to_json(pretty: true)
|
|
93
118
|
end
|
|
119
|
+
|
|
120
|
+
def resolve_glyph(codepoint)
|
|
121
|
+
return nil unless @resolver
|
|
122
|
+
|
|
123
|
+
result = @resolver.resolve(codepoint.cp)
|
|
124
|
+
codepoint.glyph = build_glyph_bundle(result)
|
|
125
|
+
return nil unless result
|
|
126
|
+
|
|
127
|
+
path = Paths.codepoint_glyph_path(@output_root, codepoint.block_id, codepoint.id)
|
|
128
|
+
write_atomic(path, result.svg)
|
|
129
|
+
result
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def build_glyph_bundle(result)
|
|
133
|
+
return nil unless result
|
|
134
|
+
|
|
135
|
+
Ucode::Models::CodePoint::Glyph.new(
|
|
136
|
+
svg_path: Paths.glyph_filename,
|
|
137
|
+
source: Ucode::Models::CodePoint::Glyph::Source.new(
|
|
138
|
+
tier: result.tier.to_s,
|
|
139
|
+
provenance: result.provenance,
|
|
140
|
+
),
|
|
141
|
+
)
|
|
142
|
+
end
|
|
94
143
|
end
|
|
95
144
|
end
|
|
96
145
|
end
|
data/lib/ucode/repo/paths.rb
CHANGED
|
@@ -24,6 +24,14 @@ module Ucode
|
|
|
24
24
|
:PLANE_FILENAME_PREFIX
|
|
25
25
|
|
|
26
26
|
class << self
|
|
27
|
+
# The fixed filename every codepoint's SVG glyph is written to
|
|
28
|
+
# (relative to the codepoint's own directory). Exposed so the
|
|
29
|
+
# Glyph model bundle records the same string the layout uses.
|
|
30
|
+
# @return [String]
|
|
31
|
+
def glyph_filename
|
|
32
|
+
GLYPH_FILENAME
|
|
33
|
+
end
|
|
34
|
+
|
|
27
35
|
# Format an integer codepoint as the canonical "U+XXXX" id used
|
|
28
36
|
# everywhere (paths, JSON, cross-references). Always at least
|
|
29
37
|
# 4 hex digits, uppercase, no extra padding.
|
data/lib/ucode/repo.rb
CHANGED
|
@@ -18,5 +18,9 @@ module Ucode
|
|
|
18
18
|
autoload :AtomicWrites, "ucode/repo/atomic_writes"
|
|
19
19
|
autoload :CodepointWriter, "ucode/repo/codepoint_writer"
|
|
20
20
|
autoload :AggregateWriter, "ucode/repo/aggregate_writer"
|
|
21
|
+
autoload :BuildReportAccumulator, "ucode/repo/build_report_accumulator"
|
|
22
|
+
autoload :BuildReportWriter, "ucode/repo/build_report_writer"
|
|
23
|
+
autoload :BuildValidator, "ucode/repo/build_validator"
|
|
24
|
+
autoload :BlockFeedEmitter, "ucode/repo/block_feed_emitter"
|
|
21
25
|
end
|
|
22
26
|
end
|
data/lib/ucode/version.rb
CHANGED