ucode 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +72 -0
  3. data/Gemfile.lock +2 -2
  4. data/TODO.full/00-README.md +116 -0
  5. data/TODO.full/01-panglyph-vision.md +112 -0
  6. data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
  7. data/TODO.full/03-panglyph-font-builder.md +201 -0
  8. data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
  9. data/TODO.full/05-ucode-0-1-1-release.md +139 -0
  10. data/TODO.full/06-fontisan-remove-audit.md +142 -0
  11. data/TODO.full/07-fontisan-remove-ucd.md +125 -0
  12. data/TODO.full/08-archive-private-bin-build.md +143 -0
  13. data/TODO.full/09-archive-public-structure.md +164 -0
  14. data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
  15. data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
  16. data/TODO.full/12-implementation-order.md +216 -0
  17. data/TODO.full/13-fontisan-font-writer-api.md +189 -0
  18. data/TODO.full/14-fontisan-table-writers.md +66 -0
  19. data/TODO.full/15-panglyph-builder-real.md +82 -0
  20. data/TODO.full/16-archive-public-sync-workflows.md +167 -0
  21. data/TODO.full/17-fontist-org-font-picker.md +73 -0
  22. data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
  23. data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
  24. data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
  25. data/TODO.new/00-README.md +30 -0
  26. data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
  27. data/TODO.new/24-universal-glyph-set-build.md +189 -0
  28. data/TODO.new/25-font-audit-against-universal-set.md +195 -0
  29. data/TODO.new/26-missing-glyph-reporter.md +189 -0
  30. data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
  31. data/TODO.new/28-implementation-order-update.md +187 -0
  32. data/TODO.new/29-universal-set-curation-uc17.md +312 -0
  33. data/TODO.new/30-tier1-font-acquisition.md +241 -0
  34. data/TODO.new/31-universal-set-production-build.md +205 -0
  35. data/TODO.new/32-uc17-coverage-matrix.md +165 -0
  36. data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
  37. data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
  38. data/TODO.new/35-universal-set-production-run.md +160 -0
  39. data/TODO.new/36-per-font-coverage-audit.md +145 -0
  40. data/TODO.new/37-coverage-highlight-reporter.md +125 -0
  41. data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
  42. data/TODO.new/39-implementation-order-update-32-38.md +258 -0
  43. data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
  44. data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
  45. data/config/specialist_fonts.yml +102 -0
  46. data/config/unicode17_tier1_fonts.yml +42 -0
  47. data/config/unicode17_universal_glyph_set.yml +293 -0
  48. data/lib/ucode/audit/block_aggregator.rb +57 -29
  49. data/lib/ucode/audit/browser/face_page.rb +128 -0
  50. data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
  51. data/lib/ucode/audit/browser/library_page.rb +74 -0
  52. data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
  53. data/lib/ucode/audit/browser/template.rb +47 -0
  54. data/lib/ucode/audit/browser/templates/face.css +200 -0
  55. data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
  56. data/lib/ucode/audit/browser/templates/face.js +298 -0
  57. data/lib/ucode/audit/browser/templates/library.css +119 -0
  58. data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
  59. data/lib/ucode/audit/browser/templates/library.js +99 -0
  60. data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
  61. data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
  62. data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
  63. data/lib/ucode/audit/browser.rb +32 -0
  64. data/lib/ucode/audit/context.rb +27 -1
  65. data/lib/ucode/audit/coverage_reference.rb +103 -0
  66. data/lib/ucode/audit/differ.rb +121 -0
  67. data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
  68. data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
  69. data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
  70. data/lib/ucode/audit/emitter/face_directory.rb +212 -0
  71. data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
  72. data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
  73. data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
  74. data/lib/ucode/audit/emitter/paths.rb +312 -0
  75. data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
  76. data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
  77. data/lib/ucode/audit/emitter.rb +29 -0
  78. data/lib/ucode/audit/extractors/aggregations.rb +31 -2
  79. data/lib/ucode/audit/face_auditor.rb +86 -0
  80. data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
  81. data/lib/ucode/audit/formatters/audit_text.rb +411 -0
  82. data/lib/ucode/audit/formatters/color.rb +48 -0
  83. data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
  84. data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
  85. data/lib/ucode/audit/formatters.rb +23 -0
  86. data/lib/ucode/audit/library_aggregator.rb +86 -0
  87. data/lib/ucode/audit/library_auditor.rb +105 -0
  88. data/lib/ucode/audit/release/emitter.rb +152 -0
  89. data/lib/ucode/audit/release/face_card.rb +93 -0
  90. data/lib/ucode/audit/release/formula_audits.rb +50 -0
  91. data/lib/ucode/audit/release/library_index_builder.rb +78 -0
  92. data/lib/ucode/audit/release/manifest_builder.rb +127 -0
  93. data/lib/ucode/audit/release.rb +42 -0
  94. data/lib/ucode/audit/ucd_only_reference.rb +81 -0
  95. data/lib/ucode/audit/universal_set_reference.rb +136 -0
  96. data/lib/ucode/audit.rb +31 -0
  97. data/lib/ucode/cli.rb +339 -33
  98. data/lib/ucode/commands/audit/browser_command.rb +82 -0
  99. data/lib/ucode/commands/audit/collection_command.rb +103 -0
  100. data/lib/ucode/commands/audit/compare_command.rb +188 -0
  101. data/lib/ucode/commands/audit/font_command.rb +140 -0
  102. data/lib/ucode/commands/audit/library_command.rb +87 -0
  103. data/lib/ucode/commands/audit/reference_builder.rb +64 -0
  104. data/lib/ucode/commands/audit.rb +20 -0
  105. data/lib/ucode/commands/block_feed.rb +73 -0
  106. data/lib/ucode/commands/canonical_build.rb +138 -0
  107. data/lib/ucode/commands/fetch.rb +37 -1
  108. data/lib/ucode/commands/release.rb +115 -0
  109. data/lib/ucode/commands/universal_set.rb +211 -0
  110. data/lib/ucode/commands.rb +5 -0
  111. data/lib/ucode/coordinator/indices.rb +11 -0
  112. data/lib/ucode/coordinator.rb +138 -5
  113. data/lib/ucode/error.rb +30 -2
  114. data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
  115. data/lib/ucode/fetch/font_fetcher.rb +16 -0
  116. data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
  117. data/lib/ucode/fetch.rb +7 -3
  118. data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
  119. data/lib/ucode/glyphs/real_fonts.rb +1 -0
  120. data/lib/ucode/glyphs/resolver.rb +62 -0
  121. data/lib/ucode/glyphs/source.rb +48 -0
  122. data/lib/ucode/glyphs/source_builder.rb +61 -0
  123. data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
  124. data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
  125. data/lib/ucode/glyphs/source_config.rb +104 -0
  126. data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
  127. data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
  128. data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
  129. data/lib/ucode/glyphs/sources.rb +20 -0
  130. data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
  131. data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
  132. data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
  133. data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
  134. data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
  135. data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
  136. data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
  137. data/lib/ucode/glyphs/universal_set.rb +45 -0
  138. data/lib/ucode/glyphs.rb +6 -0
  139. data/lib/ucode/models/audit/baseline.rb +6 -0
  140. data/lib/ucode/models/audit/block_summary.rb +7 -0
  141. data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
  142. data/lib/ucode/models/audit/release_face.rb +42 -0
  143. data/lib/ucode/models/audit/release_formula.rb +33 -0
  144. data/lib/ucode/models/audit/release_manifest.rb +43 -0
  145. data/lib/ucode/models/audit/release_universal_set.rb +37 -0
  146. data/lib/ucode/models/audit.rb +9 -0
  147. data/lib/ucode/models/block.rb +2 -0
  148. data/lib/ucode/models/build_report.rb +109 -0
  149. data/lib/ucode/models/codepoint/glyph.rb +42 -0
  150. data/lib/ucode/models/codepoint.rb +3 -0
  151. data/lib/ucode/models/glyph_source.rb +86 -0
  152. data/lib/ucode/models/glyph_source_map.rb +138 -0
  153. data/lib/ucode/models/specialist_font.rb +70 -0
  154. data/lib/ucode/models/specialist_font_manifest.rb +48 -0
  155. data/lib/ucode/models/unihan_entry.rb +81 -9
  156. data/lib/ucode/models/unihan_field.rb +21 -0
  157. data/lib/ucode/models/universal_set_entry.rb +47 -0
  158. data/lib/ucode/models/universal_set_manifest.rb +78 -0
  159. data/lib/ucode/models/validation_report.rb +99 -0
  160. data/lib/ucode/models.rb +9 -0
  161. data/lib/ucode/parsers/named_sequences.rb +5 -5
  162. data/lib/ucode/parsers/unihan.rb +50 -19
  163. data/lib/ucode/repo/aggregate_writer.rb +34 -2
  164. data/lib/ucode/repo/block_feed_emitter.rb +153 -0
  165. data/lib/ucode/repo/build_report_accumulator.rb +138 -0
  166. data/lib/ucode/repo/build_report_writer.rb +46 -0
  167. data/lib/ucode/repo/build_validator.rb +229 -0
  168. data/lib/ucode/repo/codepoint_writer.rb +50 -1
  169. data/lib/ucode/repo/paths.rb +8 -0
  170. data/lib/ucode/repo.rb +4 -0
  171. data/lib/ucode/version.rb +1 -1
  172. data/schema/block-feed.output.schema.yml +134 -0
  173. metadata +143 -2
  174. data/ucode.gemspec +0 -56
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "pathname"
5
+ require "time"
6
+
7
+ require "ucode/repo/atomic_writes"
8
+
9
+ module Ucode
10
+ module Repo
11
+ # Emits a flat, per-block Unicode data feed from ucode's canonical
12
+ # output tree. The feed is a denormalized shape: each block file
13
+ # inlines all its codepoints (no joins needed at read time).
14
+ #
15
+ # Three files are emitted under `output_root`:
16
+ #
17
+ # unicode-blocks.json
18
+ # [{ start, end, name, unicode_version }, ...]
19
+ #
20
+ # unicode/blocks/<slug>.json
21
+ # { chars: [{ cp, n, c, s, cc?, bc?, mir? }, ...] }
22
+ #
23
+ # unicode-version.json
24
+ # { version, blockCount, charCount, generatedAt }
25
+ #
26
+ # This emitter reads ucode's canonical output (blocks/index.json,
27
+ # blocks/<ID>/index.json, index/labels.json) and translates shapes.
28
+ # ucode stays canonical; the feed is one-way derived.
29
+ #
30
+ # Block slug algorithm (matches common practice; no consumer
31
+ # assumptions baked in):
32
+ #
33
+ # name.downcase.gsub(/[^a-z0-9]+/, "-").gsub(/^-|-$/, "")
34
+ #
35
+ # Block display name uses Unicode's verbatim spacing (e.g.
36
+ # "Basic Latin", "Greek and Coptic") from ucode's canonical name.
37
+ #
38
+ # The shape of this feed is documented in
39
+ # schema/block-feed.output.schema.yml — that YAML is the canonical
40
+ # contract for any consumer of the feed.
41
+ class BlockFeedEmitter
42
+ include AtomicWrites
43
+
44
# Remembers where the canonical ucode tree is read from and where the
# derived feed is written to. Both roots are stored as Pathname objects.
#
# @param ucode_output_root [String, Pathname] ucode's `output/`
# @param output_root [String, Pathname] target directory;
#   `unicode-blocks.json`, `unicode-version.json`, and `unicode/`
#   are written here.
def initialize(ucode_output_root, output_root)
  @ucode_root, @output_root =
    [ucode_output_root, output_root].map { |root| Pathname.new(root) }
end
52
+
53
# Emits the whole feed: one file per block listed in `blocks/index.json`,
# then the block summary list and the version file.
#
# @param ucd_version [String] e.g. "17.0.0"
# @return [Hash] { blocks_written:, codepoints_written:,
#   unicode_blocks_path:, unicode_version_path:, version: } where
#   `version` is the payload written to `unicode-version.json`
def emit(ucd_version:)
  labels = load_json(ucode_path("index", "labels.json"))
  block_entries = load_json(ucode_path("blocks", "index.json"))
  emitted = block_entries.map { |block_entry| emit_block(block_entry, labels) }

  write_unicode_blocks(emitted)
  version_info = write_unicode_version(ucd_version, emitted)

  {
    blocks_written: emitted.length,
    codepoints_written: emitted.sum { |block| block[:char_count] },
    unicode_blocks_path: @output_root.join("unicode-blocks.json"),
    unicode_version_path: @output_root.join("unicode-version.json"),
    version: version_info,
  }
end
75
+
76
+ private
77
+
78
# Writes the feed file for one block and returns its bookkeeping record.
#
# @param entry [Hash] one element of `blocks/index.json`; the keys read
#   here are "id", "name", "first_cp", "last_cp" and "age"
# @param labels [Hash] parsed `index/labels.json`, keyed by codepoint id
# @return [Hash] { slug:, char_count:, summary: } — `summary` is the
#   entry later written to `unicode-blocks.json`
def emit_block(entry, labels)
  block_doc = load_json(ucode_path("blocks", entry["id"], "index.json"))
  codepoint_ids = block_doc["codepoint_ids"] || []
  chars = chars_for(codepoint_ids, labels)
  name = entry["name"]
  slug = block_slug(name)

  write_block_file(slug, chars)

  summary = {
    "start" => entry["first_cp"],
    "end" => entry["last_cp"],
    "name" => name,
    # Falls back from the index entry to the block file, then to "1.1".
    "unicode_version" => entry["age"] || block_doc["age"] || "1.1",
  }
  { slug: slug, char_count: chars.length, summary: summary }
end
97
+
98
# Builds the denormalized character records for one block.
#
# Each record carries "cp" plus whichever of "n", "c", "s", "cc", "bc"
# and "mir" have a value; fields that are nil or an empty string are
# left out. Values such as `false` or `0` are kept.
#
# @param codepoint_ids [Array] codepoint ids used as keys into `labels`
# @param labels [Hash] per-codepoint label hashes ("name", "gc", "sc",
#   "cc", "bc", "mir"); a missing entry is treated as an empty label
# @return [Array<Hash>]
def chars_for(codepoint_ids, labels)
  codepoint_ids.map do |cp_id|
    label = labels[cp_id] || {}
    candidates = [
      ["cp", cp_id_to_i(cp_id)],
      ["n", label["name"]],
      ["c", label["gc"]],
      ["s", label["sc"]],
      ["cc", label["cc"]],
      ["bc", label["bc"]],
      ["mir", label["mir"]],
    ]

    candidates.each_with_object({}) do |(key, value), char|
      char[key] = value unless value.nil? || value == ""
    end
  end
end
112
+
113
# Writes `unicode/blocks/<slug>.json` under the output root, wrapping the
# records in a `{ "chars" => [...] }` document.
#
# @param slug [String] file stem for the block
# @param chars [Array<Hash>] character records for the block
# @return whatever `write_atomic` returns
def write_block_file(slug, chars)
  target = @output_root.join("unicode", "blocks", "#{slug}.json")
  body = to_pretty_json("chars" => chars)
  write_atomic(target, body)
end
117
+
118
# Writes `unicode-blocks.json`: the list of per-block summaries, in the
# same order as `per_block`.
#
# @param per_block [Array<Hash>] records carrying a `:summary` entry
# @return whatever `write_atomic` returns
def write_unicode_blocks(per_block)
  summaries = per_block.map { |block| block[:summary] }
  write_atomic(@output_root.join("unicode-blocks.json"), to_pretty_json(summaries))
end
123
+
124
# Writes `unicode-version.json` and hands the written payload back.
#
# @param ucd_version [String] stored under "version"
# @param per_block [Array<Hash>] records carrying a `:char_count` entry
# @return [Hash] { "version", "blockCount", "charCount", "generatedAt" };
#   "generatedAt" is the current UTC time in ISO 8601 form
def write_unicode_version(ucd_version, per_block)
  total_chars = per_block.sum { |block| block[:char_count] }
  payload = {
    "version" => ucd_version,
    "blockCount" => per_block.length,
    "charCount" => total_chars,
    "generatedAt" => Time.now.utc.iso8601,
  }

  write_atomic(@output_root.join("unicode-version.json"), to_pretty_json(payload))
  payload
end
135
+
136
# Turns a block display name into a file-name slug: lowercase, every run
# of characters outside a-z / 0-9 collapsed to a single hyphen, and any
# leading or trailing hyphen removed.
#
# @param name [String] e.g. "Basic Latin"
# @return [String] e.g. "basic-latin"
def block_slug(name)
  hyphenated = name.downcase.gsub(/[^a-z0-9]+/, "-")
  hyphenated.gsub(/^-|-$/, "")
end
139
+
140
# Converts a codepoint id such as "U+0041" to its integer value. The
# "U+" prefix is optional and matched case-insensitively; the remainder
# is read as hexadecimal with `String#to_i`, so unparseable text yields
# 0 rather than an error.
#
# @param cp_id [#to_s]
# @return [Integer]
def cp_id_to_i(cp_id)
  hex_digits = cp_id.to_s.sub(/^U\+/i, "")
  hex_digits.to_i(16)
end
143
+
144
# Resolves path segments against ucode's canonical output root.
#
# @param parts [Array<String>] path segments below the root
# @return [Pathname]
def ucode_path(*parts) = @ucode_root.join(*parts)
147
+
148
# Reads a file and parses its content as JSON.
#
# @param path [Pathname] anything responding to `#read`
# @return [Hash, Array, Object] the parsed document
# @raise [JSON::ParserError] when the content is not valid JSON
def load_json(path)
  raw = path.read
  JSON.parse(raw)
end
151
+ end
152
+ end
153
+ end
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "time"
4
+
5
+ require "ucode/models/build_report"
6
+
7
+ module Ucode
8
+ module Repo
9
+ # Observes {CodepointWriter} and tallies per-tier + per-block
10
+ # statistics for the canonical build report (TODO 21).
11
+ #
12
+ # Wire as the `observer:` kwarg on {CodepointWriter}:
13
+ #
14
+ # accumulator = BuildReportAccumulator.new(unicode_version: "17.0.0", ucode_version: Ucode::VERSION)
15
+ # writer = CodepointWriter.new(root, resolver: resolver, observer: accumulator)
16
+ # coordinator.each_codepoint { |cp| writer.write(cp) }
17
+ # report = accumulator.to_report
18
+ #
19
+ # The accumulator is thread-safe — the writer's worker pool calls
20
+ # `#call` from multiple threads.
21
+ #
22
+ # == Semantics
23
+ #
24
+ # `assigned` counts every codepoint the writer attempted (passed
25
+ # the block_id guard). `built` counts codepoints whose resolver
26
+ # returned a glyph. `skipped` counts codepoints that resolved to
27
+ # nil (no tier produced a glyph). `failed` counts exceptions
28
+ # recorded via {#record_failure} (the writer rescues nothing;
29
+ # the orchestrating command decides what to surface).
30
+ #
31
+ # `by_tier` counts ONLY the winning tier per codepoint (not the
32
+ # overlap semantics mentioned in TODO 21's example). TODO 21
33
+ # notes the overlap counts as descriptive; the per-codepoint
34
+ # winning tier is the load-bearing number for validation.
35
+ class BuildReportAccumulator
36
+ TIER_TO_WIRE = {
37
+ tier1: "tier-1",
38
+ pillar1: "pillar-1",
39
+ pillar2: "pillar-2",
40
+ pillar3: "pillar-3",
41
+ }.freeze
42
+ private_constant :TIER_TO_WIRE
43
+
44
# Starts an empty tally. All counters begin at zero; per-block entries
# are created on first access.
#
# @param unicode_version [String] stamped onto the report
# @param ucode_version [String] stamped onto the report
def initialize(unicode_version:, ucode_version:)
  @unicode_version = unicode_version
  @ucode_version = ucode_version
  @mutex = Mutex.new
  @failures = []
  @by_tier = Hash.new(0)
  @totals = %i[assigned built skipped failed].to_h { |counter| [counter, 0] }
  # Block form so every block gets its own stats hash and breakdown.
  @by_block = Hash.new do |stats, block_name|
    stats[block_name] = { assigned: 0, built: 0, tier_breakdown: Hash.new(0) }
  end
end
57
+
58
# Observer entry point — invoked by {CodepointWriter#write} as
# `observer.call(codepoint, result)`. Records one attempt.
#
# Codepoints without a `block_id` are ignored. The writer invokes the
# observer before its own `block_id` guard, so without this check
# block-less codepoints would be counted as assigned/skipped and would
# create a nil-named entry in the per-block statistics.
#
# @param codepoint [Ucode::Models::CodePoint]
# @param result [Ucode::Glyphs::Source::Result, nil] nil when no tier
#   produced a glyph; counted as `skipped`
# @return [void]
def call(codepoint, result)
  block_id = codepoint.block_id
  return if block_id.nil?

  synchronize do
    @totals[:assigned] += 1
    block_stats = @by_block[block_id]
    block_stats[:assigned] += 1

    if result
      @totals[:built] += 1
      # Only the winning tier is tallied, globally and per block.
      tier = wire_tier(result.tier)
      @by_tier[tier] += 1
      block_stats[:built] += 1
      block_stats[:tier_breakdown][tier] += 1
    else
      @totals[:skipped] += 1
    end
  end
end
81
+
82
# Records an exception raised while building a codepoint. Bumps the
# `failed` total and appends a failure entry holding the codepoint, its
# block, the tier (as a string), the error class name, the message and
# at most the first ten backtrace lines.
#
# The orchestrating command calls this when rescuing around
# `writer.write`; the writer itself does not rescue.
#
# @param codepoint [Ucode::Models::CodePoint, nil]
# @param error [StandardError]
# @param tier [Symbol, nil] resolver tier that raised, if known
# @return [void]
def record_failure(codepoint, error, tier: nil)
  synchronize do
    @totals[:failed] += 1

    details = {
      codepoint: codepoint&.cp,
      block_name: codepoint&.block_id,
      tier: tier&.to_s,
      error_class: error.class.name,
      message: error.message,
      # `backtrace` is nil for an exception that was never raised.
      backtrace: Array(error.backtrace).first(10),
    }
    @failures << Ucode::Models::BuildReport::Failure.new(**details)
  end
end
103
+
104
# Builds a {Ucode::Models::BuildReport} from the current tallies while
# holding the lock. The per-tier counts, each block's tier breakdown and
# the failure list are duplicated; the totals hash is handed to
# `Totals.new` as-is. `generated_at` is the current UTC time.
#
# @return [Ucode::Models::BuildReport]
def to_report
  synchronize do
    block_summaries = @by_block.map do |block_name, stats|
      Ucode::Models::BuildReport::BlockSummary.new(
        name: block_name,
        assigned: stats[:assigned],
        built: stats[:built],
        tier_breakdown: stats[:tier_breakdown].dup,
      )
    end

    Ucode::Models::BuildReport.new(
      unicode_version: @unicode_version,
      ucode_version: @ucode_version,
      generated_at: Time.now.utc.iso8601,
      totals: Ucode::Models::BuildReport::Totals.new(@totals),
      by_tier: @by_tier.dup,
      by_block: block_summaries,
      failures: @failures.dup,
    )
  end
end
126
+
127
+ private
128
+
129
# Maps a resolver tier symbol to its wire name via TIER_TO_WIRE; a tier
# without a mapping falls back to its `to_s` form.
#
# @param symbol [Symbol]
# @return [String]
def wire_tier(symbol)
  TIER_TO_WIRE.fetch(symbol) { symbol.to_s }
end
132
+
133
# Runs the given block while holding the accumulator's mutex and returns
# the block's value.
def synchronize(&critical_section)
  @mutex.synchronize(&critical_section)
end
136
+ end
137
+ end
138
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/repo/atomic_writes"
6
+ require "ucode/repo/paths"
7
+
8
+ module Ucode
9
+ module Repo
10
+ # Writes the canonical build report (TODO 21) to
11
+ # `output/build-report.json` atomically and idempotently.
12
+ #
13
+ # Re-running a build with no changed stats produces zero file
14
+ # writes — the existing build-report.json is byte-compared to the
15
+ # new payload before writing.
16
+ #
17
+ # The `generated_at` field is the only non-deterministic part of
18
+ # the report; callers wanting strict idempotency can override the
19
+ # accumulator's `to_report` to use a fixed timestamp.
20
+ class BuildReportWriter
21
+ include AtomicWrites
22
+
23
# @param output_root [String, Pathname] directory that receives
#   `build-report.json`; stored as a Pathname
def initialize(output_root)
  root = Pathname.new(output_root)
  @output_root = root
end
27
+
28
# Serializes the report and writes it to `build-report.json` under the
# output root via `write_atomic`.
#
# @param report [Ucode::Models::BuildReport]
# @return [Pathname, nil] the path when `write_atomic` returned a truthy
#   value, nil otherwise (the no-op case for unchanged content)
def write(report)
  destination = @output_root.join("build-report.json")
  written = write_atomic(destination, serialize(report))
  written ? destination : nil
end
38
+
39
+ private
40
+
41
# Renders the report as pretty-printed JSON through the report's own
# `to_json(pretty: true)`.
#
# @param report [#to_json]
# @return [String]
def serialize(report) = report.to_json(pretty: true)
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,229 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "pathname"
5
+ require "time"
6
+
7
+ require "ucode/models"
8
+ require "ucode/repo/atomic_writes"
9
+
10
+ module Ucode
11
+ module Repo
12
+ # Walks an output tree produced by {CanonicalBuildCommand} and
13
+ # runs the four automated validation checks from TODO 21
14
+ # §Validation:
15
+ #
16
+ # 1. `completeness` — every codepoint folder has both
17
+ # `index.json` and `glyph.svg`.
18
+ # 2. `schema` — every `index.json` deserializes via
19
+ # `Ucode::Models::CodePoint.from_hash`.
20
+ # 3. `provenance_sanity` — every deserialized CodePoint carries
21
+ # a non-nil `glyph.source.tier`.
22
+ # 4. `block_coverage` — per-block built count matches the
23
+ # supplied baseline (status is `skipped` when no baseline).
24
+ #
25
+ # Sample inspection (check 5 in TODO 21) is manual and out of
26
+ # scope.
27
+ #
28
+ # The validator is stateless from the outside: one call to
29
+ # {#validate} walks the tree, builds a {ValidationReport}, and
30
+ # writes it atomically to `output/validation-report.json`. Safe
31
+ # to re-run on the same tree — idempotent via {AtomicWrites}.
32
+ #
33
+ # == Baseline shape
34
+ #
35
+ # `baseline:` is a `Hash{String block_name => Integer expected}`
36
+ # — the per-block built count expected from TODO 05's audit.
37
+ # Missing blocks in the baseline are ignored; blocks present in
38
+ # the output but absent from the baseline are not flagged (the
39
+ # baseline is authoritative only for what it covers).
40
+ class BuildValidator
41
+ include AtomicWrites
42
+
43
+ CHECK_COMPLETENESS = "completeness"
44
+ CHECK_SCHEMA = "schema"
45
+ CHECK_PROVENANCE = "provenance_sanity"
46
+ CHECK_BLOCK_COVERAGE = "block_coverage"
47
+ ALL_CHECKS = [
48
+ CHECK_COMPLETENESS, CHECK_SCHEMA, CHECK_PROVENANCE, CHECK_BLOCK_COVERAGE
49
+ ].freeze
50
+ private_constant :ALL_CHECKS
51
+
52
# @param output_root [String, Pathname] root of the tree to validate;
#   stored as a Pathname
# @param unicode_version [String, nil] stamped onto the report;
#   nil leaves the field blank (callers usually know the version).
# @param baseline [Hash{String=>Integer}, nil] per-block expected
#   built counts; when nil, the block_coverage check is skipped.
def initialize(output_root, unicode_version: nil, baseline: nil)
  @output_root = Pathname.new(output_root)
  @unicode_version, @baseline = unicode_version, baseline
end
62
+
63
# Walks every codepoint directory, runs the per-codepoint checks, then
# the block-coverage check, and finally builds and writes the report.
#
# @return [Hash] { report:, report_path:, passed: } — `passed` is true
#   when the report's total failure count is zero
def validate
  problems = []
  dirs_per_block = Hash.new(0)

  each_codepoint_dir do |block_name, cp_id, cp_dir|
    dirs_per_block[block_name] += 1
    validate_codepoint(block_name, cp_id, cp_dir, problems)
  end
  validate_block_coverage(dirs_per_block, problems)

  report = build_report(problems, dirs_per_block)
  report_path = write_report(report)

  { report:, report_path:, passed: report.totals.failures.zero? }
end
84
+
85
+ private
86
+
87
# Yields every codepoint directory found under `<output_root>/blocks`.
#
# Block directories and the codepoint directories inside them are
# visited in sorted path order. `Pathname#children` returns entries in
# whatever order the filesystem reports them, so without sorting the
# order of failures in the report could differ between runs or machines.
# Plain files at either level are ignored. Nothing is yielded when the
# `blocks` directory does not exist.
#
# @yieldparam block_name [String] basename of the block directory
# @yieldparam cp_id [String] basename of the codepoint directory
# @yieldparam cp_dir [Pathname] the codepoint directory itself
# @return [void]
def each_codepoint_dir
  blocks_path = @output_root.join("blocks")
  return unless blocks_path.exist?

  blocks_path.children.select(&:directory?).sort.each do |block_dir|
    block_name = block_dir.basename.to_s
    block_dir.children.select(&:directory?).sort.each do |cp_dir|
      yield block_name, cp_dir.basename.to_s, cp_dir
    end
  end
end
98
+
99
# Runs the per-codepoint checks for one directory and appends any
# failures to `failures`.
#
# A missing `index.json` is reported and ends the checks for this
# directory. A missing glyph file is reported but the index is still
# parsed and, when it deserializes, checked for provenance.
#
# @param block_name [String]
# @param cp_id [String] directory name of the codepoint
# @param cp_dir [Pathname]
# @param failures [Array] collector, mutated in place
def validate_codepoint(block_name, cp_id, cp_dir, failures)
  cp_int = parse_cp_int(cp_id)
  report_incomplete = lambda do |detail|
    failures << make_failure(cp_int, block_name, CHECK_COMPLETENESS, detail)
  end

  index_path = cp_dir.join("index.json")
  unless index_path.exist?
    report_incomplete.call("missing index.json")
    return
  end

  glyph_present = cp_dir.join(Paths.glyph_filename).exist?
  report_incomplete.call("missing glyph.svg") unless glyph_present

  model = parse_index(index_path, cp_int, block_name, failures)
  check_provenance(model, cp_int, block_name, failures) if model
end
119
+
120
# Reads `index.json` and deserializes it into a CodePoint model.
#
# JSON syntax problems are recorded by `parse_json`; any StandardError
# raised by `CodePoint.from_hash` is recorded here as a schema failure.
# Errors from reading the file itself are not rescued.
#
# @param index_path [Pathname]
# @param cp_int [Integer, nil]
# @param block_name [String]
# @param failures [Array] collector, mutated in place
# @return [Ucode::Models::CodePoint, nil] nil when either step failed
def parse_index(index_path, cp_int, block_name, failures)
  raw_document = parse_json(index_path.read, cp_int, block_name, failures)
  return unless raw_document

  begin
    Ucode::Models::CodePoint.from_hash(raw_document)
  rescue StandardError => load_error
    reason = "deserialization failed: #{load_error.class}: #{load_error.message}"
    failures << make_failure(cp_int, block_name, CHECK_SCHEMA, reason)
    nil
  end
end
132
+
133
# Parses a JSON document, recording a schema failure instead of raising
# when the text is not valid JSON.
#
# @param body [String] raw file content
# @param cp_int [Integer, nil]
# @param block_name [String]
# @param failures [Array] collector, mutated in place
# @return [Object, nil] the parsed value, or nil after a parse error
def parse_json(body, cp_int, block_name, failures)
  begin
    JSON.parse(body)
  rescue JSON::ParserError => parse_error
    reason = "JSON parse failed: #{parse_error.message}"
    failures << make_failure(cp_int, block_name, CHECK_SCHEMA, reason)
    nil
  end
end
140
+
141
# Records a provenance failure unless the model carries a truthy
# `glyph.source.tier`. A missing glyph or missing source counts the
# same as a missing tier.
#
# @param model [#glyph] deserialized codepoint
# @param cp_int [Integer, nil]
# @param block_name [String]
# @param failures [Array] collector, mutated in place
def check_provenance(model, cp_int, block_name, failures)
  tier = model.glyph&.source&.tier
  return if tier

  failures << make_failure(cp_int, block_name, CHECK_PROVENANCE,
                           "glyph.source.tier is missing")
end
147
+
148
# Compares the number of codepoint directories found per block with the
# baseline and records one failure per block whose count differs. Does
# nothing when no baseline was supplied. Only blocks listed in the
# baseline are compared.
#
# @param per_block_counts [Hash{String=>Integer}] directories seen per
#   block; the caller passes a hash that defaults to 0
# @param failures [Array] collector, mutated in place
def validate_block_coverage(per_block_counts, failures)
  return if @baseline.nil?

  @baseline.each_pair do |block_name, expected|
    found = per_block_counts[block_name]
    unless found == expected
      failures << make_failure(nil, block_name, CHECK_BLOCK_COVERAGE,
                               "expected #{expected} built, found #{found}")
    end
  end
end
159
+
160
# Assembles the ValidationReport: one summary per check in ALL_CHECKS,
# the totals, and the collected failures. `checks_run` excludes skipped
# checks; `generated_at` is the current UTC time; a nil unicode version
# becomes an empty string.
#
# @param failures [Array] all failures collected during the walk
# @param per_block_counts [Hash{String=>Integer}] directories per block
# @return [Ucode::Models::ValidationReport]
def build_report(failures, per_block_counts)
  checks = ALL_CHECKS.map { |check| build_check_summary(check, failures, per_block_counts) }
  executed = checks.reject { |check| check.status == "skipped" }
  succeeded = checks.select { |check| check.status == "passed" }

  totals = Ucode::Models::ValidationReport::Totals.new(
    codepoints_checked: per_block_counts.values.sum,
    failures: failures.length,
    checks_run: executed.length,
    checks_passed: succeeded.length,
  )

  Ucode::Models::ValidationReport.new(
    unicode_version: @unicode_version.to_s,
    generated_at: Time.now.utc.iso8601,
    totals: totals,
    checks: checks,
    failures: failures,
  )
end
178
+
179
# Summarizes one check: how many failures it produced, how many items it
# covered, and its status. The block-coverage check is "skipped" when no
# baseline was supplied; otherwise a check is "passed" with zero
# failures and "failed" with any.
#
# @param name [String] one of the CHECK_* names
# @param failures [Array<#check>] all collected failures
# @param per_block_counts [Hash{String=>Integer}]
# @return [Ucode::Models::ValidationReport::CheckSummary]
def build_check_summary(name, failures, per_block_counts)
  failure_count = failures.count { |failure| failure.check == name }
  coverage_skipped = name == CHECK_BLOCK_COVERAGE && @baseline.nil?

  status =
    if coverage_skipped then "skipped"
    elsif failure_count.zero? then "passed"
    else "failed"
    end

  Ucode::Models::ValidationReport::CheckSummary.new(
    name: name,
    status: status,
    total: total_for_check(name, per_block_counts),
    failures: failure_count,
  )
end
198
+
199
# Number of items a check covers: the baseline's size for the
# block-coverage check (0 without a baseline), and the number of
# codepoint directories for every other check.
#
# @param name [String]
# @param per_block_counts [Hash{String=>Integer}]
# @return [Integer]
def total_for_check(name, per_block_counts)
  if name == CHECK_BLOCK_COVERAGE
    @baseline&.length || 0
  else
    per_block_counts.values.sum
  end
end
204
+
205
# Writes the report as pretty-printed JSON to `validation-report.json`
# under the output root.
#
# @param report [#to_json]
# @return [Pathname] the report path, whether or not the write changed
#   the file
def write_report(report)
  @output_root.join("validation-report.json").tap do |destination|
    write_atomic(destination, report.to_json(pretty: true))
  end
end
210
+
211
# Builds one failure entry for the validation report.
#
# @param cp_int [Integer, nil] codepoint value; nil for block-level
#   failures or unparseable directory names
# @param block_name [String]
# @param check [String] one of the CHECK_* names
# @param message [String]
# @return [Ucode::Models::ValidationReport::Failure]
def make_failure(cp_int, block_name, check, message)
  details = { codepoint: cp_int, block: block_name, check: check, message: message }
  Ucode::Models::ValidationReport::Failure.new(**details)
end
219
+
220
# Parses a codepoint directory name of the form "U+<hex digits>" into
# its integer value.
#
# Only an exact, case-sensitive "U+" prefix followed by one or more hex
# digits is accepted. Anything else — a missing prefix, no digits, or
# extra characters such as underscores or whitespace — yields nil, so a
# malformed directory name is never attributed to a real codepoint.
#
# @param cp_id [String] e.g. "U+0041"
# @return [Integer, nil]
def parse_cp_int(cp_id)
  hex_digits = cp_id[/\AU\+(\h+)\z/, 1]
  hex_digits&.to_i(16)
end
227
+ end
228
+ end
229
+ end
@@ -22,15 +22,38 @@ module Ucode
22
22
  # - **Atomic**: writes go to `<path>.tmp`, then rename. A crash
23
23
  # mid-write leaves either the old file or no file, never a
24
24
  # truncated one.
25
+ #
26
+ # When a {Ucode::Glyphs::Resolver} is supplied via `resolver:`, each
27
+ # write also resolves the codepoint's glyph, writes `glyph.svg`
28
+ # alongside `index.json` (same atomic + idempotent semantics), and
29
+ # records the resolver tier + provenance on the codepoint's `glyph`
30
+ # attribute so it lands in the serialized JSON. When `resolver:` is
31
+ # nil (default), the writer is glyph-agnostic and only writes
32
+ # `index.json` — preserving backward compatibility.
25
33
  class CodepointWriter
26
34
  include AtomicWrites
27
35
 
28
36
  # @param output_root [String, Pathname]
29
37
  # @param parallel_workers [Integer] size of the worker pool. Set to
30
38
  # 1 (or less) to run synchronously — useful in tests.
31
- def initialize(output_root, parallel_workers: 8)
39
+ # @param resolver [Ucode::Glyphs::Resolver, nil] when non-nil, each
40
+ # write resolves the codepoint's glyph via this resolver and
41
+ # writes `glyph.svg` next to `index.json`. Sources inside the
42
+ # resolver must be safe for concurrent access — the worker pool
43
+ # calls into them from multiple threads.
44
+ # @param observer [#call, nil] when non-nil, invoked as
45
+ # `observer.call(codepoint, result)` after each resolve attempt
46
+ # (and before the JSON write). `result` is the
47
+ # {Ucode::Glyphs::Source::Result} when a tier produced a glyph,
48
+ # or nil when no resolver is configured / no tier matched. Used
49
+ # by {Ucode::Repo::BuildReportAccumulator} to tally per-tier
50
+ # stats. The observer must be thread-safe.
51
# Stores the output root (as a Pathname), the worker-pool size, and the
# optional glyph resolver and observer. With the defaults the writer
# has neither a resolver nor an observer.
def initialize(output_root, parallel_workers: 8, resolver: nil,
               observer: nil)
  @output_root = Pathname.new(output_root)
  @parallel_workers = parallel_workers
  @resolver, @observer = resolver, observer
end
35
58
 
36
59
  # Write one codepoint synchronously.
@@ -38,6 +61,8 @@ module Ucode
38
61
  # @return [Pathname, nil] the path written, or nil if skipped
39
62
  # (missing block_id or content-identical to existing file)
40
63
  def write(codepoint)
64
+ result = codepoint.block_id.nil? ? nil : resolve_glyph(codepoint)
65
+ @observer&.call(codepoint, result)
41
66
  return nil if codepoint.block_id.nil?
42
67
 
43
68
  path = Paths.codepoint_json_path(@output_root, codepoint.block_id, codepoint.id)
@@ -91,6 +116,30 @@ module Ucode
91
116
  def serialize(codepoint)
92
117
  codepoint.to_json(pretty: true)
93
118
  end
119
+
120
# Resolves the codepoint's glyph and, on a hit, writes its SVG next to
# the codepoint's JSON.
#
# Without a resolver nothing happens and the codepoint is left
# untouched. With a resolver the codepoint's `glyph` attribute is always
# reassigned — to the glyph bundle on a hit, to nil on a miss.
#
# @param codepoint [Ucode::Models::CodePoint] mutated in place
# @return [Object, nil] the resolver's result, or nil when there is no
#   resolver or no glyph was found
def resolve_glyph(codepoint)
  return unless @resolver

  resolved = @resolver.resolve(codepoint.cp)
  codepoint.glyph = build_glyph_bundle(resolved)
  return nil unless resolved

  svg_path = Paths.codepoint_glyph_path(@output_root, codepoint.block_id, codepoint.id)
  write_atomic(svg_path, resolved.svg)
  resolved
end
131
+
132
# Wraps a resolver result in the Glyph model stored on the codepoint:
# the fixed glyph filename plus the tier (as a string) and provenance
# taken from the result.
#
# @param result [#tier, #provenance, nil]
# @return [Ucode::Models::CodePoint::Glyph, nil] nil when there is no
#   result
def build_glyph_bundle(result)
  return unless result

  origin = Ucode::Models::CodePoint::Glyph::Source.new(
    tier: result.tier.to_s,
    provenance: result.provenance,
  )
  Ucode::Models::CodePoint::Glyph.new(svg_path: Paths.glyph_filename, source: origin)
end
94
143
  end
95
144
  end
96
145
  end
@@ -24,6 +24,14 @@ module Ucode
24
24
  :PLANE_FILENAME_PREFIX
25
25
 
26
26
  class << self
27
# Filename of a codepoint's SVG glyph, relative to the codepoint's own
# directory. Exposed so that callers recording the glyph location use
# the same string as the on-disk layout.
# @return [String]
def glyph_filename = GLYPH_FILENAME
34
+
27
35
  # Format an integer codepoint as the canonical "U+XXXX" id used
28
36
  # everywhere (paths, JSON, cross-references). Always at least
29
37
  # 4 hex digits, uppercase, no extra padding.
data/lib/ucode/repo.rb CHANGED
@@ -18,5 +18,9 @@ module Ucode
18
18
  autoload :AtomicWrites, "ucode/repo/atomic_writes"
19
19
  autoload :CodepointWriter, "ucode/repo/codepoint_writer"
20
20
  autoload :AggregateWriter, "ucode/repo/aggregate_writer"
21
+ autoload :BuildReportAccumulator, "ucode/repo/build_report_accumulator"
22
+ autoload :BuildReportWriter, "ucode/repo/build_report_writer"
23
+ autoload :BuildValidator, "ucode/repo/build_validator"
24
+ autoload :BlockFeedEmitter, "ucode/repo/block_feed_emitter"
21
25
  end
22
26
  end
data/lib/ucode/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Ucode
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end