ucode 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +72 -0
  3. data/Gemfile.lock +2 -2
  4. data/TODO.full/00-README.md +116 -0
  5. data/TODO.full/01-panglyph-vision.md +112 -0
  6. data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
  7. data/TODO.full/03-panglyph-font-builder.md +201 -0
  8. data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
  9. data/TODO.full/05-ucode-0-1-1-release.md +139 -0
  10. data/TODO.full/06-fontisan-remove-audit.md +142 -0
  11. data/TODO.full/07-fontisan-remove-ucd.md +125 -0
  12. data/TODO.full/08-archive-private-bin-build.md +143 -0
  13. data/TODO.full/09-archive-public-structure.md +164 -0
  14. data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
  15. data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
  16. data/TODO.full/12-implementation-order.md +216 -0
  17. data/TODO.full/13-fontisan-font-writer-api.md +189 -0
  18. data/TODO.full/14-fontisan-table-writers.md +66 -0
  19. data/TODO.full/15-panglyph-builder-real.md +82 -0
  20. data/TODO.full/16-archive-public-sync-workflows.md +167 -0
  21. data/TODO.full/17-fontist-org-font-picker.md +73 -0
  22. data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
  23. data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
  24. data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
  25. data/TODO.new/00-README.md +30 -0
  26. data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
  27. data/TODO.new/24-universal-glyph-set-build.md +189 -0
  28. data/TODO.new/25-font-audit-against-universal-set.md +195 -0
  29. data/TODO.new/26-missing-glyph-reporter.md +189 -0
  30. data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
  31. data/TODO.new/28-implementation-order-update.md +187 -0
  32. data/TODO.new/29-universal-set-curation-uc17.md +312 -0
  33. data/TODO.new/30-tier1-font-acquisition.md +241 -0
  34. data/TODO.new/31-universal-set-production-build.md +205 -0
  35. data/TODO.new/32-uc17-coverage-matrix.md +165 -0
  36. data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
  37. data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
  38. data/TODO.new/35-universal-set-production-run.md +160 -0
  39. data/TODO.new/36-per-font-coverage-audit.md +145 -0
  40. data/TODO.new/37-coverage-highlight-reporter.md +125 -0
  41. data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
  42. data/TODO.new/39-implementation-order-update-32-38.md +258 -0
  43. data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
  44. data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
  45. data/config/specialist_fonts.yml +102 -0
  46. data/config/unicode17_tier1_fonts.yml +42 -0
  47. data/config/unicode17_universal_glyph_set.yml +293 -0
  48. data/lib/ucode/audit/block_aggregator.rb +57 -29
  49. data/lib/ucode/audit/browser/face_page.rb +128 -0
  50. data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
  51. data/lib/ucode/audit/browser/library_page.rb +74 -0
  52. data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
  53. data/lib/ucode/audit/browser/template.rb +47 -0
  54. data/lib/ucode/audit/browser/templates/face.css +200 -0
  55. data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
  56. data/lib/ucode/audit/browser/templates/face.js +298 -0
  57. data/lib/ucode/audit/browser/templates/library.css +119 -0
  58. data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
  59. data/lib/ucode/audit/browser/templates/library.js +99 -0
  60. data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
  61. data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
  62. data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
  63. data/lib/ucode/audit/browser.rb +32 -0
  64. data/lib/ucode/audit/context.rb +27 -1
  65. data/lib/ucode/audit/coverage_reference.rb +103 -0
  66. data/lib/ucode/audit/differ.rb +121 -0
  67. data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
  68. data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
  69. data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
  70. data/lib/ucode/audit/emitter/face_directory.rb +212 -0
  71. data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
  72. data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
  73. data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
  74. data/lib/ucode/audit/emitter/paths.rb +312 -0
  75. data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
  76. data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
  77. data/lib/ucode/audit/emitter.rb +29 -0
  78. data/lib/ucode/audit/extractors/aggregations.rb +31 -2
  79. data/lib/ucode/audit/face_auditor.rb +86 -0
  80. data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
  81. data/lib/ucode/audit/formatters/audit_text.rb +411 -0
  82. data/lib/ucode/audit/formatters/color.rb +48 -0
  83. data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
  84. data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
  85. data/lib/ucode/audit/formatters.rb +23 -0
  86. data/lib/ucode/audit/library_aggregator.rb +86 -0
  87. data/lib/ucode/audit/library_auditor.rb +105 -0
  88. data/lib/ucode/audit/release/emitter.rb +152 -0
  89. data/lib/ucode/audit/release/face_card.rb +93 -0
  90. data/lib/ucode/audit/release/formula_audits.rb +50 -0
  91. data/lib/ucode/audit/release/library_index_builder.rb +78 -0
  92. data/lib/ucode/audit/release/manifest_builder.rb +127 -0
  93. data/lib/ucode/audit/release.rb +42 -0
  94. data/lib/ucode/audit/ucd_only_reference.rb +81 -0
  95. data/lib/ucode/audit/universal_set_reference.rb +136 -0
  96. data/lib/ucode/audit.rb +31 -0
  97. data/lib/ucode/cli.rb +339 -33
  98. data/lib/ucode/commands/audit/browser_command.rb +82 -0
  99. data/lib/ucode/commands/audit/collection_command.rb +103 -0
  100. data/lib/ucode/commands/audit/compare_command.rb +188 -0
  101. data/lib/ucode/commands/audit/font_command.rb +140 -0
  102. data/lib/ucode/commands/audit/library_command.rb +87 -0
  103. data/lib/ucode/commands/audit/reference_builder.rb +64 -0
  104. data/lib/ucode/commands/audit.rb +20 -0
  105. data/lib/ucode/commands/block_feed.rb +73 -0
  106. data/lib/ucode/commands/canonical_build.rb +138 -0
  107. data/lib/ucode/commands/fetch.rb +37 -1
  108. data/lib/ucode/commands/release.rb +115 -0
  109. data/lib/ucode/commands/universal_set.rb +211 -0
  110. data/lib/ucode/commands.rb +5 -0
  111. data/lib/ucode/coordinator/indices.rb +11 -0
  112. data/lib/ucode/coordinator.rb +138 -5
  113. data/lib/ucode/error.rb +30 -2
  114. data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
  115. data/lib/ucode/fetch/font_fetcher.rb +16 -0
  116. data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
  117. data/lib/ucode/fetch.rb +7 -3
  118. data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
  119. data/lib/ucode/glyphs/real_fonts.rb +1 -0
  120. data/lib/ucode/glyphs/resolver.rb +62 -0
  121. data/lib/ucode/glyphs/source.rb +48 -0
  122. data/lib/ucode/glyphs/source_builder.rb +61 -0
  123. data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
  124. data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
  125. data/lib/ucode/glyphs/source_config.rb +104 -0
  126. data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
  127. data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
  128. data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
  129. data/lib/ucode/glyphs/sources.rb +20 -0
  130. data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
  131. data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
  132. data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
  133. data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
  134. data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
  135. data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
  136. data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
  137. data/lib/ucode/glyphs/universal_set.rb +45 -0
  138. data/lib/ucode/glyphs.rb +6 -0
  139. data/lib/ucode/models/audit/baseline.rb +6 -0
  140. data/lib/ucode/models/audit/block_summary.rb +7 -0
  141. data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
  142. data/lib/ucode/models/audit/release_face.rb +42 -0
  143. data/lib/ucode/models/audit/release_formula.rb +33 -0
  144. data/lib/ucode/models/audit/release_manifest.rb +43 -0
  145. data/lib/ucode/models/audit/release_universal_set.rb +37 -0
  146. data/lib/ucode/models/audit.rb +9 -0
  147. data/lib/ucode/models/block.rb +2 -0
  148. data/lib/ucode/models/build_report.rb +109 -0
  149. data/lib/ucode/models/codepoint/glyph.rb +42 -0
  150. data/lib/ucode/models/codepoint.rb +3 -0
  151. data/lib/ucode/models/glyph_source.rb +86 -0
  152. data/lib/ucode/models/glyph_source_map.rb +138 -0
  153. data/lib/ucode/models/specialist_font.rb +70 -0
  154. data/lib/ucode/models/specialist_font_manifest.rb +48 -0
  155. data/lib/ucode/models/unihan_entry.rb +81 -9
  156. data/lib/ucode/models/unihan_field.rb +21 -0
  157. data/lib/ucode/models/universal_set_entry.rb +47 -0
  158. data/lib/ucode/models/universal_set_manifest.rb +78 -0
  159. data/lib/ucode/models/validation_report.rb +99 -0
  160. data/lib/ucode/models.rb +9 -0
  161. data/lib/ucode/parsers/named_sequences.rb +5 -5
  162. data/lib/ucode/parsers/unihan.rb +50 -19
  163. data/lib/ucode/repo/aggregate_writer.rb +34 -2
  164. data/lib/ucode/repo/block_feed_emitter.rb +153 -0
  165. data/lib/ucode/repo/build_report_accumulator.rb +138 -0
  166. data/lib/ucode/repo/build_report_writer.rb +46 -0
  167. data/lib/ucode/repo/build_validator.rb +229 -0
  168. data/lib/ucode/repo/codepoint_writer.rb +50 -1
  169. data/lib/ucode/repo/paths.rb +8 -0
  170. data/lib/ucode/repo.rb +4 -0
  171. data/lib/ucode/version.rb +1 -1
  172. data/schema/block-feed.output.schema.yml +134 -0
  173. metadata +143 -2
  174. data/ucode.gemspec +0 -56
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/glyphs/universal_set/idempotency"
6
+ require "ucode/glyphs/universal_set/manifest_accumulator"
7
+ require "ucode/glyphs/universal_set/manifest_writer"
8
+
9
module Ucode
  module Glyphs
    module UniversalSet
      # Drains a codepoint stream through the 4-tier {Resolver} and
      # produces the universal glyph set: one SVG per codepoint +
      # manifest.json + reports.
      #
      # This is the orchestrator described by TODO 24. It owns three
      # concerns and only three:
      #
      # 1. Iterate the codepoint stream (single-threaded or worker
      #    pool, depending on `parallel_workers:`).
      # 2. For each codepoint: resolve via the {Resolver}, write
      #    the SVG via {Idempotency}, route the outcome to the
      #    {ManifestAccumulator}.
      # 3. After the drain: hand the manifest + per-block breakdown
      #    to the {ManifestWriter} for atomic emission.
      #
      # The Builder is intentionally agnostic of how the codepoint
      # stream is produced. The CLI command (TODO 24) constructs a
      # {Ucode::Coordinator} enumerator; tests construct a small
      # Array. The Builder doesn't know about UCD text files, fontist,
      # or PDFs — those live behind the {Resolver}.
      #
      # == Idempotency
      #
      # SVG writes go through {Idempotency#write_glyph}, which uses
      # {Ucode::Repo::AtomicWrites#write_atomic} for byte-level
      # idempotency. Re-running with the same resolver + SVG payloads
      # produces zero file writes. The manifest is regenerated each
      # run; its `generated_at` updates but its entries remain stable
      # when content is unchanged.
      class Builder
        include Idempotency

        # @param output_root [String, Pathname] directory that will hold
        #   `manifest.json`, `glyphs/`, `reports/`.
        # @param resolver [Ucode::Glyphs::Resolver]
        # @param unicode_version [String]
        # @param ucode_version [String]
        # @param source_config_sha256 [String] hex digest of the YAML
        #   config that produced this build (recorded in the manifest
        #   so audits can detect drift).
        # @param parallel_workers [Integer] size of the worker pool.
        #   Set to 1 (or less) for inline mode — used in tests.
        # @param block_filter [String, nil] only build codepoints whose
        #   `block_id` matches this verbatim (canonical underscore form).
        def initialize(output_root:, resolver:, unicode_version:,
                       ucode_version:, source_config_sha256:,
                       parallel_workers: 1, block_filter: nil)
          @output_root = Pathname.new(output_root)
          @resolver = resolver
          @unicode_version = unicode_version
          @ucode_version = ucode_version
          @source_config_sha256 = source_config_sha256
          @parallel_workers = parallel_workers
          @block_filter = block_filter
        end

        # Drain `codepoints` through the resolver and emit the
        # manifest + reports. Returns the path to the written manifest.
        #
        # @param codepoints [Enumerable<Ucode::Models::CodePoint>]
        # @return [Pathname] path to the written manifest.json
        def build(codepoints)
          accumulator = ManifestAccumulator.new(
            unicode_version: @unicode_version,
            ucode_version: @ucode_version,
            source_config_sha256: @source_config_sha256,
          )
          drain(codepoints, accumulator)
          write_outputs(accumulator)
        end

        private

        # Pick the drain strategy: inline for a pool size of 1 or
        # less, worker pool otherwise.
        def drain(codepoints, accumulator)
          return drain_inline(codepoints, accumulator) if @parallel_workers <= 1

          drain_threaded(codepoints, accumulator)
        end

        # Process every codepoint on the calling thread, in stream order.
        def drain_inline(codepoints, accumulator)
          codepoints.each do |cp|
            build_one(cp, accumulator)
          end
        end

        # Process the stream with `@parallel_workers` threads fed from
        # a shared queue.
        #
        # Shutdown uses a private sentinel object rather than `nil`, so
        # a stray `nil` element in the stream is handed to {#build_one}
        # (and recorded as a failure, exactly like inline mode) instead
        # of silently terminating a worker. The sentinels are pushed and
        # the workers joined from an `ensure`, so an exception raised by
        # the stream enumerator propagates to the caller without leaving
        # worker threads blocked on the queue.
        #
        # @param codepoints [Enumerable<Ucode::Models::CodePoint>]
        # @param accumulator [ManifestAccumulator]
        def drain_threaded(codepoints, accumulator)
          queue = Queue.new
          stop = Object.new
          workers = Array.new(@parallel_workers) do
            Thread.new do
              loop do
                cp = queue.pop
                break if cp.equal?(stop)

                build_one(cp, accumulator)
              end
            end
          end

          begin
            codepoints.each { |cp| queue << cp }
          ensure
            # One sentinel per worker; each worker consumes exactly one.
            @parallel_workers.times { queue << stop }
            workers.each(&:join)
          end
        end

        # Resolve one codepoint, write its SVG (if any), and route
        # the outcome to the accumulator. Exceptions are caught here
        # so a single bad codepoint doesn't abort the run.
        #
        # @param cp [Ucode::Models::CodePoint]
        # @param accumulator [ManifestAccumulator]
        def build_one(cp, accumulator)
          return unless matches_filter?(cp)

          result = @resolver.resolve(cp.cp)
          if result.nil?
            accumulator.record_skip(cp)
            return
          end

          svg = result.svg
          write_glyph(@output_root, cp_id(cp), svg)
          accumulator.record_build(cp, result, svg: svg)
        rescue StandardError => e
          accumulator.record_failure(cp, e)
        end

        # True when no block filter is configured, or when the
        # codepoint's `block_id` equals the configured filter.
        def matches_filter?(cp)
          return true if @block_filter.nil?

          cp.block_id == @block_filter
        end

        # Canonical glyph id for the codepoint, as produced by
        # {Ucode::Repo::Paths.cp_id}.
        def cp_id(cp)
          Ucode::Repo::Paths.cp_id(cp.cp)
        end

        # Snapshot the accumulator and hand everything to the
        # {ManifestWriter}. Returns whatever the writer returns (the
        # manifest path).
        def write_outputs(accumulator)
          manifest = accumulator.to_manifest
          ManifestWriter.new(@output_root).write(
            manifest,
            by_block: accumulator.by_block,
            gaps: accumulator.gaps,
            failures: accumulator.failures,
          )
        end
      end
    end
  end
end
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "pathname"
5
+
6
+ require "ucode/glyphs/universal_set/idempotency"
7
+
8
module Ucode
  module Glyphs
    module UniversalSet
      # Standalone emitter for the universal-set coverage reports
      # described by TODO 31 (per-tier, per-block, gap investigation).
      # Takes a {Ucode::Models::UniversalSetManifest} and writes three
      # JSON files under `<output_root>/reports/`:
      #
      # - `by_tier.json` — `manifest.by_tier` as-is (`tier-1` => N,
      #   `pillar-1` => N, ...).
      # - `by_block.json` — per-block, per-tier breakdown, e.g.
      #
      #     { "Sidetic": { "assigned": 26, "tier-1": 26,
      #                    "pillar-1": 0, "pillar-2": 0, "pillar-3": 0 } }
      #
      #   `assigned` counts the manifest entries whose codepoint the
      #   database maps to that block; each tier key counts the
      #   entries that resolved at that tier.
      # - `gaps.json` — array of `{ codepoint, block, reason }`, one
      #   per manifest entry whose tier is `pillar-3`.
      #   NOTE(review): TODO 31 treats unassigned / PUA / noncharacter
      #   codepoints as legitimate Last Resort cases; this class does
      #   not filter them out — every pillar-3 entry is listed.
      #
      # When a non-empty `failures:` payload is supplied, a fourth
      # file, `failures.json`, is written so that crashes stay
      # separate from tofu gaps.
      #
      # Writes go through `write_atomic` and `to_pretty_json`
      # (available via the {Idempotency} mixin). Per-block output is
      # sorted by block so repeated runs on the same manifest yield
      # the same ordering.
      class CoverageReport
        include Idempotency

        # High-level category stamped on every pillar-3 gap entry;
        # the detailed fall-through path lives in the manifest
        # entry's `source` field.
        TOFU_REASON = "resolved to pillar-3 (Last Resort placeholder)"
        private_constant :TOFU_REASON

        # @param output_root [String, Pathname] directory holding
        #   `manifest.json` + `reports/`.
        # @param database [Ucode::Database] answers codepoint → block
        #   lookups via `lookup_block`.
        def initialize(output_root, database:)
          @database = database
          @output_root = Pathname.new(output_root)
        end

        # Write the coverage reports and return the structured
        # payload, so callers can render a summary without re-reading
        # the files.
        #
        # @param manifest [Ucode::Models::UniversalSetManifest]
        # @param failures [Array<Hash>] optional per-codepoint
        #   exception log; when non-empty, `reports/failures.json`
        #   is written as well.
        # @return [Hash] { by_tier:, by_block:, gaps:, failures:,
        #   by_tier_path:, by_block_path:, gaps_path:, failures_path: }
        #   (`failures_path` is nil when nothing was written).
        def emit(manifest, failures: [])
          payloads = {
            by_tier: manifest.by_tier,
            by_block: build_by_block(manifest),
            gaps: build_gaps(manifest),
          }
          paths = {
            by_tier_path: by_tier_report_path(@output_root),
            by_block_path: by_block_report_path(@output_root),
            gaps_path: gaps_report_path(@output_root),
          }

          # Both hashes are declared in the same report order, so the
          # pairs line up positionally.
          payloads.values.zip(paths.values) do |payload, path|
            write_atomic(path, to_pretty_json(payload))
          end
          failures_path = write_failures(failures)

          payloads.merge({ failures: failures }, paths,
                         { failures_path: failures_path })
        end

        private

        # Tally manifest entries per block and per tier. Entries
        # whose codepoint has no block in the database are ignored.
        def build_by_block(manifest)
          tally = Hash.new do |rows, block|
            rows[block] = { "assigned" => 0, "tier-1" => 0, "pillar-1" => 0,
                            "pillar-2" => 0, "pillar-3" => 0 }
          end

          manifest.entries.each do |entry|
            block = @database.lookup_block(entry.codepoint)
            next unless block

            row = tally[block]
            tier = entry.tier
            row["assigned"] += 1
            # Tiers outside the pre-seeded set start counting from 0.
            row[tier] = (row[tier] || 0) + 1
          end

          # Sorted by block for deterministic output.
          tally.sort.to_h
        end

        # One gap record per manifest entry that resolved at pillar-3.
        def build_gaps(manifest)
          tofu = manifest.entries.select { |entry| entry.tier == "pillar-3" }
          tofu.map do |entry|
            {
              "codepoint" => entry.codepoint,
              "block" => @database.lookup_block(entry.codepoint),
              "reason" => TOFU_REASON,
            }
          end
        end

        # Write `reports/failures.json` when there is something to
        # report. Returns the written path, or nil when `failures`
        # is empty.
        def write_failures(failures)
          return nil if failures.empty?

          @output_root.join(REPORTS_DIR, "failures.json").tap do |path|
            write_atomic(path, to_pretty_json(failures))
          end
        end
      end
    end
  end
end
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/repo/atomic_writes"
6
+
7
module Ucode
  module Glyphs
    module UniversalSet
      # Idempotency + path helpers for the universal set build.
      #
      # TODO 24 specifies "a codepoint whose source font mtime +
      # content hash are unchanged is skipped." The content-hash half
      # is delegated to {Ucode::Repo::AtomicWrites#write_atomic}
      # (byte-identical payloads are a no-op). The mtime half is a
      # future optimization (skipping the resolver call entirely when
      # the font hasn't changed); for now, byte comparison provides
      # the semantic correctness that matters.
      #
      # Centralizing the write semantic here means a future
      # mtime-based short-circuit lands in one place. Classes in this
      # namespace mix the module in to share the path layout.
      module Idempotency
        include Ucode::Repo::AtomicWrites

        # Sub-directory of the output root holding per-codepoint SVGs.
        GLYPHS_DIR = "glyphs"
        private_constant :GLYPHS_DIR

        # Sub-directory of the output root holding the by-tier /
        # by-block / gaps reports emitted alongside the manifest.
        REPORTS_DIR = "reports"
        private_constant :REPORTS_DIR

        # Manifest filename, directly under the output root.
        MANIFEST_FILENAME = "manifest.json"
        private_constant :MANIFEST_FILENAME

        # Report filenames, under {REPORTS_DIR}.
        BY_TIER_REPORT = "by_tier.json"
        BY_BLOCK_REPORT = "by_block.json"
        GAPS_REPORT = "gaps.json"
        private_constant :BY_TIER_REPORT, :BY_BLOCK_REPORT, :GAPS_REPORT

        # Write the SVG payload to its canonical `glyphs/<id>.svg`
        # location via `write_atomic` and return that call's result
        # (documented upstream as true when written, false when the
        # existing bytes were identical).
        #
        # @param output_root [Pathname]
        # @param cp_id [String] e.g. "U+0041"
        # @param svg [String]
        # @return [Boolean]
        def write_glyph(output_root, cp_id, svg)
          destination = glyph_path(output_root, cp_id)
          write_atomic(destination, svg)
        end

        # @param output_root [Pathname]
        # @param cp_id [String]
        # @return [Pathname] <output_root>/glyphs/<cp_id>.svg
        def glyph_path(output_root, cp_id)
          filename = "#{cp_id}.svg"
          Pathname(output_root).join(GLYPHS_DIR, filename)
        end

        # @param output_root [Pathname]
        # @return [Pathname] <output_root>/manifest.json
        def manifest_path(output_root)
          Pathname(output_root).join(MANIFEST_FILENAME)
        end

        # @param output_root [Pathname]
        # @return [Pathname] <output_root>/reports/by_tier.json
        def by_tier_report_path(output_root)
          Pathname(output_root).join(REPORTS_DIR, BY_TIER_REPORT)
        end

        # @param output_root [Pathname]
        # @return [Pathname] <output_root>/reports/by_block.json
        def by_block_report_path(output_root)
          Pathname(output_root).join(REPORTS_DIR, BY_BLOCK_REPORT)
        end

        # @param output_root [Pathname]
        # @return [Pathname] <output_root>/reports/gaps.json
        def gaps_report_path(output_root)
          Pathname(output_root).join(REPORTS_DIR, GAPS_REPORT)
        end
      end
    end
  end
end
@@ -0,0 +1,195 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+ require "time"
5
+
6
+ require "ucode/models"
7
+ require "ucode/repo/paths"
8
+
9
module Ucode
  module Glyphs
    module UniversalSet
      # Thread-safe accumulator that observes the {Builder}'s per-
      # codepoint attempts and produces the final
      # {Ucode::Models::UniversalSetManifest} plus the per-block
      # breakdown the {ManifestWriter} emits as `reports/by_block.json`.
      #
      # Mirrors the {Ucode::Repo::BuildReportAccumulator} pattern from
      # Mode 1: the orchestrating command passes this instance to the
      # builder, which calls {#record_build} (or {#record_skip}) from
      # inside its worker pool. After the drain completes,
      # {#to_manifest} returns the snapshot.
      #
      # == Semantics
      #
      # - `codepoints_assigned` counts every codepoint the builder
      #   attempted (passed the block_filter guard).
      # - `codepoints_built` counts codepoints whose resolver returned
      #   a glyph.
      # - `codepoints_skipped` counts codepoints that resolved to nil
      #   (no tier produced a glyph) — these are the "gaps" the gaps
      #   report enumerates.
      # - `codepoints_failed` counts exceptions recorded via
      #   {#record_failure}.
      #
      # `by_tier` counts the winning tier per codepoint (one increment
      # per built codepoint). The map uses the wire form ("tier-1",
      # "pillar-1", ...) so the manifest is stable across Ruby symbol
      # changes.
      #
      # `by_block` is a hash keyed by block_id, with built / skipped /
      # failed counters per block. Computed from the codepoint stream
      # the Builder drains — the accumulator reads {CodePoint#block_id}
      # directly. Block ids follow the canonical underscore form.
      #
      # == Determinism
      #
      # Under a worker pool the record_* calls arrive in thread-
      # scheduling order. {#to_manifest} therefore sorts entries by
      # codepoint and `by_tier` by key, so two runs over the same
      # input produce the same manifest ordering regardless of how
      # many workers were used.
      class ManifestAccumulator
        TIER_TO_WIRE = {
          tier1: "tier-1",
          pillar1: "pillar-1",
          pillar2: "pillar-2",
          pillar3: "pillar-3",
        }.freeze
        private_constant :TIER_TO_WIRE

        # @param unicode_version [String]
        # @param ucode_version [String]
        # @param source_config_sha256 [String]
        def initialize(unicode_version:, ucode_version:, source_config_sha256:)
          @unicode_version = unicode_version
          @ucode_version = ucode_version
          @source_config_sha256 = source_config_sha256
          @totals = { codepoints_assigned: 0, codepoints_built: 0,
                      codepoints_skipped: 0, codepoints_failed: 0 }
          @by_tier = Hash.new(0)
          @by_block = Hash.new do |h, block_id|
            h[block_id] = { built: 0, skipped: 0, failed: 0 }
          end
          @entries = []
          @gaps = []
          @failures = []
          @mutex = Mutex.new
        end

        # Observer entry — the builder calls this for every codepoint
        # the resolver produced a glyph for. Records the entry and
        # bumps the built counter + per-tier + per-block rollups.
        #
        # @param codepoint [Ucode::Models::CodePoint]
        # @param result [Ucode::Glyphs::Source::Result] non-nil
        # @param svg [String] the SVG bytes that were written
        # @return [void]
        def record_build(codepoint, result, svg:)
          # Hashing the SVG happens outside the lock so workers only
          # serialize on the counter updates.
          entry = build_entry(codepoint.cp, result, svg)
          tier_wire = wire_tier(result.tier)
          synchronize do
            @totals[:codepoints_assigned] += 1
            @totals[:codepoints_built] += 1
            @by_tier[tier_wire] += 1
            @by_block[codepoint.block_id][:built] += 1
            @entries << entry
          end
        end

        # Observer entry — the builder calls this when the resolver
        # returned nil for a codepoint. Counts the attempt and adds
        # it to the gaps list for the gaps report.
        #
        # @param codepoint [Ucode::Models::CodePoint]
        # @return [void]
        def record_skip(codepoint)
          synchronize do
            @totals[:codepoints_assigned] += 1
            @totals[:codepoints_skipped] += 1
            @by_block[codepoint.block_id][:skipped] += 1
            @gaps << codepoint.cp
          end
        end

        # Record an exception. The builder rescues per-codepoint
        # errors and routes them here so one bad codepoint doesn't
        # abort the run. A nil codepoint still counts as a failure
        # but is not attributed to any block or to the assigned total.
        #
        # @param codepoint [Ucode::Models::CodePoint, nil]
        # @param error [StandardError]
        # @return [void]
        def record_failure(codepoint, error)
          synchronize do
            @totals[:codepoints_assigned] += 1 unless codepoint.nil?
            @totals[:codepoints_failed] += 1
            @by_block[codepoint&.block_id][:failed] += 1 unless codepoint.nil?
            @failures << { codepoint: codepoint&.cp,
                           block_id: codepoint&.block_id,
                           error_class: error.class.name,
                           message: error.message }
          end
        end

        # Build the manifest from everything recorded so far.
        # `generated_at` is stamped with the current UTC time; entries
        # are ordered by codepoint and `by_tier` by tier name so the
        # output does not depend on worker scheduling.
        #
        # @return [Ucode::Models::UniversalSetManifest] snapshot
        def to_manifest
          synchronize do
            Ucode::Models::UniversalSetManifest.new(
              unicode_version: @unicode_version,
              ucode_version: @ucode_version,
              generated_at: Time.now.utc.iso8601,
              source_config_sha256: @source_config_sha256,
              totals: Ucode::Models::UniversalSetManifest::Totals.new(@totals),
              by_tier: @by_tier.sort.to_h,
              entries: @entries.sort_by(&:codepoint),
            )
          end
        end

        # @return [Hash{String=>Hash}] per-block built/skipped/failed
        #   counts, deep-copied so callers can't mutate accumulator state.
        def by_block
          synchronize do
            @by_block.transform_values(&:dup)
          end
        end

        # @return [Array<Integer>] codepoints that resolved to nil, sorted
        def gaps
          synchronize { @gaps.sort }
        end

        # @return [Array<Hash>] recorded failures (each with codepoint,
        #   block_id, error_class, message)
        def failures
          synchronize { @failures.dup }
        end

        private

        # Assemble the manifest entry for one built codepoint,
        # including the SHA-256 and byte size of its SVG payload.
        def build_entry(codepoint, result, svg)
          Ucode::Models::UniversalSetEntry.new(
            codepoint: codepoint,
            id: Ucode::Repo::Paths.cp_id(codepoint),
            tier: wire_tier(result.tier),
            source: source_label(result.provenance),
            svg_sha256: sha256(svg),
            svg_size_bytes: svg.bytesize,
          )
        end

        # Extract the source identifier from a colon-separated
        # provenance string ("tier-1:noto-sans" -> "noto-sans"). Only
        # the first `:` splits, so identifiers containing colons
        # survive intact. When there's no `:` separator the input is
        # returned verbatim — defensive against malformed provenance.
        def source_label(provenance)
          provenance.to_s.split(":", 2).last || provenance.to_s
        end

        # Hex SHA-256 digest of the payload.
        def sha256(payload)
          Digest::SHA256.hexdigest(payload)
        end

        # Map a tier symbol to its wire form; unknown tiers fall back
        # to their string representation.
        def wire_tier(symbol)
          TIER_TO_WIRE.fetch(symbol, symbol.to_s)
        end

        # Run the block while holding the accumulator's mutex.
        def synchronize(&block)
          @mutex.synchronize(&block)
        end
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+ require "json"
5
+
6
+ require "ucode/glyphs/universal_set/idempotency"
7
+
8
module Ucode
  module Glyphs
    module UniversalSet
      # Serializes the finished manifest and its companion reports
      # under the output root.
      #
      # One manifest, three reports:
      #
      # - `manifest.json` — the full {Ucode::Models::UniversalSetManifest}.
      # - `reports/by_tier.json` — the manifest's `by_tier` counts on
      #   their own (small file for quick "how much of the set is
      #   tier 1?" inspection).
      # - `reports/by_block.json` — the per-block breakdown handed in
      #   by the caller.
      # - `reports/gaps.json` — an object with two keys: `gaps` (the
      #   codepoints handed in as having no glyph) and `failures`
      #   (the per-codepoint failure records).
      #   NOTE(review): {CoverageReport} writes the same filename with
      #   an array-of-objects payload — confirm which shape consumers
      #   expect.
      #
      # Every file goes through `write_atomic`, provided via the
      # {Idempotency} mixin.
      class ManifestWriter
        include Idempotency

        # @param output_root [String, Pathname]
        def initialize(output_root)
          @output_root = Pathname.new(output_root)
        end

        # Write the manifest first, then the three reports.
        #
        # @param manifest [Ucode::Models::UniversalSetManifest]
        # @param by_block [Hash{String=>Hash}] per-block breakdown:
        #   `{ "Basic_Latin" => { built: 64, skipped: 0, failed: 0 } }`.
        #   Serialized as given.
        # @param gaps [Array<Integer>] codepoints with no glyph
        # @param failures [Array<Hash>] per-codepoint failures
        # @return [Pathname] path to the written manifest
        def write(manifest, by_block:, gaps:, failures:)
          root = @output_root
          manifest_file = manifest_path(root)

          write_atomic(manifest_file, manifest_to_json(manifest))
          write_atomic(by_tier_report_path(root),
                       to_pretty_json(manifest.by_tier))
          write_atomic(by_block_report_path(root),
                       to_pretty_json(by_block))
          write_atomic(gaps_report_path(root),
                       to_pretty_json(gaps: gaps, failures: failures))

          manifest_file
        end

        private

        # Pretty-printed JSON for the manifest model.
        def manifest_to_json(manifest)
          manifest.to_json(pretty: true)
        end
      end
    end
  end
end
+ end