ucode 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +72 -0
  3. data/Gemfile.lock +2 -2
  4. data/TODO.full/00-README.md +116 -0
  5. data/TODO.full/01-panglyph-vision.md +112 -0
  6. data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
  7. data/TODO.full/03-panglyph-font-builder.md +201 -0
  8. data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
  9. data/TODO.full/05-ucode-0-1-1-release.md +139 -0
  10. data/TODO.full/06-fontisan-remove-audit.md +142 -0
  11. data/TODO.full/07-fontisan-remove-ucd.md +125 -0
  12. data/TODO.full/08-archive-private-bin-build.md +143 -0
  13. data/TODO.full/09-archive-public-structure.md +164 -0
  14. data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
  15. data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
  16. data/TODO.full/12-implementation-order.md +216 -0
  17. data/TODO.full/13-fontisan-font-writer-api.md +189 -0
  18. data/TODO.full/14-fontisan-table-writers.md +66 -0
  19. data/TODO.full/15-panglyph-builder-real.md +82 -0
  20. data/TODO.full/16-archive-public-sync-workflows.md +167 -0
  21. data/TODO.full/17-fontist-org-font-picker.md +73 -0
  22. data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
  23. data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
  24. data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
  25. data/TODO.new/00-README.md +30 -0
  26. data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
  27. data/TODO.new/24-universal-glyph-set-build.md +189 -0
  28. data/TODO.new/25-font-audit-against-universal-set.md +195 -0
  29. data/TODO.new/26-missing-glyph-reporter.md +189 -0
  30. data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
  31. data/TODO.new/28-implementation-order-update.md +187 -0
  32. data/TODO.new/29-universal-set-curation-uc17.md +312 -0
  33. data/TODO.new/30-tier1-font-acquisition.md +241 -0
  34. data/TODO.new/31-universal-set-production-build.md +205 -0
  35. data/TODO.new/32-uc17-coverage-matrix.md +165 -0
  36. data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
  37. data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
  38. data/TODO.new/35-universal-set-production-run.md +160 -0
  39. data/TODO.new/36-per-font-coverage-audit.md +145 -0
  40. data/TODO.new/37-coverage-highlight-reporter.md +125 -0
  41. data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
  42. data/TODO.new/39-implementation-order-update-32-38.md +258 -0
  43. data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
  44. data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
  45. data/config/specialist_fonts.yml +102 -0
  46. data/config/unicode17_tier1_fonts.yml +42 -0
  47. data/config/unicode17_universal_glyph_set.yml +293 -0
  48. data/lib/ucode/audit/block_aggregator.rb +57 -29
  49. data/lib/ucode/audit/browser/face_page.rb +128 -0
  50. data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
  51. data/lib/ucode/audit/browser/library_page.rb +74 -0
  52. data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
  53. data/lib/ucode/audit/browser/template.rb +47 -0
  54. data/lib/ucode/audit/browser/templates/face.css +200 -0
  55. data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
  56. data/lib/ucode/audit/browser/templates/face.js +298 -0
  57. data/lib/ucode/audit/browser/templates/library.css +119 -0
  58. data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
  59. data/lib/ucode/audit/browser/templates/library.js +99 -0
  60. data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
  61. data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
  62. data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
  63. data/lib/ucode/audit/browser.rb +32 -0
  64. data/lib/ucode/audit/context.rb +27 -1
  65. data/lib/ucode/audit/coverage_reference.rb +103 -0
  66. data/lib/ucode/audit/differ.rb +121 -0
  67. data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
  68. data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
  69. data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
  70. data/lib/ucode/audit/emitter/face_directory.rb +212 -0
  71. data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
  72. data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
  73. data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
  74. data/lib/ucode/audit/emitter/paths.rb +312 -0
  75. data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
  76. data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
  77. data/lib/ucode/audit/emitter.rb +29 -0
  78. data/lib/ucode/audit/extractors/aggregations.rb +31 -2
  79. data/lib/ucode/audit/face_auditor.rb +86 -0
  80. data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
  81. data/lib/ucode/audit/formatters/audit_text.rb +411 -0
  82. data/lib/ucode/audit/formatters/color.rb +48 -0
  83. data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
  84. data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
  85. data/lib/ucode/audit/formatters.rb +23 -0
  86. data/lib/ucode/audit/library_aggregator.rb +86 -0
  87. data/lib/ucode/audit/library_auditor.rb +105 -0
  88. data/lib/ucode/audit/release/emitter.rb +152 -0
  89. data/lib/ucode/audit/release/face_card.rb +93 -0
  90. data/lib/ucode/audit/release/formula_audits.rb +50 -0
  91. data/lib/ucode/audit/release/library_index_builder.rb +78 -0
  92. data/lib/ucode/audit/release/manifest_builder.rb +127 -0
  93. data/lib/ucode/audit/release.rb +42 -0
  94. data/lib/ucode/audit/ucd_only_reference.rb +81 -0
  95. data/lib/ucode/audit/universal_set_reference.rb +136 -0
  96. data/lib/ucode/audit.rb +31 -0
  97. data/lib/ucode/cli.rb +339 -33
  98. data/lib/ucode/commands/audit/browser_command.rb +82 -0
  99. data/lib/ucode/commands/audit/collection_command.rb +103 -0
  100. data/lib/ucode/commands/audit/compare_command.rb +188 -0
  101. data/lib/ucode/commands/audit/font_command.rb +140 -0
  102. data/lib/ucode/commands/audit/library_command.rb +87 -0
  103. data/lib/ucode/commands/audit/reference_builder.rb +64 -0
  104. data/lib/ucode/commands/audit.rb +20 -0
  105. data/lib/ucode/commands/block_feed.rb +73 -0
  106. data/lib/ucode/commands/canonical_build.rb +138 -0
  107. data/lib/ucode/commands/fetch.rb +37 -1
  108. data/lib/ucode/commands/release.rb +115 -0
  109. data/lib/ucode/commands/universal_set.rb +211 -0
  110. data/lib/ucode/commands.rb +5 -0
  111. data/lib/ucode/coordinator/indices.rb +11 -0
  112. data/lib/ucode/coordinator.rb +138 -5
  113. data/lib/ucode/error.rb +30 -2
  114. data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
  115. data/lib/ucode/fetch/font_fetcher.rb +16 -0
  116. data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
  117. data/lib/ucode/fetch.rb +7 -3
  118. data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
  119. data/lib/ucode/glyphs/real_fonts.rb +1 -0
  120. data/lib/ucode/glyphs/resolver.rb +62 -0
  121. data/lib/ucode/glyphs/source.rb +48 -0
  122. data/lib/ucode/glyphs/source_builder.rb +61 -0
  123. data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
  124. data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
  125. data/lib/ucode/glyphs/source_config.rb +104 -0
  126. data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
  127. data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
  128. data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
  129. data/lib/ucode/glyphs/sources.rb +20 -0
  130. data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
  131. data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
  132. data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
  133. data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
  134. data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
  135. data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
  136. data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
  137. data/lib/ucode/glyphs/universal_set.rb +45 -0
  138. data/lib/ucode/glyphs.rb +6 -0
  139. data/lib/ucode/models/audit/baseline.rb +6 -0
  140. data/lib/ucode/models/audit/block_summary.rb +7 -0
  141. data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
  142. data/lib/ucode/models/audit/release_face.rb +42 -0
  143. data/lib/ucode/models/audit/release_formula.rb +33 -0
  144. data/lib/ucode/models/audit/release_manifest.rb +43 -0
  145. data/lib/ucode/models/audit/release_universal_set.rb +37 -0
  146. data/lib/ucode/models/audit.rb +9 -0
  147. data/lib/ucode/models/block.rb +2 -0
  148. data/lib/ucode/models/build_report.rb +109 -0
  149. data/lib/ucode/models/codepoint/glyph.rb +42 -0
  150. data/lib/ucode/models/codepoint.rb +3 -0
  151. data/lib/ucode/models/glyph_source.rb +86 -0
  152. data/lib/ucode/models/glyph_source_map.rb +138 -0
  153. data/lib/ucode/models/specialist_font.rb +70 -0
  154. data/lib/ucode/models/specialist_font_manifest.rb +48 -0
  155. data/lib/ucode/models/unihan_entry.rb +81 -9
  156. data/lib/ucode/models/unihan_field.rb +21 -0
  157. data/lib/ucode/models/universal_set_entry.rb +47 -0
  158. data/lib/ucode/models/universal_set_manifest.rb +78 -0
  159. data/lib/ucode/models/validation_report.rb +99 -0
  160. data/lib/ucode/models.rb +9 -0
  161. data/lib/ucode/parsers/named_sequences.rb +5 -5
  162. data/lib/ucode/parsers/unihan.rb +50 -19
  163. data/lib/ucode/repo/aggregate_writer.rb +34 -2
  164. data/lib/ucode/repo/block_feed_emitter.rb +153 -0
  165. data/lib/ucode/repo/build_report_accumulator.rb +138 -0
  166. data/lib/ucode/repo/build_report_writer.rb +46 -0
  167. data/lib/ucode/repo/build_validator.rb +229 -0
  168. data/lib/ucode/repo/codepoint_writer.rb +50 -1
  169. data/lib/ucode/repo/paths.rb +8 -0
  170. data/lib/ucode/repo.rb +4 -0
  171. data/lib/ucode/version.rb +1 -1
  172. data/schema/block-feed.output.schema.yml +134 -0
  173. metadata +143 -2
  174. data/ucode.gemspec +0 -56
@@ -0,0 +1,58 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1">
6
+ <title><%= block_name %> — missing glyphs — ucode audit</title>
7
+ <style>
8
+ <%= _css %>
9
+ </style>
10
+ </head>
11
+ <body data-universal-set-available="<%= universal_set_available %>">
12
+ <header class="page-header">
13
+ <h1><%= block_name %></h1>
14
+ <p class="tagline">
15
+ <%= total_count %> missing codepoint<%= total_count == 1 ? "" : "s" %>
16
+ <% if overflow_count.positive? %>
17
+ — showing first <%= visible_count %>
18
+ <% end %>
19
+ <% unless universal_set_available %>
20
+ — universal-set glyphs unavailable
21
+ <% end %>
22
+ </p>
23
+ </header>
24
+
25
+ <main>
26
+ <% if panels.empty? %>
27
+ <p class="hint">No missing codepoints in this block.</p>
28
+ <% else %>
29
+ <ul class="glyph-grid">
30
+ <% panels.each do |panel| %>
31
+ <li class="glyph-cell" data-codepoint="<%= panel["codepoint"] %>">
32
+ <div class="glyph-thumb">
33
+ <% if panel["svg"] %>
34
+ <%= panel["svg"] %>
35
+ <% else %>
36
+ <span class="glyph-na">n/a</span>
37
+ <% end %>
38
+ </div>
39
+ <div class="glyph-meta">
40
+ <span class="cp-id"><%= panel["id"] %></span>
41
+ <% if panel["source"] %>
42
+ <span class="source"><%= panel["source"] %></span>
43
+ <% end %>
44
+ </div>
45
+ </li>
46
+ <% end %>
47
+ </ul>
48
+ <% if overflow_count.positive? %>
49
+ <p class="overflow">+<%= overflow_count %> more codepoints not shown — see the face browser for the full list.</p>
50
+ <% end %>
51
+ <% end %>
52
+ </main>
53
+
54
+ <script>
55
+ <%= _js %>
56
+ </script>
57
+ </body>
58
+ </html>
@@ -0,0 +1,2 @@
1
+ // ucode audit missing-glyph gallery — static page, no interactivity required.
2
+ // SVGs are inlined into the HTML at render time so the page works under file://.
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
module Ucode
  module Audit
    # Namespace for the standalone HTML browsers built on top of Mode 2
    # audit output.
    #
    # The two top-level pages:
    #
    # - {Browser::FacePage} renders a single face's audit. The page is
    #   self-contained (no external CSS/JS) and carries its JSON inline so
    #   it renders immediately under `file://`. Block expansion and
    #   codepoint detail are lazy-fetched from the chunks written by
    #   {Emitter::FaceDirectory}.
    # - {Browser::LibraryPage} renders a library index, likewise
    #   self-contained, whose cards link to the individual face pages.
    #
    # Neither page duplicates the JSON produced by {Emitter}; each inlines
    # only the overview slice needed for the first render.
    #
    # {Browser::Template}, {Browser::GlyphPanel} and
    # {Browser::MissingGlyphPage} are autoloaded from this namespace too.
    module Browser
      # Location of the bundled template files, resolved relative to this
      # source file. Kept private to the namespace.
      TEMPLATE_DIR = Pathname.new(__dir__).join("browser", "templates")
      private_constant :TEMPLATE_DIR

      # Lazily loaded constants (alphabetical; registration order is
      # irrelevant to autoload).
      autoload :FacePage, "ucode/audit/browser/face_page"
      autoload :GlyphPanel, "ucode/audit/browser/glyph_panel"
      autoload :LibraryPage, "ucode/audit/browser/library_page"
      autoload :MissingGlyphPage, "ucode/audit/browser/missing_glyph_page"
      autoload :Template, "ucode/audit/browser/template"
    end
  end
end
@@ -39,14 +39,21 @@ module Ucode
39
39
  # @param options [Hash{Symbol=>Object}] audit options (ucd_version,
40
40
  # all_codepoints, with_glyphs, etc.).
41
41
  # @param renderer [Object, nil] glyph renderer for --with-glyphs mode.
42
+ # @param reference [CoverageReference, nil] the baseline the
43
+ # audit compares the font's cmap against. When nil, defaults
44
+ # to a {UcdOnlyReference} built from the resolved baseline
45
+ # database (TODO 25). Passing a {UniversalSetReference}
46
+ # attaches per-codepoint provenance to every missing-codepoint
47
+ # row.
42
48
  def initialize(font:, font_path:, font_index:, num_fonts_in_source:,
43
- options:, renderer: nil)
49
+ options:, renderer: nil, reference: nil)
44
50
  @font = font
45
51
  @font_path = font_path
46
52
  @font_index = font_index
47
53
  @num_fonts_in_source = num_fonts_in_source
48
54
  @options = options
49
55
  @renderer = renderer
56
+ @reference_override = reference
50
57
  end
51
58
 
52
59
  # Codepoints the font's cmap actually maps. Memoized.
@@ -63,6 +70,18 @@ module Ucode
63
70
  @baseline ||= resolve_baseline
64
71
  end
65
72
 
73
+ # The {CoverageReference} the audit compares against. Defaults
74
+ # to a {UcdOnlyReference} built from the resolved baseline
75
+ # database. When the caller supplied a reference at construction
76
+ # (typically a {UniversalSetReference}), that one is used
77
+ # verbatim. Memoized.
78
+ #
79
+ # @return [CoverageReference, nil] nil when the baseline itself
80
+ # couldn't be resolved (database missing).
81
+ def reference
82
+ @reference ||= @reference_override || build_default_reference
83
+ end
84
+
66
85
  # Detected source format string ("ttf", "otf", "ttc", ...). Memoized.
67
86
  # @return [String, nil]
68
87
  def source_format
@@ -132,6 +151,13 @@ module Ucode
132
151
  generated_at: Time.now.utc.iso8601,
133
152
  )
134
153
  end
154
+
155
+ def build_default_reference
156
+ database = baseline.database
157
+ return nil if database.nil?
158
+
159
+ UcdOnlyReference.new(database: database)
160
+ end
135
161
  end
136
162
  end
137
163
  end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
module Ucode
  module Audit
    # Common interface for any "what is the assigned codepoint set"
    # reference used by the audit pipeline.
    #
    # Two implementations are described by this file's design notes:
    #
    # - {UcdOnlyReference} — derives the assigned set from the UCD
    #   database alone (block ranges) and carries no per-codepoint
    #   provenance. This is the legacy behaviour: a font audit compares
    #   against the abstract Unicode assigned-codepoint list.
    #
    # - {UniversalSetReference} — derives the assigned set from a
    #   universal-set manifest (TODO 24). Every codepoint carries tier +
    #   source provenance, so a missing-codepoint report can answer "what
    #   does the missing glyph look like, and where did the universal set
    #   get it from?".
    #
    # The audit pipeline (Context → Aggregations extractor →
    # BlockAggregator) is meant to talk exclusively to this interface, so
    # a new reference kind is one new subclass with no caller changes.
    #
    # Every query method here is abstract: the base implementation raises
    # NotImplementedError naming the concrete class and the missing
    # method. Note that NotImplementedError descends from ScriptError, not
    # StandardError, so a bare `rescue` does not catch it.
    class CoverageReference
      # Per-codepoint row exposed by every reference. `tier` and `source`
      # are nil for references that carry no provenance (e.g.
      # {UcdOnlyReference}).
      #
      # This is a plain keyword-init Struct: treat rows as read-only, but
      # be aware the Struct itself does not enforce immutability.
      Entry = Struct.new(:codepoint, :id, :tier, :source, keyword_init: true) do
        # @return [Boolean] true when either `tier` or `source` is set,
        #   i.e. the row carries provenance
        def provenance?
          !tier.nil? || !source.nil?
        end
      end

      def initialize; end

      # Symbol identifying the reference kind. Intended for the audit
      # report's `baseline.reference_kind` field so consumers know which
      # reference produced the per-block counts.
      #
      # @return [Symbol] e.g. :ucd, :universal_set
      # @raise [NotImplementedError] unless overridden by the subclass
      def kind
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end

      # @param codepoint [Integer]
      # @return [Boolean] true if the codepoint is in the reference set
      # @raise [NotImplementedError] unless overridden by the subclass
      def include?(codepoint)
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end

      # Block name (verbatim Unicode identifier, e.g. "Basic_Latin") the
      # codepoint falls under, or nil if it isn't in any known block.
      # Lets {BlockAggregator} group a font's cmap by block without direct
      # access to the underlying {Ucode::Database}.
      #
      # @param codepoint [Integer]
      # @return [String, nil]
      # @raise [NotImplementedError] unless overridden by the subclass
      def block_name_for(codepoint)
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end

      # Every assigned codepoint in the block, with tier + source attached
      # when the reference carries provenance.
      #
      # @param block_id [String] verbatim Unicode block name
      #   (e.g. "Basic_Latin", "Greek_and_Coptic")
      # @return [Array<Entry>] sorted by codepoint; empty for unknown
      #   block names or blocks with no assigned codepoints
      # @raise [NotImplementedError] unless overridden by the subclass
      def entries_for_block(block_id)
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end

      # Stable identifier for the reference, embedded in audit reports so
      # consumers can detect drift. Examples:
      #
      #   "ucd:17.0.0"
      #   "universal-set:17.0.0:abc12345"
      #
      # @return [String]
      # @raise [NotImplementedError] unless overridden by the subclass
      def reference_id
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end

      # Provenance rows for a list of codepoints, or nil when the
      # reference carries no provenance (UCD-only). Returning nil (rather
      # than an empty array) is the signal that the audit report should
      # omit the `missing_codepoint_provenance` field entirely, which
      # preserves the legacy wire shape for UCD-only audits.
      #
      # @param codepoints [Enumerable<Integer>]
      # @return [Array<Hash{Symbol=>Object}>, nil] one hash per codepoint
      #   with `:codepoint`, `:tier`, `:source` keys; or nil
      # @raise [NotImplementedError] unless overridden by the subclass
      def provenance_for(codepoints)
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end
    end
  end
end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
# Set is used for codepoint arithmetic and #to_yaml for serializing
# non-scalar field values; require both explicitly so this file does not
# depend on another file having loaded them first.
require "set"
require "yaml"

module Ucode
  module Audit
    # Computes a {Models::Audit::AuditDiff} between two AuditReports.
    #
    # The differ only reads attributes of the two reports it is given; it
    # performs no I/O and no font parsing itself.
    #
    # Comparison shape:
    # - Scalar fields: one {Models::Audit::FieldChange} per differing
    #   field listed in {COMPARED_FIELDS}.
    # - Codepoint coverage: a {Models::Audit::CodepointSetDiff} built from
    #   the reports' codepoint range lists (expanded to integer sets for
    #   set arithmetic, then handed to CodepointRangeCoalescer for output).
    # - Structural inventories (features, scripts, blocks): set
    #   differences over plain arrays.
    class Differ
      # Scalar AuditReport fields compared field-by-field. Per-report
      # identity fields (generated_at, source_sha256, source_file), the
      # codepoint lists (handled via CodepointSetDiff) and nested models
      # (surfaced via the added/removed lists) are deliberately absent.
      COMPARED_FIELDS = %i[
        family_name subfamily_name full_name postscript_name version
        font_revision weight_class width_class italic bold panose
        total_codepoints total_glyphs
      ].freeze

      # @param left_report [Models::Audit::AuditReport]
      # @param right_report [Models::Audit::AuditReport]
      def initialize(left_report, right_report)
        @left = left_report
        @right = right_report
      end

      # Builds the full diff. "Added" means present on the right but not
      # the left; "removed" is the reverse.
      #
      # @return [Models::Audit::AuditDiff]
      def diff
        Models::Audit::AuditDiff.new(
          left_source: @left.source_file,
          right_source: @right.source_file,
          field_changes: field_changes,
          codepoints: codepoint_diff,
          added_features: set_diff(features(@right), features(@left)),
          removed_features: set_diff(features(@left), features(@right)),
          added_scripts: set_diff(scripts(@right), scripts(@left)),
          removed_scripts: set_diff(scripts(@left), scripts(@right)),
          added_blocks: set_diff(blocks(@right), blocks(@left)),
          removed_blocks: set_diff(blocks(@left), blocks(@right)),
        )
      end

      private

      # One FieldChange per COMPARED_FIELDS entry whose values differ
      # (`==`) between the two reports; equal fields are skipped.
      def field_changes
        COMPARED_FIELDS.filter_map do |field|
          left_val = @left.public_send(field)
          right_val = @right.public_send(field)
          next if left_val == right_val

          Models::Audit::FieldChange.new(
            field: field.to_s,
            left: serialize_value(left_val),
            right: serialize_value(right_val),
          )
        end
      end

      # Set arithmetic over the two expanded codepoint sets.
      def codepoint_diff
        left_set = codepoints_from_ranges(@left)
        right_set = codepoints_from_ranges(@right)
        added = right_set - left_set
        removed = left_set - right_set
        unchanged = left_set & right_set

        Models::Audit::CodepointSetDiff.new(
          added: CodepointRangeCoalescer.call(added.to_a),
          removed: CodepointRangeCoalescer.call(removed.to_a),
          added_count: added.size,
          removed_count: removed.size,
          unchanged_count: unchanged.size,
        )
      end

      # Expand a report's compact codepoint range list into a Set<Integer>.
      # A nil range list is treated as empty.
      def codepoints_from_ranges(report)
        ranges = report.codepoint_ranges || []
        ranges.each_with_object(Set.new) do |range, set|
          set.merge(range.first_cp..range.last_cp)
        end
      end

      # Feature list of the report's OpenType layout; empty when the
      # report has no layout or the layout has no features.
      def features(report)
        Array(report.opentype_layout&.features)
      end

      # Scripts are structured summaries rather than strings, so the diff
      # is keyed on each summary's script_code.
      def scripts(report)
        Array(report.scripts).map(&:script_code)
      end

      # Blocks are diffed by name.
      def blocks(report)
        Array(report.blocks).map(&:name)
      end

      # Sorted set difference `minuend - subtrahend`.
      #
      # nil entries are dropped (they would make the sort raise) and
      # duplicates are collapsed so the result is a true set.
      def set_diff(minuend, subtrahend)
        (Array(minuend) - Array(subtrahend)).compact.uniq.sort
      end

      # String form of a field value for a FieldChange: "" for nil,
      # `to_s` for scalars, YAML for anything else.
      def serialize_value(value)
        case value
        when nil then ""
        when String, Integer, Float, true, false then value.to_s
        else value.to_yaml
        end
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/repo/atomic_writes"
6
+ require "ucode/audit/emitter/paths"
7
+
8
module Ucode
  module Audit
    module Emitter
      # Writes `<face_dir>/blocks/<NAME>.json` — one file per touched
      # block.
      #
      # The filename is the block name verbatim (no slugifying), per
      # `03-directory-output-spec.md` §"Block filename encoding". The only
      # character escaped is `/`.
      #
      # Each file holds a single BlockSummary serialized through its
      # `to_hash`. The browser is meant to fetch these lazily when a block
      # is expanded in the coverage map.
      class BlockEmitter
        include Ucode::Repo::AtomicWrites

        # Serializes one block and writes it atomically under `face_dir`.
        #
        # @param face_dir [String, Pathname]
        # @param block [Models::Audit::BlockSummary]
        # @return [Boolean] whatever `write_atomic` reports — presumably
        #   true if written, false if skipped (confirm against
        #   Ucode::Repo::AtomicWrites)
        def emit(face_dir, block)
          path = Paths.block_under(face_dir, encode_name(block.name))
          write_atomic(path, to_pretty_json(serialize_block(block)))
        end

        private

        # Spec: per-block `missing_codepoints` is always embedded, even
        # when empty. The serialized hash may omit the key for an empty
        # list, so it is restored after serialization. `Array()` keeps the
        # value a list even when the attribute itself is nil, so the
        # emitted JSON never carries `null` here.
        def serialize_block(block)
          block.to_hash.tap do |hash|
            hash["missing_codepoints"] = Array(block.missing_codepoints)
          end
        end

        # Defensive guard: replaces any `/` in the block name so it cannot
        # introduce a path separator into the filename.
        def encode_name(name)
          name.to_s.tr("/", "_")
        end
      end
    end
  end
end
@@ -0,0 +1,87 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/repo/atomic_writes"
6
+ require "ucode/audit/emitter/paths"
7
+ require "ucode/models/audit/codepoint_detail"
8
+
9
module Ucode
  module Audit
    module Emitter
      # Writes `<face_dir>/codepoints/<NAME>.json` — the verbose per-block
      # codepoint detail list (intended for `--verbose` mode).
      #
      # For the given block, every covered codepoint becomes one
      # {Models::Audit::CodepointDetail} row. When a database is supplied,
      # the row's `script` is filled in from it; no other UCD field is set
      # by this class.
      #
      # Output is chunked per block so that no single file grows with the
      # size of the whole font.
      #
      # Glyph SVG paths are relative URIs into `glyphs/`. They are only
      # populated when `with_glyph_paths` is true; otherwise the field is
      # left unset and dropped from the serialized row.
      class CodepointEmitter
        include Ucode::Repo::AtomicWrites

        # @param face_dir [String, Pathname]
        # @param block [Models::Audit::BlockSummary]
        # @param database [Ucode::Database, nil] baseline lookup; when
        #   nil, no script lookup is performed
        # @param with_glyph_paths [Boolean] when true, each detail
        #   includes a relative `glyph_svg_path` linking into `glyphs/`
        # @return [Boolean] result of `write_atomic` — presumably true if
        #   written, false if skipped (confirm against AtomicWrites)
        def emit(face_dir, block, database: nil, with_glyph_paths: false)
          chunk = build_chunk(block, database, with_glyph_paths)
          target = Paths.codepoints_under(face_dir, encode_name(block.name))
          write_atomic(target, to_pretty_json(chunk))
        end

        private

        # Top-level JSON document for one block.
        def build_chunk(block, database, with_glyph_paths)
          rows = build_details(block, database, with_glyph_paths)

          {
            "block_name" => block.name,
            "first_cp" => block.first_cp,
            "last_cp" => block.last_cp,
            "codepoints" => rows,
          }
        end

        # One detail hash per covered codepoint, in the block's own order.
        def build_details(block, database, with_glyph_paths)
          block.covered_codepoints.map do |covered|
            build_detail(covered, block, database, with_glyph_paths)
          end
        end

        # Builds, enriches and serializes a single row; nil-valued keys
        # are stripped from the resulting hash.
        def build_detail(codepoint, block, database, with_glyph_paths)
          row = Models::Audit::CodepointDetail.new(
            codepoint: codepoint,
            block_name: block.name,
          )
          enrich_from_baseline(row, codepoint, database)
          if with_glyph_paths
            row.glyph_svg_path = glyph_relative_path(codepoint)
          end
          row.to_hash.compact
        end

        # Fills in the script from the database, when one is available.
        def enrich_from_baseline(detail, codepoint, database)
          detail.script = database.lookup_script(codepoint) if database
        end

        # Relative path of the glyph SVG, e.g. "glyphs/U+0041.svg".
        def glyph_relative_path(codepoint)
          label = format('U+%04X', codepoint)
          "glyphs/#{label}.svg"
        end

        # Replaces `/` so a block name can never add a path segment.
        def encode_name(name)
          name.to_s.tr("/", "_")
        end
      end
    end
  end
end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/repo/atomic_writes"
6
+ require "ucode/audit/emitter/paths"
7
+
8
module Ucode
  module Audit
    module Emitter
      # Writes the per-collection layout for TTC/OTC inputs.
      #
      # For a collection with N faces the intended layout is:
      #
      #   output/font_audit/<source_label>/
      #   ├── index.json               # collection-level summary
      #   ├── 00-<face_ps>/index.json
      #   ├── 00-<face_ps>/blocks/…
      #   ├── 01-<face_ps>/index.json
      #   └── …
      #
      # Per-face output is delegated to the supplied face directory
      # emitter through its `emit_collection_face` hook. This class itself
      # only writes the collection-level summary, which lists every face
      # together with the directory value the hook returned for it.
      class CollectionEmitter
        include Ucode::Repo::AtomicWrites

        # Emits every face, then the collection index.
        #
        # @param output_root [String, Pathname]
        # @param source_label [String] sanitized collection label
        # @param reports [Array<Models::Audit::AuditReport>] one per face
        # @param face_directory [FaceDirectory] per-face emitter
        # @return [Array<String>] the per-face subdirectory names written
        def emit(output_root, source_label, reports, face_directory:)
          written = reports.map.with_index do |report, position|
            face_directory.emit_collection_face(
              source_label: source_label, face_index: position, report: report,
            )
          end

          emit_collection_index(output_root, source_label, reports, written)
          written
        end

        private

        # Writes the collection-level index.json; nothing is written for
        # an empty report list.
        def emit_collection_index(output_root, source_label, reports, face_dirs)
          return if reports.empty?

          target = Paths.face_index_path(output_root, source_label)
          summary = build_collection_index(reports, face_dirs)
          write_atomic(target, to_pretty_json(summary))
        end

        # Source-level fields are taken from the first report; nil-valued
        # keys are dropped from the summary.
        def build_collection_index(reports, face_dirs)
          lead = reports.first

          {
            "num_fonts_in_source" => lead&.num_fonts_in_source || reports.size,
            "source_file" => lead&.source_file,
            "source_sha256" => lead&.source_sha256,
            "faces" => face_cards(reports, face_dirs),
          }.compact
        end

        # One card per face, pairing each report with the directory at the
        # same position. `font_index` is the position in `reports`.
        def face_cards(reports, face_dirs)
          reports.zip(face_dirs).each_with_index.map do |(report, directory), position|
            {
              "font_index" => position,
              "postscript_name" => report.postscript_name,
              "family_name" => report.family_name,
              "weight_class" => report.weight_class,
              "total_codepoints" => report.total_codepoints,
              "total_glyphs" => report.total_glyphs,
              "directory" => directory,
            }
          end
        end
      end
    end
  end
end