ucode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. checksums.yaml +7 -0
  2. data/CLAUDE.md +211 -0
  3. data/Gemfile +22 -0
  4. data/Gemfile.lock +406 -0
  5. data/README.md +469 -0
  6. data/Rakefile +18 -0
  7. data/TODO.new/00-README.md +66 -0
  8. data/TODO.new/01-pillar-terminology-alignment.md +69 -0
  9. data/TODO.new/02-audit-schema-design.md +255 -0
  10. data/TODO.new/03-directory-output-spec.md +203 -0
  11. data/TODO.new/04-fontist-org-contract.md +173 -0
  12. data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
  13. data/TODO.new/06-audit-namespace-skeleton.md +105 -0
  14. data/TODO.new/07-audit-models-port.md +132 -0
  15. data/TODO.new/08-extractors-cheap-port.md +113 -0
  16. data/TODO.new/09-extractors-expensive-port.md +99 -0
  17. data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
  18. data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
  19. data/TODO.new/12-formatters-port.md +115 -0
  20. data/TODO.new/13-directory-emitter.md +147 -0
  21. data/TODO.new/14-html-face-browser.md +144 -0
  22. data/TODO.new/15-html-library-browser.md +102 -0
  23. data/TODO.new/16-cli-audit-subcommands.md +142 -0
  24. data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
  25. data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
  26. data/TODO.new/19-fontisan-docs-update.md +155 -0
  27. data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
  28. data/TODO.new/21-canonical-unicode17-build.md +148 -0
  29. data/TODO.new/22-implementation-order.md +176 -0
  30. data/UCODE_CHANGELOG.md +97 -0
  31. data/exe/ucode +8 -0
  32. data/lib/ucode/aggregator.rb +77 -0
  33. data/lib/ucode/audit/block_aggregator.rb +90 -0
  34. data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
  35. data/lib/ucode/audit/context.rb +137 -0
  36. data/lib/ucode/audit/discrepancy_detector.rb +213 -0
  37. data/lib/ucode/audit/extractors/aggregations.rb +70 -0
  38. data/lib/ucode/audit/extractors/base.rb +21 -0
  39. data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
  40. data/lib/ucode/audit/extractors/coverage.rb +55 -0
  41. data/lib/ucode/audit/extractors/hinting.rb +199 -0
  42. data/lib/ucode/audit/extractors/identity.rb +65 -0
  43. data/lib/ucode/audit/extractors/licensing.rb +75 -0
  44. data/lib/ucode/audit/extractors/metrics.rb +108 -0
  45. data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
  46. data/lib/ucode/audit/extractors/provenance.rb +34 -0
  47. data/lib/ucode/audit/extractors/style.rb +88 -0
  48. data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
  49. data/lib/ucode/audit/extractors.rb +31 -0
  50. data/lib/ucode/audit/plane_aggregator.rb +37 -0
  51. data/lib/ucode/audit/registry.rb +63 -0
  52. data/lib/ucode/audit/script_aggregator.rb +92 -0
  53. data/lib/ucode/audit.rb +27 -0
  54. data/lib/ucode/cache.rb +113 -0
  55. data/lib/ucode/cli.rb +272 -0
  56. data/lib/ucode/commands/build.rb +68 -0
  57. data/lib/ucode/commands/cache.rb +46 -0
  58. data/lib/ucode/commands/fetch.rb +62 -0
  59. data/lib/ucode/commands/font_coverage.rb +57 -0
  60. data/lib/ucode/commands/glyphs.rb +136 -0
  61. data/lib/ucode/commands/lookup.rb +65 -0
  62. data/lib/ucode/commands/parse.rb +62 -0
  63. data/lib/ucode/commands/site.rb +33 -0
  64. data/lib/ucode/commands.rb +19 -0
  65. data/lib/ucode/config.rb +110 -0
  66. data/lib/ucode/coordinator/indices.rb +34 -0
  67. data/lib/ucode/coordinator.rb +397 -0
  68. data/lib/ucode/database.rb +214 -0
  69. data/lib/ucode/db_builder.rb +107 -0
  70. data/lib/ucode/error.rb +96 -0
  71. data/lib/ucode/fetch/code_charts.rb +57 -0
  72. data/lib/ucode/fetch/http.rb +83 -0
  73. data/lib/ucode/fetch/ucd_zip.rb +57 -0
  74. data/lib/ucode/fetch/unihan_zip.rb +57 -0
  75. data/lib/ucode/fetch.rb +14 -0
  76. data/lib/ucode/glyphs/cell_extractor.rb +130 -0
  77. data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
  78. data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
  79. data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
  80. data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
  81. data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
  82. data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
  83. data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
  84. data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
  85. data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
  86. data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
  87. data/lib/ucode/glyphs/grid.rb +30 -0
  88. data/lib/ucode/glyphs/grid_detector.rb +165 -0
  89. data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
  90. data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
  91. data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
  92. data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
  93. data/lib/ucode/glyphs/last_resort/source.rb +125 -0
  94. data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
  95. data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
  96. data/lib/ucode/glyphs/last_resort.rb +36 -0
  97. data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
  98. data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
  99. data/lib/ucode/glyphs/page_renderer.rb +221 -0
  100. data/lib/ucode/glyphs/path_bbox.rb +62 -0
  101. data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
  102. data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
  103. data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
  104. data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
  105. data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
  106. data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
  107. data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
  108. data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
  109. data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
  110. data/lib/ucode/glyphs/real_fonts.rb +32 -0
  111. data/lib/ucode/glyphs/writer.rb +250 -0
  112. data/lib/ucode/glyphs.rb +27 -0
  113. data/lib/ucode/index.rb +106 -0
  114. data/lib/ucode/index_builder.rb +94 -0
  115. data/lib/ucode/models/audit/audit_axis.rb +30 -0
  116. data/lib/ucode/models/audit/audit_diff.rb +77 -0
  117. data/lib/ucode/models/audit/audit_report.rb +137 -0
  118. data/lib/ucode/models/audit/baseline.rb +32 -0
  119. data/lib/ucode/models/audit/block_summary.rb +72 -0
  120. data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
  121. data/lib/ucode/models/audit/codepoint_range.rb +39 -0
  122. data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
  123. data/lib/ucode/models/audit/color_capabilities.rb +91 -0
  124. data/lib/ucode/models/audit/discrepancy.rb +38 -0
  125. data/lib/ucode/models/audit/duplicate_group.rb +23 -0
  126. data/lib/ucode/models/audit/embedding_type.rb +81 -0
  127. data/lib/ucode/models/audit/field_change.rb +28 -0
  128. data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
  129. data/lib/ucode/models/audit/gasp_range.rb +63 -0
  130. data/lib/ucode/models/audit/hinting.rb +99 -0
  131. data/lib/ucode/models/audit/library_summary.rb +40 -0
  132. data/lib/ucode/models/audit/licensing.rb +48 -0
  133. data/lib/ucode/models/audit/metrics.rb +111 -0
  134. data/lib/ucode/models/audit/named_instance.rb +41 -0
  135. data/lib/ucode/models/audit/opentype_layout.rb +38 -0
  136. data/lib/ucode/models/audit/plane_summary.rb +31 -0
  137. data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
  138. data/lib/ucode/models/audit/script_features.rb +28 -0
  139. data/lib/ucode/models/audit/script_summary.rb +54 -0
  140. data/lib/ucode/models/audit/variation_detail.rb +42 -0
  141. data/lib/ucode/models/audit.rb +50 -0
  142. data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
  143. data/lib/ucode/models/bidi_mirroring.rb +19 -0
  144. data/lib/ucode/models/binary_property_assignment.rb +26 -0
  145. data/lib/ucode/models/block.rb +36 -0
  146. data/lib/ucode/models/case_folding_rule.rb +23 -0
  147. data/lib/ucode/models/cjk_radical.rb +23 -0
  148. data/lib/ucode/models/codepoint/bidi.rb +28 -0
  149. data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
  150. data/lib/ucode/models/codepoint/case_folding.rb +25 -0
  151. data/lib/ucode/models/codepoint/casing.rb +32 -0
  152. data/lib/ucode/models/codepoint/decomposition.rb +27 -0
  153. data/lib/ucode/models/codepoint/display.rb +24 -0
  154. data/lib/ucode/models/codepoint/emoji.rb +29 -0
  155. data/lib/ucode/models/codepoint/hangul.rb +20 -0
  156. data/lib/ucode/models/codepoint/identifier.rb +30 -0
  157. data/lib/ucode/models/codepoint/indic.rb +20 -0
  158. data/lib/ucode/models/codepoint/joining.rb +20 -0
  159. data/lib/ucode/models/codepoint/normalization.rb +35 -0
  160. data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
  161. data/lib/ucode/models/codepoint.rb +122 -0
  162. data/lib/ucode/models/name_alias.rb +21 -0
  163. data/lib/ucode/models/named_sequence.rb +19 -0
  164. data/lib/ucode/models/names_list_entry.rb +38 -0
  165. data/lib/ucode/models/plane.rb +36 -0
  166. data/lib/ucode/models/property_alias.rb +24 -0
  167. data/lib/ucode/models/property_value_alias.rb +26 -0
  168. data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
  169. data/lib/ucode/models/relationship/cross_reference.rb +17 -0
  170. data/lib/ucode/models/relationship/footnote.rb +24 -0
  171. data/lib/ucode/models/relationship/informal_alias.rb +18 -0
  172. data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
  173. data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
  174. data/lib/ucode/models/relationship.rb +57 -0
  175. data/lib/ucode/models/script.rb +41 -0
  176. data/lib/ucode/models/special_casing_rule.rb +28 -0
  177. data/lib/ucode/models/standardized_variant.rb +24 -0
  178. data/lib/ucode/models/unihan_entry.rb +23 -0
  179. data/lib/ucode/models.rb +47 -0
  180. data/lib/ucode/parsers/auxiliary.rb +26 -0
  181. data/lib/ucode/parsers/base.rb +137 -0
  182. data/lib/ucode/parsers/bidi_brackets.rb +41 -0
  183. data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
  184. data/lib/ucode/parsers/blocks.rb +63 -0
  185. data/lib/ucode/parsers/case_folding.rb +53 -0
  186. data/lib/ucode/parsers/cjk_radicals.rb +102 -0
  187. data/lib/ucode/parsers/derived_age.rb +59 -0
  188. data/lib/ucode/parsers/derived_core_properties.rb +60 -0
  189. data/lib/ucode/parsers/extracted_properties.rb +74 -0
  190. data/lib/ucode/parsers/name_aliases.rb +44 -0
  191. data/lib/ucode/parsers/named_sequences.rb +51 -0
  192. data/lib/ucode/parsers/names_list.rb +250 -0
  193. data/lib/ucode/parsers/property_aliases.rb +41 -0
  194. data/lib/ucode/parsers/property_value_aliases.rb +46 -0
  195. data/lib/ucode/parsers/script_extensions.rb +64 -0
  196. data/lib/ucode/parsers/scripts.rb +60 -0
  197. data/lib/ucode/parsers/special_casing.rb +62 -0
  198. data/lib/ucode/parsers/standardized_variants.rb +56 -0
  199. data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
  200. data/lib/ucode/parsers/unicode_data.rb +268 -0
  201. data/lib/ucode/parsers/unihan.rb +125 -0
  202. data/lib/ucode/parsers.rb +35 -0
  203. data/lib/ucode/range_entry.rb +58 -0
  204. data/lib/ucode/repo/aggregate_writer.rb +364 -0
  205. data/lib/ucode/repo/atomic_writes.rb +48 -0
  206. data/lib/ucode/repo/codepoint_writer.rb +96 -0
  207. data/lib/ucode/repo/paths.rb +122 -0
  208. data/lib/ucode/repo.rb +22 -0
  209. data/lib/ucode/site/config_emitter.rb +124 -0
  210. data/lib/ucode/site/generator.rb +178 -0
  211. data/lib/ucode/site/search_index.rb +68 -0
  212. data/lib/ucode/site/template/.gitignore +4 -0
  213. data/lib/ucode/site/template/.vitepress/config.ts +8 -0
  214. data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
  215. data/lib/ucode/site/template/char/[codepoint].md +13 -0
  216. data/lib/ucode/site/template/components/BlockView.vue +57 -0
  217. data/lib/ucode/site/template/components/CharView.vue +85 -0
  218. data/lib/ucode/site/template/components/PlaneView.vue +56 -0
  219. data/lib/ucode/site/template/components/SearchView.vue +66 -0
  220. data/lib/ucode/site/template/index.md +25 -0
  221. data/lib/ucode/site/template/package.json +18 -0
  222. data/lib/ucode/site/template/search.md +9 -0
  223. data/lib/ucode/site.rb +13 -0
  224. data/lib/ucode/version.rb +5 -0
  225. data/lib/ucode/version_resolver.rb +76 -0
  226. data/lib/ucode.rb +74 -0
  227. data/ucode.gemspec +56 -0
  228. metadata +404 -0
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ucode/index"
4
+ require "ucode/range_entry"
5
+
6
module Ucode
  # Streaming accumulator that folds a sequence of CodePoint records into
  # per-property Index instances whose entries are sorted and coalesced.
  #
  # Lifecycle:
  #
  #   builder = IndexBuilder.new
  #   Coordinator.new.each_codepoint(...) { |cp| builder.add(cp) }
  #   builder.blocks_index   # => Index
  #   builder.scripts_index  # => Index
  #
  # The Coordinator is expected to yield cps in ascending cp order, but
  # nothing here relies on it: each name's cps are de-duplicated and sorted
  # before coalescing, and the final entry list is ordered by first cp
  # across all names.
  #
  # **Coalescing caveat**: ranges are derived from ASSIGNED cps only.
  # If a block has unassigned cps in the middle, the resulting range
  # will fragment around them. For lookup_block(cp) on an assigned cp,
  # the answer is correct. For an unassigned cp, the lookup returns
  # nil. This is a deliberate trade-off for streaming memory bounds —
  # the canonical block ranges are in `Coordinator#indices.blocks`,
  # not in the streamed cps.
  class IndexBuilder
    def initialize
      # Block form so every name gets its own array (no shared default).
      @cps_by_block = Hash.new { |h, k| h[k] = [] }
      @cps_by_script = Hash.new { |h, k| h[k] = [] }
    end

    # Fold one CodePoint into the per-property accumulators. No-ops for a
    # property whose name is nil or empty (e.g. an unassigned cp surfaced
    # through UnicodeData, or a cp outside any fixture range).
    #
    # @param cp [Ucode::Models::CodePoint] must respond to `block_id`,
    #   `script_code` and `cp`
    # @return [void]
    def add(cp)
      push_named(@cps_by_block, cp.block_id, cp.cp)
      push_named(@cps_by_script, cp.script_code, cp.cp)
    end

    # @return [Index] index over the coalesced block runs seen so far
    def blocks_index
      Index.new(to_entries(@cps_by_block))
    end

    # @return [Index] index over the coalesced script runs seen so far
    def scripts_index
      Index.new(to_entries(@cps_by_script))
    end

    private

    # Record `cp` under `name`, skipping nil/empty names.
    def push_named(target, name, cp)
      return if name.nil? || name.empty?

      target[name] << cp
    end

    # Flatten {name => [cp, cp, ...]} into Array<RangeEntry>, sorted by
    # first cp. Within each name, adjacent cps (gap == 1) coalesce.
    #
    # The sort is done across ALL names, not per name: properties such as
    # scripts interleave (Common / Latin / Common / ...), so concatenating
    # the per-name runs alone would leave the list out of order.
    #
    # @param cps_by_name [Hash{String => Array<Integer>}]
    # @return [Array<RangeEntry>]
    def to_entries(cps_by_name)
      runs = cps_by_name.flat_map do |name, cps|
        coalesce(cps).map { |first, last| [first, last, name] }
      end

      runs
        .sort_by { |first, last, _name| [first, last] }
        .map { |first, last, name| RangeEntry.new(first, last, name) }
    end

    # Coalesce a cp list into [first, last] runs of consecutive values.
    # Input is de-duplicated and sorted defensively, so a reordered or
    # repeated stream can never produce overlapping runs.
    #
    # @param cps [Array<Integer>]
    # @return [Array<Array(Integer, Integer)>] empty when `cps` is empty
    def coalesce(cps)
      cps
        .uniq
        .sort
        .slice_when { |prev, cur| cur != prev + 1 }
        .map { |run| [run.first, run.last] }
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Descriptor for a single fvar axis attached to an {AuditReport}.
      #
      # The numeric bounds carry a `_value` suffix (`min_value`,
      # `default_value`, `max_value`) instead of the bare `min` /
      # `default` / `max`; the original author chose this to steer clear
      # of a clash with an existing `default` method.
      class AuditAxis < Lutaml::Model::Serializable
        attribute :tag, :string
        attribute :min_value, :float
        attribute :default_value, :float
        attribute :max_value, :float
        attribute :name, :string

        key_value do
          # Each serialized key is spelled exactly like its attribute.
          %w[tag min_value default_value max_value name].each do |key|
            map key, to: key.to_sym
          end
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Structural comparison of two {AuditReport}s ("left" vs "right").
      #
      # * `left_source` / `right_source` — the original source_file paths
      #   (or report paths), so the diff can be traced back to its inputs
      #   without any other context.
      # * `field_changes` — scalar fields whose values changed.
      # * `codepoints` — the cmap delta, as a {CodepointSetDiff}.
      # * `added_*` / `removed_*` — array set-diffs over OpenType features,
      #   scripts and UCD blocks. "added" means present in right but not
      #   left; "removed" means present in left but not right.
      #
      # ucode delta vs fontisan: `added_languages` / `removed_languages`
      # are dropped (CLDR is out of scope).
      class AuditDiff < Lutaml::Model::Serializable
        # Default factory shared by every collection attribute; each call
        # hands back a fresh array.
        empty_list = -> { [] }

        attribute :left_source, :string
        attribute :right_source, :string
        attribute :field_changes, FieldChange, collection: true, default: empty_list
        attribute :codepoints, CodepointSetDiff
        attribute :added_features, :string, collection: true, default: empty_list
        attribute :removed_features, :string, collection: true, default: empty_list
        attribute :added_scripts, :string, collection: true, default: empty_list
        attribute :removed_scripts, :string, collection: true, default: empty_list
        attribute :added_blocks, :string, collection: true, default: empty_list
        attribute :removed_blocks, :string, collection: true, default: empty_list

        key_value do
          # Each serialized key is spelled exactly like its attribute.
          %w[
            left_source right_source
            field_changes codepoints
            added_features removed_features
            added_scripts removed_scripts
            added_blocks removed_blocks
          ].each { |key| map key, to: key.to_sym }
        end

        # Whether the two reports are identical as far as this diff can
        # tell: no codepoints added or removed, and every change list is
        # nil or empty. Handy for the text formatter.
        #
        # @return [Boolean]
        def empty?
          return false unless added_codepoints.zero? && removed_codepoints.zero?

          all_collections_empty?(
            field_changes,
            added_features, removed_features,
            added_scripts, removed_scripts,
            added_blocks, removed_blocks
          )
        end

        # @return [Integer] `codepoints.added_count`, or 0 when either the
        #   cmap delta or its count is absent
        def added_codepoints
          codepoints.nil? ? 0 : (codepoints.added_count || 0)
        end

        # @return [Integer] `codepoints.removed_count`, or 0 when either
        #   the cmap delta or its count is absent
        def removed_codepoints
          codepoints.nil? ? 0 : (codepoints.removed_count || 0)
        end

        private

        # True when none of the given collections holds an element; nil
        # counts as empty.
        def all_collections_empty?(*collections)
          collections.none? { |list| !list.nil? && !list.empty? }
        end
      end
    end
  end
end
@@ -0,0 +1,137 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Full audit report for one font face.
      #
      # One face per file, and self-describing: the provenance fields
      # (`source_file`, `source_sha256`, `font_index`,
      # `num_fonts_in_source`) tell a reader whether the face came from a
      # standalone font or from a collection, and the source hash lets
      # sibling faces be located.
      #
      # This is a passive data model with no font-parsing logic; per the
      # original notes, the AuditCommand + Extractors fill in every field.
      #
      # How it differs from fontisan's AuditReport:
      #
      # - CLDR is gone (`cldr_version`, `language_coverage`).
      # - `fontisan_version` is renamed to `ucode_version`.
      # - `ucd_version: String` becomes `baseline: Baseline` (richer
      #   provenance, paired with the resolved UCD database).
      # - `unicode_scripts: String[]` becomes `scripts: ScriptSummary[]`
      #   (structured per-script coverage).
      # - `blocks: AuditBlock` becomes `blocks: BlockSummary` (richer
      #   per-block status + plane tagging).
      # - `plane_summaries` is new (per-plane rollup).
      # - `discrepancies` is new (cheap audit signals).
      class AuditReport < Lutaml::Model::Serializable
        # Default factory shared by every collection attribute; each call
        # hands back a fresh array.
        empty_list = -> { [] }

        # --- Provenance ---
        attribute :generated_at, :string
        attribute :ucode_version, :string
        attribute :source_file, :string
        attribute :source_sha256, :string
        attribute :source_format, :string

        # --- Source layout ---
        attribute :font_index, :integer
        attribute :num_fonts_in_source, :integer

        # --- Identity (name table) ---
        attribute :family_name, :string
        attribute :subfamily_name, :string
        attribute :full_name, :string
        attribute :postscript_name, :string
        attribute :version, :string
        attribute :font_revision, :float

        # --- Style (OS/2 + head) ---
        attribute :weight_class, :integer
        attribute :width_class, :integer
        attribute :italic, Lutaml::Model::Type::Boolean
        attribute :bold, Lutaml::Model::Type::Boolean
        attribute :panose, :string

        # --- Coverage ---
        attribute :total_codepoints, :integer
        attribute :total_glyphs, :integer
        attribute :cmap_subtables, :integer, collection: true, default: empty_list
        attribute :codepoint_ranges, CodepointRange, collection: true, default: empty_list
        attribute :codepoints, :string, collection: true, default: empty_list

        # --- Aggregations (driven by ucode's own UCD, not ucd.all.flat.zip) ---
        attribute :baseline, Baseline
        attribute :blocks, BlockSummary, collection: true, default: empty_list
        attribute :scripts, ScriptSummary, collection: true, default: empty_list
        attribute :plane_summaries, PlaneSummary, collection: true, default: empty_list

        # --- Optional deep tables (nil for Type 1) ---
        attribute :licensing, Licensing
        attribute :metrics, Metrics
        attribute :hinting, Hinting
        attribute :color_capabilities, ColorCapabilities
        attribute :variation, VariationDetail
        attribute :opentype_layout, OpenTypeLayout

        # --- Audit signals ---
        attribute :discrepancies, Discrepancy, collection: true, default: empty_list
        attribute :warning, :string

        key_value do
          # Each serialized key is spelled exactly like its attribute; the
          # list is grouped the same way as the attribute sections above.
          %w[
            generated_at ucode_version source_file source_sha256 source_format
            font_index num_fonts_in_source
            family_name subfamily_name full_name postscript_name version font_revision
            weight_class width_class italic bold panose
            total_codepoints total_glyphs cmap_subtables codepoint_ranges codepoints
            baseline blocks scripts plane_summaries
            licensing metrics hinting color_capabilities variation opentype_layout
            discrepancies warning
          ].each { |key| map key, to: key.to_sym }
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Describes the UCD baseline an {AuditReport} was measured against.
      #
      # Stands in for fontisan's bare `ucd_version: String`. The extra
      # fields record which UCD build was used, which ucode/fontisan
      # versions produced the baseline, and when it was generated.
      class Baseline < Lutaml::Model::Serializable
        attribute :unicode_version, :string
        attribute :ucode_version, :string
        attribute :fontisan_version, :string
        attribute :source, :string
        attribute :generated_at, :string

        key_value do
          # Each serialized key is spelled exactly like its attribute.
          %w[
            unicode_version ucode_version fontisan_version source generated_at
          ].each { |key| map key, to: key.to_sym }
        end
      end
    end
  end
end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Coverage row for a single Unicode block on an {AuditReport}.
      #
      # Successor to fontisan's `AuditBlock`. Coverage is computed against
      # ucode's own UCD baseline (not the legacy ucd.all.flat.zip), and an
      # explicit `status` string is stored so consumers can filter or sort
      # without recomputing it from the raw counts.
      class BlockSummary < Lutaml::Model::Serializable
        STATUS_COMPLETE = "COMPLETE"
        STATUS_PARTIAL = "PARTIAL"
        STATUS_UNCOVERED_ASSIGNED = "UNCOVERED_ASSIGNED"
        STATUS_NO_ASSIGNED_IN_BLOCK = "NO_ASSIGNED_IN_BLOCK"
        STATUS_OUTSIDE_BASELINE = "OUTSIDE_BASELINE"

        attribute :name, :string
        attribute :first_cp, :integer
        attribute :last_cp, :integer
        attribute :range, :string
        attribute :plane, :integer
        attribute :total_assigned, :integer
        attribute :covered_count, :integer
        attribute :missing_count, :integer
        attribute :coverage_percent, :float
        attribute :status, :string
        attribute :missing_codepoints, :integer, collection: true, default: -> { [] }
        attribute :covered_codepoints, :integer, collection: true, default: -> { [] }

        key_value do
          # Each serialized key is spelled exactly like its attribute.
          %w[
            name first_cp last_cp range plane
            total_assigned covered_count missing_count coverage_percent
            status missing_codepoints covered_codepoints
          ].each { |key| map key, to: key.to_sym }
        end

        # Map a block's counts onto one of the STATUS_* strings. Kept in
        # one place so the Aggregations extractor and downstream consumers
        # always agree.
        #
        # Checks run in this order: not in baseline, nothing assigned,
        # fully covered, nothing covered, otherwise partial.
        #
        # @param covered_count [Integer]
        # @param total_assigned [Integer]
        # @param in_baseline [Boolean] false if the block exists in the
        #   font's cmap but not in the resolved baseline (e.g. PUA blocks
        #   or a newer Unicode version than ucode knows about).
        # @return [String] one of the STATUS_* constants
        def self.derive_status(covered_count:, total_assigned:, in_baseline: true)
          if !in_baseline
            STATUS_OUTSIDE_BASELINE
          elsif total_assigned.zero?
            STATUS_NO_ASSIGNED_IN_BLOCK
          elsif total_assigned == covered_count
            STATUS_COMPLETE
          elsif covered_count == 0 # rubocop:disable Style/NumericPredicate
            STATUS_UNCOVERED_ASSIGNED
          else
            STATUS_PARTIAL
          end
        end
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Detail row for one codepoint; only produced in `--verbose` mode.
      #
      # Per the original notes these rows are written to a separate
      # `codepoints.json` by the directory emitter (TODO 13) so that the
      # main `audit.json` stays small. A row combines UCD metadata (name,
      # gc, script, age) with the font's glyph id and, optionally, a
      # relative path to the rendered SVG.
      class CodepointDetail < Lutaml::Model::Serializable
        attribute :codepoint, :integer
        attribute :name, :string
        attribute :general_category, :string
        attribute :script, :string
        attribute :script_extensions, :string, collection: true, default: -> { [] }
        attribute :block_name, :string
        attribute :age, :string
        attribute :glyph_id, :integer
        attribute :glyph_svg_path, :string

        key_value do
          # Each serialized key is spelled exactly like its attribute.
          %w[
            codepoint name general_category script script_extensions
            block_name age glyph_id glyph_svg_path
          ].each { |key| map key, to: key.to_sym }
        end

        # Display form of the codepoint: "U+" followed by uppercase hex,
        # zero-padded to at least four digits. Not serialized.
        #
        # @return [String]
        def cp_id
          sprintf("U+%04X", codepoint) # rubocop:disable Style/FormatString
        end
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # An unbroken run of covered codepoints.
      #
      # Both endpoints (`first_cp`, `last_cp`) are inclusive integers. A
      # run of length one has first_cp == last_cp and prints as `U+XXXX`
      # with no dash.
      #
      # Per the original notes, the cmap coverage coalescer builds these
      # from the font's cmap so that a 60k-codepoint CJK font is described
      # by tens of ranges instead of 60k individual strings.
      class CodepointRange < Lutaml::Model::Serializable
        attribute :first_cp, :integer
        attribute :last_cp, :integer

        key_value do
          # Each serialized key is spelled exactly like its attribute.
          %w[first_cp last_cp].each { |key| map key, to: key.to_sym }
        end

        # Readable form: `U+XXXX` when the range holds one codepoint,
        # `U+XXXX-U+XXXX` otherwise.
        #
        # @return [String]
        def to_s
          return format("U+%04<cp>X", cp: first_cp) if first_cp == last_cp

          format("U+%04<first>X-U+%04<last>X", first: first_cp, last: last_cp)
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Difference between two cmap codepoint sets.
      #
      # `added` and `removed` are stored as compact {CodepointRange}
      # lists, so a big delta (say, a CJK extension being added) shows up
      # as a few ranges instead of thousands of single codepoints.
      #
      # `unchanged_count` is the size of the intersection. It serves as a
      # sanity check that the two reports overlap enough for the
      # comparison to mean something.
      class CodepointSetDiff < Lutaml::Model::Serializable
        attribute :added, CodepointRange, collection: true, default: -> { [] }
        attribute :removed, CodepointRange, collection: true, default: -> { [] }
        attribute :added_count, :integer
        attribute :removed_count, :integer
        attribute :unchanged_count, :integer

        key_value do
          # Each serialized key is spelled exactly like its attribute.
          %w[
            added removed added_count removed_count unchanged_count
          ].each { |key| map key, to: key.to_sym }
        end
      end
    end
  end
end
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Summary of the color-font tables present in one face.
      #
      # It answers "is this a color font, and in which format(s)?". One
      # face may carry several formats at once (the original notes cite
      # NotoColorEmoji shipping COLR + CBDT + SVG so that both legacy and
      # modern stacks can render it).
      #
      # `color_formats` is filled in at extraction time, giving consumers
      # a flat list of tags rather than making them re-derive it from the
      # individual booleans. An empty list means no color support.
      class ColorCapabilities < Lutaml::Model::Serializable
        FORMAT_COLR_V0 = "colr_v0"
        FORMAT_COLR_V1 = "colr_v1"
        FORMAT_CPAL = "cpal"
        FORMAT_SVG = "svg"
        FORMAT_CBDT = "cbdt"
        FORMAT_SBIX = "sbix"

        # COLR (vector color glyphs).
        attribute :has_colr, Lutaml::Model::Type::Boolean
        attribute :colr_version, :integer
        attribute :colr_base_glyph_count, :integer
        attribute :colr_layer_count, :integer

        # CPAL (color palette).
        attribute :has_cpal, Lutaml::Model::Type::Boolean
        attribute :cpal_palette_count, :integer
        attribute :cpal_color_count, :integer

        # SVG-in-OpenType.
        attribute :has_svg, Lutaml::Model::Type::Boolean
        attribute :svg_document_count, :integer

        # CBDT/CBLC (color bitmaps — paired tables).
        attribute :has_cbdt, Lutaml::Model::Type::Boolean
        attribute :has_cblc, Lutaml::Model::Type::Boolean
        # Strike count comes from the paired CBLC locator table.
        attribute :cbdt_strike_count, :integer

        # sbix (Apple color bitmaps).
        attribute :has_sbix, Lutaml::Model::Type::Boolean
        attribute :sbix_strike_count, :integer

        # Derived: ordered list of active color format tags.
        attribute :color_formats, :string, collection: true, default: -> { [] }

        key_value do
          # Each serialized key is spelled exactly like its attribute.
          %w[
            has_colr colr_version colr_base_glyph_count colr_layer_count
            has_cpal cpal_palette_count cpal_color_count
            has_svg svg_document_count
            has_cbdt has_cblc cbdt_strike_count
            has_sbix sbix_strike_count
            color_formats
          ].each { |key| map key, to: key.to_sym }
        end

        # Build the color_formats list from the individual table flags.
        #
        # Output order is fixed: COLR, CPAL, SVG, CBDT, sbix. A COLR table
        # is tagged v1 only when `colr_version` equals 1; any other
        # version value (including nil) is tagged v0.
        #
        # @return [Array<String>] FORMAT_* tags for the flags that are set
        def self.derive_formats(has_colr:, colr_version:, has_cpal:,
                                has_svg:, has_cbdt:, has_sbix:)
          colr_tag = nil
          if has_colr
            colr_tag = colr_version == 1 ? FORMAT_COLR_V1 : FORMAT_COLR_V0
          end

          # Slots for unset flags are nil and dropped by compact.
          [
            colr_tag,
            (FORMAT_CPAL if has_cpal),
            (FORMAT_SVG if has_svg),
            (FORMAT_CBDT if has_cbdt),
            (FORMAT_SBIX if has_sbix)
          ].compact
        end
      end
    end
  end
end