ucode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. checksums.yaml +7 -0
  2. data/CLAUDE.md +211 -0
  3. data/Gemfile +22 -0
  4. data/Gemfile.lock +406 -0
  5. data/README.md +469 -0
  6. data/Rakefile +18 -0
  7. data/TODO.new/00-README.md +66 -0
  8. data/TODO.new/01-pillar-terminology-alignment.md +69 -0
  9. data/TODO.new/02-audit-schema-design.md +255 -0
  10. data/TODO.new/03-directory-output-spec.md +203 -0
  11. data/TODO.new/04-fontist-org-contract.md +173 -0
  12. data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
  13. data/TODO.new/06-audit-namespace-skeleton.md +105 -0
  14. data/TODO.new/07-audit-models-port.md +132 -0
  15. data/TODO.new/08-extractors-cheap-port.md +113 -0
  16. data/TODO.new/09-extractors-expensive-port.md +99 -0
  17. data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
  18. data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
  19. data/TODO.new/12-formatters-port.md +115 -0
  20. data/TODO.new/13-directory-emitter.md +147 -0
  21. data/TODO.new/14-html-face-browser.md +144 -0
  22. data/TODO.new/15-html-library-browser.md +102 -0
  23. data/TODO.new/16-cli-audit-subcommands.md +142 -0
  24. data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
  25. data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
  26. data/TODO.new/19-fontisan-docs-update.md +155 -0
  27. data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
  28. data/TODO.new/21-canonical-unicode17-build.md +148 -0
  29. data/TODO.new/22-implementation-order.md +176 -0
  30. data/UCODE_CHANGELOG.md +97 -0
  31. data/exe/ucode +8 -0
  32. data/lib/ucode/aggregator.rb +77 -0
  33. data/lib/ucode/audit/block_aggregator.rb +90 -0
  34. data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
  35. data/lib/ucode/audit/context.rb +137 -0
  36. data/lib/ucode/audit/discrepancy_detector.rb +213 -0
  37. data/lib/ucode/audit/extractors/aggregations.rb +70 -0
  38. data/lib/ucode/audit/extractors/base.rb +21 -0
  39. data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
  40. data/lib/ucode/audit/extractors/coverage.rb +55 -0
  41. data/lib/ucode/audit/extractors/hinting.rb +199 -0
  42. data/lib/ucode/audit/extractors/identity.rb +65 -0
  43. data/lib/ucode/audit/extractors/licensing.rb +75 -0
  44. data/lib/ucode/audit/extractors/metrics.rb +108 -0
  45. data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
  46. data/lib/ucode/audit/extractors/provenance.rb +34 -0
  47. data/lib/ucode/audit/extractors/style.rb +88 -0
  48. data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
  49. data/lib/ucode/audit/extractors.rb +31 -0
  50. data/lib/ucode/audit/plane_aggregator.rb +37 -0
  51. data/lib/ucode/audit/registry.rb +63 -0
  52. data/lib/ucode/audit/script_aggregator.rb +92 -0
  53. data/lib/ucode/audit.rb +27 -0
  54. data/lib/ucode/cache.rb +113 -0
  55. data/lib/ucode/cli.rb +272 -0
  56. data/lib/ucode/commands/build.rb +68 -0
  57. data/lib/ucode/commands/cache.rb +46 -0
  58. data/lib/ucode/commands/fetch.rb +62 -0
  59. data/lib/ucode/commands/font_coverage.rb +57 -0
  60. data/lib/ucode/commands/glyphs.rb +136 -0
  61. data/lib/ucode/commands/lookup.rb +65 -0
  62. data/lib/ucode/commands/parse.rb +62 -0
  63. data/lib/ucode/commands/site.rb +33 -0
  64. data/lib/ucode/commands.rb +19 -0
  65. data/lib/ucode/config.rb +110 -0
  66. data/lib/ucode/coordinator/indices.rb +34 -0
  67. data/lib/ucode/coordinator.rb +397 -0
  68. data/lib/ucode/database.rb +214 -0
  69. data/lib/ucode/db_builder.rb +107 -0
  70. data/lib/ucode/error.rb +96 -0
  71. data/lib/ucode/fetch/code_charts.rb +57 -0
  72. data/lib/ucode/fetch/http.rb +83 -0
  73. data/lib/ucode/fetch/ucd_zip.rb +57 -0
  74. data/lib/ucode/fetch/unihan_zip.rb +57 -0
  75. data/lib/ucode/fetch.rb +14 -0
  76. data/lib/ucode/glyphs/cell_extractor.rb +130 -0
  77. data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
  78. data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
  79. data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
  80. data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
  81. data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
  82. data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
  83. data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
  84. data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
  85. data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
  86. data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
  87. data/lib/ucode/glyphs/grid.rb +30 -0
  88. data/lib/ucode/glyphs/grid_detector.rb +165 -0
  89. data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
  90. data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
  91. data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
  92. data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
  93. data/lib/ucode/glyphs/last_resort/source.rb +125 -0
  94. data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
  95. data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
  96. data/lib/ucode/glyphs/last_resort.rb +36 -0
  97. data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
  98. data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
  99. data/lib/ucode/glyphs/page_renderer.rb +221 -0
  100. data/lib/ucode/glyphs/path_bbox.rb +62 -0
  101. data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
  102. data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
  103. data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
  104. data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
  105. data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
  106. data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
  107. data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
  108. data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
  109. data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
  110. data/lib/ucode/glyphs/real_fonts.rb +32 -0
  111. data/lib/ucode/glyphs/writer.rb +250 -0
  112. data/lib/ucode/glyphs.rb +27 -0
  113. data/lib/ucode/index.rb +106 -0
  114. data/lib/ucode/index_builder.rb +94 -0
  115. data/lib/ucode/models/audit/audit_axis.rb +30 -0
  116. data/lib/ucode/models/audit/audit_diff.rb +77 -0
  117. data/lib/ucode/models/audit/audit_report.rb +137 -0
  118. data/lib/ucode/models/audit/baseline.rb +32 -0
  119. data/lib/ucode/models/audit/block_summary.rb +72 -0
  120. data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
  121. data/lib/ucode/models/audit/codepoint_range.rb +39 -0
  122. data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
  123. data/lib/ucode/models/audit/color_capabilities.rb +91 -0
  124. data/lib/ucode/models/audit/discrepancy.rb +38 -0
  125. data/lib/ucode/models/audit/duplicate_group.rb +23 -0
  126. data/lib/ucode/models/audit/embedding_type.rb +81 -0
  127. data/lib/ucode/models/audit/field_change.rb +28 -0
  128. data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
  129. data/lib/ucode/models/audit/gasp_range.rb +63 -0
  130. data/lib/ucode/models/audit/hinting.rb +99 -0
  131. data/lib/ucode/models/audit/library_summary.rb +40 -0
  132. data/lib/ucode/models/audit/licensing.rb +48 -0
  133. data/lib/ucode/models/audit/metrics.rb +111 -0
  134. data/lib/ucode/models/audit/named_instance.rb +41 -0
  135. data/lib/ucode/models/audit/opentype_layout.rb +38 -0
  136. data/lib/ucode/models/audit/plane_summary.rb +31 -0
  137. data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
  138. data/lib/ucode/models/audit/script_features.rb +28 -0
  139. data/lib/ucode/models/audit/script_summary.rb +54 -0
  140. data/lib/ucode/models/audit/variation_detail.rb +42 -0
  141. data/lib/ucode/models/audit.rb +50 -0
  142. data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
  143. data/lib/ucode/models/bidi_mirroring.rb +19 -0
  144. data/lib/ucode/models/binary_property_assignment.rb +26 -0
  145. data/lib/ucode/models/block.rb +36 -0
  146. data/lib/ucode/models/case_folding_rule.rb +23 -0
  147. data/lib/ucode/models/cjk_radical.rb +23 -0
  148. data/lib/ucode/models/codepoint/bidi.rb +28 -0
  149. data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
  150. data/lib/ucode/models/codepoint/case_folding.rb +25 -0
  151. data/lib/ucode/models/codepoint/casing.rb +32 -0
  152. data/lib/ucode/models/codepoint/decomposition.rb +27 -0
  153. data/lib/ucode/models/codepoint/display.rb +24 -0
  154. data/lib/ucode/models/codepoint/emoji.rb +29 -0
  155. data/lib/ucode/models/codepoint/hangul.rb +20 -0
  156. data/lib/ucode/models/codepoint/identifier.rb +30 -0
  157. data/lib/ucode/models/codepoint/indic.rb +20 -0
  158. data/lib/ucode/models/codepoint/joining.rb +20 -0
  159. data/lib/ucode/models/codepoint/normalization.rb +35 -0
  160. data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
  161. data/lib/ucode/models/codepoint.rb +122 -0
  162. data/lib/ucode/models/name_alias.rb +21 -0
  163. data/lib/ucode/models/named_sequence.rb +19 -0
  164. data/lib/ucode/models/names_list_entry.rb +38 -0
  165. data/lib/ucode/models/plane.rb +36 -0
  166. data/lib/ucode/models/property_alias.rb +24 -0
  167. data/lib/ucode/models/property_value_alias.rb +26 -0
  168. data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
  169. data/lib/ucode/models/relationship/cross_reference.rb +17 -0
  170. data/lib/ucode/models/relationship/footnote.rb +24 -0
  171. data/lib/ucode/models/relationship/informal_alias.rb +18 -0
  172. data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
  173. data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
  174. data/lib/ucode/models/relationship.rb +57 -0
  175. data/lib/ucode/models/script.rb +41 -0
  176. data/lib/ucode/models/special_casing_rule.rb +28 -0
  177. data/lib/ucode/models/standardized_variant.rb +24 -0
  178. data/lib/ucode/models/unihan_entry.rb +23 -0
  179. data/lib/ucode/models.rb +47 -0
  180. data/lib/ucode/parsers/auxiliary.rb +26 -0
  181. data/lib/ucode/parsers/base.rb +137 -0
  182. data/lib/ucode/parsers/bidi_brackets.rb +41 -0
  183. data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
  184. data/lib/ucode/parsers/blocks.rb +63 -0
  185. data/lib/ucode/parsers/case_folding.rb +53 -0
  186. data/lib/ucode/parsers/cjk_radicals.rb +102 -0
  187. data/lib/ucode/parsers/derived_age.rb +59 -0
  188. data/lib/ucode/parsers/derived_core_properties.rb +60 -0
  189. data/lib/ucode/parsers/extracted_properties.rb +74 -0
  190. data/lib/ucode/parsers/name_aliases.rb +44 -0
  191. data/lib/ucode/parsers/named_sequences.rb +51 -0
  192. data/lib/ucode/parsers/names_list.rb +250 -0
  193. data/lib/ucode/parsers/property_aliases.rb +41 -0
  194. data/lib/ucode/parsers/property_value_aliases.rb +46 -0
  195. data/lib/ucode/parsers/script_extensions.rb +64 -0
  196. data/lib/ucode/parsers/scripts.rb +60 -0
  197. data/lib/ucode/parsers/special_casing.rb +62 -0
  198. data/lib/ucode/parsers/standardized_variants.rb +56 -0
  199. data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
  200. data/lib/ucode/parsers/unicode_data.rb +268 -0
  201. data/lib/ucode/parsers/unihan.rb +125 -0
  202. data/lib/ucode/parsers.rb +35 -0
  203. data/lib/ucode/range_entry.rb +58 -0
  204. data/lib/ucode/repo/aggregate_writer.rb +364 -0
  205. data/lib/ucode/repo/atomic_writes.rb +48 -0
  206. data/lib/ucode/repo/codepoint_writer.rb +96 -0
  207. data/lib/ucode/repo/paths.rb +122 -0
  208. data/lib/ucode/repo.rb +22 -0
  209. data/lib/ucode/site/config_emitter.rb +124 -0
  210. data/lib/ucode/site/generator.rb +178 -0
  211. data/lib/ucode/site/search_index.rb +68 -0
  212. data/lib/ucode/site/template/.gitignore +4 -0
  213. data/lib/ucode/site/template/.vitepress/config.ts +8 -0
  214. data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
  215. data/lib/ucode/site/template/char/[codepoint].md +13 -0
  216. data/lib/ucode/site/template/components/BlockView.vue +57 -0
  217. data/lib/ucode/site/template/components/CharView.vue +85 -0
  218. data/lib/ucode/site/template/components/PlaneView.vue +56 -0
  219. data/lib/ucode/site/template/components/SearchView.vue +66 -0
  220. data/lib/ucode/site/template/index.md +25 -0
  221. data/lib/ucode/site/template/package.json +18 -0
  222. data/lib/ucode/site/template/search.md +9 -0
  223. data/lib/ucode/site.rb +13 -0
  224. data/lib/ucode/version.rb +5 -0
  225. data/lib/ucode/version_resolver.rb +76 -0
  226. data/lib/ucode.rb +74 -0
  227. data/ucode.gemspec +56 -0
  228. metadata +404 -0
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fontisan"
4
+
5
module Ucode
  module Audit
    module Extractors
      # Summarises a face's OpenType layout tables: the script tags found
      # in GSUB and GPOS, the feature tags each script exposes per table,
      # and the flattened union of every feature tag.
      #
      # Returned fields:
      #   opentype_layout: Models::Audit::OpenTypeLayout, or nil when the
      #                    font is not a Fontisan::SfntFont (e.g. Type 1)
      #
      # Layout data is owned here; Aggregations stays UCD-only (MECE).
      class OpenTypeLayout < Base
        # @param context [Ucode::Audit::Context]
        # @return [Hash{Symbol=>Object}]
        def extract(context)
          face = context.font
          return { opentype_layout: nil } unless sfnt?(face)

          substitution_scripts = scripts_in(face, "GSUB")
          positioning_scripts = scripts_in(face, "GPOS")
          script_tags = (substitution_scripts + positioning_scripts).uniq.sort
          rows = script_tags.map { |script_tag| script_row(face, script_tag) }

          layout = Models::Audit::OpenTypeLayout.new(
            scripts: script_tags,
            features: aggregate_features(rows),
            by_script: rows,
            has_gsub: face.has_table?("GSUB"),
            has_gpos: face.has_table?("GPOS"),
          )
          { opentype_layout: layout }
        end

        private

        # True only for SFNT-backed fonts; anything else has no layout tables.
        def sfnt?(font)
          font.is_a?(Fontisan::SfntFont)
        end

        # Script tags declared by the given layout table, or [] when the
        # table is absent.
        def scripts_in(font, tag)
          font.has_table?(tag) ? font.table(tag).scripts : []
        end

        # Sorted feature tags for one script in the given layout table, or
        # [] when the table is absent.
        def features_for(font, tag, script)
          return [] unless font.has_table?(tag)

          font.table(tag).features(script_tag: script).sort
        end

        # One per-script row holding the GSUB and GPOS feature lists.
        def script_row(font, script_tag)
          Models::Audit::ScriptFeatures.new(
            script: script_tag,
            gsub_features: features_for(font, "GSUB", script_tag),
            gpos_features: features_for(font, "GPOS", script_tag),
          )
        end

        # Sorted, de-duplicated union of every feature tag across all rows.
        def aggregate_features(by_script)
          substitution = by_script.flat_map(&:gsub_features)
          positioning = by_script.flat_map(&:gpos_features)
          (substitution | positioning).sort
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+ require "time"
5
+
6
module Ucode
  module Audit
    module Extractors
      # Records where an audit report came from: generation timestamp,
      # the ucode version that produced it, and the identity of the
      # source font file.
      #
      # Returned fields:
      #   generated_at, ucode_version, source_file, source_sha256,
      #   source_format, font_index, num_fonts_in_source
      #
      # ucode delta vs fontisan: `fontisan_version` became `ucode_version`
      # and is read from `Ucode::VERSION`.
      class Provenance < Base
        # @param context [Ucode::Audit::Context]
        # @return [Hash{Symbol=>Object}]
        def extract(context)
          stamp_fields.merge(source_fields(context))
        end

        private

        # When the report was generated (UTC, ISO 8601) and by which version.
        def stamp_fields
          {
            generated_at: Time.now.utc.iso8601,
            ucode_version: Ucode::VERSION,
          }
        end

        # Absolute path and SHA-256 of the source file, plus the
        # format / index details carried by the context.
        def source_fields(context)
          {
            source_file: File.expand_path(context.font_path),
            source_sha256: Digest::SHA256.file(context.font_path).hexdigest,
            source_format: context.source_format,
            font_index: context.font_index,
            num_fonts_in_source: context.num_fonts_in_source,
          }
        end
      end
    end
  end
end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
module Ucode
  module Audit
    module Extractors
      # Style fields: weight, width, italic/bold flags, Panose family
      # classification.
      #
      # Returned fields:
      #   weight_class, width_class, italic, bold, panose
      #
      # ucode delta vs fontisan: the standalone `StyleExtractor` class is
      # NOT ported. The OS/2 + head interpretation rules live inline here.
      #
      # MECE: this extractor owns OS/2 + head. fvar-derived fields (axes,
      # named instances, variable presence) live on the VariationDetail
      # extractor (TODO 09).
      #
      # Boundary: uses only `font.has_table?(tag)` and `font.table(tag)`.
      # No class-specific branching — any SFNT-compatible font works.
      class Style < Base
        FS_SELECTION_ITALIC_BIT = 0
        MAC_STYLE_BOLD_BIT = 0
        private_constant :FS_SELECTION_ITALIC_BIT, :MAC_STYLE_BOLD_BIT

        # @param context [Ucode::Audit::Context]
        # @return [Hash{Symbol=>Object}] weight_class / width_class are
        #   Integers, italic / bold are true or false, panose is a String;
        #   each is nil when its source table is absent
        def extract(context)
          font = context.font
          {
            weight_class: weight_class(font),
            width_class: width_class(font),
            italic: italic(font),
            bold: bold(font),
            panose: panose(font),
          }
        end

        private

        # OS/2.usWeightClass as an Integer; nil without an OS/2 table.
        def weight_class(font)
          os2(font)&.us_weight_class&.to_i
        end

        # OS/2.usWidthClass as an Integer; nil without an OS/2 table.
        def width_class(font)
          os2(font)&.us_width_class&.to_i
        end

        # OS/2.fsSelection bit 0 (ITALIC).
        #
        # @return [Boolean, nil] true/false when OS/2 exists, nil when it
        #   does not — so "not italic" stays distinguishable from "unknown"
        def italic(font)
          table = os2(font)
          return nil if table.nil?

          bit_set?(table.fs_selection, FS_SELECTION_ITALIC_BIT)
        end

        # head.macStyle bit 0 (BOLD). Per OpenType convention, bold is
        # read from head, not OS/2.
        #
        # @return [Boolean, nil] true/false when head exists, nil otherwise
        def bold(font)
          table = head(font)
          return nil if table.nil?

          bit_set?(table.mac_style, MAC_STYLE_BOLD_BIT)
        end

        # OS/2.panose as a space-joined 10-digit string,
        # e.g. "2 0 5 3 0 0 0 0 0 0". nil when no OS/2 table or when the
        # panose field is empty.
        def panose(font)
          bytes = os2(font)&.panose
          return nil if bytes.nil?

          bytes = bytes.to_a
          return nil if bytes.empty?

          bytes.join(" ")
        end

        # Strict boolean test of a single bit. `Integer#nonzero?` is not
        # used because it returns the masked integer or nil, never
        # true/false.
        def bit_set?(field, bit)
          field.to_i.anybits?(1 << bit)
        end

        def os2(font)
          font.has_table?("OS/2") ? font.table("OS/2") : nil
        end

        def head(font)
          font.has_table?("head") ? font.table("head") : nil
        end
      end
    end
  end
end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fontisan"
4
+
5
module Ucode
  module Audit
    module Extractors
      # Describes a variable font: its fvar axes and named instances,
      # plus a presence flag for each variation side-table (avar, cvar,
      # HVAR, VVAR, MVAR, gvar).
      #
      # Returned fields:
      #   variation: Models::Audit::VariationDetail, or nil for non-variable
      #              faces and Type 1 fonts
      #
      # "Variable" here means the fvar table is present. CFF2 outlines
      # without fvar are not treated as variable (they may carry variation
      # data but expose no user-facing axes).
      class VariationDetail < Base
        # @param context [Ucode::Audit::Context]
        # @return [Hash{Symbol=>Object}]
        def extract(context)
          face = context.font
          return { variation: nil } unless variable?(face)

          fvar = face.table("fvar")
          return { variation: nil } unless fvar

          names = face.has_table?("name") ? face.table("name") : nil
          tags = axis_tags_from(fvar)

          detail = Models::Audit::VariationDetail.new(
            axes: build_axes(names, fvar),
            named_instances: build_instances(names, fvar, tags),
            has_avar: face.has_table?("avar"),
            has_cvar: face.has_table?("cvar"),
            has_hvar: face.has_table?("HVAR"),
            has_vvar: face.has_table?("VVAR"),
            has_mvar: face.has_table?("MVAR"),
            has_gvar: face.has_table?("gvar"),
          )
          { variation: detail }
        end

        private

        # SFNT font that carries an fvar table.
        def variable?(font)
          font.is_a?(Fontisan::SfntFont) && font.has_table?("fvar")
        end

        # One AuditAxis per fvar axis; [] when fvar exposes no axes.
        def build_axes(name_table, fvar)
          (fvar.axes || []).map do |axis|
            Models::Audit::AuditAxis.new(
              tag: axis.axis_tag,
              min_value: axis.min_value,
              default_value: axis.default_value,
              max_value: axis.max_value,
              name: english_name(name_table, axis.axis_name_id),
            )
          end
        end

        # One NamedInstance per fvar instance; [] when there are none.
        def build_instances(name_table, fvar, axis_tags)
          (fvar.instances || []).map do |entry|
            build_instance(name_table, entry, axis_tags)
          end
        end

        # Resolves the instance's subfamily / PostScript names through the
        # name table and pairs its coordinates with the axis tags.
        def build_instance(name_table, instance, axis_tags)
          Models::Audit::NamedInstance.new(
            subfamily_name: english_name(name_table, instance[:name_id]),
            postscript_name: english_name(name_table, instance[:postscript_name_id]),
            coordinates: Models::Audit::NamedInstance.format_coordinates(
              axis_tags, instance[:coordinates]
            ),
          )
        end

        # nil when either the name table or the name id is missing.
        def english_name(name_table, name_id)
          name_table.english_name(name_id) if name_table && name_id
        end

        # Axis tags in fvar order; [] when fvar exposes no axes.
        def axis_tags_from(fvar)
          (fvar.axes || []).map(&:axis_tag)
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Autoload hub for the Ucode::Audit::Extractors namespace.
4
+ #
5
+ # Each extractor is a small MECE class with a single `#extract(context)`
6
+ # method returning a hash of AuditReport fields. The Audit::Registry
7
+ # declares the ordered list.
8
module Ucode
  module Audit
    module Extractors
      # Constant name => file name under ucode/audit/extractors/.
      # Registration order follows the table.
      {
        # Cheap extractors (TODO 08) — brief-mode eligible.
        Base: "base",
        Provenance: "provenance",
        Identity: "identity",
        Style: "style",
        Licensing: "licensing",
        Coverage: "coverage",

        # Expensive extractors (TODO 09) — full-mode only.
        Metrics: "metrics",
        Hinting: "hinting",
        ColorCapabilities: "color_capabilities",
        VariationDetail: "variation_detail",
        OpenTypeLayout: "opentype_layout",

        # Aggregations (TODO 10) — full-mode only. Driven by ucode's own
        # UCD baseline, so it depends on baseline resolution succeeding.
        Aggregations: "aggregations",
      }.each do |constant_name, file_name|
        autoload constant_name, "ucode/audit/extractors/#{file_name}"
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module Ucode
  module Audit
    # Collapses a list of {Models::Audit::BlockSummary} rows into one
    # {Models::Audit::PlaneSummary} per Unicode plane.
    #
    # Works purely on the rows it is given: it groups them by plane, adds
    # up the per-block counts and returns the results ordered by plane
    # number.
    class PlaneAggregator
      # @param block_summaries [Array<Models::Audit::BlockSummary>]
      # @return [Array<Models::Audit::PlaneSummary>] sorted by plane
      def call(block_summaries)
        summaries = block_summaries.group_by(&:plane).map do |plane_number, members|
          summarize(plane_number, members)
        end
        summaries.sort_by(&:plane)
      end

      private

      # Builds the rollup row for a single plane from its block rows.
      def summarize(plane_number, members)
        assigned_sum = members.sum { |block| block.total_assigned }
        covered_sum = members.sum { |block| block.covered_count }

        Models::Audit::PlaneSummary.new(
          plane: plane_number,
          blocks_total: members.size,
          assigned_total: assigned_sum,
          covered_total: covered_sum,
          coverage_percent: percent(covered_sum, assigned_sum),
        )
      end

      # Coverage as a percentage rounded to two decimals; 0.0 when there
      # is nothing assigned (avoids dividing by zero).
      def percent(covered, total)
        total.zero? ? 0.0 : (covered.to_f / total * 100).round(2)
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
module Ucode
  module Audit
    # The ordered extractor classes run for every audited face.
    #
    # Ordering only affects human-readable output (text formatter); the
    # extractors are independent and their result hashes are merged into
    # one hash before the AuditReport is built.
    #
    # MECE / OCP: adding a concern means one new file under
    # `Ucode::Audit::Extractors::*` plus one line in the matching list
    # below. AuditCommand never enumerates extractors directly.
    module Registry
      # Brief audit: the cheap extractors only. Used by
      # `ucode audit --brief` for a fast inventory pass. Stable list —
      # the expensive extractors are never part of brief mode.
      BRIEF_EXTRACTORS = [
        Extractors::Provenance,
        Extractors::Identity,
        Extractors::Style,
        Extractors::Licensing,
        Extractors::Coverage,
      ].freeze

      # Full audit: the five cheap extractors (TODO 08) followed by the
      # five expensive ones (TODO 09). Aggregations (TODO 10) stays last
      # because it depends on UCD baseline resolution.
      ORDERED_EXTRACTORS = [
        *BRIEF_EXTRACTORS,
        Extractors::Metrics,
        Extractors::Hinting,
        Extractors::ColorCapabilities,
        Extractors::VariationDetail,
        Extractors::OpenTypeLayout,
        Extractors::Aggregations,
      ].freeze

      class << self
        # Yields each extractor class for the given mode, in order.
        #
        # @param mode [Symbol] :full (default) or :brief
        # @yieldparam extractor_class [Class]
        # @return [void]
        def each(mode: :full, &block)
          extractors_for(mode).each(&block)
        end

        # Any mode other than :brief selects the full list.
        #
        # @param mode [Symbol] :full or :brief
        # @return [Array<Class>] the extractor list for the given mode
        def extractors_for(mode)
          mode == :brief ? BRIEF_EXTRACTORS : ORDERED_EXTRACTORS
        end
      end
    end
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
module Ucode
  module Audit
    # Produces one {Models::Audit::ScriptSummary} per touched Unicode
    # script for a font's cmap codepoint set, compared against a ucode
    # UCD baseline.
    #
    # Pure transformation: takes the resolved baseline Database + the
    # font's codepoint list, returns ScriptSummary[].
    #
    # v1 scope: uses the Scripts.txt primary-script lookup (one ISO
    # 15924 code per codepoint). ScriptExtensions — where a single
    # codepoint contributes to multiple scripts (e.g. punctuation used
    # across Latn, Grek, Cyrl) — requires a Database schema bump and
    # is intentionally deferred.
    class ScriptAggregator
      # @param database [Ucode::Database, nil] resolved baseline; nil
      #   makes {#call} return an empty list
      def initialize(database)
        @database = database
      end

      # Accepts any Enumerable (Array, Set, Range, Enumerator, ...). An
      # empty input needs no special case: grouping it yields no scripts,
      # so the result is [].
      #
      # @param codepoints [Enumerable<Integer>]
      # @return [Array<Models::Audit::ScriptSummary>] sorted by script_code
      def call(codepoints)
        return [] if @database.nil?

        group_by_script(codepoints)
          .map { |code, covered| build_summary(code, covered) }
          .sort_by(&:script_code)
      end

      private

      # Buckets codepoints by the script code the database reports for
      # them; codepoints with no script are dropped.
      def group_by_script(codepoints)
        codepoints.each_with_object(Hash.new { |h, k| h[k] = [] }) do |cp, acc|
          code = @database.lookup_script(cp)
          acc[code] << cp if code
        end
      end

      # Compares the covered codepoints of one script against every
      # codepoint the baseline assigns to that script.
      def build_summary(script_code, covered_cps)
        ranges = @database.script_ranges_by_name(script_code)
        assigned_set = expand_assigned(ranges)
        # Intersect so duplicates and out-of-range codepoints never
        # inflate the covered count.
        covered_set = covered_cps.to_set & assigned_set
        Models::Audit::ScriptSummary.new(
          script_code: script_code,
          script_name: script_name_for(script_code),
          blocks_total: count_distinct_blocks(ranges),
          assigned_total: assigned_set.size,
          covered_total: covered_set.size,
          coverage_percent: percent(covered_set.size, assigned_set.size),
          status: Models::Audit::ScriptSummary.derive_status(
            covered_total: covered_set.size,
            assigned_total: assigned_set.size,
          ),
        )
      end

      # Every codepoint covered by the given ranges, as a Set.
      def expand_assigned(ranges)
        ranges.each_with_object(Set.new) do |r, acc|
          acc.merge(r.first_cp..r.last_cp)
        end
      end

      # Distinct block names that any of this script's ranges overlaps.
      # "How many Unicode blocks contain codepoints of this script?"
      def count_distinct_blocks(ranges)
        names = Set.new
        ranges.each do |r|
          @database.each_block_overlapping(r.first_cp, r.last_cp)
            .each { |e| names << e.name }
        end
        names.size
      end

      def script_name_for(code)
        # The Database stores ISO 15924 codes (Latn, Grek, ...). The
        # long-form name lives in PropertyValueAliases.txt; the audit
        # does not need it for v1 — code is canonical and consumers
        # can resolve the long form downstream.
        code
      end

      # Coverage as a percentage rounded to two decimals; 0.0 when the
      # script has no assigned codepoints.
      def percent(covered, total)
        return 0.0 if total.zero?

        (covered.to_f / total * 100).round(2)
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Autoload hub for the Ucode::Audit namespace.
4
+ #
5
+ # The audit pipeline takes a font face (or a library of faces) and
6
+ # produces a structured {Models::Audit::AuditReport} per face, plus a
7
+ # {Models::Audit::LibrarySummary} for directory-level rollups. The
8
+ # pipeline is:
9
+ #
10
+ # CLI → AuditCommand → Context → Registry.each { |extractor| ... }
11
+ # → merge hashes → AuditReport
12
+ #
13
+ # MECE: every concern (provenance, identity, style, licensing, coverage,
14
+ # aggregations, etc.) lives in exactly one Extractor. Adding a concern =
15
+ # one file under Extractors/ + one line in Registry.
16
module Ucode
  module Audit
    # Constant name => file name under ucode/audit/. Registration order
    # follows the table.
    {
      Context: "context",
      Registry: "registry",
      Extractors: "extractors",
      CodepointRangeCoalescer: "codepoint_range_coalescer",
      BlockAggregator: "block_aggregator",
      ScriptAggregator: "script_aggregator",
      PlaneAggregator: "plane_aggregator",
      DiscrepancyDetector: "discrepancy_detector",
    }.each do |constant_name, file_name|
      autoload constant_name, "ucode/audit/#{file_name}"
    end
  end
end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
module Ucode
  # On-disk cache layout for fetched UCD sources and derived indices.
  #
  # Pure filesystem module. No network access, no parsing. Reads
  # Ucode.configuration.cache_root for the root path so tests can swap
  # roots without touching ENV.
  #
  # Layout per version:
  #
  #   <root>/<version>/
  #     ucd/       # extracted UCD.zip
  #     unihan/    # extracted Unihan.zip
  #     pdfs/      # per-block PDFs
  #     index/     # blocks.yml, scripts.yml (YAML bsearch index)
  #     sqlite/    # ucode.sqlite3 (primary lookup)
  module Cache
    UCD_DIR = "ucd"
    UNIHAN_DIR = "unihan"
    PDFS_DIR = "pdfs"
    INDEX_DIR = "index"
    SQLITE_DIR = "sqlite"
    SQLITE_FILENAME = "ucode.sqlite3"
    BLOCKS_INDEX_FILENAME = "blocks.yml"
    SCRIPTS_INDEX_FILENAME = "scripts.yml"

    private_constant :UCD_DIR, :UNIHAN_DIR, :PDFS_DIR, :INDEX_DIR,
                     :SQLITE_DIR, :SQLITE_FILENAME,
                     :BLOCKS_INDEX_FILENAME, :SCRIPTS_INDEX_FILENAME

    class << self
      # @return [Pathname]
      def root
        Ucode.configuration.cache_root
      end

      # @param version [String] e.g. "17.0.0"
      # @return [Pathname]
      def version_dir(version)
        root.join(version)
      end

      # @param version [String]
      # @return [Pathname] directory holding the extracted UCD.zip
      def ucd_dir(version)
        version_dir(version).join(UCD_DIR)
      end

      # @param version [String]
      # @return [Pathname] directory holding the extracted Unihan.zip
      def unihan_dir(version)
        version_dir(version).join(UNIHAN_DIR)
      end

      # @param version [String]
      # @return [Pathname] directory holding the per-block PDFs
      def pdfs_dir(version)
        version_dir(version).join(PDFS_DIR)
      end

      # @param version [String]
      # @return [Pathname] directory holding the YAML indices
      def index_dir(version)
        version_dir(version).join(INDEX_DIR)
      end

      # @param version [String]
      # @return [Pathname] directory holding the SQLite database
      def sqlite_dir(version)
        version_dir(version).join(SQLITE_DIR)
      end

      # @param version [String]
      # @return [Pathname]
      def sqlite_path(version)
        sqlite_dir(version).join(SQLITE_FILENAME)
      end

      # @param version [String]
      # @return [Pathname]
      def blocks_index_path(version)
        index_dir(version).join(BLOCKS_INDEX_FILENAME)
      end

      # @param version [String]
      # @return [Pathname]
      def scripts_index_path(version)
        index_dir(version).join(SCRIPTS_INDEX_FILENAME)
      end

      # True if the directory for `version` exists.
      # @param version [String]
      # @return [Boolean]
      def cached?(version)
        version_dir(version).directory?
      end

      # All versions present in the cache, sorted ascending by version
      # number. Components are compared numerically, so "9.0.0" sorts
      # before "17.0.0" (a plain string sort would put it last). The
      # directory name is the tiebreaker, which keeps the order
      # deterministic for non-numeric names.
      # @return [Array<String>]
      def cached_versions
        return [] unless root.directory?

        names = root.children.select(&:directory?).map { |path| path.basename.to_s }
        names.sort_by { |name| [version_sort_key(name), name] }
      end

      # Idempotent: create the per-version subdirectory tree.
      # @param version [String]
      # @return [void]
      def ensure_version_dir!(version)
        ucd_dir(version).mkpath
        unihan_dir(version).mkpath
        pdfs_dir(version).mkpath
        index_dir(version).mkpath
        sqlite_dir(version).mkpath
      end

      # Remove a version from the cache. No-op if absent.
      #
      # The deletion is recursive, so `version` must name a single
      # directory directly under the cache root. Empty strings, "." /
      # ".." and anything containing a path separator are rejected;
      # otherwise `root.join(version)` could resolve to the root itself
      # or to a directory outside it.
      #
      # @param version [String]
      # @return [void]
      # @raise [ArgumentError] when `version` is not a single path segment
      def remove_version(version)
        unless single_segment?(version)
          raise ArgumentError, "invalid cache version: #{version.inspect}"
        end

        dir = version_dir(version)
        dir.rmtree if dir.exist?
      end

      private

      # Numeric sort key for a dotted version string, e.g. "17.0.0" =>
      # [17, 0, 0]. Non-numeric components become 0.
      def version_sort_key(name)
        name.split(".").map(&:to_i)
      end

      # True when `version` is a plain directory name: non-empty, not a
      # relative-navigation entry, and free of path separators.
      def single_segment?(version)
        name = version.to_s
        return false if name.empty? || name == "." || name == ".."

        [File::SEPARATOR, File::ALT_SEPARATOR].compact.none? { |sep| name.include?(sep) }
      end
    end
  end
end