ucode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. checksums.yaml +7 -0
  2. data/CLAUDE.md +211 -0
  3. data/Gemfile +22 -0
  4. data/Gemfile.lock +406 -0
  5. data/README.md +469 -0
  6. data/Rakefile +18 -0
  7. data/TODO.new/00-README.md +66 -0
  8. data/TODO.new/01-pillar-terminology-alignment.md +69 -0
  9. data/TODO.new/02-audit-schema-design.md +255 -0
  10. data/TODO.new/03-directory-output-spec.md +203 -0
  11. data/TODO.new/04-fontist-org-contract.md +173 -0
  12. data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
  13. data/TODO.new/06-audit-namespace-skeleton.md +105 -0
  14. data/TODO.new/07-audit-models-port.md +132 -0
  15. data/TODO.new/08-extractors-cheap-port.md +113 -0
  16. data/TODO.new/09-extractors-expensive-port.md +99 -0
  17. data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
  18. data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
  19. data/TODO.new/12-formatters-port.md +115 -0
  20. data/TODO.new/13-directory-emitter.md +147 -0
  21. data/TODO.new/14-html-face-browser.md +144 -0
  22. data/TODO.new/15-html-library-browser.md +102 -0
  23. data/TODO.new/16-cli-audit-subcommands.md +142 -0
  24. data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
  25. data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
  26. data/TODO.new/19-fontisan-docs-update.md +155 -0
  27. data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
  28. data/TODO.new/21-canonical-unicode17-build.md +148 -0
  29. data/TODO.new/22-implementation-order.md +176 -0
  30. data/UCODE_CHANGELOG.md +97 -0
  31. data/exe/ucode +8 -0
  32. data/lib/ucode/aggregator.rb +77 -0
  33. data/lib/ucode/audit/block_aggregator.rb +90 -0
  34. data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
  35. data/lib/ucode/audit/context.rb +137 -0
  36. data/lib/ucode/audit/discrepancy_detector.rb +213 -0
  37. data/lib/ucode/audit/extractors/aggregations.rb +70 -0
  38. data/lib/ucode/audit/extractors/base.rb +21 -0
  39. data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
  40. data/lib/ucode/audit/extractors/coverage.rb +55 -0
  41. data/lib/ucode/audit/extractors/hinting.rb +199 -0
  42. data/lib/ucode/audit/extractors/identity.rb +65 -0
  43. data/lib/ucode/audit/extractors/licensing.rb +75 -0
  44. data/lib/ucode/audit/extractors/metrics.rb +108 -0
  45. data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
  46. data/lib/ucode/audit/extractors/provenance.rb +34 -0
  47. data/lib/ucode/audit/extractors/style.rb +88 -0
  48. data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
  49. data/lib/ucode/audit/extractors.rb +31 -0
  50. data/lib/ucode/audit/plane_aggregator.rb +37 -0
  51. data/lib/ucode/audit/registry.rb +63 -0
  52. data/lib/ucode/audit/script_aggregator.rb +92 -0
  53. data/lib/ucode/audit.rb +27 -0
  54. data/lib/ucode/cache.rb +113 -0
  55. data/lib/ucode/cli.rb +272 -0
  56. data/lib/ucode/commands/build.rb +68 -0
  57. data/lib/ucode/commands/cache.rb +46 -0
  58. data/lib/ucode/commands/fetch.rb +62 -0
  59. data/lib/ucode/commands/font_coverage.rb +57 -0
  60. data/lib/ucode/commands/glyphs.rb +136 -0
  61. data/lib/ucode/commands/lookup.rb +65 -0
  62. data/lib/ucode/commands/parse.rb +62 -0
  63. data/lib/ucode/commands/site.rb +33 -0
  64. data/lib/ucode/commands.rb +19 -0
  65. data/lib/ucode/config.rb +110 -0
  66. data/lib/ucode/coordinator/indices.rb +34 -0
  67. data/lib/ucode/coordinator.rb +397 -0
  68. data/lib/ucode/database.rb +214 -0
  69. data/lib/ucode/db_builder.rb +107 -0
  70. data/lib/ucode/error.rb +96 -0
  71. data/lib/ucode/fetch/code_charts.rb +57 -0
  72. data/lib/ucode/fetch/http.rb +83 -0
  73. data/lib/ucode/fetch/ucd_zip.rb +57 -0
  74. data/lib/ucode/fetch/unihan_zip.rb +57 -0
  75. data/lib/ucode/fetch.rb +14 -0
  76. data/lib/ucode/glyphs/cell_extractor.rb +130 -0
  77. data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
  78. data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
  79. data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
  80. data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
  81. data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
  82. data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
  83. data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
  84. data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
  85. data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
  86. data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
  87. data/lib/ucode/glyphs/grid.rb +30 -0
  88. data/lib/ucode/glyphs/grid_detector.rb +165 -0
  89. data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
  90. data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
  91. data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
  92. data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
  93. data/lib/ucode/glyphs/last_resort/source.rb +125 -0
  94. data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
  95. data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
  96. data/lib/ucode/glyphs/last_resort.rb +36 -0
  97. data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
  98. data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
  99. data/lib/ucode/glyphs/page_renderer.rb +221 -0
  100. data/lib/ucode/glyphs/path_bbox.rb +62 -0
  101. data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
  102. data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
  103. data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
  104. data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
  105. data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
  106. data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
  107. data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
  108. data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
  109. data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
  110. data/lib/ucode/glyphs/real_fonts.rb +32 -0
  111. data/lib/ucode/glyphs/writer.rb +250 -0
  112. data/lib/ucode/glyphs.rb +27 -0
  113. data/lib/ucode/index.rb +106 -0
  114. data/lib/ucode/index_builder.rb +94 -0
  115. data/lib/ucode/models/audit/audit_axis.rb +30 -0
  116. data/lib/ucode/models/audit/audit_diff.rb +77 -0
  117. data/lib/ucode/models/audit/audit_report.rb +137 -0
  118. data/lib/ucode/models/audit/baseline.rb +32 -0
  119. data/lib/ucode/models/audit/block_summary.rb +72 -0
  120. data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
  121. data/lib/ucode/models/audit/codepoint_range.rb +39 -0
  122. data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
  123. data/lib/ucode/models/audit/color_capabilities.rb +91 -0
  124. data/lib/ucode/models/audit/discrepancy.rb +38 -0
  125. data/lib/ucode/models/audit/duplicate_group.rb +23 -0
  126. data/lib/ucode/models/audit/embedding_type.rb +81 -0
  127. data/lib/ucode/models/audit/field_change.rb +28 -0
  128. data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
  129. data/lib/ucode/models/audit/gasp_range.rb +63 -0
  130. data/lib/ucode/models/audit/hinting.rb +99 -0
  131. data/lib/ucode/models/audit/library_summary.rb +40 -0
  132. data/lib/ucode/models/audit/licensing.rb +48 -0
  133. data/lib/ucode/models/audit/metrics.rb +111 -0
  134. data/lib/ucode/models/audit/named_instance.rb +41 -0
  135. data/lib/ucode/models/audit/opentype_layout.rb +38 -0
  136. data/lib/ucode/models/audit/plane_summary.rb +31 -0
  137. data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
  138. data/lib/ucode/models/audit/script_features.rb +28 -0
  139. data/lib/ucode/models/audit/script_summary.rb +54 -0
  140. data/lib/ucode/models/audit/variation_detail.rb +42 -0
  141. data/lib/ucode/models/audit.rb +50 -0
  142. data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
  143. data/lib/ucode/models/bidi_mirroring.rb +19 -0
  144. data/lib/ucode/models/binary_property_assignment.rb +26 -0
  145. data/lib/ucode/models/block.rb +36 -0
  146. data/lib/ucode/models/case_folding_rule.rb +23 -0
  147. data/lib/ucode/models/cjk_radical.rb +23 -0
  148. data/lib/ucode/models/codepoint/bidi.rb +28 -0
  149. data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
  150. data/lib/ucode/models/codepoint/case_folding.rb +25 -0
  151. data/lib/ucode/models/codepoint/casing.rb +32 -0
  152. data/lib/ucode/models/codepoint/decomposition.rb +27 -0
  153. data/lib/ucode/models/codepoint/display.rb +24 -0
  154. data/lib/ucode/models/codepoint/emoji.rb +29 -0
  155. data/lib/ucode/models/codepoint/hangul.rb +20 -0
  156. data/lib/ucode/models/codepoint/identifier.rb +30 -0
  157. data/lib/ucode/models/codepoint/indic.rb +20 -0
  158. data/lib/ucode/models/codepoint/joining.rb +20 -0
  159. data/lib/ucode/models/codepoint/normalization.rb +35 -0
  160. data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
  161. data/lib/ucode/models/codepoint.rb +122 -0
  162. data/lib/ucode/models/name_alias.rb +21 -0
  163. data/lib/ucode/models/named_sequence.rb +19 -0
  164. data/lib/ucode/models/names_list_entry.rb +38 -0
  165. data/lib/ucode/models/plane.rb +36 -0
  166. data/lib/ucode/models/property_alias.rb +24 -0
  167. data/lib/ucode/models/property_value_alias.rb +26 -0
  168. data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
  169. data/lib/ucode/models/relationship/cross_reference.rb +17 -0
  170. data/lib/ucode/models/relationship/footnote.rb +24 -0
  171. data/lib/ucode/models/relationship/informal_alias.rb +18 -0
  172. data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
  173. data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
  174. data/lib/ucode/models/relationship.rb +57 -0
  175. data/lib/ucode/models/script.rb +41 -0
  176. data/lib/ucode/models/special_casing_rule.rb +28 -0
  177. data/lib/ucode/models/standardized_variant.rb +24 -0
  178. data/lib/ucode/models/unihan_entry.rb +23 -0
  179. data/lib/ucode/models.rb +47 -0
  180. data/lib/ucode/parsers/auxiliary.rb +26 -0
  181. data/lib/ucode/parsers/base.rb +137 -0
  182. data/lib/ucode/parsers/bidi_brackets.rb +41 -0
  183. data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
  184. data/lib/ucode/parsers/blocks.rb +63 -0
  185. data/lib/ucode/parsers/case_folding.rb +53 -0
  186. data/lib/ucode/parsers/cjk_radicals.rb +102 -0
  187. data/lib/ucode/parsers/derived_age.rb +59 -0
  188. data/lib/ucode/parsers/derived_core_properties.rb +60 -0
  189. data/lib/ucode/parsers/extracted_properties.rb +74 -0
  190. data/lib/ucode/parsers/name_aliases.rb +44 -0
  191. data/lib/ucode/parsers/named_sequences.rb +51 -0
  192. data/lib/ucode/parsers/names_list.rb +250 -0
  193. data/lib/ucode/parsers/property_aliases.rb +41 -0
  194. data/lib/ucode/parsers/property_value_aliases.rb +46 -0
  195. data/lib/ucode/parsers/script_extensions.rb +64 -0
  196. data/lib/ucode/parsers/scripts.rb +60 -0
  197. data/lib/ucode/parsers/special_casing.rb +62 -0
  198. data/lib/ucode/parsers/standardized_variants.rb +56 -0
  199. data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
  200. data/lib/ucode/parsers/unicode_data.rb +268 -0
  201. data/lib/ucode/parsers/unihan.rb +125 -0
  202. data/lib/ucode/parsers.rb +35 -0
  203. data/lib/ucode/range_entry.rb +58 -0
  204. data/lib/ucode/repo/aggregate_writer.rb +364 -0
  205. data/lib/ucode/repo/atomic_writes.rb +48 -0
  206. data/lib/ucode/repo/codepoint_writer.rb +96 -0
  207. data/lib/ucode/repo/paths.rb +122 -0
  208. data/lib/ucode/repo.rb +22 -0
  209. data/lib/ucode/site/config_emitter.rb +124 -0
  210. data/lib/ucode/site/generator.rb +178 -0
  211. data/lib/ucode/site/search_index.rb +68 -0
  212. data/lib/ucode/site/template/.gitignore +4 -0
  213. data/lib/ucode/site/template/.vitepress/config.ts +8 -0
  214. data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
  215. data/lib/ucode/site/template/char/[codepoint].md +13 -0
  216. data/lib/ucode/site/template/components/BlockView.vue +57 -0
  217. data/lib/ucode/site/template/components/CharView.vue +85 -0
  218. data/lib/ucode/site/template/components/PlaneView.vue +56 -0
  219. data/lib/ucode/site/template/components/SearchView.vue +66 -0
  220. data/lib/ucode/site/template/index.md +25 -0
  221. data/lib/ucode/site/template/package.json +18 -0
  222. data/lib/ucode/site/template/search.md +9 -0
  223. data/lib/ucode/site.rb +13 -0
  224. data/lib/ucode/version.rb +5 -0
  225. data/lib/ucode/version_resolver.rb +76 -0
  226. data/lib/ucode.rb +74 -0
  227. data/ucode.gemspec +56 -0
  228. metadata +404 -0
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fontisan"
4
+
5
+ module Ucode
6
+ module Audit
7
+ module Extractors
8
+ # Color-font capability summary: which color formats a face carries
9
+ # (COLR v0/v1, CPAL, SVG, CBDT/CBLC, sbix) plus lightweight counts
10
+ # from each table's header.
11
+ #
12
+ # Returned fields:
13
+ # color_capabilities: Models::Audit::ColorCapabilities, or nil
14
+ # for Type 1
15
+ #
16
+ # Counts are best-effort — any table that fails to parse yields nil
17
+ # for its corresponding count fields rather than crashing the audit.
18
class ColorCapabilities < Base
  # Summarises the color tables carried by the font in +context+.
  #
  # @param context [Ucode::Audit::Context]
  # @return [Hash{Symbol=>Object}] a single-key hash; +:color_capabilities+
  #   is nil when the font is not a Fontisan::SfntFont
  def extract(context)
    font = context.font
    return { color_capabilities: nil } unless sfnt?(font)

    { color_capabilities: Models::Audit::ColorCapabilities.new(**gather(font)) }
  end

  private

  # @return [Boolean] whether +font+ is a Fontisan::SfntFont
  def sfnt?(font)
    font.is_a?(Fontisan::SfntFont)
  end

  # Merges the per-table field hashes and the derived format list into the
  # keyword hash handed to Models::Audit::ColorCapabilities.new.
  def gather(font)
    colr = colr_fields(font)
    cpal = cpal_fields(font)
    svg = svg_fields(font)
    cbdt = cbdt_fields(font)
    sbix = sbix_fields(font)

    formats = Models::Audit::ColorCapabilities.derive_formats(
      has_colr: colr[:has_colr], colr_version: colr[:colr_version],
      has_cpal: cpal[:has_cpal], has_svg: svg[:has_svg],
      has_cbdt: cbdt[:has_cbdt], has_sbix: sbix[:has_sbix]
    )

    colr.merge(cpal).merge(svg).merge(cbdt).merge(sbix)
      .merge(color_formats: formats)
  end

  # Yields the parsed +tag+ table and returns the block's value.
  #
  # Returns +fallback+ instead when the table is absent, when
  # +font.table+ returns nil, or when reading the table raises
  # Fontisan::CorruptedTableError. The rescue keeps one damaged color
  # table from aborting the whole audit, so the counts stay best-effort.
  #
  # @param font [Fontisan::SfntFont]
  # @param tag [String] four-character table tag
  # @param fallback [Object] value returned when the table is unusable
  def with_table(font, tag, fallback)
    return fallback unless font.has_table?(tag)

    parsed = font.table(tag)
    parsed ? yield(parsed) : fallback
  rescue Fontisan::CorruptedTableError
    fallback
  end

  # COLR presence, version and record counts.
  def colr_fields(font)
    with_table(font, "COLR", empty_colr) do |colr|
      {
        has_colr: true,
        colr_version: colr.version&.to_i,
        colr_base_glyph_count: colr.num_base_glyph_records&.to_i,
        colr_layer_count: colr.num_layer_records&.to_i,
      }
    end
  end

  def empty_colr
    { has_colr: false, colr_version: nil,
      colr_base_glyph_count: nil, colr_layer_count: nil }
  end

  # CPAL presence plus palette and color-record counts.
  def cpal_fields(font)
    with_table(font, "CPAL", empty_cpal) do |cpal|
      {
        has_cpal: true,
        cpal_palette_count: cpal.num_palettes&.to_i,
        cpal_color_count: cpal.num_color_records&.to_i,
      }
    end
  end

  def empty_cpal
    { has_cpal: false, cpal_palette_count: nil, cpal_color_count: nil }
  end

  # SVG presence and document count. The tag carries a trailing space.
  def svg_fields(font)
    with_table(font, "SVG ", empty_svg) do |svg|
      {
        has_svg: true,
        svg_document_count: svg.num_svg_documents&.to_i,
      }
    end
  end

  def empty_svg
    { has_svg: false, svg_document_count: nil }
  end

  # CBDT/CBLC are paired tables: CBLC holds the strike index,
  # CBDT holds the bitmap data. has_cbdt vs has_cblc disagreement
  # is reported as-is — audit consumers can spot the inconsistency.
  def cbdt_fields(font)
    has_cbdt = font.has_table?("CBDT")
    has_cblc = font.has_table?("CBLC")

    {
      has_cbdt: has_cbdt,
      has_cblc: has_cblc,
      cbdt_strike_count: (cblc_strike_count(font) if has_cblc),
    }
  end

  # @return [Integer, nil] CBLC +num_sizes+, or nil when the table cannot
  #   be read
  def cblc_strike_count(font)
    with_table(font, "CBLC", nil) { |cblc| cblc.num_sizes&.to_i }
  end

  # sbix presence and strike count.
  def sbix_fields(font)
    with_table(font, "sbix", empty_sbix) do |sbix|
      {
        has_sbix: true,
        sbix_strike_count: sbix.num_strikes&.to_i,
      }
    end
  end

  def empty_sbix
    { has_sbix: false, sbix_strike_count: nil }
  end
end
141
+ end
142
+ end
143
+ end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ucode
4
+ module Audit
5
+ module Extractors
6
+ # Coverage fields: how many codepoints and glyphs the font ships,
7
+ # the compact codepoint-range view (default), and the optional flat
8
+ # per-codepoint list (only when `--all-codepoints` is on).
9
+ #
10
+ # Returned fields:
11
+ # total_codepoints, total_glyphs, cmap_subtables,
12
+ # codepoint_ranges, codepoints
13
+ #
14
+ # ucode delta vs fontisan: the `codepoints` field uses "U+XXXX"
15
+ # string form per `02-audit-schema-design.md`. Does NOT emit
16
+ # aggregations (blocks/scripts) — that's the Aggregations
17
+ # extractor in TODO 10. Coverage only emits the raw codepoint set.
18
class Coverage < Base
  # Reports how much the font covers: codepoint and glyph totals, the cmap
  # subtable formats, the coalesced codepoint ranges, and (optionally) the
  # flat codepoint list.
  #
  # @param context [Ucode::Audit::Context]
  # @return [Hash{Symbol=>Object}]
  def extract(context)
    font = context.font
    codepoints = context.codepoints
    {
      total_codepoints: codepoints.length,
      total_glyphs: total_glyphs(font),
      cmap_subtables: cmap_subtable_formats(font),
      codepoint_ranges: CodepointRangeCoalescer.call(codepoints),
      codepoints: codepoints_for_report(context, codepoints),
    }
  end

  private

  # @return [Object, nil] +num_glyphs+ from maxp, or nil when the table is
  #   absent or +font.table+ returns nil for it
  def total_glyphs(font)
    table(font, "maxp")&.num_glyphs
  end

  # @return [Object] the cmap table's +subtable_formats+, or [] when the
  #   table is absent or +font.table+ returns nil for it
  def cmap_subtable_formats(font)
    cmap = table(font, "cmap")
    cmap ? cmap.subtable_formats : []
  end

  # Nil-safe table lookup: nil when the font does not carry +tag+.
  def table(font, tag)
    font.table(tag) if font.has_table?(tag)
  end

  # The flat "U+XXXX" list is only emitted when the context asks for all
  # codepoints; otherwise the report carries an empty list.
  def codepoints_for_report(context, codepoints)
    return [] unless context.all_codepoints?

    codepoints.map { |cp| format("U+%<cp>04X", cp: cp) }
  end
end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "stringio"
4
+
5
+ require "fontisan"
6
+
7
+ module Ucode
8
+ module Audit
9
+ module Extractors
10
+ # Hinting summary: TrueType bytecode counts + gasp policy + CFF stem
11
+ # count, with derived `is_unhinted` and `hinting_format` fields.
12
+ #
13
+ # Returned fields:
14
+ # hinting: Models::Audit::Hinting instance, or nil for Type 1
15
+ #
16
+ # The fpgm/prep/cvt/gasp tables have no BinData classes yet — they
17
+ # are read as raw bytes from `font.table_data`. Bytecode is one
18
+ # byte per instruction; cvt is an array of FWord (int16), so the
19
+ # entry count is bytesize / 2.
20
class Hinting < Base
  # Raw CFF / CFF2 charstring operator bytes that declare stem hints.
  HSTEM = 1
  VSTEM = 3
  HSTEMHM = 18
  VSTEMHM = 23
  HINTMASK = 19
  CNTRMASK = 20

  # Tag of the control-value table. OpenType table tags are four bytes, so
  # the three-letter name is padded with a trailing space (like "CFF ").
  CVT_TAG = "cvt "
  private_constant :CVT_TAG

  # Builds the hinting summary for the font in +context+.
  #
  # @param context [Ucode::Audit::Context]
  # @return [Hash{Symbol=>Object}] a single-key hash; +:hinting+ is nil
  #   when the font is not a Fontisan::SfntFont
  def extract(context)
    font = context.font
    return { hinting: nil } unless sfnt?(font)

    { hinting: Models::Audit::Hinting.new(**gather(font)) }
  end

  private

  # @return [Boolean] whether +font+ is a Fontisan::SfntFont
  def sfnt?(font)
    font.is_a?(Fontisan::SfntFont)
  end

  # Merges the TrueType, CFF and gasp fields with the flags derived from
  # them into the keyword hash for Models::Audit::Hinting.new.
  def gather(font)
    tt = truetype_fields(font)
    cff = cff_fields(font)
    gasp = parse_gasp(font)

    derived = Models::Audit::Hinting.derive_flags(
      has_tt: tt[:has_fpgm] || tt[:has_prep] || tt[:has_cvt],
      has_cff: cff[:cff_has_private_dict],
      has_gasp: !gasp.empty?,
    )

    tt.merge(cff).merge(gasp_ranges: gasp).merge(derived)
  end

  # Presence flags and sizes for the TrueType hinting tables.
  def truetype_fields(font)
    {
      has_fpgm: font.has_table?("fpgm"),
      fpgm_instruction_count: byte_count(font, "fpgm"),
      has_prep: font.has_table?("prep"),
      prep_instruction_count: byte_count(font, "prep"),
      has_cvt: font.has_table?(CVT_TAG),
      cvt_entry_count: cvt_entry_count(font),
      has_cvar: font.has_table?("cvar"),
    }
  end

  # CFF fields. +cff_has_private_dict+ is true when either a CFF or a CFF2
  # table is present; stems are only counted for CFF (version 1).
  def cff_fields(font)
    has_cff1 = font.has_table?("CFF ")
    has_cff2 = font.has_table?("CFF2")

    {
      cff_has_private_dict: has_cff1 || has_cff2,
      cff_hint_count: has_cff1 ? count_cff_stems(font) : nil,
    }
  end

  # @return [Integer, nil] raw byte size of table +tag+, or nil when the
  #   table is absent or has no entry in +font.table_data+
  def byte_count(font, tag)
    return nil unless font.has_table?(tag)

    font.table_data[tag]&.bytesize
  end

  # cvt is an array of 16-bit FWord values, so entries = bytes / 2.
  #
  # @return [Integer, nil] nil when the table is absent or has no raw data
  def cvt_entry_count(font)
    size = byte_count(font, CVT_TAG)
    size && size / 2
  end

  # Parse the gasp table from raw bytes. Format: uint16 version,
  # uint16 numRanges, then numRanges × (uint16 rangeMaxPPEM,
  # uint16 rangeFlags). Returns [] if gasp is absent or shorter than its
  # 4-byte header; a table truncated mid-list yields the complete ranges
  # read so far.
  def parse_gasp(font)
    return [] unless font.has_table?("gasp")

    data = font.table_data["gasp"]
    return [] unless data && data.bytesize >= 4

    _version, num_ranges = data.unpack("nn")
    ranges = []
    offset = 4
    num_ranges.times do
      # Stop at the first record that does not fit in the remaining bytes.
      break if offset + 4 > data.bytesize

      max_ppem, flags = data[offset, 4].unpack("nn")
      ranges << Models::Audit::GaspRange.from_flags(max_ppem, flags)
      offset += 4
    end
    ranges
  end

  # Sums the stem hints declared across every charstring of the first
  # CharStrings INDEX in the CFF table.
  #
  # @return [Integer, nil] nil when the table or its index is unavailable,
  #   or when reading raises Fontisan::CorruptedTableError
  def count_cff_stems(font)
    return nil unless font.has_table?("CFF ")

    cff = font.table("CFF ")
    return nil unless cff

    index = cff.charstrings_index(0)
    return nil unless index

    total = 0
    index.count.times do |glyph_index|
      data = index[glyph_index]
      next unless data

      total += count_stems_in_charstring(data)
    end
    total
  rescue Fontisan::CorruptedTableError
    nil
  end

  # Lightweight Type-2 CharString scanner that counts stem hints
  # without instantiating a full CharString (which needs a Private
  # DICT, global/local subrs, etc.). Operates purely on bytes.
  #
  # @param data [String] raw charstring bytes
  # @return [Integer] number of stems declared in this charstring
  def count_stems_in_charstring(data)
    io = StringIO.new(data)
    stack = 0 # operands seen since the last operator
    stems = 0

    until io.eof?
      byte = io.getbyte
      next if byte.nil?

      stack, stems = process_byte(io, byte, stack, stems)
    end

    stems
  end

  # Dispatches one leading byte to the operator or operand handler.
  #
  # @return [Array(Integer, Integer)] the updated [stack, stems] pair
  def process_byte(io, byte, stack, stems)
    if operator_byte?(byte)
      apply_operator(io, byte, stack, stems)
    else
      [consume_operand(io, byte, stack), stems]
    end
  end

  # Bytes 0..31 are operators, except 28 which introduces a 16-bit operand.
  def operator_byte?(byte)
    byte <= 31 && byte != 28
  end

  # Applies one operator. Every operator clears the operand count; stem
  # operators add one stem per operand pair.
  def apply_operator(io, byte, stack, stems)
    case byte
    when 12
      # Escape prefix: the following byte completes a two-byte operator.
      io.getbyte
      [0, stems]
    when HSTEM, VSTEM, HSTEMHM, VSTEMHM
      [0, stems + stack / 2]
    when HINTMASK, CNTRMASK
      # Operands pending before a mask are counted as stems too; the mask
      # itself is one bit per stem, rounded up to whole bytes.
      new_stems = stems + stack / 2
      io.read((new_stems + 7) / 8)
      [0, new_stems]
    else
      [0, stems]
    end
  end

  # Skips the trailing bytes of a multi-byte operand and counts it.
  #
  # @return [Integer] the operand count after this operand
  def consume_operand(io, byte, stack)
    case byte
    when 28
      io.read(2)
    when 255
      io.read(4)
    when 247..254
      io.getbyte
    end
    stack + 1
  end
end
197
+ end
198
+ end
199
+ end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fontisan"
4
+
5
+ module Ucode
6
+ module Audit
7
+ module Extractors
8
+ # Identity fields: the human-readable names a font uses to describe
9
+ # itself, drawn from the `name` table (SFNT) or font dictionary
10
+ # (Type 1).
11
+ #
12
+ # Returned fields:
13
+ # family_name, subfamily_name, full_name, postscript_name,
14
+ # version, font_revision
15
class Identity < Base
  # Reads the font's self-described names, choosing the Type 1 or SFNT
  # source according to the font's class.
  #
  # @param context [Ucode::Audit::Context]
  # @return [Hash{Symbol=>Object}]
  def extract(context)
    font = context.font
    case font
    when Fontisan::Type1Font then type1_identity(font)
    else sfnt_identity(font)
    end
  end

  private

  # Names come from the `name` table and the revision from `head`; a
  # missing table leaves its fields nil.
  def sfnt_identity(font)
    names = table(font, "name")
    head = table(font, "head")
    ids = Fontisan::Tables::Name

    name_ids = {
      family_name: ids::FAMILY,
      subfamily_name: ids::SUBFAMILY,
      full_name: ids::FULL_NAME,
      postscript_name: ids::POSTSCRIPT_NAME,
      version: ids::VERSION,
    }

    name_ids
      .transform_values { |name_id| english_name(names, name_id) }
      .merge(font_revision: head&.font_revision)
  end

  # Type 1 fonts take their names from the font dictionary's font_info;
  # subfamily and revision are always nil here.
  def type1_identity(font)
    info = font.font_dictionary&.font_info
    {
      family_name: info&.family_name,
      subfamily_name: nil,
      full_name: info&.full_name,
      postscript_name: font.font_name,
      version: info&.version,
      font_revision: nil,
    }
  end

  # Nil-safe table lookup: nil when the font does not carry +tag+.
  def table(font, tag)
    return nil unless font.has_table?(tag)

    font.table(tag)
  end

  # English string for +name_id+, or nil when there is no name table.
  def english_name(name_table, name_id)
    name_table&.english_name(name_id)
  end
end
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fontisan"
4
+
5
+ module Ucode
6
+ module Audit
7
+ module Extractors
8
+ # Licensing + embedding permissions + vendor provenance.
9
+ #
10
+ # Returned fields:
11
+ # licensing: Models::Audit::Licensing instance, or nil for Type 1
12
+ #
13
+ # Type 1 fonts have no OS/2 table; their licensing is nil. WOFF/
14
+ # WOFF2 carry the same OS/2 + name tables as TTF/OTF and need no
15
+ # special handling.
16
class Licensing < Base
  # nameID → AuditReport field name, per OpenType name table spec.
  NAME_IDS = {
    copyright: 0,
    trademark: 7,
    manufacturer: 8,
    designer: 9,
    description: 10,
    vendor_url: 11,
    designer_url: 12,
    license_description: 13,
    license_url: 14,
  }.freeze
  private_constant :NAME_IDS

  # Builds the licensing record from the OS/2 and name tables.
  #
  # @param context [Ucode::Audit::Context]
  # @return [Hash{Symbol=>Object}] a single-key hash; +:licensing+ is nil
  #   when the font is not a Fontisan::SfntFont
  def extract(context)
    font = context.font
    return { licensing: nil } unless sfnt?(font)

    os2 = table(font, "OS/2")
    name = table(font, "name")

    {
      licensing: Models::Audit::Licensing.new(
        **name_fields(name),
        vendor_id: sanitized_vendor_id(os2),
        embedding_type: Models::Audit::EmbeddingType.decode(os2&.fs_type&.to_i),
        fs_selection_flags: Models::Audit::FsSelectionFlags.decode(os2&.fs_selection&.to_i),
      ),
    }
  end

  private

  # @return [Boolean] whether +font+ is a Fontisan::SfntFont
  def sfnt?(font)
    font.is_a?(Fontisan::SfntFont)
  end

  # Nil-safe table lookup: nil when the font does not carry +tag+.
  def table(font, tag)
    font.table(tag) if font.has_table?(tag)
  end

  # English name string for every NAME_IDS entry, keyed by field name.
  # Returns an empty hash when there is no name table, so no name keywords
  # are passed to the model.
  def name_fields(name)
    return {} unless name

    NAME_IDS.transform_values { |id| name.english_name(id) }
  end

  # Strips trailing NUL / whitespace padding from the OS/2 vendor ID.
  #
  # The pattern is anchored with \z (end of string) rather than $ (end of
  # line), so only padding at the very end is removed and nothing before
  # an embedded newline is touched.
  #
  # @return [String, nil] nil when there is no OS/2 table or no vendor ID
  def sanitized_vendor_id(os2)
    raw = os2&.ach_vend_id
    return nil if raw.nil?

    raw.sub(/[\x00\s]+\z/, "")
  end
end
73
+ end
74
+ end
75
+ end
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fontisan"
4
+
5
+ module Ucode
6
+ module Audit
7
+ module Extractors
8
+ # Layout-critical metrics consolidated from head, hhea, OS/2, post.
9
+ #
10
+ # Returned fields:
11
+ # metrics: Models::Audit::Metrics instance, or nil for Type 1
12
+ #
13
+ # All table reads are nil-safe; tables may be absent in stripped
14
+ # WOFF builds or legacy formats.
15
class Metrics < Base
  # OS/2 table fields exposed on Metrics, as
  # `Metrics attribute name` => `OS/2 reader method`.
  OS2_FIELDS = {
    typo_ascender: :s_typo_ascender,
    typo_descender: :s_typo_descender,
    typo_line_gap: :s_typo_line_gap,
    win_ascent: :us_win_ascent,
    win_descent: :us_win_descent,
    x_height: :sx_height,
    cap_height: :s_cap_height,
    subscript_x_size: :y_subscript_x_size,
    subscript_y_size: :y_subscript_y_size,
    subscript_x_offset: :y_subscript_x_offset,
    subscript_y_offset: :y_subscript_y_offset,
    superscript_x_size: :y_superscript_x_size,
    superscript_y_size: :y_superscript_y_size,
    superscript_x_offset: :y_superscript_x_offset,
    superscript_y_offset: :y_superscript_y_offset,
    strikeout_size: :y_strikeout_size,
    strikeout_position: :y_strikeout_position,
  }.freeze
  private_constant :OS2_FIELDS

  # Consolidates layout metrics from the head, hhea, OS/2 and post tables.
  #
  # @param context [Ucode::Audit::Context]
  # @return [Hash{Symbol=>Object}] a single-key hash; +:metrics+ is nil
  #   when the font is not a Fontisan::SfntFont
  def extract(context)
    font = context.font
    if sfnt?(font)
      { metrics: Models::Audit::Metrics.new(**gather(font)) }
    else
      { metrics: nil }
    end
  end

  private

  # @return [Boolean] whether +font+ is a Fontisan::SfntFont
  def sfnt?(font)
    font.is_a?(Fontisan::SfntFont)
  end

  # Merges the per-table hashes in head, hhea, OS/2, post order. A missing
  # table contributes an empty hash, so its keys are simply not passed on.
  def gather(font)
    [
      head_fields(font),
      hhea_fields(font),
      os2_fields(font),
      post_fields(font),
    ].reduce(:merge)
  end

  # Units-per-em and the font bounding box from head.
  def head_fields(font)
    head = table(font, "head")
    if head
      {
        units_per_em: head.units_per_em&.to_i,
        bbox_x_min: head.x_min&.to_i,
        bbox_y_min: head.y_min&.to_i,
        bbox_x_max: head.x_max&.to_i,
        bbox_y_max: head.y_max&.to_i,
      }
    else
      {}
    end
  end

  # Ascent, descent and line gap from hhea.
  def hhea_fields(font)
    hhea = table(font, "hhea")
    if hhea
      {
        hhea_ascent: hhea.ascent&.to_i,
        hhea_descent: hhea.descent&.to_i,
        hhea_line_gap: hhea.line_gap&.to_i,
      }
    else
      {}
    end
  end

  # Every OS2_FIELDS reader, converted to Integer when non-nil.
  def os2_fields(font)
    os2 = table(font, "OS/2")
    return {} unless os2

    OS2_FIELDS.each_with_object({}) do |(attribute, reader), fields|
      fields[attribute] = os2.public_send(reader)&.to_i
    end
  end

  # Underline position and thickness from post, as Floats when non-nil.
  def post_fields(font)
    post = table(font, "post")
    if post
      {
        underline_position: post.underline_position&.to_f,
        underline_thickness: post.underline_thickness&.to_f,
      }
    else
      {}
    end
  end

  # Nil-safe table lookup: nil when the font does not carry +tag+.
  def table(font, tag)
    return nil unless font.has_table?(tag)

    font.table(tag)
  end
end
106
+ end
107
+ end
108
+ end