ucode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. checksums.yaml +7 -0
  2. data/CLAUDE.md +211 -0
  3. data/Gemfile +22 -0
  4. data/Gemfile.lock +406 -0
  5. data/README.md +469 -0
  6. data/Rakefile +18 -0
  7. data/TODO.new/00-README.md +66 -0
  8. data/TODO.new/01-pillar-terminology-alignment.md +69 -0
  9. data/TODO.new/02-audit-schema-design.md +255 -0
  10. data/TODO.new/03-directory-output-spec.md +203 -0
  11. data/TODO.new/04-fontist-org-contract.md +173 -0
  12. data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
  13. data/TODO.new/06-audit-namespace-skeleton.md +105 -0
  14. data/TODO.new/07-audit-models-port.md +132 -0
  15. data/TODO.new/08-extractors-cheap-port.md +113 -0
  16. data/TODO.new/09-extractors-expensive-port.md +99 -0
  17. data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
  18. data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
  19. data/TODO.new/12-formatters-port.md +115 -0
  20. data/TODO.new/13-directory-emitter.md +147 -0
  21. data/TODO.new/14-html-face-browser.md +144 -0
  22. data/TODO.new/15-html-library-browser.md +102 -0
  23. data/TODO.new/16-cli-audit-subcommands.md +142 -0
  24. data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
  25. data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
  26. data/TODO.new/19-fontisan-docs-update.md +155 -0
  27. data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
  28. data/TODO.new/21-canonical-unicode17-build.md +148 -0
  29. data/TODO.new/22-implementation-order.md +176 -0
  30. data/UCODE_CHANGELOG.md +97 -0
  31. data/exe/ucode +8 -0
  32. data/lib/ucode/aggregator.rb +77 -0
  33. data/lib/ucode/audit/block_aggregator.rb +90 -0
  34. data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
  35. data/lib/ucode/audit/context.rb +137 -0
  36. data/lib/ucode/audit/discrepancy_detector.rb +213 -0
  37. data/lib/ucode/audit/extractors/aggregations.rb +70 -0
  38. data/lib/ucode/audit/extractors/base.rb +21 -0
  39. data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
  40. data/lib/ucode/audit/extractors/coverage.rb +55 -0
  41. data/lib/ucode/audit/extractors/hinting.rb +199 -0
  42. data/lib/ucode/audit/extractors/identity.rb +65 -0
  43. data/lib/ucode/audit/extractors/licensing.rb +75 -0
  44. data/lib/ucode/audit/extractors/metrics.rb +108 -0
  45. data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
  46. data/lib/ucode/audit/extractors/provenance.rb +34 -0
  47. data/lib/ucode/audit/extractors/style.rb +88 -0
  48. data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
  49. data/lib/ucode/audit/extractors.rb +31 -0
  50. data/lib/ucode/audit/plane_aggregator.rb +37 -0
  51. data/lib/ucode/audit/registry.rb +63 -0
  52. data/lib/ucode/audit/script_aggregator.rb +92 -0
  53. data/lib/ucode/audit.rb +27 -0
  54. data/lib/ucode/cache.rb +113 -0
  55. data/lib/ucode/cli.rb +272 -0
  56. data/lib/ucode/commands/build.rb +68 -0
  57. data/lib/ucode/commands/cache.rb +46 -0
  58. data/lib/ucode/commands/fetch.rb +62 -0
  59. data/lib/ucode/commands/font_coverage.rb +57 -0
  60. data/lib/ucode/commands/glyphs.rb +136 -0
  61. data/lib/ucode/commands/lookup.rb +65 -0
  62. data/lib/ucode/commands/parse.rb +62 -0
  63. data/lib/ucode/commands/site.rb +33 -0
  64. data/lib/ucode/commands.rb +19 -0
  65. data/lib/ucode/config.rb +110 -0
  66. data/lib/ucode/coordinator/indices.rb +34 -0
  67. data/lib/ucode/coordinator.rb +397 -0
  68. data/lib/ucode/database.rb +214 -0
  69. data/lib/ucode/db_builder.rb +107 -0
  70. data/lib/ucode/error.rb +96 -0
  71. data/lib/ucode/fetch/code_charts.rb +57 -0
  72. data/lib/ucode/fetch/http.rb +83 -0
  73. data/lib/ucode/fetch/ucd_zip.rb +57 -0
  74. data/lib/ucode/fetch/unihan_zip.rb +57 -0
  75. data/lib/ucode/fetch.rb +14 -0
  76. data/lib/ucode/glyphs/cell_extractor.rb +130 -0
  77. data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
  78. data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
  79. data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
  80. data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
  81. data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
  82. data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
  83. data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
  84. data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
  85. data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
  86. data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
  87. data/lib/ucode/glyphs/grid.rb +30 -0
  88. data/lib/ucode/glyphs/grid_detector.rb +165 -0
  89. data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
  90. data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
  91. data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
  92. data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
  93. data/lib/ucode/glyphs/last_resort/source.rb +125 -0
  94. data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
  95. data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
  96. data/lib/ucode/glyphs/last_resort.rb +36 -0
  97. data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
  98. data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
  99. data/lib/ucode/glyphs/page_renderer.rb +221 -0
  100. data/lib/ucode/glyphs/path_bbox.rb +62 -0
  101. data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
  102. data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
  103. data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
  104. data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
  105. data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
  106. data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
  107. data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
  108. data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
  109. data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
  110. data/lib/ucode/glyphs/real_fonts.rb +32 -0
  111. data/lib/ucode/glyphs/writer.rb +250 -0
  112. data/lib/ucode/glyphs.rb +27 -0
  113. data/lib/ucode/index.rb +106 -0
  114. data/lib/ucode/index_builder.rb +94 -0
  115. data/lib/ucode/models/audit/audit_axis.rb +30 -0
  116. data/lib/ucode/models/audit/audit_diff.rb +77 -0
  117. data/lib/ucode/models/audit/audit_report.rb +137 -0
  118. data/lib/ucode/models/audit/baseline.rb +32 -0
  119. data/lib/ucode/models/audit/block_summary.rb +72 -0
  120. data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
  121. data/lib/ucode/models/audit/codepoint_range.rb +39 -0
  122. data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
  123. data/lib/ucode/models/audit/color_capabilities.rb +91 -0
  124. data/lib/ucode/models/audit/discrepancy.rb +38 -0
  125. data/lib/ucode/models/audit/duplicate_group.rb +23 -0
  126. data/lib/ucode/models/audit/embedding_type.rb +81 -0
  127. data/lib/ucode/models/audit/field_change.rb +28 -0
  128. data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
  129. data/lib/ucode/models/audit/gasp_range.rb +63 -0
  130. data/lib/ucode/models/audit/hinting.rb +99 -0
  131. data/lib/ucode/models/audit/library_summary.rb +40 -0
  132. data/lib/ucode/models/audit/licensing.rb +48 -0
  133. data/lib/ucode/models/audit/metrics.rb +111 -0
  134. data/lib/ucode/models/audit/named_instance.rb +41 -0
  135. data/lib/ucode/models/audit/opentype_layout.rb +38 -0
  136. data/lib/ucode/models/audit/plane_summary.rb +31 -0
  137. data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
  138. data/lib/ucode/models/audit/script_features.rb +28 -0
  139. data/lib/ucode/models/audit/script_summary.rb +54 -0
  140. data/lib/ucode/models/audit/variation_detail.rb +42 -0
  141. data/lib/ucode/models/audit.rb +50 -0
  142. data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
  143. data/lib/ucode/models/bidi_mirroring.rb +19 -0
  144. data/lib/ucode/models/binary_property_assignment.rb +26 -0
  145. data/lib/ucode/models/block.rb +36 -0
  146. data/lib/ucode/models/case_folding_rule.rb +23 -0
  147. data/lib/ucode/models/cjk_radical.rb +23 -0
  148. data/lib/ucode/models/codepoint/bidi.rb +28 -0
  149. data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
  150. data/lib/ucode/models/codepoint/case_folding.rb +25 -0
  151. data/lib/ucode/models/codepoint/casing.rb +32 -0
  152. data/lib/ucode/models/codepoint/decomposition.rb +27 -0
  153. data/lib/ucode/models/codepoint/display.rb +24 -0
  154. data/lib/ucode/models/codepoint/emoji.rb +29 -0
  155. data/lib/ucode/models/codepoint/hangul.rb +20 -0
  156. data/lib/ucode/models/codepoint/identifier.rb +30 -0
  157. data/lib/ucode/models/codepoint/indic.rb +20 -0
  158. data/lib/ucode/models/codepoint/joining.rb +20 -0
  159. data/lib/ucode/models/codepoint/normalization.rb +35 -0
  160. data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
  161. data/lib/ucode/models/codepoint.rb +122 -0
  162. data/lib/ucode/models/name_alias.rb +21 -0
  163. data/lib/ucode/models/named_sequence.rb +19 -0
  164. data/lib/ucode/models/names_list_entry.rb +38 -0
  165. data/lib/ucode/models/plane.rb +36 -0
  166. data/lib/ucode/models/property_alias.rb +24 -0
  167. data/lib/ucode/models/property_value_alias.rb +26 -0
  168. data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
  169. data/lib/ucode/models/relationship/cross_reference.rb +17 -0
  170. data/lib/ucode/models/relationship/footnote.rb +24 -0
  171. data/lib/ucode/models/relationship/informal_alias.rb +18 -0
  172. data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
  173. data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
  174. data/lib/ucode/models/relationship.rb +57 -0
  175. data/lib/ucode/models/script.rb +41 -0
  176. data/lib/ucode/models/special_casing_rule.rb +28 -0
  177. data/lib/ucode/models/standardized_variant.rb +24 -0
  178. data/lib/ucode/models/unihan_entry.rb +23 -0
  179. data/lib/ucode/models.rb +47 -0
  180. data/lib/ucode/parsers/auxiliary.rb +26 -0
  181. data/lib/ucode/parsers/base.rb +137 -0
  182. data/lib/ucode/parsers/bidi_brackets.rb +41 -0
  183. data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
  184. data/lib/ucode/parsers/blocks.rb +63 -0
  185. data/lib/ucode/parsers/case_folding.rb +53 -0
  186. data/lib/ucode/parsers/cjk_radicals.rb +102 -0
  187. data/lib/ucode/parsers/derived_age.rb +59 -0
  188. data/lib/ucode/parsers/derived_core_properties.rb +60 -0
  189. data/lib/ucode/parsers/extracted_properties.rb +74 -0
  190. data/lib/ucode/parsers/name_aliases.rb +44 -0
  191. data/lib/ucode/parsers/named_sequences.rb +51 -0
  192. data/lib/ucode/parsers/names_list.rb +250 -0
  193. data/lib/ucode/parsers/property_aliases.rb +41 -0
  194. data/lib/ucode/parsers/property_value_aliases.rb +46 -0
  195. data/lib/ucode/parsers/script_extensions.rb +64 -0
  196. data/lib/ucode/parsers/scripts.rb +60 -0
  197. data/lib/ucode/parsers/special_casing.rb +62 -0
  198. data/lib/ucode/parsers/standardized_variants.rb +56 -0
  199. data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
  200. data/lib/ucode/parsers/unicode_data.rb +268 -0
  201. data/lib/ucode/parsers/unihan.rb +125 -0
  202. data/lib/ucode/parsers.rb +35 -0
  203. data/lib/ucode/range_entry.rb +58 -0
  204. data/lib/ucode/repo/aggregate_writer.rb +364 -0
  205. data/lib/ucode/repo/atomic_writes.rb +48 -0
  206. data/lib/ucode/repo/codepoint_writer.rb +96 -0
  207. data/lib/ucode/repo/paths.rb +122 -0
  208. data/lib/ucode/repo.rb +22 -0
  209. data/lib/ucode/site/config_emitter.rb +124 -0
  210. data/lib/ucode/site/generator.rb +178 -0
  211. data/lib/ucode/site/search_index.rb +68 -0
  212. data/lib/ucode/site/template/.gitignore +4 -0
  213. data/lib/ucode/site/template/.vitepress/config.ts +8 -0
  214. data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
  215. data/lib/ucode/site/template/char/[codepoint].md +13 -0
  216. data/lib/ucode/site/template/components/BlockView.vue +57 -0
  217. data/lib/ucode/site/template/components/CharView.vue +85 -0
  218. data/lib/ucode/site/template/components/PlaneView.vue +56 -0
  219. data/lib/ucode/site/template/components/SearchView.vue +66 -0
  220. data/lib/ucode/site/template/index.md +25 -0
  221. data/lib/ucode/site/template/package.json +18 -0
  222. data/lib/ucode/site/template/search.md +9 -0
  223. data/lib/ucode/site.rb +13 -0
  224. data/lib/ucode/version.rb +5 -0
  225. data/lib/ucode/version_resolver.rb +76 -0
  226. data/lib/ucode.rb +74 -0
  227. data/ucode.gemspec +56 -0
  228. metadata +404 -0
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # A single lightweight finding recorded on an {AuditReport}.
      #
      # A discrepancy is something the audit noticed that has no natural
      # home in a per-table summary — for instance an OS/2 ulUnicodeRange
      # bit that is set although the cmap holds no codepoints from that
      # block. Entries are advisory ("worth a look"), not hard errors.
      class Discrepancy < Lutaml::Model::Serializable
        # Canonical values for `kind`. Adding a kind means adding one
        # constant here plus one extractor check; extractors MUST refer
        # to these constants instead of spelling the string themselves.
        KIND_OS2_UNICODE_RANGE_BIT_WITHOUT_CMAP_CODEPOINTS =
          "os2_unicode_range_bit_without_cmap_codepoints"
        KIND_NAME_TABLE_BUG = "name_table_bug"
        KIND_METRICS_INCONSISTENT = "metrics_inconsistent"

        # Attribute name => lutaml type, in declaration order.
        {
          kind: :string,
          detail: :string,
          block_name: :string,
          bit_position: :integer,
        }.each { |name, type| attribute name, type }

        # Each attribute is mapped to a key spelled exactly like its name.
        key_value do
          %w[kind detail block_name bit_position].each do |key|
            map key, to: key.to_sym
          end
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # A set of files whose `source_sha256` is identical.
      #
      # Spotting byte-for-byte duplicates (whatever the files are called)
      # is the cheapest library-hygiene check available: identical bytes
      # mean the same font.
      class DuplicateGroup < Lutaml::Model::Serializable
        # Digest shared by every file in the group.
        attribute :source_sha256, :string
        # Files carrying that digest; a fresh empty list when none is given.
        attribute :files, :string, collection: true, default: -> { [] }

        # Each attribute is mapped to a key spelled exactly like its name.
        key_value do
          %i[source_sha256 files].each { |name| map name.to_s, to: name }
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Decoded OS/2 fsType bitfield → canonical embedding-permission string.
      #
      # Layout of fsType per the OpenType OS/2 table specification:
      #
      #   value 0 in bits 0-3 : installable embedding (no permission bit set)
      #   bit 0 (0x0001)      : reserved
      #   bit 1 (0x0002)      : restricted license embedding
      #   bit 2 (0x0004)      : preview & print embedding
      #   bit 3 (0x0008)      : editable embedding
      #   bits 4-7            : reserved
      #   bit 8 (0x0100)      : no subsetting
      #   bit 9 (0x0200)      : bitmap embedding only
      #   bits 10-15          : reserved
      #
      # The decoder normalizes to one of seven canonical strings
      # ("restricted_license", "preview_print", "editable", "installable",
      # "installable_no_subsetting", "installable_bitmap_only",
      # "installable_no_subsetting_bitmap_only") so downstream consumers
      # don't need to know the bit layout. Values that set only reserved
      # bits decode to "unknown".
      class EmbeddingType < Lutaml::Model::Serializable
        # Bit masks (OpenType fsType bitfield).
        RESTRICTED_LICENSE_NO_EMBEDDING = 0x0002
        PREVIEW_AND_PRINT = 0x0004
        EDITABLE_EMBEDDING = 0x0008
        # Installable embedding is the ABSENCE of any permission bit, so
        # it has no mask of its own; the constant is kept for callers
        # that reference it by name.
        INSTALLABLE_EMBEDDING = 0x0000
        NO_SUBSETTING = 0x0100
        BITMAP_EMBEDDING_ONLY = 0x0200

        # Ordered permission-bit table. First match wins, so when a
        # (malformed) value sets several permission bits the entry listed
        # first here is the one reported.
        PERMISSION_BITS = [
          [RESTRICTED_LICENSE_NO_EMBEDDING, "restricted_license"],
          [PREVIEW_AND_PRINT, "preview_print"],
          [EDITABLE_EMBEDDING, "editable"],
        ].freeze
        private_constant :PERMISSION_BITS

        # Modifier bits that refine the reported installable level.
        MODIFIER_MASK = NO_SUBSETTING | BITMAP_EMBEDDING_ONLY
        private_constant :MODIFIER_MASK

        attribute :value, :string

        key_value do
          map "value", to: :value
        end

        # Decoded canonical string for the given fsType bitfield.
        #
        # Modifier bits (no subsetting / bitmap only) are only reflected
        # in the result for the installable level; for the other levels
        # the plain level name is returned.
        #
        # @param fs_type [Integer, nil] raw OS/2 fsType value
        # @return [String, nil] canonical permission name, "unknown" when
        #   only reserved bits are set, or nil when fs_type is nil
        def self.decode(fs_type)
          return nil if fs_type.nil?

          matched = PERMISSION_BITS.find { |mask, _| (fs_type & mask).nonzero? }
          return matched.last if matched

          # No permission bit is set. Anything left over besides the two
          # modifier bits is a reserved bit, which describes no known level.
          return "unknown" if (fs_type & ~MODIFIER_MASK).nonzero?

          installable_subcategory(fs_type)
        end

        # Construct an instance from a raw fsType value.
        #
        # @param fs_type [Integer, nil] raw OS/2 fsType value
        # @return [EmbeddingType] instance whose `value` is {.decode}'s result
        def self.from_fs_type(fs_type)
          new(value: decode(fs_type))
        end

        # @return [String, nil] the canonical permission string
        def to_s
          value
        end

        # Refine the installable level with the modifier bits.
        #
        # @param fs_type [Integer] raw fsType with no permission bit set
        # @return [String] one of the four "installable*" strings
        def self.installable_subcategory(fs_type)
          no_subsetting = (fs_type & NO_SUBSETTING).nonzero?
          bitmap_only = (fs_type & BITMAP_EMBEDDING_ONLY).nonzero?

          if no_subsetting && bitmap_only
            "installable_no_subsetting_bitmap_only"
          elsif no_subsetting
            "installable_no_subsetting"
          elsif bitmap_only
            "installable_bitmap_only"
          else
            "installable"
          end
        end
        private_class_method :installable_subcategory
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # A single scalar field whose value differs between two
      # {AuditReport}s.
      #
      # `field` holds the dotted attribute name (e.g. "weight_class").
      # `left` and `right` hold stringified values: nil becomes "", a
      # String is kept as is, anything else is rendered in its YAML form.
      # Nested models are deliberately not compared through their YAML
      # form here — such differences show up as structural add/remove
      # lists on {AuditDiff} itself.
      class FieldChange < Lutaml::Model::Serializable
        # All three attributes are plain strings, mapped to keys spelled
        # exactly like the attribute names.
        field_names = %i[field left right]

        field_names.each { |name| attribute name, :string }

        key_value do
          field_names.each { |name| map name.to_s, to: name }
        end
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Decoded OS/2 fsSelection bitfield → array of flag names.
      #
      # fsSelection is a bitfield; the bits recognised here are, in
      # ascending order: italic (0), underscore (1), negative (2),
      # outlined (3), strikeout (4), bold (5), regular (6),
      # use_typo_metrics (7), wws (8) and oblique (9).
      #
      # Names are always reported in that bit-ascending order.
      class FsSelectionFlags < Lutaml::Model::Serializable
        # Bit mask => flag name, listed from bit 0 upwards. Iteration
        # order of this hash defines the order of decoded names.
        FLAGS = {
          (1 << 0) => "italic",
          (1 << 1) => "underscore",
          (1 << 2) => "negative",
          (1 << 3) => "outlined",
          (1 << 4) => "strikeout",
          (1 << 5) => "bold",
          (1 << 6) => "regular",
          (1 << 7) => "use_typo_metrics",
          (1 << 8) => "wws",
          (1 << 9) => "oblique",
        }.freeze

        attribute :flags, :string, collection: true, default: -> { [] }

        key_value do
          map "flags", to: :flags
        end

        # Names of every flag whose bit is set in the given value.
        #
        # @param fs_selection [Integer, nil] raw OS/2 fsSelection value
        # @return [Array<String>, nil] flag names in bit-ascending order,
        #   or nil when fs_selection is nil
        def self.decode(fs_selection)
          return nil if fs_selection.nil?

          set_flags = FLAGS.select { |mask, _name| (fs_selection & mask) != 0 }
          set_flags.values
        end

        # Build an instance whose `flags` are decoded from a raw value.
        #
        # @param fs_selection [Integer, nil] raw OS/2 fsSelection value
        # @return [FsSelectionFlags]
        def self.from_fs_selection(fs_selection)
          decoded = decode(fs_selection)
          new(flags: decoded)
        end
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # A single entry of the TrueType `gasp` (Grid-fitting And
      # Scan-conversion Procedure) table.
      #
      # An entry states which hinting/smoothing policy is in force up to
      # its `max_ppem` (pixels-per-em). Four single-bit flags are decoded
      # from the raw rangeFlags uint16; its high 12 bits are reserved.
      #
      # Always build instances through {.from_flags} so the bit decoding
      # lives in one place rather than at call sites.
      class GaspRange < Lutaml::Model::Serializable
        # OpenType gasp rangeFlags bit masks.
        GRIDFIT = 0x0001
        DO_GRAY = 0x0002
        SYMMETRIC_GRIDFIT = 0x0004
        SYMMETRIC_SMOOTHING = 0x0008

        # Names of the boolean flag attributes, in declaration order.
        flag_names = %i[gridfit do_gray symmetric_gridfit symmetric_smoothing]

        attribute :max_ppem, :integer
        flag_names.each { |name| attribute name, Lutaml::Model::Type::Boolean }

        # Each attribute is mapped to a key spelled exactly like its name.
        key_value do
          [:max_ppem, *flag_names].each { |name| map name.to_s, to: name }
        end

        # Decode one raw gasp record into a GaspRange.
        #
        # @param max_ppem [Integer] rangeMaxPPEM of the record
        # @param flags [Integer] raw rangeFlags bitfield
        # @return [GaspRange]
        def self.from_flags(max_ppem, flags)
          new(
            max_ppem: max_ppem,
            gridfit: flags.anybits?(GRIDFIT),
            do_gray: flags.anybits?(DO_GRAY),
            symmetric_gridfit: flags.anybits?(SYMMETRIC_GRIDFIT),
            symmetric_smoothing: flags.anybits?(SYMMETRIC_SMOOTHING),
          )
        end

        # Derived value: truthy when both gridfit and do_gray are set.
        # It is computed on demand and has no serialized attribute.
        def gridfit_and_smoothing?
          gridfit && do_gray
        end
      end
    end
  end
end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Hinting summary for one face.
      #
      # Answers the practical questions a designer or QA engineer asks:
      # "Is this font hinted at all? What flavour? How much hinting, by
      # byte / instruction count?"
      #
      # TrueType hinting surfaces as the fpgm/prep/cvt programs plus the
      # gasp per-ppem policy. CFF/CFF2 hinting surfaces as stem hints
      # encoded inside each CharString. This model carries both, plus a
      # derived `is_unhinted` flag and `hinting_format` classification so
      # downstream tooling does not need to re-derive either.
      #
      # All counts are nil-safe: a face with no hinting at all produces
      # `Hinting.new` with every field falsy/nil rather than raising.
      class Hinting < Lutaml::Model::Serializable
        FORMAT_TRUETYPE = "truetype"
        FORMAT_CFF = "cff"
        FORMAT_MIXED = "mixed"
        FORMAT_NONE = "none"

        # TrueType bytecode programs.
        attribute :has_fpgm, Lutaml::Model::Type::Boolean
        attribute :fpgm_instruction_count, :integer
        attribute :has_prep, Lutaml::Model::Type::Boolean
        attribute :prep_instruction_count, :integer

        # TrueType Control Value Table (hinting metrics).
        attribute :has_cvt, Lutaml::Model::Type::Boolean
        attribute :cvt_entry_count, :integer

        # CVT variation table for variable TrueType fonts. Carried for
        # context only — never included in cvt_entry_count.
        attribute :has_cvar, Lutaml::Model::Type::Boolean

        # gasp policy ranges, ordered by ascending max_ppem.
        attribute :gasp_ranges, GaspRange, collection: true, default: -> { [] }

        # CFF/CFF2 hinting. cff_hint_count sums stem declarations across
        # all CharStrings, nil when unparsable.
        attribute :cff_has_private_dict, Lutaml::Model::Type::Boolean
        attribute :cff_hint_count, :integer

        # Derived at extraction time so consumers read flat fields.
        attribute :is_unhinted, Lutaml::Model::Type::Boolean
        attribute :hinting_format, :string

        key_value do
          map "has_fpgm", to: :has_fpgm
          map "fpgm_instruction_count", to: :fpgm_instruction_count
          map "has_prep", to: :has_prep
          map "prep_instruction_count", to: :prep_instruction_count
          map "has_cvt", to: :has_cvt
          map "cvt_entry_count", to: :cvt_entry_count
          map "has_cvar", to: :has_cvar
          map "gasp_ranges", to: :gasp_ranges
          map "cff_has_private_dict", to: :cff_has_private_dict
          map "cff_hint_count", to: :cff_hint_count
          map "is_unhinted", to: :is_unhinted
          map "hinting_format", to: :hinting_format
        end

        # Derive {is_unhinted} and {hinting_format} from individual flags.
        # Called by the extractor before construction so the values land
        # in serialized output without recomputation at read time.
        #
        # gasp is a TrueType-specific table, so it counts toward the
        # TrueType hinting bucket even when no fpgm/prep/cvt is present.
        #
        # Inputs are interpreted by truthiness and coerced to strict
        # booleans first, so a nil (or any other non-boolean) flag can
        # never make the two derived values disagree — e.g. has_cff: nil
        # with has_tt: true yields "truetype", not "none".
        #
        # @param has_tt [Boolean, nil] any TrueType hinting program present
        # @param has_cff [Boolean, nil] any CFF/CFF2 hinting present
        # @param has_gasp [Boolean, nil] a gasp policy is present
        # @return [Hash] keys :is_unhinted, :hinting_format
        def self.derive_flags(has_tt:, has_cff:, has_gasp:)
          tt_hints = [has_tt, has_gasp].any?
          cff_hints = has_cff ? true : false
          {
            is_unhinted: !(tt_hints || cff_hints),
            hinting_format: format_for(tt_hints, cff_hints),
          }
        end

        # Classify the hinting flavour; arguments are read by truthiness.
        #
        # @param has_tt [Boolean, nil]
        # @param has_cff [Boolean, nil]
        # @return [String] one of the FORMAT_* constants
        def self.format_for(has_tt, has_cff)
          if has_tt && has_cff
            FORMAT_MIXED
          elsif has_tt
            FORMAT_TRUETYPE
          elsif has_cff
            FORMAT_CFF
          else
            FORMAT_NONE
          end
        end
        private_class_method :format_for
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Roll-up over a directory (tree) of audited fonts.
      #
      # Produced by the library auditor. It pairs the flat list of
      # per-face {AuditReport}s with cross-face rollups derived from
      # them: a script coverage matrix, duplicate detection keyed on
      # source_sha256, and the license distribution — enough to
      # inventory a font collection in a single pass.
      class LibrarySummary < Lutaml::Model::Serializable
        # Shared default factories; each call hands out a new object.
        empty_list = -> { [] }
        empty_hash = -> { {} }

        attribute :root_path, :string
        attribute :total_files, :integer
        attribute :total_faces, :integer
        attribute :scanned_extensions, :string, collection: true, default: empty_list
        attribute :aggregate_metrics, :hash, default: empty_hash
        attribute :script_coverage, ScriptCoverageRow, collection: true, default: empty_list
        attribute :duplicate_groups, DuplicateGroup, collection: true, default: empty_list
        attribute :license_distribution, :hash, default: empty_hash
        attribute :per_face_reports, AuditReport, collection: true, default: empty_list

        # Each attribute is mapped to a key spelled exactly like its name.
        key_value do
          %i[
            root_path
            total_files
            total_faces
            scanned_extensions
            aggregate_metrics
            script_coverage
            duplicate_groups
            license_distribution
            per_face_reports
          ].each { |name| map name.to_s, to: name }
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Licensing, embedding and vendor-provenance fields of one face.
      #
      # Brings together the human-readable legal/identity strings from
      # the name table and the machine-readable embedding permissions
      # from OS/2. Type 1 fonts have no OS/2 table, so callers must cope
      # with a nil embedding_type / fs_selection_flags / vendor_id.
      class Licensing < Lutaml::Model::Serializable
        # Name-table fields (English name IDs), all plain strings.
        name_table_fields = %i[
          copyright
          trademark
          manufacturer
          designer
          description
          vendor_url
          designer_url
          license_description
          license_url
        ]
        name_table_fields.each { |name| attribute name, :string }

        # OS/2 fields. fs_selection_flags is a list of strings that
        # defaults to a fresh empty array.
        attribute :vendor_id, :string
        attribute :embedding_type, :string
        attribute :fs_selection_flags, :string, collection: true, default: -> { [] }

        # Each attribute is mapped to a key spelled exactly like its name.
        key_value do
          os2_fields = %i[vendor_id embedding_type fs_selection_flags]
          (name_table_fields + os2_fields).each do |name|
            map name.to_s, to: name
          end
        end
      end
    end
  end
end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Layout-critical metrics of one face, gathered from the head,
      # hhea, OS/2 and post tables so every spacing-relevant number can
      # be read in one place instead of across raw table dumps.
      #
      # Every field may be nil — Type 1 fonts and stripped WOFF builds
      # may lack some tables. Derived predicates such as
      # {#metrics_consistent?} accept nil inputs and answer false rather
      # than raising.
      class Metrics < Lutaml::Model::Serializable
        # Integer-typed fields, grouped by the table they come from.
        integer_fields = [
          # head
          :units_per_em, :bbox_x_min, :bbox_y_min, :bbox_x_max, :bbox_y_max,
          # hhea (horizontal)
          :hhea_ascent, :hhea_descent, :hhea_line_gap,
          # OS/2 typo
          :typo_ascender, :typo_descender, :typo_line_gap,
          # OS/2 win
          :win_ascent, :win_descent,
          # OS/2 v2+ (optional)
          :x_height, :cap_height,
          # OS/2 subscript/superscript
          :subscript_x_size, :subscript_y_size,
          :subscript_x_offset, :subscript_y_offset,
          :superscript_x_size, :superscript_y_size,
          :superscript_x_offset, :superscript_y_offset,
          # OS/2 strikeout
          :strikeout_size, :strikeout_position
        ]

        # Float-typed fields (post underline).
        float_fields = %i[underline_position underline_thickness]

        integer_fields.each { |name| attribute name, :integer }
        float_fields.each { |name| attribute name, :float }

        # Each attribute is mapped to a key spelled exactly like its
        # name, integers first, then the post underline floats.
        key_value do
          (integer_fields + float_fields).each do |name|
            map name.to_s, to: name
          end
        end

        # Whether hhea ascent/descent agree with OS/2 typo
        # ascender/descender. A mismatch is a common font bug that makes
        # line height differ between platforms.
        #
        # @return [Boolean] false as soon as any of the four values is
        #   nil or either pair differs
        def metrics_consistent?
          pairs = [
            [hhea_ascent, typo_ascender],
            [hhea_descent, typo_descender],
          ]
          return false if pairs.flatten.any?(&:nil?)

          pairs.all? { |hhea, typo| hhea == typo }
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # A single fvar named instance (e.g. "Bold", "SemiCondensed").
      #
      # `coordinates` is stored as a compact "tag=value,tag=value" string
      # (e.g. "wght=700,wdth=100") to stay readable: the AuditReport is
      # first of all a human-facing artifact, and tooling that needs
      # structured coordinates can re-derive them from fvar.
      class NamedInstance < Lutaml::Model::Serializable
        # All three attributes are plain strings, mapped to keys spelled
        # exactly like the attribute names.
        field_names = %i[subfamily_name postscript_name coordinates]

        field_names.each { |name| attribute name, :string }

        key_value do
          field_names.each { |name| map name.to_s, to: name }
        end

        # Render parallel lists of axis tags and coordinate values as a
        # "tag=value,tag=value" string. Tags are paired with values by
        # position.
        #
        # @param axis_tags [Array<String>] ordered axis tags (e.g. ["wght", "wdth"])
        # @param values [Array<Numeric>] ordered coordinate values
        # @return [String, nil] nil when either list is nil or empty
        def self.format_coordinates(axis_tags, values)
          inputs = [axis_tags, values]
          return nil if inputs.any?(&:nil?)
          return nil if inputs.any?(&:empty?)

          axis_tags
            .zip(values)
            .map { |pair| pair.join("=") }
            .join(",")
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
module Ucode
  module Models
    module Audit
      # Structured OpenType layout summary of one face.
      #
      # Fields:
      #
      # - `scripts`: union of the GSUB and GPOS script tags (sorted,
      #   unique).
      # - `features`: union of the GSUB and GPOS feature tags over all
      #   scripts (sorted, unique).
      # - `by_script`: per-script breakdown that keeps the "feature X
      #   belongs to script Y" relationship.
      # - `has_gsub` / `has_gpos`: presence flags, so "font has no
      #   layout" can be told apart from "font has GSUB but no GPOS".
      #
      # The summary is nil for Type 1 fonts (no SFNT table structure).
      class OpenTypeLayout < Lutaml::Model::Serializable
        # Shared default factory; each call hands out a new array.
        empty_list = -> { [] }

        attribute :scripts, :string, collection: true, default: empty_list
        attribute :features, :string, collection: true, default: empty_list
        attribute :by_script, ScriptFeatures, collection: true, default: empty_list
        attribute :has_gsub, Lutaml::Model::Type::Boolean
        attribute :has_gpos, Lutaml::Model::Type::Boolean

        # Each attribute is mapped to a key spelled exactly like its name.
        key_value do
          %i[scripts features by_script has_gsub has_gpos].each do |name|
            map name.to_s, to: name
          end
        end
      end
    end
  end
end