ucode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. checksums.yaml +7 -0
  2. data/CLAUDE.md +211 -0
  3. data/Gemfile +22 -0
  4. data/Gemfile.lock +406 -0
  5. data/README.md +469 -0
  6. data/Rakefile +18 -0
  7. data/TODO.new/00-README.md +66 -0
  8. data/TODO.new/01-pillar-terminology-alignment.md +69 -0
  9. data/TODO.new/02-audit-schema-design.md +255 -0
  10. data/TODO.new/03-directory-output-spec.md +203 -0
  11. data/TODO.new/04-fontist-org-contract.md +173 -0
  12. data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
  13. data/TODO.new/06-audit-namespace-skeleton.md +105 -0
  14. data/TODO.new/07-audit-models-port.md +132 -0
  15. data/TODO.new/08-extractors-cheap-port.md +113 -0
  16. data/TODO.new/09-extractors-expensive-port.md +99 -0
  17. data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
  18. data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
  19. data/TODO.new/12-formatters-port.md +115 -0
  20. data/TODO.new/13-directory-emitter.md +147 -0
  21. data/TODO.new/14-html-face-browser.md +144 -0
  22. data/TODO.new/15-html-library-browser.md +102 -0
  23. data/TODO.new/16-cli-audit-subcommands.md +142 -0
  24. data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
  25. data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
  26. data/TODO.new/19-fontisan-docs-update.md +155 -0
  27. data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
  28. data/TODO.new/21-canonical-unicode17-build.md +148 -0
  29. data/TODO.new/22-implementation-order.md +176 -0
  30. data/UCODE_CHANGELOG.md +97 -0
  31. data/exe/ucode +8 -0
  32. data/lib/ucode/aggregator.rb +77 -0
  33. data/lib/ucode/audit/block_aggregator.rb +90 -0
  34. data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
  35. data/lib/ucode/audit/context.rb +137 -0
  36. data/lib/ucode/audit/discrepancy_detector.rb +213 -0
  37. data/lib/ucode/audit/extractors/aggregations.rb +70 -0
  38. data/lib/ucode/audit/extractors/base.rb +21 -0
  39. data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
  40. data/lib/ucode/audit/extractors/coverage.rb +55 -0
  41. data/lib/ucode/audit/extractors/hinting.rb +199 -0
  42. data/lib/ucode/audit/extractors/identity.rb +65 -0
  43. data/lib/ucode/audit/extractors/licensing.rb +75 -0
  44. data/lib/ucode/audit/extractors/metrics.rb +108 -0
  45. data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
  46. data/lib/ucode/audit/extractors/provenance.rb +34 -0
  47. data/lib/ucode/audit/extractors/style.rb +88 -0
  48. data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
  49. data/lib/ucode/audit/extractors.rb +31 -0
  50. data/lib/ucode/audit/plane_aggregator.rb +37 -0
  51. data/lib/ucode/audit/registry.rb +63 -0
  52. data/lib/ucode/audit/script_aggregator.rb +92 -0
  53. data/lib/ucode/audit.rb +27 -0
  54. data/lib/ucode/cache.rb +113 -0
  55. data/lib/ucode/cli.rb +272 -0
  56. data/lib/ucode/commands/build.rb +68 -0
  57. data/lib/ucode/commands/cache.rb +46 -0
  58. data/lib/ucode/commands/fetch.rb +62 -0
  59. data/lib/ucode/commands/font_coverage.rb +57 -0
  60. data/lib/ucode/commands/glyphs.rb +136 -0
  61. data/lib/ucode/commands/lookup.rb +65 -0
  62. data/lib/ucode/commands/parse.rb +62 -0
  63. data/lib/ucode/commands/site.rb +33 -0
  64. data/lib/ucode/commands.rb +19 -0
  65. data/lib/ucode/config.rb +110 -0
  66. data/lib/ucode/coordinator/indices.rb +34 -0
  67. data/lib/ucode/coordinator.rb +397 -0
  68. data/lib/ucode/database.rb +214 -0
  69. data/lib/ucode/db_builder.rb +107 -0
  70. data/lib/ucode/error.rb +96 -0
  71. data/lib/ucode/fetch/code_charts.rb +57 -0
  72. data/lib/ucode/fetch/http.rb +83 -0
  73. data/lib/ucode/fetch/ucd_zip.rb +57 -0
  74. data/lib/ucode/fetch/unihan_zip.rb +57 -0
  75. data/lib/ucode/fetch.rb +14 -0
  76. data/lib/ucode/glyphs/cell_extractor.rb +130 -0
  77. data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
  78. data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
  79. data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
  80. data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
  81. data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
  82. data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
  83. data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
  84. data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
  85. data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
  86. data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
  87. data/lib/ucode/glyphs/grid.rb +30 -0
  88. data/lib/ucode/glyphs/grid_detector.rb +165 -0
  89. data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
  90. data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
  91. data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
  92. data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
  93. data/lib/ucode/glyphs/last_resort/source.rb +125 -0
  94. data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
  95. data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
  96. data/lib/ucode/glyphs/last_resort.rb +36 -0
  97. data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
  98. data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
  99. data/lib/ucode/glyphs/page_renderer.rb +221 -0
  100. data/lib/ucode/glyphs/path_bbox.rb +62 -0
  101. data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
  102. data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
  103. data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
  104. data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
  105. data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
  106. data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
  107. data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
  108. data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
  109. data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
  110. data/lib/ucode/glyphs/real_fonts.rb +32 -0
  111. data/lib/ucode/glyphs/writer.rb +250 -0
  112. data/lib/ucode/glyphs.rb +27 -0
  113. data/lib/ucode/index.rb +106 -0
  114. data/lib/ucode/index_builder.rb +94 -0
  115. data/lib/ucode/models/audit/audit_axis.rb +30 -0
  116. data/lib/ucode/models/audit/audit_diff.rb +77 -0
  117. data/lib/ucode/models/audit/audit_report.rb +137 -0
  118. data/lib/ucode/models/audit/baseline.rb +32 -0
  119. data/lib/ucode/models/audit/block_summary.rb +72 -0
  120. data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
  121. data/lib/ucode/models/audit/codepoint_range.rb +39 -0
  122. data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
  123. data/lib/ucode/models/audit/color_capabilities.rb +91 -0
  124. data/lib/ucode/models/audit/discrepancy.rb +38 -0
  125. data/lib/ucode/models/audit/duplicate_group.rb +23 -0
  126. data/lib/ucode/models/audit/embedding_type.rb +81 -0
  127. data/lib/ucode/models/audit/field_change.rb +28 -0
  128. data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
  129. data/lib/ucode/models/audit/gasp_range.rb +63 -0
  130. data/lib/ucode/models/audit/hinting.rb +99 -0
  131. data/lib/ucode/models/audit/library_summary.rb +40 -0
  132. data/lib/ucode/models/audit/licensing.rb +48 -0
  133. data/lib/ucode/models/audit/metrics.rb +111 -0
  134. data/lib/ucode/models/audit/named_instance.rb +41 -0
  135. data/lib/ucode/models/audit/opentype_layout.rb +38 -0
  136. data/lib/ucode/models/audit/plane_summary.rb +31 -0
  137. data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
  138. data/lib/ucode/models/audit/script_features.rb +28 -0
  139. data/lib/ucode/models/audit/script_summary.rb +54 -0
  140. data/lib/ucode/models/audit/variation_detail.rb +42 -0
  141. data/lib/ucode/models/audit.rb +50 -0
  142. data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
  143. data/lib/ucode/models/bidi_mirroring.rb +19 -0
  144. data/lib/ucode/models/binary_property_assignment.rb +26 -0
  145. data/lib/ucode/models/block.rb +36 -0
  146. data/lib/ucode/models/case_folding_rule.rb +23 -0
  147. data/lib/ucode/models/cjk_radical.rb +23 -0
  148. data/lib/ucode/models/codepoint/bidi.rb +28 -0
  149. data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
  150. data/lib/ucode/models/codepoint/case_folding.rb +25 -0
  151. data/lib/ucode/models/codepoint/casing.rb +32 -0
  152. data/lib/ucode/models/codepoint/decomposition.rb +27 -0
  153. data/lib/ucode/models/codepoint/display.rb +24 -0
  154. data/lib/ucode/models/codepoint/emoji.rb +29 -0
  155. data/lib/ucode/models/codepoint/hangul.rb +20 -0
  156. data/lib/ucode/models/codepoint/identifier.rb +30 -0
  157. data/lib/ucode/models/codepoint/indic.rb +20 -0
  158. data/lib/ucode/models/codepoint/joining.rb +20 -0
  159. data/lib/ucode/models/codepoint/normalization.rb +35 -0
  160. data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
  161. data/lib/ucode/models/codepoint.rb +122 -0
  162. data/lib/ucode/models/name_alias.rb +21 -0
  163. data/lib/ucode/models/named_sequence.rb +19 -0
  164. data/lib/ucode/models/names_list_entry.rb +38 -0
  165. data/lib/ucode/models/plane.rb +36 -0
  166. data/lib/ucode/models/property_alias.rb +24 -0
  167. data/lib/ucode/models/property_value_alias.rb +26 -0
  168. data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
  169. data/lib/ucode/models/relationship/cross_reference.rb +17 -0
  170. data/lib/ucode/models/relationship/footnote.rb +24 -0
  171. data/lib/ucode/models/relationship/informal_alias.rb +18 -0
  172. data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
  173. data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
  174. data/lib/ucode/models/relationship.rb +57 -0
  175. data/lib/ucode/models/script.rb +41 -0
  176. data/lib/ucode/models/special_casing_rule.rb +28 -0
  177. data/lib/ucode/models/standardized_variant.rb +24 -0
  178. data/lib/ucode/models/unihan_entry.rb +23 -0
  179. data/lib/ucode/models.rb +47 -0
  180. data/lib/ucode/parsers/auxiliary.rb +26 -0
  181. data/lib/ucode/parsers/base.rb +137 -0
  182. data/lib/ucode/parsers/bidi_brackets.rb +41 -0
  183. data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
  184. data/lib/ucode/parsers/blocks.rb +63 -0
  185. data/lib/ucode/parsers/case_folding.rb +53 -0
  186. data/lib/ucode/parsers/cjk_radicals.rb +102 -0
  187. data/lib/ucode/parsers/derived_age.rb +59 -0
  188. data/lib/ucode/parsers/derived_core_properties.rb +60 -0
  189. data/lib/ucode/parsers/extracted_properties.rb +74 -0
  190. data/lib/ucode/parsers/name_aliases.rb +44 -0
  191. data/lib/ucode/parsers/named_sequences.rb +51 -0
  192. data/lib/ucode/parsers/names_list.rb +250 -0
  193. data/lib/ucode/parsers/property_aliases.rb +41 -0
  194. data/lib/ucode/parsers/property_value_aliases.rb +46 -0
  195. data/lib/ucode/parsers/script_extensions.rb +64 -0
  196. data/lib/ucode/parsers/scripts.rb +60 -0
  197. data/lib/ucode/parsers/special_casing.rb +62 -0
  198. data/lib/ucode/parsers/standardized_variants.rb +56 -0
  199. data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
  200. data/lib/ucode/parsers/unicode_data.rb +268 -0
  201. data/lib/ucode/parsers/unihan.rb +125 -0
  202. data/lib/ucode/parsers.rb +35 -0
  203. data/lib/ucode/range_entry.rb +58 -0
  204. data/lib/ucode/repo/aggregate_writer.rb +364 -0
  205. data/lib/ucode/repo/atomic_writes.rb +48 -0
  206. data/lib/ucode/repo/codepoint_writer.rb +96 -0
  207. data/lib/ucode/repo/paths.rb +122 -0
  208. data/lib/ucode/repo.rb +22 -0
  209. data/lib/ucode/site/config_emitter.rb +124 -0
  210. data/lib/ucode/site/generator.rb +178 -0
  211. data/lib/ucode/site/search_index.rb +68 -0
  212. data/lib/ucode/site/template/.gitignore +4 -0
  213. data/lib/ucode/site/template/.vitepress/config.ts +8 -0
  214. data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
  215. data/lib/ucode/site/template/char/[codepoint].md +13 -0
  216. data/lib/ucode/site/template/components/BlockView.vue +57 -0
  217. data/lib/ucode/site/template/components/CharView.vue +85 -0
  218. data/lib/ucode/site/template/components/PlaneView.vue +56 -0
  219. data/lib/ucode/site/template/components/SearchView.vue +66 -0
  220. data/lib/ucode/site/template/index.md +25 -0
  221. data/lib/ucode/site/template/package.json +18 -0
  222. data/lib/ucode/site/template/search.md +9 -0
  223. data/lib/ucode/site.rb +13 -0
  224. data/lib/ucode/version.rb +5 -0
  225. data/lib/ucode/version_resolver.rb +76 -0
  226. data/lib/ucode.rb +74 -0
  227. data/ucode.gemspec +56 -0
  228. metadata +404 -0
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ module Audit
8
+ # Per-plane coverage rollup on an {AuditReport}.
9
+ #
10
+ # Planes are the top-level grouping of Unicode (0 = BMP, 1 = SMP,
11
+ # 2 = SIP, …, 14 = SSP, 15/16 = PUA). One PlaneSummary per plane
12
+ # that has any block coverage — lets consumers compare coverage
13
+ # across planes at a glance without re-iterating every block.
14
+ class PlaneSummary < Lutaml::Model::Serializable
15
+ attribute :plane, :integer
16
+ attribute :blocks_total, :integer
17
+ attribute :assigned_total, :integer
18
+ attribute :covered_total, :integer
19
+ attribute :coverage_percent, :float
20
+
21
+ key_value do
22
+ map "plane", to: :plane
23
+ map "blocks_total", to: :blocks_total
24
+ map "assigned_total", to: :assigned_total
25
+ map "covered_total", to: :covered_total
26
+ map "coverage_percent", to: :coverage_percent
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ module Audit
8
+ # One row in a {LibrarySummary}'s script-coverage matrix.
9
+ #
10
+ # Lists every face (by postscript_name) whose cmap covers at least
11
+ # one codepoint assigned to a Unicode script. Lets a librarian
12
+ # answer "which fonts cover Cyrillic?" without re-auditing.
13
+ class ScriptCoverageRow < Lutaml::Model::Serializable
14
+ attribute :script, :string
15
+ attribute :face_count, :integer
16
+ attribute :faces, :string, collection: true, default: -> { [] }
17
+
18
+ key_value do
19
+ map "script", to: :script
20
+ map "face_count", to: :face_count
21
+ map "faces", to: :faces
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ module Audit
8
+ # Per-script breakdown of OpenType features.
9
+ #
10
+ # Pairs a script tag (e.g. "latn", "kana") with the GSUB features
11
+ # and GPOS features that apply to it. The two collections are
12
+ # kept separate because substitution and positioning have different
13
+ # semantics — consumers answering "does this font support kerning
14
+ # for Latin?" want to look at GPOS only.
15
+ class ScriptFeatures < Lutaml::Model::Serializable
16
+ attribute :script, :string
17
+ attribute :gsub_features, :string, collection: true, default: -> { [] }
18
+ attribute :gpos_features, :string, collection: true, default: -> { [] }
19
+
20
+ key_value do
21
+ map "script", to: :script
22
+ map "gsub_features", to: :gsub_features
23
+ map "gpos_features", to: :gpos_features
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ module Audit
8
+ # Per-script coverage rollup on an {AuditReport}.
9
+ #
10
+ # Replaces fontisan's bare `unicode_scripts: String[]` list with
11
+ # structured coverage per Unicode script (Latn, Hani, …). Lets a
12
+ # consumer answer "which scripts does this font cover, and how
13
+ # well?" without re-deriving from raw codepoint lists.
14
+ class ScriptSummary < Lutaml::Model::Serializable
15
+ STATUS_COMPLETE = "COMPLETE"
16
+ STATUS_PARTIAL = "PARTIAL"
17
+ STATUS_UNCOVERED_ASSIGNED = "UNCOVERED_ASSIGNED"
18
+ STATUS_NO_ASSIGNED_IN_SCRIPT = "NO_ASSIGNED_IN_SCRIPT"
19
+
20
+ attribute :script_code, :string
21
+ attribute :script_name, :string
22
+ attribute :blocks_total, :integer
23
+ attribute :assigned_total, :integer
24
+ attribute :covered_total, :integer
25
+ attribute :coverage_percent, :float
26
+ attribute :status, :string
27
+
28
+ key_value do
29
+ map "script_code", to: :script_code
30
+ map "script_name", to: :script_name
31
+ map "blocks_total", to: :blocks_total
32
+ map "assigned_total", to: :assigned_total
33
+ map "covered_total", to: :covered_total
34
+ map "coverage_percent", to: :coverage_percent
35
+ map "status", to: :status
36
+ end
37
+
38
+ # Same enum logic as {BlockSummary.derive_status} minus
39
+ # OUTSIDE_BASELINE (scripts are always inside the baseline).
40
+ # Order: assigned_total zero → NO_ASSIGNED_IN_SCRIPT; covered == assigned → COMPLETE; covered 0 → UNCOVERED_ASSIGNED; else PARTIAL.
41
+ # @return [String] one of the STATUS_* constants
42
+ def self.derive_status(covered_total:, assigned_total:)
43
+ return STATUS_NO_ASSIGNED_IN_SCRIPT if assigned_total.zero?
44
+
45
+ case covered_total
46
+ when assigned_total then STATUS_COMPLETE
47
+ when 0 then STATUS_UNCOVERED_ASSIGNED
48
+ else STATUS_PARTIAL
49
+ end
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ module Audit
8
+ # Variable-font detail for one face.
9
+ #
10
+ # Bundles everything fvar-derived (axes + named instances) with the
11
+ # presence flags for every variation side-table (avar/cvar/HVAR/VVAR/
12
+ # MVAR/gvar). A face is variable iff this object is non-nil.
13
+ #
14
+ # `axes` reuses the existing {AuditAxis} shape; `named_instances` is
15
+ # a parallel {NamedInstance} collection. The has_* booleans are
16
+ # presence checks only — they don't validate the table contents.
17
+ class VariationDetail < Lutaml::Model::Serializable
18
+ attribute :axes, AuditAxis, collection: true, default: -> { [] }
19
+ attribute :named_instances, NamedInstance, collection: true, default: -> { [] }
20
+
21
+ # Variation side-table presence flags.
22
+ attribute :has_avar, Lutaml::Model::Type::Boolean # axis variation
23
+ attribute :has_cvar, Lutaml::Model::Type::Boolean # CVT variation
24
+ attribute :has_hvar, Lutaml::Model::Type::Boolean # horizontal metrics
25
+ attribute :has_vvar, Lutaml::Model::Type::Boolean # vertical metrics
26
+ attribute :has_mvar, Lutaml::Model::Type::Boolean # metrics variation
27
+ attribute :has_gvar, Lutaml::Model::Type::Boolean # glyph variation (TT)
28
+
29
+ key_value do
30
+ map "axes", to: :axes
31
+ map "named_instances", to: :named_instances
32
+ map "has_avar", to: :has_avar
33
+ map "has_cvar", to: :has_cvar
34
+ map "has_hvar", to: :has_hvar
35
+ map "has_vvar", to: :has_vvar
36
+ map "has_mvar", to: :has_mvar
37
+ map "has_gvar", to: :has_gvar
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ucode
4
+ module Models
5
+ # Models for the per-face font audit pipeline.
6
+ #
7
+ # MECE with the UCD-side models (`Models::Block`, `Models::Script`,
8
+ # `Models::CodePoint`, …): those are the source-of-truth UCD
9
+ # representation. The classes here are the *audit artifact* shape —
10
+ # coverage summaries, per-face report, diffs, library rollups.
11
+ #
12
+ # Conventions (inherited from `Models`):
13
+ #
14
+ # - Inheritance, not include: `class Foo < Lutaml::Model::Serializable`
15
+ # - Wire shape via `key_value do … end`
16
+ # - Booleans via `Lutaml::Model::Type::Boolean` (not Ruby `:boolean`)
17
+ # - NEVER hand-rolled `to_h` / `from_h`
18
+ module Audit
19
+ # New models (ucode-specific schema, see TODO 02)
20
+ autoload :Baseline, "ucode/models/audit/baseline"
21
+ autoload :BlockSummary, "ucode/models/audit/block_summary"
22
+ autoload :ScriptSummary, "ucode/models/audit/script_summary"
23
+ autoload :PlaneSummary, "ucode/models/audit/plane_summary"
24
+ autoload :Discrepancy, "ucode/models/audit/discrepancy"
25
+ autoload :CodepointDetail, "ucode/models/audit/codepoint_detail"
26
+
27
+ # Ported from fontisan (namespace swap + minor renames)
28
+ autoload :AuditReport, "ucode/models/audit/audit_report"
29
+ autoload :CodepointRange, "ucode/models/audit/codepoint_range"
30
+ autoload :CodepointSetDiff, "ucode/models/audit/codepoint_set_diff"
31
+ autoload :AuditAxis, "ucode/models/audit/audit_axis"
32
+ autoload :NamedInstance, "ucode/models/audit/named_instance"
33
+ autoload :Licensing, "ucode/models/audit/licensing"
34
+ autoload :Metrics, "ucode/models/audit/metrics"
35
+ autoload :Hinting, "ucode/models/audit/hinting"
36
+ autoload :ColorCapabilities, "ucode/models/audit/color_capabilities"
37
+ autoload :VariationDetail, "ucode/models/audit/variation_detail"
38
+ autoload :OpenTypeLayout, "ucode/models/audit/opentype_layout"
39
+ autoload :FsSelectionFlags, "ucode/models/audit/fs_selection_flags"
40
+ autoload :GaspRange, "ucode/models/audit/gasp_range"
41
+ autoload :EmbeddingType, "ucode/models/audit/embedding_type"
42
+ autoload :ScriptCoverageRow, "ucode/models/audit/script_coverage_row"
43
+ autoload :ScriptFeatures, "ucode/models/audit/script_features"
44
+ autoload :FieldChange, "ucode/models/audit/field_change"
45
+ autoload :DuplicateGroup, "ucode/models/audit/duplicate_group"
46
+ autoload :LibrarySummary, "ucode/models/audit/library_summary"
47
+ autoload :AuditDiff, "ucode/models/audit/audit_diff"
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One row from `BidiBrackets.txt`. `type` is `o` (open) or `c` (close).
8
+ class BidiBracketPair < Lutaml::Model::Serializable
9
+ attribute :codepoint, :integer
10
+ attribute :paired_id, :string
11
+ attribute :type, :string
12
+
13
+ key_value do
14
+ map "codepoint", to: :codepoint
15
+ map "paired_id", to: :paired_id
16
+ map "type", to: :type
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One row from `BidiMirroring.txt`. `mirrored_id` is the ID string of
8
+ # the bidi mirroring partner.
9
+ class BidiMirroring < Lutaml::Model::Serializable
10
+ attribute :codepoint, :integer
11
+ attribute :mirrored_id, :string
12
+
13
+ key_value do
14
+ map "codepoint", to: :codepoint
15
+ map "mirrored_id", to: :mirrored_id
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One positive assignment from `DerivedCoreProperties.txt` (or any
8
+ # other binary-property file). The source file only lists codepoints
9
+ # for which the property is *true*; absence implies false.
10
+ #
11
+ # `property_short` carries the property name as written in the file.
12
+ # The Coordinator may resolve it to the long form via PropertyAliases
13
+ # before merging into `CodePoint.binary_properties`.
14
+ class BinaryPropertyAssignment < Lutaml::Model::Serializable
15
+ attribute :codepoint, :integer
16
+ attribute :property_short, :string
17
+ attribute :enabled, :boolean, default: true
18
+
19
+ key_value do
20
+ map "codepoint", to: :codepoint
21
+ map "property_short", to: :property_short
22
+ map "enabled", to: :enabled, render_default: true
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One row from `Blocks.txt`. `id` is the verbatim file value (e.g. `ASCII`,
8
+ # `CJK_Ext_A`, `Greek_And_Coptic`) — the folder name and JSON block identifier,
9
+ # NEVER slugified. `covers?`/`size` treat range_first..range_last as inclusive.
10
+ class Block < Lutaml::Model::Serializable
11
+ attribute :id, :string
12
+ attribute :name, :string
13
+ attribute :range_first, :integer
14
+ attribute :range_last, :integer
15
+ attribute :plane_number, :integer
16
+ attribute :codepoint_ids, :string, collection: true, default: -> { [] }
17
+
18
+ key_value do
19
+ map "id", to: :id
20
+ map "name", to: :name
21
+ map "range_first", to: :range_first
22
+ map "range_last", to: :range_last
23
+ map "plane_number", to: :plane_number
24
+ map "codepoint_ids", to: :codepoint_ids
25
+ end
26
+
27
+ def covers?(codepoint)
28
+ codepoint >= range_first && codepoint <= range_last
29
+ end
30
+
31
+ def size
32
+ range_last - range_first + 1
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One row from `CaseFolding.txt`. `status` is one of: C (common),
8
+ # F (full), S (simple), T (turkic).
9
+ class CaseFoldingRule < Lutaml::Model::Serializable
10
+ attribute :codepoint, :integer
11
+ attribute :status, :string
12
+ attribute :mapping_ids, :string, collection: true, default: -> { [] }
13
+ attribute :comment, :string
14
+
15
+ key_value do
16
+ map "codepoint", to: :codepoint
17
+ map "status", to: :status
18
+ map "mapping_ids", to: :mapping_ids
19
+ map "comment", to: :comment
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One row from `CJKRadicals.txt`. Maps a CJK radical number to its
8
+ # radical ideograph and (optionally) its canonical ideograph.
9
+ class CjkRadical < Lutaml::Model::Serializable
10
+ attribute :radical_number, :integer
11
+ attribute :cjk_radical_id, :string
12
+ attribute :ideograph_id, :string
13
+ attribute :canonical_ideograph_id, :string
14
+
15
+ key_value do
16
+ map "radical_number", to: :radical_number
17
+ map "cjk_radical_id", to: :cjk_radical_id
18
+ map "ideograph_id", to: :ideograph_id
19
+ map "canonical_ideograph_id", to: :canonical_ideograph_id
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
+ # Bidirectional class + mirroring + bracketing. Mirroring glyph and
9
+ # paired bracket are ID strings ("U+XXXX") — never nested CodePoint
10
+ # objects.
11
+ class Bidi < Lutaml::Model::Serializable
12
+ attribute :bidi_class, :string
13
+ attribute :is_mirrored, :boolean, default: false
14
+ attribute :mirroring_glyph_id, :string
15
+ attribute :paired_bracket_type, :string
16
+ attribute :paired_bracket_id, :string
17
+
18
+ key_value do
19
+ map "class", to: :bidi_class
20
+ map "is_mirrored", to: :is_mirrored
21
+ map "mirroring_glyph_id", to: :mirroring_glyph_id
22
+ map "paired_bracket_type", to: :paired_bracket_type
23
+ map "paired_bracket_id", to: :paired_bracket_id
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
+ # Grapheme / Word / Sentence break classification (UAX #29).
9
+ class BreakSegmentation < Lutaml::Model::Serializable
10
+ attribute :grapheme, :string
11
+ attribute :word, :string
12
+ attribute :sentence, :string
13
+
14
+ key_value do
15
+ map "grapheme", to: :grapheme
16
+ map "word", to: :word
17
+ map "sentence", to: :sentence
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
+ # Case folding rule from CaseFolding.txt. One row per codepoint,
9
+ # possibly with multiple statuses (C/S/F/T).
10
+ class CaseFolding < Lutaml::Model::Serializable
11
+ attribute :common_id, :string
12
+ attribute :simple_id, :string
13
+ attribute :full_ids, :string, collection: true, default: -> { [] }
14
+ attribute :turkic_id, :string
15
+
16
+ key_value do
17
+ map "common_id", to: :common_id
18
+ map "simple_id", to: :simple_id
19
+ map "full_ids", to: :full_ids
20
+ map "turkic_id", to: :turkic_id
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
+ # Simple + full case mappings. `simple_*_id` come from UnicodeData.txt
9
+ # fields 12-14; `full_*_ids` come from SpecialCasing.txt. When the
10
+ # full array is empty, the consumer falls back to the simple field.
11
+ class Casing < Lutaml::Model::Serializable
12
+ attribute :simple_upper_id, :string
13
+ attribute :simple_lower_id, :string
14
+ attribute :simple_title_id, :string
15
+ attribute :full_upper_ids, :string, collection: true, default: -> { [] }
16
+ attribute :full_lower_ids, :string, collection: true, default: -> { [] }
17
+ attribute :full_title_ids, :string, collection: true, default: -> { [] }
18
+ attribute :conditions, :string, collection: true, default: -> { [] }
19
+
20
+ key_value do
21
+ map "simple_upper_id", to: :simple_upper_id
22
+ map "simple_lower_id", to: :simple_lower_id
23
+ map "simple_title_id", to: :simple_title_id
24
+ map "full_upper_ids", to: :full_upper_ids
25
+ map "full_lower_ids", to: :full_lower_ids
26
+ map "full_title_ids", to: :full_title_ids
27
+ map "conditions", to: :conditions
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
+ # Decomposition mapping for a codepoint. From UnicodeData.txt field 5
9
+ # (type tag + mapping share it). Type `none` means no decomposition.
10
+ # `is_canonical?` is true only when `type` is exactly "can".
11
+ # `codepoint_ids` are the decomposed-into codepoints as ID strings.
12
+ class Decomposition < Lutaml::Model::Serializable
13
+ attribute :type, :string, default: "none"
14
+ attribute :codepoint_ids, :string, collection: true, default: -> { [] }
15
+
16
+ key_value do
17
+ map "type", to: :type
18
+ map "codepoint_ids", to: :codepoint_ids
19
+ end
20
+
21
+ def is_canonical?
22
+ type == "can"
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
+ # Display-class sub-model: East Asian Width, Line Break Class,
9
+ # Vertical Orientation. Short codes only — expanded client-side via
10
+ # enums.json.
11
+ class Display < Lutaml::Model::Serializable
12
+ attribute :east_asian_width, :string
13
+ attribute :line_break_class, :string
14
+ attribute :vertical_orientation, :string
15
+
16
+ key_value do
17
+ map "east_asian_width", to: :east_asian_width
18
+ map "line_break_class", to: :line_break_class
19
+ map "vertical_orientation", to: :vertical_orientation
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
+ # Emoji property bundle. Each flag corresponds to one Emoji property
9
+ # from `extracted/DerivedBinaryProperties.txt` / emoji-data.txt.
10
+ class Emoji < Lutaml::Model::Serializable
11
+ attribute :is_emoji, :boolean, default: false
12
+ attribute :is_presentation_default, :boolean, default: false
13
+ attribute :is_modifier, :boolean, default: false
14
+ attribute :is_base, :boolean, default: false
15
+ attribute :is_component, :boolean, default: false
16
+ attribute :is_extended_pictographic, :boolean, default: false
17
+
18
+ key_value do
19
+ map "is_emoji", to: :is_emoji
20
+ map "is_presentation_default", to: :is_presentation_default
21
+ map "is_modifier", to: :is_modifier
22
+ map "is_base", to: :is_base
23
+ map "is_component", to: :is_component
24
+ map "is_extended_pictographic", to: :is_extended_pictographic
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
+ # Hangul syllable metadata (hst + JSN). NOTE(review): file is codepoint/hangul.rb but the constant is HangulSyllable — confirm the autoload/require entry maps the two.
9
+ class HangulSyllable < Lutaml::Model::Serializable
10
+ attribute :type, :string, default: "NA"
11
+ attribute :jamo_short_name, :string
12
+
13
+ key_value do
14
+ map "type", to: :type
15
+ map "jamo_short_name", to: :jamo_short_name
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
+ # Identifier-related properties: ID_Start / ID_Continue / XID_Start /
9
+ # XID_Continue, plus status + types from IdentifierStatus.txt and
10
+ # IdentifierType.txt.
11
+ class Identifier < Lutaml::Model::Serializable
12
+ attribute :is_start, :boolean, default: false
13
+ attribute :is_continue, :boolean, default: false
14
+ attribute :xid_start, :boolean, default: false
15
+ attribute :xid_continue, :boolean, default: false
16
+ attribute :status, :string
17
+ attribute :types, :string, collection: true, default: -> { [] }
18
+
19
+ key_value do
20
+ map "is_start", to: :is_start
21
+ map "is_continue", to: :is_continue
22
+ map "xid_start", to: :xid_start
23
+ map "xid_continue", to: :xid_continue
24
+ map "status", to: :status
25
+ map "types", to: :types
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
+ # Indic positional + syllabic category (for complex Brahmic shaping).
9
+ class Indic < Lutaml::Model::Serializable
10
+ attribute :syllabic_category, :string
11
+ attribute :positional_category, :string
12
+
13
+ key_value do
14
+ map "syllabic_category", to: :syllabic_category
15
+ map "positional_category", to: :positional_category
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end