ucode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. checksums.yaml +7 -0
  2. data/CLAUDE.md +211 -0
  3. data/Gemfile +22 -0
  4. data/Gemfile.lock +406 -0
  5. data/README.md +469 -0
  6. data/Rakefile +18 -0
  7. data/TODO.new/00-README.md +66 -0
  8. data/TODO.new/01-pillar-terminology-alignment.md +69 -0
  9. data/TODO.new/02-audit-schema-design.md +255 -0
  10. data/TODO.new/03-directory-output-spec.md +203 -0
  11. data/TODO.new/04-fontist-org-contract.md +173 -0
  12. data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
  13. data/TODO.new/06-audit-namespace-skeleton.md +105 -0
  14. data/TODO.new/07-audit-models-port.md +132 -0
  15. data/TODO.new/08-extractors-cheap-port.md +113 -0
  16. data/TODO.new/09-extractors-expensive-port.md +99 -0
  17. data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
  18. data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
  19. data/TODO.new/12-formatters-port.md +115 -0
  20. data/TODO.new/13-directory-emitter.md +147 -0
  21. data/TODO.new/14-html-face-browser.md +144 -0
  22. data/TODO.new/15-html-library-browser.md +102 -0
  23. data/TODO.new/16-cli-audit-subcommands.md +142 -0
  24. data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
  25. data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
  26. data/TODO.new/19-fontisan-docs-update.md +155 -0
  27. data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
  28. data/TODO.new/21-canonical-unicode17-build.md +148 -0
  29. data/TODO.new/22-implementation-order.md +176 -0
  30. data/UCODE_CHANGELOG.md +97 -0
  31. data/exe/ucode +8 -0
  32. data/lib/ucode/aggregator.rb +77 -0
  33. data/lib/ucode/audit/block_aggregator.rb +90 -0
  34. data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
  35. data/lib/ucode/audit/context.rb +137 -0
  36. data/lib/ucode/audit/discrepancy_detector.rb +213 -0
  37. data/lib/ucode/audit/extractors/aggregations.rb +70 -0
  38. data/lib/ucode/audit/extractors/base.rb +21 -0
  39. data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
  40. data/lib/ucode/audit/extractors/coverage.rb +55 -0
  41. data/lib/ucode/audit/extractors/hinting.rb +199 -0
  42. data/lib/ucode/audit/extractors/identity.rb +65 -0
  43. data/lib/ucode/audit/extractors/licensing.rb +75 -0
  44. data/lib/ucode/audit/extractors/metrics.rb +108 -0
  45. data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
  46. data/lib/ucode/audit/extractors/provenance.rb +34 -0
  47. data/lib/ucode/audit/extractors/style.rb +88 -0
  48. data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
  49. data/lib/ucode/audit/extractors.rb +31 -0
  50. data/lib/ucode/audit/plane_aggregator.rb +37 -0
  51. data/lib/ucode/audit/registry.rb +63 -0
  52. data/lib/ucode/audit/script_aggregator.rb +92 -0
  53. data/lib/ucode/audit.rb +27 -0
  54. data/lib/ucode/cache.rb +113 -0
  55. data/lib/ucode/cli.rb +272 -0
  56. data/lib/ucode/commands/build.rb +68 -0
  57. data/lib/ucode/commands/cache.rb +46 -0
  58. data/lib/ucode/commands/fetch.rb +62 -0
  59. data/lib/ucode/commands/font_coverage.rb +57 -0
  60. data/lib/ucode/commands/glyphs.rb +136 -0
  61. data/lib/ucode/commands/lookup.rb +65 -0
  62. data/lib/ucode/commands/parse.rb +62 -0
  63. data/lib/ucode/commands/site.rb +33 -0
  64. data/lib/ucode/commands.rb +19 -0
  65. data/lib/ucode/config.rb +110 -0
  66. data/lib/ucode/coordinator/indices.rb +34 -0
  67. data/lib/ucode/coordinator.rb +397 -0
  68. data/lib/ucode/database.rb +214 -0
  69. data/lib/ucode/db_builder.rb +107 -0
  70. data/lib/ucode/error.rb +96 -0
  71. data/lib/ucode/fetch/code_charts.rb +57 -0
  72. data/lib/ucode/fetch/http.rb +83 -0
  73. data/lib/ucode/fetch/ucd_zip.rb +57 -0
  74. data/lib/ucode/fetch/unihan_zip.rb +57 -0
  75. data/lib/ucode/fetch.rb +14 -0
  76. data/lib/ucode/glyphs/cell_extractor.rb +130 -0
  77. data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
  78. data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
  79. data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
  80. data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
  81. data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
  82. data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
  83. data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
  84. data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
  85. data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
  86. data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
  87. data/lib/ucode/glyphs/grid.rb +30 -0
  88. data/lib/ucode/glyphs/grid_detector.rb +165 -0
  89. data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
  90. data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
  91. data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
  92. data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
  93. data/lib/ucode/glyphs/last_resort/source.rb +125 -0
  94. data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
  95. data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
  96. data/lib/ucode/glyphs/last_resort.rb +36 -0
  97. data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
  98. data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
  99. data/lib/ucode/glyphs/page_renderer.rb +221 -0
  100. data/lib/ucode/glyphs/path_bbox.rb +62 -0
  101. data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
  102. data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
  103. data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
  104. data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
  105. data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
  106. data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
  107. data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
  108. data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
  109. data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
  110. data/lib/ucode/glyphs/real_fonts.rb +32 -0
  111. data/lib/ucode/glyphs/writer.rb +250 -0
  112. data/lib/ucode/glyphs.rb +27 -0
  113. data/lib/ucode/index.rb +106 -0
  114. data/lib/ucode/index_builder.rb +94 -0
  115. data/lib/ucode/models/audit/audit_axis.rb +30 -0
  116. data/lib/ucode/models/audit/audit_diff.rb +77 -0
  117. data/lib/ucode/models/audit/audit_report.rb +137 -0
  118. data/lib/ucode/models/audit/baseline.rb +32 -0
  119. data/lib/ucode/models/audit/block_summary.rb +72 -0
  120. data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
  121. data/lib/ucode/models/audit/codepoint_range.rb +39 -0
  122. data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
  123. data/lib/ucode/models/audit/color_capabilities.rb +91 -0
  124. data/lib/ucode/models/audit/discrepancy.rb +38 -0
  125. data/lib/ucode/models/audit/duplicate_group.rb +23 -0
  126. data/lib/ucode/models/audit/embedding_type.rb +81 -0
  127. data/lib/ucode/models/audit/field_change.rb +28 -0
  128. data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
  129. data/lib/ucode/models/audit/gasp_range.rb +63 -0
  130. data/lib/ucode/models/audit/hinting.rb +99 -0
  131. data/lib/ucode/models/audit/library_summary.rb +40 -0
  132. data/lib/ucode/models/audit/licensing.rb +48 -0
  133. data/lib/ucode/models/audit/metrics.rb +111 -0
  134. data/lib/ucode/models/audit/named_instance.rb +41 -0
  135. data/lib/ucode/models/audit/opentype_layout.rb +38 -0
  136. data/lib/ucode/models/audit/plane_summary.rb +31 -0
  137. data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
  138. data/lib/ucode/models/audit/script_features.rb +28 -0
  139. data/lib/ucode/models/audit/script_summary.rb +54 -0
  140. data/lib/ucode/models/audit/variation_detail.rb +42 -0
  141. data/lib/ucode/models/audit.rb +50 -0
  142. data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
  143. data/lib/ucode/models/bidi_mirroring.rb +19 -0
  144. data/lib/ucode/models/binary_property_assignment.rb +26 -0
  145. data/lib/ucode/models/block.rb +36 -0
  146. data/lib/ucode/models/case_folding_rule.rb +23 -0
  147. data/lib/ucode/models/cjk_radical.rb +23 -0
  148. data/lib/ucode/models/codepoint/bidi.rb +28 -0
  149. data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
  150. data/lib/ucode/models/codepoint/case_folding.rb +25 -0
  151. data/lib/ucode/models/codepoint/casing.rb +32 -0
  152. data/lib/ucode/models/codepoint/decomposition.rb +27 -0
  153. data/lib/ucode/models/codepoint/display.rb +24 -0
  154. data/lib/ucode/models/codepoint/emoji.rb +29 -0
  155. data/lib/ucode/models/codepoint/hangul.rb +20 -0
  156. data/lib/ucode/models/codepoint/identifier.rb +30 -0
  157. data/lib/ucode/models/codepoint/indic.rb +20 -0
  158. data/lib/ucode/models/codepoint/joining.rb +20 -0
  159. data/lib/ucode/models/codepoint/normalization.rb +35 -0
  160. data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
  161. data/lib/ucode/models/codepoint.rb +122 -0
  162. data/lib/ucode/models/name_alias.rb +21 -0
  163. data/lib/ucode/models/named_sequence.rb +19 -0
  164. data/lib/ucode/models/names_list_entry.rb +38 -0
  165. data/lib/ucode/models/plane.rb +36 -0
  166. data/lib/ucode/models/property_alias.rb +24 -0
  167. data/lib/ucode/models/property_value_alias.rb +26 -0
  168. data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
  169. data/lib/ucode/models/relationship/cross_reference.rb +17 -0
  170. data/lib/ucode/models/relationship/footnote.rb +24 -0
  171. data/lib/ucode/models/relationship/informal_alias.rb +18 -0
  172. data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
  173. data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
  174. data/lib/ucode/models/relationship.rb +57 -0
  175. data/lib/ucode/models/script.rb +41 -0
  176. data/lib/ucode/models/special_casing_rule.rb +28 -0
  177. data/lib/ucode/models/standardized_variant.rb +24 -0
  178. data/lib/ucode/models/unihan_entry.rb +23 -0
  179. data/lib/ucode/models.rb +47 -0
  180. data/lib/ucode/parsers/auxiliary.rb +26 -0
  181. data/lib/ucode/parsers/base.rb +137 -0
  182. data/lib/ucode/parsers/bidi_brackets.rb +41 -0
  183. data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
  184. data/lib/ucode/parsers/blocks.rb +63 -0
  185. data/lib/ucode/parsers/case_folding.rb +53 -0
  186. data/lib/ucode/parsers/cjk_radicals.rb +102 -0
  187. data/lib/ucode/parsers/derived_age.rb +59 -0
  188. data/lib/ucode/parsers/derived_core_properties.rb +60 -0
  189. data/lib/ucode/parsers/extracted_properties.rb +74 -0
  190. data/lib/ucode/parsers/name_aliases.rb +44 -0
  191. data/lib/ucode/parsers/named_sequences.rb +51 -0
  192. data/lib/ucode/parsers/names_list.rb +250 -0
  193. data/lib/ucode/parsers/property_aliases.rb +41 -0
  194. data/lib/ucode/parsers/property_value_aliases.rb +46 -0
  195. data/lib/ucode/parsers/script_extensions.rb +64 -0
  196. data/lib/ucode/parsers/scripts.rb +60 -0
  197. data/lib/ucode/parsers/special_casing.rb +62 -0
  198. data/lib/ucode/parsers/standardized_variants.rb +56 -0
  199. data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
  200. data/lib/ucode/parsers/unicode_data.rb +268 -0
  201. data/lib/ucode/parsers/unihan.rb +125 -0
  202. data/lib/ucode/parsers.rb +35 -0
  203. data/lib/ucode/range_entry.rb +58 -0
  204. data/lib/ucode/repo/aggregate_writer.rb +364 -0
  205. data/lib/ucode/repo/atomic_writes.rb +48 -0
  206. data/lib/ucode/repo/codepoint_writer.rb +96 -0
  207. data/lib/ucode/repo/paths.rb +122 -0
  208. data/lib/ucode/repo.rb +22 -0
  209. data/lib/ucode/site/config_emitter.rb +124 -0
  210. data/lib/ucode/site/generator.rb +178 -0
  211. data/lib/ucode/site/search_index.rb +68 -0
  212. data/lib/ucode/site/template/.gitignore +4 -0
  213. data/lib/ucode/site/template/.vitepress/config.ts +8 -0
  214. data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
  215. data/lib/ucode/site/template/char/[codepoint].md +13 -0
  216. data/lib/ucode/site/template/components/BlockView.vue +57 -0
  217. data/lib/ucode/site/template/components/CharView.vue +85 -0
  218. data/lib/ucode/site/template/components/PlaneView.vue +56 -0
  219. data/lib/ucode/site/template/components/SearchView.vue +66 -0
  220. data/lib/ucode/site/template/index.md +25 -0
  221. data/lib/ucode/site/template/package.json +18 -0
  222. data/lib/ucode/site/template/search.md +9 -0
  223. data/lib/ucode/site.rb +13 -0
  224. data/lib/ucode/version.rb +5 -0
  225. data/lib/ucode/version_resolver.rb +76 -0
  226. data/lib/ucode.rb +74 -0
  227. data/ucode.gemspec +56 -0
  228. metadata +404 -0
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
+ # Arabic shaping: joining type (U/L/R/D/T/C) + joining group.
9
+ class Joining < Lutaml::Model::Serializable
10
+ attribute :type, :string
11
+ attribute :group, :string
12
+
13
+ key_value do
14
+ map "type", to: :type
15
+ map "group", to: :group
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
+ # Normalization Quick Check fields (NFC/NFD/NFKC/NFKD QC), the
9
+ # composition-exclusion and cased flags, and the "Changes_When_*" booleans.
10
+ class Normalization < Lutaml::Model::Serializable
11
+ attribute :nfc_qc, :string, default: "Y"
12
+ attribute :nfd_qc, :boolean, default: true
13
+ attribute :nfkc_qc, :string, default: "Y"
14
+ attribute :nfkd_qc, :boolean, default: true
15
+ attribute :composition_exclusion, :boolean, default: false
16
+ attribute :is_cased, :boolean, default: false
17
+ attribute :changes_when_casefolded, :boolean, default: false
18
+ attribute :changes_when_casemapped, :boolean, default: false
19
+ attribute :changes_when_nfkc_casefolded, :boolean, default: false
20
+
21
+ key_value do
22
+ map "nfc_qc", to: :nfc_qc
23
+ map "nfd_qc", to: :nfd_qc
24
+ map "nfkc_qc", to: :nfkc_qc
25
+ map "nfkd_qc", to: :nfkd_qc
26
+ map "composition_exclusion", to: :composition_exclusion
27
+ map "is_cased", to: :is_cased
28
+ map "changes_when_casefolded", to: :changes_when_casefolded
29
+ map "changes_when_casemapped", to: :changes_when_casemapped
30
+ map "changes_when_nfkc_casefolded", to: :changes_when_nfkc_casefolded
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ class CodePoint < Lutaml::Model::Serializable
8
+ # Numeric value of a codepoint (UnicodeData.txt fields 7+8). Stored
9
+ # as numerator + denominator (Integers) so JSON serialization is
10
+ # exact (1/2, not 0.5). The Rational reconstruction is computed on
11
+ # demand via #to_r.
12
+ class NumericValue < Lutaml::Model::Serializable
13
+ attribute :type, :string, default: "None"
14
+ attribute :numerator, :integer, default: 0
15
+ attribute :denominator, :integer, default: 1
16
+
17
+ key_value do
18
+ map "type", to: :type
19
+ map "numerator", to: :numerator
20
+ map "denominator", to: :denominator
21
+ end
22
+
23
+ def is_decimal?
24
+ type == "de"
25
+ end
26
+
27
+ def to_r
28
+ return Rational(0) if denominator.nil? || denominator.zero?
29
+
30
+ Rational(numerator, denominator)
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # Central CodePoint entity. Carries identity, scalar UCD properties,
8
+ # and typed sub-model bundles. Every cross-codepoint reference is a
9
+ # "U+XXXX" string; nested CodePoint objects are forbidden (single
10
+ # source of truth — each codepoint's data lives only in its own
11
+ # folder).
12
+ #
13
+ # Sub-model classes are nested under CodePoint for cohesion (one
14
+ # namespace per concern). They're autoloaded from this file.
15
+ class CodePoint < Lutaml::Model::Serializable
16
+ autoload :Decomposition, "ucode/models/codepoint/decomposition"
17
+ autoload :NumericValue, "ucode/models/codepoint/numeric_value"
18
+ autoload :Casing, "ucode/models/codepoint/casing"
19
+ autoload :CaseFolding, "ucode/models/codepoint/case_folding"
20
+ autoload :Bidi, "ucode/models/codepoint/bidi"
21
+ autoload :Joining, "ucode/models/codepoint/joining"
22
+ autoload :Display, "ucode/models/codepoint/display"
23
+ autoload :BreakSegmentation, "ucode/models/codepoint/break_segmentation"
24
+ autoload :HangulSyllable, "ucode/models/codepoint/hangul"
25
+ autoload :Indic, "ucode/models/codepoint/indic"
26
+ autoload :Emoji, "ucode/models/codepoint/emoji"
27
+ autoload :Identifier, "ucode/models/codepoint/identifier"
28
+ autoload :Normalization, "ucode/models/codepoint/normalization"
29
+
30
+ # Identity + scalar attributes
31
+ attribute :cp, :integer
32
+ attribute :id, :string
33
+ attribute :name, :string
34
+ attribute :name1, :string
35
+ attribute :json_name, :string
36
+ attribute :block_id, :string
37
+ attribute :plane_number, :integer
38
+ attribute :script_code, :string
39
+ attribute :script_extensions, :string, collection: true, default: -> { [] }
40
+ attribute :age, :string
41
+ attribute :general_category, :string
42
+ attribute :combining_class, :integer, default: 0
43
+
44
+ # Sub-model bundles (nullable; present iff data exists)
45
+ attribute :decomposition, Decomposition
46
+ attribute :numeric, NumericValue
47
+ attribute :casing, Casing
48
+ attribute :case_folding, CaseFolding
49
+ attribute :bidi, Bidi
50
+ attribute :joining, Joining
51
+ attribute :display, Display
52
+ attribute :break_segmentation, BreakSegmentation
53
+ attribute :hangul, HangulSyllable
54
+ attribute :indic, Indic
55
+ attribute :emoji, Emoji
56
+ attribute :identifier, Identifier
57
+ attribute :normalization, Normalization
58
+
59
+ # Cross-codepoint relationships — polymorphic; see Relationship.
60
+ attribute :relationships, "Ucode::Models::Relationship",
61
+ collection: true,
62
+ default: -> { [] },
63
+ polymorphic: %w[
64
+ Ucode::Models::Relationship::CrossReference
65
+ Ucode::Models::Relationship::SampleSequence
66
+ Ucode::Models::Relationship::CompatEquiv
67
+ Ucode::Models::Relationship::InformalAlias
68
+ Ucode::Models::Relationship::Footnote
69
+ Ucode::Models::Relationship::VariationSequence
70
+ ]
71
+
72
+ attribute :binary_properties, :string, collection: true, default: -> { [] }
73
+ attribute :standardized_variants, "Ucode::Models::StandardizedVariant",
74
+ collection: true, default: -> { [] }
75
+ attribute :unihan, "Ucode::Models::UnihanEntry"
76
+ attribute :names_list, "Ucode::Models::NamesListEntry"
77
+
78
+ key_value do
79
+ map "codepoint", to: :cp
80
+ map "id", to: :id
81
+ map "name", to: :name
82
+ map "name1", to: :name1
83
+ map "json_name", to: :json_name
84
+ map "block_id", to: :block_id
85
+ map "plane_number", to: :plane_number
86
+ map "script_code", to: :script_code
87
+ map "script_extensions", to: :script_extensions
88
+ map "age", to: :age
89
+ map "general_category", to: :general_category
90
+ map "combining_class", to: :combining_class
91
+ map "decomposition", to: :decomposition
92
+ map "numeric", to: :numeric
93
+ map "casing", to: :casing
94
+ map "case_folding", to: :case_folding
95
+ map "bidi", to: :bidi
96
+ map "joining", to: :joining
97
+ map "display", to: :display
98
+ map "break_segmentation", to: :break_segmentation
99
+ map "hangul", to: :hangul
100
+ map "indic", to: :indic
101
+ map "emoji", to: :emoji
102
+ map "identifier", to: :identifier
103
+ map "normalization", to: :normalization
104
+ map "relationships", to: :relationships, polymorphic: {
105
+ attribute: "kind",
106
+ class_map: {
107
+ "see_also" => "Ucode::Models::Relationship::CrossReference",
108
+ "sample_sequence" => "Ucode::Models::Relationship::SampleSequence",
109
+ "compatibility_equivalent" => "Ucode::Models::Relationship::CompatEquiv",
110
+ "alias" => "Ucode::Models::Relationship::InformalAlias",
111
+ "footnote" => "Ucode::Models::Relationship::Footnote",
112
+ "variation_sequence" => "Ucode::Models::Relationship::VariationSequence",
113
+ },
114
+ }
115
+ map "binary_properties", to: :binary_properties
116
+ map "standardized_variants", to: :standardized_variants
117
+ map "unihan", to: :unihan
118
+ map "names_list", to: :names_list
119
+ end
120
+ end
121
+ end
122
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One row from `NameAliases.txt`. `type` is one of:
8
+ # correction / control / alternate / figment / abbreviation.
9
+ class NameAlias < Lutaml::Model::Serializable
10
+ attribute :codepoint, :integer
11
+ attribute :text, :string
12
+ attribute :type, :string
13
+
14
+ key_value do
15
+ map "codepoint", to: :codepoint
16
+ map "text", to: :text
17
+ map "type", to: :type
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One row from `NamedSequences.txt`. `codepoint_ids` is the ordered
8
+ # sequence of codepoints that make up the named sequence.
9
+ class NamedSequence < Lutaml::Model::Serializable
10
+ attribute :name, :string
11
+ attribute :codepoint_ids, :string, collection: true, default: -> { [] }
12
+
13
+ key_value do
14
+ map "name", to: :name
15
+ map "codepoint_ids", to: :codepoint_ids
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # A NamesList.txt entry header plus its annotations. Each annotation
8
+ # array holds typed Relationship subclass instances (see Relationship).
9
+ #
10
+ # The Coordinator flattens these into CodePoint.relationships for the
11
+ # primary codepoint; this standalone model is emitted only when a
12
+ # consumer needs the raw, scope-preserved grouping.
13
+ class NamesListEntry < Lutaml::Model::Serializable
14
+ attribute :codepoint, :integer
15
+ attribute :name, :string
16
+ attribute :cross_references, "Ucode::Models::Relationship",
17
+ collection: true, default: -> { [] }
18
+ attribute :sample_sequences, "Ucode::Models::Relationship",
19
+ collection: true, default: -> { [] }
20
+ attribute :compatibility_equivalents, "Ucode::Models::Relationship",
21
+ collection: true, default: -> { [] }
22
+ attribute :informal_aliases, "Ucode::Models::Relationship",
23
+ collection: true, default: -> { [] }
24
+ attribute :footnotes, "Ucode::Models::Relationship",
25
+ collection: true, default: -> { [] }
26
+
27
+ key_value do
28
+ map "codepoint", to: :codepoint
29
+ map "name", to: :name
30
+ map "cross_references", to: :cross_references
31
+ map "sample_sequences", to: :sample_sequences
32
+ map "compatibility_equivalents", to: :compatibility_equivalents
33
+ map "informal_aliases", to: :informal_aliases
34
+ map "footnotes", to: :footnotes
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One of the 17 Unicode planes (BMP through Plane 16).
8
+ #
9
+ # Plane metadata is derived from the codepoint range. Plane pages are
10
+ # pre-rendered; per-codepoint pages are loaded client-side.
11
+ class Plane < Lutaml::Model::Serializable
12
+ attribute :number, :integer
13
+ attribute :name, :string
14
+ attribute :abbrev, :string
15
+ attribute :range_first, :integer
16
+ attribute :range_last, :integer
17
+ attribute :block_ids, :string, collection: true, default: -> { [] }
18
+
19
+ key_value do
20
+ map "number", to: :number
21
+ map "name", to: :name
22
+ map "abbrev", to: :abbrev
23
+ map "range_first", to: :range_first
24
+ map "range_last", to: :range_last
25
+ map "block_ids", to: :block_ids
26
+ end
27
+
28
+ # Number of codepoints spanned by this plane: the size of the
29
+ # inclusive range `range_first..range_last`. Both bounds must be
30
+ # set (non-nil) before calling.
31
+ def codepoint_count
32
+ range_last - range_first + 1
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One row from `PropertyAliases.txt`:
8
+ #
9
+ # short_code; long_name; other_alias; other_alias; ...
10
+ #
11
+ # Example: `ccc; Canonical_Combining_Class; ccc`
12
+ class PropertyAlias < Lutaml::Model::Serializable
13
+ attribute :short, :string
14
+ attribute :long, :string
15
+ attribute :other_aliases, :string, collection: true, default: -> { [] }
16
+
17
+ key_value do
18
+ map "short", to: :short
19
+ map "long", to: :long
20
+ map "other_aliases", to: :other_aliases
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One row from `PropertyValueAliases.txt`:
8
+ #
9
+ # property; short_value; long_value; other_alias; ...
10
+ #
11
+ # Example: `gc; Lu; Uppercase_Letter`.
12
+ class PropertyValueAlias < Lutaml::Model::Serializable
13
+ attribute :property, :string
14
+ attribute :short, :string
15
+ attribute :long, :string
16
+ attribute :other_aliases, :string, collection: true, default: -> { [] }
17
+
18
+ key_value do
19
+ map "property", to: :property
20
+ map "short", to: :short
21
+ map "long", to: :long
22
+ map "other_aliases", to: :other_aliases
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ucode/models/relationship"
4
+
5
+ module Ucode
6
+ module Models
7
+ class Relationship < Lutaml::Model::Serializable
8
+ # `≡ U+XXXX note` from NamesList.txt. Compatibility equivalent.
9
+ # Exactly one target.
10
+ class CompatEquiv < Relationship
11
+ KIND = "compatibility_equivalent"
12
+ private_constant :KIND
13
+
14
+ attribute :kind, :string, polymorphic_class: true, default: KIND
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ucode/models/relationship"
4
+
5
+ module Ucode
6
+ module Models
7
+ class Relationship < Lutaml::Model::Serializable
8
+ # `→ U+XXXX note` from NamesList.txt. Always exactly one target.
9
+ class CrossReference < Relationship
10
+ KIND = "see_also"
11
+ private_constant :KIND
12
+
13
+ attribute :kind, :string, polymorphic_class: true, default: KIND
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ucode/models/relationship"
4
+
5
+ module Ucode
6
+ module Models
7
+ class Relationship < Lutaml::Model::Serializable
8
+ # `* footnote text` from NamesList.txt. Targets always empty.
9
+ # `category` carries usage/history/design (future split).
10
+ class Footnote < Relationship
11
+ KIND = "footnote"
12
+ private_constant :KIND
13
+
14
+ attribute :kind, :string, polymorphic_class: true, default: KIND
15
+
16
+ attribute :category, :string
17
+
18
+ key_value do
19
+ map "category", to: :category
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ucode/models/relationship"
4
+
5
+ module Ucode
6
+ module Models
7
+ class Relationship < Lutaml::Model::Serializable
8
+ # `= alias text` from NamesList.txt. Targets is always empty; the
9
+ # alias text lives in `description`.
10
+ class InformalAlias < Relationship
11
+ KIND = "alias"
12
+ private_constant :KIND
13
+
14
+ attribute :kind, :string, polymorphic_class: true, default: KIND
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ucode/models/relationship"
4
+
5
+ module Ucode
6
+ module Models
7
+ class Relationship < Lutaml::Model::Serializable
8
+ # `× U+XXXX U+YYYY note` from NamesList.txt. `target_ids` is the
9
+ # ordered sequence; `rendered_form` is the visual result (optional).
10
+ class SampleSequence < Relationship
11
+ KIND = "sample_sequence"
12
+ private_constant :KIND
13
+
14
+ attribute :kind, :string, polymorphic_class: true, default: KIND
15
+
16
+ attribute :rendered_form, :string
17
+
18
+ key_value do
19
+ map "rendered_form", to: :rendered_form
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ucode/models/relationship"
4
+
5
+ module Ucode
6
+ module Models
7
+ class Relationship < Lutaml::Model::Serializable
8
+ # Variation sequence from StandardizedVariants.txt.
9
+ # `target_ids[0]` is the variation selector; `contexts` carries the
10
+ # shaping contexts.
11
+ class VariationSequence < Relationship
12
+ KIND = "variation_sequence"
13
+ private_constant :KIND
14
+
15
+ attribute :kind, :string, polymorphic_class: true, default: KIND
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # Polymorphic Relationship base. The `kind` attribute is the
8
+ # discriminator that identifies the concrete subclass on the wire.
9
+ #
10
+ # Six concrete subclasses model the five marker types in NamesList.txt
11
+ # plus standardized variants:
12
+ #
13
+ # CrossReference (→ see also)
14
+ # SampleSequence (× typical usage)
15
+ # CompatEquiv (≡ compatibility equivalent)
16
+ # InformalAlias (= informal alias)
17
+ # Footnote (* explanatory note)
18
+ # VariationSequence (from StandardizedVariants.txt)
19
+ #
20
+ # Adding a new relationship kind later is OCP: subclass + autoload + one
21
+ # entry in each polymorphic map. Nothing else changes.
22
+ class Relationship < Lutaml::Model::Serializable
23
+ autoload :CrossReference, "ucode/models/relationship/cross_reference"
24
+ autoload :SampleSequence, "ucode/models/relationship/sample_sequence"
25
+ autoload :CompatEquiv, "ucode/models/relationship/compat_equiv"
26
+ autoload :InformalAlias, "ucode/models/relationship/informal_alias"
27
+ autoload :Footnote, "ucode/models/relationship/footnote"
28
+ autoload :VariationSequence, "ucode/models/relationship/variation_sequence"
29
+
30
+ KIND = "relationship"
31
+ private_constant :KIND
32
+
33
+ attribute :kind, :string, polymorphic_class: true, default: KIND
34
+ attribute :target_ids, :string, collection: true, default: -> { [] }
35
+ attribute :description, :string
36
+ attribute :source, :string
37
+ attribute :contexts, :string, collection: true, default: -> { [] }
38
+
39
+ key_value do
40
+ map "kind", to: :kind,
41
+ polymorphic_map: {
42
+ "see_also" => "Ucode::Models::Relationship::CrossReference",
43
+ "sample_sequence" => "Ucode::Models::Relationship::SampleSequence",
44
+ "compatibility_equivalent" => "Ucode::Models::Relationship::CompatEquiv",
45
+ "alias" => "Ucode::Models::Relationship::InformalAlias",
46
+ "footnote" => "Ucode::Models::Relationship::Footnote",
47
+ "variation_sequence" => "Ucode::Models::Relationship::VariationSequence",
48
+ },
49
+ render_default: true
50
+ map "targets", to: :target_ids
51
+ map "description", to: :description
52
+ map "source", to: :source
53
+ map "contexts", to: :contexts
54
+ end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One script assignment from `Scripts.txt`. Each row is a single
8
+ # contiguous range; the Coordinator bsearches by `range_first` to
9
+ # find which script covers a given codepoint.
10
+ #
11
+ # Multiple disjoint ranges can share a script name (e.g. `Latin`
12
+ # appears in several ranges). The Repo (TODO 30) groups Script
13
+ # instances by name for the "all Latin codepoints" view; the model
14
+ # here represents one range per instance.
15
+ #
16
+ # `code` is the ISO 15924 4-letter code, resolved by the Coordinator
17
+ # via PropertyValueAliases (property=sc). The parser stores the long
18
+ # `name` only; the Coordinator fills `code`.
19
+ class Script < Lutaml::Model::Serializable
20
+ attribute :code, :string
21
+ attribute :name, :string
22
+ attribute :range_first, :integer
23
+ attribute :range_last, :integer
24
+
25
+ key_value do
26
+ map "code", to: :code
27
+ map "name", to: :name
28
+ map "range_first", to: :range_first
29
+ map "range_last", to: :range_last
30
+ end
31
+
32
+ def covers?(codepoint)
33
+ codepoint >= range_first && codepoint <= range_last
34
+ end
35
+
36
+ def size
37
+ range_last - range_first + 1
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One row from `SpecialCasing.txt`. `conditions` may include context
8
+ # identifiers like `"Final_Sigma"` or `"After_I"`, and locale codes
9
+ # like `"tr"` or `"az"`. Filtering by condition is the consumer's job.
10
+ class SpecialCasingRule < Lutaml::Model::Serializable
11
+ attribute :codepoint, :integer
12
+ attribute :lower_ids, :string, collection: true, default: -> { [] }
13
+ attribute :title_ids, :string, collection: true, default: -> { [] }
14
+ attribute :upper_ids, :string, collection: true, default: -> { [] }
15
+ attribute :conditions, :string, collection: true, default: -> { [] }
16
+ attribute :comment, :string
17
+
18
+ key_value do
19
+ map "codepoint", to: :codepoint
20
+ map "lower_ids", to: :lower_ids
21
+ map "title_ids", to: :title_ids
22
+ map "upper_ids", to: :upper_ids
23
+ map "conditions", to: :conditions
24
+ map "comment", to: :comment
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "lutaml/model"
4
+
5
+ module Ucode
6
+ module Models
7
+ # One row from `StandardizedVariants.txt`. `base_id` + `variation_selector_id`
8
+ # is the key; `description` is the visual result; `contexts` is the
9
+ # shaping contexts (may be empty).
10
+ class StandardizedVariant < Lutaml::Model::Serializable
11
+ attribute :base_id, :string
12
+ attribute :variation_selector_id, :string
13
+ attribute :description, :string
14
+ attribute :contexts, :string, collection: true, default: -> { [] }
15
+
16
+ key_value do
17
+ map "base_id", to: :base_id
18
+ map "variation_selector_id", to: :variation_selector_id
19
+ map "description", to: :description
20
+ map "contexts", to: :contexts
21
+ end
22
+ end
23
+ end
24
+ end