ucode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. checksums.yaml +7 -0
  2. data/CLAUDE.md +211 -0
  3. data/Gemfile +22 -0
  4. data/Gemfile.lock +406 -0
  5. data/README.md +469 -0
  6. data/Rakefile +18 -0
  7. data/TODO.new/00-README.md +66 -0
  8. data/TODO.new/01-pillar-terminology-alignment.md +69 -0
  9. data/TODO.new/02-audit-schema-design.md +255 -0
  10. data/TODO.new/03-directory-output-spec.md +203 -0
  11. data/TODO.new/04-fontist-org-contract.md +173 -0
  12. data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
  13. data/TODO.new/06-audit-namespace-skeleton.md +105 -0
  14. data/TODO.new/07-audit-models-port.md +132 -0
  15. data/TODO.new/08-extractors-cheap-port.md +113 -0
  16. data/TODO.new/09-extractors-expensive-port.md +99 -0
  17. data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
  18. data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
  19. data/TODO.new/12-formatters-port.md +115 -0
  20. data/TODO.new/13-directory-emitter.md +147 -0
  21. data/TODO.new/14-html-face-browser.md +144 -0
  22. data/TODO.new/15-html-library-browser.md +102 -0
  23. data/TODO.new/16-cli-audit-subcommands.md +142 -0
  24. data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
  25. data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
  26. data/TODO.new/19-fontisan-docs-update.md +155 -0
  27. data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
  28. data/TODO.new/21-canonical-unicode17-build.md +148 -0
  29. data/TODO.new/22-implementation-order.md +176 -0
  30. data/UCODE_CHANGELOG.md +97 -0
  31. data/exe/ucode +8 -0
  32. data/lib/ucode/aggregator.rb +77 -0
  33. data/lib/ucode/audit/block_aggregator.rb +90 -0
  34. data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
  35. data/lib/ucode/audit/context.rb +137 -0
  36. data/lib/ucode/audit/discrepancy_detector.rb +213 -0
  37. data/lib/ucode/audit/extractors/aggregations.rb +70 -0
  38. data/lib/ucode/audit/extractors/base.rb +21 -0
  39. data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
  40. data/lib/ucode/audit/extractors/coverage.rb +55 -0
  41. data/lib/ucode/audit/extractors/hinting.rb +199 -0
  42. data/lib/ucode/audit/extractors/identity.rb +65 -0
  43. data/lib/ucode/audit/extractors/licensing.rb +75 -0
  44. data/lib/ucode/audit/extractors/metrics.rb +108 -0
  45. data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
  46. data/lib/ucode/audit/extractors/provenance.rb +34 -0
  47. data/lib/ucode/audit/extractors/style.rb +88 -0
  48. data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
  49. data/lib/ucode/audit/extractors.rb +31 -0
  50. data/lib/ucode/audit/plane_aggregator.rb +37 -0
  51. data/lib/ucode/audit/registry.rb +63 -0
  52. data/lib/ucode/audit/script_aggregator.rb +92 -0
  53. data/lib/ucode/audit.rb +27 -0
  54. data/lib/ucode/cache.rb +113 -0
  55. data/lib/ucode/cli.rb +272 -0
  56. data/lib/ucode/commands/build.rb +68 -0
  57. data/lib/ucode/commands/cache.rb +46 -0
  58. data/lib/ucode/commands/fetch.rb +62 -0
  59. data/lib/ucode/commands/font_coverage.rb +57 -0
  60. data/lib/ucode/commands/glyphs.rb +136 -0
  61. data/lib/ucode/commands/lookup.rb +65 -0
  62. data/lib/ucode/commands/parse.rb +62 -0
  63. data/lib/ucode/commands/site.rb +33 -0
  64. data/lib/ucode/commands.rb +19 -0
  65. data/lib/ucode/config.rb +110 -0
  66. data/lib/ucode/coordinator/indices.rb +34 -0
  67. data/lib/ucode/coordinator.rb +397 -0
  68. data/lib/ucode/database.rb +214 -0
  69. data/lib/ucode/db_builder.rb +107 -0
  70. data/lib/ucode/error.rb +96 -0
  71. data/lib/ucode/fetch/code_charts.rb +57 -0
  72. data/lib/ucode/fetch/http.rb +83 -0
  73. data/lib/ucode/fetch/ucd_zip.rb +57 -0
  74. data/lib/ucode/fetch/unihan_zip.rb +57 -0
  75. data/lib/ucode/fetch.rb +14 -0
  76. data/lib/ucode/glyphs/cell_extractor.rb +130 -0
  77. data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
  78. data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
  79. data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
  80. data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
  81. data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
  82. data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
  83. data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
  84. data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
  85. data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
  86. data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
  87. data/lib/ucode/glyphs/grid.rb +30 -0
  88. data/lib/ucode/glyphs/grid_detector.rb +165 -0
  89. data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
  90. data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
  91. data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
  92. data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
  93. data/lib/ucode/glyphs/last_resort/source.rb +125 -0
  94. data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
  95. data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
  96. data/lib/ucode/glyphs/last_resort.rb +36 -0
  97. data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
  98. data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
  99. data/lib/ucode/glyphs/page_renderer.rb +221 -0
  100. data/lib/ucode/glyphs/path_bbox.rb +62 -0
  101. data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
  102. data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
  103. data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
  104. data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
  105. data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
  106. data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
  107. data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
  108. data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
  109. data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
  110. data/lib/ucode/glyphs/real_fonts.rb +32 -0
  111. data/lib/ucode/glyphs/writer.rb +250 -0
  112. data/lib/ucode/glyphs.rb +27 -0
  113. data/lib/ucode/index.rb +106 -0
  114. data/lib/ucode/index_builder.rb +94 -0
  115. data/lib/ucode/models/audit/audit_axis.rb +30 -0
  116. data/lib/ucode/models/audit/audit_diff.rb +77 -0
  117. data/lib/ucode/models/audit/audit_report.rb +137 -0
  118. data/lib/ucode/models/audit/baseline.rb +32 -0
  119. data/lib/ucode/models/audit/block_summary.rb +72 -0
  120. data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
  121. data/lib/ucode/models/audit/codepoint_range.rb +39 -0
  122. data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
  123. data/lib/ucode/models/audit/color_capabilities.rb +91 -0
  124. data/lib/ucode/models/audit/discrepancy.rb +38 -0
  125. data/lib/ucode/models/audit/duplicate_group.rb +23 -0
  126. data/lib/ucode/models/audit/embedding_type.rb +81 -0
  127. data/lib/ucode/models/audit/field_change.rb +28 -0
  128. data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
  129. data/lib/ucode/models/audit/gasp_range.rb +63 -0
  130. data/lib/ucode/models/audit/hinting.rb +99 -0
  131. data/lib/ucode/models/audit/library_summary.rb +40 -0
  132. data/lib/ucode/models/audit/licensing.rb +48 -0
  133. data/lib/ucode/models/audit/metrics.rb +111 -0
  134. data/lib/ucode/models/audit/named_instance.rb +41 -0
  135. data/lib/ucode/models/audit/opentype_layout.rb +38 -0
  136. data/lib/ucode/models/audit/plane_summary.rb +31 -0
  137. data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
  138. data/lib/ucode/models/audit/script_features.rb +28 -0
  139. data/lib/ucode/models/audit/script_summary.rb +54 -0
  140. data/lib/ucode/models/audit/variation_detail.rb +42 -0
  141. data/lib/ucode/models/audit.rb +50 -0
  142. data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
  143. data/lib/ucode/models/bidi_mirroring.rb +19 -0
  144. data/lib/ucode/models/binary_property_assignment.rb +26 -0
  145. data/lib/ucode/models/block.rb +36 -0
  146. data/lib/ucode/models/case_folding_rule.rb +23 -0
  147. data/lib/ucode/models/cjk_radical.rb +23 -0
  148. data/lib/ucode/models/codepoint/bidi.rb +28 -0
  149. data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
  150. data/lib/ucode/models/codepoint/case_folding.rb +25 -0
  151. data/lib/ucode/models/codepoint/casing.rb +32 -0
  152. data/lib/ucode/models/codepoint/decomposition.rb +27 -0
  153. data/lib/ucode/models/codepoint/display.rb +24 -0
  154. data/lib/ucode/models/codepoint/emoji.rb +29 -0
  155. data/lib/ucode/models/codepoint/hangul.rb +20 -0
  156. data/lib/ucode/models/codepoint/identifier.rb +30 -0
  157. data/lib/ucode/models/codepoint/indic.rb +20 -0
  158. data/lib/ucode/models/codepoint/joining.rb +20 -0
  159. data/lib/ucode/models/codepoint/normalization.rb +35 -0
  160. data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
  161. data/lib/ucode/models/codepoint.rb +122 -0
  162. data/lib/ucode/models/name_alias.rb +21 -0
  163. data/lib/ucode/models/named_sequence.rb +19 -0
  164. data/lib/ucode/models/names_list_entry.rb +38 -0
  165. data/lib/ucode/models/plane.rb +36 -0
  166. data/lib/ucode/models/property_alias.rb +24 -0
  167. data/lib/ucode/models/property_value_alias.rb +26 -0
  168. data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
  169. data/lib/ucode/models/relationship/cross_reference.rb +17 -0
  170. data/lib/ucode/models/relationship/footnote.rb +24 -0
  171. data/lib/ucode/models/relationship/informal_alias.rb +18 -0
  172. data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
  173. data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
  174. data/lib/ucode/models/relationship.rb +57 -0
  175. data/lib/ucode/models/script.rb +41 -0
  176. data/lib/ucode/models/special_casing_rule.rb +28 -0
  177. data/lib/ucode/models/standardized_variant.rb +24 -0
  178. data/lib/ucode/models/unihan_entry.rb +23 -0
  179. data/lib/ucode/models.rb +47 -0
  180. data/lib/ucode/parsers/auxiliary.rb +26 -0
  181. data/lib/ucode/parsers/base.rb +137 -0
  182. data/lib/ucode/parsers/bidi_brackets.rb +41 -0
  183. data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
  184. data/lib/ucode/parsers/blocks.rb +63 -0
  185. data/lib/ucode/parsers/case_folding.rb +53 -0
  186. data/lib/ucode/parsers/cjk_radicals.rb +102 -0
  187. data/lib/ucode/parsers/derived_age.rb +59 -0
  188. data/lib/ucode/parsers/derived_core_properties.rb +60 -0
  189. data/lib/ucode/parsers/extracted_properties.rb +74 -0
  190. data/lib/ucode/parsers/name_aliases.rb +44 -0
  191. data/lib/ucode/parsers/named_sequences.rb +51 -0
  192. data/lib/ucode/parsers/names_list.rb +250 -0
  193. data/lib/ucode/parsers/property_aliases.rb +41 -0
  194. data/lib/ucode/parsers/property_value_aliases.rb +46 -0
  195. data/lib/ucode/parsers/script_extensions.rb +64 -0
  196. data/lib/ucode/parsers/scripts.rb +60 -0
  197. data/lib/ucode/parsers/special_casing.rb +62 -0
  198. data/lib/ucode/parsers/standardized_variants.rb +56 -0
  199. data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
  200. data/lib/ucode/parsers/unicode_data.rb +268 -0
  201. data/lib/ucode/parsers/unihan.rb +125 -0
  202. data/lib/ucode/parsers.rb +35 -0
  203. data/lib/ucode/range_entry.rb +58 -0
  204. data/lib/ucode/repo/aggregate_writer.rb +364 -0
  205. data/lib/ucode/repo/atomic_writes.rb +48 -0
  206. data/lib/ucode/repo/codepoint_writer.rb +96 -0
  207. data/lib/ucode/repo/paths.rb +122 -0
  208. data/lib/ucode/repo.rb +22 -0
  209. data/lib/ucode/site/config_emitter.rb +124 -0
  210. data/lib/ucode/site/generator.rb +178 -0
  211. data/lib/ucode/site/search_index.rb +68 -0
  212. data/lib/ucode/site/template/.gitignore +4 -0
  213. data/lib/ucode/site/template/.vitepress/config.ts +8 -0
  214. data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
  215. data/lib/ucode/site/template/char/[codepoint].md +13 -0
  216. data/lib/ucode/site/template/components/BlockView.vue +57 -0
  217. data/lib/ucode/site/template/components/CharView.vue +85 -0
  218. data/lib/ucode/site/template/components/PlaneView.vue +56 -0
  219. data/lib/ucode/site/template/components/SearchView.vue +66 -0
  220. data/lib/ucode/site/template/index.md +25 -0
  221. data/lib/ucode/site/template/package.json +18 -0
  222. data/lib/ucode/site/template/search.md +9 -0
  223. data/lib/ucode/site.rb +13 -0
  224. data/lib/ucode/version.rb +5 -0
  225. data/lib/ucode/version_resolver.rb +76 -0
  226. data/lib/ucode.rb +74 -0
  227. data/ucode.gemspec +56 -0
  228. metadata +404 -0
data/lib/ucode/cli.rb ADDED
@@ -0,0 +1,272 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "thor"
4
+
5
+ require "ucode/commands"
6
+
7
+ module Ucode
8
+ # Top-level CLI entry.
9
+ #
10
+ # **Thin Thor**: every method delegates to a `Commands::*Command`
11
+ # class and only formats the result. The Command classes are pure
12
+ # and testable in-process — Thor never holds business logic.
13
+ class Cli < Thor
14
+ package_name "ucode"
15
+
16
+ def self.exit_on_failure?
17
+ true
18
+ end
19
+
20
+ # ─────────────── version ───────────────
21
+ desc "version", "Print ucode version"
22
+ def version
23
+ puts "ucode #{Ucode::VERSION}"
24
+ end
25
+
26
+ # ─────────────── fetch ───────────────
27
+ class Fetch < Thor
28
+ desc "ucd [VERSION]", "Download UCD.zip"
29
+ option :force, type: :boolean, default: false, desc: "Re-download even if cached"
30
+ def ucd(version = nil)
31
+ puts format_result Commands::FetchCommand.new.fetch_ucd(version, force: options[:force])
32
+ end
33
+
34
+ desc "unihan [VERSION]", "Download Unihan.zip"
35
+ option :force, type: :boolean, default: false
36
+ def unihan(version = nil)
37
+ puts format_result Commands::FetchCommand.new.fetch_unihan(version, force: options[:force])
38
+ end
39
+
40
+ desc "charts [VERSION]", "Download per-block Code Charts PDFs"
41
+ option :force, type: :boolean, default: false
42
+ option :block, type: :array, desc: "Limit to these block ids"
43
+ def charts(version = nil)
44
+ cps = options[:block]&.map { |id| block_id_to_first_cp(id) }&.compact
45
+ puts format_result Commands::FetchCommand.new
46
+ .fetch_charts(version, block_first_cps: cps, force: options[:force])
47
+ end
48
+
49
+ private
50
+
51
# Resolves one `--block` value to a block's first codepoint.
#
# Only all-digit ids can be resolved here, and they are read as base 10.
# The explicit radix matters: a bare `Integer("0080")` treats the leading
# zero as an octal prefix and raises ArgumentError (and "010" becomes 8).
#
# @param id [String] one `--block` value from the command line
# @return [Integer, nil] the codepoint, or nil (after a warning on
#   stderr) when the id is not purely numeric
def block_id_to_first_cp(id)
  return Integer(id, 10) if id.match?(/\A\d+\z/)

  warn "Warning: --block=#{id.inspect} cannot be resolved to a first codepoint; skipping"
  nil
end
57
+
58
+ def format_result(result)
59
+ JSON.pretty_generate(result)
60
+ end
61
+ end
62
+
63
+ desc "fetch", "Download UCD sources"
64
+ subcommand "fetch", Fetch
65
+
66
+ # ─────────────── parse ───────────────
67
+ desc "parse [VERSION]", "Stream UCD → output/"
68
+ option :to, type: :string, default: "./output", desc: "Output directory"
69
+ def parse(version = nil)
70
+ result = Commands::ParseCommand.new.call(version, output_root: options[:to])
71
+ puts JSON.pretty_generate(result)
72
+ end
73
+
74
+ # ─────────────── glyphs ───────────────
75
+ desc "glyphs [VERSION]", "Extract per-codepoint SVGs from Code Charts PDFs (experimental)"
76
+ long_desc <<~LONG
77
+ EXPERIMENTAL in v0.1. The cell extractor currently includes cell-border
78
+ decorations alongside the actual character outline, so the output is not
79
+ yet suitable for end-user display. Opt in with --include-glyphs to run
80
+ the pipeline anyway; otherwise it returns a skipped payload.
81
+ LONG
82
+ option :to, type: :string, default: "./output"
83
+ option :block, type: :array, desc: "Limit to these block ids"
84
+ option :force, type: :boolean, default: false
85
+ option :monolith, type: :string, default: "CodeCharts.pdf",
86
+ desc: "Path to CodeCharts.pdf for fallback slicing"
87
+ option :include_glyphs, type: :boolean, default: false,
88
+ desc: "Opt into the experimental v0.1 pipeline"
89
+ def glyphs(version = nil)
90
+ result = Commands::GlyphsCommand.new.call(
91
+ version,
92
+ output_root: options[:to],
93
+ block_filter: options[:block],
94
+ force: options[:force],
95
+ monolith_path: options[:monolith],
96
+ include_glyphs: options[:include_glyphs],
97
+ warn: $stderr,
98
+ )
99
+ puts JSON.pretty_generate(result)
100
+ end
101
+
102
+ # ─────────────── site ───────────────
103
+ class Site < Thor
104
+ desc "init", "Copy the Vitepress scaffold into site/"
105
+ option :to, type: :string, default: "./site"
106
+ def init
107
+ puts JSON.pretty_generate(Commands::SiteCommand.new.init(site_root: options[:to]))
108
+ end
109
+
110
+ desc "build", "Regenerate site/.vitepress/config.ts + pages from output/"
111
+ option :from, type: :string, default: "./output", desc: "Dataset root"
112
+ option :to, type: :string, default: "./site", desc: "Site root"
113
+ def build
114
+ puts JSON.pretty_generate(
115
+ Commands::SiteCommand.new.build(output_root: options[:from], site_root: options[:to]),
116
+ )
117
+ end
118
+ end
119
+
120
+ desc "site", "Generate the Vitepress site"
121
+ subcommand "site", Site
122
+
123
+ # ─────────────── lookup ───────────────
124
+ class Lookup < Thor
125
+ desc "block CODEPOINT", "Block name covering CODEPOINT (integer or 0xNNNN)"
126
+ option :version, type: :string, default: nil
127
+ def block(codepoint)
128
+ cp = parse_cp(codepoint)
129
+ with_db_handling do
130
+ result = Commands::LookupCommand.new.lookup_block(options[:version], codepoint: cp)
131
+ puts "#{format("U+%04X", cp)} → #{result.block || "(unassigned)"}"
132
+ end
133
+ end
134
+
135
+ desc "script CODEPOINT", "Script name covering CODEPOINT"
136
+ option :version, type: :string, default: nil
137
+ def script(codepoint)
138
+ cp = parse_cp(codepoint)
139
+ with_db_handling do
140
+ result = Commands::LookupCommand.new.lookup_script(options[:version], codepoint: cp)
141
+ puts "#{format("U+%04X", cp)} → #{result.script || "(none)"}"
142
+ end
143
+ end
144
+
145
+ desc "char CODEPOINT", "Block + glyph path for CODEPOINT"
146
+ option :version, type: :string, default: nil
147
+ option :from, type: :string, default: "./output"
148
+ def char(codepoint)
149
+ cp = parse_cp(codepoint)
150
+ with_db_handling do
151
+ result = Commands::LookupCommand.new
152
+ .lookup_char(options[:version], codepoint: cp, output_root: options[:from])
153
+ puts "#{format("U+%04X", cp)} block=#{result.block_id} glyph=#{result.glyph_path}"
154
+ end
155
+ end
156
+
157
+ private
158
+
159
# Converts a CODEPOINT argument into an Integer.
#
# A leading "U+" (any case) is stripped and the remainder is read as
# hexadecimal, so "U+0041", "0x0041" and a bare "41" all yield 0x41.
#
# @param s [String] raw command-line argument
# @return [Integer] the codepoint value
# @raise [Thor::Error] when the text is not a valid hexadecimal number
def parse_cp(s)
  without_prefix = s.gsub(/^U\+/i, "")
  Integer(without_prefix, 16)
rescue ArgumentError
  message = "Invalid codepoint: #{s.inspect} (try '0x0041' or 'U+0041')"
  raise Thor::Error, message
end
164
+
165
# Runs the given block, turning a missing SQLite cache into a CLI error
# that names the build command to run.
#
# @yield the lookup to perform
# @return [Object] whatever the block returns
# @raise [Thor::Error] when the block raises Ucode::DatabaseMissingError
def with_db_handling
  yield
rescue Ucode::DatabaseMissingError => missing
  wanted = missing.context[:version]
  hint = "Run: ucode build #{wanted} --to ./output"
  raise Thor::Error, "No SQLite cache for version #{wanted.inspect}. #{hint}"
end
172
+ end
173
+
174
+ desc "lookup", "Read-only lookups against the SQLite cache"
175
+ subcommand "lookup", Lookup
176
+
177
+ # ─────────────── cache ───────────────
178
+ class Cache < Thor
179
+ desc "list", "List cached UCD versions"
180
+ def list
181
+ Commands::CacheCommand.new.list.each { |v| puts v }
182
+ end
183
+
184
+ desc "info VERSION", "Show what's cached for VERSION"
185
+ def info(version)
186
+ result = Commands::CacheCommand.new.info(version)
187
+ if result.nil?
188
+ puts "Nothing cached for #{version}"
189
+ else
190
+ puts JSON.pretty_generate(result.to_h)
191
+ end
192
+ end
193
+
194
+ desc "remove VERSION", "Remove VERSION from the cache"
195
+ def remove(version)
196
+ ok = Commands::CacheCommand.new.remove(version)
197
+ puts(ok ? "Removed #{version}" : "#{version} not in cache")
198
+ end
199
+ end
200
+
201
+ desc "cache", "Inspect and manage the cache"
202
+ subcommand "cache", Cache
203
+
204
+ # ─────────────── build ───────────────
205
+ desc "build [VERSION]", "Full pipeline: fetch + parse + (optional) glyphs + site"
206
+ option :to, type: :string, default: "./output"
207
+ option :site, type: :string, default: nil, desc: "Build the site here (skipped if nil)"
208
+ option :monolith, type: :string, default: "CodeCharts.pdf"
209
+ option :force_fetch, type: :boolean, default: false
210
+ option :include_glyphs, type: :boolean, default: false,
211
+ desc: "Opt into the experimental v0.1 glyph step"
212
+ def build(version = nil)
213
+ result = Commands::BuildCommand.new.call(
214
+ version,
215
+ output_root: options[:to],
216
+ site_root: options[:site],
217
+ monolith_path: options[:monolith],
218
+ force_fetch: options[:force_fetch],
219
+ include_glyphs: options[:include_glyphs],
220
+ warn: $stderr,
221
+ )
222
+ puts JSON.pretty_generate(result)
223
+ end
224
+
225
+ # ─────────────── font-coverage ───────────────
226
+ desc "font-coverage FONT [FONT...]", "Audit Unicode 17 block coverage for one or more fonts"
227
+ long_desc <<~LONG
228
+ Each FONT argument is either a fontist formula name (resolved via
229
+ `Fontist::Font.find` then `install`) or `label=/path/to/font.ttf`
230
+ (uses the local file directly). For every font, walks the cmap via
231
+ fontisan and emits per-Unicode-17-block coverage to
232
+ `<to>/font_coverage/<label>.json`.
233
+
234
+ Examples:
235
+
236
+ ucode font-coverage Lentariso=/tmp/lentariso/TTFs/Lentariso-Re.ttf \\
237
+ Kedebideri=/tmp/kedebideri/Kedebideri-3.001/Kedebideri-Regular.ttf
238
+
239
+ ucode font-coverage Kedebideri # resolves + installs via fontist
240
+ LONG
241
+ option :to, type: :string, default: "./output"
242
+ option :no_install, type: :boolean, default: false,
243
+ desc: "Don't auto-install missing fonts via fontist"
244
+ def font_coverage(*fonts)
245
+ raise Thor::Error, "Provide at least one font" if fonts.empty?
246
+
247
+ results = Commands::FontCoverageCommand.new.call(
248
+ fonts,
249
+ output_root: options[:to],
250
+ install: !options[:no_install],
251
+ )
252
+ puts JSON.pretty_generate(results.map { |r| result_to_h(r) })
253
+ end
254
+
255
+ private
256
+
257
# Flattens one font-coverage result into a Hash ready for JSON output.
#
# @param result [#spec, #error, #located, #output_path, #complete_blocks]
# @return [Hash] `{spec:, error:}` when the result carries an error;
#   otherwise the spec, the located font's name/path/via, the output
#   path and the complete-block count
def result_to_h(result)
  return { spec: result.spec, error: result.error } if result.error

  font = result.located
  {
    spec: result.spec,
    label: font.name,
    source: font.path.to_s,
    via: font.via,
    output_path: result.output_path.to_s,
    complete_blocks: result.complete_blocks,
  }
end
271
+ end
272
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/commands"
6
+ require "ucode/version_resolver"
7
+
8
+ module Ucode
9
+ module Commands
10
+ # `ucode build` — full pipeline: fetch (ucd + unihan + charts) →
11
+ # parse → (optional) glyphs → (optional) site. Resumable: each step
12
+ # is idempotent and safe to re-run.
13
+ #
14
+ # **Glyph step is opt-in as of v0.1** because the SVG cell extractor
15
+ # is still experimental. Pass `include_glyphs: true` to enable it;
16
+ # otherwise the glyphs step is recorded as skipped.
17
+ class BuildCommand
18
+ # @param version_intent [nil, :default, :latest, String]
19
+ # @param output_root [String, Pathname]
20
+ # @param site_root [String, Pathname, nil] if nil, skip site build
21
+ # @param monolith_path [String, Pathname, nil] CodeCharts.pdf fallback
22
+ # @param force_fetch [Boolean] re-download sources
23
+ # @param include_glyphs [Boolean] opt into the experimental glyph
24
+ # step (default false)
25
+ # @param warn [IO, nil] forwarded to GlyphsCommand when enabled
26
+ # @return [Hash] aggregated step results
27
+ def call(version_intent, output_root:, site_root: nil,
28
+ monolith_path: nil, force_fetch: false,
29
+ include_glyphs: false, warn: nil)
30
+ version = VersionResolver.resolve(version_intent)
31
+ steps = {}
32
+
33
+ steps[:fetch] = run_fetch(version, force: force_fetch)
34
+ steps[:parse] = ParseCommand.new.call(version, output_root: output_root)
35
+ steps[:glyphs] = run_glyphs(version, output_root, monolith_path,
36
+ include_glyphs: include_glyphs, warn: warn)
37
+ steps[:site] = run_site(output_root, site_root) if site_root
38
+
39
+ { version: version, steps: steps }
40
+ end
41
+
42
+ private
43
+
44
+ def run_fetch(version, force:)
45
+ fetch = FetchCommand.new
46
+ {
47
+ ucd: fetch.fetch_ucd(version, force: force),
48
+ unihan: fetch.fetch_unihan(version, force: force),
49
+ charts: fetch.fetch_charts(version, force: force),
50
+ }
51
+ end
52
+
53
+ def run_glyphs(version, output_root, monolith_path, include_glyphs:, warn:)
54
+ GlyphsCommand.new.call(
55
+ version,
56
+ output_root: output_root,
57
+ monolith_path: monolith_path || "CodeCharts.pdf",
58
+ include_glyphs: include_glyphs,
59
+ warn: warn,
60
+ )
61
+ end
62
+
63
+ def run_site(output_root, site_root)
64
+ SiteCommand.new.build(output_root: output_root, site_root: site_root)
65
+ end
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/cache"
6
+
7
+ module Ucode
8
+ module Commands
9
+ # `ucode cache` — inspect and manage the on-disk cache.
10
+ # Three subactions: list, info, remove.
11
+ class CacheCommand
12
+ VersionInfo = Struct.new(:version, :path, :has_ucd, :has_unihan,
13
+ :has_pdfs, :has_sqlite, keyword_init: true)
14
+ private_constant :VersionInfo
15
+
16
+ # @return [Array<String>] sorted versions present in the cache
17
+ def list
18
+ Cache.cached_versions
19
+ end
20
+
21
# Reports which artefacts are present for a cached version.
#
# The Unihan and PDF checks tolerate a missing directory: a version for
# which only some sources are on disk reports `false` for the rest
# instead of raising Errno::ENOENT from `Pathname#children`.
#
# @param version [String]
# @return [VersionInfo, nil] nil if version not in cache
def info(version)
  return nil unless Cache.cached?(version)

  # True only for an existing directory with at least one entry.
  populated = ->(dir) { dir.directory? && dir.children.any? }

  VersionInfo.new(
    version: version,
    path: Cache.version_dir(version),
    has_ucd: Cache.ucd_dir(version).join("UnicodeData.txt").exist?,
    has_unihan: populated.call(Cache.unihan_dir(version)),
    has_pdfs: populated.call(Cache.pdfs_dir(version)),
    has_sqlite: Cache.sqlite_path(version).exist?,
  )
end
35
+
36
+ # @param version [String]
37
+ # @return [Boolean] true if a directory was removed
38
+ def remove(version)
39
+ return false unless Cache.cached?(version)
40
+
41
+ Cache.remove_version(version)
42
+ true
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/cache"
6
+ require "ucode/fetch"
7
+ require "ucode/version_resolver"
8
+
9
+ module Ucode
10
+ module Commands
11
+ # `ucode fetch` — downloads UCD/Unihan/Code-Charts sources into the
12
+ # per-version cache. Three subactions: ucd, unihan, charts.
13
+ #
14
+ # Thin shell over `Ucode::Fetch::*`. The command layer's job is to
15
+ # resolve the version intent and format the result; the fetcher does
16
+ # the network I/O.
17
+ class FetchCommand
18
+ # @param version_intent [nil, :default, :latest, String]
19
+ # @param force [Boolean]
20
+ # @return [Hash] { version:, ucd_dir: }
21
+ def fetch_ucd(version_intent, force: false)
22
+ version = VersionResolver.resolve(version_intent)
23
+ Cache.ensure_version_dir!(version)
24
+ path = Fetch::UcdZip.call(version, force: force)
25
+ { version: version, ucd_dir: path }
26
+ end
27
+
28
+ # @param version_intent [nil, :default, :latest, String]
29
+ # @param force [Boolean]
30
+ # @return [Hash] { version:, unihan_dir: }
31
+ def fetch_unihan(version_intent, force: false)
32
+ version = VersionResolver.resolve(version_intent)
33
+ Cache.ensure_version_dir!(version)
34
+ path = Fetch::UnihanZip.call(version, force: force)
35
+ { version: version, unihan_dir: path }
36
+ end
37
+
38
+ # @param version_intent [nil, :default, :latest, String]
39
+ # @param block_first_cps [Array<Integer>, nil] nil = all known blocks
40
+ # @param force [Boolean]
41
+ # @return [Hash] { version:, downloaded: }
42
+ def fetch_charts(version_intent, block_first_cps: nil, force: false)
43
+ version = VersionResolver.resolve(version_intent)
44
+ Cache.ensure_version_dir!(version)
45
+
46
+ cps = block_first_cps || default_block_first_cps(version)
47
+ count = Fetch::CodeCharts.call(version, block_first_cps: cps, force: force)
48
+ { version: version, downloaded: count }
49
+ end
50
+
51
+ private
52
+
53
+ def default_block_first_cps(version)
54
+ ucd_dir = Cache.ucd_dir(version)
55
+ blocks_file = ucd_dir.join("Blocks.txt")
56
+ return [] unless blocks_file.exist?
57
+
58
+ Parsers::Blocks.each_record(blocks_file).map(&:range_first)
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+ require "json"
5
+
6
+ require "ucode/glyphs/real_fonts"
7
+
8
+ module Ucode
9
+ module Commands
10
+ # `ucode font-coverage` — audit named fonts against the Unicode
11
+ # 17 new-blocks table and emit per-font JSON coverage reports
12
+ # under `output/font_coverage/`.
13
+ #
14
+ # Each font spec on the CLI is either a bare name (resolved via
15
+ # `Fontist::Font.find` then `install`) or `label=/path/to/font.ttf`
16
+ # (uses the local file directly). The label is what shows up in
17
+ # the audit JSON; the path is what gets audited.
18
+ class FontCoverageCommand
19
+ Result = Struct.new(:spec, :located, :output_path, :covered_blocks,
20
+ :complete_blocks, :error, keyword_init: true)
21
+ private_constant :Result
22
+
23
+ # @param specs [Array<String>] font specs (see file docs).
24
+ # @param output_root [Pathname, String] parent directory.
25
+ # @param install [Boolean] allow fontist install on miss.
26
+ # @return [Array<Result>]
27
+ def call(specs, output_root:, install: true)
28
+ locator = Ucode::Glyphs::RealFonts::FontLocator.new
29
+ auditor = Ucode::Glyphs::RealFonts::CoverageAuditor.new
30
+ writer = Ucode::Glyphs::RealFonts::Writer.new(output_root)
31
+
32
+ specs.map do |spec|
33
+ audit_one(spec, locator, auditor, writer, install: install)
34
+ end
35
+ end
36
+
37
+ private
38
+
39
+ def audit_one(spec, locator, auditor, writer, install:)
40
+ located = locator.locate(spec, install: install)
41
+ report = auditor.audit(located.path)
42
+ path = writer.write(report)
43
+ Result.new(spec: spec, located: located, output_path: path,
44
+ **summary_kwargs(report))
45
+ rescue StandardError => e
46
+ Result.new(spec: spec, error: "#{e.class}: #{e.message}")
47
+ end
48
+
49
# Summarises a coverage report as keyword arguments for Result.
#
# @param report [#blocks] report whose blocks respond to `covered`
#   and `complete?`
# @return [Hash] `covered_blocks:` counts blocks with a positive
#   `covered`; `complete_blocks:` counts blocks answering `complete?`
def summary_kwargs(report)
  blocks = report.blocks
  with_any_coverage = blocks.count { |block| block.covered.positive? }
  fully_covered = blocks.count { |block| block.complete? }
  { covered_blocks: with_any_coverage, complete_blocks: fully_covered }
end
55
+ end
56
+ end
57
+ end
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+ require "set"
5
+
6
+ require "ucode/cache"
7
+ require "ucode/glyphs"
8
+ require "ucode/parsers"
9
+ require "ucode/version_resolver"
10
+
11
+ module Ucode
12
+ module Commands
13
+ # `ucode glyphs` — extract per-codepoint SVGs from Code Charts PDFs.
14
+ # Builds block specs from the cached Blocks.txt + per-block PDFs (or
15
+ # monolith fallback), then drains them through the Glyphs::Writer
16
+ # worker pool.
17
+ #
18
+ # **Status (v0.1): EXPERIMENTAL.** The cell-extraction pipeline
19
+ # currently includes cell-border decorations alongside the actual
20
+ # character outline because the Code Charts PDFs composite the two
21
+ # into a single glyph definition. The output is therefore not yet
22
+ # suitable for end-user display. The command is retained so the
23
+ # pipeline can be iterated on without churning the CLI surface, but
24
+ # callers MUST opt in via `include_glyphs: true` (CLI: `--include-glyphs`)
25
+ # and will receive a printed warning. Tracked for v0.2.
26
+ class GlyphsCommand
27
+ ExperimentalWarning = "ucode glyphs is experimental in v0.1: " \
28
+ "extracted SVGs include cell-border decorations " \
29
+ "alongside the character outline."
30
+ private_constant :ExperimentalWarning
31
+
32
+ MonolithPath = "CodeCharts.pdf"
33
+ PageMapCache = "data/codecharts_page_map.json"
34
+ private_constant :MonolithPath, :PageMapCache
35
+
36
+ class << self
37
+ # @return [String] the experimental-status banner. Exposed so the
38
+ # CLI and BuildCommand surface the same message verbatim.
39
+ def experimental_warning
40
+ ExperimentalWarning
41
+ end
42
+ end
43
+
44
+ # @param version_intent [nil, :default, :latest, String]
45
+ # @param output_root [String, Pathname]
46
+ # @param block_filter [Array<String>, nil] block ids to limit to;
47
+ # nil = every block
48
+ # @param force [Boolean] re-fetch PDFs even when cached
49
+ # @param monolith_path [String, Pathname, nil] path to CodeCharts.pdf
50
+ # for fallback slicing; defaults to ./CodeCharts.pdf
51
+ # @param include_glyphs [Boolean] opt-in for the experimental v0.1
52
+ # pipeline. When false (default), the command returns a `skipped`
53
+ # payload without touching disk.
54
+ # @param warn [IO, nil] when provided, the experimental warning is
55
+ # written here exactly once before work begins.
56
+ # @return [Hash] aggregated Writer tally + version, or a `skipped`
57
+ # payload when opt-in is false.
58
+ def call(version_intent, output_root:,
59
+ block_filter: nil, force: false, monolith_path: MonolithPath,
60
+ include_glyphs: false, warn: nil)
61
+ return skipped(version_intent) unless include_glyphs
62
+
63
+ warn&.puts(ExperimentalWarning)
64
+ version = VersionResolver.resolve(version_intent)
65
+ root = Pathname.new(output_root)
66
+
67
+ blocks = load_blocks(version, block_filter)
68
+ fetcher = build_fetcher(version, monolith_path, blocks)
69
+ specs = blocks.map { |block| spec_for(block, fetcher, force) }.compact
70
+
71
+ writer = Glyphs::Writer.new(output_root: root,
72
+ parallel_workers: workers)
73
+ tally = writer.write_all(specs)
74
+ tally.merge(version: version, block_count: specs.size)
75
+ end
76
+
77
+ private
78
+
79
# Reads block records from the cached Blocks.txt, optionally narrowed
# to a set of block ids.
#
# @param version [String] resolved Unicode version
# @param block_filter [Array<String>, nil] ids to keep; nil or empty
#   keeps every record
# @return [Array] matching records, or [] when Blocks.txt is absent
def load_blocks(version, block_filter)
  blocks_txt = Cache.ucd_dir(version).join("Blocks.txt")
  return [] unless blocks_txt.exist?

  records = Parsers::Blocks.each_record(blocks_txt).to_a
  return records if !block_filter || block_filter.empty?

  wanted = block_filter.to_set
  records.select { |record| wanted.include?(record.id) }
end
90
+
91
# Builds the PDF fetcher for one run.
#
# @param version [String] resolved Unicode version
# @param monolith_path [String, Pathname, nil] CodeCharts.pdf location;
#   nil falls back to MonolithPath rather than reaching `Pathname.new`,
#   which rejects nil with a TypeError
# @param blocks [Array] block records handed to the fetcher
# @return [Glyphs::PdfFetcher]
def build_fetcher(version, monolith_path, blocks)
  candidate = Pathname.new(monolith_path || MonolithPath)
  # A monolith that is not on disk is passed to the fetcher as nil.
  monolith = candidate.exist? ? candidate : nil
  Glyphs::PdfFetcher.new(
    version,
    monolith_path: monolith,
    blocks: blocks,
    page_map_cache: PageMapCache,
  )
end
101
+
102
+ def spec_for(block, fetcher, force)
103
+ pdf_path = fetcher.fetch(block_first_cp: block.range_first, force: force)
104
+ return nil unless pdf_path
105
+
106
+ { block: block, pdf_path: pdf_path, page_map: page_map_for(block) }
107
+ end
108
+
109
+ # Heuristic page map: per-block PDFs are page 1 = title, page 2 =
110
+ # first chart page starting at the block's first codepoint. True for
111
+ # most BMP blocks; multi-page blocks (CJK) need a richer resolver.
112
+ # Mismatches yield placeholder SVGs only — never wrong glyphs.
113
+ def page_map_for(block)
114
+ { 2 => block.range_first }
115
+ end
116
+
117
+ def workers
118
+ Ucode.configuration.parallel_workers
119
+ end
120
+
121
+ def skipped(version_intent)
122
+ version = begin
123
+ VersionResolver.resolve(version_intent)
124
+ rescue UnknownVersionError
125
+ version_intent
126
+ end
127
+ {
128
+ version: version,
129
+ skipped: true,
130
+ reason: :experimental_v0_1,
131
+ warning: ExperimentalWarning,
132
+ }
133
+ end
134
+ end
135
+ end
136
+ end