ucode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. checksums.yaml +7 -0
  2. data/CLAUDE.md +211 -0
  3. data/Gemfile +22 -0
  4. data/Gemfile.lock +406 -0
  5. data/README.md +469 -0
  6. data/Rakefile +18 -0
  7. data/TODO.new/00-README.md +66 -0
  8. data/TODO.new/01-pillar-terminology-alignment.md +69 -0
  9. data/TODO.new/02-audit-schema-design.md +255 -0
  10. data/TODO.new/03-directory-output-spec.md +203 -0
  11. data/TODO.new/04-fontist-org-contract.md +173 -0
  12. data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
  13. data/TODO.new/06-audit-namespace-skeleton.md +105 -0
  14. data/TODO.new/07-audit-models-port.md +132 -0
  15. data/TODO.new/08-extractors-cheap-port.md +113 -0
  16. data/TODO.new/09-extractors-expensive-port.md +99 -0
  17. data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
  18. data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
  19. data/TODO.new/12-formatters-port.md +115 -0
  20. data/TODO.new/13-directory-emitter.md +147 -0
  21. data/TODO.new/14-html-face-browser.md +144 -0
  22. data/TODO.new/15-html-library-browser.md +102 -0
  23. data/TODO.new/16-cli-audit-subcommands.md +142 -0
  24. data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
  25. data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
  26. data/TODO.new/19-fontisan-docs-update.md +155 -0
  27. data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
  28. data/TODO.new/21-canonical-unicode17-build.md +148 -0
  29. data/TODO.new/22-implementation-order.md +176 -0
  30. data/UCODE_CHANGELOG.md +97 -0
  31. data/exe/ucode +8 -0
  32. data/lib/ucode/aggregator.rb +77 -0
  33. data/lib/ucode/audit/block_aggregator.rb +90 -0
  34. data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
  35. data/lib/ucode/audit/context.rb +137 -0
  36. data/lib/ucode/audit/discrepancy_detector.rb +213 -0
  37. data/lib/ucode/audit/extractors/aggregations.rb +70 -0
  38. data/lib/ucode/audit/extractors/base.rb +21 -0
  39. data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
  40. data/lib/ucode/audit/extractors/coverage.rb +55 -0
  41. data/lib/ucode/audit/extractors/hinting.rb +199 -0
  42. data/lib/ucode/audit/extractors/identity.rb +65 -0
  43. data/lib/ucode/audit/extractors/licensing.rb +75 -0
  44. data/lib/ucode/audit/extractors/metrics.rb +108 -0
  45. data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
  46. data/lib/ucode/audit/extractors/provenance.rb +34 -0
  47. data/lib/ucode/audit/extractors/style.rb +88 -0
  48. data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
  49. data/lib/ucode/audit/extractors.rb +31 -0
  50. data/lib/ucode/audit/plane_aggregator.rb +37 -0
  51. data/lib/ucode/audit/registry.rb +63 -0
  52. data/lib/ucode/audit/script_aggregator.rb +92 -0
  53. data/lib/ucode/audit.rb +27 -0
  54. data/lib/ucode/cache.rb +113 -0
  55. data/lib/ucode/cli.rb +272 -0
  56. data/lib/ucode/commands/build.rb +68 -0
  57. data/lib/ucode/commands/cache.rb +46 -0
  58. data/lib/ucode/commands/fetch.rb +62 -0
  59. data/lib/ucode/commands/font_coverage.rb +57 -0
  60. data/lib/ucode/commands/glyphs.rb +136 -0
  61. data/lib/ucode/commands/lookup.rb +65 -0
  62. data/lib/ucode/commands/parse.rb +62 -0
  63. data/lib/ucode/commands/site.rb +33 -0
  64. data/lib/ucode/commands.rb +19 -0
  65. data/lib/ucode/config.rb +110 -0
  66. data/lib/ucode/coordinator/indices.rb +34 -0
  67. data/lib/ucode/coordinator.rb +397 -0
  68. data/lib/ucode/database.rb +214 -0
  69. data/lib/ucode/db_builder.rb +107 -0
  70. data/lib/ucode/error.rb +96 -0
  71. data/lib/ucode/fetch/code_charts.rb +57 -0
  72. data/lib/ucode/fetch/http.rb +83 -0
  73. data/lib/ucode/fetch/ucd_zip.rb +57 -0
  74. data/lib/ucode/fetch/unihan_zip.rb +57 -0
  75. data/lib/ucode/fetch.rb +14 -0
  76. data/lib/ucode/glyphs/cell_extractor.rb +130 -0
  77. data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
  78. data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
  79. data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
  80. data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
  81. data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
  82. data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
  83. data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
  84. data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
  85. data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
  86. data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
  87. data/lib/ucode/glyphs/grid.rb +30 -0
  88. data/lib/ucode/glyphs/grid_detector.rb +165 -0
  89. data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
  90. data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
  91. data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
  92. data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
  93. data/lib/ucode/glyphs/last_resort/source.rb +125 -0
  94. data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
  95. data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
  96. data/lib/ucode/glyphs/last_resort.rb +36 -0
  97. data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
  98. data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
  99. data/lib/ucode/glyphs/page_renderer.rb +221 -0
  100. data/lib/ucode/glyphs/path_bbox.rb +62 -0
  101. data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
  102. data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
  103. data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
  104. data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
  105. data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
  106. data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
  107. data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
  108. data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
  109. data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
  110. data/lib/ucode/glyphs/real_fonts.rb +32 -0
  111. data/lib/ucode/glyphs/writer.rb +250 -0
  112. data/lib/ucode/glyphs.rb +27 -0
  113. data/lib/ucode/index.rb +106 -0
  114. data/lib/ucode/index_builder.rb +94 -0
  115. data/lib/ucode/models/audit/audit_axis.rb +30 -0
  116. data/lib/ucode/models/audit/audit_diff.rb +77 -0
  117. data/lib/ucode/models/audit/audit_report.rb +137 -0
  118. data/lib/ucode/models/audit/baseline.rb +32 -0
  119. data/lib/ucode/models/audit/block_summary.rb +72 -0
  120. data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
  121. data/lib/ucode/models/audit/codepoint_range.rb +39 -0
  122. data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
  123. data/lib/ucode/models/audit/color_capabilities.rb +91 -0
  124. data/lib/ucode/models/audit/discrepancy.rb +38 -0
  125. data/lib/ucode/models/audit/duplicate_group.rb +23 -0
  126. data/lib/ucode/models/audit/embedding_type.rb +81 -0
  127. data/lib/ucode/models/audit/field_change.rb +28 -0
  128. data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
  129. data/lib/ucode/models/audit/gasp_range.rb +63 -0
  130. data/lib/ucode/models/audit/hinting.rb +99 -0
  131. data/lib/ucode/models/audit/library_summary.rb +40 -0
  132. data/lib/ucode/models/audit/licensing.rb +48 -0
  133. data/lib/ucode/models/audit/metrics.rb +111 -0
  134. data/lib/ucode/models/audit/named_instance.rb +41 -0
  135. data/lib/ucode/models/audit/opentype_layout.rb +38 -0
  136. data/lib/ucode/models/audit/plane_summary.rb +31 -0
  137. data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
  138. data/lib/ucode/models/audit/script_features.rb +28 -0
  139. data/lib/ucode/models/audit/script_summary.rb +54 -0
  140. data/lib/ucode/models/audit/variation_detail.rb +42 -0
  141. data/lib/ucode/models/audit.rb +50 -0
  142. data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
  143. data/lib/ucode/models/bidi_mirroring.rb +19 -0
  144. data/lib/ucode/models/binary_property_assignment.rb +26 -0
  145. data/lib/ucode/models/block.rb +36 -0
  146. data/lib/ucode/models/case_folding_rule.rb +23 -0
  147. data/lib/ucode/models/cjk_radical.rb +23 -0
  148. data/lib/ucode/models/codepoint/bidi.rb +28 -0
  149. data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
  150. data/lib/ucode/models/codepoint/case_folding.rb +25 -0
  151. data/lib/ucode/models/codepoint/casing.rb +32 -0
  152. data/lib/ucode/models/codepoint/decomposition.rb +27 -0
  153. data/lib/ucode/models/codepoint/display.rb +24 -0
  154. data/lib/ucode/models/codepoint/emoji.rb +29 -0
  155. data/lib/ucode/models/codepoint/hangul.rb +20 -0
  156. data/lib/ucode/models/codepoint/identifier.rb +30 -0
  157. data/lib/ucode/models/codepoint/indic.rb +20 -0
  158. data/lib/ucode/models/codepoint/joining.rb +20 -0
  159. data/lib/ucode/models/codepoint/normalization.rb +35 -0
  160. data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
  161. data/lib/ucode/models/codepoint.rb +122 -0
  162. data/lib/ucode/models/name_alias.rb +21 -0
  163. data/lib/ucode/models/named_sequence.rb +19 -0
  164. data/lib/ucode/models/names_list_entry.rb +38 -0
  165. data/lib/ucode/models/plane.rb +36 -0
  166. data/lib/ucode/models/property_alias.rb +24 -0
  167. data/lib/ucode/models/property_value_alias.rb +26 -0
  168. data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
  169. data/lib/ucode/models/relationship/cross_reference.rb +17 -0
  170. data/lib/ucode/models/relationship/footnote.rb +24 -0
  171. data/lib/ucode/models/relationship/informal_alias.rb +18 -0
  172. data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
  173. data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
  174. data/lib/ucode/models/relationship.rb +57 -0
  175. data/lib/ucode/models/script.rb +41 -0
  176. data/lib/ucode/models/special_casing_rule.rb +28 -0
  177. data/lib/ucode/models/standardized_variant.rb +24 -0
  178. data/lib/ucode/models/unihan_entry.rb +23 -0
  179. data/lib/ucode/models.rb +47 -0
  180. data/lib/ucode/parsers/auxiliary.rb +26 -0
  181. data/lib/ucode/parsers/base.rb +137 -0
  182. data/lib/ucode/parsers/bidi_brackets.rb +41 -0
  183. data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
  184. data/lib/ucode/parsers/blocks.rb +63 -0
  185. data/lib/ucode/parsers/case_folding.rb +53 -0
  186. data/lib/ucode/parsers/cjk_radicals.rb +102 -0
  187. data/lib/ucode/parsers/derived_age.rb +59 -0
  188. data/lib/ucode/parsers/derived_core_properties.rb +60 -0
  189. data/lib/ucode/parsers/extracted_properties.rb +74 -0
  190. data/lib/ucode/parsers/name_aliases.rb +44 -0
  191. data/lib/ucode/parsers/named_sequences.rb +51 -0
  192. data/lib/ucode/parsers/names_list.rb +250 -0
  193. data/lib/ucode/parsers/property_aliases.rb +41 -0
  194. data/lib/ucode/parsers/property_value_aliases.rb +46 -0
  195. data/lib/ucode/parsers/script_extensions.rb +64 -0
  196. data/lib/ucode/parsers/scripts.rb +60 -0
  197. data/lib/ucode/parsers/special_casing.rb +62 -0
  198. data/lib/ucode/parsers/standardized_variants.rb +56 -0
  199. data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
  200. data/lib/ucode/parsers/unicode_data.rb +268 -0
  201. data/lib/ucode/parsers/unihan.rb +125 -0
  202. data/lib/ucode/parsers.rb +35 -0
  203. data/lib/ucode/range_entry.rb +58 -0
  204. data/lib/ucode/repo/aggregate_writer.rb +364 -0
  205. data/lib/ucode/repo/atomic_writes.rb +48 -0
  206. data/lib/ucode/repo/codepoint_writer.rb +96 -0
  207. data/lib/ucode/repo/paths.rb +122 -0
  208. data/lib/ucode/repo.rb +22 -0
  209. data/lib/ucode/site/config_emitter.rb +124 -0
  210. data/lib/ucode/site/generator.rb +178 -0
  211. data/lib/ucode/site/search_index.rb +68 -0
  212. data/lib/ucode/site/template/.gitignore +4 -0
  213. data/lib/ucode/site/template/.vitepress/config.ts +8 -0
  214. data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
  215. data/lib/ucode/site/template/char/[codepoint].md +13 -0
  216. data/lib/ucode/site/template/components/BlockView.vue +57 -0
  217. data/lib/ucode/site/template/components/CharView.vue +85 -0
  218. data/lib/ucode/site/template/components/PlaneView.vue +56 -0
  219. data/lib/ucode/site/template/components/SearchView.vue +66 -0
  220. data/lib/ucode/site/template/index.md +25 -0
  221. data/lib/ucode/site/template/package.json +18 -0
  222. data/lib/ucode/site/template/search.md +9 -0
  223. data/lib/ucode/site.rb +13 -0
  224. data/lib/ucode/version.rb +5 -0
  225. data/lib/ucode/version_resolver.rb +76 -0
  226. data/lib/ucode.rb +74 -0
  227. data/ucode.gemspec +56 -0
  228. metadata +404 -0
@@ -0,0 +1,247 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ucode/glyphs/last_resort/glif"
4
+
5
+ module Ucode
6
+ module Glyphs
7
+ module LastResort
8
# Renders a {Glif::Outline} as a self-contained SVG document.
#
# Coordinate handling:
#
# * **Y-axis flip.** UFO y grows upward, SVG y grows downward. Every
#   y coordinate is negated in the path data, and the viewBox origin
#   is placed at `-(max_y + padding)` so the flipped glyph sits
#   inside the visible area, upright.
#
# * **ViewBox normalization.** The viewBox is the outline's bounding
#   box grown by {PaddingRatio} on every side. `width`/`height`
#   repeat the viewBox dimensions so consumers can scale via CSS.
#
# Path commands come from {PathBuilder}, one sub-path per contour
# (`move` → `M`, `line` → `L`, `curve` → cubic, `qcurve` →
# quadratic), joined with single spaces.
class Svg
  # Fraction of the bbox width/height added as padding on each side.
  PaddingRatio = 0.08
  private_constant :PaddingRatio

  # @param outline [Glif::Outline] must respond to `contours` and `bbox`
  # @param codepoint [Integer, nil] when given, a `<title>` element
  #   naming the codepoint is emitted (accessibility + debugging)
  def initialize(outline, codepoint: nil)
    @outline = outline
    @codepoint = codepoint
  end

  # @return [String] XML declaration followed by the complete
  #   `<svg>...</svg>` document, newline-terminated
  def to_s
    box = view_box
    svg_open = %(<svg xmlns="http://www.w3.org/2000/svg" viewBox="#{format_dims(box)}" width="#{format_num(box[:width])}" height="#{format_num(box[:height])}" preserveAspectRatio="xMidYMid meet">)
    title = @codepoint ? %( <title>U+#{format("%04X", @codepoint)} (Last Resort)</title>) : nil
    path = %( <path d="#{path_data.strip}" fill="currentColor" fill-rule="evenodd"/>)
    body = [svg_open, title, path, %(</svg>)].compact.join("\n")
    %(<?xml version="1.0" encoding="UTF-8"?>\n#{body}\n)
  end

  # The value of the path `d` attribute on its own, for tests and for
  # callers that embed the path in their own SVG wrapper.
  #
  # @return [String]
  def path_data
    sub_paths = @outline.contours.map do |contour|
      PathBuilder.new(contour.points).to_path
    end
    sub_paths.join(" ")
  end

  private

  # Formats the four viewBox numbers with two decimals each.
  def format_dims(box)
    format("%<min_x>.2f %<min_y>.2f %<width>.2f %<height>.2f", box)
  end

  # Computes the padded viewBox as `{ min_x:, min_y:, width:, height: }`.
  # `min_y` is already expressed in flipped (SVG) space, i.e. it is
  # the top edge. An outline without a bbox gets a 1x1 box at the
  # origin; a zero-size extent is widened to 1.0 so the viewBox never
  # collapses.
  def view_box
    bbox = @outline.bbox
    return { min_x: 0, min_y: 0, width: 1, height: 1 } if bbox.nil?

    left, right, bottom, top = %i[min_x max_x min_y max_y].map { |key| bbox[key].to_f }
    span_x = right - left
    span_y = top - bottom
    width = span_x.zero? ? 1.0 : span_x
    height = span_y.zero? ? 1.0 : span_y
    pad_x = width * PaddingRatio
    pad_y = height * PaddingRatio

    {
      min_x: left - pad_x,
      min_y: -(top + pad_y),
      width: width + (2 * pad_x),
      height: height + (2 * pad_y),
    }
  end

  # Whole numbers print without a fraction; anything else gets two
  # decimals.
  def format_num(n)
    return n.to_i.to_s if n.to_f == n.to_i

    format("%.2f", n)
  end
end
106
+
107
# Internal helper: walks one contour's points and emits SVG path
# commands following the UFO GLIF point-type rules.
#
# Segment rules (an on-curve point consumes the off-curve points
# collected since the previous on-curve point):
#
# * `line`   → `L x y`
# * `curve`  → `C c1 c2 x y` with two off-curves, `Q c x y` with one,
#              `L x y` with none
# * `qcurve` → chained `Q` segments with implied on-curve midpoints
#              between consecutive off-curves; `L x y` with none
#
# Contour start: an explicit `move` starts an open contour. Otherwise
# the first on-curve point is the implicit start and only produces
# `M` (emitting its own segment there would be zero-length). Any
# off-curve points seen BEFORE that start belong to the closing
# segment, as do off-curve points left over at the END of the list;
# together (trailing first, then leading) they form the wrap-around
# segment back to the start, whose shape is dictated by the start
# point's own type.
#
# A contour made only of off-curve points is treated as a closed
# quadratic spline whose on-curve points are the midpoints between
# neighbouring off-curves. A contour with no drawable points yields
# an empty string rather than a dangling `Z`.
class PathBuilder
  # @param points [Array<#kind, #x, #y>] contour points; `kind` is one
  #   of `:move`, `:line`, `:curve`, `:qcurve`, `:offcurve`
  def initialize(points)
    @points = points
  end

  # Builds the path data for the contour. State is rebuilt on every
  # call, so calling this repeatedly returns the same string.
  #
  # @return [String] path commands ending in `Z`, or `""` when the
  #   contour has nothing to draw
  def to_path
    reset
    @points.each { |point| visit(point) }
    finish
  end

  private

  def reset
    @out = +""
    @pending_offcurve = []
    @leading_offcurve = []
    @start = nil
  end

  # Dispatches on point type; unknown kinds are ignored.
  def visit(point)
    case point.kind
    when :offcurve then @pending_offcurve << point
    when :move then start_contour(point)
    when :line, :curve, :qcurve then emit_oncurve(point)
    end
  end

  # Emits `M` for the contour start and sets aside any off-curves seen
  # so far: they are wrap-around controls for the closing segment and
  # must not leak into the next segment.
  def start_contour(point)
    @out << "M #{flip_xy(point)} "
    @start = point
    @leading_offcurve = @pending_offcurve
    @pending_offcurve = []
  end

  def emit_oncurve(point)
    return start_contour(point) unless @start

    append_segment(point.kind, @pending_offcurve, point)
    @pending_offcurve = []
  end

  # Emits the segment ending at `target`, choosing the command from
  # the target's point type and the number of control points.
  def append_segment(kind, controls, target)
    if kind == :line || controls.empty?
      @out << "L #{flip_xy(target)} "
    elsif kind == :curve && controls.length >= 2
      # Only the first two controls are used, as a cubic has exactly two.
      @out << "C #{flip_xy(controls[0])} #{flip_xy(controls[1])} #{flip_xy(target)} "
    else
      # qcurve, or a `curve` with a single control (quadratic per spec).
      append_quadratic_chain(controls, target)
    end
  end

  # One `Q` per control point; between two consecutive controls the
  # on-curve point is their midpoint, and the last `Q` ends at `target`.
  def append_quadratic_chain(controls, target)
    controls.each_with_index do |ctrl, idx|
      following = controls[idx + 1]
      anchor = following ? midpoint(ctrl, following) : flip_xy(target)
      @out << "Q #{flip_xy(ctrl)} #{anchor} "
    end
  end

  # Closed quadratic contour without any on-curve point: start at the
  # midpoint between the last and first control and chain around.
  def append_offcurve_only_contour(controls)
    @out << "M #{midpoint(controls.last, controls.first)} "
    controls.each_with_index do |ctrl, idx|
      following = controls[(idx + 1) % controls.length]
      @out << "Q #{flip_xy(ctrl)} #{midpoint(ctrl, following)} "
    end
  end

  # Emits the wrap-around segment (if any) and the closing `Z`.
  def finish
    if @start
      close_wraparound
    elsif @pending_offcurve.any?
      append_offcurve_only_contour(@pending_offcurve)
    else
      return ""
    end
    @out << "Z"
    @out.strip
  end

  # For an implicitly started (closed) contour, the leftover trailing
  # off-curves followed by the leading ones are the controls of the
  # segment that returns to the start point. A contour opened with
  # `move` has no such segment, so stray trailing off-curves are
  # dropped.
  def close_wraparound
    return if @start.kind == :move

    controls = @pending_offcurve + @leading_offcurve
    append_segment(@start.kind, controls, @start) unless controls.empty?
  end

  def midpoint(first, second)
    flip_xy_struct((first.x + second.x) / 2.0, (first.y + second.y) / 2.0)
  end

  def flip_xy(point)
    flip_xy_struct(point.x, point.y)
  end

  # UFO y grows up; SVG y grows down, so y is negated here. The
  # caller's viewBox is expected to account for the vertical offset.
  def flip_xy_struct(x, y)
    "#{format_num(x)} #{format_num(-y)}"
  end

  # Whole numbers print without a fraction; anything else gets two
  # decimals.
  def format_num(n)
    return n.to_i.to_s if n.is_a?(Integer) || n.to_f == n.to_i

    format("%.2f", n)
  end
end
245
+ end
246
+ end
247
+ end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/glyphs/last_resort/renderer"
6
+ require "ucode/glyphs/last_resort/source"
7
+ require "ucode/repo/atomic_writes"
8
+ require "ucode/repo/paths"
9
+
10
+ module Ucode
11
+ module Glyphs
12
+ module LastResort
13
# Emits one `glyph.svg` per requested codepoint, with the outline
# taken from the Last Resort UFO.
#
# A single Renderer is created up front and reused for every
# codepoint, so whatever the Renderer parses from the source is
# loaded once per Writer rather than once per glyph.
#
# Files are written through `write_atomic` from `Repo::AtomicWrites`.
# NOTE(review): idempotence (unchanged content is skipped) and
# atomicity (`<path>.tmp` + rename) are properties of that mixin and
# are not visible here — confirm against `Repo::AtomicWrites`.
#
# The Writer does not decide which codepoints deserve a Last Resort
# glyph; it writes whatever the caller passes in, provided the
# caller's `block_lookup` knows the codepoint's block.
class Writer
  include Repo::AtomicWrites

  # @param output_root [String, Pathname] root of the output tree
  # @param source [Source] Last Resort UFO source handed to the Renderer
  def initialize(output_root:, source:)
    @output_root = Pathname.new(output_root)
    @source = source
    @renderer = Renderer.new(source)
  end

  # Renders and writes `glyph.svg` for each codepoint.
  #
  # @param codepoints [Array<Integer>, Enumerable<Integer>]
  # @param block_lookup [Proc, #call] codepoint → block id string
  #   (e.g. `"Basic_Latin"`), or nil when the codepoint has no block
  # @return [Hash] tally `{ written:, skipped:, missing:, total: }`;
  #   `missing` counts codepoints without a block as well as those
  #   the renderer could not produce, `skipped` counts writes that
  #   `write_atomic` reported as not performed
  def write_many(codepoints, block_lookup:)
    tally = { written: 0, skipped: 0, missing: 0, total: 0 }
    codepoints.each do |cp|
      tally[:total] += 1
      tally[outcome_for(cp, block_lookup)] += 1
    end
    tally
  end

  private

  # Processes one codepoint and names the tally bucket it falls in.
  def outcome_for(codepoint, block_lookup)
    block_id = block_lookup.call(codepoint)
    return :missing if block_id.nil?

    rendered = @renderer.render(codepoint)
    return :missing unless rendered&.ok?

    write_glyph(block_id, codepoint, rendered.svg) ? :written : :skipped
  end

  # Resolves the destination path for the codepoint and writes the SVG.
  def write_glyph(block_id, codepoint, svg)
    target = Repo::Paths.codepoint_glyph_path(@output_root, block_id, Repo::Paths.cp_id(codepoint))
    write_atomic(target, svg)
  end
end
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ucode
4
+ module Glyphs
5
+ # Last Resort Font integration — pillar 2 of the v0.2 glyph strategy.
6
+ #
7
+ # For codepoints whose Code Charts cell shows a placeholder box
8
+ # (unassigned, noncharacter, PUA), the chart glyph is drawn from
9
+ # Unicode's Last Resort Font. The Last Resort Font ships as a UFO
10
+ # source with three parts that matter to us:
11
+ #
12
+ # * `cmap-f13.ttx` — a Format 13 `cmap` that maps every codepoint
13
+ # (0x0..0x10FFFF) to a placeholder glyph name. 1,114,112 entries.
14
+ # * `font.ufo/glyphs/*.glif` — 380 outline files, one per Unicode
15
+ # block + a handful of special types (`notdef`,
16
+ # `notdefplanezero`, the noncharacter / unassigned planes, …).
17
+ # * `font.ufo/glyphs/contents.plist` — glyph name → `.glif` file.
18
+ #
19
+ # The pipeline is read-only and stateless: cmap (cp → name) →
20
+ # contents (name → file) → glif (file → outline) → svg (outline →
21
+ # SVG document). No PDF parsing, no cell extraction, no border
22
+ # compositing — the placeholder outline is exactly what the Code
23
+ # Charts display.
24
+ #
25
+ # See {Source} for how to locate the UFO on disk.
26
+ module LastResort
27
+ autoload :Source, "ucode/glyphs/last_resort/source"
28
+ autoload :CmapIndex, "ucode/glyphs/last_resort/cmap_index"
29
+ autoload :Contents, "ucode/glyphs/last_resort/contents"
30
+ autoload :Glif, "ucode/glyphs/last_resort/glif"
31
+ autoload :Svg, "ucode/glyphs/last_resort/svg"
32
+ autoload :Renderer, "ucode/glyphs/last_resort/renderer"
33
+ autoload :Writer, "ucode/glyphs/last_resort/writer"
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,181 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+ require "json"
5
+ require "open3"
6
+
7
+ module Ucode
8
+ module Glyphs
9
# Maps a Unicode block's first codepoint to its page range inside the
# monolith `CodeCharts.pdf` by parsing the PDF's bookmark outline and
# matching each bookmark title to a Block.name from `Blocks.txt`.
#
# Each chart cluster is expected to appear in the `pdftk dump_data`
# output as:
#
#   BookmarkTitle: Greek and Coptic
#   BookmarkLevel: 1
#   BookmarkPageNumber: 415
#
# A title either equals a Block.name verbatim or carries one of the
# {ClusterPrefixes} ("C0 Controls and " / "C1 Controls and ") in
# front of the block name; both forms are resolved. Bookmarks whose
# title matches no block are ignored.
#
# A cluster ends one page before the next resolved cluster starts;
# the last cluster ends at the PDF's last page.
#
# The map can be cached as JSON (see {.load}) so the monolith is not
# re-scanned on every run.
class MonolithPageMap
  BookmarkTitleRegex = /BookmarkTitle:\s*(.+)/.freeze
  BookmarkPageRegex = /BookmarkPageNumber:\s*(\d+)/.freeze
  private_constant :BookmarkTitleRegex, :BookmarkPageRegex

  # Title prefixes that the charts put in front of a block name when
  # control characters are drawn together with that block.
  ClusterPrefixes = [
    "C0 Controls and ",
    "C1 Controls and ",
  ].freeze
  private_constant :ClusterPrefixes

  MapEntry = Struct.new(:first_cp, :start_page, :end_page, keyword_init: true)

  class << self
    # Build the map by dumping the monolith's bookmarks and matching
    # each bookmark title to a Block. Shells out to `pdftk` and
    # `pdfinfo`.
    #
    # @param monolith_path [String, Pathname]
    # @param blocks [Array<Ucode::Models::Block>] the parsed Blocks table
    # @return [Hash{Integer => MapEntry}] keyed by block.range_first;
    #   empty when the bookmark dump failed or matched nothing
    def build(monolith_path:, blocks:)
      name_to_first_cp = blocks.each_with_object({}) do |block, lookup|
        lookup[block.name] = block.range_first
      end
      total_pages = page_count(monolith_path)
      entries = parse_bookmarks(dump_bookmarks(monolith_path), name_to_first_cp)
      attach_end_pages(entries, total_pages).each_with_object({}) do |entry, map|
        map[entry.first_cp] = entry
      end
    end

    # Parse a `pdftk dump_data` string into MapEntry rows (without
    # end_pages). No I/O; exposed for unit tests and for callers that
    # already hold the dump.
    #
    # @param dump [String] the raw `pdftk dump_data` output
    # @param name_to_first_cp [Hash{String => Integer}]
    # @return [Array<MapEntry>] sorted by start_page
    def parse_bookmarks(dump, name_to_first_cp)
      entries = []
      current_title = nil
      dump.each_line do |line|
        case line
        when BookmarkTitleRegex
          current_title = Regexp.last_match(1).strip
        when BookmarkPageRegex
          page = Regexp.last_match(1).to_i
          cp = resolve_first_cp(current_title, name_to_first_cp)
          entries << MapEntry.new(first_cp: cp, start_page: page) if cp
          # A page number closes the bookmark record; never reuse its title.
          current_title = nil
        end
      end
      entries.sort_by(&:start_page)
    end

    # Assign each entry's end_page: one page before the next entry's
    # start, never less than the entry's own start_page (two entries
    # may share a start page). No I/O, but the given entries ARE
    # mutated in place.
    #
    # @param entries [Array<MapEntry>]
    # @param total_pages [Integer, nil] page count of the source PDF;
    #   becomes the last entry's end_page (nil when unknown).
    # @return [Array<MapEntry>] the same entries sorted by start_page
    def attach_end_pages(entries, total_pages = nil)
      sorted = entries.sort_by(&:start_page)
      sorted.each_with_index do |entry, i|
        following = sorted[i + 1]
        entry.end_page =
          if following
            [following.start_page - 1, entry.start_page].max
          else
            total_pages
          end
      end
      sorted
    end

    # Load from cache, or build and cache.
    #
    # An unreadable (invalid JSON) or empty cache is treated as absent
    # and rebuilt, and an empty build result is never cached: an empty
    # map means the bookmark dump failed, and persisting it would make
    # that failure permanent.
    #
    # @param monolith_path [String, Pathname]
    # @param blocks [Array<Ucode::Models::Block>]
    # @param cache_path [String, Pathname, nil]
    # @return [Hash{Integer => MapEntry}]
    def load(monolith_path:, blocks:, cache_path: nil)
      cache = cache_path && Pathname.new(cache_path)
      cached = cache&.exist? ? read_cache(cache) : {}
      return cached unless cached.empty?

      map = build(monolith_path: monolith_path, blocks: blocks)
      write_cache(map, cache) if cache && !map.empty?
      map
    end

    # Look up a block's page range by its first cp.
    # @param map [Hash{Integer => MapEntry}]
    # @param block_first_cp [Integer]
    # @return [MapEntry, nil]
    def range_for(map, block_first_cp)
      map[block_first_cp]
    end

    # ---- I/O helpers (impure) --------------------------------------

    # @return [String] combined stdout/stderr of `pdftk <pdf> dump_data`,
    #   or "" when pdftk exits unsuccessfully
    def dump_bookmarks(monolith_path)
      out, status = Open3.capture2e("pdftk", monolith_path.to_s, "dump_data")
      status.success? ? out : ""
    end

    # @return [Integer, nil] the `Pages:` value reported by `pdfinfo`,
    #   or nil when pdfinfo fails or prints no page count
    def page_count(monolith_path)
      out, status = Open3.capture2e("pdfinfo", monolith_path.to_s)
      return nil unless status.success?

      out[/^Pages:\s+(\d+)/, 1]&.to_i
    end

    private

    # Resolve a bookmark title to a block's first codepoint, trying the
    # verbatim title first and then the title minus a cluster prefix.
    def resolve_first_cp(title, name_to_first_cp)
      return nil unless title
      return name_to_first_cp[title] if name_to_first_cp.key?(title)

      ClusterPrefixes.each do |prefix|
        stripped = title.delete_prefix(prefix)
        return name_to_first_cp[stripped] if name_to_first_cp.key?(stripped)
      end

      nil
    end

    def write_cache(map, cache_path)
      payload = map.values.map do |entry|
        { "first_cp" => entry.first_cp,
          "start_page" => entry.start_page,
          "end_page" => entry.end_page }
      end
      cache_path.dirname.mkpath
      cache_path.write(JSON.pretty_generate(payload))
    end

    # Read the cache file; a file that is not valid JSON counts as an
    # empty cache so the caller rebuilds it.
    def read_cache(cache_path)
      load_from_json(cache_path.read)
    rescue JSON::ParserError
      {}
    end

    def load_from_json(json)
      JSON.parse(json).each_with_object({}) do |row, map|
        entry = MapEntry.new(first_cp: row["first_cp"],
                             start_page: row["start_page"],
                             end_page: row["end_page"])
        map[entry.first_cp] = entry
      end
    end
  end
end
180
+ end
181
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ucode/glyphs/page_renderer"
4
+
5
+ module Ucode
6
+ module Glyphs
7
# Page renderer backed by MuPDF's `mutool draw`, asked for SVG output.
#
# Command: `mutool draw -F svg -o <out.svg> <in.pdf> <page>`
class MutoolRenderer < PageRenderer
  # @return [Symbol] identifier of this renderer
  def self.renderer_name
    :mutool
  end

  # @return [Symbol] name of the executable this renderer needs
  def self.binary_name
    :mutool
  end

  # Assembles the argv for rendering one page to an SVG file.
  #
  # @param pdf_path [#to_s] input PDF
  # @param page_num [#to_s] page to render
  # @param out_path [#to_s] destination SVG
  # @return [Array<String>]
  def self.build_command(pdf_path, page_num, out_path)
    %w[mutool draw -F svg -o] + [out_path, pdf_path, page_num].map(&:to_s)
  end
end
27
+ end
28
+ end