ucode 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CLAUDE.md +211 -0
- data/Gemfile +22 -0
- data/Gemfile.lock +406 -0
- data/README.md +469 -0
- data/Rakefile +18 -0
- data/TODO.new/00-README.md +66 -0
- data/TODO.new/01-pillar-terminology-alignment.md +69 -0
- data/TODO.new/02-audit-schema-design.md +255 -0
- data/TODO.new/03-directory-output-spec.md +203 -0
- data/TODO.new/04-fontist-org-contract.md +173 -0
- data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
- data/TODO.new/06-audit-namespace-skeleton.md +105 -0
- data/TODO.new/07-audit-models-port.md +132 -0
- data/TODO.new/08-extractors-cheap-port.md +113 -0
- data/TODO.new/09-extractors-expensive-port.md +99 -0
- data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
- data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
- data/TODO.new/12-formatters-port.md +115 -0
- data/TODO.new/13-directory-emitter.md +147 -0
- data/TODO.new/14-html-face-browser.md +144 -0
- data/TODO.new/15-html-library-browser.md +102 -0
- data/TODO.new/16-cli-audit-subcommands.md +142 -0
- data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
- data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
- data/TODO.new/19-fontisan-docs-update.md +155 -0
- data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
- data/TODO.new/21-canonical-unicode17-build.md +148 -0
- data/TODO.new/22-implementation-order.md +176 -0
- data/UCODE_CHANGELOG.md +97 -0
- data/exe/ucode +8 -0
- data/lib/ucode/aggregator.rb +77 -0
- data/lib/ucode/audit/block_aggregator.rb +90 -0
- data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
- data/lib/ucode/audit/context.rb +137 -0
- data/lib/ucode/audit/discrepancy_detector.rb +213 -0
- data/lib/ucode/audit/extractors/aggregations.rb +70 -0
- data/lib/ucode/audit/extractors/base.rb +21 -0
- data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
- data/lib/ucode/audit/extractors/coverage.rb +55 -0
- data/lib/ucode/audit/extractors/hinting.rb +199 -0
- data/lib/ucode/audit/extractors/identity.rb +65 -0
- data/lib/ucode/audit/extractors/licensing.rb +75 -0
- data/lib/ucode/audit/extractors/metrics.rb +108 -0
- data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
- data/lib/ucode/audit/extractors/provenance.rb +34 -0
- data/lib/ucode/audit/extractors/style.rb +88 -0
- data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
- data/lib/ucode/audit/extractors.rb +31 -0
- data/lib/ucode/audit/plane_aggregator.rb +37 -0
- data/lib/ucode/audit/registry.rb +63 -0
- data/lib/ucode/audit/script_aggregator.rb +92 -0
- data/lib/ucode/audit.rb +27 -0
- data/lib/ucode/cache.rb +113 -0
- data/lib/ucode/cli.rb +272 -0
- data/lib/ucode/commands/build.rb +68 -0
- data/lib/ucode/commands/cache.rb +46 -0
- data/lib/ucode/commands/fetch.rb +62 -0
- data/lib/ucode/commands/font_coverage.rb +57 -0
- data/lib/ucode/commands/glyphs.rb +136 -0
- data/lib/ucode/commands/lookup.rb +65 -0
- data/lib/ucode/commands/parse.rb +62 -0
- data/lib/ucode/commands/site.rb +33 -0
- data/lib/ucode/commands.rb +19 -0
- data/lib/ucode/config.rb +110 -0
- data/lib/ucode/coordinator/indices.rb +34 -0
- data/lib/ucode/coordinator.rb +397 -0
- data/lib/ucode/database.rb +214 -0
- data/lib/ucode/db_builder.rb +107 -0
- data/lib/ucode/error.rb +96 -0
- data/lib/ucode/fetch/code_charts.rb +57 -0
- data/lib/ucode/fetch/http.rb +83 -0
- data/lib/ucode/fetch/ucd_zip.rb +57 -0
- data/lib/ucode/fetch/unihan_zip.rb +57 -0
- data/lib/ucode/fetch.rb +14 -0
- data/lib/ucode/glyphs/cell_extractor.rb +130 -0
- data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
- data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
- data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
- data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
- data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
- data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
- data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
- data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
- data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
- data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
- data/lib/ucode/glyphs/grid.rb +30 -0
- data/lib/ucode/glyphs/grid_detector.rb +165 -0
- data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
- data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
- data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
- data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
- data/lib/ucode/glyphs/last_resort/source.rb +125 -0
- data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
- data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
- data/lib/ucode/glyphs/last_resort.rb +36 -0
- data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
- data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
- data/lib/ucode/glyphs/page_renderer.rb +221 -0
- data/lib/ucode/glyphs/path_bbox.rb +62 -0
- data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
- data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
- data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
- data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
- data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
- data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
- data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
- data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
- data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
- data/lib/ucode/glyphs/real_fonts.rb +32 -0
- data/lib/ucode/glyphs/writer.rb +250 -0
- data/lib/ucode/glyphs.rb +27 -0
- data/lib/ucode/index.rb +106 -0
- data/lib/ucode/index_builder.rb +94 -0
- data/lib/ucode/models/audit/audit_axis.rb +30 -0
- data/lib/ucode/models/audit/audit_diff.rb +77 -0
- data/lib/ucode/models/audit/audit_report.rb +137 -0
- data/lib/ucode/models/audit/baseline.rb +32 -0
- data/lib/ucode/models/audit/block_summary.rb +72 -0
- data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
- data/lib/ucode/models/audit/codepoint_range.rb +39 -0
- data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
- data/lib/ucode/models/audit/color_capabilities.rb +91 -0
- data/lib/ucode/models/audit/discrepancy.rb +38 -0
- data/lib/ucode/models/audit/duplicate_group.rb +23 -0
- data/lib/ucode/models/audit/embedding_type.rb +81 -0
- data/lib/ucode/models/audit/field_change.rb +28 -0
- data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
- data/lib/ucode/models/audit/gasp_range.rb +63 -0
- data/lib/ucode/models/audit/hinting.rb +99 -0
- data/lib/ucode/models/audit/library_summary.rb +40 -0
- data/lib/ucode/models/audit/licensing.rb +48 -0
- data/lib/ucode/models/audit/metrics.rb +111 -0
- data/lib/ucode/models/audit/named_instance.rb +41 -0
- data/lib/ucode/models/audit/opentype_layout.rb +38 -0
- data/lib/ucode/models/audit/plane_summary.rb +31 -0
- data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
- data/lib/ucode/models/audit/script_features.rb +28 -0
- data/lib/ucode/models/audit/script_summary.rb +54 -0
- data/lib/ucode/models/audit/variation_detail.rb +42 -0
- data/lib/ucode/models/audit.rb +50 -0
- data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
- data/lib/ucode/models/bidi_mirroring.rb +19 -0
- data/lib/ucode/models/binary_property_assignment.rb +26 -0
- data/lib/ucode/models/block.rb +36 -0
- data/lib/ucode/models/case_folding_rule.rb +23 -0
- data/lib/ucode/models/cjk_radical.rb +23 -0
- data/lib/ucode/models/codepoint/bidi.rb +28 -0
- data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
- data/lib/ucode/models/codepoint/case_folding.rb +25 -0
- data/lib/ucode/models/codepoint/casing.rb +32 -0
- data/lib/ucode/models/codepoint/decomposition.rb +27 -0
- data/lib/ucode/models/codepoint/display.rb +24 -0
- data/lib/ucode/models/codepoint/emoji.rb +29 -0
- data/lib/ucode/models/codepoint/hangul.rb +20 -0
- data/lib/ucode/models/codepoint/identifier.rb +30 -0
- data/lib/ucode/models/codepoint/indic.rb +20 -0
- data/lib/ucode/models/codepoint/joining.rb +20 -0
- data/lib/ucode/models/codepoint/normalization.rb +35 -0
- data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
- data/lib/ucode/models/codepoint.rb +122 -0
- data/lib/ucode/models/name_alias.rb +21 -0
- data/lib/ucode/models/named_sequence.rb +19 -0
- data/lib/ucode/models/names_list_entry.rb +38 -0
- data/lib/ucode/models/plane.rb +36 -0
- data/lib/ucode/models/property_alias.rb +24 -0
- data/lib/ucode/models/property_value_alias.rb +26 -0
- data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
- data/lib/ucode/models/relationship/cross_reference.rb +17 -0
- data/lib/ucode/models/relationship/footnote.rb +24 -0
- data/lib/ucode/models/relationship/informal_alias.rb +18 -0
- data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
- data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
- data/lib/ucode/models/relationship.rb +57 -0
- data/lib/ucode/models/script.rb +41 -0
- data/lib/ucode/models/special_casing_rule.rb +28 -0
- data/lib/ucode/models/standardized_variant.rb +24 -0
- data/lib/ucode/models/unihan_entry.rb +23 -0
- data/lib/ucode/models.rb +47 -0
- data/lib/ucode/parsers/auxiliary.rb +26 -0
- data/lib/ucode/parsers/base.rb +137 -0
- data/lib/ucode/parsers/bidi_brackets.rb +41 -0
- data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
- data/lib/ucode/parsers/blocks.rb +63 -0
- data/lib/ucode/parsers/case_folding.rb +53 -0
- data/lib/ucode/parsers/cjk_radicals.rb +102 -0
- data/lib/ucode/parsers/derived_age.rb +59 -0
- data/lib/ucode/parsers/derived_core_properties.rb +60 -0
- data/lib/ucode/parsers/extracted_properties.rb +74 -0
- data/lib/ucode/parsers/name_aliases.rb +44 -0
- data/lib/ucode/parsers/named_sequences.rb +51 -0
- data/lib/ucode/parsers/names_list.rb +250 -0
- data/lib/ucode/parsers/property_aliases.rb +41 -0
- data/lib/ucode/parsers/property_value_aliases.rb +46 -0
- data/lib/ucode/parsers/script_extensions.rb +64 -0
- data/lib/ucode/parsers/scripts.rb +60 -0
- data/lib/ucode/parsers/special_casing.rb +62 -0
- data/lib/ucode/parsers/standardized_variants.rb +56 -0
- data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
- data/lib/ucode/parsers/unicode_data.rb +268 -0
- data/lib/ucode/parsers/unihan.rb +125 -0
- data/lib/ucode/parsers.rb +35 -0
- data/lib/ucode/range_entry.rb +58 -0
- data/lib/ucode/repo/aggregate_writer.rb +364 -0
- data/lib/ucode/repo/atomic_writes.rb +48 -0
- data/lib/ucode/repo/codepoint_writer.rb +96 -0
- data/lib/ucode/repo/paths.rb +122 -0
- data/lib/ucode/repo.rb +22 -0
- data/lib/ucode/site/config_emitter.rb +124 -0
- data/lib/ucode/site/generator.rb +178 -0
- data/lib/ucode/site/search_index.rb +68 -0
- data/lib/ucode/site/template/.gitignore +4 -0
- data/lib/ucode/site/template/.vitepress/config.ts +8 -0
- data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
- data/lib/ucode/site/template/char/[codepoint].md +13 -0
- data/lib/ucode/site/template/components/BlockView.vue +57 -0
- data/lib/ucode/site/template/components/CharView.vue +85 -0
- data/lib/ucode/site/template/components/PlaneView.vue +56 -0
- data/lib/ucode/site/template/components/SearchView.vue +66 -0
- data/lib/ucode/site/template/index.md +25 -0
- data/lib/ucode/site/template/package.json +18 -0
- data/lib/ucode/site/template/search.md +9 -0
- data/lib/ucode/site.rb +13 -0
- data/lib/ucode/version.rb +5 -0
- data/lib/ucode/version_resolver.rb +76 -0
- data/lib/ucode.rb +74 -0
- data/ucode.gemspec +56 -0
- metadata +404 -0
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ucode/glyphs/last_resort/glif"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Glyphs
|
|
7
|
+
module LastResort
|
|
8
|
+
# Converts a {Glif::Outline} into a standalone SVG document.
|
|
9
|
+
#
|
|
10
|
+
# Two transforms are applied:
|
|
11
|
+
#
|
|
12
|
+
# 1. **Y-axis flip.** UFO point y grows upward (PostScript
|
|
13
|
+
# convention); SVG y grows downward. We negate every y
|
|
14
|
+
# coordinate and offset the viewBox to match, so the glyph appears
|
|
15
|
+
# upright.
|
|
16
|
+
#
|
|
17
|
+
# 2. **ViewBox normalization.** The viewBox is set to the
|
|
18
|
+
# outline's bounding box, with a small padding so strokes
|
|
19
|
+
# are not clipped at the edges. The `width`/`height`
|
|
20
|
+
# attributes match the viewBox aspect ratio so consumers
|
|
21
|
+
# can scale via CSS.
|
|
22
|
+
#
|
|
23
|
+
# Path data semantics:
|
|
24
|
+
#
|
|
25
|
+
# * `move` → `M x y`
|
|
26
|
+
# * `line` → `L x y`
|
|
27
|
+
# * `curve` → `C cx1 cy1 cx2 cy2 x y` (cubic; preceding 1–2
|
|
28
|
+
# off-curve points are control points)
|
|
29
|
+
# * `qcurve` → `Q cx cy x y` (quadratic; ≥1 preceding off-curve
|
|
30
|
+
# points; multiple off-curves are emitted as chained
|
|
31
|
+
# quadratic segments with implicit on-curve midpoints
|
|
32
|
+
# per the UFO spec)
|
|
33
|
+
#
|
|
34
|
+
# Contours are closed with `Z` per UFO convention.
|
|
35
|
+
class Svg
  # Fraction of the glyph's width/height added as a margin on every
  # side of the viewBox.
  PaddingRatio = 0.08
  private_constant :PaddingRatio

  # @param outline [Glif::Outline] object answering `bbox` and `contours`
  # @param codepoint [Integer, nil] when given, a `<title>` element
  #   naming the codepoint is included in the document
  def initialize(outline, codepoint: nil)
    @outline = outline
    @codepoint = codepoint
  end

  # Serializes the outline as a standalone SVG document: XML
  # declaration, `<svg>` root, optional `<title>`, and one `<path>`.
  #
  # @return [String] complete `<svg>...</svg>` document
  def to_s
    box = view_box
    markup = [
      %(<svg xmlns="http://www.w3.org/2000/svg" viewBox="#{format_dims(box)}" width="#{format_num(box[:width])}" height="#{format_num(box[:height])}" preserveAspectRatio="xMidYMid meet">),
    ]
    markup << %( <title>U+#{format("%04X", @codepoint)} (Last Resort)</title>) if @codepoint
    markup << %( <path d="#{path_data.strip}" fill="currentColor" fill-rule="evenodd"/>)
    markup << %(</svg>)
    %(<?xml version="1.0" encoding="UTF-8"?>\n#{markup.join("\n")}\n)
  end

  # The bare `d` attribute value: one sub-path per contour, joined
  # with single spaces. Public so callers can embed the path in
  # their own SVG wrapper.
  #
  # @return [String]
  def path_data
    subpaths = @outline.contours.map do |contour|
      PathBuilder.new(contour.points).to_path
    end
    subpaths.join(" ")
  end

  private

  # Renders the four viewBox numbers with two decimals each.
  def format_dims(box)
    format("%<min_x>.2f %<min_y>.2f %<width>.2f %<height>.2f", box)
  end

  # Computes the padded viewBox as `{ min_x:, min_y:, width:, height: }`.
  # `min_y` is expressed in flipped (SVG) space, i.e. the negated
  # top edge of the padded bbox. An outline without a bbox gets a
  # 1x1 box at the origin.
  def view_box
    bbox = @outline.bbox
    return { min_x: 0, min_y: 0, width: 1, height: 1 } if bbox.nil?

    left = bbox[:min_x].to_f
    right = bbox[:max_x].to_f
    bottom = bbox[:min_y].to_f
    top = bbox[:max_y].to_f

    # A zero extent falls back to 1.0 so the viewBox never collapses.
    span_x = right - left
    span_x = 1.0 if span_x.zero?
    span_y = top - bottom
    span_y = 1.0 if span_y.zero?

    margin_x = span_x * PaddingRatio
    margin_y = span_y * PaddingRatio

    {
      min_x: left - margin_x,
      min_y: -(top + margin_y),
      width: span_x + (2 * margin_x),
      height: span_y + (2 * margin_y),
    }
  end

  # Whole numbers print without decimals; everything else with two.
  def format_num(n)
    return n.to_i.to_s if n.to_f == n.to_i

    format("%.2f", n)
  end
end
|
|
106
|
+
|
|
107
|
+
# Internal helper: walks one contour's points and emits SVG path
# commands per the UFO GLIF point-type rules.
#
# Each on-curve point terminates a segment whose controls are the
# off-curve points immediately preceding it:
#
# * `line`   -> `L x y`
# * `curve`  -> `C c1 c2 x y` with two (or more) off-curves, `Q c x y`
#   with exactly one, `L x y` with none
# * `qcurve` -> one `Q` per off-curve, with implied on-curve points
#   midway between consecutive off-curves; `L x y` with none
#
# Contour-start handling: the first on-curve point encountered
# becomes the `M` target; no `L`/`C`/`Q` is emitted for it at that
# moment. Off-curve points seen *before* it, together with off-curve
# points left over *after* the last on-curve point, are the
# wrap-around controls of the closing segment, which ends at the
# start point and takes its type from the start point's kind.
#
# A contour made only of off-curve points is treated as a closed
# quadratic loop starting at the midpoint between its last and first
# control. A contour with nothing drawable yields an empty string
# (a bare `Z` would be invalid path data).
#
# Every y coordinate is negated (UFO y grows up, SVG y grows down).
class PathBuilder
  # @param points [Array<#kind, #x, #y>] contour points; `kind` is
  #   one of `:move`, `:line`, `:curve`, `:qcurve`, `:offcurve`
  def initialize(points)
    @points = points
    @path = nil
  end

  # Builds the sub-path once and memoizes it, so repeated calls
  # return the same string instead of appending to earlier output.
  #
  # @return [String] SVG path data for this contour, ending in `Z`,
  #   or `""` when the contour has nothing to draw
  def to_path
    @path ||= build_path
  end

  private

  def build_path
    @out = +""
    @pending = []     # off-curves waiting for their on-curve point
    @wraparound = []  # off-curves that preceded the start point
    @start = nil      # first on-curve point of the contour

    @points.each { |point| visit(point) }

    if @start
      close_to_start
    elsif @pending.empty?
      return ""
    else
      trace_offcurve_only
    end
    @out << "Z"
  end

  # Dispatches one point by kind. Unknown kinds are ignored.
  def visit(point)
    case point.kind
    when :offcurve then @pending << point
    when :move then begin_contour(point)
    when :line then @start ? line_to(point) : begin_contour(point)
    when :curve, :qcurve then @start ? curve_to(point) : begin_contour(point)
    end
  end

  # Emits `M` and sets aside any off-curves seen so far: they belong
  # to the closing segment, not to the first segment after the start.
  def begin_contour(point)
    @wraparound = @pending.dup
    @pending.clear
    @out << "M #{xy(point)} "
    @start = point
  end

  # A `line` may not be preceded by off-curves; stray ones are
  # discarded so they cannot leak into the next curve segment.
  def line_to(point)
    @pending.clear
    @out << "L #{xy(point)} "
  end

  def curve_to(point)
    draw(point.kind, @pending, xy(point))
    @pending.clear
  end

  # Emits the closing segment back to the start point when there
  # are wrap-around controls; otherwise `Z` closes with a straight
  # line on its own.
  def close_to_start
    controls = @pending + @wraparound
    draw(@start.kind, controls, xy(@start)) unless controls.empty?
  end

  # Closed quadratic loop with no explicit on-curve point: start at
  # the implied on-curve point between the last and first control.
  def trace_offcurve_only
    head = @pending.first
    tail = @pending.last
    start_xy = coords((tail.x + head.x) / 2.0, (tail.y + head.y) / 2.0)
    @out << "M #{start_xy} "
    quadratic_chain(@pending, start_xy)
  end

  # Emits one segment ending at `target_xy` (an already formatted
  # "x y" string) using the given controls.
  def draw(kind, controls, target_xy)
    if controls.empty?
      @out << "L #{target_xy} "
    elsif kind == :curve && controls.length >= 2
      # Only the first two off-curves are used as cubic controls.
      @out << "C #{xy(controls[0])} #{xy(controls[1])} #{target_xy} "
    else
      quadratic_chain(controls, target_xy)
    end
  end

  # One `Q` per control; consecutive controls are joined through
  # their midpoint, and the last control leads to `target_xy`.
  def quadratic_chain(controls, target_xy)
    controls.each_cons(2) do |ctrl, following|
      mid = coords((ctrl.x + following.x) / 2.0, (ctrl.y + following.y) / 2.0)
      @out << "Q #{xy(ctrl)} #{mid} "
    end
    @out << "Q #{xy(controls.last)} #{target_xy} "
  end

  def xy(point)
    coords(point.x, point.y)
  end

  # Formats a coordinate pair with y negated for SVG space.
  def coords(x, y)
    "#{format_num(x)} #{format_num(-y)}"
  end

  # Whole numbers print without decimals; everything else with two.
  def format_num(n)
    if n.is_a?(Integer) || n.to_f == n.to_i
      n.to_i.to_s
    else
      format("%.2f", n)
    end
  end
end
|
|
245
|
+
end
|
|
246
|
+
end
|
|
247
|
+
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require "ucode/glyphs/last_resort/renderer"
|
|
6
|
+
require "ucode/glyphs/last_resort/source"
|
|
7
|
+
require "ucode/repo/atomic_writes"
|
|
8
|
+
require "ucode/repo/paths"
|
|
9
|
+
|
|
10
|
+
module Ucode
|
|
11
|
+
module Glyphs
|
|
12
|
+
module LastResort
|
|
13
|
+
# Writes one `glyph.svg` per codepoint in `codepoints`, sourcing
|
|
14
|
+
# the outline from the Last Resort UFO.
|
|
15
|
+
#
|
|
16
|
+
# Single Renderer instance shared across the loop, so the parsed
|
|
17
|
+
# cmap and contents.plist are paid for once.
|
|
18
|
+
#
|
|
19
|
+
# **Idempotent**: re-runs are no-ops via `Repo::AtomicWrites`
|
|
20
|
+
# (byte comparison; same content is skipped). Safe to re-run on
|
|
21
|
+
# the whole output tree.
|
|
22
|
+
#
|
|
23
|
+
# **Atomic**: writes go through `<path>.tmp` + rename. A crash
|
|
24
|
+
# mid-write leaves either the old file or no file.
|
|
25
|
+
#
|
|
26
|
+
# Block membership is the caller's responsibility — the Writer
|
|
27
|
+
# doesn't gate codepoints by assigned/unassigned. Last Resort
|
|
28
|
+
# placeholders exist for every codepoint in the cmap, including
|
|
29
|
+
# assigned ones, but the v0.2 pipeline only writes Last Resort
|
|
30
|
+
# SVGs for codepoints whose chart cell shows a placeholder box
|
|
31
|
+
# (see README "two pillars").
|
|
32
|
+
class Writer
  include Repo::AtomicWrites

  # @param output_root [String, Pathname] root of the output tree
  # @param source [Source] Last Resort source handed to the Renderer
  def initialize(output_root:, source:)
    @output_root = Pathname.new(output_root)
    @source = source
    @renderer = Renderer.new(source)
  end

  # Renders and writes `glyph.svg` for each codepoint, counting the
  # outcome of every one.
  #
  # @param codepoints [Array<Integer>, Enumerable<Integer>]
  # @param block_lookup [Proc, #call] codepoint → block id string
  #   (e.g. `"Basic_Latin"`), or nil when the codepoint has no block
  # @return [Hash] tally `{ written:, skipped:, missing:, total: }`;
  #   `missing` counts codepoints with no block as well as those the
  #   renderer could not produce
  def write_many(codepoints, block_lookup:)
    tally = { written: 0, skipped: 0, missing: 0, total: 0 }
    codepoints.each do |cp|
      tally[:total] += 1
      tally[outcome_for(cp, block_lookup)] += 1
    end
    tally
  end

  private

  # Processes one codepoint and names the tally bucket it belongs to.
  #
  # @return [Symbol] `:missing`, `:written` or `:skipped`
  def outcome_for(codepoint, block_lookup)
    block_id = block_lookup.call(codepoint)
    return :missing if block_id.nil?

    rendered = @renderer.render(codepoint)
    return :missing unless rendered&.ok?

    write_glyph(block_id, codepoint, rendered.svg) ? :written : :skipped
  end

  # Resolves the glyph path for the codepoint and hands the SVG to
  # `write_atomic` (from Repo::AtomicWrites); its return value is
  # passed straight through.
  def write_glyph(block_id, codepoint, svg)
    target = Repo::Paths.codepoint_glyph_path(
      @output_root, block_id, Repo::Paths.cp_id(codepoint)
    )
    write_atomic(target, svg)
  end
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
|
|
4
|
+
module Glyphs
|
|
5
|
+
# Last Resort Font integration — pillar 2 of the v0.2 glyph strategy.
|
|
6
|
+
#
|
|
7
|
+
# For codepoints whose Code Charts cell shows a placeholder box
|
|
8
|
+
# (unassigned, noncharacter, PUA), the chart glyph is drawn from
|
|
9
|
+
# Unicode's Last Resort Font. The Last Resort Font ships as a UFO
|
|
10
|
+
# source with three parts that matter to us:
|
|
11
|
+
#
|
|
12
|
+
# * `cmap-f13.ttx` — a Format 13 `cmap` that maps every codepoint
|
|
13
|
+
# (0x0..0x10FFFF) to a placeholder glyph name. 1,114,112 entries.
|
|
14
|
+
# * `font.ufo/glyphs/*.glif` — 380 outline files, one per Unicode
|
|
15
|
+
# block + a handful of special types (`notdef`,
|
|
16
|
+
# `notdefplanezero`, the noncharacter / unassigned planes, …).
|
|
17
|
+
# * `font.ufo/glyphs/contents.plist` — glyph name → `.glif` file.
|
|
18
|
+
#
|
|
19
|
+
# The pipeline is read-only and stateless: cmap (cp → name) →
|
|
20
|
+
# contents (name → file) → glif (file → outline) → svg (outline →
|
|
21
|
+
# SVG document). No PDF parsing, no cell extraction, no border
|
|
22
|
+
# compositing — the placeholder outline is exactly what the Code
|
|
23
|
+
# Charts display.
|
|
24
|
+
#
|
|
25
|
+
# See {Source} for how to locate the UFO on disk.
|
|
26
|
+
module LastResort
|
|
27
|
+
autoload :Source, "ucode/glyphs/last_resort/source"
|
|
28
|
+
autoload :CmapIndex, "ucode/glyphs/last_resort/cmap_index"
|
|
29
|
+
autoload :Contents, "ucode/glyphs/last_resort/contents"
|
|
30
|
+
autoload :Glif, "ucode/glyphs/last_resort/glif"
|
|
31
|
+
autoload :Svg, "ucode/glyphs/last_resort/svg"
|
|
32
|
+
autoload :Renderer, "ucode/glyphs/last_resort/renderer"
|
|
33
|
+
autoload :Writer, "ucode/glyphs/last_resort/writer"
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "json"
|
|
5
|
+
require "open3"
|
|
6
|
+
|
|
7
|
+
module Ucode
|
|
8
|
+
module Glyphs
|
|
9
|
+
# Maps a Unicode block's first codepoint to its page range inside the
|
|
10
|
+
# monolith `CodeCharts.pdf` by parsing the PDF's bookmark outline and
|
|
11
|
+
# matching each bookmark title to a Block.name from `Blocks.txt`.
|
|
12
|
+
#
|
|
13
|
+
# Each chart cluster printed by the Unicode Consortium is a single
|
|
14
|
+
# bookmark entry:
|
|
15
|
+
#
|
|
16
|
+
# BookmarkTitle: Greek and Coptic
|
|
17
|
+
# BookmarkLevel: 1
|
|
18
|
+
# BookmarkPageNumber: 415
|
|
19
|
+
#
|
|
20
|
+
# The cluster title usually equals a Block.name verbatim, but a few
|
|
21
|
+
# clusters carry a heading that prepends "C0 Controls and " /
|
|
22
|
+
# "C1 Controls and " to the block name. We resolve both forms.
|
|
23
|
+
#
|
|
24
|
+
# End-page of a cluster is one page before the next cluster's start
|
|
25
|
+
# page (last cluster's end-page is the PDF's last page).
|
|
26
|
+
#
|
|
27
|
+
# The map is cached as JSON at `data/codecharts_page_map.json` so
|
|
28
|
+
# we don't re-scan the 3,156-page monolith on every run.
|
|
29
|
+
class MonolithPageMap
  # Line patterns in `pdftk dump_data` output.
  BookmarkTitleRegex = /BookmarkTitle:\s*(.+)/.freeze
  BookmarkPageRegex = /BookmarkPageNumber:\s*(\d+)/.freeze
  private_constant :BookmarkTitleRegex, :BookmarkPageRegex

  # Headings that some chart clusters prepend to the block name.
  # Stripping the prefix from a bookmark title yields the name of the
  # single block the cluster is mapped to.
  ClusterPrefixes = [
    "C0 Controls and ",
    "C1 Controls and ",
  ].freeze
  private_constant :ClusterPrefixes

  # One block's page range; `end_page` is filled in by
  # {attach_end_pages} and may be nil for the last entry when the
  # PDF's page count is unknown.
  MapEntry = Struct.new(:first_cp, :start_page, :end_page, keyword_init: true)

  class << self
    # Build the map by parsing the monolith's outline and matching
    # each bookmark title to a Block.
    #
    # @param monolith_path [String, Pathname]
    # @param blocks [Array<Ucode::Models::Block>] the parsed Blocks table
    # @return [Hash{Integer => MapEntry}] keyed by block.range_first;
    #   empty when the bookmark dump failed or matched nothing
    def build(monolith_path:, blocks:)
      name_to_first_cp = blocks.each_with_object({}) do |b, h|
        h[b.name] = b.range_first
      end
      total_pages = page_count(monolith_path)
      entries = parse_bookmarks(dump_bookmarks(monolith_path), name_to_first_cp)
      attach_end_pages(entries, total_pages)
      entries.each_with_object({}) do |e, h|
        h[e.first_cp] = e
      end
    end

    # Pure: parse a `pdftk dump_data` string into a list of
    # MapEntry rows (without end_pages), sorted by start page.
    # Bookmarks whose title matches no block are dropped.
    #
    # @param dump [String] the raw `pdftk dump_data` output
    # @param name_to_first_cp [Hash{String => Integer}]
    # @return [Array<MapEntry>]
    def parse_bookmarks(dump, name_to_first_cp)
      entries = []
      current_title = nil
      dump.each_line do |line|
        case line
        when BookmarkTitleRegex
          current_title = Regexp.last_match(1).strip
        when BookmarkPageRegex
          page = Regexp.last_match(1).to_i
          cp = resolve_first_cp(current_title, name_to_first_cp)
          entries << MapEntry.new(first_cp: cp, start_page: page) if cp
          # A page number closes the current bookmark record.
          current_title = nil
        end
      end
      entries.sort_by(&:start_page)
    end

    # Pure: attach end_pages by sorting entries and assigning each
    # entry's end to one page before the next entry's start. When
    # two entries start on the same page the end is clamped to the
    # entry's own start page, so a range is never inverted.
    #
    # @param entries [Array<MapEntry>]
    # @param total_pages [Integer, nil] page count of the source PDF;
    #   becomes the last entry's end_page (nil when unknown).
    # @return [Array<MapEntry>] the same entries, sorted and mutated
    #   with end_pages.
    def attach_end_pages(entries, total_pages = nil)
      sorted = entries.sort_by(&:start_page)
      sorted.each_with_index do |entry, i|
        following = sorted[i + 1]
        entry.end_page =
          if following
            [following.start_page - 1, entry.start_page].max
          else
            total_pages
          end
      end
      sorted
    end

    # Load from cache, or build and cache. An empty map is never
    # written to the cache: it is what a failed `pdftk` run produces,
    # and persisting it would make every later run return nothing.
    #
    # @param monolith_path [String, Pathname]
    # @param blocks [Array<Ucode::Models::Block>]
    # @param cache_path [String, Pathname, nil]
    # @return [Hash{Integer => MapEntry}]
    def load(monolith_path:, blocks:, cache_path: nil)
      cache = cache_path && Pathname.new(cache_path)
      return load_from_json(cache.read) if cache&.exist?

      map = build(monolith_path: monolith_path, blocks: blocks)
      write_cache(map, cache) if cache && !map.empty?
      map
    end

    # Look up a block's page range by its first cp.
    # @param map [Hash{Integer => MapEntry}]
    # @param block_first_cp [Integer]
    # @return [MapEntry, nil]
    def range_for(map, block_first_cp)
      map[block_first_cp]
    end

    # ---- I/O helpers (impure) --------------------------------------

    # Runs `pdftk <pdf> dump_data` and returns its combined
    # stdout/stderr, or "" when the command exits unsuccessfully.
    #
    # @param monolith_path [String, Pathname]
    # @return [String]
    def dump_bookmarks(monolith_path)
      out, status = Open3.capture2e("pdftk", monolith_path.to_s, "dump_data")
      return "" unless status.success?

      out
    end

    # Reads the page count from `pdfinfo` output.
    #
    # @param monolith_path [String, Pathname]
    # @return [Integer, nil] nil when the command fails or prints no
    #   `Pages:` line
    def page_count(monolith_path)
      out, status = Open3.capture2e("pdfinfo", monolith_path.to_s)
      return nil unless status.success?

      match = out.match(/^Pages:\s+(\d+)/)
      match ? match[1].to_i : nil
    end

    private

    # Resolves a bookmark title to a block's first codepoint: exact
    # block name first, then the title with a cluster prefix removed.
    def resolve_first_cp(title, name_to_first_cp)
      return nil unless title

      return name_to_first_cp[title] if name_to_first_cp.key?(title)

      ClusterPrefixes.each do |prefix|
        stripped = title.delete_prefix(prefix)
        return name_to_first_cp[stripped] if name_to_first_cp.key?(stripped)
      end

      nil
    end

    # Serializes the map's entries as a JSON array of objects,
    # creating the cache directory when needed.
    def write_cache(map, cache_path)
      payload = map.values.map do |e|
        { "first_cp" => e.first_cp,
          "start_page" => e.start_page,
          "end_page" => e.end_page }
      end
      cache_path.dirname.mkpath
      cache_path.write(JSON.pretty_generate(payload))
    end

    # Inverse of {write_cache}: rebuilds the Hash keyed by first_cp.
    def load_from_json(json)
      payload = JSON.parse(json)
      payload.each_with_object({}) do |row, h|
        entry = MapEntry.new(first_cp: row["first_cp"],
                             start_page: row["start_page"],
                             end_page: row["end_page"])
        h[entry.first_cp] = entry
      end
    end
  end
end
|
|
180
|
+
end
|
|
181
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ucode/glyphs/page_renderer"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Glyphs
|
|
7
|
+
# `mutool draw` from MuPDF — typically the fastest and cleanest.
|
|
8
|
+
# Emits one `<svg>` per page with `<path>` vector data.
|
|
9
|
+
#
|
|
10
|
+
# Command: `mutool draw -F svg -o <out.svg> <in.pdf> <page>`
|
|
11
|
+
class MutoolRenderer < PageRenderer
  # @return [Symbol] identifier of this renderer
  def self.renderer_name
    :mutool
  end

  # @return [Symbol] name of the executable this renderer runs
  def self.binary_name
    :mutool
  end

  # Assembles the argv for rendering one page to SVG:
  # `mutool draw -F svg -o <out.svg> <in.pdf> <page>`.
  #
  # @param pdf_path [#to_s] input PDF
  # @param page_num [#to_s] page to render
  # @param out_path [#to_s] destination SVG file
  # @return [Array<String>]
  def self.build_command(pdf_path, page_num, out_path)
    output_flags = ["-F", "svg", "-o", out_path.to_s]
    ["mutool", "draw", *output_flags, pdf_path.to_s, page_num.to_s]
  end
end
|
|
27
|
+
end
|
|
28
|
+
end
|