ucode 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CLAUDE.md +211 -0
- data/Gemfile +22 -0
- data/Gemfile.lock +406 -0
- data/README.md +469 -0
- data/Rakefile +18 -0
- data/TODO.new/00-README.md +66 -0
- data/TODO.new/01-pillar-terminology-alignment.md +69 -0
- data/TODO.new/02-audit-schema-design.md +255 -0
- data/TODO.new/03-directory-output-spec.md +203 -0
- data/TODO.new/04-fontist-org-contract.md +173 -0
- data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
- data/TODO.new/06-audit-namespace-skeleton.md +105 -0
- data/TODO.new/07-audit-models-port.md +132 -0
- data/TODO.new/08-extractors-cheap-port.md +113 -0
- data/TODO.new/09-extractors-expensive-port.md +99 -0
- data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
- data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
- data/TODO.new/12-formatters-port.md +115 -0
- data/TODO.new/13-directory-emitter.md +147 -0
- data/TODO.new/14-html-face-browser.md +144 -0
- data/TODO.new/15-html-library-browser.md +102 -0
- data/TODO.new/16-cli-audit-subcommands.md +142 -0
- data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
- data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
- data/TODO.new/19-fontisan-docs-update.md +155 -0
- data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
- data/TODO.new/21-canonical-unicode17-build.md +148 -0
- data/TODO.new/22-implementation-order.md +176 -0
- data/UCODE_CHANGELOG.md +97 -0
- data/exe/ucode +8 -0
- data/lib/ucode/aggregator.rb +77 -0
- data/lib/ucode/audit/block_aggregator.rb +90 -0
- data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
- data/lib/ucode/audit/context.rb +137 -0
- data/lib/ucode/audit/discrepancy_detector.rb +213 -0
- data/lib/ucode/audit/extractors/aggregations.rb +70 -0
- data/lib/ucode/audit/extractors/base.rb +21 -0
- data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
- data/lib/ucode/audit/extractors/coverage.rb +55 -0
- data/lib/ucode/audit/extractors/hinting.rb +199 -0
- data/lib/ucode/audit/extractors/identity.rb +65 -0
- data/lib/ucode/audit/extractors/licensing.rb +75 -0
- data/lib/ucode/audit/extractors/metrics.rb +108 -0
- data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
- data/lib/ucode/audit/extractors/provenance.rb +34 -0
- data/lib/ucode/audit/extractors/style.rb +88 -0
- data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
- data/lib/ucode/audit/extractors.rb +31 -0
- data/lib/ucode/audit/plane_aggregator.rb +37 -0
- data/lib/ucode/audit/registry.rb +63 -0
- data/lib/ucode/audit/script_aggregator.rb +92 -0
- data/lib/ucode/audit.rb +27 -0
- data/lib/ucode/cache.rb +113 -0
- data/lib/ucode/cli.rb +272 -0
- data/lib/ucode/commands/build.rb +68 -0
- data/lib/ucode/commands/cache.rb +46 -0
- data/lib/ucode/commands/fetch.rb +62 -0
- data/lib/ucode/commands/font_coverage.rb +57 -0
- data/lib/ucode/commands/glyphs.rb +136 -0
- data/lib/ucode/commands/lookup.rb +65 -0
- data/lib/ucode/commands/parse.rb +62 -0
- data/lib/ucode/commands/site.rb +33 -0
- data/lib/ucode/commands.rb +19 -0
- data/lib/ucode/config.rb +110 -0
- data/lib/ucode/coordinator/indices.rb +34 -0
- data/lib/ucode/coordinator.rb +397 -0
- data/lib/ucode/database.rb +214 -0
- data/lib/ucode/db_builder.rb +107 -0
- data/lib/ucode/error.rb +96 -0
- data/lib/ucode/fetch/code_charts.rb +57 -0
- data/lib/ucode/fetch/http.rb +83 -0
- data/lib/ucode/fetch/ucd_zip.rb +57 -0
- data/lib/ucode/fetch/unihan_zip.rb +57 -0
- data/lib/ucode/fetch.rb +14 -0
- data/lib/ucode/glyphs/cell_extractor.rb +130 -0
- data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
- data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
- data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
- data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
- data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
- data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
- data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
- data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
- data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
- data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
- data/lib/ucode/glyphs/grid.rb +30 -0
- data/lib/ucode/glyphs/grid_detector.rb +165 -0
- data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
- data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
- data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
- data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
- data/lib/ucode/glyphs/last_resort/source.rb +125 -0
- data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
- data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
- data/lib/ucode/glyphs/last_resort.rb +36 -0
- data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
- data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
- data/lib/ucode/glyphs/page_renderer.rb +221 -0
- data/lib/ucode/glyphs/path_bbox.rb +62 -0
- data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
- data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
- data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
- data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
- data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
- data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
- data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
- data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
- data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
- data/lib/ucode/glyphs/real_fonts.rb +32 -0
- data/lib/ucode/glyphs/writer.rb +250 -0
- data/lib/ucode/glyphs.rb +27 -0
- data/lib/ucode/index.rb +106 -0
- data/lib/ucode/index_builder.rb +94 -0
- data/lib/ucode/models/audit/audit_axis.rb +30 -0
- data/lib/ucode/models/audit/audit_diff.rb +77 -0
- data/lib/ucode/models/audit/audit_report.rb +137 -0
- data/lib/ucode/models/audit/baseline.rb +32 -0
- data/lib/ucode/models/audit/block_summary.rb +72 -0
- data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
- data/lib/ucode/models/audit/codepoint_range.rb +39 -0
- data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
- data/lib/ucode/models/audit/color_capabilities.rb +91 -0
- data/lib/ucode/models/audit/discrepancy.rb +38 -0
- data/lib/ucode/models/audit/duplicate_group.rb +23 -0
- data/lib/ucode/models/audit/embedding_type.rb +81 -0
- data/lib/ucode/models/audit/field_change.rb +28 -0
- data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
- data/lib/ucode/models/audit/gasp_range.rb +63 -0
- data/lib/ucode/models/audit/hinting.rb +99 -0
- data/lib/ucode/models/audit/library_summary.rb +40 -0
- data/lib/ucode/models/audit/licensing.rb +48 -0
- data/lib/ucode/models/audit/metrics.rb +111 -0
- data/lib/ucode/models/audit/named_instance.rb +41 -0
- data/lib/ucode/models/audit/opentype_layout.rb +38 -0
- data/lib/ucode/models/audit/plane_summary.rb +31 -0
- data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
- data/lib/ucode/models/audit/script_features.rb +28 -0
- data/lib/ucode/models/audit/script_summary.rb +54 -0
- data/lib/ucode/models/audit/variation_detail.rb +42 -0
- data/lib/ucode/models/audit.rb +50 -0
- data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
- data/lib/ucode/models/bidi_mirroring.rb +19 -0
- data/lib/ucode/models/binary_property_assignment.rb +26 -0
- data/lib/ucode/models/block.rb +36 -0
- data/lib/ucode/models/case_folding_rule.rb +23 -0
- data/lib/ucode/models/cjk_radical.rb +23 -0
- data/lib/ucode/models/codepoint/bidi.rb +28 -0
- data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
- data/lib/ucode/models/codepoint/case_folding.rb +25 -0
- data/lib/ucode/models/codepoint/casing.rb +32 -0
- data/lib/ucode/models/codepoint/decomposition.rb +27 -0
- data/lib/ucode/models/codepoint/display.rb +24 -0
- data/lib/ucode/models/codepoint/emoji.rb +29 -0
- data/lib/ucode/models/codepoint/hangul.rb +20 -0
- data/lib/ucode/models/codepoint/identifier.rb +30 -0
- data/lib/ucode/models/codepoint/indic.rb +20 -0
- data/lib/ucode/models/codepoint/joining.rb +20 -0
- data/lib/ucode/models/codepoint/normalization.rb +35 -0
- data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
- data/lib/ucode/models/codepoint.rb +122 -0
- data/lib/ucode/models/name_alias.rb +21 -0
- data/lib/ucode/models/named_sequence.rb +19 -0
- data/lib/ucode/models/names_list_entry.rb +38 -0
- data/lib/ucode/models/plane.rb +36 -0
- data/lib/ucode/models/property_alias.rb +24 -0
- data/lib/ucode/models/property_value_alias.rb +26 -0
- data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
- data/lib/ucode/models/relationship/cross_reference.rb +17 -0
- data/lib/ucode/models/relationship/footnote.rb +24 -0
- data/lib/ucode/models/relationship/informal_alias.rb +18 -0
- data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
- data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
- data/lib/ucode/models/relationship.rb +57 -0
- data/lib/ucode/models/script.rb +41 -0
- data/lib/ucode/models/special_casing_rule.rb +28 -0
- data/lib/ucode/models/standardized_variant.rb +24 -0
- data/lib/ucode/models/unihan_entry.rb +23 -0
- data/lib/ucode/models.rb +47 -0
- data/lib/ucode/parsers/auxiliary.rb +26 -0
- data/lib/ucode/parsers/base.rb +137 -0
- data/lib/ucode/parsers/bidi_brackets.rb +41 -0
- data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
- data/lib/ucode/parsers/blocks.rb +63 -0
- data/lib/ucode/parsers/case_folding.rb +53 -0
- data/lib/ucode/parsers/cjk_radicals.rb +102 -0
- data/lib/ucode/parsers/derived_age.rb +59 -0
- data/lib/ucode/parsers/derived_core_properties.rb +60 -0
- data/lib/ucode/parsers/extracted_properties.rb +74 -0
- data/lib/ucode/parsers/name_aliases.rb +44 -0
- data/lib/ucode/parsers/named_sequences.rb +51 -0
- data/lib/ucode/parsers/names_list.rb +250 -0
- data/lib/ucode/parsers/property_aliases.rb +41 -0
- data/lib/ucode/parsers/property_value_aliases.rb +46 -0
- data/lib/ucode/parsers/script_extensions.rb +64 -0
- data/lib/ucode/parsers/scripts.rb +60 -0
- data/lib/ucode/parsers/special_casing.rb +62 -0
- data/lib/ucode/parsers/standardized_variants.rb +56 -0
- data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
- data/lib/ucode/parsers/unicode_data.rb +268 -0
- data/lib/ucode/parsers/unihan.rb +125 -0
- data/lib/ucode/parsers.rb +35 -0
- data/lib/ucode/range_entry.rb +58 -0
- data/lib/ucode/repo/aggregate_writer.rb +364 -0
- data/lib/ucode/repo/atomic_writes.rb +48 -0
- data/lib/ucode/repo/codepoint_writer.rb +96 -0
- data/lib/ucode/repo/paths.rb +122 -0
- data/lib/ucode/repo.rb +22 -0
- data/lib/ucode/site/config_emitter.rb +124 -0
- data/lib/ucode/site/generator.rb +178 -0
- data/lib/ucode/site/search_index.rb +68 -0
- data/lib/ucode/site/template/.gitignore +4 -0
- data/lib/ucode/site/template/.vitepress/config.ts +8 -0
- data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
- data/lib/ucode/site/template/char/[codepoint].md +13 -0
- data/lib/ucode/site/template/components/BlockView.vue +57 -0
- data/lib/ucode/site/template/components/CharView.vue +85 -0
- data/lib/ucode/site/template/components/PlaneView.vue +56 -0
- data/lib/ucode/site/template/components/SearchView.vue +66 -0
- data/lib/ucode/site/template/index.md +25 -0
- data/lib/ucode/site/template/package.json +18 -0
- data/lib/ucode/site/template/search.md +9 -0
- data/lib/ucode/site.rb +13 -0
- data/lib/ucode/version.rb +5 -0
- data/lib/ucode/version_resolver.rb +76 -0
- data/lib/ucode.rb +74 -0
- data/ucode.gemspec +56 -0
- metadata +404 -0
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
|
|
5
|
+
require "ucode/glyphs/grid"
|
|
6
|
+
require "ucode/glyphs/path_bbox"
|
|
7
|
+
|
|
8
|
+
module Ucode
|
|
9
|
+
module Glyphs
|
|
10
|
+
# Detects the chart grid in a Code Charts PDF page rendered to SVG.
|
|
11
|
+
#
|
|
12
|
+
# The PDF page produced by pdftocairo / pdf2svg / dvisvgm contains
|
|
13
|
+
# every visible element (title, block name, row labels, codepoint
|
|
14
|
+
# digits, and the actual character glyphs) as positioned `<use>`
|
|
15
|
+
# references into a `<defs>` block of named glyph outlines. The
|
|
16
|
+
# character cells we want to extract correspond to glyphs whose
|
|
17
|
+
# bounding box is larger than every label or digit font on the
|
|
18
|
+
# page — the chart's character samples are drawn at a larger size
|
|
19
|
+
# than any of the surrounding text.
|
|
20
|
+
#
|
|
21
|
+
# Algorithm:
|
|
22
|
+
# 1. Walk `<defs>`, estimate each glyph's bbox via `PathBbox`.
|
|
23
|
+
# 2. Classify a glyph as "character-sized" when its width and
|
|
24
|
+
# height both exceed `CharSizeThreshold` (default 8 pt).
|
|
25
|
+
# This excludes title, row-label, and digit glyphs while
|
|
26
|
+
# keeping every actual character sample — including pages
|
|
27
|
+
# where the chart mixes multiple character fonts (e.g. the
|
|
28
|
+
# Basic Latin page uses one font for punctuation/digits and
|
|
29
|
+
# another for letters).
|
|
30
|
+
# 3. Collect every `<use>` that references a character-sized
|
|
31
|
+
# glyph; these are the cell origins.
|
|
32
|
+
# 4. Cluster the Y values of those uses into rows, and within
|
|
33
|
+
# each row cluster the X values into columns.
|
|
34
|
+
# 5. Drop rows whose column count diverges from the modal value
|
|
35
|
+
# (these are footer/header artifacts, not chart rows).
|
|
36
|
+
# 6. Return a `Grid` value object anchored at the top-left cell
|
|
37
|
+
# with uniform column/row pitches derived from the median
|
|
38
|
+
# spacing between adjacent clusters.
|
|
39
|
+
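#
# NOTE(review): the current `build_grid` clusters X values across all
# cell uses at once (not per row) and does not drop rows whose column
# count diverges from the mode, so steps 4-5 describe intent rather
# than the present implementation.
#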
|
|
40
|
+
# This is pure (no I/O). The detector takes a parsed Nokogiri
|
|
41
|
+
# document and returns a `Grid`.
|
|
42
|
+
# Detects the character grid on one Code Charts page rendered to SVG.
#
# {GridDetector.detect} is the only public entry point; every other
# method is a private singleton helper. Nothing in this class performs
# I/O: the caller passes an already-parsed document.
class GridDetector
  # Minimum bbox width AND height a `<defs>` glyph must reach to be
  # treated as a character sample (see `char_sized?`).
  CharSizeThreshold = 8.0
  # Largest gap between two consecutive sorted coordinates that still
  # keeps them in the same row/column cluster (see `cluster_by_value`).
  ClusterEpsilon = 15.0
  private_constant :CharSizeThreshold, :ClusterEpsilon

  class << self
    # Runs the detection pipeline: collect `<use>` positions, find the
    # character-sized glyph definitions, keep only the uses that point
    # at them, and derive the grid geometry from those positions.
    #
    # @param doc [Nokogiri::XML::Document]
    # @param block_first_cp [Integer] first codepoint of the block;
    #   stored on the Grid so callers can map codepoint <-> cell.
    # @return [Ucode::Glyphs::Grid, nil] nil if no character grid
    #   could be detected.
    def detect(doc, block_first_cp:)
      uses = collect_uses(doc)
      return nil if uses.empty?

      char_glyph_ids = char_sized_glyph_ids(doc)
      return nil if char_glyph_ids.empty?

      cell_uses = uses.select { |use| char_glyph_ids.key?(use.glyph_id) }
      return nil if cell_uses.empty?

      build_grid(cell_uses, block_first_cp)
    end

    private

    # Position of one `<use>` element plus the glyph it references.
    UsePosition = Struct.new(:x, :y, :glyph_id, :set_id, keyword_init: true)

    # Collects every `<use>` whose href has the form `#glyph-<set>-<n>`.
    # Uses pointing anywhere else are skipped. A missing `x`/`y`
    # attribute becomes 0.0 (`nil.to_f`).
    #
    # @param doc [#css]
    # @return [Array<UsePosition>]
    def collect_uses(doc)
      doc.css("use").filter_map do |node|
        href = node["xlink:href"] || node["href"] || ""
        glyph_id = href.sub(/\A#/, "")
        match = glyph_id.match(/\Aglyph-(\d+)-(\d+)\z/)
        next unless match

        UsePosition.new(
          x: node["x"].to_f,
          y: node["y"].to_f,
          glyph_id: glyph_id,
          set_id: match[1].to_i,
        )
      end
    end

    # Builds a set (Hash with `true` values) of the ids of all
    # `<defs>` glyph groups whose combined path bbox is
    # character-sized.
    #
    # @param doc [#css]
    # @return [Hash{String=>true}]
    def char_sized_glyph_ids(doc)
      doc.css("defs g[id^='glyph-']").each_with_object({}) do |group, ids|
        id = group["id"]
        next unless id&.match?(/\Aglyph-\d+-\d+\z/)

        bbox = glyph_bbox(group)
        ids[id] = true if bbox && char_sized?(bbox)
      end
    end

    # Union of the estimated bboxes of every `<path>` inside one glyph
    # group. Paths whose estimate is empty are ignored.
    #
    # @return [PathBbox::Result, nil] nil when the group has no path
    #   with a non-empty bbox
    def glyph_bbox(group)
      group.css("path")
           .map { |path| PathBbox.estimate(path["d"]) }
           .reject(&:empty?)
           .reduce { |a, b| merge_bboxes(a, b) }
    end

    # Smallest bbox enclosing both arguments.
    def merge_bboxes(a, b)
      PathBbox::Result.new(
        min_x: [a.min_x, b.min_x].min,
        min_y: [a.min_y, b.min_y].min,
        max_x: [a.max_x, b.max_x].max,
        max_y: [a.max_y, b.max_y].max,
      )
    end

    # True when both bbox dimensions reach CharSizeThreshold.
    def char_sized?(bbox)
      bbox.width >= CharSizeThreshold && bbox.height >= CharSizeThreshold
    end

    # Median of a list of numbers; 0.0 for an empty list. For an even
    # count the two middle values are averaged.
    def median(values)
      return 0.0 if values.empty?

      sorted = values.sort
      mid = sorted.size / 2
      sorted.size.even? ? (sorted[mid - 1] + sorted[mid]) / 2.0 : sorted[mid]
    end

    # Derives the grid geometry from the cell positions.
    #
    # Rows and columns are clustered independently over ALL cell uses
    # (X values are not clustered per row), and no row is discarded
    # based on its column count. The origin is the smallest cluster
    # start on each axis; pitches are the median spacing between
    # adjacent cluster starts.
    #
    # @param cell_uses [Array<#x,#y>]
    # @param block_first_cp [Integer]
    # @return [Ucode::Glyphs::Grid, nil]
    def build_grid(cell_uses, block_first_cp)
      row_clusters = cluster_by_value(cell_uses, :y)
      return nil if row_clusters.empty?

      column_clusters = cluster_by_value(cell_uses, :x)
      return nil if column_clusters.empty?

      column_starts = column_clusters.map { |cluster| cluster.map(&:x).min }.sort
      row_starts = row_clusters.map { |cluster| cluster.map(&:y).min }.sort

      Grid.new(
        origin_x: column_starts.first,
        origin_y: row_starts.first,
        column_pitch: median_pitch(column_starts),
        row_pitch: median_pitch(row_starts),
        columns: column_starts.size,
        rows: row_starts.size,
        block_first_cp: block_first_cp,
      )
    end

    # Groups items whose `attr` values form a chain in which each
    # value is within ClusterEpsilon of the previous one. A new
    # cluster starts as soon as the gap to the previous (sorted)
    # value exceeds ClusterEpsilon.
    #
    # @param items [Array]
    # @param attr [Symbol] reader to cluster on (e.g. `:x`, `:y`)
    # @return [Array<Array>] clusters in ascending order of `attr`
    def cluster_by_value(items, attr)
      items
        .sort_by { |item| item.public_send(attr) }
        .slice_when { |prev, curr| curr.public_send(attr) - prev.public_send(attr) > ClusterEpsilon }
        .to_a
    end

    # Median distance between adjacent values of an ascending list;
    # 0.0 when fewer than two values are given.
    def median_pitch(sorted_values)
      return 0.0 if sorted_values.size < 2

      median(sorted_values.each_cons(2).map { |a, b| b - a })
    end
  end
end
|
|
164
|
+
end
|
|
165
|
+
end
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
|
|
5
|
+
require "ucode/error"
|
|
6
|
+
|
|
7
|
+
module Ucode
|
|
8
|
+
module Glyphs
|
|
9
|
+
module LastResort
|
|
10
|
+
# Parses the Last Resort Font `cmap-f13.ttx` once into a flat
|
|
11
|
+
# `{codepoint_int => glyph_name}` lookup.
|
|
12
|
+
#
|
|
13
|
+
# The Format 13 cmap has 1,114,112 entries (every codepoint from
|
|
14
|
+
# U+0000 to U+10FFFF). Each entry looks like:
|
|
15
|
+
#
|
|
16
|
+
# <map code="0x0" name="lastresortlatin"/>
|
|
17
|
+
#
|
|
18
|
+
# We parse every `<map>` child of the `<cmap_format_13>` element(s),
|
|
19
|
+
# ignore the platform/encoding attributes (Format 13 only here),
|
|
20
|
+
# and build a single Hash. Memory cost is ~80 MB for the parsed
|
|
21
|
+
# Hash on Ruby 3.x — acceptable for the CLI, paid once per run.
|
|
22
|
+
#
|
|
23
|
+
# For long-running processes (e.g. the site dev server), the
|
|
24
|
+
# parsed index can be cached via the optional `cache:` constructor
|
|
25
|
+
# argument. The cache contract is `cache.read(key) -> Hash | nil`
|
|
26
|
+
# and `cache.write(key, hash) -> void`; pass an object with both
|
|
27
|
+
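# methods (e.g. `Ucode::Cache`).
#
# NOTE(review): `initialize` below accepts only `path`; the `cache:`
# argument described above is not implemented in this class.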
|
|
28
|
+
# Lazily parsed, memoized `{codepoint => glyph_name}` lookup built from
# the `<map>` entries of a TTX `cmap_format_13` table.
class CmapIndex
  # Attribute of `<map>` holding the codepoint (hex, usually `0x`-prefixed).
  CodeAttr = "code"
  private_constant :CodeAttr

  # Attribute of `<map>` holding the glyph name.
  NameAttr = "name"
  private_constant :NameAttr

  # Parse the cmap file at `path` and return a frozen Hash.
  #
  # @param path [String, Pathname, #to_path] cmap-f13.ttx path
  # @return [Hash{Integer=>String}] codepoint → glyph name
  def self.parse(path)
    new(path).to_h
  end

  # Only records the path; the file is read on first lookup.
  #
  # @param path [String, Pathname, #to_path] cmap-f13.ttx path
  def initialize(path)
    @path = Pathname.new(path)
  end

  # Parses the file on first call and memoizes the result.
  #
  # @return [Hash{Integer=>String}] frozen codepoint → glyph name
  def to_h
    @to_h ||= build_index.freeze
  end

  # @param codepoint [Integer]
  # @return [String, nil] glyph name or nil if no entry
  def [](codepoint)
    to_h[codepoint]
  end

  # @return [Boolean]
  def key?(codepoint)
    to_h.key?(codepoint)
  end

  # @return [Integer] number of entries
  def size
    to_h.size
  end

  private

  # Reads and strictly parses the TTX file, then indexes every
  # `/ttFont/cmap/cmap_format_13/map` element. Entries with an
  # unparseable `code` or a missing/empty `name` are skipped; a later
  # entry for the same codepoint overwrites an earlier one.
  def build_index
    doc = Nokogiri::XML(@path.read) do |config|
      config.noblanks.strict
    end

    doc.xpath("/ttFont/cmap/cmap_format_13/map").each_with_object({}) do |node, index|
      code = parse_code(node[CodeAttr])
      name = node[NameAttr]
      next if code.nil? || name.nil? || name.empty?

      index[code] = name
    end
  end

  # Converts a hexadecimal `code` attribute to an Integer.
  #
  # Accepts the value with or without a `0x`/`0X` prefix. Returns nil
  # for nil, empty, or malformed input. `String#to_i(16)` was not used
  # because it never raises: it silently yields 0 (or a value parsed
  # from a leading hex-looking prefix such as the "b" of "bogus"),
  # which would index garbage entries under a real codepoint.
  #
  # @param raw [String, nil]
  # @return [Integer, nil]
  def parse_code(raw)
    return nil if raw.nil? || raw.empty?

    Integer(raw, 16, exception: false)
  end
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Glyphs
|
|
7
|
+
module LastResort
|
|
8
|
+
# Parses the UFO `contents.plist` once into a
|
|
9
|
+
# `{glyph_name => glif_basename}` lookup.
|
|
10
|
+
#
|
|
11
|
+
# The plist is the standard UFO v3 format:
|
|
12
|
+
#
|
|
13
|
+
# <dict>
|
|
14
|
+
# <key>lastresortlatin</key>
|
|
15
|
+
# <string>lastresortlatin.glif</string>
|
|
16
|
+
# ...
|
|
17
|
+
# </dict>
|
|
18
|
+
#
|
|
19
|
+
# 380 entries (one per placeholder glyph). Tiny file, but parsing
|
|
20
|
+
# it once per Writer avoids 380 redundant Nokogiri passes across
|
|
21
|
+
# the per-codepoint loop.
|
|
22
|
+
# Memoized `{glyph_name => glif_basename}` lookup read from a UFO
# `contents.plist`.
class Contents
  # Element name of a plist dictionary key.
  KeyEl = "key"
  private_constant :KeyEl

  # Element name of a plist string value.
  StringEl = "string"
  private_constant :StringEl

  # Convenience wrapper: parse `path` and hand back the frozen Hash.
  #
  # @param path [String, Pathname, #to_path] contents.plist path
  # @return [Hash{String=>String}] glyph name → glif basename
  def self.parse(path)
    new(path).to_h
  end

  # Stores the path only; the plist is read on first access.
  #
  # @param path [String, Pathname, #to_path] contents.plist path
  def initialize(path)
    @path = Pathname.new(path)
  end

  # Builds the lookup on first call and reuses it afterwards.
  #
  # @return [Hash{String=>String}] frozen glyph name → glif basename
  def to_h
    @to_h ||= build_index.freeze
  end

  # @param glyph_name [String]
  # @return [String, nil] glif basename (e.g. "lastresortlatin.glif")
  def [](glyph_name)
    to_h[glyph_name]
  end

  # @return [Boolean]
  def key?(glyph_name)
    to_h.key?(glyph_name)
  end

  private

  # Walks the children of `/plist/dict` two at a time and records each
  # pair that is a `<key>` followed by a `<string>`. Any other pair
  # (wrong element names, or a trailing element without a partner) is
  # ignored.
  def build_index
    xml = Nokogiri::XML(@path.read) { |options| options.noblanks.strict }

    lookup = {}
    xml.xpath("/plist/dict/*").each_slice(2) do |name_el, file_el|
      well_formed = name_el.name == KeyEl && !file_el.nil? && file_el.name == StringEl
      lookup[name_el.text] = file_el.text if well_formed
    end
    lookup
  end
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "nokogiri"
|
|
4
|
+
|
|
5
|
+
require "ucode/error"
|
|
6
|
+
|
|
7
|
+
module Ucode
|
|
8
|
+
module Glyphs
|
|
9
|
+
module LastResort
|
|
10
|
+
# Parses one UFO `.glif` outline file into a {Glif::Outline} value
|
|
11
|
+
# object: advance width + list of contours, each contour being a
|
|
12
|
+
# list of {Glif::Point}s.
|
|
13
|
+
#
|
|
14
|
+
# UFO point semantics:
|
|
15
|
+
#
|
|
16
|
+
# * `type="move"` — on-curve; starts a new contour.
|
|
17
|
+
# * `type="line"` — on-curve; straight line from previous.
|
|
18
|
+
# * `type="curve"` — on-curve; cubic Bezier. The preceding 1–2
|
|
19
|
+
# points with no `type` are off-curve control
|
|
20
|
+
# points.
|
|
21
|
+
# * `type="qcurve"` — on-curve; quadratic Bezier. Preceding N
|
|
22
|
+
# points with no `type` are off-curve controls.
|
|
23
|
+
# * no `type` — off-curve control point.
|
|
24
|
+
#
|
|
25
|
+
# Contours are implicitly closed (UFO follows PostScript
|
|
26
|
+
# convention). {Svg} adds the closing `Z` when emitting SVG path
|
|
27
|
+
# data, so the outline representation here is open.
|
|
28
|
+
#
|
|
29
|
+
# All coordinates are in font units (integers in the Last Resort
|
|
30
|
+
# UFO; the parser accepts floats too for forward compatibility).
|
|
31
|
+
# Reader for UFO `.glif` files. {Glif.read} / {Glif.parse} turn one
# file into an {Outline}; the structs below are plain value holders.
module Glif
  # Maps the `type` attribute of a `<point>` to the {Point#kind}
  # symbol. A missing attribute (nil) denotes an off-curve point.
  KindByType = {
    nil => :offcurve,
    "move" => :move,
    "line" => :line,
    "curve" => :curve,
    "qcurve" => :qcurve,
  }.freeze
  private_constant :KindByType

  # Single outline point. `kind` is one of `:offcurve`, `:move`,
  # `:line`, `:curve`, `:qcurve`.
  Point = Struct.new(:x, :y, :kind, keyword_init: true) do
    # @return [Boolean] false only for `:offcurve` points
    def on_curve?
      kind != :offcurve
    end
  end

  # One contour — an ordered list of {Point}s.
  Contour = Struct.new(:points, keyword_init: true)

  # Parsed outline value object.
  Outline = Struct.new(:advance, :contours, keyword_init: true) do
    # Bounding box over ALL points, off-curve control points included
    # (so it may be larger than the drawn shape).
    #
    # @return [Hash{Symbol=>Numeric}, nil] `min_x`/`min_y`/`max_x`/
    #   `max_y`, or nil when the outline has no points
    def bbox
      points = contours.flat_map(&:points)
      return nil if points.empty?

      min_x, max_x = points.map(&:x).minmax
      min_y, max_y = points.map(&:y).minmax
      { min_x: min_x, min_y: min_y, max_x: max_x, max_y: max_y }
    end
  end

  # @param path [String, Pathname, #to_path] `.glif` file path
  # @return [Outline]
  def self.read(path)
    parse(Pathname.new(path))
  end

  # Reads and strictly parses the file, then extracts the advance
  # width and contours of its `<glyph>` element.
  #
  # @param path [String, Pathname, #to_path] `.glif` file path
  # @return [Outline]
  # @raise [Ucode::GlyphError] if the document has no `<glyph>`
  #   element or contains a point with an unknown `type`
  def self.parse(path)
    # Wrap here as well so a plain String works, as documented.
    source = Pathname.new(path)
    doc = Nokogiri::XML(source.read) do |config|
      config.noblanks.strict
    end
    glyph = doc.at_xpath("/glyph") || doc.at_xpath("//glyph")
    raise Ucode::GlyphError, "not a UFO .glif file: #{path}" unless glyph

    Outline.new(advance: parse_advance(glyph), contours: parse_contours(glyph))
  end

  class << self
    private

    # Advance width from `<advance width="…">`; 0 when the element or
    # the attribute is absent.
    def parse_advance(glyph)
      node = glyph.at_xpath("advance")
      return 0 unless node

      parse_number(node["width"])
    end

    # One {Contour} per `<outline>/<contour>`, each holding its
    # `<point>` children in document order. Empty list when the glyph
    # has no `<outline>`.
    def parse_contours(glyph)
      outline_node = glyph.at_xpath("outline")
      return [] unless outline_node

      outline_node.xpath("contour").map do |contour_node|
        points = contour_node.xpath("point").map do |point_node|
          Point.new(
            x: parse_number(point_node["x"]),
            y: parse_number(point_node["y"]),
            kind: parse_kind(point_node["type"]),
          )
        end
        Contour.new(points: points)
      end
    end

    # Converts a coordinate/width attribute to a number without losing
    # a fractional part: integral text stays an Integer (so integer
    # UFO data behaves exactly as before), anything else that parses
    # as a float becomes a Float. A missing attribute is 0; text that
    # is neither falls back to the lenient `String#to_i`.
    #
    # @param raw [String, nil]
    # @return [Integer, Float]
    def parse_number(raw)
      return 0 if raw.nil?

      Integer(raw, 10, exception: false) || Float(raw, exception: false) || raw.to_i
    end

    # @param type [String, nil] value of the `type` attribute
    # @return [Symbol] see {Point}
    # @raise [Ucode::GlyphError] for an unrecognized type
    def parse_kind(type)
      KindByType.fetch(type) do
        raise Ucode::GlyphError, "unknown glif point type: #{type.inspect}"
      end
    end
  end
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ucode/error"
|
|
4
|
+
require "ucode/glyphs/last_resort/cmap_index"
|
|
5
|
+
require "ucode/glyphs/last_resort/contents"
|
|
6
|
+
require "ucode/glyphs/last_resort/glif"
|
|
7
|
+
require "ucode/glyphs/last_resort/svg"
|
|
8
|
+
|
|
9
|
+
module Ucode
|
|
10
|
+
module Glyphs
|
|
11
|
+
module LastResort
|
|
12
|
+
# Chains the four lookup stages needed to render one codepoint's
|
|
13
|
+
# Last Resort glyph: cmap (cp → name) → contents (name → file)
|
|
14
|
+
# → glif (file → outline) → svg (outline → SVG document).
|
|
15
|
+
#
|
|
16
|
+
# The CmapIndex and Contents are lazily built and memoized per
|
|
17
|
+
# Renderer instance, so rendering many codepoints shares the
|
|
18
|
+
# parsed cmap (1,114,112 entries) and plist (380 entries).
|
|
19
|
+
#
|
|
20
|
+
# Pure-ish: reads from disk via the Source paths; produces a
|
|
21
|
+
# {Result} struct. Never raises on missing codepoints — returns
|
|
22
|
+
# `nil` so callers can decide whether to log or fall back to a
|
|
23
|
+
# generic placeholder.
|
|
24
|
+
# Renders the Last Resort placeholder glyph for a codepoint by walking
# cmap → contents → glif file → SVG. Both lookup tables are created on
# first use and kept for the lifetime of the instance.
class Renderer
  # Outcome of rendering one codepoint.
  Result = Struct.new(:codepoint, :glyph_name, :svg, keyword_init: true) do
    # @return [Boolean] true when an SVG string is present
    def ok?
      !svg.nil?
    end
  end

  # @param source [Source] supplies `cmap_path`, `contents_path`
  #   and `glif_path(basename)`
  def initialize(source)
    @source = source
  end

  # Resolves the codepoint to a `.glif` file and renders it.
  #
  # @param codepoint [Integer]
  # @return [Result, nil] nil when the codepoint isn't in the cmap
  #   or the named glyph is missing from disk
  def render(codepoint)
    name = cmap[codepoint]
    glif_file = name && glif_file_for(name)
    return nil if glif_file.nil?

    document = Svg.new(Glif.read(glif_file), codepoint: codepoint).to_s
    Result.new(codepoint: codepoint, glyph_name: name, svg: document)
  end

  # Lazily constructed cmap lookup, shared by every `render` call.
  #
  # @return [CmapIndex]
  def cmap
    @cmap ||= CmapIndex.new(@source.cmap_path)
  end

  # Lazily constructed contents lookup, shared by every `render` call.
  #
  # @return [Contents]
  def contents
    @contents ||= Contents.new(@source.contents_path)
  end

  private

  # Path of the `.glif` file registered for `glyph_name`, or nil when
  # the name has no contents entry or the file does not exist.
  def glif_file_for(glyph_name)
    basename = contents[glyph_name]
    return nil unless basename

    candidate = @source.glif_path(basename)
    candidate if candidate.exist?
  end
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require "ucode/error"
|
|
6
|
+
|
|
7
|
+
module Ucode
|
|
8
|
+
module Glyphs
|
|
9
|
+
module LastResort
|
|
10
|
+
# Locates the Last Resort Font UFO source on disk.
|
|
11
|
+
#
|
|
12
|
+
# Resolution order (first match wins):
|
|
13
|
+
#
|
|
14
|
+
# 1. Explicit `root:` argument.
|
|
15
|
+
# 2. `UCODE_LAST_RESORT_FONT_ROOT` environment variable.
|
|
16
|
+
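# 3. `Ucode::Config#last_resort_font_root` (if configured).
#    NOTE(review): `resolve_root` below does not read `Ucode::Config`;
#    presumably callers pass the configured value as `root:` — confirm.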
|
|
17
|
+
# 4. Conventional sibling-of-repo path `../../external/unicode/
|
|
18
|
+
# last-resort-font` relative to the gem root.
|
|
19
|
+
#
|
|
20
|
+
# The UFO must contain:
|
|
21
|
+
#
|
|
22
|
+
# * `cmap-f13.ttx` — Format 13 cmap (cp → glyph name).
|
|
23
|
+
# * `font.ufo/glyphs/contents.plist` — glyph name → .glif file.
|
|
24
|
+
# * `font.ufo/glyphs/*.glif` — outline files.
|
|
25
|
+
#
|
|
26
|
+
# If any required artifact is missing, the constructor raises
|
|
27
|
+
# {Ucode::LastResortMissingError} with a `context:` payload listing
|
|
28
|
+
# the resolved root and which artifact is absent. The CLI catches
|
|
29
|
+
# this to print a friendly "see README for setup" message.
|
|
30
|
+
# Resolves and validates the on-disk location of the Last Resort Font
# UFO checkout. Construction fails unless every required artifact is
# present under the resolved root.
class Source
  attr_reader :root, :cmap_path, :glyphs_dir, :contents_path

  # Expected layout inside the UFO root.
  CMAP_REL = "cmap-f13.ttx"
  private_constant :CMAP_REL

  GLYPHS_REL = "font.ufo/glyphs"
  private_constant :GLYPHS_REL

  CONTENTS_REL = "font.ufo/glyphs/contents.plist"
  private_constant :CONTENTS_REL

  # Environment variable naming the UFO root.
  ROOT_ENV_VAR = "UCODE_LAST_RESORT_FONT_ROOT"
  private_constant :ROOT_ENV_VAR

  # @param root [String, Pathname, nil] explicit UFO root
  # @param env [Hash{String=>String}] env var source (defaults to ENV)
  # @param gem_root [String, Pathname, nil] gem root for the
  #   conventional fallback (defaults to the directory holding
  #   `lib/ucode`); injectable for tests
  # @raise [Ucode::LastResortMissingError] if no root could be
  #   resolved or a required artifact is missing at the resolved root
  def initialize(root: nil, env: ENV, gem_root: nil)
    @root = resolve_root(root, env, gem_root)
    validate!
  end

  # @return [Boolean] true if all required artifacts are present
  def available?
    [@cmap_path, @glyphs_dir, @contents_path].all?(&:exist?)
  end

  # Path to a specific `.glif` file by basename. Does NOT verify
  # the file exists; callers resolve via {Contents} first.
  #
  # @param basename [String] e.g. "lastresortlatin.glif"
  # @return [Pathname]
  def glif_path(basename)
    @glyphs_dir.join(basename)
  end

  private

  # Picks the UFO root. An explicit root always wins and is NOT
  # checked here (validate! reports a bad one). Otherwise the first
  # candidate that exists and contains `font.ufo/glyphs` is used:
  # the env var value, then the conventional path.
  #
  # @return [Pathname, nil] nil when no candidate qualifies
  def resolve_root(explicit, env, gem_root)
    return Pathname.new(explicit).expand_path if explicit

    candidates = []
    env_val = env[ROOT_ENV_VAR]
    # Expanded like the explicit root so that `~` and relative values
    # work and `root` is always absolute.
    candidates << Pathname.new(env_val).expand_path if env_val && !env_val.empty?
    candidates << conventional_path(gem_root)
    candidates.find { |candidate| candidate.exist? && looks_like_ufo_root?(candidate) }
  end

  # `<gem_root>/../../external/unicode/last-resort-font`.
  def conventional_path(gem_root)
    base = gem_root ? Pathname.new(gem_root) : default_gem_root
    # gem_root is the project root (e.g. /.../fontist/ucode).
    # The Last Resort Font is conventionally checked out as a
    # sibling-of-the-workspace at <workspace>/external/unicode/
    # last-resort-font — that's two levels up from the gem root.
    base.expand_path.parent.parent.join("external", "unicode", "last-resort-font")
  end

  def default_gem_root
    # __dir__ = lib/ucode/glyphs/last_resort. Four `..` get us back
    # to the project root (the directory containing `lib/`).
    Pathname.new(__dir__).join("..", "..", "..", "..")
  end

  # A directory qualifies as a UFO root when it has `font.ufo/glyphs`.
  def looks_like_ufo_root?(path)
    path.join("font.ufo", "glyphs").directory?
  end

  # Derives the artifact paths from the root and raises unless all of
  # them exist.
  def validate!
    raise_missing if @root.nil?

    @cmap_path = @root.join(CMAP_REL)
    @glyphs_dir = @root.join(GLYPHS_REL)
    @contents_path = @root.join(CONTENTS_REL)
    raise_missing unless available?
  end

  # Relative names of the required artifacts that are absent under
  # the resolved root; all of them when no root was resolved.
  def missing_artifacts
    required = [CMAP_REL, GLYPHS_REL, CONTENTS_REL]
    return required if @root.nil?

    required.reject { |rel| @root.join(rel).exist? }
  end

  # Raises the setup error. The context carries the resolved root
  # (nil when none was found), the env var name, and — under
  # `:missing` — which artifacts are absent.
  def raise_missing
    raise Ucode::LastResortMissingError.new(
      "Last Resort Font UFO source not found",
      context: {
        resolved_root: @root&.to_s,
        env_var: ROOT_ENV_VAR,
        missing: missing_artifacts,
      },
    )
  end
end
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
end
|