ucode 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CLAUDE.md +211 -0
- data/Gemfile +22 -0
- data/Gemfile.lock +406 -0
- data/README.md +469 -0
- data/Rakefile +18 -0
- data/TODO.new/00-README.md +66 -0
- data/TODO.new/01-pillar-terminology-alignment.md +69 -0
- data/TODO.new/02-audit-schema-design.md +255 -0
- data/TODO.new/03-directory-output-spec.md +203 -0
- data/TODO.new/04-fontist-org-contract.md +173 -0
- data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
- data/TODO.new/06-audit-namespace-skeleton.md +105 -0
- data/TODO.new/07-audit-models-port.md +132 -0
- data/TODO.new/08-extractors-cheap-port.md +113 -0
- data/TODO.new/09-extractors-expensive-port.md +99 -0
- data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
- data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
- data/TODO.new/12-formatters-port.md +115 -0
- data/TODO.new/13-directory-emitter.md +147 -0
- data/TODO.new/14-html-face-browser.md +144 -0
- data/TODO.new/15-html-library-browser.md +102 -0
- data/TODO.new/16-cli-audit-subcommands.md +142 -0
- data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
- data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
- data/TODO.new/19-fontisan-docs-update.md +155 -0
- data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
- data/TODO.new/21-canonical-unicode17-build.md +148 -0
- data/TODO.new/22-implementation-order.md +176 -0
- data/UCODE_CHANGELOG.md +97 -0
- data/exe/ucode +8 -0
- data/lib/ucode/aggregator.rb +77 -0
- data/lib/ucode/audit/block_aggregator.rb +90 -0
- data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
- data/lib/ucode/audit/context.rb +137 -0
- data/lib/ucode/audit/discrepancy_detector.rb +213 -0
- data/lib/ucode/audit/extractors/aggregations.rb +70 -0
- data/lib/ucode/audit/extractors/base.rb +21 -0
- data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
- data/lib/ucode/audit/extractors/coverage.rb +55 -0
- data/lib/ucode/audit/extractors/hinting.rb +199 -0
- data/lib/ucode/audit/extractors/identity.rb +65 -0
- data/lib/ucode/audit/extractors/licensing.rb +75 -0
- data/lib/ucode/audit/extractors/metrics.rb +108 -0
- data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
- data/lib/ucode/audit/extractors/provenance.rb +34 -0
- data/lib/ucode/audit/extractors/style.rb +88 -0
- data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
- data/lib/ucode/audit/extractors.rb +31 -0
- data/lib/ucode/audit/plane_aggregator.rb +37 -0
- data/lib/ucode/audit/registry.rb +63 -0
- data/lib/ucode/audit/script_aggregator.rb +92 -0
- data/lib/ucode/audit.rb +27 -0
- data/lib/ucode/cache.rb +113 -0
- data/lib/ucode/cli.rb +272 -0
- data/lib/ucode/commands/build.rb +68 -0
- data/lib/ucode/commands/cache.rb +46 -0
- data/lib/ucode/commands/fetch.rb +62 -0
- data/lib/ucode/commands/font_coverage.rb +57 -0
- data/lib/ucode/commands/glyphs.rb +136 -0
- data/lib/ucode/commands/lookup.rb +65 -0
- data/lib/ucode/commands/parse.rb +62 -0
- data/lib/ucode/commands/site.rb +33 -0
- data/lib/ucode/commands.rb +19 -0
- data/lib/ucode/config.rb +110 -0
- data/lib/ucode/coordinator/indices.rb +34 -0
- data/lib/ucode/coordinator.rb +397 -0
- data/lib/ucode/database.rb +214 -0
- data/lib/ucode/db_builder.rb +107 -0
- data/lib/ucode/error.rb +96 -0
- data/lib/ucode/fetch/code_charts.rb +57 -0
- data/lib/ucode/fetch/http.rb +83 -0
- data/lib/ucode/fetch/ucd_zip.rb +57 -0
- data/lib/ucode/fetch/unihan_zip.rb +57 -0
- data/lib/ucode/fetch.rb +14 -0
- data/lib/ucode/glyphs/cell_extractor.rb +130 -0
- data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
- data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
- data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
- data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
- data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
- data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
- data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
- data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
- data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
- data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
- data/lib/ucode/glyphs/grid.rb +30 -0
- data/lib/ucode/glyphs/grid_detector.rb +165 -0
- data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
- data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
- data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
- data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
- data/lib/ucode/glyphs/last_resort/source.rb +125 -0
- data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
- data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
- data/lib/ucode/glyphs/last_resort.rb +36 -0
- data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
- data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
- data/lib/ucode/glyphs/page_renderer.rb +221 -0
- data/lib/ucode/glyphs/path_bbox.rb +62 -0
- data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
- data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
- data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
- data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
- data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
- data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
- data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
- data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
- data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
- data/lib/ucode/glyphs/real_fonts.rb +32 -0
- data/lib/ucode/glyphs/writer.rb +250 -0
- data/lib/ucode/glyphs.rb +27 -0
- data/lib/ucode/index.rb +106 -0
- data/lib/ucode/index_builder.rb +94 -0
- data/lib/ucode/models/audit/audit_axis.rb +30 -0
- data/lib/ucode/models/audit/audit_diff.rb +77 -0
- data/lib/ucode/models/audit/audit_report.rb +137 -0
- data/lib/ucode/models/audit/baseline.rb +32 -0
- data/lib/ucode/models/audit/block_summary.rb +72 -0
- data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
- data/lib/ucode/models/audit/codepoint_range.rb +39 -0
- data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
- data/lib/ucode/models/audit/color_capabilities.rb +91 -0
- data/lib/ucode/models/audit/discrepancy.rb +38 -0
- data/lib/ucode/models/audit/duplicate_group.rb +23 -0
- data/lib/ucode/models/audit/embedding_type.rb +81 -0
- data/lib/ucode/models/audit/field_change.rb +28 -0
- data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
- data/lib/ucode/models/audit/gasp_range.rb +63 -0
- data/lib/ucode/models/audit/hinting.rb +99 -0
- data/lib/ucode/models/audit/library_summary.rb +40 -0
- data/lib/ucode/models/audit/licensing.rb +48 -0
- data/lib/ucode/models/audit/metrics.rb +111 -0
- data/lib/ucode/models/audit/named_instance.rb +41 -0
- data/lib/ucode/models/audit/opentype_layout.rb +38 -0
- data/lib/ucode/models/audit/plane_summary.rb +31 -0
- data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
- data/lib/ucode/models/audit/script_features.rb +28 -0
- data/lib/ucode/models/audit/script_summary.rb +54 -0
- data/lib/ucode/models/audit/variation_detail.rb +42 -0
- data/lib/ucode/models/audit.rb +50 -0
- data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
- data/lib/ucode/models/bidi_mirroring.rb +19 -0
- data/lib/ucode/models/binary_property_assignment.rb +26 -0
- data/lib/ucode/models/block.rb +36 -0
- data/lib/ucode/models/case_folding_rule.rb +23 -0
- data/lib/ucode/models/cjk_radical.rb +23 -0
- data/lib/ucode/models/codepoint/bidi.rb +28 -0
- data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
- data/lib/ucode/models/codepoint/case_folding.rb +25 -0
- data/lib/ucode/models/codepoint/casing.rb +32 -0
- data/lib/ucode/models/codepoint/decomposition.rb +27 -0
- data/lib/ucode/models/codepoint/display.rb +24 -0
- data/lib/ucode/models/codepoint/emoji.rb +29 -0
- data/lib/ucode/models/codepoint/hangul.rb +20 -0
- data/lib/ucode/models/codepoint/identifier.rb +30 -0
- data/lib/ucode/models/codepoint/indic.rb +20 -0
- data/lib/ucode/models/codepoint/joining.rb +20 -0
- data/lib/ucode/models/codepoint/normalization.rb +35 -0
- data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
- data/lib/ucode/models/codepoint.rb +122 -0
- data/lib/ucode/models/name_alias.rb +21 -0
- data/lib/ucode/models/named_sequence.rb +19 -0
- data/lib/ucode/models/names_list_entry.rb +38 -0
- data/lib/ucode/models/plane.rb +36 -0
- data/lib/ucode/models/property_alias.rb +24 -0
- data/lib/ucode/models/property_value_alias.rb +26 -0
- data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
- data/lib/ucode/models/relationship/cross_reference.rb +17 -0
- data/lib/ucode/models/relationship/footnote.rb +24 -0
- data/lib/ucode/models/relationship/informal_alias.rb +18 -0
- data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
- data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
- data/lib/ucode/models/relationship.rb +57 -0
- data/lib/ucode/models/script.rb +41 -0
- data/lib/ucode/models/special_casing_rule.rb +28 -0
- data/lib/ucode/models/standardized_variant.rb +24 -0
- data/lib/ucode/models/unihan_entry.rb +23 -0
- data/lib/ucode/models.rb +47 -0
- data/lib/ucode/parsers/auxiliary.rb +26 -0
- data/lib/ucode/parsers/base.rb +137 -0
- data/lib/ucode/parsers/bidi_brackets.rb +41 -0
- data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
- data/lib/ucode/parsers/blocks.rb +63 -0
- data/lib/ucode/parsers/case_folding.rb +53 -0
- data/lib/ucode/parsers/cjk_radicals.rb +102 -0
- data/lib/ucode/parsers/derived_age.rb +59 -0
- data/lib/ucode/parsers/derived_core_properties.rb +60 -0
- data/lib/ucode/parsers/extracted_properties.rb +74 -0
- data/lib/ucode/parsers/name_aliases.rb +44 -0
- data/lib/ucode/parsers/named_sequences.rb +51 -0
- data/lib/ucode/parsers/names_list.rb +250 -0
- data/lib/ucode/parsers/property_aliases.rb +41 -0
- data/lib/ucode/parsers/property_value_aliases.rb +46 -0
- data/lib/ucode/parsers/script_extensions.rb +64 -0
- data/lib/ucode/parsers/scripts.rb +60 -0
- data/lib/ucode/parsers/special_casing.rb +62 -0
- data/lib/ucode/parsers/standardized_variants.rb +56 -0
- data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
- data/lib/ucode/parsers/unicode_data.rb +268 -0
- data/lib/ucode/parsers/unihan.rb +125 -0
- data/lib/ucode/parsers.rb +35 -0
- data/lib/ucode/range_entry.rb +58 -0
- data/lib/ucode/repo/aggregate_writer.rb +364 -0
- data/lib/ucode/repo/atomic_writes.rb +48 -0
- data/lib/ucode/repo/codepoint_writer.rb +96 -0
- data/lib/ucode/repo/paths.rb +122 -0
- data/lib/ucode/repo.rb +22 -0
- data/lib/ucode/site/config_emitter.rb +124 -0
- data/lib/ucode/site/generator.rb +178 -0
- data/lib/ucode/site/search_index.rb +68 -0
- data/lib/ucode/site/template/.gitignore +4 -0
- data/lib/ucode/site/template/.vitepress/config.ts +8 -0
- data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
- data/lib/ucode/site/template/char/[codepoint].md +13 -0
- data/lib/ucode/site/template/components/BlockView.vue +57 -0
- data/lib/ucode/site/template/components/CharView.vue +85 -0
- data/lib/ucode/site/template/components/PlaneView.vue +56 -0
- data/lib/ucode/site/template/components/SearchView.vue +66 -0
- data/lib/ucode/site/template/index.md +25 -0
- data/lib/ucode/site/template/package.json +18 -0
- data/lib/ucode/site/template/search.md +9 -0
- data/lib/ucode/site.rb +13 -0
- data/lib/ucode/version.rb +5 -0
- data/lib/ucode/version_resolver.rb +76 -0
- data/lib/ucode.rb +74 -0
- data/ucode.gemspec +56 -0
- metadata +404 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
require "pathname"
|
|
5
|
+
require "tempfile"
|
|
6
|
+
|
|
7
|
+
require "fontisan"
|
|
8
|
+
require_relative "../../error"
|
|
9
|
+
|
|
10
|
+
module Ucode
|
|
11
|
+
module Glyphs
|
|
12
|
+
module EmbeddedFonts
|
|
13
|
+
# Describes a single Type0 font found in the Code Charts PDF and
# exposes lazy access to its glyph outlines.
#
# {Catalog} constructs FontEntry instances during its PDF walk; the
# renderer then treats each entry as its unit of work. An entry holds:
#
# * identity — the `base_font` name and the font dict object number
# * stream refs — object numbers of the FontDescriptor's FontFile2
#   (TrueType) / FontFile3 (CFF) stream and of the ToUnicode CMap
# * `cid_to_gid_map` — `:identity` (gid == cid) or `:stream` (a
#   separate map would have to be parsed; not currently supported)
# * `codepoint_to_gid` — the per-font map derived from the parsed
#   ToUnicode CMap (expected to be frozen by the caller)
#
# The fontisan accessor is created on the first {#accessor} call. At
# that moment the font program is extracted into the {Source} cache
# directory, unless a cached copy at least as new as the PDF exists.
class FontEntry
  attr_reader :base_font, :font_obj_id, :fontfile_obj_id,
              :fontfile_kind, :tounicode_obj_id, :cid_to_gid_map,
              :codepoint_to_gid, :source

  # @param base_font [String] e.g. "CIAIIP+Uni2000Generalpunctuation"
  # @param font_obj_id [Integer] Type0 font dict object number
  # @param fontfile_obj_id [Integer] FontFile2/3 stream object number
  # @param fontfile_kind [Symbol] :ttf (FontFile2) or :cff (FontFile3)
  # @param tounicode_obj_id [Integer] ToUnicode CMap stream object number
  # @param cid_to_gid_map [Symbol] :identity (the only supported value)
  # @param codepoint_to_gid [Hash{Integer=>Integer}] frozen cp → gid
  # @param source [Source] supplies the cache path and the PDF path
  def initialize(base_font:, font_obj_id:, fontfile_obj_id:,
                 fontfile_kind:, tounicode_obj_id:, cid_to_gid_map:,
                 codepoint_to_gid:, source:)
    @accessor = nil
    @source = source
    @base_font = base_font
    @font_obj_id = font_obj_id
    @fontfile_obj_id = fontfile_obj_id
    @fontfile_kind = fontfile_kind
    @tounicode_obj_id = tounicode_obj_id
    @cid_to_gid_map = cid_to_gid_map
    @codepoint_to_gid = codepoint_to_gid
  end

  # Looks up the glyph id this font uses for a codepoint.
  #
  # @param codepoint [Integer]
  # @return [Integer, nil] the GID, or nil when this font does not
  #   cover the codepoint
  def gid_for(codepoint)
    codepoint_to_gid[codepoint]
  end

  # @return [Array<Integer>] every codepoint this font covers
  def codepoints
    codepoint_to_gid.keys
  end

  # @return [String] cache file extension: ".ttf" for :ttf fonts,
  #   ".cff" for anything else
  def fontfile_extension
    return ".ttf" if @fontfile_kind == :ttf

    ".cff"
  end

  # @return [Pathname] location of the cached, extracted font stream
  def cache_path
    @source.font_cache_path(@base_font, fontfile_extension)
  end

  # Returns the glyph accessor, building it on first use. Building
  # extracts the font program into the cache (when absent or stale)
  # and loads it through fontisan. The result is memoized per entry.
  #
  # @return [Fontisan::GlyphAccessor]
  def accessor
    @accessor ||= build_accessor
  end

  # Drops the memoized accessor so the next {#accessor} call rebuilds
  # it. Handy in long-running processes that walk many fonts.
  #
  # @return [void]
  def reset_accessor!
    @accessor = nil
  end

  private

  # Makes sure the font file is on disk, then wraps it in an accessor.
  def build_accessor
    ensure_font_cached!
    loaded = Fontisan::FontLoader.load(cache_path.to_s)
    Fontisan::GlyphAccessor.new(loaded)
  end

  # Extracts the font stream unless a cached copy exists whose mtime
  # is at least that of the PDF.
  def ensure_font_cached!
    target = cache_path
    return if target.exist? && target.mtime >= @source.pdf_path.mtime

    dir = target.dirname
    dir.mkpath unless dir.exist?
    extract_font_stream!
  end

  # Has `mutool show -b` dump the raw font stream into a temp file
  # inside the cache directory, then moves it into place.
  #
  # @raise [Ucode::EmbeddedFontsMissingError] when mutool fails or
  #   cannot be executed
  def extract_font_stream!
    destination = cache_path
    Tempfile.create([@base_font, fontfile_extension], destination.dirname.to_s, binmode: true) do |scratch|
      # Close our handle first; mutool writes to the path itself.
      scratch.close
      command = ["mutool", "show", "-o", scratch.path, "-b",
                 @source.pdf_to_s, @fontfile_obj_id.to_s]
      unless system(*command, out: File::NULL, err: File::NULL)
        raise Ucode::EmbeddedFontsMissingError,
              "mutool failed to extract font stream (obj=#{@fontfile_obj_id})"
      end

      FileUtils.mv(scratch.path, destination.to_s, force: true)
    end
  end
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "svg"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Glyphs
|
|
7
|
+
module EmbeddedFonts
|
|
8
|
+
# Produces the glyph for one codepoint by chaining three steps: the
# {Catalog} index lookup, the {FontEntry} accessor, and the {Svg}
# wrapper.
#
# Follows the same shape as {LastResort::Renderer}: a Result struct on
# success, nil on a miss. What to do about a miss is up to the caller
# (Writer or CLI) — typically a fallback to the LastResort renderer.
class Renderer
  # Outcome of rendering one codepoint.
  Result = Struct.new(:codepoint, :base_font, :gid, :svg, keyword_init: true) do
    # @return [Boolean] true when an SVG document is present
    def ok?
      !svg.nil?
    end
  end

  # @param catalog [Catalog]
  def initialize(catalog)
    @catalog = catalog
  end

  # @param codepoint [Integer]
  # @return [Result, nil] nil when no font in the PDF covers the
  #   codepoint, or when the glyph's outline is missing or empty
  def render(codepoint)
    font = @catalog.lookup(codepoint)
    glyph_id = font && font.gid_for(codepoint)
    return nil unless glyph_id

    shape = font.accessor.outline_for_id(glyph_id)
    return nil if shape.nil? || shape.empty?

    document = Svg.new(shape, codepoint: codepoint, base_font: font.base_font).to_s
    Result.new(codepoint: codepoint, base_font: font.base_font, gid: glyph_id, svg: document)
  end
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require_relative "../../error"
|
|
6
|
+
|
|
7
|
+
module Ucode
|
|
8
|
+
module Glyphs
|
|
9
|
+
module EmbeddedFonts
|
|
10
|
+
# Locates the Code Charts PDF on disk and the directory where
# extracted font streams are cached.
#
# PDF resolution order (first match wins):
#
# 1. Explicit `pdf:` argument.
# 2. `UCODE_CODE_CHARTS_PDF` environment variable.
# 3. Conventional `<gem_root>/CodeCharts.pdf`.
#
# Per-block PDFs (preferred for incremental runs) can be supplied
# via the `pdf:` argument by the caller — typically the CLI.
#
# Cache resolution order:
#
# 1. Explicit `cache_dir:` argument.
# 2. `UCODE_PDF_FONT_CACHE` environment variable.
# 3. Conventional `<gem_root>/data/pdf-fonts/`.
#
# The cache holds one file per embedded font program, named after
# the BaseFont (e.g. `CIAIIP+Uni2000Generalpunctuation.ttf`).
class Source
  attr_reader :pdf_path, :cache_dir

  # @param pdf [String, Pathname, nil] path to a Code Charts PDF
  # @param cache_dir [String, Pathname, nil] directory for cached
  #   font files; created on demand
  # @param env [Hash{String=>String}] env var source (defaults to ENV)
  # @param gem_root [String, Pathname, nil] gem root for the
  #   conventional fallback; injectable for tests
  # @raise [Ucode::EmbeddedFontsMissingError] if the PDF is missing
  def initialize(pdf: nil, cache_dir: nil, env: ENV, gem_root: nil)
    @pdf_path = resolve_pdf(pdf, env, gem_root)
    unless @pdf_path&.exist?
      raise Ucode::EmbeddedFontsMissingError,
            "Code Charts PDF not found at #{@pdf_path}"
    end

    @cache_dir = resolve_cache(cache_dir, env, gem_root)
    @cache_dir.mkpath unless @cache_dir.exist?
  end

  # @return [String] absolute path to the PDF, suitable for shelling
  #   out to `mutool`
  def pdf_to_s
    @pdf_path.to_s
  end

  # @param base_font [String] e.g. "CIAIIP+Uni2000Generalpunctuation"
  # @param extension [String] e.g. ".ttf" or ".cff"
  # @return [Pathname] cache path for the named font
  def font_cache_path(base_font, extension)
    @cache_dir.join("#{base_font}#{extension}")
  end

  private

  # Resolves the PDF location: argument, then env var, then gem root.
  def resolve_pdf(explicit, env, gem_root)
    resolve_path(explicit, env, "UCODE_CODE_CHARTS_PDF", gem_root, "CodeCharts.pdf")
  end

  # Resolves the cache directory: argument, then env var, then gem root.
  def resolve_cache(explicit, env, gem_root)
    resolve_path(explicit, env, "UCODE_PDF_FONT_CACHE", gem_root, "data", "pdf-fonts")
  end

  # Shared three-step resolution. `env` is only consulted when no
  # explicit value was given, and an empty env value counts as unset.
  #
  # @param explicit [String, Pathname, nil] caller-supplied path
  # @param env [#[]] environment lookup
  # @param env_key [String] name of the environment variable
  # @param gem_root [String, Pathname, nil] injected gem root
  # @param fallback [Array<String>] path segments below the gem root
  # @return [Pathname] absolute path
  def resolve_path(explicit, env, env_key, gem_root, *fallback)
    return Pathname.new(explicit).expand_path if explicit

    from_env = env[env_key]
    return Pathname.new(from_env).expand_path if from_env && !from_env.empty?

    root = gem_root ? Pathname.new(gem_root) : default_gem_root
    root.expand_path.join(*fallback)
  end

  # __dir__ = lib/ucode/glyphs/embedded_fonts/. Four `..` get us
  # back to the project root (the directory containing `lib/`):
  # embedded_fonts → glyphs → ucode → lib → root. Five would land one
  # directory above the project.
  def default_gem_root
    Pathname.new(__dir__).join("..", "..", "..", "..")
  end
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
|
|
4
|
+
module Glyphs
|
|
5
|
+
module EmbeddedFonts
|
|
6
|
+
# Converts a fontisan `GlyphOutline` into a standalone SVG document
# shaped to match the {LastResort::Svg} output (y-flipped, viewBox
# padded around the bbox, single `<path>` child).
#
# The fontisan outline is in font units, with y growing upward
# (PostScript convention). SVG y grows downward. We:
#
# 1. Walk `outline.to_commands` and re-emit each command with the y
#    coordinate negated. The commands handled are `:move_to`,
#    `:line_to`, `:curve_to` (quadratic; one control + one end
#    point), and `:close_path`. Anything else is ignored.
# 2. Build a viewBox from the outline's bbox with a small pad,
#    y-flipped so the SVG-space top is `-(y_max + pad)`.
#
# The y-negation happens at emit time, so a serialized path string
# never has to be read back.
#
# NOTE(review): CFF (FontFile3) programs carry cubic outlines; this
# class only emits `Q` for `:curve_to`. Confirm that fontisan hands
# back the quadratic five-element form for those fonts.
class Svg
  PADDING_RATIO = 0.08
  # Characters that must not appear raw inside XML character data.
  XML_TEXT_ESCAPES = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;" }.freeze
  private_constant :PADDING_RATIO, :XML_TEXT_ESCAPES

  # @param outline [Fontisan::Models::GlyphOutline]
  # @param codepoint [Integer, nil] optional, for the `<title>`
  # @param base_font [String, nil] optional source-font name, also
  #   for the `<title>` (debugging which PDF font a glyph came from)
  def initialize(outline, codepoint: nil, base_font: nil)
    @outline = outline
    @codepoint = codepoint
    @base_font = base_font
  end

  # @return [String] complete `<svg>...</svg>` document, preceded by
  #   an XML declaration
  def to_s
    box = view_box
    title = title_text
    lines = []
    lines << %(<svg xmlns="http://www.w3.org/2000/svg" viewBox="#{format_dims(box)}" width="#{format_num(box[:width])}" height="#{format_num(box[:height])}" preserveAspectRatio="xMidYMid meet">)
    # The font name comes from the PDF, so escape it before it lands
    # in element content.
    lines << %( <title>#{escape_xml(title)}</title>) if title
    lines << %( <path d="#{path_data}" fill="currentColor" fill-rule="evenodd"/>)
    lines << %(</svg>)
    %(<?xml version="1.0" encoding="UTF-8"?>\n#{lines.join("\n")}\n)
  end

  # SVG path data with y already negated. Exposed for tests and
  # for callers that want to embed the path in their own wrapper.
  #
  # @return [String]
  def path_data
    parts = @outline.to_commands.filter_map do |cmd|
      case cmd.first
      when :move_to then format_cmd("M", cmd[1], cmd[2])
      when :line_to then format_cmd("L", cmd[1], cmd[2])
      when :curve_to then format_cmd_q(cmd[1], cmd[2], cmd[3], cmd[4])
      when :close_path then "Z"
      end
    end
    parts.join(" ")
  end

  private

  # Human-readable label for `<title>`; nil when no codepoint is known.
  # Built in one expression so no string literal is mutated.
  def title_text
    return nil unless @codepoint

    origin = @base_font ? ": #{@base_font}" : ""
    "U+#{format("%04X", @codepoint)} (Code Charts#{origin})"
  end

  # Escapes `&`, `<` and `>` for use as XML character data.
  def escape_xml(text)
    text.gsub(/[&<>]/, XML_TEXT_ESCAPES)
  end

  # @return [Hash] `{ min_x:, min_y:, width:, height: }` in SVG space.
  #   A missing or all-zero bbox yields a 1×1 box at the origin.
  def view_box
    bb = @outline.bbox
    return { min_x: 0, min_y: 0, width: 1, height: 1 } if bb.nil? || empty_bbox?(bb)

    min_x = bb[:x_min].to_f
    max_x = bb[:x_max].to_f
    min_y = bb[:y_min].to_f
    max_y = bb[:y_max].to_f
    # A degenerate (zero-extent) axis falls back to 1 unit so the
    # viewBox never has a zero dimension.
    width = (max_x - min_x).nonzero? || 1.0
    height = (max_y - min_y).nonzero? || 1.0
    pad_x = width * PADDING_RATIO
    pad_y = height * PADDING_RATIO
    {
      min_x: min_x - pad_x,
      min_y: -(max_y + pad_y), # flipped: font-space top becomes SVG-space top
      width: width + (2 * pad_x),
      height: height + (2 * pad_y),
    }
  end

  def empty_bbox?(bb)
    %i[x_min y_min x_max y_max].all? { |key| bb[key] == 0 }
  end

  def format_dims(box)
    format("%<min_x>.2f %<min_y>.2f %<width>.2f %<height>.2f", box)
  end

  def format_cmd(letter, x, y)
    "#{letter} #{format_num(x)} #{format_num(-y)}"
  end

  def format_cmd_q(cx, cy, ex, ey)
    "Q #{format_num(cx)} #{format_num(-cy)} #{format_num(ex)} #{format_num(-ey)}"
  end

  # Whole numbers print without a fraction; everything else gets two
  # decimal places.
  def format_num(n)
    if n.is_a?(Integer) || n.to_f == n.to_i
      n.to_i.to_s
    else
      format("%.2f", n)
    end
  end
end
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
|
|
4
|
+
module Glyphs
|
|
5
|
+
module EmbeddedFonts
|
|
6
|
+
# Parses a PDF ToUnicode CMap stream into a `{cid => codepoint}` Hash.
#
# PDF ToUnicode CMaps (Adobe Technical Note #5014) use a small
# PostScript-like syntax with three constructs that matter to us:
#
# * `N begincodespacerange ... endcodespacerange` — declares the
#   valid code space. We ignore this; we just take whatever the
#   bfchar/bfrange entries hand us.
# * `N beginbfchar ... endbfchar` — one-to-one cid → unicode
#   mappings, as pairs `<cid_hex> <uni_hex>`.
# * `N beginbfrange ... endbfrange` — range mappings. Two forms:
#   * `<lo> <hi> <start>` — cids lo..hi map to consecutive
#     codepoints starting at `start`.
#   * `<lo> <hi> [<u1> <u2> ... <un>]` — explicit per-cid
#     mapping within the range.
#
# The unicode target string may encode one codepoint (4 hex digits
# for BMP, 8 for an astral codepoint via UTF-16 surrogate pair) or
# a sequence (multiple codepoints, used for ligatures). For our
# purposes — attributing one Code Charts glyph to one codepoint —
# we take the first codepoint of the target string and ignore the
# rest.
module ToUnicode
  BFCHAR_SECTION = /beginbfchar\s*(.*?)\s*endbfchar/m
  BFCHAR_ENTRY = /<([0-9A-Fa-f]+)>\s*<([0-9A-Fa-f]+)>/
  BFRANGE_SECTION = /beginbfrange\s*(.*?)\s*endbfrange/m
  # Matches either `<lo> <hi> <start>` or `<lo> <hi> [<u1> ... <un>]`.
  BFRANGE_ENTRY = /<([0-9A-Fa-f]+)>\s*<([0-9A-Fa-f]+)>\s*(?:<([0-9A-Fa-f]+)>|\[([^\]]*)\])/
  HEX_STRING = /<([0-9A-Fa-f]+)>/
  HIGH_SURROGATES = (0xD800..0xDBFF).freeze
  LOW_SURROGATES = (0xDC00..0xDFFF).freeze
  private_constant :BFCHAR_SECTION, :BFCHAR_ENTRY, :BFRANGE_SECTION,
                   :BFRANGE_ENTRY, :HEX_STRING, :HIGH_SURROGATES,
                   :LOW_SURROGATES

  # bfchar sections are applied first, then bfrange sections, so a
  # cid present in both ends up with the bfrange value.
  #
  # @param cmap_text [String] raw decoded CMap stream text
  # @return [Hash{Integer=>Integer}] frozen cid → codepoint map
  def self.parse(cmap_text)
    result = {}
    scan_bfchar(cmap_text, result)
    scan_bfrange(cmap_text, result)
    result.freeze
  end

  class << self
    private

    # Collects every `<cid> <target>` pair from all bfchar sections.
    def scan_bfchar(text, result)
      text.scan(BFCHAR_SECTION) do |(body)|
        body.scan(BFCHAR_ENTRY) do |cid_h, uni_h|
          cp = decode_target(uni_h)
          result[cid_h.to_i(16)] = cp if cp
        end
      end
    end

    # Collects both bfrange forms from all bfrange sections.
    def scan_bfrange(text, result)
      text.scan(BFRANGE_SECTION) do |(body)|
        body.scan(BFRANGE_ENTRY) do |lo_h, hi_h, start_h, arr|
          lo = lo_h.to_i(16)
          hi = hi_h.to_i(16)
          if start_h
            assign_consecutive(result, lo, hi, start_h)
          elsif arr
            assign_listed(result, lo, hi, arr)
          end
        end
      end
    end

    # `<lo> <hi> <start>`: cid lo+i maps to start+i. An undecodable
    # start skips the whole entry.
    def assign_consecutive(result, lo, hi, start_h)
      start = decode_target(start_h)
      return unless start

      (lo..hi).each_with_index { |cid, i| result[cid] = start + i }
    end

    # `<lo> <hi> [<u1> ...]`: the i-th listed target maps to cid lo+i.
    # Targets beyond `hi` are ignored.
    def assign_listed(result, lo, hi, arr)
      arr.scan(HEX_STRING).flatten.each_with_index do |uni_h, i|
        cid = lo + i
        break if cid > hi

        cp = decode_target(uni_h)
        result[cid] = cp if cp
      end
    end

    # Decode a CMap target hex string into a single codepoint.
    # The target may be 4 hex digits (BMP), 8 (UTF-16 surrogate pair
    # for astral), or longer (a sequence — we take the first cp).
    #
    # A high surrogate is only combined with the next 16-bit unit
    # when that unit really is a low surrogate. Otherwise the pair is
    # malformed and nil is returned instead of a bogus codepoint.
    #
    # @param hex [String] hexadecimal digits
    # @return [Integer, nil] the first codepoint, or nil if hex is
    #   empty or starts with a malformed surrogate pair
    def decode_target(hex)
      return nil if hex.nil? || hex.empty?

      first = hex[0, 4].to_i(16)
      whole_units = hex.length >= 8 && (hex.length % 4).zero?
      return first unless whole_units && HIGH_SURROGATES.cover?(first)

      second = hex[4, 4].to_i(16)
      return nil unless LOW_SURROGATES.cover?(second)

      0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00)
    end
  end
end
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require_relative "renderer"
|
|
6
|
+
require_relative "../../repo/atomic_writes"
|
|
7
|
+
require_relative "../../repo/paths"
|
|
8
|
+
|
|
9
|
+
module Ucode
  module Glyphs
    module EmbeddedFonts
      # Emits a `glyph.svg` file for each requested codepoint, taking
      # the outline from the font program embedded in the Code Charts
      # PDF.
      #
      # One Catalog and one Renderer are held for the lifetime of the
      # Writer, so the costly steps (PDF walk, ToUnicode parse, fontisan
      # load) are paid once per process. Every FontEntry memoizes its
      # own fontisan accessor; on long CJK runs callers may want to
      # invoke `entry.reset_accessor!` from time to time, because the
      # Writer never does.
      #
      # Writes go through `Repo::AtomicWrites`, which makes them
      # idempotent and atomic — the same protocol the LastResort and
      # v0.1 cell-extractor writers follow.
      class Writer
        include Repo::AtomicWrites

        # @param output_root [String, Pathname] directory the glyph
        #   paths are resolved against
        # @param catalog [Catalog] font catalog; also handed to the
        #   Renderer this Writer builds
        def initialize(output_root:, catalog:)
          @output_root = Pathname.new(output_root)
          @catalog = catalog
          @renderer = Renderer.new(catalog)
        end

        # Render and write `glyph.svg` for each codepoint.
        #
        # @param codepoints [Array<Integer>, Enumerable<Integer>, nil]
        #   codepoints to render; when nil, every codepoint reported by
        #   the Catalog is used.
        # @param block_lookup [Proc, #call] maps a codepoint to its
        #   block id string (e.g. `"Basic_Latin"`). A nil result means
        #   the codepoint has no block; it is not rendered and is
        #   counted under `:missing`.
        # @return [Hash] tally `{ written:, skipped:, missing:, total: }`
        def write_many(codepoints = nil, block_lookup:)
          counts = { written: 0, skipped: 0, missing: 0, total: 0 }
          (codepoints || @catalog.codepoints).each do |codepoint|
            counts[:total] += 1
            counts[outcome_for(codepoint, block_lookup)] += 1
          end
          counts
        end

        private

        # Process one codepoint and name the tally bucket it lands in.
        #
        # @return [Symbol] `:missing` when there is no block or no
        #   usable render; otherwise `:written` or `:skipped` depending
        #   on whether the write reported a truthy result
        def outcome_for(codepoint, block_lookup)
          block_id = block_lookup.call(codepoint)
          return :missing if block_id.nil?

          rendered = @renderer.render(codepoint)
          return :missing unless rendered&.ok?

          write_glyph(block_id, codepoint, rendered.svg) ? :written : :skipped
        end

        # Resolve the glyph's destination under the output root and
        # write the SVG there atomically.
        #
        # @return whatever `write_atomic` returns
        def write_glyph(block_id, codepoint, svg)
          glyph_path = Repo::Paths.codepoint_glyph_path(
            @output_root, block_id, Repo::Paths.cp_id(codepoint)
          )
          write_atomic(glyph_path, svg)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
  module Glyphs
    # Pillar 1 of the v0.2 glyph strategy: extract glyph outlines from
    # the font streams embedded in the Code Charts PDFs.
    #
    # The Unicode Code Charts PDFs (one per block, or the monolithic
    # `CodeCharts.pdf`) embed a subsetted CID-keyed font for every
    # "script group" the charts display. Each is a Type0 font whose
    # descendant CIDFont declares `/CIDToGIDMap /Identity`, which means
    # the 2-byte character code in a page's text-show operators IS the
    # GID inside the embedded font program. The mapping to codepoints
    # is stored in the Type0 font's `/ToUnicode` CMap stream.
    #
    # That gives the following pipeline:
    #
    #   1. {Catalog} enumerates the PDF's fonts (via `mutool info`) and
    #      parses each Type0 font's ToUnicode CMap into one global
    #      `{codepoint => [font_entry, gid]}` index.
    #   2. {Renderer} resolves a codepoint, lazily dumps the font's
    #      stream to a cache file, loads it through `fontisan`, and
    #      requests the outline for the resolved GID.
    #   3. {Svg} turns the fontisan outline into a standalone SVG
    #      document (y-flipped, viewBox-padded). The shape matches the
    #      LastResort SVGs, so downstream consumers need not know which
    #      pillar produced a glyph.
    #
    # The v0.1 cell extractor worked from rendered PDF pages and failed
    # because the chart cell border was composited into the same glyph
    # as the outline. Reading the embedded font program directly avoids
    # that bug altogether: its outlines are clean vector geometry with
    # no page chrome.
    #
    # System dependency: `mutool` (mupdf-tools) must be on the PATH. It
    # is used for `mutool info` (font enumeration) and
    # `mutool show -b -o` (raw stream extraction).
    module EmbeddedFonts
      # Constant name => feature path; each file is loaded on first
      # reference to its constant.
      {
        Source: "ucode/glyphs/embedded_fonts/source",
        ToUnicode: "ucode/glyphs/embedded_fonts/tounicode",
        FontEntry: "ucode/glyphs/embedded_fonts/font_entry",
        Catalog: "ucode/glyphs/embedded_fonts/catalog",
        ContentStreamCorrelator: "ucode/glyphs/embedded_fonts/content_stream_correlator",
        Svg: "ucode/glyphs/embedded_fonts/svg",
        Renderer: "ucode/glyphs/embedded_fonts/renderer",
        Writer: "ucode/glyphs/embedded_fonts/writer",
      }.each do |const_name, feature|
        autoload const_name, feature
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
  module Glyphs
    # Geometry of a regular cell grid: an origin, per-column and
    # per-row pitch, the grid dimensions, and the codepoint assigned to
    # the first cell. Cells are numbered row by row, `columns` cells to
    # a row, starting from `block_first_cp`.
    Grid = Struct.new(
      :origin_x,
      :origin_y,
      :column_pitch,
      :row_pitch,
      :columns,
      :rows,
      :block_first_cp,
      keyword_init: true,
    ) do
      # Locate the cell that holds a codepoint.
      #
      # @param codepoint [Integer]
      # @return [Array(Numeric, Numeric), nil] `[x, y]` of the cell, or
      #   nil when the codepoint lies before `block_first_cp` or past
      #   the last row
      def cell_position(codepoint)
        index = codepoint - block_first_cp
        return nil if index.negative?

        row_index, col_index = index.divmod(columns)
        return nil unless row_index < rows

        x = origin_x + (col_index * column_pitch)
        y = origin_y + (row_index * row_pitch)
        [x, y]
      end

      # Inverse lookup: the codepoint stored at a given cell.
      #
      # @param row [Integer] zero-based row index
      # @param col [Integer] zero-based column index
      # @return [Integer, nil] the codepoint, or nil when either index
      #   falls outside the grid
      def codepoint_at(row, col)
        inside = row >= 0 && row < rows && col >= 0 && col < columns
        inside ? block_first_cp + (row * columns) + col : nil
      end
    end
  end
end
|