ucode 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CLAUDE.md +211 -0
- data/Gemfile +22 -0
- data/Gemfile.lock +406 -0
- data/README.md +469 -0
- data/Rakefile +18 -0
- data/TODO.new/00-README.md +66 -0
- data/TODO.new/01-pillar-terminology-alignment.md +69 -0
- data/TODO.new/02-audit-schema-design.md +255 -0
- data/TODO.new/03-directory-output-spec.md +203 -0
- data/TODO.new/04-fontist-org-contract.md +173 -0
- data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
- data/TODO.new/06-audit-namespace-skeleton.md +105 -0
- data/TODO.new/07-audit-models-port.md +132 -0
- data/TODO.new/08-extractors-cheap-port.md +113 -0
- data/TODO.new/09-extractors-expensive-port.md +99 -0
- data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
- data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
- data/TODO.new/12-formatters-port.md +115 -0
- data/TODO.new/13-directory-emitter.md +147 -0
- data/TODO.new/14-html-face-browser.md +144 -0
- data/TODO.new/15-html-library-browser.md +102 -0
- data/TODO.new/16-cli-audit-subcommands.md +142 -0
- data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
- data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
- data/TODO.new/19-fontisan-docs-update.md +155 -0
- data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
- data/TODO.new/21-canonical-unicode17-build.md +148 -0
- data/TODO.new/22-implementation-order.md +176 -0
- data/UCODE_CHANGELOG.md +97 -0
- data/exe/ucode +8 -0
- data/lib/ucode/aggregator.rb +77 -0
- data/lib/ucode/audit/block_aggregator.rb +90 -0
- data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
- data/lib/ucode/audit/context.rb +137 -0
- data/lib/ucode/audit/discrepancy_detector.rb +213 -0
- data/lib/ucode/audit/extractors/aggregations.rb +70 -0
- data/lib/ucode/audit/extractors/base.rb +21 -0
- data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
- data/lib/ucode/audit/extractors/coverage.rb +55 -0
- data/lib/ucode/audit/extractors/hinting.rb +199 -0
- data/lib/ucode/audit/extractors/identity.rb +65 -0
- data/lib/ucode/audit/extractors/licensing.rb +75 -0
- data/lib/ucode/audit/extractors/metrics.rb +108 -0
- data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
- data/lib/ucode/audit/extractors/provenance.rb +34 -0
- data/lib/ucode/audit/extractors/style.rb +88 -0
- data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
- data/lib/ucode/audit/extractors.rb +31 -0
- data/lib/ucode/audit/plane_aggregator.rb +37 -0
- data/lib/ucode/audit/registry.rb +63 -0
- data/lib/ucode/audit/script_aggregator.rb +92 -0
- data/lib/ucode/audit.rb +27 -0
- data/lib/ucode/cache.rb +113 -0
- data/lib/ucode/cli.rb +272 -0
- data/lib/ucode/commands/build.rb +68 -0
- data/lib/ucode/commands/cache.rb +46 -0
- data/lib/ucode/commands/fetch.rb +62 -0
- data/lib/ucode/commands/font_coverage.rb +57 -0
- data/lib/ucode/commands/glyphs.rb +136 -0
- data/lib/ucode/commands/lookup.rb +65 -0
- data/lib/ucode/commands/parse.rb +62 -0
- data/lib/ucode/commands/site.rb +33 -0
- data/lib/ucode/commands.rb +19 -0
- data/lib/ucode/config.rb +110 -0
- data/lib/ucode/coordinator/indices.rb +34 -0
- data/lib/ucode/coordinator.rb +397 -0
- data/lib/ucode/database.rb +214 -0
- data/lib/ucode/db_builder.rb +107 -0
- data/lib/ucode/error.rb +96 -0
- data/lib/ucode/fetch/code_charts.rb +57 -0
- data/lib/ucode/fetch/http.rb +83 -0
- data/lib/ucode/fetch/ucd_zip.rb +57 -0
- data/lib/ucode/fetch/unihan_zip.rb +57 -0
- data/lib/ucode/fetch.rb +14 -0
- data/lib/ucode/glyphs/cell_extractor.rb +130 -0
- data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
- data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
- data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
- data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
- data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
- data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
- data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
- data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
- data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
- data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
- data/lib/ucode/glyphs/grid.rb +30 -0
- data/lib/ucode/glyphs/grid_detector.rb +165 -0
- data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
- data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
- data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
- data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
- data/lib/ucode/glyphs/last_resort/source.rb +125 -0
- data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
- data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
- data/lib/ucode/glyphs/last_resort.rb +36 -0
- data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
- data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
- data/lib/ucode/glyphs/page_renderer.rb +221 -0
- data/lib/ucode/glyphs/path_bbox.rb +62 -0
- data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
- data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
- data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
- data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
- data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
- data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
- data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
- data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
- data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
- data/lib/ucode/glyphs/real_fonts.rb +32 -0
- data/lib/ucode/glyphs/writer.rb +250 -0
- data/lib/ucode/glyphs.rb +27 -0
- data/lib/ucode/index.rb +106 -0
- data/lib/ucode/index_builder.rb +94 -0
- data/lib/ucode/models/audit/audit_axis.rb +30 -0
- data/lib/ucode/models/audit/audit_diff.rb +77 -0
- data/lib/ucode/models/audit/audit_report.rb +137 -0
- data/lib/ucode/models/audit/baseline.rb +32 -0
- data/lib/ucode/models/audit/block_summary.rb +72 -0
- data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
- data/lib/ucode/models/audit/codepoint_range.rb +39 -0
- data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
- data/lib/ucode/models/audit/color_capabilities.rb +91 -0
- data/lib/ucode/models/audit/discrepancy.rb +38 -0
- data/lib/ucode/models/audit/duplicate_group.rb +23 -0
- data/lib/ucode/models/audit/embedding_type.rb +81 -0
- data/lib/ucode/models/audit/field_change.rb +28 -0
- data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
- data/lib/ucode/models/audit/gasp_range.rb +63 -0
- data/lib/ucode/models/audit/hinting.rb +99 -0
- data/lib/ucode/models/audit/library_summary.rb +40 -0
- data/lib/ucode/models/audit/licensing.rb +48 -0
- data/lib/ucode/models/audit/metrics.rb +111 -0
- data/lib/ucode/models/audit/named_instance.rb +41 -0
- data/lib/ucode/models/audit/opentype_layout.rb +38 -0
- data/lib/ucode/models/audit/plane_summary.rb +31 -0
- data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
- data/lib/ucode/models/audit/script_features.rb +28 -0
- data/lib/ucode/models/audit/script_summary.rb +54 -0
- data/lib/ucode/models/audit/variation_detail.rb +42 -0
- data/lib/ucode/models/audit.rb +50 -0
- data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
- data/lib/ucode/models/bidi_mirroring.rb +19 -0
- data/lib/ucode/models/binary_property_assignment.rb +26 -0
- data/lib/ucode/models/block.rb +36 -0
- data/lib/ucode/models/case_folding_rule.rb +23 -0
- data/lib/ucode/models/cjk_radical.rb +23 -0
- data/lib/ucode/models/codepoint/bidi.rb +28 -0
- data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
- data/lib/ucode/models/codepoint/case_folding.rb +25 -0
- data/lib/ucode/models/codepoint/casing.rb +32 -0
- data/lib/ucode/models/codepoint/decomposition.rb +27 -0
- data/lib/ucode/models/codepoint/display.rb +24 -0
- data/lib/ucode/models/codepoint/emoji.rb +29 -0
- data/lib/ucode/models/codepoint/hangul.rb +20 -0
- data/lib/ucode/models/codepoint/identifier.rb +30 -0
- data/lib/ucode/models/codepoint/indic.rb +20 -0
- data/lib/ucode/models/codepoint/joining.rb +20 -0
- data/lib/ucode/models/codepoint/normalization.rb +35 -0
- data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
- data/lib/ucode/models/codepoint.rb +122 -0
- data/lib/ucode/models/name_alias.rb +21 -0
- data/lib/ucode/models/named_sequence.rb +19 -0
- data/lib/ucode/models/names_list_entry.rb +38 -0
- data/lib/ucode/models/plane.rb +36 -0
- data/lib/ucode/models/property_alias.rb +24 -0
- data/lib/ucode/models/property_value_alias.rb +26 -0
- data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
- data/lib/ucode/models/relationship/cross_reference.rb +17 -0
- data/lib/ucode/models/relationship/footnote.rb +24 -0
- data/lib/ucode/models/relationship/informal_alias.rb +18 -0
- data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
- data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
- data/lib/ucode/models/relationship.rb +57 -0
- data/lib/ucode/models/script.rb +41 -0
- data/lib/ucode/models/special_casing_rule.rb +28 -0
- data/lib/ucode/models/standardized_variant.rb +24 -0
- data/lib/ucode/models/unihan_entry.rb +23 -0
- data/lib/ucode/models.rb +47 -0
- data/lib/ucode/parsers/auxiliary.rb +26 -0
- data/lib/ucode/parsers/base.rb +137 -0
- data/lib/ucode/parsers/bidi_brackets.rb +41 -0
- data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
- data/lib/ucode/parsers/blocks.rb +63 -0
- data/lib/ucode/parsers/case_folding.rb +53 -0
- data/lib/ucode/parsers/cjk_radicals.rb +102 -0
- data/lib/ucode/parsers/derived_age.rb +59 -0
- data/lib/ucode/parsers/derived_core_properties.rb +60 -0
- data/lib/ucode/parsers/extracted_properties.rb +74 -0
- data/lib/ucode/parsers/name_aliases.rb +44 -0
- data/lib/ucode/parsers/named_sequences.rb +51 -0
- data/lib/ucode/parsers/names_list.rb +250 -0
- data/lib/ucode/parsers/property_aliases.rb +41 -0
- data/lib/ucode/parsers/property_value_aliases.rb +46 -0
- data/lib/ucode/parsers/script_extensions.rb +64 -0
- data/lib/ucode/parsers/scripts.rb +60 -0
- data/lib/ucode/parsers/special_casing.rb +62 -0
- data/lib/ucode/parsers/standardized_variants.rb +56 -0
- data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
- data/lib/ucode/parsers/unicode_data.rb +268 -0
- data/lib/ucode/parsers/unihan.rb +125 -0
- data/lib/ucode/parsers.rb +35 -0
- data/lib/ucode/range_entry.rb +58 -0
- data/lib/ucode/repo/aggregate_writer.rb +364 -0
- data/lib/ucode/repo/atomic_writes.rb +48 -0
- data/lib/ucode/repo/codepoint_writer.rb +96 -0
- data/lib/ucode/repo/paths.rb +122 -0
- data/lib/ucode/repo.rb +22 -0
- data/lib/ucode/site/config_emitter.rb +124 -0
- data/lib/ucode/site/generator.rb +178 -0
- data/lib/ucode/site/search_index.rb +68 -0
- data/lib/ucode/site/template/.gitignore +4 -0
- data/lib/ucode/site/template/.vitepress/config.ts +8 -0
- data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
- data/lib/ucode/site/template/char/[codepoint].md +13 -0
- data/lib/ucode/site/template/components/BlockView.vue +57 -0
- data/lib/ucode/site/template/components/CharView.vue +85 -0
- data/lib/ucode/site/template/components/PlaneView.vue +56 -0
- data/lib/ucode/site/template/components/SearchView.vue +66 -0
- data/lib/ucode/site/template/index.md +25 -0
- data/lib/ucode/site/template/package.json +18 -0
- data/lib/ucode/site/template/search.md +9 -0
- data/lib/ucode/site.rb +13 -0
- data/lib/ucode/version.rb +5 -0
- data/lib/ucode/version_resolver.rb +76 -0
- data/lib/ucode.rb +74 -0
- data/ucode.gemspec +56 -0
- metadata +404 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fontisan"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Audit
|
|
7
|
+
module Extractors
|
|
8
|
+
# Color-font capability summary: which color formats a face carries
|
|
9
|
+
# (COLR v0/v1, CPAL, SVG, CBDT/CBLC, sbix) plus lightweight counts
|
|
10
|
+
# from each table's header.
|
|
11
|
+
#
|
|
12
|
+
# Returned fields:
|
|
13
|
+
# color_capabilities: Models::Audit::ColorCapabilities, or nil
|
|
14
|
+
# for Type 1
|
|
15
|
+
#
|
|
16
|
+
# Counts are best-effort — any table that fails to parse yields nil
|
|
17
|
+
# for its corresponding count fields rather than crashing the audit.
|
|
18
|
+
class ColorCapabilities < Base
|
|
19
|
+
# Produces the color-capability entry of an audit report.
#
# @param context [Ucode::Audit::Context] audit context; only `#font` is read
# @return [Hash{Symbol=>Object}] one-entry hash keyed by
#   `:color_capabilities`; the value is nil when the font is not SFNT
def extract(context)
  face = context.font
  capabilities = sfnt?(face) ? Models::Audit::ColorCapabilities.new(**gather(face)) : nil

  { color_capabilities: capabilities }
end
|
|
27
|
+
|
|
28
|
+
private
|
|
29
|
+
|
|
30
|
+
# True when the font object descends from Fontisan::SfntFont.
#
# @param font [Object]
# @return [Boolean]
def sfnt?(font)
  font.kind_of?(Fontisan::SfntFont)
end
|
|
33
|
+
|
|
34
|
+
# Collects the per-table field hashes (COLR, CPAL, SVG, CBDT/CBLC, sbix),
# merges them in that order, and appends the `color_formats` value
# computed by Models::Audit::ColorCapabilities.derive_formats.
#
# @param font [Fontisan::SfntFont]
# @return [Hash{Symbol=>Object}] keyword arguments for the model constructor
def gather(font)
  fields = [
    colr_fields(font),
    cpal_fields(font),
    svg_fields(font),
    cbdt_fields(font),
    sbix_fields(font),
  ].reduce({}) { |merged, part| merged.merge(part) }

  # The per-table hashes use disjoint keys, so reading the presence flags
  # from the merged hash yields the same values as reading each part.
  formats = Models::Audit::ColorCapabilities.derive_formats(
    has_colr: fields[:has_colr], colr_version: fields[:colr_version],
    has_cpal: fields[:has_cpal], has_svg: fields[:has_svg],
    has_cbdt: fields[:has_cbdt], has_sbix: fields[:has_sbix]
  )

  fields.merge(color_formats: formats)
end
|
|
50
|
+
|
|
51
|
+
# COLR presence, version, and header record counts.
# Falls back to `empty_colr` when the table is absent or `font.table`
# returns nothing for it.
#
# @param font [Fontisan::SfntFont]
# @return [Hash{Symbol=>Object}]
def colr_fields(font)
  colr_table = font.table("COLR") if font.has_table?("COLR")
  return empty_colr unless colr_table

  {
    has_colr: true,
    colr_version: colr_table.version&.to_i,
    colr_base_glyph_count: colr_table.num_base_glyph_records&.to_i,
    colr_layer_count: colr_table.num_layer_records&.to_i,
  }
end

# Field set reported when no usable COLR table is available.
#
# @return [Hash{Symbol=>Object}]
def empty_colr
  {
    has_colr: false,
    colr_version: nil,
    colr_base_glyph_count: nil,
    colr_layer_count: nil,
  }
end
|
|
69
|
+
|
|
70
|
+
# CPAL presence plus palette and color-record counts.
# Falls back to `empty_cpal` when the table is absent or `font.table`
# returns nothing for it.
#
# @param font [Fontisan::SfntFont]
# @return [Hash{Symbol=>Object}]
def cpal_fields(font)
  palette_table = font.table("CPAL") if font.has_table?("CPAL")
  return empty_cpal unless palette_table

  {
    has_cpal: true,
    cpal_palette_count: palette_table.num_palettes&.to_i,
    cpal_color_count: palette_table.num_color_records&.to_i,
  }
end

# Field set reported when no usable CPAL table is available.
#
# @return [Hash{Symbol=>Object}]
def empty_cpal
  {
    has_cpal: false,
    cpal_palette_count: nil,
    cpal_color_count: nil,
  }
end
|
|
86
|
+
|
|
87
|
+
# "SVG " table presence plus its document count. The tag carries a
# trailing space because table tags are four characters long.
#
# @param font [Fontisan::SfntFont]
# @return [Hash{Symbol=>Object}]
def svg_fields(font)
  svg_table = font.table("SVG ") if font.has_table?("SVG ")
  return empty_svg unless svg_table

  { has_svg: true, svg_document_count: svg_table.num_svg_documents&.to_i }
end

# Field set reported when no usable "SVG " table is available.
#
# @return [Hash{Symbol=>Object}]
def empty_svg
  {
    has_svg: false,
    svg_document_count: nil,
  }
end
|
|
102
|
+
|
|
103
|
+
# CBDT (bitmap data) and CBLC (strike index) travel as a pair. Each
# presence flag is reported independently, so a font carrying only one
# of the two shows up as a visible mismatch in the audit output. The
# strike count is read from CBLC and only when CBLC is present.
#
# @param font [Fontisan::SfntFont]
# @return [Hash{Symbol=>Object}]
def cbdt_fields(font)
  bitmap_data_present = font.has_table?("CBDT")
  strike_index_present = font.has_table?("CBLC")

  {
    has_cbdt: bitmap_data_present,
    has_cblc: strike_index_present,
    cbdt_strike_count: (cblc_strike_count(font) if strike_index_present),
  }
end

# Number of strikes declared by CBLC (`num_sizes`), or nil when the
# table cannot be obtained.
#
# @param font [Fontisan::SfntFont]
# @return [Integer, nil]
def cblc_strike_count(font)
  strike_index = font.table("CBLC")
  strike_index ? strike_index.num_sizes&.to_i : nil
end
|
|
124
|
+
|
|
125
|
+
# sbix presence plus its strike count.
# Falls back to `empty_sbix` when the table is absent or `font.table`
# returns nothing for it.
#
# @param font [Fontisan::SfntFont]
# @return [Hash{Symbol=>Object}]
def sbix_fields(font)
  sbix_table = font.table("sbix") if font.has_table?("sbix")
  return empty_sbix unless sbix_table

  { has_sbix: true, sbix_strike_count: sbix_table.num_strikes&.to_i }
end

# Field set reported when no usable sbix table is available.
#
# @return [Hash{Symbol=>Object}]
def empty_sbix
  {
    has_sbix: false,
    sbix_strike_count: nil,
  }
end
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
|
|
4
|
+
module Audit
|
|
5
|
+
module Extractors
|
|
6
|
+
# Coverage fields: how many codepoints and glyphs the font ships,
|
|
7
|
+
# the compact codepoint-range view (default), and the optional flat
|
|
8
|
+
# per-codepoint list (only when `--all-codepoints` is on).
|
|
9
|
+
#
|
|
10
|
+
# Returned fields:
|
|
11
|
+
# total_codepoints, total_glyphs, cmap_subtables,
|
|
12
|
+
# codepoint_ranges, codepoints
|
|
13
|
+
#
|
|
14
|
+
# ucode delta vs fontisan: the `codepoints` field uses "U+XXXX"
|
|
15
|
+
# string form per `02-audit-schema-design.md`. Does NOT emit
|
|
16
|
+
# aggregations (blocks/scripts) — that's the Aggregations
|
|
17
|
+
# extractor in TODO 10. Coverage only emits the raw codepoint set.
|
|
18
|
+
class Coverage < Base
|
|
19
|
+
# Assembles the coverage fields for one font.
#
# @param context [Ucode::Audit::Context] supplies `#font` and `#codepoints`
# @return [Hash{Symbol=>Object}] total_codepoints, total_glyphs,
#   cmap_subtables, codepoint_ranges, codepoints (in that key order)
def extract(context)
  face = context.font
  covered = context.codepoints

  report = { total_codepoints: covered.length }
  report[:total_glyphs] = total_glyphs(face)
  report[:cmap_subtables] = cmap_subtable_formats(face)
  report[:codepoint_ranges] = CodepointRangeCoalescer.call(covered)
  report[:codepoints] = codepoints_for_report(context, covered)
  report
end
|
|
32
|
+
|
|
33
|
+
private
|
|
34
|
+
|
|
35
|
+
# Glyph count as declared by the `maxp` table.
#
# Nil-safe on the table read: if `maxp` is listed in the font but
# `font.table` returns nil for it, the count is reported as nil instead
# of raising NoMethodError, matching how the sibling extractors guard
# their table reads.
#
# @param font [#has_table?, #table]
# @return [Object, nil] the table's `num_glyphs` value, or nil when the
#   table is absent or unavailable
def total_glyphs(font)
  return nil unless font.has_table?("maxp")

  font.table("maxp")&.num_glyphs
end
|
|
40
|
+
|
|
41
|
+
# Subtable formats reported by the `cmap` table.
#
# Always returns an Array: an empty one when the table is absent, when
# `font.table` returns nil for it, or when the table reports no formats.
# Previously a nil table raised NoMethodError.
#
# @param font [#has_table?, #table]
# @return [Array] the table's `subtable_formats`, or []
def cmap_subtable_formats(font)
  return [] unless font.has_table?("cmap")

  font.table("cmap")&.subtable_formats || []
end
|
|
46
|
+
|
|
47
|
+
# Flat per-codepoint list in "U+XXXX" form (uppercase hex, zero-padded
# to at least four digits). Only produced when the context asks for all
# codepoints; otherwise the list is empty.
#
# @param context [#all_codepoints?]
# @param codepoints [Array<Integer>]
# @return [Array<String>]
def codepoints_for_report(context, codepoints)
  if context.all_codepoints?
    codepoints.map { |value| format("U+%04X", value) }
  else
    []
  end
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
end
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "stringio"
|
|
4
|
+
|
|
5
|
+
require "fontisan"
|
|
6
|
+
|
|
7
|
+
module Ucode
|
|
8
|
+
module Audit
|
|
9
|
+
module Extractors
|
|
10
|
+
# Hinting summary: TrueType bytecode counts + gasp policy + CFF stem
|
|
11
|
+
# count, with derived `is_unhinted` and `hinting_format` fields.
|
|
12
|
+
#
|
|
13
|
+
# Returned fields:
|
|
14
|
+
# hinting: Models::Audit::Hinting instance, or nil for Type 1
|
|
15
|
+
#
|
|
16
|
+
# The fpgm/prep/cvt/gasp tables have no BinData classes yet — they
|
|
17
|
+
# are read as raw bytes from `font.table_data`. Bytecode is one
|
|
18
|
+
# byte per instruction; cvt is an array of FWord (int16), so the
|
|
19
|
+
# entry count is bytesize / 2.
|
|
20
|
+
class Hinting < Base
|
|
21
|
+
# Single-byte CFF / CFF2 charstring operators that declare stem hints.
# The *HM variants and the two mask operators are handled alongside the
# plain stem operators by the charstring scanner in this class.
HSTEM    = 1   # hstem
VSTEM    = 3   # vstem
HSTEMHM  = 18  # hstemhm
VSTEMHM  = 23  # vstemhm
HINTMASK = 19  # hintmask (followed by mask bytes)
CNTRMASK = 20  # cntrmask (followed by mask bytes)
|
|
28
|
+
|
|
29
|
+
# Produces the hinting entry of an audit report.
#
# @param context [Ucode::Audit::Context] audit context; only `#font` is read
# @return [Hash{Symbol=>Object}] one-entry hash keyed by `:hinting`;
#   the value is nil when the font is not SFNT
def extract(context)
  font = context.font
  summary = (Models::Audit::Hinting.new(**gather(font)) if sfnt?(font))

  { hinting: summary }
end
|
|
37
|
+
|
|
38
|
+
private
|
|
39
|
+
|
|
40
|
+
# Whether the given font object is a Fontisan::SfntFont (or a subclass).
#
# @param font [Object]
# @return [Boolean]
def sfnt?(font)
  sfnt_class = Fontisan::SfntFont
  font.is_a?(sfnt_class)
end
|
|
43
|
+
|
|
44
|
+
# Combines the TrueType fields, CFF fields, and parsed gasp ranges, then
# appends the flags derived by Models::Audit::Hinting.derive_flags.
#
# @param font [Fontisan::SfntFont]
# @return [Hash{Symbol=>Object}] keyword arguments for the model constructor
def gather(font)
  truetype = truetype_fields(font)
  postscript = cff_fields(font)
  gasp_ranges = parse_gasp(font)

  # Any of fpgm / prep / cvt counts as TrueType hinting being present.
  bytecode_present = truetype[:has_fpgm] || truetype[:has_prep] || truetype[:has_cvt]
  derived = Models::Audit::Hinting.derive_flags(
    has_tt: bytecode_present,
    has_cff: postscript[:cff_has_private_dict],
    has_gasp: !gasp_ranges.empty?,
  )

  truetype.merge(postscript, { gasp_ranges: gasp_ranges }, derived)
end
|
|
57
|
+
|
|
58
|
+
# TrueType hinting fields: presence flags for fpgm / prep / cvt / cvar
# plus size-based counts for fpgm, prep, and cvt.
#
# The Control Value Table is looked up through `cvt_tag`, because its
# registered tag is "cvt " (four bytes, trailing space) — the same
# padding this class already uses for "CFF ".
#
# @param font [#has_table?, #table_data]
# @return [Hash{Symbol=>Object}]
def truetype_fields(font)
  {
    has_fpgm: font.has_table?("fpgm"),
    fpgm_instruction_count: byte_count(font, "fpgm"),
    has_prep: font.has_table?("prep"),
    prep_instruction_count: byte_count(font, "prep"),
    has_cvt: !cvt_tag(font).nil?,
    cvt_entry_count: cvt_entry_count(font),
    has_cvar: font.has_table?("cvar"),
  }
end

# CFF-side hinting fields. `cff_has_private_dict` is set whenever either
# the "CFF " or "CFF2" table is present; the stem count is only computed
# for "CFF " fonts and is nil otherwise.
#
# @param font [#has_table?]
# @return [Hash{Symbol=>Object}]
def cff_fields(font)
  has_cff1 = font.has_table?("CFF ")
  has_cff2 = font.has_table?("CFF2")
  has_private = has_cff1 || has_cff2

  {
    cff_has_private_dict: has_private,
    cff_hint_count: has_cff1 ? count_cff_stems(font) : nil,
  }
end

# Raw byte length of a table, used as the instruction count for the
# bytecode tables. Nil when the table is absent or has no raw data.
#
# @param font [#has_table?, #table_data]
# @param tag [String] table tag
# @return [Integer, nil]
def byte_count(font, tag)
  return nil unless font.has_table?(tag)

  font.table_data[tag]&.bytesize
end

# Number of entries in the Control Value Table: its byte length divided
# by two, since each entry is a 16-bit FWord.
#
# @param font [#has_table?, #table_data]
# @return [Integer, nil] nil when the table is absent or has no raw data
def cvt_entry_count(font)
  tag = cvt_tag(font)
  return nil unless tag

  bytes = font.table_data[tag]
  return nil unless bytes

  bytes.bytesize / 2
end

# Resolves the tag under which the font exposes its Control Value Table.
# The padded four-byte form "cvt " is tried first; the unpadded "cvt" is
# kept as a fallback so fonts that matched the previous lookup still do.
#
# @param font [#has_table?]
# @return [String, nil] the matching tag, or nil when neither is present
def cvt_tag(font)
  ["cvt ", "cvt"].find { |tag| font.has_table?(tag) }
end
|
|
95
|
+
|
|
96
|
+
# Decodes the raw gasp table. Layout: uint16 version, uint16 numRanges,
# followed by numRanges records of (uint16 rangeMaxPPEM, uint16
# rangeFlags), all big-endian.
#
# Returns [] when the table is absent, has no raw data, or is shorter
# than its 4-byte header. If the data ends before numRanges records have
# been read, only the complete records are returned.
#
# @param font [#has_table?, #table_data]
# @return [Array] one Models::Audit::GaspRange.from_flags result per record
def parse_gasp(font)
  return [] unless font.has_table?("gasp")

  raw = font.table_data["gasp"]
  return [] unless raw
  return [] if raw.bytesize < 4

  declared = raw.unpack("nn").last
  # Whole 4-byte records that actually fit after the header.
  available = (raw.bytesize - 4) / 4

  Array.new([declared, available].min) do |record|
    max_ppem, flags = raw[4 + (record * 4), 4].unpack("nn")
    Models::Audit::GaspRange.from_flags(max_ppem, flags)
  end
end
|
|
117
|
+
|
|
118
|
+
# Sums the stem-hint counts of every charstring in the first CharStrings
# INDEX (`charstrings_index(0)`) of the "CFF " table.
#
# @param font [#has_table?, #table]
# @return [Integer, nil] nil when the table or its index is unavailable,
#   or when Fontisan::CorruptedTableError is raised while reading
def count_cff_stems(font)
  return nil unless font.has_table?("CFF ")

  cff_table = font.table("CFF ")
  return nil unless cff_table

  charstrings = cff_table.charstrings_index(0)
  return nil unless charstrings

  (0...charstrings.count).sum do |glyph_id|
    charstring = charstrings[glyph_id]
    # Entries without data contribute nothing to the total.
    charstring ? count_stems_in_charstring(charstring) : 0
  end
rescue Fontisan::CorruptedTableError
  nil
end
|
|
138
|
+
|
|
139
|
+
# Byte-level Type 2 charstring scan that tallies declared stem hints
# without building a full CharString object (which would need a Private
# DICT, global/local subrs, and so on). Each byte is handed to
# `process_byte` together with the running operand and stem counters.
#
# NOTE(review): subroutine bodies are not followed here, so stems
# declared inside subrs presumably go uncounted — confirm if exact
# totals matter.
#
# @param data [String] raw charstring bytes
# @return [Integer] number of stem hints seen
def count_stems_in_charstring(data)
  stream = StringIO.new(data)
  pending_operands = 0
  stem_total = 0

  # getbyte returns nil at end of input, which ends the scan.
  while (byte = stream.getbyte)
    pending_operands, stem_total = process_byte(stream, byte, pending_operands, stem_total)
  end

  stem_total
end
|
|
156
|
+
|
|
157
|
+
# Dispatches one charstring byte: operator bytes go to `apply_operator`,
# everything else is treated as the start of an operand.
#
# @param io [StringIO] stream positioned just after `byte`
# @param byte [Integer]
# @param stack [Integer] operands seen since the last operator
# @param stems [Integer] stem hints counted so far
# @return [Array(Integer, Integer)] updated [stack, stems]
def process_byte(io, byte, stack, stems)
  return apply_operator(io, byte, stack, stems) if operator_byte?(byte)

  [consume_operand(io, byte, stack), stems]
end

# Bytes 0..31 are operators, except 28 which introduces an operand.
#
# @param byte [Integer]
# @return [Boolean]
def operator_byte?(byte)
  !(byte > 31 || byte == 28)
end
|
|
168
|
+
|
|
169
|
+
# Applies one operator byte to the scanner state. Every operator clears
# the operand counter; stem and mask operators also add one stem per
# pair of pending operands.
#
# @param io [StringIO] stream positioned just after the operator byte
# @param byte [Integer] operator byte
# @param stack [Integer] operands seen since the last operator
# @param stems [Integer] stem hints counted so far
# @return [Array(Integer, Integer)] [0, updated stem count]
def apply_operator(io, byte, stack, stems)
  declared = stack / 2

  case byte
  when HSTEM, VSTEM, HSTEMHM, VSTEMHM
    stems += declared
  when HINTMASK, CNTRMASK
    stems += declared
    # The mask that follows carries one bit per stem counted so far,
    # rounded up to whole bytes; skip over it.
    io.read((stems + 7) / 8)
  when 12
    # Escape prefix: the operator continues in the next byte.
    io.getbyte
  end

  [0, stems]
end
|
|
184
|
+
|
|
185
|
+
# Skips the remaining bytes of a multi-byte operand and counts it.
# Lead byte 28 is followed by two more bytes, 255 by four, and 247..254
# by one; any other lead byte is a complete operand on its own.
#
# @param io [StringIO] stream positioned just after the lead byte
# @param byte [Integer] operand lead byte
# @param stack [Integer] operands seen since the last operator
# @return [Integer] stack incremented by one
def consume_operand(io, byte, stack)
  if byte == 28
    io.read(2)
  elsif byte == 255
    io.read(4)
  elsif byte.between?(247, 254)
    io.getbyte
  end

  stack + 1
end
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fontisan"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Audit
|
|
7
|
+
module Extractors
|
|
8
|
+
# Identity fields: the human-readable names a font uses to describe
|
|
9
|
+
# itself, drawn from the `name` table (SFNT) or font dictionary
|
|
10
|
+
# (Type 1).
|
|
11
|
+
#
|
|
12
|
+
# Returned fields:
|
|
13
|
+
# family_name, subfamily_name, full_name, postscript_name,
|
|
14
|
+
# version, font_revision
|
|
15
|
+
class Identity < Base
|
|
16
|
+
# Reads the identity fields, choosing the Type 1 or SFNT path from the
# font's class.
#
# @param context [Ucode::Audit::Context] audit context; only `#font` is read
# @return [Hash{Symbol=>Object}]
def extract(context)
  font = context.font
  return type1_identity(font) if font.is_a?(Fontisan::Type1Font)

  sfnt_identity(font)
end
|
|
26
|
+
|
|
27
|
+
private
|
|
28
|
+
|
|
29
|
+
# Identity fields for SFNT fonts: five English strings from the `name`
# table plus `font_revision` from `head`. A missing table leaves the
# corresponding fields nil.
#
# @param font [#has_table?, #table]
# @return [Hash{Symbol=>Object}]
def sfnt_identity(font)
  names = table(font, "name")
  head = table(font, "head")
  ids = Fontisan::Tables::Name

  identity = {
    family_name: ids::FAMILY,
    subfamily_name: ids::SUBFAMILY,
    full_name: ids::FULL_NAME,
    postscript_name: ids::POSTSCRIPT_NAME,
    version: ids::VERSION,
  }.transform_values { |name_id| english_name(names, name_id) }

  identity.merge(font_revision: head&.font_revision)
end
|
|
42
|
+
|
|
43
|
+
# Identity fields for Type 1 fonts, read from the font dictionary's
# font info and the font's own `font_name`. Subfamily and font revision
# are always nil on this path.
#
# @param font [#font_dictionary, #font_name]
# @return [Hash{Symbol=>Object}]
def type1_identity(font)
  info = font.font_dictionary&.font_info
  family = info&.family_name
  full = info&.full_name
  postscript = font.font_name

  {
    family_name: family,
    subfamily_name: nil,
    full_name: full,
    postscript_name: postscript,
    version: info&.version,
    font_revision: nil,
  }
end
|
|
54
|
+
|
|
55
|
+
# Fetches a table only if the font lists it; nil otherwise.
#
# @param font [#has_table?, #table]
# @param tag [String] table tag
# @return [Object, nil]
def table(font, tag)
  return nil unless font.has_table?(tag)

  font.table(tag)
end
|
|
58
|
+
|
|
59
|
+
# English string for a nameID, or nil when there is no name table.
#
# @param name_table [#english_name, nil]
# @param name_id [Integer]
# @return [Object, nil] whatever the table's `english_name` returns
def english_name(name_table, name_id)
  return nil if name_table.nil?

  name_table.english_name(name_id)
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fontisan"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Audit
|
|
7
|
+
module Extractors
|
|
8
|
+
# Licensing + embedding permissions + vendor provenance.
|
|
9
|
+
#
|
|
10
|
+
# Returned fields:
|
|
11
|
+
# licensing: Models::Audit::Licensing instance, or nil for Type 1
|
|
12
|
+
#
|
|
13
|
+
# Type 1 fonts have no OS/2 table; their licensing is nil. WOFF/
|
|
14
|
+
# WOFF2 carry the same OS/2 + name tables as TTF/OTF and need no
|
|
15
|
+
# special handling.
|
|
16
|
+
class Licensing < Base
|
|
17
|
+
# Licensing attribute => nameID in the OpenType `name` table.
NAME_IDS = {
  copyright: 0,
  trademark: 7,
  manufacturer: 8,
  designer: 9,
  description: 10,
  # URLs and license text
  vendor_url: 11,
  designer_url: 12,
  license_description: 13,
  license_url: 14,
}.freeze
|
|
29
|
+
private_constant :NAME_IDS
|
|
30
|
+
|
|
31
|
+
# Produces the licensing entry of an audit report from the OS/2 and
# `name` tables.
#
# @param context [Ucode::Audit::Context] audit context; only `#font` is read
# @return [Hash{Symbol=>Object}] one-entry hash keyed by `:licensing`;
#   the value is nil when the font is not SFNT
def extract(context)
  font = context.font
  return { licensing: nil } unless sfnt?(font)

  os2 = table(font, "OS/2")
  names = table(font, "name")

  # With no OS/2 table the vendor id is nil and both decoders receive nil.
  attributes = name_fields(names).merge(
    vendor_id: sanitized_vendor_id(os2),
    embedding_type: Models::Audit::EmbeddingType.decode(os2&.fs_type&.to_i),
    fs_selection_flags: Models::Audit::FsSelectionFlags.decode(os2&.fs_selection&.to_i),
  )

  { licensing: Models::Audit::Licensing.new(**attributes) }
end
|
|
49
|
+
|
|
50
|
+
private
|
|
51
|
+
|
|
52
|
+
# Reports whether the font is an instance of Fontisan::SfntFont or one
# of its subclasses.
#
# @param font [Object]
# @return [Boolean]
def sfnt?(font)
  font.kind_of?(Fontisan::SfntFont)
end
|
|
55
|
+
|
|
56
|
+
# Returns the requested table when the font lists it, else nil.
#
# @param font [#has_table?, #table]
# @param tag [String] table tag
# @return [Object, nil]
def table(font, tag)
  font.has_table?(tag) ? font.table(tag) : nil
end
|
|
59
|
+
|
|
60
|
+
# Looks up the English string for every entry in NAME_IDS.
#
# @param name [#english_name, nil] the font's `name` table
# @return [Hash{Symbol=>Object}] one entry per NAME_IDS key, or {} when
#   there is no name table
def name_fields(name)
  return {} unless name

  NAME_IDS.each_with_object({}) do |(field, name_id), fields|
    fields[field] = name.english_name(name_id)
  end
end
|
|
65
|
+
|
|
66
|
+
# OS/2 vendor ID with trailing padding removed.
#
# The padding pattern is anchored with `\z` (end of string) rather than
# `$` (end of line), and applied once with `sub`. The previous
# `gsub`/`$` form also stripped NULs and whitespace that sat directly in
# front of an embedded newline, altering the middle of the value.
#
# @param os2 [#ach_vend_id, nil] the font's OS/2 table
# @return [String, nil] nil when there is no OS/2 table or no vendor ID
def sanitized_vendor_id(os2)
  raw = os2&.ach_vend_id
  return nil if raw.nil?

  raw.sub(/[\x00\s]+\z/, "")
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fontisan"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Audit
|
|
7
|
+
module Extractors
|
|
8
|
+
# Layout-critical metrics consolidated from head, hhea, OS/2, post.
|
|
9
|
+
#
|
|
10
|
+
# Returned fields:
|
|
11
|
+
# metrics: Models::Audit::Metrics instance, or nil for Type 1
|
|
12
|
+
#
|
|
13
|
+
# All table reads are nil-safe; tables may be absent in stripped
|
|
14
|
+
# WOFF builds or legacy formats.
|
|
15
|
+
class Metrics < Base
|
|
16
|
+
# Produces the metrics entry of an audit report.
#
# @param context [Ucode::Audit::Context] audit context; only `#font` is read
# @return [Hash{Symbol=>Object}] one-entry hash keyed by `:metrics`;
#   the value is nil when the font is not SFNT
def extract(context)
  font = context.font

  if sfnt?(font)
    { metrics: Models::Audit::Metrics.new(**gather(font)) }
  else
    { metrics: nil }
  end
end
|
|
24
|
+
|
|
25
|
+
private
|
|
26
|
+
|
|
27
|
+
# SFNT check used to gate metric extraction.
#
# @param font [Object]
# @return [Boolean] true for Fontisan::SfntFont instances (subclasses included)
def sfnt?(font)
  expected = Fontisan::SfntFont
  font.is_a?(expected)
end
|
|
30
|
+
|
|
31
|
+
# Merges the head, hhea, OS/2, and post field hashes, in that order.
#
# @param font [Fontisan::SfntFont]
# @return [Hash{Symbol=>Object}] keyword arguments for the model constructor
def gather(font)
  parts = [
    head_fields(font),
    hhea_fields(font),
    os2_fields(font),
    post_fields(font),
  ]

  parts.reduce(:merge)
end
|
|
37
|
+
|
|
38
|
+
# Units-per-em and the global bounding box from `head`, as integers.
#
# @param font [#has_table?, #table]
# @return [Hash{Symbol=>Integer, nil}] {} when the table is unavailable
def head_fields(font)
  head = table(font, "head")
  return {} unless head

  as_integer = ->(value) { value&.to_i }

  {
    units_per_em: as_integer.call(head.units_per_em),
    bbox_x_min: as_integer.call(head.x_min),
    bbox_y_min: as_integer.call(head.y_min),
    bbox_x_max: as_integer.call(head.x_max),
    bbox_y_max: as_integer.call(head.y_max),
  }
end
|
|
50
|
+
|
|
51
|
+
# Ascent, descent, and line gap from `hhea`, as integers.
#
# @param font [#has_table?, #table]
# @return [Hash{Symbol=>Integer, nil}] {} when the table is unavailable
def hhea_fields(font)
  hhea = table(font, "hhea")
  return {} unless hhea

  ascent = hhea.ascent
  descent = hhea.descent
  line_gap = hhea.line_gap

  {
    hhea_ascent: ascent&.to_i,
    hhea_descent: descent&.to_i,
    hhea_line_gap: line_gap&.to_i,
  }
end
|
|
61
|
+
|
|
62
|
+
# Metrics attribute => reader method called on the OS/2 table.
OS2_FIELDS = {
  # Typographic and Windows line metrics
  typo_ascender: :s_typo_ascender,
  typo_descender: :s_typo_descender,
  typo_line_gap: :s_typo_line_gap,
  win_ascent: :us_win_ascent,
  win_descent: :us_win_descent,
  # Letterform heights
  x_height: :sx_height,
  cap_height: :s_cap_height,
  # Subscript geometry
  subscript_x_size: :y_subscript_x_size,
  subscript_y_size: :y_subscript_y_size,
  subscript_x_offset: :y_subscript_x_offset,
  subscript_y_offset: :y_subscript_y_offset,
  # Superscript geometry
  superscript_x_size: :y_superscript_x_size,
  superscript_y_size: :y_superscript_y_size,
  superscript_x_offset: :y_superscript_x_offset,
  superscript_y_offset: :y_superscript_y_offset,
  # Strikeout
  strikeout_size: :y_strikeout_size,
  strikeout_position: :y_strikeout_position,
}.freeze
|
|
83
|
+
private_constant :OS2_FIELDS
|
|
84
|
+
|
|
85
|
+
# Reads every OS2_FIELDS entry from the OS/2 table, converting each
# non-nil value to an integer.
#
# @param font [#has_table?, #table]
# @return [Hash{Symbol=>Integer, nil}] {} when the table is unavailable
def os2_fields(font)
  os2 = table(font, "OS/2")
  return {} unless os2

  OS2_FIELDS.each_with_object({}) do |(attribute, reader), fields|
    fields[attribute] = os2.public_send(reader)&.to_i
  end
end
|
|
91
|
+
|
|
92
|
+
# Underline position and thickness from `post`, as floats.
#
# @param font [#has_table?, #table]
# @return [Hash{Symbol=>Float, nil}] {} when the table is unavailable
def post_fields(font)
  post = table(font, "post")
  return {} unless post

  position = post.underline_position
  thickness = post.underline_thickness

  {
    underline_position: position&.to_f,
    underline_thickness: thickness&.to_f,
  }
end
|
|
101
|
+
|
|
102
|
+
# Guarded table read: nil unless the font reports having the table.
#
# @param font [#has_table?, #table]
# @param tag [String] table tag
# @return [Object, nil]
def table(font, tag)
  present = font.has_table?(tag)
  present ? font.table(tag) : nil
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|