ucode 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -0
- data/Gemfile.lock +2 -2
- data/TODO.full/00-README.md +116 -0
- data/TODO.full/01-panglyph-vision.md +112 -0
- data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
- data/TODO.full/03-panglyph-font-builder.md +201 -0
- data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
- data/TODO.full/05-ucode-0-1-1-release.md +139 -0
- data/TODO.full/06-fontisan-remove-audit.md +142 -0
- data/TODO.full/07-fontisan-remove-ucd.md +125 -0
- data/TODO.full/08-archive-private-bin-build.md +143 -0
- data/TODO.full/09-archive-public-structure.md +164 -0
- data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
- data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
- data/TODO.full/12-implementation-order.md +216 -0
- data/TODO.full/13-fontisan-font-writer-api.md +189 -0
- data/TODO.full/14-fontisan-table-writers.md +66 -0
- data/TODO.full/15-panglyph-builder-real.md +82 -0
- data/TODO.full/16-archive-public-sync-workflows.md +167 -0
- data/TODO.full/17-fontist-org-font-picker.md +73 -0
- data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
- data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
- data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
- data/TODO.new/00-README.md +30 -0
- data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
- data/TODO.new/24-universal-glyph-set-build.md +189 -0
- data/TODO.new/25-font-audit-against-universal-set.md +195 -0
- data/TODO.new/26-missing-glyph-reporter.md +189 -0
- data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
- data/TODO.new/28-implementation-order-update.md +187 -0
- data/TODO.new/29-universal-set-curation-uc17.md +312 -0
- data/TODO.new/30-tier1-font-acquisition.md +241 -0
- data/TODO.new/31-universal-set-production-build.md +205 -0
- data/TODO.new/32-uc17-coverage-matrix.md +165 -0
- data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
- data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
- data/TODO.new/35-universal-set-production-run.md +160 -0
- data/TODO.new/36-per-font-coverage-audit.md +145 -0
- data/TODO.new/37-coverage-highlight-reporter.md +125 -0
- data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
- data/TODO.new/39-implementation-order-update-32-38.md +258 -0
- data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
- data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
- data/config/specialist_fonts.yml +102 -0
- data/config/unicode17_tier1_fonts.yml +42 -0
- data/config/unicode17_universal_glyph_set.yml +293 -0
- data/lib/ucode/audit/block_aggregator.rb +57 -29
- data/lib/ucode/audit/browser/face_page.rb +128 -0
- data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
- data/lib/ucode/audit/browser/library_page.rb +74 -0
- data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
- data/lib/ucode/audit/browser/template.rb +47 -0
- data/lib/ucode/audit/browser/templates/face.css +200 -0
- data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
- data/lib/ucode/audit/browser/templates/face.js +298 -0
- data/lib/ucode/audit/browser/templates/library.css +119 -0
- data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
- data/lib/ucode/audit/browser/templates/library.js +99 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
- data/lib/ucode/audit/browser.rb +32 -0
- data/lib/ucode/audit/context.rb +27 -1
- data/lib/ucode/audit/coverage_reference.rb +103 -0
- data/lib/ucode/audit/differ.rb +121 -0
- data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
- data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
- data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
- data/lib/ucode/audit/emitter/face_directory.rb +212 -0
- data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
- data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
- data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
- data/lib/ucode/audit/emitter/paths.rb +312 -0
- data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
- data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
- data/lib/ucode/audit/emitter.rb +29 -0
- data/lib/ucode/audit/extractors/aggregations.rb +31 -2
- data/lib/ucode/audit/face_auditor.rb +86 -0
- data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
- data/lib/ucode/audit/formatters/audit_text.rb +411 -0
- data/lib/ucode/audit/formatters/color.rb +48 -0
- data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
- data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
- data/lib/ucode/audit/formatters.rb +23 -0
- data/lib/ucode/audit/library_aggregator.rb +86 -0
- data/lib/ucode/audit/library_auditor.rb +105 -0
- data/lib/ucode/audit/release/emitter.rb +152 -0
- data/lib/ucode/audit/release/face_card.rb +93 -0
- data/lib/ucode/audit/release/formula_audits.rb +50 -0
- data/lib/ucode/audit/release/library_index_builder.rb +78 -0
- data/lib/ucode/audit/release/manifest_builder.rb +127 -0
- data/lib/ucode/audit/release.rb +42 -0
- data/lib/ucode/audit/ucd_only_reference.rb +81 -0
- data/lib/ucode/audit/universal_set_reference.rb +136 -0
- data/lib/ucode/audit.rb +31 -0
- data/lib/ucode/cli.rb +339 -33
- data/lib/ucode/commands/audit/browser_command.rb +82 -0
- data/lib/ucode/commands/audit/collection_command.rb +103 -0
- data/lib/ucode/commands/audit/compare_command.rb +188 -0
- data/lib/ucode/commands/audit/font_command.rb +140 -0
- data/lib/ucode/commands/audit/library_command.rb +87 -0
- data/lib/ucode/commands/audit/reference_builder.rb +64 -0
- data/lib/ucode/commands/audit.rb +20 -0
- data/lib/ucode/commands/block_feed.rb +73 -0
- data/lib/ucode/commands/canonical_build.rb +138 -0
- data/lib/ucode/commands/fetch.rb +37 -1
- data/lib/ucode/commands/release.rb +115 -0
- data/lib/ucode/commands/universal_set.rb +211 -0
- data/lib/ucode/commands.rb +5 -0
- data/lib/ucode/coordinator/indices.rb +11 -0
- data/lib/ucode/coordinator.rb +138 -5
- data/lib/ucode/error.rb +30 -2
- data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
- data/lib/ucode/fetch/font_fetcher.rb +16 -0
- data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
- data/lib/ucode/fetch.rb +7 -3
- data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
- data/lib/ucode/glyphs/real_fonts.rb +1 -0
- data/lib/ucode/glyphs/resolver.rb +62 -0
- data/lib/ucode/glyphs/source.rb +48 -0
- data/lib/ucode/glyphs/source_builder.rb +61 -0
- data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
- data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
- data/lib/ucode/glyphs/source_config.rb +104 -0
- data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
- data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
- data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
- data/lib/ucode/glyphs/sources.rb +20 -0
- data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
- data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
- data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
- data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
- data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
- data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
- data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
- data/lib/ucode/glyphs/universal_set.rb +45 -0
- data/lib/ucode/glyphs.rb +6 -0
- data/lib/ucode/models/audit/baseline.rb +6 -0
- data/lib/ucode/models/audit/block_summary.rb +7 -0
- data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
- data/lib/ucode/models/audit/release_face.rb +42 -0
- data/lib/ucode/models/audit/release_formula.rb +33 -0
- data/lib/ucode/models/audit/release_manifest.rb +43 -0
- data/lib/ucode/models/audit/release_universal_set.rb +37 -0
- data/lib/ucode/models/audit.rb +9 -0
- data/lib/ucode/models/block.rb +2 -0
- data/lib/ucode/models/build_report.rb +109 -0
- data/lib/ucode/models/codepoint/glyph.rb +42 -0
- data/lib/ucode/models/codepoint.rb +3 -0
- data/lib/ucode/models/glyph_source.rb +86 -0
- data/lib/ucode/models/glyph_source_map.rb +138 -0
- data/lib/ucode/models/specialist_font.rb +70 -0
- data/lib/ucode/models/specialist_font_manifest.rb +48 -0
- data/lib/ucode/models/unihan_entry.rb +81 -9
- data/lib/ucode/models/unihan_field.rb +21 -0
- data/lib/ucode/models/universal_set_entry.rb +47 -0
- data/lib/ucode/models/universal_set_manifest.rb +78 -0
- data/lib/ucode/models/validation_report.rb +99 -0
- data/lib/ucode/models.rb +9 -0
- data/lib/ucode/parsers/named_sequences.rb +5 -5
- data/lib/ucode/parsers/unihan.rb +50 -19
- data/lib/ucode/repo/aggregate_writer.rb +34 -2
- data/lib/ucode/repo/block_feed_emitter.rb +153 -0
- data/lib/ucode/repo/build_report_accumulator.rb +138 -0
- data/lib/ucode/repo/build_report_writer.rb +46 -0
- data/lib/ucode/repo/build_validator.rb +229 -0
- data/lib/ucode/repo/codepoint_writer.rb +50 -1
- data/lib/ucode/repo/paths.rb +8 -0
- data/lib/ucode/repo.rb +4 -0
- data/lib/ucode/version.rb +1 -1
- data/schema/block-feed.output.schema.yml +134 -0
- metadata +143 -2
- data/ucode.gemspec +0 -56
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="utf-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
6
|
+
<title><%= block_name %> — missing glyphs — ucode audit</title>
|
|
7
|
+
<style>
|
|
8
|
+
<%= _css %>
|
|
9
|
+
</style>
|
|
10
|
+
</head>
|
|
11
|
+
<body data-universal-set-available="<%= universal_set_available %>">
|
|
12
|
+
<header class="page-header">
|
|
13
|
+
<h1><%= block_name %></h1>
|
|
14
|
+
<p class="tagline">
|
|
15
|
+
<%= total_count %> missing codepoint<%= total_count == 1 ? "" : "s" %>
|
|
16
|
+
<% if overflow_count.positive? %>
|
|
17
|
+
— showing first <%= visible_count %>
|
|
18
|
+
<% end %>
|
|
19
|
+
<% unless universal_set_available %>
|
|
20
|
+
— universal-set glyphs unavailable
|
|
21
|
+
<% end %>
|
|
22
|
+
</p>
|
|
23
|
+
</header>
|
|
24
|
+
|
|
25
|
+
<main>
|
|
26
|
+
<% if panels.empty? %>
|
|
27
|
+
<p class="hint">No missing codepoints in this block.</p>
|
|
28
|
+
<% else %>
|
|
29
|
+
<ul class="glyph-grid">
|
|
30
|
+
<% panels.each do |panel| %>
|
|
31
|
+
<li class="glyph-cell" data-codepoint="<%= panel["codepoint"] %>">
|
|
32
|
+
<div class="glyph-thumb">
|
|
33
|
+
<% if panel["svg"] %>
|
|
34
|
+
<%= panel["svg"] %>
|
|
35
|
+
<% else %>
|
|
36
|
+
<span class="glyph-na">n/a</span>
|
|
37
|
+
<% end %>
|
|
38
|
+
</div>
|
|
39
|
+
<div class="glyph-meta">
|
|
40
|
+
<span class="cp-id"><%= panel["id"] %></span>
|
|
41
|
+
<% if panel["source"] %>
|
|
42
|
+
<span class="source"><%= panel["source"] %></span>
|
|
43
|
+
<% end %>
|
|
44
|
+
</div>
|
|
45
|
+
</li>
|
|
46
|
+
<% end %>
|
|
47
|
+
</ul>
|
|
48
|
+
<% if overflow_count.positive? %>
|
|
49
|
+
<p class="overflow">+<%= overflow_count %> more codepoints not shown — see the face browser for the full list.</p>
|
|
50
|
+
<% end %>
|
|
51
|
+
<% end %>
|
|
52
|
+
</main>
|
|
53
|
+
|
|
54
|
+
<script>
|
|
55
|
+
<%= _js %>
|
|
56
|
+
</script>
|
|
57
|
+
</body>
|
|
58
|
+
</html>
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Audit
|
|
7
|
+
# Standalone HTML browsers for Mode 2 audit output.
|
|
8
|
+
#
|
|
9
|
+
# Two pages:
|
|
10
|
+
#
|
|
11
|
+
# - {Browser::FacePage} — one face's audit, fully self-contained
|
|
12
|
+
# (no external CSS/JS), with JSON inlined for instant render via
|
|
13
|
+
# `file://`. Block expansion and codepoint detail lazy-fetch
|
|
14
|
+
# the chunks emitted by {Emitter::FaceDirectory}.
|
|
15
|
+
# - {Browser::LibraryPage} — one library's index, also self-contained,
|
|
16
|
+
# with cards linking into each face page.
|
|
17
|
+
#
|
|
18
|
+
# Both pages reuse the chunk files emitted by {Emitter} — they don't
|
|
19
|
+
# duplicate the JSON, they just inline the overview slice that the
|
|
20
|
+
# initial render needs.
|
|
21
|
+
module Browser
  # Location of the page templates, resolved relative to this file's
  # directory. Kept private to the Browser namespace.
  TEMPLATE_DIR = Pathname(__dir__) + "browser/templates"
  private_constant :TEMPLATE_DIR

  # Lazily-loaded members of the namespace, keyed by constant name and
  # mapped to the file (under ucode/audit/browser/) that defines them.
  {
    Template: "template",
    FacePage: "face_page",
    LibraryPage: "library_page",
    GlyphPanel: "glyph_panel",
    MissingGlyphPage: "missing_glyph_page",
  }.each do |const_name, file_name|
    autoload const_name, "ucode/audit/browser/#{file_name}"
  end
end
|
|
31
|
+
end
|
|
32
|
+
end
|
data/lib/ucode/audit/context.rb
CHANGED
|
@@ -39,14 +39,21 @@ module Ucode
|
|
|
39
39
|
# @param options [Hash{Symbol=>Object}] audit options (ucd_version,
|
|
40
40
|
# all_codepoints, with_glyphs, etc.).
|
|
41
41
|
# @param renderer [Object, nil] glyph renderer for --with-glyphs mode.
|
|
42
|
+
# @param reference [CoverageReference, nil] the baseline the
|
|
43
|
+
# audit compares the font's cmap against. When nil, defaults
|
|
44
|
+
# to a {UcdOnlyReference} built from the resolved baseline
|
|
45
|
+
# database (TODO 25). Passing a {UniversalSetReference}
|
|
46
|
+
# attaches per-codepoint provenance to every missing-codepoint
|
|
47
|
+
# row.
|
|
42
48
|
def initialize(font:, font_path:, font_index:, num_fonts_in_source:,
|
|
43
|
-
options:, renderer: nil)
|
|
49
|
+
options:, renderer: nil, reference: nil)
|
|
44
50
|
@font = font
|
|
45
51
|
@font_path = font_path
|
|
46
52
|
@font_index = font_index
|
|
47
53
|
@num_fonts_in_source = num_fonts_in_source
|
|
48
54
|
@options = options
|
|
49
55
|
@renderer = renderer
|
|
56
|
+
@reference_override = reference
|
|
50
57
|
end
|
|
51
58
|
|
|
52
59
|
# Codepoints the font's cmap actually maps. Memoized.
|
|
@@ -63,6 +70,18 @@ module Ucode
|
|
|
63
70
|
@baseline ||= resolve_baseline
|
|
64
71
|
end
|
|
65
72
|
|
|
73
|
+
# The {CoverageReference} the audit compares against. Defaults
|
|
74
|
+
# to a {UcdOnlyReference} built from the resolved baseline
|
|
75
|
+
# database. When the caller supplied a reference at construction
|
|
76
|
+
# (typically a {UniversalSetReference}), that one is used
|
|
77
|
+
# verbatim. Memoized.
|
|
78
|
+
#
|
|
79
|
+
# @return [CoverageReference, nil] nil when the baseline itself
|
|
80
|
+
# couldn't be resolved (database missing).
|
|
81
|
+
def reference
  # `||=` cannot cache a nil result: when no override was supplied and the
  # default builder returns nil, it would be re-run on every call. Guarding
  # on `defined?` memoizes nil as well.
  return @reference if defined?(@reference)

  @reference = @reference_override || build_default_reference
end
|
|
84
|
+
|
|
66
85
|
# Detected source format string ("ttf", "otf", "ttc", ...). Memoized.
|
|
67
86
|
# @return [String, nil]
|
|
68
87
|
def source_format
|
|
@@ -132,6 +151,13 @@ module Ucode
|
|
|
132
151
|
generated_at: Time.now.utc.iso8601,
|
|
133
152
|
)
|
|
134
153
|
end
|
|
154
|
+
|
|
155
|
+
# Fallback reference used when the caller supplied none: wraps the
# baseline's database in a UcdOnlyReference, or yields nil when the
# baseline has no database.
def build_default_reference
  db = baseline.database
  UcdOnlyReference.new(database: db) unless db.nil?
end
|
|
135
161
|
end
|
|
136
162
|
end
|
|
137
163
|
end
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
|
|
4
|
+
module Audit
|
|
5
|
+
# Common interface for any "what is the assigned codepoint set"
|
|
6
|
+
# reference used by the audit pipeline.
|
|
7
|
+
#
|
|
8
|
+
# Two implementations:
|
|
9
|
+
#
|
|
10
|
+
# - {UcdOnlyReference} — derives the assigned set from the UCD
|
|
11
|
+
# database alone (block ranges). Carries no per-codepoint
|
|
12
|
+
# provenance. This is the legacy behaviour: a font audit
|
|
13
|
+
# compares against the abstract Unicode assigned-codepoint list.
|
|
14
|
+
#
|
|
15
|
+
# - {UniversalSetReference} — derives the assigned set from a
|
|
16
|
+
# universal-set manifest (TODO 24). Every codepoint carries
|
|
17
|
+
# tier + source provenance, so a missing-codepoint report can
|
|
18
|
+
# answer "what does the missing glyph look like, and where did
|
|
19
|
+
# the universal set get it from?".
|
|
20
|
+
#
|
|
21
|
+
# The audit pipeline (Context → Aggregations extractor →
|
|
22
|
+
# BlockAggregator) talks exclusively to this interface. Adding a
|
|
23
|
+
# new reference kind = one new subclass; no caller changes
|
|
24
|
+
# (open/closed).
|
|
25
|
+
class CoverageReference
  # Per-codepoint row exposed by every reference. The `tier` and
  # `source` fields are nil for references that don't carry provenance
  # (e.g. {UcdOnlyReference}). This is a plain Struct, so instances are
  # not frozen — treat rows as read-only values.
  Entry = Struct.new(:codepoint, :id, :tier, :source, keyword_init: true) do
    # True when this entry carries provenance (a tier or a source).
    #
    # @return [Boolean]
    def provenance?
      !(tier.nil? && source.nil?)
    end
  end

  def initialize; end

  # Symbol identifying the reference kind. Used by the audit report's
  # `baseline.reference_kind` field so consumers know which reference
  # produced the per-block counts.
  #
  # @return [Symbol] e.g. :ucd, :universal_set
  # @raise [NotImplementedError] unless overridden by the subclass
  def kind
    not_implemented!(__method__)
  end

  # @param codepoint [Integer]
  # @return [Boolean] true if the codepoint is in the reference set
  # @raise [NotImplementedError] unless overridden by the subclass
  def include?(codepoint)
    not_implemented!(__method__)
  end

  # Block name (verbatim Unicode identifier, e.g. "Basic_Latin") the
  # codepoint falls under, or nil if it isn't in any known block.
  #
  # @param codepoint [Integer]
  # @return [String, nil]
  # @raise [NotImplementedError] unless overridden by the subclass
  def block_name_for(codepoint)
    not_implemented!(__method__)
  end

  # Every assigned codepoint in the block, with tier + source attached
  # when the reference carries provenance.
  #
  # @param block_id [String] verbatim Unicode block name
  #   (e.g. "Basic_Latin", "Greek_and_Coptic")
  # @return [Array<Entry>] sorted by codepoint; empty for unknown
  #   block names or blocks with no assigned codepoints
  # @raise [NotImplementedError] unless overridden by the subclass
  def entries_for_block(block_id)
    not_implemented!(__method__)
  end

  # Stable identifier for the reference, embedded in audit reports so
  # consumers can detect drift. Examples:
  #
  #   "ucd:17.0.0"
  #   "universal-set:17.0.0:abc12345"
  #
  # @return [String]
  # @raise [NotImplementedError] unless overridden by the subclass
  def reference_id
    not_implemented!(__method__)
  end

  # Provenance rows for a list of codepoints, or nil when the reference
  # carries no provenance (UCD-only). Returning nil (rather than an
  # empty array) is the signal that the audit report should omit the
  # `missing_codepoint_provenance` field entirely.
  #
  # @param codepoints [Enumerable<Integer>]
  # @return [Array<Hash{Symbol=>Object}>, nil] one hash per codepoint
  #   with `:codepoint`, `:tier`, `:source` keys; or nil
  # @raise [NotImplementedError] unless overridden by the subclass
  def provenance_for(codepoints)
    not_implemented!(__method__)
  end

  private

  # Raises NotImplementedError naming the concrete class and the
  # abstract method, so an incomplete subclass is easy to diagnose.
  def not_implemented!(method_name)
    raise NotImplementedError,
          "#{self.class}##{method_name} must be implemented by a CoverageReference subclass"
  end
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
|
|
4
|
+
module Audit
|
|
5
|
+
# Computes a {Models::Audit::AuditDiff} between two AuditReports.
|
|
6
|
+
#
|
|
7
|
+
# Pure: no I/O, no font parsing. Both reports must already be built
|
|
8
|
+
# — the CLI's compare command loads them from disk or audits fresh
|
|
9
|
+
# fonts before invoking the differ.
|
|
10
|
+
#
|
|
11
|
+
# Comparison shape:
|
|
12
|
+
# - Scalar fields: one {Models::Audit::FieldChange} per differing
|
|
13
|
+
# field.
|
|
14
|
+
# - Codepoint coverage: {Models::Audit::CodepointSetDiff} built
|
|
15
|
+
# from the cmap range lists (expanded to integer sets for set
|
|
16
|
+
# arithmetic, then re-coalesced to ranges for output).
|
|
17
|
+
# - Structural inventories (features, scripts, blocks): simple
|
|
18
|
+
# array set-diffs. ucode drops the CLDR languages diff that
|
|
19
|
+
# fontisan carries (CLDR is out of scope here).
|
|
20
|
+
class Differ
  # Scalar AuditReport fields compared field-by-field. Excludes
  # generated_at / source_sha256 / source_file (per-report identity),
  # codepoints / codepoint_ranges (handled via CodepointSetDiff),
  # and nested models (surfaced via structural add/remove lists).
  COMPARED_FIELDS = %i[
    family_name subfamily_name full_name postscript_name version
    font_revision weight_class width_class italic bold panose
    total_codepoints total_glyphs
  ].freeze

  # @param left_report [Models::Audit::AuditReport]
  # @param right_report [Models::Audit::AuditReport]
  def initialize(left_report, right_report)
    @left = left_report
    @right = right_report
  end

  # Builds the diff: "added" means present on the right but not the
  # left, "removed" the reverse.
  #
  # @return [Models::Audit::AuditDiff]
  def diff
    Models::Audit::AuditDiff.new(
      left_source: @left.source_file,
      right_source: @right.source_file,
      field_changes: field_changes,
      codepoints: codepoint_diff,
      added_features: set_diff(features(@right), features(@left)),
      removed_features: set_diff(features(@left), features(@right)),
      added_scripts: set_diff(scripts(@right), scripts(@left)),
      removed_scripts: set_diff(scripts(@left), scripts(@right)),
      added_blocks: set_diff(blocks(@right), blocks(@left)),
      removed_blocks: set_diff(blocks(@left), blocks(@right)),
    )
  end

  private

  # One FieldChange per COMPARED_FIELDS entry whose value differs
  # between the two reports; equal fields are skipped.
  def field_changes
    COMPARED_FIELDS.filter_map do |field|
      left_val = @left.public_send(field)
      right_val = @right.public_send(field)
      next if left_val == right_val

      Models::Audit::FieldChange.new(
        field: field.to_s,
        left: serialize_value(left_val),
        right: serialize_value(right_val),
      )
    end
  end

  # Set arithmetic over the expanded cmap ranges of both reports.
  def codepoint_diff
    left_set = codepoints_from_ranges(@left)
    right_set = codepoints_from_ranges(@right)
    added = right_set - left_set
    removed = left_set - right_set
    unchanged = left_set & right_set

    # Sets keep insertion order, which follows the order of the report's
    # range list. Sort before coalescing so the output ranges do not
    # depend on how the input ranges happened to be ordered.
    Models::Audit::CodepointSetDiff.new(
      added: CodepointRangeCoalescer.call(added.sort),
      removed: CodepointRangeCoalescer.call(removed.sort),
      added_count: added.size,
      removed_count: removed.size,
      unchanged_count: unchanged.size,
    )
  end

  # Expand a report's compact codepoint range list into a Set<Integer>.
  # A report without ranges yields an empty set.
  def codepoints_from_ranges(report)
    ranges = report.codepoint_ranges || []
    ranges.each_with_object(Set.new) do |range, set|
      set.merge(range.first_cp..range.last_cp)
    end
  end

  # OpenType feature list, or [] when the report has no layout data.
  def features(report)
    Array(report.opentype_layout&.features)
  end

  # ucode's report carries ScriptSummary[] (structured), not String[].
  # Diff on the script_code key — it's the stable identifier.
  def scripts(report)
    Array(report.scripts).map(&:script_code)
  end

  # Block names carried by the report's block summaries.
  def blocks(report)
    Array(report.blocks).map(&:name)
  end

  # Sorted, de-duplicated members of `minuend` that are absent from
  # `subtrahend`. nil entries are dropped: they carry no identifier and
  # would make the sort raise.
  def set_diff(minuend, subtrahend)
    (Array(minuend) - Array(subtrahend)).compact.uniq.sort
  end

  # String form stored in a FieldChange: "" for nil, `to_s` for plain
  # scalars, YAML for anything structured.
  def serialize_value(value)
    case value
    when nil then ""
    when String, Integer, Float, true, false then value.to_s
    else value.to_yaml
    end
  end
end
|
|
120
|
+
end
|
|
121
|
+
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require "ucode/repo/atomic_writes"
|
|
6
|
+
require "ucode/audit/emitter/paths"
|
|
7
|
+
|
|
8
|
+
module Ucode
|
|
9
|
+
module Audit
|
|
10
|
+
module Emitter
|
|
11
|
+
# Writes `<face_dir>/blocks/<NAME>.json` — one file per touched
|
|
12
|
+
# block.
|
|
13
|
+
#
|
|
14
|
+
# The filename uses the block name verbatim (no slugifying) per
|
|
15
|
+
# `03-directory-output-spec.md` §"Block filename encoding". The
|
|
16
|
+
# only character that needs escaping is `/`, which Unicode block
|
|
17
|
+
# names never contain today.
|
|
18
|
+
#
|
|
19
|
+
# Each file is a single BlockSummary serialized via lutaml-model.
|
|
20
|
+
# The browser fetches these lazily when the user expands a block
|
|
21
|
+
# in the coverage map.
|
|
22
|
+
class BlockEmitter
  include Ucode::Repo::AtomicWrites

  # Serializes one block summary to pretty JSON and writes it to the
  # path `Paths.block_under` resolves for the (encoded) block name.
  #
  # @param face_dir [String, Pathname]
  # @param block [Models::Audit::BlockSummary]
  # @return [Boolean] result of `write_atomic` — true if written,
  #   false if skipped
  def emit(face_dir, block)
    path = Paths.block_under(face_dir, encode_name(block.name))
    write_atomic(path, to_pretty_json(serialize_block(block)))
  end

  private

  # Spec: per-block `missing_codepoints` is always embedded even when
  # empty. lutaml-model omits empty arrays by default, so we restore
  # the key post-serialization. `Array()` also turns an unset (nil)
  # collection into `[]`, so the emitted value is always a JSON array
  # rather than `null`.
  def serialize_block(block)
    block.to_hash.merge("missing_codepoints" => Array(block.missing_codepoints))
  end

  # Unicode block names are filesystem-safe as-is (no slashes).
  # This is a defensive guard: any `/` becomes `_`.
  def encode_name(name)
    name.to_s.tr("/", "_")
  end
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require "ucode/repo/atomic_writes"
|
|
6
|
+
require "ucode/audit/emitter/paths"
|
|
7
|
+
require "ucode/models/audit/codepoint_detail"
|
|
8
|
+
|
|
9
|
+
module Ucode
|
|
10
|
+
module Audit
|
|
11
|
+
module Emitter
|
|
12
|
+
# Writes `<face_dir>/codepoints/<NAME>.json` — the verbose
|
|
13
|
+
# per-block codepoint detail list, emitted only in `--verbose`
|
|
14
|
+
# mode.
|
|
15
|
+
#
|
|
16
|
+
# For each touched block, walks the font's covered codepoints in
|
|
17
|
+
# that block and emits a {Models::Audit::CodepointDetail} per row,
|
|
18
|
+
# enriched with UCD metadata (currently only the script) via the
|
|
19
|
+
# supplied {Ucode::Database}.
|
|
20
|
+
#
|
|
21
|
+
# Per-block chunking keeps each file under ~1MB even for CJK
|
|
22
|
+
# Extension J (~4,300 codepoints × ~200 bytes/detail ≈ 850KB).
|
|
23
|
+
#
|
|
24
|
+
# Glyph SVG paths are written as relative URIs so the browser can
|
|
25
|
+
# fetch each glyph on click. The `with_glyph_paths` flag controls
|
|
26
|
+
# whether to populate the path field — when false, the field is
|
|
27
|
+
# omitted entirely.
|
|
28
|
+
class CodepointEmitter
  include Ucode::Repo::AtomicWrites

  # Writes the per-block codepoint detail chunk as pretty JSON to the
  # path `Paths.codepoints_under` resolves for the (encoded) block name.
  #
  # @param face_dir [String, Pathname]
  # @param block [Models::Audit::BlockSummary]
  # @param database [Ucode::Database, nil] baseline lookup; when nil,
  #   no database-derived field is set on the details
  # @param with_glyph_paths [Boolean] when true, each detail carries a
  #   relative `glyph_svg_path` of the form `glyphs/U+XXXX.svg`
  # @return [Boolean] result of `write_atomic` — true if written,
  #   false if skipped
  def emit(face_dir, block, database: nil, with_glyph_paths: false)
    target = Paths.codepoints_under(face_dir, encode_name(block.name))
    chunk = build_chunk(block, database, with_glyph_paths)
    write_atomic(target, to_pretty_json(chunk))
  end

  private

  # Chunk envelope: block identity plus one detail hash per covered
  # codepoint.
  def build_chunk(block, database, with_glyph_paths)
    {
      "block_name" => block.name,
      "first_cp" => block.first_cp,
      "last_cp" => block.last_cp,
      "codepoints" => block.covered_codepoints.map do |cp|
        detail_hash(cp, block, database, with_glyph_paths)
      end,
    }
  end

  # Builds a single CodepointDetail, fills the optional fields, and
  # returns its hash form with nil-valued keys removed.
  def detail_hash(codepoint, block, database, with_glyph_paths)
    detail = Models::Audit::CodepointDetail.new(
      codepoint: codepoint,
      block_name: block.name,
    )
    # Script is the only field taken from the database here.
    detail.script = database.lookup_script(codepoint) if database
    if with_glyph_paths
      detail.glyph_svg_path = "glyphs/#{format('U+%04X', codepoint)}.svg"
    end
    detail.to_hash.compact
  end

  # Defensive guard for the filename: any `/` in the name becomes `_`.
  def encode_name(name)
    name.to_s.gsub("/", "_")
  end
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require "ucode/repo/atomic_writes"
|
|
6
|
+
require "ucode/audit/emitter/paths"
|
|
7
|
+
|
|
8
|
+
module Ucode
|
|
9
|
+
module Audit
|
|
10
|
+
module Emitter
|
|
11
|
+
# Writes the per-collection layout for TTC/OTC inputs.
|
|
12
|
+
#
|
|
13
|
+
# For a collection with N faces, produces:
|
|
14
|
+
#
|
|
15
|
+
# output/font_audit/<source_label>/
|
|
16
|
+
# ├── index.json # collection-level summary
|
|
17
|
+
# ├── 00-<face_ps>/index.json
|
|
18
|
+
# ├── 00-<face_ps>/blocks/…
|
|
19
|
+
# ├── 01-<face_ps>/index.json
|
|
20
|
+
# └── …
|
|
21
|
+
#
|
|
22
|
+
# Per-face chunks are delegated to {FaceDirectory} via the
|
|
23
|
+
# `emit_collection_face` hook; this class owns only the
|
|
24
|
+
# collection-level summary that points at each sibling face
|
|
25
|
+
# directory.
|
|
26
|
+
class CollectionEmitter
  include Ucode::Repo::AtomicWrites

  # Emits every face through the supplied per-face emitter, then writes
  # the collection-level index that points at each face directory.
  #
  # @param output_root [String, Pathname]
  # @param source_label [String] sanitized collection label
  # @param reports [Array<Models::Audit::AuditReport>] one per face
  # @param face_directory [FaceDirectory] per-face emitter
  # @return [Array<String>] the per-face subdirectory names written
  def emit(output_root, source_label, reports, face_directory:)
    written = reports.map.with_index do |face_report, position|
      face_directory.emit_collection_face(
        source_label: source_label, face_index: position, report: face_report,
      )
    end

    emit_collection_index(output_root, source_label, reports, written)
    written
  end

  private

  # Writes the collection index; nothing is written for an empty
  # report list.
  def emit_collection_index(output_root, source_label, reports, face_dirs)
    return if reports.empty?

    index_hash = build_collection_index(reports, face_dirs)
    write_atomic(
      Paths.face_index_path(output_root, source_label),
      to_pretty_json(index_hash),
    )
  end

  # Collection summary. Source-level fields are read from the first
  # report; keys whose value is nil are dropped.
  def build_collection_index(reports, face_dirs)
    lead = reports.first
    summary = {
      "num_fonts_in_source" => lead&.num_fonts_in_source || reports.size,
      "source_file" => lead&.source_file,
      "source_sha256" => lead&.source_sha256,
      "faces" => face_cards(reports, face_dirs),
    }
    summary.compact
  end

  # One card per face, in report order, paired with the directory at
  # the same position in `face_dirs`.
  def face_cards(reports, face_dirs)
    reports.zip(face_dirs).each_with_index.map do |(face_report, directory), position|
      {
        "font_index" => position,
        "postscript_name" => face_report.postscript_name,
        "family_name" => face_report.family_name,
        "weight_class" => face_report.weight_class,
        "total_codepoints" => face_report.total_codepoints,
        "total_glyphs" => face_report.total_glyphs,
        "directory" => directory,
      }
    end
  end
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|