ucode 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +72 -0
- data/Gemfile.lock +2 -2
- data/TODO.full/00-README.md +116 -0
- data/TODO.full/01-panglyph-vision.md +112 -0
- data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
- data/TODO.full/03-panglyph-font-builder.md +201 -0
- data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
- data/TODO.full/05-ucode-0-1-1-release.md +139 -0
- data/TODO.full/06-fontisan-remove-audit.md +142 -0
- data/TODO.full/07-fontisan-remove-ucd.md +125 -0
- data/TODO.full/08-archive-private-bin-build.md +143 -0
- data/TODO.full/09-archive-public-structure.md +164 -0
- data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
- data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
- data/TODO.full/12-implementation-order.md +216 -0
- data/TODO.full/13-fontisan-font-writer-api.md +189 -0
- data/TODO.full/14-fontisan-table-writers.md +66 -0
- data/TODO.full/15-panglyph-builder-real.md +82 -0
- data/TODO.full/16-archive-public-sync-workflows.md +167 -0
- data/TODO.full/17-fontist-org-font-picker.md +73 -0
- data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
- data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
- data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
- data/TODO.new/00-README.md +30 -0
- data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
- data/TODO.new/24-universal-glyph-set-build.md +189 -0
- data/TODO.new/25-font-audit-against-universal-set.md +195 -0
- data/TODO.new/26-missing-glyph-reporter.md +189 -0
- data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
- data/TODO.new/28-implementation-order-update.md +187 -0
- data/TODO.new/29-universal-set-curation-uc17.md +312 -0
- data/TODO.new/30-tier1-font-acquisition.md +241 -0
- data/TODO.new/31-universal-set-production-build.md +205 -0
- data/TODO.new/32-uc17-coverage-matrix.md +165 -0
- data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
- data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
- data/TODO.new/35-universal-set-production-run.md +160 -0
- data/TODO.new/36-per-font-coverage-audit.md +145 -0
- data/TODO.new/37-coverage-highlight-reporter.md +125 -0
- data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
- data/TODO.new/39-implementation-order-update-32-38.md +258 -0
- data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
- data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
- data/config/specialist_fonts.yml +102 -0
- data/config/unicode17_tier1_fonts.yml +42 -0
- data/config/unicode17_universal_glyph_set.yml +293 -0
- data/lib/ucode/audit/block_aggregator.rb +57 -29
- data/lib/ucode/audit/browser/face_page.rb +128 -0
- data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
- data/lib/ucode/audit/browser/library_page.rb +74 -0
- data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
- data/lib/ucode/audit/browser/template.rb +47 -0
- data/lib/ucode/audit/browser/templates/face.css +200 -0
- data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
- data/lib/ucode/audit/browser/templates/face.js +298 -0
- data/lib/ucode/audit/browser/templates/library.css +119 -0
- data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
- data/lib/ucode/audit/browser/templates/library.js +99 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
- data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
- data/lib/ucode/audit/browser.rb +32 -0
- data/lib/ucode/audit/context.rb +27 -1
- data/lib/ucode/audit/coverage_reference.rb +103 -0
- data/lib/ucode/audit/differ.rb +121 -0
- data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
- data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
- data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
- data/lib/ucode/audit/emitter/face_directory.rb +212 -0
- data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
- data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
- data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
- data/lib/ucode/audit/emitter/paths.rb +312 -0
- data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
- data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
- data/lib/ucode/audit/emitter.rb +29 -0
- data/lib/ucode/audit/extractors/aggregations.rb +31 -2
- data/lib/ucode/audit/face_auditor.rb +86 -0
- data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
- data/lib/ucode/audit/formatters/audit_text.rb +411 -0
- data/lib/ucode/audit/formatters/color.rb +48 -0
- data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
- data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
- data/lib/ucode/audit/formatters.rb +23 -0
- data/lib/ucode/audit/library_aggregator.rb +86 -0
- data/lib/ucode/audit/library_auditor.rb +105 -0
- data/lib/ucode/audit/release/emitter.rb +152 -0
- data/lib/ucode/audit/release/face_card.rb +93 -0
- data/lib/ucode/audit/release/formula_audits.rb +50 -0
- data/lib/ucode/audit/release/library_index_builder.rb +78 -0
- data/lib/ucode/audit/release/manifest_builder.rb +127 -0
- data/lib/ucode/audit/release.rb +42 -0
- data/lib/ucode/audit/ucd_only_reference.rb +81 -0
- data/lib/ucode/audit/universal_set_reference.rb +136 -0
- data/lib/ucode/audit.rb +31 -0
- data/lib/ucode/cli.rb +339 -33
- data/lib/ucode/commands/audit/browser_command.rb +82 -0
- data/lib/ucode/commands/audit/collection_command.rb +103 -0
- data/lib/ucode/commands/audit/compare_command.rb +188 -0
- data/lib/ucode/commands/audit/font_command.rb +140 -0
- data/lib/ucode/commands/audit/library_command.rb +87 -0
- data/lib/ucode/commands/audit/reference_builder.rb +64 -0
- data/lib/ucode/commands/audit.rb +20 -0
- data/lib/ucode/commands/block_feed.rb +73 -0
- data/lib/ucode/commands/canonical_build.rb +138 -0
- data/lib/ucode/commands/fetch.rb +37 -1
- data/lib/ucode/commands/release.rb +115 -0
- data/lib/ucode/commands/universal_set.rb +211 -0
- data/lib/ucode/commands.rb +5 -0
- data/lib/ucode/coordinator/indices.rb +11 -0
- data/lib/ucode/coordinator.rb +138 -5
- data/lib/ucode/error.rb +30 -2
- data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
- data/lib/ucode/fetch/font_fetcher.rb +16 -0
- data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
- data/lib/ucode/fetch.rb +7 -3
- data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
- data/lib/ucode/glyphs/real_fonts.rb +1 -0
- data/lib/ucode/glyphs/resolver.rb +62 -0
- data/lib/ucode/glyphs/source.rb +48 -0
- data/lib/ucode/glyphs/source_builder.rb +61 -0
- data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
- data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
- data/lib/ucode/glyphs/source_config.rb +104 -0
- data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
- data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
- data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
- data/lib/ucode/glyphs/sources.rb +20 -0
- data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
- data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
- data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
- data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
- data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
- data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
- data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
- data/lib/ucode/glyphs/universal_set.rb +45 -0
- data/lib/ucode/glyphs.rb +6 -0
- data/lib/ucode/models/audit/baseline.rb +6 -0
- data/lib/ucode/models/audit/block_summary.rb +7 -0
- data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
- data/lib/ucode/models/audit/release_face.rb +42 -0
- data/lib/ucode/models/audit/release_formula.rb +33 -0
- data/lib/ucode/models/audit/release_manifest.rb +43 -0
- data/lib/ucode/models/audit/release_universal_set.rb +37 -0
- data/lib/ucode/models/audit.rb +9 -0
- data/lib/ucode/models/block.rb +2 -0
- data/lib/ucode/models/build_report.rb +109 -0
- data/lib/ucode/models/codepoint/glyph.rb +42 -0
- data/lib/ucode/models/codepoint.rb +3 -0
- data/lib/ucode/models/glyph_source.rb +86 -0
- data/lib/ucode/models/glyph_source_map.rb +138 -0
- data/lib/ucode/models/specialist_font.rb +70 -0
- data/lib/ucode/models/specialist_font_manifest.rb +48 -0
- data/lib/ucode/models/unihan_entry.rb +81 -9
- data/lib/ucode/models/unihan_field.rb +21 -0
- data/lib/ucode/models/universal_set_entry.rb +47 -0
- data/lib/ucode/models/universal_set_manifest.rb +78 -0
- data/lib/ucode/models/validation_report.rb +99 -0
- data/lib/ucode/models.rb +9 -0
- data/lib/ucode/parsers/named_sequences.rb +5 -5
- data/lib/ucode/parsers/unihan.rb +50 -19
- data/lib/ucode/repo/aggregate_writer.rb +34 -2
- data/lib/ucode/repo/block_feed_emitter.rb +153 -0
- data/lib/ucode/repo/build_report_accumulator.rb +138 -0
- data/lib/ucode/repo/build_report_writer.rb +46 -0
- data/lib/ucode/repo/build_validator.rb +229 -0
- data/lib/ucode/repo/codepoint_writer.rb +50 -1
- data/lib/ucode/repo/paths.rb +8 -0
- data/lib/ucode/repo.rb +4 -0
- data/lib/ucode/version.rb +1 -1
- data/schema/block-feed.output.schema.yml +134 -0
- metadata +143 -2
- data/ucode.gemspec +0 -56
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "lutaml/model"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Models
|
|
7
|
+
module Audit
|
|
8
|
+
# Universal-set reference section of a {ReleaseManifest}.
|
|
9
|
+
#
|
|
10
|
+
# Records whether the release tree carries a co-located universal
|
|
11
|
+
# glyph set (TODO 24) and where to find it. fontist.org consumes
|
|
12
|
+
# this to decide whether to render missing-codepoint thumbnails
|
|
13
|
+
# from the universal set or fall back to text-only chips.
|
|
14
|
+
#
|
|
15
|
+
# When `available` is false, `reason` carries a short diagnostic
|
|
16
|
+
# string ("universal-set directory not found at <path>"). The
|
|
17
|
+
# other fields are nil, except `totals`, which defaults to an empty hash.
|
|
18
|
+
class ReleaseUniversalSet < Lutaml::Model::Serializable
|
|
19
|
+
attribute :available, Lutaml::Model::Type::Boolean
|
|
20
|
+
attribute :manifest_path, :string
|
|
21
|
+
attribute :glyphs_dir, :string
|
|
22
|
+
attribute :unicode_version, :string
|
|
23
|
+
attribute :totals, :hash, default: -> { {} }
|
|
24
|
+
attribute :reason, :string
|
|
25
|
+
|
|
26
|
+
key_value do
|
|
27
|
+
map "available", to: :available
|
|
28
|
+
map "manifest_path", to: :manifest_path
|
|
29
|
+
map "glyphs_dir", to: :glyphs_dir
|
|
30
|
+
map "unicode_version", to: :unicode_version
|
|
31
|
+
map "totals", to: :totals
|
|
32
|
+
map "reason", to: :reason
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
data/lib/ucode/models/audit.rb
CHANGED
|
@@ -23,6 +23,7 @@ module Ucode
|
|
|
23
23
|
autoload :PlaneSummary, "ucode/models/audit/plane_summary"
|
|
24
24
|
autoload :Discrepancy, "ucode/models/audit/discrepancy"
|
|
25
25
|
autoload :CodepointDetail, "ucode/models/audit/codepoint_detail"
|
|
26
|
+
autoload :CodepointProvenance, "ucode/models/audit/codepoint_provenance"
|
|
26
27
|
|
|
27
28
|
# Ported from fontisan (namespace swap + minor renames)
|
|
28
29
|
autoload :AuditReport, "ucode/models/audit/audit_report"
|
|
@@ -45,6 +46,14 @@ module Ucode
|
|
|
45
46
|
autoload :DuplicateGroup, "ucode/models/audit/duplicate_group"
|
|
46
47
|
autoload :LibrarySummary, "ucode/models/audit/library_summary"
|
|
47
48
|
autoload :AuditDiff, "ucode/models/audit/audit_diff"
|
|
49
|
+
|
|
50
|
+
# Release-tree models (TODO 27) — fontist.org-consumable
|
|
51
|
+
# artifact manifest. ReleaseManifest is the top-level shape;
|
|
52
|
+
# the others are nested entries.
|
|
53
|
+
autoload :ReleaseManifest, "ucode/models/audit/release_manifest"
|
|
54
|
+
autoload :ReleaseFormulaEntry, "ucode/models/audit/release_formula"
|
|
55
|
+
autoload :ReleaseFaceEntry, "ucode/models/audit/release_face"
|
|
56
|
+
autoload :ReleaseUniversalSet, "ucode/models/audit/release_universal_set"
|
|
48
57
|
end
|
|
49
58
|
end
|
|
50
59
|
end
|
data/lib/ucode/models/block.rb
CHANGED
|
@@ -13,6 +13,7 @@ module Ucode
|
|
|
13
13
|
attribute :range_first, :integer
|
|
14
14
|
attribute :range_last, :integer
|
|
15
15
|
attribute :plane_number, :integer
|
|
16
|
+
attribute :age, :string
|
|
16
17
|
attribute :codepoint_ids, :string, collection: true, default: -> { [] }
|
|
17
18
|
|
|
18
19
|
key_value do
|
|
@@ -21,6 +22,7 @@ module Ucode
|
|
|
21
22
|
map "range_first", to: :range_first
|
|
22
23
|
map "range_last", to: :range_last
|
|
23
24
|
map "plane_number", to: :plane_number
|
|
25
|
+
map "age", to: :age
|
|
24
26
|
map "codepoint_ids", to: :codepoint_ids
|
|
25
27
|
end
|
|
26
28
|
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "lutaml/model"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Models
|
|
7
|
+
# Build report for one canonical Unicode dataset run. The
|
|
8
|
+
# deliverable spec'd in TODO 21: emitted at the end of a Mode 1
|
|
9
|
+
# build as `output/build-report.json`, summarizing what got built,
|
|
10
|
+
# how (per-tier), per-block, and any failures.
|
|
11
|
+
#
|
|
12
|
+
# The model is passive — accumulation logic lives in
|
|
13
|
+
# {Ucode::Repo::BuildReportAccumulator}; this class only describes
|
|
14
|
+
# the wire shape and handles (de)serialization via lutaml-model.
|
|
15
|
+
#
|
|
16
|
+
# Wire format (see TODO 21):
|
|
17
|
+
#
|
|
18
|
+
# {
|
|
19
|
+
# "unicode_version": "17.0.0",
|
|
20
|
+
# "ucode_version": "0.2.0",
|
|
21
|
+
# "generated_at": "2026-07-01T12:00:00Z",
|
|
22
|
+
# "totals": { "assigned": 150012, "built": 150012,
|
|
23
|
+
# "skipped": 0, "failed": 0 },
|
|
24
|
+
# "by_tier": { "tier-1": 150012, "pillar-1": 3000, ... },
|
|
25
|
+
# "by_block": [
|
|
26
|
+
# { "name": "Basic Latin", "assigned": 128, "built": 128,
|
|
27
|
+
# "tier_breakdown": { "tier-1": 128 } },
|
|
28
|
+
# ...
|
|
29
|
+
# ],
|
|
30
|
+
# "failures": []
|
|
31
|
+
# }
|
|
32
|
+
#
|
|
33
|
+
# `by_tier` counts overlap across tiers (a codepoint attempted via
|
|
34
|
+
# Tier 1 but falling through to Pillar 1 is counted in both);
|
|
35
|
+
# `built` per-codepoint is the tier that actually produced its
|
|
36
|
+
# glyph.
|
|
37
|
+
class BuildReport < Lutaml::Model::Serializable
|
|
38
|
+
# Total counts for the run.
|
|
39
|
+
class Totals < Lutaml::Model::Serializable
|
|
40
|
+
attribute :assigned, :integer, default: 0
|
|
41
|
+
attribute :built, :integer, default: 0
|
|
42
|
+
attribute :skipped, :integer, default: 0
|
|
43
|
+
attribute :failed, :integer, default: 0
|
|
44
|
+
|
|
45
|
+
key_value do
|
|
46
|
+
map "assigned", to: :assigned
|
|
47
|
+
map "built", to: :built
|
|
48
|
+
map "skipped", to: :skipped
|
|
49
|
+
map "failed", to: :failed
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Per-block rollup. One entry per Unicode block in the run.
|
|
54
|
+
class BlockSummary < Lutaml::Model::Serializable
|
|
55
|
+
attribute :name, :string
|
|
56
|
+
attribute :assigned, :integer, default: 0
|
|
57
|
+
attribute :built, :integer, default: 0
|
|
58
|
+
attribute :tier_breakdown, :hash, default: -> { {} }
|
|
59
|
+
|
|
60
|
+
key_value do
|
|
61
|
+
map "name", to: :name
|
|
62
|
+
map "assigned", to: :assigned
|
|
63
|
+
map "built", to: :built
|
|
64
|
+
map "tier_breakdown", to: :tier_breakdown
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# One failure record. `codepoint` is the integer codepoint (or
|
|
69
|
+
# nil if the failure is structural); `tier` is the resolver tier
|
|
70
|
+
# that raised (or nil); `error_class` and `message` carry the
|
|
71
|
+
# exception details; `backtrace` is optional.
|
|
72
|
+
class Failure < Lutaml::Model::Serializable
|
|
73
|
+
attribute :codepoint, :integer
|
|
74
|
+
attribute :block_name, :string
|
|
75
|
+
attribute :tier, :string
|
|
76
|
+
attribute :error_class, :string
|
|
77
|
+
attribute :message, :string
|
|
78
|
+
attribute :backtrace, :string, collection: true, default: -> { [] }
|
|
79
|
+
|
|
80
|
+
key_value do
|
|
81
|
+
map "codepoint", to: :codepoint
|
|
82
|
+
map "block_name", to: :block_name
|
|
83
|
+
map "tier", to: :tier
|
|
84
|
+
map "error_class", to: :error_class
|
|
85
|
+
map "message", to: :message
|
|
86
|
+
map "backtrace", to: :backtrace
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
attribute :unicode_version, :string
|
|
91
|
+
attribute :ucode_version, :string
|
|
92
|
+
attribute :generated_at, :string
|
|
93
|
+
attribute :totals, Totals
|
|
94
|
+
attribute :by_tier, :hash, default: -> { {} }
|
|
95
|
+
attribute :by_block, BlockSummary, collection: true, default: -> { [] }
|
|
96
|
+
attribute :failures, Failure, collection: true, default: -> { [] }
|
|
97
|
+
|
|
98
|
+
key_value do
|
|
99
|
+
map "unicode_version", to: :unicode_version
|
|
100
|
+
map "ucode_version", to: :ucode_version
|
|
101
|
+
map "generated_at", to: :generated_at
|
|
102
|
+
map "totals", to: :totals
|
|
103
|
+
map "by_tier", to: :by_tier
|
|
104
|
+
map "by_block", to: :by_block
|
|
105
|
+
map "failures", to: :failures
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "lutaml/model"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Models
|
|
7
|
+
class CodePoint < Lutaml::Model::Serializable
|
|
8
|
+
# Glyph bundle for one codepoint. Records where the SVG lives on
|
|
9
|
+
# disk and which resolver tier produced it.
|
|
10
|
+
#
|
|
11
|
+
# The `svg_path` is relative to the codepoint's own directory
|
|
12
|
+
# (always "glyph.svg" — the layout is fixed in {Ucode::Repo::Paths}).
|
|
13
|
+
# The `source` bundle carries the resolver tier name and
|
|
14
|
+
# provenance string, so the dataset is debuggable end-to-end:
|
|
15
|
+
# every glyph in the build can be traced back to its origin
|
|
16
|
+
# (real font, embedded ToUnicode, correlator, or Last Resort).
|
|
17
|
+
class Glyph < Lutaml::Model::Serializable
|
|
18
|
+
# Provenance bundle for a glyph — which tier of the 4-tier
|
|
19
|
+
# resolver produced it. The Ruby class name `Source` mirrors
|
|
20
|
+
# the wire field name; it is unrelated to the
|
|
21
|
+
# {Ucode::Glyphs::Source} abstract base.
|
|
22
|
+
class Source < Lutaml::Model::Serializable
|
|
23
|
+
attribute :tier, :string
|
|
24
|
+
attribute :provenance, :string
|
|
25
|
+
|
|
26
|
+
key_value do
|
|
27
|
+
map "tier", to: :tier
|
|
28
|
+
map "provenance", to: :provenance
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
attribute :svg_path, :string, default: -> { "glyph.svg" }
|
|
33
|
+
attribute :source, Source
|
|
34
|
+
|
|
35
|
+
key_value do
|
|
36
|
+
map "svg_path", to: :svg_path
|
|
37
|
+
map "source", to: :source
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
@@ -26,6 +26,7 @@ module Ucode
|
|
|
26
26
|
autoload :Emoji, "ucode/models/codepoint/emoji"
|
|
27
27
|
autoload :Identifier, "ucode/models/codepoint/identifier"
|
|
28
28
|
autoload :Normalization, "ucode/models/codepoint/normalization"
|
|
29
|
+
autoload :Glyph, "ucode/models/codepoint/glyph"
|
|
29
30
|
|
|
30
31
|
# Identity + scalar attributes
|
|
31
32
|
attribute :cp, :integer
|
|
@@ -74,6 +75,7 @@ module Ucode
|
|
|
74
75
|
collection: true, default: -> { [] }
|
|
75
76
|
attribute :unihan, "Ucode::Models::UnihanEntry"
|
|
76
77
|
attribute :names_list, "Ucode::Models::NamesListEntry"
|
|
78
|
+
attribute :glyph, Glyph
|
|
77
79
|
|
|
78
80
|
key_value do
|
|
79
81
|
map "codepoint", to: :cp
|
|
@@ -116,6 +118,7 @@ module Ucode
|
|
|
116
118
|
map "standardized_variants", to: :standardized_variants
|
|
117
119
|
map "unihan", to: :unihan
|
|
118
120
|
map "names_list", to: :names_list
|
|
121
|
+
map "glyph", to: :glyph
|
|
119
122
|
end
|
|
120
123
|
end
|
|
121
124
|
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "lutaml/model"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Models
|
|
7
|
+
# One Tier 1 font entry inside a {GlyphSourceMap}. Corresponds to
|
|
8
|
+
# one `- kind: …` item under a block's `sources:` list in
|
|
9
|
+
# `config/unicode17_universal_glyph_set.yml`.
|
|
10
|
+
#
|
|
11
|
+
# This is the typed representation of a font curation choice. The
|
|
12
|
+
# {Ucode::Glyphs::Sources::Tier1RealFont} source consumes it to
|
|
13
|
+
# resolve and load the font; the resolver treats each entry as an
|
|
14
|
+
# independent tier-1 attempt.
|
|
15
|
+
#
|
|
16
|
+
# Wire shape (YAML / JSON identical):
|
|
17
|
+
#
|
|
18
|
+
# kind: fontist # one of: fontist, path, system
|
|
19
|
+
# label: noto-sans # human + provenance key
|
|
20
|
+
# priority: 1 # lower wins; resolver tries in order
|
|
21
|
+
# license: OFL # optional; OFL / PROPRIETARY / etc.
|
|
22
|
+
# provenance: "Google Noto Sans" # optional citation
|
|
23
|
+
# path: "/abs/font.ttf" # required when kind == :path
|
|
24
|
+
#
|
|
25
|
+
# `kind` is stored as a plain string on the wire (lutaml-model has
|
|
26
|
+
# no Symbol adapter for key_value); the {#kind_sym} reader casts it
|
|
27
|
+
# for internal dispatch.
|
|
28
|
+
class GlyphSource < Lutaml::Model::Serializable
|
|
29
|
+
KIND_FONTIST = "fontist"
|
|
30
|
+
KIND_PATH = "path"
|
|
31
|
+
KIND_SYSTEM = "system"
|
|
32
|
+
KINDS = [KIND_FONTIST, KIND_PATH, KIND_SYSTEM].freeze
|
|
33
|
+
private_constant :KIND_FONTIST, :KIND_PATH, :KIND_SYSTEM, :KINDS
|
|
34
|
+
|
|
35
|
+
attribute :kind, :string
|
|
36
|
+
attribute :label, :string
|
|
37
|
+
attribute :priority, :integer, default: -> { 100 }
|
|
38
|
+
attribute :license, :string
|
|
39
|
+
attribute :provenance, :string
|
|
40
|
+
attribute :path, :string
|
|
41
|
+
|
|
42
|
+
key_value do
|
|
43
|
+
map "kind", to: :kind
|
|
44
|
+
map "label", to: :label
|
|
45
|
+
map "priority", to: :priority
|
|
46
|
+
map "license", to: :license
|
|
47
|
+
map "provenance", to: :provenance
|
|
48
|
+
map "path", to: :path
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Symbol form of the wire-level `kind` string, used for internal dispatch.
# The value is not checked against the known kinds here; the expected
# results are :fontist, :path and :system.
#
# @return [Symbol] the declared kind
# @raise [ArgumentError] when `kind` is nil or an empty string — every
#   entry must declare its kind
def kind_sym
  declared = kind
  raise ArgumentError, "GlyphSource#kind is required" if declared.to_s.empty?

  declared.to_sym
end
|
|
58
|
+
|
|
59
|
+
# @return [Boolean] true when this entry requires a `path` field
|
|
60
|
+
# (kind == :path). Used by the loader to validate structure.
|
|
61
|
+
# @raise [ArgumentError] when `kind` is blank (raised by {#kind_sym})
def requires_path?
|
|
62
|
+
kind_sym == :path
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# Renders this source as the legacy font-spec string consumed by
# {Ucode::Glyphs::RealFonts::FontLocator}:
#
# * kind=path             -> "label=/path/to/font"
# * kind=fontist, system  -> the bare label
#
# This is the one adapter method that lets the typed model integrate
# with the existing locator without rewriting it.
#
# @return [String]
# @raise [ArgumentError] when `kind` is blank (via {#kind_sym}), when
#   kind=path but `path` is nil or empty, or when `kind` is not one of
#   fontist, path, system
def to_font_spec
  case kind_sym
  when :path
    # The label is set off with a space and quoted so the message names
    # the offending entry instead of fusing it onto "GlyphSource".
    raise ArgumentError, "GlyphSource #{label.inspect} has kind=path but no path" if path.nil? || path.empty?

    "#{label}=#{path}"
  when :fontist, :system
    label
  else
    # An unrecognised kind used to fall out of the case and return nil,
    # contradicting the String return contract; fail loudly instead.
    raise ArgumentError, "GlyphSource #{label.inspect} has unknown kind #{kind_sym.inspect}"
  end
end
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "lutaml/model"
|
|
4
|
+
|
|
5
|
+
require "ucode/models/glyph_source"
|
|
6
|
+
|
|
7
|
+
module Ucode
|
|
8
|
+
module Models
|
|
9
|
+
# Top-level shape of `config/unicode17_universal_glyph_set.yml`.
|
|
10
|
+
# Pairs envelope metadata (Unicode + ucode version, generated_at)
|
|
11
|
+
# with the block→sources map itself.
|
|
12
|
+
#
|
|
13
|
+
# Block keys are the verbatim Unicode block name with runs of
|
|
14
|
+
# whitespace collapsed to a single underscore — the canonical
|
|
15
|
+
# block id used everywhere else in this codebase (see
|
|
16
|
+
# {Ucode::Parsers::Blocks}): "Basic_Latin", "Greek_and_Coptic",
|
|
17
|
+
# "CJK_Unified_Ideographs_Extension_J". Never slugified beyond
|
|
18
|
+
# whitespace collapsing.
|
|
19
|
+
#
|
|
20
|
+
# Wire shape (note: `map:` is a hash keyed by block id, not an
|
|
21
|
+
# array):
|
|
22
|
+
#
|
|
23
|
+
# unicode_version: "17.0.0"
|
|
24
|
+
# ucode_version: "0.2.0"
|
|
25
|
+
# generated_at: "2026-06-28T00:00:00Z"
|
|
26
|
+
# default_sources: # applies when a block's sources are absent/empty
|
|
27
|
+
# - kind: fontist
|
|
28
|
+
# label: noto-sans
|
|
29
|
+
# priority: 1
|
|
30
|
+
# license: OFL
|
|
31
|
+
# map:
|
|
32
|
+
# Basic_Latin:
|
|
33
|
+
# sources:
|
|
34
|
+
# - kind: fontist
|
|
35
|
+
# label: noto-sans
|
|
36
|
+
# priority: 1
|
|
37
|
+
# Sidetic:
|
|
38
|
+
# sources: []
|
|
39
|
+
#
|
|
40
|
+
# An entry with `sources: []` (or omitted) is valid: it declares
|
|
41
|
+
# "no block-specific Tier 1 font; fall back to `default_sources`,
|
|
42
|
+
# then to Pillars 1-3". The fallback chain is implemented in
|
|
43
|
+
# {#sources_for}; the raw map is left untouched.
|
|
44
|
+
#
|
|
45
|
+
# The hash is stored as a raw `:hash` attribute (lutaml-model
|
|
46
|
+
# collection semantics don't pair cleanly with a hash-keyed wire
|
|
47
|
+
# shape); the typed accessors wrap each entry's raw hashes in
|
|
48
|
+
# {GlyphSource} instances on demand.
|
|
49
|
+
class GlyphSourceMap < Lutaml::Model::Serializable
|
|
50
|
+
attribute :unicode_version, :string
|
|
51
|
+
attribute :ucode_version, :string
|
|
52
|
+
attribute :generated_at, :string
|
|
53
|
+
attribute :default_sources_raw, :hash, collection: true, default: -> { [] }
|
|
54
|
+
attribute :block_sources, :hash, default: -> { {} }
|
|
55
|
+
|
|
56
|
+
key_value do
|
|
57
|
+
map "unicode_version", to: :unicode_version
|
|
58
|
+
map "ucode_version", to: :ucode_version
|
|
59
|
+
map "generated_at", to: :generated_at
|
|
60
|
+
map "default_sources", to: :default_sources_raw
|
|
61
|
+
map "map", to: :block_sources
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Typed sources for one block, following the fallback chain
# block-specific `sources` -> top-level `default_sources` -> empty.
#
# @param block_id [String] verbatim block id (underscore form)
# @return [Array<GlyphSource>] sources in ascending priority order;
#   entries with equal priority keep their declared order
def sources_for(block_id)
  raw_list = extract_sources_list(block_sources[block_id])
  raw_list = default_sources_list if raw_list.empty?

  typed = raw_list.map { |entry| GlyphSource.from_hash(entry.transform_keys(&:to_s)) }
  # sort_by alone is not stable, so tie-break on the declared position:
  # entries sharing a priority must not be reordered.
  typed.sort_by.with_index { |source, position| [source.priority, position] }
end
|
|
75
|
+
|
|
76
|
+
# The top-level `default_sources`, typed as {GlyphSource} instances.
#
# @return [Array<GlyphSource>] defaults in ascending priority order;
#   entries with equal priority keep their declared order. Empty when
#   no defaults are declared.
def default_sources
  typed = default_sources_list.map { |entry| GlyphSource.from_hash(entry.transform_keys(&:to_s)) }
  # sort_by alone is not stable, so tie-break on the declared position.
  typed.sort_by.with_index { |source, position| [source.priority, position] }
end
|
|
83
|
+
|
|
84
|
+
# @param block_id [String]
|
|
85
|
+
# @return [Boolean] true if the block has any entry in the map
|
|
86
|
+
# (even with empty sources). Does not consider `default_sources`.
|
|
87
|
+
# Exact-key lookup on the raw map: `block_id` is not normalized first.
def has_block?(block_id)
|
|
88
|
+
block_sources.key?(block_id)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# Lists the keys of the raw block map, whether or not a block declares
# any sources of its own.
#
# @return [Array<String>] block ids in the map's insertion order
def block_ids
  block_sources.each_key.to_a
end
|
|
96
|
+
|
|
97
|
+
# @return [Array<String>] block_ids whose own `sources:` list has
|
|
98
|
+
# at least one entry. Blocks relying on `default_sources` are
|
|
99
|
+
# excluded — they have no block-specific policy.
|
|
100
|
+
def configured_block_ids
|
|
101
|
+
block_sources.each_with_object([]) do |(block_id, raw), acc|
|
|
102
|
+
acc << block_id if any_sources?(raw)
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
private
|
|
107
|
+
|
|
108
|
+
# Pulls the list of source hashes out of one block's raw YAML value.
# Accepted shapes:
#   - an Array (shorthand): returned as-is;
#   - a Hash with a "sources" or :sources key (canonical form): that
#     key's value coerced with Array(); the string key wins if both
#     are present.
# Anything else, including nil, yields an empty list.
def extract_sources_list(raw)
  case raw
  when Array
    raw
  when Hash
    found_key = ["sources", :sources].find { |candidate| raw.key?(candidate) }
    found_key.nil? ? [] : Array(raw[found_key])
  else
    []
  end
end
|
|
120
|
+
|
|
121
|
+
# Tells whether one block's raw YAML value declares at least one
# source. The value may be the shorthand Array or a Hash holding the
# list under "sources" / :sources; nil and any other shape count as
# having none.
#
# @return [Boolean]
def any_sources?(raw)
  case raw
  when Array then raw.any?
  when Hash then Array(raw["sources"] || raw[:sources]).any?
  else false
  end
end
|
|
130
|
+
|
|
131
|
+
# `default_sources` on the wire is a list of source hashes. Older
|
|
132
|
+
# configs may omit it; treat absence as an empty list.
|
|
133
|
+
# Array() turns a nil `default_sources_raw` into [] and returns an
# existing array unchanged.
def default_sources_list
|
|
134
|
+
Array(default_sources_raw)
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "lutaml/model"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Models
|
|
7
|
+
# One entry in `config/specialist_fonts.yml` — a Tier 1 font that
|
|
8
|
+
# fontist's formula index does not carry (academic sites, SIL
|
|
9
|
+
# downloads, GitHub releases). The fetcher walks a list of these
|
|
10
|
+
# and materializes each `path` on disk.
|
|
11
|
+
#
|
|
12
|
+
# Wire shape (YAML):
|
|
13
|
+
#
|
|
14
|
+
# label: Lentariso
|
|
15
|
+
# version: "1.033"
|
|
16
|
+
# license: OFL
|
|
17
|
+
# url: "https://github.com/.../Lentariso.otf"
|
|
18
|
+
# sha256: "<hex>" # null until first successful fetch
|
|
19
|
+
# path: "data/fonts/Lentariso.otf"
|
|
20
|
+
# extract: false
|
|
21
|
+
# extract_member: null # required when extract: true
|
|
22
|
+
# provenance: "Imperial Aramaic / Phoenician / Sidetic coverage"
|
|
23
|
+
#
|
|
24
|
+
# `url: null` marks a local-only entry: the user supplies the
|
|
25
|
+
# file at `path` (which may use `~` and shell globs); the fetcher
|
|
26
|
+
# never attempts a network download for these.
|
|
27
|
+
class SpecialistFont < Lutaml::Model::Serializable
|
|
28
|
+
LICENSE_OFL = "OFL"
|
|
29
|
+
private_constant :LICENSE_OFL
|
|
30
|
+
|
|
31
|
+
attribute :label, :string
|
|
32
|
+
attribute :version, :string
|
|
33
|
+
attribute :license, :string, default: -> { LICENSE_OFL }
|
|
34
|
+
attribute :url, :string
|
|
35
|
+
attribute :sha256, :string
|
|
36
|
+
attribute :path, :string
|
|
37
|
+
attribute :extract, :boolean, default: -> { false }
|
|
38
|
+
attribute :extract_member, :string
|
|
39
|
+
attribute :provenance, :string
|
|
40
|
+
|
|
41
|
+
key_value do
|
|
42
|
+
map "label", to: :label
|
|
43
|
+
map "version", to: :version
|
|
44
|
+
map "license", to: :license
|
|
45
|
+
map "url", to: :url
|
|
46
|
+
map "sha256", to: :sha256
|
|
47
|
+
map "path", to: :path
|
|
48
|
+
map "extract", to: :extract
|
|
49
|
+
map "extract_member", to: :extract_member
|
|
50
|
+
map "provenance", to: :provenance
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# @return [Boolean] true when `url` is nil or empty, i.e. there is
#   nothing to download for this entry
def local_only?
|
|
54
|
+
url.nil? || url.empty?
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# @return [Boolean] true when `license` is exactly "OFL" (LICENSE_OFL);
#   the comparison is case-sensitive
def ofl?
|
|
58
|
+
license == LICENSE_OFL
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# @return [Boolean] true when `sha256` is neither nil nor empty
def hash_known?
|
|
62
|
+
!sha256.nil? && !sha256.empty?
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# @return [Boolean] true only when `extract` is exactly true; nil and
#   any other value give false
def extract?
|
|
66
|
+
extract == true
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "lutaml/model"
|
|
4
|
+
|
|
5
|
+
require "ucode/models/specialist_font"
|
|
6
|
+
|
|
7
|
+
module Ucode
|
|
8
|
+
module Models
|
|
9
|
+
# Typed view over `config/specialist_fonts.yml`. Carries the full
|
|
10
|
+
# list of {SpecialistFont} entries; provides lookup by label so
|
|
11
|
+
# the fetcher can honor `--label Lentariso` without scanning the
|
|
12
|
+
# array itself.
|
|
13
|
+
#
|
|
14
|
+
# The manifest is pure data — it does not know the path it was
|
|
15
|
+
# loaded from. Persistence of computed SHA256 hashes back to disk
|
|
16
|
+
# is the responsibility of {Ucode::Fetch::SpecialistFontFetcher},
|
|
17
|
+
# which owns the file path and writes atomically after a run.
|
|
18
|
+
class SpecialistFontManifest < Lutaml::Model::Serializable
|
|
19
|
+
attribute :fonts, SpecialistFont, collection: true
|
|
20
|
+
|
|
21
|
+
key_value do
|
|
22
|
+
map "fonts", to: :fonts
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
# Looks up a font entry by its label.
#
# @param label [String] exact label match (case-sensitive)
# @return [SpecialistFont, nil] the first entry whose label equals
#   `label`, or nil when there is none
def find_by_label(label)
  # `fonts` is declared without a default, so it may be nil when the
  # manifest has no `fonts:` key; treat that as an empty list.
  Array(fonts).find { |font| font.label == label }
end
|
|
30
|
+
|
|
31
|
+
# @return [Array<String>] labels of every entry, in declared order;
#   empty when the manifest has no fonts
def labels
  # `fonts` is declared without a default, so it may be nil when the
  # manifest has no `fonts:` key; treat that as an empty list.
  Array(fonts).map(&:label)
end
|
|
35
|
+
|
|
36
|
+
# Narrows the manifest to a single font.
#
# @param label [String] label to keep
# @return [SpecialistFontManifest] a fresh manifest holding just the
#   matching font, or this same manifest object when no entry carries
#   that label (the fetcher reports the unknown label separately)
def only(label)
  found = find_by_label(label)
  found.nil? ? self : self.class.new(fonts: [found])
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|