ucode 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +72 -0
  3. data/Gemfile.lock +2 -2
  4. data/TODO.full/00-README.md +116 -0
  5. data/TODO.full/01-panglyph-vision.md +112 -0
  6. data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
  7. data/TODO.full/03-panglyph-font-builder.md +201 -0
  8. data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
  9. data/TODO.full/05-ucode-0-1-1-release.md +139 -0
  10. data/TODO.full/06-fontisan-remove-audit.md +142 -0
  11. data/TODO.full/07-fontisan-remove-ucd.md +125 -0
  12. data/TODO.full/08-archive-private-bin-build.md +143 -0
  13. data/TODO.full/09-archive-public-structure.md +164 -0
  14. data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
  15. data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
  16. data/TODO.full/12-implementation-order.md +216 -0
  17. data/TODO.full/13-fontisan-font-writer-api.md +189 -0
  18. data/TODO.full/14-fontisan-table-writers.md +66 -0
  19. data/TODO.full/15-panglyph-builder-real.md +82 -0
  20. data/TODO.full/16-archive-public-sync-workflows.md +167 -0
  21. data/TODO.full/17-fontist-org-font-picker.md +73 -0
  22. data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
  23. data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
  24. data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
  25. data/TODO.new/00-README.md +30 -0
  26. data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
  27. data/TODO.new/24-universal-glyph-set-build.md +189 -0
  28. data/TODO.new/25-font-audit-against-universal-set.md +195 -0
  29. data/TODO.new/26-missing-glyph-reporter.md +189 -0
  30. data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
  31. data/TODO.new/28-implementation-order-update.md +187 -0
  32. data/TODO.new/29-universal-set-curation-uc17.md +312 -0
  33. data/TODO.new/30-tier1-font-acquisition.md +241 -0
  34. data/TODO.new/31-universal-set-production-build.md +205 -0
  35. data/TODO.new/32-uc17-coverage-matrix.md +165 -0
  36. data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
  37. data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
  38. data/TODO.new/35-universal-set-production-run.md +160 -0
  39. data/TODO.new/36-per-font-coverage-audit.md +145 -0
  40. data/TODO.new/37-coverage-highlight-reporter.md +125 -0
  41. data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
  42. data/TODO.new/39-implementation-order-update-32-38.md +258 -0
  43. data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
  44. data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
  45. data/config/specialist_fonts.yml +102 -0
  46. data/config/unicode17_tier1_fonts.yml +42 -0
  47. data/config/unicode17_universal_glyph_set.yml +293 -0
  48. data/lib/ucode/audit/block_aggregator.rb +57 -29
  49. data/lib/ucode/audit/browser/face_page.rb +128 -0
  50. data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
  51. data/lib/ucode/audit/browser/library_page.rb +74 -0
  52. data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
  53. data/lib/ucode/audit/browser/template.rb +47 -0
  54. data/lib/ucode/audit/browser/templates/face.css +200 -0
  55. data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
  56. data/lib/ucode/audit/browser/templates/face.js +298 -0
  57. data/lib/ucode/audit/browser/templates/library.css +119 -0
  58. data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
  59. data/lib/ucode/audit/browser/templates/library.js +99 -0
  60. data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
  61. data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
  62. data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
  63. data/lib/ucode/audit/browser.rb +32 -0
  64. data/lib/ucode/audit/context.rb +27 -1
  65. data/lib/ucode/audit/coverage_reference.rb +103 -0
  66. data/lib/ucode/audit/differ.rb +121 -0
  67. data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
  68. data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
  69. data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
  70. data/lib/ucode/audit/emitter/face_directory.rb +212 -0
  71. data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
  72. data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
  73. data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
  74. data/lib/ucode/audit/emitter/paths.rb +312 -0
  75. data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
  76. data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
  77. data/lib/ucode/audit/emitter.rb +29 -0
  78. data/lib/ucode/audit/extractors/aggregations.rb +31 -2
  79. data/lib/ucode/audit/face_auditor.rb +86 -0
  80. data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
  81. data/lib/ucode/audit/formatters/audit_text.rb +411 -0
  82. data/lib/ucode/audit/formatters/color.rb +48 -0
  83. data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
  84. data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
  85. data/lib/ucode/audit/formatters.rb +23 -0
  86. data/lib/ucode/audit/library_aggregator.rb +86 -0
  87. data/lib/ucode/audit/library_auditor.rb +105 -0
  88. data/lib/ucode/audit/release/emitter.rb +152 -0
  89. data/lib/ucode/audit/release/face_card.rb +93 -0
  90. data/lib/ucode/audit/release/formula_audits.rb +50 -0
  91. data/lib/ucode/audit/release/library_index_builder.rb +78 -0
  92. data/lib/ucode/audit/release/manifest_builder.rb +127 -0
  93. data/lib/ucode/audit/release.rb +42 -0
  94. data/lib/ucode/audit/ucd_only_reference.rb +81 -0
  95. data/lib/ucode/audit/universal_set_reference.rb +136 -0
  96. data/lib/ucode/audit.rb +31 -0
  97. data/lib/ucode/cli.rb +339 -33
  98. data/lib/ucode/commands/audit/browser_command.rb +82 -0
  99. data/lib/ucode/commands/audit/collection_command.rb +103 -0
  100. data/lib/ucode/commands/audit/compare_command.rb +188 -0
  101. data/lib/ucode/commands/audit/font_command.rb +140 -0
  102. data/lib/ucode/commands/audit/library_command.rb +87 -0
  103. data/lib/ucode/commands/audit/reference_builder.rb +64 -0
  104. data/lib/ucode/commands/audit.rb +20 -0
  105. data/lib/ucode/commands/block_feed.rb +73 -0
  106. data/lib/ucode/commands/canonical_build.rb +138 -0
  107. data/lib/ucode/commands/fetch.rb +37 -1
  108. data/lib/ucode/commands/release.rb +115 -0
  109. data/lib/ucode/commands/universal_set.rb +211 -0
  110. data/lib/ucode/commands.rb +5 -0
  111. data/lib/ucode/coordinator/indices.rb +11 -0
  112. data/lib/ucode/coordinator.rb +138 -5
  113. data/lib/ucode/error.rb +30 -2
  114. data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
  115. data/lib/ucode/fetch/font_fetcher.rb +16 -0
  116. data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
  117. data/lib/ucode/fetch.rb +7 -3
  118. data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
  119. data/lib/ucode/glyphs/real_fonts.rb +1 -0
  120. data/lib/ucode/glyphs/resolver.rb +62 -0
  121. data/lib/ucode/glyphs/source.rb +48 -0
  122. data/lib/ucode/glyphs/source_builder.rb +61 -0
  123. data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
  124. data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
  125. data/lib/ucode/glyphs/source_config.rb +104 -0
  126. data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
  127. data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
  128. data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
  129. data/lib/ucode/glyphs/sources.rb +20 -0
  130. data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
  131. data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
  132. data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
  133. data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
  134. data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
  135. data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
  136. data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
  137. data/lib/ucode/glyphs/universal_set.rb +45 -0
  138. data/lib/ucode/glyphs.rb +6 -0
  139. data/lib/ucode/models/audit/baseline.rb +6 -0
  140. data/lib/ucode/models/audit/block_summary.rb +7 -0
  141. data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
  142. data/lib/ucode/models/audit/release_face.rb +42 -0
  143. data/lib/ucode/models/audit/release_formula.rb +33 -0
  144. data/lib/ucode/models/audit/release_manifest.rb +43 -0
  145. data/lib/ucode/models/audit/release_universal_set.rb +37 -0
  146. data/lib/ucode/models/audit.rb +9 -0
  147. data/lib/ucode/models/block.rb +2 -0
  148. data/lib/ucode/models/build_report.rb +109 -0
  149. data/lib/ucode/models/codepoint/glyph.rb +42 -0
  150. data/lib/ucode/models/codepoint.rb +3 -0
  151. data/lib/ucode/models/glyph_source.rb +86 -0
  152. data/lib/ucode/models/glyph_source_map.rb +138 -0
  153. data/lib/ucode/models/specialist_font.rb +70 -0
  154. data/lib/ucode/models/specialist_font_manifest.rb +48 -0
  155. data/lib/ucode/models/unihan_entry.rb +81 -9
  156. data/lib/ucode/models/unihan_field.rb +21 -0
  157. data/lib/ucode/models/universal_set_entry.rb +47 -0
  158. data/lib/ucode/models/universal_set_manifest.rb +78 -0
  159. data/lib/ucode/models/validation_report.rb +99 -0
  160. data/lib/ucode/models.rb +9 -0
  161. data/lib/ucode/parsers/named_sequences.rb +5 -5
  162. data/lib/ucode/parsers/unihan.rb +50 -19
  163. data/lib/ucode/repo/aggregate_writer.rb +34 -2
  164. data/lib/ucode/repo/block_feed_emitter.rb +153 -0
  165. data/lib/ucode/repo/build_report_accumulator.rb +138 -0
  166. data/lib/ucode/repo/build_report_writer.rb +46 -0
  167. data/lib/ucode/repo/build_validator.rb +229 -0
  168. data/lib/ucode/repo/codepoint_writer.rb +50 -1
  169. data/lib/ucode/repo/paths.rb +8 -0
  170. data/lib/ucode/repo.rb +4 -0
  171. data/lib/ucode/version.rb +1 -1
  172. data/schema/block-feed.output.schema.yml +134 -0
  173. metadata +143 -2
  174. data/ucode.gemspec +0 -56
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ucode
4
+ module Audit
5
# Release-tree assembler (TODO 27).
#
# Builds the release artifact that fontist.org consumes: one or more
# per-formula library audits combined with the universal-set
# reference. `fontist.org/scripts/fetch-data.sh` takes the release
# tree as a unit, so a single tarball of
# `<output_root>/font_audit_release/` is self-contained.
#
# Tree layout (per TODO 27):
#
#   <output_root>/font_audit_release/
#   ├── audit/<formula_slug>/<postscript_name>/...  # per-face audit
#   ├── universal_glyph_set/                        # TODO 24 build
#   ├── library.json                                # formulas + faces
#   └── manifest.json                               # versions + sha256s
#
# Lazily loaded components:
#
# - {FormulaAudits} — value object that pairs a formula slug with
#   its library-wide audit summary.
# - {FaceCard} — autoloaded from `release/face_card`; see that file
#   for its contract.
# - {LibraryIndexBuilder} — pure builder for `library.json`.
# - {ManifestBuilder} — pure builder for `manifest.json`; it returns
#   a {Models::Audit::ReleaseManifest}.
# - {Emitter} — orchestrator that drives
#   {Ucode::Audit::Emitter::FaceDirectory} once per formula and then
#   writes the two top-level indices.
#
# The emitter only performs I/O: it is handed ready-built
# {Models::Audit::LibrarySummary} instances. Running the audits is up
# to the caller (see {Ucode::Commands::ReleaseCommand}).
module Release
  # Constant name => feature path, registered in declaration order.
  # Each file is only required the first time its constant is touched.
  {
    FormulaAudits: "ucode/audit/release/formula_audits",
    FaceCard: "ucode/audit/release/face_card",
    LibraryIndexBuilder: "ucode/audit/release/library_index_builder",
    ManifestBuilder: "ucode/audit/release/manifest_builder",
    Emitter: "ucode/audit/release/emitter",
  }.each { |const_name, feature| autoload(const_name, feature) }
end
41
+ end
42
+ end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ucode
4
+ module Audit
5
# {CoverageReference} implementation that relies solely on a
# {Ucode::Database}. This is the "legacy" baseline: the assigned
# codepoint set is derived from block ranges alone, and no
# per-codepoint provenance is available.
#
# The audit pipeline uses it when there is no universal-set manifest
# to compare against, or when the user opts out explicitly with
# `--reference-universal-set=none`. Every audit before TODO 25 worked
# this way.
class UcdOnlyReference < CoverageReference
  # @return [Ucode::Database, nil] the database given at construction
  attr_reader :database

  # @param database [Ucode::Database, nil] with nil, every query
  #   answers empty / false — the caller should surface a warning.
  def initialize(database:)
    super()
    @database = database
  end

  # @return [Symbol] always :ucd
  def kind
    :ucd
  end

  # (see CoverageReference#include?)
  #
  # True when the database resolves the codepoint to a block; false
  # when it does not, or when there is no database at all.
  def include?(codepoint)
    block = @database&.lookup_block(codepoint)
    !block.nil?
  end

  # (see CoverageReference#block_name_for)
  #
  # Delegates to the database's block lookup; nil without a database.
  def block_name_for(codepoint)
    @database&.lookup_block(codepoint)
  end

  # (see CoverageReference#entries_for_block)
  #
  # Produces one entry per codepoint in every range the database
  # reports for the block. Yields an empty array when there is no
  # database or the database reports no ranges.
  def entries_for_block(block_id)
    block_ranges = @database&.block_ranges_by_name(block_id)
    return [] if block_ranges.nil? || block_ranges.empty?

    block_ranges.flat_map { |span| expand_range(span) }
  end

  # (see CoverageReference#reference_id)
  #
  # The version part falls back to "unknown" when the database is
  # absent or does not report a UCD version.
  def reference_id
    "ucd:#{@database&.ucd_version || 'unknown'}"
  end

  # A UCD-only reference has no provenance to offer. The nil return
  # tells the caller "do not populate `missing_codepoint_provenance`",
  # which keeps the audit report in its legacy wire shape.
  #
  # @return [nil]
  def provenance_for(_codepoints)
    nil
  end

  private

  # Turns one block range into entries, first_cp through last_cp
  # inclusive. Tier and source are always nil for this reference.
  def expand_range(range)
    (range.first_cp..range.last_cp).map { |codepoint| entry_for(codepoint) }
  end

  # Builds the provenance-free entry for a single codepoint.
  def entry_for(codepoint)
    Entry.new(codepoint: codepoint, id: format_id(codepoint), tier: nil, source: nil)
  end

  # Renders a codepoint as "U+" plus zero-padded uppercase hex:
  # four digits up to 0xFFFF, six digits above that.
  def format_id(codepoint)
    digits = codepoint > 0xFFFF ? 6 : 4
    format("U+%0*X", digits, codepoint)
  end
end
80
+ end
81
+ end
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "pathname"
5
+
6
+ module Ucode
7
+ module Audit
8
# {CoverageReference} implementation driven by a universal-set
# manifest (TODO 24). Each codepoint in the set carries tier and
# source provenance, which lets a missing-codepoint report answer
# "what does the missing glyph look like, and where did the universal
# set source it from?".
#
# The manifest lists codepoints but says nothing about block
# membership, so a {Ucode::Database} is still needed to go from a
# block name to its assigned codepoints. Per codepoint, the reference
# answers "is this in the universal set, and what tier/source did it
# come from?".
class UniversalSetReference < CoverageReference
  # The UCD database used for block lookups. It is public so the
  # BlockAggregator can map codepoints -> block names through the
  # same Database instance this reference was built against.
  # @return [Ucode::Database, nil]
  attr_reader :database

  # @param manifest [Ucode::Models::UniversalSetManifest, String, Pathname]
  #   an already-loaded manifest, or the path of a manifest.json. A
  #   path is not read until the first query that needs it.
  # @param database [Ucode::Database, nil] used for block lookups.
  def initialize(manifest:, database:)
    super()
    @manifest_source = manifest
    @database = database
  end

  # @return [Symbol] always :universal_set
  def kind
    :universal_set
  end

  # (see CoverageReference#include?)
  #
  # Membership is decided by the manifest entries alone.
  def include?(codepoint)
    entries_by_cp.key?(codepoint)
  end

  # (see CoverageReference#block_name_for)
  #
  # Delegates to the database's block lookup; nil without a database.
  def block_name_for(codepoint)
    @database&.lookup_block(codepoint)
  end

  # (see CoverageReference#entries_for_block)
  #
  # Walks every range the database reports for the block and keeps
  # only the codepoints that have a manifest entry. Yields an empty
  # array when there is no database or no ranges.
  def entries_for_block(block_id)
    block_ranges = @database&.block_ranges_by_name(block_id)
    return [] if block_ranges.nil? || block_ranges.empty?

    block_ranges.flat_map { |span| expand_range(span) }
  end

  # (see CoverageReference#reference_id)
  #
  # Shape: "universal-set:<unicode_version>:<first 12 chars of the
  # source-config sha256>", with "no-sha" standing in when the
  # manifest carries no digest.
  def reference_id
    digest = manifest.source_config_sha256
    suffix =
      if digest
        digest.to_s[0, 12]
      else
        "no-sha"
      end
    "universal-set:#{manifest.unicode_version}:#{suffix}"
  end

  # @return [Hash{String=>Object}] provenance metadata destined for
  #   the audit report's `baseline` field
  def baseline_metadata
    loaded = manifest
    {
      "unicode_version" => loaded.unicode_version,
      "ucode_version" => loaded.ucode_version,
      "source_config_sha256" => loaded.source_config_sha256,
      "reference_kind" => "universal-set",
    }
  end

  # (see CoverageReference#provenance_for)
  # @return [Array<Hash{Symbol=>Object}>] one hash per codepoint, in
  #   input order; tier and source are nil for codepoints that have
  #   no manifest entry
  def provenance_for(codepoints)
    codepoints.map do |codepoint|
      hit = entries_by_cp[codepoint]
      { codepoint: codepoint, tier: hit&.tier, source: hit&.source }
    end
  end

  # The manifest model behind this reference. When a path was given
  # it is read on first access and memoized afterwards.
  # @return [Ucode::Models::UniversalSetManifest]
  def manifest
    @manifest ||= load_manifest
  end

  private

  # Memoized codepoint => manifest entry index.
  def entries_by_cp
    @entries_by_cp ||= manifest.entries.each_with_object({}) do |entry, index|
      index[entry.codepoint] = entry
    end
  end

  # Collects a {CoverageReference::Entry} for each codepoint of the
  # range (first_cp through last_cp inclusive) that the manifest
  # knows; codepoints without an entry are skipped.
  def expand_range(range)
    (range.first_cp..range.last_cp).each_with_object([]) do |codepoint, found|
      hit = entries_by_cp[codepoint]
      next unless hit

      found << CoverageReference::Entry.new(
        codepoint: codepoint, id: hit.id,
        tier: hit.tier, source: hit.source,
      )
    end
  end

  # Resolves whatever the constructor received into a manifest model:
  # a model is passed through, a String/Pathname is read and parsed
  # as JSON, and anything else is rejected.
  #
  # @raise [ArgumentError] for an unsupported manifest argument
  def load_manifest
    source = @manifest_source
    return source if source.is_a?(Ucode::Models::UniversalSetManifest)

    unless source.is_a?(String) || source.is_a?(Pathname)
      raise ArgumentError,
            "manifest must be a UniversalSetManifest or a path, " \
            "got #{source.class}"
    end

    parsed = JSON.parse(Pathname.new(source).read)
    Ucode::Models::UniversalSetManifest.from_hash(parsed)
  end
end
135
+ end
136
+ end
data/lib/ucode/audit.rb CHANGED
@@ -23,5 +23,36 @@ module Ucode
23
23
  autoload :ScriptAggregator, "ucode/audit/script_aggregator"
24
24
  autoload :PlaneAggregator, "ucode/audit/plane_aggregator"
25
25
  autoload :DiscrepancyDetector, "ucode/audit/discrepancy_detector"
26
+
27
+ # CoverageReference hierarchy (TODO 25): pluggable baseline that
28
+ # the audit pipeline compares a font's cmap against. The default
29
+ # is UCD-only; supply a UniversalSetReference when a universal
30
+ # glyph set manifest is on disk so per-codepoint provenance is
31
+ # attached to every missing-codepoint row.
32
+ autoload :CoverageReference, "ucode/audit/coverage_reference"
33
+ autoload :UcdOnlyReference, "ucode/audit/ucd_only_reference"
34
+ autoload :UniversalSetReference, "ucode/audit/universal_set_reference"
35
+
36
+ # Per-face orchestrator (TODO 11) — shared by LibraryAuditor and
37
+ # the future CLI AuditCommand.
38
+ autoload :FaceAuditor, "ucode/audit/face_auditor"
39
+
40
+ # Cross-report orchestration (TODO 11).
41
+ autoload :Differ, "ucode/audit/differ"
42
+ autoload :LibraryAuditor, "ucode/audit/library_auditor"
43
+ autoload :LibraryAggregator, "ucode/audit/library_aggregator"
44
+
45
+ # Human-readable text output (TODO 12).
46
+ autoload :Formatters, "ucode/audit/formatters"
47
+
48
+ # Mode 2 directory output writers (TODO 13).
49
+ autoload :Emitter, "ucode/audit/emitter"
50
+
51
+ # Standalone HTML browsers for Mode 2 output (TODOs 14+15).
52
+ autoload :Browser, "ucode/audit/browser"
53
+
54
+ # Release-tree assembler (TODO 27) — fontist.org-consumable
55
+ # artifact that composes per-formula audits + the universal set.
56
+ autoload :Release, "ucode/audit/release"
26
57
  end
27
58
  end