ucode 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +72 -0
  3. data/Gemfile.lock +2 -2
  4. data/TODO.full/00-README.md +116 -0
  5. data/TODO.full/01-panglyph-vision.md +112 -0
  6. data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
  7. data/TODO.full/03-panglyph-font-builder.md +201 -0
  8. data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
  9. data/TODO.full/05-ucode-0-1-1-release.md +139 -0
  10. data/TODO.full/06-fontisan-remove-audit.md +142 -0
  11. data/TODO.full/07-fontisan-remove-ucd.md +125 -0
  12. data/TODO.full/08-archive-private-bin-build.md +143 -0
  13. data/TODO.full/09-archive-public-structure.md +164 -0
  14. data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
  15. data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
  16. data/TODO.full/12-implementation-order.md +216 -0
  17. data/TODO.full/13-fontisan-font-writer-api.md +189 -0
  18. data/TODO.full/14-fontisan-table-writers.md +66 -0
  19. data/TODO.full/15-panglyph-builder-real.md +82 -0
  20. data/TODO.full/16-archive-public-sync-workflows.md +167 -0
  21. data/TODO.full/17-fontist-org-font-picker.md +73 -0
  22. data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
  23. data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
  24. data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
  25. data/TODO.new/00-README.md +30 -0
  26. data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
  27. data/TODO.new/24-universal-glyph-set-build.md +189 -0
  28. data/TODO.new/25-font-audit-against-universal-set.md +195 -0
  29. data/TODO.new/26-missing-glyph-reporter.md +189 -0
  30. data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
  31. data/TODO.new/28-implementation-order-update.md +187 -0
  32. data/TODO.new/29-universal-set-curation-uc17.md +312 -0
  33. data/TODO.new/30-tier1-font-acquisition.md +241 -0
  34. data/TODO.new/31-universal-set-production-build.md +205 -0
  35. data/TODO.new/32-uc17-coverage-matrix.md +165 -0
  36. data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
  37. data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
  38. data/TODO.new/35-universal-set-production-run.md +160 -0
  39. data/TODO.new/36-per-font-coverage-audit.md +145 -0
  40. data/TODO.new/37-coverage-highlight-reporter.md +125 -0
  41. data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
  42. data/TODO.new/39-implementation-order-update-32-38.md +258 -0
  43. data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
  44. data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
  45. data/config/specialist_fonts.yml +102 -0
  46. data/config/unicode17_tier1_fonts.yml +42 -0
  47. data/config/unicode17_universal_glyph_set.yml +293 -0
  48. data/lib/ucode/audit/block_aggregator.rb +57 -29
  49. data/lib/ucode/audit/browser/face_page.rb +128 -0
  50. data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
  51. data/lib/ucode/audit/browser/library_page.rb +74 -0
  52. data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
  53. data/lib/ucode/audit/browser/template.rb +47 -0
  54. data/lib/ucode/audit/browser/templates/face.css +200 -0
  55. data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
  56. data/lib/ucode/audit/browser/templates/face.js +298 -0
  57. data/lib/ucode/audit/browser/templates/library.css +119 -0
  58. data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
  59. data/lib/ucode/audit/browser/templates/library.js +99 -0
  60. data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
  61. data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
  62. data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
  63. data/lib/ucode/audit/browser.rb +32 -0
  64. data/lib/ucode/audit/context.rb +27 -1
  65. data/lib/ucode/audit/coverage_reference.rb +103 -0
  66. data/lib/ucode/audit/differ.rb +121 -0
  67. data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
  68. data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
  69. data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
  70. data/lib/ucode/audit/emitter/face_directory.rb +212 -0
  71. data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
  72. data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
  73. data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
  74. data/lib/ucode/audit/emitter/paths.rb +312 -0
  75. data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
  76. data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
  77. data/lib/ucode/audit/emitter.rb +29 -0
  78. data/lib/ucode/audit/extractors/aggregations.rb +31 -2
  79. data/lib/ucode/audit/face_auditor.rb +86 -0
  80. data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
  81. data/lib/ucode/audit/formatters/audit_text.rb +411 -0
  82. data/lib/ucode/audit/formatters/color.rb +48 -0
  83. data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
  84. data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
  85. data/lib/ucode/audit/formatters.rb +23 -0
  86. data/lib/ucode/audit/library_aggregator.rb +86 -0
  87. data/lib/ucode/audit/library_auditor.rb +105 -0
  88. data/lib/ucode/audit/release/emitter.rb +152 -0
  89. data/lib/ucode/audit/release/face_card.rb +93 -0
  90. data/lib/ucode/audit/release/formula_audits.rb +50 -0
  91. data/lib/ucode/audit/release/library_index_builder.rb +78 -0
  92. data/lib/ucode/audit/release/manifest_builder.rb +127 -0
  93. data/lib/ucode/audit/release.rb +42 -0
  94. data/lib/ucode/audit/ucd_only_reference.rb +81 -0
  95. data/lib/ucode/audit/universal_set_reference.rb +136 -0
  96. data/lib/ucode/audit.rb +31 -0
  97. data/lib/ucode/cli.rb +339 -33
  98. data/lib/ucode/commands/audit/browser_command.rb +82 -0
  99. data/lib/ucode/commands/audit/collection_command.rb +103 -0
  100. data/lib/ucode/commands/audit/compare_command.rb +188 -0
  101. data/lib/ucode/commands/audit/font_command.rb +140 -0
  102. data/lib/ucode/commands/audit/library_command.rb +87 -0
  103. data/lib/ucode/commands/audit/reference_builder.rb +64 -0
  104. data/lib/ucode/commands/audit.rb +20 -0
  105. data/lib/ucode/commands/block_feed.rb +73 -0
  106. data/lib/ucode/commands/canonical_build.rb +138 -0
  107. data/lib/ucode/commands/fetch.rb +37 -1
  108. data/lib/ucode/commands/release.rb +115 -0
  109. data/lib/ucode/commands/universal_set.rb +211 -0
  110. data/lib/ucode/commands.rb +5 -0
  111. data/lib/ucode/coordinator/indices.rb +11 -0
  112. data/lib/ucode/coordinator.rb +138 -5
  113. data/lib/ucode/error.rb +30 -2
  114. data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
  115. data/lib/ucode/fetch/font_fetcher.rb +16 -0
  116. data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
  117. data/lib/ucode/fetch.rb +7 -3
  118. data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
  119. data/lib/ucode/glyphs/real_fonts.rb +1 -0
  120. data/lib/ucode/glyphs/resolver.rb +62 -0
  121. data/lib/ucode/glyphs/source.rb +48 -0
  122. data/lib/ucode/glyphs/source_builder.rb +61 -0
  123. data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
  124. data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
  125. data/lib/ucode/glyphs/source_config.rb +104 -0
  126. data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
  127. data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
  128. data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
  129. data/lib/ucode/glyphs/sources.rb +20 -0
  130. data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
  131. data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
  132. data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
  133. data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
  134. data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
  135. data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
  136. data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
  137. data/lib/ucode/glyphs/universal_set.rb +45 -0
  138. data/lib/ucode/glyphs.rb +6 -0
  139. data/lib/ucode/models/audit/baseline.rb +6 -0
  140. data/lib/ucode/models/audit/block_summary.rb +7 -0
  141. data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
  142. data/lib/ucode/models/audit/release_face.rb +42 -0
  143. data/lib/ucode/models/audit/release_formula.rb +33 -0
  144. data/lib/ucode/models/audit/release_manifest.rb +43 -0
  145. data/lib/ucode/models/audit/release_universal_set.rb +37 -0
  146. data/lib/ucode/models/audit.rb +9 -0
  147. data/lib/ucode/models/block.rb +2 -0
  148. data/lib/ucode/models/build_report.rb +109 -0
  149. data/lib/ucode/models/codepoint/glyph.rb +42 -0
  150. data/lib/ucode/models/codepoint.rb +3 -0
  151. data/lib/ucode/models/glyph_source.rb +86 -0
  152. data/lib/ucode/models/glyph_source_map.rb +138 -0
  153. data/lib/ucode/models/specialist_font.rb +70 -0
  154. data/lib/ucode/models/specialist_font_manifest.rb +48 -0
  155. data/lib/ucode/models/unihan_entry.rb +81 -9
  156. data/lib/ucode/models/unihan_field.rb +21 -0
  157. data/lib/ucode/models/universal_set_entry.rb +47 -0
  158. data/lib/ucode/models/universal_set_manifest.rb +78 -0
  159. data/lib/ucode/models/validation_report.rb +99 -0
  160. data/lib/ucode/models.rb +9 -0
  161. data/lib/ucode/parsers/named_sequences.rb +5 -5
  162. data/lib/ucode/parsers/unihan.rb +50 -19
  163. data/lib/ucode/repo/aggregate_writer.rb +34 -2
  164. data/lib/ucode/repo/block_feed_emitter.rb +153 -0
  165. data/lib/ucode/repo/build_report_accumulator.rb +138 -0
  166. data/lib/ucode/repo/build_report_writer.rb +46 -0
  167. data/lib/ucode/repo/build_validator.rb +229 -0
  168. data/lib/ucode/repo/codepoint_writer.rb +50 -1
  169. data/lib/ucode/repo/paths.rb +8 -0
  170. data/lib/ucode/repo.rb +4 -0
  171. data/lib/ucode/version.rb +1 -1
  172. data/schema/block-feed.output.schema.yml +134 -0
  173. metadata +143 -2
  174. data/ucode.gemspec +0 -56
@@ -0,0 +1,138 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/cache"
6
+ require "ucode/commands"
7
+ require "ucode/coordinator"
8
+ require "ucode/database"
9
+ require "ucode/glyphs"
10
+ require "ucode/models"
11
+ require "ucode/repo"
12
+ require "ucode/version"
13
+ require "ucode/version_resolver"
14
+
15
+ module Ucode
16
+ module Commands
17
+ # `ucode canonical-build` — Mode 1's canonical Unicode dataset build
18
+ # (TODO 21). Single pass: enrich each codepoint via {Coordinator},
19
+ # resolve its glyph via the 4-tier {Ucode::Glyphs::Resolver}, write
20
+ # `index.json` + `glyph.svg` atomically, accumulate per-tier +
21
+ # per-block stats, and emit `output/build-report.json`.
22
+ #
23
+ # This is the v0.2 replacement for the v0.1 cell-extractor pipeline
24
+ # in {GlyphsCommand}. The two coexist until the v0.1 pipeline is
25
+ # removed (TODOs 17-19); CanonicalBuildCommand is the path forward
26
+ # for production dataset runs.
27
+ #
28
+ # == Pre-conditions (per TODO 21)
29
+ #
30
+ # 1. UCD + Unihan fetched for `version` (`ucode fetch ucd`,
31
+ # `ucode fetch unihan`).
32
+ # 2. Ucode::Database built for `version` (`ucode db build`).
33
+ # 3. Tier 1 fonts resolvable via the configured SourceConfig YAML.
34
+ # 4. Code Charts PDFs cached (for Pillar 1) — optional, only if
35
+ # pillar-1 sources are configured.
36
+ # 5. Last Resort UFO cloned (for Pillar 3) — optional, only if
37
+ # pillar-3 fallback is configured.
38
+ #
39
+ # Missing pre-conditions cause silent fallthrough to lower tiers;
40
+ # the build report's `by_tier` totals surface what ran.
41
+ class CanonicalBuildCommand
42
+ # @param version_intent [nil, :default, :latest, String]
43
+ # @param output_root [String, Pathname]
44
+ # @param source_config_path [String, Pathname, nil] override the
45
+ # Tier 1 font config YAML; nil uses the default
46
+ # (`config/unicode17_tier1_fonts.yml`).
47
+ # @param resolver [Ucode::Glyphs::Resolver, nil] inject a
48
+ # pre-built resolver (skips SourceBuilder); used by tests.
49
+ # @param validate [Boolean] run {Ucode::Repo::BuildValidator}
50
+ # after the build and emit `validation-report.json`. Default
51
+ # true; tests that don't care about validation pass false.
52
+ # @param baseline [Hash{String=>Integer}, nil] per-block expected
53
+ # built counts forwarded to the validator when `validate:` is
54
+ # true. nil skips the block_coverage check.
55
+ # @return [Hash] { version:, codepoint_count:, report_path:,
56
+ # totals:, validation_report_path:, validation_passed: } — the two validation keys are present only when `validate:` is true.
57
+ def call(version_intent, output_root:, source_config_path: nil,
58
+ resolver: nil, validate: true, baseline: nil)
59
+ version = VersionResolver.resolve(version_intent)
60
+ root = Pathname.new(output_root)
61
+
62
+ resolved_resolver = resolver || build_resolver(version, source_config_path)
63
+ accumulator = Repo::BuildReportAccumulator.new(
64
+ unicode_version: version,
65
+ ucode_version: Ucode::VERSION,
66
+ )
67
+
68
+ coordinator = Coordinator.new
69
+ writer = Repo::CodepointWriter.new(
70
+ root,
71
+ parallel_workers: workers,
72
+ resolver: resolved_resolver,
73
+ observer: accumulator,
74
+ )
75
+
76
+ ucd_dir = Cache.ucd_dir(version)
77
+ unihan_dir = Cache.unihan_dir(version)
78
+ codepoint_count = iterate(coordinator, ucd_dir, unihan_dir, writer,
79
+ accumulator)
80
+
81
+ report = accumulator.to_report
82
+ report_path = Repo::BuildReportWriter.new(root).write(report)
83
+
84
+ result = {
85
+ version: version,
86
+ codepoint_count: codepoint_count,
87
+ report_path: report_path,
88
+ totals: report.totals.to_hash,
89
+ }
90
+ return result unless validate
91
+
92
+ merge_validation_result(result, root, version, baseline)
93
+ end
94
+
95
+ private
96
+
97
+ def merge_validation_result(result, root, version, baseline)
98
+ outcome = Repo::BuildValidator.new(
99
+ root, unicode_version: version, baseline: baseline,
100
+ ).validate
101
+ result.merge(
102
+ validation_report_path: outcome[:report_path],
103
+ validation_passed: outcome[:passed],
104
+ )
105
+ end
106
+
107
+ def workers
108
+ Ucode.configuration.parallel_workers
109
+ end
110
+
111
+ def iterate(coordinator, ucd_dir, unihan_dir, writer, accumulator)
112
+ count = 0
113
+ coordinator.each_codepoint(ucd_dir: ucd_dir, unihan_dir: unihan_dir) do |cp|
114
+ begin
115
+ writer.write(cp)
116
+ rescue StandardError => e
117
+ accumulator.record_failure(cp, e)
118
+ end
119
+ count += 1
120
+ end
121
+ count
122
+ end
123
+
124
+ def build_resolver(version, source_config_path)
125
+ database = Database.open(version)
126
+ config = Glyphs::SourceConfig.new(path: source_config_path_or_default(source_config_path))
127
+ builder = Glyphs::SourceBuilder.new(config: config, database: database)
128
+ Glyphs::Resolver.new(sources: builder.tier1_sources(install: false))
129
+ end
130
+
131
+ def source_config_path_or_default(path)
132
+ return Glyphs::SourceConfig::DEFAULT_PATH if path.nil?
133
+
134
+ Pathname.new(path)
135
+ end
136
+ end
137
+ end
138
+ end
@@ -4,17 +4,23 @@ require "pathname"
4
4
 
5
5
  require "ucode/cache"
6
6
  require "ucode/fetch"
7
+ require "ucode/glyphs/source_config"
7
8
  require "ucode/version_resolver"
8
9
 
9
10
  module Ucode
10
11
  module Commands
11
12
  # `ucode fetch` — downloads UCD/Unihan/Code-Charts sources into the
12
- # per-version cache. Three subactions: ucd, unihan, charts.
13
+ # per-version cache, plus the specialist Tier 1 fonts referenced by
14
+ # the curated source config.
13
15
  #
14
16
  # Thin shell over `Ucode::Fetch::*`. The command layer's job is to
15
17
  # resolve the version intent and format the result; the fetcher does
16
18
  # the network I/O.
17
19
  class FetchCommand
20
+ DEFAULT_SPECIALIST_FONTS_MANIFEST =
21
+ Ucode::Glyphs::SourceConfig::DEFAULT_PATH.dirname.join("specialist_fonts.yml")
22
+ private_constant :DEFAULT_SPECIALIST_FONTS_MANIFEST
23
+
18
24
  # @param version_intent [nil, :default, :latest, String]
19
25
  # @param force [Boolean]
20
26
  # @return [Hash] { version:, ucd_dir: }
@@ -48,6 +54,36 @@ module Ucode
48
54
  { version: version, downloaded: count }
49
55
  end
50
56
 
57
+ # Fetch specialist Tier 1 fonts listed in the manifest. Returns
58
+ # a structured summary; per-font detail lives on the returned
59
+ # results array (one {Fetch::FontFetcher::Result} per entry).
60
+ #
61
+ # @param manifest_path [String, Pathname, nil] defaults to
62
+ # `config/specialist_fonts.yml`.
63
+ # @param only_label [String, nil] restrict to one label.
64
+ # @param allow_proprietary [Boolean] required for non-OFL entries.
65
+ # @param dry_run [Boolean] plan only; no network or disk writes.
66
+ # @return [Hash] { manifest:, total:, downloaded:, skipped:,
67
+ # failed:, local:, planned:, results: }
68
+ def fetch_fonts(manifest_path: nil, only_label: nil, allow_proprietary: false,
69
+ dry_run: false)
70
+ path = Pathname.new(manifest_path || DEFAULT_SPECIALIST_FONTS_MANIFEST)
71
+ results = Fetch::SpecialistFontFetcher.new(
72
+ manifest_path: path,
73
+ allow_proprietary: allow_proprietary,
74
+ dry_run: dry_run,
75
+ ).call(only_label: only_label)
76
+
77
+ { manifest: path.to_s,
78
+ total: results.size,
79
+ downloaded: results.count(&:downloaded?),
80
+ skipped: results.count(&:skipped?),
81
+ failed: results.count(&:failed?),
82
+ local: results.count(&:local?),
83
+ planned: results.count(&:planned?),
84
+ results: results }
85
+ end
86
+
51
87
  private
52
88
 
53
89
  def default_block_first_cps(version)
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+ require "time"
5
+
6
+ require "ucode/audit"
7
+ require "ucode/audit/library_auditor"
8
+ require "ucode/audit/release"
9
+ require "ucode/audit/emitter/paths"
10
+ require "ucode/commands/audit/reference_builder"
11
+
12
+ module Ucode
13
+ module Commands
14
+ # `ucode release` — assemble the fontist.org-consumable release
15
+ # tree (TODO 27).
16
+ #
17
+ # Walks a directory of per-formula font subdirectories, audits each
18
+ # via {Audit::LibraryAuditor}, and passes the resulting
19
+ # {Audit::Release::FormulaAudits} list to {Audit::Release::Emitter}.
20
+ #
21
+ # The release tree lives at `<output_root>/font_audit_release/`.
22
+ # The CI collector job invokes this after matrix-auditing every
23
+ # formula and pre-staging the universal-set directory.
24
+ class ReleaseCommand
25
+ FormulaSource = Struct.new(:slug, :path, keyword_init: true)
26
+
27
+ Result = Struct.new(:release_root, :formulas_total, :faces_total,
28
+ :formulas, :universal_set_available,
29
+ :library_index_written, :manifest_written,
30
+ :error, keyword_init: true)
31
+
32
+ # @param from [String, Pathname] directory containing one
33
+ # subdirectory per formula. Each subdirectory's name becomes
34
+ # the formula slug; its contents are audited via
35
+ # {Audit::LibraryAuditor} (recursive walk).
36
+ # @param output_root [String, Pathname] parent of the release
37
+ # root. Release tree lives at
38
+ # `<output_root>/font_audit_release/`.
39
+ # @param universal_set_root [String, Pathname, nil] location of
40
+ # the universal_glyph_set directory. Defaults to
41
+ # `<release_root>/universal_glyph_set` inside the release tree.
42
+ # @param unicode_version [String, nil] baseline UCD version.
43
+ # @param recursive [Boolean] recursively walk each formula
44
+ # subdirectory. Default true.
45
+ # @param brief [Boolean] cheap-extractor-only audit mode.
46
+ # @param browse [Boolean] also emit HTML browser pages; forwarded to the emitter as `with_missing_glyph_pages:`.
47
+ # @param source_config_sha256 [String, nil] sha256 of the Tier 1
48
+ # source-config YAML (TODO 23). Recorded in the manifest for
49
+ # curation provenance.
50
+ # @param reference [Audit::CoverageReference, nil] baseline
51
+ # forwarded to every per-face audit (TODO 25).
52
+ # @param generated_at [String] ISO8601 timestamp. Default: now.
53
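+ # @return [Result] on success every field except `error` is set; if a StandardError is raised it is rescued and only `error` ("Class: message") is set.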
54
+ def call(from:, output_root:, universal_set_root: nil, unicode_version: nil,
55
+ recursive: true, brief: false, browse: true,
56
+ source_config_sha256: nil, reference: nil,
57
+ generated_at: Time.now.utc.iso8601)
58
+ formula_sources = discover_formulas(from)
59
+ formulas = formula_sources.map do |src|
60
+ summary = audit_formula(src.path, recursive: recursive,
61
+ unicode_version: unicode_version,
62
+ brief: brief, reference: reference)
63
+ Ucode::Audit::Release::FormulaAudits.new(slug: src.slug, summary: summary)
64
+ end
65
+
66
+ emitter = Ucode::Audit::Release::Emitter.new(
67
+ output_root: output_root,
68
+ universal_set_root: universal_set_root,
69
+ with_missing_glyph_pages: browse,
70
+ )
71
+ emit_result = emitter.emit(
72
+ formulas: formulas,
73
+ unicode_version: unicode_version,
74
+ generated_at: generated_at,
75
+ source_config_sha256: source_config_sha256,
76
+ )
77
+
78
+ Result.new(
79
+ release_root: emit_result.release_root,
80
+ formulas_total: emit_result.formulas_total,
81
+ faces_total: emit_result.faces_total,
82
+ formulas: formula_sources,
83
+ universal_set_available: emit_result.universal_set_available,
84
+ library_index_written: emit_result.library_index_written,
85
+ manifest_written: emit_result.manifest_written,
86
+ )
87
+ rescue StandardError => e
88
+ Result.new(error: "#{e.class}: #{e.message}")
89
+ end
90
+
91
+ private
92
+
93
+ def discover_formulas(from)
94
+ Pathname.new(from).children.select(&:directory?).sort.map do |d|
95
+ FormulaSource.new(slug: d.basename.to_s, path: d.to_s)
96
+ end
97
+ end
98
+
99
+ def audit_formula(path, recursive:, unicode_version:, brief:, reference:)
100
+ options = audit_options(unicode_version: unicode_version, brief: brief)
101
+ auditor = Ucode::Audit::LibraryAuditor.new(
102
+ path, recursive: recursive, options: options, reference: reference
103
+ )
104
+ auditor.audit
105
+ end
106
+
107
+ def audit_options(unicode_version:, brief:)
108
+ opts = {}
109
+ opts[:ucd_version] = unicode_version if unicode_version
110
+ opts[:audit_brief] = true if brief
111
+ opts
112
+ end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,211 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+ require "json"
5
+ require "pathname"
6
+
7
+ require "ucode/cache"
8
+ require "ucode/commands"
9
+ require "ucode/coordinator"
10
+ require "ucode/database"
11
+ require "ucode/glyphs"
12
+ require "ucode/models"
13
+ require "ucode/version"
14
+ require "ucode/version_resolver"
15
+
16
+ module Ucode
17
+ module Commands
18
+ # `ucode universal-set` subcommands (TODOs 24 + 31):
19
+ #
20
+ # - `BuildCommand` — drain the codepoint stream through the
21
+ # 4-tier resolver and write glyphs + manifest. Runs
22
+ # {PreBuildCheck} first; emits {CoverageReport} after.
23
+ # - `PreCheckCommand` — standalone pre-build validation
24
+ # (config + fonts + coverage assertion).
25
+ # - `ReportCommand` — re-emit coverage reports from an
26
+ # existing manifest. Useful for iterating on curation without
27
+ # re-running the build.
28
+ # - `ValidateCommand` — post-build structural validation
29
+ # (manifest parses, every entry has a glyph, totals reconcile,
30
+ # provenance recorded).
31
+ #
32
+ # The set is the canonical reference for "what Unicode 17 looks
33
+ # like" — every assigned codepoint has exactly one glyph, with
34
+ # documented provenance. Audits (TODO 25) and the fontist.org
35
+ # consumer (TODO 27) read the manifest to answer "is this
36
+ # codepoint in the universal set?" without re-reading every SVG.
37
+ module UniversalSet
38
+ # `ucode universal-set build` action class. Pure Ruby — Thor
39
+ # (in `lib/ucode/cli.rb`) is responsible only for argument
40
+ # parsing and dispatch.
41
+ class BuildCommand
42
+ # @param version_intent [nil, :default, :latest, String]
43
+ # @param output_root [String, Pathname] directory that will
44
+ # hold `manifest.json`, `glyphs/`, `reports/`.
45
+ # @param source_config_path [String, Pathname, nil] override
46
+ # the Tier 1 font config YAML; nil uses the default at
47
+ # `Ucode::Glyphs::SourceConfig::DEFAULT_PATH`.
48
+ # @param resolver [Ucode::Glyphs::Resolver, nil] inject a
49
+ # pre-built resolver (skips SourceBuilder; PreBuildCheck still runs unless `skip_pre_check:` is true);
50
+ # used by tests.
51
+ # @param block_filter [String, nil] limit the build to one
52
+ # block (canonical underscore form). Useful for partial
53
+ # rebuilds when iterating on Tier 1 curation.
54
+ # @param parallel_workers [Integer] forwarded to the Builder.
55
+ # Defaults to {Ucode::Configuration#parallel_workers}.
56
+ # @param skip_pre_check [Boolean] when true, skip the
57
+ # {PreBuildCheck} step. Used by tests that inject a custom
58
+ # resolver and don't have a real source config on disk.
59
+ # @return [Hash] { version:, manifest_path:, totals:,
60
+ # by_tier:, coverage:, validation: }
61
+ def call(version_intent, output_root:, source_config_path: nil,
62
+ resolver: nil, block_filter: nil,
63
+ parallel_workers: default_workers, skip_pre_check: false)
64
+ version = VersionResolver.resolve(version_intent)
65
+ root = Pathname.new(output_root)
66
+
67
+ config_path = source_config_path_or_default(source_config_path)
68
+ sha = source_config_sha256(config_path)
69
+ database = Database.open(version)
70
+
71
+ run_pre_check(config_path, database) unless skip_pre_check
72
+
73
+ resolved_resolver = resolver || build_resolver(version, config_path, database)
74
+
75
+ builder = Glyphs::UniversalSet::Builder.new(
76
+ output_root: root,
77
+ resolver: resolved_resolver,
78
+ unicode_version: version,
79
+ ucode_version: Ucode::VERSION,
80
+ source_config_sha256: sha,
81
+ parallel_workers: parallel_workers,
82
+ block_filter: block_filter,
83
+ )
84
+
85
+ manifest_path = builder.build(codepoint_enum(version))
86
+
87
+ manifest = Ucode::Models::UniversalSetManifest.from_hash(
88
+ JSON.parse(manifest_path.read),
89
+ )
90
+ coverage = Glyphs::UniversalSet::CoverageReport
91
+ .new(root, database: database).emit(manifest)
92
+ validation = Glyphs::UniversalSet::Validator
93
+ .new(root, unicode_version: version).validate
94
+ {
95
+ version: version,
96
+ manifest_path: manifest_path,
97
+ totals: manifest.totals.to_hash,
98
+ by_tier: manifest.by_tier,
99
+ coverage: coverage,
100
+ validation: validation,
101
+ }
102
+ end
103
+
104
+ private
105
+
106
+ def default_workers
107
+ Ucode.configuration.parallel_workers
108
+ end
109
+
110
+ def source_config_path_or_default(path)
111
+ return Glyphs::SourceConfig::DEFAULT_PATH if path.nil?
112
+
113
+ Pathname.new(path)
114
+ end
115
+
116
+ def source_config_sha256(path)
117
+ return "" unless path.exist?
118
+
119
+ Digest::SHA256.file(path).hexdigest
120
+ end
121
+
122
+ def run_pre_check(config_path, database)
123
+ Glyphs::UniversalSet::PreBuildCheck.new(
124
+ source_config_path: config_path,
125
+ database: database,
126
+ ).call
127
+ end
128
+
129
+ def build_resolver(_version, config_path, database)
130
+ config = Glyphs::SourceConfig.new(path: config_path)
131
+ builder = Glyphs::SourceBuilder.new(config: config, database: database)
132
+ Glyphs::Resolver.new(sources: builder.tier1_sources(install: false))
133
+ end
134
+
135
+ def codepoint_enum(version)
136
+ ucd_dir = Cache.ucd_dir(version)
137
+ unihan_dir = Cache.unihan_dir(version)
138
+ Coordinator.new.each_codepoint(ucd_dir: ucd_dir, unihan_dir: unihan_dir)
139
+ end
140
+ end
141
+
142
+ # `ucode universal-set pre-check` — standalone pre-build
143
+ # validation. Runs the three TODO 31 §Pre-build validation
144
+ # checks (config loads, fonts present, coverage assertion runs)
145
+ # without starting the 4-hour build.
146
+ class PreCheckCommand
147
+ # @param version_intent [nil, :default, :latest, String]
148
+ # @param source_config_path [String, Pathname, nil]
149
+ # @param cmaps [#covers?] injectable; defaults to
150
+ # RealFonts::CmapCache.
151
+ # @param font_locator [#locate] injectable; defaults to a
152
+ # fresh FontLocator.
153
+ # @return [Ucode::Glyphs::UniversalSet::PreBuildReport]
154
+ # @raise [Ucode::UniversalSetPreBuildError] when missing_fonts
155
+ # is non-empty or the config fails to load.
156
+ def call(version_intent, source_config_path: nil, cmaps: nil,
157
+ font_locator: nil)
158
+ version = VersionResolver.resolve(version_intent)
159
+ database = Database.open(version)
160
+ config_path = source_config_path || Glyphs::SourceConfig::DEFAULT_PATH
161
+
162
+ kwargs = { source_config_path: config_path, database: database }
163
+ kwargs[:cmaps] = cmaps if cmaps
164
+ kwargs[:font_locator] = font_locator if font_locator
165
+ Glyphs::UniversalSet::PreBuildCheck.new(**kwargs).call
166
+ end
167
+ end
168
+
169
+ # `ucode universal-set report` — re-emit coverage reports from
170
+ # an existing manifest. Useful when iterating on the manifest
171
+ # shape (or regenerating reports after a model change) without
172
+ # re-running the build.
173
+ class ReportCommand
174
+ # @param version_intent [nil, :default, :latest, String]
175
+ # @param output_root [String, Pathname] directory holding
176
+ # `manifest.json`.
177
+ # @return [Hash] the {CoverageReport#emit} payload.
178
+ def call(version_intent, output_root:)
179
+ version = VersionResolver.resolve(version_intent)
180
+ root = Pathname.new(output_root)
181
+ manifest_path = root.join("manifest.json")
182
+ raise Ucode::Error, "manifest not found at #{manifest_path}" unless manifest_path.exist?
183
+
184
+ manifest = Ucode::Models::UniversalSetManifest.from_hash(
185
+ JSON.parse(manifest_path.read),
186
+ )
187
+ database = Database.open(version)
188
+ Glyphs::UniversalSet::CoverageReport.new(root, database: database)
189
+ .emit(manifest)
190
+ end
191
+ end
192
+
193
+ # `ucode universal-set validate` — post-build structural
194
+ # validation. Reads `manifest.json` + `glyphs/` and runs the
195
+ # four checks (manifest_loadable, glyph_files_present,
196
+ # totals_reconcile, provenance_complete).
197
+ class ValidateCommand
198
+ # @param output_root [String, Pathname]
199
+ # @param version_intent [nil, :default, :latest, String]
200
+ # used only to stamp the report's unicode_version when the
201
+ # manifest's recorded value is missing.
202
+ # @return [Hash] the {Validator#validate} payload.
203
+ def call(output_root, version_intent: nil)
204
+ version = version_intent && VersionResolver.resolve(version_intent)
205
+ Glyphs::UniversalSet::Validator
206
+ .new(output_root, unicode_version: version).validate
207
+ end
208
+ end
209
+ end
210
+ end
211
+ end
@@ -14,6 +14,11 @@ module Ucode
14
14
  autoload :LookupCommand, "ucode/commands/lookup"
15
15
  autoload :CacheCommand, "ucode/commands/cache"
16
16
  autoload :BuildCommand, "ucode/commands/build"
17
+ autoload :CanonicalBuildCommand, "ucode/commands/canonical_build"
17
18
  autoload :FontCoverageCommand, "ucode/commands/font_coverage"
19
+ autoload :UniversalSet, "ucode/commands/universal_set"
20
+ autoload :Audit, "ucode/commands/audit"
21
+ autoload :ReleaseCommand, "ucode/commands/release"
22
+ autoload :BlockFeedCommand, "ucode/commands/block_feed"
18
23
  end
19
24
  end
@@ -28,6 +28,17 @@ module Ucode
28
28
  :standardized_variants,
29
29
  :names_list,
30
30
  :unihan,
31
+ :line_break,
32
+ :east_asian_width,
33
+ :vertical_orientation,
34
+ :grapheme_break,
35
+ :word_break,
36
+ :sentence_break,
37
+ :indic_positional,
38
+ :indic_syllabic,
39
+ :hangul_syllable_type,
40
+ :emoji_properties,
41
+ :extra_binary_properties,
31
42
  keyword_init: true,
32
43
  )
33
44
  end