ucode 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +72 -0
  3. data/Gemfile.lock +2 -2
  4. data/TODO.full/00-README.md +116 -0
  5. data/TODO.full/01-panglyph-vision.md +112 -0
  6. data/TODO.full/02-panglyph-repo-bootstrap.md +184 -0
  7. data/TODO.full/03-panglyph-font-builder.md +201 -0
  8. data/TODO.full/04-panglyph-publish-pipeline.md +126 -0
  9. data/TODO.full/05-ucode-0-1-1-release.md +139 -0
  10. data/TODO.full/06-fontisan-remove-audit.md +142 -0
  11. data/TODO.full/07-fontisan-remove-ucd.md +125 -0
  12. data/TODO.full/08-archive-private-bin-build.md +143 -0
  13. data/TODO.full/09-archive-public-structure.md +164 -0
  14. data/TODO.full/10-fontist-org-woff-glyphs.md +131 -0
  15. data/TODO.full/11-fontist-org-audit-coverage.md +140 -0
  16. data/TODO.full/12-implementation-order.md +216 -0
  17. data/TODO.full/13-fontisan-font-writer-api.md +189 -0
  18. data/TODO.full/14-fontisan-table-writers.md +66 -0
  19. data/TODO.full/15-panglyph-builder-real.md +82 -0
  20. data/TODO.full/16-archive-public-sync-workflows.md +167 -0
  21. data/TODO.full/17-fontist-org-font-picker.md +73 -0
  22. data/TODO.full/18-comprehensive-spec-coverage.md +64 -0
  23. data/TODO.full/19-ucode-0-1-2-patch.md +32 -0
  24. data/TODO.full/20-fontisan-0-2-23-release.md +52 -0
  25. data/TODO.new/00-README.md +30 -0
  26. data/TODO.new/23-universal-glyph-set-source-map.md +312 -0
  27. data/TODO.new/24-universal-glyph-set-build.md +189 -0
  28. data/TODO.new/25-font-audit-against-universal-set.md +195 -0
  29. data/TODO.new/26-missing-glyph-reporter.md +189 -0
  30. data/TODO.new/27-fontist-org-consumer-integration.md +200 -0
  31. data/TODO.new/28-implementation-order-update.md +187 -0
  32. data/TODO.new/29-universal-set-curation-uc17.md +312 -0
  33. data/TODO.new/30-tier1-font-acquisition.md +241 -0
  34. data/TODO.new/31-universal-set-production-build.md +205 -0
  35. data/TODO.new/32-uc17-coverage-matrix.md +165 -0
  36. data/TODO.new/33-specialist-font-acquisition-refresh.md +138 -0
  37. data/TODO.new/34-pillar2-content-stream-correlator.md +147 -0
  38. data/TODO.new/35-universal-set-production-run.md +160 -0
  39. data/TODO.new/36-per-font-coverage-audit.md +145 -0
  40. data/TODO.new/37-coverage-highlight-reporter.md +125 -0
  41. data/TODO.new/38-fontist-org-glyph-consumer.md +141 -0
  42. data/TODO.new/39-implementation-order-update-32-38.md +258 -0
  43. data/TODO.new/40-archive-private-uses-ucode-audit.md +124 -0
  44. data/TODO.new/41-ucode-unicode-archive-bridge.md +160 -0
  45. data/config/specialist_fonts.yml +102 -0
  46. data/config/unicode17_tier1_fonts.yml +42 -0
  47. data/config/unicode17_universal_glyph_set.yml +293 -0
  48. data/lib/ucode/audit/block_aggregator.rb +57 -29
  49. data/lib/ucode/audit/browser/face_page.rb +128 -0
  50. data/lib/ucode/audit/browser/glyph_panel.rb +124 -0
  51. data/lib/ucode/audit/browser/library_page.rb +74 -0
  52. data/lib/ucode/audit/browser/missing_glyph_page.rb +87 -0
  53. data/lib/ucode/audit/browser/template.rb +47 -0
  54. data/lib/ucode/audit/browser/templates/face.css +200 -0
  55. data/lib/ucode/audit/browser/templates/face.html.erb +41 -0
  56. data/lib/ucode/audit/browser/templates/face.js +298 -0
  57. data/lib/ucode/audit/browser/templates/library.css +119 -0
  58. data/lib/ucode/audit/browser/templates/library.html.erb +42 -0
  59. data/lib/ucode/audit/browser/templates/library.js +99 -0
  60. data/lib/ucode/audit/browser/templates/missing_glyph_page.css +119 -0
  61. data/lib/ucode/audit/browser/templates/missing_glyph_page.html.erb +58 -0
  62. data/lib/ucode/audit/browser/templates/missing_glyph_page.js +2 -0
  63. data/lib/ucode/audit/browser.rb +32 -0
  64. data/lib/ucode/audit/context.rb +27 -1
  65. data/lib/ucode/audit/coverage_reference.rb +103 -0
  66. data/lib/ucode/audit/differ.rb +121 -0
  67. data/lib/ucode/audit/emitter/block_emitter.rb +52 -0
  68. data/lib/ucode/audit/emitter/codepoint_emitter.rb +87 -0
  69. data/lib/ucode/audit/emitter/collection_emitter.rb +80 -0
  70. data/lib/ucode/audit/emitter/face_directory.rb +212 -0
  71. data/lib/ucode/audit/emitter/glyph_emitter.rb +48 -0
  72. data/lib/ucode/audit/emitter/index_emitter.rb +149 -0
  73. data/lib/ucode/audit/emitter/library_emitter.rb +96 -0
  74. data/lib/ucode/audit/emitter/paths.rb +312 -0
  75. data/lib/ucode/audit/emitter/plane_emitter.rb +29 -0
  76. data/lib/ucode/audit/emitter/script_emitter.rb +29 -0
  77. data/lib/ucode/audit/emitter.rb +29 -0
  78. data/lib/ucode/audit/extractors/aggregations.rb +31 -2
  79. data/lib/ucode/audit/face_auditor.rb +86 -0
  80. data/lib/ucode/audit/formatters/audit_diff_text.rb +112 -0
  81. data/lib/ucode/audit/formatters/audit_text.rb +411 -0
  82. data/lib/ucode/audit/formatters/color.rb +48 -0
  83. data/lib/ucode/audit/formatters/library_summary_text.rb +98 -0
  84. data/lib/ucode/audit/formatters/text_formatter.rb +83 -0
  85. data/lib/ucode/audit/formatters.rb +23 -0
  86. data/lib/ucode/audit/library_aggregator.rb +86 -0
  87. data/lib/ucode/audit/library_auditor.rb +105 -0
  88. data/lib/ucode/audit/release/emitter.rb +152 -0
  89. data/lib/ucode/audit/release/face_card.rb +93 -0
  90. data/lib/ucode/audit/release/formula_audits.rb +50 -0
  91. data/lib/ucode/audit/release/library_index_builder.rb +78 -0
  92. data/lib/ucode/audit/release/manifest_builder.rb +127 -0
  93. data/lib/ucode/audit/release.rb +42 -0
  94. data/lib/ucode/audit/ucd_only_reference.rb +81 -0
  95. data/lib/ucode/audit/universal_set_reference.rb +136 -0
  96. data/lib/ucode/audit.rb +31 -0
  97. data/lib/ucode/cli.rb +339 -33
  98. data/lib/ucode/commands/audit/browser_command.rb +82 -0
  99. data/lib/ucode/commands/audit/collection_command.rb +103 -0
  100. data/lib/ucode/commands/audit/compare_command.rb +188 -0
  101. data/lib/ucode/commands/audit/font_command.rb +140 -0
  102. data/lib/ucode/commands/audit/library_command.rb +87 -0
  103. data/lib/ucode/commands/audit/reference_builder.rb +64 -0
  104. data/lib/ucode/commands/audit.rb +20 -0
  105. data/lib/ucode/commands/block_feed.rb +73 -0
  106. data/lib/ucode/commands/canonical_build.rb +138 -0
  107. data/lib/ucode/commands/fetch.rb +37 -1
  108. data/lib/ucode/commands/release.rb +115 -0
  109. data/lib/ucode/commands/universal_set.rb +211 -0
  110. data/lib/ucode/commands.rb +5 -0
  111. data/lib/ucode/coordinator/indices.rb +11 -0
  112. data/lib/ucode/coordinator.rb +138 -5
  113. data/lib/ucode/error.rb +30 -2
  114. data/lib/ucode/fetch/font_fetcher/result.rb +39 -0
  115. data/lib/ucode/fetch/font_fetcher.rb +16 -0
  116. data/lib/ucode/fetch/specialist_font_fetcher.rb +280 -0
  117. data/lib/ucode/fetch.rb +7 -3
  118. data/lib/ucode/glyphs/real_fonts/cmap_cache.rb +74 -0
  119. data/lib/ucode/glyphs/real_fonts.rb +1 -0
  120. data/lib/ucode/glyphs/resolver.rb +62 -0
  121. data/lib/ucode/glyphs/source.rb +48 -0
  122. data/lib/ucode/glyphs/source_builder.rb +61 -0
  123. data/lib/ucode/glyphs/source_config/coverage_assertion.rb +79 -0
  124. data/lib/ucode/glyphs/source_config/gap_report.rb +54 -0
  125. data/lib/ucode/glyphs/source_config.rb +104 -0
  126. data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +63 -0
  127. data/lib/ucode/glyphs/sources/pillar3_last_resort.rb +51 -0
  128. data/lib/ucode/glyphs/sources/tier1_real_font.rb +104 -0
  129. data/lib/ucode/glyphs/sources.rb +20 -0
  130. data/lib/ucode/glyphs/universal_set/builder.rb +161 -0
  131. data/lib/ucode/glyphs/universal_set/coverage_report.rb +139 -0
  132. data/lib/ucode/glyphs/universal_set/idempotency.rb +86 -0
  133. data/lib/ucode/glyphs/universal_set/manifest_accumulator.rb +195 -0
  134. data/lib/ucode/glyphs/universal_set/manifest_writer.rb +61 -0
  135. data/lib/ucode/glyphs/universal_set/pre_build_check.rb +197 -0
  136. data/lib/ucode/glyphs/universal_set/validator.rb +204 -0
  137. data/lib/ucode/glyphs/universal_set.rb +45 -0
  138. data/lib/ucode/glyphs.rb +6 -0
  139. data/lib/ucode/models/audit/baseline.rb +6 -0
  140. data/lib/ucode/models/audit/block_summary.rb +7 -0
  141. data/lib/ucode/models/audit/codepoint_provenance.rb +39 -0
  142. data/lib/ucode/models/audit/release_face.rb +42 -0
  143. data/lib/ucode/models/audit/release_formula.rb +33 -0
  144. data/lib/ucode/models/audit/release_manifest.rb +43 -0
  145. data/lib/ucode/models/audit/release_universal_set.rb +37 -0
  146. data/lib/ucode/models/audit.rb +9 -0
  147. data/lib/ucode/models/block.rb +2 -0
  148. data/lib/ucode/models/build_report.rb +109 -0
  149. data/lib/ucode/models/codepoint/glyph.rb +42 -0
  150. data/lib/ucode/models/codepoint.rb +3 -0
  151. data/lib/ucode/models/glyph_source.rb +86 -0
  152. data/lib/ucode/models/glyph_source_map.rb +138 -0
  153. data/lib/ucode/models/specialist_font.rb +70 -0
  154. data/lib/ucode/models/specialist_font_manifest.rb +48 -0
  155. data/lib/ucode/models/unihan_entry.rb +81 -9
  156. data/lib/ucode/models/unihan_field.rb +21 -0
  157. data/lib/ucode/models/universal_set_entry.rb +47 -0
  158. data/lib/ucode/models/universal_set_manifest.rb +78 -0
  159. data/lib/ucode/models/validation_report.rb +99 -0
  160. data/lib/ucode/models.rb +9 -0
  161. data/lib/ucode/parsers/named_sequences.rb +5 -5
  162. data/lib/ucode/parsers/unihan.rb +50 -19
  163. data/lib/ucode/repo/aggregate_writer.rb +34 -2
  164. data/lib/ucode/repo/block_feed_emitter.rb +153 -0
  165. data/lib/ucode/repo/build_report_accumulator.rb +138 -0
  166. data/lib/ucode/repo/build_report_writer.rb +46 -0
  167. data/lib/ucode/repo/build_validator.rb +229 -0
  168. data/lib/ucode/repo/codepoint_writer.rb +50 -1
  169. data/lib/ucode/repo/paths.rb +8 -0
  170. data/lib/ucode/repo.rb +4 -0
  171. data/lib/ucode/version.rb +1 -1
  172. data/schema/block-feed.output.schema.yml +134 -0
  173. metadata +143 -2
  174. data/ucode.gemspec +0 -56
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a3b414e4e08c27e99b6d90ff8af3c62a8be0b4ac753c1f856bff3bf0558b7a73
4
- data.tar.gz: 0afafb185a45a754b2fca7fad30a8b95f75c9f36861c20c6619f7e767af47d72
3
+ metadata.gz: 2bf2b84f7b6350471c31bd58b724534f5bde2a0c1e2bf44cfe41cd36a8ab6165
4
+ data.tar.gz: 7506b44a00233009e01dd5668d375082ec42f9ab2c6e3d0d319722949599496a
5
5
  SHA512:
6
- metadata.gz: 1baf8cfab570ef9113c54ba978f191226bd025f01b3bc7a565b1a4cd2ca3f54030b74e046c254c6e94089f5cc3582b8d8244a852021c92154d4ffcd872b2d729
7
- data.tar.gz: 332d8247601762a92319f31301b88b69af264f75bc09e01f52c14ecfd6d25969e1d4706c1030b36116b1941b1e65b1728ed44870804492ac00c4ae3e6941fccb
6
+ metadata.gz: 217b105ac13f10fcdaa51b39dbb23f8db3ba2b2026a0c9db4533df4ffc0434a06b1cbe89f078b6be2faa7603e0b8bf06b9a2e1ea5c775480cb213822ec8456e7
7
+ data.tar.gz: 1a0866921d181bb0b1cb8aa5113ea887a5a33eb1e0661d4d40eeec00e341d4b07cfb98f5ba9127b7bbcf834d79394c713a1f44a414b2759ba4bc08e96ef491db
data/CHANGELOG.md ADDED
@@ -0,0 +1,72 @@
1
+ # Changelog
2
+
3
+ All notable changes to ucode will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.1] — 2026-06-XX
9
+
10
+ ### Added
11
+
12
+ - **BlockFeedEmitter**: emits a compact per-block Unicode data feed
13
+ (`unicode-blocks.json`, `unicode-version.json`, `unicode/blocks/<slug>.json`).
14
+ Renamed from `FontistConsumerEmitter` — the data is plain Unicode
15
+ data, not consumer-specific.
16
+ - **Schema**: `schema/block-feed.output.schema.yml` documents the
17
+ canonical shape of the block-feed output as a YAML-encoded JSON
18
+ Schema. Acts as the contract between ucode (producer) and any
19
+ consumer of the feed.
20
+ - **Categorized Unihan model**: 8 typed collections matching the
21
+ Unihan standard file structure (Dictionary Indices, Readings,
22
+ Variants, Numeric Values, Radical-Stroke Counts, Dictionary-Like
23
+ Data, IRG Sources, Other Mappings). Each category is a collection
24
+ of `UnihanField { name, values }` records.
25
+ - **Real-font Tier 1 source map** for the universal glyph set (~17
26
+ specialists + Noto family default).
27
+ - **Pillar 1 + Pillar 2 glyph extraction** via the 4-tier canonical
28
+ resolver.
29
+ - **Per-codepoint properties from `extracted/` and `auxiliary/` UCD
30
+ files**: `display` (East Asian Width, Line Break Class, Vertical
31
+ Orientation), `break_segmentation` (Grapheme/Word/Sentence),
32
+ `indic` (Positional + Syllabic Category), `hangul` (Syllable Type),
33
+ `emoji` (6 booleans), full `binary_properties` list (now includes
34
+ PropList entries beyond DerivedCoreProperties).
35
+ - **Audit subsystem** ported from fontisan: `ucode audit font`,
36
+ `ucode audit library`, `ucode audit compare`, `ucode audit browser`.
37
+ - **Universal-set build infrastructure**: `ucode universal-set build`,
38
+ `pre-check`, `validate`, `report`.
39
+ - **Block-feed CLI command**: `ucode block-feed` (renamed from
40
+ `ucode fontist-consumer`).
41
+
42
+ ### Fixed
43
+
44
+ - `Parsers::NamedSequences` field order — real UAX#44 is
45
+ `Name; cp1 cp2 cp3 ...`, not the inverse.
46
+ - `BlockFeedEmitter` canonical path — uses `blocks/<ID>/index.json`
47
+ (matches AggregateWriter output), not `blocks/<ID>.json`.
48
+ - fontist.org `PropertyDetailPage.vue` route params — combining and
49
+ bidiclass routes were `:cc` / `:bc` but the page read `route.params.code`;
50
+ unified to `:code`.
51
+ - Vite dev server case-sensitive `codepoints/` path — fetch now
52
+ lowercases the hex from the route URL.
53
+ - Vue route-watcher added for per-char data — the top-level `await` ran
54
+ only once on initial mount, so navigating between `/unicode/char/X` and
55
+ `/unicode/char/Y` left the `charData` and `detail` refs stale.
56
+ - `scrollBehavior` added to the router — page navigation now resets
57
+ scroll to top instead of preserving the prior page's position.
58
+
59
+ ### Removed
60
+
61
+ - All references to "fontist-consumer" naming from ucode (now
62
+ "block-feed"). The data emitted is plain Unicode data, not
63
+ consumer-specific. Renames affect:
64
+ - `lib/ucode/repo/fontist_consumer_emitter.rb` → `block_feed_emitter.rb`
65
+ - `lib/ucode/commands/fontist_consumer.rb` → `block_feed.rb`
66
+ - `Repo::FontistConsumerEmitter` → `Repo::BlockFeedEmitter`
67
+ - `Commands::FontistConsumerCommand` → `Commands::BlockFeedCommand`
68
+ - CLI command `ucode fontist-consumer` → `ucode block-feed`
69
+
70
+ ## [0.1.0] — 2026-XX-XX
71
+
72
+ Initial release.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ucode (0.1.0)
4
+ ucode (0.1.1)
5
5
  base64
6
6
  fontisan (~> 0.2)
7
7
  fontist (~> 3.0)
@@ -393,7 +393,7 @@ CHECKSUMS
393
393
  thor (1.5.0) sha256=e3a9e55fe857e44859ce104a84675ab6e8cd59c650a49106a05f55f136425e73
394
394
  track_open_instances (0.1.15) sha256=7f0e48821e6b4c881daaa40fb1583e308937c22a9c84883c150b399c3b5c3029
395
395
  tzinfo (2.0.6) sha256=8daf828cc77bcf7d63b0e3bdb6caa47e2272dcfaf4fbfe46f8c3a9df087a829b
396
- ucode (0.1.0)
396
+ ucode (0.1.1)
397
397
  unibuf (0.1.2) sha256=2453cb9ff97b4a80ebb6ed4fec88669da847decf23f4f2b2d2490dcb270fc061
398
398
  unicode-display_width (3.2.0) sha256=0cdd96b5681a5949cdbc2c55e7b420facae74c4aaf9a9815eee1087cb1853c42
399
399
  unicode-emoji (4.2.0) sha256=519e69150f75652e40bf736106cfbc8f0f73aa3fb6a65afe62fefa7f80b0f80f
@@ -0,0 +1,116 @@
1
+ # TODO.full — Master plan: panglyph + ucode release + fontisan cleanup + full pipeline
2
+
3
+ This is the **shipping plan** for the full Fontist Unicode 17 stack:
4
+
5
+ 1. **ucode** — Unicode data + audit tool + universal glyph set (SVGs)
6
+ 2. **panglyph** (NEW) — assembles ucode's universal set into a single redistributable font
7
+ 3. **fontisan** — slim font parsing + WOFF conversion library (audit + UCD removed)
8
+ 4. **fontist-archive-private** — CI matrix runs ucode audit + fontisan convert per formula
9
+ 5. **fontist-archive-public** — public artifacts: `coverage/` + `woff/` + `unicode/` + `panglyph/`
10
+ 6. **fontist.org** — per-font unicode browser using WOFF (open-license) + ucode audit (all)
11
+
12
+ The plan below covers everything that remains after TODO.new/ (which built the
13
+ infrastructure). TODO.full/ is about wiring it all into a shippable product.
14
+
15
+ ## Directives from user
16
+
17
+ - **D0** — Define + build "Fontist universal glyph set for Unicode 17" as a
18
+ single font, in a new repo `panglyph`. Uses fontisan to extract outlines
19
+ from source fonts, assembles into one redistributable font file.
20
+ - **D1** — Publish ucode as a patch release (0.1.0 → 0.1.1).
21
+ - **D2** — Clean up fontisan: remove `AuditCommand` and UCD/UCDXML
22
+ subsystems (now in ucode). Keep: font parsing primitives, ConvertCommand.
23
+ - **D3** — Wire `fontist-archive-private` to use fontisan (WOFF) + ucode
24
+ (audit) for ALL fonts. Wire `fontist-archive-public` to host all artifacts.
25
+ - **D4** — Update fontist.org to consume fontist-archive-public: render
26
+ per-font glyphs from WOFF (open-license), show per-font coverage from
27
+ ucode audit (all fonts).
28
+
29
+ ## File index
30
+
31
+ ### Foundation
32
+
33
+ - [01 — Panglyph vision: what the universal font is, why it exists](01-panglyph-vision.md)
34
+ - [02 — Panglyph repo bootstrap (gem skeleton, CLI, CI)](02-panglyph-repo-bootstrap.md)
35
+ - [03 — Panglyph font builder (outline extract + assemble + write)](03-panglyph-font-builder.md)
36
+ - [04 — Panglyph publish pipeline (release artifacts to fontist-archive-public)](04-panglyph-publish-pipeline.md)
37
+
38
+ ### Releases
39
+
40
+ - [05 — ucode 0.1.1 patch release](05-ucode-0-1-1-release.md)
41
+
42
+ ### Cleanup
43
+
44
+ - [06 — fontisan: remove AuditCommand (and audit/ namespace)](06-fontisan-remove-audit.md)
45
+ - [07 — fontisan: remove UCD/UCDXML subsystem](07-fontisan-remove-ucd.md)
46
+
47
+ ### Pipeline
48
+
49
+ - [08 — fontist-archive-private bin/build uses ucode audit + fontisan convert](08-archive-private-bin-build.md)
50
+ - [09 — fontist-archive-public structure: unicode/ + panglyph/ + coverage/ + woff/](09-archive-public-structure.md)
51
+
52
+ ### Consumer
53
+
54
+ - [10 — fontist.org: per-font WOFF glyph rendering (open-license)](10-fontist-org-woff-glyphs.md)
55
+ - [11 — fontist.org: per-font ucode audit rendering (ALL fonts)](11-fontist-org-audit-coverage.md)
56
+
57
+ ### Sequencing
58
+
59
+ - [12 — Implementation order (all directives)](12-implementation-order.md)
60
+
61
+ ## Critical path (high-level)
62
+
63
+ ```
64
+ ┌──────────────────────────────────┐
65
+ │ 05 ucode 0.1.1 patch release │ ← unblocks all downstream
66
+ └──────────────┬───────────────────┘
67
+
68
+ ┌──────────────┴───────────────────┐
69
+ │ │
70
+ ▼ ▼
71
+ ┌────────────────────────┐ ┌────────────────────────┐
72
+ │ 06 fontisan audit │ │ 01–04 panglyph │
73
+ │ removal │ │ (new repo) │
74
+ │ 07 fontisan UCD │ └────────────┬───────────┘
75
+ │ removal │ │
76
+ └────────────┬───────────┘ │
77
+ │ │
78
+ ▼ │
79
+ ┌────────────────────────┐ │
80
+ │ 08 archive-private │ │
81
+ │ bin/build refactor │ │
82
+ └────────────┬───────────┘ │
83
+ │ │
84
+ ▼ ▼
85
+ ┌────────────────────────┐ ┌────────────────────────┐
86
+ │ 09 archive-public │◄────────┤ 04 panglyph publish │
87
+ │ structure │ │ to archive-public │
88
+ └────────────┬───────────┘ └────────────────────────┘
89
+
90
+
91
+ ┌────────────────────────┐
92
+ │ 10 fontist.org WOFF │
93
+ │ 11 fontist.org audit │
94
+ └────────────────────────┘
95
+ ```
96
+
97
+ ## Repositories involved
98
+
99
+ | Repo | Role | Branch / state |
100
+ |---|---|---|
101
+ | `fontist/ucode` | Unicode data + audit tool + universal glyph set | `fix/fontist-consumer-canonical-path` (PR #43) |
102
+ | `fontist/panglyph` (NEW) | Universal font assembler | not yet created |
103
+ | `fontist/fontisan` | Font parser + WOFF converter | `fix/ucdxml-real-shape-parsing` (cleanup target) |
104
+ | `fontist/fontist-archive-private` | CI build env (per-formula) | main (uses old fontisan AuditCommand) |
105
+ | `fontist/fontist-archive-public` | Public artifacts | main (no unicode/ or panglyph/ yet) |
106
+ | `fontist/fontist.github.io` | Consumer site | `fix/unicode-char-page-fields` (PR #45) |
107
+
108
+ ## Conventions
109
+
110
+ - **PR-per-TODO** unless tightly coupled.
111
+ - **Merging requires explicit user authorization per PR.**
112
+ - **Never push tags directly.** Tag + `rake release` only after explicit user sign-off.
113
+ - **No AI attribution** in commits, PRs, or release notes.
114
+ - **Original block names verbatim** (`CJK_Ext_A`, never slugified) in source data.
115
+ - **Vector-only glyph extraction.** No OCR.
116
+ - Branch naming: `<repo-scope>/<track-slug>` (e.g. `audit/remove-audit-command`).
@@ -0,0 +1,112 @@
1
+ # 01 — Panglyph vision: the Fontist universal Unicode 17 font
2
+
3
+ ## What it is
4
+
5
+ **panglyph** is a single redistributable font file that covers every
6
+ assigned Unicode 17.0 codepoint (~299,382 glyphs). It's the **materialized
7
+ form of ucode's universal glyph set**: where ucode produces one SVG per
8
+ codepoint (sourced from Tier 1 fonts via fontisan), panglyph assembles
9
+ those outlines into one OpenType/TrueType font that any application can
10
+ install and use as a Unicode 17 fallback.
11
+
12
+ Think "Noto Sans for everything" — except:
13
+ - Sourced from many Tier 1 fonts (Noto family + FSung for CJK, Lentariso
14
+ for Sidetic, Kedebideri for Beria Erfe, NotoSerifTaiYo, UniHieroglyphica,
15
+ Egyptian Text, Symbola, BabelStone, etc.)
16
+ - One font file, multiple script sources
17
+ - Built reproducibly from ucode's per-block coverage matrix (TODO 32 in
18
+ TODO.new/)
19
+ - Open License (OFL for the assembled font, matching source licenses)
20
+
21
+ ## Why it exists
22
+
23
+ Today, no single font covers Unicode 17. Noto Sans comes closest but
24
+ misses:
25
+ - Rare UC17 additions (Sidetic, Beria Erfe, Tolong Siki, Tai Yo)
26
+ - Egyptian Hieroglyphs Extended-B (needs UniHieroglyphica v16)
27
+ - CJK Extension J (needs FSung)
28
+ - Symbols for Legacy Computing Supplement (needs BabelStone)
29
+
30
+ Users who want "Unicode 17 everywhere" must install 10+ fonts. panglyph
31
+ collapses that to one.
32
+
33
+ ## Use cases
34
+
35
+ 1. **Browser fallback.** Browsers can be configured to use panglyph as
36
+ the last-resort font. Any codepoint not covered by the active font
37
+ gets panglyph's outline instead of tofu.
38
+ 2. **OS-level Unicode 17 coverage.** Install once, every app gets full
39
+ Unicode 17 rendering.
40
+ 3. **Print/PDF embedding.** Designers can embed a single font for any
41
+ Unicode 17 text.
42
+ 4. **Search/indexing tools.** Text extraction tools that need glyph
43
+ recognition for rare scripts get a uniform source.
44
+ 5. **Fontist.org specimen rendering.** When fontist.org shows a char
45
+ that the active font misses, fall back to panglyph instead of tofu.
46
+
47
+ ## What it is NOT
48
+
49
+ - **Not a replacement for source fonts.** panglyph is a fallback. Active
50
+ fonts (the user's chosen Noto Sans, FSung, etc.) take priority.
51
+ - **Not a font designer's tool.** It's a redistribution mechanism.
52
+ - **Not a copy of Noto.** Different sources, different coverage policy.
53
+ - **Not color emoji.** Vector outlines only (same as ucode's universal
54
+ glyph set). Color emoji would need a separate TODO.
55
+
56
+ ## Source policy
57
+
58
+ panglyph is assembled from ucode's universal-set manifest
59
+ (`output/universal_glyph_set/manifest.json`). For each codepoint:
60
+
61
+ 1. Look up the Tier 1 source font (per `config/unicode17_universal_glyph_set.yml`)
62
+ 2. Open the source font via fontisan
63
+ 3. Extract the glyf outline (or CFF charstring for OTF) for the codepoint's GID
64
+ 4. Copy the outline into panglyph's glyf table at the same GID
65
+
66
+ Pillar 2 (PDF-embedded extraction via correlate-v4 generalization) and
67
+ Pillar 3 (Last Resort tofu) are fallbacks when Tier 1 is unavailable.
68
+ Pillar 3 produces the recognizable "box with codepoint hex label" glyph
69
+ familiar from the Last Resort Font.
70
+
71
+ ## Licensing
72
+
73
+ panglyph's assembled font is **OFL**. Every source font in the universal
74
+ set must be OFL (or compatible — Apache, MIT, BSD, CC0, UFL, Bitstream,
75
+ GUST, CC-BY). Specialist fonts with proprietary licenses cannot be
76
+ included; their codepoints fall back to pillar 2 or pillar 3.
77
+
78
+ This is enforced at ucode's universal-set pre-check (TODO 35 in TODO.new/).
79
+
80
+ ## Output formats
81
+
82
+ | Format | Purpose |
83
+ |---|---|
84
+ | `panglyph-unicode17.ttf` | Installable system font (broadest compatibility) |
85
+ | `panglyph-unicode17.woff2` | Web font (smaller, used by fontist.org) |
86
+ | `panglyph-unicode17.otf` | CFF-based variant (smaller for CJK-heavy ranges) |
87
+
88
+ All three are produced by the build pipeline.
89
+
90
+ ## Versioning
91
+
92
+ - **`panglyph-unicode17-17.0.0.ttf`** — pinned to UCD 17.0.0
93
+ - **`panglyph-unicode17-17.0.1.ttf`** — patch release (e.g. fixed an
94
+ extraction bug); same Unicode data, regenerated glyphs
95
+ - **`panglyph-unicode17-17.1.0.ttf`** — minor (new Tier 1 fonts added)
96
+ - **`panglyph-unicode18-18.0.0.ttf`** — major (UCD 18 baseline)
97
+
98
+ The first version tag is `v17.0.0` to match UCD.
99
+
100
+ ## Deliverables
101
+
102
+ - One redistributable font file (TTF + WOFF2 + OTF)
103
+ - A SHA-256 manifest of source contributions (provenance)
104
+ - An OFL license file
105
+ - A coverage report (per-block % sourced from Tier 1 / Pillar 2 / Pillar 3)
106
+
107
+ ## References
108
+
109
+ - [TODO.new/32](../TODO.new/32-uc17-coverage-matrix.md) — Tier 1 source policy
110
+ - [TODO.new/35](../TODO.new/35-universal-set-production-run.md) — universal-set SVGs (panglyph input)
111
+ - [TODO.full/02](02-panglyph-repo-bootstrap.md) — repo skeleton
112
+ - [TODO.full/03](03-panglyph-font-builder.md) — build implementation
@@ -0,0 +1,184 @@
1
+ # 02 — Panglyph repo bootstrap
2
+
3
+ ## Goal
4
+
5
+ Create the `fontist/panglyph` repository skeleton: gem structure, CLI
6
+ entry point, CI workflow, README. No build logic yet (that's TODO 03) —
7
+ just the scaffold that lets development proceed.
8
+
9
+ ## Why a separate repo
10
+
11
+ panglyph consumes ucode's universal set as INPUT and produces a font
12
+ as OUTPUT. The assembly logic (fontisan outline extraction + font
13
+ writing + OFL packaging) is a distinct concern from ucode's Unicode
14
+ data + audit work.
15
+
16
+ Separation keeps:
17
+ - ucode focused on Unicode data + audit
18
+ - panglyph focused on font assembly
19
+ - fontisan focused on parsing + conversion primitives
20
+
21
+ ## Repository structure
22
+
23
+ ```
24
+ panglyph/
25
+ ├── README.md # what panglyph is, how to build
26
+ ├── LICENSE # OFL for the assembled font
27
+ ├── CHANGELOG.md # version history
28
+ ├── Gemfile # dev deps
29
+ ├── panglyph.gemspec # gem metadata
30
+ ├── lib/
31
+ │ ├── panglyph.rb # top-level namespace + autoloads
32
+ │ └── panglyph/
33
+ │ ├── version.rb # VERSION = "17.0.0"
34
+ │ ├── cli.rb # Thor CLI entry point
35
+ │ ├── builder.rb # orchestrates the full build
36
+ │ ├── outline_extractor.rb # extracts glyf/CFF outline via fontisan
37
+ │ ├── font_assembler.rb # assembles outlines into a font
38
+ │ ├── woff2_writer.rb # converts TTF → WOFF2 (via fontisan)
39
+ │ ├── manifest_reader.rb # reads ucode's universal-set manifest
40
+ │ ├── coverage_report.rb # emits per-block source breakdown
41
+ │ └── publisher.rb # pushes artifacts to fontist-archive-public
42
+ ├── exe/
43
+ │ └── panglyph # CLI executable
44
+ ├── spec/
45
+ │ ├── spec_helper.rb
46
+ │ └── panglyph/
47
+ │ ├── builder_spec.rb
48
+ │ ├── outline_extractor_spec.rb
49
+ │ └── ...
50
+ ├── data/
51
+ │ └── OFL.txt # OFL license template
52
+ ├── docs/
53
+ │ ├── architecture.md # build pipeline reference
54
+ │ └── coverage-policy.md # which fonts cover which blocks
55
+ └── .github/
56
+ └── workflows/
57
+ └── build.yml # CI: build panglyph on tag push
58
+ ```
59
+
60
+ ## CLI surface
61
+
62
+ ```
63
+ $ panglyph --help
64
+ panglyph commands:
65
+ panglyph build [UCD_VERSION] # Build panglyph-unicode<version>.<ext>
66
+ panglyph help [COMMAND] # Describe subcommands
67
+ panglyph manifest [UCD_VERSION] # Print source contributions manifest
68
+ panglyph publish [VERSION] # Publish built artifacts to archive-public
69
+ panglyph validate [FONT_PATH] # Verify the built font against the universal set
70
+ panglyph version # Print panglyph version
71
+ ```
72
+
73
+ ### `panglyph build`
74
+
75
+ ```
76
+ $ panglyph build 17.0.0
77
+ → reads ucode's universal-set manifest (must already be built)
78
+ → for each codepoint: extracts outline from the Tier 1 source font
79
+ → assembles outlines into a TTF in memory
80
+ → writes panglyph-unicode17-17.0.0.ttf
81
+ → converts to panglyph-unicode17-17.0.0.woff2
82
+ → emits coverage-report.json (per-block source breakdown)
83
+ ```
84
+
85
+ ### `panglyph validate`
86
+
87
+ ```
88
+ $ panglyph validate panglyph-unicode17-17.0.0.ttf
89
+ → cmap-walks the built font
90
+ → compares against ucode's universal-set codepoint list
91
+ → reports: 299382 codepoints, 297415 covered (99.3%), 1967 missing
92
+ → lists missing codepoints with their Tier 1 source (so the build can be fixed)
93
+ ```
94
+
95
+ ### `panglyph publish`
96
+
97
+ ```
98
+ $ panglyph publish 17.0.0
99
+ → clones fontist-archive-public (shallow)
100
+ → copies panglyph-unicode17-17.0.0.{ttf,woff2} to archive-public/panglyph/
101
+ → updates archive-public/panglyph/manifest.json
102
+ → commits + pushes via GHA bot token
103
+ ```
104
+
105
+ ## Dependencies
106
+
107
+ ```ruby
108
+ # panglyph.gemspec
109
+ spec.add_dependency "fontisan", "~> 0.3" # font parsing + writing primitives
110
+ spec.add_dependency "ucode", "~> 0.1" # universal-set manifest reader
111
+ spec.add_dependency "thor", "~> 1.3" # CLI
112
+ spec.add_dependency "json", "~> 2.0"
113
+ spec.add_dependency "rubyzip", "~> 2.3" # OFL packaging
114
+ ```
115
+
116
+ fontisan needs font-WRITING primitives added (it currently only reads +
117
+ converts). TODO 03 lists what fontisan needs to expose.
118
+
119
+ ## CI workflow
120
+
121
+ `.github/workflows/build.yml`:
122
+
123
+ ```yaml
124
+ name: Build panglyph
125
+
126
+ on:
127
+ push:
128
+ tags: ['v*']
129
+ workflow_dispatch:
130
+ inputs:
131
+ ucd_version:
132
+ description: 'UCD version to build (e.g. 17.0.0)'
133
+ required: true
134
+ default: '17.0.0'
135
+
136
+ jobs:
137
+ build:
138
+ runs-on: ubuntu-latest
139
+ steps:
140
+ - uses: actions/checkout@v4
141
+ - uses: ruby/setup-ruby@v1
142
+ with:
143
+ ruby-version: '3.4'
144
+ bundler-cache: true
145
+ - name: Fetch ucode universal set
146
+ run: |
147
+ git clone --depth 1 https://github.com/fontist/ucode ucode-repo
148
+ cd ucode-repo && bundle exec ucode universal-set build ${{ inputs.ucd_version }}
149
+ tar czf /tmp/universal-set.tar.gz output/universal_glyph_set/
150
+ - name: Build panglyph
151
+ run: |
152
+ bundle exec panglyph build ${{ inputs.ucd_version }} \
153
+ --universal-set=/tmp/universal-set.tar.gz
154
+ - name: Validate
155
+ run: bundle exec panglyph validate panglyph-unicode*.ttf
156
+ - name: Publish to fontist-archive-public
157
+ env:
158
+ GH_TOKEN: ${{ secrets.ARCHIVE_PUBLIC_BOT_TOKEN }}
159
+ run: bundle exec panglyph publish ${{ inputs.ucd_version }}
160
+ - uses: actions/upload-artifact@v4
161
+ with:
162
+ name: panglyph-${{ inputs.ucd_version }}
163
+ path: |
164
+ panglyph-unicode17-*.ttf
165
+ panglyph-unicode17-*.woff2
166
+ coverage-report.json
167
+ ```
168
+
169
+ ## Acceptance
170
+
171
+ - [ ] `fontist/panglyph` repo exists
172
+ - [ ] README.md explains what panglyph is + how to build (per TODO 01)
173
+ - [ ] LICENSE is OFL
174
+ - [ ] `bundle exec panglyph version` prints `17.0.0`
175
+ - [ ] `bundle exec panglyph --help` lists build/manifest/publish/validate
176
+ - [ ] CI workflow file exists and is syntactically valid
177
+ - [ ] One trivial spec passes (`spec/panglyph/version_spec.rb`)
178
+ - [ ] Repo is public
179
+
180
+ ## References
181
+
182
+ - [TODO 01](01-panglyph-vision.md) — vision
183
+ - [TODO 03](03-panglyph-font-builder.md) — build implementation
184
+ - [TODO.new/35](../TODO.new/35-universal-set-production-run.md) — input format