ucode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. checksums.yaml +7 -0
  2. data/CLAUDE.md +211 -0
  3. data/Gemfile +22 -0
  4. data/Gemfile.lock +406 -0
  5. data/README.md +469 -0
  6. data/Rakefile +18 -0
  7. data/TODO.new/00-README.md +66 -0
  8. data/TODO.new/01-pillar-terminology-alignment.md +69 -0
  9. data/TODO.new/02-audit-schema-design.md +255 -0
  10. data/TODO.new/03-directory-output-spec.md +203 -0
  11. data/TODO.new/04-fontist-org-contract.md +173 -0
  12. data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
  13. data/TODO.new/06-audit-namespace-skeleton.md +105 -0
  14. data/TODO.new/07-audit-models-port.md +132 -0
  15. data/TODO.new/08-extractors-cheap-port.md +113 -0
  16. data/TODO.new/09-extractors-expensive-port.md +99 -0
  17. data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
  18. data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
  19. data/TODO.new/12-formatters-port.md +115 -0
  20. data/TODO.new/13-directory-emitter.md +147 -0
  21. data/TODO.new/14-html-face-browser.md +144 -0
  22. data/TODO.new/15-html-library-browser.md +102 -0
  23. data/TODO.new/16-cli-audit-subcommands.md +142 -0
  24. data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
  25. data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
  26. data/TODO.new/19-fontisan-docs-update.md +155 -0
  27. data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
  28. data/TODO.new/21-canonical-unicode17-build.md +148 -0
  29. data/TODO.new/22-implementation-order.md +176 -0
  30. data/UCODE_CHANGELOG.md +97 -0
  31. data/exe/ucode +8 -0
  32. data/lib/ucode/aggregator.rb +77 -0
  33. data/lib/ucode/audit/block_aggregator.rb +90 -0
  34. data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
  35. data/lib/ucode/audit/context.rb +137 -0
  36. data/lib/ucode/audit/discrepancy_detector.rb +213 -0
  37. data/lib/ucode/audit/extractors/aggregations.rb +70 -0
  38. data/lib/ucode/audit/extractors/base.rb +21 -0
  39. data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
  40. data/lib/ucode/audit/extractors/coverage.rb +55 -0
  41. data/lib/ucode/audit/extractors/hinting.rb +199 -0
  42. data/lib/ucode/audit/extractors/identity.rb +65 -0
  43. data/lib/ucode/audit/extractors/licensing.rb +75 -0
  44. data/lib/ucode/audit/extractors/metrics.rb +108 -0
  45. data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
  46. data/lib/ucode/audit/extractors/provenance.rb +34 -0
  47. data/lib/ucode/audit/extractors/style.rb +88 -0
  48. data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
  49. data/lib/ucode/audit/extractors.rb +31 -0
  50. data/lib/ucode/audit/plane_aggregator.rb +37 -0
  51. data/lib/ucode/audit/registry.rb +63 -0
  52. data/lib/ucode/audit/script_aggregator.rb +92 -0
  53. data/lib/ucode/audit.rb +27 -0
  54. data/lib/ucode/cache.rb +113 -0
  55. data/lib/ucode/cli.rb +272 -0
  56. data/lib/ucode/commands/build.rb +68 -0
  57. data/lib/ucode/commands/cache.rb +46 -0
  58. data/lib/ucode/commands/fetch.rb +62 -0
  59. data/lib/ucode/commands/font_coverage.rb +57 -0
  60. data/lib/ucode/commands/glyphs.rb +136 -0
  61. data/lib/ucode/commands/lookup.rb +65 -0
  62. data/lib/ucode/commands/parse.rb +62 -0
  63. data/lib/ucode/commands/site.rb +33 -0
  64. data/lib/ucode/commands.rb +19 -0
  65. data/lib/ucode/config.rb +110 -0
  66. data/lib/ucode/coordinator/indices.rb +34 -0
  67. data/lib/ucode/coordinator.rb +397 -0
  68. data/lib/ucode/database.rb +214 -0
  69. data/lib/ucode/db_builder.rb +107 -0
  70. data/lib/ucode/error.rb +96 -0
  71. data/lib/ucode/fetch/code_charts.rb +57 -0
  72. data/lib/ucode/fetch/http.rb +83 -0
  73. data/lib/ucode/fetch/ucd_zip.rb +57 -0
  74. data/lib/ucode/fetch/unihan_zip.rb +57 -0
  75. data/lib/ucode/fetch.rb +14 -0
  76. data/lib/ucode/glyphs/cell_extractor.rb +130 -0
  77. data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
  78. data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
  79. data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
  80. data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
  81. data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
  82. data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
  83. data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
  84. data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
  85. data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
  86. data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
  87. data/lib/ucode/glyphs/grid.rb +30 -0
  88. data/lib/ucode/glyphs/grid_detector.rb +165 -0
  89. data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
  90. data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
  91. data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
  92. data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
  93. data/lib/ucode/glyphs/last_resort/source.rb +125 -0
  94. data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
  95. data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
  96. data/lib/ucode/glyphs/last_resort.rb +36 -0
  97. data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
  98. data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
  99. data/lib/ucode/glyphs/page_renderer.rb +221 -0
  100. data/lib/ucode/glyphs/path_bbox.rb +62 -0
  101. data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
  102. data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
  103. data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
  104. data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
  105. data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
  106. data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
  107. data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
  108. data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
  109. data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
  110. data/lib/ucode/glyphs/real_fonts.rb +32 -0
  111. data/lib/ucode/glyphs/writer.rb +250 -0
  112. data/lib/ucode/glyphs.rb +27 -0
  113. data/lib/ucode/index.rb +106 -0
  114. data/lib/ucode/index_builder.rb +94 -0
  115. data/lib/ucode/models/audit/audit_axis.rb +30 -0
  116. data/lib/ucode/models/audit/audit_diff.rb +77 -0
  117. data/lib/ucode/models/audit/audit_report.rb +137 -0
  118. data/lib/ucode/models/audit/baseline.rb +32 -0
  119. data/lib/ucode/models/audit/block_summary.rb +72 -0
  120. data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
  121. data/lib/ucode/models/audit/codepoint_range.rb +39 -0
  122. data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
  123. data/lib/ucode/models/audit/color_capabilities.rb +91 -0
  124. data/lib/ucode/models/audit/discrepancy.rb +38 -0
  125. data/lib/ucode/models/audit/duplicate_group.rb +23 -0
  126. data/lib/ucode/models/audit/embedding_type.rb +81 -0
  127. data/lib/ucode/models/audit/field_change.rb +28 -0
  128. data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
  129. data/lib/ucode/models/audit/gasp_range.rb +63 -0
  130. data/lib/ucode/models/audit/hinting.rb +99 -0
  131. data/lib/ucode/models/audit/library_summary.rb +40 -0
  132. data/lib/ucode/models/audit/licensing.rb +48 -0
  133. data/lib/ucode/models/audit/metrics.rb +111 -0
  134. data/lib/ucode/models/audit/named_instance.rb +41 -0
  135. data/lib/ucode/models/audit/opentype_layout.rb +38 -0
  136. data/lib/ucode/models/audit/plane_summary.rb +31 -0
  137. data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
  138. data/lib/ucode/models/audit/script_features.rb +28 -0
  139. data/lib/ucode/models/audit/script_summary.rb +54 -0
  140. data/lib/ucode/models/audit/variation_detail.rb +42 -0
  141. data/lib/ucode/models/audit.rb +50 -0
  142. data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
  143. data/lib/ucode/models/bidi_mirroring.rb +19 -0
  144. data/lib/ucode/models/binary_property_assignment.rb +26 -0
  145. data/lib/ucode/models/block.rb +36 -0
  146. data/lib/ucode/models/case_folding_rule.rb +23 -0
  147. data/lib/ucode/models/cjk_radical.rb +23 -0
  148. data/lib/ucode/models/codepoint/bidi.rb +28 -0
  149. data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
  150. data/lib/ucode/models/codepoint/case_folding.rb +25 -0
  151. data/lib/ucode/models/codepoint/casing.rb +32 -0
  152. data/lib/ucode/models/codepoint/decomposition.rb +27 -0
  153. data/lib/ucode/models/codepoint/display.rb +24 -0
  154. data/lib/ucode/models/codepoint/emoji.rb +29 -0
  155. data/lib/ucode/models/codepoint/hangul.rb +20 -0
  156. data/lib/ucode/models/codepoint/identifier.rb +30 -0
  157. data/lib/ucode/models/codepoint/indic.rb +20 -0
  158. data/lib/ucode/models/codepoint/joining.rb +20 -0
  159. data/lib/ucode/models/codepoint/normalization.rb +35 -0
  160. data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
  161. data/lib/ucode/models/codepoint.rb +122 -0
  162. data/lib/ucode/models/name_alias.rb +21 -0
  163. data/lib/ucode/models/named_sequence.rb +19 -0
  164. data/lib/ucode/models/names_list_entry.rb +38 -0
  165. data/lib/ucode/models/plane.rb +36 -0
  166. data/lib/ucode/models/property_alias.rb +24 -0
  167. data/lib/ucode/models/property_value_alias.rb +26 -0
  168. data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
  169. data/lib/ucode/models/relationship/cross_reference.rb +17 -0
  170. data/lib/ucode/models/relationship/footnote.rb +24 -0
  171. data/lib/ucode/models/relationship/informal_alias.rb +18 -0
  172. data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
  173. data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
  174. data/lib/ucode/models/relationship.rb +57 -0
  175. data/lib/ucode/models/script.rb +41 -0
  176. data/lib/ucode/models/special_casing_rule.rb +28 -0
  177. data/lib/ucode/models/standardized_variant.rb +24 -0
  178. data/lib/ucode/models/unihan_entry.rb +23 -0
  179. data/lib/ucode/models.rb +47 -0
  180. data/lib/ucode/parsers/auxiliary.rb +26 -0
  181. data/lib/ucode/parsers/base.rb +137 -0
  182. data/lib/ucode/parsers/bidi_brackets.rb +41 -0
  183. data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
  184. data/lib/ucode/parsers/blocks.rb +63 -0
  185. data/lib/ucode/parsers/case_folding.rb +53 -0
  186. data/lib/ucode/parsers/cjk_radicals.rb +102 -0
  187. data/lib/ucode/parsers/derived_age.rb +59 -0
  188. data/lib/ucode/parsers/derived_core_properties.rb +60 -0
  189. data/lib/ucode/parsers/extracted_properties.rb +74 -0
  190. data/lib/ucode/parsers/name_aliases.rb +44 -0
  191. data/lib/ucode/parsers/named_sequences.rb +51 -0
  192. data/lib/ucode/parsers/names_list.rb +250 -0
  193. data/lib/ucode/parsers/property_aliases.rb +41 -0
  194. data/lib/ucode/parsers/property_value_aliases.rb +46 -0
  195. data/lib/ucode/parsers/script_extensions.rb +64 -0
  196. data/lib/ucode/parsers/scripts.rb +60 -0
  197. data/lib/ucode/parsers/special_casing.rb +62 -0
  198. data/lib/ucode/parsers/standardized_variants.rb +56 -0
  199. data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
  200. data/lib/ucode/parsers/unicode_data.rb +268 -0
  201. data/lib/ucode/parsers/unihan.rb +125 -0
  202. data/lib/ucode/parsers.rb +35 -0
  203. data/lib/ucode/range_entry.rb +58 -0
  204. data/lib/ucode/repo/aggregate_writer.rb +364 -0
  205. data/lib/ucode/repo/atomic_writes.rb +48 -0
  206. data/lib/ucode/repo/codepoint_writer.rb +96 -0
  207. data/lib/ucode/repo/paths.rb +122 -0
  208. data/lib/ucode/repo.rb +22 -0
  209. data/lib/ucode/site/config_emitter.rb +124 -0
  210. data/lib/ucode/site/generator.rb +178 -0
  211. data/lib/ucode/site/search_index.rb +68 -0
  212. data/lib/ucode/site/template/.gitignore +4 -0
  213. data/lib/ucode/site/template/.vitepress/config.ts +8 -0
  214. data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
  215. data/lib/ucode/site/template/char/[codepoint].md +13 -0
  216. data/lib/ucode/site/template/components/BlockView.vue +57 -0
  217. data/lib/ucode/site/template/components/CharView.vue +85 -0
  218. data/lib/ucode/site/template/components/PlaneView.vue +56 -0
  219. data/lib/ucode/site/template/components/SearchView.vue +66 -0
  220. data/lib/ucode/site/template/index.md +25 -0
  221. data/lib/ucode/site/template/package.json +18 -0
  222. data/lib/ucode/site/template/search.md +9 -0
  223. data/lib/ucode/site.rb +13 -0
  224. data/lib/ucode/version.rb +5 -0
  225. data/lib/ucode/version_resolver.rb +76 -0
  226. data/lib/ucode.rb +74 -0
  227. data/ucode.gemspec +56 -0
  228. metadata +404 -0
@@ -0,0 +1,25 @@
1
+ ---
2
+ layout: home
3
+
4
+ hero:
5
+ name: "ucode"
6
+ text: "Unicode Character Database"
7
+ tagline: Code charts, properties, relationships — for every assigned codepoint.
8
+ actions:
9
+ - theme: brand
10
+ text: Browse Plane 0
11
+ link: /plane/0
12
+ - theme: alt
13
+ text: Search
14
+ link: /search
15
+
16
+ features:
17
+ - title: Official glyphs
18
+ details: Vector SVG extracted directly from the Unicode Code Charts PDFs — no OCR.
19
+ - title: Human-curated relationships
20
+ details: Cross-references, see-also, compatibility equivalents from NamesList.txt.
21
+ - title: Full UCD properties
22
+ details: Every property from UnicodeData.txt, DerivedCoreProperties, Unihan, and more.
23
+ - title: Static + dynamic
24
+ details: ~363 static pages (planes + blocks) plus a single dynamic route for ~160k characters.
25
+ ---
@@ -0,0 +1,18 @@
1
+ {
2
+ "name": "ucode-site",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vitepress dev",
8
+ "build": "vitepress build",
9
+ "preview": "vitepress preview"
10
+ },
11
+ "dependencies": {
12
+ "minisearch": "^7.1.0",
13
+ "vue": "^3.5.13"
14
+ },
15
+ "devDependencies": {
16
+ "vitepress": "^1.5.0"
17
+ }
18
+ }
@@ -0,0 +1,9 @@
1
+ ---
2
+ layout: doc
3
+ ---
4
+
5
+ <script setup>
6
+ import SearchView from "../components/SearchView.vue";
7
+ </script>
8
+
9
+ <SearchView />
data/lib/ucode/site.rb ADDED
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ucode
4
+ # Site — VitePress app generator under site/ (hub file: autoloads its children).
5
+ #
6
+ # Generates ~363 static pages (17 planes + ~346 blocks). Character
7
+ # detail is a single dynamic route that fetches JSON per codepoint.
8
+ module Site
9
+ autoload :Generator, "ucode/site/generator"
10
+ autoload :ConfigEmitter, "ucode/site/config_emitter"
11
+ autoload :SearchIndex, "ucode/site/search_index"
12
+ end
13
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ucode
4
+ VERSION = "0.1.0" # gem release version; ucode.gemspec reads it for spec.version
5
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "uri"
5
+ require "rubygems"
6
+
7
+ module Ucode
8
+ # Resolves a user-supplied version intent to a concrete version string.
9
+ #
10
+ # Three input modes:
11
+ #
12
+ # resolve(nil) # default_version from config
13
+ # resolve(:default) # default_version from config
14
+ # resolve(:latest) # probes listing_url, picks highest; falls
15
+ # # back to default on failure
16
+ # resolve("17.0.0") # explicit; validated against known_versions
17
+ #
18
+ module VersionResolver
19
+ class << self
20
+ # @param intent [nil, :default, :latest, String]
21
+ # @return [String]
22
+ def resolve(intent)
23
+ case intent
24
+ when nil, :default
25
+ Ucode.configuration.default_version
26
+ when :latest
27
+ probe_latest
28
+ else
29
+ validate!(intent)
30
+ intent
31
+ end
32
+ end
33
+
34
+ # Raise UnknownVersionError unless `version` is in known_versions.
35
+ # @param version [String]
36
+ # @return [void]
37
+ def validate!(version)
38
+ return if Ucode.configuration.known?(version)
39
+
40
+ raise Ucode::UnknownVersionError.new(
41
+ "UCD version #{version.inspect} is not recognized.",
42
+ context: { version: version,
43
+ known: Ucode.configuration.known_versions },
44
+ )
45
+ end
46
+
47
+ private
48
+
49
+ def probe_latest
50
+ versions = fetch_directory_versions
51
+ if versions.empty?
52
+ return fallback_latest("directory listing was empty")
53
+ end
54
+
55
+ highest = versions.max_by { |v| Gem::Version.new(v) }
56
+ return Ucode.configuration.default_version unless Ucode.configuration.known?(highest)
57
+
58
+ highest
59
+ rescue StandardError => e
60
+ fallback_latest(e.message)
61
+ end
62
+
63
+ def fallback_latest(reason)
64
+ warn "Ucode::VersionResolver: --latest probe failed (#{reason}); " \
65
+ "falling back to default #{Ucode.configuration.default_version.inspect}"
66
+ Ucode.configuration.default_version
67
+ end
68
+
69
+ def fetch_directory_versions
70
+ uri = URI(Ucode.configuration.listing_url)
71
+ html = Net::HTTP.get(uri)
72
+ html.scan(%r{href="(\d+\.\d+\.\d+)/?"}i).flatten.uniq
73
+ end
74
+ end
75
+ end
76
+ end
data/lib/ucode.rb ADDED
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "logger"
4
+ require "pathname"
5
+ require "lutaml/model"
6
+
7
+ # ucode — Unicode Character Database toolkit.
8
+ #
9
+ # Top-level hub. Every namespace under Ucode has its own hub file at
10
+ # lib/ucode/<ns>.rb declaring autoloads for its children. This file
11
+ # autoloads those hubs plus the flat Ucode::* classes. Files are loaded
12
+ # lazily on first reference.
13
+ module Ucode
14
+ autoload :VERSION, "ucode/version"
15
+
16
+ # Foundation
17
+ autoload :Config, "ucode/config"
18
+ autoload :Error, "ucode/error"
19
+ # Error subclasses are referenced independently of Ucode::Error in
20
+ # rescue / raise clauses throughout the library. Declaring autoloads
21
+ # for each ensures any one of them triggers the single load of
22
+ # error.rb (which defines all of them in one pass).
23
+ autoload :FetchError, "ucode/error"
24
+ autoload :NetworkError, "ucode/error"
25
+ autoload :ChecksumError, "ucode/error"
26
+ autoload :ParseError, "ucode/error"
27
+ autoload :MalformedLineError, "ucode/error"
28
+ autoload :UnknownPropertyError, "ucode/error"
29
+ autoload :LookupError, "ucode/error"
30
+ autoload :DatabaseMissingError, "ucode/error"
31
+ autoload :DatabaseSchemaError, "ucode/error"
32
+ autoload :UnknownVersionError, "ucode/error"
33
+ autoload :GlyphError, "ucode/error"
34
+ autoload :PdfRenderError, "ucode/error"
35
+ autoload :GridDetectionError, "ucode/error"
36
+ autoload :LastResortMissingError, "ucode/error"
37
+ autoload :EmbeddedFontsMissingError, "ucode/error"
38
+
39
+ # Infrastructure
40
+ autoload :Cache, "ucode/cache"
41
+ autoload :VersionResolver, "ucode/version_resolver"
42
+
43
+ # Namespace hubs and flat Ucode::* classes (each hub declares its own child autoloads)
44
+ autoload :Fetch, "ucode/fetch"
45
+ autoload :Models, "ucode/models"
46
+ autoload :Parsers, "ucode/parsers"
47
+ autoload :Coordinator, "ucode/coordinator"
48
+ autoload :RangeEntry, "ucode/range_entry"
49
+ autoload :Index, "ucode/index"
50
+ autoload :Database, "ucode/database"
51
+ autoload :DbBuilder, "ucode/db_builder"
52
+ autoload :IndexBuilder, "ucode/index_builder"
53
+ autoload :Aggregator, "ucode/aggregator"
54
+ autoload :Repo, "ucode/repo"
55
+ autoload :Glyphs, "ucode/glyphs"
56
+ autoload :Audit, "ucode/audit"
57
+ autoload :Site, "ucode/site"
58
+ autoload :Commands, "ucode/commands"
59
+ autoload :Cli, "ucode/cli"
60
+
61
+ class << self
62
+ # @return [Ucode::Config] memoized; built with Config.new on first call
63
+ def configuration
64
+ @configuration ||= Config.new
65
+ end
66
+
67
+ # @yield [config] the same object that .configuration returns
68
+ # @yieldparam config [Ucode::Config]
69
+ # @return [void]
70
+ def configure
71
+ yield(configuration)
72
+ end
73
+ end
74
+ end
data/ucode.gemspec ADDED
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/ucode/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "ucode"
7
+ spec.version = Ucode::VERSION
8
+ spec.authors = ["Ribose Inc."]
9
+ spec.email = ["open.source@ribose.com"]
10
+
11
+ spec.summary = "Unicode Character Database toolkit — lookup, dataset, glyphs, site"
12
+ spec.description = <<~DESC
13
+ ucode turns the Unicode Character Database (UCD) text files and the official
14
+ Unicode Code Charts into a structured, browsable dataset. For every assigned
15
+ codepoint it produces a JSON document with full UCD properties, the
16
+ human-curated relationships from NamesList.txt, Unihan readings, and
17
+ machine-computed references; an SVG of the official glyph vector-extracted
18
+ from the Code Charts; and a Vitepress site for browsing Plane, Block,
19
+ and Character.
20
+ DESC
21
+
22
+ spec.homepage = "https://github.com/fontist/ucode"
23
+ spec.license = "BSD-2-Clause"
24
+ spec.required_ruby_version = ">= 3.2.0"
25
+
26
+ spec.metadata["homepage_uri"] = spec.homepage
27
+ spec.metadata["source_code_uri"] = "https://github.com/fontist/ucode"
28
+ spec.metadata["changelog_uri"] = "https://github.com/fontist/ucode/blob/main/CHANGELOG.md"
29
+ spec.metadata["rubygems_mfa_required"] = "true"
30
+
31
+ spec.files = Dir.chdir(__dir__) do
32
+ `git ls-files -z`.split("\x0").reject do |f|
33
+ f == __FILE__ ||
34
+ f.start_with?(".") ||
35
+ f.start_with?("spec/") ||
36
+ f.start_with?("benchmark/") ||
37
+ f.start_with?("TODO.impl/") ||
38
+ f.start_with?("docs/") ||
39
+ f.start_with?("site/")
40
+ end
41
+ end
42
+
43
+ spec.bindir = "exe"
44
+ spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
45
+ spec.require_paths = ["lib"]
46
+
47
+ spec.add_dependency "base64"
48
+ spec.add_dependency "fontisan", "~> 0.2"
49
+ spec.add_dependency "fontist", "~> 3.0"
50
+ spec.add_dependency "logger"
51
+ spec.add_dependency "lutaml-model", "~> 0.8"
52
+ spec.add_dependency "nokogiri", "~> 1.16"
53
+ spec.add_dependency "rubyzip", "~> 2.3"
54
+ spec.add_dependency "sqlite3", "~> 2.0"
55
+ spec.add_dependency "thor", "~> 1.3"
56
+ end
metadata ADDED
@@ -0,0 +1,404 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ucode
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ribose Inc.
8
+ bindir: exe
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: base64
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: fontisan
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '0.2'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '0.2'
40
+ - !ruby/object:Gem::Dependency
41
+ name: fontist
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '3.0'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '3.0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: logger
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ type: :runtime
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ - !ruby/object:Gem::Dependency
69
+ name: lutaml-model
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '0.8'
75
+ type: :runtime
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '0.8'
82
+ - !ruby/object:Gem::Dependency
83
+ name: nokogiri
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '1.16'
89
+ type: :runtime
90
+ prerelease: false
91
+ version_requirements: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '1.16'
96
+ - !ruby/object:Gem::Dependency
97
+ name: rubyzip
98
+ requirement: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '2.3'
103
+ type: :runtime
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '2.3'
110
+ - !ruby/object:Gem::Dependency
111
+ name: sqlite3
112
+ requirement: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - "~>"
115
+ - !ruby/object:Gem::Version
116
+ version: '2.0'
117
+ type: :runtime
118
+ prerelease: false
119
+ version_requirements: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '2.0'
124
+ - !ruby/object:Gem::Dependency
125
+ name: thor
126
+ requirement: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '1.3'
131
+ type: :runtime
132
+ prerelease: false
133
+ version_requirements: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '1.3'
138
+ description: |
139
+ ucode turns the Unicode Character Database (UCD) text files and the official
140
+ Unicode Code Charts into a structured, browsable dataset. For every assigned
141
+ codepoint it produces a JSON document with full UCD properties, the
142
+ human-curated relationships from NamesList.txt, Unihan readings, and
143
+ machine-computed references; an SVG of the official glyph vector-extracted
144
+ from the Code Charts; and a Vitepress site for browsing Plane, Block,
145
+ and Character.
146
+ email:
147
+ - open.source@ribose.com
148
+ executables:
149
+ - ucode
150
+ extensions: []
151
+ extra_rdoc_files: []
152
+ files:
153
+ - CLAUDE.md
154
+ - Gemfile
155
+ - Gemfile.lock
156
+ - README.md
157
+ - Rakefile
158
+ - TODO.new/00-README.md
159
+ - TODO.new/01-pillar-terminology-alignment.md
160
+ - TODO.new/02-audit-schema-design.md
161
+ - TODO.new/03-directory-output-spec.md
162
+ - TODO.new/04-fontist-org-contract.md
163
+ - TODO.new/05-baseline-unicode17-coverage-audit.md
164
+ - TODO.new/06-audit-namespace-skeleton.md
165
+ - TODO.new/07-audit-models-port.md
166
+ - TODO.new/08-extractors-cheap-port.md
167
+ - TODO.new/09-extractors-expensive-port.md
168
+ - TODO.new/10-aggregations-ucd-rewrite.md
169
+ - TODO.new/11-differ-and-library-auditor-port.md
170
+ - TODO.new/12-formatters-port.md
171
+ - TODO.new/13-directory-emitter.md
172
+ - TODO.new/14-html-face-browser.md
173
+ - TODO.new/15-html-library-browser.md
174
+ - TODO.new/16-cli-audit-subcommands.md
175
+ - TODO.new/17-fontisan-cleanup-audit.md
176
+ - TODO.new/18-fontisan-cleanup-ucd.md
177
+ - TODO.new/19-fontisan-docs-update.md
178
+ - TODO.new/20-canonical-resolver-4-tier.md
179
+ - TODO.new/21-canonical-unicode17-build.md
180
+ - TODO.new/22-implementation-order.md
181
+ - UCODE_CHANGELOG.md
182
+ - exe/ucode
183
+ - lib/ucode.rb
184
+ - lib/ucode/aggregator.rb
185
+ - lib/ucode/audit.rb
186
+ - lib/ucode/audit/block_aggregator.rb
187
+ - lib/ucode/audit/codepoint_range_coalescer.rb
188
+ - lib/ucode/audit/context.rb
189
+ - lib/ucode/audit/discrepancy_detector.rb
190
+ - lib/ucode/audit/extractors.rb
191
+ - lib/ucode/audit/extractors/aggregations.rb
192
+ - lib/ucode/audit/extractors/base.rb
193
+ - lib/ucode/audit/extractors/color_capabilities.rb
194
+ - lib/ucode/audit/extractors/coverage.rb
195
+ - lib/ucode/audit/extractors/hinting.rb
196
+ - lib/ucode/audit/extractors/identity.rb
197
+ - lib/ucode/audit/extractors/licensing.rb
198
+ - lib/ucode/audit/extractors/metrics.rb
199
+ - lib/ucode/audit/extractors/opentype_layout.rb
200
+ - lib/ucode/audit/extractors/provenance.rb
201
+ - lib/ucode/audit/extractors/style.rb
202
+ - lib/ucode/audit/extractors/variation_detail.rb
203
+ - lib/ucode/audit/plane_aggregator.rb
204
+ - lib/ucode/audit/registry.rb
205
+ - lib/ucode/audit/script_aggregator.rb
206
+ - lib/ucode/cache.rb
207
+ - lib/ucode/cli.rb
208
+ - lib/ucode/commands.rb
209
+ - lib/ucode/commands/build.rb
210
+ - lib/ucode/commands/cache.rb
211
+ - lib/ucode/commands/fetch.rb
212
+ - lib/ucode/commands/font_coverage.rb
213
+ - lib/ucode/commands/glyphs.rb
214
+ - lib/ucode/commands/lookup.rb
215
+ - lib/ucode/commands/parse.rb
216
+ - lib/ucode/commands/site.rb
217
+ - lib/ucode/config.rb
218
+ - lib/ucode/coordinator.rb
219
+ - lib/ucode/coordinator/indices.rb
220
+ - lib/ucode/database.rb
221
+ - lib/ucode/db_builder.rb
222
+ - lib/ucode/error.rb
223
+ - lib/ucode/fetch.rb
224
+ - lib/ucode/fetch/code_charts.rb
225
+ - lib/ucode/fetch/http.rb
226
+ - lib/ucode/fetch/ucd_zip.rb
227
+ - lib/ucode/fetch/unihan_zip.rb
228
+ - lib/ucode/glyphs.rb
229
+ - lib/ucode/glyphs/cell_extractor.rb
230
+ - lib/ucode/glyphs/dvisvgm_renderer.rb
231
+ - lib/ucode/glyphs/embedded_fonts.rb
232
+ - lib/ucode/glyphs/embedded_fonts/catalog.rb
233
+ - lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb
234
+ - lib/ucode/glyphs/embedded_fonts/font_entry.rb
235
+ - lib/ucode/glyphs/embedded_fonts/renderer.rb
236
+ - lib/ucode/glyphs/embedded_fonts/source.rb
237
+ - lib/ucode/glyphs/embedded_fonts/svg.rb
238
+ - lib/ucode/glyphs/embedded_fonts/tounicode.rb
239
+ - lib/ucode/glyphs/embedded_fonts/writer.rb
240
+ - lib/ucode/glyphs/grid.rb
241
+ - lib/ucode/glyphs/grid_detector.rb
242
+ - lib/ucode/glyphs/last_resort.rb
243
+ - lib/ucode/glyphs/last_resort/cmap_index.rb
244
+ - lib/ucode/glyphs/last_resort/contents.rb
245
+ - lib/ucode/glyphs/last_resort/glif.rb
246
+ - lib/ucode/glyphs/last_resort/renderer.rb
247
+ - lib/ucode/glyphs/last_resort/source.rb
248
+ - lib/ucode/glyphs/last_resort/svg.rb
249
+ - lib/ucode/glyphs/last_resort/writer.rb
250
+ - lib/ucode/glyphs/monolith_page_map.rb
251
+ - lib/ucode/glyphs/mutool_renderer.rb
252
+ - lib/ucode/glyphs/page_renderer.rb
253
+ - lib/ucode/glyphs/path_bbox.rb
254
+ - lib/ucode/glyphs/pdf2svg_renderer.rb
255
+ - lib/ucode/glyphs/pdf_fetcher.rb
256
+ - lib/ucode/glyphs/pdftocairo_renderer.rb
257
+ - lib/ucode/glyphs/real_fonts.rb
258
+ - lib/ucode/glyphs/real_fonts/block_coverage.rb
259
+ - lib/ucode/glyphs/real_fonts/coverage_auditor.rb
260
+ - lib/ucode/glyphs/real_fonts/font_coverage_report.rb
261
+ - lib/ucode/glyphs/real_fonts/font_locator.rb
262
+ - lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb
263
+ - lib/ucode/glyphs/real_fonts/writer.rb
264
+ - lib/ucode/glyphs/writer.rb
265
+ - lib/ucode/index.rb
266
+ - lib/ucode/index_builder.rb
267
+ - lib/ucode/models.rb
268
+ - lib/ucode/models/audit.rb
269
+ - lib/ucode/models/audit/audit_axis.rb
270
+ - lib/ucode/models/audit/audit_diff.rb
271
+ - lib/ucode/models/audit/audit_report.rb
272
+ - lib/ucode/models/audit/baseline.rb
273
+ - lib/ucode/models/audit/block_summary.rb
274
+ - lib/ucode/models/audit/codepoint_detail.rb
275
+ - lib/ucode/models/audit/codepoint_range.rb
276
+ - lib/ucode/models/audit/codepoint_set_diff.rb
277
+ - lib/ucode/models/audit/color_capabilities.rb
278
+ - lib/ucode/models/audit/discrepancy.rb
279
+ - lib/ucode/models/audit/duplicate_group.rb
280
+ - lib/ucode/models/audit/embedding_type.rb
281
+ - lib/ucode/models/audit/field_change.rb
282
+ - lib/ucode/models/audit/fs_selection_flags.rb
283
+ - lib/ucode/models/audit/gasp_range.rb
284
+ - lib/ucode/models/audit/hinting.rb
285
+ - lib/ucode/models/audit/library_summary.rb
286
+ - lib/ucode/models/audit/licensing.rb
287
+ - lib/ucode/models/audit/metrics.rb
288
+ - lib/ucode/models/audit/named_instance.rb
289
+ - lib/ucode/models/audit/opentype_layout.rb
290
+ - lib/ucode/models/audit/plane_summary.rb
291
+ - lib/ucode/models/audit/script_coverage_row.rb
292
+ - lib/ucode/models/audit/script_features.rb
293
+ - lib/ucode/models/audit/script_summary.rb
294
+ - lib/ucode/models/audit/variation_detail.rb
295
+ - lib/ucode/models/bidi_bracket_pair.rb
296
+ - lib/ucode/models/bidi_mirroring.rb
297
+ - lib/ucode/models/binary_property_assignment.rb
298
+ - lib/ucode/models/block.rb
299
+ - lib/ucode/models/case_folding_rule.rb
300
+ - lib/ucode/models/cjk_radical.rb
301
+ - lib/ucode/models/codepoint.rb
302
+ - lib/ucode/models/codepoint/bidi.rb
303
+ - lib/ucode/models/codepoint/break_segmentation.rb
304
+ - lib/ucode/models/codepoint/case_folding.rb
305
+ - lib/ucode/models/codepoint/casing.rb
306
+ - lib/ucode/models/codepoint/decomposition.rb
307
+ - lib/ucode/models/codepoint/display.rb
308
+ - lib/ucode/models/codepoint/emoji.rb
309
+ - lib/ucode/models/codepoint/hangul.rb
310
+ - lib/ucode/models/codepoint/identifier.rb
311
+ - lib/ucode/models/codepoint/indic.rb
312
+ - lib/ucode/models/codepoint/joining.rb
313
+ - lib/ucode/models/codepoint/normalization.rb
314
+ - lib/ucode/models/codepoint/numeric_value.rb
315
+ - lib/ucode/models/name_alias.rb
316
+ - lib/ucode/models/named_sequence.rb
317
+ - lib/ucode/models/names_list_entry.rb
318
+ - lib/ucode/models/plane.rb
319
+ - lib/ucode/models/property_alias.rb
320
+ - lib/ucode/models/property_value_alias.rb
321
+ - lib/ucode/models/relationship.rb
322
+ - lib/ucode/models/relationship/compat_equiv.rb
323
+ - lib/ucode/models/relationship/cross_reference.rb
324
+ - lib/ucode/models/relationship/footnote.rb
325
+ - lib/ucode/models/relationship/informal_alias.rb
326
+ - lib/ucode/models/relationship/sample_sequence.rb
327
+ - lib/ucode/models/relationship/variation_sequence.rb
328
+ - lib/ucode/models/script.rb
329
+ - lib/ucode/models/special_casing_rule.rb
330
+ - lib/ucode/models/standardized_variant.rb
331
+ - lib/ucode/models/unihan_entry.rb
332
+ - lib/ucode/parsers.rb
333
+ - lib/ucode/parsers/auxiliary.rb
334
+ - lib/ucode/parsers/base.rb
335
+ - lib/ucode/parsers/bidi_brackets.rb
336
+ - lib/ucode/parsers/bidi_mirroring.rb
337
+ - lib/ucode/parsers/blocks.rb
338
+ - lib/ucode/parsers/case_folding.rb
339
+ - lib/ucode/parsers/cjk_radicals.rb
340
+ - lib/ucode/parsers/derived_age.rb
341
+ - lib/ucode/parsers/derived_core_properties.rb
342
+ - lib/ucode/parsers/extracted_properties.rb
343
+ - lib/ucode/parsers/name_aliases.rb
344
+ - lib/ucode/parsers/named_sequences.rb
345
+ - lib/ucode/parsers/names_list.rb
346
+ - lib/ucode/parsers/property_aliases.rb
347
+ - lib/ucode/parsers/property_value_aliases.rb
348
+ - lib/ucode/parsers/script_extensions.rb
349
+ - lib/ucode/parsers/scripts.rb
350
+ - lib/ucode/parsers/special_casing.rb
351
+ - lib/ucode/parsers/standardized_variants.rb
352
+ - lib/ucode/parsers/unicode_data.rb
353
+ - lib/ucode/parsers/unicode_data/hangul_name.rb
354
+ - lib/ucode/parsers/unihan.rb
355
+ - lib/ucode/range_entry.rb
356
+ - lib/ucode/repo.rb
357
+ - lib/ucode/repo/aggregate_writer.rb
358
+ - lib/ucode/repo/atomic_writes.rb
359
+ - lib/ucode/repo/codepoint_writer.rb
360
+ - lib/ucode/repo/paths.rb
361
+ - lib/ucode/site.rb
362
+ - lib/ucode/site/config_emitter.rb
363
+ - lib/ucode/site/generator.rb
364
+ - lib/ucode/site/search_index.rb
365
+ - lib/ucode/site/template/.gitignore
366
+ - lib/ucode/site/template/.vitepress/config.ts
367
+ - lib/ucode/site/template/.vitepress/theme/index.js
368
+ - lib/ucode/site/template/char/[codepoint].md
369
+ - lib/ucode/site/template/components/BlockView.vue
370
+ - lib/ucode/site/template/components/CharView.vue
371
+ - lib/ucode/site/template/components/PlaneView.vue
372
+ - lib/ucode/site/template/components/SearchView.vue
373
+ - lib/ucode/site/template/index.md
374
+ - lib/ucode/site/template/package.json
375
+ - lib/ucode/site/template/search.md
376
+ - lib/ucode/version.rb
377
+ - lib/ucode/version_resolver.rb
378
+ - ucode.gemspec
379
+ homepage: https://github.com/fontist/ucode
380
+ licenses:
381
+ - BSD-2-Clause
382
+ metadata:
383
+ homepage_uri: https://github.com/fontist/ucode
384
+ source_code_uri: https://github.com/fontist/ucode
385
+ changelog_uri: https://github.com/fontist/ucode/blob/main/CHANGELOG.md
386
+ rubygems_mfa_required: 'true'
387
+ rdoc_options: []
388
+ require_paths:
389
+ - lib
390
+ required_ruby_version: !ruby/object:Gem::Requirement
391
+ requirements:
392
+ - - ">="
393
+ - !ruby/object:Gem::Version
394
+ version: 3.2.0
395
+ required_rubygems_version: !ruby/object:Gem::Requirement
396
+ requirements:
397
+ - - ">="
398
+ - !ruby/object:Gem::Version
399
+ version: '0'
400
+ requirements: []
401
+ rubygems_version: 3.6.9
402
+ specification_version: 4
403
+ summary: Unicode Character Database toolkit — lookup, dataset, glyphs, site
404
+ test_files: []