ucode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. checksums.yaml +7 -0
  2. data/CLAUDE.md +211 -0
  3. data/Gemfile +22 -0
  4. data/Gemfile.lock +406 -0
  5. data/README.md +469 -0
  6. data/Rakefile +18 -0
  7. data/TODO.new/00-README.md +66 -0
  8. data/TODO.new/01-pillar-terminology-alignment.md +69 -0
  9. data/TODO.new/02-audit-schema-design.md +255 -0
  10. data/TODO.new/03-directory-output-spec.md +203 -0
  11. data/TODO.new/04-fontist-org-contract.md +173 -0
  12. data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
  13. data/TODO.new/06-audit-namespace-skeleton.md +105 -0
  14. data/TODO.new/07-audit-models-port.md +132 -0
  15. data/TODO.new/08-extractors-cheap-port.md +113 -0
  16. data/TODO.new/09-extractors-expensive-port.md +99 -0
  17. data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
  18. data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
  19. data/TODO.new/12-formatters-port.md +115 -0
  20. data/TODO.new/13-directory-emitter.md +147 -0
  21. data/TODO.new/14-html-face-browser.md +144 -0
  22. data/TODO.new/15-html-library-browser.md +102 -0
  23. data/TODO.new/16-cli-audit-subcommands.md +142 -0
  24. data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
  25. data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
  26. data/TODO.new/19-fontisan-docs-update.md +155 -0
  27. data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
  28. data/TODO.new/21-canonical-unicode17-build.md +148 -0
  29. data/TODO.new/22-implementation-order.md +176 -0
  30. data/UCODE_CHANGELOG.md +97 -0
  31. data/exe/ucode +8 -0
  32. data/lib/ucode/aggregator.rb +77 -0
  33. data/lib/ucode/audit/block_aggregator.rb +90 -0
  34. data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
  35. data/lib/ucode/audit/context.rb +137 -0
  36. data/lib/ucode/audit/discrepancy_detector.rb +213 -0
  37. data/lib/ucode/audit/extractors/aggregations.rb +70 -0
  38. data/lib/ucode/audit/extractors/base.rb +21 -0
  39. data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
  40. data/lib/ucode/audit/extractors/coverage.rb +55 -0
  41. data/lib/ucode/audit/extractors/hinting.rb +199 -0
  42. data/lib/ucode/audit/extractors/identity.rb +65 -0
  43. data/lib/ucode/audit/extractors/licensing.rb +75 -0
  44. data/lib/ucode/audit/extractors/metrics.rb +108 -0
  45. data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
  46. data/lib/ucode/audit/extractors/provenance.rb +34 -0
  47. data/lib/ucode/audit/extractors/style.rb +88 -0
  48. data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
  49. data/lib/ucode/audit/extractors.rb +31 -0
  50. data/lib/ucode/audit/plane_aggregator.rb +37 -0
  51. data/lib/ucode/audit/registry.rb +63 -0
  52. data/lib/ucode/audit/script_aggregator.rb +92 -0
  53. data/lib/ucode/audit.rb +27 -0
  54. data/lib/ucode/cache.rb +113 -0
  55. data/lib/ucode/cli.rb +272 -0
  56. data/lib/ucode/commands/build.rb +68 -0
  57. data/lib/ucode/commands/cache.rb +46 -0
  58. data/lib/ucode/commands/fetch.rb +62 -0
  59. data/lib/ucode/commands/font_coverage.rb +57 -0
  60. data/lib/ucode/commands/glyphs.rb +136 -0
  61. data/lib/ucode/commands/lookup.rb +65 -0
  62. data/lib/ucode/commands/parse.rb +62 -0
  63. data/lib/ucode/commands/site.rb +33 -0
  64. data/lib/ucode/commands.rb +19 -0
  65. data/lib/ucode/config.rb +110 -0
  66. data/lib/ucode/coordinator/indices.rb +34 -0
  67. data/lib/ucode/coordinator.rb +397 -0
  68. data/lib/ucode/database.rb +214 -0
  69. data/lib/ucode/db_builder.rb +107 -0
  70. data/lib/ucode/error.rb +96 -0
  71. data/lib/ucode/fetch/code_charts.rb +57 -0
  72. data/lib/ucode/fetch/http.rb +83 -0
  73. data/lib/ucode/fetch/ucd_zip.rb +57 -0
  74. data/lib/ucode/fetch/unihan_zip.rb +57 -0
  75. data/lib/ucode/fetch.rb +14 -0
  76. data/lib/ucode/glyphs/cell_extractor.rb +130 -0
  77. data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
  78. data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
  79. data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
  80. data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
  81. data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
  82. data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
  83. data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
  84. data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
  85. data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
  86. data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
  87. data/lib/ucode/glyphs/grid.rb +30 -0
  88. data/lib/ucode/glyphs/grid_detector.rb +165 -0
  89. data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
  90. data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
  91. data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
  92. data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
  93. data/lib/ucode/glyphs/last_resort/source.rb +125 -0
  94. data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
  95. data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
  96. data/lib/ucode/glyphs/last_resort.rb +36 -0
  97. data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
  98. data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
  99. data/lib/ucode/glyphs/page_renderer.rb +221 -0
  100. data/lib/ucode/glyphs/path_bbox.rb +62 -0
  101. data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
  102. data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
  103. data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
  104. data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
  105. data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
  106. data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
  107. data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
  108. data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
  109. data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
  110. data/lib/ucode/glyphs/real_fonts.rb +32 -0
  111. data/lib/ucode/glyphs/writer.rb +250 -0
  112. data/lib/ucode/glyphs.rb +27 -0
  113. data/lib/ucode/index.rb +106 -0
  114. data/lib/ucode/index_builder.rb +94 -0
  115. data/lib/ucode/models/audit/audit_axis.rb +30 -0
  116. data/lib/ucode/models/audit/audit_diff.rb +77 -0
  117. data/lib/ucode/models/audit/audit_report.rb +137 -0
  118. data/lib/ucode/models/audit/baseline.rb +32 -0
  119. data/lib/ucode/models/audit/block_summary.rb +72 -0
  120. data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
  121. data/lib/ucode/models/audit/codepoint_range.rb +39 -0
  122. data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
  123. data/lib/ucode/models/audit/color_capabilities.rb +91 -0
  124. data/lib/ucode/models/audit/discrepancy.rb +38 -0
  125. data/lib/ucode/models/audit/duplicate_group.rb +23 -0
  126. data/lib/ucode/models/audit/embedding_type.rb +81 -0
  127. data/lib/ucode/models/audit/field_change.rb +28 -0
  128. data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
  129. data/lib/ucode/models/audit/gasp_range.rb +63 -0
  130. data/lib/ucode/models/audit/hinting.rb +99 -0
  131. data/lib/ucode/models/audit/library_summary.rb +40 -0
  132. data/lib/ucode/models/audit/licensing.rb +48 -0
  133. data/lib/ucode/models/audit/metrics.rb +111 -0
  134. data/lib/ucode/models/audit/named_instance.rb +41 -0
  135. data/lib/ucode/models/audit/opentype_layout.rb +38 -0
  136. data/lib/ucode/models/audit/plane_summary.rb +31 -0
  137. data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
  138. data/lib/ucode/models/audit/script_features.rb +28 -0
  139. data/lib/ucode/models/audit/script_summary.rb +54 -0
  140. data/lib/ucode/models/audit/variation_detail.rb +42 -0
  141. data/lib/ucode/models/audit.rb +50 -0
  142. data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
  143. data/lib/ucode/models/bidi_mirroring.rb +19 -0
  144. data/lib/ucode/models/binary_property_assignment.rb +26 -0
  145. data/lib/ucode/models/block.rb +36 -0
  146. data/lib/ucode/models/case_folding_rule.rb +23 -0
  147. data/lib/ucode/models/cjk_radical.rb +23 -0
  148. data/lib/ucode/models/codepoint/bidi.rb +28 -0
  149. data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
  150. data/lib/ucode/models/codepoint/case_folding.rb +25 -0
  151. data/lib/ucode/models/codepoint/casing.rb +32 -0
  152. data/lib/ucode/models/codepoint/decomposition.rb +27 -0
  153. data/lib/ucode/models/codepoint/display.rb +24 -0
  154. data/lib/ucode/models/codepoint/emoji.rb +29 -0
  155. data/lib/ucode/models/codepoint/hangul.rb +20 -0
  156. data/lib/ucode/models/codepoint/identifier.rb +30 -0
  157. data/lib/ucode/models/codepoint/indic.rb +20 -0
  158. data/lib/ucode/models/codepoint/joining.rb +20 -0
  159. data/lib/ucode/models/codepoint/normalization.rb +35 -0
  160. data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
  161. data/lib/ucode/models/codepoint.rb +122 -0
  162. data/lib/ucode/models/name_alias.rb +21 -0
  163. data/lib/ucode/models/named_sequence.rb +19 -0
  164. data/lib/ucode/models/names_list_entry.rb +38 -0
  165. data/lib/ucode/models/plane.rb +36 -0
  166. data/lib/ucode/models/property_alias.rb +24 -0
  167. data/lib/ucode/models/property_value_alias.rb +26 -0
  168. data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
  169. data/lib/ucode/models/relationship/cross_reference.rb +17 -0
  170. data/lib/ucode/models/relationship/footnote.rb +24 -0
  171. data/lib/ucode/models/relationship/informal_alias.rb +18 -0
  172. data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
  173. data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
  174. data/lib/ucode/models/relationship.rb +57 -0
  175. data/lib/ucode/models/script.rb +41 -0
  176. data/lib/ucode/models/special_casing_rule.rb +28 -0
  177. data/lib/ucode/models/standardized_variant.rb +24 -0
  178. data/lib/ucode/models/unihan_entry.rb +23 -0
  179. data/lib/ucode/models.rb +47 -0
  180. data/lib/ucode/parsers/auxiliary.rb +26 -0
  181. data/lib/ucode/parsers/base.rb +137 -0
  182. data/lib/ucode/parsers/bidi_brackets.rb +41 -0
  183. data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
  184. data/lib/ucode/parsers/blocks.rb +63 -0
  185. data/lib/ucode/parsers/case_folding.rb +53 -0
  186. data/lib/ucode/parsers/cjk_radicals.rb +102 -0
  187. data/lib/ucode/parsers/derived_age.rb +59 -0
  188. data/lib/ucode/parsers/derived_core_properties.rb +60 -0
  189. data/lib/ucode/parsers/extracted_properties.rb +74 -0
  190. data/lib/ucode/parsers/name_aliases.rb +44 -0
  191. data/lib/ucode/parsers/named_sequences.rb +51 -0
  192. data/lib/ucode/parsers/names_list.rb +250 -0
  193. data/lib/ucode/parsers/property_aliases.rb +41 -0
  194. data/lib/ucode/parsers/property_value_aliases.rb +46 -0
  195. data/lib/ucode/parsers/script_extensions.rb +64 -0
  196. data/lib/ucode/parsers/scripts.rb +60 -0
  197. data/lib/ucode/parsers/special_casing.rb +62 -0
  198. data/lib/ucode/parsers/standardized_variants.rb +56 -0
  199. data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
  200. data/lib/ucode/parsers/unicode_data.rb +268 -0
  201. data/lib/ucode/parsers/unihan.rb +125 -0
  202. data/lib/ucode/parsers.rb +35 -0
  203. data/lib/ucode/range_entry.rb +58 -0
  204. data/lib/ucode/repo/aggregate_writer.rb +364 -0
  205. data/lib/ucode/repo/atomic_writes.rb +48 -0
  206. data/lib/ucode/repo/codepoint_writer.rb +96 -0
  207. data/lib/ucode/repo/paths.rb +122 -0
  208. data/lib/ucode/repo.rb +22 -0
  209. data/lib/ucode/site/config_emitter.rb +124 -0
  210. data/lib/ucode/site/generator.rb +178 -0
  211. data/lib/ucode/site/search_index.rb +68 -0
  212. data/lib/ucode/site/template/.gitignore +4 -0
  213. data/lib/ucode/site/template/.vitepress/config.ts +8 -0
  214. data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
  215. data/lib/ucode/site/template/char/[codepoint].md +13 -0
  216. data/lib/ucode/site/template/components/BlockView.vue +57 -0
  217. data/lib/ucode/site/template/components/CharView.vue +85 -0
  218. data/lib/ucode/site/template/components/PlaneView.vue +56 -0
  219. data/lib/ucode/site/template/components/SearchView.vue +66 -0
  220. data/lib/ucode/site/template/index.md +25 -0
  221. data/lib/ucode/site/template/package.json +18 -0
  222. data/lib/ucode/site/template/search.md +9 -0
  223. data/lib/ucode/site.rb +13 -0
  224. data/lib/ucode/version.rb +5 -0
  225. data/lib/ucode/version_resolver.rb +76 -0
  226. data/lib/ucode.rb +74 -0
  227. data/ucode.gemspec +56 -0
  228. metadata +404 -0
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/cache"
6
+ require "ucode/database"
7
+ require "ucode/repo"
8
+ require "ucode/version_resolver"
9
+
10
+ module Ucode
11
+ module Commands
12
# `ucode lookup` — answers read-only questions about one codepoint by
# consulting the SQLite cache and the generated output JSON tree.
# Exposes three entry points: #lookup_block, #lookup_script, #lookup_char.
class LookupCommand
  BlockResult = Struct.new(:codepoint, :block, keyword_init: true)
  ScriptResult = Struct.new(:codepoint, :script, keyword_init: true)
  CharResult = Struct.new(:codepoint, :block_id, :glyph_path, keyword_init: true)
  private_constant :BlockResult, :ScriptResult, :CharResult

  # Asks the database which block +codepoint+ belongs to.
  #
  # @param version_intent [nil, :default, :latest, String]
  # @param codepoint [Integer]
  # @return [BlockResult]
  def lookup_block(version_intent, codepoint:)
    found = with_db(VersionResolver.resolve(version_intent)) do |db|
      db.lookup_block(codepoint)
    end
    BlockResult.new(codepoint: codepoint, block: found)
  end

  # Asks the database which script +codepoint+ belongs to.
  #
  # @param version_intent [nil, :default, :latest, String]
  # @param codepoint [Integer]
  # @return [ScriptResult]
  def lookup_script(version_intent, codepoint:)
    found = with_db(VersionResolver.resolve(version_intent)) do |db|
      db.lookup_script(codepoint)
    end
    ScriptResult.new(codepoint: codepoint, script: found)
  end

  # Combines the block lookup with a check for a glyph file under
  # +output_root+. The glyph path is only probed when the database
  # returned a block for the codepoint.
  #
  # @param version_intent [nil, :default, :latest, String]
  # @param codepoint [Integer]
  # @param output_root [String, Pathname]
  # @return [CharResult]
  def lookup_char(version_intent, codepoint:, output_root:)
    block_id = with_db(VersionResolver.resolve(version_intent)) do |db|
      db.lookup_block(codepoint)
    end
    glyph = (glyph_path(output_root, block_id, codepoint) if block_id)
    CharResult.new(codepoint: codepoint, block_id: block_id, glyph_path: glyph)
  end

  private

  # Opens the database for +version+, yields it, and closes it again
  # even when the block raises. Returns the block's value.
  def with_db(version)
    db = Database.open(version)
    begin
      yield db
    ensure
      db&.close
    end
  end

  # Returns the glyph path for the codepoint when that path exists,
  # otherwise nil.
  def glyph_path(output_root, block_id, codepoint)
    candidate = Repo::Paths.codepoint_glyph_path(
      output_root, block_id, Repo::Paths.cp_id(codepoint)
    )
    candidate if candidate.exist?
  end
end
64
+ end
65
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/cache"
6
+ require "ucode/coordinator"
7
+ require "ucode/parsers"
8
+ require "ucode/repo"
9
+ require "ucode/version_resolver"
10
+
11
+ module Ucode
12
+ module Commands
13
# `ucode parse` — drives one streaming pass over the Coordinator output
# and materialises the on-disk JSON tree at `output/`: every codepoint is
# written as it arrives and handed to the aggregate writer, which is
# flushed once after the stream ends.
class ParseCommand
  # @param version_intent [nil, :default, :latest, String]
  # @param output_root [String, Pathname]
  # @return [Hash] { version:, codepoint_count: }
  def call(version_intent, output_root:)
    version = VersionResolver.resolve(version_intent)
    root = Pathname.new(output_root)
    ucd_dir = Cache.ucd_dir(version)
    sources = { ucd_dir: ucd_dir, unihan_dir: Cache.unihan_dir(version) }

    coordinator = Coordinator.new
    per_codepoint = Repo::CodepointWriter.new(root, parallel_workers: workers)
    aggregate = Repo::AggregateWriter.new(root)

    # Remember the indices object handed to the first yielded codepoint;
    # it is reused for the final flush.
    seen_indices = nil
    coordinator.each_codepoint_with_indices(**sources) do |indices, cp|
      seen_indices ||= indices
      per_codepoint.write(cp)
      aggregate.add(cp)
    end

    # Nothing was yielded: ask the coordinator for the indices directly.
    seen_indices ||= coordinator.indices_for(**sources)

    aggregate.flush(
      ucd_version: version,
      indices: seen_indices,
      property_aliases: load_records(ucd_dir, "PropertyAliases.txt", Parsers::PropertyAliases),
      property_value_aliases: load_records(ucd_dir, "PropertyValueAliases.txt", Parsers::PropertyValueAliases),
      named_sequences: load_records(ucd_dir, "NamedSequences.txt", Parsers::NamedSequences),
    )

    { version: version, codepoint_count: aggregate.codepoint_count }
  end

  private

  # Worker count taken from the shared configuration.
  def workers
    Ucode.configuration.parallel_workers
  end

  # Reads every record of +filename+ with +parser+; a missing file
  # yields an empty list instead of an error.
  def load_records(ucd_dir, filename, parser)
    source = ucd_dir.join(filename)
    source.exist? ? parser.each_record(source).to_a : []
  end
end
61
+ end
62
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
+ require "ucode/site"
6
+ require "ucode/version_resolver"
7
+
8
+ module Ucode
9
+ module Commands
10
# `ucode site` — two subactions: #init lays down the Vitepress scaffold,
# #build generates config/pages from the current `output/` tree.
class SiteCommand
  # @param site_root [String, Pathname]
  # @return [Hash] { files_copied: }
  def init(site_root:)
    # NOTE(review): "/" is passed as output_root here, presumably as a
    # placeholder because init does not read the output tree — confirm
    # against Site::Generator before changing it.
    generator = Site::Generator.new(output_root: "/", site_root: Pathname.new(site_root))
    { files_copied: generator.init }
  end

  # Extra keyword arguments are accepted and ignored.
  #
  # @param output_root [String, Pathname]
  # @param site_root [String, Pathname]
  # @return [Hash] the Generator's build tally
  def build(output_root:, site_root:, **_unused)
    Site::Generator.new(
      output_root: Pathname.new(output_root),
      site_root: Pathname.new(site_root),
    ).build
  end
end
32
+ end
33
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ucode
4
# Commands — namespace holding one command class per CLI subcommand.
#
# Design intent: the Thor layer only dispatches and formats, while the
# command classes registered here do the actual work and hand back
# structured results, which keeps them testable in-process.
#
# Every class is registered for lazy loading; its file is required the
# first time the constant is referenced.
module Commands
  {
    FetchCommand: "ucode/commands/fetch",
    ParseCommand: "ucode/commands/parse",
    GlyphsCommand: "ucode/commands/glyphs",
    SiteCommand: "ucode/commands/site",
    LookupCommand: "ucode/commands/lookup",
    CacheCommand: "ucode/commands/cache",
    BuildCommand: "ucode/commands/build",
    FontCoverageCommand: "ucode/commands/font_coverage",
  }.each { |const_name, feature| autoload const_name, feature }
end
19
+ end
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+ require "logger"
5
+
6
+ module Ucode
7
# Single injection point for all ucode runtime configuration.
#
# By design this is the ONLY place in the codebase that reads ENV
# directly; every other class reads configuration through
# Ucode.configuration.
#
# Tests inject fresh Config instances; production reads ENV once on first
# access via Ucode.configuration.
class Config
  KNOWN_VERSIONS = %w[15.0.0 15.1.0 16.0.0 17.0.0].freeze

  # Not referenced inside this class (the effective default comes from
  # #default_cache_root); kept so external references keep resolving.
  DEFAULT_CACHE_ROOT = nil

  attr_accessor :cache_root, :output_dir, :default_version, :known_versions,
                :http_timeout, :http_retries, :pdf_renderer,
                :parallel_workers, :ucd_base_url, :unihan_base_url,
                :charts_base_url, :listing_url, :extracted_files,
                :auxiliary_files

  # Logger shared by every subsystem (Fetch, Coordinator, Writer, …).
  # Tests can swap to a StringIO logger to capture output.
  attr_accessor :logger

  # Builds a configuration with built-in defaults. Three integer settings
  # can be overridden through the environment: UCODE_HTTP_TIMEOUT,
  # UCODE_HTTP_RETRIES and UCODE_PARALLEL_WORKERS. The cache root honours
  # XDG_CACHE_HOME.
  def initialize
    @cache_root = default_cache_root
    @output_dir = Pathname.new("./output")
    @default_version = "17.0.0"
    @known_versions = KNOWN_VERSIONS.dup
    @http_timeout = env_int("UCODE_HTTP_TIMEOUT", 30)
    @http_retries = env_int("UCODE_HTTP_RETRIES", 3)
    @pdf_renderer = :mutool
    @parallel_workers = env_int("UCODE_PARALLEL_WORKERS", 8)
    @ucd_base_url = "https://www.unicode.org/Public"
    @unihan_base_url = "https://www.unicode.org/Public"
    @charts_base_url = "https://www.unicode.org/charts/PDF"
    @listing_url = "https://www.unicode.org/Public/"
    @extracted_files = default_extracted_files
    @auxiliary_files = default_auxiliary_files
    @logger = Logger.new($stderr, level: Logger::WARN)
  end

  # @param version [String]
  # @return [Boolean] whether +version+ is listed in #known_versions
  def known?(version)
    known_versions.include?(version)
  end

  private

  # <XDG_CACHE_HOME or ~/.cache>/ucode/unicode
  def default_cache_root
    xdg = ENV["XDG_CACHE_HOME"]
    base = nil_or_empty?(xdg) ? File.join(Dir.home, ".cache") : xdg
    Pathname.new(base).join("ucode", "unicode")
  end

  def nil_or_empty?(value)
    value.nil? || value.empty?
  end

  # Reads an integer setting from the environment.
  #
  # The value is parsed strictly as base 10. Without an explicit radix,
  # Kernel#Integer honours prefixes, so "030" would be read as octal 24
  # and "0x10" as 16 — surprising for a timeout or worker count.
  #
  # @param name [String] environment variable name
  # @param default [Integer] value used when the variable is unset,
  #   empty, or not a decimal integer
  # @return [Integer]
  def env_int(name, default)
    value = ENV[name]
    return default if nil_or_empty?(value)

    Integer(value, 10)
  rescue ArgumentError
    default
  end

  def default_extracted_files
    %w[
      DerivedName.txt
      DerivedGeneralCategory.txt
      DerivedCombiningClass.txt
      DerivedBidiClass.txt
      DerivedDecompositionType.txt
      DerivedNumericType.txt
      DerivedNumericValues.txt
      DerivedJoiningGroup.txt
      DerivedJoiningType.txt
      DerivedLineBreak.txt
      DerivedBinaryProperties.txt
      DerivedAge.txt
      DerivedCoreProperties.txt
      DerivedNormalizationProps.txt
    ]
  end

  def default_auxiliary_files
    %w[
      auxiliary/GraphemeBreakProperty.txt
      auxiliary/WordBreakProperty.txt
      auxiliary/SentenceBreakProperty.txt
      auxiliary/VerticalOrientation.txt
      auxiliary/IndicPositionalCategory.txt
      auxiliary/IndicSyllabicCategory.txt
      auxiliary/IdentifierStatus.txt
      auxiliary/IdentifierType.txt
      LineBreak.txt
      EastAsianWidth.txt
    ]
  end
end
110
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ucode
4
+ class Coordinator
5
# Bundle of the pre-built lookup indices that the per-codepoint
# enrichment pass reads from. Each member is intended to be treated as
# read-only once `build_indices` has returned. Per the original design
# note, range-based files are stored as sorted Arrays (bsearched by
# `range_first`) and per-codepoint files as flat Hashes keyed by Integer
# codepoint or by "U+XXXX" id string.
#
# The struct is keyword-initialised so that the Coordinator's
# `Indices.new(...)` call doubles as a catalogue of every parsed file.
# Adding an index means: one member here, one builder call in
# `Coordinator#build_indices`, and one assignment in `#enrich`.
Indices = Struct.new(
  *%i[
    blocks
    scripts
    property_value_aliases
    derived_age
    binary_properties
    script_extensions
    bidi_mirroring
    bidi_brackets
    special_casing
    case_folding
    name_aliases
    cjk_radicals
    standardized_variants
    names_list
    unihan
  ],
  keyword_init: true,
)
33
+ end
34
+ end