fontisan 0.2.16 → 0.2.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (318) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +14 -90
  3. data/Gemfile +6 -3
  4. data/README.adoc +257 -1
  5. data/docs/.vitepress/config.ts +68 -8
  6. data/docs/.vitepress/theme/style.css +570 -272
  7. data/docs/CONVERSION_GUIDE.adoc +31 -8
  8. data/docs/EXTRACT_TTC_MIGRATION.md +1 -1
  9. data/docs/WOFF_WOFF2_FORMATS.adoc +53 -0
  10. data/docs/api/conversion-options.md +37 -14
  11. data/docs/api/font-loader.md +21 -15
  12. data/docs/cli/audit.md +337 -0
  13. data/docs/cli/convert.md +20 -1
  14. data/docs/cli/index.md +31 -0
  15. data/docs/guide/color.md +1 -1
  16. data/docs/guide/conversion/options.md +32 -3
  17. data/docs/guide/conversion/ttf-otf.md +1 -1
  18. data/docs/guide/conversion/type1.md +1 -1
  19. data/docs/guide/conversion/web.md +91 -32
  20. data/docs/guide/conversion.md +6 -5
  21. data/docs/guide/formats/woff.md +35 -11
  22. data/docs/guide/index.md +2 -2
  23. data/docs/guide/migrations/extract-ttc.md +1 -1
  24. data/docs/guide/quick-start.md +4 -4
  25. data/docs/guide/type1.md +4 -4
  26. data/docs/guide/woff.md +19 -17
  27. data/docs/index.md +2 -0
  28. data/docs/lychee.toml +5 -1
  29. data/docs/package.json +1 -1
  30. data/docs/public/robots.txt +4 -0
  31. data/docs/scripts/post-build.mjs +81 -0
  32. data/lib/fontisan/audit/codepoint_range_coalescer.rb +41 -0
  33. data/lib/fontisan/audit/context.rb +122 -0
  34. data/lib/fontisan/audit/differ.rb +124 -0
  35. data/lib/fontisan/audit/extractors/aggregations.rb +54 -0
  36. data/lib/fontisan/audit/extractors/base.rb +26 -0
  37. data/lib/fontisan/audit/extractors/color_capabilities.rb +141 -0
  38. data/lib/fontisan/audit/extractors/coverage.rb +48 -0
  39. data/lib/fontisan/audit/extractors/hinting.rb +197 -0
  40. data/lib/fontisan/audit/extractors/identity.rb +52 -0
  41. data/lib/fontisan/audit/extractors/language_coverage.rb +37 -0
  42. data/lib/fontisan/audit/extractors/licensing.rb +79 -0
  43. data/lib/fontisan/audit/extractors/metrics.rb +103 -0
  44. data/lib/fontisan/audit/extractors/opentype_layout.rb +69 -0
  45. data/lib/fontisan/audit/extractors/provenance.rb +29 -0
  46. data/lib/fontisan/audit/extractors/style.rb +32 -0
  47. data/lib/fontisan/audit/extractors/variation_detail.rb +99 -0
  48. data/lib/fontisan/audit/extractors.rb +27 -0
  49. data/lib/fontisan/audit/library_aggregator.rb +83 -0
  50. data/lib/fontisan/audit/library_auditor.rb +90 -0
  51. data/lib/fontisan/audit/registry.rb +60 -0
  52. data/lib/fontisan/audit/style_extractor.rb +80 -0
  53. data/lib/fontisan/audit.rb +20 -0
  54. data/lib/fontisan/base_collection.rb +23 -9
  55. data/lib/fontisan/binary/structures.rb +0 -2
  56. data/lib/fontisan/binary.rb +11 -0
  57. data/lib/fontisan/cldr/aggregator.rb +33 -0
  58. data/lib/fontisan/cldr/cache_manager.rb +110 -0
  59. data/lib/fontisan/cldr/config.rb +59 -0
  60. data/lib/fontisan/cldr/download_error.rb +9 -0
  61. data/lib/fontisan/cldr/downloader.rb +79 -0
  62. data/lib/fontisan/cldr/error.rb +8 -0
  63. data/lib/fontisan/cldr/index.rb +64 -0
  64. data/lib/fontisan/cldr/index_builder.rb +72 -0
  65. data/lib/fontisan/cldr/unicode_set_parser.rb +172 -0
  66. data/lib/fontisan/cldr/unknown_version_error.rb +9 -0
  67. data/lib/fontisan/cldr/version_resolver.rb +91 -0
  68. data/lib/fontisan/cldr.rb +23 -0
  69. data/lib/fontisan/cli/cldr_cli.rb +85 -0
  70. data/lib/fontisan/cli/ucd_cli.rb +97 -0
  71. data/lib/fontisan/cli.rb +201 -2
  72. data/lib/fontisan/collection/builder.rb +0 -4
  73. data/lib/fontisan/collection/dfont_builder.rb +0 -4
  74. data/lib/fontisan/collection/shared_logic.rb +0 -2
  75. data/lib/fontisan/collection/writer.rb +0 -3
  76. data/lib/fontisan/collection.rb +15 -0
  77. data/lib/fontisan/commands/audit_command.rb +123 -0
  78. data/lib/fontisan/commands/audit_compare_command.rb +66 -0
  79. data/lib/fontisan/commands/audit_library_command.rb +46 -0
  80. data/lib/fontisan/commands/base_command.rb +0 -3
  81. data/lib/fontisan/commands/convert_command.rb +25 -20
  82. data/lib/fontisan/commands/dump_table_command.rb +0 -3
  83. data/lib/fontisan/commands/export_command.rb +0 -4
  84. data/lib/fontisan/commands/features_command.rb +0 -3
  85. data/lib/fontisan/commands/instance_command.rb +0 -5
  86. data/lib/fontisan/commands/ls_command.rb +0 -6
  87. data/lib/fontisan/commands/optical_size_command.rb +0 -3
  88. data/lib/fontisan/commands/pack_command.rb +0 -5
  89. data/lib/fontisan/commands/scripts_command.rb +0 -2
  90. data/lib/fontisan/commands/subset_command.rb +0 -3
  91. data/lib/fontisan/commands/unicode_command.rb +0 -3
  92. data/lib/fontisan/commands/unpack_command.rb +0 -7
  93. data/lib/fontisan/commands/validate_command.rb +0 -8
  94. data/lib/fontisan/commands/variable_command.rb +0 -3
  95. data/lib/fontisan/commands.rb +29 -0
  96. data/lib/fontisan/config/cldr.yml +22 -0
  97. data/lib/fontisan/config/conversion_matrix.yml +38 -0
  98. data/lib/fontisan/config/ucd.yml +23 -0
  99. data/lib/fontisan/constants.rb +48 -6
  100. data/lib/fontisan/conversion_options.rb +30 -19
  101. data/lib/fontisan/converters/cff_table_builder.rb +0 -3
  102. data/lib/fontisan/converters/collection_converter.rb +0 -8
  103. data/lib/fontisan/converters/conversion_strategy.rb +161 -46
  104. data/lib/fontisan/converters/format_converter.rb +143 -32
  105. data/lib/fontisan/converters/glyf_table_builder.rb +0 -2
  106. data/lib/fontisan/converters/outline_converter.rb +0 -19
  107. data/lib/fontisan/converters/outline_extraction.rb +0 -5
  108. data/lib/fontisan/converters/outline_optimizer.rb +0 -5
  109. data/lib/fontisan/converters/svg_generator.rb +0 -4
  110. data/lib/fontisan/converters/table_copier.rb +0 -2
  111. data/lib/fontisan/converters/type1_converter.rb +0 -11
  112. data/lib/fontisan/converters/woff2_encoder.rb +49 -20
  113. data/lib/fontisan/converters/woff_writer.rb +211 -282
  114. data/lib/fontisan/converters.rb +21 -0
  115. data/lib/fontisan/dfont_collection.rb +29 -10
  116. data/lib/fontisan/export/exporter.rb +0 -6
  117. data/lib/fontisan/export/transformers/font_to_ttx.rb +0 -9
  118. data/lib/fontisan/export/transformers/head_transformer.rb +0 -2
  119. data/lib/fontisan/export/transformers/hhea_transformer.rb +0 -2
  120. data/lib/fontisan/export/transformers/maxp_transformer.rb +0 -2
  121. data/lib/fontisan/export/transformers/name_transformer.rb +0 -2
  122. data/lib/fontisan/export/transformers/os2_transformer.rb +0 -2
  123. data/lib/fontisan/export/transformers/post_transformer.rb +0 -2
  124. data/lib/fontisan/export/transformers.rb +17 -0
  125. data/lib/fontisan/export.rb +13 -0
  126. data/lib/fontisan/font_loader.rb +189 -328
  127. data/lib/fontisan/font_writer.rb +0 -1
  128. data/lib/fontisan/formatters/audit_diff_text_renderer.rb +122 -0
  129. data/lib/fontisan/formatters/audit_text_renderer.rb +324 -0
  130. data/lib/fontisan/formatters/library_summary_text_renderer.rb +99 -0
  131. data/lib/fontisan/formatters/text_formatter.rb +6 -0
  132. data/lib/fontisan/formatters.rb +12 -0
  133. data/lib/fontisan/hints/hint_converter.rb +0 -1
  134. data/lib/fontisan/hints/postscript_hint_applier.rb +0 -9
  135. data/lib/fontisan/hints/postscript_hint_extractor.rb +0 -2
  136. data/lib/fontisan/hints/truetype_hint_extractor.rb +0 -2
  137. data/lib/fontisan/hints.rb +16 -0
  138. data/lib/fontisan/metrics_calculator.rb +0 -2
  139. data/lib/fontisan/models/all_scripts_features_info.rb +0 -1
  140. data/lib/fontisan/models/audit/audit_axis.rb +30 -0
  141. data/lib/fontisan/models/audit/audit_block.rb +32 -0
  142. data/lib/fontisan/models/audit/audit_diff.rb +77 -0
  143. data/lib/fontisan/models/audit/audit_report.rb +153 -0
  144. data/lib/fontisan/models/audit/codepoint_range.rb +40 -0
  145. data/lib/fontisan/models/audit/codepoint_set_diff.rb +34 -0
  146. data/lib/fontisan/models/audit/color_capabilities.rb +93 -0
  147. data/lib/fontisan/models/audit/duplicate_group.rb +23 -0
  148. data/lib/fontisan/models/audit/embedding_type.rb +76 -0
  149. data/lib/fontisan/models/audit/field_change.rb +28 -0
  150. data/lib/fontisan/models/audit/fs_selection_flags.rb +61 -0
  151. data/lib/fontisan/models/audit/gasp_range.rb +63 -0
  152. data/lib/fontisan/models/audit/hinting.rb +93 -0
  153. data/lib/fontisan/models/audit/library_summary.rb +40 -0
  154. data/lib/fontisan/models/audit/licensing.rb +48 -0
  155. data/lib/fontisan/models/audit/metrics.rb +111 -0
  156. data/lib/fontisan/models/audit/named_instance.rb +41 -0
  157. data/lib/fontisan/models/audit/opentype_layout.rb +40 -0
  158. data/lib/fontisan/models/audit/script_coverage_row.rb +26 -0
  159. data/lib/fontisan/models/audit/script_features.rb +28 -0
  160. data/lib/fontisan/models/audit/variation_detail.rb +44 -0
  161. data/lib/fontisan/models/audit.rb +33 -0
  162. data/lib/fontisan/models/cldr/language_coverage.rb +31 -0
  163. data/lib/fontisan/models/cldr.rb +12 -0
  164. data/lib/fontisan/models/collection_brief_info.rb +0 -1
  165. data/lib/fontisan/models/collection_info.rb +0 -2
  166. data/lib/fontisan/models/collection_list_info.rb +0 -1
  167. data/lib/fontisan/models/collection_validation_report.rb +0 -2
  168. data/lib/fontisan/models/color_glyph.rb +0 -1
  169. data/lib/fontisan/models/font_report.rb +0 -1
  170. data/lib/fontisan/models/ttx/tables.rb +21 -0
  171. data/lib/fontisan/models/ttx/ttfont.rb +0 -8
  172. data/lib/fontisan/models/ttx.rb +14 -0
  173. data/lib/fontisan/models/ucd/ucd.rb +38 -0
  174. data/lib/fontisan/models/ucd/ucd_char.rb +67 -0
  175. data/lib/fontisan/models/ucd.rb +19 -0
  176. data/lib/fontisan/models.rb +47 -0
  177. data/lib/fontisan/open_type_collection.rb +6 -5
  178. data/lib/fontisan/open_type_font.rb +8 -2
  179. data/lib/fontisan/open_type_font_extensions.rb +9 -9
  180. data/lib/fontisan/optimizers/pattern_analyzer.rb +0 -1
  181. data/lib/fontisan/optimizers.rb +14 -0
  182. data/lib/fontisan/outline_extractor.rb +0 -2
  183. data/lib/fontisan/parsers/dfont_parser.rb +0 -1
  184. data/lib/fontisan/parsers.rb +10 -0
  185. data/lib/fontisan/pipeline/format_detector.rb +29 -102
  186. data/lib/fontisan/pipeline/output_writer.rb +11 -9
  187. data/lib/fontisan/pipeline/strategies/instance_strategy.rb +0 -4
  188. data/lib/fontisan/pipeline/strategies/named_strategy.rb +0 -4
  189. data/lib/fontisan/pipeline/strategies/preserve_strategy.rb +0 -2
  190. data/lib/fontisan/pipeline/strategies.rb +14 -0
  191. data/lib/fontisan/pipeline/transformation_pipeline.rb +0 -7
  192. data/lib/fontisan/pipeline/variation_resolver.rb +0 -7
  193. data/lib/fontisan/pipeline.rb +13 -0
  194. data/lib/fontisan/sfnt_font.rb +29 -14
  195. data/lib/fontisan/sfnt_table.rb +0 -4
  196. data/lib/fontisan/subset/builder.rb +0 -6
  197. data/lib/fontisan/subset.rb +13 -0
  198. data/lib/fontisan/svg/font_generator.rb +0 -4
  199. data/lib/fontisan/svg/glyph_generator.rb +0 -2
  200. data/lib/fontisan/svg.rb +12 -0
  201. data/lib/fontisan/tables/cbdt.rb +0 -1
  202. data/lib/fontisan/tables/cblc.rb +0 -1
  203. data/lib/fontisan/tables/cff/charset.rb +0 -1
  204. data/lib/fontisan/tables/cff/charstring.rb +0 -1
  205. data/lib/fontisan/tables/cff/charstring_rebuilder.rb +0 -4
  206. data/lib/fontisan/tables/cff/charstrings_index.rb +0 -3
  207. data/lib/fontisan/tables/cff/dict.rb +0 -1
  208. data/lib/fontisan/tables/cff/encoding.rb +0 -1
  209. data/lib/fontisan/tables/cff/header.rb +0 -2
  210. data/lib/fontisan/tables/cff/hint_operation_injector.rb +0 -2
  211. data/lib/fontisan/tables/cff/index.rb +0 -1
  212. data/lib/fontisan/tables/cff/private_dict.rb +0 -2
  213. data/lib/fontisan/tables/cff/private_dict_writer.rb +0 -2
  214. data/lib/fontisan/tables/cff/table_builder.rb +0 -6
  215. data/lib/fontisan/tables/cff/top_dict.rb +0 -2
  216. data/lib/fontisan/tables/cff.rb +22 -15
  217. data/lib/fontisan/tables/cff2/charstring_parser.rb +0 -2
  218. data/lib/fontisan/tables/cff2/table_builder.rb +0 -11
  219. data/lib/fontisan/tables/cff2/table_reader.rb +0 -2
  220. data/lib/fontisan/tables/cff2.rb +13 -14
  221. data/lib/fontisan/tables/cmap.rb +24 -2
  222. data/lib/fontisan/tables/cmap_table.rb +0 -3
  223. data/lib/fontisan/tables/colr.rb +0 -1
  224. data/lib/fontisan/tables/cpal.rb +0 -1
  225. data/lib/fontisan/tables/cvar.rb +0 -2
  226. data/lib/fontisan/tables/fvar.rb +0 -1
  227. data/lib/fontisan/tables/glyf/compound_glyph_resolver.rb +0 -2
  228. data/lib/fontisan/tables/glyf/glyph_builder.rb +0 -3
  229. data/lib/fontisan/tables/glyf.rb +0 -6
  230. data/lib/fontisan/tables/glyf_table.rb +0 -3
  231. data/lib/fontisan/tables/gpos.rb +0 -2
  232. data/lib/fontisan/tables/gsub.rb +0 -2
  233. data/lib/fontisan/tables/gvar.rb +0 -2
  234. data/lib/fontisan/tables/head.rb +0 -2
  235. data/lib/fontisan/tables/head_table.rb +0 -3
  236. data/lib/fontisan/tables/hhea.rb +0 -2
  237. data/lib/fontisan/tables/hhea_table.rb +0 -3
  238. data/lib/fontisan/tables/hmtx.rb +0 -2
  239. data/lib/fontisan/tables/hmtx_table.rb +0 -3
  240. data/lib/fontisan/tables/hvar.rb +0 -3
  241. data/lib/fontisan/tables/loca.rb +0 -2
  242. data/lib/fontisan/tables/loca_table.rb +0 -3
  243. data/lib/fontisan/tables/maxp.rb +0 -2
  244. data/lib/fontisan/tables/maxp_table.rb +0 -3
  245. data/lib/fontisan/tables/mvar.rb +0 -3
  246. data/lib/fontisan/tables/name.rb +0 -2
  247. data/lib/fontisan/tables/name_table.rb +0 -3
  248. data/lib/fontisan/tables/os2_table.rb +0 -3
  249. data/lib/fontisan/tables/post_table.rb +0 -3
  250. data/lib/fontisan/tables/sbix.rb +0 -1
  251. data/lib/fontisan/tables/svg.rb +0 -1
  252. data/lib/fontisan/tables/variation_common.rb +0 -1
  253. data/lib/fontisan/tables/vvar.rb +0 -3
  254. data/lib/fontisan/tables.rb +54 -0
  255. data/lib/fontisan/true_type_collection.rb +6 -14
  256. data/lib/fontisan/true_type_font.rb +8 -2
  257. data/lib/fontisan/true_type_font_extensions.rb +9 -9
  258. data/lib/fontisan/type1/afm_generator.rb +0 -4
  259. data/lib/fontisan/type1/conversion_options.rb +0 -2
  260. data/lib/fontisan/type1/encodings.rb +0 -2
  261. data/lib/fontisan/type1/generator.rb +0 -8
  262. data/lib/fontisan/type1/pfa_generator.rb +0 -3
  263. data/lib/fontisan/type1/pfb_generator.rb +0 -5
  264. data/lib/fontisan/type1/pfm_generator.rb +0 -4
  265. data/lib/fontisan/type1.rb +42 -69
  266. data/lib/fontisan/type1_font.rb +40 -11
  267. data/lib/fontisan/ucd/aggregator.rb +73 -0
  268. data/lib/fontisan/ucd/cache_manager.rb +111 -0
  269. data/lib/fontisan/ucd/config.rb +59 -0
  270. data/lib/fontisan/ucd/download_error.rb +9 -0
  271. data/lib/fontisan/ucd/downloader.rb +88 -0
  272. data/lib/fontisan/ucd/error.rb +8 -0
  273. data/lib/fontisan/ucd/index.rb +103 -0
  274. data/lib/fontisan/ucd/index_builder.rb +107 -0
  275. data/lib/fontisan/ucd/range_entry.rb +56 -0
  276. data/lib/fontisan/ucd/unknown_version_error.rb +9 -0
  277. data/lib/fontisan/ucd/version_resolver.rb +79 -0
  278. data/lib/fontisan/ucd.rb +23 -0
  279. data/lib/fontisan/utilities/checksum_calculator.rb +0 -1
  280. data/lib/fontisan/utilities.rb +10 -0
  281. data/lib/fontisan/utils.rb +10 -0
  282. data/lib/fontisan/validation/collection_validator.rb +0 -2
  283. data/lib/fontisan/validation.rb +9 -0
  284. data/lib/fontisan/validators/basic_validator.rb +0 -2
  285. data/lib/fontisan/validators/font_book_validator.rb +0 -2
  286. data/lib/fontisan/validators/opentype_validator.rb +0 -2
  287. data/lib/fontisan/validators/profile_loader.rb +0 -5
  288. data/lib/fontisan/validators/validator.rb +0 -2
  289. data/lib/fontisan/validators/web_font_validator.rb +0 -2
  290. data/lib/fontisan/validators.rb +14 -0
  291. data/lib/fontisan/variable/delta_applicator.rb +0 -4
  292. data/lib/fontisan/variable/instancer.rb +0 -3
  293. data/lib/fontisan/variable/static_font_builder.rb +0 -3
  294. data/lib/fontisan/variable.rb +16 -0
  295. data/lib/fontisan/variation/blend_applier.rb +0 -2
  296. data/lib/fontisan/variation/cache.rb +0 -2
  297. data/lib/fontisan/variation/converter.rb +0 -3
  298. data/lib/fontisan/variation/data_extractor.rb +0 -2
  299. data/lib/fontisan/variation/delta_applier.rb +0 -5
  300. data/lib/fontisan/variation/inspector.rb +0 -1
  301. data/lib/fontisan/variation/instance_generator.rb +0 -6
  302. data/lib/fontisan/variation/instance_writer.rb +0 -5
  303. data/lib/fontisan/variation/metrics_adjuster.rb +0 -4
  304. data/lib/fontisan/variation/optimizer.rb +0 -3
  305. data/lib/fontisan/variation/parallel_generator.rb +0 -3
  306. data/lib/fontisan/variation/subsetter.rb +0 -4
  307. data/lib/fontisan/variation/tuple_variation_header.rb +0 -2
  308. data/lib/fontisan/variation/variable_svg_generator.rb +0 -3
  309. data/lib/fontisan/variation/variation_context.rb +0 -3
  310. data/lib/fontisan/variation/variation_preserver.rb +0 -3
  311. data/lib/fontisan/variation.rb +31 -0
  312. data/lib/fontisan/version.rb +1 -1
  313. data/lib/fontisan/woff2.rb +13 -0
  314. data/lib/fontisan/woff2_font.rb +31 -9
  315. data/lib/fontisan/woff_font.rb +31 -2
  316. data/lib/fontisan.rb +124 -196
  317. metadata +128 -7
  318. data/fontisan.gemspec +0 -47
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
module Fontisan
  module Ucd
    # Produces audit-ready aggregations from a codepoint list + UCD indices.
    #
    # The methods here only read their arguments and call the passed-in
    # index (`each_overlapping` / `lookup`); they perform no I/O themselves.
    module Aggregator
      module_function

      # Aggregate codepoints per Unicode block.
      #
      # Returns one hash per block that overlaps the span
      # [min codepoint, max codepoint], in the order the index yields them.
      # Blocks inside that span with no covered codepoint are still reported
      # (with `covered: 0`):
      #
      #   { name:, first_cp:, last_cp:, total:, covered:, fill_ratio:, complete: }
      #
      # @param codepoints [Array<Integer>] any order; duplicates are ignored
      # @param blocks_index [#each_overlapping] index whose entries respond to
      #   `name`, `first_cp`, `last_cp` and `size`
      # @return [Array<Hash>]
      def aggregate_blocks(codepoints, blocks_index)
        # De-duplicate before counting: a repeated codepoint must not inflate
        # `covered`, otherwise `complete` and `fill_ratio` can be wrong
        # (e.g. [0, 0] would make a two-codepoint block look complete).
        sorted = codepoints.uniq.sort
        return [] if sorted.empty?

        blocks_index.each_overlapping(sorted.first, sorted.last).map do |entry|
          covered = count_in_range(sorted, [entry.first_cp, entry.last_cp])
          total = entry.size
          {
            name: entry.name,
            first_cp: entry.first_cp,
            last_cp: entry.last_cp,
            total: total,
            covered: covered,
            fill_ratio: covered.fdiv(total).round(4),
            complete: covered == total,
          }
        end
      end

      # Aggregate unique script names from codepoints.
      #
      # Codepoints for which the index returns nil are skipped.
      #
      # @param codepoints [Array<Integer>]
      # @param scripts_index [#lookup]
      # @return [Array<String>] sorted unique script names
      def aggregate_scripts(codepoints, scripts_index)
        codepoints.filter_map { |cp| scripts_index.lookup(cp) }.uniq.sort
      end

      # Count codepoints in `sorted` that fall within [first, last].
      # `sorted` must be sorted ascending (both lookups are binary searches).
      #
      # @param sorted [Array<Integer>]
      # @param range [Array(Integer, Integer)] inclusive [first, last] pair
      # @return [Integer]
      def count_in_range(sorted, range)
        first, last = range
        # Index of the first codepoint >= first; nil means none qualifies.
        left = sorted.bsearch_index { |cp| cp >= first }
        return 0 if left.nil?

        # Index of the first codepoint > last; nil means all remaining qualify.
        right = sorted.bsearch_index { |cp| cp > last } || sorted.size
        right - left
      end
      private_class_method :count_in_range
    end
  end
end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+
5
module Fontisan
  module Ucd
    # Manages the on-disk UCD cache layout.
    #
    # The cache root is `$XDG_CONFIG_HOME/fontisan/unicode`; when
    # `XDG_CONFIG_HOME` is unset or empty it falls back to
    # `~/.config/fontisan/unicode` on every platform.
    #
    # Layout:
    #
    #   <root>/
    #     <version>/
    #       ucdxml/
    #         ucd.all.flat.xml
    #       index/
    #         blocks.yml
    #         scripts.yml
    #
    # No network access — all methods are pure filesystem operations.
    module CacheManager
      UCDXML_FILENAME = "ucd.all.flat.xml"
      private_constant :UCDXML_FILENAME

      BLOCKS_INDEX_FILENAME = "blocks.yml"
      SCRIPTS_INDEX_FILENAME = "scripts.yml"
      private_constant :BLOCKS_INDEX_FILENAME, :SCRIPTS_INDEX_FILENAME

      class << self
        # Root path of the UCD cache.
        # @return [Pathname]
        def root
          base = xdg_config_home || File.join(Dir.home, ".config")
          Pathname.new(base).join("fontisan", "unicode")
        end

        # Per-version directory.
        #
        # The version is validated as a single path segment first:
        # `Pathname#join` would otherwise let ".." or an absolute path escape
        # the cache root, and `remove_version` deletes this directory
        # recursively.
        #
        # @param version [String] e.g. "17.0.0"
        # @return [Pathname]
        # @raise [ArgumentError] if version is empty, ".", "..", or contains a
        #   path separator or NUL byte
        def version_dir(version)
          root.join(validated_segment(version))
        end

        # Path to the unpacked UCDXML flat file for a version.
        # @param version [String]
        # @return [Pathname]
        def ucdxml_path(version)
          version_dir(version).join("ucdxml", UCDXML_FILENAME)
        end

        # Directory holding the derived RLE indices for a version.
        # @param version [String]
        # @return [Pathname]
        def index_dir(version)
          version_dir(version).join("index")
        end

        # Path to the blocks index file for a version.
        # @param version [String]
        # @return [Pathname]
        def blocks_index_path(version)
          index_dir(version).join(BLOCKS_INDEX_FILENAME)
        end

        # Path to the scripts index file for a version.
        # @param version [String]
        # @return [Pathname]
        def scripts_index_path(version)
          index_dir(version).join(SCRIPTS_INDEX_FILENAME)
        end

        # True if the UCDXML file is present for this version.
        # @param version [String]
        # @return [Boolean]
        def cached?(version)
          ucdxml_path(version).exist?
        end

        # All versions currently in the cache, sorted ascending.
        #
        # Dotted-numeric names are ordered numerically segment by segment
        # (so "9.0.0" precedes "17.0.0"); any other directory names follow,
        # in lexical order.
        #
        # @return [Array<String>]
        def cached_versions
          cache_root = root
          return [] unless cache_root.exist?

          names = cache_root.children.select(&:directory?).map { |p| p.basename.to_s }
          names.sort_by { |name| version_sort_key(name) }
        end

        # Create the version directory and ucdxml/index subdirs.
        # Idempotent.
        # @param version [String]
        def ensure_version_dir!(version)
          ucdxml_path(version).dirname.mkpath
          index_dir(version).mkpath
        end

        # Remove a version from the cache. No-op if absent.
        # @param version [String]
        def remove_version(version)
          dir = version_dir(version)
          dir.rmtree if dir.exist?
        end

        private

        # @return [String, nil] XDG_CONFIG_HOME, or nil when unset or empty
        def xdg_config_home
          env = ENV["XDG_CONFIG_HOME"]
          return nil if env.nil? || env.empty?

          env
        end

        # Ensure `version` names exactly one directory level below the root.
        # @return [String] the validated segment
        # @raise [ArgumentError]
        def validated_segment(version)
          name = version.to_s
          unsafe = name.empty? || name == "." || name == ".." ||
                   name.include?("/") || name.include?("\\") || name.include?("\0")
          raise ArgumentError, "invalid UCD version: #{version.inspect}" if unsafe

          name
        end

        # Sort key: numeric versions first (compared as integer segments),
        # everything else afterwards by name.
        def version_sort_key(name)
          if name.match?(/\A\d+(\.\d+)*\z/)
            [0, name.split(".").map(&:to_i), name]
          else
            [1, [], name]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "yaml"
4
+
5
module Fontisan
  module Ucd
    # Read-only accessor for the settings stored in
    # `lib/fontisan/config/ucd.yml`.
    #
    # The YAML file is parsed on first use and the result (with top-level
    # keys converted to symbols) is memoized for all later calls.
    module Config
      CONFIG_PATH = File.expand_path("../config/ucd.yml", __dir__)
      private_constant :CONFIG_PATH

      class << self
        # Value of the `default_version` key, e.g. "17.0.0".
        def default_version = data[:default_version]

        # Value of the `known_versions` key: the version strings this
        # release recognizes.
        def known_versions = data[:known_versions]

        # Value of the `base_url` key: prefix for UCDXML artifact URLs.
        def base_url = data[:base_url]

        # Value of the `listing_url` key.
        def listing_url = data[:listing_url]

        # Full URL to the UCDXML flat zip for a given version.
        # @param version [String] e.g. "17.0.0"
        # @return [String]
        def ucdxml_url_for(version)
          prefix = base_url
          "#{prefix}/#{version}/ucdxml/ucd.all.flat.zip"
        end

        # Whether `version` is listed in `known_versions`.
        # @param version [String]
        # @return [Boolean]
        def known?(version)
          recognized = known_versions
          recognized.include?(version)
        end

        private

        # Parsed configuration hash with symbol keys, loaded once.
        def data
          return @data if @data

          raw = YAML.load_file(CONFIG_PATH)
          @data = raw.transform_keys(&:to_sym)
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Fontisan
  module Ucd
    # Signals that Ucd::Downloader could not obtain UCDXML data, either
    # because the upstream HTTP fetch failed or because the zip could not be
    # extracted. Caught by AuditCommand to degrade-with-warning.
    class DownloadError < Ucd::Error
    end
  end
end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "uri"
5
+ require "tempfile"
6
+ require "zip"
7
+
8
module Fontisan
  module Ucd
    # Fetches the UCDXML zip for a version (URL supplied by
    # `Config.ucdxml_url_for`) and unpacks it into the cache.
    #
    # Single entry point: `Downloader.download(version, force:)`.
    # Idempotent unless `force: true`. Returns the path to the unpacked
    # `ucd.all.flat.xml`.
    module Downloader
      UCDXML_ZIP_ENTRY = "ucd.all.flat.xml"
      private_constant :UCDXML_ZIP_ENTRY

      class << self
        # Download and unpack UCDXML for `version`.
        #
        # @param version [String] e.g. "17.0.0"
        # @param force [Boolean] if false and cache already has the file,
        #   return the existing path without re-fetching.
        # @return [Pathname] path to the unpacked ucd.all.flat.xml
        # @raise [DownloadError] on HTTP failure or zip extraction failure
        def download(version, force: false)
          target = CacheManager.ucdxml_path(version)
          return target if target.exist? && !force

          CacheManager.ensure_version_dir!(version)
          zip_data = fetch_zip(version)
          extract_xml(zip_data, target)
          target
        end

        private

        # GET the zip and return its raw body.
        #
        # @return [String] response body
        # @raise [DownloadError] on a non-2xx response, an empty body, or any
        #   StandardError raised while fetching
        def fetch_zip(version)
          uri = URI(Config.ucdxml_url_for(version))
          response = Net::HTTP.get_response(uri)
          unless response.is_a?(Net::HTTPSuccess)
            raise DownloadError,
                  "GET #{uri} returned HTTP #{response.code}: #{response.message}"
          end

          body = response.body
          if body.nil? || body.empty?
            raise DownloadError, "GET #{uri} returned an empty body"
          end

          body
        rescue DownloadError
          # Already the documented error type; pass through untouched.
          raise
        rescue StandardError => e
          raise DownloadError, "Failed to fetch #{uri}: #{e.message}"
        end

        # Spool the zip bytes to a temp file (Zip::File.open is given a path)
        # and extract the XML entry to `target`.
        #
        # Any failure while writing or unpacking is re-raised as
        # DownloadError so callers only need to rescue the documented type.
        #
        # @raise [DownloadError]
        def extract_xml(zip_data, target)
          Tempfile.create(["fontisan-ucd", ".zip"]) do |tmp|
            tmp.binmode
            tmp.write(zip_data)
            tmp.flush

            write_xml_entry(tmp.path, target)
          end
        rescue DownloadError
          raise
        rescue StandardError => e
          raise DownloadError,
                "Failed to extract #{UCDXML_ZIP_ENTRY} to #{target}: #{e.message}"
        end

        # Copy the XML entry out of the zip at `zip_path` into `target`.
        #
        # @raise [DownloadError] if the zip has no matching entry
        def write_xml_entry(zip_path, target)
          # Atomic-ish: write to .part, then rename over the final path.
          partial = target.sub_ext(".xml.part")

          Zip::File.open(zip_path) do |zip|
            # Exact entry name first; otherwise the first file entry whose
            # name starts with the expected name.
            entry = zip.find_entry(UCDXML_ZIP_ENTRY) ||
                    zip.entries.find { |e| e.file? && e.name.start_with?(UCDXML_ZIP_ENTRY) }
            unless entry
              raise DownloadError,
                    "UCDXML zip did not contain #{UCDXML_ZIP_ENTRY.inspect}"
            end

            # Stream the entry instead of calling Zip::File#extract, so the
            # output location is exactly `partial` and the (large) XML is
            # never held in memory in full.
            entry.get_input_stream do |input|
              File.open(partial.to_s, "wb") { |out| IO.copy_stream(input, out) }
            end
          end

          File.rename(partial.to_s, target.to_s)
        ensure
          # After a successful rename the .part file no longer exists; this
          # only removes leftovers from a failed extraction.
          partial.delete if partial&.exist?
        end
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
module Fontisan
  module Ucd
    # Common ancestor for UCD-related exceptions, so callers can rescue
    # every UCD failure with a single `rescue Fontisan::Ucd::Error`.
    class Error < StandardError
    end
  end
end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+ require "yaml"
5
+
6
module Fontisan
  module Ucd
    # Ordered collection of named codepoint ranges for one property
    # (block, or script), searched by binary search.
    #
    # Entries are sorted on construction. The binary searches below assume
    # the ranges do not overlap one another.
    class Index
      include Enumerable

      # @return [Array<RangeEntry>] the entries in sorted order
      attr_reader :entries

      class << self
        # Load from a YAML file previously written by #save.
        # @param path [String, Pathname] (required)
        # @return [Index]
        def load(path)
          rows = YAML.load_file(path)
          new(rows.map { |row| RangeEntry.from_h(row) })
        end

        # Build an Index from raw [first_cp, last_cp, name] triples.
        # @param triples [Array<Array(Integer, Integer, String)>]
        # @return [Index]
        def from_triples(triples)
          built = triples.map do |first, last, name|
            RangeEntry.new(first, last, name)
          end
          new(built)
        end
      end

      # @param entries [Array<RangeEntry>] expected to be disjoint; sorted here
      def initialize(entries)
        @entries = entries.sort
      end

      # Iterate entries in sorted order.
      def each(&)
        @entries.each(&)
      end

      # @return [Integer] number of entries
      def size = @entries.size

      # @param codepoint [Integer] Unicode codepoint
      # @return [String, nil] the name of the range covering `codepoint`, or nil
      def lookup(codepoint)
        position = bsearch_index(codepoint)
        position ? @entries[position].name : nil
      end

      # Yield every entry whose [first_cp, last_cp] intersects the inclusive
      # query range [first, last]. Without a block, returns an Enumerator.
      # @param first [Integer]
      # @param last [Integer]
      # @return [Enumerator<RangeEntry>]
      def each_overlapping(first, last, &)
        return enum_for(:each_overlapping, first, last) unless block_given?

        offset = bsearch_first_overlap(first)
        return if offset.nil?

        @entries.drop(offset).each do |candidate|
          # Entries are sorted, so nothing past this point can intersect.
          break if candidate.first_cp > last

          yield candidate if candidate.last_cp >= first
        end
      end

      # Write the entries (as an array of their #to_h forms) to a YAML file.
      # @param path [String, Pathname]
      # @return [void]
      def save(path)
        File.open(path, "w") { |io| YAML.dump(@entries.map(&:to_h), io) }
      end

      private

      # Find-any binary search for the entry whose range contains
      # `codepoint`. Returns its position in @entries, or nil.
      def bsearch_index(codepoint)
        @entries.bsearch_index do |entry|
          next(-1) if codepoint < entry.first_cp # target lies to the left
          next(1) if codepoint > entry.last_cp   # target lies to the right

          0
        end
      end

      # Position of the first entry with last_cp >= `first`, or nil when
      # every entry ends before `first`.
      def bsearch_first_overlap(first)
        @entries.bsearch_index { |entry| entry.last_cp >= first }
      end
    end
  end
end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
module Fontisan
  module Ucd
    # Derives two run-length-encoded indices (blocks + scripts) from a
    # parsed Models::Ucd::Ucd instance and writes them into the cache so
    # they can later be read back as Index objects.
    #
    # Index layout on disk (YAML):
    #
    #   <root>/<version>/index/
    #     blocks.yml
    #     scripts.yml
    #
    # Each file is an array of `{ first_cp:, last_cp:, name: }` hashes,
    # sorted by first_cp, disjoint.
    module IndexBuilder
      class << self
        # Build + persist both indices for a cached version.
        # @param version [String]
        # @return [Array(Index, Index)] blocks_index, scripts_index
        def build(version)
          blocks, scripts = build_from_ucd(load_ucd(version))

          CacheManager.index_dir(version).mkpath
          blocks.save(CacheManager.blocks_index_path(version))
          scripts.save(CacheManager.scripts_index_path(version))

          [blocks, scripts]
        end

        # Build both indices from an in-memory Ucd model; nothing is written.
        # @param ucd [Models::Ucd::Ucd]
        # @return [Array(Index, Index)] blocks index first, scripts index second
        def build_from_ucd(ucd)
          by_block = collect_runs(ucd, :block)
          by_script = collect_runs(ucd, :script)

          [by_block, by_script].map { |grouped| Index.new(to_entries(grouped)) }
        end

        private

        # Read and parse the cached UCDXML file for `version`.
        def load_ucd(version)
          Models::Ucd::Ucd.from_xml(File.read(CacheManager.ucdxml_path(version)))
        end

        # Group the codepoint ranges of every char by the value of
        # `property` (:block or :script), then merge each group's ranges.
        # Chars whose property value is nil or empty are skipped.
        # Returns Hash<String, Array<[Integer, Integer]>>.
        def collect_runs(ucd, property)
          grouped = Hash.new { |hash, key| hash[key] = [] }

          ucd.chars.each do |char|
            label = char.public_send(property)
            next if label.nil? || label.empty?

            spans = ranges_for_char(char)
            # Skip before touching `grouped` so no empty group is created.
            next if spans.empty?

            grouped[label].concat(spans)
          end

          grouped.each_value { |runs| coalesce!(runs) }
          grouped
        end

        # Returns Array<[Integer, Integer]> — the codepoint range(s) this
        # char covers. The hex strings on the char are parsed base-16.
        def ranges_for_char(char)
          return [[char.first_cp.to_i(16), char.last_cp.to_i(16)]] if char.range?
          return [] unless char.cp

          single = char.cp.to_i(16)
          [[single, single]]
        end

        # Sort + merge adjacent/overlapping ranges in place.
        # Input: Array<[Integer, Integer]>, mutated.
        def coalesce!(runs)
          return if runs.empty?

          runs.sort!
          head, *tail = runs
          merged = tail.each_with_object([head]) do |(low, high), acc|
            current = acc.last
            if low > current[1] + 1
              # Gap before this range: start a new run.
              acc << [low, high]
            elsif high > current[1]
              # Touches or overlaps the current run: extend it.
              current[1] = high
            end
          end
          runs.replace(merged)
        end

        # Flatten {name => [[first,last],...]} into Array<RangeEntry>.
        def to_entries(runs_by_name)
          runs_by_name.each_with_object([]) do |(name, runs), entries|
            runs.each { |first, last| entries << RangeEntry.new(first, last, name) }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
module Fontisan
  module Ucd
    # Value object representing one row in a run-length-encoded UCD index:
    # an inclusive codepoint range plus the name it maps to.
    #
    # Ordering (via Comparable) uses [first_cp, last_cp] only; equality
    # (`==`, `eql?`, `hash`) additionally takes `name` into account.
    class RangeEntry
      include Comparable

      attr_reader :first_cp, :last_cp, :name

      # Build an entry from a hash with either symbol or string keys
      # (`first_cp`, `last_cp`, `name`).
      # @param hash [Hash]
      # @return [RangeEntry]
      def self.from_h(hash)
        new(hash[:first_cp] || hash["first_cp"],
            hash[:last_cp] || hash["last_cp"],
            hash[:name] || hash["name"])
      end

      # @param first_cp [Integer] first codepoint of the range (inclusive)
      # @param last_cp [Integer] last codepoint of the range (inclusive)
      # @param name [String] name associated with the range
      def initialize(first_cp, last_cp, name)
        @first_cp = first_cp
        @last_cp = last_cp
        @name = name
      end

      # @param codepoint [Integer]
      # @return [Boolean] true if `codepoint` lies within the range
      def covers?(codepoint)
        codepoint >= @first_cp && codepoint <= @last_cp
      end

      # @return [Integer] number of codepoints in the inclusive range
      def size
        @last_cp - @first_cp + 1
      end

      # Order by first_cp, then last_cp.
      #
      # Returns nil (instead of raising NoMethodError) when `other` does not
      # expose range bounds, as the Comparable protocol expects for
      # incomparable operands.
      #
      # @return [Integer, nil]
      def <=>(other)
        return nil unless other.respond_to?(:first_cp) && other.respond_to?(:last_cp)

        [@first_cp, @last_cp] <=> [other.first_cp, other.last_cp]
      end

      # Entries are equal when bounds and name all match.
      def ==(other)
        other.is_a?(RangeEntry) &&
          @first_cp == other.first_cp &&
          @last_cp == other.last_cp &&
          @name == other.name
      end
      alias eql? ==

      # Hash consistent with #eql?, so entries work as Hash keys.
      def hash
        [@first_cp, @last_cp, @name].hash
      end

      # Compact YAML-friendly form.
      # @return [Hash{Symbol => Object}]
      def to_h
        { first_cp: @first_cp, last_cp: @last_cp, name: @name }
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Fontisan
  module Ucd
    # Signals that a user-supplied version string is not listed in
    # Ucd::Config.known_versions. Raised by Ucd::VersionResolver.
    class UnknownVersionError < Ucd::Error
    end
  end
end