fontisan 0.2.17 → 0.2.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (316) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +14 -90
  3. data/README.adoc +257 -1
  4. data/docs/.vitepress/config.ts +68 -8
  5. data/docs/.vitepress/theme/style.css +570 -272
  6. data/docs/CONVERSION_GUIDE.adoc +31 -8
  7. data/docs/EXTRACT_TTC_MIGRATION.md +1 -1
  8. data/docs/WOFF_WOFF2_FORMATS.adoc +53 -0
  9. data/docs/api/conversion-options.md +37 -14
  10. data/docs/cli/audit.md +337 -0
  11. data/docs/cli/convert.md +20 -1
  12. data/docs/cli/index.md +31 -0
  13. data/docs/guide/color.md +1 -1
  14. data/docs/guide/conversion/options.md +32 -3
  15. data/docs/guide/conversion/ttf-otf.md +1 -1
  16. data/docs/guide/conversion/type1.md +1 -1
  17. data/docs/guide/conversion/web.md +91 -32
  18. data/docs/guide/conversion.md +6 -5
  19. data/docs/guide/formats/woff.md +35 -11
  20. data/docs/guide/index.md +2 -2
  21. data/docs/guide/migrations/extract-ttc.md +1 -1
  22. data/docs/guide/quick-start.md +4 -4
  23. data/docs/guide/type1.md +4 -4
  24. data/docs/guide/woff.md +19 -17
  25. data/docs/index.md +2 -0
  26. data/docs/lychee.toml +5 -1
  27. data/docs/package.json +1 -1
  28. data/docs/public/robots.txt +4 -0
  29. data/docs/scripts/post-build.mjs +81 -0
  30. data/lib/fontisan/audit/codepoint_range_coalescer.rb +41 -0
  31. data/lib/fontisan/audit/context.rb +122 -0
  32. data/lib/fontisan/audit/differ.rb +124 -0
  33. data/lib/fontisan/audit/extractors/aggregations.rb +54 -0
  34. data/lib/fontisan/audit/extractors/base.rb +26 -0
  35. data/lib/fontisan/audit/extractors/color_capabilities.rb +141 -0
  36. data/lib/fontisan/audit/extractors/coverage.rb +48 -0
  37. data/lib/fontisan/audit/extractors/hinting.rb +197 -0
  38. data/lib/fontisan/audit/extractors/identity.rb +52 -0
  39. data/lib/fontisan/audit/extractors/language_coverage.rb +37 -0
  40. data/lib/fontisan/audit/extractors/licensing.rb +79 -0
  41. data/lib/fontisan/audit/extractors/metrics.rb +103 -0
  42. data/lib/fontisan/audit/extractors/opentype_layout.rb +69 -0
  43. data/lib/fontisan/audit/extractors/provenance.rb +29 -0
  44. data/lib/fontisan/audit/extractors/style.rb +32 -0
  45. data/lib/fontisan/audit/extractors/variation_detail.rb +99 -0
  46. data/lib/fontisan/audit/extractors.rb +27 -0
  47. data/lib/fontisan/audit/library_aggregator.rb +83 -0
  48. data/lib/fontisan/audit/library_auditor.rb +90 -0
  49. data/lib/fontisan/audit/registry.rb +60 -0
  50. data/lib/fontisan/audit/style_extractor.rb +80 -0
  51. data/lib/fontisan/audit.rb +20 -0
  52. data/lib/fontisan/base_collection.rb +23 -9
  53. data/lib/fontisan/binary/structures.rb +0 -2
  54. data/lib/fontisan/binary.rb +11 -0
  55. data/lib/fontisan/cldr/aggregator.rb +33 -0
  56. data/lib/fontisan/cldr/cache_manager.rb +110 -0
  57. data/lib/fontisan/cldr/config.rb +59 -0
  58. data/lib/fontisan/cldr/download_error.rb +9 -0
  59. data/lib/fontisan/cldr/downloader.rb +79 -0
  60. data/lib/fontisan/cldr/error.rb +8 -0
  61. data/lib/fontisan/cldr/index.rb +64 -0
  62. data/lib/fontisan/cldr/index_builder.rb +72 -0
  63. data/lib/fontisan/cldr/unicode_set_parser.rb +172 -0
  64. data/lib/fontisan/cldr/unknown_version_error.rb +9 -0
  65. data/lib/fontisan/cldr/version_resolver.rb +91 -0
  66. data/lib/fontisan/cldr.rb +23 -0
  67. data/lib/fontisan/cli/cldr_cli.rb +85 -0
  68. data/lib/fontisan/cli/ucd_cli.rb +97 -0
  69. data/lib/fontisan/cli.rb +201 -2
  70. data/lib/fontisan/collection/builder.rb +0 -4
  71. data/lib/fontisan/collection/dfont_builder.rb +0 -4
  72. data/lib/fontisan/collection/shared_logic.rb +0 -2
  73. data/lib/fontisan/collection/writer.rb +0 -3
  74. data/lib/fontisan/collection.rb +15 -0
  75. data/lib/fontisan/commands/audit_command.rb +123 -0
  76. data/lib/fontisan/commands/audit_compare_command.rb +66 -0
  77. data/lib/fontisan/commands/audit_library_command.rb +46 -0
  78. data/lib/fontisan/commands/base_command.rb +0 -3
  79. data/lib/fontisan/commands/convert_command.rb +25 -20
  80. data/lib/fontisan/commands/dump_table_command.rb +0 -3
  81. data/lib/fontisan/commands/export_command.rb +0 -4
  82. data/lib/fontisan/commands/features_command.rb +0 -3
  83. data/lib/fontisan/commands/instance_command.rb +0 -5
  84. data/lib/fontisan/commands/ls_command.rb +0 -6
  85. data/lib/fontisan/commands/optical_size_command.rb +0 -3
  86. data/lib/fontisan/commands/pack_command.rb +0 -5
  87. data/lib/fontisan/commands/scripts_command.rb +0 -2
  88. data/lib/fontisan/commands/subset_command.rb +0 -3
  89. data/lib/fontisan/commands/unicode_command.rb +0 -3
  90. data/lib/fontisan/commands/unpack_command.rb +0 -7
  91. data/lib/fontisan/commands/validate_command.rb +0 -8
  92. data/lib/fontisan/commands/variable_command.rb +0 -3
  93. data/lib/fontisan/commands.rb +29 -0
  94. data/lib/fontisan/config/cldr.yml +22 -0
  95. data/lib/fontisan/config/conversion_matrix.yml +38 -0
  96. data/lib/fontisan/config/ucd.yml +23 -0
  97. data/lib/fontisan/constants.rb +19 -0
  98. data/lib/fontisan/conversion_options.rb +30 -19
  99. data/lib/fontisan/converters/cff_table_builder.rb +0 -3
  100. data/lib/fontisan/converters/collection_converter.rb +0 -8
  101. data/lib/fontisan/converters/conversion_strategy.rb +161 -46
  102. data/lib/fontisan/converters/format_converter.rb +143 -32
  103. data/lib/fontisan/converters/glyf_table_builder.rb +0 -2
  104. data/lib/fontisan/converters/outline_converter.rb +0 -19
  105. data/lib/fontisan/converters/outline_extraction.rb +0 -5
  106. data/lib/fontisan/converters/outline_optimizer.rb +0 -5
  107. data/lib/fontisan/converters/svg_generator.rb +0 -4
  108. data/lib/fontisan/converters/table_copier.rb +0 -2
  109. data/lib/fontisan/converters/type1_converter.rb +0 -11
  110. data/lib/fontisan/converters/woff2_encoder.rb +49 -20
  111. data/lib/fontisan/converters/woff_writer.rb +211 -282
  112. data/lib/fontisan/converters.rb +21 -0
  113. data/lib/fontisan/dfont_collection.rb +29 -10
  114. data/lib/fontisan/export/exporter.rb +0 -6
  115. data/lib/fontisan/export/transformers/font_to_ttx.rb +0 -9
  116. data/lib/fontisan/export/transformers/head_transformer.rb +0 -2
  117. data/lib/fontisan/export/transformers/hhea_transformer.rb +0 -2
  118. data/lib/fontisan/export/transformers/maxp_transformer.rb +0 -2
  119. data/lib/fontisan/export/transformers/name_transformer.rb +0 -2
  120. data/lib/fontisan/export/transformers/os2_transformer.rb +0 -2
  121. data/lib/fontisan/export/transformers/post_transformer.rb +0 -2
  122. data/lib/fontisan/export/transformers.rb +17 -0
  123. data/lib/fontisan/export.rb +13 -0
  124. data/lib/fontisan/font_loader.rb +14 -19
  125. data/lib/fontisan/font_writer.rb +0 -1
  126. data/lib/fontisan/formatters/audit_diff_text_renderer.rb +122 -0
  127. data/lib/fontisan/formatters/audit_text_renderer.rb +324 -0
  128. data/lib/fontisan/formatters/library_summary_text_renderer.rb +99 -0
  129. data/lib/fontisan/formatters/text_formatter.rb +6 -0
  130. data/lib/fontisan/formatters.rb +12 -0
  131. data/lib/fontisan/hints/hint_converter.rb +0 -1
  132. data/lib/fontisan/hints/postscript_hint_applier.rb +0 -9
  133. data/lib/fontisan/hints/postscript_hint_extractor.rb +0 -2
  134. data/lib/fontisan/hints/truetype_hint_extractor.rb +0 -2
  135. data/lib/fontisan/hints.rb +16 -0
  136. data/lib/fontisan/metrics_calculator.rb +0 -2
  137. data/lib/fontisan/models/all_scripts_features_info.rb +0 -1
  138. data/lib/fontisan/models/audit/audit_axis.rb +30 -0
  139. data/lib/fontisan/models/audit/audit_block.rb +32 -0
  140. data/lib/fontisan/models/audit/audit_diff.rb +77 -0
  141. data/lib/fontisan/models/audit/audit_report.rb +153 -0
  142. data/lib/fontisan/models/audit/codepoint_range.rb +40 -0
  143. data/lib/fontisan/models/audit/codepoint_set_diff.rb +34 -0
  144. data/lib/fontisan/models/audit/color_capabilities.rb +93 -0
  145. data/lib/fontisan/models/audit/duplicate_group.rb +23 -0
  146. data/lib/fontisan/models/audit/embedding_type.rb +76 -0
  147. data/lib/fontisan/models/audit/field_change.rb +28 -0
  148. data/lib/fontisan/models/audit/fs_selection_flags.rb +61 -0
  149. data/lib/fontisan/models/audit/gasp_range.rb +63 -0
  150. data/lib/fontisan/models/audit/hinting.rb +93 -0
  151. data/lib/fontisan/models/audit/library_summary.rb +40 -0
  152. data/lib/fontisan/models/audit/licensing.rb +48 -0
  153. data/lib/fontisan/models/audit/metrics.rb +111 -0
  154. data/lib/fontisan/models/audit/named_instance.rb +41 -0
  155. data/lib/fontisan/models/audit/opentype_layout.rb +40 -0
  156. data/lib/fontisan/models/audit/script_coverage_row.rb +26 -0
  157. data/lib/fontisan/models/audit/script_features.rb +28 -0
  158. data/lib/fontisan/models/audit/variation_detail.rb +44 -0
  159. data/lib/fontisan/models/audit.rb +33 -0
  160. data/lib/fontisan/models/cldr/language_coverage.rb +31 -0
  161. data/lib/fontisan/models/cldr.rb +12 -0
  162. data/lib/fontisan/models/collection_brief_info.rb +0 -1
  163. data/lib/fontisan/models/collection_info.rb +0 -2
  164. data/lib/fontisan/models/collection_list_info.rb +0 -1
  165. data/lib/fontisan/models/collection_validation_report.rb +0 -2
  166. data/lib/fontisan/models/color_glyph.rb +0 -1
  167. data/lib/fontisan/models/font_report.rb +0 -1
  168. data/lib/fontisan/models/ttx/tables.rb +21 -0
  169. data/lib/fontisan/models/ttx/ttfont.rb +0 -8
  170. data/lib/fontisan/models/ttx.rb +14 -0
  171. data/lib/fontisan/models/ucd/ucd.rb +38 -0
  172. data/lib/fontisan/models/ucd/ucd_char.rb +67 -0
  173. data/lib/fontisan/models/ucd.rb +19 -0
  174. data/lib/fontisan/models.rb +47 -0
  175. data/lib/fontisan/open_type_collection.rb +6 -5
  176. data/lib/fontisan/open_type_font.rb +8 -2
  177. data/lib/fontisan/open_type_font_extensions.rb +9 -9
  178. data/lib/fontisan/optimizers/pattern_analyzer.rb +0 -1
  179. data/lib/fontisan/optimizers.rb +14 -0
  180. data/lib/fontisan/outline_extractor.rb +0 -2
  181. data/lib/fontisan/parsers/dfont_parser.rb +0 -1
  182. data/lib/fontisan/parsers.rb +10 -0
  183. data/lib/fontisan/pipeline/format_detector.rb +29 -102
  184. data/lib/fontisan/pipeline/output_writer.rb +11 -9
  185. data/lib/fontisan/pipeline/strategies/instance_strategy.rb +0 -4
  186. data/lib/fontisan/pipeline/strategies/named_strategy.rb +0 -4
  187. data/lib/fontisan/pipeline/strategies/preserve_strategy.rb +0 -2
  188. data/lib/fontisan/pipeline/strategies.rb +14 -0
  189. data/lib/fontisan/pipeline/transformation_pipeline.rb +0 -7
  190. data/lib/fontisan/pipeline/variation_resolver.rb +0 -7
  191. data/lib/fontisan/pipeline.rb +13 -0
  192. data/lib/fontisan/sfnt_font.rb +29 -14
  193. data/lib/fontisan/sfnt_table.rb +0 -4
  194. data/lib/fontisan/subset/builder.rb +0 -6
  195. data/lib/fontisan/subset.rb +13 -0
  196. data/lib/fontisan/svg/font_generator.rb +0 -4
  197. data/lib/fontisan/svg/glyph_generator.rb +0 -2
  198. data/lib/fontisan/svg.rb +12 -0
  199. data/lib/fontisan/tables/cbdt.rb +0 -1
  200. data/lib/fontisan/tables/cblc.rb +0 -1
  201. data/lib/fontisan/tables/cff/charset.rb +0 -1
  202. data/lib/fontisan/tables/cff/charstring.rb +0 -1
  203. data/lib/fontisan/tables/cff/charstring_rebuilder.rb +0 -4
  204. data/lib/fontisan/tables/cff/charstrings_index.rb +0 -3
  205. data/lib/fontisan/tables/cff/dict.rb +0 -1
  206. data/lib/fontisan/tables/cff/encoding.rb +0 -1
  207. data/lib/fontisan/tables/cff/header.rb +0 -2
  208. data/lib/fontisan/tables/cff/hint_operation_injector.rb +0 -2
  209. data/lib/fontisan/tables/cff/index.rb +0 -1
  210. data/lib/fontisan/tables/cff/private_dict.rb +0 -2
  211. data/lib/fontisan/tables/cff/private_dict_writer.rb +0 -2
  212. data/lib/fontisan/tables/cff/table_builder.rb +0 -6
  213. data/lib/fontisan/tables/cff/top_dict.rb +0 -2
  214. data/lib/fontisan/tables/cff.rb +22 -15
  215. data/lib/fontisan/tables/cff2/charstring_parser.rb +0 -2
  216. data/lib/fontisan/tables/cff2/table_builder.rb +0 -11
  217. data/lib/fontisan/tables/cff2/table_reader.rb +0 -2
  218. data/lib/fontisan/tables/cff2.rb +13 -14
  219. data/lib/fontisan/tables/cmap.rb +24 -2
  220. data/lib/fontisan/tables/cmap_table.rb +0 -3
  221. data/lib/fontisan/tables/colr.rb +0 -1
  222. data/lib/fontisan/tables/cpal.rb +0 -1
  223. data/lib/fontisan/tables/cvar.rb +0 -2
  224. data/lib/fontisan/tables/fvar.rb +0 -1
  225. data/lib/fontisan/tables/glyf/compound_glyph_resolver.rb +0 -2
  226. data/lib/fontisan/tables/glyf/glyph_builder.rb +0 -3
  227. data/lib/fontisan/tables/glyf.rb +0 -6
  228. data/lib/fontisan/tables/glyf_table.rb +0 -3
  229. data/lib/fontisan/tables/gpos.rb +0 -2
  230. data/lib/fontisan/tables/gsub.rb +0 -2
  231. data/lib/fontisan/tables/gvar.rb +0 -2
  232. data/lib/fontisan/tables/head.rb +0 -2
  233. data/lib/fontisan/tables/head_table.rb +0 -3
  234. data/lib/fontisan/tables/hhea.rb +0 -2
  235. data/lib/fontisan/tables/hhea_table.rb +0 -3
  236. data/lib/fontisan/tables/hmtx.rb +0 -2
  237. data/lib/fontisan/tables/hmtx_table.rb +0 -3
  238. data/lib/fontisan/tables/hvar.rb +0 -3
  239. data/lib/fontisan/tables/loca.rb +0 -2
  240. data/lib/fontisan/tables/loca_table.rb +0 -3
  241. data/lib/fontisan/tables/maxp.rb +0 -2
  242. data/lib/fontisan/tables/maxp_table.rb +0 -3
  243. data/lib/fontisan/tables/mvar.rb +0 -3
  244. data/lib/fontisan/tables/name.rb +0 -2
  245. data/lib/fontisan/tables/name_table.rb +0 -3
  246. data/lib/fontisan/tables/os2_table.rb +0 -3
  247. data/lib/fontisan/tables/post_table.rb +0 -3
  248. data/lib/fontisan/tables/sbix.rb +0 -1
  249. data/lib/fontisan/tables/svg.rb +0 -1
  250. data/lib/fontisan/tables/variation_common.rb +0 -1
  251. data/lib/fontisan/tables/vvar.rb +0 -3
  252. data/lib/fontisan/tables.rb +54 -0
  253. data/lib/fontisan/true_type_collection.rb +6 -14
  254. data/lib/fontisan/true_type_font.rb +8 -2
  255. data/lib/fontisan/true_type_font_extensions.rb +9 -9
  256. data/lib/fontisan/type1/afm_generator.rb +0 -4
  257. data/lib/fontisan/type1/conversion_options.rb +0 -2
  258. data/lib/fontisan/type1/encodings.rb +0 -2
  259. data/lib/fontisan/type1/generator.rb +0 -8
  260. data/lib/fontisan/type1/pfa_generator.rb +0 -3
  261. data/lib/fontisan/type1/pfb_generator.rb +0 -5
  262. data/lib/fontisan/type1/pfm_generator.rb +0 -4
  263. data/lib/fontisan/type1.rb +42 -69
  264. data/lib/fontisan/type1_font.rb +40 -11
  265. data/lib/fontisan/ucd/aggregator.rb +73 -0
  266. data/lib/fontisan/ucd/cache_manager.rb +111 -0
  267. data/lib/fontisan/ucd/config.rb +59 -0
  268. data/lib/fontisan/ucd/download_error.rb +9 -0
  269. data/lib/fontisan/ucd/downloader.rb +88 -0
  270. data/lib/fontisan/ucd/error.rb +8 -0
  271. data/lib/fontisan/ucd/index.rb +103 -0
  272. data/lib/fontisan/ucd/index_builder.rb +107 -0
  273. data/lib/fontisan/ucd/range_entry.rb +56 -0
  274. data/lib/fontisan/ucd/unknown_version_error.rb +9 -0
  275. data/lib/fontisan/ucd/version_resolver.rb +79 -0
  276. data/lib/fontisan/ucd.rb +23 -0
  277. data/lib/fontisan/utilities/checksum_calculator.rb +0 -1
  278. data/lib/fontisan/utilities.rb +10 -0
  279. data/lib/fontisan/utils.rb +10 -0
  280. data/lib/fontisan/validation/collection_validator.rb +0 -2
  281. data/lib/fontisan/validation.rb +9 -0
  282. data/lib/fontisan/validators/basic_validator.rb +0 -2
  283. data/lib/fontisan/validators/font_book_validator.rb +0 -2
  284. data/lib/fontisan/validators/opentype_validator.rb +0 -2
  285. data/lib/fontisan/validators/profile_loader.rb +0 -5
  286. data/lib/fontisan/validators/validator.rb +0 -2
  287. data/lib/fontisan/validators/web_font_validator.rb +0 -2
  288. data/lib/fontisan/validators.rb +14 -0
  289. data/lib/fontisan/variable/delta_applicator.rb +0 -4
  290. data/lib/fontisan/variable/instancer.rb +0 -3
  291. data/lib/fontisan/variable/static_font_builder.rb +0 -3
  292. data/lib/fontisan/variable.rb +16 -0
  293. data/lib/fontisan/variation/blend_applier.rb +0 -2
  294. data/lib/fontisan/variation/cache.rb +0 -2
  295. data/lib/fontisan/variation/converter.rb +0 -3
  296. data/lib/fontisan/variation/data_extractor.rb +0 -2
  297. data/lib/fontisan/variation/delta_applier.rb +0 -5
  298. data/lib/fontisan/variation/inspector.rb +0 -1
  299. data/lib/fontisan/variation/instance_generator.rb +0 -6
  300. data/lib/fontisan/variation/instance_writer.rb +0 -5
  301. data/lib/fontisan/variation/metrics_adjuster.rb +0 -4
  302. data/lib/fontisan/variation/optimizer.rb +0 -3
  303. data/lib/fontisan/variation/parallel_generator.rb +0 -3
  304. data/lib/fontisan/variation/subsetter.rb +0 -4
  305. data/lib/fontisan/variation/tuple_variation_header.rb +0 -2
  306. data/lib/fontisan/variation/variable_svg_generator.rb +0 -3
  307. data/lib/fontisan/variation/variation_context.rb +0 -3
  308. data/lib/fontisan/variation/variation_preserver.rb +0 -3
  309. data/lib/fontisan/variation.rb +31 -0
  310. data/lib/fontisan/version.rb +1 -1
  311. data/lib/fontisan/woff2.rb +13 -0
  312. data/lib/fontisan/woff2_font.rb +31 -9
  313. data/lib/fontisan/woff_font.rb +31 -2
  314. data/lib/fontisan.rb +124 -196
  315. metadata +114 -7
  316. data/fontisan.gemspec +0 -48
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "uri"
5
+ require "tempfile"
6
+ require "zip"
7
+
8
+ module Fontisan
9
+ module Cldr
10
# Downloads a CLDR JSON archive over HTTP and unpacks it into the cache.
#
# Single entry point: `Downloader.download(version, force:)`.
# The call is a no-op when the extracted `main/` characters directory
# already exists, unless `force: true` is given.
module Downloader
  # Upper bound on HTTP redirects followed for a single archive request.
  MAX_REDIRECTS = 5
  private_constant :MAX_REDIRECTS

  class << self
    # Download and unpack CLDR JSON for `version`.
    #
    # @param version [String] e.g. "46.0.0"
    # @param force [Boolean] when false and the extracted directory
    #   already exists, return it without fetching anything.
    # @return [Pathname] path to the extracted main/ characters dir
    # @raise [DownloadError] on HTTP failure, an empty response body, an
    #   unreadable archive, or an archive entry that would be written
    #   outside the target directory
    def download(version, force: false)
      target = CacheManager.characters_main_dir(version)
      return target if target.exist? && !force

      CacheManager.ensure_version_dir!(version)
      zip_data = fetch_zip(version)
      extract_archive(zip_data, CacheManager.json_dir(version))
      target
    end

    private

    # Fetch the archive bytes for `version`.
    #
    # @return [String] the non-empty response body
    # @raise [DownloadError] for any failure; non-DownloadError exceptions
    #   (socket errors, bad URLs, ...) are wrapped so callers only need to
    #   rescue one class.
    def fetch_zip(version)
      uri = URI(Config.archive_url_for(version))
      body = get_following_redirects(uri).body
      raise DownloadError, "GET #{uri} returned an empty body" if body.nil? || body.empty?

      body
    rescue DownloadError
      raise
    rescue StandardError => e
      raise DownloadError, "Failed to fetch #{uri}: #{e.message}"
    end

    # GET `uri`, following up to MAX_REDIRECTS redirects. Net::HTTP does
    # not follow redirects by itself, so a 3xx answer would otherwise be
    # reported as a failed download.
    #
    # @return [Net::HTTPSuccess]
    # @raise [DownloadError] on a non-success status or too many redirects
    def get_following_redirects(uri, remaining = MAX_REDIRECTS)
      response = Net::HTTP.get_response(uri)
      return response if response.is_a?(Net::HTTPSuccess)

      location = response["location"]
      unless response.is_a?(Net::HTTPRedirection) && location
        raise DownloadError,
              "GET #{uri} returned HTTP #{response.code}: #{response.message}"
      end
      raise DownloadError, "GET #{uri} exceeded #{MAX_REDIRECTS} redirects" if remaining <= 0

      # Location may be relative; resolve it against the current URL.
      get_following_redirects(URI.join(uri.to_s, location), remaining - 1)
    end

    # Write `zip_data` to a temporary file and extract every file entry
    # below `target_dir`, overwriting files that already exist.
    #
    # @raise [DownloadError] when the archive cannot be read or an entry
    #   name resolves outside `target_dir`
    def extract_archive(zip_data, target_dir)
      Tempfile.create(["fontisan-cldr", ".zip"]) do |tmp|
        tmp.binmode
        tmp.write(zip_data)
        tmp.flush # the archive is reopened by path below

        target_dir.mkpath
        Zip::File.open(tmp.path) do |zip|
          zip.each do |entry|
            next unless entry.file?

            out = safe_destination(target_dir, entry.name)
            out.dirname.mkpath
            entry.extract(out.to_s) { true } # overwrite
          end
        end
      end
    rescue Zip::Error => e
      raise DownloadError, "Failed to extract CLDR archive: #{e.message}"
    end

    # Resolve an archive entry name below `target_dir`, rejecting names
    # that would land outside it (absolute paths, `..` segments).
    #
    # @return [Pathname] absolute, normalized destination path
    # @raise [DownloadError] when the entry escapes `target_dir`
    def safe_destination(target_dir, entry_name)
      root = target_dir.expand_path
      destination = root.join(entry_name).cleanpath
      prefix = "#{root.to_s.chomp(File::SEPARATOR)}#{File::SEPARATOR}"
      return destination if destination.to_s.start_with?(prefix)

      raise DownloadError,
            "Refusing to extract #{entry_name.inspect}: path escapes #{root}"
    end
  end
end
78
+ end
79
+ end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Fontisan
4
+ module Cldr
5
# Common ancestor for CLDR-related exceptions, so callers can rescue
# them with a single class.
class Error < StandardError
end
7
+ end
8
+ end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Fontisan
4
+ module Cldr
5
# In-memory per-language codepoint lookup.
#
# Wraps a hash of `language => codepoints`. Every value is normalized to
# a Set<Integer> on construction so membership checks are cheap.
# Persisted as YAML via #save / .load.
class Index
  include Enumerable

  # Load from a YAML file previously written by #save.
  #
  # The file is parsed in safe mode. An empty file (for example one left
  # behind by an interrupted write) loads as an empty index.
  #
  # @param path [String, Pathname]
  # @return [Index]
  # @raise [Error] when the document is not a mapping
  def self.load(path)
    raw = YAML.safe_load_file(path) || {}
    unless raw.is_a?(Hash)
      raise Error, "CLDR language index at #{path} is not a mapping (got #{raw.class})"
    end

    new(raw)
  end

  # @param entries [Hash{String=>Set<Integer>, Array<Integer>}]
  def initialize(entries = {})
    @entries = entries.transform_values do |cps|
      cps.is_a?(Set) ? cps : Set.new(cps)
    end
  end

  # @return [Hash{String=>Set<Integer>}]
  attr_reader :entries

  # Yields `[language, codepoint_set]` pairs.
  def each(&block)
    @entries.each(&block)
  end

  # @return [Integer] number of languages
  def size
    @entries.size
  end

  # @return [Array<String>] language keys in sorted order
  def languages
    @entries.keys.sort
  end

  # @param language [String]
  # @return [Set<Integer>, nil] nil when the language is not indexed
  def lookup(language)
    @entries[language]
  end

  # @param language [String]
  # @return [Boolean] whether the language has an entry (replaces the
  #   element-based Enumerable#include?)
  def include?(language)
    @entries.key?(language)
  end

  # Serialize to a YAML file; each language's codepoints are written as a
  # sorted array.
  # @param path [String, Pathname]
  # @return [void]
  def save(path)
    File.open(path, "w") do |file|
      YAML.dump(@entries.transform_values(&:sort), file)
    end
  end
end
63
+ end
64
+ end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module Fontisan
6
+ module Cldr
7
# Builds the per-language codepoint index from a cached CLDR JSON
# archive and persists it as a YAML file for future Index loads.
#
# Walks every `<lang>/characters.json` file under the cached characters
# directory, collects the exemplar sets named in EXEMPLAR_FIELDS, parses
# each via {UnicodeSetParser}, and unions the result into a single
# codepoint set per language.
module IndexBuilder
  # Keys of the `characters` node whose sets are merged into a language's
  # entry. The short `index` / `punctuation` names are the ones used by
  # cldr-json's characters.json; the longer `exemplarCharacters*`
  # spellings are kept so data using them is still picked up.
  # NOTE(review): confirm the key names against the bundled CLDR release.
  EXEMPLAR_FIELDS = %w[
    exemplarCharacters
    auxiliary
    index
    punctuation
    exemplarCharactersIndex
    exemplarCharactersPunctuation
  ].freeze
  private_constant :EXEMPLAR_FIELDS

  class << self
    # Build + persist the languages index for a cached version.
    # @param version [String]
    # @return [Index]
    def build(version)
      entries = collect_from_cache(version)
      CacheManager.index_dir(version).mkpath
      index = Index.new(entries)
      index.save(CacheManager.languages_index_path(version))
      index
    end

    # Pure: build an Index from a hash of `language => exemplar_string`.
    # A nil exemplar string yields an empty set for that language.
    # @param exemplars_by_lang [Hash{String=>String, nil}]
    # @return [Index]
    def build_from_exemplars(exemplars_by_lang)
      entries = exemplars_by_lang.transform_values do |set_str|
        set_str.nil? ? Set.new : Set.new(UnicodeSetParser.call(set_str))
      end
      Index.new(entries)
    end

    private

    # @return [Hash{String=>Set<Integer>}] one entry per language
    #   directory that contains a characters.json; empty when the
    #   characters directory is absent.
    def collect_from_cache(version)
      main_dir = CacheManager.characters_main_dir(version)
      return {} unless main_dir.exist?

      # Sorted so the saved index does not depend on filesystem order.
      main_dir.children.select(&:directory?).sort.each_with_object({}) do |lang_dir, hash|
        file = lang_dir.join("characters.json")
        next unless file.exist?

        hash[lang_dir.basename.to_s] = parse_language_file(file)
      end
    end

    # Union of all recognised exemplar sets in one characters.json.
    # @param file [Pathname]
    # @return [Set<Integer>]
    def parse_language_file(file)
      data = JSON.parse(file.read)
      lang_key = data.dig("main", "locale") || data["main"]&.keys&.first
      return Set.new unless lang_key

      chars_node = data.dig("main", lang_key, "characters") || {}
      EXEMPLAR_FIELDS.filter_map { |field| chars_node[field] }.inject(Set.new) do |acc, set_str|
        acc | Set.new(UnicodeSetParser.call(set_str))
      rescue UnicodeSetParser::ParseError
        # A set the parser rejects is skipped; the language's other sets
        # are still merged.
        acc
      end
    end
  end
end
71
+ end
72
+ end
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Fontisan
4
+ module Cldr
5
+ # Parses ICU UnicodeSet bracket notation as used in CLDR
6
+ # exemplarCharacters fields.
7
+ #
8
+ # Supported syntax (sufficient for exemplar sets):
9
+ # - Single chars: `a`, `à`, any BMP or supplementary codepoint
10
+ # - Ranges: `a-z`, `A-Z`
11
+ # - Escapes: `\uXXXX`, `\UXXXXXXXX`, `\u{XXXX...}`
12
+ # - Negation: `[^...]` (inverts against 0..0x10FFFF)
13
+ #
14
+ # Unsupported (raise ParseError; note that some CLDR exemplar sets do contain `{...}` strings):
15
+ # - Property syntax `[:script=Latin:]`
16
+ # - Set operations `[a-z & [b-c]]`
17
+ # - Nested sets `[a[b-c]]`
18
+ # - Named sequences `{a b c}`
19
+ #
20
+ # Output: sorted, deduplicated Array<Integer> of codepoints.
21
+ module UnicodeSetParser
22
+ class ParseError < Cldr::Error; end
23
+ MAX_CODEPOINT = 0x10FFFF
24
+ private_constant :MAX_CODEPOINT
25
+
26
+ module_function
27
+
28
# Parse one bracketed set into its codepoints.
#
# @param set_string [String] bracketed ICU UnicodeSet, e.g. "[a-zà]"
# @return [Array<Integer>] sorted, deduplicated codepoints
# @raise [ParseError] when the input is not wrapped in `[` ... `]`
def call(set_string)
  bracketed = set_string.start_with?("[") && set_string.end_with?("]")
  raise ParseError, "input must be bracketed" unless bracketed

  inner = set_string[1..-2]
  negated = inner.start_with?("^")
  inner = inner[1..] if negated # drop the leading "^"

  codepoints = parse_body(inner)
  codepoints = invert(codepoints) if negated
  codepoints.sort.uniq
end
41
+
42
# Walk the body char by char, emitting codepoints and ranges.
#
# Unescaped pattern whitespace is skipped rather than added to the set:
# UnicodeSet patterns treat it as insignificant, and exemplar strings
# are commonly written space-separated ("[a b c]"). Whitespace around a
# range's `-` is skipped as well.
#
# NOTE(review): `{...}` strings are still rejected here, so a set that
# contains one raises as a whole.
#
# @param body [String] set contents without the outer brackets / "^"
# @return [Array<Integer>] codepoints in encounter order (may repeat)
# @raise [ParseError] on dangling or inverted ranges and on the
#   unsupported constructs (nested sets, `{...}`, property syntax)
def parse_body(body)
  # Pattern_White_Space: TAB..CR, SPACE, NEL, LRM, RLM, LS, PS.
  whitespace = /\A[\t-\r \u0085\u200E\u200F\u2028\u2029]\z/
  cps = []
  chars = body.chars
  i = 0
  prev_cp = nil

  while i < chars.length
    ch = chars[i]

    case ch
    when whitespace
      i += 1
    when "\\"
      cp, advance = parse_escape(chars, i)
      cps << cp
      prev_cp = cp
      i += advance
    when "-"
      raise ParseError, "dangling '-' at start" if prev_cp.nil?

      upper_at = i + 1
      upper_at += 1 while upper_at < chars.length && whitespace.match?(chars[upper_at])
      raise ParseError, "dangling '-' at end" if upper_at >= chars.length

      next_cp, advance = read_next_codepoint(chars, upper_at)
      raise ParseError, "range with no upper bound" if next_cp.nil?
      raise ParseError, "inverted range" if next_cp < prev_cp

      # The lower bound was already emitted when it was read.
      cps.concat(((prev_cp + 1)..next_cp).to_a)
      prev_cp = next_cp
      i = upper_at + advance
    when "[", "]"
      raise ParseError, "nested set syntax is not supported"
    when "{"
      raise ParseError, "named sequences ({...}) are not supported"
    when ":"
      raise ParseError, "property syntax ([:...:]) is not supported"
    else
      cps << ch.ord
      prev_cp = ch.ord
      i += 1
    end
  end

  cps
end
83
+ private_class_method :parse_body
84
+
85
# Fetch the codepoint located at index `start`, decoding a backslash
# escape when one begins there.
# @return [Array(Integer, Integer)] codepoint and number of chars
#   consumed; `[nil, 0]` when `start` is past the end of `chars`.
def read_next_codepoint(chars, start)
  return [nil, 0] unless start < chars.length

  current = chars[start]
  current == "\\" ? parse_escape(chars, start) : [current.ord, 1]
end
98
+ private_class_method :read_next_codepoint
99
+
100
# Parse the backslash escape that starts at `chars[start]`.
#
# - `\uXXXX`, `\u{X...}` and `\UXXXXXXXX` decode to the given codepoint.
# - A backslash before any other non-alphanumeric character yields that
#   character literally (`\\`, `\[`, `\-`, `\:`, `\&`, ...).
# - A backslash before any other ASCII letter or digit is rejected, as
#   those spellings are not supported by this parser.
#
# @return [Array(Integer, Integer)] codepoint + chars consumed
# @raise [ParseError] on an unknown escape, or when the backslash is the
#   last character (previously this silently produced U+0000)
def parse_escape(chars, start)
  marker = chars[start + 1]
  raise ParseError, "dangling backslash at end of set" if marker.nil?

  case marker
  when "u"
    brace_form(chars, start) || four_hex(chars, start, "u")
  when "U"
    eight_hex(chars, start)
  when /\A[A-Za-z0-9]\z/
    raise ParseError, "unknown escape sequence \\#{marker}"
  else
    [marker.ord, 2]
  end
end
122
+ private_class_method :parse_escape
123
+
124
# Decode a `\u{X...}` escape starting at `chars[start]`.
#
# @return [Array(Integer, Integer), nil] codepoint + chars consumed, or
#   nil when the escape is not in brace form (no `{` after `\u`)
# @raise [ParseError] when the brace is unclosed, the payload is empty
#   or not purely hexadecimal, or the value exceeds MAX_CODEPOINT
def brace_form(chars, start)
  return nil unless chars[start + 2] == "{"

  digits_at = start + 3
  close_at = (digits_at...chars.length).find { |j| chars[j] == "}" }
  raise ParseError, "unclosed \\u{ escape" if close_at.nil?

  hex = chars[digits_at...close_at].join
  raise ParseError, "\\u{ escape with no digits" if hex.empty?
  # String#to_i(16) would silently accept junk such as "zz" or "12zz".
  raise ParseError, "\\u{ escape with non-hex digits" unless hex.match?(/\A\h+\z/)

  cp = hex.to_i(16)
  raise ParseError, "\\u{ escape out of Unicode range" if cp > MAX_CODEPOINT

  [cp, (close_at - start) + 1]
end
137
+ private_class_method :brace_form
138
+
139
# Decode a `\uXXXX` escape: exactly four hex digits after the marker.
#
# @param marker [String] escape letter, used only in error messages
# @return [Array(Integer, Integer)] codepoint + chars consumed (always 6)
# @raise [ParseError] when fewer than four characters follow or any of
#   them is not a hex digit
def four_hex(chars, start, marker)
  hex = chars[(start + 2), 4]&.join
  raise ParseError, "truncated \\#{marker} escape" if hex.nil? || hex.length < 4
  # Validate every digit: String#to_i(16) stops at the first bad
  # character, so "12zz" would otherwise decode as 0x12.
  raise ParseError, "\\#{marker} escape with non-hex digits" unless hex.match?(/\A\h{4}\z/)

  [hex.to_i(16), 6]
end
149
+ private_class_method :four_hex
150
+
151
# Decode a `\UXXXXXXXX` escape: exactly eight hex digits after the marker.
#
# @return [Array(Integer, Integer)] codepoint + chars consumed (always 10)
# @raise [ParseError] when fewer than eight characters follow, any of
#   them is not a hex digit, or the value exceeds MAX_CODEPOINT
def eight_hex(chars, start)
  hex = chars[(start + 2), 8]&.join
  raise ParseError, "truncated \\U escape" if hex.nil? || hex.length < 8
  # Validate every digit: String#to_i(16) stops at the first bad
  # character and would accept a partially-hex payload.
  raise ParseError, "\\U escape with non-hex digits" unless hex.match?(/\A\h{8}\z/)

  cp = hex.to_i(16)
  raise ParseError, "\\U escape out of Unicode range" if cp > MAX_CODEPOINT

  [cp, 10]
end
161
+ private_class_method :eight_hex
162
+
163
# Complement of `cps` within 0..MAX_CODEPOINT, in ascending order.
# @param cps [Array<Integer>]
# @return [Array<Integer>]
def invert(cps)
  present = cps.to_set
  (0..MAX_CODEPOINT).reject { |cp| present.include?(cp) }
end
169
+ private_class_method :invert
170
+ end
171
+ end
172
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Fontisan
4
+ module Cldr
5
# Signals that a requested CLDR version is not listed in
# Cldr::Config.known_versions. Raised by Cldr::VersionResolver.
class UnknownVersionError < Cldr::Error
end
8
+ end
9
+ end
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "uri"
5
+ require "json"
6
+ require "rubygems"
7
+
8
+ module Fontisan
9
+ module Cldr
10
# Resolves a user-supplied version intent to a concrete CLDR version.
#
# Mirrors {Ucd::VersionResolver}. Three input modes:
#
#   resolve(nil)       # default_version from config
#   resolve(:default)  # default_version from config
#   resolve("46.0.0")  # explicit; validated against known_versions
#   resolve(:latest)   # probes the release listing, picks highest;
#                      # falls back to default on failure
module VersionResolver
  # A release tag made only of dotted numbers ("46.0.0"); capture 1 is
  # the version string. Other tags in the listing are ignored.
  GITHUB_RELEASE_TAG = /\A(\d+(?:\.\d+)+)\z/
  private_constant :GITHUB_RELEASE_TAG

  class << self
    # @param intent [nil, :default, :latest, String]
    # @return [String] a concrete version string
    # @raise [UnknownVersionError] for an explicit version that is not
    #   in Config.known_versions
    def resolve(intent)
      case intent
      when nil, :default
        Config.default_version
      when :latest
        probe_latest
      else
        validate!(intent)
        intent
      end
    end

    # Raise UnknownVersionError unless `version` is in known_versions.
    # @param version [String]
    # @return [void]
    def validate!(version)
      return if Config.known?(version)

      raise UnknownVersionError,
            "CLDR version #{version.inspect} is not recognized. " \
            "Known versions: #{Config.known_versions.join(', ')}"
    end

    private

    # Best-effort probe of the release listing. Returns the highest
    # version found when it is also a known version; otherwise (empty
    # listing, unknown version, or any error) warns and returns
    # Config.default_version.
    def probe_latest
      versions = fetch_release_versions
      return fallback_latest("releases listing was empty") if versions.empty?

      highest = versions.max_by { |v| Gem::Version.new(v) }
      if Config.known?(highest)
        highest
      else
        fallback_latest("#{highest.inspect} is not in known_versions; using default")
      end
    rescue StandardError => e
      fallback_latest(e.message)
    end

    # Warn on stderr with the reason and hand back the default version.
    def fallback_latest(reason)
      warn "Cldr::VersionResolver: --latest probe failed (#{reason}); " \
           "falling back to default #{Config.default_version.inspect}"
      Config.default_version
    end

    # @return [Array<String>] versions taken from the `tag_name` of each
    #   listed release; empty when the request does not succeed.
    def fetch_release_versions
      uri = URI(Config.listing_url)
      response = Net::HTTP.get_response(uri)
      return [] unless response.is_a?(Net::HTTPSuccess)

      JSON.parse(response.body || "[]").filter_map do |release|
        tag = release["tag_name"]
        tag && tag[GITHUB_RELEASE_TAG, 1]
      end
    end
  end
end
90
+ end
91
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Namespace hub for CLDR (Common Locale Data Repository) support.
4
+ #
5
+ # Provides per-language exemplar character sets so the audit can
6
+ # compute "this font covers X% of language Y". All Cldr::* constants
7
+ # are autoloaded from here.
8
+
9
+ module Fontisan
10
module Cldr
  # Constants are registered alphabetically; each file is required the
  # first time its constant is referenced.
  autoload :Aggregator,          "fontisan/cldr/aggregator"
  autoload :CacheManager,        "fontisan/cldr/cache_manager"
  autoload :Config,              "fontisan/cldr/config"
  autoload :DownloadError,       "fontisan/cldr/download_error"
  autoload :Downloader,          "fontisan/cldr/downloader"
  autoload :Error,               "fontisan/cldr/error"
  autoload :Index,               "fontisan/cldr/index"
  autoload :IndexBuilder,        "fontisan/cldr/index_builder"
  autoload :UnicodeSetParser,    "fontisan/cldr/unicode_set_parser"
  autoload :UnknownVersionError, "fontisan/cldr/unknown_version_error"
  autoload :VersionResolver,     "fontisan/cldr/version_resolver"
end
23
+ end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "thor"
4
+
5
+ module Fontisan
6
# Thor subcommand for managing the local CLDR (Common Locale Data
# Repository) cache used by `fontisan audit` for per-language coverage.
#
#   fontisan cldr download [VERSION]   fetch + index CLDR exemplars
#   fontisan cldr status               show what's cached
#   fontisan cldr path [VERSION]       print local cache path
#   fontisan cldr list                 list known versions
#   fontisan cldr remove VERSION       delete a cached version
#
# With no arguments, `download` resolves the configured default version
# (see lib/fontisan/config/cldr.yml).
class CldrCli < Thor
  desc "download [VERSION]",
       "Download and index CLDR exemplar characters (default: configured default version)"
  option :force, type: :boolean, default: false,
                 desc: "Re-download even if already cached"
  option :latest, type: :boolean, default: false,
                  desc: "Probe GitHub releases for the latest version"
  # Fetch a CLDR version into the cache and make sure its language index
  # exists. Prints the cache location on success; on any Cldr::Error
  # prints the message to stderr and exits with status 1.
  def download(version = nil)
    intent = resolve_intent(version, options[:latest])
    actual = Cldr::VersionResolver.resolve(intent)
    force = options[:force]

    Cldr::Downloader.download(actual, force: force)
    # A forced download replaces the cached data, so an index built from
    # the previous copy is rebuilt instead of being kept as-is.
    Cldr::IndexBuilder.build(actual) if force || !index_present?(actual)
    puts "CLDR #{actual} ready at: #{Cldr::CacheManager.version_dir(actual)}"
  rescue Cldr::Error => e
    warn "ERROR: #{e.message}"
    exit 1
  end

  desc "status", "Show cached CLDR versions and default version"
  # Print the default version, the cache root and the cached versions.
  def status
    cached = Cldr::CacheManager.cached_versions
    puts "Default version: #{Cldr::Config.default_version}"
    puts "Cache root:      #{Cldr::CacheManager.root}"
    puts "Cached versions: #{cached.empty? ? '(none)' : cached.join(', ')}"
  end

  desc "path [VERSION]", "Print local cache directory for a version"
  # Print the cache directory for VERSION (default version when omitted).
  # Exits with status 1 when the version is not recognised.
  def path(version = nil)
    actual = Cldr::VersionResolver.resolve(version)
    puts Cldr::CacheManager.version_dir(actual)
  rescue Cldr::UnknownVersionError => e
    warn "ERROR: #{e.message}"
    exit 1
  end

  desc "list", "List CLDR versions known to this Fontisan release"
  # Print every known version, one per line.
  def list
    Cldr::Config.known_versions.each { |v| puts v }
  end

  desc "remove VERSION", "Remove a cached CLDR version"
  # Delete VERSION from the cache. Warns and returns when the version is
  # not cached; exits with status 1 when it is not a known version.
  def remove(version)
    Cldr::VersionResolver.validate!(version)
    unless Cldr::CacheManager.cached?(version)
      warn "Version #{version} is not cached; nothing to remove."
      return
    end

    Cldr::CacheManager.remove_version(version)
    puts "Removed CLDR #{version}."
  rescue Cldr::UnknownVersionError => e
    warn "ERROR: #{e.message}"
    exit 1
  end

  private

  # Map CLI input to a resolver intent. `--latest` only applies when no
  # explicit VERSION was given; an explicit VERSION always wins.
  def resolve_intent(version, latest)
    return :latest if latest && version.nil?

    version
  end

  # @return [Boolean] whether the languages index file exists for `version`
  def index_present?(version)
    Cldr::CacheManager.languages_index_path(version).exist?
  end
end
85
+ end