fontisan 0.2.22 → 0.2.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +6 -0
  3. data/.rubocop_todo.yml +93 -17
  4. data/CHANGELOG.md +12 -2
  5. data/README.adoc +6 -210
  6. data/fontisan.gemspec +48 -0
  7. data/lib/fontisan/cldr/unicode_set_parser.rb +23 -6
  8. data/lib/fontisan/cldr/version_resolver.rb +1 -1
  9. data/lib/fontisan/cli.rb +0 -170
  10. data/lib/fontisan/commands.rb +0 -3
  11. data/lib/fontisan/formatters/text_formatter.rb +0 -6
  12. data/lib/fontisan/formatters.rb +0 -3
  13. data/lib/fontisan/hints.rb +6 -3
  14. data/lib/fontisan/models.rb +4 -4
  15. data/lib/fontisan/pipeline/strategies.rb +4 -2
  16. data/lib/fontisan/pipeline.rb +2 -1
  17. data/lib/fontisan/tables/cff.rb +2 -1
  18. data/lib/fontisan/tables.rb +2 -1
  19. data/lib/fontisan/version.rb +1 -1
  20. data/lib/fontisan.rb +0 -3
  21. metadata +7 -70
  22. data/lib/fontisan/audit/codepoint_range_coalescer.rb +0 -41
  23. data/lib/fontisan/audit/context.rb +0 -122
  24. data/lib/fontisan/audit/differ.rb +0 -124
  25. data/lib/fontisan/audit/extractors/aggregations.rb +0 -54
  26. data/lib/fontisan/audit/extractors/base.rb +0 -26
  27. data/lib/fontisan/audit/extractors/color_capabilities.rb +0 -141
  28. data/lib/fontisan/audit/extractors/coverage.rb +0 -48
  29. data/lib/fontisan/audit/extractors/hinting.rb +0 -197
  30. data/lib/fontisan/audit/extractors/identity.rb +0 -52
  31. data/lib/fontisan/audit/extractors/language_coverage.rb +0 -37
  32. data/lib/fontisan/audit/extractors/licensing.rb +0 -79
  33. data/lib/fontisan/audit/extractors/metrics.rb +0 -103
  34. data/lib/fontisan/audit/extractors/opentype_layout.rb +0 -69
  35. data/lib/fontisan/audit/extractors/provenance.rb +0 -29
  36. data/lib/fontisan/audit/extractors/style.rb +0 -32
  37. data/lib/fontisan/audit/extractors/variation_detail.rb +0 -99
  38. data/lib/fontisan/audit/extractors.rb +0 -27
  39. data/lib/fontisan/audit/library_aggregator.rb +0 -83
  40. data/lib/fontisan/audit/library_auditor.rb +0 -90
  41. data/lib/fontisan/audit/registry.rb +0 -60
  42. data/lib/fontisan/audit/style_extractor.rb +0 -80
  43. data/lib/fontisan/audit.rb +0 -20
  44. data/lib/fontisan/cli/ucd_cli.rb +0 -97
  45. data/lib/fontisan/commands/audit_command.rb +0 -123
  46. data/lib/fontisan/commands/audit_compare_command.rb +0 -66
  47. data/lib/fontisan/commands/audit_library_command.rb +0 -46
  48. data/lib/fontisan/config/ucd.yml +0 -23
  49. data/lib/fontisan/formatters/audit_diff_text_renderer.rb +0 -122
  50. data/lib/fontisan/formatters/audit_text_renderer.rb +0 -324
  51. data/lib/fontisan/formatters/library_summary_text_renderer.rb +0 -99
  52. data/lib/fontisan/models/audit/audit_axis.rb +0 -30
  53. data/lib/fontisan/models/audit/audit_block.rb +0 -32
  54. data/lib/fontisan/models/audit/audit_diff.rb +0 -77
  55. data/lib/fontisan/models/audit/audit_report.rb +0 -153
  56. data/lib/fontisan/models/audit/codepoint_range.rb +0 -40
  57. data/lib/fontisan/models/audit/codepoint_set_diff.rb +0 -34
  58. data/lib/fontisan/models/audit/color_capabilities.rb +0 -93
  59. data/lib/fontisan/models/audit/duplicate_group.rb +0 -23
  60. data/lib/fontisan/models/audit/embedding_type.rb +0 -76
  61. data/lib/fontisan/models/audit/field_change.rb +0 -28
  62. data/lib/fontisan/models/audit/fs_selection_flags.rb +0 -61
  63. data/lib/fontisan/models/audit/gasp_range.rb +0 -63
  64. data/lib/fontisan/models/audit/hinting.rb +0 -93
  65. data/lib/fontisan/models/audit/library_summary.rb +0 -40
  66. data/lib/fontisan/models/audit/licensing.rb +0 -48
  67. data/lib/fontisan/models/audit/metrics.rb +0 -111
  68. data/lib/fontisan/models/audit/named_instance.rb +0 -41
  69. data/lib/fontisan/models/audit/opentype_layout.rb +0 -40
  70. data/lib/fontisan/models/audit/script_coverage_row.rb +0 -26
  71. data/lib/fontisan/models/audit/script_features.rb +0 -28
  72. data/lib/fontisan/models/audit/variation_detail.rb +0 -44
  73. data/lib/fontisan/models/audit.rb +0 -33
  74. data/lib/fontisan/models/ucd/ucd.rb +0 -38
  75. data/lib/fontisan/models/ucd/ucd_char.rb +0 -67
  76. data/lib/fontisan/models/ucd.rb +0 -19
  77. data/lib/fontisan/ucd/aggregator.rb +0 -73
  78. data/lib/fontisan/ucd/cache_manager.rb +0 -111
  79. data/lib/fontisan/ucd/config.rb +0 -59
  80. data/lib/fontisan/ucd/download_error.rb +0 -9
  81. data/lib/fontisan/ucd/downloader.rb +0 -88
  82. data/lib/fontisan/ucd/error.rb +0 -8
  83. data/lib/fontisan/ucd/index.rb +0 -103
  84. data/lib/fontisan/ucd/index_builder.rb +0 -107
  85. data/lib/fontisan/ucd/range_entry.rb +0 -56
  86. data/lib/fontisan/ucd/unknown_version_error.rb +0 -9
  87. data/lib/fontisan/ucd/version_resolver.rb +0 -79
  88. data/lib/fontisan/ucd.rb +0 -23
@@ -1,40 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "lutaml/model"
4
-
5
- module Fontisan
6
- module Models
7
- module Audit
8
- # Aggregate view over a directory (tree) of audited fonts.
9
- #
10
- # Built by {Audit::LibraryAuditor}. Combines a flat list of
11
- # per-face {AuditReport}s with derived cross-face rollups:
12
- # script coverage matrix, duplicate detection (by source_sha256),
13
- # and license distribution. Lets a librarian inventory a font
14
- # collection in one pass.
15
- class LibrarySummary < Lutaml::Model::Serializable
16
- attribute :root_path, :string
17
- attribute :total_files, :integer
18
- attribute :total_faces, :integer
19
- attribute :scanned_extensions, :string, collection: true
20
- attribute :aggregate_metrics, :hash
21
- attribute :script_coverage, ScriptCoverageRow, collection: true
22
- attribute :duplicate_groups, DuplicateGroup, collection: true
23
- attribute :license_distribution, :hash
24
- attribute :per_face_reports, AuditReport, collection: true
25
-
26
- key_value do
27
- map "root_path", to: :root_path
28
- map "total_files", to: :total_files
29
- map "total_faces", to: :total_faces
30
- map "scanned_extensions", to: :scanned_extensions
31
- map "aggregate_metrics", to: :aggregate_metrics
32
- map "script_coverage", to: :script_coverage
33
- map "duplicate_groups", to: :duplicate_groups
34
- map "license_distribution", to: :license_distribution
35
- map "per_face_reports", to: :per_face_reports
36
- end
37
- end
38
- end
39
- end
40
- end
@@ -1,48 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "lutaml/model"
4
-
5
- module Fontisan
6
- module Models
7
- module Audit
8
- # Licensing + embedding + vendor provenance fields for a face.
9
- #
10
- # Combines the human-readable legal/identity fields from the name
11
- # table with the machine-readable embedding permissions from OS/2.
12
- # Type 1 fonts have no OS/2 — callers must tolerate a nil
13
- # embedding_type / fs_selection_flags / vendor_id.
14
- class Licensing < Lutaml::Model::Serializable
15
- # Name-table fields (English name IDs)
16
- attribute :copyright, :string
17
- attribute :trademark, :string
18
- attribute :manufacturer, :string
19
- attribute :designer, :string
20
- attribute :description, :string
21
- attribute :vendor_url, :string
22
- attribute :designer_url, :string
23
- attribute :license_description, :string
24
- attribute :license_url, :string
25
-
26
- # OS/2 fields
27
- attribute :vendor_id, :string
28
- attribute :embedding_type, :string
29
- attribute :fs_selection_flags, :string, collection: true
30
-
31
- key_value do
32
- map "copyright", to: :copyright
33
- map "trademark", to: :trademark
34
- map "manufacturer", to: :manufacturer
35
- map "designer", to: :designer
36
- map "description", to: :description
37
- map "vendor_url", to: :vendor_url
38
- map "designer_url", to: :designer_url
39
- map "license_description", to: :license_description
40
- map "license_url", to: :license_url
41
- map "vendor_id", to: :vendor_id
42
- map "embedding_type", to: :embedding_type
43
- map "fs_selection_flags", to: :fs_selection_flags
44
- end
45
- end
46
- end
47
- end
48
- end
@@ -1,111 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "lutaml/model"
4
-
5
- module Fontisan
6
- module Models
7
- module Audit
8
- # Layout-critical metrics for a face, consolidated from head, hhea,
9
- # OS/2, and post tables. Designers and engineers can read all
10
- # spacing-relevant numbers in one place instead of cross-referencing
11
- # raw table dumps.
12
- #
13
- # All fields are nil-safe — Type 1 fonts and stripped WOFF builds
14
- # may not carry every table. Derived booleans (e.g. metrics_consistent?)
15
- # tolerate nil inputs and return false rather than raising.
16
- class Metrics < Lutaml::Model::Serializable
17
- # head
18
- attribute :units_per_em, :integer
19
- attribute :bbox_x_min, :integer
20
- attribute :bbox_y_min, :integer
21
- attribute :bbox_x_max, :integer
22
- attribute :bbox_y_max, :integer
23
-
24
- # hhea (horizontal)
25
- attribute :hhea_ascent, :integer
26
- attribute :hhea_descent, :integer
27
- attribute :hhea_line_gap, :integer
28
-
29
- # OS/2 typo
30
- attribute :typo_ascender, :integer
31
- attribute :typo_descender, :integer
32
- attribute :typo_line_gap, :integer
33
-
34
- # OS/2 win
35
- attribute :win_ascent, :integer
36
- attribute :win_descent, :integer
37
-
38
- # OS/2 v2+ (optional)
39
- attribute :x_height, :integer
40
- attribute :cap_height, :integer
41
-
42
- # OS/2 subscript/superscript
43
- attribute :subscript_x_size, :integer
44
- attribute :subscript_y_size, :integer
45
- attribute :subscript_x_offset, :integer
46
- attribute :subscript_y_offset, :integer
47
- attribute :superscript_x_size, :integer
48
- attribute :superscript_y_size, :integer
49
- attribute :superscript_x_offset, :integer
50
- attribute :superscript_y_offset, :integer
51
-
52
- # OS/2 strikeout
53
- attribute :strikeout_size, :integer
54
- attribute :strikeout_position, :integer
55
-
56
- # post underline
57
- attribute :underline_position, :float
58
- attribute :underline_thickness, :float
59
-
60
- key_value do
61
- map "units_per_em", to: :units_per_em
62
- map "bbox_x_min", to: :bbox_x_min
63
- map "bbox_y_min", to: :bbox_y_min
64
- map "bbox_x_max", to: :bbox_x_max
65
- map "bbox_y_max", to: :bbox_y_max
66
-
67
- map "hhea_ascent", to: :hhea_ascent
68
- map "hhea_descent", to: :hhea_descent
69
- map "hhea_line_gap", to: :hhea_line_gap
70
-
71
- map "typo_ascender", to: :typo_ascender
72
- map "typo_descender", to: :typo_descender
73
- map "typo_line_gap", to: :typo_line_gap
74
-
75
- map "win_ascent", to: :win_ascent
76
- map "win_descent", to: :win_descent
77
-
78
- map "x_height", to: :x_height
79
- map "cap_height", to: :cap_height
80
-
81
- map "subscript_x_size", to: :subscript_x_size
82
- map "subscript_y_size", to: :subscript_y_size
83
- map "subscript_x_offset", to: :subscript_x_offset
84
- map "subscript_y_offset", to: :subscript_y_offset
85
- map "superscript_x_size", to: :superscript_x_size
86
- map "superscript_y_size", to: :superscript_y_size
87
- map "superscript_x_offset", to: :superscript_x_offset
88
- map "superscript_y_offset", to: :superscript_y_offset
89
-
90
- map "strikeout_size", to: :strikeout_size
91
- map "strikeout_position", to: :strikeout_position
92
-
93
- map "underline_position", to: :underline_position
94
- map "underline_thickness", to: :underline_thickness
95
- end
96
-
97
- # True when hhea ascent/descent match OS/2 typo ascent/descent.
98
- # Mismatch is a common font bug that causes inconsistent line
99
- # height across platforms.
100
- #
101
- # @return [Boolean]
102
- def metrics_consistent?
103
- return false if hhea_ascent.nil? || typo_ascender.nil?
104
- return false if hhea_descent.nil? || typo_descender.nil?
105
-
106
- hhea_ascent == typo_ascender && hhea_descent == typo_descender
107
- end
108
- end
109
- end
110
- end
111
- end
@@ -1,41 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "lutaml/model"
4
-
5
- module Fontisan
6
- module Models
7
- module Audit
8
- # One fvar named instance (e.g. "Bold", "SemiCondensed").
9
- #
10
- # `coordinates` is serialized as a compact "tag=value,tag=value" string
11
- # (e.g. "wght=700,wdth=100") for human readability. The AuditReport is
12
- # primarily a human-facing artifact; downstream tooling that needs
13
- # structured coordinates can re-derive them from fvar.
14
- class NamedInstance < Lutaml::Model::Serializable
15
- attribute :subfamily_name, :string
16
- attribute :postscript_name, :string
17
- attribute :coordinates, :string
18
-
19
- key_value do
20
- map "subfamily_name", to: :subfamily_name
21
- map "postscript_name", to: :postscript_name
22
- map "coordinates", to: :coordinates
23
- end
24
-
25
- # Build the coordinates string from a parallel array of axis tags
26
- # and fvar coordinate values. Returns nil if either side is empty.
27
- #
28
- # @param axis_tags [Array<String>] ordered axis tags (e.g. ["wght", "wdth"])
29
- # @param values [Array<Numeric>] ordered coordinate values
30
- # @return [String, nil]
31
- def self.format_coordinates(axis_tags, values)
32
- return nil if axis_tags.nil? || values.nil?
33
- return nil if axis_tags.empty? || values.empty?
34
-
35
- pairs = axis_tags.zip(values).map { |tag, val| "#{tag}=#{val}" }
36
- pairs.join(",")
37
- end
38
- end
39
- end
40
- end
41
- end
@@ -1,40 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "lutaml/model"
4
-
5
- module Fontisan
6
- module Models
7
- module Audit
8
- # Structured OpenType layout summary for one face.
9
- #
10
- # Replaces the previous flat `opentype_scripts` + `features` pair
11
- # on AuditReport for MECE cleanliness. Carries:
12
- #
13
- # - `scripts`: union of GSUB + GPOS script tags (sorted, unique).
14
- # - `features`: union of GSUB + GPOS feature tags across every
15
- # script (sorted, unique).
16
- # - `by_script`: per-script breakdown preserving the
17
- # "feature X is for script Y" relationship that the flat arrays
18
- # discarded.
19
- # - `has_gsub` / `has_gpos`: presence flags so consumers can tell
20
- # "font has no layout" from "font has GSUB but no GPOS".
21
- #
22
- # nil for Type 1 fonts (no SFNT table structure).
23
- class OpenTypeLayout < Lutaml::Model::Serializable
24
- attribute :scripts, :string, collection: true
25
- attribute :features, :string, collection: true
26
- attribute :by_script, ScriptFeatures, collection: true
27
- attribute :has_gsub, Lutaml::Model::Type::Boolean
28
- attribute :has_gpos, Lutaml::Model::Type::Boolean
29
-
30
- key_value do
31
- map "scripts", to: :scripts
32
- map "features", to: :features
33
- map "by_script", to: :by_script
34
- map "has_gsub", to: :has_gsub
35
- map "has_gpos", to: :has_gpos
36
- end
37
- end
38
- end
39
- end
40
- end
@@ -1,26 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "lutaml/model"
4
-
5
- module Fontisan
6
- module Models
7
- module Audit
8
- # One row in a LibrarySummary's script-coverage matrix.
9
- #
10
- # Lists every face (by postscript_name) whose cmap covers at least
11
- # one codepoint assigned to a Unicode script. Lets a librarian
12
- # answer "which fonts cover Cyrillic?" without re-auditing.
13
- class ScriptCoverageRow < Lutaml::Model::Serializable
14
- attribute :script, :string
15
- attribute :face_count, :integer
16
- attribute :faces, :string, collection: true
17
-
18
- key_value do
19
- map "script", to: :script
20
- map "face_count", to: :face_count
21
- map "faces", to: :faces
22
- end
23
- end
24
- end
25
- end
26
- end
@@ -1,28 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "lutaml/model"
4
-
5
- module Fontisan
6
- module Models
7
- module Audit
8
- # Per-script breakdown of OpenType features.
9
- #
10
- # Pairs a script tag (e.g. "latn", "kana ") with the GSUB features
11
- # and GPOS features that apply to it. The two collections are
12
- # kept separate because substitution and positioning have different
13
- # semantics — consumers answering "does this font support kerning
14
- # for Latin?" want to look at GPOS only.
15
- class ScriptFeatures < Lutaml::Model::Serializable
16
- attribute :script, :string
17
- attribute :gsub_features, :string, collection: true
18
- attribute :gpos_features, :string, collection: true
19
-
20
- key_value do
21
- map "script", to: :script
22
- map "gsub_features", to: :gsub_features
23
- map "gpos_features", to: :gpos_features
24
- end
25
- end
26
- end
27
- end
28
- end
@@ -1,44 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "lutaml/model"
4
-
5
- module Fontisan
6
- module Models
7
- module Audit
8
- # Variable-font detail for one face.
9
- #
10
- # Bundles everything fvar-derived (axes + named instances) with the
11
- # presence flags for every variation side-table (avar/cvar/HVAR/VVAR/
12
- # MVAR/gvar). Replaces the previous flat `axes` + `is_variable` pair
13
- # on AuditReport for MECE cleanliness — a face is variable iff this
14
- # object is non-nil.
15
- #
16
- # `axes` reuses the existing AuditAxis shape; `named_instances` is a
17
- # parallel NamedInstance collection. The has_* booleans are presence
18
- # checks only — they don't validate the table contents.
19
- class VariationDetail < Lutaml::Model::Serializable
20
- attribute :axes, AuditAxis, collection: true
21
- attribute :named_instances, NamedInstance, collection: true
22
-
23
- # Variation side-table presence flags.
24
- attribute :has_avar, Lutaml::Model::Type::Boolean # axis variation
25
- attribute :has_cvar, Lutaml::Model::Type::Boolean # CVT variation
26
- attribute :has_hvar, Lutaml::Model::Type::Boolean # horizontal metrics
27
- attribute :has_vvar, Lutaml::Model::Type::Boolean # vertical metrics
28
- attribute :has_mvar, Lutaml::Model::Type::Boolean # metrics variation
29
- attribute :has_gvar, Lutaml::Model::Type::Boolean # glyph variation (TT)
30
-
31
- key_value do
32
- map "axes", to: :axes
33
- map "named_instances", to: :named_instances
34
- map "has_avar", to: :has_avar
35
- map "has_cvar", to: :has_cvar
36
- map "has_hvar", to: :has_hvar
37
- map "has_vvar", to: :has_vvar
38
- map "has_mvar", to: :has_mvar
39
- map "has_gvar", to: :has_gvar
40
- end
41
- end
42
- end
43
- end
44
- end
@@ -1,33 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # Namespace hub for audit-related models.
4
- #
5
- # All Models::Audit::* constants are autoloaded from here.
6
-
7
- module Fontisan
8
- module Models
9
- module Audit
10
- autoload :AuditBlock, "fontisan/models/audit/audit_block"
11
- autoload :AuditAxis, "fontisan/models/audit/audit_axis"
12
- autoload :AuditDiff, "fontisan/models/audit/audit_diff"
13
- autoload :AuditReport, "fontisan/models/audit/audit_report"
14
- autoload :CodepointRange, "fontisan/models/audit/codepoint_range"
15
- autoload :CodepointSetDiff, "fontisan/models/audit/codepoint_set_diff"
16
- autoload :ColorCapabilities, "fontisan/models/audit/color_capabilities"
17
- autoload :DuplicateGroup, "fontisan/models/audit/duplicate_group"
18
- autoload :EmbeddingType, "fontisan/models/audit/embedding_type"
19
- autoload :FieldChange, "fontisan/models/audit/field_change"
20
- autoload :FsSelectionFlags, "fontisan/models/audit/fs_selection_flags"
21
- autoload :GaspRange, "fontisan/models/audit/gasp_range"
22
- autoload :Hinting, "fontisan/models/audit/hinting"
23
- autoload :LibrarySummary, "fontisan/models/audit/library_summary"
24
- autoload :Licensing, "fontisan/models/audit/licensing"
25
- autoload :Metrics, "fontisan/models/audit/metrics"
26
- autoload :NamedInstance, "fontisan/models/audit/named_instance"
27
- autoload :OpenTypeLayout, "fontisan/models/audit/opentype_layout"
28
- autoload :ScriptCoverageRow, "fontisan/models/audit/script_coverage_row"
29
- autoload :ScriptFeatures, "fontisan/models/audit/script_features"
30
- autoload :VariationDetail, "fontisan/models/audit/variation_detail"
31
- end
32
- end
33
- end
@@ -1,38 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "lutaml/model"
4
-
5
- module Fontisan
6
- module Models
7
- module Ucd
8
- # Root <ucd> element of the UCDXML flat file.
9
- #
10
- # The flat UCDXML file has the structure:
11
- #
12
- # <ucd>
13
- # <description>...</description>
14
- # <last_revision date="2025-..." version="17.0.0" />
15
- # <char cp="0000" .../>
16
- # <char cp="0001" .../>
17
- # ...
18
- # <char first-cp="3400" last-cp="4DBF" .../>
19
- # ...
20
- # </ucd>
21
- #
22
- # The flat variant merges all per-category UCD files (Blocks.txt,
23
- # Scripts.txt, UnicodeData.txt, etc.) into one stream of <char>
24
- # elements. Roughly 340,000 entries for Unicode 17.0.0.
25
- class Ucd < Lutaml::Model::Serializable
26
- attribute :last_revision, :string
27
- attribute :chars, UcdChar, collection: true
28
-
29
- xml do
30
- element "ucd"
31
-
32
- map_element "last_revision", to: :last_revision
33
- map_element "char", to: :chars
34
- end
35
- end
36
- end
37
- end
38
- end
@@ -1,67 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "lutaml/model"
4
-
5
- module Fontisan
6
- module Models
7
- module Ucd
8
- # Single <char> element from the UCDXML flat file.
9
- #
10
- # UCDXML uses two forms:
11
- # <char cp="0041" name="..." script="Latin" block="Basic Latin" .../>
12
- # <char first-cp="3400" last-cp="4DBF" name="..." script="Han" .../>
13
- #
14
- # The first form describes one codepoint. The second form describes a
15
- # closed range of codepoints that share the same properties (used for
16
- # CJK ideograph ranges where each codepoint would otherwise need its
17
- # own <char> entry).
18
- #
19
- # Both forms can appear in the same document; `cp` is mutually
20
- # exclusive with `first-cp`/`last-cp`.
21
- class UcdChar < Lutaml::Model::Serializable
22
- attribute :cp, :string
23
- attribute :first_cp, :string
24
- attribute :last_cp, :string
25
- attribute :name, :string
26
- attribute :general_category, :string
27
- attribute :script, :string
28
- attribute :block, :string
29
- attribute :age, :string
30
-
31
- xml do
32
- element "char"
33
-
34
- map_attribute "cp", to: :cp
35
- map_attribute "first-cp", to: :first_cp
36
- map_attribute "last-cp", to: :last_cp
37
- map_attribute "name", to: :name
38
- map_attribute "general-category", to: :general_category
39
- map_attribute "script", to: :script
40
- map_attribute "block", to: :block
41
- map_attribute "age", to: :age
42
- end
43
-
44
- # True if this entry describes a codepoint range rather than a
45
- # single codepoint.
46
- def range?
47
- !first_cp.nil? && !last_cp.nil?
48
- end
49
-
50
- # The codepoints covered by this entry, as Integers.
51
- # For a single-codepoint entry, returns a one-element array.
52
- # For a range entry, returns the inclusive range as an array
53
- # (caller should treat this lazily if the range is huge — CJK
54
- # ranges can have tens of thousands of codepoints).
55
- def codepoints
56
- if range?
57
- (first_cp.to_i(16)..last_cp.to_i(16)).to_a
58
- elsif cp
59
- [cp.to_i(16)]
60
- else
61
- []
62
- end
63
- end
64
- end
65
- end
66
- end
67
- end
@@ -1,19 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "lutaml/model"
4
-
5
- module Fontisan
6
- module Models
7
- # Namespace for UCDXML deserialization models.
8
- #
9
- # These classes deserialize the upstream UCDXML flat file
10
- # (https://www.unicode.org/Public/<version>/ucdxml/ucd.all.flat.zip)
11
- # into Ruby objects. They are used by Fontisan::Ucd::IndexBuilder to
12
- # derive compact run-length-encoded indices for Unicode block and
13
- # script lookup.
14
- module Ucd
15
- autoload :UcdChar, "fontisan/models/ucd/ucd_char"
16
- autoload :Ucd, "fontisan/models/ucd/ucd"
17
- end
18
- end
19
- end
@@ -1,73 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Fontisan
4
- module Ucd
5
- # Produces audit-ready aggregations from a codepoint list + UCD indices.
6
- #
7
- # Pure: no I/O, no side effects. Caller passes the codepoints and the
8
- # blocks/scripts indices; Aggregator returns the aggregated summaries.
9
- module Aggregator
10
- module_function
11
-
12
- # Aggregate codepoints per Unicode block.
13
- #
14
- # Returns one hash per overlapping block, sorted by first_cp:
15
- #
16
- # { name:, first_cp:, last_cp:, total:, covered:, fill_ratio:, complete: }
17
- #
18
- # @param codepoints [Array<Integer>] sorted not required
19
- # @param blocks_index [Index]
20
- # @return [Array<Hash>]
21
- def aggregate_blocks(codepoints, blocks_index)
22
- sorted = codepoints.sort
23
- return [] if sorted.empty?
24
-
25
- coverage = Hash.new { |h, k| h[k] = 0 }
26
- coverage.compare_by_identity
27
- first_cp = sorted.first
28
- last_cp = sorted.last
29
-
30
- overlapping = blocks_index.each_overlapping(first_cp, last_cp).to_a
31
- overlapping.each do |entry|
32
- coverage[entry] = count_in_range(sorted, [entry.first_cp, entry.last_cp])
33
- end
34
-
35
- overlapping.map do |entry|
36
- covered = coverage[entry]
37
- total = entry.size
38
- {
39
- name: entry.name,
40
- first_cp: entry.first_cp,
41
- last_cp: entry.last_cp,
42
- total: total,
43
- covered: covered,
44
- fill_ratio: covered.fdiv(total).round(4),
45
- complete: covered == total,
46
- }
47
- end
48
- end
49
-
50
- # Aggregate unique script names from codepoints.
51
- #
52
- # @param codepoints [Array<Integer>]
53
- # @param scripts_index [Index]
54
- # @return [Array<String>] sorted unique script names
55
- def aggregate_scripts(codepoints, scripts_index)
56
- scripts = codepoints.filter_map { |cp| scripts_index.lookup(cp) }
57
- scripts.uniq.sort
58
- end
59
-
60
- # Count codepoints in `sorted` that fall within [first, last].
61
- # `sorted` must be sorted ascending.
62
- def count_in_range(sorted, range)
63
- first, last = range
64
- left = sorted.bsearch_index { |cp| cp >= first } || sorted.size
65
- return 0 if left == sorted.size
66
-
67
- right = sorted.bsearch_index { |cp| cp > last } || sorted.size
68
- right - left
69
- end
70
- private_class_method :count_in_range
71
- end
72
- end
73
- end