RubyGems - fontisan - Versions diffs - 0.1.0 - Mend

fontisan 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

checksums.yaml +7 -0
data/.rspec +3 -0
data/.rubocop.yml +13 -0
data/.rubocop_todo.yml +217 -0
data/Gemfile +15 -0
data/LICENSE +24 -0
data/README.adoc +984 -0
data/Rakefile +95 -0
data/exe/fontisan +7 -0
data/fontisan.gemspec +44 -0
data/lib/fontisan/binary/base_record.rb +57 -0
data/lib/fontisan/binary/structures.rb +84 -0
data/lib/fontisan/cli.rb +192 -0
data/lib/fontisan/commands/base_command.rb +82 -0
data/lib/fontisan/commands/dump_table_command.rb +71 -0
data/lib/fontisan/commands/features_command.rb +94 -0
data/lib/fontisan/commands/glyphs_command.rb +50 -0
data/lib/fontisan/commands/info_command.rb +120 -0
data/lib/fontisan/commands/optical_size_command.rb +41 -0
data/lib/fontisan/commands/scripts_command.rb +59 -0
data/lib/fontisan/commands/tables_command.rb +52 -0
data/lib/fontisan/commands/unicode_command.rb +76 -0
data/lib/fontisan/commands/variable_command.rb +61 -0
data/lib/fontisan/config/features.yml +143 -0
data/lib/fontisan/config/scripts.yml +42 -0
data/lib/fontisan/constants.rb +78 -0
data/lib/fontisan/error.rb +15 -0
data/lib/fontisan/font_loader.rb +109 -0
data/lib/fontisan/formatters/text_formatter.rb +314 -0
data/lib/fontisan/models/all_scripts_features_info.rb +21 -0
data/lib/fontisan/models/features_info.rb +42 -0
data/lib/fontisan/models/font_info.rb +99 -0
data/lib/fontisan/models/glyph_info.rb +26 -0
data/lib/fontisan/models/optical_size_info.rb +33 -0
data/lib/fontisan/models/scripts_info.rb +39 -0
data/lib/fontisan/models/table_info.rb +55 -0
data/lib/fontisan/models/unicode_mappings.rb +42 -0
data/lib/fontisan/models/variable_font_info.rb +82 -0
data/lib/fontisan/open_type_collection.rb +97 -0
data/lib/fontisan/open_type_font.rb +292 -0
data/lib/fontisan/parsers/tag.rb +77 -0
data/lib/fontisan/tables/cmap.rb +284 -0
data/lib/fontisan/tables/fvar.rb +157 -0
data/lib/fontisan/tables/gpos.rb +111 -0
data/lib/fontisan/tables/gsub.rb +111 -0
data/lib/fontisan/tables/head.rb +114 -0
data/lib/fontisan/tables/layout_common.rb +73 -0
data/lib/fontisan/tables/name.rb +188 -0
data/lib/fontisan/tables/os2.rb +175 -0
data/lib/fontisan/tables/post.rb +148 -0
data/lib/fontisan/true_type_collection.rb +98 -0
data/lib/fontisan/true_type_font.rb +313 -0
data/lib/fontisan/utilities/checksum_calculator.rb +89 -0
data/lib/fontisan/version.rb +5 -0
data/lib/fontisan.rb +80 -0
metadata +150 -0

data/lib/fontisan/open_type_font.rb ADDED Viewed

@@ -0,0 +1,292 @@
+# frozen_string_literal: true
+require "bindata"
+require_relative "constants"
+require_relative "utilities/checksum_calculator"
+module Fontisan
+  # OpenType Font domain object using BinData
+  #
+  # Represents a complete OpenType Font file (CFF outlines) using BinData's declarative
+  # DSL for binary structure definition. Parallel to TrueTypeFont but for CFF format.
+  #
+  # @example Reading and analyzing a font
+  #   otf = OpenTypeFont.from_file("font.otf")
+  #   puts otf.header.num_tables  # => 12
+  #   name_table = otf.table("name")
+  #   puts name_table.english_name(Tables::Name::FAMILY)
+  #
+  # @example Writing a font
+  #   otf.to_file("output.otf")
+  class OpenTypeFont < BinData::Record
+    # Byte position at which the table directory starts (right after the header)
+    SFNT_HEADER_SIZE = 12
+    # Byte size of one table directory entry (tag, checksum, offset, length)
+    DIRECTORY_ENTRY_SIZE = 16
+    # Byte position of the offset field inside a table directory entry
+    DIRECTORY_OFFSET_FIELD = 8
+    # Byte position of the checksumAdjustment field inside the head table
+    CHECKSUM_ADJUSTMENT_OFFSET = 8
+    endian :big
+    offset_table :header
+    array :tables, type: :table_directory, initial_length: lambda {
+      header.num_tables
+    }
+    # Raw table bytes keyed by table tag; kept outside the BinData fields
+    # because the tables live at variable offsets in the file
+    attr_accessor :table_data
+    # Cache of parsed table objects keyed by table tag
+    attr_accessor :parsed_tables
+    # Read OpenType Font from a file
+    #
+    # @param path [String] Path to the OTF file
+    # @return [OpenTypeFont] A new instance with table data loaded
+    # @raise [ArgumentError] if path is nil or empty
+    # @raise [Errno::ENOENT] if file does not exist
+    # @raise [RuntimeError] if file format is invalid
+    def self.from_file(path)
+      if path.nil? || path.to_s.empty?
+        raise ArgumentError,
+              "path cannot be nil or empty"
+      end
+      raise Errno::ENOENT, "File not found: #{path}" unless File.exist?(path)
+      File.open(path, "rb") do |io|
+        font = read(io)
+        font.initialize_storage
+        font.read_table_data(io)
+        font
+      end
+    rescue BinData::ValidityError, EOFError => e
+      # Surface low-level parse failures as a single, descriptive error
+      raise "Invalid OTF file: #{e.message}"
+    end
+    # Read OpenType Font from collection at specific offset
+    #
+    # Unlike {from_file}, parse errors are not translated here; they
+    # propagate to the caller unchanged.
+    #
+    # @param io [IO] Open file handle
+    # @param offset [Integer] Byte offset to the font
+    # @return [OpenTypeFont] A new instance with table data loaded
+    def self.from_collection(io, offset)
+      io.seek(offset)
+      font = read(io)
+      font.initialize_storage
+      font.read_table_data(io)
+      font
+    end
+    # Reset both storage hashes (raw table data and parsed table cache)
+    #
+    # @return [void]
+    def initialize_storage
+      @table_data = {}
+      @parsed_tables = {}
+    end
+    # Read the raw bytes of every table listed in the directory
+    #
+    # Replaces any previously loaded table data.
+    #
+    # @param io [IO] Open file handle
+    # @return [void]
+    def read_table_data(io)
+      @table_data = {}
+      tables.each do |entry|
+        io.seek(entry.offset)
+        @table_data[table_key(entry)] = io.read(entry.table_length)
+      end
+    end
+    # Write OpenType Font to a file
+    #
+    # Writes the header and table directory, then every table padded to
+    # the table alignment, and finally patches checksumAdjustment in the
+    # head table when the font has one.
+    #
+    # @param path [String] Path where the OTF file will be written
+    # @return [Integer] Size of the written file in bytes
+    # @raise [IOError] if data for a table in the directory is missing
+    def to_file(path)
+      File.open(path, "wb") do |io|
+        # Directory first (with placeholder offsets), then the table bodies,
+        # which patch the real offsets back into the directory
+        write_structure(io)
+        write_table_data_with_offsets(io)
+      end
+      update_checksum_adjustment_in_file(path) if head_table
+      File.size(path)
+    end
+    # Validate format correctness
+    #
+    # Checks that the header and directory are present and agree on the
+    # table count, that table data storage is initialized, and that both
+    # the head table and the CFF table are listed.
+    #
+    # @return [Boolean] true if the OTF format is valid, false otherwise
+    def valid?
+      return false unless header
+      return false unless tables.respond_to?(:length)
+      return false unless @table_data.is_a?(Hash)
+      return false if tables.length != header.num_tables
+      return false unless head_table
+      return false unless has_table?(Constants::CFF_TAG)
+      true
+    end
+    # Check if font has a specific table
+    #
+    # @param tag [String] The table tag to check for
+    # @return [Boolean] true if table exists, false otherwise
+    def has_table?(tag)
+      tables.any? { |entry| entry.tag == tag }
+    end
+    # Find a table entry by tag
+    #
+    # @param tag [String] The table tag to find
+    # @return [TableDirectory, nil] The table entry or nil
+    def find_table_entry(tag)
+      tables.find { |entry| entry.tag == tag }
+    end
+    # Get the head table entry
+    #
+    # @return [TableDirectory, nil] The head table entry or nil
+    def head_table
+      find_table_entry(Constants::HEAD_TAG)
+    end
+    # Get list of all table tags
+    #
+    # @return [Array<String>] Table tags in directory order
+    def table_names
+      tables.map(&:tag)
+    end
+    # Get parsed table instance
+    #
+    # Parses the raw table data into a structured table object and caches
+    # the result. A nil result (unknown tag or no parser) is not cached, so
+    # it is looked up again on the next call.
+    #
+    # @param tag [String] The table tag to retrieve
+    # @return [Tables::*, nil] Parsed table object or nil if not found
+    def table(tag)
+      # The cache may be unset when the record was built without
+      # initialize_storage (e.g. through plain BinData `read`/`new`)
+      @parsed_tables ||= {}
+      @parsed_tables[tag] ||= parse_table(tag)
+    end
+    # Get units per em from head table
+    #
+    # @return [Integer, nil] Units per em value, nil without a head table
+    def units_per_em
+      table(Constants::HEAD_TAG)&.units_per_em
+    end
+    private
+    # Hash key under which a directory entry's raw data is stored
+    #
+    # The tag is forced to UTF-8 so reads and writes of table data always
+    # use the same key form.
+    #
+    # @param entry [TableDirectory] The table directory entry
+    # @return [String] UTF-8 encoded tag
+    def table_key(entry)
+      entry.tag.dup.force_encoding("UTF-8")
+    end
+    # File position of the offset field of the index-th directory entry
+    #
+    # @param index [Integer] Zero-based position of the entry in the directory
+    # @return [Integer] Absolute byte position in the written file
+    def directory_offset_position(index)
+      SFNT_HEADER_SIZE + (index * DIRECTORY_ENTRY_SIZE) + DIRECTORY_OFFSET_FIELD
+    end
+    # Parse a table from raw data
+    #
+    # @param tag [String] The table tag to parse
+    # @return [Tables::*, nil] Parsed table object, or nil when there is no
+    #   raw data or no parser class for the tag
+    def parse_table(tag)
+      raw_data = @table_data && @table_data[tag]
+      return nil unless raw_data
+      table_class = table_class_for(tag)
+      return nil unless table_class
+      table_class.read(raw_data)
+    end
+    # Map table tag to parser class
+    #
+    # The mapping is built at call time so the Tables::* constants are only
+    # resolved when a table is actually requested.
+    #
+    # @param tag [String] The table tag
+    # @return [Class, nil] Table parser class or nil
+    def table_class_for(tag)
+      {
+        Constants::HEAD_TAG => Tables::Head,
+        Constants::NAME_TAG => Tables::Name,
+        Constants::OS2_TAG => Tables::Os2,
+        Constants::POST_TAG => Tables::Post,
+        Constants::CMAP_TAG => Tables::Cmap,
+        Constants::FVAR_TAG => Tables::Fvar,
+        Constants::GSUB_TAG => Tables::Gsub,
+        Constants::GPOS_TAG => Tables::Gpos,
+      }[tag]
+    end
+    # Write the structure (header + table directory) to IO
+    #
+    # Offsets are written as zero placeholders; the real values are patched
+    # in by write_table_data_with_offsets once table positions are known.
+    #
+    # @param io [IO] Open file handle
+    # @return [void]
+    def write_structure(io)
+      header.write(io)
+      tables.each do |entry|
+        io.write(entry.tag)
+        io.write([entry.checksum].pack("N"))
+        io.write([0].pack("N")) # Placeholder offset
+        io.write([entry.table_length].pack("N"))
+      end
+    end
+    # Write table data and update offsets in directory
+    #
+    # Tables are written in directory order, each padded with zero bytes to
+    # the table alignment. The in-memory directory entries are not modified;
+    # only the directory in the file receives the new offsets.
+    #
+    # @param io [IO] Open file handle
+    # @return [void]
+    # @raise [IOError] if no data is stored for a table in the directory
+    def write_table_data_with_offsets(io)
+      tables.each_with_index do |entry, index|
+        table_start = io.pos
+        # Same key normalization as read_table_data, so stored data is found
+        data = @table_data && @table_data[table_key(entry)]
+        raise IOError, "Missing table data for tag '#{entry.tag}'" if data.nil?
+        io.write(data)
+        # Add padding to align to the table boundary
+        padding = (Constants::TABLE_ALIGNMENT - (io.pos % Constants::TABLE_ALIGNMENT)) % Constants::TABLE_ALIGNMENT
+        io.write("\x00" * padding) if padding.positive?
+        table_end = io.pos
+        # Zero out checksumAdjustment so the file checksum is computed
+        # over a head table with a cleared adjustment field
+        if entry.tag == Constants::HEAD_TAG
+          io.seek(table_start + CHECKSUM_ADJUSTMENT_OFFSET)
+          io.write([0].pack("N"))
+        end
+        # Patch the real offset into this table's directory entry
+        io.seek(directory_offset_position(index))
+        io.write([table_start].pack("N"))
+        # Continue appending after the table that was just written
+        io.seek(table_end)
+      end
+    end
+    # Update checksumAdjustment field in head table
+    #
+    # The head table position is read back from the directory of the
+    # written file. The in-memory entry still holds the offset of the font
+    # this object was read from, which generally differs from the layout
+    # produced by write_table_data_with_offsets.
+    #
+    # @param path [String] Path to the OTF file
+    # @return [void]
+    # @raise [IOError] if the written table directory cannot be read back
+    def update_checksum_adjustment_in_file(path)
+      head_index = table_names.index { |tag| tag == Constants::HEAD_TAG }
+      return if head_index.nil?
+      checksum = Utilities::ChecksumCalculator.calculate_file_checksum(path)
+      adjustment = Utilities::ChecksumCalculator.calculate_adjustment(checksum)
+      File.open(path, "r+b") do |io|
+        io.seek(directory_offset_position(head_index))
+        raw_offset = io.read(4)
+        if raw_offset.nil? || raw_offset.bytesize < 4
+          raise IOError, "Truncated table directory in '#{path}'"
+        end
+        io.seek(raw_offset.unpack1("N") + CHECKSUM_ADJUSTMENT_OFFSET)
+        io.write([adjustment].pack("N"))
+      end
+    end
+  end
+end

data/lib/fontisan/parsers/tag.rb ADDED Viewed

@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+module Fontisan
+  module Parsers
+    # Represents an OpenType tag (4-character identifier)
+    #
+    # OpenType tags are four-byte identifiers used to identify tables,
+    # scripts, languages, and features. Tags are case-sensitive and
+    # padded with spaces if shorter than 4 characters.
+    class Tag
+      # @return [String] Frozen, normalized 4-character tag value
+      attr_reader :value
+      # Initialize a new Tag
+      #
+      # The value is truncated to 4 characters and right-padded with spaces.
+      # It is stored frozen because it backs #hash: a Tag used as a Hash key
+      # must not change after insertion.
+      #
+      # @param value [String] Tag value (longer values are truncated to 4)
+      # @raise [Fontisan::Error] If value is not a String
+      def initialize(value)
+        @value = normalize_tag(value).freeze
+      end
+      # Convert tag to string
+      #
+      # @return [String] Frozen 4-character tag string
+      def to_s
+        @value
+      end
+      # Compare tag with another tag or string
+      #
+      # Strings are normalized (truncated/padded to 4 characters) before the
+      # comparison, so Tag.new("ab") == "ab" holds.
+      #
+      # @param other [Tag, String] Object to compare with
+      # @return [Boolean] True if tags are equal, false for any other type
+      def ==(other)
+        case other
+        when Tag then @value == other.value
+        when String then @value == normalize_tag(other)
+        else false
+        end
+      end
+      alias eql? ==
+      # Generate hash for use as Hash key
+      #
+      # @return [Integer] Hash of the normalized tag value
+      def hash
+        @value.hash
+      end
+      # Check if tag is valid (exactly 4 characters)
+      #
+      # Because the value is normalized to 4 characters on construction,
+      # this holds for every Tag created through #initialize.
+      #
+      # @return [Boolean] True if tag is valid
+      def valid?
+        @value.length == 4
+      end
+      private
+      # Normalize tag to 4 characters
+      #
+      # Always returns a new String; the argument itself is not modified.
+      #
+      # @param tag [String] Tag to normalize
+      # @return [String] Normalized 4-character tag
+      # @raise [Fontisan::Error] If tag is not a String
+      def normalize_tag(tag)
+        raise Error, "Invalid tag: #{tag.inspect}" unless tag.is_a?(String)
+        tag.slice(0, 4).ljust(4, " ")
+      end
+    end
+  end
+end

data/lib/fontisan/tables/cmap.rb ADDED Viewed

@@ -0,0 +1,284 @@
+# frozen_string_literal: true
+require_relative "../binary/base_record"
+module Fontisan
+  module Tables
+    # Parser for the 'cmap' (Character to Glyph Index Mapping) table
+    #
+    # The cmap table maps character codes to glyph indices. It supports
+    # multiple encoding formats to accommodate different character sets and
+    # Unicode planes.
+    #
+    # This implementation focuses on:
+    # - Format 4: Segment mapping for BMP (Basic Multilingual Plane, U+0000-U+FFFF)
+    # - Format 12: Segmented coverage for full Unicode support
+    #
+    # Reference: OpenType specification, cmap table
+    class Cmap < Binary::BaseRecord
+      # Platform IDs
+      PLATFORM_UNICODE = 0
+      PLATFORM_MACINTOSH = 1
+      PLATFORM_MICROSOFT = 3
+      # Microsoft Encoding IDs
+      ENC_MS_UNICODE_BMP = 1    # Unicode BMP (UCS-2)
+      ENC_MS_UNICODE_UCS4 = 10  # Unicode full repertoire (UCS-4)
+      # Largest valid Unicode code point; upper bound for format 12 ranges
+      MAX_UNICODE_CODE_POINT = 0x10FFFF
+      endian :big
+      uint16 :version
+      uint16 :num_tables
+      rest :remaining_data
+      # Character code to glyph index mappings of the preferred Unicode
+      # subtable. Parsed on first access and memoized.
+      #
+      # @return [Hash{Integer => Integer}] Character code => glyph index;
+      #   empty when no usable subtable is found
+      def unicode_mappings
+        @unicode_mappings ||= parse_mappings
+      end
+      private
+      # Parse the encoding records and extract the mappings of the best
+      # Unicode subtable. Only subtable formats 4 and 12 are parsed; any
+      # other format yields an empty result.
+      #
+      # @return [Hash{Integer => Integer}] Character code => glyph index
+      def parse_mappings
+        mappings = {}
+        # Subtable offsets are relative to the start of the table, so work
+        # on the complete serialized table
+        data = to_binary_s
+        records = read_encoding_records(data)
+        subtable_data = find_best_unicode_subtable(records, data)
+        return mappings unless subtable_data
+        format = subtable_data[0, 2].unpack1("n")
+        case format
+        when 4
+          parse_format_4(subtable_data, mappings)
+        when 12
+          parse_format_12(subtable_data, mappings)
+        end
+        mappings
+      end
+      # Read encoding records from the beginning of the table
+      #
+      # Stops early when the table is too short to hold another record.
+      #
+      # @param data [String] Complete binary table data
+      # @return [Array<Hash>] Records with :platform_id, :encoding_id, :offset
+      def read_encoding_records(data)
+        records = []
+        offset = 4 # Skip version and numTables
+        num_tables.times do
+          break if offset + 8 > data.length
+          platform_id, encoding_id, subtable_offset = data[offset, 8].unpack("nnN")
+          records << {
+            platform_id: platform_id,
+            encoding_id: encoding_id,
+            offset: subtable_offset,
+          }
+          offset += 8
+        end
+        records
+      end
+      # Find the best Unicode subtable from encoding records
+      #
+      # Priority: Microsoft UCS-4, then Microsoft BMP, then the first
+      # Unicode platform record.
+      #
+      # @param records [Array<Hash>] Encoding records
+      # @param data [String] Complete binary table data
+      # @return [String, nil] Subtable bytes (through end of table) or nil
+      def find_best_unicode_subtable(records, data)
+        find_ucs4_subtable(records, data) ||
+          find_bmp_subtable(records, data) ||
+          find_unicode_subtable(records, data)
+      end
+      # Find Microsoft Unicode UCS-4 subtable (full Unicode)
+      #
+      # @return [String, nil] Subtable bytes or nil
+      def find_ucs4_subtable(records, data)
+        record = records.find do |r|
+          r[:platform_id] == PLATFORM_MICROSOFT &&
+            r[:encoding_id] == ENC_MS_UNICODE_UCS4
+        end
+        extract_subtable_data(record, data)
+      end
+      # Find Microsoft Unicode BMP subtable
+      #
+      # @return [String, nil] Subtable bytes or nil
+      def find_bmp_subtable(records, data)
+        record = records.find do |r|
+          r[:platform_id] == PLATFORM_MICROSOFT &&
+            r[:encoding_id] == ENC_MS_UNICODE_BMP
+        end
+        extract_subtable_data(record, data)
+      end
+      # Find Unicode platform subtable (any encoding)
+      #
+      # @return [String, nil] Subtable bytes or nil
+      def find_unicode_subtable(records, data)
+        record = records.find { |r| r[:platform_id] == PLATFORM_UNICODE }
+        extract_subtable_data(record, data)
+      end
+      # Extract subtable data if record exists and offset is valid
+      #
+      # @param record [Hash, nil] Encoding record
+      # @param data [String] Complete binary table data
+      # @return [String, nil] Bytes from the subtable offset to the end of
+      #   the table, or nil when the record is missing or points past the end
+      def extract_subtable_data(record, data)
+        return nil unless record
+        return nil unless record[:offset] < data.length
+        data[record[:offset]..]
+      end
+      # Read up to count big-endian uint16 values starting at offset
+      #
+      # Reading stops as soon as a value would extend past limit, so the
+      # result may hold fewer than count values for truncated subtables.
+      #
+      # @param data [String] Subtable bytes
+      # @param offset [Integer] Position of the first value
+      # @param count [Integer] Number of values requested
+      # @param limit [Integer] Exclusive end position that may be read
+      # @return [Array<Integer>] Values that fit within limit
+      def read_uint16_array(data, offset, count, limit)
+        values = []
+        count.times do |i|
+          position = offset + (i * 2)
+          break if position + 2 > limit
+          values << data[position, 2].unpack1("n")
+        end
+        values
+      end
+      # Parse Format 4 subtable (segment mapping to delta values)
+      # Format 4 is the most common format for BMP Unicode fonts
+      #
+      # @param data [String] Subtable bytes
+      # @param mappings [Hash{Integer => Integer}] Filled in place
+      # @return [void]
+      # rubocop:disable Metrics/MethodLength
+      # rubocop:disable Metrics/CyclomaticComplexity
+      # rubocop:disable Metrics/PerceivedComplexity
+      def parse_format_4(data, mappings)
+        return if data.length < 14
+        format = data[0, 2].unpack1("n")
+        return unless format == 4
+        length = data[2, 2].unpack1("n")
+        return if length > data.length
+        seg_count = data[6, 2].unpack1("n") / 2
+        # The four parallel arrays start at offset 14
+        offset = 14
+        end_codes = read_uint16_array(data, offset, seg_count, length)
+        # Advance past the values read plus reservedPad (2 bytes)
+        offset += (end_codes.length * 2) + 2
+        start_codes = read_uint16_array(data, offset, seg_count, length)
+        offset += start_codes.length * 2
+        id_deltas = read_uint16_array(data, offset, seg_count, length)
+        offset += id_deltas.length * 2
+        # idRangeOffset values are relative to their own position in the
+        # subtable, so remember where the array begins
+        id_range_offset_pos = offset
+        id_range_offsets = read_uint16_array(data, offset, seg_count, length)
+        # A truncated subtable yields arrays shorter than seg_count. Only
+        # segments present in all four arrays can be mapped; indexing past
+        # them would yield nil and raise NoMethodError.
+        complete_segments = [
+          end_codes, start_codes, id_deltas, id_range_offsets
+        ].map(&:length).min
+        complete_segments.times do |i|
+          start_code = start_codes[i]
+          end_code = end_codes[i]
+          id_delta = id_deltas[i]
+          id_range_offset = id_range_offsets[i]
+          # Skip the final segment (0xFFFF)
+          next if start_code == 0xFFFF
+          if id_range_offset.zero?
+            # Glyph index is the character code plus idDelta, modulo 65536
+            (start_code..end_code).each do |code|
+              glyph_index = (code + id_delta) & 0xFFFF
+              mappings[code] = glyph_index if glyph_index != 0
+            end
+          else
+            # Glyph index comes from glyphIdArray, located relative to this
+            # segment's idRangeOffset entry
+            segment_base = id_range_offset_pos + (i * 2) + id_range_offset
+            (start_code..end_code).each do |code|
+              array_offset = segment_base + ((code - start_code) * 2)
+              next if array_offset + 2 > length
+              glyph_index = data[array_offset, 2].unpack1("n")
+              next if glyph_index.zero?
+              glyph_index = (glyph_index + id_delta) & 0xFFFF
+              mappings[code] = glyph_index if glyph_index != 0
+            end
+          end
+        end
+      end
+      # rubocop:enable Metrics/MethodLength
+      # rubocop:enable Metrics/CyclomaticComplexity
+      # rubocop:enable Metrics/PerceivedComplexity
+      # Parse Format 12 subtable (segmented coverage)
+      # Format 12 supports full Unicode range
+      #
+      # @param data [String] Subtable bytes
+      # @param mappings [Hash{Integer => Integer}] Filled in place
+      # @return [void]
+      def parse_format_12(data, mappings)
+        header = parse_format_12_header(data)
+        return unless header
+        parse_format_12_groups(data, header[:num_groups], header[:length],
+                               mappings)
+      end
+      # Parse Format 12 header
+      #
+      # @param data [String] Subtable bytes
+      # @return [Hash, nil] :length and :num_groups, or nil when the data is
+      #   too short, not format 12, or declares more bytes than are available
+      def parse_format_12_header(data)
+        return nil if data.length < 16
+        format = data[0, 2].unpack1("n")
+        return nil unless format == 12
+        length = data[4, 4].unpack1("N")
+        return nil if length > data.length
+        num_groups = data[12, 4].unpack1("N")
+        { length: length, num_groups: num_groups }
+      end
+      # Parse Format 12 sequential map groups
+      #
+      # Each group is 12 bytes: startCharCode, endCharCode, startGlyphID.
+      # Stops early when a group would extend past the subtable length.
+      #
+      # @param data [String] Subtable bytes
+      # @param num_groups [Integer] Group count declared in the header
+      # @param length [Integer] Subtable length declared in the header
+      # @param mappings [Hash{Integer => Integer}] Filled in place
+      # @return [void]
+      def parse_format_12_groups(data, num_groups, length, mappings)
+        offset = 16
+        num_groups.times do
+          break if offset + 12 > length
+          start_char_code, end_char_code, start_glyph_id =
+            data[offset, 12].unpack("NNN")
+          map_character_range(start_char_code, end_char_code, start_glyph_id,
+                              mappings)
+          offset += 12
+        end
+      end
+      # Map a range of characters to consecutive glyphs
+      #
+      # @param start_char [Integer] First character code of the range
+      # @param end_char [Integer] Last character code of the range
+      # @param start_glyph [Integer] Glyph index of start_char
+      # @param mappings [Hash{Integer => Integer}] Filled in place
+      # @return [void]
+      def map_character_range(start_char, end_char, start_glyph, mappings)
+        # The bounds are raw 32-bit values from the font file. Clamp to the
+        # Unicode range so a corrupt group (e.g. endCharCode 0xFFFFFFFF)
+        # cannot force billions of iterations and hash insertions.
+        last_char = [end_char, MAX_UNICODE_CODE_POINT].min
+        (start_char..last_char).each do |code|
+          glyph_index = start_glyph + (code - start_char)
+          mappings[code] = glyph_index if glyph_index != 0
+        end
+      end
+    end
+  end
+end