RubyGems - wahwah - Versions diffs - 0.1.0.pre.test - Mend

wahwah 0.1.0.pre.test

Files changed (39) hide show

checksums.yaml +7 -0
data/LICENSE +21 -0
data/lib/wahwah.rb +76 -0
data/lib/wahwah/asf/object.rb +39 -0
data/lib/wahwah/asf_tag.rb +220 -0
data/lib/wahwah/errors.rb +6 -0
data/lib/wahwah/flac/block.rb +57 -0
data/lib/wahwah/flac/streaminfo_block.rb +51 -0
data/lib/wahwah/flac_tag.rb +84 -0
data/lib/wahwah/helper.rb +37 -0
data/lib/wahwah/id3/comment_frame_body.rb +21 -0
data/lib/wahwah/id3/frame.rb +180 -0
data/lib/wahwah/id3/frame_body.rb +36 -0
data/lib/wahwah/id3/genre_frame_body.rb +15 -0
data/lib/wahwah/id3/image_frame_body.rb +60 -0
data/lib/wahwah/id3/text_frame_body.rb +16 -0
data/lib/wahwah/id3/v1.rb +96 -0
data/lib/wahwah/id3/v2.rb +60 -0
data/lib/wahwah/id3/v2_header.rb +53 -0
data/lib/wahwah/mp3/mpeg_frame_header.rb +141 -0
data/lib/wahwah/mp3/vbri_header.rb +47 -0
data/lib/wahwah/mp3/xing_header.rb +45 -0
data/lib/wahwah/mp3_tag.rb +110 -0
data/lib/wahwah/mp4/atom.rb +105 -0
data/lib/wahwah/mp4_tag.rb +126 -0
data/lib/wahwah/ogg/flac_tag.rb +37 -0
data/lib/wahwah/ogg/opus_tag.rb +33 -0
data/lib/wahwah/ogg/packets.rb +41 -0
data/lib/wahwah/ogg/page.rb +121 -0
data/lib/wahwah/ogg/pages.rb +24 -0
data/lib/wahwah/ogg/vorbis_comment.rb +51 -0
data/lib/wahwah/ogg/vorbis_tag.rb +35 -0
data/lib/wahwah/ogg_tag.rb +66 -0
data/lib/wahwah/riff/chunk.rb +54 -0
data/lib/wahwah/riff_tag.rb +140 -0
data/lib/wahwah/tag.rb +59 -0
data/lib/wahwah/tag_delegate.rb +16 -0
data/lib/wahwah/version.rb +5 -0
metadata +167 -0

@@ -0,0 +1,121 @@
+# frozen_string_literal: true
+module WahWah
+  module Ogg
+    # The Ogg page header has the following format:
+    #
+    #  0                   1                   2                   3
+    #  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1| Byte
+    # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    # | capture_pattern: Magic number for page start "OggS"           | 0-3
+    # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    # | version       | header_type   | granule_position              | 4-7
+    # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    # |                                                               | 8-11
+    # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    # |                               | bitstream_serial_number       | 12-15
+    # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    # |                               | page_sequence_number          | 16-19
+    # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    # |                               | CRC_checksum                  | 20-23
+    # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    # |                               |page_segments  | segment_table | 24-27
+    # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    # | ...                                                           | 28-
+    # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+    #
+    #
+    # The fields in the page header have the following meaning:
+    #
+    # 1. capture_pattern: a 4 Byte field that signifies the beginning of a
+    #    page.  It contains the magic numbers:
+    #          0x4f 'O'
+    #          0x67 'g'
+    #          0x67 'g'
+    #          0x53 'S'
+    #    It helps a decoder to find the page boundaries and regain
+    #    synchronisation after parsing a corrupted stream.  Once the
+    #    capture pattern is found, the decoder verifies page sync and
+    #    integrity by computing and comparing the checksum.
+    # 2. stream_structure_version: 1 Byte signifying the version number of
+    #    the Ogg file format used in this stream (this document specifies
+    #    version 0).
+    # 3. header_type_flag: the bits in this 1 Byte field identify the
+    #    specific type of this page.
+    #    *  bit 0x01
+    #       set: page contains data of a packet continued from the previous
+    #          page
+    #       unset: page contains a fresh packet
+    #    *  bit 0x02
+    #       set: this is the first page of a logical bitstream (bos)
+    #       unset: this page is not a first page
+    #    *  bit 0x04
+    #       set: this is the last page of a logical bitstream (eos)
+    #       unset: this page is not a last page
+    # 4. granule_position: an 8 Byte field containing position information.
+    #    For example, for an audio stream, it MAY contain the total number
+    #    of PCM samples encoded after including all frames finished on this
+    #    page.  For a video stream it MAY contain the total number of video
+    #    frames encoded after this page.  This is a hint for the decoder
+    #    and gives it some timing and position information.  Its meaning is
+    #    dependent on the codec for that logical bitstream and specified in
+    #    a specific media mapping.  A special value of -1 (in two's
+    #    complement) indicates that no packets finish on this page.
+    # 5. bitstream_serial_number: a 4 Byte field containing the unique
+    #    serial number by which the logical bitstream is identified.
+    # 6. page_sequence_number: a 4 Byte field containing the sequence
+    #    number of the page so the decoder can identify page loss.  This
+    #    sequence number is increasing on each logical bitstream
+    #    separately.
+    # 7. CRC_checksum: a 4 Byte field containing a 32 bit CRC checksum of
+    #    the page (including header with zero CRC field and page content).
+    #    The generator polynomial is 0x04c11db7.
+    # 8. number_page_segments: 1 Byte giving the number of segment entries
+    #    encoded in the segment table.
+    # 9. segment_table: number_page_segments Bytes containing the lacing
+    #    values of all segments in this page.  Each Byte contains one
+    #    lacing value.
+    class Page
+      HEADER_SIZE = 27
+      HEADER_FORMAT = 'A4CxQx12C'
+      attr_reader :segments, :granule_position
+      def initialize(file_io)
+        header_content = file_io.read(HEADER_SIZE)
+        @capture_pattern, @version, @granule_position, page_segments = header_content.unpack(HEADER_FORMAT) if header_content.size >= HEADER_SIZE
+        return unless valid?
+        segment_table = file_io.read(page_segments).unpack('C' * page_segments)
+        @segments = segment_table.map { |segment_length| file_io.read(segment_length) }
+      end
+      def valid?
+        @capture_pattern == 'OggS' && @version == 0
+      end
+    end
+  end
+end

data/lib/wahwah/ogg/pages.rb ADDED

@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+module WahWah
+  module Ogg
+    class Pages
+      include Enumerable
+      def initialize(file_io)
+        @file_io = file_io
+      end
+      def each
+        @file_io.rewind
+        until @file_io.eof?
+          page = Ogg::Page.new(@file_io)
+          break unless page.valid?
+          yield page
+        end
+      end
+    end
+  end
+end

data/lib/wahwah/ogg/vorbis_comment.rb ADDED

@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+module WahWah
+  module Ogg
+    # Vorbis comment structure:
+    #
+    # 1) [vendor_length] = read an unsigned integer of 32 bits
+    # 2) [vendor_string] = read a UTF-8 vector as [vendor_length] octets
+    # 3) [user_comment_list_length] = read an unsigned integer of 32 bits
+    # 4) iterate [user_comment_list_length] times {
+    #      5) [length] = read an unsigned integer of 32 bits
+    #      6) this iteration’s user comment = read a UTF-8 vector as [length] octets
+    #    }
+    # 7) [framing_bit] = read a single bit as boolean
+    # 8) if ( [framing_bit] unset or end-of-packet ) then ERROR
+    # 9) done.
+    module VorbisComment
+      COMMET_FIELD_MAPPING = {
+        'TITLE' => :title,
+        'ALBUM' => :album,
+        'ALBUMARTIST' => :albumartist,
+        'TRACKNUMBER' => :track,
+        'ARTIST' => :artist,
+        'DATE' => :year,
+        'GENRE' => :genre,
+        'DISCNUMBER' => :disc,
+        'COMPOSER' => :composer
+      }
+      def parse_vorbis_comment(comment_content)
+        comment_content = StringIO.new(comment_content)
+        vendor_length = comment_content.read(4).unpack('V').first
+        comment_content.seek(vendor_length, IO::SEEK_CUR) # Skip vendor_string
+        comment_list_length = comment_content.read(4).unpack('V').first
+        comment_list_length.times do
+          comment_length = comment_content.read(4).unpack('V').first
+          comment = Helper.encode_to_utf8(comment_content.read(comment_length))
+          field_name, field_value = comment.split('=', 2)
+          attr_name = COMMET_FIELD_MAPPING[field_name]
+          field_value = field_value.to_i if %i(track disc).include? attr_name
+          instance_variable_set("@#{attr_name}", field_value) unless attr_name.nil?
+        end
+      end
+    end
+  end
+end

data/lib/wahwah/ogg/vorbis_tag.rb ADDED

@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+module WahWah
+  module Ogg
+    class VorbisTag
+      include VorbisComment
+      attr_reader :bitrate, :sample_rate, *COMMET_FIELD_MAPPING.values
+      def initialize(identification_packet, comment_packet)
+        # Identification packet structure:
+        #
+        # 1) "\x01vorbis"
+        # 2) [vorbis_version] = read 32 bits as unsigned integer
+        # 3) [audio_channels] = read 8 bit integer as unsigned
+        # 4) [audio_sample_rate] = read 32 bits as unsigned integer
+        # 5) [bitrate_maximum] = read 32 bits as signed integer
+        # 6) [bitrate_nominal] = read 32 bits as signed integer
+        # 7) [bitrate_minimum] = read 32 bits as signed integer
+        # 8) [blocksize_0] = 2 exponent (read 4 bits as unsigned integer)
+        # 9) [blocksize_1] = 2 exponent (read 4 bits as unsigned integer)
+        # 10) [framing_flag] = read one bit
+        @sample_rate, bitrate = identification_packet[12, 12].unpack('Vx4V')
+        @bitrate = bitrate / 1000
+        comment_packet_id, comment_packet_body = [comment_packet[0..6], comment_packet[7..-1]]
+        # Vorbis comment packet start with "\x03vorbis"
+        return unless comment_packet_id == "\x03vorbis"
+        parse_vorbis_comment(comment_packet_body)
+      end
+    end
+  end
+end

data/lib/wahwah/ogg_tag.rb ADDED

@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+module WahWah
+  class OggTag < Tag
+    extend TagDelegate
+    tag_delegate :@tag,
+      :title,
+      :album,
+      :albumartist,
+      :track,
+      :artist,
+      :year,
+      :genre,
+      :disc,
+      :composer,
+      :sample_rate
+    def duration
+      @duration ||= parse_duration
+    end
+    def bitrate
+      @bitrate ||= parse_bitrate
+    end
+    private
+      def packets
+        @packets ||= Ogg::Packets.new(@file_io)
+      end
+      def pages
+        @pages ||= Ogg::Pages.new(@file_io)
+      end
+      def parse
+        identification_packet, comment_packet = packets.first(2)
+        return if identification_packet.nil? || comment_packet.nil?
+        @overhead_packets_size = identification_packet.size + comment_packet.size
+        @tag = case true
+               when identification_packet.start_with?("\x01vorbis")
+                 Ogg::VorbisTag.new(identification_packet, comment_packet)
+               when identification_packet.start_with?('OpusHead')
+                 Ogg::OpusTag.new(identification_packet, comment_packet)
+               when identification_packet.start_with?("\x7FFLAC")
+                 Ogg::FlacTag.new(identification_packet, comment_packet)
+        end
+      end
+      def parse_duration
+        return @tag.duration if @tag.respond_to? :duration
+        last_page = pages.to_a.last
+        pre_skip = @tag.respond_to?(:pre_skip) ? @tag.pre_skip : 0
+        ((last_page.granule_position - pre_skip) / @tag.sample_rate.to_f).round
+      end
+      def parse_bitrate
+        return @tag.bitrate if @tag.respond_to? :bitrate
+        ((file_size - @overhead_packets_size) * 8.0 / duration / 1000).round
+      end
+  end
+end

data/lib/wahwah/riff/chunk.rb ADDED

@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+module WahWah
+  module Riff
+    # RIFF files consist entirely of "chunks".
+    # All chunks have the following format:
+    # 4 bytes: an ASCII identifier for this chunk (examples are "fmt " and "data"; note the space in "fmt ").
+    # 4 bytes: an unsigned, little-endian 32-bit integer with the length of this chunk (except this field itself and the chunk identifier).
+    # variable-sized field: the chunk data itself, of the size given in the previous field.
+    # a pad byte, if the chunk's length is not even.
+    # chunk identifiers, "RIFF" and "LIST", introduce a chunk that can contain subchunks. The RIFF and LIST chunk data (appearing after the identifier and length) have the following format:
+    # 4 bytes: an ASCII identifier for this particular RIFF or LIST chunk (for RIFF in the typical case, these 4 bytes describe the content of the entire file, such as "AVI " or "WAVE").
+    # rest of data: subchunks.
+    class Chunk
+      HEADER_SIZE = 8
+      HEADER_FORMAT = 'A4V'
+      HEADER_TYPE_SIZE = 4
+      attr_reader :id, :type
+      def initialize(file_io)
+        @id, @size = file_io.read(HEADER_SIZE)&.unpack(HEADER_FORMAT)
+        return unless valid?
+        @type = file_io.read(HEADER_TYPE_SIZE).unpack('A4').first if have_type?
+        @file_io = file_io
+        @position = file_io.pos
+      end
+      def size
+        @size = @size + 1 if @size.odd?
+        have_type? ? @size - HEADER_TYPE_SIZE : @size
+      end
+      def data
+        @file_io.seek(@position)
+        @file_io.read(size)
+      end
+      def valid?
+        !@id.empty? && !@size.nil? && @size > 0
+      end
+      private
+        def have_type?
+          %w(RIFF LIST).include? @id
+        end
+    end
+  end
+end

data/lib/wahwah/riff_tag.rb ADDED

@@ -0,0 +1,140 @@
+# frozen_string_literal: true
+module WahWah
+  class RiffTag < Tag
+    extend TagDelegate
+    # see https://exiftool.org/TagNames/RIFF.html#Info for more info
+    INFO_ID_MAPPING = {
+      INAM: :title,
+      TITL: :title,
+      IART: :artist,
+      IPRD: :album,
+      ICMT: :comment,
+      ICRD: :year,
+      YEAR: :year,
+      IGNR: :genre,
+      TRCK: :track
+    }
+    CHANNEL_MODE_INDEX = %w(Mono Stereo)
+    tag_delegate :@id3_tag,
+      :title,
+      :artist,
+      :album,
+      :albumartist,
+      :composer,
+      :comments,
+      :track,
+      :track_total,
+      :genre,
+      :year,
+      :disc,
+      :disc_total,
+      :images
+    def channel_mode
+      CHANNEL_MODE_INDEX[@channel - 1]
+    end
+    private
+      def parse
+        top_chunk = Riff::Chunk.new(@file_io)
+        return unless top_chunk.valid?
+        total_chunk_size = top_chunk.size + Riff::Chunk::HEADER_SIZE
+        # The top "RIFF" chunks include an additional field in the first four bytes of the data field.
+        # This additional field provides the form type of the field.
+        # For wav file, the value of the type field is 'WAVE'
+        return unless top_chunk.id == 'RIFF' && top_chunk.type == 'WAVE'
+        until total_chunk_size <= @file_io.pos || @file_io.eof? do
+          sub_chunk = Riff::Chunk.new(@file_io)
+          parse_sub_chunk(sub_chunk)
+        end
+      end
+      def parse_sub_chunk(sub_chunk)
+        return unless sub_chunk.valid?
+        case sub_chunk.id
+        when 'fmt'
+          parse_fmt_chunk(sub_chunk)
+        when 'data'
+          parse_data_chunk(sub_chunk)
+        when 'LIST'
+          parse_list_chunk(sub_chunk)
+        when 'id3', 'ID3'
+          parse_id3_chunk(sub_chunk)
+        else
+          @file_io.seek(sub_chunk.size, IO::SEEK_CUR)
+        end
+      end
+      # The fmt chunk data structure:
+      # Length             Meaning       Description
+      #
+      # 2(little endian)   AudioFormat   PCM = 1 (i.e. Linear quantization)
+      #                                  Values other than 1 indicate some
+      #                                  form of compression.
+      #
+      # 2(little endian)   NumChannels   Mono = 1, Stereo = 2, etc.
+      #
+      # 4(little endian)   SampleRate    8000, 44100, etc.
+      #
+      # 4(little endian)   ByteRate      == SampleRate * NumChannels * BitsPerSample/8
+      #
+      # 2(little endian)   BlockAlign    == NumChannels * BitsPerSample/8
+      #                                  The number of bytes for one sample including
+      #                                  all channels.
+      #
+      # 2(little endian)   BitsPerSample 8 bits = 8, 16 bits = 16, etc.
+      def parse_fmt_chunk(chunk)
+        _, @channel, @sample_rate, _, _, @bits_per_sample = chunk.data.unpack('vvVVvv')
+        @bitrate = @sample_rate * @channel * @bits_per_sample / 1000
+      end
+      def parse_data_chunk(chunk)
+        @duration = chunk.size * 8 / (@bitrate * 1000)
+        @file_io.seek(chunk.size, IO::SEEK_CUR)
+      end
+      def parse_list_chunk(chunk)
+        list_chunk_end_position = @file_io.pos + chunk.size
+        # RIFF can be tagged with metadata in the INFO chunk.
+        # And INFO chunk as a subchunk for LIST chunk.
+        if chunk.type != 'INFO'
+          @file_io.seek(chunk.size, IO::SEEK_CUR)
+        else
+          until list_chunk_end_position <= @file_io.pos do
+            info_chunk = Riff::Chunk.new(@file_io)
+            unless INFO_ID_MAPPING.keys.include? info_chunk.id.to_sym
+              @file_io.seek(info_chunk.size, IO::SEEK_CUR); next
+            end
+            update_attribute(info_chunk)
+          end
+        end
+      end
+      def parse_id3_chunk(chunk)
+        @id3_tag = ID3::V2.new(StringIO.new(chunk.data))
+      end
+      def update_attribute(chunk)
+        attribute_name = INFO_ID_MAPPING[chunk.id.to_sym]
+        chunk_data = Helper.encode_to_utf8(chunk.data)
+        case attribute_name
+        when :comment
+          @comments.push(chunk_data)
+        else
+          instance_variable_set("@#{attribute_name}", chunk_data)
+        end
+      end
+  end
+end