wahwah 0.1.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +21 -0
  3. data/lib/wahwah.rb +74 -3
  4. data/lib/wahwah/asf/object.rb +39 -0
  5. data/lib/wahwah/asf_tag.rb +220 -0
  6. data/lib/wahwah/errors.rb +6 -0
  7. data/lib/wahwah/flac/block.rb +57 -0
  8. data/lib/wahwah/flac/streaminfo_block.rb +51 -0
  9. data/lib/wahwah/flac_tag.rb +84 -0
  10. data/lib/wahwah/helper.rb +37 -0
  11. data/lib/wahwah/id3/comment_frame_body.rb +21 -0
  12. data/lib/wahwah/id3/frame.rb +180 -0
  13. data/lib/wahwah/id3/frame_body.rb +36 -0
  14. data/lib/wahwah/id3/genre_frame_body.rb +15 -0
  15. data/lib/wahwah/id3/image_frame_body.rb +60 -0
  16. data/lib/wahwah/id3/text_frame_body.rb +16 -0
  17. data/lib/wahwah/id3/v1.rb +96 -0
  18. data/lib/wahwah/id3/v2.rb +60 -0
  19. data/lib/wahwah/id3/v2_header.rb +53 -0
  20. data/lib/wahwah/mp3/mpeg_frame_header.rb +141 -0
  21. data/lib/wahwah/mp3/vbri_header.rb +47 -0
  22. data/lib/wahwah/mp3/xing_header.rb +45 -0
  23. data/lib/wahwah/mp3_tag.rb +110 -0
  24. data/lib/wahwah/mp4/atom.rb +105 -0
  25. data/lib/wahwah/mp4_tag.rb +126 -0
  26. data/lib/wahwah/ogg/flac_tag.rb +37 -0
  27. data/lib/wahwah/ogg/opus_tag.rb +33 -0
  28. data/lib/wahwah/ogg/packets.rb +41 -0
  29. data/lib/wahwah/ogg/page.rb +121 -0
  30. data/lib/wahwah/ogg/pages.rb +24 -0
  31. data/lib/wahwah/ogg/vorbis_comment.rb +51 -0
  32. data/lib/wahwah/ogg/vorbis_tag.rb +35 -0
  33. data/lib/wahwah/ogg_tag.rb +66 -0
  34. data/lib/wahwah/riff/chunk.rb +54 -0
  35. data/lib/wahwah/riff_tag.rb +140 -0
  36. data/lib/wahwah/tag.rb +59 -0
  37. data/lib/wahwah/tag_delegate.rb +16 -0
  38. data/lib/wahwah/version.rb +4 -2
  39. metadata +94 -23
  40. data/.gitignore +0 -8
  41. data/.travis.yml +0 -5
  42. data/Gemfile +0 -6
  43. data/Gemfile.lock +0 -22
  44. data/README.md +0 -35
  45. data/Rakefile +0 -10
  46. data/bin/console +0 -14
  47. data/bin/setup +0 -8
  48. data/wahwah.gemspec +0 -27
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WahWah
4
+ module Ogg
5
+ # The Ogg page header has the following format:
6
+ #
7
+ # 0 1 2 3
8
+ # 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1| Byte
9
+ # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
10
+ # | capture_pattern: Magic number for page start "OggS" | 0-3
11
+ # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
12
+ # | version | header_type | granule_position | 4-7
13
+ # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
14
+ # | | 8-11
15
+ # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
16
+ # | | bitstream_serial_number | 12-15
17
+ # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
18
+ # | | page_sequence_number | 16-19
19
+ # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
20
+ # | | CRC_checksum | 20-23
21
+ # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
22
+ # | |page_segments | segment_table | 24-27
23
+ # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
24
+ # | ... | 28-
25
+ # +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
26
+ #
27
+ #
28
+ # The fields in the page header have the following meaning:
29
+ #
30
+ # 1. capture_pattern: a 4 Byte field that signifies the beginning of a
31
+ # page. It contains the magic numbers:
32
+
33
+ # 0x4f 'O'
34
+
35
+ # 0x67 'g'
36
+
37
+ # 0x67 'g'
38
+
39
+ # 0x53 'S'
40
+
41
+ # It helps a decoder to find the page boundaries and regain
42
+ # synchronisation after parsing a corrupted stream. Once the
43
+ # capture pattern is found, the decoder verifies page sync and
44
+ # integrity by computing and comparing the checksum.
45
+
46
+ # 2. stream_structure_version: 1 Byte signifying the version number of
47
+ # the Ogg file format used in this stream (this document specifies
48
+ # version 0).
49
+
50
+ # 3. header_type_flag: the bits in this 1 Byte field identify the
51
+ # specific type of this page.
52
+
53
+ # * bit 0x01
54
+
55
+ # set: page contains data of a packet continued from the previous
56
+ # page
57
+
58
+ # unset: page contains a fresh packet
59
+
60
+ # * bit 0x02
61
+
62
+ # set: this is the first page of a logical bitstream (bos)
63
+
64
+ # unset: this page is not a first page
65
+
66
+ # * bit 0x04
67
+
68
+ # set: this is the last page of a logical bitstream (eos)
69
+
70
+ # unset: this page is not a last page
71
+
72
+ # 4. granule_position: an 8 Byte field containing position information.
73
+ # For example, for an audio stream, it MAY contain the total number
74
+ # of PCM samples encoded after including all frames finished on this
75
+ # page. For a video stream it MAY contain the total number of video
76
+ # frames encoded after this page. This is a hint for the decoder
77
+ # and gives it some timing and position information. Its meaning is
78
+ # dependent on the codec for that logical bitstream and specified in
79
+ # a specific media mapping. A special value of -1 (in two's
80
+ # complement) indicates that no packets finish on this page.
81
+
82
+ # 5. bitstream_serial_number: a 4 Byte field containing the unique
83
+ # serial number by which the logical bitstream is identified.
84
+
85
+ # 6. page_sequence_number: a 4 Byte field containing the sequence
86
+ # number of the page so the decoder can identify page loss. This
87
+ # sequence number is increasing on each logical bitstream
88
+ # separately.
89
+
90
+ # 7. CRC_checksum: a 4 Byte field containing a 32 bit CRC checksum of
91
+ # the page (including header with zero CRC field and page content).
92
+ # The generator polynomial is 0x04c11db7.
93
+
94
+ # 8. number_page_segments: 1 Byte giving the number of segment entries
95
+ # encoded in the segment table.
96
+
97
+ # 9. segment_table: number_page_segments Bytes containing the lacing
98
+ # values of all segments in this page. Each Byte contains one
99
+ # lacing value.
100
# A single Ogg page read sequentially from an IO positioned at a page start.
#
# Reads the fixed-size page header, then the segment table, then one
# payload string per segment. When the header is missing, too short, or
# does not carry the "OggS" capture pattern with version 0, the page is
# left invalid and +segments+ stays nil.
class Page
  HEADER_SIZE = 27

  # capture_pattern (4 bytes), version (1 byte), header_type skipped (1 byte),
  # granule_position (64 bits, explicitly little-endian so the result does not
  # depend on the host byte order), serial/sequence/CRC skipped (12 bytes),
  # page_segments (1 byte).
  HEADER_FORMAT = 'A4CxQ<x12C'

  attr_reader :segments, :granule_position

  # @param file_io [#read] IO-like object positioned at the start of a page
  def initialize(file_io)
    header_content = file_io.read(HEADER_SIZE)
    # read returns nil at EOF and a short string on a truncated header.
    return if header_content.nil? || header_content.size < HEADER_SIZE

    @capture_pattern, @version, @granule_position, page_segments = header_content.unpack(HEADER_FORMAT)
    return unless valid?

    # 'C*' decodes only the lacing values that are really present, so a
    # truncated table never yields nil lengths (read(nil) would slurp the
    # whole remainder of the stream).
    segment_table = (file_io.read(page_segments) || '').unpack('C*')
    @segments = segment_table.map { |segment_length| file_io.read(segment_length) || String.new }
  end

  # @return [Boolean] true when the header carried the "OggS" magic and version 0
  def valid?
    @capture_pattern == 'OggS' && @version == 0
  end
end
120
+ end
121
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WahWah
4
+ module Ogg
5
# Enumerates the Ogg pages of a stream from its beginning.
#
# Every enumeration rewinds the underlying IO and builds pages one after
# another until the IO is exhausted or an invalid page is met.
class Pages
  include Enumerable

  # @param file_io [#rewind, #eof?, #read] IO-like object holding the stream
  def initialize(file_io)
    @file_io = file_io
  end

  # Yields each valid page in stream order.
  #
  # @yieldparam page [Ogg::Page]
  # @return [Enumerator] when called without a block
  def each
    return enum_for(:each) unless block_given?

    @file_io.rewind

    until @file_io.eof?
      page = Ogg::Page.new(@file_io)
      # An invalid page means the stream is corrupt or finished; stop here.
      break unless page.valid?

      yield page
    end
  end
end
23
+ end
24
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WahWah
4
+ module Ogg
5
+ # Vorbis comment structure:
6
+ #
7
+ # 1) [vendor_length] = read an unsigned integer of 32 bits
8
+ # 2) [vendor_string] = read a UTF-8 vector as [vendor_length] octets
9
+ # 3) [user_comment_list_length] = read an unsigned integer of 32 bits
10
+ # 4) iterate [user_comment_list_length] times {
11
+ # 5) [length] = read an unsigned integer of 32 bits
12
+ # 6) this iteration’s user comment = read a UTF-8 vector as [length] octets
13
+ # }
14
+ # 7) [framing_bit] = read a single bit as boolean
15
+ # 8) if ( [framing_bit] unset or end-of-packet ) then ERROR
16
+ # 9) done.
17
# Mixin that parses a Vorbis comment block and stores the recognised fields
# in instance variables of the including object (@title, @album, ...).
module VorbisComment
  # Maps upper-cased Vorbis comment field names to attribute names.
  # NOTE: the constant name is misspelled ("COMMET") but is kept as is
  # because other classes reference it.
  COMMET_FIELD_MAPPING = {
    'TITLE' => :title,
    'ALBUM' => :album,
    'ALBUMARTIST' => :albumartist,
    'TRACKNUMBER' => :track,
    'ARTIST' => :artist,
    'DATE' => :year,
    'GENRE' => :genre,
    'DISCNUMBER' => :disc,
    'COMPOSER' => :composer
  }.freeze

  # Parses +comment_content+ (vendor string followed by the user comment
  # list) and assigns every mapped field to its instance variable.
  # Track and disc values are converted with +to_i+.
  #
  # Parsing stops quietly when the content is truncated, so whatever was
  # read before the cut is kept.
  #
  # @param comment_content [String] raw comment data without any packet id
  def parse_vorbis_comment(comment_content)
    comment_content = StringIO.new(comment_content)

    vendor_length = read_vorbis_comment_length(comment_content)
    return if vendor_length.nil?

    comment_content.seek(vendor_length, IO::SEEK_CUR) # Skip vendor_string

    comment_list_length = read_vorbis_comment_length(comment_content)
    return if comment_list_length.nil?

    comment_list_length.times do
      comment_length = read_vorbis_comment_length(comment_content)
      break if comment_length.nil?

      raw_comment = comment_content.read(comment_length)
      break if raw_comment.nil?

      field_name, field_value = Helper.encode_to_utf8(raw_comment).split('=', 2)
      # Entries without a "NAME=value" shape carry nothing to store.
      next if field_name.nil? || field_value.nil?

      # Field names are case-insensitive, so normalise before the lookup.
      attr_name = COMMET_FIELD_MAPPING[field_name.upcase]
      next if attr_name.nil?

      field_value = field_value.to_i if %i(track disc).include? attr_name

      instance_variable_set("@#{attr_name}", field_value)
    end
  end

  private

  # Reads a 32-bit little-endian unsigned length; nil when fewer than
  # four bytes are left.
  def read_vorbis_comment_length(io)
    io.read(4)&.unpack('V')&.first
  end
end
50
+ end
51
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WahWah
4
+ module Ogg
5
# Metadata of an Ogg Vorbis stream, built from its first two packets.
class VorbisTag
  include VorbisComment

  attr_reader :bitrate, :sample_rate, *COMMET_FIELD_MAPPING.values

  # @param identification_packet [String] first packet of the stream
  # @param comment_packet [String] second packet of the stream
  def initialize(identification_packet, comment_packet)
    # Identification packet layout:
    #
    #  1) "\x01vorbis"
    #  2) [vorbis_version]    32 bits, unsigned
    #  3) [audio_channels]     8 bits, unsigned
    #  4) [audio_sample_rate] 32 bits, unsigned
    #  5) [bitrate_maximum]   32 bits, signed
    #  6) [bitrate_nominal]   32 bits, signed
    #  7) [bitrate_minimum]   32 bits, signed
    #  8) [blocksize_0]        4 bits (exponent of 2)
    #  9) [blocksize_1]        4 bits (exponent of 2)
    # 10) [framing_flag]       1 bit
    #
    # Bytes 12..23 hold the sample rate, the maximum bitrate (skipped)
    # and the nominal bitrate.
    stream_fields = identification_packet[12, 12].unpack('Vx4V')
    @sample_rate = stream_fields[0]
    @bitrate = stream_fields[1] / 1000

    # A Vorbis comment packet begins with the 7-byte id "\x03vorbis";
    # anything else is not parsed.
    packet_id = comment_packet[0..6]
    packet_body = comment_packet[7..-1]
    return unless packet_id == "\x03vorbis"

    parse_vorbis_comment(packet_body)
  end
end
34
+ end
35
+ end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WahWah
4
# Tag reader for Ogg containers. The first two packets select the codec
# specific tag (Vorbis, Opus or FLAC), to which the metadata readers are
# delegated; duration and bitrate are computed lazily.
class OggTag < Tag
  extend TagDelegate

  tag_delegate :@tag,
    :title,
    :album,
    :albumartist,
    :track,
    :artist,
    :year,
    :genre,
    :disc,
    :composer,
    :sample_rate

  # @return [Numeric, nil] duration as reported by the codec tag or derived
  #   from the last page; nil when it cannot be determined
  def duration
    @duration ||= parse_duration
  end

  # @return [Numeric, nil] bitrate as reported by the codec tag or derived
  #   from file size and duration; nil when it cannot be determined
  def bitrate
    @bitrate ||= parse_bitrate
  end

  private

  def packets
    @packets ||= Ogg::Packets.new(@file_io)
  end

  def pages
    @pages ||= Ogg::Pages.new(@file_io)
  end

  # Picks the codec specific tag from the identification packet signature.
  # @tag stays nil when the stream has fewer than two packets or the codec
  # is not recognised.
  def parse
    identification_packet, comment_packet = packets.first(2)
    return if identification_packet.nil? || comment_packet.nil?

    @overhead_packets_size = identification_packet.size + comment_packet.size

    @tag =
      if identification_packet.start_with?("\x01vorbis")
        Ogg::VorbisTag.new(identification_packet, comment_packet)
      elsif identification_packet.start_with?('OpusHead')
        Ogg::OpusTag.new(identification_packet, comment_packet)
      elsif identification_packet.start_with?("\x7FFLAC")
        Ogg::FlacTag.new(identification_packet, comment_packet)
      end
  end

  def parse_duration
    # Nothing was recognised in this file, so there is nothing to measure.
    return if @tag.nil?
    return @tag.duration if @tag.respond_to? :duration

    sample_rate = @tag.sample_rate
    # Dividing by a missing or zero rate would raise or produce Infinity.
    return if sample_rate.nil? || sample_rate.zero?

    # Walk the pages keeping only the most recent one, so the payload of
    # every earlier page can be released instead of being held in an array.
    last_page = nil
    pages.each { |page| last_page = page }
    return if last_page.nil?

    pre_skip = @tag.respond_to?(:pre_skip) ? @tag.pre_skip : 0

    ((last_page.granule_position - pre_skip) / sample_rate.to_f).round
  end

  def parse_bitrate
    return if @tag.nil?
    return @tag.bitrate if @tag.respond_to? :bitrate

    seconds = duration
    # A zero duration would make the division yield Infinity, which
    # cannot be rounded.
    return if seconds.nil? || seconds.zero?

    ((file_size - @overhead_packets_size) * 8.0 / seconds / 1000).round
  end
end
66
+ end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WahWah
4
+ module Riff
5
+ # RIFF files consist entirely of "chunks".
6
+
7
+ # All chunks have the following format:
8
+
9
+ # 4 bytes: an ASCII identifier for this chunk (examples are "fmt " and "data"; note the space in "fmt ").
10
+ # 4 bytes: an unsigned, little-endian 32-bit integer with the length of this chunk (except this field itself and the chunk identifier).
11
+ # variable-sized field: the chunk data itself, of the size given in the previous field.
12
+ # a pad byte, if the chunk's length is not even.
13
+
14
+ # Two chunk identifiers, "RIFF" and "LIST", introduce a chunk that can contain subchunks. The RIFF and LIST chunk data (appearing after the identifier and length) have the following format:
15
+
16
+ # 4 bytes: an ASCII identifier for this particular RIFF or LIST chunk (for RIFF in the typical case, these 4 bytes describe the content of the entire file, such as "AVI " or "WAVE").
17
+ # rest of data: subchunks.
18
# One RIFF chunk header read from an IO, with lazy access to its data.
#
# The 8-byte header gives the chunk id and the data length. "RIFF" and
# "LIST" chunks carry an extra 4-byte type right after the header, which
# is read eagerly. The IO position after that is remembered so +data+ can
# be fetched later.
class Chunk
  HEADER_SIZE = 8
  # 4-byte ASCII id (trailing spaces/NULs stripped), 32-bit little-endian length.
  HEADER_FORMAT = 'A4V'
  HEADER_TYPE_SIZE = 4

  attr_reader :id, :type

  # @param file_io [#read, #pos, #seek] IO-like object positioned at a chunk header
  def initialize(file_io)
    # read returns nil at EOF, which leaves both @id and @size nil.
    @id, @size = file_io.read(HEADER_SIZE)&.unpack(HEADER_FORMAT)
    return unless valid?

    @type = file_io.read(HEADER_TYPE_SIZE)&.unpack('A4')&.first if have_type?
    @file_io = file_io
    @position = file_io.pos
  end

  # Number of bytes that follow the header (and the type field, when
  # present), including the pad byte of odd-sized chunks.
  #
  # @return [Integer]
  def size
    padded_size = @size.odd? ? @size + 1 : @size
    have_type? ? padded_size - HEADER_TYPE_SIZE : padded_size
  end

  # Seeks back to the start of the chunk data and reads +size+ bytes.
  #
  # @return [String, nil] whatever the underlying read returns
  def data
    @file_io.seek(@position)
    @file_io.read(size)
  end

  # @return [Boolean] true when a non-empty id and a positive length were read
  def valid?
    !@id.nil? && !@id.empty? && !@size.nil? && @size > 0
  end

  private

  def have_type?
    %w(RIFF LIST).include? @id
  end
end
53
+ end
54
+ end
@@ -0,0 +1,140 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WahWah
4
# Tag reader for RIFF/WAVE files. Audio properties come from the "fmt" and
# "data" chunks, metadata from the LIST/INFO chunk and, when present, from
# an embedded ID3v2 chunk to which the listed readers are delegated.
class RiffTag < Tag
  extend TagDelegate

  # see https://exiftool.org/TagNames/RIFF.html#Info for more info
  INFO_ID_MAPPING = {
    INAM: :title,
    TITL: :title,
    IART: :artist,
    IPRD: :album,
    ICMT: :comment,
    ICRD: :year,
    YEAR: :year,
    IGNR: :genre,
    TRCK: :track
  }.freeze

  CHANNEL_MODE_INDEX = %w(Mono Stereo).freeze

  tag_delegate :@id3_tag,
    :title,
    :artist,
    :album,
    :albumartist,
    :composer,
    :comments,
    :track,
    :track_total,
    :genre,
    :year,
    :disc,
    :disc_total,
    :images

  # @return [String, nil] "Mono" or "Stereo"; nil when no fmt chunk was
  #   parsed or the channel count has no name in CHANNEL_MODE_INDEX
  def channel_mode
    # A channel count below 1 would index the array from its end.
    return if @channel.nil? || @channel < 1

    CHANNEL_MODE_INDEX[@channel - 1]
  end

  private

  def parse
    top_chunk = Riff::Chunk.new(@file_io)
    return unless top_chunk.valid?

    total_chunk_size = top_chunk.size + Riff::Chunk::HEADER_SIZE

    # The top "RIFF" chunks include an additional field in the first four bytes of the data field.
    # This additional field provides the form type of the field.
    # For wav file, the value of the type field is 'WAVE'
    return unless top_chunk.id == 'RIFF' && top_chunk.type == 'WAVE'

    until total_chunk_size <= @file_io.pos || @file_io.eof?
      sub_chunk = Riff::Chunk.new(@file_io)
      parse_sub_chunk(sub_chunk)
    end
  end

  # Dispatches on the chunk id; unknown chunks are skipped.
  def parse_sub_chunk(sub_chunk)
    return unless sub_chunk.valid?

    case sub_chunk.id
    when 'fmt'
      parse_fmt_chunk(sub_chunk)
    when 'data'
      parse_data_chunk(sub_chunk)
    when 'LIST'
      parse_list_chunk(sub_chunk)
    when 'id3', 'ID3'
      parse_id3_chunk(sub_chunk)
    else
      @file_io.seek(sub_chunk.size, IO::SEEK_CUR)
    end
  end

  # The fmt chunk data structure:
  # Length              Meaning           Description
  #
  # 2(little endian)    AudioFormat       PCM = 1 (i.e. Linear quantization)
  #                                       Values other than 1 indicate some
  #                                       form of compression.
  #
  # 2(little endian)    NumChannels       Mono = 1, Stereo = 2, etc.
  #
  # 4(little endian)    SampleRate        8000, 44100, etc.
  #
  # 4(little endian)    ByteRate          == SampleRate * NumChannels * BitsPerSample/8
  #
  # 2(little endian)    BlockAlign        == NumChannels * BitsPerSample/8
  #                                       The number of bytes for one sample including
  #                                       all channels.
  #
  # 2(little endian)    BitsPerSample     8 bits = 8, 16 bits = 16, etc.
  def parse_fmt_chunk(chunk)
    _, @channel, @sample_rate, _, _, @bits_per_sample = chunk.data.to_s.unpack('vvVVvv')
    # A fmt chunk cut short leaves some fields nil; no bitrate can be derived then.
    return if @channel.nil? || @sample_rate.nil? || @bits_per_sample.nil?

    @bitrate = @sample_rate * @channel * @bits_per_sample / 1000
  end

  def parse_data_chunk(chunk)
    # The bitrate is unknown when the data chunk precedes the fmt chunk and
    # may be zero for degenerate formats; leave the duration unset then.
    @duration = chunk.size * 8 / (@bitrate * 1000) if @bitrate && @bitrate > 0
    @file_io.seek(chunk.size, IO::SEEK_CUR)
  end

  def parse_list_chunk(chunk)
    list_chunk_end_position = @file_io.pos + chunk.size

    # RIFF can be tagged with metadata in the INFO chunk.
    # And INFO chunk as a subchunk for LIST chunk.
    if chunk.type != 'INFO'
      @file_io.seek(chunk.size, IO::SEEK_CUR)
    else
      until list_chunk_end_position <= @file_io.pos || @file_io.eof?
        info_chunk = Riff::Chunk.new(@file_io)

        # An empty or truncated sub-chunk has no payload to read or skip;
        # its header has already been consumed, so just move on.
        next unless info_chunk.valid?

        unless INFO_ID_MAPPING.key?(info_chunk.id.to_sym)
          @file_io.seek(info_chunk.size, IO::SEEK_CUR)
          next
        end

        update_attribute(info_chunk)
      end
    end
  end

  def parse_id3_chunk(chunk)
    @id3_tag = ID3::V2.new(StringIO.new(chunk.data))
  end

  # Stores the INFO chunk text in the attribute named by INFO_ID_MAPPING;
  # comments accumulate in @comments.
  def update_attribute(chunk)
    attribute_name = INFO_ID_MAPPING[chunk.id.to_sym]
    chunk_data = Helper.encode_to_utf8(chunk.data)

    case attribute_name
    when :comment
      # NOTE(review): @comments is presumably initialised by the parent
      # class; the fallback only matters if it is not.
      (@comments ||= []).push(chunk_data)
    else
      instance_variable_set("@#{attribute_name}", chunk_data)
    end
  end
end
140
+ end