fileshunter 0.1.0.20130725
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS +3 -0
- data/ChangeLog +5 -0
- data/Credits +21 -0
- data/LICENSE +31 -0
- data/README +15 -0
- data/README.md +11 -0
- data/Rakefile +7 -0
- data/ReleaseInfo +8 -0
- data/bin/fileshunt +216 -0
- data/ext/fileshunter/Decoders/_FLAC.c +233 -0
- data/ext/fileshunter/Decoders/extconf.rb +3 -0
- data/lib/fileshunter/BeginPatternDecoder.rb +218 -0
- data/lib/fileshunter/Decoder.rb +66 -0
- data/lib/fileshunter/Decoders/ASF.rb +50 -0
- data/lib/fileshunter/Decoders/BMP.rb +118 -0
- data/lib/fileshunter/Decoders/CAB.rb +140 -0
- data/lib/fileshunter/Decoders/CFBF.rb +92 -0
- data/lib/fileshunter/Decoders/EBML.rb +369 -0
- data/lib/fileshunter/Decoders/EXE.rb +505 -0
- data/lib/fileshunter/Decoders/FLAC.rb +387 -0
- data/lib/fileshunter/Decoders/ICO.rb +71 -0
- data/lib/fileshunter/Decoders/JPEG.rb +247 -0
- data/lib/fileshunter/Decoders/M2V.rb +30 -0
- data/lib/fileshunter/Decoders/MP3.rb +341 -0
- data/lib/fileshunter/Decoders/MP4.rb +620 -0
- data/lib/fileshunter/Decoders/MPG_Video.rb +30 -0
- data/lib/fileshunter/Decoders/OGG.rb +74 -0
- data/lib/fileshunter/Decoders/RIFF.rb +437 -0
- data/lib/fileshunter/Decoders/TIFF.rb +350 -0
- data/lib/fileshunter/Decoders/Text.rb +240 -0
- data/lib/fileshunter/Segment.rb +50 -0
- data/lib/fileshunter/SegmentsAnalyzer.rb +251 -0
- data/lib/fileshunter.rb +15 -0
- metadata +130 -0
| @@ -0,0 +1,74 @@ | |
| 1 | 
            +
            module FilesHunter

              module Decoders

                # Decoder recognizing Ogg pages ("OggS" followed by a 0 version byte) and
                # walking them one after the other to find where the file ends.
                class OGG < BeginPatternDecoder

                  BEGIN_PATTERN_OGG = "OggS\x00".force_encoding(Encoding::ASCII_8BIT)

                  # Tokens looked for in the payload of Beginning Of Stream pages, with the
                  # extension they imply.
                  # Sorted by the least dominating extension first
                  KNOWN_EXTENSIONS = {
                    'vorbis'.force_encoding(Encoding::ASCII_8BIT) => :oga,
                    'theora'.force_encoding(Encoding::ASCII_8BIT) => :ogv
                  }

                  # Give the pattern marking the beginning of an Ogg page, along with the
                  # options to be used while scanning for it.
                  #
                  # Returns the pair [ pattern, options ].
                  def get_begin_pattern
                    return BEGIN_PATTERN_OGG, { :offset_inc => 5 }
                  end

                  # Decode consecutive Ogg pages starting at the given offset.
                  #
                  # offset:: offset in @data of the first page ("OggS" capture pattern).
                  #
                  # Returns the offset following the last consecutive page.
                  # Reports problems through invalid_data / missing_previous_data.
                  def decode(offset)
                    ending_offset = nil

                    cursor = offset
                    extensions = [ :ogg, :ogx ] # By default
                    nbr_pages = 0
                    # Serial numbers of the bitstreams for which a BOS page has been seen
                    bitstreams = []
                    while (ending_offset == nil)
                      # Page header layout, relative to cursor:
                      #   +5      header type flags
                      #   +14..17 bitstream serial number
                      #   +18..21 page sequence number
                      #   +26     number of segments, followed by 1 size byte per segment
                      header_type = @data[cursor+5].ord
                      invalid_data("@#{cursor} - Invalid header type: #{header_type}") if (header_type > 7)
                      bitstream_sn = BinData::Uint32le.read(@data[cursor+14..cursor+17])
                      page_sequence_idx = BinData::Uint32le.read(@data[cursor+18..cursor+21])
                      nbr_segments = @data[cursor+26].ord
                      # The payload size is the sum of all the entries of the segments table
                      size = @data[cursor+27..cursor+26+nbr_segments].bytes.inject(0) { |total_size, segment_size| total_size + segment_size }
                      log_debug("@#{cursor} - [ Bitstream ##{bitstream_sn} / Page ##{page_sequence_idx} ]: Type #{header_type}, having #{nbr_segments} segments (total size of #{size})")
                      # Skip the header and the segments table: cursor is now on the payload
                      cursor += 27 + nbr_segments
                      found_relevant_data(extensions)
                      if ((header_type & 0b00000010) != 0)
                        # Page of type BOS: Beginning of Stream
                        invalid_data("@#{cursor} - Stream #{bitstream_sn} was already marked as begun.") if (bitstreams.include?(bitstream_sn))
                        # We can find whether it is a video file or an audio one
                        payload = @data[cursor..cursor+size-1]
                        KNOWN_EXTENSIONS.each do |token, extension|
                          # Several streams of the same kind must not register the extension twice
                          next if extensions.include?(extension)
                          extensions.unshift(extension) if (payload.index(token) != nil)
                        end
                        # A video extension takes precedence over an audio one
                        extensions.delete(:oga) if (extensions.include?(:ogv))
                        found_relevant_data(extensions)
                        bitstreams << bitstream_sn
                      elsif ((header_type & 0b00000100) == 0)
                        # This is a packet in the middle of a stream
                        missing_previous_data if (!bitstreams.include?(bitstream_sn))
                      end
                      cursor += size
                      progress(cursor)
                      nbr_pages += 1
                      # Check if a subsequent page is present
                      ending_offset = cursor if ((cursor == @end_offset) or (@data[cursor..cursor+4] != BEGIN_PATTERN_OGG))
                    end
                    metadata( :nbr_pages => nbr_pages )

                    return ending_offset
                  end

                end

              end

            end
         | 
| @@ -0,0 +1,437 @@ | |
| 1 | 
            +
            # encoding: ASCII-8BIT
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module FilesHunter
         | 
| 4 | 
            +
             | 
| 5 | 
            +
              module Decoders
         | 
| 6 | 
            +
             | 
| 7 | 
            +
                # WAV files can contain MP3 files
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                class RIFF < BeginPatternDecoder
         | 
| 10 | 
            +
             | 
| 11 | 
            +
                  # Reference: http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/RIFF.html
         | 
| 12 | 
            +
                  # Reference: http://msdn.microsoft.com/en-us/library/windows/desktop/dd318189%28v=vs.85%29.aspx
         | 
| 13 | 
            +
                  # Reference: http://www.the-labs.com/Video/odmlff2-avidef.pdf
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                  # Identifiers found at the beginning of RIFF containers and padding chunks
                  BEGIN_PATTERN_RIFF = 'RIFF'
                  BEGIN_PATTERN_RIFX = 'RIFX'
                  BEGIN_PATTERN_JUNK = 'JUNK'
                  # Binary (no encoding) regexp matching both container identifiers.
                  # The flag is given as the 2nd argument: the 3-arguments form of
                  # Regexp.new is not accepted anymore by recent Ruby versions.
                  BEGIN_PATTERN_FILE = Regexp.new("RIF(F|X)", Regexp::NOENCODING)

                  # INFO elements: element identifier => metadata key under which its
                  # content gets reported
                  INFO_ELEMENTS_ID = {
                    'AGES' => :Rated,
                    'CMNT' => :Comment,
                    'CODE' => :EncodedBy,
                    'COMM' => :Comments,
                    'DIRC' => :Directory,
                    'DISP' => :SoundSchemeTitle,
                    'DTIM' => :DateTimeOriginal,
                    'GENR' => :Genre,
                    'IARL' => :ArchivalLocation,
                    'IART' => :Artist,
                    'IAS1' => :FirstLanguage,
                    'IAS2' => :SecondLanguage,
                    'IAS3' => :ThirdLanguage,
                    'IAS4' => :FourthLanguage,
                    'IAS5' => :FifthLanguage,
                    'IAS6' => :SixthLanguage,
                    'IAS7' => :SeventhLanguage,
                    'IAS8' => :EighthLanguage,
                    'IAS9' => :NinthLanguage,
                    'IBSU' => :BaseURL,
                    'ICAS' => :DefaultAudioStream,
                    'ICDS' => :CostumeDesigner,
                    'ICMS' => :Commissioned,
                    'ICMT' => :Comment,
                    'ICNM' => :Cinematographer,
                    'ICNT' => :Country,
                    'ICOP' => :Copyright,
                    'ICRD' => :DateCreated,
                    'ICRP' => :Cropped,
                    'IDIM' => :Dimensions,
                    'IDPI' => :DotsPerInch,
                    'IDST' => :DistributedBy,
                    'IEDT' => :EditedBy,
                    'IENC' => :EncodedBy,
                    'IENG' => :Engineer,
                    'IGNR' => :Genre,
                    'IKEY' => :Keywords,
                    'ILGT' => :Lightness,
                    'ILGU' => :LogoURL,
                    'ILIU' => :LogoIconURL,
                    'ILNG' => :Language,
                    'IMBI' => :MoreInfoBannerImage,
                    'IMBU' => :MoreInfoBannerURL,
                    'IMED' => :Medium,
                    'IMIT' => :MoreInfoText,
                    'IMIU' => :MoreInfoURL,
                    'IMUS' => :MusicBy,
                    'INAM' => :Title,
                    'IPDS' => :ProductionDesigner,
                    'IPLT' => :NumColors,
                    'IPRD' => :Product,
                    'IPRO' => :ProducedBy,
                    'IRIP' => :RippedBy,
                    'IRTD' => :Rating,
                    'ISBJ' => :Subject,
                    'ISFT' => :Software,
                    'ISGN' => :SecondaryGenre,
                    'ISHP' => :Sharpness,
                    'ISRC' => :Source,
                    'ISRF' => :SourceForm,
                    'ISTD' => :ProductionStudio,
                    'ISTR' => :Starring,
                    'ITCH' => :Technician,
                    'IWMU' => :WatermarkURL,
                    'IWRI' => :WrittenBy,
                    'LANG' => :Language,
                    'LOCA' => :Location,
                    'PRT1' => :Part,
                    'PRT2' => :NumberOfParts,
                    'RATE' => :Rate,
                    'STAR' => :Starring,
                    'STAT' => :Statistics,
                    'TAPE' => :TapeName,
                    'TCDO' => :EndTimecode,
                    'TCOD' => :StartTimecode,
                    'TITL' => :Title,
                    'TLEN' => :Length,
                    'TORG' => :Organization,
                    'TRCK' => :TrackNumber,
                    'TURL' => :URL,
                    'TVER' => :Version,
                    'VMAJ' => :VegasVersionMajor,
                    'VMIN' => :VegasVersionMinor,
                    'YEAR' => :Year,
                    # Exif tags
                    'ecor' => :Make,
                    'emdl' => :Model,
                    'emnt' => :MakerNotes,
                    'erel' => :RelatedImageFile,
                    'etim' => :TimeCreated,
                    'eucm' => :UserComment,
                    'ever' => :ExifVersion
                  }

                  # Wave elements
                  ELEMENT_ID_WAVE = 'WAVE'
                  ELEMENT_ID_FORMAT = 'fmt '
                  ELEMENT_ID_DATA = 'data'
                  ELEMENT_ID_FACT = 'fact'

                  # AVI elements
                  ELEMENT_ID_AVI = 'AVI '
                  ELEMENT_ID_STRH = 'strh'
                  ELEMENT_ID_STRF = 'strf'
                  ELEMENT_ID_MOVI = 'movi'
                  ELEMENT_ID_IDX1 = 'idx1'
                  ELEMENT_ID_DMLH = 'dmlh'
                  ELEMENT_ID_IDIT = 'IDIT'
                  # Elements accepted in a 'hdrl' list
                  HDLR_ELEMENTS = {
                    ELEMENT_ID_IDIT => nil,
                    'ISMP' => nil,
                    'avih' => nil
                  }
                  # Elements accepted in a 'strl' list
                  STREAM_ELEMENTS = {
                    'strd' => nil,
                    'strf' => nil,
                    'strh' => nil,
                    'strn' => nil,
                    'indx' => nil
                  }
                  # Index elements 'ix00' to 'ix99'
                  ODML_IDX_ELEMENTS = {}
                  100.times do |idx|
                    ODML_IDX_ELEMENTS[sprintf('ix%.2d', idx)] = nil
                  end

                  # ANI elements
                  ELEMENT_ID_ANI = 'ACON'
                  ELEMENT_ID_ANIH = 'anih'
                  ELEMENT_ID_ICON = 'icon'
                  ELEMENT_ID_SEQ = 'seq '
                  ELEMENT_ID_RATE = 'rate'

                  # Same keys as INFO_ELEMENTS_ID, each of them mapped to nil
                  RIFF_INFO_ELEMENTS = {}
                  INFO_ELEMENTS_ID.keys.each do |info_element_id|
                    RIFF_INFO_ELEMENTS[info_element_id] = nil
                  end
                  ELEMENT_ID_LIST = 'LIST'
                  # Tree of element identifiers: a nil value marks an element for which
                  # no sub-element is described here
                  RIFF_GENERIC_ELEMENTS = {
                    BEGIN_PATTERN_JUNK => nil,
                    ELEMENT_ID_LIST => {
                      'INFO' => RIFF_INFO_ELEMENTS,
                      # AVI elements
                      'hdrl' => HDLR_ELEMENTS,
                      'strl' => STREAM_ELEMENTS,
                      ELEMENT_ID_MOVI => nil,
                      'ncdt' => { :element_info => { :ignore_unknown_elements => true } },
                      'odml' => {
                        ELEMENT_ID_DMLH => nil
                      },
                      # ANI elements
                      'fram' => {
                        ELEMENT_ID_ICON => nil
                      }
                    }.merge(ODML_IDX_ELEMENTS)
                  }

                  # Form types and elements described directly under a RIFF/RIFX container
                  RIFF_ROOT_ELEMENTS = {
                    # Wave elements
                    ELEMENT_ID_WAVE => {
                      ELEMENT_ID_FORMAT => nil,
                      ELEMENT_ID_DATA => nil,
                      ELEMENT_ID_FACT => nil
                    },
                    # AVI elements
                    ELEMENT_ID_AVI => nil,
                    ELEMENT_ID_IDX1 => nil,
                    # ANI elements
                    ELEMENT_ID_ANI => {
                      ELEMENT_ID_ANIH => nil,
                      ELEMENT_ID_SEQ => nil,
                      ELEMENT_ID_RATE => nil
                    }
                  }
                  # Both container identifiers share the same description
                  RIFF_ELEMENTS = {
                    BEGIN_PATTERN_RIFF => RIFF_ROOT_ELEMENTS,
                    BEGIN_PATTERN_RIFX => RIFF_ROOT_ELEMENTS
                  }

                  # Flat list of the identifiers of elements having a size
                  RIFF_ELEMENTS_WITH_SIZE = [
                    BEGIN_PATTERN_RIFF,
                    BEGIN_PATTERN_RIFX,
                    BEGIN_PATTERN_JUNK,
                    ELEMENT_ID_LIST,
                    # WAVE elements
                    ELEMENT_ID_FORMAT,
                    ELEMENT_ID_DATA,
                    ELEMENT_ID_FACT,
                    # AVI elements
                    ELEMENT_ID_IDX1,
                    ELEMENT_ID_DMLH,
                    # ANI elements
                    ELEMENT_ID_ANIH,
                    ELEMENT_ID_ICON,
                    ELEMENT_ID_SEQ,
                    ELEMENT_ID_RATE
                  ] +
                    RIFF_INFO_ELEMENTS.keys +
                    HDLR_ELEMENTS.keys +
                    STREAM_ELEMENTS.keys +
                    ODML_IDX_ELEMENTS.keys

                  # 2-characters types accepted at the end of the name of a stream chunk
                  AVI_STREAM_TYPES = [ 'db', 'dc', 'pc', 'wb' ]

                  # Binary regexp matching a (possibly empty) run of 0 bytes ending a line.
                  # Flag given as 2nd argument, for the same reason as BEGIN_PATTERN_FILE.
                  TRAILING_00_REGEXP = Regexp.new("\x00*$".force_encoding(Encoding::ASCII_8BIT), Regexp::NOENCODING)
         | 
| 226 | 
            +
             | 
| 227 | 
            +
                  # Give the pattern identifying the beginning of a RIFF/RIFX file,
                  # together with the options associated to it.
                  # NOTE(review): the meaning of :offset_inc and :max_regexp_size is
                  # defined by the caller, not visible here - presumably both relate to
                  # the 4 bytes matched by the pattern; confirm in BeginPatternDecoder.
                  #
                  # Returns the pair [ pattern, options ].
                  def get_begin_pattern
                    scan_options = { :offset_inc => 4, :max_regexp_size => 4 }
                    [ BEGIN_PATTERN_FILE, scan_options ]
                  end
         | 
| 230 | 
            +
             | 
| 231 | 
            +
                  # Decode the RIFF/RIFX file beginning at the given offset.
                  # Elements are walked by parse_riff_element, which yields each of them
                  # to the block below; the block returns the offset at which parsing
                  # resumes, or nil once a second RIFF/RIFX container has been reached.
                  #
                  # offset:: offset in @data of the 'RIFF' or 'RIFX' identifier.
                  #
                  # Returns the offset at which the file ends.
                  # Reports malformed content through invalid_data.
                  def decode(offset)
                    ending_offset = nil

                    # Check endianness: RIFX stores its integers in big endian
                    name = @data[offset..offset+3]
                    @bindata_16 = BinData::Uint16le
                    @bindata_32 = BinData::Uint32le
                    if (name == BEGIN_PATTERN_RIFX)
                      @bindata_16 = BinData::Uint16be
                      @bindata_32 = BinData::Uint32be
                    end

                    # Parse RIFF
                    found_RIFF = false
                    found_WAVE_data = false
                    found_AVI_data = false
                    extension = nil
                    cursor, nbr_elements = parse_riff_element(offset, RIFF_ELEMENTS) do |element_hierarchy, element_cursor, size, container_end_offset|
                      element_name = element_hierarchy[-1]
                      if ((element_name == BEGIN_PATTERN_RIFF) or
                          (element_name == BEGIN_PATTERN_RIFX))
                        # Check we are not getting on a second RIFF file
                        if found_RIFF
                          # The current file ends where the header (8 bytes) of the next one begins
                          ending_offset = element_cursor - 8
                          next nil
                        end
                        found_RIFF = true
                      elsif (INFO_ELEMENTS_ID[element_name] != nil)
                        # Standard info
                        metadata( INFO_ELEMENTS_ID[element_name] => read_ascii(element_cursor, size) )
                      else
                        # Special cases
                        case element_name

                        # Wave elements
                        when ELEMENT_ID_WAVE
                          extension = :wav
                          found_relevant_data(extension)
                        when ELEMENT_ID_FORMAT
                          # The outer cursor is not assigned yet while elements are being
                          # parsed: report the offset of the element itself.
                          invalid_data("@#{element_cursor} - Wave file having an invalid fmt size: #{size}") if (size < 16)
                          # Decode header
                          audio_format = @bindata_16.read(@data[element_cursor..element_cursor+1])
                          num_channels = @bindata_16.read(@data[element_cursor+2..element_cursor+3])
                          sample_rate = @bindata_32.read(@data[element_cursor+4..element_cursor+7])
                          byte_rate = @bindata_32.read(@data[element_cursor+8..element_cursor+11])
                          block_align = @bindata_16.read(@data[element_cursor+12..element_cursor+13])
                          bits_per_sample = @bindata_16.read(@data[element_cursor+14..element_cursor+15])
                          metadata(
                            :audio_format => audio_format,
                            :num_channels => num_channels,
                            :sample_rate => sample_rate,
                            :byte_rate => byte_rate,
                            :block_align => block_align,
                            :bits_per_sample => bits_per_sample
                          )
                        when ELEMENT_ID_DATA
                          found_WAVE_data = true

                        # AVI elements
                        when ELEMENT_ID_AVI
                          extension = :avi
                          found_relevant_data(extension)
                        when ELEMENT_ID_MOVI
                          # Parse the following RIFF tags manually: stream chunks are named
                          # with 2 digits followed by one of AVI_STREAM_TYPES.
                          # A dedicated local is used so that the cursor of the enclosing
                          # method is left alone.
                          stream_cursor = element_cursor
                          while (stream_cursor < container_end_offset)
                            # Read the name only once we know we are still in the container
                            stream_id = @data[stream_cursor..stream_cursor+1]
                            stream_type = @data[stream_cursor+2..stream_cursor+3]
                            break if ((stream_id.match(/^\d\d$/) == nil) or
                                      (!AVI_STREAM_TYPES.include?(stream_type)))
                            # Read size
                            stream_size = @bindata_32.read(@data[stream_cursor+4..stream_cursor+7])
                            # Chunks are padded to an even size
                            stream_size += 1 if stream_size.odd?
                            log_debug "@#{stream_cursor} - Found AVI stream #{stream_id}#{stream_type} of size #{stream_size}"
                            stream_cursor += 8 + stream_size
                          end
                          found_AVI_data = true
                          next stream_cursor
                        when ELEMENT_ID_IDIT
                          metadata( :date_time_original => read_ascii(element_cursor, size) )

                        # ANI elements
                        when ELEMENT_ID_ANI
                          extension = :ani
                          found_relevant_data(extension)

                        end

                      end

                      # By default: no data
                      next element_cursor
                    end
                    metadata( :nbr_elements => nbr_elements )
                    invalid_data("@#{cursor} - Missing WAVE data.") if ((extension == :wav) and (!found_WAVE_data))
                    invalid_data("@#{cursor} - Missing AVI data.") if ((extension == :avi) and (!found_AVI_data))
                    ending_offset = cursor if (ending_offset == nil)

                    return ending_offset
                  end
         | 
| 333 | 
            +
             | 
| 334 | 
            +
                  private
         | 
| 335 | 
            +
             | 
| 336 | 
            +
                  # Parse a RIFF element, calling a callback for each sub-element read (recursively)
         | 
| 337 | 
            +
                  #
         | 
| 338 | 
            +
                  # Parameters::
         | 
| 339 | 
            +
                  # * *cursor* (_Fixnum_): Current parsing cursor
         | 
| 340 | 
            +
                  # * *element_names* (<em>map<String,Object></em>): Possible element names, with their possible sub-elements (or nil if none).
         | 
| 341 | 
            +
                  # * *hierarchy* (<em>list<String></em>): The hierarchy of element names leading to this element [default = []]
         | 
| 342 | 
            +
                  # * *max_cursor* (_Fixnum_): Maximal cursor for the element. This is set using the size of the element containing the ones being parsed. Can be nil if unknown. [default = nil]
         | 
| 343 | 
            +
                  # * *&proc* (_Proc_): Code block called for each element encountered.
         | 
| 344 | 
            +
                  #   * Parameters::
         | 
| 345 | 
            +
                  #   * *element_hierarchy* (<em>list<String></em>): Complete element names hierarchy leading to this element
         | 
| 346 | 
            +
                  #   * *element_cursor* (_Fixnum_): Cursor of the beginning of this element data
         | 
| 347 | 
            +
                  #   * *element_size* (_Fixnum_): Size of this element data
         | 
| 348 | 
            +
                  #   * *container_end_offset* (_Fixnum_): End offset of this element's container
         | 
| 349 | 
            +
                  #   * Result::
         | 
| 350 | 
            +
                  #   * _Fixnum_: The cursor ending parsing this element, or nil to stop the parsing
         | 
| 351 | 
            +
                  # Result::
         | 
| 352 | 
            +
                  # * _Fixnum_: The new cursor after having parsed this element, or nil to stop the parsing
         | 
| 353 | 
            +
                  # * _Fixnum_: The number of elements parsed
         | 
| 354 | 
            +
                  def parse_riff_element(cursor, element_names, hierarchy = [], max_cursor = nil, &proc)
                    # Walks the elements found from cursor up to the container limit, yielding each one
                    # to the given block and recursing into elements that declare sub-elements.
                    # Returns the pair [new cursor (nil if the block asked to stop), number of elements parsed].
                    nbr_elements = 0
                    # Root elements have no known container size: bound them by @end_offset instead.
                    container_element_max_cursor = (max_cursor.nil? ? @end_offset : max_cursor)
                    # Compute the map of possible element names
                    complete_element_names = element_names.merge(RIFF_GENERIC_ELEMENTS)
                    # The flag has to be COMPARED to true. An assignment here would overwrite the
                    # caller's definition hash and switch the flag on for every element having an :element_info.
                    element_info = element_names[:element_info]
                    ignore_unknown_elements = (!element_info.nil? && (element_info[:ignore_unknown_elements] == true))
                    while (cursor < container_element_max_cursor)
                      name = @data[cursor..cursor+3]
                      # Check the validity of the element
                      if ((!ignore_unknown_elements) and
                          (!complete_element_names.has_key?(name)))
                        log_debug "@#{cursor} - Invalid element name: #{name.inspect} within #{hierarchy.join('/')}. Known ones are: #{complete_element_names.keys.join(', ')}."
                        # We consider the file is finished when the element being parsed is the root one.
                        return cursor, nbr_elements if max_cursor.nil?
                        truncated_data("@#{cursor} - No valid element found, but container element has not been parsed completely.")
                      end
                      # If there is a size, read it
                      # Consider that if we ignore unknown elements they all HAVE a size
                      size = nil
                      if (ignore_unknown_elements or
                          (RIFF_ELEMENTS_WITH_SIZE.include?(name)))
                        size = @bindata_32.read(@data[cursor+4..cursor+7])
                        # Odd sizes are rounded up to the next even value
                        size += 1 if size.odd?
                      end
                      # This element is valid
                      nbr_elements += 1
                      element_hierarchy = hierarchy + [name]
                      # Skip the 4 bytes of the name, and the 4 bytes of the size if there is one
                      cursor += 4
                      cursor += 4 unless size.nil?
                      element_cursor = cursor
                      # Only the first bytes of the element's data are logged (at most 32, bounded by the element size or the end of data)
                      preview_size = ((!size.nil? and (size < 32)) ? size : ((@end_offset-element_cursor < 32) ? @end_offset-element_cursor : 32))
                      log_debug "@#{cursor} - Found element #{element_hierarchy.join('/')} of size #{size} - Data: #{@data[element_cursor..element_cursor+preview_size-1].inspect}"
                      # Parse this element's data
                      element_cursor_end = yield(element_hierarchy, element_cursor, size, container_element_max_cursor)
                      if element_cursor_end.nil?
                        # The callback asked to stop the parsing
                        cursor = nil
                        break
                      end
                      invalid_data("@#{cursor} - Element parsing exceeded its element's size (#{element_cursor_end} > #{element_cursor + size})") if (!size.nil? and (element_cursor_end > element_cursor + size))
                      if (element_cursor_end > container_element_max_cursor)
                        if max_cursor.nil?
                          # For root elements, this error is synonym of truncated data as container_element_max_cursor is set arbitrarily to @end_offset
                          truncated_data("@#{cursor} - Element parsing exceeded its container limit (#{element_cursor_end} > #{container_element_max_cursor})", container_element_max_cursor)
                        else
                          invalid_data("@#{cursor} - Element parsing exceeded its container limit (#{element_cursor_end} > #{container_element_max_cursor})")
                        end
                      end
                      cursor = element_cursor_end
                      sub_element_names = complete_element_names[name]
                      if (!sub_element_names.nil? and
                          (cursor < container_element_max_cursor))
                        # Now call sub-elements that should start at current cursor
                        sub_max_cursor = (size.nil? ? container_element_max_cursor : element_cursor + size)
                        new_cursor, nbr_subelements = parse_riff_element(cursor, sub_element_names, element_hierarchy, sub_max_cursor, &proc)
                        nbr_elements += nbr_subelements
                        cursor = new_cursor
                        # A nil cursor means a callback stopped the parsing deeper in the hierarchy
                        break if new_cursor.nil?
                        # Check cursor is at the correct position
                        invalid_data("@#{cursor} - Element parsing should have stopped at #{element_cursor + size} but is instead at #{cursor}") if (!size.nil? and (cursor != element_cursor + size))
                      end
                      invalid_data("@#{cursor} - Element #{element_hierarchy.join('/')} with size #{size} finishes at cursor #{element_cursor + size}, but container element set maximal cursor to #{container_element_max_cursor}.") if (!size.nil? and (element_cursor + size > container_element_max_cursor))
                      # Elements with a size always end at their declared size, whatever the callback consumed
                      cursor = element_cursor + size unless size.nil?
                      progress(cursor)
                    end

                    return cursor, nbr_elements
                  end
         | 
| 421 | 
            +
             | 
| 422 | 
            +
                  # Read an ASCII value
         | 
| 423 | 
            +
                  #
         | 
| 424 | 
            +
                  # Parameters::
         | 
| 425 | 
            +
                  # * *cursor* (_Fixnum_): The cursor to read from
         | 
| 426 | 
            +
                  # * *size* (_Fixnum_): Size of the string
         | 
| 427 | 
            +
                  # Result::
         | 
| 428 | 
            +
                  # * _String_: The string read, with TRAILING_00_REGEXP matches removed and surrounding whitespace stripped.
         | 
| 429 | 
            +
                  def read_ascii(cursor, size)
                    # Slice the requested bytes (inclusive range, hence the -1 on the last index)
                    last_index = cursor + size - 1
                    raw_value = @data[cursor..last_index]
                    # Remove whatever TRAILING_00_REGEXP matches, then trim surrounding whitespace
                    without_padding = raw_value.gsub(TRAILING_00_REGEXP, '')
                    without_padding.strip
                  end
         | 
| 432 | 
            +
             | 
| 433 | 
            +
                end
         | 
| 434 | 
            +
             | 
| 435 | 
            +
              end
         | 
| 436 | 
            +
             | 
| 437 | 
            +
            end
         |