format_parser 2.7.0 → 2.7.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f7b8b37e26143e2ea941db88183475e70c90ab658adcdad362b32b457a7d19ac
4
- data.tar.gz: 50758e107065e1a2ab4fbca7775359d928cd1a62942277a94fcbabfefa1bfe10
3
+ metadata.gz: 5d2679a365f7c735d2b8c962765c4783b6336bf23461ffde1705ab141d250591
4
+ data.tar.gz: e1e4b4caa2956cbf1653d39498a9db84589cd0c9c979c6d84e9f3b3027427274
5
5
  SHA512:
6
- metadata.gz: d34bcd7b0162fe6f911bdd8c3b626dd9ce35b98139bca6ec1b54e653bc7e50453af734c74134745a46e91fbc3528d88f1663fabdbd35e06ae908a41f8e81dcd7
7
- data.tar.gz: 2ebbc65f373a3e34a2e8300d40bb239df6d2003dab54b9c461a8cc75f651800d93273aa73bfb4f1a14f195e65f348c218b90b4c1bae860c8269e024ebb2e890c
6
+ metadata.gz: 52e775ae2a4ced22d2879fff48108974bfe4087333b70d74c5c0910229aa7298826664bbd474dd9b4221777637cc51bf969735a830df976f58f0ff7302dd69c5
7
+ data.tar.gz: 31b8f95ff8fbcba01d60fe2377699a9abebc61a28d74afc05aa8456d6660f786ee268c2a621c0dc83490e3f9b04dc5e1a60319ae6f4c54503b3dfab9d39d520e
data/README.md CHANGED
@@ -217,7 +217,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
217
217
  - NEF examples are downloaded from http://www.rawsamples.ch/ and are Creative Common Licensed.
218
218
 
219
219
  ### OGG
220
- - `hi.ogg`, `vorbis.ogg`, `with_confusing_magic_string.ogg`, `with_garbage_at_the_end.ogg` have been generated by the project contributors
220
+ - `hi.ogg`, `vorbis.ogg`, `with_confusing_magic_string.ogg`, `invalid_with_garbage_at_the_end.ogg` have been generated by the project contributors
221
221
 
222
222
  ### PDF
223
223
  - PDF 2.0 files downloaded from the [PDF Association public Github repository](https://github.com/pdf-association/pdf20examples). These files are licensed under the Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.
@@ -236,7 +236,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
236
236
  ### WAV
237
237
  - c_11k16bitpcm.wav and c_8kmp316.wav are from [Wikipedia WAV](https://en.wikipedia.org/wiki/WAV#Comparison_of_coding_schemes), retrieved January 7, 2018
238
238
  - c_39064__alienbomb__atmo-truck.wav is from [freesound](https://freesound.org/people/alienbomb/sounds/39064/) and is CC0 licensed
239
- - c_M1F1-Alaw-AFsp.wav and d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)
239
+ - c_M1F1-Alaw-AFsp.wav and invalid_d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)
240
240
 
241
241
  ### WEBP
242
242
  - With the exception of extended-animation.webp, which was obtained from Wikimedia Commons and is Creative Commons
@@ -1,3 +1,3 @@
1
1
  module FormatParser
2
- VERSION = '2.7.0'
2
+ VERSION = '2.7.1'
3
3
  end
data/lib/format_parser.rb CHANGED
@@ -36,6 +36,9 @@ module FormatParser
36
36
  # The value will ensure the parser having it will be applied to the file last.
37
37
  LEAST_PRIORITY = 99
38
38
 
39
+ @registered_natures = []
40
+ @registered_formats = []
41
+
39
42
  # Register a parser object to be used to perform file format detection. Each parser FormatParser
40
43
  # provides out of the box registers itself using this method.
41
44
  #
@@ -68,9 +71,20 @@ module FormatParser
68
71
  end
69
72
  @parser_priorities ||= {}
70
73
  @parser_priorities[callable_parser] = priority
74
+
75
+ @registered_natures |= parser_provided_natures
76
+ @registered_formats |= parser_provided_formats
71
77
  end
72
78
  end
73
79
 
80
+ def self.registered_natures
81
+ @registered_natures
82
+ end
83
+
84
+ def self.registered_formats
85
+ @registered_formats
86
+ end
87
+
74
88
  # Deregister a parser object (makes FormatParser forget this parser existed). Is mostly used in
75
89
  # tests, but can also be used to forcibly disable some formats completely.
76
90
  #
@@ -76,6 +76,11 @@ class FormatParser::MP3Parser
76
76
  io.seek(0)
77
77
  return if TIFF_HEADER_BYTES.include?(safe_read(io, 4))
78
78
 
79
+ # Prevention against parsing WAV files.
80
+ io.seek(0)
81
+ wav_chunk_id, _wav_size, wav_riff_type = safe_read(io, 12).unpack('a4la4')
82
+ return if wav_chunk_id == 'RIFF' || wav_riff_type == 'WAVE'
83
+
79
84
  # Read all the ID3 tags (or at least attempt to)
80
85
  io.seek(0)
81
86
  id3v1 = ID3Extraction.attempt_id3_v1_extraction(io)
@@ -315,5 +320,5 @@ class FormatParser::MP3Parser
315
320
  end
316
321
  end
317
322
 
318
- FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 99
323
+ FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 101
319
324
  end
@@ -34,6 +34,26 @@ describe FormatParser do
34
34
  end
35
35
  end
36
36
 
37
+ it "fixtures with 'invalid' in the filename should fail to parse" do
38
+ Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
39
+ file_name = File.basename(fixture_path)
40
+ next unless file_name.include? "invalid"
41
+ File.open(fixture_path, 'rb') do |file|
42
+ FormatParser.parse(file)
43
+ end
44
+ end
45
+ end
46
+
47
+ it "fixtures without 'invalid' in the filename should be parsed successfully" do
48
+ Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
49
+ file_name = File.basename(fixture_path)
50
+ next if file_name.include? "invalid"
51
+ File.open(fixture_path, 'rb') do |file|
52
+ FormatParser.parse(file)
53
+ end
54
+ end
55
+ end
56
+
37
57
  it 'triggers parsers in a certain order that corresponds to the parser priorities' do
38
58
  file_contents = StringIO.new('a' * 4096)
39
59
 
@@ -189,12 +209,20 @@ describe FormatParser do
189
209
  'FormatParser::CR3Parser',
190
210
  'FormatParser::DPXParser',
191
211
  'FormatParser::FLACParser',
192
- 'FormatParser::MP3Parser',
193
212
  'FormatParser::OggParser',
194
213
  'FormatParser::TIFFParser',
195
- 'FormatParser::WAVParser'
214
+ 'FormatParser::WAVParser',
215
+ 'FormatParser::MP3Parser'
196
216
  ])
197
217
  end
218
+
219
+ it 'ensures that MP3 parser is the last one among all' do
220
+ natures = FormatParser.registered_natures
221
+ formats = FormatParser.registered_formats
222
+ prioritised_parsers = FormatParser.parsers_for(natures, formats)
223
+ parser_class_names = prioritised_parsers.map { |parser| parser.class.name }
224
+ expect(parser_class_names.last).to eq 'FormatParser::MP3Parser'
225
+ end
198
226
  end
199
227
 
200
228
  describe '.register_parser and .deregister_parser' do
@@ -55,7 +55,7 @@ describe FormatParser::FLACParser do
55
55
  end
56
56
 
57
57
  it 'raises an error when sample rate is 0' do
58
- fpath = fixtures_dir + 'FLAC/sample_rate_0.flac'
58
+ fpath = fixtures_dir + 'FLAC/invalid_sample_rate_0.flac'
59
59
 
60
60
  expect {
61
61
  subject.call(File.open(fpath, 'rb'))
@@ -99,7 +99,7 @@ describe FormatParser::JSONParser do
99
99
 
100
100
  describe 'When reading objects invalid JSON files' do
101
101
  it "rejects files with corrupted JSON data" do
102
- io = load_file 'malformed.json'
102
+ io = load_file 'invalid_malformed.json'
103
103
 
104
104
  parsed = subject.call(io)
105
105
 
@@ -107,7 +107,7 @@ describe FormatParser::JSONParser do
107
107
  end
108
108
 
109
109
  it "rejects invalid files early without reading the whole content" do
110
- io = load_file 'lorem_ipsum.json'
110
+ io = load_file 'invalid_lorem_ipsum.json'
111
111
 
112
112
  parsed = subject.call(io)
113
113
 
@@ -11,7 +11,7 @@ describe FormatParser::M3UParser do
11
11
  end
12
12
 
13
13
  describe 'an m3u file with missing header' do
14
- let(:m3u_file) { 'plain_text.m3u' }
14
+ let(:m3u_file) { 'invalid_plain_text.m3u' }
15
15
 
16
16
  it 'does not parse the file successfully' do
17
17
  expect(parsed_m3u).to be_nil
@@ -36,6 +36,12 @@ describe FormatParser::MP3Parser do
36
36
  expect(parsed).to be_nil
37
37
  end
38
38
 
39
+ it 'does not misdetect a WAV' do
40
+ fpath = fixtures_dir + '/WAV/c_SCAM_MIC_SOL001_RUN001.wav'
41
+ parsed = subject.call(File.open(fpath, 'rb'))
42
+ expect(parsed).to be_nil
43
+ end
44
+
39
45
  describe 'title/artist/album attributes' do
40
46
  let(:parsed) { subject.call(File.open(fpath, 'rb')) }
41
47
 
@@ -13,7 +13,7 @@ describe FormatParser::OggParser do
13
13
  end
14
14
 
15
15
  it 'skips a file if it contains more than MAX_POSSIBLE_OGG_PAGE_SIZE bytes of garbage at the end' do
16
- parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/with_garbage_at_the_end.ogg', 'rb'))
16
+ parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/invalid_with_garbage_at_the_end.ogg', 'rb'))
17
17
  expect(parse_result).to be_nil
18
18
  end
19
19
 
@@ -46,17 +46,17 @@ describe FormatParser::PDFParser do
46
46
 
47
47
  describe 'broken PDF files should not parse' do
48
48
  it 'PDF with missing version header' do
49
- parsed_pdf = parse_pdf 'not_a.pdf'
49
+ parsed_pdf = parse_pdf 'invalid_not_a.pdf'
50
50
  expect(parsed_pdf).to be_nil
51
51
  end
52
52
 
53
53
  it 'PDF 2.0 with offset start' do
54
- parsed_pdf = parse_pdf 'PDF 2.0 with offset start.pdf'
54
+ parsed_pdf = parse_pdf 'invalid PDF 2.0 with offset start.pdf'
55
55
  expect(parsed_pdf).to be_nil
56
56
  end
57
57
 
58
58
  it 'exceeds the PDF read limit' do
59
- parsed_pdf = parse_pdf 'exceed_PDF_read_limit.pdf'
59
+ parsed_pdf = parse_pdf 'invalid_exceed_PDF_read_limit.pdf'
60
60
  expect(parsed_pdf).to be_nil
61
61
  end
62
62
  end
@@ -48,7 +48,7 @@ describe FormatParser::WAVParser do
48
48
 
49
49
  it "cannot parse file with audio format different from 1 and no 'fact' chunk" do
50
50
  expect {
51
- subject.call(File.open(__dir__ + '/../fixtures/WAV/d_6_Channel_ID.wav', 'rb'))
51
+ subject.call(File.open(__dir__ + '/../fixtures/WAV/invalid_d_6_Channel_ID.wav', 'rb'))
52
52
  }.to raise_error(FormatParser::IOUtils::InvalidRead)
53
53
  end
54
54
  end
@@ -7,7 +7,7 @@ describe FormatParser::WebpParser do
7
7
  end
8
8
 
9
9
  it 'does not parse files with an unrecognised variant' do
10
- result = subject.call(File.open(fixtures_dir + 'WEBP/unrecognised-variant.webp', 'rb'))
10
+ result = subject.call(File.open(fixtures_dir + 'WEBP/invalid-unrecognised-variant.webp', 'rb'))
11
11
  expect(result).to be_nil
12
12
  end
13
13
 
@@ -104,6 +104,43 @@ describe 'Fetching data from HTTP remotes' do
104
104
  expect(file_information.format).to eq(:png)
105
105
  end
106
106
 
107
+ describe 'correctly parses WAV files without falling back to another filetype' do
108
+ ['c_8kmp316.wav', 'c_SCAM_MIC_SOL001_RUN001.wav'].each do |filename|
109
+ it "parses WAV file #{filename}" do
110
+ remote_url = 'http://localhost:9399/WAV/' + filename
111
+ file_information = FormatParser.parse_http(remote_url)
112
+ expect(file_information).not_to be_nil
113
+ expect(file_information.format).to eq(:wav)
114
+ end
115
+ end
116
+ end
117
+
118
+ describe "correctly parses files over HTTP without filename hint" do
119
+ Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
120
+ file_name = File.basename(fixture_path)
121
+ next if file_name.include? "invalid"
122
+
123
+ file_type_dir = fixture_path.delete_prefix(fixtures_dir).delete_suffix(file_name)
124
+ file_type_dir.delete_prefix!('/').delete_suffix!('/')
125
+ next if file_type_dir.empty?
126
+
127
+ # skipping this one because it's a special case
128
+ next if file_name == "arch_many_entries.zip"
129
+
130
+ it "parses #{file_type_dir} file: #{file_name}" do
131
+ url = "http://localhost:9399/#{file_type_dir}/#{file_name}?some_param=test".gsub(' ', '%20')
132
+ result_with_hint = FormatParser.parse_http(url, filename_hint: file_name)
133
+ result_no_hint = FormatParser.parse_http(url)
134
+
135
+ expect(result_with_hint).not_to be_nil
136
+ expect(result_no_hint).not_to be_nil
137
+
138
+ expect(result_no_hint.nature).to eq(result_with_hint.nature)
139
+ expect(result_no_hint.format).to eq(result_with_hint.format)
140
+ end
141
+ end
142
+ end
143
+
107
144
  describe 'when parsing remote fixtures' do
108
145
  Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
109
146
  filename = File.basename(fixture_path)
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: format_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.7.0
4
+ version: 2.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Noah Berman
8
8
  - Julik Tarkhanov
9
- autorequire:
9
+ autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2023-06-27 00:00:00.000000000 Z
12
+ date: 2023-08-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: exifr
@@ -319,7 +319,7 @@ licenses:
319
319
  - MIT (Hippocratic)
320
320
  metadata:
321
321
  allowed_push_host: https://rubygems.org
322
- post_install_message:
322
+ post_install_message:
323
323
  rdoc_options: []
324
324
  require_paths:
325
325
  - lib
@@ -334,8 +334,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
334
334
  - !ruby/object:Gem::Version
335
335
  version: '0'
336
336
  requirements: []
337
- rubygems_version: 3.3.7
338
- signing_key:
337
+ rubygems_version: 3.1.6
338
+ signing_key:
339
339
  specification_version: 4
340
340
  summary: A library for efficient parsing of file metadata
341
341
  test_files: []