format_parser 2.6.0 → 2.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/lib/format_parser/version.rb +1 -1
- data/lib/format_parser.rb +15 -0
- data/lib/parsers/json_parser/validator.rb +319 -0
- data/lib/parsers/json_parser.rb +25 -0
- data/lib/parsers/mp3_parser.rb +6 -1
- data/lib/utf8_reader.rb +68 -0
- data/spec/format_parser_spec.rb +30 -2
- data/spec/parsers/flac_parser_spec.rb +1 -1
- data/spec/parsers/json_parser/validator_spec.rb +321 -0
- data/spec/parsers/json_parser_spec.rb +118 -0
- data/spec/parsers/m3u_parser_spec.rb +1 -1
- data/spec/parsers/mp3_parser_spec.rb +6 -0
- data/spec/parsers/ogg_parser_spec.rb +1 -1
- data/spec/parsers/pdf_parser_spec.rb +3 -3
- data/spec/parsers/wav_parser_spec.rb +1 -1
- data/spec/parsers/webp_parser_spec.rb +1 -1
- data/spec/remote_fetching_spec.rb +37 -0
- metadata +11 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d2679a365f7c735d2b8c962765c4783b6336bf23461ffde1705ab141d250591
|
4
|
+
data.tar.gz: e1e4b4caa2956cbf1653d39498a9db84589cd0c9c979c6d84e9f3b3027427274
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52e775ae2a4ced22d2879fff48108974bfe4087333b70d74c5c0910229aa7298826664bbd474dd9b4221777637cc51bf969735a830df976f58f0ff7302dd69c5
|
7
|
+
data.tar.gz: 31b8f95ff8fbcba01d60fe2377699a9abebc61a28d74afc05aa8456d6660f786ee268c2a621c0dc83490e3f9b04dc5e1a60319ae6f4c54503b3dfab9d39d520e
|
data/README.md
CHANGED
@@ -26,6 +26,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
26
26
|
* HEIC
|
27
27
|
* HEIF
|
28
28
|
* JPEG
|
29
|
+
* JSON
|
29
30
|
* M3U
|
30
31
|
* M4A
|
31
32
|
* M4B
|
@@ -216,7 +217,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
216
217
|
- NEF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
217
218
|
|
218
219
|
### OGG
|
219
|
-
- `hi.ogg`, `vorbis.ogg`, `with_confusing_magic_string.ogg`, `
|
220
|
+
- `hi.ogg`, `vorbis.ogg`, `with_confusing_magic_string.ogg`, `invalid_with_garbage_at_the_end.ogg` have been generated by the project contributors
|
220
221
|
|
221
222
|
### PDF
|
222
223
|
- PDF 2.0 files downloaded from the [PDF Association public Github repository](https://github.com/pdf-association/pdf20examples). These files are licensed under the Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.
|
@@ -235,7 +236,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
235
236
|
### WAV
|
236
237
|
- c_11k16bitpcm.wav and c_8kmp316.wav are from [Wikipedia WAV](https://en.wikipedia.org/wiki/WAV#Comparison_of_coding_schemes), retrieved January 7, 2018
|
237
238
|
- c_39064__alienbomb__atmo-truck.wav is from [freesound](https://freesound.org/people/alienbomb/sounds/39064/) and is CC0 licensed
|
238
|
-
- c_M1F1-Alaw-AFsp.wav and
|
239
|
+
- c_M1F1-Alaw-AFsp.wav and invalid_d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)
|
239
240
|
|
240
241
|
### WEBP
|
241
242
|
- With the exception of extended-animation.webp, which was obtained from Wikimedia Commons and is Creative Commons
|
data/lib/format_parser.rb
CHANGED
@@ -17,6 +17,7 @@ module FormatParser
|
|
17
17
|
require_relative 'read_limits_config'
|
18
18
|
require_relative 'remote_io'
|
19
19
|
require_relative 'io_constraint'
|
20
|
+
require_relative 'utf8_reader'
|
20
21
|
require_relative 'care'
|
21
22
|
require_relative 'active_storage/blob_analyzer'
|
22
23
|
require_relative 'text'
|
@@ -35,6 +36,9 @@ module FormatParser
|
|
35
36
|
# The value will ensure the parser having it will be applied to the file last.
|
36
37
|
LEAST_PRIORITY = 99
|
37
38
|
|
39
|
+
@registered_natures = []
|
40
|
+
@registered_formats = []
|
41
|
+
|
38
42
|
# Register a parser object to be used to perform file format detection. Each parser FormatParser
|
39
43
|
# provides out of the box registers itself using this method.
|
40
44
|
#
|
@@ -67,9 +71,20 @@ module FormatParser
|
|
67
71
|
end
|
68
72
|
@parser_priorities ||= {}
|
69
73
|
@parser_priorities[callable_parser] = priority
|
74
|
+
|
75
|
+
@registered_natures |= parser_provided_natures
|
76
|
+
@registered_formats |= parser_provided_formats
|
70
77
|
end
|
71
78
|
end
|
72
79
|
|
80
|
+
def self.registered_natures
|
81
|
+
@registered_natures
|
82
|
+
end
|
83
|
+
|
84
|
+
def self.registered_formats
|
85
|
+
@registered_formats
|
86
|
+
end
|
87
|
+
|
73
88
|
# Deregister a parser object (makes FormatParser forget this parser existed). Is mostly used in
|
74
89
|
# tests, but can also be used to forcibly disable some formats completely.
|
75
90
|
#
|
@@ -0,0 +1,319 @@
|
|
1
|
+
##
|
2
|
+
# This class checks whether a given file is a valid JSON file.
|
3
|
+
# The validation process DOES NOT assemble an object with the contents of the JSON file in memory.
|
4
|
+
# Instead, it implements a simple state-machine-like approach that digests the contents of the file while traversing
|
5
|
+
# the hierarchy of nodes in the document.
|
6
|
+
#
|
7
|
+
# Although this is based on the IETF standard (https://www.rfc-editor.org/rfc/rfc8259),
|
8
|
+
# it does cut a few corners for the sake of simplicity. For instance, instead of validating
|
9
|
+
# Numbers, "true", "false" and "null" tokens, it supports a type called Literal to hold generic sequences of characters.
|
10
|
+
# This decision makes the implementation simpler while being a good-enough approach to identify JSON files.
|
11
|
+
#
|
12
|
+
# There is also a cap. Large files are not read all the way through. Instead, if the beginning of the file is
|
13
|
+
# JSON-compliant, it is assumed that the file is a JSON file.
|
14
|
+
|
15
|
+
class FormatParser::JSONParser::Validator
  # Raised whenever the content read so far cannot be accepted as JSON.
  class JSONParserError < StandardError
  end

  # Maximum number of characters inspected before validation is halted.
  MAX_SAMPLE_SIZE = 1024
  MAX_LITERAL_SIZE = 30 # much larger than necessary.
  ESCAPE_CHAR = "\\".freeze
  WHITESPACE_CHARS = [" ", "\t", "\n", "\r"].freeze
  # Characters that may directly follow (and therefore terminate) a literal.
  ENDING_VALUE_CHARS = [",", "]", "}"].freeze
  LITERALS_CHAR_TEMPLATE = /\w|[+\-.]/ # any word character, "+", "-" and "."

  # @param io [#read] the source the characters are read from
  #   (it is handed to FormatParser::UTF8Reader by #validate)
  def initialize(io)
    @io = io
    @current_node = nil # :object, :array, :string, :literal
    @parent_nodes = []
    @current_state = :awaiting_root_node
    @escape_next = false
    @current_literal_size = 0
    @pos = 0

    # Maps each state to the lambda that decides whether a char is acceptable in it.
    @all_parsers = {}

    # Number of nodes of each type opened so far.
    @execution_stats = {
      array: 0,
      object: 0,
      literal: 0,
      string: 0
    }

    setup_transitions
  end

  # Feeds the content of the IO, one character at a time, to the state machine.
  #
  # @return [Boolean] true when the whole content was read and the root node was closed,
  #   false when MAX_SAMPLE_SIZE characters were accepted and validation was halted early
  # @raise [JSONParserError] on an unexpected character, on invalid UTF-8, when the content
  #   ends before the root node is closed, or when no root node was found within the sample
  def validate
    char_reader = FormatParser::UTF8Reader.new(@io)

    while (c = char_reader.read_char)
      @pos += 1
      parse_char c

      # Halt validation if the sampling limit is reached.
      next if @pos < MAX_SAMPLE_SIZE
      raise JSONParserError, "Invalid JSON file" if @current_state == :awaiting_root_node
      return false
    end

    # Raising error in case the EOF is reached earlier than expected
    raise JSONParserError, "Incomplete JSON file" if @current_state != :closed
    true
  rescue FormatParser::UTF8Reader::UTF8CharReaderError
    raise JSONParserError, "Invalid UTF-8 character"
  end

  # @param node_type [Symbol] one of :array, :object, :literal, :string
  # @return [Integer, nil] how many nodes of the given type were opened so far
  def stats(node_type)
    @execution_stats[node_type]
  end

  private

  # Registers, for every state, the ordered list of handlers tried against the
  # incoming character. The first handler returning a truthy value accepts the char.
  def setup_transitions
    when_its :awaiting_root_node, ->(c) do
      read_whitespace(c) ||
        start_object(c) ||
        start_array(c)
    end

    when_its :awaiting_object_attribute_key, ->(c) do
      read_whitespace(c) ||
        start_attribute_key(c) ||
        close_object(c)
    end

    when_its :reading_object_attribute_key, ->(c) do
      close_attribute_key(c) ||
        read_valid_string_char(c)
    end

    when_its :awaiting_object_colon_separator, ->(c) do
      read_whitespace(c) ||
        read_colon(c)
    end

    when_its :awaiting_object_attribute_value, ->(c) do
      read_whitespace(c) ||
        start_object(c) ||
        start_array(c) ||
        start_string(c) ||
        start_literal(c)
    end

    when_its :awaiting_array_value, ->(c) do
      read_whitespace(c) ||
        start_object(c) ||
        start_array(c) ||
        start_string(c) ||
        start_literal(c) ||
        close_array(c)
    end

    when_its :reading_string, ->(c) do
      close_string(c) ||
        read_valid_string_char(c)
    end

    when_its :awaiting_next_or_close, ->(c) do
      read_whitespace(c) ||
        read_comma_separator(c) ||
        close_object(c) ||
        close_array(c)
    end

    # Literals have no closing delimiter: the char that ends the literal must
    # also be consumed as whitespace, separator or closing bracket of the parent.
    when_its :reading_literal, ->(c) do
      read_valid_literal_char(c) || (
        close_literal(c) && (
          read_whitespace(c) ||
          read_comma_separator(c) ||
          close_array(c) ||
          close_object(c)))
    end

    when_its :closed, ->(c) do
      read_whitespace(c)
    end
  end

  # Associates a state with the lambda handling the chars received in that state.
  def when_its(state, act)
    @all_parsers[state] = act
  end

  # Runs the handler of the current state, rejecting the char if it was not accepted.
  def parse_char(c)
    next_step = @all_parsers[@current_state]
    accepted = next_step.call(c)
    reject_char(c) unless accepted
  end

  # Accepts whitespace without changing the state.
  def read_whitespace(c)
    whitespace?(c)
  end

  # Accepts the ":" separating an attribute key from its value.
  def read_colon(c)
    return false unless c == ":"

    @current_state = :awaiting_object_attribute_value
    true
  end

  # Accepts a char inside a string or attribute key. Whatever follows the escape
  # char is accepted as is; otherwise control chars and double quotes are refused.
  def read_valid_string_char(c)
    if @escape_next
      @escape_next = false
      return true
    end

    if c == ESCAPE_CHAR
      @escape_next = true
      return true
    end

    !control_char?(c) && c != "\""
  end

  # Accepts a char that belongs to a literal, tracking the size of the literal.
  def read_valid_literal_char(c)
    return false unless valid_literal_char?(c)

    @current_literal_size += 1
    true
  end

  # Accepts a "," and awaits the next key or value depending on the enclosing node.
  def read_comma_separator(c)
    return false unless c == ","

    @current_state = :awaiting_object_attribute_key if @current_node == :object
    @current_state = :awaiting_array_value if @current_node == :array
    true
  end

  # Object: {"k1":"val", "k2":[1,2,3], "k4": undefined, "k5": {"l1": 6}}
  def start_object(c)
    return false unless c == "{"

    begin_node(:object)
    @current_state = :awaiting_object_attribute_key
    true
  end

  def close_object(c)
    return false unless @current_node == :object && c == "}"

    end_node
    # end_node already moved to :closed if this was the root node.
    @current_state = :awaiting_next_or_close unless @current_node.nil?
    true
  end

  # Array: [1, "two", true, undefined, {}, []]
  def start_array(c)
    return false unless c == "["

    begin_node(:array)
    @current_state = :awaiting_array_value
    true
  end

  def close_array(c)
    return false unless @current_node == :array && c == "]"

    end_node
    # end_node already moved to :closed if this was the root node.
    @current_state = :awaiting_next_or_close unless @current_node.nil?
    true
  end

  # Attribute keys are accounted for as :string nodes.
  def start_attribute_key(c)
    return false unless c == "\""

    begin_node(:string)
    @current_state = :reading_object_attribute_key
    true
  end

  def close_attribute_key(c)
    # An escaped double quote is part of the key, not its end.
    return false if @escape_next
    return false unless c == "\""

    end_node
    @current_state = :awaiting_object_colon_separator
    true
  end

  # Strings: "Foo"
  def start_string(c)
    return false unless c == "\""

    begin_node(:string)
    @current_state = :reading_string
    true
  end

  def close_string(c)
    # An escaped double quote is part of the string, not its end.
    return false if @escape_next
    return false unless c == "\""

    end_node
    @current_state = :awaiting_next_or_close
    true
  end

  # literals: null, undefined, true, false, NaN, infinity, -123.456e10 -123,456e10
  def start_literal(c)
    return false unless valid_literal_char?(c)

    begin_node(:literal)
    @current_state = :reading_literal
    @current_literal_size = 1
    true
  end

  # Ends the current literal when the char is whitespace or one of ENDING_VALUE_CHARS.
  #
  # @raise [JSONParserError] when the literal read so far exceeds MAX_LITERAL_SIZE
  def close_literal(c)
    raise JSONParserError, "Literal too large at #{@pos}" if @current_literal_size > MAX_LITERAL_SIZE

    return false unless whitespace?(c) || ENDING_VALUE_CHARS.include?(c)

    end_node
    @current_state = :awaiting_next_or_close
    true
  end

  # Marks the creation of a node (object, array, string or literal)
  def begin_node(node_type)
    # Accounting for the new node
    @execution_stats[node_type] ||= 0
    @execution_stats[node_type] += 1

    # Managing the node execution stack
    @parent_nodes.push(@current_node)
    @current_node = node_type
  end

  # Marks the closure of a node (object, array, string or literal)
  def end_node
    @current_node = @parent_nodes.pop
    @current_state = :closed if @current_node.nil?
  end

  def reject_char(char)
    raise JSONParserError, "Unexpected char #{char} in position #{@pos}"
  end

  def whitespace?(c)
    WHITESPACE_CHARS.include?(c)
  end

  def control_char?(c)
    # control characters: (U+0000 through U+001F)
    c.ord <= 31
  end

  def valid_literal_char?(c)
    LITERALS_CHAR_TEMPLATE.match?(c)
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Detects JSON content by running the sampling Validator over the IO.
class FormatParser::JSONParser
  include FormatParser::IOUtils
  require_relative 'json_parser/validator'

  JSON_MIME_TYPE = 'application/json'

  # Tells whether the filename carries a ".json" extension (case-insensitive).
  #
  # The pattern is anchored with \z rather than $ so that only the very end of
  # the filename is considered ($ would also match right before a trailing newline).
  #
  # @param filename [String, nil]
  # @return [Integer, nil] the match position (truthy) or nil when there is no match
  def likely_match?(filename)
    filename =~ /\.json\z/i
  end

  # Runs the validator against the IO and reports JSON when it does not object.
  #
  # The boolean returned by Validator#validate is deliberately ignored: only a
  # JSONParserError makes this parser give up on the content.
  #
  # @param io the IO to inspect; it gets wrapped in a FormatParser::IOConstraint
  # @return [FormatParser::Text, nil] the detection result, or nil when validation fails
  def call(io)
    io = FormatParser::IOConstraint.new(io)
    validator = Validator.new(io)

    validator.validate

    FormatParser::Text.new(
      format: :json,
      content_type: JSON_MIME_TYPE,
    )
  rescue Validator::JSONParserError
    nil
  end

  FormatParser.register_parser new, natures: :text, formats: :json
end
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -76,6 +76,11 @@ class FormatParser::MP3Parser
|
|
76
76
|
io.seek(0)
|
77
77
|
return if TIFF_HEADER_BYTES.include?(safe_read(io, 4))
|
78
78
|
|
79
|
+
# Prevention against parsing WAV files.
|
80
|
+
io.seek(0)
|
81
|
+
wav_chunk_id, _wav_size, wav_riff_type = safe_read(io, 12).unpack('a4la4')
|
82
|
+
return if wav_chunk_id == 'RIFF' || wav_riff_type == 'WAVE'
|
83
|
+
|
79
84
|
# Read all the ID3 tags (or at least attempt to)
|
80
85
|
io.seek(0)
|
81
86
|
id3v1 = ID3Extraction.attempt_id3_v1_extraction(io)
|
@@ -315,5 +320,5 @@ class FormatParser::MP3Parser
|
|
315
320
|
end
|
316
321
|
end
|
317
322
|
|
318
|
-
FormatParser.register_parser new, natures: :audio, formats: :mp3, priority:
|
323
|
+
FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 101
|
319
324
|
end
|
data/lib/utf8_reader.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
##
|
2
|
+
# This class reads individual characters from files using UTF-8 encoding
|
3
|
+
# This deals with two main concerns:
|
4
|
+
# - Variable byte length of characters
|
5
|
+
# - Reducing the number of read operations by loading bytes in chunks
|
6
|
+
|
7
|
+
class FormatParser::UTF8Reader
  # Number of bytes requested from the IO on each read operation.
  READ_CHUNK_SIZE = 128

  # Raised when the bytes read do not form a valid UTF-8 character.
  class UTF8CharReaderError < StandardError
  end

  # @param io [#read] source of the bytes; read(READ_CHUNK_SIZE) is called on it
  def initialize(io)
    @io = io
    @chunk = ""
    @index = 0
  end

  # Reads the next character from the IO.
  #
  # @return [String, nil] a single UTF-8 encoded character, or nil when there
  #   are no more bytes to read
  # @raise [UTF8CharReaderError] when the bytes do not form a valid UTF-8
  #   character, including a multi-byte character cut short by the end of the data
  def read_char
    first_byte = read_byte
    return if first_byte.nil?

    as_bytes = [first_byte]
    (assess_char_length(first_byte) - 1).times { as_bytes << read_byte }

    # A nil entry means the data ended in the middle of a multi-byte character.
    raise UTF8CharReaderError, "Invalid UTF-8 character" if as_bytes.include?(nil)

    char = as_bytes.pack('C*').force_encoding('UTF-8')
    raise UTF8CharReaderError, "Invalid UTF-8 character" unless char.valid_encoding?

    char
  end

  private

  # Returns the next byte (as an Integer) or nil when no more bytes are available.
  def read_byte
    manage_data_chunk
    byte = @chunk.getbyte(@index)
    @index += 1 unless byte.nil?
    byte
  end

  # Loads a new chunk from the IO once every byte of the current one was consumed.
  def manage_data_chunk
    # @index is a byte offset, so it has to be compared against the size in
    # bytes: the two differ from the character count whenever the IO hands
    # back a string that is not binary-encoded.
    return if @index < @chunk.bytesize

    # An IO returning nil has nothing else to offer; an empty chunk makes read_byte return nil.
    @chunk = @io.read(READ_CHUNK_SIZE) || ""
    @index = 0
  end

  # Tells how many bytes the character has, based on its first byte.
  def assess_char_length(first_byte)
    # 0_______ (1 byte)
    # 110_____ (2 bytes) 192
    # 1110____ (3 bytes) 224
    # 11110___ (4 bytes) 240
    case first_byte
    when 240..255 then 4
    when 224..239 then 3
    when 192..223 then 2
    else 1
    end
  end
end
|
data/spec/format_parser_spec.rb
CHANGED
@@ -34,6 +34,26 @@ describe FormatParser do
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
+
it "fixtures with 'invalid' in the filename should fail to parse" do
|
38
|
+
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
|
39
|
+
file_name = File.basename(fixture_path)
|
40
|
+
next unless file_name.include? "invalid"
|
41
|
+
File.open(fixture_path, 'rb') do |file|
|
42
|
+
FormatParser.parse(file)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
it "fixtures without 'invalid' in the filename should be parsed successfully" do
|
48
|
+
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
|
49
|
+
file_name = File.basename(fixture_path)
|
50
|
+
next if file_name.include? "invalid"
|
51
|
+
File.open(fixture_path, 'rb') do |file|
|
52
|
+
FormatParser.parse(file)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
37
57
|
it 'triggers parsers in a certain order that corresponds to the parser priorities' do
|
38
58
|
file_contents = StringIO.new('a' * 4096)
|
39
59
|
|
@@ -189,12 +209,20 @@ describe FormatParser do
|
|
189
209
|
'FormatParser::CR3Parser',
|
190
210
|
'FormatParser::DPXParser',
|
191
211
|
'FormatParser::FLACParser',
|
192
|
-
'FormatParser::MP3Parser',
|
193
212
|
'FormatParser::OggParser',
|
194
213
|
'FormatParser::TIFFParser',
|
195
|
-
'FormatParser::WAVParser'
|
214
|
+
'FormatParser::WAVParser',
|
215
|
+
'FormatParser::MP3Parser'
|
196
216
|
])
|
197
217
|
end
|
218
|
+
|
219
|
+
it 'ensures that MP3 parser is the last one among all' do
|
220
|
+
natures = FormatParser.registered_natures
|
221
|
+
formats = FormatParser.registered_formats
|
222
|
+
prioritised_parsers = FormatParser.parsers_for(natures, formats)
|
223
|
+
parser_class_names = prioritised_parsers.map { |parser| parser.class.name }
|
224
|
+
expect(parser_class_names.last).to eq 'FormatParser::MP3Parser'
|
225
|
+
end
|
198
226
|
end
|
199
227
|
|
200
228
|
describe '.register_parser and .deregister_parser' do
|
@@ -55,7 +55,7 @@ describe FormatParser::FLACParser do
|
|
55
55
|
end
|
56
56
|
|
57
57
|
it 'raises an error when sample rate is 0' do
|
58
|
-
fpath = fixtures_dir + 'FLAC/
|
58
|
+
fpath = fixtures_dir + 'FLAC/invalid_sample_rate_0.flac'
|
59
59
|
|
60
60
|
expect {
|
61
61
|
subject.call(File.open(fpath, 'rb'))
|
@@ -0,0 +1,321 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::JSONParser::Validator do
|
4
|
+
def load_file(file_name)
|
5
|
+
io = File.open(Pathname.new(fixtures_dir).join('JSON').join(file_name), 'rb')
|
6
|
+
FormatParser::JSONParser::Validator.new(io)
|
7
|
+
end
|
8
|
+
|
9
|
+
def load_string(content)
|
10
|
+
io = StringIO.new(content.encode(Encoding::UTF_8))
|
11
|
+
FormatParser::JSONParser::Validator.new(io)
|
12
|
+
end
|
13
|
+
|
14
|
+
describe 'When reading root nodes' do
|
15
|
+
it "identifies objects as root nodes" do
|
16
|
+
v = load_string '{"key": "value"}'
|
17
|
+
|
18
|
+
completed = v.validate
|
19
|
+
|
20
|
+
expect(completed).to be true
|
21
|
+
expect(v.stats(:object)).to be 1
|
22
|
+
expect(v.stats(:string)).to be 2
|
23
|
+
end
|
24
|
+
|
25
|
+
it "identifies arrays as root nodes" do
|
26
|
+
v = load_string '["e1", "e2"]'
|
27
|
+
|
28
|
+
completed = v.validate
|
29
|
+
|
30
|
+
expect(completed).to be true
|
31
|
+
expect(v.stats(:array)).to be 1
|
32
|
+
expect(v.stats(:string)).to be 2
|
33
|
+
end
|
34
|
+
|
35
|
+
it "rejects strings as root nodes" do
|
36
|
+
expect do
|
37
|
+
v = load_string '"this is a string"'
|
38
|
+
v.validate
|
39
|
+
end.to raise_error(FormatParser::JSONParser::Validator::JSONParserError)
|
40
|
+
end
|
41
|
+
|
42
|
+
it "rejects literals as root nodes" do
|
43
|
+
expect do
|
44
|
+
v = load_string 'true'
|
45
|
+
v.validate
|
46
|
+
end.to raise_error(FormatParser::JSONParser::Validator::JSONParserError)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe 'When reading objects' do
|
51
|
+
it "recognizes empty objects" do
|
52
|
+
v = load_string '{}'
|
53
|
+
|
54
|
+
completed = v.validate
|
55
|
+
expect(completed).to be true
|
56
|
+
expect(v.stats(:object)).to be 1
|
57
|
+
expect(v.stats(:string)).to be 0
|
58
|
+
end
|
59
|
+
|
60
|
+
it "recognizes objects with a single attribute" do
|
61
|
+
v = load_string '{"key": "value"}'
|
62
|
+
|
63
|
+
completed = v.validate
|
64
|
+
expect(completed).to be true
|
65
|
+
expect(v.stats(:object)).to be 1
|
66
|
+
expect(v.stats(:string)).to be 2
|
67
|
+
end
|
68
|
+
|
69
|
+
it "recognizes objects with attributes of different types" do
|
70
|
+
v = load_string '{"k1": "value", "k2": -123.456, "k3": null}'
|
71
|
+
|
72
|
+
completed = v.validate
|
73
|
+
expect(completed).to be true
|
74
|
+
expect(v.stats(:object)).to be 1
|
75
|
+
expect(v.stats(:string)).to be 4
|
76
|
+
expect(v.stats(:literal)).to be 2
|
77
|
+
end
|
78
|
+
|
79
|
+
it "recognizes condensed objects (no whitespaces)" do
|
80
|
+
v = load_string '{"a":"b","c":"d"}'
|
81
|
+
|
82
|
+
completed = v.validate
|
83
|
+
expect(completed).to be true
|
84
|
+
expect(v.stats(:object)).to be 1
|
85
|
+
expect(v.stats(:string)).to be 4
|
86
|
+
end
|
87
|
+
|
88
|
+
it "recognizes formatted objects" do
|
89
|
+
v = load_string '{
|
90
|
+
"a":"b",
|
91
|
+
"c":"d"
|
92
|
+
}'
|
93
|
+
|
94
|
+
completed = v.validate
|
95
|
+
expect(completed).to be true
|
96
|
+
expect(v.stats(:object)).to be 1
|
97
|
+
expect(v.stats(:string)).to be 4
|
98
|
+
end
|
99
|
+
|
100
|
+
it "recognizes objects with nested objects and arrays" do
|
101
|
+
v = load_string '{
|
102
|
+
"a": {
|
103
|
+
"a1": "-",
|
104
|
+
"a2": "-",
|
105
|
+
"a3": {
|
106
|
+
"a3.1": "-"
|
107
|
+
},
|
108
|
+
},
|
109
|
+
"c": [1, null]
|
110
|
+
}'
|
111
|
+
|
112
|
+
completed = v.validate
|
113
|
+
expect(completed).to be true
|
114
|
+
expect(v.stats(:object)).to be 3
|
115
|
+
expect(v.stats(:array)).to be 1
|
116
|
+
expect(v.stats(:string)).to be 9
|
117
|
+
expect(v.stats(:literal)).to be 2
|
118
|
+
end
|
119
|
+
|
120
|
+
it "rejects objects without double-quoted attribute names" do
|
121
|
+
expect do
|
122
|
+
v = load_string '{a:"b",c:"d"}'
|
123
|
+
v.validate
|
124
|
+
end.to raise_error(FormatParser::JSONParser::Validator::JSONParserError)
|
125
|
+
end
|
126
|
+
|
127
|
+
it "rejects objects without comma separators" do
|
128
|
+
expect do
|
129
|
+
v = load_string '{
|
130
|
+
"a":"b"
|
131
|
+
"c":"d"
|
132
|
+
}'
|
133
|
+
v.validate
|
134
|
+
end.to raise_error(FormatParser::JSONParser::Validator::JSONParserError)
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
describe 'When reading arrays' do
|
139
|
+
it "recognizes empty arrays" do
|
140
|
+
v = load_string '[]'
|
141
|
+
|
142
|
+
completed = v.validate
|
143
|
+
expect(completed).to be true
|
144
|
+
expect(v.stats(:array)).to be 1
|
145
|
+
expect(v.stats(:string)).to be 0
|
146
|
+
end
|
147
|
+
|
148
|
+
it "recognizes arrays with a single element" do
|
149
|
+
v = load_string '[{}]'
|
150
|
+
|
151
|
+
completed = v.validate
|
152
|
+
expect(completed).to be true
|
153
|
+
expect(v.stats(:array)).to be 1
|
154
|
+
expect(v.stats(:object)).to be 1
|
155
|
+
end
|
156
|
+
|
157
|
+
it "recognizes arrays with elements of different types" do
|
158
|
+
v = load_string '[{"k1": "value"}, [], "a string", null, -123.456]'
|
159
|
+
|
160
|
+
completed = v.validate
|
161
|
+
expect(completed).to be true
|
162
|
+
expect(v.stats(:array)).to be 2
|
163
|
+
expect(v.stats(:object)).to be 1
|
164
|
+
expect(v.stats(:string)).to be 3
|
165
|
+
expect(v.stats(:literal)).to be 2
|
166
|
+
end
|
167
|
+
|
168
|
+
it "recognizes condensed arrays (no whitespaces)" do
|
169
|
+
v = load_string '["a",2,null,false]'
|
170
|
+
|
171
|
+
completed = v.validate
|
172
|
+
expect(completed).to be true
|
173
|
+
expect(v.stats(:array)).to be 1
|
174
|
+
expect(v.stats(:string)).to be 1
|
175
|
+
expect(v.stats(:literal)).to be 3
|
176
|
+
end
|
177
|
+
|
178
|
+
it "recognizes formatted arrays" do
|
179
|
+
v = load_string '[
|
180
|
+
{
|
181
|
+
"a":"b"
|
182
|
+
},
|
183
|
+
{
|
184
|
+
"c":"d"
|
185
|
+
}
|
186
|
+
]'
|
187
|
+
|
188
|
+
completed = v.validate
|
189
|
+
expect(completed).to be true
|
190
|
+
expect(v.stats(:array)).to be 1
|
191
|
+
expect(v.stats(:object)).to be 2
|
192
|
+
expect(v.stats(:string)).to be 4
|
193
|
+
end
|
194
|
+
|
195
|
+
it "recognizes arrays with nested objects and arrays" do
|
196
|
+
v = load_string '[{
|
197
|
+
"a": {
|
198
|
+
"a1": "-",
|
199
|
+
"a2": "-",
|
200
|
+
"a3": {
|
201
|
+
"a3.1": "-"
|
202
|
+
},
|
203
|
+
},
|
204
|
+
"c": [1, null]
|
205
|
+
},
|
206
|
+
[{ "a": "b" }, { "c":"d" }]
|
207
|
+
]'
|
208
|
+
|
209
|
+
completed = v.validate
|
210
|
+
expect(completed).to be true
|
211
|
+
expect(v.stats(:array)).to be 3
|
212
|
+
expect(v.stats(:object)).to be 5
|
213
|
+
expect(v.stats(:string)).to be 13
|
214
|
+
expect(v.stats(:literal)).to be 2
|
215
|
+
end
|
216
|
+
|
217
|
+
it "rejects arrays without comma separators" do
|
218
|
+
expect do
|
219
|
+
v = load_string '[
|
220
|
+
"abc"
|
221
|
+
"def"
|
222
|
+
]'
|
223
|
+
v.validate
|
224
|
+
end.to raise_error(FormatParser::JSONParser::Validator::JSONParserError)
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
describe 'When reading strings' do
|
229
|
+
it "recognizes regular strings" do
|
230
|
+
v = load_string '["abc", "def", "ghi"]'
|
231
|
+
|
232
|
+
completed = v.validate
|
233
|
+
expect(completed).to be true
|
234
|
+
expect(v.stats(:string)).to be 3
|
235
|
+
end
|
236
|
+
|
237
|
+
it "recognizes strings containing excaped characters" do
|
238
|
+
v = load_string '["ab\"c", "6\\2=3"]'
|
239
|
+
|
240
|
+
completed = v.validate
|
241
|
+
expect(completed).to be true
|
242
|
+
expect(v.stats(:string)).to be 2
|
243
|
+
end
|
244
|
+
|
245
|
+
it "recognizes strings containing UTF8 characters" do
|
246
|
+
v = load_string '["abc😃🐶👀", "😃2🐶3👀"]'
|
247
|
+
|
248
|
+
completed = v.validate
|
249
|
+
expect(completed).to be true
|
250
|
+
expect(v.stats(:string)).to be 2
|
251
|
+
end
|
252
|
+
|
253
|
+
it "recognizes long strings containing UTF8 characters" do
|
254
|
+
v = load_string '["aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀"]'
|
255
|
+
|
256
|
+
completed = v.validate
|
257
|
+
expect(completed).to be true
|
258
|
+
expect(v.stats(:string)).to be 1
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
describe 'When reading literals' do
|
263
|
+
it "recognizes numbers" do
|
264
|
+
v = load_string '[1, -2.4, 1.0E+2]'
|
265
|
+
|
266
|
+
completed = v.validate
|
267
|
+
expect(completed).to be true
|
268
|
+
expect(v.stats(:literal)).to be 3
|
269
|
+
end
|
270
|
+
|
271
|
+
it "recognizes boolean values" do
|
272
|
+
v = load_string '[true, false]'
|
273
|
+
|
274
|
+
completed = v.validate
|
275
|
+
expect(completed).to be true
|
276
|
+
expect(v.stats(:literal)).to be 2
|
277
|
+
end
|
278
|
+
|
279
|
+
it "recognizes 'true', 'false' and 'null'" do
|
280
|
+
v = load_string '[true, false, null]'
|
281
|
+
|
282
|
+
completed = v.validate
|
283
|
+
expect(completed).to be true
|
284
|
+
expect(v.stats(:literal)).to be 3
|
285
|
+
end
|
286
|
+
end
|
287
|
+
|
288
|
+
describe 'When reading invalid JSON content' do
|
289
|
+
it "rejects truncated JSON content" do
|
290
|
+
expect do
|
291
|
+
v = load_string '[{
|
292
|
+
"a": ["abc","def"],
|
293
|
+
"b": 4'
|
294
|
+
v.validate
|
295
|
+
end.to raise_error(FormatParser::JSONParser::Validator::JSONParserError)
|
296
|
+
end
|
297
|
+
end
|
298
|
+
|
299
|
+
describe 'When reading large JSON files' do
|
300
|
+
it "Returns 'false' without throwing errors when the initial chunk of a file is a valid JSON" do
|
301
|
+
v = load_file 'long_file_valid.json'
|
302
|
+
|
303
|
+
completed = v.validate
|
304
|
+
expect(completed).to be false
|
305
|
+
end
|
306
|
+
|
307
|
+
it "Returns 'false' without throwing errors when for long non-formatted JSON files" do
|
308
|
+
v = load_file 'long_file_valid_non_formatted.json'
|
309
|
+
|
310
|
+
completed = v.validate
|
311
|
+
expect(completed).to be false
|
312
|
+
end
|
313
|
+
|
314
|
+
it "Returns 'false' without throwing errors when the initial chunk of a file is a valid JSON even if there's an issue later" do
|
315
|
+
v = load_file 'long_file_malformed.json'
|
316
|
+
|
317
|
+
completed = v.validate
|
318
|
+
expect(completed).to be false
|
319
|
+
end
|
320
|
+
end
|
321
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::JSONParser do
|
4
|
+
MAX_READS = 100
|
5
|
+
|
6
|
+
def load_file(file_name)
|
7
|
+
io = File.open(Pathname.new(fixtures_dir).join('JSON').join(file_name), 'rb')
|
8
|
+
FormatParser::ReadLimiter.new(io, max_reads: MAX_READS)
|
9
|
+
end
|
10
|
+
|
11
|
+
def file_size(file_name)
|
12
|
+
File.size(Pathname.new(fixtures_dir).join('JSON').join(file_name))
|
13
|
+
end
|
14
|
+
|
15
|
+
describe 'When reading objects valid JSON files' do
|
16
|
+
it "identifies JSON files with objects as root nodes" do
|
17
|
+
io = load_file 'object.json'
|
18
|
+
|
19
|
+
parsed = subject.call(io)
|
20
|
+
|
21
|
+
expect(parsed).not_to be_nil
|
22
|
+
expect(parsed.nature).to eq(:text)
|
23
|
+
expect(parsed.format).to eq(:json)
|
24
|
+
expect(parsed.content_type).to eq('application/json')
|
25
|
+
end
|
26
|
+
|
27
|
+
it "identifies JSON files carrying arrays as root nodes" do
|
28
|
+
io = load_file 'array.json'
|
29
|
+
|
30
|
+
parsed = subject.call(io)
|
31
|
+
|
32
|
+
expect(parsed).not_to be_nil
|
33
|
+
expect(parsed.nature).to eq(:text)
|
34
|
+
expect(parsed.format).to eq(:json)
|
35
|
+
expect(parsed.content_type).to eq('application/json')
|
36
|
+
end
|
37
|
+
|
38
|
+
it "identifies formatted JSON files" do
|
39
|
+
io = load_file 'formatted_object_utf8.json'
|
40
|
+
|
41
|
+
parsed = subject.call(io)
|
42
|
+
|
43
|
+
expect(parsed).not_to be_nil
|
44
|
+
expect(parsed.nature).to eq(:text)
|
45
|
+
expect(parsed.format).to eq(:json)
|
46
|
+
expect(parsed.content_type).to eq('application/json')
|
47
|
+
end
|
48
|
+
|
49
|
+
it "identifies files wrapped in whitespace characters" do
|
50
|
+
io = load_file 'whitespaces.json'
|
51
|
+
|
52
|
+
parsed = subject.call(io)
|
53
|
+
|
54
|
+
expect(parsed).not_to be_nil
|
55
|
+
expect(parsed.nature).to eq(:text)
|
56
|
+
expect(parsed.format).to eq(:json)
|
57
|
+
expect(parsed.content_type).to eq('application/json')
|
58
|
+
end
|
59
|
+
|
60
|
+
it "identifies files with nested objects and arrays" do
|
61
|
+
io = load_file 'nested_objects.json'
|
62
|
+
|
63
|
+
parsed = subject.call(io)
|
64
|
+
|
65
|
+
expect(parsed).not_to be_nil
|
66
|
+
expect(parsed.nature).to eq(:text)
|
67
|
+
expect(parsed.format).to eq(:json)
|
68
|
+
expect(parsed.content_type).to eq('application/json')
|
69
|
+
end
|
70
|
+
|
71
|
+
it "is reads the whole content of small files before accepting them" do
|
72
|
+
file_name = 'nested_objects.json'
|
73
|
+
io = load_file file_name
|
74
|
+
file_size = file_size file_name
|
75
|
+
|
76
|
+
parsed = subject.call(io)
|
77
|
+
|
78
|
+
expect(parsed).not_to be_nil
|
79
|
+
expect(parsed.nature).to eq(:text)
|
80
|
+
expect(parsed.format).to eq(:json)
|
81
|
+
expect(parsed.content_type).to eq('application/json')
|
82
|
+
expect(io.bytes).to be >= file_size
|
83
|
+
end
|
84
|
+
|
85
|
+
it "is accepts long files before reading the whole content" do
|
86
|
+
file_name = 'long_array_numbers.json'
|
87
|
+
io = load_file file_name
|
88
|
+
file_size = file_size file_name
|
89
|
+
|
90
|
+
parsed = subject.call(io)
|
91
|
+
|
92
|
+
expect(parsed).not_to be_nil
|
93
|
+
expect(parsed.nature).to eq(:text)
|
94
|
+
expect(parsed.format).to eq(:json)
|
95
|
+
expect(parsed.content_type).to eq('application/json')
|
96
|
+
expect(io.bytes).to be < file_size
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
describe 'When reading objects invalid JSON files' do
|
101
|
+
it "rejects files with corrupted JSON data" do
|
102
|
+
io = load_file 'invalid_malformed.json'
|
103
|
+
|
104
|
+
parsed = subject.call(io)
|
105
|
+
|
106
|
+
expect(parsed).to be_nil
|
107
|
+
end
|
108
|
+
|
109
|
+
it "rejects invalid files early without reading the whole content" do
|
110
|
+
io = load_file 'invalid_lorem_ipsum.json'
|
111
|
+
|
112
|
+
parsed = subject.call(io)
|
113
|
+
|
114
|
+
expect(parsed).to be_nil
|
115
|
+
expect(io.reads).to eq(1)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
@@ -11,7 +11,7 @@ describe FormatParser::M3UParser do
|
|
11
11
|
end
|
12
12
|
|
13
13
|
describe 'an m3u file with missing header' do
|
14
|
-
let(:m3u_file) { '
|
14
|
+
let(:m3u_file) { 'invalid_plain_text.m3u' }
|
15
15
|
|
16
16
|
it 'does not parse the file successfully' do
|
17
17
|
expect(parsed_m3u).to be_nil
|
@@ -36,6 +36,12 @@ describe FormatParser::MP3Parser do
|
|
36
36
|
expect(parsed).to be_nil
|
37
37
|
end
|
38
38
|
|
39
|
+
it 'does not misdetect a WAV' do
|
40
|
+
fpath = fixtures_dir + '/WAV/c_SCAM_MIC_SOL001_RUN001.wav'
|
41
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
42
|
+
expect(parsed).to be_nil
|
43
|
+
end
|
44
|
+
|
39
45
|
describe 'title/artist/album attributes' do
|
40
46
|
let(:parsed) { subject.call(File.open(fpath, 'rb')) }
|
41
47
|
|
@@ -13,7 +13,7 @@ describe FormatParser::OggParser do
|
|
13
13
|
end
|
14
14
|
|
15
15
|
it 'skips a file if it contains more than MAX_POSSIBLE_OGG_PAGE_SIZE bytes of garbage at the end' do
|
16
|
-
parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/
|
16
|
+
parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/invalid_with_garbage_at_the_end.ogg', 'rb'))
|
17
17
|
expect(parse_result).to be_nil
|
18
18
|
end
|
19
19
|
|
@@ -46,17 +46,17 @@ describe FormatParser::PDFParser do
|
|
46
46
|
|
47
47
|
describe 'broken PDF files should not parse' do
|
48
48
|
it 'PDF with missing version header' do
|
49
|
-
parsed_pdf = parse_pdf '
|
49
|
+
parsed_pdf = parse_pdf 'invalid_not_a.pdf'
|
50
50
|
expect(parsed_pdf).to be_nil
|
51
51
|
end
|
52
52
|
|
53
53
|
it 'PDF 2.0 with offset start' do
|
54
|
-
parsed_pdf = parse_pdf 'PDF 2.0 with offset start.pdf'
|
54
|
+
parsed_pdf = parse_pdf 'invalid PDF 2.0 with offset start.pdf'
|
55
55
|
expect(parsed_pdf).to be_nil
|
56
56
|
end
|
57
57
|
|
58
58
|
it 'exceeds the PDF read limit' do
|
59
|
-
parsed_pdf = parse_pdf '
|
59
|
+
parsed_pdf = parse_pdf 'invalid_exceed_PDF_read_limit.pdf'
|
60
60
|
expect(parsed_pdf).to be_nil
|
61
61
|
end
|
62
62
|
end
|
@@ -48,7 +48,7 @@ describe FormatParser::WAVParser do
|
|
48
48
|
|
49
49
|
it "cannot parse file with audio format different from 1 and no 'fact' chunk" do
|
50
50
|
expect {
|
51
|
-
subject.call(File.open(__dir__ + '/../fixtures/WAV/
|
51
|
+
subject.call(File.open(__dir__ + '/../fixtures/WAV/invalid_d_6_Channel_ID.wav', 'rb'))
|
52
52
|
}.to raise_error(FormatParser::IOUtils::InvalidRead)
|
53
53
|
end
|
54
54
|
end
|
@@ -7,7 +7,7 @@ describe FormatParser::WebpParser do
|
|
7
7
|
end
|
8
8
|
|
9
9
|
it 'does not parse files with an unrecognised variant' do
|
10
|
-
result = subject.call(File.open(fixtures_dir + 'WEBP/unrecognised-variant.webp', 'rb'))
|
10
|
+
result = subject.call(File.open(fixtures_dir + 'WEBP/invalid-unrecognised-variant.webp', 'rb'))
|
11
11
|
expect(result).to be_nil
|
12
12
|
end
|
13
13
|
|
@@ -104,6 +104,43 @@ describe 'Fetching data from HTTP remotes' do
|
|
104
104
|
expect(file_information.format).to eq(:png)
|
105
105
|
end
|
106
106
|
|
107
|
+
describe 'correctly parses WAV files without falling back to another filetype' do
|
108
|
+
['c_8kmp316.wav', 'c_SCAM_MIC_SOL001_RUN001.wav'].each do |filename|
|
109
|
+
it "parses WAV file #{filename}" do
|
110
|
+
remote_url = 'http://localhost:9399/WAV/' + filename
|
111
|
+
file_information = FormatParser.parse_http(remote_url)
|
112
|
+
expect(file_information).not_to be_nil
|
113
|
+
expect(file_information.format).to eq(:wav)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
describe "correctly parses files over HTTP without filename hint" do
|
119
|
+
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
|
120
|
+
file_name = File.basename(fixture_path)
|
121
|
+
next if file_name.include? "invalid"
|
122
|
+
|
123
|
+
file_type_dir = fixture_path.delete_prefix(fixtures_dir).delete_suffix(file_name)
|
124
|
+
file_type_dir.delete_prefix!('/').delete_suffix!('/')
|
125
|
+
next if file_type_dir.empty?
|
126
|
+
|
127
|
+
# skipping this one because it's a special case
|
128
|
+
next if file_name == "arch_many_entries.zip"
|
129
|
+
|
130
|
+
it "parses #{file_type_dir} file: #{file_name}" do
|
131
|
+
url = "http://localhost:9399/#{file_type_dir}/#{file_name}?some_param=test".gsub(' ', '%20')
|
132
|
+
result_with_hint = FormatParser.parse_http(url, filename_hint: file_name)
|
133
|
+
result_no_hint = FormatParser.parse_http(url)
|
134
|
+
|
135
|
+
expect(result_with_hint).not_to be_nil
|
136
|
+
expect(result_no_hint).not_to be_nil
|
137
|
+
|
138
|
+
expect(result_no_hint.nature).to eq(result_with_hint.nature)
|
139
|
+
expect(result_no_hint.format).to eq(result_with_hint.format)
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
107
144
|
describe 'when parsing remote fixtures' do
|
108
145
|
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
|
109
146
|
filename = File.basename(fixture_path)
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
8
8
|
- Julik Tarkhanov
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2023-
|
12
|
+
date: 2023-08-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: exifr
|
@@ -236,6 +236,8 @@ files:
|
|
236
236
|
- lib/parsers/iso_base_media_file_format/decoder.rb
|
237
237
|
- lib/parsers/iso_base_media_file_format/utils.rb
|
238
238
|
- lib/parsers/jpeg_parser.rb
|
239
|
+
- lib/parsers/json_parser.rb
|
240
|
+
- lib/parsers/json_parser/validator.rb
|
239
241
|
- lib/parsers/m3u_parser.rb
|
240
242
|
- lib/parsers/mov_parser.rb
|
241
243
|
- lib/parsers/mov_parser/decoder.rb
|
@@ -260,6 +262,7 @@ files:
|
|
260
262
|
- lib/remote_io.rb
|
261
263
|
- lib/string.rb
|
262
264
|
- lib/text.rb
|
265
|
+
- lib/utf8_reader.rb
|
263
266
|
- lib/video.rb
|
264
267
|
- spec/active_storage/blob_io_spec.rb
|
265
268
|
- spec/active_storage/rails_app_spec.rb
|
@@ -289,6 +292,8 @@ files:
|
|
289
292
|
- spec/parsers/iso_base_media_file_format/decoder_spec.rb
|
290
293
|
- spec/parsers/iso_base_media_file_format/utils_spec.rb
|
291
294
|
- spec/parsers/jpeg_parser_spec.rb
|
295
|
+
- spec/parsers/json_parser/validator_spec.rb
|
296
|
+
- spec/parsers/json_parser_spec.rb
|
292
297
|
- spec/parsers/m3u_parser_spec.rb
|
293
298
|
- spec/parsers/mov_parser_spec.rb
|
294
299
|
- spec/parsers/mp3_parser_spec.rb
|
@@ -314,7 +319,7 @@ licenses:
|
|
314
319
|
- MIT (Hippocratic)
|
315
320
|
metadata:
|
316
321
|
allowed_push_host: https://rubygems.org
|
317
|
-
post_install_message:
|
322
|
+
post_install_message:
|
318
323
|
rdoc_options: []
|
319
324
|
require_paths:
|
320
325
|
- lib
|
@@ -329,8 +334,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
329
334
|
- !ruby/object:Gem::Version
|
330
335
|
version: '0'
|
331
336
|
requirements: []
|
332
|
-
rubygems_version: 3.
|
333
|
-
signing_key:
|
337
|
+
rubygems_version: 3.1.6
|
338
|
+
signing_key:
|
334
339
|
specification_version: 4
|
335
340
|
summary: A library for efficient parsing of file metadata
|
336
341
|
test_files: []
|