format_parser 2.6.0 → 2.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -2
- data/lib/format_parser/version.rb +1 -1
- data/lib/format_parser.rb +15 -0
- data/lib/parsers/json_parser/validator.rb +319 -0
- data/lib/parsers/json_parser.rb +25 -0
- data/lib/parsers/mp3_parser.rb +6 -1
- data/lib/utf8_reader.rb +68 -0
- data/spec/format_parser_spec.rb +30 -2
- data/spec/parsers/flac_parser_spec.rb +1 -1
- data/spec/parsers/json_parser/validator_spec.rb +321 -0
- data/spec/parsers/json_parser_spec.rb +118 -0
- data/spec/parsers/m3u_parser_spec.rb +1 -1
- data/spec/parsers/mp3_parser_spec.rb +6 -0
- data/spec/parsers/ogg_parser_spec.rb +1 -1
- data/spec/parsers/pdf_parser_spec.rb +3 -3
- data/spec/parsers/wav_parser_spec.rb +1 -1
- data/spec/parsers/webp_parser_spec.rb +1 -1
- data/spec/remote_fetching_spec.rb +37 -0
- metadata +11 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5d2679a365f7c735d2b8c962765c4783b6336bf23461ffde1705ab141d250591
|
4
|
+
data.tar.gz: e1e4b4caa2956cbf1653d39498a9db84589cd0c9c979c6d84e9f3b3027427274
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52e775ae2a4ced22d2879fff48108974bfe4087333b70d74c5c0910229aa7298826664bbd474dd9b4221777637cc51bf969735a830df976f58f0ff7302dd69c5
|
7
|
+
data.tar.gz: 31b8f95ff8fbcba01d60fe2377699a9abebc61a28d74afc05aa8456d6660f786ee268c2a621c0dc83490e3f9b04dc5e1a60319ae6f4c54503b3dfab9d39d520e
|
data/README.md
CHANGED
@@ -26,6 +26,7 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
26
26
|
* HEIC
|
27
27
|
* HEIF
|
28
28
|
* JPEG
|
29
|
+
* JSON
|
29
30
|
* M3U
|
30
31
|
* M4A
|
31
32
|
* M4B
|
@@ -216,7 +217,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
216
217
|
- NEF examples are downloaded from http://www.rawsamples.ch/ and are Creative Commons licensed.
|
217
218
|
|
218
219
|
### OGG
|
219
|
-
- `hi.ogg`, `vorbis.ogg`, `with_confusing_magic_string.ogg`, `
|
220
|
+
- `hi.ogg`, `vorbis.ogg`, `with_confusing_magic_string.ogg`, `invalid_with_garbage_at_the_end.ogg` have been generated by the project contributors
|
220
221
|
|
221
222
|
### PDF
|
222
223
|
- PDF 2.0 files downloaded from the [PDF Association public Github repository](https://github.com/pdf-association/pdf20examples). These files are licensed under the Creative Commons Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.
|
@@ -235,7 +236,7 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
235
236
|
### WAV
|
236
237
|
- c_11k16bitpcm.wav and c_8kmp316.wav are from [Wikipedia WAV](https://en.wikipedia.org/wiki/WAV#Comparison_of_coding_schemes), retrieved January 7, 2018
|
237
238
|
- c_39064__alienbomb__atmo-truck.wav is from [freesound](https://freesound.org/people/alienbomb/sounds/39064/) and is CC0 licensed
|
238
|
-
- c_M1F1-Alaw-AFsp.wav and
|
239
|
+
- c_M1F1-Alaw-AFsp.wav and invalid_d_6_Channel_ID.wav are from a [McGill Engineering site](http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/Samples.html)
|
239
240
|
|
240
241
|
### WEBP
|
241
242
|
- With the exception of extended-animation.webp, which was obtained from Wikimedia Commons and is Creative Commons
|
data/lib/format_parser.rb
CHANGED
@@ -17,6 +17,7 @@ module FormatParser
|
|
17
17
|
require_relative 'read_limits_config'
|
18
18
|
require_relative 'remote_io'
|
19
19
|
require_relative 'io_constraint'
|
20
|
+
require_relative 'utf8_reader'
|
20
21
|
require_relative 'care'
|
21
22
|
require_relative 'active_storage/blob_analyzer'
|
22
23
|
require_relative 'text'
|
@@ -35,6 +36,9 @@ module FormatParser
|
|
35
36
|
# The value will ensure the parser having it will be applied to the file last.
|
36
37
|
LEAST_PRIORITY = 99
|
37
38
|
|
39
|
+
@registered_natures = []
|
40
|
+
@registered_formats = []
|
41
|
+
|
38
42
|
# Register a parser object to be used to perform file format detection. Each parser FormatParser
|
39
43
|
# provides out of the box registers itself using this method.
|
40
44
|
#
|
@@ -67,9 +71,20 @@ module FormatParser
|
|
67
71
|
end
|
68
72
|
@parser_priorities ||= {}
|
69
73
|
@parser_priorities[callable_parser] = priority
|
74
|
+
|
75
|
+
@registered_natures |= parser_provided_natures
|
76
|
+
@registered_formats |= parser_provided_formats
|
70
77
|
end
|
71
78
|
end
|
72
79
|
|
80
|
+
def self.registered_natures
|
81
|
+
@registered_natures
|
82
|
+
end
|
83
|
+
|
84
|
+
def self.registered_formats
|
85
|
+
@registered_formats
|
86
|
+
end
|
87
|
+
|
73
88
|
# Deregister a parser object (makes FormatParser forget this parser existed). Is mostly used in
|
74
89
|
# tests, but can also be used to forcibly disable some formats completely.
|
75
90
|
#
|
@@ -0,0 +1,319 @@
|
|
1
|
+
##
# This class checks whether a given file is a valid JSON file.
# The validation process DOES NOT assemble an object with the contents of the JSON file in memory.
# Instead, it implements a simple state-machine-like parser that digests the contents of the file
# one character at a time while traversing the hierarchy of nodes in the document.
#
# Although this is based on the IETF standard (https://www.rfc-editor.org/rfc/rfc8259),
# it does cut a few corners for the sake of simplicity. For instance, instead of validating
# Numbers, "true", "false" and "null" tokens, it supports a type called Literal to hold generic
# sequences of characters. Only objects and arrays are accepted as root nodes.
# This decision makes the implementation simpler while being a good-enough approach to identify JSON files.
#
# There is also a cap. Large files are not read all the way through. Instead, if the beginning of the
# file (the first MAX_SAMPLE_SIZE characters) is JSON-compliant, it is assumed that the file is a JSON file.

class FormatParser::JSONParser::Validator
  # Raised whenever the content being validated cannot be JSON.
  class JSONParserError < StandardError
  end

  MAX_SAMPLE_SIZE = 1024 # number of characters inspected before validation is halted
  MAX_LITERAL_SIZE = 30 # much larger than necessary.
  ESCAPE_CHAR = "\\".freeze
  WHITESPACE_CHARS = [" ", "\t", "\n", "\r"].freeze
  ENDING_VALUE_CHARS = [",", "]", "}"].freeze
  LITERALS_CHAR_TEMPLATE = /\w|[+\-.]/ # any word character (alphanumeric or "_"), "+", "-" and "."

  # @param io an IO-like object; it is handed to FormatParser::UTF8Reader by #validate
  def initialize(io)
    @io = io
    @current_node = nil # :object, :array, :string, :literal
    @parent_nodes = [] # stack of the nodes enclosing @current_node
    @current_state = :awaiting_root_node
    @escape_next = false
    @current_literal_size = 0
    @pos = 0 # number of characters consumed so far

    @all_parsers = {} # state => lambda deciding whether a character is acceptable in that state

    @execution_stats = {
      array: 0,
      object: 0,
      literal: 0,
      string: 0
    }

    setup_transitions
  end

  # Consumes characters from the IO and feeds them to the state machine.
  #
  # @return [Boolean] true when the whole content was read and the root node was closed;
  #   false when MAX_SAMPLE_SIZE characters were accepted and validation was halted early.
  # @raise [JSONParserError] on an unexpected character, an invalid UTF-8 sequence, content that
  #   ends before the root node is closed, or a sample in which no root node was ever opened.
  def validate
    char_reader = FormatParser::UTF8Reader.new(@io)

    while (c = char_reader.read_char)
      @pos += 1
      parse_char c

      # Halt validation if the sampling limit is reached.
      next if @pos < MAX_SAMPLE_SIZE

      raise JSONParserError, "Invalid JSON file" if @current_state == :awaiting_root_node
      return false
    end

    # Raising error in case the EOF is reached earlier than expected
    raise JSONParserError, "Incomplete JSON file" if @current_state != :closed
    true
  rescue FormatParser::UTF8Reader::UTF8CharReaderError
    raise JSONParserError, "Invalid UTF-8 character"
  end

  # @param node_type [Symbol] one of :array, :object, :literal, :string
  # @return [Integer, nil] how many nodes of that type have been opened so far
  def stats(node_type)
    @execution_stats[node_type]
  end

  private

  # Registers, for every state, the ordered list of handlers tried against the incoming character.
  # The first handler returning a truthy value accepts the character; handlers may change the state.
  def setup_transitions
    when_its :awaiting_root_node, ->(c) {
      read_whitespace(c) || start_object(c) || start_array(c)
    }

    when_its :awaiting_object_attribute_key, ->(c) {
      read_whitespace(c) || start_attribute_key(c) || close_object(c)
    }

    when_its :reading_object_attribute_key, ->(c) {
      close_attribute_key(c) || read_valid_string_char(c)
    }

    when_its :awaiting_object_colon_separator, ->(c) {
      read_whitespace(c) || read_colon(c)
    }

    when_its :awaiting_object_attribute_value, ->(c) {
      read_whitespace(c) || start_object(c) || start_array(c) || start_string(c) || start_literal(c)
    }

    when_its :awaiting_array_value, ->(c) {
      read_whitespace(c) || start_object(c) || start_array(c) || start_string(c) ||
        start_literal(c) || close_array(c)
    }

    when_its :reading_string, ->(c) {
      close_string(c) || read_valid_string_char(c)
    }

    when_its :awaiting_next_or_close, ->(c) {
      read_whitespace(c) || read_comma_separator(c) || close_object(c) || close_array(c)
    }

    # Literals have no closing delimiter: the character that ends a literal must also be
    # processed as whatever it means for the enclosing node.
    when_its :reading_literal, ->(c) {
      read_valid_literal_char(c) ||
        (close_literal(c) &&
          (read_whitespace(c) || read_comma_separator(c) || close_array(c) || close_object(c)))
    }

    when_its :closed, ->(c) {
      read_whitespace(c)
    }
  end

  # Associates a state with the lambda that handles characters while in that state.
  def when_its(state, act)
    @all_parsers[state] = act
  end

  # Runs the handler of the current state against the character, rejecting it when not accepted.
  def parse_char(c)
    next_step = @all_parsers[@current_state]
    accepted = next_step.call(c)
    reject_char(c) unless accepted
  end

  # Accepts whitespace without changing the state.
  def read_whitespace(c)
    whitespace?(c)
  end

  # Accepts the ":" between an attribute key and its value.
  def read_colon(c)
    return false unless c == ":"

    @current_state = :awaiting_object_attribute_value
    true
  end

  # Accepts a character inside a string or attribute key. Any character following the
  # escape character is accepted as is; otherwise control characters and an unescaped
  # double quote are refused.
  def read_valid_string_char(c)
    if @escape_next
      @escape_next = false
      return true
    end

    if c == ESCAPE_CHAR
      @escape_next = true
      return true
    end

    !control_char?(c) && c != "\""
  end

  # Accepts a character that can be part of a literal, accounting for the literal length.
  def read_valid_literal_char(c)
    return false unless valid_literal_char?(c)

    @current_literal_size += 1
    true
  end

  # Accepts the "," between attributes of an object or elements of an array.
  def read_comma_separator(c)
    return false unless c == ","

    case @current_node
    when :object then @current_state = :awaiting_object_attribute_key
    when :array then @current_state = :awaiting_array_value
    end
    true
  end

  # Object: {"k1":"val", "k2":[1,2,3], "k4": undefined, "k5": {"l1": 6}}
  def start_object(c)
    return false unless c == "{"

    begin_node(:object)
    @current_state = :awaiting_object_attribute_key
    true
  end

  def close_object(c)
    return false unless @current_node == :object && c == "}"

    end_node
    # end_node already moved the state to :closed when the root node was closed.
    @current_state = :awaiting_next_or_close unless @current_node.nil?
    true
  end

  # Array: [1, "two", true, undefined, {}, []]
  def start_array(c)
    return false unless c == "["

    begin_node(:array)
    @current_state = :awaiting_array_value
    true
  end

  def close_array(c)
    return false unless @current_node == :array && c == "]"

    end_node
    # end_node already moved the state to :closed when the root node was closed.
    @current_state = :awaiting_next_or_close unless @current_node.nil?
    true
  end

  # Attribute keys are accounted for as :string nodes.
  def start_attribute_key(c)
    return false unless c == "\""

    begin_node(:string)
    @current_state = :reading_object_attribute_key
    true
  end

  def close_attribute_key(c)
    return false if @escape_next
    return false unless c == "\""

    end_node
    @current_state = :awaiting_object_colon_separator
    true
  end

  # Strings: "Foo"
  def start_string(c)
    return false unless c == "\""

    begin_node(:string)
    @current_state = :reading_string
    true
  end

  def close_string(c)
    return false if @escape_next
    return false unless c == "\""

    end_node
    @current_state = :awaiting_next_or_close
    true
  end

  # literals: null, undefined, true, false, NaN, infinity, -123.456e10 -123,456e10
  def start_literal(c)
    return false unless valid_literal_char?(c)

    begin_node(:literal)
    @current_state = :reading_literal
    @current_literal_size = 1
    true
  end

  # Ends the current literal when the character is whitespace or a value terminator.
  # The size limit is enforced here, i.e. on the first character that is not part of the literal.
  def close_literal(c)
    raise JSONParserError, "Literal too large at #{@pos}" if @current_literal_size > MAX_LITERAL_SIZE

    return false unless whitespace?(c) || ENDING_VALUE_CHARS.include?(c)

    end_node
    @current_state = :awaiting_next_or_close
    true
  end

  # Marks the creation of a node (object, array, string or literal)
  def begin_node(node_type)
    # Accounting for the new node
    @execution_stats[node_type] = (@execution_stats[node_type] || 0) + 1

    # Managing the node execution stack
    @parent_nodes.push(@current_node)
    @current_node = node_type
  end

  # Marks the closure of a node (object, array, string or literal)
  def end_node
    @current_node = @parent_nodes.pop
    @current_state = :closed if @current_node.nil?
  end

  def reject_char(char)
    raise JSONParserError, "Unexpected char #{char} in position #{@pos}"
  end

  def whitespace?(c)
    WHITESPACE_CHARS.include?(c)
  end

  def control_char?(c)
    # control characters: (U+0000 through U+001F)
    c.ord <= 31
  end

  def valid_literal_char?(c)
    LITERALS_CHAR_TEMPLATE.match?(c)
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Detects JSON documents by running a sampled, streaming validation over the input.
class FormatParser::JSONParser
  include FormatParser::IOUtils
  require_relative 'json_parser/validator'

  JSON_MIME_TYPE = 'application/json'

  # Tells whether the filename carries a ".json" extension (case-insensitive).
  #
  # @param filename [String, nil]
  # @return [Boolean]
  def likely_match?(filename)
    # \z anchors at the very end of the string; "$" would also match before an embedded newline.
    # Regexp#match? returns false for nil instead of raising.
    /\.json\z/i.match?(filename)
  end

  # @param io an IO-like object, wrapped in FormatParser::IOConstraint before being read
  # @return [FormatParser::Text, nil] a Text result when the validator does not raise
  #   (the validator's boolean return value is not inspected), or nil when it raises JSONParserError
  def call(io)
    io = FormatParser::IOConstraint.new(io)
    Validator.new(io).validate

    FormatParser::Text.new(
      format: :json,
      content_type: JSON_MIME_TYPE,
    )
  rescue Validator::JSONParserError
    nil
  end

  FormatParser.register_parser new, natures: :text, formats: :json
end
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -76,6 +76,11 @@ class FormatParser::MP3Parser
|
|
76
76
|
io.seek(0)
|
77
77
|
return if TIFF_HEADER_BYTES.include?(safe_read(io, 4))
|
78
78
|
|
79
|
+
# Prevention against parsing WAV files.
|
80
|
+
io.seek(0)
|
81
|
+
wav_chunk_id, _wav_size, wav_riff_type = safe_read(io, 12).unpack('a4la4')
|
82
|
+
return if wav_chunk_id == 'RIFF' || wav_riff_type == 'WAVE'
|
83
|
+
|
79
84
|
# Read all the ID3 tags (or at least attempt to)
|
80
85
|
io.seek(0)
|
81
86
|
id3v1 = ID3Extraction.attempt_id3_v1_extraction(io)
|
@@ -315,5 +320,5 @@ class FormatParser::MP3Parser
|
|
315
320
|
end
|
316
321
|
end
|
317
322
|
|
318
|
-
FormatParser.register_parser new, natures: :audio, formats: :mp3, priority:
|
323
|
+
FormatParser.register_parser new, natures: :audio, formats: :mp3, priority: 101
|
319
324
|
end
|
data/lib/utf8_reader.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
##
# This class reads individual characters from an IO-like object using UTF-8 encoding.
# This deals with two main concerns:
# - Variable byte length of characters
# - Reducing the number of read operations by loading bytes in chunks

class FormatParser::UTF8Reader
  READ_CHUNK_SIZE = 128 # number of bytes requested from the IO per read

  # Raised when the bytes read do not form a valid UTF-8 character.
  class UTF8CharReaderError < StandardError
  end

  # @param io an object responding to read(n), returning a String or nil
  def initialize(io)
    @io = io
    @chunk = ""
    @index = 0 # byte offset of the next unread byte within @chunk
    @eof = false
  end

  # Reads the next character from the IO.
  #
  # @return [String, nil] a single UTF-8 encoded character, or nil when there are no more bytes
  # @raise [UTF8CharReaderError] when the bytes are not a valid UTF-8 character, including
  #   a multi-byte character truncated by the end of the input
  def read_char
    first_byte = read_byte
    return if first_byte.nil?

    as_bytes = [first_byte]
    (assess_char_length(first_byte) - 1).times { as_bytes << read_byte }

    # A nil among the bytes (input ended mid-character) makes pack raise TypeError.
    char = as_bytes.pack('C*').force_encoding('UTF-8')
    raise UTF8CharReaderError, "Invalid UTF-8 character" unless char.valid_encoding?

    char
  rescue TypeError
    raise UTF8CharReaderError, "Invalid UTF-8 character"
  end

  private

  # Returns the next byte as an Integer, or nil when the IO has nothing left to give.
  def read_byte
    manage_data_chunk
    # getbyte returns nil past the end and does not build an array of all bytes.
    byte = @chunk.getbyte(@index)
    @index += 1 unless byte.nil?
    byte
  end

  # Loads a new chunk from the IO once every byte of the current one has been consumed.
  def manage_data_chunk
    # @index is a byte offset, so it must be compared against the size in bytes:
    # String#length counts characters and is smaller for multi-byte content.
    return if @index < @chunk.bytesize

    @chunk = @io.read(READ_CHUNK_SIZE) || ""
    @index = 0
    @eof = true if @chunk.bytesize < READ_CHUNK_SIZE
  end

  # Tells how many bytes the character has, based on its leading byte.
  def assess_char_length(first_byte)
    # 0_______ (1 byte)
    # 110_____ (2 bytes) 192
    # 1110____ (3 bytes) 224
    # 11110___ (4 bytes) 240
    case first_byte
    when 240.. then 4
    when 224..239 then 3
    when 192..223 then 2
    else 1
    end
  end
end
|
data/spec/format_parser_spec.rb
CHANGED
@@ -34,6 +34,26 @@ describe FormatParser do
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
+
it "fixtures with 'invalid' in the filename should fail to parse" do
|
38
|
+
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
|
39
|
+
file_name = File.basename(fixture_path)
|
40
|
+
next unless file_name.include? "invalid"
|
41
|
+
File.open(fixture_path, 'rb') do |file|
|
42
|
+
FormatParser.parse(file)
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
it "fixtures without 'invalid' in the filename should be parsed successfully" do
|
48
|
+
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
|
49
|
+
file_name = File.basename(fixture_path)
|
50
|
+
next if file_name.include? "invalid"
|
51
|
+
File.open(fixture_path, 'rb') do |file|
|
52
|
+
FormatParser.parse(file)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
37
57
|
it 'triggers parsers in a certain order that corresponds to the parser priorities' do
|
38
58
|
file_contents = StringIO.new('a' * 4096)
|
39
59
|
|
@@ -189,12 +209,20 @@ describe FormatParser do
|
|
189
209
|
'FormatParser::CR3Parser',
|
190
210
|
'FormatParser::DPXParser',
|
191
211
|
'FormatParser::FLACParser',
|
192
|
-
'FormatParser::MP3Parser',
|
193
212
|
'FormatParser::OggParser',
|
194
213
|
'FormatParser::TIFFParser',
|
195
|
-
'FormatParser::WAVParser'
|
214
|
+
'FormatParser::WAVParser',
|
215
|
+
'FormatParser::MP3Parser'
|
196
216
|
])
|
197
217
|
end
|
218
|
+
|
219
|
+
it 'ensures that MP3 parser is the last one among all' do
|
220
|
+
natures = FormatParser.registered_natures
|
221
|
+
formats = FormatParser.registered_formats
|
222
|
+
prioritised_parsers = FormatParser.parsers_for(natures, formats)
|
223
|
+
parser_class_names = prioritised_parsers.map { |parser| parser.class.name }
|
224
|
+
expect(parser_class_names.last).to eq 'FormatParser::MP3Parser'
|
225
|
+
end
|
198
226
|
end
|
199
227
|
|
200
228
|
describe '.register_parser and .deregister_parser' do
|
@@ -55,7 +55,7 @@ describe FormatParser::FLACParser do
|
|
55
55
|
end
|
56
56
|
|
57
57
|
it 'raises an error when sample rate is 0' do
|
58
|
-
fpath = fixtures_dir + 'FLAC/
|
58
|
+
fpath = fixtures_dir + 'FLAC/invalid_sample_rate_0.flac'
|
59
59
|
|
60
60
|
expect {
|
61
61
|
subject.call(File.open(fpath, 'rb'))
|
@@ -0,0 +1,321 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::JSONParser::Validator do
|
4
|
+
def load_file(file_name)
|
5
|
+
io = File.open(Pathname.new(fixtures_dir).join('JSON').join(file_name), 'rb')
|
6
|
+
FormatParser::JSONParser::Validator.new(io)
|
7
|
+
end
|
8
|
+
|
9
|
+
def load_string(content)
|
10
|
+
io = StringIO.new(content.encode(Encoding::UTF_8))
|
11
|
+
FormatParser::JSONParser::Validator.new(io)
|
12
|
+
end
|
13
|
+
|
14
|
+
describe 'When reading root nodes' do
|
15
|
+
it "identifies objects as root nodes" do
|
16
|
+
v = load_string '{"key": "value"}'
|
17
|
+
|
18
|
+
completed = v.validate
|
19
|
+
|
20
|
+
expect(completed).to be true
|
21
|
+
expect(v.stats(:object)).to be 1
|
22
|
+
expect(v.stats(:string)).to be 2
|
23
|
+
end
|
24
|
+
|
25
|
+
it "identifies arrays as root nodes" do
|
26
|
+
v = load_string '["e1", "e2"]'
|
27
|
+
|
28
|
+
completed = v.validate
|
29
|
+
|
30
|
+
expect(completed).to be true
|
31
|
+
expect(v.stats(:array)).to be 1
|
32
|
+
expect(v.stats(:string)).to be 2
|
33
|
+
end
|
34
|
+
|
35
|
+
it "rejects strings as root nodes" do
|
36
|
+
expect do
|
37
|
+
v = load_string '"this is a string"'
|
38
|
+
v.validate
|
39
|
+
end.to raise_error(FormatParser::JSONParser::Validator::JSONParserError)
|
40
|
+
end
|
41
|
+
|
42
|
+
it "rejects literals as root nodes" do
|
43
|
+
expect do
|
44
|
+
v = load_string 'true'
|
45
|
+
v.validate
|
46
|
+
end.to raise_error(FormatParser::JSONParser::Validator::JSONParserError)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe 'When reading objects' do
|
51
|
+
it "recognizes empty objects" do
|
52
|
+
v = load_string '{}'
|
53
|
+
|
54
|
+
completed = v.validate
|
55
|
+
expect(completed).to be true
|
56
|
+
expect(v.stats(:object)).to be 1
|
57
|
+
expect(v.stats(:string)).to be 0
|
58
|
+
end
|
59
|
+
|
60
|
+
it "recognizes objects with a single attribute" do
|
61
|
+
v = load_string '{"key": "value"}'
|
62
|
+
|
63
|
+
completed = v.validate
|
64
|
+
expect(completed).to be true
|
65
|
+
expect(v.stats(:object)).to be 1
|
66
|
+
expect(v.stats(:string)).to be 2
|
67
|
+
end
|
68
|
+
|
69
|
+
it "recognizes objects with attributes of different types" do
|
70
|
+
v = load_string '{"k1": "value", "k2": -123.456, "k3": null}'
|
71
|
+
|
72
|
+
completed = v.validate
|
73
|
+
expect(completed).to be true
|
74
|
+
expect(v.stats(:object)).to be 1
|
75
|
+
expect(v.stats(:string)).to be 4
|
76
|
+
expect(v.stats(:literal)).to be 2
|
77
|
+
end
|
78
|
+
|
79
|
+
it "recognizes condensed objects (no whitespaces)" do
|
80
|
+
v = load_string '{"a":"b","c":"d"}'
|
81
|
+
|
82
|
+
completed = v.validate
|
83
|
+
expect(completed).to be true
|
84
|
+
expect(v.stats(:object)).to be 1
|
85
|
+
expect(v.stats(:string)).to be 4
|
86
|
+
end
|
87
|
+
|
88
|
+
it "recognizes formatted objects" do
|
89
|
+
v = load_string '{
|
90
|
+
"a":"b",
|
91
|
+
"c":"d"
|
92
|
+
}'
|
93
|
+
|
94
|
+
completed = v.validate
|
95
|
+
expect(completed).to be true
|
96
|
+
expect(v.stats(:object)).to be 1
|
97
|
+
expect(v.stats(:string)).to be 4
|
98
|
+
end
|
99
|
+
|
100
|
+
it "recognizes objects with nested objects and arrays" do
|
101
|
+
v = load_string '{
|
102
|
+
"a": {
|
103
|
+
"a1": "-",
|
104
|
+
"a2": "-",
|
105
|
+
"a3": {
|
106
|
+
"a3.1": "-"
|
107
|
+
},
|
108
|
+
},
|
109
|
+
"c": [1, null]
|
110
|
+
}'
|
111
|
+
|
112
|
+
completed = v.validate
|
113
|
+
expect(completed).to be true
|
114
|
+
expect(v.stats(:object)).to be 3
|
115
|
+
expect(v.stats(:array)).to be 1
|
116
|
+
expect(v.stats(:string)).to be 9
|
117
|
+
expect(v.stats(:literal)).to be 2
|
118
|
+
end
|
119
|
+
|
120
|
+
it "rejects objects without double-quoted attribute names" do
|
121
|
+
expect do
|
122
|
+
v = load_string '{a:"b",c:"d"}'
|
123
|
+
v.validate
|
124
|
+
end.to raise_error(FormatParser::JSONParser::Validator::JSONParserError)
|
125
|
+
end
|
126
|
+
|
127
|
+
it "rejects objects without comma separators" do
|
128
|
+
expect do
|
129
|
+
v = load_string '{
|
130
|
+
"a":"b"
|
131
|
+
"c":"d"
|
132
|
+
}'
|
133
|
+
v.validate
|
134
|
+
end.to raise_error(FormatParser::JSONParser::Validator::JSONParserError)
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
describe 'When reading arrays' do
|
139
|
+
it "recognizes empty arrays" do
|
140
|
+
v = load_string '[]'
|
141
|
+
|
142
|
+
completed = v.validate
|
143
|
+
expect(completed).to be true
|
144
|
+
expect(v.stats(:array)).to be 1
|
145
|
+
expect(v.stats(:string)).to be 0
|
146
|
+
end
|
147
|
+
|
148
|
+
it "recognizes arrays with a single element" do
|
149
|
+
v = load_string '[{}]'
|
150
|
+
|
151
|
+
completed = v.validate
|
152
|
+
expect(completed).to be true
|
153
|
+
expect(v.stats(:array)).to be 1
|
154
|
+
expect(v.stats(:object)).to be 1
|
155
|
+
end
|
156
|
+
|
157
|
+
it "recognizes arrays with elements of different types" do
|
158
|
+
v = load_string '[{"k1": "value"}, [], "a string", null, -123.456]'
|
159
|
+
|
160
|
+
completed = v.validate
|
161
|
+
expect(completed).to be true
|
162
|
+
expect(v.stats(:array)).to be 2
|
163
|
+
expect(v.stats(:object)).to be 1
|
164
|
+
expect(v.stats(:string)).to be 3
|
165
|
+
expect(v.stats(:literal)).to be 2
|
166
|
+
end
|
167
|
+
|
168
|
+
it "recognizes condensed arrays (no whitespaces)" do
|
169
|
+
v = load_string '["a",2,null,false]'
|
170
|
+
|
171
|
+
completed = v.validate
|
172
|
+
expect(completed).to be true
|
173
|
+
expect(v.stats(:array)).to be 1
|
174
|
+
expect(v.stats(:string)).to be 1
|
175
|
+
expect(v.stats(:literal)).to be 3
|
176
|
+
end
|
177
|
+
|
178
|
+
it "recognizes formatted arrays" do
|
179
|
+
v = load_string '[
|
180
|
+
{
|
181
|
+
"a":"b"
|
182
|
+
},
|
183
|
+
{
|
184
|
+
"c":"d"
|
185
|
+
}
|
186
|
+
]'
|
187
|
+
|
188
|
+
completed = v.validate
|
189
|
+
expect(completed).to be true
|
190
|
+
expect(v.stats(:array)).to be 1
|
191
|
+
expect(v.stats(:object)).to be 2
|
192
|
+
expect(v.stats(:string)).to be 4
|
193
|
+
end
|
194
|
+
|
195
|
+
it "recognizes arrays with nested objects and arrays" do
|
196
|
+
v = load_string '[{
|
197
|
+
"a": {
|
198
|
+
"a1": "-",
|
199
|
+
"a2": "-",
|
200
|
+
"a3": {
|
201
|
+
"a3.1": "-"
|
202
|
+
},
|
203
|
+
},
|
204
|
+
"c": [1, null]
|
205
|
+
},
|
206
|
+
[{ "a": "b" }, { "c":"d" }]
|
207
|
+
]'
|
208
|
+
|
209
|
+
completed = v.validate
|
210
|
+
expect(completed).to be true
|
211
|
+
expect(v.stats(:array)).to be 3
|
212
|
+
expect(v.stats(:object)).to be 5
|
213
|
+
expect(v.stats(:string)).to be 13
|
214
|
+
expect(v.stats(:literal)).to be 2
|
215
|
+
end
|
216
|
+
|
217
|
+
it "rejects arrays without comma separators" do
|
218
|
+
expect do
|
219
|
+
v = load_string '[
|
220
|
+
"abc"
|
221
|
+
"def"
|
222
|
+
]'
|
223
|
+
v.validate
|
224
|
+
end.to raise_error(FormatParser::JSONParser::Validator::JSONParserError)
|
225
|
+
end
|
226
|
+
end
|
227
|
+
|
228
|
+
describe 'When reading strings' do
|
229
|
+
it "recognizes regular strings" do
|
230
|
+
v = load_string '["abc", "def", "ghi"]'
|
231
|
+
|
232
|
+
completed = v.validate
|
233
|
+
expect(completed).to be true
|
234
|
+
expect(v.stats(:string)).to be 3
|
235
|
+
end
|
236
|
+
|
237
|
+
it "recognizes strings containing excaped characters" do
|
238
|
+
v = load_string '["ab\"c", "6\\2=3"]'
|
239
|
+
|
240
|
+
completed = v.validate
|
241
|
+
expect(completed).to be true
|
242
|
+
expect(v.stats(:string)).to be 2
|
243
|
+
end
|
244
|
+
|
245
|
+
it "recognizes strings containing UTF8 characters" do
|
246
|
+
v = load_string '["abc😃🐶👀", "😃2🐶3👀"]'
|
247
|
+
|
248
|
+
completed = v.validate
|
249
|
+
expect(completed).to be true
|
250
|
+
expect(v.stats(:string)).to be 2
|
251
|
+
end
|
252
|
+
|
253
|
+
it "recognizes long strings containing UTF8 characters" do
|
254
|
+
v = load_string '["aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀aØڃಚ😁🐶👀"]'
|
255
|
+
|
256
|
+
completed = v.validate
|
257
|
+
expect(completed).to be true
|
258
|
+
expect(v.stats(:string)).to be 1
|
259
|
+
end
|
260
|
+
end
|
261
|
+
|
262
|
+
describe 'When reading literals' do
|
263
|
+
it "recognizes numbers" do
|
264
|
+
v = load_string '[1, -2.4, 1.0E+2]'
|
265
|
+
|
266
|
+
completed = v.validate
|
267
|
+
expect(completed).to be true
|
268
|
+
expect(v.stats(:literal)).to be 3
|
269
|
+
end
|
270
|
+
|
271
|
+
it "recognizes boolean values" do
|
272
|
+
v = load_string '[true, false]'
|
273
|
+
|
274
|
+
completed = v.validate
|
275
|
+
expect(completed).to be true
|
276
|
+
expect(v.stats(:literal)).to be 2
|
277
|
+
end
|
278
|
+
|
279
|
+
it "recognizes 'true', 'false' and 'null'" do
|
280
|
+
v = load_string '[true, false, null]'
|
281
|
+
|
282
|
+
completed = v.validate
|
283
|
+
expect(completed).to be true
|
284
|
+
expect(v.stats(:literal)).to be 3
|
285
|
+
end
|
286
|
+
end
|
287
|
+
|
288
|
+
describe 'When reading invalid JSON content' do
|
289
|
+
it "rejects truncated JSON content" do
|
290
|
+
expect do
|
291
|
+
v = load_string '[{
|
292
|
+
"a": ["abc","def"],
|
293
|
+
"b": 4'
|
294
|
+
v.validate
|
295
|
+
end.to raise_error(FormatParser::JSONParser::Validator::JSONParserError)
|
296
|
+
end
|
297
|
+
end
|
298
|
+
|
299
|
+
describe 'When reading large JSON files' do
|
300
|
+
it "Returns 'false' without throwing errors when the initial chunk of a file is a valid JSON" do
|
301
|
+
v = load_file 'long_file_valid.json'
|
302
|
+
|
303
|
+
completed = v.validate
|
304
|
+
expect(completed).to be false
|
305
|
+
end
|
306
|
+
|
307
|
+
it "Returns 'false' without throwing errors when for long non-formatted JSON files" do
|
308
|
+
v = load_file 'long_file_valid_non_formatted.json'
|
309
|
+
|
310
|
+
completed = v.validate
|
311
|
+
expect(completed).to be false
|
312
|
+
end
|
313
|
+
|
314
|
+
it "Returns 'false' without throwing errors when the initial chunk of a file is a valid JSON even if there's an issue later" do
|
315
|
+
v = load_file 'long_file_malformed.json'
|
316
|
+
|
317
|
+
completed = v.validate
|
318
|
+
expect(completed).to be false
|
319
|
+
end
|
320
|
+
end
|
321
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::JSONParser do
|
4
|
+
MAX_READS = 100
|
5
|
+
|
6
|
+
def load_file(file_name)
|
7
|
+
io = File.open(Pathname.new(fixtures_dir).join('JSON').join(file_name), 'rb')
|
8
|
+
FormatParser::ReadLimiter.new(io, max_reads: MAX_READS)
|
9
|
+
end
|
10
|
+
|
11
|
+
def file_size(file_name)
|
12
|
+
File.size(Pathname.new(fixtures_dir).join('JSON').join(file_name))
|
13
|
+
end
|
14
|
+
|
15
|
+
describe 'When reading objects valid JSON files' do
|
16
|
+
it "identifies JSON files with objects as root nodes" do
|
17
|
+
io = load_file 'object.json'
|
18
|
+
|
19
|
+
parsed = subject.call(io)
|
20
|
+
|
21
|
+
expect(parsed).not_to be_nil
|
22
|
+
expect(parsed.nature).to eq(:text)
|
23
|
+
expect(parsed.format).to eq(:json)
|
24
|
+
expect(parsed.content_type).to eq('application/json')
|
25
|
+
end
|
26
|
+
|
27
|
+
it "identifies JSON files carrying arrays as root nodes" do
|
28
|
+
io = load_file 'array.json'
|
29
|
+
|
30
|
+
parsed = subject.call(io)
|
31
|
+
|
32
|
+
expect(parsed).not_to be_nil
|
33
|
+
expect(parsed.nature).to eq(:text)
|
34
|
+
expect(parsed.format).to eq(:json)
|
35
|
+
expect(parsed.content_type).to eq('application/json')
|
36
|
+
end
|
37
|
+
|
38
|
+
it "identifies formatted JSON files" do
|
39
|
+
io = load_file 'formatted_object_utf8.json'
|
40
|
+
|
41
|
+
parsed = subject.call(io)
|
42
|
+
|
43
|
+
expect(parsed).not_to be_nil
|
44
|
+
expect(parsed.nature).to eq(:text)
|
45
|
+
expect(parsed.format).to eq(:json)
|
46
|
+
expect(parsed.content_type).to eq('application/json')
|
47
|
+
end
|
48
|
+
|
49
|
+
it "identifies files wrapped in whitespace characters" do
|
50
|
+
io = load_file 'whitespaces.json'
|
51
|
+
|
52
|
+
parsed = subject.call(io)
|
53
|
+
|
54
|
+
expect(parsed).not_to be_nil
|
55
|
+
expect(parsed.nature).to eq(:text)
|
56
|
+
expect(parsed.format).to eq(:json)
|
57
|
+
expect(parsed.content_type).to eq('application/json')
|
58
|
+
end
|
59
|
+
|
60
|
+
it "identifies files with nested objects and arrays" do
|
61
|
+
io = load_file 'nested_objects.json'
|
62
|
+
|
63
|
+
parsed = subject.call(io)
|
64
|
+
|
65
|
+
expect(parsed).not_to be_nil
|
66
|
+
expect(parsed.nature).to eq(:text)
|
67
|
+
expect(parsed.format).to eq(:json)
|
68
|
+
expect(parsed.content_type).to eq('application/json')
|
69
|
+
end
|
70
|
+
|
71
|
+
it "is reads the whole content of small files before accepting them" do
|
72
|
+
file_name = 'nested_objects.json'
|
73
|
+
io = load_file file_name
|
74
|
+
file_size = file_size file_name
|
75
|
+
|
76
|
+
parsed = subject.call(io)
|
77
|
+
|
78
|
+
expect(parsed).not_to be_nil
|
79
|
+
expect(parsed.nature).to eq(:text)
|
80
|
+
expect(parsed.format).to eq(:json)
|
81
|
+
expect(parsed.content_type).to eq('application/json')
|
82
|
+
expect(io.bytes).to be >= file_size
|
83
|
+
end
|
84
|
+
|
85
|
+
it "is accepts long files before reading the whole content" do
|
86
|
+
file_name = 'long_array_numbers.json'
|
87
|
+
io = load_file file_name
|
88
|
+
file_size = file_size file_name
|
89
|
+
|
90
|
+
parsed = subject.call(io)
|
91
|
+
|
92
|
+
expect(parsed).not_to be_nil
|
93
|
+
expect(parsed.nature).to eq(:text)
|
94
|
+
expect(parsed.format).to eq(:json)
|
95
|
+
expect(parsed.content_type).to eq('application/json')
|
96
|
+
expect(io.bytes).to be < file_size
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
describe 'When reading objects invalid JSON files' do
|
101
|
+
it "rejects files with corrupted JSON data" do
|
102
|
+
io = load_file 'invalid_malformed.json'
|
103
|
+
|
104
|
+
parsed = subject.call(io)
|
105
|
+
|
106
|
+
expect(parsed).to be_nil
|
107
|
+
end
|
108
|
+
|
109
|
+
it "rejects invalid files early without reading the whole content" do
|
110
|
+
io = load_file 'invalid_lorem_ipsum.json'
|
111
|
+
|
112
|
+
parsed = subject.call(io)
|
113
|
+
|
114
|
+
expect(parsed).to be_nil
|
115
|
+
expect(io.reads).to eq(1)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
@@ -11,7 +11,7 @@ describe FormatParser::M3UParser do
|
|
11
11
|
end
|
12
12
|
|
13
13
|
describe 'an m3u file with missing header' do
|
14
|
-
let(:m3u_file) { '
|
14
|
+
let(:m3u_file) { 'invalid_plain_text.m3u' }
|
15
15
|
|
16
16
|
it 'does not parse the file successfully' do
|
17
17
|
expect(parsed_m3u).to be_nil
|
@@ -36,6 +36,12 @@ describe FormatParser::MP3Parser do
|
|
36
36
|
expect(parsed).to be_nil
|
37
37
|
end
|
38
38
|
|
39
|
+
it 'does not misdetect a WAV' do
|
40
|
+
fpath = fixtures_dir + '/WAV/c_SCAM_MIC_SOL001_RUN001.wav'
|
41
|
+
parsed = subject.call(File.open(fpath, 'rb'))
|
42
|
+
expect(parsed).to be_nil
|
43
|
+
end
|
44
|
+
|
39
45
|
describe 'title/artist/album attributes' do
|
40
46
|
let(:parsed) { subject.call(File.open(fpath, 'rb')) }
|
41
47
|
|
@@ -13,7 +13,7 @@ describe FormatParser::OggParser do
|
|
13
13
|
end
|
14
14
|
|
15
15
|
it 'skips a file if it contains more than MAX_POSSIBLE_OGG_PAGE_SIZE bytes of garbage at the end' do
|
16
|
-
parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/
|
16
|
+
parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/invalid_with_garbage_at_the_end.ogg', 'rb'))
|
17
17
|
expect(parse_result).to be_nil
|
18
18
|
end
|
19
19
|
|
@@ -46,17 +46,17 @@ describe FormatParser::PDFParser do
|
|
46
46
|
|
47
47
|
describe 'broken PDF files should not parse' do
|
48
48
|
it 'PDF with missing version header' do
|
49
|
-
parsed_pdf = parse_pdf '
|
49
|
+
parsed_pdf = parse_pdf 'invalid_not_a.pdf'
|
50
50
|
expect(parsed_pdf).to be_nil
|
51
51
|
end
|
52
52
|
|
53
53
|
it 'PDF 2.0 with offset start' do
|
54
|
-
parsed_pdf = parse_pdf 'PDF 2.0 with offset start.pdf'
|
54
|
+
parsed_pdf = parse_pdf 'invalid PDF 2.0 with offset start.pdf'
|
55
55
|
expect(parsed_pdf).to be_nil
|
56
56
|
end
|
57
57
|
|
58
58
|
it 'exceeds the PDF read limit' do
|
59
|
-
parsed_pdf = parse_pdf '
|
59
|
+
parsed_pdf = parse_pdf 'invalid_exceed_PDF_read_limit.pdf'
|
60
60
|
expect(parsed_pdf).to be_nil
|
61
61
|
end
|
62
62
|
end
|
@@ -48,7 +48,7 @@ describe FormatParser::WAVParser do
|
|
48
48
|
|
49
49
|
it "cannot parse file with audio format different from 1 and no 'fact' chunk" do
|
50
50
|
expect {
|
51
|
-
subject.call(File.open(__dir__ + '/../fixtures/WAV/
|
51
|
+
subject.call(File.open(__dir__ + '/../fixtures/WAV/invalid_d_6_Channel_ID.wav', 'rb'))
|
52
52
|
}.to raise_error(FormatParser::IOUtils::InvalidRead)
|
53
53
|
end
|
54
54
|
end
|
@@ -7,7 +7,7 @@ describe FormatParser::WebpParser do
|
|
7
7
|
end
|
8
8
|
|
9
9
|
it 'does not parse files with an unrecognised variant' do
|
10
|
-
result = subject.call(File.open(fixtures_dir + 'WEBP/unrecognised-variant.webp', 'rb'))
|
10
|
+
result = subject.call(File.open(fixtures_dir + 'WEBP/invalid-unrecognised-variant.webp', 'rb'))
|
11
11
|
expect(result).to be_nil
|
12
12
|
end
|
13
13
|
|
@@ -104,6 +104,43 @@ describe 'Fetching data from HTTP remotes' do
|
|
104
104
|
expect(file_information.format).to eq(:png)
|
105
105
|
end
|
106
106
|
|
107
|
+
describe 'correctly parses WAV files without falling back to another filetype' do
|
108
|
+
['c_8kmp316.wav', 'c_SCAM_MIC_SOL001_RUN001.wav'].each do |filename|
|
109
|
+
it "parses WAV file #{filename}" do
|
110
|
+
remote_url = 'http://localhost:9399/WAV/' + filename
|
111
|
+
file_information = FormatParser.parse_http(remote_url)
|
112
|
+
expect(file_information).not_to be_nil
|
113
|
+
expect(file_information.format).to eq(:wav)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
describe "correctly parses files over HTTP without filename hint" do
|
119
|
+
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
|
120
|
+
file_name = File.basename(fixture_path)
|
121
|
+
next if file_name.include? "invalid"
|
122
|
+
|
123
|
+
file_type_dir = fixture_path.delete_prefix(fixtures_dir).delete_suffix(file_name)
|
124
|
+
file_type_dir.delete_prefix!('/').delete_suffix!('/')
|
125
|
+
next if file_type_dir.empty?
|
126
|
+
|
127
|
+
# skipping this one because it's a special case
|
128
|
+
next if file_name == "arch_many_entries.zip"
|
129
|
+
|
130
|
+
it "parses #{file_type_dir} file: #{file_name}" do
|
131
|
+
url = "http://localhost:9399/#{file_type_dir}/#{file_name}?some_param=test".gsub(' ', '%20')
|
132
|
+
result_with_hint = FormatParser.parse_http(url, filename_hint: file_name)
|
133
|
+
result_no_hint = FormatParser.parse_http(url)
|
134
|
+
|
135
|
+
expect(result_with_hint).not_to be_nil
|
136
|
+
expect(result_no_hint).not_to be_nil
|
137
|
+
|
138
|
+
expect(result_no_hint.nature).to eq(result_with_hint.nature)
|
139
|
+
expect(result_no_hint.format).to eq(result_with_hint.format)
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
107
144
|
describe 'when parsing remote fixtures' do
|
108
145
|
Dir.glob(fixtures_dir + '/**/*.*').sort.each do |fixture_path|
|
109
146
|
filename = File.basename(fixture_path)
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
8
8
|
- Julik Tarkhanov
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2023-
|
12
|
+
date: 2023-08-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: exifr
|
@@ -236,6 +236,8 @@ files:
|
|
236
236
|
- lib/parsers/iso_base_media_file_format/decoder.rb
|
237
237
|
- lib/parsers/iso_base_media_file_format/utils.rb
|
238
238
|
- lib/parsers/jpeg_parser.rb
|
239
|
+
- lib/parsers/json_parser.rb
|
240
|
+
- lib/parsers/json_parser/validator.rb
|
239
241
|
- lib/parsers/m3u_parser.rb
|
240
242
|
- lib/parsers/mov_parser.rb
|
241
243
|
- lib/parsers/mov_parser/decoder.rb
|
@@ -260,6 +262,7 @@ files:
|
|
260
262
|
- lib/remote_io.rb
|
261
263
|
- lib/string.rb
|
262
264
|
- lib/text.rb
|
265
|
+
- lib/utf8_reader.rb
|
263
266
|
- lib/video.rb
|
264
267
|
- spec/active_storage/blob_io_spec.rb
|
265
268
|
- spec/active_storage/rails_app_spec.rb
|
@@ -289,6 +292,8 @@ files:
|
|
289
292
|
- spec/parsers/iso_base_media_file_format/decoder_spec.rb
|
290
293
|
- spec/parsers/iso_base_media_file_format/utils_spec.rb
|
291
294
|
- spec/parsers/jpeg_parser_spec.rb
|
295
|
+
- spec/parsers/json_parser/validator_spec.rb
|
296
|
+
- spec/parsers/json_parser_spec.rb
|
292
297
|
- spec/parsers/m3u_parser_spec.rb
|
293
298
|
- spec/parsers/mov_parser_spec.rb
|
294
299
|
- spec/parsers/mp3_parser_spec.rb
|
@@ -314,7 +319,7 @@ licenses:
|
|
314
319
|
- MIT (Hippocratic)
|
315
320
|
metadata:
|
316
321
|
allowed_push_host: https://rubygems.org
|
317
|
-
post_install_message:
|
322
|
+
post_install_message:
|
318
323
|
rdoc_options: []
|
319
324
|
require_paths:
|
320
325
|
- lib
|
@@ -329,8 +334,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
329
334
|
- !ruby/object:Gem::Version
|
330
335
|
version: '0'
|
331
336
|
requirements: []
|
332
|
-
rubygems_version: 3.
|
333
|
-
signing_key:
|
337
|
+
rubygems_version: 3.1.6
|
338
|
+
signing_key:
|
334
339
|
specification_version: 4
|
335
340
|
summary: A library for efficient parsing of file metadata
|
336
341
|
test_files: []
|