format_parser 0.3.3 → 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/format_parser.gemspec +1 -1
- data/lib/format_parser.rb +22 -19
- data/lib/format_parser/version.rb +1 -1
- data/lib/io_constraint.rb +0 -1
- data/lib/parsers/exif_parser.rb +16 -2
- data/lib/read_limiter.rb +6 -0
- data/spec/format_parser_spec.rb +20 -0
- data/spec/parsers/exif_parser_spec.rb +26 -0
- data/spec/read_limiter_spec.rb +10 -0
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99c08f5e2482e524cf7b6c54eb4de371bbf1a107
|
4
|
+
data.tar.gz: 3c33f9336c6ec7479b03102f8dbc4ab8610a6420
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b9892a12a6007744c70baa01200d432b40868d6f8d854b506dea7349ef84282133f49ab244a9d383b7772221d29dc950ae07c604b57247784f42aae74a03474b
|
7
|
+
data.tar.gz: 643b274aa45ef8a11dde7e178d03036488767ba56f4e4680edf647ebb5098aff52c08bcecc9578c73bd6835aa235ac31358032806e0c28c5c113d8edad9734e1
|
data/format_parser.gemspec
CHANGED
@@ -39,5 +39,5 @@ Gem::Specification.new do |spec|
|
|
39
39
|
spec.add_development_dependency 'simplecov', '~> 0.15'
|
40
40
|
spec.add_development_dependency 'pry', '~> 0.11'
|
41
41
|
spec.add_development_dependency 'yard', '~> 0.9'
|
42
|
-
spec.add_development_dependency 'wetransfer_style', '0.
|
42
|
+
spec.add_development_dependency 'wetransfer_style', '0.5.0'
|
43
43
|
end
|
data/lib/format_parser.rb
CHANGED
@@ -13,9 +13,11 @@ module FormatParser
|
|
13
13
|
require_relative 'care'
|
14
14
|
|
15
15
|
PARSER_MUX = Mutex.new
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
|
17
|
+
MAX_BYTES_READ_PER_PARSER = 512 * 1024
|
18
|
+
MAX_READS_PER_PARSER = 64 * 1024
|
19
|
+
MAX_SEEKS_PER_PARSER = 64 * 1024
|
20
|
+
MAX_CACHE_PAGE_FAULTS_PER_PARSER = 4
|
19
21
|
|
20
22
|
def self.register_parser(callable_or_responding_to_new, formats:, natures:)
|
21
23
|
parser_provided_formats = Array(formats)
|
@@ -46,24 +48,17 @@ module FormatParser
|
|
46
48
|
end
|
47
49
|
|
48
50
|
def self.parse_http(url, **kwargs)
|
49
|
-
|
50
|
-
cached_io = Care::IOWrapper.new(remote_io)
|
51
|
-
|
52
|
-
# Prefetch the first page, since it is very likely to be touched
|
53
|
-
# by all parsers anyway. Additionally, when using RemoteIO we need
|
54
|
-
# to explicitly obtain the size of the resource, which is only available
|
55
|
-
# after having performed at least one successful GET - at least on S3
|
56
|
-
cached_io.read(1)
|
57
|
-
cached_io.seek(0)
|
58
|
-
|
59
|
-
parse(cached_io, **kwargs)
|
51
|
+
parse(RemoteIO.new(url), **kwargs)
|
60
52
|
end
|
61
53
|
|
62
54
|
# Return all by default
|
63
55
|
def self.parse(io, natures: @parsers_per_nature.keys, formats: @parsers_per_format.keys, results: :first)
|
64
|
-
#
|
65
|
-
#
|
66
|
-
|
56
|
+
# Limit the number of cached _pages_ we may fetch. This allows us to limit the number
|
57
|
+
# of page faults (page cache misses) a parser may incur
|
58
|
+
read_limiter_under_cache = FormatParser::ReadLimiter.new(io, max_reads: MAX_CACHE_PAGE_FAULTS_PER_PARSER)
|
59
|
+
|
60
|
+
# Then configure a layer of caching on top of that
|
61
|
+
cached_io = Care::IOWrapper.new(read_limiter_under_cache)
|
67
62
|
|
68
63
|
# How many results has the user asked for? Used to determine whether an array
|
69
64
|
# is returned or not.
|
@@ -83,9 +78,15 @@ module FormatParser
|
|
83
78
|
|
84
79
|
results = parsers.lazy.map do |parser|
|
85
80
|
# We need to rewind for each parser, anew
|
86
|
-
|
81
|
+
cached_io.seek(0)
|
82
|
+
|
83
|
+
# ...and we have to reset the cache page fault limit, each parser is allowed to cause N page faults
|
84
|
+
# - i.e. this is not a shared limit
|
85
|
+
read_limiter_under_cache.reset_limits!
|
86
|
+
|
87
87
|
# Limit how many operations the parser can perform
|
88
|
-
limited_io = ReadLimiter.new(
|
88
|
+
limited_io = ReadLimiter.new(cached_io, max_bytes: MAX_BYTES_READ_PER_PARSER, max_reads: MAX_READS_PER_PARSER, max_seeks: MAX_SEEKS_PER_PARSER)
|
89
|
+
|
89
90
|
begin
|
90
91
|
parser.call(limited_io)
|
91
92
|
rescue IOUtils::InvalidRead
|
@@ -95,6 +96,8 @@ module FormatParser
|
|
95
96
|
# The parser tried to read too much - most likely the file structure
|
96
97
|
# caused the parser to go off-track. Strictly speaking we should log this
|
97
98
|
# and examine the file more closely.
|
99
|
+
# Or the parser caused too many cache pages to be fetched, which likely means we should not allow
|
100
|
+
# it to continue
|
98
101
|
end
|
99
102
|
end.reject(&:nil?).take(amount)
|
100
103
|
|
data/lib/io_constraint.rb
CHANGED
data/lib/parsers/exif_parser.rb
CHANGED
@@ -5,8 +5,8 @@ require 'delegate'
|
|
5
5
|
class FormatParser::EXIFParser
|
6
6
|
include FormatParser::IOUtils
|
7
7
|
|
8
|
-
# EXIFR kindly requests the presence of
|
9
|
-
#
|
8
|
+
# EXIFR kindly requests the presence of a few more methods than what our IOConstraint
|
9
|
+
# is willing to provide, but they can be derived from the available ones
|
10
10
|
class IOExt < SimpleDelegator
|
11
11
|
def readbyte
|
12
12
|
if byte = read(1)
|
@@ -14,6 +14,20 @@ class FormatParser::EXIFParser
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
+
def seek(n, seek_mode = IO::SEEK_SET)
|
18
|
+
io = __getobj__
|
19
|
+
case seek_mode
|
20
|
+
when IO::SEEK_SET
|
21
|
+
io.seek(n)
|
22
|
+
when IO::SEEK_CUR
|
23
|
+
io.seek(io.pos + n)
|
24
|
+
when IO::SEEK_END
|
25
|
+
io.seek(io.size + n)
|
26
|
+
else
|
27
|
+
raise Errno::EINVAL
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
17
31
|
alias_method :getbyte, :readbyte
|
18
32
|
end
|
19
33
|
|
data/lib/read_limiter.rb
CHANGED
data/spec/format_parser_spec.rb
CHANGED
@@ -21,6 +21,26 @@ describe FormatParser do
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
+
it 'fails gracefully when a parser module reads more and more causing page faults and prevents too many reads on the source' do
|
25
|
+
exploit = ->(io) {
|
26
|
+
loop {
|
27
|
+
skip = 16 * 1024
|
28
|
+
io.read(1)
|
29
|
+
io.seek(io.pos + skip)
|
30
|
+
}
|
31
|
+
}
|
32
|
+
FormatParser.register_parser exploit, natures: :document, formats: :exploit
|
33
|
+
|
34
|
+
sample_io = StringIO.new(Random.new.bytes(1024 * 1024 * 4))
|
35
|
+
|
36
|
+
expect(sample_io).to receive(:read).at_most(4).times.and_call_original
|
37
|
+
|
38
|
+
result = FormatParser.parse(sample_io, formats: [:exploit])
|
39
|
+
expect(result).to be_nil
|
40
|
+
|
41
|
+
FormatParser.deregister_parser(exploit)
|
42
|
+
end
|
43
|
+
|
24
44
|
describe 'multiple values return' do
|
25
45
|
let(:blob) { StringIO.new(Random.new.bytes(512 * 1024)) }
|
26
46
|
let(:audio) { FormatParser::Audio.new(format: :aiff, num_audio_channels: 1) }
|
@@ -30,4 +30,30 @@ describe FormatParser::EXIFParser do
|
|
30
30
|
end
|
31
31
|
end
|
32
32
|
end
|
33
|
+
|
34
|
+
describe 'IOExt' do
|
35
|
+
it 'supports readbyte' do
|
36
|
+
io = FormatParser::EXIFParser::IOExt.new(StringIO.new('hello'))
|
37
|
+
expect(io.readbyte).to eq(104)
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'supports getbyte' do
|
41
|
+
io = FormatParser::EXIFParser::IOExt.new(StringIO.new('hello'))
|
42
|
+
expect(io.getbyte).to eq(104)
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'supports seek modes' do
|
46
|
+
io = FormatParser::EXIFParser::IOExt.new(StringIO.new('hello'))
|
47
|
+
io.seek(1, IO::SEEK_SET)
|
48
|
+
|
49
|
+
io.seek(1, IO::SEEK_CUR)
|
50
|
+
expect(io.read(1)).to eq('l')
|
51
|
+
|
52
|
+
io.seek(-1, IO::SEEK_END)
|
53
|
+
expect(io.read(1)).to eq('o')
|
54
|
+
|
55
|
+
io.seek(1)
|
56
|
+
expect(io.read(1)).to eq('e')
|
57
|
+
end
|
58
|
+
end
|
33
59
|
end
|
data/spec/read_limiter_spec.rb
CHANGED
@@ -41,4 +41,14 @@ describe FormatParser::ReadLimiter do
|
|
41
41
|
reader.read(1)
|
42
42
|
}.to raise_error(/bytes budget \(512\) exceeded/)
|
43
43
|
end
|
44
|
+
|
45
|
+
it 'can be reset!' do
|
46
|
+
reader = FormatParser::ReadLimiter.new(io, max_bytes: 512)
|
47
|
+
reader.read(512)
|
48
|
+
expect {
|
49
|
+
reader.read(1)
|
50
|
+
}.to raise_error(/budget/)
|
51
|
+
reader.reset_limits!
|
52
|
+
reader.read(1)
|
53
|
+
end
|
44
54
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.3
|
4
|
+
version: 0.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-
|
12
|
+
date: 2018-03-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -129,14 +129,14 @@ dependencies:
|
|
129
129
|
requirements:
|
130
130
|
- - '='
|
131
131
|
- !ruby/object:Gem::Version
|
132
|
-
version: 0.
|
132
|
+
version: 0.5.0
|
133
133
|
type: :development
|
134
134
|
prerelease: false
|
135
135
|
version_requirements: !ruby/object:Gem::Requirement
|
136
136
|
requirements:
|
137
137
|
- - '='
|
138
138
|
- !ruby/object:Gem::Version
|
139
|
-
version: 0.
|
139
|
+
version: 0.5.0
|
140
140
|
description: |-
|
141
141
|
A Ruby library for prying open files you can convert to a previewable format, such as video, image and audio files. It includes
|
142
142
|
a number of parser modules that try to recover metadata useful for post-processing and layout while reading the absolute
|
@@ -230,7 +230,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
230
230
|
version: '0'
|
231
231
|
requirements: []
|
232
232
|
rubyforge_project:
|
233
|
-
rubygems_version: 2.
|
233
|
+
rubygems_version: 2.6.11
|
234
234
|
signing_key:
|
235
235
|
specification_version: 4
|
236
236
|
summary: A library for efficient parsing of file metadata
|