format_parser 0.13.6 → 0.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +10 -1
- data/format_parser.gemspec +2 -1
- data/lib/care.rb +2 -2
- data/lib/format_parser.rb +8 -7
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/exif_parser.rb +4 -2
- data/lib/parsers/jpeg_parser.rb +2 -2
- data/lib/parsers/moov_parser.rb +3 -1
- data/lib/parsers/mp3_parser.rb +2 -5
- data/lib/parsers/ogg_parser.rb +218 -0
- data/lib/parsers/pdf_parser.rb +1 -56
- data/lib/read_limiter.rb +3 -3
- data/lib/remote_io.rb +3 -3
- data/spec/format_parser_spec.rb +4 -0
- data/spec/parsers/mp3_parser_spec.rb +1 -1
- data/spec/parsers/ogg_parser_spec.rb +28 -0
- data/spec/parsers/pdf_parser_spec.rb +1 -21
- metadata +24 -4
- data/lib/measurometer.rb +0 -100
- data/spec/measurometer_spec.rb +0 -48
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d9daf0603ac099b75a9ddb85b8061190dc8fe2f3aad46633ff94b1f4a99020e
|
4
|
+
data.tar.gz: d4343aa08f9ec6a6864a5aed3b7ea174f779c3ea0954417e9b31a2c871126f27
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d63a7d9802157e35260b91f5a8077008fa4e7c19837cc4a01ff0d3954fa22d29455a13cb914bc892782d986a7d85b9465eb21b8c753d16c0da774ddb0f5c47c
|
7
|
+
data.tar.gz: 651d41396efdeb4e9f74173d284617baa0bf34f3ce6e8a6890a1ceadf2ef4a3556a3dd0b4b282dc84ea9f65c020ecbe1ae942da78eed85bed6d969161dbb0cbb
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
## 0.14.0
|
2
|
+
* PDF: Reduce the PDF parser to the basic binary detection (PDF/not PDF) until we have a better/more robust PDF parser
|
3
|
+
* MP3: Fix the byte length of MPEG frames calculation to correctly account for ID3V1 and ID3V2 instead of ID3V1 twice
|
4
|
+
* MP3: Remove the workaround for `id3tag` choking on non-matching genre strings (bumps dependency on `id3tag`)
|
5
|
+
* Use Measurometer provided by the [measurometer gem](https://rubygems.org/gems/measurometer)
|
6
|
+
* Ogg: Add support for the Ogg format
|
7
|
+
|
1
8
|
## 0.13.6
|
2
9
|
* Make all reads in the MOOV decoder strict - fail early if reads are improperly sized
|
3
10
|
* Disable parsing for `udta` atoms in MP4/MOV since we do not have a good way of parsing them yet
|
data/README.md
CHANGED
@@ -30,12 +30,13 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
30
30
|
* M4A
|
31
31
|
* ZIP
|
32
32
|
* DOCX, PPTX, XLSX
|
33
|
+
* OGG
|
33
34
|
|
34
35
|
...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
|
35
36
|
|
36
37
|
## Basic usage
|
37
38
|
|
38
|
-
Pass an IO object that responds to `read` and `
|
39
|
+
Pass an IO object that responds to `read`, `seek` and `size` to `FormatParser.parse` and the first confirmed match will be returned.
|
39
40
|
|
40
41
|
```ruby
|
41
42
|
match = FormatParser.parse(File.open("myimage.jpg", "rb"))
|
@@ -107,6 +108,11 @@ Therefore we adapt the following approaches:
|
|
107
108
|
is easier to verify and test, and we likely don't care about all the metadata anyway
|
108
109
|
* Avoid using C libraries which are likely to contain buffer overflows/underflows - we stay memory safe
|
109
110
|
|
111
|
+
## Acknowledgements
|
112
|
+
|
113
|
+
We are incredibly grateful to Remco van't Veer for [exifr](https://github.com/remvee/exifr) and to
|
114
|
+
Krists Ozols for [id3tag](https://github.com/krists/id3tag) that we are using for crucial tasks.
|
115
|
+
|
110
116
|
## Fixture Sources
|
111
117
|
|
112
118
|
Unless specified otherwise in this section the fixture files are MIT licensed and from the FastImage and Dimensions projects.
|
@@ -145,6 +151,9 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
145
151
|
- atc_fixture_vbr.flac is a converted version of the MP3 with the same name
|
146
152
|
- c_11k16btipcm.flac is a converted version of the WAV with the same name
|
147
153
|
|
154
|
+
### OGG
|
155
|
+
- `hi.ogg`, `vorbis.ogg`, `with_confusing_magic_string.ogg`, `with_garbage_at_the_end.ogg` have been generated by the project contributors
|
156
|
+
|
148
157
|
### M4A
|
149
158
|
- fixture.m4a was created by one of the project maintainers and is MIT licensed
|
150
159
|
|
data/format_parser.gemspec
CHANGED
@@ -32,8 +32,9 @@ Gem::Specification.new do |spec|
|
|
32
32
|
|
33
33
|
spec.add_dependency 'ks', '~> 0.0.1'
|
34
34
|
spec.add_dependency 'exifr', '~> 1.0'
|
35
|
-
spec.add_dependency 'id3tag', '~> 0.10'
|
35
|
+
spec.add_dependency 'id3tag', '~> 0.10', '>= 0.10.1'
|
36
36
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
|
+
spec.add_dependency 'measurometer', '~> 1'
|
37
38
|
|
38
39
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
39
40
|
spec.add_development_dependency 'rake', '~> 12'
|
data/lib/care.rb
CHANGED
@@ -173,10 +173,10 @@ class Care
|
|
173
173
|
# @param io[IO] the IO to read from
|
174
174
|
# @param page_i[Integer] which page (zero-based) to read
|
175
175
|
def read_page(io, page_i)
|
176
|
-
|
176
|
+
Measurometer.increment_counter('format_parser.parser.Care.page_reads_from_upsteam', 1)
|
177
177
|
|
178
178
|
io.seek(page_i * @page_size)
|
179
|
-
read_result = io.read(@page_size)
|
179
|
+
read_result = Measurometer.instrument('format_parser.Care.read_page') { io.read(@page_size) }
|
180
180
|
if read_result.nil?
|
181
181
|
# If the read went past the end of the IO the read result will be nil,
|
182
182
|
# so we know our IO is exhausted here
|
data/lib/format_parser.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'set'
|
2
|
+
require 'measurometer'
|
2
3
|
|
3
4
|
# A pretty nimble module for parsing file metadata using partial reads. Contains all the
|
4
5
|
# top-level methods of the library.
|
@@ -17,6 +18,10 @@ module FormatParser
|
|
17
18
|
require_relative 'io_constraint'
|
18
19
|
require_relative 'care'
|
19
20
|
|
21
|
+
# Define Measurometer in the internal namespace as well
|
22
|
+
# so that we stay compatible for the applications that use it
|
23
|
+
const_set(:Measurometer, ::Measurometer)
|
24
|
+
|
20
25
|
# Is used to manage access to the shared array of parser constructors, which might
|
21
26
|
# potentially be mutated from different threads. The mutex won't be hit too often
|
22
27
|
# since it only locks when adding/removing parsers.
|
@@ -95,10 +100,9 @@ module FormatParser
|
|
95
100
|
# @param formats[Array] an array of file formats to scope the parsing to.
|
96
101
|
# For example `[:jpg, :tif]` will scope the parsing to TIFF and JPEG files.
|
97
102
|
# The default value is "all formats known to FormatParser"
|
98
|
-
# @param results[:first, :all
|
99
|
-
# is ambiguous. The default is `:first` which returns the first matching result.
|
100
|
-
#
|
101
|
-
# at most N results.
|
103
|
+
# @param results[:first, :all] one of the values defining how many results to return if parsing
|
104
|
+
# is ambiguous. The default is `:first` which returns the first matching result. `:all` will return all results.
|
105
|
+
# When using `:first` parsing will stop at the first successful match and other parsers won't run.
|
102
106
|
# @param limits_config[ReadLimitsConfig] the configuration object for various read/cache limits. The default
|
103
107
|
# one should be good for most cases.
|
104
108
|
# @return [Array<Result>, Result, nil] either an Array of results, a single parsing result or `nil` if
|
@@ -251,7 +255,4 @@ module FormatParser
|
|
251
255
|
Dir.glob(__dir__ + '/parsers/*.rb').sort.each do |parser_file|
|
252
256
|
require parser_file
|
253
257
|
end
|
254
|
-
# The Measurometer latches itself onto existing classes, so load it after
|
255
|
-
# we have loaded all the parsers
|
256
|
-
require_relative 'measurometer'
|
257
258
|
end
|
data/lib/parsers/exif_parser.rb
CHANGED
@@ -74,7 +74,9 @@ module FormatParser::EXIFParser
|
|
74
74
|
EXIFR.logger = Logger.new(nil)
|
75
75
|
|
76
76
|
def exif_from_tiff_io(constrained_io)
|
77
|
-
|
78
|
-
|
77
|
+
Measurometer.instrument('format_parser.EXIFParser.exif_from_tiff_io') do
|
78
|
+
raw_exif_data = EXIFR::TIFF.new(IOExt.new(constrained_io))
|
79
|
+
raw_exif_data ? EXIFResult.new(raw_exif_data) : nil
|
80
|
+
end
|
79
81
|
end
|
80
82
|
end
|
data/lib/parsers/jpeg_parser.rb
CHANGED
@@ -60,7 +60,7 @@ class FormatParser::JPEGParser
|
|
60
60
|
end
|
61
61
|
end
|
62
62
|
|
63
|
-
|
63
|
+
Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_read_until_capture', @buf.pos)
|
64
64
|
|
65
65
|
# Return at the earliest possible opportunity
|
66
66
|
if @width && @height
|
@@ -137,7 +137,7 @@ class FormatParser::JPEGParser
|
|
137
137
|
# ...and only then read the marker contents and parse it as EXIF
|
138
138
|
exif_data = safe_read(@buf, app1_frame_content_length - EXIF_MAGIC_STRING.bytesize)
|
139
139
|
|
140
|
-
|
140
|
+
Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_sent_to_exif_parser', exif_data.bytesize)
|
141
141
|
|
142
142
|
@exif_data = exif_from_tiff_io(StringIO.new(exif_data))
|
143
143
|
rescue EXIFR::MalformedTIFF
|
data/lib/parsers/moov_parser.rb
CHANGED
@@ -27,7 +27,9 @@ class FormatParser::MOOVParser
|
|
27
27
|
# size that gets parsed just before.
|
28
28
|
max_read_offset = 0xFFFFFFFF
|
29
29
|
decoder = Decoder.new
|
30
|
-
atom_tree =
|
30
|
+
atom_tree = Measurometer.instrument('format_parser.Decoder.extract_atom_stream') do
|
31
|
+
decoder.extract_atom_stream(io, max_read_offset)
|
32
|
+
end
|
31
33
|
|
32
34
|
ftyp_atom = decoder.find_first_atom_by_path(atom_tree, 'ftyp')
|
33
35
|
file_type = ftyp_atom.field_value(:major_brand)
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -43,10 +43,7 @@ class FormatParser::MP3Parser
|
|
43
43
|
def to_h
|
44
44
|
tag = __getobj__
|
45
45
|
MEMBERS.each_with_object({}) do |k, h|
|
46
|
-
|
47
|
-
# If this guard is removed, it fails when trying to do a gsub on a nil,
|
48
|
-
# in /lib/id3tag/frames/v2/genre_frame/genre_parser_pre_24.rb:25:in `just_genres'
|
49
|
-
value = tag.public_send(k) rescue nil
|
46
|
+
value = tag.public_send(k)
|
50
47
|
h[k] = value if value
|
51
48
|
end
|
52
49
|
end
|
@@ -74,7 +71,7 @@ class FormatParser::MP3Parser
|
|
74
71
|
# Compute how many bytes are occupied by the actual MPEG frames
|
75
72
|
ignore_bytes_at_tail = id3v1 ? 128 : 0
|
76
73
|
ignore_bytes_at_head = io.pos
|
77
|
-
bytes_used_by_frames = io.size -
|
74
|
+
bytes_used_by_frames = io.size - ignore_bytes_at_head - ignore_bytes_at_tail
|
78
75
|
|
79
76
|
io.seek(ignore_bytes_at_head)
|
80
77
|
|
@@ -0,0 +1,218 @@
|
|
1
|
+
# https://xiph.org/vorbis/doc/Vorbis_I_spec.pdf
|
2
|
+
# https://en.wikipedia.org/wiki/Ogg#Page_structure
|
3
|
+
class FormatParser::OggParser
|
4
|
+
include FormatParser::IOUtils
|
5
|
+
|
6
|
+
# Maximum size of an Ogg page
|
7
|
+
MAX_POSSIBLE_PAGE_SIZE = 65307
|
8
|
+
|
9
|
+
def call(io)
|
10
|
+
# The format consists of chunks of data each called an "Ogg page". Each page
|
11
|
+
# begins with the characters, "OggS", to identify the file as Ogg format.
|
12
|
+
capture_pattern = safe_read(io, 4)
|
13
|
+
return unless capture_pattern == 'OggS'
|
14
|
+
|
15
|
+
io.seek(28) # skip not important bytes
|
16
|
+
|
17
|
+
# Each header packet begins with the same header fields.
|
18
|
+
# 1) packet_type: 8 bit value (the identification header is type 1)
|
19
|
+
# 2) the characters 'v','o','r','b','i','s' as six octets
|
20
|
+
packet_type, vorbis, _vorbis_version, channels, sample_rate = safe_read(io, 16).unpack('Ca6VCV')
|
21
|
+
return unless packet_type == 1 && vorbis == 'vorbis'
|
22
|
+
|
23
|
+
# In order to calculate the audio duration we have to read a
|
24
|
+
# granule_position of the last Ogg page of the file. Unfortunately, we don't
|
25
|
+
# know where the last page starts. But we do know that max size of an Ogg
|
26
|
+
# page is 65307 bytes. So we read the last 65307 bytes from the file and try
|
27
|
+
# to find the last page in this tail.
|
28
|
+
pos = io.size - MAX_POSSIBLE_PAGE_SIZE
|
29
|
+
pos = 0 if pos < 0
|
30
|
+
io.seek(pos)
|
31
|
+
tail = io.read(MAX_POSSIBLE_PAGE_SIZE)
|
32
|
+
return unless tail
|
33
|
+
|
34
|
+
granule_position = find_last_granule_position(tail)
|
35
|
+
return unless granule_position
|
36
|
+
|
37
|
+
duration = granule_position / sample_rate.to_f
|
38
|
+
return if duration == Float::INFINITY
|
39
|
+
|
40
|
+
FormatParser::Audio.new(
|
41
|
+
format: :ogg,
|
42
|
+
audio_sample_rate_hz: sample_rate,
|
43
|
+
num_audio_channels: channels,
|
44
|
+
media_duration_seconds: duration
|
45
|
+
)
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def all_indices_of_substr_in_str(of_substring, in_string)
|
51
|
+
last_i = 0
|
52
|
+
found_at_indices = []
|
53
|
+
while last_i = in_string.index(of_substring, last_i)
|
54
|
+
found_at_indices << last_i
|
55
|
+
last_i += of_substring.bytesize
|
56
|
+
end
|
57
|
+
found_at_indices
|
58
|
+
end
|
59
|
+
|
60
|
+
# Returns granule_position of the last valid Ogg page contained in the given
|
61
|
+
# tail. Since the tail may contain multiple "OggS" entries the method searches
|
62
|
+
# them recursively starting from the end. The search stops when the first
|
63
|
+
# valid Ogg page is found.
|
64
|
+
#
|
65
|
+
# The granule position contains the offset of the page in terms of the
|
66
|
+
# number of samples from the start of file. So once we know that number
|
67
|
+
# we can estimate how long the file is. We _do_ need to add the number
|
68
|
+
# of samples the granule covers though
|
69
|
+
def find_last_granule_position(in_string)
|
70
|
+
# The Ogg page always starts with "OggS". Find all of them
|
71
|
+
# in the given tail, since we want to scan "tail to head" -
|
72
|
+
# starting with the last index and going down to the first
|
73
|
+
rev_indices = all_indices_of_substr_in_str('OggS', in_string).reverse
|
74
|
+
rev_indices.each do |idx|
|
75
|
+
if granule_pos = extract_granule_position_from_string_at(in_string, idx)
|
76
|
+
return granule_pos
|
77
|
+
end
|
78
|
+
end
|
79
|
+
nil # Nothing matched or the list of indices was empty
|
80
|
+
end
|
81
|
+
|
82
|
+
# Since the magic bits may occur inside the body of the page we have to
|
83
|
+
# validate that what we found is actually an Ogg page by calculating the
|
84
|
+
# checksum. For this reason we have to read the entire page and calculate
|
85
|
+
# its checksum. In order to read the entire Ogg page we first have to read a
|
86
|
+
# part of its header to find out the size of the page.
|
87
|
+
def extract_granule_position_from_string_at(string, at)
|
88
|
+
header_size = 27
|
89
|
+
header_bytes = string.byteslice(at, header_size)
|
90
|
+
return unless header_bytes && header_bytes.bytesize == header_size
|
91
|
+
|
92
|
+
# Read the Ogg page header excluding the segment table (in other words read
|
93
|
+
# first 27 bytes). See https://en.wikipedia.org/wiki/Ogg#Page_structure
|
94
|
+
_capture_pattern,
|
95
|
+
_version,
|
96
|
+
_header_type,
|
97
|
+
granule_position,
|
98
|
+
_bitstream_serial_number,
|
99
|
+
_page_sequence_number,
|
100
|
+
checksum,
|
101
|
+
num_bytes_page_segments = header_bytes.unpack('a4CCQ<VVVC')
|
102
|
+
|
103
|
+
# Read the segment table part of the Ogg page header. Its size is stored in page_segments.
|
104
|
+
#
|
105
|
+
# The segment table is a vector of 8-bit values, each indicating the length
|
106
|
+
# of the corresponding segment within the page body.
|
107
|
+
# If there are no segments in the segment table the page is certainly invalid
|
108
|
+
return if num_bytes_page_segments == 0
|
109
|
+
|
110
|
+
# Read the segment table
|
111
|
+
segment_table_pos = at + header_size
|
112
|
+
segment_table = string.byteslice(segment_table_pos, num_bytes_page_segments)
|
113
|
+
return unless segment_table && segment_table.bytesize == num_bytes_page_segments
|
114
|
+
|
115
|
+
# Calculate the size of the Ogg page
|
116
|
+
num_bytes_used_for_segments = segment_table.unpack('C*').inject(&:+)
|
117
|
+
page_size = header_size + num_bytes_page_segments + num_bytes_used_for_segments
|
118
|
+
|
119
|
+
# Read the entire page now that we know how much we have to read
|
120
|
+
entire_page = string.byteslice(at, page_size)
|
121
|
+
return unless entire_page && entire_page.bytesize == page_size
|
122
|
+
|
123
|
+
# Compute and check the checksum. If this check fails it means one of the two:
|
124
|
+
# - the data is corrupted
|
125
|
+
# - the "OggS" capture pattern occurs inside the body of the page and
|
126
|
+
# we were scanning a random piece of content which was not an Ogg page
|
127
|
+
return unless checksum == calculate_checksum(entire_page)
|
128
|
+
|
129
|
+
# ...and only having gone through all these motions - return the granule position.
|
130
|
+
granule_position
|
131
|
+
end
|
132
|
+
|
133
|
+
# Calculate the CRC using the 0x04C11DB7 polynomial. We cannot use Zlib since
|
134
|
+
# it generates different checksums. Copied from https://github.com/anibali/ruby-ogg
|
135
|
+
def calculate_checksum(data)
|
136
|
+
crc_reg = 0
|
137
|
+
data.each_byte.with_index do |byte, i|
|
138
|
+
# The checksum is calculated over _the entire page_ but with the
|
139
|
+
# placeholder for the checksum - the 4 bytes - zeroed out. The checksum
|
140
|
+
# is then substituted _into_ the page at that offset. So when we go
|
141
|
+
# over bytes at these offsets we will substitute them with 0s
|
142
|
+
b = (22..25).cover?(i) ? 0 : byte
|
143
|
+
crc_reg = (crc_reg << 8) ^ CRC_LOOKUP[((crc_reg >> 24) & 0xff) ^ b]
|
144
|
+
crc_reg = crc_reg % 2**32
|
145
|
+
end
|
146
|
+
|
147
|
+
crc_reg
|
148
|
+
end
|
149
|
+
|
150
|
+
CRC_LOOKUP = [
|
151
|
+
0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9,
|
152
|
+
0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005,
|
153
|
+
0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61,
|
154
|
+
0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd,
|
155
|
+
0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9,
|
156
|
+
0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75,
|
157
|
+
0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011,
|
158
|
+
0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd,
|
159
|
+
0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039,
|
160
|
+
0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5,
|
161
|
+
0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81,
|
162
|
+
0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d,
|
163
|
+
0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49,
|
164
|
+
0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95,
|
165
|
+
0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
|
166
|
+
0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d,
|
167
|
+
0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae,
|
168
|
+
0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072,
|
169
|
+
0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16,
|
170
|
+
0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca,
|
171
|
+
0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde,
|
172
|
+
0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02,
|
173
|
+
0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066,
|
174
|
+
0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
|
175
|
+
0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e,
|
176
|
+
0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692,
|
177
|
+
0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6,
|
178
|
+
0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a,
|
179
|
+
0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e,
|
180
|
+
0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2,
|
181
|
+
0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686,
|
182
|
+
0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a,
|
183
|
+
0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637,
|
184
|
+
0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb,
|
185
|
+
0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f,
|
186
|
+
0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53,
|
187
|
+
0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47,
|
188
|
+
0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b,
|
189
|
+
0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff,
|
190
|
+
0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623,
|
191
|
+
0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7,
|
192
|
+
0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
|
193
|
+
0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f,
|
194
|
+
0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3,
|
195
|
+
0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7,
|
196
|
+
0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b,
|
197
|
+
0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f,
|
198
|
+
0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3,
|
199
|
+
0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640,
|
200
|
+
0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c,
|
201
|
+
0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8,
|
202
|
+
0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24,
|
203
|
+
0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30,
|
204
|
+
0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
|
205
|
+
0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088,
|
206
|
+
0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654,
|
207
|
+
0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0,
|
208
|
+
0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c,
|
209
|
+
0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18,
|
210
|
+
0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
|
211
|
+
0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0,
|
212
|
+
0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c,
|
213
|
+
0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668,
|
214
|
+
0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4
|
215
|
+
].freeze
|
216
|
+
|
217
|
+
FormatParser.register_parser self, natures: :audio, formats: :ogg
|
218
|
+
end
|
data/lib/parsers/pdf_parser.rb
CHANGED
@@ -9,67 +9,12 @@ class FormatParser::PDFParser
|
|
9
9
|
#
|
10
10
|
PDF_MARKER = /%PDF-1\.[0-8]{1}/
|
11
11
|
|
12
|
-
# Page counts have different markers depending on
|
13
|
-
# the PDF type. There is not a single common way of solving
|
14
|
-
# this. The only way of solving this correctly is by adding
|
15
|
-
# different types of PDF's in the specs.
|
16
|
-
#
|
17
|
-
COUNT_MARKERS = ['Count ']
|
18
|
-
EOF_MARKER = '%EOF'
|
19
|
-
|
20
12
|
def call(io)
|
21
13
|
io = FormatParser::IOConstraint.new(io)
|
22
14
|
|
23
15
|
return unless safe_read(io, 9) =~ PDF_MARKER
|
24
16
|
|
25
|
-
|
26
|
-
|
27
|
-
FormatParser::Document.new(
|
28
|
-
format: :pdf,
|
29
|
-
page_count: attributes[:page_count]
|
30
|
-
)
|
31
|
-
end
|
32
|
-
|
33
|
-
private
|
34
|
-
|
35
|
-
# Read ahead bytes until one of % or / is reached.
|
36
|
-
# A header in a PDF always starts with a /
|
37
|
-
# The % is to detect the EOF
|
38
|
-
#
|
39
|
-
def scan_for_attributes(io)
|
40
|
-
result = {}
|
41
|
-
|
42
|
-
while read = safe_read(io, 1)
|
43
|
-
case read
|
44
|
-
when '%'
|
45
|
-
break if safe_read(io, EOF_MARKER.size) == EOF_MARKER
|
46
|
-
when '/'
|
47
|
-
find_page_count(io, result)
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
result
|
52
|
-
end
|
53
|
-
|
54
|
-
def find_page_count(io, result)
|
55
|
-
COUNT_MARKERS.each do |marker|
|
56
|
-
if safe_read(io, marker.size) == marker
|
57
|
-
result[:page_count] = read_numbers(io)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
# Read ahead bytes until no more numbers are found
|
63
|
-
# This assumes that the position of io starts at a
|
64
|
-
# number
|
65
|
-
def read_numbers(io)
|
66
|
-
numbers = ''
|
67
|
-
|
68
|
-
while c = safe_read(io, 1)
|
69
|
-
c =~ /\d+/ ? numbers << c : break
|
70
|
-
end
|
71
|
-
|
72
|
-
numbers.to_i
|
17
|
+
FormatParser::Document.new(format: :pdf)
|
73
18
|
end
|
74
19
|
|
75
20
|
FormatParser.register_parser self, natures: :document, formats: :pdf
|
data/lib/read_limiter.rb
CHANGED
@@ -77,9 +77,9 @@ class FormatParser::ReadLimiter
|
|
77
77
|
# `format_parser.TIFF.read_limiter.num_seeks` and so forth
|
78
78
|
# @return void
|
79
79
|
def send_metrics(prefix)
|
80
|
-
|
81
|
-
|
82
|
-
|
80
|
+
Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_seeks' % prefix, @seeks)
|
81
|
+
Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_reads' % prefix, @reads)
|
82
|
+
Measurometer.add_distribution_value('format_parser.%s.read_limiter.read_bytes' % prefix, @bytes)
|
83
83
|
end
|
84
84
|
|
85
85
|
# Resets all the recorded call counters so that the object can be reused for the next parser,
|
data/lib/remote_io.rb
CHANGED
@@ -60,7 +60,7 @@ class FormatParser::RemoteIO
|
|
60
60
|
# @return [String] the read bytes
|
61
61
|
def read(n_bytes)
|
62
62
|
http_range = (@pos..(@pos + n_bytes - 1))
|
63
|
-
maybe_size, maybe_body = request_range(http_range)
|
63
|
+
maybe_size, maybe_body = Measurometer.instrument('format_parser.RemoteIO.read') { request_range(http_range) }
|
64
64
|
if maybe_size && maybe_body
|
65
65
|
@remote_size = maybe_size
|
66
66
|
@pos += maybe_body.bytesize
|
@@ -103,10 +103,10 @@ class FormatParser::RemoteIO
|
|
103
103
|
# cannot hint size with this response - at least not when working with S3
|
104
104
|
return
|
105
105
|
when 500..599
|
106
|
-
|
106
|
+
Measurometer.increment_counter('format_parser.RemoteIO.upstream50x_errors', 1)
|
107
107
|
raise IntermittentFailure.new(response.status, "Server at #{@uri} replied with a #{response.status} and we might want to retry")
|
108
108
|
else
|
109
|
-
|
109
|
+
Measurometer.increment_counter('format_parser.RemoteIO.invalid_request_errors', 1)
|
110
110
|
raise InvalidRequest.new(response.status, "Server at #{@uri} replied with a #{response.status} and refused our request")
|
111
111
|
end
|
112
112
|
end
|
data/spec/format_parser_spec.rb
CHANGED
@@ -5,6 +5,10 @@ describe FormatParser do
|
|
5
5
|
expect(FormatParser::VERSION).to be_kind_of(String)
|
6
6
|
end
|
7
7
|
|
8
|
+
it 'exposes the Measurometer constant' do
|
9
|
+
expect(FormatParser::Measurometer).to be_kind_of(Module)
|
10
|
+
end
|
11
|
+
|
8
12
|
describe '.parse' do
|
9
13
|
it 'returns nil when trying to parse an empty IO' do
|
10
14
|
d = StringIO.new('')
|
@@ -57,7 +57,7 @@ describe FormatParser::MP3Parser do
|
|
57
57
|
expect(parsed.format).to eq(:mp3)
|
58
58
|
expect(parsed.num_audio_channels).to eq(2)
|
59
59
|
expect(parsed.audio_sample_rate_hz).to eq(44100)
|
60
|
-
expect(parsed.media_duration_seconds).to be_within(0.1).of(
|
60
|
+
expect(parsed.media_duration_seconds).to be_within(0.1).of(1098.03)
|
61
61
|
|
62
62
|
expect(parsed.intrinsics).not_to be_nil
|
63
63
|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::OggParser do
|
4
|
+
it 'parses an ogg file' do
|
5
|
+
parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/vorbis.ogg', 'rb'))
|
6
|
+
|
7
|
+
expect(parse_result.nature).to eq(:audio)
|
8
|
+
expect(parse_result.format).to eq(:ogg)
|
9
|
+
expect(parse_result.num_audio_channels).to eq(1)
|
10
|
+
expect(parse_result.audio_sample_rate_hz).to eq(16000)
|
11
|
+
expect(parse_result.media_duration_seconds).to be_within(0.01).of(2973.95)
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'skips a file if it contains more than MAX_POSSIBLE_OGG_PAGE_SIZE bytes of garbage at the end' do
|
15
|
+
parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/with_garbage_at_the_end.ogg', 'rb'))
|
16
|
+
expect(parse_result).to be_nil
|
17
|
+
end
|
18
|
+
|
19
|
+
it "correctly parses an ogg file when a magic string occurs in the page's body" do
|
20
|
+
parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/with_confusing_magic_string.ogg', 'rb'))
|
21
|
+
|
22
|
+
expect(parse_result.nature).to eq(:audio)
|
23
|
+
expect(parse_result.format).to eq(:ogg)
|
24
|
+
expect(parse_result.num_audio_channels).to eq(1)
|
25
|
+
expect(parse_result.audio_sample_rate_hz).to eq(8000)
|
26
|
+
expect(parse_result.media_duration_seconds).to be_within(0.01).of(0.45)
|
27
|
+
end
|
28
|
+
end
|
@@ -18,10 +18,6 @@ describe FormatParser::PDFParser do
|
|
18
18
|
expect(parsed_pdf.nature).to eq(:document)
|
19
19
|
expect(parsed_pdf.format).to eq(:pdf)
|
20
20
|
end
|
21
|
-
|
22
|
-
it 'has a correct page count' do
|
23
|
-
expect(parsed_pdf.page_count).to eq(hash.fetch(:page_count))
|
24
|
-
end
|
25
21
|
end
|
26
22
|
|
27
23
|
describe 'a PDF file with a missing version header' do
|
@@ -44,25 +40,9 @@ describe FormatParser::PDFParser do
|
|
44
40
|
pending 'does not parse succesfully'
|
45
41
|
end
|
46
42
|
|
47
|
-
describe 'a PDF file with a missing COUNT_HEADER' do
|
48
|
-
let(:pdf_file) { 'missing_page_count.pdf' }
|
49
|
-
|
50
|
-
it 'does not return a page count' do
|
51
|
-
expect(parsed_pdf.page_count).to eq(nil)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
43
|
describe 'parses a PDF file' do
|
56
44
|
describe 'a single page file' do
|
57
|
-
include_examples :behave_like_pdf, file: '1_page.pdf'
|
58
|
-
end
|
59
|
-
|
60
|
-
describe 'a multi page pdf file' do
|
61
|
-
include_examples :behave_like_pdf, file: '2_pages.pdf', page_count: 2
|
62
|
-
end
|
63
|
-
|
64
|
-
describe 'a multi page pdf file with content' do
|
65
|
-
include_examples :behave_like_pdf, file: '10_pages.pdf', page_count: 10
|
45
|
+
include_examples :behave_like_pdf, file: '1_page.pdf'
|
66
46
|
end
|
67
47
|
end
|
68
48
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-
|
12
|
+
date: 2018-06-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -46,6 +46,9 @@ dependencies:
|
|
46
46
|
- - "~>"
|
47
47
|
- !ruby/object:Gem::Version
|
48
48
|
version: '0.10'
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: 0.10.1
|
49
52
|
type: :runtime
|
50
53
|
prerelease: false
|
51
54
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -53,6 +56,9 @@ dependencies:
|
|
53
56
|
- - "~>"
|
54
57
|
- !ruby/object:Gem::Version
|
55
58
|
version: '0.10'
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.10.1
|
56
62
|
- !ruby/object:Gem::Dependency
|
57
63
|
name: faraday
|
58
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,6 +73,20 @@ dependencies:
|
|
67
73
|
- - "~>"
|
68
74
|
- !ruby/object:Gem::Version
|
69
75
|
version: '0.13'
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: measurometer
|
78
|
+
requirement: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1'
|
83
|
+
type: :runtime
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1'
|
70
90
|
- !ruby/object:Gem::Dependency
|
71
91
|
name: rspec
|
72
92
|
requirement: !ruby/object:Gem::Requirement
|
@@ -186,7 +206,6 @@ files:
|
|
186
206
|
- lib/image.rb
|
187
207
|
- lib/io_constraint.rb
|
188
208
|
- lib/io_utils.rb
|
189
|
-
- lib/measurometer.rb
|
190
209
|
- lib/parsers/aiff_parser.rb
|
191
210
|
- lib/parsers/bmp_parser.rb
|
192
211
|
- lib/parsers/cr2_parser.rb
|
@@ -201,6 +220,7 @@ files:
|
|
201
220
|
- lib/parsers/moov_parser/decoder.rb
|
202
221
|
- lib/parsers/mp3_parser.rb
|
203
222
|
- lib/parsers/mp3_parser/id3_extraction.rb
|
223
|
+
- lib/parsers/ogg_parser.rb
|
204
224
|
- lib/parsers/pdf_parser.rb
|
205
225
|
- lib/parsers/png_parser.rb
|
206
226
|
- lib/parsers/psd_parser.rb
|
@@ -220,7 +240,6 @@ files:
|
|
220
240
|
- spec/format_parser_inspect_spec.rb
|
221
241
|
- spec/format_parser_spec.rb
|
222
242
|
- spec/io_utils_spec.rb
|
223
|
-
- spec/measurometer_spec.rb
|
224
243
|
- spec/parsers/aiff_parser_spec.rb
|
225
244
|
- spec/parsers/bmp_parser_spec.rb
|
226
245
|
- spec/parsers/cr2_parser_spec.rb
|
@@ -232,6 +251,7 @@ files:
|
|
232
251
|
- spec/parsers/jpeg_parser_spec.rb
|
233
252
|
- spec/parsers/moov_parser_spec.rb
|
234
253
|
- spec/parsers/mp3_parser_spec.rb
|
254
|
+
- spec/parsers/ogg_parser_spec.rb
|
235
255
|
- spec/parsers/pdf_parser_spec.rb
|
236
256
|
- spec/parsers/png_parser_spec.rb
|
237
257
|
- spec/parsers/psd_parser_spec.rb
|
data/lib/measurometer.rb
DELETED
@@ -1,100 +0,0 @@
|
|
1
|
-
class FormatParser::Measurometer
|
2
|
-
class << self
|
3
|
-
# Permits adding instrumentation drivers. Measurometer is 1-1 API
|
4
|
-
# compatible with Appsignal, which we use a lot. So to magically
|
5
|
-
# obtain all Appsignal instrumentation, add the Appsignal module
|
6
|
-
# as a driver.
|
7
|
-
#
|
8
|
-
# Measurometer.drivers << Appsignal
|
9
|
-
#
|
10
|
-
# A driver must be reentrant and thread-safe - it should be possible
|
11
|
-
# to have multiple `instrument` calls open from different threads at the
|
12
|
-
# same time.
|
13
|
-
# The driver must support the same interface as the Measurometer class
|
14
|
-
# itself, minus the `drivers` and `instrument_instance_method` methods.
|
15
|
-
#
|
16
|
-
# @return Array
|
17
|
-
def drivers
|
18
|
-
@drivers ||= []
|
19
|
-
@drivers
|
20
|
-
end
|
21
|
-
|
22
|
-
# Runs a given block within a cascade of `instrument` blocks of all the
|
23
|
-
# added drivers.
|
24
|
-
#
|
25
|
-
# Measurometer.instrument('do_foo') { compute! }
|
26
|
-
#
|
27
|
-
# unfolds to
|
28
|
-
# Appsignal.instrument('do_foo') do
|
29
|
-
# Statsd.timing('do_foo') do
|
30
|
-
# compute!
|
31
|
-
# end
|
32
|
-
# end
|
33
|
-
#
|
34
|
-
# A driver must be reentrant and thread-safe - it should be possible
|
35
|
-
# to have multiple `instrument` calls open from different threads at the
|
36
|
-
# same time.
|
37
|
-
# The driver must support the same interface as the Measurometer class
|
38
|
-
# itself, minus the `drivers` and `instrument_instance_method` methods.
|
39
|
-
#
|
40
|
-
# @param block_name[String] under which path to push the metric
|
41
|
-
# @param blk[#call] the block to instrument
|
42
|
-
# @return [Object] the return value of &blk
|
43
|
-
def instrument(block_name, &blk)
|
44
|
-
return yield unless @drivers && @drivers.any? # The block wrapping business is not free
|
45
|
-
@drivers.inject(blk) { |outer_block, driver|
|
46
|
-
-> {
|
47
|
-
driver.instrument(block_name, &outer_block)
|
48
|
-
}
|
49
|
-
}.call
|
50
|
-
end
|
51
|
-
|
52
|
-
# Adds a distribution value (sample) under a given path
|
53
|
-
#
|
54
|
-
# @param value_path[String] under which path to push the metric
|
55
|
-
# @param value[Numeric] distribution value
|
56
|
-
# @return nil
|
57
|
-
def add_distribution_value(value_path, value)
|
58
|
-
(@drivers || []).each { |d| d.add_distribution_value(value_path, value) }
|
59
|
-
nil
|
60
|
-
end
|
61
|
-
|
62
|
-
# Increment a named counter under a given path
|
63
|
-
#
|
64
|
-
# @param counter_path[String] under which path to push the metric
|
65
|
-
# @param by[Integer] the counter increment to apply
|
66
|
-
# @return nil
|
67
|
-
def increment_counter(counter_path, by)
|
68
|
-
(@drivers || []).each { |d| d.increment_counter(counter_path, by) }
|
69
|
-
nil
|
70
|
-
end
|
71
|
-
|
72
|
-
# Wrap an anonymous module around an instance method in the given class to have
|
73
|
-
# it instrumented automatically. The name of the measurement will be interpolated as:
|
74
|
-
#
|
75
|
-
# "#{prefix}.#{rightmost_class_constant_name}.#{instance_method_name}"
|
76
|
-
#
|
77
|
-
# @param target_class[Class] the class to instrument
|
78
|
-
# @param instance_method_name_to_instrument[Symbol] the method name to instrument
|
79
|
-
# @param path_prefix[String] under which path to push the instrumented metric
|
80
|
-
# @return void
|
81
|
-
def instrument_instance_method(target_class, instance_method_name_to_instrument, path_prefix)
|
82
|
-
short_class_name = target_class.to_s.split('::').last
|
83
|
-
instrumentation_name = [path_prefix, short_class_name, instance_method_name_to_instrument].join('.')
|
84
|
-
instrumenter_module = Module.new do
|
85
|
-
define_method(instance_method_name_to_instrument) do |*any|
|
86
|
-
::FormatParser::Measurometer.instrument(instrumentation_name) { super(*any) }
|
87
|
-
end
|
88
|
-
end
|
89
|
-
target_class.prepend(instrumenter_module)
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
# Instrument things interesting in the global sense
|
94
|
-
instrument_instance_method(FormatParser::RemoteIO, :read, 'format_parser')
|
95
|
-
instrument_instance_method(Care::Cache, :read_page, 'format_parser')
|
96
|
-
|
97
|
-
# Instrument more specific things on a per-parser basis
|
98
|
-
instrument_instance_method(FormatParser::EXIFParser, :scan_image_tiff, 'format_parser')
|
99
|
-
instrument_instance_method(FormatParser::MOOVParser::Decoder, :extract_atom_stream, 'format_parser.parsers.MOOVParser')
|
100
|
-
end
|
data/spec/measurometer_spec.rb
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe FormatParser::Measurometer do
|
4
|
-
RSpec::Matchers.define :include_counter_or_measurement_named do |named|
|
5
|
-
match do |actual|
|
6
|
-
actual.any? do |e|
|
7
|
-
e[0] == named && e[1] > 0
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
it 'instruments a full cycle FormatParser.parse' do
|
13
|
-
driver_class = Class.new do
|
14
|
-
attr_accessor :timings, :counters, :distributions
|
15
|
-
def instrument(block_name)
|
16
|
-
s = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
17
|
-
yield.tap do
|
18
|
-
delta = Process.clock_gettime(Process::CLOCK_MONOTONIC) - s
|
19
|
-
@timings ||= []
|
20
|
-
@timings << [block_name, delta * 1000]
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def add_distribution_value(value_path, value)
|
25
|
-
@distributions ||= []
|
26
|
-
@distributions << [value_path, value]
|
27
|
-
end
|
28
|
-
|
29
|
-
def increment_counter(value_path, value)
|
30
|
-
@counters ||= []
|
31
|
-
@counters << [value_path, value]
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
instrumenter = driver_class.new
|
36
|
-
described_class.drivers << instrumenter
|
37
|
-
|
38
|
-
FormatParser.parse(File.open(fixtures_dir + 'JPEG/keynote_recognized_as_jpeg.key', 'rb'), results: :all)
|
39
|
-
|
40
|
-
described_class.drivers.delete(instrumenter)
|
41
|
-
expect(described_class.drivers).not_to include(instrumenter)
|
42
|
-
|
43
|
-
expect(instrumenter.counters).to include_counter_or_measurement_named('format_parser.detected_formats.zip')
|
44
|
-
expect(instrumenter.counters).to include_counter_or_measurement_named('format_parser.parser.Care.page_reads_from_upsteam')
|
45
|
-
expect(instrumenter.distributions).to include_counter_or_measurement_named('format_parser.ZIPParser.read_limiter.read_bytes')
|
46
|
-
expect(instrumenter.timings).to include_counter_or_measurement_named('format_parser.Cache.read_page')
|
47
|
-
end
|
48
|
-
end
|