format_parser 0.13.6 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +10 -1
- data/format_parser.gemspec +2 -1
- data/lib/care.rb +2 -2
- data/lib/format_parser.rb +8 -7
- data/lib/format_parser/version.rb +1 -1
- data/lib/parsers/exif_parser.rb +4 -2
- data/lib/parsers/jpeg_parser.rb +2 -2
- data/lib/parsers/moov_parser.rb +3 -1
- data/lib/parsers/mp3_parser.rb +2 -5
- data/lib/parsers/ogg_parser.rb +218 -0
- data/lib/parsers/pdf_parser.rb +1 -56
- data/lib/read_limiter.rb +3 -3
- data/lib/remote_io.rb +3 -3
- data/spec/format_parser_spec.rb +4 -0
- data/spec/parsers/mp3_parser_spec.rb +1 -1
- data/spec/parsers/ogg_parser_spec.rb +28 -0
- data/spec/parsers/pdf_parser_spec.rb +1 -21
- metadata +24 -4
- data/lib/measurometer.rb +0 -100
- data/spec/measurometer_spec.rb +0 -48
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d9daf0603ac099b75a9ddb85b8061190dc8fe2f3aad46633ff94b1f4a99020e
|
4
|
+
data.tar.gz: d4343aa08f9ec6a6864a5aed3b7ea174f779c3ea0954417e9b31a2c871126f27
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d63a7d9802157e35260b91f5a8077008fa4e7c19837cc4a01ff0d3954fa22d29455a13cb914bc892782d986a7d85b9465eb21b8c753d16c0da774ddb0f5c47c
|
7
|
+
data.tar.gz: 651d41396efdeb4e9f74173d284617baa0bf34f3ce6e8a6890a1ceadf2ef4a3556a3dd0b4b282dc84ea9f65c020ecbe1ae942da78eed85bed6d969161dbb0cbb
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
## 0.14.0
|
2
|
+
* PDF: Reduce the PDF parser to the basic binary detection (PDF/not PDF) until we have a better/more robust PDF parser
|
3
|
+
* MP3: Fix the byte length of MPEG frames calculation to correctly account for ID3V1 and ID3V2 instead of ID3V1 twice
|
4
|
+
* MP3: Remove the workaround for `id3tag` choking on non-matching genre strings (bumps dependency on `id3tag`)
|
5
|
+
* Use Measurometer provided by the [measurometer gem](https://rubygems.org/gems/measurometer)
|
6
|
+
* Ogg: Add support for the Ogg format
|
7
|
+
|
1
8
|
## 0.13.6
|
2
9
|
* Make all reads in the MOOV decoder strict - fail early if reads are improperly sized
|
3
10
|
* Disable parsing for `udta` atoms in MP4/MOV since we do not have a good way of parsing them yet
|
data/README.md
CHANGED
@@ -30,12 +30,13 @@ and [dimensions,](https://github.com/sstephenson/dimensions) borrowing from them
|
|
30
30
|
* M4A
|
31
31
|
* ZIP
|
32
32
|
* DOCX, PPTX, XLSX
|
33
|
+
* OGG
|
33
34
|
|
34
35
|
...with [more](https://github.com/WeTransfer/format_parser/issues?q=is%3Aissue+is%3Aopen+label%3Aformats) on the way!
|
35
36
|
|
36
37
|
## Basic usage
|
37
38
|
|
38
|
-
Pass an IO object that responds to `read` and `
|
39
|
+
Pass an IO object that responds to `read`, `seek` and `size` to `FormatParser.parse` and the first confirmed match will be returned.
|
39
40
|
|
40
41
|
```ruby
|
41
42
|
match = FormatParser.parse(File.open("myimage.jpg", "rb"))
|
@@ -107,6 +108,11 @@ Therefore we adapt the following approaches:
|
|
107
108
|
is easier to verify and test, and we likely don't care about all the metadata anyway
|
108
109
|
* Avoid using C libraries which are likely to contain buffer overflows/underflows - we stay memory safe
|
109
110
|
|
111
|
+
## Acknowledgements
|
112
|
+
|
113
|
+
We are incredibly grateful to Remco van't Veer for [exifr](https://github.com/remvee/exifr) and to
|
114
|
+
Krists Ozols for [id3tag](https://github.com/krists/id3tag) that we are using for crucial tasks.
|
115
|
+
|
110
116
|
## Fixture Sources
|
111
117
|
|
112
118
|
Unless specified otherwise in this section the fixture files are MIT licensed and from the FastImage and Dimensions projects.
|
@@ -145,6 +151,9 @@ Unless specified otherwise in this section the fixture files are MIT licensed an
|
|
145
151
|
- atc_fixture_vbr.flac is a converted version of the MP3 with the same name
|
146
152
|
- c_11k16btipcm.flac is a converted version of the WAV with the same name
|
147
153
|
|
154
|
+
### OGG
|
155
|
+
- `hi.ogg`, `vorbis.ogg`, `with_confusing_magic_string.ogg`, `with_garbage_at_the_end.ogg` have been generated by the project contributors
|
156
|
+
|
148
157
|
### M4A
|
149
158
|
- fixture.m4a was created by one of the project maintainers and is MIT licensed
|
150
159
|
|
data/format_parser.gemspec
CHANGED
@@ -32,8 +32,9 @@ Gem::Specification.new do |spec|
|
|
32
32
|
|
33
33
|
spec.add_dependency 'ks', '~> 0.0.1'
|
34
34
|
spec.add_dependency 'exifr', '~> 1.0'
|
35
|
-
spec.add_dependency 'id3tag', '~> 0.10'
|
35
|
+
spec.add_dependency 'id3tag', '~> 0.10', '>= 0.10.1'
|
36
36
|
spec.add_dependency 'faraday', '~> 0.13'
|
37
|
+
spec.add_dependency 'measurometer', '~> 1'
|
37
38
|
|
38
39
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
39
40
|
spec.add_development_dependency 'rake', '~> 12'
|
data/lib/care.rb
CHANGED
@@ -173,10 +173,10 @@ class Care
|
|
173
173
|
# @param io[IO] the IO to read from
|
174
174
|
# @param page_i[Integer] which page (zero-based) to read
|
175
175
|
def read_page(io, page_i)
|
176
|
-
|
176
|
+
Measurometer.increment_counter('format_parser.parser.Care.page_reads_from_upsteam', 1)
|
177
177
|
|
178
178
|
io.seek(page_i * @page_size)
|
179
|
-
read_result = io.read(@page_size)
|
179
|
+
read_result = Measurometer.instrument('format_parser.Care.read_page') { io.read(@page_size) }
|
180
180
|
if read_result.nil?
|
181
181
|
# If the read went past the end of the IO the read result will be nil,
|
182
182
|
# so we know our IO is exhausted here
|
data/lib/format_parser.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'set'
|
2
|
+
require 'measurometer'
|
2
3
|
|
3
4
|
# A pretty nimble module for parsing file metadata using partial reads. Contains all the
|
4
5
|
# top-level methods of the library.
|
@@ -17,6 +18,10 @@ module FormatParser
|
|
17
18
|
require_relative 'io_constraint'
|
18
19
|
require_relative 'care'
|
19
20
|
|
21
|
+
# Define Measurometer in the internal namespace as well
|
22
|
+
# so that we stay compatible with the applications that use it
|
23
|
+
const_set(:Measurometer, ::Measurometer)
|
24
|
+
|
20
25
|
# Is used to manage access to the shared array of parser constructors, which might
|
21
26
|
# potentially be mutated from different threads. The mutex won't be hit too often
|
22
27
|
# since it only locks when adding/removing parsers.
|
@@ -95,10 +100,9 @@ module FormatParser
|
|
95
100
|
# @param formats[Array] an array of file formats to scope the parsing to.
|
96
101
|
# For example `[:jpg, :tif]` will scope the parsing to TIFF and JPEG files.
|
97
102
|
# The default value is "all formats known to FormatParser"
|
98
|
-
# @param results[:first, :all
|
99
|
-
# is ambiguous. The default is `:first` which returns the first matching result.
|
100
|
-
#
|
101
|
-
# at most N results.
|
103
|
+
# @param results[:first, :all] one of the values defining how many results to return if parsing
|
104
|
+
# is ambiguous. The default is `:first` which returns the first matching result. `:all` will return all results.
|
105
|
+
# When using `:first` parsing will stop at the first successful match and other parsers won't run.
|
102
106
|
# @param limits_config[ReadLimitsConfig] the configuration object for various read/cache limits. The default
|
103
107
|
# one should be good for most cases.
|
104
108
|
# @return [Array<Result>, Result, nil] either an Array of results, a single parsing result or `nil` if
|
@@ -251,7 +255,4 @@ module FormatParser
|
|
251
255
|
Dir.glob(__dir__ + '/parsers/*.rb').sort.each do |parser_file|
|
252
256
|
require parser_file
|
253
257
|
end
|
254
|
-
# The Measurometer latches itself onto existing classes, so load it after
|
255
|
-
# we have loaded all the parsers
|
256
|
-
require_relative 'measurometer'
|
257
258
|
end
|
data/lib/parsers/exif_parser.rb
CHANGED
@@ -74,7 +74,9 @@ module FormatParser::EXIFParser
|
|
74
74
|
EXIFR.logger = Logger.new(nil)
|
75
75
|
|
76
76
|
def exif_from_tiff_io(constrained_io)
|
77
|
-
|
78
|
-
|
77
|
+
Measurometer.instrument('format_parser.EXIFParser.exif_from_tiff_io') do
|
78
|
+
raw_exif_data = EXIFR::TIFF.new(IOExt.new(constrained_io))
|
79
|
+
raw_exif_data ? EXIFResult.new(raw_exif_data) : nil
|
80
|
+
end
|
79
81
|
end
|
80
82
|
end
|
data/lib/parsers/jpeg_parser.rb
CHANGED
@@ -60,7 +60,7 @@ class FormatParser::JPEGParser
|
|
60
60
|
end
|
61
61
|
end
|
62
62
|
|
63
|
-
|
63
|
+
Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_read_until_capture', @buf.pos)
|
64
64
|
|
65
65
|
# Return at the earliest possible opportunity
|
66
66
|
if @width && @height
|
@@ -137,7 +137,7 @@ class FormatParser::JPEGParser
|
|
137
137
|
# ...and only then read the marker contents and parse it as EXIF
|
138
138
|
exif_data = safe_read(@buf, app1_frame_content_length - EXIF_MAGIC_STRING.bytesize)
|
139
139
|
|
140
|
-
|
140
|
+
Measurometer.add_distribution_value('format_parser.JPEGParser.bytes_sent_to_exif_parser', exif_data.bytesize)
|
141
141
|
|
142
142
|
@exif_data = exif_from_tiff_io(StringIO.new(exif_data))
|
143
143
|
rescue EXIFR::MalformedTIFF
|
data/lib/parsers/moov_parser.rb
CHANGED
@@ -27,7 +27,9 @@ class FormatParser::MOOVParser
|
|
27
27
|
# size that gets parsed just before.
|
28
28
|
max_read_offset = 0xFFFFFFFF
|
29
29
|
decoder = Decoder.new
|
30
|
-
atom_tree =
|
30
|
+
atom_tree = Measurometer.instrument('format_parser.Decoder.extract_atom_stream') do
|
31
|
+
decoder.extract_atom_stream(io, max_read_offset)
|
32
|
+
end
|
31
33
|
|
32
34
|
ftyp_atom = decoder.find_first_atom_by_path(atom_tree, 'ftyp')
|
33
35
|
file_type = ftyp_atom.field_value(:major_brand)
|
data/lib/parsers/mp3_parser.rb
CHANGED
@@ -43,10 +43,7 @@ class FormatParser::MP3Parser
|
|
43
43
|
def to_h
|
44
44
|
tag = __getobj__
|
45
45
|
MEMBERS.each_with_object({}) do |k, h|
|
46
|
-
|
47
|
-
# If this guard is removed, it fails when trying to do a gsub on a nil,
|
48
|
-
# in /lib/id3tag/frames/v2/genre_frame/genre_parser_pre_24.rb:25:in `just_genres'
|
49
|
-
value = tag.public_send(k) rescue nil
|
46
|
+
value = tag.public_send(k)
|
50
47
|
h[k] = value if value
|
51
48
|
end
|
52
49
|
end
|
@@ -74,7 +71,7 @@ class FormatParser::MP3Parser
|
|
74
71
|
# Compute how many bytes are occupied by the actual MPEG frames
|
75
72
|
ignore_bytes_at_tail = id3v1 ? 128 : 0
|
76
73
|
ignore_bytes_at_head = io.pos
|
77
|
-
bytes_used_by_frames = io.size -
|
74
|
+
bytes_used_by_frames = io.size - ignore_bytes_at_head - ignore_bytes_at_tail
|
78
75
|
|
79
76
|
io.seek(ignore_bytes_at_head)
|
80
77
|
|
@@ -0,0 +1,218 @@
|
|
1
|
+
# https://xiph.org/vorbis/doc/Vorbis_I_spec.pdf
|
2
|
+
# https://en.wikipedia.org/wiki/Ogg#Page_structure
|
3
|
+
class FormatParser::OggParser
|
4
|
+
include FormatParser::IOUtils
|
5
|
+
|
6
|
+
# Maximum size of an Ogg page
|
7
|
+
MAX_POSSIBLE_PAGE_SIZE = 65307
|
8
|
+
|
9
|
+
def call(io)
|
10
|
+
# The format consists of chunks of data each called an "Ogg page". Each page
|
11
|
+
# begins with the characters, "OggS", to identify the file as Ogg format.
|
12
|
+
capture_pattern = safe_read(io, 4)
|
13
|
+
return unless capture_pattern == 'OggS'
|
14
|
+
|
15
|
+
io.seek(28) # skip not important bytes
|
16
|
+
|
17
|
+
# Each header packet begins with the same header fields.
|
18
|
+
# 1) packet_type: 8 bit value (the identification header is type 1)
|
19
|
+
# 2) the characters 'v','o','r','b','i','s' as six octets
|
20
|
+
packet_type, vorbis, _vorbis_version, channels, sample_rate = safe_read(io, 16).unpack('Ca6VCV')
|
21
|
+
return unless packet_type == 1 && vorbis == 'vorbis'
|
22
|
+
|
23
|
+
# In order to calculate the audio duration we have to read a
|
24
|
+
# granule_position of the last Ogg page of the file. Unfortunately, we don't
|
25
|
+
# know where the last page starts. But we do know that max size of an Ogg
|
26
|
+
# page is 65307 bytes. So we read the last 65307 bytes from the file and try
|
27
|
+
# to find the last page in this tail.
|
28
|
+
pos = io.size - MAX_POSSIBLE_PAGE_SIZE
|
29
|
+
pos = 0 if pos < 0
|
30
|
+
io.seek(pos)
|
31
|
+
tail = io.read(MAX_POSSIBLE_PAGE_SIZE)
|
32
|
+
return unless tail
|
33
|
+
|
34
|
+
granule_position = find_last_granule_position(tail)
|
35
|
+
return unless granule_position
|
36
|
+
|
37
|
+
duration = granule_position / sample_rate.to_f
|
38
|
+
return if duration == Float::INFINITY
|
39
|
+
|
40
|
+
FormatParser::Audio.new(
|
41
|
+
format: :ogg,
|
42
|
+
audio_sample_rate_hz: sample_rate,
|
43
|
+
num_audio_channels: channels,
|
44
|
+
media_duration_seconds: duration
|
45
|
+
)
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
def all_indices_of_substr_in_str(of_substring, in_string)
|
51
|
+
last_i = 0
|
52
|
+
found_at_indices = []
|
53
|
+
while last_i = in_string.index(of_substring, last_i)
|
54
|
+
found_at_indices << last_i
|
55
|
+
last_i += of_substring.bytesize
|
56
|
+
end
|
57
|
+
found_at_indices
|
58
|
+
end
|
59
|
+
|
60
|
+
# Returns granule_position of the last valid Ogg page contained in the given
|
61
|
+
# tail. Since the tail may contain multiple "OggS" entries the method searches
|
62
|
+
# them recursively starting from the end. The search stops when the first
|
63
|
+
# valid Ogg page is found.
|
64
|
+
#
|
65
|
+
# The granule position contains the offset of the page in terms of the
|
66
|
+
# number of samples from the start of file. So once we know that number
|
67
|
+
# we can estimate how long the file is. We _do_ need to add the number
|
68
|
+
# of samples the granule covers though
|
69
|
+
def find_last_granule_position(in_string)
|
70
|
+
# The Ogg page always starts with "OggS". Find all of them
|
71
|
+
# in the given tail, since we want to scan "tail to head" -
|
72
|
+
# starting with the last index and going down to the first
|
73
|
+
rev_indices = all_indices_of_substr_in_str('OggS', in_string).reverse
|
74
|
+
rev_indices.each do |idx|
|
75
|
+
if granule_pos = extract_granule_position_from_string_at(in_string, idx)
|
76
|
+
return granule_pos
|
77
|
+
end
|
78
|
+
end
|
79
|
+
nil # Nothing matched or the list of indices was empty
|
80
|
+
end
|
81
|
+
|
82
|
+
# Since the magic bits may occur inside the body of the page we have to
|
83
|
+
# validate that what we found is actually an Ogg page by calculating the
|
84
|
+
# checksum. For this reason we have to read the entire page and calculate
|
85
|
+
# its checksum. In order to read the entire Ogg page we first have to read a
|
86
|
+
# part of its header to find out the size of the page.
|
87
|
+
def extract_granule_position_from_string_at(string, at)
|
88
|
+
header_size = 27
|
89
|
+
header_bytes = string.byteslice(at, header_size)
|
90
|
+
return unless header_bytes && header_bytes.bytesize == header_size
|
91
|
+
|
92
|
+
# Read the Ogg page header excluding the segment table (in other words read
|
93
|
+
# first 27 bytes). See https://en.wikipedia.org/wiki/Ogg#Page_structure
|
94
|
+
_capture_pattern,
|
95
|
+
_version,
|
96
|
+
_header_type,
|
97
|
+
granule_position,
|
98
|
+
_bitstream_serial_number,
|
99
|
+
_page_sequence_number,
|
100
|
+
checksum,
|
101
|
+
num_bytes_page_segments = header_bytes.unpack('a4CCQ<VVVC')
|
102
|
+
|
103
|
+
# Read the segment table part of the Ogg page header. Its size is stored in page_segments.
|
104
|
+
#
|
105
|
+
# The segment table is a vector of 8-bit values, each indicating the length
|
106
|
+
# of the corresponding segment within the page body.
|
107
|
+
# If there are no segments in the segment table the page is certainly invalid
|
108
|
+
return if num_bytes_page_segments == 0
|
109
|
+
|
110
|
+
# Read the segment table
|
111
|
+
segment_table_pos = at + header_size
|
112
|
+
segment_table = string.byteslice(segment_table_pos, num_bytes_page_segments)
|
113
|
+
return unless segment_table && segment_table.bytesize == num_bytes_page_segments
|
114
|
+
|
115
|
+
# Calculate the size of the Ogg page
|
116
|
+
num_bytes_used_for_segments = segment_table.unpack('C*').inject(&:+)
|
117
|
+
page_size = header_size + num_bytes_page_segments + num_bytes_used_for_segments
|
118
|
+
|
119
|
+
# Read the entire page now that we know how much we have to read
|
120
|
+
entire_page = string.byteslice(at, page_size)
|
121
|
+
return unless entire_page && entire_page.bytesize == page_size
|
122
|
+
|
123
|
+
# Compute and check the checksum. If this check fails it means one of the two:
|
124
|
+
# - the data is corrupted
|
125
|
+
# - the "OggS" capture pattern occurs inside the body of the page and
|
126
|
+
# we were scanning a random piece of content which was not an Ogg page
|
127
|
+
return unless checksum == calculate_checksum(entire_page)
|
128
|
+
|
129
|
+
# ...and only having gone through all these motions - return the granule position.
|
130
|
+
granule_position
|
131
|
+
end
|
132
|
+
|
133
|
+
# Calculate the CRC using the 0x04C11DB7 polynomial. We cannot use Zlib since
|
134
|
+
# it generates different checksums. Copied from https://github.com/anibali/ruby-ogg
|
135
|
+
def calculate_checksum(data)
|
136
|
+
crc_reg = 0
|
137
|
+
data.each_byte.with_index do |byte, i|
|
138
|
+
# The checksum is calculated over _the entire page_ but with the
|
139
|
+
# placeholder for the checksum - the 4 bytes - zeroed out. The checksum
|
140
|
+
# is then substituted _into_ the page at that offset. So when we go
|
141
|
+
# over bytes at these offsets we will substitute them with 0s
|
142
|
+
b = (22..25).cover?(i) ? 0 : byte
|
143
|
+
crc_reg = (crc_reg << 8) ^ CRC_LOOKUP[((crc_reg >> 24) & 0xff) ^ b]
|
144
|
+
crc_reg = crc_reg % 2**32
|
145
|
+
end
|
146
|
+
|
147
|
+
crc_reg
|
148
|
+
end
|
149
|
+
|
150
|
+
CRC_LOOKUP = [
|
151
|
+
0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9,
|
152
|
+
0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005,
|
153
|
+
0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61,
|
154
|
+
0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd,
|
155
|
+
0x4c11db70, 0x48d0c6c7, 0x4593e01e, 0x4152fda9,
|
156
|
+
0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75,
|
157
|
+
0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011,
|
158
|
+
0x791d4014, 0x7ddc5da3, 0x709f7b7a, 0x745e66cd,
|
159
|
+
0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039,
|
160
|
+
0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5,
|
161
|
+
0xbe2b5b58, 0xbaea46ef, 0xb7a96036, 0xb3687d81,
|
162
|
+
0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d,
|
163
|
+
0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49,
|
164
|
+
0xc7361b4c, 0xc3f706fb, 0xceb42022, 0xca753d95,
|
165
|
+
0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1,
|
166
|
+
0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d,
|
167
|
+
0x34867077, 0x30476dc0, 0x3d044b19, 0x39c556ae,
|
168
|
+
0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072,
|
169
|
+
0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16,
|
170
|
+
0x018aeb13, 0x054bf6a4, 0x0808d07d, 0x0cc9cdca,
|
171
|
+
0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde,
|
172
|
+
0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02,
|
173
|
+
0x5e9f46bf, 0x5a5e5b08, 0x571d7dd1, 0x53dc6066,
|
174
|
+
0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba,
|
175
|
+
0xaca5c697, 0xa864db20, 0xa527fdf9, 0xa1e6e04e,
|
176
|
+
0xbfa1b04b, 0xbb60adfc, 0xb6238b25, 0xb2e29692,
|
177
|
+
0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6,
|
178
|
+
0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a,
|
179
|
+
0xe0b41de7, 0xe4750050, 0xe9362689, 0xedf73b3e,
|
180
|
+
0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2,
|
181
|
+
0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686,
|
182
|
+
0xd5b88683, 0xd1799b34, 0xdc3abded, 0xd8fba05a,
|
183
|
+
0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637,
|
184
|
+
0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb,
|
185
|
+
0x4f040d56, 0x4bc510e1, 0x46863638, 0x42472b8f,
|
186
|
+
0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53,
|
187
|
+
0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47,
|
188
|
+
0x36194d42, 0x32d850f5, 0x3f9b762c, 0x3b5a6b9b,
|
189
|
+
0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff,
|
190
|
+
0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623,
|
191
|
+
0xf12f560e, 0xf5ee4bb9, 0xf8ad6d60, 0xfc6c70d7,
|
192
|
+
0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b,
|
193
|
+
0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f,
|
194
|
+
0xc423cd6a, 0xc0e2d0dd, 0xcda1f604, 0xc960ebb3,
|
195
|
+
0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7,
|
196
|
+
0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b,
|
197
|
+
0x9b3660c6, 0x9ff77d71, 0x92b45ba8, 0x9675461f,
|
198
|
+
0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3,
|
199
|
+
0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640,
|
200
|
+
0x4e8ee645, 0x4a4ffbf2, 0x470cdd2b, 0x43cdc09c,
|
201
|
+
0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8,
|
202
|
+
0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24,
|
203
|
+
0x119b4be9, 0x155a565e, 0x18197087, 0x1cd86d30,
|
204
|
+
0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec,
|
205
|
+
0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088,
|
206
|
+
0x2497d08d, 0x2056cd3a, 0x2d15ebe3, 0x29d4f654,
|
207
|
+
0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0,
|
208
|
+
0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c,
|
209
|
+
0xe3a1cbc1, 0xe760d676, 0xea23f0af, 0xeee2ed18,
|
210
|
+
0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4,
|
211
|
+
0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0,
|
212
|
+
0x9abc8bd5, 0x9e7d9662, 0x933eb0bb, 0x97ffad0c,
|
213
|
+
0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668,
|
214
|
+
0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4
|
215
|
+
].freeze
|
216
|
+
|
217
|
+
FormatParser.register_parser self, natures: :audio, formats: :ogg
|
218
|
+
end
|
data/lib/parsers/pdf_parser.rb
CHANGED
@@ -9,67 +9,12 @@ class FormatParser::PDFParser
|
|
9
9
|
#
|
10
10
|
PDF_MARKER = /%PDF-1\.[0-8]{1}/
|
11
11
|
|
12
|
-
# Page counts have different markers depending on
|
13
|
-
# the PDF type. There is not a single common way of solving
|
14
|
-
# this. The only way of solving this correctly is by adding
|
15
|
-
# different types of PDF's in the specs.
|
16
|
-
#
|
17
|
-
COUNT_MARKERS = ['Count ']
|
18
|
-
EOF_MARKER = '%EOF'
|
19
|
-
|
20
12
|
def call(io)
|
21
13
|
io = FormatParser::IOConstraint.new(io)
|
22
14
|
|
23
15
|
return unless safe_read(io, 9) =~ PDF_MARKER
|
24
16
|
|
25
|
-
|
26
|
-
|
27
|
-
FormatParser::Document.new(
|
28
|
-
format: :pdf,
|
29
|
-
page_count: attributes[:page_count]
|
30
|
-
)
|
31
|
-
end
|
32
|
-
|
33
|
-
private
|
34
|
-
|
35
|
-
# Read ahead bytes until one of % or / is reached.
|
36
|
-
# A header in a PDF always starts with a /
|
37
|
-
# The % is to detect the EOF
|
38
|
-
#
|
39
|
-
def scan_for_attributes(io)
|
40
|
-
result = {}
|
41
|
-
|
42
|
-
while read = safe_read(io, 1)
|
43
|
-
case read
|
44
|
-
when '%'
|
45
|
-
break if safe_read(io, EOF_MARKER.size) == EOF_MARKER
|
46
|
-
when '/'
|
47
|
-
find_page_count(io, result)
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
result
|
52
|
-
end
|
53
|
-
|
54
|
-
def find_page_count(io, result)
|
55
|
-
COUNT_MARKERS.each do |marker|
|
56
|
-
if safe_read(io, marker.size) == marker
|
57
|
-
result[:page_count] = read_numbers(io)
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
# Read ahead bytes until no more numbers are found
|
63
|
-
# This assumes that the position of io starts at a
|
64
|
-
# number
|
65
|
-
def read_numbers(io)
|
66
|
-
numbers = ''
|
67
|
-
|
68
|
-
while c = safe_read(io, 1)
|
69
|
-
c =~ /\d+/ ? numbers << c : break
|
70
|
-
end
|
71
|
-
|
72
|
-
numbers.to_i
|
17
|
+
FormatParser::Document.new(format: :pdf)
|
73
18
|
end
|
74
19
|
|
75
20
|
FormatParser.register_parser self, natures: :document, formats: :pdf
|
data/lib/read_limiter.rb
CHANGED
@@ -77,9 +77,9 @@ class FormatParser::ReadLimiter
|
|
77
77
|
# `format_parser.TIFF.read_limiter.num_seeks` and so forth
|
78
78
|
# @return void
|
79
79
|
def send_metrics(prefix)
|
80
|
-
|
81
|
-
|
82
|
-
|
80
|
+
Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_seeks' % prefix, @seeks)
|
81
|
+
Measurometer.add_distribution_value('format_parser.%s.read_limiter.num_reads' % prefix, @reads)
|
82
|
+
Measurometer.add_distribution_value('format_parser.%s.read_limiter.read_bytes' % prefix, @bytes)
|
83
83
|
end
|
84
84
|
|
85
85
|
# Resets all the recorded call counters so that the object can be reused for the next parser,
|
data/lib/remote_io.rb
CHANGED
@@ -60,7 +60,7 @@ class FormatParser::RemoteIO
|
|
60
60
|
# @return [String] the read bytes
|
61
61
|
def read(n_bytes)
|
62
62
|
http_range = (@pos..(@pos + n_bytes - 1))
|
63
|
-
maybe_size, maybe_body = request_range(http_range)
|
63
|
+
maybe_size, maybe_body = Measurometer.instrument('format_parser.RemoteIO.read') { request_range(http_range) }
|
64
64
|
if maybe_size && maybe_body
|
65
65
|
@remote_size = maybe_size
|
66
66
|
@pos += maybe_body.bytesize
|
@@ -103,10 +103,10 @@ class FormatParser::RemoteIO
|
|
103
103
|
# cannot hint size with this response - at lease not when working with S3
|
104
104
|
return
|
105
105
|
when 500..599
|
106
|
-
|
106
|
+
Measurometer.increment_counter('format_parser.RemoteIO.upstream50x_errors', 1)
|
107
107
|
raise IntermittentFailure.new(response.status, "Server at #{@uri} replied with a #{response.status} and we might want to retry")
|
108
108
|
else
|
109
|
-
|
109
|
+
Measurometer.increment_counter('format_parser.RemoteIO.invalid_request_errors', 1)
|
110
110
|
raise InvalidRequest.new(response.status, "Server at #{@uri} replied with a #{response.status} and refused our request")
|
111
111
|
end
|
112
112
|
end
|
data/spec/format_parser_spec.rb
CHANGED
@@ -5,6 +5,10 @@ describe FormatParser do
|
|
5
5
|
expect(FormatParser::VERSION).to be_kind_of(String)
|
6
6
|
end
|
7
7
|
|
8
|
+
it 'exposes the Measurometer constant' do
|
9
|
+
expect(FormatParser::Measurometer).to be_kind_of(Module)
|
10
|
+
end
|
11
|
+
|
8
12
|
describe '.parse' do
|
9
13
|
it 'returns nil when trying to parse an empty IO' do
|
10
14
|
d = StringIO.new('')
|
@@ -57,7 +57,7 @@ describe FormatParser::MP3Parser do
|
|
57
57
|
expect(parsed.format).to eq(:mp3)
|
58
58
|
expect(parsed.num_audio_channels).to eq(2)
|
59
59
|
expect(parsed.audio_sample_rate_hz).to eq(44100)
|
60
|
-
expect(parsed.media_duration_seconds).to be_within(0.1).of(
|
60
|
+
expect(parsed.media_duration_seconds).to be_within(0.1).of(1098.03)
|
61
61
|
|
62
62
|
expect(parsed.intrinsics).not_to be_nil
|
63
63
|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FormatParser::OggParser do
|
4
|
+
it 'parses an ogg file' do
|
5
|
+
parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/vorbis.ogg', 'rb'))
|
6
|
+
|
7
|
+
expect(parse_result.nature).to eq(:audio)
|
8
|
+
expect(parse_result.format).to eq(:ogg)
|
9
|
+
expect(parse_result.num_audio_channels).to eq(1)
|
10
|
+
expect(parse_result.audio_sample_rate_hz).to eq(16000)
|
11
|
+
expect(parse_result.media_duration_seconds).to be_within(0.01).of(2973.95)
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'skips a file if it contains more than MAX_POSSIBLE_OGG_PAGE_SIZE bytes of garbage at the end' do
|
15
|
+
parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/with_garbage_at_the_end.ogg', 'rb'))
|
16
|
+
expect(parse_result).to be_nil
|
17
|
+
end
|
18
|
+
|
19
|
+
it "correctly parses an ogg file when a magic string occurs in the page's body" do
|
20
|
+
parse_result = subject.call(File.open(__dir__ + '/../fixtures/Ogg/with_confusing_magic_string.ogg', 'rb'))
|
21
|
+
|
22
|
+
expect(parse_result.nature).to eq(:audio)
|
23
|
+
expect(parse_result.format).to eq(:ogg)
|
24
|
+
expect(parse_result.num_audio_channels).to eq(1)
|
25
|
+
expect(parse_result.audio_sample_rate_hz).to eq(8000)
|
26
|
+
expect(parse_result.media_duration_seconds).to be_within(0.01).of(0.45)
|
27
|
+
end
|
28
|
+
end
|
@@ -18,10 +18,6 @@ describe FormatParser::PDFParser do
|
|
18
18
|
expect(parsed_pdf.nature).to eq(:document)
|
19
19
|
expect(parsed_pdf.format).to eq(:pdf)
|
20
20
|
end
|
21
|
-
|
22
|
-
it 'has a correct page count' do
|
23
|
-
expect(parsed_pdf.page_count).to eq(hash.fetch(:page_count))
|
24
|
-
end
|
25
21
|
end
|
26
22
|
|
27
23
|
describe 'a PDF file with a missing version header' do
|
@@ -44,25 +40,9 @@ describe FormatParser::PDFParser do
|
|
44
40
|
pending 'does not parse succesfully'
|
45
41
|
end
|
46
42
|
|
47
|
-
describe 'a PDF file with a missing COUNT_HEADER' do
|
48
|
-
let(:pdf_file) { 'missing_page_count.pdf' }
|
49
|
-
|
50
|
-
it 'does not return a page count' do
|
51
|
-
expect(parsed_pdf.page_count).to eq(nil)
|
52
|
-
end
|
53
|
-
end
|
54
|
-
|
55
43
|
describe 'parses a PDF file' do
|
56
44
|
describe 'a single page file' do
|
57
|
-
include_examples :behave_like_pdf, file: '1_page.pdf'
|
58
|
-
end
|
59
|
-
|
60
|
-
describe 'a multi page pdf file' do
|
61
|
-
include_examples :behave_like_pdf, file: '2_pages.pdf', page_count: 2
|
62
|
-
end
|
63
|
-
|
64
|
-
describe 'a multi page pdf file with content' do
|
65
|
-
include_examples :behave_like_pdf, file: '10_pages.pdf', page_count: 10
|
45
|
+
include_examples :behave_like_pdf, file: '1_page.pdf'
|
66
46
|
end
|
67
47
|
end
|
68
48
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: format_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Noah Berman
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-
|
12
|
+
date: 2018-06-18 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ks
|
@@ -46,6 +46,9 @@ dependencies:
|
|
46
46
|
- - "~>"
|
47
47
|
- !ruby/object:Gem::Version
|
48
48
|
version: '0.10'
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: 0.10.1
|
49
52
|
type: :runtime
|
50
53
|
prerelease: false
|
51
54
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -53,6 +56,9 @@ dependencies:
|
|
53
56
|
- - "~>"
|
54
57
|
- !ruby/object:Gem::Version
|
55
58
|
version: '0.10'
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.10.1
|
56
62
|
- !ruby/object:Gem::Dependency
|
57
63
|
name: faraday
|
58
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,6 +73,20 @@ dependencies:
|
|
67
73
|
- - "~>"
|
68
74
|
- !ruby/object:Gem::Version
|
69
75
|
version: '0.13'
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: measurometer
|
78
|
+
requirement: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1'
|
83
|
+
type: :runtime
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1'
|
70
90
|
- !ruby/object:Gem::Dependency
|
71
91
|
name: rspec
|
72
92
|
requirement: !ruby/object:Gem::Requirement
|
@@ -186,7 +206,6 @@ files:
|
|
186
206
|
- lib/image.rb
|
187
207
|
- lib/io_constraint.rb
|
188
208
|
- lib/io_utils.rb
|
189
|
-
- lib/measurometer.rb
|
190
209
|
- lib/parsers/aiff_parser.rb
|
191
210
|
- lib/parsers/bmp_parser.rb
|
192
211
|
- lib/parsers/cr2_parser.rb
|
@@ -201,6 +220,7 @@ files:
|
|
201
220
|
- lib/parsers/moov_parser/decoder.rb
|
202
221
|
- lib/parsers/mp3_parser.rb
|
203
222
|
- lib/parsers/mp3_parser/id3_extraction.rb
|
223
|
+
- lib/parsers/ogg_parser.rb
|
204
224
|
- lib/parsers/pdf_parser.rb
|
205
225
|
- lib/parsers/png_parser.rb
|
206
226
|
- lib/parsers/psd_parser.rb
|
@@ -220,7 +240,6 @@ files:
|
|
220
240
|
- spec/format_parser_inspect_spec.rb
|
221
241
|
- spec/format_parser_spec.rb
|
222
242
|
- spec/io_utils_spec.rb
|
223
|
-
- spec/measurometer_spec.rb
|
224
243
|
- spec/parsers/aiff_parser_spec.rb
|
225
244
|
- spec/parsers/bmp_parser_spec.rb
|
226
245
|
- spec/parsers/cr2_parser_spec.rb
|
@@ -232,6 +251,7 @@ files:
|
|
232
251
|
- spec/parsers/jpeg_parser_spec.rb
|
233
252
|
- spec/parsers/moov_parser_spec.rb
|
234
253
|
- spec/parsers/mp3_parser_spec.rb
|
254
|
+
- spec/parsers/ogg_parser_spec.rb
|
235
255
|
- spec/parsers/pdf_parser_spec.rb
|
236
256
|
- spec/parsers/png_parser_spec.rb
|
237
257
|
- spec/parsers/psd_parser_spec.rb
|
data/lib/measurometer.rb
DELETED
@@ -1,100 +0,0 @@
|
|
1
|
-
class FormatParser::Measurometer
|
2
|
-
class << self
|
3
|
-
# Permits adding instrumentation drivers. Measurometer is 1-1 API
|
4
|
-
# compatible with Appsignal, which we use a lot. So to magically
|
5
|
-
# obtain all Appsignal instrumentation, add the Appsignal module
|
6
|
-
# as a driver.
|
7
|
-
#
|
8
|
-
# Measurometer.drivers << Appsignal
|
9
|
-
#
|
10
|
-
# A driver must be reentrant and thread-safe - it should be possible
|
11
|
-
# to have multiple `instrument` calls open from different threads at the
|
12
|
-
# same time.
|
13
|
-
# The driver must support the same interface as the Measurometer class
|
14
|
-
# itself, minus the `drivers` and `instrument_instance_method` methods.
|
15
|
-
#
|
16
|
-
# @return Array
|
17
|
-
def drivers
|
18
|
-
@drivers ||= []
|
19
|
-
@drivers
|
20
|
-
end
|
21
|
-
|
22
|
-
# Runs a given block within a cascade of `instrument` blocks of all the
|
23
|
-
# added drivers.
|
24
|
-
#
|
25
|
-
# Measurometer.instrument('do_foo') { compute! }
|
26
|
-
#
|
27
|
-
# unfolds to
|
28
|
-
# Appsignal.instrument('do_foo') do
|
29
|
-
# Statsd.timing('do_foo') do
|
30
|
-
# compute!
|
31
|
-
# end
|
32
|
-
# end
|
33
|
-
#
|
34
|
-
# A driver must be reentrant and thread-safe - it should be possible
|
35
|
-
# to have multiple `instrument` calls open from different threads at the
|
36
|
-
# same time.
|
37
|
-
# The driver must support the same interface as the Measurometer class
|
38
|
-
# itself, minus the `drivers` and `instrument_instance_method` methods.
|
39
|
-
#
|
40
|
-
# @param block_name[String] under which path to push the metric
|
41
|
-
# @param blk[#call] the block to instrument
|
42
|
-
# @return [Object] the return value of &blk
|
43
|
-
def instrument(block_name, &blk)
|
44
|
-
return yield unless @drivers && @drivers.any? # The block wrapping business is not free
|
45
|
-
@drivers.inject(blk) { |outer_block, driver|
|
46
|
-
-> {
|
47
|
-
driver.instrument(block_name, &outer_block)
|
48
|
-
}
|
49
|
-
}.call
|
50
|
-
end
|
51
|
-
|
52
|
-
# Adds a distribution value (sample) under a given path
|
53
|
-
#
|
54
|
-
# @param value_path[String] under which path to push the metric
|
55
|
-
# @param value[Numeric] distribution value
|
56
|
-
# @return nil
|
57
|
-
def add_distribution_value(value_path, value)
|
58
|
-
(@drivers || []).each { |d| d.add_distribution_value(value_path, value) }
|
59
|
-
nil
|
60
|
-
end
|
61
|
-
|
62
|
-
# Increment a named counter under a given path
|
63
|
-
#
|
64
|
-
# @param counter_path[String] under which path to push the metric
|
65
|
-
# @param by[Integer] the counter increment to apply
|
66
|
-
# @return nil
|
67
|
-
def increment_counter(counter_path, by)
|
68
|
-
(@drivers || []).each { |d| d.increment_counter(counter_path, by) }
|
69
|
-
nil
|
70
|
-
end
|
71
|
-
|
72
|
-
# Wrap an anonymous module around an instance method in the given class to have
|
73
|
-
# it instrumented automatically. The name of the measurement will be interpolated as:
|
74
|
-
#
|
75
|
-
# "#{prefix}.#{rightmost_class_constant_name}.#{instance_method_name}"
|
76
|
-
#
|
77
|
-
# @param target_class[Class] the class to instrument
|
78
|
-
# @param instance_method_name_to_instrument[Symbol] the method name to instrument
|
79
|
-
# @param path_prefix[String] under which path to push the instrumented metric
|
80
|
-
# @return void
|
81
|
-
def instrument_instance_method(target_class, instance_method_name_to_instrument, path_prefix)
|
82
|
-
short_class_name = target_class.to_s.split('::').last
|
83
|
-
instrumentation_name = [path_prefix, short_class_name, instance_method_name_to_instrument].join('.')
|
84
|
-
instrumenter_module = Module.new do
|
85
|
-
define_method(instance_method_name_to_instrument) do |*any|
|
86
|
-
::FormatParser::Measurometer.instrument(instrumentation_name) { super(*any) }
|
87
|
-
end
|
88
|
-
end
|
89
|
-
target_class.prepend(instrumenter_module)
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
# Instrument things interesting in the global sense
|
94
|
-
instrument_instance_method(FormatParser::RemoteIO, :read, 'format_parser')
|
95
|
-
instrument_instance_method(Care::Cache, :read_page, 'format_parser')
|
96
|
-
|
97
|
-
# Instrument more specific things on a per-parser basis
|
98
|
-
instrument_instance_method(FormatParser::EXIFParser, :scan_image_tiff, 'format_parser')
|
99
|
-
instrument_instance_method(FormatParser::MOOVParser::Decoder, :extract_atom_stream, 'format_parser.parsers.MOOVParser')
|
100
|
-
end
|
data/spec/measurometer_spec.rb
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
describe FormatParser::Measurometer do
|
4
|
-
RSpec::Matchers.define :include_counter_or_measurement_named do |named|
|
5
|
-
match do |actual|
|
6
|
-
actual.any? do |e|
|
7
|
-
e[0] == named && e[1] > 0
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
it 'instruments a full cycle FormatParser.parse' do
|
13
|
-
driver_class = Class.new do
|
14
|
-
attr_accessor :timings, :counters, :distributions
|
15
|
-
def instrument(block_name)
|
16
|
-
s = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
17
|
-
yield.tap do
|
18
|
-
delta = Process.clock_gettime(Process::CLOCK_MONOTONIC) - s
|
19
|
-
@timings ||= []
|
20
|
-
@timings << [block_name, delta * 1000]
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def add_distribution_value(value_path, value)
|
25
|
-
@distributions ||= []
|
26
|
-
@distributions << [value_path, value]
|
27
|
-
end
|
28
|
-
|
29
|
-
def increment_counter(value_path, value)
|
30
|
-
@counters ||= []
|
31
|
-
@counters << [value_path, value]
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
instrumenter = driver_class.new
|
36
|
-
described_class.drivers << instrumenter
|
37
|
-
|
38
|
-
FormatParser.parse(File.open(fixtures_dir + 'JPEG/keynote_recognized_as_jpeg.key', 'rb'), results: :all)
|
39
|
-
|
40
|
-
described_class.drivers.delete(instrumenter)
|
41
|
-
expect(described_class.drivers).not_to include(instrumenter)
|
42
|
-
|
43
|
-
expect(instrumenter.counters).to include_counter_or_measurement_named('format_parser.detected_formats.zip')
|
44
|
-
expect(instrumenter.counters).to include_counter_or_measurement_named('format_parser.parser.Care.page_reads_from_upsteam')
|
45
|
-
expect(instrumenter.distributions).to include_counter_or_measurement_named('format_parser.ZIPParser.read_limiter.read_bytes')
|
46
|
-
expect(instrumenter.timings).to include_counter_or_measurement_named('format_parser.Cache.read_page')
|
47
|
-
end
|
48
|
-
end
|