pdf-reader 2.5.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +42 -0
- data/README.md +16 -1
- data/Rakefile +1 -1
- data/examples/extract_fonts.rb +12 -7
- data/examples/rspec.rb +1 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +90 -46
- data/lib/pdf/reader/cid_widths.rb +1 -0
- data/lib/pdf/reader/cmap.rb +65 -50
- data/lib/pdf/reader/encoding.rb +3 -2
- data/lib/pdf/reader/error.rb +19 -3
- data/lib/pdf/reader/filter/ascii85.rb +7 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
- data/lib/pdf/reader/filter/depredict.rb +11 -9
- data/lib/pdf/reader/filter/flate.rb +4 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +1 -1
- data/lib/pdf/reader/filter/run_length.rb +19 -13
- data/lib/pdf/reader/filter.rb +2 -1
- data/lib/pdf/reader/font.rb +72 -16
- data/lib/pdf/reader/font_descriptor.rb +19 -17
- data/lib/pdf/reader/form_xobject.rb +15 -5
- data/lib/pdf/reader/glyph_hash.rb +16 -9
- data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/lzw.rb +4 -2
- data/lib/pdf/reader/null_security_handler.rb +1 -4
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +252 -44
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
- data/lib/pdf/reader/page.rb +99 -19
- data/lib/pdf/reader/page_layout.rb +36 -37
- data/lib/pdf/reader/page_state.rb +12 -11
- data/lib/pdf/reader/page_text_receiver.rb +57 -10
- data/lib/pdf/reader/pages_strategy.rb +1 -0
- data/lib/pdf/reader/parser.rb +23 -12
- data/lib/pdf/reader/point.rb +25 -0
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +113 -0
- data/lib/pdf/reader/reference.rb +1 -0
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
- data/lib/pdf/reader/stream.rb +2 -1
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +14 -6
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +1 -0
- data/lib/pdf/reader/type_check.rb +52 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
- data/lib/pdf/reader/width_calculator/composite.rb +1 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +27 -4
- data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
- data/lib/pdf/reader.rb +46 -15
- data/lib/pdf-reader.rb +1 -0
- data/rbi/pdf-reader.rbi +1978 -0
- metadata +21 -10
- data/lib/pdf/reader/orientation_detector.rb +0 -34
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -1,7 +1,9 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'pdf/reader/overlapping_runs_filter'
|
6
|
+
require 'pdf/reader/zero_width_runs_filter'
|
5
7
|
|
6
8
|
class PDF::Reader
|
7
9
|
|
@@ -15,14 +17,15 @@ class PDF::Reader
|
|
15
17
|
DEFAULT_FONT_SIZE = 12
|
16
18
|
|
17
19
|
def initialize(runs, mediabox)
|
18
|
-
|
20
|
+
# mediabox is a 4-element array for now, but it'd be nice to switch to a
|
21
|
+
# PDF::Reader::Rectangle at some point
|
22
|
+
PDF::Reader::Error.validate_not_nil(mediabox, "mediabox")
|
19
23
|
|
20
|
-
@
|
24
|
+
@mediabox = process_mediabox(mediabox)
|
25
|
+
@runs = runs
|
21
26
|
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
22
27
|
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
23
|
-
@
|
24
|
-
@page_width = (mediabox[2] - mediabox[0]).abs
|
25
|
-
@page_height = (mediabox[3] - mediabox[1]).abs
|
28
|
+
@median_glyph_width = median(@runs.map(&:mean_character_width)) || 0
|
26
29
|
@x_offset = @runs.map(&:x).sort.first || 0
|
27
30
|
lowest_y = @runs.map(&:y).sort.first || 0
|
28
31
|
@y_offset = lowest_y > 0 ? 0 : lowest_y
|
@@ -45,6 +48,14 @@ class PDF::Reader
|
|
45
48
|
|
46
49
|
private
|
47
50
|
|
51
|
+
def page_width
|
52
|
+
@mediabox.width
|
53
|
+
end
|
54
|
+
|
55
|
+
def page_height
|
56
|
+
@mediabox.height
|
57
|
+
end
|
58
|
+
|
48
59
|
# given an array of strings, return a new array with empty rows from the
|
49
60
|
# beginning and end removed.
|
50
61
|
#
|
@@ -63,19 +74,19 @@ class PDF::Reader
|
|
63
74
|
end
|
64
75
|
|
65
76
|
def row_count
|
66
|
-
@row_count ||= (
|
77
|
+
@row_count ||= (page_height / @mean_font_size).floor
|
67
78
|
end
|
68
79
|
|
69
80
|
def col_count
|
70
|
-
@col_count ||= ((
|
81
|
+
@col_count ||= ((page_width / @median_glyph_width) * 1.05).floor
|
71
82
|
end
|
72
83
|
|
73
84
|
def row_multiplier
|
74
|
-
@row_multiplier ||=
|
85
|
+
@row_multiplier ||= page_height.to_f / row_count.to_f
|
75
86
|
end
|
76
87
|
|
77
88
|
def col_multiplier
|
78
|
-
@col_multiplier ||=
|
89
|
+
@col_multiplier ||= page_width.to_f / col_count.to_f
|
79
90
|
end
|
80
91
|
|
81
92
|
def mean(collection)
|
@@ -86,40 +97,28 @@ class PDF::Reader
|
|
86
97
|
end
|
87
98
|
end
|
88
99
|
|
89
|
-
def
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
100
|
+
def median(collection)
|
101
|
+
if collection.size == 0
|
102
|
+
0
|
103
|
+
else
|
104
|
+
collection.sort[(collection.size * 0.5).floor]
|
105
|
+
end
|
95
106
|
end
|
96
107
|
|
97
|
-
|
98
|
-
|
99
|
-
def merge_runs(runs)
|
100
|
-
runs.group_by { |char|
|
101
|
-
char.y.to_i
|
102
|
-
}.map { |y, chars|
|
103
|
-
group_chars_into_runs(chars.sort)
|
104
|
-
}.flatten.sort
|
108
|
+
def local_string_insert(haystack, needle, index)
|
109
|
+
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
105
110
|
end
|
106
111
|
|
107
|
-
def
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
runs << head
|
116
|
-
end
|
112
|
+
def process_mediabox(mediabox)
|
113
|
+
if mediabox.is_a?(Array)
|
114
|
+
msg = "Passing the mediabox to PageLayout as an Array is deprecated," +
|
115
|
+
" please use a Rectangle instead"
|
116
|
+
$stderr.puts msg
|
117
|
+
PDF::Reader::Rectangle.from_array(mediabox)
|
118
|
+
else
|
119
|
+
mediabox
|
117
120
|
end
|
118
|
-
runs
|
119
121
|
end
|
120
122
|
|
121
|
-
def local_string_insert(haystack, needle, index)
|
122
|
-
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
123
|
-
end
|
124
123
|
end
|
125
124
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'pdf/reader/transformation_matrix'
|
@@ -312,7 +313,7 @@ class PDF::Reader
|
|
312
313
|
# may need to be added
|
313
314
|
#
|
314
315
|
def process_glyph_displacement(w0, tj, word_boundary)
|
315
|
-
fs =
|
316
|
+
fs = state[:text_font_size]
|
316
317
|
tc = state[:char_spacing]
|
317
318
|
if word_boundary
|
318
319
|
tw = state[:word_spacing]
|
@@ -330,16 +331,16 @@ class PDF::Reader
|
|
330
331
|
# apply horizontal scaling to spacing values but not font size
|
331
332
|
tx = ((w0 * fs) + tc + tw) * th
|
332
333
|
end
|
333
|
-
|
334
|
-
# TODO: I'm pretty sure that tx shouldn't need to be divided by
|
335
|
-
# ctm[0] here, but this gets my tests green and I'm out of
|
336
|
-
# ideas for now
|
337
334
|
# TODO: support ty > 0
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
335
|
+
ty = 0
|
336
|
+
temp = TransformationMatrix.new(1, 0,
|
337
|
+
0, 1,
|
338
|
+
tx, ty)
|
339
|
+
@text_matrix = temp.multiply!(
|
340
|
+
@text_matrix.a, @text_matrix.b,
|
341
|
+
@text_matrix.c, @text_matrix.d,
|
342
|
+
@text_matrix.e, @text_matrix.f
|
343
|
+
)
|
343
344
|
@font_size = @text_rendering_matrix = nil # invalidate cached value
|
344
345
|
end
|
345
346
|
|
@@ -383,7 +384,7 @@ class PDF::Reader
|
|
383
384
|
#
|
384
385
|
def build_fonts(raw_fonts)
|
385
386
|
wrapped_fonts = raw_fonts.map { |label, font|
|
386
|
-
[label, PDF::Reader::Font.new(@objects, @objects.
|
387
|
+
[label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
|
387
388
|
}
|
388
389
|
|
389
390
|
::Hash[wrapped_fonts]
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'forwardable'
|
@@ -44,14 +45,34 @@ module PDF
|
|
44
45
|
@page = page
|
45
46
|
@content = []
|
46
47
|
@characters = []
|
47
|
-
@mediabox = page.objects.deref(page.attributes[:MediaBox])
|
48
|
-
device_bl = @state.ctm_transform(@mediabox[0], @mediabox[1])
|
49
|
-
device_tr = @state.ctm_transform(@mediabox[2], @mediabox[3])
|
50
|
-
@device_mediabox = [ device_bl.first, device_bl.last, device_tr.first, device_tr.last]
|
51
48
|
end
|
52
49
|
|
50
|
+
def runs(opts = {})
|
51
|
+
runs = @characters
|
52
|
+
|
53
|
+
if rect = opts.fetch(:rect, @page.rectangles[:CropBox])
|
54
|
+
runs = BoundingRectangleRunsFilter.runs_within_rect(runs, rect)
|
55
|
+
end
|
56
|
+
|
57
|
+
if opts.fetch(:skip_zero_width, true)
|
58
|
+
runs = ZeroWidthRunsFilter.exclude_zero_width_runs(runs)
|
59
|
+
end
|
60
|
+
|
61
|
+
if opts.fetch(:skip_overlapping, true)
|
62
|
+
runs = OverlappingRunsFilter.exclude_redundant_runs(runs)
|
63
|
+
end
|
64
|
+
|
65
|
+
if opts.fetch(:merge, true)
|
66
|
+
runs = merge_runs(runs)
|
67
|
+
end
|
68
|
+
|
69
|
+
runs
|
70
|
+
end
|
71
|
+
|
72
|
+
# deprecated
|
53
73
|
def content
|
54
|
-
|
74
|
+
mediabox = @page.rectangles[:MediaBox]
|
75
|
+
PageLayout.new(runs, mediabox).to_s
|
55
76
|
end
|
56
77
|
|
57
78
|
#####################################################
|
@@ -66,8 +87,10 @@ module PDF
|
|
66
87
|
params.each do |arg|
|
67
88
|
if arg.is_a?(String)
|
68
89
|
internal_show_text(arg)
|
69
|
-
|
90
|
+
elsif arg.is_a?(Numeric)
|
70
91
|
@state.process_glyph_displacement(0, arg, false)
|
92
|
+
else
|
93
|
+
# skip it
|
71
94
|
end
|
72
95
|
end
|
73
96
|
end
|
@@ -98,6 +121,7 @@ module PDF
|
|
98
121
|
private
|
99
122
|
|
100
123
|
def internal_show_text(string)
|
124
|
+
PDF::Reader::Error.validate_type_as_malformed(string, "string", String)
|
101
125
|
if @state.current_font.nil?
|
102
126
|
raise PDF::Reader::MalformedPDFError, "current font is invalid"
|
103
127
|
end
|
@@ -111,7 +135,7 @@ module PDF
|
|
111
135
|
|
112
136
|
# apply to glyph displacement for the current glyph so the next
|
113
137
|
# glyph will appear in the correct position
|
114
|
-
glyph_width = @state.current_font.
|
138
|
+
glyph_width = @state.current_font.glyph_width_in_text_space(glyph_code)
|
115
139
|
th = 1
|
116
140
|
scaled_glyph_width = glyph_width * @state.font_size * th
|
117
141
|
unless utf8_chars == SPACE
|
@@ -128,14 +152,37 @@ module PDF
|
|
128
152
|
y = tmp * -1
|
129
153
|
elsif @page.rotate == 180
|
130
154
|
y *= -1
|
155
|
+
x *= -1
|
131
156
|
elsif @page.rotate == 270
|
132
|
-
tmp =
|
133
|
-
|
134
|
-
|
157
|
+
tmp = y
|
158
|
+
y = x
|
159
|
+
x = tmp * -1
|
135
160
|
end
|
136
161
|
return x, y
|
137
162
|
end
|
138
163
|
|
164
|
+
# take a collection of TextRun objects and merge any that are in close
|
165
|
+
# proximity
|
166
|
+
def merge_runs(runs)
|
167
|
+
runs.group_by { |char|
|
168
|
+
char.y.to_i
|
169
|
+
}.map { |y, chars|
|
170
|
+
group_chars_into_runs(chars.sort)
|
171
|
+
}.flatten.sort
|
172
|
+
end
|
173
|
+
|
174
|
+
def group_chars_into_runs(chars)
|
175
|
+
chars.each_with_object([]) do |char, runs|
|
176
|
+
if runs.empty?
|
177
|
+
runs << char
|
178
|
+
elsif runs.last.mergable?(char)
|
179
|
+
runs[-1] = runs.last + char
|
180
|
+
else
|
181
|
+
runs << char
|
182
|
+
end
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
139
186
|
end
|
140
187
|
end
|
141
188
|
end
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -79,8 +80,8 @@ class PDF::Reader
|
|
79
80
|
token
|
80
81
|
elsif operators.has_key? token
|
81
82
|
Token.new(token)
|
82
|
-
elsif token.
|
83
|
-
token
|
83
|
+
elsif token.frozen?
|
84
|
+
token
|
84
85
|
elsif token =~ /\d*\.\d/
|
85
86
|
token.to_f
|
86
87
|
else
|
@@ -102,7 +103,7 @@ class PDF::Reader
|
|
102
103
|
obj = parse_token
|
103
104
|
post_obj = parse_token
|
104
105
|
|
105
|
-
if post_obj == "stream"
|
106
|
+
if obj.is_a?(Hash) && post_obj == "stream"
|
106
107
|
stream(obj)
|
107
108
|
else
|
108
109
|
obj
|
@@ -120,7 +121,7 @@ class PDF::Reader
|
|
120
121
|
key = parse_token
|
121
122
|
break if key.kind_of?(Token) and key == ">>"
|
122
123
|
raise MalformedPDFError, "unterminated dict" if @buffer.empty?
|
123
|
-
|
124
|
+
PDF::Reader::Error.validate_type_as_malformed(key, "Dictionary key", Symbol)
|
124
125
|
|
125
126
|
value = parse_token
|
126
127
|
value.kind_of?(Token) and Error.str_assert_not(value, ">>")
|
@@ -166,7 +167,9 @@ class PDF::Reader
|
|
166
167
|
|
167
168
|
# add a missing digit if required, as required by the spec
|
168
169
|
str << "0" unless str.size % 2 == 0
|
169
|
-
str.
|
170
|
+
str.chars.each_slice(2).map { |nibbles|
|
171
|
+
nibbles.join("").hex.chr
|
172
|
+
}.join.force_encoding("binary")
|
170
173
|
end
|
171
174
|
################################################################################
|
172
175
|
# Reads a PDF String from the buffer and converts it to a Ruby String
|
@@ -175,15 +178,18 @@ class PDF::Reader
|
|
175
178
|
return "".dup.force_encoding("binary") if str == ")"
|
176
179
|
Error.assert_equal(parse_token, ")")
|
177
180
|
|
178
|
-
str.gsub!(/\\([nrtbf()\\\n]
|
179
|
-
|
181
|
+
str.gsub!(/\\(\r\n|[nrtbf()\\\n\r]|([0-7]{1,3}))?|\r\n?/m) do |match|
|
182
|
+
if $2.nil? # not octal digits
|
183
|
+
MAPPING[match] || "".dup
|
184
|
+
else # must be octal digits
|
185
|
+
($2.oct & 0xff).chr # ignore high level overflow
|
186
|
+
end
|
180
187
|
end
|
181
188
|
str.force_encoding("binary")
|
182
189
|
end
|
183
190
|
|
184
191
|
MAPPING = {
|
185
192
|
"\r" => "\n",
|
186
|
-
"\n\r" => "\n",
|
187
193
|
"\r\n" => "\n",
|
188
194
|
"\\n" => "\n",
|
189
195
|
"\\r" => "\r",
|
@@ -194,20 +200,25 @@ class PDF::Reader
|
|
194
200
|
"\\)" => ")",
|
195
201
|
"\\\\" => "\\",
|
196
202
|
"\\\n" => "",
|
203
|
+
"\\\r" => "",
|
204
|
+
"\\\r\n" => "",
|
197
205
|
}
|
198
|
-
0.upto(9) { |n| MAPPING["\\00"+n.to_s] = ("00"+n.to_s).oct.chr }
|
199
|
-
0.upto(99) { |n| MAPPING["\\0"+n.to_s] = ("0"+n.to_s).oct.chr }
|
200
|
-
0.upto(377) { |n| MAPPING["\\"+n.to_s] = n.to_s.oct.chr }
|
201
206
|
|
202
207
|
################################################################################
|
203
208
|
# Decodes the contents of a PDF Stream and returns it as a Ruby String.
|
204
209
|
def stream(dict)
|
205
210
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
|
206
211
|
if @objects
|
207
|
-
length = @objects.
|
212
|
+
length = @objects.deref_integer(dict[:Length])
|
213
|
+
if dict[:Filter]
|
214
|
+
dict[:Filter] = @objects.deref_name_or_array(dict[:Filter])
|
215
|
+
end
|
208
216
|
else
|
209
217
|
length = dict[:Length] || 0
|
210
218
|
end
|
219
|
+
|
220
|
+
PDF::Reader::Error.validate_type_as_malformed(length, "length", Numeric)
|
221
|
+
|
211
222
|
data = @buffer.read(length, :skip_eol => true)
|
212
223
|
|
213
224
|
Error.str_assert(parse_token, "endstream")
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# PDFs are all about positioning content on a page, so there's lots of need to
|
9
|
+
# work with a set of X,Y coordinates.
|
10
|
+
#
|
11
|
+
class Point
|
12
|
+
|
13
|
+
attr_reader :x, :y
|
14
|
+
|
15
|
+
def initialize(x, y)
|
16
|
+
@x, @y = x, y
|
17
|
+
end
|
18
|
+
|
19
|
+
def ==(other)
|
20
|
+
other.respond_to?(:x) && other.respond_to?(:y) && x == other.x && y == other.y
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
require 'digest/md5'
|
6
|
+
require 'rc4'
|
7
|
+
|
8
|
+
class PDF::Reader
|
9
|
+
|
10
|
+
# Decrypts data using the RC4 algorithm defined in the PDF spec. Requires
|
11
|
+
# a decryption key, which is usually generated by PDF::Reader::StandardKeyBuilder
|
12
|
+
#
|
13
|
+
class Rc4SecurityHandler
|
14
|
+
|
15
|
+
def initialize(key)
|
16
|
+
@encrypt_key = key
|
17
|
+
end
|
18
|
+
|
19
|
+
##7.6.2 General Encryption Algorithm
|
20
|
+
#
|
21
|
+
# Algorithm 1: Encryption of data using the RC4 algorithm
|
22
|
+
#
|
23
|
+
# version <=3 or (version == 4 and CFM == V2)
|
24
|
+
#
|
25
|
+
# buf - a string to decrypt
|
26
|
+
# ref - a PDF::Reader::Reference for the object to decrypt
|
27
|
+
#
|
28
|
+
def decrypt( buf, ref )
|
29
|
+
objKey = @encrypt_key.dup
|
30
|
+
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
31
|
+
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
32
|
+
length = objKey.length < 16 ? objKey.length : 16
|
33
|
+
rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
|
34
|
+
rc4.decrypt(buf)
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# PDFs represent rectangles all over the place. They're 4 element arrays, like this:
|
9
|
+
#
|
10
|
+
# [A, B, C, D]
|
11
|
+
#
|
12
|
+
# Four element arrays are yucky to work with though, so here's a class that's better.
|
13
|
+
# Initialize it with the 4 elements, and get utility functions (width, height, etc)
|
14
|
+
# for free.
|
15
|
+
#
|
16
|
+
# By convention the first two elements are x1, y1, the co-ords for the bottom left corner
|
17
|
+
# of the rectangle. The third and fourth elements are x2, y2, the co-ords for the top right
|
18
|
+
# corner of the rectangle. It's valid for the alternative corners to be used though, so
|
19
|
+
# we don't assume which is which.
|
20
|
+
#
|
21
|
+
class Rectangle
|
22
|
+
|
23
|
+
attr_reader :bottom_left, :bottom_right, :top_left, :top_right
|
24
|
+
|
25
|
+
def initialize(x1, y1, x2, y2)
|
26
|
+
set_corners(x1, y1, x2, y2)
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.from_array(arr)
|
30
|
+
if arr.size != 4
|
31
|
+
raise ArgumentError, "Only 4-element Arrays can be converted to a Rectangle"
|
32
|
+
end
|
33
|
+
|
34
|
+
PDF::Reader::Rectangle.new(
|
35
|
+
arr[0].to_f,
|
36
|
+
arr[1].to_f,
|
37
|
+
arr[2].to_f,
|
38
|
+
arr[3].to_f,
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
42
|
+
def ==(other)
|
43
|
+
to_a == other.to_a
|
44
|
+
end
|
45
|
+
|
46
|
+
def height
|
47
|
+
top_right.y - bottom_right.y
|
48
|
+
end
|
49
|
+
|
50
|
+
def width
|
51
|
+
bottom_right.x - bottom_left.x
|
52
|
+
end
|
53
|
+
|
54
|
+
def contains?(point)
|
55
|
+
point.x >= bottom_left.x && point.x <= top_right.x &&
|
56
|
+
point.y >= bottom_left.y && point.y <= top_right.y
|
57
|
+
end
|
58
|
+
|
59
|
+
# A pdf-style 4-number array
|
60
|
+
def to_a
|
61
|
+
[
|
62
|
+
bottom_left.x,
|
63
|
+
bottom_left.y,
|
64
|
+
top_right.x,
|
65
|
+
top_right.y,
|
66
|
+
]
|
67
|
+
end
|
68
|
+
|
69
|
+
def apply_rotation(degrees)
|
70
|
+
return if degrees != 90 && degrees != 180 && degrees != 270
|
71
|
+
|
72
|
+
if degrees == 90
|
73
|
+
new_x1 = bottom_left.x
|
74
|
+
new_y1 = bottom_left.y - width
|
75
|
+
new_x2 = bottom_left.x + height
|
76
|
+
new_y2 = bottom_left.y
|
77
|
+
elsif degrees == 180
|
78
|
+
new_x1 = bottom_left.x - width
|
79
|
+
new_y1 = bottom_left.y - height
|
80
|
+
new_x2 = bottom_left.x
|
81
|
+
new_y2 = bottom_left.y
|
82
|
+
elsif degrees == 270
|
83
|
+
new_x1 = bottom_left.x - height
|
84
|
+
new_y1 = bottom_left.y
|
85
|
+
new_x2 = bottom_left.x
|
86
|
+
new_y2 = bottom_left.y + width
|
87
|
+
end
|
88
|
+
set_corners(new_x1 || 0, new_y1 || 0, new_x2 || 0, new_y2 || 0)
|
89
|
+
end
|
90
|
+
|
91
|
+
private
|
92
|
+
|
93
|
+
def set_corners(x1, y1, x2, y2)
|
94
|
+
@bottom_left = PDF::Reader::Point.new(
|
95
|
+
[x1, x2].min,
|
96
|
+
[y1, y2].min,
|
97
|
+
)
|
98
|
+
@bottom_right = PDF::Reader::Point.new(
|
99
|
+
[x1, x2].max,
|
100
|
+
[y1, y2].min,
|
101
|
+
)
|
102
|
+
@top_left = PDF::Reader::Point.new(
|
103
|
+
[x1, x2].min,
|
104
|
+
[y1, y2].max,
|
105
|
+
)
|
106
|
+
@top_right = PDF::Reader::Point.new(
|
107
|
+
[x1, x2].max,
|
108
|
+
[y1, y2].max,
|
109
|
+
)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
data/lib/pdf/reader/reference.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
module PDF
|
@@ -6,7 +7,13 @@ module PDF
|
|
6
7
|
|
7
8
|
# common resource-lookup methods shared by Page and FormXObjects
|
8
9
|
#
|
9
|
-
|
10
|
+
class Resources
|
11
|
+
|
12
|
+
def initialize(objects, resources)
|
13
|
+
@objects = objects
|
14
|
+
@resources = resources
|
15
|
+
end
|
16
|
+
|
10
17
|
# Returns a Hash of color spaces that are available to this page
|
11
18
|
#
|
12
19
|
# NOTE: this method de-serialises objects from the underlying PDF
|
@@ -14,7 +21,7 @@ module PDF
|
|
14
21
|
# of calling it over and over.
|
15
22
|
#
|
16
23
|
def color_spaces
|
17
|
-
@objects.
|
24
|
+
@objects.deref_hash!(@resources[:ColorSpace]) || {}
|
18
25
|
end
|
19
26
|
|
20
27
|
# Returns a Hash of fonts that are available to this page
|
@@ -24,7 +31,7 @@ module PDF
|
|
24
31
|
# of calling it over and over.
|
25
32
|
#
|
26
33
|
def fonts
|
27
|
-
@objects.
|
34
|
+
@objects.deref_hash!(@resources[:Font]) || {}
|
28
35
|
end
|
29
36
|
|
30
37
|
# Returns a Hash of external graphic states that are available to this
|
@@ -35,7 +42,7 @@ module PDF
|
|
35
42
|
# of calling it over and over.
|
36
43
|
#
|
37
44
|
def graphic_states
|
38
|
-
@objects.
|
45
|
+
@objects.deref_hash!(@resources[:ExtGState]) || {}
|
39
46
|
end
|
40
47
|
|
41
48
|
# Returns a Hash of patterns that are available to this page
|
@@ -45,7 +52,7 @@ module PDF
|
|
45
52
|
# of calling it over and over.
|
46
53
|
#
|
47
54
|
def patterns
|
48
|
-
@objects.
|
55
|
+
@objects.deref_hash!(@resources[:Pattern]) || {}
|
49
56
|
end
|
50
57
|
|
51
58
|
# Returns an Array of procedure sets that are available to this page
|
@@ -55,7 +62,7 @@ module PDF
|
|
55
62
|
# of calling it over and over.
|
56
63
|
#
|
57
64
|
def procedure_sets
|
58
|
-
@objects.
|
65
|
+
@objects.deref_array!(@resources[:ProcSet]) || []
|
59
66
|
end
|
60
67
|
|
61
68
|
# Returns a Hash of properties sets that are available to this page
|
@@ -65,7 +72,7 @@ module PDF
|
|
65
72
|
# of calling it over and over.
|
66
73
|
#
|
67
74
|
def properties
|
68
|
-
@objects.
|
75
|
+
@objects.deref_hash!(@resources[:Properties]) || {}
|
69
76
|
end
|
70
77
|
|
71
78
|
# Returns a Hash of shadings that are available to this page
|
@@ -75,7 +82,7 @@ module PDF
|
|
75
82
|
# of calling it over and over.
|
76
83
|
#
|
77
84
|
def shadings
|
78
|
-
@objects.
|
85
|
+
@objects.deref_hash!(@resources[:Shading]) || {}
|
79
86
|
end
|
80
87
|
|
81
88
|
# Returns a Hash of XObjects that are available to this page
|
@@ -85,7 +92,7 @@ module PDF
|
|
85
92
|
# of calling it over and over.
|
86
93
|
#
|
87
94
|
def xobjects
|
88
|
-
@objects.
|
95
|
+
@objects.deref_hash!(@resources[:XObject]) || {}
|
89
96
|
end
|
90
97
|
|
91
98
|
end
|