pdf-reader 2.5.0 → 2.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +42 -0
- data/README.md +16 -1
- data/Rakefile +1 -1
- data/examples/extract_fonts.rb +12 -7
- data/examples/rspec.rb +1 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +90 -46
- data/lib/pdf/reader/cid_widths.rb +1 -0
- data/lib/pdf/reader/cmap.rb +65 -50
- data/lib/pdf/reader/encoding.rb +3 -2
- data/lib/pdf/reader/error.rb +19 -3
- data/lib/pdf/reader/filter/ascii85.rb +7 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
- data/lib/pdf/reader/filter/depredict.rb +11 -9
- data/lib/pdf/reader/filter/flate.rb +4 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +1 -1
- data/lib/pdf/reader/filter/run_length.rb +19 -13
- data/lib/pdf/reader/filter.rb +2 -1
- data/lib/pdf/reader/font.rb +72 -16
- data/lib/pdf/reader/font_descriptor.rb +19 -17
- data/lib/pdf/reader/form_xobject.rb +15 -5
- data/lib/pdf/reader/glyph_hash.rb +16 -9
- data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/lzw.rb +4 -2
- data/lib/pdf/reader/null_security_handler.rb +1 -4
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +252 -44
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
- data/lib/pdf/reader/page.rb +99 -19
- data/lib/pdf/reader/page_layout.rb +36 -37
- data/lib/pdf/reader/page_state.rb +12 -11
- data/lib/pdf/reader/page_text_receiver.rb +57 -10
- data/lib/pdf/reader/pages_strategy.rb +1 -0
- data/lib/pdf/reader/parser.rb +23 -12
- data/lib/pdf/reader/point.rb +25 -0
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +113 -0
- data/lib/pdf/reader/reference.rb +1 -0
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
- data/lib/pdf/reader/stream.rb +2 -1
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +14 -6
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +1 -0
- data/lib/pdf/reader/type_check.rb +52 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
- data/lib/pdf/reader/width_calculator/composite.rb +1 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +27 -4
- data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
- data/lib/pdf/reader.rb +46 -15
- data/lib/pdf-reader.rb +1 -0
- data/rbi/pdf-reader.rbi +1978 -0
- metadata +21 -10
- data/lib/pdf/reader/orientation_detector.rb +0 -34
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -1,7 +1,9 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'pdf/reader/overlapping_runs_filter'
|
6
|
+
require 'pdf/reader/zero_width_runs_filter'
|
5
7
|
|
6
8
|
class PDF::Reader
|
7
9
|
|
@@ -15,14 +17,15 @@ class PDF::Reader
|
|
15
17
|
DEFAULT_FONT_SIZE = 12
|
16
18
|
|
17
19
|
def initialize(runs, mediabox)
|
18
|
-
|
20
|
+
# mediabox is a 4-element array for now, but it'd be nice to switch to a
|
21
|
+
# PDF::Reader::Rectangle at some point
|
22
|
+
PDF::Reader::Error.validate_not_nil(mediabox, "mediabox")
|
19
23
|
|
20
|
-
@
|
24
|
+
@mediabox = process_mediabox(mediabox)
|
25
|
+
@runs = runs
|
21
26
|
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
22
27
|
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
23
|
-
@
|
24
|
-
@page_width = (mediabox[2] - mediabox[0]).abs
|
25
|
-
@page_height = (mediabox[3] - mediabox[1]).abs
|
28
|
+
@median_glyph_width = median(@runs.map(&:mean_character_width)) || 0
|
26
29
|
@x_offset = @runs.map(&:x).sort.first || 0
|
27
30
|
lowest_y = @runs.map(&:y).sort.first || 0
|
28
31
|
@y_offset = lowest_y > 0 ? 0 : lowest_y
|
@@ -45,6 +48,14 @@ class PDF::Reader
|
|
45
48
|
|
46
49
|
private
|
47
50
|
|
51
|
+
def page_width
|
52
|
+
@mediabox.width
|
53
|
+
end
|
54
|
+
|
55
|
+
def page_height
|
56
|
+
@mediabox.height
|
57
|
+
end
|
58
|
+
|
48
59
|
# given an array of strings, return a new array with empty rows from the
|
49
60
|
# beginning and end removed.
|
50
61
|
#
|
@@ -63,19 +74,19 @@ class PDF::Reader
|
|
63
74
|
end
|
64
75
|
|
65
76
|
def row_count
|
66
|
-
@row_count ||= (
|
77
|
+
@row_count ||= (page_height / @mean_font_size).floor
|
67
78
|
end
|
68
79
|
|
69
80
|
def col_count
|
70
|
-
@col_count ||= ((
|
81
|
+
@col_count ||= ((page_width / @median_glyph_width) * 1.05).floor
|
71
82
|
end
|
72
83
|
|
73
84
|
def row_multiplier
|
74
|
-
@row_multiplier ||=
|
85
|
+
@row_multiplier ||= page_height.to_f / row_count.to_f
|
75
86
|
end
|
76
87
|
|
77
88
|
def col_multiplier
|
78
|
-
@col_multiplier ||=
|
89
|
+
@col_multiplier ||= page_width.to_f / col_count.to_f
|
79
90
|
end
|
80
91
|
|
81
92
|
def mean(collection)
|
@@ -86,40 +97,28 @@ class PDF::Reader
|
|
86
97
|
end
|
87
98
|
end
|
88
99
|
|
89
|
-
def
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
100
|
+
def median(collection)
|
101
|
+
if collection.size == 0
|
102
|
+
0
|
103
|
+
else
|
104
|
+
collection.sort[(collection.size * 0.5).floor]
|
105
|
+
end
|
95
106
|
end
|
96
107
|
|
97
|
-
|
98
|
-
|
99
|
-
def merge_runs(runs)
|
100
|
-
runs.group_by { |char|
|
101
|
-
char.y.to_i
|
102
|
-
}.map { |y, chars|
|
103
|
-
group_chars_into_runs(chars.sort)
|
104
|
-
}.flatten.sort
|
108
|
+
def local_string_insert(haystack, needle, index)
|
109
|
+
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
105
110
|
end
|
106
111
|
|
107
|
-
def
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
runs << head
|
116
|
-
end
|
112
|
+
def process_mediabox(mediabox)
|
113
|
+
if mediabox.is_a?(Array)
|
114
|
+
msg = "Passing the mediabox to PageLayout as an Array is deprecated," +
|
115
|
+
" please use a Rectangle instead"
|
116
|
+
$stderr.puts msg
|
117
|
+
PDF::Reader::Rectangle.from_array(mediabox)
|
118
|
+
else
|
119
|
+
mediabox
|
117
120
|
end
|
118
|
-
runs
|
119
121
|
end
|
120
122
|
|
121
|
-
def local_string_insert(haystack, needle, index)
|
122
|
-
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
123
|
-
end
|
124
123
|
end
|
125
124
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'pdf/reader/transformation_matrix'
|
@@ -312,7 +313,7 @@ class PDF::Reader
|
|
312
313
|
# may need to be added
|
313
314
|
#
|
314
315
|
def process_glyph_displacement(w0, tj, word_boundary)
|
315
|
-
fs =
|
316
|
+
fs = state[:text_font_size]
|
316
317
|
tc = state[:char_spacing]
|
317
318
|
if word_boundary
|
318
319
|
tw = state[:word_spacing]
|
@@ -330,16 +331,16 @@ class PDF::Reader
|
|
330
331
|
# apply horizontal scaling to spacing values but not font size
|
331
332
|
tx = ((w0 * fs) + tc + tw) * th
|
332
333
|
end
|
333
|
-
|
334
|
-
# TODO: I'm pretty sure that tx shouldn't need to be divided by
|
335
|
-
# ctm[0] here, but this gets my tests green and I'm out of
|
336
|
-
# ideas for now
|
337
334
|
# TODO: support ty > 0
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
335
|
+
ty = 0
|
336
|
+
temp = TransformationMatrix.new(1, 0,
|
337
|
+
0, 1,
|
338
|
+
tx, ty)
|
339
|
+
@text_matrix = temp.multiply!(
|
340
|
+
@text_matrix.a, @text_matrix.b,
|
341
|
+
@text_matrix.c, @text_matrix.d,
|
342
|
+
@text_matrix.e, @text_matrix.f
|
343
|
+
)
|
343
344
|
@font_size = @text_rendering_matrix = nil # invalidate cached value
|
344
345
|
end
|
345
346
|
|
@@ -383,7 +384,7 @@ class PDF::Reader
|
|
383
384
|
#
|
384
385
|
def build_fonts(raw_fonts)
|
385
386
|
wrapped_fonts = raw_fonts.map { |label, font|
|
386
|
-
[label, PDF::Reader::Font.new(@objects, @objects.
|
387
|
+
[label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
|
387
388
|
}
|
388
389
|
|
389
390
|
::Hash[wrapped_fonts]
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'forwardable'
|
@@ -44,14 +45,34 @@ module PDF
|
|
44
45
|
@page = page
|
45
46
|
@content = []
|
46
47
|
@characters = []
|
47
|
-
@mediabox = page.objects.deref(page.attributes[:MediaBox])
|
48
|
-
device_bl = @state.ctm_transform(@mediabox[0], @mediabox[1])
|
49
|
-
device_tr = @state.ctm_transform(@mediabox[2], @mediabox[3])
|
50
|
-
@device_mediabox = [ device_bl.first, device_bl.last, device_tr.first, device_tr.last]
|
51
48
|
end
|
52
49
|
|
50
|
+
def runs(opts = {})
|
51
|
+
runs = @characters
|
52
|
+
|
53
|
+
if rect = opts.fetch(:rect, @page.rectangles[:CropBox])
|
54
|
+
runs = BoundingRectangleRunsFilter.runs_within_rect(runs, rect)
|
55
|
+
end
|
56
|
+
|
57
|
+
if opts.fetch(:skip_zero_width, true)
|
58
|
+
runs = ZeroWidthRunsFilter.exclude_zero_width_runs(runs)
|
59
|
+
end
|
60
|
+
|
61
|
+
if opts.fetch(:skip_overlapping, true)
|
62
|
+
runs = OverlappingRunsFilter.exclude_redundant_runs(runs)
|
63
|
+
end
|
64
|
+
|
65
|
+
if opts.fetch(:merge, true)
|
66
|
+
runs = merge_runs(runs)
|
67
|
+
end
|
68
|
+
|
69
|
+
runs
|
70
|
+
end
|
71
|
+
|
72
|
+
# deprecated
|
53
73
|
def content
|
54
|
-
|
74
|
+
mediabox = @page.rectangles[:MediaBox]
|
75
|
+
PageLayout.new(runs, mediabox).to_s
|
55
76
|
end
|
56
77
|
|
57
78
|
#####################################################
|
@@ -66,8 +87,10 @@ module PDF
|
|
66
87
|
params.each do |arg|
|
67
88
|
if arg.is_a?(String)
|
68
89
|
internal_show_text(arg)
|
69
|
-
|
90
|
+
elsif arg.is_a?(Numeric)
|
70
91
|
@state.process_glyph_displacement(0, arg, false)
|
92
|
+
else
|
93
|
+
# skip it
|
71
94
|
end
|
72
95
|
end
|
73
96
|
end
|
@@ -98,6 +121,7 @@ module PDF
|
|
98
121
|
private
|
99
122
|
|
100
123
|
def internal_show_text(string)
|
124
|
+
PDF::Reader::Error.validate_type_as_malformed(string, "string", String)
|
101
125
|
if @state.current_font.nil?
|
102
126
|
raise PDF::Reader::MalformedPDFError, "current font is invalid"
|
103
127
|
end
|
@@ -111,7 +135,7 @@ module PDF
|
|
111
135
|
|
112
136
|
# apply to glyph displacement for the current glyph so the next
|
113
137
|
# glyph will appear in the correct position
|
114
|
-
glyph_width = @state.current_font.
|
138
|
+
glyph_width = @state.current_font.glyph_width_in_text_space(glyph_code)
|
115
139
|
th = 1
|
116
140
|
scaled_glyph_width = glyph_width * @state.font_size * th
|
117
141
|
unless utf8_chars == SPACE
|
@@ -128,14 +152,37 @@ module PDF
|
|
128
152
|
y = tmp * -1
|
129
153
|
elsif @page.rotate == 180
|
130
154
|
y *= -1
|
155
|
+
x *= -1
|
131
156
|
elsif @page.rotate == 270
|
132
|
-
tmp =
|
133
|
-
|
134
|
-
|
157
|
+
tmp = y
|
158
|
+
y = x
|
159
|
+
x = tmp * -1
|
135
160
|
end
|
136
161
|
return x, y
|
137
162
|
end
|
138
163
|
|
164
|
+
# take a collection of TextRun objects and merge any that are in close
|
165
|
+
# proximity
|
166
|
+
def merge_runs(runs)
|
167
|
+
runs.group_by { |char|
|
168
|
+
char.y.to_i
|
169
|
+
}.map { |y, chars|
|
170
|
+
group_chars_into_runs(chars.sort)
|
171
|
+
}.flatten.sort
|
172
|
+
end
|
173
|
+
|
174
|
+
def group_chars_into_runs(chars)
|
175
|
+
chars.each_with_object([]) do |char, runs|
|
176
|
+
if runs.empty?
|
177
|
+
runs << char
|
178
|
+
elsif runs.last.mergable?(char)
|
179
|
+
runs[-1] = runs.last + char
|
180
|
+
else
|
181
|
+
runs << char
|
182
|
+
end
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
139
186
|
end
|
140
187
|
end
|
141
188
|
end
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -79,8 +80,8 @@ class PDF::Reader
|
|
79
80
|
token
|
80
81
|
elsif operators.has_key? token
|
81
82
|
Token.new(token)
|
82
|
-
elsif token.
|
83
|
-
token
|
83
|
+
elsif token.frozen?
|
84
|
+
token
|
84
85
|
elsif token =~ /\d*\.\d/
|
85
86
|
token.to_f
|
86
87
|
else
|
@@ -102,7 +103,7 @@ class PDF::Reader
|
|
102
103
|
obj = parse_token
|
103
104
|
post_obj = parse_token
|
104
105
|
|
105
|
-
if post_obj == "stream"
|
106
|
+
if obj.is_a?(Hash) && post_obj == "stream"
|
106
107
|
stream(obj)
|
107
108
|
else
|
108
109
|
obj
|
@@ -120,7 +121,7 @@ class PDF::Reader
|
|
120
121
|
key = parse_token
|
121
122
|
break if key.kind_of?(Token) and key == ">>"
|
122
123
|
raise MalformedPDFError, "unterminated dict" if @buffer.empty?
|
123
|
-
|
124
|
+
PDF::Reader::Error.validate_type_as_malformed(key, "Dictionary key", Symbol)
|
124
125
|
|
125
126
|
value = parse_token
|
126
127
|
value.kind_of?(Token) and Error.str_assert_not(value, ">>")
|
@@ -166,7 +167,9 @@ class PDF::Reader
|
|
166
167
|
|
167
168
|
# add a missing digit if required, as required by the spec
|
168
169
|
str << "0" unless str.size % 2 == 0
|
169
|
-
str.
|
170
|
+
str.chars.each_slice(2).map { |nibbles|
|
171
|
+
nibbles.join("").hex.chr
|
172
|
+
}.join.force_encoding("binary")
|
170
173
|
end
|
171
174
|
################################################################################
|
172
175
|
# Reads a PDF String from the buffer and converts it to a Ruby String
|
@@ -175,15 +178,18 @@ class PDF::Reader
|
|
175
178
|
return "".dup.force_encoding("binary") if str == ")"
|
176
179
|
Error.assert_equal(parse_token, ")")
|
177
180
|
|
178
|
-
str.gsub!(/\\([nrtbf()\\\n]
|
179
|
-
|
181
|
+
str.gsub!(/\\(\r\n|[nrtbf()\\\n\r]|([0-7]{1,3}))?|\r\n?/m) do |match|
|
182
|
+
if $2.nil? # not octal digits
|
183
|
+
MAPPING[match] || "".dup
|
184
|
+
else # must be octal digits
|
185
|
+
($2.oct & 0xff).chr # ignore high level overflow
|
186
|
+
end
|
180
187
|
end
|
181
188
|
str.force_encoding("binary")
|
182
189
|
end
|
183
190
|
|
184
191
|
MAPPING = {
|
185
192
|
"\r" => "\n",
|
186
|
-
"\n\r" => "\n",
|
187
193
|
"\r\n" => "\n",
|
188
194
|
"\\n" => "\n",
|
189
195
|
"\\r" => "\r",
|
@@ -194,20 +200,25 @@ class PDF::Reader
|
|
194
200
|
"\\)" => ")",
|
195
201
|
"\\\\" => "\\",
|
196
202
|
"\\\n" => "",
|
203
|
+
"\\\r" => "",
|
204
|
+
"\\\r\n" => "",
|
197
205
|
}
|
198
|
-
0.upto(9) { |n| MAPPING["\\00"+n.to_s] = ("00"+n.to_s).oct.chr }
|
199
|
-
0.upto(99) { |n| MAPPING["\\0"+n.to_s] = ("0"+n.to_s).oct.chr }
|
200
|
-
0.upto(377) { |n| MAPPING["\\"+n.to_s] = n.to_s.oct.chr }
|
201
206
|
|
202
207
|
################################################################################
|
203
208
|
# Decodes the contents of a PDF Stream and returns it as a Ruby String.
|
204
209
|
def stream(dict)
|
205
210
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
|
206
211
|
if @objects
|
207
|
-
length = @objects.
|
212
|
+
length = @objects.deref_integer(dict[:Length])
|
213
|
+
if dict[:Filter]
|
214
|
+
dict[:Filter] = @objects.deref_name_or_array(dict[:Filter])
|
215
|
+
end
|
208
216
|
else
|
209
217
|
length = dict[:Length] || 0
|
210
218
|
end
|
219
|
+
|
220
|
+
PDF::Reader::Error.validate_type_as_malformed(length, "length", Numeric)
|
221
|
+
|
211
222
|
data = @buffer.read(length, :skip_eol => true)
|
212
223
|
|
213
224
|
Error.str_assert(parse_token, "endstream")
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# PDFs are all about positioning content on a page, so there's lots of need to
|
9
|
+
# work with a set of X,Y coordinates.
|
10
|
+
#
|
11
|
+
class Point
|
12
|
+
|
13
|
+
attr_reader :x, :y
|
14
|
+
|
15
|
+
def initialize(x, y)
|
16
|
+
@x, @y = x, y
|
17
|
+
end
|
18
|
+
|
19
|
+
def ==(other)
|
20
|
+
other.respond_to?(:x) && other.respond_to?(:y) && x == other.x && y == other.y
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
require 'digest/md5'
|
6
|
+
require 'rc4'
|
7
|
+
|
8
|
+
class PDF::Reader
|
9
|
+
|
10
|
+
# Decrypts data using the RC4 algorithm defined in the PDF spec. Requires
|
11
|
+
# a decryption key, which is usually generated by PDF::Reader::StandardKeyBuilder
|
12
|
+
#
|
13
|
+
class Rc4SecurityHandler
|
14
|
+
|
15
|
+
def initialize(key)
|
16
|
+
@encrypt_key = key
|
17
|
+
end
|
18
|
+
|
19
|
+
##7.6.2 General Encryption Algorithm
|
20
|
+
#
|
21
|
+
# Algorithm 1: Encryption of data using the RC4 algorithm
|
22
|
+
#
|
23
|
+
# version <=3 or (version == 4 and CFM == V2)
|
24
|
+
#
|
25
|
+
# buf - a string to decrypt
|
26
|
+
# ref - a PDF::Reader::Reference for the object to decrypt
|
27
|
+
#
|
28
|
+
def decrypt( buf, ref )
|
29
|
+
objKey = @encrypt_key.dup
|
30
|
+
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
31
|
+
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
32
|
+
length = objKey.length < 16 ? objKey.length : 16
|
33
|
+
rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
|
34
|
+
rc4.decrypt(buf)
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# PDFs represent rectangles all over the place. They're 4 element arrays, like this:
|
9
|
+
#
|
10
|
+
# [A, B, C, D]
|
11
|
+
#
|
12
|
+
# Four element arrays are yucky to work with though, so here's a class that's better.
|
13
|
+
# Initialize it with the 4 elements, and get utility functions (width, height, etc)
|
14
|
+
# for free.
|
15
|
+
#
|
16
|
+
# By convention the first two elements are x1, y1, the co-ords for the bottom left corner
|
17
|
+
# of the rectangle. The third and fourth elements are x2, y2, the co-ords for the top right
|
18
|
+
# corner of the rectangle. It's valid for the alternative corners to be used though, so
|
19
|
+
# we don't assume which is which.
|
20
|
+
#
|
21
|
+
class Rectangle
|
22
|
+
|
23
|
+
attr_reader :bottom_left, :bottom_right, :top_left, :top_right
|
24
|
+
|
25
|
+
def initialize(x1, y1, x2, y2)
|
26
|
+
set_corners(x1, y1, x2, y2)
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.from_array(arr)
|
30
|
+
if arr.size != 4
|
31
|
+
raise ArgumentError, "Only 4-element Arrays can be converted to a Rectangle"
|
32
|
+
end
|
33
|
+
|
34
|
+
PDF::Reader::Rectangle.new(
|
35
|
+
arr[0].to_f,
|
36
|
+
arr[1].to_f,
|
37
|
+
arr[2].to_f,
|
38
|
+
arr[3].to_f,
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
42
|
+
def ==(other)
|
43
|
+
to_a == other.to_a
|
44
|
+
end
|
45
|
+
|
46
|
+
def height
|
47
|
+
top_right.y - bottom_right.y
|
48
|
+
end
|
49
|
+
|
50
|
+
def width
|
51
|
+
bottom_right.x - bottom_left.x
|
52
|
+
end
|
53
|
+
|
54
|
+
def contains?(point)
|
55
|
+
point.x >= bottom_left.x && point.x <= top_right.x &&
|
56
|
+
point.y >= bottom_left.y && point.y <= top_right.y
|
57
|
+
end
|
58
|
+
|
59
|
+
# A pdf-style 4-number array
|
60
|
+
def to_a
|
61
|
+
[
|
62
|
+
bottom_left.x,
|
63
|
+
bottom_left.y,
|
64
|
+
top_right.x,
|
65
|
+
top_right.y,
|
66
|
+
]
|
67
|
+
end
|
68
|
+
|
69
|
+
def apply_rotation(degrees)
|
70
|
+
return if degrees != 90 && degrees != 180 && degrees != 270
|
71
|
+
|
72
|
+
if degrees == 90
|
73
|
+
new_x1 = bottom_left.x
|
74
|
+
new_y1 = bottom_left.y - width
|
75
|
+
new_x2 = bottom_left.x + height
|
76
|
+
new_y2 = bottom_left.y
|
77
|
+
elsif degrees == 180
|
78
|
+
new_x1 = bottom_left.x - width
|
79
|
+
new_y1 = bottom_left.y - height
|
80
|
+
new_x2 = bottom_left.x
|
81
|
+
new_y2 = bottom_left.y
|
82
|
+
elsif degrees == 270
|
83
|
+
new_x1 = bottom_left.x - height
|
84
|
+
new_y1 = bottom_left.y
|
85
|
+
new_x2 = bottom_left.x
|
86
|
+
new_y2 = bottom_left.y + width
|
87
|
+
end
|
88
|
+
set_corners(new_x1 || 0, new_y1 || 0, new_x2 || 0, new_y2 || 0)
|
89
|
+
end
|
90
|
+
|
91
|
+
private
|
92
|
+
|
93
|
+
def set_corners(x1, y1, x2, y2)
|
94
|
+
@bottom_left = PDF::Reader::Point.new(
|
95
|
+
[x1, x2].min,
|
96
|
+
[y1, y2].min,
|
97
|
+
)
|
98
|
+
@bottom_right = PDF::Reader::Point.new(
|
99
|
+
[x1, x2].max,
|
100
|
+
[y1, y2].min,
|
101
|
+
)
|
102
|
+
@top_left = PDF::Reader::Point.new(
|
103
|
+
[x1, x2].min,
|
104
|
+
[y1, y2].max,
|
105
|
+
)
|
106
|
+
@top_right = PDF::Reader::Point.new(
|
107
|
+
[x1, x2].max,
|
108
|
+
[y1, y2].max,
|
109
|
+
)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
data/lib/pdf/reader/reference.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
module PDF
|
@@ -6,7 +7,13 @@ module PDF
|
|
6
7
|
|
7
8
|
# mixin for common methods in Page and FormXobjects
|
8
9
|
#
|
9
|
-
|
10
|
+
class Resources
|
11
|
+
|
12
|
+
def initialize(objects, resources)
|
13
|
+
@objects = objects
|
14
|
+
@resources = resources
|
15
|
+
end
|
16
|
+
|
10
17
|
# Returns a Hash of color spaces that are available to this page
|
11
18
|
#
|
12
19
|
# NOTE: this method de-serialise objects from the underlying PDF
|
@@ -14,7 +21,7 @@ module PDF
|
|
14
21
|
# of calling it over and over.
|
15
22
|
#
|
16
23
|
def color_spaces
|
17
|
-
@objects.
|
24
|
+
@objects.deref_hash!(@resources[:ColorSpace]) || {}
|
18
25
|
end
|
19
26
|
|
20
27
|
# Returns a Hash of fonts that are available to this page
|
@@ -24,7 +31,7 @@ module PDF
|
|
24
31
|
# of calling it over and over.
|
25
32
|
#
|
26
33
|
def fonts
|
27
|
-
@objects.
|
34
|
+
@objects.deref_hash!(@resources[:Font]) || {}
|
28
35
|
end
|
29
36
|
|
30
37
|
# Returns a Hash of external graphic states that are available to this
|
@@ -35,7 +42,7 @@ module PDF
|
|
35
42
|
# of calling it over and over.
|
36
43
|
#
|
37
44
|
def graphic_states
|
38
|
-
@objects.
|
45
|
+
@objects.deref_hash!(@resources[:ExtGState]) || {}
|
39
46
|
end
|
40
47
|
|
41
48
|
# Returns a Hash of patterns that are available to this page
|
@@ -45,7 +52,7 @@ module PDF
|
|
45
52
|
# of calling it over and over.
|
46
53
|
#
|
47
54
|
def patterns
|
48
|
-
@objects.
|
55
|
+
@objects.deref_hash!(@resources[:Pattern]) || {}
|
49
56
|
end
|
50
57
|
|
51
58
|
# Returns an Array of procedure sets that are available to this page
|
@@ -55,7 +62,7 @@ module PDF
|
|
55
62
|
# of calling it over and over.
|
56
63
|
#
|
57
64
|
def procedure_sets
|
58
|
-
@objects.
|
65
|
+
@objects.deref_array!(@resources[:ProcSet]) || []
|
59
66
|
end
|
60
67
|
|
61
68
|
# Returns a Hash of properties sets that are available to this page
|
@@ -65,7 +72,7 @@ module PDF
|
|
65
72
|
# of calling it over and over.
|
66
73
|
#
|
67
74
|
def properties
|
68
|
-
@objects.
|
75
|
+
@objects.deref_hash!(@resources[:Properties]) || {}
|
69
76
|
end
|
70
77
|
|
71
78
|
# Returns a Hash of shadings that are available to this page
|
@@ -75,7 +82,7 @@ module PDF
|
|
75
82
|
# of calling it over and over.
|
76
83
|
#
|
77
84
|
def shadings
|
78
|
-
@objects.
|
85
|
+
@objects.deref_hash!(@resources[:Shading]) || {}
|
79
86
|
end
|
80
87
|
|
81
88
|
# Returns a Hash of XObjects that are available to this page
|
@@ -85,7 +92,7 @@ module PDF
|
|
85
92
|
# of calling it over and over.
|
86
93
|
#
|
87
94
|
def xobjects
|
88
|
-
@objects.
|
95
|
+
@objects.deref_hash!(@resources[:XObject]) || {}
|
89
96
|
end
|
90
97
|
|
91
98
|
end
|