pdf-reader 2.4.2 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +44 -0
- data/README.md +16 -1
- data/Rakefile +1 -1
- data/examples/extract_fonts.rb +12 -7
- data/examples/rspec.rb +1 -0
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +63 -21
- data/lib/pdf/reader/cid_widths.rb +1 -0
- data/lib/pdf/reader/cmap.rb +5 -3
- data/lib/pdf/reader/encoding.rb +3 -2
- data/lib/pdf/reader/error.rb +11 -3
- data/lib/pdf/reader/filter/ascii85.rb +7 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
- data/lib/pdf/reader/filter/depredict.rb +10 -8
- data/lib/pdf/reader/filter/flate.rb +4 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +1 -0
- data/lib/pdf/reader/filter/run_length.rb +19 -13
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/font.rb +44 -0
- data/lib/pdf/reader/font_descriptor.rb +1 -0
- data/lib/pdf/reader/form_xobject.rb +1 -0
- data/lib/pdf/reader/glyph_hash.rb +16 -9
- data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
- data/lib/pdf/reader/lzw.rb +4 -2
- data/lib/pdf/reader/null_security_handler.rb +1 -0
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +8 -3
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
- data/lib/pdf/reader/page.rb +73 -11
- data/lib/pdf/reader/page_layout.rb +37 -37
- data/lib/pdf/reader/page_state.rb +18 -23
- data/lib/pdf/reader/page_text_receiver.rb +68 -6
- data/lib/pdf/reader/pages_strategy.rb +1 -0
- data/lib/pdf/reader/parser.rb +15 -7
- data/lib/pdf/reader/point.rb +25 -0
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/rectangle.rb +113 -0
- data/lib/pdf/reader/reference.rb +1 -0
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/resource_methods.rb +5 -0
- data/lib/pdf/reader/standard_security_handler.rb +1 -0
- data/lib/pdf/reader/standard_security_handler_v5.rb +1 -0
- data/lib/pdf/reader/stream.rb +1 -0
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +14 -6
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +1 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +8 -15
- data/lib/pdf/reader/width_calculator/composite.rb +1 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +7 -1
- data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
- data/lib/pdf/reader.rb +29 -6
- data/lib/pdf-reader.rb +1 -0
- data/rbi/pdf-reader.rbi +1763 -0
- metadata +12 -7
- data/lib/pdf/reader/orientation_detector.rb +0 -34
@@ -1,7 +1,9 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'pdf/reader/overlapping_runs_filter'
|
6
|
+
require 'pdf/reader/zero_width_runs_filter'
|
5
7
|
|
6
8
|
class PDF::Reader
|
7
9
|
|
@@ -15,14 +17,15 @@ class PDF::Reader
|
|
15
17
|
DEFAULT_FONT_SIZE = 12
|
16
18
|
|
17
19
|
def initialize(runs, mediabox)
|
18
|
-
|
20
|
+
# mediabox is a 4-element array for now, but it'd be nice to switch to a
|
21
|
+
# PDF::Reader::Rectangle at some point
|
22
|
+
PDF::Reader::Error.validate_not_nil(mediabox, "mediabox")
|
19
23
|
|
20
|
-
@
|
24
|
+
@mediabox = process_mediabox(mediabox)
|
25
|
+
@runs = runs
|
21
26
|
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
22
27
|
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
23
|
-
@
|
24
|
-
@page_width = (mediabox[2] - mediabox[0]).abs
|
25
|
-
@page_height = (mediabox[3] - mediabox[1]).abs
|
28
|
+
@median_glyph_width = median(@runs.map(&:mean_character_width)) || 0
|
26
29
|
@x_offset = @runs.map(&:x).sort.first || 0
|
27
30
|
lowest_y = @runs.map(&:y).sort.first || 0
|
28
31
|
@y_offset = lowest_y > 0 ? 0 : lowest_y
|
@@ -30,6 +33,7 @@ class PDF::Reader
|
|
30
33
|
|
31
34
|
def to_s
|
32
35
|
return "" if @runs.empty?
|
36
|
+
return "" if row_count == 0
|
33
37
|
|
34
38
|
page = row_count.times.map { |i| " " * col_count }
|
35
39
|
@runs.each do |run|
|
@@ -44,6 +48,14 @@ class PDF::Reader
|
|
44
48
|
|
45
49
|
private
|
46
50
|
|
51
|
+
def page_width
|
52
|
+
@mediabox.width
|
53
|
+
end
|
54
|
+
|
55
|
+
def page_height
|
56
|
+
@mediabox.height
|
57
|
+
end
|
58
|
+
|
47
59
|
# given an array of strings, return a new array with empty rows from the
|
48
60
|
# beginning and end removed.
|
49
61
|
#
|
@@ -62,19 +74,19 @@ class PDF::Reader
|
|
62
74
|
end
|
63
75
|
|
64
76
|
def row_count
|
65
|
-
@row_count ||= (
|
77
|
+
@row_count ||= (page_height / @mean_font_size).floor
|
66
78
|
end
|
67
79
|
|
68
80
|
def col_count
|
69
|
-
@col_count ||= ((
|
81
|
+
@col_count ||= ((page_width / @median_glyph_width) * 1.05).floor
|
70
82
|
end
|
71
83
|
|
72
84
|
def row_multiplier
|
73
|
-
@row_multiplier ||=
|
85
|
+
@row_multiplier ||= page_height.to_f / row_count.to_f
|
74
86
|
end
|
75
87
|
|
76
88
|
def col_multiplier
|
77
|
-
@col_multiplier ||=
|
89
|
+
@col_multiplier ||= page_width.to_f / col_count.to_f
|
78
90
|
end
|
79
91
|
|
80
92
|
def mean(collection)
|
@@ -85,40 +97,28 @@ class PDF::Reader
|
|
85
97
|
end
|
86
98
|
end
|
87
99
|
|
88
|
-
def
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
100
|
+
def median(collection)
|
101
|
+
if collection.size == 0
|
102
|
+
0
|
103
|
+
else
|
104
|
+
collection.sort[(collection.size * 0.5).floor]
|
105
|
+
end
|
94
106
|
end
|
95
107
|
|
96
|
-
|
97
|
-
|
98
|
-
def merge_runs(runs)
|
99
|
-
runs.group_by { |char|
|
100
|
-
char.y.to_i
|
101
|
-
}.map { |y, chars|
|
102
|
-
group_chars_into_runs(chars.sort)
|
103
|
-
}.flatten.sort
|
108
|
+
def local_string_insert(haystack, needle, index)
|
109
|
+
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
104
110
|
end
|
105
111
|
|
106
|
-
def
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
runs << head
|
115
|
-
end
|
112
|
+
def process_mediabox(mediabox)
|
113
|
+
if mediabox.is_a?(Array)
|
114
|
+
msg = "Passing the mediabox to PageLayout as an Array is deprecated," +
|
115
|
+
" please use a Rectangle instead"
|
116
|
+
$stderr.puts msg
|
117
|
+
PDF::Reader::Rectangle.from_array(mediabox)
|
118
|
+
else
|
119
|
+
mediabox
|
116
120
|
end
|
117
|
-
runs
|
118
121
|
end
|
119
122
|
|
120
|
-
def local_string_insert(haystack, needle, index)
|
121
|
-
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
122
|
-
end
|
123
123
|
end
|
124
124
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'pdf/reader/transformation_matrix'
|
@@ -30,15 +31,7 @@ class PDF::Reader
|
|
30
31
|
@xobject_stack = [page.xobjects]
|
31
32
|
@cs_stack = [page.color_spaces]
|
32
33
|
@stack = [DEFAULT_GRAPHICS_STATE.dup]
|
33
|
-
|
34
|
-
state[:ctm] = identity_matrix
|
35
|
-
else
|
36
|
-
rotate_cos = Math.cos(page.rotate * (Math::PI/180.0)).round(2)
|
37
|
-
rotate_sin = Math.sin(page.rotate * (Math::PI/180.0)).round(2)
|
38
|
-
state[:ctm] = TransformationMatrix.new(rotate_cos, rotate_sin,
|
39
|
-
rotate_sin * -1, rotate_cos,
|
40
|
-
0, 0)
|
41
|
-
end
|
34
|
+
state[:ctm] = identity_matrix
|
42
35
|
end
|
43
36
|
|
44
37
|
#####################################################
|
@@ -320,7 +313,7 @@ class PDF::Reader
|
|
320
313
|
# may need to be added
|
321
314
|
#
|
322
315
|
def process_glyph_displacement(w0, tj, word_boundary)
|
323
|
-
fs =
|
316
|
+
fs = state[:text_font_size]
|
324
317
|
tc = state[:char_spacing]
|
325
318
|
if word_boundary
|
326
319
|
tw = state[:word_spacing]
|
@@ -330,22 +323,24 @@ class PDF::Reader
|
|
330
323
|
th = state[:h_scaling]
|
331
324
|
# optimise the common path to reduce Float allocations
|
332
325
|
if th == 1 && tj == 0 && tc == 0 && tw == 0
|
333
|
-
|
334
|
-
|
326
|
+
tx = w0 * fs
|
327
|
+
elsif tj != 0
|
328
|
+
# don't apply spacing to TJ displacement
|
329
|
+
tx = (w0 - (tj/1000.0)) * fs * th
|
335
330
|
else
|
336
|
-
|
337
|
-
tx =
|
331
|
+
# apply horizontal scaling to spacing values but not font size
|
332
|
+
tx = ((w0 * fs) + tc + tw) * th
|
338
333
|
end
|
339
|
-
|
340
|
-
# TODO: I'm pretty sure that tx shouldn't need to be divided by
|
341
|
-
# ctm[0] here, but this gets my tests green and I'm out of
|
342
|
-
# ideas for now
|
343
334
|
# TODO: support ty > 0
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
335
|
+
ty = 0
|
336
|
+
temp = TransformationMatrix.new(1, 0,
|
337
|
+
0, 1,
|
338
|
+
tx, ty)
|
339
|
+
@text_matrix = temp.multiply!(
|
340
|
+
@text_matrix.a, @text_matrix.b,
|
341
|
+
@text_matrix.c, @text_matrix.d,
|
342
|
+
@text_matrix.e, @text_matrix.f
|
343
|
+
)
|
349
344
|
@font_size = @text_rendering_matrix = nil # invalidate cached value
|
350
345
|
end
|
351
346
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
require 'forwardable'
|
@@ -41,16 +42,37 @@ module PDF
|
|
41
42
|
# starting a new page
|
42
43
|
def page=(page)
|
43
44
|
@state = PageState.new(page)
|
45
|
+
@page = page
|
44
46
|
@content = []
|
45
47
|
@characters = []
|
46
|
-
@mediabox = page.objects.deref(page.attributes[:MediaBox])
|
47
|
-
device_bl = @state.ctm_transform(@mediabox[0], @mediabox[1])
|
48
|
-
device_tr = @state.ctm_transform(@mediabox[2], @mediabox[3])
|
49
|
-
@device_mediabox = [ device_bl.first, device_bl.last, device_tr.first, device_tr.last]
|
50
48
|
end
|
51
49
|
|
50
|
+
def runs(opts = {})
|
51
|
+
runs = @characters
|
52
|
+
|
53
|
+
if rect = opts.fetch(:rect, @page.rectangles[:CropBox])
|
54
|
+
runs = BoundingRectangleRunsFilter.runs_within_rect(runs, rect)
|
55
|
+
end
|
56
|
+
|
57
|
+
if opts.fetch(:skip_zero_width, true)
|
58
|
+
runs = ZeroWidthRunsFilter.exclude_zero_width_runs(runs)
|
59
|
+
end
|
60
|
+
|
61
|
+
if opts.fetch(:skip_overlapping, true)
|
62
|
+
runs = OverlappingRunsFilter.exclude_redundant_runs(runs)
|
63
|
+
end
|
64
|
+
|
65
|
+
if opts.fetch(:merge, true)
|
66
|
+
runs = merge_runs(runs)
|
67
|
+
end
|
68
|
+
|
69
|
+
runs
|
70
|
+
end
|
71
|
+
|
72
|
+
# deprecated
|
52
73
|
def content
|
53
|
-
|
74
|
+
mediabox = @page.rectangles[:MediaBox]
|
75
|
+
PageLayout.new(runs, mediabox).to_s
|
54
76
|
end
|
55
77
|
|
56
78
|
#####################################################
|
@@ -104,11 +126,13 @@ module PDF
|
|
104
126
|
glyphs.each_with_index do |glyph_code, index|
|
105
127
|
# paint the current glyph
|
106
128
|
newx, newy = @state.trm_transform(0,0)
|
129
|
+
newx, newy = apply_rotation(newx, newy)
|
130
|
+
|
107
131
|
utf8_chars = @state.current_font.to_utf8(glyph_code)
|
108
132
|
|
109
133
|
# apply the glyph displacement for the current glyph so the next
|
110
134
|
# glyph will appear in the correct position
|
111
|
-
glyph_width = @state.current_font.
|
135
|
+
glyph_width = @state.current_font.glyph_width_in_text_space(glyph_code)
|
112
136
|
th = 1
|
113
137
|
scaled_glyph_width = glyph_width * @state.font_size * th
|
114
138
|
unless utf8_chars == SPACE
|
@@ -118,6 +142,44 @@ module PDF
|
|
118
142
|
end
|
119
143
|
end
|
120
144
|
|
145
|
+
def apply_rotation(x, y)
|
146
|
+
if @page.rotate == 90
|
147
|
+
tmp = x
|
148
|
+
x = y
|
149
|
+
y = tmp * -1
|
150
|
+
elsif @page.rotate == 180
|
151
|
+
y *= -1
|
152
|
+
x *= -1
|
153
|
+
elsif @page.rotate == 270
|
154
|
+
tmp = y
|
155
|
+
y = x
|
156
|
+
x = tmp * -1
|
157
|
+
end
|
158
|
+
return x, y
|
159
|
+
end
|
160
|
+
|
161
|
+
# take a collection of TextRun objects and merge any that are in close
|
162
|
+
# proximity
|
163
|
+
def merge_runs(runs)
|
164
|
+
runs.group_by { |char|
|
165
|
+
char.y.to_i
|
166
|
+
}.map { |y, chars|
|
167
|
+
group_chars_into_runs(chars.sort)
|
168
|
+
}.flatten.sort
|
169
|
+
end
|
170
|
+
|
171
|
+
def group_chars_into_runs(chars)
|
172
|
+
chars.each_with_object([]) do |char, runs|
|
173
|
+
if runs.empty?
|
174
|
+
runs << char
|
175
|
+
elsif runs.last.mergable?(char)
|
176
|
+
runs[-1] = runs.last + char
|
177
|
+
else
|
178
|
+
runs << char
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
121
183
|
end
|
122
184
|
end
|
123
185
|
end
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -166,7 +167,9 @@ class PDF::Reader
|
|
166
167
|
|
167
168
|
# add a missing digit if required, as required by the spec
|
168
169
|
str << "0" unless str.size % 2 == 0
|
169
|
-
str.
|
170
|
+
str.chars.each_slice(2).map { |nibbles|
|
171
|
+
nibbles.join("").hex.chr
|
172
|
+
}.join.force_encoding("binary")
|
170
173
|
end
|
171
174
|
################################################################################
|
172
175
|
# Reads a PDF String from the buffer and converts it to a Ruby String
|
@@ -175,15 +178,18 @@ class PDF::Reader
|
|
175
178
|
return "".dup.force_encoding("binary") if str == ")"
|
176
179
|
Error.assert_equal(parse_token, ")")
|
177
180
|
|
178
|
-
str.gsub!(/\\([nrtbf()\\\n]
|
179
|
-
|
181
|
+
str.gsub!(/\\(\r\n|[nrtbf()\\\n\r]|([0-7]{1,3}))?|\r\n?/m) do |match|
|
182
|
+
if $2.nil? # not octal digits
|
183
|
+
MAPPING[match] || "".dup
|
184
|
+
else # must be octal digits
|
185
|
+
($2.oct & 0xff).chr # ignore high level overflow
|
186
|
+
end
|
180
187
|
end
|
181
188
|
str.force_encoding("binary")
|
182
189
|
end
|
183
190
|
|
184
191
|
MAPPING = {
|
185
192
|
"\r" => "\n",
|
186
|
-
"\n\r" => "\n",
|
187
193
|
"\r\n" => "\n",
|
188
194
|
"\\n" => "\n",
|
189
195
|
"\\r" => "\r",
|
@@ -194,10 +200,9 @@ class PDF::Reader
|
|
194
200
|
"\\)" => ")",
|
195
201
|
"\\\\" => "\\",
|
196
202
|
"\\\n" => "",
|
203
|
+
"\\\r" => "",
|
204
|
+
"\\\r\n" => "",
|
197
205
|
}
|
198
|
-
0.upto(9) { |n| MAPPING["\\00"+n.to_s] = ("00"+n.to_s).oct.chr }
|
199
|
-
0.upto(99) { |n| MAPPING["\\0"+n.to_s] = ("0"+n.to_s).oct.chr }
|
200
|
-
0.upto(377) { |n| MAPPING["\\"+n.to_s] = n.to_s.oct.chr }
|
201
206
|
|
202
207
|
################################################################################
|
203
208
|
# Decodes the contents of a PDF Stream and returns it as a Ruby String.
|
@@ -205,6 +210,9 @@ class PDF::Reader
|
|
205
210
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
|
206
211
|
if @objects
|
207
212
|
length = @objects.deref(dict[:Length])
|
213
|
+
if dict[:Filter]
|
214
|
+
dict[:Filter] = @objects.deref(dict[:Filter])
|
215
|
+
end
|
208
216
|
else
|
209
217
|
length = dict[:Length] || 0
|
210
218
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: true
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# PDFs are all about positioning content on a page, so there's lots of need to
|
9
|
+
# work with a set of X,Y coordinates.
|
10
|
+
#
|
11
|
+
class Point
|
12
|
+
|
13
|
+
attr_reader :x, :y
|
14
|
+
|
15
|
+
def initialize(x, y)
|
16
|
+
@x, @y = x, y
|
17
|
+
end
|
18
|
+
|
19
|
+
def ==(other)
|
20
|
+
other.respond_to?(:x) && other.respond_to?(:y) && x == other.x && y == other.y
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: true
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# PDFs represent rectangles all over the place. They're 4 element arrays, like this:
|
9
|
+
#
|
10
|
+
# [A, B, C, D]
|
11
|
+
#
|
12
|
+
# Four element arrays are yucky to work with though, so here's a class that's better.
|
13
|
+
# Initialize it with the 4 elements, and get utility functions (width, height, etc)
|
14
|
+
# for free.
|
15
|
+
#
|
16
|
+
# By convention the first two elements are x1, y1, the co-ords for the bottom left corner
|
17
|
+
# of the rectangle. The third and fourth elements are x2, y2, the co-ords for the top right
|
18
|
+
# corner of the rectangle. It's valid for the alternative corners to be used though, so
|
19
|
+
# we don't assume which is which.
|
20
|
+
#
|
21
|
+
class Rectangle
|
22
|
+
|
23
|
+
attr_reader :bottom_left, :bottom_right, :top_left, :top_right
|
24
|
+
|
25
|
+
def initialize(x1, y1, x2, y2)
|
26
|
+
set_corners(x1, y1, x2, y2)
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.from_array(arr)
|
30
|
+
if arr.size != 4
|
31
|
+
raise ArgumentError, "Only 4-element Arrays can be converted to a Rectangle"
|
32
|
+
end
|
33
|
+
|
34
|
+
PDF::Reader::Rectangle.new(
|
35
|
+
arr[0].to_f,
|
36
|
+
arr[1].to_f,
|
37
|
+
arr[2].to_f,
|
38
|
+
arr[3].to_f,
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
42
|
+
def ==(other)
|
43
|
+
to_a == other.to_a
|
44
|
+
end
|
45
|
+
|
46
|
+
def height
|
47
|
+
top_right.y - bottom_right.y
|
48
|
+
end
|
49
|
+
|
50
|
+
def width
|
51
|
+
bottom_right.x - bottom_left.x
|
52
|
+
end
|
53
|
+
|
54
|
+
def contains?(point)
|
55
|
+
point.x >= bottom_left.x && point.x <= top_right.x &&
|
56
|
+
point.y >= bottom_left.y && point.y <= top_right.y
|
57
|
+
end
|
58
|
+
|
59
|
+
# A pdf-style 4-number array
|
60
|
+
def to_a
|
61
|
+
[
|
62
|
+
bottom_left.x,
|
63
|
+
bottom_left.y,
|
64
|
+
top_right.x,
|
65
|
+
top_right.y,
|
66
|
+
]
|
67
|
+
end
|
68
|
+
|
69
|
+
def apply_rotation(degrees)
|
70
|
+
return if degrees != 90 && degrees != 180 && degrees != 270
|
71
|
+
|
72
|
+
if degrees == 90
|
73
|
+
new_x1 = bottom_left.x
|
74
|
+
new_y1 = bottom_left.y - width
|
75
|
+
new_x2 = bottom_left.x + height
|
76
|
+
new_y2 = bottom_left.y
|
77
|
+
elsif degrees == 180
|
78
|
+
new_x1 = bottom_left.x - width
|
79
|
+
new_y1 = bottom_left.y - height
|
80
|
+
new_x2 = bottom_left.x
|
81
|
+
new_y2 = bottom_left.y
|
82
|
+
elsif degrees == 270
|
83
|
+
new_x1 = bottom_left.x - height
|
84
|
+
new_y1 = bottom_left.y
|
85
|
+
new_x2 = bottom_left.x
|
86
|
+
new_y2 = bottom_left.y + width
|
87
|
+
end
|
88
|
+
set_corners(new_x1, new_y1, new_x2, new_y2)
|
89
|
+
end
|
90
|
+
|
91
|
+
private
|
92
|
+
|
93
|
+
def set_corners(x1, y1, x2, y2)
|
94
|
+
@bottom_left = PDF::Reader::Point.new(
|
95
|
+
[x1, x2].min,
|
96
|
+
[y1, y2].min,
|
97
|
+
)
|
98
|
+
@bottom_right = PDF::Reader::Point.new(
|
99
|
+
[x1, x2].max,
|
100
|
+
[y1, y2].min,
|
101
|
+
)
|
102
|
+
@top_left = PDF::Reader::Point.new(
|
103
|
+
[x1, x2].min,
|
104
|
+
[y1, y2].max,
|
105
|
+
)
|
106
|
+
@top_right = PDF::Reader::Point.new(
|
107
|
+
[x1, x2].max,
|
108
|
+
[y1, y2].max,
|
109
|
+
)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
data/lib/pdf/reader/reference.rb
CHANGED
@@ -1,12 +1,17 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: false
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
5
|
+
# Setting this file to "typed: true" is difficult because it's a mixin that assumes some things
|
6
|
+
# are available from the class, like @objects and resources. Sorbet doesn't know about them.
|
7
|
+
|
4
8
|
module PDF
|
5
9
|
class Reader
|
6
10
|
|
7
11
|
# mixin for common methods in Page and FormXobjects
|
8
12
|
#
|
9
13
|
module ResourceMethods
|
14
|
+
|
10
15
|
# Returns a Hash of color spaces that are available to this page
|
11
16
|
#
|
12
17
|
# NOTE: this method de-serialises objects from the underlying PDF
|
data/lib/pdf/reader/stream.rb
CHANGED
data/lib/pdf/reader/text_run.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -6,15 +7,14 @@ class PDF::Reader
|
|
6
7
|
class TextRun
|
7
8
|
include Comparable
|
8
9
|
|
9
|
-
attr_reader :
|
10
|
+
attr_reader :origin, :width, :font_size, :text
|
10
11
|
|
11
12
|
alias :to_s :text
|
12
13
|
|
13
14
|
def initialize(x, y, width, font_size, text)
|
14
|
-
@
|
15
|
-
@y = y
|
15
|
+
@origin = PDF::Reader::Point.new(x, y)
|
16
16
|
@width = width
|
17
|
-
@font_size = font_size
|
17
|
+
@font_size = font_size
|
18
18
|
@text = text
|
19
19
|
end
|
20
20
|
|
@@ -34,12 +34,20 @@ class PDF::Reader
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
+
def x
|
38
|
+
@origin.x
|
39
|
+
end
|
40
|
+
|
41
|
+
def y
|
42
|
+
@origin.y
|
43
|
+
end
|
44
|
+
|
37
45
|
def endx
|
38
|
-
@endx ||= x + width
|
46
|
+
@endx ||= @origin.x + width
|
39
47
|
end
|
40
48
|
|
41
49
|
def endy
|
42
|
-
@endy ||= y + font_size
|
50
|
+
@endy ||= @origin.y + font_size
|
43
51
|
end
|
44
52
|
|
45
53
|
def mean_character_width
|
data/lib/pdf/reader/token.rb
CHANGED