pdf-reader 2.7.0 → 2.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +8 -0
- data/Rakefile +1 -1
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/font.rb +43 -0
- data/lib/pdf/reader/page.rb +13 -2
- data/lib/pdf/reader/page_layout.rb +14 -28
- data/lib/pdf/reader/page_text_receiver.rb +48 -9
- data/lib/pdf/reader/parser.rb +3 -0
- data/lib/pdf/reader/rectangle.rb +18 -0
- data/lib/pdf/reader/text_run.rb +13 -6
- data/lib/pdf/reader.rb +15 -2
- data/rbi/pdf-reader.rbi +22 -3
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6182ffd59631afba6a2c234547a428382b1ec2d7b414d89830b1143f1a0e1704
|
4
|
+
data.tar.gz: 6c0e6a7d32cf24912edc3aa96d72b7f70497d2fdd0e0913b86f871bbf9fa104f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 42dafbe0c36ce838da4c3120bf2187efde647e486971896d9a9c59c37dac3da0f2ccf3ecd98d8dd1d3acc5404bfcf26e64a327d7797648646afd6b40be02fec2
|
7
|
+
data.tar.gz: 40f0b0958024b558d6aca7eb2b3b6f042f034059c8fca52ce97fab7d55a39c313797605341331c65efd1099a1310ccbe386c354024dbd3cbc61c1d96c423842d
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
v2.8.0 (28th December 2021)
|
2
|
+
- Add PDF::Reader::Page#runs for extracting text from a page with positioning metadata (http://github.com/yob/pdf-reader/pull/411)
|
3
|
+
- Add options to PDF::Reader::Page#text to make some behaviour configurable (http://github.com/yob/pdf-reader/pull/411)
|
4
|
+
- including extracting the text for only part of the page
|
5
|
+
- Improve text positioning and extraction for Type3 fonts (http://github.com/yob/pdf-reader/pull/412)
|
6
|
+
- Skip extracting text that is positioned outside the page (http://github.com/yob/pdf-reader/pull/413)
|
7
|
+
- Fix occasional crash when reading some streams (http://github.com/yob/pdf-reader/pull/405)
|
8
|
+
|
1
9
|
v2.7.0 (13th December 2021)
|
2
10
|
- Include RBI type files in the gem
|
3
11
|
- Downstream users of pdf-reader who also use sorbet *should* find many parts of the API will
|
data/Rakefile
CHANGED
@@ -14,7 +14,7 @@ desc "Run cane to check quality metrics"
|
|
14
14
|
Cane::RakeTask.new(:quality) do |cane|
|
15
15
|
cane.abc_max = 20
|
16
16
|
cane.style_measure = 100
|
17
|
-
cane.max_violations =
|
17
|
+
cane.max_violations = 28
|
18
18
|
|
19
19
|
cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
|
20
20
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
class PDF::Reader
|
6
|
+
|
7
|
+
# Filter out text/characters that are positioned outside a rectangle. Usually the page
|
8
|
+
# MediaBox or CropBox, but could be a user specified rectangle too
|
9
|
+
class BoundingRectangleRunsFilter
|
10
|
+
|
11
|
+
def self.runs_within_rect(runs, rect)
|
12
|
+
runs.select { |run| rect.contains?(run.origin) }
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -43,6 +43,7 @@ class PDF::Reader
|
|
43
43
|
@tounicode = nil
|
44
44
|
|
45
45
|
extract_base_info(obj)
|
46
|
+
extract_type3_info(obj)
|
46
47
|
extract_descriptor(obj)
|
47
48
|
extract_descendants(obj)
|
48
49
|
@width_calc = build_width_calculator
|
@@ -73,8 +74,44 @@ class PDF::Reader
|
|
73
74
|
@cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
|
74
75
|
end
|
75
76
|
|
77
|
+
# In most cases glyph width is converted into text space with a simple divide by 1000.
|
78
|
+
#
|
79
|
+
# However, Type3 fonts provide their own FontMatrix that's used for the transformation.
|
80
|
+
#
|
81
|
+
def glyph_width_in_text_space(code_point)
|
82
|
+
glyph_width_in_glyph_space = glyph_width(code_point)
|
83
|
+
|
84
|
+
if @subtype == :Type3
|
85
|
+
x1, y1 = font_matrix_transform(0,0)
|
86
|
+
x2, y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
|
87
|
+
(x2 - x1).abs.round(2)
|
88
|
+
else
|
89
|
+
glyph_width_in_glyph_space / 1000.0
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
76
93
|
private
|
77
94
|
|
95
|
+
# Only valid for Type3 fonts
|
96
|
+
def font_matrix_transform(x, y)
|
97
|
+
return x, y if @font_matrix.nil?
|
98
|
+
|
99
|
+
matrix = TransformationMatrix.new(
|
100
|
+
@font_matrix[0], @font_matrix[1],
|
101
|
+
@font_matrix[2], @font_matrix[3],
|
102
|
+
@font_matrix[4], @font_matrix[5],
|
103
|
+
)
|
104
|
+
|
105
|
+
if x == 0 && y == 0
|
106
|
+
[matrix.e, matrix.f]
|
107
|
+
else
|
108
|
+
[
|
109
|
+
(matrix.a * x) + (matrix.c * y) + (matrix.e),
|
110
|
+
(matrix.b * x) + (matrix.d * y) + (matrix.f)
|
111
|
+
]
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
78
115
|
def default_encoding(font_name)
|
79
116
|
case font_name.to_s
|
80
117
|
when "Symbol" then
|
@@ -138,6 +175,12 @@ class PDF::Reader
|
|
138
175
|
end
|
139
176
|
end
|
140
177
|
|
178
|
+
def extract_type3_info(obj)
|
179
|
+
if @subtype == :Type3
|
180
|
+
@font_matrix = @ohash.object(obj[:FontMatrix]) || [ 0.001, 0, 0, 0.001, 0, 0 ]
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
141
184
|
def extract_descriptor(obj)
|
142
185
|
if obj[:FontDescriptor]
|
143
186
|
# create a font descriptor object if we can, in other words, unless this is
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -101,13 +101,24 @@ module PDF
|
|
101
101
|
# returns the plain text content of this page encoded as UTF-8. Any
|
102
102
|
# characters that can't be translated will be returned as a ▯
|
103
103
|
#
|
104
|
-
def text
|
104
|
+
def text(opts = {})
|
105
105
|
receiver = PageTextReceiver.new
|
106
106
|
walk(receiver)
|
107
|
-
receiver.
|
107
|
+
runs = receiver.runs(opts)
|
108
|
+
|
109
|
+
# rectangles[:MediaBox] can never be nil, but I have no easy way to tell sorbet that atm
|
110
|
+
mediabox = rectangles[:MediaBox] || Rectangle.new(0, 0, 0, 0)
|
111
|
+
|
112
|
+
PageLayout.new(runs, mediabox).to_s
|
108
113
|
end
|
109
114
|
alias :to_s :text
|
110
115
|
|
116
|
+
def runs(opts = {})
|
117
|
+
receiver = PageTextReceiver.new
|
118
|
+
walk(receiver)
|
119
|
+
receiver.runs(opts)
|
120
|
+
end
|
121
|
+
|
111
122
|
# processes the raw content stream for this page in sequential order and
|
112
123
|
# passes callbacks to the receiver objects.
|
113
124
|
#
|
@@ -21,10 +21,8 @@ class PDF::Reader
|
|
21
21
|
# PDF::Reader::Rectangle at some point
|
22
22
|
PDF::Reader::Error.validate_not_nil(mediabox, "mediabox")
|
23
23
|
|
24
|
-
|
25
|
-
runs =
|
26
|
-
@mediabox = mediabox
|
27
|
-
@runs = merge_runs(runs)
|
24
|
+
@mediabox = process_mediabox(mediabox)
|
25
|
+
@runs = runs
|
28
26
|
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
29
27
|
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
30
28
|
@median_glyph_width = median(@runs.map(&:mean_character_width)) || 0
|
@@ -51,13 +49,11 @@ class PDF::Reader
|
|
51
49
|
private
|
52
50
|
|
53
51
|
def page_width
|
54
|
-
|
55
|
-
(@mediabox[2].to_f - @mediabox[0].to_f).abs
|
52
|
+
@mediabox.width
|
56
53
|
end
|
57
54
|
|
58
55
|
def page_height
|
59
|
-
|
60
|
-
(@mediabox[3].to_f - @mediabox[1].to_f).abs
|
56
|
+
@mediabox.height
|
61
57
|
end
|
62
58
|
|
63
59
|
# given an array of strings, return a new array with empty rows from the
|
@@ -109,30 +105,20 @@ class PDF::Reader
|
|
109
105
|
end
|
110
106
|
end
|
111
107
|
|
112
|
-
|
113
|
-
|
114
|
-
def merge_runs(runs)
|
115
|
-
runs.group_by { |char|
|
116
|
-
char.y.to_i
|
117
|
-
}.map { |y, chars|
|
118
|
-
group_chars_into_runs(chars.sort)
|
119
|
-
}.flatten.sort
|
108
|
+
def local_string_insert(haystack, needle, index)
|
109
|
+
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
120
110
|
end
|
121
111
|
|
122
|
-
def
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
end
|
112
|
+
def process_mediabox(mediabox)
|
113
|
+
if mediabox.is_a?(Array)
|
114
|
+
msg = "Passing the mediabox to PageLayout as an Array is deprecated," +
|
115
|
+
" please use a Rectangle instead"
|
116
|
+
$stderr.puts msg
|
117
|
+
PDF::Reader::Rectangle.from_array(mediabox)
|
118
|
+
else
|
119
|
+
mediabox
|
131
120
|
end
|
132
121
|
end
|
133
122
|
|
134
|
-
def local_string_insert(haystack, needle, index)
|
135
|
-
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
136
|
-
end
|
137
123
|
end
|
138
124
|
end
|
@@ -47,9 +47,32 @@ module PDF
|
|
47
47
|
@characters = []
|
48
48
|
end
|
49
49
|
|
50
|
+
def runs(opts = {})
|
51
|
+
runs = @characters
|
52
|
+
|
53
|
+
if rect = opts.fetch(:rect, @page.rectangles[:CropBox])
|
54
|
+
runs = BoundingRectangleRunsFilter.runs_within_rect(runs, rect)
|
55
|
+
end
|
56
|
+
|
57
|
+
if opts.fetch(:skip_zero_width, true)
|
58
|
+
runs = ZeroWidthRunsFilter.exclude_zero_width_runs(runs)
|
59
|
+
end
|
60
|
+
|
61
|
+
if opts.fetch(:skip_overlapping, true)
|
62
|
+
runs = OverlappingRunsFilter.exclude_redundant_runs(runs)
|
63
|
+
end
|
64
|
+
|
65
|
+
if opts.fetch(:merge, true)
|
66
|
+
runs = merge_runs(runs)
|
67
|
+
end
|
68
|
+
|
69
|
+
runs
|
70
|
+
end
|
71
|
+
|
72
|
+
# deprecated
|
50
73
|
def content
|
51
|
-
mediabox = @page.rectangles[:MediaBox]
|
52
|
-
PageLayout.new(
|
74
|
+
mediabox = @page.rectangles[:MediaBox]
|
75
|
+
PageLayout.new(runs, mediabox).to_s
|
53
76
|
end
|
54
77
|
|
55
78
|
#####################################################
|
@@ -109,7 +132,7 @@ module PDF
|
|
109
132
|
|
110
133
|
# apply to glyph displacement for the current glyph so the next
|
111
134
|
# glyph will appear in the correct position
|
112
|
-
glyph_width = @state.current_font.
|
135
|
+
glyph_width = @state.current_font.glyph_width_in_text_space(glyph_code)
|
113
136
|
th = 1
|
114
137
|
scaled_glyph_width = glyph_width * @state.font_size * th
|
115
138
|
unless utf8_chars == SPACE
|
@@ -119,12 +142,6 @@ module PDF
|
|
119
142
|
end
|
120
143
|
end
|
121
144
|
|
122
|
-
# TODO: revist this. It rotates the co-ordinates to the right direction, but I don't
|
123
|
-
# think it sets the correct x,y values. We get away with it because we don't
|
124
|
-
# return the text with co-ordinates, only the full text arranged in a string.
|
125
|
-
#
|
126
|
-
# We should provide an API for extracting the text with positioning data and spec
|
127
|
-
# that. I suspect the co-ords might be wrong for rotated pages
|
128
145
|
def apply_rotation(x, y)
|
129
146
|
if @page.rotate == 90
|
130
147
|
tmp = x
|
@@ -141,6 +158,28 @@ module PDF
|
|
141
158
|
return x, y
|
142
159
|
end
|
143
160
|
|
161
|
+
# take a collection of TextRun objects and merge any that are in close
|
162
|
+
# proximity
|
163
|
+
def merge_runs(runs)
|
164
|
+
runs.group_by { |char|
|
165
|
+
char.y.to_i
|
166
|
+
}.map { |y, chars|
|
167
|
+
group_chars_into_runs(chars.sort)
|
168
|
+
}.flatten.sort
|
169
|
+
end
|
170
|
+
|
171
|
+
def group_chars_into_runs(chars)
|
172
|
+
chars.each_with_object([]) do |char, runs|
|
173
|
+
if runs.empty?
|
174
|
+
runs << char
|
175
|
+
elsif runs.last.mergable?(char)
|
176
|
+
runs[-1] = runs.last + char
|
177
|
+
else
|
178
|
+
runs << char
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
144
183
|
end
|
145
184
|
end
|
146
185
|
end
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -210,6 +210,9 @@ class PDF::Reader
|
|
210
210
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
|
211
211
|
if @objects
|
212
212
|
length = @objects.deref(dict[:Length])
|
213
|
+
if dict[:Filter]
|
214
|
+
dict[:Filter] = @objects.deref(dict[:Filter])
|
215
|
+
end
|
213
216
|
else
|
214
217
|
length = dict[:Length] || 0
|
215
218
|
end
|
data/lib/pdf/reader/rectangle.rb
CHANGED
@@ -26,6 +26,19 @@ module PDF
|
|
26
26
|
set_corners(x1, y1, x2, y2)
|
27
27
|
end
|
28
28
|
|
29
|
+
def self.from_array(arr)
|
30
|
+
if arr.size != 4
|
31
|
+
raise ArgumentError, "Only 4-element Arrays can be converted to a Rectangle"
|
32
|
+
end
|
33
|
+
|
34
|
+
PDF::Reader::Rectangle.new(
|
35
|
+
arr[0].to_f,
|
36
|
+
arr[1].to_f,
|
37
|
+
arr[2].to_f,
|
38
|
+
arr[3].to_f,
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
29
42
|
def ==(other)
|
30
43
|
to_a == other.to_a
|
31
44
|
end
|
@@ -38,6 +51,11 @@ module PDF
|
|
38
51
|
bottom_right.x - bottom_left.x
|
39
52
|
end
|
40
53
|
|
54
|
+
def contains?(point)
|
55
|
+
point.x >= bottom_left.x && point.x <= top_right.x &&
|
56
|
+
point.y >= bottom_left.y && point.y <= top_right.y
|
57
|
+
end
|
58
|
+
|
41
59
|
# A pdf-style 4-number array
|
42
60
|
def to_a
|
43
61
|
[
|
data/lib/pdf/reader/text_run.rb
CHANGED
@@ -7,15 +7,14 @@ class PDF::Reader
|
|
7
7
|
class TextRun
|
8
8
|
include Comparable
|
9
9
|
|
10
|
-
attr_reader :
|
10
|
+
attr_reader :origin, :width, :font_size, :text
|
11
11
|
|
12
12
|
alias :to_s :text
|
13
13
|
|
14
14
|
def initialize(x, y, width, font_size, text)
|
15
|
-
@
|
16
|
-
@y = y
|
15
|
+
@origin = PDF::Reader::Point.new(x, y)
|
17
16
|
@width = width
|
18
|
-
@font_size = font_size
|
17
|
+
@font_size = font_size
|
19
18
|
@text = text
|
20
19
|
end
|
21
20
|
|
@@ -35,12 +34,20 @@ class PDF::Reader
|
|
35
34
|
end
|
36
35
|
end
|
37
36
|
|
37
|
+
def x
|
38
|
+
@origin.x
|
39
|
+
end
|
40
|
+
|
41
|
+
def y
|
42
|
+
@origin.y
|
43
|
+
end
|
44
|
+
|
38
45
|
def endx
|
39
|
-
@endx ||= x + width
|
46
|
+
@endx ||= @origin.x + width
|
40
47
|
end
|
41
48
|
|
42
49
|
def endy
|
43
|
-
@endy ||= y + font_size
|
50
|
+
@endy ||= @origin.y + font_size
|
44
51
|
end
|
45
52
|
|
46
53
|
def mean_character_width
|
data/lib/pdf/reader.rb
CHANGED
@@ -112,17 +112,25 @@ module PDF
|
|
112
112
|
#
|
113
113
|
# reader = PDF::Reader.new("somefile.pdf", :password => "apples")
|
114
114
|
#
|
115
|
+
# Using this method directly is supported, but it's more common to use
|
116
|
+
# `PDF::Reader.open`
|
117
|
+
#
|
115
118
|
def initialize(input, opts = {})
|
116
119
|
@cache = PDF::Reader::ObjectCache.new
|
117
120
|
opts.merge!(:cache => @cache)
|
118
121
|
@objects = PDF::Reader::ObjectHash.new(input, opts)
|
119
122
|
end
|
120
123
|
|
124
|
+
# Return a Hash with some basic information about the PDF file
|
125
|
+
#
|
121
126
|
def info
|
122
127
|
dict = @objects.deref(@objects.trailer[:Info])
|
123
128
|
doc_strings_to_utf8(dict)
|
124
129
|
end
|
125
130
|
|
131
|
+
# Return a Hash with extra metadata provided by the author of the PDF file. Not
|
132
|
+
# always present.
|
133
|
+
#
|
126
134
|
def metadata
|
127
135
|
stream = @objects.deref(root[:Metadata])
|
128
136
|
if stream.nil?
|
@@ -134,6 +142,8 @@ module PDF
|
|
134
142
|
end
|
135
143
|
end
|
136
144
|
|
145
|
+
# The number of pages in this PDF
|
146
|
+
#
|
137
147
|
def page_count
|
138
148
|
pages = @objects.deref(root[:Pages])
|
139
149
|
unless pages.kind_of?(::Hash)
|
@@ -142,12 +152,14 @@ module PDF
|
|
142
152
|
@page_count ||= @objects.deref(pages[:Count])
|
143
153
|
end
|
144
154
|
|
155
|
+
# The PDF version this file uses
|
156
|
+
#
|
145
157
|
def pdf_version
|
146
158
|
@objects.pdf_version
|
147
159
|
end
|
148
160
|
|
149
|
-
# syntactic sugar for opening a PDF file. Accepts the
|
150
|
-
# as new().
|
161
|
+
# syntactic sugar for opening a PDF file and the most common approach. Accepts the
|
162
|
+
# same arguments as new().
|
151
163
|
#
|
152
164
|
# PDF::Reader.open("somefile.pdf") do |reader|
|
153
165
|
# puts reader.pdf_version
|
@@ -273,6 +285,7 @@ end
|
|
273
285
|
|
274
286
|
require 'pdf/reader/resource_methods'
|
275
287
|
require 'pdf/reader/buffer'
|
288
|
+
require 'pdf/reader/bounding_rectangle_runs_filter'
|
276
289
|
require 'pdf/reader/cid_widths'
|
277
290
|
require 'pdf/reader/cmap'
|
278
291
|
require 'pdf/reader/encoding'
|
data/rbi/pdf-reader.rbi
CHANGED
@@ -43,6 +43,13 @@ module PDF
|
|
43
43
|
sig { returns(T::Hash[Symbol, T.untyped]) }
|
44
44
|
def root; end
|
45
45
|
|
46
|
+
class BoundingRectangleRunsFilter
|
47
|
+
extend T::Sig
|
48
|
+
|
49
|
+
sig { params(runs: T::Array[PDF::Reader::TextRun], rect: PDF::Reader::Rectangle).returns(T::Array[PDF::Reader::TextRun]) }
|
50
|
+
def self.runs_within_rect(runs, rect); end
|
51
|
+
end
|
52
|
+
|
46
53
|
class Buffer
|
47
54
|
TOKEN_WHITESPACE = [0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20]
|
48
55
|
TOKEN_DELIMITER = [0x25, 0x3C, 0x3E, 0x28, 0x5B, 0x7B, 0x29, 0x5D, 0x7D, 0x2F]
|
@@ -750,8 +757,11 @@ module PDF
|
|
750
757
|
sig { returns(T::Array[Numeric]) }
|
751
758
|
def origin; end
|
752
759
|
|
753
|
-
sig { returns(
|
754
|
-
def
|
760
|
+
sig { params(opts: T::Hash[Symbol, T.untyped]).returns(T::Array[PDF::Reader::TextRun]) }
|
761
|
+
def runs(opts = {}); end
|
762
|
+
|
763
|
+
sig { params(opts: T::Hash[Symbol, T.untyped]).returns(String) }
|
764
|
+
def text(opts = {}); end
|
755
765
|
|
756
766
|
sig { params(receivers: T.untyped).void }
|
757
767
|
def walk(*receivers); end
|
@@ -794,7 +804,7 @@ module PDF
|
|
794
804
|
extend T::Sig
|
795
805
|
DEFAULT_FONT_SIZE = 12
|
796
806
|
|
797
|
-
sig { params(runs: T::Array[PDF::Reader::TextRun], mediabox: T::Array[Numeric]).void }
|
807
|
+
sig { params(runs: T::Array[PDF::Reader::TextRun], mediabox: T.any(T::Array[Numeric], PDF::Reader::Rectangle)).void }
|
798
808
|
def initialize(runs, mediabox); end
|
799
809
|
|
800
810
|
sig { returns(String) }
|
@@ -829,6 +839,9 @@ module PDF
|
|
829
839
|
|
830
840
|
sig { params(haystack: T.untyped, needle: T.untyped, index: T.untyped).returns(T.untyped) }
|
831
841
|
def local_string_insert(haystack, needle, index); end
|
842
|
+
|
843
|
+
sig { params(mediabox: T.untyped).returns(T.untyped) }
|
844
|
+
def process_mediabox(mediabox); end
|
832
845
|
end
|
833
846
|
|
834
847
|
class PageState
|
@@ -996,6 +1009,9 @@ module PDF
|
|
996
1009
|
sig { params(str: T.untyped).returns(T.untyped) }
|
997
1010
|
def move_to_next_line_and_show_text(str); end
|
998
1011
|
|
1012
|
+
sig { params(opts: T::Hash[Symbol, T.untyped]).returns(T::Array[PDF::Reader::TextRun]) }
|
1013
|
+
def runs(opts = {}); end
|
1014
|
+
|
999
1015
|
sig { params(aw: T.untyped, ac: T.untyped, string: T.untyped).returns(T.untyped) }
|
1000
1016
|
def set_spacing_next_line_show_text(aw, ac, string); end
|
1001
1017
|
|
@@ -1122,6 +1138,9 @@ module PDF
|
|
1122
1138
|
end
|
1123
1139
|
|
1124
1140
|
class Rectangle
|
1141
|
+
sig { params(arr: T::Array[Numeric]).returns(PDF::Reader::Rectangle) }
|
1142
|
+
def self.from_array(arr); end
|
1143
|
+
|
1125
1144
|
sig do
|
1126
1145
|
params(
|
1127
1146
|
x1: Numeric,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-12-
|
11
|
+
date: 2021-12-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -215,6 +215,7 @@ files:
|
|
215
215
|
- lib/pdf/reader/afm/Times-Italic.afm
|
216
216
|
- lib/pdf/reader/afm/Times-Roman.afm
|
217
217
|
- lib/pdf/reader/afm/ZapfDingbats.afm
|
218
|
+
- lib/pdf/reader/bounding_rectangle_runs_filter.rb
|
218
219
|
- lib/pdf/reader/buffer.rb
|
219
220
|
- lib/pdf/reader/cid_widths.rb
|
220
221
|
- lib/pdf/reader/cmap.rb
|
@@ -281,9 +282,9 @@ licenses:
|
|
281
282
|
- MIT
|
282
283
|
metadata:
|
283
284
|
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
284
|
-
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.
|
285
|
-
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.
|
286
|
-
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.
|
285
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.8.0/CHANGELOG
|
286
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.8.0
|
287
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.8.0
|
287
288
|
post_install_message:
|
288
289
|
rdoc_options:
|
289
290
|
- "--title"
|