pdf-reader 2.7.0 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +8 -0
- data/Rakefile +1 -1
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/font.rb +43 -0
- data/lib/pdf/reader/page.rb +13 -2
- data/lib/pdf/reader/page_layout.rb +14 -28
- data/lib/pdf/reader/page_text_receiver.rb +48 -9
- data/lib/pdf/reader/parser.rb +3 -0
- data/lib/pdf/reader/rectangle.rb +18 -0
- data/lib/pdf/reader/text_run.rb +13 -6
- data/lib/pdf/reader.rb +15 -2
- data/rbi/pdf-reader.rbi +22 -3
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6182ffd59631afba6a2c234547a428382b1ec2d7b414d89830b1143f1a0e1704
|
4
|
+
data.tar.gz: 6c0e6a7d32cf24912edc3aa96d72b7f70497d2fdd0e0913b86f871bbf9fa104f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 42dafbe0c36ce838da4c3120bf2187efde647e486971896d9a9c59c37dac3da0f2ccf3ecd98d8dd1d3acc5404bfcf26e64a327d7797648646afd6b40be02fec2
|
7
|
+
data.tar.gz: 40f0b0958024b558d6aca7eb2b3b6f042f034059c8fca52ce97fab7d55a39c313797605341331c65efd1099a1310ccbe386c354024dbd3cbc61c1d96c423842d
|
data/CHANGELOG
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
v2.8.0 (28th December 2021)
|
2
|
+
- Add PDF::Reader::Page#runs for extracting text from a page with positioning metadata (http://github.com/yob/pdf-reader/pull/411)
|
3
|
+
- Add options to PDF::Reader::Page#text to make some behaviour configurable (http://github.com/yob/pdf-reader/pull/411)
|
4
|
+
- including extracting the text for only part of the page
|
5
|
+
- Improve text positioning and extraction for Type3 fonts (http://github.com/yob/pdf-reader/pull/412)
|
6
|
+
- Skip extracting text that is positioned outside the page (http://github.com/yob/pdf-reader/pull/413)
|
7
|
+
- Fix occasional crash when reading some streams (http://github.com/yob/pdf-reader/pull/405)
|
8
|
+
|
1
9
|
v2.7.0 (13th December 2021)
|
2
10
|
- Include RBI type files in the gem
|
3
11
|
- Downstream users of pdf-reader who also use sorbet *should* find many parts of the API will
|
data/Rakefile
CHANGED
@@ -14,7 +14,7 @@ desc "Run cane to check quality metrics"
|
|
14
14
|
Cane::RakeTask.new(:quality) do |cane|
|
15
15
|
cane.abc_max = 20
|
16
16
|
cane.style_measure = 100
|
17
|
-
cane.max_violations =
|
17
|
+
cane.max_violations = 28
|
18
18
|
|
19
19
|
cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
|
20
20
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
class PDF::Reader
|
6
|
+
|
7
|
+
# Filter out text/characters that are positioned outside a rectangle. Usually the page
|
8
|
+
# MediaBox or CropBox, but could be a user specified rectangle too
|
9
|
+
class BoundingRectangleRunsFilter
|
10
|
+
|
11
|
+
def self.runs_within_rect(runs, rect)
|
12
|
+
runs.select { |run| rect.contains?(run.origin) }
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -43,6 +43,7 @@ class PDF::Reader
|
|
43
43
|
@tounicode = nil
|
44
44
|
|
45
45
|
extract_base_info(obj)
|
46
|
+
extract_type3_info(obj)
|
46
47
|
extract_descriptor(obj)
|
47
48
|
extract_descendants(obj)
|
48
49
|
@width_calc = build_width_calculator
|
@@ -73,8 +74,44 @@ class PDF::Reader
|
|
73
74
|
@cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
|
74
75
|
end
|
75
76
|
|
77
|
+
# In most cases glyph width is converted into text space with a simple divide by 1000.
|
78
|
+
#
|
79
|
+
# However, Type3 fonts provide their own FontMatrix that's used for the transformation.
|
80
|
+
#
|
81
|
+
def glyph_width_in_text_space(code_point)
|
82
|
+
glyph_width_in_glyph_space = glyph_width(code_point)
|
83
|
+
|
84
|
+
if @subtype == :Type3
|
85
|
+
x1, y1 = font_matrix_transform(0,0)
|
86
|
+
x2, y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
|
87
|
+
(x2 - x1).abs.round(2)
|
88
|
+
else
|
89
|
+
glyph_width_in_glyph_space / 1000.0
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
76
93
|
private
|
77
94
|
|
95
|
+
# Only valid for Type3 fonts
|
96
|
+
def font_matrix_transform(x, y)
|
97
|
+
return x, y if @font_matrix.nil?
|
98
|
+
|
99
|
+
matrix = TransformationMatrix.new(
|
100
|
+
@font_matrix[0], @font_matrix[1],
|
101
|
+
@font_matrix[2], @font_matrix[3],
|
102
|
+
@font_matrix[4], @font_matrix[5],
|
103
|
+
)
|
104
|
+
|
105
|
+
if x == 0 && y == 0
|
106
|
+
[matrix.e, matrix.f]
|
107
|
+
else
|
108
|
+
[
|
109
|
+
(matrix.a * x) + (matrix.c * y) + (matrix.e),
|
110
|
+
(matrix.b * x) + (matrix.d * y) + (matrix.f)
|
111
|
+
]
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
78
115
|
def default_encoding(font_name)
|
79
116
|
case font_name.to_s
|
80
117
|
when "Symbol" then
|
@@ -138,6 +175,12 @@ class PDF::Reader
|
|
138
175
|
end
|
139
176
|
end
|
140
177
|
|
178
|
+
def extract_type3_info(obj)
|
179
|
+
if @subtype == :Type3
|
180
|
+
@font_matrix = @ohash.object(obj[:FontMatrix]) || [ 0.001, 0, 0, 0.001, 0, 0 ]
|
181
|
+
end
|
182
|
+
end
|
183
|
+
|
141
184
|
def extract_descriptor(obj)
|
142
185
|
if obj[:FontDescriptor]
|
143
186
|
# create a font descriptor object if we can, in other words, unless this is
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -101,13 +101,24 @@ module PDF
|
|
101
101
|
# returns the plain text content of this page encoded as UTF-8. Any
|
102
102
|
# characters that can't be translated will be returned as a ▯
|
103
103
|
#
|
104
|
-
def text
|
104
|
+
def text(opts = {})
|
105
105
|
receiver = PageTextReceiver.new
|
106
106
|
walk(receiver)
|
107
|
-
receiver.
|
107
|
+
runs = receiver.runs(opts)
|
108
|
+
|
109
|
+
# rectangles[:MediaBox] can never be nil, but I have no easy way to tell sorbet that atm
|
110
|
+
mediabox = rectangles[:MediaBox] || Rectangle.new(0, 0, 0, 0)
|
111
|
+
|
112
|
+
PageLayout.new(runs, mediabox).to_s
|
108
113
|
end
|
109
114
|
alias :to_s :text
|
110
115
|
|
116
|
+
def runs(opts = {})
|
117
|
+
receiver = PageTextReceiver.new
|
118
|
+
walk(receiver)
|
119
|
+
receiver.runs(opts)
|
120
|
+
end
|
121
|
+
|
111
122
|
# processes the raw content stream for this page in sequential order and
|
112
123
|
# passes callbacks to the receiver objects.
|
113
124
|
#
|
@@ -21,10 +21,8 @@ class PDF::Reader
|
|
21
21
|
# PDF::Reader::Rectangle at some point
|
22
22
|
PDF::Reader::Error.validate_not_nil(mediabox, "mediabox")
|
23
23
|
|
24
|
-
|
25
|
-
runs =
|
26
|
-
@mediabox = mediabox
|
27
|
-
@runs = merge_runs(runs)
|
24
|
+
@mediabox = process_mediabox(mediabox)
|
25
|
+
@runs = runs
|
28
26
|
@mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
|
29
27
|
@mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
|
30
28
|
@median_glyph_width = median(@runs.map(&:mean_character_width)) || 0
|
@@ -51,13 +49,11 @@ class PDF::Reader
|
|
51
49
|
private
|
52
50
|
|
53
51
|
def page_width
|
54
|
-
|
55
|
-
(@mediabox[2].to_f - @mediabox[0].to_f).abs
|
52
|
+
@mediabox.width
|
56
53
|
end
|
57
54
|
|
58
55
|
def page_height
|
59
|
-
|
60
|
-
(@mediabox[3].to_f - @mediabox[1].to_f).abs
|
56
|
+
@mediabox.height
|
61
57
|
end
|
62
58
|
|
63
59
|
# given an array of strings, return a new array with empty rows from the
|
@@ -109,30 +105,20 @@ class PDF::Reader
|
|
109
105
|
end
|
110
106
|
end
|
111
107
|
|
112
|
-
|
113
|
-
|
114
|
-
def merge_runs(runs)
|
115
|
-
runs.group_by { |char|
|
116
|
-
char.y.to_i
|
117
|
-
}.map { |y, chars|
|
118
|
-
group_chars_into_runs(chars.sort)
|
119
|
-
}.flatten.sort
|
108
|
+
def local_string_insert(haystack, needle, index)
|
109
|
+
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
120
110
|
end
|
121
111
|
|
122
|
-
def
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
end
|
112
|
+
def process_mediabox(mediabox)
|
113
|
+
if mediabox.is_a?(Array)
|
114
|
+
msg = "Passing the mediabox to PageLayout as an Array is deprecated," +
|
115
|
+
" please use a Rectangle instead"
|
116
|
+
$stderr.puts msg
|
117
|
+
PDF::Reader::Rectangle.from_array(mediabox)
|
118
|
+
else
|
119
|
+
mediabox
|
131
120
|
end
|
132
121
|
end
|
133
122
|
|
134
|
-
def local_string_insert(haystack, needle, index)
|
135
|
-
haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
|
136
|
-
end
|
137
123
|
end
|
138
124
|
end
|
@@ -47,9 +47,32 @@ module PDF
|
|
47
47
|
@characters = []
|
48
48
|
end
|
49
49
|
|
50
|
+
def runs(opts = {})
|
51
|
+
runs = @characters
|
52
|
+
|
53
|
+
if rect = opts.fetch(:rect, @page.rectangles[:CropBox])
|
54
|
+
runs = BoundingRectangleRunsFilter.runs_within_rect(runs, rect)
|
55
|
+
end
|
56
|
+
|
57
|
+
if opts.fetch(:skip_zero_width, true)
|
58
|
+
runs = ZeroWidthRunsFilter.exclude_zero_width_runs(runs)
|
59
|
+
end
|
60
|
+
|
61
|
+
if opts.fetch(:skip_overlapping, true)
|
62
|
+
runs = OverlappingRunsFilter.exclude_redundant_runs(runs)
|
63
|
+
end
|
64
|
+
|
65
|
+
if opts.fetch(:merge, true)
|
66
|
+
runs = merge_runs(runs)
|
67
|
+
end
|
68
|
+
|
69
|
+
runs
|
70
|
+
end
|
71
|
+
|
72
|
+
# deprecated
|
50
73
|
def content
|
51
|
-
mediabox = @page.rectangles[:MediaBox]
|
52
|
-
PageLayout.new(
|
74
|
+
mediabox = @page.rectangles[:MediaBox]
|
75
|
+
PageLayout.new(runs, mediabox).to_s
|
53
76
|
end
|
54
77
|
|
55
78
|
#####################################################
|
@@ -109,7 +132,7 @@ module PDF
|
|
109
132
|
|
110
133
|
# apply the glyph displacement for the current glyph so the next
|
111
134
|
# glyph will appear in the correct position
|
112
|
-
glyph_width = @state.current_font.
|
135
|
+
glyph_width = @state.current_font.glyph_width_in_text_space(glyph_code)
|
113
136
|
th = 1
|
114
137
|
scaled_glyph_width = glyph_width * @state.font_size * th
|
115
138
|
unless utf8_chars == SPACE
|
@@ -119,12 +142,6 @@ module PDF
|
|
119
142
|
end
|
120
143
|
end
|
121
144
|
|
122
|
-
# TODO: revist this. It rotates the co-ordinates to the right direction, but I don't
|
123
|
-
# think it sets the correct x,y values. We get away with it because we don't
|
124
|
-
# return the text with co-ordinates, only the full text arranged in a string.
|
125
|
-
#
|
126
|
-
# We should provide an API for extracting the text with positioning data and spec
|
127
|
-
# that. I suspect the co-ords might be wrong for rotated pages
|
128
145
|
def apply_rotation(x, y)
|
129
146
|
if @page.rotate == 90
|
130
147
|
tmp = x
|
@@ -141,6 +158,28 @@ module PDF
|
|
141
158
|
return x, y
|
142
159
|
end
|
143
160
|
|
161
|
+
# take a collection of TextRun objects and merge any that are in close
|
162
|
+
# proximity
|
163
|
+
def merge_runs(runs)
|
164
|
+
runs.group_by { |char|
|
165
|
+
char.y.to_i
|
166
|
+
}.map { |y, chars|
|
167
|
+
group_chars_into_runs(chars.sort)
|
168
|
+
}.flatten.sort
|
169
|
+
end
|
170
|
+
|
171
|
+
def group_chars_into_runs(chars)
|
172
|
+
chars.each_with_object([]) do |char, runs|
|
173
|
+
if runs.empty?
|
174
|
+
runs << char
|
175
|
+
elsif runs.last.mergable?(char)
|
176
|
+
runs[-1] = runs.last + char
|
177
|
+
else
|
178
|
+
runs << char
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
144
183
|
end
|
145
184
|
end
|
146
185
|
end
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -210,6 +210,9 @@ class PDF::Reader
|
|
210
210
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
|
211
211
|
if @objects
|
212
212
|
length = @objects.deref(dict[:Length])
|
213
|
+
if dict[:Filter]
|
214
|
+
dict[:Filter] = @objects.deref(dict[:Filter])
|
215
|
+
end
|
213
216
|
else
|
214
217
|
length = dict[:Length] || 0
|
215
218
|
end
|
data/lib/pdf/reader/rectangle.rb
CHANGED
@@ -26,6 +26,19 @@ module PDF
|
|
26
26
|
set_corners(x1, y1, x2, y2)
|
27
27
|
end
|
28
28
|
|
29
|
+
def self.from_array(arr)
|
30
|
+
if arr.size != 4
|
31
|
+
raise ArgumentError, "Only 4-element Arrays can be converted to a Rectangle"
|
32
|
+
end
|
33
|
+
|
34
|
+
PDF::Reader::Rectangle.new(
|
35
|
+
arr[0].to_f,
|
36
|
+
arr[1].to_f,
|
37
|
+
arr[2].to_f,
|
38
|
+
arr[3].to_f,
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
29
42
|
def ==(other)
|
30
43
|
to_a == other.to_a
|
31
44
|
end
|
@@ -38,6 +51,11 @@ module PDF
|
|
38
51
|
bottom_right.x - bottom_left.x
|
39
52
|
end
|
40
53
|
|
54
|
+
def contains?(point)
|
55
|
+
point.x >= bottom_left.x && point.x <= top_right.x &&
|
56
|
+
point.y >= bottom_left.y && point.y <= top_right.y
|
57
|
+
end
|
58
|
+
|
41
59
|
# A pdf-style 4-number array
|
42
60
|
def to_a
|
43
61
|
[
|
data/lib/pdf/reader/text_run.rb
CHANGED
@@ -7,15 +7,14 @@ class PDF::Reader
|
|
7
7
|
class TextRun
|
8
8
|
include Comparable
|
9
9
|
|
10
|
-
attr_reader :
|
10
|
+
attr_reader :origin, :width, :font_size, :text
|
11
11
|
|
12
12
|
alias :to_s :text
|
13
13
|
|
14
14
|
def initialize(x, y, width, font_size, text)
|
15
|
-
@
|
16
|
-
@y = y
|
15
|
+
@origin = PDF::Reader::Point.new(x, y)
|
17
16
|
@width = width
|
18
|
-
@font_size = font_size
|
17
|
+
@font_size = font_size
|
19
18
|
@text = text
|
20
19
|
end
|
21
20
|
|
@@ -35,12 +34,20 @@ class PDF::Reader
|
|
35
34
|
end
|
36
35
|
end
|
37
36
|
|
37
|
+
def x
|
38
|
+
@origin.x
|
39
|
+
end
|
40
|
+
|
41
|
+
def y
|
42
|
+
@origin.y
|
43
|
+
end
|
44
|
+
|
38
45
|
def endx
|
39
|
-
@endx ||= x + width
|
46
|
+
@endx ||= @origin.x + width
|
40
47
|
end
|
41
48
|
|
42
49
|
def endy
|
43
|
-
@endy ||= y + font_size
|
50
|
+
@endy ||= @origin.y + font_size
|
44
51
|
end
|
45
52
|
|
46
53
|
def mean_character_width
|
data/lib/pdf/reader.rb
CHANGED
@@ -112,17 +112,25 @@ module PDF
|
|
112
112
|
#
|
113
113
|
# reader = PDF::Reader.new("somefile.pdf", :password => "apples")
|
114
114
|
#
|
115
|
+
# Using this method directly is supported, but it's more common to use
|
116
|
+
# `PDF::Reader.open`
|
117
|
+
#
|
115
118
|
def initialize(input, opts = {})
|
116
119
|
@cache = PDF::Reader::ObjectCache.new
|
117
120
|
opts.merge!(:cache => @cache)
|
118
121
|
@objects = PDF::Reader::ObjectHash.new(input, opts)
|
119
122
|
end
|
120
123
|
|
124
|
+
# Return a Hash with some basic information about the PDF file
|
125
|
+
#
|
121
126
|
def info
|
122
127
|
dict = @objects.deref(@objects.trailer[:Info])
|
123
128
|
doc_strings_to_utf8(dict)
|
124
129
|
end
|
125
130
|
|
131
|
+
# Return a Hash with extra metadata provided by the author of the PDF file. Not
|
132
|
+
# always present.
|
133
|
+
#
|
126
134
|
def metadata
|
127
135
|
stream = @objects.deref(root[:Metadata])
|
128
136
|
if stream.nil?
|
@@ -134,6 +142,8 @@ module PDF
|
|
134
142
|
end
|
135
143
|
end
|
136
144
|
|
145
|
+
# The number of pages in this PDF
|
146
|
+
#
|
137
147
|
def page_count
|
138
148
|
pages = @objects.deref(root[:Pages])
|
139
149
|
unless pages.kind_of?(::Hash)
|
@@ -142,12 +152,14 @@ module PDF
|
|
142
152
|
@page_count ||= @objects.deref(pages[:Count])
|
143
153
|
end
|
144
154
|
|
155
|
+
# The PDF version this file uses
|
156
|
+
#
|
145
157
|
def pdf_version
|
146
158
|
@objects.pdf_version
|
147
159
|
end
|
148
160
|
|
149
|
-
# syntactic sugar for opening a PDF file. Accepts the
|
150
|
-
# as new().
|
161
|
+
# syntactic sugar for opening a PDF file and the most common approach. Accepts the
|
162
|
+
# same arguments as new().
|
151
163
|
#
|
152
164
|
# PDF::Reader.open("somefile.pdf") do |reader|
|
153
165
|
# puts reader.pdf_version
|
@@ -273,6 +285,7 @@ end
|
|
273
285
|
|
274
286
|
require 'pdf/reader/resource_methods'
|
275
287
|
require 'pdf/reader/buffer'
|
288
|
+
require 'pdf/reader/bounding_rectangle_runs_filter'
|
276
289
|
require 'pdf/reader/cid_widths'
|
277
290
|
require 'pdf/reader/cmap'
|
278
291
|
require 'pdf/reader/encoding'
|
data/rbi/pdf-reader.rbi
CHANGED
@@ -43,6 +43,13 @@ module PDF
|
|
43
43
|
sig { returns(T::Hash[Symbol, T.untyped]) }
|
44
44
|
def root; end
|
45
45
|
|
46
|
+
class BoundingRectangleRunsFilter
|
47
|
+
extend T::Sig
|
48
|
+
|
49
|
+
sig { params(runs: T::Array[PDF::Reader::TextRun], rect: PDF::Reader::Rectangle).returns(T::Array[PDF::Reader::TextRun]) }
|
50
|
+
def self.runs_within_rect(runs, rect); end
|
51
|
+
end
|
52
|
+
|
46
53
|
class Buffer
|
47
54
|
TOKEN_WHITESPACE = [0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20]
|
48
55
|
TOKEN_DELIMITER = [0x25, 0x3C, 0x3E, 0x28, 0x5B, 0x7B, 0x29, 0x5D, 0x7D, 0x2F]
|
@@ -750,8 +757,11 @@ module PDF
|
|
750
757
|
sig { returns(T::Array[Numeric]) }
|
751
758
|
def origin; end
|
752
759
|
|
753
|
-
sig { returns(
|
754
|
-
def
|
760
|
+
sig { params(opts: T::Hash[Symbol, T.untyped]).returns(T::Array[PDF::Reader::TextRun]) }
|
761
|
+
def runs(opts = {}); end
|
762
|
+
|
763
|
+
sig { params(opts: T::Hash[Symbol, T.untyped]).returns(String) }
|
764
|
+
def text(opts = {}); end
|
755
765
|
|
756
766
|
sig { params(receivers: T.untyped).void }
|
757
767
|
def walk(*receivers); end
|
@@ -794,7 +804,7 @@ module PDF
|
|
794
804
|
extend T::Sig
|
795
805
|
DEFAULT_FONT_SIZE = 12
|
796
806
|
|
797
|
-
sig { params(runs: T::Array[PDF::Reader::TextRun], mediabox: T::Array[Numeric]).void }
|
807
|
+
sig { params(runs: T::Array[PDF::Reader::TextRun], mediabox: T.any(T::Array[Numeric], PDF::Reader::Rectangle)).void }
|
798
808
|
def initialize(runs, mediabox); end
|
799
809
|
|
800
810
|
sig { returns(String) }
|
@@ -829,6 +839,9 @@ module PDF
|
|
829
839
|
|
830
840
|
sig { params(haystack: T.untyped, needle: T.untyped, index: T.untyped).returns(T.untyped) }
|
831
841
|
def local_string_insert(haystack, needle, index); end
|
842
|
+
|
843
|
+
sig { params(mediabox: T.untyped).returns(T.untyped) }
|
844
|
+
def process_mediabox(mediabox); end
|
832
845
|
end
|
833
846
|
|
834
847
|
class PageState
|
@@ -996,6 +1009,9 @@ module PDF
|
|
996
1009
|
sig { params(str: T.untyped).returns(T.untyped) }
|
997
1010
|
def move_to_next_line_and_show_text(str); end
|
998
1011
|
|
1012
|
+
sig { params(opts: T::Hash[Symbol, T.untyped]).returns(T::Array[PDF::Reader::TextRun]) }
|
1013
|
+
def runs(opts = {}); end
|
1014
|
+
|
999
1015
|
sig { params(aw: T.untyped, ac: T.untyped, string: T.untyped).returns(T.untyped) }
|
1000
1016
|
def set_spacing_next_line_show_text(aw, ac, string); end
|
1001
1017
|
|
@@ -1122,6 +1138,9 @@ module PDF
|
|
1122
1138
|
end
|
1123
1139
|
|
1124
1140
|
class Rectangle
|
1141
|
+
sig { params(arr: T::Array[Numeric]).returns(PDF::Reader::Rectangle) }
|
1142
|
+
def self.from_array(arr); end
|
1143
|
+
|
1125
1144
|
sig do
|
1126
1145
|
params(
|
1127
1146
|
x1: Numeric,
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-12-
|
11
|
+
date: 2021-12-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -215,6 +215,7 @@ files:
|
|
215
215
|
- lib/pdf/reader/afm/Times-Italic.afm
|
216
216
|
- lib/pdf/reader/afm/Times-Roman.afm
|
217
217
|
- lib/pdf/reader/afm/ZapfDingbats.afm
|
218
|
+
- lib/pdf/reader/bounding_rectangle_runs_filter.rb
|
218
219
|
- lib/pdf/reader/buffer.rb
|
219
220
|
- lib/pdf/reader/cid_widths.rb
|
220
221
|
- lib/pdf/reader/cmap.rb
|
@@ -281,9 +282,9 @@ licenses:
|
|
281
282
|
- MIT
|
282
283
|
metadata:
|
283
284
|
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
284
|
-
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.
|
285
|
-
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.
|
286
|
-
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.
|
285
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.8.0/CHANGELOG
|
286
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.8.0
|
287
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.8.0
|
287
288
|
post_install_message:
|
288
289
|
rdoc_options:
|
289
290
|
- "--title"
|