pdf-reader 2.6.0 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +21 -1
- data/Rakefile +1 -1
- data/examples/rspec.rb +1 -0
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +1 -0
- data/lib/pdf/reader/cid_widths.rb +1 -0
- data/lib/pdf/reader/cmap.rb +5 -3
- data/lib/pdf/reader/encoding.rb +2 -1
- data/lib/pdf/reader/error.rb +8 -0
- data/lib/pdf/reader/filter/ascii85.rb +2 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
- data/lib/pdf/reader/filter/depredict.rb +7 -5
- data/lib/pdf/reader/filter/flate.rb +2 -0
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +1 -0
- data/lib/pdf/reader/filter/run_length.rb +19 -13
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/font.rb +44 -0
- data/lib/pdf/reader/font_descriptor.rb +1 -0
- data/lib/pdf/reader/form_xobject.rb +1 -0
- data/lib/pdf/reader/glyph_hash.rb +1 -0
- data/lib/pdf/reader/lzw.rb +4 -2
- data/lib/pdf/reader/null_security_handler.rb +1 -0
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +5 -2
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
- data/lib/pdf/reader/page.rb +73 -11
- data/lib/pdf/reader/page_layout.rb +28 -32
- data/lib/pdf/reader/page_state.rb +11 -10
- data/lib/pdf/reader/page_text_receiver.rb +53 -9
- data/lib/pdf/reader/pages_strategy.rb +1 -0
- data/lib/pdf/reader/parser.rb +7 -1
- data/lib/pdf/reader/point.rb +25 -0
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/rectangle.rb +113 -0
- data/lib/pdf/reader/reference.rb +1 -0
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/resource_methods.rb +5 -0
- data/lib/pdf/reader/standard_security_handler.rb +1 -0
- data/lib/pdf/reader/standard_security_handler_v5.rb +1 -0
- data/lib/pdf/reader/stream.rb +1 -0
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +14 -6
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +1 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
- data/lib/pdf/reader/width_calculator/composite.rb +1 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +1 -0
- data/lib/pdf/reader/zero_width_runs_filter.rb +2 -0
- data/lib/pdf/reader.rb +29 -6
- data/lib/pdf-reader.rb +1 -0
- data/rbi/pdf-reader.rbi +1763 -0
- metadata +13 -10
- data/lib/pdf/reader/orientation_detector.rb +0 -34
data/lib/pdf/reader.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -111,17 +112,25 @@ module PDF
|
|
111
112
|
#
|
112
113
|
# reader = PDF::Reader.new("somefile.pdf", :password => "apples")
|
113
114
|
#
|
115
|
+
# Using this method directly is supported, but it's more common to use
|
116
|
+
# `PDF::Reader.open`
|
117
|
+
#
|
114
118
|
def initialize(input, opts = {})
|
115
119
|
@cache = PDF::Reader::ObjectCache.new
|
116
120
|
opts.merge!(:cache => @cache)
|
117
121
|
@objects = PDF::Reader::ObjectHash.new(input, opts)
|
118
122
|
end
|
119
123
|
|
124
|
+
# Return a Hash with some basic information about the PDF file
|
125
|
+
#
|
120
126
|
def info
|
121
127
|
dict = @objects.deref(@objects.trailer[:Info])
|
122
128
|
doc_strings_to_utf8(dict)
|
123
129
|
end
|
124
130
|
|
131
|
+
# Return a Hash with extra metadata provided by the author of the PDF file. Not
|
132
|
+
# always present.
|
133
|
+
#
|
125
134
|
def metadata
|
126
135
|
stream = @objects.deref(root[:Metadata])
|
127
136
|
if stream.nil?
|
@@ -133,20 +142,24 @@ module PDF
|
|
133
142
|
end
|
134
143
|
end
|
135
144
|
|
145
|
+
# The number of pages in this PDF
|
146
|
+
#
|
136
147
|
def page_count
|
137
148
|
pages = @objects.deref(root[:Pages])
|
138
149
|
unless pages.kind_of?(::Hash)
|
139
|
-
raise MalformedPDFError,
|
150
|
+
raise MalformedPDFError, "Pages structure is missing #{pages.class}"
|
140
151
|
end
|
141
152
|
@page_count ||= @objects.deref(pages[:Count])
|
142
153
|
end
|
143
154
|
|
155
|
+
# The PDF version this file uses
|
156
|
+
#
|
144
157
|
def pdf_version
|
145
158
|
@objects.pdf_version
|
146
159
|
end
|
147
160
|
|
148
|
-
# syntactic sugar for opening a PDF file. Accepts the same arguments
|
149
|
-
# as new().
|
161
|
+
# syntactic sugar for opening a PDF file and the most common approach. Accepts the
|
162
|
+
# same arguments as new().
|
150
163
|
#
|
151
164
|
# PDF::Reader.open("somefile.pdf") do |reader|
|
152
165
|
# puts reader.pdf_version
|
@@ -221,7 +234,7 @@ module PDF
|
|
221
234
|
when Array then
|
222
235
|
obj.map { |item| doc_strings_to_utf8(item) }
|
223
236
|
when String then
|
224
|
-
if obj
|
237
|
+
if has_utf16_bom?(obj)
|
225
238
|
utf16_to_utf8(obj)
|
226
239
|
else
|
227
240
|
pdfdoc_to_utf8(obj)
|
@@ -231,6 +244,14 @@ module PDF
|
|
231
244
|
end
|
232
245
|
end
|
233
246
|
|
247
|
+
def has_utf16_bom?(str)
|
248
|
+
first_bytes = str[0,2]
|
249
|
+
|
250
|
+
return false if first_bytes.nil?
|
251
|
+
|
252
|
+
first_bytes.unpack("C*") == [254, 255]
|
253
|
+
end
|
254
|
+
|
234
255
|
# TODO find a PDF I can use to spec this behaviour
|
235
256
|
#
|
236
257
|
def pdfdoc_to_utf8(obj)
|
@@ -242,7 +263,7 @@ module PDF
|
|
242
263
|
# String#encode
|
243
264
|
#
|
244
265
|
def utf16_to_utf8(obj)
|
245
|
-
str = obj[2, obj.size]
|
266
|
+
str = obj[2, obj.size].to_s
|
246
267
|
str = str.unpack("n*").pack("U*")
|
247
268
|
str.force_encoding("utf-8")
|
248
269
|
str
|
@@ -264,6 +285,7 @@ end
|
|
264
285
|
|
265
286
|
require 'pdf/reader/resource_methods'
|
266
287
|
require 'pdf/reader/buffer'
|
288
|
+
require 'pdf/reader/bounding_rectangle_runs_filter'
|
267
289
|
require 'pdf/reader/cid_widths'
|
268
290
|
require 'pdf/reader/cmap'
|
269
291
|
require 'pdf/reader/encoding'
|
@@ -286,7 +308,9 @@ require 'pdf/reader/object_hash'
|
|
286
308
|
require 'pdf/reader/object_stream'
|
287
309
|
require 'pdf/reader/pages_strategy'
|
288
310
|
require 'pdf/reader/parser'
|
311
|
+
require 'pdf/reader/point'
|
289
312
|
require 'pdf/reader/print_receiver'
|
313
|
+
require 'pdf/reader/rectangle'
|
290
314
|
require 'pdf/reader/reference'
|
291
315
|
require 'pdf/reader/register_receiver'
|
292
316
|
require 'pdf/reader/null_security_handler'
|
@@ -299,5 +323,4 @@ require 'pdf/reader/page_state'
|
|
299
323
|
require 'pdf/reader/page_text_receiver'
|
300
324
|
require 'pdf/reader/token'
|
301
325
|
require 'pdf/reader/xref'
|
302
|
-
require 'pdf/reader/orientation_detector'
|
303
326
|
require 'pdf/reader/page'
|