pdf-reader 2.2.1 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +30 -0
- data/README.md +2 -2
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_text +1 -1
- data/lib/pdf/reader.rb +1 -2
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier.afm +342 -342
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -213
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
- data/lib/pdf/reader/buffer.rb +1 -1
- data/lib/pdf/reader/cmap.rb +8 -0
- data/lib/pdf/reader/encoding.rb +11 -9
- data/lib/pdf/reader/filter/flate.rb +28 -16
- data/lib/pdf/reader/font.rb +10 -2
- data/lib/pdf/reader/object_hash.rb +24 -11
- data/lib/pdf/reader/orientation_detector.rb +2 -2
- data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
- data/lib/pdf/reader/page.rb +28 -0
- data/lib/pdf/reader/page_layout.rb +10 -5
- data/lib/pdf/reader/page_state.rb +7 -5
- data/lib/pdf/reader/page_text_receiver.rb +22 -1
- data/lib/pdf/reader/text_run.rb +24 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +24 -16
- data/lib/pdf/reader/xref.rb +7 -4
- metadata +22 -17
- data/lib/pdf/hash.rb +0 -20
@@ -12,11 +12,20 @@ class PDF::Reader
|
|
12
12
|
# see Section 9.6.2.2, PDF 32000-1:2008, pp 256
|
13
13
|
class BuiltIn
|
14
14
|
|
15
|
+
BUILTINS = [
|
16
|
+
:Courier, :"Courier-Bold", :"Courier-BoldOblique", :"Courier-Oblique",
|
17
|
+
:Helvetica, :"Helvetica-Bold", :"Helvetica-BoldOblique", :"Helvetica-Oblique",
|
18
|
+
:Symbol,
|
19
|
+
:"Times-Roman", :"Times-Bold", :"Times-BoldItalic", :"Times-Italic",
|
20
|
+
:ZapfDingbats
|
21
|
+
]
|
22
|
+
|
15
23
|
def initialize(font)
|
16
24
|
@font = font
|
17
25
|
@@all_metrics ||= PDF::Reader::SynchronizedCache.new
|
18
26
|
|
19
|
-
|
27
|
+
basefont = extract_basefont(font.basefont)
|
28
|
+
metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{basefont}.afm")
|
20
29
|
|
21
30
|
if File.file?(metrics_path)
|
22
31
|
@metrics = @@all_metrics[metrics_path] ||= AFM::Font.new(metrics_path)
|
@@ -28,23 +37,15 @@ class PDF::Reader
|
|
28
37
|
def glyph_width(code_point)
|
29
38
|
return 0 if code_point.nil? || code_point < 0
|
30
39
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
m = names.map { |name|
|
36
|
-
@metrics.char_metrics[name.to_s]
|
37
|
-
}.compact.first
|
38
|
-
end
|
40
|
+
names = @font.encoding.int_to_name(code_point)
|
41
|
+
metrics = names.map { |name|
|
42
|
+
@metrics.char_metrics[name.to_s]
|
43
|
+
}.compact.first
|
39
44
|
|
40
|
-
if
|
41
|
-
|
42
|
-
elsif @font.widths[code_point - 1]
|
43
|
-
@font.widths[code_point - 1]
|
44
|
-
elsif control_character?(code_point)
|
45
|
-
0
|
45
|
+
if metrics
|
46
|
+
metrics[:wx]
|
46
47
|
else
|
47
|
-
0
|
48
|
+
@font.widths[code_point - 1] || 0
|
48
49
|
end
|
49
50
|
end
|
50
51
|
|
@@ -54,6 +55,13 @@ class PDF::Reader
|
|
54
55
|
@font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
|
55
56
|
end
|
56
57
|
|
58
|
+
def extract_basefont(font_name)
|
59
|
+
if BUILTINS.include?(font_name)
|
60
|
+
font_name
|
61
|
+
else
|
62
|
+
"Times-Roman"
|
63
|
+
end
|
64
|
+
end
|
57
65
|
end
|
58
66
|
end
|
59
67
|
end
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -230,18 +230,21 @@ class PDF::Reader
|
|
230
230
|
# should always be 0, but all sort of crazy junk is prefixed to PDF files
|
231
231
|
# in the real world.
|
232
232
|
#
|
233
|
-
# Checks up to
|
233
|
+
# Checks up to 1024 chars into the file,
|
234
|
+
# returns nil if no PDF data detected.
|
235
|
+
# Adobe PDF 1.4 spec (3.4.1) 12. Acrobat viewers require only that the
|
236
|
+
# header appear somewhere within the first 1024 bytes of the file
|
234
237
|
#
|
235
238
|
def calc_junk_offset(io)
|
236
239
|
io.rewind
|
237
240
|
offset = io.pos
|
238
|
-
until (c = io.readchar) == '%' || c == 37 || offset >
|
241
|
+
until (c = io.readchar) == '%' || c == 37 || offset > 1024
|
239
242
|
offset += 1
|
240
243
|
end
|
241
244
|
io.rewind
|
242
|
-
offset <
|
245
|
+
offset < 1024 ? offset : nil
|
243
246
|
rescue EOFError
|
244
|
-
|
247
|
+
nil
|
245
248
|
end
|
246
249
|
end
|
247
250
|
################################################################################
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-06-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "<"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
19
|
+
version: '13.0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "<"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
26
|
+
version: '13.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0.2'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: pry
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: 1.0
|
103
|
+
version: '1.0'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: 1.0
|
110
|
+
version: '1.0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: ruby-rc4
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -199,7 +199,6 @@ files:
|
|
199
199
|
- examples/text.rb
|
200
200
|
- examples/version.rb
|
201
201
|
- lib/pdf-reader.rb
|
202
|
-
- lib/pdf/hash.rb
|
203
202
|
- lib/pdf/reader.rb
|
204
203
|
- lib/pdf/reader/afm/Courier-Bold.afm
|
205
204
|
- lib/pdf/reader/afm/Courier-BoldOblique.afm
|
@@ -209,6 +208,7 @@ files:
|
|
209
208
|
- lib/pdf/reader/afm/Helvetica-BoldOblique.afm
|
210
209
|
- lib/pdf/reader/afm/Helvetica-Oblique.afm
|
211
210
|
- lib/pdf/reader/afm/Helvetica.afm
|
211
|
+
- lib/pdf/reader/afm/MustRead.html
|
212
212
|
- lib/pdf/reader/afm/Symbol.afm
|
213
213
|
- lib/pdf/reader/afm/Times-Bold.afm
|
214
214
|
- lib/pdf/reader/afm/Times-BoldItalic.afm
|
@@ -246,6 +246,7 @@ files:
|
|
246
246
|
- lib/pdf/reader/object_hash.rb
|
247
247
|
- lib/pdf/reader/object_stream.rb
|
248
248
|
- lib/pdf/reader/orientation_detector.rb
|
249
|
+
- lib/pdf/reader/overlapping_runs_filter.rb
|
249
250
|
- lib/pdf/reader/page.rb
|
250
251
|
- lib/pdf/reader/page_layout.rb
|
251
252
|
- lib/pdf/reader/page_state.rb
|
@@ -271,11 +272,15 @@ files:
|
|
271
272
|
- lib/pdf/reader/width_calculator/type_one_or_three.rb
|
272
273
|
- lib/pdf/reader/width_calculator/type_zero.rb
|
273
274
|
- lib/pdf/reader/xref.rb
|
274
|
-
homepage:
|
275
|
+
homepage: https://github.com/yob/pdf-reader
|
275
276
|
licenses:
|
276
277
|
- MIT
|
277
|
-
metadata:
|
278
|
-
|
278
|
+
metadata:
|
279
|
+
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
280
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
|
281
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
|
282
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
|
283
|
+
post_install_message:
|
279
284
|
rdoc_options:
|
280
285
|
- "--title"
|
281
286
|
- PDF::Reader Documentation
|
@@ -288,15 +293,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
288
293
|
requirements:
|
289
294
|
- - ">="
|
290
295
|
- !ruby/object:Gem::Version
|
291
|
-
version:
|
296
|
+
version: '2.0'
|
292
297
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
293
298
|
requirements:
|
294
299
|
- - ">="
|
295
300
|
- !ruby/object:Gem::Version
|
296
301
|
version: '0'
|
297
302
|
requirements: []
|
298
|
-
rubygems_version: 3.
|
299
|
-
signing_key:
|
303
|
+
rubygems_version: 3.2.3
|
304
|
+
signing_key:
|
300
305
|
specification_version: 4
|
301
306
|
summary: A library for accessing the content of PDF files
|
302
307
|
test_files: []
|
data/lib/pdf/hash.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
module PDF
|
5
|
-
# This class is deprecated, please stop using it.
|
6
|
-
class Hash < ::PDF::Reader::ObjectHash # :nodoc:
|
7
|
-
def initialize(input)
|
8
|
-
warn "DEPRECATION NOTICE: PDF::Hash has been deprecated, use PDF::Reader::ObjectHash instead"
|
9
|
-
super
|
10
|
-
end
|
11
|
-
|
12
|
-
def version
|
13
|
-
warn <<-EOS
|
14
|
-
DEPRECATION NOTICE: PDF::Hash#version has been deprecated,
|
15
|
-
use PDF::Reader::ObjectHash#pdf_version instead
|
16
|
-
EOS
|
17
|
-
pdf_version
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|