pdf-reader 2.2.1 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +30 -0
  3. data/README.md +2 -2
  4. data/bin/pdf_callbacks +1 -1
  5. data/bin/pdf_text +1 -1
  6. data/lib/pdf/reader.rb +1 -2
  7. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  8. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  9. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  10. data/lib/pdf/reader/afm/Courier.afm +342 -342
  11. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  12. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  13. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  14. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  15. data/lib/pdf/reader/afm/MustRead.html +19 -0
  16. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  17. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  18. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  19. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  20. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  21. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  22. data/lib/pdf/reader/buffer.rb +1 -1
  23. data/lib/pdf/reader/cmap.rb +8 -0
  24. data/lib/pdf/reader/encoding.rb +11 -9
  25. data/lib/pdf/reader/filter/flate.rb +28 -16
  26. data/lib/pdf/reader/font.rb +10 -2
  27. data/lib/pdf/reader/object_hash.rb +24 -11
  28. data/lib/pdf/reader/orientation_detector.rb +2 -2
  29. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  30. data/lib/pdf/reader/page.rb +28 -0
  31. data/lib/pdf/reader/page_layout.rb +10 -5
  32. data/lib/pdf/reader/page_state.rb +7 -5
  33. data/lib/pdf/reader/page_text_receiver.rb +22 -1
  34. data/lib/pdf/reader/text_run.rb +24 -0
  35. data/lib/pdf/reader/width_calculator/built_in.rb +24 -16
  36. data/lib/pdf/reader/xref.rb +7 -4
  37. metadata +22 -17
  38. data/lib/pdf/hash.rb +0 -20
@@ -12,11 +12,20 @@ class PDF::Reader
12
12
  # see Section 9.6.2.2, PDF 32000-1:2008, pp 256
13
13
  class BuiltIn
14
14
 
15
+ BUILTINS = [
16
+ :Courier, :"Courier-Bold", :"Courier-BoldOblique", :"Courier-Oblique",
17
+ :Helvetica, :"Helvetica-Bold", :"Helvetica-BoldOblique", :"Helvetica-Oblique",
18
+ :Symbol,
19
+ :"Times-Roman", :"Times-Bold", :"Times-BoldItalic", :"Times-Italic",
20
+ :ZapfDingbats
21
+ ]
22
+
15
23
  def initialize(font)
16
24
  @font = font
17
25
  @@all_metrics ||= PDF::Reader::SynchronizedCache.new
18
26
 
19
- metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{font.basefont}.afm")
27
+ basefont = extract_basefont(font.basefont)
28
+ metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{basefont}.afm")
20
29
 
21
30
  if File.file?(metrics_path)
22
31
  @metrics = @@all_metrics[metrics_path] ||= AFM::Font.new(metrics_path)
@@ -28,23 +37,15 @@ class PDF::Reader
28
37
  def glyph_width(code_point)
29
38
  return 0 if code_point.nil? || code_point < 0
30
39
 
31
- m = @metrics.char_metrics_by_code[code_point]
32
- if m.nil?
33
- names = @font.encoding.int_to_name(code_point)
34
-
35
- m = names.map { |name|
36
- @metrics.char_metrics[name.to_s]
37
- }.compact.first
38
- end
40
+ names = @font.encoding.int_to_name(code_point)
41
+ metrics = names.map { |name|
42
+ @metrics.char_metrics[name.to_s]
43
+ }.compact.first
39
44
 
40
- if m
41
- m[:wx]
42
- elsif @font.widths[code_point - 1]
43
- @font.widths[code_point - 1]
44
- elsif control_character?(code_point)
45
- 0
45
+ if metrics
46
+ metrics[:wx]
46
47
  else
47
- 0
48
+ @font.widths[code_point - 1] || 0
48
49
  end
49
50
  end
50
51
 
@@ -54,6 +55,13 @@ class PDF::Reader
54
55
  @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
55
56
  end
56
57
 
58
+ def extract_basefont(font_name)
59
+ if BUILTINS.include?(font_name)
60
+ font_name
61
+ else
62
+ "Times-Roman"
63
+ end
64
+ end
57
65
  end
58
66
  end
59
67
  end
@@ -230,18 +230,21 @@ class PDF::Reader
230
230
  # should always be 0, but all sort of crazy junk is prefixed to PDF files
231
231
  # in the real world.
232
232
  #
233
- # Checks up to 50 chars into the file, returns nil if no PDF data detected.
233
+ # Checks up to 1024 chars into the file,
234
+ # returns nil if no PDF data detected.
235
+ # Adobe PDF 1.4 spec (3.4.1) 12. Acrobat viewers require only that the
236
+ # header appear somewhere within the first 1024 bytes of the file
234
237
  #
235
238
  def calc_junk_offset(io)
236
239
  io.rewind
237
240
  offset = io.pos
238
- until (c = io.readchar) == '%' || c == 37 || offset > 50
241
+ until (c = io.readchar) == '%' || c == 37 || offset > 1024
239
242
  offset += 1
240
243
  end
241
244
  io.rewind
242
- offset < 50 ? offset : nil
245
+ offset < 1024 ? offset : nil
243
246
  rescue EOFError
244
- return nil
247
+ nil
245
248
  end
246
249
  end
247
250
  ################################################################################
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.1
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-07-27 00:00:00.000000000 Z
11
+ date: 2021-06-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "<"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '13.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "<"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '13.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -67,7 +67,7 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.2'
69
69
  - !ruby/object:Gem::Dependency
70
- name: ir_b
70
+ name: pry
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 1.0.0
103
+ version: '1.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 1.0.0
110
+ version: '1.0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: ruby-rc4
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -199,7 +199,6 @@ files:
199
199
  - examples/text.rb
200
200
  - examples/version.rb
201
201
  - lib/pdf-reader.rb
202
- - lib/pdf/hash.rb
203
202
  - lib/pdf/reader.rb
204
203
  - lib/pdf/reader/afm/Courier-Bold.afm
205
204
  - lib/pdf/reader/afm/Courier-BoldOblique.afm
@@ -209,6 +208,7 @@ files:
209
208
  - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
210
209
  - lib/pdf/reader/afm/Helvetica-Oblique.afm
211
210
  - lib/pdf/reader/afm/Helvetica.afm
211
+ - lib/pdf/reader/afm/MustRead.html
212
212
  - lib/pdf/reader/afm/Symbol.afm
213
213
  - lib/pdf/reader/afm/Times-Bold.afm
214
214
  - lib/pdf/reader/afm/Times-BoldItalic.afm
@@ -246,6 +246,7 @@ files:
246
246
  - lib/pdf/reader/object_hash.rb
247
247
  - lib/pdf/reader/object_stream.rb
248
248
  - lib/pdf/reader/orientation_detector.rb
249
+ - lib/pdf/reader/overlapping_runs_filter.rb
249
250
  - lib/pdf/reader/page.rb
250
251
  - lib/pdf/reader/page_layout.rb
251
252
  - lib/pdf/reader/page_state.rb
@@ -271,11 +272,15 @@ files:
271
272
  - lib/pdf/reader/width_calculator/type_one_or_three.rb
272
273
  - lib/pdf/reader/width_calculator/type_zero.rb
273
274
  - lib/pdf/reader/xref.rb
274
- homepage: http://github.com/yob/pdf-reader
275
+ homepage: https://github.com/yob/pdf-reader
275
276
  licenses:
276
277
  - MIT
277
- metadata: {}
278
- post_install_message:
278
+ metadata:
279
+ bug_tracker_uri: https://github.com/yob/pdf-reader/issues
280
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
281
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
282
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
283
+ post_install_message:
279
284
  rdoc_options:
280
285
  - "--title"
281
286
  - PDF::Reader Documentation
@@ -288,15 +293,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
288
293
  requirements:
289
294
  - - ">="
290
295
  - !ruby/object:Gem::Version
291
- version: 1.9.3
296
+ version: '2.0'
292
297
  required_rubygems_version: !ruby/object:Gem::Requirement
293
298
  requirements:
294
299
  - - ">="
295
300
  - !ruby/object:Gem::Version
296
301
  version: '0'
297
302
  requirements: []
298
- rubygems_version: 3.0.1
299
- signing_key:
303
+ rubygems_version: 3.2.3
304
+ signing_key:
300
305
  specification_version: 4
301
306
  summary: A library for accessing the content of PDF files
302
307
  test_files: []
data/lib/pdf/hash.rb DELETED
@@ -1,20 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- module PDF
5
- # This class is deprecated, please stop using it.
6
- class Hash < ::PDF::Reader::ObjectHash # :nodoc:
7
- def initialize(input)
8
- warn "DEPRECATION NOTICE: PDF::Hash has been deprecated, use PDF::Reader::ObjectHash instead"
9
- super
10
- end
11
-
12
- def version
13
- warn <<-EOS
14
- DEPRECATION NOTICE: PDF::Hash#version has been deprecated,
15
- use PDF::Reader::ObjectHash#pdf_version instead
16
- EOS
17
- pdf_version
18
- end
19
- end
20
- end