pdf-reader 2.2.1 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +30 -0
  3. data/README.md +2 -2
  4. data/bin/pdf_callbacks +1 -1
  5. data/bin/pdf_text +1 -1
  6. data/lib/pdf/reader.rb +1 -2
  7. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  8. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  9. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  10. data/lib/pdf/reader/afm/Courier.afm +342 -342
  11. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  12. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  13. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  14. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  15. data/lib/pdf/reader/afm/MustRead.html +19 -0
  16. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  17. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  18. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  19. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  20. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  21. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  22. data/lib/pdf/reader/buffer.rb +1 -1
  23. data/lib/pdf/reader/cmap.rb +8 -0
  24. data/lib/pdf/reader/encoding.rb +11 -9
  25. data/lib/pdf/reader/filter/flate.rb +28 -16
  26. data/lib/pdf/reader/font.rb +10 -2
  27. data/lib/pdf/reader/object_hash.rb +24 -11
  28. data/lib/pdf/reader/orientation_detector.rb +2 -2
  29. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  30. data/lib/pdf/reader/page.rb +28 -0
  31. data/lib/pdf/reader/page_layout.rb +10 -5
  32. data/lib/pdf/reader/page_state.rb +7 -5
  33. data/lib/pdf/reader/page_text_receiver.rb +22 -1
  34. data/lib/pdf/reader/text_run.rb +24 -0
  35. data/lib/pdf/reader/width_calculator/built_in.rb +24 -16
  36. data/lib/pdf/reader/xref.rb +7 -4
  37. metadata +22 -17
  38. data/lib/pdf/hash.rb +0 -20
@@ -12,11 +12,20 @@ class PDF::Reader
12
12
  # see Section 9.6.2.2, PDF 32000-1:2008, pp 256
13
13
  class BuiltIn
14
14
 
15
+ BUILTINS = [
16
+ :Courier, :"Courier-Bold", :"Courier-BoldOblique", :"Courier-Oblique",
17
+ :Helvetica, :"Helvetica-Bold", :"Helvetica-BoldOblique", :"Helvetica-Oblique",
18
+ :Symbol,
19
+ :"Times-Roman", :"Times-Bold", :"Times-BoldItalic", :"Times-Italic",
20
+ :ZapfDingbats
21
+ ]
22
+
15
23
  def initialize(font)
16
24
  @font = font
17
25
  @@all_metrics ||= PDF::Reader::SynchronizedCache.new
18
26
 
19
- metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{font.basefont}.afm")
27
+ basefont = extract_basefont(font.basefont)
28
+ metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{basefont}.afm")
20
29
 
21
30
  if File.file?(metrics_path)
22
31
  @metrics = @@all_metrics[metrics_path] ||= AFM::Font.new(metrics_path)
@@ -28,23 +37,15 @@ class PDF::Reader
28
37
  def glyph_width(code_point)
29
38
  return 0 if code_point.nil? || code_point < 0
30
39
 
31
- m = @metrics.char_metrics_by_code[code_point]
32
- if m.nil?
33
- names = @font.encoding.int_to_name(code_point)
34
-
35
- m = names.map { |name|
36
- @metrics.char_metrics[name.to_s]
37
- }.compact.first
38
- end
40
+ names = @font.encoding.int_to_name(code_point)
41
+ metrics = names.map { |name|
42
+ @metrics.char_metrics[name.to_s]
43
+ }.compact.first
39
44
 
40
- if m
41
- m[:wx]
42
- elsif @font.widths[code_point - 1]
43
- @font.widths[code_point - 1]
44
- elsif control_character?(code_point)
45
- 0
45
+ if metrics
46
+ metrics[:wx]
46
47
  else
47
- 0
48
+ @font.widths[code_point - 1] || 0
48
49
  end
49
50
  end
50
51
 
@@ -54,6 +55,13 @@ class PDF::Reader
54
55
  @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
55
56
  end
56
57
 
58
+ def extract_basefont(font_name)
59
+ if BUILTINS.include?(font_name)
60
+ font_name
61
+ else
62
+ "Times-Roman"
63
+ end
64
+ end
57
65
  end
58
66
  end
59
67
  end
@@ -230,18 +230,21 @@ class PDF::Reader
230
230
  # should always be 0, but all sort of crazy junk is prefixed to PDF files
231
231
  # in the real world.
232
232
  #
233
- # Checks up to 50 chars into the file, returns nil if no PDF data detected.
233
+ # Checks up to 1024 chars into the file,
234
+ # returns nil if no PDF data detected.
235
+ # Adobe PDF 1.4 spec (3.4.1) 12. Acrobat viewers require only that the
236
+ # header appear somewhere within the first 1024 bytes of the file
234
237
  #
235
238
  def calc_junk_offset(io)
236
239
  io.rewind
237
240
  offset = io.pos
238
- until (c = io.readchar) == '%' || c == 37 || offset > 50
241
+ until (c = io.readchar) == '%' || c == 37 || offset > 1024
239
242
  offset += 1
240
243
  end
241
244
  io.rewind
242
- offset < 50 ? offset : nil
245
+ offset < 1024 ? offset : nil
243
246
  rescue EOFError
244
- return nil
247
+ nil
245
248
  end
246
249
  end
247
250
  ################################################################################
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.1
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-07-27 00:00:00.000000000 Z
11
+ date: 2021-06-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "<"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '13.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "<"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '13.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -67,7 +67,7 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.2'
69
69
  - !ruby/object:Gem::Dependency
70
- name: ir_b
70
+ name: pry
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 1.0.0
103
+ version: '1.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 1.0.0
110
+ version: '1.0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: ruby-rc4
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -199,7 +199,6 @@ files:
199
199
  - examples/text.rb
200
200
  - examples/version.rb
201
201
  - lib/pdf-reader.rb
202
- - lib/pdf/hash.rb
203
202
  - lib/pdf/reader.rb
204
203
  - lib/pdf/reader/afm/Courier-Bold.afm
205
204
  - lib/pdf/reader/afm/Courier-BoldOblique.afm
@@ -209,6 +208,7 @@ files:
209
208
  - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
210
209
  - lib/pdf/reader/afm/Helvetica-Oblique.afm
211
210
  - lib/pdf/reader/afm/Helvetica.afm
211
+ - lib/pdf/reader/afm/MustRead.html
212
212
  - lib/pdf/reader/afm/Symbol.afm
213
213
  - lib/pdf/reader/afm/Times-Bold.afm
214
214
  - lib/pdf/reader/afm/Times-BoldItalic.afm
@@ -246,6 +246,7 @@ files:
246
246
  - lib/pdf/reader/object_hash.rb
247
247
  - lib/pdf/reader/object_stream.rb
248
248
  - lib/pdf/reader/orientation_detector.rb
249
+ - lib/pdf/reader/overlapping_runs_filter.rb
249
250
  - lib/pdf/reader/page.rb
250
251
  - lib/pdf/reader/page_layout.rb
251
252
  - lib/pdf/reader/page_state.rb
@@ -271,11 +272,15 @@ files:
271
272
  - lib/pdf/reader/width_calculator/type_one_or_three.rb
272
273
  - lib/pdf/reader/width_calculator/type_zero.rb
273
274
  - lib/pdf/reader/xref.rb
274
- homepage: http://github.com/yob/pdf-reader
275
+ homepage: https://github.com/yob/pdf-reader
275
276
  licenses:
276
277
  - MIT
277
- metadata: {}
278
- post_install_message:
278
+ metadata:
279
+ bug_tracker_uri: https://github.com/yob/pdf-reader/issues
280
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
281
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
282
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
283
+ post_install_message:
279
284
  rdoc_options:
280
285
  - "--title"
281
286
  - PDF::Reader Documentation
@@ -288,15 +293,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
288
293
  requirements:
289
294
  - - ">="
290
295
  - !ruby/object:Gem::Version
291
- version: 1.9.3
296
+ version: '2.0'
292
297
  required_rubygems_version: !ruby/object:Gem::Requirement
293
298
  requirements:
294
299
  - - ">="
295
300
  - !ruby/object:Gem::Version
296
301
  version: '0'
297
302
  requirements: []
298
- rubygems_version: 3.0.1
299
- signing_key:
303
+ rubygems_version: 3.2.3
304
+ signing_key:
300
305
  specification_version: 4
301
306
  summary: A library for accessing the content of PDF files
302
307
  test_files: []
data/lib/pdf/hash.rb DELETED
@@ -1,20 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- module PDF
5
- # This class is deprecated, please stop using it.
6
- class Hash < ::PDF::Reader::ObjectHash # :nodoc:
7
- def initialize(input)
8
- warn "DEPRECATION NOTICE: PDF::Hash has been deprecated, use PDF::Reader::ObjectHash instead"
9
- super
10
- end
11
-
12
- def version
13
- warn <<-EOS
14
- DEPRECATION NOTICE: PDF::Hash#version has been deprecated,
15
- use PDF::Reader::ObjectHash#pdf_version instead
16
- EOS
17
- pdf_version
18
- end
19
- end
20
- end