pdf-reader 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. data/CHANGELOG +7 -1
  2. data/README.rdoc +1 -0
  3. data/Rakefile +23 -8
  4. data/lib/pdf-reader.rb +3 -1
  5. data/lib/pdf/hash.rb +5 -1
  6. data/lib/pdf/reader.rb +8 -1
  7. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
  8. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
  9. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
  10. data/lib/pdf/reader/afm/Courier.afm +342 -0
  11. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
  12. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
  13. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
  14. data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
  15. data/lib/pdf/reader/afm/Symbol.afm +213 -0
  16. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
  17. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
  18. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
  19. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
  20. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
  21. data/lib/pdf/reader/buffer.rb +14 -6
  22. data/lib/pdf/reader/cid_widths.rb +61 -0
  23. data/lib/pdf/reader/cmap.rb +8 -2
  24. data/lib/pdf/reader/encoding.rb +52 -27
  25. data/lib/pdf/reader/error.rb +16 -1
  26. data/lib/pdf/reader/filter.rb +2 -0
  27. data/lib/pdf/reader/filter/ascii85.rb +3 -1
  28. data/lib/pdf/reader/filter/ascii_hex.rb +3 -1
  29. data/lib/pdf/reader/filter/depredict.rb +2 -0
  30. data/lib/pdf/reader/filter/flate.rb +3 -1
  31. data/lib/pdf/reader/filter/lzw.rb +1 -0
  32. data/lib/pdf/reader/filter/null.rb +1 -0
  33. data/lib/pdf/reader/filter/run_length.rb +2 -1
  34. data/lib/pdf/reader/font.rb +74 -18
  35. data/lib/pdf/reader/font_descriptor.rb +80 -0
  36. data/lib/pdf/reader/glyph_hash.rb +6 -0
  37. data/lib/pdf/reader/lzw.rb +1 -0
  38. data/lib/pdf/reader/object_cache.rb +1 -1
  39. data/lib/pdf/reader/object_hash.rb +1 -1
  40. data/lib/pdf/reader/page_layout.rb +125 -0
  41. data/lib/pdf/reader/page_state.rb +172 -69
  42. data/lib/pdf/reader/page_text_receiver.rb +50 -21
  43. data/lib/pdf/reader/pages_strategy.rb +17 -4
  44. data/lib/pdf/reader/parser.rb +25 -52
  45. data/lib/pdf/reader/print_receiver.rb +5 -0
  46. data/lib/pdf/reader/reference.rb +2 -0
  47. data/lib/pdf/reader/register_receiver.rb +1 -1
  48. data/lib/pdf/reader/standard_security_handler.rb +2 -0
  49. data/lib/pdf/reader/stream.rb +2 -0
  50. data/lib/pdf/reader/synchronized_cache.rb +32 -0
  51. data/lib/pdf/reader/text_receiver.rb +5 -4
  52. data/lib/pdf/reader/text_run.rb +80 -0
  53. data/lib/pdf/reader/token.rb +2 -0
  54. data/lib/pdf/reader/transformation_matrix.rb +194 -0
  55. data/lib/pdf/reader/width_calculator.rb +11 -0
  56. data/lib/pdf/reader/width_calculator/built_in.rb +50 -0
  57. data/lib/pdf/reader/width_calculator/composite.rb +27 -0
  58. data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
  59. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +32 -0
  60. data/lib/pdf/reader/width_calculator/type_zero.rb +24 -0
  61. data/lib/pdf/reader/xref.rb +9 -2
  62. metadata +119 -13
@@ -0,0 +1,11 @@
1
+ # coding: utf-8
2
+
3
+ # PDF files may define fonts in a number of ways. Each approach means we must
4
+ # calculate glyph widths differently, so this set of classes conforms to an
5
+ # interface that will perform the appropriate calculations.
6
+
7
+ require 'pdf/reader/width_calculator/built_in'
8
+ require 'pdf/reader/width_calculator/composite'
9
+ require 'pdf/reader/width_calculator/true_type'
10
+ require 'pdf/reader/width_calculator/type_zero'
11
+ require 'pdf/reader/width_calculator/type_one_or_three'
@@ -0,0 +1,50 @@
1
+ # coding: utf-8
2
+
3
+ require 'afm'
4
+ require 'pdf/reader/synchronized_cache'
5
+
6
+ module AFM
7
+ # this is a monkey patch for the AFM gem. hopefully my patch will be accepted
8
+ # upstream and I can drop this
9
+ class Font
10
+ def metrics_for_name(name)
11
+ @char_metrics[name.to_s]
12
+ end
13
+ end
14
+ end
15
+
16
+ class PDF::Reader
17
+ module WidthCalculator
18
+
19
+ # Type1 fonts can be one of 14 "built in" standard fonts. In these cases,
20
+ # the reader is expected to have its own copy of the font metrics.
21
+ # see Section 9.6.2.2, PDF 32000-1:2008, p. 256
22
+ class BuiltIn
23
+
24
+ def initialize(font)
25
+ @font = font
26
+ @@all_metrics ||= PDF::Reader::SynchronizedCache.new
27
+
28
+ metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{font.basefont}.afm")
29
+
30
+ if File.file?(metrics_path)
31
+ @metrics = @@all_metrics[metrics_path] ||= AFM::Font.new(metrics_path)
32
+ else
33
+ raise ArgumentError, "No built-in metrics for #{font.basefont}"
34
+ end
35
+ end
36
+
37
+ def glyph_width(code_point)
38
+ return 0 if code_point.nil? || code_point < 0
39
+
40
+ m = @metrics.metrics_for(code_point)
41
+ if m.nil?
42
+ name = @font.encoding.int_to_name(code_point)
43
+ m = @metrics.metrics_for_name(name)
44
+ end
45
+ m[:wx]
46
+ end
47
+
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+
3
+ class PDF::Reader
4
+ module WidthCalculator
5
+ # CIDFontType0 or CIDFontType2 use DW (integer) and W (array) to determine
6
+ # codepoint widths, note that CIDFontType2 will contain a true type font
7
+ # program which could be used to calculate width, however, a conforming writer
8
+ # is supposed to convert the widths for the codepoints used into the W array
9
+ # so that it can be used.
10
+ # see Section 9.7.4.1, PDF 32000-1:2008, pp 269-270
11
+ class Composite
12
+
13
+ def initialize(font)
14
+ @font = font
15
+ @widths = PDF::Reader::CidWidths.new(@font.cid_default_width, @font.cid_widths)
16
+ end
17
+
18
+ def glyph_width(code_point)
19
+ return 0 if code_point.nil? || code_point < 0
20
+
21
+ w = @widths[code_point]
22
+ # 0 is a valid width
23
+ return w.to_f unless w.nil?
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,56 @@
1
+ # coding: utf-8
2
+
3
+ class PDF::Reader
4
+ module WidthCalculator
5
+ # Calculates the width of a glyph in a TrueType font
6
+ class TrueType
7
+
8
+ def initialize(font)
9
+ @font = font
10
+
11
+ if @font.font_descriptor
12
+ @missing_width = @font.font_descriptor.missing_width
13
+ else
14
+ @missing_width = 0
15
+ end
16
+ end
17
+
18
+ def glyph_width(code_point)
19
+ return 0 if code_point.nil? || code_point < 0
20
+
21
+ glyph_width_from_font(code_point) || glyph_width_from_descriptor(code_point)
22
+ end
23
+
24
+ private
25
+
26
+ #TODO convert Type3 units 1000 units => 1 text space unit
27
+ def glyph_width_from_font(code_point)
28
+ return if @font.widths.nil? || @font.widths.count == 0
29
+
30
+ # in ruby a negative index is valid, and will go from the end of the array
31
+ # which is undesirable in this case.
32
+ if @font.first_char <= code_point
33
+ @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
34
+ else
35
+ @missing_width.to_f
36
+ end
37
+ end
38
+
39
+ def glyph_width_from_descriptor(code_point)
40
+ return unless @font.font_descriptor
41
+
42
+ # true type fonts will have most of their information contained
43
+ # within a program inside the font descriptor, however the widths
44
+ # may not be in standard PDF glyph widths (1000 units => 1 text space unit)
45
+ # so this width will need to be scaled
46
+ w = @font.font_descriptor.find_glyph_width(code_point)
47
+ if w
48
+ w.to_f * @font.font_descriptor.glyph_to_pdf_scale_factor
49
+ else
50
+ nil
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
56
+
@@ -0,0 +1,32 @@
1
+ # coding: utf-8
2
+
3
+ class PDF::Reader
4
+ module WidthCalculator
5
+ # Calculates the width of a glyph in a Type One or Type Three font
6
+ class TypeOneOrThree
7
+
8
+ def initialize(font)
9
+ @font = font
10
+
11
+ if @font.font_descriptor
12
+ @missing_width = @font.font_descriptor.missing_width
13
+ else
14
+ @missing_width = 0
15
+ end
16
+ end
17
+
18
+ def glyph_width(code_point)
19
+ return 0 if code_point.nil? || code_point < 0
20
+ return 0 if @font.widths.nil? || @font.widths.count == 0
21
+
22
+ # in ruby a negative index is valid, and will go from the end of the array
23
+ # which is undesirable in this case.
24
+ if @font.first_char <= code_point
25
+ @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
26
+ else
27
+ @missing_width.to_f
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+
3
+ class PDF::Reader
4
+ module WidthCalculator
5
+ # Type0 (or Composite) fonts are a "root font" that rely on a "descendant font"
6
+ # to do the heavy lifting. The "descendant font" is a CID-Keyed font.
7
+ # see Section 9.7.1, PDF 32000-1:2008, p. 267
8
+ # so if we are calculating a Type0 font width, we just pass off to
9
+ # the descendant font
10
+ class TypeZero
11
+
12
+ def initialize(font)
13
+ @font = font
14
+ @descendant_font = @font.descendantfonts.first
15
+ end
16
+
17
+ def glyph_width(code_point)
18
+ return 0 if code_point.nil? || code_point < 0
19
+
20
+ @descendant_font.glyph_width(code_point).to_f
21
+ end
22
+ end
23
+ end
24
+ end
@@ -1,3 +1,5 @@
1
+ # coding: utf-8
2
+
1
3
  ################################################################################
2
4
  #
3
5
  # Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
@@ -111,7 +113,8 @@ class PDF::Reader
111
113
  return load_xref_stream(stream)
112
114
  end
113
115
 
114
- raise PDF::Reader::MalformedPDFError, "xref table not found at offset #{offset} (#{tok_one} != xref)"
116
+ raise PDF::Reader::MalformedPDFError,
117
+ "xref table not found at offset #{offset} (#{tok_one} != xref)"
115
118
  end
116
119
  ################################################################################
117
120
  # Assumes the underlying buffer is positioned at the start of a traditional
@@ -137,7 +140,9 @@ class PDF::Reader
137
140
 
138
141
  trailer = Parser.new(buf, self).parse_token
139
142
 
140
- raise MalformedPDFError, "PDF malformed, trailer should be a dictionary" unless trailer.kind_of?(Hash)
143
+ unless trailer.kind_of?(Hash)
144
+ raise MalformedPDFError, "PDF malformed, trailer should be a dictionary"
145
+ end
141
146
 
142
147
  load_offsets(trailer[:XRefStm]) if trailer.has_key?(:XRefStm)
143
148
  load_offsets(trailer[:Prev].to_i) if trailer.has_key?(:Prev)
@@ -232,6 +237,8 @@ class PDF::Reader
232
237
  end
233
238
  io.rewind
234
239
  offset < 50 ? offset : nil
240
+ rescue EOFError
241
+ return nil
235
242
  end
236
243
  end
237
244
  ################################################################################
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-30 00:00:00.000000000 Z
12
+ date: 2012-12-29 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -28,29 +28,29 @@ dependencies:
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
30
  - !ruby/object:Gem::Dependency
31
- name: roodi
31
+ name: rspec
32
32
  requirement: !ruby/object:Gem::Requirement
33
33
  none: false
34
34
  requirements:
35
- - - ! '>='
35
+ - - ~>
36
36
  - !ruby/object:Gem::Version
37
- version: '0'
37
+ version: '2.3'
38
38
  type: :development
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
41
41
  none: false
42
42
  requirements:
43
- - - ! '>='
43
+ - - ~>
44
44
  - !ruby/object:Gem::Version
45
- version: '0'
45
+ version: '2.3'
46
46
  - !ruby/object:Gem::Dependency
47
- name: rspec
47
+ name: ZenTest
48
48
  requirement: !ruby/object:Gem::Requirement
49
49
  none: false
50
50
  requirements:
51
51
  - - ~>
52
52
  - !ruby/object:Gem::Version
53
- version: '2.3'
53
+ version: 4.4.2
54
54
  type: :development
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
@@ -58,15 +58,15 @@ dependencies:
58
58
  requirements:
59
59
  - - ~>
60
60
  - !ruby/object:Gem::Version
61
- version: '2.3'
61
+ version: 4.4.2
62
62
  - !ruby/object:Gem::Dependency
63
- name: ZenTest
63
+ name: cane
64
64
  requirement: !ruby/object:Gem::Requirement
65
65
  none: false
66
66
  requirements:
67
67
  - - ~>
68
68
  - !ruby/object:Gem::Version
69
- version: 4.4.2
69
+ version: 2.2.3
70
70
  type: :development
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
@@ -74,7 +74,55 @@ dependencies:
74
74
  requirements:
75
75
  - - ~>
76
76
  - !ruby/object:Gem::Version
77
- version: 4.4.2
77
+ version: 2.2.3
78
+ - !ruby/object:Gem::Dependency
79
+ name: morecane
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: ir_b
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: rdoc
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
78
126
  - !ruby/object:Gem::Dependency
79
127
  name: Ascii85
80
128
  requirement: !ruby/object:Gem::Requirement
@@ -123,6 +171,38 @@ dependencies:
123
171
  - - ~>
124
172
  - !ruby/object:Gem::Version
125
173
  version: '2.0'
174
+ - !ruby/object:Gem::Dependency
175
+ name: ttfunk
176
+ requirement: !ruby/object:Gem::Requirement
177
+ none: false
178
+ requirements:
179
+ - - ! '>='
180
+ - !ruby/object:Gem::Version
181
+ version: '0'
182
+ type: :runtime
183
+ prerelease: false
184
+ version_requirements: !ruby/object:Gem::Requirement
185
+ none: false
186
+ requirements:
187
+ - - ! '>='
188
+ - !ruby/object:Gem::Version
189
+ version: '0'
190
+ - !ruby/object:Gem::Dependency
191
+ name: afm
192
+ requirement: !ruby/object:Gem::Requirement
193
+ none: false
194
+ requirements:
195
+ - - ~>
196
+ - !ruby/object:Gem::Version
197
+ version: 0.2.0
198
+ type: :runtime
199
+ prerelease: false
200
+ version_requirements: !ruby/object:Gem::Requirement
201
+ none: false
202
+ requirements:
203
+ - - ~>
204
+ - !ruby/object:Gem::Version
205
+ version: 0.2.0
126
206
  description: The PDF::Reader library implements a PDF parser conforming as much as
127
207
  possible to the PDF specification from Adobe
128
208
  email:
@@ -151,17 +231,26 @@ files:
151
231
  - examples/version.rb
152
232
  - lib/pdf/hash.rb
153
233
  - lib/pdf/reader.rb
234
+ - lib/pdf/reader/width_calculator/built_in.rb
235
+ - lib/pdf/reader/width_calculator/type_zero.rb
236
+ - lib/pdf/reader/width_calculator/true_type.rb
237
+ - lib/pdf/reader/width_calculator/composite.rb
238
+ - lib/pdf/reader/width_calculator/type_one_or_three.rb
154
239
  - lib/pdf/reader/xref.rb
155
240
  - lib/pdf/reader/page.rb
241
+ - lib/pdf/reader/transformation_matrix.rb
156
242
  - lib/pdf/reader/encoding.rb
243
+ - lib/pdf/reader/page_layout.rb
157
244
  - lib/pdf/reader/font.rb
158
245
  - lib/pdf/reader/print_receiver.rb
159
246
  - lib/pdf/reader/lzw.rb
160
247
  - lib/pdf/reader/buffer.rb
248
+ - lib/pdf/reader/synchronized_cache.rb
161
249
  - lib/pdf/reader/object_stream.rb
162
250
  - lib/pdf/reader/cmap.rb
163
251
  - lib/pdf/reader/text_receiver.rb
164
252
  - lib/pdf/reader/register_receiver.rb
253
+ - lib/pdf/reader/cid_widths.rb
165
254
  - lib/pdf/reader/page_text_receiver.rb
166
255
  - lib/pdf/reader/encodings/mac_roman.txt
167
256
  - lib/pdf/reader/encodings/zapf_dingbats.txt
@@ -181,19 +270,36 @@ files:
181
270
  - lib/pdf/reader/object_hash.rb
182
271
  - lib/pdf/reader/reference.rb
183
272
  - lib/pdf/reader/glyphlist.txt
273
+ - lib/pdf/reader/afm/Courier-BoldOblique.afm
274
+ - lib/pdf/reader/afm/Symbol.afm
275
+ - lib/pdf/reader/afm/Times-Italic.afm
276
+ - lib/pdf/reader/afm/Courier-Oblique.afm
277
+ - lib/pdf/reader/afm/Helvetica-Bold.afm
278
+ - lib/pdf/reader/afm/Courier-Bold.afm
279
+ - lib/pdf/reader/afm/Times-BoldItalic.afm
280
+ - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
281
+ - lib/pdf/reader/afm/Helvetica.afm
282
+ - lib/pdf/reader/afm/ZapfDingbats.afm
283
+ - lib/pdf/reader/afm/Helvetica-Oblique.afm
284
+ - lib/pdf/reader/afm/Times-Bold.afm
285
+ - lib/pdf/reader/afm/Times-Roman.afm
286
+ - lib/pdf/reader/afm/Courier.afm
184
287
  - lib/pdf/reader/token.rb
185
288
  - lib/pdf/reader/parser.rb
186
289
  - lib/pdf/reader/page_state.rb
187
290
  - lib/pdf/reader/error.rb
188
291
  - lib/pdf/reader/glyph_hash.rb
292
+ - lib/pdf/reader/width_calculator.rb
189
293
  - lib/pdf/reader/resource_methods.rb
190
294
  - lib/pdf/reader/standard_security_handler.rb
295
+ - lib/pdf/reader/text_run.rb
191
296
  - lib/pdf/reader/form_xobject.rb
192
297
  - lib/pdf/reader/stream.rb
193
298
  - lib/pdf/reader/pages_strategy.rb
194
299
  - lib/pdf/reader/abstract_strategy.rb
195
300
  - lib/pdf/reader/metadata_strategy.rb
196
301
  - lib/pdf/reader/object_cache.rb
302
+ - lib/pdf/reader/font_descriptor.rb
197
303
  - lib/pdf-reader.rb
198
304
  - Rakefile
199
305
  - README.rdoc