pdf-reader 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. data/CHANGELOG +7 -1
  2. data/README.rdoc +1 -0
  3. data/Rakefile +23 -8
  4. data/lib/pdf-reader.rb +3 -1
  5. data/lib/pdf/hash.rb +5 -1
  6. data/lib/pdf/reader.rb +8 -1
  7. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
  8. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
  9. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
  10. data/lib/pdf/reader/afm/Courier.afm +342 -0
  11. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
  12. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
  13. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
  14. data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
  15. data/lib/pdf/reader/afm/Symbol.afm +213 -0
  16. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
  17. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
  18. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
  19. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
  20. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
  21. data/lib/pdf/reader/buffer.rb +14 -6
  22. data/lib/pdf/reader/cid_widths.rb +61 -0
  23. data/lib/pdf/reader/cmap.rb +8 -2
  24. data/lib/pdf/reader/encoding.rb +52 -27
  25. data/lib/pdf/reader/error.rb +16 -1
  26. data/lib/pdf/reader/filter.rb +2 -0
  27. data/lib/pdf/reader/filter/ascii85.rb +3 -1
  28. data/lib/pdf/reader/filter/ascii_hex.rb +3 -1
  29. data/lib/pdf/reader/filter/depredict.rb +2 -0
  30. data/lib/pdf/reader/filter/flate.rb +3 -1
  31. data/lib/pdf/reader/filter/lzw.rb +1 -0
  32. data/lib/pdf/reader/filter/null.rb +1 -0
  33. data/lib/pdf/reader/filter/run_length.rb +2 -1
  34. data/lib/pdf/reader/font.rb +74 -18
  35. data/lib/pdf/reader/font_descriptor.rb +80 -0
  36. data/lib/pdf/reader/glyph_hash.rb +6 -0
  37. data/lib/pdf/reader/lzw.rb +1 -0
  38. data/lib/pdf/reader/object_cache.rb +1 -1
  39. data/lib/pdf/reader/object_hash.rb +1 -1
  40. data/lib/pdf/reader/page_layout.rb +125 -0
  41. data/lib/pdf/reader/page_state.rb +172 -69
  42. data/lib/pdf/reader/page_text_receiver.rb +50 -21
  43. data/lib/pdf/reader/pages_strategy.rb +17 -4
  44. data/lib/pdf/reader/parser.rb +25 -52
  45. data/lib/pdf/reader/print_receiver.rb +5 -0
  46. data/lib/pdf/reader/reference.rb +2 -0
  47. data/lib/pdf/reader/register_receiver.rb +1 -1
  48. data/lib/pdf/reader/standard_security_handler.rb +2 -0
  49. data/lib/pdf/reader/stream.rb +2 -0
  50. data/lib/pdf/reader/synchronized_cache.rb +32 -0
  51. data/lib/pdf/reader/text_receiver.rb +5 -4
  52. data/lib/pdf/reader/text_run.rb +80 -0
  53. data/lib/pdf/reader/token.rb +2 -0
  54. data/lib/pdf/reader/transformation_matrix.rb +194 -0
  55. data/lib/pdf/reader/width_calculator.rb +11 -0
  56. data/lib/pdf/reader/width_calculator/built_in.rb +50 -0
  57. data/lib/pdf/reader/width_calculator/composite.rb +27 -0
  58. data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
  59. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +32 -0
  60. data/lib/pdf/reader/width_calculator/type_zero.rb +24 -0
  61. data/lib/pdf/reader/xref.rb +9 -2
  62. metadata +119 -13
@@ -0,0 +1,11 @@
1
+ # coding: utf-8
2
+
3
+ # PDF files may define fonts in a number of ways. Each approach means we must
4
+ # calculate glyph widths differently, so this set of classes conform to an
5
+ # interface that will perform the appropriate calculations.
6
+
7
+ require 'pdf/reader/width_calculator/built_in'
8
+ require 'pdf/reader/width_calculator/composite'
9
+ require 'pdf/reader/width_calculator/true_type'
10
+ require 'pdf/reader/width_calculator/type_zero'
11
+ require 'pdf/reader/width_calculator/type_one_or_three'
@@ -0,0 +1,50 @@
1
+ # coding: utf-8
2
+
3
+ require 'afm'
4
+ require 'pdf/reader/synchronized_cache'
5
+
6
+ module AFM
7
+ # this is a monkey patch for the AFM gem. hopefully my patch will be accepted
8
+ # upstream and I can drop this
9
+ class Font
10
+ def metrics_for_name(name)
11
+ @char_metrics[name.to_s]
12
+ end
13
+ end
14
+ end
15
+
16
+ class PDF::Reader
17
+ module WidthCalculator
18
+
19
+ # Type1 fonts can be one of 14 "built in" standard fonts. In these cases,
20
+ # the reader is expected to have its own copy of the font metrics.
21
+ # see Section 9.6.2.2, PDF 32000-1:2008, pp 256
22
+ class BuiltIn
23
+
24
+ def initialize(font)
25
+ @font = font
26
+ @@all_metrics ||= PDF::Reader::SynchronizedCache.new
27
+
28
+ metrics_path = File.join(File.dirname(__FILE__), "..","afm","#{font.basefont}.afm")
29
+
30
+ if File.file?(metrics_path)
31
+ @metrics = @@all_metrics[metrics_path] ||= AFM::Font.new(metrics_path)
32
+ else
33
+ raise ArgumentError, "No built-in metrics for #{font.basefont}"
34
+ end
35
+ end
36
+
37
+ def glyph_width(code_point)
38
+ return 0 if code_point.nil? || code_point < 0
39
+
40
+ m = @metrics.metrics_for(code_point)
41
+ if m.nil?
42
+ name = @font.encoding.int_to_name(code_point)
43
+ m = @metrics.metrics_for_name(name)
44
+ end
45
+ m[:wx]
46
+ end
47
+
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+
3
+ class PDF::Reader
4
+ module WidthCalculator
5
+ # CIDFontType0 or CIDFontType2 use DW (integer) and W (array) to determine
6
+ # codepoint widths, note that CIDFontType2 will contain a true type font
7
+ # program which could be used to calculate width, however, a conforming writer
8
+ # is supposed to convert the widths for the codepoints used into the W array
9
+ # so that it can be used.
10
+ # see Section 9.7.4.1, PDF 32000-1:2008, pp 269-270
11
+ class Composite
12
+
13
+ def initialize(font)
14
+ @font = font
15
+ @widths = PDF::Reader::CidWidths.new(@font.cid_default_width, @font.cid_widths)
16
+ end
17
+
18
+ def glyph_width(code_point)
19
+ return 0 if code_point.nil? || code_point < 0
20
+
21
+ w = @widths[code_point]
22
+ # 0 is a valid width
23
+ return w.to_f unless w.nil?
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,56 @@
1
+ # coding: utf-8
2
+
3
+ class PDF::Reader
4
+ module WidthCalculator
5
+ # Calculates the width of a glyph in a TrueType font
6
+ class TrueType
7
+
8
+ def initialize(font)
9
+ @font = font
10
+
11
+ if @font.font_descriptor
12
+ @missing_width = @font.font_descriptor.missing_width
13
+ else
14
+ @missing_width = 0
15
+ end
16
+ end
17
+
18
+ def glyph_width(code_point)
19
+ return 0 if code_point.nil? || code_point < 0
20
+
21
+ glyph_width_from_font(code_point) || glyph_width_from_descriptor(code_point)
22
+ end
23
+
24
+ private
25
+
26
+ #TODO convert Type3 units 1000 units => 1 text space unit
27
+ def glyph_width_from_font(code_point)
28
+ return if @font.widths.nil? || @font.widths.count == 0
29
+
30
+ # in ruby a negative index is valid, and will go from the end of the array
31
+ # which is undesirable in this case.
32
+ if @font.first_char <= code_point
33
+ @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
34
+ else
35
+ @missing_width.to_f
36
+ end
37
+ end
38
+
39
+ def glyph_width_from_descriptor(code_point)
40
+ return unless @font.font_descriptor
41
+
42
+ # true type fonts will have most of their information contained
43
+ # within a program inside the font descriptor, however the widths
44
+ # may not be in standard PDF glyph widths (1000 units => 1 text space unit)
45
+ # so this width will need to be scaled
46
+ w = @font.font_descriptor.find_glyph_width(code_point)
47
+ if w
48
+ w.to_f * @font.font_descriptor.glyph_to_pdf_scale_factor
49
+ else
50
+ nil
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
56
+
@@ -0,0 +1,32 @@
1
+ # coding: utf-8
2
+
3
+ class PDF::Reader
4
+ module WidthCalculator
5
+ # Calculates the width of a glyph in a Type One or Type Three font
6
+ class TypeOneOrThree
7
+
8
+ def initialize(font)
9
+ @font = font
10
+
11
+ if @font.font_descriptor
12
+ @missing_width = @font.font_descriptor.missing_width
13
+ else
14
+ @missing_width = 0
15
+ end
16
+ end
17
+
18
+ def glyph_width(code_point)
19
+ return 0 if code_point.nil? || code_point < 0
20
+ return 0 if @font.widths.nil? || @font.widths.count == 0
21
+
22
+ # in ruby a negative index is valid, and will go from the end of the array
23
+ # which is undesirable in this case.
24
+ if @font.first_char <= code_point
25
+ @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
26
+ else
27
+ @missing_width.to_f
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+
3
+ class PDF::Reader
4
+ module WidthCalculator
5
+ # Type0 (or Composite) fonts are a "root font" that rely on a "descendant font"
6
+ # to do the heavy lifting. The "descendant font" is a CID-Keyed font.
7
+ # see Section 9.7.1, PDF 32000-1:2008, pp 267
8
+ # so if we are calculating a Type0 font width, we just pass off to
9
+ # the descendant font
10
+ class TypeZero
11
+
12
+ def initialize(font)
13
+ @font = font
14
+ @descendant_font = @font.descendantfonts.first
15
+ end
16
+
17
+ def glyph_width(code_point)
18
+ return 0 if code_point.nil? || code_point < 0
19
+
20
+ @descendant_font.glyph_width(code_point).to_f
21
+ end
22
+ end
23
+ end
24
+ end
@@ -1,3 +1,5 @@
1
+ # coding: utf-8
2
+
1
3
  ################################################################################
2
4
  #
3
5
  # Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
@@ -111,7 +113,8 @@ class PDF::Reader
111
113
  return load_xref_stream(stream)
112
114
  end
113
115
 
114
- raise PDF::Reader::MalformedPDFError, "xref table not found at offset #{offset} (#{tok_one} != xref)"
116
+ raise PDF::Reader::MalformedPDFError,
117
+ "xref table not found at offset #{offset} (#{tok_one} != xref)"
115
118
  end
116
119
  ################################################################################
117
120
  # Assumes the underlying buffer is positioned at the start of a traditional
@@ -137,7 +140,9 @@ class PDF::Reader
137
140
 
138
141
  trailer = Parser.new(buf, self).parse_token
139
142
 
140
- raise MalformedPDFError, "PDF malformed, trailer should be a dictionary" unless trailer.kind_of?(Hash)
143
+ unless trailer.kind_of?(Hash)
144
+ raise MalformedPDFError, "PDF malformed, trailer should be a dictionary"
145
+ end
141
146
 
142
147
  load_offsets(trailer[:XRefStm]) if trailer.has_key?(:XRefStm)
143
148
  load_offsets(trailer[:Prev].to_i) if trailer.has_key?(:Prev)
@@ -232,6 +237,8 @@ class PDF::Reader
232
237
  end
233
238
  io.rewind
234
239
  offset < 50 ? offset : nil
240
+ rescue EOFError
241
+ return nil
235
242
  end
236
243
  end
237
244
  ################################################################################
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-30 00:00:00.000000000 Z
12
+ date: 2012-12-29 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -28,29 +28,29 @@ dependencies:
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
30
  - !ruby/object:Gem::Dependency
31
- name: roodi
31
+ name: rspec
32
32
  requirement: !ruby/object:Gem::Requirement
33
33
  none: false
34
34
  requirements:
35
- - - ! '>='
35
+ - - ~>
36
36
  - !ruby/object:Gem::Version
37
- version: '0'
37
+ version: '2.3'
38
38
  type: :development
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
41
41
  none: false
42
42
  requirements:
43
- - - ! '>='
43
+ - - ~>
44
44
  - !ruby/object:Gem::Version
45
- version: '0'
45
+ version: '2.3'
46
46
  - !ruby/object:Gem::Dependency
47
- name: rspec
47
+ name: ZenTest
48
48
  requirement: !ruby/object:Gem::Requirement
49
49
  none: false
50
50
  requirements:
51
51
  - - ~>
52
52
  - !ruby/object:Gem::Version
53
- version: '2.3'
53
+ version: 4.4.2
54
54
  type: :development
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
@@ -58,15 +58,15 @@ dependencies:
58
58
  requirements:
59
59
  - - ~>
60
60
  - !ruby/object:Gem::Version
61
- version: '2.3'
61
+ version: 4.4.2
62
62
  - !ruby/object:Gem::Dependency
63
- name: ZenTest
63
+ name: cane
64
64
  requirement: !ruby/object:Gem::Requirement
65
65
  none: false
66
66
  requirements:
67
67
  - - ~>
68
68
  - !ruby/object:Gem::Version
69
- version: 4.4.2
69
+ version: 2.2.3
70
70
  type: :development
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
@@ -74,7 +74,55 @@ dependencies:
74
74
  requirements:
75
75
  - - ~>
76
76
  - !ruby/object:Gem::Version
77
- version: 4.4.2
77
+ version: 2.2.3
78
+ - !ruby/object:Gem::Dependency
79
+ name: morecane
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: ir_b
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: rdoc
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
78
126
  - !ruby/object:Gem::Dependency
79
127
  name: Ascii85
80
128
  requirement: !ruby/object:Gem::Requirement
@@ -123,6 +171,38 @@ dependencies:
123
171
  - - ~>
124
172
  - !ruby/object:Gem::Version
125
173
  version: '2.0'
174
+ - !ruby/object:Gem::Dependency
175
+ name: ttfunk
176
+ requirement: !ruby/object:Gem::Requirement
177
+ none: false
178
+ requirements:
179
+ - - ! '>='
180
+ - !ruby/object:Gem::Version
181
+ version: '0'
182
+ type: :runtime
183
+ prerelease: false
184
+ version_requirements: !ruby/object:Gem::Requirement
185
+ none: false
186
+ requirements:
187
+ - - ! '>='
188
+ - !ruby/object:Gem::Version
189
+ version: '0'
190
+ - !ruby/object:Gem::Dependency
191
+ name: afm
192
+ requirement: !ruby/object:Gem::Requirement
193
+ none: false
194
+ requirements:
195
+ - - ~>
196
+ - !ruby/object:Gem::Version
197
+ version: 0.2.0
198
+ type: :runtime
199
+ prerelease: false
200
+ version_requirements: !ruby/object:Gem::Requirement
201
+ none: false
202
+ requirements:
203
+ - - ~>
204
+ - !ruby/object:Gem::Version
205
+ version: 0.2.0
126
206
  description: The PDF::Reader library implements a PDF parser conforming as much as
127
207
  possible to the PDF specification from Adobe
128
208
  email:
@@ -151,17 +231,26 @@ files:
151
231
  - examples/version.rb
152
232
  - lib/pdf/hash.rb
153
233
  - lib/pdf/reader.rb
234
+ - lib/pdf/reader/width_calculator/built_in.rb
235
+ - lib/pdf/reader/width_calculator/type_zero.rb
236
+ - lib/pdf/reader/width_calculator/true_type.rb
237
+ - lib/pdf/reader/width_calculator/composite.rb
238
+ - lib/pdf/reader/width_calculator/type_one_or_three.rb
154
239
  - lib/pdf/reader/xref.rb
155
240
  - lib/pdf/reader/page.rb
241
+ - lib/pdf/reader/transformation_matrix.rb
156
242
  - lib/pdf/reader/encoding.rb
243
+ - lib/pdf/reader/page_layout.rb
157
244
  - lib/pdf/reader/font.rb
158
245
  - lib/pdf/reader/print_receiver.rb
159
246
  - lib/pdf/reader/lzw.rb
160
247
  - lib/pdf/reader/buffer.rb
248
+ - lib/pdf/reader/synchronized_cache.rb
161
249
  - lib/pdf/reader/object_stream.rb
162
250
  - lib/pdf/reader/cmap.rb
163
251
  - lib/pdf/reader/text_receiver.rb
164
252
  - lib/pdf/reader/register_receiver.rb
253
+ - lib/pdf/reader/cid_widths.rb
165
254
  - lib/pdf/reader/page_text_receiver.rb
166
255
  - lib/pdf/reader/encodings/mac_roman.txt
167
256
  - lib/pdf/reader/encodings/zapf_dingbats.txt
@@ -181,19 +270,36 @@ files:
181
270
  - lib/pdf/reader/object_hash.rb
182
271
  - lib/pdf/reader/reference.rb
183
272
  - lib/pdf/reader/glyphlist.txt
273
+ - lib/pdf/reader/afm/Courier-BoldOblique.afm
274
+ - lib/pdf/reader/afm/Symbol.afm
275
+ - lib/pdf/reader/afm/Times-Italic.afm
276
+ - lib/pdf/reader/afm/Courier-Oblique.afm
277
+ - lib/pdf/reader/afm/Helvetica-Bold.afm
278
+ - lib/pdf/reader/afm/Courier-Bold.afm
279
+ - lib/pdf/reader/afm/Times-BoldItalic.afm
280
+ - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
281
+ - lib/pdf/reader/afm/Helvetica.afm
282
+ - lib/pdf/reader/afm/ZapfDingbats.afm
283
+ - lib/pdf/reader/afm/Helvetica-Oblique.afm
284
+ - lib/pdf/reader/afm/Times-Bold.afm
285
+ - lib/pdf/reader/afm/Times-Roman.afm
286
+ - lib/pdf/reader/afm/Courier.afm
184
287
  - lib/pdf/reader/token.rb
185
288
  - lib/pdf/reader/parser.rb
186
289
  - lib/pdf/reader/page_state.rb
187
290
  - lib/pdf/reader/error.rb
188
291
  - lib/pdf/reader/glyph_hash.rb
292
+ - lib/pdf/reader/width_calculator.rb
189
293
  - lib/pdf/reader/resource_methods.rb
190
294
  - lib/pdf/reader/standard_security_handler.rb
295
+ - lib/pdf/reader/text_run.rb
191
296
  - lib/pdf/reader/form_xobject.rb
192
297
  - lib/pdf/reader/stream.rb
193
298
  - lib/pdf/reader/pages_strategy.rb
194
299
  - lib/pdf/reader/abstract_strategy.rb
195
300
  - lib/pdf/reader/metadata_strategy.rb
196
301
  - lib/pdf/reader/object_cache.rb
302
+ - lib/pdf/reader/font_descriptor.rb
197
303
  - lib/pdf-reader.rb
198
304
  - Rakefile
199
305
  - README.rdoc