pdf-reader 0.6.2 → 0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -38,9 +38,9 @@ class PDF::Reader
38
38
  def initialize (name, options)
39
39
  @options = options
40
40
 
41
- case name
42
- when "FlateDecode" then @filter = :flate
43
- #else raise UnsupportedFeatureError, "Unknown filter: #{name}"
41
+ case name.to_sym
42
+ when :FlateDecode then @filter = :flate
43
+ #else raise UnsupportedFeatureError, "Unknown filter: #{name}"
44
44
  end
45
45
  end
46
46
  ################################################################################
@@ -56,11 +56,21 @@ class PDF::Reader
56
56
  # Decode the specified data with the Zlib compression algorithm
57
57
  def flate (data)
58
58
  begin
59
- z = Zlib::Inflate.new
60
- z.inflate(data)
61
- rescue Exception => e
62
- raise MalformedPDFError, "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
59
+ Zlib::Inflate.new.inflate(data)
60
+ rescue Zlib::DataError => e
61
+ # by default, Ruby's Zlib assumes the data it's inflating
62
+ # is RFC1951 deflated data, wrapped in an RFC1950 zlib container.
63
+ # If that fails, then use an undocumented 'feature' to attempt to inflate
64
+ # the data as a raw RFC1951 stream.
65
+ #
66
+ # See
67
+ # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
68
+ # - http://www.gzip.org/zlib/zlib_faq.html#faq38
69
+ Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
63
70
  end
71
+ rescue Exception => e
72
+ # Oops, there was a problem inflating the stream
73
+ raise MalformedPDFError, "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
64
74
  end
65
75
  ################################################################################
66
76
  end
@@ -35,7 +35,8 @@ class PDF::Reader
35
35
  @@glyphs ||= {}
36
36
 
37
37
  if @@glyphs.empty?
38
- File.open(File.dirname(__FILE__) + "/glyphlist.txt","r") do |f|
38
+ RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
39
+ File.open(File.dirname(__FILE__) + "/glyphlist.txt",mode) do |f|
39
40
  f.each do |l|
40
41
  m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
41
42
  @@glyphs[name] = "0x#{code}".hex if name
@@ -32,7 +32,7 @@ class PDF::Reader
32
32
  # Create a new parser around a PDF::Reader::Buffer object
33
33
  #
34
34
  # buffer - a PDF::Reader::Buffer object that contains PDF data
35
- # xref - an integer that specifies the byte offset of the xref table in the buffer
35
+ # xref - a PDF::Reader::XRef object that represents the document's object offsets
36
36
  def initialize (buffer, xref)
37
37
  @buffer = buffer
38
38
  @xref = xref
@@ -47,7 +47,7 @@ class PDF::Reader
47
47
  token = @buffer.token
48
48
 
49
49
  case token
50
- when "/" then return Name.new(@buffer.token)
50
+ when "/" then return @buffer.token.to_sym
51
51
  when "<<" then return dictionary()
52
52
  when "[" then return array()
53
53
  when "(" then return string()
@@ -72,7 +72,7 @@ class PDF::Reader
72
72
  loop do
73
73
  key = parse_token
74
74
  break if key.kind_of?(Token) and key == ">>"
75
- raise MalformedPDFError, "Dictionary key (#{key.inspect}) is not a name" unless key.kind_of?(Name)
75
+ raise MalformedPDFError, "Dictionary key (#{key.inspect}) is not a name" unless key.kind_of?(Symbol)
76
76
 
77
77
  value = parse_token
78
78
  value.kind_of?(Token) and Error.str_assert_not(value, ">>")
@@ -174,28 +174,28 @@ class PDF::Reader
174
174
  obj = parse_token
175
175
  post_obj = parse_token
176
176
  case post_obj
177
- when "endobj" then return obj
178
- when "stream" then return obj, stream(obj)
177
+ when "endobj" then return [obj,nil]
178
+ when "stream" then return [obj, stream(obj)]
179
179
  else raise MalformedPDFError, "PDF malformed, unexpected token #{post_obj}"
180
180
  end
181
181
  end
182
182
  ################################################################################
183
183
  # Decodes the contents of a PDF Stream and returns it as a Ruby String.
184
184
  def stream (dict)
185
- raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?('Length')
186
- data = @buffer.read(@xref.object(dict['Length']))
185
+ raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
186
+ data = @buffer.read(@xref.object(dict[:Length]).first)
187
187
 
188
188
  Error.str_assert(parse_token, "endstream")
189
189
  Error.str_assert(parse_token, "endobj")
190
190
 
191
- if dict.has_key?('Filter')
191
+ if dict.has_key?(:Filter)
192
192
  options = []
193
193
 
194
- if dict.has_key?('DecodeParms')
195
- options = Array(dict['DecodeParms'])
194
+ if dict.has_key?(:DecodeParms)
195
+ options = Array(dict[:DecodeParms])
196
196
  end
197
197
 
198
- Array(dict['Filter']).each_with_index do |filter, index|
198
+ Array(dict[:Filter]).each_with_index do |filter, index|
199
199
  data = Filter.new(filter, options[index]).filter(data)
200
200
  end
201
201
  end
@@ -0,0 +1,29 @@
1
+ ***************
2
+ *** 173,178 ****
3
+
4
+ obj = parse_token
5
+ post_obj = parse_token
6
+ case post_obj
7
+ when "endobj" then return [obj,nil]
8
+ when "stream" then return [obj, stream(obj)]
9
+ --- 173,192 ----
10
+
11
+ obj = parse_token
12
+ post_obj = parse_token
13
+ +
14
+ + if obj.class == Array
15
+ + newobj = Array.new
16
+ + obj.each_index {|idx|
17
+ + if obj[idx].class == PDF::Reader::Reference
18
+ + xo, xs = @xref.object(obj[idx])
19
+ + if xs
20
+ + newobj << xs
21
+ + end
22
+ + end
23
+ + }
24
+ + return newobj.flatten
25
+ + end
26
+ +
27
+ case post_obj
28
+ when "endobj" then return [obj,nil]
29
+ when "stream" then return [obj, stream(obj)]
@@ -217,8 +217,8 @@ class PDF::Reader
217
217
  def media_box_check (dict)
218
218
  corners = (@upper_corners.last || {:urx => 0, :ury => 0}).dup
219
219
 
220
- if dict.has_key?('MediaBox')
221
- media_box = dict['MediaBox']
220
+ if dict.has_key?(:MediaBox)
221
+ media_box = dict[:MediaBox]
222
222
  corners[:urx] = media_box[2] - media_box[0]
223
223
  corners[:ury] = media_box[3] - media_box[1]
224
224
  end
@@ -9,10 +9,10 @@
9
9
  # distribute, sublicense, and/or sell copies of the Software, and to
10
10
  # permit persons to whom the Software is furnished to do so, subject to
11
11
  # the following conditions:
12
- #
12
+ #
13
13
  # The above copyright notice and this permission notice shall be
14
14
  # included in all copies or substantial portions of the Software.
15
- #
15
+ #
16
16
  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
17
  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
18
  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -45,9 +45,11 @@ class PDF::Reader
45
45
  offset ||= @buffer.find_first_xref_offset
46
46
  @buffer.seek(offset)
47
47
  token = @buffer.token
48
-
48
+
49
49
  if token == "xref" || token == "ref"
50
50
  load_xref_table
51
+ elsif token.to_i >= 0 && @buffer.token.to_i >= 0 && @buffer.token == "obj"
52
+ raise PDF::Reader::UnsupportedFeatureError, "XRef streams are not supported in PDF::Reader yet"
51
53
  else
52
54
  raise PDF::Reader::MalformedPDFError, "xref table not found at offset #{offset} (#{token} != xref)"
53
55
  end
@@ -59,14 +61,14 @@ class PDF::Reader
59
61
  #
60
62
  # If the object is a stream, that is returned as well
61
63
  def object (ref, save_pos = true)
62
- return ref unless ref.kind_of?(Reference)
64
+ return ref, nil unless ref.kind_of?(Reference)
63
65
  pos = @buffer.pos if save_pos
64
66
  obj, stream = Parser.new(@buffer.seek(offset_for(ref)), self).object(ref.id, ref.gen)
65
67
  @buffer.seek(pos) if save_pos
66
68
  if stream
67
- return obj, stream
69
+ return [obj, stream]
68
70
  else
69
- return obj
71
+ return [obj, nil]
70
72
  end
71
73
  end
72
74
  ################################################################################
@@ -78,7 +80,7 @@ class PDF::Reader
78
80
  begin
79
81
  # loop over all subsections of the xref table
80
82
  # In a well formed PDF, the 'trailer' token will indicate
81
- # the end of the table. However we need to be careful in case
83
+ # the end of the table. However we need to be careful in case
82
84
  # we're processing a malformed pdf that is missing the trailer.
83
85
  loop do
84
86
  tok_one, tok_two = @buffer.token, @buffer.token
@@ -104,10 +106,20 @@ class PDF::Reader
104
106
  raise MalformedPDFError, "PDF malformed, trailer should be a dictionary" unless tok_two == "<<"
105
107
 
106
108
  trailer = Parser.new(@buffer, self).dictionary
107
- load(trailer['Prev'].to_i) if trailer.has_key?('Prev')
109
+ load(trailer[:Prev].to_i) if trailer.has_key?(:Prev)
108
110
 
109
111
  trailer
110
112
  end
113
+ # returns the type of object a ref points to
114
+ def obj_type(ref)
115
+ obj, stream = object(ref)
116
+ obj.class.to_s.to_sym
117
+ end
118
+ # returns true if the supplied reference points to an object with a stream
119
+ def stream?(ref)
120
+ obj, stream = @xref.object(ref)
121
+ stream ? true : false
122
+ end
111
123
  ################################################################################
112
124
  # returns the byte offset for the specified PDF object.
113
125
  #
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.2
4
+ version: "0.7"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Jones
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-03-22 00:00:00 +11:00
12
+ date: 2008-05-06 00:00:00 +10:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -28,9 +28,8 @@ files:
28
28
  - lib/pdf/reader
29
29
  - lib/pdf/reader/explore.rb
30
30
  - lib/pdf/reader/reference.rb
31
- - lib/pdf/reader/name.rb
32
- - lib/pdf/reader/token.rb
33
31
  - lib/pdf/reader/xref.rb
32
+ - lib/pdf/reader/token.rb
34
33
  - lib/pdf/reader/filter.rb
35
34
  - lib/pdf/reader/text_receiver.rb
36
35
  - lib/pdf/reader/buffer.rb
@@ -42,6 +41,7 @@ files:
42
41
  - lib/pdf/reader/register_receiver.rb
43
42
  - lib/pdf/reader/font.rb
44
43
  - lib/pdf/reader/glyphlist.txt
44
+ - lib/pdf/reader/parser.rb.rej
45
45
  - lib/pdf/reader.rb
46
46
  - Rakefile
47
47
  - README
@@ -1,37 +0,0 @@
1
- ################################################################################
2
- #
3
- # Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
4
- #
5
- # Permission is hereby granted, free of charge, to any person obtaining
6
- # a copy of this software and associated documentation files (the
7
- # "Software"), to deal in the Software without restriction, including
8
- # without limitation the rights to use, copy, modify, merge, publish,
9
- # distribute, sublicense, and/or sell copies of the Software, and to
10
- # permit persons to whom the Software is furnished to do so, subject to
11
- # the following conditions:
12
- #
13
- # The above copyright notice and this permission notice shall be
14
- # included in all copies or substantial portions of the Software.
15
- #
16
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
- #
24
- ################################################################################
25
-
26
- class PDF::Reader
27
- ################################################################################
28
- class Name < String
29
- ################################################################################
30
- def initialize (val)
31
- super
32
- end
33
- ################################################################################
34
- end
35
- ################################################################################
36
- end
37
- ################################################################################