pdf-reader 0.6.2 → 0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,9 +38,9 @@ class PDF::Reader
38
38
  def initialize (name, options)
39
39
  @options = options
40
40
 
41
- case name
42
- when "FlateDecode" then @filter = :flate
43
- #else raise UnsupportedFeatureError, "Unknown filter: #{name}"
41
+ case name.to_sym
42
+ when :FlateDecode then @filter = :flate
43
+ #else raise UnsupportedFeatureError, "Unknown filter: #{name}"
44
44
  end
45
45
  end
46
46
  ################################################################################
@@ -56,11 +56,21 @@ class PDF::Reader
56
56
  # Decode the specified data with the Zlib compression algorithm
57
57
  def flate (data)
58
58
  begin
59
- z = Zlib::Inflate.new
60
- z.inflate(data)
61
- rescue Exception => e
62
- raise MalformedPDFError, "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
59
+ Zlib::Inflate.new.inflate(data)
60
+ rescue Zlib::DataError => e
61
+ # by default, Ruby's Zlib assumes the data it's inflating
62
+ # is RFC1951 deflated data, wrapped in a RFC1950 zlib container.
63
+ # If that fails, then use an undocumented 'feature' to attempt to inflate
64
+ # the data as a raw RFC1951 stream.
65
+ #
66
+ # See
67
+ # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
68
+ # - http://www.gzip.org/zlib/zlib_faq.html#faq38
69
+ Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
63
70
  end
71
+ rescue Exception => e
72
+ # Oops, there was a problem inflating the stream
73
+ raise MalformedPDFError, "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
64
74
  end
65
75
  ################################################################################
66
76
  end
@@ -35,7 +35,8 @@ class PDF::Reader
35
35
  @@glyphs ||= {}
36
36
 
37
37
  if @@glyphs.empty?
38
- File.open(File.dirname(__FILE__) + "/glyphlist.txt","r") do |f|
38
+ RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
39
+ File.open(File.dirname(__FILE__) + "/glyphlist.txt",mode) do |f|
39
40
  f.each do |l|
40
41
  m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
41
42
  @@glyphs[name] = "0x#{code}".hex if name
@@ -32,7 +32,7 @@ class PDF::Reader
32
32
  # Create a new parser around a PDF::Reader::Buffer object
33
33
  #
34
34
  # buffer - a PDF::Reader::Buffer object that contains PDF data
35
- # xref - an integer that specifies the byte offset of the xref table in the buffer
35
+ # xref - a PDF::Reader::XRef object that represents the document's object offsets
36
36
  def initialize (buffer, xref)
37
37
  @buffer = buffer
38
38
  @xref = xref
@@ -47,7 +47,7 @@ class PDF::Reader
47
47
  token = @buffer.token
48
48
 
49
49
  case token
50
- when "/" then return Name.new(@buffer.token)
50
+ when "/" then return @buffer.token.to_sym
51
51
  when "<<" then return dictionary()
52
52
  when "[" then return array()
53
53
  when "(" then return string()
@@ -72,7 +72,7 @@ class PDF::Reader
72
72
  loop do
73
73
  key = parse_token
74
74
  break if key.kind_of?(Token) and key == ">>"
75
- raise MalformedPDFError, "Dictionary key (#{key.inspect}) is not a name" unless key.kind_of?(Name)
75
+ raise MalformedPDFError, "Dictionary key (#{key.inspect}) is not a name" unless key.kind_of?(Symbol)
76
76
 
77
77
  value = parse_token
78
78
  value.kind_of?(Token) and Error.str_assert_not(value, ">>")
@@ -174,28 +174,28 @@ class PDF::Reader
174
174
  obj = parse_token
175
175
  post_obj = parse_token
176
176
  case post_obj
177
- when "endobj" then return obj
178
- when "stream" then return obj, stream(obj)
177
+ when "endobj" then return [obj,nil]
178
+ when "stream" then return [obj, stream(obj)]
179
179
  else raise MalformedPDFError, "PDF malformed, unexpected token #{post_obj}"
180
180
  end
181
181
  end
182
182
  ################################################################################
183
183
  # Decodes the contents of a PDF Stream and returns it as a Ruby String.
184
184
  def stream (dict)
185
- raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?('Length')
186
- data = @buffer.read(@xref.object(dict['Length']))
185
+ raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
186
+ data = @buffer.read(@xref.object(dict[:Length]).first)
187
187
 
188
188
  Error.str_assert(parse_token, "endstream")
189
189
  Error.str_assert(parse_token, "endobj")
190
190
 
191
- if dict.has_key?('Filter')
191
+ if dict.has_key?(:Filter)
192
192
  options = []
193
193
 
194
- if dict.has_key?('DecodeParms')
195
- options = Array(dict['DecodeParms'])
194
+ if dict.has_key?(:DecodeParms)
195
+ options = Array(dict[:DecodeParms])
196
196
  end
197
197
 
198
- Array(dict['Filter']).each_with_index do |filter, index|
198
+ Array(dict[:Filter]).each_with_index do |filter, index|
199
199
  data = Filter.new(filter, options[index]).filter(data)
200
200
  end
201
201
  end
@@ -0,0 +1,29 @@
1
+ ***************
2
+ *** 173,178 ****
3
+
4
+ obj = parse_token
5
+ post_obj = parse_token
6
+ case post_obj
7
+ when "endobj" then return [obj,nil]
8
+ when "stream" then return [obj, stream(obj)]
9
+ --- 173,192 ----
10
+
11
+ obj = parse_token
12
+ post_obj = parse_token
13
+ +
14
+ + if obj.class == Array
15
+ + newobj = Array.new
16
+ + obj.each_index {|idx|
17
+ + if obj[idx].class == PDF::Reader::Reference
18
+ + xo, xs = @xref.object(obj[idx])
19
+ + if xs
20
+ + newobj << xs
21
+ + end
22
+ + end
23
+ + }
24
+ + return newobj.flatten
25
+ + end
26
+ +
27
+ case post_obj
28
+ when "endobj" then return [obj,nil]
29
+ when "stream" then return [obj, stream(obj)]
@@ -217,8 +217,8 @@ class PDF::Reader
217
217
  def media_box_check (dict)
218
218
  corners = (@upper_corners.last || {:urx => 0, :ury => 0}).dup
219
219
 
220
- if dict.has_key?('MediaBox')
221
- media_box = dict['MediaBox']
220
+ if dict.has_key?(:MediaBox)
221
+ media_box = dict[:MediaBox]
222
222
  corners[:urx] = media_box[2] - media_box[0]
223
223
  corners[:ury] = media_box[3] - media_box[1]
224
224
  end
@@ -9,10 +9,10 @@
9
9
  # distribute, sublicense, and/or sell copies of the Software, and to
10
10
  # permit persons to whom the Software is furnished to do so, subject to
11
11
  # the following conditions:
12
- #
12
+ #
13
13
  # The above copyright notice and this permission notice shall be
14
14
  # included in all copies or substantial portions of the Software.
15
- #
15
+ #
16
16
  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
17
  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
18
  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@@ -45,9 +45,11 @@ class PDF::Reader
45
45
  offset ||= @buffer.find_first_xref_offset
46
46
  @buffer.seek(offset)
47
47
  token = @buffer.token
48
-
48
+
49
49
  if token == "xref" || token == "ref"
50
50
  load_xref_table
51
+ elsif token.to_i >= 0 && @buffer.token.to_i >= 0 && @buffer.token == "obj"
52
+ raise PDF::Reader::UnsupportedFeatureError, "XRef streams are not supported in PDF::Reader yet"
51
53
  else
52
54
  raise PDF::Reader::MalformedPDFError, "xref table not found at offset #{offset} (#{token} != xref)"
53
55
  end
@@ -59,14 +61,14 @@ class PDF::Reader
59
61
  #
60
62
  # If the object is a stream, that is returned as well
61
63
  def object (ref, save_pos = true)
62
- return ref unless ref.kind_of?(Reference)
64
+ return ref, nil unless ref.kind_of?(Reference)
63
65
  pos = @buffer.pos if save_pos
64
66
  obj, stream = Parser.new(@buffer.seek(offset_for(ref)), self).object(ref.id, ref.gen)
65
67
  @buffer.seek(pos) if save_pos
66
68
  if stream
67
- return obj, stream
69
+ return [obj, stream]
68
70
  else
69
- return obj
71
+ return [obj, nil]
70
72
  end
71
73
  end
72
74
  ################################################################################
@@ -78,7 +80,7 @@ class PDF::Reader
78
80
  begin
79
81
  # loop over all subsections of the xref table
80
82
  # In a well formed PDF, the 'trailer' token will indicate
81
- # the end of the table. However we need to be careful in case
83
+ # the end of the table. However we need to be careful in case
82
84
  # we're processing a malformed pdf that is missing the trailer.
83
85
  loop do
84
86
  tok_one, tok_two = @buffer.token, @buffer.token
@@ -104,10 +106,20 @@ class PDF::Reader
104
106
  raise MalformedPDFError, "PDF malformed, trailer should be a dictionary" unless tok_two == "<<"
105
107
 
106
108
  trailer = Parser.new(@buffer, self).dictionary
107
- load(trailer['Prev'].to_i) if trailer.has_key?('Prev')
109
+ load(trailer[:Prev].to_i) if trailer.has_key?(:Prev)
108
110
 
109
111
  trailer
110
112
  end
113
+ # returns the type of object a ref points to
114
+ def obj_type(ref)
115
+ obj, stream = object(ref)
116
+ obj.class.to_s.to_sym
117
+ end
118
+ # returns true if the supplied reference points to an object with a stream
119
+ def stream?(ref)
120
+ obj, stream = @xref.object(ref)
121
+ stream ? true : false
122
+ end
111
123
  ################################################################################
112
124
  # returns the byte offset for the specified PDF object.
113
125
  #
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.2
4
+ version: "0.7"
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Jones
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-03-22 00:00:00 +11:00
12
+ date: 2008-05-06 00:00:00 +10:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -28,9 +28,8 @@ files:
28
28
  - lib/pdf/reader
29
29
  - lib/pdf/reader/explore.rb
30
30
  - lib/pdf/reader/reference.rb
31
- - lib/pdf/reader/name.rb
32
- - lib/pdf/reader/token.rb
33
31
  - lib/pdf/reader/xref.rb
32
+ - lib/pdf/reader/token.rb
34
33
  - lib/pdf/reader/filter.rb
35
34
  - lib/pdf/reader/text_receiver.rb
36
35
  - lib/pdf/reader/buffer.rb
@@ -42,6 +41,7 @@ files:
42
41
  - lib/pdf/reader/register_receiver.rb
43
42
  - lib/pdf/reader/font.rb
44
43
  - lib/pdf/reader/glyphlist.txt
44
+ - lib/pdf/reader/parser.rb.rej
45
45
  - lib/pdf/reader.rb
46
46
  - Rakefile
47
47
  - README
@@ -1,37 +0,0 @@
1
- ################################################################################
2
- #
3
- # Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
4
- #
5
- # Permission is hereby granted, free of charge, to any person obtaining
6
- # a copy of this software and associated documentation files (the
7
- # "Software"), to deal in the Software without restriction, including
8
- # without limitation the rights to use, copy, modify, merge, publish,
9
- # distribute, sublicense, and/or sell copies of the Software, and to
10
- # permit persons to whom the Software is furnished to do so, subject to
11
- # the following conditions:
12
- #
13
- # The above copyright notice and this permission notice shall be
14
- # included in all copies or substantial portions of the Software.
15
- #
16
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
- #
24
- ################################################################################
25
-
26
- class PDF::Reader
27
- ################################################################################
28
- class Name < String
29
- ################################################################################
30
- def initialize (val)
31
- super
32
- end
33
- ################################################################################
34
- end
35
- ################################################################################
36
- end
37
- ################################################################################