pdf-reader 1.4.1 → 2.0.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
@@ -118,6 +118,7 @@ class PDF::Reader
118
118
  loop do
119
119
  key = parse_token
120
120
  break if key.kind_of?(Token) and key == ">>"
121
+ raise MalformedPDFError, "unterminated dict" if @buffer.empty?
121
122
  raise MalformedPDFError, "Dictionary key (#{key.inspect}) is not a name" unless key.kind_of?(Symbol)
122
123
 
123
124
  value = parse_token
@@ -131,7 +132,6 @@ class PDF::Reader
131
132
  # reads a PDF name from the buffer and converts it to a Ruby Symbol
132
133
  def pdf_name
133
134
  tok = @buffer.token
134
- tok = " " if tok == "" && RUBY_VERSION < "1.9"
135
135
  tok.gsub!(/#([A-Fa-f0-9]{2})/) do |match|
136
136
  match[1, 2].hex.chr
137
137
  end
@@ -145,6 +145,7 @@ class PDF::Reader
145
145
  loop do
146
146
  item = parse_token
147
147
  break if item.kind_of?(Token) and item == "]"
148
+ raise MalformedPDFError, "unterminated array" if @buffer.empty?
148
149
  a << item
149
150
  end
150
151
 
@@ -158,24 +159,25 @@ class PDF::Reader
158
159
  loop do
159
160
  token = @buffer.token
160
161
  break if token == ">"
162
+ raise MalformedPDFError, "unterminated hex string" if @buffer.empty?
161
163
  str << token
162
164
  end
163
165
 
164
166
  # add a missing digit if required, as required by the spec
165
167
  str << "0" unless str.size % 2 == 0
166
- str.scan(/../).map {|i| i.hex.chr}.join
168
+ str.scan(/../).map {|i| i.hex.chr}.join.force_encoding("binary")
167
169
  end
168
170
  ################################################################################
169
171
  # Reads a PDF String from the buffer and converts it to a Ruby String
170
172
  def string
171
173
  str = @buffer.token
172
- return "" if str == ")"
174
+ return "".force_encoding("binary") if str == ")"
173
175
  Error.assert_equal(parse_token, ")")
174
176
 
175
177
  str.gsub!(/\\([nrtbf()\\\n]|\d{1,3})?|\r\n?|\n\r/m) do |match|
176
178
  MAPPING[match] || ""
177
179
  end
178
- str
180
+ str.force_encoding("binary")
179
181
  end
180
182
 
181
183
  MAPPING = {
@@ -82,7 +82,7 @@ class PDF::Reader
82
82
  (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
83
83
  (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
84
84
  length = objKey.length < 16 ? objKey.length : 16
85
- rc4 = RC4.new( Digest::MD5.digest(objKey)[(0...length)] )
85
+ rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
86
86
  rc4.decrypt(buf)
87
87
  end
88
88
 
@@ -94,7 +94,7 @@ class PDF::Reader
94
94
  if p.nil? || p.empty?
95
95
  PassPadBytes.pack('C*')
96
96
  else
97
- p[(0...32)] + PassPadBytes[0...(32-p.length)].pack('C*')
97
+ p[0, 32] + PassPadBytes[0, 32-p.length].pack('C*')
98
98
  end
99
99
  end
100
100
 
@@ -118,13 +118,13 @@ class PDF::Reader
118
118
  md5 = Digest::MD5.digest(pad_pass(pass))
119
119
  if @revision > 2 then
120
120
  50.times { md5 = Digest::MD5.digest(md5) }
121
- keyBegins = md5[(0...@key_length)]
121
+ keyBegins = md5[0, key_length]
122
122
  #first itteration decrypt owner_key
123
123
  out = @owner_key
124
124
  #RC4 keyed with (keyBegins XOR with itteration #) to decrypt previous out
125
125
  19.downto(0).each { |i| out=RC4.new(xor_each_byte(keyBegins,i)).decrypt(out) }
126
126
  else
127
- out = RC4.new( md5[(0...5)] ).decrypt( @owner_key )
127
+ out = RC4.new( md5[0, 5] ).decrypt( @owner_key )
128
128
  end
129
129
  # c) check output as user password
130
130
  auth_user_pass( out )
@@ -142,12 +142,12 @@ class PDF::Reader
142
142
  #
143
143
  def auth_user_pass(pass)
144
144
  keyBegins = make_file_key(pass)
145
- if @revision > 2
145
+ if @revision >= 3
146
146
  #initialize out for first iteration
147
147
  out = Digest::MD5.digest(PassPadBytes.pack("C*") + @file_id)
148
148
  #zero doesn't matter -> so from 0-19
149
- 20.times{ |i| out=RC4.new(xor_each_byte(keyBegins, i)).decrypt(out) }
150
- pass = @user_key[(0...16)] == out
149
+ 20.times{ |i| out=RC4.new(xor_each_byte(keyBegins, i)).encrypt(out) }
150
+ pass = @user_key[0, 16] == out
151
151
  else
152
152
  pass = RC4.new(keyBegins).encrypt(PassPadBytes.pack("C*")) == @user_key
153
153
  end
@@ -163,20 +163,24 @@ class PDF::Reader
163
163
  (0..24).step(8){|e| @buf << (@permissions >> e & 0xFF)}
164
164
  # e) add the file ID
165
165
  @buf << @file_id
166
- # f) if revision > 4 then if encryptMetadata add 4 bytes of 0x00 else add 4 bytes of 0xFF
167
- if @revision > 4
168
- @buf << [ @encryptMetadata ? 0x00 : 0xFF ].pack('C')*4
166
+ # f) if revision >= 4 and metadata not encrypted then add 4 bytes of 0xFF
167
+ if @revision >= 4 && !@encryptMeta
168
+ @buf << [0xFF,0xFF,0xFF,0xFF].pack('C*')
169
169
  end
170
170
  # b) init MD5 digest + g) finish the hash
171
171
  md5 = Digest::MD5.digest(@buf)
172
172
  # h) spin hash 50 times
173
- if @revision > 2
173
+ if @revision >= 3
174
174
  50.times {
175
- md5 = Digest::MD5.digest(md5[(0...@key_length)])
175
+ md5 = Digest::MD5.digest(md5[0, @key_length])
176
176
  }
177
177
  end
178
- # i) n = key_length revision > 3, n = 5 revision == 2
179
- md5[(0...((@revision < 3) ? 5 : @key_length))]
178
+ # i) n = key_length revision >= 3, n = 5 revision == 2
179
+ if @revision < 3
180
+ md5[0, 5]
181
+ else
182
+ md5[0, @key_length]
183
+ end
180
184
  end
181
185
 
182
186
  def build_standard_key(pass)
@@ -65,16 +65,10 @@ class PDF::Reader
65
65
  @mergable_range ||= Range.new(endx - 3, endx + font_size)
66
66
  end
67
67
 
68
+ # Assume string encoding is marked correctly and we can trust String#size to return a
69
+ # character count
68
70
  def character_count
69
- if @text.size == 1
70
- 1.0
71
- elsif @text.respond_to?(:bytesize)
72
- # M17N aware VM
73
- # so we can trust String#size to return a character count
74
- @text.size.to_f
75
- else
76
- text.unpack("U*").size.to_f
77
- end
71
+ @text.size.to_f
78
72
  end
79
73
  end
80
74
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.1
4
+ version: 2.0.0.beta1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-01-01 00:00:00.000000000 Z
11
+ date: 2017-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -30,28 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '3.4'
33
+ version: '3.5'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '3.4'
41
- - !ruby/object:Gem::Dependency
42
- name: ZenTest
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: 4.4.2
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: 4.4.2
40
+ version: '3.5'
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: cane
57
43
  requirement: !ruby/object:Gem::Requirement
@@ -70,16 +56,16 @@ dependencies:
70
56
  name: morecane
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
- - - ">="
59
+ - - "~>"
74
60
  - !ruby/object:Gem::Version
75
- version: '0'
61
+ version: '0.2'
76
62
  type: :development
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
- - - ">="
66
+ - - "~>"
81
67
  - !ruby/object:Gem::Version
82
- version: '0'
68
+ version: '0.2'
83
69
  - !ruby/object:Gem::Dependency
84
70
  name: ir_b
85
71
  requirement: !ruby/object:Gem::Requirement
@@ -185,22 +171,20 @@ email:
185
171
  executables:
186
172
  - pdf_object
187
173
  - pdf_text
188
- - pdf_list_callbacks
189
174
  - pdf_callbacks
190
175
  extensions: []
191
176
  extra_rdoc_files:
192
- - README.rdoc
177
+ - README.md
193
178
  - TODO
194
179
  - CHANGELOG
195
180
  - MIT-LICENSE
196
181
  files:
197
182
  - CHANGELOG
198
183
  - MIT-LICENSE
199
- - README.rdoc
184
+ - README.md
200
185
  - Rakefile
201
186
  - TODO
202
187
  - bin/pdf_callbacks
203
- - bin/pdf_list_callbacks
204
188
  - bin/pdf_object
205
189
  - bin/pdf_text
206
190
  - examples/callbacks.rb
@@ -217,7 +201,6 @@ files:
217
201
  - lib/pdf-reader.rb
218
202
  - lib/pdf/hash.rb
219
203
  - lib/pdf/reader.rb
220
- - lib/pdf/reader/abstract_strategy.rb
221
204
  - lib/pdf/reader/afm/Courier-Bold.afm
222
205
  - lib/pdf/reader/afm/Courier-BoldOblique.afm
223
206
  - lib/pdf/reader/afm/Courier-Oblique.afm
@@ -258,7 +241,6 @@ files:
258
241
  - lib/pdf/reader/glyph_hash.rb
259
242
  - lib/pdf/reader/glyphlist.txt
260
243
  - lib/pdf/reader/lzw.rb
261
- - lib/pdf/reader/metadata_strategy.rb
262
244
  - lib/pdf/reader/object_cache.rb
263
245
  - lib/pdf/reader/object_hash.rb
264
246
  - lib/pdf/reader/object_stream.rb
@@ -276,7 +258,6 @@ files:
276
258
  - lib/pdf/reader/standard_security_handler.rb
277
259
  - lib/pdf/reader/stream.rb
278
260
  - lib/pdf/reader/synchronized_cache.rb
279
- - lib/pdf/reader/text_receiver.rb
280
261
  - lib/pdf/reader/text_run.rb
281
262
  - lib/pdf/reader/token.rb
282
263
  - lib/pdf/reader/transformation_matrix.rb
@@ -291,26 +272,12 @@ homepage: http://github.com/yob/pdf-reader
291
272
  licenses:
292
273
  - MIT
293
274
  metadata: {}
294
- post_install_message: |2+
295
-
296
- ********************************************
297
-
298
- v1.0.0 of PDF::Reader introduced a new page-based API. There are extensive
299
- examples showing how to use it in the README and examples directory.
300
-
301
- For detailed documentation, check the rdocs for the PDF::Reader,
302
- PDF::Reader::Page and PDF::Reader::ObjectHash classes.
303
-
304
- The old API is marked as deprecated but will continue to work with no
305
- visible warnings for now.
306
-
307
- ********************************************
308
-
275
+ post_install_message:
309
276
  rdoc_options:
310
277
  - "--title"
311
278
  - PDF::Reader Documentation
312
279
  - "--main"
313
- - README.rdoc
280
+ - README.md
314
281
  - "-q"
315
282
  require_paths:
316
283
  - lib
@@ -321,9 +288,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
321
288
  version: 1.9.3
322
289
  required_rubygems_version: !ruby/object:Gem::Requirement
323
290
  requirements:
324
- - - ">="
291
+ - - ">"
325
292
  - !ruby/object:Gem::Version
326
- version: '0'
293
+ version: 1.3.1
327
294
  requirements: []
328
295
  rubyforge_project:
329
296
  rubygems_version: 2.5.2
@@ -1,17 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # this executable is deprecated, use pdf_callbacks instead
4
-
5
- require 'rubygems'
6
-
7
- $LOAD_PATH.unshift(File.dirname(__FILE__) + "/../lib")
8
-
9
- require 'pdf/reader'
10
-
11
- receiver = PDF::Reader::PrintReceiver.new
12
-
13
- if ARGV.empty?
14
- PDF::Reader.new.parse($stdin, receiver)
15
- else
16
- PDF::Reader.file(ARGV[0], receiver)
17
- end
@@ -1,81 +0,0 @@
1
- # coding: utf-8
2
-
3
- class PDF::Reader
4
-
5
- # DEPRECATED: this class was deprecated in version 0.11.0 and will
6
- # eventually be removed
7
- class AbstractStrategy # :nodoc:
8
-
9
- def initialize(ohash, receivers, options = {})
10
- @ohash, @options = ohash, options
11
- if receivers.is_a?(Array)
12
- @receivers = receivers
13
- else
14
- @receivers = [receivers]
15
- end
16
- end
17
-
18
- private
19
-
20
- def options
21
- @options || {}
22
- end
23
-
24
- # calls the name callback method on the receiver class with params as the arguments
25
- #
26
- def callback(name, params=[])
27
- @receivers.each do |receiver|
28
- receiver.send(name, *params) if receiver.respond_to?(name)
29
- end
30
- end
31
-
32
- # strings outside of page content should be in either PDFDocEncoding or UTF-16.
33
- def decode_strings(obj)
34
- case obj
35
- when String then
36
- if obj[0,2].unpack("C*").slice(0,2) == [254,255]
37
- PDF::Reader::Encoding.new(:UTF16Encoding).to_utf8(obj[2, obj.size])
38
- else
39
- PDF::Reader::Encoding.new(:PDFDocEncoding).to_utf8(obj)
40
- end
41
- when Hash then obj.each { |key,val| obj[key] = decode_strings(val) }
42
- when Array then obj.collect { |item| decode_strings(item) }
43
- else
44
- obj
45
- end
46
- end
47
-
48
- def info
49
- ohash.object(trailer[:Info])
50
- end
51
-
52
- def info?
53
- info ? true : false
54
- end
55
-
56
- def ohash
57
- @ohash
58
- end
59
-
60
- def pages
61
- ohash.object(root[:Pages])
62
- end
63
-
64
- def pages?
65
- pages ? true : false
66
- end
67
-
68
- def root
69
- ohash.object(trailer[:Root])
70
- end
71
-
72
- def root?
73
- root ? true : false
74
- end
75
-
76
- def trailer
77
- ohash.trailer
78
- end
79
-
80
- end
81
- end
@@ -1,56 +0,0 @@
1
- # coding: utf-8
2
-
3
- class PDF::Reader
4
-
5
- # DEPRECATED: this class was deprecated in version 0.11.0 and will
6
- # eventually be removed
7
- #
8
- class MetadataStrategy < AbstractStrategy # :nodoc:
9
-
10
- def self.to_sym
11
- :metadata
12
- end
13
-
14
- def process
15
- return false unless options[:metadata]
16
-
17
- # may be useful to some people
18
- callback(:pdf_version, ohash.pdf_version)
19
-
20
- # ye olde metadata
21
- callback(:metadata, [decoded_info]) if info?
22
-
23
- # new style xml metadata
24
- callback(:xml_metadata, [xml_metadata]) if xml_metadata?
25
-
26
- # page count
27
- if pages?
28
- count = ohash.object(pages[:Count])
29
- callback(:page_count, count.to_i)
30
- end
31
- end
32
-
33
- private
34
-
35
- def xml_metadata
36
- return @xml_metadata if defined?(@xml_metadata)
37
-
38
- if root[:Metadata].nil?
39
- @xml_metadata = nil
40
- else
41
- string = ohash.object(root[:Metadata]).unfiltered_data
42
- string.force_encoding("utf-8") if string.respond_to?(:force_encoding)
43
- @xml_metadata = string
44
- end
45
- end
46
-
47
- def xml_metadata?
48
- xml_metadata ? true : false
49
- end
50
-
51
- def decoded_info
52
- @decoded_info ||= decode_strings(info)
53
- end
54
-
55
- end
56
- end