pdf-reader 1.4.1 → 2.0.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -118,6 +118,7 @@ class PDF::Reader
118
118
  loop do
119
119
  key = parse_token
120
120
  break if key.kind_of?(Token) and key == ">>"
121
+ raise MalformedPDFError, "unterminated dict" if @buffer.empty?
121
122
  raise MalformedPDFError, "Dictionary key (#{key.inspect}) is not a name" unless key.kind_of?(Symbol)
122
123
 
123
124
  value = parse_token
@@ -131,7 +132,6 @@ class PDF::Reader
131
132
  # reads a PDF name from the buffer and converts it to a Ruby Symbol
132
133
  def pdf_name
133
134
  tok = @buffer.token
134
- tok = " " if tok == "" && RUBY_VERSION < "1.9"
135
135
  tok.gsub!(/#([A-Fa-f0-9]{2})/) do |match|
136
136
  match[1, 2].hex.chr
137
137
  end
@@ -145,6 +145,7 @@ class PDF::Reader
145
145
  loop do
146
146
  item = parse_token
147
147
  break if item.kind_of?(Token) and item == "]"
148
+ raise MalformedPDFError, "unterminated array" if @buffer.empty?
148
149
  a << item
149
150
  end
150
151
 
@@ -158,24 +159,25 @@ class PDF::Reader
158
159
  loop do
159
160
  token = @buffer.token
160
161
  break if token == ">"
162
+ raise MalformedPDFError, "unterminated hex string" if @buffer.empty?
161
163
  str << token
162
164
  end
163
165
 
164
166
  # add a missing digit if required, as required by the spec
165
167
  str << "0" unless str.size % 2 == 0
166
- str.scan(/../).map {|i| i.hex.chr}.join
168
+ str.scan(/../).map {|i| i.hex.chr}.join.force_encoding("binary")
167
169
  end
168
170
  ################################################################################
169
171
  # Reads a PDF String from the buffer and converts it to a Ruby String
170
172
  def string
171
173
  str = @buffer.token
172
- return "" if str == ")"
174
+ return "".force_encoding("binary") if str == ")"
173
175
  Error.assert_equal(parse_token, ")")
174
176
 
175
177
  str.gsub!(/\\([nrtbf()\\\n]|\d{1,3})?|\r\n?|\n\r/m) do |match|
176
178
  MAPPING[match] || ""
177
179
  end
178
- str
180
+ str.force_encoding("binary")
179
181
  end
180
182
 
181
183
  MAPPING = {
@@ -82,7 +82,7 @@ class PDF::Reader
82
82
  (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
83
83
  (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
84
84
  length = objKey.length < 16 ? objKey.length : 16
85
- rc4 = RC4.new( Digest::MD5.digest(objKey)[(0...length)] )
85
+ rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
86
86
  rc4.decrypt(buf)
87
87
  end
88
88
 
@@ -94,7 +94,7 @@ class PDF::Reader
94
94
  if p.nil? || p.empty?
95
95
  PassPadBytes.pack('C*')
96
96
  else
97
- p[(0...32)] + PassPadBytes[0...(32-p.length)].pack('C*')
97
+ p[0, 32] + PassPadBytes[0, 32-p.length].pack('C*')
98
98
  end
99
99
  end
100
100
 
@@ -118,13 +118,13 @@ class PDF::Reader
118
118
  md5 = Digest::MD5.digest(pad_pass(pass))
119
119
  if @revision > 2 then
120
120
  50.times { md5 = Digest::MD5.digest(md5) }
121
- keyBegins = md5[(0...@key_length)]
121
+ keyBegins = md5[0, key_length]
122
122
  #first itteration decrypt owner_key
123
123
  out = @owner_key
124
124
  #RC4 keyed with (keyBegins XOR with itteration #) to decrypt previous out
125
125
  19.downto(0).each { |i| out=RC4.new(xor_each_byte(keyBegins,i)).decrypt(out) }
126
126
  else
127
- out = RC4.new( md5[(0...5)] ).decrypt( @owner_key )
127
+ out = RC4.new( md5[0, 5] ).decrypt( @owner_key )
128
128
  end
129
129
  # c) check output as user password
130
130
  auth_user_pass( out )
@@ -142,12 +142,12 @@ class PDF::Reader
142
142
  #
143
143
  def auth_user_pass(pass)
144
144
  keyBegins = make_file_key(pass)
145
- if @revision > 2
145
+ if @revision >= 3
146
146
  #initialize out for first iteration
147
147
  out = Digest::MD5.digest(PassPadBytes.pack("C*") + @file_id)
148
148
  #zero doesn't matter -> so from 0-19
149
- 20.times{ |i| out=RC4.new(xor_each_byte(keyBegins, i)).decrypt(out) }
150
- pass = @user_key[(0...16)] == out
149
+ 20.times{ |i| out=RC4.new(xor_each_byte(keyBegins, i)).encrypt(out) }
150
+ pass = @user_key[0, 16] == out
151
151
  else
152
152
  pass = RC4.new(keyBegins).encrypt(PassPadBytes.pack("C*")) == @user_key
153
153
  end
@@ -163,20 +163,24 @@ class PDF::Reader
163
163
  (0..24).step(8){|e| @buf << (@permissions >> e & 0xFF)}
164
164
  # e) add the file ID
165
165
  @buf << @file_id
166
- # f) if revision > 4 then if encryptMetadata add 4 bytes of 0x00 else add 4 bytes of 0xFF
167
- if @revision > 4
168
- @buf << [ @encryptMetadata ? 0x00 : 0xFF ].pack('C')*4
166
+ # f) if revision >= 4 and metadata not encrypted then add 4 bytes of 0xFF
167
+ if @revision >= 4 && !@encryptMeta
168
+ @buf << [0xFF,0xFF,0xFF,0xFF].pack('C*')
169
169
  end
170
170
  # b) init MD5 digest + g) finish the hash
171
171
  md5 = Digest::MD5.digest(@buf)
172
172
  # h) spin hash 50 times
173
- if @revision > 2
173
+ if @revision >= 3
174
174
  50.times {
175
- md5 = Digest::MD5.digest(md5[(0...@key_length)])
175
+ md5 = Digest::MD5.digest(md5[0, @key_length])
176
176
  }
177
177
  end
178
- # i) n = key_length revision > 3, n = 5 revision == 2
179
- md5[(0...((@revision < 3) ? 5 : @key_length))]
178
+ # i) n = key_length revision >= 3, n = 5 revision == 2
179
+ if @revision < 3
180
+ md5[0, 5]
181
+ else
182
+ md5[0, @key_length]
183
+ end
180
184
  end
181
185
 
182
186
  def build_standard_key(pass)
@@ -65,16 +65,10 @@ class PDF::Reader
65
65
  @mergable_range ||= Range.new(endx - 3, endx + font_size)
66
66
  end
67
67
 
68
+ # Assume string encoding is marked correctly and we can trust String#size to return a
69
+ # character count
68
70
  def character_count
69
- if @text.size == 1
70
- 1.0
71
- elsif @text.respond_to?(:bytesize)
72
- # M17N aware VM
73
- # so we can trust String#size to return a character count
74
- @text.size.to_f
75
- else
76
- text.unpack("U*").size.to_f
77
- end
71
+ @text.size.to_f
78
72
  end
79
73
  end
80
74
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.1
4
+ version: 2.0.0.beta1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-01-01 00:00:00.000000000 Z
11
+ date: 2017-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -30,28 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '3.4'
33
+ version: '3.5'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '3.4'
41
- - !ruby/object:Gem::Dependency
42
- name: ZenTest
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: 4.4.2
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: 4.4.2
40
+ version: '3.5'
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: cane
57
43
  requirement: !ruby/object:Gem::Requirement
@@ -70,16 +56,16 @@ dependencies:
70
56
  name: morecane
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
- - - ">="
59
+ - - "~>"
74
60
  - !ruby/object:Gem::Version
75
- version: '0'
61
+ version: '0.2'
76
62
  type: :development
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
- - - ">="
66
+ - - "~>"
81
67
  - !ruby/object:Gem::Version
82
- version: '0'
68
+ version: '0.2'
83
69
  - !ruby/object:Gem::Dependency
84
70
  name: ir_b
85
71
  requirement: !ruby/object:Gem::Requirement
@@ -185,22 +171,20 @@ email:
185
171
  executables:
186
172
  - pdf_object
187
173
  - pdf_text
188
- - pdf_list_callbacks
189
174
  - pdf_callbacks
190
175
  extensions: []
191
176
  extra_rdoc_files:
192
- - README.rdoc
177
+ - README.md
193
178
  - TODO
194
179
  - CHANGELOG
195
180
  - MIT-LICENSE
196
181
  files:
197
182
  - CHANGELOG
198
183
  - MIT-LICENSE
199
- - README.rdoc
184
+ - README.md
200
185
  - Rakefile
201
186
  - TODO
202
187
  - bin/pdf_callbacks
203
- - bin/pdf_list_callbacks
204
188
  - bin/pdf_object
205
189
  - bin/pdf_text
206
190
  - examples/callbacks.rb
@@ -217,7 +201,6 @@ files:
217
201
  - lib/pdf-reader.rb
218
202
  - lib/pdf/hash.rb
219
203
  - lib/pdf/reader.rb
220
- - lib/pdf/reader/abstract_strategy.rb
221
204
  - lib/pdf/reader/afm/Courier-Bold.afm
222
205
  - lib/pdf/reader/afm/Courier-BoldOblique.afm
223
206
  - lib/pdf/reader/afm/Courier-Oblique.afm
@@ -258,7 +241,6 @@ files:
258
241
  - lib/pdf/reader/glyph_hash.rb
259
242
  - lib/pdf/reader/glyphlist.txt
260
243
  - lib/pdf/reader/lzw.rb
261
- - lib/pdf/reader/metadata_strategy.rb
262
244
  - lib/pdf/reader/object_cache.rb
263
245
  - lib/pdf/reader/object_hash.rb
264
246
  - lib/pdf/reader/object_stream.rb
@@ -276,7 +258,6 @@ files:
276
258
  - lib/pdf/reader/standard_security_handler.rb
277
259
  - lib/pdf/reader/stream.rb
278
260
  - lib/pdf/reader/synchronized_cache.rb
279
- - lib/pdf/reader/text_receiver.rb
280
261
  - lib/pdf/reader/text_run.rb
281
262
  - lib/pdf/reader/token.rb
282
263
  - lib/pdf/reader/transformation_matrix.rb
@@ -291,26 +272,12 @@ homepage: http://github.com/yob/pdf-reader
291
272
  licenses:
292
273
  - MIT
293
274
  metadata: {}
294
- post_install_message: |2+
295
-
296
- ********************************************
297
-
298
- v1.0.0 of PDF::Reader introduced a new page-based API. There are extensive
299
- examples showing how to use it in the README and examples directory.
300
-
301
- For detailed documentation, check the rdocs for the PDF::Reader,
302
- PDF::Reader::Page and PDF::Reader::ObjectHash classes.
303
-
304
- The old API is marked as deprecated but will continue to work with no
305
- visible warnings for now.
306
-
307
- ********************************************
308
-
275
+ post_install_message:
309
276
  rdoc_options:
310
277
  - "--title"
311
278
  - PDF::Reader Documentation
312
279
  - "--main"
313
- - README.rdoc
280
+ - README.md
314
281
  - "-q"
315
282
  require_paths:
316
283
  - lib
@@ -321,9 +288,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
321
288
  version: 1.9.3
322
289
  required_rubygems_version: !ruby/object:Gem::Requirement
323
290
  requirements:
324
- - - ">="
291
+ - - ">"
325
292
  - !ruby/object:Gem::Version
326
- version: '0'
293
+ version: 1.3.1
327
294
  requirements: []
328
295
  rubyforge_project:
329
296
  rubygems_version: 2.5.2
@@ -1,17 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # this executable is deprecated, use pdf_callbacks instead
4
-
5
- require 'rubygems'
6
-
7
- $LOAD_PATH.unshift(File.dirname(__FILE__) + "/../lib")
8
-
9
- require 'pdf/reader'
10
-
11
- receiver = PDF::Reader::PrintReceiver.new
12
-
13
- if ARGV.empty?
14
- PDF::Reader.new.parse($stdin, receiver)
15
- else
16
- PDF::Reader.file(ARGV[0], receiver)
17
- end
@@ -1,81 +0,0 @@
1
- # coding: utf-8
2
-
3
- class PDF::Reader
4
-
5
- # DEPRECATED: this class was deprecated in version 0.11.0 and will
6
- # eventually be removed
7
- class AbstractStrategy # :nodoc:
8
-
9
- def initialize(ohash, receivers, options = {})
10
- @ohash, @options = ohash, options
11
- if receivers.is_a?(Array)
12
- @receivers = receivers
13
- else
14
- @receivers = [receivers]
15
- end
16
- end
17
-
18
- private
19
-
20
- def options
21
- @options || {}
22
- end
23
-
24
- # calls the name callback method on the receiver class with params as the arguments
25
- #
26
- def callback(name, params=[])
27
- @receivers.each do |receiver|
28
- receiver.send(name, *params) if receiver.respond_to?(name)
29
- end
30
- end
31
-
32
- # strings outside of page content should be in either PDFDocEncoding or UTF-16.
33
- def decode_strings(obj)
34
- case obj
35
- when String then
36
- if obj[0,2].unpack("C*").slice(0,2) == [254,255]
37
- PDF::Reader::Encoding.new(:UTF16Encoding).to_utf8(obj[2, obj.size])
38
- else
39
- PDF::Reader::Encoding.new(:PDFDocEncoding).to_utf8(obj)
40
- end
41
- when Hash then obj.each { |key,val| obj[key] = decode_strings(val) }
42
- when Array then obj.collect { |item| decode_strings(item) }
43
- else
44
- obj
45
- end
46
- end
47
-
48
- def info
49
- ohash.object(trailer[:Info])
50
- end
51
-
52
- def info?
53
- info ? true : false
54
- end
55
-
56
- def ohash
57
- @ohash
58
- end
59
-
60
- def pages
61
- ohash.object(root[:Pages])
62
- end
63
-
64
- def pages?
65
- pages ? true : false
66
- end
67
-
68
- def root
69
- ohash.object(trailer[:Root])
70
- end
71
-
72
- def root?
73
- root ? true : false
74
- end
75
-
76
- def trailer
77
- ohash.trailer
78
- end
79
-
80
- end
81
- end
@@ -1,56 +0,0 @@
1
- # coding: utf-8
2
-
3
- class PDF::Reader
4
-
5
- # DEPRECATED: this class was deprecated in version 0.11.0 and will
6
- # eventually be removed
7
- #
8
- class MetadataStrategy < AbstractStrategy # :nodoc:
9
-
10
- def self.to_sym
11
- :metadata
12
- end
13
-
14
- def process
15
- return false unless options[:metadata]
16
-
17
- # may be useful to some people
18
- callback(:pdf_version, ohash.pdf_version)
19
-
20
- # ye olde metadata
21
- callback(:metadata, [decoded_info]) if info?
22
-
23
- # new style xml metadata
24
- callback(:xml_metadata, [xml_metadata]) if xml_metadata?
25
-
26
- # page count
27
- if pages?
28
- count = ohash.object(pages[:Count])
29
- callback(:page_count, count.to_i)
30
- end
31
- end
32
-
33
- private
34
-
35
- def xml_metadata
36
- return @xml_metadata if defined?(@xml_metadata)
37
-
38
- if root[:Metadata].nil?
39
- @xml_metadata = nil
40
- else
41
- string = ohash.object(root[:Metadata]).unfiltered_data
42
- string.force_encoding("utf-8") if string.respond_to?(:force_encoding)
43
- @xml_metadata = string
44
- end
45
- end
46
-
47
- def xml_metadata?
48
- xml_metadata ? true : false
49
- end
50
-
51
- def decoded_info
52
- @decoded_info ||= decode_strings(info)
53
- end
54
-
55
- end
56
- end