pdf-reader 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,11 @@
1
+ v0.9.2 (24th April 2011)
2
+ - add basic support for fonts with Identity-V encoding.
3
+ - bug: improve robustness of text extraction
4
+ - thanks to Evan Arnold for reporting
5
+ - bug: fix loading of nested resources on XObjects
6
+ - thanks to Samuel Williams for reporting
7
+ - bug: improve parsing of files with XRef object streams
8
+
1
9
  v0.9.1 (21st December 2010)
2
10
  - force gem to only install on ruby 1.8.7 or higher
3
11
  - maintaining support for earlier versions takes more time than I have
@@ -116,7 +116,7 @@ class PDF::Reader
116
116
  def original_codepoint_to_unicode(cp, tounicode = nil)
117
117
  if tounicode && (code = tounicode.decode(cp))
118
118
  code
119
- elsif tounicode || ( tounicode.nil? && to_unicode_required? )
119
+ elsif to_unicode_required? && (tounicode.nil? || tounicode.decode(cp).nil?)
120
120
  PDF::Reader::Encoding::UNKNOWN_CHAR
121
121
  elsif mapping[cp]
122
122
  mapping[cp]
@@ -129,7 +129,7 @@ class PDF::Reader
129
129
 
130
130
  def get_unpack(enc)
131
131
  case enc
132
- when :"Identity-H", :UTF16Encoding
132
+ when :"Identity-H", :"Identity-V", :UTF16Encoding
133
133
  "n*"
134
134
  else
135
135
  "C*"
@@ -140,6 +140,7 @@ class PDF::Reader
140
140
  return File.dirname(__FILE__) + "/encodings/standard.txt" if enc.nil?
141
141
  files = {
142
142
  :"Identity-H" => nil,
143
+ :"Identity-V" => nil,
143
144
  :MacRomanEncoding => File.dirname(__FILE__) + "/encodings/mac_roman.txt",
144
145
  :MacExpertEncoding => File.dirname(__FILE__) + "/encodings/mac_expert.txt",
145
146
  :PDFDocEncoding => File.dirname(__FILE__) + "/encodings/pdf_doc.txt",
@@ -158,7 +159,7 @@ class PDF::Reader
158
159
  end
159
160
 
160
161
  def unicode_required?(enc)
161
- enc == :"Identity-H"
162
+ enc == :"Identity-H" or enc == :"Identity-V"
162
163
  end
163
164
 
164
165
  def mapping
@@ -310,11 +310,15 @@ class PDF::Reader
310
310
 
311
311
  if xobject && xobject.hash[:Subtype] == :Form
312
312
  callback(:begin_form_xobject)
313
- resources = @ohash.object(xobject.hash[:Resources])
314
- walk_resources(resources) if resources
315
- fonts = font_hash_from_resources(resources)
313
+ xobj_resources = @ohash.object(xobject.hash[:Resources])
314
+ if xobj_resources
315
+ resources.push xobj_resources
316
+ walk_resources(xobj_resources)
317
+ end
318
+ fonts = font_hash_from_resources(xobj_resources)
316
319
  content_stream(xobject, fonts)
317
320
  callback(:end_form_xobject)
321
+ resources.pop if xobj_resources
318
322
  end
319
323
  end
320
324
 
@@ -439,8 +443,11 @@ class PDF::Reader
439
443
  obj
440
444
  when PDF::Reader::Reference then
441
445
  resolve_references(@ohash.object(obj))
442
- when Hash then obj.each { |key,val| obj[key] = resolve_references(val) }
443
- when Array then obj.collect { |item| resolve_references(item) }
446
+ when Hash then
447
+ arr = obj.map { |key,val| [key, resolve_references(val)] }.flatten(1)
448
+ Hash[*arr]
449
+ when Array then
450
+ obj.collect { |item| resolve_references(item) }
444
451
  else
445
452
  obj
446
453
  end
@@ -154,23 +154,26 @@ class PDF::Reader
154
154
  trailer[:Info] = stream.hash[:Info] if stream.hash[:Info]
155
155
  trailer[:Prev] = stream.hash[:Prev] if stream.hash[:Prev]
156
156
 
157
- widths = stream.hash[:W]
157
+ widths = stream.hash[:W]
158
158
  entry_length = widths.inject(0) { |s, w| s + w }
159
- raw_data = stream.unfiltered_data
159
+ raw_data = StringIO.new(stream.unfiltered_data)
160
160
  if stream.hash[:Index]
161
- index = stream.hash[:Index][0]
161
+ index = stream.hash[:Index]
162
162
  else
163
- index = 0
163
+ index = [0, stream.hash[:Size]]
164
164
  end
165
- stream.hash[:Size].times do |i|
166
- entry = raw_data[i*entry_length, entry_length] || ""
167
- f1 = unpack_bytes(entry[0,widths[0]])
168
- f2 = unpack_bytes(entry[widths[0],widths[1]])
169
- f3 = unpack_bytes(entry[widths[0]+widths[1],widths[2]])
170
- if f1 == 1
171
- store(index + i, f3, f2)
172
- elsif f1 == 2
173
- store(index + i, 0, PDF::Reader::Reference.new(f2, 0))
165
+ index.each_slice(2) do |start_id, size|
166
+ obj_ids = (start_id..(start_id+(size-1)))
167
+ obj_ids.each do |objid|
168
+ entry = raw_data.read(entry_length) || ""
169
+ f1 = unpack_bytes(entry[0,widths[0]])
170
+ f2 = unpack_bytes(entry[widths[0],widths[1]])
171
+ f3 = unpack_bytes(entry[widths[0]+widths[1],widths[2]])
172
+ if f1 == 1 && f2 > 0
173
+ store(objid, f3, f2)
174
+ elsif f1 == 2 && f2 > 0
175
+ store(objid, 0, PDF::Reader::Reference.new(f2, 0))
176
+ end
174
177
  end
175
178
  end
176
179
 
metadata CHANGED
@@ -1,13 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- hash: 57
5
4
  prerelease: false
6
5
  segments:
7
6
  - 0
8
7
  - 9
9
- - 1
10
- version: 0.9.1
8
+ - 2
9
+ version: 0.9.2
11
10
  platform: ruby
12
11
  authors:
13
12
  - James Healy
@@ -15,7 +14,7 @@ autorequire:
15
14
  bindir: bin
16
15
  cert_chain: []
17
16
 
18
- date: 2010-12-21 00:00:00 +11:00
17
+ date: 2011-04-24 00:00:00 +10:00
19
18
  default_executable:
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
@@ -26,7 +25,6 @@ dependencies:
26
25
  requirements:
27
26
  - - ">="
28
27
  - !ruby/object:Gem::Version
29
- hash: 3
30
28
  segments:
31
29
  - 0
32
30
  version: "0"
@@ -40,7 +38,6 @@ dependencies:
40
38
  requirements:
41
39
  - - ">="
42
40
  - !ruby/object:Gem::Version
43
- hash: 3
44
41
  segments:
45
42
  - 0
46
43
  version: "0"
@@ -54,7 +51,6 @@ dependencies:
54
51
  requirements:
55
52
  - - ~>
56
53
  - !ruby/object:Gem::Version
57
- hash: 1
58
54
  segments:
59
55
  - 2
60
56
  - 1
@@ -69,7 +65,6 @@ dependencies:
69
65
  requirements:
70
66
  - - ">="
71
67
  - !ruby/object:Gem::Version
72
- hash: 25
73
68
  segments:
74
69
  - 0
75
70
  - 9
@@ -158,7 +153,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
158
153
  requirements:
159
154
  - - ">="
160
155
  - !ruby/object:Gem::Version
161
- hash: 57
162
156
  segments:
163
157
  - 1
164
158
  - 8
@@ -169,7 +163,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
169
163
  requirements:
170
164
  - - ">="
171
165
  - !ruby/object:Gem::Version
172
- hash: 3
173
166
  segments:
174
167
  - 0
175
168
  version: "0"