pdf-reader 0.9.1 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +8 -0
- data/lib/pdf/reader/encoding.rb +4 -3
- data/lib/pdf/reader/pages_strategy.rb +12 -5
- data/lib/pdf/reader/xref.rb +16 -13
- metadata +3 -10
data/CHANGELOG
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
v0.9.2 (24th April 2011)
|
2
|
+
- add basic support for fonts with Identity-V encoding.
|
3
|
+
- bug: improve robustness of text extraction
|
4
|
+
- thanks to Evan Arnold for reporting
|
5
|
+
- bug: fix loading of nested resources on XObjects
|
6
|
+
- thanks to Samuel Williams for reporting
|
7
|
+
- bug: improve parsing of files with XRef object streams
|
8
|
+
|
1
9
|
v0.9.1 (21st December 2010)
|
2
10
|
- force gem to only install on ruby 1.8.7 or higher
|
3
11
|
- maintaining support for earlier versions takes more time than I have
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -116,7 +116,7 @@ class PDF::Reader
|
|
116
116
|
def original_codepoint_to_unicode(cp, tounicode = nil)
|
117
117
|
if tounicode && (code = tounicode.decode(cp))
|
118
118
|
code
|
119
|
-
elsif
|
119
|
+
elsif to_unicode_required? && (tounicode.nil? || tounicode.decode(cp).nil?)
|
120
120
|
PDF::Reader::Encoding::UNKNOWN_CHAR
|
121
121
|
elsif mapping[cp]
|
122
122
|
mapping[cp]
|
@@ -129,7 +129,7 @@ class PDF::Reader
|
|
129
129
|
|
130
130
|
def get_unpack(enc)
|
131
131
|
case enc
|
132
|
-
when :"Identity-H", :UTF16Encoding
|
132
|
+
when :"Identity-H", :"Identity-V", :UTF16Encoding
|
133
133
|
"n*"
|
134
134
|
else
|
135
135
|
"C*"
|
@@ -140,6 +140,7 @@ class PDF::Reader
|
|
140
140
|
return File.dirname(__FILE__) + "/encodings/standard.txt" if enc.nil?
|
141
141
|
files = {
|
142
142
|
:"Identity-H" => nil,
|
143
|
+
:"Identity-V" => nil,
|
143
144
|
:MacRomanEncoding => File.dirname(__FILE__) + "/encodings/mac_roman.txt",
|
144
145
|
:MacExpertEncoding => File.dirname(__FILE__) + "/encodings/mac_expert.txt",
|
145
146
|
:PDFDocEncoding => File.dirname(__FILE__) + "/encodings/pdf_doc.txt",
|
@@ -158,7 +159,7 @@ class PDF::Reader
|
|
158
159
|
end
|
159
160
|
|
160
161
|
def unicode_required?(enc)
|
161
|
-
enc == :"Identity-H"
|
162
|
+
enc == :"Identity-H" or enc == :"Identity-V"
|
162
163
|
end
|
163
164
|
|
164
165
|
def mapping
|
@@ -310,11 +310,15 @@ class PDF::Reader
|
|
310
310
|
|
311
311
|
if xobject && xobject.hash[:Subtype] == :Form
|
312
312
|
callback(:begin_form_xobject)
|
313
|
-
|
314
|
-
|
315
|
-
|
313
|
+
xobj_resources = @ohash.object(xobject.hash[:Resources])
|
314
|
+
if xobj_resources
|
315
|
+
resources.push xobj_resources
|
316
|
+
walk_resources(xobj_resources)
|
317
|
+
end
|
318
|
+
fonts = font_hash_from_resources(xobj_resources)
|
316
319
|
content_stream(xobject, fonts)
|
317
320
|
callback(:end_form_xobject)
|
321
|
+
resources.pop if xobj_resources
|
318
322
|
end
|
319
323
|
end
|
320
324
|
|
@@ -439,8 +443,11 @@ class PDF::Reader
|
|
439
443
|
obj
|
440
444
|
when PDF::Reader::Reference then
|
441
445
|
resolve_references(@ohash.object(obj))
|
442
|
-
when Hash then
|
443
|
-
|
446
|
+
when Hash then
|
447
|
+
arr = obj.map { |key,val| [key, resolve_references(val)] }.flatten(1)
|
448
|
+
Hash[*arr]
|
449
|
+
when Array then
|
450
|
+
obj.collect { |item| resolve_references(item) }
|
444
451
|
else
|
445
452
|
obj
|
446
453
|
end
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -154,23 +154,26 @@ class PDF::Reader
|
|
154
154
|
trailer[:Info] = stream.hash[:Info] if stream.hash[:Info]
|
155
155
|
trailer[:Prev] = stream.hash[:Prev] if stream.hash[:Prev]
|
156
156
|
|
157
|
-
widths
|
157
|
+
widths = stream.hash[:W]
|
158
158
|
entry_length = widths.inject(0) { |s, w| s + w }
|
159
|
-
raw_data
|
159
|
+
raw_data = StringIO.new(stream.unfiltered_data)
|
160
160
|
if stream.hash[:Index]
|
161
|
-
index = stream.hash[:Index]
|
161
|
+
index = stream.hash[:Index]
|
162
162
|
else
|
163
|
-
index = 0
|
163
|
+
index = [0, stream.hash[:Size]]
|
164
164
|
end
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
165
|
+
index.each_slice(2) do |start_id, size|
|
166
|
+
obj_ids = (start_id..(start_id+(size-1)))
|
167
|
+
obj_ids.each do |objid|
|
168
|
+
entry = raw_data.read(entry_length) || ""
|
169
|
+
f1 = unpack_bytes(entry[0,widths[0]])
|
170
|
+
f2 = unpack_bytes(entry[widths[0],widths[1]])
|
171
|
+
f3 = unpack_bytes(entry[widths[0]+widths[1],widths[2]])
|
172
|
+
if f1 == 1 && f2 > 0
|
173
|
+
store(objid, f3, f2)
|
174
|
+
elsif f1 == 2 && f2 > 0
|
175
|
+
store(objid, 0, PDF::Reader::Reference.new(f2, 0))
|
176
|
+
end
|
174
177
|
end
|
175
178
|
end
|
176
179
|
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 57
|
5
4
|
prerelease: false
|
6
5
|
segments:
|
7
6
|
- 0
|
8
7
|
- 9
|
9
|
-
- 1
|
10
|
-
version: 0.9.1
|
8
|
+
- 2
|
9
|
+
version: 0.9.2
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- James Healy
|
@@ -15,7 +14,7 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date:
|
17
|
+
date: 2011-04-24 00:00:00 +10:00
|
19
18
|
default_executable:
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
@@ -26,7 +25,6 @@ dependencies:
|
|
26
25
|
requirements:
|
27
26
|
- - ">="
|
28
27
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 3
|
30
28
|
segments:
|
31
29
|
- 0
|
32
30
|
version: "0"
|
@@ -40,7 +38,6 @@ dependencies:
|
|
40
38
|
requirements:
|
41
39
|
- - ">="
|
42
40
|
- !ruby/object:Gem::Version
|
43
|
-
hash: 3
|
44
41
|
segments:
|
45
42
|
- 0
|
46
43
|
version: "0"
|
@@ -54,7 +51,6 @@ dependencies:
|
|
54
51
|
requirements:
|
55
52
|
- - ~>
|
56
53
|
- !ruby/object:Gem::Version
|
57
|
-
hash: 1
|
58
54
|
segments:
|
59
55
|
- 2
|
60
56
|
- 1
|
@@ -69,7 +65,6 @@ dependencies:
|
|
69
65
|
requirements:
|
70
66
|
- - ">="
|
71
67
|
- !ruby/object:Gem::Version
|
72
|
-
hash: 25
|
73
68
|
segments:
|
74
69
|
- 0
|
75
70
|
- 9
|
@@ -158,7 +153,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
158
153
|
requirements:
|
159
154
|
- - ">="
|
160
155
|
- !ruby/object:Gem::Version
|
161
|
-
hash: 57
|
162
156
|
segments:
|
163
157
|
- 1
|
164
158
|
- 8
|
@@ -169,7 +163,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
169
163
|
requirements:
|
170
164
|
- - ">="
|
171
165
|
- !ruby/object:Gem::Version
|
172
|
-
hash: 3
|
173
166
|
segments:
|
174
167
|
- 0
|
175
168
|
version: "0"
|