pdf-reader 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +8 -0
- data/lib/pdf/reader/encoding.rb +4 -3
- data/lib/pdf/reader/pages_strategy.rb +12 -5
- data/lib/pdf/reader/xref.rb +16 -13
- metadata +3 -10
data/CHANGELOG
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
v0.9.2 (24th April 2011)
|
2
|
+
- add basic support for fonts with Identity-V encoding.
|
3
|
+
- bug: improve robustness of text extraction
|
4
|
+
- thanks to Evan Arnold for reporting
|
5
|
+
- bug: fix loading of nested resources on XObjects
|
6
|
+
- thanks to Samuel Williams for reporting
|
7
|
+
- bug: improve parsing of files with XRef object streams
|
8
|
+
|
1
9
|
v0.9.1 (21st December 2010)
|
2
10
|
- force gem to only install on ruby 1.8.7 or higher
|
3
11
|
- maintaining support for earlier versions takes more time than I have
|
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -116,7 +116,7 @@ class PDF::Reader
|
|
116
116
|
def original_codepoint_to_unicode(cp, tounicode = nil)
|
117
117
|
if tounicode && (code = tounicode.decode(cp))
|
118
118
|
code
|
119
|
-
elsif
|
119
|
+
elsif to_unicode_required? && (tounicode.nil? || tounicode.decode(cp).nil?)
|
120
120
|
PDF::Reader::Encoding::UNKNOWN_CHAR
|
121
121
|
elsif mapping[cp]
|
122
122
|
mapping[cp]
|
@@ -129,7 +129,7 @@ class PDF::Reader
|
|
129
129
|
|
130
130
|
def get_unpack(enc)
|
131
131
|
case enc
|
132
|
-
when :"Identity-H", :UTF16Encoding
|
132
|
+
when :"Identity-H", :"Identity-V", :UTF16Encoding
|
133
133
|
"n*"
|
134
134
|
else
|
135
135
|
"C*"
|
@@ -140,6 +140,7 @@ class PDF::Reader
|
|
140
140
|
return File.dirname(__FILE__) + "/encodings/standard.txt" if enc.nil?
|
141
141
|
files = {
|
142
142
|
:"Identity-H" => nil,
|
143
|
+
:"Identity-V" => nil,
|
143
144
|
:MacRomanEncoding => File.dirname(__FILE__) + "/encodings/mac_roman.txt",
|
144
145
|
:MacExpertEncoding => File.dirname(__FILE__) + "/encodings/mac_expert.txt",
|
145
146
|
:PDFDocEncoding => File.dirname(__FILE__) + "/encodings/pdf_doc.txt",
|
@@ -158,7 +159,7 @@ class PDF::Reader
|
|
158
159
|
end
|
159
160
|
|
160
161
|
def unicode_required?(enc)
|
161
|
-
enc == :"Identity-H"
|
162
|
+
enc == :"Identity-H" or enc == :"Identity-V"
|
162
163
|
end
|
163
164
|
|
164
165
|
def mapping
|
@@ -310,11 +310,15 @@ class PDF::Reader
|
|
310
310
|
|
311
311
|
if xobject && xobject.hash[:Subtype] == :Form
|
312
312
|
callback(:begin_form_xobject)
|
313
|
-
|
314
|
-
|
315
|
-
|
313
|
+
xobj_resources = @ohash.object(xobject.hash[:Resources])
|
314
|
+
if xobj_resources
|
315
|
+
resources.push xobj_resources
|
316
|
+
walk_resources(xobj_resources)
|
317
|
+
end
|
318
|
+
fonts = font_hash_from_resources(xobj_resources)
|
316
319
|
content_stream(xobject, fonts)
|
317
320
|
callback(:end_form_xobject)
|
321
|
+
resources.pop if xobj_resources
|
318
322
|
end
|
319
323
|
end
|
320
324
|
|
@@ -439,8 +443,11 @@ class PDF::Reader
|
|
439
443
|
obj
|
440
444
|
when PDF::Reader::Reference then
|
441
445
|
resolve_references(@ohash.object(obj))
|
442
|
-
when Hash then
|
443
|
-
|
446
|
+
when Hash then
|
447
|
+
arr = obj.map { |key,val| [key, resolve_references(val)] }.flatten(1)
|
448
|
+
Hash[*arr]
|
449
|
+
when Array then
|
450
|
+
obj.collect { |item| resolve_references(item) }
|
444
451
|
else
|
445
452
|
obj
|
446
453
|
end
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -154,23 +154,26 @@ class PDF::Reader
|
|
154
154
|
trailer[:Info] = stream.hash[:Info] if stream.hash[:Info]
|
155
155
|
trailer[:Prev] = stream.hash[:Prev] if stream.hash[:Prev]
|
156
156
|
|
157
|
-
widths
|
157
|
+
widths = stream.hash[:W]
|
158
158
|
entry_length = widths.inject(0) { |s, w| s + w }
|
159
|
-
raw_data
|
159
|
+
raw_data = StringIO.new(stream.unfiltered_data)
|
160
160
|
if stream.hash[:Index]
|
161
|
-
index = stream.hash[:Index]
|
161
|
+
index = stream.hash[:Index]
|
162
162
|
else
|
163
|
-
index = 0
|
163
|
+
index = [0, stream.hash[:Size]]
|
164
164
|
end
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
165
|
+
index.each_slice(2) do |start_id, size|
|
166
|
+
obj_ids = (start_id..(start_id+(size-1)))
|
167
|
+
obj_ids.each do |objid|
|
168
|
+
entry = raw_data.read(entry_length) || ""
|
169
|
+
f1 = unpack_bytes(entry[0,widths[0]])
|
170
|
+
f2 = unpack_bytes(entry[widths[0],widths[1]])
|
171
|
+
f3 = unpack_bytes(entry[widths[0]+widths[1],widths[2]])
|
172
|
+
if f1 == 1 && f2 > 0
|
173
|
+
store(objid, f3, f2)
|
174
|
+
elsif f1 == 2 && f2 > 0
|
175
|
+
store(objid, 0, PDF::Reader::Reference.new(f2, 0))
|
176
|
+
end
|
174
177
|
end
|
175
178
|
end
|
176
179
|
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 57
|
5
4
|
prerelease: false
|
6
5
|
segments:
|
7
6
|
- 0
|
8
7
|
- 9
|
9
|
-
- 1
|
10
|
-
version: 0.9.1
|
8
|
+
- 2
|
9
|
+
version: 0.9.2
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- James Healy
|
@@ -15,7 +14,7 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date:
|
17
|
+
date: 2011-04-24 00:00:00 +10:00
|
19
18
|
default_executable:
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
@@ -26,7 +25,6 @@ dependencies:
|
|
26
25
|
requirements:
|
27
26
|
- - ">="
|
28
27
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 3
|
30
28
|
segments:
|
31
29
|
- 0
|
32
30
|
version: "0"
|
@@ -40,7 +38,6 @@ dependencies:
|
|
40
38
|
requirements:
|
41
39
|
- - ">="
|
42
40
|
- !ruby/object:Gem::Version
|
43
|
-
hash: 3
|
44
41
|
segments:
|
45
42
|
- 0
|
46
43
|
version: "0"
|
@@ -54,7 +51,6 @@ dependencies:
|
|
54
51
|
requirements:
|
55
52
|
- - ~>
|
56
53
|
- !ruby/object:Gem::Version
|
57
|
-
hash: 1
|
58
54
|
segments:
|
59
55
|
- 2
|
60
56
|
- 1
|
@@ -69,7 +65,6 @@ dependencies:
|
|
69
65
|
requirements:
|
70
66
|
- - ">="
|
71
67
|
- !ruby/object:Gem::Version
|
72
|
-
hash: 25
|
73
68
|
segments:
|
74
69
|
- 0
|
75
70
|
- 9
|
@@ -158,7 +153,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
158
153
|
requirements:
|
159
154
|
- - ">="
|
160
155
|
- !ruby/object:Gem::Version
|
161
|
-
hash: 57
|
162
156
|
segments:
|
163
157
|
- 1
|
164
158
|
- 8
|
@@ -169,7 +163,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
169
163
|
requirements:
|
170
164
|
- - ">="
|
171
165
|
- !ruby/object:Gem::Version
|
172
|
-
hash: 3
|
173
166
|
segments:
|
174
167
|
- 0
|
175
168
|
version: "0"
|