pdf-reader 1.0.0.beta1 → 1.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +7 -0
- data/README.rdoc +38 -4
- data/Rakefile +45 -1
- data/examples/extract_fonts.rb +1 -0
- data/examples/extract_images.rb +9 -14
- data/lib/pdf/reader.rb +50 -2
- data/lib/pdf/reader/buffer.rb +20 -20
- data/lib/pdf/reader/cmap.rb +2 -0
- data/lib/pdf/reader/encoding.rb +16 -17
- data/lib/pdf/reader/filter.rb +1 -1
- data/lib/pdf/reader/font.rb +3 -4
- data/lib/pdf/reader/form_xobject.rb +8 -7
- data/lib/pdf/reader/glyph_hash.rb +1 -0
- data/lib/pdf/reader/glyphlist.txt +122 -0
- data/lib/pdf/reader/lzw.rb +2 -2
- data/lib/pdf/reader/object_hash.rb +30 -4
- data/lib/pdf/reader/page.rb +10 -58
- data/lib/pdf/reader/page_text_receiver.rb +26 -17
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/parser.rb +40 -21
- data/lib/pdf/reader/resource_methods.rb +60 -0
- data/lib/pdf/reader/xref.rb +1 -1
- metadata +75 -104
data/CHANGELOG
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
v1.0.0.rc1 (19th December 2011)
|
2
|
+
- performance optimisations (all by Bernerd Schaefer)
|
3
|
+
- some improvements to text extraction from form xobjects
|
4
|
+
- assume invalid font encodings are StandardEncoding
|
5
|
+
- use binary mode when opening PDFs to stop ruby being helpful and transcoding
|
6
|
+
bytes for us
|
7
|
+
|
1
8
|
v1.0.0.beta1 (6th October 2011)
|
2
9
|
- ensure inline images that contain "EI" are correctly parsed
|
3
10
|
(thanks Bernerd Schaefer)
|
data/README.rdoc
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
= !PLEASE NOTE!
|
2
|
+
|
3
|
+
All the examples below are for the latest (pre-release) version of the gem (0.11)
|
4
|
+
|
5
|
+
If you have installed the gem via rubygems with the command:
|
6
|
+
|
7
|
+
$ gem install pdf-reader
|
8
|
+
|
9
|
+
Then the examples below *will not work* for you. Please check the examples that
|
10
|
+
come with the previous version of the gem (0.10).
|
11
|
+
|
12
|
+
If you want to install the latest version of this gem use the command:
|
13
|
+
|
14
|
+
$ gem install pdf-reader --prerelease
|
15
|
+
|
16
|
+
= Release Notes
|
17
|
+
|
1
18
|
The PDF::Reader library implements a PDF parser conforming as much as possible
|
2
19
|
to the PDF specification from Adobe.
|
3
20
|
|
@@ -32,7 +49,7 @@ this object.
|
|
32
49
|
puts reader.metadata
|
33
50
|
puts reader.page_count
|
34
51
|
|
35
|
-
PDF::Reader.new
|
52
|
+
PDF::Reader.new accepts an IO stream or a filename. Here's an example with
|
36
53
|
an IO stream:
|
37
54
|
|
38
55
|
require 'open-uri'
|
@@ -41,6 +58,14 @@ an IO stream:
|
|
41
58
|
reader = PDF::Reader.new(io)
|
42
59
|
puts reader.info
|
43
60
|
|
61
|
+
If you open a PDF with File#open or IO#open, I strongly recommend using "rb"
|
62
|
+
mode to ensure the file isn't mangled by ruby being 'helpful'.
|
63
|
+
|
64
|
+
File.open("somefile.pdf", "rb") do |io|
|
65
|
+
reader = PDF::Reader.new(io)
|
66
|
+
puts reader.info
|
67
|
+
end
|
68
|
+
|
44
69
|
PDF is a page based file format, so most visible information is available via
|
45
70
|
page-based iteration
|
46
71
|
|
@@ -80,9 +105,8 @@ The second method is preferred to increase the effectiveness of internal caching
|
|
80
105
|
|
81
106
|
= Text Encoding
|
82
107
|
|
83
|
-
|
84
|
-
|
85
|
-
text will be converted to UTF-8 before it is passed back from PDF::Reader.
|
108
|
+
Regardless of the internal encoding used in the PDF all text will be converted
|
109
|
+
to UTF-8 before it is passed back from PDF::Reader.
|
86
110
|
|
87
111
|
Strings that contain binary data (like font blobs) will be marked as such on
|
88
112
|
M17N aware VMs.
|
@@ -107,6 +131,16 @@ don't, 'rescue MalformedPDFError' will catch all the subclassed errors as well.
|
|
107
131
|
Any other exceptions should be considered bugs in PDF::Reader (please
|
108
132
|
report it!).
|
109
133
|
|
134
|
+
= PDF Integrity
|
135
|
+
|
136
|
+
Windows developers may run into problems when running specs due to MalformedPDFErrors.
|
137
|
+
This is usually because CRLF characters are automatically added to some of the PDFs in
|
138
|
+
the spec folder when you check out a branch from Git.
|
139
|
+
|
140
|
+
To remove any invalid CRLF characters added while checking out a branch from Git, run:
|
141
|
+
|
142
|
+
rake fix_integrity
|
143
|
+
|
110
144
|
= Maintainers
|
111
145
|
|
112
146
|
- James Healy <mailto:jimmy@deefa.com>
|
data/Rakefile
CHANGED
@@ -18,7 +18,7 @@ RSpec::Core::RakeTask.new("spec") do |t|
|
|
18
18
|
t.ruby_opts = "-w"
|
19
19
|
end
|
20
20
|
|
21
|
-
#
|
21
|
+
# Generate the RDoc documentation
|
22
22
|
desc "Create documentation"
|
23
23
|
Rake::RDocTask.new("doc") do |rdoc|
|
24
24
|
rdoc.title = "pdf-reader"
|
@@ -32,3 +32,47 @@ Rake::RDocTask.new("doc") do |rdoc|
|
|
32
32
|
end
|
33
33
|
|
34
34
|
RoodiTask.new 'roodi', ['lib/**/*.rb']
|
35
|
+
|
36
|
+
desc "Create a YAML file of integrity info for PDFs in the spec suite"
|
37
|
+
task :integrity_yaml do
|
38
|
+
data = {}
|
39
|
+
Dir.glob("spec/data/**/*.*").each do |path|
|
40
|
+
path_without_spec = path.gsub("spec/","")
|
41
|
+
data[path_without_spec] = {
|
42
|
+
:bytes => File.size(path),
|
43
|
+
:md5 => `md5sum "#{path}"`.split.first
|
44
|
+
} if File.file?(path)
|
45
|
+
end
|
46
|
+
File.open("spec/integrity.yml","wb") { |f| f.write YAML.dump(data)}
|
47
|
+
end
|
48
|
+
|
49
|
+
desc "Remove any CRLF characters added by Git"
|
50
|
+
task :fix_integrity do
|
51
|
+
yaml_path = File.expand_path("spec/integrity.yml",File.dirname(__FILE__))
|
52
|
+
integrity = YAML.load_file(yaml_path)
|
53
|
+
|
54
|
+
Dir.glob("spec/data/**/*.pdf").each do |path|
|
55
|
+
path_relative_to_spec_folder = path[/.+(data\/.+)/,1]
|
56
|
+
item = integrity[path_relative_to_spec_folder]
|
57
|
+
|
58
|
+
if File.file?(path)
|
59
|
+
file_contents = File.open(path, "rb") { |f| f.read }
|
60
|
+
md5 = Digest::MD5.hexdigest(file_contents)
|
61
|
+
|
62
|
+
unless md5 == item[:md5]
|
63
|
+
#file md5 does not match what was checked into Git
|
64
|
+
|
65
|
+
if Digest::MD5.hexdigest(file_contents.gsub(/\r\n/, "\n")) == item[:md5]
|
66
|
+
#pdf file is fixable by swapping CRLF characters
|
67
|
+
|
68
|
+
File.open(path, "wb") do |f|
|
69
|
+
f.write(file_contents.gsub(/\r\n/, "\n"))
|
70
|
+
end
|
71
|
+
puts "Replaced CRLF characters in: #{path}"
|
72
|
+
else
|
73
|
+
puts "Failed to fix: #{path}"
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
data/examples/extract_fonts.rb
CHANGED
data/examples/extract_images.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
1
2
|
# coding: utf-8
|
2
3
|
|
3
4
|
# This demonstrates a way to extract some images (those based on the JPG or
|
@@ -14,9 +15,7 @@ module ExtractImages
|
|
14
15
|
class Extractor
|
15
16
|
|
16
17
|
def page(page)
|
17
|
-
|
18
|
-
|
19
|
-
process_resources(page, page.resources, count)
|
18
|
+
process_page(page, 0)
|
20
19
|
end
|
21
20
|
|
22
21
|
private
|
@@ -25,17 +24,13 @@ module ExtractImages
|
|
25
24
|
@complete_refs ||= {}
|
26
25
|
end
|
27
26
|
|
28
|
-
def
|
29
|
-
xobjects =
|
30
|
-
return count if xobjects.
|
27
|
+
def process_page(page, count)
|
28
|
+
xobjects = page.xobjects
|
29
|
+
return count if xobjects.empty?
|
31
30
|
|
32
31
|
xobjects.each do |name, stream|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
stream = page.objects.deref(stream)
|
37
|
-
|
38
|
-
if stream.hash[:Subtype] == :Image
|
32
|
+
case stream.hash[:Subtype]
|
33
|
+
when :Image then
|
39
34
|
count += 1
|
40
35
|
|
41
36
|
case stream.hash[:Filter]
|
@@ -46,8 +41,8 @@ module ExtractImages
|
|
46
41
|
else
|
47
42
|
ExtractImages::Raw.new(stream).save("#{page.number}-#{count}-#{name}.tif")
|
48
43
|
end
|
49
|
-
|
50
|
-
count =
|
44
|
+
when :Form then
|
45
|
+
count = process_page(PDF::Reader::FormXObject.new(page, stream), count)
|
51
46
|
end
|
52
47
|
end
|
53
48
|
count
|
data/lib/pdf/reader.rb
CHANGED
@@ -118,12 +118,19 @@ module PDF
|
|
118
118
|
end
|
119
119
|
|
120
120
|
def info
|
121
|
-
@objects.deref(@objects.trailer[:Info])
|
121
|
+
dict = @objects.deref(@objects.trailer[:Info])
|
122
|
+
doc_strings_to_utf8(dict)
|
122
123
|
end
|
123
124
|
|
124
125
|
def metadata
|
125
126
|
stream = @objects.deref(root[:Metadata])
|
126
|
-
|
127
|
+
if stream.nil?
|
128
|
+
nil
|
129
|
+
else
|
130
|
+
xml = stream.unfiltered_data
|
131
|
+
xml.force_encoding("utf-8") if xml.respond_to?(:force_encoding)
|
132
|
+
xml
|
133
|
+
end
|
127
134
|
end
|
128
135
|
|
129
136
|
def page_count
|
@@ -269,6 +276,46 @@ module PDF
|
|
269
276
|
|
270
277
|
private
|
271
278
|
|
279
|
+
# recursively convert strings from outside a content stream into UTF-8
|
280
|
+
#
|
281
|
+
def doc_strings_to_utf8(obj)
|
282
|
+
case obj
|
283
|
+
when ::Hash then
|
284
|
+
{}.tap { |new_hash|
|
285
|
+
obj.each do |key, value|
|
286
|
+
new_hash[key] = doc_strings_to_utf8(value)
|
287
|
+
end
|
288
|
+
}
|
289
|
+
when Array then
|
290
|
+
obj.map { |item| doc_strings_to_utf8(item) }
|
291
|
+
when String then
|
292
|
+
if obj[0,2].unpack("C*") == [254, 255]
|
293
|
+
utf16_to_utf8(obj)
|
294
|
+
else
|
295
|
+
pdfdoc_to_utf8(obj)
|
296
|
+
end
|
297
|
+
else
|
298
|
+
obj
|
299
|
+
end
|
300
|
+
end
|
301
|
+
|
302
|
+
# TODO find a PDF I can use to spec this behaviour
|
303
|
+
#
|
304
|
+
def pdfdoc_to_utf8(obj)
|
305
|
+
obj.force_encoding("utf-8") if obj.respond_to?(:force_encoding)
|
306
|
+
obj
|
307
|
+
end
|
308
|
+
|
309
|
+
# one day we'll all run on a 1.9 compatible VM and I can just do this with
|
310
|
+
# String#encode
|
311
|
+
#
|
312
|
+
def utf16_to_utf8(obj)
|
313
|
+
str = obj[2, obj.size]
|
314
|
+
str = str.unpack("n*").pack("U*")
|
315
|
+
str.force_encoding("utf-8") if str.respond_to?(:force_encoding)
|
316
|
+
str
|
317
|
+
end
|
318
|
+
|
272
319
|
def strategies
|
273
320
|
@strategies ||= [
|
274
321
|
::PDF::Reader::MetadataStrategy,
|
@@ -284,6 +331,7 @@ module PDF
|
|
284
331
|
end
|
285
332
|
################################################################################
|
286
333
|
|
334
|
+
require 'pdf/reader/resource_methods'
|
287
335
|
require 'pdf/reader/abstract_strategy'
|
288
336
|
require 'pdf/reader/buffer'
|
289
337
|
require 'pdf/reader/cmap'
|
data/lib/pdf/reader/buffer.rb
CHANGED
@@ -151,14 +151,11 @@ class PDF::Reader
|
|
151
151
|
#
|
152
152
|
def prepare_tokens
|
153
153
|
10.times do
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
prepare_regular_token
|
160
|
-
elsif state == :inline
|
161
|
-
prepare_inline_token
|
154
|
+
case state
|
155
|
+
when :literal_string then prepare_literal_token
|
156
|
+
when :hex_string then prepare_hex_token
|
157
|
+
when :regular then prepare_regular_token
|
158
|
+
when :inline then prepare_inline_token
|
162
159
|
end
|
163
160
|
end
|
164
161
|
|
@@ -169,14 +166,12 @@ class PDF::Reader
|
|
169
166
|
# Determine the current context/state by examining the last token we found
|
170
167
|
#
|
171
168
|
def state
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
:
|
178
|
-
elsif in_content_stream? && @tokens[-1] == "ID"
|
179
|
-
:inline
|
169
|
+
case @tokens.last
|
170
|
+
when "(" then :literal_string
|
171
|
+
when "<" then :hex_string
|
172
|
+
when "stream" then :stream
|
173
|
+
when "ID"
|
174
|
+
in_content_stream? ? :inline : :regular
|
180
175
|
else
|
181
176
|
:regular
|
182
177
|
end
|
@@ -209,13 +204,18 @@ class PDF::Reader
|
|
209
204
|
def prepare_inline_token
|
210
205
|
str = ""
|
211
206
|
|
212
|
-
|
207
|
+
buffer = []
|
208
|
+
|
209
|
+
until buffer[0] =~ /\s/ && buffer[1, 2] == ["E", "I"]
|
213
210
|
chr = @io.read(1)
|
214
|
-
|
215
|
-
|
211
|
+
buffer << chr
|
212
|
+
|
213
|
+
if buffer.length > 3
|
214
|
+
str << buffer.shift
|
215
|
+
end
|
216
216
|
end
|
217
217
|
|
218
|
-
@tokens << string_token(str
|
218
|
+
@tokens << string_token(str.strip)
|
219
219
|
@io.seek(-3, IO::SEEK_CUR) unless chr.nil?
|
220
220
|
end
|
221
221
|
|
data/lib/pdf/reader/cmap.rb
CHANGED
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -137,24 +137,23 @@ class PDF::Reader
|
|
137
137
|
end
|
138
138
|
|
139
139
|
def get_mapping_file(enc)
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
files[enc]
|
140
|
+
case enc
|
141
|
+
when :"Identity-H", :"Identity-V", :UTF16Encoding then
|
142
|
+
nil
|
143
|
+
when :MacRomanEncoding then
|
144
|
+
File.dirname(__FILE__) + "/encodings/mac_roman.txt"
|
145
|
+
when :MacExpertEncoding then
|
146
|
+
File.dirname(__FILE__) + "/encodings/mac_expert.txt"
|
147
|
+
when :PDFDocEncoding then
|
148
|
+
File.dirname(__FILE__) + "/encodings/pdf_doc.txt"
|
149
|
+
when :SymbolEncoding then
|
150
|
+
File.dirname(__FILE__) + "/encodings/symbol.txt"
|
151
|
+
when :WinAnsiEncoding then
|
152
|
+
File.dirname(__FILE__) + "/encodings/win_ansi.txt"
|
153
|
+
when :ZapfDingbatsEncoding then
|
154
|
+
File.dirname(__FILE__) + "/encodings/zapf_dingbats.txt"
|
156
155
|
else
|
157
|
-
|
156
|
+
File.dirname(__FILE__) + "/encodings/standard.txt"
|
158
157
|
end
|
159
158
|
end
|
160
159
|
|
data/lib/pdf/reader/filter.rb
CHANGED
data/lib/pdf/reader/font.rb
CHANGED
@@ -39,6 +39,8 @@ class PDF::Reader
|
|
39
39
|
extract_base_info(obj)
|
40
40
|
extract_descriptor(obj)
|
41
41
|
extract_descendants(obj)
|
42
|
+
|
43
|
+
@encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
|
42
44
|
end
|
43
45
|
|
44
46
|
def basefont=(font)
|
@@ -59,10 +61,7 @@ class PDF::Reader
|
|
59
61
|
raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported" if encoding.kind_of?(String)
|
60
62
|
|
61
63
|
if params.class == String
|
62
|
-
|
63
|
-
# If an encoding hasn't been specified, assume the text using this
|
64
|
-
# font is in Adobe Standard Encoding.
|
65
|
-
(encoding || PDF::Reader::Encoding.new(:StandardEncoding)).to_utf8(params, tounicode)
|
64
|
+
encoding.to_utf8(params, tounicode)
|
66
65
|
elsif params.class == Array
|
67
66
|
params.collect { |param| self.to_utf8(param) }
|
68
67
|
else
|
@@ -11,6 +11,7 @@ module PDF
|
|
11
11
|
# This behaves and looks much like a limited PDF::Reader::Page class.
|
12
12
|
#
|
13
13
|
class FormXObject
|
14
|
+
include ResourceMethods
|
14
15
|
|
15
16
|
def initialize(page, xobject)
|
16
17
|
@page = page
|
@@ -18,12 +19,6 @@ module PDF
|
|
18
19
|
@xobject = @objects.deref(xobject)
|
19
20
|
end
|
20
21
|
|
21
|
-
# Returns the resources that accompany this form.
|
22
|
-
#
|
23
|
-
def resources
|
24
|
-
@resources ||= @objects.deref(@xobject.hash[:Resources]) || {}
|
25
|
-
end
|
26
|
-
|
27
22
|
# return a hash of fonts used on this form.
|
28
23
|
#
|
29
24
|
# The keys are the font labels used within the form content stream.
|
@@ -31,7 +26,7 @@ module PDF
|
|
31
26
|
# The values are PDF::Reader::Font instances that provide access
|
32
27
|
# to most available metrics for each font.
|
33
28
|
#
|
34
|
-
def
|
29
|
+
def font_objects
|
35
30
|
raw_fonts = @objects.deref(resources[:Font] || {})
|
36
31
|
::Hash[raw_fonts.map { |label, font|
|
37
32
|
[label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
|
@@ -56,6 +51,12 @@ module PDF
|
|
56
51
|
|
57
52
|
private
|
58
53
|
|
54
|
+
# Returns the resources that accompany this form.
|
55
|
+
#
|
56
|
+
def resources
|
57
|
+
@resources ||= @objects.deref(@xobject.hash[:Resources]) || {}
|
58
|
+
end
|
59
|
+
|
59
60
|
def callback(receivers, name, params=[])
|
60
61
|
receivers.each do |receiver|
|
61
62
|
receiver.send(name, *params) if receiver.respond_to?(name)
|
@@ -4281,3 +4281,125 @@ zstroke;01B6
|
|
4281
4281
|
zuhiragana;305A
|
4282
4282
|
zukatakana;30BA
|
4283
4283
|
#--end
|
4284
|
+
#--start wingdings
|
4285
|
+
scissors;2702
|
4286
|
+
scissorscutting;2701
|
4287
|
+
telephonesolid;260E
|
4288
|
+
telhandsetcirc;2706
|
4289
|
+
envelopeback;2709
|
4290
|
+
hourglass;231B
|
4291
|
+
keyboard;2328
|
4292
|
+
tapereel;2707
|
4293
|
+
handwrite;270D
|
4294
|
+
handv;270C
|
4295
|
+
handptleft;261C
|
4296
|
+
handptright;261E
|
4297
|
+
handptup;261D
|
4298
|
+
handptdown;261F
|
4299
|
+
handhalt;270B
|
4300
|
+
frownface;2639
|
4301
|
+
skullcrossbones;2620
|
4302
|
+
flag;2690
|
4303
|
+
airplane;2708
|
4304
|
+
sunshine;263C
|
4305
|
+
snowflake;2744
|
4306
|
+
crossshadow;271E
|
4307
|
+
crossmaltese;2720
|
4308
|
+
starofdavid;2721
|
4309
|
+
crescentstar;262A
|
4310
|
+
om;0950
|
4311
|
+
wheel;2638
|
4312
|
+
aries;2648
|
4313
|
+
taurus;2649
|
4314
|
+
gemini;264A
|
4315
|
+
cancer;264B
|
4316
|
+
leo;264C
|
4317
|
+
virgo;264D
|
4318
|
+
libra;264E
|
4319
|
+
scorpio;264F
|
4320
|
+
saggitarius;2650
|
4321
|
+
capricorn;2651
|
4322
|
+
aquarius;2652
|
4323
|
+
pisces;2653
|
4324
|
+
ampersanditlc;0026
|
4325
|
+
ampersandit;0026
|
4326
|
+
circle6;25CF
|
4327
|
+
circleshadowdwn;274D
|
4328
|
+
square6;25A0
|
4329
|
+
box3;25A1
|
4330
|
+
boxshadowdwn;2751
|
4331
|
+
boxshadowup;2752
|
4332
|
+
lozenge4;2B27
|
4333
|
+
lozenge6;29EB
|
4334
|
+
rhombus6;25C6
|
4335
|
+
xrhombus;2756
|
4336
|
+
rhombus4;2B25
|
4337
|
+
escape;2353
|
4338
|
+
command;2318
|
4339
|
+
rosette;2740
|
4340
|
+
rosettesolid;273F
|
4341
|
+
quotedbllftbld;275D
|
4342
|
+
quotedblrtbld;275E
|
4343
|
+
.notdef;25AF
|
4344
|
+
zerosans;24EA
|
4345
|
+
onesans;2460
|
4346
|
+
twosans;2461
|
4347
|
+
threesans;2462
|
4348
|
+
foursans;2463
|
4349
|
+
fivesans;2464
|
4350
|
+
sixsans;2465
|
4351
|
+
sevensans;2466
|
4352
|
+
eightsans;2467
|
4353
|
+
ninesans;2468
|
4354
|
+
tensans;2469
|
4355
|
+
zerosansinv;24FF
|
4356
|
+
onesansinv;2776
|
4357
|
+
twosansinv;2777
|
4358
|
+
threesansinv;2778
|
4359
|
+
foursansinv;2779
|
4360
|
+
fivesansinv;277A
|
4361
|
+
sixsansinv;277B
|
4362
|
+
sevensansinv;277C
|
4363
|
+
eightsansinv;277D
|
4364
|
+
ninesansinv;277E
|
4365
|
+
tensansinv;277F
|
4366
|
+
circle2;00B7
|
4367
|
+
circle4;2022
|
4368
|
+
square2;25AA
|
4369
|
+
ring2;25CB
|
4370
|
+
ring4;2B55
|
4371
|
+
ringbutton2;25C9
|
4372
|
+
target;25CE
|
4373
|
+
square4;25AA
|
4374
|
+
box2;25FB
|
4375
|
+
crosstar2;2726
|
4376
|
+
pentastar2;2605
|
4377
|
+
hexstar2;2736
|
4378
|
+
octastar2;2734
|
4379
|
+
dodecastar3;2739
|
4380
|
+
octastar4;2735
|
4381
|
+
registercircle;2316
|
4382
|
+
cuspopen;27E1
|
4383
|
+
cuspopen1;2311
|
4384
|
+
circlestar;272A
|
4385
|
+
starshadow;2730
|
4386
|
+
head2right;27A2
|
4387
|
+
circleright;27B2
|
4388
|
+
barb4right;2794
|
4389
|
+
bleft;21E6
|
4390
|
+
bright;21E8
|
4391
|
+
bup;21E7
|
4392
|
+
bdown;21E9
|
4393
|
+
bleftright;2B04
|
4394
|
+
bupdown;21F3
|
4395
|
+
bnw;2B00
|
4396
|
+
bne;2B01
|
4397
|
+
bsw;2B03
|
4398
|
+
bse;2B02
|
4399
|
+
bdash1;25AD
|
4400
|
+
bdash2;25AB
|
4401
|
+
xmarkbld;2717
|
4402
|
+
checkbld;2713
|
4403
|
+
boxxmarkbld;2612
|
4404
|
+
boxcheckbld;2611
|
4405
|
+
#--end wingdings
|
data/lib/pdf/reader/lzw.rb
CHANGED
@@ -37,7 +37,7 @@ module PDF
|
|
37
37
|
while bits_left_in_chunk > 0 and @current_pos < @data.size
|
38
38
|
chunk = 0 if chunk.nil?
|
39
39
|
codepoint = @data[@current_pos, 1].unpack("C*")[0]
|
40
|
-
current_byte = codepoint & (2**@bits_left_in_byte -1) #clear consumed bits
|
40
|
+
current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
|
41
41
|
dif = bits_left_in_chunk - @bits_left_in_byte
|
42
42
|
if dif > 0 then current_byte <<= dif
|
43
43
|
elsif dif < 0 then current_byte >>= dif.abs
|
@@ -82,7 +82,7 @@ module PDF
|
|
82
82
|
def self.decode(data)
|
83
83
|
stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
|
84
84
|
result = ''
|
85
|
-
|
85
|
+
until (code = stream.read) == CODE_EOD
|
86
86
|
if code == CODE_CLEAR_TABLE
|
87
87
|
string_table = StringTable.new
|
88
88
|
code = stream.read
|
@@ -30,6 +30,7 @@ class PDF::Reader
|
|
30
30
|
|
31
31
|
attr_accessor :default
|
32
32
|
attr_reader :trailer, :pdf_version
|
33
|
+
attr_reader :sec_handler
|
33
34
|
|
34
35
|
# Creates a new ObjectHash object. Input can be a string with a valid filename
|
35
36
|
# or an IO-like object.
|
@@ -97,6 +98,27 @@ class PDF::Reader
|
|
97
98
|
end
|
98
99
|
alias :deref :object
|
99
100
|
|
101
|
+
# Recursively dereferences the object referred to by +key+. If +key+ is not
|
102
|
+
# a PDF::Reader::Reference, the key is returned unchanged.
|
103
|
+
#
|
104
|
+
def deref!(key)
|
105
|
+
case object = deref(key)
|
106
|
+
when Hash
|
107
|
+
{}.tap { |hash|
|
108
|
+
object.each do |k, value|
|
109
|
+
hash[k] = deref!(value)
|
110
|
+
end
|
111
|
+
}
|
112
|
+
when PDF::Reader::Stream
|
113
|
+
object.hash = deref!(object.hash)
|
114
|
+
object
|
115
|
+
when Array
|
116
|
+
object.map { |value| deref!(value) }
|
117
|
+
else
|
118
|
+
object
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
100
122
|
# Access an object from the PDF. key can be an int or a PDF::Reader::Reference
|
101
123
|
# object.
|
102
124
|
#
|
@@ -238,6 +260,10 @@ class PDF::Reader
|
|
238
260
|
trailer.has_key?(:Encrypt)
|
239
261
|
end
|
240
262
|
|
263
|
+
def sec_handler?
|
264
|
+
!!sec_handler
|
265
|
+
end
|
266
|
+
|
241
267
|
private
|
242
268
|
|
243
269
|
def build_security_handler(opts = {})
|
@@ -253,11 +279,11 @@ class PDF::Reader
|
|
253
279
|
end
|
254
280
|
|
255
281
|
def decrypt(ref, obj)
|
256
|
-
return obj
|
282
|
+
return obj unless sec_handler?
|
257
283
|
|
258
284
|
case obj
|
259
285
|
when PDF::Reader::Stream then
|
260
|
-
obj.data =
|
286
|
+
obj.data = sec_handler.decrypt(obj.data, ref)
|
261
287
|
obj
|
262
288
|
when Hash then
|
263
289
|
arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
|
@@ -265,7 +291,7 @@ class PDF::Reader
|
|
265
291
|
when Array then
|
266
292
|
obj.collect { |item| decrypt(ref, item) }
|
267
293
|
when String
|
268
|
-
|
294
|
+
sec_handler.decrypt(obj, ref)
|
269
295
|
else
|
270
296
|
obj
|
271
297
|
end
|
@@ -316,7 +342,7 @@ class PDF::Reader
|
|
316
342
|
if File.respond_to?(:binread)
|
317
343
|
File.binread(input.to_s)
|
318
344
|
else
|
319
|
-
File.
|
345
|
+
File.open(input.to_s,"rb") { |f| f.read }
|
320
346
|
end
|
321
347
|
end
|
322
348
|
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -12,6 +12,7 @@ module PDF
|
|
12
12
|
# objects accessor to help walk the page dictionary in any useful way.
|
13
13
|
#
|
14
14
|
class Page
|
15
|
+
include ResourceMethods
|
15
16
|
|
16
17
|
# lowlevel hash-like access to all objects in the underlying PDF
|
17
18
|
attr_reader :objects
|
@@ -45,73 +46,17 @@ module PDF
|
|
45
46
|
"<PDF::Reader::Page page: #{@pagenum}>"
|
46
47
|
end
|
47
48
|
|
48
|
-
# Returns the attributes that accompany this page
|
49
|
+
# Returns the attributes that accompany this page, including
|
49
50
|
# attributes inherited from parents.
|
50
51
|
#
|
51
52
|
def attributes
|
52
|
-
{}.tap { |hash|
|
53
|
+
@attributes ||= {}.tap { |hash|
|
53
54
|
page_with_ancestors.reverse.each do |obj|
|
54
55
|
hash.merge!(@objects.deref(obj))
|
55
56
|
end
|
56
57
|
}
|
57
58
|
end
|
58
59
|
|
59
|
-
# Returns the resources that accompany this page. Includes
|
60
|
-
# resources inherited from parents.
|
61
|
-
#
|
62
|
-
def resources
|
63
|
-
@resources ||= @objects.deref(attributes[:Resources]) || {}
|
64
|
-
end
|
65
|
-
|
66
|
-
# Returns a Hash of color spaces that are available to this page
|
67
|
-
#
|
68
|
-
def color_spaces
|
69
|
-
@objects.deref(resources[:ColorSpace]) || {}
|
70
|
-
end
|
71
|
-
|
72
|
-
# Returns a Hash of fonts that are available to this page
|
73
|
-
#
|
74
|
-
def fonts
|
75
|
-
@objects.deref(resources[:Font]) || {}
|
76
|
-
end
|
77
|
-
|
78
|
-
# Returns a Hash of external graphic states that are available to this
|
79
|
-
# page
|
80
|
-
#
|
81
|
-
def graphic_states
|
82
|
-
@objects.deref(resources[:ExtGState]) || {}
|
83
|
-
end
|
84
|
-
|
85
|
-
# Returns a Hash of patterns that are available to this page
|
86
|
-
#
|
87
|
-
def patterns
|
88
|
-
@objects.deref(resources[:Pattern]) || {}
|
89
|
-
end
|
90
|
-
|
91
|
-
# Returns an Array of procedure sets that are available to this page
|
92
|
-
#
|
93
|
-
def procedure_sets
|
94
|
-
@objects.deref(resources[:ProcSet]) || []
|
95
|
-
end
|
96
|
-
|
97
|
-
# Returns a Hash of properties sets that are available to this page
|
98
|
-
#
|
99
|
-
def properties
|
100
|
-
@objects.deref(resources[:Properties]) || {}
|
101
|
-
end
|
102
|
-
|
103
|
-
# Returns a Hash of shadings that are available to this page
|
104
|
-
#
|
105
|
-
def shadings
|
106
|
-
@objects.deref(resources[:Shading]) || {}
|
107
|
-
end
|
108
|
-
|
109
|
-
# Returns a Hash of XObjects that are available to this page
|
110
|
-
#
|
111
|
-
def xobjects
|
112
|
-
@objects.deref(resources[:XObject]) || {}
|
113
|
-
end
|
114
|
-
|
115
60
|
# returns the plain text content of this page encoded as UTF-8. Any
|
116
61
|
# characters that can't be translated will be returned as a ▯
|
117
62
|
#
|
@@ -168,6 +113,13 @@ module PDF
|
|
168
113
|
root ||= objects.deref(@objects.trailer[:Root])
|
169
114
|
end
|
170
115
|
|
116
|
+
# Returns the resources that accompany this page. Includes
|
117
|
+
# resources inherited from parents.
|
118
|
+
#
|
119
|
+
def resources
|
120
|
+
@resources ||= @objects.deref(attributes[:Resources]) || {}
|
121
|
+
end
|
122
|
+
|
171
123
|
def content_stream(receivers, instructions)
|
172
124
|
buffer = Buffer.new(StringIO.new(instructions), :content_stream => true)
|
173
125
|
parser = Parser.new(buffer, @objects)
|
@@ -29,8 +29,8 @@ module PDF
|
|
29
29
|
def page=(page)
|
30
30
|
@page = page
|
31
31
|
@objects = page.objects
|
32
|
-
@
|
33
|
-
@
|
32
|
+
@font_stack = [build_fonts(page.fonts)]
|
33
|
+
@xobject_stack = [page.xobjects]
|
34
34
|
@content = {}
|
35
35
|
@stack = [DEFAULT_GRAPHICS_STATE]
|
36
36
|
end
|
@@ -109,6 +109,10 @@ module PDF
|
|
109
109
|
state[:text_font_size] = size
|
110
110
|
end
|
111
111
|
|
112
|
+
def font_size
|
113
|
+
state[:text_font_size] * @text_matrix[0,0]
|
114
|
+
end
|
115
|
+
|
112
116
|
def set_text_leading(leading)
|
113
117
|
state[:text_leading] = leading
|
114
118
|
end
|
@@ -194,17 +198,23 @@ module PDF
|
|
194
198
|
#####################################################
|
195
199
|
def invoke_xobject(label)
|
196
200
|
save_graphics_state
|
197
|
-
|
201
|
+
dict = @xobject_stack.detect { |xobjects|
|
202
|
+
xobjects.has_key?(label)
|
203
|
+
}
|
204
|
+
xobject = dict ? dict[label] : nil
|
198
205
|
|
206
|
+
raise MalformedPDFError, "XObject #{label} not found" if xobject.nil?
|
199
207
|
matrix = xobject.hash[:Matrix]
|
200
208
|
concatenate_matrix(*matrix) if matrix
|
201
209
|
|
202
210
|
if xobject.hash[:Subtype] == :Form
|
203
211
|
form = PDF::Reader::FormXObject.new(@page, xobject)
|
204
|
-
@
|
212
|
+
@font_stack.unshift(form.font_objects)
|
213
|
+
@xobject_stack.unshift(form.xobjects)
|
205
214
|
form.walk(self)
|
215
|
+
@font_stack.shift
|
216
|
+
@xobject_stack.shift
|
206
217
|
end
|
207
|
-
@form_fonts = {}
|
208
218
|
|
209
219
|
restore_graphics_state
|
210
220
|
end
|
@@ -232,10 +242,10 @@ module PDF
|
|
232
242
|
|
233
243
|
def text_rendering_matrix
|
234
244
|
state_matrix = Matrix[
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
245
|
+
[font_size * state[:h_scaling], 0, 0],
|
246
|
+
[0, font_size, 0],
|
247
|
+
[0, state[:text_rise], 1]
|
248
|
+
]
|
239
249
|
|
240
250
|
state_matrix * @text_matrix * ctm
|
241
251
|
end
|
@@ -251,21 +261,17 @@ module PDF
|
|
251
261
|
# This returns a deep clone of the current state, ensuring changes are
|
252
262
|
# kept separate from earlier states.
|
253
263
|
#
|
254
|
-
#
|
255
|
-
# the deep clone. Kinda hacky, but effective.
|
264
|
+
# Marshal is used to round-trip the state through a string to easily
|
265
|
+
# perform the deep clone. Kinda hacky, but effective.
|
256
266
|
#
|
257
267
|
def clone_state
|
258
268
|
if @stack.empty?
|
259
269
|
{}
|
260
270
|
else
|
261
|
-
|
271
|
+
Marshal.load Marshal.dump(@stack.last)
|
262
272
|
end
|
263
273
|
end
|
264
274
|
|
265
|
-
def yaml_lib
|
266
|
-
Kernel.const_defined?("Psych") ? Psych : YAML
|
267
|
-
end
|
268
|
-
|
269
275
|
# return the current transformation matrix
|
270
276
|
#
|
271
277
|
def ctm
|
@@ -273,7 +279,10 @@ module PDF
|
|
273
279
|
end
|
274
280
|
|
275
281
|
def current_font
|
276
|
-
@
|
282
|
+
dict = @font_stack.detect { |fonts|
|
283
|
+
fonts.has_key?(state[:text_font])
|
284
|
+
}
|
285
|
+
dict ? dict[state[:text_font]] : nil
|
277
286
|
end
|
278
287
|
|
279
288
|
# private class for representing points on a cartesian plane. Used
|
@@ -350,7 +350,7 @@ class PDF::Reader
|
|
350
350
|
|
351
351
|
while (token = parser.parse_token(OPERATORS))
|
352
352
|
if token.kind_of?(Token) and OPERATORS.has_key?(token)
|
353
|
-
|
353
|
+
if OPERATORS[token] == :set_text_font_and_size
|
354
354
|
current_font = params.first
|
355
355
|
if fonts[current_font].nil?
|
356
356
|
raise MalformedPDFError, "Unknown font #{current_font}"
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -28,6 +28,31 @@ class PDF::Reader
|
|
28
28
|
# An internal PDF::Reader class that reads objects from the PDF file and converts
|
29
29
|
# them into usable Ruby objects (hashes, arrays, true, false, etc)
|
30
30
|
class Parser
|
31
|
+
|
32
|
+
TOKEN_STRATEGY = proc { |parser, token| Token.new(token) }
|
33
|
+
|
34
|
+
STRATEGIES = {
|
35
|
+
"/" => proc { |parser, token| parser.send(:pdf_name) },
|
36
|
+
"<<" => proc { |parser, token| parser.send(:dictionary) },
|
37
|
+
"[" => proc { |parser, token| parser.send(:array) },
|
38
|
+
"(" => proc { |parser, token| parser.send(:string) },
|
39
|
+
"<" => proc { |parser, token| parser.send(:hex_string) },
|
40
|
+
|
41
|
+
nil => proc { nil },
|
42
|
+
"true" => proc { true },
|
43
|
+
"false" => proc { false },
|
44
|
+
"null" => proc { nil },
|
45
|
+
|
46
|
+
"obj" => TOKEN_STRATEGY,
|
47
|
+
"endobj" => TOKEN_STRATEGY,
|
48
|
+
"stream" => TOKEN_STRATEGY,
|
49
|
+
"endstream" => TOKEN_STRATEGY,
|
50
|
+
">>" => TOKEN_STRATEGY,
|
51
|
+
"]" => TOKEN_STRATEGY,
|
52
|
+
">" => TOKEN_STRATEGY,
|
53
|
+
")" => TOKEN_STRATEGY
|
54
|
+
}
|
55
|
+
|
31
56
|
################################################################################
|
32
57
|
# Create a new parser around a PDF::Reader::Buffer object
|
33
58
|
#
|
@@ -45,25 +70,20 @@ class PDF::Reader
|
|
45
70
|
def parse_token (operators={})
|
46
71
|
token = @buffer.token
|
47
72
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
when ">>", "]", ">", ")" then return Token.new(token)
|
73
|
+
if STRATEGIES.has_key? token
|
74
|
+
STRATEGIES[token].call(self, token)
|
75
|
+
elsif token.is_a? PDF::Reader::Reference
|
76
|
+
token
|
77
|
+
elsif token.is_a? Token
|
78
|
+
token
|
79
|
+
elsif operators.has_key? token
|
80
|
+
Token.new(token)
|
81
|
+
elsif token.respond_to?(:to_token)
|
82
|
+
token.to_token
|
83
|
+
elsif token =~ /\d*\.\d/
|
84
|
+
token.to_f
|
61
85
|
else
|
62
|
-
|
63
|
-
elsif operators.has_key?(token) then return Token.new(token)
|
64
|
-
elsif token =~ /\d*\.\d/ then return token.to_f
|
65
|
-
else return token.to_i
|
66
|
-
end
|
86
|
+
token.to_i
|
67
87
|
end
|
68
88
|
end
|
69
89
|
################################################################################
|
@@ -110,9 +130,8 @@ class PDF::Reader
|
|
110
130
|
# reads a PDF name from the buffer and converts it to a Ruby Symbol
|
111
131
|
def pdf_name
|
112
132
|
tok = @buffer.token
|
113
|
-
tok.
|
114
|
-
|
115
|
-
tok.gsub!("#"+find[0], replace)
|
133
|
+
tok.gsub!(/#([A-Fa-f0-9]{2})/) do |match|
|
134
|
+
match[1, 2].hex.chr
|
116
135
|
end
|
117
136
|
tok.to_sym
|
118
137
|
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
module PDF
|
4
|
+
class Reader
|
5
|
+
|
6
|
+
# mixin for common methods in Page and FormXObject
|
7
|
+
#
|
8
|
+
module ResourceMethods
|
9
|
+
# Returns a Hash of color spaces that are available to this page
|
10
|
+
#
|
11
|
+
def color_spaces
|
12
|
+
@objects.deref!(resources[:ColorSpace]) || {}
|
13
|
+
end
|
14
|
+
|
15
|
+
# Returns a Hash of fonts that are available to this page
|
16
|
+
#
|
17
|
+
def fonts
|
18
|
+
@objects.deref!(resources[:Font]) || {}
|
19
|
+
end
|
20
|
+
|
21
|
+
# Returns a Hash of external graphic states that are available to this
|
22
|
+
# page
|
23
|
+
#
|
24
|
+
def graphic_states
|
25
|
+
@objects.deref!(resources[:ExtGState]) || {}
|
26
|
+
end
|
27
|
+
|
28
|
+
# Returns a Hash of patterns that are available to this page
|
29
|
+
#
|
30
|
+
def patterns
|
31
|
+
@objects.deref!(resources[:Pattern]) || {}
|
32
|
+
end
|
33
|
+
|
34
|
+
# Returns an Array of procedure sets that are available to this page
|
35
|
+
#
|
36
|
+
def procedure_sets
|
37
|
+
@objects.deref!(resources[:ProcSet]) || []
|
38
|
+
end
|
39
|
+
|
40
|
+
# Returns a Hash of property sets that are available to this page
|
41
|
+
#
|
42
|
+
def properties
|
43
|
+
@objects.deref!(resources[:Properties]) || {}
|
44
|
+
end
|
45
|
+
|
46
|
+
# Returns a Hash of shadings that are available to this page
|
47
|
+
#
|
48
|
+
def shadings
|
49
|
+
@objects.deref!(resources[:Shading]) || {}
|
50
|
+
end
|
51
|
+
|
52
|
+
# Returns a Hash of XObjects that are available to this page
|
53
|
+
#
|
54
|
+
def xobjects
|
55
|
+
@objects.deref!(resources[:XObject]) || {}
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -146,7 +146,7 @@ class PDF::Reader
|
|
146
146
|
# Read an XRef stream from the underlying buffer instead of a traditional xref table.
|
147
147
|
#
|
148
148
|
def load_xref_stream(stream)
|
149
|
-
unless stream.hash[:Type] == :XRef
|
149
|
+
unless stream.is_a?(PDF::Reader::Stream) && stream.hash[:Type] == :XRef
|
150
150
|
raise PDF::Reader::MalformedPDFError, "xref stream not found when expected"
|
151
151
|
end
|
152
152
|
trailer = Hash[stream.hash.select { |key, value|
|
metadata
CHANGED
@@ -1,122 +1,98 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 1
|
7
|
-
- 0
|
8
|
-
- 0
|
9
|
-
- beta1
|
10
|
-
version: 1.0.0.beta1
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0.rc1
|
5
|
+
prerelease: 6
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- James Healy
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
dependencies:
|
21
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2011-12-19 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
22
15
|
name: rake
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &19650680 !ruby/object:Gem::Requirement
|
25
17
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
|
30
|
-
- 0
|
31
|
-
version: "0"
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
32
22
|
type: :development
|
33
|
-
version_requirements: *id001
|
34
|
-
- !ruby/object:Gem::Dependency
|
35
|
-
name: roodi
|
36
23
|
prerelease: false
|
37
|
-
|
24
|
+
version_requirements: *19650680
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: roodi
|
27
|
+
requirement: &19650220 !ruby/object:Gem::Requirement
|
38
28
|
none: false
|
39
|
-
requirements:
|
40
|
-
- -
|
41
|
-
- !ruby/object:Gem::Version
|
42
|
-
|
43
|
-
- 0
|
44
|
-
version: "0"
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
45
33
|
type: :development
|
46
|
-
version_requirements: *id002
|
47
|
-
- !ruby/object:Gem::Dependency
|
48
|
-
name: rspec
|
49
34
|
prerelease: false
|
50
|
-
|
35
|
+
version_requirements: *19650220
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rspec
|
38
|
+
requirement: &19649720 !ruby/object:Gem::Requirement
|
51
39
|
none: false
|
52
|
-
requirements:
|
40
|
+
requirements:
|
53
41
|
- - ~>
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
|
56
|
-
- 2
|
57
|
-
- 3
|
58
|
-
version: "2.3"
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '2.3'
|
59
44
|
type: :development
|
60
|
-
version_requirements: *id003
|
61
|
-
- !ruby/object:Gem::Dependency
|
62
|
-
name: ZenTest
|
63
45
|
prerelease: false
|
64
|
-
|
46
|
+
version_requirements: *19649720
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: ZenTest
|
49
|
+
requirement: &19649220 !ruby/object:Gem::Requirement
|
65
50
|
none: false
|
66
|
-
requirements:
|
51
|
+
requirements:
|
67
52
|
- - ~>
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
segments:
|
70
|
-
- 4
|
71
|
-
- 4
|
72
|
-
- 2
|
53
|
+
- !ruby/object:Gem::Version
|
73
54
|
version: 4.4.2
|
74
55
|
type: :development
|
75
|
-
version_requirements: *id004
|
76
|
-
- !ruby/object:Gem::Dependency
|
77
|
-
name: Ascii85
|
78
56
|
prerelease: false
|
79
|
-
|
57
|
+
version_requirements: *19649220
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: Ascii85
|
60
|
+
requirement: &19648740 !ruby/object:Gem::Requirement
|
80
61
|
none: false
|
81
|
-
requirements:
|
62
|
+
requirements:
|
82
63
|
- - ~>
|
83
|
-
- !ruby/object:Gem::Version
|
84
|
-
segments:
|
85
|
-
- 1
|
86
|
-
- 0
|
87
|
-
- 0
|
64
|
+
- !ruby/object:Gem::Version
|
88
65
|
version: 1.0.0
|
89
66
|
type: :runtime
|
90
|
-
version_requirements: *id005
|
91
|
-
- !ruby/object:Gem::Dependency
|
92
|
-
name: ruby-rc4
|
93
67
|
prerelease: false
|
94
|
-
|
68
|
+
version_requirements: *19648740
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: ruby-rc4
|
71
|
+
requirement: &19648280 !ruby/object:Gem::Requirement
|
95
72
|
none: false
|
96
|
-
requirements:
|
97
|
-
- -
|
98
|
-
- !ruby/object:Gem::Version
|
99
|
-
|
100
|
-
- 0
|
101
|
-
version: "0"
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
102
77
|
type: :runtime
|
103
|
-
|
104
|
-
|
105
|
-
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: *19648280
|
80
|
+
description: The PDF::Reader library implements a PDF parser conforming as much as
|
81
|
+
possible to the PDF specification from Adobe
|
82
|
+
email:
|
106
83
|
- jimmy@deefa.com
|
107
|
-
executables:
|
84
|
+
executables:
|
108
85
|
- pdf_object
|
109
86
|
- pdf_text
|
110
87
|
- pdf_list_callbacks
|
111
88
|
- pdf_callbacks
|
112
89
|
extensions: []
|
113
|
-
|
114
|
-
extra_rdoc_files:
|
90
|
+
extra_rdoc_files:
|
115
91
|
- README.rdoc
|
116
92
|
- TODO
|
117
93
|
- CHANGELOG
|
118
94
|
- MIT-LICENSE
|
119
|
-
files:
|
95
|
+
files:
|
120
96
|
- examples/metadata.rb
|
121
97
|
- examples/extract_images.rb
|
122
98
|
- examples/extract_bates.rb
|
@@ -161,6 +137,7 @@ files:
|
|
161
137
|
- lib/pdf/reader/encodings/zapf_dingbats.txt
|
162
138
|
- lib/pdf/reader/encodings/pdf_doc.txt
|
163
139
|
- lib/pdf/reader/encodings/mac_expert.txt
|
140
|
+
- lib/pdf/reader/resource_methods.rb
|
164
141
|
- lib/pdf/reader/metadata_strategy.rb
|
165
142
|
- lib/pdf/reader/token.rb
|
166
143
|
- lib/pdf-reader.rb
|
@@ -173,45 +150,39 @@ files:
|
|
173
150
|
- bin/pdf_text
|
174
151
|
- bin/pdf_list_callbacks
|
175
152
|
- bin/pdf_callbacks
|
176
|
-
has_rdoc: true
|
177
153
|
homepage: http://github.com/yob/pdf-reader
|
178
154
|
licenses: []
|
179
|
-
|
180
|
-
|
181
|
-
|
155
|
+
post_install_message: ! "\n ********************************************\n\n This
|
156
|
+
is a beta release of PDF::Reader to gather feedback on the proposed\n API changes.\n\n
|
157
|
+
\ The old API is marked as deprecated but will continue to work with no\n visible
|
158
|
+
warnings for now.\n\n The new API is documented in the README and in rdoc for the
|
159
|
+
PDF::Reader,\n PDF::Reader::Page and PDF::Reader::ObjectHash classes.\n\n Do not
|
160
|
+
use this in production, stick to stable releases for that. If you do\n take the
|
161
|
+
new API for a spin, please send any feedback my way.\n\n ********************************************\n\n"
|
162
|
+
rdoc_options:
|
182
163
|
- --title
|
183
164
|
- PDF::Reader Documentation
|
184
165
|
- --main
|
185
166
|
- README.rdoc
|
186
167
|
- -q
|
187
|
-
require_paths:
|
168
|
+
require_paths:
|
188
169
|
- lib
|
189
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
170
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
190
171
|
none: false
|
191
|
-
requirements:
|
192
|
-
- -
|
193
|
-
- !ruby/object:Gem::Version
|
194
|
-
segments:
|
195
|
-
- 1
|
196
|
-
- 8
|
197
|
-
- 7
|
172
|
+
requirements:
|
173
|
+
- - ! '>='
|
174
|
+
- !ruby/object:Gem::Version
|
198
175
|
version: 1.8.7
|
199
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
176
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
200
177
|
none: false
|
201
|
-
requirements:
|
202
|
-
- -
|
203
|
-
- !ruby/object:Gem::Version
|
204
|
-
segments:
|
205
|
-
- 1
|
206
|
-
- 3
|
207
|
-
- 1
|
178
|
+
requirements:
|
179
|
+
- - ! '>'
|
180
|
+
- !ruby/object:Gem::Version
|
208
181
|
version: 1.3.1
|
209
182
|
requirements: []
|
210
|
-
|
211
183
|
rubyforge_project:
|
212
|
-
rubygems_version: 1.
|
184
|
+
rubygems_version: 1.8.11
|
213
185
|
signing_key:
|
214
186
|
specification_version: 3
|
215
187
|
summary: A library for accessing the content of PDF files
|
216
188
|
test_files: []
|
217
|
-
|