pdf-reader 1.0.0.beta1 → 1.0.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +7 -0
- data/README.rdoc +38 -4
- data/Rakefile +45 -1
- data/examples/extract_fonts.rb +1 -0
- data/examples/extract_images.rb +9 -14
- data/lib/pdf/reader.rb +50 -2
- data/lib/pdf/reader/buffer.rb +20 -20
- data/lib/pdf/reader/cmap.rb +2 -0
- data/lib/pdf/reader/encoding.rb +16 -17
- data/lib/pdf/reader/filter.rb +1 -1
- data/lib/pdf/reader/font.rb +3 -4
- data/lib/pdf/reader/form_xobject.rb +8 -7
- data/lib/pdf/reader/glyph_hash.rb +1 -0
- data/lib/pdf/reader/glyphlist.txt +122 -0
- data/lib/pdf/reader/lzw.rb +2 -2
- data/lib/pdf/reader/object_hash.rb +30 -4
- data/lib/pdf/reader/page.rb +10 -58
- data/lib/pdf/reader/page_text_receiver.rb +26 -17
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/parser.rb +40 -21
- data/lib/pdf/reader/resource_methods.rb +60 -0
- data/lib/pdf/reader/xref.rb +1 -1
- metadata +75 -104
data/CHANGELOG
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
v1.0.0.rc1 (19th December 2011)
|
2
|
+
- performance optimisations (all by Bernerd Schaefer)
|
3
|
+
- some improvements to text extraction from form xobjects
|
4
|
+
- assume invalid font encodings are StandardEncoding
|
5
|
+
- use binary mode when opening PDFs to stop ruby being helpful and transcoding
|
6
|
+
bytes for us
|
7
|
+
|
1
8
|
v1.0.0.beta1 (6th October 2011)
|
2
9
|
- ensure inline images that contain "EI" are correctly parsed
|
3
10
|
(thanks Bernerd Schaefer)
|
data/README.rdoc
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
= !PLEASE NOTE!
|
2
|
+
|
3
|
+
All the examples below are for the latest (pre-release) version of the gem (0.11)
|
4
|
+
|
5
|
+
If you have installed the gem via rubygems with the command:
|
6
|
+
|
7
|
+
$ gem install pdf-reader
|
8
|
+
|
9
|
+
Then the examples below *will not work* for you. Please check the examples that
|
10
|
+
come with the previous version of the gem (0.10).
|
11
|
+
|
12
|
+
If you want to install the latest version of this gem use the command:
|
13
|
+
|
14
|
+
$ gem install pdf-reader --prerelease
|
15
|
+
|
16
|
+
= Release Notes
|
17
|
+
|
1
18
|
The PDF::Reader library implements a PDF parser conforming as much as possible
|
2
19
|
to the PDF specification from Adobe.
|
3
20
|
|
@@ -32,7 +49,7 @@ this object.
|
|
32
49
|
puts reader.metadata
|
33
50
|
puts reader.page_count
|
34
51
|
|
35
|
-
PDF::Reader.new
|
52
|
+
PDF::Reader.new accepts an IO stream or a filename. Here's an example with
|
36
53
|
an IO stream:
|
37
54
|
|
38
55
|
require 'open-uri'
|
@@ -41,6 +58,14 @@ an IO stream:
|
|
41
58
|
reader = PDF::Reader.new(io)
|
42
59
|
puts reader.info
|
43
60
|
|
61
|
+
If you open a PDF with File#open or IO#open, I strongly recommend using "rb"
|
62
|
+
mode to ensure the file isn't mangled by ruby being 'helpful'.
|
63
|
+
|
64
|
+
File.open("somefile.pdf", "rb") do |io|
|
65
|
+
reader = PDF::Reader.new(io)
|
66
|
+
puts reader.info
|
67
|
+
end
|
68
|
+
|
44
69
|
PDF is a page based file format, so most visible information is available via
|
45
70
|
page-based iteration
|
46
71
|
|
@@ -80,9 +105,8 @@ The second method is preferred to increase the effectiveness of internal caching
|
|
80
105
|
|
81
106
|
= Text Encoding
|
82
107
|
|
83
|
-
|
84
|
-
|
85
|
-
text will be converted to UTF-8 before it is passed back from PDF::Reader.
|
108
|
+
Regardless of the internal encoding used in the PDF all text will be converted
|
109
|
+
to UTF-8 before it is passed back from PDF::Reader.
|
86
110
|
|
87
111
|
Strings that contain binary data (like font blobs) will be marked as such on
|
88
112
|
M17N aware VMs.
|
@@ -107,6 +131,16 @@ don't, 'rescue MalformedPDFError' will catch all the subclassed errors as well.
|
|
107
131
|
Any other exceptions should be considered bugs in PDF::Reader (please
|
108
132
|
report it!).
|
109
133
|
|
134
|
+
= PDF Integrity
|
135
|
+
|
136
|
+
Windows developers may run into problems when running specs due to MalformedPDFErrors.
|
137
|
+
This is usually because CRLF characters are automatically added to some of the PDFs in
|
138
|
+
the spec folder when you checkout a branch from Git.
|
139
|
+
|
140
|
+
To remove any invalid CRLF characters added while checking out a branch from Git, run:
|
141
|
+
|
142
|
+
rake fix_integrity
|
143
|
+
|
110
144
|
= Maintainers
|
111
145
|
|
112
146
|
- James Healy <mailto:jimmy@deefa.com>
|
data/Rakefile
CHANGED
@@ -18,7 +18,7 @@ RSpec::Core::RakeTask.new("spec") do |t|
|
|
18
18
|
t.ruby_opts = "-w"
|
19
19
|
end
|
20
20
|
|
21
|
-
#
|
21
|
+
# Generate the RDoc documentation
|
22
22
|
desc "Create documentation"
|
23
23
|
Rake::RDocTask.new("doc") do |rdoc|
|
24
24
|
rdoc.title = "pdf-reader"
|
@@ -32,3 +32,47 @@ Rake::RDocTask.new("doc") do |rdoc|
|
|
32
32
|
end
|
33
33
|
|
34
34
|
RoodiTask.new 'roodi', ['lib/**/*.rb']
|
35
|
+
|
36
|
+
desc "Create a YAML file of integrity info for PDFs in the spec suite"
|
37
|
+
task :integrity_yaml do
|
38
|
+
data = {}
|
39
|
+
Dir.glob("spec/data/**/*.*").each do |path|
|
40
|
+
path_without_spec = path.gsub("spec/","")
|
41
|
+
data[path_without_spec] = {
|
42
|
+
:bytes => File.size(path),
|
43
|
+
:md5 => `md5sum "#{path}"`.split.first
|
44
|
+
} if File.file?(path)
|
45
|
+
end
|
46
|
+
File.open("spec/integrity.yml","wb") { |f| f.write YAML.dump(data)}
|
47
|
+
end
|
48
|
+
|
49
|
+
desc "Remove any CRLF characters added by Git"
|
50
|
+
task :fix_integrity do
|
51
|
+
yaml_path = File.expand_path("spec/integrity.yml",File.dirname(__FILE__))
|
52
|
+
integrity = YAML.load_file(yaml_path)
|
53
|
+
|
54
|
+
Dir.glob("spec/data/**/*.pdf").each do |path|
|
55
|
+
path_relative_to_spec_folder = path[/.+(data\/.+)/,1]
|
56
|
+
item = integrity[path_relative_to_spec_folder]
|
57
|
+
|
58
|
+
if File.file?(path)
|
59
|
+
file_contents = File.open(path, "rb") { |f| f.read }
|
60
|
+
md5 = Digest::MD5.hexdigest(file_contents)
|
61
|
+
|
62
|
+
unless md5 == item[:md5]
|
63
|
+
#file md5 does not match what was checked into Git
|
64
|
+
|
65
|
+
if Digest::MD5.hexdigest(file_contents.gsub(/\r\n/, "\n")) == item[:md5]
|
66
|
+
#pdf file is fixable by swapping CRLF characters
|
67
|
+
|
68
|
+
File.open(path, "wb") do |f|
|
69
|
+
f.write(file_contents.gsub(/\r\n/, "\n"))
|
70
|
+
end
|
71
|
+
puts "Replaced CRLF characters in: #{path}"
|
72
|
+
else
|
73
|
+
puts "Failed to fix: #{path}"
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
data/examples/extract_fonts.rb
CHANGED
data/examples/extract_images.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
1
2
|
# coding: utf-8
|
2
3
|
|
3
4
|
# This demonstrates a way to extract some images (those based on the JPG or
|
@@ -14,9 +15,7 @@ module ExtractImages
|
|
14
15
|
class Extractor
|
15
16
|
|
16
17
|
def page(page)
|
17
|
-
|
18
|
-
|
19
|
-
process_resources(page, page.resources, count)
|
18
|
+
process_page(page, 0)
|
20
19
|
end
|
21
20
|
|
22
21
|
private
|
@@ -25,17 +24,13 @@ module ExtractImages
|
|
25
24
|
@complete_refs ||= {}
|
26
25
|
end
|
27
26
|
|
28
|
-
def
|
29
|
-
xobjects =
|
30
|
-
return count if xobjects.
|
27
|
+
def process_page(page, count)
|
28
|
+
xobjects = page.xobjects
|
29
|
+
return count if xobjects.empty?
|
31
30
|
|
32
31
|
xobjects.each do |name, stream|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
stream = page.objects.deref(stream)
|
37
|
-
|
38
|
-
if stream.hash[:Subtype] == :Image
|
32
|
+
case stream.hash[:Subtype]
|
33
|
+
when :Image then
|
39
34
|
count += 1
|
40
35
|
|
41
36
|
case stream.hash[:Filter]
|
@@ -46,8 +41,8 @@ module ExtractImages
|
|
46
41
|
else
|
47
42
|
ExtractImages::Raw.new(stream).save("#{page.number}-#{count}-#{name}.tif")
|
48
43
|
end
|
49
|
-
|
50
|
-
count =
|
44
|
+
when :Form then
|
45
|
+
count = process_page(PDF::Reader::FormXObject.new(page, stream), count)
|
51
46
|
end
|
52
47
|
end
|
53
48
|
count
|
data/lib/pdf/reader.rb
CHANGED
@@ -118,12 +118,19 @@ module PDF
|
|
118
118
|
end
|
119
119
|
|
120
120
|
def info
|
121
|
-
@objects.deref(@objects.trailer[:Info])
|
121
|
+
dict = @objects.deref(@objects.trailer[:Info])
|
122
|
+
doc_strings_to_utf8(dict)
|
122
123
|
end
|
123
124
|
|
124
125
|
def metadata
|
125
126
|
stream = @objects.deref(root[:Metadata])
|
126
|
-
|
127
|
+
if stream.nil?
|
128
|
+
nil
|
129
|
+
else
|
130
|
+
xml = stream.unfiltered_data
|
131
|
+
xml.force_encoding("utf-8") if xml.respond_to?(:force_encoding)
|
132
|
+
xml
|
133
|
+
end
|
127
134
|
end
|
128
135
|
|
129
136
|
def page_count
|
@@ -269,6 +276,46 @@ module PDF
|
|
269
276
|
|
270
277
|
private
|
271
278
|
|
279
|
+
# recursively convert strings from outside a content stream into UTF-8
|
280
|
+
#
|
281
|
+
def doc_strings_to_utf8(obj)
|
282
|
+
case obj
|
283
|
+
when ::Hash then
|
284
|
+
{}.tap { |new_hash|
|
285
|
+
obj.each do |key, value|
|
286
|
+
new_hash[key] = doc_strings_to_utf8(value)
|
287
|
+
end
|
288
|
+
}
|
289
|
+
when Array then
|
290
|
+
obj.map { |item| doc_strings_to_utf8(item) }
|
291
|
+
when String then
|
292
|
+
if obj[0,2].unpack("C*") == [254, 255]
|
293
|
+
utf16_to_utf8(obj)
|
294
|
+
else
|
295
|
+
pdfdoc_to_utf8(obj)
|
296
|
+
end
|
297
|
+
else
|
298
|
+
obj
|
299
|
+
end
|
300
|
+
end
|
301
|
+
|
302
|
+
# TODO find a PDF I can use to spec this behaviour
|
303
|
+
#
|
304
|
+
def pdfdoc_to_utf8(obj)
|
305
|
+
obj.force_encoding("utf-8") if obj.respond_to?(:force_encoding)
|
306
|
+
obj
|
307
|
+
end
|
308
|
+
|
309
|
+
# one day we'll all run on a 1.9 compatible VM and I can just do this with
|
310
|
+
# String#encode
|
311
|
+
#
|
312
|
+
def utf16_to_utf8(obj)
|
313
|
+
str = obj[2, obj.size]
|
314
|
+
str = str.unpack("n*").pack("U*")
|
315
|
+
str.force_encoding("utf-8") if str.respond_to?(:force_encoding)
|
316
|
+
str
|
317
|
+
end
|
318
|
+
|
272
319
|
def strategies
|
273
320
|
@strategies ||= [
|
274
321
|
::PDF::Reader::MetadataStrategy,
|
@@ -284,6 +331,7 @@ module PDF
|
|
284
331
|
end
|
285
332
|
################################################################################
|
286
333
|
|
334
|
+
require 'pdf/reader/resource_methods'
|
287
335
|
require 'pdf/reader/abstract_strategy'
|
288
336
|
require 'pdf/reader/buffer'
|
289
337
|
require 'pdf/reader/cmap'
|
data/lib/pdf/reader/buffer.rb
CHANGED
@@ -151,14 +151,11 @@ class PDF::Reader
|
|
151
151
|
#
|
152
152
|
def prepare_tokens
|
153
153
|
10.times do
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
prepare_regular_token
|
160
|
-
elsif state == :inline
|
161
|
-
prepare_inline_token
|
154
|
+
case state
|
155
|
+
when :literal_string then prepare_literal_token
|
156
|
+
when :hex_string then prepare_hex_token
|
157
|
+
when :regular then prepare_regular_token
|
158
|
+
when :inline then prepare_inline_token
|
162
159
|
end
|
163
160
|
end
|
164
161
|
|
@@ -169,14 +166,12 @@ class PDF::Reader
|
|
169
166
|
# Determine the current context/state by examining the last token we found
|
170
167
|
#
|
171
168
|
def state
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
:
|
178
|
-
elsif in_content_stream? && @tokens[-1] == "ID"
|
179
|
-
:inline
|
169
|
+
case @tokens.last
|
170
|
+
when "(" then :literal_string
|
171
|
+
when "<" then :hex_string
|
172
|
+
when "stream" then :stream
|
173
|
+
when "ID"
|
174
|
+
in_content_stream? ? :inline : :regular
|
180
175
|
else
|
181
176
|
:regular
|
182
177
|
end
|
@@ -209,13 +204,18 @@ class PDF::Reader
|
|
209
204
|
def prepare_inline_token
|
210
205
|
str = ""
|
211
206
|
|
212
|
-
|
207
|
+
buffer = []
|
208
|
+
|
209
|
+
until buffer[0] =~ /\s/ && buffer[1, 2] == ["E", "I"]
|
213
210
|
chr = @io.read(1)
|
214
|
-
|
215
|
-
|
211
|
+
buffer << chr
|
212
|
+
|
213
|
+
if buffer.length > 3
|
214
|
+
str << buffer.shift
|
215
|
+
end
|
216
216
|
end
|
217
217
|
|
218
|
-
@tokens << string_token(str
|
218
|
+
@tokens << string_token(str.strip)
|
219
219
|
@io.seek(-3, IO::SEEK_CUR) unless chr.nil?
|
220
220
|
end
|
221
221
|
|
data/lib/pdf/reader/cmap.rb
CHANGED
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -137,24 +137,23 @@ class PDF::Reader
|
|
137
137
|
end
|
138
138
|
|
139
139
|
def get_mapping_file(enc)
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
files[enc]
|
140
|
+
case enc
|
141
|
+
when :"Identity-H", :"Identity-V", :UTF16Encoding then
|
142
|
+
nil
|
143
|
+
when :MacRomanEncoding then
|
144
|
+
File.dirname(__FILE__) + "/encodings/mac_roman.txt"
|
145
|
+
when :MacExpertEncoding then
|
146
|
+
File.dirname(__FILE__) + "/encodings/mac_expert.txt"
|
147
|
+
when :PDFDocEncoding then
|
148
|
+
File.dirname(__FILE__) + "/encodings/pdf_doc.txt"
|
149
|
+
when :SymbolEncoding then
|
150
|
+
File.dirname(__FILE__) + "/encodings/symbol.txt"
|
151
|
+
when :WinAnsiEncoding then
|
152
|
+
File.dirname(__FILE__) + "/encodings/win_ansi.txt"
|
153
|
+
when :ZapfDingbatsEncoding then
|
154
|
+
File.dirname(__FILE__) + "/encodings/zapf_dingbats.txt"
|
156
155
|
else
|
157
|
-
|
156
|
+
File.dirname(__FILE__) + "/encodings/standard.txt"
|
158
157
|
end
|
159
158
|
end
|
160
159
|
|
data/lib/pdf/reader/filter.rb
CHANGED
data/lib/pdf/reader/font.rb
CHANGED
@@ -39,6 +39,8 @@ class PDF::Reader
|
|
39
39
|
extract_base_info(obj)
|
40
40
|
extract_descriptor(obj)
|
41
41
|
extract_descendants(obj)
|
42
|
+
|
43
|
+
@encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
|
42
44
|
end
|
43
45
|
|
44
46
|
def basefont=(font)
|
@@ -59,10 +61,7 @@ class PDF::Reader
|
|
59
61
|
raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported" if encoding.kind_of?(String)
|
60
62
|
|
61
63
|
if params.class == String
|
62
|
-
|
63
|
-
# If an encoding hasn't been specified, assume the text using this
|
64
|
-
# font is in Adobe Standard Encoding.
|
65
|
-
(encoding || PDF::Reader::Encoding.new(:StandardEncoding)).to_utf8(params, tounicode)
|
64
|
+
encoding.to_utf8(params, tounicode)
|
66
65
|
elsif params.class == Array
|
67
66
|
params.collect { |param| self.to_utf8(param) }
|
68
67
|
else
|
@@ -11,6 +11,7 @@ module PDF
|
|
11
11
|
# This behaves and looks much like a limited PDF::Reader::Page class.
|
12
12
|
#
|
13
13
|
class FormXObject
|
14
|
+
include ResourceMethods
|
14
15
|
|
15
16
|
def initialize(page, xobject)
|
16
17
|
@page = page
|
@@ -18,12 +19,6 @@ module PDF
|
|
18
19
|
@xobject = @objects.deref(xobject)
|
19
20
|
end
|
20
21
|
|
21
|
-
# Returns the resources that accompany this form.
|
22
|
-
#
|
23
|
-
def resources
|
24
|
-
@resources ||= @objects.deref(@xobject.hash[:Resources]) || {}
|
25
|
-
end
|
26
|
-
|
27
22
|
# return a hash of fonts used on this form.
|
28
23
|
#
|
29
24
|
# The keys are the font labels used within the form content stream.
|
@@ -31,7 +26,7 @@ module PDF
|
|
31
26
|
# The values are PDF::Reader::Font instances that provide access
|
32
27
|
# to most available metrics for each font.
|
33
28
|
#
|
34
|
-
def
|
29
|
+
def font_objects
|
35
30
|
raw_fonts = @objects.deref(resources[:Font] || {})
|
36
31
|
::Hash[raw_fonts.map { |label, font|
|
37
32
|
[label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
|
@@ -56,6 +51,12 @@ module PDF
|
|
56
51
|
|
57
52
|
private
|
58
53
|
|
54
|
+
# Returns the resources that accompany this form.
|
55
|
+
#
|
56
|
+
def resources
|
57
|
+
@resources ||= @objects.deref(@xobject.hash[:Resources]) || {}
|
58
|
+
end
|
59
|
+
|
59
60
|
def callback(receivers, name, params=[])
|
60
61
|
receivers.each do |receiver|
|
61
62
|
receiver.send(name, *params) if receiver.respond_to?(name)
|
@@ -4281,3 +4281,125 @@ zstroke;01B6
|
|
4281
4281
|
zuhiragana;305A
|
4282
4282
|
zukatakana;30BA
|
4283
4283
|
#--end
|
4284
|
+
#--start wingdings
|
4285
|
+
scissors;2702
|
4286
|
+
scissorscutting;2701
|
4287
|
+
telephonesolid;260E
|
4288
|
+
telhandsetcirc;2706
|
4289
|
+
envelopeback;2709
|
4290
|
+
hourglass;231B
|
4291
|
+
keyboard;2328
|
4292
|
+
tapereel;2707
|
4293
|
+
handwrite;270D
|
4294
|
+
handv;270C
|
4295
|
+
handptleft;261C
|
4296
|
+
handptright;261E
|
4297
|
+
handptup;261D
|
4298
|
+
handptdown;261F
|
4299
|
+
handhalt;270B
|
4300
|
+
frownface;2639
|
4301
|
+
skullcrossbones;2620
|
4302
|
+
flag;2690
|
4303
|
+
airplane;2708
|
4304
|
+
sunshine;263C
|
4305
|
+
snowflake;2744
|
4306
|
+
crossshadow;271E
|
4307
|
+
crossmaltese;2720
|
4308
|
+
starofdavid;2721
|
4309
|
+
crescentstar;262A
|
4310
|
+
om;0950
|
4311
|
+
wheel;2638
|
4312
|
+
aries;2648
|
4313
|
+
taurus;2649
|
4314
|
+
gemini;264A
|
4315
|
+
cancer;264B
|
4316
|
+
leo;264C
|
4317
|
+
virgo;264D
|
4318
|
+
libra;264E
|
4319
|
+
scorpio;264F
|
4320
|
+
saggitarius;2650
|
4321
|
+
capricorn;2651
|
4322
|
+
aquarius;2652
|
4323
|
+
pisces;2653
|
4324
|
+
ampersanditlc;0026
|
4325
|
+
ampersandit;0026
|
4326
|
+
circle6;25CF
|
4327
|
+
circleshadowdwn;274D
|
4328
|
+
square6;25A0
|
4329
|
+
box3;25A1
|
4330
|
+
boxshadowdwn;2751
|
4331
|
+
boxshadowup;2752
|
4332
|
+
lozenge4;2B27
|
4333
|
+
lozenge6;29EB
|
4334
|
+
rhombus6;25C6
|
4335
|
+
xrhombus;2756
|
4336
|
+
rhombus4;2B25
|
4337
|
+
escape;2353
|
4338
|
+
command;2318
|
4339
|
+
rosette;2740
|
4340
|
+
rosettesolid;273F
|
4341
|
+
quotedbllftbld;275D
|
4342
|
+
quotedblrtbld;275E
|
4343
|
+
.notdef;25AF
|
4344
|
+
zerosans;24EA
|
4345
|
+
onesans;2460
|
4346
|
+
twosans;2461
|
4347
|
+
threesans;2462
|
4348
|
+
foursans;2463
|
4349
|
+
fivesans;2464
|
4350
|
+
sixsans;2465
|
4351
|
+
sevensans;2466
|
4352
|
+
eightsans;2467
|
4353
|
+
ninesans;2468
|
4354
|
+
tensans;2469
|
4355
|
+
zerosansinv;24FF
|
4356
|
+
onesansinv;2776
|
4357
|
+
twosansinv;2777
|
4358
|
+
threesansinv;2778
|
4359
|
+
foursansinv;2779
|
4360
|
+
fivesansinv;277A
|
4361
|
+
sixsansinv;277B
|
4362
|
+
sevensansinv;277C
|
4363
|
+
eightsansinv;277D
|
4364
|
+
ninesansinv;277E
|
4365
|
+
tensansinv;277F
|
4366
|
+
circle2;00B7
|
4367
|
+
circle4;2022
|
4368
|
+
square2;25AA
|
4369
|
+
ring2;25CB
|
4370
|
+
ring4;2B55
|
4371
|
+
ringbutton2;25C9
|
4372
|
+
target;25CE
|
4373
|
+
square4;25AA
|
4374
|
+
box2;25FB
|
4375
|
+
crosstar2;2726
|
4376
|
+
pentastar2;2605
|
4377
|
+
hexstar2;2736
|
4378
|
+
octastar2;2734
|
4379
|
+
dodecastar3;2739
|
4380
|
+
octastar4;2735
|
4381
|
+
registercircle;2316
|
4382
|
+
cuspopen;27E1
|
4383
|
+
cuspopen1;2311
|
4384
|
+
circlestar;272A
|
4385
|
+
starshadow;2730
|
4386
|
+
head2right;27A2
|
4387
|
+
circleright;27B2
|
4388
|
+
barb4right;2794
|
4389
|
+
bleft;21E6
|
4390
|
+
bright;21E8
|
4391
|
+
bup;21E7
|
4392
|
+
bdown;21E9
|
4393
|
+
bleftright;2B04
|
4394
|
+
bupdown;21F3
|
4395
|
+
bnw;2B00
|
4396
|
+
bne;2B01
|
4397
|
+
bsw;2B03
|
4398
|
+
bse;2B02
|
4399
|
+
bdash1;25AD
|
4400
|
+
bdash2;25AB
|
4401
|
+
xmarkbld;2717
|
4402
|
+
checkbld;2713
|
4403
|
+
boxxmarkbld;2612
|
4404
|
+
boxcheckbld;2611
|
4405
|
+
#--end wingdings
|
data/lib/pdf/reader/lzw.rb
CHANGED
@@ -37,7 +37,7 @@ module PDF
|
|
37
37
|
while bits_left_in_chunk > 0 and @current_pos < @data.size
|
38
38
|
chunk = 0 if chunk.nil?
|
39
39
|
codepoint = @data[@current_pos, 1].unpack("C*")[0]
|
40
|
-
current_byte = codepoint & (2**@bits_left_in_byte -1) #clear consumed bits
|
40
|
+
current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
|
41
41
|
dif = bits_left_in_chunk - @bits_left_in_byte
|
42
42
|
if dif > 0 then current_byte <<= dif
|
43
43
|
elsif dif < 0 then current_byte >>= dif.abs
|
@@ -82,7 +82,7 @@ module PDF
|
|
82
82
|
def self.decode(data)
|
83
83
|
stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
|
84
84
|
result = ''
|
85
|
-
|
85
|
+
until (code = stream.read) == CODE_EOD
|
86
86
|
if code == CODE_CLEAR_TABLE
|
87
87
|
string_table = StringTable.new
|
88
88
|
code = stream.read
|
@@ -30,6 +30,7 @@ class PDF::Reader
|
|
30
30
|
|
31
31
|
attr_accessor :default
|
32
32
|
attr_reader :trailer, :pdf_version
|
33
|
+
attr_reader :sec_handler
|
33
34
|
|
34
35
|
# Creates a new ObjectHash object. Input can be a string with a valid filename
|
35
36
|
# or an IO-like object.
|
@@ -97,6 +98,27 @@ class PDF::Reader
|
|
97
98
|
end
|
98
99
|
alias :deref :object
|
99
100
|
|
101
|
+
# Recursively dereferences the object referred to by +key+. If +key+ is not
|
102
|
+
# a PDF::Reader::Reference, the key is returned unchanged.
|
103
|
+
#
|
104
|
+
def deref!(key)
|
105
|
+
case object = deref(key)
|
106
|
+
when Hash
|
107
|
+
{}.tap { |hash|
|
108
|
+
object.each do |k, value|
|
109
|
+
hash[k] = deref!(value)
|
110
|
+
end
|
111
|
+
}
|
112
|
+
when PDF::Reader::Stream
|
113
|
+
object.hash = deref!(object.hash)
|
114
|
+
object
|
115
|
+
when Array
|
116
|
+
object.map { |value| deref!(value) }
|
117
|
+
else
|
118
|
+
object
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
100
122
|
# Access an object from the PDF. key can be an int or a PDF::Reader::Reference
|
101
123
|
# object.
|
102
124
|
#
|
@@ -238,6 +260,10 @@ class PDF::Reader
|
|
238
260
|
trailer.has_key?(:Encrypt)
|
239
261
|
end
|
240
262
|
|
263
|
+
def sec_handler?
|
264
|
+
!!sec_handler
|
265
|
+
end
|
266
|
+
|
241
267
|
private
|
242
268
|
|
243
269
|
def build_security_handler(opts = {})
|
@@ -253,11 +279,11 @@ class PDF::Reader
|
|
253
279
|
end
|
254
280
|
|
255
281
|
def decrypt(ref, obj)
|
256
|
-
return obj
|
282
|
+
return obj unless sec_handler?
|
257
283
|
|
258
284
|
case obj
|
259
285
|
when PDF::Reader::Stream then
|
260
|
-
obj.data =
|
286
|
+
obj.data = sec_handler.decrypt(obj.data, ref)
|
261
287
|
obj
|
262
288
|
when Hash then
|
263
289
|
arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
|
@@ -265,7 +291,7 @@ class PDF::Reader
|
|
265
291
|
when Array then
|
266
292
|
obj.collect { |item| decrypt(ref, item) }
|
267
293
|
when String
|
268
|
-
|
294
|
+
sec_handler.decrypt(obj, ref)
|
269
295
|
else
|
270
296
|
obj
|
271
297
|
end
|
@@ -316,7 +342,7 @@ class PDF::Reader
|
|
316
342
|
if File.respond_to?(:binread)
|
317
343
|
File.binread(input.to_s)
|
318
344
|
else
|
319
|
-
File.
|
345
|
+
File.open(input.to_s,"rb") { |f| f.read }
|
320
346
|
end
|
321
347
|
end
|
322
348
|
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -12,6 +12,7 @@ module PDF
|
|
12
12
|
# objects accessor to help walk the page dictionary in any useful way.
|
13
13
|
#
|
14
14
|
class Page
|
15
|
+
include ResourceMethods
|
15
16
|
|
16
17
|
# lowlevel hash-like access to all objects in the underlying PDF
|
17
18
|
attr_reader :objects
|
@@ -45,73 +46,17 @@ module PDF
|
|
45
46
|
"<PDF::Reader::Page page: #{@pagenum}>"
|
46
47
|
end
|
47
48
|
|
48
|
-
# Returns the attributes that accompany this page
|
49
|
+
# Returns the attributes that accompany this page, including
|
49
50
|
# attributes inherited from parents.
|
50
51
|
#
|
51
52
|
def attributes
|
52
|
-
{}.tap { |hash|
|
53
|
+
@attributes ||= {}.tap { |hash|
|
53
54
|
page_with_ancestors.reverse.each do |obj|
|
54
55
|
hash.merge!(@objects.deref(obj))
|
55
56
|
end
|
56
57
|
}
|
57
58
|
end
|
58
59
|
|
59
|
-
# Returns the resources that accompany this page. Includes
|
60
|
-
# resources inherited from parents.
|
61
|
-
#
|
62
|
-
def resources
|
63
|
-
@resources ||= @objects.deref(attributes[:Resources]) || {}
|
64
|
-
end
|
65
|
-
|
66
|
-
# Returns a Hash of color spaces that are available to this page
|
67
|
-
#
|
68
|
-
def color_spaces
|
69
|
-
@objects.deref(resources[:ColorSpace]) || {}
|
70
|
-
end
|
71
|
-
|
72
|
-
# Returns a Hash of fonts that are available to this page
|
73
|
-
#
|
74
|
-
def fonts
|
75
|
-
@objects.deref(resources[:Font]) || {}
|
76
|
-
end
|
77
|
-
|
78
|
-
# Returns a Hash of external graphic states that are available to this
|
79
|
-
# page
|
80
|
-
#
|
81
|
-
def graphic_states
|
82
|
-
@objects.deref(resources[:ExtGState]) || {}
|
83
|
-
end
|
84
|
-
|
85
|
-
# Returns a Hash of patterns that are available to this page
|
86
|
-
#
|
87
|
-
def patterns
|
88
|
-
@objects.deref(resources[:Pattern]) || {}
|
89
|
-
end
|
90
|
-
|
91
|
-
# Returns an Array of procedure sets that are available to this page
|
92
|
-
#
|
93
|
-
def procedure_sets
|
94
|
-
@objects.deref(resources[:ProcSet]) || []
|
95
|
-
end
|
96
|
-
|
97
|
-
# Returns a Hash of properties sets that are available to this page
|
98
|
-
#
|
99
|
-
def properties
|
100
|
-
@objects.deref(resources[:Properties]) || {}
|
101
|
-
end
|
102
|
-
|
103
|
-
# Returns a Hash of shadings that are available to this page
|
104
|
-
#
|
105
|
-
def shadings
|
106
|
-
@objects.deref(resources[:Shading]) || {}
|
107
|
-
end
|
108
|
-
|
109
|
-
# Returns a Hash of XObjects that are available to this page
|
110
|
-
#
|
111
|
-
def xobjects
|
112
|
-
@objects.deref(resources[:XObject]) || {}
|
113
|
-
end
|
114
|
-
|
115
60
|
# returns the plain text content of this page encoded as UTF-8. Any
|
116
61
|
# characters that can't be translated will be returned as a ▯
|
117
62
|
#
|
@@ -168,6 +113,13 @@ module PDF
|
|
168
113
|
root ||= objects.deref(@objects.trailer[:Root])
|
169
114
|
end
|
170
115
|
|
116
|
+
# Returns the resources that accompany this page. Includes
|
117
|
+
# resources inherited from parents.
|
118
|
+
#
|
119
|
+
def resources
|
120
|
+
@resources ||= @objects.deref(attributes[:Resources]) || {}
|
121
|
+
end
|
122
|
+
|
171
123
|
def content_stream(receivers, instructions)
|
172
124
|
buffer = Buffer.new(StringIO.new(instructions), :content_stream => true)
|
173
125
|
parser = Parser.new(buffer, @objects)
|
@@ -29,8 +29,8 @@ module PDF
|
|
29
29
|
def page=(page)
|
30
30
|
@page = page
|
31
31
|
@objects = page.objects
|
32
|
-
@
|
33
|
-
@
|
32
|
+
@font_stack = [build_fonts(page.fonts)]
|
33
|
+
@xobject_stack = [page.xobjects]
|
34
34
|
@content = {}
|
35
35
|
@stack = [DEFAULT_GRAPHICS_STATE]
|
36
36
|
end
|
@@ -109,6 +109,10 @@ module PDF
|
|
109
109
|
state[:text_font_size] = size
|
110
110
|
end
|
111
111
|
|
112
|
+
def font_size
|
113
|
+
state[:text_font_size] * @text_matrix[0,0]
|
114
|
+
end
|
115
|
+
|
112
116
|
def set_text_leading(leading)
|
113
117
|
state[:text_leading] = leading
|
114
118
|
end
|
@@ -194,17 +198,23 @@ module PDF
|
|
194
198
|
#####################################################
|
195
199
|
def invoke_xobject(label)
|
196
200
|
save_graphics_state
|
197
|
-
|
201
|
+
dict = @xobject_stack.detect { |xobjects|
|
202
|
+
xobjects.has_key?(label)
|
203
|
+
}
|
204
|
+
xobject = dict ? dict[label] : nil
|
198
205
|
|
206
|
+
raise MalformedPDFError, "XObject #{label} not found" if xobject.nil?
|
199
207
|
matrix = xobject.hash[:Matrix]
|
200
208
|
concatenate_matrix(*matrix) if matrix
|
201
209
|
|
202
210
|
if xobject.hash[:Subtype] == :Form
|
203
211
|
form = PDF::Reader::FormXObject.new(@page, xobject)
|
204
|
-
@
|
212
|
+
@font_stack.unshift(form.font_objects)
|
213
|
+
@xobject_stack.unshift(form.xobjects)
|
205
214
|
form.walk(self)
|
215
|
+
@font_stack.shift
|
216
|
+
@xobject_stack.shift
|
206
217
|
end
|
207
|
-
@form_fonts = {}
|
208
218
|
|
209
219
|
restore_graphics_state
|
210
220
|
end
|
@@ -232,10 +242,10 @@ module PDF
|
|
232
242
|
|
233
243
|
def text_rendering_matrix
|
234
244
|
state_matrix = Matrix[
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
245
|
+
[font_size * state[:h_scaling], 0, 0],
|
246
|
+
[0, font_size, 0],
|
247
|
+
[0, state[:text_rise], 1]
|
248
|
+
]
|
239
249
|
|
240
250
|
state_matrix * @text_matrix * ctm
|
241
251
|
end
|
@@ -251,21 +261,17 @@ module PDF
|
|
251
261
|
# This returns a deep clone of the current state, ensuring changes are
|
252
262
|
# kept separate from earlier states.
|
253
263
|
#
|
254
|
-
#
|
255
|
-
# the deep clone. Kinda hacky, but effective.
|
264
|
+
# Marshal is used to round-trip the state through a string to easily
|
265
|
+
# perform the deep clone. Kinda hacky, but effective.
|
256
266
|
#
|
257
267
|
def clone_state
|
258
268
|
if @stack.empty?
|
259
269
|
{}
|
260
270
|
else
|
261
|
-
|
271
|
+
Marshal.load Marshal.dump(@stack.last)
|
262
272
|
end
|
263
273
|
end
|
264
274
|
|
265
|
-
def yaml_lib
|
266
|
-
Kernel.const_defined?("Psych") ? Psych : YAML
|
267
|
-
end
|
268
|
-
|
269
275
|
# return the current transformation matrix
|
270
276
|
#
|
271
277
|
def ctm
|
@@ -273,7 +279,10 @@ module PDF
|
|
273
279
|
end
|
274
280
|
|
275
281
|
def current_font
|
276
|
-
@
|
282
|
+
dict = @font_stack.detect { |fonts|
|
283
|
+
fonts.has_key?(state[:text_font])
|
284
|
+
}
|
285
|
+
dict ? dict[state[:text_font]] : nil
|
277
286
|
end
|
278
287
|
|
279
288
|
# private class for representing points on a cartesian plane. Used
|
@@ -350,7 +350,7 @@ class PDF::Reader
|
|
350
350
|
|
351
351
|
while (token = parser.parse_token(OPERATORS))
|
352
352
|
if token.kind_of?(Token) and OPERATORS.has_key?(token)
|
353
|
-
|
353
|
+
if OPERATORS[token] == :set_text_font_and_size
|
354
354
|
current_font = params.first
|
355
355
|
if fonts[current_font].nil?
|
356
356
|
raise MalformedPDFError, "Unknown font #{current_font}"
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -28,6 +28,31 @@ class PDF::Reader
|
|
28
28
|
# An internal PDF::Reader class that reads objects from the PDF file and converts
|
29
29
|
# them into usable ruby objects (hashes, arrays, true, false, etc)
|
30
30
|
class Parser
|
31
|
+
|
32
|
+
TOKEN_STRATEGY = proc { |parser, token| Token.new(token) }
|
33
|
+
|
34
|
+
STRATEGIES = {
|
35
|
+
"/" => proc { |parser, token| parser.send(:pdf_name) },
|
36
|
+
"<<" => proc { |parser, token| parser.send(:dictionary) },
|
37
|
+
"[" => proc { |parser, token| parser.send(:array) },
|
38
|
+
"(" => proc { |parser, token| parser.send(:string) },
|
39
|
+
"<" => proc { |parser, token| parser.send(:hex_string) },
|
40
|
+
|
41
|
+
nil => proc { nil },
|
42
|
+
"true" => proc { true },
|
43
|
+
"false" => proc { false },
|
44
|
+
"null" => proc { nil },
|
45
|
+
|
46
|
+
"obj" => TOKEN_STRATEGY,
|
47
|
+
"endobj" => TOKEN_STRATEGY,
|
48
|
+
"stream" => TOKEN_STRATEGY,
|
49
|
+
"endstream" => TOKEN_STRATEGY,
|
50
|
+
">>" => TOKEN_STRATEGY,
|
51
|
+
"]" => TOKEN_STRATEGY,
|
52
|
+
">" => TOKEN_STRATEGY,
|
53
|
+
")" => TOKEN_STRATEGY
|
54
|
+
}
|
55
|
+
|
31
56
|
################################################################################
|
32
57
|
# Create a new parser around a PDF::Reader::Buffer object
|
33
58
|
#
|
@@ -45,25 +70,20 @@ class PDF::Reader
|
|
45
70
|
def parse_token (operators={})
|
46
71
|
token = @buffer.token
|
47
72
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
when ">>", "]", ">", ")" then return Token.new(token)
|
73
|
+
if STRATEGIES.has_key? token
|
74
|
+
STRATEGIES[token].call(self, token)
|
75
|
+
elsif token.is_a? PDF::Reader::Reference
|
76
|
+
token
|
77
|
+
elsif token.is_a? Token
|
78
|
+
token
|
79
|
+
elsif operators.has_key? token
|
80
|
+
Token.new(token)
|
81
|
+
elsif token.respond_to?(:to_token)
|
82
|
+
token.to_token
|
83
|
+
elsif token =~ /\d*\.\d/
|
84
|
+
token.to_f
|
61
85
|
else
|
62
|
-
|
63
|
-
elsif operators.has_key?(token) then return Token.new(token)
|
64
|
-
elsif token =~ /\d*\.\d/ then return token.to_f
|
65
|
-
else return token.to_i
|
66
|
-
end
|
86
|
+
token.to_i
|
67
87
|
end
|
68
88
|
end
|
69
89
|
################################################################################
|
@@ -110,9 +130,8 @@ class PDF::Reader
|
|
110
130
|
# reads a PDF name from the buffer and converts it to a Ruby Symbol
|
111
131
|
def pdf_name
|
112
132
|
tok = @buffer.token
|
113
|
-
tok.
|
114
|
-
|
115
|
-
tok.gsub!("#"+find[0], replace)
|
133
|
+
tok.gsub!(/#([A-Fa-f0-9]{2})/) do |match|
|
134
|
+
match[1, 2].hex.chr
|
116
135
|
end
|
117
136
|
tok.to_sym
|
118
137
|
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
module PDF
|
4
|
+
class Reader
|
5
|
+
|
6
|
+
# mixin for common methods in Page and FormXObjects
|
7
|
+
#
|
8
|
+
module ResourceMethods
|
9
|
+
# Returns a Hash of color spaces that are available to this page
|
10
|
+
#
|
11
|
+
def color_spaces
|
12
|
+
@objects.deref!(resources[:ColorSpace]) || {}
|
13
|
+
end
|
14
|
+
|
15
|
+
# Returns a Hash of fonts that are available to this page
|
16
|
+
#
|
17
|
+
def fonts
|
18
|
+
@objects.deref!(resources[:Font]) || {}
|
19
|
+
end
|
20
|
+
|
21
|
+
# Returns a Hash of external graphic states that are available to this
|
22
|
+
# page
|
23
|
+
#
|
24
|
+
def graphic_states
|
25
|
+
@objects.deref!(resources[:ExtGState]) || {}
|
26
|
+
end
|
27
|
+
|
28
|
+
# Returns a Hash of patterns that are available to this page
|
29
|
+
#
|
30
|
+
def patterns
|
31
|
+
@objects.deref!(resources[:Pattern]) || {}
|
32
|
+
end
|
33
|
+
|
34
|
+
# Returns an Array of procedure sets that are available to this page
|
35
|
+
#
|
36
|
+
def procedure_sets
|
37
|
+
@objects.deref!(resources[:ProcSet]) || []
|
38
|
+
end
|
39
|
+
|
40
|
+
# Returns a Hash of property sets that are available to this page
|
41
|
+
#
|
42
|
+
def properties
|
43
|
+
@objects.deref!(resources[:Properties]) || {}
|
44
|
+
end
|
45
|
+
|
46
|
+
# Returns a Hash of shadings that are available to this page
|
47
|
+
#
|
48
|
+
def shadings
|
49
|
+
@objects.deref!(resources[:Shading]) || {}
|
50
|
+
end
|
51
|
+
|
52
|
+
# Returns a Hash of XObjects that are available to this page
|
53
|
+
#
|
54
|
+
def xobjects
|
55
|
+
@objects.deref!(resources[:XObject]) || {}
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -146,7 +146,7 @@ class PDF::Reader
|
|
146
146
|
# Read an XRef stream from the underlying buffer instead of a traditional xref table.
|
147
147
|
#
|
148
148
|
def load_xref_stream(stream)
|
149
|
-
unless stream.hash[:Type] == :XRef
|
149
|
+
unless stream.is_a?(PDF::Reader::Stream) && stream.hash[:Type] == :XRef
|
150
150
|
raise PDF::Reader::MalformedPDFError, "xref stream not found when expected"
|
151
151
|
end
|
152
152
|
trailer = Hash[stream.hash.select { |key, value|
|
metadata
CHANGED
@@ -1,122 +1,98 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 1
|
7
|
-
- 0
|
8
|
-
- 0
|
9
|
-
- beta1
|
10
|
-
version: 1.0.0.beta1
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0.rc1
|
5
|
+
prerelease: 6
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- James Healy
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
dependencies:
|
21
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2011-12-19 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
22
15
|
name: rake
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: &19650680 !ruby/object:Gem::Requirement
|
25
17
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
|
30
|
-
- 0
|
31
|
-
version: "0"
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
32
22
|
type: :development
|
33
|
-
version_requirements: *id001
|
34
|
-
- !ruby/object:Gem::Dependency
|
35
|
-
name: roodi
|
36
23
|
prerelease: false
|
37
|
-
|
24
|
+
version_requirements: *19650680
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: roodi
|
27
|
+
requirement: &19650220 !ruby/object:Gem::Requirement
|
38
28
|
none: false
|
39
|
-
requirements:
|
40
|
-
- -
|
41
|
-
- !ruby/object:Gem::Version
|
42
|
-
|
43
|
-
- 0
|
44
|
-
version: "0"
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
45
33
|
type: :development
|
46
|
-
version_requirements: *id002
|
47
|
-
- !ruby/object:Gem::Dependency
|
48
|
-
name: rspec
|
49
34
|
prerelease: false
|
50
|
-
|
35
|
+
version_requirements: *19650220
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rspec
|
38
|
+
requirement: &19649720 !ruby/object:Gem::Requirement
|
51
39
|
none: false
|
52
|
-
requirements:
|
40
|
+
requirements:
|
53
41
|
- - ~>
|
54
|
-
- !ruby/object:Gem::Version
|
55
|
-
|
56
|
-
- 2
|
57
|
-
- 3
|
58
|
-
version: "2.3"
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '2.3'
|
59
44
|
type: :development
|
60
|
-
version_requirements: *id003
|
61
|
-
- !ruby/object:Gem::Dependency
|
62
|
-
name: ZenTest
|
63
45
|
prerelease: false
|
64
|
-
|
46
|
+
version_requirements: *19649720
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: ZenTest
|
49
|
+
requirement: &19649220 !ruby/object:Gem::Requirement
|
65
50
|
none: false
|
66
|
-
requirements:
|
51
|
+
requirements:
|
67
52
|
- - ~>
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
segments:
|
70
|
-
- 4
|
71
|
-
- 4
|
72
|
-
- 2
|
53
|
+
- !ruby/object:Gem::Version
|
73
54
|
version: 4.4.2
|
74
55
|
type: :development
|
75
|
-
version_requirements: *id004
|
76
|
-
- !ruby/object:Gem::Dependency
|
77
|
-
name: Ascii85
|
78
56
|
prerelease: false
|
79
|
-
|
57
|
+
version_requirements: *19649220
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: Ascii85
|
60
|
+
requirement: &19648740 !ruby/object:Gem::Requirement
|
80
61
|
none: false
|
81
|
-
requirements:
|
62
|
+
requirements:
|
82
63
|
- - ~>
|
83
|
-
- !ruby/object:Gem::Version
|
84
|
-
segments:
|
85
|
-
- 1
|
86
|
-
- 0
|
87
|
-
- 0
|
64
|
+
- !ruby/object:Gem::Version
|
88
65
|
version: 1.0.0
|
89
66
|
type: :runtime
|
90
|
-
version_requirements: *id005
|
91
|
-
- !ruby/object:Gem::Dependency
|
92
|
-
name: ruby-rc4
|
93
67
|
prerelease: false
|
94
|
-
|
68
|
+
version_requirements: *19648740
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: ruby-rc4
|
71
|
+
requirement: &19648280 !ruby/object:Gem::Requirement
|
95
72
|
none: false
|
96
|
-
requirements:
|
97
|
-
- -
|
98
|
-
- !ruby/object:Gem::Version
|
99
|
-
|
100
|
-
- 0
|
101
|
-
version: "0"
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
102
77
|
type: :runtime
|
103
|
-
|
104
|
-
|
105
|
-
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: *19648280
|
80
|
+
description: The PDF::Reader library implements a PDF parser conforming as much as
|
81
|
+
possible to the PDF specification from Adobe
|
82
|
+
email:
|
106
83
|
- jimmy@deefa.com
|
107
|
-
executables:
|
84
|
+
executables:
|
108
85
|
- pdf_object
|
109
86
|
- pdf_text
|
110
87
|
- pdf_list_callbacks
|
111
88
|
- pdf_callbacks
|
112
89
|
extensions: []
|
113
|
-
|
114
|
-
extra_rdoc_files:
|
90
|
+
extra_rdoc_files:
|
115
91
|
- README.rdoc
|
116
92
|
- TODO
|
117
93
|
- CHANGELOG
|
118
94
|
- MIT-LICENSE
|
119
|
-
files:
|
95
|
+
files:
|
120
96
|
- examples/metadata.rb
|
121
97
|
- examples/extract_images.rb
|
122
98
|
- examples/extract_bates.rb
|
@@ -161,6 +137,7 @@ files:
|
|
161
137
|
- lib/pdf/reader/encodings/zapf_dingbats.txt
|
162
138
|
- lib/pdf/reader/encodings/pdf_doc.txt
|
163
139
|
- lib/pdf/reader/encodings/mac_expert.txt
|
140
|
+
- lib/pdf/reader/resource_methods.rb
|
164
141
|
- lib/pdf/reader/metadata_strategy.rb
|
165
142
|
- lib/pdf/reader/token.rb
|
166
143
|
- lib/pdf-reader.rb
|
@@ -173,45 +150,39 @@ files:
|
|
173
150
|
- bin/pdf_text
|
174
151
|
- bin/pdf_list_callbacks
|
175
152
|
- bin/pdf_callbacks
|
176
|
-
has_rdoc: true
|
177
153
|
homepage: http://github.com/yob/pdf-reader
|
178
154
|
licenses: []
|
179
|
-
|
180
|
-
|
181
|
-
|
155
|
+
post_install_message: ! "\n ********************************************\n\n This
|
156
|
+
is a beta release of PDF::Reader to gather feedback on the proposed\n API changes.\n\n
|
157
|
+
\ The old API is marked as deprecated but will continue to work with no\n visible
|
158
|
+
warnings for now.\n\n The new API is documented in the README and in rdoc for the
|
159
|
+
PDF::Reader,\n PDF::Reader::Page and PDF::Reader::ObjectHash classes.\n\n Do not
|
160
|
+
use this in production, stick to stable releases for that. If you do\n take the
|
161
|
+
new API for a spin, please send any feedback my way.\n\n ********************************************\n\n"
|
162
|
+
rdoc_options:
|
182
163
|
- --title
|
183
164
|
- PDF::Reader Documentation
|
184
165
|
- --main
|
185
166
|
- README.rdoc
|
186
167
|
- -q
|
187
|
-
require_paths:
|
168
|
+
require_paths:
|
188
169
|
- lib
|
189
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
170
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
190
171
|
none: false
|
191
|
-
requirements:
|
192
|
-
- -
|
193
|
-
- !ruby/object:Gem::Version
|
194
|
-
segments:
|
195
|
-
- 1
|
196
|
-
- 8
|
197
|
-
- 7
|
172
|
+
requirements:
|
173
|
+
- - ! '>='
|
174
|
+
- !ruby/object:Gem::Version
|
198
175
|
version: 1.8.7
|
199
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
176
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
200
177
|
none: false
|
201
|
-
requirements:
|
202
|
-
- -
|
203
|
-
- !ruby/object:Gem::Version
|
204
|
-
segments:
|
205
|
-
- 1
|
206
|
-
- 3
|
207
|
-
- 1
|
178
|
+
requirements:
|
179
|
+
- - ! '>'
|
180
|
+
- !ruby/object:Gem::Version
|
208
181
|
version: 1.3.1
|
209
182
|
requirements: []
|
210
|
-
|
211
183
|
rubyforge_project:
|
212
|
-
rubygems_version: 1.
|
184
|
+
rubygems_version: 1.8.11
|
213
185
|
signing_key:
|
214
186
|
specification_version: 3
|
215
187
|
summary: A library for accessing the content of PDF files
|
216
188
|
test_files: []
|
217
|
-
|