pdf-reader 0.5.1 → 0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +9 -0
- data/README +22 -0
- data/Rakefile +1 -1
- data/TODO +21 -3
- data/lib/pdf/reader.rb +4 -1
- data/lib/pdf/reader/buffer.rb +3 -3
- data/lib/pdf/reader/cmap.rb +48 -0
- data/lib/pdf/reader/content.rb +37 -3
- data/lib/pdf/reader/encoding.rb +1012 -0
- data/lib/pdf/reader/explore.rb +1 -1
- data/lib/pdf/reader/filter.rb +1 -1
- data/lib/pdf/reader/font.rb +75 -0
- data/lib/pdf/reader/glyphlist.txt +4322 -0
- data/lib/pdf/reader/parser.rb +29 -27
- data/lib/pdf/reader/register_receiver.rb +48 -1
- data/lib/pdf/reader/xref.rb +1 -0
- metadata +6 -2
data/lib/pdf/reader/parser.rb
CHANGED
@@ -47,19 +47,19 @@ class PDF::Reader
|
|
47
47
|
token = @buffer.token
|
48
48
|
|
49
49
|
case token
|
50
|
-
when "/"
|
51
|
-
when "<<"
|
52
|
-
when "["
|
53
|
-
when "("
|
54
|
-
when "<"
|
55
|
-
when "true"
|
56
|
-
when "false"
|
57
|
-
when "null"
|
58
|
-
when "obj", "endobj"
|
59
|
-
when "stream", "endstream"
|
60
|
-
when ">>", "]", ">"
|
50
|
+
when "/" then return Name.new(@buffer.token)
|
51
|
+
when "<<" then return dictionary()
|
52
|
+
when "[" then return array()
|
53
|
+
when "(" then return string()
|
54
|
+
when "<" then return hex_string()
|
55
|
+
when "true" then return true
|
56
|
+
when "false" then return false
|
57
|
+
when "null" then return nil
|
58
|
+
when "obj", "endobj" then return Token.new(token)
|
59
|
+
when "stream", "endstream" then return Token.new(token)
|
60
|
+
when ">>", "]", ">" then return Token.new(token)
|
61
61
|
else
|
62
|
-
if operators.has_key?(token)
|
62
|
+
if operators.has_key?(token) then return Token.new(token)
|
63
63
|
else return token.to_f
|
64
64
|
end
|
65
65
|
end
|
@@ -132,15 +132,15 @@ class PDF::Reader
|
|
132
132
|
when "\\"
|
133
133
|
to_remove = 2
|
134
134
|
case @buffer.raw[1, 1]
|
135
|
-
when ""
|
136
|
-
when "n"
|
137
|
-
when "r"
|
138
|
-
when "t"
|
139
|
-
when "b"
|
140
|
-
when "f"
|
141
|
-
when "("
|
142
|
-
when ")"
|
143
|
-
when "\\"
|
135
|
+
when "" then to_remove = 1
|
136
|
+
when "n" then str << "\n"
|
137
|
+
when "r" then str << "\r"
|
138
|
+
when "t" then str << "\t"
|
139
|
+
when "b" then str << "\b"
|
140
|
+
when "f" then str << "\f"
|
141
|
+
when "(" then str << "("
|
142
|
+
when ")" then str << ")"
|
143
|
+
when "\\" then str << "\\"
|
144
144
|
else
|
145
145
|
if m = @buffer.raw.match(/^\\(\d{1,3})/)
|
146
146
|
to_remove = m[0].size
|
@@ -168,8 +168,8 @@ class PDF::Reader
|
|
168
168
|
post_obj = parse_token
|
169
169
|
|
170
170
|
case post_obj
|
171
|
-
when "endobj"
|
172
|
-
when "stream"
|
171
|
+
when "endobj" then return obj
|
172
|
+
when "stream" then return stream(obj)
|
173
173
|
else raise MalformedPDFError, "PDF malformed, unexpected token #{post_obj}"
|
174
174
|
end
|
175
175
|
end
|
@@ -177,8 +177,7 @@ class PDF::Reader
|
|
177
177
|
# Decodes the contents of a PDF Stream and returns it as a Ruby String.
|
178
178
|
def stream (dict)
|
179
179
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?('Length')
|
180
|
-
|
181
|
-
data = @buffer.read(dict['Length'])
|
180
|
+
data = @buffer.read(@xref.object(dict['Length']))
|
182
181
|
Error.str_assert(parse_token, "endstream")
|
183
182
|
Error.str_assert(parse_token, "endobj")
|
184
183
|
|
@@ -186,14 +185,17 @@ class PDF::Reader
|
|
186
185
|
options = []
|
187
186
|
|
188
187
|
if dict.has_key?('DecodeParms')
|
189
|
-
options = dict['DecodeParms']
|
188
|
+
options = Array(dict['DecodeParms'])
|
190
189
|
end
|
191
190
|
|
192
|
-
dict['Filter'].
|
191
|
+
Array(dict['Filter']).each_with_index do |filter, index|
|
193
192
|
data = Filter.new(filter, options[index]).filter(data)
|
194
193
|
end
|
195
194
|
end
|
196
195
|
|
196
|
+
# this stream is a cmap
|
197
|
+
data = PDF::Reader::CMap.new(data) if data.include?("begincmap") && data.include?("endcmap")
|
198
|
+
|
197
199
|
data
|
198
200
|
end
|
199
201
|
################################################################################
|
@@ -12,7 +12,54 @@ class PDF::Reader
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def method_missing(methodname, *args)
|
15
|
-
callbacks << methodname.to_sym
|
15
|
+
callbacks << {:name => methodname.to_sym, :args => args}
|
16
|
+
end
|
17
|
+
|
18
|
+
# count the number of times a callback fired
|
19
|
+
def count(methodname)
|
20
|
+
counter = 0
|
21
|
+
callbacks.each { |cb| counter += 1 if cb[:name] == methodname}
|
22
|
+
return counter
|
23
|
+
end
|
24
|
+
|
25
|
+
# return the details for the first time the specified callback was fired
|
26
|
+
def first_occurance_of(methodname)
|
27
|
+
callbacks.each do |cb|
|
28
|
+
return cb if cb[:name] == methodname
|
29
|
+
end
|
30
|
+
return nil
|
31
|
+
end
|
32
|
+
|
33
|
+
# return the details for the final time the specified callback was fired
|
34
|
+
def final_occurance_of(methodname)
|
35
|
+
returnme = nil
|
36
|
+
callbacks.each do |cb|
|
37
|
+
returnme = cb if cb[:name] == methodname
|
38
|
+
end
|
39
|
+
return returnme
|
40
|
+
end
|
41
|
+
|
42
|
+
# return the first occurrence of a particular series of callbacks
|
43
|
+
def series(*methods)
|
44
|
+
return nil if methods.empty?
|
45
|
+
|
46
|
+
indexes = (0..(callbacks.size-1-methods.size))
|
47
|
+
method_indexes = (0..(methods.size-1))
|
48
|
+
match = nil
|
49
|
+
|
50
|
+
indexes.each do |idx|
|
51
|
+
count = methods.size
|
52
|
+
method_indexes.each do |midx|
|
53
|
+
count -= 1 if callbacks[idx+midx][:name] == methods[midx]
|
54
|
+
end
|
55
|
+
match = idx and break if count == 0
|
56
|
+
end
|
57
|
+
|
58
|
+
if match
|
59
|
+
return callbacks[match, methods.size]
|
60
|
+
else
|
61
|
+
return nil
|
62
|
+
end
|
16
63
|
end
|
17
64
|
end
|
18
65
|
end
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -54,6 +54,7 @@ class PDF::Reader
|
|
54
54
|
# by specifying a PDF::Reader::Reference object that contains the object's ID and revision
|
55
55
|
# number
|
56
56
|
def object (ref, save_pos = true)
|
57
|
+
return ref unless ref.kind_of?(Reference)
|
57
58
|
pos = @buffer.pos if save_pos
|
58
59
|
parser = Parser.new(@buffer.seek(offset_for(ref)), self).object(ref.id, ref.gen)
|
59
60
|
@buffer.seek(pos) if save_pos
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: "0.6"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Jones
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-
|
12
|
+
date: 2008-02-26 00:00:00 +11:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -37,7 +37,11 @@ files:
|
|
37
37
|
- lib/pdf/reader/error.rb
|
38
38
|
- lib/pdf/reader/content.rb
|
39
39
|
- lib/pdf/reader/parser.rb
|
40
|
+
- lib/pdf/reader/cmap.rb
|
41
|
+
- lib/pdf/reader/encoding.rb
|
40
42
|
- lib/pdf/reader/register_receiver.rb
|
43
|
+
- lib/pdf/reader/font.rb
|
44
|
+
- lib/pdf/reader/glyphlist.txt
|
41
45
|
- lib/pdf/reader.rb
|
42
46
|
- Rakefile
|
43
47
|
- README
|