pdf-reader 0.5.1 → 0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +9 -0
- data/README +22 -0
- data/Rakefile +1 -1
- data/TODO +21 -3
- data/lib/pdf/reader.rb +4 -1
- data/lib/pdf/reader/buffer.rb +3 -3
- data/lib/pdf/reader/cmap.rb +48 -0
- data/lib/pdf/reader/content.rb +37 -3
- data/lib/pdf/reader/encoding.rb +1012 -0
- data/lib/pdf/reader/explore.rb +1 -1
- data/lib/pdf/reader/filter.rb +1 -1
- data/lib/pdf/reader/font.rb +75 -0
- data/lib/pdf/reader/glyphlist.txt +4322 -0
- data/lib/pdf/reader/parser.rb +29 -27
- data/lib/pdf/reader/register_receiver.rb +48 -1
- data/lib/pdf/reader/xref.rb +1 -0
- metadata +6 -2
data/lib/pdf/reader/parser.rb
CHANGED
@@ -47,19 +47,19 @@ class PDF::Reader
|
|
47
47
|
token = @buffer.token
|
48
48
|
|
49
49
|
case token
|
50
|
-
when "/"
|
51
|
-
when "<<"
|
52
|
-
when "["
|
53
|
-
when "("
|
54
|
-
when "<"
|
55
|
-
when "true"
|
56
|
-
when "false"
|
57
|
-
when "null"
|
58
|
-
when "obj", "endobj"
|
59
|
-
when "stream", "endstream"
|
60
|
-
when ">>", "]", ">"
|
50
|
+
when "/" then return Name.new(@buffer.token)
|
51
|
+
when "<<" then return dictionary()
|
52
|
+
when "[" then return array()
|
53
|
+
when "(" then return string()
|
54
|
+
when "<" then return hex_string()
|
55
|
+
when "true" then return true
|
56
|
+
when "false" then return false
|
57
|
+
when "null" then return nil
|
58
|
+
when "obj", "endobj" then return Token.new(token)
|
59
|
+
when "stream", "endstream" then return Token.new(token)
|
60
|
+
when ">>", "]", ">" then return Token.new(token)
|
61
61
|
else
|
62
|
-
if operators.has_key?(token)
|
62
|
+
if operators.has_key?(token) then return Token.new(token)
|
63
63
|
else return token.to_f
|
64
64
|
end
|
65
65
|
end
|
@@ -132,15 +132,15 @@ class PDF::Reader
|
|
132
132
|
when "\\"
|
133
133
|
to_remove = 2
|
134
134
|
case @buffer.raw[1, 1]
|
135
|
-
when ""
|
136
|
-
when "n"
|
137
|
-
when "r"
|
138
|
-
when "t"
|
139
|
-
when "b"
|
140
|
-
when "f"
|
141
|
-
when "("
|
142
|
-
when ")"
|
143
|
-
when "\\"
|
135
|
+
when "" then to_remove = 1
|
136
|
+
when "n" then str << "\n"
|
137
|
+
when "r" then str << "\r"
|
138
|
+
when "t" then str << "\t"
|
139
|
+
when "b" then str << "\b"
|
140
|
+
when "f" then str << "\f"
|
141
|
+
when "(" then str << "("
|
142
|
+
when ")" then str << ")"
|
143
|
+
when "\\" then str << "\\"
|
144
144
|
else
|
145
145
|
if m = @buffer.raw.match(/^\\(\d{1,3})/)
|
146
146
|
to_remove = m[0].size
|
@@ -168,8 +168,8 @@ class PDF::Reader
|
|
168
168
|
post_obj = parse_token
|
169
169
|
|
170
170
|
case post_obj
|
171
|
-
when "endobj"
|
172
|
-
when "stream"
|
171
|
+
when "endobj" then return obj
|
172
|
+
when "stream" then return stream(obj)
|
173
173
|
else raise MalformedPDFError, "PDF malformed, unexpected token #{post_obj}"
|
174
174
|
end
|
175
175
|
end
|
@@ -177,8 +177,7 @@ class PDF::Reader
|
|
177
177
|
# Decodes the contents of a PDF Stream and returns it as a Ruby String.
|
178
178
|
def stream (dict)
|
179
179
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?('Length')
|
180
|
-
|
181
|
-
data = @buffer.read(dict['Length'])
|
180
|
+
data = @buffer.read(@xref.object(dict['Length']))
|
182
181
|
Error.str_assert(parse_token, "endstream")
|
183
182
|
Error.str_assert(parse_token, "endobj")
|
184
183
|
|
@@ -186,14 +185,17 @@ class PDF::Reader
|
|
186
185
|
options = []
|
187
186
|
|
188
187
|
if dict.has_key?('DecodeParms')
|
189
|
-
options = dict['DecodeParms']
|
188
|
+
options = Array(dict['DecodeParms'])
|
190
189
|
end
|
191
190
|
|
192
|
-
dict['Filter'].
|
191
|
+
Array(dict['Filter']).each_with_index do |filter, index|
|
193
192
|
data = Filter.new(filter, options[index]).filter(data)
|
194
193
|
end
|
195
194
|
end
|
196
195
|
|
196
|
+
# this stream is a cmap
|
197
|
+
data = PDF::Reader::CMap.new(data) if data.include?("begincmap") && data.include?("endcmap")
|
198
|
+
|
197
199
|
data
|
198
200
|
end
|
199
201
|
################################################################################
|
@@ -12,7 +12,54 @@ class PDF::Reader
|
|
12
12
|
end
|
13
13
|
|
14
14
|
# Ruby invokes this hook for any message the receiver has no method for; the
# body does no dispatching, it only appends a record to +callbacks+.
# The removed (-) line appended just the method name as a symbol; the added (+)
# line appends a hash holding :name (the method name as a symbol) and :args
# (the arguments exactly as they were passed).
def method_missing(methodname, *args)
|
15
|
-
callbacks << methodname.to_sym
|
15
|
+
callbacks << {:name => methodname.to_sym, :args => args}
|
16
|
+
end
|
17
|
+
|
18
|
+
# count the number of times a callback fired
|
19
|
+
# Counts the recorded callbacks whose :name equals +methodname+.
#
# methodname - the callback name to look for; compared with == against the
#              :name entry of each recorded callback.
#
# Returns an Integer, 0 when nothing matches or nothing has been recorded.
def count(methodname)
  # The block form of count is invoked on the callbacks collection, so it is
  # the collection's own count, not a recursive call to this method.
  callbacks.count { |cb| cb[:name] == methodname }
end
|
24
|
+
|
25
|
+
# return the details for the first time the specified callback was fired
|
26
|
+
# Looks up the earliest recorded callback whose :name equals +methodname+.
#
# methodname - the callback name to look for; compared with == against the
#              :name entry of each recorded callback.
#
# Returns the matching callback record itself (not a copy), or nil when no
# recorded callback has that name.
def first_occurance_of(methodname)
  callbacks.find { |cb| cb[:name] == methodname }
end
|
32
|
+
|
33
|
+
# return the details for the final time the specified callback was fired
|
34
|
+
# Looks up the most recently recorded callback whose :name equals +methodname+.
#
# methodname - the callback name to look for; compared with == against the
#              :name entry of each recorded callback.
#
# Returns the matching callback record itself (not a copy), or nil when no
# recorded callback has that name.
def final_occurance_of(methodname)
  # Walk from the end so the search stops at the first hit instead of visiting
  # every record and repeatedly overwriting a temporary.
  callbacks.reverse_each { |cb| return cb if cb[:name] == methodname }
  nil
end
|
41
|
+
|
42
|
+
# return the first occurrence of a particular series of callbacks
|
43
|
+
# Finds the first run of consecutive recorded callbacks whose names match
# +methods+, in order.
#
# methods - one or more callback names; each is compared with == against the
#           :name entry of the recorded callback at the same offset in the run.
#
# Returns an Array holding the matching callback records (methods.size
# entries), or nil when no names were given, when more names were given than
# callbacks were recorded, or when no such run exists.
def series(*methods)
  return nil if methods.empty?

  names = callbacks.map { |cb| cb[:name] }

  # The last position a run can start at is size - length. The previous bound
  # subtracted one more, so a run ending on the final callback (or spanning the
  # whole list) was never examined.
  last_start = names.size - methods.size
  return nil if last_start < 0

  start = (0..last_start).find { |idx| names[idx, methods.size] == methods }
  start ? callbacks[start, methods.size] : nil
end
|
17
64
|
end
|
18
65
|
end
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -54,6 +54,7 @@ class PDF::Reader
|
|
54
54
|
# by specifying a PDF::Reader::Reference object that contains the object's ID and revision
|
55
55
|
# number
|
56
56
|
def object (ref, save_pos = true)
|
57
|
+
return ref unless ref.kind_of?(Reference)
|
57
58
|
pos = @buffer.pos if save_pos
|
58
59
|
parser = Parser.new(@buffer.seek(offset_for(ref)), self).object(ref.id, ref.gen)
|
59
60
|
@buffer.seek(pos) if save_pos
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: "0.6"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Jones
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-
|
12
|
+
date: 2008-02-26 00:00:00 +11:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -37,7 +37,11 @@ files:
|
|
37
37
|
- lib/pdf/reader/error.rb
|
38
38
|
- lib/pdf/reader/content.rb
|
39
39
|
- lib/pdf/reader/parser.rb
|
40
|
+
- lib/pdf/reader/cmap.rb
|
41
|
+
- lib/pdf/reader/encoding.rb
|
40
42
|
- lib/pdf/reader/register_receiver.rb
|
43
|
+
- lib/pdf/reader/font.rb
|
44
|
+
- lib/pdf/reader/glyphlist.txt
|
41
45
|
- lib/pdf/reader.rb
|
42
46
|
- Rakefile
|
43
47
|
- README
|