pdf-reader 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +2 -0
- data/README +177 -0
- data/Rakefile +84 -0
- data/TODO +9 -0
- data/lib/pdf/reader.rb +106 -0
- data/lib/pdf/reader/buffer.rb +144 -0
- data/lib/pdf/reader/content.rb +289 -0
- data/lib/pdf/reader/error.rb +53 -0
- data/lib/pdf/reader/explore.rb +116 -0
- data/lib/pdf/reader/filter.rb +62 -0
- data/lib/pdf/reader/name.rb +37 -0
- data/lib/pdf/reader/parser.rb +203 -0
- data/lib/pdf/reader/reference.rb +55 -0
- data/lib/pdf/reader/register_receiver.rb +18 -0
- data/lib/pdf/reader/text_receiver.rb +259 -0
- data/lib/pdf/reader/token.rb +41 -0
- data/lib/pdf/reader/xref.rb +101 -0
- metadata +70 -0
@@ -0,0 +1,62 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
require 'zlib'
|
26
|
+
|
27
|
+
class PDF::Reader
|
28
|
+
################################################################################
|
29
|
+
# Various parts of a PDF file can be passed through a filter before being stored to provide
|
30
|
+
# support for features like compression and encryption. This class is for decoding that
|
31
|
+
# content.
|
32
|
+
#
|
33
|
+
# Currently only 1 filter type is supported. Hopefully support for others will be added
|
34
|
+
# in the future.
|
35
|
+
class Filter
  ################################################################################
  # Creates a new filter for decoding content.
  #
  # name    - the PDF filter name. Only "FlateDecode" is recognised.
  # options - decode parameters for the filter. They are stored, but no decoder
  #           in this class consults them.
  #
  # Raises UnsupportedFeatureError for any other filter name.
  def initialize(name, options)
    @options = options

    # `then` replaces the colon form, which is a syntax error on Ruby >= 1.9.
    case name
    when "FlateDecode" then @filter = :flate
    else raise UnsupportedFeatureError, "Unknown filter: #{name}"
    end
  end
  ################################################################################
  # Attempts to decode the specified data with the current filter.
  #
  # data - the encoded String
  #
  # Returns the decoded String. @filter is only ever set by #initialize to the
  # name of a decoder method of this class, so the dynamic dispatch is closed.
  def filter(data)
    send(@filter, data)
  end
  ################################################################################
  # Decodes the specified data with the Zlib compression algorithm.
  #
  # data - a zlib (deflate) compressed String
  #
  # Returns the inflated String. Zlib errors raised for corrupt or truncated
  # input propagate to the caller.
  def flate(data)
    z = Zlib::Inflate.new
    begin
      z << data
      z.inflate(nil)
    ensure
      # Release the native zstream now rather than waiting for the GC.
      z.close
    end
  end
  ################################################################################
end
|
60
|
+
################################################################################
|
61
|
+
end
|
62
|
+
################################################################################
|
@@ -0,0 +1,37 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
################################################################################
|
28
|
+
class Name < String
  ################################################################################
  # Builds a Name holding the same characters as +val+. A Name is a plain String
  # subclass, so it compares and hashes like the String it wraps.
  def initialize(val)
    super(val)
  end
  ################################################################################
end
|
35
|
+
################################################################################
|
36
|
+
end
|
37
|
+
################################################################################
|
@@ -0,0 +1,203 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
################################################################################
|
28
|
+
# An internal PDF::Reader class that reads objects from the PDF file and converts
|
29
|
+
# them into usable Ruby objects (hashes, arrays, true, false, etc.)
|
30
|
+
class Parser
  ################################################################################
  # Create a new parser around a PDF::Reader::Buffer object
  #
  # buffer - a PDF::Reader::Buffer object that contains PDF data
  # xref   - the cross-reference object used to resolve indirect references.
  #          This class only calls xref.object(reference) on it.
  def initialize(buffer, xref)
    @buffer = buffer
    @xref = xref
  end
  ################################################################################
  # Reads the next token from the underlying buffer and converts it to an
  # appropriate object.
  #
  # operators - a hash of supported operators to read from the underlying buffer.
  #
  # Returns a Reference, Name, Hash, Array, String, true, false, nil or Token.
  # Any token that matches none of the cases and is not a key of +operators+ is
  # converted with to_f, so unrecognised tokens come back as 0.0.
  def parse_token(operators = {})
    ref = Reference.from_buffer(@buffer)
    return ref if ref

    token = @buffer.token

    # `then` replaces the colon form, which is a syntax error on Ruby >= 1.9.
    case token
    when "/"                   then Name.new(@buffer.token)
    when "<<"                  then dictionary
    when "["                   then array
    when "("                   then string
    when "<"                   then hex_string
    when "true"                then true
    when "false"               then false
    when "null"                then nil
    when "obj", "endobj"       then Token.new(token)
    when "stream", "endstream" then Token.new(token)
    when ">>", "]", ">"        then Token.new(token)
    else
      operators.key?(token) ? Token.new(token) : token.to_f
    end
  end
  ################################################################################
  # Reads a PDF dict from the buffer and converts it to a Ruby Hash.
  #
  # Keys are Name objects. Raises MalformedPDFError when a key is not a Name.
  def dictionary
    dict = {}

    loop do
      key = parse_token
      break if key.kind_of?(Token) && key == ">>"
      raise MalformedPDFError, "PDF malformed, dictionary key is not a name" unless key.kind_of?(Name)

      value = parse_token
      # A key directly followed by ">>" has no value.
      Error.str_assert_not(value, ">>") if value.kind_of?(Token)
      dict[key] = value
    end

    dict
  end
  ################################################################################
  # Reads a PDF array from the buffer and converts it to a Ruby Array.
  def array
    a = []

    loop do
      item = parse_token
      break if item.kind_of?(Token) && item == "]"
      a << item
    end

    a
  end
  ################################################################################
  # Reads a PDF hex string from the buffer and converts it to a Ruby String.
  #
  # An odd number of hex digits is padded with a trailing "0".
  def hex_string
    str = @buffer.token

    # An empty hex string "<>" has no digit token: the token just read is
    # already the closing delimiter.
    return "" if str == ">"

    Error.str_assert(@buffer.token, ">")

    str += "0" unless str.size % 2 == 0
    str.scan(/../).map { |pair| pair.hex.chr }.join
  end
  ################################################################################
  # Reads a PDF String from the buffer and converts it to a Ruby String.
  #
  # Balanced unescaped parentheses are kept as part of the string and the
  # backslash escapes \n \r \t \b \f \( \) \\ and \ddd (octal) are decoded.
  #
  # NOTE(review): nothing here detects the end of the buffer, so an unterminated
  # string presumably never leaves the loop - confirm against Buffer.
  def string
    str = ""
    count = 1 # depth of open parentheses; the opening "(" is already consumed

    while count != 0
      @buffer.ready_token(false, false)
      i = @buffer.raw.index(/[\\\(\)]/)

      if i.nil?
        # No delimiter or escape in what is buffered: take all of it.
        str << @buffer.raw + "\n"
        @buffer.raw.replace("")
        next
      end

      str << @buffer.head(i, false)
      to_remove = 1

      case @buffer.raw[0, 1]
      when "("
        str << "("
        count += 1
      when ")"
        count -= 1
        str << ")" unless count == 0
      when "\\"
        to_remove = 2
        case @buffer.raw[1, 1]
        when ""   then to_remove = 1 # backslash is the last buffered character
        when "n"  then str << "\n"
        when "r"  then str << "\r"
        when "t"  then str << "\t"
        when "b"  then str << "\b"
        when "f"  then str << "\f"
        when "("  then str << "("
        when ")"  then str << ")"
        when "\\" then str << "\\"
        else
          if m = @buffer.raw.match(/\A\\([0-7]{1,3})/)
            # Only 0-7 are octal digits; overflow above one byte is ignored.
            to_remove = m[0].size
            str << (m[1].oct & 0xFF).chr
          else
            # Unknown escape: drop only the backslash and keep the character
            # that follows it.
            to_remove = 1
          end
        end
      end

      @buffer.head(to_remove, false)
    end

    str
  end
  ################################################################################
  # Reads an entire PDF object from the buffer and returns it as a Ruby object,
  # or as the decoded stream data when the object is a stream.
  #
  # id  - the object ID to return
  # gen - the object revision number to return
  #
  # Raises MalformedPDFError when the object is followed by anything other than
  # "endobj" or "stream".
  def object(id, gen)
    Error.assert_equal(parse_token, id)
    Error.assert_equal(parse_token, gen)
    Error.str_assert(parse_token, "obj")

    obj = parse_token
    post_obj = parse_token

    case post_obj
    when "endobj" then obj
    when "stream" then stream(obj)
    else raise MalformedPDFError, "PDF malformed, unexpected token #{post_obj}"
    end
  end
  ################################################################################
  # Decodes the contents of a PDF Stream and returns it as a Ruby String.
  #
  # dict - the stream dictionary. Its 'Length' entry is replaced in place by the
  #        resolved value when it is an indirect Reference.
  #
  # Raises MalformedPDFError when dict is not a Hash or has no 'Length'.
  def stream(dict)
    unless dict.kind_of?(Hash) && dict.has_key?('Length')
      raise MalformedPDFError, "PDF malformed, missing stream length"
    end

    dict['Length'] = @xref.object(dict['Length']) if dict['Length'].kind_of?(Reference)
    data = @buffer.read(dict['Length'])
    Error.str_assert(parse_token, "endstream")
    Error.str_assert(parse_token, "endobj")

    if dict.has_key?('Filter')
      # A single parameter dictionary must be wrapped, not converted:
      # Hash#to_a would explode it into [key, value] pairs.
      params = dict['DecodeParms']
      options = params.kind_of?(Hash) ? [params] : Array(params)

      # Array() also wraps a lone filter Name; String#to_a is gone on Ruby >= 1.9.
      Array(dict['Filter']).each_with_index do |filter, index|
        data = Filter.new(filter, options[index]).filter(data)
      end
    end

    data
  end
  ################################################################################
end
|
201
|
+
################################################################################
|
202
|
+
end
|
203
|
+
################################################################################
|
@@ -0,0 +1,55 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
################################################################################
|
28
|
+
# An internal PDF::Reader class that represents an indirect reference to a PDF Object
|
29
|
+
class Reference
  ################################################################################
  # The object id and revision (generation) number this reference points at.
  attr_reader :id, :gen
  ################################################################################
  # Check if the next token in the buffer is a reference, and return a
  # PDF::Reader::Reference instance. Returns nil if the next token isn't an
  # indirect reference, in which case nothing is consumed from the buffer.
  #
  # buffer - an object responding to ready_token, raw and head(size)
  def self.from_buffer(buffer)
    buffer.ready_token

    # \A rather than ^: head() below always consumes from the very start of the
    # buffered text, so the match has to begin there as well. ^ would also match
    # after an embedded newline and the wrong characters would be removed.
    m = buffer.raw.match(/\A(\d+)\s+(\d+)\s+R\b/)
    return nil unless m

    buffer.head(m[0].size)
    new(m[1].to_i, m[2].to_i)
  end
  ################################################################################
  # Create a new Reference to an object with the specified id and revision number
  def initialize(id, gen)
    @id, @gen = id, gen
  end
  ################################################################################
  # Returns the current Reference object in an array with a single element
  def to_a
    [self]
  end
  ################################################################################
end
|
53
|
+
################################################################################
|
54
|
+
end
|
55
|
+
################################################################################
|
@@ -0,0 +1,18 @@
|
|
1
|
+
class PDF::Reader
|
2
|
+
# A receiver that accepts every callback and records only its name.
class RegisterReceiver

  # The names (Symbols) of the callbacks received so far, in call order.
  attr_accessor :callbacks

  def initialize
    @callbacks = []
  end

  # Claims to respond to every method so that any callback is accepted.
  #
  # include_all is accepted (and ignored) so the standard two-argument form
  # respond_to?(name, true) no longer raises ArgumentError.
  def respond_to?(meth, include_all = false)
    true
  end

  # Keeps Object#method and friends consistent with method_missing below.
  def respond_to_missing?(meth, include_all = false)
    true
  end

  # Records the name of the invoked callback; the arguments are discarded.
  # Returns the callbacks array.
  def method_missing(methodname, *args)
    callbacks << methodname.to_sym
  end
end
|
18
|
+
end
|
@@ -0,0 +1,259 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
################################################################################
|
28
|
+
# An example receiver class that processes all text found in a PDF file. All text that
|
29
|
+
# is found will be printed to the IO object specified in the constructor.
|
30
|
+
#
|
31
|
+
# Usage:
|
32
|
+
# receiver = PDF::Reader::TextReceiver.new($stdout)
|
33
|
+
# PDF::Reader.file("somefile.pdf", receiver)
|
34
|
+
class TextReceiver
  ################################################################################
  # Initialize with the library user's receiver.
  #
  # main_receiver - any object responding to <<. It is handed the text of each
  #                 page (lines joined by "\n") when the page ends.
  def initialize(main_receiver)
    @main_receiver = main_receiver
    @upper_corners = []
  end
  ################################################################################
  # Called when the document parsing begins
  def begin_document(root)
    @upper_corners = []
  end
  ################################################################################
  # Called when the document parsing ends
  def end_document
    # @state is only created by begin_page; a document without pages has none.
    @state.clear if @state
  end
  ################################################################################
  # Called when a page container begins; pushes the page size in effect for it.
  def begin_page_container(page)
    @upper_corners.push(media_box_check(page))
  end
  ################################################################################
  # Called when a page container ends; drops the page size pushed for it.
  def end_page_container
    @upper_corners.pop
  end
  ################################################################################
  # Called when new page parsing begins. Resets the text state and the
  # line-tracking bookkeeping for the page.
  def begin_page(info)
    @page = info

    @state = [{
      :char_spacing  => 0,
      :word_spacing  => 0,
      :hori_scaling  => 100,
      :leading       => 0,
      :tj_adjustment => 0
    }]

    @upper_corners.push(media_box_check(info))

    @output = []        # one String per output line
    @line = 0           # index into @output currently being written
    @location = 0       # current vertical text position
    @displacement = {}  # vertical position => output line index
    @smallest_y_loc = @upper_corners.last[:ury]
    @written_to = false # true once any text was shown on this page
  end
  ################################################################################
  # Called when page parsing ends; emits the page text to the main receiver.
  def end_page
    @main_receiver << @output.join("\n")
    @upper_corners.pop
  end
  ################################################################################
  # PDF operator BT
  def begin_text_object
    @state.push(@state.last.dup)
  end
  ################################################################################
  # PDF operator ET
  def end_text_object
    @state.pop
  end
  ################################################################################
  # PDF operator Tm. Only the vertical translation (f) is used.
  def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f)
    calculate_line_and_location(f)
  end
  ################################################################################
  # PDF operator Tc
  def set_character_spacing(n)
    @state.last[:char_spacing] = n
  end
  ################################################################################
  # PDF operator Tw
  def set_word_spacing(n)
    @state.last[:word_spacing] = n
  end
  ################################################################################
  # PDF operator Tz
  #
  # NOTE(review): stores n/100 while begin_page initialises :hori_scaling to
  # 100. Nothing in this class reads the value, so it is left as it was -
  # confirm the intended unit.
  def set_horizontal_text_scaling(n)
    @state.last[:hori_scaling] = n/100
  end
  ################################################################################
  # PDF operator TL
  def set_text_leading(n)
    @state.last[:leading] = n
  end
  ################################################################################
  # PDF operator T*. Defined by the PDF specification as "0 -Tl Td": the move
  # is by the negated leading.
  def move_to_start_of_next_line
    move_text_position(0, -@state.last[:leading])
  end
  ################################################################################
  # PDF operator Td. Only the vertical offset (ty) is used.
  def move_text_position(tx, ty)
    calculate_line_and_location(@location + ty)
  end
  ################################################################################
  # PDF operator TD. Defined by the PDF specification as "-ty TL" followed by
  # "tx ty Td".
  def move_text_position_and_set_leading(tx, ty)
    set_text_leading(-ty)
    move_text_position(tx, ty)
  end
  ################################################################################
  # PDF operator Tj. Appends the string to the current output line, preceded by
  # padding spaces when a large negative TJ adjustment is in effect.
  def show_text(string)
    place = (@output[@line] ||= "")

    adjustment = @state.last[:tj_adjustment]
    place << " " * (adjustment.abs / 900).to_i if adjustment < -1000
    place << string

    @written_to = true
  end
  ################################################################################
  # NOTE(review): nothing in this class calls this method, and it relies on
  # @tm, Matrix, TS_UNITS_PER_H_CHAR and TS_UNITS_PER_V_CHAR, none of which are
  # defined here. It looks like an unfinished grid-based renderer; the logic is
  # kept as it was.
  def super_show_text(string)
    urx = @upper_corners.last[:urx]/TS_UNITS_PER_H_CHAR
    ury = @upper_corners.last[:ury]/TS_UNITS_PER_V_CHAR

    x = (@tm[2,0]/TS_UNITS_PER_H_CHAR).to_i
    y = (ury - (@tm[2,1]/TS_UNITS_PER_V_CHAR)).to_i

    place = (@output[y] ||= (" " * urx.to_i))
    return if x+string.size >= urx

    string.split(//).each do |c|
      chars = 1

      case c
      when " "
        chars += @state.last[:word_spacing].to_i
        place[x-1, chars] = (" " * chars)
      else
        chars += @state.last[:char_spacing].to_i
        chars -= (@state.last[:tj_adjustment]/1000).to_i if @state.last[:tj_adjustment]
        chars = 1 if chars < 1

        place[x-1] = c
        place[x, chars-1] = (" " * (chars-1)) if chars > 1
      end

      x += chars
    end

    @tm += Matrix.rows([[1, 0, 0], [0, 1, 0], [x*TS_UNITS_PER_H_CHAR, y*TS_UNITS_PER_V_CHAR, 1]])
  end
  ################################################################################
  # PDF operator TJ. Float elements set the adjustment applied to the text that
  # follows them; every other element is shown as text. The adjustment in
  # effect before the call is restored afterwards.
  def show_text_with_positioning(params)
    prev_adjustment = @state.last[:tj_adjustment]

    params.each do |p|
      case p
      when Float
        @state.last[:tj_adjustment] = p
      else
        show_text(p)
      end
    end

    @state.last[:tj_adjustment] = prev_adjustment
  end
  ################################################################################
  # PDF operator '
  def move_to_next_line_and_show_text(string)
    move_to_start_of_next_line
    show_text(string)
  end
  ################################################################################
  # PDF operator "
  def set_spacing_next_line_show_text(aw, ac, string)
    set_word_spacing(aw)
    set_character_spacing(ac)
    move_to_next_line_and_show_text(string)
  end
  ################################################################################
  # Returns the page size ({:urx, :ury}) in effect for +dict+: the size derived
  # from its 'MediaBox' entry when present, otherwise a copy of the enclosing
  # size (0 x 0 when there is none).
  def media_box_check(dict)
    corners = (@upper_corners.last || {:urx => 0, :ury => 0}).dup

    if dict.has_key?('MediaBox')
      media_box = dict['MediaBox']
      corners[:urx] = media_box[2] - media_box[0]
      corners[:ury] = media_box[3] - media_box[1]
    end

    corners
  end
  ################################################################################
  # Maps a vertical text position to an output line and makes it current.
  #
  # new_loc - the new vertical position
  #
  # Until text has been written, every position maps to line 0. Afterwards an
  # unseen position below the current one (or below everything seen so far)
  # starts a new line; any other unseen position is snapped to the nearest known
  # position beneath it.
  def calculate_line_and_location(new_loc)
    key = new_loc

    if !@written_to
      @displacement[key] = 0
    elsif !@displacement.has_key?(key)
      if key < @location || key < @smallest_y_loc
        @displacement[key] = @line + 1
      else
        nearest = @displacement.keys.select { |known| known < key }.max
        if nearest
          key = nearest
        else
          # No known position lies below this one. Map it to line 0 instead of
          # carrying a nil key into the comparisons below.
          @displacement[key] = 0
        end
      end
    end

    @smallest_y_loc = key if key < @smallest_y_loc
    @location = key
    @line = @displacement[key]
  end
  ################################################################################
end
|
257
|
+
################################################################################
|
258
|
+
end
|
259
|
+
################################################################################
|