fireinc-pdf-reader 0.11.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +168 -0
- data/MIT-LICENSE +21 -0
- data/README.rdoc +137 -0
- data/Rakefile +34 -0
- data/TODO +45 -0
- data/bin/pdf_list_callbacks +15 -0
- data/bin/pdf_object +48 -0
- data/bin/pdf_text +15 -0
- data/examples/callbacks.rb +21 -0
- data/examples/extract_bates.rb +49 -0
- data/examples/extract_images.rb +108 -0
- data/examples/hash.rb +12 -0
- data/examples/metadata.rb +25 -0
- data/examples/page_counter_improved.rb +23 -0
- data/examples/page_counter_naive.rb +24 -0
- data/examples/rspec.rb +57 -0
- data/examples/text.rb +40 -0
- data/examples/version.rb +25 -0
- data/lib/pdf/hash.rb +15 -0
- data/lib/pdf/reader/abstract_strategy.rb +81 -0
- data/lib/pdf/reader/buffer.rb +346 -0
- data/lib/pdf/reader/cmap.rb +138 -0
- data/lib/pdf/reader/encoding.rb +190 -0
- data/lib/pdf/reader/encodings/mac_expert.txt +159 -0
- data/lib/pdf/reader/encodings/mac_roman.txt +128 -0
- data/lib/pdf/reader/encodings/pdf_doc.txt +40 -0
- data/lib/pdf/reader/encodings/standard.txt +47 -0
- data/lib/pdf/reader/encodings/symbol.txt +154 -0
- data/lib/pdf/reader/encodings/win_ansi.txt +29 -0
- data/lib/pdf/reader/encodings/zapf_dingbats.txt +201 -0
- data/lib/pdf/reader/error.rb +53 -0
- data/lib/pdf/reader/filter.rb +219 -0
- data/lib/pdf/reader/font.rb +133 -0
- data/lib/pdf/reader/form_xobject.rb +83 -0
- data/lib/pdf/reader/glyphlist.txt +4322 -0
- data/lib/pdf/reader/lzw.rb +123 -0
- data/lib/pdf/reader/metadata_strategy.rb +56 -0
- data/lib/pdf/reader/object_cache.rb +85 -0
- data/lib/pdf/reader/object_hash.rb +289 -0
- data/lib/pdf/reader/object_stream.rb +51 -0
- data/lib/pdf/reader/page.rb +185 -0
- data/lib/pdf/reader/page_text_receiver.rb +278 -0
- data/lib/pdf/reader/pages_strategy.rb +475 -0
- data/lib/pdf/reader/parser.rb +225 -0
- data/lib/pdf/reader/print_receiver.rb +18 -0
- data/lib/pdf/reader/reference.rb +66 -0
- data/lib/pdf/reader/register_receiver.rb +95 -0
- data/lib/pdf/reader/stream.rb +69 -0
- data/lib/pdf/reader/text_receiver.rb +264 -0
- data/lib/pdf/reader/token.rb +41 -0
- data/lib/pdf/reader/xref.rb +220 -0
- data/lib/pdf/reader.rb +296 -0
- data/lib/pdf-reader.rb +1 -0
- metadata +211 -0
@@ -0,0 +1,18 @@
|
|
1
|
+
class PDF::Reader
|
2
|
+
# A receiver that accepts any callback and prints its name and arguments
# to standard output.
class PrintReceiver

  # Array created empty by the constructor. Nothing in this class appends
  # to it; it is exposed as a read/write attribute.
  attr_accessor :callbacks

  def initialize
    @callbacks = []
  end

  # Reports that every message is supported, since method_missing accepts
  # any call.
  #
  # The optional second parameter mirrors Object#respond_to? so callers
  # (and Ruby itself) may pass the include_all flag without triggering an
  # ArgumentError.
  def respond_to?(meth, include_all = false)
    true
  end

  # Companion to method_missing so Object#method agrees with respond_to?.
  def respond_to_missing?(meth, include_all = false)
    true
  end

  # Prints "<callback name> => <inspected argument list>" for any message
  # that has no explicit method.
  def method_missing(methodname, *args)
    puts "#{methodname} => #{args.inspect}"
  end
end
|
18
|
+
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
################################################################################
|
28
|
+
# An internal PDF::Reader class that represents an indirect reference to a PDF Object
|
29
|
+
class Reference
  attr_reader :id, :gen

  ################################################################################
  # Create a new Reference to an object with the specified id and revision number
  def initialize(id, gen)
    @id, @gen = id, gen
  end

  ################################################################################
  # returns the current Reference object in an array with a single element
  def to_a
    [self]
  end

  ################################################################################
  # returns the ID of this reference. Use with caution, ignores the generation id
  def to_i
    id
  end

  ################################################################################
  # returns true if the provided object points to the same PDF Object as the
  # current object.
  #
  # The comparison is made on the "id:gen" key itself rather than on its hash
  # value, so two different references can never compare equal because of a
  # hash collision.
  def ==(obj)
    return false unless obj.kind_of?(PDF::Reader::Reference)

    reference_key == obj.reference_key
  end
  alias :eql? :==

  ################################################################################
  # returns a hash based on the PDF::Reference this object points to. Two
  # different Reference objects that point to the same PDF Object will
  # return an identical hash
  def hash
    reference_key.hash
  end

  protected

  ################################################################################
  # the "id:gen" string that both equality and hashing are derived from, which
  # keeps == / eql? and hash consistent with each other
  def reference_key
    "#{id}:#{gen}"
  end
  ################################################################################
end
|
64
|
+
################################################################################
|
65
|
+
end
|
66
|
+
################################################################################
|
@@ -0,0 +1,95 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
# Copyright (C) 2010 James Healy (jimmy@deefa.com)
|
4
|
+
|
5
|
+
class PDF::Reader
|
6
|
+
|
7
|
+
# An example receiver that just records all callbacks generated by parsing
|
8
|
+
# a PDF file.
|
9
|
+
#
|
10
|
+
# Useful for testing the contents of a file in an rspec/test-unit suite.
|
11
|
+
#
|
12
|
+
# Usage:
|
13
|
+
#
|
14
|
+
# receiver = PDF::Reader::RegisterReceiver.new
|
15
|
+
# PDF::Reader.file("somefile.pdf", receiver)
|
16
|
+
# callback = receiver.first_occurance_of(:show_text)
|
17
|
+
# callback[:args].first.should == "Hello World"
|
18
|
+
#
|
19
|
+
class RegisterReceiver

  # Every recorded callback, in the order received. Each entry is a hash of
  # the form {:name => Symbol, :args => Array}.
  attr_accessor :callbacks

  def initialize
    @callbacks = []
  end

  # Reports that every message is supported, since method_missing records
  # any call. The optional second parameter mirrors Object#respond_to? so
  # the include_all flag can be passed without an ArgumentError.
  def respond_to?(meth, include_all = false)
    true
  end

  # Companion to method_missing so Object#method agrees with respond_to?.
  def respond_to_missing?(meth, include_all = false)
    true
  end

  # Records any message without an explicit method as a callback entry.
  def method_missing(methodname, *args)
    callbacks << {:name => methodname.to_sym, :args => args}
  end

  # count the number of times a callback fired
  def count(methodname)
    callbacks.count { |cb| cb[:name] == methodname }
  end

  # return the details for every time the specified callback was fired
  def all(methodname)
    callbacks.select { |cb| cb[:name] == methodname }
  end

  # return only the argument lists for every time the specified callback
  # was fired
  def all_args(methodname)
    all(methodname).map { |cb| cb[:args] }
  end

  # return the details for the first time the specified callback was fired,
  # or nil if it never fired
  def first_occurance_of(methodname)
    callbacks.find { |cb| cb[:name] == methodname }
  end

  # return the details for the final time the specified callback was fired,
  # or nil if it never fired
  def final_occurance_of(methodname)
    callbacks.reverse_each.find { |cb| cb[:name] == methodname }
  end

  # return the first occurrence of a particular series of callbacks, as an
  # array of callback entries, or nil if the series never occurred.
  #
  # Every window of consecutive callbacks is examined, including the one
  # that ends on the last recorded callback.
  def series(*methods)
    return nil if methods.empty?

    callbacks.each_cons(methods.size).find do |window|
      window.map { |cb| cb[:name] } == methods
    end
  end
end
|
95
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
################################################################################
|
28
|
+
# An internal PDF::Reader class that represents a stream object from a PDF. Stream
|
29
|
+
# objects have 2 components, a dictionary that describes the content (size,
|
30
|
+
# compression, etc) and a stream of bytes.
|
31
|
+
#
|
32
|
+
class Stream
  # NOTE: this accessor exposes the stream dictionary under the name `hash`,
  # which replaces Object#hash for instances of this class.
  attr_accessor :hash
  attr_reader :data

  ################################################################################
  # Creates a new stream with the specified dictionary and data. The dictionary
  # should be a standard ruby hash, the data should be a standard ruby string.
  def initialize(hash, data)
    @hash = hash
    @data = data
    @udata = nil
  end

  ################################################################################
  # apply this streams filters to its data and return the result.
  #
  # Filters listed under :Filter are applied in order, each receiving the entry
  # of :DecodeParms at the same position (nil when there is none). The result is
  # cached, but only once every filter has succeeded, so a filter that raises
  # never leaves raw or partially decoded bytes behind as the cached answer.
  def unfiltered_data
    return @udata if @udata

    result = data.dup

    if hash.has_key?(:Filter)
      options = decode_parms

      Array(hash[:Filter]).each_with_index do |filter, index|
        result = Filter.new(filter, options[index]).filter(result)
      end
    end

    @udata = result
  end

  private

  ################################################################################
  # returns the :DecodeParms entry in indexable form: a lone Hash is wrapped in
  # an array, a missing or nil entry becomes an empty array, and anything else
  # is returned untouched.
  def decode_parms
    parms = hash[:DecodeParms]

    case parms
    when Hash then [parms]
    when nil  then []
    else parms
    end
  end
end
|
67
|
+
################################################################################
|
68
|
+
end
|
69
|
+
################################################################################
|
@@ -0,0 +1,264 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
################################################################################
|
28
|
+
# An example receiver class that processes all text found in a PDF file. All text that
|
29
|
+
# is found will be printed to the IO object specified in the constructor.
|
30
|
+
#
|
31
|
+
# Usage:
|
32
|
+
# receiver = PDF::Reader::TextReceiver.new($stdout)
|
33
|
+
# PDF::Reader.file("somefile.pdf", receiver)
|
34
|
+
#
|
35
|
+
# DEPRECATED: this class was deprecated in version 0.11.0 and will
|
36
|
+
# eventually be removed
|
37
|
+
class TextReceiver
  ################################################################################
  # Initialize with the library user's receiver. At the end of every page the
  # collected text is appended to it with <<.
  def initialize(main_receiver)
    @main_receiver = main_receiver
    @upper_corners = []
    # start with an empty state stack so end_document is safe even when no
    # page was ever begun
    @state = []
  end
  ################################################################################
  # Called when the document parsing begins
  def begin_document(root)
    @upper_corners = []
  end
  ################################################################################
  # Called when the document parsing ends
  def end_document
    @state.clear
  end
  ################################################################################
  # Called when a page container begins; records its effective upper corner
  def begin_page_container(page)
    @upper_corners.push(media_box_check(page))
  end
  ################################################################################
  # Called when a page container ends; discards its upper corner
  def end_page_container
    @upper_corners.pop
  end
  ################################################################################
  # Called when new page parsing begins. Resets the text state stack and all
  # per-page line tracking.
  def begin_page(info)
    @page = info

    @state = [{
      :char_spacing  => 0,
      :word_spacing  => 0,
      :hori_scaling  => 100,
      :leading       => 0,
      :tj_adjustment => 0,
    }]

    @upper_corners.push(media_box_check(info))

    @output         = []
    @line           = 0
    @location       = 0
    @displacement   = {}
    @smallest_y_loc = @upper_corners.last[:ury]
    @written_to     = false
  end
  ################################################################################
  # Called when page parsing ends. Sends the page's lines, joined with
  # newlines, to the main receiver.
  def end_page
    @main_receiver << @output.join("\n")
    @upper_corners.pop
  end
  ################################################################################
  # PDF operator BT
  def begin_text_object
    @state.push(@state.last.dup)
  end
  ################################################################################
  # PDF operator ET
  def end_text_object
    @state.pop
  end
  ################################################################################
  # PDF operator Tm
  def set_text_matrix_and_text_line_matrix(*args)
    # these variable names look bad, but they're from the PDF spec. Only the
    # sixth operand (f) is used.
    _a, _b, _c, _d, _e, f = *args
    calculate_line_and_location(f)
  end
  ################################################################################
  # PDF operator Tc
  def set_character_spacing(n)
    @state.last[:char_spacing] = n
  end
  ################################################################################
  # PDF operator Tw
  def set_word_spacing(n)
    @state.last[:word_spacing] = n
  end
  ################################################################################
  # PDF operator Tz
  def set_horizontal_text_scaling(n)
    # NOTE(review): an Integer n is divided with integer division, and the
    # per-page default stored in begin_page is 100 rather than 1 -- confirm
    # the intended unit before relying on :hori_scaling.
    @state.last[:hori_scaling] = n/100
  end
  ################################################################################
  # PDF operator TL
  def set_text_leading(n)
    @state.last[:leading] = n
  end
  ################################################################################
  # PDF operator T*
  def move_to_start_of_next_line
    move_text_position(0, @state.last[:leading])
  end
  ################################################################################
  # PDF operator Td. Only the vertical offset affects line tracking.
  def move_text_position(tx, ty)
    calculate_line_and_location(@location + ty)
  end
  ################################################################################
  # PDF operator TD
  def move_text_position_and_set_leading(tx, ty)
    set_text_leading(ty)
    move_text_position(tx, ty)
  end
  ################################################################################
  # PDF operator Tj. Appends the string to the current output line, preceded
  # by padding spaces when the pending TJ adjustment is below -1000 (one space
  # per 900 units of adjustment).
  def show_text(string)
    place = (@output[@line] ||= "")
    adjustment = @state.last[:tj_adjustment]

    place << " " * (adjustment.abs/900) if adjustment < -1000
    place << string

    @written_to = true
  end
  ################################################################################
  # NOTE(review): TS_UNITS_PER_H_CHAR, TS_UNITS_PER_V_CHAR, Matrix and @tm are
  # not defined anywhere in this class, and nothing in this class calls this
  # method -- presumably an unfinished grid-based renderer. Logic left as is.
  def super_show_text(string)
    urx = @upper_corners.last[:urx]/TS_UNITS_PER_H_CHAR
    ury = @upper_corners.last[:ury]/TS_UNITS_PER_V_CHAR

    x = (@tm[2,0]/TS_UNITS_PER_H_CHAR).to_i
    y = (ury - (@tm[2,1]/TS_UNITS_PER_V_CHAR)).to_i

    place = (@output[y] ||= (" " * urx.to_i))
    return if x+string.size >= urx

    string.split(//).each do |c|
      chars = 1

      case c
      when " "
        chars += @state.last[:word_spacing].to_i
        place[x-1, chars] = (" " * chars)
      else
        chars += @state.last[:char_spacing].to_i
        chars -= (@state.last[:tj_adjustment]/1000).to_i if @state.last[:tj_adjustment]
        chars = 1 if chars < 1

        place[x-1] = c
        place[x, chars-1] = (" " * (chars-1)) if chars > 1
      end

      x += chars
    end

    @tm += Matrix.rows([[1, 0, 0], [0, 1, 0], [x*TS_UNITS_PER_H_CHAR, y*TS_UNITS_PER_V_CHAR, 1]])
  end
  ################################################################################
  # PDF operator TJ. Numeric entries set the pending adjustment, every other
  # entry is shown as text. The adjustment in force before the call is
  # restored afterwards.
  def show_text_with_positioning(params)
    prev_adjustment = @state.last[:tj_adjustment]

    params.each do |p|
      case p
      # Integer rather than Fixnum: Fixnum no longer exists in current Ruby,
      # and Integer also matches on the versions that still had it
      when Float, Integer
        @state.last[:tj_adjustment] = p
      else
        show_text(p)
      end
    end

    @state.last[:tj_adjustment] = prev_adjustment
  end
  ################################################################################
  # PDF operator '
  def move_to_next_line_and_show_text(string)
    move_to_start_of_next_line
    show_text(string)
  end
  ################################################################################
  # PDF operator "
  def set_spacing_next_line_show_text(aw, ac, string)
    set_word_spacing(aw)
    set_character_spacing(ac)
    move_to_next_line_and_show_text(string)
  end
  ################################################################################
  # Returns the upper corner ({:urx, :ury}) that applies to dict: a copy of the
  # innermost recorded corner (or 0/0 when there is none), overridden by the
  # width and height of dict's :MediaBox when it has one.
  def media_box_check(dict)
    corners = (@upper_corners.last || {:urx => 0, :ury => 0}).dup

    if dict.has_key?(:MediaBox)
      media_box = dict[:MediaBox]
      corners[:urx] = media_box[2] - media_box[0]
      corners[:ury] = media_box[3] - media_box[1]
    end

    corners
  end
  ################################################################################
  # Maps a vertical location to an output line number and makes it current.
  #
  # Before any text has been written on the page every location maps to line 0.
  # Afterwards, a location not seen before that lies below the current location
  # (or below every location seen so far) starts a new line after the current
  # one; any other unseen location reuses the line of the closest recorded
  # location beneath it.
  def calculate_line_and_location(new_loc)
    key = new_loc

    if !@written_to
      @displacement[key] = 0
    elsif !@displacement.has_key?(key)
      if key < @location || key < @smallest_y_loc
        @displacement[key] = @line + 1
      else
        # NOTE(review): when no recorded location is lower than new_loc this
        # yields nil, and the comparison below then raises NoMethodError.
        key = @displacement.keys.find_all { |i| key > i }.max
        @displacement[key] = 0 unless @displacement.has_key?(key)
      end
    end

    @smallest_y_loc = key if key < @smallest_y_loc
    @location = key
    @line = @displacement[key]
  end
  ################################################################################
end
|
262
|
+
################################################################################
|
263
|
+
end
|
264
|
+
################################################################################
|
@@ -0,0 +1,41 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
################################################################################
|
28
|
+
# An internal PDF::Reader class that represents a single token from a PDF file.
|
29
|
+
#
|
30
|
+
# Behaves exactly like a Ruby String - it basically exists for convenience.
|
31
|
+
class Token < String # :nodoc:
  ################################################################################
  # Builds a token whose string content is the given value. Exactly one
  # argument is required.
  def initialize(val)
    super(val)
  end
  ################################################################################
end
|
39
|
+
################################################################################
|
40
|
+
end
|
41
|
+
################################################################################
|