pdf-reader 1.4.1 → 2.0.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,265 +0,0 @@
1
- # coding: utf-8
2
-
3
- ################################################################################
4
- #
5
- # Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining
8
- # a copy of this software and associated documentation files (the
9
- # "Software"), to deal in the Software without restriction, including
10
- # without limitation the rights to use, copy, modify, merge, publish,
11
- # distribute, sublicense, and/or sell copies of the Software, and to
12
- # permit persons to whom the Software is furnished to do so, subject to
13
- # the following conditions:
14
- #
15
- # The above copyright notice and this permission notice shall be
16
- # included in all copies or substantial portions of the Software.
17
- #
18
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
22
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
- #
26
- ################################################################################
27
-
28
- class PDF::Reader
29
- ################################################################################
30
- # An example receiver class that processes all text found in a PDF file. All text that
31
- # is found will be printed to the IO object specified in the constructor.
32
- #
33
- # Usage:
34
- # receiver = PDF::Reader::TextReceiver.new($stdout)
35
- # PDF::Reader.file("somefile.pdf", receiver)
36
- #
37
- # DEPRECATED: this class was deprecated in version 0.11.0 and will
38
- # eventually be removed
39
class TextReceiver
  # Implementation notes
  #
  # The receiver keeps a stack of text-state hashes in @state (one frame is
  # pushed per BT and popped per ET) and a stack of page sizes in
  # @upper_corners (one frame per page container / page). Text is collected
  # line by line in @output; the vertical position of each text operator is
  # mapped to an index into @output through the @displacement hash.
  #
  # The :leading entry of the text state follows the PDF convention: it is a
  # positive distance, and "next line" moves the position by -leading.

  ################################################################################
  # Initialize with the library user's receiver.
  #
  # main_receiver - any object responding to <<; it is handed one string per
  #                 page from end_page.
  def initialize(main_receiver)
    @main_receiver = main_receiver
    @upper_corners = []
    # Start with an empty state stack so end_document is safe even when the
    # document contained no pages (begin_page is what normally fills it).
    @state = []
  end

  ################################################################################
  # Called when the document parsing begins. Resets the page-size stack.
  def begin_document(root)
    @upper_corners = []
  end

  ################################################################################
  # Called when the document parsing ends. Drops any remaining text state.
  def end_document
    @state.clear
  end

  ################################################################################
  # Called when a page container begins; records its (possibly inherited)
  # page size.
  def begin_page_container(page)
    @upper_corners.push(media_box_check(page))
  end

  ################################################################################
  # Called when a page container ends; discards its page size.
  def end_page_container
    @upper_corners.pop
  end

  ################################################################################
  # Called when new page parsing begins. Resets all per-page bookkeeping.
  def begin_page(info)
    @page = info

    @state = [{
      :char_spacing  => 0,
      :word_spacing  => 0,
      :hori_scaling  => 100,
      :leading       => 0,
      :tj_adjustment => 0,
    }]

    @upper_corners.push(media_box_check(info))

    @output         = []
    @line           = 0
    @location       = 0
    @displacement   = {}
    @smallest_y_loc = @upper_corners.last[:ury]
    @written_to     = false
  end

  ################################################################################
  # Called when page parsing ends. Emits the page text, lines joined with
  # "\n", to the main receiver.
  def end_page
    @main_receiver << @output.join("\n")
    @upper_corners.pop
  end

  ################################################################################
  # PDF operator BT
  def begin_text_object
    @state.push(@state.last.dup)
  end

  ################################################################################
  # PDF operator ET
  def end_text_object
    @state.pop
  end

  ################################################################################
  # PDF operator Tm. Only the sixth operand (vertical translation) is used.
  def set_text_matrix_and_text_line_matrix(*args)
    # these variable names look bad, but they're from the PDF spec
    _a, _b, _c, _d, _e, f = *args
    calculate_line_and_location(f)
  end

  ################################################################################
  # PDF operator Tc
  def set_character_spacing(n)
    @state.last[:char_spacing] = n
  end

  ################################################################################
  # PDF operator Tw
  def set_word_spacing(n)
    @state.last[:word_spacing] = n
  end

  ################################################################################
  # PDF operator Tz. Stores the operand divided by 100.
  #
  # Float division is used so that Integer operands below 100 are not
  # truncated to 0.
  # NOTE(review): begin_page seeds :hori_scaling with 100 while this stores a
  # ratio; nothing in this class reads the value, so confirm the intended
  # unit before relying on it.
  def set_horizontal_text_scaling(n)
    @state.last[:hori_scaling] = n / 100.0
  end

  ################################################################################
  # PDF operator TL
  def set_text_leading(n)
    @state.last[:leading] = n
  end

  ################################################################################
  # PDF operator T*. Equivalent to "0 -leading Td".
  def move_to_start_of_next_line
    move_text_position(0, -@state.last[:leading])
  end

  ################################################################################
  # PDF operator Td. Only the vertical offset affects line tracking; tx is
  # ignored.
  def move_text_position(tx, ty)
    calculate_line_and_location(@location + ty)
  end

  ################################################################################
  # PDF operator TD. Sets the leading to -ty, then behaves like Td.
  def move_text_position_and_set_leading(tx, ty)
    set_text_leading(-ty)
    move_text_position(tx, ty)
  end

  ################################################################################
  # PDF operator Tj. Appends string to the current output line, preceded by
  # padding spaces when the pending TJ adjustment is below -1000.
  def show_text(string)
    place = (@output[@line] ||= "")

    adjustment = @state.last[:tj_adjustment]
    place << " " * (adjustment.abs / 900) if adjustment < -1000
    place << string

    @written_to = true
  end

  ################################################################################
  # Alternative renderer that places characters on a fixed character grid.
  #
  # NOTE(review): this method reads @tm, TS_UNITS_PER_H_CHAR,
  # TS_UNITS_PER_V_CHAR and Matrix, none of which are defined in this class
  # as shown, and no other method here calls it; presumably experimental.
  # Its logic is left unchanged.
  def super_show_text(string)
    urx = @upper_corners.last[:urx]/TS_UNITS_PER_H_CHAR
    ury = @upper_corners.last[:ury]/TS_UNITS_PER_V_CHAR

    x = (@tm[2,0]/TS_UNITS_PER_H_CHAR).to_i
    y = (ury - (@tm[2,1]/TS_UNITS_PER_V_CHAR)).to_i

    place = (@output[y] ||= (" " * urx.to_i))
    return if x+string.size >= urx

    string.split(//).each do |c|
      chars = 1

      case c
      when " "
        chars += @state.last[:word_spacing].to_i
        place[x-1, chars] = (" " * chars)
      else
        chars += @state.last[:char_spacing].to_i
        chars -= (@state.last[:tj_adjustment]/1000).to_i if @state.last[:tj_adjustment]
        chars = 1 if chars < 1

        place[x-1] = c
        place[x, chars-1] = (" " * (chars-1)) if chars > 1
      end

      x += chars
    end

    @tm += Matrix.rows([[1, 0, 0], [0, 1, 0], [x*TS_UNITS_PER_H_CHAR, y*TS_UNITS_PER_V_CHAR, 1]])
  end

  ################################################################################
  # PDF operator TJ. Numeric elements become the pending adjustment for the
  # strings that follow; every other element is shown as text. The adjustment
  # in effect before the call is restored afterwards.
  def show_text_with_positioning(params)
    prev_adjustment = @state.last[:tj_adjustment]

    params.each do |p|
      case p
      when Float, Integer
        @state.last[:tj_adjustment] = p
      else
        show_text(p)
      end
    end

    @state.last[:tj_adjustment] = prev_adjustment
  end

  ################################################################################
  # PDF operator '
  def move_to_next_line_and_show_text(string)
    move_to_start_of_next_line
    show_text(string)
  end

  ################################################################################
  # PDF operator "
  def set_spacing_next_line_show_text(aw, ac, string)
    set_word_spacing(aw)
    set_character_spacing(ac)
    move_to_next_line_and_show_text(string)
  end

  ################################################################################
  # Returns a {:urx, :ury} hash for dict: a copy of the enclosing container's
  # size (or 0x0 when there is none), overridden by the width and height of
  # dict's :MediaBox when it has one.
  def media_box_check(dict)
    corners = (@upper_corners.last || {:urx => 0, :ury => 0}).dup

    if dict.has_key?(:MediaBox)
      media_box = dict[:MediaBox]
      corners[:urx] = media_box[2] - media_box[0]
      corners[:ury] = media_box[3] - media_box[1]
    end

    corners
  end

  ################################################################################
  # Maps the vertical position new_loc to an output line and makes it the
  # current position.
  #
  # Until something has been written on the page every position maps to
  # line 0. Afterwards an unseen position that lies below the current
  # position, or below everything seen so far, starts a new line; any other
  # unseen position is snapped to the closest known position beneath it.
  def calculate_line_and_location(new_loc)
    key = new_loc

    if !@written_to
      @displacement[key] = 0
    elsif !@displacement.key?(key)
      if key < @location || key < @smallest_y_loc
        @displacement[key] = @line + 1
      else
        nearest = @displacement.keys.select { |known| known < key }.max
        if nearest
          key = nearest
        else
          # No known position lies below this one; treat it as the first
          # line instead of continuing with a nil key.
          @displacement[key] = 0
        end
      end
    end

    @smallest_y_loc = key if key < @smallest_y_loc
    @location = key
    @line = @displacement[key]
  end
  ################################################################################
end
263
- ################################################################################
264
- end
265
- ################################################################################