pdf-reader 1.4.1 → 2.0.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,265 +0,0 @@
1
- # coding: utf-8
2
-
3
- ################################################################################
4
- #
5
- # Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining
8
- # a copy of this software and associated documentation files (the
9
- # "Software"), to deal in the Software without restriction, including
10
- # without limitation the rights to use, copy, modify, merge, publish,
11
- # distribute, sublicense, and/or sell copies of the Software, and to
12
- # permit persons to whom the Software is furnished to do so, subject to
13
- # the following conditions:
14
- #
15
- # The above copyright notice and this permission notice shall be
16
- # included in all copies or substantial portions of the Software.
17
- #
18
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
22
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
- #
26
- ################################################################################
27
-
28
- class PDF::Reader
29
- ################################################################################
30
- # An example receiver class that processes all text found in a PDF file. All text that
31
- # is found will be printed to the IO object specified in the constructor.
32
- #
33
- # Usage:
34
- # receiver = PDF::Reader::TextReceiver.new($stdout)
35
- # PDF::Reader.file("somefile.pdf", receiver)
36
- #
37
- # DEPRECATED: this class was deprecated in version 0.11.0 and will
38
- # eventually be removed
39
- class TextReceiver
40
- ################################################################################
41
# Set up the receiver.
#
# main_receiver - the library user's sink; the text of each page is appended
#                 to it with << when the page ends.
def initialize(main_receiver)
  @main_receiver = main_receiver
  # Stack of {:urx, :ury} hashes, one per open page container / page.
  @upper_corners = Array.new
end
46
- ################################################################################
47
# Callback fired when document parsing starts.
#
# root - supplied by the caller; not used here.
#
# Resets the stack of page corners so nothing leaks in from a previous run.
def begin_document(root)
  @upper_corners = Array.new
end
51
- ################################################################################
52
# Called when the document parsing ends.
#
# Empties the text-state stack. That stack is only created by begin_page, so
# for a document that never started a page @state is still nil here; skip the
# clear in that case rather than raising NoMethodError.
#
# Returns the emptied stack, or nil when no page was ever started.
def end_document
  @state.clear if @state
end
56
- ################################################################################
57
# Callback fired when a page container is entered.
#
# page - the container's dictionary; handed to media_box_check, which
#        produces the corners pushed onto the @upper_corners stack.
def begin_page_container(page)
  corners = media_box_check(page)
  @upper_corners.push(corners)
end
60
- ################################################################################
61
# Callback fired when a page container is left: drops the corners that
# begin_page_container pushed and returns them.
def end_page_container
  corner_stack = @upper_corners
  corner_stack.pop
end
64
- ################################################################################
65
# Callback fired when parsing of a new page starts.
#
# info - the page dictionary; stored in @page and handed to media_box_check.
#
# Resets every piece of per-page bookkeeping: the text-state stack, the
# output lines, the current line/location and the location => line map.
def begin_page(info)
  @page = info

  # Bottom entry of the text-state stack; begin_text_object pushes copies.
  initial_text_state = {
    :char_spacing  => 0,
    :word_spacing  => 0,
    :hori_scaling  => 100,
    :leading       => 0,
    :tj_adjustment => 0,
  }
  @state = [initial_text_state]

  page_corners = media_box_check(info)
  @upper_corners.push(page_corners)

  @output       = []
  @line         = 0
  @location     = 0
  @displacement = {}
  # Start from the top edge of the page; lowered as text moves down.
  @smallest_y_loc = page_corners[:ury]
  @written_to = false
end
86
- ################################################################################
87
# Callback fired when parsing of the current page ends.
#
# Joins the collected output lines with newlines, appends the result to the
# main receiver with <<, then pops this page's corners off @upper_corners.
def end_page
  page_text = @output.join("\n")
  @main_receiver << page_text
  @upper_corners.pop
end
92
- ################################################################################
93
# PDF operator BT.
#
# Pushes a shallow copy of the current text state, so changes made inside the
# text object are discarded again when end_text_object pops it.
def begin_text_object
  snapshot = @state.last.dup
  @state.push(snapshot)
end
97
- ################################################################################
98
# PDF operator ET.
#
# Discards the text state pushed by the matching begin_text_object and
# returns it.
def end_text_object
  state_stack = @state
  state_stack.pop
end
102
- ################################################################################
103
# PDF operator Tm.
#
# args - the six matrix operands, named a b c d e f in the PDF spec. Only the
#        sixth (f) is used; it is passed on as the new location.
def set_text_matrix_and_text_line_matrix(*args)
  f = args[5]
  calculate_line_and_location(f)
end
109
- ################################################################################
110
# PDF operator Tc.
#
# n - the character spacing to record in the current text state.
def set_character_spacing(n)
  current = @state.last
  current[:char_spacing] = n
end
114
- ################################################################################
115
# PDF operator Tw.
#
# n - the word spacing to record in the current text state.
def set_word_spacing(n)
  current = @state.last
  current[:word_spacing] = n
end
119
- ################################################################################
120
# PDF operator Tz.
#
# n - the horizontal scaling operand, expressed as a percentage.
#
# Stores the scaling as a ratio (n / 100). The division is done in floating
# point: with an Integer operand, plain `n/100` truncates, so 50 was stored
# as 0 and 150 as 1.
#
# NOTE(review): begin_page seeds :hori_scaling with 100 (a percentage) while
# this setter stores a ratio; nothing visible reads the value, so confirm the
# intended unit before relying on it.
def set_horizontal_text_scaling(n)
  @state.last[:hori_scaling] = n / 100.0
end
124
- ################################################################################
125
# PDF operator TL.
#
# n - the leading to record in the current text state; it is read back by
#     move_to_start_of_next_line.
def set_text_leading(n)
  current = @state.last
  current[:leading] = n
end
129
- ################################################################################
130
# PDF operator T*.
#
# Moves the text position vertically by the leading stored in the current
# text state.
#
# NOTE(review): the PDF spec defines T* as "0 -Tl Td", whereas the leading is
# passed here without negation. move_text_position_and_set_leading stores ty
# unnegated as well, so the two appear to cancel for TD-set leadings only.
# Confirm before changing either.
def move_to_start_of_next_line
  leading = @state.last[:leading]
  move_text_position(0, leading)
end
134
- ################################################################################
135
# PDF operator Td.
#
# tx - horizontal offset; ignored, only vertical movement is tracked.
# ty - vertical offset, added to the current location.
def move_text_position(tx, ty)
  new_location = @location + ty
  calculate_line_and_location(new_location)
end
140
- ################################################################################
141
# PDF operator TD.
#
# tx, ty - offsets, forwarded unchanged to move_text_position.
#
# Records ty as the text leading first (stored as given, not negated), then
# performs the same move as Td.
def move_text_position_and_set_leading(tx, ty)
  set_text_leading(ty)
  move_text_position(tx, ty)
end
146
- ################################################################################
147
# PDF operator Tj.
#
# string - the text to append to the current output line.
#
# The line buffer for @line is created on first use. When the pending TJ
# adjustment is below -1000, one space per 900 units of its magnitude is
# inserted in front of the text. Marks the page as written to.
def show_text(string)
  current_line = (@output[@line] ||= "")

  adjustment = @state.last[:tj_adjustment]
  if adjustment < -1000
    current_line << " " * (adjustment.abs/900)
  end
  current_line << string

  @written_to = true
end
160
# Alternative text renderer that places each character on a fixed grid.
#
# string - the text to place.
#
# NOTE(review): this method reads @tm and the constants TS_UNITS_PER_H_CHAR,
# TS_UNITS_PER_V_CHAR and Matrix, none of which are assigned or defined in
# the visible source. Confirm they exist before calling it.
def super_show_text(string)
  page_corner = @upper_corners.last
  urx = page_corner[:urx]/TS_UNITS_PER_H_CHAR
  ury = page_corner[:ury]/TS_UNITS_PER_V_CHAR

  # Grid column and row derived from the third row of @tm.
  x = (@tm[2,0]/TS_UNITS_PER_H_CHAR).to_i
  y = (ury - (@tm[2,1]/TS_UNITS_PER_V_CHAR)).to_i

  # Each row starts out as a run of blanks as wide as the page.
  row = (@output[y] ||= (" " * urx.to_i))
  # Give up if the text would reach or pass the right edge.
  return if x+string.size >= urx

  string.split(//).each do |char|
    width = 1

    if char == " "
      width += @state.last[:word_spacing].to_i
      row[x-1, width] = (" " * width)
    else
      width += @state.last[:char_spacing].to_i
      width -= (@state.last[:tj_adjustment]/1000).to_i if @state.last[:tj_adjustment]
      width = 1 if width < 1

      row[x-1] = char
      row[x, width-1] = (" " * (width-1)) if width > 1
    end

    x += width
  end

  @tm += Matrix.rows([[1, 0, 0], [0, 1, 0], [x*TS_UNITS_PER_H_CHAR, y*TS_UNITS_PER_V_CHAR, 1]])
end
194
- ################################################################################
195
# PDF operator TJ.
#
# params - array mixing numbers and strings. A Float or Integer becomes the
#          pending :tj_adjustment; anything else is passed to show_text.
#
# The adjustment in force before the call is restored at the end.
def show_text_with_positioning(params)
  saved_adjustment = @state.last[:tj_adjustment]

  params.each do |item|
    case item
    when Integer, Float
      @state.last[:tj_adjustment] = item
    else
      show_text(item)
    end
  end

  @state.last[:tj_adjustment] = saved_adjustment
end
210
- ################################################################################
211
# PDF operator ' (single quote).
#
# string - the text to show after advancing to the next line.
#
# Equivalent to T* followed by Tj.
def move_to_next_line_and_show_text(string)
  move_to_start_of_next_line
  show_text(string)
end
216
- ################################################################################
217
# PDF operator " (double quote).
#
# aw     - word spacing to set
# ac     - character spacing to set
# string - the text to show on the next line
#
# Sets both spacings, then behaves like the ' operator.
def set_spacing_next_line_show_text(aw, ac, string)
  set_word_spacing(aw)
  set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end
223
- ################################################################################
224
# Work out the upper-right corner that applies to a page or page container.
#
# dict - dictionary that may carry a :MediaBox entry, indexed here as
#        [x0, y0, x1, y1].
#
# Starts from a copy of the innermost corners on @upper_corners (or 0/0 when
# the stack is empty) and overrides them with the box's width and height when
# dict has a :MediaBox. Returns a new hash with :urx and :ury.
def media_box_check(dict)
  inherited = @upper_corners.last || {:urx => 0, :ury => 0}
  corners = inherited.dup
  return corners unless dict.has_key?(:MediaBox)

  box = dict[:MediaBox]
  corners[:urx] = box[2] - box[0]
  corners[:ury] = box[3] - box[1]
  corners
end
235
- ################################################################################
236
# Map a vertical text location to an output line and make it current.
#
# new_loc - the new vertical location (a number).
#
# @displacement remembers which output line each location maps to:
# * Before anything has been written on the page, the location maps to line 0.
# * A location already seen reuses its line.
# * An unseen location below the current one, or below the lowest seen so
#   far, opens the line after the current one.
# * Any other unseen location snaps to the nearest known location below it.
#   If there is none, the location is kept as is and mapped to line 0. The
#   previous code ended up with a nil key in that case and raised
#   NoMethodError on the comparison that follows.
#
# Updates @smallest_y_loc, @location and @line; returns the new @line.
def calculate_line_and_location(new_loc)
  key = new_loc

  if !@written_to
    @displacement[key] = 0
  elsif !@displacement.has_key?(key)
    if key < @location || key < @smallest_y_loc
      @displacement[key] = @line + 1
    else
      nearest_below = @displacement.keys.select { |known| known < key }.max
      key = nearest_below unless nearest_below.nil?
      @displacement[key] = 0 unless @displacement.has_key?(key)
    end
  end

  @smallest_y_loc = key if key < @smallest_y_loc
  @location = key
  @line = @displacement[key]
end
261
- ################################################################################
262
- end
263
- ################################################################################
264
- end
265
- ################################################################################