pdf-reader 0.10.1 → 0.11.0.alpha

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,253 @@
1
+ # coding: utf-8
2
+
3
+ require 'matrix'
4
+
5
+ module PDF
6
+ class Reader
7
+ class PageTextReceiver
8
+
9
+ DEFAULT_GRAPHICS_STATE = {
10
+ :ctm => Matrix.identity(3),
11
+ :char_spacing => 0,
12
+ :word_spacing => 0,
13
+ :h_scaling => 100,
14
+ :text_leading => 0,
15
+ :text_font => nil,
16
+ :text_font_size => nil,
17
+ :text_mode => 0,
18
+ :text_rise => 0,
19
+ :text_knockout => 0
20
+ }
21
+
22
+ def initialize(fonts)
23
+ @fonts = fonts
24
+ @content = ::Hash.new
25
+ @stack = [DEFAULT_GRAPHICS_STATE]
26
+ end
27
+
28
+ def content
29
+ keys = @content.keys.sort.reverse
30
+ keys.map { |key|
31
+ @content[key]
32
+ }.join("\n")
33
+ end
34
+
35
+ #####################################################
36
+ # Graphics State Operators
37
+ #####################################################
38
+
39
+ def save_graphics_state
40
+ @stack.push clone_state
41
+ end
42
+
43
+ def restore_graphics_state
44
+ @stack.pop
45
+ end
46
+
47
+ #####################################################
48
+ # Matrix Operators
49
+ #####################################################
50
+
51
+ # update the current transformation matrix.
52
+ #
53
+ # If the CTM is currently undefined, just store the new values.
54
+ #
55
+ # If there's an existing CTM, then multiply the existing matrix
56
+ # with the new matrix to form the updated matrix.
57
+ #
58
+ def concatenate_matrix(a, b, c, d, e, f)
59
+ transform = Matrix[
60
+ [a, b, 0],
61
+ [c, d, 0],
62
+ [e, f, 1]
63
+ ]
64
+ if state[:ctm]
65
+ state[:ctm] = transform * state[:ctm]
66
+ else
67
+ state[:ctm] = transform
68
+ end
69
+ end
70
+
71
+ #####################################################
72
+ # Text Object Operators
73
+ #####################################################
74
+
75
+ def begin_text_object
76
+ @text_matrix = Matrix.identity(3)
77
+ @text_line_matrix = Matrix.identity(3)
78
+ end
79
+
80
+ def end_text_object
81
+ @text_matrix = Matrix.identity(3)
82
+ @text_line_matrix = Matrix.identity(3)
83
+ end
84
+
85
+ #####################################################
86
+ # Text State Operators
87
+ #####################################################
88
+
89
+ def set_character_spacing(char_spacing)
90
+ state[:char_spacing] = char_spacing
91
+ end
92
+
93
+ def set_horizontal_text_scaling(h_scaling)
94
+ state[:h_scaling] = h_scaling
95
+ end
96
+
97
+ def set_text_font_and_size(label, size)
98
+ state[:text_font] = label
99
+ state[:text_font_size] = size
100
+ end
101
+
102
+ def set_text_leading(leading)
103
+ state[:text_leading] = leading
104
+ end
105
+
106
+ def set_text_rendering_mode(mode)
107
+ state[:text_mode] = mode
108
+ end
109
+
110
+ def set_text_rise(rise)
111
+ state[:text_rise] = rise
112
+ end
113
+
114
+ def set_word_spacing(word_spacing)
115
+ state[:word_spacing] = word_spacing
116
+ end
117
+
118
+ #####################################################
119
+ # Text Positioning Operators
120
+ #####################################################
121
+
122
+ def move_text_position(x, y) # Td
123
+ temp_matrix = Matrix[
124
+ [1, 0, 0],
125
+ [0, 1, 0],
126
+ [x, y, 1]
127
+ ]
128
+ @text_matrix = @text_line_matrix = temp_matrix * @text_line_matrix
129
+ end
130
+
131
+ def move_text_position_and_set_leading(x, y) # TD
132
+ set_text_leading(-1 * y)
133
+ move_text_position(x, y)
134
+ end
135
+
136
+ def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
137
+ @text_matrix = @text_line_matrix = Matrix[
138
+ [a, b, 0],
139
+ [c, d, 0],
140
+ [e, f, 1]
141
+ ]
142
+ end
143
+
144
+ def move_to_start_of_next_line # T*
145
+ move_text_position(0, state[:text_leading])
146
+ end
147
+
148
+ #####################################################
149
+ # Text Showing Operators
150
+ #####################################################
151
+
152
+ # record text that is drawn on the page
153
+ def show_text(string) # Tj
154
+ at = transform(Point.new(0,0))
155
+ @content[at.y] ||= ""
156
+ @content[at.y] << current_font.to_utf8(string)
157
+ end
158
+
159
+ def show_text_with_positioning(params) # TJ
160
+ params.each { |arg|
161
+ case arg
162
+ when String
163
+ show_text(arg)
164
+ when Fixnum, Float
165
+ show_text(" ") if arg > 1000
166
+ end
167
+ }
168
+ end
169
+
170
+ def move_to_next_line_and_show_text(str) # '
171
+ move_to_start_of_next_line
172
+ show_text(str)
173
+ end
174
+
175
+ def set_spacing_next_line_show_text(aw, ac, string) # "
176
+ set_word_spacing(aw)
177
+ set_character_spacing(ac)
178
+ move_to_next_line_and_show_text(string)
179
+ end
180
+
181
+ private
182
+
183
+ # transform x and y co-ordinates from the current text space to the
184
+ # underlying device space.
185
+ #
186
+ def transform(point, z = 1)
187
+ trm = text_rendering_matrix
188
+ Point.new(
189
+ (trm[0,0] * point.x) + (trm[1,0] * point.y) + (trm[2,0] * z),
190
+ (trm[0,1] * point.x) + (trm[1,1] * point.y) + (trm[2,1] * z)
191
+ )
192
+ end
193
+
194
+ def text_rendering_matrix
195
+ state_matrix = Matrix[
196
+ [state[:text_font_size] * state[:h_scaling], 0, 0],
197
+ [0, state[:text_font_size], 0],
198
+ [0, state[:text_rise], 1]
199
+ ]
200
+
201
+ state_matrix * @text_matrix * ctm
202
+ end
203
+
204
+ def state
205
+ @stack.last
206
+ end
207
+
208
+ # when save_graphics_state is called, we need to push a new copy of the
209
+ # current state onto the stack. That way any modifications to the state
210
+ # will be undone once restore_graphics_state is called.
211
+ #
212
+ # This returns a deep clone of the current state, ensuring changes are
213
+ # kept separate from earlier states.
214
+ #
215
+ # YAML is used to round-trip the state through a string to easily perform
216
+ # the deep clone. Kinda hacky, but effective.
217
+ #
218
+ def clone_state
219
+ if @stack.empty?
220
+ {}
221
+ else
222
+ yaml_state = YAML.dump(@stack.last)
223
+ YAML.load(yaml_state)
224
+ end
225
+ end
226
+
227
+ # return the current transformation matrix
228
+ #
229
+ def ctm
230
+ state[:ctm]
231
+ end
232
+
233
+ def current_font
234
+ @fonts[state[:text_font]]
235
+ end
236
+
237
+ # private class for representing points on a cartesian plane. Used
238
+ # to simplify maths in the PageTextReceiver class.
239
+ #
240
+ class Point
241
+ attr_reader :x, :y
242
+
243
+ def initialize(x,y)
244
+ @x, @y = x,y
245
+ end
246
+
247
+ def distance(point)
248
+ Math.hypot(point.x - x, point.y - y)
249
+ end
250
+ end
251
+ end
252
+ end
253
+ end
@@ -175,6 +175,8 @@ class PDF::Reader
175
175
  # If it gets mapped to the name "IM1", then it can be placed on the page using
176
176
  # invoke_xobject "IM1".
177
177
  #
178
+ # DEPRECATED: this class was deprecated in version 0.11.0 and will
179
+ # eventually be removed
178
180
  class PagesStrategy< AbstractStrategy # :nodoc:
179
181
  OPERATORS = {
180
182
  'b' => :close_fill_stroke,
@@ -460,17 +462,7 @@ class PDF::Reader
460
462
  fonts = {}
461
463
  resources = @ohash.object(resources[:Font]) || {}
462
464
  resources.each do |label, desc|
463
- desc = @ohash.object(desc)
464
- fonts[label] = PDF::Reader::Font.new
465
- fonts[label].label = label
466
- fonts[label].subtype = desc[:Subtype] if desc[:Subtype]
467
- fonts[label].basefont = desc[:BaseFont] if desc[:BaseFont]
468
- fonts[label].encoding = PDF::Reader::Encoding.new(@ohash.object(desc[:Encoding]))
469
- fonts[label].descendantfonts = desc[:DescendantFonts] if desc[:DescendantFonts]
470
- if desc[:ToUnicode]
471
- stream = @ohash.object(desc[:ToUnicode])
472
- fonts[label].tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
473
- end
465
+ fonts[label] = PDF::Reader::Font.new(@ohash, @ohash.object(desc))
474
466
  end
475
467
  fonts
476
468
  end
@@ -31,6 +31,9 @@ class PDF::Reader
31
31
  # Usage:
32
32
  # receiver = PDF::Reader::TextReceiver.new($stdout)
33
33
  # PDF::Reader.file("somefile.pdf", receiver)
34
+ #
35
+ # DEPRECATED: this class was deprecated in version 0.11.0 and will
36
+ # eventually be removed
34
37
  class TextReceiver
35
38
  ################################################################################
36
39
  # Initialize with the library user's receiver
@@ -149,10 +149,9 @@ class PDF::Reader
149
149
  unless stream.hash[:Type] == :XRef
150
150
  raise PDF::Reader::MalformedPDFError, "xref stream not found when expected"
151
151
  end
152
- trailer = {}
153
- trailer[:Root] = stream.hash[:Root] if stream.hash[:Root]
154
- trailer[:Info] = stream.hash[:Info] if stream.hash[:Info]
155
- trailer[:Prev] = stream.hash[:Prev] if stream.hash[:Prev]
152
+ trailer = Hash[stream.hash.select { |key, value|
153
+ [:Size, :Prev, :Root, :Encrypt, :Info, :ID].include?(key)
154
+ }]
156
155
 
157
156
  widths = stream.hash[:W]
158
157
  entry_length = widths.inject(0) { |s, w| s + w }
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
4
+ prerelease: true
5
5
  segments:
6
6
  - 0
7
- - 10
8
- - 1
9
- version: 0.10.1
7
+ - 11
8
+ - 0
9
+ - alpha
10
+ version: 0.11.0.alpha
10
11
  platform: ruby
11
12
  authors:
12
13
  - James Healy
@@ -14,7 +15,7 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2011-10-20 00:00:00 +11:00
18
+ date: 2011-07-19 00:00:00 +10:00
18
19
  default_executable:
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
@@ -87,46 +88,49 @@ extra_rdoc_files:
87
88
  - CHANGELOG
88
89
  - MIT-LICENSE
89
90
  files:
91
+ - examples/page_counter_naive.rb
92
+ - examples/rspec.rb
90
93
  - examples/metadata.rb
91
- - examples/extract_images.rb
92
94
  - examples/extract_bates.rb
93
- - examples/page_counter_improved.rb
94
- - examples/callbacks.rb
95
- - examples/rspec.rb
96
95
  - examples/hash.rb
96
+ - examples/callbacks.rb
97
97
  - examples/text.rb
98
- - examples/page_counter_naive.rb
99
98
  - examples/version.rb
100
- - lib/pdf/reader.rb
101
- - lib/pdf/hash.rb
102
- - lib/pdf/reader/print_receiver.rb
103
- - lib/pdf/reader/xref.rb
104
- - lib/pdf/reader/buffer.rb
105
- - lib/pdf/reader/font.rb
106
- - lib/pdf/reader/parser.rb
99
+ - examples/page_counter_improved.rb
100
+ - examples/extract_images.rb
101
+ - lib/pdf/reader/glyphlist.txt
107
102
  - lib/pdf/reader/error.rb
103
+ - lib/pdf/reader/font.rb
104
+ - lib/pdf/reader/lzw.rb
105
+ - lib/pdf/reader/print_receiver.rb
106
+ - lib/pdf/reader/reference.rb
108
107
  - lib/pdf/reader/filter.rb
109
- - lib/pdf/reader/object_hash.rb
108
+ - lib/pdf/reader/text_receiver.rb
109
+ - lib/pdf/reader/pages_strategy.rb
110
+ - lib/pdf/reader/abstract_strategy.rb
111
+ - lib/pdf/reader/page_text_receiver.rb
112
+ - lib/pdf/reader/encoding.rb
110
113
  - lib/pdf/reader/stream.rb
114
+ - lib/pdf/reader/register_receiver.rb
115
+ - lib/pdf/reader/object_hash.rb
116
+ - lib/pdf/reader/object_cache.rb
117
+ - lib/pdf/reader/token.rb
118
+ - lib/pdf/reader/page.rb
119
+ - lib/pdf/reader/xref.rb
111
120
  - lib/pdf/reader/cmap.rb
112
121
  - lib/pdf/reader/object_stream.rb
113
- - lib/pdf/reader/encoding.rb
114
- - lib/pdf/reader/text_receiver.rb
115
- - lib/pdf/reader/glyphlist.txt
116
- - lib/pdf/reader/lzw.rb
117
- - lib/pdf/reader/register_receiver.rb
118
- - lib/pdf/reader/abstract_strategy.rb
119
- - lib/pdf/reader/pages_strategy.rb
120
- - lib/pdf/reader/reference.rb
122
+ - lib/pdf/reader/metadata_strategy.rb
123
+ - lib/pdf/reader/buffer.rb
124
+ - lib/pdf/reader/encodings/zapf_dingbats.txt
121
125
  - lib/pdf/reader/encodings/standard.txt
122
126
  - lib/pdf/reader/encodings/mac_roman.txt
123
- - lib/pdf/reader/encodings/symbol.txt
127
+ - lib/pdf/reader/encodings/mac_expert.txt
124
128
  - lib/pdf/reader/encodings/win_ansi.txt
125
- - lib/pdf/reader/encodings/zapf_dingbats.txt
129
+ - lib/pdf/reader/encodings/symbol.txt
126
130
  - lib/pdf/reader/encodings/pdf_doc.txt
127
- - lib/pdf/reader/encodings/mac_expert.txt
128
- - lib/pdf/reader/metadata_strategy.rb
129
- - lib/pdf/reader/token.rb
131
+ - lib/pdf/reader/parser.rb
132
+ - lib/pdf/hash.rb
133
+ - lib/pdf/reader.rb
130
134
  - lib/pdf-reader.rb
131
135
  - Rakefile
132
136
  - README.rdoc
@@ -140,7 +144,7 @@ has_rdoc: true
140
144
  homepage: http://github.com/yob/pdf-reader
141
145
  licenses: []
142
146
 
143
- post_install_message:
147
+ post_install_message: "\n ********************************************\n\n This is an alpha release of PDF::Reader to gather feedback on the proposed\n API changes.\n\n The old API is marked as deprecated but will continue to work with no\n visible warnings for now.\n\n The new API is documented in the README and in rdoc for the PDF::Reader,\n PDF::Reader::Page and PDF::Reader::ObjectHash classes.\n\n Do not use this in production, stick to stable releases for that. If you do\n take the new API for a spin, please send any feedback my way.\n\n ********************************************\n\n"
144
148
  rdoc_options:
145
149
  - --title
146
150
  - PDF::Reader Documentation
@@ -162,11 +166,13 @@ required_ruby_version: !ruby/object:Gem::Requirement
162
166
  required_rubygems_version: !ruby/object:Gem::Requirement
163
167
  none: false
164
168
  requirements:
165
- - - ">="
169
+ - - ">"
166
170
  - !ruby/object:Gem::Version
167
171
  segments:
168
- - 0
169
- version: "0"
172
+ - 1
173
+ - 3
174
+ - 1
175
+ version: 1.3.1
170
176
  requirements: []
171
177
 
172
178
  rubyforge_project: