pdf-reader 1.4.1 → 2.0.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +8 -3
- data/{README.rdoc → README.md} +40 -23
- data/Rakefile +2 -2
- data/bin/pdf_object +4 -1
- data/lib/pdf/reader.rb +7 -112
- data/lib/pdf/reader/buffer.rb +2 -1
- data/lib/pdf/reader/cmap.rb +26 -24
- data/lib/pdf/reader/encoding.rb +4 -5
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/filter/run_length.rb +1 -5
- data/lib/pdf/reader/font.rb +1 -11
- data/lib/pdf/reader/glyph_hash.rb +6 -2
- data/lib/pdf/reader/lzw.rb +1 -1
- data/lib/pdf/reader/object_hash.rb +35 -16
- data/lib/pdf/reader/page_layout.rb +6 -17
- data/lib/pdf/reader/pages_strategy.rb +1 -304
- data/lib/pdf/reader/parser.rb +6 -4
- data/lib/pdf/reader/standard_security_handler.rb +18 -14
- data/lib/pdf/reader/text_run.rb +3 -9
- metadata +14 -47
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -265
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4ea96ce79d9f4cc65a0a026ea7e50da7b33cd19
|
4
|
+
data.tar.gz: e2302c2d18cdc64cd81654f30658b5cf1e8ae3c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 31da13f8b8e38dffbb19a33855beb1c85a14c2633137fd8e1957db14f3bac16a434c174e79bab6092c72954e6a6f87f0ab585562e79d39d5ede8f9398fd63f7b
|
7
|
+
data.tar.gz: e097e8aed8bbeb918676bacded748a538e109fdcf1e8740cfe1ce3d63105d891cbe2a90ad228bb87fb494cd796bd7e1c66138592ff21d62c97b87fa270eb0899
|
data/CHANGELOG
CHANGED
@@ -1,5 +1,10 @@
|
|
1
|
+
v2.0.0.beta1 (15th February 2017)
|
2
|
+
- BREAKING CHANGE: remove all methods that were deprecated in 1.0.0
|
3
|
+
- Bug: Support extra encrypted PDF variants (thanks to Gyuchang Jun)
|
4
|
+
- various bug fixes
|
5
|
+
|
1
6
|
v1.4.1 (2nd January 2017)
|
2
|
-
- improve
|
7
|
+
- improve compatibility with ruby 2.4 (thanks Akira Matsuda)
|
3
8
|
- various bug fixes
|
4
9
|
|
5
10
|
v1.4.0 (22nd February 2016)
|
@@ -91,10 +96,10 @@ v0.9.2 (24th April 2011)
|
|
91
96
|
|
92
97
|
v0.9.1 (21st December 2010)
|
93
98
|
- force gem to only install on ruby 1.8.7 or higher
|
94
|
-
- maintaining
|
99
|
+
- maintaining support for earlier versions takes more time than I have
|
95
100
|
available at the moment
|
96
101
|
- bug: fix parsing of obscure pdf name format
|
97
|
-
- bug: fix behaviour when loaded in
|
102
|
+
- bug: fix behaviour when loaded in conjunction with htmldoc gem
|
98
103
|
|
99
104
|
v0.9.0 (19th November 2010)
|
100
105
|
- support for pdf 1.5+ files that use object and xref streams
|
data/{README.rdoc → README.md}
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
# Release Notes
|
2
2
|
|
3
3
|
The PDF::Reader library implements a PDF parser conforming as much as possible
|
4
4
|
to the PDF specification from Adobe.
|
@@ -15,46 +15,55 @@ higher level functionality - it's not going to render a PDF for you. There are
|
|
15
15
|
a few exceptions to support very common use cases like extracting text from a
|
16
16
|
page.
|
17
17
|
|
18
|
-
|
18
|
+
# Installation
|
19
19
|
|
20
20
|
The recommended installation method is via Rubygems.
|
21
21
|
|
22
|
+
```ruby
|
22
23
|
gem install pdf-reader
|
24
|
+
```
|
23
25
|
|
24
|
-
|
26
|
+
# Usage
|
25
27
|
|
26
28
|
Begin by creating a PDF::Reader instance that points to a PDF file. Document
|
27
29
|
level information (metadata, page count, bookmarks, etc) is available via
|
28
30
|
this object.
|
29
31
|
|
32
|
+
```ruby
|
30
33
|
reader = PDF::Reader.new("somefile.pdf")
|
31
34
|
|
32
35
|
puts reader.pdf_version
|
33
36
|
puts reader.info
|
34
37
|
puts reader.metadata
|
35
38
|
puts reader.page_count
|
39
|
+
```
|
36
40
|
|
37
41
|
PDF::Reader.new accepts an IO stream or a filename. Here's an example with
|
38
42
|
an IO stream:
|
39
43
|
|
44
|
+
```ruby
|
40
45
|
require 'open-uri'
|
41
46
|
|
42
47
|
io = open('http://example.com/somefile.pdf')
|
43
48
|
reader = PDF::Reader.new(io)
|
44
49
|
puts reader.info
|
50
|
+
```
|
45
51
|
|
46
52
|
If you open a PDF with File#open or IO#open, I strongly recommend using "rb"
|
47
53
|
mode to ensure the file isn't mangled by ruby being 'helpful'. This is
|
48
54
|
particularly important on windows and MRI >= 1.9.2.
|
49
55
|
|
56
|
+
```ruby
|
50
57
|
File.open("somefile.pdf", "rb") do |io|
|
51
58
|
reader = PDF::Reader.new(io)
|
52
59
|
puts reader.info
|
53
60
|
end
|
61
|
+
```
|
54
62
|
|
55
63
|
PDF is a page based file format, so most visible information is available via
|
56
64
|
page-based iteration
|
57
65
|
|
66
|
+
```ruby
|
58
67
|
reader = PDF::Reader.new("somefile.pdf")
|
59
68
|
|
60
69
|
reader.pages.each do |page|
|
@@ -62,10 +71,12 @@ page-based iteration
|
|
62
71
|
puts page.text
|
63
72
|
puts page.raw_content
|
64
73
|
end
|
74
|
+
```
|
65
75
|
|
66
76
|
If you need to access the full program for rendering a page, use the walk() method
|
67
77
|
of PDF::Reader::Page.
|
68
78
|
|
79
|
+
```ruby
|
69
80
|
class RedGreenBlue
|
70
81
|
def set_rgb_color_for_nonstroking(r, g, b)
|
71
82
|
puts "R: #{r}, G: #{g}, B: #{b}"
|
@@ -76,31 +87,32 @@ of PDF::Reader::Page.
|
|
76
87
|
page = reader.page(1)
|
77
88
|
receiver = RedGreenBlue.new
|
78
89
|
page.walk(receiver)
|
90
|
+
```
|
79
91
|
|
80
92
|
For low level access to the objects in a PDF file, use the ObjectHash class like
|
81
93
|
so:
|
82
94
|
|
95
|
+
```ruby
|
83
96
|
reader = PDF::Reader.new("somefile.pdf")
|
84
97
|
puts reader.objects.inspect
|
98
|
+
```
|
85
99
|
|
86
|
-
|
100
|
+
# Text Encoding
|
87
101
|
|
88
102
|
Regardless of the internal encoding used in the PDF all text will be converted
|
89
103
|
to UTF-8 before it is passed back from PDF::Reader.
|
90
104
|
|
91
|
-
Strings that contain binary data (like font blobs) will be marked as such
|
92
|
-
M17N aware VMs.
|
105
|
+
Strings that contain binary data (like font blobs) will be marked as such.
|
93
106
|
|
94
|
-
|
107
|
+
# Former API
|
95
108
|
|
96
109
|
Version 1.0.0 of PDF::Reader introduced a new page-based API that provides
|
97
110
|
efficient and easy access to any page.
|
98
111
|
|
99
|
-
The
|
100
|
-
|
101
|
-
warnings before it is completely removed in version 2.0.0.
|
112
|
+
The pre-1.0 API was deprecated during the 1.x release series, and has been
|
113
|
+
removed from 2.0.0.
|
102
114
|
|
103
|
-
|
115
|
+
# Exceptions
|
104
116
|
|
105
117
|
There are two key exceptions that you will need to watch out for when processing a
|
106
118
|
PDF file:
|
@@ -120,7 +132,7 @@ don't, 'rescue MalformedPDFError' will catch all the subclassed errors as well.
|
|
120
132
|
Any other exceptions should be considered bugs in PDF::Reader (please
|
121
133
|
report it!).
|
122
134
|
|
123
|
-
|
135
|
+
# PDF Integrity
|
124
136
|
|
125
137
|
Windows developers may run into problems when running specs due to MalformedPDFErrors.
|
126
138
|
This is usually because CRLF characters are automatically added to some of the PDFs in
|
@@ -128,18 +140,20 @@ the spec folder when you checkout a branch from Git.
|
|
128
140
|
|
129
141
|
To remove any invalid CRLF characters added while checking out a branch from Git, run:
|
130
142
|
|
143
|
+
```ruby
|
131
144
|
rake fix_integrity
|
145
|
+
```
|
132
146
|
|
133
|
-
|
147
|
+
# Maintainers
|
134
148
|
|
135
|
-
|
149
|
+
* James Healy <mailto:jimmy@deefa.com>
|
136
150
|
|
137
|
-
|
151
|
+
# Licensing
|
138
152
|
|
139
153
|
This library is distributed under the terms of the MIT License. See the included file for
|
140
154
|
more detail.
|
141
155
|
|
142
|
-
|
156
|
+
# Mailing List
|
143
157
|
|
144
158
|
Any questions or feedback should be sent to the PDF::Reader google group. It's
|
145
159
|
better that any answers be available for others instead of hiding in someone's
|
@@ -147,20 +161,23 @@ inbox.
|
|
147
161
|
|
148
162
|
http://groups.google.com/group/pdf-reader
|
149
163
|
|
150
|
-
|
164
|
+
# Examples
|
151
165
|
|
152
166
|
The easiest way to explain how this works in practice is to show some examples.
|
153
167
|
Check out the examples/ directory for a few files.
|
154
168
|
|
155
|
-
|
169
|
+
# Known Limitations
|
156
170
|
|
157
171
|
Occasionally some text cannot be extracted properly due to the way it has been
|
158
172
|
stored, or the use of invalid bytes. In these cases PDF::Reader will output a
|
159
173
|
little UTF-8 friendly box to indicate an unrecognisable character.
|
160
174
|
|
161
|
-
|
175
|
+
# Resources
|
162
176
|
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
177
|
+
* PDF::Reader Code Repository: http://github.com/yob/pdf-reader
|
178
|
+
|
179
|
+
* PDF Specification: http://www.adobe.com/devnet/pdf/pdf_reference.html
|
180
|
+
|
181
|
+
* PDF Tutorial Slide Presentations: http://home.comcast.net/~jk05/presentations/PDFTutorials.html
|
182
|
+
|
183
|
+
* Developing with PDF (book): http://shop.oreilly.com/product/0636920025269.do
|
data/Rakefile
CHANGED
@@ -14,7 +14,7 @@ desc "Run cane to check quality metrics"
|
|
14
14
|
Cane::RakeTask.new(:quality) do |cane|
|
15
15
|
cane.abc_max = 20
|
16
16
|
cane.style_measure = 100
|
17
|
-
cane.max_violations =
|
17
|
+
cane.max_violations = 31
|
18
18
|
|
19
19
|
cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
|
20
20
|
end
|
@@ -41,7 +41,7 @@ end
|
|
41
41
|
desc "Create a YAML file of integrity info for PDFs in the spec suite"
|
42
42
|
task :integrity_yaml do
|
43
43
|
data = {}
|
44
|
-
Dir.glob("spec/data/**/*.*").each do |path|
|
44
|
+
Dir.glob("spec/data/**/*.*").sort.each do |path|
|
45
45
|
path_without_spec = path.gsub("spec/","")
|
46
46
|
data[path_without_spec] = {
|
47
47
|
:bytes => File.size(path),
|
data/bin/pdf_object
CHANGED
@@ -25,7 +25,10 @@ gen = gen.to_i
|
|
25
25
|
|
26
26
|
# make magic happen
|
27
27
|
begin
|
28
|
-
obj =
|
28
|
+
obj = nil
|
29
|
+
PDF::Reader.open(filename) do |pdf|
|
30
|
+
obj = pdf.objects[PDF::Reader::Reference.new(id, gen)]
|
31
|
+
end
|
29
32
|
|
30
33
|
case obj
|
31
34
|
when Hash, Array
|
data/lib/pdf/reader.rb
CHANGED
@@ -110,16 +110,10 @@ module PDF
|
|
110
110
|
#
|
111
111
|
# reader = PDF::Reader.new("somefile.pdf", :password => "apples")
|
112
112
|
#
|
113
|
-
def initialize(input
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
@objects = PDF::Reader::ObjectHash.new(input, opts)
|
118
|
-
else
|
119
|
-
msg = "Calling PDF::Reader#new with no arguments is deprecated and will be removed "
|
120
|
-
msg += "in the 2.0 release"
|
121
|
-
$stderr.puts(msg)
|
122
|
-
end
|
113
|
+
def initialize(input, opts = {})
|
114
|
+
@cache = PDF::Reader::ObjectCache.new
|
115
|
+
opts.merge!(:cache => @cache)
|
116
|
+
@objects = PDF::Reader::ObjectHash.new(input, opts)
|
123
117
|
end
|
124
118
|
|
125
119
|
def info
|
@@ -133,7 +127,7 @@ module PDF
|
|
133
127
|
nil
|
134
128
|
else
|
135
129
|
xml = stream.unfiltered_data
|
136
|
-
xml.force_encoding("utf-8")
|
130
|
+
xml.force_encoding("utf-8")
|
137
131
|
xml
|
138
132
|
end
|
139
133
|
end
|
@@ -164,61 +158,6 @@ module PDF
|
|
164
158
|
yield PDF::Reader.new(input, opts)
|
165
159
|
end
|
166
160
|
|
167
|
-
# DEPRECATED: this method was deprecated in version 1.0.0 and will
|
168
|
-
# eventually be removed
|
169
|
-
#
|
170
|
-
#
|
171
|
-
# Parse the file with the given name, sending events to the given receiver.
|
172
|
-
#
|
173
|
-
def self.file(name, receivers, opts = {})
|
174
|
-
msg = "PDF::Reader#file is deprecated and will be removed in the 2.0 release"
|
175
|
-
$stderr.puts(msg)
|
176
|
-
File.open(name,"rb") do |f|
|
177
|
-
new.parse(f, receivers, opts)
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
# DEPRECATED: this method was deprecated in version 1.0.0 and will
|
182
|
-
# eventually be removed
|
183
|
-
#
|
184
|
-
# Parse the given string, sending events to the given receiver.
|
185
|
-
#
|
186
|
-
def self.string(str, receivers, opts = {})
|
187
|
-
msg = "PDF::Reader#string is deprecated and will be removed in the 2.0 release"
|
188
|
-
$stderr.puts(msg)
|
189
|
-
StringIO.open(str) do |s|
|
190
|
-
new.parse(s, receivers, opts)
|
191
|
-
end
|
192
|
-
end
|
193
|
-
|
194
|
-
# DEPRECATED: this method was deprecated in version 1.0.0 and will
|
195
|
-
# eventually be removed
|
196
|
-
#
|
197
|
-
# Parse the file with the given name, returning an unmarshalled ruby version of
|
198
|
-
# represents the requested pdf object
|
199
|
-
#
|
200
|
-
def self.object_file(name, id, gen = 0)
|
201
|
-
msg = "PDF::Reader#object_file is deprecated and will be removed in the 2.0 release"
|
202
|
-
$stderr.puts(msg)
|
203
|
-
File.open(name,"rb") { |f|
|
204
|
-
new.object(f, id.to_i, gen.to_i)
|
205
|
-
}
|
206
|
-
end
|
207
|
-
|
208
|
-
# DEPRECATED: this method was deprecated in version 1.0.0 and will
|
209
|
-
# eventually be removed
|
210
|
-
#
|
211
|
-
# Parse the given string, returning an unmarshalled ruby version of represents
|
212
|
-
# the requested pdf object
|
213
|
-
#
|
214
|
-
def self.object_string(str, id, gen = 0)
|
215
|
-
msg = "PDF::Reader#object_string is deprecated and will be removed in the 2.0 release"
|
216
|
-
$stderr.puts(msg)
|
217
|
-
StringIO.open(str) { |s|
|
218
|
-
new.object(s, id.to_i, gen.to_i)
|
219
|
-
}
|
220
|
-
end
|
221
|
-
|
222
161
|
# returns an array of PDF::Reader::Page objects, one for each
|
223
162
|
# page in the source PDF.
|
224
163
|
#
|
@@ -259,40 +198,6 @@ module PDF
|
|
259
198
|
PDF::Reader::Page.new(@objects, num, :cache => @cache)
|
260
199
|
end
|
261
200
|
|
262
|
-
|
263
|
-
# DEPRECATED: this method was deprecated in version 1.0.0 and will
|
264
|
-
# eventually be removed
|
265
|
-
#
|
266
|
-
# Given an IO object that contains PDF data, parse it.
|
267
|
-
#
|
268
|
-
def parse(io, receivers, opts = {})
|
269
|
-
msg = "PDF::Reader#parse is deprecated and will be removed in the 2.0 release"
|
270
|
-
$stderr.puts(msg)
|
271
|
-
ohash = ObjectHash.new(io)
|
272
|
-
|
273
|
-
options = {:pages => true, :raw_text => false, :metadata => true}
|
274
|
-
options.merge!(opts)
|
275
|
-
|
276
|
-
strategies.each do |s|
|
277
|
-
s.new(ohash, receivers, options).process
|
278
|
-
end
|
279
|
-
|
280
|
-
self
|
281
|
-
end
|
282
|
-
|
283
|
-
# DEPRECATED: this method was deprecated in version 1.0.0 and will
|
284
|
-
# eventually be removed
|
285
|
-
#
|
286
|
-
# Given an IO object that contains PDF data, return the contents of a single object
|
287
|
-
#
|
288
|
-
def object(io, id, gen)
|
289
|
-
msg = "PDF::Reader#object is deprecated and will be removed in the 2.0 release"
|
290
|
-
$stderr.puts(msg)
|
291
|
-
@objects = ObjectHash.new(io)
|
292
|
-
|
293
|
-
@objects.deref(Reference.new(id, gen))
|
294
|
-
end
|
295
|
-
|
296
201
|
private
|
297
202
|
|
298
203
|
# recursively convert strings from outside a content stream into UTF-8
|
@@ -321,7 +226,7 @@ module PDF
|
|
321
226
|
# TODO find a PDF I can use to spec this behaviour
|
322
227
|
#
|
323
228
|
def pdfdoc_to_utf8(obj)
|
324
|
-
obj.force_encoding("utf-8")
|
229
|
+
obj.force_encoding("utf-8")
|
325
230
|
obj
|
326
231
|
end
|
327
232
|
|
@@ -331,17 +236,10 @@ module PDF
|
|
331
236
|
def utf16_to_utf8(obj)
|
332
237
|
str = obj[2, obj.size]
|
333
238
|
str = str.unpack("n*").pack("U*")
|
334
|
-
str.force_encoding("utf-8")
|
239
|
+
str.force_encoding("utf-8")
|
335
240
|
str
|
336
241
|
end
|
337
242
|
|
338
|
-
def strategies
|
339
|
-
@strategies ||= [
|
340
|
-
::PDF::Reader::MetadataStrategy,
|
341
|
-
::PDF::Reader::PagesStrategy
|
342
|
-
]
|
343
|
-
end
|
344
|
-
|
345
243
|
def root
|
346
244
|
@root ||= @objects.deref(@objects.trailer[:Root])
|
347
245
|
end
|
@@ -351,7 +249,6 @@ end
|
|
351
249
|
################################################################################
|
352
250
|
|
353
251
|
require 'pdf/reader/resource_methods'
|
354
|
-
require 'pdf/reader/abstract_strategy'
|
355
252
|
require 'pdf/reader/buffer'
|
356
253
|
require 'pdf/reader/cid_widths'
|
357
254
|
require 'pdf/reader/cmap'
|
@@ -370,7 +267,6 @@ require 'pdf/reader/font_descriptor'
|
|
370
267
|
require 'pdf/reader/form_xobject'
|
371
268
|
require 'pdf/reader/glyph_hash'
|
372
269
|
require 'pdf/reader/lzw'
|
373
|
-
require 'pdf/reader/metadata_strategy'
|
374
270
|
require 'pdf/reader/object_cache'
|
375
271
|
require 'pdf/reader/object_hash'
|
376
272
|
require 'pdf/reader/object_stream'
|
@@ -381,7 +277,6 @@ require 'pdf/reader/reference'
|
|
381
277
|
require 'pdf/reader/register_receiver'
|
382
278
|
require 'pdf/reader/standard_security_handler'
|
383
279
|
require 'pdf/reader/stream'
|
384
|
-
require 'pdf/reader/text_receiver'
|
385
280
|
require 'pdf/reader/text_run'
|
386
281
|
require 'pdf/reader/page_state'
|
387
282
|
require 'pdf/reader/page_text_receiver'
|
data/lib/pdf/reader/buffer.rb
CHANGED
@@ -37,6 +37,7 @@ class PDF::Reader
|
|
37
37
|
#
|
38
38
|
class Buffer
|
39
39
|
TOKEN_WHITESPACE=[0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20]
|
40
|
+
TOKEN_DELIMITER=[0x25, 0x3C, 0x3E, 0x28, 0x5B, 0x7B, 0x29, 0x5D, 0x7D, 0x2F]
|
40
41
|
|
41
42
|
# some strings for comparisons. Declaring them here avoids creating new
|
42
43
|
# strings that need GC over and over
|
@@ -366,7 +367,7 @@ class PDF::Reader
|
|
366
367
|
# PDF name, start of new token
|
367
368
|
@tokens << tok if tok.size > 0
|
368
369
|
@tokens << byte.chr
|
369
|
-
@tokens << "" if byte == 0x2F && [nil, 0x20, 0x0A].include?(peek_byte)
|
370
|
+
@tokens << "" if byte == 0x2F && ([nil, 0x20, 0x0A] + TOKEN_DELIMITER).include?(peek_byte)
|
370
371
|
tok = ""
|
371
372
|
break
|
372
373
|
else
|
data/lib/pdf/reader/cmap.rb
CHANGED
@@ -31,6 +31,17 @@ class PDF::Reader
|
|
31
31
|
# extracting various useful information.
|
32
32
|
#
|
33
33
|
class CMap # :nodoc:
|
34
|
+
CMAP_KEYWORDS = {
|
35
|
+
"begincodespacerange" => 1,
|
36
|
+
"endcodespacerange" => 1,
|
37
|
+
"beginbfchar" => 1,
|
38
|
+
"endbfchar" => 1,
|
39
|
+
"beginbfrange" => 1,
|
40
|
+
"endbfrange" => 1,
|
41
|
+
"begin" => 1,
|
42
|
+
"begincmap" => 1,
|
43
|
+
"def" => 1
|
44
|
+
}
|
34
45
|
|
35
46
|
attr_reader :map
|
36
47
|
|
@@ -40,24 +51,25 @@ class PDF::Reader
|
|
40
51
|
end
|
41
52
|
|
42
53
|
def process_data(data)
|
54
|
+
parser = build_parser(data)
|
43
55
|
mode = nil
|
44
|
-
instructions =
|
56
|
+
instructions = []
|
45
57
|
|
46
|
-
|
47
|
-
if
|
58
|
+
while token = parser.parse_token(CMAP_KEYWORDS)
|
59
|
+
if token == "beginbfchar"
|
48
60
|
mode = :char
|
49
|
-
elsif
|
61
|
+
elsif token == "endbfchar"
|
50
62
|
process_bfchar_instructions(instructions)
|
51
|
-
instructions =
|
63
|
+
instructions = []
|
52
64
|
mode = nil
|
53
|
-
elsif
|
65
|
+
elsif token == "beginbfrange"
|
54
66
|
mode = :range
|
55
|
-
elsif
|
67
|
+
elsif token == "endbfrange"
|
56
68
|
process_bfrange_instructions(instructions)
|
57
|
-
instructions =
|
69
|
+
instructions = []
|
58
70
|
mode = nil
|
59
71
|
elsif mode == :char || mode == :range
|
60
|
-
instructions <<
|
72
|
+
instructions << token
|
61
73
|
end
|
62
74
|
end
|
63
75
|
end
|
@@ -105,22 +117,15 @@ class PDF::Reader
|
|
105
117
|
end
|
106
118
|
|
107
119
|
def process_bfchar_instructions(instructions)
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
@map[find[0]] = replace
|
113
|
-
find = str_to_int(parser.parse_token)
|
114
|
-
replace = str_to_int(parser.parse_token)
|
120
|
+
instructions.each_slice(2) do |one, two|
|
121
|
+
find = str_to_int(one)
|
122
|
+
replace = str_to_int(two)
|
123
|
+
@map[find.first] = replace
|
115
124
|
end
|
116
125
|
end
|
117
126
|
|
118
127
|
def process_bfrange_instructions(instructions)
|
119
|
-
|
120
|
-
start = parser.parse_token
|
121
|
-
finish = parser.parse_token
|
122
|
-
to = parser.parse_token
|
123
|
-
while start && finish && to
|
128
|
+
instructions.each_slice(3) do |start, finish, to|
|
124
129
|
if start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(String)
|
125
130
|
bfrange_type_one(start, finish, to)
|
126
131
|
elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array)
|
@@ -128,9 +133,6 @@ class PDF::Reader
|
|
128
133
|
else
|
129
134
|
raise "invalid bfrange section"
|
130
135
|
end
|
131
|
-
start = parser.parse_token
|
132
|
-
finish = parser.parse_token
|
133
|
-
to = parser.parse_token
|
134
136
|
end
|
135
137
|
end
|
136
138
|
|