pdf-reader 0.9.3 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +4 -0
- data/lib/pdf/reader.rb +17 -6
- data/lib/pdf/reader/abstract_strategy.rb +10 -8
- metadata +8 -7
data/CHANGELOG
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
v0.9.4 (XXX)
|
2
|
+
- support multiple receivers within a single pass over a source file
|
3
|
+
- massive time saving when dealing with multiple receivers
|
4
|
+
|
1
5
|
v0.9.3 (2nd July 2011)
|
2
6
|
- add PDF::Reader::Reference#hash method
|
3
7
|
- improves behaviour of Reference objects when they're used as Hash keys
|
data/lib/pdf/reader.rb
CHANGED
@@ -73,21 +73,32 @@ module PDF
|
|
73
73
|
# :pages
|
74
74
|
# :raw_text
|
75
75
|
#
|
76
|
+
# = Processing with multiple receivers
|
77
|
+
#
|
78
|
+
# If you wish to parse a PDF file with multiple simultaneous receivers, just
|
79
|
+
# pass an array of receivers as the second argument:
|
80
|
+
#
|
81
|
+
# pdf = PDF::Reader.new
|
82
|
+
# pdf.parse(File.new("somefile.pdf"), [receiver_one, receiver_two])
|
83
|
+
#
|
84
|
+
# This saves a significant amount of time by limiting the work to a single pass
|
85
|
+
# over the source file.
|
86
|
+
#
|
76
87
|
class Reader
|
77
88
|
|
78
89
|
# Parse the file with the given name, sending events to the given receiver.
|
79
90
|
#
|
80
|
-
def self.file(name,
|
91
|
+
def self.file(name, receivers, opts = {})
|
81
92
|
File.open(name,"rb") do |f|
|
82
|
-
new.parse(f,
|
93
|
+
new.parse(f, receivers, opts)
|
83
94
|
end
|
84
95
|
end
|
85
96
|
|
86
97
|
# Parse the given string, sending events to the given receiver.
|
87
98
|
#
|
88
|
-
def self.string(str,
|
99
|
+
def self.string(str, receivers, opts = {})
|
89
100
|
StringIO.open(str) do |s|
|
90
|
-
new.parse(s,
|
101
|
+
new.parse(s, receivers, opts)
|
91
102
|
end
|
92
103
|
end
|
93
104
|
|
@@ -111,7 +122,7 @@ module PDF
|
|
111
122
|
|
112
123
|
# Given an IO object that contains PDF data, parse it.
|
113
124
|
#
|
114
|
-
def parse(io,
|
125
|
+
def parse(io, receivers, opts = {})
|
115
126
|
ohash = ObjectHash.new(io)
|
116
127
|
|
117
128
|
if ohash.trailer[:Encrypt]
|
@@ -122,7 +133,7 @@ module PDF
|
|
122
133
|
options.merge!(opts)
|
123
134
|
|
124
135
|
strategies.each do |s|
|
125
|
-
s.new(ohash,
|
136
|
+
s.new(ohash, receivers, options).process
|
126
137
|
end
|
127
138
|
|
128
139
|
self
|
@@ -4,8 +4,13 @@ class PDF::Reader
|
|
4
4
|
|
5
5
|
class AbstractStrategy # :nodoc:
|
6
6
|
|
7
|
-
def initialize(ohash,
|
8
|
-
@ohash, @
|
7
|
+
def initialize(ohash, receivers, options = {})
|
8
|
+
@ohash, @options = ohash, options
|
9
|
+
if receivers.is_a?(Array)
|
10
|
+
@receivers = receivers
|
11
|
+
else
|
12
|
+
@receivers = [receivers]
|
13
|
+
end
|
9
14
|
end
|
10
15
|
|
11
16
|
private
|
@@ -17,7 +22,9 @@ class PDF::Reader
|
|
17
22
|
# calls the name callback method on the receiver class with params as the arguments
|
18
23
|
#
|
19
24
|
def callback (name, params=[])
|
20
|
-
|
25
|
+
@receivers.each do |receiver|
|
26
|
+
receiver.send(name, *params) if receiver.respond_to?(name)
|
27
|
+
end
|
21
28
|
end
|
22
29
|
|
23
30
|
# strings outside of page content should be in either PDFDocEncoding or UTF-16.
|
@@ -56,10 +63,6 @@ class PDF::Reader
|
|
56
63
|
pages ? true : false
|
57
64
|
end
|
58
65
|
|
59
|
-
def receiver
|
60
|
-
@receiver
|
61
|
-
end
|
62
|
-
|
63
66
|
def root
|
64
67
|
ohash.object(trailer[:Root])
|
65
68
|
end
|
@@ -74,4 +77,3 @@ class PDF::Reader
|
|
74
77
|
|
75
78
|
end
|
76
79
|
end
|
77
|
-
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
-
|
8
|
-
-
|
9
|
-
version: 0.
|
7
|
+
- 10
|
8
|
+
- 0
|
9
|
+
version: 0.10.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- James Healy
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-07-
|
17
|
+
date: 2011-07-06 00:00:00 +10:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -63,12 +63,13 @@ dependencies:
|
|
63
63
|
requirement: &id004 !ruby/object:Gem::Requirement
|
64
64
|
none: false
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ~>
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
segments:
|
69
|
+
- 1
|
70
|
+
- 0
|
69
71
|
- 0
|
70
|
-
|
71
|
-
version: "0.9"
|
72
|
+
version: 1.0.0
|
72
73
|
type: :runtime
|
73
74
|
version_requirements: *id004
|
74
75
|
description: The PDF::Reader library implements a PDF parser conforming as much as possible to the PDF specification from Adobe
|