pdf-reader 0.8.6 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +17 -0
- data/README.rdoc +7 -15
- data/Rakefile +10 -63
- data/TODO +6 -8
- data/bin/pdf_object +3 -0
- data/bin/pdf_text +4 -2
- data/examples/extract_images.rb +108 -0
- data/examples/hash.rb +1 -1
- data/examples/text.rb +3 -0
- data/lib/pdf/hash.rb +8 -225
- data/lib/pdf/reader.rb +79 -55
- data/lib/pdf/reader/abstract_strategy.rb +77 -0
- data/lib/pdf/reader/buffer.rb +61 -40
- data/lib/pdf/reader/cmap.rb +11 -10
- data/lib/pdf/reader/encoding.rb +85 -79
- data/lib/pdf/reader/error.rb +1 -2
- data/lib/pdf/reader/filter.rb +109 -6
- data/lib/pdf/reader/font.rb +11 -11
- data/lib/pdf/reader/lzw.rb +123 -0
- data/lib/pdf/reader/metadata_strategy.rb +53 -0
- data/lib/pdf/reader/object_hash.rb +275 -0
- data/lib/pdf/reader/object_stream.rb +51 -0
- data/lib/pdf/reader/{content.rb → pages_strategy.rb} +63 -100
- data/lib/pdf/reader/parser.rb +74 -37
- data/lib/pdf/reader/print_receiver.rb +0 -1
- data/lib/pdf/reader/register_receiver.rb +21 -0
- data/lib/pdf/reader/stream.rb +5 -1
- data/lib/pdf/reader/text_receiver.rb +3 -1
- data/lib/pdf/reader/token.rb +1 -1
- data/lib/pdf/reader/xref.rb +126 -64
- metadata +61 -13
- data/lib/pdf/reader/explore.rb +0 -116
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 59
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 9
|
9
|
+
- 0
|
10
|
+
version: 0.9.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- James Healy
|
@@ -15,13 +15,56 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-11-19 00:00:00 -05:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
|
-
name:
|
22
|
+
name: rake
|
23
23
|
prerelease: false
|
24
24
|
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: roodi
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id002
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: rspec
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ~>
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 1
|
58
|
+
segments:
|
59
|
+
- 2
|
60
|
+
- 1
|
61
|
+
version: "2.1"
|
62
|
+
type: :development
|
63
|
+
version_requirements: *id003
|
64
|
+
- !ruby/object:Gem::Dependency
|
65
|
+
name: Ascii85
|
66
|
+
prerelease: false
|
67
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
25
68
|
none: false
|
26
69
|
requirements:
|
27
70
|
- - ">="
|
@@ -32,7 +75,7 @@ dependencies:
|
|
32
75
|
- 9
|
33
76
|
version: "0.9"
|
34
77
|
type: :runtime
|
35
|
-
version_requirements: *
|
78
|
+
version_requirements: *id004
|
36
79
|
description: The PDF::Reader library implements a PDF parser conforming as much as possible to the PDF specification from Adobe
|
37
80
|
email: jimmy@deefa.com
|
38
81
|
executables:
|
@@ -56,22 +99,27 @@ files:
|
|
56
99
|
- examples/text.rb
|
57
100
|
- examples/version.rb
|
58
101
|
- examples/page_counter_improved.rb
|
102
|
+
- examples/extract_images.rb
|
59
103
|
- lib/pdf/reader/glyphlist.txt
|
60
|
-
- lib/pdf/reader/content.rb
|
61
104
|
- lib/pdf/reader/error.rb
|
62
105
|
- lib/pdf/reader/font.rb
|
106
|
+
- lib/pdf/reader/lzw.rb
|
63
107
|
- lib/pdf/reader/print_receiver.rb
|
64
108
|
- lib/pdf/reader/reference.rb
|
65
109
|
- lib/pdf/reader/filter.rb
|
66
110
|
- lib/pdf/reader/text_receiver.rb
|
111
|
+
- lib/pdf/reader/pages_strategy.rb
|
112
|
+
- lib/pdf/reader/abstract_strategy.rb
|
67
113
|
- lib/pdf/reader/encoding.rb
|
68
114
|
- lib/pdf/reader/stream.rb
|
69
115
|
- lib/pdf/reader/register_receiver.rb
|
116
|
+
- lib/pdf/reader/object_hash.rb
|
70
117
|
- lib/pdf/reader/token.rb
|
71
118
|
- lib/pdf/reader/xref.rb
|
72
119
|
- lib/pdf/reader/cmap.rb
|
120
|
+
- lib/pdf/reader/object_stream.rb
|
121
|
+
- lib/pdf/reader/metadata_strategy.rb
|
73
122
|
- lib/pdf/reader/buffer.rb
|
74
|
-
- lib/pdf/reader/explore.rb
|
75
123
|
- lib/pdf/reader/encodings/zapf_dingbats.txt
|
76
124
|
- lib/pdf/reader/encodings/standard.txt
|
77
125
|
- lib/pdf/reader/encodings/mac_roman.txt
|
@@ -84,13 +132,13 @@ files:
|
|
84
132
|
- lib/pdf/reader.rb
|
85
133
|
- lib/pdf-reader.rb
|
86
134
|
- Rakefile
|
87
|
-
- bin/pdf_object
|
88
|
-
- bin/pdf_text
|
89
|
-
- bin/pdf_list_callbacks
|
90
135
|
- README.rdoc
|
91
136
|
- TODO
|
92
137
|
- CHANGELOG
|
93
138
|
- MIT-LICENSE
|
139
|
+
- bin/pdf_object
|
140
|
+
- bin/pdf_text
|
141
|
+
- bin/pdf_list_callbacks
|
94
142
|
has_rdoc: true
|
95
143
|
homepage: http://github.com/yob/pdf-reader
|
96
144
|
licenses: []
|
@@ -124,7 +172,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
124
172
|
version: "0"
|
125
173
|
requirements: []
|
126
174
|
|
127
|
-
rubyforge_project:
|
175
|
+
rubyforge_project:
|
128
176
|
rubygems_version: 1.3.7
|
129
177
|
signing_key:
|
130
178
|
specification_version: 3
|
data/lib/pdf/reader/explore.rb
DELETED
@@ -1,116 +0,0 @@
|
|
1
|
-
################################################################################
|
2
|
-
#
|
3
|
-
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
-
#
|
5
|
-
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
-
# a copy of this software and associated documentation files (the
|
7
|
-
# "Software"), to deal in the Software without restriction, including
|
8
|
-
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
-
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
-
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
-
# the following conditions:
|
12
|
-
#
|
13
|
-
# The above copyright notice and this permission notice shall be
|
14
|
-
# included in all copies or substantial portions of the Software.
|
15
|
-
#
|
16
|
-
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
-
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
-
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
-
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
-
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
-
#
|
24
|
-
################################################################################
|
25
|
-
require 'pathname'
|
26
|
-
|
27
|
-
|
28
|
-
class PDF::Reader
|
29
|
-
################################################################################
|
30
|
-
class Explore
|
31
|
-
################################################################################
|
32
|
-
def self.file (name)
|
33
|
-
PDF::Reader.new.parse(File.open(name), self)
|
34
|
-
end
|
35
|
-
################################################################################
|
36
|
-
def initialize (receiver, xref)
|
37
|
-
@xref = xref
|
38
|
-
@pwd = '/'
|
39
|
-
end
|
40
|
-
################################################################################
|
41
|
-
def document (root)
|
42
|
-
@root = root
|
43
|
-
self
|
44
|
-
end
|
45
|
-
################################################################################
|
46
|
-
def output_parent (obj)
|
47
|
-
case obj
|
48
|
-
when Hash
|
49
|
-
obj.each do |k,v|
|
50
|
-
print "#{k}"; output_child(v); print "\n"
|
51
|
-
Explore::const_set(k, k) if !Explore.const_defined?(k)
|
52
|
-
end
|
53
|
-
when Array
|
54
|
-
obj.each_with_index {|o, i| print "#{i}: "; output_child(o); print "\n"}
|
55
|
-
else
|
56
|
-
output_child(obj)
|
57
|
-
print "\n"
|
58
|
-
end
|
59
|
-
end
|
60
|
-
################################################################################
|
61
|
-
def output_child (obj)
|
62
|
-
print ": #{obj.class}"
|
63
|
-
|
64
|
-
case obj
|
65
|
-
when Float
|
66
|
-
print ": #{obj}"
|
67
|
-
when String
|
68
|
-
print ": #{obj[0, 20].sub(/\n/, ' ')}"
|
69
|
-
end
|
70
|
-
end
|
71
|
-
################################################################################
|
72
|
-
def cd (path)
|
73
|
-
path = path.to_s
|
74
|
-
|
75
|
-
if path[0,1] == "/"
|
76
|
-
@pwd = path
|
77
|
-
else
|
78
|
-
@pwd = Pathname.new(@pwd + '/' + path).cleanpath.to_s
|
79
|
-
end
|
80
|
-
end
|
81
|
-
################################################################################
|
82
|
-
def pwd
|
83
|
-
@pwd
|
84
|
-
end
|
85
|
-
################################################################################
|
86
|
-
def ls (entry = nil)
|
87
|
-
parts = @pwd.split('/')
|
88
|
-
obj = @root
|
89
|
-
|
90
|
-
parts.shift if parts[0] == ""
|
91
|
-
parts.push(entry) if entry
|
92
|
-
|
93
|
-
parts.each do |p|
|
94
|
-
case obj
|
95
|
-
when Hash
|
96
|
-
unless obj.has_key?(p)
|
97
|
-
puts "invalid path at #{p}"
|
98
|
-
return
|
99
|
-
end
|
100
|
-
obj = obj[p]
|
101
|
-
|
102
|
-
when Array
|
103
|
-
obj = obj[p.to_i]
|
104
|
-
end
|
105
|
-
|
106
|
-
obj = @xref.object(obj)
|
107
|
-
end
|
108
|
-
|
109
|
-
output_parent(obj)
|
110
|
-
"#{@pwd}: #{obj.class}"
|
111
|
-
end
|
112
|
-
################################################################################
|
113
|
-
end
|
114
|
-
################################################################################
|
115
|
-
end
|
116
|
-
################################################################################
|