pdf-reader 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +2 -0
- data/README +177 -0
- data/Rakefile +84 -0
- data/TODO +9 -0
- data/lib/pdf/reader.rb +106 -0
- data/lib/pdf/reader/buffer.rb +144 -0
- data/lib/pdf/reader/content.rb +289 -0
- data/lib/pdf/reader/error.rb +53 -0
- data/lib/pdf/reader/explore.rb +116 -0
- data/lib/pdf/reader/filter.rb +62 -0
- data/lib/pdf/reader/name.rb +37 -0
- data/lib/pdf/reader/parser.rb +203 -0
- data/lib/pdf/reader/reference.rb +55 -0
- data/lib/pdf/reader/register_receiver.rb +18 -0
- data/lib/pdf/reader/text_receiver.rb +259 -0
- data/lib/pdf/reader/token.rb +41 -0
- data/lib/pdf/reader/xref.rb +101 -0
- metadata +70 -0
@@ -0,0 +1,41 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
################################################################################
|
28
|
+
# An internal PDF::Reader class that represents a single token from a PDF file.
|
29
|
+
#
|
30
|
+
# Behaves exactly like a Ruby String - it basically exists for convenience.
|
31
|
+
# Thin String subclass used to tag a single token read from a PDF file.
# It adds no behaviour of its own; every String method is inherited unchanged.
class Token < String
  # Builds a token whose contents are +val+.
  #
  # val - the token text, handed straight to String#initialize
  def initialize(val)
    super(val)
  end
end
|
39
|
+
################################################################################
|
40
|
+
end
|
41
|
+
################################################################################
|
@@ -0,0 +1,101 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
################################################################################
|
28
|
+
# An internal PDF::Reader class that represents the Xref table in a PDF file.
|
29
|
+
# An Xref table is a map of object identifiers and byte offsets. Any time a particular
|
30
|
+
# object needs to be found, the Xref table is used to find where it is stored in the
|
31
|
+
# file.
|
32
|
+
class XRef
  ################################################################################
  # Create a new, empty Xref table that reads from the supplied buffer.
  #
  # buffer - the object tokens are pulled from; this class calls #seek, #pos,
  #          #token and #find_first_xref_offset on it
  def initialize(buffer)
    @buffer = buffer
    @xref   = {}   # { object id => { generation => byte offset } }
  end
  ################################################################################
  # Read the xref table from the underlying buffer. If offset is specified the
  # table is loaded from there, otherwise the buffer is asked for the default
  # offset via find_first_xref_offset.
  #
  # Returns the trailer dictionary of the table that was read, or nil when the
  # token at the requested offset is not "xref" (i.e. it fails silently).
  def load(offset = nil)
    @buffer.seek(offset || @buffer.find_first_xref_offset)

    load_xref_table if @buffer.token == "xref"
  end
  ################################################################################
  # Return the parsed contents of an entire PDF object, as produced by
  # Parser#object. The object is requested by specifying a reference that
  # responds to #id and #gen (the object's ID and revision number).
  #
  # ref      - reference to the wanted object
  # save_pos - when true (the default) the buffer position in effect before
  #            the call is restored afterwards, even if parsing raises
  def object(ref, save_pos = true)
    pos = @buffer.pos if save_pos

    begin
      Parser.new(@buffer.seek(offset_for(ref)), self).object(ref.id, ref.gen)
    ensure
      # Restore in an ensure so a parse failure does not leave the shared
      # buffer parked in the middle of the object.
      @buffer.seek(pos) if save_pos
    end
  end
  ################################################################################
  # Assumes the underlying buffer is positioned just after the "xref" keyword
  # and processes the table into memory.
  #
  # A table is made of one or more subsections, each introduced by two
  # integers (first object id, entry count) and followed by that many
  # "offset generation state" entries. Every subsection is read until a
  # non-numeric token - expected to be "trailer" - is met.
  #
  # Returns the trailer dictionary. Raises MalformedPDFError when the table is
  # not followed by "trailer" and the opening "<<" of a dictionary.
  def load_xref_table
    token = @buffer.token

    # to_s guards against a nil token; anything that is not a plain integer
    # ends the list of subsections.
    while token.to_s =~ /\A\d+\z/
      objid = token.to_i
      count = @buffer.token.to_i

      count.times do
        offset     = @buffer.token.to_i
        generation = @buffer.token.to_i
        state      = @buffer.token

        # only entries flagged "n" are recorded; all others are skipped
        store(objid, generation, offset) if state == "n"
        objid += 1
      end

      token = @buffer.token
    end

    raise MalformedPDFError, "PDF malformed, missing trailer after cross reference" unless token == "trailer"
    raise MalformedPDFError, "PDF malformed, trailer should be a dictionary" unless @buffer.token == "<<"

    trailer = Parser.new(@buffer, self).dictionary

    # Follow the chain to the previous table. Because #store never overwrites
    # an existing entry, offsets read from this table win over older ones.
    load(trailer['Prev']) if trailer.has_key?('Prev')

    trailer
  end
  ################################################################################
  # Returns the byte offset stored for the specified PDF object.
  #
  # ref - a reference responding to #id and #gen (object ID and revision number)
  #
  # NOTE(review): an id that was never stored makes @xref[ref.id] nil, so this
  # raises NoMethodError rather than returning nil.
  def offset_for(ref)
    @xref[ref.id][ref.gen]
  end
  ################################################################################
  # Stores an offset value for a particular PDF object ID and revision number.
  # An offset already recorded for the same id/generation pair is kept.
  def store(id, gen, offset)
    (@xref[id] ||= {})[gen] ||= offset
  end
  ################################################################################
end
|
99
|
+
################################################################################
|
100
|
+
end
|
101
|
+
################################################################################
|
metadata
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.4
|
3
|
+
specification_version: 1
|
4
|
+
name: pdf-reader
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: "0.5"
|
7
|
+
date: 2007-12-14 00:00:00 +11:00
|
8
|
+
summary: A library for accessing the content of PDF files
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: pjones@pmade.com
|
12
|
+
homepage: http://software.pmade.com/pdfreader
|
13
|
+
rubyforge_project: pdf-reader
|
14
|
+
description: The PDF::Reader library implements a PDF parser conforming as much as possible to the PDF specification from Adobe
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Peter Jones
|
31
|
+
files:
|
32
|
+
- lib/pdf
|
33
|
+
- lib/pdf/reader
|
34
|
+
- lib/pdf/reader/explore.rb
|
35
|
+
- lib/pdf/reader/reference.rb
|
36
|
+
- lib/pdf/reader/name.rb
|
37
|
+
- lib/pdf/reader/token.rb
|
38
|
+
- lib/pdf/reader/xref.rb
|
39
|
+
- lib/pdf/reader/filter.rb
|
40
|
+
- lib/pdf/reader/text_receiver.rb
|
41
|
+
- lib/pdf/reader/buffer.rb
|
42
|
+
- lib/pdf/reader/error.rb
|
43
|
+
- lib/pdf/reader/content.rb
|
44
|
+
- lib/pdf/reader/parser.rb
|
45
|
+
- lib/pdf/reader/register_receiver.rb
|
46
|
+
- lib/pdf/reader.rb
|
47
|
+
- Rakefile
|
48
|
+
- README
|
49
|
+
- TODO
|
50
|
+
- CHANGELOG
|
51
|
+
test_files: []
|
52
|
+
|
53
|
+
rdoc_options:
|
54
|
+
- --title
|
55
|
+
- PDF::Reader Documentation
|
56
|
+
- --main
|
57
|
+
- README
|
58
|
+
- -q
|
59
|
+
extra_rdoc_files:
|
60
|
+
- README
|
61
|
+
- TODO
|
62
|
+
- CHANGELOG
|
63
|
+
executables: []
|
64
|
+
|
65
|
+
extensions: []
|
66
|
+
|
67
|
+
requirements: []
|
68
|
+
|
69
|
+
dependencies: []
|
70
|
+
|