pdf-reader 0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +2 -0
- data/README +177 -0
- data/Rakefile +84 -0
- data/TODO +9 -0
- data/lib/pdf/reader.rb +106 -0
- data/lib/pdf/reader/buffer.rb +144 -0
- data/lib/pdf/reader/content.rb +289 -0
- data/lib/pdf/reader/error.rb +53 -0
- data/lib/pdf/reader/explore.rb +116 -0
- data/lib/pdf/reader/filter.rb +62 -0
- data/lib/pdf/reader/name.rb +37 -0
- data/lib/pdf/reader/parser.rb +203 -0
- data/lib/pdf/reader/reference.rb +55 -0
- data/lib/pdf/reader/register_receiver.rb +18 -0
- data/lib/pdf/reader/text_receiver.rb +259 -0
- data/lib/pdf/reader/token.rb +41 -0
- data/lib/pdf/reader/xref.rb +101 -0
- metadata +70 -0
@@ -0,0 +1,41 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
################################################################################
|
28
|
+
# An internal PDF::Reader class that represents a single token from a PDF file.
|
29
|
+
#
|
30
|
+
# Behaves exactly like a Ruby String - it basically exists for convenience.
|
31
|
+
class Token < String
  # Token adds no state or behaviour of its own; it exists so that parsed
  # tokens can be told apart from ordinary strings by class.
  #
  # No constructor is defined here on purpose: String#initialize already
  # accepts the token's value, so Token.new("obj") yields a Token equal to
  # "obj" without an override that merely forwards to super.
end
|
39
|
+
################################################################################
|
40
|
+
end
|
41
|
+
################################################################################
|
@@ -0,0 +1,101 @@
|
|
1
|
+
################################################################################
|
2
|
+
#
|
3
|
+
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
# a copy of this software and associated documentation files (the
|
7
|
+
# "Software"), to deal in the Software without restriction, including
|
8
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
# the following conditions:
|
12
|
+
#
|
13
|
+
# The above copyright notice and this permission notice shall be
|
14
|
+
# included in all copies or substantial portions of the Software.
|
15
|
+
#
|
16
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#
|
24
|
+
################################################################################
|
25
|
+
|
26
|
+
class PDF::Reader
|
27
|
+
################################################################################
|
28
|
+
# An internal PDF::Reader class that represents the Xref table in a PDF file.
|
29
|
+
# An Xref table is a map of object identifiers and byte offsets. Any time a particular
|
30
|
+
# object needs to be found, the Xref table is used to find where it is stored in the
|
31
|
+
# file.
|
32
|
+
class XRef
  ################################################################################
  # Create a new, empty Xref table bound to the supplied buffer (a
  # PDF::Reader::Buffer in this library). Nothing is read from the buffer until
  # #load is called.
  def initialize(buffer)
    @buffer = buffer
    @xref   = {}
  end
  ################################################################################
  # Read the xref table from the underlying buffer. If offset is specified the
  # table will be loaded from there, otherwise the buffer is asked for the
  # default offset via find_first_xref_offset.
  #
  # Returns the trailer dictionary of the table that was read. Fails silently
  # (returns nil) if the token found at the offset is not "xref".
  def load(offset = nil)
    @buffer.seek(offset || @buffer.find_first_xref_offset)
    load_xref_table if @buffer.token == "xref"
  end
  ################################################################################
  # Parse and return an entire PDF object. The object is requested by
  # specifying a PDF::Reader::Reference object that contains the object's ID
  # and revision number.
  #
  # ref      - responds to id and gen
  # save_pos - when true (the default) the buffer position in effect before the
  #            call is restored afterwards
  #
  # Returns whatever Parser#object returns for the referenced object.
  def object(ref, save_pos = true)
    pos = @buffer.pos if save_pos
    begin
      # The result of seek is handed to the parser as its input.
      # NOTE(review): presumably Buffer#seek returns the buffer itself - confirm.
      Parser.new(@buffer.seek(offset_for(ref)), self).object(ref.id, ref.gen)
    ensure
      # Restore the caller's position even when parsing raises, so a failed
      # lookup does not leave the shared buffer pointing into another object.
      @buffer.seek(pos) if save_pos
    end
  end
  ################################################################################
  # Assumes the underlying buffer is positioned just after the "xref" keyword
  # and processes the table into memory.
  #
  # A cross reference section may contain several subsections, each starting
  # with a "first_object_id entry_count" header, so subsections are read until
  # the "trailer" keyword is reached. Only in-use entries (state "n") are stored.
  #
  # If the trailer dictionary has a 'Prev' entry, the table at that offset is
  # loaded as well.
  #
  # Returns the trailer dictionary. Raises MalformedPDFError if the table is not
  # followed by a trailer, or the trailer is not a dictionary.
  def load_xref_table
    loop do
      header = @buffer.token
      break if header == "trailer"

      # Anything other than a numeric subsection header here means the
      # trailer keyword is missing (to_s also covers a nil token at EOF).
      unless header.to_s =~ /\A\s*\d/
        raise MalformedPDFError, "PDF malformed, missing trailer after cross reference"
      end

      objid = header.to_i
      count = @buffer.token.to_i

      count.times do
        offset     = @buffer.token.to_i
        generation = @buffer.token.to_i
        state      = @buffer.token

        store(objid, generation, offset) if state == "n"
        objid += 1
      end
    end

    raise MalformedPDFError, "PDF malformed, trailer should be a dictionary" unless @buffer.token == "<<"

    trailer = Parser.new(@buffer, self).dictionary
    load(trailer['Prev']) if trailer.has_key?('Prev')

    trailer
  end
  ################################################################################
  # Returns the byte offset for the specified PDF object, or nil when no offset
  # has been stored for that object ID and revision number.
  #
  # ref - a PDF::Reader::Reference object containing an object ID and revision number
  def offset_for(ref)
    generations = @xref[ref.id]
    generations && generations[ref.gen]
  end
  ################################################################################
  # Stores an offset value for a particular PDF object ID and revision number.
  #
  # An offset that is already stored is never overwritten. Tables are loaded
  # starting from the requested one and then following 'Prev', so the entry
  # from the table that was loaded first wins.
  def store(id, gen, offset)
    (@xref[id] ||= {})[gen] ||= offset
  end
  ################################################################################
end
|
99
|
+
################################################################################
|
100
|
+
end
|
101
|
+
################################################################################
|
metadata
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.4
|
3
|
+
specification_version: 1
|
4
|
+
name: pdf-reader
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: "0.5"
|
7
|
+
date: 2007-12-14 00:00:00 +11:00
|
8
|
+
summary: A library for accessing the content of PDF files
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: pjones@pmade.com
|
12
|
+
homepage: http://software.pmade.com/pdfreader
|
13
|
+
rubyforge_project: pdf-reader
|
14
|
+
description: The PDF::Reader library implements a PDF parser conforming as much as possible to the PDF specification from Adobe
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Peter Jones
|
31
|
+
files:
|
32
|
+
- lib/pdf
|
33
|
+
- lib/pdf/reader
|
34
|
+
- lib/pdf/reader/explore.rb
|
35
|
+
- lib/pdf/reader/reference.rb
|
36
|
+
- lib/pdf/reader/name.rb
|
37
|
+
- lib/pdf/reader/token.rb
|
38
|
+
- lib/pdf/reader/xref.rb
|
39
|
+
- lib/pdf/reader/filter.rb
|
40
|
+
- lib/pdf/reader/text_receiver.rb
|
41
|
+
- lib/pdf/reader/buffer.rb
|
42
|
+
- lib/pdf/reader/error.rb
|
43
|
+
- lib/pdf/reader/content.rb
|
44
|
+
- lib/pdf/reader/parser.rb
|
45
|
+
- lib/pdf/reader/register_receiver.rb
|
46
|
+
- lib/pdf/reader.rb
|
47
|
+
- Rakefile
|
48
|
+
- README
|
49
|
+
- TODO
|
50
|
+
- CHANGELOG
|
51
|
+
test_files: []
|
52
|
+
|
53
|
+
rdoc_options:
|
54
|
+
- --title
|
55
|
+
- PDF::Reader Documentation
|
56
|
+
- --main
|
57
|
+
- README
|
58
|
+
- -q
|
59
|
+
extra_rdoc_files:
|
60
|
+
- README
|
61
|
+
- TODO
|
62
|
+
- CHANGELOG
|
63
|
+
executables: []
|
64
|
+
|
65
|
+
extensions: []
|
66
|
+
|
67
|
+
requirements: []
|
68
|
+
|
69
|
+
dependencies: []
|
70
|
+
|