pdf-core 0.0.3 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/pdf/core/object_store.rb +1 -170
- data/pdf-core.gemspec +3 -1
- metadata +31 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37fb1b526f79e68be0e821b0773f34a755f88e9a
|
4
|
+
data.tar.gz: 06a2002aecbb70df5bda37ea66adbbba24a59ac6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 38a7c15689de4259d874d1a57522be5c10dfe35453423d9c07beacc317d2a60394d3a5d443c1cba3cf0f11da175d9ff8fe748893aba2df8736b51298cd56b2c8
|
7
|
+
data.tar.gz: b9263b1a73f3d77cf3de5947f5aee64d97946c5852d84134724a31e91c09a515246be10d066886e3df4d27bbed7d0f91d1ffec5370b95d90768a4ab066e792a3
|
data/lib/pdf/core/object_store.rb
CHANGED
@@ -132,176 +132,6 @@ module PDF
|
|
132
132
|
flat_page_ids[k]
|
133
133
|
end
|
134
134
|
|
135
|
-
# imports all objects required to render a page from another PDF. The
|
136
|
-
# objects are added to the current object store, but NOT linked
|
137
|
-
# anywhere.
|
138
|
-
#
|
139
|
-
# The object ID of the root Page object is returned, it's up to the
|
140
|
-
# calling code to link that into the document structure somewhere. If
|
141
|
-
# this isn't done the imported objects will just be removed when the
|
142
|
-
# store is compacted.
|
143
|
-
#
|
144
|
-
# Imports nothing and returns nil if the requested page number doesn't
|
145
|
-
# exist. page_num is 1 indexed, so 1 indicates the first page.
|
146
|
-
#
|
147
|
-
def import_page(input, page_num)
|
148
|
-
@loaded_objects = {}
|
149
|
-
if template_id = indexed_template(input, page_num)
|
150
|
-
return template_id
|
151
|
-
end
|
152
|
-
|
153
|
-
io = if input.respond_to?(:seek) && input.respond_to?(:read)
|
154
|
-
input
|
155
|
-
elsif File.file?(input.to_s)
|
156
|
-
StringIO.new(File.binread(input.to_s))
|
157
|
-
else
|
158
|
-
raise ArgumentError, "input must be an IO-like object or a filename"
|
159
|
-
end
|
160
|
-
|
161
|
-
# unless File.file?(filename)
|
162
|
-
# raise ArgumentError, "#{filename} does not exist"
|
163
|
-
# end
|
164
|
-
|
165
|
-
hash = indexed_hash(input, io)
|
166
|
-
ref = hash.page_references[page_num - 1]
|
167
|
-
|
168
|
-
if ref.nil?
|
169
|
-
nil
|
170
|
-
else
|
171
|
-
index_template(input, page_num, load_object_graph(hash, ref).identifier)
|
172
|
-
end
|
173
|
-
|
174
|
-
rescue PDF::Reader::MalformedPDFError, PDF::Reader::InvalidObjectError => e
|
175
|
-
msg = "Error reading template file. If you are sure it's a valid PDF, it may be a bug.\n#{e.message}"
|
176
|
-
raise PDF::Core::Errors::TemplateError, msg
|
177
|
-
rescue PDF::Reader::UnsupportedFeatureError
|
178
|
-
msg = "Template file contains unsupported PDF features"
|
179
|
-
raise PDF::Core::Errors::TemplateError, msg
|
180
|
-
end
|
181
|
-
|
182
|
-
private
|
183
|
-
|
184
|
-
# An index for page templates so that their loaded object graph
|
185
|
-
# can be reused without multiple loading
|
186
|
-
def template_index
|
187
|
-
@template_index ||= {}
|
188
|
-
end
|
189
|
-
|
190
|
-
# An index for the read object hash of a pdf template so that the
|
191
|
-
# object hash does not need to be parsed multiple times when using
|
192
|
-
# different pages of the pdf as page templates
|
193
|
-
def hash_index
|
194
|
-
@hash_index ||= {}
|
195
|
-
end
|
196
|
-
|
197
|
-
# returns the indexed object graph identifier for a template page if
|
198
|
-
# it exists
|
199
|
-
def indexed_template(input, page_number)
|
200
|
-
key = indexing_key(input)
|
201
|
-
template_index[key] && template_index[key][page_number]
|
202
|
-
end
|
203
|
-
|
204
|
-
# indexes the identifier for a page from a template
|
205
|
-
def index_template(input, page_number, id)
|
206
|
-
(template_index[indexing_key(input)] ||= {})[page_number] ||= id
|
207
|
-
end
|
208
|
-
|
209
|
-
# reads and indexes a new IO for a template
|
210
|
-
# if the IO has been indexed already then the parsed object hash
|
211
|
-
# is returned directly
|
212
|
-
def indexed_hash(input, io)
|
213
|
-
hash_index[indexing_key(input)] ||= PDF::Reader::ObjectHash.new(io)
|
214
|
-
end
|
215
|
-
|
216
|
-
# the index key for the input.
|
217
|
-
# uses object_id so that both a string filename or an IO stream can be
|
218
|
-
# indexed and reused provided the same object gets used in multiple page
|
219
|
-
# template calls.
|
220
|
-
def indexing_key(input)
|
221
|
-
input.object_id
|
222
|
-
end
|
223
|
-
|
224
|
-
# returns a nested array of object IDs for all pages in this object store.
|
225
|
-
#
|
226
|
-
def get_page_objects(obj)
|
227
|
-
if obj.data[:Type] == :Page
|
228
|
-
obj.identifier
|
229
|
-
elsif obj.data[:Type] == :Pages
|
230
|
-
obj.data[:Kids].map { |kid| get_page_objects(kid) }
|
231
|
-
end
|
232
|
-
end
|
233
|
-
|
234
|
-
# takes a source PDF and uses it as a template for this document.
|
235
|
-
#
|
236
|
-
def load_file(template)
|
237
|
-
unless (template.respond_to?(:seek) && template.respond_to?(:read)) ||
|
238
|
-
File.file?(template)
|
239
|
-
raise ArgumentError, "#{template} does not exist"
|
240
|
-
end
|
241
|
-
|
242
|
-
hash = PDF::Reader::ObjectHash.new(template)
|
243
|
-
src_info = hash.trailer[:Info]
|
244
|
-
src_root = hash.trailer[:Root]
|
245
|
-
@min_version = hash.pdf_version.to_f
|
246
|
-
|
247
|
-
if hash.trailer[:Encrypt]
|
248
|
-
msg = "Template file is an encrypted PDF, it can't be used as a template"
|
249
|
-
raise PDF::Core::Errors::TemplateError, msg
|
250
|
-
end
|
251
|
-
|
252
|
-
if src_info
|
253
|
-
@info = load_object_graph(hash, src_info).identifier
|
254
|
-
end
|
255
|
-
|
256
|
-
if src_root
|
257
|
-
@root = load_object_graph(hash, src_root).identifier
|
258
|
-
end
|
259
|
-
rescue PDF::Reader::MalformedPDFError, PDF::Reader::InvalidObjectError => e
|
260
|
-
msg = "Error reading template file. If you are sure it's a valid PDF, it may be a bug.\n#{e.message}"
|
261
|
-
raise PDF::Core::Errors::TemplateError, msg
|
262
|
-
rescue PDF::Reader::UnsupportedFeatureError
|
263
|
-
msg = "Template file contains unsupported PDF features"
|
264
|
-
raise PDF::Core::Errors::TemplateError, msg
|
265
|
-
end
|
266
|
-
|
267
|
-
# recurse down an object graph from a source PDF, importing all the
|
268
|
-
# indirect objects we find.
|
269
|
-
#
|
270
|
-
# hash is the PDF::Reader::ObjectHash to extract objects from, object is
|
271
|
-
# the object to extract.
|
272
|
-
#
|
273
|
-
def load_object_graph(hash, object)
|
274
|
-
@loaded_objects ||= {}
|
275
|
-
case object
|
276
|
-
when ::Hash then
|
277
|
-
object.each { |key,value| object[key] = load_object_graph(hash, value) }
|
278
|
-
object
|
279
|
-
when Array then
|
280
|
-
object.map { |item| load_object_graph(hash, item)}
|
281
|
-
when PDF::Reader::Reference then
|
282
|
-
unless @loaded_objects.has_key?(object.id)
|
283
|
-
@loaded_objects[object.id] = ref(nil)
|
284
|
-
new_obj = load_object_graph(hash, hash[object])
|
285
|
-
if new_obj.kind_of?(PDF::Reader::Stream)
|
286
|
-
stream_dict = load_object_graph(hash, new_obj.hash)
|
287
|
-
@loaded_objects[object.id].data = stream_dict
|
288
|
-
@loaded_objects[object.id] << new_obj.data
|
289
|
-
else
|
290
|
-
@loaded_objects[object.id].data = new_obj
|
291
|
-
end
|
292
|
-
end
|
293
|
-
@loaded_objects[object.id]
|
294
|
-
when PDF::Reader::Stream
|
295
|
-
# Stream is a subclass of string, so this is here to prevent the stream
|
296
|
-
# being wrapped in a LiteralString
|
297
|
-
object
|
298
|
-
when String
|
299
|
-
is_utf8?(object) ? object : PDF::Core::ByteString.new(object)
|
300
|
-
else
|
301
|
-
object
|
302
|
-
end
|
303
|
-
end
|
304
|
-
|
305
135
|
def is_utf8?(str)
|
306
136
|
str.force_encoding(::Encoding::UTF_8)
|
307
137
|
str.valid_encoding?
|
@@ -309,3 +139,4 @@ module PDF
|
|
309
139
|
end
|
310
140
|
end
|
311
141
|
end
|
142
|
+
|
data/pdf-core.gemspec
CHANGED
@@ -19,8 +19,10 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.add_dependency('pdf-reader', '~>1.2')
|
20
20
|
spec.add_dependency('ttfunk', '~>1.0.3')
|
21
21
|
spec.add_dependency('ruby-rc4')
|
22
|
+
spec.add_development_dependency('simplecov')
|
22
23
|
spec.add_development_dependency('pdf-inspector', '~> 1.1.0')
|
23
|
-
spec.add_development_dependency('
|
24
|
+
spec.add_development_dependency('rspec')
|
25
|
+
spec.add_development_dependency('rake')
|
24
26
|
spec.homepage = "http://prawn.majesticseacreature.com"
|
25
27
|
spec.description = "PDF::Core is used by Prawn to render PDF documents"
|
26
28
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregory Brown
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2014-01-
|
15
|
+
date: 2014-01-21 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: pdf-reader
|
@@ -56,6 +56,20 @@ dependencies:
|
|
56
56
|
- - '>='
|
57
57
|
- !ruby/object:Gem::Version
|
58
58
|
version: '0'
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: simplecov
|
61
|
+
requirement: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - '>='
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '0'
|
59
73
|
- !ruby/object:Gem::Dependency
|
60
74
|
name: pdf-inspector
|
61
75
|
requirement: !ruby/object:Gem::Requirement
|
@@ -71,7 +85,21 @@ dependencies:
|
|
71
85
|
- !ruby/object:Gem::Version
|
72
86
|
version: 1.1.0
|
73
87
|
- !ruby/object:Gem::Dependency
|
74
|
-
name:
|
88
|
+
name: rspec
|
89
|
+
requirement: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
type: :development
|
95
|
+
prerelease: false
|
96
|
+
version_requirements: !ruby/object:Gem::Requirement
|
97
|
+
requirements:
|
98
|
+
- - '>='
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '0'
|
101
|
+
- !ruby/object:Gem::Dependency
|
102
|
+
name: rake
|
75
103
|
requirement: !ruby/object:Gem::Requirement
|
76
104
|
requirements:
|
77
105
|
- - '>='
|