pdf-core 0.0.3 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/pdf/core/object_store.rb +1 -170
- data/pdf-core.gemspec +3 -1
- metadata +31 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37fb1b526f79e68be0e821b0773f34a755f88e9a
|
4
|
+
data.tar.gz: 06a2002aecbb70df5bda37ea66adbbba24a59ac6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 38a7c15689de4259d874d1a57522be5c10dfe35453423d9c07beacc317d2a60394d3a5d443c1cba3cf0f11da175d9ff8fe748893aba2df8736b51298cd56b2c8
|
7
|
+
data.tar.gz: b9263b1a73f3d77cf3de5947f5aee64d97946c5852d84134724a31e91c09a515246be10d066886e3df4d27bbed7d0f91d1ffec5370b95d90768a4ab066e792a3
|
@@ -132,176 +132,6 @@ module PDF
|
|
132
132
|
flat_page_ids[k]
|
133
133
|
end
|
134
134
|
|
135
|
-
# imports all objects required to render a page from another PDF. The
|
136
|
-
# objects are added to the current object store, but NOT linked
|
137
|
-
# anywhere.
|
138
|
-
#
|
139
|
-
# The object ID of the root Page object is returned, it's up to the
|
140
|
-
# calling code to link that into the document structure somewhere. If
|
141
|
-
# this isn't done the imported objects will just be removed when the
|
142
|
-
# store is compacted.
|
143
|
-
#
|
144
|
-
# Imports nothing and returns nil if the requested page number doesn't
|
145
|
-
# exist. page_num is 1 indexed, so 1 indicates the first page.
|
146
|
-
#
|
147
|
-
def import_page(input, page_num)
|
148
|
-
@loaded_objects = {}
|
149
|
-
if template_id = indexed_template(input, page_num)
|
150
|
-
return template_id
|
151
|
-
end
|
152
|
-
|
153
|
-
io = if input.respond_to?(:seek) && input.respond_to?(:read)
|
154
|
-
input
|
155
|
-
elsif File.file?(input.to_s)
|
156
|
-
StringIO.new(File.binread(input.to_s))
|
157
|
-
else
|
158
|
-
raise ArgumentError, "input must be an IO-like object or a filename"
|
159
|
-
end
|
160
|
-
|
161
|
-
# unless File.file?(filename)
|
162
|
-
# raise ArgumentError, "#{filename} does not exist"
|
163
|
-
# end
|
164
|
-
|
165
|
-
hash = indexed_hash(input, io)
|
166
|
-
ref = hash.page_references[page_num - 1]
|
167
|
-
|
168
|
-
if ref.nil?
|
169
|
-
nil
|
170
|
-
else
|
171
|
-
index_template(input, page_num, load_object_graph(hash, ref).identifier)
|
172
|
-
end
|
173
|
-
|
174
|
-
rescue PDF::Reader::MalformedPDFError, PDF::Reader::InvalidObjectError => e
|
175
|
-
msg = "Error reading template file. If you are sure it's a valid PDF, it may be a bug.\n#{e.message}"
|
176
|
-
raise PDF::Core::Errors::TemplateError, msg
|
177
|
-
rescue PDF::Reader::UnsupportedFeatureError
|
178
|
-
msg = "Template file contains unsupported PDF features"
|
179
|
-
raise PDF::Core::Errors::TemplateError, msg
|
180
|
-
end
|
181
|
-
|
182
|
-
private
|
183
|
-
|
184
|
-
# An index for page templates so that their loaded object graph
|
185
|
-
# can be reused without multiple loading
|
186
|
-
def template_index
|
187
|
-
@template_index ||= {}
|
188
|
-
end
|
189
|
-
|
190
|
-
# An index for the read object hash of a pdf template so that the
|
191
|
-
# object hash does not need to be parsed multiple times when using
|
192
|
-
# different pages of the pdf as page templates
|
193
|
-
def hash_index
|
194
|
-
@hash_index ||= {}
|
195
|
-
end
|
196
|
-
|
197
|
-
# returns the indexed object graph identifier for a template page if
|
198
|
-
# it exists
|
199
|
-
def indexed_template(input, page_number)
|
200
|
-
key = indexing_key(input)
|
201
|
-
template_index[key] && template_index[key][page_number]
|
202
|
-
end
|
203
|
-
|
204
|
-
# indexes the identifier for a page from a template
|
205
|
-
def index_template(input, page_number, id)
|
206
|
-
(template_index[indexing_key(input)] ||= {})[page_number] ||= id
|
207
|
-
end
|
208
|
-
|
209
|
-
# reads and indexes a new IO for a template
|
210
|
-
# if the IO has been indexed already then the parsed object hash
|
211
|
-
# is returned directly
|
212
|
-
def indexed_hash(input, io)
|
213
|
-
hash_index[indexing_key(input)] ||= PDF::Reader::ObjectHash.new(io)
|
214
|
-
end
|
215
|
-
|
216
|
-
# the index key for the input.
|
217
|
-
# uses object_id so that both a string filename or an IO stream can be
|
218
|
-
# indexed and reused provided the same object gets used in multiple page
|
219
|
-
# template calls.
|
220
|
-
def indexing_key(input)
|
221
|
-
input.object_id
|
222
|
-
end
|
223
|
-
|
224
|
-
# returns a nested array of object IDs for all pages in this object store.
|
225
|
-
#
|
226
|
-
def get_page_objects(obj)
|
227
|
-
if obj.data[:Type] == :Page
|
228
|
-
obj.identifier
|
229
|
-
elsif obj.data[:Type] == :Pages
|
230
|
-
obj.data[:Kids].map { |kid| get_page_objects(kid) }
|
231
|
-
end
|
232
|
-
end
|
233
|
-
|
234
|
-
# takes a source PDF and uses it as a template for this document.
|
235
|
-
#
|
236
|
-
def load_file(template)
|
237
|
-
unless (template.respond_to?(:seek) && template.respond_to?(:read)) ||
|
238
|
-
File.file?(template)
|
239
|
-
raise ArgumentError, "#{template} does not exist"
|
240
|
-
end
|
241
|
-
|
242
|
-
hash = PDF::Reader::ObjectHash.new(template)
|
243
|
-
src_info = hash.trailer[:Info]
|
244
|
-
src_root = hash.trailer[:Root]
|
245
|
-
@min_version = hash.pdf_version.to_f
|
246
|
-
|
247
|
-
if hash.trailer[:Encrypt]
|
248
|
-
msg = "Template file is an encrypted PDF, it can't be used as a template"
|
249
|
-
raise PDF::Core::Errors::TemplateError, msg
|
250
|
-
end
|
251
|
-
|
252
|
-
if src_info
|
253
|
-
@info = load_object_graph(hash, src_info).identifier
|
254
|
-
end
|
255
|
-
|
256
|
-
if src_root
|
257
|
-
@root = load_object_graph(hash, src_root).identifier
|
258
|
-
end
|
259
|
-
rescue PDF::Reader::MalformedPDFError, PDF::Reader::InvalidObjectError => e
|
260
|
-
msg = "Error reading template file. If you are sure it's a valid PDF, it may be a bug.\n#{e.message}"
|
261
|
-
raise PDF::Core::Errors::TemplateError, msg
|
262
|
-
rescue PDF::Reader::UnsupportedFeatureError
|
263
|
-
msg = "Template file contains unsupported PDF features"
|
264
|
-
raise PDF::Core::Errors::TemplateError, msg
|
265
|
-
end
|
266
|
-
|
267
|
-
# recurse down an object graph from a source PDF, importing all the
|
268
|
-
# indirect objects we find.
|
269
|
-
#
|
270
|
-
# hash is the PDF::Reader::ObjectHash to extract objects from, object is
|
271
|
-
# the object to extract.
|
272
|
-
#
|
273
|
-
def load_object_graph(hash, object)
|
274
|
-
@loaded_objects ||= {}
|
275
|
-
case object
|
276
|
-
when ::Hash then
|
277
|
-
object.each { |key,value| object[key] = load_object_graph(hash, value) }
|
278
|
-
object
|
279
|
-
when Array then
|
280
|
-
object.map { |item| load_object_graph(hash, item)}
|
281
|
-
when PDF::Reader::Reference then
|
282
|
-
unless @loaded_objects.has_key?(object.id)
|
283
|
-
@loaded_objects[object.id] = ref(nil)
|
284
|
-
new_obj = load_object_graph(hash, hash[object])
|
285
|
-
if new_obj.kind_of?(PDF::Reader::Stream)
|
286
|
-
stream_dict = load_object_graph(hash, new_obj.hash)
|
287
|
-
@loaded_objects[object.id].data = stream_dict
|
288
|
-
@loaded_objects[object.id] << new_obj.data
|
289
|
-
else
|
290
|
-
@loaded_objects[object.id].data = new_obj
|
291
|
-
end
|
292
|
-
end
|
293
|
-
@loaded_objects[object.id]
|
294
|
-
when PDF::Reader::Stream
|
295
|
-
# Stream is a subclass of string, so this is here to prevent the stream
|
296
|
-
# being wrapped in a LiteralString
|
297
|
-
object
|
298
|
-
when String
|
299
|
-
is_utf8?(object) ? object : PDF::Core::ByteString.new(object)
|
300
|
-
else
|
301
|
-
object
|
302
|
-
end
|
303
|
-
end
|
304
|
-
|
305
135
|
def is_utf8?(str)
|
306
136
|
str.force_encoding(::Encoding::UTF_8)
|
307
137
|
str.valid_encoding?
|
@@ -309,3 +139,4 @@ module PDF
|
|
309
139
|
end
|
310
140
|
end
|
311
141
|
end
|
142
|
+
|
data/pdf-core.gemspec
CHANGED
@@ -19,8 +19,10 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.add_dependency('pdf-reader', '~>1.2')
|
20
20
|
spec.add_dependency('ttfunk', '~>1.0.3')
|
21
21
|
spec.add_dependency('ruby-rc4')
|
22
|
+
spec.add_development_dependency('simplecov')
|
22
23
|
spec.add_development_dependency('pdf-inspector', '~> 1.1.0')
|
23
|
-
spec.add_development_dependency('
|
24
|
+
spec.add_development_dependency('rspec')
|
25
|
+
spec.add_development_dependency('rake')
|
24
26
|
spec.homepage = "http://prawn.majesticseacreature.com"
|
25
27
|
spec.description = "PDF::Core is used by Prawn to render PDF documents"
|
26
28
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-core
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregory Brown
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2014-01-
|
15
|
+
date: 2014-01-21 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: pdf-reader
|
@@ -56,6 +56,20 @@ dependencies:
|
|
56
56
|
- - '>='
|
57
57
|
- !ruby/object:Gem::Version
|
58
58
|
version: '0'
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: simplecov
|
61
|
+
requirement: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - '>='
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '0'
|
59
73
|
- !ruby/object:Gem::Dependency
|
60
74
|
name: pdf-inspector
|
61
75
|
requirement: !ruby/object:Gem::Requirement
|
@@ -71,7 +85,21 @@ dependencies:
|
|
71
85
|
- !ruby/object:Gem::Version
|
72
86
|
version: 1.1.0
|
73
87
|
- !ruby/object:Gem::Dependency
|
74
|
-
name:
|
88
|
+
name: rspec
|
89
|
+
requirement: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
type: :development
|
95
|
+
prerelease: false
|
96
|
+
version_requirements: !ruby/object:Gem::Requirement
|
97
|
+
requirements:
|
98
|
+
- - '>='
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '0'
|
101
|
+
- !ruby/object:Gem::Dependency
|
102
|
+
name: rake
|
75
103
|
requirement: !ruby/object:Gem::Requirement
|
76
104
|
requirements:
|
77
105
|
- - '>='
|