pdf-core 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2623bbd4ba608b95d098ec76c889920a65edf6f7
4
- data.tar.gz: 518d068701cdc3ec25ea5380b262f41d436d9d46
3
+ metadata.gz: 37fb1b526f79e68be0e821b0773f34a755f88e9a
4
+ data.tar.gz: 06a2002aecbb70df5bda37ea66adbbba24a59ac6
5
5
  SHA512:
6
- metadata.gz: 3674b8a9e33c3055983f8b1eded9c4cc2cb83bd5c4c315353b1834d484ba92a896bbd6e4f94ebc34654e46b8ddd78a2c9d633be075962b7900c1c400812406b5
7
- data.tar.gz: 1998aee60a685847bd7511898cc7580de080234367a9dc58a6610a9741ea43cab56ea6d00bf20409b2ec1aaa916ef2f1f866e3ce6c4638622bc0397c71699c32
6
+ metadata.gz: 38a7c15689de4259d874d1a57522be5c10dfe35453423d9c07beacc317d2a60394d3a5d443c1cba3cf0f11da175d9ff8fe748893aba2df8736b51298cd56b2c8
7
+ data.tar.gz: b9263b1a73f3d77cf3de5947f5aee64d97946c5852d84134724a31e91c09a515246be10d066886e3df4d27bbed7d0f91d1ffec5370b95d90768a4ab066e792a3
@@ -132,176 +132,6 @@ module PDF
132
132
  flat_page_ids[k]
133
133
  end
134
134
 
135
- # imports all objects required to render a page from another PDF. The
136
- # objects are added to the current object store, but NOT linked
137
- # anywhere.
138
- #
139
- # The object ID of the root Page object is returned, it's up to the
140
- # calling code to link that into the document structure somewhere. If
141
- # this isn't done the imported objects will just be removed when the
142
- # store is compacted.
143
- #
144
- # Imports nothing and returns nil if the requested page number doesn't
145
- # exist. page_num is 1 indexed, so 1 indicates the first page.
146
- #
147
- def import_page(input, page_num)
148
- @loaded_objects = {}
149
- if template_id = indexed_template(input, page_num)
150
- return template_id
151
- end
152
-
153
- io = if input.respond_to?(:seek) && input.respond_to?(:read)
154
- input
155
- elsif File.file?(input.to_s)
156
- StringIO.new(File.binread(input.to_s))
157
- else
158
- raise ArgumentError, "input must be an IO-like object or a filename"
159
- end
160
-
161
- # unless File.file?(filename)
162
- # raise ArgumentError, "#{filename} does not exist"
163
- # end
164
-
165
- hash = indexed_hash(input, io)
166
- ref = hash.page_references[page_num - 1]
167
-
168
- if ref.nil?
169
- nil
170
- else
171
- index_template(input, page_num, load_object_graph(hash, ref).identifier)
172
- end
173
-
174
- rescue PDF::Reader::MalformedPDFError, PDF::Reader::InvalidObjectError => e
175
- msg = "Error reading template file. If you are sure it's a valid PDF, it may be a bug.\n#{e.message}"
176
- raise PDF::Core::Errors::TemplateError, msg
177
- rescue PDF::Reader::UnsupportedFeatureError
178
- msg = "Template file contains unsupported PDF features"
179
- raise PDF::Core::Errors::TemplateError, msg
180
- end
181
-
182
- private
183
-
184
- # An index for page templates so that their loaded object graph
185
- # can be reused without multiple loading
186
- def template_index
187
- @template_index ||= {}
188
- end
189
-
190
- # An index for the read object hash of a pdf template so that the
191
- # object hash does not need to be parsed multiple times when using
192
- # different pages of the pdf as page templates
193
- def hash_index
194
- @hash_index ||= {}
195
- end
196
-
197
- # returns the indexed object graph identifier for a template page if
198
- # it exists
199
- def indexed_template(input, page_number)
200
- key = indexing_key(input)
201
- template_index[key] && template_index[key][page_number]
202
- end
203
-
204
- # indexes the identifier for a page from a template
205
- def index_template(input, page_number, id)
206
- (template_index[indexing_key(input)] ||= {})[page_number] ||= id
207
- end
208
-
209
- # reads and indexes a new IO for a template
210
- # if the IO has been indexed already then the parsed object hash
211
- # is returned directly
212
- def indexed_hash(input, io)
213
- hash_index[indexing_key(input)] ||= PDF::Reader::ObjectHash.new(io)
214
- end
215
-
216
- # the index key for the input.
217
- # uses object_id so that both a string filename or an IO stream can be
218
- # indexed and reused provided the same object gets used in multiple page
219
- # template calls.
220
- def indexing_key(input)
221
- input.object_id
222
- end
223
-
224
- # returns a nested array of object IDs for all pages in this object store.
225
- #
226
- def get_page_objects(obj)
227
- if obj.data[:Type] == :Page
228
- obj.identifier
229
- elsif obj.data[:Type] == :Pages
230
- obj.data[:Kids].map { |kid| get_page_objects(kid) }
231
- end
232
- end
233
-
234
- # takes a source PDF and uses it as a template for this document.
235
- #
236
- def load_file(template)
237
- unless (template.respond_to?(:seek) && template.respond_to?(:read)) ||
238
- File.file?(template)
239
- raise ArgumentError, "#{template} does not exist"
240
- end
241
-
242
- hash = PDF::Reader::ObjectHash.new(template)
243
- src_info = hash.trailer[:Info]
244
- src_root = hash.trailer[:Root]
245
- @min_version = hash.pdf_version.to_f
246
-
247
- if hash.trailer[:Encrypt]
248
- msg = "Template file is an encrypted PDF, it can't be used as a template"
249
- raise PDF::Core::Errors::TemplateError, msg
250
- end
251
-
252
- if src_info
253
- @info = load_object_graph(hash, src_info).identifier
254
- end
255
-
256
- if src_root
257
- @root = load_object_graph(hash, src_root).identifier
258
- end
259
- rescue PDF::Reader::MalformedPDFError, PDF::Reader::InvalidObjectError => e
260
- msg = "Error reading template file. If you are sure it's a valid PDF, it may be a bug.\n#{e.message}"
261
- raise PDF::Core::Errors::TemplateError, msg
262
- rescue PDF::Reader::UnsupportedFeatureError
263
- msg = "Template file contains unsupported PDF features"
264
- raise PDF::Core::Errors::TemplateError, msg
265
- end
266
-
267
- # recurse down an object graph from a source PDF, importing all the
268
- # indirect objects we find.
269
- #
270
- # hash is the PDF::Reader::ObjectHash to extract objects from, object is
271
- # the object to extract.
272
- #
273
- def load_object_graph(hash, object)
274
- @loaded_objects ||= {}
275
- case object
276
- when ::Hash then
277
- object.each { |key,value| object[key] = load_object_graph(hash, value) }
278
- object
279
- when Array then
280
- object.map { |item| load_object_graph(hash, item)}
281
- when PDF::Reader::Reference then
282
- unless @loaded_objects.has_key?(object.id)
283
- @loaded_objects[object.id] = ref(nil)
284
- new_obj = load_object_graph(hash, hash[object])
285
- if new_obj.kind_of?(PDF::Reader::Stream)
286
- stream_dict = load_object_graph(hash, new_obj.hash)
287
- @loaded_objects[object.id].data = stream_dict
288
- @loaded_objects[object.id] << new_obj.data
289
- else
290
- @loaded_objects[object.id].data = new_obj
291
- end
292
- end
293
- @loaded_objects[object.id]
294
- when PDF::Reader::Stream
295
- # Stream is a subclass of string, so this is here to prevent the stream
296
- # being wrapped in a LiteralString
297
- object
298
- when String
299
- is_utf8?(object) ? object : PDF::Core::ByteString.new(object)
300
- else
301
- object
302
- end
303
- end
304
-
305
135
  def is_utf8?(str)
306
136
  str.force_encoding(::Encoding::UTF_8)
307
137
  str.valid_encoding?
@@ -309,3 +139,4 @@ module PDF
309
139
  end
310
140
  end
311
141
  end
142
+
@@ -19,8 +19,10 @@ Gem::Specification.new do |spec|
19
19
  spec.add_dependency('pdf-reader', '~>1.2')
20
20
  spec.add_dependency('ttfunk', '~>1.0.3')
21
21
  spec.add_dependency('ruby-rc4')
22
+ spec.add_development_dependency('simplecov')
22
23
  spec.add_development_dependency('pdf-inspector', '~> 1.1.0')
23
- spec.add_development_dependency('rdoc')
24
+ spec.add_development_dependency('rspec')
25
+ spec.add_development_dependency('rake')
24
26
  spec.homepage = "http://prawn.majesticseacreature.com"
25
27
  spec.description = "PDF::Core is used by Prawn to render PDF documents"
26
28
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gregory Brown
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2014-01-19 00:00:00.000000000 Z
15
+ date: 2014-01-21 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: pdf-reader
@@ -56,6 +56,20 @@ dependencies:
56
56
  - - '>='
57
57
  - !ruby/object:Gem::Version
58
58
  version: '0'
59
+ - !ruby/object:Gem::Dependency
60
+ name: simplecov
61
+ requirement: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - '>='
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
59
73
  - !ruby/object:Gem::Dependency
60
74
  name: pdf-inspector
61
75
  requirement: !ruby/object:Gem::Requirement
@@ -71,7 +85,21 @@ dependencies:
71
85
  - !ruby/object:Gem::Version
72
86
  version: 1.1.0
73
87
  - !ruby/object:Gem::Dependency
74
- name: rdoc
88
+ name: rspec
89
+ requirement: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ type: :development
95
+ prerelease: false
96
+ version_requirements: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - '>='
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ - !ruby/object:Gem::Dependency
102
+ name: rake
75
103
  requirement: !ruby/object:Gem::Requirement
76
104
  requirements:
77
105
  - - '>='