combine_pdf 0.1.23 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,724 +0,0 @@
1
- # -*- encoding : utf-8 -*-
2
- ########################################################
3
- ## Thoughts from reading the ISO 32000-1:2008
4
- ## this file is part of the CombinePDF library and the code
5
- ## is subject to the same license.
6
- ########################################################
7
-
8
-
9
-
10
-
11
-
12
-
13
- module CombinePDF
14
-
15
- # PDF class is the PDF object that can save itself to
16
- # a file and that can be used as a container for a full
17
- # PDF file data, including version, information etc'.
18
- #
19
- # PDF objects can be used to combine or to inject data.
20
- # == Combine/Merge PDF files or Pages
21
- # To combine PDF files (or data):
22
- # pdf = CombinePDF.new
23
- # pdf << CombinePDF.new("file1.pdf") # one way to combine, very fast.
24
- # pdf << CombinePDF.new("file2.pdf")
25
- # pdf.save "combined.pdf"
26
- # or even a one liner:
27
- # (CombinePDF.new("file1.pdf") << CombinePDF.new("file2.pdf") << CombinePDF.new("file3.pdf")).save("combined.pdf")
28
- # you can also add just odd or even pages:
29
- # pdf = CombinePDF.new
30
- # i = 0
31
- # CombinePDF.new("file.pdf").pages.each do |page|
32
- # i += 1
33
- # pdf << page if i.even?
34
- # end
35
- # pdf.save "even_pages.pdf"
36
- # notice that adding all the pages one by one is slower than adding the whole file.
37
- # == Add content to existing pages (Stamp / Watermark)
38
- # To add content to existing PDF pages, first import the new content from an existing PDF file.
39
- # after that, add the content to each of the pages in your existing PDF.
40
- #
41
- # in this example, we will add a company logo to each page:
42
- # company_logo = CombinePDF.new("company_logo.pdf").pages[0]
43
- # pdf = CombinePDF.new "content_file.pdf"
44
- # pdf.pages.each {|page| page << company_logo} # notice the << operator is on a page and not a PDF object.
45
- # pdf.save "content_with_logo.pdf"
46
- # Notice the << operator is on a page and not a PDF object. The << operator acts differently on PDF objects and on Pages.
47
- #
48
- # The << operator defaults to secure injection by renaming references to avoid conflicts. For overlaying pages using compressed data that might not be editable (due to limited filter support), you can use:
49
- # pdf.pages(nil, false).each {|page| page << stamp_page}
50
- #
51
- # == Page Numbering
52
- # adding page numbers to a PDF object or file is as simple as can be:
53
- # pdf = CombinePDF.new "file_to_number.pdf"
54
- # pdf.number_pages
55
- # pdf.save "file_with_numbering.pdf"
56
- #
57
- # numbering can be done with many different options, with different formatting, with or without a box object, and even with opacity values.
58
- #
59
- # == Loading PDF data
60
- # Loading PDF data can be done from file system or directly from the memory.
61
- #
62
- # Loading data from a file is easy:
63
- # pdf = CombinePDF.new("file.pdf")
64
- # you can also parse PDF files from memory:
65
- # pdf_data = IO.read 'file.pdf' # for this demo, load a file to memory
66
- # pdf = CombinePDF.parse(pdf_data)
67
- # Loading from memory is especially effective for importing PDF data received through the internet or from a different authoring library such as Prawn.
68
- class PDF
69
- # the objects attribute is an Array containing all the PDF sub-objects for the class.
70
- attr_reader :objects
71
- # the info attribute is a Hash that sets the Info data for the PDF.
72
- # use, for example:
73
- # pdf.info[:Title] = "title"
74
- attr_reader :info
75
- # gets/sets the string output format (PDF files store strings in two types of formats).
76
- #
77
- # Accepts:
78
- # - :literal
79
- # - :hex
80
- attr_accessor :string_output
81
- # set/get the PDF version of the file (1.1-1.7) - should be type Float.
82
- attr_accessor :version
83
# Builds a PDF container. The first argument selects what it is built from:
#
# - a PDFParser: its parsed objects, version (when a Float) and info object are adopted,
#   and only one Catalog is kept (the parser's root, or else the last Catalog found);
# - an Array: used as the objects list; a Float second argument sets the version;
# - a Hash: the whole argument list becomes the objects list;
# - anything else (or nothing): the PDF starts empty.
#
# In every case the Producer entry is set and the creation/modification dates are cleared.
def initialize(*args)
  # defaults, used as-is when no recognised source is given
  @objects = []
  @version = 0
  @info = {}

  source = args[0]
  case source
  when PDFParser
    @objects = source.parse
    # connect references with the objects they point to
    serialize_objects_and_references
    # keep a single Catalog: the parser's root when it has one, otherwise the latest Catalog
    if source.root_object[:Root]
      @objects.delete_if { |obj| obj[:Type] == :Catalog }
      @objects << source.root_object[:Root]
    else
      latest_catalog = @objects.select { |obj| obj[:Type] == :Catalog }.last
      if latest_catalog
        @objects.delete_if { |obj| obj[:Type] == :Catalog }
        @objects << latest_catalog
      end
    end
    @version = source.version if source.version.is_a? Float
    @info = source.info_object || {}
  when Array
    @objects = source
    @version = args[1] if args[1].is_a? Float
    serialize_objects_and_references
  when Hash
    # each argument is taken to be one object
    @objects = args
    serialize_objects_and_references
  end

  # general settings
  @string_output = :literal
  @set_start_id = 1
  @info[:Producer] = "Ruby CombinePDF Library by Boaz Segev"
  @info.delete :CreationDate
  @info.delete :ModDate
end
123
-
124
# Renders the PDF and returns it as a binary (ASCII-8BIT) String.
#
# save(file_name) writes this String to disk; call to_pdf directly to export
# the file without touching the disk (for example, to send it over HTTP).
#
# Side effects visible here: the version defaults to 1.5 when unset, the
# CreationDate info entry is refreshed, the catalog and objects list are
# rebuilt, and the PRIVATE_HASH_KEYS are removed from the info Hash.
def to_pdf
  # fall back to a default version when none was specified
  @version = 1.5 if @version.to_f == 0.0
  # stamp the creation date of the rendered file
  @info[:CreationDate] = Time.now.strftime "D:%Y%m%d%H%M%S%:::z'00"
  # rebuild the catalog, then give every object an ID
  catalog = rebuild_catalog_and_objects
  renumber_object_ids

  # header: version line followed by a binary marker line
  header = "%PDF-#{@version}\n%\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00".force_encoding(Encoding::ASCII_8BIT)
  out = [header]
  offsets = []
  # chunks are joined with "\n" at the end, so each one costs one extra byte
  loc = header.bytesize + 1
  @objects.each do |object|
    offsets << loc
    rendered = PDFOperations._object_to_pdf(object)
    out << rendered
    loc += rendered.bytesize + 1
  end
  xref_location = loc
  # entry 0 of the cross-reference table is the null object
  indirect_object_count = offsets.length + 1

  # the xref header, its entries and the "trailer" keyword form a single chunk
  xref_table = "xref\n0 #{indirect_object_count}\n0000000000 65535 f \n"
  offsets.each { |offset| xref_table << ("%010d 00000 n \n" % offset) }
  xref_table << "trailer"
  out << xref_table

  out << "<<\n/Root #{catalog[:indirect_reference_id]} #{catalog[:indirect_generation_number]} R"
  out << "/Size #{indirect_object_count}"
  if @info.is_a?(Hash)
    # make sure the dictionary is rendered inline, without stream
    PRIVATE_HASH_KEYS.each { |key| @info.delete key }
    out << "/Info #{PDFOperations._object_to_pdf @info}"
  end
  out << ">>\nstartxref\n#{xref_location}\n%%EOF"

  # when finished, remove the numbering system and keep only pointers
  PDFOperations.remove_old_ids @objects
  out.join("\n").force_encoding(Encoding::ASCII_8BIT)
end
172
-
173
# Writes the rendered PDF (the output of to_pdf) to a file in binary mode.
#
# file_name:: a string or path object naming the output file.
#
# <b>Notice!</b> an existing file <b>WILL</b> be overwritten.
def save(file_name)
  IO.binwrite(file_name, to_pdf)
end
181
# Returns an Array with the Page objects reachable from the given catalog(s).
#
# When no catalog is passed, every Catalog found in the objects list is searched.
# The search follows references, Catalog -> Pages -> Kids, and applies the
# inheritable entries it meets on the way (MediaBox, CropBox, Rotate, Resources,
# ColorSpace) to each page that doesn't define them itself.
#
# Every page found is extended with Page_Methods and told which injection mode
# to use. (Pages are Hash instances; only the instances are extended, the Hash
# class is left untouched.)
#
# catalogs:: a catalog, or an Array of catalog objects. defaults to the existing catalog(s).
# secure_injection:: a boolean (true / false) stored on each page; it controls the behavior of the page's << operator.
# inheritance_hash:: used by the recursion to carry inherited entries down the page tree.
def pages(catalogs = nil, secure_injection = true, inheritance_hash = {})
  page_list = []
  if catalogs.nil?
    catalogs = @objects.select { |obj| obj.is_a?(Hash) && obj[:Type] == :Catalog }
    catalogs ||= []
  end

  case catalogs
  when Array
    catalogs.each do |c|
      page_list.concat(pages(c, secure_injection, inheritance_hash)) unless c.nil?
    end
  when Hash
    if catalogs[:is_reference_only]
      # a reference: continue with the object it points to
      if catalogs[:referenced_object]
        page_list.concat(pages(catalogs[:referenced_object], secure_injection, inheritance_hash))
      else
        warn "couldn't follow reference!!! #{catalogs} not found!"
      end
    else
      unless catalogs[:Type] == :Page
        # a tree node: remember its inheritable entries for the pages below it
        [:MediaBox, :CropBox, :Rotate].each do |key|
          inheritance_hash[key] = catalogs[key] if catalogs[key]
        end
        [:Resources, :ColorSpace].each do |key|
          next unless catalogs[key]
          (inheritance_hash[key] ||= {}).update((catalogs[key][:referenced_object] || catalogs[key]), &self.class.method(:hash_update_proc_for_new))
        end
      end

      case catalogs[:Type]
      when :Page
        # inheritance: the page's own values win over inherited ones
        [:MediaBox, :CropBox, :Rotate].each do |key|
          catalogs[key] ||= inheritance_hash[key] if inheritance_hash[key]
        end
        [:Resources, :ColorSpace].each do |key|
          next unless inheritance_hash[key]
          (catalogs[key] ||= {}).update(inheritance_hash[key], &(self.class.method(:hash_update_proc_for_old)))
        end

        # avoid references on MediaBox, CropBox and Rotate
        [:MediaBox, :CropBox, :Rotate].each do |key|
          entry = catalogs[key]
          next unless entry.is_a?(Hash) && entry[:referenced_object].is_a?(Hash) && entry[:referenced_object][:indirect_without_dictionary]
          catalogs[key] = entry[:referenced_object][:indirect_without_dictionary]
        end

        catalogs.extend Page_Methods
        catalogs.secure_injection = secure_injection

        page_list << catalogs
      when :Pages
        # each branch gets its own (shallow) copy of the inherited entries
        page_list.concat(pages(catalogs[:Kids], secure_injection, inheritance_hash.dup)) unless catalogs[:Kids].nil?
      when :Catalog
        page_list.concat(pages(catalogs[:Pages], secure_injection, inheritance_hash.dup)) unless catalogs[:Pages].nil?
      end
    end
  end
  page_list
end
279
-
280
# Returns an Array with the different font objects used by the pages of the file.
#
# Type0 font objects ( "font[:Subtype] == :Type0" ) can be registered with the font library
# for use in PDFWriter objects (font numbering / table creation etc').
#
# Pages without a Font table are skipped, and the font objects are never modified.
#
# @param limit_to_type0 [true,false] when true, only Type0 fonts are listed.
# @return [Array<Hash>] the font objects, without duplicates.
def fonts(limit_to_type0 = false)
  # follow a reference wrapper to the object it points to, when there is one
  resolve = ->(obj) { obj.is_a?(Hash) && obj[:referenced_object].is_a?(Hash) ? obj[:referenced_object] : obj }

  fonts_array = []
  pages.each do |page|
    resources = page[:Resources]
    next unless resources.is_a?(Hash)
    # look on the Resources entry itself first, then on the object it may reference
    font_table = resolve.call(resources[:Font] || resolve.call(resources)[:Font])
    next unless font_table.is_a?(Hash)

    font_table.each_value do |font|
      font = resolve.call(font)
      next unless font.is_a?(Hash) && font[:Type] == :Font
      # compare (don't assign!) the subtype, and only when the caller asked for the filter
      next if limit_to_type0 && font[:Subtype] != :Type0
      fonts_array << font unless fonts_array.include?(font)
    end
  end
  fonts_array
end
297
-
298
# Adds pages (or a whole file) to the end of the PDF (combine/merge) and RETURNS SELF, for chaining.
# for example:
#
#   pdf = CombinePDF.new "first_file.pdf"
#
#   pdf << CombinePDF.new("second_file.pdf")
#
#   pdf.save "both_files_merged.pdf"
#
# data:: a PDF page (Hash), an Array of PDF pages or a parsed PDF object to be added.
#
# A PDF object is merged wholesale: its objects are appended to this file's
# objects and the higher of the two versions is kept. Anything else is handed
# to insert, at the end of the page list.
def <<(data)
  unless data.is_a?(PDF)
    insert(-1, data)
    return self
  end
  @version = [@version, data.version].max
  @objects.push(*data.objects)
  self
end
320
-
321
# Adds pages (or a whole file) to the BEGINNING of the PDF (combine/merge) and RETURNS SELF, for chaining.
# for example:
#
#   pdf = CombinePDF.new "second_file.pdf"
#
#   pdf >> CombinePDF.new("first_file.pdf")
#
#   pdf.save "both_files_merged.pdf"
#
# data:: a PDF page (Hash), an Array of PDF pages or a parsed PDF object to be added.
def >>(data)
  insert(0, data)
  self
end
334
-
335
# Adds PDF pages (or the pages of a PDF file) at a specific location.
#
# Returns the new pages Array, or false (after a warning) when the data isn't
# a PDF object, a PDF page or an Array made only of PDF pages.
#
# location:: the location for the added page(s). negative numbers count backwards (-1 being the end of the page array and 0 being the beginning). a location beyond bounds is clamped: the pages are added at the end (or at the beginning, if the out of bounds location was negative).
# data:: a PDF page, a PDF file (CombinePDF.new "filename.pdf") or an array of pages (CombinePDF.new("filename.pdf").pages[0..3]).
def insert(location, data)
  pages_to_add =
    case data
    when PDF then data.pages
    when Array then (data if data.all? { |o| o.is_a?(Hash) && o[:Type] == :Page })
    when Hash then ([data] if data[:Type] == :Page)
    end
  unless pages_to_add
    warn "Shouldn't add objects to the file unless they are PDF objects or PDF pages (an Array or a single PDF page)."
    return false # return false, which will also stop any chaining.
  end

  pages_array = rebuild_catalog[:Pages][:referenced_object][:Kids]
  page_count = pages_array.length
  # clamp locations that fall outside the page array
  if location < 0 && (page_count + location < 0)
    location = 0
  elsif location > 0 && (location > page_count)
    location = page_count
  end

  # the pages arrive as one nested Array, which is then flattened into place
  pages_array.insert location, pages_to_add
  pages_array.flatten!
  pages_array
end
365
-
366
# Removes a PDF page from the (rebuilt) catalog and updates the page count.
#
# Returns the removed page, or nil when the index is out of bounds.
#
# page_index:: the page's index in the zero (0) based page array. negative numbers count backwards (-1 being the last page).
def remove(page_index)
  pages_object = rebuild_catalog[:Pages][:referenced_object]
  kids = pages_object[:Kids]
  removed_page = kids.delete_at(page_index)
  pages_object[:Count] = kids.length
  removed_page
end
380
-
381
-
382
# Adds page numbers to the PDF by stamping a text box on every page.
#
# For unicode text, a unicode font(s) must first be registered. the registered font(s) must supply the
# subset of characters used in the text. UNICODE IS AN ISSUE WITH THE PDF FORMAT - USE CAUTION.
#
# options:: a Hash of options setting the behavior and format of the page numbers:
# - :number_format a format string for the page number. defaults to ' - %s - ' (allows for letter numbering as well, such as "a", "b"...).
# - :number_location the location(s) for the page numbers; may include :top, :bottom, :top_left, :top_right, :bottom_left, :bottom_right. defaults to [:top, :bottom].
# - :start_at the first page number; it is advanced with succ, so a letter ("a") also works. defaults to 1.
# - :margin_from_height a number (PDF points) for the top and bottom margins. defaults to 45.
# - :margin_from_side a number (PDF points) for the left and right margins. defaults to 15.
# the options Hash is also passed on to PDFWriter#textbox, so it can carry that method's options.
# defaults to font: :Helvetica and font_size: 12. The :width and :height entries are
# recomputed for every page from the dimensions of the number text.
def number_pages(options = {})
  opt = {
    number_format: ' - %s - ',
    number_location: [:top, :bottom],
    start_at: 1,
    font_size: 12,
    font: :Helvetica,
    margin_from_height: 45,
    margin_from_side: 15
  }.update(options)
  page_number = opt[:start_at]

  pages.each do |page|
    # create a "stamp" PDF page with the same size as the target page
    mediabox = page[:CropBox] || page[:MediaBox] || [0, 0, 595.3, 841.9]
    stamp = PDFWriter.new mediabox
    text = opt[:number_format] % page_number

    # size the text box from the text itself
    text_dimensions = stamp.dimensions_of(text, opt[:font], opt[:font_size])
    box_width = text_dimensions[0] * 1.2
    box_height = text_dimensions[1] * 2
    opt[:width] = box_width
    opt[:height] = box_height

    # candidate coordinates for the box
    page_width = mediabox[2]
    page_height = mediabox[3]
    center_position = (page_width - box_width) / 2
    left_position = opt[:margin_from_side]
    right_position = page_width - opt[:margin_from_side] - box_width
    top_position = page_height - opt[:margin_from_height]
    bottom_position = opt[:margin_from_height] + box_height

    # every supported location with its [x, y], in stamping order
    locations = {
      top: [center_position, top_position],
      bottom: [center_position, bottom_position],
      top_left: [left_position, top_position],
      bottom_left: [left_position, bottom_position],
      top_right: [right_position, top_position],
      bottom_right: [right_position, bottom_position]
    }
    locations.each do |name, (x, y)|
      next unless opt[:number_location].include? name
      stamp.textbox text, { x: x, y: y }.merge(opt)
    end

    page << stamp
    page_number = page_number.succ
  end
end
459
-
460
# Creates a new page (a PDFWriter) and adds it to the end of the PDF object.
#
# mediabox:: the Array handed to PDFWriter.new for the page. defaults to [0, 0, 595.3, 841.9].
#
# returns the new page object (the last item of the pages Array returned by insert).
def new_page(mediabox = [0, 0, 595.3, 841.9])
  blank_page = PDFWriter.new(mediabox)
  all_pages_after_insert = insert(-1, blank_page)
  all_pages_after_insert.last
end
466
-
467
# Returns the title of the pdf (the :Title entry of the info Hash).
# The title is optional, so this may be nil.
def title
  @info[:Title]
end
472
# Sets the title of the pdf (the :Title entry of the info Hash).
# The title is optional.
#
# new_title:: a string that is the new title value.
def title=(new_title = nil)
  @info.store(:Title, new_title)
end
478
# Returns the author of the pdf (the :Author entry of the info Hash).
# The author is optional, so this may be nil.
def author
  @info[:Author]
end
483
# Sets the author of the pdf (the :Author entry of the info Hash).
# The author is optional.
#
# new_author:: a string that is the new author value.
def author=(new_author = nil)
  @info.store(:Author, new_author)
end
490
- end
491
-
492
- #:nodoc: all
493
-
494
-
495
- class PDF
496
# @private
# Some PDF objects contain references to other PDF objects.
#
# This method walks "object" and adds every object it references to the
# objects list, but DOESN'T add "object" itself. It is used for internal
# operations, such as injecting data using the << operator.
#
# Returns true for an Array or a Hash and false for anything else.
def add_referenced(object)
  if object.is_a?(Array)
    object.each { |item| add_referenced(item) }
    return true
  end
  return false unless object.is_a?(Hash)

  target = object[:referenced_object]
  if object[:is_reference_only] && target
    if target.is_a?(Hash) && target[:Type] == :Page
      # referenced pages are always added
      @objects << target
    else
      found_at = @objects.find_index target
      if found_at
        # equal objects might still be different objects! point the reference
        # at the listed one, so id numbering and formatting act on a single object.
        object[:referenced_object] = @objects[found_at]
        # the listed object was already walked; no need to go over its keys and values
        return true
      end
      # the object wasn't found - add it to the objects list
      @objects << target
    end
  end

  # continue into the values, but never climb back up through :Parent
  object.each { |key, value| add_referenced(value) unless key == :Parent }
  true
end
535
# @private
# Hands the objects list and the given block to PDFOperations._each_object,
# which runs the block on every PDF object (PDF objects are class Hash).
def each_object(&block)
  PDFOperations._each_object(@objects, &block)
end
540
-
541
- protected
542
-
543
# @private
# Conflict resolver for Hash#update / Hash#merge that prefers the old data over the new.
#
# When both values are Hashes they are merged recursively (with this same
# rule); in every other case the old value is kept. A new value that isn't a
# Hash is never merged into an old Hash, since Hash#merge would raise a TypeError.
#
# key:: the conflicting key (unused).
# old_data:: the value already stored.
# new_data:: the incoming value.
def self.hash_update_proc_for_old(key, old_data, new_data)
  return old_data unless old_data.is_a?(Hash) && new_data.is_a?(Hash)
  old_data.merge(new_data, &method(:hash_update_proc_for_old))
end
553
# @private
# Conflict resolver for Hash#update / Hash#merge that prefers the new data over the old.
#
# When both values are Hashes they are merged recursively (with this same
# rule); in every other case the new value replaces the old one. A new value
# that isn't a Hash is never merged into an old Hash, since Hash#merge would raise a TypeError.
#
# key:: the conflicting key (unused).
# old_data:: the value already stored.
# new_data:: the incoming value.
def self.hash_update_proc_for_new(key, old_data, new_data)
  return new_data unless old_data.is_a?(Hash) && new_data.is_a?(Hash)
  old_data.merge(new_data, &method(:hash_update_proc_for_new))
end
563
-
564
# @private
# Returns every Page object that each_object yields - regardless of order and
# even if the page isn't cataloged. Could be used for finding "lost" pages...
# but actually rather useless.
#
# Only the page items are returned; anything they reference stays reachable
# through the connected references.
def all_pages
  found = []
  each_object do |obj|
    found << obj if obj.is_a?(Hash) && obj[:Type] == :Page
  end
  found
end
573
# @private
# Connects every reference to the object it refers to, then removes the old
# numbering (PDFOperations.remove_old_ids) so that only the pointers remain.
#
# The objects are indexed once by [reference id, generation number] and every
# reference is resolved through that index. Earlier versions searched the
# objects list for each reference (PDFOperations.get_refernced_object); the
# author's benchmark of 1000 runs measured 3.57 sec for that approach against
# 0.98 sec for this one.
#
# A reference that can't be resolved only produces a warning.
#
# object:: unused.
def serialize_objects_and_references(object = nil)
  objects_reference_hash = @objects.each_with_object({}) do |o, index|
    index[[o[:indirect_reference_id], o[:indirect_generation_number]]] = o
  end

  each_object do |obj|
    next unless obj[:is_reference_only]
    obj[:referenced_object] = objects_reference_hash[[obj[:indirect_reference_id], obj[:indirect_generation_number]]]
    next if obj[:referenced_object]
    warn "couldn't connect a reference!!! could be a null or removed (empty) object, Silent error!!!\n Object raising issue: #{obj.to_s}"
  end

  # when finished, remove the old numbering system and keep only pointers
  PDFOperations.remove_old_ids @objects
end
616
# @private
# Gives every indirect object a consecutive reference ID.
#
# start:: the first ID to hand out. When given, it is also remembered as the
#         start for later calls; when nil, the remembered start is used.
#
# Returns the Array of renumbered objects.
def renumber_object_ids(start = nil)
  @set_start_id = start || @set_start_id
  next_id = @set_start_id
  all_indirect_object.each do |obj|
    obj[:indirect_reference_id] = next_id
    next_id += 1
  end
end
626
-
627
# @private
# Returns an Array with the reference objects (objects marked :is_reference_only)
# that each_object yields.
#
# With no arguments every reference is returned; otherwise only the references
# that compare_reference_values matches against the given ID and generation number.
def references(indirect_reference_id = nil, indirect_generation_number = nil)
  ref = { indirect_reference_id: indirect_reference_id, indirect_generation_number: indirect_generation_number }
  collect_all = indirect_reference_id == nil && indirect_generation_number == nil
  out = []
  each_object do |obj|
    next unless obj[:is_reference_only]
    out << obj if collect_all || compare_reference_values(ref, obj)
  end
  out
end
642
# @private
# Returns the objects that get an indirect ID when the file is rendered.
# Every item of the objects list counts, so the list itself is returned.
def all_indirect_object
  @objects
end
647
# @private
# Sorts the objects list, in place, by indirect reference ID.
#
# Only pairs of numbered, non-reference Hash objects are compared by ID; any
# other pair is treated as equal.
#
# Returns the sorted objects Array.
def sort_objects_by_id
  numbered = lambda do |obj|
    obj.is_a?(Hash) && obj[:indirect_reference_id] && obj[:is_reference_only].nil?
  end
  @objects.sort! do |a, b|
    # the block's value is the comparison result - a `return` here would
    # leave the whole method on the first comparison, sorting nothing
    if numbered.call(a) && numbered.call(b)
      (a[:indirect_reference_id] <=> b[:indirect_reference_id]) || 0
    else
      0
    end
  end
end
656
-
657
# @private
# Replaces the existing Catalog and Pages objects with one new Catalog that
# points to one new, flat Pages object listing every page of the file.
#
# with_pages:: extra page objects to add to the end of the page list.
#
# Every listed page - the extra ones included - gets its :Parent pointed at the
# new Pages object. The old Catalog and Pages objects are removed from the
# objects list and the new ones are appended to it.
#
# Returns the new Catalog object.
def rebuild_catalog(*with_pages)
  # collect the cataloged pages once (walking the page tree is the costly part)
  page_list = pages

  # add pages to catalog, if requested
  page_list.push(*with_pages) unless with_pages.empty?

  # build new Pages object
  pages_object = {
    Type: :Pages,
    Count: page_list.length,
    Kids: page_list.map { |page| { referenced_object: page, is_reference_only: true } }
  }

  # build new Catalog object
  catalog_object = { Type: :Catalog, Pages: { referenced_object: pages_object, is_reference_only: true } }

  # point every page at the new Pages object
  page_list.each { |page| page[:Parent] = { referenced_object: pages_object, is_reference_only: true } }

  # remove old catalog and pages objects
  @objects.reject! { |obj| obj.is_a?(Hash) && (obj[:Type] == :Catalog || obj[:Type] == :Pages) }

  # inject new catalog and pages objects
  @objects << pages_object
  @objects << catalog_object

  catalog_object
end
694
-
695
# @private
# An alternative to rebuild_catalog, used by to_pdf for streamlining the PDF output.
#
# After rebuilding the catalog, the objects list is started over with the
# catalog alone and then refilled with whatever the catalog references.
# There is no point in calling this method before preparing the output.
#
# Returns the new Catalog object.
def rebuild_catalog_and_objects
  catalog = rebuild_catalog
  @objects = [catalog]
  add_referenced catalog
  catalog
end
706
-
707
# @private
# Returns true if the reference belongs to the object, i.e. both carry the
# same reference ID and the same generation number.
#
# Either argument may be a reference wrapper; when it has a :referenced_object,
# the IDs are read from that object instead of from the wrapper.
def compare_reference_values(obj, ref)
  left = obj[:referenced_object] || obj
  right = ref[:referenced_object] || ref
  left[:indirect_reference_id] == right[:indirect_reference_id] &&
    left[:indirect_generation_number] == right[:indirect_generation_number]
end
720
-
721
-
722
- end
723
- end
724
-