combine_pdf 0.2.21 → 0.2.27

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,467 +5,495 @@
5
5
  ## is subject to the same license.
6
6
  ########################################################
7
7
 
8
-
9
-
10
-
11
-
12
-
13
8
  module CombinePDF
14
-
15
- # PDF class is the PDF object that can save itself to
16
- # a file and that can be used as a container for a full
17
- # PDF file data, including version, information etc'.
18
- #
19
- # PDF objects can be used to combine or to inject data.
20
- # == Combine/Merge PDF files or Pages
21
- # To combine PDF files (or data):
22
- # pdf = CombinePDF.new
23
- # pdf << CombinePDF.load("file1.pdf") # one way to combine, very fast.
24
- # pdf << CombinePDF.load("file2.pdf")
25
- # pdf.save "combined.pdf"
26
- # or even a one liner:
27
- # (CombinePDF.load("file1.pdf") << CombinePDF.load("file2.pdf") << CombinePDF.load("file3.pdf")).save("combined.pdf")
28
- # you can also add just odd or even pages:
29
- # pdf = CombinePDF.new
30
- # i = 0
31
- # CombinePDF.load("file.pdf").pages.each do |page|
32
- # i += 1
33
- # pdf << page if i.even?
34
- # end
35
- # pdf.save "even_pages.pdf"
36
- # notice that adding all the pages one by one is slower then adding the whole file.
37
- # == Add content to existing pages (Stamp / Watermark)
38
- # To add content to existing PDF pages, first import the new content from an existing PDF file.
39
- # after that, add the content to each of the pages in your existing PDF.
40
- #
41
- # in this example, we will add a company logo to each page:
42
- # company_logo = CombinePDF.load("company_logo.pdf").pages[0]
43
- # pdf = CombinePDF.load "content_file.pdf"
44
- # pdf.pages.each {|page| page << company_logo} # notice the << operator is on a page and not a PDF object.
45
- # pdf.save "content_with_logo.pdf"
46
- # Notice the << operator is on a page and not a PDF object. The << operator acts differently on PDF objects and on Pages.
47
- #
48
- # The << operator defaults to secure injection by renaming references to avoid conflics. For overlaying pages using compressed data that might not be editable (due to limited filter support), you can use:
49
- # pdf.pages(nil, false).each {|page| page << stamp_page}
50
- #
51
- # == Page Numbering
52
- # adding page numbers to a PDF object or file is as simple as can be:
53
- # pdf = CombinePDF.load "file_to_number.pdf"
54
- # pdf.number_pages
55
- # pdf.save "file_with_numbering.pdf"
56
- #
57
- # numbering can be done with many different options, with different formating, with or without a box object, and even with opacity values.
58
- #
59
- # == Loading PDF data
60
- # Loading PDF data can be done from file system or directly from the memory.
61
- #
62
- # Loading data from a file is easy:
63
- # pdf = CombinePDF.load("file.pdf")
64
- # you can also parse PDF files from memory:
65
- # pdf_data = IO.read 'file.pdf' # for this demo, load a file to memory
66
- # pdf = CombinePDF.parse(pdf_data)
67
- # Loading from the memory is especially effective for importing PDF data recieved through the internet or from a different authoring library such as Prawn.
68
- class PDF
69
-
70
- # lists the Hash keys used for PDF objects
71
- #
72
- # the CombinePDF library doesn't use special classes for its objects (PDFPage class, PDFStream class or anything like that).
73
- #
74
- # there is only one PDF class which represents the whole of the PDF file.
75
- #
76
- # this Hash lists the private Hash keys that the CombinePDF library uses to
77
- # differentiate between complex PDF objects.
78
- PRIVATE_HASH_KEYS = [:indirect_reference_id, :indirect_generation_number, :raw_stream_content, :is_reference_only, :referenced_object, :indirect_without_dictionary]
79
-
80
- # the objects attribute is an Array containing all the PDF sub-objects for te class.
81
- attr_reader :objects
82
- # the info attribute is a Hash that sets the Info data for the PDF.
83
- # use, for example:
84
- # pdf.info[:Title] = "title"
85
- attr_reader :info
86
- # set/get the PDF version of the file (1.1-1.7) - shuold be type Float.
87
- attr_accessor :version
88
- # the viewer_preferences attribute is a Hash that sets the ViewerPreferences data for the PDF.
89
- # use, for example:
90
- # pdf.viewer_preferences[:HideMenubar] = true
91
- attr_reader :viewer_preferences
92
-
93
- def initialize (parser = nil)
94
- # default before setting
95
- @objects = []
96
- @version = 0
97
- @viewer_preferences, @info = {}, {}
98
- parser ||= PDFParser.new("")
99
- raise TypeError, "initialization error, expecting CombinePDF::PDFParser or nil, but got #{parser.class.name}" unless parser.is_a? PDFParser
100
- @objects = parser.parse
101
- # remove any existing id's
102
- remove_old_ids
103
- # set data from parser
104
- @version = parser.version if parser.version.is_a? Float
105
- @info = parser.info_object || {}
106
- @names = parser.names_object || {}
107
- @forms_data = parser.forms_object || {}
108
-
109
- # general globals
110
- @set_start_id = 1
111
- @info[:Producer] = "Ruby CombinePDF #{CombinePDF::VERSION} Library"
112
- @info.delete :CreationDate
113
- @info.delete :ModDate
114
- end
115
-
116
- # adds a new page to the end of the PDF object.
117
- #
118
- # returns the new page object.
119
- #
120
- # unless the media box is specified, it defaults to US Letter: [0, 0, 612.0, 792.0]
121
- def new_page(mediabox = [0, 0, 612.0, 792.0], location = -1)
122
- p = PDFWriter.new(mediabox)
123
- insert(-1, p )
124
- p
125
- end
126
-
127
- # get the title for the pdf
128
- # The title is stored in the information dictionary and isn't required
129
- def title
130
- return @info[:Title]
131
- end
132
- # set the title for the pdf
133
- # The title is stored in the information dictionary and isn't required
134
- # new_title:: a string that is the new author value.
135
- def title=(new_title = nil)
136
- @info[:Title] = new_title
137
- end
138
- # get the author value for the pdf.
139
- # The author is stored in the information dictionary and isn't required
140
- def author
141
- return @info[:Author]
142
- end
143
- # set the author value for the pdf.
144
- # The author is stored in the information dictionary and isn't required
145
- #
146
- # new_title:: a string that is the new author value.
147
- def author=(new_author = nil)
148
- @info[:Author] = new_author
149
- end
150
- # Clears any existing form data.
151
- def clear_forms_data
152
- @forms_data.nil? || @forms_data.clear
153
- end
154
-
155
- # Save the PDF to file.
156
- #
157
- # file_name:: is a string or path object for the output.
158
- #
159
- # **Notice!** if the file exists, it **WILL** be overwritten.
160
- def save(file_name, options = {})
161
- IO.binwrite file_name, to_pdf(options)
162
- end
163
-
164
- # Formats the data to PDF formats and returns a binary string that represents the PDF file content.
165
- #
166
- # This method is used by the save(file_name) method to save the content to a file.
167
- #
168
- # use this to export the PDF file without saving to disk (such as sending through HTTP ect').
169
- def to_pdf options = {}
170
- #reset version if not specified
171
- @version = 1.5 if @version.to_f == 0.0
172
- #set info for merged file
173
- @info[:ModDate] = @info[:CreationDate] = Time.now.strftime "D:%Y%m%d%H%M%S%:::z'00"
174
- @info[:Subject] = options[:subject] if options[:subject]
175
- @info[:Producer] = options[:producer] if options[:producer]
176
- #rebuild_catalog
177
- catalog = rebuild_catalog_and_objects
178
- # add ID and generation numbers to objects
179
- renumber_object_ids
180
-
181
- out = []
182
- xref = []
183
- indirect_object_count = 1 #the first object is the null object
184
- #write head (version and binanry-code)
185
- out << "%PDF-#{@version.to_s}\n%\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00".force_encoding(Encoding::ASCII_8BIT)
186
-
187
- #collect objects and set xref table locations
188
- loc = 0
189
- out.each {|line| loc += line.bytesize + 1}
190
- @objects.each do |o|
191
- indirect_object_count += 1
192
- xref << loc
193
- out << object_to_pdf(o)
194
- loc += out.last.bytesize + 1
195
- end
196
- xref_location = loc
197
- # xref_location = 0
198
- # out.each { |line| xref_location += line.bytesize + 1}
199
- out << "xref\n0 #{(indirect_object_count).to_s}\n0000000000 65535 f \n"
200
- xref.each {|offset| out << ( out.pop + ("%010d 00000 n \n" % offset) ) }
201
- out << out.pop + "trailer"
202
- out << "<<\n/Root #{false || "#{catalog[:indirect_reference_id]} #{catalog[:indirect_generation_number]} R"}"
203
- out << "/Size #{indirect_object_count.to_s}"
204
- out << "/Info #{@info[:indirect_reference_id]} #{@info[:indirect_generation_number]} R"
205
- out << ">>\nstartxref\n#{xref_location.to_s}\n%%EOF"
206
- # when finished, remove the numbering system and keep only pointers
207
- remove_old_ids
208
- # output the pdf stream
209
- out.join("\n").force_encoding(Encoding::ASCII_8BIT)
210
- end
211
-
212
- # this method returns all the pages cataloged in the catalog.
213
- #
214
- # if no catalog is passed, it seeks the existing catalog(s) and searches
215
- # for any registered Page objects.
216
- #
217
- # Page objects are Hash class objects. the page methods are added using a mixin or inheritance.
218
- #
219
- # catalogs:: a catalog, or an Array of catalog objects. defaults to the existing catalog.
220
- def pages(catalogs = nil)
221
- page_list = []
222
- catalogs ||= get_existing_catalogs
223
-
224
- if catalogs.is_a?(Array)
225
- catalogs.each {|c| page_list.push *( pages(c) ) unless c.nil?}
226
- elsif catalogs.is_a?(Hash)
227
- if catalogs[:is_reference_only]
228
- if catalogs[:referenced_object]
229
- page_list.push *( pages(catalogs[:referenced_object]) )
230
- else
231
- warn "couldn't follow reference!!! #{catalogs} not found!"
232
- end
233
- else
234
- case catalogs[:Type]
235
- when :Page
236
- page_list << catalogs
237
- when :Pages
238
- page_list.push *(pages(catalogs[:Kids])) unless catalogs[:Kids].nil?
239
- when :Catalog
240
- page_list.push *(pages(catalogs[:Pages])) unless catalogs[:Pages].nil?
241
- end
242
- end
243
- end
244
- page_list
245
- end
246
-
247
- # returns an array with the different fonts used in the file.
248
- #
249
- # Type0 font objects ( "font[:Subtype] == :Type0" ) can be registered with the font library
250
- # for use in PDFWriter objects (font numbering / table creation etc').
251
- # @param limit_to_type0 [true,false] limits the list to type0 fonts.
252
- def fonts(limit_to_type0 = false)
253
- fonts_array = []
254
- pages.each do |p|
255
- p[:Resources][:Font].values.each do |f|
256
- f = f[:referenced_object] if f[:referenced_object]
257
- if (limit_to_type0 || f[:Subtype] = :Type0) && f[:Type] == :Font && !fonts_array.include?(f)
258
- fonts_array << f
259
- end
260
- end
261
- end
262
- fonts_array
263
- end
264
-
265
- # add the pages (or file) to the PDF (combine/merge) and RETURNS SELF, for nesting.
266
- # for example:
267
- #
268
- # pdf = CombinePDF.new "first_file.pdf"
269
- #
270
- # pdf << CombinePDF.new "second_file.pdf"
271
- #
272
- # pdf.save "both_files_merged.pdf"
273
- # data:: is PDF page (Hash), and Array of PDF pages or a parsed PDF object to be added.
274
- def << (data)
275
- insert -1, data
276
- end
277
-
278
- # add the pages (or file) to the BEGINNING of the PDF (combine/merge) and RETURNS SELF for nesting operators.
279
- # for example:
280
- #
281
- # pdf = CombinePDF.new "second_file.pdf"
282
- #
283
- # pdf >> CombinePDF.new "first_file.pdf"
284
- #
285
- # pdf.save "both_files_merged.pdf"
286
- # data:: is PDF page (Hash), and Array of PDF pages or a parsed PDF object to be added.
287
- def >> (data)
288
- insert 0, data
289
- end
290
-
291
- # add PDF pages (or PDF files) into a specific location.
292
- #
293
- # returns the new pages Array! (unlike `#<<`, doesn't return self!)
294
- #
295
- # location:: the location for the added page(s). Could be any number. negative numbers represent a count backwards (-1 being the end of the page array and 0 being the begining). if the location is beyond bounds, the pages will be added to the end of the PDF object (or at the begining, if the out of bounds was a negative number).
296
- # data:: a PDF page, a PDF file (CombinePDF.new "filname.pdf") or an array of pages (CombinePDF.new("filname.pdf").pages[0..3]).
297
- def insert(location, data)
298
- pages_to_add = nil
299
- if data.is_a? PDF
300
- @version = [@version, data.version].max
301
- pages_to_add = data.pages
302
- actual_value(@names).update actual_value(data.names_object), &self.class.method(:hash_merge_new_no_page)
303
- if actual_value(@forms_data)
304
- actual_value(@forms_data).update actual_value(data.forms_data), &self.class.method(:hash_merge_new_no_page) if data.forms_data
305
- else
306
- @forms_data = data.forms_data
307
- end
308
- warn "Form data might be lost when combining PDF forms (possible conflicts)." unless data.forms_data.nil? || data.forms_data.empty?
309
- elsif data.is_a?(Array) && (data.select {|o| !(o.is_a?(Hash) && o[:Type] == :Page) } ).empty?
310
- pages_to_add = data
311
- elsif data.is_a?(Hash) && data[:Type] == :Page
312
- pages_to_add = [data]
313
- else
314
- warn "Shouldn't add objects to the file unless they are PDF objects or PDF pages (an Array or a single PDF page)."
315
- return false # return false, which will also stop any chaining.
316
- end
317
- # pages_to_add.map! {|page| page.copy }
318
- catalog = rebuild_catalog
319
- pages_array = catalog[:Pages][:referenced_object][:Kids]
320
- page_count = pages_array.length
321
- if location < 0 && (page_count + location < 0 )
322
- location = 0
323
- elsif location > 0 && (location > page_count)
324
- location = page_count
325
- end
326
- pages_array.insert location, pages_to_add
327
- pages_array.flatten!
328
- self
329
- end
330
-
331
- # removes a PDF page from the file and the catalog
332
- #
333
- # returns the removed page.
334
- #
335
- # returns nil if failed or if out of bounds.
336
- #
337
- # page_index:: the page's index in the zero (0) based page array. negative numbers represent a count backwards (-1 being the end of the page array and 0 being the begining).
338
- def remove(page_index)
339
- catalog = rebuild_catalog
340
- pages_array = catalog[:Pages][:referenced_object][:Kids]
341
- removed_page = pages_array.delete_at page_index
342
- catalog[:Pages][:referenced_object][:Count] = pages_array.length
343
- removed_page
344
- end
345
-
346
-
347
- # add page numbers to the PDF
348
- #
349
- # For unicode text, a unicode font(s) must first be registered. the registered font(s) must supply the
350
- # subset of characters used in the text. UNICODE IS AN ISSUE WITH THE PDF FORMAT - USE CAUSION.
351
- #
352
- # options:: a Hash of options setting the behavior and format of the page numbers:
353
- # - :number_format a string representing the format for page number. defaults to ' - %s - ' (allows for letter numbering as well, such as "a", "b"...).
354
- # - :location an Array containing the location for the page numbers, can be :top, :buttom, :top_left, :top_right, :bottom_left, :bottom_right or :center (:center == full page). defaults to [:top, :buttom].
355
- # - :start_at a Fixnum that sets the number for first page number. also accepts a letter ("a") for letter numbering. defaults to 1.
356
- # - :margin_from_height a number (PDF points) for the top and buttom margins. defaults to 45.
357
- # - :margin_from_side a number (PDF points) for the left and right margins. defaults to 15.
358
- # - :page_range a range of pages to be numbered (i.e. (2..-1) ) defaults to all the pages (nil). Remember to set the :start_at to the correct value.
359
- # the options Hash can also take all the options for {Page_Methods#textbox}.
360
- # defaults to font: :Helvetica, font_size: 12 and no box (:border_width => 0, :box_color => nil).
361
- def number_pages(options = {})
362
- opt = {
363
- number_format: ' - %s - ',
364
- start_at: 1,
365
- font: :Helvetica,
366
- margin_from_height: 45,
367
- margin_from_side: 15
368
- }
369
- opt.update options
370
- opt[:location] ||= opt[:number_location] ||= opt[:stamp_location] ||= [:top, :bottom]
371
- opt[:location] = [opt[:location]] unless opt[:location].is_a? (Array)
372
-
373
- page_number = opt[:start_at]
374
- format_repeater = opt[:number_format].count('%')
375
- just_center = [:center]
376
- small_font_size = opt[:font_size] || 12
377
-
378
- # some common computations can be done only once.
379
- from_height = opt[:margin_from_height]
380
- from_side = opt[:margin_from_side]
381
- left_position = from_side
382
-
383
- (opt[:page_range] ? pages[opt[:page_range]] : pages).each do |page|
384
- # Get page dimensions
385
- mediabox = page[:CropBox] || page[:MediaBox] || [0, 0, 595.3, 841.9]
386
- # set stamp text
387
- text = opt[:number_format] % (Array.new(format_repeater) {page_number})
388
- if opt[:location].include? :center
389
- add_opt = {}
390
- if opt[:margin_from_height] && !opt[:height] && !opt[:y]
391
- add_opt[:height] = mediabox[3] - mediabox[1] - (2*opt[:margin_from_height].to_f)
392
- add_opt[:y] = opt[:margin_from_height]
393
- end
394
- if opt[:margin_from_side] && !opt[:width] && !opt[:x]
395
- add_opt[:width] = mediabox[2] - mediabox[0] - (2*opt[:margin_from_side].to_f)
396
- add_opt[:x] = opt[:margin_from_side]
397
- end
398
- page.textbox text, opt.merge(add_opt)
399
- end
400
- unless opt[:location] == just_center
401
- add_opt = { font_size: small_font_size }.merge(opt)
402
- # text = opt[:number_format] % page_number
403
- # compute locations for text boxes
404
- text_dimantions = Fonts.dimensions_of( text, opt[:font], small_font_size )
405
- box_width = text_dimantions[0] * 1.2
406
- box_height = text_dimantions[1] * 2
407
- page_width = mediabox[2]
408
- page_height = mediabox[3]
409
-
410
- add_opt[:width] ||= box_width
411
- add_opt[:height] ||= box_height
412
-
413
- center_position = (page_width - box_width)/2
414
- right_position = page_width - from_side - box_width
415
- top_position = page_height - from_height
416
- bottom_position = from_height + box_height
417
-
418
- if opt[:location].include? :top
419
- page.textbox text, {x: center_position, y: top_position }.merge(add_opt)
420
- end
421
- if opt[:location].include? :bottom
422
- page.textbox text, {x: center_position, y: bottom_position }.merge(add_opt)
423
- end
424
- if opt[:location].include? :top_left
425
- page.textbox text, {x: left_position, y: top_position, font_size: small_font_size }.merge(add_opt)
426
- end
427
- if opt[:location].include? :bottom_left
428
- page.textbox text, {x: left_position, y: bottom_position, font_size: small_font_size }.merge(add_opt)
429
- end
430
- if opt[:location].include? :top_right
431
- page.textbox text, {x: right_position, y: top_position, font_size: small_font_size }.merge(add_opt)
432
- end
433
- if opt[:location].include? :bottom_right
434
- page.textbox text, {x: right_position, y: bottom_position, font_size: small_font_size }.merge(add_opt)
435
- end
436
- end
437
- page_number = page_number.succ
438
- end
439
-
440
- end
441
- # This method stamps all (or some) of the pages is the PDF with the requested stamp.
442
- #
443
- # The method accept:
444
- # stamp:: either a String or a PDF page. If this is a String, you can add formating to add page numbering (i.e. "page number %i"). otherwise remember to escape any percent ('%') sign (i.e. "page \%number not shown\%").
445
- # options:: an options Hash.
446
- #
447
- # If the stamp is a PDF page, only :page_range and :underlay (to reverse-stamp) are valid options.
448
- #
449
- # If the stamp is a String, than all the options used by {#number_pages} or {Page_Methods#textbox} can be used.
450
- #
451
- # The default :location option is :center = meaning the stamp will be stamped all across the page unless the :x, :y, :width or :height options are specified.
452
- def stamp_pages stamp, options = {}
453
- case stamp
454
- when String
455
- options[:location] ||= [:center]
456
- number_pages({number_format: stamp}.merge(options))
457
- when Page_Methods
458
- # stamp = stamp.copy(true)
459
- if options[:underlay]
460
- (options[:page_range] ? pages[options[:page_range]] : pages).each {|p| p >> stamp}
461
- else
462
- (options[:page_range] ? pages[options[:page_range]] : pages).each {|p| p << stamp}
463
- end
464
- else
465
- raise TypeError, "expecting a String or a PDF page as the stamp."
466
- end
467
- end
468
-
469
- end
470
-
9
+ # PDF class is the PDF object that can save itself to
10
+ # a file and that can be used as a container for a full
11
+ # PDF file data, including version, information etc'.
12
+ #
13
+ # PDF objects can be used to combine or to inject data.
14
+ # == Combine/Merge PDF files or Pages
15
+ # To combine PDF files (or data):
16
+ # pdf = CombinePDF.new
17
+ # pdf << CombinePDF.load("file1.pdf") # one way to combine, very fast.
18
+ # pdf << CombinePDF.load("file2.pdf")
19
+ # pdf.save "combined.pdf"
20
+ # or even a one liner:
21
+ # (CombinePDF.load("file1.pdf") << CombinePDF.load("file2.pdf") << CombinePDF.load("file3.pdf")).save("combined.pdf")
22
+ # you can also add just odd or even pages:
23
+ # pdf = CombinePDF.new
24
+ # i = 0
25
+ # CombinePDF.load("file.pdf").pages.each do |page|
26
+ # i += 1
27
+ # pdf << page if i.even?
28
+ # end
29
+ # pdf.save "even_pages.pdf"
30
+ # notice that adding all the pages one by one is slower then adding the whole file.
31
+ # == Add content to existing pages (Stamp / Watermark)
32
+ # To add content to existing PDF pages, first import the new content from an existing PDF file.
33
+ # after that, add the content to each of the pages in your existing PDF.
34
+ #
35
+ # in this example, we will add a company logo to each page:
36
+ # company_logo = CombinePDF.load("company_logo.pdf").pages[0]
37
+ # pdf = CombinePDF.load "content_file.pdf"
38
+ # pdf.pages.each {|page| page << company_logo} # notice the << operator is on a page and not a PDF object.
39
+ # pdf.save "content_with_logo.pdf"
40
+ # Notice the << operator is on a page and not a PDF object. The << operator acts differently on PDF objects and on Pages.
41
+ #
42
+ # The << operator defaults to secure injection by renaming references to avoid conflics. For overlaying pages using compressed data that might not be editable (due to limited filter support), you can use:
43
+ # pdf.pages(nil, false).each {|page| page << stamp_page}
44
+ #
45
+ # == Page Numbering
46
+ # adding page numbers to a PDF object or file is as simple as can be:
47
+ # pdf = CombinePDF.load "file_to_number.pdf"
48
+ # pdf.number_pages
49
+ # pdf.save "file_with_numbering.pdf"
50
+ #
51
+ # numbering can be done with many different options, with different formating, with or without a box object, and even with opacity values.
52
+ #
53
+ # == Loading PDF data
54
+ # Loading PDF data can be done from file system or directly from the memory.
55
+ #
56
+ # Loading data from a file is easy:
57
+ # pdf = CombinePDF.load("file.pdf")
58
+ # you can also parse PDF files from memory:
59
+ # pdf_data = IO.read 'file.pdf' # for this demo, load a file to memory
60
+ # pdf = CombinePDF.parse(pdf_data)
61
+ # Loading from the memory is especially effective for importing PDF data recieved through the internet or from a different authoring library such as Prawn.
62
+ class PDF
63
+ # lists the Hash keys used for PDF objects
64
+ #
65
+ # the CombinePDF library doesn't use special classes for its objects (PDFPage class, PDFStream class or anything like that).
66
+ #
67
+ # there is only one PDF class which represents the whole of the PDF file.
68
+ #
69
+ # this Hash lists the private Hash keys that the CombinePDF library uses to
70
+ # differentiate between complex PDF objects.
71
+ PRIVATE_HASH_KEYS = [:indirect_reference_id, :indirect_generation_number, :raw_stream_content, :is_reference_only, :referenced_object, :indirect_without_dictionary, :related_objects].freeze
72
+
73
+ # the objects attribute is an Array containing all the PDF sub-objects for te class.
74
+ attr_reader :objects
75
+ # the info attribute is a Hash that sets the Info data for the PDF.
76
+ # use, for example:
77
+ # pdf.info[:Title] = "title"
78
+ attr_reader :info
79
+ # set/get the PDF version of the file (1.1-1.7) - shuold be type Float.
80
+ attr_accessor :version
81
+ # the viewer_preferences attribute is a Hash that sets the ViewerPreferences data for the PDF.
82
+ # use, for example:
83
+ # pdf.viewer_preferences[:HideMenubar] = true
84
+ attr_reader :viewer_preferences
85
+
86
+ def initialize(parser = nil)
87
+ # default before setting
88
+ @objects = []
89
+ @version = 0
90
+ @viewer_preferences = {}
91
+ @info = {}
92
+ parser ||= PDFParser.new('')
93
+ raise TypeError, "initialization error, expecting CombinePDF::PDFParser or nil, but got #{parser.class.name}" unless parser.is_a? PDFParser
94
+ @objects = parser.parse
95
+ # remove any existing id's
96
+ remove_old_ids
97
+ # set data from parser
98
+ @version = parser.version if parser.version.is_a? Float
99
+ @info = parser.info_object || {}
100
+ @names = parser.names_object || {}
101
+ @forms_data = parser.forms_object || {}
102
+ @outlines = parser.outlines_object || {}
103
+
104
+ # general globals
105
+ @set_start_id = 1
106
+ @info[:Producer] = "Ruby CombinePDF #{CombinePDF::VERSION} Library"
107
+ @info.delete :CreationDate
108
+ @info.delete :ModDate
109
+ end
110
+
111
+ # adds a new page to the end of the PDF object.
112
+ #
113
+ # returns the new page object.
114
+ #
115
+ # unless the media box is specified, it defaults to US Letter: [0, 0, 612.0, 792.0]
116
+ def new_page(mediabox = [0, 0, 612.0, 792.0], _location = -1)
117
+ p = PDFWriter.new(mediabox)
118
+ insert(-1, p)
119
+ p
120
+ end
121
+
122
+ # get the title for the pdf
123
+ # The title is stored in the information dictionary and isn't required
124
+ def title
125
+ @info[:Title]
126
+ end
127
+
128
+ # set the title for the pdf
129
+ # The title is stored in the information dictionary and isn't required
130
+ # new_title:: a string that is the new author value.
131
+ def title=(new_title = nil)
132
+ @info[:Title] = new_title
133
+ end
134
+
135
+ # get the author value for the pdf.
136
+ # The author is stored in the information dictionary and isn't required
137
+ def author
138
+ @info[:Author]
139
+ end
140
+
141
+ # set the author value for the pdf.
142
+ # The author is stored in the information dictionary and isn't required
143
+ #
144
+ # new_title:: a string that is the new author value.
145
+ def author=(new_author = nil)
146
+ @info[:Author] = new_author
147
+ end
148
+
149
+ # Clears any existing form data.
150
+ def clear_forms_data
151
+ @forms_data.nil? || @forms_data.clear
152
+ end
153
+
154
+ # Save the PDF to file.
155
+ #
156
+ # file_name:: is a string or path object for the output.
157
+ #
158
+ # **Notice!** if the file exists, it **WILL** be overwritten.
159
+ def save(file_name, options = {})
160
+ IO.binwrite file_name, to_pdf(options)
161
+ end
162
+
163
+ # Formats the data to PDF formats and returns a binary string that represents the PDF file content.
164
+ #
165
+ # This method is used by the save(file_name) method to save the content to a file.
166
+ #
167
+ # use this to export the PDF file without saving to disk (such as sending through HTTP ect').
168
+ def to_pdf(options = {})
169
+ # reset version if not specified
170
+ @version = 1.5 if @version.to_f == 0.0
171
+ # set info for merged file
172
+ @info[:ModDate] = @info[:CreationDate] = Time.now.strftime "D:%Y%m%d%H%M%S%:::z'00"
173
+ @info[:Subject] = options[:subject] if options[:subject]
174
+ @info[:Producer] = options[:producer] if options[:producer]
175
+ # rebuild_catalog
176
+ catalog = rebuild_catalog_and_objects
177
+ # add ID and generation numbers to objects
178
+ renumber_object_ids
179
+
180
+ out = []
181
+ xref = []
182
+ indirect_object_count = 1 # the first object is the null object
183
+ # write head (version and binanry-code)
184
+ out << "%PDF-#{@version}\n%\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00".force_encoding(Encoding::ASCII_8BIT)
185
+
186
+ # collect objects and set xref table locations
187
+ loc = 0
188
+ out.each { |line| loc += line.bytesize + 1 }
189
+ @objects.each do |o|
190
+ indirect_object_count += 1
191
+ xref << loc
192
+ out << object_to_pdf(o)
193
+ loc += out.last.bytesize + 1
194
+ end
195
+ xref_location = loc
196
+ # xref_location = 0
197
+ # out.each { |line| xref_location += line.bytesize + 1}
198
+ out << "xref\n0 #{indirect_object_count}\n0000000000 65535 f \n"
199
+ xref.each { |offset| out << (out.pop + ("%010d 00000 n \n" % offset)) }
200
+ out << out.pop + 'trailer'
201
+ out << "<<\n/Root #{false || "#{catalog[:indirect_reference_id]} #{catalog[:indirect_generation_number]} R"}"
202
+ out << "/Size #{indirect_object_count}"
203
+ out << "/Info #{@info[:indirect_reference_id]} #{@info[:indirect_generation_number]} R"
204
+ out << ">>\nstartxref\n#{xref_location}\n%%EOF"
205
+ # when finished, remove the numbering system and keep only pointers
206
+ remove_old_ids
207
+ # output the pdf stream
208
+ out.join("\n").force_encoding(Encoding::ASCII_8BIT)
209
+ end
210
+
211
+ # this method returns all the pages cataloged in the catalog.
212
+ #
213
+ # if no catalog is passed, it seeks the existing catalog(s) and searches
214
+ # for any registered Page objects.
215
+ #
216
+ # Page objects are Hash class objects. the page methods are added using a mixin or inheritance.
217
+ #
218
+ # catalogs:: a catalog, or an Array of catalog objects. defaults to the existing catalog.
219
+ def pages(catalogs = nil)
220
+ page_list = []
221
+ catalogs ||= get_existing_catalogs
222
+
223
+ if catalogs.is_a?(Array)
224
+ catalogs.each { |c| page_list.concat pages(c) unless c.nil? }
225
+ elsif catalogs.is_a?(Hash)
226
+ if catalogs[:is_reference_only]
227
+ if catalogs[:referenced_object]
228
+ page_list.concat pages(catalogs[:referenced_object])
229
+ else
230
+ warn "couldn't follow reference!!! #{catalogs} not found!"
231
+ end
232
+ else
233
+ case catalogs[:Type]
234
+ when :Page
235
+ page_list << catalogs
236
+ when :Pages
237
+ page_list.concat pages(catalogs[:Kids]) unless catalogs[:Kids].nil?
238
+ when :Catalog
239
+ page_list.concat pages(catalogs[:Pages]) unless catalogs[:Pages].nil?
240
+ end
241
+ end
242
+ end
243
+ page_list
244
+ end
245
+
246
+ # returns an array with the different fonts used in the file.
247
+ #
248
+ # Type0 font objects ( "font[:Subtype] == :Type0" ) can be registered with the font library
249
+ # for use in PDFWriter objects (font numbering / table creation etc').
250
+ # @param limit_to_type0 [true,false] limits the list to type0 fonts.
251
+ def fonts(limit_to_type0 = false)
252
+ fonts_array = []
253
+ pages.each do |p|
254
+ p[:Resources][:Font].values.each do |f|
255
+ f = f[:referenced_object] if f[:referenced_object]
256
+ if (limit_to_type0 || f[:Subtype] = :Type0) && f[:Type] == :Font && !fonts_array.include?(f)
257
+ fonts_array << f
258
+ end
259
+ end
260
+ end
261
+ fonts_array
262
+ end
263
+
264
+ # add the pages (or file) to the PDF (combine/merge) and RETURNS SELF, for nesting.
265
+ # for example:
266
+ #
267
+ # pdf = CombinePDF.new "first_file.pdf"
268
+ #
269
+ # pdf << CombinePDF.new "second_file.pdf"
270
+ #
271
+ # pdf.save "both_files_merged.pdf"
272
+ # data:: is PDF page (Hash), and Array of PDF pages or a parsed PDF object to be added.
273
+ def <<(data)
274
+ insert -1, data
275
+ end
276
+
277
# Prepends the pages (or a whole file) to the BEGINNING of the PDF (combine/merge).
#
# The call is forwarded to {#insert} with location 0, so it returns whatever
# {#insert} returns: self (allowing chained operators) or false when the data is rejected.
# for example:
#
#   pdf = CombinePDF.load("second_file.pdf")
#
#   pdf >> CombinePDF.load("first_file.pdf")
#
#   pdf.save "both_files_merged.pdf"
# data:: a PDF page (Hash), an Array of PDF pages or a parsed PDF object to be added.
def >>(data)
  insert(0, data)
end
289
+
290
# Adds PDF pages (or whole PDF files) at a specific location.
#
# Returns self (the same value `#<<` and `#>>` hand back, which is what allows chaining),
# or false - after printing a warning - when `data` is not a PDF object, a PDF page
# or an Array made only of PDF pages.
#
# location:: the location for the added page(s). Could be any number. Negative numbers count backwards (-1 being the end of the page array and 0 being the beginning). If the location is beyond bounds, the pages will be added to the end of the PDF object (or at the beginning, if the out of bounds value was negative).
# data:: a PDF page, a PDF object (i.e. CombinePDF.load("filename.pdf")) or an Array of pages (i.e. CombinePDF.load("filename.pdf").pages[0..3]).
def insert(location, data)
  pages_to_add = nil
  if data.is_a? PDF
    # merging a whole file: keep the higher version and fold in names, outlines and form data.
    merger = self.class.method(:hash_merge_new_no_page)
    @version = [@version, data.version].max
    pages_to_add = data.pages
    actual_value(@names ||= {}).update(actual_value(data.names_object), &merger)
    merge_outlines((@outlines ||= {}), data.outlines_object, location) unless actual_value(data.outlines_object).empty?
    if actual_value(@forms_data)
      actual_value(@forms_data).update(actual_value(data.forms_data), &merger) if data.forms_data
    else
      @forms_data = data.forms_data
    end
    warn 'Form data might be lost when combining PDF forms (possible conflicts).' unless data.forms_data.nil? || data.forms_data.empty?
  elsif data.is_a?(Array) && data.all? { |o| o.is_a?(Hash) && o[:Type] == :Page }
    pages_to_add = data
  elsif data.is_a?(Hash) && data[:Type] == :Page
    pages_to_add = [data]
  else
    warn "Shouldn't add objects to the file unless they are PDF objects or PDF pages (an Array or a single PDF page)."
    return false # return false, which will also stop any chaining.
  end
  pages_array = rebuild_catalog[:Pages][:referenced_object][:Kids]
  page_count = pages_array.length
  # clamp out-of-bounds locations to the beginning / end of the page array.
  if location < -page_count
    location = 0
  elsif location > page_count
    location = page_count
  end
  # the pages are inserted as one nested Array and then flattened into place.
  pages_array.insert location, pages_to_add
  pages_array.flatten!
  self
end
330
+
331
# Removes a PDF page from the file and from the catalog, then refreshes the
# catalog's page count.
#
# Returns the removed page, or nil if nothing was removed (i.e. the index is out of bounds).
#
# page_index:: the page's index in the zero (0) based page array. Negative numbers count backwards (-1 being the end of the page array and 0 being the beginning).
def remove(page_index)
  pages_node = rebuild_catalog[:Pages][:referenced_object]
  kids = pages_node[:Kids]
  removed_page = kids.delete_at(page_index)
  pages_node[:Count] = kids.length
  removed_page
end
345
+
346
# Adds page numbers to the PDF.
#
# For unicode text, a unicode font(s) must first be registered. The registered font(s) must supply the
# subset of characters used in the text. UNICODE IS AN ISSUE WITH THE PDF FORMAT - USE CAUTION.
#
# options:: a Hash of options setting the behavior and format of the page numbers:
# - :number_format a string representing the format for page number. defaults to ' - %s - ' (allows for letter numbering as well, such as "a", "b"...).
# - :location a Symbol or an Array of Symbols with the location(s) for the page numbers, can be :top, :bottom, :top_left, :top_right, :bottom_left, :bottom_right or :center (:center == full page). defaults to [:top, :bottom]. (:number_location and :stamp_location are used as fallbacks when :location isn't set.)
# - :start_at the number for the first page number. also accepts a letter ("a") for letter numbering. defaults to 1.
# - :margin_from_height a number (PDF points) for the top and bottom margins. defaults to 45.
# - :margin_from_side a number (PDF points) for the left and right margins. defaults to 15.
# - :page_range a range of pages to be numbered (i.e. (2..-1) ) defaults to all the pages (nil). Remember to set the :start_at to the correct value.
# the options Hash can also take all the options for {Page_Methods#textbox}.
# defaults to font: :Helvetica and font_size: 12.
#
# Returns the Array of pages that were numbered.
def number_pages(options = {})
  opt = {
    number_format: ' - %s - ',
    start_at: 1,
    font: :Helvetica,
    margin_from_height: 45,
    margin_from_side: 15
  }.merge(options)
  opt[:location] ||= opt[:number_location] ||= opt[:stamp_location] ||= [:top, :bottom]
  opt[:location] = [opt[:location]] unless opt[:location].is_a? Array

  page_number = opt[:start_at]
  # the page number is supplied once for every '%' found in the format string.
  format_repeater = opt[:number_format].count('%')
  small_font_size = opt[:font_size] || 12

  # some common computations can be done only once.
  from_height = opt[:margin_from_height]
  from_side = opt[:margin_from_side]
  left_position = from_side

  target_pages = opt[:page_range] ? pages[opt[:page_range]] : pages
  target_pages.each do |page|
    # Get page dimensions (falls back to the hard-coded default box).
    mediabox = page[:CropBox] || page[:MediaBox] || [0, 0, 595.3, 841.9]
    # set stamp text
    text = opt[:number_format] % (Array.new(format_repeater) { page_number })

    if opt[:location].include? :center
      # :center stretches the text box over the page (inside the margins),
      # unless the caller supplied explicit coordinates / dimensions.
      add_opt = {}
      if opt[:margin_from_height] && !opt[:height] && !opt[:y]
        add_opt[:height] = mediabox[3] - mediabox[1] - (2 * opt[:margin_from_height].to_f)
        add_opt[:y] = opt[:margin_from_height]
      end
      if opt[:margin_from_side] && !opt[:width] && !opt[:x]
        add_opt[:width] = mediabox[2] - mediabox[0] - (2 * opt[:margin_from_side].to_f)
        add_opt[:x] = opt[:margin_from_side]
      end
      page.textbox text, opt.merge(add_opt)
    end

    unless opt[:location] == [:center]
      add_opt = { font_size: small_font_size }.merge(opt)
      # compute locations for text boxes
      text_dimensions = Fonts.dimensions_of(text, opt[:font], small_font_size)
      box_width = text_dimensions[0] * 1.2
      box_height = text_dimensions[1] * 2
      page_width = mediabox[2]
      page_height = mediabox[3]

      add_opt[:width] ||= box_width
      add_opt[:height] ||= box_height

      center_position = (page_width - box_width) / 2
      right_position = page_width - from_side - box_width
      top_position = page_height - from_height
      bottom_position = from_height + box_height

      # location name => [x, y]; listed in the order the boxes are drawn.
      anchors = {
        top: [center_position, top_position],
        bottom: [center_position, bottom_position],
        top_left: [left_position, top_position],
        bottom_left: [left_position, bottom_position],
        top_right: [right_position, top_position],
        bottom_right: [right_position, bottom_position]
      }
      anchors.each do |name, (x, y)|
        next unless opt[:location].include? name
        # values supplied by the caller (held in add_opt) take precedence.
        page.textbox text, { x: x, y: y, font_size: small_font_size }.merge(add_opt)
      end
    end
    page_number = page_number.succ
  end
end
439
+
440
# This method stamps all (or some) of the pages in the PDF with the requested stamp.
#
# The method accepts:
# stamp:: either a String or a PDF page. If this is a String, you can add formatting to add page numbering (i.e. "page number %i"). Otherwise remember to escape any percent ('%') sign by doubling it (i.e. "100%% complete"), since the String is used as a format string.
# options:: an options Hash. The Hash passed by the caller is never modified.
#
# If the stamp is a PDF page, only :page_range and :underlay (to reverse-stamp) are valid options.
#
# If the stamp is a String, then all the options used by {#number_pages} or {Page_Methods#textbox} can be used.
#
# The default :location option is :center = meaning the stamp will be stamped all across the page unless the :x, :y, :width or :height options are specified.
#
# Raises a TypeError if the stamp is neither a String nor a PDF page.
def stamp_pages(stamp, options = {})
  case stamp
  when String
    # work on a copy, so that setting the default location never leaks into the caller's Hash.
    text_options = options.dup
    text_options[:location] ||= [:center]
    number_pages({ number_format: stamp }.merge(text_options))
  when Page_Methods
    targets = options[:page_range] ? pages[options[:page_range]] : pages
    if options[:underlay]
      targets.each { |p| p >> stamp } # the stamp goes underneath the existing content.
    else
      targets.each { |p| p << stamp }
    end
  else
    raise TypeError, 'expecting a String or a PDF page as the stamp.'
  end
end
467
+
468
+ # the forms_data attribute is a Hash that corresponds to the PDF form data (if any).
469
+ attr_reader :forms_data
470
+
471
+ # # experimental, allows form data updates. This function lists the form data fields.
472
+ # def form_list
473
+ # if(@forms_data && actual_value(@forms_data).is_a?(Hash) && actual_value(@forms_data)[:Fields])
474
+ # actual_value(actual_value(@forms_data)[:Fields]).map {|f| actual_value(f)[:Kids] ? ({actual_value(f)[:T] => actual_value(f)[:Kids].map {|k| actual_value(k)[:T]}}) : actual_value(f)[:T]}
475
+ # else
476
+ # nil
477
+ # end
478
+ # end
479
+ # # experimental, allows form data updates. This function gets the value of a specific form data field.
480
+ # def form_get name
481
+ # if(@forms_data && actual_value(@forms_data).is_a?(Hash) && actual_value(@forms_data)[:Fields])
482
+ # actual_value(actual_value(@forms_data)[:Fields]).map {|f| return actual_value(f)[:Kids] ? ({actual_value(f)[:V] => actual_value(f)[:Kids].map {|k| actual_value(k)[:V]}}) : actual_value(f)[:V] if actual_value(f)[:T] == name}
483
+ # # actual_value(actual_value(@forms_data)[:Fields]).each {|f| return actual_value(f)[:V] if actual_value(f)[:T] == name}
484
+ # else
485
+ # nil
486
+ # end
487
+ # nil
488
+ # end
489
+ # # experimental, allows form data updates. This function sets the value of a specific form data field.
490
+ # def form_set name, value
491
+ # if(@forms_data && actual_value(@forms_data).is_a?(Hash) && actual_value(@forms_data)[:Fields])
492
+ # actual_value(actual_value(@forms_data)[:Fields]).each {|f| return actual_value(f)[:V]=value if actual_value(f)[:T] == name}
493
+ # else
494
+ # nil
495
+ # end
496
+ # nil
497
+ # end
498
+ end
471
499
  end