combine_pdf 0.2.5 → 0.2.37

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,459 +5,497 @@
5
5
  ## is subject to the same license.
6
6
  ########################################################
7
7
 
8
-
9
-
10
-
11
-
12
-
13
8
  module CombinePDF
14
-
15
- # PDF class is the PDF object that can save itself to
16
- # a file and that can be used as a container for a full
17
- # PDF file data, including version, information etc'.
18
- #
19
- # PDF objects can be used to combine or to inject data.
20
- # == Combine/Merge PDF files or Pages
21
- # To combine PDF files (or data):
22
- # pdf = CombinePDF.new
23
- # pdf << CombinePDF.load("file1.pdf") # one way to combine, very fast.
24
- # pdf << CombinePDF.load("file2.pdf")
25
- # pdf.save "combined.pdf"
26
- # or even a one liner:
27
- # (CombinePDF.load("file1.pdf") << CombinePDF.load("file2.pdf") << CombinePDF.load("file3.pdf")).save("combined.pdf")
28
- # you can also add just odd or even pages:
29
- # pdf = CombinePDF.new
30
- # i = 0
31
- # CombinePDF.load("file.pdf").pages.each do |page|
32
- # i += 1
33
- # pdf << page if i.even?
34
- # end
35
- # pdf.save "even_pages.pdf"
36
- # notice that adding all the pages one by one is slower then adding the whole file.
37
- # == Add content to existing pages (Stamp / Watermark)
38
- # To add content to existing PDF pages, first import the new content from an existing PDF file.
39
- # after that, add the content to each of the pages in your existing PDF.
40
- #
41
- # in this example, we will add a company logo to each page:
42
- # company_logo = CombinePDF.load("company_logo.pdf").pages[0]
43
- # pdf = CombinePDF.load "content_file.pdf"
44
- # pdf.pages.each {|page| page << company_logo} # notice the << operator is on a page and not a PDF object.
45
- # pdf.save "content_with_logo.pdf"
46
- # Notice the << operator is on a page and not a PDF object. The << operator acts differently on PDF objects and on Pages.
47
- #
48
- # The << operator defaults to secure injection by renaming references to avoid conflics. For overlaying pages using compressed data that might not be editable (due to limited filter support), you can use:
49
- # pdf.pages(nil, false).each {|page| page << stamp_page}
50
- #
51
- # == Page Numbering
52
- # adding page numbers to a PDF object or file is as simple as can be:
53
- # pdf = CombinePDF.load "file_to_number.pdf"
54
- # pdf.number_pages
55
- # pdf.save "file_with_numbering.pdf"
56
- #
57
- # numbering can be done with many different options, with different formating, with or without a box object, and even with opacity values.
58
- #
59
- # == Loading PDF data
60
- # Loading PDF data can be done from file system or directly from the memory.
61
- #
62
- # Loading data from a file is easy:
63
- # pdf = CombinePDF.load("file.pdf")
64
- # you can also parse PDF files from memory:
65
- # pdf_data = IO.read 'file.pdf' # for this demo, load a file to memory
66
- # pdf = CombinePDF.parse(pdf_data)
67
- # Loading from the memory is especially effective for importing PDF data recieved through the internet or from a different authoring library such as Prawn.
68
- class PDF
69
-
70
- # lists the Hash keys used for PDF objects
71
- #
72
- # the CombinePDF library doesn't use special classes for its objects (PDFPage class, PDFStream class or anything like that).
73
- #
74
- # there is only one PDF class which represents the whole of the PDF file.
75
- #
76
- # this Hash lists the private Hash keys that the CombinePDF library uses to
77
- # differentiate between complex PDF objects.
78
- PRIVATE_HASH_KEYS = [:indirect_reference_id, :indirect_generation_number, :raw_stream_content, :is_reference_only, :referenced_object, :indirect_without_dictionary]
79
-
80
- # the objects attribute is an Array containing all the PDF sub-objects for te class.
81
- attr_reader :objects
82
- # the info attribute is a Hash that sets the Info data for the PDF.
83
- # use, for example:
84
- # pdf.info[:Title] = "title"
85
- attr_reader :info
86
- # set/get the PDF version of the file (1.1-1.7) - shuold be type Float.
87
- attr_accessor :version
88
- # the viewer_preferences attribute is a Hash that sets the ViewerPreferences data for the PDF.
89
- # use, for example:
90
- # pdf.viewer_preferences[:HideMenubar] = true
91
- attr_reader :viewer_preferences
92
-
93
- def initialize (parser = nil)
94
- # default before setting
95
- @objects = []
96
- @version = 0
97
- @viewer_preferences, @info = {}, {}
98
- parser ||= PDFParser.new("")
99
- raise TypeError, "initialization error, expecting CombinePDF::PDFParser or nil, but got #{parser.class.name}" unless parser.is_a? PDFParser
100
- @objects = parser.parse
101
- # remove any existing id's
102
- remove_old_ids
103
- # set data from parser
104
- @version = parser.version if parser.version.is_a? Float
105
- @info = parser.info_object || {}
106
-
107
- # general globals
108
- @set_start_id = 1
109
- @info[:Producer] = "Ruby CombinePDF #{CombinePDF::VERSION} Library"
110
- @info.delete :CreationDate
111
- @info.delete :ModDate
112
- end
113
-
114
- # adds a new page to the end of the PDF object.
115
- #
116
- # returns the new page object.
117
- #
118
- # unless the media box is specified, it defaults to US Letter: [0, 0, 612.0, 792.0]
119
- def new_page(mediabox = [0, 0, 612.0, 792.0], location = -1)
120
- p = PDFWriter.new(mediabox)
121
- insert(-1, p )
122
- p
123
- end
124
-
125
- # get the title for the pdf
126
- # The title is stored in the information dictionary and isn't required
127
- def title
128
- return @info[:Title]
129
- end
130
- # set the title for the pdf
131
- # The title is stored in the information dictionary and isn't required
132
- # new_title:: a string that is the new author value.
133
- def title=(new_title = nil)
134
- @info[:Title] = new_title
135
- end
136
- # get the author value for the pdf.
137
- # The author is stored in the information dictionary and isn't required
138
- def author
139
- return @info[:Author]
140
- end
141
- # set the author value for the pdf.
142
- # The author is stored in the information dictionary and isn't required
143
- #
144
- # new_title:: a string that is the new author value.
145
- def author=(new_author = nil)
146
- @info[:Author] = new_author
147
- end
148
-
149
- # Save the PDF to file.
150
- #
151
- # file_name:: is a string or path object for the output.
152
- #
153
- # **Notice!** if the file exists, it **WILL** be overwritten.
154
- def save(file_name, options = {})
155
- IO.binwrite file_name, to_pdf(options)
156
- end
157
-
158
- # Formats the data to PDF formats and returns a binary string that represents the PDF file content.
159
- #
160
- # This method is used by the save(file_name) method to save the content to a file.
161
- #
162
- # use this to export the PDF file without saving to disk (such as sending through HTTP ect').
163
- def to_pdf options = {}
164
- #reset version if not specified
165
- @version = 1.5 if @version.to_f == 0.0
166
- #set creation date for merged file
167
- @info[:CreationDate] = Time.now.strftime "D:%Y%m%d%H%M%S%:::z'00"
168
- #rebuild_catalog
169
- catalog = rebuild_catalog_and_objects
170
- # add ID and generation numbers to objects
171
- renumber_object_ids
172
-
173
- out = []
174
- xref = []
175
- indirect_object_count = 1 #the first object is the null object
176
- #write head (version and binanry-code)
177
- out << "%PDF-#{@version.to_s}\n%\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00".force_encoding(Encoding::ASCII_8BIT)
178
-
179
- #collect objects and set xref table locations
180
- loc = 0
181
- out.each {|line| loc += line.bytesize + 1}
182
- @objects.each do |o|
183
- indirect_object_count += 1
184
- xref << loc
185
- out << object_to_pdf(o)
186
- loc += out.last.bytesize + 1
187
- end
188
- xref_location = loc
189
- # xref_location = 0
190
- # out.each { |line| xref_location += line.bytesize + 1}
191
- out << "xref\n0 #{(indirect_object_count).to_s}\n0000000000 65535 f \n"
192
- xref.each {|offset| out << ( out.pop + ("%010d 00000 n \n" % offset) ) }
193
- out << out.pop + "trailer"
194
- out << "<<\n/Root #{false || "#{catalog[:indirect_reference_id]} #{catalog[:indirect_generation_number]} R"}"
195
- out << "/Size #{indirect_object_count.to_s}"
196
- if @info.is_a?(Hash)
197
- PRIVATE_HASH_KEYS.each {|key| @info.delete key} # make sure the dictionary is rendered inline, without stream
198
- @info[:CreationDate] = @info[:ModDate] = Time.now.strftime "D:%Y%m%d%H%M%S%:::z'00"
199
- @info[:Subject] = options[:subject] if options[:subject]
200
- @info[:Producer] = options[:producer] if options[:producer]
201
- out << "/Info #{object_to_pdf @info}"
202
- end
203
- out << ">>\nstartxref\n#{xref_location.to_s}\n%%EOF"
204
- # when finished, remove the numbering system and keep only pointers
205
- remove_old_ids
206
- # output the pdf stream
207
- out.join("\n").force_encoding(Encoding::ASCII_8BIT)
208
- end
209
-
210
- # this method returns all the pages cataloged in the catalog.
211
- #
212
- # if no catalog is passed, it seeks the existing catalog(s) and searches
213
- # for any registered Page objects.
214
- #
215
- # Page objects are Hash class objects. the page methods are added using a mixin or inheritance.
216
- #
217
- # catalogs:: a catalog, or an Array of catalog objects. defaults to the existing catalog.
218
- def pages(catalogs = nil)
219
- page_list = []
220
- catalogs ||= get_existing_catalogs
221
-
222
- if catalogs.is_a?(Array)
223
- catalogs.each {|c| page_list.push *( pages(c) ) unless c.nil?}
224
- elsif catalogs.is_a?(Hash)
225
- if catalogs[:is_reference_only]
226
- if catalogs[:referenced_object]
227
- page_list.push *( pages(catalogs[:referenced_object]) )
228
- else
229
- warn "couldn't follow reference!!! #{catalogs} not found!"
230
- end
231
- else
232
- case catalogs[:Type]
233
- when :Page
234
- page_list << catalogs
235
- when :Pages
236
- page_list.push *(pages(catalogs[:Kids])) unless catalogs[:Kids].nil?
237
- when :Catalog
238
- page_list.push *(pages(catalogs[:Pages])) unless catalogs[:Pages].nil?
239
- end
240
- end
241
- end
242
- page_list
243
- end
244
-
245
- # returns an array with the different fonts used in the file.
246
- #
247
- # Type0 font objects ( "font[:Subtype] == :Type0" ) can be registered with the font library
248
- # for use in PDFWriter objects (font numbering / table creation etc').
249
- # @param limit_to_type0 [true,false] limits the list to type0 fonts.
250
- def fonts(limit_to_type0 = false)
251
- fonts_array = []
252
- pages.each do |p|
253
- p[:Resources][:Font].values.each do |f|
254
- f = f[:referenced_object] if f[:referenced_object]
255
- if (limit_to_type0 || f[:Subtype] = :Type0) && f[:Type] == :Font && !fonts_array.include?(f)
256
- fonts_array << f
257
- end
258
- end
259
- end
260
- fonts_array
261
- end
262
-
263
- # add the pages (or file) to the PDF (combine/merge) and RETURNS SELF, for nesting.
264
- # for example:
265
- #
266
- # pdf = CombinePDF.new "first_file.pdf"
267
- #
268
- # pdf << CombinePDF.new "second_file.pdf"
269
- #
270
- # pdf.save "both_files_merged.pdf"
271
- # data:: is PDF page (Hash), and Array of PDF pages or a parsed PDF object to be added.
272
- def << (data)
273
- insert -1, data
274
- end
275
-
276
- # add the pages (or file) to the BEGINNING of the PDF (combine/merge) and RETURNS SELF for nesting operators.
277
- # for example:
278
- #
279
- # pdf = CombinePDF.new "second_file.pdf"
280
- #
281
- # pdf >> CombinePDF.new "first_file.pdf"
282
- #
283
- # pdf.save "both_files_merged.pdf"
284
- # data:: is PDF page (Hash), and Array of PDF pages or a parsed PDF object to be added.
285
- def >> (data)
286
- insert 0, data
287
- end
288
-
289
- # add PDF pages (or PDF files) into a specific location.
290
- #
291
- # returns the new pages Array! (unlike `#<<`, doesn't return self!)
292
- #
293
- # location:: the location for the added page(s). Could be any number. negative numbers represent a count backwards (-1 being the end of the page array and 0 being the begining). if the location is beyond bounds, the pages will be added to the end of the PDF object (or at the begining, if the out of bounds was a negative number).
294
- # data:: a PDF page, a PDF file (CombinePDF.new "filname.pdf") or an array of pages (CombinePDF.new("filname.pdf").pages[0..3]).
295
- def insert(location, data)
296
- pages_to_add = nil
297
- if data.is_a? PDF
298
- @version = [@version, data.version].max
299
- pages_to_add = data.pages
300
- elsif data.is_a?(Array) && (data.select {|o| !(o.is_a?(Hash) && o[:Type] == :Page) } ).empty?
301
- pages_to_add = data
302
- elsif data.is_a?(Hash) && data[:Type] == :Page
303
- pages_to_add = [data]
304
- else
305
- warn "Shouldn't add objects to the file unless they are PDF objects or PDF pages (an Array or a single PDF page)."
306
- return false # return false, which will also stop any chaining.
307
- end
308
- # pages_to_add.map! {|page| page.copy }
309
- catalog = rebuild_catalog
310
- pages_array = catalog[:Pages][:referenced_object][:Kids]
311
- page_count = pages_array.length
312
- if location < 0 && (page_count + location < 0 )
313
- location = 0
314
- elsif location > 0 && (location > page_count)
315
- location = page_count
316
- end
317
- pages_array.insert location, pages_to_add
318
- pages_array.flatten!
319
- self
320
- end
321
-
322
- # removes a PDF page from the file and the catalog
323
- #
324
- # returns the removed page.
325
- #
326
- # returns nil if failed or if out of bounds.
327
- #
328
- # page_index:: the page's index in the zero (0) based page array. negative numbers represent a count backwards (-1 being the end of the page array and 0 being the begining).
329
- def remove(page_index)
330
- catalog = rebuild_catalog
331
- pages_array = catalog[:Pages][:referenced_object][:Kids]
332
- removed_page = pages_array.delete_at page_index
333
- catalog[:Pages][:referenced_object][:Count] = pages_array.length
334
- removed_page
335
- end
336
-
337
-
338
- # add page numbers to the PDF
339
- #
340
- # For unicode text, a unicode font(s) must first be registered. the registered font(s) must supply the
341
- # subset of characters used in the text. UNICODE IS AN ISSUE WITH THE PDF FORMAT - USE CAUSION.
342
- #
343
- # options:: a Hash of options setting the behavior and format of the page numbers:
344
- # - :number_format a string representing the format for page number. defaults to ' - %s - ' (allows for letter numbering as well, such as "a", "b"...).
345
- # - :location an Array containing the location for the page numbers, can be :top, :buttom, :top_left, :top_right, :bottom_left, :bottom_right or :center (:center == full page). defaults to [:top, :buttom].
346
- # - :start_at a Fixnum that sets the number for first page number. also accepts a letter ("a") for letter numbering. defaults to 1.
347
- # - :margin_from_height a number (PDF points) for the top and buttom margins. defaults to 45.
348
- # - :margin_from_side a number (PDF points) for the left and right margins. defaults to 15.
349
- # - :page_range a range of pages to be numbered (i.e. (2..-1) ) defaults to all the pages (nil). Remember to set the :start_at to the correct value.
350
- # the options Hash can also take all the options for {Page_Methods#textbox}.
351
- # defaults to font: :Helvetica, font_size: 12 and no box (:border_width => 0, :box_color => nil).
352
- def number_pages(options = {})
353
- opt = {
354
- number_format: ' - %s - ',
355
- start_at: 1,
356
- font: :Helvetica,
357
- margin_from_height: 45,
358
- margin_from_side: 15
359
- }
360
- opt.update options
361
- opt[:location] ||= opt[:number_location] ||= opt[:stamp_location] ||= [:top, :bottom]
362
- opt[:location] = [opt[:location]] unless opt[:location].is_a? (Array)
363
-
364
- page_number = opt[:start_at]
365
- format_repeater = opt[:number_format].count('%')
366
- just_center = [:center]
367
- small_font_size = opt[:font_size] || 12
368
-
369
- # some common computations can be done only once.
370
- from_height = opt[:margin_from_height]
371
- from_side = opt[:margin_from_side]
372
- left_position = from_side
373
-
374
- (opt[:page_range] ? pages[opt[:page_range]] : pages).each do |page|
375
- # Get page dimensions
376
- mediabox = page[:CropBox] || page[:MediaBox] || [0, 0, 595.3, 841.9]
377
- # set stamp text
378
- text = opt[:number_format] % (Array.new(format_repeater) {page_number})
379
- if opt[:location].include? :center
380
- add_opt = {}
381
- if opt[:margin_from_height] && !opt[:height] && !opt[:y]
382
- add_opt[:height] = mediabox[3] - mediabox[1] - (2*opt[:margin_from_height].to_f)
383
- add_opt[:y] = opt[:margin_from_height]
384
- end
385
- if opt[:margin_from_side] && !opt[:width] && !opt[:x]
386
- add_opt[:width] = mediabox[2] - mediabox[0] - (2*opt[:margin_from_side].to_f)
387
- add_opt[:x] = opt[:margin_from_side]
388
- end
389
- page.textbox text, opt.merge(add_opt)
390
- end
391
- unless opt[:location] == just_center
392
- add_opt = { font_size: small_font_size }.merge(opt)
393
- # text = opt[:number_format] % page_number
394
- # compute locations for text boxes
395
- text_dimantions = Fonts.dimensions_of( text, opt[:font], small_font_size )
396
- box_width = text_dimantions[0] * 1.2
397
- box_height = text_dimantions[1] * 2
398
- page_width = mediabox[2]
399
- page_height = mediabox[3]
400
-
401
- add_opt[:width] ||= box_width
402
- add_opt[:height] ||= box_height
403
-
404
- center_position = (page_width - box_width)/2
405
- right_position = page_width - from_side - box_width
406
- top_position = page_height - from_height
407
- bottom_position = from_height + box_height
408
-
409
- if opt[:location].include? :top
410
- page.textbox text, {x: center_position, y: top_position }.merge(add_opt)
411
- end
412
- if opt[:location].include? :bottom
413
- page.textbox text, {x: center_position, y: bottom_position }.merge(add_opt)
414
- end
415
- if opt[:location].include? :top_left
416
- page.textbox text, {x: left_position, y: top_position, font_size: small_font_size }.merge(add_opt)
417
- end
418
- if opt[:location].include? :bottom_left
419
- page.textbox text, {x: left_position, y: bottom_position, font_size: small_font_size }.merge(add_opt)
420
- end
421
- if opt[:location].include? :top_right
422
- page.textbox text, {x: right_position, y: top_position, font_size: small_font_size }.merge(add_opt)
423
- end
424
- if opt[:location].include? :bottom_right
425
- page.textbox text, {x: right_position, y: bottom_position, font_size: small_font_size }.merge(add_opt)
426
- end
427
- end
428
- page_number = page_number.succ
429
- end
430
-
431
- end
432
- # This method stamps all (or some) of the pages is the PDF with the requested stamp.
433
- #
434
- # The method accept:
435
- # stamp:: either a String or a PDF page. If this is a String, you can add formating to add page numbering (i.e. "page number %i"). otherwise remember to escape any percent ('%') sign (i.e. "page \%number not shown\%").
436
- # options:: an options Hash.
437
- #
438
- # If the stamp is a PDF page, only :page_range and :underlay (to reverse-stamp) are valid options.
439
- #
440
- # If the stamp is a String, than all the options used by {#number_pages} or {Page_Methods#textbox} can be used.
441
- #
442
- # The default :location option is :center = meaning the stamp will be stamped all across the page unless the :x, :y, :width or :height options are specified.
443
- def stamp_pages stamp, options = {}
444
- case stamp
445
- when String
446
- options[:location] ||= [:center]
447
- number_pages({number_format: stamp}.merge(options))
448
- when Page_Methods
449
- # stamp = stamp.copy(true)
450
- if options[:underlay]
451
- (options[:page_range] ? pages[options[:page_range]] : pages).each {|p| p >> stamp}
452
- else
453
- (options[:page_range] ? pages[options[:page_range]] : pages).each {|p| p << stamp}
454
- end
455
- else
456
- raise TypeError, "expecting a String or a PDF page as the stamp."
457
- end
458
- end
459
-
460
- end
461
-
9
+ # PDF class is the PDF object that can save itself to
10
+ # a file and that can be used as a container for a full
11
+ # PDF file data, including version, information etc'.
12
+ #
13
+ # PDF objects can be used to combine or to inject data.
14
+ # == Combine/Merge PDF files or Pages
15
+ # To combine PDF files (or data):
16
+ # pdf = CombinePDF.new
17
+ # pdf << CombinePDF.load("file1.pdf") # one way to combine, very fast.
18
+ # pdf << CombinePDF.load("file2.pdf")
19
+ # pdf.save "combined.pdf"
20
+ # or even a one liner:
21
+ # (CombinePDF.load("file1.pdf") << CombinePDF.load("file2.pdf") << CombinePDF.load("file3.pdf")).save("combined.pdf")
22
+ # you can also add just odd or even pages:
23
+ # pdf = CombinePDF.new
24
+ # i = 0
25
+ # CombinePDF.load("file.pdf").pages.each do |page|
26
+ # i += 1
27
+ # pdf << page if i.even?
28
+ # end
29
+ # pdf.save "even_pages.pdf"
30
+ # notice that adding all the pages one by one is slower than adding the whole file.
31
+ # == Add content to existing pages (Stamp / Watermark)
32
+ # To add content to existing PDF pages, first import the new content from an existing PDF file.
33
+ # after that, add the content to each of the pages in your existing PDF.
34
+ #
35
+ # in this example, we will add a company logo to each page:
36
+ # company_logo = CombinePDF.load("company_logo.pdf").pages[0]
37
+ # pdf = CombinePDF.load "content_file.pdf"
38
+ # pdf.pages.each {|page| page << company_logo} # notice the << operator is on a page and not a PDF object.
39
+ # pdf.save "content_with_logo.pdf"
40
+ # Notice the << operator is on a page and not a PDF object. The << operator acts differently on PDF objects and on Pages.
41
+ #
42
+ # The << operator defaults to secure injection by renaming references to avoid conflicts. For overlaying pages using compressed data that might not be editable (due to limited filter support), you can use:
43
+ # pdf.pages(nil, false).each {|page| page << stamp_page}
44
+ #
45
+ # == Page Numbering
46
+ # adding page numbers to a PDF object or file is as simple as can be:
47
+ # pdf = CombinePDF.load "file_to_number.pdf"
48
+ # pdf.number_pages
49
+ # pdf.save "file_with_numbering.pdf"
50
+ #
51
+ # numbering can be done with many different options, with different formatting, with or without a box object, and even with opacity values.
52
+ #
53
+ # == Loading PDF data
54
+ # Loading PDF data can be done from file system or directly from the memory.
55
+ #
56
+ # Loading data from a file is easy:
57
+ # pdf = CombinePDF.load("file.pdf")
58
+ # you can also parse PDF files from memory:
59
+ # pdf_data = IO.read 'file.pdf' # for this demo, load a file to memory
60
+ # pdf = CombinePDF.parse(pdf_data)
61
+ # Loading from the memory is especially effective for importing PDF data received through the internet or from a different authoring library such as Prawn.
62
+ class PDF
63
+ # lists the Hash keys used for PDF objects
64
+ #
65
+ # the CombinePDF library doesn't use special classes for its objects (PDFPage class, PDFStream class or anything like that).
66
+ #
67
+ # there is only one PDF class which represents the whole of the PDF file.
68
+ #
69
+ # this Array lists the private Hash keys that the CombinePDF library uses to
70
+ # differentiate between complex PDF objects.
71
+ PRIVATE_HASH_KEYS = [:indirect_reference_id, :indirect_generation_number, :raw_stream_content, :is_reference_only, :referenced_object, :indirect_without_dictionary, :related_objects].freeze
72
+
73
+ # the objects attribute is an Array containing all the PDF sub-objects for the class.
74
+ attr_reader :objects
75
+ # the info attribute is a Hash that sets the Info data for the PDF.
76
+ # use, for example:
77
+ # pdf.info[:Title] = "title"
78
+ attr_reader :info
79
+ # set/get the PDF version of the file (1.1-1.7) - should be type Float.
80
+ attr_accessor :version
81
+ # the viewer_preferences attribute is a Hash that sets the ViewerPreferences data for the PDF.
82
+ # use, for example:
83
+ # pdf.viewer_preferences[:HideMenubar] = true
84
+ attr_reader :viewer_preferences
85
+
86
+ def initialize(parser = nil)
87
+ # default before setting
88
+ @objects = []
89
+ @version = 0
90
+ @viewer_preferences = {}
91
+ @info = {}
92
+ parser ||= PDFParser.new('')
93
+ raise TypeError, "initialization error, expecting CombinePDF::PDFParser or nil, but got #{parser.class.name}" unless parser.is_a? PDFParser
94
+ @objects = parser.parse
95
+ # remove any existing id's
96
+ remove_old_ids
97
+ # set data from parser
98
+ @version = parser.version if parser.version.is_a? Float
99
+ @info = parser.info_object || {}
100
+ @names = parser.names_object || {}
101
+ @forms_data = parser.forms_object || {}
102
+ @outlines = parser.outlines_object || {}
103
+ # rebuild the catalog, to fix wkhtmltopdf's use of static page numbers
104
+ rebuild_catalog
105
+
106
+ # general globals
107
+ @set_start_id = 1
108
+ @info[:Producer] = "Ruby CombinePDF #{CombinePDF::VERSION} Library"
109
+ @info.delete :CreationDate
110
+ @info.delete :ModDate
111
+ end
112
+
113
+ # adds a new page to the end of the PDF object.
114
+ #
115
+ # returns the new page object.
116
+ #
117
+ # unless the media box is specified, it defaults to US Letter: [0, 0, 612.0, 792.0]
118
+ def new_page(mediabox = [0, 0, 612.0, 792.0], _location = -1)
119
+ p = PDFWriter.new(mediabox)
120
+ insert(-1, p)
121
+ p
122
+ end
123
+
124
+ # get the title for the pdf
125
+ # The title is stored in the information dictionary and isn't required
126
+ def title
127
+ @info[:Title]
128
+ end
129
+
130
+ # set the title for the pdf
131
+ # The title is stored in the information dictionary and isn't required
132
+ # new_title:: a string that is the new title value.
133
+ def title=(new_title = nil)
134
+ @info[:Title] = new_title
135
+ end
136
+
137
+ # get the author value for the pdf.
138
+ # The author is stored in the information dictionary and isn't required
139
+ def author
140
+ @info[:Author]
141
+ end
142
+
143
+ # set the author value for the pdf.
144
+ # The author is stored in the information dictionary and isn't required
145
+ #
146
+ # new_author:: a string that is the new author value.
147
+ def author=(new_author = nil)
148
+ @info[:Author] = new_author
149
+ end
150
+
151
+ # Clears any existing form data.
152
+ def clear_forms_data
153
+ @forms_data.nil? || @forms_data.clear
154
+ end
155
+
156
+ # Save the PDF to file.
157
+ #
158
+ # file_name:: is a string or path object for the output.
159
+ #
160
+ # **Notice!** if the file exists, it **WILL** be overwritten.
161
+ def save(file_name, options = {})
162
+ IO.binwrite file_name, to_pdf(options)
163
+ end
164
+
165
+ # Formats the data to PDF formats and returns a binary string that represents the PDF file content.
166
+ #
167
+ # This method is used by the save(file_name) method to save the content to a file.
168
+ #
169
+ # use this to export the PDF file without saving to disk (such as sending through HTTP etc.).
170
+ def to_pdf(options = {})
171
+ # reset version if not specified
172
+ @version = 1.5 if @version.to_f == 0.0
173
+ # set info for merged file
174
+ @info[:ModDate] = @info[:CreationDate] = Time.now.strftime "D:%Y%m%d%H%M%S%:::z'00"
175
+ @info[:Subject] = options[:subject] if options[:subject]
176
+ @info[:Producer] = options[:producer] if options[:producer]
177
+ # rebuild_catalog
178
+ catalog = rebuild_catalog_and_objects
179
+ # add ID and generation numbers to objects
180
+ renumber_object_ids
181
+
182
+ out = []
183
+ xref = []
184
+ indirect_object_count = 1 # the first object is the null object
185
+ # write head (version and binary-code)
186
+ out << "%PDF-#{@version}\n%\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00".force_encoding(Encoding::ASCII_8BIT)
187
+
188
+ # collect objects and set xref table locations
189
+ loc = 0
190
+ out.each { |line| loc += line.bytesize + 1 }
191
+ @objects.each do |o|
192
+ indirect_object_count += 1
193
+ xref << loc
194
+ out << object_to_pdf(o)
195
+ loc += out.last.bytesize + 1
196
+ end
197
+ xref_location = loc
198
+ # xref_location = 0
199
+ # out.each { |line| xref_location += line.bytesize + 1}
200
+ out << "xref\n0 #{indirect_object_count}\n0000000000 65535 f \n"
201
+ xref.each { |offset| out << (out.pop + ("%010d 00000 n \n" % offset)) }
202
+ out << out.pop + 'trailer'
203
+ out << "<<\n/Root #{false || "#{catalog[:indirect_reference_id]} #{catalog[:indirect_generation_number]} R"}"
204
+ out << "/Size #{indirect_object_count}"
205
+ out << "/Info #{@info[:indirect_reference_id]} #{@info[:indirect_generation_number]} R"
206
+ out << ">>\nstartxref\n#{xref_location}\n%%EOF"
207
+ # when finished, remove the numbering system and keep only pointers
208
+ remove_old_ids
209
+ # output the pdf stream
210
+ out.join("\n").force_encoding(Encoding::ASCII_8BIT)
211
+ end
212
+
213
+ # this method returns all the pages cataloged in the catalog.
214
+ #
215
+ # if no catalog is passed, it seeks the existing catalog(s) and searches
216
+ # for any registered Page objects.
217
+ #
218
+ # Page objects are Hash class objects. the page methods are added using a mixin or inheritance.
219
+ #
220
+ # catalogs:: a catalog, or an Array of catalog objects. defaults to the existing catalog.
221
+ def pages(catalogs = nil)
222
+ page_list = []
223
+ catalogs ||= get_existing_catalogs
224
+
225
+ if catalogs.is_a?(Array)
226
+ catalogs.each { |c| page_list.concat pages(c) unless c.nil? }
227
+ elsif catalogs.is_a?(Hash)
228
+ if catalogs[:is_reference_only]
229
+ if catalogs[:referenced_object]
230
+ page_list.concat pages(catalogs[:referenced_object])
231
+ else
232
+ warn "couldn't follow reference!!! #{catalogs} not found!"
233
+ end
234
+ else
235
+ case catalogs[:Type]
236
+ when :Page
237
+ page_list << catalogs
238
+ when :Pages
239
+ page_list.concat pages(catalogs[:Kids]) unless catalogs[:Kids].nil?
240
+ when :Catalog
241
+ page_list.concat pages(catalogs[:Pages]) unless catalogs[:Pages].nil?
242
+ end
243
+ end
244
+ end
245
+ page_list
246
+ end
247
+
248
+ # returns an array with the different fonts used in the file.
249
+ #
250
+ # Type0 font objects ( "font[:Subtype] == :Type0" ) can be registered with the font library
251
+ # for use in PDFWriter objects (font numbering / table creation etc').
252
+ # @param limit_to_type0 [true,false] limits the list to type0 fonts.
253
+ def fonts(limit_to_type0 = false)
254
+ fonts_array = []
255
+ pages.each do |p|
256
+ p[:Resources][:Font].values.each do |f|
257
+ f = f[:referenced_object] if f[:referenced_object]
258
+ if (limit_to_type0 || f[:Subtype] = :Type0) && f[:Type] == :Font && !fonts_array.include?(f)
259
+ fonts_array << f
260
+ end
261
+ end
262
+ end
263
+ fonts_array
264
+ end
265
+
266
+ # add the pages (or file) to the PDF (combine/merge) and RETURNS SELF, for nesting.
267
+ # for example:
268
+ #
269
+ # pdf = CombinePDF.new "first_file.pdf"
270
+ #
271
+ # pdf << CombinePDF.new "second_file.pdf"
272
+ #
273
+ # pdf.save "both_files_merged.pdf"
274
+ # data:: is a PDF page (Hash), an Array of PDF pages or a parsed PDF object to be added.
275
+ def <<(data)
276
+ insert -1, data
277
+ end
278
+
279
+ # add the pages (or file) to the BEGINNING of the PDF (combine/merge) and RETURNS SELF for nesting operators.
280
+ # for example:
281
+ #
282
+ # pdf = CombinePDF.new "second_file.pdf"
283
+ #
284
+ # pdf >> CombinePDF.new "first_file.pdf"
285
+ #
286
+ # pdf.save "both_files_merged.pdf"
287
+ # data:: is a PDF page (Hash), an Array of PDF pages or a parsed PDF object to be added.
288
+ def >>(data)
289
+ insert 0, data
290
+ end
291
+
292
+ # add PDF pages (or PDF files) into a specific location.
293
+ #
294
+ # returns self (like `#<<`), so calls can be chained; returns false if the data is not a PDF object, a PDF page or an Array of PDF pages.
295
+ #
296
+ # location:: the location for the added page(s). Could be any number. negative numbers represent a count backwards (-1 being the end of the page array and 0 being the beginning). if the location is beyond bounds, the pages will be added to the end of the PDF object (or at the beginning, if the out of bounds was a negative number).
297
+ # data:: a PDF page, a PDF file (CombinePDF.new "filname.pdf") or an array of pages (CombinePDF.new("filname.pdf").pages[0..3]).
298
+ def insert(location, data)
299
+ pages_to_add = nil
300
+ if data.is_a? PDF
301
+ @version = [@version, data.version].max
302
+ pages_to_add = data.pages
303
+ actual_value(@names ||= {}.dup).update actual_value(data.names_object), &self.class.method(:hash_merge_new_no_page)
304
+ merge_outlines((@outlines ||= {}.dup), data.outlines_object, location) unless actual_value(data.outlines_object).empty?
305
+ if actual_value(@forms_data)
306
+ actual_value(@forms_data).update actual_value(data.forms_data), &self.class.method(:hash_merge_new_no_page) if data.forms_data
307
+ else
308
+ @forms_data = data.forms_data
309
+ end
310
+ warn 'Form data might be lost when combining PDF forms (possible conflicts).' unless data.forms_data.nil? || data.forms_data.empty?
311
+ elsif data.is_a?(Array) && (data.select { |o| !(o.is_a?(Hash) && o[:Type] == :Page) }).empty?
312
+ pages_to_add = data
313
+ elsif data.is_a?(Hash) && data[:Type] == :Page
314
+ pages_to_add = [data]
315
+ else
316
+ warn "Shouldn't add objects to the file unless they are PDF objects or PDF pages (an Array or a single PDF page)."
317
+ return false # return false, which will also stop any chaining.
318
+ end
319
+ # pages_to_add.map! {|page| page.copy }
320
+ catalog = rebuild_catalog
321
+ pages_array = catalog[:Pages][:referenced_object][:Kids]
322
+ page_count = pages_array.length
323
+ if location < 0 && (page_count + location < 0)
324
+ location = 0
325
+ elsif location > 0 && (location > page_count)
326
+ location = page_count
327
+ end
328
+ pages_array.insert location, pages_to_add
329
+ pages_array.flatten!
330
+ self
331
+ end
332
+
333
# Removes a single PDF page from the file and from the catalog, updating the page count.
#
# Returns the removed page, or nil when the index is out of bounds.
#
# page_index:: the page's index in the zero (0) based page array. negative numbers represent a count backwards (-1 being the end of the page array and 0 being the beginning).
def remove(page_index)
  pages_node = rebuild_catalog[:Pages][:referenced_object]
  kids = pages_node[:Kids]
  removed = kids.delete_at(page_index)
  pages_node[:Count] = kids.length
  removed
end
347
+
348
# Adds page numbers to the PDF.
#
# For unicode text, a unicode font(s) must first be registered. the registered font(s) must supply the
# subset of characters used in the text. UNICODE IS AN ISSUE WITH THE PDF FORMAT - USE CAUTION.
#
# options:: a Hash of options setting the behavior and format of the page numbers:
# - :number_format a string representing the format for page number. defaults to ' - %s - ' (allows for letter numbering as well, such as "a", "b"...).
# - :location an Array containing the location for the page numbers, can be :top, :bottom, :top_left, :top_right, :bottom_left, :bottom_right or :center (:center == full page). defaults to [:top, :bottom].
# - :start_at an Integer that sets the number for first page number. also accepts a letter ("a") for letter numbering. defaults to 1.
# - :margin_from_height a number (PDF points) for the top and bottom margins. defaults to 45.
# - :margin_from_side a number (PDF points) for the left and right margins. defaults to 15.
# - :page_range a range of pages to be numbered (i.e. (2..-1) ) defaults to all the pages (nil). Remember to set the :start_at to the correct value. A range that selects no pages numbers nothing.
# the options Hash can also take all the options for {Page_Methods#textbox}.
# defaults to font: :Helvetica, font_size: 12 and no box (:border_width => 0, :box_color => nil).
#
# Returns the Array of pages that were iterated over.
def number_pages(options = {})
  opt = {
    number_format: ' - %s - ',
    start_at: 1,
    font: :Helvetica,
    margin_from_height: 45,
    margin_from_side: 15
  }
  opt.update options
  opt[:location] ||= opt[:number_location] ||= opt[:stamp_location] ||= [:top, :bottom]
  opt[:location] = [opt[:location]] unless opt[:location].is_a? Array

  page_number = opt[:start_at]
  # one format argument is supplied for every '%' found in the format string.
  format_repeater = opt[:number_format].count('%')
  small_font_size = opt[:font_size] || 12

  # some common computations can be done only once.
  from_height = opt[:margin_from_height]
  from_side = opt[:margin_from_side]
  left_position = from_side
  stamp_center = opt[:location].include?(:center)
  stamp_edges = opt[:location] != [:center]

  # an out of bounds :page_range makes Array#[] return nil; treat that as "no pages".
  targets = opt[:page_range] ? pages[opt[:page_range]] : pages
  (targets || []).each do |page|
    # Get page dimensions (falls back to 595.3 x 841.9 points when the page states none).
    mediabox = page[:CropBox] || page[:MediaBox] || [0, 0, 595.3, 841.9]
    # set stamp text
    text = opt[:number_format] % (Array.new(format_repeater) { page_number })

    if stamp_center
      # :center fills the page (minus the margins) unless explicit geometry was given.
      add_opt = {}
      if opt[:margin_from_height] && !opt[:height] && !opt[:y]
        add_opt[:height] = mediabox[3] - mediabox[1] - (2 * opt[:margin_from_height].to_f)
        add_opt[:y] = opt[:margin_from_height]
      end
      if opt[:margin_from_side] && !opt[:width] && !opt[:x]
        add_opt[:width] = mediabox[2] - mediabox[0] - (2 * opt[:margin_from_side].to_f)
        add_opt[:x] = opt[:margin_from_side]
      end
      page.textbox text, opt.merge(add_opt)
    end

    if stamp_edges
      add_opt = { font_size: small_font_size }.merge(opt)
      # compute locations for text boxes
      text_dimensions = Fonts.dimensions_of(text, opt[:font], small_font_size)
      box_width = text_dimensions[0] * 1.2
      box_height = text_dimensions[1] * 2
      # NOTE(review): unlike the :center branch, these use the box's upper-right corner
      # without subtracting its origin (mediabox[0] / mediabox[1]).
      page_width = mediabox[2]
      page_height = mediabox[3]

      add_opt[:width] ||= box_width
      add_opt[:height] ||= box_height

      center_position = (page_width - box_width) / 2
      right_position = page_width - from_side - box_width
      top_position = page_height - from_height
      bottom_position = from_height + box_height

      # every supported edge location, mapped to its [x, y] anchor; explicit options
      # in add_opt (including a caller supplied :x / :y) take precedence.
      placements = {
        top: [center_position, top_position],
        bottom: [center_position, bottom_position],
        top_left: [left_position, top_position],
        bottom_left: [left_position, bottom_position],
        top_right: [right_position, top_position],
        bottom_right: [right_position, bottom_position]
      }
      placements.each do |name, (x, y)|
        next unless opt[:location].include?(name)
        page.textbox text, { x: x, y: y }.merge(add_opt)
      end
    end
    page_number = page_number.succ
  end
end
441
+
442
# This method stamps all (or some) of the pages in the PDF with the requested stamp.
#
# The method accepts:
# stamp:: either a String or a PDF page. If this is a String, you can add formatting to add page numbering (i.e. "page number %i"). otherwise remember to escape any literal percent ('%') sign by doubling it (i.e. "100%% final"), since the String is used as a format string.
# options:: an options Hash. The Hash passed by the caller is not modified.
#
# If the stamp is a PDF page, only :page_range and :underlay (to reverse-stamp) are valid options.
#
# If the stamp is a String, then all the options used by {#number_pages} or {Page_Methods#textbox} can be used.
#
# The default :location option is :center = meaning the stamp will be stamped all across the page unless the :x, :y, :width or :height options are specified.
#
# Raises TypeError when the stamp is neither a String nor a PDF page.
def stamp_pages(stamp, options = {})
  case stamp
  when String
    # work on a merged copy so the caller's Hash doesn't silently gain a :location key.
    text_options = { number_format: stamp }.merge(options)
    text_options[:location] ||= [:center]
    number_pages(text_options)
  when Page_Methods
    # an out of bounds :page_range makes Array#[] return nil; treat that as "no pages".
    targets = options[:page_range] ? pages[options[:page_range]] : pages
    targets ||= []
    if options[:underlay]
      targets.each { |page| page >> stamp }
    else
      targets.each { |page| page << stamp }
    end
  else
    raise TypeError, 'expecting a String or a PDF page as the stamp.'
  end
end
469
+
470
+ # the forms_data attribute is a Hash that corresponds to the PDF form data (if any).
471
+ attr_reader :forms_data
472
+
473
+ # # experimental, allows form data updates. This function lists the form data fields.
474
+ # def form_list
475
+ # if(@forms_data && actual_value(@forms_data).is_a?(Hash) && actual_value(@forms_data)[:Fields])
476
+ # actual_value(actual_value(@forms_data)[:Fields]).map {|f| actual_value(f)[:Kids] ? ({actual_value(f)[:T] => actual_value(f)[:Kids].map {|k| actual_value(k)[:T]}}) : actual_value(f)[:T]}
477
+ # else
478
+ # nil
479
+ # end
480
+ # end
481
+ # # experimental, allows form data updates. This function gets the value of a specific form data field.
482
+ # def form_get name
483
+ # if(@forms_data && actual_value(@forms_data).is_a?(Hash) && actual_value(@forms_data)[:Fields])
484
+ # actual_value(actual_value(@forms_data)[:Fields]).map {|f| return actual_value(f)[:Kids] ? ({actual_value(f)[:V] => actual_value(f)[:Kids].map {|k| actual_value(k)[:V]}}) : actual_value(f)[:V] if actual_value(f)[:T] == name}
485
+ # # actual_value(actual_value(@forms_data)[:Fields]).each {|f| return actual_value(f)[:V] if actual_value(f)[:T] == name}
486
+ # else
487
+ # nil
488
+ # end
489
+ # nil
490
+ # end
491
+ # # experimental, allows form data updates. This function sets the value of a specific form data field.
492
+ # def form_set name, value
493
+ # if(@forms_data && actual_value(@forms_data).is_a?(Hash) && actual_value(@forms_data)[:Fields])
494
+ # actual_value(actual_value(@forms_data)[:Fields]).each {|f| return actual_value(f)[:V]=value if actual_value(f)[:T] == name}
495
+ # else
496
+ # nil
497
+ # end
498
+ # nil
499
+ # end
500
+ end
462
501
  end
463
-