combine_pdf 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c5d50f2ddbf6142442857b65ec46d9565ae774e2
4
- data.tar.gz: caaa8ac30c72919c54d28d21a4d30979a40ff41e
3
+ metadata.gz: b66122d4b265048987ad995f6d20e832d70e1e67
4
+ data.tar.gz: 06d4c4aa18793189a4b166eb535a089d71615768
5
5
  SHA512:
6
- metadata.gz: 0814bd7cfdfbd06d58506dc83f2160099bb6741c60625279850b6d8f525fa970228ce99425223a54998136cecff3593e1530bd0e77f5071d931a173bc26128ff
7
- data.tar.gz: 88230912c86e10a0949c63c0a10b358da411a96a615860bd8c62985c2c62ff4d98a05b0fb5982e9f6fab7a6fb3cd8d049254660d6c1c449e84c5922783cf7552
6
+ metadata.gz: ae6a99d83dcc63459094fc9c438520aaa409591aef52f4e491b03228cb9636b40c53de8c6b08a656a22ad2a54aa4d4e9e54a83ede7db26cefcb471bade392551
7
+ data.tar.gz: e4214c1f95fac5fee4c4617ad55d4fc1f704f52261ea0600b59cc89c083d473a89cff793ff1cbb6b7fd8d509ceec7b27f2a4bade651d7a29105e180b4bdaa7d5
data/lib/combine_pdf.rb CHANGED
@@ -1,4 +1,6 @@
1
1
  # -*- encoding : utf-8 -*-
2
+ # use under GPLv3 terms only
3
+
2
4
  require 'zlib'
3
5
  require 'securerandom'
4
6
  require 'strscan'
@@ -32,7 +34,7 @@ load "combine_pdf/combine_pdf_pdf.rb"
32
34
 
33
35
 
34
36
 
35
- # This is a pure ruby library to combine/merge, stmap/overlay and number PDF files.
37
+ # This is a pure ruby library to combine/merge, stamp/overlay and number PDF files - as well as to create tables (meant for indexing combined files).
36
38
  #
37
39
  # You can also use this library for writing basic text content into new or existing PDF files (For authoring new PDF files look at the Prawn ruby library).
38
40
  #
@@ -42,12 +44,10 @@ load "combine_pdf/combine_pdf_pdf.rb"
42
44
  # == Loading PDF data
43
45
  # Loading PDF data can be done from file system or directly from the memory.
44
46
  #
45
- # Loading data from a file is easy:
47
+ # Load data from a file:
46
48
  # pdf = CombinePDF.new("file.pdf")
47
- # you can also parse PDF files from memory:
48
- # pdf_data = IO.read 'file.pdf' # for this demo, load a file to memory
49
+ # parse PDF files from memory:
49
50
  # pdf = CombinePDF.parse(pdf_data)
50
- # Loading from the memory is especially effective for importing PDF data recieved through the internet or from a different authoring library such as Prawn.
51
51
  #
52
52
  # == Combine/Merge PDF files or Pages
53
53
  # To combine PDF files (or data):
@@ -55,10 +55,8 @@ load "combine_pdf/combine_pdf_pdf.rb"
55
55
  # pdf << CombinePDF.new("file1.pdf")
56
56
  # pdf << CombinePDF.new("file2.pdf")
57
57
  # pdf.save "combined.pdf"
58
- # as demonstrated above, these can be chained for into a one-liner.
59
- #
60
- # you can also choose to add only specific pages.
61
58
  #
59
+ # It is possible to add only specific pages.
62
60
  # in this example, only even pages will be added:
63
61
  # pdf = CombinePDF.new
64
62
  # i = 0
@@ -67,30 +65,21 @@ load "combine_pdf/combine_pdf_pdf.rb"
67
65
  # pdf << page if i.even?
68
66
  # end
69
67
  # pdf.save "even_pages.pdf"
70
- # notice that adding the whole file is faster then adding each page seperately.
68
+ # Notice that adding the whole file is faster than adding each page separately.
71
69
  # == Add content to existing pages (Stamp / Watermark)
72
- # To add content to existing PDF pages, first import the new content from an existing PDF file.
73
- # after that, add the content to each of the pages in your existing PDF.
74
- #
75
- # in this example, a company logo will be stamped over each page:
70
+ # It is possible to "stamp" one PDF page using another PDF page. In this example, a company logo will be stamped over each page:
76
71
  # company_logo = CombinePDF.new("company_logo.pdf").pages[0]
77
72
  # pdf = CombinePDF.new "content_file.pdf"
78
73
  # pdf.pages.each {|page| page << company_logo}
79
74
  # pdf.save "content_with_logo.pdf"
80
75
  # Notice the << operator is on a page and not a PDF object. The << operator acts differently on PDF objects and on Pages.
81
- #
82
- # The << operator defaults to secure injection by renaming references to avoid conflics.
83
- #
84
- # Less recommended, but available - for overlaying pages using compressed data that might not be editable (due to limited filter support), you can use:
85
- # pdf.pages(nil, false).each {|page| page << stamp_page}
86
- #
87
76
  # == Page Numbering
88
- # adding page numbers to a PDF object or file is as simple as can be:
77
+ # It is possible to number the pages. In this example we will add very simple numbering:
89
78
  # pdf = CombinePDF.new "file_to_number.pdf"
90
79
  # pdf.number_pages
91
80
  # pdf.save "file_with_numbering.pdf"
92
81
  #
93
- # numbering can be done with many different options, with different formating, with or without a box object, and even with opacity values.
82
+ # numbering can be done with many different options, with different formatting, with or without a box object, different locations on each page and even with opacity values.
94
83
  # == Writing Content
95
84
  # page numbering actually adds content using the PDFWriter object (a very basic writer).
96
85
  #
@@ -100,54 +89,30 @@ load "combine_pdf/combine_pdf_pdf.rb"
100
89
  # pdf.number_pages number_format: " - Draft, page %d - ", number_location: [:top], font_color: [0,0,1], box_color: [0.4,0,0], opacity: 0.75, font_size:16
101
90
  # pdf.save "draft.pdf"
102
91
  #
103
- # for demntration, it will now be coded the hard way, just so we can play more directly with some of the data.
92
+ # in this example we will add a first page with the word "Draft", in red over a colored background:
104
93
  #
105
- # pdf = CombinePDF.new "file_to_stamp.pdf"
106
- # ipage_number = 1
107
- # pdf.pages.each do |page|
108
- # # create a "stamp" PDF page with the same size as the target page
109
- # # we will do this because we will use this to center the box in the page
110
- # mediabox = page[:MediaBox]
111
- # # CombinePDF is pointer based...
112
- # # so you can add the stamp to the page and still continue to edit it's content!
113
- # stamp = PDFWriter.new mediabox
114
- # page << stamp
115
- # # set the visible dimensions to the CropBox, if it exists.
116
- # cropbox = page[:CropBox]
117
- # mediabox = cropbox if cropbox
118
- # # set stamp text
119
- # text = " Draft (page %d) " % page_number
120
- # # write the textbox
121
- # stamp.textbox text, x: mediabox[0]+30, y: mediabox[1]+30, width: mediabox[2]-mediabox[0]-60, height: mediabox[3]-mediabox[1]-60, font_color: [0,0,1], font_size: :fit_text, box_color: [0.4,0,0], opacity: 0.5
122
- # end
94
+ # pdf = CombinePDF.new "file.pdf"
95
+ # pdf_first_page = pdf.pages[0]
96
+ # mediabox = pdf_first_page[:CropBox] || pdf_first_page[:MediaBox] #copy page size
97
+ # title_page = CombinePDF.create_page mediabox #make title page same size as first page
98
+ # title_page.textbox "DRAFT", font_color: [0.8,0,0], font_size: :fit_text, box_color: [1,0.8,0.8], opacity: 1
99
+ # pdf >> title_page # the >> operator adds pages at the beginning
123
100
  # pdf.save "draft.pdf"
124
101
  #
125
- #
126
- # font support for the writer is still in the works and is extreamly limited.
127
- # at the moment it is best to limit the fonts to the 14 standard latin fonts (no unicode).
102
+ # font support for the writer is still in the works and is limited to extracting known fonts by location of the 14 standard fonts.
128
103
  #
129
104
  # == Decryption & Filters
130
105
  #
131
- # Some PDF files are encrypted and some are compressed (the use of filters)...
132
- #
133
- # There is very little support for encrypted files and very very basic and limited support for compressed files.
134
- #
135
- # I need help with that.
136
- #
106
+ # Some PDF files are encrypted and some are compressed (the use of filters)... not all files can be opened, merged, stamped or used as stamps.
137
107
  # == Comments and file structure
138
108
  #
139
109
  # If you want to help with the code, please be aware:
140
110
  #
141
- # I'm a self learned hobbiest at heart. The documentation is lacking and the comments in the code are poor guidlines.
142
- #
143
111
  # The code itself should be very straight forward, but feel free to ask whatever you want.
144
112
  #
145
113
  # == Credit
146
114
  #
147
- # Caige Nichols wrote an amazing RC4 gem which I used in my code.
148
- #
149
- # I wanted to install the gem, but I had issues with the internet and ended up copying the code itself into the combine_pdf_decrypt class file.
150
- #
115
+ # Caige Nichols wrote an amazing RC4 gem which I reference in my code.
151
116
  # Credit to his wonderful work is given here. Please respect his license and copyright... and mine.
152
117
  #
153
118
  # == License
@@ -158,11 +123,15 @@ module CombinePDF
158
123
 
159
124
  # Create an empty PDF object or create a PDF object from a file (parsing the file).
160
125
  # file_name:: is the name of a file to be parsed.
161
- def new(file_name = "")
162
- raise TypeError, "couldn't parse and data, expecting type String" unless file_name.is_a? String
126
+ def load(file_name = "")
127
+ raise TypeError, "couldn't parse and data, expecting type String" unless file_name.is_a?(String) || file_name.is_a?(Pathname)
163
128
  return PDF.new() if file_name == ''
164
129
  PDF.new( PDFParser.new( IO.read(file_name).force_encoding(Encoding::ASCII_8BIT) ) )
165
130
  end
131
+ def new(file_name = "") # same as CombinePDF.load: forwards file_name unchanged
132
+ load(file_name)
133
+ end
134
+
166
135
  # Create a PDF object from a raw PDF data (parsing the data).
167
136
  # data:: is a string that represents the content of a PDF file.
168
137
  def parse(data)
@@ -180,7 +149,7 @@ module CombinePDF
180
149
  # ::mediabox an Array representing the size of the PDF document. defaults to: [0.0, 0.0, 612.0, 792.0]
181
150
  #
182
151
  # if the page is PDFWriter object as a stamp, the final size will be that of the original page.
183
- def create_page(mediabox = [0.0, 0.0, 612.0, 792.0])
152
+ def create_page(mediabox = [0, 0, 595.3, 841.9])
184
153
  PDFWriter.new mediabox
185
154
  end
186
155
 
@@ -192,7 +161,7 @@ module CombinePDF
192
161
  # the main intended use of this method is to create indexes (a table of contents) for merged data.
193
162
  #
194
163
  # example:
195
- # pdf = CombinePDF.new_table headers: ["header 1", "another header"], table_data: [ ["this is one row", "with two columns"] , ["this is another row", "also two columns", "the third will be ignored"] ]
164
+ # pdf = CombinePDF.create_table headers: ["header 1", "another header"], table_data: [ ["this is one row", "with two columns"] , ["this is another row", "also two columns", "the third will be ignored"] ]
196
165
  # pdf.save "table_file.pdf"
197
166
  #
198
167
  # accepts a Hash with any of the following keys as well as any of the PDFWriter#textbox options:
@@ -204,16 +173,16 @@ module CombinePDF
204
173
  # column_widths:: an array of relative column widths ([1,2] will display only the first two columns, the second twice as big as the first). defaults to nil (even widths).
205
174
  # header_color:: the header color. defaults to [0.8, 0.8, 0.8] (light gray).
206
175
  # main_color:: main row color. defaults to nil (transparent / white).
207
- # alternate_color: alternate row color. defaults to [0.95, 0.95, 0.95] (very light gray).
208
- # font_color: font color. defaults to [0,0,0] (black).
209
- # border_color: border color. defaults to [0,0,0] (black).
210
- # border_width: border width in PDF units. defaults to 1.
211
- # header_align: the header text alignment within each column (:right, :left, :center). defaults to :center.
176
+ # alternate_color:: alternate row color. defaults to [0.95, 0.95, 0.95] (very light gray).
177
+ # font_color:: font color. defaults to [0,0,0] (black).
178
+ # border_color:: border color. defaults to [0,0,0] (black).
179
+ # border_width:: border width in PDF units. defaults to 1.
180
+ # header_align:: the header text alignment within each column (:right, :left, :center). defaults to :center.
212
181
  # row_align:: the row text alignment within each column. defaults to :left (:right for RTL table).
213
- # direction: the table's writing direction (:ltr or :rtl). this reffers to the direction of the columns and doesn't effect text (rtl text is automatically recognized). defaults to :ltr.
214
- # rows_per_page: the number of rows per page, INCLUDING the header row. deafults to 25.
215
- # page_size: the size of the page in PDF points. defaults to [0, 0, 595.3, 841.9] (A4).
216
- def new_table (options = {})
182
+ # direction:: the table's writing direction (:ltr or :rtl). this refers to the direction of the columns and doesn't affect text (rtl text is automatically recognized). defaults to :ltr.
183
+ # rows_per_page:: the number of rows per page, INCLUDING the header row. defaults to 25.
184
+ # page_size:: the size of the page in PDF points. defaults to [0, 0, 595.3, 841.9] (A4).
185
+ def create_table(options = {})
217
186
  defaults = {
218
187
  headers: nil,
219
188
  table_data: [[]],
@@ -291,6 +260,9 @@ module CombinePDF
291
260
  end
292
261
  table
293
262
  end
263
+ def new_table(options = {}) # forwards options unchanged to create_table
264
+ create_table options
265
+ end
294
266
 
295
267
  # adds a correctly formatted font object to the font library.
296
268
  #
@@ -311,14 +283,21 @@ module CombinePDF
311
283
  #
312
284
  # returns the font on success or false on failure.
313
285
  #
286
+ # example:
287
+ # fonts = CombinePDF.new("japanese_fonts.pdf").fonts(true)
288
+ # CombinePDF.register_font_from_pdf_object :david, fonts[0]
289
+ #
314
290
  # VERY LIMITED SUPPORT:
315
291
  # - at the moment it only imports Type0 fonts.
316
292
  # - also, to extract the Hash of the actual font object you were looking for, is not a trivial matter. I do it on the console.
317
293
  # font_name:: a Symbol with the name of the font registry. if the font exists in the library, it will be overwritten!
318
294
  # font_object:: a Hash in the internal format recognized by CombinePDF, that represents the font object.
319
- def register_font_from_pdf_object font_name, font_object
295
+ def register_existing_font font_name, font_object
320
296
  Fonts.register_font_from_pdf_object font_name, font_object
321
297
  end
298
+ def register_font_from_pdf_object font_name, font_object # forwards both arguments to register_existing_font
299
+ register_existing_font font_name, font_object
300
+ end
322
301
  end
323
302
 
324
303
 
@@ -10,19 +10,13 @@
10
10
 
11
11
  module CombinePDF
12
12
 
13
- #:nodoc: all
14
-
15
- # <b>not fully tested!</b>
16
- #
17
- # NO UNICODE SUPPORT!
13
+ # Limited Unicode Support (font dependent)!
18
14
  #
19
15
  # The PDFWriter class is a subclass of Hash and represents a PDF Page object.
20
16
  #
21
17
  # Writing on this Page is done using the textbox function.
22
18
  #
23
- # Setting the page dimensions can be either at the new or using the mediabox method.
24
- #
25
- # the rest of the methods are for internal use.
19
+ # Setting the page dimensions can be done either when calling new or by using the mediabox method. New pages default to size A4, which is: [0, 0, 595.3, 841.9].
26
20
  #
27
21
  # Once the Page is completed (the last text box was added),
28
22
  # we can insert the page to a CombinePDF object.
@@ -41,7 +35,10 @@ module CombinePDF
41
35
  # pdf.save "stamped_file.pdf"
42
36
  class PDFWriter < Hash
43
37
 
44
- def initialize(mediabox = [0.0, 0.0, 612.0, 792.0])
38
+ # create a new PDFWriter object.
39
+ #
40
+ # mediabox:: the PDF page size in PDF points. defaults to [0, 0, 595.3, 841.9] (A4)
41
+ def initialize(mediabox = [0, 0, 595.3, 841.9])
45
42
  # indirect_reference_id, :indirect_generation_number
46
43
  @contents = ""
47
44
  @base_font_name = "Writer" + SecureRandom.urlsafe_base64(7) + "PDF"
@@ -13,8 +13,12 @@ module CombinePDF
13
13
  protected
14
14
 
15
15
  # @private
16
+ # @!visibility private
17
+
16
18
  # This is an internal class. you don't need it.
17
19
  class PDFDecrypt
20
+
21
+ # @!visibility private
18
22
 
19
23
  # make a new Decrypt object. requires:
20
24
  # objects:: an array containing the encrypted objects.
@@ -8,14 +8,17 @@
8
8
 
9
9
 
10
10
  module CombinePDF
11
- #@private
12
11
  #:nodoc: all
13
12
 
14
13
  protected
15
14
 
15
+ # @!visibility private
16
+
16
17
  # This is an internal class. you don't need it.
17
18
  module PDFFilter
18
19
  module_function
20
+
21
+ # @!visibility private
19
22
 
20
23
  # deflate / compress an object.
21
24
  #
@@ -13,6 +13,7 @@
13
13
  module CombinePDF
14
14
 
15
15
  #:nodoc: all
16
+ # @!visibility private
16
17
 
17
18
 
18
19
 
@@ -20,6 +21,8 @@ module CombinePDF
20
21
 
21
22
  protected
22
23
 
24
+ # @!visibility private
25
+
23
26
  # the internal class for the Fonts model
24
27
  #
25
28
  # this is an internal class, used by PDFWriter and PDF. you don't normally need to use this.
@@ -1,12 +1,11 @@
1
1
  module CombinePDF
2
2
 
3
- #:nodoc: all
4
3
  ################################################################
5
4
  ## These are common functions, used within the different classes
6
5
  ## These functions aren't open to the public.
7
6
  ################################################################
8
7
 
9
- #@private
8
+
10
9
  # lists the Hash keys used for PDF objects
11
10
  #
12
11
  # the CombinePDF library doesn't use special classes for its objects (PDFPage class, PDFStream class or anything like that).
@@ -16,15 +15,22 @@ module CombinePDF
16
15
  # this Hash lists the private Hash keys that the CombinePDF library uses to
17
16
  # differentiate between complex PDF objects.
18
17
  PRIVATE_HASH_KEYS = [:indirect_reference_id, :indirect_generation_number, :raw_stream_content, :is_reference_only, :referenced_object, :indirect_without_dictionary]
19
- #@private
18
+
19
+ # @private
20
+ # @!visibility private
20
21
  #:nodoc: all
21
22
 
22
23
  protected
23
24
 
25
+ # @!visibility private
24
26
 
25
27
  # This is an internal class. you don't need it.
26
28
  module PDFOperations
29
+
27
30
  module_function
31
+
32
+ # @!visibility private
33
+
28
34
  def inject_to_page page = {Type: :Page, MediaBox: [0,0,612.0,792.0], Resources: {}, Contents: []}, stream = nil, top = true
29
35
  # make sure both the page reciving the new data and the injected page are of the correct data type.
30
36
  return false unless page.is_a?(Hash) && stream.is_a?(Hash)
@@ -11,7 +11,8 @@
11
11
  module CombinePDF
12
12
 
13
13
 
14
- #@private
14
+ # @!visibility private
15
+ # @private
15
16
  #:nodoc: all
16
17
 
17
18
  protected
@@ -25,6 +26,9 @@ module CombinePDF
25
26
  # This is an internal class. you don't need it.
26
27
  class PDFParser
27
28
 
29
+ # @!visibility private
30
+
31
+
28
32
  # the array containing all the parsed data (PDF Objects)
29
33
  attr_reader :parsed
30
34
  # a Float representing the PDF version of the data parsed (if exists).
@@ -254,6 +254,24 @@ module CombinePDF
254
254
  page_list
255
255
  end
256
256
 
257
+ # returns an array with the different fonts used in the file.
258
+ #
259
+ # Type0 font objects ( "font[:Subtype] == :Type0" ) can be registered with the font library
260
+ # for use in PDFWriter objects (font numbering / table creation etc').
261
+ # @param limit_to_type0 [true,false] limits the list to type0 fonts.
262
+ def fonts(limit_to_type0 = false)
263
+ fonts_array = []
264
+ pages.each do |p|
265
+ p[:Resources][:Font].values.each do |f|
266
+ f = f[:referenced_object] if f[:referenced_object]
267
+ if (limit_to_type0 || f[:Subtype] = :Type0) && f[:Type] == :Font && !fonts_array.include?(f)
268
+ fonts_array << f
269
+ end
270
+ end
271
+ end
272
+ fonts_array
273
+ end
274
+
257
275
  # add the pages (or file) to the PDF (combine/merge) and return the new pages array.
258
276
  # for example:
259
277
  #
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: combine_pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boaz Segev
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-09-14 00:00:00.000000000 Z
12
+ date: 2014-09-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ruby-rc4