combine_pdf 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c5d50f2ddbf6142442857b65ec46d9565ae774e2
4
- data.tar.gz: caaa8ac30c72919c54d28d21a4d30979a40ff41e
3
+ metadata.gz: b66122d4b265048987ad995f6d20e832d70e1e67
4
+ data.tar.gz: 06d4c4aa18793189a4b166eb535a089d71615768
5
5
  SHA512:
6
- metadata.gz: 0814bd7cfdfbd06d58506dc83f2160099bb6741c60625279850b6d8f525fa970228ce99425223a54998136cecff3593e1530bd0e77f5071d931a173bc26128ff
7
- data.tar.gz: 88230912c86e10a0949c63c0a10b358da411a96a615860bd8c62985c2c62ff4d98a05b0fb5982e9f6fab7a6fb3cd8d049254660d6c1c449e84c5922783cf7552
6
+ metadata.gz: ae6a99d83dcc63459094fc9c438520aaa409591aef52f4e491b03228cb9636b40c53de8c6b08a656a22ad2a54aa4d4e9e54a83ede7db26cefcb471bade392551
7
+ data.tar.gz: e4214c1f95fac5fee4c4617ad55d4fc1f704f52261ea0600b59cc89c083d473a89cff793ff1cbb6b7fd8d509ceec7b27f2a4bade651d7a29105e180b4bdaa7d5
data/lib/combine_pdf.rb CHANGED
@@ -1,4 +1,6 @@
1
1
  # -*- encoding : utf-8 -*-
2
+ # use under GPLv3 terms only
3
+
2
4
  require 'zlib'
3
5
  require 'securerandom'
4
6
  require 'strscan'
@@ -32,7 +34,7 @@ load "combine_pdf/combine_pdf_pdf.rb"
32
34
 
33
35
 
34
36
 
35
- # This is a pure ruby library to combine/merge, stmap/overlay and number PDF files.
37
+ # This is a pure ruby library to combine/merge, stamp/overlay and number PDF files - as well as to create tables (meant for indexing combined files).
36
38
  #
37
39
  # You can also use this library for writing basic text content into new or existing PDF files (For authoring new PDF files look at the Prawn ruby library).
38
40
  #
@@ -42,12 +44,10 @@ load "combine_pdf/combine_pdf_pdf.rb"
42
44
  # == Loading PDF data
43
45
  # Loading PDF data can be done from file system or directly from the memory.
44
46
  #
45
- # Loading data from a file is easy:
47
+ # Load data from a file:
46
48
  # pdf = CombinePDF.new("file.pdf")
47
- # you can also parse PDF files from memory:
48
- # pdf_data = IO.read 'file.pdf' # for this demo, load a file to memory
49
+ # parse PDF files from memory:
49
50
  # pdf = CombinePDF.parse(pdf_data)
50
- # Loading from the memory is especially effective for importing PDF data recieved through the internet or from a different authoring library such as Prawn.
51
51
  #
52
52
  # == Combine/Merge PDF files or Pages
53
53
  # To combine PDF files (or data):
@@ -55,10 +55,8 @@ load "combine_pdf/combine_pdf_pdf.rb"
55
55
  # pdf << CombinePDF.new("file1.pdf")
56
56
  # pdf << CombinePDF.new("file2.pdf")
57
57
  # pdf.save "combined.pdf"
58
- # as demonstrated above, these can be chained for into a one-liner.
59
- #
60
- # you can also choose to add only specific pages.
61
58
  #
59
+ # It is possible to add only specific pages.
62
60
  # in this example, only even pages will be added:
63
61
  # pdf = CombinePDF.new
64
62
  # i = 0
@@ -67,30 +65,21 @@ load "combine_pdf/combine_pdf_pdf.rb"
67
65
  # pdf << page if i.even?
68
66
  # end
69
67
  # pdf.save "even_pages.pdf"
70
- # notice that adding the whole file is faster then adding each page seperately.
68
+ # Notice that adding the whole file is faster than adding each page separately.
71
69
  # == Add content to existing pages (Stamp / Watermark)
72
- # To add content to existing PDF pages, first import the new content from an existing PDF file.
73
- # after that, add the content to each of the pages in your existing PDF.
74
- #
75
- # in this example, a company logo will be stamped over each page:
70
+ # It is possible to "stamp" one PDF page using another PDF page. In this example, a company logo will be stamped over each page:
76
71
  # company_logo = CombinePDF.new("company_logo.pdf").pages[0]
77
72
  # pdf = CombinePDF.new "content_file.pdf"
78
73
  # pdf.pages.each {|page| page << company_logo}
79
74
  # pdf.save "content_with_logo.pdf"
80
75
  # Notice the << operator is on a page and not a PDF object. The << operator acts differently on PDF objects and on Pages.
81
- #
82
- # The << operator defaults to secure injection by renaming references to avoid conflics.
83
- #
84
- # Less recommended, but available - for overlaying pages using compressed data that might not be editable (due to limited filter support), you can use:
85
- # pdf.pages(nil, false).each {|page| page << stamp_page}
86
- #
87
76
  # == Page Numbering
88
- # adding page numbers to a PDF object or file is as simple as can be:
77
+ # It is possible to number the pages. in this example we will add very simple numbering:
89
78
  # pdf = CombinePDF.new "file_to_number.pdf"
90
79
  # pdf.number_pages
91
80
  # pdf.save "file_with_numbering.pdf"
92
81
  #
93
- # numbering can be done with many different options, with different formating, with or without a box object, and even with opacity values.
82
+ # numbering can be done with many different options, with different formatting, with or without a box object, different locations on each page and even with opacity values.
94
83
  # == Writing Content
95
84
  # page numbering actually adds content using the PDFWriter object (a very basic writer).
96
85
  #
@@ -100,54 +89,30 @@ load "combine_pdf/combine_pdf_pdf.rb"
100
89
  # pdf.number_pages number_format: " - Draft, page %d - ", number_location: [:top], font_color: [0,0,1], box_color: [0.4,0,0], opacity: 0.75, font_size:16
101
90
  # pdf.save "draft.pdf"
102
91
  #
103
- # for demntration, it will now be coded the hard way, just so we can play more directly with some of the data.
92
+ # in this example we will add a first page with the word "Draft", in red over a colored background:
104
93
  #
105
- # pdf = CombinePDF.new "file_to_stamp.pdf"
106
- # ipage_number = 1
107
- # pdf.pages.each do |page|
108
- # # create a "stamp" PDF page with the same size as the target page
109
- # # we will do this because we will use this to center the box in the page
110
- # mediabox = page[:MediaBox]
111
- # # CombinePDF is pointer based...
112
- # # so you can add the stamp to the page and still continue to edit it's content!
113
- # stamp = PDFWriter.new mediabox
114
- # page << stamp
115
- # # set the visible dimensions to the CropBox, if it exists.
116
- # cropbox = page[:CropBox]
117
- # mediabox = cropbox if cropbox
118
- # # set stamp text
119
- # text = " Draft (page %d) " % page_number
120
- # # write the textbox
121
- # stamp.textbox text, x: mediabox[0]+30, y: mediabox[1]+30, width: mediabox[2]-mediabox[0]-60, height: mediabox[3]-mediabox[1]-60, font_color: [0,0,1], font_size: :fit_text, box_color: [0.4,0,0], opacity: 0.5
122
- # end
94
+ # pdf = CombinePDF.new "file.pdf"
95
+ # pdf_first_page = pdf.pages[0]
96
+ # mediabox = pdf_first_page[:CropBox] || pdf_first_page[:MediaBox] #copy page size
97
+ # title_page = CombinePDF.create_page mediabox #make title page same size as first page
98
+ # title_page.textbox "DRAFT", font_color: [0.8,0,0], font_size: :fit_text, box_color: [1,0.8,0.8], opacity: 1
99
+ # pdf >> title_page # the >> operator adds pages at the beginning
123
100
  # pdf.save "draft.pdf"
124
101
  #
125
- #
126
- # font support for the writer is still in the works and is extreamly limited.
127
- # at the moment it is best to limit the fonts to the 14 standard latin fonts (no unicode).
102
+ # font support for the writer is still in the works and is limited to extracting known fonts by location of the 14 standard fonts.
128
103
  #
129
104
  # == Decryption & Filters
130
105
  #
131
- # Some PDF files are encrypted and some are compressed (the use of filters)...
132
- #
133
- # There is very little support for encrypted files and very very basic and limited support for compressed files.
134
- #
135
- # I need help with that.
136
- #
106
+ # Some PDF files are encrypted and some are compressed (the use of filters)... not all files can be opened, merged, stamped or used as stamps.
137
107
  # == Comments and file structure
138
108
  #
139
109
  # If you want to help with the code, please be aware:
140
110
  #
141
- # I'm a self learned hobbiest at heart. The documentation is lacking and the comments in the code are poor guidlines.
142
- #
143
111
  # The code itself should be very straight forward, but feel free to ask whatever you want.
144
112
  #
145
113
  # == Credit
146
114
  #
147
- # Caige Nichols wrote an amazing RC4 gem which I used in my code.
148
- #
149
- # I wanted to install the gem, but I had issues with the internet and ended up copying the code itself into the combine_pdf_decrypt class file.
150
- #
115
+ # Caige Nichols wrote an amazing RC4 gem which I reference in my code.
151
116
  # Credit to his wonderful work is given here. Please respect his license and copyright... and mine.
152
117
  #
153
118
  # == License
@@ -158,11 +123,15 @@ module CombinePDF
158
123
 
159
124
  # Create an empty PDF object or create a PDF object from a file (parsing the file).
160
125
  # file_name:: is the name of a file to be parsed.
161
- def new(file_name = "")
162
- raise TypeError, "couldn't parse and data, expecting type String" unless file_name.is_a? String
126
+ def load(file_name = "")
127
+ raise TypeError, "couldn't parse and data, expecting type String" unless file_name.is_a?(String) || file_name.is_a?(Pathname)
163
128
  return PDF.new() if file_name == ''
164
129
  PDF.new( PDFParser.new( IO.read(file_name).force_encoding(Encoding::ASCII_8BIT) ) )
165
130
  end
131
+ def new(file_name = "")
132
+ load(file_name)
133
+ end
134
+
166
135
  # Create a PDF object from a raw PDF data (parsing the data).
167
136
  # data:: is a string that represents the content of a PDF file.
168
137
  def parse(data)
@@ -180,7 +149,7 @@ module CombinePDF
180
149
  # ::mediabox an Array representing the size of the PDF document. defaults to: [0.0, 0.0, 612.0, 792.0]
181
150
  #
182
151
  # if the page is PDFWriter object as a stamp, the final size will be that of the original page.
183
- def create_page(mediabox = [0.0, 0.0, 612.0, 792.0])
152
+ def create_page(mediabox = [0, 0, 595.3, 841.9])
184
153
  PDFWriter.new mediabox
185
154
  end
186
155
 
@@ -192,7 +161,7 @@ module CombinePDF
192
161
  # the main intended use of this method is to create indexes (a table of contents) for merged data.
193
162
  #
194
163
  # example:
195
- # pdf = CombinePDF.new_table headers: ["header 1", "another header"], table_data: [ ["this is one row", "with two columns"] , ["this is another row", "also two columns", "the third will be ignored"] ]
164
+ # pdf = CombinePDF.create_table headers: ["header 1", "another header"], table_data: [ ["this is one row", "with two columns"] , ["this is another row", "also two columns", "the third will be ignored"] ]
196
165
  # pdf.save "table_file.pdf"
197
166
  #
198
167
  # accepts a Hash with any of the following keys as well as any of the PDFWriter#textbox options:
@@ -204,16 +173,16 @@ module CombinePDF
204
173
  # column_widths:: an array of relative column widths ([1,2] will display only the first two columns, the second twice as big as the first). defaults to nil (even widths).
205
174
  # header_color:: the header color. defaults to [0.8, 0.8, 0.8] (light gray).
206
175
  # main_color:: main row color. defaults to nil (transparent / white).
207
- # alternate_color: alternate row color. defaults to [0.95, 0.95, 0.95] (very light gray).
208
- # font_color: font color. defaults to [0,0,0] (black).
209
- # border_color: border color. defaults to [0,0,0] (black).
210
- # border_width: border width in PDF units. defaults to 1.
211
- # header_align: the header text alignment within each column (:right, :left, :center). defaults to :center.
176
+ # alternate_color:: alternate row color. defaults to [0.95, 0.95, 0.95] (very light gray).
177
+ # font_color:: font color. defaults to [0,0,0] (black).
178
+ # border_color:: border color. defaults to [0,0,0] (black).
179
+ # border_width:: border width in PDF units. defaults to 1.
180
+ # header_align:: the header text alignment within each column (:right, :left, :center). defaults to :center.
212
181
  # row_align:: the row text alignment within each column. defaults to :left (:right for RTL table).
213
- # direction: the table's writing direction (:ltr or :rtl). this reffers to the direction of the columns and doesn't effect text (rtl text is automatically recognized). defaults to :ltr.
214
- # rows_per_page: the number of rows per page, INCLUDING the header row. deafults to 25.
215
- # page_size: the size of the page in PDF points. defaults to [0, 0, 595.3, 841.9] (A4).
216
- def new_table (options = {})
182
+ # direction:: the table's writing direction (:ltr or :rtl). this refers to the direction of the columns and doesn't affect text (rtl text is automatically recognized). defaults to :ltr.
183
+ # rows_per_page:: the number of rows per page, INCLUDING the header row. defaults to 25.
184
+ # page_size:: the size of the page in PDF points. defaults to [0, 0, 595.3, 841.9] (A4).
185
+ def create_table(options = {})
217
186
  defaults = {
218
187
  headers: nil,
219
188
  table_data: [[]],
@@ -291,6 +260,9 @@ module CombinePDF
291
260
  end
292
261
  table
293
262
  end
263
+ def new_table(options = {})
264
+ create_table options
265
+ end
294
266
 
295
267
  # adds a correctly formatted font object to the font library.
296
268
  #
@@ -311,14 +283,21 @@ module CombinePDF
311
283
  #
312
284
  # returns the font on success or false on failure.
313
285
  #
286
+ # example:
287
+ # fonts = CombinePDF.new("japanese_fonts.pdf").fonts(true)
288
+ # CombinePDF.register_font_from_pdf_object :david, fonts[0]
289
+ #
314
290
  # VERY LIMITED SUPPORT:
315
291
  # - at the moment it only imports Type0 fonts.
316
292
  # - also, to extract the Hash of the actual font object you were looking for, is not a trivial matter. I do it on the console.
317
293
  # font_name:: a Symbol with the name of the font registry. if the fonts exists in the library, it will be overwritten!
318
294
  # font_object:: a Hash in the internal format recognized by CombinePDF, that represents the font object.
319
- def register_font_from_pdf_object font_name, font_object
295
+ def register_existing_font font_name, font_object
320
296
  Fonts.register_font_from_pdf_object font_name, font_object
321
297
  end
298
+ def register_font_from_pdf_object font_name, font_object
299
+ register_existing_font font_name, font_object
300
+ end
322
301
  end
323
302
 
324
303
 
@@ -10,19 +10,13 @@
10
10
 
11
11
  module CombinePDF
12
12
 
13
- #:nodoc: all
14
-
15
- # <b>not fully tested!</b>
16
- #
17
- # NO UNICODE SUPPORT!
13
+ # Limited Unicode Support (font dependent)!
18
14
  #
19
15
  # The PDFWriter class is a subclass of Hash and represents a PDF Page object.
20
16
  #
21
17
  # Writing on this Page is done using the textbox function.
22
18
  #
23
- # Setting the page dimensions can be either at the new or using the mediabox method.
24
- #
25
- # the rest of the methods are for internal use.
19
+ # Setting the page dimensions can be done either when calling new or by using the mediabox method. New pages default to size A4, which is: [0, 0, 595.3, 841.9].
26
20
  #
27
21
  # Once the Page is completed (the last text box was added),
28
22
  # we can insert the page to a CombinePDF object.
@@ -41,7 +35,10 @@ module CombinePDF
41
35
  # pdf.save "stamped_file.pdf"
42
36
  class PDFWriter < Hash
43
37
 
44
- def initialize(mediabox = [0.0, 0.0, 612.0, 792.0])
38
+ # create a new PDFWriter object.
39
+ #
40
+ # mediabox:: the PDF page size in PDF points. defaults to [0, 0, 595.3, 841.9] (A4)
41
+ def initialize(mediabox = [0, 0, 595.3, 841.9])
45
42
  # indirect_reference_id, :indirect_generation_number
46
43
  @contents = ""
47
44
  @base_font_name = "Writer" + SecureRandom.urlsafe_base64(7) + "PDF"
@@ -13,8 +13,12 @@ module CombinePDF
13
13
  protected
14
14
 
15
15
  # @private
16
+ # @!visibility private
17
+
16
18
  # This is an internal class. you don't need it.
17
19
  class PDFDecrypt
20
+
21
+ # @!visibility private
18
22
 
19
23
  # make a new Decrypt object. requires:
20
24
  # objects:: an array containing the encrypted objects.
@@ -8,14 +8,17 @@
8
8
 
9
9
 
10
10
  module CombinePDF
11
- #@private
12
11
  #:nodoc: all
13
12
 
14
13
  protected
15
14
 
15
+ # @!visibility private
16
+
16
17
  # This is an internal class. you don't need it.
17
18
  module PDFFilter
18
19
  module_function
20
+
21
+ # @!visibility private
19
22
 
20
23
  # deflate / compress an object.
21
24
  #
@@ -13,6 +13,7 @@
13
13
  module CombinePDF
14
14
 
15
15
  #:nodoc: all
16
+ # @!visibility private
16
17
 
17
18
 
18
19
 
@@ -20,6 +21,8 @@ module CombinePDF
20
21
 
21
22
  protected
22
23
 
24
+ # @!visibility private
25
+
23
26
  # the internal class for the Fonts model
24
27
  #
25
28
  # this is an internal class, used by PDFWriter and PDF. you don't normally need to use this.
@@ -1,12 +1,11 @@
1
1
  module CombinePDF
2
2
 
3
- #:nodoc: all
4
3
  ################################################################
5
4
  ## These are common functions, used within the different classes
6
5
  ## These functions aren't open to the public.
7
6
  ################################################################
8
7
 
9
- #@private
8
+
10
9
  # lists the Hash keys used for PDF objects
11
10
  #
12
11
  # the CombinePDF library doesn't use special classes for its objects (PDFPage class, PDFStream class or anything like that).
@@ -16,15 +15,22 @@ module CombinePDF
16
15
  # this Hash lists the private Hash keys that the CombinePDF library uses to
17
16
  # differentiate between complex PDF objects.
18
17
  PRIVATE_HASH_KEYS = [:indirect_reference_id, :indirect_generation_number, :raw_stream_content, :is_reference_only, :referenced_object, :indirect_without_dictionary]
19
- #@private
18
+
19
+ # @private
20
+ # @!visibility private
20
21
  #:nodoc: all
21
22
 
22
23
  protected
23
24
 
25
+ # @!visibility private
24
26
 
25
27
  # This is an internal class. you don't need it.
26
28
  module PDFOperations
29
+
27
30
  module_function
31
+
32
+ # @!visibility private
33
+
28
34
  def inject_to_page page = {Type: :Page, MediaBox: [0,0,612.0,792.0], Resources: {}, Contents: []}, stream = nil, top = true
29
35
  # make sure both the page reciving the new data and the injected page are of the correct data type.
30
36
  return false unless page.is_a?(Hash) && stream.is_a?(Hash)
@@ -11,7 +11,8 @@
11
11
  module CombinePDF
12
12
 
13
13
 
14
- #@private
14
+ # @!visibility private
15
+ # @private
15
16
  #:nodoc: all
16
17
 
17
18
  protected
@@ -25,6 +26,9 @@ module CombinePDF
25
26
  # This is an internal class. you don't need it.
26
27
  class PDFParser
27
28
 
29
+ # @!visibility private
30
+
31
+
28
32
  # the array containing all the parsed data (PDF Objects)
29
33
  attr_reader :parsed
30
34
  # a Float representing the PDF version of the data parsed (if exists).
@@ -254,6 +254,24 @@ module CombinePDF
254
254
  page_list
255
255
  end
256
256
 
257
+ # returns an array with the different fonts used in the file.
258
+ #
259
+ # Type0 font objects ( "font[:Subtype] == :Type0" ) can be registered with the font library
260
+ # for use in PDFWriter objects (font numbering / table creation etc').
261
+ # @param limit_to_type0 [true,false] limits the list to type0 fonts.
262
+ def fonts(limit_to_type0 = false)
263
+ fonts_array = []
264
+ pages.each do |p|
265
+ p[:Resources][:Font].values.each do |f|
266
+ f = f[:referenced_object] if f[:referenced_object]
267
+ if (!limit_to_type0 || f[:Subtype] == :Type0) && f[:Type] == :Font && !fonts_array.include?(f)
268
+ fonts_array << f
269
+ end
270
+ end
271
+ end
272
+ fonts_array
273
+ end
274
+
257
275
  # add the pages (or file) to the PDF (combine/merge) and return the new pages array.
258
276
  # for example:
259
277
  #
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: combine_pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boaz Segev
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-09-14 00:00:00.000000000 Z
12
+ date: 2014-09-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ruby-rc4