combine_pdf 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/combine_pdf.rb +48 -69
- data/lib/combine_pdf/combine_pdf_basic_writer.rb +6 -9
- data/lib/combine_pdf/combine_pdf_decrypt.rb +4 -0
- data/lib/combine_pdf/combine_pdf_filter.rb +4 -1
- data/lib/combine_pdf/combine_pdf_fonts.rb +3 -0
- data/lib/combine_pdf/combine_pdf_operations.rb +9 -3
- data/lib/combine_pdf/combine_pdf_parser.rb +5 -1
- data/lib/combine_pdf/combine_pdf_pdf.rb +18 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b66122d4b265048987ad995f6d20e832d70e1e67
|
4
|
+
data.tar.gz: 06d4c4aa18793189a4b166eb535a089d71615768
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ae6a99d83dcc63459094fc9c438520aaa409591aef52f4e491b03228cb9636b40c53de8c6b08a656a22ad2a54aa4d4e9e54a83ede7db26cefcb471bade392551
|
7
|
+
data.tar.gz: e4214c1f95fac5fee4c4617ad55d4fc1f704f52261ea0600b59cc89c083d473a89cff793ff1cbb6b7fd8d509ceec7b27f2a4bade651d7a29105e180b4bdaa7d5
|
data/lib/combine_pdf.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
|
+
# use under GPLv3 terms only
|
3
|
+
|
2
4
|
require 'zlib'
|
3
5
|
require 'securerandom'
|
4
6
|
require 'strscan'
|
@@ -32,7 +34,7 @@ load "combine_pdf/combine_pdf_pdf.rb"
|
|
32
34
|
|
33
35
|
|
34
36
|
|
35
|
-
# This is a pure ruby library to combine/merge, stmap/overlay and number PDF files.
|
37
|
+
# This is a pure ruby library to combine/merge, stamp/overlay and number PDF files - as well as to create tables (meant for indexing combined files).
|
36
38
|
#
|
37
39
|
# You can also use this library for writing basic text content into new or existing PDF files (For authoring new PDF files look at the Prawn ruby library).
|
38
40
|
#
|
@@ -42,12 +44,10 @@ load "combine_pdf/combine_pdf_pdf.rb"
|
|
42
44
|
# == Loading PDF data
|
43
45
|
# Loading PDF data can be done from file system or directly from the memory.
|
44
46
|
#
|
45
|
-
#
|
47
|
+
# Load data from a file:
|
46
48
|
# pdf = CombinePDF.new("file.pdf")
|
47
|
-
#
|
48
|
-
# pdf_data = IO.read 'file.pdf' # for this demo, load a file to memory
|
49
|
+
# parse PDF files from memory:
|
49
50
|
# pdf = CombinePDF.parse(pdf_data)
|
50
|
-
# Loading from the memory is especially effective for importing PDF data recieved through the internet or from a different authoring library such as Prawn.
|
51
51
|
#
|
52
52
|
# == Combine/Merge PDF files or Pages
|
53
53
|
# To combine PDF files (or data):
|
@@ -55,10 +55,8 @@ load "combine_pdf/combine_pdf_pdf.rb"
|
|
55
55
|
# pdf << CombinePDF.new("file1.pdf")
|
56
56
|
# pdf << CombinePDF.new("file2.pdf")
|
57
57
|
# pdf.save "combined.pdf"
|
58
|
-
# as demonstrated above, these can be chained for into a one-liner.
|
59
|
-
#
|
60
|
-
# you can also choose to add only specific pages.
|
61
58
|
#
|
59
|
+
# It is possible to add only specific pages.
|
62
60
|
# in this example, only even pages will be added:
|
63
61
|
# pdf = CombinePDF.new
|
64
62
|
# i = 0
|
@@ -67,30 +65,21 @@ load "combine_pdf/combine_pdf_pdf.rb"
|
|
67
65
|
# pdf << page if i.even?
|
68
66
|
# end
|
69
67
|
# pdf.save "even_pages.pdf"
|
70
|
-
#
|
68
|
+
# Notice that adding the whole file is faster than adding each page separately.
|
71
69
|
# == Add content to existing pages (Stamp / Watermark)
|
72
|
-
#
|
73
|
-
# after that, add the content to each of the pages in your existing PDF.
|
74
|
-
#
|
75
|
-
# in this example, a company logo will be stamped over each page:
|
70
|
+
# It is possible to "stamp" one PDF page using another PDF page. In this example, a company logo will be stamped over each page:
|
76
71
|
# company_logo = CombinePDF.new("company_logo.pdf").pages[0]
|
77
72
|
# pdf = CombinePDF.new "content_file.pdf"
|
78
73
|
# pdf.pages.each {|page| page << company_logo}
|
79
74
|
# pdf.save "content_with_logo.pdf"
|
80
75
|
# Notice the << operator is on a page and not a PDF object. The << operator acts differently on PDF objects and on Pages.
|
81
|
-
#
|
82
|
-
# The << operator defaults to secure injection by renaming references to avoid conflics.
|
83
|
-
#
|
84
|
-
# Less recommended, but available - for overlaying pages using compressed data that might not be editable (due to limited filter support), you can use:
|
85
|
-
# pdf.pages(nil, false).each {|page| page << stamp_page}
|
86
|
-
#
|
87
76
|
# == Page Numbering
|
88
|
-
#
|
77
|
+
# It is possible to number the pages. in this example we will add very simple numbering:
|
89
78
|
# pdf = CombinePDF.new "file_to_number.pdf"
|
90
79
|
# pdf.number_pages
|
91
80
|
# pdf.save "file_with_numbering.pdf"
|
92
81
|
#
|
93
|
-
# numbering can be done with many different options, with different formating, with or without a box object, and even with opacity values.
|
82
|
+
# numbering can be done with many different options, with different formatting, with or without a box object, different locations on each page and even with opacity values.
|
94
83
|
# == Writing Content
|
95
84
|
# page numbering actually adds content using the PDFWriter object (a very basic writer).
|
96
85
|
#
|
@@ -100,54 +89,30 @@ load "combine_pdf/combine_pdf_pdf.rb"
|
|
100
89
|
# pdf.number_pages number_format: " - Draft, page %d - ", number_location: [:top], font_color: [0,0,1], box_color: [0.4,0,0], opacity: 0.75, font_size:16
|
101
90
|
# pdf.save "draft.pdf"
|
102
91
|
#
|
103
|
-
#
|
92
|
+
# in this example we will add a first page with the word "Draft", in red over a colored background:
|
104
93
|
#
|
105
|
-
# pdf = CombinePDF.new "
|
106
|
-
#
|
107
|
-
#
|
108
|
-
#
|
109
|
-
#
|
110
|
-
#
|
111
|
-
# # CombinePDF is pointer based...
|
112
|
-
# # so you can add the stamp to the page and still continue to edit it's content!
|
113
|
-
# stamp = PDFWriter.new mediabox
|
114
|
-
# page << stamp
|
115
|
-
# # set the visible dimensions to the CropBox, if it exists.
|
116
|
-
# cropbox = page[:CropBox]
|
117
|
-
# mediabox = cropbox if cropbox
|
118
|
-
# # set stamp text
|
119
|
-
# text = " Draft (page %d) " % page_number
|
120
|
-
# # write the textbox
|
121
|
-
# stamp.textbox text, x: mediabox[0]+30, y: mediabox[1]+30, width: mediabox[2]-mediabox[0]-60, height: mediabox[3]-mediabox[1]-60, font_color: [0,0,1], font_size: :fit_text, box_color: [0.4,0,0], opacity: 0.5
|
122
|
-
# end
|
94
|
+
# pdf = CombinePDF.new "file.pdf"
|
95
|
+
# pdf_first_page = pdf.pages[0]
|
96
|
+
# mediabox = pdf_first_page[:CropBox] || pdf_first_page[:MediaBox] #copy page size
|
97
|
+
# title_page = CombinePDF.create_page mediabox #make title page same size as first page
|
98
|
+
# title_page.textbox "DRAFT", font_color: [0.8,0,0], font_size: :fit_text, box_color: [1,0.8,0.8], opacity: 1
|
99
|
+
# pdf >> title_page # the >> operator adds pages at the beginning
|
123
100
|
# pdf.save "draft.pdf"
|
124
101
|
#
|
125
|
-
#
|
126
|
-
# font support for the writer is still in the works and is extreamly limited.
|
127
|
-
# at the moment it is best to limit the fonts to the 14 standard latin fonts (no unicode).
|
102
|
+
# font support for the writer is still in the works and is limited to extracting known fonts by location of the 14 standard fonts.
|
128
103
|
#
|
129
104
|
# == Decryption & Filters
|
130
105
|
#
|
131
|
-
# Some PDF files are encrypted and some are compressed (the use of filters)...
|
132
|
-
#
|
133
|
-
# There is very little support for encrypted files and very very basic and limited support for compressed files.
|
134
|
-
#
|
135
|
-
# I need help with that.
|
136
|
-
#
|
106
|
+
# Some PDF files are encrypted and some are compressed (the use of filters)... not all files can be opened, merged, stamped or used as stamps.
|
137
107
|
# == Comments and file structure
|
138
108
|
#
|
139
109
|
# If you want to help with the code, please be aware:
|
140
110
|
#
|
141
|
-
# I'm a self learned hobbiest at heart. The documentation is lacking and the comments in the code are poor guidlines.
|
142
|
-
#
|
143
111
|
# The code itself should be very straight forward, but feel free to ask whatever you want.
|
144
112
|
#
|
145
113
|
# == Credit
|
146
114
|
#
|
147
|
-
# Caige Nichols wrote an amazing RC4 gem which I
|
148
|
-
#
|
149
|
-
# I wanted to install the gem, but I had issues with the internet and ended up copying the code itself into the combine_pdf_decrypt class file.
|
150
|
-
#
|
115
|
+
# Caige Nichols wrote an amazing RC4 gem which I reference in my code.
|
151
116
|
# Credit to his wonderful work is given here. Please respect his license and copyright... and mine.
|
152
117
|
#
|
153
118
|
# == License
|
@@ -158,11 +123,15 @@ module CombinePDF
|
|
158
123
|
|
159
124
|
# Create an empty PDF object or create a PDF object from a file (parsing the file).
|
160
125
|
# file_name:: is the name of a file to be parsed.
|
161
|
-
def
|
162
|
-
raise TypeError, "couldn't parse and data, expecting type String" unless file_name.is_a?
|
126
|
+
def load(file_name = "")
|
127
|
+
raise TypeError, "couldn't parse and data, expecting type String" unless file_name.is_a?(String) || file_name.is_a?(Pathname)
|
163
128
|
return PDF.new() if file_name == ''
|
164
129
|
PDF.new( PDFParser.new( IO.read(file_name).force_encoding(Encoding::ASCII_8BIT) ) )
|
165
130
|
end
|
131
|
+
def new(file_name = "")
|
132
|
+
load(file_name)
|
133
|
+
end
|
134
|
+
|
166
135
|
# Create a PDF object from a raw PDF data (parsing the data).
|
167
136
|
# data:: is a string that represents the content of a PDF file.
|
168
137
|
def parse(data)
|
@@ -180,7 +149,7 @@ module CombinePDF
|
|
180
149
|
# ::mediabox an Array representing the size of the PDF document. defaults to: [0.0, 0.0, 612.0, 792.0]
|
181
150
|
#
|
182
151
|
# if the page is PDFWriter object as a stamp, the final size will be that of the original page.
|
183
|
-
def create_page(mediabox = [0
|
152
|
+
def create_page(mediabox = [0, 0, 595.3, 841.9])
|
184
153
|
PDFWriter.new mediabox
|
185
154
|
end
|
186
155
|
|
@@ -192,7 +161,7 @@ module CombinePDF
|
|
192
161
|
# the main intended use of this method is to create indexes (a table of contents) for merged data.
|
193
162
|
#
|
194
163
|
# example:
|
195
|
-
# pdf = CombinePDF.
|
164
|
+
# pdf = CombinePDF.create_table headers: ["header 1", "another header"], table_data: [ ["this is one row", "with two columns"] , ["this is another row", "also two columns", "the third will be ignored"] ]
|
196
165
|
# pdf.save "table_file.pdf"
|
197
166
|
#
|
198
167
|
# accepts a Hash with any of the following keys as well as any of the PDFWriter#textbox options:
|
@@ -204,16 +173,16 @@ module CombinePDF
|
|
204
173
|
# column_widths:: an array of relative column widths ([1,2] will display only the first two columns, the second twice as big as the first). defaults to nil (even widths).
|
205
174
|
# header_color:: the header color. defaults to [0.8, 0.8, 0.8] (light gray).
|
206
175
|
# main_color:: main row color. defaults to nil (transparent / white).
|
207
|
-
# alternate_color
|
208
|
-
# font_color
|
209
|
-
# border_color
|
210
|
-
# border_width
|
211
|
-
# header_align
|
176
|
+
# alternate_color:: alternate row color. defaults to [0.95, 0.95, 0.95] (very light gray).
|
177
|
+
# font_color:: font color. defaults to [0,0,0] (black).
|
178
|
+
# border_color:: border color. defaults to [0,0,0] (black).
|
179
|
+
# border_width:: border width in PDF units. defaults to 1.
|
180
|
+
# header_align:: the header text alignment within each column (:right, :left, :center). defaults to :center.
|
212
181
|
# row_align:: the row text alignment within each column. defaults to :left (:right for RTL table).
|
213
|
-
# direction
|
214
|
-
# rows_per_page
|
215
|
-
# page_size
|
216
|
-
def
|
182
|
+
# direction:: the table's writing direction (:ltr or :rtl). this refers to the direction of the columns and doesn't affect text (rtl text is automatically recognized). defaults to :ltr.
|
183
|
+
# rows_per_page:: the number of rows per page, INCLUDING the header row. defaults to 25.
|
184
|
+
# page_size:: the size of the page in PDF points. defaults to [0, 0, 595.3, 841.9] (A4).
|
185
|
+
def create_table(options = {})
|
217
186
|
defaults = {
|
218
187
|
headers: nil,
|
219
188
|
table_data: [[]],
|
@@ -291,6 +260,9 @@ module CombinePDF
|
|
291
260
|
end
|
292
261
|
table
|
293
262
|
end
|
263
|
+
def new_table(options = {})
|
264
|
+
create_table options
|
265
|
+
end
|
294
266
|
|
295
267
|
# adds a correctly formatted font object to the font library.
|
296
268
|
#
|
@@ -311,14 +283,21 @@ module CombinePDF
|
|
311
283
|
#
|
312
284
|
# returns the font on success or false on failure.
|
313
285
|
#
|
286
|
+
# example:
|
287
|
+
# fonts = CombinePDF.new("japanese_fonts.pdf").fonts(true)
|
288
|
+
# CombinePDF.register_font_from_pdf_object :david, fonts[0]
|
289
|
+
#
|
314
290
|
# VERY LIMITED SUPPORT:
|
315
291
|
# - at the moment it only imports Type0 fonts.
|
316
292
|
# - also, to extract the Hash of the actual font object you were looking for, is not a trivial matter. I do it on the console.
|
317
293
|
# font_name:: a Symbol with the name of the font registry. if the fonts exists in the library, it will be overwritten!
|
318
294
|
# font_object:: a Hash in the internal format recognized by CombinePDF, that represents the font object.
|
319
|
-
def
|
295
|
+
def register_existing_font font_name, font_object
|
320
296
|
Fonts.register_font_from_pdf_object font_name, font_object
|
321
297
|
end
|
298
|
+
def register_font_from_pdf_object font_name, font_object
|
299
|
+
register_existing_font font_name, font_object
|
300
|
+
end
|
322
301
|
end
|
323
302
|
|
324
303
|
|
@@ -10,19 +10,13 @@
|
|
10
10
|
|
11
11
|
module CombinePDF
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
# <b>not fully tested!</b>
|
16
|
-
#
|
17
|
-
# NO UNICODE SUPPORT!
|
13
|
+
# Limited Unicode Support (font dependent)!
|
18
14
|
#
|
19
15
|
# The PDFWriter class is a subclass of Hash and represents a PDF Page object.
|
20
16
|
#
|
21
17
|
# Writing on this Page is done using the textbox function.
|
22
18
|
#
|
23
|
-
# Setting the page dimensions can be either at the new or using the mediabox method.
|
24
|
-
#
|
25
|
-
# the rest of the methods are for internal use.
|
19
|
+
# Setting the page dimensions can be either at the new or using the mediabox method. New pages default to size A4, which is: [0, 0, 595.3, 841.9].
|
26
20
|
#
|
27
21
|
# Once the Page is completed (the last text box was added),
|
28
22
|
# we can insert the page to a CombinePDF object.
|
@@ -41,7 +35,10 @@ module CombinePDF
|
|
41
35
|
# pdf.save "stamped_file.pdf"
|
42
36
|
class PDFWriter < Hash
|
43
37
|
|
44
|
-
|
38
|
+
# create a new PDFWriter object.
|
39
|
+
#
|
40
|
+
# mediabox:: the PDF page size in PDF points. defaults to [0, 0, 595.3, 841.9] (A4)
|
41
|
+
def initialize(mediabox = [0, 0, 595.3, 841.9])
|
45
42
|
# indirect_reference_id, :indirect_generation_number
|
46
43
|
@contents = ""
|
47
44
|
@base_font_name = "Writer" + SecureRandom.urlsafe_base64(7) + "PDF"
|
@@ -13,8 +13,12 @@ module CombinePDF
|
|
13
13
|
protected
|
14
14
|
|
15
15
|
# @private
|
16
|
+
# @!visibility private
|
17
|
+
|
16
18
|
# This is an internal class. you don't need it.
|
17
19
|
class PDFDecrypt
|
20
|
+
|
21
|
+
# @!visibility private
|
18
22
|
|
19
23
|
# make a new Decrypt object. requires:
|
20
24
|
# objects:: an array containing the encrypted objects.
|
@@ -8,14 +8,17 @@
|
|
8
8
|
|
9
9
|
|
10
10
|
module CombinePDF
|
11
|
-
#@private
|
12
11
|
#:nodoc: all
|
13
12
|
|
14
13
|
protected
|
15
14
|
|
15
|
+
# @!visibility private
|
16
|
+
|
16
17
|
# This is an internal class. you don't need it.
|
17
18
|
module PDFFilter
|
18
19
|
module_function
|
20
|
+
|
21
|
+
# @!visibility private
|
19
22
|
|
20
23
|
# deflate / compress an object.
|
21
24
|
#
|
@@ -13,6 +13,7 @@
|
|
13
13
|
module CombinePDF
|
14
14
|
|
15
15
|
#:nodoc: all
|
16
|
+
# @!visibility private
|
16
17
|
|
17
18
|
|
18
19
|
|
@@ -20,6 +21,8 @@ module CombinePDF
|
|
20
21
|
|
21
22
|
protected
|
22
23
|
|
24
|
+
# @!visibility private
|
25
|
+
|
23
26
|
# the internal class for the Fonts model
|
24
27
|
#
|
25
28
|
# this is an internal class, used by PDFWriter and PDF. you don't normally need to use this.
|
@@ -1,12 +1,11 @@
|
|
1
1
|
module CombinePDF
|
2
2
|
|
3
|
-
#:nodoc: all
|
4
3
|
################################################################
|
5
4
|
## These are common functions, used within the different classes
|
6
5
|
## These functions aren't open to the public.
|
7
6
|
################################################################
|
8
7
|
|
9
|
-
|
8
|
+
|
10
9
|
# lists the Hash keys used for PDF objects
|
11
10
|
#
|
12
11
|
# the CombinePDF library doesn't use special classes for its objects (PDFPage class, PDFStream class or anything like that).
|
@@ -16,15 +15,22 @@ module CombinePDF
|
|
16
15
|
# this Hash lists the private Hash keys that the CombinePDF library uses to
|
17
16
|
# differentiate between complex PDF objects.
|
18
17
|
PRIVATE_HASH_KEYS = [:indirect_reference_id, :indirect_generation_number, :raw_stream_content, :is_reference_only, :referenced_object, :indirect_without_dictionary]
|
19
|
-
|
18
|
+
|
19
|
+
# @private
|
20
|
+
# @!visibility private
|
20
21
|
#:nodoc: all
|
21
22
|
|
22
23
|
protected
|
23
24
|
|
25
|
+
# @!visibility private
|
24
26
|
|
25
27
|
# This is an internal class. you don't need it.
|
26
28
|
module PDFOperations
|
29
|
+
|
27
30
|
module_function
|
31
|
+
|
32
|
+
# @!visibility private
|
33
|
+
|
28
34
|
def inject_to_page page = {Type: :Page, MediaBox: [0,0,612.0,792.0], Resources: {}, Contents: []}, stream = nil, top = true
|
29
35
|
# make sure both the page receiving the new data and the injected page are of the correct data type.
|
30
36
|
return false unless page.is_a?(Hash) && stream.is_a?(Hash)
|
@@ -11,7 +11,8 @@
|
|
11
11
|
module CombinePDF
|
12
12
|
|
13
13
|
|
14
|
-
|
14
|
+
# @!visibility private
|
15
|
+
# @private
|
15
16
|
#:nodoc: all
|
16
17
|
|
17
18
|
protected
|
@@ -25,6 +26,9 @@ module CombinePDF
|
|
25
26
|
# This is an internal class. you don't need it.
|
26
27
|
class PDFParser
|
27
28
|
|
29
|
+
# @!visibility private
|
30
|
+
|
31
|
+
|
28
32
|
# the array containing all the parsed data (PDF Objects)
|
29
33
|
attr_reader :parsed
|
30
34
|
# a Float representing the PDF version of the data parsed (if exists).
|
@@ -254,6 +254,24 @@ module CombinePDF
|
|
254
254
|
page_list
|
255
255
|
end
|
256
256
|
|
257
|
+
# returns an array with the different fonts used in the file.
|
258
|
+
#
|
259
|
+
# Type0 font objects ( "font[:Subtype] == :Type0" ) can be registered with the font library
|
260
|
+
# for use in PDFWriter objects (font numbering / table creation etc').
|
261
|
+
# @param limit_to_type0 [true,false] limits the list to type0 fonts.
|
262
|
+
def fonts(limit_to_type0 = false)
|
263
|
+
fonts_array = []
|
264
|
+
pages.each do |p|
|
265
|
+
p[:Resources][:Font].values.each do |f|
|
266
|
+
f = f[:referenced_object] if f[:referenced_object]
|
267
|
+
if (limit_to_type0 || f[:Subtype] = :Type0) && f[:Type] == :Font && !fonts_array.include?(f)
|
268
|
+
fonts_array << f
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
fonts_array
|
273
|
+
end
|
274
|
+
|
257
275
|
# add the pages (or file) to the PDF (combine/merge) and return the new pages array.
|
258
276
|
# for example:
|
259
277
|
#
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: combine_pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boaz Segev
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-09-
|
12
|
+
date: 2014-09-16 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ruby-rc4
|