combine_pdf 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/combine_pdf.rb +48 -69
- data/lib/combine_pdf/combine_pdf_basic_writer.rb +6 -9
- data/lib/combine_pdf/combine_pdf_decrypt.rb +4 -0
- data/lib/combine_pdf/combine_pdf_filter.rb +4 -1
- data/lib/combine_pdf/combine_pdf_fonts.rb +3 -0
- data/lib/combine_pdf/combine_pdf_operations.rb +9 -3
- data/lib/combine_pdf/combine_pdf_parser.rb +5 -1
- data/lib/combine_pdf/combine_pdf_pdf.rb +18 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b66122d4b265048987ad995f6d20e832d70e1e67
|
4
|
+
data.tar.gz: 06d4c4aa18793189a4b166eb535a089d71615768
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ae6a99d83dcc63459094fc9c438520aaa409591aef52f4e491b03228cb9636b40c53de8c6b08a656a22ad2a54aa4d4e9e54a83ede7db26cefcb471bade392551
|
7
|
+
data.tar.gz: e4214c1f95fac5fee4c4617ad55d4fc1f704f52261ea0600b59cc89c083d473a89cff793ff1cbb6b7fd8d509ceec7b27f2a4bade651d7a29105e180b4bdaa7d5
|
data/lib/combine_pdf.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
|
+
# use under GPLv3 terms only
|
3
|
+
|
2
4
|
require 'zlib'
|
3
5
|
require 'securerandom'
|
4
6
|
require 'strscan'
|
@@ -32,7 +34,7 @@ load "combine_pdf/combine_pdf_pdf.rb"
|
|
32
34
|
|
33
35
|
|
34
36
|
|
35
|
-
# This is a pure ruby library to combine/merge, stmap/overlay and number PDF files.
|
37
|
+
# This is a pure ruby library to combine/merge, stamp/overlay and number PDF files - as well as to create tables (meant for indexing combined files).
|
36
38
|
#
|
37
39
|
# You can also use this library for writing basic text content into new or existing PDF files (For authoring new PDF files look at the Prawn ruby library).
|
38
40
|
#
|
@@ -42,12 +44,10 @@ load "combine_pdf/combine_pdf_pdf.rb"
|
|
42
44
|
# == Loading PDF data
|
43
45
|
# Loading PDF data can be done from file system or directly from the memory.
|
44
46
|
#
|
45
|
-
#
|
47
|
+
# Load data from a file:
|
46
48
|
# pdf = CombinePDF.new("file.pdf")
|
47
|
-
#
|
48
|
-
# pdf_data = IO.read 'file.pdf' # for this demo, load a file to memory
|
49
|
+
# parse PDF files from memory:
|
49
50
|
# pdf = CombinePDF.parse(pdf_data)
|
50
|
-
# Loading from the memory is especially effective for importing PDF data recieved through the internet or from a different authoring library such as Prawn.
|
51
51
|
#
|
52
52
|
# == Combine/Merge PDF files or Pages
|
53
53
|
# To combine PDF files (or data):
|
@@ -55,10 +55,8 @@ load "combine_pdf/combine_pdf_pdf.rb"
|
|
55
55
|
# pdf << CombinePDF.new("file1.pdf")
|
56
56
|
# pdf << CombinePDF.new("file2.pdf")
|
57
57
|
# pdf.save "combined.pdf"
|
58
|
-
# as demonstrated above, these can be chained for into a one-liner.
|
59
|
-
#
|
60
|
-
# you can also choose to add only specific pages.
|
61
58
|
#
|
59
|
+
# It is possible to add only specific pages.
|
62
60
|
# in this example, only even pages will be added:
|
63
61
|
# pdf = CombinePDF.new
|
64
62
|
# i = 0
|
@@ -67,30 +65,21 @@ load "combine_pdf/combine_pdf_pdf.rb"
|
|
67
65
|
# pdf << page if i.even?
|
68
66
|
# end
|
69
67
|
# pdf.save "even_pages.pdf"
|
70
|
-
#
|
68
|
+
# Notice that adding the whole file is faster than adding each page separately.
|
71
69
|
# == Add content to existing pages (Stamp / Watermark)
|
72
|
-
#
|
73
|
-
# after that, add the content to each of the pages in your existing PDF.
|
74
|
-
#
|
75
|
-
# in this example, a company logo will be stamped over each page:
|
70
|
+
# It is possible to "stamp" one PDF page using another PDF page. In this example, a company logo will be stamped over each page:
|
76
71
|
# company_logo = CombinePDF.new("company_logo.pdf").pages[0]
|
77
72
|
# pdf = CombinePDF.new "content_file.pdf"
|
78
73
|
# pdf.pages.each {|page| page << company_logo}
|
79
74
|
# pdf.save "content_with_logo.pdf"
|
80
75
|
# Notice the << operator is on a page and not a PDF object. The << operator acts differently on PDF objects and on Pages.
|
81
|
-
#
|
82
|
-
# The << operator defaults to secure injection by renaming references to avoid conflics.
|
83
|
-
#
|
84
|
-
# Less recommended, but available - for overlaying pages using compressed data that might not be editable (due to limited filter support), you can use:
|
85
|
-
# pdf.pages(nil, false).each {|page| page << stamp_page}
|
86
|
-
#
|
87
76
|
# == Page Numbering
|
88
|
-
#
|
77
|
+
# It is possible to number the pages. in this example we will add very simple numbering:
|
89
78
|
# pdf = CombinePDF.new "file_to_number.pdf"
|
90
79
|
# pdf.number_pages
|
91
80
|
# pdf.save "file_with_numbering.pdf"
|
92
81
|
#
|
93
|
-
# numbering can be done with many different options, with different formating, with or without a box object, and even with opacity values.
|
82
|
+
# numbering can be done with many different options, with different formatting, with or without a box object, different locations on each page and even with opacity values.
|
94
83
|
# == Writing Content
|
95
84
|
# page numbering actually adds content using the PDFWriter object (a very basic writer).
|
96
85
|
#
|
@@ -100,54 +89,30 @@ load "combine_pdf/combine_pdf_pdf.rb"
|
|
100
89
|
# pdf.number_pages number_format: " - Draft, page %d - ", number_location: [:top], font_color: [0,0,1], box_color: [0.4,0,0], opacity: 0.75, font_size:16
|
101
90
|
# pdf.save "draft.pdf"
|
102
91
|
#
|
103
|
-
#
|
92
|
+
# in this example we will add a first page with the word "Draft", in red over a colored background:
|
104
93
|
#
|
105
|
-
# pdf = CombinePDF.new "
|
106
|
-
#
|
107
|
-
#
|
108
|
-
#
|
109
|
-
#
|
110
|
-
#
|
111
|
-
# # CombinePDF is pointer based...
|
112
|
-
# # so you can add the stamp to the page and still continue to edit it's content!
|
113
|
-
# stamp = PDFWriter.new mediabox
|
114
|
-
# page << stamp
|
115
|
-
# # set the visible dimensions to the CropBox, if it exists.
|
116
|
-
# cropbox = page[:CropBox]
|
117
|
-
# mediabox = cropbox if cropbox
|
118
|
-
# # set stamp text
|
119
|
-
# text = " Draft (page %d) " % page_number
|
120
|
-
# # write the textbox
|
121
|
-
# stamp.textbox text, x: mediabox[0]+30, y: mediabox[1]+30, width: mediabox[2]-mediabox[0]-60, height: mediabox[3]-mediabox[1]-60, font_color: [0,0,1], font_size: :fit_text, box_color: [0.4,0,0], opacity: 0.5
|
122
|
-
# end
|
94
|
+
# pdf = CombinePDF.new "file.pdf"
|
95
|
+
# pdf_first_page = pdf.pages[0]
|
96
|
+
# mediabox = pdf_first_page[:CropBox] || pdf_first_page[:MediaBox] #copy page size
|
97
|
+
# title_page = CombinePDF.create_page mediabox #make title page same size as first page
|
98
|
+
# title_page.textbox "DRAFT", font_color: [0.8,0,0], font_size: :fit_text, box_color: [1,0.8,0.8], opacity: 1
|
99
|
+
# pdf >> title_page # the >> operator adds pages at the beginning
|
123
100
|
# pdf.save "draft.pdf"
|
124
101
|
#
|
125
|
-
#
|
126
|
-
# font support for the writer is still in the works and is extreamly limited.
|
127
|
-
# at the moment it is best to limit the fonts to the 14 standard latin fonts (no unicode).
|
102
|
+
# font support for the writer is still in the works and is limited to extracting known fonts by location of the 14 standard fonts.
|
128
103
|
#
|
129
104
|
# == Decryption & Filters
|
130
105
|
#
|
131
|
-
# Some PDF files are encrypted and some are compressed (the use of filters)...
|
132
|
-
#
|
133
|
-
# There is very little support for encrypted files and very very basic and limited support for compressed files.
|
134
|
-
#
|
135
|
-
# I need help with that.
|
136
|
-
#
|
106
|
+
# Some PDF files are encrypted and some are compressed (the use of filters)... not all files can be opened, merged, stamped or used as stamps.
|
137
107
|
# == Comments and file structure
|
138
108
|
#
|
139
109
|
# If you want to help with the code, please be aware:
|
140
110
|
#
|
141
|
-
# I'm a self learned hobbiest at heart. The documentation is lacking and the comments in the code are poor guidlines.
|
142
|
-
#
|
143
111
|
# The code itself should be very straight forward, but feel free to ask whatever you want.
|
144
112
|
#
|
145
113
|
# == Credit
|
146
114
|
#
|
147
|
-
# Caige Nichols wrote an amazing RC4 gem which I
|
148
|
-
#
|
149
|
-
# I wanted to install the gem, but I had issues with the internet and ended up copying the code itself into the combine_pdf_decrypt class file.
|
150
|
-
#
|
115
|
+
# Caige Nichols wrote an amazing RC4 gem which I reference in my code.
|
151
116
|
# Credit to his wonderful work is given here. Please respect his license and copyright... and mine.
|
152
117
|
#
|
153
118
|
# == License
|
@@ -158,11 +123,15 @@ module CombinePDF
|
|
158
123
|
|
159
124
|
# Create an empty PDF object or create a PDF object from a file (parsing the file).
|
160
125
|
# file_name:: is the name of a file to be parsed.
|
161
|
-
def
|
162
|
-
raise TypeError, "couldn't parse and data, expecting type String" unless file_name.is_a?
|
126
|
+
# Create an empty PDF object, or create a PDF object by reading and parsing a file.
#
# file_name:: a String or Pathname naming the file to be parsed. defaults to "" (an empty String), in which case a new, empty PDF object is returned.
#
# raises a TypeError when file_name is neither a String nor a Pathname.
def load(file_name = "")
  # guard the Pathname check with defined? so that a missing Pathname constant
  # cannot turn the intended TypeError into a NameError.
  path_like = file_name.is_a?(String) || (defined?(Pathname) && file_name.is_a?(Pathname))
  raise TypeError, "couldn't parse and data, expecting type String" unless path_like
  return PDF.new if file_name == ''
  # read in binary mode, so the raw bytes reach the parser untranslated and
  # already tagged as ASCII-8BIT.
  PDF.new(PDFParser.new(IO.binread(file_name)))
end
|
131
|
+
# Alternative name for the load method: hands file_name over to load and returns whatever load returns.
#
# file_name:: passed on to load unchanged. defaults to "" (an empty String).
def new(file_name = "")
  load file_name
end
|
134
|
+
|
166
135
|
# Create a PDF object from a raw PDF data (parsing the data).
|
167
136
|
# data:: is a string that represents the content of a PDF file.
|
168
137
|
def parse(data)
|
@@ -180,7 +149,7 @@ module CombinePDF
|
|
180
149
|
# mediabox:: an Array representing the size of the PDF page. defaults to: [0, 0, 595.3, 841.9] (A4)
|
181
150
|
#
|
182
151
|
# if the page is PDFWriter object as a stamp, the final size will be that of the original page.
|
183
|
-
def create_page(mediabox = [0
|
152
|
+
# Builds a new PDFWriter page object.
#
# mediabox:: the page-size Array handed to PDFWriter.new. defaults to [0, 0, 595.3, 841.9].
def create_page(mediabox = [0, 0, 595.3, 841.9])
  PDFWriter.new(mediabox)
end
|
186
155
|
|
@@ -192,7 +161,7 @@ module CombinePDF
|
|
192
161
|
# the main intended use of this method is to create indexes (a table of contents) for merged data.
|
193
162
|
#
|
194
163
|
# example:
|
195
|
-
# pdf = CombinePDF.
|
164
|
+
# pdf = CombinePDF.create_table headers: ["header 1", "another header"], table_data: [ ["this is one row", "with two columns"] , ["this is another row", "also two columns", "the third will be ignored"] ]
|
196
165
|
# pdf.save "table_file.pdf"
|
197
166
|
#
|
198
167
|
# accepts a Hash with any of the following keys as well as any of the PDFWriter#textbox options:
|
@@ -204,16 +173,16 @@ module CombinePDF
|
|
204
173
|
# column_widths:: an array of relative column widths ([1,2] will display only the first two columns, the second twice as big as the first). defaults to nil (even widths).
|
205
174
|
# header_color:: the header color. defaults to [0.8, 0.8, 0.8] (light gray).
|
206
175
|
# main_color:: main row color. defaults to nil (transparent / white).
|
207
|
-
# alternate_color
|
208
|
-
# font_color
|
209
|
-
# border_color
|
210
|
-
# border_width
|
211
|
-
# header_align
|
176
|
+
# alternate_color:: alternate row color. defaults to [0.95, 0.95, 0.95] (very light gray).
|
177
|
+
# font_color:: font color. defaults to [0,0,0] (black).
|
178
|
+
# border_color:: border color. defaults to [0,0,0] (black).
|
179
|
+
# border_width:: border width in PDF units. defaults to 1.
|
180
|
+
# header_align:: the header text alignment within each column (:right, :left, :center). defaults to :center.
|
212
181
|
# row_align:: the row text alignment within each column. defaults to :left (:right for RTL table).
|
213
|
-
# direction
|
214
|
-
# rows_per_page
|
215
|
-
# page_size
|
216
|
-
def
|
182
|
+
# direction:: the table's writing direction (:ltr or :rtl). this refers to the direction of the columns and doesn't affect text (rtl text is automatically recognized). defaults to :ltr.
|
183
|
+
# rows_per_page:: the number of rows per page, INCLUDING the header row. defaults to 25.
|
184
|
+
# page_size:: the size of the page in PDF points. defaults to [0, 0, 595.3, 841.9] (A4).
|
185
|
+
def create_table(options = {})
|
217
186
|
defaults = {
|
218
187
|
headers: nil,
|
219
188
|
table_data: [[]],
|
@@ -291,6 +260,9 @@ module CombinePDF
|
|
291
260
|
end
|
292
261
|
table
|
293
262
|
end
|
263
|
+
# Alternative name for create_table: forwards the options Hash to create_table and returns its result.
#
# options:: passed on to create_table unchanged. defaults to an empty Hash.
def new_table(options = {})
  create_table(options)
end
|
294
266
|
|
295
267
|
# adds a correctly formatted font object to the font library.
|
296
268
|
#
|
@@ -311,14 +283,21 @@ module CombinePDF
|
|
311
283
|
#
|
312
284
|
# returns the font on success or false on failure.
|
313
285
|
#
|
286
|
+
# example:
|
287
|
+
# fonts = CombinePDF.new("japanese_fonts.pdf").fonts(true)
|
288
|
+
# CombinePDF.register_font_from_pdf_object :david, fonts[0]
|
289
|
+
#
|
314
290
|
# VERY LIMITED SUPPORT:
|
315
291
|
# - at the moment it only imports Type0 fonts.
|
316
292
|
# - also, to extract the Hash of the actual font object you were looking for, is not a trivial matter. I do it on the console.
|
317
293
|
# font_name:: a Symbol with the name of the font registry. if the fonts exists in the library, it will be overwritten!
|
318
294
|
# font_object:: a Hash in the internal format recognized by CombinePDF, that represents the font object.
|
319
|
-
def
|
295
|
+
# Hands a font object over to the Fonts library for registration under the given name.
#
# font_name:: passed to Fonts.register_font_from_pdf_object as the first argument.
# font_object:: passed to Fonts.register_font_from_pdf_object as the second argument.
#
# returns whatever Fonts.register_font_from_pdf_object returns.
def register_existing_font(font_name, font_object)
  Fonts.register_font_from_pdf_object(font_name, font_object)
end
|
298
|
+
# Alternative name for register_existing_font: forwards both arguments and returns its result.
#
# font_name:: passed on to register_existing_font unchanged.
# font_object:: passed on to register_existing_font unchanged.
def register_font_from_pdf_object(font_name, font_object)
  register_existing_font(font_name, font_object)
end
|
322
301
|
end
|
323
302
|
|
324
303
|
|
@@ -10,19 +10,13 @@
|
|
10
10
|
|
11
11
|
module CombinePDF
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
# <b>not fully tested!</b>
|
16
|
-
#
|
17
|
-
# NO UNICODE SUPPORT!
|
13
|
+
# Limited Unicode Support (font dependent)!
|
18
14
|
#
|
19
15
|
# The PDFWriter class is a subclass of Hash and represents a PDF Page object.
|
20
16
|
#
|
21
17
|
# Writing on this Page is done using the textbox function.
|
22
18
|
#
|
23
|
-
# Setting the page dimensions can be either at the new or using the mediabox method.
|
24
|
-
#
|
25
|
-
# the rest of the methods are for internal use.
|
19
|
+
# Setting the page dimensions can be either at the new or using the mediabox method. New pages default to size A4, which is: [0, 0, 595.3, 841.9].
|
26
20
|
#
|
27
21
|
# Once the Page is completed (the last text box was added),
|
28
22
|
# we can insert the page to a CombinePDF object.
|
@@ -41,7 +35,10 @@ module CombinePDF
|
|
41
35
|
# pdf.save "stamped_file.pdf"
|
42
36
|
class PDFWriter < Hash
|
43
37
|
|
44
|
-
|
38
|
+
# create a new PDFWriter object.
|
39
|
+
#
|
40
|
+
# mediabox:: the PDF page size in PDF points. defaults to [0, 0, 595.3, 841.9] (A4)
|
41
|
+
def initialize(mediabox = [0, 0, 595.3, 841.9])
|
45
42
|
# indirect_reference_id, :indirect_generation_number
|
46
43
|
@contents = ""
|
47
44
|
@base_font_name = "Writer" + SecureRandom.urlsafe_base64(7) + "PDF"
|
@@ -13,8 +13,12 @@ module CombinePDF
|
|
13
13
|
protected
|
14
14
|
|
15
15
|
# @private
|
16
|
+
# @!visibility private
|
17
|
+
|
16
18
|
# This is an internal class. you don't need it.
|
17
19
|
class PDFDecrypt
|
20
|
+
|
21
|
+
# @!visibility private
|
18
22
|
|
19
23
|
# make a new Decrypt object. requires:
|
20
24
|
# objects:: an array containing the encrypted objects.
|
@@ -8,14 +8,17 @@
|
|
8
8
|
|
9
9
|
|
10
10
|
module CombinePDF
|
11
|
-
#@private
|
12
11
|
#:nodoc: all
|
13
12
|
|
14
13
|
protected
|
15
14
|
|
15
|
+
# @!visibility private
|
16
|
+
|
16
17
|
# This is an internal class. you don't need it.
|
17
18
|
module PDFFilter
|
18
19
|
module_function
|
20
|
+
|
21
|
+
# @!visibility private
|
19
22
|
|
20
23
|
# deflate / compress an object.
|
21
24
|
#
|
@@ -13,6 +13,7 @@
|
|
13
13
|
module CombinePDF
|
14
14
|
|
15
15
|
#:nodoc: all
|
16
|
+
# @!visibility private
|
16
17
|
|
17
18
|
|
18
19
|
|
@@ -20,6 +21,8 @@ module CombinePDF
|
|
20
21
|
|
21
22
|
protected
|
22
23
|
|
24
|
+
# @!visibility private
|
25
|
+
|
23
26
|
# the internal class for the Fonts model
|
24
27
|
#
|
25
28
|
# this is an internal class, used by PDFWriter and PDF. you don't normally need to use this.
|
@@ -1,12 +1,11 @@
|
|
1
1
|
module CombinePDF
|
2
2
|
|
3
|
-
#:nodoc: all
|
4
3
|
################################################################
|
5
4
|
## These are common functions, used within the different classes
|
6
5
|
## These functions aren't open to the public.
|
7
6
|
################################################################
|
8
7
|
|
9
|
-
|
8
|
+
|
10
9
|
# lists the Hash keys used for PDF objects
|
11
10
|
#
|
12
11
|
# the CombinePDF library doesn't use special classes for its objects (PDFPage class, PDFStream class or anything like that).
|
@@ -16,15 +15,22 @@ module CombinePDF
|
|
16
15
|
# this Hash lists the private Hash keys that the CombinePDF library uses to
|
17
16
|
# differentiate between complex PDF objects.
|
18
17
|
PRIVATE_HASH_KEYS = [:indirect_reference_id, :indirect_generation_number, :raw_stream_content, :is_reference_only, :referenced_object, :indirect_without_dictionary]
|
19
|
-
|
18
|
+
|
19
|
+
# @private
|
20
|
+
# @!visibility private
|
20
21
|
#:nodoc: all
|
21
22
|
|
22
23
|
protected
|
23
24
|
|
25
|
+
# @!visibility private
|
24
26
|
|
25
27
|
# This is an internal class. you don't need it.
|
26
28
|
module PDFOperations
|
29
|
+
|
27
30
|
module_function
|
31
|
+
|
32
|
+
# @!visibility private
|
33
|
+
|
28
34
|
def inject_to_page page = {Type: :Page, MediaBox: [0,0,612.0,792.0], Resources: {}, Contents: []}, stream = nil, top = true
|
29
35
|
# make sure both the page reciving the new data and the injected page are of the correct data type.
|
30
36
|
return false unless page.is_a?(Hash) && stream.is_a?(Hash)
|
@@ -11,7 +11,8 @@
|
|
11
11
|
module CombinePDF
|
12
12
|
|
13
13
|
|
14
|
-
|
14
|
+
# @!visibility private
|
15
|
+
# @private
|
15
16
|
#:nodoc: all
|
16
17
|
|
17
18
|
protected
|
@@ -25,6 +26,9 @@ module CombinePDF
|
|
25
26
|
# This is an internal class. you don't need it.
|
26
27
|
class PDFParser
|
27
28
|
|
29
|
+
# @!visibility private
|
30
|
+
|
31
|
+
|
28
32
|
# the array containing all the parsed data (PDF Objects)
|
29
33
|
attr_reader :parsed
|
30
34
|
# a Float representing the PDF version of the data parsed (if exists).
|
@@ -254,6 +254,24 @@ module CombinePDF
|
|
254
254
|
page_list
|
255
255
|
end
|
256
256
|
|
257
|
+
# returns an array with the different fonts used in the file (each font object is listed once).
#
# Type0 font objects ( "font[:Subtype] == :Type0" ) can be registered with the font library
# for use in PDFWriter objects (font numbering / table creation etc.).
# @param limit_to_type0 [true,false] when true, only Type0 fonts are listed; when false (the default), all fonts are listed.
# @return [Array<Hash>] the font objects found in the pages' :Font resources.
def fonts(limit_to_type0 = false)
  fonts_array = []
  pages.each do |page|
    resources = page[:Resources]
    font_dictionary = resources[:Font] if resources.is_a?(Hash)
    # a page with no font dictionary has nothing to contribute.
    next unless font_dictionary.is_a?(Hash)
    font_dictionary.each_value do |font|
      # follow the reference to the actual font object, when one is present.
      font = font[:referenced_object] if font[:referenced_object]
      next unless font[:Type] == :Font
      # compare with ==; an assignment here would overwrite the font's subtype.
      next if limit_to_type0 && font[:Subtype] != :Type0
      fonts_array << font unless fonts_array.include?(font)
    end
  end
  fonts_array
end
|
274
|
+
|
257
275
|
# add the pages (or file) to the PDF (combine/merge) and return the new pages array.
|
258
276
|
# for example:
|
259
277
|
#
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: combine_pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boaz Segev
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-09-
|
12
|
+
date: 2014-09-16 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: ruby-rc4
|