combine_pdf 0.2.5 → 0.2.37
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGELOG.md +273 -27
- data/LICENSE.txt +2 -1
- data/README.md +69 -4
- data/lib/combine_pdf/api.rb +156 -153
- data/lib/combine_pdf/basic_writer.rb +41 -53
- data/lib/combine_pdf/decrypt.rb +238 -228
- data/lib/combine_pdf/exceptions.rb +4 -0
- data/lib/combine_pdf/filter.rb +79 -85
- data/lib/combine_pdf/fonts.rb +451 -462
- data/lib/combine_pdf/page_methods.rb +891 -946
- data/lib/combine_pdf/parser.rb +663 -531
- data/lib/combine_pdf/pdf_protected.rb +341 -126
- data/lib/combine_pdf/pdf_public.rb +492 -454
- data/lib/combine_pdf/renderer.rb +146 -141
- data/lib/combine_pdf/version.rb +1 -2
- data/lib/combine_pdf.rb +14 -18
- data/test/automated +132 -0
- data/test/console +4 -4
- data/test/named_dest +84 -0
- metadata +8 -5
- data/lib/combine_pdf/operations.rb +0 -416
data/lib/combine_pdf/api.rb
CHANGED
@@ -1,167 +1,170 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
2
|
|
3
|
+
module CombinePDF
|
4
|
+
module_function
|
3
5
|
|
6
|
+
# Create an empty PDF object or create a PDF object from a file (parsing the file).
|
7
|
+
# file_name:: is the name of a file to be parsed.
|
8
|
+
def load(file_name = '', options = {})
|
9
|
+
raise TypeError, "couldn't parse data, expecting type String" unless file_name.is_a?(String) || file_name.is_a?(Pathname)
|
10
|
+
return PDF.new if file_name == ''
|
11
|
+
PDF.new(PDFParser.new(IO.read(file_name, mode: 'rb').force_encoding(Encoding::ASCII_8BIT), options))
|
12
|
+
end
|
4
13
|
|
14
|
+
# creates a new PDF object.
|
15
|
+
#
|
16
|
+
# Combine PDF will check to see if `string` is a filename.
|
17
|
+
# If it's a file name, it will attempt to load the PDF file using `CombinePDF.load`. Otherwise it will attempt parsing `string` using `CombinePDF.parse`.
|
18
|
+
#
|
19
|
+
# If the string is empty it will return a new PDF object (the same as parse).
|
20
|
+
#
|
21
|
+
# For both performance and code readability reasons, `CombinePDF.load` and `CombinePDF.parse` should be preferred unless creating a new PDF object.
|
22
|
+
def new(string = false)
|
23
|
+
return PDF.new unless string
|
24
|
+
raise TypeError, "couldn't create PDF object, expecting type String" unless string.is_a?(String) || string.is_a?(Pathname)
|
25
|
+
begin
|
26
|
+
(begin
|
27
|
+
File.file? string
|
28
|
+
rescue
|
29
|
+
false
|
30
|
+
end) ? load(string) : parse(string)
|
31
|
+
rescue => e
|
32
|
+
raise 'General PDF error - Use CombinePDF.load or CombinePDF.parse for a non-general error message (the requested file was not found OR the string received is not a valid PDF stream OR the file was found but not valid).'
|
33
|
+
end
|
34
|
+
end
|
5
35
|
|
36
|
+
# Create a PDF object from a raw PDF data (parsing the data).
|
37
|
+
# data:: is a string that represents the content of a PDF file.
|
38
|
+
def parse(data, options = {})
|
39
|
+
raise TypeError, "couldn't parse and data, expecting type String" unless data.is_a? String
|
40
|
+
PDF.new(PDFParser.new(data, options))
|
41
|
+
end
|
6
42
|
|
7
|
-
|
8
|
-
|
43
|
+
# makes a PDFWriter object
|
44
|
+
#
|
45
|
+
# PDFWriter objects represent an empty page and have the method "textbox"
|
46
|
+
# that adds content to that page.
|
47
|
+
#
|
48
|
+
# PDFWriter objects are used internally for numbering pages (by creating a PDF page
|
49
|
+
# with the page number and "stamping" it over the existing page).
|
50
|
+
#
|
51
|
+
# mediabox:: an Array representing the size of the PDF document. defaults to: [0.0, 0.0, 612.0, 792.0] (US Letter)
|
52
|
+
#
|
53
|
+
# if the PDFWriter object is used as a stamp, the final size will be that of the original page.
|
54
|
+
def create_page(mediabox = [0, 0, 612.0, 792.0])
|
55
|
+
PDFWriter.new mediabox
|
56
|
+
end
|
9
57
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
58
|
+
# makes a PDF object containing a table
|
59
|
+
#
|
60
|
+
# all the pages in this PDF object are PDFWriter objects and are
|
61
|
+
# writable using the textbox function (should you wish to add a title, or more info)
|
62
|
+
#
|
63
|
+
# the main intended use of this method is to create indexes (a table of contents) for merged data.
|
64
|
+
#
|
65
|
+
# example:
|
66
|
+
# pdf = CombinePDF.create_table headers: ["header 1", "another header"], table_data: [ ["this is one row", "with two columns"] , ["this is another row", "also two columns", "the third will be ignored"] ]
|
67
|
+
# pdf.save "table_file.pdf"
|
68
|
+
#
|
69
|
+
# accepts a Hash with any of the following keys as well as any of the Page_Methods#textbox options:
|
70
|
+
# headers:: an Array of strings with the headers (will be repeated every page).
|
71
|
+
# table_data:: an Array of Arrays, each containing a string for each column. the first row sets the number of columns. extra columns will be ignored.
|
72
|
+
# font:: a registered or standard font name (see Page_Methods). defaults to nil (:Helvetica).
|
73
|
+
# header_font:: a registered or standard font name for the headers (see Page_Methods). defaults to nil (the font for all the table rows).
|
74
|
+
# max_font_size:: the maximum font size. if the string doesn't fit, it will be resized. defaults to 14.
|
75
|
+
# column_widths:: an array of relative column widths ([1,2] will display only the first two columns, the second twice as big as the first). defaults to nil (even widths).
|
76
|
+
# header_color:: the header color. defaults to [0.8, 0.8, 0.8] (light gray).
|
77
|
+
# main_color:: main row color. defaults to nil (transparent / white).
|
78
|
+
# alternate_color:: alternate row color. defaults to [0.95, 0.95, 0.95] (very light gray).
|
79
|
+
# font_color:: font color. defaults to [0,0,0] (black).
|
80
|
+
# border_color:: border color. defaults to [0,0,0] (black).
|
81
|
+
# border_width:: border width in PDF units. defaults to 1.
|
82
|
+
# header_align:: the header text alignment within each column (:right, :left, :center). defaults to :center.
|
83
|
+
# row_align:: the row text alignment within each column. defaults to :left (:right for RTL table).
|
84
|
+
# direction:: the table's writing direction (:ltr or :rtl). this refers to the direction of the columns and doesn't affect text (rtl text is automatically recognized). defaults to :ltr.
|
85
|
+
# max_rows:: the number of rows per page, INCLUDING the header row. defaults to 25.
|
86
|
+
# page_size:: the size of the page in PDF points. defaults to [0, 0, 595.3, 841.9] (A4).
|
87
|
+
def create_table(options = {})
|
88
|
+
options[:max_rows] = options[:rows_per_page] if options[:rows_per_page]
|
35
89
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
#
|
47
|
-
# PDFWriter objects are used internally for numbering pages (by creating a PDF page
|
48
|
-
# with the page number and "stamping" it over the existing page).
|
49
|
-
#
|
50
|
-
# mediabox:: an Array representing the size of the PDF document. defaults to: [0.0, 0.0, 612.0, 792.0] (US Letter)
|
51
|
-
#
|
52
|
-
# if the PDFWriter object is used as a stamp, the final size will be that of the original page.
|
53
|
-
def create_page(mediabox = [0, 0, 612.0, 792.0])
|
54
|
-
PDFWriter.new mediabox
|
55
|
-
end
|
90
|
+
page_size = options[:page_size] || [0, 0, 595.3, 841.9]
|
91
|
+
table = PDF.new
|
92
|
+
page = nil
|
93
|
+
until options[:table_data].empty?
|
94
|
+
page = create_page page_size
|
95
|
+
page.write_table options
|
96
|
+
table << page
|
97
|
+
end
|
98
|
+
table
|
99
|
+
end
|
56
100
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
# writable using the textbox function (should you wish to add a title, or more info)
|
61
|
-
#
|
62
|
-
# the main intended use of this method is to create indexes (a table of contents) for merged data.
|
63
|
-
#
|
64
|
-
# example:
|
65
|
-
# pdf = CombinePDF.create_table headers: ["header 1", "another header"], table_data: [ ["this is one row", "with two columns"] , ["this is another row", "also two columns", "the third will be ignored"] ]
|
66
|
-
# pdf.save "table_file.pdf"
|
67
|
-
#
|
68
|
-
# accepts a Hash with any of the following keys as well as any of the Page_Methods#textbox options:
|
69
|
-
# headers:: an Array of strings with the headers (will be repeated every page).
|
70
|
-
# table_data:: an Array of Arrays, each containing a string for each column. the first row sets the number of columns. extra columns will be ignored.
|
71
|
-
# font:: a registered or standard font name (see Page_Methods). defaults to nil (:Helvetica).
|
72
|
-
# header_font:: a registered or standard font name for the headers (see Page_Methods). defaults to nil (the font for all the table rows).
|
73
|
-
# max_font_size:: the maximum font size. if the string doesn't fit, it will be resized. defaults to 14.
|
74
|
-
# column_widths:: an array of relative column widths ([1,2] will display only the first two columns, the second twice as big as the first). defaults to nil (even widths).
|
75
|
-
# header_color:: the header color. defaults to [0.8, 0.8, 0.8] (light gray).
|
76
|
-
# main_color:: main row color. defaults to nil (transparent / white).
|
77
|
-
# alternate_color:: alternate row color. defaults to [0.95, 0.95, 0.95] (very light gray).
|
78
|
-
# font_color:: font color. defaults to [0,0,0] (black).
|
79
|
-
# border_color:: border color. defaults to [0,0,0] (black).
|
80
|
-
# border_width:: border width in PDF units. defaults to 1.
|
81
|
-
# header_align:: the header text alignment within each column (:right, :left, :center). defaults to :center.
|
82
|
-
# row_align:: the row text alignment within each column. defaults to :left (:right for RTL table).
|
83
|
-
# direction:: the table's writing direction (:ltr or :rtl). this refers to the direction of the columns and doesn't affect text (rtl text is automatically recognized). defaults to :ltr.
|
84
|
-
# max_rows:: the number of rows per page, INCLUDING the header row. defaults to 25.
|
85
|
-
# page_size:: the size of the page in PDF points. defaults to [0, 0, 595.3, 841.9] (A4).
|
86
|
-
def create_table(options = {})
|
87
|
-
options[:max_rows] = options[:rows_per_page] if options[:rows_per_page]
|
101
|
+
def new_table(options = {})
|
102
|
+
create_table options
|
103
|
+
end
|
88
104
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
105
|
+
# calculate a CTM value for a specific transformation.
|
106
|
+
#
|
107
|
+
# this could be used to apply transformation in #textbox and to convert visual
|
108
|
+
# rotation values into actual rotation transformation.
|
109
|
+
#
|
110
|
+
# this method accepts a Hash containing any of the following parameters:
|
111
|
+
#
|
112
|
+
# deg:: the clockwise rotation to be applied, in degrees
|
113
|
+
# tx:: the x translation to be applied.
|
114
|
+
# ty:: the y translation to be applied.
|
115
|
+
# sx:: the x scaling to be applied.
|
116
|
+
# sy:: the y scaling to be applied.
|
117
|
+
#
|
118
|
+
# * scaling will be applied after the transformation is applied.
|
119
|
+
#
|
120
|
+
def calc_ctm(parameters)
|
121
|
+
p = { deg: 0, tx: 0, ty: 0, sx: 1, sy: 1 }.merge parameters
|
122
|
+
r = p[:deg] * Math::PI / 180
|
123
|
+
s = Math.sin(r)
|
124
|
+
c = Math.cos(r)
|
125
|
+
# start with translation matrix
|
126
|
+
m = Matrix[[1, 0, 0], [0, 1, 0], [p[:tx], p[:ty], 1]]
|
127
|
+
# then rotate
|
128
|
+
m *= Matrix[[c, s, 0], [-s, c, 0], [0, 0, 1]] if parameters[:deg]
|
129
|
+
# then scale
|
130
|
+
m *= Matrix[[p[:sx], 0, 0], [0, p[:sy], 0], [0, 0, 1]] if parameters[:sx] || parameters[:sy]
|
131
|
+
# flatten array and round to 6 digits
|
132
|
+
m.to_a.flatten.values_at(0, 1, 3, 4, 6, 7).map! { |f| f.round 6 }
|
133
|
+
end
|
102
134
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
#
|
118
|
-
def calc_ctm parameters
|
119
|
-
p = {deg: 0, tx: 0, ty: 0, sx: 1, sy: 1}.merge parameters
|
120
|
-
r = p[:deg] * Math::PI / 180
|
121
|
-
s = Math.sin(r)
|
122
|
-
c = Math.cos(r)
|
123
|
-
# start with translation matrix
|
124
|
-
m = Matrix[ [1,0,0], [0,1,0], [ p[:tx], p[:ty], 1] ]
|
125
|
-
# then rotate
|
126
|
-
m = m * Matrix[ [c, s, 0], [-s, c, 0], [0, 0, 1]] if parameters[:deg]
|
127
|
-
# then scale
|
128
|
-
m = m * Matrix[ [p[:sx], 0, 0], [0, p[:sy], 0], [0,0,1] ] if parameters[:sx] || parameters[:sy]
|
129
|
-
# flatten array and round to 6 digits
|
130
|
-
m.to_a.flatten.values_at(0,1,3,4,6,7).map! {|f| f.round 6}
|
131
|
-
end
|
135
|
+
# adds a correctly formatted font object to the font library.
|
136
|
+
#
|
137
|
+
# registered fonts will remain in the library and will only be embedded in
|
138
|
+
# PDF objects when they are used by PDFWriter objects (for example, for numbering pages).
|
139
|
+
#
|
140
|
+
# this function enables plug-ins to extend the font functionality of CombinePDF.
|
141
|
+
#
|
142
|
+
# font_name:: a Symbol with the name of the font. if the font exists in the library, it will be overwritten!
|
143
|
+
# font_metrics:: a Hash of font metrics, of the format char => {wx: char_width, boundingbox: [left_x, bottom_y, right_x, top_y]} where char == character itself (i.e. " " for space). The Hash should contain a special value :missing for the metrics of missing characters. an optional :wy might be supported in the future, for top-to-bottom fonts.
|
144
|
+
# font_pdf_object:: a Hash in the internal format recognized by CombinePDF, that represents the font object.
|
145
|
+
# font_cmap:: a CMap dictionary (Hash) which maps unicode characters to the hex CID for the font (i.e. {"a" => "61", "z" => "7a" }).
|
146
|
+
def register_font(font_name, font_metrics, font_pdf_object, font_cmap = nil)
|
147
|
+
Fonts.register_font font_name, font_metrics, font_pdf_object, font_cmap
|
148
|
+
end
|
132
149
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
150
|
+
# adds an existing font (from any PDF Object) to the font library.
|
151
|
+
#
|
152
|
+
# returns the font on success or false on failure.
|
153
|
+
#
|
154
|
+
# example:
|
155
|
+
# fonts = CombinePDF.new("japanese_fonts.pdf").fonts(true)
|
156
|
+
# CombinePDF.register_font_from_pdf_object :david, fonts[0]
|
157
|
+
#
|
158
|
+
# VERY LIMITED SUPPORT:
|
159
|
+
# - at the moment it only imports Type0 fonts.
|
160
|
+
# - also, extracting the Hash of the actual font object you are looking for is not a trivial matter. I do it on the console.
|
161
|
+
# font_name:: a Symbol with the name of the font registry. if the font exists in the library, it will be overwritten!
|
162
|
+
# font_object:: a Hash in the internal format recognized by CombinePDF, that represents the font object.
|
163
|
+
def register_existing_font(font_name, font_object)
|
164
|
+
Fonts.register_font_from_pdf_object font_name, font_object
|
165
|
+
end
|
147
166
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
#
|
152
|
-
# example:
|
153
|
-
# fonts = CombinePDF.new("japanese_fonts.pdf").fonts(true)
|
154
|
-
# CombinePDF.register_font_from_pdf_object :david, fonts[0]
|
155
|
-
#
|
156
|
-
# VERY LIMITED SUPPORT:
|
157
|
-
# - at the moment it only imports Type0 fonts.
|
158
|
-
# - also, extracting the Hash of the actual font object you are looking for is not a trivial matter. I do it on the console.
|
159
|
-
# font_name:: a Symbol with the name of the font registry. if the font exists in the library, it will be overwritten!
|
160
|
-
# font_object:: a Hash in the internal format recognized by CombinePDF, that represents the font object.
|
161
|
-
def register_existing_font font_name, font_object
|
162
|
-
Fonts.register_font_from_pdf_object font_name, font_object
|
163
|
-
end
|
164
|
-
def register_font_from_pdf_object font_name, font_object
|
165
|
-
register_existing_font font_name, font_object
|
166
|
-
end
|
167
|
+
def register_font_from_pdf_object(font_name, font_object)
|
168
|
+
register_existing_font font_name, font_object
|
169
|
+
end
|
167
170
|
end
|
@@ -5,58 +5,46 @@
|
|
5
5
|
## is subject to the same license.
|
6
6
|
########################################################
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
8
|
module CombinePDF
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
include Page_Methods
|
54
|
-
|
55
|
-
end
|
56
|
-
|
9
|
+
# Limited Unicode Support (font dependent)!
|
10
|
+
#
|
11
|
+
# The PDFWriter class is a subclass of Hash and represents a PDF Page object.
|
12
|
+
#
|
13
|
+
# Writing on this Page is done using the textbox function.
|
14
|
+
#
|
15
|
+
# Setting the page dimensions can be done either when calling new or by using the mediabox method. New pages default to US Letter size, which is: [0, 0, 612.0, 792.0].
|
16
|
+
#
|
17
|
+
# Once the Page is completed (the last text box was added),
|
18
|
+
# we can insert the page to a CombinePDF object.
|
19
|
+
#
|
20
|
+
# We can either insert the PDFWriter as a new page:
|
21
|
+
# pdf = CombinePDF.new
|
22
|
+
# new_page = CombinePDF.create_page # => PDFWriter object
|
23
|
+
# new_page.textbox "some text"
|
24
|
+
# pdf << new_page
|
25
|
+
# pdf.save "file_with_new_page.pdf"
|
26
|
+
#
|
27
|
+
# Or we can use the Page_Methods methods to write an overlay (stamp / watermark) over existing pages:
|
28
|
+
# pdf = CombinePDF.new
|
29
|
+
# new_page = PDFWriter.new "some_file.pdf"
|
30
|
+
# pdf.pages.each {|page| page.textbox "Draft", opacity: 0.4 }
|
31
|
+
# pdf.save "stamped_file.pdf"
|
32
|
+
class PDFWriter < Hash
|
33
|
+
# create a new PDFWriter object.
|
34
|
+
#
|
35
|
+
# mediabox:: the PDF page size in PDF points. defaults to [0, 0, 612.0, 792.0] (US Letter)
|
36
|
+
def initialize(mediabox = [0, 0, 612.0, 792.0])
|
37
|
+
# indirect_reference_id, :indirect_generation_number
|
38
|
+
@contents = ''
|
39
|
+
@base_font_name = 'Writer' + SecureRandom.hex(7) + 'PDF'
|
40
|
+
self[:Type] = :Page
|
41
|
+
self[:indirect_reference_id] = 0
|
42
|
+
self[:Resources] = {}
|
43
|
+
self[:Contents] = { is_reference_only: true, referenced_object: { indirect_reference_id: 0, raw_stream_content: @contents } }
|
44
|
+
self[:MediaBox] = mediabox
|
45
|
+
end
|
46
|
+
|
47
|
+
# includes the PDF Page_Methods module, including all page methods (textbox, etc.).
|
48
|
+
include Page_Methods
|
49
|
+
end
|
57
50
|
end
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|