combine_pdf 0.2.5 → 0.2.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGELOG.md +273 -27
- data/LICENSE.txt +2 -1
- data/README.md +69 -4
- data/lib/combine_pdf/api.rb +156 -153
- data/lib/combine_pdf/basic_writer.rb +41 -53
- data/lib/combine_pdf/decrypt.rb +238 -228
- data/lib/combine_pdf/exceptions.rb +4 -0
- data/lib/combine_pdf/filter.rb +79 -85
- data/lib/combine_pdf/fonts.rb +451 -462
- data/lib/combine_pdf/page_methods.rb +891 -946
- data/lib/combine_pdf/parser.rb +663 -531
- data/lib/combine_pdf/pdf_protected.rb +341 -126
- data/lib/combine_pdf/pdf_public.rb +492 -454
- data/lib/combine_pdf/renderer.rb +146 -141
- data/lib/combine_pdf/version.rb +1 -2
- data/lib/combine_pdf.rb +14 -18
- data/test/automated +132 -0
- data/test/console +4 -4
- data/test/named_dest +84 -0
- metadata +8 -5
- data/lib/combine_pdf/operations.rb +0 -416
data/lib/combine_pdf/api.rb
CHANGED
@@ -1,167 +1,170 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
2
|
|
3
|
+
module CombinePDF
|
4
|
+
module_function
|
3
5
|
|
6
|
+
# Create an empty PDF object or create a PDF object from a file (parsing the file).
|
7
|
+
# file_name:: is the name of a file to be parsed.
|
8
|
+
# Reads the file at `file_name` as binary data and builds a PDF object from it.
#
# file_name:: a String or Pathname with the path of the file to parse.
#             an empty String returns a new (empty) PDF object without reading anything.
# options:: a Hash handed over, as is, to PDFParser.new.
#
# raises TypeError when `file_name` is neither a String nor a Pathname.
def load(file_name = '', options = {})
  raise TypeError, "couldn't parse data, expecting type String" unless file_name.is_a?(String) || file_name.is_a?(Pathname)
  return PDF.new if file_name == ''
  # File.binread is used instead of IO.read: IO.read treats a name starting with "|"
  # as a command to execute, which must never happen for a (possibly untrusted) file name.
  raw_data = File.binread(file_name).force_encoding(Encoding::ASCII_8BIT)
  PDF.new(PDFParser.new(raw_data, options))
end
|
4
13
|
|
14
|
+
# creates a new PDF object.
|
15
|
+
#
|
16
|
+
# Combine PDF will check to see if `string` is a filename.
|
17
|
+
# If it's a file name, it will attempt to load the PDF file using `CombinePDF.load`. Otherwise it will attempt parsing `string` using `CombinePDF.parse`.
|
18
|
+
#
|
19
|
+
# If the string is empty it will return a new PDF object (the same as parse).
|
20
|
+
#
|
21
|
+
# For both performance and code readability reasons, `CombinePDF.load` and `CombinePDF.parse` should be preferred unless creating a new PDF object.
|
22
|
+
# Creates a PDF object: empty, loaded from a file, or parsed from raw data.
#
# string:: a String or Pathname. when it names an existing file, the file is handed to `load`;
#          otherwise the value is handed to `parse`. a falsy value (the default) returns an empty PDF object.
#
# raises TypeError for any other truthy argument and a generic RuntimeError
# whenever loading or parsing fails.
def new(string = false)
  return PDF.new unless string
  raise TypeError, "couldn't create PDF object, expecting type String" unless string.is_a?(String) || string.is_a?(Pathname)
  begin
    names_a_file = begin
      File.file?(string)
    rescue StandardError
      # File.file? raises for values that cannot be a path (e.g. data containing null bytes);
      # such values are treated as raw PDF data.
      false
    end
    names_a_file ? load(string) : parse(string)
  rescue StandardError
    # the specific failure is replaced by a generic message; Ruby still exposes it through #cause.
    raise 'General PDF error - Use CombinePDF.load or CombinePDF.parse for a non-general error message (the requested file was not found OR the string received is not a valid PDF stream OR the file was found but not valid).'
  end
end
|
5
35
|
|
36
|
+
# Create a PDF object from a raw PDF data (parsing the data).
|
37
|
+
# data:: is a string that represents the content of a PDF file.
|
38
|
+
# Builds a PDF object from raw PDF data.
#
# data:: a String holding the content of a PDF file.
# options:: a Hash handed over, as is, to PDFParser.new.
#
# raises TypeError when `data` is not a String.
def parse(data, options = {})
  raise TypeError, "couldn't parse data, expecting type String" unless data.is_a?(String)
  PDF.new(PDFParser.new(data, options))
end
|
6
42
|
|
7
|
-
|
8
|
-
|
43
|
+
# makes a PDFWriter object
|
44
|
+
#
|
45
|
+
# PDFWriter objects represent an empty page and have the method "textbox"
|
46
|
+
# that adds content to that page.
|
47
|
+
#
|
48
|
+
# PDFWriter objects are used internally for numbering pages (by creating a PDF page
|
49
|
+
# with the page number and "stamping" it over the existing page).
|
50
|
+
#
|
51
|
+
# mediabox:: an Array representing the size of the PDF document. defaults to: [0.0, 0.0, 612.0, 792.0] (US Letter)
|
52
|
+
#
|
53
|
+
# if the PDFWriter object is used as a stamp, the final size will be that of the original page.
|
54
|
+
# Returns a new PDFWriter page of the requested size.
#
# mediabox:: the page size Array passed to PDFWriter.new. defaults to [0, 0, 612.0, 792.0].
def create_page(mediabox = [0, 0, 612.0, 792.0])
  PDFWriter.new(mediabox)
end
|
9
57
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
58
|
+
# makes a PDF object containing a table
|
59
|
+
#
|
60
|
+
# all the pages in this PDF object are PDFWriter objects and are
|
61
|
+
# writable using the textbox function (should you wish to add a title, or more info)
|
62
|
+
#
|
63
|
+
# the main intended use of this method is to create indexes (a table of contents) for merged data.
|
64
|
+
#
|
65
|
+
# example:
|
66
|
+
# pdf = CombinePDF.create_table headers: ["header 1", "another header"], table_data: [ ["this is one row", "with two columns"] , ["this is another row", "also two columns", "the third will be ignored"] ]
|
67
|
+
# pdf.save "table_file.pdf"
|
68
|
+
#
|
69
|
+
# accepts a Hash with any of the following keys as well as any of the Page_Methods#textbox options:
|
70
|
+
# headers:: an Array of strings with the headers (will be repeated every page).
|
71
|
+
# table_data:: an Array of Arrays, each containing a string for each column. the first row sets the number of columns. extra columns will be ignored.
|
72
|
+
# font:: a registered or standard font name (see Page_Methods). defaults to nil (:Helvetica).
|
73
|
+
# header_font:: a registered or standard font name for the headers (see Page_Methods). defaults to nil (the font for all the table rows).
|
74
|
+
# max_font_size:: the maximum font size. if the string doesn't fit, it will be resized. defaults to 14.
|
75
|
+
# column_widths:: an array of relative column widths ([1,2] will display only the first two columns, the second twice as big as the first). defaults to nil (even widths).
|
76
|
+
# header_color:: the header color. defaults to [0.8, 0.8, 0.8] (light gray).
|
77
|
+
# main_color:: main row color. defaults to nil (transparent / white).
|
78
|
+
# alternate_color:: alternate row color. defaults to [0.95, 0.95, 0.95] (very light gray).
|
79
|
+
# font_color:: font color. defaults to [0,0,0] (black).
|
80
|
+
# border_color:: border color. defaults to [0,0,0] (black).
|
81
|
+
# border_width:: border width in PDF units. defaults to 1.
|
82
|
+
# header_align:: the header text alignment within each column (:right, :left, :center). defaults to :center.
|
83
|
+
# row_align:: the row text alignment within each column. defaults to :left (:right for RTL table).
|
84
|
+
# direction:: the table's writing direction (:ltr or :rtl). this refers to the direction of the columns and doesn't affect text (rtl text is automatically recognized). defaults to :ltr.
|
85
|
+
# max_rows:: the number of rows per page, INCLUDING the header row. defaults to 25.
|
86
|
+
# page_size:: the size of the page in PDF points. defaults to [0, 0, 595.3, 841.9] (A4).
|
87
|
+
# Builds a PDF object whose pages hold the rows of a table.
#
# options:: the table settings Hash. the same Hash is handed to every page's `write_table` call.
#           :rows_per_page is accepted as an alias for :max_rows,
#           :page_size defaults to [0, 0, 595.3, 841.9],
#           a missing :table_data is treated as an empty table (an empty PDF object is returned).
#
# returns the PDF object holding the table pages.
def create_table(options = {})
  options[:max_rows] = options[:rows_per_page] if options[:rows_per_page]
  page_size = options[:page_size] || [0, 0, 595.3, 841.9]
  table = PDF.new
  # NOTE(review): the loop only ends once options[:table_data] is empty, so `write_table`
  # is presumably expected to remove the rows it has written - confirm against Page_Methods#write_table.
  # Array() turns a missing (nil) :table_data into an empty list instead of raising NoMethodError.
  until Array(options[:table_data]).empty?
    page = create_page(page_size)
    page.write_table(options)
    table << page
  end
  table
end
|
56
100
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
# writable using the texbox function (should you wish to add a title, or more info)
|
61
|
-
#
|
62
|
-
# the main intended use of this method is to create indexes (a table of contents) for merged data.
|
63
|
-
#
|
64
|
-
# example:
|
65
|
-
# pdf = CombinePDF.create_table headers: ["header 1", "another header"], table_data: [ ["this is one row", "with two columns"] , ["this is another row", "also two columns", "the third will be ignored"] ]
|
66
|
-
# pdf.save "table_file.pdf"
|
67
|
-
#
|
68
|
-
# accepts a Hash with any of the following keys as well as any of the Page_Methods#textbox options:
|
69
|
-
# headers:: an Array of strings with the headers (will be repeated every page).
|
70
|
-
# table_data:: as Array of Arrays, each containing a string for each column. the first row sets the number of columns. extra columns will be ignored.
|
71
|
-
# font:: a registered or standard font name (see Page_Methods). defaults to nil (:Helvetica).
|
72
|
-
# header_font:: a registered or standard font name for the headers (see Page_Methods). defaults to nil (the font for all the table rows).
|
73
|
-
# max_font_size:: the maximum font size. if the string doesn't fit, it will be resized. defaults to 14.
|
74
|
-
# column_widths:: an array of relative column widths ([1,2] will display only the first two columns, the second twice as big as the first). defaults to nil (even widths).
|
75
|
-
# header_color:: the header color. defaults to [0.8, 0.8, 0.8] (light gray).
|
76
|
-
# main_color:: main row color. defaults to nil (transparent / white).
|
77
|
-
# alternate_color:: alternate row color. defaults to [0.95, 0.95, 0.95] (very light gray).
|
78
|
-
# font_color:: font color. defaults to [0,0,0] (black).
|
79
|
-
# border_color:: border color. defaults to [0,0,0] (black).
|
80
|
-
# border_width:: border width in PDF units. defaults to 1.
|
81
|
-
# header_align:: the header text alignment within each column (:right, :left, :center). defaults to :center.
|
82
|
-
# row_align:: the row text alignment within each column. defaults to :left (:right for RTL table).
|
83
|
-
# direction:: the table's writing direction (:ltr or :rtl). this reffers to the direction of the columns and doesn't effect text (rtl text is automatically recognized). defaults to :ltr.
|
84
|
-
# max_rows:: the number of rows per page, INCLUDING the header row. deafults to 25.
|
85
|
-
# page_size:: the size of the page in PDF points. defaults to [0, 0, 595.3, 841.9] (A4).
|
86
|
-
def create_table(options = {})
|
87
|
-
options[:max_rows] = options[:rows_per_page] if options[:rows_per_page]
|
101
|
+
# Alias for `create_table`: forwards `options` unchanged and returns its result.
def new_table(options = {})
  create_table(options)
end
|
88
104
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
105
|
+
# calculate a CTM value for a specific transformation.
|
106
|
+
#
|
107
|
+
# this could be used to apply transformation in #textbox and to convert visual
|
108
|
+
# rotation values into actual rotation transformation.
|
109
|
+
#
|
110
|
+
# this method accepts a Hash containing any of the following parameters:
|
111
|
+
#
|
112
|
+
# deg:: the clockwise rotation to be applied, in degrees
|
113
|
+
# tx:: the x translation to be applied.
|
114
|
+
# ty:: the y translation to be applied.
|
115
|
+
# sx:: the x scaling to be applied.
|
116
|
+
# sy:: the y scaling to be applied.
|
117
|
+
#
|
118
|
+
# * scaling will be applied after the transformation is applied.
|
119
|
+
#
|
120
|
+
# Calculates the six CTM values for a combination of translation, rotation and scaling.
#
# parameters:: a Hash with any of :deg (rotation, in degrees), :tx, :ty (translation)
#              and :sx, :sy (scaling). missing or nil values fall back to
#              deg: 0, tx: 0, ty: 0, sx: 1, sy: 1.
#
# returns an Array with six numbers, each rounded to 6 digits.
def calc_ctm(parameters)
  # nil values are dropped first, so that an explicit `deg: nil` (or similar) uses the default
  # instead of raising NoMethodError during the calculation.
  given = parameters.reject { |_key, value| value.nil? }
  settings = { deg: 0, tx: 0, ty: 0, sx: 1, sy: 1 }.merge(given)
  radians = settings[:deg] * Math::PI / 180
  sin = Math.sin(radians)
  cos = Math.cos(radians)
  # start with the translation matrix
  ctm = Matrix[[1, 0, 0], [0, 1, 0], [settings[:tx], settings[:ty], 1]]
  # then rotate (only when a rotation was requested)
  ctm *= Matrix[[cos, sin, 0], [-sin, cos, 0], [0, 0, 1]] if given[:deg]
  # then scale (only when a scaling was requested)
  ctm *= Matrix[[settings[:sx], 0, 0], [0, settings[:sy], 0], [0, 0, 1]] if given[:sx] || given[:sy]
  # flatten the matrix, keep the two leftmost columns and round to 6 digits
  ctm.to_a.flatten.values_at(0, 1, 3, 4, 6, 7).map! { |value| value.round 6 }
end
|
102
134
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
#
|
118
|
-
def calc_ctm parameters
|
119
|
-
p = {deg: 0, tx: 0, ty: 0, sx: 1, sy: 1}.merge parameters
|
120
|
-
r = p[:deg] * Math::PI / 180
|
121
|
-
s = Math.sin(r)
|
122
|
-
c = Math.cos(r)
|
123
|
-
# start with tranlation matrix
|
124
|
-
m = Matrix[ [1,0,0], [0,1,0], [ p[:tx], p[:ty], 1] ]
|
125
|
-
# then rotate
|
126
|
-
m = m * Matrix[ [c, s, 0], [-s, c, 0], [0, 0, 1]] if parameters[:deg]
|
127
|
-
# then scale
|
128
|
-
m = m * Matrix[ [p[:sx], 0, 0], [0, p[:sy], 0], [0,0,1] ] if parameters[:sx] || parameters[:sy]
|
129
|
-
# flaten array and round to 6 digits
|
130
|
-
m.to_a.flatten.values_at(0,1,3,4,6,7).map! {|f| f.round 6}
|
131
|
-
end
|
135
|
+
# adds a correctly formatted font object to the font library.
|
136
|
+
#
|
137
|
+
# registered fonts will remain in the library and will only be embedded in
|
138
|
+
# PDF objects when they are used by PDFWriter objects (for example, for numbering pages).
|
139
|
+
#
|
140
|
+
# this function enables plug-ins to extend the font functionality of CombinePDF.
|
141
|
+
#
|
142
|
+
# font_name:: a Symbol with the name of the font. if the font exists in the library, it will be overwritten!
|
143
|
+
# font_metrics:: a Hash of font metrics, of the format char => {wx: char_width, boundingbox: [left_x, bottom_y, right_x, top_y]} where char == character itself (i.e. " " for space). The Hash should contain a special value :missing for the metrics of missing characters. an optional :wy might be supported in the future, for top-to-bottom fonts.
|
144
|
+
# font_pdf_object:: a Hash in the internal format recognized by CombinePDF, that represents the font object.
|
145
|
+
# font_cmap:: a CMap dictionary (Hash) which maps unicode characters to the hex CID for the font (i.e. {"a" => "61", "z" => "7a" }).
|
146
|
+
# Forwards all four arguments, unchanged, to Fonts.register_font and returns its result.
#
# font_cmap:: optional, defaults to nil.
def register_font(font_name, font_metrics, font_pdf_object, font_cmap = nil)
  Fonts.register_font(font_name, font_metrics, font_pdf_object, font_cmap)
end
|
132
149
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
150
|
+
# adds an existing font (from any PDF Object) to the font library.
|
151
|
+
#
|
152
|
+
# returns the font on success or false on failure.
|
153
|
+
#
|
154
|
+
# example:
|
155
|
+
# fonts = CombinePDF.new("japanese_fonts.pdf").fonts(true)
|
156
|
+
# CombinePDF.register_font_from_pdf_object :david, fonts[0]
|
157
|
+
#
|
158
|
+
# VERY LIMITED SUPPORT:
|
159
|
+
# - at the moment it only imports Type0 fonts.
|
160
|
+
# - also, to extract the Hash of the actual font object you were looking for, is not a trivial matter. I do it on the console.
|
161
|
+
# font_name:: a Symbol with the name of the font registry. if the font exists in the library, it will be overwritten!
|
162
|
+
# font_object:: a Hash in the internal format recognized by CombinePDF, that represents the font object.
|
163
|
+
# Forwards both arguments, unchanged, to Fonts.register_font_from_pdf_object and returns its result.
def register_existing_font(font_name, font_object)
  Fonts.register_font_from_pdf_object(font_name, font_object)
end
|
147
166
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
#
|
152
|
-
# example:
|
153
|
-
# fonts = CombinePDF.new("japanese_fonts.pdf").fonts(true)
|
154
|
-
# CombinePDF.register_font_from_pdf_object :david, fonts[0]
|
155
|
-
#
|
156
|
-
# VERY LIMITTED SUPPORT:
|
157
|
-
# - at the moment it only imports Type0 fonts.
|
158
|
-
# - also, to extract the Hash of the actual font object you were looking for, is not a trivial matter. I do it on the console.
|
159
|
-
# font_name:: a Symbol with the name of the font registry. if the fonts exists in the library, it will be overwritten!
|
160
|
-
# font_object:: a Hash in the internal format recognized by CombinePDF, that represents the font object.
|
161
|
-
def register_existing_font font_name, font_object
|
162
|
-
Fonts.register_font_from_pdf_object font_name, font_object
|
163
|
-
end
|
164
|
-
def register_font_from_pdf_object font_name, font_object
|
165
|
-
register_existing_font font_name, font_object
|
166
|
-
end
|
167
|
+
# Alias for `register_existing_font`: forwards both arguments unchanged and returns its result.
def register_font_from_pdf_object(font_name, font_object)
  register_existing_font(font_name, font_object)
end
|
167
170
|
end
|
@@ -5,58 +5,46 @@
|
|
5
5
|
## is subject to the same license.
|
6
6
|
########################################################
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
8
|
module CombinePDF
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
include Page_Methods
|
54
|
-
|
55
|
-
end
|
56
|
-
|
9
|
+
# Limited Unicode Support (font dependent)!
|
10
|
+
#
|
11
|
+
# The PDFWriter class is a subclass of Hash and represents a PDF Page object.
|
12
|
+
#
|
13
|
+
# Writing on this Page is done using the textbox function.
|
14
|
+
#
|
15
|
+
# The page dimensions can be set either when calling new or by using the mediabox method. New pages default to size US Letter, which is: [0, 0, 612.0, 792.0].
|
16
|
+
#
|
17
|
+
# Once the Page is completed (the last text box was added),
|
18
|
+
# we can insert the page to a CombinePDF object.
|
19
|
+
#
|
20
|
+
# We can either insert the PDFWriter as a new page:
|
21
|
+
# pdf = CombinePDF.new
|
22
|
+
# new_page = CombinePDF.create_page # => PDFWriter object
|
23
|
+
# new_page.textbox "some text"
|
24
|
+
# pdf << new_page
|
25
|
+
# pdf.save "file_with_new_page.pdf"
|
26
|
+
#
|
27
|
+
# Or we can use the Page_Methods methods to write an overlay (stamp / watermark) over existing pages:
|
28
|
+
# pdf = CombinePDF.new
|
29
|
+
# new_page = PDFWriter.new "some_file.pdf"
|
30
|
+
# pdf.pages.each {|page| page.textbox "Draft", opacity: 0.4 }
|
31
|
+
# pdf.save "stamped_file.pdf"
|
32
|
+
class PDFWriter < Hash
  # Builds a new PDFWriter (page) object.
  #
  # mediabox:: the PDF page size in PDF points. defaults to [0, 0, 612.0, 792.0] (US Letter)
  def initialize(mediabox = [0, 0, 612.0, 792.0])
    # the content String is stored in @contents and is also referenced (the very same object)
    # from the :Contents entry below.
    @contents = ''
    @base_font_name = "Writer#{SecureRandom.hex(7)}PDF"
    content_stream = { indirect_reference_id: 0, raw_stream_content: @contents }

    self[:Type] = :Page
    self[:indirect_reference_id] = 0
    self[:Resources] = {}
    self[:Contents] = { is_reference_only: true, referenced_object: content_stream }
    self[:MediaBox] = mediabox
  end

  # mixes in the Page_Methods module (textbox and the other page methods).
  include Page_Methods
end
|
57
50
|
end
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|