hexapdf 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -1
- data/CONTRIBUTERS +1 -1
- data/LICENSE +1 -1
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/hexapdf.rb +1 -1
- data/lib/hexapdf/cli.rb +19 -52
- data/lib/hexapdf/cli/command.rb +251 -0
- data/lib/hexapdf/cli/{extract.rb → files.rb} +19 -23
- data/lib/hexapdf/cli/images.rb +147 -0
- data/lib/hexapdf/cli/info.rb +5 -5
- data/lib/hexapdf/cli/inspect.rb +13 -12
- data/lib/hexapdf/cli/merge.rb +200 -0
- data/lib/hexapdf/cli/modify.rb +39 -242
- data/lib/hexapdf/cli/optimize.rb +104 -0
- data/lib/hexapdf/configuration.rb +1 -1
- data/lib/hexapdf/content.rb +1 -1
- data/lib/hexapdf/content/canvas.rb +1 -1
- data/lib/hexapdf/content/color_space.rb +1 -1
- data/lib/hexapdf/content/graphic_object.rb +1 -1
- data/lib/hexapdf/content/graphic_object/arc.rb +1 -1
- data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +1 -1
- data/lib/hexapdf/content/graphic_object/solid_arc.rb +1 -1
- data/lib/hexapdf/content/graphics_state.rb +1 -1
- data/lib/hexapdf/content/operator.rb +1 -1
- data/lib/hexapdf/content/parser.rb +16 -15
- data/lib/hexapdf/content/processor.rb +1 -1
- data/lib/hexapdf/content/transformation_matrix.rb +1 -1
- data/lib/hexapdf/data_dir.rb +1 -1
- data/lib/hexapdf/dictionary.rb +1 -1
- data/lib/hexapdf/dictionary_fields.rb +1 -1
- data/lib/hexapdf/document.rb +1 -1
- data/lib/hexapdf/document/files.rb +1 -1
- data/lib/hexapdf/document/fonts.rb +1 -1
- data/lib/hexapdf/document/images.rb +1 -1
- data/lib/hexapdf/document/pages.rb +1 -1
- data/lib/hexapdf/encryption.rb +1 -1
- data/lib/hexapdf/encryption/aes.rb +1 -1
- data/lib/hexapdf/encryption/arc4.rb +1 -1
- data/lib/hexapdf/encryption/fast_aes.rb +1 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/identity.rb +1 -1
- data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
- data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -1
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -1
- data/lib/hexapdf/error.rb +1 -1
- data/lib/hexapdf/filter.rb +1 -1
- data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
- data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
- data/lib/hexapdf/filter/dct_decode.rb +1 -1
- data/lib/hexapdf/filter/encryption.rb +1 -1
- data/lib/hexapdf/filter/flate_decode.rb +1 -1
- data/lib/hexapdf/filter/jpx_decode.rb +1 -1
- data/lib/hexapdf/filter/lzw_decode.rb +2 -3
- data/lib/hexapdf/filter/predictor.rb +11 -11
- data/lib/hexapdf/filter/run_length_decode.rb +1 -1
- data/lib/hexapdf/font/cmap.rb +1 -1
- data/lib/hexapdf/font/cmap/parser.rb +1 -1
- data/lib/hexapdf/font/cmap/writer.rb +1 -1
- data/lib/hexapdf/font/encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/base.rb +1 -1
- data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/glyph_list.rb +1 -1
- data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
- data/lib/hexapdf/font/true_type.rb +2 -1
- data/lib/hexapdf/font/true_type/font.rb +1 -1
- data/lib/hexapdf/font/true_type/subsetter.rb +186 -0
- data/lib/hexapdf/font/true_type/table.rb +8 -4
- data/lib/hexapdf/font/true_type/table/cmap.rb +1 -1
- data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +1 -1
- data/lib/hexapdf/font/true_type/table/directory.rb +1 -1
- data/lib/hexapdf/font/true_type/table/glyf.rb +6 -2
- data/lib/hexapdf/font/true_type/table/head.rb +2 -2
- data/lib/hexapdf/font/true_type/table/hhea.rb +1 -1
- data/lib/hexapdf/font/true_type/table/hmtx.rb +1 -1
- data/lib/hexapdf/font/true_type/table/loca.rb +1 -1
- data/lib/hexapdf/font/true_type/table/maxp.rb +1 -1
- data/lib/hexapdf/font/true_type/table/name.rb +1 -1
- data/lib/hexapdf/font/true_type/table/os2.rb +1 -1
- data/lib/hexapdf/font/true_type/table/post.rb +1 -1
- data/lib/hexapdf/font/true_type_wrapper.rb +56 -8
- data/lib/hexapdf/font/type1.rb +1 -1
- data/lib/hexapdf/font/type1/afm_parser.rb +1 -1
- data/lib/hexapdf/font/type1/character_metrics.rb +1 -1
- data/lib/hexapdf/font/type1/font.rb +1 -1
- data/lib/hexapdf/font/type1/font_metrics.rb +1 -1
- data/lib/hexapdf/font/type1/pfb_parser.rb +1 -1
- data/lib/hexapdf/font/type1_wrapper.rb +1 -1
- data/lib/hexapdf/font_loader.rb +1 -1
- data/lib/hexapdf/font_loader/from_configuration.rb +6 -3
- data/lib/hexapdf/font_loader/standard14.rb +1 -1
- data/lib/hexapdf/image_loader.rb +1 -1
- data/lib/hexapdf/image_loader/jpeg.rb +1 -1
- data/lib/hexapdf/image_loader/pdf.rb +1 -1
- data/lib/hexapdf/image_loader/png.rb +1 -1
- data/lib/hexapdf/importer.rb +1 -1
- data/lib/hexapdf/name_tree_node.rb +1 -1
- data/lib/hexapdf/number_tree_node.rb +1 -1
- data/lib/hexapdf/object.rb +1 -1
- data/lib/hexapdf/parser.rb +1 -1
- data/lib/hexapdf/rectangle.rb +1 -1
- data/lib/hexapdf/reference.rb +1 -1
- data/lib/hexapdf/revision.rb +1 -1
- data/lib/hexapdf/revisions.rb +13 -15
- data/lib/hexapdf/serializer.rb +7 -3
- data/lib/hexapdf/stream.rb +1 -1
- data/lib/hexapdf/task.rb +1 -1
- data/lib/hexapdf/task/dereference.rb +1 -1
- data/lib/hexapdf/task/optimize.rb +1 -1
- data/lib/hexapdf/tokenizer.rb +12 -12
- data/lib/hexapdf/type.rb +1 -1
- data/lib/hexapdf/type/catalog.rb +1 -1
- data/lib/hexapdf/type/embedded_file.rb +1 -1
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font.rb +1 -1
- data/lib/hexapdf/type/font_descriptor.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +1 -1
- data/lib/hexapdf/type/font_true_type.rb +1 -1
- data/lib/hexapdf/type/font_type1.rb +1 -1
- data/lib/hexapdf/type/form.rb +1 -1
- data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
- data/lib/hexapdf/type/image.rb +187 -1
- data/lib/hexapdf/type/info.rb +1 -1
- data/lib/hexapdf/type/names.rb +1 -1
- data/lib/hexapdf/type/object_stream.rb +1 -1
- data/lib/hexapdf/type/page.rb +1 -1
- data/lib/hexapdf/type/page_tree_node.rb +6 -1
- data/lib/hexapdf/type/resources.rb +1 -1
- data/lib/hexapdf/type/trailer.rb +2 -2
- data/lib/hexapdf/type/viewer_preferences.rb +1 -1
- data/lib/hexapdf/type/xref_stream.rb +22 -18
- data/lib/hexapdf/utils/bit_field.rb +1 -1
- data/lib/hexapdf/utils/bit_stream.rb +16 -32
- data/lib/hexapdf/utils/lru_cache.rb +1 -1
- data/lib/hexapdf/utils/math_helpers.rb +1 -1
- data/lib/hexapdf/utils/object_hash.rb +1 -1
- data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +1 -1
- data/lib/hexapdf/version.rb +2 -2
- data/lib/hexapdf/writer.rb +2 -1
- data/lib/hexapdf/xref_section.rb +6 -1
- data/man/man1/hexapdf.1 +194 -115
- data/test/data/images/greyscale-1bit.png +0 -0
- data/test/data/images/greyscale-2bit.png +0 -0
- data/test/data/images/greyscale-8bit.png +0 -0
- data/test/data/images/indexed-alpha-4bit.png +0 -0
- data/test/data/images/truecolour-8bit.png +0 -0
- data/test/hexapdf/content/test_operator.rb +8 -8
- data/test/hexapdf/content/test_processor.rb +1 -1
- data/test/hexapdf/encryption/test_security_handler.rb +1 -1
- data/test/hexapdf/font/test_true_type_wrapper.rb +89 -48
- data/test/hexapdf/font/true_type/table/test_glyf.rb +1 -0
- data/test/hexapdf/font/true_type/test_subsetter.rb +70 -0
- data/test/hexapdf/font/true_type/test_table.rb +16 -0
- data/test/hexapdf/font_loader/test_from_configuration.rb +7 -0
- data/test/hexapdf/test_document.rb +1 -1
- data/test/hexapdf/test_object.rb +1 -1
- data/test/hexapdf/test_revisions.rb +34 -8
- data/test/hexapdf/test_serializer.rb +3 -0
- data/test/hexapdf/test_writer.rb +11 -2
- data/test/hexapdf/test_xref_section.rb +15 -0
- data/test/hexapdf/type/test_image.rb +234 -0
- data/test/hexapdf/type/test_object_stream.rb +2 -2
- data/test/hexapdf/type/test_trailer.rb +4 -0
- data/test/hexapdf/utils/test_bit_stream.rb +69 -0
- metadata +14 -6
data/lib/hexapdf/cli/modify.rb
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# This file is part of HexaPDF.
|
|
5
5
|
#
|
|
6
6
|
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
|
7
|
-
# Copyright (C)
|
|
7
|
+
# Copyright (C) 2014-2017 Thomas Leitner
|
|
8
8
|
#
|
|
9
9
|
# HexaPDF is free software: you can redistribute it and/or modify it
|
|
10
10
|
# under the terms of the GNU Affero General Public License version 3 as
|
|
@@ -32,284 +32,81 @@
|
|
|
32
32
|
#++
|
|
33
33
|
|
|
34
34
|
require 'ostruct'
|
|
35
|
-
require 'hexapdf/cli'
|
|
35
|
+
require 'hexapdf/cli/command'
|
|
36
36
|
|
|
37
37
|
module HexaPDF
|
|
38
38
|
module CLI
|
|
39
39
|
|
|
40
40
|
# Modifies a PDF file:
|
|
41
41
|
#
|
|
42
|
-
# * Adds pages from other PDF files.
|
|
43
42
|
# * Decrypts or encrypts the resulting output PDF file.
|
|
44
43
|
# * Generates or deletes object and cross-reference streams.
|
|
45
44
|
# * Optimizes the output PDF by merging the revisions of a PDF file and removes unused entries.
|
|
46
45
|
#
|
|
47
46
|
# See: HexaPDF::Task::Optimize
|
|
48
|
-
class Modify <
|
|
49
|
-
|
|
50
|
-
InputSpec = Struct.new(:file, :pages, :password) #:nodoc:
|
|
47
|
+
class Modify < Command
|
|
51
48
|
|
|
52
49
|
def initialize #:nodoc:
|
|
53
50
|
super('modify', takes_commands: false)
|
|
54
51
|
short_desc("Modify a PDF file")
|
|
55
52
|
long_desc(<<-EOF.gsub!(/^ */, ''))
|
|
56
53
|
This command modifies a PDF file. It can be used to select pages that should appear in
|
|
57
|
-
the output file and
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
The first input file is the primary file which gets modified, so meta data like file
|
|
61
|
-
information, outlines, etc. are taken from it. Alternatively, it is possible to start
|
|
62
|
-
with an empty PDF file by using --empty. The order of the options specifying the files
|
|
63
|
-
is important as they are used in that order.
|
|
64
|
-
|
|
65
|
-
Also note that the --password and --pages options apply to the last preceeding input file.
|
|
54
|
+
the output file and/or rotate them. The output file can also be encrypted/decrypted and
|
|
55
|
+
optimized in various ways.
|
|
66
56
|
EOF
|
|
67
57
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
options.on("-f", "--file FILE", "Input file, can be specified multiple times") do |file|
|
|
71
|
-
@files << InputSpec.new(file, '1-e')
|
|
72
|
-
end
|
|
73
|
-
options.on("-p", "--password PASSWORD", String, "The password for decrypting the last " \
|
|
74
|
-
"specified input file (use - for reading from standard input)") do |pwd|
|
|
75
|
-
raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
|
|
76
|
-
pwd = (pwd == '-' ? command_parser.read_password("#{@files.last.file} password") : pwd)
|
|
77
|
-
@files.last.password = pwd
|
|
78
|
-
end
|
|
79
|
-
options.on("-i", "--pages PAGES", "The pages of the last specified input file that " \
|
|
80
|
-
"should be used (default: 1-e)") do |pages|
|
|
81
|
-
raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
|
|
82
|
-
@files.last.pages = pages
|
|
83
|
-
end
|
|
84
|
-
options.on("-e", "--empty", "Use an empty file as the first input file") do
|
|
85
|
-
@initial_empty = true
|
|
86
|
-
end
|
|
87
|
-
options.on("--[no-]interleave", "Interleave the pages from the input files (default: " \
|
|
88
|
-
"false)") do |c|
|
|
89
|
-
@interleave = c
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
options.separator("")
|
|
93
|
-
options.separator("Output file related options")
|
|
94
|
-
options.on("--embed FILE", String, "Embed the file into the output file (can be used " \
|
|
95
|
-
"multiple times)") do |file|
|
|
96
|
-
@embed_files << file
|
|
97
|
-
end
|
|
98
|
-
options.on("--[no-]compact", "Delete unnecessary PDF objects (default: yes)") do |c|
|
|
99
|
-
@compact = c
|
|
100
|
-
end
|
|
101
|
-
options.on("--object-streams MODE", [:generate, :preserve, :delete],
|
|
102
|
-
"Handling of object streams (either generate, preserve or delete; " \
|
|
103
|
-
"default: preserve)") do |os|
|
|
104
|
-
@object_streams = os
|
|
105
|
-
end
|
|
106
|
-
options.on("--xref-streams MODE", [:generate, :preserve, :delete],
|
|
107
|
-
"Handling of cross-reference streams (either generate, preserve or delete; " \
|
|
108
|
-
"default: preserve)") do |x|
|
|
109
|
-
@xref_streams = x
|
|
110
|
-
end
|
|
111
|
-
options.on("--streams MODE", [:compress, :preserve, :uncompress],
|
|
112
|
-
"Handling of stream data (either compress, preserve or uncompress; default: " \
|
|
113
|
-
"preserve)") do |streams|
|
|
114
|
-
@streams = streams
|
|
115
|
-
end
|
|
116
|
-
options.on("--[no-]compress-pages", "Recompress page content streams (may take a long " \
|
|
117
|
-
"time; default: no)") do |c|
|
|
118
|
-
@compress_pages = c
|
|
119
|
-
end
|
|
120
|
-
options.on("--decrypt", "Remove any encryption") do
|
|
121
|
-
@encryption = :remove
|
|
122
|
-
end
|
|
123
|
-
options.on("--encrypt", "Encrypt the output file") do
|
|
124
|
-
@encryption = :add
|
|
125
|
-
end
|
|
126
|
-
options.on("--owner-password PASSWORD", String, "The owner password to be set on the " \
|
|
127
|
-
"output file (use - for reading from standard input)") do |pwd|
|
|
128
|
-
@encryption = :add
|
|
129
|
-
@enc_owner_pwd = (pwd == '-' ? command_parser.read_password("Owner password") : pwd)
|
|
130
|
-
end
|
|
131
|
-
options.on("--user-password PASSWORD", String, "The user password to be set on the " \
|
|
132
|
-
"output file (use - for reading from standard input)") do |pwd|
|
|
133
|
-
@encryption = :add
|
|
134
|
-
@enc_user_pwd = (pwd == '-' ? command_parser.read_password("User password") : pwd)
|
|
135
|
-
end
|
|
136
|
-
options.on("--algorithm ALGORITHM", [:aes, :arc4],
|
|
137
|
-
"The encryption algorithm: aes or arc4 (default: aes)") do |a|
|
|
138
|
-
@encryption = :add
|
|
139
|
-
@enc_algorithm = a
|
|
140
|
-
end
|
|
141
|
-
options.on("--key-length BITS", Integer,
|
|
142
|
-
"The encryption key length in bits (default: 128)") do |i|
|
|
143
|
-
@encryption = :add
|
|
144
|
-
@enc_key_length = i
|
|
145
|
-
end
|
|
146
|
-
options.on("--force-V4",
|
|
147
|
-
"Force the use of encryption version 4 if key length=128 and algorithm=arc4") do
|
|
148
|
-
@encryption = :add
|
|
149
|
-
@enc_force_v4 = true
|
|
150
|
-
end
|
|
151
|
-
syms = HexaPDF::Encryption::StandardSecurityHandler::Permissions::SYMBOL_TO_PERMISSION.keys
|
|
152
|
-
options.on("--permissions PERMS", Array,
|
|
153
|
-
"Comma separated list of permissions to be set on the output file. Possible " \
|
|
154
|
-
"values: #{syms.join(', ')}") do |perms|
|
|
155
|
-
perms.map! do |perm|
|
|
156
|
-
unless syms.include?(perm.to_sym)
|
|
157
|
-
raise OptionParser::InvalidArgument, "#{perm} (invalid permission name)"
|
|
158
|
-
end
|
|
159
|
-
perm.to_sym
|
|
160
|
-
end
|
|
161
|
-
@encryption = :add
|
|
162
|
-
@enc_permissions = perms
|
|
163
|
-
end
|
|
164
|
-
|
|
165
|
-
@files = []
|
|
166
|
-
@initial_empty = false
|
|
167
|
-
@interleave = false
|
|
168
|
-
|
|
58
|
+
@password = nil
|
|
59
|
+
@pages = '1-e'
|
|
169
60
|
@embed_files = []
|
|
170
|
-
@compact = true
|
|
171
|
-
@compress_pages = false
|
|
172
|
-
@object_streams = :preserve
|
|
173
|
-
@xref_streams = :preserve
|
|
174
|
-
@streams = :preserve
|
|
175
61
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
@enc_algorithm = :aes
|
|
180
|
-
@enc_force_v4 = false
|
|
181
|
-
@enc_permissions = []
|
|
182
|
-
end
|
|
183
|
-
|
|
184
|
-
def execute(output_file) #:nodoc:
|
|
185
|
-
if !@initial_empty && @files.empty?
|
|
186
|
-
error = OptionParser::ParseError.new("At least one --file FILE or --empty is needed")
|
|
187
|
-
error.reason = "Missing argument"
|
|
188
|
-
raise error
|
|
62
|
+
options.on("--password PASSWORD", "-p", String,
|
|
63
|
+
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
|
64
|
+
@password = (pwd == '-' ? read_password : pwd)
|
|
189
65
|
end
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
@files.each do |spec|
|
|
194
|
-
cache[spec.file] ||= HexaPDF::Document.new(io: File.open(spec.file),
|
|
195
|
-
decryption_opts: {password: spec.password})
|
|
196
|
-
spec.file = cache[spec.file]
|
|
66
|
+
options.on("-i", "--pages PAGES", "The pages of the input file that should be used " \
|
|
67
|
+
"(default: 1-e)") do |pages|
|
|
68
|
+
@pages = pages
|
|
197
69
|
end
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
page_tree = target.add(Type: :Pages)
|
|
202
|
-
import_pages(page_tree)
|
|
203
|
-
target.catalog[:Pages] = page_tree
|
|
204
|
-
|
|
205
|
-
# Remove potentially imported but unused pages and page tree nodes
|
|
206
|
-
retained = target.pages.each_with_object({}) {|page, h| h[page.data] = true}
|
|
207
|
-
retained[target.pages.root.data] = true
|
|
208
|
-
target.each(current: false) do |obj|
|
|
209
|
-
next unless obj.kind_of?(HexaPDF::Dictionary)
|
|
210
|
-
if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
|
|
211
|
-
target.delete(obj)
|
|
212
|
-
end
|
|
70
|
+
options.on("-e", "--embed FILE", String, "Embed the file into the output file (can be " \
|
|
71
|
+
"used multiple times)") do |file|
|
|
72
|
+
@embed_files << file
|
|
213
73
|
end
|
|
74
|
+
define_optimization_options
|
|
75
|
+
define_encryption_options
|
|
76
|
+
end
|
|
214
77
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
handle_streams(target) unless @streams == :preserve
|
|
224
|
-
|
|
225
|
-
# Encrypt, decrypt or do nothing
|
|
226
|
-
if @encryption == :add
|
|
227
|
-
target.encrypt(algorithm: @enc_algorithm, key_length: @enc_key_length,
|
|
228
|
-
force_V4: @enc_force_v4, permissions: @enc_permissions,
|
|
229
|
-
owner_password: @enc_owner_pwd, user_password: @enc_user_pwd)
|
|
230
|
-
elsif @encryption == :remove
|
|
231
|
-
target.encrypt(name: nil)
|
|
78
|
+
def execute(in_file, out_file) #:nodoc:
|
|
79
|
+
maybe_raise_on_existing_file(out_file)
|
|
80
|
+
HexaPDF::Document.open(in_file, decryption_opts: {password: @password}) do |doc|
|
|
81
|
+
arrange_pages(doc) unless @pages == '1-e'
|
|
82
|
+
@embed_files.each {|file| doc.files.add(file, embed: true)}
|
|
83
|
+
apply_encryption_options(doc)
|
|
84
|
+
apply_optimization_options(doc)
|
|
85
|
+
doc.write(out_file)
|
|
232
86
|
end
|
|
233
|
-
|
|
234
|
-
target.write(output_file)
|
|
235
87
|
rescue HexaPDF::Error => e
|
|
236
88
|
$stderr.puts "Processing error : #{e.message}"
|
|
237
89
|
exit(1)
|
|
238
90
|
end
|
|
239
91
|
|
|
240
|
-
def usage_arguments #:nodoc:
|
|
241
|
-
"{--file IN_FILE | --empty} OUT_FILE"
|
|
242
|
-
end
|
|
243
|
-
|
|
244
92
|
private
|
|
245
93
|
|
|
246
|
-
#
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
s.pages.each do |arr|
|
|
253
|
-
arr[0] = page_list[arr[0]]
|
|
254
|
-
arr[1] = arr[0].value[:Rotate] || :none unless arr[1]
|
|
255
|
-
end
|
|
256
|
-
end
|
|
257
|
-
|
|
258
|
-
if @interleave
|
|
259
|
-
max_pages_per_file = 0
|
|
260
|
-
all = @files.each_with_index.map do |spec, findex|
|
|
261
|
-
list = []
|
|
262
|
-
spec.pages.each {|index, rotation| list << [spec.file, findex, index, rotation]}
|
|
263
|
-
max_pages_per_file = list.size if list.size > max_pages_per_file
|
|
264
|
-
list
|
|
265
|
-
end
|
|
266
|
-
first, *rest = *all
|
|
267
|
-
first[max_pages_per_file - 1] ||= nil
|
|
268
|
-
first.zip(*rest) do |slice|
|
|
269
|
-
slice.each do |source, findex, page, rotation|
|
|
270
|
-
next unless source
|
|
271
|
-
import_page(page_tree, findex, page, rotation)
|
|
272
|
-
end
|
|
273
|
-
end
|
|
274
|
-
else
|
|
275
|
-
@files.each_with_index do |s, findex|
|
|
276
|
-
s.pages.each {|page, rotation| import_page(page_tree, findex, page, rotation)}
|
|
277
|
-
end
|
|
278
|
-
end
|
|
279
|
-
end
|
|
280
|
-
|
|
281
|
-
# Import the page with the given +rotation+ into the page tree.
|
|
282
|
-
def import_page(page_tree, source_index, page, rotation)
|
|
283
|
-
if page_tree.document == page.document
|
|
94
|
+
# Arranges the pages of the document as specified with the --pages option.
|
|
95
|
+
def arrange_pages(doc)
|
|
96
|
+
all_pages = doc.pages.to_a
|
|
97
|
+
new_page_tree = doc.add(Type: :Pages)
|
|
98
|
+
parse_pages_specification(@pages, all_pages.length).each do |index, rotation|
|
|
99
|
+
page = all_pages[index]
|
|
284
100
|
page.value.update(page.copy_inherited_values)
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
page = page_tree.document.import(page).deep_copy
|
|
288
|
-
end
|
|
289
|
-
if rotation == :none
|
|
290
|
-
page.delete(:Rotate)
|
|
291
|
-
elsif rotation.kind_of?(Integer)
|
|
292
|
-
page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
|
|
293
|
-
end
|
|
294
|
-
page_tree.document.add(page)
|
|
295
|
-
page_tree.add_page(page)
|
|
296
|
-
end
|
|
297
|
-
|
|
298
|
-
IGNORED_FILTERS = { #:nodoc:
|
|
299
|
-
CCITTFaxDecode: true, JBIG2Decode: true, DCTDecode: true, JPXDecode: true, Crypt: true
|
|
300
|
-
}.freeze
|
|
301
|
-
|
|
302
|
-
# Applies the chosen stream mode to all streams.
|
|
303
|
-
def handle_streams(doc)
|
|
304
|
-
doc.each(current: false) do |obj|
|
|
305
|
-
next if !obj.respond_to?(:set_filter) || obj[:Subtype] == :Image ||
|
|
306
|
-
Array(obj[:Filter]).any? {|f| IGNORED_FILTERS[f]}
|
|
307
|
-
if @streams == :compress
|
|
308
|
-
obj.set_filter(:FlateDecode)
|
|
101
|
+
if rotation == :none
|
|
102
|
+
page.delete(:Rotate)
|
|
309
103
|
else
|
|
310
|
-
|
|
104
|
+
page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
|
|
311
105
|
end
|
|
106
|
+
new_page_tree.add_page(page)
|
|
312
107
|
end
|
|
108
|
+
doc.delete(doc.catalog.delete(:Pages))
|
|
109
|
+
doc.catalog[:Pages] = new_page_tree
|
|
313
110
|
end
|
|
314
111
|
|
|
315
112
|
end
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
#--
|
|
4
|
+
# This file is part of HexaPDF.
|
|
5
|
+
#
|
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
|
7
|
+
# Copyright (C) 2014-2017 Thomas Leitner
|
|
8
|
+
#
|
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
|
16
|
+
#
|
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
|
20
|
+
# License for more details.
|
|
21
|
+
#
|
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
|
24
|
+
#
|
|
25
|
+
# The interactive user interfaces in modified source and object code
|
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
|
28
|
+
#
|
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
|
31
|
+
# is created or manipulated using HexaPDF.
|
|
32
|
+
#++
|
|
33
|
+
|
|
34
|
+
require 'hexapdf/cli/command'
|
|
35
|
+
|
|
36
|
+
module HexaPDF
|
|
37
|
+
module CLI
|
|
38
|
+
|
|
39
|
+
# Optimizes the size of a PDF file.
|
|
40
|
+
class Optimize < Command
|
|
41
|
+
|
|
42
|
+
def initialize #:nodoc:
|
|
43
|
+
super('optimize', takes_commands: false)
|
|
44
|
+
short_desc("Optimize the size of a PDF file")
|
|
45
|
+
long_desc(<<-EOF.gsub!(/^ */, ''))
|
|
46
|
+
This command uses several optimization strategies to reduce the file size of the PDF file.
|
|
47
|
+
|
|
48
|
+
By default, all strategies except page compression are used since page compression may
|
|
49
|
+
take a very long time without much benefit.
|
|
50
|
+
EOF
|
|
51
|
+
|
|
52
|
+
@password = nil
|
|
53
|
+
@out_options.compact = true
|
|
54
|
+
@out_options.xref_streams = :generate
|
|
55
|
+
@out_options.object_streams = :generate
|
|
56
|
+
@out_options.streams = :compress
|
|
57
|
+
|
|
58
|
+
options.on("--password PASSWORD", "-p", String,
|
|
59
|
+
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
|
60
|
+
@password = (pwd == '-' ? read_password : pwd)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
options.separator("")
|
|
64
|
+
options.separator("Optimization options")
|
|
65
|
+
define_optimization_options
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def execute(in_file, out_file) #:nodoc:
|
|
69
|
+
maybe_raise_on_existing_file(out_file)
|
|
70
|
+
HexaPDF::Document.open(in_file, decryption_opts: {password: @password}) do |doc|
|
|
71
|
+
optimize_page_tree(doc)
|
|
72
|
+
apply_optimization_options(doc)
|
|
73
|
+
doc.write(out_file)
|
|
74
|
+
end
|
|
75
|
+
rescue HexaPDF::Error => e
|
|
76
|
+
$stderr.puts "Processing error : #{e.message}"
|
|
77
|
+
exit(1)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
private
|
|
81
|
+
|
|
82
|
+
# Optimizes the page tree by flattening it and deleting unsed objects.
|
|
83
|
+
def optimize_page_tree(doc)
|
|
84
|
+
page_tree = doc.add(Type: :Pages)
|
|
85
|
+
retained = {page_tree.data => true}
|
|
86
|
+
doc.pages.each do |page|
|
|
87
|
+
page.value.update(page.copy_inherited_values)
|
|
88
|
+
page_tree.add_page(page)
|
|
89
|
+
retained[page.data] = true
|
|
90
|
+
end
|
|
91
|
+
doc.catalog[:Pages] = page_tree
|
|
92
|
+
|
|
93
|
+
doc.each(current: false) do |obj|
|
|
94
|
+
next unless obj.kind_of?(HexaPDF::Dictionary)
|
|
95
|
+
if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
|
|
96
|
+
doc.delete(obj)
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
end
|
|
104
|
+
end
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# This file is part of HexaPDF.
|
|
5
5
|
#
|
|
6
6
|
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
|
7
|
-
# Copyright (C)
|
|
7
|
+
# Copyright (C) 2014-2017 Thomas Leitner
|
|
8
8
|
#
|
|
9
9
|
# HexaPDF is free software: you can redistribute it and/or modify it
|
|
10
10
|
# under the terms of the GNU Affero General Public License version 3 as
|
data/lib/hexapdf/content.rb
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# This file is part of HexaPDF.
|
|
5
5
|
#
|
|
6
6
|
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
|
7
|
-
# Copyright (C)
|
|
7
|
+
# Copyright (C) 2014-2017 Thomas Leitner
|
|
8
8
|
#
|
|
9
9
|
# HexaPDF is free software: you can redistribute it and/or modify it
|
|
10
10
|
# under the terms of the GNU Affero General Public License version 3 as
|