hexapdf 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (173) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +33 -1
  3. data/CONTRIBUTERS +1 -1
  4. data/LICENSE +1 -1
  5. data/Rakefile +1 -1
  6. data/VERSION +1 -1
  7. data/lib/hexapdf.rb +1 -1
  8. data/lib/hexapdf/cli.rb +19 -52
  9. data/lib/hexapdf/cli/command.rb +251 -0
  10. data/lib/hexapdf/cli/{extract.rb → files.rb} +19 -23
  11. data/lib/hexapdf/cli/images.rb +147 -0
  12. data/lib/hexapdf/cli/info.rb +5 -5
  13. data/lib/hexapdf/cli/inspect.rb +13 -12
  14. data/lib/hexapdf/cli/merge.rb +200 -0
  15. data/lib/hexapdf/cli/modify.rb +39 -242
  16. data/lib/hexapdf/cli/optimize.rb +104 -0
  17. data/lib/hexapdf/configuration.rb +1 -1
  18. data/lib/hexapdf/content.rb +1 -1
  19. data/lib/hexapdf/content/canvas.rb +1 -1
  20. data/lib/hexapdf/content/color_space.rb +1 -1
  21. data/lib/hexapdf/content/graphic_object.rb +1 -1
  22. data/lib/hexapdf/content/graphic_object/arc.rb +1 -1
  23. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +1 -1
  24. data/lib/hexapdf/content/graphic_object/solid_arc.rb +1 -1
  25. data/lib/hexapdf/content/graphics_state.rb +1 -1
  26. data/lib/hexapdf/content/operator.rb +1 -1
  27. data/lib/hexapdf/content/parser.rb +16 -15
  28. data/lib/hexapdf/content/processor.rb +1 -1
  29. data/lib/hexapdf/content/transformation_matrix.rb +1 -1
  30. data/lib/hexapdf/data_dir.rb +1 -1
  31. data/lib/hexapdf/dictionary.rb +1 -1
  32. data/lib/hexapdf/dictionary_fields.rb +1 -1
  33. data/lib/hexapdf/document.rb +1 -1
  34. data/lib/hexapdf/document/files.rb +1 -1
  35. data/lib/hexapdf/document/fonts.rb +1 -1
  36. data/lib/hexapdf/document/images.rb +1 -1
  37. data/lib/hexapdf/document/pages.rb +1 -1
  38. data/lib/hexapdf/encryption.rb +1 -1
  39. data/lib/hexapdf/encryption/aes.rb +1 -1
  40. data/lib/hexapdf/encryption/arc4.rb +1 -1
  41. data/lib/hexapdf/encryption/fast_aes.rb +1 -1
  42. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  43. data/lib/hexapdf/encryption/identity.rb +1 -1
  44. data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
  45. data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
  46. data/lib/hexapdf/encryption/security_handler.rb +1 -1
  47. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -1
  48. data/lib/hexapdf/error.rb +1 -1
  49. data/lib/hexapdf/filter.rb +1 -1
  50. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  51. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
  52. data/lib/hexapdf/filter/dct_decode.rb +1 -1
  53. data/lib/hexapdf/filter/encryption.rb +1 -1
  54. data/lib/hexapdf/filter/flate_decode.rb +1 -1
  55. data/lib/hexapdf/filter/jpx_decode.rb +1 -1
  56. data/lib/hexapdf/filter/lzw_decode.rb +2 -3
  57. data/lib/hexapdf/filter/predictor.rb +11 -11
  58. data/lib/hexapdf/filter/run_length_decode.rb +1 -1
  59. data/lib/hexapdf/font/cmap.rb +1 -1
  60. data/lib/hexapdf/font/cmap/parser.rb +1 -1
  61. data/lib/hexapdf/font/cmap/writer.rb +1 -1
  62. data/lib/hexapdf/font/encoding.rb +1 -1
  63. data/lib/hexapdf/font/encoding/base.rb +1 -1
  64. data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
  65. data/lib/hexapdf/font/encoding/glyph_list.rb +1 -1
  66. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
  67. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +1 -1
  68. data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
  69. data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
  70. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +1 -1
  71. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
  72. data/lib/hexapdf/font/true_type.rb +2 -1
  73. data/lib/hexapdf/font/true_type/font.rb +1 -1
  74. data/lib/hexapdf/font/true_type/subsetter.rb +186 -0
  75. data/lib/hexapdf/font/true_type/table.rb +8 -4
  76. data/lib/hexapdf/font/true_type/table/cmap.rb +1 -1
  77. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +1 -1
  78. data/lib/hexapdf/font/true_type/table/directory.rb +1 -1
  79. data/lib/hexapdf/font/true_type/table/glyf.rb +6 -2
  80. data/lib/hexapdf/font/true_type/table/head.rb +2 -2
  81. data/lib/hexapdf/font/true_type/table/hhea.rb +1 -1
  82. data/lib/hexapdf/font/true_type/table/hmtx.rb +1 -1
  83. data/lib/hexapdf/font/true_type/table/loca.rb +1 -1
  84. data/lib/hexapdf/font/true_type/table/maxp.rb +1 -1
  85. data/lib/hexapdf/font/true_type/table/name.rb +1 -1
  86. data/lib/hexapdf/font/true_type/table/os2.rb +1 -1
  87. data/lib/hexapdf/font/true_type/table/post.rb +1 -1
  88. data/lib/hexapdf/font/true_type_wrapper.rb +56 -8
  89. data/lib/hexapdf/font/type1.rb +1 -1
  90. data/lib/hexapdf/font/type1/afm_parser.rb +1 -1
  91. data/lib/hexapdf/font/type1/character_metrics.rb +1 -1
  92. data/lib/hexapdf/font/type1/font.rb +1 -1
  93. data/lib/hexapdf/font/type1/font_metrics.rb +1 -1
  94. data/lib/hexapdf/font/type1/pfb_parser.rb +1 -1
  95. data/lib/hexapdf/font/type1_wrapper.rb +1 -1
  96. data/lib/hexapdf/font_loader.rb +1 -1
  97. data/lib/hexapdf/font_loader/from_configuration.rb +6 -3
  98. data/lib/hexapdf/font_loader/standard14.rb +1 -1
  99. data/lib/hexapdf/image_loader.rb +1 -1
  100. data/lib/hexapdf/image_loader/jpeg.rb +1 -1
  101. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  102. data/lib/hexapdf/image_loader/png.rb +1 -1
  103. data/lib/hexapdf/importer.rb +1 -1
  104. data/lib/hexapdf/name_tree_node.rb +1 -1
  105. data/lib/hexapdf/number_tree_node.rb +1 -1
  106. data/lib/hexapdf/object.rb +1 -1
  107. data/lib/hexapdf/parser.rb +1 -1
  108. data/lib/hexapdf/rectangle.rb +1 -1
  109. data/lib/hexapdf/reference.rb +1 -1
  110. data/lib/hexapdf/revision.rb +1 -1
  111. data/lib/hexapdf/revisions.rb +13 -15
  112. data/lib/hexapdf/serializer.rb +7 -3
  113. data/lib/hexapdf/stream.rb +1 -1
  114. data/lib/hexapdf/task.rb +1 -1
  115. data/lib/hexapdf/task/dereference.rb +1 -1
  116. data/lib/hexapdf/task/optimize.rb +1 -1
  117. data/lib/hexapdf/tokenizer.rb +12 -12
  118. data/lib/hexapdf/type.rb +1 -1
  119. data/lib/hexapdf/type/catalog.rb +1 -1
  120. data/lib/hexapdf/type/embedded_file.rb +1 -1
  121. data/lib/hexapdf/type/file_specification.rb +1 -1
  122. data/lib/hexapdf/type/font.rb +1 -1
  123. data/lib/hexapdf/type/font_descriptor.rb +1 -1
  124. data/lib/hexapdf/type/font_simple.rb +1 -1
  125. data/lib/hexapdf/type/font_true_type.rb +1 -1
  126. data/lib/hexapdf/type/font_type1.rb +1 -1
  127. data/lib/hexapdf/type/form.rb +1 -1
  128. data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
  129. data/lib/hexapdf/type/image.rb +187 -1
  130. data/lib/hexapdf/type/info.rb +1 -1
  131. data/lib/hexapdf/type/names.rb +1 -1
  132. data/lib/hexapdf/type/object_stream.rb +1 -1
  133. data/lib/hexapdf/type/page.rb +1 -1
  134. data/lib/hexapdf/type/page_tree_node.rb +6 -1
  135. data/lib/hexapdf/type/resources.rb +1 -1
  136. data/lib/hexapdf/type/trailer.rb +2 -2
  137. data/lib/hexapdf/type/viewer_preferences.rb +1 -1
  138. data/lib/hexapdf/type/xref_stream.rb +22 -18
  139. data/lib/hexapdf/utils/bit_field.rb +1 -1
  140. data/lib/hexapdf/utils/bit_stream.rb +16 -32
  141. data/lib/hexapdf/utils/lru_cache.rb +1 -1
  142. data/lib/hexapdf/utils/math_helpers.rb +1 -1
  143. data/lib/hexapdf/utils/object_hash.rb +1 -1
  144. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
  145. data/lib/hexapdf/utils/sorted_tree_node.rb +1 -1
  146. data/lib/hexapdf/version.rb +2 -2
  147. data/lib/hexapdf/writer.rb +2 -1
  148. data/lib/hexapdf/xref_section.rb +6 -1
  149. data/man/man1/hexapdf.1 +194 -115
  150. data/test/data/images/greyscale-1bit.png +0 -0
  151. data/test/data/images/greyscale-2bit.png +0 -0
  152. data/test/data/images/greyscale-8bit.png +0 -0
  153. data/test/data/images/indexed-alpha-4bit.png +0 -0
  154. data/test/data/images/truecolour-8bit.png +0 -0
  155. data/test/hexapdf/content/test_operator.rb +8 -8
  156. data/test/hexapdf/content/test_processor.rb +1 -1
  157. data/test/hexapdf/encryption/test_security_handler.rb +1 -1
  158. data/test/hexapdf/font/test_true_type_wrapper.rb +89 -48
  159. data/test/hexapdf/font/true_type/table/test_glyf.rb +1 -0
  160. data/test/hexapdf/font/true_type/test_subsetter.rb +70 -0
  161. data/test/hexapdf/font/true_type/test_table.rb +16 -0
  162. data/test/hexapdf/font_loader/test_from_configuration.rb +7 -0
  163. data/test/hexapdf/test_document.rb +1 -1
  164. data/test/hexapdf/test_object.rb +1 -1
  165. data/test/hexapdf/test_revisions.rb +34 -8
  166. data/test/hexapdf/test_serializer.rb +3 -0
  167. data/test/hexapdf/test_writer.rb +11 -2
  168. data/test/hexapdf/test_xref_section.rb +15 -0
  169. data/test/hexapdf/type/test_image.rb +234 -0
  170. data/test/hexapdf/type/test_object_stream.rb +2 -2
  171. data/test/hexapdf/type/test_trailer.rb +4 -0
  172. data/test/hexapdf/utils/test_bit_stream.rb +69 -0
  173. metadata +14 -6
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2016 Thomas Leitner
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -32,284 +32,81 @@
32
32
  #++
33
33
 
34
34
  require 'ostruct'
35
- require 'hexapdf/cli'
35
+ require 'hexapdf/cli/command'
36
36
 
37
37
  module HexaPDF
38
38
  module CLI
39
39
 
40
40
  # Modifies a PDF file:
41
41
  #
42
- # * Adds pages from other PDF files.
43
42
  # * Decrypts or encrypts the resulting output PDF file.
44
43
  # * Generates or deletes object and cross-reference streams.
45
44
  # * Optimizes the output PDF by merging the revisions of a PDF file and removes unused entries.
46
45
  #
47
46
  # See: HexaPDF::Task::Optimize
48
- class Modify < CmdParse::Command
49
-
50
- InputSpec = Struct.new(:file, :pages, :password) #:nodoc:
47
+ class Modify < Command
51
48
 
52
49
  def initialize #:nodoc:
53
50
  super('modify', takes_commands: false)
54
51
  short_desc("Modify a PDF file")
55
52
  long_desc(<<-EOF.gsub!(/^ */, ''))
56
53
  This command modifies a PDF file. It can be used to select pages that should appear in
57
- the output file and to add pages from other PDF files. The output file can be
58
- encrypted/decrypted and optimized in various ways.
59
-
60
- The first input file is the primary file which gets modified, so meta data like file
61
- information, outlines, etc. are taken from it. Alternatively, it is possible to start
62
- with an empty PDF file by using --empty. The order of the options specifying the files
63
- is important as they are used in that order.
64
-
65
- Also note that the --password and --pages options apply to the last preceeding input file.
54
+ the output file and/or rotate them. The output file can also be encrypted/decrypted and
55
+ optimized in various ways.
66
56
  EOF
67
57
 
68
- options.separator("")
69
- options.separator("Input file(s) related options")
70
- options.on("-f", "--file FILE", "Input file, can be specified multiple times") do |file|
71
- @files << InputSpec.new(file, '1-e')
72
- end
73
- options.on("-p", "--password PASSWORD", String, "The password for decrypting the last " \
74
- "specified input file (use - for reading from standard input)") do |pwd|
75
- raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
76
- pwd = (pwd == '-' ? command_parser.read_password("#{@files.last.file} password") : pwd)
77
- @files.last.password = pwd
78
- end
79
- options.on("-i", "--pages PAGES", "The pages of the last specified input file that " \
80
- "should be used (default: 1-e)") do |pages|
81
- raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
82
- @files.last.pages = pages
83
- end
84
- options.on("-e", "--empty", "Use an empty file as the first input file") do
85
- @initial_empty = true
86
- end
87
- options.on("--[no-]interleave", "Interleave the pages from the input files (default: " \
88
- "false)") do |c|
89
- @interleave = c
90
- end
91
-
92
- options.separator("")
93
- options.separator("Output file related options")
94
- options.on("--embed FILE", String, "Embed the file into the output file (can be used " \
95
- "multiple times)") do |file|
96
- @embed_files << file
97
- end
98
- options.on("--[no-]compact", "Delete unnecessary PDF objects (default: yes)") do |c|
99
- @compact = c
100
- end
101
- options.on("--object-streams MODE", [:generate, :preserve, :delete],
102
- "Handling of object streams (either generate, preserve or delete; " \
103
- "default: preserve)") do |os|
104
- @object_streams = os
105
- end
106
- options.on("--xref-streams MODE", [:generate, :preserve, :delete],
107
- "Handling of cross-reference streams (either generate, preserve or delete; " \
108
- "default: preserve)") do |x|
109
- @xref_streams = x
110
- end
111
- options.on("--streams MODE", [:compress, :preserve, :uncompress],
112
- "Handling of stream data (either compress, preserve or uncompress; default: " \
113
- "preserve)") do |streams|
114
- @streams = streams
115
- end
116
- options.on("--[no-]compress-pages", "Recompress page content streams (may take a long " \
117
- "time; default: no)") do |c|
118
- @compress_pages = c
119
- end
120
- options.on("--decrypt", "Remove any encryption") do
121
- @encryption = :remove
122
- end
123
- options.on("--encrypt", "Encrypt the output file") do
124
- @encryption = :add
125
- end
126
- options.on("--owner-password PASSWORD", String, "The owner password to be set on the " \
127
- "output file (use - for reading from standard input)") do |pwd|
128
- @encryption = :add
129
- @enc_owner_pwd = (pwd == '-' ? command_parser.read_password("Owner password") : pwd)
130
- end
131
- options.on("--user-password PASSWORD", String, "The user password to be set on the " \
132
- "output file (use - for reading from standard input)") do |pwd|
133
- @encryption = :add
134
- @enc_user_pwd = (pwd == '-' ? command_parser.read_password("User password") : pwd)
135
- end
136
- options.on("--algorithm ALGORITHM", [:aes, :arc4],
137
- "The encryption algorithm: aes or arc4 (default: aes)") do |a|
138
- @encryption = :add
139
- @enc_algorithm = a
140
- end
141
- options.on("--key-length BITS", Integer,
142
- "The encryption key length in bits (default: 128)") do |i|
143
- @encryption = :add
144
- @enc_key_length = i
145
- end
146
- options.on("--force-V4",
147
- "Force the use of encryption version 4 if key length=128 and algorithm=arc4") do
148
- @encryption = :add
149
- @enc_force_v4 = true
150
- end
151
- syms = HexaPDF::Encryption::StandardSecurityHandler::Permissions::SYMBOL_TO_PERMISSION.keys
152
- options.on("--permissions PERMS", Array,
153
- "Comma separated list of permissions to be set on the output file. Possible " \
154
- "values: #{syms.join(', ')}") do |perms|
155
- perms.map! do |perm|
156
- unless syms.include?(perm.to_sym)
157
- raise OptionParser::InvalidArgument, "#{perm} (invalid permission name)"
158
- end
159
- perm.to_sym
160
- end
161
- @encryption = :add
162
- @enc_permissions = perms
163
- end
164
-
165
- @files = []
166
- @initial_empty = false
167
- @interleave = false
168
-
58
+ @password = nil
59
+ @pages = '1-e'
169
60
  @embed_files = []
170
- @compact = true
171
- @compress_pages = false
172
- @object_streams = :preserve
173
- @xref_streams = :preserve
174
- @streams = :preserve
175
61
 
176
- @encryption = :preserve
177
- @enc_user_pwd = @enc_owner_pwd = nil
178
- @enc_key_length = 128
179
- @enc_algorithm = :aes
180
- @enc_force_v4 = false
181
- @enc_permissions = []
182
- end
183
-
184
- def execute(output_file) #:nodoc:
185
- if !@initial_empty && @files.empty?
186
- error = OptionParser::ParseError.new("At least one --file FILE or --empty is needed")
187
- error.reason = "Missing argument"
188
- raise error
62
+ options.on("--password PASSWORD", "-p", String,
63
+ "The password for decryption. Use - for reading from standard input.") do |pwd|
64
+ @password = (pwd == '-' ? read_password : pwd)
189
65
  end
190
-
191
- # Create PDF documents for each input file
192
- cache = {}
193
- @files.each do |spec|
194
- cache[spec.file] ||= HexaPDF::Document.new(io: File.open(spec.file),
195
- decryption_opts: {password: spec.password})
196
- spec.file = cache[spec.file]
66
+ options.on("-i", "--pages PAGES", "The pages of the input file that should be used " \
67
+ "(default: 1-e)") do |pages|
68
+ @pages = pages
197
69
  end
198
-
199
- # Assemble pages
200
- target = (@initial_empty ? HexaPDF::Document.new : @files.first.file)
201
- page_tree = target.add(Type: :Pages)
202
- import_pages(page_tree)
203
- target.catalog[:Pages] = page_tree
204
-
205
- # Remove potentially imported but unused pages and page tree nodes
206
- retained = target.pages.each_with_object({}) {|page, h| h[page.data] = true}
207
- retained[target.pages.root.data] = true
208
- target.each(current: false) do |obj|
209
- next unless obj.kind_of?(HexaPDF::Dictionary)
210
- if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
211
- target.delete(obj)
212
- end
70
+ options.on("-e", "--embed FILE", String, "Embed the file into the output file (can be " \
71
+ "used multiple times)") do |file|
72
+ @embed_files << file
213
73
  end
74
+ define_optimization_options
75
+ define_encryption_options
76
+ end
214
77
 
215
- # Embed the given files
216
- @embed_files.each {|file| target.files.add(file, embed: true)}
217
-
218
- # Optimize the PDF file
219
- target.task(:optimize, compact: @compact, object_streams: @object_streams,
220
- xref_streams: @xref_streams, compress_pages: @compress_pages)
221
-
222
- # Update stream filters
223
- handle_streams(target) unless @streams == :preserve
224
-
225
- # Encrypt, decrypt or do nothing
226
- if @encryption == :add
227
- target.encrypt(algorithm: @enc_algorithm, key_length: @enc_key_length,
228
- force_V4: @enc_force_v4, permissions: @enc_permissions,
229
- owner_password: @enc_owner_pwd, user_password: @enc_user_pwd)
230
- elsif @encryption == :remove
231
- target.encrypt(name: nil)
78
+ def execute(in_file, out_file) #:nodoc:
79
+ maybe_raise_on_existing_file(out_file)
80
+ HexaPDF::Document.open(in_file, decryption_opts: {password: @password}) do |doc|
81
+ arrange_pages(doc) unless @pages == '1-e'
82
+ @embed_files.each {|file| doc.files.add(file, embed: true)}
83
+ apply_encryption_options(doc)
84
+ apply_optimization_options(doc)
85
+ doc.write(out_file)
232
86
  end
233
-
234
- target.write(output_file)
235
87
  rescue HexaPDF::Error => e
236
88
  $stderr.puts "Processing error : #{e.message}"
237
89
  exit(1)
238
90
  end
239
91
 
240
- def usage_arguments #:nodoc:
241
- "{--file IN_FILE | --empty} OUT_FILE"
242
- end
243
-
244
92
  private
245
93
 
246
- # Imports the pages of the document as specified with the --pages option to the given page
247
- # tree.
248
- def import_pages(page_tree)
249
- @files.each do |s|
250
- page_list = s.file.pages.to_a
251
- s.pages = command_parser.parse_pages_specification(s.pages, s.file.pages.count)
252
- s.pages.each do |arr|
253
- arr[0] = page_list[arr[0]]
254
- arr[1] = arr[0].value[:Rotate] || :none unless arr[1]
255
- end
256
- end
257
-
258
- if @interleave
259
- max_pages_per_file = 0
260
- all = @files.each_with_index.map do |spec, findex|
261
- list = []
262
- spec.pages.each {|index, rotation| list << [spec.file, findex, index, rotation]}
263
- max_pages_per_file = list.size if list.size > max_pages_per_file
264
- list
265
- end
266
- first, *rest = *all
267
- first[max_pages_per_file - 1] ||= nil
268
- first.zip(*rest) do |slice|
269
- slice.each do |source, findex, page, rotation|
270
- next unless source
271
- import_page(page_tree, findex, page, rotation)
272
- end
273
- end
274
- else
275
- @files.each_with_index do |s, findex|
276
- s.pages.each {|page, rotation| import_page(page_tree, findex, page, rotation)}
277
- end
278
- end
279
- end
280
-
281
- # Import the page with the given +rotation+ into the page tree.
282
- def import_page(page_tree, source_index, page, rotation)
283
- if page_tree.document == page.document
94
+ # Arranges the pages of the document as specified with the --pages option.
95
+ def arrange_pages(doc)
96
+ all_pages = doc.pages.to_a
97
+ new_page_tree = doc.add(Type: :Pages)
98
+ parse_pages_specification(@pages, all_pages.length).each do |index, rotation|
99
+ page = all_pages[index]
284
100
  page.value.update(page.copy_inherited_values)
285
- page = page.deep_copy unless source_index == 0
286
- else
287
- page = page_tree.document.import(page).deep_copy
288
- end
289
- if rotation == :none
290
- page.delete(:Rotate)
291
- elsif rotation.kind_of?(Integer)
292
- page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
293
- end
294
- page_tree.document.add(page)
295
- page_tree.add_page(page)
296
- end
297
-
298
- IGNORED_FILTERS = { #:nodoc:
299
- CCITTFaxDecode: true, JBIG2Decode: true, DCTDecode: true, JPXDecode: true, Crypt: true
300
- }.freeze
301
-
302
- # Applies the chosen stream mode to all streams.
303
- def handle_streams(doc)
304
- doc.each(current: false) do |obj|
305
- next if !obj.respond_to?(:set_filter) || obj[:Subtype] == :Image ||
306
- Array(obj[:Filter]).any? {|f| IGNORED_FILTERS[f]}
307
- if @streams == :compress
308
- obj.set_filter(:FlateDecode)
101
+ if rotation == :none
102
+ page.delete(:Rotate)
309
103
  else
310
- obj.set_filter(nil)
104
+ page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
311
105
  end
106
+ new_page_tree.add_page(page)
312
107
  end
108
+ doc.delete(doc.catalog.delete(:Pages))
109
+ doc.catalog[:Pages] = new_page_tree
313
110
  end
314
111
 
315
112
  end
@@ -0,0 +1,104 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/cli/command'
35
+
36
+ module HexaPDF
37
+ module CLI
38
+
39
+ # Optimizes the size of a PDF file.
40
+ class Optimize < Command
41
+
42
+ def initialize #:nodoc:
43
+ super('optimize', takes_commands: false)
44
+ short_desc("Optimize the size of a PDF file")
45
+ long_desc(<<-EOF.gsub!(/^ */, ''))
46
+ This command uses several optimization strategies to reduce the file size of the PDF file.
47
+
48
+ By default, all strategies except page compression are used since page compression may
49
+ take a very long time without much benefit.
50
+ EOF
51
+
52
+ @password = nil
53
+ @out_options.compact = true
54
+ @out_options.xref_streams = :generate
55
+ @out_options.object_streams = :generate
56
+ @out_options.streams = :compress
57
+
58
+ options.on("--password PASSWORD", "-p", String,
59
+ "The password for decryption. Use - for reading from standard input.") do |pwd|
60
+ @password = (pwd == '-' ? read_password : pwd)
61
+ end
62
+
63
+ options.separator("")
64
+ options.separator("Optimization options")
65
+ define_optimization_options
66
+ end
67
+
68
+ def execute(in_file, out_file) #:nodoc:
69
+ maybe_raise_on_existing_file(out_file)
70
+ HexaPDF::Document.open(in_file, decryption_opts: {password: @password}) do |doc|
71
+ optimize_page_tree(doc)
72
+ apply_optimization_options(doc)
73
+ doc.write(out_file)
74
+ end
75
+ rescue HexaPDF::Error => e
76
+ $stderr.puts "Processing error : #{e.message}"
77
+ exit(1)
78
+ end
79
+
80
+ private
81
+
82
+ # Optimizes the page tree by flattening it and deleting unused objects.
83
+ def optimize_page_tree(doc)
84
+ page_tree = doc.add(Type: :Pages)
85
+ retained = {page_tree.data => true}
86
+ doc.pages.each do |page|
87
+ page.value.update(page.copy_inherited_values)
88
+ page_tree.add_page(page)
89
+ retained[page.data] = true
90
+ end
91
+ doc.catalog[:Pages] = page_tree
92
+
93
+ doc.each(current: false) do |obj|
94
+ next unless obj.kind_of?(HexaPDF::Dictionary)
95
+ if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
96
+ doc.delete(obj)
97
+ end
98
+ end
99
+ end
100
+
101
+ end
102
+
103
+ end
104
+ end
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2016 Thomas Leitner
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2016 Thomas Leitner
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as