hexapdf 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +33 -1
  3. data/CONTRIBUTERS +1 -1
  4. data/LICENSE +1 -1
  5. data/Rakefile +1 -1
  6. data/VERSION +1 -1
  7. data/lib/hexapdf.rb +1 -1
  8. data/lib/hexapdf/cli.rb +19 -52
  9. data/lib/hexapdf/cli/command.rb +251 -0
  10. data/lib/hexapdf/cli/{extract.rb → files.rb} +19 -23
  11. data/lib/hexapdf/cli/images.rb +147 -0
  12. data/lib/hexapdf/cli/info.rb +5 -5
  13. data/lib/hexapdf/cli/inspect.rb +13 -12
  14. data/lib/hexapdf/cli/merge.rb +200 -0
  15. data/lib/hexapdf/cli/modify.rb +39 -242
  16. data/lib/hexapdf/cli/optimize.rb +104 -0
  17. data/lib/hexapdf/configuration.rb +1 -1
  18. data/lib/hexapdf/content.rb +1 -1
  19. data/lib/hexapdf/content/canvas.rb +1 -1
  20. data/lib/hexapdf/content/color_space.rb +1 -1
  21. data/lib/hexapdf/content/graphic_object.rb +1 -1
  22. data/lib/hexapdf/content/graphic_object/arc.rb +1 -1
  23. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +1 -1
  24. data/lib/hexapdf/content/graphic_object/solid_arc.rb +1 -1
  25. data/lib/hexapdf/content/graphics_state.rb +1 -1
  26. data/lib/hexapdf/content/operator.rb +1 -1
  27. data/lib/hexapdf/content/parser.rb +16 -15
  28. data/lib/hexapdf/content/processor.rb +1 -1
  29. data/lib/hexapdf/content/transformation_matrix.rb +1 -1
  30. data/lib/hexapdf/data_dir.rb +1 -1
  31. data/lib/hexapdf/dictionary.rb +1 -1
  32. data/lib/hexapdf/dictionary_fields.rb +1 -1
  33. data/lib/hexapdf/document.rb +1 -1
  34. data/lib/hexapdf/document/files.rb +1 -1
  35. data/lib/hexapdf/document/fonts.rb +1 -1
  36. data/lib/hexapdf/document/images.rb +1 -1
  37. data/lib/hexapdf/document/pages.rb +1 -1
  38. data/lib/hexapdf/encryption.rb +1 -1
  39. data/lib/hexapdf/encryption/aes.rb +1 -1
  40. data/lib/hexapdf/encryption/arc4.rb +1 -1
  41. data/lib/hexapdf/encryption/fast_aes.rb +1 -1
  42. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  43. data/lib/hexapdf/encryption/identity.rb +1 -1
  44. data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
  45. data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
  46. data/lib/hexapdf/encryption/security_handler.rb +1 -1
  47. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -1
  48. data/lib/hexapdf/error.rb +1 -1
  49. data/lib/hexapdf/filter.rb +1 -1
  50. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  51. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
  52. data/lib/hexapdf/filter/dct_decode.rb +1 -1
  53. data/lib/hexapdf/filter/encryption.rb +1 -1
  54. data/lib/hexapdf/filter/flate_decode.rb +1 -1
  55. data/lib/hexapdf/filter/jpx_decode.rb +1 -1
  56. data/lib/hexapdf/filter/lzw_decode.rb +2 -3
  57. data/lib/hexapdf/filter/predictor.rb +11 -11
  58. data/lib/hexapdf/filter/run_length_decode.rb +1 -1
  59. data/lib/hexapdf/font/cmap.rb +1 -1
  60. data/lib/hexapdf/font/cmap/parser.rb +1 -1
  61. data/lib/hexapdf/font/cmap/writer.rb +1 -1
  62. data/lib/hexapdf/font/encoding.rb +1 -1
  63. data/lib/hexapdf/font/encoding/base.rb +1 -1
  64. data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
  65. data/lib/hexapdf/font/encoding/glyph_list.rb +1 -1
  66. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
  67. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +1 -1
  68. data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
  69. data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
  70. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +1 -1
  71. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
  72. data/lib/hexapdf/font/true_type.rb +2 -1
  73. data/lib/hexapdf/font/true_type/font.rb +1 -1
  74. data/lib/hexapdf/font/true_type/subsetter.rb +186 -0
  75. data/lib/hexapdf/font/true_type/table.rb +8 -4
  76. data/lib/hexapdf/font/true_type/table/cmap.rb +1 -1
  77. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +1 -1
  78. data/lib/hexapdf/font/true_type/table/directory.rb +1 -1
  79. data/lib/hexapdf/font/true_type/table/glyf.rb +6 -2
  80. data/lib/hexapdf/font/true_type/table/head.rb +2 -2
  81. data/lib/hexapdf/font/true_type/table/hhea.rb +1 -1
  82. data/lib/hexapdf/font/true_type/table/hmtx.rb +1 -1
  83. data/lib/hexapdf/font/true_type/table/loca.rb +1 -1
  84. data/lib/hexapdf/font/true_type/table/maxp.rb +1 -1
  85. data/lib/hexapdf/font/true_type/table/name.rb +1 -1
  86. data/lib/hexapdf/font/true_type/table/os2.rb +1 -1
  87. data/lib/hexapdf/font/true_type/table/post.rb +1 -1
  88. data/lib/hexapdf/font/true_type_wrapper.rb +56 -8
  89. data/lib/hexapdf/font/type1.rb +1 -1
  90. data/lib/hexapdf/font/type1/afm_parser.rb +1 -1
  91. data/lib/hexapdf/font/type1/character_metrics.rb +1 -1
  92. data/lib/hexapdf/font/type1/font.rb +1 -1
  93. data/lib/hexapdf/font/type1/font_metrics.rb +1 -1
  94. data/lib/hexapdf/font/type1/pfb_parser.rb +1 -1
  95. data/lib/hexapdf/font/type1_wrapper.rb +1 -1
  96. data/lib/hexapdf/font_loader.rb +1 -1
  97. data/lib/hexapdf/font_loader/from_configuration.rb +6 -3
  98. data/lib/hexapdf/font_loader/standard14.rb +1 -1
  99. data/lib/hexapdf/image_loader.rb +1 -1
  100. data/lib/hexapdf/image_loader/jpeg.rb +1 -1
  101. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  102. data/lib/hexapdf/image_loader/png.rb +1 -1
  103. data/lib/hexapdf/importer.rb +1 -1
  104. data/lib/hexapdf/name_tree_node.rb +1 -1
  105. data/lib/hexapdf/number_tree_node.rb +1 -1
  106. data/lib/hexapdf/object.rb +1 -1
  107. data/lib/hexapdf/parser.rb +1 -1
  108. data/lib/hexapdf/rectangle.rb +1 -1
  109. data/lib/hexapdf/reference.rb +1 -1
  110. data/lib/hexapdf/revision.rb +1 -1
  111. data/lib/hexapdf/revisions.rb +13 -15
  112. data/lib/hexapdf/serializer.rb +7 -3
  113. data/lib/hexapdf/stream.rb +1 -1
  114. data/lib/hexapdf/task.rb +1 -1
  115. data/lib/hexapdf/task/dereference.rb +1 -1
  116. data/lib/hexapdf/task/optimize.rb +1 -1
  117. data/lib/hexapdf/tokenizer.rb +12 -12
  118. data/lib/hexapdf/type.rb +1 -1
  119. data/lib/hexapdf/type/catalog.rb +1 -1
  120. data/lib/hexapdf/type/embedded_file.rb +1 -1
  121. data/lib/hexapdf/type/file_specification.rb +1 -1
  122. data/lib/hexapdf/type/font.rb +1 -1
  123. data/lib/hexapdf/type/font_descriptor.rb +1 -1
  124. data/lib/hexapdf/type/font_simple.rb +1 -1
  125. data/lib/hexapdf/type/font_true_type.rb +1 -1
  126. data/lib/hexapdf/type/font_type1.rb +1 -1
  127. data/lib/hexapdf/type/form.rb +1 -1
  128. data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
  129. data/lib/hexapdf/type/image.rb +187 -1
  130. data/lib/hexapdf/type/info.rb +1 -1
  131. data/lib/hexapdf/type/names.rb +1 -1
  132. data/lib/hexapdf/type/object_stream.rb +1 -1
  133. data/lib/hexapdf/type/page.rb +1 -1
  134. data/lib/hexapdf/type/page_tree_node.rb +6 -1
  135. data/lib/hexapdf/type/resources.rb +1 -1
  136. data/lib/hexapdf/type/trailer.rb +2 -2
  137. data/lib/hexapdf/type/viewer_preferences.rb +1 -1
  138. data/lib/hexapdf/type/xref_stream.rb +22 -18
  139. data/lib/hexapdf/utils/bit_field.rb +1 -1
  140. data/lib/hexapdf/utils/bit_stream.rb +16 -32
  141. data/lib/hexapdf/utils/lru_cache.rb +1 -1
  142. data/lib/hexapdf/utils/math_helpers.rb +1 -1
  143. data/lib/hexapdf/utils/object_hash.rb +1 -1
  144. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
  145. data/lib/hexapdf/utils/sorted_tree_node.rb +1 -1
  146. data/lib/hexapdf/version.rb +2 -2
  147. data/lib/hexapdf/writer.rb +2 -1
  148. data/lib/hexapdf/xref_section.rb +6 -1
  149. data/man/man1/hexapdf.1 +194 -115
  150. data/test/data/images/greyscale-1bit.png +0 -0
  151. data/test/data/images/greyscale-2bit.png +0 -0
  152. data/test/data/images/greyscale-8bit.png +0 -0
  153. data/test/data/images/indexed-alpha-4bit.png +0 -0
  154. data/test/data/images/truecolour-8bit.png +0 -0
  155. data/test/hexapdf/content/test_operator.rb +8 -8
  156. data/test/hexapdf/content/test_processor.rb +1 -1
  157. data/test/hexapdf/encryption/test_security_handler.rb +1 -1
  158. data/test/hexapdf/font/test_true_type_wrapper.rb +89 -48
  159. data/test/hexapdf/font/true_type/table/test_glyf.rb +1 -0
  160. data/test/hexapdf/font/true_type/test_subsetter.rb +70 -0
  161. data/test/hexapdf/font/true_type/test_table.rb +16 -0
  162. data/test/hexapdf/font_loader/test_from_configuration.rb +7 -0
  163. data/test/hexapdf/test_document.rb +1 -1
  164. data/test/hexapdf/test_object.rb +1 -1
  165. data/test/hexapdf/test_revisions.rb +34 -8
  166. data/test/hexapdf/test_serializer.rb +3 -0
  167. data/test/hexapdf/test_writer.rb +11 -2
  168. data/test/hexapdf/test_xref_section.rb +15 -0
  169. data/test/hexapdf/type/test_image.rb +234 -0
  170. data/test/hexapdf/type/test_object_stream.rb +2 -2
  171. data/test/hexapdf/type/test_trailer.rb +4 -0
  172. data/test/hexapdf/utils/test_bit_stream.rb +69 -0
  173. metadata +14 -6
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2016 Thomas Leitner
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -32,284 +32,81 @@
32
32
  #++
33
33
 
34
34
  require 'ostruct'
35
- require 'hexapdf/cli'
35
+ require 'hexapdf/cli/command'
36
36
 
37
37
  module HexaPDF
38
38
  module CLI
39
39
 
40
40
  # Modifies a PDF file:
41
41
  #
42
- # * Adds pages from other PDF files.
43
42
  # * Decrypts or encrypts the resulting output PDF file.
44
43
  # * Generates or deletes object and cross-reference streams.
45
44
  # * Optimizes the output PDF by merging the revisions of a PDF file and removing unused entries.
46
45
  #
47
46
  # See: HexaPDF::Task::Optimize
48
- class Modify < CmdParse::Command
49
-
50
- InputSpec = Struct.new(:file, :pages, :password) #:nodoc:
47
+ class Modify < Command
51
48
 
52
49
  def initialize #:nodoc:
53
50
  super('modify', takes_commands: false)
54
51
  short_desc("Modify a PDF file")
55
52
  long_desc(<<-EOF.gsub!(/^ */, ''))
56
53
  This command modifies a PDF file. It can be used to select pages that should appear in
57
- the output file and to add pages from other PDF files. The output file can be
58
- encrypted/decrypted and optimized in various ways.
59
-
60
- The first input file is the primary file which gets modified, so meta data like file
61
- information, outlines, etc. are taken from it. Alternatively, it is possible to start
62
- with an empty PDF file by using --empty. The order of the options specifying the files
63
- is important as they are used in that order.
64
-
65
- Also note that the --password and --pages options apply to the last preceeding input file.
54
+ the output file and/or rotate them. The output file can also be encrypted/decrypted and
55
+ optimized in various ways.
66
56
  EOF
67
57
 
68
- options.separator("")
69
- options.separator("Input file(s) related options")
70
- options.on("-f", "--file FILE", "Input file, can be specified multiple times") do |file|
71
- @files << InputSpec.new(file, '1-e')
72
- end
73
- options.on("-p", "--password PASSWORD", String, "The password for decrypting the last " \
74
- "specified input file (use - for reading from standard input)") do |pwd|
75
- raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
76
- pwd = (pwd == '-' ? command_parser.read_password("#{@files.last.file} password") : pwd)
77
- @files.last.password = pwd
78
- end
79
- options.on("-i", "--pages PAGES", "The pages of the last specified input file that " \
80
- "should be used (default: 1-e)") do |pages|
81
- raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
82
- @files.last.pages = pages
83
- end
84
- options.on("-e", "--empty", "Use an empty file as the first input file") do
85
- @initial_empty = true
86
- end
87
- options.on("--[no-]interleave", "Interleave the pages from the input files (default: " \
88
- "false)") do |c|
89
- @interleave = c
90
- end
91
-
92
- options.separator("")
93
- options.separator("Output file related options")
94
- options.on("--embed FILE", String, "Embed the file into the output file (can be used " \
95
- "multiple times)") do |file|
96
- @embed_files << file
97
- end
98
- options.on("--[no-]compact", "Delete unnecessary PDF objects (default: yes)") do |c|
99
- @compact = c
100
- end
101
- options.on("--object-streams MODE", [:generate, :preserve, :delete],
102
- "Handling of object streams (either generate, preserve or delete; " \
103
- "default: preserve)") do |os|
104
- @object_streams = os
105
- end
106
- options.on("--xref-streams MODE", [:generate, :preserve, :delete],
107
- "Handling of cross-reference streams (either generate, preserve or delete; " \
108
- "default: preserve)") do |x|
109
- @xref_streams = x
110
- end
111
- options.on("--streams MODE", [:compress, :preserve, :uncompress],
112
- "Handling of stream data (either compress, preserve or uncompress; default: " \
113
- "preserve)") do |streams|
114
- @streams = streams
115
- end
116
- options.on("--[no-]compress-pages", "Recompress page content streams (may take a long " \
117
- "time; default: no)") do |c|
118
- @compress_pages = c
119
- end
120
- options.on("--decrypt", "Remove any encryption") do
121
- @encryption = :remove
122
- end
123
- options.on("--encrypt", "Encrypt the output file") do
124
- @encryption = :add
125
- end
126
- options.on("--owner-password PASSWORD", String, "The owner password to be set on the " \
127
- "output file (use - for reading from standard input)") do |pwd|
128
- @encryption = :add
129
- @enc_owner_pwd = (pwd == '-' ? command_parser.read_password("Owner password") : pwd)
130
- end
131
- options.on("--user-password PASSWORD", String, "The user password to be set on the " \
132
- "output file (use - for reading from standard input)") do |pwd|
133
- @encryption = :add
134
- @enc_user_pwd = (pwd == '-' ? command_parser.read_password("User password") : pwd)
135
- end
136
- options.on("--algorithm ALGORITHM", [:aes, :arc4],
137
- "The encryption algorithm: aes or arc4 (default: aes)") do |a|
138
- @encryption = :add
139
- @enc_algorithm = a
140
- end
141
- options.on("--key-length BITS", Integer,
142
- "The encryption key length in bits (default: 128)") do |i|
143
- @encryption = :add
144
- @enc_key_length = i
145
- end
146
- options.on("--force-V4",
147
- "Force the use of encryption version 4 if key length=128 and algorithm=arc4") do
148
- @encryption = :add
149
- @enc_force_v4 = true
150
- end
151
- syms = HexaPDF::Encryption::StandardSecurityHandler::Permissions::SYMBOL_TO_PERMISSION.keys
152
- options.on("--permissions PERMS", Array,
153
- "Comma separated list of permissions to be set on the output file. Possible " \
154
- "values: #{syms.join(', ')}") do |perms|
155
- perms.map! do |perm|
156
- unless syms.include?(perm.to_sym)
157
- raise OptionParser::InvalidArgument, "#{perm} (invalid permission name)"
158
- end
159
- perm.to_sym
160
- end
161
- @encryption = :add
162
- @enc_permissions = perms
163
- end
164
-
165
- @files = []
166
- @initial_empty = false
167
- @interleave = false
168
-
58
+ @password = nil
59
+ @pages = '1-e'
169
60
  @embed_files = []
170
- @compact = true
171
- @compress_pages = false
172
- @object_streams = :preserve
173
- @xref_streams = :preserve
174
- @streams = :preserve
175
61
 
176
- @encryption = :preserve
177
- @enc_user_pwd = @enc_owner_pwd = nil
178
- @enc_key_length = 128
179
- @enc_algorithm = :aes
180
- @enc_force_v4 = false
181
- @enc_permissions = []
182
- end
183
-
184
- def execute(output_file) #:nodoc:
185
- if !@initial_empty && @files.empty?
186
- error = OptionParser::ParseError.new("At least one --file FILE or --empty is needed")
187
- error.reason = "Missing argument"
188
- raise error
62
+ options.on("--password PASSWORD", "-p", String,
63
+ "The password for decryption. Use - for reading from standard input.") do |pwd|
64
+ @password = (pwd == '-' ? read_password : pwd)
189
65
  end
190
-
191
- # Create PDF documents for each input file
192
- cache = {}
193
- @files.each do |spec|
194
- cache[spec.file] ||= HexaPDF::Document.new(io: File.open(spec.file),
195
- decryption_opts: {password: spec.password})
196
- spec.file = cache[spec.file]
66
+ options.on("-i", "--pages PAGES", "The pages of the input file that should be used " \
67
+ "(default: 1-e)") do |pages|
68
+ @pages = pages
197
69
  end
198
-
199
- # Assemble pages
200
- target = (@initial_empty ? HexaPDF::Document.new : @files.first.file)
201
- page_tree = target.add(Type: :Pages)
202
- import_pages(page_tree)
203
- target.catalog[:Pages] = page_tree
204
-
205
- # Remove potentially imported but unused pages and page tree nodes
206
- retained = target.pages.each_with_object({}) {|page, h| h[page.data] = true}
207
- retained[target.pages.root.data] = true
208
- target.each(current: false) do |obj|
209
- next unless obj.kind_of?(HexaPDF::Dictionary)
210
- if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
211
- target.delete(obj)
212
- end
70
+ options.on("-e", "--embed FILE", String, "Embed the file into the output file (can be " \
71
+ "used multiple times)") do |file|
72
+ @embed_files << file
213
73
  end
74
+ define_optimization_options
75
+ define_encryption_options
76
+ end
214
77
 
215
- # Embed the given files
216
- @embed_files.each {|file| target.files.add(file, embed: true)}
217
-
218
- # Optimize the PDF file
219
- target.task(:optimize, compact: @compact, object_streams: @object_streams,
220
- xref_streams: @xref_streams, compress_pages: @compress_pages)
221
-
222
- # Update stream filters
223
- handle_streams(target) unless @streams == :preserve
224
-
225
- # Encrypt, decrypt or do nothing
226
- if @encryption == :add
227
- target.encrypt(algorithm: @enc_algorithm, key_length: @enc_key_length,
228
- force_V4: @enc_force_v4, permissions: @enc_permissions,
229
- owner_password: @enc_owner_pwd, user_password: @enc_user_pwd)
230
- elsif @encryption == :remove
231
- target.encrypt(name: nil)
78
+ def execute(in_file, out_file) #:nodoc:
79
+ maybe_raise_on_existing_file(out_file)
80
+ HexaPDF::Document.open(in_file, decryption_opts: {password: @password}) do |doc|
81
+ arrange_pages(doc) unless @pages == '1-e'
82
+ @embed_files.each {|file| doc.files.add(file, embed: true)}
83
+ apply_encryption_options(doc)
84
+ apply_optimization_options(doc)
85
+ doc.write(out_file)
232
86
  end
233
-
234
- target.write(output_file)
235
87
  rescue HexaPDF::Error => e
236
88
  $stderr.puts "Processing error : #{e.message}"
237
89
  exit(1)
238
90
  end
239
91
 
240
- def usage_arguments #:nodoc:
241
- "{--file IN_FILE | --empty} OUT_FILE"
242
- end
243
-
244
92
  private
245
93
 
246
- # Imports the pages of the document as specified with the --pages option to the given page
247
- # tree.
248
- def import_pages(page_tree)
249
- @files.each do |s|
250
- page_list = s.file.pages.to_a
251
- s.pages = command_parser.parse_pages_specification(s.pages, s.file.pages.count)
252
- s.pages.each do |arr|
253
- arr[0] = page_list[arr[0]]
254
- arr[1] = arr[0].value[:Rotate] || :none unless arr[1]
255
- end
256
- end
257
-
258
- if @interleave
259
- max_pages_per_file = 0
260
- all = @files.each_with_index.map do |spec, findex|
261
- list = []
262
- spec.pages.each {|index, rotation| list << [spec.file, findex, index, rotation]}
263
- max_pages_per_file = list.size if list.size > max_pages_per_file
264
- list
265
- end
266
- first, *rest = *all
267
- first[max_pages_per_file - 1] ||= nil
268
- first.zip(*rest) do |slice|
269
- slice.each do |source, findex, page, rotation|
270
- next unless source
271
- import_page(page_tree, findex, page, rotation)
272
- end
273
- end
274
- else
275
- @files.each_with_index do |s, findex|
276
- s.pages.each {|page, rotation| import_page(page_tree, findex, page, rotation)}
277
- end
278
- end
279
- end
280
-
281
- # Import the page with the given +rotation+ into the page tree.
282
- def import_page(page_tree, source_index, page, rotation)
283
- if page_tree.document == page.document
94
+ # Arranges the pages of the document as specified with the --pages option.
95
+ def arrange_pages(doc)
96
+ all_pages = doc.pages.to_a
97
+ new_page_tree = doc.add(Type: :Pages)
98
+ parse_pages_specification(@pages, all_pages.length).each do |index, rotation|
99
+ page = all_pages[index]
284
100
  page.value.update(page.copy_inherited_values)
285
- page = page.deep_copy unless source_index == 0
286
- else
287
- page = page_tree.document.import(page).deep_copy
288
- end
289
- if rotation == :none
290
- page.delete(:Rotate)
291
- elsif rotation.kind_of?(Integer)
292
- page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
293
- end
294
- page_tree.document.add(page)
295
- page_tree.add_page(page)
296
- end
297
-
298
- IGNORED_FILTERS = { #:nodoc:
299
- CCITTFaxDecode: true, JBIG2Decode: true, DCTDecode: true, JPXDecode: true, Crypt: true
300
- }.freeze
301
-
302
- # Applies the chosen stream mode to all streams.
303
- def handle_streams(doc)
304
- doc.each(current: false) do |obj|
305
- next if !obj.respond_to?(:set_filter) || obj[:Subtype] == :Image ||
306
- Array(obj[:Filter]).any? {|f| IGNORED_FILTERS[f]}
307
- if @streams == :compress
308
- obj.set_filter(:FlateDecode)
101
+ if rotation == :none
102
+ page.delete(:Rotate)
309
103
  else
310
- obj.set_filter(nil)
104
+ page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
311
105
  end
106
+ new_page_tree.add_page(page)
312
107
  end
108
+ doc.delete(doc.catalog.delete(:Pages))
109
+ doc.catalog[:Pages] = new_page_tree
313
110
  end
314
111
 
315
112
  end
@@ -0,0 +1,104 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/cli/command'
35
+
36
+ module HexaPDF
37
+ module CLI
38
+
39
+ # Optimizes the size of a PDF file.
40
+ class Optimize < Command
41
+
42
+ def initialize #:nodoc:
43
+ super('optimize', takes_commands: false)
44
+ short_desc("Optimize the size of a PDF file")
45
+ long_desc(<<-EOF.gsub!(/^ */, ''))
46
+ This command uses several optimization strategies to reduce the file size of the PDF file.
47
+
48
+ By default, all strategies except page compression are used since page compression may
49
+ take a very long time without much benefit.
50
+ EOF
51
+
52
+ @password = nil
53
+ @out_options.compact = true
54
+ @out_options.xref_streams = :generate
55
+ @out_options.object_streams = :generate
56
+ @out_options.streams = :compress
57
+
58
+ options.on("--password PASSWORD", "-p", String,
59
+ "The password for decryption. Use - for reading from standard input.") do |pwd|
60
+ @password = (pwd == '-' ? read_password : pwd)
61
+ end
62
+
63
+ options.separator("")
64
+ options.separator("Optimization options")
65
+ define_optimization_options
66
+ end
67
+
68
+ def execute(in_file, out_file) #:nodoc:
69
+ maybe_raise_on_existing_file(out_file)
70
+ HexaPDF::Document.open(in_file, decryption_opts: {password: @password}) do |doc|
71
+ optimize_page_tree(doc)
72
+ apply_optimization_options(doc)
73
+ doc.write(out_file)
74
+ end
75
+ rescue HexaPDF::Error => e
76
+ $stderr.puts "Processing error : #{e.message}"
77
+ exit(1)
78
+ end
79
+
80
+ private
81
+
82
+ # Optimizes the page tree by flattening it and deleting unused objects.
83
+ def optimize_page_tree(doc)
84
+ page_tree = doc.add(Type: :Pages)
85
+ retained = {page_tree.data => true}
86
+ doc.pages.each do |page|
87
+ page.value.update(page.copy_inherited_values)
88
+ page_tree.add_page(page)
89
+ retained[page.data] = true
90
+ end
91
+ doc.catalog[:Pages] = page_tree
92
+
93
+ doc.each(current: false) do |obj|
94
+ next unless obj.kind_of?(HexaPDF::Dictionary)
95
+ if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
96
+ doc.delete(obj)
97
+ end
98
+ end
99
+ end
100
+
101
+ end
102
+
103
+ end
104
+ end
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2016 Thomas Leitner
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2016 Thomas Leitner
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as