hexapdf 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +33 -1
  3. data/CONTRIBUTERS +1 -1
  4. data/LICENSE +1 -1
  5. data/Rakefile +1 -1
  6. data/VERSION +1 -1
  7. data/lib/hexapdf.rb +1 -1
  8. data/lib/hexapdf/cli.rb +19 -52
  9. data/lib/hexapdf/cli/command.rb +251 -0
  10. data/lib/hexapdf/cli/{extract.rb → files.rb} +19 -23
  11. data/lib/hexapdf/cli/images.rb +147 -0
  12. data/lib/hexapdf/cli/info.rb +5 -5
  13. data/lib/hexapdf/cli/inspect.rb +13 -12
  14. data/lib/hexapdf/cli/merge.rb +200 -0
  15. data/lib/hexapdf/cli/modify.rb +39 -242
  16. data/lib/hexapdf/cli/optimize.rb +104 -0
  17. data/lib/hexapdf/configuration.rb +1 -1
  18. data/lib/hexapdf/content.rb +1 -1
  19. data/lib/hexapdf/content/canvas.rb +1 -1
  20. data/lib/hexapdf/content/color_space.rb +1 -1
  21. data/lib/hexapdf/content/graphic_object.rb +1 -1
  22. data/lib/hexapdf/content/graphic_object/arc.rb +1 -1
  23. data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +1 -1
  24. data/lib/hexapdf/content/graphic_object/solid_arc.rb +1 -1
  25. data/lib/hexapdf/content/graphics_state.rb +1 -1
  26. data/lib/hexapdf/content/operator.rb +1 -1
  27. data/lib/hexapdf/content/parser.rb +16 -15
  28. data/lib/hexapdf/content/processor.rb +1 -1
  29. data/lib/hexapdf/content/transformation_matrix.rb +1 -1
  30. data/lib/hexapdf/data_dir.rb +1 -1
  31. data/lib/hexapdf/dictionary.rb +1 -1
  32. data/lib/hexapdf/dictionary_fields.rb +1 -1
  33. data/lib/hexapdf/document.rb +1 -1
  34. data/lib/hexapdf/document/files.rb +1 -1
  35. data/lib/hexapdf/document/fonts.rb +1 -1
  36. data/lib/hexapdf/document/images.rb +1 -1
  37. data/lib/hexapdf/document/pages.rb +1 -1
  38. data/lib/hexapdf/encryption.rb +1 -1
  39. data/lib/hexapdf/encryption/aes.rb +1 -1
  40. data/lib/hexapdf/encryption/arc4.rb +1 -1
  41. data/lib/hexapdf/encryption/fast_aes.rb +1 -1
  42. data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
  43. data/lib/hexapdf/encryption/identity.rb +1 -1
  44. data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
  45. data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
  46. data/lib/hexapdf/encryption/security_handler.rb +1 -1
  47. data/lib/hexapdf/encryption/standard_security_handler.rb +1 -1
  48. data/lib/hexapdf/error.rb +1 -1
  49. data/lib/hexapdf/filter.rb +1 -1
  50. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  51. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
  52. data/lib/hexapdf/filter/dct_decode.rb +1 -1
  53. data/lib/hexapdf/filter/encryption.rb +1 -1
  54. data/lib/hexapdf/filter/flate_decode.rb +1 -1
  55. data/lib/hexapdf/filter/jpx_decode.rb +1 -1
  56. data/lib/hexapdf/filter/lzw_decode.rb +2 -3
  57. data/lib/hexapdf/filter/predictor.rb +11 -11
  58. data/lib/hexapdf/filter/run_length_decode.rb +1 -1
  59. data/lib/hexapdf/font/cmap.rb +1 -1
  60. data/lib/hexapdf/font/cmap/parser.rb +1 -1
  61. data/lib/hexapdf/font/cmap/writer.rb +1 -1
  62. data/lib/hexapdf/font/encoding.rb +1 -1
  63. data/lib/hexapdf/font/encoding/base.rb +1 -1
  64. data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
  65. data/lib/hexapdf/font/encoding/glyph_list.rb +1 -1
  66. data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
  67. data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +1 -1
  68. data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
  69. data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
  70. data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +1 -1
  71. data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
  72. data/lib/hexapdf/font/true_type.rb +2 -1
  73. data/lib/hexapdf/font/true_type/font.rb +1 -1
  74. data/lib/hexapdf/font/true_type/subsetter.rb +186 -0
  75. data/lib/hexapdf/font/true_type/table.rb +8 -4
  76. data/lib/hexapdf/font/true_type/table/cmap.rb +1 -1
  77. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +1 -1
  78. data/lib/hexapdf/font/true_type/table/directory.rb +1 -1
  79. data/lib/hexapdf/font/true_type/table/glyf.rb +6 -2
  80. data/lib/hexapdf/font/true_type/table/head.rb +2 -2
  81. data/lib/hexapdf/font/true_type/table/hhea.rb +1 -1
  82. data/lib/hexapdf/font/true_type/table/hmtx.rb +1 -1
  83. data/lib/hexapdf/font/true_type/table/loca.rb +1 -1
  84. data/lib/hexapdf/font/true_type/table/maxp.rb +1 -1
  85. data/lib/hexapdf/font/true_type/table/name.rb +1 -1
  86. data/lib/hexapdf/font/true_type/table/os2.rb +1 -1
  87. data/lib/hexapdf/font/true_type/table/post.rb +1 -1
  88. data/lib/hexapdf/font/true_type_wrapper.rb +56 -8
  89. data/lib/hexapdf/font/type1.rb +1 -1
  90. data/lib/hexapdf/font/type1/afm_parser.rb +1 -1
  91. data/lib/hexapdf/font/type1/character_metrics.rb +1 -1
  92. data/lib/hexapdf/font/type1/font.rb +1 -1
  93. data/lib/hexapdf/font/type1/font_metrics.rb +1 -1
  94. data/lib/hexapdf/font/type1/pfb_parser.rb +1 -1
  95. data/lib/hexapdf/font/type1_wrapper.rb +1 -1
  96. data/lib/hexapdf/font_loader.rb +1 -1
  97. data/lib/hexapdf/font_loader/from_configuration.rb +6 -3
  98. data/lib/hexapdf/font_loader/standard14.rb +1 -1
  99. data/lib/hexapdf/image_loader.rb +1 -1
  100. data/lib/hexapdf/image_loader/jpeg.rb +1 -1
  101. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  102. data/lib/hexapdf/image_loader/png.rb +1 -1
  103. data/lib/hexapdf/importer.rb +1 -1
  104. data/lib/hexapdf/name_tree_node.rb +1 -1
  105. data/lib/hexapdf/number_tree_node.rb +1 -1
  106. data/lib/hexapdf/object.rb +1 -1
  107. data/lib/hexapdf/parser.rb +1 -1
  108. data/lib/hexapdf/rectangle.rb +1 -1
  109. data/lib/hexapdf/reference.rb +1 -1
  110. data/lib/hexapdf/revision.rb +1 -1
  111. data/lib/hexapdf/revisions.rb +13 -15
  112. data/lib/hexapdf/serializer.rb +7 -3
  113. data/lib/hexapdf/stream.rb +1 -1
  114. data/lib/hexapdf/task.rb +1 -1
  115. data/lib/hexapdf/task/dereference.rb +1 -1
  116. data/lib/hexapdf/task/optimize.rb +1 -1
  117. data/lib/hexapdf/tokenizer.rb +12 -12
  118. data/lib/hexapdf/type.rb +1 -1
  119. data/lib/hexapdf/type/catalog.rb +1 -1
  120. data/lib/hexapdf/type/embedded_file.rb +1 -1
  121. data/lib/hexapdf/type/file_specification.rb +1 -1
  122. data/lib/hexapdf/type/font.rb +1 -1
  123. data/lib/hexapdf/type/font_descriptor.rb +1 -1
  124. data/lib/hexapdf/type/font_simple.rb +1 -1
  125. data/lib/hexapdf/type/font_true_type.rb +1 -1
  126. data/lib/hexapdf/type/font_type1.rb +1 -1
  127. data/lib/hexapdf/type/form.rb +1 -1
  128. data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
  129. data/lib/hexapdf/type/image.rb +187 -1
  130. data/lib/hexapdf/type/info.rb +1 -1
  131. data/lib/hexapdf/type/names.rb +1 -1
  132. data/lib/hexapdf/type/object_stream.rb +1 -1
  133. data/lib/hexapdf/type/page.rb +1 -1
  134. data/lib/hexapdf/type/page_tree_node.rb +6 -1
  135. data/lib/hexapdf/type/resources.rb +1 -1
  136. data/lib/hexapdf/type/trailer.rb +2 -2
  137. data/lib/hexapdf/type/viewer_preferences.rb +1 -1
  138. data/lib/hexapdf/type/xref_stream.rb +22 -18
  139. data/lib/hexapdf/utils/bit_field.rb +1 -1
  140. data/lib/hexapdf/utils/bit_stream.rb +16 -32
  141. data/lib/hexapdf/utils/lru_cache.rb +1 -1
  142. data/lib/hexapdf/utils/math_helpers.rb +1 -1
  143. data/lib/hexapdf/utils/object_hash.rb +1 -1
  144. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
  145. data/lib/hexapdf/utils/sorted_tree_node.rb +1 -1
  146. data/lib/hexapdf/version.rb +2 -2
  147. data/lib/hexapdf/writer.rb +2 -1
  148. data/lib/hexapdf/xref_section.rb +6 -1
  149. data/man/man1/hexapdf.1 +194 -115
  150. data/test/data/images/greyscale-1bit.png +0 -0
  151. data/test/data/images/greyscale-2bit.png +0 -0
  152. data/test/data/images/greyscale-8bit.png +0 -0
  153. data/test/data/images/indexed-alpha-4bit.png +0 -0
  154. data/test/data/images/truecolour-8bit.png +0 -0
  155. data/test/hexapdf/content/test_operator.rb +8 -8
  156. data/test/hexapdf/content/test_processor.rb +1 -1
  157. data/test/hexapdf/encryption/test_security_handler.rb +1 -1
  158. data/test/hexapdf/font/test_true_type_wrapper.rb +89 -48
  159. data/test/hexapdf/font/true_type/table/test_glyf.rb +1 -0
  160. data/test/hexapdf/font/true_type/test_subsetter.rb +70 -0
  161. data/test/hexapdf/font/true_type/test_table.rb +16 -0
  162. data/test/hexapdf/font_loader/test_from_configuration.rb +7 -0
  163. data/test/hexapdf/test_document.rb +1 -1
  164. data/test/hexapdf/test_object.rb +1 -1
  165. data/test/hexapdf/test_revisions.rb +34 -8
  166. data/test/hexapdf/test_serializer.rb +3 -0
  167. data/test/hexapdf/test_writer.rb +11 -2
  168. data/test/hexapdf/test_xref_section.rb +15 -0
  169. data/test/hexapdf/type/test_image.rb +234 -0
  170. data/test/hexapdf/type/test_object_stream.rb +2 -2
  171. data/test/hexapdf/type/test_trailer.rb +4 -0
  172. data/test/hexapdf/utils/test_bit_stream.rb +69 -0
  173. metadata +14 -6
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2016 Thomas Leitner
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -31,28 +31,28 @@
31
31
  # is created or manipulated using HexaPDF.
32
32
  #++
33
33
 
34
- require 'hexapdf/cli'
34
+ require 'hexapdf/cli/command'
35
35
 
36
36
  module HexaPDF
37
37
  module CLI
38
38
 
39
- # Extracts files from a PDF file.
39
+ # Lists or extracts embedded files from a PDF file.
40
40
  #
41
41
  # See: HexaPDF::Type::EmbeddedFile
42
- class Extract < CmdParse::Command
42
+ class Files < Command
43
43
 
44
44
  def initialize #:nodoc:
45
- super('extract', takes_commands: false)
46
- short_desc("Extract files from a PDF file")
45
+ super('files', takes_commands: false)
46
+ short_desc("List or extract embedded files from a PDF file")
47
47
  long_desc(<<-EOF.gsub!(/^ */, ''))
48
- This command extracts files embedded in a PDF file. If the option --indices is not given,
49
- the available files are listed with their names and indices. The --indices option can then
50
- be used to extract one or more files.
48
+ If the option --extract is not given, the available files are listed with their names and
49
+ indices. The --extract option can then be used to extract one or more files.
51
50
  EOF
52
- options.on("--indices a,b,c", "-i a,b,c,...", Array,
53
- "The indices of the files that should be extracted. Use 0 to extract " \
54
- "all files.") do |indices|
55
- @indices = indices.map(&:to_i)
51
+
52
+ options.on("--extract [a,b,c,...]", "-e [a,b,c,...]", Array,
53
+ "The indices of the files that should be extracted. Use 0 or no argument to " \
54
+ "extract all files.") do |indices|
55
+ @indices = (indices ? indices.map(&:to_i) : [0])
56
56
  end
57
57
  options.on("--[no-]search", "-s", "Search the whole PDF instead of the " \
58
58
  "standard locations (default: false)") do |search|
@@ -60,24 +60,22 @@ module HexaPDF
60
60
  end
61
61
  options.on("--password PASSWORD", "-p", String,
62
62
  "The password for decryption. Use - for reading from standard input.") do |pwd|
63
- @password = (pwd == '-' ? command_parser.read_password : pwd)
63
+ @password = (pwd == '-' ? read_password : pwd)
64
64
  end
65
+
65
66
  @indices = []
66
- @password = ''
67
+ @password = nil
67
68
  @search = false
68
69
  end
69
70
 
70
- def execute(file) #:nodoc:
71
- HexaPDF::Document.open(file, decryption_opts: {password: @password}) do |doc|
71
+ def execute(pdf) #:nodoc:
72
+ HexaPDF::Document.open(pdf, decryption_opts: {password: @password}) do |doc|
72
73
  if @indices.empty?
73
74
  list_files(doc)
74
75
  else
75
76
  extract_files(doc)
76
77
  end
77
78
  end
78
- rescue HexaPDF::Error => e
79
- $stderr.puts "Error while processing the PDF file: #{e.message}"
80
- exit(1)
81
79
  end
82
80
 
83
81
  private
@@ -104,9 +102,7 @@ module HexaPDF
104
102
  def extract_files(doc)
105
103
  each_file(doc) do |obj, index|
106
104
  next unless @indices.include?(index + 1) || @indices.include?(0)
107
- if File.exist?(obj.path)
108
- raise HexaPDF::Error, "Output file #{obj.path} already exists, not overwriting"
109
- end
105
+ maybe_raise_on_existing_file(obj.path)
110
106
  puts "Extracting #{obj.path}..."
111
107
  File.open(obj.path, 'wb') do |file|
112
108
  fiber = obj.embedded_file_stream.stream_decoder
@@ -0,0 +1,147 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/cli/command'
35
+
36
+ module HexaPDF
37
+ module CLI
38
+
39
+ # Lists or extracts images from a PDF file.
40
+ #
41
+ # See: HexaPDF::Type::Image
42
+ class Images < Command
43
+
44
+ def initialize #:nodoc:
45
+ super('images', takes_commands: false)
46
+ short_desc("List or extract images from a PDF file")
47
+ long_desc(<<-EOF.gsub!(/^ */, ''))
48
+ If the option --extract is not given, the available images are listed with their index and
49
+ additional information, sorted by page number. The --extract option can then be used to
50
+ extract one or more images, saving them to files called `prefix-n.ext` where the prefix
51
+ can be set via --prefix, n is the index and ext is either png, jpg or jpx.
52
+ EOF
53
+
54
+ options.on("--extract [A,B,C,...]", "-e [A,B,C,...]", Array,
55
+ "The indices of the images that should be extracted. Use 0 or no argument to " \
56
+ "extract all images.") do |indices|
57
+ @indices = (indices ? indices.map(&:to_i) : [0])
58
+ end
59
+ options.on("--prefix PREFIX", String,
60
+ "The prefix to use when saving images. May include directories. Default: " \
61
+ "image.") do |prefix|
62
+ @prefix = prefix
63
+ end
64
+ options.on("--[no-]search", "-s", "Search the whole PDF instead of the " \
65
+ "standard locations (default: false)") do |search|
66
+ @search = search
67
+ end
68
+ options.on("--password PASSWORD", "-p", String,
69
+ "The password for decryption. Use - for reading from standard input.") do |pwd|
70
+ @password = (pwd == '-' ? read_password : pwd)
71
+ end
72
+
73
+ @indices = []
74
+ @prefix = 'image'
75
+ @password = nil
76
+ @search = false
77
+ end
78
+
79
+ def execute(pdf) #:nodoc:
80
+ HexaPDF::Document.open(pdf, decryption_opts: {password: @password}) do |doc|
81
+ if @indices.empty?
82
+ list_images(doc)
83
+ else
84
+ extract_images(doc)
85
+ end
86
+ end
87
+ end
88
+
89
+ private
90
+
91
+ # Outputs a table with the images of the PDF document.
92
+ def list_images(doc)
93
+ printf("%5s %5s %9s %6s %6s %5s %4s %3s %5s %8s\n",
94
+ "index", "page", "oid", "width", "height", "color", "comp", "bpc", "type",
95
+ "writable")
96
+ puts("-" * 65)
97
+ each_image(doc) do |image, index, pindex|
98
+ info = image.info
99
+ printf("%5i %5s %9s %6i %6i %5s %4i %3i %5s %8s\n",
100
+ index, pindex || '-', "#{image.oid},#{image.gen}", info.width, info.height,
101
+ info.color_space, info.components, info.bits_per_component, info.type,
102
+ info.writable)
103
+ end
104
+ end
105
+
106
+
107
+ # Extracts the images with the given indices.
108
+ def extract_images(doc)
109
+ each_image(doc) do |image, index, _|
110
+ next unless @indices.include?(index) || @indices.include?(0)
111
+ path = "#{@prefix}-#{index}.#{image.info.extension}"
112
+ maybe_raise_on_existing_file(path)
113
+ puts "Extracting #{path}..."
114
+ image.write(path)
115
+ end
116
+ end
117
+
118
+ # Iterates over all images.
119
+ def each_image(doc) # :yields: obj, index, page_index
120
+ index = 1
121
+ seen = {}
122
+
123
+ doc.pages.each_with_index do |page, pindex|
124
+ page.resources[:XObject]&.each do |_name, xobject|
125
+ if seen[xobject]
126
+ yield(xobject, seen[xobject], pindex + 1)
127
+ elsif xobject[:Subtype] == :Image && !xobject[:ImageMask]
128
+ yield(xobject, index, pindex + 1)
129
+ seen[xobject] = index
130
+ index += 1
131
+ end
132
+ end
133
+ end
134
+
135
+ if @search
136
+ doc.images.each do |image|
137
+ next if seen[image]
138
+ yield(image, index, nil)
139
+ index += 1
140
+ end
141
+ end
142
+ end
143
+
144
+ end
145
+
146
+ end
147
+ end
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2016 Thomas Leitner
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -31,7 +31,7 @@
31
31
  # is created or manipulated using HexaPDF.
32
32
  #++
33
33
 
34
- require 'hexapdf/cli'
34
+ require 'hexapdf/cli/command'
35
35
 
36
36
  module HexaPDF
37
37
  module CLI
@@ -44,7 +44,7 @@ module HexaPDF
44
44
  # * The used PDF version
45
45
  #
46
46
  # See: HexaPDF::Type::Info, HexaPDF::Encryption::SecurityHandler
47
- class Info < CmdParse::Command
47
+ class Info < Command
48
48
 
49
49
  def initialize #:nodoc:
50
50
  super('info', takes_commands: false)
@@ -55,9 +55,9 @@ module HexaPDF
55
55
  EOF
56
56
  options.on("--password PASSWORD", "-p", String,
57
57
  "The password for decryption. Use - for reading from standard input.") do |pwd|
58
- @password = (pwd == '-' ? command_parser.read_password : pwd)
58
+ @password = (pwd == '-' ? read_password : pwd)
59
59
  end
60
- @password = ''
60
+ @password = nil
61
61
  @auto_decrypt = true
62
62
  end
63
63
 
@@ -4,7 +4,7 @@
4
4
  # This file is part of HexaPDF.
5
5
  #
6
6
  # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
- # Copyright (C) 2016 Thomas Leitner
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
8
  #
9
9
  # HexaPDF is free software: you can redistribute it and/or modify it
10
10
  # under the terms of the GNU Affero General Public License version 3 as
@@ -31,13 +31,13 @@
31
31
  # is created or manipulated using HexaPDF.
32
32
  #++
33
33
 
34
- require 'hexapdf/cli'
34
+ require 'hexapdf/cli/command'
35
35
 
36
36
  module HexaPDF
37
37
  module CLI
38
38
 
39
39
  # Shows the internal structure of a PDF file.
40
- class Inspect < CmdParse::Command
40
+ class Inspect < Command
41
41
 
42
42
  def initialize #:nodoc:
43
43
  super('inspect', takes_commands: false)
@@ -47,13 +47,13 @@ module HexaPDF
47
47
  needs to inspect the internal object structure or a stream of a PDF file. A PDF object is
48
48
  always shown in the PDF syntax.
49
49
 
50
- If no option is given, the main PDF object, the catalog, is shown. Otherwise the various,
51
- mutually exclusive display options define the shown content. If multiple such options are
52
- specified only the last is respected.
50
+ If no option is given, the PDF trailer is shown. Otherwise the various, mutually exclusive
51
+ display options define the shown content. If multiple such options are specified only the
52
+ last is respected.
53
53
  EOF
54
54
 
55
- options.on("-t", "--trailer", "Show the trailer dictionary.") do
56
- @exec = :trailer
55
+ options.on("--catalog", "Show the PDF catalog dictionary.") do
56
+ @exec = :catalog
57
57
  end
58
58
  options.on("-c", "--page-count", "Print the number of pages.") do
59
59
  @exec = :page_count
@@ -84,11 +84,11 @@ module HexaPDF
84
84
  options.separator("")
85
85
  options.on("--password PASSWORD", "-p", String,
86
86
  "The password for decryption. Use - for reading from standard input.") do |pwd|
87
- @password = (pwd == '-' ? command_parser.read_password : pwd)
87
+ @password = (pwd == '-' ? read_password : pwd)
88
88
  end
89
89
 
90
90
  @password = nil
91
- @exec = :catalog
91
+ @exec = :trailer
92
92
  @param = nil
93
93
  @raw = nil
94
94
  end
@@ -117,9 +117,10 @@ module HexaPDF
117
117
  end
118
118
 
119
119
  def do_pages(doc) #:nodoc:
120
- pages = command_parser.parse_pages_specification(@param, doc.pages.count)
120
+ pages = parse_pages_specification(@param, doc.pages.count)
121
+ page_list = doc.pages.to_a
121
122
  pages.each do |index, _|
122
- page = doc.pages[index]
123
+ page = page_list[index]
123
124
  str = "page #{index + 1} (#{page.oid},#{page.gen}): "
124
125
  str << Array(page[:Contents]).map {|c| "#{c.oid},#{c.gen}"}.join(" ")
125
126
  puts str
@@ -0,0 +1,200 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/cli/command'
35
+
36
+ module HexaPDF
37
+ module CLI
38
+
39
+ # Merges pages from multiple PDF files.
40
+ class Merge < Command
41
+
42
+ InputSpec = Struct.new(:file, :pages, :password) #:nodoc:
43
+
44
+ def initialize #:nodoc:
45
+ super('merge', takes_commands: false)
46
+ short_desc("Merge multiple PDF files")
47
+ long_desc(<<-EOF.gsub!(/^ */, ''))
48
+ This command merges pages from multiple PDFs into one output file which can optionally be
49
+ encrypted/decrypted and optimized in various ways.
50
+
51
+ The first input file is the primary input file from which meta data like file information,
52
+ outlines, etc. are taken from. Alternatively, it is possible to start with an empty PDF
53
+ file by using --empty. The order of the files is important as they are used in that order.
54
+
55
+ Also note that the --password and --pages options apply to the last preceeding input file.
56
+ EOF
57
+
58
+ options.on(/.*/, "Input file, can be specified multiple times") do |file|
59
+ @files << InputSpec.new(file, '1-e')
60
+ throw :prune
61
+ end
62
+ options.on("-p", "--password PASSWORD", String, "The password for decrypting the last " \
63
+ "specified input file (use - for reading from standard input)") do |pwd|
64
+ raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
65
+ pwd = (pwd == '-' ? read_password("#{@files.last.file} password") : pwd)
66
+ @files.last.password = pwd
67
+ end
68
+ options.on("-i", "--pages PAGES", "The pages of the last specified input file that " \
69
+ "should be used (default: 1-e)") do |pages|
70
+ raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
71
+ @files.last.pages = pages
72
+ end
73
+ options.on("-e", "--empty", "Use an empty file as the first input file") do
74
+ @initial_empty = true
75
+ end
76
+ options.on("--[no-]interleave", "Interleave the pages from the input files (default: " \
77
+ "false)") do |c|
78
+ @interleave = c
79
+ end
80
+
81
+ options.separator("")
82
+ options.separator("Output related options")
83
+ define_optimization_options
84
+ define_encryption_options
85
+
86
+ @files = []
87
+ @initial_empty = false
88
+ @interleave = false
89
+ end
90
+
91
+ def execute #:nodoc:
92
+ if !@initial_empty && @files.empty?
93
+ error = OptionParser::ParseError.new("At least one FILE or --empty is needed")
94
+ error.reason = "Missing argument"
95
+ raise error
96
+ elsif (@initial_empty && @files.empty?) || (!@initial_empty && @files.length < 2)
97
+ error = OptionParser::ParseError.new("Output file is needed")
98
+ error.reason = "Missing argument"
99
+ raise error
100
+ end
101
+
102
+ output_file = @files.pop.file
103
+ maybe_raise_on_existing_file(output_file)
104
+
105
+ # Create PDF documents for each input file
106
+ cache = {}
107
+ @files.each do |spec|
108
+ cache[spec.file] ||= HexaPDF::Document.new(io: File.open(spec.file),
109
+ decryption_opts: {password: spec.password})
110
+ spec.file = cache[spec.file]
111
+ end
112
+
113
+ # Assemble pages
114
+ target = (@initial_empty ? HexaPDF::Document.new : @files.first.file)
115
+ page_tree = target.add(Type: :Pages)
116
+ import_pages(page_tree)
117
+ target.catalog[:Pages] = page_tree
118
+
119
+ # Remove potentially imported but unused pages and page tree nodes
120
+ retained = target.pages.each_with_object({}) {|page, h| h[page.data] = true}
121
+ retained[target.pages.root.data] = true
122
+ target.each(current: false) do |obj|
123
+ next unless obj.kind_of?(HexaPDF::Dictionary)
124
+ if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
125
+ target.delete(obj)
126
+ end
127
+ end
128
+
129
+ apply_encryption_options(target)
130
+ apply_optimization_options(target)
131
+
132
+ target.write(output_file)
133
+ rescue HexaPDF::Error => e
134
+ $stderr.puts "Processing error : #{e.message}"
135
+ exit(1)
136
+ end
137
+
138
+ def usage #:nodoc:
139
+ "Usage: #{command_parser.main_options.program_name} merge [options] {FILE | --empty} " \
140
+ "[FILE]... OUT_FILE"
141
+ end
142
+
143
+ private
144
+
145
+ # Imports the pages of the document as specified with the --pages option to the given page
146
+ # tree.
147
+ def import_pages(page_tree)
148
+ @files.each do |s|
149
+ page_list = s.file.pages.to_a
150
+ s.pages = parse_pages_specification(s.pages, s.file.pages.count)
151
+ s.pages.each do |arr|
152
+ arr[0] = page_list[arr[0]]
153
+ arr[1] = arr[0].value[:Rotate] || :none unless arr[1]
154
+ end
155
+ end
156
+
157
+ if @interleave
158
+ max_pages_per_file = 0
159
+ all = @files.each_with_index.map do |spec, findex|
160
+ list = []
161
+ spec.pages.each {|index, rotation| list << [spec.file, findex, index, rotation]}
162
+ max_pages_per_file = list.size if list.size > max_pages_per_file
163
+ list
164
+ end
165
+ first, *rest = *all
166
+ first[max_pages_per_file - 1] ||= nil
167
+ first.zip(*rest) do |slice|
168
+ slice.each do |source, findex, page, rotation|
169
+ next unless source
170
+ import_page(page_tree, findex, page, rotation)
171
+ end
172
+ end
173
+ else
174
+ @files.each_with_index do |s, findex|
175
+ s.pages.each {|page, rotation| import_page(page_tree, findex, page, rotation)}
176
+ end
177
+ end
178
+ end
179
+
180
+ # Import the page with the given +rotation+ into the page tree.
181
+ def import_page(page_tree, source_index, page, rotation)
182
+ if page_tree.document == page.document
183
+ page.value.update(page.copy_inherited_values)
184
+ page = page.deep_copy unless source_index == 0
185
+ else
186
+ page = page_tree.document.import(page).deep_copy
187
+ end
188
+ if rotation == :none
189
+ page.delete(:Rotate)
190
+ elsif rotation.kind_of?(Integer)
191
+ page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
192
+ end
193
+ page_tree.document.add(page)
194
+ page_tree.add_page(page)
195
+ end
196
+
197
+ end
198
+
199
+ end
200
+ end