hexapdf 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +56 -0
  3. data/CONTRIBUTERS +1 -1
  4. data/Rakefile +3 -3
  5. data/VERSION +1 -1
  6. data/examples/arc.rb +1 -1
  7. data/examples/graphics.rb +1 -1
  8. data/examples/hello_world.rb +1 -1
  9. data/examples/merging.rb +1 -1
  10. data/examples/show_char_bboxes.rb +1 -1
  11. data/examples/standard_pdf_fonts.rb +1 -1
  12. data/examples/truetype.rb +1 -1
  13. data/lib/hexapdf/cli.rb +14 -7
  14. data/lib/hexapdf/cli/extract.rb +1 -1
  15. data/lib/hexapdf/cli/info.rb +2 -2
  16. data/lib/hexapdf/cli/inspect.rb +4 -4
  17. data/lib/hexapdf/cli/modify.rb +151 -51
  18. data/lib/hexapdf/configuration.rb +1 -1
  19. data/lib/hexapdf/content/canvas.rb +1 -1
  20. data/lib/hexapdf/content/processor.rb +1 -1
  21. data/lib/hexapdf/dictionary.rb +6 -19
  22. data/lib/hexapdf/dictionary_fields.rb +1 -1
  23. data/lib/hexapdf/document.rb +23 -16
  24. data/lib/hexapdf/document/files.rb +130 -0
  25. data/lib/hexapdf/{font_utils.rb → document/fonts.rb} +40 -38
  26. data/lib/hexapdf/document/images.rb +117 -0
  27. data/lib/hexapdf/document/pages.rb +125 -0
  28. data/lib/hexapdf/encryption/aes.rb +1 -1
  29. data/lib/hexapdf/encryption/ruby_aes.rb +10 -10
  30. data/lib/hexapdf/encryption/standard_security_handler.rb +11 -8
  31. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  32. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -6
  33. data/lib/hexapdf/font/cmap/writer.rb +5 -7
  34. data/lib/hexapdf/font/true_type.rb +4 -1
  35. data/lib/hexapdf/font/true_type/font.rb +8 -16
  36. data/lib/hexapdf/font/true_type/table.rb +5 -16
  37. data/lib/hexapdf/font/true_type/table/cmap.rb +2 -7
  38. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +2 -6
  39. data/lib/hexapdf/font/true_type/table/directory.rb +0 -5
  40. data/lib/hexapdf/font/true_type/table/glyf.rb +3 -11
  41. data/lib/hexapdf/font/true_type/table/head.rb +0 -12
  42. data/lib/hexapdf/font/true_type/table/hhea.rb +0 -7
  43. data/lib/hexapdf/font/true_type/table/hmtx.rb +1 -5
  44. data/lib/hexapdf/font/true_type/table/loca.rb +0 -4
  45. data/lib/hexapdf/font/true_type/table/maxp.rb +0 -8
  46. data/lib/hexapdf/font/true_type/table/name.rb +3 -17
  47. data/lib/hexapdf/font/true_type/table/os2.rb +0 -14
  48. data/lib/hexapdf/font/true_type/table/post.rb +0 -8
  49. data/lib/hexapdf/font/true_type_wrapper.rb +1 -1
  50. data/lib/hexapdf/font/type1.rb +2 -2
  51. data/lib/hexapdf/font/type1/font.rb +2 -1
  52. data/lib/hexapdf/font/type1/font_metrics.rb +10 -1
  53. data/lib/hexapdf/font/type1_wrapper.rb +2 -1
  54. data/lib/hexapdf/font_loader/from_configuration.rb +1 -1
  55. data/lib/hexapdf/font_loader/standard14.rb +1 -1
  56. data/lib/hexapdf/image_loader/jpeg.rb +1 -1
  57. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  58. data/lib/hexapdf/image_loader/png.rb +2 -2
  59. data/lib/hexapdf/object.rb +18 -5
  60. data/lib/hexapdf/rectangle.rb +8 -1
  61. data/lib/hexapdf/revisions.rb +4 -2
  62. data/lib/hexapdf/serializer.rb +3 -3
  63. data/lib/hexapdf/stream.rb +3 -2
  64. data/lib/hexapdf/task/dereference.rb +4 -5
  65. data/lib/hexapdf/task/optimize.rb +6 -3
  66. data/lib/hexapdf/tokenizer.rb +3 -3
  67. data/lib/hexapdf/type/file_specification.rb +2 -2
  68. data/lib/hexapdf/type/form.rb +19 -0
  69. data/lib/hexapdf/type/page.rb +21 -6
  70. data/lib/hexapdf/type/page_tree_node.rb +27 -34
  71. data/lib/hexapdf/utils/bit_stream.rb +1 -1
  72. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
  73. data/lib/hexapdf/version.rb +1 -1
  74. data/man/man1/hexapdf.1 +259 -187
  75. data/test/hexapdf/content/graphic_object/test_arc.rb +1 -1
  76. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +1 -1
  77. data/test/hexapdf/content/graphic_object/test_solid_arc.rb +1 -1
  78. data/test/hexapdf/content/test_canvas.rb +1 -1
  79. data/test/hexapdf/document/test_files.rb +71 -0
  80. data/test/hexapdf/{test_font_utils.rb → document/test_fonts.rb} +1 -2
  81. data/test/hexapdf/document/test_images.rb +78 -0
  82. data/test/hexapdf/document/test_pages.rb +114 -0
  83. data/test/hexapdf/encryption/test_standard_security_handler.rb +26 -5
  84. data/test/hexapdf/font/test_true_type_wrapper.rb +1 -1
  85. data/test/hexapdf/font/true_type/common.rb +0 -4
  86. data/test/hexapdf/font/true_type/table/test_cmap.rb +0 -6
  87. data/test/hexapdf/font/true_type/table/test_directory.rb +0 -5
  88. data/test/hexapdf/font/true_type/table/test_glyf.rb +5 -8
  89. data/test/hexapdf/font/true_type/table/test_head.rb +0 -20
  90. data/test/hexapdf/font/true_type/table/test_hhea.rb +0 -7
  91. data/test/hexapdf/font/true_type/table/test_hmtx.rb +2 -7
  92. data/test/hexapdf/font/true_type/table/test_loca.rb +4 -8
  93. data/test/hexapdf/font/true_type/table/test_maxp.rb +0 -7
  94. data/test/hexapdf/font/true_type/table/test_name.rb +0 -19
  95. data/test/hexapdf/font/true_type/table/test_os2.rb +0 -8
  96. data/test/hexapdf/font/true_type/table/test_post.rb +0 -13
  97. data/test/hexapdf/font/true_type/test_font.rb +14 -38
  98. data/test/hexapdf/font/true_type/test_table.rb +0 -9
  99. data/test/hexapdf/font/type1/test_font_metrics.rb +22 -0
  100. data/test/hexapdf/task/test_dereference.rb +5 -1
  101. data/test/hexapdf/task/test_optimize.rb +1 -1
  102. data/test/hexapdf/test_dictionary.rb +4 -0
  103. data/test/hexapdf/test_document.rb +0 -7
  104. data/test/hexapdf/test_importer.rb +4 -4
  105. data/test/hexapdf/test_object.rb +31 -9
  106. data/test/hexapdf/test_rectangle.rb +18 -0
  107. data/test/hexapdf/test_revisions.rb +7 -0
  108. data/test/hexapdf/test_serializer.rb +6 -0
  109. data/test/hexapdf/test_writer.rb +2 -2
  110. data/test/hexapdf/type/test_form.rb +12 -0
  111. data/test/hexapdf/type/test_page.rb +39 -20
  112. data/test/hexapdf/type/test_page_tree_node.rb +28 -21
  113. metadata +21 -9
  114. data/lib/hexapdf/document_utils.rb +0 -209
  115. data/test/hexapdf/test_document_utils.rb +0 -144
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1c10b3600c81488af01c877e24fc31aa859968e8
4
- data.tar.gz: 6bdde619dae45921219a100612fd67e3fd1e1317
3
+ metadata.gz: 874a5d2e5a2408ceed91a9bc61df138248fbbcd2
4
+ data.tar.gz: 894e67110daae93c1479a5ced992a84dabc66cd1
5
5
  SHA512:
6
- metadata.gz: 9b79e62cf749a111b4a81a6ce7dab39dfca4b66ba47cd9961ab7452fb75a75373bb01a088659d7bca31429969341b980e75a3a39008525effb2bd116d0162a69
7
- data.tar.gz: b09a691c6f92f6035a73f226ec0d566f2d48a18082478a03037ea4dd0dded3698d25a0e403aa7f51cea8d9481bafd923f8de8d9510a961994c0f5e733aa12bbf
6
+ metadata.gz: 9d7c18191ea0d64992fc01b05904c3324cc1f32e98fe848bc931970b53f42d9ab6878d4e8c3730a570c607f677dfdaa8a8adebe7ab4c8e279cc13383124dee1b
7
+ data.tar.gz: 1e080dd694ad58c3256a647a98a9b03c439ae340311b037fad3b1bdbe436ffbed74fec0c00af3df68540ca32aa75336c328a317a74233428dcf16d8920cc8e75
data/CHANGELOG.md ADDED
@@ -0,0 +1,56 @@
1
+ ## 0.2.0 - unreleased
2
+
3
+ ### Added
4
+
5
+ * PDF file merge ability to `hexapdf modify`, i.e. adding pages from other PDFs
6
+ * Page interleaving support to `hexapdf modify`
7
+ * Step values in pages definitions for CLI commands
8
+ * Convenience class for working with pages through [HexaPDF::Document#pages]
9
+ with a more Ruby-like interface
10
+ * Method [HexaPDF::Type::Form#canvas]
11
+ * Method [HexaPDF::Type::Page#index]
12
+ * Validation for [HexaPDF::Rectangle] objects
13
+ * [HexaPDF::Font::Type1::FontMetrics#weight_class] for returning the numeric
14
+ weight
15
+
16
+ ### Changed
17
+
18
+ * Refactor document utilities into their own classes with a more Ruby-like interface;
19
+ this concerns fonts, images and files, which are now accessible through
20
+ [HexaPDF::Document#fonts], [HexaPDF::Document#images] and
21
+ [HexaPDF::Document#files]
22
+ * Validate nested collection values in [HexaPDF::Object]
23
+ * Allow [HexaPDF::Dictionary#[]] to always unwrap nil values
24
+ * Update [HexaPDF::Task::Optimize] to delete unused objects on `:compact`
25
+ * Allow [HexaPDF::Type::PageTreeNode#delete_page] to take a page object or a
26
+ page index
27
+ * Don't set /EFF key in encryption dictionary
28
+ * Better error handling for hexapdf CLI commands
29
+ * Show help output when no command is given for `hexapdf` CLI
30
+ * Set /FontWeight in [HexaPDF::Font::Type1Wrapper]
31
+ * Use kramdown's man page support for the `hexapdf` man page instead of ronn
32
+
33
+ ### Removed
34
+
35
+ * Remove unneeded parts of TrueType implementation
36
+
37
+ ### Fixed
38
+
39
+ * Problem with unnamed classes/modules on serialization
40
+ * Handle potentially indirect objects correctly in [HexaPDF::Object::deep_copy]
41
+ * [HexaPDF::Revisions#merge] for objects that appear in multiple revisions
42
+ * Output of `--pages` option of `hexapdf inspect` command
43
+ * Infinite recursion problem in [HexaPDF::Task::Dereference]
44
+ * Problem with iteration over images in certain cases
45
+ * [HexaPDF::Type::Page#[]] with respect to inherited fields
46
+ * Problems with access permissions on encryption
47
+ * Encryption routine of standard security handler with respect to owner password
48
+ * Invalid check in validation of standard encryption dictionary
49
+ * `hexapdf modify` command to support files with many pages
50
+ * Validation of encryption key for encryption revision 6
51
+ * Various parts of the API documentation
52
+
53
+
54
+ ## 0.1.0 - 2016-10-26
55
+
56
+ * Initial release
data/CONTRIBUTERS CHANGED
@@ -1,3 +1,3 @@
1
1
  Count Name
2
2
  ======= ====
3
- 601 Thomas Leitner <t_leitner@gmx.at>
3
+ 647 Thomas Leitner <t_leitner@gmx.at>
data/Rakefile CHANGED
@@ -16,7 +16,7 @@ namespace :dev do
16
16
  PKG_FILES = FileList.new([
17
17
  'Rakefile',
18
18
  'LICENSE', 'agpl-3.0.txt',
19
- 'README.md',
19
+ 'README.md', 'CHANGELOG.md',
20
20
  'VERSION', 'CONTRIBUTERS',
21
21
  'bin/*',
22
22
  'lib/**/*.rb',
@@ -29,7 +29,7 @@ namespace :dev do
29
29
  CLOBBER << "man/man1/hexapdf.1"
30
30
  file 'man/man1/hexapdf.1' => ['man/man1/hexapdf.1.md'] do
31
31
  puts "Generating hexapdf man page"
32
- system "ronn --pipe -r man/man1/hexapdf.1.md > man/man1/hexapdf.1"
32
+ system "kramdown -o man man/man1/hexapdf.1.md > man/man1/hexapdf.1"
33
33
  end
34
34
 
35
35
  CLOBBER << "VERSION"
@@ -65,7 +65,7 @@ namespace :dev do
65
65
  s.executables = ['hexapdf']
66
66
  s.default_executable = 'hexapdf'
67
67
  s.add_dependency('cmdparse', '~> 3.0', '>= 3.0.1')
68
- s.add_development_dependency('ronn', '~> 0.7')
68
+ s.add_development_dependency('kramdown', '~> 1.0', '>= 1.13.0')
69
69
 
70
70
  s.author = 'Thomas Leitner'
71
71
  s.email = 't_leitner@gmx.at'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.0
data/examples/arc.rb CHANGED
@@ -10,7 +10,7 @@
10
10
  require 'hexapdf'
11
11
 
12
12
  doc = HexaPDF::Document.new
13
- page = doc.pages.add_page
13
+ page = doc.pages.add
14
14
  canvas = page.canvas
15
15
 
16
16
  radius = 75
data/examples/graphics.rb CHANGED
@@ -14,7 +14,7 @@
14
14
  require 'hexapdf'
15
15
 
16
16
  doc = HexaPDF::Document.new
17
- page = doc.pages.add_page
17
+ page = doc.pages.add
18
18
  canvas = page.canvas
19
19
 
20
20
  # Draws the shape that is used to showcase the transformations in the given
@@ -10,7 +10,7 @@
10
10
  require 'hexapdf'
11
11
 
12
12
  doc = HexaPDF::Document.new
13
- canvas = doc.pages.add_page.canvas
13
+ canvas = doc.pages.add.canvas
14
14
  canvas.font('Helvetica', size: 100)
15
15
  canvas.text("Hello World!", at: [20, 400])
16
16
  doc.write("hello_world.pdf", optimize: true)
data/examples/merging.rb CHANGED
@@ -19,6 +19,6 @@ require 'hexapdf'
19
19
  target = HexaPDF::Document.new
20
20
  ARGV.each do |file|
21
21
  pdf = HexaPDF::Document.open(file)
22
- pdf.pages.each_page {|page| target.pages.add_page(target.import(page))}
22
+ pdf.pages.each {|page| target.pages << target.import(page)}
23
23
  end
24
24
  target.write("merging.pdf", optimize: true)
@@ -47,7 +47,7 @@ class ShowTextProcessor < HexaPDF::Content::Processor
47
47
  end
48
48
 
49
49
  doc = HexaPDF::Document.open(ARGV.shift)
50
- doc.pages.each_page.with_index do |page, index|
50
+ doc.pages.each_with_index do |page, index|
51
51
  puts "Processing page #{index + 1}"
52
52
  processor = ShowTextProcessor.new(page)
53
53
  page.process_contents(processor)
@@ -26,7 +26,7 @@ doc = HexaPDF::Document.new
26
26
 
27
27
  HexaPDF::FontLoader::Standard14::MAPPING.each do |font_name, mapping|
28
28
  mapping.each_key do |variant|
29
- canvas = doc.pages.add_page.canvas
29
+ canvas = doc.pages.add.canvas
30
30
  canvas.font("Helvetica", size: 14)
31
31
  canvas.text("#{font_name} #{variant != :none ? variant : ''}", at: [100, 800])
32
32
 
data/examples/truetype.rb CHANGED
@@ -27,7 +27,7 @@ max_gid = wrapper.wrapped_font[:maxp].num_glyphs
27
27
 
28
28
  255.times do |page|
29
29
  break unless page * 256 < wrapper.wrapped_font[:maxp].num_glyphs
30
- canvas = doc.pages.add_page.canvas
30
+ canvas = doc.pages.add.canvas
31
31
  canvas.font("Helvetica", size: 10)
32
32
  canvas.text("Font: #{wrapper.wrapped_font.full_name}", at: [50, 825])
33
33
 
data/lib/hexapdf/cli.rb CHANGED
@@ -49,6 +49,9 @@ module HexaPDF
49
49
  # Runs the CLI application.
50
50
  def self.run(args = ARGV)
51
51
  Application.new.parse(args)
52
+ rescue => e
53
+ $stderr.puts "An error occurred: #{e.message}"
54
+ exit(1)
52
55
  end
53
56
 
54
57
  # The CmdParse::CommandParser class that is used for running the CLI application.
@@ -66,8 +69,13 @@ module HexaPDF
66
69
  add_command(CmdParse::VersionCommand.new)
67
70
  end
68
71
 
69
- PAGE_NUMBER_SPEC = "([1-9]\\d*|e)" #:nodoc:
70
- ROTATE_MAP = {'l' => -90, 'r' => 90, 'd' => 180, 'n' => :none, nil => :none} #:nodoc:
72
+ def parse(argv = ARGV) #:nodoc:
73
+ ARGV.unshift('help') if ARGV.empty?
74
+ super
75
+ end
76
+
77
+ PAGE_NUMBER_SPEC = "([1-9]\\d*|e)".freeze #:nodoc:
78
+ ROTATE_MAP = {'l' => -90, 'r' => 90, 'd' => 180, 'n' => :none}.freeze #:nodoc:
71
79
 
72
80
  # Parses the pages specification string and returns an array of tuples containing a page
73
81
  # number and a rotation value (either -90, 90, 180 or :none).
@@ -78,13 +86,12 @@ module HexaPDF
78
86
  case str
79
87
  when /\A#{PAGE_NUMBER_SPEC}(l|r|d|n)?\z/o
80
88
  arr << [($1 == 'e' ? count : str.to_i) - 1, ROTATE_MAP[$2]]
81
- when /\A#{PAGE_NUMBER_SPEC}-#{PAGE_NUMBER_SPEC}(l|r|d|n)?\z/
89
+ when /\A#{PAGE_NUMBER_SPEC}-#{PAGE_NUMBER_SPEC}(?:\/([1-9]\d*))?(l|r|d|n)?\z/
82
90
  start_nr = ($1 == 'e' ? count : $1.to_i) - 1
83
91
  end_nr = ($2 == 'e' ? count : $2.to_i) - 1
84
- rotation = ROTATE_MAP[$3]
85
- (start_nr > end_nr ? (end_nr..start_nr).reverse_each : (start_nr..end_nr)).each do |n|
86
- arr << [n, rotation]
87
- end
92
+ step = ($3 ? $3.to_i : 1) * (start_nr > end_nr ? -1 : 1)
93
+ rotation = ROTATE_MAP[$4]
94
+ start_nr.step(to: end_nr, by: step) {|n| arr << [n, rotation]}
88
95
  else
89
96
  raise OptionParser::InvalidArgument, "invalid page range format: #{str}"
90
97
  end
@@ -119,7 +119,7 @@ module HexaPDF
119
119
 
120
120
  # Iterates over all embedded files.
121
121
  def each_file(doc, &block) # :yields: obj, index
122
- doc.utils.each_file(search: @search).select(&:embedded_file?).each_with_index(&block)
122
+ doc.files.each(search: @search).select(&:embedded_file?).each_with_index(&block)
123
123
  end
124
124
 
125
125
  end
@@ -68,7 +68,7 @@ module HexaPDF
68
68
  private
69
69
 
70
70
  INFO_KEYS = [:Title, :Author, :Subject, :Keywords, :Creator, :Producer, #:nodoc:
71
- :CreationDate, :ModDate]
71
+ :CreationDate, :ModDate].freeze
72
72
 
73
73
  COLUMN_WIDTH = 20 #:nodoc:
74
74
 
@@ -95,7 +95,7 @@ module HexaPDF
95
95
  output_line("Encrypted", "yes (no or wrong password given)")
96
96
  end
97
97
 
98
- output_line("Pages", doc.pages.page_count.to_s)
98
+ output_line("Pages", doc.pages.count.to_s)
99
99
  output_line("Version", doc.version)
100
100
  end
101
101
  rescue HexaPDF::EncryptionError => e
@@ -113,15 +113,15 @@ module HexaPDF
113
113
  end
114
114
 
115
115
  def do_page_count(doc) #:nodoc:
116
- puts doc.pages.page_count
116
+ puts doc.pages.count
117
117
  end
118
118
 
119
119
  def do_pages(doc) #:nodoc:
120
- pages = command_parser.parse_pages_specification(@param, doc.pages.page_count)
120
+ pages = command_parser.parse_pages_specification(@param, doc.pages.count)
121
121
  pages.each do |index, _|
122
- page = doc.pages.page(index)
122
+ page = doc.pages[index]
123
123
  str = "page #{index + 1} (#{page.oid},#{page.gen}): "
124
- Array(page[:Contents]).each {|c| str << "#{c.oid},#{c.gen}"}
124
+ str << Array(page[:Contents]).map {|c| "#{c.oid},#{c.gen}"}.join(" ")
125
125
  puts str
126
126
  end
127
127
  end
@@ -39,28 +39,58 @@ module HexaPDF
39
39
 
40
40
  # Modifies a PDF file:
41
41
  #
42
- # * Decrypts or encrypts the PDF file.
42
+ # * Adds pages from other PDF files.
43
+ # * Decrypts or encrypts the resulting output PDF file.
43
44
  # * Generates or deletes object and cross-reference streams.
44
- # * Optimizes a PDF by merging the revisions of a PDF file and removes unused entries.
45
+ # * Optimizes the output PDF by merging the revisions of a PDF file and removing unused entries.
45
46
  #
46
47
  # See: HexaPDF::Task::Optimize
47
48
  class Modify < CmdParse::Command
48
49
 
50
+ InputSpec = Struct.new(:file, :pages, :password) #:nodoc:
51
+
49
52
  def initialize #:nodoc:
50
53
  super('modify', takes_commands: false)
51
54
  short_desc("Modify a PDF file")
52
55
  long_desc(<<-EOF.gsub!(/^ */, ''))
53
- This command modifies a PDF file. It can be used to encrypt/decrypt a file, to optimize it
54
- and remove unused entries and to generate or delete object and cross-reference streams.
56
+ This command modifies a PDF file. It can be used to select pages that should appear in
57
+ the output file and to add pages from other PDF files. The output file can be
58
+ encrypted/decrypted and optimized in various ways.
59
+
60
+ The first input file is the primary file which gets modified, so meta data like file
61
+ information, outlines, etc. are taken from it. Alternatively, it is possible to start
62
+ with an empty PDF file by using --empty. The order of the options specifying the files
63
+ is important as they are used in that order.
64
+
65
+ Also note that the --password and --pages options apply to the last preceeding input file.
55
66
  EOF
56
67
 
57
- options.on("--password PASSWORD", "-p", String,
58
- "The password for decryption. Use - for reading from standard input.") do |pwd|
59
- @password = (pwd == '-' ? command_parser.read_password("Input file password") : pwd)
68
+ options.separator("")
69
+ options.separator("Input file(s) related options")
70
+ options.on("-f", "--file FILE", "Input file, can be specified multiple times") do |file|
71
+ @files << InputSpec.new(file, '1-e')
72
+ end
73
+ options.on("-p", "--password PASSWORD", String, "The password for decrypting the last " \
74
+ "specified input file (use - for reading from standard input)") do |pwd|
75
+ raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
76
+ pwd = (pwd == '-' ? command_parser.read_password("#{@files.last.file} password") : pwd)
77
+ @files.last.password = pwd
60
78
  end
61
- options.on("--pages PAGES", "The pages to be used in the output file") do |pages|
62
- @pages = pages
79
+ options.on("-i", "--pages PAGES", "The pages of the last specified input file that " \
80
+ "should be used (default: 1-e)") do |pages|
81
+ raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
82
+ @files.last.pages = pages
63
83
  end
84
+ options.on("-e", "--empty", "Use an empty file as the first input file") do
85
+ @initial_empty = true
86
+ end
87
+ options.on("--[no-]interleave", "Interleave the pages from the input files (default: " \
88
+ "false)") do |c|
89
+ @interleave = c
90
+ end
91
+
92
+ options.separator("")
93
+ options.separator("Output file related options")
64
94
  options.on("--embed FILE", String, "Embed the file into the output file (can be used " \
65
95
  "multiple times)") do |file|
66
96
  @embed_files << file
@@ -83,9 +113,10 @@ module HexaPDF
83
113
  "preserve)") do |streams|
84
114
  @streams = streams
85
115
  end
86
-
87
- options.separator("")
88
- options.separator("Encryption related options")
116
+ options.on("--[no-]compress-pages", "Recompress page content streams (may take a long " \
117
+ "time; default: no)") do |c|
118
+ @compress_pages = c
119
+ end
89
120
  options.on("--decrypt", "Remove any encryption") do
90
121
  @encryption = :remove
91
122
  end
@@ -93,12 +124,12 @@ module HexaPDF
93
124
  @encryption = :add
94
125
  end
95
126
  options.on("--owner-password PASSWORD", String, "The owner password to be set on the " \
96
- "output file. Use - for reading from standard input.") do |pwd|
127
+ "output file (use - for reading from standard input)") do |pwd|
97
128
  @encryption = :add
98
129
  @enc_owner_pwd = (pwd == '-' ? command_parser.read_password("Owner password") : pwd)
99
130
  end
100
131
  options.on("--user-password PASSWORD", String, "The user password to be set on the " \
101
- "output file. Use - for reading from standard input.") do |pwd|
132
+ "output file (use - for reading from standard input)") do |pwd|
102
133
  @encryption = :add
103
134
  @enc_user_pwd = (pwd == '-' ? command_parser.read_password("User password") : pwd)
104
135
  end
@@ -121,19 +152,23 @@ module HexaPDF
121
152
  options.on("--permissions PERMS", Array,
122
153
  "Comma separated list of permissions to be set on the output file. Possible " \
123
154
  "values: #{syms.join(', ')}") do |perms|
124
- perms.each do |perm|
125
- unless syms.include?(perm)
155
+ perms.map! do |perm|
156
+ unless syms.include?(perm.to_sym)
126
157
  raise OptionParser::InvalidArgument, "#{perm} (invalid permission name)"
127
158
  end
159
+ perm.to_sym
128
160
  end
129
161
  @encryption = :add
130
162
  @enc_permissions = perms
131
163
  end
132
164
 
133
- @password = nil
134
- @pages = '1-e'
165
+ @files = []
166
+ @initial_empty = false
167
+ @interleave = false
168
+
135
169
  @embed_files = []
136
170
  @compact = true
171
+ @compress_pages = false
137
172
  @object_streams = :preserve
138
173
  @xref_streams = :preserve
139
174
  @streams = :preserve
@@ -146,58 +181,123 @@ module HexaPDF
146
181
  @enc_permissions = []
147
182
  end
148
183
 
149
- def execute(input_file, output_file) #:nodoc:
150
- @compact = true unless @pages == '1-e'
151
- if @enc_user_pwd && !@enc_user_pwd.empty? && (!@enc_owner_pwd || @enc_owner_pwd.empty?)
152
- @enc_owner_pwd = @enc_user_pwd
184
+ def execute(output_file) #:nodoc:
185
+ if !@initial_empty && @files.empty?
186
+ error = OptionParser::ParseError.new("At least one --file FILE or --empty is needed")
187
+ error.reason = "Missing argument"
188
+ raise error
153
189
  end
154
190
 
155
- HexaPDF::Document.open(input_file, decryption_opts: {password: @password}) do |doc|
156
- arrange_pages(doc) unless @pages == '1-e'
157
- @embed_files.each {|file| doc.utils.add_file(file, embed: true)}
158
-
159
- doc.task(:optimize, compact: @compact, object_streams: @object_streams,
160
- xref_streams: @xref_streams)
191
+ # Create PDF documents for each input file
192
+ cache = {}
193
+ @files.each do |spec|
194
+ cache[spec.file] ||= HexaPDF::Document.new(io: File.open(spec.file),
195
+ decryption_opts: {password: spec.password})
196
+ spec.file = cache[spec.file]
197
+ end
161
198
 
162
- handle_streams(doc) if @streams != :preserve
199
+ # Assemble pages
200
+ target = (@initial_empty ? HexaPDF::Document.new : @files.first.file)
201
+ page_tree = target.add(Type: :Pages)
202
+ import_pages(page_tree)
203
+ target.catalog[:Pages] = page_tree
163
204
 
164
- if @encryption == :add
165
- doc.encrypt(algorithm: @enc_algorithm, key_length: @enc_key_length,
166
- force_V4: @enc_force_v4, permissions: @enc_permissions,
167
- owner_password: @enc_owner_pwd, user_password: @enc_user_pwd)
168
- elsif @encryption == :remove
169
- doc.encrypt(name: nil)
205
+ # Remove potentially imported but unused pages and page tree nodes
206
+ retained = target.pages.each_with_object({}) {|page, h| h[page.data] = true}
207
+ retained[target.pages.root.data] = true
208
+ target.each(current: false) do |obj|
209
+ next unless obj.kind_of?(HexaPDF::Dictionary)
210
+ if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
211
+ target.delete(obj)
170
212
  end
213
+ end
214
+
215
+ # Embed the given files
216
+ @embed_files.each {|file| target.files.add(file, embed: true)}
217
+
218
+ # Optimize the PDF file
219
+ target.task(:optimize, compact: @compact, object_streams: @object_streams,
220
+ xref_streams: @xref_streams, compress_pages: @compress_pages)
171
221
 
172
- doc.write(output_file)
222
+ # Update stream filters
223
+ handle_streams(target) unless @streams == :preserve
224
+
225
+ # Encrypt, decrypt or do nothing
226
+ if @encryption == :add
227
+ target.encrypt(algorithm: @enc_algorithm, key_length: @enc_key_length,
228
+ force_V4: @enc_force_v4, permissions: @enc_permissions,
229
+ owner_password: @enc_owner_pwd, user_password: @enc_user_pwd)
230
+ elsif @encryption == :remove
231
+ target.encrypt(name: nil)
173
232
  end
233
+
234
+ target.write(output_file)
174
235
  rescue HexaPDF::Error => e
175
- $stderr.puts "Error while processing the PDF file: #{e.message}"
236
+ $stderr.puts "Processing error : #{e.message}"
176
237
  exit(1)
177
238
  end
178
239
 
240
+ def usage_arguments #:nodoc:
241
+ "{--file IN_FILE | --empty} OUT_FILE"
242
+ end
243
+
179
244
  private
180
245
 
181
- # Arranges the pages of the document as specified with the --pages option.
182
- def arrange_pages(doc)
183
- pages = command_parser.parse_pages_specification(@pages, doc.pages.page_count)
184
- new_page_tree = doc.add(Type: :Pages)
185
- pages.each do |index, rotation|
186
- page = doc.pages.page(index)
187
- page.value.update(page.copy_inherited_values)
188
- if rotation == :none
189
- page.delete(:Rotate)
190
- else
191
- page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
246
+ # Imports the pages of all input files as specified with the --pages options into the given page
247
+ # tree.
248
+ def import_pages(page_tree)
249
+ @files.each do |s|
250
+ page_list = s.file.pages.to_a
251
+ s.pages = command_parser.parse_pages_specification(s.pages, s.file.pages.count)
252
+ s.pages.each do |arr|
253
+ arr[0] = page_list[arr[0]]
254
+ arr[1] = arr[0].value[:Rotate] || :none unless arr[1]
255
+ end
256
+ end
257
+
258
+ if @interleave
259
+ max_pages_per_file = 0
260
+ all = @files.each_with_index.map do |spec, findex|
261
+ list = []
262
+ spec.pages.each {|index, rotation| list << [spec.file, findex, index, rotation]}
263
+ max_pages_per_file = list.size if list.size > max_pages_per_file
264
+ list
192
265
  end
193
- new_page_tree.add_page(page)
266
+ first, *rest = *all
267
+ first[max_pages_per_file - 1] ||= nil
268
+ first.zip(*rest) do |slice|
269
+ slice.each do |source, findex, page, rotation|
270
+ next unless source
271
+ import_page(page_tree, findex, page, rotation)
272
+ end
273
+ end
274
+ else
275
+ @files.each_with_index do |s, findex|
276
+ s.pages.each {|page, rotation| import_page(page_tree, findex, page, rotation)}
277
+ end
278
+ end
279
+ end
280
+
281
+ # Import the page with the given +rotation+ into the page tree.
282
+ def import_page(page_tree, source_index, page, rotation)
283
+ if page_tree.document == page.document
284
+ page.value.update(page.copy_inherited_values)
285
+ page = page.deep_copy unless source_index == 0
286
+ else
287
+ page = page_tree.document.import(page).deep_copy
288
+ end
289
+ if rotation == :none
290
+ page.delete(:Rotate)
291
+ elsif rotation.kind_of?(Integer)
292
+ page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
194
293
  end
195
- doc.catalog[:Pages] = new_page_tree
294
+ page_tree.document.add(page)
295
+ page_tree.add_page(page)
196
296
  end
197
297
 
198
298
  IGNORED_FILTERS = { #:nodoc:
199
299
  CCITTFaxDecode: true, JBIG2Decode: true, DCTDecode: true, JPXDecode: true, Crypt: true
200
- }
300
+ }.freeze
201
301
 
202
302
  # Applies the chosen stream mode to all streams.
203
303
  def handle_streams(doc)