hexapdf 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +56 -0
  3. data/CONTRIBUTERS +1 -1
  4. data/Rakefile +3 -3
  5. data/VERSION +1 -1
  6. data/examples/arc.rb +1 -1
  7. data/examples/graphics.rb +1 -1
  8. data/examples/hello_world.rb +1 -1
  9. data/examples/merging.rb +1 -1
  10. data/examples/show_char_bboxes.rb +1 -1
  11. data/examples/standard_pdf_fonts.rb +1 -1
  12. data/examples/truetype.rb +1 -1
  13. data/lib/hexapdf/cli.rb +14 -7
  14. data/lib/hexapdf/cli/extract.rb +1 -1
  15. data/lib/hexapdf/cli/info.rb +2 -2
  16. data/lib/hexapdf/cli/inspect.rb +4 -4
  17. data/lib/hexapdf/cli/modify.rb +151 -51
  18. data/lib/hexapdf/configuration.rb +1 -1
  19. data/lib/hexapdf/content/canvas.rb +1 -1
  20. data/lib/hexapdf/content/processor.rb +1 -1
  21. data/lib/hexapdf/dictionary.rb +6 -19
  22. data/lib/hexapdf/dictionary_fields.rb +1 -1
  23. data/lib/hexapdf/document.rb +23 -16
  24. data/lib/hexapdf/document/files.rb +130 -0
  25. data/lib/hexapdf/{font_utils.rb → document/fonts.rb} +40 -38
  26. data/lib/hexapdf/document/images.rb +117 -0
  27. data/lib/hexapdf/document/pages.rb +125 -0
  28. data/lib/hexapdf/encryption/aes.rb +1 -1
  29. data/lib/hexapdf/encryption/ruby_aes.rb +10 -10
  30. data/lib/hexapdf/encryption/standard_security_handler.rb +11 -8
  31. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  32. data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -6
  33. data/lib/hexapdf/font/cmap/writer.rb +5 -7
  34. data/lib/hexapdf/font/true_type.rb +4 -1
  35. data/lib/hexapdf/font/true_type/font.rb +8 -16
  36. data/lib/hexapdf/font/true_type/table.rb +5 -16
  37. data/lib/hexapdf/font/true_type/table/cmap.rb +2 -7
  38. data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +2 -6
  39. data/lib/hexapdf/font/true_type/table/directory.rb +0 -5
  40. data/lib/hexapdf/font/true_type/table/glyf.rb +3 -11
  41. data/lib/hexapdf/font/true_type/table/head.rb +0 -12
  42. data/lib/hexapdf/font/true_type/table/hhea.rb +0 -7
  43. data/lib/hexapdf/font/true_type/table/hmtx.rb +1 -5
  44. data/lib/hexapdf/font/true_type/table/loca.rb +0 -4
  45. data/lib/hexapdf/font/true_type/table/maxp.rb +0 -8
  46. data/lib/hexapdf/font/true_type/table/name.rb +3 -17
  47. data/lib/hexapdf/font/true_type/table/os2.rb +0 -14
  48. data/lib/hexapdf/font/true_type/table/post.rb +0 -8
  49. data/lib/hexapdf/font/true_type_wrapper.rb +1 -1
  50. data/lib/hexapdf/font/type1.rb +2 -2
  51. data/lib/hexapdf/font/type1/font.rb +2 -1
  52. data/lib/hexapdf/font/type1/font_metrics.rb +10 -1
  53. data/lib/hexapdf/font/type1_wrapper.rb +2 -1
  54. data/lib/hexapdf/font_loader/from_configuration.rb +1 -1
  55. data/lib/hexapdf/font_loader/standard14.rb +1 -1
  56. data/lib/hexapdf/image_loader/jpeg.rb +1 -1
  57. data/lib/hexapdf/image_loader/pdf.rb +1 -1
  58. data/lib/hexapdf/image_loader/png.rb +2 -2
  59. data/lib/hexapdf/object.rb +18 -5
  60. data/lib/hexapdf/rectangle.rb +8 -1
  61. data/lib/hexapdf/revisions.rb +4 -2
  62. data/lib/hexapdf/serializer.rb +3 -3
  63. data/lib/hexapdf/stream.rb +3 -2
  64. data/lib/hexapdf/task/dereference.rb +4 -5
  65. data/lib/hexapdf/task/optimize.rb +6 -3
  66. data/lib/hexapdf/tokenizer.rb +3 -3
  67. data/lib/hexapdf/type/file_specification.rb +2 -2
  68. data/lib/hexapdf/type/form.rb +19 -0
  69. data/lib/hexapdf/type/page.rb +21 -6
  70. data/lib/hexapdf/type/page_tree_node.rb +27 -34
  71. data/lib/hexapdf/utils/bit_stream.rb +1 -1
  72. data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
  73. data/lib/hexapdf/version.rb +1 -1
  74. data/man/man1/hexapdf.1 +259 -187
  75. data/test/hexapdf/content/graphic_object/test_arc.rb +1 -1
  76. data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +1 -1
  77. data/test/hexapdf/content/graphic_object/test_solid_arc.rb +1 -1
  78. data/test/hexapdf/content/test_canvas.rb +1 -1
  79. data/test/hexapdf/document/test_files.rb +71 -0
  80. data/test/hexapdf/{test_font_utils.rb → document/test_fonts.rb} +1 -2
  81. data/test/hexapdf/document/test_images.rb +78 -0
  82. data/test/hexapdf/document/test_pages.rb +114 -0
  83. data/test/hexapdf/encryption/test_standard_security_handler.rb +26 -5
  84. data/test/hexapdf/font/test_true_type_wrapper.rb +1 -1
  85. data/test/hexapdf/font/true_type/common.rb +0 -4
  86. data/test/hexapdf/font/true_type/table/test_cmap.rb +0 -6
  87. data/test/hexapdf/font/true_type/table/test_directory.rb +0 -5
  88. data/test/hexapdf/font/true_type/table/test_glyf.rb +5 -8
  89. data/test/hexapdf/font/true_type/table/test_head.rb +0 -20
  90. data/test/hexapdf/font/true_type/table/test_hhea.rb +0 -7
  91. data/test/hexapdf/font/true_type/table/test_hmtx.rb +2 -7
  92. data/test/hexapdf/font/true_type/table/test_loca.rb +4 -8
  93. data/test/hexapdf/font/true_type/table/test_maxp.rb +0 -7
  94. data/test/hexapdf/font/true_type/table/test_name.rb +0 -19
  95. data/test/hexapdf/font/true_type/table/test_os2.rb +0 -8
  96. data/test/hexapdf/font/true_type/table/test_post.rb +0 -13
  97. data/test/hexapdf/font/true_type/test_font.rb +14 -38
  98. data/test/hexapdf/font/true_type/test_table.rb +0 -9
  99. data/test/hexapdf/font/type1/test_font_metrics.rb +22 -0
  100. data/test/hexapdf/task/test_dereference.rb +5 -1
  101. data/test/hexapdf/task/test_optimize.rb +1 -1
  102. data/test/hexapdf/test_dictionary.rb +4 -0
  103. data/test/hexapdf/test_document.rb +0 -7
  104. data/test/hexapdf/test_importer.rb +4 -4
  105. data/test/hexapdf/test_object.rb +31 -9
  106. data/test/hexapdf/test_rectangle.rb +18 -0
  107. data/test/hexapdf/test_revisions.rb +7 -0
  108. data/test/hexapdf/test_serializer.rb +6 -0
  109. data/test/hexapdf/test_writer.rb +2 -2
  110. data/test/hexapdf/type/test_form.rb +12 -0
  111. data/test/hexapdf/type/test_page.rb +39 -20
  112. data/test/hexapdf/type/test_page_tree_node.rb +28 -21
  113. metadata +21 -9
  114. data/lib/hexapdf/document_utils.rb +0 -209
  115. data/test/hexapdf/test_document_utils.rb +0 -144
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1c10b3600c81488af01c877e24fc31aa859968e8
4
- data.tar.gz: 6bdde619dae45921219a100612fd67e3fd1e1317
3
+ metadata.gz: 874a5d2e5a2408ceed91a9bc61df138248fbbcd2
4
+ data.tar.gz: 894e67110daae93c1479a5ced992a84dabc66cd1
5
5
  SHA512:
6
- metadata.gz: 9b79e62cf749a111b4a81a6ce7dab39dfca4b66ba47cd9961ab7452fb75a75373bb01a088659d7bca31429969341b980e75a3a39008525effb2bd116d0162a69
7
- data.tar.gz: b09a691c6f92f6035a73f226ec0d566f2d48a18082478a03037ea4dd0dded3698d25a0e403aa7f51cea8d9481bafd923f8de8d9510a961994c0f5e733aa12bbf
6
+ metadata.gz: 9d7c18191ea0d64992fc01b05904c3324cc1f32e98fe848bc931970b53f42d9ab6878d4e8c3730a570c607f677dfdaa8a8adebe7ab4c8e279cc13383124dee1b
7
+ data.tar.gz: 1e080dd694ad58c3256a647a98a9b03c439ae340311b037fad3b1bdbe436ffbed74fec0c00af3df68540ca32aa75336c328a317a74233428dcf16d8920cc8e75
data/CHANGELOG.md ADDED
@@ -0,0 +1,56 @@
1
+ ## 0.2.0 - unreleased
2
+
3
+ ### Added
4
+
5
+ * PDF file merge ability to `hexapdf modify`, i.e. adding pages from other PDFs
6
+ * Page interleaving support to `hexapdf modify`
7
+ * Step values in pages definitions for CLI commands
8
+ * Convenience class for working with pages through [HexaPDF::Document#pages]
9
+ with a more Ruby-like interface
10
+ * Method [HexaPDF::Type::Form#canvas]
11
+ * Method [HexaPDF::Type::Page#index]
12
+ * Validation for [HexaPDF::Rectangle] objects
13
+ * [HexaPDF::Font::Type1::FontMetrics#weight_class] for returning the numeric
14
+ weight
15
+
16
+ ### Changed
17
+
18
+ * Refactor document utilities into their own classes with a more Ruby-like interface;
19
+ this concerns fonts, images and files, which are now accessible through
20
+ [HexaPDF::Document#fonts], [HexaPDF::Document#images] and
21
+ [HexaPDF::Document#files]
22
+ * Validate nested collection values in [HexaPDF::Object]
23
+ * Allow [HexaPDF::Dictionary#[]] to always unwrap nil values
24
+ * Update [HexaPDF::Task::Optimize] to delete unused objects on `:compact`
25
+ * Allow [HexaPDF::Type::PageTreeNode#delete_page] to take a page object or a
26
+ page index
27
+ * Don't set /EFF key in encryption dictionary
28
+ * Better error handling for hexapdf CLI commands
29
+ * Show help output when no command is given for `hexapdf` CLI
30
+ * Set /FontWeight in [HexaPDF::Font::Type1Wrapper]
31
+ * Use kramdown's man page support for the `hexapdf` man page instead of ronn
32
+
33
+ ### Removed
34
+
35
+ * Remove unneeded parts of TrueType implementation
36
+
37
+ ### Fixed
38
+
39
+ * Problem with unnamed classes/modules on serialization
40
+ * Handle potentially indirect objects correctly in [HexaPDF::Object::deep_copy]
41
+ * [HexaPDF::Revisions#merge] for objects that appear in multiple revisions
42
+ * Output of the `--pages` option of the `hexapdf inspect` command
43
+ * Infinite recursion problem in [HexaPDF::Task::Dereference]
44
+ * Problem with iteration over images in certain cases
45
+ * [HexaPDF::Type::Page#[]] with respect to inherited fields
46
+ * Problems with access permissions on encryption
47
+ * Encryption routine of standard security handler with respect to owner password
48
+ * Invalid check in validation of standard encryption dictionary
49
+ * `hexapdf modify` command to support files with many pages
50
+ * Validation of encryption key for encryption revision 6
51
+ * Various parts of the API documentation
52
+
53
+
54
+ ## 0.1.0 - 2016-10-26
55
+
56
+ * Initial release
data/CONTRIBUTERS CHANGED
@@ -1,3 +1,3 @@
1
1
  Count Name
2
2
  ======= ====
3
- 601 Thomas Leitner <t_leitner@gmx.at>
3
+ 647 Thomas Leitner <t_leitner@gmx.at>
data/Rakefile CHANGED
@@ -16,7 +16,7 @@ namespace :dev do
16
16
  PKG_FILES = FileList.new([
17
17
  'Rakefile',
18
18
  'LICENSE', 'agpl-3.0.txt',
19
- 'README.md',
19
+ 'README.md', 'CHANGELOG.md',
20
20
  'VERSION', 'CONTRIBUTERS',
21
21
  'bin/*',
22
22
  'lib/**/*.rb',
@@ -29,7 +29,7 @@ namespace :dev do
29
29
  CLOBBER << "man/man1/hexapdf.1"
30
30
  file 'man/man1/hexapdf.1' => ['man/man1/hexapdf.1.md'] do
31
31
  puts "Generating hexapdf man page"
32
- system "ronn --pipe -r man/man1/hexapdf.1.md > man/man1/hexapdf.1"
32
+ system "kramdown -o man man/man1/hexapdf.1.md > man/man1/hexapdf.1"
33
33
  end
34
34
 
35
35
  CLOBBER << "VERSION"
@@ -65,7 +65,7 @@ namespace :dev do
65
65
  s.executables = ['hexapdf']
66
66
  s.default_executable = 'hexapdf'
67
67
  s.add_dependency('cmdparse', '~> 3.0', '>= 3.0.1')
68
- s.add_development_dependency('ronn', '~> 0.7')
68
+ s.add_development_dependency('kramdown', '~> 1.0', '>= 1.13.0')
69
69
 
70
70
  s.author = 'Thomas Leitner'
71
71
  s.email = 't_leitner@gmx.at'
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.0
data/examples/arc.rb CHANGED
@@ -10,7 +10,7 @@
10
10
  require 'hexapdf'
11
11
 
12
12
  doc = HexaPDF::Document.new
13
- page = doc.pages.add_page
13
+ page = doc.pages.add
14
14
  canvas = page.canvas
15
15
 
16
16
  radius = 75
data/examples/graphics.rb CHANGED
@@ -14,7 +14,7 @@
14
14
  require 'hexapdf'
15
15
 
16
16
  doc = HexaPDF::Document.new
17
- page = doc.pages.add_page
17
+ page = doc.pages.add
18
18
  canvas = page.canvas
19
19
 
20
20
  # Draws the shape that is used to showcase the transformations in the given
@@ -10,7 +10,7 @@
10
10
  require 'hexapdf'
11
11
 
12
12
  doc = HexaPDF::Document.new
13
- canvas = doc.pages.add_page.canvas
13
+ canvas = doc.pages.add.canvas
14
14
  canvas.font('Helvetica', size: 100)
15
15
  canvas.text("Hello World!", at: [20, 400])
16
16
  doc.write("hello_world.pdf", optimize: true)
data/examples/merging.rb CHANGED
@@ -19,6 +19,6 @@ require 'hexapdf'
19
19
  target = HexaPDF::Document.new
20
20
  ARGV.each do |file|
21
21
  pdf = HexaPDF::Document.open(file)
22
- pdf.pages.each_page {|page| target.pages.add_page(target.import(page))}
22
+ pdf.pages.each {|page| target.pages << target.import(page)}
23
23
  end
24
24
  target.write("merging.pdf", optimize: true)
@@ -47,7 +47,7 @@ class ShowTextProcessor < HexaPDF::Content::Processor
47
47
  end
48
48
 
49
49
  doc = HexaPDF::Document.open(ARGV.shift)
50
- doc.pages.each_page.with_index do |page, index|
50
+ doc.pages.each_with_index do |page, index|
51
51
  puts "Processing page #{index + 1}"
52
52
  processor = ShowTextProcessor.new(page)
53
53
  page.process_contents(processor)
@@ -26,7 +26,7 @@ doc = HexaPDF::Document.new
26
26
 
27
27
  HexaPDF::FontLoader::Standard14::MAPPING.each do |font_name, mapping|
28
28
  mapping.each_key do |variant|
29
- canvas = doc.pages.add_page.canvas
29
+ canvas = doc.pages.add.canvas
30
30
  canvas.font("Helvetica", size: 14)
31
31
  canvas.text("#{font_name} #{variant != :none ? variant : ''}", at: [100, 800])
32
32
 
data/examples/truetype.rb CHANGED
@@ -27,7 +27,7 @@ max_gid = wrapper.wrapped_font[:maxp].num_glyphs
27
27
 
28
28
  255.times do |page|
29
29
  break unless page * 256 < wrapper.wrapped_font[:maxp].num_glyphs
30
- canvas = doc.pages.add_page.canvas
30
+ canvas = doc.pages.add.canvas
31
31
  canvas.font("Helvetica", size: 10)
32
32
  canvas.text("Font: #{wrapper.wrapped_font.full_name}", at: [50, 825])
33
33
 
data/lib/hexapdf/cli.rb CHANGED
@@ -49,6 +49,9 @@ module HexaPDF
49
49
  # Runs the CLI application.
50
50
  def self.run(args = ARGV)
51
51
  Application.new.parse(args)
52
+ rescue => e
53
+ $stderr.puts "An error occurred: #{e.message}"
54
+ exit(1)
52
55
  end
53
56
 
54
57
  # The CmdParse::CommandParser class that is used for running the CLI application.
@@ -66,8 +69,13 @@ module HexaPDF
66
69
  add_command(CmdParse::VersionCommand.new)
67
70
  end
68
71
 
69
- PAGE_NUMBER_SPEC = "([1-9]\\d*|e)" #:nodoc:
70
- ROTATE_MAP = {'l' => -90, 'r' => 90, 'd' => 180, 'n' => :none, nil => :none} #:nodoc:
72
+ def parse(argv = ARGV) #:nodoc:
73
+ ARGV.unshift('help') if ARGV.empty?
74
+ super
75
+ end
76
+
77
+ PAGE_NUMBER_SPEC = "([1-9]\\d*|e)".freeze #:nodoc:
78
+ ROTATE_MAP = {'l' => -90, 'r' => 90, 'd' => 180, 'n' => :none}.freeze #:nodoc:
71
79
 
72
80
  # Parses the pages specification string and returns an array of tuples containing a page
73
81
  # number and a rotation value (either -90, 90, 180 or :none).
@@ -78,13 +86,12 @@ module HexaPDF
78
86
  case str
79
87
  when /\A#{PAGE_NUMBER_SPEC}(l|r|d|n)?\z/o
80
88
  arr << [($1 == 'e' ? count : str.to_i) - 1, ROTATE_MAP[$2]]
81
- when /\A#{PAGE_NUMBER_SPEC}-#{PAGE_NUMBER_SPEC}(l|r|d|n)?\z/
89
+ when /\A#{PAGE_NUMBER_SPEC}-#{PAGE_NUMBER_SPEC}(?:\/([1-9]\d*))?(l|r|d|n)?\z/
82
90
  start_nr = ($1 == 'e' ? count : $1.to_i) - 1
83
91
  end_nr = ($2 == 'e' ? count : $2.to_i) - 1
84
- rotation = ROTATE_MAP[$3]
85
- (start_nr > end_nr ? (end_nr..start_nr).reverse_each : (start_nr..end_nr)).each do |n|
86
- arr << [n, rotation]
87
- end
92
+ step = ($3 ? $3.to_i : 1) * (start_nr > end_nr ? -1 : 1)
93
+ rotation = ROTATE_MAP[$4]
94
+ start_nr.step(to: end_nr, by: step) {|n| arr << [n, rotation]}
88
95
  else
89
96
  raise OptionParser::InvalidArgument, "invalid page range format: #{str}"
90
97
  end
@@ -119,7 +119,7 @@ module HexaPDF
119
119
 
120
120
  # Iterates over all embedded files.
121
121
  def each_file(doc, &block) # :yields: obj, index
122
- doc.utils.each_file(search: @search).select(&:embedded_file?).each_with_index(&block)
122
+ doc.files.each(search: @search).select(&:embedded_file?).each_with_index(&block)
123
123
  end
124
124
 
125
125
  end
@@ -68,7 +68,7 @@ module HexaPDF
68
68
  private
69
69
 
70
70
  INFO_KEYS = [:Title, :Author, :Subject, :Keywords, :Creator, :Producer, #:nodoc:
71
- :CreationDate, :ModDate]
71
+ :CreationDate, :ModDate].freeze
72
72
 
73
73
  COLUMN_WIDTH = 20 #:nodoc:
74
74
 
@@ -95,7 +95,7 @@ module HexaPDF
95
95
  output_line("Encrypted", "yes (no or wrong password given)")
96
96
  end
97
97
 
98
- output_line("Pages", doc.pages.page_count.to_s)
98
+ output_line("Pages", doc.pages.count.to_s)
99
99
  output_line("Version", doc.version)
100
100
  end
101
101
  rescue HexaPDF::EncryptionError => e
@@ -113,15 +113,15 @@ module HexaPDF
113
113
  end
114
114
 
115
115
  def do_page_count(doc) #:nodoc:
116
- puts doc.pages.page_count
116
+ puts doc.pages.count
117
117
  end
118
118
 
119
119
  def do_pages(doc) #:nodoc:
120
- pages = command_parser.parse_pages_specification(@param, doc.pages.page_count)
120
+ pages = command_parser.parse_pages_specification(@param, doc.pages.count)
121
121
  pages.each do |index, _|
122
- page = doc.pages.page(index)
122
+ page = doc.pages[index]
123
123
  str = "page #{index + 1} (#{page.oid},#{page.gen}): "
124
- Array(page[:Contents]).each {|c| str << "#{c.oid},#{c.gen}"}
124
+ str << Array(page[:Contents]).map {|c| "#{c.oid},#{c.gen}"}.join(" ")
125
125
  puts str
126
126
  end
127
127
  end
@@ -39,28 +39,58 @@ module HexaPDF
39
39
 
40
40
  # Modifies a PDF file:
41
41
  #
42
- # * Decrypts or encrypts the PDF file.
42
+ # * Adds pages from other PDF files.
43
+ # * Decrypts or encrypts the resulting output PDF file.
43
44
  # * Generates or deletes object and cross-reference streams.
44
- # * Optimizes a PDF by merging the revisions of a PDF file and removes unused entries.
45
+ # * Optimizes the output PDF by merging the revisions of a PDF file and removes unused entries.
45
46
  #
46
47
  # See: HexaPDF::Task::Optimize
47
48
  class Modify < CmdParse::Command
48
49
 
50
+ InputSpec = Struct.new(:file, :pages, :password) #:nodoc:
51
+
49
52
  def initialize #:nodoc:
50
53
  super('modify', takes_commands: false)
51
54
  short_desc("Modify a PDF file")
52
55
  long_desc(<<-EOF.gsub!(/^ */, ''))
53
- This command modifies a PDF file. It can be used to encrypt/decrypt a file, to optimize it
54
- and remove unused entries and to generate or delete object and cross-reference streams.
56
+ This command modifies a PDF file. It can be used to select pages that should appear in
57
+ the output file and to add pages from other PDF files. The output file can be
58
+ encrypted/decrypted and optimized in various ways.
59
+
60
+ The first input file is the primary file which gets modified, so meta data like file
61
+ information, outlines, etc. are taken from it. Alternatively, it is possible to start
62
+ with an empty PDF file by using --empty. The order of the options specifying the files
63
+ is important as they are used in that order.
64
+
65
+ Also note that the --password and --pages options apply to the last preceding input file.
55
66
  EOF
56
67
 
57
- options.on("--password PASSWORD", "-p", String,
58
- "The password for decryption. Use - for reading from standard input.") do |pwd|
59
- @password = (pwd == '-' ? command_parser.read_password("Input file password") : pwd)
68
+ options.separator("")
69
+ options.separator("Input file(s) related options")
70
+ options.on("-f", "--file FILE", "Input file, can be specified multiple times") do |file|
71
+ @files << InputSpec.new(file, '1-e')
72
+ end
73
+ options.on("-p", "--password PASSWORD", String, "The password for decrypting the last " \
74
+ "specified input file (use - for reading from standard input)") do |pwd|
75
+ raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
76
+ pwd = (pwd == '-' ? command_parser.read_password("#{@files.last.file} password") : pwd)
77
+ @files.last.password = pwd
60
78
  end
61
- options.on("--pages PAGES", "The pages to be used in the output file") do |pages|
62
- @pages = pages
79
+ options.on("-i", "--pages PAGES", "The pages of the last specified input file that " \
80
+ "should be used (default: 1-e)") do |pages|
81
+ raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
82
+ @files.last.pages = pages
63
83
  end
84
+ options.on("-e", "--empty", "Use an empty file as the first input file") do
85
+ @initial_empty = true
86
+ end
87
+ options.on("--[no-]interleave", "Interleave the pages from the input files (default: " \
88
+ "false)") do |c|
89
+ @interleave = c
90
+ end
91
+
92
+ options.separator("")
93
+ options.separator("Output file related options")
64
94
  options.on("--embed FILE", String, "Embed the file into the output file (can be used " \
65
95
  "multiple times)") do |file|
66
96
  @embed_files << file
@@ -83,9 +113,10 @@ module HexaPDF
83
113
  "preserve)") do |streams|
84
114
  @streams = streams
85
115
  end
86
-
87
- options.separator("")
88
- options.separator("Encryption related options")
116
+ options.on("--[no-]compress-pages", "Recompress page content streams (may take a long " \
117
+ "time; default: no)") do |c|
118
+ @compress_pages = c
119
+ end
89
120
  options.on("--decrypt", "Remove any encryption") do
90
121
  @encryption = :remove
91
122
  end
@@ -93,12 +124,12 @@ module HexaPDF
93
124
  @encryption = :add
94
125
  end
95
126
  options.on("--owner-password PASSWORD", String, "The owner password to be set on the " \
96
- "output file. Use - for reading from standard input.") do |pwd|
127
+ "output file (use - for reading from standard input)") do |pwd|
97
128
  @encryption = :add
98
129
  @enc_owner_pwd = (pwd == '-' ? command_parser.read_password("Owner password") : pwd)
99
130
  end
100
131
  options.on("--user-password PASSWORD", String, "The user password to be set on the " \
101
- "output file. Use - for reading from standard input.") do |pwd|
132
+ "output file (use - for reading from standard input)") do |pwd|
102
133
  @encryption = :add
103
134
  @enc_user_pwd = (pwd == '-' ? command_parser.read_password("User password") : pwd)
104
135
  end
@@ -121,19 +152,23 @@ module HexaPDF
121
152
  options.on("--permissions PERMS", Array,
122
153
  "Comma separated list of permissions to be set on the output file. Possible " \
123
154
  "values: #{syms.join(', ')}") do |perms|
124
- perms.each do |perm|
125
- unless syms.include?(perm)
155
+ perms.map! do |perm|
156
+ unless syms.include?(perm.to_sym)
126
157
  raise OptionParser::InvalidArgument, "#{perm} (invalid permission name)"
127
158
  end
159
+ perm.to_sym
128
160
  end
129
161
  @encryption = :add
130
162
  @enc_permissions = perms
131
163
  end
132
164
 
133
- @password = nil
134
- @pages = '1-e'
165
+ @files = []
166
+ @initial_empty = false
167
+ @interleave = false
168
+
135
169
  @embed_files = []
136
170
  @compact = true
171
+ @compress_pages = false
137
172
  @object_streams = :preserve
138
173
  @xref_streams = :preserve
139
174
  @streams = :preserve
@@ -146,58 +181,123 @@ module HexaPDF
146
181
  @enc_permissions = []
147
182
  end
148
183
 
149
- def execute(input_file, output_file) #:nodoc:
150
- @compact = true unless @pages == '1-e'
151
- if @enc_user_pwd && !@enc_user_pwd.empty? && (!@enc_owner_pwd || @enc_owner_pwd.empty?)
152
- @enc_owner_pwd = @enc_user_pwd
184
+ def execute(output_file) #:nodoc:
185
+ if !@initial_empty && @files.empty?
186
+ error = OptionParser::ParseError.new("At least one --file FILE or --empty is needed")
187
+ error.reason = "Missing argument"
188
+ raise error
153
189
  end
154
190
 
155
- HexaPDF::Document.open(input_file, decryption_opts: {password: @password}) do |doc|
156
- arrange_pages(doc) unless @pages == '1-e'
157
- @embed_files.each {|file| doc.utils.add_file(file, embed: true)}
158
-
159
- doc.task(:optimize, compact: @compact, object_streams: @object_streams,
160
- xref_streams: @xref_streams)
191
+ # Create PDF documents for each input file
192
+ cache = {}
193
+ @files.each do |spec|
194
+ cache[spec.file] ||= HexaPDF::Document.new(io: File.open(spec.file),
195
+ decryption_opts: {password: spec.password})
196
+ spec.file = cache[spec.file]
197
+ end
161
198
 
162
- handle_streams(doc) if @streams != :preserve
199
+ # Assemble pages
200
+ target = (@initial_empty ? HexaPDF::Document.new : @files.first.file)
201
+ page_tree = target.add(Type: :Pages)
202
+ import_pages(page_tree)
203
+ target.catalog[:Pages] = page_tree
163
204
 
164
- if @encryption == :add
165
- doc.encrypt(algorithm: @enc_algorithm, key_length: @enc_key_length,
166
- force_V4: @enc_force_v4, permissions: @enc_permissions,
167
- owner_password: @enc_owner_pwd, user_password: @enc_user_pwd)
168
- elsif @encryption == :remove
169
- doc.encrypt(name: nil)
205
+ # Remove potentially imported but unused pages and page tree nodes
206
+ retained = target.pages.each_with_object({}) {|page, h| h[page.data] = true}
207
+ retained[target.pages.root.data] = true
208
+ target.each(current: false) do |obj|
209
+ next unless obj.kind_of?(HexaPDF::Dictionary)
210
+ if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
211
+ target.delete(obj)
170
212
  end
213
+ end
214
+
215
+ # Embed the given files
216
+ @embed_files.each {|file| target.files.add(file, embed: true)}
217
+
218
+ # Optimize the PDF file
219
+ target.task(:optimize, compact: @compact, object_streams: @object_streams,
220
+ xref_streams: @xref_streams, compress_pages: @compress_pages)
171
221
 
172
- doc.write(output_file)
222
+ # Update stream filters
223
+ handle_streams(target) unless @streams == :preserve
224
+
225
+ # Encrypt, decrypt or do nothing
226
+ if @encryption == :add
227
+ target.encrypt(algorithm: @enc_algorithm, key_length: @enc_key_length,
228
+ force_V4: @enc_force_v4, permissions: @enc_permissions,
229
+ owner_password: @enc_owner_pwd, user_password: @enc_user_pwd)
230
+ elsif @encryption == :remove
231
+ target.encrypt(name: nil)
173
232
  end
233
+
234
+ target.write(output_file)
174
235
  rescue HexaPDF::Error => e
175
- $stderr.puts "Error while processing the PDF file: #{e.message}"
236
+ $stderr.puts "Processing error : #{e.message}"
176
237
  exit(1)
177
238
  end
178
239
 
240
+ def usage_arguments #:nodoc:
241
+ "{--file IN_FILE | --empty} OUT_FILE"
242
+ end
243
+
179
244
  private
180
245
 
181
- # Arranges the pages of the document as specified with the --pages option.
182
- def arrange_pages(doc)
183
- pages = command_parser.parse_pages_specification(@pages, doc.pages.page_count)
184
- new_page_tree = doc.add(Type: :Pages)
185
- pages.each do |index, rotation|
186
- page = doc.pages.page(index)
187
- page.value.update(page.copy_inherited_values)
188
- if rotation == :none
189
- page.delete(:Rotate)
190
- else
191
- page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
246
+ # Imports the pages of the document as specified with the --pages option to the given page
247
+ # tree.
248
+ def import_pages(page_tree)
249
+ @files.each do |s|
250
+ page_list = s.file.pages.to_a
251
+ s.pages = command_parser.parse_pages_specification(s.pages, s.file.pages.count)
252
+ s.pages.each do |arr|
253
+ arr[0] = page_list[arr[0]]
254
+ arr[1] = arr[0].value[:Rotate] || :none unless arr[1]
255
+ end
256
+ end
257
+
258
+ if @interleave
259
+ max_pages_per_file = 0
260
+ all = @files.each_with_index.map do |spec, findex|
261
+ list = []
262
+ spec.pages.each {|index, rotation| list << [spec.file, findex, index, rotation]}
263
+ max_pages_per_file = list.size if list.size > max_pages_per_file
264
+ list
192
265
  end
193
- new_page_tree.add_page(page)
266
+ first, *rest = *all
267
+ first[max_pages_per_file - 1] ||= nil
268
+ first.zip(*rest) do |slice|
269
+ slice.each do |source, findex, page, rotation|
270
+ next unless source
271
+ import_page(page_tree, findex, page, rotation)
272
+ end
273
+ end
274
+ else
275
+ @files.each_with_index do |s, findex|
276
+ s.pages.each {|page, rotation| import_page(page_tree, findex, page, rotation)}
277
+ end
278
+ end
279
+ end
280
+
281
+ # Import the page with the given +rotation+ into the page tree.
282
+ def import_page(page_tree, source_index, page, rotation)
283
+ if page_tree.document == page.document
284
+ page.value.update(page.copy_inherited_values)
285
+ page = page.deep_copy unless source_index == 0
286
+ else
287
+ page = page_tree.document.import(page).deep_copy
288
+ end
289
+ if rotation == :none
290
+ page.delete(:Rotate)
291
+ elsif rotation.kind_of?(Integer)
292
+ page[:Rotate] = ((page[:Rotate] || 0) + rotation) % 360
194
293
  end
195
- doc.catalog[:Pages] = new_page_tree
294
+ page_tree.document.add(page)
295
+ page_tree.add_page(page)
196
296
  end
197
297
 
198
298
  IGNORED_FILTERS = { #:nodoc:
199
299
  CCITTFaxDecode: true, JBIG2Decode: true, DCTDecode: true, JPXDecode: true, Crypt: true
200
- }
300
+ }.freeze
201
301
 
202
302
  # Applies the chosen stream mode to all streams.
203
303
  def handle_streams(doc)