hexapdf 0.41.0 → 0.43.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +55 -0
  3. data/Rakefile +1 -1
  4. data/examples/031-acro_form_java_script.rb +36 -24
  5. data/lib/hexapdf/cli/command.rb +14 -11
  6. data/lib/hexapdf/cli/files.rb +31 -7
  7. data/lib/hexapdf/cli/form.rb +10 -31
  8. data/lib/hexapdf/cli/inspect.rb +1 -1
  9. data/lib/hexapdf/cli/usage.rb +215 -0
  10. data/lib/hexapdf/cli.rb +2 -0
  11. data/lib/hexapdf/configuration.rb +1 -1
  12. data/lib/hexapdf/dictionary.rb +3 -3
  13. data/lib/hexapdf/document.rb +14 -1
  14. data/lib/hexapdf/encryption.rb +17 -0
  15. data/lib/hexapdf/layout/box.rb +1 -0
  16. data/lib/hexapdf/layout/box_fitter.rb +3 -3
  17. data/lib/hexapdf/layout/column_box.rb +2 -2
  18. data/lib/hexapdf/layout/container_box.rb +1 -1
  19. data/lib/hexapdf/layout/line.rb +4 -0
  20. data/lib/hexapdf/layout/list_box.rb +2 -2
  21. data/lib/hexapdf/layout/table_box.rb +1 -1
  22. data/lib/hexapdf/layout/text_box.rb +16 -2
  23. data/lib/hexapdf/parser.rb +20 -17
  24. data/lib/hexapdf/type/acro_form/button_field.rb +7 -5
  25. data/lib/hexapdf/type/acro_form/form.rb +123 -27
  26. data/lib/hexapdf/type/acro_form/java_script_actions.rb +165 -14
  27. data/lib/hexapdf/type/acro_form/text_field.rb +13 -1
  28. data/lib/hexapdf/type/resources.rb +2 -1
  29. data/lib/hexapdf/utils.rb +19 -0
  30. data/lib/hexapdf/version.rb +1 -1
  31. data/test/hexapdf/layout/test_box_fitter.rb +3 -3
  32. data/test/hexapdf/layout/test_text_box.rb +27 -1
  33. data/test/hexapdf/test_dictionary.rb +6 -4
  34. data/test/hexapdf/test_parser.rb +12 -0
  35. data/test/hexapdf/test_utils.rb +16 -0
  36. data/test/hexapdf/type/acro_form/test_button_field.rb +5 -0
  37. data/test/hexapdf/type/acro_form/test_form.rb +110 -2
  38. data/test/hexapdf/type/acro_form/test_java_script_actions.rb +102 -1
  39. data/test/hexapdf/type/acro_form/test_text_field.rb +22 -4
  40. data/test/hexapdf/type/test_resources.rb +5 -0
  41. metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ae86345e0f2ed2dd27c9c58c550e7eaffb4d7c5d3ba388afb04318ee20491313
4
- data.tar.gz: cfd9f8575ce9f4324c594c2617cc7e1bcf1d735276ac92a275f26acf74566bc9
3
+ metadata.gz: 24f6839e903fd945678915625b1e2ef6a12221f29a82cf1c7a6d8bcea38af288
4
+ data.tar.gz: 2f8309e2ef2406dd279e00643bf7fbf73ded63a1076c0067684a356fea8ee89e
5
5
  SHA512:
6
- metadata.gz: d36715922fbbf5a93eeb5512ed0abf7ec78fbd099c49131500bd9f7db75c39248bb2bac12ad7e3df5c4a8a449351f63e0d83e0ed635f9a025e4bf25fdbe9f0e1
7
- data.tar.gz: fc7a89694614826c8d151b7dab2c3f45ec335fc94efff873065c3dc68c845a691311b91b42c3b791eb71be80e1f8fd623838eb0cf1b94d0d1b758441df1b1a7a
6
+ metadata.gz: 8c6ddd8ec6f19d39daa9a6422fdb3595d255b528fb93ca7907ef12dab0eca23c74de9fcfc27d02c15b3a9d3c24d47c7f7db6ea472f4c1ed34c2b7c06d4a8a351
7
+ data.tar.gz: 1d77c2da70d9048cbef6935afacde40fb6d4041414fce571a331a4bee33b0e3ee2ff59bb28eee02e515798c3c156a57f42f139356e946f64e878ea962756a1d8
data/CHANGELOG.md CHANGED
@@ -1,3 +1,58 @@
1
+ ## 0.43.0 - 2024-05-26
2
+
3
+ ### Added
4
+
5
+ * [HexaPDF::Type::AcroForm::Form#create_namespace_field] for creating a pure
6
+ namespace field
7
+ * [HexaPDF::Type::AcroForm::Form#delete_field] for deleting fields
8
+
9
+ ### Changed
10
+
11
+ * Minimum Ruby version to be 3.0
12
+ * **Breaking change**: Renamed `HexaPDF::Layout::BoxFitter#fit_successful?` to
13
+ [HexaPDF::Layout::BoxFitter#success?]
14
+ * **Breaking change**: Removed HexaPDF::Dictionary#to_h
15
+ * Form field creation methods of [HexaPDF::Type::AcroForm::Form] to
16
+ automatically create parent fields as namespace fields
17
+
18
+ ### Fixed
19
+
20
+ * [HexaPDF::Layout::TextBox#fit] to correctly calculate width in case of flowing
21
+ text around other boxes
22
+ * [HexaPDF::Layout::TextBox#draw] to correctly draw border, background... on
23
+ boxes using position 'flow'
24
+ * Comparison of Hash with [HexaPDF::Dictionary] objects by implementing
25
+ `#to_hash`
26
+ * Parsing of invalid files having multiple end-of-file markers with the last one
27
+ being invalid
28
+
29
+
30
+ ## 0.42.0 - 2024-05-12
31
+
32
+ ### Added
33
+
34
+ * Support for the `AFPercent_Format` JavaScript method
35
+ * Support for the `AFTime_Format` JavaScript method
36
+ * [HexaPDF::Type::AcroForm::Form#fill] for easily filling out form fields
37
+ * CLI command `hexapdf usage` for showing space usage information
38
+ * Support for attaching files via `hexapdf files` CLI command
39
+ * Refinement on [HexaPDF::Utils] to support conversion of Numeric values to
40
+ points (e.g. `5.mm`, `5.cm`, `5.inch`)
41
+
42
+ ### Changed
43
+
44
+ * [HexaPDF::Type::AcroForm::ButtonField#field_value=] to always allow using
45
+ `true` for check boxes
46
+ * CLI commands to prompt whether an existing output file should be overwritten
47
+
48
+ ### Fixed
49
+
50
+ * [HexaPDF::Type::Resources#font] to always return a correctly wrapped font
51
+ object
52
+ * [HexaPDF::Type::AcroForm::TextField#field_value=] to actually use the value
53
+ returned by the call to the config option 'acro_form.on_invalid_value'
54
+
55
+
1
56
  ## 0.41.0 - 2024-05-05
2
57
 
3
58
  ### Added
data/Rakefile CHANGED
@@ -47,7 +47,7 @@ namespace :dev do
47
47
  end
48
48
 
49
49
  task :test_all do
50
- versions = `rbenv versions --bare | grep -i ^2.7\\\\\\|^3.`.split("\n")
50
+ versions = `rbenv versions --bare | grep -i ^3.`.split("\n")
51
51
  versions.each do |version|
52
52
  sh "eval \"$(rbenv init -)\"; rbenv shell #{version} && ruby -v && rake test"
53
53
  end
data/examples/031-acro_form_java_script.rb CHANGED
@@ -31,70 +31,82 @@ tx.set_format_action(:number, decimals: 2, separator_style: :comma)
31
31
  widget = tx.create_widget(page, Rect: [200, 615, 500, 635])
32
32
  tx.field_value = "1234567.898"
33
33
 
34
- canvas.text("Calculate actions", at: [50, 570])
34
+ canvas.text("Percent format", at: [70, 590])
35
+ tx = form.create_text_field("Percent_Format", font_size: 16)
36
+ tx.set_format_action(:percent, decimals: 2, separator_style: :comma)
37
+ widget = tx.create_widget(page, Rect: [200, 585, 500, 605])
38
+ tx.field_value = "12,45678"
35
39
 
36
- canvas.text("Source fields", at: [70, 540])
37
- canvas.text("a:", at: [200, 540])
40
+ canvas.text("Time format", at: [70, 560])
41
+ tx = form.create_text_field("Time_Format", font_size: 16)
42
+ tx.set_format_action(:time, format: :hh_mm_ss)
43
+ widget = tx.create_widget(page, Rect: [200, 555, 500, 575])
44
+ tx.field_value = "3:15:20 pm"
45
+
46
+ canvas.text("Calculate actions", at: [50, 510])
47
+
48
+ canvas.text("Source fields", at: [70, 480])
49
+ canvas.text("a:", at: [200, 480])
38
50
  tx = form.create_text_field("a", font_size: 16)
39
51
  tx.set_format_action(:number, decimals: 2)
40
- widget = tx.create_widget(page, Rect: [220, 535, 280, 555])
52
+ widget = tx.create_widget(page, Rect: [220, 475, 280, 495])
41
53
  tx.field_value = "10,50"
42
- canvas.text("b:", at: [310, 540])
54
+ canvas.text("b:", at: [310, 480])
43
55
  tx = form.create_text_field("b", font_size: 16)
44
56
  tx.set_format_action(:number, decimals: 2)
45
- widget = tx.create_widget(page, Rect: [330, 535, 390, 555])
57
+ widget = tx.create_widget(page, Rect: [330, 475, 390, 495])
46
58
  tx.field_value = "20,60"
47
- canvas.text("c:", at: [420, 540])
59
+ canvas.text("c:", at: [420, 480])
48
60
  tx = form.create_text_field("c", font_size: 16)
49
61
  tx.set_format_action(:number, decimals: 2)
50
- widget = tx.create_widget(page, Rect: [440, 535, 500, 555])
62
+ widget = tx.create_widget(page, Rect: [440, 475, 500, 495])
51
63
  tx.field_value = "30,70"
52
64
 
53
- canvas.text("Predefined", at: [70, 510])
54
- canvas.text("Sum", at: [90, 480])
65
+ canvas.text("Predefined", at: [70, 450])
66
+ canvas.text("Sum", at: [90, 420])
55
67
  tx = form.create_text_field("sum", font_size: 16)
56
68
  tx.set_format_action(:number, decimals: 2)
57
69
  tx.set_calculate_action(:sum, fields: ['a', 'b', 'c'])
58
70
  tx.flag(:read_only)
59
- widget = tx.create_widget(page, Rect: [310, 475, 500, 495])
60
- canvas.text("Average", at: [90, 450])
71
+ widget = tx.create_widget(page, Rect: [310, 415, 500, 435])
72
+ canvas.text("Average", at: [90, 390])
61
73
  tx = form.create_text_field("average", font_size: 16)
62
74
  tx.set_format_action(:number, decimals: 2)
63
75
  tx.set_calculate_action(:average, fields: ['a', 'b', 'c'])
64
76
  tx.flag(:read_only)
65
- widget = tx.create_widget(page, Rect: [310, 445, 500, 465])
66
- canvas.text("Product", at: [90, 420])
77
+ widget = tx.create_widget(page, Rect: [310, 385, 500, 405])
78
+ canvas.text("Product", at: [90, 360])
67
79
  tx = form.create_text_field("product", font_size: 16)
68
80
  tx.set_format_action(:number, decimals: 2)
69
81
  tx.set_calculate_action(:product, fields: ['a', 'b', 'c'])
70
82
  tx.flag(:read_only)
71
- widget = tx.create_widget(page, Rect: [310, 415, 500, 435])
72
- canvas.text("Minimum", at: [90, 390])
83
+ widget = tx.create_widget(page, Rect: [310, 355, 500, 375])
84
+ canvas.text("Minimum", at: [90, 330])
73
85
  tx = form.create_text_field("min", font_size: 16)
74
86
  tx.set_format_action(:number, decimals: 2)
75
87
  tx.set_calculate_action(:min, fields: ['a', 'b', 'c'])
76
88
  tx.flag(:read_only)
77
- widget = tx.create_widget(page, Rect: [310, 385, 500, 405])
78
- canvas.text("Maximum", at: [90, 360])
89
+ widget = tx.create_widget(page, Rect: [310, 325, 500, 345])
90
+ canvas.text("Maximum", at: [90, 300])
79
91
  tx = form.create_text_field("max", font_size: 16)
80
92
  tx.set_format_action(:number, decimals: 2)
81
93
  tx.set_calculate_action(:max, fields: ['a', 'b', 'c'])
82
94
  tx.flag(:read_only)
83
- widget = tx.create_widget(page, Rect: [310, 355, 500, 375])
95
+ widget = tx.create_widget(page, Rect: [310, 295, 500, 315])
84
96
 
85
- canvas.text("Simplified Field Notation", at: [70, 330])
86
- canvas.text("a + b + c", at: [90, 300])
97
+ canvas.text("Simplified Field Notation", at: [70, 270])
98
+ canvas.text("a + b + c", at: [90, 240])
87
99
  tx = form.create_text_field("sfn1", font_size: 16)
88
100
  tx.set_format_action(:number, decimals: 2)
89
101
  tx.set_calculate_action(:sfn, fields: "a + b + c")
90
102
  tx.flag(:read_only)
91
- widget = tx.create_widget(page, Rect: [310, 295, 500, 315])
92
- canvas.text("(a + b)*(c - a) / b + 3.14", at: [90, 270])
103
+ widget = tx.create_widget(page, Rect: [310, 235, 500, 255])
104
+ canvas.text("(a + b)*(c - a) / b + 3.14", at: [90, 210])
93
105
  tx = form.create_text_field("sfn2", font_size: 16)
94
106
  tx.set_format_action(:number, decimals: 2)
95
107
  tx.set_calculate_action(:sfn, fields: "(a + b)*(c - a) / b + 3.14")
96
108
  tx.flag(:read_only)
97
- widget = tx.create_widget(page, Rect: [310, 265, 500, 285])
109
+ widget = tx.create_widget(page, Rect: [310, 205, 500, 225])
98
110
 
99
111
  form.recalculate_fields
100
112
 
data/lib/hexapdf/cli/command.rb CHANGED
@@ -53,7 +53,7 @@ module HexaPDF
53
53
  module Extensions #:nodoc:
54
54
  def help_banner #:nodoc:
55
55
  "hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
56
- "Copyright (c) 2014-2023 Thomas Leitner; licensed under the AGPLv3\n\n" \
56
+ "Copyright (c) 2014-2024 Thomas Leitner; licensed under the AGPLv3\n\n" \
57
57
  "#{format(usage, indent: 7)}\n\n"
58
58
  end
59
59
 
@@ -167,12 +167,12 @@ module HexaPDF
167
167
  end
168
168
  end
169
169
 
170
- # Checks whether the given output file exists and raises an error if it does and
171
- # HexaPDF::CLI#force is not set.
170
+ # Checks whether the given output file exists and asks whether to overwrite the output file if
171
+ # it does. If HexaPDF::CLI#force is set, a possibly existing output file is always overwritten.
172
172
  def maybe_raise_on_existing_file(filename)
173
173
  if !command_parser.force && File.exist?(filename)
174
- raise Error, "Output file '#{filename}' already exists, not overwriting. Use --force to " \
175
- "force writing"
174
+ response = read_from_console("Output file '#{filename}' already exists - overwrite? (y/n)")
175
+ exit(1) unless response =~ /y/i
176
176
  end
177
177
  end
178
178
 
@@ -377,9 +377,9 @@ module HexaPDF
377
377
  # console.
378
378
  def read_password(prompt = "Password")
379
379
  if $stdin.tty?
380
- read_from_console(prompt)
380
+ read_from_console(prompt, noecho: true)
381
381
  else
382
- ($stdin.gets || read_from_console(prompt)).chomp
382
+ ($stdin.gets || read_from_console(prompt, noecho: true)).chomp
383
383
  end
384
384
  end
385
385
 
@@ -407,11 +407,14 @@ module HexaPDF
407
407
  private
408
408
 
409
409
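  # Displays the given prompt, reads a line from the console (without echo if +noecho+ is true) and
  # returns it. NOTE(review): the +noecho+ branch ends with +puts+, so it returns +nil+ - confirm.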
410
- def read_from_console(prompt)
410
+ def read_from_console(prompt, noecho: false)
411
411
  IO.console.write("#{prompt}: ")
412
- str = IO.console.noecho {|io| io.gets.chomp }
413
- puts
414
- str
412
+ if noecho
413
+ IO.console.noecho {|io| io.gets.chomp }
414
+ puts
415
+ else
416
+ IO.console.gets.chomp
417
+ end
415
418
  end
416
419
 
417
420
  end
data/lib/hexapdf/cli/files.rb CHANGED
@@ -39,19 +39,29 @@ require 'hexapdf/cli/command'
39
39
  module HexaPDF
40
40
  module CLI
41
41
 
42
- # Lists or extracts embedded files from a PDF file.
42
+ # Lists or extracts embedded files from a PDF file or attaches them.
43
43
  #
44
44
  # See: HexaPDF::Type::EmbeddedFile
45
45
  class Files < Command
46
46
 
47
47
  def initialize #:nodoc:
48
48
  super('files', takes_commands: false)
49
- short_desc("List or extract embedded files from a PDF file")
49
+ short_desc("List and extract embedded files from a PDF or attach files")
50
50
  long_desc(<<~EOF)
51
- If the option --extract is not given, the available files are listed with their names and
52
- indices. The --extract option can then be used to extract one or more files.
51
+ If neither the option --attach nor the option --extract is given, the available
52
+ files are listed with their names and indices. The --extract option can then be
53
+ used to extract one or more files. Or the --attach option can be used to attach
54
+ files to the PDF.
53
55
  EOF
54
56
 
57
+ options.on("--attach FILE", "-a FILE", String,
58
+ "The file that should be attached. Can be used multiple times.") do |file|
59
+ @attach_files << [file, nil]
60
+ end
61
+ options.on("--description DESC", "-d DESC", String,
62
+ "Adds a description to the last file to be attached.") do |description|
63
+ @attach_files[-1][1] = description
64
+ end
55
65
  options.on("--extract [a,b,c,...]", "-e [a,b,c,...]", Array,
56
66
  "The indices of the files that should be extracted. Use 0 or no argument to " \
57
67
  "extract all files.") do |indices|
@@ -66,15 +76,24 @@ module HexaPDF
66
76
  @password = (pwd == '-' ? read_password : pwd)
67
77
  end
68
78
 
79
+ @attach_files = []
69
80
  @indices = []
70
81
  @password = nil
71
82
  @search = false
72
83
  end
73
84
 
74
- def execute(pdf) #:nodoc:
75
- with_document(pdf, password: @password) do |doc|
76
- if @indices.empty?
85
+ def execute(pdf, output = nil) #:nodoc:
86
+ if @indices.empty? && !@attach_files.empty?
87
+ raise Error, "Missing output file" unless output
88
+ maybe_raise_on_existing_file(output)
89
+ end
90
+ with_document(pdf, password: @password, out_file: output) do |doc|
91
+ if @indices.empty? && @attach_files.empty?
77
92
  list_files(doc)
93
+ elsif !@indices.empty? && !@attach_files.empty?
94
+ raise Error, "Use either --attach or --extract but not both"
95
+ elsif !@attach_files.empty?
96
+ attach_files(doc)
78
97
  else
79
98
  extract_files(doc)
80
99
  end
@@ -116,6 +135,11 @@ module HexaPDF
116
135
  end
117
136
  end
118
137
 
138
+ # Attaches the files given on the CLI to the document.
139
+ def attach_files(doc)
140
+ @attach_files.each {|file, desc| doc.files.add(file, description: desc) }
141
+ end
142
+
119
143
  # Iterates over all embedded files.
120
144
  def each_file(doc, &block) # :yields: obj, index
121
145
  doc.files.each(search: @search).select(&:embedded_file?).each_with_index(&block)
data/lib/hexapdf/cli/form.rb CHANGED
@@ -195,6 +195,7 @@ module HexaPDF
195
195
  # Fills out the form by interactively asking the user for field values.
196
196
  def fill_form(doc)
197
197
  current_page_index = -1
198
+ form = doc.acro_form
198
199
  each_field(doc) do |_page, page_index, field, _widget|
199
200
  next if field.flagged?(:read_only) && !@fill_read_only_fields
200
201
  if current_page_index != page_index
@@ -224,9 +225,9 @@ module HexaPDF
224
225
  print " └─ New value: "
225
226
  value = $stdin.readline.chomp
226
227
  next if value.empty?
227
- apply_field_value(field, value)
228
+ form.fill(field.full_field_name => value)
228
229
  rescue HexaPDF::Error => e
229
- puts " ⚠ #{e.message}"
230
+ puts " ⚠ Error while setting '#{field.full_field_name}': #{e.message}"
230
231
  retry
231
232
  end
232
233
  end
@@ -234,18 +235,20 @@ module HexaPDF
234
235
 
235
236
  # Fills out the form using the data from the provided template file.
236
237
  def fill_form_with_template(doc)
237
- data = parse_template
238
238
  form = doc.acro_form
239
- data.each do |name, value|
239
+ data = parse_template
240
+ data.reject! do |name, _value|
240
241
  field = form.field_by_name(name)
241
242
  raise Error, "Field '#{name}' not found in input PDF" unless field
242
243
  if field.flagged?(:read_only) && !@fill_read_only_fields
243
244
  puts "Ignoring field '#{name}' because it is read only and --fill-read-only-fields " \
244
- "is no set"
245
- next
245
+ "is not set"
246
+ true
247
+ else
248
+ false
246
249
  end
247
- apply_field_value(field, value)
248
250
  end
251
+ form.fill(data)
249
252
  end
250
253
 
251
254
  # Parses the data from the given template file.
@@ -273,30 +276,6 @@ module HexaPDF
273
276
  data
274
277
  end
275
278
 
276
- # Applies the given value to the field.
277
- def apply_field_value(field, value)
278
- case field.concrete_field_type
279
- when :single_line_text_field, :multiline_text_field, :comb_text_field, :file_select_field,
280
- :combo_box, :list_box, :editable_combo_box
281
- field.field_value = value
282
- when :check_box
283
- field.field_value = case value
284
- when /y(es)?|t(rue)?/
285
- true
286
- when /n(o)?|f(alse)?/
287
- false
288
- else
289
- value
290
- end
291
- when :radio_button
292
- field.field_value = value.to_sym
293
- else
294
- raise Error, "Field type #{field.concrete_field_type} not yet supported"
295
- end
296
- rescue StandardError
297
- raise Error, "Error while setting '#{field.full_field_name}': #{$!.message}"
298
- end
299
-
300
279
  # Iterates over all non-push button fields in page order. If a field appears on multiple
301
280
  # pages, it is only yielded on the first page if +with_seen+ is +false.
302
281
  def each_field(doc, with_seen: false) # :yields: page, page_index, field
data/lib/hexapdf/cli/inspect.rb CHANGED
@@ -270,7 +270,7 @@ module HexaPDF
270
270
  if (rev_index = data.shift)
271
271
  rev_index = rev_index.to_i - 1
272
272
  if rev_index < 0 || rev_index >= @doc.revisions.count
273
- $stderr.puts("Error: Invalid revision numer specified")
273
+ $stderr.puts("Error: Invalid revision number specified")
274
274
  next
275
275
  end
276
276
  length = 0
data/lib/hexapdf/cli/usage.rb ADDED
@@ -0,0 +1,215 @@
1
+ # -*- encoding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2024 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #
33
+ # If the GNU Affero General Public License doesn't fit your need,
34
+ # commercial licenses are available at <https://gettalong.at/hexapdf/>.
35
+ #++
36
+
37
+ require 'hexapdf/cli/command'
38
+
39
+ module HexaPDF
40
+ module CLI
41
+
42
+ # Shows the space usage of various parts of a PDF file.
43
+ class Usage < Command
44
+
45
+ # Modifies the HexaPDF::PDFData class to store the size information
46
+ module PDFDataExtension
47
+
48
+ # Used to store the size of the indirect object.
49
+ attr_accessor :size
50
+
51
+ # Used to store the size of the object inside the object stream.
52
+ attr_accessor :size_in_object_stream
53
+
54
+ end
55
+
56
+ # Modifies HexaPDF::Parser to retrieve space used by indirect objects.
57
+ module ParserExtension
58
+
59
+ # :nodoc:
60
+ def initialize(*)
61
+ super
62
+ @last_size = nil
63
+ end
64
+
65
+ # :nodoc:
66
+ def load_object(xref_entry)
67
+ super.tap do |obj|
68
+ if xref_entry.type == :compressed
69
+ obj.data.size_in_object_stream = @last_size
70
+ elsif xref_entry.type == :in_use
71
+ obj.data.size = @last_size
72
+ end
73
+ @last_size = nil
74
+ end
75
+ end
76
+
77
+ # :nodoc:
78
+ def parse_indirect_object(offset = nil)
79
+ real_offset = (offset ? @header_offset + offset : @tokenizer.pos)
80
+ result = super
81
+ @last_size = @tokenizer.pos - real_offset
82
+ result
83
+ end
84
+
85
+ # :nodoc:
86
+ def load_compressed_object(xref_entry)
87
+ result = super
88
+ offsets = @object_stream_data[xref_entry.objstm].instance_variable_get(:@offsets)
89
+ @last_size = if xref_entry.pos == offsets.size - 1
90
+ @object_stream_data[xref_entry.objstm].instance_variable_get(:@tokenizer).
91
+ io.size - offsets[xref_entry.pos]
92
+ else
93
+ offsets[xref_entry.pos + 1] - offsets[xref_entry.pos]
94
+ end
95
+ result
96
+ end
97
+
98
+ end
99
+
100
+ def initialize #:nodoc:
101
+ super('usage', takes_commands: false)
102
+ short_desc("Show space usage of various parts of a PDF file")
103
+ long_desc(<<~EOF)
104
+ This command displays some usage statistics of the PDF file, i.e. which parts take which
105
+ approximate space in the file.
106
+
107
+ Each statistic line shows the space used followed by the number of indirect objects in
108
+ parentheses. If some of those objects are in object streams, that number is displayed
109
+ after a slash.
110
+ EOF
111
+
112
+ options.on("--password PASSWORD", "-p", String,
113
+ "The password for decryption. Use - for reading from standard input.") do |pwd|
114
+ @password = (pwd == '-' ? read_password : pwd)
115
+ end
116
+
117
+ @password = nil
118
+ end
119
+
120
+ def execute(file) #:nodoc:
121
+ HexaPDF::Parser.prepend(ParserExtension)
122
+ HexaPDF::PDFData.prepend(PDFDataExtension)
123
+
124
+ with_document(file, password: @password) do |doc|
125
+ # Prepare cache of outline items
126
+ outline_item_cache = {}
127
+ if doc.catalog.key?(:Outlines)
128
+ doc.outline.each_item {|item| outline_item_cache[item] = true }
129
+ outline_item_cache[doc.outline] = true
130
+ end
131
+
132
+ doc.revisions.each.with_index do |rev, index|
133
+ sum = count = 0
134
+ categories = {
135
+ Content: [],
136
+ Files: [],
137
+ Fonts: [],
138
+ Images: [],
139
+ Metadata: [],
140
+ ObjectStreams: [],
141
+ Outline: [],
142
+ XObjects: [],
143
+ }
144
+ puts if index > 0
145
+ puts "Usage information for revision #{index + 1}" if doc.revisions.count > 1
146
+ rev.each do |obj|
147
+ if command_parser.verbosity_info?
148
+ print "(#{obj.oid},#{obj.gen}): #{obj.data.size.to_i}"
149
+ print " (#{obj.data.size_in_object_stream})" if obj.data.size.nil?
150
+ puts
151
+ end
152
+ next unless obj.kind_of?(HexaPDF::Dictionary)
153
+
154
+ case obj.type
155
+ when :Page
156
+ Array(obj[:Contents]).each do |content|
157
+ categories[:Content] << content if object_in_rev?(content, rev)
158
+ end
159
+ when :Font
160
+ categories[:Fonts] << obj
161
+ when :FontDescriptor
162
+ categories[:Fonts] << obj
163
+ [:FontFile, :FontFile2, :FontFile3].each do |name|
164
+ categories[:Fonts] << obj[name] if object_in_rev?(obj[name], rev)
165
+ end
166
+ when :Metadata
167
+ categories[:Metadata] << obj
168
+ when :Filespec
169
+ categories[:Files] << obj
170
+ categories[:Files] << obj.embedded_file_stream if obj.embedded_file?
171
+ when :ObjStm
172
+ categories[:ObjectStreams] << obj
173
+ else
174
+ if obj[:Subtype] == :Image
175
+ categories[:Images] << obj
176
+ elsif obj[:Subtype] == :Form
177
+ categories[:XObjects] << obj
178
+ end
179
+ end
180
+ sum += obj.data.size if obj.data.size
181
+ count += 1
182
+ end
183
+
184
+ # Populate Outline category
185
+ outline_item_cache.reject! do |obj, _val|
186
+ object_in_rev?(obj, rev) && categories[:Outline] << obj
187
+ end
188
+
189
+ categories.each do |name, data|
190
+ next if data.empty?
191
+ object_stream_count = 0
192
+ category_sum = data.sum do |o|
193
+ object_stream_count += 1 unless o.data.size
194
+ o.data.size.to_i
195
+ end
196
+ object_stream_count = object_stream_count > 0 ? "/#{object_stream_count}" : ''
197
+ size = human_readable_file_size(category_sum)
198
+ puts "#{name.to_s.ljust(15)} #{size.rjust(8)} (#{data.count}#{object_stream_count})"
199
+ end
200
+ puts "#{'Total'.ljust(15)} #{human_readable_file_size(sum).rjust(8)} (#{count})"
201
+ end
202
+ end
203
+ end
204
+
205
+ private
206
+
207
+ # Returns +true+ if the +obj+ is in the given +rev+.
208
+ def object_in_rev?(obj, rev)
209
+ obj && rev.object(obj) == obj
210
+ end
211
+
212
+ end
213
+
214
+ end
215
+ end
data/lib/hexapdf/cli.rb CHANGED
@@ -48,6 +48,7 @@ require 'hexapdf/cli/watermark'
48
48
  require 'hexapdf/cli/image2pdf'
49
49
  require 'hexapdf/cli/form'
50
50
  require 'hexapdf/cli/fonts'
51
+ require 'hexapdf/cli/usage'
51
52
  require 'hexapdf/version'
52
53
  require 'hexapdf/document'
53
54
 
@@ -107,6 +108,7 @@ module HexaPDF
107
108
  add_command(HexaPDF::CLI::Image2PDF.new)
108
109
  add_command(HexaPDF::CLI::Form.new)
109
110
  add_command(HexaPDF::CLI::Fonts.new)
111
+ add_command(HexaPDF::CLI::Usage.new)
110
112
  add_command(CmdParse::HelpCommand.new)
111
113
  version_command = CmdParse::VersionCommand.new(add_switches: false)
112
114
  add_command(version_command)
data/lib/hexapdf/configuration.rb CHANGED
@@ -481,7 +481,7 @@ module HexaPDF
481
481
  'acro_form.fallback_font' => 'Helvetica',
482
482
  'acro_form.on_invalid_value' => proc do |field, value|
483
483
  raise HexaPDF::Error, "Invalid value #{value.inspect} for " \
484
- "#{field.concrete_field_type} field #{field.full_field_name}"
484
+ "#{field.concrete_field_type} field named '#{field.full_field_name}'"
485
485
  end,
486
486
  'acro_form.text_field.default_width' => 100,
487
487
  'debug' => false,
data/lib/hexapdf/dictionary.rb CHANGED
@@ -228,9 +228,9 @@ module HexaPDF
228
228
  value.empty?
229
229
  end
230
230
 
231
- # Returns a dup of the underlying hash.
232
- def to_h
233
- value.dup
231
+ # Returns a hash containing the preprocessed values (like in #[]).
232
+ def to_hash
233
+ value.each_with_object({}) {|(k, _), h| h[k] = self[k] }
234
234
  end
235
235
 
236
236
  private