hexapdf 0.41.0 → 0.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +55 -0
  3. data/Rakefile +1 -1
  4. data/examples/031-acro_form_java_script.rb +36 -24
  5. data/lib/hexapdf/cli/command.rb +14 -11
  6. data/lib/hexapdf/cli/files.rb +31 -7
  7. data/lib/hexapdf/cli/form.rb +10 -31
  8. data/lib/hexapdf/cli/inspect.rb +1 -1
  9. data/lib/hexapdf/cli/usage.rb +215 -0
  10. data/lib/hexapdf/cli.rb +2 -0
  11. data/lib/hexapdf/configuration.rb +1 -1
  12. data/lib/hexapdf/dictionary.rb +3 -3
  13. data/lib/hexapdf/document.rb +14 -1
  14. data/lib/hexapdf/encryption.rb +17 -0
  15. data/lib/hexapdf/layout/box.rb +1 -0
  16. data/lib/hexapdf/layout/box_fitter.rb +3 -3
  17. data/lib/hexapdf/layout/column_box.rb +2 -2
  18. data/lib/hexapdf/layout/container_box.rb +1 -1
  19. data/lib/hexapdf/layout/line.rb +4 -0
  20. data/lib/hexapdf/layout/list_box.rb +2 -2
  21. data/lib/hexapdf/layout/table_box.rb +1 -1
  22. data/lib/hexapdf/layout/text_box.rb +16 -2
  23. data/lib/hexapdf/parser.rb +20 -17
  24. data/lib/hexapdf/type/acro_form/button_field.rb +7 -5
  25. data/lib/hexapdf/type/acro_form/form.rb +123 -27
  26. data/lib/hexapdf/type/acro_form/java_script_actions.rb +165 -14
  27. data/lib/hexapdf/type/acro_form/text_field.rb +13 -1
  28. data/lib/hexapdf/type/resources.rb +2 -1
  29. data/lib/hexapdf/utils.rb +19 -0
  30. data/lib/hexapdf/version.rb +1 -1
  31. data/test/hexapdf/layout/test_box_fitter.rb +3 -3
  32. data/test/hexapdf/layout/test_text_box.rb +27 -1
  33. data/test/hexapdf/test_dictionary.rb +6 -4
  34. data/test/hexapdf/test_parser.rb +12 -0
  35. data/test/hexapdf/test_utils.rb +16 -0
  36. data/test/hexapdf/type/acro_form/test_button_field.rb +5 -0
  37. data/test/hexapdf/type/acro_form/test_form.rb +110 -2
  38. data/test/hexapdf/type/acro_form/test_java_script_actions.rb +102 -1
  39. data/test/hexapdf/type/acro_form/test_text_field.rb +22 -4
  40. data/test/hexapdf/type/test_resources.rb +5 -0
  41. metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ae86345e0f2ed2dd27c9c58c550e7eaffb4d7c5d3ba388afb04318ee20491313
4
- data.tar.gz: cfd9f8575ce9f4324c594c2617cc7e1bcf1d735276ac92a275f26acf74566bc9
3
+ metadata.gz: 24f6839e903fd945678915625b1e2ef6a12221f29a82cf1c7a6d8bcea38af288
4
+ data.tar.gz: 2f8309e2ef2406dd279e00643bf7fbf73ded63a1076c0067684a356fea8ee89e
5
5
  SHA512:
6
- metadata.gz: d36715922fbbf5a93eeb5512ed0abf7ec78fbd099c49131500bd9f7db75c39248bb2bac12ad7e3df5c4a8a449351f63e0d83e0ed635f9a025e4bf25fdbe9f0e1
7
- data.tar.gz: fc7a89694614826c8d151b7dab2c3f45ec335fc94efff873065c3dc68c845a691311b91b42c3b791eb71be80e1f8fd623838eb0cf1b94d0d1b758441df1b1a7a
6
+ metadata.gz: 8c6ddd8ec6f19d39daa9a6422fdb3595d255b528fb93ca7907ef12dab0eca23c74de9fcfc27d02c15b3a9d3c24d47c7f7db6ea472f4c1ed34c2b7c06d4a8a351
7
+ data.tar.gz: 1d77c2da70d9048cbef6935afacde40fb6d4041414fce571a331a4bee33b0e3ee2ff59bb28eee02e515798c3c156a57f42f139356e946f64e878ea962756a1d8
data/CHANGELOG.md CHANGED
@@ -1,3 +1,58 @@
1
+ ## 0.43.0 - 2024-05-26
2
+
3
+ ### Added
4
+
5
+ * [HexaPDF::Type::AcroForm::Form#create_namespace_field] for creating a pure
6
+ namespace field
7
+ * [HexaPDF::Type::AcroForm::Form#delete_field] for deleting fields
8
+
9
+ ### Changed
10
+
11
+ * Minimum Ruby version to be 3.0
12
+ * **Breaking change**: Renamed `HexaPDF::Layout::BoxFitter#fit_successful?` to
13
+ [HexaPDF::Layout::BoxFitter#success?]
14
+ * **Breaking change**: Removed `HexaPDF::Dictionary#to_h`
15
+ * Form field creation methods of [HexaPDF::Type::AcroForm::Form] to
16
+ automatically create parent fields as namespace fields
17
+
18
+ ### Fixed
19
+
20
+ * [HexaPDF::Layout::TextBox#fit] to correctly calculate width in case of flowing
21
+ text around other boxes
22
+ * [HexaPDF::Layout::TextBox#draw] to correctly draw border, background... on
23
+ boxes using position 'flow'
24
+ * Comparison of Hash with [HexaPDF::Dictionary] objects by implementing
25
+ `#to_hash`
26
+ * Parsing of invalid files having multiple end-of-file markers with the last one
27
+ being invalid
28
+
29
+
30
+ ## 0.42.0 - 2024-05-12
31
+
32
+ ### Added
33
+
34
+ * Support for the `AFPercent_Format` JavaScript method
35
+ * Support for the `AFTime_Format` JavaScript method
36
+ * [HexaPDF::Type::AcroForm::Form#fill] for easily filling out form fields
37
+ * CLI command `hexapdf usage` for showing space usage information
38
+ * Support for attaching files via `hexapdf files` CLI command
39
+ * Refinement on [HexaPDF::Utils] to support conversion of Numeric values to
40
+ points (e.g. `5.mm`, `5.cm`, `5.inch`)
41
+
42
+ ### Changed
43
+
44
+ * [HexaPDF::Type::AcroForm::ButtonField#field_value=] to always allow using
45
+ `true` for check boxes
46
+ * CLI commands to prompt whether an existing output file should be overwritten
47
+
48
+ ### Fixed
49
+
50
+ * [HexaPDF::Type::Resources#font] to always return a correctly wrapped font
51
+ object
52
+ * [HexaPDF::Type::AcroForm::TextField#field_value=] to actually use the value
53
+ returned by the call to the config option 'acro_form.on_invalid_value'
54
+
55
+
1
56
  ## 0.41.0 - 2024-05-05
2
57
 
3
58
  ### Added
data/Rakefile CHANGED
@@ -47,7 +47,7 @@ namespace :dev do
47
47
  end
48
48
 
49
49
  task :test_all do
50
- versions = `rbenv versions --bare | grep -i ^2.7\\\\\\|^3.`.split("\n")
50
+ versions = `rbenv versions --bare | grep -i ^3.`.split("\n")
51
51
  versions.each do |version|
52
52
  sh "eval \"$(rbenv init -)\"; rbenv shell #{version} && ruby -v && rake test"
53
53
  end
data/examples/031-acro_form_java_script.rb CHANGED
@@ -31,70 +31,82 @@ tx.set_format_action(:number, decimals: 2, separator_style: :comma)
31
31
  widget = tx.create_widget(page, Rect: [200, 615, 500, 635])
32
32
  tx.field_value = "1234567.898"
33
33
 
34
- canvas.text("Calculate actions", at: [50, 570])
34
+ canvas.text("Percent format", at: [70, 590])
35
+ tx = form.create_text_field("Percent_Format", font_size: 16)
36
+ tx.set_format_action(:percent, decimals: 2, separator_style: :comma)
37
+ widget = tx.create_widget(page, Rect: [200, 585, 500, 605])
38
+ tx.field_value = "12,45678"
35
39
 
36
- canvas.text("Source fields", at: [70, 540])
37
- canvas.text("a:", at: [200, 540])
40
+ canvas.text("Time format", at: [70, 560])
41
+ tx = form.create_text_field("Time_Format", font_size: 16)
42
+ tx.set_format_action(:time, format: :hh_mm_ss)
43
+ widget = tx.create_widget(page, Rect: [200, 555, 500, 575])
44
+ tx.field_value = "3:15:20 pm"
45
+
46
+ canvas.text("Calculate actions", at: [50, 510])
47
+
48
+ canvas.text("Source fields", at: [70, 480])
49
+ canvas.text("a:", at: [200, 480])
38
50
  tx = form.create_text_field("a", font_size: 16)
39
51
  tx.set_format_action(:number, decimals: 2)
40
- widget = tx.create_widget(page, Rect: [220, 535, 280, 555])
52
+ widget = tx.create_widget(page, Rect: [220, 475, 280, 495])
41
53
  tx.field_value = "10,50"
42
- canvas.text("b:", at: [310, 540])
54
+ canvas.text("b:", at: [310, 480])
43
55
  tx = form.create_text_field("b", font_size: 16)
44
56
  tx.set_format_action(:number, decimals: 2)
45
- widget = tx.create_widget(page, Rect: [330, 535, 390, 555])
57
+ widget = tx.create_widget(page, Rect: [330, 475, 390, 495])
46
58
  tx.field_value = "20,60"
47
- canvas.text("c:", at: [420, 540])
59
+ canvas.text("c:", at: [420, 480])
48
60
  tx = form.create_text_field("c", font_size: 16)
49
61
  tx.set_format_action(:number, decimals: 2)
50
- widget = tx.create_widget(page, Rect: [440, 535, 500, 555])
62
+ widget = tx.create_widget(page, Rect: [440, 475, 500, 495])
51
63
  tx.field_value = "30,70"
52
64
 
53
- canvas.text("Predefined", at: [70, 510])
54
- canvas.text("Sum", at: [90, 480])
65
+ canvas.text("Predefined", at: [70, 450])
66
+ canvas.text("Sum", at: [90, 420])
55
67
  tx = form.create_text_field("sum", font_size: 16)
56
68
  tx.set_format_action(:number, decimals: 2)
57
69
  tx.set_calculate_action(:sum, fields: ['a', 'b', 'c'])
58
70
  tx.flag(:read_only)
59
- widget = tx.create_widget(page, Rect: [310, 475, 500, 495])
60
- canvas.text("Average", at: [90, 450])
71
+ widget = tx.create_widget(page, Rect: [310, 415, 500, 435])
72
+ canvas.text("Average", at: [90, 390])
61
73
  tx = form.create_text_field("average", font_size: 16)
62
74
  tx.set_format_action(:number, decimals: 2)
63
75
  tx.set_calculate_action(:average, fields: ['a', 'b', 'c'])
64
76
  tx.flag(:read_only)
65
- widget = tx.create_widget(page, Rect: [310, 445, 500, 465])
66
- canvas.text("Product", at: [90, 420])
77
+ widget = tx.create_widget(page, Rect: [310, 385, 500, 405])
78
+ canvas.text("Product", at: [90, 360])
67
79
  tx = form.create_text_field("product", font_size: 16)
68
80
  tx.set_format_action(:number, decimals: 2)
69
81
  tx.set_calculate_action(:product, fields: ['a', 'b', 'c'])
70
82
  tx.flag(:read_only)
71
- widget = tx.create_widget(page, Rect: [310, 415, 500, 435])
72
- canvas.text("Minimum", at: [90, 390])
83
+ widget = tx.create_widget(page, Rect: [310, 355, 500, 375])
84
+ canvas.text("Minimum", at: [90, 330])
73
85
  tx = form.create_text_field("min", font_size: 16)
74
86
  tx.set_format_action(:number, decimals: 2)
75
87
  tx.set_calculate_action(:min, fields: ['a', 'b', 'c'])
76
88
  tx.flag(:read_only)
77
- widget = tx.create_widget(page, Rect: [310, 385, 500, 405])
78
- canvas.text("Maximum", at: [90, 360])
89
+ widget = tx.create_widget(page, Rect: [310, 325, 500, 345])
90
+ canvas.text("Maximum", at: [90, 300])
79
91
  tx = form.create_text_field("max", font_size: 16)
80
92
  tx.set_format_action(:number, decimals: 2)
81
93
  tx.set_calculate_action(:max, fields: ['a', 'b', 'c'])
82
94
  tx.flag(:read_only)
83
- widget = tx.create_widget(page, Rect: [310, 355, 500, 375])
95
+ widget = tx.create_widget(page, Rect: [310, 295, 500, 315])
84
96
 
85
- canvas.text("Simplified Field Notation", at: [70, 330])
86
- canvas.text("a + b + c", at: [90, 300])
97
+ canvas.text("Simplified Field Notation", at: [70, 270])
98
+ canvas.text("a + b + c", at: [90, 240])
87
99
  tx = form.create_text_field("sfn1", font_size: 16)
88
100
  tx.set_format_action(:number, decimals: 2)
89
101
  tx.set_calculate_action(:sfn, fields: "a + b + c")
90
102
  tx.flag(:read_only)
91
- widget = tx.create_widget(page, Rect: [310, 295, 500, 315])
92
- canvas.text("(a + b)*(c - a) / b + 3.14", at: [90, 270])
103
+ widget = tx.create_widget(page, Rect: [310, 235, 500, 255])
104
+ canvas.text("(a + b)*(c - a) / b + 3.14", at: [90, 210])
93
105
  tx = form.create_text_field("sfn2", font_size: 16)
94
106
  tx.set_format_action(:number, decimals: 2)
95
107
  tx.set_calculate_action(:sfn, fields: "(a + b)*(c - a) / b + 3.14")
96
108
  tx.flag(:read_only)
97
- widget = tx.create_widget(page, Rect: [310, 265, 500, 285])
109
+ widget = tx.create_widget(page, Rect: [310, 205, 500, 225])
98
110
 
99
111
  form.recalculate_fields
100
112
 
data/lib/hexapdf/cli/command.rb CHANGED
@@ -53,7 +53,7 @@ module HexaPDF
53
53
  module Extensions #:nodoc:
54
54
  def help_banner #:nodoc:
55
55
  "hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
56
- "Copyright (c) 2014-2023 Thomas Leitner; licensed under the AGPLv3\n\n" \
56
+ "Copyright (c) 2014-2024 Thomas Leitner; licensed under the AGPLv3\n\n" \
57
57
  "#{format(usage, indent: 7)}\n\n"
58
58
  end
59
59
 
@@ -167,12 +167,12 @@ module HexaPDF
167
167
  end
168
168
  end
169
169
 
170
- # Checks whether the given output file exists and raises an error if it does and
171
- # HexaPDF::CLI#force is not set.
170
+ # Checks whether the given output file exists and asks whether to overwrite the output file if
171
+ # it does. If HexaPDF::CLI#force is set, a possibly existing output file is always overwritten.
172
172
  def maybe_raise_on_existing_file(filename)
173
173
  if !command_parser.force && File.exist?(filename)
174
- raise Error, "Output file '#{filename}' already exists, not overwriting. Use --force to " \
175
- "force writing"
174
+ response = read_from_console("Output file '#{filename}' already exists - overwrite? (y/n)")
175
+ exit(1) unless response =~ /y/i
176
176
  end
177
177
  end
178
178
 
@@ -377,9 +377,9 @@ module HexaPDF
377
377
  # console.
378
378
  def read_password(prompt = "Password")
379
379
  if $stdin.tty?
380
- read_from_console(prompt)
380
+ read_from_console(prompt, noecho: true)
381
381
  else
382
- ($stdin.gets || read_from_console(prompt)).chomp
382
+ ($stdin.gets || read_from_console(prompt, noecho: true)).chomp
383
383
  end
384
384
  end
385
385
 
@@ -407,11 +407,14 @@ module HexaPDF
407
407
  private
408
408
 
409
409
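  # Displays the given prompt and reads a line from the console, suppressing echo only if +noecho+ is true. NOTE(review): the noecho branch ends with +puts+, so it returns nil instead of the read string.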
410
- def read_from_console(prompt)
410
+ def read_from_console(prompt, noecho: false)
411
411
  IO.console.write("#{prompt}: ")
412
- str = IO.console.noecho {|io| io.gets.chomp }
413
- puts
414
- str
412
+ if noecho
413
+ IO.console.noecho {|io| io.gets.chomp }
414
+ puts
415
+ else
416
+ IO.console.gets.chomp
417
+ end
415
418
  end
416
419
 
417
420
  end
data/lib/hexapdf/cli/files.rb CHANGED
@@ -39,19 +39,29 @@ require 'hexapdf/cli/command'
39
39
  module HexaPDF
40
40
  module CLI
41
41
 
42
- # Lists or extracts embedded files from a PDF file.
42
+ # Lists or extracts embedded files from a PDF file or attaches them.
43
43
  #
44
44
  # See: HexaPDF::Type::EmbeddedFile
45
45
  class Files < Command
46
46
 
47
47
  def initialize #:nodoc:
48
48
  super('files', takes_commands: false)
49
- short_desc("List or extract embedded files from a PDF file")
49
+ short_desc("List and extract embedded files from a PDF or attach files")
50
50
  long_desc(<<~EOF)
51
- If the option --extract is not given, the available files are listed with their names and
52
- indices. The --extract option can then be used to extract one or more files.
51
+ If neither the option --attach nor the option --extract is given, the available
52
+ files are listed with their names and indices. The --extract option can then be
53
+ used to extract one or more files. Or the --attach option can be used to attach
54
+ files to the PDF.
53
55
  EOF
54
56
 
57
+ options.on("--attach FILE", "-a FILE", String,
58
+ "The file that should be attached. Can be used multiple times.") do |file|
59
+ @attach_files << [file, nil]
60
+ end
61
+ options.on("--description DESC", "-d DESC", String,
62
+ "Adds a description to the last file to be attached.") do |description|
63
+ @attach_files[-1][1] = description
64
+ end
55
65
  options.on("--extract [a,b,c,...]", "-e [a,b,c,...]", Array,
56
66
  "The indices of the files that should be extracted. Use 0 or no argument to " \
57
67
  "extract all files.") do |indices|
@@ -66,15 +76,24 @@ module HexaPDF
66
76
  @password = (pwd == '-' ? read_password : pwd)
67
77
  end
68
78
 
79
+ @attach_files = []
69
80
  @indices = []
70
81
  @password = nil
71
82
  @search = false
72
83
  end
73
84
 
74
- def execute(pdf) #:nodoc:
75
- with_document(pdf, password: @password) do |doc|
76
- if @indices.empty?
85
+ def execute(pdf, output = nil) #:nodoc:
86
+ if @indices.empty? && !@attach_files.empty?
87
+ raise Error, "Missing output file" unless output
88
+ maybe_raise_on_existing_file(output)
89
+ end
90
+ with_document(pdf, password: @password, out_file: output) do |doc|
91
+ if @indices.empty? && @attach_files.empty?
77
92
  list_files(doc)
93
+ elsif !@indices.empty? && !@attach_files.empty?
94
+ raise Error, "Use either --attach or --extract but not both"
95
+ elsif !@attach_files.empty?
96
+ attach_files(doc)
78
97
  else
79
98
  extract_files(doc)
80
99
  end
@@ -116,6 +135,11 @@ module HexaPDF
116
135
  end
117
136
  end
118
137
 
138
+ # Attaches the files given on the CLI to the document.
139
+ def attach_files(doc)
140
+ @attach_files.each {|file, desc| doc.files.add(file, description: desc) }
141
+ end
142
+
119
143
  # Iterates over all embedded files.
120
144
  def each_file(doc, &block) # :yields: obj, index
121
145
  doc.files.each(search: @search).select(&:embedded_file?).each_with_index(&block)
data/lib/hexapdf/cli/form.rb CHANGED
@@ -195,6 +195,7 @@ module HexaPDF
195
195
  # Fills out the form by interactively asking the user for field values.
196
196
  def fill_form(doc)
197
197
  current_page_index = -1
198
+ form = doc.acro_form
198
199
  each_field(doc) do |_page, page_index, field, _widget|
199
200
  next if field.flagged?(:read_only) && !@fill_read_only_fields
200
201
  if current_page_index != page_index
@@ -224,9 +225,9 @@ module HexaPDF
224
225
  print " └─ New value: "
225
226
  value = $stdin.readline.chomp
226
227
  next if value.empty?
227
- apply_field_value(field, value)
228
+ form.fill(field.full_field_name => value)
228
229
  rescue HexaPDF::Error => e
229
- puts " ⚠ #{e.message}"
230
+ puts " ⚠ Error while setting '#{field.full_field_name}': #{e.message}"
230
231
  retry
231
232
  end
232
233
  end
@@ -234,18 +235,20 @@ module HexaPDF
234
235
 
235
236
  # Fills out the form using the data from the provided template file.
236
237
  def fill_form_with_template(doc)
237
- data = parse_template
238
238
  form = doc.acro_form
239
- data.each do |name, value|
239
+ data = parse_template
240
+ data.reject! do |name, _value|
240
241
  field = form.field_by_name(name)
241
242
  raise Error, "Field '#{name}' not found in input PDF" unless field
242
243
  if field.flagged?(:read_only) && !@fill_read_only_fields
243
244
  puts "Ignoring field '#{name}' because it is read only and --fill-read-only-fields " \
244
- "is no set"
245
- next
245
+ "is not set"
246
+ true
247
+ else
248
+ false
246
249
  end
247
- apply_field_value(field, value)
248
250
  end
251
+ form.fill(data)
249
252
  end
250
253
 
251
254
  # Parses the data from the given template file.
@@ -273,30 +276,6 @@ module HexaPDF
273
276
  data
274
277
  end
275
278
 
276
- # Applies the given value to the field.
277
- def apply_field_value(field, value)
278
- case field.concrete_field_type
279
- when :single_line_text_field, :multiline_text_field, :comb_text_field, :file_select_field,
280
- :combo_box, :list_box, :editable_combo_box
281
- field.field_value = value
282
- when :check_box
283
- field.field_value = case value
284
- when /y(es)?|t(rue)?/
285
- true
286
- when /n(o)?|f(alse)?/
287
- false
288
- else
289
- value
290
- end
291
- when :radio_button
292
- field.field_value = value.to_sym
293
- else
294
- raise Error, "Field type #{field.concrete_field_type} not yet supported"
295
- end
296
- rescue StandardError
297
- raise Error, "Error while setting '#{field.full_field_name}': #{$!.message}"
298
- end
299
-
300
279
  # Iterates over all non-push button fields in page order. If a field appears on multiple
301
280
  # pages, it is only yielded on the first page if +with_seen+ is +false.
302
281
  def each_field(doc, with_seen: false) # :yields: page, page_index, field
data/lib/hexapdf/cli/inspect.rb CHANGED
@@ -270,7 +270,7 @@ module HexaPDF
270
270
  if (rev_index = data.shift)
271
271
  rev_index = rev_index.to_i - 1
272
272
  if rev_index < 0 || rev_index >= @doc.revisions.count
273
- $stderr.puts("Error: Invalid revision numer specified")
273
+ $stderr.puts("Error: Invalid revision number specified")
274
274
  next
275
275
  end
276
276
  length = 0
data/lib/hexapdf/cli/usage.rb ADDED
@@ -0,0 +1,215 @@
1
+ # -*- encoding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2024 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #
33
+ # If the GNU Affero General Public License doesn't fit your need,
34
+ # commercial licenses are available at <https://gettalong.at/hexapdf/>.
35
+ #++
36
+
37
+ require 'hexapdf/cli/command'
38
+
39
+ module HexaPDF
40
+ module CLI
41
+
42
+ # Shows the space usage of various parts of a PDF file.
43
+ class Usage < Command
44
+
45
+ # Modifies the HexaPDF::PDFData class to store the size information
46
+ module PDFDataExtension
47
+
48
+ # Used to store the size of the indirect object.
49
+ attr_accessor :size
50
+
51
+ # Used to store the size of the object inside the object stream.
52
+ attr_accessor :size_in_object_stream
53
+
54
+ end
55
+
56
+ # Modifies HexaPDF::Parser to retrieve space used by indirect objects.
57
+ module ParserExtension
58
+
59
+ # :nodoc:
60
+ def initialize(*)
61
+ super
62
+ @last_size = nil
63
+ end
64
+
65
+ # :nodoc:
66
+ def load_object(xref_entry)
67
+ super.tap do |obj|
68
+ if xref_entry.type == :compressed
69
+ obj.data.size_in_object_stream = @last_size
70
+ elsif xref_entry.type == :in_use
71
+ obj.data.size = @last_size
72
+ end
73
+ @last_size = nil
74
+ end
75
+ end
76
+
77
+ # :nodoc:
78
+ def parse_indirect_object(offset = nil)
79
+ real_offset = (offset ? @header_offset + offset : @tokenizer.pos)
80
+ result = super
81
+ @last_size = @tokenizer.pos - real_offset
82
+ result
83
+ end
84
+
85
+ # :nodoc:
86
+ def load_compressed_object(xref_entry)
87
+ result = super
88
+ offsets = @object_stream_data[xref_entry.objstm].instance_variable_get(:@offsets)
89
+ @last_size = if xref_entry.pos == offsets.size - 1
90
+ @object_stream_data[xref_entry.objstm].instance_variable_get(:@tokenizer).
91
+ io.size - offsets[xref_entry.pos]
92
+ else
93
+ offsets[xref_entry.pos + 1] - offsets[xref_entry.pos]
94
+ end
95
+ result
96
+ end
97
+
98
+ end
99
+
100
+ def initialize #:nodoc:
101
+ super('usage', takes_commands: false)
102
+ short_desc("Show space usage of various parts of a PDF file")
103
+ long_desc(<<~EOF)
104
+ This command displays some usage statistics of the PDF file, i.e. which parts take which
105
+ approximate space in the file.
106
+
107
+ Each statistic line shows the space used followed by the number of indirect objects in
108
+ parentheses. If some of those objects are in object streams, that number is displayed
109
+ after a slash.
110
+ EOF
111
+
112
+ options.on("--password PASSWORD", "-p", String,
113
+ "The password for decryption. Use - for reading from standard input.") do |pwd|
114
+ @password = (pwd == '-' ? read_password : pwd)
115
+ end
116
+
117
+ @password = nil
118
+ end
119
+
120
+ def execute(file) #:nodoc:
121
+ HexaPDF::Parser.prepend(ParserExtension)
122
+ HexaPDF::PDFData.prepend(PDFDataExtension)
123
+
124
+ with_document(file, password: @password) do |doc|
125
+ # Prepare cache of outline items
126
+ outline_item_cache = {}
127
+ if doc.catalog.key?(:Outlines)
128
+ doc.outline.each_item {|item| outline_item_cache[item] = true }
129
+ outline_item_cache[doc.outline] = true
130
+ end
131
+
132
+ doc.revisions.each.with_index do |rev, index|
133
+ sum = count = 0
134
+ categories = {
135
+ Content: [],
136
+ Files: [],
137
+ Fonts: [],
138
+ Images: [],
139
+ Metadata: [],
140
+ ObjectStreams: [],
141
+ Outline: [],
142
+ XObjects: [],
143
+ }
144
+ puts if index > 0
145
+ puts "Usage information for revision #{index + 1}" if doc.revisions.count > 1
146
+ rev.each do |obj|
147
+ if command_parser.verbosity_info?
148
+ print "(#{obj.oid},#{obj.gen}): #{obj.data.size.to_i}"
149
+ print " (#{obj.data.size_in_object_stream})" if obj.data.size.nil?
150
+ puts
151
+ end
152
+ next unless obj.kind_of?(HexaPDF::Dictionary)
153
+
154
+ case obj.type
155
+ when :Page
156
+ Array(obj[:Contents]).each do |content|
157
+ categories[:Content] << content if object_in_rev?(content, rev)
158
+ end
159
+ when :Font
160
+ categories[:Fonts] << obj
161
+ when :FontDescriptor
162
+ categories[:Fonts] << obj
163
+ [:FontFile, :FontFile2, :FontFile3].each do |name|
164
+ categories[:Fonts] << obj[name] if object_in_rev?(obj[name], rev)
165
+ end
166
+ when :Metadata
167
+ categories[:Metadata] << obj
168
+ when :Filespec
169
+ categories[:Files] << obj
170
+ categories[:Files] << obj.embedded_file_stream if obj.embedded_file?
171
+ when :ObjStm
172
+ categories[:ObjectStreams] << obj
173
+ else
174
+ if obj[:Subtype] == :Image
175
+ categories[:Images] << obj
176
+ elsif obj[:Subtype] == :Form
177
+ categories[:XObjects] << obj
178
+ end
179
+ end
180
+ sum += obj.data.size if obj.data.size
181
+ count += 1
182
+ end
183
+
184
+ # Populate Outline category
185
+ outline_item_cache.reject! do |obj, _val|
186
+ object_in_rev?(obj, rev) && categories[:Outline] << obj
187
+ end
188
+
189
+ categories.each do |name, data|
190
+ next if data.empty?
191
+ object_stream_count = 0
192
+ category_sum = data.sum do |o|
193
+ object_stream_count += 1 unless o.data.size
194
+ o.data.size.to_i
195
+ end
196
+ object_stream_count = object_stream_count > 0 ? "/#{object_stream_count}" : ''
197
+ size = human_readable_file_size(category_sum)
198
+ puts "#{name.to_s.ljust(15)} #{size.rjust(8)} (#{data.count}#{object_stream_count})"
199
+ end
200
+ puts "#{'Total'.ljust(15)} #{human_readable_file_size(sum).rjust(8)} (#{count})"
201
+ end
202
+ end
203
+ end
204
+
205
+ private
206
+
207
+ # Returns +true+ if the +obj+ is in the given +rev+.
208
+ def object_in_rev?(obj, rev)
209
+ obj && rev.object(obj) == obj
210
+ end
211
+
212
+ end
213
+
214
+ end
215
+ end
data/lib/hexapdf/cli.rb CHANGED
@@ -48,6 +48,7 @@ require 'hexapdf/cli/watermark'
48
48
  require 'hexapdf/cli/image2pdf'
49
49
  require 'hexapdf/cli/form'
50
50
  require 'hexapdf/cli/fonts'
51
+ require 'hexapdf/cli/usage'
51
52
  require 'hexapdf/version'
52
53
  require 'hexapdf/document'
53
54
 
@@ -107,6 +108,7 @@ module HexaPDF
107
108
  add_command(HexaPDF::CLI::Image2PDF.new)
108
109
  add_command(HexaPDF::CLI::Form.new)
109
110
  add_command(HexaPDF::CLI::Fonts.new)
111
+ add_command(HexaPDF::CLI::Usage.new)
110
112
  add_command(CmdParse::HelpCommand.new)
111
113
  version_command = CmdParse::VersionCommand.new(add_switches: false)
112
114
  add_command(version_command)
data/lib/hexapdf/configuration.rb CHANGED
@@ -481,7 +481,7 @@ module HexaPDF
481
481
  'acro_form.fallback_font' => 'Helvetica',
482
482
  'acro_form.on_invalid_value' => proc do |field, value|
483
483
  raise HexaPDF::Error, "Invalid value #{value.inspect} for " \
484
- "#{field.concrete_field_type} field #{field.full_field_name}"
484
+ "#{field.concrete_field_type} field named '#{field.full_field_name}'"
485
485
  end,
486
486
  'acro_form.text_field.default_width' => 100,
487
487
  'debug' => false,
data/lib/hexapdf/dictionary.rb CHANGED
@@ -228,9 +228,9 @@ module HexaPDF
228
228
  value.empty?
229
229
  end
230
230
 
231
- # Returns a dup of the underlying hash.
232
- def to_h
233
- value.dup
231
+ # Returns a hash containing the preprocessed values (like in #[]).
232
+ def to_hash
233
+ value.each_with_object({}) {|(k, _), h| h[k] = self[k] }
234
234
  end
235
235
 
236
236
  private