hexapdf 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +68 -0
  3. data/CONTRIBUTERS +1 -1
  4. data/README.md +35 -4
  5. data/Rakefile +1 -0
  6. data/VERSION +1 -1
  7. data/data/hexapdf/cmap/83pv-RKSJ-H +314 -0
  8. data/data/hexapdf/cmap/90ms-RKSJ-H +259 -0
  9. data/data/hexapdf/cmap/90ms-RKSJ-V +156 -0
  10. data/data/hexapdf/cmap/90msp-RKSJ-H +257 -0
  11. data/data/hexapdf/cmap/90msp-RKSJ-V +155 -0
  12. data/data/hexapdf/cmap/90pv-RKSJ-H +355 -0
  13. data/data/hexapdf/cmap/Add-RKSJ-H +738 -0
  14. data/data/hexapdf/cmap/Add-RKSJ-V +135 -0
  15. data/data/hexapdf/cmap/Adobe-CNS1-UCS2 +18209 -0
  16. data/data/hexapdf/cmap/Adobe-GB1-UCS2 +14267 -0
  17. data/data/hexapdf/cmap/Adobe-Japan1-UCS2 +19159 -0
  18. data/data/hexapdf/cmap/Adobe-Korea1-UCS2 +9267 -0
  19. data/data/hexapdf/cmap/B5pc-H +337 -0
  20. data/data/hexapdf/cmap/B5pc-V +90 -0
  21. data/data/hexapdf/cmap/CNS-EUC-H +490 -0
  22. data/data/hexapdf/cmap/CNS-EUC-V +538 -0
  23. data/data/hexapdf/cmap/ETen-B5-H +343 -0
  24. data/data/hexapdf/cmap/ETen-B5-V +91 -0
  25. data/data/hexapdf/cmap/ETenms-B5-H +79 -0
  26. data/data/hexapdf/cmap/ETenms-B5-V +99 -0
  27. data/data/hexapdf/cmap/EUC-H +207 -0
  28. data/data/hexapdf/cmap/EUC-V +105 -0
  29. data/data/hexapdf/cmap/Ext-RKSJ-H +768 -0
  30. data/data/hexapdf/cmap/Ext-RKSJ-V +117 -0
  31. data/data/hexapdf/cmap/GB-EUC-H +173 -0
  32. data/data/hexapdf/cmap/GB-EUC-V +98 -0
  33. data/data/hexapdf/cmap/GBK-EUC-H +4273 -0
  34. data/data/hexapdf/cmap/GBK-EUC-V +97 -0
  35. data/data/hexapdf/cmap/GBK2K-H +5325 -0
  36. data/data/hexapdf/cmap/GBK2K-V +118 -0
  37. data/data/hexapdf/cmap/GBKp-EUC-H +4272 -0
  38. data/data/hexapdf/cmap/GBKp-EUC-V +97 -0
  39. data/data/hexapdf/cmap/GBpc-EUC-H +175 -0
  40. data/data/hexapdf/cmap/GBpc-EUC-V +98 -0
  41. data/data/hexapdf/cmap/H +200 -0
  42. data/data/hexapdf/cmap/HKscs-B5-H +1331 -0
  43. data/data/hexapdf/cmap/HKscs-B5-V +90 -0
  44. data/data/hexapdf/cmap/Identity-H +339 -0
  45. data/data/hexapdf/cmap/Identity-V +73 -0
  46. data/data/hexapdf/cmap/KSC-EUC-H +562 -0
  47. data/data/hexapdf/cmap/KSC-EUC-V +94 -0
  48. data/data/hexapdf/cmap/KSCms-UHC-H +776 -0
  49. data/data/hexapdf/cmap/KSCms-UHC-HW-H +775 -0
  50. data/data/hexapdf/cmap/KSCms-UHC-HW-V +93 -0
  51. data/data/hexapdf/cmap/KSCms-UHC-V +94 -0
  52. data/data/hexapdf/cmap/KSCpc-EUC-H +608 -0
  53. data/data/hexapdf/cmap/LICENSE.txt +26 -0
  54. data/data/hexapdf/cmap/README.txt +9 -0
  55. data/data/hexapdf/cmap/UniCNS-UCS2-H +16992 -0
  56. data/data/hexapdf/cmap/UniCNS-UCS2-V +90 -0
  57. data/data/hexapdf/cmap/UniCNS-UTF16-H +19117 -0
  58. data/data/hexapdf/cmap/UniCNS-UTF16-V +94 -0
  59. data/data/hexapdf/cmap/UniGB-UCS2-H +14321 -0
  60. data/data/hexapdf/cmap/UniGB-UCS2-V +101 -0
  61. data/data/hexapdf/cmap/UniGB-UTF16-H +14381 -0
  62. data/data/hexapdf/cmap/UniGB-UTF16-V +104 -0
  63. data/data/hexapdf/cmap/UniJIS-UCS2-H +8870 -0
  64. data/data/hexapdf/cmap/UniJIS-UCS2-HW-H +81 -0
  65. data/data/hexapdf/cmap/UniJIS-UCS2-HW-V +279 -0
  66. data/data/hexapdf/cmap/UniJIS-UCS2-V +275 -0
  67. data/data/hexapdf/cmap/UniJIS-UTF16-H +14450 -0
  68. data/data/hexapdf/cmap/UniJIS-UTF16-V +299 -0
  69. data/data/hexapdf/cmap/UniKS-UCS2-H +8725 -0
  70. data/data/hexapdf/cmap/UniKS-UCS2-V +95 -0
  71. data/data/hexapdf/cmap/UniKS-UTF16-H +8895 -0
  72. data/data/hexapdf/cmap/UniKS-UTF16-V +99 -0
  73. data/data/hexapdf/cmap/V +105 -0
  74. data/examples/arc.rb +3 -3
  75. data/examples/merging.rb +4 -1
  76. data/examples/optimizing.rb +3 -0
  77. data/examples/show_char_bboxes.rb +2 -2
  78. data/examples/truetype.rb +2 -2
  79. data/lib/hexapdf/cli.rb +40 -1
  80. data/lib/hexapdf/cli/batch.rb +72 -0
  81. data/lib/hexapdf/cli/command.rb +112 -15
  82. data/lib/hexapdf/cli/files.rb +2 -2
  83. data/lib/hexapdf/cli/images.rb +14 -6
  84. data/lib/hexapdf/cli/info.rb +6 -8
  85. data/lib/hexapdf/cli/inspect.rb +5 -8
  86. data/lib/hexapdf/cli/merge.rb +13 -20
  87. data/lib/hexapdf/cli/modify.rb +4 -7
  88. data/lib/hexapdf/cli/optimize.rb +2 -5
  89. data/lib/hexapdf/configuration.rb +32 -3
  90. data/lib/hexapdf/content/canvas.rb +130 -37
  91. data/lib/hexapdf/content/parser.rb +40 -6
  92. data/lib/hexapdf/content/processor.rb +4 -4
  93. data/lib/hexapdf/document.rb +40 -10
  94. data/lib/hexapdf/document/fonts.rb +1 -0
  95. data/lib/hexapdf/encryption/security_handler.rb +8 -12
  96. data/lib/hexapdf/filter/flate_decode.rb +25 -2
  97. data/lib/hexapdf/font/cmap.rb +124 -8
  98. data/lib/hexapdf/font/cmap/parser.rb +65 -15
  99. data/lib/hexapdf/font/encoding/base.rb +2 -2
  100. data/lib/hexapdf/font/encoding/glyph_list.rb +2 -4
  101. data/lib/hexapdf/font/true_type.rb +1 -0
  102. data/lib/hexapdf/font/true_type/builder.rb +75 -0
  103. data/lib/hexapdf/font/true_type/optimizer.rb +65 -0
  104. data/lib/hexapdf/font/true_type/subsetter.rb +9 -22
  105. data/lib/hexapdf/font/true_type_wrapper.rb +9 -21
  106. data/lib/hexapdf/font_loader.rb +1 -1
  107. data/lib/hexapdf/importer.rb +1 -1
  108. data/lib/hexapdf/serializer.rb +5 -3
  109. data/lib/hexapdf/type.rb +2 -0
  110. data/lib/hexapdf/type/cid_font.rb +120 -0
  111. data/lib/hexapdf/type/font.rb +32 -12
  112. data/lib/hexapdf/type/font_simple.rb +34 -42
  113. data/lib/hexapdf/type/font_type0.rb +148 -0
  114. data/lib/hexapdf/type/form.rb +4 -4
  115. data/lib/hexapdf/type/page.rb +12 -11
  116. data/lib/hexapdf/type/resources.rb +14 -0
  117. data/lib/hexapdf/utils/graphics_helpers.rb +77 -0
  118. data/lib/hexapdf/version.rb +1 -1
  119. data/man/man1/hexapdf.1 +43 -1
  120. data/test/hexapdf/content/test_canvas.rb +76 -0
  121. data/test/hexapdf/content/test_parser.rb +20 -1
  122. data/test/hexapdf/content/test_processor.rb +11 -7
  123. data/test/hexapdf/document/test_fonts.rb +3 -1
  124. data/test/hexapdf/font/cmap/test_parser.rb +42 -7
  125. data/test/hexapdf/font/encoding/test_base.rb +1 -1
  126. data/test/hexapdf/font/encoding/test_glyph_list.rb +3 -3
  127. data/test/hexapdf/font/test_cmap.rb +104 -0
  128. data/test/hexapdf/font/test_true_type_wrapper.rb +63 -46
  129. data/test/hexapdf/font/true_type/test_builder.rb +37 -0
  130. data/test/hexapdf/font/true_type/test_optimizer.rb +27 -0
  131. data/test/hexapdf/font/true_type/test_subsetter.rb +6 -13
  132. data/test/hexapdf/test_configuration.rb +12 -7
  133. data/test/hexapdf/test_document.rb +24 -0
  134. data/test/hexapdf/test_importer.rb +9 -1
  135. data/test/hexapdf/test_writer.rb +2 -2
  136. data/test/hexapdf/type/test_cid_font.rb +61 -0
  137. data/test/hexapdf/type/test_font.rb +31 -4
  138. data/test/hexapdf/type/test_font_simple.rb +6 -21
  139. data/test/hexapdf/type/test_font_type0.rb +114 -0
  140. data/test/hexapdf/type/test_resources.rb +17 -1
  141. data/test/hexapdf/utils/test_graphics_helpers.rb +29 -0
  142. metadata +82 -3
@@ -0,0 +1,99 @@
1
+ %!PS-Adobe-3.0 Resource-CMap
2
+ %%DocumentNeededResources: ProcSet (CIDInit)
3
+ %%DocumentNeededResources: CMap (UniKS-UTF16-H)
4
+ %%IncludeResource: ProcSet (CIDInit)
5
+ %%IncludeResource: CMap (UniKS-UTF16-H)
6
+ %%BeginResource: CMap (UniKS-UTF16-V)
7
+ %%Title: (UniKS-UTF16-V Adobe Korea1 1)
8
+ %%Version: 1.004
9
+ %%Copyright: -----------------------------------------------------------
10
+ %%Copyright: Copyright 1990-2015 Adobe Systems Incorporated.
11
+ %%Copyright: All rights reserved.
12
+ %%Copyright:
13
+ %%Copyright: Redistribution and use in source and binary forms, with or
14
+ %%Copyright: without modification, are permitted provided that the
15
+ %%Copyright: following conditions are met:
16
+ %%Copyright:
17
+ %%Copyright: Redistributions of source code must retain the above
18
+ %%Copyright: copyright notice, this list of conditions and the following
19
+ %%Copyright: disclaimer.
20
+ %%Copyright:
21
+ %%Copyright: Redistributions in binary form must reproduce the above
22
+ %%Copyright: copyright notice, this list of conditions and the following
23
+ %%Copyright: disclaimer in the documentation and/or other materials
24
+ %%Copyright: provided with the distribution.
25
+ %%Copyright:
26
+ %%Copyright: Neither the name of Adobe Systems Incorporated nor the names
27
+ %%Copyright: of its contributors may be used to endorse or promote
28
+ %%Copyright: products derived from this software without specific prior
29
+ %%Copyright: written permission.
30
+ %%Copyright:
31
+ %%Copyright: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
32
+ %%Copyright: CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
33
+ %%Copyright: INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
34
+ %%Copyright: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35
+ %%Copyright: DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
36
+ %%Copyright: CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
37
+ %%Copyright: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38
+ %%Copyright: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39
+ %%Copyright: LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40
+ %%Copyright: HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41
+ %%Copyright: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
42
+ %%Copyright: OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
43
+ %%Copyright: SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44
+ %%Copyright: -----------------------------------------------------------
45
+ %%EndComments
46
+
47
+ /CIDInit /ProcSet findresource begin
48
+
49
+ 12 dict begin
50
+
51
+ begincmap
52
+
53
+ /UniKS-UTF16-H usecmap
54
+
55
+ /CIDSystemInfo 3 dict dup begin
56
+ /Registry (Adobe) def
57
+ /Ordering (Korea1) def
58
+ /Supplement 1 def
59
+ end def
60
+
61
+ /CMapName /UniKS-UTF16-V def
62
+ /CMapVersion 1.004 def
63
+ /CMapType 1 def
64
+
65
+ /XUID [1 10 25545] def
66
+
67
+ /WMode 1 def
68
+
69
+ 11 begincidchar
70
+ <2016> 8061
71
+ <2025> 8058
72
+ <3013> 8075
73
+ <ff01> 8076
74
+ <ff0c> 8079
75
+ <ff0e> 8080
76
+ <ff3b> 8087
77
+ <ff3d> 8088
78
+ <ff3f> 8089
79
+ <ff5e> 8062
80
+ <ffe3> 8093
81
+ endcidchar
82
+
83
+ 7 begincidrange
84
+ <2013> <2014> 8059
85
+ <3001> <3002> 8056
86
+ <3008> <3011> 8065
87
+ <3014> <3015> 8063
88
+ <ff08> <ff09> 8077
89
+ <ff1a> <ff1f> 8081
90
+ <ff5b> <ff5d> 8090
91
+ endcidrange
92
+
93
+ endcmap
94
+ CMapName currentdict /CMap defineresource pop
95
+ end
96
+ end
97
+
98
+ %%EndResource
99
+ %%EOF
@@ -0,0 +1,105 @@
1
+ %!PS-Adobe-3.0 Resource-CMap
2
+ %%DocumentNeededResources: ProcSet (CIDInit)
3
+ %%DocumentNeededResources: CMap (H)
4
+ %%IncludeResource: ProcSet (CIDInit)
5
+ %%IncludeResource: CMap (H)
6
+ %%BeginResource: CMap (V)
7
+ %%Title: (V Adobe Japan1 1)
8
+ %%Version: 12.004
9
+ %%Copyright: -----------------------------------------------------------
10
+ %%Copyright: Copyright 1990-2015 Adobe Systems Incorporated.
11
+ %%Copyright: All rights reserved.
12
+ %%Copyright:
13
+ %%Copyright: Redistribution and use in source and binary forms, with or
14
+ %%Copyright: without modification, are permitted provided that the
15
+ %%Copyright: following conditions are met:
16
+ %%Copyright:
17
+ %%Copyright: Redistributions of source code must retain the above
18
+ %%Copyright: copyright notice, this list of conditions and the following
19
+ %%Copyright: disclaimer.
20
+ %%Copyright:
21
+ %%Copyright: Redistributions in binary form must reproduce the above
22
+ %%Copyright: copyright notice, this list of conditions and the following
23
+ %%Copyright: disclaimer in the documentation and/or other materials
24
+ %%Copyright: provided with the distribution.
25
+ %%Copyright:
26
+ %%Copyright: Neither the name of Adobe Systems Incorporated nor the names
27
+ %%Copyright: of its contributors may be used to endorse or promote
28
+ %%Copyright: products derived from this software without specific prior
29
+ %%Copyright: written permission.
30
+ %%Copyright:
31
+ %%Copyright: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
32
+ %%Copyright: CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
33
+ %%Copyright: INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
34
+ %%Copyright: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35
+ %%Copyright: DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
36
+ %%Copyright: CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
37
+ %%Copyright: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38
+ %%Copyright: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39
+ %%Copyright: LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40
+ %%Copyright: HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41
+ %%Copyright: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
42
+ %%Copyright: OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
43
+ %%Copyright: SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44
+ %%Copyright: -----------------------------------------------------------
45
+ %%EndComments
46
+
47
+ /CIDInit /ProcSet findresource begin
48
+
49
+ 12 dict begin
50
+
51
+ begincmap
52
+
53
+ /H usecmap
54
+
55
+ /CIDSystemInfo 3 dict dup begin
56
+ /Registry (Adobe) def
57
+ /Ordering (Japan1) def
58
+ /Supplement 1 def
59
+ end def
60
+
61
+ /CMapName /V def
62
+ /CMapVersion 12.004 def
63
+ /CMapType 1 def
64
+
65
+ /UIDOffset 850 def
66
+ /XUID [1 10 25340] def
67
+
68
+ /WMode 1 def
69
+
70
+ 27 begincidrange
71
+ <2122> <2123> 7887
72
+ <2131> <2132> 7889
73
+ <213c> <213e> 7891
74
+ <2141> <2145> 7894
75
+ <214a> <215b> 7899
76
+ <2161> <2161> 7917
77
+ <2421> <2421> 7918
78
+ <2423> <2423> 7919
79
+ <2425> <2425> 7920
80
+ <2427> <2427> 7921
81
+ <2429> <2429> 7922
82
+ <2443> <2443> 7923
83
+ <2463> <2463> 7924
84
+ <2465> <2465> 7925
85
+ <2467> <2467> 7926
86
+ <246e> <246e> 7927
87
+ <2521> <2521> 7928
88
+ <2523> <2523> 7929
89
+ <2525> <2525> 7930
90
+ <2527> <2527> 7931
91
+ <2529> <2529> 7932
92
+ <2543> <2543> 7933
93
+ <2563> <2563> 7934
94
+ <2565> <2565> 7935
95
+ <2567> <2567> 7936
96
+ <256e> <256e> 7937
97
+ <2575> <2576> 7938
98
+ endcidrange
99
+ endcmap
100
+ CMapName currentdict /CMap defineresource pop
101
+ end
102
+ end
103
+
104
+ %%EndResource
105
+ %%EOF
data/examples/arc.rb CHANGED
@@ -17,7 +17,7 @@ radius = 75
17
17
 
18
18
  # Left pie chart
19
19
  center = [page.box.width * 0.25, page.box.height * 0.85]
20
- pie = canvas.graphic_object(:solid_arc, cx: center[0], cy: center[1],
20
+ pie = canvas.graphic_object(:solid_arc, cx: center[0], cy: center[1],
21
21
  outer_a: radius, outer_b: radius)
22
22
  canvas.fill_color('ddddff')
23
23
  canvas.draw(pie, start_angle: 30, end_angle: 110).fill
@@ -26,7 +26,7 @@ canvas.draw(pie, start_angle: 110, end_angle: 130).fill
26
26
  canvas.fill_color('ddffdd')
27
27
  canvas.draw(pie, start_angle: 130, end_angle: 30).fill
28
28
 
29
- arc = canvas.graphic_object(:arc, cx: center[0], cy: center[1],
29
+ arc = canvas.graphic_object(:arc, cx: center[0], cy: center[1],
30
30
  a: radius, b: radius)
31
31
  canvas.stroke_color('0000ff')
32
32
  canvas.draw(arc, start_angle: 30, end_angle: 110).stroke
@@ -38,7 +38,7 @@ canvas.draw(arc, start_angle: 130, end_angle: 30).stroke
38
38
  # Right pie chart
39
39
  center = [page.box.width * 0.75, page.box.height * 0.85]
40
40
  canvas.stroke_color('777777')
41
- pie = canvas.graphic_object(:solid_arc, cx: center[0], cy: center[1],
41
+ pie = canvas.graphic_object(:solid_arc, cx: center[0], cy: center[1],
42
42
  outer_a: radius, outer_b: radius)
43
43
  canvas.fill_color('ddddff')
44
44
  canvas.draw(pie, start_angle: 30, end_angle: 110).fill_stroke
data/examples/merging.rb CHANGED
@@ -10,6 +10,9 @@
10
10
  # Sometimes other things like attached files or a document outline
11
11
  # should also be preserved.
12
12
  #
13
+ # The hexapdf binary provides a command for merging files which does
14
+ # the merging in a more sophisticated way.
15
+ #
13
16
  # Usage:
14
17
  # : `ruby merging.rb INPUT1.PDF INPUT2.PDF ...`
15
18
  #
@@ -21,4 +24,4 @@ ARGV.each do |file|
21
24
  pdf = HexaPDF::Document.open(file)
22
25
  pdf.pages.each {|page| target.pages << target.import(page)}
23
26
  end
24
- target.write("merging.pdf", optimize: true)
27
+ target.write("2.merging.pdf", optimize: true)
@@ -7,6 +7,9 @@
7
7
  # over which aspects should be optimized. See [HexaPDF::Task::Optimize]
8
8
  # for detailed information.
9
9
  #
10
+ # The hexapdf binary provides an optimization command which does some
11
+ # additional operations like optimizing the page tree.
12
+ #
10
13
  # Usage:
11
14
  # : `ruby optimizing.rb INPUT.PDF`
12
15
  #
@@ -2,12 +2,12 @@
2
2
  #
3
3
  # This example shows how to process the contents of a page. It finds all
4
4
  # characters on a page and surrounds them with their bounding box. Additionally,
5
- # all consecutive text runs are also surrounded by box.
5
+ # all consecutive text runs are also surrounded by a box.
6
6
  #
7
7
  # The code provides two ways of generating the boxes. The commented part of
8
8
  # `ShowTextProcessor#show_text` uses a polyline since some characters may be
9
9
  # transformed (rotated or skewed). The un-commented part uses rectangles which
10
- # is faster and correct in most cases.
10
+ # is faster and correct for most but not all cases.
11
11
  #
12
12
  # Usage:
13
13
  # : `ruby show_char_bboxes.rb INPUT.PDF`
data/examples/truetype.rb CHANGED
@@ -17,9 +17,9 @@
17
17
  require 'hexapdf'
18
18
 
19
19
  doc = HexaPDF::Document.new
20
- doc.config['font.on_missing_glyph'] = ->(n,f) { f.missing_glyph_id }
20
+ doc.config['font.on_missing_glyph'] = ->(_, f) { f.missing_glyph_id }
21
21
  doc.config['font.map'] = {
22
- 'myfont' => {none: ARGV.shift || File.join(__dir__, '../test/data/fonts/Ubuntu-Title.ttf')}
22
+ 'myfont' => {none: ARGV.shift || File.join(__dir__, '../test/data/fonts/Ubuntu-Title.ttf')},
23
23
  }
24
24
 
25
25
  wrapper = doc.fonts.load('myfont')
data/lib/hexapdf/cli.rb CHANGED
@@ -39,6 +39,7 @@ require 'hexapdf/cli/modify'
39
39
  require 'hexapdf/cli/merge'
40
40
  require 'hexapdf/cli/optimize'
41
41
  require 'hexapdf/cli/images'
42
+ require 'hexapdf/cli/batch'
42
43
  require 'hexapdf/version'
43
44
  require 'hexapdf/document'
44
45
 
@@ -59,14 +60,28 @@ module HexaPDF
59
60
  # The CmdParse::CommandParser class that is used for running the CLI application.
60
61
  class Application < CmdParse::CommandParser
61
62
 
63
+ # Verbosity level for no output
64
+ VERBOSITY_QUIET = 0
65
+
66
+ # Verbosity level for warning output
67
+ VERBOSITY_WARNING = 1
68
+
69
+ # Verbosity level for informational output
70
+ VERBOSITY_INFO = 2
71
+
62
72
  # Specifies whether an operation should be forced. For example, if an existing file should be
63
73
  # overwritten.
64
74
  attr_reader :force
65
75
 
76
+ # Specifies whether strict parsing and validation should be used.
77
+ attr_reader :strict
78
+
66
79
  def initialize #:nodoc:
67
80
  super(handle_exceptions: :no_help)
68
81
  main_command.options.program_name = "hexapdf"
69
82
  main_command.options.version = HexaPDF::VERSION
83
+ main_command.extend(Command::Extensions)
84
+ main_command.define_singleton_method(:usage_commands) { "COMMAND" }
70
85
  add_command(HexaPDF::CLI::Info.new)
71
86
  add_command(HexaPDF::CLI::Files.new)
72
87
  add_command(HexaPDF::CLI::Images.new)
@@ -74,13 +89,37 @@ module HexaPDF
74
89
  add_command(HexaPDF::CLI::Modify.new)
75
90
  add_command(HexaPDF::CLI::Optimize.new)
76
91
  add_command(HexaPDF::CLI::Merge.new)
92
+ add_command(HexaPDF::CLI::Batch.new)
77
93
  add_command(CmdParse::HelpCommand.new)
78
- add_command(CmdParse::VersionCommand.new)
94
+ version_command = CmdParse::VersionCommand.new(add_switches: false)
95
+ add_command(version_command)
96
+ main_options.on_tail("--version", "Show hexapdf version") { version_command.execute }
79
97
 
80
98
  @force = false
99
+ @verbosity = VERBOSITY_WARNING
100
+ @strict = false
81
101
  global_options.on("--[no-]force", "Force overwriting existing files. Default: false") do |f|
82
102
  @force = f
83
103
  end
104
+ global_options.on("--strict", "Enable strict parsing and validation") do |s|
105
+ @strict = s
106
+ end
107
+ global_options.on("--verbose", "-v", "Verbose output") do
108
+ @verbosity += 1
109
+ end
110
+ global_options.on("--quiet", "-q", "Suppress any output") do
111
+ @verbosity = VERBOSITY_QUIET
112
+ end
113
+ end
114
+
115
# Tells whether warnings should be shown, i.e. whether the current verbosity is at least
# VERBOSITY_WARNING.
def verbosity_warning?
  VERBOSITY_WARNING <= @verbosity
end
119
+
120
# Tells whether informational messages should be shown, i.e. whether the current verbosity is
# at least VERBOSITY_INFO.
def verbosity_info?
  VERBOSITY_INFO <= @verbosity
end
85
124
 
86
125
  def parse(argv = ARGV) #:nodoc:
@@ -0,0 +1,72 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/cli/command'
35
+ require 'shellwords'
36
+
37
+ module HexaPDF
38
+ module CLI
39
+
40
# Execute the same command for multiple input files.
#
# Every input file is handled by a newly created HexaPDF::CLI::Application. An error raised
# while handling one file is reported on $stderr (if warnings are enabled) and the remaining
# files are still processed.
class Batch < Command

  def initialize #:nodoc:
    super('batch', takes_commands: false)
    short_desc("Execute a single command on multiple files")
    # Use the non-destructive gsub: gsub! returns nil when there is nothing to replace.
    long_desc(<<-EOF.gsub(/^ */, ''))
      This command allows executing a single command for multiple input files, thereby reducing
      the overall execution time.

      The first argument is used as a hexapdf command line (but without the binary name) and
      the rest as input files. The specified command will be executed for each input file, with
      {} being replaced by the file name.
    EOF
  end

  # Splits +command+ into shell words and runs it once for each entry of +files+, replacing
  # every occurrence of {} in the arguments with the file name.
  def execute(command, *files) #:nodoc:
    args = Shellwords.split(command)
    files.each do |file|
      begin
        # The block form inserts the file name verbatim. A replacement *string* would have
        # its backslash sequences (e.g. \1 or \\) interpreted, mangling such file names.
        file_args = args.map {|arg| arg.gsub(/{}/) { file }}
        HexaPDF::CLI::Application.new.parse(file_args)
      rescue StandardError => e
        if command_parser.verbosity_warning?
          $stderr.puts "Error processing '#{file}': #{e.message}"
        end
      end
    end
  end

end
70
+
71
+ end
72
+ end
@@ -35,6 +35,7 @@ require 'io/console'
35
35
  require 'ostruct'
36
36
  require 'cmdparse'
37
37
  require 'hexapdf/document'
38
+ require 'hexapdf/font/true_type'
38
39
 
39
40
  module HexaPDF
40
41
  module CLI
@@ -43,6 +44,16 @@ module HexaPDF
43
44
  # commands.
44
45
  class Command < CmdParse::Command
45
46
 
47
module Extensions #:nodoc:
  # Assembles the help banner: a title line with the HexaPDF version, the copyright line, an
  # empty line, the usage text formatted with an indentation of 7 and a trailing empty line.
  def help_banner #:nodoc:
    title = "hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n"
    copyright = "Copyright (c) 2014-2017 Thomas Leitner; licensed under the AGPLv3\n\n"
    usage_text = format(usage, indent: 7)
    "#{title}#{copyright}#{usage_text}\n\n"
  end
end
54
+
55
+ include Extensions
56
+
46
57
  def initialize(*args, &block) #:nodoc:
47
58
  super
48
59
  @out_options = OpenStruct.new
@@ -51,6 +62,7 @@ module HexaPDF
51
62
  @out_options.object_streams = :preserve
52
63
  @out_options.xref_streams = :preserve
53
64
  @out_options.streams = :preserve
65
+ @out_options.optimize_fonts = false
54
66
 
55
67
  @out_options.encryption = :preserve
56
68
  @out_options.enc_user_pwd = @out_options.enc_owner_pwd = nil
@@ -62,6 +74,57 @@ module HexaPDF
62
74
 
63
75
  protected
64
76
 
77
# Creates a HexaPDF::Document instance for the PDF file and yields it.
#
# If +out_file+ is given, the document is written to it after yielding.
#
# When +file+ and +out_file+ refer to the same file, the document is created through
# HexaPDF::Document.open instead of being attached to an IO object that this method keeps
# open until it returns.
# NOTE(review): presumably this avoids reading from the input while it is being overwritten -
# confirm against HexaPDF::Document.open.
def with_document(file, password: nil, out_file: nil) #:yield: document
  # Compare the actual files, not only the strings, so that different spellings of the same
  # path (e.g. 'a.pdf' and './a.pdf') are also recognized.
  same_file = (file == out_file) || (out_file && File.identical?(file, out_file))

  if same_file
    # Pass the options as keyword arguments, as done for Document.new below. A positional
    # hash is not converted to keyword arguments on Ruby 3+.
    doc = HexaPDF::Document.open(file, **pdf_options(password))
  else
    file_io = File.open(file, 'rb')
    doc = HexaPDF::Document.new(io: file_io, **pdf_options(password))
  end

  yield(doc)

  write_document(doc, out_file)
ensure
  # file_io is only set when the input file was opened by this method.
  file_io&.close
end
94
+
95
# Builds the options hash for creating a HexaPDF::Document: the decryption options with the
# given password and a configuration containing the 'parser.on_correctable_error' callback.
#
# In strict mode the callback always returns +true+. Otherwise it returns +false+ and, if
# informational output is enabled, reports the problem on $stderr.
def pdf_options(password)
  error_handler = proc { true }
  unless command_parser.strict
    error_handler = proc do |_, text, position|
      if command_parser.verbosity_info?
        description = MalformedPDFError.new(text, pos: position).message
        $stderr.puts "Corrected parsing problem: #{description}"
      end
      false
    end
  end

  {
    decryption_opts: {password: password},
    config: {'parser.on_correctable_error' => error_handler},
  }
end
112
+ end
113
+
114
# Validates the document with auto-correction enabled and writes it to +out_file+. Nothing is
# done if +out_file+ is +nil+.
#
# In strict mode a validation problem that is not correctable raises an error; otherwise
# problems are reported on $stderr if informational output is enabled.
def write_document(doc, out_file)
  return unless out_file

  doc.validate(auto_correct: true) do |problem, fixable|
    raise "Validation error: #{problem}" if command_parser.strict && !fixable

    if command_parser.verbosity_info?
      outcome = fixable ? 'Corrected' : 'Ignored'
      $stderr.puts "#{outcome} validation problem: #{problem}"
    end
  end
  # Validation has just been done above, so it is not repeated when writing.
  doc.write(out_file, validate: false)
end
127
+
65
128
  # Checks whether the given output file exists and raises an error if it does and
66
129
  # HexaPDF::CLI#force is not set.
67
130
  def maybe_raise_on_existing_file(filename)
@@ -98,6 +161,10 @@ module HexaPDF
98
161
  "time; default: #{@out_options.compress_pages})") do |c|
99
162
  @out_options.compress_pages = c
100
163
  end
164
+ options.on("--[no-]optimize-fonts", "Optimize embedded font files; " \
165
+ "default: #{@out_options.optimize_fonts})") do |o|
166
+ @out_options.optimize_fonts = o
167
+ end
101
168
  end
102
169
 
103
170
  # Defines the encryption options.
@@ -160,23 +227,37 @@ module HexaPDF
160
227
  object_streams: @out_options.object_streams,
161
228
  xref_streams: @out_options.xref_streams,
162
229
  compress_pages: @out_options.compress_pages)
163
- handle_streams(doc) unless @out_options.streams == :preserve
230
+ if @out_options.streams != :preserve || @out_options.optimize_fonts
231
+ doc.each(current: false) do |obj|
232
+ optimize_stream(obj)
233
+ optimize_font(obj)
234
+ end
235
+ end
164
236
  end
165
237
 
166
238
  IGNORED_FILTERS = { #:nodoc:
167
239
  CCITTFaxDecode: true, JBIG2Decode: true, DCTDecode: true, JPXDecode: true, Crypt: true
168
240
  }.freeze
169
241
 
170
- # Applies the chosen stream mode to all streams.
171
- def handle_streams(doc)
172
- doc.each(current: false) do |obj|
173
- next if !obj.respond_to?(:set_filter) || Array(obj[:Filter]).any? {|f| IGNORED_FILTERS[f]}
174
- if @out_options.streams == :compress
175
- obj.set_filter(:FlateDecode)
176
- else
177
- obj.set_filter(nil)
178
- end
179
- end
242
# Applies the chosen stream mode to +obj+.
#
# Nothing is done if the stream mode is :preserve, if +obj+ does not respond to #set_filter or
# if one of its filters is listed in IGNORED_FILTERS. Otherwise the filter is set to
# :FlateDecode for the mode :compress and removed for any other mode.
def optimize_stream(obj)
  mode = @out_options.streams
  return if mode == :preserve
  return unless obj.respond_to?(:set_filter)
  return if Array(obj[:Filter]).any? {|filter| IGNORED_FILTERS[filter]}

  new_filter = (mode == :compress ? :FlateDecode : nil)
  obj.set_filter(new_filter)
end
249
+
250
# Optimizes the embedded font file of +obj+ if font optimization is enabled.
#
# Only HexaPDF::Type::Font objects that are embedded and have the subtype :TrueType, or the
# subtype :Type0 with a descendant font of subtype :CIDFontType2, are processed. The font
# file's stream is replaced by the output of the TrueType optimizer and its /Length1 entry is
# set to the size of the new data.
def optimize_font(obj)
  return unless @out_options.optimize_fonts
  return unless obj.kind_of?(HexaPDF::Type::Font)

  true_type_based = obj[:Subtype] == :TrueType ||
    (obj[:Subtype] == :Type0 && obj.descendant_font[:Subtype] == :CIDFontType2)
  return unless true_type_based && obj.embedded?

  source = StringIO.new(obj.font_file.stream)
  font = HexaPDF::Font::TrueType::Font.new(source)
  data = HexaPDF::Font::TrueType::Optimizer.build_for_pdf(font)
  obj.font_file.stream = data
  obj.font_file[:Length1] = data.size
end
181
262
 
182
263
  # Applies the encryption related options to the given HexaPDF::Document instance.
@@ -199,7 +280,9 @@ module HexaPDF
199
280
  ROTATE_MAP = {'l' => -90, 'r' => 90, 'd' => 180, 'n' => :none}.freeze #:nodoc:
200
281
 
201
282
  # Parses the pages specification string and returns an array of tuples containing a page
202
- # number and a rotation value (either -90, 90, 180 or :none).
283
+ # number and a rotation value (either -90, 90, 180, :none or +nil+ where an integer means
284
+ # adding a rotation by that number of degrees, :none means removing any set rotation value and
285
+ # +nil+ means preserving the set rotation value).
203
286
  #
204
287
  # The parameter +count+ needs to be the total number of pages in the document.
205
288
  #
@@ -208,10 +291,12 @@ module HexaPDF
208
291
  range.split(',').each_with_object([]) do |str, arr|
209
292
  case str
210
293
  when /\A#{PAGE_NUMBER_SPEC}(l|r|d|n)?\z/o
211
- arr << [($1 == 'e' ? count : str.to_i) - 1, ROTATE_MAP[$2]]
294
+ page_num = ($1 == 'e' ? count : str.to_i)
295
+ next if page_num > count
296
+ arr << [page_num - 1, ROTATE_MAP[$2]]
212
297
  when /\A#{PAGE_NUMBER_SPEC}-#{PAGE_NUMBER_SPEC}(?:\/([1-9]\d*))?(l|r|d|n)?\z/
213
- start_nr = ($1 == 'e' ? count : $1.to_i) - 1
214
- end_nr = ($2 == 'e' ? count : $2.to_i) - 1
298
+ start_nr = ($1 == 'e' ? count : [$1.to_i, count].min) - 1
299
+ end_nr = ($2 == 'e' ? count : [$2.to_i, count].min) - 1
215
300
  step = ($3 ? $3.to_i : 1) * (start_nr > end_nr ? -1 : 1)
216
301
  rotation = ROTATE_MAP[$4]
217
302
  start_nr.step(to: end_nr, by: step) {|n| arr << [n, rotation]}
@@ -235,6 +320,18 @@ module HexaPDF
235
320
  end
236
321
  end
237
322
 
323
# Deletes every dictionary of type :Pages or :Page from the document whose data is neither
# the data of the page tree root nor the data of one of the pages yielded by +doc.pages+.
def remove_unused_pages(doc)
  retained = {doc.pages.root.data => true}
  doc.pages.each {|page| retained[page.data] = true}

  doc.each(current: false) do |obj|
    next unless obj.kind_of?(HexaPDF::Dictionary)
    next unless obj.type == :Pages || obj.type == :Page
    doc.delete(obj) unless retained.key?(obj.data)
  end
end
334
+
238
335
  private
239
336
 
240
337
  # Displays the given prompt, reads from the console without echo and returns the read string.