hexapdf 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +68 -0
  3. data/CONTRIBUTERS +1 -1
  4. data/README.md +35 -4
  5. data/Rakefile +1 -0
  6. data/VERSION +1 -1
  7. data/data/hexapdf/cmap/83pv-RKSJ-H +314 -0
  8. data/data/hexapdf/cmap/90ms-RKSJ-H +259 -0
  9. data/data/hexapdf/cmap/90ms-RKSJ-V +156 -0
  10. data/data/hexapdf/cmap/90msp-RKSJ-H +257 -0
  11. data/data/hexapdf/cmap/90msp-RKSJ-V +155 -0
  12. data/data/hexapdf/cmap/90pv-RKSJ-H +355 -0
  13. data/data/hexapdf/cmap/Add-RKSJ-H +738 -0
  14. data/data/hexapdf/cmap/Add-RKSJ-V +135 -0
  15. data/data/hexapdf/cmap/Adobe-CNS1-UCS2 +18209 -0
  16. data/data/hexapdf/cmap/Adobe-GB1-UCS2 +14267 -0
  17. data/data/hexapdf/cmap/Adobe-Japan1-UCS2 +19159 -0
  18. data/data/hexapdf/cmap/Adobe-Korea1-UCS2 +9267 -0
  19. data/data/hexapdf/cmap/B5pc-H +337 -0
  20. data/data/hexapdf/cmap/B5pc-V +90 -0
  21. data/data/hexapdf/cmap/CNS-EUC-H +490 -0
  22. data/data/hexapdf/cmap/CNS-EUC-V +538 -0
  23. data/data/hexapdf/cmap/ETen-B5-H +343 -0
  24. data/data/hexapdf/cmap/ETen-B5-V +91 -0
  25. data/data/hexapdf/cmap/ETenms-B5-H +79 -0
  26. data/data/hexapdf/cmap/ETenms-B5-V +99 -0
  27. data/data/hexapdf/cmap/EUC-H +207 -0
  28. data/data/hexapdf/cmap/EUC-V +105 -0
  29. data/data/hexapdf/cmap/Ext-RKSJ-H +768 -0
  30. data/data/hexapdf/cmap/Ext-RKSJ-V +117 -0
  31. data/data/hexapdf/cmap/GB-EUC-H +173 -0
  32. data/data/hexapdf/cmap/GB-EUC-V +98 -0
  33. data/data/hexapdf/cmap/GBK-EUC-H +4273 -0
  34. data/data/hexapdf/cmap/GBK-EUC-V +97 -0
  35. data/data/hexapdf/cmap/GBK2K-H +5325 -0
  36. data/data/hexapdf/cmap/GBK2K-V +118 -0
  37. data/data/hexapdf/cmap/GBKp-EUC-H +4272 -0
  38. data/data/hexapdf/cmap/GBKp-EUC-V +97 -0
  39. data/data/hexapdf/cmap/GBpc-EUC-H +175 -0
  40. data/data/hexapdf/cmap/GBpc-EUC-V +98 -0
  41. data/data/hexapdf/cmap/H +200 -0
  42. data/data/hexapdf/cmap/HKscs-B5-H +1331 -0
  43. data/data/hexapdf/cmap/HKscs-B5-V +90 -0
  44. data/data/hexapdf/cmap/Identity-H +339 -0
  45. data/data/hexapdf/cmap/Identity-V +73 -0
  46. data/data/hexapdf/cmap/KSC-EUC-H +562 -0
  47. data/data/hexapdf/cmap/KSC-EUC-V +94 -0
  48. data/data/hexapdf/cmap/KSCms-UHC-H +776 -0
  49. data/data/hexapdf/cmap/KSCms-UHC-HW-H +775 -0
  50. data/data/hexapdf/cmap/KSCms-UHC-HW-V +93 -0
  51. data/data/hexapdf/cmap/KSCms-UHC-V +94 -0
  52. data/data/hexapdf/cmap/KSCpc-EUC-H +608 -0
  53. data/data/hexapdf/cmap/LICENSE.txt +26 -0
  54. data/data/hexapdf/cmap/README.txt +9 -0
  55. data/data/hexapdf/cmap/UniCNS-UCS2-H +16992 -0
  56. data/data/hexapdf/cmap/UniCNS-UCS2-V +90 -0
  57. data/data/hexapdf/cmap/UniCNS-UTF16-H +19117 -0
  58. data/data/hexapdf/cmap/UniCNS-UTF16-V +94 -0
  59. data/data/hexapdf/cmap/UniGB-UCS2-H +14321 -0
  60. data/data/hexapdf/cmap/UniGB-UCS2-V +101 -0
  61. data/data/hexapdf/cmap/UniGB-UTF16-H +14381 -0
  62. data/data/hexapdf/cmap/UniGB-UTF16-V +104 -0
  63. data/data/hexapdf/cmap/UniJIS-UCS2-H +8870 -0
  64. data/data/hexapdf/cmap/UniJIS-UCS2-HW-H +81 -0
  65. data/data/hexapdf/cmap/UniJIS-UCS2-HW-V +279 -0
  66. data/data/hexapdf/cmap/UniJIS-UCS2-V +275 -0
  67. data/data/hexapdf/cmap/UniJIS-UTF16-H +14450 -0
  68. data/data/hexapdf/cmap/UniJIS-UTF16-V +299 -0
  69. data/data/hexapdf/cmap/UniKS-UCS2-H +8725 -0
  70. data/data/hexapdf/cmap/UniKS-UCS2-V +95 -0
  71. data/data/hexapdf/cmap/UniKS-UTF16-H +8895 -0
  72. data/data/hexapdf/cmap/UniKS-UTF16-V +99 -0
  73. data/data/hexapdf/cmap/V +105 -0
  74. data/examples/arc.rb +3 -3
  75. data/examples/merging.rb +4 -1
  76. data/examples/optimizing.rb +3 -0
  77. data/examples/show_char_bboxes.rb +2 -2
  78. data/examples/truetype.rb +2 -2
  79. data/lib/hexapdf/cli.rb +40 -1
  80. data/lib/hexapdf/cli/batch.rb +72 -0
  81. data/lib/hexapdf/cli/command.rb +112 -15
  82. data/lib/hexapdf/cli/files.rb +2 -2
  83. data/lib/hexapdf/cli/images.rb +14 -6
  84. data/lib/hexapdf/cli/info.rb +6 -8
  85. data/lib/hexapdf/cli/inspect.rb +5 -8
  86. data/lib/hexapdf/cli/merge.rb +13 -20
  87. data/lib/hexapdf/cli/modify.rb +4 -7
  88. data/lib/hexapdf/cli/optimize.rb +2 -5
  89. data/lib/hexapdf/configuration.rb +32 -3
  90. data/lib/hexapdf/content/canvas.rb +130 -37
  91. data/lib/hexapdf/content/parser.rb +40 -6
  92. data/lib/hexapdf/content/processor.rb +4 -4
  93. data/lib/hexapdf/document.rb +40 -10
  94. data/lib/hexapdf/document/fonts.rb +1 -0
  95. data/lib/hexapdf/encryption/security_handler.rb +8 -12
  96. data/lib/hexapdf/filter/flate_decode.rb +25 -2
  97. data/lib/hexapdf/font/cmap.rb +124 -8
  98. data/lib/hexapdf/font/cmap/parser.rb +65 -15
  99. data/lib/hexapdf/font/encoding/base.rb +2 -2
  100. data/lib/hexapdf/font/encoding/glyph_list.rb +2 -4
  101. data/lib/hexapdf/font/true_type.rb +1 -0
  102. data/lib/hexapdf/font/true_type/builder.rb +75 -0
  103. data/lib/hexapdf/font/true_type/optimizer.rb +65 -0
  104. data/lib/hexapdf/font/true_type/subsetter.rb +9 -22
  105. data/lib/hexapdf/font/true_type_wrapper.rb +9 -21
  106. data/lib/hexapdf/font_loader.rb +1 -1
  107. data/lib/hexapdf/importer.rb +1 -1
  108. data/lib/hexapdf/serializer.rb +5 -3
  109. data/lib/hexapdf/type.rb +2 -0
  110. data/lib/hexapdf/type/cid_font.rb +120 -0
  111. data/lib/hexapdf/type/font.rb +32 -12
  112. data/lib/hexapdf/type/font_simple.rb +34 -42
  113. data/lib/hexapdf/type/font_type0.rb +148 -0
  114. data/lib/hexapdf/type/form.rb +4 -4
  115. data/lib/hexapdf/type/page.rb +12 -11
  116. data/lib/hexapdf/type/resources.rb +14 -0
  117. data/lib/hexapdf/utils/graphics_helpers.rb +77 -0
  118. data/lib/hexapdf/version.rb +1 -1
  119. data/man/man1/hexapdf.1 +43 -1
  120. data/test/hexapdf/content/test_canvas.rb +76 -0
  121. data/test/hexapdf/content/test_parser.rb +20 -1
  122. data/test/hexapdf/content/test_processor.rb +11 -7
  123. data/test/hexapdf/document/test_fonts.rb +3 -1
  124. data/test/hexapdf/font/cmap/test_parser.rb +42 -7
  125. data/test/hexapdf/font/encoding/test_base.rb +1 -1
  126. data/test/hexapdf/font/encoding/test_glyph_list.rb +3 -3
  127. data/test/hexapdf/font/test_cmap.rb +104 -0
  128. data/test/hexapdf/font/test_true_type_wrapper.rb +63 -46
  129. data/test/hexapdf/font/true_type/test_builder.rb +37 -0
  130. data/test/hexapdf/font/true_type/test_optimizer.rb +27 -0
  131. data/test/hexapdf/font/true_type/test_subsetter.rb +6 -13
  132. data/test/hexapdf/test_configuration.rb +12 -7
  133. data/test/hexapdf/test_document.rb +24 -0
  134. data/test/hexapdf/test_importer.rb +9 -1
  135. data/test/hexapdf/test_writer.rb +2 -2
  136. data/test/hexapdf/type/test_cid_font.rb +61 -0
  137. data/test/hexapdf/type/test_font.rb +31 -4
  138. data/test/hexapdf/type/test_font_simple.rb +6 -21
  139. data/test/hexapdf/type/test_font_type0.rb +114 -0
  140. data/test/hexapdf/type/test_resources.rb +17 -1
  141. data/test/hexapdf/utils/test_graphics_helpers.rb +29 -0
  142. metadata +82 -3
@@ -0,0 +1,99 @@
1
+ %!PS-Adobe-3.0 Resource-CMap
2
+ %%DocumentNeededResources: ProcSet (CIDInit)
3
+ %%DocumentNeededResources: CMap (UniKS-UTF16-H)
4
+ %%IncludeResource: ProcSet (CIDInit)
5
+ %%IncludeResource: CMap (UniKS-UTF16-H)
6
+ %%BeginResource: CMap (UniKS-UTF16-V)
7
+ %%Title: (UniKS-UTF16-V Adobe Korea1 1)
8
+ %%Version: 1.004
9
+ %%Copyright: -----------------------------------------------------------
10
+ %%Copyright: Copyright 1990-2015 Adobe Systems Incorporated.
11
+ %%Copyright: All rights reserved.
12
+ %%Copyright:
13
+ %%Copyright: Redistribution and use in source and binary forms, with or
14
+ %%Copyright: without modification, are permitted provided that the
15
+ %%Copyright: following conditions are met:
16
+ %%Copyright:
17
+ %%Copyright: Redistributions of source code must retain the above
18
+ %%Copyright: copyright notice, this list of conditions and the following
19
+ %%Copyright: disclaimer.
20
+ %%Copyright:
21
+ %%Copyright: Redistributions in binary form must reproduce the above
22
+ %%Copyright: copyright notice, this list of conditions and the following
23
+ %%Copyright: disclaimer in the documentation and/or other materials
24
+ %%Copyright: provided with the distribution.
25
+ %%Copyright:
26
+ %%Copyright: Neither the name of Adobe Systems Incorporated nor the names
27
+ %%Copyright: of its contributors may be used to endorse or promote
28
+ %%Copyright: products derived from this software without specific prior
29
+ %%Copyright: written permission.
30
+ %%Copyright:
31
+ %%Copyright: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
32
+ %%Copyright: CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
33
+ %%Copyright: INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
34
+ %%Copyright: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35
+ %%Copyright: DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
36
+ %%Copyright: CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
37
+ %%Copyright: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38
+ %%Copyright: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39
+ %%Copyright: LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40
+ %%Copyright: HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41
+ %%Copyright: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
42
+ %%Copyright: OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
43
+ %%Copyright: SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44
+ %%Copyright: -----------------------------------------------------------
45
+ %%EndComments
46
+
47
+ /CIDInit /ProcSet findresource begin
48
+
49
+ 12 dict begin
50
+
51
+ begincmap
52
+
53
+ /UniKS-UTF16-H usecmap
54
+
55
+ /CIDSystemInfo 3 dict dup begin
56
+ /Registry (Adobe) def
57
+ /Ordering (Korea1) def
58
+ /Supplement 1 def
59
+ end def
60
+
61
+ /CMapName /UniKS-UTF16-V def
62
+ /CMapVersion 1.004 def
63
+ /CMapType 1 def
64
+
65
+ /XUID [1 10 25545] def
66
+
67
+ /WMode 1 def
68
+
69
+ 11 begincidchar
70
+ <2016> 8061
71
+ <2025> 8058
72
+ <3013> 8075
73
+ <ff01> 8076
74
+ <ff0c> 8079
75
+ <ff0e> 8080
76
+ <ff3b> 8087
77
+ <ff3d> 8088
78
+ <ff3f> 8089
79
+ <ff5e> 8062
80
+ <ffe3> 8093
81
+ endcidchar
82
+
83
+ 7 begincidrange
84
+ <2013> <2014> 8059
85
+ <3001> <3002> 8056
86
+ <3008> <3011> 8065
87
+ <3014> <3015> 8063
88
+ <ff08> <ff09> 8077
89
+ <ff1a> <ff1f> 8081
90
+ <ff5b> <ff5d> 8090
91
+ endcidrange
92
+
93
+ endcmap
94
+ CMapName currentdict /CMap defineresource pop
95
+ end
96
+ end
97
+
98
+ %%EndResource
99
+ %%EOF
@@ -0,0 +1,105 @@
1
+ %!PS-Adobe-3.0 Resource-CMap
2
+ %%DocumentNeededResources: ProcSet (CIDInit)
3
+ %%DocumentNeededResources: CMap (H)
4
+ %%IncludeResource: ProcSet (CIDInit)
5
+ %%IncludeResource: CMap (H)
6
+ %%BeginResource: CMap (V)
7
+ %%Title: (V Adobe Japan1 1)
8
+ %%Version: 12.004
9
+ %%Copyright: -----------------------------------------------------------
10
+ %%Copyright: Copyright 1990-2015 Adobe Systems Incorporated.
11
+ %%Copyright: All rights reserved.
12
+ %%Copyright:
13
+ %%Copyright: Redistribution and use in source and binary forms, with or
14
+ %%Copyright: without modification, are permitted provided that the
15
+ %%Copyright: following conditions are met:
16
+ %%Copyright:
17
+ %%Copyright: Redistributions of source code must retain the above
18
+ %%Copyright: copyright notice, this list of conditions and the following
19
+ %%Copyright: disclaimer.
20
+ %%Copyright:
21
+ %%Copyright: Redistributions in binary form must reproduce the above
22
+ %%Copyright: copyright notice, this list of conditions and the following
23
+ %%Copyright: disclaimer in the documentation and/or other materials
24
+ %%Copyright: provided with the distribution.
25
+ %%Copyright:
26
+ %%Copyright: Neither the name of Adobe Systems Incorporated nor the names
27
+ %%Copyright: of its contributors may be used to endorse or promote
28
+ %%Copyright: products derived from this software without specific prior
29
+ %%Copyright: written permission.
30
+ %%Copyright:
31
+ %%Copyright: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
32
+ %%Copyright: CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
33
+ %%Copyright: INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
34
+ %%Copyright: MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35
+ %%Copyright: DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
36
+ %%Copyright: CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
37
+ %%Copyright: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38
+ %%Copyright: NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39
+ %%Copyright: LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40
+ %%Copyright: HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41
+ %%Copyright: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
42
+ %%Copyright: OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
43
+ %%Copyright: SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44
+ %%Copyright: -----------------------------------------------------------
45
+ %%EndComments
46
+
47
+ /CIDInit /ProcSet findresource begin
48
+
49
+ 12 dict begin
50
+
51
+ begincmap
52
+
53
+ /H usecmap
54
+
55
+ /CIDSystemInfo 3 dict dup begin
56
+ /Registry (Adobe) def
57
+ /Ordering (Japan1) def
58
+ /Supplement 1 def
59
+ end def
60
+
61
+ /CMapName /V def
62
+ /CMapVersion 12.004 def
63
+ /CMapType 1 def
64
+
65
+ /UIDOffset 850 def
66
+ /XUID [1 10 25340] def
67
+
68
+ /WMode 1 def
69
+
70
+ 27 begincidrange
71
+ <2122> <2123> 7887
72
+ <2131> <2132> 7889
73
+ <213c> <213e> 7891
74
+ <2141> <2145> 7894
75
+ <214a> <215b> 7899
76
+ <2161> <2161> 7917
77
+ <2421> <2421> 7918
78
+ <2423> <2423> 7919
79
+ <2425> <2425> 7920
80
+ <2427> <2427> 7921
81
+ <2429> <2429> 7922
82
+ <2443> <2443> 7923
83
+ <2463> <2463> 7924
84
+ <2465> <2465> 7925
85
+ <2467> <2467> 7926
86
+ <246e> <246e> 7927
87
+ <2521> <2521> 7928
88
+ <2523> <2523> 7929
89
+ <2525> <2525> 7930
90
+ <2527> <2527> 7931
91
+ <2529> <2529> 7932
92
+ <2543> <2543> 7933
93
+ <2563> <2563> 7934
94
+ <2565> <2565> 7935
95
+ <2567> <2567> 7936
96
+ <256e> <256e> 7937
97
+ <2575> <2576> 7938
98
+ endcidrange
99
+ endcmap
100
+ CMapName currentdict /CMap defineresource pop
101
+ end
102
+ end
103
+
104
+ %%EndResource
105
+ %%EOF
data/examples/arc.rb CHANGED
@@ -17,7 +17,7 @@ radius = 75
17
17
 
18
18
  # Left pie chart
19
19
  center = [page.box.width * 0.25, page.box.height * 0.85]
20
- pie = canvas.graphic_object(:solid_arc, cx: center[0], cy: center[1],
20
+ pie = canvas.graphic_object(:solid_arc, cx: center[0], cy: center[1],
21
21
  outer_a: radius, outer_b: radius)
22
22
  canvas.fill_color('ddddff')
23
23
  canvas.draw(pie, start_angle: 30, end_angle: 110).fill
@@ -26,7 +26,7 @@ canvas.draw(pie, start_angle: 110, end_angle: 130).fill
26
26
  canvas.fill_color('ddffdd')
27
27
  canvas.draw(pie, start_angle: 130, end_angle: 30).fill
28
28
 
29
- arc = canvas.graphic_object(:arc, cx: center[0], cy: center[1],
29
+ arc = canvas.graphic_object(:arc, cx: center[0], cy: center[1],
30
30
  a: radius, b: radius)
31
31
  canvas.stroke_color('0000ff')
32
32
  canvas.draw(arc, start_angle: 30, end_angle: 110).stroke
@@ -38,7 +38,7 @@ canvas.draw(arc, start_angle: 130, end_angle: 30).stroke
38
38
  # Right pie chart
39
39
  center = [page.box.width * 0.75, page.box.height * 0.85]
40
40
  canvas.stroke_color('777777')
41
- pie = canvas.graphic_object(:solid_arc, cx: center[0], cy: center[1],
41
+ pie = canvas.graphic_object(:solid_arc, cx: center[0], cy: center[1],
42
42
  outer_a: radius, outer_b: radius)
43
43
  canvas.fill_color('ddddff')
44
44
  canvas.draw(pie, start_angle: 30, end_angle: 110).fill_stroke
data/examples/merging.rb CHANGED
@@ -10,6 +10,9 @@
10
10
  # Sometimes other things like attached files or a document outline
11
11
  # should also be preserved.
12
12
  #
13
+ # The hexapdf binary provides a command for merging files which does
14
+ # the merging in a more sophisticated way.
15
+ #
13
16
  # Usage:
14
17
  # : `ruby merging.rb INPUT1.PDF INPUT2.PDF ...`
15
18
  #
@@ -21,4 +24,4 @@ ARGV.each do |file|
21
24
  pdf = HexaPDF::Document.open(file)
22
25
  pdf.pages.each {|page| target.pages << target.import(page)}
23
26
  end
24
- target.write("merging.pdf", optimize: true)
27
+ target.write("2.merging.pdf", optimize: true)
@@ -7,6 +7,9 @@
7
7
  # over which aspects should be optimized. See [HexaPDF::Task::Optimize]
8
8
  # for detailed information.
9
9
  #
10
+ # The hexapdf binary provides an optimization command which does some
11
+ # additional operations like optimizing the page tree.
12
+ #
10
13
  # Usage:
11
14
  # : `ruby optimizing.rb INPUT.PDF`
12
15
  #
@@ -2,12 +2,12 @@
2
2
  #
3
3
  # This example shows how to process the contents of a page. It finds all
4
4
  # characters on a page and surrounds them with their bounding box. Additionally,
5
- # all consecutive text runs are also surrounded by box.
5
+ # all consecutive text runs are also surrounded by a box.
6
6
  #
7
7
  # The code provides two ways of generating the boxes. The commented part of
8
8
  # `ShowTextProcessor#show_text` uses a polyline since some characters may be
9
9
  # transformed (rotated or skewed). The un-commented part uses rectangles which
10
- # is faster and correct in most cases.
10
+ # is faster and correct for most but not all cases.
11
11
  #
12
12
  # Usage:
13
13
  # : `ruby show_char_bboxes.rb INPUT.PDF`
data/examples/truetype.rb CHANGED
@@ -17,9 +17,9 @@
17
17
  require 'hexapdf'
18
18
 
19
19
  doc = HexaPDF::Document.new
20
- doc.config['font.on_missing_glyph'] = ->(n,f) { f.missing_glyph_id }
20
+ doc.config['font.on_missing_glyph'] = ->(_, f) { f.missing_glyph_id }
21
21
  doc.config['font.map'] = {
22
- 'myfont' => {none: ARGV.shift || File.join(__dir__, '../test/data/fonts/Ubuntu-Title.ttf')}
22
+ 'myfont' => {none: ARGV.shift || File.join(__dir__, '../test/data/fonts/Ubuntu-Title.ttf')},
23
23
  }
24
24
 
25
25
  wrapper = doc.fonts.load('myfont')
data/lib/hexapdf/cli.rb CHANGED
@@ -39,6 +39,7 @@ require 'hexapdf/cli/modify'
39
39
  require 'hexapdf/cli/merge'
40
40
  require 'hexapdf/cli/optimize'
41
41
  require 'hexapdf/cli/images'
42
+ require 'hexapdf/cli/batch'
42
43
  require 'hexapdf/version'
43
44
  require 'hexapdf/document'
44
45
 
@@ -59,14 +60,28 @@ module HexaPDF
59
60
  # The CmdParse::CommandParser class that is used for running the CLI application.
60
61
  class Application < CmdParse::CommandParser
61
62
 
63
+ # Verbosity level for no output
64
+ VERBOSITY_QUIET = 0
65
+
66
+ # Verbosity level for warning output
67
+ VERBOSITY_WARNING = 1
68
+
69
+ # Verbosity level for informational output
70
+ VERBOSITY_INFO = 2
71
+
62
72
  # Specifies whether an operation should be forced. For example, if an existing file should be
63
73
  # overwritten.
64
74
  attr_reader :force
65
75
 
76
+ # Specifies whether strict parsing and validation should be used.
77
+ attr_reader :strict
78
+
66
79
  def initialize #:nodoc:
67
80
  super(handle_exceptions: :no_help)
68
81
  main_command.options.program_name = "hexapdf"
69
82
  main_command.options.version = HexaPDF::VERSION
83
+ main_command.extend(Command::Extensions)
84
+ main_command.define_singleton_method(:usage_commands) { "COMMAND" }
70
85
  add_command(HexaPDF::CLI::Info.new)
71
86
  add_command(HexaPDF::CLI::Files.new)
72
87
  add_command(HexaPDF::CLI::Images.new)
@@ -74,13 +89,37 @@ module HexaPDF
74
89
  add_command(HexaPDF::CLI::Modify.new)
75
90
  add_command(HexaPDF::CLI::Optimize.new)
76
91
  add_command(HexaPDF::CLI::Merge.new)
92
+ add_command(HexaPDF::CLI::Batch.new)
77
93
  add_command(CmdParse::HelpCommand.new)
78
- add_command(CmdParse::VersionCommand.new)
94
+ version_command = CmdParse::VersionCommand.new(add_switches: false)
95
+ add_command(version_command)
96
+ main_options.on_tail("--version", "Show hexapdf version") { version_command.execute }
79
97
 
80
98
  @force = false
99
+ @verbosity = VERBOSITY_WARNING
100
+ @strict = false
81
101
  global_options.on("--[no-]force", "Force overwriting existing files. Default: false") do |f|
82
102
  @force = f
83
103
  end
104
+ global_options.on("--strict", "Enable strict parsing and validation") do |s|
105
+ @strict = s
106
+ end
107
+ global_options.on("--verbose", "-v", "Verbose output") do
108
+ @verbosity += 1
109
+ end
110
+ global_options.on("--quiet", "-q", "Suppress any output") do
111
+ @verbosity = VERBOSITY_QUIET
112
+ end
113
+ end
114
+
115
+ # Returns +true+ if the verbosity level warning is enabled.
116
+ def verbosity_warning?
117
+ @verbosity >= VERBOSITY_WARNING
118
+ end
119
+
120
+ # Returns +true+ if the verbosity level info is enabled.
121
+ def verbosity_info?
122
+ @verbosity >= VERBOSITY_INFO
84
123
  end
85
124
 
86
125
  def parse(argv = ARGV) #:nodoc:
@@ -0,0 +1,72 @@
1
+ # -*- encoding: utf-8 -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2017 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #++
33
+
34
+ require 'hexapdf/cli/command'
35
+ require 'shellwords'
36
+
37
+ module HexaPDF
38
+ module CLI
39
+
40
+ # Execute the same command for multiple input files.
41
+ class Batch < Command
42
+
43
+ def initialize #:nodoc:
44
+ super('batch', takes_commands: false)
45
+ short_desc("Execute a single command on multiple files")
46
+ long_desc(<<-EOF.gsub!(/^ */, ''))
47
+ This command allows executing a single command for multiple input files, thereby reducing
48
+ the overall execution time.
49
+
50
+ The first argument is used as a hexapdf command line (but without the binary name) and
51
+ the rest as input files. The specified command will be executed for each input file, with
52
+ {} being replaced by the file name.
53
+ EOF
54
+ end
55
+
56
+ def execute(command, *files) #:nodoc:
57
+ args = Shellwords.split(command)
58
+ files.each do |file|
59
+ begin
60
+ HexaPDF::CLI::Application.new.parse(args.map {|a| a.gsub(/{}/, file)})
61
+ rescue
62
+ if command_parser.verbosity_warning?
63
+ $stderr.puts "Error processing '#{file}': #{$!.message}"
64
+ end
65
+ end
66
+ end
67
+ end
68
+
69
+ end
70
+
71
+ end
72
+ end
@@ -35,6 +35,7 @@ require 'io/console'
35
35
  require 'ostruct'
36
36
  require 'cmdparse'
37
37
  require 'hexapdf/document'
38
+ require 'hexapdf/font/true_type'
38
39
 
39
40
  module HexaPDF
40
41
  module CLI
@@ -43,6 +44,16 @@ module HexaPDF
43
44
  # commands.
44
45
  class Command < CmdParse::Command
45
46
 
47
+ module Extensions #:nodoc:
48
+ def help_banner #:nodoc:
49
+ "hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
50
+ "Copyright (c) 2014-2017 Thomas Leitner; licensed under the AGPLv3\n\n" <<
51
+ format(usage, indent: 7) << "\n\n"
52
+ end
53
+ end
54
+
55
+ include Extensions
56
+
46
57
  def initialize(*args, &block) #:nodoc:
47
58
  super
48
59
  @out_options = OpenStruct.new
@@ -51,6 +62,7 @@ module HexaPDF
51
62
  @out_options.object_streams = :preserve
52
63
  @out_options.xref_streams = :preserve
53
64
  @out_options.streams = :preserve
65
+ @out_options.optimize_fonts = false
54
66
 
55
67
  @out_options.encryption = :preserve
56
68
  @out_options.enc_user_pwd = @out_options.enc_owner_pwd = nil
@@ -62,6 +74,57 @@ module HexaPDF
62
74
 
63
75
  protected
64
76
 
77
+ # Creates a HexaPDF::Document instance for the PDF file and yields it.
78
+ #
79
+ # If +out_file+ is given, the document is written to it after yielding.
80
+ def with_document(file, password: nil, out_file: nil) #:yield: document
81
+ if file == out_file
82
+ doc = HexaPDF::Document.open(file, pdf_options(password))
83
+ else
84
+ file_io = File.open(file, 'rb')
85
+ doc = HexaPDF::Document.new(io: file_io, **pdf_options(password))
86
+ end
87
+
88
+ yield(doc)
89
+
90
+ write_document(doc, out_file)
91
+ ensure
92
+ file_io&.close
93
+ end
94
+
95
+ # Returns a hash with HexaPDF::Document options based on the given password and the option
96
+ # switches.
97
+ def pdf_options(password)
98
+ hash = {decryption_opts: {password: password}, config: {}}
99
+ hash[:config]['parser.on_correctable_error'] =
100
+ if command_parser.strict
101
+ proc { true }
102
+ else
103
+ proc do |_, msg, pos|
104
+ if command_parser.verbosity_info?
105
+ msg = MalformedPDFError.new(msg, pos: pos).message
106
+ $stderr.puts "Corrected parsing problem: #{msg}"
107
+ end
108
+ false
109
+ end
110
+ end
111
+ hash
112
+ end
113
+
114
+ # Writes the document to the given file or does nothing if +out_file+ is +nil+.
115
+ def write_document(doc, out_file)
116
+ if out_file
117
+ doc.validate(auto_correct: true) do |msg, correctable|
118
+ if command_parser.strict && !correctable
119
+ raise "Validation error: #{msg}"
120
+ elsif command_parser.verbosity_info?
121
+ $stderr.puts "#{correctable ? 'Corrected' : 'Ignored'} validation problem: #{msg}"
122
+ end
123
+ end
124
+ doc.write(out_file, validate: false)
125
+ end
126
+ end
127
+
65
128
  # Checks whether the given output file exists and raises an error if it does and
66
129
  # HexaPDF::CLI#force is not set.
67
130
  def maybe_raise_on_existing_file(filename)
@@ -98,6 +161,10 @@ module HexaPDF
98
161
  "time; default: #{@out_options.compress_pages})") do |c|
99
162
  @out_options.compress_pages = c
100
163
  end
164
+ options.on("--[no-]optimize-fonts", "Optimize embedded font files; " \
165
+ "default: #{@out_options.optimize_fonts})") do |o|
166
+ @out_options.optimize_fonts = o
167
+ end
101
168
  end
102
169
 
103
170
  # Defines the encryption options.
@@ -160,23 +227,37 @@ module HexaPDF
160
227
  object_streams: @out_options.object_streams,
161
228
  xref_streams: @out_options.xref_streams,
162
229
  compress_pages: @out_options.compress_pages)
163
- handle_streams(doc) unless @out_options.streams == :preserve
230
+ if @out_options.streams != :preserve || @out_options.optimize_fonts
231
+ doc.each(current: false) do |obj|
232
+ optimize_stream(obj)
233
+ optimize_font(obj)
234
+ end
235
+ end
164
236
  end
165
237
 
166
238
  IGNORED_FILTERS = { #:nodoc:
167
239
  CCITTFaxDecode: true, JBIG2Decode: true, DCTDecode: true, JPXDecode: true, Crypt: true
168
240
  }.freeze
169
241
 
170
- # Applies the chosen stream mode to all streams.
171
- def handle_streams(doc)
172
- doc.each(current: false) do |obj|
173
- next if !obj.respond_to?(:set_filter) || Array(obj[:Filter]).any? {|f| IGNORED_FILTERS[f]}
174
- if @out_options.streams == :compress
175
- obj.set_filter(:FlateDecode)
176
- else
177
- obj.set_filter(nil)
178
- end
179
- end
242
+ # Applies the chosen stream mode to the given object.
243
+ def optimize_stream(obj)
244
+ return if @out_options.streams == :preserve || !obj.respond_to?(:set_filter) ||
245
+ Array(obj[:Filter]).any? {|f| IGNORED_FILTERS[f]}
246
+
247
+ obj.set_filter(@out_options.streams == :compress ? :FlateDecode : nil)
248
+ end
249
+
250
+ # Optimize the object if it is a font object.
251
+ def optimize_font(obj)
252
+ return unless @out_options.optimize_fonts && obj.kind_of?(HexaPDF::Type::Font) &&
253
+ (obj[:Subtype] == :TrueType ||
254
+ (obj[:Subtype] == :Type0 && obj.descendant_font[:Subtype] == :CIDFontType2)) &&
255
+ obj.embedded?
256
+
257
+ font = HexaPDF::Font::TrueType::Font.new(StringIO.new(obj.font_file.stream))
258
+ data = HexaPDF::Font::TrueType::Optimizer.build_for_pdf(font)
259
+ obj.font_file.stream = data
260
+ obj.font_file[:Length1] = data.size
180
261
  end
181
262
 
182
263
  # Applies the encryption related options to the given HexaPDF::Document instance.
@@ -199,7 +280,9 @@ module HexaPDF
199
280
  ROTATE_MAP = {'l' => -90, 'r' => 90, 'd' => 180, 'n' => :none}.freeze #:nodoc:
200
281
 
201
282
  # Parses the pages specification string and returns an array of tuples containing a page
202
- # number and a rotation value (either -90, 90, 180 or :none).
283
+ # number and a rotation value (either -90, 90, 180, :none or +nil+ where an integer means
284
+ # adding a rotation by that number of degrees, :none means removing any set rotation value and
285
+ # +nil+ means preserving the set rotation value).
203
286
  #
204
287
  # The parameter +count+ needs to be the total number of pages in the document.
205
288
  #
@@ -208,10 +291,12 @@ module HexaPDF
208
291
  range.split(',').each_with_object([]) do |str, arr|
209
292
  case str
210
293
  when /\A#{PAGE_NUMBER_SPEC}(l|r|d|n)?\z/o
211
- arr << [($1 == 'e' ? count : str.to_i) - 1, ROTATE_MAP[$2]]
294
+ page_num = ($1 == 'e' ? count : str.to_i)
295
+ next if page_num > count
296
+ arr << [page_num - 1, ROTATE_MAP[$2]]
212
297
  when /\A#{PAGE_NUMBER_SPEC}-#{PAGE_NUMBER_SPEC}(?:\/([1-9]\d*))?(l|r|d|n)?\z/
213
- start_nr = ($1 == 'e' ? count : $1.to_i) - 1
214
- end_nr = ($2 == 'e' ? count : $2.to_i) - 1
298
+ start_nr = ($1 == 'e' ? count : [$1.to_i, count].min) - 1
299
+ end_nr = ($2 == 'e' ? count : [$2.to_i, count].min) - 1
215
300
  step = ($3 ? $3.to_i : 1) * (start_nr > end_nr ? -1 : 1)
216
301
  rotation = ROTATE_MAP[$4]
217
302
  start_nr.step(to: end_nr, by: step) {|n| arr << [n, rotation]}
@@ -235,6 +320,18 @@ module HexaPDF
235
320
  end
236
321
  end
237
322
 
323
+ # Removes unused pages and page tree nodes from the document.
324
+ def remove_unused_pages(doc)
325
+ retained = doc.pages.each_with_object({}) {|page, h| h[page.data] = true}
326
+ retained[doc.pages.root.data] = true
327
+ doc.each(current: false) do |obj|
328
+ next unless obj.kind_of?(HexaPDF::Dictionary)
329
+ if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
330
+ doc.delete(obj)
331
+ end
332
+ end
333
+ end
334
+
238
335
  private
239
336
 
240
337
  # Displays the given prompt, reads from the console without echo and returns the read string.