pdf_paradise 0.1.66

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of pdf_paradise might be problematic. Click here for more details.

Files changed (110) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +705 -0
  3. data/bin/automatic_pdf_title +7 -0
  4. data/bin/combine_these_pdf_pages +7 -0
  5. data/bin/compress_via_hexapdf +7 -0
  6. data/bin/convert_markdown_to_pdf +7 -0
  7. data/bin/convert_pdf_to_text +7 -0
  8. data/bin/delete_first_page_of_this_pdf_file +7 -0
  9. data/bin/merge_then_open +7 -0
  10. data/bin/n_pages +10 -0
  11. data/bin/open_main_pdf +7 -0
  12. data/bin/pdf_paradise +9 -0
  13. data/bin/set_main_book +7 -0
  14. data/bin/set_title_of_this_pdf_file +15 -0
  15. data/doc/README.gen +662 -0
  16. data/doc/todo/todo.md +7 -0
  17. data/lib/pdf_paradise/base/base.rb +239 -0
  18. data/lib/pdf_paradise/base/colours.rb +36 -0
  19. data/lib/pdf_paradise/commandline/commandline.rb +101 -0
  20. data/lib/pdf_paradise/commandline/help.rb +73 -0
  21. data/lib/pdf_paradise/commandline/menu.rb +142 -0
  22. data/lib/pdf_paradise/compress/compress_via_hexapdf.rb +27 -0
  23. data/lib/pdf_paradise/compress_this_pdf_file.rb +87 -0
  24. data/lib/pdf_paradise/constants/constants.rb +76 -0
  25. data/lib/pdf_paradise/convert_text_to_pdf.rb +94 -0
  26. data/lib/pdf_paradise/css/project.css +17 -0
  27. data/lib/pdf_paradise/djvu_to_pdf.rb +85 -0
  28. data/lib/pdf_paradise/gui/README.md +6 -0
  29. data/lib/pdf_paradise/gui/fox/split_pdf_file.rb +77 -0
  30. data/lib/pdf_paradise/gui/gtk2/delete_the_first_or_the_last_page_of_this_pdf_file/delete_the_first_or_the_last_page_of_this_pdf_file.rb +39 -0
  31. data/lib/pdf_paradise/gui/gtk2/pdf_viewer/pdf_viewer.rb +34 -0
  32. data/lib/pdf_paradise/gui/gtk2/split_pdf_file/split_pdf_file.rb +34 -0
  33. data/lib/pdf_paradise/gui/gtk2/statistics_widget/statistics_widget.rb +34 -0
  34. data/lib/pdf_paradise/gui/gtk2/to_pdf/to_pdf.rb +32 -0
  35. data/lib/pdf_paradise/gui/gtk3/controller/controller.rb +212 -0
  36. data/lib/pdf_paradise/gui/gtk3/convert_pdf_to_text/convert_pdf_to_text.rb +34 -0
  37. data/lib/pdf_paradise/gui/gtk3/delete_the_first_or_the_last_page_of_this_pdf_file/delete_the_first_or_the_last_page_of_this_pdf_file.rb +39 -0
  38. data/lib/pdf_paradise/gui/gtk3/pdf_viewer/pdf_viewer.rb +34 -0
  39. data/lib/pdf_paradise/gui/gtk3/split_pdf_file/split_pdf_file.rb +34 -0
  40. data/lib/pdf_paradise/gui/gtk3/statistics_widget/statistics_widget.rb +34 -0
  41. data/lib/pdf_paradise/gui/gtk3/to_pdf/to_pdf.rb +32 -0
  42. data/lib/pdf_paradise/gui/libui/extract_all_images_from_this_pdf_file/extract_all_images_from_this_pdf_file.rb +223 -0
  43. data/lib/pdf_paradise/gui/libui/statistics_widget/statistics_widget.rb +233 -0
  44. data/lib/pdf_paradise/gui/shared_code/convert_pdf_to_text/convert_pdf_to_text_module.rb +277 -0
  45. data/lib/pdf_paradise/gui/shared_code/delete_the_first_or_the_last_page_of_this_pdf_file/delete_the_first_or_the_last_page_of_this_pdf_file_module.rb +443 -0
  46. data/lib/pdf_paradise/gui/shared_code/pdf_viewer/pdf_viewer.css +5 -0
  47. data/lib/pdf_paradise/gui/shared_code/pdf_viewer/pdf_viewer_module.rb +284 -0
  48. data/lib/pdf_paradise/gui/shared_code/split_pdf_file/split_pdf_file.css +0 -0
  49. data/lib/pdf_paradise/gui/shared_code/split_pdf_file/split_pdf_file_module.rb +294 -0
  50. data/lib/pdf_paradise/gui/shared_code/statistics_widget/statistics_widget_module.rb +349 -0
  51. data/lib/pdf_paradise/gui/shared_code/to_pdf/to_pdf_module.rb +281 -0
  52. data/lib/pdf_paradise/hexapdf/001_rainbow_pattern_example.rb +0 -0
  53. data/lib/pdf_paradise/hexapdf/hexapdf.rb +123 -0
  54. data/lib/pdf_paradise/images/PDF_PARADISE_LOGO.png +0 -0
  55. data/lib/pdf_paradise/main_pdf/main_pdf.rb +444 -0
  56. data/lib/pdf_paradise/merge_pdf/menu.rb +63 -0
  57. data/lib/pdf_paradise/merge_pdf/merge_pdf.rb +306 -0
  58. data/lib/pdf_paradise/merge_pdf_namespace.rb +9 -0
  59. data/lib/pdf_paradise/merge_then_open/merge_then_open.rb +105 -0
  60. data/lib/pdf_paradise/pdf_file_n_total_pages.rb +249 -0
  61. data/lib/pdf_paradise/prawn_addons/README.md +2 -0
  62. data/lib/pdf_paradise/prawn_addons/prawn_addons.rb +17 -0
  63. data/lib/pdf_paradise/project/project.rb +22 -0
  64. data/lib/pdf_paradise/remove_pdf_password.rb +391 -0
  65. data/lib/pdf_paradise/requires/batch_require_toplevel_files.rb +22 -0
  66. data/lib/pdf_paradise/requires/colours.rb +7 -0
  67. data/lib/pdf_paradise/requires/colours_and_esystem_and_save_file_and_fileutils_and_opn.rb +11 -0
  68. data/lib/pdf_paradise/requires/esystem_and_colours.rb +10 -0
  69. data/lib/pdf_paradise/requires/esystem_and_opn_and_colours.rb +8 -0
  70. data/lib/pdf_paradise/requires/require_the_whole_project.rb +28 -0
  71. data/lib/pdf_paradise/requires/require_utility_scripts.rb +9 -0
  72. data/lib/pdf_paradise/set_main_book.rb +156 -0
  73. data/lib/pdf_paradise/set_pdf_title.rb +220 -0
  74. data/lib/pdf_paradise/sinatra/embeddable_interface.rb +318 -0
  75. data/lib/pdf_paradise/toplevel_methods/automatic_pdf_title.rb +55 -0
  76. data/lib/pdf_paradise/toplevel_methods/convert_epub_to_pdf.rb +27 -0
  77. data/lib/pdf_paradise/toplevel_methods/convert_markdown_to_pdf.rb +45 -0
  78. data/lib/pdf_paradise/toplevel_methods/convert_ppt_to_pdf.rb +35 -0
  79. data/lib/pdf_paradise/toplevel_methods/e.rb +16 -0
  80. data/lib/pdf_paradise/toplevel_methods/esystem.rb +19 -0
  81. data/lib/pdf_paradise/toplevel_methods/misc.rb +76 -0
  82. data/lib/pdf_paradise/toplevel_methods/number_pages.rb +38 -0
  83. data/lib/pdf_paradise/toplevel_methods/opened_pdf_files.rb +221 -0
  84. data/lib/pdf_paradise/toplevel_methods/query_pdf_title.rb +191 -0
  85. data/lib/pdf_paradise/toplevel_methods/reduce_size_of_this_pdf_file.rb +46 -0
  86. data/lib/pdf_paradise/toplevel_methods/roebe.rb +17 -0
  87. data/lib/pdf_paradise/toplevel_methods/rotate_pdf_file.rb +143 -0
  88. data/lib/pdf_paradise/toplevel_methods/to_pdf.rb +38 -0
  89. data/lib/pdf_paradise/utility_scripts/README.md +3 -0
  90. data/lib/pdf_paradise/utility_scripts/combine_these_pdf_pages.rb +118 -0
  91. data/lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb +175 -0
  92. data/lib/pdf_paradise/utility_scripts/delete_first_page_of_this_pdf_file.rb +221 -0
  93. data/lib/pdf_paradise/utility_scripts/delete_last_page_of_this_pdf_file.rb +180 -0
  94. data/lib/pdf_paradise/utility_scripts/delete_this_page_of_this_pdf_file.rb +329 -0
  95. data/lib/pdf_paradise/utility_scripts/extract_all_images_from_this_pdf_file.rb +129 -0
  96. data/lib/pdf_paradise/utility_scripts/extract_pdf_page.rb +283 -0
  97. data/lib/pdf_paradise/utility_scripts/pdf_optimizer.rb +111 -0
  98. data/lib/pdf_paradise/utility_scripts/pdf_statistics.rb +148 -0
  99. data/lib/pdf_paradise/utility_scripts/pdf_to_html.rb +75 -0
  100. data/lib/pdf_paradise/utility_scripts/remove_images.rb +110 -0
  101. data/lib/pdf_paradise/utility_scripts/split_pdf.rb +340 -0
  102. data/lib/pdf_paradise/utility_scripts/to_qdf.rb +82 -0
  103. data/lib/pdf_paradise/version/version.rb +19 -0
  104. data/lib/pdf_paradise/www/README.md +2 -0
  105. data/lib/pdf_paradise/www/sinatra/app.rb +276 -0
  106. data/lib/pdf_paradise/yaml/working_on_these_pdf_files.yml +4 -0
  107. data/lib/pdf_paradise.rb +5 -0
  108. data/pdf_paradise.gemspec +61 -0
  109. data/test/testing_pdf_paradise.rb +9 -0
  110. metadata +219 -0
@@ -0,0 +1,283 @@
1
+ #!/usr/bin/ruby -w
2
+ # Encoding: UTF-8
3
+ # frozen_string_literal: true
4
+ # =========================================================================== #
5
+ # === PdfParadise::ExtractPdfPage
6
+ #
7
+ # Use this class to extract one or more pdf pages from a given .pdf file.
8
+ #
9
+ # Three arguments can be passed to this class:
10
+ #
11
+ # The first argument is the given input file (the .pdf file)
12
+ # The second argument is the first page to extract (defaults to 1)
13
+ # The third argument is the number of pages to extract (defaults to 1)
14
+ #
15
+ # Specific invocation examples:
16
+ #
17
+ # pdfpextr START_PAGE.pdf 5 19
18
+ # pdfpextr inputfile.pdf 22 36
19
+ # ext_pdf foo.pdf 1 100
20
+ #
21
+ # Usage examples:
22
+ # require 'extract_pdf_page'
23
+ # ExtractPdfPage.new
24
+ # ExtractPdfPage.new :save_here => '/Depot/Temp/test.pdf'
25
+ #
26
+ # Usage examples from the commandline:
27
+ # epdf foo.pdf 5
28
+ # epdf foo.pdf 5 10
29
+ # =========================================================================== #
30
+ # require 'extract_pdf_page.rb'
31
+ # =========================================================================== #
32
+ require 'pdf_paradise/requires/esystem_and_opn_and_colours.rb'
33
+ require 'pdf_paradise/base/base.rb'
34
+
35
module PdfParadise

# =========================================================================== #
# === PdfParadise::ExtractPdfPage
#
# Extracts one or more consecutive pages from a .pdf file by invoking
# ghostscript ("gs") once per page. Every extracted page ends up in its
# own file in the current working directory, named
#
#   <page number>_extracted_page_from_file_<basename of the input file>
# =========================================================================== #
class ExtractPdfPage < PdfParadise::Base # === PdfParadise::ExtractPdfPage

  require 'shellwords' # Used to quote file names that are passed to the shell.
  require 'pdf_paradise/pdf_file_n_total_pages.rb'

  # ========================================================================= #
  # === NAMESPACE
  # ========================================================================= #
  NAMESPACE = inspect

  # ========================================================================= #
  # === DEFAULT_START_PAGE
  # ========================================================================= #
  DEFAULT_START_PAGE = '1'

  # ========================================================================= #
  # === DEFAULT_END_PAGE
  #
  # Not referenced by any method of this class; kept for other users of
  # the constant.
  # ========================================================================= #
  DEFAULT_END_PAGE = '10'

  # ========================================================================= #
  # === DEFAULT_EXTRACT_N_PDF_FILES
  #
  # How many pages are extracted when the caller does not say otherwise.
  # ========================================================================= #
  DEFAULT_EXTRACT_N_PDF_FILES = 1

  # ========================================================================= #
  # === initialize
  #
  # We accept these arguments:
  #
  #   (1) the name of the .pdf file; alternatively a Hash that may carry
  #       the keys :pdf_file_to_use, :start_page, :end_page and
  #       :dont_run_yet (recognised keys are removed from the Hash)
  #   (2) the start page
  #   (3) how many pages to extract, beginning at the start page
  #   (4) whether run() should be invoked right away
  #
  # Arguments (2) and (3) are ignored when a Hash is given as argument (1).
  # ========================================================================= #
  def initialize(
      mandatory_name_of_pdf_file = nil,
      optional_start_page        = DEFAULT_START_PAGE,
      extract_n_pdf_files        = DEFAULT_EXTRACT_N_PDF_FILES,
      run_already                = true
    )
    reset
    check_against_menu(mandatory_name_of_pdf_file)
    if mandatory_name_of_pdf_file.is_a? Hash
      if mandatory_name_of_pdf_file.has_key? :dont_run_yet
        run_already = !mandatory_name_of_pdf_file.delete(:dont_run_yet)
      end
    end
    set_name_of_pdf_file(mandatory_name_of_pdf_file) # Must come before set_extract_n_pdf_files()
    unless mandatory_name_of_pdf_file.is_a? Hash
      set_start_page(optional_start_page)
      set_extract_n_pdf_files(extract_n_pdf_files)
    end
    run if run_already
  end

  # ========================================================================= #
  # === reset                                                     (reset tag)
  #
  # Every instance variable starts with a sane default, so that the
  # Hash-based invocation works even when :start_page or :end_page were
  # omitted.
  # ========================================================================= #
  def reset
    super()
    @name_of_pdf_file    = nil
    @output_file         = nil
    @start_page          = DEFAULT_START_PAGE.to_i
    @extract_n_pdf_files = DEFAULT_EXTRACT_N_PDF_FILES
  end

  # ========================================================================= #
  # === set_name_of_pdf_file
  #
  # The input can also be a Hash. In that case :pdf_file_to_use names the
  # file, whereas :start_page and :end_page are forwarded to their
  # respective setters.
  #
  # If the file does not exist then a warning is shown and the process
  # exits.
  # ========================================================================= #
  def set_name_of_pdf_file(i)
    options = nil
    if i.is_a? Hash
      options = i
      # A missing :pdf_file_to_use key yields nil here, which is reported
      # as a missing file below rather than raising a TypeError.
      i = options.delete(:pdf_file_to_use)
    end
    i = i.to_s
    unless File.exist? i
      opnn; e 'Warning - no file at `'+sfile(i.to_s)+'` could be found.'
      opnn; e 'Thus we can not extract anything. Exiting now'
      exit
    end
    @name_of_pdf_file = i
    # ======================================================================= #
    # The page settings are applied only after the file name is known,
    # because set_end_page() has to query the page count of that file.
    # ======================================================================= #
    if options
      set_start_page(options.delete(:start_page)) if options.has_key? :start_page
      set_end_page(options.delete(:end_page))     if options.has_key? :end_page
    end
    @name_of_pdf_file
  end; alias use_this_input_file set_name_of_pdf_file # === use_this_input_file

  # ========================================================================= #
  # === set_start_page
  #
  # Sets the first page that will be extracted. nil selects the default
  # page. If the name of an existing .pdf file is passed instead, then
  # that file becomes the input file and the default start page is used.
  # ========================================================================= #
  def set_start_page(
      i = DEFAULT_START_PAGE
    )
    i = DEFAULT_START_PAGE if i.nil?
    if i.is_a? Hash
      if i.has_key? :save_here
        # NOTE(review): set_save_here() is not defined in this class; it
        # is presumably provided by PdfParadise::Base - confirm.
        set_save_here(i.delete(:save_here))
      end
      i = DEFAULT_START_PAGE # A Hash carries no page number.
    end
    i = i.to_s
    if File.exist?(i) and i.include? '.pdf'
      use_this_input_file(i)
      i = DEFAULT_START_PAGE
    end
    # Never store a page number below 1, e. g. for non-numeric input.
    @start_page = [i.to_i, 1].max
  end

  # ========================================================================= #
  # === report_to_the_user_where_we_stored_the_new_pdf_file
  #
  # Reports every per-page output file that exists on disk.
  # ========================================================================= #
  def report_to_the_user_where_we_stored_the_new_pdf_file
    pages_to_extract.each {|page|
      target = output_file_for(page)
      if File.exist? target
        opnn; e 'Finished storing at `'+sfile(target)+'`.'
      end
    }
  end

  # ========================================================================= #
  # === check_against_menu                                         (menu tag)
  #
  # Shows a short usage notice and exits if help was requested.
  # ========================================================================= #
  def check_against_menu(i = nil)
    case i
    when '--help','HELP'
      e 'Usage example:'
      e '  ext_pdf foo.pdf 1 100'
      exit
    end
  end

  # ========================================================================= #
  # === start_page?
  #
  # Returns the start page as a String.
  # ========================================================================= #
  def start_page?
    @start_page.to_s
  end; alias which_page? start_page? # === which_page?

  # ========================================================================= #
  # === calculate_last_page
  #
  # This will calculate which page must be the last one (inclusive).
  # ========================================================================= #
  def calculate_last_page
    start_page?.to_i + (@extract_n_pdf_files.to_i - 1)
  end

  # ========================================================================= #
  # === difference?
  #
  # The distance between the last and the first page, that is the number
  # of pages to extract minus one.
  # ========================================================================= #
  def difference?
    calculate_last_page - start_page?.to_i
  end; alias n_times difference? # === n_times

  # ========================================================================= #
  # === output_file?
  #
  # The common suffix shared by all generated files; the page number is
  # prepended for each individual file.
  # ========================================================================= #
  def output_file?
    @output_file
  end

  # ========================================================================= #
  # === report_to_the_user_how_many_pages_we_extracted
  # ========================================================================= #
  def report_to_the_user_how_many_pages_we_extracted
    n_pdf_pages = @extract_n_pdf_files.to_i
    if File.exist? input_file?
      opnn; e 'We extracted '+sfancy(n_pdf_pages.to_s)+' pdf pages.'
    end
  end

  # ========================================================================= #
  # === report_to_the_user
  # ========================================================================= #
  def report_to_the_user
    report_to_the_user_how_many_pages_we_extracted
    report_to_the_user_where_we_stored_the_new_pdf_file
  end

  # ========================================================================= #
  # === name_of_pdf_file?
  # ========================================================================= #
  def name_of_pdf_file?
    @name_of_pdf_file
  end; alias input_file? name_of_pdf_file? # === input_file?
       alias name_of_the_pdf_file? name_of_pdf_file? # === name_of_the_pdf_file?

  # ========================================================================= #
  # === run_verbose_system_command
  #
  # Invokes ghostscript once per page. The inclusive page range is used,
  # so that exactly @extract_n_pdf_files pages are written. File names
  # are shell-escaped, so that names containing spaces or other special
  # characters work.
  # ========================================================================= #
  def run_verbose_system_command
    pages_to_extract.each {|page|
      cmd = ''.dup
      cmd << 'gs -sDEVICE=pdfwrite -dNOPAUSE -dBATCH -dSAFER'
      cmd << ' -dFirstPage='+page.to_s
      cmd << ' -dLastPage='+page.to_s
      cmd << ' -sOutputFile='+Shellwords.escape(output_file_for(page))
      cmd << ' '+Shellwords.escape(name_of_pdf_file?.to_s)
      esystem cmd
    }
  end

  # ========================================================================= #
  # === determine_output_file
  #
  # Only the basename of the input file is used, so that an input path
  # containing directories still leads to a valid output file name.
  # ========================================================================= #
  def determine_output_file
    @output_file = 'extracted_page_from_file_'+
                   File.basename(name_of_pdf_file?.to_s)
  end

  # ========================================================================= #
  # === set_extract_n_pdf_files
  #
  # Sets how many pages will be extracted. The value is capped at the
  # total number of pages of the input file, which is why the input file
  # has to be known before this method is called.
  # ========================================================================= #
  def set_extract_n_pdf_files(i = DEFAULT_EXTRACT_N_PDF_FILES)
    i = DEFAULT_EXTRACT_N_PDF_FILES if i.nil?
    i = i.to_i
    total_pages = ::PdfParadise.n_pdf_pages?(name_of_the_pdf_file?)
    i = total_pages if i > total_pages
    @extract_n_pdf_files = i
  end; alias set_end_page set_extract_n_pdf_files # === set_end_page

  # ========================================================================= #
  # === opnn
  # ========================================================================= #
  def opnn
    super(NAMESPACE)
  end

  # ========================================================================= #
  # === run                                                         (run tag)
  # ========================================================================= #
  def run
    determine_output_file
    run_verbose_system_command
    report_to_the_user
  end

  private

  # ========================================================================= #
  # === pages_to_extract
  #
  # The inclusive range of page numbers that will be extracted.
  # ========================================================================= #
  def pages_to_extract
    (start_page?.to_i..calculate_last_page)
  end

  # ========================================================================= #
  # === output_file_for
  #
  # The name of the file into which the given page is written.
  # ========================================================================= #
  def output_file_for(page)
    page.to_s+'_'+output_file?.to_s
  end

end; end
280
+
281
+ if __FILE__ == $PROGRAM_NAME
282
+ PdfParadise::ExtractPdfPage.new(ARGV[0], ARGV[1], ARGV[2])
283
+ end # epdf
@@ -0,0 +1,111 @@
1
+ #!/usr/bin/ruby -w
2
+ # Encoding: UTF-8
3
+ # frozen_string_literal: false
4
+ # =========================================================================== #
5
+ # === PdfParadise::PdfOptimizer
6
+ #
7
+ # /screen selects low-resolution output similar to the Acrobat Distiller
8
+ # "Screen Optimized" setting.
9
+ # /ebook selects medium-resolution output similar to the Acrobat Distiller
10
+ # "eBook" setting.
11
+ # /printer selects output similar to the Acrobat Distiller "Print Optimized"
12
+ # setting.
13
+ # /prepress selects output similar to Acrobat Distiller "Prepress Optimized"
14
+ # setting.
15
+ # /default selects output intended to be useful across a wide variety of
16
+ # uses, possibly at the expense of a larger output file.
17
+ #
18
+ # Usage example:
19
+ #
20
+ # PdfParadise::PdfOptimizer.new(ARGV)
21
+ #
22
+ # =========================================================================== #
23
+ # require 'pdf_paradise/utility_scripts/pdf_optimizer.rb'
24
+ # =========================================================================== #
25
+ require 'pdf_paradise/base/base.rb'
26
+
27
module PdfParadise

# =========================================================================== #
# === PdfParadise::PdfOptimizer
#
# Writes a size-reduced copy of the given .pdf file to "output.pdf" in
# the current working directory, via ghostscript's /screen preset.
# =========================================================================== #
class PdfOptimizer < ::PdfParadise::Base # === PdfParadise::PdfOptimizer

  require 'shellwords' # Used to quote file names that are passed to the shell.

  # ========================================================================= #
  # === initialize
  #
  # The first argument is the path to the .pdf file, or an Array (such as
  # ARGV) whose first element is that path. The second argument decides
  # whether run() is invoked right away.
  # ========================================================================= #
  def initialize(
      i           = nil,
      run_already = true
    )
    reset
    set_input(i)
    run if run_already
  end

  # ========================================================================= #
  # === reset                                                     (reset tag)
  # ========================================================================= #
  def reset
    @input = nil
  end

  # ========================================================================= #
  # === set_input
  #
  # Stores the input path as a String; of an Array only the first element
  # is used.
  # ========================================================================= #
  def set_input(i = '')
    i = i.first if i.is_a? Array
    @input = i.to_s.dup
  end

  # ========================================================================= #
  # === input?
  # ========================================================================= #
  def input?
    @input
  end; alias input_file  input? # === input_file
       alias input_file? input? # === input_file?

  # ========================================================================= #
  # === output_file?
  # ========================================================================= #
  def output_file?
    'output.pdf'
  end; alias output_pdf? output_file? # === output_pdf?

  # ========================================================================= #
  # === use_ghostscript?
  # ========================================================================= #
  def use_ghostscript?
    true # For now this is hardcoded.
  end

  # ========================================================================= #
  # === output_ps
  #
  # The intermediate postscript file used by the pdf2ps/ps2pdf variant.
  # ========================================================================= #
  def output_ps
    'output.ps'
  end

  # ========================================================================= #
  # === run                                                         (run tag)
  #
  # Nothing is executed when the input file does not exist. File names
  # are shell-escaped, so that names containing spaces or other special
  # characters work.
  # ========================================================================= #
  def run
    source = input_file?.to_s
    unless File.file?(source)
      e 'No file could be found at `'+source+'`. Please provide the '\
        'path to an existing .pdf file.'
      return
    end
    # ======================================================================= #
    # We have two different solutions.
    # ======================================================================= #
    if use_ghostscript?
      esystem 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen '\
              '-dNOPAUSE -dQUIET -dBATCH -sOutputFile='+
              Shellwords.escape(output_file?)+' '+
              Shellwords.escape(source)
    else
      esystem 'pdf2ps '+Shellwords.escape(source)+' '+
              Shellwords.escape(output_ps)
      esystem 'ps2pdf '+Shellwords.escape(output_ps)+' '+
              Shellwords.escape(output_pdf?)
    end
  end

end; end
108
+
109
+ if __FILE__ == $PROGRAM_NAME
110
+ PdfParadise::PdfOptimizer.new(ARGV)
111
+ end # pdfoptimizer
@@ -0,0 +1,148 @@
1
+ #!/usr/bin/ruby -w
2
+ # Encoding: UTF-8
3
+ # frozen_string_literal: true
4
+ # =========================================================================== #
5
+ # === PdfParadise::PdfStatistics
6
+ #
7
+ # This class will make use of PDF::Reader to provide us with some
8
+ # statistical information about a given .pdf file.
9
+ #
10
+ # You can also output information from a specific page.
11
+ #
12
+ # PDF is a page based file format, so most visible information
13
+ # is available via page-based iteration
14
+ #
15
+ # reader = PDF::Reader.new("somefile.pdf")
16
+ #
17
+ # reader.pages.each { |page|
18
+ # puts page.fonts
19
+ # puts page.text
20
+ # puts page.raw_content
21
+ # }
22
+ #
23
+ # Usage example:
24
+ #
25
+ # PdfParadise::PdfStatistics.new(ARGV)
26
+ #
27
+ # =========================================================================== #
28
+ # require 'pdf_paradise/utility_scripts/pdf_statistics.rb'
29
+ # =========================================================================== #
30
+ require 'pdf_paradise/base/base.rb'
31
+
32
module PdfParadise

# =========================================================================== #
# === PdfParadise::PdfStatistics
#
# Prints the PDF version, selected entries of the document information
# and the page count of a .pdf file, using PDF::Reader.
# =========================================================================== #
class PdfStatistics < ::PdfParadise::Base # === PdfParadise::PdfStatistics

  # ========================================================================= #
  # === INFO_LABELS
  #
  # Keys of the document information that are reported, in display order,
  # together with the text shown in front of each value.
  # ========================================================================= #
  INFO_LABELS = {
    Title:        'The title of this .pdf is: ',
    CreationDate: 'This .pdf was created at: ',
    Author:       'The author of this .pdf is: ',
    Producer:     'It was produced via: ',
    ModDate:      'It was last modified at: ',
    Creator:      'It was created via: '
  }.freeze

  # ========================================================================= #
  # === initialize
  #
  # The first argument is the path to the .pdf file, or an Array (such as
  # ARGV) whose first element is that path. The second argument decides
  # whether run() is invoked right away.
  # ========================================================================= #
  def initialize(
      i           = nil,
      run_already = true
    )
    require_pdf_reader
    reset
    set_input(i)
    run if run_already
  end

  # ========================================================================= #
  # === reset                                                     (reset tag)
  # ========================================================================= #
  def reset
    @input  = nil
    @reader = nil
  end

  # ========================================================================= #
  # === require_pdf_reader
  #
  # Loads pdf-reader with warnings silenced. The ensure clause guarantees
  # that $VERBOSE is restored even if the require fails.
  # ========================================================================= #
  def require_pdf_reader
    old_verbose = $VERBOSE
    $VERBOSE = nil
    require 'pdf-reader'
  ensure
    $VERBOSE = old_verbose # Restore it again here.
  end

  # ========================================================================= #
  # === set_input
  #
  # Stores the input path as a String; of an Array only the first element
  # is used.
  # ========================================================================= #
  def set_input(i = '')
    i = i.first if i.is_a? Array
    @input = i.to_s.dup
  end

  # ========================================================================= #
  # === input?
  # ========================================================================= #
  def input?
    @input
  end

  # ========================================================================= #
  # === instantiate_reader_object
  # ========================================================================= #
  def instantiate_reader_object
    @reader = PDF::Reader.new(input?)
  end

  # ========================================================================= #
  # === show_extended_info
  #
  # This will tap into the .info method and report every entry listed in
  # INFO_LABELS that is present for this document.
  # ========================================================================= #
  def show_extended_info
    # Guard against nil, in case .info yields nothing for a document
    # without an information dictionary.
    hash  = @reader.info || {}
    ljust = 32
    INFO_LABELS.each_pair {|key, label|
      next unless hash.has_key? key
      e label.ljust(ljust)+simp(hash[key].to_s)
    }
  end

  # ========================================================================= #
  # === report_n_pages_in_the_pdf_document
  # ========================================================================= #
  def report_n_pages_in_the_pdf_document
    e 'n pages in this .pdf document: '+simp(@reader.page_count.to_s)
  end

  # ========================================================================= #
  # === run                                                         (run tag)
  #
  # Nothing is reported when the input file does not exist.
  # ========================================================================= #
  def run
    unless File.file?(input?.to_s)
      e 'No .pdf file could be found at `'+input?.to_s+'`.'
      return
    end
    instantiate_reader_object
    cliner
    e rev+
      'The PDF version for this .pdf file was: '+simp(@reader.pdf_version.to_s)
    show_extended_info
    # e @reader.metadata # <- This is how to obtain the metadata information.
    report_n_pages_in_the_pdf_document
    cliner
  end

  # ========================================================================= #
  # === PdfParadise::PdfStatistics[]
  # ========================================================================= #
  def self.[](i = '')
    new(i)
  end

end; end
145
+
146
+ if __FILE__ == $PROGRAM_NAME
147
+ PdfParadise::PdfStatistics.new(ARGV)
148
+ end # pdfstatistics /Depot/PDF/foobar.pdf
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/ruby -w
2
+ # Encoding: UTF-8
3
+ # frozen_string_literal: true
4
+ # =========================================================================== #
5
+ # === PdfParadise::PdfToHtml
6
+ #
7
+ # This class will convert a .pdf file to .html, based on pdftohtml from
8
+ # poppler.
9
+ #
10
+ # Usage examples:
11
+ #
12
+ # PdfParadise::PdfToHtml.new(ARGV)
13
+ #
14
+ # =========================================================================== #
15
+ # require 'pdf_paradise/utility_scripts/pdf_to_html.rb'
16
+ # PdfParadise::PdfToHtml.new(ARGV)
17
+ # =========================================================================== #
18
+ require 'pdf_paradise/base/base.rb'
19
+
20
module PdfParadise

# =========================================================================== #
# === PdfParadise::PdfToHtml
#
# Converts each given .pdf file into a single .html file via the
# "pdftohtml" program. The result is stored in the current working
# directory as <basename of the input file>_to_html.html.
# =========================================================================== #
class PdfToHtml < ::PdfParadise::Base # === PdfParadise::PdfToHtml

  require 'shellwords' # Used to quote file names that are passed to the shell.

  # ========================================================================= #
  # === NAMESPACE
  # ========================================================================= #
  NAMESPACE = inspect

  # ========================================================================= #
  # === initialize
  #
  # The first argument holds the .pdf files to convert (defaults to
  # ARGV). The second argument decides whether run() is invoked right
  # away.
  # ========================================================================= #
  def initialize(
      i           = ARGV,
      run_already = true
    )
    reset
    set_commandline_arguments(i)
    run if run_already
  end

  # ========================================================================= #
  # === reset
  # ========================================================================= #
  def reset
    super()
  end

  # ========================================================================= #
  # === opnn
  # ========================================================================= #
  def opnn
    super(NAMESPACE)
  end

  # ========================================================================= #
  # === run                                                         (run tag)
  #
  # File names are shell-escaped, so that names containing spaces or
  # other special characters work.
  # ========================================================================= #
  def run
    commandline_arguments?.each {|entry|
      source = entry.to_s
      target = File.basename(source+'_to_html')+'.html'
      # ===================================================================== #
      # The -s option is used to "generate single HTML that includes all
      # pages".
      # ===================================================================== #
      esystem 'pdftohtml -s '+Shellwords.escape(source)+' '+
              Shellwords.escape(target)
    }
  end

end; end
72
+
73
+ if __FILE__ == $PROGRAM_NAME
74
+ PdfParadise::PdfToHtml.new(ARGV)
75
+ end # pdf_to_html foobar.pdf