pdf_paradise 0.1.66
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of pdf_paradise might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/README.md +705 -0
- data/bin/automatic_pdf_title +7 -0
- data/bin/combine_these_pdf_pages +7 -0
- data/bin/compress_via_hexapdf +7 -0
- data/bin/convert_markdown_to_pdf +7 -0
- data/bin/convert_pdf_to_text +7 -0
- data/bin/delete_first_page_of_this_pdf_file +7 -0
- data/bin/merge_then_open +7 -0
- data/bin/n_pages +10 -0
- data/bin/open_main_pdf +7 -0
- data/bin/pdf_paradise +9 -0
- data/bin/set_main_book +7 -0
- data/bin/set_title_of_this_pdf_file +15 -0
- data/doc/README.gen +662 -0
- data/doc/todo/todo.md +7 -0
- data/lib/pdf_paradise/base/base.rb +239 -0
- data/lib/pdf_paradise/base/colours.rb +36 -0
- data/lib/pdf_paradise/commandline/commandline.rb +101 -0
- data/lib/pdf_paradise/commandline/help.rb +73 -0
- data/lib/pdf_paradise/commandline/menu.rb +142 -0
- data/lib/pdf_paradise/compress/compress_via_hexapdf.rb +27 -0
- data/lib/pdf_paradise/compress_this_pdf_file.rb +87 -0
- data/lib/pdf_paradise/constants/constants.rb +76 -0
- data/lib/pdf_paradise/convert_text_to_pdf.rb +94 -0
- data/lib/pdf_paradise/css/project.css +17 -0
- data/lib/pdf_paradise/djvu_to_pdf.rb +85 -0
- data/lib/pdf_paradise/gui/README.md +6 -0
- data/lib/pdf_paradise/gui/fox/split_pdf_file.rb +77 -0
- data/lib/pdf_paradise/gui/gtk2/delete_the_first_or_the_last_page_of_this_pdf_file/delete_the_first_or_the_last_page_of_this_pdf_file.rb +39 -0
- data/lib/pdf_paradise/gui/gtk2/pdf_viewer/pdf_viewer.rb +34 -0
- data/lib/pdf_paradise/gui/gtk2/split_pdf_file/split_pdf_file.rb +34 -0
- data/lib/pdf_paradise/gui/gtk2/statistics_widget/statistics_widget.rb +34 -0
- data/lib/pdf_paradise/gui/gtk2/to_pdf/to_pdf.rb +32 -0
- data/lib/pdf_paradise/gui/gtk3/controller/controller.rb +212 -0
- data/lib/pdf_paradise/gui/gtk3/convert_pdf_to_text/convert_pdf_to_text.rb +34 -0
- data/lib/pdf_paradise/gui/gtk3/delete_the_first_or_the_last_page_of_this_pdf_file/delete_the_first_or_the_last_page_of_this_pdf_file.rb +39 -0
- data/lib/pdf_paradise/gui/gtk3/pdf_viewer/pdf_viewer.rb +34 -0
- data/lib/pdf_paradise/gui/gtk3/split_pdf_file/split_pdf_file.rb +34 -0
- data/lib/pdf_paradise/gui/gtk3/statistics_widget/statistics_widget.rb +34 -0
- data/lib/pdf_paradise/gui/gtk3/to_pdf/to_pdf.rb +32 -0
- data/lib/pdf_paradise/gui/libui/extract_all_images_from_this_pdf_file/extract_all_images_from_this_pdf_file.rb +223 -0
- data/lib/pdf_paradise/gui/libui/statistics_widget/statistics_widget.rb +233 -0
- data/lib/pdf_paradise/gui/shared_code/convert_pdf_to_text/convert_pdf_to_text_module.rb +277 -0
- data/lib/pdf_paradise/gui/shared_code/delete_the_first_or_the_last_page_of_this_pdf_file/delete_the_first_or_the_last_page_of_this_pdf_file_module.rb +443 -0
- data/lib/pdf_paradise/gui/shared_code/pdf_viewer/pdf_viewer.css +5 -0
- data/lib/pdf_paradise/gui/shared_code/pdf_viewer/pdf_viewer_module.rb +284 -0
- data/lib/pdf_paradise/gui/shared_code/split_pdf_file/split_pdf_file.css +0 -0
- data/lib/pdf_paradise/gui/shared_code/split_pdf_file/split_pdf_file_module.rb +294 -0
- data/lib/pdf_paradise/gui/shared_code/statistics_widget/statistics_widget_module.rb +349 -0
- data/lib/pdf_paradise/gui/shared_code/to_pdf/to_pdf_module.rb +281 -0
- data/lib/pdf_paradise/hexapdf/001_rainbow_pattern_example.rb +0 -0
- data/lib/pdf_paradise/hexapdf/hexapdf.rb +123 -0
- data/lib/pdf_paradise/images/PDF_PARADISE_LOGO.png +0 -0
- data/lib/pdf_paradise/main_pdf/main_pdf.rb +444 -0
- data/lib/pdf_paradise/merge_pdf/menu.rb +63 -0
- data/lib/pdf_paradise/merge_pdf/merge_pdf.rb +306 -0
- data/lib/pdf_paradise/merge_pdf_namespace.rb +9 -0
- data/lib/pdf_paradise/merge_then_open/merge_then_open.rb +105 -0
- data/lib/pdf_paradise/pdf_file_n_total_pages.rb +249 -0
- data/lib/pdf_paradise/prawn_addons/README.md +2 -0
- data/lib/pdf_paradise/prawn_addons/prawn_addons.rb +17 -0
- data/lib/pdf_paradise/project/project.rb +22 -0
- data/lib/pdf_paradise/remove_pdf_password.rb +391 -0
- data/lib/pdf_paradise/requires/batch_require_toplevel_files.rb +22 -0
- data/lib/pdf_paradise/requires/colours.rb +7 -0
- data/lib/pdf_paradise/requires/colours_and_esystem_and_save_file_and_fileutils_and_opn.rb +11 -0
- data/lib/pdf_paradise/requires/esystem_and_colours.rb +10 -0
- data/lib/pdf_paradise/requires/esystem_and_opn_and_colours.rb +8 -0
- data/lib/pdf_paradise/requires/require_the_whole_project.rb +28 -0
- data/lib/pdf_paradise/requires/require_utility_scripts.rb +9 -0
- data/lib/pdf_paradise/set_main_book.rb +156 -0
- data/lib/pdf_paradise/set_pdf_title.rb +220 -0
- data/lib/pdf_paradise/sinatra/embeddable_interface.rb +318 -0
- data/lib/pdf_paradise/toplevel_methods/automatic_pdf_title.rb +55 -0
- data/lib/pdf_paradise/toplevel_methods/convert_epub_to_pdf.rb +27 -0
- data/lib/pdf_paradise/toplevel_methods/convert_markdown_to_pdf.rb +45 -0
- data/lib/pdf_paradise/toplevel_methods/convert_ppt_to_pdf.rb +35 -0
- data/lib/pdf_paradise/toplevel_methods/e.rb +16 -0
- data/lib/pdf_paradise/toplevel_methods/esystem.rb +19 -0
- data/lib/pdf_paradise/toplevel_methods/misc.rb +76 -0
- data/lib/pdf_paradise/toplevel_methods/number_pages.rb +38 -0
- data/lib/pdf_paradise/toplevel_methods/opened_pdf_files.rb +221 -0
- data/lib/pdf_paradise/toplevel_methods/query_pdf_title.rb +191 -0
- data/lib/pdf_paradise/toplevel_methods/reduce_size_of_this_pdf_file.rb +46 -0
- data/lib/pdf_paradise/toplevel_methods/roebe.rb +17 -0
- data/lib/pdf_paradise/toplevel_methods/rotate_pdf_file.rb +143 -0
- data/lib/pdf_paradise/toplevel_methods/to_pdf.rb +38 -0
- data/lib/pdf_paradise/utility_scripts/README.md +3 -0
- data/lib/pdf_paradise/utility_scripts/combine_these_pdf_pages.rb +118 -0
- data/lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb +175 -0
- data/lib/pdf_paradise/utility_scripts/delete_first_page_of_this_pdf_file.rb +221 -0
- data/lib/pdf_paradise/utility_scripts/delete_last_page_of_this_pdf_file.rb +180 -0
- data/lib/pdf_paradise/utility_scripts/delete_this_page_of_this_pdf_file.rb +329 -0
- data/lib/pdf_paradise/utility_scripts/extract_all_images_from_this_pdf_file.rb +129 -0
- data/lib/pdf_paradise/utility_scripts/extract_pdf_page.rb +283 -0
- data/lib/pdf_paradise/utility_scripts/pdf_optimizer.rb +111 -0
- data/lib/pdf_paradise/utility_scripts/pdf_statistics.rb +148 -0
- data/lib/pdf_paradise/utility_scripts/pdf_to_html.rb +75 -0
- data/lib/pdf_paradise/utility_scripts/remove_images.rb +110 -0
- data/lib/pdf_paradise/utility_scripts/split_pdf.rb +340 -0
- data/lib/pdf_paradise/utility_scripts/to_qdf.rb +82 -0
- data/lib/pdf_paradise/version/version.rb +19 -0
- data/lib/pdf_paradise/www/README.md +2 -0
- data/lib/pdf_paradise/www/sinatra/app.rb +276 -0
- data/lib/pdf_paradise/yaml/working_on_these_pdf_files.yml +4 -0
- data/lib/pdf_paradise.rb +5 -0
- data/pdf_paradise.gemspec +61 -0
- data/test/testing_pdf_paradise.rb +9 -0
- metadata +219 -0
@@ -0,0 +1,283 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
# Encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
# =========================================================================== #
|
5
|
+
# === PdfParadise::ExtractPdfPage
|
6
|
+
#
|
7
|
+
# Use this class to extract one or more pdf pages from a given .pdf file.
|
8
|
+
#
|
9
|
+
# Three arguments can be passed to this class:
|
10
|
+
#
|
11
|
+
# The first argument is the given input file (the .pdf file)
|
12
|
+
# The second argument is the first page to extract
|
13
|
+
# The third argument is the number of pages to extract, starting there
|
14
|
+
#
|
15
|
+
# Specific invocation examples:
|
16
|
+
#
|
17
|
+
# pdfpextr START_PAGE.pdf 5 19
|
18
|
+
# pdfpextr inputfile.pdf 22 36
|
19
|
+
# ext_pdf foo.pdf 1 100
|
20
|
+
#
|
21
|
+
# Usage examples:
|
22
|
+
# require 'extract_pdf_page'
|
23
|
+
# ExtractPdfPage.new
|
24
|
+
# ExtractPdfPage.new :save_here => '/Depot/Temp/test.pdf'
|
25
|
+
#
|
26
|
+
# Usage examples from the commandline:
|
27
|
+
# epdf foo.pdf 5
|
28
|
+
# epdf foo.pdf 5 10
|
29
|
+
# =========================================================================== #
|
30
|
+
# require 'extract_pdf_page.rb'
|
31
|
+
# =========================================================================== #
|
32
|
+
require 'pdf_paradise/requires/esystem_and_opn_and_colours.rb'
|
33
|
+
require 'pdf_paradise/base/base.rb'
|
34
|
+
|
35
|
+
module PdfParadise

# =========================================================================== #
# === PdfParadise::ExtractPdfPage
#
# Extracts one or more consecutive pages from a .pdf file by invoking
# ghostscript ("gs") once per page. Every page ends up in its own file,
# named "<page>_extracted_page_from_file_<basename of the input file>",
# with the path being relative to the current working directory.
#
# Helpers such as e(), opnn(), sfile(), sfancy() and esystem() are not
# defined in this class; presumably they come from PdfParadise::Base or
# from the files required at the top of this file - TODO confirm.
# =========================================================================== #
class ExtractPdfPage < PdfParadise::Base # === PdfParadise::ExtractPdfPage

  require 'shellwords' # Shellwords.escape() quotes file names for the shell.
  require 'pdf_paradise/pdf_file_n_total_pages.rb'

  # ========================================================================= #
  # === NAMESPACE
  # ========================================================================= #
  NAMESPACE = inspect

  # ========================================================================= #
  # === DEFAULT_START_PAGE
  # ========================================================================= #
  DEFAULT_START_PAGE = '1'

  # ========================================================================= #
  # === DEFAULT_END_PAGE
  #
  # Not referenced anywhere inside of this class; kept for compatibility.
  # ========================================================================= #
  DEFAULT_END_PAGE = '10'

  # ========================================================================= #
  # === DEFAULT_EXTRACT_N_PDF_FILES
  #
  # How many pages will be extracted if the user did not specify a count.
  # ========================================================================= #
  DEFAULT_EXTRACT_N_PDF_FILES = 1

  # ========================================================================= #
  # === initialize
  #
  # We accept these arguments:
  #
  #   (1) the name of the .pdf file, or a Hash (see set_name_of_pdf_file);
  #       the Hash may additionally carry :dont_run_yet
  #   (2) the start page
  #   (3) how many pages to extract, beginning at the start page
  #   (4) whether run() shall be invoked right away
  #
  # Arguments (2) and (3) are ignored when (1) is a Hash. The Hash is only
  # read from, never modified.
  # ========================================================================= #
  def initialize(
      mandatory_name_of_pdf_file = nil,
      optional_start_page        = DEFAULT_START_PAGE,
      extract_n_pdf_files        = DEFAULT_EXTRACT_N_PDF_FILES,
      run_already                = true
    )
    reset
    check_against_menu(mandatory_name_of_pdf_file)
    given_a_hash = mandatory_name_of_pdf_file.is_a?(Hash)
    if given_a_hash && mandatory_name_of_pdf_file.key?(:dont_run_yet)
      run_already = !mandatory_name_of_pdf_file[:dont_run_yet]
    end
    set_name_of_pdf_file(mandatory_name_of_pdf_file) # Must come before set_extract_n_pdf_files()
    unless given_a_hash
      set_start_page(optional_start_page)
      set_extract_n_pdf_files(extract_n_pdf_files)
    end
    run if run_already
  end

  # ========================================================================= #
  # === reset (reset tag)
  #
  # Besides delegating to the parent class, this applies the default start
  # page and the default page count, so that the Hash-based invocation
  # works even if :start_page or :end_page were omitted.
  # ========================================================================= #
  def reset
    super()
    @start_page          = DEFAULT_START_PAGE.to_i
    @extract_n_pdf_files = DEFAULT_EXTRACT_N_PDF_FILES
  end

  # ========================================================================= #
  # === set_name_of_pdf_file
  #
  # The input can be a file name or a Hash. Recognized Hash keys are
  # :pdf_file_to_use, :start_page and :end_page. The file name is
  # registered first, because the page count passed via :end_page is
  # capped against the number of pages of that file.
  #
  # If no file exists at the given location then a warning is shown and
  # the program exits. This includes a Hash lacking :pdf_file_to_use.
  #
  # Returns the registered file name.
  # ========================================================================= #
  def set_name_of_pdf_file(i)
    options = (i.is_a?(Hash) ? i : nil)
    i = (options ? options[:pdf_file_to_use] : i).to_s
    unless File.exist? i
      opnn; e 'Warning - no file at `'+sfile(i.to_s)+'` could be found.'
      opnn; e 'Thus we can not extract anything. Exiting now'
      exit
    end
    @name_of_pdf_file = i
    if options
      set_start_page(options[:start_page]) if options.key?(:start_page)
      set_end_page(options[:end_page])     if options.key?(:end_page)
    end
    @name_of_pdf_file
  end; alias use_this_input_file set_name_of_pdf_file # === use_this_input_file

  # ========================================================================= #
  # === set_start_page
  #
  # Stores the first page to extract, as an Integer. nil selects the
  # default start page. If the input happens to be the path of an existing
  # .pdf file then it is used as the input file instead, and the default
  # start page applies.
  #
  # A Hash may be given too: its :save_here entry is forwarded to
  # set_save_here() and the default start page is used. NOTE(review):
  # set_save_here() is not defined in this class - presumably the parent
  # class provides it; confirm.
  # ========================================================================= #
  def set_start_page(
      i = DEFAULT_START_PAGE
    )
    i = DEFAULT_START_PAGE if i.nil?
    if i.is_a? Hash
      set_save_here(i[:save_here]) if i.key? :save_here
      i = DEFAULT_START_PAGE # A Hash itself carries no page number.
    end
    i = i.to_s
    if File.exist?(i) && i.include?('.pdf')
      use_this_input_file(i)
      i = DEFAULT_START_PAGE
    end
    @start_page = i.to_i
  end

  # ========================================================================= #
  # === report_to_the_user_where_we_stored_the_new_pdf_file
  #
  # Reports every per-page output file that exists on disk.
  # ========================================================================= #
  def report_to_the_user_where_we_stored_the_new_pdf_file
    return if output_file?.nil? # determine_output_file() was not called yet.
    pages_to_extract.each {|page|
      target = output_file_for_page(page)
      if File.exist? target
        opnn; e 'Finished storing at `'+sfile(target)+'`.'
      end
    }
  end

  # ========================================================================= #
  # === check_against_menu (menu tag)
  #
  # Shows a usage example and exits if the user asked for help.
  # ========================================================================= #
  def check_against_menu(i = nil)
    case i
    when '--help','HELP'
      e 'Usage example:'
      e ' ext_pdf foo.pdf 1 100'
      exit
    end
  end

  # ========================================================================= #
  # === start_page?
  #
  # Returns the start page as a String.
  # ========================================================================= #
  def start_page?
    @start_page.to_s
  end; alias which_page? start_page? # === which_page?

  # ========================================================================= #
  # === calculate_last_page
  #
  # This will calculate which page must be the last, as an Integer:
  # start page + number of pages - 1.
  # ========================================================================= #
  def calculate_last_page
    start_page?.to_i + (@extract_n_pdf_files.to_i - 1)
  end

  # ========================================================================= #
  # === difference?
  #
  # Distance between the last and the first page. Note that this is one
  # less than the number of pages to extract, which is why the extraction
  # loop does not rely on it.
  # ========================================================================= #
  def difference?
    calculate_last_page - start_page?.to_i
  end; alias n_times difference? # === n_times

  # ========================================================================= #
  # === output_file?
  #
  # The base name shared by all output files; nil until
  # determine_output_file() was called.
  # ========================================================================= #
  def output_file?
    @output_file
  end

  # ========================================================================= #
  # === report_to_the_user_how_many_pages_we_extracted
  # ========================================================================= #
  def report_to_the_user_how_many_pages_we_extracted
    n_pdf_pages = @extract_n_pdf_files.to_i
    if File.exist? input_file?
      opnn; e 'We extracted '+sfancy(n_pdf_pages.to_s)+' pdf pages.'
    end
  end

  # ========================================================================= #
  # === report_to_the_user
  # ========================================================================= #
  def report_to_the_user
    report_to_the_user_how_many_pages_we_extracted
    report_to_the_user_where_we_stored_the_new_pdf_file
  end

  # ========================================================================= #
  # === name_of_pdf_file?
  # ========================================================================= #
  def name_of_pdf_file?
    @name_of_pdf_file
  end; alias input_file?           name_of_pdf_file? # === input_file?
       alias name_of_the_pdf_file? name_of_pdf_file? # === name_of_the_pdf_file?

  # ========================================================================= #
  # === run_verbose_system_command
  #
  # Runs ghostscript once for every page between the start page and
  # calculate_last_page(), both inclusive. File names are shell-escaped,
  # so names containing spaces or shell metacharacters work as well.
  # ========================================================================= #
  def run_verbose_system_command
    input = Shellwords.escape(name_of_pdf_file?.to_s)
    pages_to_extract.each {|page|
      target = Shellwords.escape(output_file_for_page(page))
      parts = [
        'gs -sDEVICE=pdfwrite -dNOPAUSE -dBATCH -dSAFER',
        "-dFirstPage=#{page}",
        "-dLastPage=#{page}", # One page per invocation.
        "-sOutputFile=#{target}",
        input
      ]
      esystem(parts.join(' '))
    }
  end

  # ========================================================================= #
  # === determine_output_file
  #
  # Only the basename of the input file is used, so an input file living
  # in another directory does not end up as a directory component of the
  # output file name.
  # ========================================================================= #
  def determine_output_file
    @output_file = 'extracted_page_from_file_'+
                   File.basename(name_of_pdf_file?.to_s)
  end

  # ========================================================================= #
  # === set_extract_n_pdf_files
  #
  # Stores how many pages shall be extracted. nil selects the default.
  # The value is capped at the number that ::PdfParadise.n_pdf_pages?()
  # returns for the current input file, hence the input file has to be
  # set before this method is called.
  #
  # NOTE(review): set_end_page is an alias to this method, so a value
  # passed as "end page" is treated as a page count, not as the number of
  # the last page. Confirm whether callers expect that.
  # ========================================================================= #
  def set_extract_n_pdf_files(i = DEFAULT_EXTRACT_N_PDF_FILES)
    i = DEFAULT_EXTRACT_N_PDF_FILES if i.nil?
    i = i.to_i
    total_pages = ::PdfParadise.n_pdf_pages?(name_of_the_pdf_file?)
    i = total_pages if i > total_pages
    @extract_n_pdf_files = i
  end; alias set_end_page set_extract_n_pdf_files # === set_end_page

  # ========================================================================= #
  # === opnn
  # ========================================================================= #
  def opnn
    super(NAMESPACE)
  end

  # ========================================================================= #
  # === run (run tag)
  # ========================================================================= #
  def run
    determine_output_file
    run_verbose_system_command
    report_to_the_user
  end

  # ========================================================================= #
  # === pages_to_extract
  #
  # The inclusive range of page numbers that will be extracted. The range
  # is empty if the page count is zero or negative.
  # ========================================================================= #
  def pages_to_extract
    (start_page?.to_i..calculate_last_page)
  end

  # ========================================================================= #
  # === output_file_for_page
  #
  # Name of the file into which the given page is written.
  # ========================================================================= #
  def output_file_for_page(page)
    "#{page}_#{output_file?}"
  end

  private :pages_to_extract, :output_file_for_page

end; end
|
280
|
+
|
281
|
+
if __FILE__ == $PROGRAM_NAME
|
282
|
+
PdfParadise::ExtractPdfPage.new(ARGV[0], ARGV[1], ARGV[2])
|
283
|
+
end # epdf
|
@@ -0,0 +1,111 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
# Encoding: UTF-8
|
3
|
+
# frozen_string_literal: false
|
4
|
+
# =========================================================================== #
|
5
|
+
# === PdfParadise::PdfOptimizer
|
6
|
+
#
|
7
|
+
# /screen selects low-resolution output similar to the Acrobat Distiller
|
8
|
+
# "Screen Optimized" setting.
|
9
|
+
# /ebook selects medium-resolution output similar to the Acrobat Distiller
|
10
|
+
# "eBook" setting.
|
11
|
+
# /printer selects output similar to the Acrobat Distiller "Print Optimized"
|
12
|
+
# setting.
|
13
|
+
# /prepress selects output similar to Acrobat Distiller "Prepress Optimized"
|
14
|
+
# setting.
|
15
|
+
# /default selects output intended to be useful across a wide variety of
|
16
|
+
# uses, possibly at the expense of a larger output file.
|
17
|
+
#
|
18
|
+
# Usage example:
|
19
|
+
#
|
20
|
+
# PdfParadise::PdfOptimizer.new(ARGV)
|
21
|
+
#
|
22
|
+
# =========================================================================== #
|
23
|
+
# require 'pdf_paradise/utility_scripts/pdf_optimizer.rb'
|
24
|
+
# =========================================================================== #
|
25
|
+
require 'pdf_paradise/base/base.rb'
|
26
|
+
|
27
|
+
module PdfParadise

# =========================================================================== #
# === PdfParadise::PdfOptimizer
#
# Writes a size-reduced copy of a .pdf file to "output.pdf" (relative to
# the current working directory) by invoking ghostscript with
# -dPDFSETTINGS=/screen.
#
# e() and esystem() are not defined in this class; presumably they are
# made available via 'pdf_paradise/base/base.rb' - TODO confirm.
# =========================================================================== #
class PdfOptimizer < ::PdfParadise::Base # === PdfParadise::PdfOptimizer

  require 'shellwords' # Shellwords.escape() quotes file names for the shell.

  # ========================================================================= #
  # === initialize
  #
  # The first argument is the path to the .pdf file, or an Array whose
  # first element is that path (such as ARGV). The second argument
  # determines whether run() is invoked right away.
  # ========================================================================= #
  def initialize(
      i           = nil,
      run_already = true
    )
    reset
    set_input(i)
    run if run_already
  end

  # ========================================================================= #
  # === reset (reset tag)
  #
  # Intentionally empty. NOTE(review): other subclasses of
  # PdfParadise::Base call super() in their reset method, this one does
  # not - confirm whether that is deliberate.
  # ========================================================================= #
  def reset
  end

  # ========================================================================= #
  # === set_input
  #
  # Stores the input as a (mutable) String. For an Array only the first
  # element is used.
  # ========================================================================= #
  def set_input(i = '')
    i = i.first if i.is_a? Array
    @input = i.to_s.dup
  end

  # ========================================================================= #
  # === input?
  # ========================================================================= #
  def input?
    @input
  end; alias input_file  input? # === input_file
       alias input_file? input? # === input_file?

  # ========================================================================= #
  # === output_file?
  # ========================================================================= #
  def output_file?
    'output.pdf'
  end; alias output_pdf? output_file? # === output_pdf?

  # ========================================================================= #
  # === use_ghostscript?
  # ========================================================================= #
  def use_ghostscript?
    true # For now this is hardcoded.
  end

  # ========================================================================= #
  # === output_ps
  #
  # Name of the intermediate PostScript file used by the pdf2ps/ps2pdf
  # variant.
  # ========================================================================= #
  def output_ps
    'output.ps'
  end

  # ========================================================================= #
  # === run (run tag)
  #
  # We have two different solutions: ghostscript (the hardcoded default),
  # or pdf2ps followed by ps2pdf.
  #
  # Nothing is executed if the input file does not exist. File names are
  # shell-escaped, so that names containing spaces or shell metacharacters
  # reach the external program unaltered.
  # ========================================================================= #
  def run
    unless File.exist?(input?)
      e 'No file exists at `'+input?+'`.'
      return
    end
    source = Shellwords.escape(input?)
    if use_ghostscript?
      _ = 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen '\
          '-dNOPAUSE -dQUIET -dBATCH -sOutputFile='+
          Shellwords.escape(output_file?)+' '+
          source
    else
      esystem 'pdf2ps '+source+' '+output_ps
      _ = 'ps2pdf '+output_ps+' '+output_pdf?
    end
    esystem _
  end

end; end
|
108
|
+
|
109
|
+
if __FILE__ == $PROGRAM_NAME
|
110
|
+
PdfParadise::PdfOptimizer.new(ARGV)
|
111
|
+
end # pdfoptimizer
|
@@ -0,0 +1,148 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
# Encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
# =========================================================================== #
|
5
|
+
# === PdfParadise::PdfStatistics
|
6
|
+
#
|
7
|
+
# This class will make use of PDF::Reader to provide us with some
|
8
|
+
# statistical information about a given .pdf file.
|
9
|
+
#
|
10
|
+
# You can also output information from a specific page.
|
11
|
+
#
|
12
|
+
# PDF is a page based file format, so most visible information
|
13
|
+
# is available via page-based iteration
|
14
|
+
#
|
15
|
+
# reader = PDF::Reader.new("somefile.pdf")
|
16
|
+
#
|
17
|
+
# reader.pages.each { |page|
|
18
|
+
# puts page.fonts
|
19
|
+
# puts page.text
|
20
|
+
# puts page.raw_content
|
21
|
+
# }
|
22
|
+
#
|
23
|
+
# Usage example:
|
24
|
+
#
|
25
|
+
# PdfParadise::PdfStatistics.new(ARGV)
|
26
|
+
#
|
27
|
+
# =========================================================================== #
|
28
|
+
# require 'pdf_paradise/utility_scripts/pdf_statistics.rb'
|
29
|
+
# =========================================================================== #
|
30
|
+
require 'pdf_paradise/base/base.rb'
|
31
|
+
|
32
|
+
module PdfParadise

# =========================================================================== #
# === PdfParadise::PdfStatistics
#
# Reports the PDF version, selected entries of the info dictionary and
# the page count of a .pdf file, based on PDF::Reader (pdf-reader gem).
#
# e(), simp(), rev() and cliner() are not defined in this class;
# presumably they are made available via 'pdf_paradise/base/base.rb' -
# TODO confirm.
# =========================================================================== #
class PdfStatistics < ::PdfParadise::Base # === PdfParadise::PdfStatistics

  # ========================================================================= #
  # === INFO_FIELDS
  #
  # Maps keys of the info dictionary to the label shown in front of the
  # value. The entries are reported in this order.
  # ========================================================================= #
  INFO_FIELDS = {
    Title:        'The title of this .pdf is: ',
    CreationDate: 'This .pdf was created at: ',
    Author:       'The author of this .pdf is: ',
    Producer:     'It was produced via: ',
    ModDate:      'It was last modified at: ',
    Creator:      'It was created via: '
  }.freeze

  # ========================================================================= #
  # === initialize
  #
  # The first argument is the path to the .pdf file, or an Array whose
  # first element is that path (such as ARGV). The second argument
  # determines whether run() is invoked right away.
  # ========================================================================= #
  def initialize(
      i           = nil,
      run_already = true
    )
    require_pdf_reader
    reset
    set_input(i)
    run if run_already
  end

  # ========================================================================= #
  # === reset (reset tag)
  #
  # Intentionally empty. NOTE(review): other subclasses of
  # PdfParadise::Base call super() in their reset method, this one does
  # not - confirm whether that is deliberate.
  # ========================================================================= #
  def reset
  end

  # ========================================================================= #
  # === require_pdf_reader
  #
  # Loads the pdf-reader gem with warnings silenced. The previous value of
  # $VERBOSE is restored even if the require fails.
  # ========================================================================= #
  def require_pdf_reader
    old_verbose = $VERBOSE
    $VERBOSE = nil
    require 'pdf-reader'
  ensure
    $VERBOSE = old_verbose # Restore it again here.
  end

  # ========================================================================= #
  # === set_input
  #
  # Stores the input as a (mutable) String. For an Array only the first
  # element is used.
  # ========================================================================= #
  def set_input(i = '')
    i = i.first if i.is_a? Array
    @input = i.to_s.dup
  end

  # ========================================================================= #
  # === input?
  # ========================================================================= #
  def input?
    @input
  end

  # ========================================================================= #
  # === instantiate_reader_object
  # ========================================================================= #
  def instantiate_reader_object
    @reader = PDF::Reader.new(input?)
  end

  # ========================================================================= #
  # === show_extended_info
  #
  # This will tap into the .info method and report every entry listed in
  # INFO_FIELDS that is present. A missing info dictionary is treated like
  # an empty one.
  # ========================================================================= #
  def show_extended_info
    info = @reader.info || {}
    ljust = 32
    INFO_FIELDS.each_pair {|key, label|
      next unless info.key?(key)
      e label.ljust(ljust)+simp(info[key])
    }
  end

  # ========================================================================= #
  # === report_n_pages_in_the_pdf_document
  # ========================================================================= #
  def report_n_pages_in_the_pdf_document
    e 'n pages in this .pdf document: '+simp(@reader.page_count)
  end

  # ========================================================================= #
  # === run (run tag)
  # ========================================================================= #
  def run
    instantiate_reader_object
    cliner
    e rev+
      'The PDF version for this .pdf file was: '+simp(@reader.pdf_version)
    show_extended_info
    # e @reader.metadata # <- This is how to obtain the metadata information.
    report_n_pages_in_the_pdf_document
    cliner
  end

  # ========================================================================= #
  # === PdfParadise::PdfStatistics[]
  #
  # Shortcut for PdfParadise::PdfStatistics.new(i).
  # ========================================================================= #
  def self.[](i = '')
    new(i)
  end

end; end
|
145
|
+
|
146
|
+
if __FILE__ == $PROGRAM_NAME
|
147
|
+
PdfParadise::PdfStatistics.new(ARGV)
|
148
|
+
end # pdfstatistics /Depot/PDF/foobar.pdf
|
@@ -0,0 +1,75 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
# Encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
# =========================================================================== #
|
5
|
+
# === PdfParadise::PdfToHtml
|
6
|
+
#
|
7
|
+
# This class will convert a .pdf file to .html, based on pdftohtml from
|
8
|
+
# poppler.
|
9
|
+
#
|
10
|
+
# Usage examples:
|
11
|
+
#
|
12
|
+
# PdfParadise::PdfToHtml.new(ARGV)
|
13
|
+
#
|
14
|
+
# =========================================================================== #
|
15
|
+
# require 'pdf_paradise/utility_scripts/pdf_to_html.rb'
|
16
|
+
# PdfParadise::PdfToHtml.new(ARGV)
|
17
|
+
# =========================================================================== #
|
18
|
+
require 'pdf_paradise/base/base.rb'
|
19
|
+
|
20
|
+
module PdfParadise

# =========================================================================== #
# === PdfParadise::PdfToHtml
#
# Converts every given .pdf file into a single .html file by invoking
# pdftohtml.
#
# set_commandline_arguments(), commandline_arguments?() and esystem() are
# not defined in this class; presumably they are made available via
# 'pdf_paradise/base/base.rb' - TODO confirm.
# =========================================================================== #
class PdfToHtml < ::PdfParadise::Base # === PdfParadise::PdfToHtml

  require 'shellwords' # Shellwords.escape() quotes file names for the shell.

  # ========================================================================= #
  # === NAMESPACE
  # ========================================================================= #
  NAMESPACE = inspect

  # ========================================================================= #
  # === initialize
  #
  # The first argument holds the .pdf files to convert and defaults to
  # ARGV. The second argument determines whether run() is invoked right
  # away.
  # ========================================================================= #
  def initialize(
      i           = ARGV,
      run_already = true
    )
    reset
    set_commandline_arguments(i)
    run if run_already
  end

  # ========================================================================= #
  # === reset
  # ========================================================================= #
  def reset
    super()
  end

  # ========================================================================= #
  # === opnn
  # ========================================================================= #
  def opnn
    super(NAMESPACE)
  end

  # ========================================================================= #
  # === run (run tag)
  #
  # Every entry is converted into "<basename of the entry>_to_html.html".
  # Both file names are shell-escaped, so that names containing spaces or
  # shell metacharacters reach pdftohtml unaltered.
  # ========================================================================= #
  def run
    commandline_arguments?.each {|entry|
      target = File.basename(entry+'_to_html')+'.html'
      # ===================================================================== #
      # The -s option is used to "generate single HTML that includes all
      # pages".
      # ===================================================================== #
      _ = 'pdftohtml -s '+Shellwords.escape(entry)+' '+
          Shellwords.escape(target)
      esystem _
    }
  end

end; end
|
72
|
+
|
73
|
+
if __FILE__ == $PROGRAM_NAME
|
74
|
+
PdfParadise::PdfToHtml.new(ARGV)
|
75
|
+
end # pdf_to_html foobar.pdf
|