pdf_paradise 0.3.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +933 -0
- data/bin/automatic_pdf_title +7 -0
- data/bin/burst_this_pdf_file +7 -0
- data/bin/combine_these_pdf_pages +7 -0
- data/bin/compress_via_hexapdf +7 -0
- data/bin/convert_markdown_to_pdf +7 -0
- data/bin/convert_pdf_to_text +7 -0
- data/bin/delete_first_page_of_this_pdf_file +7 -0
- data/bin/djvu_to_pdf +7 -0
- data/bin/merge_then_open +7 -0
- data/bin/n_pages +10 -0
- data/bin/open_main_pdf +7 -0
- data/bin/pdf_paradise +9 -0
- data/bin/rotate_pdf +7 -0
- data/bin/set_main_book +7 -0
- data/bin/set_title_of_this_pdf_file +15 -0
- data/doc/README.gen +871 -0
- data/doc/todo/todo.md +13 -0
- data/images/Logo_for_the_pdf_paradise_project.avif +0 -0
- data/lib/pdf_paradise/base/base.rb +344 -0
- data/lib/pdf_paradise/base/colours.rb +67 -0
- data/lib/pdf_paradise/colours/colours.rb +27 -0
- data/lib/pdf_paradise/commandline/commandline.rb +109 -0
- data/lib/pdf_paradise/commandline/help.rb +77 -0
- data/lib/pdf_paradise/commandline/menu.rb +173 -0
- data/lib/pdf_paradise/compress/compress_this_pdf_file.rb +108 -0
- data/lib/pdf_paradise/compress/compress_via_hexapdf.rb +27 -0
- data/lib/pdf_paradise/compress/compress_via_qpdf.rb +32 -0
- data/lib/pdf_paradise/constants/constants.rb +76 -0
- data/lib/pdf_paradise/convert_text_to_pdf.rb +94 -0
- data/lib/pdf_paradise/css/project.css +17 -0
- data/lib/pdf_paradise/fpdf/README.md +2 -0
- data/lib/pdf_paradise/fpdf/bookmark.rb +129 -0
- data/lib/pdf_paradise/fpdf/chinese.rb +454 -0
- data/lib/pdf_paradise/fpdf/fpdf.rb +1902 -0
- data/lib/pdf_paradise/fpdf/fpdf_eps.rb +138 -0
- data/lib/pdf_paradise/fpdf/makefont.rb +1794 -0
- data/lib/pdf_paradise/gui/README.md +6 -0
- data/lib/pdf_paradise/gui/fox/split_pdf_file.rb +77 -0
- data/lib/pdf_paradise/gui/gtk2/pdf_viewer/pdf_viewer.rb +34 -0
- data/lib/pdf_paradise/gui/gtk2/split_pdf_file/split_pdf_file.rb +34 -0
- data/lib/pdf_paradise/gui/gtk2/statistics_widget/statistics_widget.rb +34 -0
- data/lib/pdf_paradise/gui/gtk3/controller/controller.rb +214 -0
- data/lib/pdf_paradise/gui/gtk3/pdf_viewer/pdf_viewer.rb +34 -0
- data/lib/pdf_paradise/gui/gtk3/split_pdf_file/split_pdf_file.rb +34 -0
- data/lib/pdf_paradise/gui/jruby/delete_the_first_or_the_last_page_of_this_pdf_file/delete_the_first_or_the_last_page_of_this_pdf_file.rb +167 -0
- data/lib/pdf_paradise/gui/jruby/remove_the_first_page_of_this_pdf_file/remove_the_first_page_of_this_pdf_file.rb +103 -0
- data/lib/pdf_paradise/gui/libui/extract_all_images_from_this_pdf_file/extract_all_images_from_this_pdf_file.rb +223 -0
- data/lib/pdf_paradise/gui/libui/remove_the_first_page_of_this_pdf_file/remove_the_first_page_of_this_pdf_file.rb +267 -0
- data/lib/pdf_paradise/gui/libui/rotate_pdf_file/rotate_pdf_file.rb +219 -0
- data/lib/pdf_paradise/gui/libui/statistics_widget/statistics_widget.rb +233 -0
- data/lib/pdf_paradise/gui/shared_code/pdf_viewer/pdf_viewer.css +5 -0
- data/lib/pdf_paradise/gui/shared_code/pdf_viewer/pdf_viewer_module.rb +287 -0
- data/lib/pdf_paradise/gui/shared_code/remove_the_first_page_of_this_pdf_file_module/remove_the_first_page_of_this_pdf_file_module.rb +31 -0
- data/lib/pdf_paradise/gui/shared_code/split_pdf_file/split_pdf_file_module.rb +295 -0
- data/lib/pdf_paradise/gui/universal_widgets/convert_pdf_to_text/convert_pdf_to_text.rb +366 -0
- data/lib/pdf_paradise/gui/universal_widgets/delete_the_first_or_the_last_page_of_this_pdf_file/delete_the_first_or_the_last_page_of_this_pdf_file.rb +776 -0
- data/lib/pdf_paradise/gui/universal_widgets/statistics_widget/statistics_widget.rb +407 -0
- data/lib/pdf_paradise/gui/universal_widgets/to_pdf/to_pdf.rb +351 -0
- data/lib/pdf_paradise/hexapdf/001_rainbow_pattern_example.rb +0 -0
- data/lib/pdf_paradise/hexapdf/hexapdf.rb +123 -0
- data/lib/pdf_paradise/images/PDF_PARADISE_LOGO.png +0 -0
- data/lib/pdf_paradise/main_pdf/main_pdf.rb +474 -0
- data/lib/pdf_paradise/merge_pdf/menu.rb +63 -0
- data/lib/pdf_paradise/merge_pdf/merge_pdf.rb +307 -0
- data/lib/pdf_paradise/merge_pdf_namespace.rb +9 -0
- data/lib/pdf_paradise/merge_then_open/merge_then_open.rb +105 -0
- data/lib/pdf_paradise/prawn_addons/README.md +2 -0
- data/lib/pdf_paradise/prawn_addons/prawn_addons.rb +17 -0
- data/lib/pdf_paradise/project/project.rb +22 -0
- data/lib/pdf_paradise/remove_pdf_password.rb +391 -0
- data/lib/pdf_paradise/requires/batch_require_toplevel_files.rb +22 -0
- data/lib/pdf_paradise/requires/colours.rb +11 -0
- data/lib/pdf_paradise/requires/colours_and_esystem_and_save_file_and_fileutils_and_opn.rb +13 -0
- data/lib/pdf_paradise/requires/esystem_and_colours.rb +11 -0
- data/lib/pdf_paradise/requires/esystem_and_opn_and_colours.rb +10 -0
- data/lib/pdf_paradise/requires/require_the_whole_project.rb +30 -0
- data/lib/pdf_paradise/requires/require_utility_scripts.rb +9 -0
- data/lib/pdf_paradise/set_main_book.rb +156 -0
- data/lib/pdf_paradise/set_pdf_title.rb +220 -0
- data/lib/pdf_paradise/sinatra/embeddable_interface.rb +389 -0
- data/lib/pdf_paradise/toplevel_methods/convert_epub_to_pdf.rb +27 -0
- data/lib/pdf_paradise/toplevel_methods/convert_markdown_to_pdf.rb +45 -0
- data/lib/pdf_paradise/toplevel_methods/convert_ppt_to_pdf.rb +35 -0
- data/lib/pdf_paradise/toplevel_methods/e.rb +16 -0
- data/lib/pdf_paradise/toplevel_methods/esystem.rb +20 -0
- data/lib/pdf_paradise/toplevel_methods/misc.rb +228 -0
- data/lib/pdf_paradise/toplevel_methods/number_pages.rb +38 -0
- data/lib/pdf_paradise/toplevel_methods/opened_pdf_files.rb +221 -0
- data/lib/pdf_paradise/toplevel_methods/query_pdf_title.rb +201 -0
- data/lib/pdf_paradise/toplevel_methods/reduce_size_of_this_pdf_file.rb +46 -0
- data/lib/pdf_paradise/toplevel_methods/roebe.rb +17 -0
- data/lib/pdf_paradise/toplevel_methods/to_pdf.rb +12 -0
- data/lib/pdf_paradise/utility_scripts/README.md +3 -0
- data/lib/pdf_paradise/utility_scripts/automatic_pdf_title.rb +104 -0
- data/lib/pdf_paradise/utility_scripts/check_syntax_of_pdf_files.rb +106 -0
- data/lib/pdf_paradise/utility_scripts/combine_these_pdf_pages.rb +118 -0
- data/lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb +179 -0
- data/lib/pdf_paradise/utility_scripts/delete_last_page_of_this_pdf_file.rb +180 -0
- data/lib/pdf_paradise/utility_scripts/delete_the_first_page_of_this_pdf_file/delete_the_first_page_of_this_pdf_file.rb +429 -0
- data/lib/pdf_paradise/utility_scripts/delete_this_page_of_this_pdf_file.rb +356 -0
- data/lib/pdf_paradise/utility_scripts/djvu_to_pdf.rb +87 -0
- data/lib/pdf_paradise/utility_scripts/extract_all_images_from_this_pdf_file.rb +129 -0
- data/lib/pdf_paradise/utility_scripts/extract_pdf_page.rb +283 -0
- data/lib/pdf_paradise/utility_scripts/pdf_file_n_total_pages.rb +348 -0
- data/lib/pdf_paradise/utility_scripts/pdf_optimizer.rb +111 -0
- data/lib/pdf_paradise/utility_scripts/pdf_statistics.rb +148 -0
- data/lib/pdf_paradise/utility_scripts/pdf_to_html.rb +75 -0
- data/lib/pdf_paradise/utility_scripts/remove_images.rb +110 -0
- data/lib/pdf_paradise/utility_scripts/rotate_pdf_file.rb +303 -0
- data/lib/pdf_paradise/utility_scripts/split_pdf.rb +364 -0
- data/lib/pdf_paradise/utility_scripts/to_pdf.rb +130 -0
- data/lib/pdf_paradise/utility_scripts/to_qdf.rb +66 -0
- data/lib/pdf_paradise/version/version.rb +19 -0
- data/lib/pdf_paradise/www/README.md +2 -0
- data/lib/pdf_paradise/www/sinatra/app.rb +304 -0
- data/lib/pdf_paradise/yaml/working_on_these_pdf_files.yml +4 -0
- data/lib/pdf_paradise.rb +5 -0
- data/pdf_paradise.gemspec +61 -0
- data/test/fpdf/001_minimal_example.rb +12 -0
- data/test/fpdf/002.pdf +0 -0
- data/test/fpdf/002_header_and_footer_example.rb +64 -0
- data/test/fpdf/003.pdf +98 -0
- data/test/fpdf/003_justified_paragraphs.rb +96 -0
- data/test/fpdf/file1.md +3 -0
- data/test/fpdf/file2.md +3 -0
- data/test/fpdf/test.pdf +0 -0
- data/test/testing_pdf_paradise.rb +12 -0
- metadata +239 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
#!/usr/bin/ruby -w
|
|
2
|
+
# Encoding: UTF-8
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
# =========================================================================== #
|
|
5
|
+
# === PdfParadise::ExtractPdfPage
|
|
6
|
+
#
|
|
7
|
+
# Use this class to extract one or more pdf pages from a given .pdf file.
|
|
8
|
+
#
|
|
9
|
+
# Three arguments can be passed to this class:
|
|
10
|
+
#
|
|
11
|
+
# The first argument is the given input file (the .pdf file)
|
|
12
|
+
# The second argument is the first page to extract
|
|
13
|
+
# The third argument is how many pages to extract, counted from that first page
|
|
14
|
+
#
|
|
15
|
+
# Specific invocation examples:
|
|
16
|
+
#
|
|
17
|
+
# pdfpextr START_PAGE.pdf 5 19
|
|
18
|
+
# pdfpextr inputfile.pdf 22 36
|
|
19
|
+
# ext_pdf foo.pdf 1 100
|
|
20
|
+
#
|
|
21
|
+
# Usage examples:
|
|
22
|
+
# require 'extract_pdf_page'
|
|
23
|
+
# ExtractPdfPage.new
|
|
24
|
+
# ExtractPdfPage.new :save_here => '/Depot/Temp/test.pdf'
|
|
25
|
+
#
|
|
26
|
+
# Usage examples from the commandline:
|
|
27
|
+
# epdf foo.pdf 5
|
|
28
|
+
# epdf foo.pdf 5 10
|
|
29
|
+
# =========================================================================== #
|
|
30
|
+
# require 'extract_pdf_page.rb'
|
|
31
|
+
# =========================================================================== #
|
|
32
|
+
require 'pdf_paradise/requires/esystem_and_opn_and_colours.rb'
|
|
33
|
+
require 'pdf_paradise/base/base.rb'
|
|
34
|
+
|
|
35
|
+
module PdfParadise
|
|
36
|
+
|
|
37
|
+
class ExtractPdfPage < PdfParadise::Base # === PdfParadise::ExtractPdfPage
|
|
38
|
+
|
|
39
|
+
require 'pdf_paradise/utility_scripts/pdf_file_n_total_pages.rb'
|
|
40
|
+
|
|
41
|
+
# ========================================================================= #
|
|
42
|
+
# === NAMESPACE
|
|
43
|
+
# ========================================================================= #
|
|
44
|
+
NAMESPACE = inspect
|
|
45
|
+
|
|
46
|
+
# ========================================================================= #
|
|
47
|
+
# === DEFAULT_START_PAGE
|
|
48
|
+
# ========================================================================= #
|
|
49
|
+
DEFAULT_START_PAGE = '1'
|
|
50
|
+
|
|
51
|
+
# ========================================================================= #
|
|
52
|
+
# === DEFAULT_END_PAGE
|
|
53
|
+
#
|
|
54
|
+
# This number can be modified automatically during runtime.
|
|
55
|
+
# ========================================================================= #
|
|
56
|
+
DEFAULT_END_PAGE = '10'
|
|
57
|
+
|
|
58
|
+
# ========================================================================= #
|
|
59
|
+
# === DEFAULT_EXTRACT_N_PDF_FILES
|
|
60
|
+
# ========================================================================= #
|
|
61
|
+
DEFAULT_EXTRACT_N_PDF_FILES = 1
|
|
62
|
+
|
|
63
|
+
# ========================================================================= #
|
|
64
|
+
# === initialize
|
|
65
|
+
#
|
|
66
|
+
# We accept three arguments:
|
|
67
|
+
#
|
|
68
|
+
# (1) the name of the .pdf file
|
|
69
|
+
# (2) the start page
|
|
70
|
+
# (3) how many pages to extract, beginning at the start page
|
|
71
|
+
#
|
|
72
|
+
# ========================================================================= #
|
|
73
|
+
# Prepares the extractor and, unless told otherwise, runs it right away.
#
# mandatory_name_of_pdf_file - path of the .pdf file, or a Hash of options
#                              (:pdf_file_to_use, :start_page, :end_page,
#                              :dont_run_yet).
# optional_start_page        - first page to extract.
# extract_n_pdf_files        - how many pages to extract from that page on.
# run_already                - when true, run() is invoked at the end.
def initialize(
    mandatory_name_of_pdf_file = nil,
    optional_start_page        = DEFAULT_START_PAGE,
    extract_n_pdf_files        = DEFAULT_EXTRACT_N_PDF_FILES,
    run_already                = true
  )
  reset
  check_against_menu(mandatory_name_of_pdf_file)
  start_page_given = false
  n_pages_given    = false
  if mandatory_name_of_pdf_file.is_a? Hash
    if mandatory_name_of_pdf_file.has_key? :dont_run_yet
      run_already = !mandatory_name_of_pdf_file.delete(:dont_run_yet)
    end
    # Remember which values the Hash supplies before set_name_of_pdf_file()
    # consumes (deletes) the corresponding keys.
    start_page_given = mandatory_name_of_pdf_file.has_key?(:start_page)
    n_pages_given    = mandatory_name_of_pdf_file.has_key?(:end_page)
  end
  set_name_of_pdf_file(mandatory_name_of_pdf_file) # Must come before set_extract_n_pdf_files()
  # Anything the Hash did not provide falls back to the positional
  # arguments (and thus to the defaults); otherwise the start page and the
  # page count would stay unset and nothing would be extracted.
  set_start_page(optional_start_page) unless start_page_given
  set_extract_n_pdf_files(extract_n_pdf_files) unless n_pages_given
  run if run_already
end
|
|
93
|
+
|
|
94
|
+
# ========================================================================= #
|
|
95
|
+
# === reset (reset tag)
|
|
96
|
+
# ========================================================================= #
|
|
97
|
+
def reset
|
|
98
|
+
super()
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# ========================================================================= #
|
|
102
|
+
# === set_name_of_pdf_file
|
|
103
|
+
#
|
|
104
|
+
# The input can also be a Hash.
|
|
105
|
+
# ========================================================================= #
|
|
106
|
+
# Stores the path of the .pdf file that pages will be extracted from.
#
# i - a path, or a Hash that may carry :pdf_file_to_use, :start_page and
#     :end_page. The recognised keys are removed from the Hash.
#
# Prints a warning and exits the process if no file exists at the path.
def set_name_of_pdf_file(i)
  deferred = {}
  if i.is_a? Hash
    deferred[:start_page] = i.delete(:start_page) if i.has_key? :start_page
    deferred[:end_page]   = i.delete(:end_page)   if i.has_key? :end_page
    # A Hash without :pdf_file_to_use yields nil here, which becomes '' below
    # and is reported as a missing file instead of raising a TypeError.
    i = i.delete(:pdf_file_to_use)
  end
  i = i.to_s
  unless File.exist? i
    opnn; e 'Warning - no file at `'+sfile(i.to_s)+'` could be found.'
    opnn; e 'Thus we can not extract anything. Exiting now'
    exit
  end
  @name_of_pdf_file = i
  # The page settings are applied only now: set_end_page() looks at the
  # name of the .pdf file, so that name has to be assigned first.
  set_start_page(deferred[:start_page]) if deferred.has_key? :start_page
  set_end_page(deferred[:end_page])     if deferred.has_key? :end_page
end; alias use_this_input_file set_name_of_pdf_file # === use_this_input_file
|
|
127
|
+
|
|
128
|
+
# ========================================================================= #
|
|
129
|
+
# === set_start_page
|
|
130
|
+
# ========================================================================= #
|
|
131
|
+
# Stores the first page to extract in @start_page (as an Integer).
#
# i - a page number (Integer or String); nil selects DEFAULT_START_PAGE.
#     A Hash is treated as an options Hash: :save_here is forwarded to
#     set_save_here(). A path to an existing file containing '.pdf' is
#     taken as the input file instead of a page number.
#
# Values that do not denote a page >= 1 fall back to DEFAULT_START_PAGE.
def set_start_page(
    i = DEFAULT_START_PAGE
  )
  i = DEFAULT_START_PAGE if i.nil?
  if i.is_a? Hash
    set_save_here(i.delete(:save_here)) if i.has_key? :save_here
    # A Hash carries options only, never a page number; without this its
    # string form would be turned into page 0.
    i = DEFAULT_START_PAGE
  end
  i = i.to_s
  if File.exist?(i) && i.include?('.pdf')
    use_this_input_file(i)
    i = DEFAULT_START_PAGE
  end
  page = i.to_i
  page = DEFAULT_START_PAGE.to_i if page < 1 # pages are counted from 1
  @start_page = page
end
|
|
148
|
+
|
|
149
|
+
# ========================================================================= #
|
|
150
|
+
# === report_to_the_user_where_we_stored_the_new_pdf_file
|
|
151
|
+
# ========================================================================= #
|
|
152
|
+
# Tells the user where the extracted pages were stored.
#
# run_verbose_system_command() writes one file per page, named
# "<page>_<output_file?>", so those names are checked here rather than
# the bare output_file? name, which is never written.
def report_to_the_user_where_we_stored_the_new_pdf_file
  base_name = output_file?
  return if base_name.nil? # run() has not determined an output file yet
  (start_page?.to_i..calculate_last_page).each { |page|
    stored_at = "#{page}_#{base_name}"
    if File.exist? stored_at
      opnn; e 'Finished storing at `'+sfile(stored_at)+'`.'
    end
  }
end
|
|
158
|
+
|
|
159
|
+
# ========================================================================= #
|
|
160
|
+
# === check_against_menu (menu tag)
|
|
161
|
+
# ========================================================================= #
|
|
162
|
+
def check_against_menu(i = nil)
|
|
163
|
+
case i
|
|
164
|
+
when '--help','HELP'
|
|
165
|
+
e 'Usage example:'
|
|
166
|
+
e ' ext_pdf foo.pdf 1 100'
|
|
167
|
+
exit
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
# ========================================================================= #
|
|
172
|
+
# === start_page?
|
|
173
|
+
# ========================================================================= #
|
|
174
|
+
def start_page?
|
|
175
|
+
@start_page.to_s
|
|
176
|
+
end; alias which_page? start_page? # === which_page?
|
|
177
|
+
|
|
178
|
+
# ========================================================================= #
|
|
179
|
+
# === calculate_last_page
|
|
180
|
+
#
|
|
181
|
+
# This will calculate which page must be the last.
|
|
182
|
+
# ========================================================================= #
|
|
183
|
+
def calculate_last_page
|
|
184
|
+
return start_page?.to_i + (@extract_n_pdf_files.to_i - 1)
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
# ========================================================================= #
|
|
188
|
+
# === difference?
|
|
189
|
+
# ========================================================================= #
|
|
190
|
+
def difference?
|
|
191
|
+
return (calculate_last_page - start_page?.to_i)
|
|
192
|
+
end; alias n_times difference? # === n_times
|
|
193
|
+
|
|
194
|
+
# ========================================================================= #
|
|
195
|
+
# === output_file?
|
|
196
|
+
# ========================================================================= #
|
|
197
|
+
def output_file?
|
|
198
|
+
@output_file
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
# ========================================================================= #
|
|
202
|
+
# === report_to_the_user_how_many_pages_we_extracted
|
|
203
|
+
# ========================================================================= #
|
|
204
|
+
def report_to_the_user_how_many_pages_we_extracted
|
|
205
|
+
n_pdf_pages = @extract_n_pdf_files.to_i
|
|
206
|
+
if File.exist? input_file?
|
|
207
|
+
opnn; e 'We extracted '+sfancy(n_pdf_pages.to_s)+' pdf pages.'
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
# ========================================================================= #
|
|
212
|
+
# === report_to_the_user
|
|
213
|
+
# ========================================================================= #
|
|
214
|
+
def report_to_the_user
|
|
215
|
+
report_to_the_user_how_many_pages_we_extracted
|
|
216
|
+
report_to_the_user_where_we_stored_the_new_pdf_file
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
# ========================================================================= #
|
|
220
|
+
# === name_of_pdf_file?
|
|
221
|
+
# ========================================================================= #
|
|
222
|
+
def name_of_pdf_file?
|
|
223
|
+
@name_of_pdf_file
|
|
224
|
+
end; alias input_file? name_of_pdf_file? # === input_file?
|
|
225
|
+
alias name_of_the_pdf_file? name_of_pdf_file? # === name_of_the_pdf_file?
|
|
226
|
+
|
|
227
|
+
# ========================================================================= #
|
|
228
|
+
# === run_verbose_system_command
|
|
229
|
+
# ========================================================================= #
|
|
230
|
+
# Runs ghostscript once per requested page; each page is written to its
# own file named "<page>_<output_file?>".
#
# The loop covers start_page? up to and including calculate_last_page.
# Iterating n_times (the difference between the two) stopped one page
# short and extracted nothing at all for a single page.
#
# NOTE(review): the file names are passed to the shell unescaped; names
# containing spaces or shell metacharacters will not work - confirm
# whether esystem() can take care of quoting.
def run_verbose_system_command
  first_page = start_page?.to_i
  last_page  = calculate_last_page
  (first_page..last_page).each { |page|
    cmd = [
      'gs -sDEVICE=pdfwrite -dNOPAUSE -dBATCH -dSAFER',
      "-dFirstPage=#{page}",
      "-dLastPage=#{page}",
      "-sOutputFile=#{page}_#{output_file?}",
      name_of_pdf_file?.to_s
    ].join(' ')
    esystem cmd
  }
end
|
|
242
|
+
|
|
243
|
+
# ========================================================================= #
|
|
244
|
+
# === determine_output_file
|
|
245
|
+
# ========================================================================= #
|
|
246
|
+
# Derives @output_file from the name of the input .pdf file.
#
# Only the base name of the input is used: with a path such as
# 'dir/foo.pdf' the prefix would otherwise end up in front of the
# directory part and point into a directory that does not exist.
def determine_output_file
  @output_file = 'extracted_page_from_file_'+File.basename(name_of_pdf_file?.to_s)
end
|
|
250
|
+
|
|
251
|
+
# ========================================================================= #
|
|
252
|
+
# === set_extract_n_pdf_files
|
|
253
|
+
# ========================================================================= #
|
|
254
|
+
# Stores how many pages are to be extracted in @extract_n_pdf_files.
#
# i - the requested number of pages; nil selects
#     DEFAULT_EXTRACT_N_PDF_FILES. The value is capped at the number of
#     pages that ::PdfParadise.n_pdf_pages? reports for the input file.
def set_extract_n_pdf_files(i = DEFAULT_EXTRACT_N_PDF_FILES)
  i = DEFAULT_EXTRACT_N_PDF_FILES if i.nil?
  i = i.to_i
  # Ask for the page count once and reuse the answer.
  total_pages = ::PdfParadise.n_pdf_pages?(name_of_the_pdf_file?)
  i = total_pages if i > total_pages
  @extract_n_pdf_files = i
end; alias set_end_page set_extract_n_pdf_files # === set_end_page
|
|
262
|
+
|
|
263
|
+
# ========================================================================= #
|
|
264
|
+
# === opnn
|
|
265
|
+
# ========================================================================= #
|
|
266
|
+
def opnn
|
|
267
|
+
super(NAMESPACE)
|
|
268
|
+
end
|
|
269
|
+
|
|
270
|
+
# ========================================================================= #
|
|
271
|
+
# === run (run tag)
|
|
272
|
+
# ========================================================================= #
|
|
273
|
+
def run
|
|
274
|
+
determine_output_file
|
|
275
|
+
run_verbose_system_command
|
|
276
|
+
report_to_the_user
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
end; end
|
|
280
|
+
|
|
281
|
+
if __FILE__ == $PROGRAM_NAME
|
|
282
|
+
PdfParadise::ExtractPdfPage.new(ARGV[0], ARGV[1], ARGV[2])
|
|
283
|
+
end # epdf
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
#!/usr/bin/ruby -w
|
|
2
|
+
# Encoding: UTF-8
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
# =========================================================================== #
|
|
5
|
+
# === PdfParadise::PdfFileNTotalPages
|
|
6
|
+
#
|
|
7
|
+
# This class can report how many pages are in a given .pdf file.
|
|
8
|
+
#
|
|
9
|
+
# Also note that an alternative exists:
|
|
10
|
+
#
|
|
11
|
+
# https://github.com/prawnpdf/pdf-inspector
|
|
12
|
+
#
|
|
13
|
+
# The code would be like this here:
|
|
14
|
+
#
|
|
15
|
+
# page_analysis = PDF::Inspector::Page.analyze(pdf)
|
|
16
|
+
# page_analysis.pages.size # <-- like 50 pages
|
|
17
|
+
#
|
|
18
|
+
# Usage example:
|
|
19
|
+
#
|
|
20
|
+
# PdfParadise::PdfFileNTotalPages.new(ARGV)
|
|
21
|
+
#
|
|
22
|
+
# =========================================================================== #
|
|
23
|
+
# require 'pdf_paradise/utility_scripts/pdf_file_n_total_pages.rb'
|
|
24
|
+
# PdfParadise::PdfFileNTotalPages.new(ARGV)
|
|
25
|
+
# =========================================================================== #
|
|
26
|
+
require 'pdf_paradise/base/base.rb'
|
|
27
|
+
|
|
28
|
+
module PdfParadise
|
|
29
|
+
|
|
30
|
+
class PdfFileNTotalPages < Base # === PdfParadise::PdfFileNTotalPages
|
|
31
|
+
|
|
32
|
+
# ========================================================================= #
|
|
33
|
+
# === MATCH_AGAINST_THIS_REGEX
|
|
34
|
+
# ========================================================================= #
|
|
35
|
+
MATCH_AGAINST_THIS_REGEX = /Count (\d+)/
|
|
36
|
+
|
|
37
|
+
# ========================================================================= #
|
|
38
|
+
# === USE_THIS_PROGRAM_TO_DETERMINE_HOW_MANY_PAGES_ARE_PART_OF_THE_PDF_FILE
|
|
39
|
+
#
|
|
40
|
+
# This can be :pdfinfo or :qpdf or similar.
|
|
41
|
+
# ========================================================================= #
|
|
42
|
+
USE_THIS_PROGRAM_TO_DETERMINE_HOW_MANY_PAGES_ARE_PART_OF_THE_PDF_FILE = :hexapdf # :qpdf # :pdfinfo
|
|
43
|
+
|
|
44
|
+
# ========================================================================= #
|
|
45
|
+
# === initialize
|
|
46
|
+
# ========================================================================= #
|
|
47
|
+
def initialize(
|
|
48
|
+
i = nil,
|
|
49
|
+
run_already = true,
|
|
50
|
+
&block
|
|
51
|
+
)
|
|
52
|
+
reset
|
|
53
|
+
# ======================================================================= #
|
|
54
|
+
# === @be_verbose
|
|
55
|
+
#
|
|
56
|
+
# Must be defined here, as reset() may be called from another method.
|
|
57
|
+
# ======================================================================= #
|
|
58
|
+
@be_verbose = true
|
|
59
|
+
set_commandline_arguments(i)
|
|
60
|
+
# ======================================================================= #
|
|
61
|
+
# === Handle blocks next
|
|
62
|
+
# ======================================================================= #
|
|
63
|
+
if block_given?
|
|
64
|
+
yielded = yield
|
|
65
|
+
case yielded
|
|
66
|
+
# ===================================================================== #
|
|
67
|
+
# === :be_quiet
|
|
68
|
+
# ===================================================================== #
|
|
69
|
+
when :be_quiet,
|
|
70
|
+
:be_silent
|
|
71
|
+
@be_verbose = false
|
|
72
|
+
run_already = false
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
case run_already
|
|
76
|
+
# ======================================================================= #
|
|
77
|
+
# === :be_silent
|
|
78
|
+
# ======================================================================= #
|
|
79
|
+
when :be_silent
|
|
80
|
+
@be_verbose = false
|
|
81
|
+
run_already = false
|
|
82
|
+
end
|
|
83
|
+
run if run_already
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
# ========================================================================= #
|
|
87
|
+
# === reset (reset tag)
|
|
88
|
+
# ========================================================================= #
|
|
89
|
+
def reset
|
|
90
|
+
super()
|
|
91
|
+
infer_the_namespace
|
|
92
|
+
reset_the_internal_variables
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
# ========================================================================= #
|
|
96
|
+
# === reset_the_internal_variables
|
|
97
|
+
# ========================================================================= #
|
|
98
|
+
def reset_the_internal_variables
|
|
99
|
+
# ======================================================================= #
|
|
100
|
+
# === @n_pages
|
|
101
|
+
#
|
|
102
|
+
# This variable will keep track as to how many pages the given .pdf
|
|
103
|
+
# file has.
|
|
104
|
+
# ======================================================================= #
|
|
105
|
+
@n_pages = 0
|
|
106
|
+
# ======================================================================= #
|
|
107
|
+
# === @use_this_program_to_determine_how_many_pages_are_part_of_the_pdf_file
|
|
108
|
+
# ======================================================================= #
|
|
109
|
+
@use_this_program_to_determine_how_many_pages_are_part_of_the_pdf_file =
|
|
110
|
+
USE_THIS_PROGRAM_TO_DETERMINE_HOW_MANY_PAGES_ARE_PART_OF_THE_PDF_FILE
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# ========================================================================= #
|
|
114
|
+
# === n_pages
|
|
115
|
+
# ========================================================================= #
|
|
116
|
+
def n_pages?
|
|
117
|
+
@n_pages
|
|
118
|
+
end; alias n_pages n_pages? # === n_pages
|
|
119
|
+
alias result? n_pages? # === result?
|
|
120
|
+
|
|
121
|
+
# ========================================================================= #
|
|
122
|
+
# === set_n_pages
|
|
123
|
+
#
|
|
124
|
+
# Since as of December 2021 this method will do a tiny bit of
|
|
125
|
+
# sanitizing the given input.
|
|
126
|
+
# ========================================================================= #
|
|
127
|
+
def set_n_pages(i)
|
|
128
|
+
i = i.strip if i.is_a? String
|
|
129
|
+
i = i.to_i
|
|
130
|
+
@n_pages = i
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# ========================================================================= #
|
|
134
|
+
# === determine_dataset
|
|
135
|
+
# ========================================================================= #
|
|
136
|
+
def determine_dataset(
|
|
137
|
+
of_this_pdf_file = @this_pdf_file
|
|
138
|
+
)
|
|
139
|
+
@dataset = File.binread(of_this_pdf_file)
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
# ========================================================================= #
|
|
143
|
+
# === this_pdf_file?
|
|
144
|
+
# ========================================================================= #
|
|
145
|
+
def this_pdf_file?
|
|
146
|
+
@this_pdf_file
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
# ========================================================================= #
|
|
150
|
+
# === set_this_pdf_file
|
|
151
|
+
# ========================================================================= #
|
|
152
|
+
def set_this_pdf_file(i)
|
|
153
|
+
@this_pdf_file = i
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
# ========================================================================= #
|
|
157
|
+
# === check_whether_qpdf_exists_and_if_not_then_exit
|
|
158
|
+
# ========================================================================= #
|
|
159
|
+
# Exits the process with a message when qpdf can not be started.
#
# program - name of the executable to probe (defaults to 'qpdf').
#
# The probe uses the return value of Kernel#system, which is nil only
# when the program could not be executed. Matching on the text a shell
# prints for a missing command is unreliable, because that text differs
# between shells.
def check_whether_qpdf_exists_and_if_not_then_exit(program = 'qpdf')
  could_be_started = !system(
    program, '--version', out: File::NULL, err: File::NULL
  ).nil?
  return if could_be_started
  opnn; e "#{program} is not installed, can not continue."
  exit
end
|
|
170
|
+
|
|
171
|
+
# ========================================================================= #
|
|
172
|
+
# === ensure_that_hexapdf_exists
|
|
173
|
+
#
|
|
174
|
+
# This method will try to determine whether hexapdf exists on the
|
|
175
|
+
# given computer system or whether it does not.
|
|
176
|
+
# ========================================================================= #
|
|
177
|
+
# Returns true when hexapdf can be started on this system, else false.
#
# program - name of the executable to probe (defaults to 'hexapdf').
#
# Kernel#system returns nil only when the program could not be executed;
# this does not depend on the wording a particular shell uses for a
# missing command.
def ensure_that_hexapdf_exists(program = 'hexapdf')
  !system(program, '--version', out: File::NULL, err: File::NULL).nil?
end
|
|
185
|
+
|
|
186
|
+
# ========================================================================= #
|
|
187
|
+
# === determine_n_pages
|
|
188
|
+
# ========================================================================= #
|
|
189
|
+
# Determines how many pages @this_pdf_file has and hands the result to
# set_n_pages().
#
# use_this_program - :hexapdf, :qpdf or :pdfinfo to ask that external
#                    program; any other value falls back to scanning
#                    @dataset with MATCH_AGAINST_THIS_REGEX.
#
# The external programs are started through IO.popen with an argument
# Array, so the file name reaches them verbatim - no shell is involved
# and names containing spaces or shell metacharacters are safe.
def determine_n_pages(
    use_this_program = @use_this_program_to_determine_how_many_pages_are_part_of_the_pdf_file
  )
  this_pdf_file = @this_pdf_file.to_s
  if be_verbose?
    opne 'Using `'+use_this_program.to_s+'` to determine how many '\
         'pages are in the given .pdf file.'
  end
  case use_this_program
  # ======================================================================= #
  # === :hexapdf
  # ======================================================================= #
  when :hexapdf
    if ensure_that_hexapdf_exists
      result = IO.popen(['hexapdf', 'info', this_pdf_file], &:read)
      use_this_regex = /Pages:(\s*)(\d{1,6})\s*$/ # See: https://rubular.com/r/QZ7JB6c0O22vta
      # Capture group 2 holds the digits; no match yields nil and thus 0.
      set_n_pages(result.to_s[use_this_regex, 2].to_i)
    else
      opne 'hexapdf appears to not have been installed, or it could'
      opne 'not be found.'
    end
  # ======================================================================= #
  # === :qpdf
  # ======================================================================= #
  when :qpdf
    check_whether_qpdf_exists_and_if_not_then_exit
    result = IO.popen(['qpdf', this_pdf_file, '--show-npages'], &:read)
    set_n_pages(result)
  # ======================================================================= #
  # === :pdfinfo
  # ======================================================================= #
  when :pdfinfo
    result = IO.popen(['pdfinfo', this_pdf_file], &:read)
    determine_n_pages_via_pdfinfo(result)
  else # else tag
    # ===================================================================== #
    # Regex fallback: every "Count <n>" entry in the raw file content is
    # collected and the largest one is used. This works for many .pdf
    # files but not for all of them, and may still pick a wrong entry.
    # ===================================================================== #
    scanned = @dataset.to_s.scan(MATCH_AGAINST_THIS_REGEX)
    max_value = scanned.map { |entry| entry.first.to_i }.max
    set_n_pages(max_value)
  end
end
|
|
246
|
+
|
|
247
|
+
# ========================================================================= #
|
|
248
|
+
# === determine_n_pages_via_pdfinfo
|
|
249
|
+
#
|
|
250
|
+
# We have to find an entry such as the following one:
|
|
251
|
+
#
|
|
252
|
+
# Pages: 35
|
|
253
|
+
#
|
|
254
|
+
# ========================================================================= #
|
|
255
|
+
# Extracts the page count from a text such as the output of pdfinfo and
# hands it to set_n_pages.
#
# i              - the text to inspect; the visible caller passes the output
#                  of `pdfinfo <file>`. Defaults to @this_pdf_file.
# use_this_regex - regex applied to the text; the first value it captures
#                  is treated as the page count.
#
# When the text contains 'Pages:' and the regex matches, the first captured
# value (with surrounding whitespace stripped) is passed to set_n_pages.
# In every other case the input is passed on to set_n_pages unchanged.
def determine_n_pages_via_pdfinfo(
    i              = @this_pdf_file,
    use_this_regex = Regexp.new(/Pages: (.+)/)
  )
  # Non-String input (e. g. nil when @this_pdf_file was never set) cannot
  # be scanned, so it is passed through instead of raising NoMethodError.
  if i.is_a?(String) && i.include?('Pages:')
    # The presence of the 'Pages:' marker does not guarantee that the regex
    # matches (e. g. "Pages:" directly followed by a newline), hence the
    # input is only replaced when something was actually captured.
    first_capture = i.scan(use_this_regex).flatten.first
    i = first_capture.strip if first_capture
  end
  set_n_pages(i)
end
|
|
264
|
+
|
|
265
|
+
# ========================================================================= #
|
|
266
|
+
# === report_how_many_pages_were_found
|
|
267
|
+
# ========================================================================= #
|
|
268
|
+
# Passes a one-line summary naming @this_pdf_file and its page count
# (as returned by n_pages?) to e. Does nothing, and returns nil, unless
# be_verbose? is truthy.
def report_how_many_pages_were_found
  return unless be_verbose?
  # The helpers are invoked in the same order in which they appear in
  # the final message.
  prefix     = rev
  pdf_name   = sfile(@this_pdf_file)
  page_count = simp(n_pages?.to_s)
  e "#{prefix}The pdf-file #{pdf_name} has exactly #{page_count} pages."
end
|
|
274
|
+
|
|
275
|
+
# ========================================================================= #
|
|
276
|
+
# === be_verbose?
|
|
277
|
+
# ========================================================================= #
|
|
278
|
+
# Query method: hands back the current value of @be_verbose without
# modifying it. The result is nil if the variable was never assigned.
def be_verbose?
  @be_verbose
end
|
|
281
|
+
|
|
282
|
+
# ========================================================================= #
|
|
283
|
+
# === run (run tag)
|
|
284
|
+
# ========================================================================= #
|
|
285
|
+
# Main entry point of the instance. Walks over every entry returned by
# return_files_from_the_commandline_arguments and, for each one, resets
# the internal variables, registers the file, determines the dataset and
# the number of pages, and - if be_verbose? is truthy - reports the
# result.
def run
  return_files_from_the_commandline_arguments.each do |pdf_file|
    reset_the_internal_variables
    set_this_pdf_file(pdf_file)
    determine_dataset
    # Determine how many pages are in that .pdf file.
    determine_n_pages
    next unless be_verbose?
    report_how_many_pages_were_found
  end
end
|
|
294
|
+
|
|
295
|
+
# ========================================================================= #
|
|
296
|
+
# === PdfFileNTotalPages.return_n_pages
|
|
297
|
+
#
|
|
298
|
+
# This method is silent by default.
|
|
299
|
+
# ========================================================================= #
|
|
300
|
+
# Creates a new PdfFileNTotalPages instance for the given file (forwarding
# an optional block to the constructor), invokes run on it and returns
# the value of n_pages converted via to_i.
#
# of_this_pdf_file - forwarded unchanged to PdfFileNTotalPages.new.
# block            - optional block, forwarded to PdfFileNTotalPages.new.
#
# Returns the result of n_pages.to_i.
def self.return_n_pages(
    of_this_pdf_file,
    &block
  )
  page_counter = PdfFileNTotalPages.new(of_this_pdf_file, &block)
  page_counter.run
  page_counter.n_pages.to_i
end
|
|
308
|
+
|
|
309
|
+
# ========================================================================= #
|
|
310
|
+
# === PdfParadise::PdfFileNTotalPages[]
|
|
311
|
+
# ========================================================================= #
|
|
312
|
+
# Shortcut: instantiates the class with the given argument and returns
# whatever result? yields on that new instance.
def self.[](i)
  instance = new(i)
  instance.result?
end
|
|
315
|
+
|
|
316
|
+
end
|
|
317
|
+
|
|
318
|
+
# =========================================================================== #
|
|
319
|
+
# === PdfParadise.n_pdf_pages?
|
|
320
|
+
#
|
|
321
|
+
# This is the top-level method to find out how many pdf pages are
|
|
322
|
+
# part of the given .pdf file at hand.
|
|
323
|
+
#
|
|
324
|
+
# The first argument to this method is simply the name of the .pdf file
|
|
325
|
+
# or, rather, the path to it.
|
|
326
|
+
#
|
|
327
|
+
# Usage examples:
|
|
328
|
+
#
|
|
329
|
+
# PdfParadise.n_pdf_pages?("/home/x/studium/UNI_WIEN/300260_Immunologie_und_zelluläre_Mikrobiologie_Teil_A/XIV-XVII_combined.pdf")
|
|
330
|
+
# PdfParadise.n_pdf_pages?("/Depot/j/hello-world.pdf")
|
|
331
|
+
#
|
|
332
|
+
# =========================================================================== #
|
|
333
|
+
# Delegates to PdfFileNTotalPages.return_n_pages, passing a block that
# evaluates to :be_quiet, and converts the outcome via to_i.
#
# of_this_pdf_file - forwarded unchanged to return_n_pages.
def self.n_pdf_pages?(of_this_pdf_file)
  page_count = PdfFileNTotalPages.return_n_pages(of_this_pdf_file) { :be_quiet }
  page_count.to_i
end

# All names below are aliases for n_pdf_pages? on the singleton level.
class << self
  alias n_pages?                      n_pdf_pages? # === PdfParadise.n_pages?
  alias n_pages                       n_pdf_pages? # === PdfParadise.n_pages
  alias n_pdf_pages                   n_pdf_pages? # === PdfParadise.n_pdf_pages
  alias n_pages_in_this_pdf_file?     n_pdf_pages? # === PdfParadise.n_pages_in_this_pdf_file?
  alias n_pages_in_this_pdf_file      n_pdf_pages? # === PdfParadise.n_pages_in_this_pdf_file
  alias has_n_pages?                  n_pdf_pages? # === PdfParadise.has_n_pages?
  alias n_pdf_pages_in_this_pdf_file  n_pdf_pages? # === PdfParadise.n_pdf_pages_in_this_pdf_file
  alias n_pdf_pages_in_this_pdf_file? n_pdf_pages? # === PdfParadise.n_pdf_pages_in_this_pdf_file?
end
|
|
343
|
+
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
# Only instantiate the class when this file is executed directly rather
# than being required. Invocation example:
#
#   n_pages $UNI_WIEN/300609_Molekulare_Entwicklungsbiologie/Entwicklungsbiologie7.pdf
#
PdfParadise::PdfFileNTotalPages.new(ARGV) if __FILE__ == $PROGRAM_NAME
|