pdf_paradise 0.1.66
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of pdf_paradise might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/README.md +705 -0
- data/bin/automatic_pdf_title +7 -0
- data/bin/combine_these_pdf_pages +7 -0
- data/bin/compress_via_hexapdf +7 -0
- data/bin/convert_markdown_to_pdf +7 -0
- data/bin/convert_pdf_to_text +7 -0
- data/bin/delete_first_page_of_this_pdf_file +7 -0
- data/bin/merge_then_open +7 -0
- data/bin/n_pages +10 -0
- data/bin/open_main_pdf +7 -0
- data/bin/pdf_paradise +9 -0
- data/bin/set_main_book +7 -0
- data/bin/set_title_of_this_pdf_file +15 -0
- data/doc/README.gen +662 -0
- data/doc/todo/todo.md +7 -0
- data/lib/pdf_paradise/base/base.rb +239 -0
- data/lib/pdf_paradise/base/colours.rb +36 -0
- data/lib/pdf_paradise/commandline/commandline.rb +101 -0
- data/lib/pdf_paradise/commandline/help.rb +73 -0
- data/lib/pdf_paradise/commandline/menu.rb +142 -0
- data/lib/pdf_paradise/compress/compress_via_hexapdf.rb +27 -0
- data/lib/pdf_paradise/compress_this_pdf_file.rb +87 -0
- data/lib/pdf_paradise/constants/constants.rb +76 -0
- data/lib/pdf_paradise/convert_text_to_pdf.rb +94 -0
- data/lib/pdf_paradise/css/project.css +17 -0
- data/lib/pdf_paradise/djvu_to_pdf.rb +85 -0
- data/lib/pdf_paradise/gui/README.md +6 -0
- data/lib/pdf_paradise/gui/fox/split_pdf_file.rb +77 -0
- data/lib/pdf_paradise/gui/gtk2/delete_the_first_or_the_last_page_of_this_pdf_file/delete_the_first_or_the_last_page_of_this_pdf_file.rb +39 -0
- data/lib/pdf_paradise/gui/gtk2/pdf_viewer/pdf_viewer.rb +34 -0
- data/lib/pdf_paradise/gui/gtk2/split_pdf_file/split_pdf_file.rb +34 -0
- data/lib/pdf_paradise/gui/gtk2/statistics_widget/statistics_widget.rb +34 -0
- data/lib/pdf_paradise/gui/gtk2/to_pdf/to_pdf.rb +32 -0
- data/lib/pdf_paradise/gui/gtk3/controller/controller.rb +212 -0
- data/lib/pdf_paradise/gui/gtk3/convert_pdf_to_text/convert_pdf_to_text.rb +34 -0
- data/lib/pdf_paradise/gui/gtk3/delete_the_first_or_the_last_page_of_this_pdf_file/delete_the_first_or_the_last_page_of_this_pdf_file.rb +39 -0
- data/lib/pdf_paradise/gui/gtk3/pdf_viewer/pdf_viewer.rb +34 -0
- data/lib/pdf_paradise/gui/gtk3/split_pdf_file/split_pdf_file.rb +34 -0
- data/lib/pdf_paradise/gui/gtk3/statistics_widget/statistics_widget.rb +34 -0
- data/lib/pdf_paradise/gui/gtk3/to_pdf/to_pdf.rb +32 -0
- data/lib/pdf_paradise/gui/libui/extract_all_images_from_this_pdf_file/extract_all_images_from_this_pdf_file.rb +223 -0
- data/lib/pdf_paradise/gui/libui/statistics_widget/statistics_widget.rb +233 -0
- data/lib/pdf_paradise/gui/shared_code/convert_pdf_to_text/convert_pdf_to_text_module.rb +277 -0
- data/lib/pdf_paradise/gui/shared_code/delete_the_first_or_the_last_page_of_this_pdf_file/delete_the_first_or_the_last_page_of_this_pdf_file_module.rb +443 -0
- data/lib/pdf_paradise/gui/shared_code/pdf_viewer/pdf_viewer.css +5 -0
- data/lib/pdf_paradise/gui/shared_code/pdf_viewer/pdf_viewer_module.rb +284 -0
- data/lib/pdf_paradise/gui/shared_code/split_pdf_file/split_pdf_file.css +0 -0
- data/lib/pdf_paradise/gui/shared_code/split_pdf_file/split_pdf_file_module.rb +294 -0
- data/lib/pdf_paradise/gui/shared_code/statistics_widget/statistics_widget_module.rb +349 -0
- data/lib/pdf_paradise/gui/shared_code/to_pdf/to_pdf_module.rb +281 -0
- data/lib/pdf_paradise/hexapdf/001_rainbow_pattern_example.rb +0 -0
- data/lib/pdf_paradise/hexapdf/hexapdf.rb +123 -0
- data/lib/pdf_paradise/images/PDF_PARADISE_LOGO.png +0 -0
- data/lib/pdf_paradise/main_pdf/main_pdf.rb +444 -0
- data/lib/pdf_paradise/merge_pdf/menu.rb +63 -0
- data/lib/pdf_paradise/merge_pdf/merge_pdf.rb +306 -0
- data/lib/pdf_paradise/merge_pdf_namespace.rb +9 -0
- data/lib/pdf_paradise/merge_then_open/merge_then_open.rb +105 -0
- data/lib/pdf_paradise/pdf_file_n_total_pages.rb +249 -0
- data/lib/pdf_paradise/prawn_addons/README.md +2 -0
- data/lib/pdf_paradise/prawn_addons/prawn_addons.rb +17 -0
- data/lib/pdf_paradise/project/project.rb +22 -0
- data/lib/pdf_paradise/remove_pdf_password.rb +391 -0
- data/lib/pdf_paradise/requires/batch_require_toplevel_files.rb +22 -0
- data/lib/pdf_paradise/requires/colours.rb +7 -0
- data/lib/pdf_paradise/requires/colours_and_esystem_and_save_file_and_fileutils_and_opn.rb +11 -0
- data/lib/pdf_paradise/requires/esystem_and_colours.rb +10 -0
- data/lib/pdf_paradise/requires/esystem_and_opn_and_colours.rb +8 -0
- data/lib/pdf_paradise/requires/require_the_whole_project.rb +28 -0
- data/lib/pdf_paradise/requires/require_utility_scripts.rb +9 -0
- data/lib/pdf_paradise/set_main_book.rb +156 -0
- data/lib/pdf_paradise/set_pdf_title.rb +220 -0
- data/lib/pdf_paradise/sinatra/embeddable_interface.rb +318 -0
- data/lib/pdf_paradise/toplevel_methods/automatic_pdf_title.rb +55 -0
- data/lib/pdf_paradise/toplevel_methods/convert_epub_to_pdf.rb +27 -0
- data/lib/pdf_paradise/toplevel_methods/convert_markdown_to_pdf.rb +45 -0
- data/lib/pdf_paradise/toplevel_methods/convert_ppt_to_pdf.rb +35 -0
- data/lib/pdf_paradise/toplevel_methods/e.rb +16 -0
- data/lib/pdf_paradise/toplevel_methods/esystem.rb +19 -0
- data/lib/pdf_paradise/toplevel_methods/misc.rb +76 -0
- data/lib/pdf_paradise/toplevel_methods/number_pages.rb +38 -0
- data/lib/pdf_paradise/toplevel_methods/opened_pdf_files.rb +221 -0
- data/lib/pdf_paradise/toplevel_methods/query_pdf_title.rb +191 -0
- data/lib/pdf_paradise/toplevel_methods/reduce_size_of_this_pdf_file.rb +46 -0
- data/lib/pdf_paradise/toplevel_methods/roebe.rb +17 -0
- data/lib/pdf_paradise/toplevel_methods/rotate_pdf_file.rb +143 -0
- data/lib/pdf_paradise/toplevel_methods/to_pdf.rb +38 -0
- data/lib/pdf_paradise/utility_scripts/README.md +3 -0
- data/lib/pdf_paradise/utility_scripts/combine_these_pdf_pages.rb +118 -0
- data/lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb +175 -0
- data/lib/pdf_paradise/utility_scripts/delete_first_page_of_this_pdf_file.rb +221 -0
- data/lib/pdf_paradise/utility_scripts/delete_last_page_of_this_pdf_file.rb +180 -0
- data/lib/pdf_paradise/utility_scripts/delete_this_page_of_this_pdf_file.rb +329 -0
- data/lib/pdf_paradise/utility_scripts/extract_all_images_from_this_pdf_file.rb +129 -0
- data/lib/pdf_paradise/utility_scripts/extract_pdf_page.rb +283 -0
- data/lib/pdf_paradise/utility_scripts/pdf_optimizer.rb +111 -0
- data/lib/pdf_paradise/utility_scripts/pdf_statistics.rb +148 -0
- data/lib/pdf_paradise/utility_scripts/pdf_to_html.rb +75 -0
- data/lib/pdf_paradise/utility_scripts/remove_images.rb +110 -0
- data/lib/pdf_paradise/utility_scripts/split_pdf.rb +340 -0
- data/lib/pdf_paradise/utility_scripts/to_qdf.rb +82 -0
- data/lib/pdf_paradise/version/version.rb +19 -0
- data/lib/pdf_paradise/www/README.md +2 -0
- data/lib/pdf_paradise/www/sinatra/app.rb +276 -0
- data/lib/pdf_paradise/yaml/working_on_these_pdf_files.yml +4 -0
- data/lib/pdf_paradise.rb +5 -0
- data/pdf_paradise.gemspec +61 -0
- data/test/testing_pdf_paradise.rb +9 -0
- metadata +219 -0
@@ -0,0 +1,143 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
# Encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
# =========================================================================== #
|
5
|
+
# === PdfParadise::RotatePdfFile
|
6
|
+
#
|
7
|
+
# Usage example:
|
8
|
+
#
|
9
|
+
# PdfParadise::RotatePdfFile.new(ARGV)
|
10
|
+
#
|
11
|
+
# =========================================================================== #
|
12
|
+
# require 'pdf_paradise/toplevel_methods/rotate_pdf_file.rb'
|
13
|
+
# =========================================================================== #
|
14
|
+
require 'pdf_paradise/base/base.rb'
|
15
|
+
|
16
|
+
module PdfParadise
|
17
|
+
|
18
|
+
class RotatePdfFile < Base # === PdfParadise::RotatePdfFile
|
19
|
+
|
20
|
+
# ========================================================================= #
|
21
|
+
# === NAMESPACE
|
22
|
+
# ========================================================================= #
|
23
|
+
NAMESPACE = inspect
|
24
|
+
|
25
|
+
# ========================================================================= #
|
26
|
+
# === ROTATE_BY_N_DEGREES
|
27
|
+
# ========================================================================= #
|
28
|
+
ROTATE_BY_N_DEGREES = 90
|
29
|
+
|
30
|
+
# ========================================================================= #
|
31
|
+
# === ROTATE_REGEX
|
32
|
+
# ========================================================================= #
|
33
|
+
ROTATE_REGEX = /(--rotate (\d+))/ # URL: http://rubular.com/r/ds3tk7DPPw
|
34
|
+
|
35
|
+
# ========================================================================= #
|
36
|
+
# === initialize
|
37
|
+
# ========================================================================= #
|
38
|
+
# Sets up a new instance.
#
# i           - the commandline input (a String or an Array); passed to set_input.
# run_already - when true (the default), run is invoked right away.
def initialize(i = nil, run_already = true)
  reset
  set_input(i)
  run if run_already
end
|
46
|
+
|
47
|
+
# ========================================================================= #
|
48
|
+
# === reset (reset tag)
|
49
|
+
# ========================================================================= #
|
50
|
+
# Restores the default state: the rotation goes back to the default that
# set_rotate applies when called without an argument.
def reset
  set_rotate
end
|
53
|
+
|
54
|
+
# ========================================================================= #
|
55
|
+
# === set_rotate
|
56
|
+
# ========================================================================= #
|
57
|
+
# Stores the rotation, always as a String, so that it can be concatenated
# into the command assembled by run.
#
# i - the rotation in degrees; defaults to ROTATE_BY_N_DEGREES.
def set_rotate(i = ROTATE_BY_N_DEGREES)
  @rotate_by_n_degrees = i.to_s
end
|
60
|
+
|
61
|
+
# ========================================================================= #
|
62
|
+
# === set_input
|
63
|
+
# ========================================================================= #
|
64
|
+
# Normalises the commandline input and stores the remaining file name
# in @input.
#
# i - a String, or an Array that is joined with single spaces.
#
# A leading "file://" is removed. Unless the input names an existing file,
# it is then checked for commandline flags:
#
#   * HELP / help / --help / empty input: prints the usage and exits.
#   * --rotate N: N is handed to set_rotate and the flag is removed from
#     the input.
#   * --rotate without a number: the flag is removed and the current
#     rotation is left untouched (previously the rotation was overwritten
#     with an empty string and the flag remained part of the file name).
def set_input(i = '')
  i = i.join(' ').strip if i.is_a? Array
  i = i.to_s.dup # .dup because the file uses frozen string literals.
  i.sub!(%r{\Afile://}, '') if i.start_with? 'file://'
  unless File.exist? i
    case i
    # ===================================================================== #
    # === --help
    # ===================================================================== #
    when 'HELP', /-?-?help$/, '' # '' means empty.
      opnn; e 'The available options are:'
      opnn; eparse ' --rotate N # rotate by n degrees (into '\
                   'clockwise direction. Example: rotatepdf --rotate 90 foo.pdf)'
      exit
    # ===================================================================== #
    # === --rotate
    # ===================================================================== #
    when ROTATE_REGEX, /--rotate/ # Usage: rotate --rotate 100 foo.pdf
      match = ROTATE_REGEX.match(i)
      if match
        set_rotate(match[2])
        i.gsub!(match[1], '') # Plain-string replacement; no regex needed.
      else
        i.gsub!('--rotate', '')
      end
    end
  end
  @input = i.to_s.strip
end
|
91
|
+
|
92
|
+
# ========================================================================= #
|
93
|
+
# === input?
|
94
|
+
# ========================================================================= #
|
95
|
+
# Returns the input that set_input stored in @input.
def input?
  @input
end
|
98
|
+
|
99
|
+
# ========================================================================= #
|
100
|
+
# === output_file?
|
101
|
+
# ========================================================================= #
|
102
|
+
# Returns the name of the output file: 'rotated_pdf_file', followed by the
# basename of the input without its .pdf suffix and without any spaces,
# followed by '.pdf'.
#
# The spaces are now really removed; before, the result of String#tr was
# thrown away, so names containing spaces passed through unchanged.
def output_file?
  stem = File.basename(input?).sub(/\.pdf\z/, '').delete(' ')
  'rotated_pdf_file'+stem+'.pdf'
end
|
107
|
+
|
108
|
+
# ========================================================================= #
|
109
|
+
# === opnn
|
110
|
+
# ========================================================================= #
|
111
|
+
# Delegates to Opn.opn, passing the NAMESPACE of this class.
def opnn
  Opn.opn(namespace: NAMESPACE)
end
|
114
|
+
|
115
|
+
# ========================================================================= #
|
116
|
+
# === rotate_by_n_degrees?
|
117
|
+
# ========================================================================= #
|
118
|
+
# Returns the rotation stored by set_rotate (a String). Also available
# under the shorter name rotate?.
def rotate_by_n_degrees?
  @rotate_by_n_degrees
end
alias rotate? rotate_by_n_degrees? # === rotate?
|
121
|
+
|
122
|
+
# ========================================================================= #
|
123
|
+
# === add_quality
|
124
|
+
# ========================================================================= #
|
125
|
+
# Returns the quality option that run appends to the command. The
# trailing space is intentional: run concatenates the output file name
# directly after it.
def add_quality
  '-quality 100 '
end
|
128
|
+
|
129
|
+
# ========================================================================= #
|
130
|
+
# === run (run tag)
|
131
|
+
# ========================================================================= #
|
132
|
+
# Assembles the `convert` command, runs it via esystem and reports where
# the rotated file was written.
#
# The input name, the rotation and the output name are shell-escaped, so
# that paths containing spaces or other shell metacharacters reach the
# external program as single arguments. Names made of ordinary characters
# yield exactly the same command as before.
def run
  require 'shellwords' # stdlib; only this method needs it.
  target = output_file?
  _ = 'convert '+Shellwords.escape(input?)+
      ' -rotate '+Shellwords.escape(rotate_by_n_degrees?)+' '+
      add_quality+Shellwords.escape(target)
  esystem _
  e 'Finished converting! The file should now reside at:'
  e sfile(' '+target)
end
|
138
|
+
|
139
|
+
end; end
|
140
|
+
|
141
|
+
if __FILE__ == $PROGRAM_NAME
|
142
|
+
PdfParadise::RotatePdfFile.new(ARGV)
|
143
|
+
end # rotatepdf
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
# Encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
# =========================================================================== #
|
5
|
+
# require 'pdf_paradise/toplevel_methods/to_pdf.rb'
|
6
|
+
# PdfParadise.to_pdf
|
7
|
+
# =========================================================================== #
|
8
|
+
module PdfParadise
|
9
|
+
|
10
|
+
require 'pdf_paradise/toplevel_methods/e.rb'
|
11
|
+
|
12
|
+
# ========================================================================= #
|
13
|
+
# === PdfParadise.to_pdf
|
14
|
+
#
|
15
|
+
# This method can, by using soffice, convert a file such as .docx into
|
16
|
+
# the corresponding .pdf file - all on the commandline.
|
17
|
+
# ========================================================================= #
|
18
|
+
# Converts each given file (such as a .docx file) into a .pdf file by
# calling soffice in headless mode.
#
# these_pdf_files - one path or an Array of paths; defaults to ARGV.
#                   nil entries are ignored.
#
# Returns an Array with one entry per input: the input path if the file
# exists (soffice was invoked for it), or nil if it does not exist.
#
# soffice is started with an argument list rather than through a shell
# string, so paths containing spaces or shell metacharacters are passed
# on unmodified.
def self.to_pdf(
    these_pdf_files = ARGV
  )
  [these_pdf_files].flatten.compact.map {|this_pdf_file|
    if File.exist? this_pdf_file
      arguments = [
        'soffice', '--headless', '--invisible', '--convert-to', 'pdf',
        this_pdf_file.to_s
      ]
      e arguments.join(' ') # Show the user what is being run.
      system(*arguments)
      this_pdf_file # Return the path here, in case it has to be used.
    else
      e "No file exists at `#{this_pdf_file}.`"
      nil
    end
  }
end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
36
|
+
if __FILE__ == $PROGRAM_NAME
|
37
|
+
PdfParadise.to_pdf(ARGV)
|
38
|
+
end # to_pdf
|
@@ -0,0 +1,118 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
# Encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
# =========================================================================== #
|
5
|
+
# === PdfParadise::CombineThesePdfPages
|
6
|
+
# =========================================================================== #
|
7
|
+
# require 'pdf_paradise/utility_scripts/combine_these_pdf_pages.rb'
|
8
|
+
# =========================================================================== #
|
9
|
+
require 'pdf_paradise/base/base.rb'
|
10
|
+
|
11
|
+
module PdfParadise
|
12
|
+
|
13
|
+
class CombineThesePdfPages < PdfParadise::Base # === PdfParadise::CombineThesePdfPages
|
14
|
+
|
15
|
+
require 'pdf_paradise/utility_scripts/split_pdf.rb'
|
16
|
+
|
17
|
+
# ========================================================================= #
|
18
|
+
# === NAMESPACE
|
19
|
+
# ========================================================================= #
|
20
|
+
NAMESPACE = inspect
|
21
|
+
|
22
|
+
# ========================================================================= #
|
23
|
+
# === initialize
|
24
|
+
# ========================================================================= #
|
25
|
+
# Sets up a new instance.
#
# commandline_arguments - the arguments to work on (defaults to ARGV);
#                         run reads the .pdf path from the first one and
#                         the page list from the second one.
# run_already           - when true (the default), run is invoked right away.
def initialize(commandline_arguments = ARGV, run_already = true)
  reset
  set_commandline_arguments(commandline_arguments)
  run if run_already
end
|
35
|
+
|
36
|
+
# ========================================================================= #
|
37
|
+
# === reset (reset tag)
|
38
|
+
# ========================================================================= #
|
39
|
+
# Resets the instance; this class only delegates to the parent class.
def reset
  super()
end
|
42
|
+
|
43
|
+
# ========================================================================= #
|
44
|
+
# === opnn
|
45
|
+
# ========================================================================= #
|
46
|
+
# Calls the parent's opnn with the NAMESPACE of this class.
def opnn
  super(NAMESPACE)
end
|
49
|
+
|
50
|
+
# ========================================================================= #
|
51
|
+
# === run (run tag)
|
52
|
+
# ========================================================================= #
|
53
|
+
# Writes a new .pdf file that contains only the requested pages.
#
# The first commandline argument is the path to the .pdf file, the second
# one is a comma-separated list of 1-based page numbers (such as "1,3,5").
#
# The work happens inside log_dir?: the directory is created if needed,
# the .pdf file is copied into it unless a file of the same name is
# already there, and the result is stored as 'output.pdf' in that
# directory (an already existing output.pdf is deleted first). The
# absolute path of the result is kept in @output_file_to_use.
#
# Both arguments are validated before anything is touched: a missing
# path or a missing page list now leads to a message and an early return
# instead of an exception.
def run
  pdf_argument = first_argument?
  if pdf_argument.nil? || !File.exist?(File.absolute_path(pdf_argument))
    e 'Please provide a path to an existing .pdf file.'
    return
  end
  pages_argument = Array(commandline_arguments?)[1]
  if pages_argument.nil?
    e 'Please provide the page numbers to keep, such as 1,3,5.'
    return
  end
  # Resolve the path before changing the directory below.
  name_of_the_pdf_file = File.absolute_path(pdf_argument)
  _ = log_dir?
  mkdir(_) unless File.directory? _
  cd _
  # From here on relative paths refer to the log directory.
  local_copy = File.absolute_path(File.basename(name_of_the_pdf_file))
  unless File.exist? local_copy
    e 'Copying the file '+name_of_the_pdf_file+'.'
    copy_file(name_of_the_pdf_file)
  end
  name_of_the_pdf_file = local_copy if File.exist? local_copy
  # "1,3,5" => [1, 3, 5]; a single number yields a one-element Array.
  these_pages_must_be_kept = pages_argument.to_s.split(',').map {|entry| entry.to_i }
  begin
    require 'hexapdf'
    pdf = HexaPDF::Document.open(name_of_the_pdf_file)
    new_pdf = HexaPDF::Document.new
    pdf.pages.each_with_index { |page, zero_based_index|
      index = zero_based_index + 1 # The user counts pages starting at 1.
      if these_pages_must_be_kept.include? index
        e 'Copying page page number: '+
          steelblue(index.to_s)
        new_pdf.pages << new_pdf.import(page)
      end
    }
    @output_file_to_use = File.absolute_path('output.pdf')
    delete_file(@output_file_to_use) if File.exist? @output_file_to_use
    e 'Storing into `'+@output_file_to_use+'`.'
    new_pdf.write(
      @output_file_to_use,
      validate: false,
      optimize: true
    )
  rescue LoadError
    e 'HexaPDF is not available. Please install it via:'
    e
    e '  gem install hexapdf'
    e
  end
end
|
106
|
+
|
107
|
+
# ========================================================================= #
|
108
|
+
# === stored_where?
|
109
|
+
# ========================================================================= #
|
110
|
+
# Returns the absolute path of the file that run has written (nil before
# run got that far). Also available as output_file_to_use?.
def stored_where?
  @output_file_to_use
end
alias output_file_to_use? stored_where? # === output_file_to_use?
|
113
|
+
|
114
|
+
end; end
|
115
|
+
|
116
|
+
if __FILE__ == $PROGRAM_NAME
|
117
|
+
PdfParadise::CombineThesePdfPages.new(ARGV)
|
118
|
+
end # combinethesepdfpages
|
@@ -0,0 +1,175 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
# Encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
# =========================================================================== #
|
5
|
+
# === PdfParadise::ConvertPdfToText
|
6
|
+
#
|
7
|
+
# This file can convert one or several .pdf files to text files.
|
8
|
+
#
|
9
|
+
# Usage examples:
|
10
|
+
#
|
11
|
+
# PdfParadise::ConvertPdfToText.new(ARGV)
|
12
|
+
#
|
13
|
+
# =========================================================================== #
|
14
|
+
# require 'pdf_paradise/utility_scripts/convert_pdf_to_text.rb'
|
15
|
+
# PdfParadise::ConvertPdfToText.new(ARGV)
|
16
|
+
# =========================================================================== #
|
17
|
+
require 'pdf_paradise/base/base.rb'
|
18
|
+
|
19
|
+
module PdfParadise
|
20
|
+
|
21
|
+
class ConvertPdfToText < ::PdfParadise::Base # === PdfParadise::ConvertPdfToText
|
22
|
+
|
23
|
+
# ========================================================================= #
|
24
|
+
# === NAMESPACE
|
25
|
+
# ========================================================================= #
|
26
|
+
NAMESPACE = inspect
|
27
|
+
|
28
|
+
# ========================================================================= #
|
29
|
+
# === initialize
|
30
|
+
# ========================================================================= #
|
31
|
+
# Sets up a new instance.
#
# i           - the files (or flags) to work on; handed to
#               set_convert_these. Defaults to ARGV.
# run_already - when true (the default), run is invoked right away.
def initialize(i = ARGV, run_already = true)
  reset
  # Designate which files are to be converted.
  set_convert_these(i)
  run if run_already
end
|
42
|
+
|
43
|
+
# ========================================================================= #
|
44
|
+
# === reset
|
45
|
+
# ========================================================================= #
|
46
|
+
# Resets the instance; this class only delegates to the parent class.
def reset
  super()
end
|
49
|
+
|
50
|
+
# ========================================================================= #
|
51
|
+
# === get_all_pdf_files_from_current_directory
|
52
|
+
#
|
53
|
+
# This method will obtain all pdf files from the current directory.
|
54
|
+
# ========================================================================= #
|
55
|
+
# Returns the names of all entries in the current working directory that
# match the glob '*.pdf' (an Array of relative names).
def get_all_pdf_files_from_current_directory
  Dir['*.pdf']
end
|
58
|
+
|
59
|
+
# ========================================================================= #
|
60
|
+
# === set_convert_these
|
61
|
+
#
|
62
|
+
# This method will preferentially try to work on only .pdf files.
|
63
|
+
# ========================================================================= #
|
64
|
+
# Determines which files are to be converted and stores them, always as
# an Array, in @convert_these.
#
# i - nil, a String or an Array:
#       nil or omitted     => treated like the current directory
#       '' or empty Array  => all .pdf files of the current directory
#       the current dir    => all .pdf files of the current directory
#                             (relative names)
#       another directory  => all .pdf files inside that directory, with
#                             the directory prepended. Before, the given
#                             directory was ignored and the current
#                             directory was searched instead.
#       anything else      => used as-is (wrapped into an Array if needed)
def set_convert_these(
    i = return_pwd
  )
  i = return_pwd if i.nil?
  case i
  when String
    if i.empty?
      i = get_all_pdf_files_from_current_directory
    elsif File.directory? i
      if File.realpath(i) == File.realpath(Dir.pwd)
        i = get_all_pdf_files_from_current_directory
      else
        directory = i
        i = Dir.glob('*.pdf', base: directory).map {|entry|
          File.join(directory, entry)
        }
      end
    end
  when Array
    i = get_all_pdf_files_from_current_directory if i.empty?
  end
  i = [i] unless i.is_a? Array
  @convert_these = i # This must be an Array.
end
|
80
|
+
|
81
|
+
# ========================================================================= #
|
82
|
+
# === start_conversion
|
83
|
+
# ========================================================================= #
|
84
|
+
# Hands every entry of @convert_these to convert_this_pdf, in order.
def start_conversion
  @convert_these.each {|pdf| convert_this_pdf(pdf) }
end
|
87
|
+
|
88
|
+
# ========================================================================= #
|
89
|
+
# === opnn
|
90
|
+
# ========================================================================= #
|
91
|
+
# Calls the parent's opnn with the NAMESPACE of this class.
def opnn
  super(NAMESPACE)
end
|
94
|
+
|
95
|
+
# ========================================================================= #
|
96
|
+
# === convert_this_pdf
|
97
|
+
#
|
98
|
+
# This will make use of the binary called "pdftotext".
|
99
|
+
# ========================================================================= #
|
100
|
+
# Converts one .pdf file (or, given an Array, each of its entries) into a
# text file by running the binary called "pdftotext".
#
# i - a path or an Array of paths; defaults to @convert_these.
#
# For an existing file, @output_file is set to the input path with its
# extension replaced by '.txt'. Only the trailing extension is removed;
# before, the extension was used as an unescaped regex and removed
# everywhere, so 'a_pdf_file.pdf' resulted in 'a_file.txt'.
#
# pdftotext is started with an argument list instead of a shell string,
# which keeps paths containing quotes or other shell metacharacters intact.
#
# A non-existing entry is reported, unless it starts with '--' (such
# entries are commandline flags, which are handled by menu).
def convert_this_pdf(
    i = @convert_these
  )
  if i.is_a? Array
    i.each {|entry| convert_this_pdf(entry) }
  elsif File.exist? i
    opnn; e "Now converting `#{sfile(i)}` via #{simp('pdftotext')}."
    extension = Regexp.escape(File.extname(i))
    @output_file = i.sub(/#{extension}\z/, '')+'.txt'
    system('pdftotext', i)
    opnn; e "Storing into the file `#{sfile(@output_file)}`."
  elsif !i.start_with?('--')
    opnn; e 'No file called '+sfile(i)+' could be found.'
  end
end
alias do_convert convert_this_pdf # === do_convert
|
119
|
+
|
120
|
+
# ========================================================================= #
|
121
|
+
# === output_file?
|
122
|
+
# ========================================================================= #
|
123
|
+
# Returns the path of the text file belonging to the most recently
# converted .pdf file (nil if nothing has been converted yet).
def output_file?
  @output_file
end
|
126
|
+
|
127
|
+
# ========================================================================= #
|
128
|
+
# === menu (menu tag)
|
129
|
+
# ========================================================================= #
|
130
|
+
# Looks for commandline flags in the input.
#
# i - a String or an Array of Strings; defaults to @convert_these.
#
# The only flag known here is gui / -gui / --gui (case-insensitive),
# which starts the GUI. An entry that names an existing file is never
# treated as a flag; otherwise a file such as 'guide.pdf' would match the
# pattern and start the GUI.
def menu(
    i = @convert_these
  )
  if i.is_a? Array
    i.each {|entry| menu(entry) }
  elsif i.is_a?(String) && !File.exist?(i)
    case i
    # ===================================================================== #
    # === cpdf --gui
    # ===================================================================== #
    when /^-?-?gui/i
      do_start_the_GUI_interface
    end
  end
end
|
145
|
+
|
146
|
+
# ========================================================================= #
|
147
|
+
# === do_start_the_GUI_interface
|
148
|
+
# ========================================================================= #
|
149
|
+
# Starts the GTK3 variant of the GUI. The GUI code is required here, on
# demand, rather than at the top of the file.
def do_start_the_GUI_interface
  require 'pdf_paradise/gui/gtk3/convert_pdf_to_text/convert_pdf_to_text.rb'
  ::PdfParadise::GUI::Gtk::ConvertPdfToText.run
end
|
153
|
+
|
154
|
+
# ========================================================================= #
|
155
|
+
# === run (run tag)
|
156
|
+
# ========================================================================= #
|
157
|
+
# Entry point: evaluates the commandline flags first (menu), then
# converts the designated files (start_conversion).
def run
  menu
  start_conversion
end
|
161
|
+
|
162
|
+
end
|
163
|
+
|
164
|
+
# =========================================================================== #
|
165
|
+
# === PdfParadise.pdf_to_text
|
166
|
+
# =========================================================================== #
|
167
|
+
# Toplevel convenience method: creates a new
# PdfParadise::ConvertPdfToText for the given input (ARGV by default)
# and returns that instance.
def self.pdf_to_text(i = ARGV)
  PdfParadise::ConvertPdfToText.new(i)
end
|
170
|
+
|
171
|
+
end
|
172
|
+
|
173
|
+
if __FILE__ == $PROGRAM_NAME
|
174
|
+
PdfParadise::ConvertPdfToText.new(ARGV)
|
175
|
+
end # cpdf foobar.pdf
|
@@ -0,0 +1,221 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
# Encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
# =========================================================================== #
|
5
|
+
# === PdfParadise::DeleteFirstPageOfThisPdfFile
|
6
|
+
#
|
7
|
+
# This class will accept one (or several) .pdf files and then proceed
|
8
|
+
# to remove the very first .pdf page in that file. So if that pdf-file
|
9
|
+
# has had 100 pages, we will end up with 99 pages, and the first page
|
10
|
+
# will be gone.
|
11
|
+
#
|
12
|
+
# The class first writes its result into a new output file. As the constant
# SHALL_WE_OVERWRITE_THE_ORIGINAL_PDF_FILE is set to true, that output file
# is then moved over the original file.
|
14
|
+
#
|
15
|
+
# Note that the functionality made available through this class here
|
16
|
+
# depends on either the binary called "qpdf" or on the binary called
|
17
|
+
# "pdftk".
|
18
|
+
# =========================================================================== #
|
19
|
+
# require 'pdf_paradise/utility_scripts/delete_first_page_of_this_pdf_file.rb'
|
20
|
+
# PdfParadise::DeleteFirstPageOfThisPdfFile.new(ARGV)
|
21
|
+
# =========================================================================== #
|
22
|
+
require 'pdf_paradise/base/base.rb'
|
23
|
+
|
24
|
+
module PdfParadise
|
25
|
+
|
26
|
+
class DeleteFirstPageOfThisPdfFile < Base
|
27
|
+
|
28
|
+
require 'pdf_paradise/pdf_file_n_total_pages.rb'
|
29
|
+
|
30
|
+
# ========================================================================= #
|
31
|
+
# === NAMESPACE
|
32
|
+
# ========================================================================= #
|
33
|
+
NAMESPACE = inspect
|
34
|
+
|
35
|
+
# ========================================================================= #
|
36
|
+
# === SHALL_WE_OVERWRITE_THE_ORIGINAL_PDF_FILE
|
37
|
+
# ========================================================================= #
|
38
|
+
SHALL_WE_OVERWRITE_THE_ORIGINAL_PDF_FILE = true
|
39
|
+
|
40
|
+
# ========================================================================= #
|
41
|
+
# === initialize
|
42
|
+
# ========================================================================= #
|
43
|
+
# Sets up a new instance.
#
# input_files - the .pdf file(s) to work on; handed to set_input_files.
#               Defaults to ARGV.
# run_already - when true (the default), run is invoked right away.
def initialize(input_files = ARGV, run_already = true)
  reset
  set_input_files(input_files)
  run if run_already
end
|
53
|
+
|
54
|
+
# ========================================================================= #
|
55
|
+
# === reset
|
56
|
+
# ========================================================================= #
|
57
|
+
# Resets the instance to its defaults, after the parent class has done
# its own reset.
def reset
  super()
  # ======================================================================= #
  # === @namespace
  # ======================================================================= #
  @namespace = NAMESPACE
  # ======================================================================= #
  # === @use_this_pdf_application
  #
  # Specifies which application is used for deleting .pdf pages. This may
  # be :hexapdf, :pdftk or :qpdf.
  # ======================================================================= #
  @use_this_pdf_application = :qpdf # :hexapdf # :pdftk
end
|
72
|
+
|
73
|
+
# ========================================================================= #
|
74
|
+
# === use_hexapdf?
|
75
|
+
# ========================================================================= #
|
76
|
+
# Returns true if @use_this_pdf_application is :hexapdf, false otherwise.
def use_hexapdf?
  @use_this_pdf_application == :hexapdf
end
|
79
|
+
|
80
|
+
# ========================================================================= #
|
81
|
+
# === set_input_files
|
82
|
+
# ========================================================================= #
|
83
|
+
# Stores the files to work on in @input_files and then calls
# sanitize_input_files.
#
# i - one path or an Array of paths. Entries containing '*' are expanded
#     via Dir[]; nil entries are ignored.
#
# If no file remains and is_on_roebe? is true, the file named by the
# environment variable MAIN_BOOK is used, provided that the variable is
# set and the file exists.
#
# The list is built as a new Array. The argument itself - ARGV by default
# in initialize - is no longer modified in place, neither here nor by the
# filtering done in sanitize_input_files.
def set_input_files(i)
  candidates = i.is_a?(Array) ? i : [i]
  files = candidates.compact.map {|entry|
    entry.include?('*') ? Dir[entry] : entry
  }.flatten
  # ======================================================================= #
  # Next, if no file was given, and we are on Roebe, we will use the
  # default book, if a certain ENV variable exists AND if the file that
  # points to that location also exists.
  # ======================================================================= #
  if files.empty? && is_on_roebe?
    main_book = ENV['MAIN_BOOK']
    if main_book && File.exist?(main_book)
      this_file = main_book.dup
      e "#{rev}As we are on roebe, we will use this file:"
      e
      e sfile(" #{this_file}")
      e
      files << this_file
    end
  end
  @input_files = files
  sanitize_input_files
end
|
111
|
+
|
112
|
+
# ========================================================================= #
|
113
|
+
# === sanitize_input_files
|
114
|
+
# ========================================================================= #
|
115
|
+
# Keeps only those entries of @input_files that end with '.pdf'. The
# Array is filtered in place; an empty Array is left alone.
def sanitize_input_files
  unless @input_files.empty?
    @input_files.select! {|entry| entry.end_with? '.pdf' }
  end
end
|
125
|
+
|
126
|
+
# ========================================================================= #
|
127
|
+
# === process_each_pdf
|
128
|
+
# ========================================================================= #
|
129
|
+
# Removes the first page of every file in @input_files.
#
# For each existing file a commandline for pdftk, hexapdf or qpdf is
# assembled (depending on use_pdftk? and use_hexapdf?; qpdf is the
# fallback) and run via esystem. The result goes into the file named by
# name_of_the_output_file?; if SHALL_WE_OVERWRITE_THE_ORIGINAL_PDF_FILE
# is true, that file is then moved over the original. A missing file is
# reported via no_file_exists_at.
#
# Two defects of the earlier version are corrected here:
#
#   * The pdftk page selection was "2-N N-end", which emits the last page
#     twice. It is now "2-end".
#   * For paths containing a space, the quote-wrapped variant of the path
#     was used as the target of mv, so the original file was never
#     replaced. mv now receives the real path.
def process_each_pdf
  @input_files.each {|this_pdf_file|
    unless File.exist? this_pdf_file
      opnn; no_file_exists_at(this_pdf_file)
      next
    end
    # ===================================================================== #
    # Store how many pages this .pdf file has.
    # ===================================================================== #
    has_n_pages = PdfParadise.n_pages_in_this_pdf_file?(this_pdf_file)
    # Keep the real path for mv; only the commandline needs the quotes.
    original_path = this_pdf_file
    if this_pdf_file.include? ' '
      this_pdf_file = '"'+this_pdf_file+'"'
    end
    opnn; e 'Now working on the .pdf file '+sfancy(this_pdf_file)
    # NOTE(review): as before, the helper is given the (possibly quoted)
    # commandline variant of the path; confirm that its result is also
    # usable as a plain path for mv when the name contains spaces.
    output_file = name_of_the_output_file?(this_pdf_file)
    if use_pdftk?
      _ = 'pdftk '+this_pdf_file+' cat 2-end output '+output_file
    elsif use_hexapdf?
      _ = 'hexapdf modify '+this_pdf_file+' -i 2-e '+output_file
    else # else we use qpdf
      _ = 'qpdf --pages '+this_pdf_file+' 2-'+has_n_pages.to_i.to_s+' -- '+
          this_pdf_file+' '+output_file
    end
    esystem _
    if SHALL_WE_OVERWRITE_THE_ORIGINAL_PDF_FILE
      # ================================================================= #
      # In this case we will overwrite the original .pdf file.
      # ================================================================= #
      mv(output_file, original_path)
    end
  }
end
|
170
|
+
|
171
|
+
# ========================================================================= #
|
172
|
+
# === use_pdftk?
|
173
|
+
# ========================================================================= #
|
174
|
+
# Answers whether pdftk is the application selected via
# @use_this_pdf_application.
def use_pdftk?
  selected_application = @use_this_pdf_application
  selected_application == :pdftk
end
|
177
|
+
|
178
|
+
# ========================================================================= #
|
179
|
+
# === input?
|
180
|
+
# ========================================================================= #
|
181
|
+
# Query method returning the current content of @input_files.
def input?
  @input_files
end
|
184
|
+
|
185
|
+
# ========================================================================= #
|
186
|
+
# === name_of_the_output_file?
|
187
|
+
#
|
188
|
+
# This method will determine the output file of the .pdf file.
|
189
|
+
# ========================================================================= #
|
190
|
+
# Derives the name of the output file from the given file name: a trailing
# '.pdf' is dropped (if present) and '_output_file.pdf' is appended.
#
#   name_of_the_output_file?('book.pdf') # => 'book_output_file.pdf'
def name_of_the_output_file?(i)
  without_extension = i.sub(/\.pdf$/, '')
  without_extension + '_output_file.pdf'
end; alias output? name_of_the_output_file? # === output?
|
193
|
+
|
194
|
+
# ========================================================================= #
|
195
|
+
# === run
|
196
|
+
# ========================================================================= #
|
197
|
+
# Entry point of the class: hands over to process_each_pdf and returns
# its result.
def run
  process_each_pdf
end
|
200
|
+
|
201
|
+
end
|
202
|
+
|
203
|
+
# =========================================================================== #
|
204
|
+
# === PdfParadise.delete_first_page_of_this_pdf_file
|
205
|
+
#
|
206
|
+
# Easier method-way to invoke the above class.
|
207
|
+
# =========================================================================== #
|
208
|
+
# Convenience wrapper that instantiates
# PdfParadise::DeleteFirstPageOfThisPdfFile with the given input
# (ARGV by default) and returns the new instance.
#
# Also reachable under the names remove_the_first_page_of_this_pdf_file
# and remove_first_page_of_this_pdf_file.
def self.delete_first_page_of_this_pdf_file(i = ARGV)
  PdfParadise::DeleteFirstPageOfThisPdfFile.new(i)
end
class << self
  # === PdfParadise.remove_the_first_page_of_this_pdf_file
  alias remove_the_first_page_of_this_pdf_file delete_first_page_of_this_pdf_file
  # === PdfParadise.remove_first_page_of_this_pdf_file
  alias remove_first_page_of_this_pdf_file delete_first_page_of_this_pdf_file
end
|
216
|
+
|
217
|
+
end
|
218
|
+
|
219
|
+
# Run the class with the commandline arguments, but only when this file is
# executed directly rather than being required.
PdfParadise::DeleteFirstPageOfThisPdfFile.new(ARGV) if __FILE__ == $PROGRAM_NAME # delete_first_page_of_this_pdf_file
|