pdf_paradise 0.3.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +933 -0
- data/bin/automatic_pdf_title +7 -0
- data/bin/burst_this_pdf_file +7 -0
- data/bin/combine_these_pdf_pages +7 -0
- data/bin/compress_via_hexapdf +7 -0
- data/bin/convert_markdown_to_pdf +7 -0
- data/bin/convert_pdf_to_text +7 -0
- data/bin/delete_first_page_of_this_pdf_file +7 -0
- data/bin/djvu_to_pdf +7 -0
- data/bin/merge_then_open +7 -0
- data/bin/n_pages +10 -0
- data/bin/open_main_pdf +7 -0
- data/bin/pdf_paradise +9 -0
- data/bin/rotate_pdf +7 -0
- data/bin/set_main_book +7 -0
- data/bin/set_title_of_this_pdf_file +15 -0
- data/doc/README.gen +871 -0
- data/doc/todo/todo.md +13 -0
- data/images/Logo_for_the_pdf_paradise_project.avif +0 -0
- data/lib/pdf_paradise/base/base.rb +344 -0
- data/lib/pdf_paradise/base/colours.rb +67 -0
- data/lib/pdf_paradise/colours/colours.rb +27 -0
- data/lib/pdf_paradise/commandline/commandline.rb +109 -0
- data/lib/pdf_paradise/commandline/help.rb +77 -0
- data/lib/pdf_paradise/commandline/menu.rb +173 -0
- data/lib/pdf_paradise/compress/compress_this_pdf_file.rb +108 -0
- data/lib/pdf_paradise/compress/compress_via_hexapdf.rb +27 -0
- data/lib/pdf_paradise/compress/compress_via_qpdf.rb +32 -0
- data/lib/pdf_paradise/constants/constants.rb +76 -0
- data/lib/pdf_paradise/convert_text_to_pdf.rb +94 -0
- data/lib/pdf_paradise/css/project.css +17 -0
- data/lib/pdf_paradise/fpdf/README.md +2 -0
- data/lib/pdf_paradise/fpdf/bookmark.rb +129 -0
- data/lib/pdf_paradise/fpdf/chinese.rb +454 -0
- data/lib/pdf_paradise/fpdf/fpdf.rb +1902 -0
- data/lib/pdf_paradise/fpdf/fpdf_eps.rb +138 -0
- data/lib/pdf_paradise/fpdf/makefont.rb +1794 -0
- data/lib/pdf_paradise/gui/README.md +6 -0
- data/lib/pdf_paradise/gui/fox/split_pdf_file.rb +77 -0
- data/lib/pdf_paradise/gui/gtk2/pdf_viewer/pdf_viewer.rb +34 -0
- data/lib/pdf_paradise/gui/gtk2/split_pdf_file/split_pdf_file.rb +34 -0
- data/lib/pdf_paradise/gui/gtk2/statistics_widget/statistics_widget.rb +34 -0
- data/lib/pdf_paradise/gui/gtk3/controller/controller.rb +214 -0
- data/lib/pdf_paradise/gui/gtk3/pdf_viewer/pdf_viewer.rb +34 -0
- data/lib/pdf_paradise/gui/gtk3/split_pdf_file/split_pdf_file.rb +34 -0
- data/lib/pdf_paradise/gui/jruby/delete_the_first_or_the_last_page_of_this_pdf_file/delete_the_first_or_the_last_page_of_this_pdf_file.rb +167 -0
- data/lib/pdf_paradise/gui/jruby/remove_the_first_page_of_this_pdf_file/remove_the_first_page_of_this_pdf_file.rb +103 -0
- data/lib/pdf_paradise/gui/libui/extract_all_images_from_this_pdf_file/extract_all_images_from_this_pdf_file.rb +223 -0
- data/lib/pdf_paradise/gui/libui/remove_the_first_page_of_this_pdf_file/remove_the_first_page_of_this_pdf_file.rb +267 -0
- data/lib/pdf_paradise/gui/libui/rotate_pdf_file/rotate_pdf_file.rb +219 -0
- data/lib/pdf_paradise/gui/libui/statistics_widget/statistics_widget.rb +233 -0
- data/lib/pdf_paradise/gui/shared_code/pdf_viewer/pdf_viewer.css +5 -0
- data/lib/pdf_paradise/gui/shared_code/pdf_viewer/pdf_viewer_module.rb +287 -0
- data/lib/pdf_paradise/gui/shared_code/remove_the_first_page_of_this_pdf_file_module/remove_the_first_page_of_this_pdf_file_module.rb +31 -0
- data/lib/pdf_paradise/gui/shared_code/split_pdf_file/split_pdf_file_module.rb +295 -0
- data/lib/pdf_paradise/gui/universal_widgets/convert_pdf_to_text/convert_pdf_to_text.rb +366 -0
- data/lib/pdf_paradise/gui/universal_widgets/delete_the_first_or_the_last_page_of_this_pdf_file/delete_the_first_or_the_last_page_of_this_pdf_file.rb +776 -0
- data/lib/pdf_paradise/gui/universal_widgets/statistics_widget/statistics_widget.rb +407 -0
- data/lib/pdf_paradise/gui/universal_widgets/to_pdf/to_pdf.rb +351 -0
- data/lib/pdf_paradise/hexapdf/001_rainbow_pattern_example.rb +0 -0
- data/lib/pdf_paradise/hexapdf/hexapdf.rb +123 -0
- data/lib/pdf_paradise/images/PDF_PARADISE_LOGO.png +0 -0
- data/lib/pdf_paradise/main_pdf/main_pdf.rb +474 -0
- data/lib/pdf_paradise/merge_pdf/menu.rb +63 -0
- data/lib/pdf_paradise/merge_pdf/merge_pdf.rb +307 -0
- data/lib/pdf_paradise/merge_pdf_namespace.rb +9 -0
- data/lib/pdf_paradise/merge_then_open/merge_then_open.rb +105 -0
- data/lib/pdf_paradise/prawn_addons/README.md +2 -0
- data/lib/pdf_paradise/prawn_addons/prawn_addons.rb +17 -0
- data/lib/pdf_paradise/project/project.rb +22 -0
- data/lib/pdf_paradise/remove_pdf_password.rb +391 -0
- data/lib/pdf_paradise/requires/batch_require_toplevel_files.rb +22 -0
- data/lib/pdf_paradise/requires/colours.rb +11 -0
- data/lib/pdf_paradise/requires/colours_and_esystem_and_save_file_and_fileutils_and_opn.rb +13 -0
- data/lib/pdf_paradise/requires/esystem_and_colours.rb +11 -0
- data/lib/pdf_paradise/requires/esystem_and_opn_and_colours.rb +10 -0
- data/lib/pdf_paradise/requires/require_the_whole_project.rb +30 -0
- data/lib/pdf_paradise/requires/require_utility_scripts.rb +9 -0
- data/lib/pdf_paradise/set_main_book.rb +156 -0
- data/lib/pdf_paradise/set_pdf_title.rb +220 -0
- data/lib/pdf_paradise/sinatra/embeddable_interface.rb +389 -0
- data/lib/pdf_paradise/toplevel_methods/convert_epub_to_pdf.rb +27 -0
- data/lib/pdf_paradise/toplevel_methods/convert_markdown_to_pdf.rb +45 -0
- data/lib/pdf_paradise/toplevel_methods/convert_ppt_to_pdf.rb +35 -0
- data/lib/pdf_paradise/toplevel_methods/e.rb +16 -0
- data/lib/pdf_paradise/toplevel_methods/esystem.rb +20 -0
- data/lib/pdf_paradise/toplevel_methods/misc.rb +228 -0
- data/lib/pdf_paradise/toplevel_methods/number_pages.rb +38 -0
- data/lib/pdf_paradise/toplevel_methods/opened_pdf_files.rb +221 -0
- data/lib/pdf_paradise/toplevel_methods/query_pdf_title.rb +201 -0
- data/lib/pdf_paradise/toplevel_methods/reduce_size_of_this_pdf_file.rb +46 -0
- data/lib/pdf_paradise/toplevel_methods/roebe.rb +17 -0
- data/lib/pdf_paradise/toplevel_methods/to_pdf.rb +12 -0
- data/lib/pdf_paradise/utility_scripts/README.md +3 -0
- data/lib/pdf_paradise/utility_scripts/automatic_pdf_title.rb +104 -0
- data/lib/pdf_paradise/utility_scripts/check_syntax_of_pdf_files.rb +106 -0
- data/lib/pdf_paradise/utility_scripts/combine_these_pdf_pages.rb +118 -0
- data/lib/pdf_paradise/utility_scripts/convert_pdf_to_text.rb +179 -0
- data/lib/pdf_paradise/utility_scripts/delete_last_page_of_this_pdf_file.rb +180 -0
- data/lib/pdf_paradise/utility_scripts/delete_the_first_page_of_this_pdf_file/delete_the_first_page_of_this_pdf_file.rb +429 -0
- data/lib/pdf_paradise/utility_scripts/delete_this_page_of_this_pdf_file.rb +356 -0
- data/lib/pdf_paradise/utility_scripts/djvu_to_pdf.rb +87 -0
- data/lib/pdf_paradise/utility_scripts/extract_all_images_from_this_pdf_file.rb +129 -0
- data/lib/pdf_paradise/utility_scripts/extract_pdf_page.rb +283 -0
- data/lib/pdf_paradise/utility_scripts/pdf_file_n_total_pages.rb +348 -0
- data/lib/pdf_paradise/utility_scripts/pdf_optimizer.rb +111 -0
- data/lib/pdf_paradise/utility_scripts/pdf_statistics.rb +148 -0
- data/lib/pdf_paradise/utility_scripts/pdf_to_html.rb +75 -0
- data/lib/pdf_paradise/utility_scripts/remove_images.rb +110 -0
- data/lib/pdf_paradise/utility_scripts/rotate_pdf_file.rb +303 -0
- data/lib/pdf_paradise/utility_scripts/split_pdf.rb +364 -0
- data/lib/pdf_paradise/utility_scripts/to_pdf.rb +130 -0
- data/lib/pdf_paradise/utility_scripts/to_qdf.rb +66 -0
- data/lib/pdf_paradise/version/version.rb +19 -0
- data/lib/pdf_paradise/www/README.md +2 -0
- data/lib/pdf_paradise/www/sinatra/app.rb +304 -0
- data/lib/pdf_paradise/yaml/working_on_these_pdf_files.yml +4 -0
- data/lib/pdf_paradise.rb +5 -0
- data/pdf_paradise.gemspec +61 -0
- data/test/fpdf/001_minimal_example.rb +12 -0
- data/test/fpdf/002.pdf +0 -0
- data/test/fpdf/002_header_and_footer_example.rb +64 -0
- data/test/fpdf/003.pdf +98 -0
- data/test/fpdf/003_justified_paragraphs.rb +96 -0
- data/test/fpdf/file1.md +3 -0
- data/test/fpdf/file2.md +3 -0
- data/test/fpdf/test.pdf +0 -0
- data/test/testing_pdf_paradise.rb +12 -0
- metadata +239 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
#!/usr/bin/ruby -w
|
|
2
|
+
# Encoding: UTF-8
|
|
3
|
+
# frozen_string_literal: false
|
|
4
|
+
# =========================================================================== #
|
|
5
|
+
# === PdfParadise::PdfOptimizer
|
|
6
|
+
#
|
|
7
|
+
# /screen selects low-resolution output similar to the Acrobat Distiller
|
|
8
|
+
# "Screen Optimized" setting.
|
|
9
|
+
# /ebook selects medium-resolution output similar to the Acrobat Distiller
|
|
10
|
+
# "eBook" setting.
|
|
11
|
+
# /printer selects output similar to the Acrobat Distiller "Print Optimized"
|
|
12
|
+
# setting.
|
|
13
|
+
# /prepress selects output similar to Acrobat Distiller "Prepress Optimized"
|
|
14
|
+
# setting.
|
|
15
|
+
# /default selects output intended to be useful across a wide variety of
|
|
16
|
+
# uses, possibly at the expense of a larger output file.
|
|
17
|
+
#
|
|
18
|
+
# Usage example:
|
|
19
|
+
#
|
|
20
|
+
# PdfParadise::PdfOptimizer.new(ARGV)
|
|
21
|
+
#
|
|
22
|
+
# =========================================================================== #
|
|
23
|
+
# require 'pdf_paradise/utility_scripts/pdf_optimizer.rb'
|
|
24
|
+
# =========================================================================== #
|
|
25
|
+
require 'pdf_paradise/base/base.rb'
|
|
26
|
+
|
|
27
|
+
module PdfParadise

# Shellwords is used to quote file names before they are spliced into the
# shell commands assembled by PdfOptimizer#run.
require 'shellwords'

# =========================================================================== #
# === PdfParadise::PdfOptimizer
#
# Re-writes a .pdf file through external commandline tools, by default
# ghostscript with the low-resolution /screen preset. The result is always
# stored under the file name returned by output_file?.
# =========================================================================== #
class PdfOptimizer < ::PdfParadise::Base # === PdfParadise::PdfOptimizer

  # ========================================================================= #
  # === initialize
  #
  # i           - path of the .pdf file to optimize. An Array such as ARGV
  #               is accepted as well; only its first element is used.
  # run_already - when true (the default), run() is invoked immediately.
  # ========================================================================= #
  def initialize(
      i           = nil,
      run_already = true
    )
    reset
    set_input(i)
    run if run_already
  end

  # ========================================================================= #
  # === reset                                                     (reset tag)
  #
  # Delegates to the parent class, as other utility scripts of this project
  # (e. g. PdfToHtml, RemoveImages) do in their reset method.
  #
  # NOTE(review): Base#reset is not visible from this file - confirm that
  # it takes no arguments here as well.
  # ========================================================================= #
  def reset
    super()
  end

  # ========================================================================= #
  # === set_input
  #
  # Stores the input file name as a mutable String. For an Array only the
  # first element is kept; nil becomes the empty String.
  # ========================================================================= #
  def set_input(i = '')
    i = i.first if i.is_a? Array
    @input = i.to_s.dup
  end

  # ========================================================================= #
  # === input?
  #
  # Returns the file name that was registered via set_input().
  # ========================================================================= #
  def input?
    @input
  end; alias input_file  input? # === input_file
       alias input_file? input? # === input_file?

  # ========================================================================= #
  # === output_file?
  #
  # Name of the generated .pdf file. This is a fixed, relative file name.
  # ========================================================================= #
  def output_file?
    'output.pdf'
  end; alias output_pdf? output_file? # === output_pdf?

  # ========================================================================= #
  # === use_ghostscript?
  #
  # Decides which of the two strategies run() will use.
  # ========================================================================= #
  def use_ghostscript?
    true # For now this is hardcoded.
  end

  # ========================================================================= #
  # === output_ps
  #
  # Name of the intermediate PostScript file used by the pdf2ps/ps2pdf
  # strategy.
  # ========================================================================= #
  def output_ps
    'output.ps'
  end

  # ========================================================================= #
  # === run                                                         (run tag)
  #
  # Builds the shell command(s) and hands them to esystem(). Nothing is
  # executed when the input file does not exist; a hint is shown instead.
  # ========================================================================= #
  def run
    source = input_file?
    unless File.exist?(source)
      e 'Please supply an argument to this class that should be a local'
      e '.pdf file.'
      return
    end
    # ======================================================================= #
    # We have two different solutions. All file names are shell-escaped so
    # that names containing spaces or shell metacharacters are passed to
    # the external programs as exactly one argument.
    # ======================================================================= #
    if use_ghostscript?
      cmd = 'gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4 -dPDFSETTINGS=/screen '\
            '-dNOPAUSE -dQUIET -dBATCH -sOutputFile='+
            Shellwords.escape(output_file?)+' '+
            Shellwords.escape(source)
    else
      # Two steps: .pdf -> .ps first, then .ps -> .pdf.
      ps_file = Shellwords.escape(output_ps)
      esystem 'pdf2ps '+Shellwords.escape(source)+' '+ps_file
      cmd = 'ps2pdf '+ps_file+' '+Shellwords.escape(output_pdf?)
    end
    esystem cmd
  end

end; end
|
|
108
|
+
|
|
109
|
+
# Only act on the commandline arguments when this file is executed directly
# rather than being required by other code.
PdfParadise::PdfOptimizer.new(ARGV) if __FILE__ == $PROGRAM_NAME # pdfoptimizer
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
#!/usr/bin/ruby -w
|
|
2
|
+
# Encoding: UTF-8
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
# =========================================================================== #
|
|
5
|
+
# === PdfParadise::PdfStatistics
|
|
6
|
+
#
|
|
7
|
+
# This class will make use of PDF::Reader to provide us with some
|
|
8
|
+
# statistical information about a given .pdf file.
|
|
9
|
+
#
|
|
10
|
+
# You can also output information from a specific page.
|
|
11
|
+
#
|
|
12
|
+
# PDF is a page based file format, so most visible information
|
|
13
|
+
# is available via page-based iteration
|
|
14
|
+
#
|
|
15
|
+
# reader = PDF::Reader.new("somefile.pdf")
|
|
16
|
+
#
|
|
17
|
+
# reader.pages.each { |page|
|
|
18
|
+
# puts page.fonts
|
|
19
|
+
# puts page.text
|
|
20
|
+
# puts page.raw_content
|
|
21
|
+
# }
|
|
22
|
+
#
|
|
23
|
+
# Usage example:
|
|
24
|
+
#
|
|
25
|
+
# PdfParadise::PdfStatistics.new(ARGV)
|
|
26
|
+
#
|
|
27
|
+
# =========================================================================== #
|
|
28
|
+
# require 'pdf_paradise/utility_scripts/pdf_statistics.rb'
|
|
29
|
+
# =========================================================================== #
|
|
30
|
+
require 'pdf_paradise/base/base.rb'
|
|
31
|
+
|
|
32
|
+
module PdfParadise

# =========================================================================== #
# === PdfParadise::PdfStatistics
#
# Reports the PDF version, selected entries of the document information
# dictionary and the page count of one .pdf file, via PDF::Reader.
# =========================================================================== #
class PdfStatistics < ::PdfParadise::Base # === PdfParadise::PdfStatistics

  # ========================================================================= #
  # === INFO_DICTIONARY_LABELS
  #
  # Maps the keys of the information dictionary that are reported to the
  # text shown in front of the respective value. The order of this Hash is
  # the order of the report.
  # ========================================================================= #
  INFO_DICTIONARY_LABELS = {
    Title:        'The title of this .pdf is: ',
    CreationDate: 'This .pdf was created at: ',
    Author:       'The author of this .pdf is: ',
    Producer:     'It was produced via: ',
    ModDate:      'It was last modified at: ',
    Creator:      'It was created via: '
  }.freeze

  # ========================================================================= #
  # === initialize
  #
  # i           - path of the .pdf file to inspect. An Array such as ARGV
  #               is accepted as well; only its first element is used.
  # run_already - when true (the default), run() is invoked immediately.
  # ========================================================================= #
  def initialize(
      i           = nil,
      run_already = true
    )
    require_pdf_reader
    reset
    set_input(i)
    run if run_already
  end

  # ========================================================================= #
  # === reset                                                     (reset tag)
  #
  # Delegates to the parent class, as other utility scripts of this project
  # (e. g. PdfToHtml, RemoveImages) do in their reset method.
  #
  # NOTE(review): Base#reset is not visible from this file - confirm that
  # it takes no arguments here as well.
  # ========================================================================= #
  def reset
    super()
  end

  # ========================================================================= #
  # === require_pdf_reader
  #
  # Loads the pdf-reader gem while warnings are switched off. The previous
  # $VERBOSE value is restored in an ensure clause, so it is also put back
  # when the require fails (for instance with a LoadError).
  # ========================================================================= #
  def require_pdf_reader
    old_verbose = $VERBOSE
    $VERBOSE = nil
    require 'pdf-reader'
  ensure
    $VERBOSE = old_verbose # Restore it again here.
  end

  # ========================================================================= #
  # === set_input
  #
  # Stores the input file name as a mutable String. For an Array only the
  # first element is kept; nil becomes the empty String.
  # ========================================================================= #
  def set_input(i = '')
    i = i.first if i.is_a? Array
    @input = i.to_s.dup
  end

  # ========================================================================= #
  # === input?
  #
  # Returns the file name that was registered via set_input().
  # ========================================================================= #
  def input?
    @input
  end

  # ========================================================================= #
  # === instantiate_reader_object
  #
  # Creates the PDF::Reader instance for the input file and keeps it in
  # @reader, where the reporting methods expect it.
  # ========================================================================= #
  def instantiate_reader_object
    @reader = PDF::Reader.new(input?)
  end

  # ========================================================================= #
  # === show_extended_info
  #
  # This will tap into the .info method. Only keys that are present in the
  # returned Hash are reported, one line each.
  # ========================================================================= #
  def show_extended_info
    info = @reader.info
    # NOTE(review): older pdf-reader releases presumably return nil for a
    # .pdf without an information dictionary - confirm. Nothing can be
    # reported in that case.
    return if info.nil?
    padding = 32 # The labels are padded to a common width.
    INFO_DICTIONARY_LABELS.each_pair {|key, label|
      e label.ljust(padding)+simp(info[key]) if info.has_key?(key)
    }
  end

  # ========================================================================= #
  # === report_n_pages_in_the_pdf_document
  #
  # Reports the page count as given by PDF::Reader#page_count.
  # ========================================================================= #
  def report_n_pages_in_the_pdf_document
    e 'n pages in this .pdf document: '+simp(@reader.page_count)
  end

  # ========================================================================= #
  # === run                                                         (run tag)
  #
  # Shows the whole report. When the input file does not exist a hint is
  # shown instead, rather than letting PDF::Reader raise.
  # ========================================================================= #
  def run
    unless File.exist?(input?)
      e 'Please supply an argument to this class that should be a local'
      e '.pdf file.'
      return
    end
    instantiate_reader_object
    cliner
    e rev+'The PDF version for this .pdf file was: '+
      simp(@reader.pdf_version)
    show_extended_info
    # e @reader.metadata # <- This is how to obtain the metadata information.
    report_n_pages_in_the_pdf_document
    cliner
  end

  # ========================================================================= #
  # === PdfParadise::PdfStatistics[]
  #
  # Shortcut for PdfParadise::PdfStatistics.new(i).
  # ========================================================================= #
  def self.[](i = '')
    new(i)
  end

end; end
|
|
145
|
+
|
|
146
|
+
# Only act on the commandline arguments when this file is executed directly
# rather than being required by other code.
PdfParadise::PdfStatistics.new(ARGV) if __FILE__ == $PROGRAM_NAME # pdfstatistics /Depot/PDF/foobar.pdf
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
#!/usr/bin/ruby -w
|
|
2
|
+
# Encoding: UTF-8
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
# =========================================================================== #
|
|
5
|
+
# === PdfParadise::PdfToHtml
|
|
6
|
+
#
|
|
7
|
+
# This class will convert a .pdf file to .html, based on pdftohtml from
|
|
8
|
+
# poppler.
|
|
9
|
+
#
|
|
10
|
+
# Usage examples:
|
|
11
|
+
#
|
|
12
|
+
# PdfParadise::PdfToHtml.new(ARGV)
|
|
13
|
+
#
|
|
14
|
+
# =========================================================================== #
|
|
15
|
+
# require 'pdf_paradise/utility_scripts/pdf_to_html.rb'
|
|
16
|
+
# PdfParadise::PdfToHtml.new(ARGV)
|
|
17
|
+
# =========================================================================== #
|
|
18
|
+
require 'pdf_paradise/base/base.rb'
|
|
19
|
+
|
|
20
|
+
module PdfParadise

# Shellwords is used to quote file names before they are spliced into the
# shell command assembled by PdfToHtml#run.
require 'shellwords'

# =========================================================================== #
# === PdfParadise::PdfToHtml
#
# Runs the external "pdftohtml" program once for every given commandline
# argument.
# =========================================================================== #
class PdfToHtml < ::PdfParadise::Base # === PdfParadise::PdfToHtml

  # ========================================================================= #
  # === NAMESPACE
  #
  # The name of this class as a String; handed to opnn() of the parent.
  # ========================================================================= #
  NAMESPACE = inspect

  # ========================================================================= #
  # === initialize
  #
  # i           - the commandline arguments, by default ARGV. They are
  #               registered via set_commandline_arguments() of the parent
  #               class.
  # run_already - when true (the default), run() is invoked immediately.
  # ========================================================================= #
  def initialize(
      i           = ARGV,
      run_already = true
    )
    reset
    set_commandline_arguments(i)
    run if run_already
  end

  # ========================================================================= #
  # === reset
  #
  # Only delegates to the parent class.
  # ========================================================================= #
  def reset
    super()
  end

  # ========================================================================= #
  # === opnn
  #
  # Calls opnn() of the parent class with the NAMESPACE of this class.
  # ========================================================================= #
  def opnn
    super(NAMESPACE)
  end

  # ========================================================================= #
  # === run                                                         (run tag)
  #
  # For every commandline argument one "pdftohtml" call is issued through
  # esystem(). The .html file is named after the basename of the input,
  # with "_to_html.html" appended, so it does not carry the directory part
  # of the input path.
  # ========================================================================= #
  def run
    commandline_arguments?.each {|entry|
      html_file = File.basename(entry+'_to_html')+'.html'
      # ===================================================================== #
      # The -s option is used to "generate single HTML that includes all
      # pages". Both file names are shell-escaped so that names containing
      # spaces or shell metacharacters remain one single argument.
      # ===================================================================== #
      esystem 'pdftohtml -s '+Shellwords.escape(entry)+' '+
              Shellwords.escape(html_file)
    }
  end

end; end
|
|
72
|
+
|
|
73
|
+
# Only act on the commandline arguments when this file is executed directly
# rather than being required by other code.
PdfParadise::PdfToHtml.new(ARGV) if __FILE__ == $PROGRAM_NAME # pdf_to_html foobar.pdf
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#!/usr/bin/ruby -w
|
|
2
|
+
# Encoding: UTF-8
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
# =========================================================================== #
|
|
5
|
+
# === PdfParadise::RemoveImages
|
|
6
|
+
#
|
|
7
|
+
# This class will remove all images from a given .pdf file, by making use
|
|
8
|
+
# of ghostscript.
|
|
9
|
+
#
|
|
10
|
+
# Usage example:
|
|
11
|
+
#
|
|
12
|
+
# PdfParadise::RemoveImages.new(ARGV)
|
|
13
|
+
#
|
|
14
|
+
# =========================================================================== #
|
|
15
|
+
# require 'pdf_paradise/utility_scripts/remove_images.rb'
|
|
16
|
+
# =========================================================================== #
|
|
17
|
+
require 'pdf_paradise/base/base.rb'
|
|
18
|
+
|
|
19
|
+
module PdfParadise

# Shellwords is used to quote file names before they are spliced into the
# shell command assembled by RemoveImages#run.
require 'shellwords'

# =========================================================================== #
# === PdfParadise::RemoveImages
#
# Runs ghostscript with the -dFILTERVECTOR and -dFILTERIMAGE switches on a
# .pdf file and stores the result in a new file whose name starts with
# "only_text_is_kept_".
# =========================================================================== #
class RemoveImages < ::PdfParadise::Base # === PdfParadise::RemoveImages

  # ========================================================================= #
  # === NAMESPACE
  #
  # The name of this class as a String; handed to opnn() of the parent.
  # ========================================================================= #
  NAMESPACE = inspect

  # ========================================================================= #
  # === initialize
  #
  # i           - path of the .pdf file to work on. An Array such as ARGV
  #               is accepted as well; only its first element is used.
  # run_already - when true (the default), run() is invoked immediately.
  # ========================================================================= #
  def initialize(
      i           = nil,
      run_already = true
    )
    reset
    set_input(i)
    run if run_already
  end

  # ========================================================================= #
  # === reset                                                     (reset tag)
  #
  # Only delegates to the parent class.
  # ========================================================================= #
  def reset
    super()
  end

  # ========================================================================= #
  # === set_input
  #
  # Stores the input file name as a mutable String. For an Array only the
  # first element is kept; nil becomes the empty String.
  # ========================================================================= #
  def set_input(i = '')
    i = i.first if i.is_a? Array
    @input = i.to_s.dup
  end

  # ========================================================================= #
  # === input?
  #
  # Returns the file name that was registered via set_input().
  # ========================================================================= #
  def input?
    @input
  end; alias input_file? input? # === input_file?

  # ========================================================================= #
  # === opnn
  #
  # Calls opnn() of the parent class with the NAMESPACE of this class.
  # ========================================================================= #
  def opnn
    super(NAMESPACE)
  end

  # ========================================================================= #
  # === run                                                         (run tag)
  #
  # Shows a hint and does nothing else when the input file does not exist.
  # Otherwise ghostscript is invoked through esystem() and, if the output
  # file exists afterwards, its creation is reported.
  # ========================================================================= #
  def run
    input_file = input_file?
    unless File.exist? input_file
      e 'Please supply an argument to this class that should be a local'
      e '.pdf file.'
      return
    end
    # ======================================================================= #
    # The prefix is put in front of the basename only. Prepending it to a
    # path such as "/Depot/PDF/foobar.pdf" would otherwise yield a path
    # into a directory called "only_text_is_kept_". For an input without a
    # directory part the resulting name is the same as before.
    # ======================================================================= #
    this_output_file = 'only_text_is_kept_'+File.basename(input_file)
    opnn; e 'Now removing all images from the file `'+sfile(input_file)+'`.'
    opnn; e 'Will store into the file `'+sfile(this_output_file)+'`.'
    # ======================================================================= #
    # Both file names are shell-escaped so that names containing spaces or
    # shell metacharacters remain one single argument.
    # ======================================================================= #
    esystem 'gs -o '+Shellwords.escape(this_output_file)+
            ' -sDEVICE=pdfwrite -dFILTERVECTOR -dFILTERIMAGE '+
            Shellwords.escape(input_file)
    return unless File.exist? this_output_file
    opnn; e 'The file `'+sfile(this_output_file)+'` has been created!'
  end

  # ========================================================================= #
  # === PdfParadise::RemoveImages[]
  #
  # Shortcut for PdfParadise::RemoveImages.new(i).
  # ========================================================================= #
  def self.[](i = '')
    new(i)
  end

end

# =========================================================================== #
# === PdfParadise.remove_image
#
# Module-level convenience method that instantiates (and thereby runs)
# PdfParadise::RemoveImages for the given file. The class is called
# RemoveImages, in the plural; referring to it in the singular raised a
# NameError.
# =========================================================================== #
def self.remove_image(from_this_pdf_file = ARGV)
  RemoveImages.new(from_this_pdf_file)
end

end
|
|
107
|
+
|
|
108
|
+
# Only act on the commandline arguments when this file is executed directly
# rather than being required by other code.
PdfParadise::RemoveImages.new(ARGV) if __FILE__ == $PROGRAM_NAME # removeimagesfromthispdffile
|