RubyGems - pdf_paradise - Versions diffs - 0.1.66 - Mend

pdf_paradise 0.1.66

Potentially problematic release.

This version of pdf_paradise might be problematic. Click here for more details.

Files changed (110) hide show

data/lib/pdf_paradise/merge_pdf/merge_pdf.rb ADDED Viewed

@@ -0,0 +1,306 @@
+#!/usr/bin/ruby -w
+# Encoding: UTF-8
+# frozen_string_literal: true
+# =========================================================================== #
+# === PdfParadise::MergePdf
+#
+# This class will merge together some .pdf files.
+#
+# Usage example:
+#
+#   PdfParadise::MergePdf.new(ARGV)
+#
+# =========================================================================== #
+# require 'pdf_paradise/merge_pdf/merge_pdf.rb'
+# =========================================================================== #
+require 'pdf_paradise/base/base.rb'
+require 'pdf_paradise/merge_pdf/menu.rb'
+module PdfParadise
+class MergePdf < Base # === PdfParadise::MergePdf
+  # ========================================================================= #
+  # === NAMESPACE
+  # ========================================================================= #
+  NAMESPACE = inspect
+  # ========================================================================= #
+  # === THIS_FILE_HERE
+  # ========================================================================= #
+  THIS_FILE_HERE =
+    '/home/x/programming/ruby/src/pdf_paradise/lib/pdf_paradise/merge_pdf/merge_pdf.rb'
+  # ========================================================================= #
+  # === STORE_WHERE_WE_MERGED_FILES
+  # ========================================================================= #
+  STORE_WHERE_WE_MERGED_FILES =
+    '/home/Temp/merged_these_pdf_files.md'
+  # ========================================================================= #
+  # === initialize
+  # ========================================================================= #
+  def initialize(
+      i           = nil,
+      run_already = true
+    )
+    reset
+    set_commandline_arguments(
+      return_hyphen_arguments(i)
+    )
+    case i
+    when :dont_run_yet
+      run_already = false
+    else
+      set_array_pdf_files(i)
+    end
+    run if run_already
+  end
+  # ========================================================================= #
+  # === reset                                                     (reset tag)
+  # ========================================================================= #
+  # Bring the instance variables of this class into their initial state.
+  #
+  # NOTE(review): unlike the reset() methods of the other classes shown in
+  # this diff, this one does not call super() - confirm that this is
+  # intentional.
+  def reset
+    # ======================================================================= #
+    # === @array_pdf_files
+    #
+    # The input files that will be merged; filled by set_array_pdf_files().
+    # ======================================================================= #
+    @array_pdf_files = []
+    # ======================================================================= #
+    # === @use_this_program_for_merging_the_pdf_files
+    #
+    # Currently we can either use :ghostscript (gs) or :hexapdf. The
+    # latter is a pure ruby-gem.
+    # ======================================================================= #
+    @use_this_program_for_merging_the_pdf_files = :hexapdf # :ghostscript
+    # Called without an argument, so the default output name is stored.
+    set_output_filename
+  end
+  # ========================================================================= #
+  # === show_help                                                  (help tag)
+  # ========================================================================= #
+  # Print the available commandline options (--remove, --use-hexapdf and
+  # --use-ghostscript), via e(). The e() calls without an argument sit
+  # between the individual text lines.
+  def show_help
+    e
+    e 'To remove the merged files, do this:'
+    e
+    e '  mergepdf --remove'
+    e
+    e 'To use hexapdf:'
+    e
+    e '  mergepdf --use-hexapdf'
+    e
+    e 'To use ghostscript:'
+    e
+    e '  mergepdf --use-ghostscript'
+    e
+  end
+  # ========================================================================= #
+  # === remove_merged_files                                      (remove tag)
+  # ========================================================================= #
+  def remove_merged_files
+    _ = where_are_the_merged_files?
+    opnn; e 'We will now remove the merged files, by reading '\
+            'in from `'+sfile(_)+'`.'
+    # ======================================================================= #
+    # Read in that save-file next. As of Feb 2015, the format is
+    # one-file-per-line. We also get rid of '"' characters.
+    # ======================================================================= #
+    _ = File.readlines(_).map {|entry|
+      entry.chomp.delete('"')
+    } # .split(' ')
+    _.each {|file| delete(file) }
+  end
+  # ========================================================================= #
+  # === open_this_file_here
+  # ========================================================================= #
+  def open_this_file_here
+    _ = 'bluefish '+THIS_FILE_HERE
+    esystem _
+  end
+  # ========================================================================= #
+  # === do_conversion
+  #
+  # This method will do the actual conversion.
+  # ========================================================================= #
+  def do_conversion
+    these_files = @array_pdf_files.join(N)
+    # ======================================================================= #
+    # Next, we store this in the save-file.
+    # ======================================================================= #
+    write_what_into(these_files, where_are_the_merged_files?)
+    case @use_this_program_for_merging_the_pdf_files
+    # ======================================================================= #
+    # === :hexapdf
+    # ======================================================================= #
+    when :hexapdf
+      _ = 'hexapdf merge --force '.dup
+      _ << these_files.tr(N,' ')
+      _ << " #{@output_filename}"
+    # ======================================================================= #
+    # === :ghostscript
+    #
+    # To use this via the commandline, try:
+    #
+    #   mergepdf one.pdf two.pdf --use-ghostscript
+    #
+    # ======================================================================= #
+    when :ghostscript,
+         :default
+      _ = 'gs -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE="'+
+          @output_filename+
+          '" -dBATCH '
+      _ << these_files.tr(N,' ')
+    end
+    e
+    cliner
+    e "#{::Colours.rev}We will run this command next for "\
+      "#{@array_pdf_files.size.to_s} .pdf files:"
+    e
+    efancy "  #{_}" # This is the command we will run.
+    e
+    system _
+    cliner
+  end
+  # ========================================================================= #
+  # === where_are_the_merged_files?
+  # ========================================================================= #
+  # Return the path of the save-file (STORE_WHERE_WE_MERGED_FILES).
+  def where_are_the_merged_files?
+    STORE_WHERE_WE_MERGED_FILES
+  end
+  # ========================================================================= #
+  # === delete
+  # ========================================================================= #
+  def delete(i)
+    unless i == '/'
+      efancy(" - #{i}") # Add a slight padding to the output.
+      File.delete(i) if File.exist? i # Safeguard.
+    end
+  end
+  # ========================================================================= #
+  # === pad_this
+  # ========================================================================= #
+  def pad_this(i)
+    return '"'+i+'"'
+  end
+  # ========================================================================= #
+  # === set_array_pdf_files
+  # ========================================================================= #
+  def set_array_pdf_files(i = '')
+    if i and i.is_a?(String) and i.include?(' ')
+      # ===================================================================== #
+      # Do automatic splitting in this case.
+      # ===================================================================== #
+      i = i.split(' ')
+    end
+    i = [i].flatten.compact.reject {|entry| entry.start_with?('--') }
+    if i.is_a?(Array) and i.empty?
+      all_pdf_files = Dir['*.pdf']
+      # ===================================================================== #
+      # Try to use all .pdf files.
+      # ===================================================================== #
+      unless all_pdf_files.empty?
+        opnn; e 'No specific input was given, thus using all '+
+                 sfancy(all_pdf_files.size.to_s)+' .pdf files '\
+                'in this directory'
+        i = all_pdf_files
+      end
+    end
+    if i.is_a? Array # Keep only files that exist.
+      i.select! {|entry| File.exist? entry }
+    end
+    if i.is_a? Array
+      i.map! {|entry| pad_this(entry) }
+    end
+    if i.empty?
+      opnn; ewarn 'Unable to find any existing files '\
+                  'to match to, thus exiting now.'
+      exit
+    end
+    @array_pdf_files << i
+    @array_pdf_files = @array_pdf_files.flatten # Always keep it flattened.
+  end; alias << set_array_pdf_files # === <<
+  # ========================================================================= #
+  # === set_output_filename
+  #
+  # Store the filename of the pdf through this method, the name of
+  # the output file.
+  # ========================================================================= #
+  def set_output_filename(
+      i = 'Merged_PDF_Files.pdf'
+    )
+    if i.size > 255
+      opnn; ewarn 'The name for the new filename is larger than 255.'
+      opnn; ewarn 'We will thus truncate the name to the first 255 '\
+                  'characters instead.'
+      i = i[0, 254]
+      # ===================================================================== #
+      # Since as of Jun 2016, we will get rid of the last 4
+      # characters and append '.pdf'
+      # ===================================================================== #
+      i[-4,4] = ''
+      i << '.pdf'
+    end
+    @output_filename = i
+  end; alias store_at set_output_filename # === store_at
+  # ========================================================================= #
+  # === opnn
+  # ========================================================================= #
+  # Delegate to the opnn() of the superclass, passing NAMESPACE along.
+  def opnn
+    super(NAMESPACE)
+  end
+  # ========================================================================= #
+  # === feedback_where_it_is_stored
+  # ========================================================================= #
+  def feedback_where_it_is_stored
+    if File.exist? result?
+      opnn; e "The result is stored at `#{sfile(self.result)}`."
+    end
+  end; alias report feedback_where_it_is_stored # === report
+  # ========================================================================= #
+  # === output_filename
+  # ========================================================================= #
+  def output_filename
+    _ = @output_filename
+    _ = _.join(', ') if _.is_a? Array
+    return _
+  end; alias result           output_filename # === result
+       alias result?          output_filename # === result?
+       alias output_filename? output_filename # === output_filename?
+       alias output_file?     output_filename # === output_file?
+  # ========================================================================= #
+  # === build_new_output_name_based_on_input
+  # ========================================================================= #
+  def build_new_output_name_based_on_input
+    _ = @array_pdf_files.map {|entry|
+          File.basename(entry).gsub(File.extname(entry), '')
+        }
+    _ = _.join(' ').gsub(/ /,'_').gsub(/\"/,'')+'.pdf'
+    set_output_filename(_)
+  end
+  # ========================================================================= #
+  # === run                                                         (run tag)
+  # ========================================================================= #
+  # Entry point: call menu(), derive the output filename from the input
+  # files and then perform the merge.
+  def run
+    menu
+    build_new_output_name_based_on_input
+    do_conversion
+  end
+end; end
+# Only when this file is run directly: merge the files given in ARGV and
+# afterwards report where the result is stored.
+if __FILE__ == $PROGRAM_NAME
+  _ = PdfParadise::MergePdf.new(ARGV)
+  _.feedback_where_it_is_stored # Call it manually.
+end # merge_pdf

data/lib/pdf_paradise/merge_pdf_namespace.rb ADDED Viewed

@@ -0,0 +1,9 @@
+#!/usr/bin/ruby -w
+# Encoding: UTF-8
+# frozen_string_literal: true
+# =========================================================================== #
+# require 'pdf_paradise/merge_pdf_namespace'
+# =========================================================================== #
+require 'pdf_paradise/requires/require_the_whole_project.rb'
+# Make PdfParadise::MergePdf reachable as ::MergePdf, that is without
+# the PdfParadise:: prefix.
+MergePdf = PdfParadise::MergePdf # Now the MergePdf namespace is available in the toplevel.

data/lib/pdf_paradise/merge_then_open/merge_then_open.rb ADDED Viewed

@@ -0,0 +1,105 @@
+#!/usr/bin/ruby -w
+# Encoding: UTF-8
+# frozen_string_literal: true
+# =========================================================================== #
+# === PdfParadise::MergeThenOpen
+#
+# This will first merge different .pdf files, and then open the
+# newly generated .pdf file.
+#
+# Usage example:
+#
+#   PdfParadise::MergeThenOpen.new(ARGV)
+#
+# =========================================================================== #
+# require 'pdf_paradise/merge_then_open/merge_then_open.rb'
+# PdfParadise::MergeThenOpen.new(ARGV)
+# =========================================================================== #
+require 'pdf_paradise/merge_pdf/merge_pdf.rb'
+module PdfParadise
+class MergeThenOpen < Base # === PdfParadise::MergeThenOpen
+  # ========================================================================= #
+  # === NAMESPACE
+  # ========================================================================= #
+  NAMESPACE = inspect
+  # ========================================================================= #
+  # === initialize
+  # ========================================================================= #
+  # commandline_arguments - the arguments that run() passes on to
+  #                         MergePdf.new.
+  # run_already           - when true, run() is invoked right away.
+  def initialize(
+      commandline_arguments = nil,
+      run_already           = true
+    )
+    reset
+    set_commandline_arguments(
+      commandline_arguments
+    )
+    run if run_already
+  end
+  # ========================================================================= #
+  # === reset                                                     (reset tag)
+  # ========================================================================= #
+  # Only delegates to the reset() of the superclass, without arguments.
+  def reset
+    super()
+  end
+  # ========================================================================= #
+  # === opnn
+  # ========================================================================= #
+  # Delegate to the opnn() of the superclass; i defaults to NAMESPACE.
+  def opnn(i = NAMESPACE)
+    super(i)
+  end
+  # ========================================================================= #
+  # === set_commandline_arguments
+  # ========================================================================= #
+  def set_commandline_arguments(i = '')
+    i = [i].flatten.compact
+    @commandline_arguments = i
+  end
+  # ========================================================================= #
+  # === commandline_arguments?
+  # ========================================================================= #
+  # Return the Array stored by set_commandline_arguments().
+  def commandline_arguments?
+    @commandline_arguments
+  end
+  # ========================================================================= #
+  # === run                                                         (run tag)
+  # ========================================================================= #
+  def run
+    merge_pdf = MergePdf.new(@commandline_arguments)
+    new_file = merge_pdf.output_file?
+    if File.exist? new_file
+      # ===================================================================== #
+      # We have to open this file next.
+      # ===================================================================== #
+      begin
+        require 'open'
+        if Object.const_defined? :Open
+          opnn; e "Opening #{sfile(new_file)} next."
+          Open.in_editor(new_file)
+        end
+      rescue LoadError; end
+    else
+      opnn; e "No file appears to exist at #{sfile(new_file)}."
+    end
+  end
+  # ========================================================================= #
+  # === PdfParadise::MergeThenOpen[]
+  # ========================================================================= #
+  # Shortcut for MergeThenOpen.new(i); returns the new instance.
+  def self.[](i = '')
+    new(i)
+  end
+end; end
+# Only when this file is run directly: merge the files given in ARGV
+# and open the result.
+if __FILE__ == $PROGRAM_NAME
+  PdfParadise::MergeThenOpen.new(ARGV)
+end # mergethenopen

data/lib/pdf_paradise/pdf_file_n_total_pages.rb ADDED Viewed

@@ -0,0 +1,249 @@
+#!/usr/bin/ruby -w
+# Encoding: UTF-8
+# frozen_string_literal: true
+# =========================================================================== #
+# === PdfParadise::PdfFileNTotalPages
+#
+# This class can report how many pages are in a given .pdf file.
+#
+# Also note that an alternative exists:
+#
+#   https://github.com/prawnpdf/pdf-inspector
+#
+# The code would be like this here:
+#
+#   page_analysis = PDF::Inspector::Page.analyze(pdf)
+#   page_analysis.pages.size # <-- like 50 pages
+#
+# Usage example:
+#
+#   PdfParadise::PdfFileNTotalPages.new(ARGV)
+#
+# =========================================================================== #
+# require 'pdf_paradise/pdf_file_n_total_pages.rb'
+# PdfParadise::PdfFileNTotalPages.new(ARGV)
+# =========================================================================== #
+require 'pdf_paradise/base/base.rb'
+module PdfParadise
+class PdfFileNTotalPages < Base # === PdfParadise::PdfFileNTotalPages
+  # ========================================================================= #
+  # === MATCH_AGAINST_THIS_REGEX
+  # ========================================================================= #
+  MATCH_AGAINST_THIS_REGEX = /Count (\d+)/
+  # ========================================================================= #
+  # === USE_THIS_PROGRAM_TO_DETERMINE_HOW_MANY_PAGES_ARE_PART_OF_THE_PDF_FILE
+  #
+  # This can be :pdfinfo or :qpdf or similar.
+  # ========================================================================= #
+  USE_THIS_PROGRAM_TO_DETERMINE_HOW_MANY_PAGES_ARE_PART_OF_THE_PDF_FILE = :qpdf # :pdfinfo
+  # ========================================================================= #
+  # === initialize
+  # ========================================================================= #
+  def initialize(
+      i           = nil,
+      run_already = true
+    )
+    reset
+    set_commandline_arguments(i)
+    case run_already
+    when :be_silent
+      @be_verbose = false
+      run_already = false
+    end
+    run if run_already
+  end
+  # ========================================================================= #
+  # === reset                                                     (reset tag)
+  # ========================================================================= #
+  # Call the reset() of the superclass, then set the instance variables
+  # of this class to their defaults. Note that this includes setting
+  # @be_verbose back to true.
+  def reset
+    super()
+    # ======================================================================= #
+    # === @n_pages
+    #
+    # This variable will keep track as to how many pages the given .pdf
+    # page has.
+    # ======================================================================= #
+    @n_pages    = 0
+    # ======================================================================= #
+    # === @be_verbose
+    #
+    # run() only reports the number of pages when this is true.
+    # ======================================================================= #
+    @be_verbose = true
+    # ======================================================================= #
+    # === @use_this_program_to_determine_how_many_pages_are_part_of_the_pdf_file
+    #
+    # Selects the branch that determine_n_pages() will take.
+    # ======================================================================= #
+    @use_this_program_to_determine_how_many_pages_are_part_of_the_pdf_file =
+      USE_THIS_PROGRAM_TO_DETERMINE_HOW_MANY_PAGES_ARE_PART_OF_THE_PDF_FILE
+  end
+  # ========================================================================= #
+  # === n_pages
+  # ========================================================================= #
+  # Return @n_pages, the page count stored by set_n_pages() (0 after
+  # reset).
+  def n_pages?
+    @n_pages
+  end; alias n_pages n_pages? # === n_pages
+       alias result? n_pages? # === result?
+  # ========================================================================= #
+  # === set_n_pages
+  #
+  # Since as of December 2021 this method will do a tiny bit of
+  # sanitizing the given input.
+  # ========================================================================= #
+  def set_n_pages(i)
+    if i.is_a? String
+      i = i.strip
+    end
+    i = i.to_i
+    @n_pages = i
+  end
+  # ========================================================================= #
+  # === determine_dataset
+  # ========================================================================= #
+  # Read the whole file in binary mode into @dataset. determine_n_pages()
+  # scans this data in its fallback branch.
+  def determine_dataset(of_this_pdf_file = @this_pdf_file)
+    @dataset = File.binread(of_this_pdf_file)
+  end
+  # ========================================================================= #
+  # === be_verbose?
+  # ========================================================================= #
+  # Return @be_verbose; run() consults it before reporting.
+  def be_verbose?
+    @be_verbose
+  end
+  # ========================================================================= #
+  # === report_how_many_pages_were_found
+  # ========================================================================= #
+  # Output one line that names the current .pdf file and its page count.
+  def report_how_many_pages_were_found
+    e "#{rev}The pdf-file #{sfile(@this_pdf_file)} has exactly "\
+      "#{simp(n_pages?.to_s)} pages."
+  end
+  # ========================================================================= #
+  # === set_this_pdf_file
+  # ========================================================================= #
+  # Remember i as the .pdf file that is currently being inspected.
+  def set_this_pdf_file(i)
+    @this_pdf_file = i
+  end
+  # ========================================================================= #
+  # === determine_n_pages
+  # ========================================================================= #
+  def determine_n_pages
+    _ = @dataset
+    case @use_this_program_to_determine_how_many_pages_are_part_of_the_pdf_file
+    # ======================================================================= #
+    # === :qpdf
+    # ======================================================================= #
+    when :qpdf
+      result = `qpdf #{@this_pdf_file} --show-npages`
+      set_n_pages(result)
+    # ======================================================================= #
+    # === :pdfinfo
+    # ======================================================================= #
+    when :pdfinfo
+      result = `pdfinfo #{@this_pdf_file}`
+      determine_n_pages_via_pdfinfo(result)
+    else # else tag
+      # ===================================================================== #
+      # This here has the advantage (in theory) that we can just use
+      # a Regex and obtain the number of pages in the .pdf file. This
+      # works ok for many .pdf files, but not for all of them. This
+      # is also the reason why I switched to pdfinfo in March 2020 -
+      # it seems to be more reliable than the regex I am using.
+      # ===================================================================== #
+      scanned = _.scan(MATCH_AGAINST_THIS_REGEX)
+      # ===================================================================== #
+      # Note that the following may still return the wrong entry.
+      # I had this problem in March 2020.
+      # ===================================================================== #
+      max_value = scanned.map {|entry|
+        entry.first.to_i
+      }.max
+      set_n_pages(max_value)
+    end
+  end
+  # ========================================================================= #
+  # === determine_n_pages_via_pdfinfo
+  #
+  # We have to find an entry such as the following one:
+  #
+  #   Pages:          35
+  #
+  # ========================================================================= #
+  def determine_n_pages_via_pdfinfo(
+      i              = @this_pdf_file,
+      use_this_regex = Regexp.new(/Pages: (.+)/)
+    )
+    if i.include? 'Pages:'
+      i = i.scan(use_this_regex).flatten.first.strip
+    end
+    set_n_pages(i)
+  end
+  # ========================================================================= #
+  # === run                                                         (run tag)
+  # ========================================================================= #
+  def run
+    return_files_from_the_commandline_arguments.each {|this_pdf_file|
+      reset
+      set_this_pdf_file(this_pdf_file)
+      determine_dataset
+      determine_n_pages # Determine how many pages are in that .pdf file.
+      report_how_many_pages_were_found if be_verbose?
+    }
+  end
+  # ========================================================================= #
+  # === PdfFileNTotalPages.return_n_pages
+  #
+  # This method is silent by default.
+  # ========================================================================= #
+  # Create an instance with :be_silent (which sets @be_verbose to false
+  # and does not run yet), call run() and return the page count as an
+  # Integer.
+  #
+  # NOTE(review): whether the report is really suppressed depends on
+  # run() keeping @be_verbose across its per-file call to reset(), since
+  # reset() sets @be_verbose to true - verify.
+  def self.return_n_pages(of_this_pdf_file)
+    _ = PdfFileNTotalPages.new(of_this_pdf_file, :be_silent)
+    _.run
+    return _.n_pages.to_i # Ought to return a number, as an integer.
+  end
+  # ========================================================================= #
+  # === PdfParadise::PdfFileNTotalPages[]
+  # ========================================================================= #
+  # Create a new instance for i (which runs right away, as run_already
+  # defaults to true) and return its stored page count.
+  def self.[](i)
+    new(i).result?
+  end
+end
+# =========================================================================== #
+# === PdfParadise.n_pdf_pages?
+#
+# This is the top-level method to find out how many pdf pages are
+# part of the given .pdf file at hand.
+#
+# The first argument to this method is simply the name of the .pdf file
+# or, rather, the path to it.
+#
+# Usage example:
+#
+#   PdfParadise.n_pdf_pages?("/home/x/STUDIUM/UNI_WIEN/300260_Immunologie_und_zelluläre_Mikrobiologie_Teil_A/XIV-XVII_combined.pdf")
+#
+# =========================================================================== #
+# Returns the result of PdfFileNTotalPages.return_n_pages(), as an
+# Integer. The instance_eval blocks below register four further names
+# for this method.
+def self.n_pdf_pages?(of_this_pdf_file)
+  PdfFileNTotalPages.return_n_pages(of_this_pdf_file).to_i
+end; self.instance_eval { alias n_pages?                  n_pdf_pages? } # === PdfParadise.n_pages?
+     self.instance_eval { alias n_pages                   n_pdf_pages? } # === PdfParadise.n_pages
+     self.instance_eval { alias n_pdf_pages               n_pdf_pages? } # === PdfParadise.n_pdf_pages
+     self.instance_eval { alias n_pages_in_this_pdf_file? n_pdf_pages? } # === PdfParadise.n_pages_in_this_pdf_file?
+end
+# Only when this file is run directly: report the page count of the
+# files given in ARGV.
+if __FILE__ == $PROGRAM_NAME
+  PdfParadise::PdfFileNTotalPages.new(ARGV)
+end # n_pages $UNI_WIEN/300609_Molekulare_Entwicklungsbiologie/Entwicklungsbiologie7.pdf

data/lib/pdf_paradise/prawn_addons/README.md ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ This directory may contain some modifications to the prawn gem
2	+ and Prawn namespace.

data/lib/pdf_paradise/prawn_addons/prawn_addons.rb ADDED Viewed

@@ -0,0 +1,17 @@
+#!/usr/bin/ruby -w
+# Encoding: UTF-8
+# frozen_string_literal: true
+# =========================================================================== #
+# require 'pdf_paradise/prawn_addons/prawn_addons.rb'
+# =========================================================================== #
+if Object.const_defined? :Prawn
+module Prawn
+class Document
+  if respond_to? :start_new_page
+    alias add_page start_new_page
+  end
+end; end; end