pdfh 3.3.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.editorconfig +0 -15
- data/.gitignore +3 -0
- data/.rubocop.yml +5 -1
- data/.rubocop_todo.yml +5 -18
- data/.simplecov +32 -0
- data/AGENTS.md +174 -0
- data/CHANGELOG.md +74 -9
- data/Gemfile +0 -4
- data/Gemfile.lock +26 -37
- data/README.md +72 -37
- data/Rakefile +24 -6
- data/bin/console +3 -10
- data/bin/run +0 -1
- data/exe/pdfh +1 -1
- data/justfile +65 -0
- data/lib/pdfh/main.rb +25 -120
- data/lib/pdfh/models/document.rb +43 -128
- data/lib/pdfh/models/document_type.rb +35 -69
- data/lib/pdfh/models/run_options.rb +20 -0
- data/lib/pdfh/models/settings.rb +23 -83
- data/lib/pdfh/services/directory_scanner.rb +27 -0
- data/lib/pdfh/services/document_manager.rb +125 -0
- data/lib/pdfh/services/document_matcher.rb +57 -0
- data/lib/pdfh/services/opt_parser.rb +76 -0
- data/lib/pdfh/services/pdf_text_extractor.rb +45 -0
- data/lib/pdfh/services/settings_builder.rb +113 -0
- data/lib/pdfh/services/settings_validator.rb +150 -0
- data/lib/pdfh/utils/console.rb +5 -5
- data/lib/pdfh/utils/date_info.rb +55 -0
- data/lib/pdfh/utils/file_info.rb +47 -0
- data/lib/pdfh/utils/rename_validator.rb +4 -3
- data/lib/pdfh/version.rb +1 -1
- data/lib/pdfh.rb +25 -20
- data/mise.toml +20 -3
- data/pdfh.gemspec +3 -3
- metadata +18 -15
- data/lib/ext/string.rb +0 -9
- data/lib/pdfh/concerns/password_decodable.rb +0 -31
- data/lib/pdfh/models/document_period.rb +0 -37
- data/lib/pdfh/models/document_sub_type.rb +0 -6
- data/lib/pdfh/models/zip_types.rb +0 -17
- data/lib/pdfh/settings_template.rb +0 -21
- data/lib/pdfh/utils/opt_parser.rb +0 -78
- data/lib/pdfh/utils/options.rb +0 -38
- data/lib/pdfh/utils/pdf_file_handler.rb +0 -122
- data/lib/pdfh/utils/settings_builder.rb +0 -62
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pdfh
|
|
4
|
-
# Calculate correct period from the extracted document date and subtype month offset
|
|
5
|
-
class DocumentPeriod
|
|
6
|
-
attr_reader :month, :year
|
|
7
|
-
|
|
8
|
-
# @return [self]
|
|
9
|
-
def initialize(month:, month_offset:, year:, day: nil)
|
|
10
|
-
@day = day
|
|
11
|
-
@raw_month = month
|
|
12
|
-
@raw_year = year
|
|
13
|
-
normalized_month = Month.normalize_to_i(month) + (month_offset || 0)
|
|
14
|
-
year_offset = 0
|
|
15
|
-
@month = case normalized_month
|
|
16
|
-
when 0
|
|
17
|
-
year_offset = -1
|
|
18
|
-
12
|
|
19
|
-
when 13
|
|
20
|
-
year_offset = 1
|
|
21
|
-
1
|
|
22
|
-
else normalized_month
|
|
23
|
-
end
|
|
24
|
-
@year = (year.size == 2 ? "20#{year}" : year).to_i + year_offset
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
# @return [String (frozen)]
|
|
28
|
-
def to_s
|
|
29
|
-
"#{year}-#{month.to_s.rjust(2, "0")}"
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
# @return [String (frozen)]
|
|
33
|
-
def inspect
|
|
34
|
-
"<#{self.class} year=#{year} month=#{month}>"
|
|
35
|
-
end
|
|
36
|
-
end
|
|
37
|
-
end
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pdfh
|
|
4
|
-
# Zip files which contains PDF files that need pre-processing
|
|
5
|
-
class ZipType
|
|
6
|
-
include Concerns::PasswordDecodable
|
|
7
|
-
|
|
8
|
-
attr_reader :name, :re_file, :pwd
|
|
9
|
-
|
|
10
|
-
# @param args [Hash]
|
|
11
|
-
# @return [self]
|
|
12
|
-
def initialize(args)
|
|
13
|
-
args.each { |k, v| instance_variable_set(:"@#{k}", v) }
|
|
14
|
-
@re_file = Regexp.new(re_file)
|
|
15
|
-
end
|
|
16
|
-
end
|
|
17
|
-
end
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pdfh
|
|
4
|
-
# rubocop:disable Layout/HashAlignment
|
|
5
|
-
DOCUMENT_TYPE_TEMPLATE = {
|
|
6
|
-
"name" => "Example Name",
|
|
7
|
-
"re_file" => ".*file_name\.pdf",
|
|
8
|
-
"re_date" => "(\d{2})\/(?<m>\w+)\/(?<y>\d{4})",
|
|
9
|
-
"pwd" => "BASE64_STRING",
|
|
10
|
-
"store_path" => "{YEAR}/sub folder",
|
|
11
|
-
"name_template" => "{period} {original}",
|
|
12
|
-
"sub_types" => []
|
|
13
|
-
}.freeze
|
|
14
|
-
|
|
15
|
-
SETTINGS_TEMPLATE = {
|
|
16
|
-
"lookup_dirs" => ["~/Downloads"].freeze,
|
|
17
|
-
"destination_base_path" => "~/Documents",
|
|
18
|
-
"document_types" => [DOCUMENT_TYPE_TEMPLATE].freeze
|
|
19
|
-
}.freeze
|
|
20
|
-
# rubocop:enable Layout/HashAlignment
|
|
21
|
-
end
|
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require "optparse"
|
|
4
|
-
|
|
5
|
-
module Pdfh
|
|
6
|
-
# Handles Argument options
|
|
7
|
-
class OptParser
|
|
8
|
-
# @param argv [Array<String>] command line arguments (ie. ARGV)
|
|
9
|
-
# @param console [Pdfh::Console, nil]
|
|
10
|
-
# @return [self]
|
|
11
|
-
def initialize(argv:, console: nil)
|
|
12
|
-
@argv = argv
|
|
13
|
-
@console = console || Console.new(false)
|
|
14
|
-
@options = {
|
|
15
|
-
verbose: false,
|
|
16
|
-
dry: false,
|
|
17
|
-
type: nil,
|
|
18
|
-
files: []
|
|
19
|
-
}
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
# @return [Hash] Parsed options including flags and file arguments
|
|
23
|
-
def parse_argv
|
|
24
|
-
option_parser = build_option_parser
|
|
25
|
-
non_option_args = option_parser.parse!(@argv)
|
|
26
|
-
@options[:files] = non_option_args
|
|
27
|
-
@options.transform_keys { |key| key.to_s.tr("-", "_").to_sym }
|
|
28
|
-
rescue OptionParser::InvalidOption => e
|
|
29
|
-
@console.error_print(e.message, exit_app: false)
|
|
30
|
-
puts option_parser.help
|
|
31
|
-
exit 1
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
private
|
|
35
|
-
|
|
36
|
-
# @return [OptionParser] Configured OptionParser instance
|
|
37
|
-
def build_option_parser
|
|
38
|
-
OptionParser.new do |opts|
|
|
39
|
-
opts.banner = "Usage: #{opts.program_name} [options] [file1.pdf, ...]"
|
|
40
|
-
opts.separator ""
|
|
41
|
-
opts.separator "Specific options:"
|
|
42
|
-
|
|
43
|
-
opts.on("-tID", "--type=ID", "Document type id (requires a trailing file list)") { @options[:type] = _1 }
|
|
44
|
-
opts.on("-v", "--verbose", "Show more output. Useful for debug") { @options[:verbose] = true }
|
|
45
|
-
opts.on("-d", "--dry", "Dry run, does not write new pdf") { @options[:dry] = true }
|
|
46
|
-
opts.on_tail("-T", "--list-types", "List document types in configuration") { list_types && exit }
|
|
47
|
-
opts.on_tail("-V", "--version", "Show version") { version || exit }
|
|
48
|
-
opts.on_tail("-h", "--help", "help (this dialog)") { help || exit }
|
|
49
|
-
end
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
# @return [nil]
|
|
53
|
-
def version
|
|
54
|
-
@console.info "#{build_option_parser.program_name} v#{Pdfh::VERSION}"
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
# @return [nil]
|
|
58
|
-
def help
|
|
59
|
-
@console.info build_option_parser
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# Lists the available document types
|
|
63
|
-
# @return [nil]
|
|
64
|
-
def list_types
|
|
65
|
-
Pdfh.instance_variable_set(:@options, Options.new(@options))
|
|
66
|
-
Pdfh.instance_variable_set(:@console, @console)
|
|
67
|
-
|
|
68
|
-
settings = SettingsBuilder.build
|
|
69
|
-
spacing = " " * 2
|
|
70
|
-
max_width = settings.document_types.map { |t| t.gid.size }.max
|
|
71
|
-
@console.info "#{spacing}#{"ID".ljust(max_width)} Type Name"
|
|
72
|
-
@console.info "#{spacing}#{"—" * max_width} #{"—" * 23}"
|
|
73
|
-
settings.document_types.each do |type|
|
|
74
|
-
@console.info "#{spacing}#{type.gid.ljust(max_width).yellow} #{type.name}"
|
|
75
|
-
end
|
|
76
|
-
end
|
|
77
|
-
end
|
|
78
|
-
end
|
data/lib/pdfh/utils/options.rb
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pdfh
|
|
4
|
-
# Argument Options object container
|
|
5
|
-
class Options
|
|
6
|
-
attr_reader :type, :files
|
|
7
|
-
|
|
8
|
-
# @param arg_options [Hash]
|
|
9
|
-
# @return [self]
|
|
10
|
-
def initialize(arg_options)
|
|
11
|
-
@verbose = arg_options[:verbose]
|
|
12
|
-
@dry = arg_options[:dry]
|
|
13
|
-
@type = arg_options[:type]
|
|
14
|
-
@files = arg_options[:files] || []
|
|
15
|
-
@mode = type ? :file : :directory
|
|
16
|
-
end
|
|
17
|
-
|
|
18
|
-
# @return [Boolean]
|
|
19
|
-
def verbose?
|
|
20
|
-
@verbose
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
# @return [Boolean]
|
|
24
|
-
def dry?
|
|
25
|
-
@dry
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
# @return [Boolean]
|
|
29
|
-
def file_mode?
|
|
30
|
-
@mode == :file
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
# @return [Boolean]
|
|
34
|
-
def files?
|
|
35
|
-
!!@files&.any?
|
|
36
|
-
end
|
|
37
|
-
end
|
|
38
|
-
end
|
|
@@ -1,122 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pdfh
|
|
4
|
-
# Handles the PDF file
|
|
5
|
-
class PdfFileHandler
|
|
6
|
-
attr_reader :file, :type, :document
|
|
7
|
-
|
|
8
|
-
# @param [String] file
|
|
9
|
-
# @param [DocumentType, nil] type
|
|
10
|
-
# @return [self]
|
|
11
|
-
def initialize(file, type)
|
|
12
|
-
@file = file
|
|
13
|
-
@type = type
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
# @return [boolean]
|
|
17
|
-
def type?
|
|
18
|
-
!!type
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
# Generate document, and process actions
|
|
22
|
-
# @return [void]
|
|
23
|
-
def process_document(base_path)
|
|
24
|
-
Pdfh.info "Working on #{base_name.colorize(:light_green)}"
|
|
25
|
-
raise IOError, "File #{file} not found" unless File.exist?(file)
|
|
26
|
-
|
|
27
|
-
@document = Document.new(file, type, extract_text)
|
|
28
|
-
document.process
|
|
29
|
-
document.print_info
|
|
30
|
-
write_pdf(base_path)
|
|
31
|
-
|
|
32
|
-
nil
|
|
33
|
-
rescue StandardError => e
|
|
34
|
-
Pdfh.ident_print "Doc Error", e.message, color: :red, width: 12
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
# Create a backup of original document
|
|
38
|
-
# @return [void]
|
|
39
|
-
def make_document_backup(document)
|
|
40
|
-
Pdfh.debug "~~~~~~~~~~~~~~~~~~ Creating PDF backup"
|
|
41
|
-
Dir.chdir(document.home_dir) do
|
|
42
|
-
Pdfh.debug " Working on: #{document.home_dir.inspect} directory"
|
|
43
|
-
Pdfh.debug " mv #{document.file_name.inspect} -> #{document.backup_name.inspect}"
|
|
44
|
-
File.rename(document.file_name, document.backup_name) unless Pdfh.dry?
|
|
45
|
-
end
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
# @return [void]
|
|
49
|
-
def copy_companion_files(destination, document)
|
|
50
|
-
Pdfh.debug "~~~~~~~~~~~~~~~~~~ Writing Companion files"
|
|
51
|
-
document.companion_files.each do |file|
|
|
52
|
-
Pdfh.debug " Working on #{file.inspect}..."
|
|
53
|
-
src_name = File.join(document.home_dir, file)
|
|
54
|
-
src_ext = File.extname(file)
|
|
55
|
-
dest_name = File.basename(document.new_name, ".pdf")
|
|
56
|
-
dest_full = File.join(destination, "#{dest_name}#{src_ext}")
|
|
57
|
-
Pdfh.debug " cp #{src_name} --> #{dest_full}"
|
|
58
|
-
FileUtils.cp(src_name, dest_full) unless Pdfh.dry?
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# @return [String]
|
|
63
|
-
def base_name
|
|
64
|
-
File.basename(file)
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
private
|
|
68
|
-
|
|
69
|
-
# @return [void]
|
|
70
|
-
def write_pdf(base_path)
|
|
71
|
-
full_path = File.join(base_path, document.store_path, document.new_name)
|
|
72
|
-
dir_path = File.join(base_path, document.store_path)
|
|
73
|
-
|
|
74
|
-
FileUtils.mkdir_p(dir_path)
|
|
75
|
-
|
|
76
|
-
write_new_pdf(dir_path, full_path)
|
|
77
|
-
make_document_backup(document)
|
|
78
|
-
copy_companion_files(dir_path, document)
|
|
79
|
-
rescue StandardError => e
|
|
80
|
-
Pdfh.ident_print "Doc Error", e.message, color: :red, width: IDENT
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
def qpdf_command(*args)
|
|
84
|
-
password_option = type&.password ? "--password=#{type&.password.inspect} " : ""
|
|
85
|
-
|
|
86
|
-
%(qpdf #{password_option}--decrypt #{args.join(" ")})
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
# Gets the text from the pdf in order to execute
|
|
90
|
-
# the regular expression matches
|
|
91
|
-
# @return [String]
|
|
92
|
-
def extract_text
|
|
93
|
-
temp = Tempfile.new("pdfh")
|
|
94
|
-
Pdfh.debug "~~~~~~~~~~~~~~~~~~ Extract PDF text"
|
|
95
|
-
Pdfh.debug " --> #{temp.path} temporal file assigned."
|
|
96
|
-
|
|
97
|
-
cmd1 = qpdf_command("--stream-data=uncompress", file.inspect, temp.path)
|
|
98
|
-
Pdfh.debug " DeCrypt Command: #{cmd1}"
|
|
99
|
-
_result = `#{cmd1}`
|
|
100
|
-
|
|
101
|
-
cmd2 = %(pdftotext -enc UTF-8 #{temp.path} -)
|
|
102
|
-
Pdfh.debug " Extract Command: #{cmd2}"
|
|
103
|
-
text = `#{cmd2}`
|
|
104
|
-
Pdfh.debug " Text: #{text.inspect}"
|
|
105
|
-
text
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
# @return [void]
|
|
109
|
-
def write_new_pdf(dir_path, full_path)
|
|
110
|
-
Pdfh.debug "~~~~~~~~~~~~~~~~~~ Writing PDFs"
|
|
111
|
-
raise IOError, "Path #{dir_path} not found." unless Dir.exist?(dir_path)
|
|
112
|
-
|
|
113
|
-
cmd = qpdf_command(file.inspect, full_path.inspect)
|
|
114
|
-
Pdfh.debug " Write PDF Command: #{cmd}"
|
|
115
|
-
|
|
116
|
-
return if Pdfh.dry?
|
|
117
|
-
|
|
118
|
-
_result = `#{cmd}`
|
|
119
|
-
raise IOError, "New PDF file #{full_path.inspect} was not created." unless File.file?(full_path)
|
|
120
|
-
end
|
|
121
|
-
end
|
|
122
|
-
end
|
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Pdfh
|
|
4
|
-
# Loads or creates a default settings yaml file
|
|
5
|
-
class SettingsBuilder
|
|
6
|
-
CONFIG_FILE_LOCATIONS = [Dir.pwd, ENV.fetch("XDG_CONFIG_HOME", "~/.config"), "~"].freeze
|
|
7
|
-
SUPPORTED_EXTENSIONS = %w[yml yaml].freeze
|
|
8
|
-
ENV_VAR = "PDFH_CONFIG_FILE"
|
|
9
|
-
|
|
10
|
-
class << self
|
|
11
|
-
# @return [Pdfh::Settings]
|
|
12
|
-
def build
|
|
13
|
-
env_config_file = ENV.fetch(ENV_VAR, nil)
|
|
14
|
-
raise "File path in #{ENV_VAR} not found" if env_config_file && !File.exist?(env_config_file)
|
|
15
|
-
|
|
16
|
-
config_file = env_config_file || search_config_file
|
|
17
|
-
file_hash = YAML.load_file(config_file, symbolize_names: true)
|
|
18
|
-
Pdfh.debug "Loaded configuration file: #{config_file}"
|
|
19
|
-
|
|
20
|
-
Settings.new(file_hash)
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
private
|
|
24
|
-
|
|
25
|
-
# @return [String]
|
|
26
|
-
def config_file_name
|
|
27
|
-
File.basename($PROGRAM_NAME)
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
# @return [String (frozen)]
|
|
31
|
-
def default_settings_name
|
|
32
|
-
"#{config_file_name}.#{SUPPORTED_EXTENSIONS.first}"
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# @return [String]
|
|
36
|
-
def create_settings_file
|
|
37
|
-
full_path = File.join(File.expand_path("~"), default_settings_name)
|
|
38
|
-
return if File.exist?(full_path) # double check
|
|
39
|
-
|
|
40
|
-
File.write(full_path, Pdfh::SETTINGS_TEMPLATE.to_yaml)
|
|
41
|
-
Pdfh.info "Default settings file was created: #{full_path.colorize(:green)}"
|
|
42
|
-
|
|
43
|
-
full_path
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
# Gets the first settings file found, or creates a new one
|
|
47
|
-
# @return [String]
|
|
48
|
-
def search_config_file
|
|
49
|
-
CONFIG_FILE_LOCATIONS.each do |dir_string|
|
|
50
|
-
dir = File.expand_path(dir_string)
|
|
51
|
-
SUPPORTED_EXTENSIONS.each do |ext|
|
|
52
|
-
path = File.join(dir, "#{config_file_name}.#{ext}")
|
|
53
|
-
return path if File.exist?(path)
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
Pdfh.warn_print "No configuration file was found within paths: #{CONFIG_FILE_LOCATIONS.join(", ")}"
|
|
58
|
-
create_settings_file
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
end
|
|
62
|
-
end
|