pdfh 3.3.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.editorconfig +16 -0
- data/.gitignore +3 -0
- data/.pre-commit-config.yaml +13 -5
- data/.rubocop.yml +5 -1
- data/.rubocop_todo.yml +5 -18
- data/.simplecov +32 -0
- data/AGENTS.md +174 -0
- data/CHANGELOG.md +74 -9
- data/Gemfile +4 -7
- data/Gemfile.lock +68 -80
- data/README.md +72 -37
- data/Rakefile +24 -6
- data/bin/console +3 -10
- data/bin/run +0 -1
- data/commitlint.config.js +1 -1
- data/exe/pdfh +1 -1
- data/justfile +65 -0
- data/lib/pdfh/main.rb +25 -120
- data/lib/pdfh/models/document.rb +43 -128
- data/lib/pdfh/models/document_type.rb +35 -67
- data/lib/pdfh/models/run_options.rb +20 -0
- data/lib/pdfh/models/settings.rb +23 -69
- data/lib/pdfh/services/directory_scanner.rb +27 -0
- data/lib/pdfh/services/document_manager.rb +125 -0
- data/lib/pdfh/services/document_matcher.rb +57 -0
- data/lib/pdfh/services/opt_parser.rb +76 -0
- data/lib/pdfh/services/pdf_text_extractor.rb +45 -0
- data/lib/pdfh/services/settings_builder.rb +113 -0
- data/lib/pdfh/services/settings_validator.rb +150 -0
- data/lib/pdfh/utils/console.rb +5 -5
- data/lib/pdfh/utils/date_info.rb +55 -0
- data/lib/pdfh/utils/file_info.rb +47 -0
- data/lib/pdfh/utils/rename_validator.rb +4 -3
- data/lib/pdfh/version.rb +1 -1
- data/lib/pdfh.rb +26 -20
- data/mise.toml +25 -1
- data/pdfh.gemspec +3 -3
- metadata +19 -15
- data/lib/ext/string.rb +0 -9
- data/lib/pdfh/concerns/password_decodable.rb +0 -31
- data/lib/pdfh/models/document_period.rb +0 -37
- data/lib/pdfh/models/document_sub_type.rb +0 -6
- data/lib/pdfh/models/zip_types.rb +0 -17
- data/lib/pdfh/settings_template.rb +0 -21
- data/lib/pdfh/utils/opt_parser.rb +0 -78
- data/lib/pdfh/utils/options.rb +0 -38
- data/lib/pdfh/utils/pdf_file_handler.rb +0 -122
- data/lib/pdfh/utils/settings_builder.rb +0 -62
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pdfh
  module Services
    # Files a matched document into the destination tree.
    #
    # For one document it: optionally prints a summary (verbose mode), creates the
    # destination directory, copies the PDF under its new name, copies every
    # companion file (same base name, non-PDF extension) next to it, and finally
    # renames the original PDF by appending ".bkp". In dry-run mode no file system
    # change is made; only log output is produced.
    class DocumentManager
      PDF_UNLOCKED_MAGIC_SUFFIX = "_unlocked"
      PDF_EXTENSION = ".pdf"

      # @param document [Document]
      # @param base_path [String] root directory the document's store path is joined to
      # @param dry_run [Boolean] when true, nothing is created, copied or renamed
      # @return [DocumentManager]
      def initialize(document, base_path:, dry_run:)
        @document = document
        @base_path = base_path
        @dry_run = dry_run
      end

      # Runs the whole filing sequence for the document.
      # @return [void]
      def call
        destination_dir = File.join(@base_path, document.store_path)
        destination_file = File.join(destination_dir, document.new_name)

        print_info(destination_dir) if Pdfh.logger.verbose?
        create_destination_dir(destination_dir)
        copy_pdf(destination_file)
        move_companion_files(destination_dir)
        backup_original
      end

      private

      # @!attribute [r] document
      #   @return [Document]
      attr_reader :document

      # @return [Boolean]
      def dry_run? = @dry_run

      # Creates the destination directory (and parents) when it is missing.
      # @param dir [String]
      # @return [void]
      def create_destination_dir(dir)
        return if Dir.exist?(dir)

        Pdfh.logger.debug "Creating directory: #{dir}"
        FileUtils.mkdir_p(dir) unless dry_run?
      end

      # Logs the one-line summary (skipped in verbose mode, where print_info already
      # reported the details) and copies the PDF to its destination.
      # @param destination_file [String]
      # @return [void]
      def copy_pdf(destination_file)
        source_file = document.file_info.path

        companion_extensions = companion_files.map { File.extname(_1).delete(".") }
        companion_str = companion_extensions.any? ? " [#{companion_extensions.join(", ").colorize(:magenta)}]" : ""
        message = format("[%<type>s] %<file>s -> %<dest>s#{companion_str}",
                         type: document.type.name.ljust(15).colorize(:green),
                         file: document.file_info.name.colorize(:blue),
                         dest: document.new_name.colorize(:cyan))
        if dry_run?
          Pdfh.logger.info "#{"dry".colorize(:red)} #{message}" unless Pdfh.logger.verbose?
          return
        end

        Pdfh.logger.info "#{"".colorize(:green)} #{message}" unless Pdfh.logger.verbose?
        FileUtils.cp(source_file, destination_file, preserve: true)
      end

      # Copies each companion file into the destination directory, renamed to the
      # document's new base name while keeping the companion's own extension.
      # Note: despite the method name the originals are copied, not moved.
      # @param destination_dir [String]
      # @return [void]
      def move_companion_files(destination_dir)
        new_stem = File.basename(document.new_name, document.file_info.extension)

        companion_files.each do |companion|
          destination = File.join(destination_dir, new_stem + File.extname(companion))

          FileUtils.cp(companion, destination, preserve: true) unless dry_run?
        end
      end

      # Renames the original PDF by appending ".bkp" to its path.
      # @return [void]
      def backup_original
        source_file = document.file_info.path
        backup_file = "#{source_file}.bkp"

        FileUtils.mv(source_file, backup_file) unless dry_run?
      end

      # Finds companion files by removing the _unlocked suffix from the PDF name if present.
      # This allows PDFs unlocked by qpdf to locate their original companion files
      # (e.g., .xml, .txt) that were never renamed with the _unlocked suffix.
      #
      # Entries are matched by literal prefix rather than through Dir.glob, so file or
      # directory names containing glob metacharacters ("[", "{", "*", "?") still find
      # their companions. PDFs are excluded regardless of extension case, and only
      # regular files are returned. The result is memoized and sorted by file name.
      #
      # @return [Array<String>] paths of non-PDF files with the same base name
      # @example
      #   # If document is "cuenta_unlocked.pdf", looks for "cuenta.<anything>"
      #   # Returns ["<dir>/cuenta.txt", "<dir>/cuenta.xml"] (excluding "cuenta.pdf")
      def companion_files
        @companion_files ||= begin
          dir = document.file_info.dir
          prefix = "#{document.file_info.stem.delete_suffix(PDF_UNLOCKED_MAGIC_SUFFIX)}."

          Dir.children(dir).sort.filter_map do |entry|
            next unless entry.start_with?(prefix)
            next if File.extname(entry).casecmp?(PDF_EXTENSION)

            path = File.join(dir, entry)
            path if File.file?(path)
          end
        end
      end

      # @param property [String]
      # @param info [String]
      # @return [void]
      def print_info_line(property, info)
        Pdfh.logger.ident_print property, info.to_s, color: :light_blue, width: 12
      end

      # Prints the per-document detail lines shown in verbose mode.
      # @param destination_dir [String]
      # @return [void]
      def print_info(destination_dir)
        print_info_line "Type", document.type.name
        print_info_line "Period", document.date_info.period
        print_info_line "New Name", document.new_name
        print_info_line "Store Path", destination_dir
        print_info_line "Extra files", companion_files.any? ? companion_files.join(", ") : "—"
        print_info_line "Processed?", "No (in Dry mode)" if dry_run?
      end
    end
  end
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pdfh
  module Services
    # Checks extracted PDF text against every configured document type and
    # builds one Document per type whose id and date patterns both match.
    class DocumentMatcher
      # @param document_types [Array<DocumentType>]
      # @return [DocumentMatcher]
      def initialize(document_types)
        @document_types = document_types
      end

      # @param file [String] Path to the PDF file
      # @param text [String] Extracted text from the PDF
      # @return [Array<Document>] one entry per matching type, in configuration order
      def match(file, text)
        @document_types.filter_map do |doc_type|
          # The id pattern decides whether this type applies to the content at all
          next unless doc_type.re_id.match?(text)

          Pdfh.logger.debug "Matched document type: #{doc_type.name}"

          # A type only counts when its date pattern is also found in the text
          found_date = doc_type.re_date.match(text)
          if found_date.nil?
            Pdfh.logger.debug "No date match found for #{doc_type.name}"
            next
          end

          Document.new(file, doc_type, text, extract_date_captures(found_date))
        end
      end

      private

      # Turns the date MatchData into a captures hash. Named groups are returned
      # as-is; otherwise positional groups are read in the order month, year, day
      # and groups that are absent are dropped.
      # @param match_data [MatchData]
      # @return [Hash{String => String}] Hash with keys 'm' (month), 'y' (year), 'd' (day)
      def extract_date_captures(match_data)
        unless match_data.names.empty?
          Pdfh.logger.debug "Using #{"named".colorize(:green)} captures: #{match_data.named_captures.inspect}"
          return match_data.named_captures
        end

        month, year, day = match_data.captures
        positional = { "m" => month, "y" => year, "d" => day }.compact
        Pdfh.logger.debug "Using #{"positional".colorize(:red)} captures: #{positional.inspect}"
        positional
      end
    end
  end
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "optparse"
|
|
4
|
+
|
|
5
|
+
module Pdfh
  module Services
    # Parses the command line flags and handles the informational options
    # (list types, version, help) that print something and end the program.
    class OptParser
      # @param argv [Array<String>] command line arguments (ie. ARGV)
      # @param console [Pdfh::Console, nil] output target; a non-verbose Console when nil
      # @return [self]
      def initialize(argv:, console: nil)
        @argv = argv
        @console = console || Console.new(false)
        @options = {
          verbose: false,
          dry: false
        }
      end

      # Parses (and consumes from argv) the recognised options.
      #
      # Any parse failure — unknown option, ambiguous abbreviation, or a value
      # given to a flag that takes none (e.g. "--dry=1") — prints the error and
      # the help text, then exits with status 1.
      #
      # @return [Hash{Symbol => Boolean}] the :verbose and :dry flags
      def parse_argv
        option_parser = build_option_parser
        option_parser.parse!(@argv)
        @options
      rescue OptionParser::ParseError => e
        # ParseError is the common parent of InvalidOption, NeedlessArgument,
        # AmbiguousOption, etc., so every user-input error gets the same treatment.
        @console.error_print(e.message, exit_app: false)
        @console.info option_parser.help
        exit 1
      end

      private

      # @return [OptionParser] Configured OptionParser instance
      def build_option_parser
        OptionParser.new do |opts|
          opts.banner = "Usage: #{opts.program_name} [options]"
          opts.separator ""
          opts.separator "Specific options:"

          opts.on("-v", "--verbose", "Show more output. Useful for debug") { @options[:verbose] = true }
          opts.on("-d", "--dry", "Dry run, does not write new pdf") { @options[:dry] = true }
          # Informational options always end the program after printing; the exit
          # must not depend on what the printing helper happens to return.
          opts.on_tail("-T", "--list-types", "List document types in configuration") do
            list_types
            exit
          end
          opts.on_tail("-V", "--version", "Show version") do
            version
            exit
          end
          opts.on_tail("-h", "--help", "help (this dialog)") do
            help
            exit
          end
        end
      end

      # Prints the program name and version.
      # @return [void]
      def version
        @console.info "#{build_option_parser.program_name} v#{Pdfh::VERSION}"
      end

      # Prints the usage/help text.
      # @return [void]
      def help
        @console.info build_option_parser
      end

      # Lists the available document types
      # @return [void]
      def list_types
        # Temporarily set logger for loading settings
        Pdfh.logger = @console

        settings = SettingsBuilder.call
        spacing = " " * 2
        max_width = settings.document_types.map { |t| t.gid.size }.max
        @console.info "#{spacing}#{"ID".ljust(max_width)}  Type Name"
        @console.info "#{spacing}#{"—" * max_width}  #{"—" * 23}"
        settings.document_types.each do |type|
          @console.info "#{spacing}#{type.gid.ljust(max_width).yellow}  #{type.name}"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "shellwords"
|
|
4
|
+
require "english"
|
|
5
|
+
|
|
6
|
+
module Pdfh
|
|
7
|
+
module Services
|
|
8
|
+
# Extracts text from a PDF using pdftotext command
|
|
9
|
+
class PdfTextExtractor
|
|
10
|
+
# @param pdf_path [String]
|
|
11
|
+
# @return [String]
|
|
12
|
+
# @raise [ArgumentError] if file doesn't exist or is not a PDF
|
|
13
|
+
# @raise [RuntimeError] if extraction fails
|
|
14
|
+
def self.call(pdf_path)
|
|
15
|
+
validate_file!(pdf_path)
|
|
16
|
+
|
|
17
|
+
# Use Shellwords to properly escape the path for shell execution
|
|
18
|
+
safe_path = Shellwords.escape(pdf_path)
|
|
19
|
+
cmd = "pdftotext -enc UTF-8 -layout #{safe_path} - 2>/dev/null"
|
|
20
|
+
|
|
21
|
+
text = `#{cmd}`
|
|
22
|
+
exit_status = $CHILD_STATUS
|
|
23
|
+
|
|
24
|
+
# Check if command executed successfully
|
|
25
|
+
if exit_status.nil? || !exit_status.success?
|
|
26
|
+
Pdfh.logger.debug "Failed to extract text from: #{pdf_path}"
|
|
27
|
+
return ""
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
text
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# @param pdf_path [String]
|
|
34
|
+
# @return [void]
|
|
35
|
+
# @raise [ArgumentError] if validation fails
|
|
36
|
+
def self.validate_file!(pdf_path)
|
|
37
|
+
raise ArgumentError, "PDF path cannot be nil" if pdf_path.nil?
|
|
38
|
+
raise ArgumentError, "PDF path cannot be empty" if pdf_path.empty?
|
|
39
|
+
raise ArgumentError, "File does not exist: #{pdf_path}" unless File.exist?(pdf_path)
|
|
40
|
+
raise ArgumentError, "Not a file: #{pdf_path}" unless File.file?(pdf_path)
|
|
41
|
+
raise ArgumentError, "Not a PDF file: #{pdf_path}" unless File.extname(pdf_path).casecmp?(".pdf")
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pdfh
  module Services
    # Locates the settings YAML file (creating a default one in the home
    # directory when none exists), loads it and builds a Settings object.
    class SettingsBuilder
      CONFIG_FILE_LOCATIONS = [Dir.pwd, ENV.fetch("XDG_CONFIG_HOME", "~/.config"), "~"].freeze
      SUPPORTED_EXTENSIONS = %w[yml yaml].freeze
      ENV_VAR = "PDFH_CONFIG_FILE"

      DOCUMENT_TYPE_TEMPLATE = {
        name: "Example Name",
        re_id: "EXAMPLE MATCH",
        # Backslashes are doubled: in a double-quoted literal "\d" and "\w" would
        # silently collapse to "d" and "w", writing a broken pattern to the
        # generated settings file.
        re_date: "(\\d{2})/(?<m>\\w+)/(?<y>\\d{4})",
        store_path: "{YEAR}/sub folder",
        name_template: "{period} {original}"
      }.freeze

      SETTINGS_TEMPLATE = {
        lookup_dirs: ["~/Downloads"].freeze,
        destination_base_path: "~/Documents",
        document_types: [DOCUMENT_TYPE_TEMPLATE].freeze
      }.freeze

      # @param program_name [String, nil] Override for testing (defaults to PROGRAM_NAME)
      # @return [Settings]
      def self.call(program_name: nil)
        new(program_name: program_name).call
      end

      # @param program_name [String, nil]
      # @return [SettingsBuilder]
      def initialize(program_name: nil)
        @program_name = program_name || PROGRAM_NAME
      end

      # Loads the configuration file, validates it and wraps it in Settings.
      # @return [Settings]
      def call
        config_file = find_config_file
        file_hash = YAML.load_file(config_file, symbolize_names: true)
        Pdfh.logger.debug "Loaded configuration file: #{config_file}"

        validated = Services::SettingsValidator.call(file_hash)
        Settings.new(**validated)
      end

      private

      # Resolves which file to load. A path given through the PDFH_CONFIG_FILE
      # environment variable wins (and must exist); otherwise the known locations
      # are searched and, as a last resort, a default file is created.
      # @return [String]
      # @raise [SettingsIOError] if the environment variable points to a missing file
      def find_config_file
        env_config_file = ENV.fetch(ENV_VAR, nil)
        return search_config_file || create_settings_file unless env_config_file
        return env_config_file if File.exist?(env_config_file)

        raise SettingsIOError,
              "File path in #{ENV_VAR} not found: #{env_config_file}"
      end

      # @return [String] program name without directory or extension
      def config_file_name
        File.basename(@program_name, ".*")
      end

      # @return [String] file name used when a default settings file is created
      def default_settings_name
        "#{config_file_name}.#{SUPPORTED_EXTENSIONS.first}"
      end

      # Writes the settings template to the home directory unless the file is
      # already there.
      # @return [String] full path of the (existing or newly created) file
      def create_settings_file
        full_path = File.join(File.expand_path("~"), default_settings_name)
        return full_path if File.exist?(full_path) # double check

        File.write(full_path, stringify_keys(SETTINGS_TEMPLATE).to_yaml)
        Pdfh.logger.info "Default settings file was created: #{full_path.colorize(:green)}"

        full_path
      end

      # Recursively converts symbol keys to string keys for YAML serialization
      # @param value [Hash, Array, Object]
      # @return [Hash, Array, Object]
      def stringify_keys(value)
        case value
        when Hash then value.to_h { |k, v| [k.to_s, stringify_keys(v)] }
        when Array then value.map { |v| stringify_keys(v) }
        else value
        end
      end

      # Gets the first settings file found, or nil. Locations are tried in
      # CONFIG_FILE_LOCATIONS order, each with every supported extension.
      # @return [String, nil]
      def search_config_file
        CONFIG_FILE_LOCATIONS.each do |dir_string|
          dir = File.expand_path(dir_string)
          SUPPORTED_EXTENSIONS.each do |ext|
            path = File.join(dir, "#{config_file_name}.#{ext}")
            return path if File.exist?(path)
          end
        end

        Pdfh.logger.warn_print "No configuration file was found within paths: #{CONFIG_FILE_LOCATIONS.join(", ")}"
        nil
      end
    end
  end
end
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pdfh
  module Services
    # Validates and processes raw configuration data from YAML into clean,
    # validated attributes ready for Settings construction.
    class SettingsValidator
      # @param config_data [Hash] Raw configuration hash from YAML
      # @return [Hash] Validated and processed attributes for Settings
      # @raise [ArgumentError] if configuration is invalid
      def self.call(config_data)
        new(config_data).call
      end

      # @param config_data [Hash]
      # @return [self]
      def initialize(config_data)
        @config_data = config_data
      end

      # @return [Hash{Symbol => Object}] with keys :lookup_dirs, :base_path, :document_types
      # @raise [ArgumentError] if the configuration is not a Hash or any section is invalid
      def call
        # An empty or scalar/list YAML document does not load as a Hash; report
        # that as a configuration error rather than failing on the first lookup.
        unless @config_data.is_a?(Hash)
          raise ArgumentError, "Configuration must be a mapping of settings, got: #{@config_data.class}"
        end

        {
          lookup_dirs: process_lookup_dirs(@config_data[:lookup_dirs]),
          base_path: process_destination_base(@config_data[:destination_base_path]),
          document_types: build_doc_types(@config_data[:document_types])
        }
      end

      private

      # Expands and validates a directory. Returns nil if invalid.
      # @param dir [String, nil]
      # @return [String, nil] absolute path when it is an existing directory
      def expand_directory(dir)
        return nil unless dir.is_a?(String) && !dir.strip.empty?

        expanded = File.expand_path(dir)
        File.directory?(expanded) ? expanded : nil
      end

      # Same as expand_directory but raises on failure.
      # @param dir [String, nil]
      # @param label [String] used in the error message
      # @return [String]
      # @raise [ArgumentError]
      def expand_directory!(dir, label:)
        expand_directory(dir) || raise(ArgumentError, "#{label} is invalid or does not exist: #{dir.inspect}")
      end

      # Expands the configured lookup directories, warning about and dropping
      # the ones that do not exist.
      # @param lookup_dirs_list [Array<String>]
      # @return [Array<String>]
      # @raise [ArgumentError] if the list is malformed or no directory survives
      def process_lookup_dirs(lookup_dirs_list)
        validate_lookup_dirs_type(lookup_dirs_list)

        dirs = lookup_dirs_list.filter_map do |dir|
          expanded = expand_directory(dir)
          Pdfh.logger.warn_print "lookup_dirs: #{dir.inspect} does not exist, skipping." unless expanded
          expanded
        end
        raise ArgumentError, "No valid lookup_dirs configured." if dirs.empty?

        Pdfh.logger.debug "Configured Look up directories:"
        dirs.each.with_index(1) { |dir, idx| Pdfh.logger.debug "  #{idx}. #{dir}" }
        Pdfh.logger.debug

        dirs
      end

      # @param lookup_dirs_list [Array, nil]
      # @return [void]
      # @raise [ArgumentError] if lookup_dirs_list is invalid
      def validate_lookup_dirs_type(lookup_dirs_list)
        raise ArgumentError, "Look up directories are not configured." if lookup_dirs_list.nil?
        raise ArgumentError, "Look up directories must be an array of strings." unless lookup_dirs_list.is_a?(Array)
        return if lookup_dirs_list.all?(String)

        raise ArgumentError, "Look up directories must be an array of strings."
      end

      # @param dir [String, nil]
      # @return [String]
      # @raise [ArgumentError] if the directory is missing or invalid
      def process_destination_base(dir)
        expand_directory!(dir, label: "destination_base_path")
      end

      # @param doc_types [Array<Hash>]
      # @return [Hash{String => DocumentType}]
      # @raise [ArgumentError] if the list is malformed or no entry is valid
      def build_doc_types(doc_types)
        validate_doc_types_type(doc_types)
        doc_types = parse_doc_types(doc_types)
        raise ArgumentError, "No valid document types configured." if doc_types.empty?

        doc_types
      end

      # @param doc_types [Array, nil]
      # @return [void]
      # @raise [ArgumentError] if doc_types is invalid
      def validate_doc_types_type(doc_types)
        raise ArgumentError, "Document types are not configured." if doc_types.nil?
        raise ArgumentError, "Document types must be an array." unless doc_types.is_a?(Array)
        raise ArgumentError, "Document types must be an array of hashes." unless doc_types.all?(Hash)
      end

      # Builds every document type, keyed by its gid. An entry that is invalid or
      # raises while being built is reported and skipped so the rest still load.
      # @param doc_types [Array<Hash>]
      # @return [Hash{String => DocumentType}]
      def parse_doc_types(doc_types)
        doc_types.each_with_object({}) do |data, result|
          doc_type = build_doc_type(data)
          result.store(doc_type.gid, doc_type) if doc_type
        rescue StandardError => e
          Pdfh.logger.error_print e.message, exit_app: false
          Pdfh.logger.backtrace_print e if Pdfh.logger.verbose?
        end
      end

      # @param data [Hash]
      # @return [DocumentType, nil] Document type when valid, otherwise nil.
      def build_doc_type(data)
        doc_type = DocumentType.new(data)
        return doc_type if doc_type.valid?

        log_doc_type_errors(doc_type)
        nil
      end

      # Logs why a document type was rejected: missing keys and/or unknown
      # placeholders in its store path and name template.
      # @param doc_type [DocumentType]
      # @return [void]
      def log_doc_type_errors(doc_type)
        doc_type_name = doc_type.name.to_s.colorize(:blue)

        if doc_type.missing_keys?
          missing = doc_type.missing_keys.join(", ").colorize(:red)
          Pdfh.logger.info "Document type '#{doc_type_name}' is missing required keys: #{missing}"
        end

        unless doc_type.path_validator.valid?
          # NOTE(review): unknown_list is read from store_path, not from path_validator —
          # confirm store_path responds to it (DocumentType is not visible here).
          unknown = doc_type.store_path.unknown_list
          Pdfh.logger.info "Document type '#{doc_type_name}', path_validator has invalid keys: #{unknown}"
        end

        return if doc_type.name_validator.valid?

        # NOTE(review): same question as above for name_template vs name_validator.
        unknown = doc_type.name_template.unknown_list
        Pdfh.logger.info "Document type '#{doc_type_name}', name_validator has invalid keys: #{unknown}"
      end
    end
  end
end
|
data/lib/pdfh/utils/console.rb
CHANGED
|
@@ -41,10 +41,10 @@ module Pdfh
|
|
|
41
41
|
exit 1 if exit_app
|
|
42
42
|
end
|
|
43
43
|
|
|
44
|
-
# @param
|
|
44
|
+
# @param error [StandardError]
|
|
45
45
|
# @return [void]
|
|
46
|
-
def backtrace_print(
|
|
47
|
-
|
|
46
|
+
def backtrace_print(error)
|
|
47
|
+
error.backtrace&.each do |line|
|
|
48
48
|
output " ↳ #{line.sub("#{Dir.pwd}/", "")}".colorize(:light_black)
|
|
49
49
|
end
|
|
50
50
|
end
|
|
@@ -87,13 +87,13 @@ module Pdfh
|
|
|
87
87
|
nil
|
|
88
88
|
end
|
|
89
89
|
|
|
90
|
-
private
|
|
91
|
-
|
|
92
90
|
# @return [boolean]
|
|
93
91
|
def verbose?
|
|
94
92
|
@verbose
|
|
95
93
|
end
|
|
96
94
|
|
|
95
|
+
private
|
|
96
|
+
|
|
97
97
|
# @return [void]
|
|
98
98
|
def output(msg)
|
|
99
99
|
puts(msg)
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pdfh
  # Interprets the date pieces captured by a document type's date regex.
  # Converts the raw strings into numeric month/year values and derives the
  # groupings built on them (quarter, bimester, "YYYY-MM" period).
  class DateInfo
    # @param date_captures [Hash{String => String}] Captured date components.
    #   Keys: "m" (month — name or number), "y" (year — 2 or 4 digits), "d" (day, optional)
    # @return [self]
    def initialize(date_captures)
      @date_captures = date_captures
    end

    # @return [Hash{String => String}] Raw date captures as provided by the regex match
    def captures = @date_captures

    # @return [Integer] Month number as normalized by Month.normalize_to_i
    def month
      @month ||= Month.normalize_to_i(@date_captures["m"])
    end

    # A two-character year is prefixed with "20"; anything else is used as given.
    # @return [Integer] Full four-digit year (e.g., 2024)
    def year
      @year ||= begin
        digits = @date_captures["y"]
        digits = "20#{digits}" if digits.size == 2
        digits.to_i
      end
    end

    # @return [String, nil] Day of month if captured, nil otherwise
    def day = @date_captures["d"]

    # Q1: Jan–Mar, Q2: Apr–Jun, Q3: Jul–Sep, Q4: Oct–Dec
    # @return [Integer] Quarter (1–4) based on the month
    def quarter
      @quarter ||= 1 + ((month - 1) / 3)
    end

    # B1: Jan–Feb, B2: Mar–Apr, B3: May–Jun, B4: Jul–Aug, B5: Sep–Oct, B6: Nov–Dec
    # @return [Integer] Bimester (1–6) based on the month
    def bimester
      @bimester ||= 1 + ((month - 1) / 2)
    end

    # @return [String] Period in format "YYYY-MM"
    def period
      [year, month.to_s.rjust(2, "0")].join("-")
    end
  end
end
|