pdfh 3.3.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.editorconfig +16 -0
- data/.gitignore +3 -0
- data/.pre-commit-config.yaml +13 -5
- data/.rubocop.yml +5 -1
- data/.rubocop_todo.yml +5 -18
- data/.simplecov +32 -0
- data/AGENTS.md +174 -0
- data/CHANGELOG.md +74 -9
- data/Gemfile +4 -7
- data/Gemfile.lock +68 -80
- data/README.md +72 -37
- data/Rakefile +24 -6
- data/bin/console +3 -10
- data/bin/run +0 -1
- data/commitlint.config.js +1 -1
- data/exe/pdfh +1 -1
- data/justfile +65 -0
- data/lib/pdfh/main.rb +25 -120
- data/lib/pdfh/models/document.rb +43 -128
- data/lib/pdfh/models/document_type.rb +35 -67
- data/lib/pdfh/models/run_options.rb +20 -0
- data/lib/pdfh/models/settings.rb +23 -69
- data/lib/pdfh/services/directory_scanner.rb +27 -0
- data/lib/pdfh/services/document_manager.rb +125 -0
- data/lib/pdfh/services/document_matcher.rb +57 -0
- data/lib/pdfh/services/opt_parser.rb +76 -0
- data/lib/pdfh/services/pdf_text_extractor.rb +45 -0
- data/lib/pdfh/services/settings_builder.rb +113 -0
- data/lib/pdfh/services/settings_validator.rb +150 -0
- data/lib/pdfh/utils/console.rb +5 -5
- data/lib/pdfh/utils/date_info.rb +55 -0
- data/lib/pdfh/utils/file_info.rb +47 -0
- data/lib/pdfh/utils/rename_validator.rb +4 -3
- data/lib/pdfh/version.rb +1 -1
- data/lib/pdfh.rb +26 -20
- data/mise.toml +25 -1
- data/pdfh.gemspec +3 -3
- metadata +19 -15
- data/lib/ext/string.rb +0 -9
- data/lib/pdfh/concerns/password_decodable.rb +0 -31
- data/lib/pdfh/models/document_period.rb +0 -37
- data/lib/pdfh/models/document_sub_type.rb +0 -6
- data/lib/pdfh/models/zip_types.rb +0 -17
- data/lib/pdfh/settings_template.rb +0 -21
- data/lib/pdfh/utils/opt_parser.rb +0 -78
- data/lib/pdfh/utils/options.rb +0 -38
- data/lib/pdfh/utils/pdf_file_handler.rb +0 -122
- data/lib/pdfh/utils/settings_builder.rb +0 -62
data/lib/pdfh/main.rb
CHANGED
|
@@ -7,137 +7,42 @@ module Pdfh
|
|
|
7
7
|
# @param argv [Array<String>]
|
|
8
8
|
# @return [void]
|
|
9
9
|
def start(argv:)
|
|
10
|
-
arg_options =
|
|
11
|
-
|
|
12
|
-
assign_global_utils(@options)
|
|
13
|
-
Pdfh.print_options(arg_options)
|
|
10
|
+
arg_options = Services::OptParser.new(argv: argv).parse_argv
|
|
11
|
+
options = RunOptions.new(**arg_options)
|
|
14
12
|
|
|
15
|
-
|
|
16
|
-
Pdfh.
|
|
13
|
+
# Initialize the global logger
|
|
14
|
+
Pdfh.logger = Console.new(options.verbose?)
|
|
15
|
+
Pdfh.logger.print_options(arg_options)
|
|
17
16
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
Pdfh.error_print(e.message, exit_app: false)
|
|
21
|
-
Pdfh.create_settings_file
|
|
22
|
-
exit(1)
|
|
23
|
-
rescue StandardError => e
|
|
24
|
-
Pdfh.backtrace_print e if Pdfh.verbose?
|
|
25
|
-
Pdfh.error_print(e.message)
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
private
|
|
29
|
-
|
|
30
|
-
attr_reader :options, :settings
|
|
31
|
-
|
|
32
|
-
# @param options [Options]
|
|
33
|
-
# @return [void]
|
|
34
|
-
def assign_global_utils(options)
|
|
35
|
-
Pdfh.instance_variable_set(:@options, options)
|
|
36
|
-
Pdfh.instance_variable_set(:@console, Console.new(options.verbose?))
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
# @param [String] file_name
|
|
40
|
-
# @return [DocumentType, nil]
|
|
41
|
-
def match_doc_type(file_name)
|
|
42
|
-
settings.document_types.each do |type|
|
|
43
|
-
match = type.re_file.match(file_name)
|
|
44
|
-
return type if match
|
|
45
|
-
end
|
|
46
|
-
nil
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
# @return [void]
|
|
50
|
-
def process_provided_files
|
|
51
|
-
type_id = options.type
|
|
52
|
-
raise ArgumentError, "No files provided to process #{type_id.inspect} type." unless options.files?
|
|
53
|
-
|
|
54
|
-
type = settings.document_type(type_id)
|
|
55
|
-
Pdfh.error_print "Type #{type_id.inspect} was not found." if type.nil?
|
|
56
|
-
options.files.each do |file|
|
|
57
|
-
next Pdfh.warn_print "File #{file.inspect} does not exist." unless File.exist?(file)
|
|
58
|
-
next Pdfh.warn_print "File #{file.inspect} is not a pdf." unless File.extname(file) == ".pdf"
|
|
59
|
-
|
|
60
|
-
PdfFileHandler.new(file, type).process_document(settings.base_path)
|
|
61
|
-
end
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
# @return [void]
|
|
65
|
-
def process_lookup_dirs
|
|
66
|
-
settings.lookup_dirs.each do |work_directory|
|
|
67
|
-
process_directory(work_directory)
|
|
68
|
-
end
|
|
69
|
-
end
|
|
17
|
+
settings = Services::SettingsBuilder.call
|
|
18
|
+
Pdfh.logger.debug "Destination path: #{settings.base_path.colorize(:light_blue)}"
|
|
70
19
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
def process_zip_files(work_directory)
|
|
74
|
-
@settings.zip_types&.each do |zip_type|
|
|
75
|
-
find_files(work_directory, :zip).each do |file|
|
|
76
|
-
next unless zip_type.re_file.match?(File.basename(file))
|
|
20
|
+
files = Services::DirectoryScanner.new(settings.lookup_dirs).scan
|
|
21
|
+
matcher = Services::DocumentMatcher.new(settings.document_types)
|
|
77
22
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
end
|
|
82
|
-
end
|
|
83
|
-
end
|
|
23
|
+
files.each do |file_path|
|
|
24
|
+
Pdfh.logger.info "Working on: #{file_path.colorize(:green)}" if Pdfh.logger.verbose?
|
|
25
|
+
text = Services::PdfTextExtractor.call(file_path)
|
|
84
26
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
# @return [Array<String>]
|
|
88
|
-
def find_files(directory, type)
|
|
89
|
-
glob = File.join(directory, "*.#{type}")
|
|
90
|
-
Dir.glob(glob)
|
|
91
|
-
end
|
|
27
|
+
documents = matcher.match(file_path, text)
|
|
28
|
+
next Pdfh.logger.debug "No document type match found for #{file_path.colorize(:yellow)}" if documents.empty?
|
|
92
29
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
processed_result = RunResult.new
|
|
97
|
-
files = find_files(work_directory, :pdf)
|
|
98
|
-
files.each do |pdf_file|
|
|
99
|
-
type = match_doc_type(pdf_file)
|
|
100
|
-
if type
|
|
101
|
-
PdfFileHandler.new(pdf_file, type).process_document(settings.base_path)
|
|
102
|
-
processed_result.add_processed(pdf_file)
|
|
103
|
-
else
|
|
104
|
-
processed_result.add_ignored(pdf_file)
|
|
30
|
+
unless documents.one?
|
|
31
|
+
matches = documents.map { _1.type.name.inspect }.join(", ")
|
|
32
|
+
next Pdfh.logger.warn_print "Skipping #{file_path.inspect} as multiple matches found: #{matches}."
|
|
105
33
|
end
|
|
106
|
-
end
|
|
107
|
-
print_processing_results(processed_result)
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
# @return [String]
|
|
111
|
-
def base_name_no_ext(file)
|
|
112
|
-
File.basename(file, File.extname(file))
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
def print_processing_results(result)
|
|
116
|
-
Pdfh.info " (No files processed)".colorize(:light_black) if result.processed.empty?
|
|
117
|
-
return unless Pdfh.verbose?
|
|
118
34
|
|
|
119
|
-
|
|
120
|
-
result.ignored.each.with_index(1) do |file, index|
|
|
121
|
-
Pdfh.ident_print index, base_name_no_ext(file), color: :magenta
|
|
35
|
+
Services::DocumentManager.new(documents.first, base_path: settings.base_path, dry_run: options.dry?).call
|
|
122
36
|
end
|
|
123
|
-
end
|
|
124
|
-
end
|
|
125
|
-
|
|
126
|
-
# keeps track of the processed and ignored files
|
|
127
|
-
class RunResult
|
|
128
|
-
attr_reader :processed, :ignored
|
|
129
37
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
38
|
+
nil
|
|
39
|
+
rescue SettingsIOError => e
|
|
40
|
+
Pdfh.logger.error_print(e.message, exit_app: false)
|
|
41
|
+
exit(1)
|
|
42
|
+
rescue StandardError => e
|
|
43
|
+
Pdfh.logger.backtrace_print(e) if Pdfh.logger.verbose?
|
|
44
|
+
Pdfh.logger.error_print(e.message)
|
|
134
45
|
end
|
|
135
|
-
|
|
136
|
-
# @return [void]
|
|
137
|
-
def add_ignored(file) = @ignored << file
|
|
138
|
-
|
|
139
|
-
# @return [void]
|
|
140
|
-
def add_processed(file) = @processed << file
|
|
141
46
|
end
|
|
142
47
|
end
|
|
143
48
|
end
|
data/lib/pdfh/models/document.rb
CHANGED
|
@@ -1,152 +1,67 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Pdfh
|
|
4
|
-
#
|
|
4
|
+
# Lightweight struct that connects a PDF file with its matched document type and
|
|
5
|
+
# extracted text. All file metadata, date interpretation, and rename resolution
|
|
6
|
+
# are accessible through dedicated value objects (FileInfo, DateInfo).
|
|
5
7
|
class Document
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
#
|
|
12
|
-
|
|
13
|
-
|
|
8
|
+
# @!attribute [r] file_info
|
|
9
|
+
# @return [FileInfo] File metadata wrapper
|
|
10
|
+
# @!attribute [r] type
|
|
11
|
+
# @return [DocumentType] Matched document type
|
|
12
|
+
# @!attribute [r] text
|
|
13
|
+
# @return [String] Extracted text from the PDF
|
|
14
|
+
# @!attribute [r] date_info
|
|
15
|
+
# @return [DateInfo] Parsed date value object
|
|
16
|
+
attr_reader :file_info, :type, :text, :date_info
|
|
17
|
+
|
|
18
|
+
# @param file [String] Path to the PDF file
|
|
19
|
+
# @param type [DocumentType] Type of the document
|
|
20
|
+
# @param text [String] Extracted text from the PDF
|
|
21
|
+
# @param date_captures [Hash{String => String}] Captured date components from regex
|
|
22
|
+
# @return [self] A new Document instance
|
|
23
|
+
def initialize(file, type, text, date_captures)
|
|
14
24
|
@type = type
|
|
15
25
|
@text = text
|
|
26
|
+
@file_info = FileInfo.new(file)
|
|
27
|
+
@date_info = DateInfo.new(date_captures)
|
|
16
28
|
end
|
|
17
29
|
|
|
18
|
-
# @return [
|
|
19
|
-
def process
|
|
20
|
-
Pdfh.debug "=== Document Type: #{type.name} =============================="
|
|
21
|
-
Pdfh.debug "~~~~~~~~~~~~~~~~~~ Finding a subtype"
|
|
22
|
-
@sub_type = type.sub_type(@text)
|
|
23
|
-
Pdfh.debug " SubType: #{@sub_type}"
|
|
24
|
-
@companion = search_companion_files
|
|
25
|
-
|
|
26
|
-
month, year, @extra = match_date(@sub_type&.re_date || @type.re_date)
|
|
27
|
-
@period = DocumentPeriod.new(day: extra, month: month, month_offset: @sub_type&.month_offset, year: year)
|
|
28
|
-
Pdfh.debug " Period: #{@period.inspect}"
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
# @return [void]
|
|
32
|
-
def print_info
|
|
33
|
-
print_info_line "Type", type.name
|
|
34
|
-
print_info_line "Sub-Type", sub_type
|
|
35
|
-
print_info_line "Period", period
|
|
36
|
-
print_info_line "New Name", new_name
|
|
37
|
-
print_info_line "Store Path", store_path
|
|
38
|
-
print_info_line "Extra files", companion_files(join: true)
|
|
39
|
-
print_info_line "Processed?", "No (in Dry mode)" if Pdfh.dry?
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
# @return [void]
|
|
43
|
-
def print_info_line(property, info)
|
|
44
|
-
Pdfh.ident_print property, info.to_s, color: :light_blue, width: 12
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
# @return [String]
|
|
48
|
-
def file_name_only
|
|
49
|
-
File.basename(@file, file_extension)
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
# @return [String]
|
|
53
|
-
def file_extension
|
|
54
|
-
File.extname(@file)
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
# @return [String]
|
|
58
|
-
def file_name
|
|
59
|
-
File.basename(@file)
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# @return [String]
|
|
63
|
-
def backup_name
|
|
64
|
-
"#{file_name}.bkp"
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
# @return [String]
|
|
30
|
+
# @return [String] Document type name or "N/A" if type is nil
|
|
68
31
|
def type_name
|
|
69
|
-
type&.name
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
# @return [String]
|
|
73
|
-
def sub_type
|
|
74
|
-
@sub_type&.name&.titleize || "N/A"
|
|
32
|
+
type&.name || "N/A"
|
|
75
33
|
end
|
|
76
34
|
|
|
77
|
-
# @return [
|
|
78
|
-
def
|
|
79
|
-
|
|
80
|
-
original: file_name_only,
|
|
81
|
-
period: period.to_s,
|
|
82
|
-
year: period.year.to_s,
|
|
83
|
-
month: period.month.to_s,
|
|
84
|
-
type: type_name,
|
|
85
|
-
subtype: sub_type,
|
|
86
|
-
extra: extra || ""
|
|
87
|
-
}.freeze
|
|
35
|
+
# @return [String] File name
|
|
36
|
+
def to_s
|
|
37
|
+
file_info.name
|
|
88
38
|
end
|
|
89
39
|
|
|
90
|
-
# @return [String]
|
|
40
|
+
# @return [String] New file name with extension (e.g., "2024-01 Cuenta.pdf")
|
|
91
41
|
def new_name
|
|
92
|
-
|
|
93
|
-
"#{new_name}#{file_extension}"
|
|
42
|
+
"#{@type.name_validator.gsub(rename_data)}#{@file_info.extension}"
|
|
94
43
|
end
|
|
95
44
|
|
|
96
|
-
# @return [String]
|
|
45
|
+
# @return [String] Storage path for the document (e.g., "2024/Edo Cuenta")
|
|
97
46
|
def store_path
|
|
98
|
-
type.
|
|
99
|
-
end
|
|
100
|
-
|
|
101
|
-
# @return [String (frozen)]
|
|
102
|
-
def companion_files(join: false)
|
|
103
|
-
return @companion unless join
|
|
104
|
-
|
|
105
|
-
@companion.empty? ? "N/A" : @companion.join(", ")
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
# @return [String]
|
|
109
|
-
def home_dir
|
|
110
|
-
File.dirname(@file)
|
|
111
|
-
end
|
|
112
|
-
|
|
113
|
-
# @return [String]
|
|
114
|
-
def to_s
|
|
115
|
-
@file
|
|
47
|
+
@type.path_validator.gsub(rename_data)
|
|
116
48
|
end
|
|
117
49
|
|
|
118
50
|
private
|
|
119
51
|
|
|
120
|
-
#
|
|
121
|
-
#
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
return matched.captures.map(&:downcase) if regex.named_captures.empty?
|
|
134
|
-
|
|
135
|
-
extra = matched.captures.size > 2 ? matched[:d] : nil
|
|
136
|
-
[matched[:m].downcase, matched[:y], extra]
|
|
137
|
-
end
|
|
138
|
-
|
|
139
|
-
# @return [Array]
|
|
140
|
-
def search_companion_files
|
|
141
|
-
Pdfh.debug "~~~~~~~~~~~~~~~~~~ Searching Companion files"
|
|
142
|
-
Pdfh.debug " Searching on: #{home_dir.inspect}"
|
|
143
|
-
Dir.chdir(home_dir) do
|
|
144
|
-
files_matching = Dir["#{file_name_only}.*"]
|
|
145
|
-
companion = files_matching.reject { |file| file.include? ".pdf" }
|
|
146
|
-
Pdfh.debug " Found: #{companion.inspect}"
|
|
147
|
-
|
|
148
|
-
companion
|
|
149
|
-
end
|
|
52
|
+
# Used to replace variables in the rename pattern i.e {original}, {period}, etc.
|
|
53
|
+
# @return [Hash{Symbol => String}] Hash containing rename variables
|
|
54
|
+
def rename_data
|
|
55
|
+
@rename_data ||= {
|
|
56
|
+
original: @file_info.stem,
|
|
57
|
+
period: @date_info.period,
|
|
58
|
+
year: @date_info.year.to_s,
|
|
59
|
+
month: @date_info.month.to_s,
|
|
60
|
+
quarter: "Q#{@date_info.quarter}",
|
|
61
|
+
bimester: "B#{@date_info.bimester}",
|
|
62
|
+
name: @type.name,
|
|
63
|
+
day: @date_info.day || ""
|
|
64
|
+
}.freeze
|
|
150
65
|
end
|
|
151
66
|
end
|
|
152
67
|
end
|
|
@@ -3,97 +3,65 @@
|
|
|
3
3
|
module Pdfh
|
|
4
4
|
# Represents a type of document that can be processed by pdfh
|
|
5
5
|
class DocumentType
|
|
6
|
-
|
|
6
|
+
REQUIRED_KEYS = %i[name re_date store_path].freeze
|
|
7
|
+
DEFAULT_NAME_TEMPLATE = "{name} {period}"
|
|
7
8
|
|
|
8
9
|
# @!attribute [r] name
|
|
9
|
-
# @return [String] The name of the document type
|
|
10
|
-
# @!attribute [r]
|
|
11
|
-
# @return [Regexp] The regular expression to
|
|
10
|
+
# @return [String] The name of the document type
|
|
11
|
+
# @!attribute [r] re_id
|
|
12
|
+
# @return [Regexp] The regular expression to extract the document ID
|
|
12
13
|
# @!attribute [r] re_date
|
|
13
|
-
# @return [Regexp] The regular expression to extract dates
|
|
14
|
-
# @!attribute [r] pwd
|
|
15
|
-
# @return [String, nil] The base64 password for the document type, if any.
|
|
14
|
+
# @return [Regexp] The regular expression to extract dates
|
|
16
15
|
# @!attribute [r] store_path
|
|
17
|
-
# @return [String] The path where the document will be stored
|
|
16
|
+
# @return [String] The path where the document will be stored
|
|
18
17
|
# @!attribute [r] name_template
|
|
19
|
-
# @return [String] The template for generating document names
|
|
20
|
-
# @!attribute [r]
|
|
21
|
-
# @return [
|
|
22
|
-
|
|
18
|
+
# @return [String] The template for generating document names
|
|
19
|
+
# @!attribute [r] path_validator
|
|
20
|
+
# @return [RenameValidator] The validator for the storage path
|
|
21
|
+
# @!attribute [r] name_validator
|
|
22
|
+
# @return [RenameValidator] The validator for the document name
|
|
23
|
+
attr_reader :name, :re_id, :re_date, :store_path, :name_template, :path_validator, :name_validator
|
|
23
24
|
|
|
24
|
-
# @param args [Hash]
|
|
25
|
-
# @return [
|
|
25
|
+
# @param args [Hash] The initialization arguments
|
|
26
|
+
# @return [DocumentType]
|
|
26
27
|
def initialize(args)
|
|
27
28
|
args.each { |k, v| instance_variable_set(:"@#{k}", v) }
|
|
28
|
-
|
|
29
|
-
|
|
29
|
+
return if missing_keys?
|
|
30
|
+
|
|
31
|
+
@name = name.to_s.strip
|
|
32
|
+
@re_id = Regexp.new(re_id || name)
|
|
30
33
|
@re_date = Regexp.new(re_date)
|
|
31
|
-
@
|
|
34
|
+
@name_template = name_template || DEFAULT_NAME_TEMPLATE
|
|
32
35
|
@path_validator = RenameValidator.new(store_path)
|
|
33
|
-
@name_validator = RenameValidator.new(name_template)
|
|
34
|
-
|
|
36
|
+
@name_validator = RenameValidator.new(@name_template)
|
|
37
|
+
end
|
|
35
38
|
|
|
36
|
-
|
|
39
|
+
# @return [Boolean]
|
|
40
|
+
def valid?
|
|
41
|
+
missing_keys.empty? &&
|
|
42
|
+
@path_validator.valid? &&
|
|
43
|
+
@name_validator.valid?
|
|
37
44
|
end
|
|
38
45
|
|
|
39
|
-
# @return [Hash{
|
|
46
|
+
# @return [Hash{String => Object}]
|
|
40
47
|
def to_h
|
|
41
48
|
instance_variables.to_h { |var| [var.to_s.delete_prefix("@"), instance_variable_get(var)] }
|
|
42
49
|
end
|
|
43
50
|
|
|
44
51
|
# removes special characters from string and replaces spaces with dashes
|
|
45
|
-
# @example
|
|
46
|
-
# "Test This?%&".gid
|
|
47
|
-
# # => "test-this"
|
|
52
|
+
# @example
|
|
53
|
+
# "Test This?%&".gid # => "test-this"
|
|
48
54
|
# @return [String]
|
|
49
55
|
def gid
|
|
50
56
|
name.downcase.gsub(/[^0-9A-Za-z\s]/, "").tr(" ", "-")
|
|
51
57
|
end
|
|
52
58
|
|
|
53
|
-
#
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
# Regexp.new(st.name).match?(name)
|
|
57
|
-
sub_types&.find { |st| /#{st.name}/i.match?(text) }
|
|
59
|
+
# @return [Array<Symbol>]
|
|
60
|
+
def missing_keys
|
|
61
|
+
@missing_keys ||= REQUIRED_KEYS.select { |key| instance_variable_get(:"@#{key}").to_s.strip.empty? }
|
|
58
62
|
end
|
|
59
63
|
|
|
60
|
-
# @
|
|
61
|
-
|
|
62
|
-
def generate_new_name(values)
|
|
63
|
-
@name_validator.gsub(values)
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
# @param values [Hash{Symbol->String}
|
|
67
|
-
# @return [String]
|
|
68
|
-
def generate_path(values)
|
|
69
|
-
@path_validator.gsub(values)
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
private
|
|
73
|
-
|
|
74
|
-
attr_accessor :path_validator, :name_validator
|
|
75
|
-
|
|
76
|
-
# @param sub_types [Array<Hash{Symbol->String}>]
|
|
77
|
-
# @return [Array<DocumentSubType>]
|
|
78
|
-
def extract_subtypes(sub_types)
|
|
79
|
-
sub_types.map do |st|
|
|
80
|
-
data = {
|
|
81
|
-
name: st[:name],
|
|
82
|
-
month_offset: st[:month_offset].to_i,
|
|
83
|
-
re_date: st[:re_date] && Regexp.new(st[:re_date])
|
|
84
|
-
}.compact
|
|
85
|
-
DocumentSubType.new(data)
|
|
86
|
-
end
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
# @raise [ArgumentError] when called
|
|
90
|
-
# @return [void]
|
|
91
|
-
def raise_validators_error
|
|
92
|
-
template = "has invalid %<field>s[Unknown tokens=%<error>s]"
|
|
93
|
-
errors = []
|
|
94
|
-
errors << format(template, field: :store_path, error: path_validator.unknown_list) unless path_validator.valid?
|
|
95
|
-
errors << format(template, field: :name_template, error: name_validator.unknown_list) unless name_validator.valid?
|
|
96
|
-
raise ArgumentError, "Document type #{name.inspect} #{errors.join(", ")}"
|
|
97
|
-
end
|
|
64
|
+
# @return [Boolean]
|
|
65
|
+
def missing_keys? = missing_keys.any?
|
|
98
66
|
end
|
|
99
67
|
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pdfh
|
|
4
|
+
# Runtime options for the application
|
|
5
|
+
class RunOptions
|
|
6
|
+
# @param verbose [Boolean]
|
|
7
|
+
# @param dry [Boolean]
|
|
8
|
+
# @return [RunOptions]
|
|
9
|
+
def initialize(verbose: false, dry: false)
|
|
10
|
+
@verbose = verbose
|
|
11
|
+
@dry = dry
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# @return [Boolean]
|
|
15
|
+
def verbose? = @verbose
|
|
16
|
+
|
|
17
|
+
# @return [Boolean]
|
|
18
|
+
def dry? = @dry
|
|
19
|
+
end
|
|
20
|
+
end
|
data/lib/pdfh/models/settings.rb
CHANGED
|
@@ -1,87 +1,41 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Pdfh
|
|
4
|
-
# Handles the config yaml data mapping, and associates a file name with a doc type
|
|
4
|
+
# Handles the config yaml data mapping, and associates a file name with a doc type.
|
|
5
|
+
# This is a pure data object — validation is handled by Services::SettingsValidator.
|
|
5
6
|
class Settings
|
|
6
7
|
# @!attribute [r] lookup_dirs
|
|
7
|
-
# @return [Array<String>] List of directories to look up for processing.
|
|
8
|
+
# @return [Array<String>] List of validated, expanded directories to look up for processing.
|
|
8
9
|
# @!attribute [r] base_path
|
|
9
|
-
# @return [String] The base directory path for storing processed files.
|
|
10
|
-
|
|
11
|
-
# @return [Array<ZipType>, nil] List of zip types to process, or nil if none.
|
|
12
|
-
attr_reader :lookup_dirs, :base_path, :zip_types
|
|
10
|
+
# @return [String] The validated, expanded base directory path for storing processed files.
|
|
11
|
+
attr_reader :lookup_dirs, :base_path
|
|
13
12
|
|
|
14
|
-
# @param
|
|
15
|
-
# @
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
Pdfh.debug
|
|
23
|
-
|
|
24
|
-
build_doc_types(config_data[:document_types])
|
|
25
|
-
build_zip_types(config_data[:zip_types]) if config_data.key?(:zip_types)
|
|
13
|
+
# @param lookup_dirs [Array<String>] Already validated and expanded directories
|
|
14
|
+
# @param base_path [String] Already validated and expanded base path
|
|
15
|
+
# @param document_types [Hash{String => DocumentType}] Already validated document types keyed by gid
|
|
16
|
+
# @return [Settings]
|
|
17
|
+
def initialize(lookup_dirs:, base_path:, document_types:)
|
|
18
|
+
@lookup_dirs = lookup_dirs
|
|
19
|
+
@base_path = base_path
|
|
20
|
+
@document_types = document_types
|
|
26
21
|
end
|
|
27
22
|
|
|
28
23
|
# @return [Array<DocumentType>]
|
|
29
|
-
def document_types
|
|
30
|
-
|
|
24
|
+
def document_types = @document_types.values
|
|
25
|
+
|
|
26
|
+
# @example
|
|
27
|
+
# # document_types.map(&:name) ['12345', '12', '123']
|
|
28
|
+
# settings.document_types_name_max_size #=> 5
|
|
29
|
+
# @return [Integer]
|
|
30
|
+
def document_types_name_max_size
|
|
31
|
+
return 0 if document_types.empty?
|
|
32
|
+
|
|
33
|
+
document_types.map { _1.name.length }.max
|
|
31
34
|
end
|
|
32
35
|
|
|
33
36
|
# @return [DocumentType]
|
|
34
37
|
def document_type(id)
|
|
35
38
|
@document_types[id]
|
|
36
39
|
end
|
|
37
|
-
|
|
38
|
-
# @return [Boolean]
|
|
39
|
-
def zip_types?
|
|
40
|
-
!!zip_types&.any?
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
private
|
|
44
|
-
|
|
45
|
-
# @param lookup_dirs_list [Array[String]]
|
|
46
|
-
# @return [void]
|
|
47
|
-
def process_lookup_dirs(lookup_dirs_list)
|
|
48
|
-
@lookup_dirs = lookup_dirs_list.filter_map do |dir|
|
|
49
|
-
expanded = File.expand_path(dir)
|
|
50
|
-
unless File.directory?(expanded)
|
|
51
|
-
Pdfh.debug " ** Error, Directory #{dir} does not exists."
|
|
52
|
-
next
|
|
53
|
-
end
|
|
54
|
-
expanded
|
|
55
|
-
end
|
|
56
|
-
raise ArgumentError, "No valid Look up directories configured." if lookup_dirs.empty?
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
# @return [void]
|
|
60
|
-
# @param dir [String]
|
|
61
|
-
def process_destination_base(dir)
|
|
62
|
-
@base_path = File.expand_path(dir)
|
|
63
|
-
raise ArgumentError, "Destination base directory is not configured." if @base_path.nil?
|
|
64
|
-
raise ArgumentError, "Destination base directory #{@base_path} does not exist." unless File.directory?(@base_path)
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
# @param doc_types [Array<Hash>]
|
|
68
|
-
# @return [void]
|
|
69
|
-
def build_doc_types(doc_types)
|
|
70
|
-
@document_types = doc_types.each_with_object({}) do |data, result|
|
|
71
|
-
doc_type = DocumentType.new(data)
|
|
72
|
-
result.store(doc_type.gid, doc_type)
|
|
73
|
-
rescue ArgumentError => e
|
|
74
|
-
Pdfh.error_print e.message, exit_app: false
|
|
75
|
-
Pdfh.backtrace_print e if Pdfh.verbose?
|
|
76
|
-
end
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
# @param zip_types [Array<Hash>]
|
|
80
|
-
# @return [void]
|
|
81
|
-
def build_zip_types(zip_types)
|
|
82
|
-
exit(1) if Pdfh::Utils::DependencyValidator.missing?(:unzip)
|
|
83
|
-
|
|
84
|
-
@zip_types = zip_types.compact.map { ZipType.new(_1) }
|
|
85
|
-
end
|
|
86
40
|
end
|
|
87
41
|
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pdfh
|
|
4
|
+
module Services
|
|
5
|
+
# Scans lookup dirs and returns matched documents
|
|
6
|
+
class DirectoryScanner
|
|
7
|
+
# @param directories [Array<String>]
|
|
8
|
+
# @return [DirectoryScanner]
|
|
9
|
+
def initialize(directories)
|
|
10
|
+
@directories = directories
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
# @return [Array<String>]
|
|
14
|
+
def scan
|
|
15
|
+
@directories.flat_map { |dir| scan_dir(dir) }
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
private
|
|
19
|
+
|
|
20
|
+
# @param dir [String]
|
|
21
|
+
# @return [Array<String>]
|
|
22
|
+
def scan_dir(dir)
|
|
23
|
+
Dir.glob(File.join(dir, "*.pdf"))
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|