pdfh 3.3.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.editorconfig +16 -0
  3. data/.gitignore +3 -0
  4. data/.pre-commit-config.yaml +13 -5
  5. data/.rubocop.yml +5 -1
  6. data/.rubocop_todo.yml +5 -18
  7. data/.simplecov +32 -0
  8. data/AGENTS.md +174 -0
  9. data/CHANGELOG.md +74 -9
  10. data/Gemfile +4 -7
  11. data/Gemfile.lock +68 -80
  12. data/README.md +72 -37
  13. data/Rakefile +24 -6
  14. data/bin/console +3 -10
  15. data/bin/run +0 -1
  16. data/commitlint.config.js +1 -1
  17. data/exe/pdfh +1 -1
  18. data/justfile +65 -0
  19. data/lib/pdfh/main.rb +25 -120
  20. data/lib/pdfh/models/document.rb +43 -128
  21. data/lib/pdfh/models/document_type.rb +35 -67
  22. data/lib/pdfh/models/run_options.rb +20 -0
  23. data/lib/pdfh/models/settings.rb +23 -69
  24. data/lib/pdfh/services/directory_scanner.rb +27 -0
  25. data/lib/pdfh/services/document_manager.rb +125 -0
  26. data/lib/pdfh/services/document_matcher.rb +57 -0
  27. data/lib/pdfh/services/opt_parser.rb +76 -0
  28. data/lib/pdfh/services/pdf_text_extractor.rb +45 -0
  29. data/lib/pdfh/services/settings_builder.rb +113 -0
  30. data/lib/pdfh/services/settings_validator.rb +150 -0
  31. data/lib/pdfh/utils/console.rb +5 -5
  32. data/lib/pdfh/utils/date_info.rb +55 -0
  33. data/lib/pdfh/utils/file_info.rb +47 -0
  34. data/lib/pdfh/utils/rename_validator.rb +4 -3
  35. data/lib/pdfh/version.rb +1 -1
  36. data/lib/pdfh.rb +26 -20
  37. data/mise.toml +25 -1
  38. data/pdfh.gemspec +3 -3
  39. metadata +19 -15
  40. data/lib/ext/string.rb +0 -9
  41. data/lib/pdfh/concerns/password_decodable.rb +0 -31
  42. data/lib/pdfh/models/document_period.rb +0 -37
  43. data/lib/pdfh/models/document_sub_type.rb +0 -6
  44. data/lib/pdfh/models/zip_types.rb +0 -17
  45. data/lib/pdfh/settings_template.rb +0 -21
  46. data/lib/pdfh/utils/opt_parser.rb +0 -78
  47. data/lib/pdfh/utils/options.rb +0 -38
  48. data/lib/pdfh/utils/pdf_file_handler.rb +0 -122
  49. data/lib/pdfh/utils/settings_builder.rb +0 -62
data/lib/pdfh/main.rb CHANGED
@@ -7,137 +7,42 @@ module Pdfh
7
7
  # @param argv [Array<String>]
8
8
  # @return [void]
9
9
  def start(argv:)
10
- arg_options = Pdfh::OptParser.new(argv: argv).parse_argv
11
- @options = Options.new(arg_options)
12
- assign_global_utils(@options)
13
- Pdfh.print_options(arg_options)
10
+ arg_options = Services::OptParser.new(argv: argv).parse_argv
11
+ options = RunOptions.new(**arg_options)
14
12
 
15
- @settings = SettingsBuilder.build
16
- Pdfh.debug "Destination path: #{settings.base_path.colorize(:light_blue)}"
13
+ # Initialize the global logger
14
+ Pdfh.logger = Console.new(options.verbose?)
15
+ Pdfh.logger.print_options(arg_options)
17
16
 
18
- options.file_mode? ? process_provided_files : process_lookup_dirs
19
- rescue SettingsIOError => e
20
- Pdfh.error_print(e.message, exit_app: false)
21
- Pdfh.create_settings_file
22
- exit(1)
23
- rescue StandardError => e
24
- Pdfh.backtrace_print e if Pdfh.verbose?
25
- Pdfh.error_print(e.message)
26
- end
27
-
28
- private
29
-
30
- attr_reader :options, :settings
31
-
32
- # @param options [Options]
33
- # @return [void]
34
- def assign_global_utils(options)
35
- Pdfh.instance_variable_set(:@options, options)
36
- Pdfh.instance_variable_set(:@console, Console.new(options.verbose?))
37
- end
38
-
39
- # @param [String] file_name
40
- # @return [DocumentType, nil]
41
- def match_doc_type(file_name)
42
- settings.document_types.each do |type|
43
- match = type.re_file.match(file_name)
44
- return type if match
45
- end
46
- nil
47
- end
48
-
49
- # @return [void]
50
- def process_provided_files
51
- type_id = options.type
52
- raise ArgumentError, "No files provided to process #{type_id.inspect} type." unless options.files?
53
-
54
- type = settings.document_type(type_id)
55
- Pdfh.error_print "Type #{type_id.inspect} was not found." if type.nil?
56
- options.files.each do |file|
57
- next Pdfh.warn_print "File #{file.inspect} does not exist." unless File.exist?(file)
58
- next Pdfh.warn_print "File #{file.inspect} is not a pdf." unless File.extname(file) == ".pdf"
59
-
60
- PdfFileHandler.new(file, type).process_document(settings.base_path)
61
- end
62
- end
63
-
64
- # @return [void]
65
- def process_lookup_dirs
66
- settings.lookup_dirs.each do |work_directory|
67
- process_directory(work_directory)
68
- end
69
- end
17
+ settings = Services::SettingsBuilder.call
18
+ Pdfh.logger.debug "Destination path: #{settings.base_path.colorize(:light_blue)}"
70
19
 
71
- # @param [String] work_directory
72
- # @return [void]
73
- def process_zip_files(work_directory)
74
- @settings.zip_types&.each do |zip_type|
75
- find_files(work_directory, :zip).each do |file|
76
- next unless zip_type.re_file.match?(File.basename(file))
20
+ files = Services::DirectoryScanner.new(settings.lookup_dirs).scan
21
+ matcher = Services::DocumentMatcher.new(settings.document_types)
77
22
 
78
- Pdfh.info " > Processing zip file: #{file.green}"
79
- password_opt = "-P #{zip_type.password}" if zip_type.password?
80
- `unzip -o #{password_opt} #{file} -d #{work_directory}`
81
- end
82
- end
83
- end
23
+ files.each do |file_path|
24
+ Pdfh.logger.info "Working on: #{file_path.colorize(:green)}" if Pdfh.logger.verbose?
25
+ text = Services::PdfTextExtractor.call(file_path)
84
26
 
85
- # @param directory [String]
86
- # @param type [String, Symbol]
87
- # @return [Array<String>]
88
- def find_files(directory, type)
89
- glob = File.join(directory, "*.#{type}")
90
- Dir.glob(glob)
91
- end
27
+ documents = matcher.match(file_path, text)
28
+ next Pdfh.logger.debug "No document type match found for #{file_path.colorize(:yellow)}" if documents.empty?
92
29
 
93
- def process_directory(work_directory)
94
- Pdfh.headline(work_directory)
95
- process_zip_files(work_directory) if @settings.zip_types?
96
- processed_result = RunResult.new
97
- files = find_files(work_directory, :pdf)
98
- files.each do |pdf_file|
99
- type = match_doc_type(pdf_file)
100
- if type
101
- PdfFileHandler.new(pdf_file, type).process_document(settings.base_path)
102
- processed_result.add_processed(pdf_file)
103
- else
104
- processed_result.add_ignored(pdf_file)
30
+ unless documents.one?
31
+ matches = documents.map { _1.type.name.inspect }.join(", ")
32
+ next Pdfh.logger.warn_print "Skipping #{file_path.inspect} as multiple matches found: #{matches}."
105
33
  end
106
- end
107
- print_processing_results(processed_result)
108
- end
109
-
110
- # @return [String]
111
- def base_name_no_ext(file)
112
- File.basename(file, File.extname(file))
113
- end
114
-
115
- def print_processing_results(result)
116
- Pdfh.info " (No files processed)".colorize(:light_black) if result.processed.empty?
117
- return unless Pdfh.verbose?
118
34
 
119
- Pdfh.info "\n No document type found for these PDF files:" if result.ignored.any?
120
- result.ignored.each.with_index(1) do |file, index|
121
- Pdfh.ident_print index, base_name_no_ext(file), color: :magenta
35
+ Services::DocumentManager.new(documents.first, base_path: settings.base_path, dry_run: options.dry?).call
122
36
  end
123
- end
124
- end
125
-
126
- # keeps track of the processed and ignored files
127
- class RunResult
128
- attr_reader :processed, :ignored
129
37
 
130
- # @return [self]
131
- def initialize
132
- @processed = []
133
- @ignored = []
38
+ nil
39
+ rescue SettingsIOError => e
40
+ Pdfh.logger.error_print(e.message, exit_app: false)
41
+ exit(1)
42
+ rescue StandardError => e
43
+ Pdfh.logger.backtrace_print(e) if Pdfh.logger.verbose?
44
+ Pdfh.logger.error_print(e.message)
134
45
  end
135
-
136
- # @return [void]
137
- def add_ignored(file) = @ignored << file
138
-
139
- # @return [void]
140
- def add_processed(file) = @processed << file
141
46
  end
142
47
  end
143
48
  end
@@ -1,152 +1,67 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Pdfh
4
- # Handles the PDF detected by the rules
4
+ # Lightweight struct that connects a PDF file with its matched document type and
5
+ # extracted text. All file metadata, date interpretation, and rename resolution
6
+ # are accessible through dedicated value objects (FileInfo, DateInfo).
5
7
  class Document
6
- attr_reader :text, :type, :file, :extra, :period
7
-
8
- # @param file [String]
9
- # @param type [DocumentType]
10
- # @param text [String]
11
- # @return [self]
12
- def initialize(file, type, text)
13
- @file = file
8
+ # @!attribute [r] file_info
9
+ # @return [FileInfo] File metadata wrapper
10
+ # @!attribute [r] type
11
+ # @return [DocumentType] Matched document type
12
+ # @!attribute [r] text
13
+ # @return [String] Extracted text from the PDF
14
+ # @!attribute [r] date_info
15
+ # @return [DateInfo] Parsed date value object
16
+ attr_reader :file_info, :type, :text, :date_info
17
+
18
+ # @param file [String] Path to the PDF file
19
+ # @param type [DocumentType] Type of the document
20
+ # @param text [String] Extracted text from the PDF
21
+ # @param date_captures [Hash{String => String}] Captured date components from regex
22
+ # @return [self] A new Document instance
23
+ def initialize(file, type, text, date_captures)
14
24
  @type = type
15
25
  @text = text
26
+ @file_info = FileInfo.new(file)
27
+ @date_info = DateInfo.new(date_captures)
16
28
  end
17
29
 
18
- # @return [void]
19
- def process
20
- Pdfh.debug "=== Document Type: #{type.name} =============================="
21
- Pdfh.debug "~~~~~~~~~~~~~~~~~~ Finding a subtype"
22
- @sub_type = type.sub_type(@text)
23
- Pdfh.debug " SubType: #{@sub_type}"
24
- @companion = search_companion_files
25
-
26
- month, year, @extra = match_date(@sub_type&.re_date || @type.re_date)
27
- @period = DocumentPeriod.new(day: extra, month: month, month_offset: @sub_type&.month_offset, year: year)
28
- Pdfh.debug " Period: #{@period.inspect}"
29
- end
30
-
31
- # @return [void]
32
- def print_info
33
- print_info_line "Type", type.name
34
- print_info_line "Sub-Type", sub_type
35
- print_info_line "Period", period
36
- print_info_line "New Name", new_name
37
- print_info_line "Store Path", store_path
38
- print_info_line "Extra files", companion_files(join: true)
39
- print_info_line "Processed?", "No (in Dry mode)" if Pdfh.dry?
40
- end
41
-
42
- # @return [void]
43
- def print_info_line(property, info)
44
- Pdfh.ident_print property, info.to_s, color: :light_blue, width: 12
45
- end
46
-
47
- # @return [String]
48
- def file_name_only
49
- File.basename(@file, file_extension)
50
- end
51
-
52
- # @return [String]
53
- def file_extension
54
- File.extname(@file)
55
- end
56
-
57
- # @return [String]
58
- def file_name
59
- File.basename(@file)
60
- end
61
-
62
- # @return [String]
63
- def backup_name
64
- "#{file_name}.bkp"
65
- end
66
-
67
- # @return [String]
30
+ # @return [String] Document type name or "N/A" if type is nil
68
31
  def type_name
69
- type&.name&.titleize || "N/A"
70
- end
71
-
72
- # @return [String]
73
- def sub_type
74
- @sub_type&.name&.titleize || "N/A"
32
+ type&.name || "N/A"
75
33
  end
76
34
 
77
- # @return [Hash{Symbol->String}]
78
- def rename_data
79
- {
80
- original: file_name_only,
81
- period: period.to_s,
82
- year: period.year.to_s,
83
- month: period.month.to_s,
84
- type: type_name,
85
- subtype: sub_type,
86
- extra: extra || ""
87
- }.freeze
35
+ # @return [String] File name
36
+ def to_s
37
+ file_info.name
88
38
  end
89
39
 
90
- # @return [String]
40
+ # @return [String] New file name with extension (e.g., "2024-01 Cuenta.pdf")
91
41
  def new_name
92
- new_name = type.generate_new_name(rename_data)
93
- "#{new_name}#{file_extension}"
42
+ "#{@type.name_validator.gsub(rename_data)}#{@file_info.extension}"
94
43
  end
95
44
 
96
- # @return [String]
45
+ # @return [String] Storage path for the document (e.g., "2024/Edo Cuenta")
97
46
  def store_path
98
- type.generate_path(rename_data)
99
- end
100
-
101
- # @return [String (frozen)]
102
- def companion_files(join: false)
103
- return @companion unless join
104
-
105
- @companion.empty? ? "N/A" : @companion.join(", ")
106
- end
107
-
108
- # @return [String]
109
- def home_dir
110
- File.dirname(@file)
111
- end
112
-
113
- # @return [String]
114
- def to_s
115
- @file
47
+ @type.path_validator.gsub(rename_data)
116
48
  end
117
49
 
118
50
  private
119
51
 
120
- # named matches can appear in any order with names 'd', 'm' and 'y'
121
- # unnamed matches needs to be in order month, year
122
- # @return [Array] - format [month, year, day]
123
- # @param regex [RegularExpression]
124
- def match_date(regex)
125
- Pdfh.debug "~~~~~~~~~~~~~~~~~~ Match Data RegEx"
126
- Pdfh.debug " Using regex: #{regex}"
127
- Pdfh.debug " named: #{regex.named_captures}"
128
- matched = regex.match(@text)
129
- raise ReDateError unless matched
130
-
131
- Pdfh.debug " captured: #{matched.captures}"
132
-
133
- return matched.captures.map(&:downcase) if regex.named_captures.empty?
134
-
135
- extra = matched.captures.size > 2 ? matched[:d] : nil
136
- [matched[:m].downcase, matched[:y], extra]
137
- end
138
-
139
- # @return [Array]
140
- def search_companion_files
141
- Pdfh.debug "~~~~~~~~~~~~~~~~~~ Searching Companion files"
142
- Pdfh.debug " Searching on: #{home_dir.inspect}"
143
- Dir.chdir(home_dir) do
144
- files_matching = Dir["#{file_name_only}.*"]
145
- companion = files_matching.reject { |file| file.include? ".pdf" }
146
- Pdfh.debug " Found: #{companion.inspect}"
147
-
148
- companion
149
- end
52
+ # Used to replace variables in the rename pattern i.e {original}, {period}, etc.
53
+ # @return [Hash{Symbol => String}] Hash containing rename variables
54
+ def rename_data
55
+ @rename_data ||= {
56
+ original: @file_info.stem,
57
+ period: @date_info.period,
58
+ year: @date_info.year.to_s,
59
+ month: @date_info.month.to_s,
60
+ quarter: "Q#{@date_info.quarter}",
61
+ bimester: "B#{@date_info.bimester}",
62
+ name: @type.name,
63
+ day: @date_info.day || ""
64
+ }.freeze
150
65
  end
151
66
  end
152
67
  end
@@ -3,97 +3,65 @@
3
3
  module Pdfh
4
4
  # Represents a type of document that can be processed by pdfh
5
5
  class DocumentType
6
- include Concerns::PasswordDecodable
6
+ REQUIRED_KEYS = %i[name re_date store_path].freeze
7
+ DEFAULT_NAME_TEMPLATE = "{name} {period}"
7
8
 
8
9
  # @!attribute [r] name
9
- # @return [String] The name of the document type.
10
- # @!attribute [r] re_file
11
- # @return [Regexp] The regular expression to match file names.
10
+ # @return [String] The name of the document type
11
+ # @!attribute [r] re_id
12
+ # @return [Regexp] The regular expression to extract the document ID
12
13
  # @!attribute [r] re_date
13
- # @return [Regexp] The regular expression to extract dates and its information.
14
- # @!attribute [r] pwd
15
- # @return [String, nil] The base64 password for the document type, if any.
14
+ # @return [Regexp] The regular expression to extract dates
16
15
  # @!attribute [r] store_path
17
- # @return [String] The path where the document will be stored.
16
+ # @return [String] The path where the document will be stored
18
17
  # @!attribute [r] name_template
19
- # @return [String] The template for generating document names.
20
- # @!attribute [r] sub_types
21
- # @return [Array<DocumentSubType>, nil] The subtypes of the document, if any.
22
- attr_reader :name, :re_file, :re_date, :pwd, :store_path, :name_template, :sub_types
18
+ # @return [String] The template for generating document names
19
+ # @!attribute [r] path_validator
20
+ # @return [RenameValidator] The validator for the storage path
21
+ # @!attribute [r] name_validator
22
+ # @return [RenameValidator] The validator for the document name
23
+ attr_reader :name, :re_id, :re_date, :store_path, :name_template, :path_validator, :name_validator
23
24
 
24
- # @param args [Hash]
25
- # @return [self]
25
+ # @param args [Hash] The initialization arguments
26
+ # @return [DocumentType]
26
27
  def initialize(args)
27
28
  args.each { |k, v| instance_variable_set(:"@#{k}", v) }
28
- @name_template ||= "{original}"
29
- @re_file = Regexp.new(re_file)
29
+ return if missing_keys?
30
+
31
+ @name = name.to_s.strip
32
+ @re_id = Regexp.new(re_id || name)
30
33
  @re_date = Regexp.new(re_date)
31
- @sub_types = extract_subtypes(sub_types) if sub_types&.any?
34
+ @name_template = name_template || DEFAULT_NAME_TEMPLATE
32
35
  @path_validator = RenameValidator.new(store_path)
33
- @name_validator = RenameValidator.new(name_template)
34
- return if @path_validator.valid? && @name_validator.valid?
36
+ @name_validator = RenameValidator.new(@name_template)
37
+ end
35
38
 
36
- raise_validators_error
39
+ # @return [Boolean]
40
+ def valid?
41
+ missing_keys.empty? &&
42
+ @path_validator.valid? &&
43
+ @name_validator.valid?
37
44
  end
38
45
 
39
- # @return [Hash{Symbol->any}]
46
+ # @return [Hash{String => Object}]
40
47
  def to_h
41
48
  instance_variables.to_h { |var| [var.to_s.delete_prefix("@"), instance_variable_get(var)] }
42
49
  end
43
50
 
44
51
  # removes special characters from string and replaces spaces with dashes
45
- # @example usage
46
- # "Test This?%&".gid
47
- # # => "test-this"
52
+ # @example
53
+ # DocumentType.new(name: "Test This?%&", ...).gid # => "test-this"
48
54
  # @return [String]
49
55
  def gid
50
56
  name.downcase.gsub(/[^0-9A-Za-z\s]/, "").tr(" ", "-")
51
57
  end
52
58
 
53
- # search the subtype name in the pdf document
54
- # @return [DocumentSubType]
55
- def sub_type(text)
56
- # Regexp.new(st.name).match?(name)
57
- sub_types&.find { |st| /#{st.name}/i.match?(text) }
59
+ # @return [Array<Symbol>]
60
+ def missing_keys
61
+ @missing_keys ||= REQUIRED_KEYS.select { |key| instance_variable_get(:"@#{key}").to_s.strip.empty? }
58
62
  end
59
63
 
60
- # @param values [Hash{Symbol->String}
61
- # @return [String]
62
- def generate_new_name(values)
63
- @name_validator.gsub(values)
64
- end
65
-
66
- # @param values [Hash{Symbol->String}
67
- # @return [String]
68
- def generate_path(values)
69
- @path_validator.gsub(values)
70
- end
71
-
72
- private
73
-
74
- attr_accessor :path_validator, :name_validator
75
-
76
- # @param sub_types [Array<Hash{Symbol->String}>]
77
- # @return [Array<DocumentSubType>]
78
- def extract_subtypes(sub_types)
79
- sub_types.map do |st|
80
- data = {
81
- name: st[:name],
82
- month_offset: st[:month_offset].to_i,
83
- re_date: st[:re_date] && Regexp.new(st[:re_date])
84
- }.compact
85
- DocumentSubType.new(data)
86
- end
87
- end
88
-
89
- # @raise [ArgumentError] when called
90
- # @return [void]
91
- def raise_validators_error
92
- template = "has invalid %<field>s[Unknown tokens=%<error>s]"
93
- errors = []
94
- errors << format(template, field: :store_path, error: path_validator.unknown_list) unless path_validator.valid?
95
- errors << format(template, field: :name_template, error: name_validator.unknown_list) unless name_validator.valid?
96
- raise ArgumentError, "Document type #{name.inspect} #{errors.join(", ")}"
97
- end
64
+ # @return [Boolean]
65
+ def missing_keys? = missing_keys.any?
98
66
  end
99
67
  end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pdfh
4
+ # Runtime options for the application
5
+ class RunOptions
6
+ # @param verbose [Boolean]
7
+ # @param dry [Boolean]
8
+ # @return [RunOptions]
9
+ def initialize(verbose: false, dry: false)
10
+ @verbose = verbose
11
+ @dry = dry
12
+ end
13
+
14
+ # @return [Boolean]
15
+ def verbose? = @verbose
16
+
17
+ # @return [Boolean]
18
+ def dry? = @dry
19
+ end
20
+ end
@@ -1,87 +1,41 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Pdfh
4
- # Handles the config yaml data mapping, and associates a file name with a doc type
4
+ # Handles the config yaml data mapping, and associates a file name with a doc type.
5
+ # This is a pure data object — validation is handled by Services::SettingsValidator.
5
6
  class Settings
6
7
  # @!attribute [r] lookup_dirs
7
- # @return [Array<String>] List of directories to look up for processing.
8
+ # @return [Array<String>] List of validated, expanded directories to look up for processing.
8
9
  # @!attribute [r] base_path
9
- # @return [String] The base directory path for storing processed files.
10
- # @!attribute [r] zip_types
11
- # @return [Array<ZipType>, nil] List of zip types to process, or nil if none.
12
- attr_reader :lookup_dirs, :base_path, :zip_types
10
+ # @return [String] The validated, expanded base directory path for storing processed files.
11
+ attr_reader :lookup_dirs, :base_path
13
12
 
14
- # @param config_data [Hash]
15
- # @return [self]
16
- def initialize(config_data)
17
- process_lookup_dirs(config_data[:lookup_dirs])
18
- process_destination_base(config_data[:destination_base_path])
19
-
20
- Pdfh.debug "Configured Look up directories:"
21
- lookup_dirs.each.with_index(1) { |dir, idx| Pdfh.debug " #{idx}. #{dir}" }
22
- Pdfh.debug
23
-
24
- build_doc_types(config_data[:document_types])
25
- build_zip_types(config_data[:zip_types]) if config_data.key?(:zip_types)
13
+ # @param lookup_dirs [Array<String>] Already validated and expanded directories
14
+ # @param base_path [String] Already validated and expanded base path
15
+ # @param document_types [Hash{String => DocumentType}] Already validated document types keyed by gid
16
+ # @return [Settings]
17
+ def initialize(lookup_dirs:, base_path:, document_types:)
18
+ @lookup_dirs = lookup_dirs
19
+ @base_path = base_path
20
+ @document_types = document_types
26
21
  end
27
22
 
28
23
  # @return [Array<DocumentType>]
29
- def document_types
30
- @document_types.values
24
+ def document_types = @document_types.values
25
+
26
+ # @example
27
+ # # document_types.map(&:name) #=> ['12345', '12', '123']
28
+ # settings.document_types_name_max_size #=> 5
29
+ # @return [Integer]
30
+ def document_types_name_max_size
31
+ return 0 if document_types.empty?
32
+
33
+ document_types.map { _1.name.length }.max
31
34
  end
32
35
 
33
36
  # @return [DocumentType]
34
37
  def document_type(id)
35
38
  @document_types[id]
36
39
  end
37
-
38
- # @return [Boolean]
39
- def zip_types?
40
- !!zip_types&.any?
41
- end
42
-
43
- private
44
-
45
- # @param lookup_dirs_list [Array[String]]
46
- # @return [void]
47
- def process_lookup_dirs(lookup_dirs_list)
48
- @lookup_dirs = lookup_dirs_list.filter_map do |dir|
49
- expanded = File.expand_path(dir)
50
- unless File.directory?(expanded)
51
- Pdfh.debug " ** Error, Directory #{dir} does not exists."
52
- next
53
- end
54
- expanded
55
- end
56
- raise ArgumentError, "No valid Look up directories configured." if lookup_dirs.empty?
57
- end
58
-
59
- # @return [void]
60
- # @param dir [String]
61
- def process_destination_base(dir)
62
- @base_path = File.expand_path(dir)
63
- raise ArgumentError, "Destination base directory is not configured." if @base_path.nil?
64
- raise ArgumentError, "Destination base directory #{@base_path} does not exist." unless File.directory?(@base_path)
65
- end
66
-
67
- # @param doc_types [Array<Hash>]
68
- # @return [void]
69
- def build_doc_types(doc_types)
70
- @document_types = doc_types.each_with_object({}) do |data, result|
71
- doc_type = DocumentType.new(data)
72
- result.store(doc_type.gid, doc_type)
73
- rescue ArgumentError => e
74
- Pdfh.error_print e.message, exit_app: false
75
- Pdfh.backtrace_print e if Pdfh.verbose?
76
- end
77
- end
78
-
79
- # @param zip_types [Array<Hash>]
80
- # @return [void]
81
- def build_zip_types(zip_types)
82
- exit(1) if Pdfh::Utils::DependencyValidator.missing?(:unzip)
83
-
84
- @zip_types = zip_types.compact.map { ZipType.new(_1) }
85
- end
86
40
  end
87
41
  end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pdfh
4
+ module Services
5
+ # Scans lookup dirs and returns matched documents
6
+ class DirectoryScanner
7
+ # @param directories [Array<String>]
8
+ # @return [DirectoryScanner]
9
+ def initialize(directories)
10
+ @directories = directories
11
+ end
12
+
13
+ # @return [Array<String>]
14
+ def scan
15
+ @directories.flat_map { |dir| scan_dir(dir) }
16
+ end
17
+
18
+ private
19
+
20
+ # @param dir [String]
21
+ # @return [Array<String>]
22
+ def scan_dir(dir)
23
+ Dir.glob(File.join(dir, "*.pdf"))
24
+ end
25
+ end
26
+ end
27
+ end