pdfh 3.3.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.editorconfig +0 -15
  3. data/.gitignore +3 -0
  4. data/.rubocop.yml +5 -1
  5. data/.rubocop_todo.yml +5 -18
  6. data/.simplecov +32 -0
  7. data/AGENTS.md +174 -0
  8. data/CHANGELOG.md +74 -9
  9. data/Gemfile +0 -4
  10. data/Gemfile.lock +26 -37
  11. data/README.md +72 -37
  12. data/Rakefile +24 -6
  13. data/bin/console +3 -10
  14. data/bin/run +0 -1
  15. data/exe/pdfh +1 -1
  16. data/justfile +65 -0
  17. data/lib/pdfh/main.rb +25 -120
  18. data/lib/pdfh/models/document.rb +43 -128
  19. data/lib/pdfh/models/document_type.rb +35 -69
  20. data/lib/pdfh/models/run_options.rb +20 -0
  21. data/lib/pdfh/models/settings.rb +23 -83
  22. data/lib/pdfh/services/directory_scanner.rb +27 -0
  23. data/lib/pdfh/services/document_manager.rb +125 -0
  24. data/lib/pdfh/services/document_matcher.rb +57 -0
  25. data/lib/pdfh/services/opt_parser.rb +76 -0
  26. data/lib/pdfh/services/pdf_text_extractor.rb +45 -0
  27. data/lib/pdfh/services/settings_builder.rb +113 -0
  28. data/lib/pdfh/services/settings_validator.rb +150 -0
  29. data/lib/pdfh/utils/console.rb +5 -5
  30. data/lib/pdfh/utils/date_info.rb +55 -0
  31. data/lib/pdfh/utils/file_info.rb +47 -0
  32. data/lib/pdfh/utils/rename_validator.rb +4 -3
  33. data/lib/pdfh/version.rb +1 -1
  34. data/lib/pdfh.rb +25 -20
  35. data/mise.toml +20 -3
  36. data/pdfh.gemspec +3 -3
  37. metadata +18 -15
  38. data/lib/ext/string.rb +0 -9
  39. data/lib/pdfh/concerns/password_decodable.rb +0 -31
  40. data/lib/pdfh/models/document_period.rb +0 -37
  41. data/lib/pdfh/models/document_sub_type.rb +0 -6
  42. data/lib/pdfh/models/zip_types.rb +0 -17
  43. data/lib/pdfh/settings_template.rb +0 -21
  44. data/lib/pdfh/utils/opt_parser.rb +0 -78
  45. data/lib/pdfh/utils/options.rb +0 -38
  46. data/lib/pdfh/utils/pdf_file_handler.rb +0 -122
  47. data/lib/pdfh/utils/settings_builder.rb +0 -62
@@ -1,37 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
module Pdfh
  # Calculates the period (year and month) of a document from the date
  # extracted from it, shifted by the subtype month offset.
  class DocumentPeriod
    MONTHS_PER_YEAR = 12

    attr_reader :month, :year

    # @param month [String, Integer] raw month, converted with Month.normalize_to_i
    # @param month_offset [Integer, nil] months to add (negative to subtract); nil counts as 0
    # @param year [String] two or four digit year; a two digit year is prefixed with "20"
    # @param day [Object, nil] stored as given; it does not take part in the calculation
    # @return [self]
    def initialize(month:, month_offset:, year:, day: nil)
      @day = day
      @raw_month = month
      @raw_year = year

      # Use a zero-based month index so that divmod wraps any offset, in either
      # direction, into a whole number of years plus a month within 0..11.
      zero_based = Month.normalize_to_i(month) + (month_offset || 0) - 1
      year_offset, month_index = zero_based.divmod(MONTHS_PER_YEAR)

      @month = month_index + 1
      @year = (year.size == 2 ? "20#{year}" : year).to_i + year_offset
    end

    # @return [String] the period formatted as "YYYY-MM", month zero padded
    def to_s
      "#{year}-#{month.to_s.rjust(2, "0")}"
    end

    # @return [String]
    def inspect
      "<#{self.class} year=#{year} month=#{month}>"
    end
  end
end
@@ -1,6 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
module Pdfh
  # Subdivision of a document type: lets a subtype carry its own name,
  # month adjustment and date pattern. Members are given as keywords.
  DocumentSubType = Struct.new(
    :name,
    :month_offset,
    :re_date,
    keyword_init: true
  )
end
@@ -1,17 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
module Pdfh
  # Describes zip files containing PDF files that need pre-processing
  class ZipType
    include Concerns::PasswordDecodable

    attr_reader :name, :re_file, :pwd

    # Stores every entry of +args+ in an instance variable named after its key,
    # then replaces the +re_file+ pattern with its compiled Regexp.
    # @param args [Hash]
    # @return [self]
    def initialize(args)
      args.each do |attribute, value|
        instance_variable_set("@#{attribute}", value)
      end
      @re_file = Regexp.new(@re_file)
    end
  end
end
@@ -1,21 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
module Pdfh
  # rubocop:disable Layout/HashAlignment

  # Example document type entry used by the default settings below.
  # The regular expression samples are single-quoted on purpose: inside a
  # double-quoted literal Ruby drops the backslash of unknown escapes
  # ("\d" becomes "d"), which would corrupt the patterns.
  DOCUMENT_TYPE_TEMPLATE = {
    "name"          => "Example Name",
    "re_file"       => '.*file_name\.pdf',
    "re_date"       => '(\d{2})\/(?<m>\w+)\/(?<y>\d{4})',
    "pwd"           => "BASE64_STRING",
    "store_path"    => "{YEAR}/sub folder",
    "name_template" => "{period} {original}",
    "sub_types"     => []
  }.freeze

  # Default settings content, holding a single example document type.
  SETTINGS_TEMPLATE = {
    "lookup_dirs"           => ["~/Downloads"].freeze,
    "destination_base_path" => "~/Documents",
    "document_types"        => [DOCUMENT_TYPE_TEMPLATE].freeze
  }.freeze
  # rubocop:enable Layout/HashAlignment
end
@@ -1,78 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "optparse"
4
-
5
module Pdfh
  # Parses the command line arguments into an options hash
  class OptParser
    # @param argv [Array<String>] command line arguments (ie. ARGV)
    # @param console [Pdfh::Console, nil] output target; a new Console is built when nil
    # @return [self]
    def initialize(argv:, console: nil)
      @argv = argv
      @console = console || Console.new(false)
      @options = {
        verbose: false,
        dry: false,
        type: nil,
        files: []
      }
    end

    # Parses the arguments, removing the recognised options from argv and
    # keeping the remaining arguments as the file list.
    # Any parse failure (unknown option, missing or invalid argument) is
    # reported along with the help text, and the process exits with status 1.
    # @return [Hash] Parsed options including flags and file arguments
    def parse_argv
      option_parser = build_option_parser
      @options[:files] = option_parser.parse!(@argv)
      @options.transform_keys { |key| key.to_s.tr("-", "_").to_sym }
    rescue OptionParser::ParseError => e
      # ParseError is the parent of InvalidOption, MissingArgument, etc.
      @console.error_print(e.message, exit_app: false)
      puts option_parser.help
      exit 1
    end

    private

    # @return [OptionParser] Configured OptionParser instance
    def build_option_parser
      OptionParser.new do |opts|
        opts.banner = "Usage: #{opts.program_name} [options] [file1.pdf, ...]"
        opts.separator ""
        opts.separator "Specific options:"

        opts.on("-tID", "--type=ID", "Document type id (requires a trailing file list)") { @options[:type] = _1 }
        opts.on("-v", "--verbose", "Show more output. Useful for debug") { @options[:verbose] = true }
        opts.on("-d", "--dry", "Dry run, does not write new pdf") { @options[:dry] = true }

        # Informational flags always terminate the program after printing,
        # independently of what the console methods return.
        opts.on_tail("-T", "--list-types", "List document types in configuration") do
          list_types
          exit
        end
        opts.on_tail("-V", "--version", "Show version") do
          version
          exit
        end
        opts.on_tail("-h", "--help", "help (this dialog)") do
          help
          exit
        end
      end
    end

    # Prints the program name and version
    # @return [Object] whatever the console returns
    def version
      @console.info "#{build_option_parser.program_name} v#{Pdfh::VERSION}"
    end

    # Prints the usage help
    # @return [Object] whatever the console returns
    def help
      @console.info build_option_parser
    end

    # Lists the available document types
    # @return [void]
    def list_types
      # SettingsBuilder reads these through the Pdfh module, so they are set
      # before the settings are built.
      Pdfh.instance_variable_set(:@options, Options.new(@options))
      Pdfh.instance_variable_set(:@console, @console)

      types = SettingsBuilder.build.document_types
      spacing = " " * 2
      # Fall back to the header width when no document type is configured
      max_width = types.map { |type| type.gid.size }.max || "ID".size
      @console.info "#{spacing}#{"ID".ljust(max_width)} Type Name"
      @console.info "#{spacing}#{"—" * max_width} #{"—" * 23}"
      types.each do |type|
        @console.info "#{spacing}#{type.gid.ljust(max_width).yellow} #{type.name}"
      end
    end
  end
end
@@ -1,38 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
module Pdfh
  # Container for the options parsed from the command line arguments
  class Options
    attr_reader :type, :files

    # @param arg_options [Hash] options read from the :verbose, :dry, :type and :files keys
    # @return [self]
    def initialize(arg_options)
      @verbose, @dry, @type = arg_options.values_at(:verbose, :dry, :type)
      @files = arg_options[:files] || []
      # A given document type switches from directory scanning to file mode
      @mode = if @type
                :file
              else
                :directory
              end
    end

    # @return [Boolean] the verbose flag as it was given
    def verbose?
      @verbose
    end

    # @return [Boolean] the dry run flag as it was given
    def dry?
      @dry
    end

    # @return [Boolean] true when a document type was given
    def file_mode?
      @mode.equal?(:file)
    end

    # @return [Boolean] true when the file list has at least one truthy entry
    def files?
      @files ? @files.any? : false
    end
  end
end
@@ -1,122 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
require "shellwords"

module Pdfh
  # Handles a single PDF file: extracts its text, builds the Document and
  # writes the decrypted copy, the backup and the companion files.
  class PdfFileHandler
    attr_reader :file, :type, :document

    # @param [String] file
    # @param [DocumentType, nil] type
    # @return [self]
    def initialize(file, type)
      @file = file
      @type = type
    end

    # @return [boolean] true when a document type was given
    def type?
      !!type
    end

    # Generate document, and process actions.
    # Any StandardError is reported as a "Doc Error" instead of being raised.
    # @param base_path [String] directory under which the document store path is created
    # @return [void]
    def process_document(base_path)
      Pdfh.info "Working on #{base_name.colorize(:light_green)}"
      raise IOError, "File #{file} not found" unless File.exist?(file)

      @document = Document.new(file, type, extract_text)
      document.process
      document.print_info
      write_pdf(base_path)

      nil
    rescue StandardError => e
      Pdfh.ident_print "Doc Error", e.message, color: :red, width: 12
    end

    # Create a backup of original document by renaming it inside its own
    # directory. Nothing is renamed on a dry run.
    # @return [void]
    def make_document_backup(document)
      Pdfh.debug "~~~~~~~~~~~~~~~~~~ Creating PDF backup"
      Dir.chdir(document.home_dir) do
        Pdfh.debug "  Working on: #{document.home_dir.inspect} directory"
        Pdfh.debug "  mv #{document.file_name.inspect} -> #{document.backup_name.inspect}"
        File.rename(document.file_name, document.backup_name) unless Pdfh.dry?
      end
    end

    # Copies every companion file into +destination+, named after the new PDF
    # name but keeping its own extension. Nothing is copied on a dry run.
    # @return [void]
    def copy_companion_files(destination, document)
      Pdfh.debug "~~~~~~~~~~~~~~~~~~ Writing Companion files"
      document.companion_files.each do |file|
        Pdfh.debug "  Working on #{file.inspect}..."
        src_name = File.join(document.home_dir, file)
        src_ext = File.extname(file)
        dest_name = File.basename(document.new_name, ".pdf")
        dest_full = File.join(destination, "#{dest_name}#{src_ext}")
        Pdfh.debug "  cp #{src_name} --> #{dest_full}"
        FileUtils.cp(src_name, dest_full) unless Pdfh.dry?
      end
    end

    # @return [String] file name without its directory
    def base_name
      File.basename(file)
    end

    private

    # Writes the new PDF, then backs up the original and copies its companions.
    # Any StandardError is reported as a "Doc Error" instead of being raised.
    # @return [void]
    def write_pdf(base_path)
      full_path = File.join(base_path, document.store_path, document.new_name)
      dir_path = File.join(base_path, document.store_path)

      FileUtils.mkdir_p(dir_path)

      write_new_pdf(dir_path, full_path)
      make_document_backup(document)
      copy_companion_files(dir_path, document)
    rescue StandardError => e
      Pdfh.ident_print "Doc Error", e.message, color: :red, width: IDENT
    end

    # Builds the qpdf decrypt command line.
    # The password is shell-escaped here; +args+ are joined as given, so callers
    # must escape any value that is not a plain option.
    # @param args [Array<String>] already shell-safe arguments
    # @return [String]
    def qpdf_command(*args)
      password = type&.password
      password_option = password ? "--password=#{Shellwords.escape(password)} " : ""

      %(qpdf #{password_option}--decrypt #{args.join(" ")})
    end

    # Gets the text from the pdf in order to execute
    # the regular expression matches
    # @return [String]
    def extract_text
      temp = Tempfile.new("pdfh")
      temp_path = Shellwords.escape(temp.path)
      Pdfh.debug "~~~~~~~~~~~~~~~~~~ Extract PDF text"
      Pdfh.debug "  --> #{temp.path} temporal file assigned."

      cmd1 = qpdf_command("--stream-data=uncompress", Shellwords.escape(file), temp_path)
      Pdfh.debug "  DeCrypt Command: #{cmd1}"
      _result = `#{cmd1}`

      cmd2 = %(pdftotext -enc UTF-8 #{temp_path} -)
      Pdfh.debug "  Extract Command: #{cmd2}"
      text = `#{cmd2}`
      Pdfh.debug "  Text: #{text.inspect}"
      text
    ensure
      # Close and delete the temporary file once the text has been read
      temp&.close!
    end

    # Runs qpdf to write the decrypted copy; skipped on a dry run.
    # @raise [IOError] when the directory is missing or the file was not created
    # @return [void]
    def write_new_pdf(dir_path, full_path)
      Pdfh.debug "~~~~~~~~~~~~~~~~~~ Writing PDFs"
      raise IOError, "Path #{dir_path} not found." unless Dir.exist?(dir_path)

      cmd = qpdf_command(Shellwords.escape(file), Shellwords.escape(full_path))
      Pdfh.debug "  Write PDF Command: #{cmd}"

      return if Pdfh.dry?

      _result = `#{cmd}`
      raise IOError, "New PDF file #{full_path.inspect} was not created." unless File.file?(full_path)
    end
  end
end
@@ -1,62 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
module Pdfh
  # Loads or creates a default settings yaml file
  class SettingsBuilder
    # Directories searched, in order, for a settings file
    CONFIG_FILE_LOCATIONS = [Dir.pwd, ENV.fetch("XDG_CONFIG_HOME", "~/.config"), "~"].freeze
    SUPPORTED_EXTENSIONS = %w[yml yaml].freeze
    # Environment variable that can point directly to the settings file
    ENV_VAR = "PDFH_CONFIG_FILE"

    class << self
      # Loads the settings from the file named by the environment variable,
      # or from the first file found in the search locations.
      # @raise [RuntimeError] when the environment variable names a missing file
      # @return [Pdfh::Settings]
      def build
        env_config_file = ENV.fetch(ENV_VAR, nil)
        raise "File path in #{ENV_VAR} not found" if env_config_file && !File.exist?(env_config_file)

        config_file = env_config_file || search_config_file
        file_hash = YAML.load_file(config_file, symbolize_names: true)
        Pdfh.debug "Loaded configuration file: #{config_file}"

        Settings.new(file_hash)
      end

      private

      # @return [String] settings base name, taken from the running program name
      def config_file_name
        File.basename($PROGRAM_NAME)
      end

      # @return [String (frozen)]
      def default_settings_name
        "#{config_file_name}.#{SUPPORTED_EXTENSIONS.first}"
      end

      # Writes the default settings into the home directory unless the file is
      # already there. The path is returned in both cases so the caller always
      # has a file to load.
      # @return [String]
      def create_settings_file
        full_path = File.join(File.expand_path("~"), default_settings_name)
        return full_path if File.exist?(full_path) # double check

        File.write(full_path, Pdfh::SETTINGS_TEMPLATE.to_yaml)
        Pdfh.info "Default settings file was created: #{full_path.colorize(:green)}"

        full_path
      end

      # Gets the first settings file found, or creates a new one
      # @return [String]
      def search_config_file
        CONFIG_FILE_LOCATIONS.each do |dir_string|
          dir = File.expand_path(dir_string)
          SUPPORTED_EXTENSIONS.each do |ext|
            path = File.join(dir, "#{config_file_name}.#{ext}")
            return path if File.exist?(path)
          end
        end

        Pdfh.warn_print "No configuration file was found within paths: #{CONFIG_FILE_LOCATIONS.join(", ")}"
        create_settings_file
      end
    end
  end
end