pdfh 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2cc0c0f6ba735b3d69c9f5bf32e6acc94b185b3606574834504634acad34f1e8
4
- data.tar.gz: fd4bf05918b2631d2cb6335341704be2540d50cdcf5a395e7e85dd2206c865ca
3
+ metadata.gz: 29f90e2cbc737c2ea6f15b58f6a46513d90ac68053c770018a5a2549b76adcc6
4
+ data.tar.gz: 476440f76e4cd8eb0cdc248d9040cbf75fc510656ff1129d83d18bc1f9edf8ec
5
5
  SHA512:
6
- metadata.gz: c58687eba046fb229b713a70650554a93473b22c9fe39c6d5427adde475187ec64a91031fb9b6ef6f22de9e516a5b5f75e7bdd6276657541127247395d53cec4
7
- data.tar.gz: 0bc6756a6c83335f4afaad5708da797bd5d4e052542b12fa0ee030e23e66a779fe6303aa21eb6f63b6bba849291bf3a2f08a0a32ea1ea4b4f05ab14f2c3b976d
6
+ metadata.gz: 10672062c7041f020920bf4c51a9af88d82130d4db59f8a332a5b05df055bf6079c127b4c8d7d31fb145f9fc88b9527bc330a83f23d2f0dc69c92e843c8246ba
7
+ data.tar.gz: 606afc6611bb8036cd073c8a5a031dbc0812d2385de77d1b774c2c49b42673f266e4020c679a558dc3d709b3309e1e9d8708d2a8a7b6808f66b4d1ae770da780
data/.gitignore CHANGED
@@ -39,4 +39,3 @@ Icon
39
39
  Network Trash Folder
40
40
  Temporary Items
41
41
  .apdisk
42
-
@@ -0,0 +1,36 @@
1
+ # See https://pre-commit.com for more information
2
+ # See https://pre-commit.com/hooks.html for more hooks
3
+ default_install_hook_types:
4
+ - pre-commit
5
+ - commit-msg
6
+
7
+ repos:
8
+ - repo: https://github.com/pre-commit/pre-commit-hooks
9
+ rev: v5.0.0
10
+ hooks:
11
+ - id: trailing-whitespace
12
+ - id: end-of-file-fixer
13
+ exclude: ^.idea/
14
+ - id: check-yaml
15
+ - id: check-added-large-files
16
+ - id: check-executables-have-shebangs
17
+ - id: check-shebang-scripts-are-executable
18
+ - id: mixed-line-ending
19
+ - repo: https://github.com/gitleaks/gitleaks
20
+ rev: v8.25.0
21
+ hooks:
22
+ - id: gitleaks
23
+ - repo: https://github.com/rubocop/rubocop
24
+ rev: v1.75.4
25
+ hooks:
26
+ - id: rubocop
27
+ - repo: https://github.com/compilerla/conventional-pre-commit
28
+ rev: v4.1.0
29
+ hooks:
30
+ - id: conventional-pre-commit
31
+ stages: [commit-msg]
32
+ args: []
33
+ - repo: https://github.com/codespell-project/codespell
34
+ rev: v2.4.1
35
+ hooks:
36
+ - id: codespell
data/.rubocop.yml CHANGED
@@ -1,10 +1,9 @@
1
1
  inherit_from: .rubocop_todo.yml
2
2
 
3
- require:
4
- - rubocop-factory_bot
3
+ plugins:
5
4
  - rubocop-performance
5
+ - rubocop-factory_bot
6
6
  - rubocop-rake
7
- - rubocop-rspec
8
7
 
9
8
  AllCops:
10
9
  NewCops: enable
@@ -14,6 +13,8 @@ AllCops:
14
13
  - pkg/**/*
15
14
  - tmp/**/*
16
15
  - vendor/**/*
16
+ SuggestExtensions:
17
+ rubocop-rspec: false
17
18
 
18
19
  Layout/LineLength:
19
20
  Max: 120
data/.rubocop_todo.yml CHANGED
@@ -1,54 +1,33 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2024-03-13 18:36:20 UTC using RuboCop version 1.62.1.
3
+ # on 2025-04-19 20:15:39 UTC using RuboCop version 1.75.2.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
- # Offense count: 8
9
+ # Offense count: 10
10
10
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
11
11
  Metrics/AbcSize:
12
- Max: 25
12
+ Max: 38
13
+
14
+ # Offense count: 16
15
+ # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
16
+ # AllowedMethods: refine
17
+ Metrics/BlockLength:
18
+ Max: 131
13
19
 
14
20
  # Offense count: 6
15
21
  # Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
16
22
  Metrics/MethodLength:
17
- Max: 17
18
-
19
- # Offense count: 2
20
- RSpec/AnyInstance:
21
- Exclude:
22
- - 'spec/pdfh/main_spec.rb'
23
-
24
- # Offense count: 8
25
- # Configuration parameters: Include, CustomTransform, IgnoreMethods, SpecSuffixOnly.
26
- # Include: **/*_spec*rb*, **/spec/**/*
27
- RSpec/FilePath:
28
- Exclude:
29
- - 'spec/pdfh/models/document_period_spec.rb'
30
- - 'spec/pdfh/models/document_spec.rb'
31
- - 'spec/pdfh/utils/console_spec.rb'
32
- - 'spec/pdfh/utils/month_spec.rb'
33
- - 'spec/pdfh/utils/opt_parser_spec.rb'
34
- - 'spec/pdfh/utils/pdf_file_handler_spec.rb'
35
- - 'spec/pdfh/utils/rename_validator_spec.rb'
36
- - 'spec/pdfh/utils/settings_builder_spec.rb'
23
+ Max: 33
37
24
 
38
- # Offense count: 8
39
- # Configuration parameters: Include, CustomTransform, IgnoreMethods, IgnoreMetadata.
40
- # Include: **/*_spec.rb
41
- RSpec/SpecFilePathFormat:
25
+ # Offense count: 1
26
+ # Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
27
+ # AllowedNames: as, at, by, cc, db, id, if, in, io, ip, of, on, os, pp, to
28
+ Naming/MethodParameterName:
42
29
  Exclude:
43
- - '**/spec/routing/**/*'
44
- - 'spec/pdfh/models/document_period_spec.rb'
45
- - 'spec/pdfh/models/document_spec.rb'
46
- - 'spec/pdfh/utils/console_spec.rb'
47
- - 'spec/pdfh/utils/month_spec.rb'
48
- - 'spec/pdfh/utils/opt_parser_spec.rb'
49
- - 'spec/pdfh/utils/pdf_file_handler_spec.rb'
50
- - 'spec/pdfh/utils/rename_validator_spec.rb'
51
- - 'spec/pdfh/utils/settings_builder_spec.rb'
30
+ - 'lib/pdfh/utils/console.rb'
52
31
 
53
32
  # Offense count: 3
54
33
  # This cop supports safe autocorrection (--autocorrect).
data/Gemfile.lock CHANGED
@@ -1,13 +1,13 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pdfh (3.2.0)
4
+ pdfh (3.3.0)
5
5
  colorize (~> 1.1.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- activesupport (8.0.1)
10
+ activesupport (8.0.2)
11
11
  base64
12
12
  benchmark (>= 0.3)
13
13
  bigdecimal
@@ -21,7 +21,7 @@ GEM
21
21
  tzinfo (~> 2.0, >= 2.0.5)
22
22
  uri (>= 0.13.1)
23
23
  ansi (1.5.0)
24
- ast (2.4.2)
24
+ ast (2.4.3)
25
25
  base64 (0.2.0)
26
26
  benchmark (0.4.0)
27
27
  bigdecimal (3.1.9)
@@ -31,12 +31,12 @@ GEM
31
31
  coderay (1.1.3)
32
32
  colorize (1.1.0)
33
33
  concurrent-ruby (1.3.5)
34
- connection_pool (2.5.0)
34
+ connection_pool (2.5.3)
35
35
  date (3.4.1)
36
36
  debug (1.10.0)
37
37
  irb (~> 1.10)
38
38
  reline (>= 0.3.8)
39
- diff-lcs (1.6.0)
39
+ diff-lcs (1.6.1)
40
40
  docile (1.4.1)
41
41
  drb (2.2.1)
42
42
  factory_bot (6.5.1)
@@ -44,36 +44,37 @@ GEM
44
44
  i18n (1.14.7)
45
45
  concurrent-ruby (~> 1.0)
46
46
  io-console (0.8.0)
47
- irb (1.15.1)
47
+ irb (1.15.2)
48
48
  pp (>= 0.6.0)
49
49
  rdoc (>= 4.0.0)
50
50
  reline (>= 0.4.2)
51
- json (2.10.1)
51
+ json (2.11.3)
52
52
  language_server-protocol (3.17.0.4)
53
53
  lint_roller (1.1.0)
54
- logger (1.6.6)
54
+ logger (1.7.0)
55
55
  method_source (1.1.0)
56
- minitest (5.25.4)
57
- parallel (1.26.3)
58
- parser (3.3.7.1)
56
+ minitest (5.25.5)
57
+ parallel (1.27.0)
58
+ parser (3.3.8.0)
59
59
  ast (~> 2.4.1)
60
60
  racc
61
61
  pp (0.6.2)
62
62
  prettyprint
63
63
  prettyprint (0.2.0)
64
+ prism (1.4.0)
64
65
  pry (0.15.2)
65
66
  coderay (~> 1.1)
66
67
  method_source (~> 1.0)
67
- psych (5.2.3)
68
+ psych (5.2.4)
68
69
  date
69
70
  stringio
70
71
  racc (1.8.1)
71
72
  rainbow (3.1.1)
72
73
  rake (13.2.1)
73
- rdoc (6.12.0)
74
+ rdoc (6.13.1)
74
75
  psych (>= 4.0.0)
75
76
  regexp_parser (2.10.0)
76
- reline (0.6.0)
77
+ reline (0.6.1)
77
78
  io-console (~> 0.5)
78
79
  rspec (3.13.0)
79
80
  rspec-core (~> 3.13.0)
@@ -81,16 +82,16 @@ GEM
81
82
  rspec-mocks (~> 3.13.0)
82
83
  rspec-core (3.13.3)
83
84
  rspec-support (~> 3.13.0)
84
- rspec-expectations (3.13.3)
85
+ rspec-expectations (3.13.4)
85
86
  diff-lcs (>= 1.2.0, < 2.0)
86
87
  rspec-support (~> 3.13.0)
87
- rspec-mocks (3.13.2)
88
+ rspec-mocks (3.13.4)
88
89
  diff-lcs (>= 1.2.0, < 2.0)
89
90
  rspec-support (~> 3.13.0)
90
- rspec-support (3.13.2)
91
+ rspec-support (3.13.3)
91
92
  rspec_junit_formatter (0.6.0)
92
93
  rspec-core (>= 2, < 4, != 2.12.0)
93
- rubocop (1.73.2)
94
+ rubocop (1.75.5)
94
95
  json (~> 2.3)
95
96
  language_server-protocol (~> 3.17.0.2)
96
97
  lint_roller (~> 1.1.0)
@@ -98,19 +99,21 @@ GEM
98
99
  parser (>= 3.3.0.2)
99
100
  rainbow (>= 2.2.2, < 4.0)
100
101
  regexp_parser (>= 2.9.3, < 3.0)
101
- rubocop-ast (>= 1.38.0, < 2.0)
102
+ rubocop-ast (>= 1.44.0, < 2.0)
102
103
  ruby-progressbar (~> 1.7)
103
104
  unicode-display_width (>= 2.4.0, < 4.0)
104
- rubocop-ast (1.38.1)
105
- parser (>= 3.3.1.0)
106
- rubocop-capybara (2.21.0)
107
- rubocop (~> 1.41)
108
- rubocop-factory_bot (2.27.0)
105
+ rubocop-ast (1.44.1)
106
+ parser (>= 3.3.7.2)
107
+ prism (~> 1.4)
108
+ rubocop-capybara (2.22.1)
109
+ lint_roller (~> 1.1)
110
+ rubocop (~> 1.72, >= 1.72.1)
111
+ rubocop-factory_bot (2.27.1)
109
112
  lint_roller (~> 1.1)
110
113
  rubocop (~> 1.72, >= 1.72.1)
111
- rubocop-performance (1.24.0)
114
+ rubocop-performance (1.25.0)
112
115
  lint_roller (~> 1.1)
113
- rubocop (>= 1.72.1, < 2.0)
116
+ rubocop (>= 1.75.0, < 2.0)
114
117
  rubocop-ast (>= 1.38.0, < 2.0)
115
118
  rubocop-rake (0.7.1)
116
119
  lint_roller (~> 1.1)
@@ -134,7 +137,7 @@ GEM
134
137
  terminal-table
135
138
  simplecov-html (0.13.1)
136
139
  simplecov_json_formatter (0.1.4)
137
- stringio (3.1.5)
140
+ stringio (3.1.7)
138
141
  terminal-table (4.0.0)
139
142
  unicode-display_width (>= 1.1.1, < 4)
140
143
  tzinfo (2.0.6)
@@ -168,4 +171,4 @@ DEPENDENCIES
168
171
  versionomy (~> 0.5)
169
172
 
170
173
  BUNDLED WITH
171
- 2.6.5
174
+ 2.6.8
data/README.md CHANGED
@@ -5,7 +5,7 @@
5
5
  [![Conventional Commits][cc-img]][cc-url]
6
6
  [![Current version][gem-img]][gem-url]
7
7
 
8
- Examine all PDF files in Look up directories, remove password (if has one), rename and copy to a new directory using regular expressions.
8
+ Examine all PDF files in lookup directories, remove passwords (if present), rename them, and copy them to a new directory using regular expressions.
9
9
 
10
10
  ## Installation
11
11
 
@@ -20,8 +20,7 @@ You need to install pdf handling dependencies in order to use this gem.
20
20
  #### macOS
21
21
 
22
22
  ```bash
23
- brew install qpdf # for qpdf
24
- brew install xpdf # for pdftotext
23
+ brew install qpdf xpdf # xpdf provides pdftotext
25
24
  ```
26
25
 
27
26
  #### Fedora
@@ -38,38 +37,58 @@ sudo pacman -S qpdf poppler
38
37
 
39
38
  ## Usage
40
39
 
41
- After installing this gem you need to create your configuration file on any of the following directories:
42
-
40
+ After installing this gem, create your configuration file in one of the following directories:
43
41
  - `~/.config/pdfh.yml`
44
42
  - `~/pdfh.yml`
45
- - or configure `PDFH_CONFIG_FILE` environment variable
43
+ - or configure the `PDFH_CONFIG_FILE` environment variable
46
44
 
45
+ Example configuration:
47
46
  ```yaml
48
47
  ---
49
- lookup_dirs: # Directories where all pdf's are going to be analyzed
48
+ lookup_dirs: # Directories where all PDFs will be analyzed
50
49
  - ~/Downloads
51
50
  destination_base_path: ~/PDFs # Directory where all matching documents will be copied (MUST exist)
52
51
  document_types:
53
- - name: Document From Bank # Description
52
+ - name: My Bank # Description (type)
54
53
  re_file: '.*MyBankReg\.pdf' # Regular expression to match its filename
55
- re_date: 'al \d{1,2} de (\w+) del? (\d+)' # Date regular expresion
56
- pwd: base64string # [OPTIONAL] Password if the document is protected
54
+ re_date: '\d{1,2} de (\w+) de (\d+)' # Date regular expression
55
+ pwd: base64_encoded # [OPTIONAL] Password if the document is protected
57
56
  store_path: "{year}/bank_docs" # Relative path to copy this document
58
57
  name_template: '{period} {subtype}' # Template for new filename when copied
59
58
  sub_types: # [OPTIONAL] In case you need an extra category
60
- - name: Account1 # Regular expresion to match this subtype
59
+ - name: AccountX # Regular expression to match this subtype
60
+ re_date: '\d{1,2} de (\w+)' # [OPTIONAL] Date regular expression
61
61
  month_offset: -1 # [OPTIONAL] Integer (signed) value to adjust month
62
+ zip_types: # [OPTIONAL] Zip files to be processed BEFORE the PDFs
63
+ - name: My Bank 2 # Description
64
+ re_file: 'Document_MR5664_\d+_\d+\.zip' # Regular expression to match its filename
65
+ pwd: base64_encoded # [OPTIONAL] Password if the document is protected
62
66
  ```
63
67
 
68
+ > [!CAUTION]
69
+ > `pwd` is not encrypted, so be careful with this option. It is stored as a base64 string as a very thin layer of obfuscation.
70
+ > You can use `echo -n 'password' | base64` to encode your password.
71
+
64
72
  **Store Path** and **Name Template** supported placeholders:
65
73
 
66
- - `{original}` Original filename
67
- - `{period}` 2022-01
68
- - `{year}` 2022
69
- - `{month}` 01
70
- - `{type}` document_type.name
71
- - `{subtype}` subtype.name if matched
72
- - `{extra}` day if provided/matched
74
+ Placeholder | Description | Example
75
+ --- |---------------------------| ---
76
+ `{original}` | Original filename | MyBankDocument2.pdf
77
+ `{period}` | Year-Month | 2022-01
78
+ `{year}` | Year | 2022
79
+ `{month}` | Month | 01
80
+ `{type}` | Document type **name** | My Bank
81
+ `{subtype}` | Sub type **name** | AccountX
82
+ `{extra}` | day if captured/matched | 01
83
+
84
+ `{period}`, `{year}`, `{month}` and `{extra}` are calculated from the date captured by the regular expression.
85
+
86
+ ### Examples
87
+
88
+ Date text | RegEx | Captured
89
+ --- | --- | ---
90
+ `01/02/2025` | `(?<d>\d{2})\/(?<m>\d{2})\/(?<y>\d{4})` | d: `01` m: `02` y: `2025`
91
+ `072025 - ` | `(?<m>\d{2})(?<y>\d{4}) -` | m: `07` y: `2025`
73
92
 
74
93
  ## Development
75
94
 
@@ -85,6 +104,15 @@ build pdfh.gemspec
85
104
  gem install pdfh-*
86
105
  ```
87
106
 
107
+ To release a new version, run:
108
+
109
+ ```bash
110
+ rake bump
111
+ rake release
112
+ ```
113
+
114
+ This will create a git tag for the version, push git commits and tags, and upload the `.gem` file to rubygems.org.
115
+
88
116
  ### Conventional Commits
89
117
 
90
118
  ```bash
data/bin/run CHANGED
@@ -6,4 +6,6 @@ require "debug"
6
6
  require "pdfh"
7
7
  require "pry"
8
8
 
9
- Pdfh::Main.start
9
+ exit(1) if Pdfh::Utils::DependencyValidator.missing?(*Pdfh::REQUIRED_CMDS)
10
+
11
+ Pdfh::Main.start(argv: ARGV)
data/exe/pdfh CHANGED
@@ -1,26 +1,12 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- require "open3"
5
4
  require "pdfh"
6
5
 
7
- # @param apps [Array]
8
- # @return [Boolean]
9
- def validate_installed(*apps)
10
- found_app = []
11
- apps.each_with_object(found_app) do |app, result|
12
- _stdout, _stderr, status = Open3.capture3("which #{app}")
13
- puts "Missing #{app} command." unless status.success?
14
- result << status.success?
15
- end
16
-
17
- found_app.all?
18
- end
19
-
20
- exit(1) unless validate_installed("qpdf", "pdftotext")
6
+ exit(1) if Pdfh::Utils::DependencyValidator.missing?(*Pdfh::REQUIRED_CMDS)
21
7
 
22
8
  begin
23
- Pdfh::Main.start
9
+ Pdfh::Main.start(argv: ARGV)
24
10
  rescue StandardError => e
25
11
  Pdfh.error_print e.message
26
12
  end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pdfh
4
+ module Concerns
5
+ # Module that provides password handling capabilities for classes that contain
6
+ # password attributes. It handles Base64-encoded passwords by automatically
7
+ # detecting and decoding them when accessed through the password method.
8
+ module PasswordDecodable
9
+ # Returns the decoded password if it's Base64 encoded, otherwise returns it as is
10
+ # @return [String]
11
+ def password
12
+ return Base64.decode64(pwd) if base64?
13
+
14
+ pwd
15
+ end
16
+
17
+ # @return [Boolean]
18
+ def password?
19
+ base64?
20
+ end
21
+
22
+ private
23
+
24
+ # @return [Boolean]
25
+ def base64?
26
+ pwd.is_a?(String) && pwd.size.positive? &&
27
+ Base64.strict_encode64(Base64.decode64(pwd)) == pwd
28
+ end
29
+ end
30
+ end
31
+ end
data/lib/pdfh/main.rb CHANGED
@@ -4,13 +4,12 @@ module Pdfh
4
4
  # Main functionality. This class is intended to manage the pdf documents
5
5
  class Main
6
6
  class << self
7
+ # @param argv [Array<String>]
7
8
  # @return [void]
8
- def start
9
- arg_options = Pdfh::OptParser.parse_argv
9
+ def start(argv:)
10
+ arg_options = Pdfh::OptParser.new(argv: argv).parse_argv
10
11
  @options = Options.new(arg_options)
11
-
12
- Pdfh.instance_variable_set(:@options, options)
13
- Pdfh.instance_variable_set(:@console, Console.new(options.verbose?))
12
+ assign_global_utils(@options)
14
13
  Pdfh.print_options(arg_options)
15
14
 
16
15
  @settings = SettingsBuilder.build
@@ -30,8 +29,15 @@ module Pdfh
30
29
 
31
30
  attr_reader :options, :settings
32
31
 
32
+ # @param options [Options]
33
+ # @return [void]
34
+ def assign_global_utils(options)
35
+ Pdfh.instance_variable_set(:@options, options)
36
+ Pdfh.instance_variable_set(:@console, Console.new(options.verbose?))
37
+ end
38
+
33
39
  # @param [String] file_name
34
- # @return [DocumentType]
40
+ # @return [DocumentType, nil]
35
41
  def match_doc_type(file_name)
36
42
  settings.document_types.each do |type|
37
43
  match = type.re_file.match(file_name)
@@ -64,31 +70,74 @@ module Pdfh
64
70
 
65
71
  # @param [String] work_directory
66
72
  # @return [void]
73
+ def process_zip_files(work_directory)
74
+ @settings.zip_types&.each do |zip_type|
75
+ find_files(work_directory, :zip).each do |file|
76
+ next unless zip_type.re_file.match?(File.basename(file))
77
+
78
+ Pdfh.info " > Processing zip file: #{file.green}"
79
+ password_opt = "-P #{zip_type.password}" if zip_type.password?
80
+ `unzip -o #{password_opt} #{file} -d #{work_directory}`
81
+ end
82
+ end
83
+ end
84
+
85
+ # @param directory [String]
86
+ # @param type [String, Symbol]
87
+ # @return [Array<String>]
88
+ def find_files(directory, type)
89
+ glob = File.join(directory, "*.#{type}")
90
+ Dir.glob(glob)
91
+ end
92
+
67
93
  def process_directory(work_directory)
68
94
  Pdfh.headline(work_directory)
69
- processed_count = 0
70
- ignored_files = []
71
- files = Dir["#{work_directory}/*.pdf"]
95
+ process_zip_files(work_directory) if @settings.zip_types?
96
+ processed_result = RunResult.new
97
+ files = find_files(work_directory, :pdf)
72
98
  files.each do |pdf_file|
73
99
  type = match_doc_type(pdf_file)
74
100
  if type
75
- processed_count += 1
76
101
  PdfFileHandler.new(pdf_file, type).process_document(settings.base_path)
102
+ processed_result.add_processed(pdf_file)
77
103
  else
78
- ignored_files << base_name_no_ext(pdf_file)
104
+ processed_result.add_ignored(pdf_file)
79
105
  end
80
106
  end
81
- puts " (No files processed)".colorize(:light_black) if processed_count.zero?
82
- return unless Pdfh.verbose?
83
-
84
- puts "\n No document type found for these PDF files:" if ignored_files.any?
85
- ignored_files.each.with_index(1) { |file, index| Pdfh.ident_print index, file, color: :magenta }
107
+ print_processing_results(processed_result)
86
108
  end
87
109
 
88
110
  # @return [String]
89
111
  def base_name_no_ext(file)
90
112
  File.basename(file, File.extname(file))
91
113
  end
114
+
115
+ def print_processing_results(result)
116
+ Pdfh.info " (No files processed)".colorize(:light_black) if result.processed.empty?
117
+ return unless Pdfh.verbose?
118
+
119
+ Pdfh.info "\n No document type found for these PDF files:" if result.ignored.any?
120
+ result.ignored.each.with_index(1) do |file, index|
121
+ Pdfh.ident_print index, base_name_no_ext(file), color: :magenta
122
+ end
123
+ end
124
+ end
125
+
126
+ # keeps track of the processed and ignored files
127
+ class RunResult
128
+ attr_reader :processed, :ignored
129
+
130
+ # @return [self]
131
+ def initialize
132
+ @processed = []
133
+ @ignored = []
134
+ end
135
+
136
+ # @return [void]
137
+ def add_ignored(file) = @ignored << file
138
+
139
+ # @return [void]
140
+ def add_processed(file) = @processed << file
92
141
  end
93
142
  end
94
143
  end
@@ -3,6 +3,22 @@
3
3
  module Pdfh
4
4
  # Represents a type of document that can be processed by pdfh
5
5
  class DocumentType
6
+ include Concerns::PasswordDecodable
7
+
8
+ # @!attribute [r] name
9
+ # @return [String] The name of the document type.
10
+ # @!attribute [r] re_file
11
+ # @return [Regexp] The regular expression to match file names.
12
+ # @!attribute [r] re_date
13
+ # @return [Regexp] The regular expression to extract dates and its information.
14
+ # @!attribute [r] pwd
15
+ # @return [String, nil] The base64 password for the document type, if any.
16
+ # @!attribute [r] store_path
17
+ # @return [String] The path where the document will be stored.
18
+ # @!attribute [r] name_template
19
+ # @return [String] The template for generating document names.
20
+ # @!attribute [r] sub_types
21
+ # @return [Array<DocumentSubType>, nil] The subtypes of the document, if any.
6
22
  attr_reader :name, :re_file, :re_date, :pwd, :store_path, :name_template, :sub_types
7
23
 
8
24
  # @param args [Hash]
@@ -41,13 +57,6 @@ module Pdfh
41
57
  sub_types&.find { |st| /#{st.name}/i.match?(text) }
42
58
  end
43
59
 
44
- # @return [String]
45
- def password
46
- return Base64.decode64(pwd) if base64?
47
-
48
- pwd
49
- end
50
-
51
60
  # @param values [Hash{Symbol->String}]
52
61
  # @return [String]
53
62
  def generate_new_name(values)
@@ -64,11 +73,6 @@ module Pdfh
64
73
 
65
74
  attr_accessor :path_validator, :name_validator
66
75
 
67
- # @return [boolean]
68
- def base64?
69
- pwd.is_a?(String) && Base64.strict_encode64(Base64.decode64(pwd)) == pwd
70
- end
71
-
72
76
  # @param sub_types [Array<Hash{Symbol->String}>]
73
77
  # @return [Array<DocumentSubType>]
74
78
  def extract_subtypes(sub_types)
@@ -3,7 +3,13 @@
3
3
  module Pdfh
4
4
  # Handles the config yaml data mapping, and associates a file name with a doc type
5
5
  class Settings
6
- attr_reader :lookup_dirs, :base_path
6
+ # @!attribute [r] lookup_dirs
7
+ # @return [Array<String>] List of directories to look up for processing.
8
+ # @!attribute [r] base_path
9
+ # @return [String] The base directory path for storing processed files.
10
+ # @!attribute [r] zip_types
11
+ # @return [Array<ZipType>, nil] List of zip types to process, or nil if none.
12
+ attr_reader :lookup_dirs, :base_path, :zip_types
7
13
 
8
14
  # @param config_data [Hash]
9
15
  # @return [self]
@@ -15,7 +21,8 @@ module Pdfh
15
21
  lookup_dirs.each.with_index(1) { |dir, idx| Pdfh.debug " #{idx}. #{dir}" }
16
22
  Pdfh.debug
17
23
 
18
- load_doc_types(config_data[:document_types])
24
+ build_doc_types(config_data[:document_types])
25
+ build_zip_types(config_data[:zip_types]) if config_data.key?(:zip_types)
19
26
  end
20
27
 
21
28
  # @return [Array<DocumentType>]
@@ -28,8 +35,14 @@ module Pdfh
28
35
  @document_types[id]
29
36
  end
30
37
 
38
+ # @return [Boolean]
39
+ def zip_types?
40
+ !!zip_types&.any?
41
+ end
42
+
31
43
  private
32
44
 
45
+ # @param lookup_dirs_list [Array<String>]
33
46
  # @return [void]
34
47
  def process_lookup_dirs(lookup_dirs_list)
35
48
  @lookup_dirs = lookup_dirs_list.filter_map do |dir|
@@ -44,14 +57,16 @@ module Pdfh
44
57
  end
45
58
 
46
59
  # @return [void]
60
+ # @param dir [String]
47
61
  def process_destination_base(dir)
48
62
  @base_path = File.expand_path(dir)
49
63
  raise ArgumentError, "Destination base directory is not configured." if @base_path.nil?
50
64
  raise ArgumentError, "Destination base directory #{@base_path} does not exist." unless File.directory?(@base_path)
51
65
  end
52
66
 
53
- # @return [Array<DocumentType>]
54
- def load_doc_types(doc_types)
67
+ # @param doc_types [Array<Hash>]
68
+ # @return [void]
69
+ def build_doc_types(doc_types)
55
70
  @document_types = doc_types.each_with_object({}) do |data, result|
56
71
  doc_type = DocumentType.new(data)
57
72
  result.store(doc_type.gid, doc_type)
@@ -60,5 +75,13 @@ module Pdfh
60
75
  Pdfh.backtrace_print e if Pdfh.verbose?
61
76
  end
62
77
  end
78
+
79
+ # @param zip_types [Array<Hash>]
80
+ # @return [void]
81
+ def build_zip_types(zip_types)
82
+ exit(1) if Pdfh::Utils::DependencyValidator.missing?(:unzip)
83
+
84
+ @zip_types = zip_types.compact.map { ZipType.new(_1) }
85
+ end
63
86
  end
64
87
  end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pdfh
4
+ # Zip files that contain PDF files that need pre-processing
5
+ class ZipType
6
+ include Concerns::PasswordDecodable
7
+
8
+ attr_reader :name, :re_file, :pwd
9
+
10
+ # @param args [Hash]
11
+ # @return [self]
12
+ def initialize(args)
13
+ args.each { |k, v| instance_variable_set(:"@#{k}", v) }
14
+ @re_file = Regexp.new(re_file)
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "open3"
4
+
5
+ module Pdfh
6
+ module Utils
7
+ # Provides methods to validate external dependencies
8
+ module DependencyValidator
9
+ module_function
10
+
11
+ # Validates if the required command-line applications are installed
12
+ # @param apps [Array<String>] names of required command-line applications
13
+ # @return [Boolean] true if all applications are installed, false otherwise
14
+ def installed?(*apps)
15
+ missing = apps.filter_map do |app|
16
+ _stdout, _stderr, status = Open3.capture3("which #{app}")
17
+
18
+ app.to_s unless status.success?
19
+ end
20
+
21
+ if missing.any?
22
+ errors = missing.map(&:red)
23
+ puts "Required dependency #{errors.join(", ")} not found. Please install it before continuing."
24
+ end
25
+ missing.empty?
26
+ end
27
+
28
+ # @param apps [Array<String>]
29
+ # @return [Boolean] true if any application is missing, false if all are installed
30
+ def missing?(*apps)
31
+ !installed?(*apps)
32
+ end
33
+ end
34
+ end
35
+ end
@@ -5,58 +5,73 @@ require "optparse"
5
5
  module Pdfh
6
6
  # Handles Argument options
7
7
  class OptParser
8
- OPT_PARSER = OptionParser.new do |opts|
9
- opts.default_argv
10
- # Process ARGV
11
- opts.banner = "Usage: #{opts.program_name} [options] [file1 ...]"
12
- opts.separator ""
13
- opts.separator "Specific options:"
14
-
15
- opts.on("-tID", "--type=ID", "Document type id (requires a trailing file list)")
16
- opts.on_tail("-T", "--list-types", "List document types in configuration") { list_types || exit }
17
- opts.on_tail("-V", "--version", "Show version") { version || exit }
18
- opts.on_tail("-h", "--help", "help (this dialog)") { help || exit }
19
-
20
- opts.on("-v", "--verbose", "Show more output. Useful for debug")
21
- opts.on("-d", "--dry", "Dry run, does not write new pdf")
8
+ # @param argv [Array<String>] command line arguments (i.e. ARGV)
9
+ # @param console [Pdfh::Console, nil]
10
+ # @return [self]
11
+ def initialize(argv:, console: nil)
12
+ @argv = argv
13
+ @console = console || Console.new(false)
14
+ @options = {
15
+ verbose: false,
16
+ dry: false,
17
+ type: nil,
18
+ files: []
19
+ }
22
20
  end
23
21
 
24
- class << self
25
- # @return [Hash]
26
- def parse_argv
27
- Pdfh.instance_variable_set(:@console, Console.new(false))
28
-
29
- options = { dry: false, verbose: false }
30
- OPT_PARSER.parse!(into: options)
31
- options[:files] = ARGV if ARGV.any?
32
- options.transform_keys { |key| key.to_s.tr("-", "_").to_sym }
33
- rescue OptionParser::InvalidOption => e
34
- Pdfh.error_print(e.message, exit_app: false)
35
- puts OPT_PARSER.help
36
- exit 1
37
- end
22
+ # @return [Hash] Parsed options including flags and file arguments
23
+ def parse_argv
24
+ option_parser = build_option_parser
25
+ non_option_args = option_parser.parse!(@argv)
26
+ @options[:files] = non_option_args
27
+ @options.transform_keys { |key| key.to_s.tr("-", "_").to_sym }
28
+ rescue OptionParser::InvalidOption => e
29
+ @console.error_print(e.message, exit_app: false)
30
+ puts option_parser.help
31
+ exit 1
32
+ end
38
33
 
39
- # @return [nil]
40
- def version
41
- puts "#{OPT_PARSER.program_name} v#{Pdfh::VERSION}"
42
- end
34
+ private
43
35
 
44
- # @return [nil]
45
- def help
46
- puts OPT_PARSER
36
+ # @return [OptionParser] Configured OptionParser instance
37
+ def build_option_parser
38
+ OptionParser.new do |opts|
39
+ opts.banner = "Usage: #{opts.program_name} [options] [file1.pdf, ...]"
40
+ opts.separator ""
41
+ opts.separator "Specific options:"
42
+
43
+ opts.on("-tID", "--type=ID", "Document type id (requires a trailing file list)") { @options[:type] = _1 }
44
+ opts.on("-v", "--verbose", "Show more output. Useful for debug") { @options[:verbose] = true }
45
+ opts.on("-d", "--dry", "Dry run, does not write new pdf") { @options[:dry] = true }
46
+ opts.on_tail("-T", "--list-types", "List document types in configuration") { list_types && exit }
47
+ opts.on_tail("-V", "--version", "Show version") { version || exit }
48
+ opts.on_tail("-h", "--help", "help (this dialog)") { help || exit }
47
49
  end
50
+ end
51
+
52
+ # @return [nil]
53
+ def version
54
+ @console.info "#{build_option_parser.program_name} v#{Pdfh::VERSION}"
55
+ end
56
+
57
+ # @return [nil]
58
+ def help
59
+ @console.info build_option_parser
60
+ end
61
+
62
+ # Lists the available document types
63
+ # @return [nil]
64
+ def list_types
65
+ Pdfh.instance_variable_set(:@options, Options.new(@options))
66
+ Pdfh.instance_variable_set(:@console, @console)
48
67
 
49
- # @return [nil]
50
- def list_types
51
- settings = SettingsBuilder.build
52
- ident = 2
53
- max_width = settings.document_types.map { |t| t.gid.size }.max
54
- puts "#{" " * ident}#{"ID".ljust(max_width)} Type Name"
55
- puts "#{" " * ident}#{"—" * max_width} #{"—" * 23}"
56
- settings.document_types.each do |type|
57
- puts "#{" " * ident}#{type.gid.ljust(max_width).yellow} #{type.name}"
58
- end
59
- nil
68
+ settings = SettingsBuilder.build
69
+ spacing = " " * 2
70
+ max_width = settings.document_types.map { |t| t.gid.size }.max
71
+ @console.info "#{spacing}#{"ID".ljust(max_width)} Type Name"
72
+ @console.info "#{spacing}#{"—" * max_width} #{"—" * 23}"
73
+ settings.document_types.each do |type|
74
+ @console.info "#{spacing}#{type.gid.ljust(max_width).yellow} #{type.name}"
60
75
  end
61
76
  end
62
77
  end
@@ -87,7 +87,7 @@ module Pdfh
87
87
  end
88
88
 
89
89
  # Gets the text from the pdf in order to execute
90
- # the regular expresion matches
90
+ # the regular expression matches
91
91
  # @return [String]
92
92
  def extract_text
93
93
  temp = Tempfile.new("pdfh")
data/lib/pdfh/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Pdfh
4
- VERSION = "3.2.0"
4
+ VERSION = "3.3.0"
5
5
  end
data/lib/pdfh.rb CHANGED
@@ -9,15 +9,20 @@ require "yaml"
9
9
 
10
10
  require_relative "ext/string"
11
11
 
12
+ # Concerns
13
+ require_relative "pdfh/concerns/password_decodable"
14
+
12
15
  # Models
13
16
  require_relative "pdfh/models/document"
14
17
  require_relative "pdfh/models/document_period"
15
18
  require_relative "pdfh/models/document_sub_type"
16
19
  require_relative "pdfh/models/document_type"
17
20
  require_relative "pdfh/models/settings"
21
+ require_relative "pdfh/models/zip_types"
18
22
 
19
23
  # Utils
20
24
  require_relative "pdfh/utils/console"
25
+ require_relative "pdfh/utils/dependency_validator"
21
26
  require_relative "pdfh/utils/month"
22
27
  require_relative "pdfh/utils/opt_parser"
23
28
  require_relative "pdfh/utils/options"
@@ -31,6 +36,8 @@ require_relative "pdfh/version"
31
36
 
32
37
  # Gem entry point
33
38
  module Pdfh
39
+ REQUIRED_CMDS = %i[qpdf pdftotext].freeze
40
+
34
41
  # Settings not found
35
42
  class SettingsIOError < StandardError; end
36
43
 
@@ -45,6 +52,7 @@ module Pdfh
45
52
  class << self
46
53
  extend Forwardable
47
54
  def_delegators :@options, :verbose?, :dry?, :file_mode?
48
- def_delegators :@console, :ident_print, :warn_print, :error_print, :backtrace_print, :headline, :debug, :info, :print_options
55
+ def_delegators :@console, :ident_print, :warn_print, :error_print, :backtrace_print, :headline, :debug, :info,
56
+ :print_options
49
57
  end
50
58
  end
data/mise.toml CHANGED
@@ -1,2 +1,2 @@
1
1
  [tools]
2
- ruby = "3.4.2"
2
+ ruby = "3.4.3"
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfh
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.0
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Isaias Piña
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-04-01 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: colorize
@@ -33,6 +33,7 @@ extensions: []
33
33
  extra_rdoc_files: []
34
34
  files:
35
35
  - ".gitignore"
36
+ - ".pre-commit-config.yaml"
36
37
  - ".rspec"
37
38
  - ".rubocop.yml"
38
39
  - ".rubocop_todo.yml"
@@ -50,14 +51,17 @@ files:
50
51
  - exe/pdfh
51
52
  - lib/ext/string.rb
52
53
  - lib/pdfh.rb
54
+ - lib/pdfh/concerns/password_decodable.rb
53
55
  - lib/pdfh/main.rb
54
56
  - lib/pdfh/models/document.rb
55
57
  - lib/pdfh/models/document_period.rb
56
58
  - lib/pdfh/models/document_sub_type.rb
57
59
  - lib/pdfh/models/document_type.rb
58
60
  - lib/pdfh/models/settings.rb
61
+ - lib/pdfh/models/zip_types.rb
59
62
  - lib/pdfh/settings_template.rb
60
63
  - lib/pdfh/utils/console.rb
64
+ - lib/pdfh/utils/dependency_validator.rb
61
65
  - lib/pdfh/utils/month.rb
62
66
  - lib/pdfh/utils/opt_parser.rb
63
67
  - lib/pdfh/utils/options.rb
@@ -90,7 +94,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
90
94
  - !ruby/object:Gem::Version
91
95
  version: '0'
92
96
  requirements: []
93
- rubygems_version: 3.6.6
97
+ rubygems_version: 3.6.8
94
98
  specification_version: 4
95
99
  summary: Organize PDF files
96
100
  test_files: []