pdfh 0.2.1 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfh
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 3.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Isaias Piña
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-01-06 00:00:00.000000000 Z
11
+ date: 2024-01-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: colorize
@@ -37,7 +37,7 @@ files:
37
37
  - ".rspec"
38
38
  - ".rubocop.yml"
39
39
  - ".rubocop_todo.yml"
40
- - ".ruby-version"
40
+ - ".tool-versions"
41
41
  - CHANGELOG.md
42
42
  - CODE_OF_CONDUCT.md
43
43
  - Gemfile
@@ -51,15 +51,18 @@ files:
51
51
  - exe/pdfh
52
52
  - lib/ext/string.rb
53
53
  - lib/pdfh.rb
54
- - lib/pdfh/document.rb
55
- - lib/pdfh/document_period.rb
56
- - lib/pdfh/document_processor.rb
57
- - lib/pdfh/document_type.rb
58
- - lib/pdfh/month.rb
59
- - lib/pdfh/opt_parser.rb
60
- - lib/pdfh/pdf_handler.rb
61
- - lib/pdfh/settings.rb
54
+ - lib/pdfh/main.rb
55
+ - lib/pdfh/models/document.rb
56
+ - lib/pdfh/models/document_period.rb
57
+ - lib/pdfh/models/document_type.rb
58
+ - lib/pdfh/models/settings.rb
62
59
  - lib/pdfh/settings_template.rb
60
+ - lib/pdfh/utils/console.rb
61
+ - lib/pdfh/utils/month.rb
62
+ - lib/pdfh/utils/opt_parser.rb
63
+ - lib/pdfh/utils/options.rb
64
+ - lib/pdfh/utils/pdf_file_handler.rb
65
+ - lib/pdfh/utils/settings_builder.rb
63
66
  - lib/pdfh/version.rb
64
67
  - pdfh.gemspec
65
68
  homepage: https://github.com/iax7/pdfh
@@ -79,14 +82,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
79
82
  requirements:
80
83
  - - ">="
81
84
  - !ruby/object:Gem::Version
82
- version: 2.5.0
85
+ version: 3.0.0
83
86
  required_rubygems_version: !ruby/object:Gem::Requirement
84
87
  requirements:
85
88
  - - ">="
86
89
  - !ruby/object:Gem::Version
87
90
  version: '0'
88
91
  requirements: []
89
- rubygems_version: 3.3.3
92
+ rubygems_version: 3.5.3
90
93
  signing_key:
91
94
  specification_version: 4
92
95
  summary: Organize PDF files
data/.ruby-version DELETED
@@ -1 +0,0 @@
1
- ruby-3.1.0
@@ -1,164 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "fileutils"
4
-
5
- module Pdfh
6
- # Main functionality. This class is intended to manage the pdf documents
7
- class DocumentProcessor
8
- # @return [self]
9
- def initialize
10
- @options = Pdfh.parse_argv
11
- Pdfh.verbose = options[:verbose]
12
- Pdfh.dry = options[:dry]
13
- Pdfh.verbose_print(options)
14
- Pdfh.mode = options.key?(:type) ? :file : :directory
15
- end
16
-
17
- # @return [void]
18
- def start
19
- @settings = Settings.new(Pdfh.search_config_file)
20
- puts "Destination path: #{@settings.base_path.colorize(:light_blue)}" if Pdfh.verbose?
21
-
22
- Pdfh.file_mode? ? process_files : process_lookup_dirs
23
- rescue SettingsIOError => e
24
- Pdfh.error_print(e.message, exit_app: false)
25
- Pdfh.create_settings_file
26
- exit(1)
27
- rescue StandardError => e
28
- Pdfh.error_print e.message
29
- end
30
-
31
- private
32
-
33
- attr_reader :options
34
-
35
- # @param [String] file_name
36
- # @return [DocumentType]
37
- def match_doc_type(file_name)
38
- @settings.document_types.each do |type|
39
- match = type.re_file.match(file_name)
40
- return type if match
41
- end
42
- nil
43
- end
44
-
45
- # @return [DocumentType]
46
- def doc_type_by_id(id)
47
- @settings.document_types.find { |t| t.gid == id }
48
- end
49
-
50
- # @return [void]
51
- def process_files
52
- type_id = options[:type]
53
- raise ArgumentError, "No files provided to process #{type_id.inspect} type." unless options[:files]
54
-
55
- type = doc_type_by_id(type_id)
56
- Pdfh.error_print "Type #{type_id.inspect} was not found." if type.nil?
57
- puts
58
- options[:files].each do |file|
59
- unless File.exist?(file)
60
- Pdfh.warn_print "File #{file.inspect} does not exist."
61
- next
62
- end
63
- unless File.extname(file) == ".pdf"
64
- Pdfh.warn_print "File #{file.inspect} is not a pdf."
65
- next
66
- end
67
- process_document(file, type)
68
- end
69
- end
70
-
71
- # @return [void]
72
- def process_lookup_dirs
73
- @settings.lookup_dirs.each do |work_directory|
74
- process_directory(work_directory)
75
- end
76
- end
77
-
78
- # @param [String] work_directory
79
- # @return [Enumerator]
80
- def process_directory(work_directory)
81
- Pdfh.headline(work_directory)
82
- processed_count = 0
83
- ignored_files = []
84
- files = Dir["#{work_directory}/*.pdf"]
85
- files.each do |pdf_file|
86
- type = match_doc_type(pdf_file)
87
- if type
88
- processed_count += 1
89
- process_document(pdf_file, type)
90
- else
91
- ignored_files << basename_without_ext(pdf_file)
92
- end
93
- end
94
- puts " (No files processed)".colorize(:light_black) if processed_count.zero?
95
- return unless Pdfh.verbose?
96
-
97
- puts "\n No document type found for these PDF files:" if ignored_files.any?
98
- ignored_files.each.with_index(1) { |file, index| Pdfh.ident_print index, file, color: :magenta }
99
- end
100
-
101
- ##
102
- # Generate document, and process actions
103
- # @param [String] file
104
- # @param [DocumentType] type
105
- # @return [void]
106
- def process_document(file, type)
107
- base = File.basename(file)
108
- puts "Working on #{base.colorize(:light_green)}"
109
- pad = 12
110
- Pdfh.ident_print "Type", type.name, color: :light_blue, width: pad
111
- doc = Document.new(file, type)
112
- Pdfh.ident_print "Sub-Type", doc.sub_type, color: :light_blue, width: pad
113
- Pdfh.ident_print "Period", doc.period.to_s, color: :light_blue, width: pad
114
- Pdfh.ident_print "New Name", doc.new_name, color: :light_blue, width: pad
115
- Pdfh.ident_print "Store Path", doc.store_path, color: :light_blue, width: pad
116
- Pdfh.ident_print "Other files", doc.companion_files(join: true), color: :light_blue, width: pad
117
- Pdfh.ident_print "Print CMD", doc.print_cmd, color: :light_blue, width: pad
118
- Pdfh.ident_print "Processed?", "No (in Dry mode)", color: :red, width: pad if Pdfh.dry?
119
- write_pdf(doc)
120
- rescue StandardError => e
121
- Pdfh.ident_print "Doc Error", e.message, color: :red, width: pad
122
- end
123
-
124
- def write_pdf(document)
125
- base_path = @settings.base_path
126
- full_path = File.join(base_path, document.store_path, document.new_name)
127
- dir_path = File.join(base_path, document.store_path)
128
-
129
- FileUtils.mkdir_p(dir_path) unless File.exist?(dir_path)
130
-
131
- document.pdf_doc.write_new_pdf(dir_path, full_path)
132
- make_document_backup(document)
133
- copy_companion_files(dir_path, document)
134
- end
135
-
136
- # Create a backup of original document
137
- def make_document_backup(document)
138
- Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ Creating PDF backup"
139
- Dir.chdir(document.home_dir) do
140
- Pdfh.verbose_print " Working on: #{document.home_dir.inspect} directory"
141
- Pdfh.verbose_print " mv #{document.file_name.inspect} -> #{document.backup_name.inspect}"
142
- File.rename(document.file_name, document.backup_name) unless Pdfh.dry?
143
- end
144
- end
145
-
146
- def copy_companion_files(destination, document)
147
- Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ Writing Companion files"
148
- document.companion_files.each do |file|
149
- Pdfh.verbose_print " Working on #{file.inspect}..."
150
- src_name = File.join(document.home_dir, file)
151
- src_ext = File.extname(file)
152
- dest_name = File.basename(document.new_name, ".pdf")
153
- dest_full = File.join(destination, "#{dest_name}#{src_ext}")
154
- Pdfh.verbose_print " cp #{src_name} --> #{dest_full}"
155
- FileUtils.cp(src_name, dest_full) unless Pdfh.dry?
156
- end
157
- end
158
-
159
- # @return [String]
160
- def basename_without_ext(file)
161
- File.basename(file, File.extname(file))
162
- end
163
- end
164
- end
@@ -1,41 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "optparse"
4
-
5
- module Pdfh
6
- OPT_PARSER = OptionParser.new do |opts|
7
- opts.default_argv
8
- # Process ARGV
9
- opts.banner = "Usage: #{opts.program_name} [options] [file1 ...]"
10
- opts.separator ""
11
- opts.separator "Specific options:"
12
-
13
- opts.on("-tID", "--type=ID", "Document type id (requires a trailing file list)")
14
- opts.on_tail("-T", "--list-types", "List document types in configuration") do
15
- settings = Settings.new(Pdfh.search_config_file)
16
- ident = 4
17
- max_width = settings.document_types.map { |t| t.gid.size }.max
18
- puts "#{" " * ident}#{"ID".ljust(max_width)} Type Name"
19
- puts "#{" " * ident}#{"-" * max_width} -----------------------"
20
- settings.document_types.each do |type|
21
- puts "#{" " * ident}#{type.gid.ljust(max_width)} #{type.name.inspect}"
22
- end
23
- exit
24
- rescue SettingsIOError => e
25
- Pdfh.error_print(e.message, exit_app: false)
26
- Pdfh.create_settings_file
27
- exit(1)
28
- end
29
- opts.on_tail("-V", "--version", "Show version") do
30
- puts "#{opts.program_name} v#{Pdfh::VERSION}"
31
- exit
32
- end
33
- opts.on_tail("-h", "--help", "help (this dialog)") do
34
- puts opts
35
- exit
36
- end
37
-
38
- opts.on("-v", "--verbose", "Show more output. Useful for debug")
39
- opts.on("-d", "--dry", "Dry run, does not write new pdf")
40
- end
41
- end
@@ -1,55 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Pdfh
4
- # Handles the Pdf document text extraction and password removal
5
- # TODO: Replace command utils with this gem
6
- # require 'pdf-reader'
7
- #
8
- # reader = PDF::Reader.new(temp)
9
- # reader.pages.each do |page|
10
- # @text << page.text
11
- # end
12
- class PdfHandler
13
- attr_reader :file
14
-
15
- # @return [self]
16
- def initialize(file, password)
17
- @file = file
18
- @password_option = password ? "--password=#{password.inspect} " : ""
19
- end
20
-
21
- ##
22
- # Gets the text from the pdf in order to execute
23
- # the regular expresion matches
24
- # @return [String]
25
- def extract_text
26
- temp = `mktemp`.chomp
27
- Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ Extract PDF text"
28
- Pdfh.verbose_print " --> #{temp.inspect} temporal file assigned."
29
-
30
- cmd = %(qpdf #{@password_option}--decrypt --stream-data=uncompress #{@file.inspect} #{temp.inspect})
31
- Pdfh.verbose_print " DeCrypt Command: #{cmd}"
32
- _result = `#{cmd}`
33
-
34
- cmd2 = %(pdftotext -enc UTF-8 #{temp.inspect} -)
35
- Pdfh.verbose_print " Extract Command: #{cmd2}"
36
- text = `#{cmd2}`
37
- Pdfh.verbose_print " Text: #{text.inspect}"
38
- text
39
- end
40
-
41
- # @return [void]
42
- def write_new_pdf(dir_path, full_path)
43
- Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ Writing PDFs"
44
- raise IOError, "Path #{dir_path} not found." unless Dir.exist?(dir_path)
45
-
46
- cmd = %(qpdf #{@password_option}--decrypt #{@file.inspect} #{full_path.inspect})
47
- Pdfh.verbose_print " Write PDF Command: #{cmd}"
48
-
49
- return if Pdfh.dry?
50
-
51
- _result = `#{cmd}`
52
- raise IOError, "New PDF file #{full_path.inspect} was not created." unless File.file?(full_path)
53
- end
54
- end
55
- end
File without changes