pdfh 0.2.1 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +1 -1
- data/.rubocop.yml +2 -2
- data/.rubocop_todo.yml +24 -16
- data/.tool-versions +1 -0
- data/CHANGELOG.md +20 -15
- data/Gemfile +5 -4
- data/Gemfile.lock +80 -42
- data/README.md +21 -7
- data/bin/console +1 -1
- data/bin/run +1 -1
- data/exe/pdfh +1 -1
- data/lib/pdfh/main.rb +93 -0
- data/lib/pdfh/{document.rb → models/document.rb} +34 -17
- data/lib/pdfh/{document_type.rb → models/document_type.rb} +18 -5
- data/lib/pdfh/{settings.rb → models/settings.rb} +23 -15
- data/lib/pdfh/settings_template.rb +11 -11
- data/lib/pdfh/utils/console.rb +101 -0
- data/lib/pdfh/utils/opt_parser.rb +56 -0
- data/lib/pdfh/utils/options.rb +38 -0
- data/lib/pdfh/utils/pdf_file_handler.rb +121 -0
- data/lib/pdfh/utils/settings_builder.rb +57 -0
- data/lib/pdfh/version.rb +1 -1
- data/lib/pdfh.rb +29 -118
- data/pdfh.gemspec +2 -2
- metadata +16 -13
- data/.ruby-version +0 -1
- data/lib/pdfh/document_processor.rb +0 -164
- data/lib/pdfh/opt_parser.rb +0 -41
- data/lib/pdfh/pdf_handler.rb +0 -55
- /data/lib/pdfh/{document_period.rb → models/document_period.rb} +0 -0
- /data/lib/pdfh/{month.rb → utils/month.rb} +0 -0
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfh
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 3.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Isaias Piña
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-01-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colorize
|
@@ -37,7 +37,7 @@ files:
|
|
37
37
|
- ".rspec"
|
38
38
|
- ".rubocop.yml"
|
39
39
|
- ".rubocop_todo.yml"
|
40
|
-
- ".ruby-version"
|
40
|
+
- ".tool-versions"
|
41
41
|
- CHANGELOG.md
|
42
42
|
- CODE_OF_CONDUCT.md
|
43
43
|
- Gemfile
|
@@ -51,15 +51,18 @@ files:
|
|
51
51
|
- exe/pdfh
|
52
52
|
- lib/ext/string.rb
|
53
53
|
- lib/pdfh.rb
|
54
|
-
- lib/pdfh/document.rb
|
55
|
-
- lib/pdfh/document_period.rb
|
56
|
-
- lib/pdfh/document_processor.rb
|
57
|
-
- lib/pdfh/document_type.rb
|
58
|
-
- lib/pdfh/month.rb
|
59
|
-
- lib/pdfh/opt_parser.rb
|
60
|
-
- lib/pdfh/pdf_handler.rb
|
61
|
-
- lib/pdfh/settings.rb
|
54
|
+
- lib/pdfh/main.rb
|
55
|
+
- lib/pdfh/models/document.rb
|
56
|
+
- lib/pdfh/models/document_period.rb
|
57
|
+
- lib/pdfh/models/document_type.rb
|
58
|
+
- lib/pdfh/models/settings.rb
|
62
59
|
- lib/pdfh/settings_template.rb
|
60
|
+
- lib/pdfh/utils/console.rb
|
61
|
+
- lib/pdfh/utils/month.rb
|
62
|
+
- lib/pdfh/utils/opt_parser.rb
|
63
|
+
- lib/pdfh/utils/options.rb
|
64
|
+
- lib/pdfh/utils/pdf_file_handler.rb
|
65
|
+
- lib/pdfh/utils/settings_builder.rb
|
63
66
|
- lib/pdfh/version.rb
|
64
67
|
- pdfh.gemspec
|
65
68
|
homepage: https://github.com/iax7/pdfh
|
@@ -79,14 +82,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
79
82
|
requirements:
|
80
83
|
- - ">="
|
81
84
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
85
|
+
version: 3.0.0
|
83
86
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
87
|
requirements:
|
85
88
|
- - ">="
|
86
89
|
- !ruby/object:Gem::Version
|
87
90
|
version: '0'
|
88
91
|
requirements: []
|
89
|
-
rubygems_version: 3.
|
92
|
+
rubygems_version: 3.5.3
|
90
93
|
signing_key:
|
91
94
|
specification_version: 4
|
92
95
|
summary: Organize PDF files
|
data/.ruby-version
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
ruby-3.1.0
|
data/lib/pdfh/document_processor.rb
DELETED
@@ -1,164 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "fileutils"
|
4
|
-
|
5
|
-
module Pdfh
|
6
|
-
# Main functionality. This class is intended to manage the pdf documents
|
7
|
-
class DocumentProcessor
|
8
|
-
# @return [self]
|
9
|
-
def initialize
|
10
|
-
@options = Pdfh.parse_argv
|
11
|
-
Pdfh.verbose = options[:verbose]
|
12
|
-
Pdfh.dry = options[:dry]
|
13
|
-
Pdfh.verbose_print(options)
|
14
|
-
Pdfh.mode = options.key?(:type) ? :file : :directory
|
15
|
-
end
|
16
|
-
|
17
|
-
# @return [void]
|
18
|
-
def start
|
19
|
-
@settings = Settings.new(Pdfh.search_config_file)
|
20
|
-
puts "Destination path: #{@settings.base_path.colorize(:light_blue)}" if Pdfh.verbose?
|
21
|
-
|
22
|
-
Pdfh.file_mode? ? process_files : process_lookup_dirs
|
23
|
-
rescue SettingsIOError => e
|
24
|
-
Pdfh.error_print(e.message, exit_app: false)
|
25
|
-
Pdfh.create_settings_file
|
26
|
-
exit(1)
|
27
|
-
rescue StandardError => e
|
28
|
-
Pdfh.error_print e.message
|
29
|
-
end
|
30
|
-
|
31
|
-
private
|
32
|
-
|
33
|
-
attr_reader :options
|
34
|
-
|
35
|
-
# @param [String] file_name
|
36
|
-
# @return [DocumentType]
|
37
|
-
def match_doc_type(file_name)
|
38
|
-
@settings.document_types.each do |type|
|
39
|
-
match = type.re_file.match(file_name)
|
40
|
-
return type if match
|
41
|
-
end
|
42
|
-
nil
|
43
|
-
end
|
44
|
-
|
45
|
-
# @return [DocumentType]
|
46
|
-
def doc_type_by_id(id)
|
47
|
-
@settings.document_types.find { |t| t.gid == id }
|
48
|
-
end
|
49
|
-
|
50
|
-
# @return [void]
|
51
|
-
def process_files
|
52
|
-
type_id = options[:type]
|
53
|
-
raise ArgumentError, "No files provided to process #{type_id.inspect} type." unless options[:files]
|
54
|
-
|
55
|
-
type = doc_type_by_id(type_id)
|
56
|
-
Pdfh.error_print "Type #{type_id.inspect} was not found." if type.nil?
|
57
|
-
puts
|
58
|
-
options[:files].each do |file|
|
59
|
-
unless File.exist?(file)
|
60
|
-
Pdfh.warn_print "File #{file.inspect} does not exist."
|
61
|
-
next
|
62
|
-
end
|
63
|
-
unless File.extname(file) == ".pdf"
|
64
|
-
Pdfh.warn_print "File #{file.inspect} is not a pdf."
|
65
|
-
next
|
66
|
-
end
|
67
|
-
process_document(file, type)
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
# @return [void]
|
72
|
-
def process_lookup_dirs
|
73
|
-
@settings.lookup_dirs.each do |work_directory|
|
74
|
-
process_directory(work_directory)
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
# @param [String] work_directory
|
79
|
-
# @return [Enumerator]
|
80
|
-
def process_directory(work_directory)
|
81
|
-
Pdfh.headline(work_directory)
|
82
|
-
processed_count = 0
|
83
|
-
ignored_files = []
|
84
|
-
files = Dir["#{work_directory}/*.pdf"]
|
85
|
-
files.each do |pdf_file|
|
86
|
-
type = match_doc_type(pdf_file)
|
87
|
-
if type
|
88
|
-
processed_count += 1
|
89
|
-
process_document(pdf_file, type)
|
90
|
-
else
|
91
|
-
ignored_files << basename_without_ext(pdf_file)
|
92
|
-
end
|
93
|
-
end
|
94
|
-
puts " (No files processed)".colorize(:light_black) if processed_count.zero?
|
95
|
-
return unless Pdfh.verbose?
|
96
|
-
|
97
|
-
puts "\n No document type found for these PDF files:" if ignored_files.any?
|
98
|
-
ignored_files.each.with_index(1) { |file, index| Pdfh.ident_print index, file, color: :magenta }
|
99
|
-
end
|
100
|
-
|
101
|
-
##
|
102
|
-
# Generate document, and process actions
|
103
|
-
# @param [String] file
|
104
|
-
# @param [DocumentType] type
|
105
|
-
# @return [void]
|
106
|
-
def process_document(file, type)
|
107
|
-
base = File.basename(file)
|
108
|
-
puts "Working on #{base.colorize(:light_green)}"
|
109
|
-
pad = 12
|
110
|
-
Pdfh.ident_print "Type", type.name, color: :light_blue, width: pad
|
111
|
-
doc = Document.new(file, type)
|
112
|
-
Pdfh.ident_print "Sub-Type", doc.sub_type, color: :light_blue, width: pad
|
113
|
-
Pdfh.ident_print "Period", doc.period.to_s, color: :light_blue, width: pad
|
114
|
-
Pdfh.ident_print "New Name", doc.new_name, color: :light_blue, width: pad
|
115
|
-
Pdfh.ident_print "Store Path", doc.store_path, color: :light_blue, width: pad
|
116
|
-
Pdfh.ident_print "Other files", doc.companion_files(join: true), color: :light_blue, width: pad
|
117
|
-
Pdfh.ident_print "Print CMD", doc.print_cmd, color: :light_blue, width: pad
|
118
|
-
Pdfh.ident_print "Processed?", "No (in Dry mode)", color: :red, width: pad if Pdfh.dry?
|
119
|
-
write_pdf(doc)
|
120
|
-
rescue StandardError => e
|
121
|
-
Pdfh.ident_print "Doc Error", e.message, color: :red, width: pad
|
122
|
-
end
|
123
|
-
|
124
|
-
def write_pdf(document)
|
125
|
-
base_path = @settings.base_path
|
126
|
-
full_path = File.join(base_path, document.store_path, document.new_name)
|
127
|
-
dir_path = File.join(base_path, document.store_path)
|
128
|
-
|
129
|
-
FileUtils.mkdir_p(dir_path) unless File.exist?(dir_path)
|
130
|
-
|
131
|
-
document.pdf_doc.write_new_pdf(dir_path, full_path)
|
132
|
-
make_document_backup(document)
|
133
|
-
copy_companion_files(dir_path, document)
|
134
|
-
end
|
135
|
-
|
136
|
-
# Create a backup of original document
|
137
|
-
def make_document_backup(document)
|
138
|
-
Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ Creating PDF backup"
|
139
|
-
Dir.chdir(document.home_dir) do
|
140
|
-
Pdfh.verbose_print " Working on: #{document.home_dir.inspect} directory"
|
141
|
-
Pdfh.verbose_print " mv #{document.file_name.inspect} -> #{document.backup_name.inspect}"
|
142
|
-
File.rename(document.file_name, document.backup_name) unless Pdfh.dry?
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
def copy_companion_files(destination, document)
|
147
|
-
Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ Writing Companion files"
|
148
|
-
document.companion_files.each do |file|
|
149
|
-
Pdfh.verbose_print " Working on #{file.inspect}..."
|
150
|
-
src_name = File.join(document.home_dir, file)
|
151
|
-
src_ext = File.extname(file)
|
152
|
-
dest_name = File.basename(document.new_name, ".pdf")
|
153
|
-
dest_full = File.join(destination, "#{dest_name}#{src_ext}")
|
154
|
-
Pdfh.verbose_print " cp #{src_name} --> #{dest_full}"
|
155
|
-
FileUtils.cp(src_name, dest_full) unless Pdfh.dry?
|
156
|
-
end
|
157
|
-
end
|
158
|
-
|
159
|
-
# @return [String]
|
160
|
-
def basename_without_ext(file)
|
161
|
-
File.basename(file, File.extname(file))
|
162
|
-
end
|
163
|
-
end
|
164
|
-
end
|
data/lib/pdfh/opt_parser.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "optparse"
|
4
|
-
|
5
|
-
module Pdfh
|
6
|
-
OPT_PARSER = OptionParser.new do |opts|
|
7
|
-
opts.default_argv
|
8
|
-
# Process ARGV
|
9
|
-
opts.banner = "Usage: #{opts.program_name} [options] [file1 ...]"
|
10
|
-
opts.separator ""
|
11
|
-
opts.separator "Specific options:"
|
12
|
-
|
13
|
-
opts.on("-tID", "--type=ID", "Document type id (requires a trailing file list)")
|
14
|
-
opts.on_tail("-T", "--list-types", "List document types in configuration") do
|
15
|
-
settings = Settings.new(Pdfh.search_config_file)
|
16
|
-
ident = 4
|
17
|
-
max_width = settings.document_types.map { |t| t.gid.size }.max
|
18
|
-
puts "#{" " * ident}#{"ID".ljust(max_width)} Type Name"
|
19
|
-
puts "#{" " * ident}#{"-" * max_width} -----------------------"
|
20
|
-
settings.document_types.each do |type|
|
21
|
-
puts "#{" " * ident}#{type.gid.ljust(max_width)} #{type.name.inspect}"
|
22
|
-
end
|
23
|
-
exit
|
24
|
-
rescue SettingsIOError => e
|
25
|
-
Pdfh.error_print(e.message, exit_app: false)
|
26
|
-
Pdfh.create_settings_file
|
27
|
-
exit(1)
|
28
|
-
end
|
29
|
-
opts.on_tail("-V", "--version", "Show version") do
|
30
|
-
puts "#{opts.program_name} v#{Pdfh::VERSION}"
|
31
|
-
exit
|
32
|
-
end
|
33
|
-
opts.on_tail("-h", "--help", "help (this dialog)") do
|
34
|
-
puts opts
|
35
|
-
exit
|
36
|
-
end
|
37
|
-
|
38
|
-
opts.on("-v", "--verbose", "Show more output. Useful for debug")
|
39
|
-
opts.on("-d", "--dry", "Dry run, does not write new pdf")
|
40
|
-
end
|
41
|
-
end
|
data/lib/pdfh/pdf_handler.rb
DELETED
@@ -1,55 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Pdfh
|
4
|
-
# Handles the Pdf document text extraction and password removal
|
5
|
-
# TODO: Replace command utils with this gem
|
6
|
-
# require 'pdf-reader'
|
7
|
-
#
|
8
|
-
# reader = PDF::Reader.new(temp)
|
9
|
-
# reader.pages.each do |page|
|
10
|
-
# @text << page.text
|
11
|
-
# end
|
12
|
-
class PdfHandler
|
13
|
-
attr_reader :file
|
14
|
-
|
15
|
-
# @return [self]
|
16
|
-
def initialize(file, password)
|
17
|
-
@file = file
|
18
|
-
@password_option = password ? "--password=#{password.inspect} " : ""
|
19
|
-
end
|
20
|
-
|
21
|
-
##
|
22
|
-
# Gets the text from the pdf in order to execute
|
23
|
-
# the regular expresion matches
|
24
|
-
# @return [String]
|
25
|
-
def extract_text
|
26
|
-
temp = `mktemp`.chomp
|
27
|
-
Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ Extract PDF text"
|
28
|
-
Pdfh.verbose_print " --> #{temp.inspect} temporal file assigned."
|
29
|
-
|
30
|
-
cmd = %(qpdf #{@password_option}--decrypt --stream-data=uncompress #{@file.inspect} #{temp.inspect})
|
31
|
-
Pdfh.verbose_print " DeCrypt Command: #{cmd}"
|
32
|
-
_result = `#{cmd}`
|
33
|
-
|
34
|
-
cmd2 = %(pdftotext -enc UTF-8 #{temp.inspect} -)
|
35
|
-
Pdfh.verbose_print " Extract Command: #{cmd2}"
|
36
|
-
text = `#{cmd2}`
|
37
|
-
Pdfh.verbose_print " Text: #{text.inspect}"
|
38
|
-
text
|
39
|
-
end
|
40
|
-
|
41
|
-
# @return [void]
|
42
|
-
def write_new_pdf(dir_path, full_path)
|
43
|
-
Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ Writing PDFs"
|
44
|
-
raise IOError, "Path #{dir_path} not found." unless Dir.exist?(dir_path)
|
45
|
-
|
46
|
-
cmd = %(qpdf #{@password_option}--decrypt #{@file.inspect} #{full_path.inspect})
|
47
|
-
Pdfh.verbose_print " Write PDF Command: #{cmd}"
|
48
|
-
|
49
|
-
return if Pdfh.dry?
|
50
|
-
|
51
|
-
_result = `#{cmd}`
|
52
|
-
raise IOError, "New PDF file #{full_path.inspect} was not created." unless File.file?(full_path)
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
File without changes
|
File without changes
|