pdfh 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +2 -0
- data/.rubocop_todo.yml +15 -120
- data/.ruby-version +1 -1
- data/CHANGELOG.md +10 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +20 -17
- data/README.md +12 -5
- data/Rakefile +3 -0
- data/bin/console +3 -0
- data/bin/run +8 -0
- data/exe/pdfh +15 -32
- data/lib/ext/string.rb +5 -9
- data/lib/pdfh.rb +113 -76
- data/lib/pdfh/document.rb +53 -141
- data/lib/pdfh/document_period.rb +32 -0
- data/lib/pdfh/document_processor.rb +163 -0
- data/lib/pdfh/document_type.rb +43 -0
- data/lib/pdfh/month.rb +42 -32
- data/lib/pdfh/opt_parser.rb +41 -0
- data/lib/pdfh/pdf_handler.rb +19 -18
- data/lib/pdfh/settings.rb +31 -39
- data/lib/pdfh/settings_template.rb +21 -0
- data/lib/pdfh/version.rb +1 -1
- data/pdfh.gemspec +14 -18
- metadata +12 -7
- data/lib/pdfh/utils.rb +0 -42
data/lib/ext/string.rb
CHANGED
@@ -1,13 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
refine String do
|
9
|
-
def titleize
|
10
|
-
split.map(&:capitalize).join(" ")
|
11
|
-
end
|
3
|
+
# Adds :titleize to the String class
|
4
|
+
class String
|
5
|
+
# @return [String]
|
6
|
+
def titleize
|
7
|
+
split.map(&:capitalize).join(" ")
|
12
8
|
end
|
13
9
|
end
|
data/lib/pdfh.rb
CHANGED
@@ -1,97 +1,134 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "ext/string"
|
4
|
+
require "colorize"
|
5
|
+
|
3
6
|
require "pdfh/version"
|
4
|
-
require "pdfh/
|
7
|
+
require "pdfh/document_period"
|
8
|
+
require "pdfh/document_type"
|
5
9
|
require "pdfh/document"
|
6
|
-
require "pdfh/
|
10
|
+
require "pdfh/month"
|
11
|
+
require "pdfh/opt_parser"
|
12
|
+
require "pdfh/pdf_handler"
|
13
|
+
require "pdfh/settings"
|
14
|
+
require "pdfh/settings_template"
|
15
|
+
require "pdfh/document_processor"
|
7
16
|
|
8
|
-
##
|
9
17
|
# Gem entry point
|
10
18
|
module Pdfh
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
process_directory(work_directory)
|
19
|
+
# Settings not found
|
20
|
+
class SettingsIOError < StandardError; end
|
21
|
+
|
22
|
+
# Regular Date Error, raised when there is no match
|
23
|
+
class ReDateError < StandardError
|
24
|
+
def initialize(msg = "No data matched your date regular expression")
|
25
|
+
super
|
19
26
|
end
|
20
|
-
rescue StandardError => e
|
21
|
-
print_error e
|
22
27
|
end
|
23
28
|
|
24
|
-
|
25
|
-
|
26
|
-
names_to_look = ["#{name}.yml", "#{name}.yaml"]
|
27
|
-
dir_order = [Dir.pwd, File.expand_path("~")]
|
29
|
+
class << self
|
30
|
+
attr_writer :verbose, :dry
|
28
31
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
return f if File.file?(f)
|
33
|
-
end
|
32
|
+
# @return [Boolean]
|
33
|
+
def verbose?
|
34
|
+
@verbose
|
34
35
|
end
|
35
36
|
|
36
|
-
|
37
|
-
|
38
|
-
|
37
|
+
# @return [Boolean]
|
38
|
+
def dry?
|
39
|
+
@dry
|
40
|
+
end
|
39
41
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
Dir["#{work_directory}/*.pdf"].each do |pdf_file|
|
46
|
-
type = @settings.match_doc_type(pdf_file)
|
47
|
-
if type
|
48
|
-
process_document(pdf_file, type)
|
49
|
-
else
|
50
|
-
ignored_files << basename_without_ext(pdf_file)
|
51
|
-
end
|
42
|
+
# Returns rows, cols
|
43
|
+
# TODO: review https://gist.github.com/nixpulvis/6025433
|
44
|
+
# @return [Array<Integer, Integer>]
|
45
|
+
def console_size
|
46
|
+
`stty size`.split.map(&:to_i)
|
52
47
|
end
|
53
48
|
|
54
|
-
|
55
|
-
|
56
|
-
|
49
|
+
# Prints visual separator in shell for easier reading for humans
|
50
|
+
# @example output
|
51
|
+
# [Title Text] -----------------------
|
52
|
+
# @param msg [String]
|
53
|
+
# @return [void]
|
54
|
+
def headline(msg)
|
55
|
+
_, cols = console_size
|
56
|
+
line_length = cols - (msg.size + 5)
|
57
|
+
left = "\033[31m#{"—" * 3}\033[0m"
|
58
|
+
right = "\033[31m#{"—" * line_length}\033[0m"
|
59
|
+
puts "\n#{left} \033[1;34m#{msg}\033[0m #{right}"
|
60
|
+
end
|
57
61
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
def self.process_document(file, type)
|
64
|
-
puts "Working on #{basename_without_ext(file).colorize(:light_green)}"
|
65
|
-
pad = 12
|
66
|
-
print_ident "Type", type.name, :light_blue, width: pad
|
67
|
-
doc = Document.new(file, type)
|
68
|
-
print_ident "Sub-Type", doc.sub_type, :light_blue, width: pad
|
69
|
-
print_ident "Period", doc.period, :light_blue, width: pad
|
70
|
-
print_ident "New Name", doc.new_name, :light_blue, width: pad
|
71
|
-
print_ident "Store Path", doc.store_path, :light_blue, width: pad
|
72
|
-
print_ident "Other files", doc.companion_files(join: true), :light_blue, width: pad
|
73
|
-
print_ident "Print CMD", doc.print_cmd, :light_blue, width: pad
|
74
|
-
doc.write_pdf(@settings.base_path)
|
75
|
-
rescue StandardError => e
|
76
|
-
puts " Doc Error: #{e.message}".colorize(:red)
|
77
|
-
end
|
78
|
-
# rubocop:enable Metrics/AbcSize
|
79
|
-
|
80
|
-
def self.print_separator(title)
|
81
|
-
_rows, cols = `stty size`.split.map(&:to_i)
|
82
|
-
sep = "\n#{"-" * 40} #{title} "
|
83
|
-
remaining_cols = cols - sep.size
|
84
|
-
sep += "-" * remaining_cols if remaining_cols.positive?
|
85
|
-
puts sep.colorize(:light_yellow)
|
86
|
-
end
|
62
|
+
# @param msg [Object]
|
63
|
+
# @return [void]
|
64
|
+
def verbose_print(msg = nil)
|
65
|
+
puts msg.to_s.colorize(:cyan) if verbose?
|
66
|
+
end
|
87
67
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
68
|
+
# @param message [String]
|
69
|
+
# @param exit_app [Boolean] exit application if true (default)
|
70
|
+
# @return [void]
|
71
|
+
def error_print(message, exit_app: true)
|
72
|
+
puts "Error, #{message}".colorize(:red)
|
73
|
+
exit 1 if exit_app
|
74
|
+
end
|
93
75
|
|
94
|
-
|
95
|
-
|
76
|
+
# @param message [String]
|
77
|
+
# @return [void]
|
78
|
+
def warn_print(message)
|
79
|
+
puts message.colorize(:yellow)
|
80
|
+
end
|
81
|
+
|
82
|
+
# @return [void]
|
83
|
+
def ident_print(field, value, color: :green, width: 3)
|
84
|
+
field_str = field.to_s.rjust(width)
|
85
|
+
value_str = value.colorize(color)
|
86
|
+
puts "#{" " * 4}#{field_str}: #{value_str}"
|
87
|
+
end
|
88
|
+
|
89
|
+
# @return [Hash]
|
90
|
+
def parse_argv
|
91
|
+
options = {}
|
92
|
+
OPT_PARSER.parse!(into: options)
|
93
|
+
options[:files] = ARGV if ARGV.any?
|
94
|
+
options.transform_keys { |key| key.to_s.tr("-", "_").to_sym }
|
95
|
+
rescue OptionParser::InvalidOption => e
|
96
|
+
error_print e.message, exit_app: false
|
97
|
+
puts OPT_PARSER.help
|
98
|
+
exit 1
|
99
|
+
end
|
100
|
+
|
101
|
+
# @return [String]
|
102
|
+
def config_file_name
|
103
|
+
File.basename($PROGRAM_NAME)
|
104
|
+
end
|
105
|
+
|
106
|
+
# @return [void]
|
107
|
+
def create_settings_file
|
108
|
+
full_path = File.join(File.expand_path("~"), "#{config_file_name}.yml")
|
109
|
+
return if File.exist?(full_path) # double check
|
110
|
+
|
111
|
+
File.open(full_path, "w") do |f|
|
112
|
+
f.write Pdfh::SETTINGS_TEMPLATE.to_yaml
|
113
|
+
end
|
114
|
+
puts "Settings #{full_path.inspect.colorize(:green)} was created."
|
115
|
+
end
|
116
|
+
|
117
|
+
# @raise [SettingsIOError] if no file is found
|
118
|
+
# @return [String]
|
119
|
+
def search_config_file
|
120
|
+
names_to_look = %W[#{config_file_name}.yml #{config_file_name}.yaml]
|
121
|
+
dir_order = [Dir.pwd, File.expand_path("~")]
|
122
|
+
|
123
|
+
dir_order.each do |dir|
|
124
|
+
names_to_look.each do |file|
|
125
|
+
path = File.join(dir, file)
|
126
|
+
return path if File.exist?(path)
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
130
|
+
raise SettingsIOError, "no configuration file (#{names_to_look.join(" or ")}) was found\n"\
|
131
|
+
" within paths: #{dir_order.join(", ")}"
|
132
|
+
end
|
96
133
|
end
|
97
134
|
end
|
data/lib/pdfh/document.rb
CHANGED
@@ -1,107 +1,64 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "fileutils"
|
4
|
-
require "pdfh/month"
|
5
|
-
require "pdfh/pdf_handler"
|
6
|
-
require "ext/string"
|
7
|
-
|
8
|
-
##
|
9
|
-
# main module
|
10
3
|
module Pdfh
|
11
|
-
using Extensions
|
12
|
-
|
13
|
-
##
|
14
|
-
# Regular Date Error, when there is not match
|
15
|
-
class ReDateError < StandardError
|
16
|
-
def initialize(message = "No data matched your date regular expression")
|
17
|
-
super(message)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
##
|
22
4
|
# Handles the PDF detected by the rules
|
23
5
|
class Document
|
24
|
-
attr_reader :text, :type, :file, :extra
|
6
|
+
attr_reader :text, :type, :file, :extra, :pdf_doc
|
25
7
|
|
26
|
-
|
8
|
+
# @param file [String]
|
9
|
+
# @param type [DocumentType]
|
10
|
+
# @return [self]
|
11
|
+
def initialize(file, type)
|
27
12
|
raise IOError, "File #{file} not found" unless File.exist?(file)
|
28
13
|
|
29
14
|
@file = file
|
30
15
|
@type = type
|
31
|
-
|
32
|
-
@
|
33
|
-
|
34
|
-
@pdf_doc = PdfHandler.new(file, @type.pwd)
|
35
|
-
|
36
|
-
Verbose.print "=== Type: #{type_name} =================="
|
16
|
+
Pdfh.verbose_print "=== Type: #{type.name} =============================="
|
17
|
+
@pdf_doc = PdfHandler.new(file, type.pwd)
|
37
18
|
@text = @pdf_doc.extract_text
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
find_companion_files
|
43
|
-
end
|
44
|
-
|
45
|
-
def write_pdf(base_path)
|
46
|
-
full_path = File.join(base_path, store_path, new_name)
|
47
|
-
dir_path = File.join(base_path, store_path)
|
48
|
-
|
49
|
-
FileUtils.mkdir_p(dir_path) unless File.exist?(dir_path)
|
50
|
-
|
51
|
-
@pdf_doc.write_pdf(dir_path, full_path)
|
52
|
-
|
53
|
-
return if Dry.active?
|
19
|
+
Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ Finding a subtype"
|
20
|
+
@sub_type = type.sub_type(@text)
|
21
|
+
Pdfh.verbose_print " SubType: #{@sub_type}"
|
22
|
+
@companion = search_companion_files
|
54
23
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
def period
|
60
|
-
formated_month = month.to_s.rjust(2, "0")
|
61
|
-
"#{year}-#{formated_month}"
|
62
|
-
end
|
63
|
-
|
64
|
-
def month
|
65
|
-
month = Month.normalize(@month) + @month_offset
|
66
|
-
|
67
|
-
case month
|
68
|
-
when 0
|
69
|
-
@year_offset = -1
|
70
|
-
12
|
71
|
-
when 13
|
72
|
-
@year_offset = 1
|
73
|
-
1
|
74
|
-
else month
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def year
|
79
|
-
tmp = @year.size == 2 ? "20#{@year}" : @year
|
80
|
-
tmp.to_i + @year_offset
|
24
|
+
month, year, @extra = match_data
|
25
|
+
@period = DocumentPeriod.new(day: extra, month: month, month_offset: @sub_type&.month_offset, year: year)
|
26
|
+
Pdfh.verbose_print " Period: #{@period.inspect}"
|
81
27
|
end
|
82
28
|
|
29
|
+
# @return [String]
|
83
30
|
def file_name_only
|
84
31
|
File.basename(@file, File.extname(@file))
|
85
32
|
end
|
86
33
|
|
34
|
+
# @return [String]
|
87
35
|
def file_name
|
88
36
|
File.basename(@file)
|
89
37
|
end
|
90
38
|
|
39
|
+
# @return [String]
|
91
40
|
def backup_name
|
92
41
|
"#{file_name}.bkp"
|
93
42
|
end
|
94
43
|
|
44
|
+
# @return [String]
|
95
45
|
def type_name
|
96
|
-
@type
|
46
|
+
@type&.name&.titleize || "N/A"
|
97
47
|
end
|
98
48
|
|
49
|
+
# @return [String]
|
99
50
|
def sub_type
|
100
|
-
@sub_type
|
51
|
+
@sub_type&.name&.titleize || "N/A"
|
52
|
+
end
|
53
|
+
|
54
|
+
# @return [String]
|
55
|
+
def period
|
56
|
+
@period.to_s
|
101
57
|
end
|
102
58
|
|
59
|
+
# @return [String]
|
103
60
|
def new_name
|
104
|
-
template = @type.
|
61
|
+
template = @type.name_template
|
105
62
|
new_name = template
|
106
63
|
.sub("{original}", file_name_only)
|
107
64
|
.sub("{period}", period)
|
@@ -111,10 +68,12 @@ module Pdfh
|
|
111
68
|
"#{new_name}.pdf"
|
112
69
|
end
|
113
70
|
|
71
|
+
# @return [String]
|
114
72
|
def store_path
|
115
|
-
@type.store_path.gsub(
|
73
|
+
@type.store_path.gsub("{YEAR}", @period.year.to_s)
|
116
74
|
end
|
117
75
|
|
76
|
+
# @return [String]
|
118
77
|
def print_cmd
|
119
78
|
return "N/A" if type.print_cmd.nil? || type.print_cmd.empty?
|
120
79
|
|
@@ -122,62 +81,35 @@ module Pdfh
|
|
122
81
|
"#{type.print_cmd} #{relative_path}"
|
123
82
|
end
|
124
83
|
|
125
|
-
def to_s
|
126
|
-
<<~STR
|
127
|
-
Name: #{file_name_only}
|
128
|
-
Type: #{type_name}
|
129
|
-
Sub Type: #{sub_type}
|
130
|
-
Period: #{period}
|
131
|
-
File Path: #{file}
|
132
|
-
File Name: #{file_name}
|
133
|
-
New Name: #{new_name}
|
134
|
-
StorePath: #{store_path}
|
135
|
-
Companion: #{companion_files(join: true)}
|
136
|
-
Print Cmd: #{print_cmd}
|
137
|
-
STR
|
138
|
-
end
|
139
|
-
|
140
84
|
def companion_files(join: false)
|
141
|
-
@companion unless join
|
85
|
+
return @companion unless join
|
142
86
|
|
143
87
|
@companion.empty? ? "N/A" : @companion.join(", ")
|
144
88
|
end
|
145
89
|
|
146
|
-
|
147
|
-
|
148
|
-
##
|
149
|
-
# @param sub_types [Array]
|
150
|
-
# @return [OpenStruct]
|
151
|
-
def extract_subtype(sub_types)
|
152
|
-
return nil if sub_types.nil? || sub_types.empty?
|
153
|
-
|
154
|
-
sub_types.each do |st|
|
155
|
-
is_matched = Regexp.new(st["name"]).match?(@text)
|
156
|
-
next unless is_matched
|
157
|
-
|
158
|
-
sub = OpenStruct.new(st)
|
159
|
-
@month_offset = sub.month_offset || 0
|
160
|
-
return sub
|
161
|
-
end
|
162
|
-
nil
|
163
|
-
end
|
164
|
-
|
90
|
+
# @return [String]
|
165
91
|
def home_dir
|
166
92
|
File.dirname(@file)
|
167
93
|
end
|
168
94
|
|
169
|
-
|
95
|
+
# @return [String]
|
96
|
+
def to_s
|
97
|
+
@file
|
98
|
+
end
|
99
|
+
|
100
|
+
private
|
101
|
+
|
170
102
|
# named matches can appear in any order with names 'd', 'm' and 'y'
|
171
|
-
#
|
103
|
+
# unnamed matches need to be in order: month, year
|
172
104
|
# @return [Array] - format [month, year, day]
|
173
105
|
def match_data
|
174
|
-
|
175
|
-
|
176
|
-
|
106
|
+
Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ RegEx"
|
107
|
+
Pdfh.verbose_print " Using regex: #{@type.re_date}"
|
108
|
+
Pdfh.verbose_print " named: #{@type.re_date.named_captures}"
|
177
109
|
matched = @type.re_date.match(@text)
|
178
110
|
raise ReDateError unless matched
|
179
111
|
|
180
|
-
|
112
|
+
Pdfh.verbose_print " captured: #{matched.captures}"
|
181
113
|
|
182
114
|
return matched.captures.map(&:downcase) if @type.re_date.named_captures.empty?
|
183
115
|
|
@@ -185,36 +117,16 @@ module Pdfh
|
|
185
117
|
[matched[:m].downcase, matched[:y], extra]
|
186
118
|
end
|
187
119
|
|
188
|
-
|
189
|
-
|
190
|
-
|
120
|
+
# @return [Array]
|
121
|
+
def search_companion_files
|
122
|
+
Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ Searching Companion files"
|
123
|
+
Pdfh.verbose_print " Searching on: #{home_dir.inspect}"
|
191
124
|
Dir.chdir(home_dir) do
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
def find_companion_files
|
197
|
-
Verbose.print "~~~~~~~~~~~~~~~~~~ Searching Companion files"
|
198
|
-
Verbose.print " Working on dir: #{home_dir}"
|
199
|
-
Dir.chdir(home_dir) do
|
200
|
-
all_files = Dir["#{file_name_only}.*"]
|
201
|
-
companion = all_files.reject { |file| file.include? "pdf" }
|
202
|
-
Verbose.print " - #{companion.join(", ")}"
|
203
|
-
|
204
|
-
@companion = companion || []
|
205
|
-
end
|
206
|
-
end
|
125
|
+
files_matching = Dir["#{file_name_only}.*"]
|
126
|
+
companion = files_matching.reject { |file| file.include? ".pdf" }
|
127
|
+
Pdfh.verbose_print " Found: #{companion.inspect}"
|
207
128
|
|
208
|
-
|
209
|
-
Verbose.print "~~~~~~~~~~~~~~~~~~ Writing Companion files"
|
210
|
-
@companion.each do |file|
|
211
|
-
Verbose.print " Working on #{file}..."
|
212
|
-
src_name = File.join(home_dir, file)
|
213
|
-
src_ext = File.extname(file)
|
214
|
-
dest_name = File.basename(new_name, ".pdf")
|
215
|
-
dest_full = File.join(destination, "#{dest_name}#{src_ext}")
|
216
|
-
Verbose.print " cp #{src_name} --> #{dest_full}"
|
217
|
-
FileUtils.cp(src_name, dest_full)
|
129
|
+
companion
|
218
130
|
end
|
219
131
|
end
|
220
132
|
end
|