pdfh 0.1.4 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1,8 +1,25 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'bundler/gem_tasks'
4
- require 'rspec/core/rake_task'
3
+ require "colorize"
4
+ require "bundler/gem_tasks"
5
+ require "rspec/core/rake_task"
6
+ require "versionomy"
5
7
 
6
8
  RSpec::Core::RakeTask.new(:spec)
7
9
 
8
10
  task default: :spec
11
+
# Rewrites the version literal in lib/pdfh/version.rb with the next version.
#
# Usage: `rake bump` (tiny bump by default) or `rake "bump[minor]"`.
# Raises RuntimeError when the version file holds no MAJOR.MINOR.TINY string.
desc "Bump gem version number (tiny|minor|major)"
task :bump, :type do |_t, args|
  args.with_defaults(type: :tiny)
  version_file = File.join(__dir__, "lib", "pdfh", "version.rb")
  content = File.read(version_file)

  version_pattern = /(?<major>\d+)\.(?<minor>\d+)\.(?<tiny>\d+)/
  current_version = content[version_pattern]
  raise "No MAJOR.MINOR.TINY version found in #{version_file}" unless current_version

  next_version = Versionomy.parse(current_version).bump(args.type).to_s

  # Swap the matched text for the new version and nothing else. Named groups
  # are still addressable as \1..\3 in a replacement string, so wrapping the
  # new version in "\\1" and "\\3" would glue the old major and tiny digits
  # around it (0.1.4 -> "00.1.54").
  File.write(version_file, content.sub(version_pattern, next_version))

  puts "Successfully bumped from #{current_version.red} to #{next_version.green}"
end
data/bin/console CHANGED
@@ -1,15 +1,15 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- require 'bundler/setup'
5
- require 'pdfh'
4
+ require "bundler/setup"
5
+ require "pdfh"
6
6
 
7
7
  # You can add fixtures and/or initialization code here to make experimenting
8
8
  # with your gem easier. You can also use a different console, if you like.
9
9
 
10
10
  # (If you use this, don't forget to add pry to your Gemfile!)
11
- # require 'pry'
12
- # Pry.start
11
+ require "pry"
12
+ Pry.start
13
13
 
14
- require 'irb'
15
- IRB.start(__FILE__)
14
+ # require "irb"
15
+ # IRB.start(__FILE__)
data/exe/pdfh CHANGED
@@ -1,21 +1,22 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- require 'optparse'
5
- require 'pdfh'
6
- require 'pdfh/version'
4
+ require "optparse"
5
+ require "pdfh"
6
+ require "pdfh/version"
7
+ require "pdfh/utils"
7
8
 
8
9
  options = {}
9
10
  opt = OptionParser.new do |opts|
10
11
  # Process ARGV
11
12
  opts.banner = "Usage: #{opts.program_name} [options]"
12
- opts.on('-v', '--verbose', 'Show more output, useful for debug') { |o| options[:verbose] = o }
13
- opts.on('-d', '--dry', 'Dry run, does not write new pdf') { |o| options[:dry] = o }
14
- opts.on_tail('-V', '--version', 'Show version') do
13
+ opts.on("-v", "--verbose", "Show more output, useful for debug") { |o| options[:verbose] = o }
14
+ opts.on("-d", "--dry", "Dry run, does not write new pdf") { |o| options[:dry] = o }
15
+ opts.on_tail("-V", "--version", "Show version") do
15
16
  puts "#{opts.program_name} v#{Pdfh::VERSION}"
16
17
  exit
17
18
  end
18
- opts.on_tail('-h', '--help', 'Show this message') do
19
+ opts.on_tail("-h", "--help", "Show this message") do
19
20
  puts opts
20
21
  exit
21
22
  end
@@ -24,9 +25,19 @@ end
24
25
  begin
25
26
  opt.parse!
26
27
  rescue OptionParser::InvalidOption => e
27
- puts "Error, #{e.message}".colorize(:red)
28
+ Pdfh.print_error e, exit_app: false
28
29
  puts opt
29
30
  exit 1
30
31
  end
31
32
 
##
# Checks whether an external command-line tool can be found on the PATH and
# prints a notice to stdout when it cannot.
#
# The lookup is done in Ruby rather than by spawning `command -v`: `command`
# is a shell builtin, and a command string without shell metacharacters is
# executed directly (no shell), which can raise Errno::ENOENT on systems that
# ship no standalone `command` binary. It also avoids interpolating +app+
# into a command line.
#
# @param app [String] executable name, e.g. "qpdf"
# @return [Boolean] true when an executable file named +app+ is in a PATH directory
def validate_installed(app)
  # PATHEXT is normally only set on Windows; the bare name is always tried.
  suffixes = [""] + ENV.fetch("PATHEXT", "").split(";")
  directories = ENV.fetch("PATH", "").split(File::PATH_SEPARATOR).reject(&:empty?)

  installed = directories.any? do |dir|
    suffixes.any? do |suffix|
      candidate = File.join(dir, "#{app}#{suffix}")
      File.file?(candidate) && File.executable?(candidate)
    end
  end
  puts "Missing #{app} command." unless installed

  installed
end
40
+
41
+ exit(1) unless validate_installed("qpdf") && validate_installed("pdftotext")
42
+
32
43
  Pdfh.main(options)
data/lib/ext/string.rb ADDED
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Extends String class when required
5
+ module Extensions
6
+ ##
7
+ # Adds new functionality to string Class
8
+ refine String do
9
+ def titleize
10
+ split.map(&:capitalize).join(" ")
11
+ end
12
+ end
13
+ end
data/lib/pdfh.rb CHANGED
@@ -1,9 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'pdfh/version'
4
- require 'pdfh/settings'
5
- require 'pdfh/document'
6
- require 'pdfh/utils'
3
+ require "pdfh/version"
4
+ require "pdfh/settings"
5
+ require "pdfh/document"
6
+ require "pdfh/utils"
7
7
 
8
8
  ##
9
9
  # Gem entry point
@@ -12,68 +12,86 @@ module Pdfh
12
12
  Verbose.active = options[:verbose]
13
13
  Dry.active = options[:dry]
14
14
 
15
- settings = Settings.new(search_config_file)
15
+ @settings = Settings.new(search_config_file)
16
+ puts "Destination path: #{@settings.base_path.light_blue}"
17
+ @settings.scrape_dirs.each do |work_directory|
18
+ process_directory(work_directory)
19
+ end
20
+ rescue StandardError => e
21
+ print_error e
22
+ end
16
23
 
17
- settings.scrape_dirs.each do |work_directory|
18
- print_separator(work_directory)
19
- ignored_files = []
20
- Dir["#{work_directory}/*.pdf"].each do |file|
21
- base_name = File.basename(file, File.extname(file))
22
- type = settings.match_doc_type(file)
23
- unless type
24
- ignored_files << base_name
25
- next
26
- end
24
+ def self.search_config_file
25
+ name = File.basename($PROGRAM_NAME)
26
+ names_to_look = ["#{name}.yml", "#{name}.yaml"]
27
+ dir_order = [Dir.pwd, File.expand_path("~")]
27
28
 
28
- pad = 12
29
- puts "Working on #{base_name.colorize(:light_green)}"
30
- print_ident 'Type', type.name, :light_blue, width: pad
31
- doc = Document.new(file, type)
32
- print_ident 'Sub-Type', doc.sub_type, :light_blue, width: pad
33
- print_ident 'Period', doc.period, :light_blue, width: pad
34
- print_ident 'New Name', doc.new_name, :light_blue, width: pad
35
- print_ident 'Store Path', doc.store_path, :light_blue, width: pad
36
- print_ident 'Other files', doc.companion_files(join: true), :light_blue, width: pad
37
- doc.write_pdf(settings.base_path)
29
+ dir_order.each do |dir|
30
+ names_to_look.each do |file|
31
+ f = File.join(dir, file)
32
+ return f if File.file?(f)
38
33
  end
34
+ end
39
35
 
40
- puts "\nNo account was matched for these PDF files:" unless ignored_files.empty?
41
- ignored_files.each.with_index(1) do |file, index|
42
- print_ident index, file, :red
36
+ raise StandardError, "no configuraton file (#{names_to_look.join(" or ")}) was found\n"\
37
+ " within paths: #{dir_order.join(", ")}"
38
+ end
39
+
40
+ ##
41
+ # @param [String] work_directory
42
+ def self.process_directory(work_directory)
43
+ print_separator work_directory
44
+ ignored_files = []
45
+ Dir["#{work_directory}/*.pdf"].each do |pdf_file|
46
+ type = @settings.match_doc_type(pdf_file)
47
+ if type
48
+ process_document(pdf_file, type)
49
+ else
50
+ ignored_files << basename_without_ext(pdf_file)
43
51
  end
44
52
  end
53
+
54
+ puts "\nNo account was matched for these PDF files:" unless ignored_files.empty?
55
+ ignored_files.each.with_index(1) { |file, index| print_ident index, file, :magenta }
56
+ end
57
+
58
+ ##
59
+ # Generate document, and process actions
60
+ # @param [String] file
61
+ # @param [Type] type
62
+ # rubocop:disable Metrics/AbcSize
63
+ def self.process_document(file, type)
64
+ puts "Working on #{basename_without_ext(file).colorize(:light_green)}"
65
+ pad = 12
66
+ print_ident "Type", type.name, :light_blue, width: pad
67
+ doc = Document.new(file, type)
68
+ print_ident "Sub-Type", doc.sub_type, :light_blue, width: pad
69
+ print_ident "Period", doc.period, :light_blue, width: pad
70
+ print_ident "New Name", doc.new_name, :light_blue, width: pad
71
+ print_ident "Store Path", doc.store_path, :light_blue, width: pad
72
+ print_ident "Other files", doc.companion_files(join: true), :light_blue, width: pad
73
+ print_ident "Print CMD", doc.print_cmd, :light_blue, width: pad
74
+ doc.write_pdf(@settings.base_path)
45
75
  rescue StandardError => e
46
- line = e.backtrace[0].match(/:(\d+)/)[1]
47
- puts "Error, #{e.message}. #{line}".colorize(:red)
48
- exit 1
76
+ puts " Doc Error: #{e.message}".colorize(:red)
49
77
  end
78
+ # rubocop:enable Metrics/AbcSize
50
79
 
##
# Prints a light-yellow separator line: a blank line, 40 dashes, the title,
# then dashes up to the terminal width when there is room left.
#
# @param title [String] text embedded in the separator
def self.print_separator(title)
  require "io/console/size" # lazy-loaded: only needed to measure the terminal

  # IO.console_size falls back to a default size when no console is attached.
  # Shelling out to `stty size` prints nothing in that case, which left the
  # column count nil and made the subtraction below raise NoMethodError.
  _rows, cols = IO.console_size
  sep = "\n#{"-" * 40} #{title} "
  remaining_cols = cols - sep.size
  sep += "-" * remaining_cols if remaining_cols.positive?
  puts sep.colorize(:light_yellow)
end
58
87
 
59
88
  def self.print_ident(field, value, color = :green, width: 3)
60
89
  field_str = field.to_s.rjust(width)
61
90
  value_str = value.colorize(color)
62
- puts "#{' ' * 4}#{field_str}: #{value_str}"
91
+ puts "#{" " * 4}#{field_str}: #{value_str}"
63
92
  end
64
93
 
65
- def self.search_config_file
66
- name = File.basename($PROGRAM_NAME)
67
- names_to_look = ["#{name}.yml", "#{name}.yaml"]
68
- dir_order = [Dir.pwd, File.expand_path('~')]
69
-
70
- dir_order.each do |dir|
71
- names_to_look.each do |file|
72
- f = File.join(dir, file)
73
- return f if File.file?(f)
74
- end
75
- end
76
-
77
- raise StandardError, "no configuraton file (#{names_to_look.join(' or ')}) was found\n within paths: #{dir_order.join(', ')}"
94
+ def self.basename_without_ext(file)
95
+ File.basename(file, File.extname(file))
78
96
  end
79
97
  end
data/lib/pdfh/document.rb CHANGED
@@ -1,73 +1,77 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'fileutils'
4
- require 'ext/string'
3
+ require "fileutils"
4
+ require "pdfh/month"
5
+ require "pdfh/pdf_handler"
6
+ require "ext/string"
5
7
 
8
+ ##
9
+ # main module
6
10
  module Pdfh
7
- using Pdfh::String
11
+ using Extensions
12
+
13
+ ##
14
+ # Regular-expression date error, raised when the date pattern does not match
15
+ class ReDateError < StandardError
16
+ def initialize(message = "No data matched your date regular expression")
17
+ super(message)
18
+ end
19
+ end
8
20
 
9
21
  ##
10
22
  # Handles the PDF detected by the rules
11
- # TODO: Replace command utils with this gem
12
- # require 'pdf-reader'
13
- #
14
- # reader = PDF::Reader.new(temp)
15
- # reader.pages.each do |page|
16
- # @text << page.text
17
- # end
18
23
  class Document
19
- attr_accessor :text, :type, :file, :extra
20
-
21
- MONTHS = {
22
- enero: 1,
23
- febrero: 2,
24
- marzo: 3,
25
- abril: 4,
26
- mayo: 5,
27
- junio: 6,
28
- julio: 7,
29
- agosto: 8,
30
- septiembre: 9,
31
- octubre: 10,
32
- noviembre: 11,
33
- diciembre: 12
34
- }.freeze
24
+ attr_reader :text, :type, :file, :extra
35
25
 
36
26
  def initialize(file, type, _options = {})
27
+ raise IOError, "File #{file} not found" unless File.exist?(file)
28
+
37
29
  @file = file
38
30
  @type = type
39
31
  @month_offset = 0
40
32
  @year_offset = 0
41
33
 
42
- raise IOError, "File #{file} not found" unless File.exist?(file)
34
+ @pdf_doc = PdfHandler.new(file, @type.pwd)
43
35
 
44
36
  Verbose.print "=== Type: #{type_name} =================="
45
- @text = pdf_text
46
- Verbose.print " Text extracted: #{@text}"
37
+ @text = @pdf_doc.extract_text
47
38
  @sub_type = extract_subtype(@type.sub_types)
48
39
  Verbose.print " SubType: #{@sub_type}"
49
40
  @month, @year, @extra = match_data
50
41
  Verbose.print "==== Assigned: #{@month}, #{@year}, #{@extra} ==( Month, Year, Extra )================"
51
- @companion = find_companion_files
42
+ find_companion_files
43
+ end
44
+
45
+ def write_pdf(base_path)
46
+ full_path = File.join(base_path, store_path, new_name)
47
+ dir_path = File.join(base_path, store_path)
48
+
49
+ FileUtils.mkdir_p(dir_path) unless File.exist?(dir_path)
50
+
51
+ @pdf_doc.write_pdf(dir_path, full_path)
52
+
53
+ return if Dry.active?
54
+
55
+ make_document_backup
56
+ copy_companion_files(dir_path)
52
57
  end
53
58
 
54
59
  def period
55
- m = month.to_s.rjust(2, '0')
56
- y = year
57
- "#{y}-#{m}"
60
+ formated_month = month.to_s.rjust(2, "0")
61
+ "#{year}-#{formated_month}"
58
62
  end
59
63
 
60
64
  def month
61
- m = normalize_month + @month_offset
65
+ month = Month.normalize(@month) + @month_offset
62
66
 
63
- case m
64
- when 0 then
67
+ case month
68
+ when 0
65
69
  @year_offset = -1
66
70
  12
67
- when 13 then
71
+ when 13
68
72
  @year_offset = 1
69
73
  1
70
- else m
74
+ else month
71
75
  end
72
76
  end
73
77
 
@@ -89,21 +93,21 @@ module Pdfh
89
93
  end
90
94
 
91
95
  def type_name
92
- @type ? @type.name.titleize : 'N/A'
96
+ @type ? @type.name.titleize : "N/A"
93
97
  end
94
98
 
95
99
  def sub_type
96
- @sub_type ? @sub_type.name.titleize : 'N/A'
100
+ @sub_type ? @sub_type.name.titleize : "N/A"
97
101
  end
98
102
 
99
103
  def new_name
100
- template = @type.to_h.key?(:name_template) ? @type.name_template : '{original}'
104
+ template = @type.to_h.key?(:name_template) ? @type.name_template : "{original}"
101
105
  new_name = template
102
- .gsub(/\{original\}/, file_name_only)
103
- .gsub(/\{period\}/, period)
104
- .gsub(/\{type\}/, type_name)
105
- .gsub(/\{subtype\}/, sub_type)
106
- .gsub(/\{extra\}/, extra || '')
106
+ .sub("{original}", file_name_only)
107
+ .sub("{period}", period)
108
+ .sub("{type}", type_name)
109
+ .sub("{subtype}", sub_type)
110
+ .sub("{extra}", extra || "")
107
111
  "#{new_name}.pdf"
108
112
  end
109
113
 
@@ -111,6 +115,13 @@ module Pdfh
111
115
  @type.store_path.gsub(/\{YEAR\}/, year.to_s)
112
116
  end
113
117
 
118
+ def print_cmd
119
+ return "N/A" if type.print_cmd.nil? || type.print_cmd.empty?
120
+
121
+ relative_path = File.join(store_path, new_name)
122
+ "#{type.print_cmd} #{relative_path}"
123
+ end
124
+
114
125
  def to_s
115
126
  <<~STR
116
127
  Name: #{file_name_only}
@@ -122,49 +133,26 @@ module Pdfh
122
133
  New Name: #{new_name}
123
134
  StorePath: #{store_path}
124
135
  Companion: #{companion_files(join: true)}
136
+ Print Cmd: #{print_cmd}
125
137
  STR
126
138
  end
127
139
 
##
# Returns the companion files recorded for this document.
#
# @param join [Boolean] when true, return a display string instead of the list
# @return [Array<String>] the companion file names when +join+ is false
# @return [String] the names joined with ", ", or "N/A" when there are none,
#   when +join+ is true
def companion_files(join: false)
  # Without an explicit return the list was evaluated and discarded, so the
  # joined string was returned regardless of +join+.
  return @companion unless join

  @companion.empty? ? "N/A" : @companion.join(", ")
end
157
145
 
158
146
  private
159
147
 
160
148
  ##
161
- # @param [Array] subtypes
149
+ # @param sub_types [Array]
162
150
  # @return [OpenStruct]
163
151
  def extract_subtype(sub_types)
164
152
  return nil if sub_types.nil? || sub_types.empty?
165
153
 
166
154
  sub_types.each do |st|
167
- is_matched = Regexp.new(st['name']).match?(@text)
155
+ is_matched = Regexp.new(st["name"]).match?(@text)
168
156
  next unless is_matched
169
157
 
170
158
  sub = OpenStruct.new(st)
@@ -174,70 +162,56 @@ module Pdfh
174
162
  nil
175
163
  end
176
164
 
177
- def normalize_month
178
- month_num = @month.to_i
179
- return month_num if month_num.positive?
180
-
181
- if @month.size == 3
182
- MONTHS.keys.each do |mon|
183
- return MONTHS[mon] if mon.to_s[0, 3] == @month
184
- end
185
- end
186
-
187
- MONTHS[@month.to_sym]
188
- end
189
-
190
165
  def home_dir
191
166
  File.dirname(@file)
192
167
  end
193
168
 
194
- def pdf_text
195
- temp = `mktemp`.chomp
196
- Verbose.print " --> #{temp} temporal file assigned."
197
-
198
- password_opt = "--password='#{@type.pwd}'" if @type.pwd
199
- cmd = %(qpdf #{password_opt} --decrypt --stream-data=uncompress '#{@file}' '#{temp}')
200
- Verbose.print " Command: #{cmd}"
201
- _result = `#{cmd}`
202
- Verbose.print 'Password removed.'
203
-
204
- cmd2 = %(pdftotext -enc UTF-8 '#{temp}' -)
205
- Verbose.print " Command: #{cmd2}"
206
- `#{cmd2}`
207
- end
208
-
169
+ ##
170
+ # named matches can appear in any order with names 'd', 'm' and 'y'
171
+ # unnamed matches need to be in the order month, year
172
+ # @return [Array] - format [month, year, day]
209
173
  def match_data
210
- Verbose.print '~~~~~~~~~~~~~~~~~~ RegEx'
174
+ Verbose.print "~~~~~~~~~~~~~~~~~~ RegEx"
211
175
  Verbose.print " Using regex: #{@type.re_date}"
212
176
  Verbose.print " named: #{@type.re_date.named_captures}"
213
177
  matched = @type.re_date.match(@text)
178
+ raise ReDateError unless matched
179
+
214
180
  Verbose.print " captured: #{matched.captures}"
215
181
 
216
182
  return matched.captures.map(&:downcase) if @type.re_date.named_captures.empty?
217
183
 
218
- extra = matched.captures.size > 2 ? matched[3] : nil
184
+ extra = matched.captures.size > 2 ? matched[:d] : nil
219
185
  [matched[:m].downcase, matched[:y], extra]
220
186
  end
221
187
 
188
+ ##
189
+ # Create a backup of original document
190
+ def make_document_backup
191
+ Dir.chdir(home_dir) do
192
+ File.rename(file, backup_name)
193
+ end
194
+ end
195
+
222
196
  def find_companion_files
223
- Verbose.print '~~~~~~~~~~~~~~~~~~ Searching Companion files'
197
+ Verbose.print "~~~~~~~~~~~~~~~~~~ Searching Companion files"
224
198
  Verbose.print " Working on dir: #{home_dir}"
225
199
  Dir.chdir(home_dir) do
226
200
  all_files = Dir["#{file_name_only}.*"]
227
- companion = all_files.reject { |f| f.include? 'pdf' }
228
- Verbose.print " - #{companion.join(', ')}"
201
+ companion = all_files.reject { |file| file.include? "pdf" }
202
+ Verbose.print " - #{companion.join(", ")}"
229
203
 
230
- companion || []
204
+ @companion = companion || []
231
205
  end
232
206
  end
233
207
 
234
208
  def copy_companion_files(destination)
235
- Verbose.print '~~~~~~~~~~~~~~~~~~ Writing Companion files'
209
+ Verbose.print "~~~~~~~~~~~~~~~~~~ Writing Companion files"
236
210
  @companion.each do |file|
237
211
  Verbose.print " Working on #{file}..."
238
212
  src_name = File.join(home_dir, file)
239
213
  src_ext = File.extname(file)
240
- dest_name = File.basename(new_name, '.pdf')
214
+ dest_name = File.basename(new_name, ".pdf")
241
215
  dest_full = File.join(destination, "#{dest_name}#{src_ext}")
242
216
  Verbose.print " cp #{src_name} --> #{dest_full}"
243
217
  FileUtils.cp(src_name, dest_full)