pdfh 0.1.4 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/pdfh/month.rb ADDED
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Pdfh
  ##
  # Handles Month conversions: turns a month given as a number, a Spanish
  # month name, or a 3 character Spanish abbreviation into its month number.
  class Month
    # Spanish month names mapped to their month number.
    MONTHS = {
      enero: 1,
      febrero: 2,
      marzo: 3,
      abril: 4,
      mayo: 5,
      junio: 6,
      julio: 7,
      agosto: 8,
      septiembre: 9,
      octubre: 10,
      noviembre: 11,
      diciembre: 12
    }.freeze

    ##
    # Normalizes a month value to its number.
    #
    # Resolution order:
    # 1. a number (or a string starting with a number) between 1 and 12
    # 2. a 3 character abbreviation: 'mar', 'Nov'
    # 3. a full month name: 'marzo', 'Noviembre'
    #
    # Names are matched case-insensitively and surrounding whitespace is
    # ignored.
    #
    # @param [String, Integer, nil] month
    # @return [Integer, nil] the month number, nil when it is not recognized
    def self.normalize(month)
      # Work on a canonical string so Integers and nil never reach String-only
      # methods (+size+, +to_sym+) further down.
      name = month.to_s.strip.downcase

      # When param is a number
      month_num = name.to_i
      return month_num if month_num.between?(1, 12)

      # When param is a 3 char month: 'mar', 'nov'
      if name.size == 3
        abbreviated = MONTHS.find { |full_name, _number| full_name.to_s.start_with?(name) }
        return abbreviated.last if abbreviated
      end

      # When param has a direct match
      MONTHS[name.to_sym]
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
require "shellwords"
require "tempfile"

module Pdfh
  ##
  # Handles the Pdf document text extraction and password removal by running
  # the +qpdf+ and +pdftotext+ command line utilities.
  # TODO: Replace command utils with this gem
  #   require 'pdf-reader'
  #
  #   reader = PDF::Reader.new(temp)
  #   reader.pages.each do |page|
  #     @text << page.text
  #   end
  class PdfHandler
    attr_reader :file, :password

    ##
    # @param [String] file path of the pdf to process
    # @param [String, nil] password passed to qpdf as --password when present
    def initialize(file, password)
      @file = file
      @password = password
    end

    ##
    # Gets the text from the pdf in order to execute
    # the regular expression matches
    #
    # The pdf is first decrypted/uncompressed by qpdf into a temporary file,
    # which is removed once the text has been read.
    # @return [String] the output of pdftotext
    def extract_text
      Tempfile.create("pdfh") do |temp_file|
        temp = temp_file.path
        # Only the path is needed; qpdf writes the file itself.
        temp_file.close
        Verbose.print " --> #{temp} temporal file assigned."

        cmd = qpdf_command(temp, "--stream-data=uncompress")
        Verbose.print " Command: #{cmd}"
        _result = `#{cmd}`

        cmd2 = Shellwords.join(["pdftotext", "-enc", "UTF-8", temp, "-"])
        Verbose.print " Command: #{cmd2}"
        text = `#{cmd2}`
        Verbose.print " Text extracted: #{text}"
        text
      end
    end

    ##
    # Writes a decrypted copy of the pdf to +full_path+.
    # Nothing is written when Dry mode is active.
    # @param [String] dir_path directory that must already exist
    # @param [String] full_path destination file
    # @raise [IOError] when +dir_path+ does not exist or the file was not created
    def write_pdf(dir_path, full_path)
      Verbose.print "~~~~~~~~~~~~~~~~~~ Writing PDFs"
      raise IOError, "Path #{dir_path} not found." unless Dir.exist?(dir_path)

      cmd = qpdf_command(full_path)
      Verbose.print " Write pdf command: #{cmd}"

      return if Dry.active?

      _result = `#{cmd}`
      raise IOError, "File #{full_path} was not created." unless File.file?(full_path)
    end

    private

    ##
    # Builds the shell-escaped qpdf command line that decrypts the source file
    # into +output+, so quotes or spaces in paths/password cannot break it.
    # @param [String] output destination path
    # @param [Array<String>] options extra qpdf options placed after --decrypt
    # @return [String]
    def qpdf_command(output, *options)
      password_args = @password ? ["--password=#{@password}"] : []
      Shellwords.join(["qpdf", *password_args, "--decrypt", *options, @file, output])
    end
  end
end
data/lib/pdfh/settings.rb CHANGED
@@ -1,27 +1,25 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'yaml'
4
- require 'ostruct'
5
- require 'base64'
3
+ require "yaml"
4
+ require "ostruct"
5
+ require "base64"
6
6
 
7
7
  module Pdfh
8
8
  ##
9
9
  # Handles the config yaml data mapping, and associates a file name with a doc type
10
10
  class Settings
11
- attr_accessor :scrape_dirs, :base_path, :document_types
11
+ attr_reader :scrape_dirs, :base_path, :document_types
12
12
 
13
13
  def initialize(file)
14
14
  file_hash = YAML.load_file(file)
15
15
  Verbose.print "Loaded configuration file: #{file}"
16
16
 
17
- self.scrape_dirs = file_hash['scrape_dirs'].map do |d|
18
- File.expand_path(d)
19
- end
20
- self.base_path = File.expand_path(file_hash['base_path'])
21
- self.document_types = process_doc_types(file_hash['document_types'])
17
+ @scrape_dirs = process_scrape_dirs(file_hash["scrape_dirs"])
18
+ @base_path = File.expand_path(file_hash["base_path"])
19
+ @document_types = process_doc_types(file_hash["document_types"])
22
20
 
23
- Verbose.print 'Processing directories:'
24
- scrape_dirs.each { |d| Verbose.print " - #{d}" }
21
+ Verbose.print "Processing directories:"
22
+ scrape_dirs.each { |dir| Verbose.print " - #{dir}" }
25
23
  Verbose.print
26
24
  end
27
25
 
@@ -38,6 +36,18 @@ module Pdfh
38
36
 
39
37
  private
40
38
 
39
##
# Expands every configured scrape directory and keeps only the ones that
# exist on disk; a verbose message is printed for each missing directory.
# @param [Array<String>, nil] scrape_dirs_list nil is treated as an empty list
# @return [Array<String>] expanded paths of the existing directories
def process_scrape_dirs(scrape_dirs_list)
  # Collect explicitly instead of map + compact, so the result never depends
  # on what Verbose.print happens to return.
  Array(scrape_dirs_list).each_with_object([]) do |dir, existing|
    expanded = File.expand_path(dir)
    if File.directory?(expanded)
      existing << expanded
    else
      Verbose.print " ** Directory #{dir} does not exists."
    end
  end
end
50
+
41
51
  def process_doc_types(doc_types)
42
52
  doc_types.map do |x|
43
53
  object = OpenStruct.new(x)
data/lib/pdfh/utils.rb CHANGED
@@ -1,10 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'colorize'
3
+ require "colorize"
4
4
 
5
+ # Contains all generic short functionality
5
6
  module Pdfh
6
- class Error < StandardError; end
7
-
8
7
  ##
9
8
  # Keeps Verbose option in whole project
10
9
  class Verbose
@@ -16,7 +15,7 @@ module Pdfh
16
15
  @active
17
16
  end
18
17
 
19
- def print(msg = '')
18
+ def print(msg = "")
20
19
  puts msg.colorize(:cyan) if active?
21
20
  end
22
21
  end
@@ -34,4 +33,10 @@ module Pdfh
34
33
  end
35
34
  end
36
35
  end
36
+
37
##
# Prints the exception message in red, prefixed with the line number of the
# first backtrace frame, and optionally terminates the application.
# A "?" is shown as line when the exception carries no usable backtrace
# (for example, when it was never raised).
# @param [Exception] exception
# @param [Boolean] exit_app when true the process exits with status 1
def self.print_error(exception, exit_app: true)
  location = exception.backtrace_locations&.first
  # Fall back to parsing the textual backtrace (e.g. set via set_backtrace).
  line = location&.lineno || Array(exception.backtrace).first.to_s[/:(\d+)/, 1] || "?"
  puts "Error, Line[#{line}]: #{exception.message}.".colorize(:red)
  exit 1 if exit_app
end
37
42
  end
data/lib/pdfh/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Pdfh
4
- VERSION = '0.1.4'
4
+ VERSION = "0.1.9"
5
5
  end
data/pdfh.gemspec CHANGED
@@ -1,48 +1,43 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- lib = File.expand_path('lib', __dir__)
4
- $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
- require 'pdfh/version'
3
+ # lib = File.expand_path("lib", __dir__)
4
+ # $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require_relative "lib/pdfh/version"
6
6
 
7
7
  Gem::Specification.new do |spec|
8
- spec.name = 'pdfh'
8
+ spec.name = "pdfh"
9
9
  spec.version = Pdfh::VERSION
10
- spec.authors = ['Isaias Piña']
11
- spec.email = ['iax7@users.noreply.github.com']
10
+ spec.authors = ["Isaias Piña"]
11
+ spec.email = ["iax7@users.noreply.github.com"]
12
12
 
13
- spec.summary = 'Organize PDF files'
14
- spec.description = 'Examine all PDF files in scrape directories, remove password (if has one), rename and copy to a new directory using regular expresions.'
15
- spec.homepage = 'https://github.com/iax7/pdfh'
16
- spec.license = 'MIT'
17
- spec.required_ruby_version = '>= 2.6.0'
13
+ spec.summary = "Organize PDF files"
14
+ spec.description = "Examine all PDF files in scrape directories, remove password (if has one), "\
15
+ "rename and copy to a new directory using regular expresions."
16
+ spec.homepage = "https://github.com/iax7/pdfh"
17
+ spec.license = "MIT"
18
+ spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
18
19
 
19
20
  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
20
21
  # to allow pushing to a single host or delete this section to allow pushing to any host.
21
22
  if spec.respond_to?(:metadata)
22
- spec.metadata['allowed_push_host'] = 'https://rubygems.org'
23
+ spec.metadata["allowed_push_host"] = "https://rubygems.org"
23
24
 
24
- spec.metadata['homepage_uri'] = spec.homepage
25
- spec.metadata['source_code_uri'] = spec.homepage
26
- spec.metadata['changelog_uri'] = 'https://raw.githubusercontent.com/iax7/pdfh/master/CHANGELOG.md'
25
+ spec.metadata["homepage_uri"] = spec.homepage
26
+ spec.metadata["source_code_uri"] = spec.homepage
27
+ spec.metadata["changelog_uri"] = "https://raw.githubusercontent.com/iax7/pdfh/master/CHANGELOG.md"
27
28
  else
28
- raise 'RubyGems 2.0 or newer is required to protect against ' \
29
- 'public gem pushes.'
29
+ raise "RubyGems 2.0 or newer is required to protect against " \
30
+ "public gem pushes."
30
31
  end
31
32
 
32
33
  # Specify which files should be added to the gem when it is released.
33
34
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
34
35
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
35
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
36
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:\.\w+|docs|test|spec|features)/}) }
36
37
  end
37
- spec.bindir = 'exe'
38
+ spec.bindir = "exe"
38
39
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
39
- spec.require_paths = ['lib']
40
+ spec.require_paths = ["lib"]
40
41
 
41
- spec.add_dependency 'colorize', '~> 0.8.1'
42
-
43
- spec.add_development_dependency 'bundler', '~> 1.17.2'
44
- spec.add_development_dependency 'rake', '~> 10.0'
45
- spec.add_development_dependency 'rspec', '~> 3.0'
46
- spec.add_development_dependency 'simplecov', '~> 0.16.1'
47
- spec.add_development_dependency 'simplecov-console', '~> 0.4.2'
42
+ spec.add_dependency "colorize", "~> 0.8.0"
48
43
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfh
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Isaias Piña
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-01-28 00:00:00.000000000 Z
11
+ date: 2021-03-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: colorize
@@ -16,84 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.8.1
19
+ version: 0.8.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.8.1
27
- - !ruby/object:Gem::Dependency
28
- name: bundler
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: 1.17.2
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: 1.17.2
41
- - !ruby/object:Gem::Dependency
42
- name: rake
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: '10.0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: '10.0'
55
- - !ruby/object:Gem::Dependency
56
- name: rspec
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: '3.0'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - "~>"
67
- - !ruby/object:Gem::Version
68
- version: '3.0'
69
- - !ruby/object:Gem::Dependency
70
- name: simplecov
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - "~>"
74
- - !ruby/object:Gem::Version
75
- version: 0.16.1
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - "~>"
81
- - !ruby/object:Gem::Version
82
- version: 0.16.1
83
- - !ruby/object:Gem::Dependency
84
- name: simplecov-console
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - "~>"
88
- - !ruby/object:Gem::Version
89
- version: 0.4.2
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - "~>"
95
- - !ruby/object:Gem::Version
96
- version: 0.4.2
26
+ version: 0.8.0
97
27
  description: Examine all PDF files in scrape directories, remove password (if has
98
28
  one), rename and copy to a new directory using regular expresions.
99
29
  email:
@@ -106,9 +36,8 @@ files:
106
36
  - ".gitignore"
107
37
  - ".rspec"
108
38
  - ".rubocop.yml"
109
- - ".ruby-gemset"
39
+ - ".rubocop_todo.yml"
110
40
  - ".ruby-version"
111
- - ".travis.yml"
112
41
  - CHANGELOG.md
113
42
  - CODE_OF_CONDUCT.md
114
43
  - Gemfile
@@ -119,8 +48,11 @@ files:
119
48
  - bin/console
120
49
  - bin/setup
121
50
  - exe/pdfh
51
+ - lib/ext/string.rb
122
52
  - lib/pdfh.rb
123
53
  - lib/pdfh/document.rb
54
+ - lib/pdfh/month.rb
55
+ - lib/pdfh/pdf_handler.rb
124
56
  - lib/pdfh/settings.rb
125
57
  - lib/pdfh/utils.rb
126
58
  - lib/pdfh/version.rb
@@ -133,7 +65,7 @@ metadata:
133
65
  homepage_uri: https://github.com/iax7/pdfh
134
66
  source_code_uri: https://github.com/iax7/pdfh
135
67
  changelog_uri: https://raw.githubusercontent.com/iax7/pdfh/master/CHANGELOG.md
136
- post_install_message:
68
+ post_install_message:
137
69
  rdoc_options: []
138
70
  require_paths:
139
71
  - lib
@@ -141,15 +73,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
141
73
  requirements:
142
74
  - - ">="
143
75
  - !ruby/object:Gem::Version
144
- version: 2.6.0
76
+ version: 2.5.0
145
77
  required_rubygems_version: !ruby/object:Gem::Requirement
146
78
  requirements:
147
79
  - - ">="
148
80
  - !ruby/object:Gem::Version
149
81
  version: '0'
150
82
  requirements: []
151
- rubygems_version: 3.0.1
152
- signing_key:
83
+ rubygems_version: 3.2.4
84
+ signing_key:
153
85
  specification_version: 4
154
86
  summary: Organize PDF files
155
87
  test_files: []
data/.ruby-gemset DELETED
@@ -1 +0,0 @@
1
- pdfh
data/.travis.yml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- sudo: false
3
- language: ruby
4
- cache: bundler
5
- rvm:
6
- - 2.6.0
7
- before_install: gem install bundler -v 1.17.2