pdfmd 1.4.0 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +55 -0
- data/LICENSE +22 -0
- data/README.md +108 -0
- data/bin/pdfmd +1 -942
- data/lib/pdfmd.rb +531 -0
- data/lib/pdfmd/check.rb +10 -0
- data/lib/pdfmd/edit.rb +40 -0
- data/lib/pdfmd/explain.author.md +3 -0
- data/lib/pdfmd/explain.createdate.md +6 -0
- data/lib/pdfmd/explain.hiera.md +18 -0
- data/lib/pdfmd/explain.keywords.md +9 -0
- data/lib/pdfmd/explain.rb +17 -0
- data/lib/pdfmd/explain.subject.md +8 -0
- data/lib/pdfmd/explain.title.md +5 -0
- data/lib/pdfmd/methods.rb +130 -0
- data/lib/pdfmd/rename.rb +146 -0
- data/lib/pdfmd/show.rb +24 -0
- data/lib/pdfmd/sort.rb +100 -0
- data/pdfmd.gemspec +27 -0
- metadata +83 -5
data/lib/pdfmd/check.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# Thor command 'check': report which of the mandatory metadata tags
# (author, subject, createdate, title) are empty in the given PDF.
#
# Reads the file path from the environment variable PDFMD_FILENAME.
# Exits with status 0 when all mandatory tags are set, 1 otherwise.
pdf_path      = ENV.fetch('PDFMD_FILENAME')
mandatory_tag = /author|subject|createdate|title/
exit_status   = 0

readMetadata(pdf_path).each do |tag, content|
  # Only mandatory tags without content are reported.
  next unless tag.match(mandatory_tag) && content.empty?

  puts 'Missing value: ' + tag
  exit_status = 1
end

exit exit_status
|
data/lib/pdfmd/edit.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
#
# Thor command 'edit' for changing the common
# ExifTags within the PDF file
#
# Environment variables read by this script:
#   PDFMD_FILENAME - path of the PDF file to edit (required)
#   PDFMD_TAG      - 'all' or a comma separated list of tag names
#   PDFMD_RENAME   - the string 'true' triggers the rename command afterwards
#   PDFMD          - command used to invoke pdfmd for the rename step
#
require 'shellwords'

filename  = ENV.fetch('PDFMD_FILENAME')
optTag    = ENV['PDFMD_TAG']
optRename = ENV['PDFMD_RENAME'] == 'true'
pdfmd     = ENV['PDFMD']

# Without a tag selection there is nothing to split below; stop with a
# readable message instead of a NoMethodError on nil.
abort 'No tag specified. Abort.' if optTag.nil?

metadata = readMetadata(filename)

tags = if optTag == 'all'
         ['author','title','subject','createdate','keywords']
       else
         optTag.split(',')
       end

tags.each do |currentTag|
  # Metadata keys and exiftool tag names are used in lower case
  tagName = currentTag.downcase

  puts "Current value: '#{metadata[tagName]}'"
  answer = readUserInput("Enter new value for #{currentTag} :")

  if tagName == 'createdate'
    # Keep asking until identifyDate recognises the input; its return value
    # (the normalised date string) replaces the raw answer.
    until (answer = identifyDate(answer))
      puts 'Invalid date format'
      answer = readUserInput("Enter new value for #{currentTag} :")
    end
  end

  puts "Changing value for #{currentTag}: '#{metadata[tagName]}' => #{answer}"

  # The argument list is passed without a shell so that quotes or other shell
  # metacharacters in the new value or the filename cannot break the command.
  begin
    IO.popen(['exiftool', "-#{tagName}=#{answer}", '-overwrite_original', filename], &:read)
  rescue Errno::ENOENT
    abort "Cannot execute 'exiftool'. Abort."
  end
end

#
# If required, run the renaming task afterwards
# This is not pretty, but seems to be the only way to do this in THOR
#
if optRename
  # The filename is shell-escaped; the pdfmd command itself is used verbatim.
  `#{pdfmd} rename #{Shellwords.escape(filename)}`
end
|
40
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
Information about hiera: https://docs.puppetlabs.com/hiera/1/index.html
|
2
|
+
|
3
|
+
Installation:
|
4
|
+
|
5
|
+
```
|
6
|
+
$ gem install hiera
|
7
|
+
```
|
8
|
+
|
9
|
+
Configure default settings in hiera:
|
10
|
+
|
11
|
+
YAML
|
12
|
+
---
|
13
|
+
pdfmd::config:
|
14
|
+
sort:
|
15
|
+
destination : /tmp/output
|
16
|
+
copy : true
|
17
|
+
logfile : /var/log/pdfmd.log
|
18
|
+
|
@@ -0,0 +1,9 @@
|
|
1
|
+
[Keywords]
|
2
|
+
Anything else that might be of interest.
|
3
|
+
In orders, the elements that have been ordered. Contracts might contain the
|
4
|
+
names and addresses of the involved parties.
|
5
|
+
|
6
|
+
When writing invoices with their numbers, these will automatically be
|
7
|
+
picked up and can be integrated in the filename, e.g. "Invoicenumber 12334"
|
8
|
+
becomes "inv12334"
|
9
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Thor command 'explain': print the help text for a subject, or the list of
# available subjects when no subject is given.
#
# Environment variables read by this script:
#   PDFMD_EXPLAIN - subject to explain; empty string lists all subjects
#   PDFMD         - command name shown in the usage hint
term  = ENV.fetch('PDFMD_EXPLAIN')
pdfmd = ENV.fetch('PDFMD')

# Subjects for which an explain.<subject>.md file is expected to exist.
# Restricting the lookup to this list also keeps arbitrary input out of the
# file path built below.
subjects = ['author', 'createdate', 'hiera', 'keywords', 'subject', 'title']

if term == ''
  puts 'Available subjects:'
  subjects.each { |subject| puts '- ' + subject }
  puts ' '
  puts "Run `$ #{pdfmd} explain <subject>` to get more details."
elsif subjects.include?(term.downcase)
  docname = "explain.#{term.downcase}.md"

  # Look next to this script first so the command works from any working
  # directory; the original project-root relative path is kept as fallback.
  candidates = [
    File.join(File.dirname(File.expand_path(__FILE__)), docname),
    File.join('lib', 'pdfmd', docname)
  ]
  docfile = candidates.find { |path| File.file?(path) }

  if docfile
    puts File.read(docfile)
  else
    puts "Cannot find explanation file '#{docname}'. Abort."
    exit 1
  end
else
  puts "Unknown subject '#{term}'."
  puts "Run `$ #{pdfmd} explain` to list the available subjects."
  exit 1
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
# == File: methods.rb
|
2
|
+
#
|
3
|
+
# General methods for supporting smaller tasks of the Thor commands
|
4
|
+
|
5
|
+
#
# Query the Hiera installation through the 'hiera' command line tool.
# Relies on the shell commands 'which', 'hostname' and 'hiera', so this is
# not cross platform.
#
# keyword - the Hiera key to look up
# facts   - fact string appended to the hiera call; when left at the default
#           'UNSET' it becomes "fqdn=<output of hostname>"
#
# Returns false (after printing a message) when the hiera command cannot be
# found, otherwise the result of evaluating the command output as Ruby.
#
def queryHiera(keyword,facts = 'UNSET')

  # Set default facts. The trailing newline of the hostname output is removed
  # so it does not end up inside the fact value.
  facts = "fqdn=#{`hostname`.chomp}" if facts == 'UNSET'

  # If hiera isn't found, return false
  unless system('which hiera > /dev/null 2>&1')
    puts 'Cannot find "hiera" command in $path.'
    return false
  end

  # SECURITY NOTE(review): the output of an external command is passed to
  # eval. Anything that can influence what 'hiera' prints can execute Ruby
  # code here; consider a structured output format and a real parser.
  eval(`hiera #{keyword} #{facts}`)

end
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
#
# Build the keywords preface from the subject of the document.
#
# metadata - hash with (at least) the key 'subject'
# doktype  - short document type code that is prepended in the first case
#
# When the subject matches /^\d+[^+s]+.*/ the result is doktype followed by
# the unchanged subject. Otherwise the subject is transliterated via I18n
# and every run of characters other than a-z, A-Z, 0-9 is replaced by a
# single underscore.
#
# NOTE(review): the character class [^+s] excludes '+' and 's'; it looks like
# "no whitespace" (\S) may have been intended - confirm before changing.
#
def setKeywordsPreface(metadata, doktype)
  original_subject = metadata['subject']
  return doktype + original_subject if original_subject.match(/^\d+[^+s]+.*/)

  # Take care of special characters
  I18n.enforce_available_locales = false
  transliterated = I18n.transliterate(original_subject)

  # Replace everything else
  transliterated.gsub(/[^a-zA-Z0-9]+/,'_')
end
|
51
|
+
|
52
|
+
|
53
|
+
#
# Function to read the metadata from a given file
# hash readMetadata(string)
#
# pathFile - path of the file to inspect
#
# Returns a hash with the string keys 'keywords', 'subject', 'title',
# 'author', 'creator' and 'createdate'. Tags that are not reported by
# exiftool keep the empty string as value.
#
# Prints a message and aborts the process when pathFile is not an existing
# regular file. When the exiftool command cannot be executed a warning is
# written to stderr and the hash with empty values is returned.
#
def readMetadata(pathFile = false)
  metadata = {
    'keywords'   => '',
    'subject'    => '',
    'title'      => '',
    'author'     => '',
    'creator'    => '',
    'createdate' => ''
  }

  # The default value false (or nil) must not reach File.file?, which only
  # accepts path-like arguments.
  unless pathFile && File.file?(pathFile)
    puts "Cannot access file #{pathFile}. Abort"
    abort
  end

  # Fetch the metadata with the help of exiftool (unless something better is
  # found). The command is started without a shell, so special characters in
  # the path cannot break or alter the command line.
  begin
    output = IO.popen(['exiftool', pathFile.to_s], &:read)
  rescue Errno::ENOENT
    warn "Cannot execute 'exiftool'. No metadata has been read."
    return metadata
  end

  # Same line selection the former egrep filter applied.
  wanted_line = /^Creator +:|^Author|Create Date|Subject|Keywords|Title/i

  # Pattern matched against the tag name (left of " : ") => metadata key
  tag_patterns = {
    'creator'    => /Creator/,
    'author'     => /Author/,
    'createdate' => /Create Date/,
    'subject'    => /Subject/,
    'keywords'   => /Keywords/,
    'title'      => /Title/
  }

  # Time to cherrypick the available data
  output.to_s.each_line do |line|
    entry = line.chomp
    next unless entry =~ wanted_line

    # Split only at the first separator so values containing " : " survive.
    name, value = entry.split(' : ', 2)
    next if value.nil?

    # The first value found for a tag wins; later matching lines no longer
    # reset a value that has already been read.
    tag_patterns.each do |tag, pattern|
      metadata[tag] = value if metadata[tag].empty? && name =~ pattern
    end
  end

  metadata
end
|
87
|
+
|
88
|
+
|
89
|
+
#
# Prompt the user and hand back the answer.
#
# textstring - prompt passed on to `ask`
#
# Returns whatever `ask` returns for the given prompt.
#
def readUserInput(textstring = 'Enter value: ')
  ask(textstring)
end
|
95
|
+
|
96
|
+
|
97
|
+
#
# Identify a date
# Function takes a string and tries to identify a date in there.
# returns false if no date could be identified
# otherwise the date is returned in the format as
#
#   YYYY:MM:DD HH:mm:ss
#
# For missing time values zero is assumed
#
# Accepted inputs (surrounding whitespace is ignored):
#   YYYYMMDD
#   YYYYMMDDHHmmss
#   YYYY-MM-DD HH:mm:ss   (date separators may be ':', '.' or '-')
#   YYYY-MM-DD            (date separators may be ':', '.' or '-')
# Day and hour may be given with a single digit; they are zero padded in the
# result.
#
def identifyDate(datestring)
  year      = '[1-2][90][0-9][0-9]'
  month     = '0[1-9]|1[0-2]'
  day       = '[1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1]'
  hour      = '[0-1][0-9]|2[0-3]|[1-9]'
  minute    = '[0-5][0-9]'
  second    = '[0-5][0-9]'
  separator = '[:.\-]'

  formats = [
    /\A(#{year})(#{month})(#{day})\z/,
    /\A(#{year})(#{month})(#{day})(#{hour})(#{minute})(#{second})\z/,
    /\A(#{year})#{separator}(#{month})#{separator}(#{day})\s(#{hour}):(#{minute}):(#{second})\z/,
    /\A(#{year})#{separator}(#{month})#{separator}(#{day})\z/
  ]

  input = datestring.to_s.strip

  formats.each do |format|
    found = format.match(input)
    next unless found

    # Formats without a time part leave the last three captures nil.
    found_year, found_month, found_day, found_hour, found_minute, found_second = found.captures

    # Pad with rjust instead of "%02d": the format directive converts its
    # string argument with Integer(), which rejects values such as "08".
    time = if found_hour
             found_hour.rjust(2, '0') + ':' + found_minute + ':' + found_second
           else
             '00:00:00'
           end

    return found_year + ':' + found_month + ':' + found_day.rjust(2, '0') + ' ' + time
  end

  false
end
|
data/lib/pdfmd/rename.rb
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
#
# Thor command 'rename'
#
# Builds a new filename of the form
#   <date>-<author>-<doktype><keywords>.pdf
# from the metadata of the PDF and moves the file, or only prints the new
# name when running as dry run.
#
# Environment variables read by this script:
#   PDFMD_FILENAME       - path of the PDF file
#   PDFMD_ALLKEYWORDS    - see NOTE(review) at the keyword limit below
#   PDFMD_OUTPUTDIR      - target directory, or the string 'false' to use
#                          the directory of the input file
#   PDFMD_DRYRUN         - anything but the string 'false' enables dry run
#   PDFMD_NUMBERKEYWORDS - maximum number of keywords (converted with to_i)
#
# TODO: Define outputdir from Hiera
# TODO: Add option for copy when renaming
# TODO: Add option to create outputdir if not existing
# TODO: Define option to create outputdir via Hiera
#
filename       = ENV.fetch('PDFMD_FILENAME')
allkeywords    = ENV.fetch('PDFMD_ALLKEYWORDS')
outputdir      = ENV.fetch('PDFMD_OUTPUTDIR') == 'false' ? false : ENV.fetch('PDFMD_OUTPUTDIR')
dryrun         = ENV.fetch('PDFMD_DRYRUN') != 'false'
numberKeywords = ENV.fetch('PDFMD_NUMBERKEYWORDS').to_i

metadata = readMetadata(filename)

# Check if the metadata is complete
metadata.each do |key, value|
  next unless key.match(/author|subject|createdate|title/) && value.empty?

  puts 'Missing value for ' + key
  puts 'Abort'
  exit 1
end

# Date without the time part and without colons
date   = metadata['createdate'].gsub(/\ \d{2}\:\d{2}\:\d{2}.*$/,'').gsub(/\:/,'')
author = metadata['author'].gsub(/\./,'_').gsub(/\-/,'').gsub(/\s/,'_')
I18n.enforce_available_locales = false
author = I18n.transliterate(author) # Normalising

# Document type codes by title pattern. The order matters: the first
# matching pattern wins (e.g. 'Orderbekrefelse' is tested before 'order').
doktypes = [
  [/(Tilbudt|Angebot)/i,                     'til'],
  [/Orderbekrefelse/i,                       'odb'],
  [/faktura/i,                               'fak'],
  [/order/i,                                 'ord'],
  [/(kontrakt|avtale|vertrag|contract)/i,    'avt'],
  [/kvittering/i,                            'kvi'],
  [/manual/i,                                'man'],
  [/(billett|ticket)/i,                      'bil'],
  [/(informasjon|information)/i,             'inf']
]
matchedType = doktypes.find { |pattern, _code| pattern === metadata['title'] }
doktype = matchedType ? matchedType[1] : 'dok'
keywords_preface = setKeywordsPreface(metadata, doktype)

keywords = nil
if not metadata['keywords'].empty?
  keywords = keywords_preface.dup

  #
  # Normalise and sort the keywords: keywords starting with one of the
  # known abbreviations are put in front of all others.
  #
  keywordssorted = []
  metadata['keywords'].split(',').each do |rawKeyword|
    value = rawKeyword.lstrip.chomp
    value = value.gsub(/(Faktura|Rechnungs)(nummer)? /i,'fak')
    value = value.gsub(/(Kunde)(n)?(nummer)? /i,'kdn')
    value = value.gsub(/(Kunde)(n)?(nummer)?-/i,'kdn')
    value = value.gsub(/(Ordre|Bestellung)(s?nummer)? /i,'ord')
    value = value.gsub(/(Kvittering|Quittung)(snummer)? /i,'kvi')
    value = value.gsub(/\s/,'_')
    value = value.gsub(/\//,'_')
    if value.match(/^(fak|kdn|ord|kvi)/)
      keywordssorted.insert(0, value)
    else
      keywordssorted.push(value)
    end
  end

  counter = 0
  keywordssorted.each do |value|

    # Exit condition limits the number of keywords used in the filename.
    # NOTE(review): the limit is applied when PDFMD_ALLKEYWORDS is NOT
    # empty - confirm against the caller which value means "all keywords".
    if not allkeywords.empty?
      break if counter > numberKeywords - 1
      counter += 1
    end

    if keywords == ''
      keywords = '-' + value
    elsif value.match(/(kvi|fak|ord|kdn)/i)
      keywords = value + '-' + keywords
    else
      keywords = keywords + '-' + value
    end
  end

  # Normalise the keywords as well
  I18n.enforce_available_locales = false
  keywords = I18n.transliterate(keywords)

else

  # There are no keywords (rare, but it happens):
  # we are using the title and the subject
  keywords = keywords_preface if keywords_preface != ''

end

extension = 'pdf'
if keywords.nil?
  keywords = ''
elsif keywords[0] != '-'
  keywords = '-' + keywords
end
newFilename = date + '-' + author + '-' + doktype + keywords + '.' + extension

# Output directory checks
if outputdir
  if not File.exist?(outputdir)
    puts "Error: output dir '#{outputdir}' not found. Abort."
    exit 1
  end
else
  # Output to Inputdir
  outputdir = File.dirname(filename)
end

target = File.join(outputdir, newFilename.downcase)

# Compare the full paths: comparing the input path with the bare new
# filename would try to move a correctly named file onto itself and would
# skip a needed move into another output directory.
if not dryrun and File.expand_path(filename) != File.expand_path(target)
  # Started without a shell so quotes in file names cannot break the command.
  IO.popen(['mv', '-v', filename, target], &:read)
else
  puts filename + "\n => " + newFilename.downcase
end
|
data/lib/pdfmd/show.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# Thor command 'show': print the metadata of a PDF file.
#
# Environment variables read by this script:
#   PDFMD_FILENAME - path of the PDF file (required)
#   PDFMD_TAGS     - optional comma separated list of tags to print
#   PDFMD_ALL      - the string 'true' prints all tags
filename = ENV.fetch('PDFMD_FILENAME')
optTag   = ENV['PDFMD_TAGS']
optAll   = ENV['PDFMD_ALL'] == 'true' ? true : nil

metadata = readMetadata(filename)

if optAll || optTag.nil?

  # Output all metatags, each with its label
  [
    ['Author',     'author'],
    ['Creator',    'creator'],
    ['CreateDate', 'createdate'],
    ['Subject',    'subject'],
    ['Title',      'title'],
    ['Keywords',   'keywords']
  ].each do |label, key|
    puts label + ' : ' + metadata[key].to_s
  end

else

  # Output specific tag(s): values only, one per line
  optTag.split(',').each { |tag| puts metadata[tag.downcase] }

end
|
data/lib/pdfmd/sort.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# Thor command 'sort': move or copy every PDF of an input directory into a
# sub directory of the destination that is named after the document author.
#
# Environment variables read by this script (all required, may be empty):
#   PDFMD_INPUTDIR    - directory containing the PDF files
#   PDFMD_DESTINATION - destination directory; empty falls back to Hiera
#   PDFMD_COPY        - non-empty: copy instead of move
#   PDFMD_LOG         - non-empty: write a logfile
#   PDFMD_INTERACTIVE - non-empty: ask before processing each file
inputDir = ENV.fetch('PDFMD_INPUTDIR')

require_relative('./methods.rb')
require 'fileutils'
require 'logger'

opt_destination = ENV.fetch('PDFMD_DESTINATION')
opt_copy        = ENV.fetch('PDFMD_COPY')
opt_log         = ENV.fetch('PDFMD_LOG')
opt_interactive = ENV.fetch('PDFMD_INTERACTIVE')

# queryHiera returns false when the hiera command is missing, and its result
# is not guaranteed to be a hash. Fall back to an empty configuration so the
# lookups below cannot fail.
hieraDefaults = queryHiera('pdfmd::config')
hieraSort = {}
if hieraDefaults.is_a?(Hash) && hieraDefaults['sort'].is_a?(Hash)
  hieraSort = hieraDefaults['sort']
end

# ENV.fetch never returns nil, so "not set" is detected via empty?
copyAction = !opt_copy.empty?
if opt_copy.empty? and hieraSort['copy'] == true
  copyAction = true
  puts 'Setting action to copy based on Hiera.'
end

interactiveAction = !opt_interactive.empty?
if opt_interactive.empty? and hieraSort['interactive'] == true
  interactiveAction = true
  puts 'Setting interactive to true based on Hiera.'
end

# Fetch alternate destination from hiera if available
destination = opt_destination
if destination.nil? or destination == ''

  if !hieraSort['destination'].nil?
    destination = hieraSort['destination']
  else
    puts 'No information about destination found.'
    puts 'Set parameter -d or configure hiera.'
    puts 'Abort.'
    exit 1
  end

end

logenable = !opt_log.empty?
logfile   = hieraSort['logfile'] || File.join(Dir.pwd, File.basename(__FILE__) + '.log')

if logenable
  # Check that the logfile can be written. A logfile that does not exist yet
  # is fine as long as its directory is writable.
  logWritable = File.exist?(logfile) ? File.writable?(logfile) : File.writable?(File.dirname(logfile))
  if !logWritable
    puts "Cannot write '#{logfile}'. Abort."
    exit 1
  end
  $logger = Logger.new(logfile)
end

# Input validation
abort('Input directory does not exist. Abort.') unless File.exist?(inputDir)
abort('Input is a single file') unless File.directory?(inputDir)
if File.file?(destination)
  abort("Output '#{destination}' is an existing file. Cannot create directory with the same name. Abort")
end
unless File.directory?(destination)
  FileUtils.mkdir_p(destination)
  $logger.info("Destination '#{destination}' has been created.") if logenable
end

# Iterate through all files
Dir[inputDir.chomp('/') + '/*.pdf'].sort.each do |file|

  if interactiveAction
    answer = readUserInput("Process '#{file}' ([y]/n): ")
    answer = 'y' if answer.empty?
    next unless answer.match(/y/)
  end

  metadata = readMetadata(file)
  if metadata['author'] and not metadata['author'].empty?
    author = metadata['author'].gsub(' ','_').gsub('.','_')
    I18n.enforce_available_locales = false # Serialize special characters
    author = I18n.transliterate(author).downcase
    folderdestination = destination.chomp('/') + '/' + author

    unless File.directory?(folderdestination)
      FileUtils.mkdir_p(folderdestination)
      $logger.info("Folder '#{folderdestination}' has been created.") if logenable
    end

    filedestination = folderdestination + '/' + File.basename(file)

    # Final check before touching the filesystem
    if not File.exist?(filedestination)
      $logger.info("File '#{file}' => '#{filedestination}'") if logenable

      # Move/Copy the file
      if copyAction
        FileUtils.cp(file, filedestination)
      else
        FileUtils.mv(file,filedestination)
      end

    elsif logenable
      $logger.warn("File '#{filedestination}' already exists. Ignoring.")
    end
  elsif logenable
    $logger.warn("Missing tag 'Author' for file '#{file}'. Skipping.")
  else
    puts "Missing tag 'Author' for file '#{file}'. Skipping"
  end
end
|