pdfmd 1.4.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +55 -0
- data/LICENSE +22 -0
- data/README.md +108 -0
- data/bin/pdfmd +1 -942
- data/lib/pdfmd.rb +531 -0
- data/lib/pdfmd/check.rb +10 -0
- data/lib/pdfmd/edit.rb +40 -0
- data/lib/pdfmd/explain.author.md +3 -0
- data/lib/pdfmd/explain.createdate.md +6 -0
- data/lib/pdfmd/explain.hiera.md +18 -0
- data/lib/pdfmd/explain.keywords.md +9 -0
- data/lib/pdfmd/explain.rb +17 -0
- data/lib/pdfmd/explain.subject.md +8 -0
- data/lib/pdfmd/explain.title.md +5 -0
- data/lib/pdfmd/methods.rb +130 -0
- data/lib/pdfmd/rename.rb +146 -0
- data/lib/pdfmd/show.rb +24 -0
- data/lib/pdfmd/sort.rb +100 -0
- data/pdfmd.gemspec +27 -0
- metadata +83 -5
data/lib/pdfmd/check.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# Report every mandatory metadata field (author, subject, createdate, title)
# of the PDF named by PDFMD_FILENAME that has no value.
# The process exits with 1 if at least one field is missing, otherwise with 0.
filename = ENV.fetch('PDFMD_FILENAME')

exitCode = 0
readMetadata(filename).each_pair do |field, content|
  # Only the mandatory fields are checked; the content is inspected only
  # after the field name matched.
  next unless field.match(/author|subject|createdate|title/)
  next unless content.empty?

  puts "Missing value: #{field}"
  exitCode = 1
end
exit exitCode
|
data/lib/pdfmd/edit.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
#
|
2
|
+
# Thor command 'edit' for changing the common
|
3
|
+
# ExifTags within the PDF file
|
4
|
+
#
|
5
|
+
# Inputs are handed over through the environment:
#   PDFMD_FILENAME  path of the PDF file to edit (mandatory)
#   PDFMD_TAG       'all' or a comma separated list of tags to edit
#   PDFMD_RENAME    'true' runs the rename command afterwards
#   PDFMD           command used to invoke pdfmd itself (for the rename step)
filename  = ENV.fetch('PDFMD_FILENAME')
optTag    = ENV['PDFMD_TAG']
optRename = ENV['PDFMD_RENAME'] == 'true'
pdfmd     = ENV['PDFMD']

# Without a tag there is nothing to edit; stop with a clear message instead
# of failing later with a NoMethodError on nil.
abort 'No tag to edit was specified. Abort.' if optTag.nil? || optTag.strip.empty?

metadata = readMetadata(filename)

tags = if optTag == 'all'
         ['author', 'title', 'subject', 'createdate', 'keywords']
       else
         optTag.split(',').map(&:strip).reject(&:empty?)
       end

tags.each do |currentTag|
  # The metadata hash is keyed by the lower-case tag name
  tagKey = currentTag.downcase
  prompt = "Enter new value for #{currentTag} :"

  puts "Current value: '#{metadata[tagKey]}'"
  answer = readUserInput(prompt)

  if tagKey == 'createdate'
    # identifyDate returns false for unrecognised input; keep asking until
    # it hands back a normalised date string.
    until (answer = identifyDate(answer))
      puts 'Invalid date format'
      answer = readUserInput(prompt)
    end
  end

  puts "Changing value for #{currentTag}: '#{metadata[tagKey]}' => #{answer}"

  # Argument-array form: no shell is involved, so quotes or other special
  # characters in the answer or the filename cannot break the command.
  # The command output is read and discarded.
  IO.popen(['exiftool', "-#{tagKey}=#{answer}", '-overwrite_original', filename], &:read)
end

#
# If required, run the renaming task afterwards
# This is not pretty, but seems to be the only way to do this in THOR
#
# NOTE(review): assumes PDFMD holds a single executable path without
# additional arguments - confirm against the caller.
IO.popen([pdfmd, 'rename', filename], &:read) if optRename && pdfmd
|
40
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
Information about hiera: https://docs.puppetlabs.com/hiera/1/index.html
|
2
|
+
|
3
|
+
Installation:
|
4
|
+
|
5
|
+
```
|
6
|
+
$ gem install hiera
|
7
|
+
```
|
8
|
+
|
9
|
+
Configure default settings in hiera:
|
10
|
+
|
11
|
+
YAML
|
12
|
+
---
|
13
|
+
pdfmd::config:
|
14
|
+
sort:
|
15
|
+
destination : /tmp/output
|
16
|
+
copy : true
|
17
|
+
logfile : /var/log/pdfmd.log
|
18
|
+
|
@@ -0,0 +1,9 @@
|
|
1
|
+
[Keywords]
|
2
|
+
Anything else that might be of interest.
|
3
|
+
In orders, the elements that have been ordered. Contracts might contain the
|
4
|
+
names and addresses of the involved parties.
|
5
|
+
|
6
|
+
When writing invoices with their numbers, these will automatically be
|
7
|
+
picked up and can be integrated in the filename, e.g. "Invoicenumber 12334"
|
8
|
+
becomes "inv12334"
|
9
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Print the explanation text for one subject, or the list of available
# subjects.
#   PDFMD_EXPLAIN  subject to explain ('' lists the available subjects)
#   PDFMD          command name shown in the usage hint
term  = ENV.fetch('PDFMD_EXPLAIN')
pdfmd = ENV.fetch('PDFMD')

subjects = ['author', 'createdate', 'hiera', 'keywords', 'subject', 'title']
topic    = term.downcase

if subjects.include?(topic)
  # Only known subjects are turned into a file name, so the term can never
  # address a file outside of the explanation texts.
  name = "explain.#{topic}.md"

  # Look next to this script first so the command works from any working
  # directory; the path relative to the working directory is kept as a
  # fallback.
  candidates = [File.join(File.dirname(__FILE__), name), File.join('lib', 'pdfmd', name)]
  path = candidates.find { |candidate| File.file?(candidate) }
  abort "Cannot find explanation file '#{name}'." unless path

  puts File.read(path)
else
  puts "Unknown subject '#{term}'." unless term.empty?
  puts 'Available subjects:'
  subjects.each { |subject| puts "- #{subject}" }
  puts ' '
  puts "Run `$ #{pdfmd} explain <subject>` to get more details."
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
# == File: methods.rb
|
2
|
+
#
|
3
|
+
# General methods for supporting smaller tasks of the Thor commands
|
4
|
+
|
5
|
+
#
|
6
|
+
# Query Hiera installation
#
# Looks up +keyword+ with the `hiera` command line tool.
#
# keyword:: key to look up, e.g. 'pdfmd::config'
# facts::   whitespace separated 'name=value' scope facts passed to hiera;
#           defaults to 'fqdn=<output of hostname>'
#
# Returns the Ruby value the hiera output evaluates to (a hash for hash
# keys), or false if no `hiera` executable is found in PATH.
#
def queryHiera(keyword, facts = 'UNSET')
  # Set default facts; the trailing newline of the command output is removed
  facts = "fqdn=#{`hostname`.chomp}" if facts == 'UNSET'

  # Search PATH directly instead of relying on an external `which`
  searchPath = ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).reject(&:empty?)
  hieraFound = searchPath.any? do |dir|
    candidate = File.join(dir, 'hiera')
    File.file?(candidate) && File.executable?(candidate)
  end

  unless hieraFound
    puts 'Cannot find "hiera" command in $path.'
    return false
  end

  # Argument-array form: keyword and facts never pass through a shell
  output = IO.popen(['hiera', keyword.to_s, *facts.to_s.split], &:read)

  # SECURITY NOTE(review): the output of hiera is evaluated as Ruby code.
  # This is only acceptable while the hiera data is fully trusted; a real
  # parser should replace the eval.
  eval(output)
end
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
#
|
30
|
+
# Set Keywords Preface based on title and subject
#
# metadata:: metadata hash; only the 'subject' entry is read
# doktype::  short document type that is put in front of numeric subjects
#
# If the subject starts with a digit and contains no whitespace (a
# number/character combination), the doktype followed by the subject is
# returned.
# If not: the whole subject is returned, transliterated, with every run of
# characters other than a-z, A-Z and 0-9 replaced by one underscore.
#
def setKeywordsPreface(metadata, doktype)
  # A missing subject is treated like an empty one
  subject = metadata['subject'].to_s

  # \S rejects any whitespace; \A and \z make the pattern cover the whole
  # subject and not just one of its lines.
  return doktype + subject if subject.match(/\A\d+\S+\z/)

  # Take care of special characters
  I18n.enforce_available_locales = false
  normalized = I18n.transliterate(subject)

  # Replace everything else
  normalized.gsub(/[^a-zA-Z0-9]+/, '_')
end
|
51
|
+
|
52
|
+
|
53
|
+
#
|
54
|
+
# Function to read the metadata from a given file
# hash readMetadata(string)
#
# pathFile:: path of the file to inspect
#
# Returns a hash with the string keys 'keywords', 'subject', 'title',
# 'author', 'creator' and 'createdate'. Fields that exiftool does not
# report stay ''. If a field is reported more than once, the first
# non-empty value wins.
#
# Prints a message and aborts the process if pathFile is not a regular file.
#
def readMetadata(pathFile = false)
  metadata = {
    'keywords'   => '',
    'subject'    => '',
    'title'      => '',
    'author'     => '',
    'creator'    => '',
    'createdate' => ''
  }

  unless pathFile && File.file?(pathFile)
    puts "Cannot access file #{pathFile}. Abort"
    abort
  end

  # Label of the exiftool output line => metadata field
  labelPatterns = {
    'creator'    => /\ACreator\z/,
    'author'     => /\AAuthor/,
    'createdate' => /Create Date/,
    'subject'    => /Subject/,
    'keywords'   => /Keywords/,
    'title'      => /Title/
  }

  # Fetch the metadata with the help of exiftool (unless something better is
  # found). The argument-array form avoids the shell, so any filename works.
  output = begin
    IO.popen(['exiftool', pathFile.to_s], &:read)
  rescue Errno::ENOENT
    warn 'Cannot find "exiftool" command in $path.'
    ''
  end

  # Time to cherrypick the available data
  output.each_line do |line|
    # Split only at the first separator so values containing ' : ' stay
    # intact and empty values become '' instead of nil.
    label, value = line.chomp.split(' : ', 2)
    next if value.nil?

    label = label.strip
    labelPatterns.each do |field, pattern|
      metadata[field] = value if label.match(pattern) && metadata[field].empty?
    end
  end

  metadata
end
|
87
|
+
|
88
|
+
|
89
|
+
#
|
90
|
+
# Read user input
#
# textstring:: prompt handed to `ask`
#
# Returns whatever `ask` returns for the prompt.
#
def readUserInput(textstring = 'Enter value: ')
  ask(textstring)
end
|
95
|
+
|
96
|
+
|
97
|
+
#
|
98
|
+
# Identify a date
# Function takes a string and tries to identify a date in there.
# returns false if no date could be identified
# otherwise the date is returned in the format as
#
#   YYYY:MM:DD HH:mm:ss
#
# For missing time values zero is assumed
#
# Accepted inputs (surrounding whitespace is ignored):
#   YYYYMMDD            YYYYMMDDHHmmss
#   YYYY-MM-DD          YYYY-MM-DD HH:mm:ss
# where each '-' may also be ':' or '.'. Day and hour may be given with a
# single digit; they are zero-padded in the result.
#
def identifyDate(datestring)
  year      = '(?:19|20)[0-9]{2}'
  month     = '0[1-9]|1[0-2]'
  day       = '0[1-9]|[12][0-9]|3[01]|[1-9]'
  hour      = '[01][0-9]|2[0-3]|[1-9]'
  minute    = '[0-5][0-9]'
  second    = '[0-5][0-9]'
  separator = '[:.\-]'

  # \A and \z anchor the whole input; the time part is optional in both forms
  compact   = /\A(#{year})(#{month})(#{day})(?:(#{hour})(#{minute})(#{second}))?\z/
  separated = /\A(#{year})#{separator}(#{month})#{separator}(#{day})(?:\s(#{hour}):(#{minute}):(#{second}))?\z/

  text  = datestring.to_s.strip
  found = compact.match(text) || separated.match(text)
  return false unless found

  # to_i reads the captures as decimal numbers ('08' => 8) and turns the
  # missing time captures (nil) into zero.
  numbers = found.captures[1..-1].map(&:to_i)
  format('%s:%02d:%02d %02d:%02d:%02d', found[1], *numbers)
end
|
data/lib/pdfmd/rename.rb
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
#
|
2
|
+
# Thor command 'rename'
|
3
|
+
#
|
4
|
+
# TODO: Define outputdir from Hiera
|
5
|
+
# TODO: Add option for copy when renaming
|
6
|
+
# TODO: Add option to create outputdir if not existing
|
7
|
+
# TODO: Define option to create outputdir via Hiera
|
8
|
+
#
|
9
|
+
# Build a new file name of the form
#
#   <date>-<author>-<doktype>[-<keywords>].pdf   (lower case)
#
# from the metadata of the PDF and move the file accordingly.
# Inputs are handed over through the environment:
#   PDFMD_FILENAME        file to rename
#   PDFMD_ALLKEYWORDS     see NOTE(review) at the keyword limit below
#   PDFMD_OUTPUTDIR       target directory, 'false' for the input directory
#   PDFMD_DRYRUN          'false' renames, anything else only prints
#   PDFMD_NUMBERKEYWORDS  maximum number of keywords in the file name
filename       = ENV.fetch('PDFMD_FILENAME')
allkeywords    = ENV.fetch('PDFMD_ALLKEYWORDS')
outputdir      = ENV.fetch('PDFMD_OUTPUTDIR') == 'false' ? false : ENV.fetch('PDFMD_OUTPUTDIR')
dryrun         = ENV.fetch('PDFMD_DRYRUN') != 'false'
numberKeywords = ENV.fetch('PDFMD_NUMBERKEYWORDS').to_i

metadata = readMetadata(filename)

# Check if the metadata is complete
metadata.each do |key, value|
  next unless key.match(/author|subject|createdate|title/) && value.to_s.empty?

  puts 'Missing value for ' + key
  puts 'Abort'
  exit 1
end

# Drop the time part of the date and the remaining colons
date   = metadata['createdate'].gsub(/\ \d{2}\:\d{2}\:\d{2}.*$/, '').gsub(/\:/, '')
author = metadata['author'].gsub(/\./, '_').gsub(/\-/, '').gsub(/\s/, '_')
I18n.enforce_available_locales = false
author = I18n.transliterate(author) # Normalising

# Title pattern => document type; the first matching entry wins
doktypes = [
  [/(Tilbudt|Angebot)/i,                   'til'],
  [/Orderbekrefelse/i,                     'odb'],
  [/faktura/i,                             'fak'],
  [/order/i,                               'ord'],
  [/(kontrakt|avtale|vertrag|contract)/i,  'avt'],
  [/kvittering/i,                          'kvi'],
  [/manual/i,                              'man'],
  [/(billett|ticket)/i,                    'bil'],
  [/(informasjon|information)/i,           'inf']
]
matched          = doktypes.find { |pattern, _type| pattern === metadata['title'] }
doktype          = matched ? matched[1] : 'dok'
keywords_preface = setKeywordsPreface(metadata, doktype)

keywords = nil
if metadata['keywords'].empty?
  # There are no keywords.
  # we are using the title and the subject
  keywords = keywords_preface unless keywords_preface == ''
else
  keywords = keywords_preface == '' ? '' : keywords_preface.dup

  #
  # Sort array: abbreviated number keywords go to the front
  #
  keywordssorted = []
  metadata['keywords'].split(',').each do |rawKeyword|
    value = rawKeyword.lstrip.chomp
    value = value.gsub(/(Faktura|Rechnungs)(nummer)? /i, 'fak')
    value = value.gsub(/(Kunde)(n)?(nummer)? /i, 'kdn')
    value = value.gsub(/(Kunde)(n)?(nummer)?-/i, 'kdn')
    value = value.gsub(/(Ordre|Bestellung)(s?nummer)? /i, 'ord')
    value = value.gsub(/(Kvittering|Quittung)(snummer)? /i, 'kvi')
    value = value.gsub(/\s/, '_')
    value = value.gsub(/\//, '_')

    if value.match(/^(fak|kdn|ord|kvi)/)
      keywordssorted.unshift(value)
    else
      keywordssorted.push(value)
    end
  end

  counter = 0
  keywordssorted.each do |value|
    # Exit condition limits the number of keywords used in the filename
    # NOTE(review): the limit is applied when PDFMD_ALLKEYWORDS is
    # non-empty; confirm against the caller that this is the intended
    # direction of the switch.
    unless allkeywords.empty?
      break if counter > numberKeywords - 1

      counter += 1
    end

    if keywords == ''
      keywords = '-' + value
    elsif value.match(/(kvi|fak|ord|kdn)/i)
      keywords = value + '-' + keywords
    else
      keywords = keywords + '-' + value
    end
  end

  # Normalise the keywords as well
  I18n.enforce_available_locales = false
  keywords = I18n.transliterate(keywords)
end

extension = 'pdf'
keywords = '-' + keywords if keywords && keywords[0] != '-'
keywords ||= ''
newFilename = date + '-' + author + '-' + doktype + keywords + '.' + extension

# Output directory checks
if outputdir
  unless File.exist?(outputdir)
    puts "Error: output dir '#{outputdir}' not found. Abort."
    exit 1
  end
else
  # Output to Inputdir
  outputdir = File.dirname(filename)
end

target = File.join(outputdir, newFilename.downcase)

# Compare full paths: the file is only moved if source and target differ
if !dryrun && File.expand_path(filename) != File.expand_path(target)
  # Argument-array form: no shell, so any character in the names is safe.
  # The output of mv is read and discarded.
  IO.popen(['mv', '-v', filename, target], &:read)
else
  puts filename + "\n => " + newFilename.downcase
end
|
data/lib/pdfmd/show.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# Print the metadata of the PDF named by PDFMD_FILENAME.
#   PDFMD_TAGS  comma separated list of tags to print (values only)
#   PDFMD_ALL   'true' prints all tags with their labels
# All tags are printed as well when PDFMD_TAGS is not set.
filename = ENV.fetch('PDFMD_FILENAME')
optTag   = ENV['PDFMD_TAGS']
optAll   = ENV['PDFMD_ALL'] == 'true'

metadata = readMetadata(filename)

if optAll || optTag.nil?
  # Output all metatags
  labels = [
    ['Author',     'author'],
    ['Creator',    'creator'],
    ['CreateDate', 'createdate'],
    ['Subject',    'subject'],
    ['Title',      'title'],
    ['Keywords',   'keywords']
  ]
  labels.each { |label, key| puts "#{label} : " + metadata[key].to_s }
else
  # Output specific tag(s)
  optTag.split(',').each { |tag| puts metadata[tag.downcase] }
end
|
data/lib/pdfmd/sort.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# Sort all PDF files of a directory into one folder per author below the
# destination directory. Inputs are handed over through the environment:
#   PDFMD_INPUTDIR     directory containing the PDF files
#   PDFMD_DESTINATION  target directory ('' falls back to Hiera)
#   PDFMD_COPY         non-empty copies the files instead of moving them
#   PDFMD_LOG          non-empty enables logging to the logfile
#   PDFMD_INTERACTIVE  non-empty asks before each file
# Hiera (key 'pdfmd::config', section 'sort') provides the defaults for
# copy, interactive, destination and logfile.
inputDir = ENV.fetch('PDFMD_INPUTDIR')

require_relative('./methods.rb')
require 'fileutils'
require 'logger'

opt_destination = ENV.fetch('PDFMD_DESTINATION')
opt_copy        = ENV.fetch('PDFMD_COPY')
opt_log         = ENV.fetch('PDFMD_LOG')
opt_interactive = ENV.fetch('PDFMD_INTERACTIVE')

# queryHiera returns false without a hiera installation; fall back to an
# empty set of defaults instead of failing on false['sort'].
hieraDefaults = queryHiera('pdfmd::config')
sortDefaults  = {}
if hieraDefaults.is_a?(Hash) && hieraDefaults['sort'].is_a?(Hash)
  sortDefaults = hieraDefaults['sort']
end

copyAction = !opt_copy.empty?
if opt_copy.empty? && sortDefaults['copy'] == true
  copyAction = true
  puts 'Setting action to copy based on Hiera.'
end

interactiveAction = !opt_interactive.empty?
if opt_interactive.empty? && sortDefaults['interactive'] == true
  interactiveAction = true
  puts 'Setting interactive to true based on Hiera.'
end

# Fetch alternate destination from hiera if available
destination = opt_destination
if destination.nil? || destination == ''
  if sortDefaults['destination'].nil?
    puts 'No information about destination found.'
    puts 'Set parameter -d or configure hiera.'
    puts 'Abort.'
    exit 1
  end
  destination = sortDefaults['destination']
end

# Logging is switched like the other options: a non-empty value enables it
logenable = !opt_log.empty?
logfile   = sortDefaults['logfile'] || File.join(Dir.pwd, File.basename(__FILE__) + '.log')

# Check that logfilepath exists and is writeable.
# A logfile that does not exist yet is fine if its directory is writeable.
if logenable
  logWritable = File.exist?(logfile) ? File.writable?(logfile) : File.writable?(File.dirname(logfile))
  unless logWritable
    puts "Cannot write '#{logfile}. Abort."
    exit 1
  end
  $logger = Logger.new(logfile)
end

# Write a message to the logfile; does nothing while logging is disabled
log = lambda do |level, message|
  $logger.public_send(level, message) if logenable
end

# Input validation
abort('Input directory does not exist. Abort.') unless File.exist?(inputDir)
abort('Input is a single file') unless File.directory?(inputDir)
if File.file?(destination)
  abort("Output '#{destination}' is an existing file. Cannot create directory with the same name. Abort")
end

unless File.directory?(destination)
  FileUtils.mkdir_p(destination)
  log.call(:info, "Destination '#{destination}' has been created.")
end

# Iterate through all files
Dir[inputDir.chomp('/') + '/*.pdf'].sort.each do |file|

  if interactiveAction
    answer = readUserInput("Process '#{file}' ([y]/n): ")
    answer = 'y' if answer.empty?
    # Accept 'y', 'Y', 'yes', ...; everything else skips the file
    next unless answer.match(/\Ay/i)
  end

  metadata = readMetadata(file)

  if metadata['author'].nil? || metadata['author'].empty?
    message = "Missing tag 'Author' for file '#{file}'. Skipping."
    logenable ? log.call(:warn, message) : (puts "Missing tag 'Author' for file '#{file}'. Skipping")
    next
  end

  author = metadata['author'].gsub(' ', '_').gsub('.', '_')
  I18n.enforce_available_locales = false # Serialize special characters
  author = I18n.transliterate(author).downcase
  folderdestination = destination.chomp('/') + '/' + author

  unless File.directory?(folderdestination)
    FileUtils.mkdir_p(folderdestination)
    log.call(:info, "Folder '#{folderdestination}' has been created.")
  end

  filedestination = folderdestination + '/' + File.basename(file)

  # Final check before touching the filesystem
  if File.exist?(filedestination)
    log.call(:warn, "File '#{filedestination}' already exists. Ignoring.")
    next
  end

  log.call(:info, "File '#{file}' => '#{filedestination}'")

  # Move/Copy the file
  if copyAction
    FileUtils.cp(file, filedestination)
  else
    FileUtils.mv(file, filedestination)
  end
end
|