pdfmd 1.4.0 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +55 -0
- data/LICENSE +22 -0
- data/README.md +108 -0
- data/bin/pdfmd +1 -942
- data/lib/pdfmd.rb +531 -0
- data/lib/pdfmd/check.rb +10 -0
- data/lib/pdfmd/edit.rb +40 -0
- data/lib/pdfmd/explain.author.md +3 -0
- data/lib/pdfmd/explain.createdate.md +6 -0
- data/lib/pdfmd/explain.hiera.md +18 -0
- data/lib/pdfmd/explain.keywords.md +9 -0
- data/lib/pdfmd/explain.rb +17 -0
- data/lib/pdfmd/explain.subject.md +8 -0
- data/lib/pdfmd/explain.title.md +5 -0
- data/lib/pdfmd/methods.rb +130 -0
- data/lib/pdfmd/rename.rb +146 -0
- data/lib/pdfmd/show.rb +24 -0
- data/lib/pdfmd/sort.rb +100 -0
- data/pdfmd.gemspec +27 -0
- metadata +83 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 798bed0d41462e97707960d925ec633109b5bb67
|
4
|
+
data.tar.gz: f678724174da3a5a3b4baa98615efb5dde3395b8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4fde3094c09a5d5631b37beffbc16b4b3c731f16a58af86b3b1eeab992aa953dac3a74e71ab683dd1f6a3eb90bb0288ae6dd18798a7242a2263707b9640b769
|
7
|
+
data.tar.gz: 3f30cd43417346980587346f5cde64537d45b9fe346c093568510e66414897221e57af7d936dadad41882c591910dc3ec4a2a4736de5e0931d6c1701e44bea8d
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# Version 1.4.1
|
2
|
+
- Bugfix: When in interactive sorting, choosing the default and 'y' did not have the same effect.
|
3
|
+
- Renamed parameter '--:all-keywords' to '--allkeywords' (rename method).
|
4
|
+
- Bugfix: Method 'rename', Renaming a file puts it in the input directory, not in the current working directory.
|
5
|
+
- Bugfix: Method 'show', Listing single tags works now.
|
6
|
+
- Moved 'explain'-text into separate files.
|
7
|
+
- Moved commands into separate files under './lib/pdfmd'.
|
8
|
+
- Bugfix: Method 'show', Parameter '-t' is now case insensitive
|
9
|
+
- Added option 'outputdir' to command 'rename'.
|
10
|
+
|
11
|
+
# Version 1.4.0
|
12
|
+
- Added Hiera support for 'sort' command to define some standards (less typing)
|
13
|
+
- Added interactive parameter to 'sort' command
|
14
|
+
- Updated documentation
|
15
|
+
|
16
|
+
# Version 1.3.2
|
17
|
+
- Moved the script to right place in the GEM (/bin)
|
18
|
+
- Readme Updated
|
19
|
+
- Moved Changelog into separate file
|
20
|
+
|
21
|
+
# Version 1.3.1
|
22
|
+
- Corrected Email address (Gemspec)
|
23
|
+
- Corrected website address (Gemspec)
|
24
|
+
- No changes to script
|
25
|
+
|
26
|
+
# Version 1.3
|
27
|
+
- Small bugfix about special characters in filenames (author).
|
28
|
+
- Bugfix for the tag 'createdate' written as 'CreateDate' which did not
|
29
|
+
take the date then.
|
30
|
+
- Removed inactive code.
|
31
|
+
- Added parameter 'version'
|
32
|
+
|
33
|
+
# Version 1.2
|
34
|
+
- Small bugfix with the sort function and the logfile being created.
|
35
|
+
|
36
|
+
# Version 1.1
|
37
|
+
- Added Function to sort pdf documents into a directory structure based on
|
38
|
+
the author of the document.
|
39
|
+
- Added dependency 'pathname'
|
40
|
+
- Added dependency 'logger'
|
41
|
+
- Added dependency 'i18n'
|
42
|
+
- Added method 'sort'
|
43
|
+
- Changing a tag will now output the old value in the edit dialog.
|
44
|
+
- Updated documentation and descriptions of methods
|
45
|
+
|
46
|
+
# Version 1.0
|
47
|
+
- Added documentation in long description of the commands
|
48
|
+
- Added method "explain" for further information
|
49
|
+
|
50
|
+
# Version 0.9
|
51
|
+
- Added 'rename' option to edit metatags
|
52
|
+
- Fixed some output strings
|
53
|
+
|
54
|
+
# Version 0.x
|
55
|
+
- All other stuff
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Micronarrativ
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
# pdfmd
|
2
|
+
Pdf Meta data managing script.
|
3
|
+
|
4
|
+
I use the script `pdfmd.rb`/pdfmetadata (with a slightly different name) to manage my PDF documents and keep the naming in line.
|
5
|
+
Hidden deep in the directory structure of my disks I can quickly find the
|
6
|
+
documents I need with a quick `find /document/path -type f -iname
|
7
|
+
'*<keyword>*'` which matches some string in the filename.
|
8
|
+
|
9
|
+
# Requirements
|
10
|
+
|
11
|
+
Although the requirements are listed in the script itself as well (header documentation!), here they are again:
|
12
|
+
|
13
|
+
## Ruby Gems
|
14
|
+
|
15
|
+
1. [thor](https://rubygems.org/gems/thor)
|
16
|
+
2. [highline/import](https://rubygems.org/gems/highline)
|
17
|
+
3. [fileutils](https://rubygems.org/gems/fileutils)
|
18
|
+
4. [i18n](https://rubygems.org/gems/i18n)
|
19
|
+
5. [logger]()
|
20
|
+
6. [pathname]()
|
21
|
+
|
22
|
+
Install the requirements as usual
|
23
|
+
|
24
|
+
```
|
25
|
+
$ gem install thor
|
26
|
+
$ gem install highline
|
27
|
+
$ gem install fileutils
|
28
|
+
$ gem install i18n
|
29
|
+
$ gem install pathname
|
30
|
+
$ gem install logger
|
31
|
+
```
|
32
|
+
|
33
|
+
## Applications
|
34
|
+
|
35
|
+
1. [exiftools](http://www.sno.phy.queensu.ca/~phil/exiftool/)
|
36
|
+
|
37
|
+
This is usually already in your os repositories
|
38
|
+
|
39
|
+
```
|
40
|
+
$ sudo yum install Perl-Image-Exiftool
|
41
|
+
```
|
42
|
+
|
43
|
+
2. [hiera](https://rubygems.org/gems/hiera) can be optionally used to configure
|
44
|
+
some default settings (instead of a configuration file).
|
45
|
+
|
46
|
+
```
|
47
|
+
$ gem install hiera
|
48
|
+
```
|
49
|
+
|
50
|
+
# Usage
|
51
|
+
|
52
|
+
The usage is quite simple:
|
53
|
+
|
54
|
+
```
|
55
|
+
$ ./pdfmd.rb [show|edit|rename|sort] [options] <filename>
|
56
|
+
```
|
57
|
+
|
58
|
+
The interface has been set up using Thor.
|
59
|
+
So in order to get more information just run the required _help_ command:
|
60
|
+
|
61
|
+
```
|
62
|
+
# Show general possibilities:
|
63
|
+
$ pdfmd.rb
|
64
|
+
|
65
|
+
# Show more information about <action>
|
66
|
+
$ pdfmd.rb help <action>
|
67
|
+
```
|
68
|
+
|
69
|
+
My usual workflow is like this:
|
70
|
+
|
71
|
+
```
|
72
|
+
$ pdfmd.rb show test.pdf
|
73
|
+
$ pdfmd.rb edit -t all test.pdf
|
74
|
+
...
|
75
|
+
$ pdfmd.rb rename test.pdf
|
76
|
+
$ mv 20150101-me-dok-testdocument.pdf /my/pdf/directory
|
77
|
+
```
|
78
|
+
|
79
|
+
There's an underlying logic in the renaming and sorting of the files according to the metadata. Make sure you read at least the help information before you use it.
|
80
|
+
|
81
|
+
|
82
|
+
__HINT__: Before you start using the script, make sure you have a backup of your files or you know what you're doing. If you lose information/files, I will not be able to help you.
|
83
|
+
|
84
|
+
# Hiera
|
85
|
+
|
86
|
+
In order for Hiera to provide (default) configuration data, set up a configuration hash e.g. inside the YAML backend:
|
87
|
+
|
88
|
+
``` YAML
|
89
|
+
pdfmd::config:
|
90
|
+
sort:
|
91
|
+
destination : /data/tmp
|
92
|
+
copy : true
|
93
|
+
logfile : /var/log/pdfmd.log
|
94
|
+
interactive : false
|
95
|
+
```
|
96
|
+
|
97
|
+
Information about which hiera configuration settings are available can be either found in `pdfmd help <command>` or `pdfmd explain hiera`.
|
98
|
+
|
99
|
+
Test your hiera configuration with
|
100
|
+
|
101
|
+
``` bash
|
102
|
+
$ hiera pdfmd::config
|
103
|
+
```
|
104
|
+
|
105
|
+
# Contact
|
106
|
+
|
107
|
+
If you have improvements and suggestions -> let me know.
|
108
|
+
|
data/bin/pdfmd
CHANGED
@@ -1,943 +1,2 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
#
|
4
|
-
# == File: pdfmd.rb
|
5
|
-
#
|
6
|
-
# Show and edit Metadata of PDF files and rename the files accordingly.
|
7
|
-
#
|
8
|
-
# === Requirements
|
9
|
-
#
|
10
|
-
# ==== Ruby gems:
|
11
|
-
# - thor
|
12
|
-
# - highline/import
|
13
|
-
# - fileutils
|
14
|
-
# - i18n
|
15
|
-
# - pathname
|
16
|
-
# - logger
|
17
|
-
#
|
18
|
-
# ==== OS applications:
|
19
|
-
#
|
20
|
-
# - exiftools
|
21
|
-
#
|
22
|
-
# === Usage
|
23
|
-
#
|
24
|
-
# $ ./pdfmd <action> <parameter> file
|
25
|
-
#
|
26
|
-
# $ ./pdfmd help <action>
|
27
|
-
#
|
28
|
-
# An overview about the actions can be seen when running the script without
|
29
|
-
# any parameters
|
30
|
-
#
|
31
|
-
# Check and set metadata of PDF documents
|
32
|
-
#
|
33
|
-
# A complete set of metadata contains
|
34
|
-
#
|
35
|
-
# * CreateDate
|
36
|
-
# * Title
|
37
|
-
# * Author
|
38
|
-
# * Subject
|
39
|
-
# * Keywords (optional)
|
40
|
-
#
|
41
|
-
# TODO: Include password protected PDF documents as well
|
42
|
-
# TODO: Fix broken PDF files automatically
|
43
|
-
# TODO: Enable logging in more functions than only "sort"
|
44
|
-
# TODO: Read this: http://lostechies.com/derickbailey/2011/04/29/writing-a-thor-application/
|
45
|
-
# TODO: ... and this: http://blog.paracode.com/2012/05/17/building-your-tools-with-thor/
|
46
|
-
# TODO: Create Gem: http://yehudakatz.com/2010/04/02/using-gemspecs-as-intended/
|
47
|
-
# gs \
|
48
|
-
# -o repaired.pdf \
|
49
|
-
# -sDEVICE=pdfwrite \
|
50
|
-
# -dPDFSETTINGS=/prepress \
|
51
|
-
# corrupted.pdf
|
52
|
-
#
|
53
|
-
# == Author
|
54
|
-
#
|
55
|
-
# Daniel Roos <daniel-git@micronarrativ.org>
|
56
|
-
# Source: https://github.com/Micronarrativ/ruby-pmd
|
57
|
-
#
|
58
|
-
require "thor"
|
59
|
-
require "highline/import"
|
60
|
-
require "fileutils"
|
61
|
-
require "i18n"
|
62
|
-
require 'pathname'
|
63
|
-
require 'logger'
|
64
|
-
|
65
|
-
VERSION = '1.4.0'
|
66
|
-
|
67
|
-
#
|
68
|
-
# Function to read the metadata from a given file
|
69
|
-
# hash readMetadata(string)
|
70
|
-
#
|
71
|
-
def readMetadata(pathFile = false)
|
72
|
-
metadata = Hash.new
|
73
|
-
metadata['keywords'] = ''
|
74
|
-
metadata['subject'] = ''
|
75
|
-
metadata['title'] = ''
|
76
|
-
metadata['author'] = ''
|
77
|
-
metadata['creator'] = ''
|
78
|
-
metadata['createdate'] = ''
|
79
|
-
if not File.file?(pathFile)
|
80
|
-
puts "Cannot access file #{pathFile}. Abort"
|
81
|
-
abort
|
82
|
-
end
|
83
|
-
|
84
|
-
# Fetch the Metadata with the help of exiftools (unless something better is
|
85
|
-
# found
|
86
|
-
metaStrings = `exiftool '#{pathFile}' | egrep -i '^Creator\s+\:|^Author|Create Date|Subject|Keywords|Title'`
|
87
|
-
|
88
|
-
# Time to cherrypick the available data
|
89
|
-
entries = metaStrings.split("\n")
|
90
|
-
entries.each do |entry|
|
91
|
-
values = entry.split(" : ")
|
92
|
-
values[0].match(/Creator/) and metadata['creator'] == '' ? metadata['creator'] = values[1]: metadata['creator'] = ''
|
93
|
-
values[0].match(/Author/) and metadata['author'] == '' ? metadata['author'] = values[1]: metadata['author'] = ''
|
94
|
-
values[0].match(/Create Date/) and metadata['createdate'] == '' ? metadata['createdate'] = values[1]: metadata['createdate'] = ''
|
95
|
-
values[0].match(/Subject/) and metadata['subject'] == '' ? metadata['subject'] = values[1]: metadata['subject'] = ''
|
96
|
-
values[0].match(/Keywords/) and metadata['keywords'] == '' ? metadata['keywords'] = values[1]: metadata['keywords'] =''
|
97
|
-
values[0].match(/Title/) and metadata['title'] == '' ? metadata['title'] = values[1]: metadata['title'] =''
|
98
|
-
end
|
99
|
-
return metadata
|
100
|
-
end
|
101
|
-
|
102
|
-
|
103
|
-
#
|
104
|
-
# Query Hiera installation
|
105
|
-
# I don't give a sh** about cross platform at this point.
|
106
|
-
#
|
107
|
-
# Return the hash of the hiera values or false (if no hiera is found)
|
108
|
-
#
|
109
|
-
def queryHiera(keyword,facts = 'UNSET')
|
110
|
-
|
111
|
-
# Set default facts
|
112
|
-
facts == 'UNSET' ? facts = "fqdn=#{`hostname`}" : ''
|
113
|
-
|
114
|
-
# If hiera isn't found, return false
|
115
|
-
# otherwise return the hash
|
116
|
-
if !system('which hiera > /dev/null 2>&1')
|
117
|
-
puts 'Cannot find "hiera" command in $path.'
|
118
|
-
return false
|
119
|
-
else
|
120
|
-
return eval(`hiera #{keyword} #{facts}`)
|
121
|
-
end
|
122
|
-
|
123
|
-
end
|
124
|
-
|
125
|
-
|
126
|
-
#
|
127
|
-
# Set Keywords Preface based on title and subject
|
128
|
-
# If subject matches a number/character combination and contains no spaces,
|
129
|
-
# the preface will be combined with the doktype.
|
130
|
-
# If not: preface will contain the whole subject with dots and spaces being
|
131
|
-
# replaced with underscores
|
132
|
-
#
|
133
|
-
def setKeywordsPreface(metadata, doktype)
|
134
|
-
if metadata['subject'].match(/^\d+[^+s]+.*/)
|
135
|
-
return doktype + metadata['subject']
|
136
|
-
else
|
137
|
-
subject = metadata['subject']
|
138
|
-
|
139
|
-
# Take care of special characters
|
140
|
-
I18n.enforce_available_locales = false
|
141
|
-
subject = I18n.transliterate(metadata['subject'])
|
142
|
-
|
143
|
-
# Replace everything else
|
144
|
-
subject = subject.gsub(/[^a-zA-Z0-9]+/,'_')
|
145
|
-
return subject
|
146
|
-
end
|
147
|
-
end
|
148
|
-
|
149
|
-
#
|
150
|
-
# Read user input
|
151
|
-
#
|
152
|
-
def readUserInput(textstring = 'Enter value: ')
|
153
|
-
return ask textstring
|
154
|
-
end
|
155
|
-
|
156
|
-
#
|
157
|
-
# Identify a date
|
158
|
-
# Function takes a string and tries to identify a date in there.
|
159
|
-
# returns false if no date could be identified
|
160
|
-
# otherwise the date is returned in the format as
|
161
|
-
#
|
162
|
-
# YYYY:MM:DD HH:mm:ss
|
163
|
-
#
|
164
|
-
# For missing time values zero is assumed
|
165
|
-
#
|
166
|
-
def identifyDate(datestring)
|
167
|
-
identifiedDate = ''
|
168
|
-
year = '[1-2][90][0-9][0-9]'
|
169
|
-
month = '0[0-9]|10|11|12'
|
170
|
-
day = '[1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1]'
|
171
|
-
hour = '[0-1][0-9]|2[0-3]|[1-9]'
|
172
|
-
minute = '[0-5][0-9]'
|
173
|
-
second = '[0-5][0-9]'
|
174
|
-
case datestring
|
175
|
-
when /^(#{year})(#{month})(#{day})$/
|
176
|
-
identifiedDate = $1 + ':' + $2 + ':' + $3 + ' 00:00:00'
|
177
|
-
when /^(#{year})(#{month})(#{day})(#{hour})(#{minute})(#{second})$/
|
178
|
-
identifiedDate = $1 + ':' + $2 + ':' + $3 + ' ' + $4 + ':' + $5 + ':' + $6
|
179
|
-
when /^(#{year})[\:|\.|\-](#{month})[\:|\.|\-](#{day})\s(#{hour})[\:](#{minute})[\:](#{second})$/
|
180
|
-
identifiedDate = $1 + ':' + $2 + ':' + $3 + ' ' + $4 + ':' + $5 + ':' + $6
|
181
|
-
when /^(#{year})[\:|\.|\-](#{month})[\:|\.|\-](#{day})$/
|
182
|
-
day = "%02d" % $3
|
183
|
-
month = "%02d" % $2
|
184
|
-
identifiedDate = $1 + ':' + month + ':' + day + ' 00:00:00'
|
185
|
-
else
|
186
|
-
identifiedDate = false
|
187
|
-
end
|
188
|
-
return identifiedDate
|
189
|
-
end
|
190
|
-
|
191
|
-
class DOC < Thor
|
192
|
-
|
193
|
-
|
194
|
-
#
|
195
|
-
# Show the current metadata tags
|
196
|
-
#
|
197
|
-
# TODO: format output as JSON and YAML
|
198
|
-
# TODO: Enable additional options
|
199
|
-
#
|
200
|
-
desc 'show', 'Show metadata of a file'
|
201
|
-
method_option :all, :type => :boolean, :aliases => '-a', :desc => 'Show all metatags', :default => false, :required => false
|
202
|
-
method_option :tag, :type => :string, :aliases => '-t', :desc => 'Show specific tag(s), comma separated', :required => false
|
203
|
-
long_desc <<-LONGDESC
|
204
|
-
== General
|
205
|
-
|
206
|
-
Show metatags of a PDF document.
|
207
|
-
|
208
|
-
The following tags are being shown:
|
209
|
-
\x5 * Author
|
210
|
-
\x5 * Creator
|
211
|
-
\x5 * CreateDate
|
212
|
-
\x5 * Title
|
213
|
-
\x5 * Subject
|
214
|
-
\x5 * Keywords
|
215
|
-
|
216
|
-
== Parameters
|
217
|
-
|
218
|
-
--all, -a
|
219
|
-
\x5 Show all relevant metatags for a document.
|
220
|
-
|
221
|
-
Relevant tags are Author,Creator, CreateDate, Title, Subject, Keywords.
|
222
|
-
|
223
|
-
--tag, -t
|
224
|
-
\x5 Specify the metatag to show. The selected metatag must be one of the relevant tags. Other tags are ignored and nothing is returned.
|
225
|
-
|
226
|
-
== Example
|
227
|
-
|
228
|
-
# Show default metatags for a pdf document
|
229
|
-
\x5>CLI show <filename>
|
230
|
-
|
231
|
-
# Show default metatags for example.pdf
|
232
|
-
\x5>CLI show example.pdf
|
233
|
-
|
234
|
-
# Show value for metatag 'Author' for the file example.pdf
|
235
|
-
\x5>CLI show -t author example.pdf
|
236
|
-
|
237
|
-
# Show value for metatags 'Author','Title' for the file example.pdf
|
238
|
-
\x5>CLI show -t author,title example.pdf
|
239
|
-
|
240
|
-
LONGDESC
|
241
|
-
def show(filename)
|
242
|
-
metadata = readMetadata(filename)
|
243
|
-
|
244
|
-
# Output all metatags
|
245
|
-
if options[:all] or options[:tag].nil?
|
246
|
-
puts "Author : " + metadata['author'].to_s
|
247
|
-
puts "Creator : " + metadata['creator'].to_s
|
248
|
-
puts "CreateDate : " + metadata['createdate'].to_s
|
249
|
-
puts "Subject : " + metadata['subject'].to_s
|
250
|
-
puts "Title : " + metadata['title'].to_s
|
251
|
-
puts "Keywords : " + metadata['keywords'].to_s
|
252
|
-
|
253
|
-
# Output only specific tags
|
254
|
-
elsif not options[:tag].nil?
|
255
|
-
tags = options[:tag].split(',')
|
256
|
-
tags.each do |tag|
|
257
|
-
puts metadata[tag]
|
258
|
-
end
|
259
|
-
end
|
260
|
-
|
261
|
-
end
|
262
|
-
|
263
|
-
#
|
264
|
-
# Change a MetaTag Attribute
|
265
|
-
#
|
266
|
-
# TODO: keywords are added differently according to the documentation
|
267
|
-
# http://www.sno.phy.queensu.ca/~phil/exiftool/faq.html
|
268
|
-
desc 'edit', 'Edit Meta Tag(s)'
|
269
|
-
long_desc <<-LONGDESC
|
270
|
-
== General
|
271
|
-
|
272
|
-
Command will edit the metadata of a PDF document. Multiple values can be
|
273
|
-
specified or 'all'.
|
274
|
-
|
275
|
-
The command will invoke an interactive user input and request the values
|
276
|
-
for the metatag.
|
277
|
-
|
278
|
-
Additionally the file can be renamed at the end according to the new meta
|
279
|
-
tags. See `$ #{__FILE__} help rename` for details.
|
280
|
-
|
281
|
-
== Parameters
|
282
|
-
|
283
|
-
--tag, -t
|
284
|
-
\x5 Names or list of names of Metatag fields to set, separated by commata.
|
285
|
-
|
286
|
-
--rename, -r
|
287
|
-
\x5 Rename file after updating the meta tag information according to the fields.
|
288
|
-
|
289
|
-
This parameter is identical to running `> CLI rename <filename>`
|
290
|
-
|
291
|
-
General example:
|
292
|
-
|
293
|
-
# Edit tag 'TAG' and set a new value interactive.
|
294
|
-
\x5>CLI edit -t TAG <filename>
|
295
|
-
|
296
|
-
# Edit tag 'Author' and set new value interactive.
|
297
|
-
\x5>CLI edit -t author example.pdf
|
298
|
-
|
299
|
-
# Edit mulitple Tags and set a new value.
|
300
|
-
\x5>CLI edit -t tag1,tag2,tag3 <filename>
|
301
|
-
|
302
|
-
|
303
|
-
== Multiple Tags
|
304
|
-
|
305
|
-
For setting multiple tags list the tags comma separated.
|
306
|
-
|
307
|
-
For setting all tags (Author, Title, Subject, CreateDate, Keywords) use the keyword 'all' as tagname.
|
308
|
-
|
309
|
-
# Set tags 'Author', 'Title', 'Subject' in example.pdf interactivly.
|
310
|
-
\x5>CLI edit -t author,title,subject example.pdf`
|
311
|
-
|
312
|
-
# Set tags 'Author', 'Title', 'Subject', 'CreateDate', 'Keywords' in
|
313
|
-
example.pdf interactive.
|
314
|
-
\x5>CLI edit -t all example.pdf
|
315
|
-
|
316
|
-
== Tag: CreateDate
|
317
|
-
|
318
|
-
In order to enter a value for the 'CreateDate' field, some internal matching is going on in order to make it easier and faster to enter dates and times.
|
319
|
-
|
320
|
-
The following formats are identified/matched:
|
321
|
-
|
322
|
-
\x5 yyyymmdd
|
323
|
-
\x5 yyyymmd
|
324
|
-
\x5 yyyymmddHHMMSS
|
325
|
-
\x5 yyyy-mm-dd HH:MM:SS
|
326
|
-
\x5 yyyy:mm:dd HH:MM:SS
|
327
|
-
\x5 yyyy.mm.dd HH:MM:SS
|
328
|
-
\x5 yyyy-mm-d
|
329
|
-
\x5 yyyy-mm-dd
|
330
|
-
\x5 yyyy.mm.d
|
331
|
-
\x5 yyyy.mm.dd
|
332
|
-
\x5 yyyy:mm:d
|
333
|
-
\x5 yyyy:mm:dd
|
334
|
-
|
335
|
-
\x5 - If HH:MM:SS or HHMMSS is not provided, those values are automatically set to zero.
|
336
|
-
\x5 - The output format of every timestamp is <yyyy:mm:dd HH:MM:SS>
|
337
|
-
\x5 - When providing and invalid date, the incorrect date is rejected and the user asked to provide the correct date.
|
338
|
-
|
339
|
-
== Rename file
|
340
|
-
|
341
|
-
In addition to setting the tags the current file can be renamed according to
|
342
|
-
the new metadata.
|
343
|
-
|
344
|
-
# Set tag 'Author' and rename file example.pdf
|
345
|
-
\x5> CLI edit -t author -r example.pdf
|
346
|
-
|
347
|
-
See `> CLI help rename` for details about renaming.
|
348
|
-
|
349
|
-
LONGDESC
|
350
|
-
method_option :tag, :type => :string, :aliases => '-t', :desc => 'Name of the Tag(s) to Edit', :default => false, :required => true
|
351
|
-
method_option :rename, :type => :boolean, :aliases => '-r', :desc => 'Rename file after changing meta-tags', :default => false, :required => false
|
352
|
-
def edit(filename)
|
353
|
-
metadata = readMetadata(filename)
|
354
|
-
|
355
|
-
if options[:tag] == 'all'
|
356
|
-
tags = ['author','title','subject','createdate','keywords']
|
357
|
-
else
|
358
|
-
tags = options[:tag].split(',')
|
359
|
-
end
|
360
|
-
tags.each do |currentTag|
|
361
|
-
|
362
|
-
# Change the tag to something we can use here
|
363
|
-
puts "Current value: '#{metadata[currentTag.downcase]}'"
|
364
|
-
answer = readUserInput("Enter new value for #{currentTag} :")
|
365
|
-
if currentTag.downcase == 'createdate'
|
366
|
-
while not answer = identifyDate(answer)
|
367
|
-
puts 'Invalid date format'
|
368
|
-
answer = readUserInput("Enter new value for #{currentTag} :")
|
369
|
-
end
|
370
|
-
end
|
371
|
-
puts "Changing value for #{currentTag}: '#{metadata[currentTag]}' => #{answer}"
|
372
|
-
`exiftool -#{currentTag.downcase}='#{answer}' -overwrite_original '#{filename}'`
|
373
|
-
end
|
374
|
-
|
375
|
-
#
|
376
|
-
# If required, run the renaming task afterwards
|
377
|
-
# This is not pretty, but seems to be the only way to do this in THOR
|
378
|
-
#
|
379
|
-
if options[:rename]
|
380
|
-
puts `#{__FILE__} rename '#{filename}'`
|
381
|
-
end
|
382
|
-
|
383
|
-
end
|
384
|
-
|
385
|
-
#
|
386
|
-
# Check the metadata for the minimum necessary tags
|
387
|
-
# See documentation at the top of this file for details
|
388
|
-
#
|
389
|
-
# void check(string)
|
390
|
-
desc 'check', 'Check Metadata for completeness'
|
391
|
-
long_desc <<-LONGDESC
|
392
|
-
== General
|
393
|
-
|
394
|
-
Show value of the following metatags of a PDF document:
|
395
|
-
|
396
|
-
- Author
|
397
|
-
\x5- Creator
|
398
|
-
\x5- CreateDate
|
399
|
-
\x5- Subject
|
400
|
-
\x5- Title
|
401
|
-
\x5- Keywords
|
402
|
-
|
403
|
-
== Example
|
404
|
-
|
405
|
-
# Show the values of the metatags for example.pdf
|
406
|
-
\x5>CLI show example.pdf
|
407
|
-
|
408
|
-
LONGDESC
|
409
|
-
def check(filename)
|
410
|
-
returnvalue = 0
|
411
|
-
readMetadata(filename).each do|key,value|
|
412
|
-
if key.match(/author|subject|createdate|title/) and value.empty?
|
413
|
-
puts 'Missing value: ' + key
|
414
|
-
returnvalue == 0 ? returnvalue = 1 : ''
|
415
|
-
end
|
416
|
-
end
|
417
|
-
exit returnvalue
|
418
|
-
end
|
419
|
-
|
420
|
-
#
|
421
|
-
# Explain fields and Metatags
|
422
|
-
# Show information about how they are used.
|
423
|
-
#
|
424
|
-
desc 'explain','Show more information about usuable Meta-Tags'
|
425
|
-
long_desc <<-LONGDESC
|
426
|
-
== General
|
427
|
-
|
428
|
-
Explain some terms used with the script.
|
429
|
-
|
430
|
-
== Example
|
431
|
-
|
432
|
-
# Show the available subjects
|
433
|
-
\x5>CLI explain
|
434
|
-
|
435
|
-
# Show information about the subject 'author'
|
436
|
-
\x5>CLI explain author
|
437
|
-
|
438
|
-
LONGDESC
|
439
|
-
def explain(term='')
|
440
|
-
|
441
|
-
case term
|
442
|
-
when ''
|
443
|
-
puts 'Available subjects:'
|
444
|
-
puts '- author'
|
445
|
-
puts '- createdate'
|
446
|
-
puts '- hiera'
|
447
|
-
puts '- keywords'
|
448
|
-
puts '- subject'
|
449
|
-
puts '- title'
|
450
|
-
puts ' '
|
451
|
-
puts "Run `$ #{__FILE__} explain <subject>` to get more details."
|
452
|
-
when 'author'
|
453
|
-
puts '[Author]'
|
454
|
-
puts ' The sender or creator of the document.'
|
455
|
-
when 'createdate'
|
456
|
-
puts '[CreateDate]'
|
457
|
-
puts ' Date of the document. This is not the date when the file was created, but'
|
458
|
-
puts ' the date found in the document or printed on the document.'
|
459
|
-
when 'title'
|
460
|
-
puts '[Title]'
|
461
|
-
puts ' General type of the document, e.g. Manual, Invoice.'
|
462
|
-
when 'subject'
|
463
|
-
puts '[Subject]'
|
464
|
-
puts ' What is the document about.'
|
465
|
-
puts ' For example:'
|
466
|
-
puts ' Manual: What is the manual about?'
|
467
|
-
puts ' Invoice: Invoice number?'
|
468
|
-
puts ' Contract: Contract number of Subject of the contract?'
|
469
|
-
puts ' Order: Ordernumber of the document?'
|
470
|
-
when 'keywords'
|
471
|
-
puts '[Keywords]'
|
472
|
-
puts ' Anything else that might be of interesst.'
|
473
|
-
puts ' In Orders the elements that have been orders. Contracts might contain the'
|
474
|
-
puts ' Names and adress of the involved parties.'
|
475
|
-
puts ' '
|
476
|
-
puts ' When writing Invoices with their numbers, these will be automatically be '
|
477
|
-
puts ' picked up and can be integrated in the filename, e.g. "Invoicenumber 12334'
|
478
|
-
when 'hiera'
|
479
|
-
puts 'Information about hiera: https://docs.puppetlabs.com/hiera/1/index.html'
|
480
|
-
puts ''
|
481
|
-
puts 'Installation:'
|
482
|
-
puts ' $ gem install hiera'
|
483
|
-
puts ''
|
484
|
-
puts ''
|
485
|
-
puts 'Configure default settings in hiera:'
|
486
|
-
puts ''
|
487
|
-
puts ' YAML'
|
488
|
-
puts ' ---'
|
489
|
-
puts ' pdfmd::config:'
|
490
|
-
puts ' sort:'
|
491
|
-
puts ' destination : /tmp/output'
|
492
|
-
puts ' action : copy'
|
493
|
-
puts ' logfile : /var/log/pdfmd.log'
|
494
|
-
puts ''
|
495
|
-
end
|
496
|
-
|
497
|
-
end
|
498
|
-
|
499
|
-
#
|
500
|
-
# Sort the files into directories based on the author
|
501
|
-
#
|
502
|
-
desc 'sort','Sort files into directories sorted by Author'
|
503
|
-
long_desc <<-LONGDESC
|
504
|
-
== General
|
505
|
-
|
506
|
-
Will sort pdf documents into subdirectories according to the value of their
|
507
|
-
tag 'author'.
|
508
|
-
|
509
|
-
When using this action a logfile with all actions will be generated in the
|
510
|
-
current working directory with the same name as the script and the ending
|
511
|
-
'.log'. This can be disabled with the parameter 'log' if required.
|
512
|
-
|
513
|
-
If a document does not have an entry in the meta tag 'author', the file will
|
514
|
-
not be processed. This can be seen in the output of the logfile as well.
|
515
|
-
|
516
|
-
=== Parameters
|
517
|
-
|
518
|
-
[*destination|d*]
|
519
|
-
\x5 Speficy the root output directory to where the folderstructure is being created.
|
520
|
-
|
521
|
-
This parameter is required if hiera is not configured.
|
522
|
-
|
523
|
-
This parameter overwrites the hiera defaults
|
524
|
-
|
525
|
-
[*copy|c*]
|
526
|
-
\x5 Copy the files instead of moving them.
|
527
|
-
|
528
|
-
[*log|l*]
|
529
|
-
\x5 Disable/Enable the logging.
|
530
|
-
\x5 Default: enabled.
|
531
|
-
|
532
|
-
[*interactive|i*]
|
533
|
-
\x5 Disable/Enable interactive sorting. This will ask for confirmation for
|
534
|
-
\x5 each sorting action.
|
535
|
-
\x5 Default: disabled.
|
536
|
-
|
537
|
-
=== Replacement rules
|
538
|
-
|
539
|
-
The subdirectories for the documents are generated from the values in the
|
540
|
-
tag 'author' of each document.
|
541
|
-
|
542
|
-
In order to ensure a clean directory structure, there are certain rules
|
543
|
-
for altering the values.
|
544
|
-
\x5 1. Whitespaces are replaced by underscores.
|
545
|
-
\x5 2. Dots are replaced by underscores.
|
546
|
-
\x5 3. All letters are converted to their lowercase version.
|
547
|
-
\x5 4. Special characters are serialized
|
548
|
-
|
549
|
-
=== Hiera configuration
|
550
|
-
|
551
|
-
Set the default values mentioned below as sub-hash of the main configuration:
|
552
|
-
|
553
|
-
YAML
|
554
|
-
\x5sort:
|
555
|
-
\x5 key: value
|
556
|
-
|
557
|
-
=== Hiera defaults
|
558
|
-
|
559
|
-
The following values can be influenced by the hiera configuration in the
|
560
|
-
section 'sort'. Commandline parameter will overwrite the defaults coming
|
561
|
-
from hiera unless otherwise notet.
|
562
|
-
|
563
|
-
[*copy*]
|
564
|
-
\x5 If set to true copies the files from the source directory instead of moving them.
|
565
|
-
|
566
|
-
[*destination*]
|
567
|
-
\x5 Specifies the default output directory (root-directory). Either this or the
|
568
|
-
command line parameter for destinations must be set.
|
569
|
-
|
570
|
-
[*logfile*]
|
571
|
-
\x5 Specifies the default path for the logfile output. If this is not
|
572
|
-
specfied a logfile with the scriptname + '.log' will be created in the
|
573
|
-
current working directory.
|
574
|
-
|
575
|
-
[*interactive*]
|
576
|
-
\x5 If set to true, each file must be acknowledged to be processed when
|
577
|
-
running the script.
|
578
|
-
|
579
|
-
=== Example
|
580
|
-
|
581
|
-
This command does the following:
|
582
|
-
\x5 1. Take all pdf documents in the subdirectory ./documents.
|
583
|
-
\x5 2. Create the output folder structure in `/tmp/test/`.
|
584
|
-
\x5 3. Copy the files instead of moving them.
|
585
|
-
\x5 4. Disable the logging.
|
586
|
-
\x5> CLI sort -d /tmp/test -c -l false ./documents
|
587
|
-
|
588
|
-
LONGDESC
|
589
|
-
method_option :destination, :aliases => '-d', :required => false, :type => :string, :desc => 'Defines the output directory'
|
590
|
-
method_option :copy, :aliases => '-c', :required => false, :type => :boolean, :desc => 'Copy files instead of moving them'
|
591
|
-
method_option :log, :aliases => '-l', :required => false, :type => :boolean, :desc => 'Enable/Disable creation of log files', :default => true
|
592
|
-
method_option :interactive, :aliases => '-i', :required => false, :type => :boolean, :desc => 'Enable/Disable interactive sort'
|
593
|
-
def sort(inputDir = '.')
|
594
|
-
|
595
|
-
hieraDefaults = queryHiera('pdfmd::config')
|
596
|
-
|
597
|
-
copyAction = options[:copy].nil? ? false : true
|
598
|
-
if options[:copy].nil? and hieraDefaults['sort']['copy'] == true
|
599
|
-
copyAction = true
|
600
|
-
puts 'Setting action to copy based on Hiera.'
|
601
|
-
end
|
602
|
-
|
603
|
-
interactiveAction = options[:interactive].nil? ? false : true
|
604
|
-
if options[:interactive].nil? and hieraDefaults['sort']['interactive'] == true
|
605
|
-
interactiveAction = true
|
606
|
-
puts 'Setting interactive to true based on Hiera.'
|
607
|
-
end
|
608
|
-
|
609
|
-
# Fetch alternate destination from hiera if available
|
610
|
-
destination = options[:destination]
|
611
|
-
if destination.nil?
|
612
|
-
|
613
|
-
hieraHash = queryHiera('pdfmd::config')
|
614
|
-
if !hieraHash['sort']['destination'].nil?
|
615
|
-
destination = hieraHash['sort']['destination']
|
616
|
-
else
|
617
|
-
puts 'No information about destination found.'
|
618
|
-
puts 'Set parameter -d or configure hiera.'
|
619
|
-
puts 'Abort.'
|
620
|
-
exit 1
|
621
|
-
end
|
622
|
-
|
623
|
-
end
|
624
|
-
|
625
|
-
logenable = options[:log]
|
626
|
-
logfile = !hieraHash['sort']['logfile'].nil? ? hieraHash['sort']['logfile'] : Dir.pwd.chomp('/') + '/' + Pathname.new(__FILE__).basename + '.log'
|
627
|
-
|
628
|
-
# Check that logfilepath exists and is writeable
|
629
|
-
if !File.writable?(logfile)
|
630
|
-
puts "Cannot write '#{logfile}. Abort."
|
631
|
-
exit 1
|
632
|
-
end
|
633
|
-
logenable ? $logger = Logger.new(logfile) : ''
|
634
|
-
|
635
|
-
# Input validation
|
636
|
-
!File.exist?(inputDir) ? abort('Input directory does not exist. Abort.'): ''
|
637
|
-
File.directory?(inputDir) ? '' : abort('Input is a single file')
|
638
|
-
File.file?(destination) ? abort("Output '#{destination}' is an existing file. Cannot create directory with the same name. Abort") : ''
|
639
|
-
unless File.directory?(destination)
|
640
|
-
FileUtils.mkdir_p(destination)
|
641
|
-
$logger.info("Destination '#{destination}' has been created.")
|
642
|
-
end
|
643
|
-
|
644
|
-
# Iterate through all files
|
645
|
-
Dir[inputDir.chomp('/') + '/*.pdf'].sort.each do |file|
|
646
|
-
|
647
|
-
if interactiveAction
|
648
|
-
answer = readUserInput("Process '#{file}' ([y]/n): ")
|
649
|
-
answer.empty? ? 'y' : next
|
650
|
-
end
|
651
|
-
|
652
|
-
metadata = readMetadata(file)
|
653
|
-
if metadata['author'] and not metadata['author'].empty?
|
654
|
-
author = metadata['author'].gsub(' ','_').gsub('.','_')
|
655
|
-
I18n.enforce_available_locales = false # Serialize special characters
|
656
|
-
author = I18n.transliterate(author).downcase
|
657
|
-
folderdestination = destination.chomp('/') + '/' + author
|
658
|
-
|
659
|
-
unless File.directory?(folderdestination)
|
660
|
-
FileUtils.mkdir_p(folderdestination)
|
661
|
-
logenable ? $logger.info("Folder '#{folderdestination}' has been created."): ''
|
662
|
-
end
|
663
|
-
|
664
|
-
filedestination = destination.chomp('/') + '/' + author + '/' + Pathname.new(file).basename.to_s
|
665
|
-
|
666
|
-
# Final check before touching the filesystem
|
667
|
-
if not File.exist?(filedestination)
|
668
|
-
$logger.info("File '#{file}' => '#{filedestination}'")
|
669
|
-
|
670
|
-
# Move/Copy the file
|
671
|
-
if copyAction
|
672
|
-
FileUtils.cp(file, filedestination)
|
673
|
-
else
|
674
|
-
FileUtils.mv(file,filedestination)
|
675
|
-
end
|
676
|
-
|
677
|
-
else
|
678
|
-
logenable ? $logger.warn("File '#{filedestination}' already exists. Ignoring.") : ''
|
679
|
-
end
|
680
|
-
else
|
681
|
-
logenable ? $logger.warn("Missing tag 'Author' for file '#{file}'. Skipping.") : (puts "Missing tag 'Author' for file '#{file}'. Skipping")
|
682
|
-
end
|
683
|
-
end
|
684
|
-
|
685
|
-
end
|
686
|
-
|
687
|
-
#
|
688
|
-
# Rename the file according to the Metadata
|
689
|
-
#
|
690
|
-
# Scheme: YYYYMMDD-author-subject-keywords.extension
|
691
|
-
desc 'rename', 'Rename the file according to Metadata'
|
692
|
-
long_desc <<-LONGDESC
|
693
|
-
== General
|
694
|
-
|
695
|
-
Rename a file with the meta tags in the document.
|
696
|
-
|
697
|
-
== Parameter
|
698
|
-
|
699
|
-
--dry-run, -n
|
700
|
-
\x5 Simulate the renaming process and show the result without changing the file.
|
701
|
-
|
702
|
-
--all-keywords, -a
|
703
|
-
\x5 Use all keywords from the meta information in the file name and ignore the limit.
|
704
|
-
|
705
|
-
--keywords, -k
|
706
|
-
\x5 Set the number of keywords used in the filename to a new value.
|
707
|
-
\x5 Default: 3
|
708
|
-
|
709
|
-
--outputdir, -o
|
710
|
-
\x5 Not implemented yet. Default output dir for the renamed file is the source directory.
|
711
|
-
|
712
|
-
== Example
|
713
|
-
|
714
|
-
# Rename the file according to the metatags
|
715
|
-
\x5> CLI rename <filename>
|
716
|
-
|
717
|
-
# Rename example.pdf according to the metatags
|
718
|
-
\x5> CLI rename example.pdf
|
719
|
-
|
720
|
-
# Simulate renaming example.pdf according to the metatags (dry-run)
|
721
|
-
\x5> CLI rename -n example.pdf
|
722
|
-
|
723
|
-
== Rules
|
724
|
-
|
725
|
-
There are some rules regarding how documents are being renamed
|
726
|
-
|
727
|
-
Rule 1: All documents have the following filenaming structure:
|
728
|
-
|
729
|
-
<yyyymmdd>-<author>-<type>-<additionalInformation>.<extension>
|
730
|
-
|
731
|
-
\x5 # <yyyymmdd>: Year, month and day identical to the meta information in the
|
732
|
-
document.
|
733
|
-
\x5 # <author>: Author of the document, identical to the meta information
|
734
|
-
in the document. Special characters and whitespaces are replaced.
|
735
|
-
\x5 # <type>: Document type, is being generated from the title field in the metadata of the document. Document type is a three character abbreviation following the following logic:
|
736
|
-
|
737
|
-
\x5 til => Tilbudt|Angebot
|
738
|
-
\x5 odb => Orderbekreftelse
|
739
|
-
\x5 fak => Faktura
|
740
|
-
\x5 ord => Order
|
741
|
-
\x5 avt => Kontrakt|Avtale|Vertrag|contract
|
742
|
-
\x5 kvi => Kvittering
|
743
|
-
\x5 man => Manual
|
744
|
-
\x5 bil => Billett|Ticket
|
745
|
-
\x5 inf => Informasjon|Information
|
746
|
-
\x5 dok => unknown
|
747
|
-
|
748
|
-
If the document type cannot be determined automatically, it defaults to 'dok'.
|
749
|
-
|
750
|
-
# <additionalInformation>: Information generated from the metadata fields
|
751
|
-
'title', 'subject' and 'keywords'.
|
752
|
-
|
753
|
-
If 'Title' or 'Keywords' contains one of the following keywords, it will be replaced with the corresponding abbreviation followed by the specified value separated by a whitespace:
|
754
|
-
|
755
|
-
\x5 fak => Faktura|Fakturanummer|Rechnung|Rechnungsnummer
|
756
|
-
\x5 kdn => Kunde|Kundenummer|Kunde|Kundennummer
|
757
|
-
\x5 ord => Ordre|Ordrenummer|Bestellung|Bestellungsnummer
|
758
|
-
\x5 kvi => Kvittering|Kvitteringsnummer|Quittung|Quittungsnummer
|
759
|
-
|
760
|
-
Rule 2: The number of keywords used in the filename is defined by the parameter '-k'. See the section of that parameter for more details and the default value.
|
761
|
-
|
762
|
-
Rule 3: Keywords matching 'kvi','fak','ord','kdn' are prioritised.
|
763
|
-
|
764
|
-
Rule 4: Special characters and whitespaces are replaced:
|
765
|
-
|
766
|
-
\x5 ' ' => '_'
|
767
|
-
\x5 '/' => '_'
|
768
|
-
|
769
|
-
Rule 5: The new filename has only lowercase characters.
|
770
|
-
|
771
|
-
== Example (detailed)
|
772
|
-
|
773
|
-
# Example PDF with following MetaTags:
|
774
|
-
|
775
|
-
\x5 Filename : example.pdf
|
776
|
-
\x5 Author : John
|
777
|
-
\x5 Subject : new Product
|
778
|
-
\x5 Title : Presentation
|
779
|
-
\x5 CreateDate : 1970:01:01 01:00:00
|
780
|
-
\x5 Keywords : John Doe, Jane Doe, Mister Doe
|
781
|
-
|
782
|
-
# Renaming the file
|
783
|
-
\x5> CLI rename example.pdf
|
784
|
-
\x5 example.pdf => 19700101-john-dok-new_product-john_doe-jane_doe.pdf
|
785
|
-
|
786
|
-
# Simulation to rename the file (no actual change)
|
787
|
-
\x5> CLI rename -n example.pdf
|
788
|
-
\x5example.pdf => 19700101-john-dok-new_product-john_doe-jane_doe.pdf
|
789
|
-
|
790
|
-
# Renaming the file with all keywords
|
791
|
-
\x5> CLI rename -n -a example.pdf
|
792
|
-
\x5example.pdf => 19700101-john-dok-new_product-john_doe-jane_doe-mister_doe.pdf
|
793
|
-
|
794
|
-
LONGDESC
|
795
|
-
method_option :dryrun, :type => :boolean, :aliases => '-n', :desc => 'Run without making changes', :default => false, :required => false
|
796
|
-
method_option ':all-keywords', :type => :boolean, :aliases => '-a', :desc => 'Add all keywords (no limit)', :default => false, :required => false
|
797
|
-
method_option :keywords, :type => :numeric, :aliases => '-k', :desc => 'Number of keywords to include (Default: 3)', :default => 3, :required => false
|
798
|
-
method_option :outputdir, :aliases => '-o', :type => :string, :desc => 'Speficy output directory', :default => :false, :required => :false
|
799
|
-
def rename(filename)
|
800
|
-
metadata = readMetadata(filename).each do |key,value|
|
801
|
-
|
802
|
-
# Check if the metadata is complete
|
803
|
-
if key.match(/author|subject|createdate|title/) and value.empty?
|
804
|
-
puts 'Missing value for ' + key
|
805
|
-
puts 'Abort'
|
806
|
-
exit 1
|
807
|
-
end
|
808
|
-
|
809
|
-
end
|
810
|
-
|
811
|
-
date = metadata['createdate'].gsub(/\ \d{2}\:\d{2}\:\d{2}.*$/,'').gsub(/\:/,'')
|
812
|
-
author = metadata['author'].gsub(/\./,'_').gsub(/\-/,'').gsub(/\s/,'_')
|
813
|
-
I18n.enforce_available_locales = false
|
814
|
-
author = I18n.transliterate(author) # Normalising
|
815
|
-
|
816
|
-
keywords_preface = ''
|
817
|
-
# This statement can probably be optimised
|
818
|
-
case metadata['title']
|
819
|
-
when /(Tilbudt|Angebot)/i
|
820
|
-
doktype = 'til'
|
821
|
-
keywords_preface = setKeywordsPreface(metadata,doktype.gsub(/\-/,''))
|
822
|
-
when /Orderbekrefelse/i
|
823
|
-
doktype = 'odb'
|
824
|
-
keywords_preface = setKeywordsPreface(metadata,doktype.gsub(/\-/,''))
|
825
|
-
when /faktura/i
|
826
|
-
doktype = 'fak'
|
827
|
-
keywords_preface = setKeywordsPreface(metadata,doktype.gsub(/\-/,''))
|
828
|
-
when /order/i
|
829
|
-
doktype = 'ord'
|
830
|
-
keywords_preface = setKeywordsPreface(metadata,doktype.gsub(/\-/,''))
|
831
|
-
when /(kontrakt|avtale|vertrag|contract)/i
|
832
|
-
doktype = 'avt'
|
833
|
-
keywords_preface = setKeywordsPreface(metadata,doktype.gsub(/\-/,''))
|
834
|
-
when /kvittering/i
|
835
|
-
doktype = 'kvi'
|
836
|
-
keywords_preface = setKeywordsPreface(metadata,doktype.gsub(/\-/,''))
|
837
|
-
when /manual/i
|
838
|
-
doktype = 'man'
|
839
|
-
keywords_preface = setKeywordsPreface(metadata,doktype.gsub(/\-/,''))
|
840
|
-
when /(billett|ticket)/i
|
841
|
-
doktype = 'bil'
|
842
|
-
keywords_preface = setKeywordsPreface(metadata,doktype.gsub(/\-/,''))
|
843
|
-
when /(informasjon|information)/i
|
844
|
-
doktype = 'inf'
|
845
|
-
keywords_preface = setKeywordsPreface(metadata,doktype.gsub(/\-/,''))
|
846
|
-
else
|
847
|
-
doktype = 'dok'
|
848
|
-
keywords_preface = setKeywordsPreface(metadata,doktype.gsub(/\-/,''))
|
849
|
-
end
|
850
|
-
if not metadata['keywords'].empty?
|
851
|
-
keywords_preface == '' ? keywords = '' : keywords = keywords_preface
|
852
|
-
keywordsarray = metadata['keywords'].split(',')
|
853
|
-
|
854
|
-
#
|
855
|
-
# Sort array
|
856
|
-
#
|
857
|
-
keywordssorted = Array.new
|
858
|
-
keywordsarray.each_with_index do |value,index|
|
859
|
-
value = value.lstrip.chomp
|
860
|
-
value = value.gsub(/(Faktura|Rechnungs)(nummer)? /i,'fak')
|
861
|
-
value = value.gsub(/(Kunde)(n)?(nummer)? /i,'kdn')
|
862
|
-
value = value.gsub(/(Kunde)(n)?(nummer)?-/i,'kdn')
|
863
|
-
value = value.gsub(/(Ordre|Bestellung)(s?nummer)? /i,'ord')
|
864
|
-
value = value.gsub(/(Kvittering|Quittung)(snummer)? /i,'kvi')
|
865
|
-
value = value.gsub(/\s/,'_')
|
866
|
-
value = value.gsub(/\//,'_')
|
867
|
-
keywordsarray[index] = value
|
868
|
-
if value.match(/^(fak|kdn|ord|kvi)/)
|
869
|
-
keywordssorted.insert(0, value)
|
870
|
-
else
|
871
|
-
keywordssorted.push(value)
|
872
|
-
end
|
873
|
-
end
|
874
|
-
|
875
|
-
counter = 0
|
876
|
-
keywordssorted.each_with_index do |value,index|
|
877
|
-
|
878
|
-
# Exit condition limits the number of keywords used in the filename
|
879
|
-
# unless all keywords shall be added
|
880
|
-
if not options[':all-keywords']
|
881
|
-
counter > options[:keywords]-1 ? break : counter = counter + 1
|
882
|
-
end
|
883
|
-
if value.match(/(kvi|fak|ord|kdn)/i)
|
884
|
-
keywords == '' ? keywords = '-' + value : keywords = value + '-' + keywords
|
885
|
-
else
|
886
|
-
keywords == '' ? keywords = '-' + value : keywords.concat('-' + value)
|
887
|
-
end
|
888
|
-
end
|
889
|
-
# Normalise the keywords as well
|
890
|
-
#
|
891
|
-
I18n.enforce_available_locales = false
|
892
|
-
keywords = I18n.transliterate(keywords)
|
893
|
-
|
894
|
-
# There are no keywords
|
895
|
-
# Rare, but it happens
|
896
|
-
else
|
897
|
-
|
898
|
-
# There are no keywords.
|
899
|
-
# we are using the title and the subject
|
900
|
-
if keywords_preface != ''
|
901
|
-
keywords = keywords_preface
|
902
|
-
end
|
903
|
-
|
904
|
-
end
|
905
|
-
extension = 'pdf'
|
906
|
-
if keywords != nil and keywords[0] != '-'
|
907
|
-
keywords = '-' + keywords
|
908
|
-
end
|
909
|
-
keywords == nil ? keywords = '' : ''
|
910
|
-
newFilename = date + '-' +
|
911
|
-
author + '-' +
|
912
|
-
doktype +
|
913
|
-
keywords + '.' +
|
914
|
-
extension
|
915
|
-
|
916
|
-
# Output directory checks
|
917
|
-
if options[:outputdir]
|
918
|
-
#if not File.exist?(options[:outputdir])
|
919
|
-
# puts "Error: output dir '#{options[:outputdir]}' not found. Abort"
|
920
|
-
# exit 1
|
921
|
-
#end
|
922
|
-
end
|
923
|
-
|
924
|
-
if not options[:dryrun] and filename != newFilename.downcase
|
925
|
-
`mv -v '#{filename}' '#{newFilename.downcase}'`
|
926
|
-
else
|
927
|
-
puts filename + "\n => " + newFilename.downcase
|
928
|
-
end
|
929
|
-
end
|
930
|
-
|
931
|
-
#
|
932
|
-
# One parameter to show the current version
|
933
|
-
#
|
934
|
-
map %w[--version -v] => :__print_version
|
935
|
-
desc "--version, -v", 'Show the current script version'
|
936
|
-
def __print_version
|
937
|
-
puts VERSION
|
938
|
-
end
|
939
|
-
|
940
|
-
end
|
941
|
-
|
942
|
-
DOC.start
|
943
|
-
|
2
|
+
require 'pdfmd.rb'
|