pdfmd 1.4.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +55 -0
- data/LICENSE +22 -0
- data/README.md +108 -0
- data/bin/pdfmd +1 -942
- data/lib/pdfmd.rb +531 -0
- data/lib/pdfmd/check.rb +10 -0
- data/lib/pdfmd/edit.rb +40 -0
- data/lib/pdfmd/explain.author.md +3 -0
- data/lib/pdfmd/explain.createdate.md +6 -0
- data/lib/pdfmd/explain.hiera.md +18 -0
- data/lib/pdfmd/explain.keywords.md +9 -0
- data/lib/pdfmd/explain.rb +17 -0
- data/lib/pdfmd/explain.subject.md +8 -0
- data/lib/pdfmd/explain.title.md +5 -0
- data/lib/pdfmd/methods.rb +130 -0
- data/lib/pdfmd/rename.rb +146 -0
- data/lib/pdfmd/show.rb +24 -0
- data/lib/pdfmd/sort.rb +100 -0
- data/pdfmd.gemspec +27 -0
- metadata +83 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 798bed0d41462e97707960d925ec633109b5bb67
|
4
|
+
data.tar.gz: f678724174da3a5a3b4baa98615efb5dde3395b8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f4fde3094c09a5d5631b37beffbc16b4b3c731f16a58af86b3b1eeab992aa953dac3a74e71ab683dd1f6a3eb90bb0288ae6dd18798a7242a2263707b9640b769
|
7
|
+
data.tar.gz: 3f30cd43417346980587346f5cde64537d45b9fe346c093568510e66414897221e57af7d936dadad41882c591910dc3ec4a2a4736de5e0931d6c1701e44bea8d
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# Version 1.4.1
|
2
|
+
- Bugfix: When in interactive sorting, choosing the default and 'y' did not have the same effect.
|
3
|
+
- Renamed parameter '--:all-keywords' to '--allkeywords' (rename method).
|
4
|
+
- Bugfix: Method 'rename', Renaming a file puts it in the input directory, not in the current working directory.
|
5
|
+
- Bugfix: Method 'show', Listing single tags works now.
|
6
|
+
- Moved 'explain'-text into separate files.
|
7
|
+
- Moved commands into separate files under './lib/pdfmd'.
|
8
|
+
- Bugfix: Method 'show', Parameter '-t' is now case insensitive.
|
9
|
+
- Added option 'outputdir' to command 'rename'.
|
10
|
+
|
11
|
+
# Version 1.4.0
|
12
|
+
- Added Hiera support for 'sort' command to define some standards (less typing)
|
13
|
+
- Added interactive parameter to 'sort' command
|
14
|
+
- Updated documentation
|
15
|
+
|
16
|
+
# Version 1.3.2
|
17
|
+
- Moved the script to right place in the GEM (/bin)
|
18
|
+
- Readme Updated
|
19
|
+
- Moved Changelog into separate file
|
20
|
+
|
21
|
+
# Version 1.3.1
|
22
|
+
- Corrected Email address (Gemspec)
|
23
|
+
- Corrected website address (Gemspec)
|
24
|
+
- No changes to script
|
25
|
+
|
26
|
+
# Version 1.3
|
27
|
+
- Small bugfix about special characters in filenames (author).
|
28
|
+
- Bugfix for the tag 'createdate' written as 'CreateDate' which did not
|
29
|
+
take the date then.
|
30
|
+
- Removed inactive code.
|
31
|
+
- Added parameter 'version'
|
32
|
+
|
33
|
+
# Version 1.2
|
34
|
+
- Small bugfix with the sort function and the logfile being created.
|
35
|
+
|
36
|
+
# Version 1.1
|
37
|
+
- Added Function to sort pdf documents into a directory structure based on
|
38
|
+
the author of the document.
|
39
|
+
- Added dependency 'pathname'
|
40
|
+
- Added dependency 'logger'
|
41
|
+
- Added dependency 'i18n'
|
42
|
+
- Added method 'sort'
|
43
|
+
- Changing a tag will now output the old value in the edit dialog.
|
44
|
+
- Updated documentation and descriptions of methods
|
45
|
+
|
46
|
+
# Version 1.0
|
47
|
+
- Added documentation in long description of the commands
|
48
|
+
- Added method "explain" for further information
|
49
|
+
|
50
|
+
# Version 0.9
|
51
|
+
- Added 'rename' option to edit metatags
|
52
|
+
- Fixed some output strings
|
53
|
+
|
54
|
+
# Version 0.x
|
55
|
+
- All other stuff
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Micronarrativ
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
# pdfmd
|
2
|
+
Pdf Meta data managing script.
|
3
|
+
|
4
|
+
I use the script `pdfmd.rb`/pdfmetadata (with a slightly different name) to manage my PDF documents and keep the naming in line.
|
5
|
+
Hidden deep in the directory structure of my disks I can quickly find the
|
6
|
+
documents I need with a quick `find /document/path -type f -iname
|
7
|
+
'*<keyword>*'` which matches some string in the filename.
|
8
|
+
|
9
|
+
# Requirements
|
10
|
+
|
11
|
+
Although the requirements are listed in the script itself as well (header documentation!), here they are again:
|
12
|
+
|
13
|
+
## Ruby Gems
|
14
|
+
|
15
|
+
1. [thor](https://rubygems.org/gems/thor)
|
16
|
+
2. [highline/import](https://rubygems.org/gems/highline)
|
17
|
+
3. [fileutils](https://rubygems.org/gems/fileutils)
|
18
|
+
4. [i18n](https://rubygems.org/gems/i18n)
|
19
|
+
5. [logger]()
|
20
|
+
6. [pathname]()
|
21
|
+
|
22
|
+
Install the requirements as usual
|
23
|
+
|
24
|
+
```
|
25
|
+
$ gem install thor
|
26
|
+
$ gem install highline
|
27
|
+
$ gem install fileutils
|
28
|
+
$ gem install i18n
|
29
|
+
$ gem install pathname
|
30
|
+
$ gem install logger
|
31
|
+
```
|
32
|
+
|
33
|
+
## Applications
|
34
|
+
|
35
|
+
1. [exiftools](http://www.sno.phy.queensu.ca/~phil/exiftool/)
|
36
|
+
|
37
|
+
This is usually already in your os repositories
|
38
|
+
|
39
|
+
```
|
40
|
+
$ sudo yum install Perl-Image-Exiftool
|
41
|
+
```
|
42
|
+
|
43
|
+
2. [hiera](https://rubygems.org/gems/hiera) can be optionally used to configure
|
44
|
+
some default settings (instead of a configuration file).
|
45
|
+
|
46
|
+
```
|
47
|
+
$ gem install hiera
|
48
|
+
```
|
49
|
+
|
50
|
+
# Usage
|
51
|
+
|
52
|
+
The usage is quite simple:
|
53
|
+
|
54
|
+
```
|
55
|
+
$ ./pdfmd.rb [show|edit|rename|sort] [options] <filename>
|
56
|
+
```
|
57
|
+
|
58
|
+
The interface has been setup using Thor.
|
59
|
+
So in order to get more information just run the required _help_ command:
|
60
|
+
|
61
|
+
```
|
62
|
+
# Show general possibilities:
|
63
|
+
$ pdfmd.rb
|
64
|
+
|
65
|
+
# Show more information about <action>
|
66
|
+
$ pdfmd.rb help <action>
|
67
|
+
```
|
68
|
+
|
69
|
+
My usual workflow is like this:
|
70
|
+
|
71
|
+
```
|
72
|
+
$ pdfmd.rb show test.pdf
|
73
|
+
$ pdfmd.rb edit -t all test.pdf
|
74
|
+
...
|
75
|
+
$ pdfmd.rb rename test.pdf
|
76
|
+
$ mv 20150101-me-dok-testdocument.pdf /my/pdf/directory
|
77
|
+
```
|
78
|
+
|
79
|
+
There is an underlying logic to how files are renamed and sorted according to their metadata. Make sure you read at least the help information before you use it.
|
80
|
+
|
81
|
+
|
82
|
+
__HINT__: Before you start using the script, make sure you have a backup of your files or you know what you're doing. If you lose information/files I will not be able to help you.
|
83
|
+
|
84
|
+
# Hiera
|
85
|
+
|
86
|
+
In order for Hiera to provide (default) configuration data, setup a configuration hash e.g. inside the YAML backend:
|
87
|
+
|
88
|
+
``` YAML
|
89
|
+
pdfmd::config:
|
90
|
+
sort:
|
91
|
+
destination : /data/tmp
|
92
|
+
copy : true
|
93
|
+
logfile : /var/log/pdfmd.log
|
94
|
+
interactive : false
|
95
|
+
```
|
96
|
+
|
97
|
+
Information about which hiera configuration settings are available can be either found in `pdfmd help <command>` or `pdfmd explain hiera`.
|
98
|
+
|
99
|
+
Test your hiera configuration with
|
100
|
+
|
101
|
+
``` bash
|
102
|
+
$ hiera pdfmd::config
|
103
|
+
```
|
104
|
+
|
105
|
+
# Contact
|
106
|
+
|
107
|
+
If you have improvements and suggestions -> let me know.
|
108
|
+
|
data/bin/pdfmd
CHANGED
@@ -1,943 +1,2 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
#
|
4
|
-
# == File: pdfmd.rb
|
5
|
-
#
|
6
|
-
# Show and edit Metadata of PDF files and rename the files accordingly.
|
7
|
-
#
|
8
|
-
# === Requirements
|
9
|
-
#
|
10
|
-
# ==== Ruby gems:
|
11
|
-
# - thor
|
12
|
-
# - highline/import
|
13
|
-
# - fileutils
|
14
|
-
# - i18n
|
15
|
-
# - pathname
|
16
|
-
# - logger
|
17
|
-
#
|
18
|
-
# ==== OS applications:
|
19
|
-
#
|
20
|
-
# - exiftools
|
21
|
-
#
|
22
|
-
# === Usage
|
23
|
-
#
|
24
|
-
# $ ./pdfmd <action> <parameter> file
|
25
|
-
#
|
26
|
-
# $ ./pdfmd help <action>
|
27
|
-
#
|
28
|
-
# An overview about the actions can be seen when running the script without
|
29
|
-
# any parameters
|
30
|
-
#
|
31
|
-
# Check and set metadata of PDF documents
|
32
|
-
#
|
33
|
-
# A complete set of metada contains
|
34
|
-
#
|
35
|
-
# * CreateDate
|
36
|
-
# * Title
|
37
|
-
# * Author
|
38
|
-
# * Subject
|
39
|
-
# * Keywords (optional)
|
40
|
-
#
|
41
|
-
# TODO: Include password protected PDF documents as well
|
42
|
-
# TODO: Fix broken PDF files automatically
|
43
|
-
# TODO: Enable logging in more functions than only "sort"
|
44
|
-
# TODO: Read this: http://lostechies.com/derickbailey/2011/04/29/writing-a-thor-application/
|
45
|
-
# TODO: ... and this: http://blog.paracode.com/2012/05/17/building-your-tools-with-thor/
|
46
|
-
# TODO: Create Gem: http://yehudakatz.com/2010/04/02/using-gemspecs-as-intended/
|
47
|
-
# gs \
|
48
|
-
# -o repaired.pdf \
|
49
|
-
# -sDEVICE=pdfwrite \
|
50
|
-
# -dPDFSETTINGS=/prepress \
|
51
|
-
# corrupted.pdf
|
52
|
-
#
|
53
|
-
# == Author
|
54
|
-
#
|
55
|
-
# Daniel Roos <daniel-git@micronarrativ.org>
|
56
|
-
# Source: https://github.com/Micronarrativ/ruby-pmd
|
57
|
-
#
|
58
|
-
require "thor"
|
59
|
-
require "highline/import"
|
60
|
-
require "fileutils"
|
61
|
-
require "i18n"
|
62
|
-
require 'pathname'
|
63
|
-
require 'logger'
|
64
|
-
|
65
|
-
VERSION = '1.4.0'
|
66
|
-
|
67
|
-
#
|
68
|
-
# Function to read the metadata from a given file
|
69
|
-
# hash readMetadata(string)
|
70
|
-
#
|
71
|
-
#
# Read the relevant metadata of a file by running exiftool on it.
#
# hash readMetadata(string)
#
# pathFile - Path of the file to inspect.
#
# Returns a Hash with the String keys 'keywords', 'subject', 'title',
# 'author', 'creator' and 'createdate'. Fields that exiftool does not
# report stay an empty String; a value is never nil.
#
# Prints a message and aborts the program when pathFile is not an
# accessible regular file.
#
def readMetadata(pathFile = false)
  metadata = {
    'keywords'   => '',
    'subject'    => '',
    'title'      => '',
    'author'     => '',
    'creator'    => '',
    'createdate' => ''
  }

  # The default (false) must never reach File.file?, which only accepts paths.
  unless pathFile && File.file?(pathFile)
    puts "Cannot access file #{pathFile}. Abort"
    abort
  end

  # Run exiftool directly (argument array, no shell) so that file names
  # containing quotes or other shell metacharacters are passed verbatim.
  begin
    output = IO.popen(['exiftool', pathFile.to_s], &:read).to_s
  rescue SystemCallError => e
    # A missing exiftool previously resulted in empty metadata; keep that
    # outcome but tell the user why nothing was found.
    warn "Cannot run exiftool: #{e.message}"
    output = ''
  end

  # Which exiftool key fills which metadata field. 'Creator' is anchored so
  # that e.g. 'Creator Tool' is not picked up.
  fields = {
    'creator'    => /\ACreator\s*\z/,
    'author'     => /\AAuthor/,
    'createdate' => /Create Date/,
    'subject'    => /Subject/,
    'keywords'   => /Keywords/,
    'title'      => /Title/
  }

  output.each_line do |entry|
    # Limit the split to two parts: values may themselves contain ' : '.
    key, value = entry.chomp.split(' : ', 2)
    next if key.nil? || value.nil?

    fields.each do |name, pattern|
      # The first reported value wins; later matches never overwrite or
      # reset a value that has already been found.
      metadata[name] = value if metadata[name].empty? && key =~ pattern
    end
  end

  metadata
end
|
101
|
-
|
102
|
-
|
103
|
-
#
|
104
|
-
# Query Hiera installation
|
105
|
-
# I don't give a sh** about cross platform at this point.
|
106
|
-
#
|
107
|
-
# Return the hash of the hiera values or false (if no hiera is found)
|
108
|
-
#
|
109
|
-
#
# Query the local Hiera installation for a key.
#
# keyword - Hiera key to look up (e.g. 'pdfmd::config').
# facts   - Whitespace separated list of 'name=value' facts passed to the
#           hiera command. Defaults to 'fqdn=<output of `hostname`>'.
#
# Returns whatever the output of the hiera command evaluates to (a Hash for
# a configuration hash) or false if no hiera executable is found in $PATH.
#
def queryHiera(keyword, facts = 'UNSET')
  # Strip the trailing newline that `hostname` prints.
  facts = "fqdn=#{`hostname`.strip}" if facts == 'UNSET'

  # Look for the executable in $PATH without spawning a shell.
  hiera_found = ENV['PATH'].to_s.split(File::PATH_SEPARATOR).any? do |dir|
    candidate = File.join(dir, 'hiera')
    File.file?(candidate) && File.executable?(candidate)
  end

  unless hiera_found
    puts 'Cannot find "hiera" command in $path.'
    return false
  end

  # Pass keyword and facts as separate arguments (no shell involved), so
  # shell metacharacters in either of them are not interpreted.
  output = IO.popen(['hiera', keyword.to_s, *facts.to_s.split], &:read)

  # SECURITY NOTE(review): the hiera CLI prints its result as a Ruby literal
  # and the text is evaluated as Ruby code here. Anybody who controls the
  # hiera data (or the 'hiera' executable found in $PATH) can execute
  # arbitrary code. Kept for compatibility with the existing callers; a safe
  # parser should replace this.
  eval(output)
end
|
124
|
-
|
125
|
-
|
126
|
-
#
|
127
|
-
# Set Keywords Preface based on title and subject
|
128
|
-
# If subject matches a number/character combination and contains no spaces,
|
129
|
-
# the preface will be combined with the doktype.
|
130
|
-
# If not: preface will contain the whole subject with dots and spaces being
|
131
|
-
# replaced with underscores
|
132
|
-
#
|
133
|
-
#
# Build the preface used for keywords from the subject of a document.
#
# metadata - Hash of metadata; only metadata['subject'] is read.
# doktype  - String that is put in front of number-like subjects.
#
# If the subject starts with a digit and contains no whitespace (e.g. an
# invoice number), doktype + subject is returned. Otherwise the subject is
# transliterated to ASCII and every run of characters other than letters
# and digits is replaced by a single underscore.
#
def setKeywordsPreface(metadata, doktype)
  # A missing subject is treated like an empty one.
  subject = metadata['subject'].to_s

  # \S excludes whitespace. The previous character class [^+s] excluded the
  # literal characters '+' and 's' instead, so subjects with spaces were
  # treated as numbers.
  return doktype + subject if subject =~ /\A\d+\S+\z/

  # Take care of special characters
  I18n.enforce_available_locales = false
  I18n.transliterate(subject).gsub(/[^a-zA-Z0-9]+/, '_')
end
|
148
|
-
|
149
|
-
#
|
150
|
-
# Read user input
|
151
|
-
#
|
152
|
-
# Prompt the user with textstring and hand back whatever `ask` returns.
def readUserInput(textstring = 'Enter value: ')
  ask(textstring)
end
|
155
|
-
|
156
|
-
#
|
157
|
-
# Identify a date
|
158
|
-
# Function takes a string and tries to identify a date in there.
|
159
|
-
# returns false if no date could be identified
|
160
|
-
# otherwise the date is returned in the format as
|
161
|
-
#
|
162
|
-
# YYYY:MM:DD HH:mm:ss
|
163
|
-
#
|
164
|
-
# For missing time values zero is assumed
|
165
|
-
#
|
166
|
-
#
# Identify a date (and optionally a time) in a string.
#
# datestring - User supplied text, e.g. '20150131', '2015-01-31' or
#              '2015.01.31 12:30:00'.
#
# Returns the timestamp as String in the format 'YYYY:MM:DD HH:MM:SS'
# (missing time values are set to zero, single digit days and hours are
# zero padded) or false if no date could be identified.
#
def identifyDate(datestring)
  year   = '[1-2][90][0-9][0-9]' # NOTE(review): also accepts 10xx and 29xx - confirm intent
  month  = '0[1-9]|1[0-2]'       # month '00' is not a valid month
  day    = '[1-9]|0[1-9]|1[0-9]|2[0-9]|3[0-1]'
  hour   = '[0-1][0-9]|2[0-3]|[1-9]'
  minute = '[0-5][0-9]'
  second = '[0-5][0-9]'
  sep    = '[.:\-]'              # ':', '.' or '-' between the date parts

  # Tried in this order; the first matching format wins.
  formats = [
    /\A(#{year})(#{month})(#{day})\z/,
    /\A(#{year})(#{month})(#{day})(#{hour})(#{minute})(#{second})\z/,
    /\A(#{year})#{sep}(#{month})#{sep}(#{day})\s(#{hour}):(#{minute}):(#{second})\z/,
    /\A(#{year})#{sep}(#{month})#{sep}(#{day})\z/
  ]

  text = datestring.to_s.strip
  formats.each do |pattern|
    found = pattern.match(text)
    next unless found

    y, mo, d, h, mi, s = found.captures
    # String#to_i always parses decimal. Formatting the captured strings
    # directly with %d fails for '08' and '09' (treated as invalid octal).
    # Missing time parts are nil and nil.to_i is 0.
    return format('%s:%s:%02d %02d:%02d:%02d', y, mo, d.to_i, h.to_i, mi.to_i, s.to_i)
  end

  false
end
|
190
|
-
|
191
|
-
class DOC < Thor
|
192
|
-
|
193
|
-
|
194
|
-
#
|
195
|
-
# Show the current metadata tags
|
196
|
-
#
|
197
|
-
# TODO: format output as JSON and YAML
|
198
|
-
# TODO: Enable additional options
|
199
|
-
#
|
200
|
-
desc 'show', 'Show metadata of a file'
|
201
|
-
method_option :all, :type => :boolean, :aliases => '-a', :desc => 'Show all metatags', :default => false, :required => false
|
202
|
-
method_option :tag, :type => :string, :aliases => '-t', :desc => 'Show specific tag(s), comma separated', :required => false
|
203
|
-
long_desc <<-LONGDESC
|
204
|
-
== General
|
205
|
-
|
206
|
-
Show metatags of a PDF document.
|
207
|
-
|
208
|
-
The following tags are being shown:
|
209
|
-
\x5 * Author
|
210
|
-
\x5 * Creator
|
211
|
-
\x5 * CreateDate
|
212
|
-
\x5 * Title
|
213
|
-
\x5 * Subject
|
214
|
-
\x5 * Keywords
|
215
|
-
|
216
|
-
== Parameters
|
217
|
-
|
218
|
-
--all, -a
|
219
|
-
\x5 Show all relevant metatags for a document.
|
220
|
-
|
221
|
-
Relevant tags are Author,Creator, CreateDate, Title, Subject, Keywords.
|
222
|
-
|
223
|
-
--tag, -t
|
224
|
-
\x5 Specify the metatag to show. The selected metatag must be one of the relevant tags. Other tags are ignored and nothing is returned.
|
225
|
-
|
226
|
-
== Example
|
227
|
-
|
228
|
-
# Show default metatags for a pdf document
|
229
|
-
\x5>CLI show <filename>
|
230
|
-
|
231
|
-
# Show default metatags for example.pdf
|
232
|
-
\x5>CLI show example.pdf
|
233
|
-
|
234
|
-
# Show value for metatag 'Author' for the file example.pdf
|
235
|
-
\x5>CLI show -t author example.pdf
|
236
|
-
|
237
|
-
# Show value for metatags 'Author','Title' for the file example.pdf
|
238
|
-
\x5>CLI show -t author,title example.pdf
|
239
|
-
|
240
|
-
LONGDESC
|
241
|
-
# Print the metadata of filename.
#
# Without the option 'tag' (or with option 'all') all relevant tags are
# printed with their names. With the option 'tag' only the values of the
# listed tags are printed, one per line, in the order given.
#
# filename - Path of the PDF document, handed to readMetadata.
def show(filename)
  metadata = readMetadata(filename)

  # Output all metatags
  if options[:all] || options[:tag].nil?
    puts "Author : " + metadata['author'].to_s
    puts "Creator : " + metadata['creator'].to_s
    puts "CreateDate : " + metadata['createdate'].to_s
    puts "Subject : " + metadata['subject'].to_s
    puts "Title : " + metadata['title'].to_s
    puts "Keywords : " + metadata['keywords'].to_s

  # Output only specific tags
  else
    options[:tag].split(',').each do |tag|
      # The metadata keys are lowercase; accept 'Author', ' title' etc. too.
      puts metadata[tag.strip.downcase]
    end
  end
end
|
262
|
-
|
263
|
-
#
|
264
|
-
# Change a MetaTag Attribute
|
265
|
-
#
|
266
|
-
# TODO: keywords are added differently according to the documentation
|
267
|
-
# http://www.sno.phy.queensu.ca/~phil/exiftool/faq.html
|
268
|
-
desc 'edit', 'Edit Meta Tag(s)'
|
269
|
-
long_desc <<-LONGDESC
|
270
|
-
== General
|
271
|
-
|
272
|
-
Command will edit the metadata of a PDF document. Multiple values can be
|
273
|
-
specified or 'all'.
|
274
|
-
|
275
|
-
The command will invoke an interactive user input and request the values
|
276
|
-
for the metatag.
|
277
|
-
|
278
|
-
Additionally the file can be renamed at the end according to the new meta
|
279
|
-
tags. See `$ #{__FILE__} help rename` for details.
|
280
|
-
|
281
|
-
== Parameters
|
282
|
-
|
283
|
-
--tag, -t
|
284
|
-
\x5 Names or list of names of Metatag fields to set, separated by commata.
|
285
|
-
|
286
|
-
--rename, -r
|
287
|
-
\x5 Rename file after updating the meta tag information according to the fields.
|
288
|
-
|
289
|
-
This parameter is identical to running `> CLI rename <filename>`
|
290
|
-
|
291
|
-
General example:
|
292
|
-
|
293
|
-
# Edit tag 'TAG' and set a new value interactive.
|
294
|
-
\x5>CLI edit -t TAG <filename>
|
295
|
-
|
296
|
-
# Edit tag 'Author' and set new value interactive.
|
297
|
-
\x5>CLI edit -t author example.pdf
|
298
|
-
|
299
|
-
# Edit mulitple Tags and set a new value.
|
300
|
-
\x5>CLI edit -t tag1,tag2,tag3 <filename>
|
301
|
-
|
302
|
-
|
303
|
-
== Multiple Tags
|
304
|
-
|
305
|
-
For setting multiple tags list the tags comma separated.
|
306
|
-
|
307
|
-
For setting all tags (Author, Title, Subject, CreateDate, Keywords) use the keyword 'all' as tagname.
|
308
|
-
|
309
|
-
# Set tags 'Author', 'Title', 'Subject' in example.pdf interactivly.
|
310
|
-
\x5>CLI edit -t author,title,subject example.pdf`
|
311
|
-
|
312
|
-
# Set tags 'Author', 'Title', 'Subject', 'CreateDate', 'Keywords' in
|
313
|
-
example.pdf interactive.
|
314
|
-
\x5>CLI edit -t all example.pdf
|
315
|
-
|
316
|
-
== Tag: CreateDate
|
317
|
-
|
318
|
-
In order to enter a value for the 'CreateDate' field, some internal matching is going on in order to make it easier and faster to enter dates and times.
|
319
|
-
|
320
|
-
The following formats are identified/matched:
|
321
|
-
|
322
|
-
\x5 yyyymmdd
|
323
|
-
\x5 yyyymmd
|
324
|
-
\x5 yyyymmddHHMMSS
|
325
|
-
\x5 yyyy-mm-dd HH:MM:SS
|
326
|
-
\x5 yyyy:mm:dd HH:MM:SS
|
327
|
-
\x5 yyyy.mm.dd HH:MM:SS
|
328
|
-
\x5 yyyy-mm-d
|
329
|
-
\x5 yyyy-mm-dd
|
330
|
-
\x5 yyyy.mm.d
|
331
|
-
\x5 yyyy.mm.dd
|
332
|
-
\x5 yyyy:mm:d
|
333
|
-
\x5 yyyy:mm:dd
|
334
|
-
|
335
|
-
\x5 - If HH:MM:SS or HHMMSS is not provided, those values are automatically set to zero.
|
336
|
-
\x5 - The output format of every timestamp is <yyyy:mm:dd HH:MM:SS>
|
337
|
-
\x5 - When providing and invalid date, the incorrect date is rejected and the user asked to provide the correct date.
|
338
|
-
|
339
|
-
== Rename file
|
340
|
-
|
341
|
-
In addition to setting the tags the current file can be renamed according to
|
342
|
-
the new metadata.
|
343
|
-
|
344
|
-
# Set tag 'Author' and rename file example.pdf
|
345
|
-
\x5> CLI edit -t author -r example.pdf
|
346
|
-
|
347
|
-
See `> CLI help rename` for details about renaming.
|
348
|
-
|
349
|
-
LONGDESC
|
350
|
-
method_option :tag, :type => :string, :aliases => '-t', :desc => 'Name of the Tag(s) to Edit', :default => false, :required => true
|
351
|
-
method_option :rename, :type => :boolean, :aliases => '-r', :desc => 'Rename file after changing meta-tags', :default => false, :required => false
|
352
|
-
# Interactively change one or more metatags of filename.
#
# The tags come from the option 'tag' (comma separated, or 'all'). For
# every tag the current value is shown and a new value is requested from
# the user; values for 'createdate' are requested again until identifyDate
# accepts them. Each value is written with exiftool (-overwrite_original).
# With the option 'rename' the script calls itself afterwards with the
# command 'rename' and prints that output.
#
# filename - Path of the PDF document to change.
def edit(filename)
  metadata = readMetadata(filename)

  tags =
    if options[:tag] == 'all'
      ['author', 'title', 'subject', 'createdate', 'keywords']
    else
      options[:tag].split(',')
    end

  tags.each do |currentTag|
    # Metadata keys and exiftool tag names are used in lowercase.
    tagName = currentTag.strip.downcase
    prompt  = "Enter new value for #{currentTag} :"

    puts "Current value: '#{metadata[tagName]}'"
    answer = readUserInput(prompt)
    if tagName == 'createdate'
      # identifyDate returns false for input it cannot interpret.
      until (answer = identifyDate(answer))
        puts 'Invalid date format'
        answer = readUserInput(prompt)
      end
    end

    puts "Changing value for #{currentTag}: '#{metadata[tagName]}' => #{answer}"

    # Argument array instead of a shell command line: values and file names
    # containing quotes (e.g. O'Reilly) are passed to exiftool verbatim.
    # The output of exiftool is read and discarded, as before.
    IO.popen(['exiftool', "-#{tagName}=#{answer}", '-overwrite_original', filename.to_s], &:read)
  end

  #
  # If required, run the renaming task afterwards
  # This is not pretty, but seems to be the only way to do this in THOR
  #
  if options[:rename]
    puts IO.popen([__FILE__, 'rename', filename.to_s], &:read)
  end
end
|
384
|
-
|
385
|
-
#
|
386
|
-
# Check the metadata for the minimum necessary tags
|
387
|
-
# See documentation at the top of this file for details
|
388
|
-
#
|
389
|
-
# void check(string)
|
390
|
-
desc 'check', 'Check Metadata for completeness'
|
391
|
-
long_desc <<-LONGDESC
|
392
|
-
== General
|
393
|
-
|
394
|
-
Show value of the following metatags of a PDF document:
|
395
|
-
|
396
|
-
- Author
|
397
|
-
\x5- Creator
|
398
|
-
\x5- CreateDate
|
399
|
-
\x5- Subject
|
400
|
-
\x5- Title
|
401
|
-
\x5- Keywords
|
402
|
-
|
403
|
-
== Example
|
404
|
-
|
405
|
-
# Show the values of the metatags for example.pdf
|
406
|
-
\x5>CLI show example.pdf
|
407
|
-
|
408
|
-
LONGDESC
|
409
|
-
# Check that the tags author, subject, createdate and title of filename
# have a value.
#
# Prints one line 'Missing value: <tag>' for every tag without a value and
# terminates the program: exit status 0 if nothing is missing, otherwise 1.
#
# filename - Path of the PDF document, handed to readMetadata.
def check(filename)
  missing = readMetadata(filename).select do |key, value|
    # to_s: a tag without a value may be reported as nil instead of ''.
    key.match(/author|subject|createdate|title/) && value.to_s.empty?
  end

  missing.each_key { |key| puts 'Missing value: ' + key }

  exit(missing.empty? ? 0 : 1)
end
|
419
|
-
|
420
|
-
#
|
421
|
-
# Explain fields and Metatags
|
422
|
-
# Show information about how they are used.
|
423
|
-
#
|
424
|
-
desc 'explain','Show more information about usuable Meta-Tags'
|
425
|
-
long_desc <<-LONGDESC
|
426
|
-
== General
|
427
|
-
|
428
|
-
Explain some terms used with the script.
|
429
|
-
|
430
|
-
== Example
|
431
|
-
|
432
|
-
# Show the available subjects
|
433
|
-
\x5>CLI explain
|
434
|
-
|
435
|
-
# Show information about the subject 'author'
|
436
|
-
\x5>CLI explain author
|
437
|
-
|
438
|
-
LONGDESC
|
439
|
-
# Print an explanation of a term used by the script.
#
# term - One of 'author', 'createdate', 'hiera', 'keywords', 'subject',
#        'title' (case insensitive). Without a term the list of available
#        subjects is printed. Unknown terms print nothing.
def explain(term='')
  texts = {
    '' => [
      'Available subjects:',
      '- author',
      '- createdate',
      '- hiera',
      '- keywords',
      '- subject',
      '- title',
      ' ',
      "Run `$ #{__FILE__} explain <subject>` to get more details."
    ],
    'author' => [
      '[Author]',
      ' The sender or creator of the document.'
    ],
    'createdate' => [
      '[CreateDate]',
      ' Date of the document. This is not the date when the file was created, but',
      ' the date found in the document or printed on the document.'
    ],
    'title' => [
      '[Title]',
      ' General type of the document, e.g. Manual, Invoice.'
    ],
    'subject' => [
      '[Subject]',
      ' What is the document about.',
      ' For example:',
      ' Manual: What is the manual about?',
      ' Invoice: Invoice number?',
      ' Contract: Contract number of Subject of the contract?',
      ' Order: Ordernumber of the document?'
    ],
    'keywords' => [
      '[Keywords]',
      ' Anything else that might be of interesst.',
      ' In Orders the elements that have been orders. Contracts might contain the',
      ' Names and adress of the involved parties.',
      ' ',
      ' When writing Invoices with their numbers, these will be automatically be ',
      ' picked up and can be integrated in the filename, e.g. "Invoicenumber 12334'
    ],
    'hiera' => [
      'Information about hiera: https://docs.puppetlabs.com/hiera/1/index.html',
      '',
      'Installation:',
      ' $ gem install hiera',
      '',
      '',
      'Configure default settings in hiera:',
      '',
      ' YAML',
      ' ---',
      ' pdfmd::config:',
      ' sort:',
      ' destination : /tmp/output',
      # The sort command reads the boolean key 'copy'; a key named
      # 'action' is not read by it.
      ' copy : true',
      ' logfile : /var/log/pdfmd.log',
      ''
    ]
  }

  # Accept 'Author', 'HIERA' etc. as well.
  lines = texts[term.to_s.strip.downcase]
  puts lines if lines
end
|
498
|
-
|
499
|
-
#
|
500
|
-
# Sort the files into directories based on the author
|
501
|
-
#
|
502
|
-
desc 'sort','Sort files into directories sorted by Author'
|
503
|
-
long_desc <<-LONGDESC
|
504
|
-
== General
|
505
|
-
|
506
|
-
Will sort pdf documents into subdirectories according to the value of their
|
507
|
-
tag 'author'.
|
508
|
-
|
509
|
-
When using this action a logfile with all actions will be generated in the
|
510
|
-
current working directory with the same name as the script and the ending
|
511
|
-
'.log'. This can be disabled with the parameter 'log' if required.
|
512
|
-
|
513
|
-
If a document does not have an entry in the meta tag 'author', the file will
|
514
|
-
not be processed. This can be seen in the output of the logfile as well.
|
515
|
-
|
516
|
-
=== Parameters
|
517
|
-
|
518
|
-
[*destination|d*]
|
519
|
-
\x5 Speficy the root output directory to where the folderstructure is being created.
|
520
|
-
|
521
|
-
This parameter is required if hiera is not configured.
|
522
|
-
|
523
|
-
This parameter overwrites the hiera defaults
|
524
|
-
|
525
|
-
[*copy|c*]
|
526
|
-
\x5 Copy the files instead of moving them.
|
527
|
-
|
528
|
-
[*log|l*]
|
529
|
-
\x5 Disable/Enable the logging.
|
530
|
-
\x5 Default: enabled.
|
531
|
-
|
532
|
-
[*interactive|i*]
|
533
|
-
\x5 Disable/Enable interactive sorting. This will ask for confirmation for
|
534
|
-
\x5 each sorting action.
|
535
|
-
\x5 Default: disabled.
|
536
|
-
|
537
|
-
=== Replacement rules
|
538
|
-
|
539
|
-
The subdirectories for the documents are generated from the values in the
|
540
|
-
tag 'author' of each document.
|
541
|
-
|
542
|
-
In order to ensure a clean directory structure, there are certain rules
|
543
|
-
for altering the values.
|
544
|
-
\x5 1. Whitespaces are replaced by underscores.
|
545
|
-
\x5 2. Dots are replaced by underscores.
|
546
|
-
\x5 3. All letters are converted to their lowercase version.
|
547
|
-
\x5 4. Special characters are serialized
|
548
|
-
|
549
|
-
=== Hiera configuration
|
550
|
-
|
551
|
-
Set the default values mentioned below as sub-hash of the main configuration:
|
552
|
-
|
553
|
-
YAML
|
554
|
-
\x5sort:
|
555
|
-
\x5 key: value
|
556
|
-
|
557
|
-
=== Hiera defaults
|
558
|
-
|
559
|
-
The following values can be influenced by the hiera configuration in the
|
560
|
-
section 'sort'. Commandline parameter will overwrite the defaults coming
|
561
|
-
from hiera unless otherwise notet.
|
562
|
-
|
563
|
-
[*copy*]
|
564
|
-
\x5 If set to true copies the files from the source directory instead of moving them.
|
565
|
-
|
566
|
-
[*destination*]
|
567
|
-
\x5 Specifies the default output directory (root-directory). Either this or the
|
568
|
-
command line parameter for destinations must be set.
|
569
|
-
|
570
|
-
[*logfile*]
|
571
|
-
\x5 Specifies the default path for the logfile output. If this is not
|
572
|
-
specfied a logfile with the scriptname + '.log' will be created in the
|
573
|
-
current working directory.
|
574
|
-
|
575
|
-
[*interactive*]
|
576
|
-
\x5 If set to true, each file must be acknowledged to be processed when
|
577
|
-
running the script.
|
578
|
-
|
579
|
-
=== Example
|
580
|
-
|
581
|
-
This command does the following:
|
582
|
-
\x5 1. Take all pdf documents in the subdirectory ./documents.
|
583
|
-
\x5 2. Create the output folder structure in `/tmp/test/`.
|
584
|
-
\x5 3. Copy the files instead of moving them.
|
585
|
-
\x5 4. Disable the logging.
|
586
|
-
\x5> CLI sort -d /tmp/test -c -l false ./documents
|
587
|
-
|
588
|
-
LONGDESC
|
589
|
-
method_option :destination, :aliases => '-d', :required => false, :type => :string, :desc => 'Defines the output directory'
|
590
|
-
method_option :copy, :aliases => '-c', :required => false, :type => :boolean, :desc => 'Copy files instead of moving them'
|
591
|
-
method_option :log, :aliases => '-l', :required => false, :type => :boolean, :desc => 'Enable/Disable creation of log files', :default => true
|
592
|
-
method_option :interactive, :aliases => '-i', :required => false, :type => :boolean, :desc => 'Enable/Disable interactive sort'
|
593
|
-
# Sort every '*.pdf' file found directly in +inputDir+ into a subfolder of
# the destination directory named after the document's 'author' metadata tag.
#
# Settings are resolved in this order: command line option first, then the
# 'sort' section of the Hiera key 'pdfmd::config'.
#
# inputDir - directory that is scanned for PDF files (default: '.').
#
# Terminates the process (exit/abort) when no destination is configured, the
# logfile cannot be written while logging is enabled, the input is not an
# existing directory, or the destination is an existing regular file.
def sort(inputDir = '.')
  # queryHiera is defined elsewhere in this file; presumably it returns a
  # Hash. A missing 'sort' section is tolerated and treated as empty.
  hieraDefaults = queryHiera('pdfmd::config')
  sortDefaults  = (hieraDefaults && hieraDefaults['sort']) || {}

  # An explicit switch wins, including the negated form (--no-copy). Hiera is
  # only consulted when the switch was not given at all.
  copyAction = options[:copy]
  if copyAction.nil?
    copyAction = (sortDefaults['copy'] == true)
    puts 'Setting action to copy based on Hiera.' if copyAction
  end

  interactiveAction = options[:interactive]
  if interactiveAction.nil?
    interactiveAction = (sortDefaults['interactive'] == true)
    puts 'Setting interactive to true based on Hiera.' if interactiveAction
  end

  # Fetch alternate destination from hiera if available
  destination = options[:destination] || sortDefaults['destination']
  if destination.nil?
    puts 'No information about destination found.'
    puts 'Set parameter -d or configure hiera.'
    puts 'Abort.'
    exit 1
  end

  logenable = options[:log]
  logfile   = sortDefaults['logfile'] || File.join(Dir.pwd, File.basename(__FILE__) + '.log')

  if logenable
    # A logfile that does not exist yet is fine as long as its directory is
    # writable; Logger creates the file on first use.
    writeTarget = File.exist?(logfile) ? logfile : File.dirname(logfile)
    unless File.writable?(writeTarget)
      puts "Cannot write '#{logfile}'. Abort."
      exit 1
    end
    $logger = Logger.new(logfile)
  end

  # Single place that decides whether a message reaches the logger, so that
  # disabled logging can never hit an unset $logger.
  log = lambda { |level, message| $logger.public_send(level, message) if logenable }

  # Input validation
  abort('Input directory does not exist. Abort.') unless File.exist?(inputDir)
  abort('Input is a single file') unless File.directory?(inputDir)
  if File.file?(destination)
    abort("Output '#{destination}' is an existing file. Cannot create directory with the same name. Abort")
  end
  unless File.directory?(destination)
    FileUtils.mkdir_p(destination)
    log.call(:info, "Destination '#{destination}' has been created.")
  end

  I18n.enforce_available_locales = false # Serialize special characters

  # Iterate through all files
  Dir[File.join(inputDir, '*.pdf')].sort.each do |file|
    if interactiveAction
      answer = readUserInput("Process '#{file}' ([y]/n): ").to_s.strip
      # Empty input selects the default, which is 'y'.
      next unless answer.empty? || answer =~ /\Ay(es)?\z/i
    end

    metadata = readMetadata(file)
    author   = metadata['author']
    if author.nil? || author.empty?
      if logenable
        $logger.warn("Missing tag 'Author' for file '#{file}'. Skipping.")
      else
        puts "Missing tag 'Author' for file '#{file}'. Skipping"
      end
      next
    end

    # '/' is replaced as well so that an author tag can never address a
    # folder outside of the destination directory.
    author = I18n.transliterate(author.tr(' ./', '_')).downcase
    folderdestination = File.join(destination, author)

    unless File.directory?(folderdestination)
      FileUtils.mkdir_p(folderdestination)
      log.call(:info, "Folder '#{folderdestination}' has been created.")
    end

    filedestination = File.join(folderdestination, File.basename(file))

    # Final check before touching the filesystem
    if File.exist?(filedestination)
      log.call(:warn, "File '#{filedestination}' already exists. Ignoring.")
      next
    end

    log.call(:info, "File '#{file}' => '#{filedestination}'")

    # Move/Copy the file
    if copyAction
      FileUtils.cp(file, filedestination)
    else
      FileUtils.mv(file, filedestination)
    end
  end
end
|
686
|
-
|
687
|
-
#
|
688
|
-
# Rename the file according to the Metadata
|
689
|
-
#
|
690
|
-
# Scheme: YYYYMMDD-author-type-additionalInformation.extension
|
691
|
-
# Usage text and command line switches of the `rename` command declared below.
desc 'rename', 'Rename the file according to Metadata'
long_desc <<-LONGDESC
== General

Rename a file with the meta tags in the document.

== Parameter

--dryrun, -n
\x5 Simulate the renaming process and show the result without changing the file.

--all-keywords, -a
\x5 Use all keywords from the meta information in the file name and ignore the limit.

--keywords, -k
\x5 Set the number of keywords used in the filename to a new value.
\x5 Default: 3

--outputdir, -o
\x5 Not implemented yet. Default output dir for the renamed file is the source directory.

== Example

# Rename the file according to the metatags
\x5> CLI rename <filename>

# Rename example.pdf according to the metatags
\x5> CLI rename example.pdf

# Simulate renaming example.pdf according to the metatags (dry-run)
\x5> CLI rename -n example.pdf

== Rules

There are some rules regarding how documents are being renamed

Rule 1: All documents have the following filenaming structure:

<yyyymmdd>-<author>-<type>-<additionalInformation>.<extension>

\x5 # <yyyymmdd>: Year, month and day identical to the meta information in the
document.
\x5 # <author>: Author of the document, identical to the meta information
in the document. Special characters and whitespaces are replaced.
\x5 # <type>: Document type, is being generated from the title field in the metadata of the document. Document type is a three character abbreviation using the following logic:

\x5 til => Tilbudt|Angebot
\x5 odb => Orderbekreftelse
\x5 fak => Faktura
\x5 ord => Order
\x5 avt => Kontrakt|Avtale|Vertrag|contract
\x5 kvi => Kvittering
\x5 man => Manual
\x5 bil => Billett|Ticket
\x5 inf => Informasjon|Information
\x5 dok => unknown

If the document type can not be determined automatically, it defaults to 'dok'.

# <additionalInformation>: Information generated from the metadata fields
'title', 'subject' and 'keywords'.

If 'Title' or 'Keywords' contains one of the following keywords, they will be replaced with the corresponding abbreviation followed by the specified value separated by a whitespace:

\x5 fak => Faktura|Fakturanummer|Rechnung|Rechnungsnummer
\x5 kdn => Kunde|Kundenummer|Kunde|Kundennummer
\x5 ord => Ordre|Ordrenummer|Bestellung|Bestellungsnummer
\x5 kvi => Kvittering|Kvitteringsnummer|Quittung|Quittungsnummer

Rule 2: The number of keywords used in the filename is defined by the parameter '-k'. See the section of that parameter for more details and the default value.

Rule 3: Keywords matching 'kvi','fak','ord','kdn' are prioritised.

Rule 4: Special characters and whitespaces are replaced:

\x5 ' ' => '_'
\x5 '/' => '_'

Rule 5: The new filename has only lowercase characters.

== Example (detailed)

# Example PDF with following MetaTags:

\x5 Filename : example.pdf
\x5 Author : John
\x5 Subject : new Product
\x5 Title : Presentation
\x5 CreateDate : 1970:01:01 01:00:00
\x5 Keywords : John Doe, Jane Doe, Mister Doe

# Renaming the file
\x5> CLI rename example.pdf
\x5 example.pdf => 19700101-john-dok-new_product-john_doe-jane_doe.pdf

# Simulation to rename the file (no actual change)
\x5> CLI rename -n example.pdf
\x5example.pdf => 19700101-john-dok-new_product-john_doe-jane_doe.pdf

# Renaming the file with all keywords
\x5> CLI rename -n -a example.pdf
\x5example.pdf => 19700101-john-dok-new_product-john_doe-jane_doe-mister_doe.pdf

LONGDESC
method_option :dryrun, :type => :boolean, :aliases => '-n', :desc => 'Run without making changes', :default => false, :required => false
# NOTE(review): the option name is the String ':all-keywords', leading colon
# included, and `rename` reads it back under exactly that key. Presumably the
# long switch is therefore '--:all-keywords' rather than the documented
# '--all-keywords' - confirm before renaming; the name is kept unchanged here.
method_option ':all-keywords', :type => :boolean, :aliases => '-a', :desc => 'Add all keywords (no limit)', :default => false, :required => false
method_option :keywords, :type => :numeric, :aliases => '-k', :desc => 'Number of keywords to include (Default: 3)', :default => 3, :required => false
# No default: the previous values were the Symbol :false, which is truthy and
# therefore marked the option as always set.
method_option :outputdir, :aliases => '-o', :type => :string, :desc => 'Specify output directory', :required => false
|
799
|
-
# Rename +filename+ according to its PDF metadata.
#
# The new name is built as
#   <yyyymmdd>-<author>-<type>[-<additional information>].pdf
# in lowercase, and the file stays in its original directory.
#
# filename - path of the PDF document to rename.
#
# Reads options[:dryrun], options[:keywords] and options[':all-keywords'].
# With --dryrun, or when the name would not change, only the mapping is
# printed. Exits with status 1 when one of the tags author, subject,
# createdate or title is missing or empty.
def rename(filename)
  metadata = readMetadata(filename)

  # Check if the metadata is complete. Missing keys are treated like empty
  # values instead of failing later with a nil error.
  %w[author subject createdate title].each do |key|
    next unless metadata[key].to_s.empty?

    puts 'Missing value for ' + key
    puts 'Abort'
    exit 1
  end

  I18n.enforce_available_locales = false

  # '1970:01:01 01:00:00' => '19700101'
  date = metadata['createdate'].gsub(/ \d{2}:\d{2}:\d{2}.*$/, '').gsub(':', '')

  # Normalising; '/' is replaced too so the author can never add a path
  # component to the new filename.
  author = metadata['author'].gsub('.', '_').gsub('-', '').gsub(/\s/, '_').gsub('/', '_')
  author = I18n.transliterate(author)

  # Order matters: the more specific patterns have to be tested before /order/.
  doktype = case metadata['title']
            when /(Tilbudt|Angebot)/i                  then 'til'
            when /Orderbekreft?else/i                  then 'odb'
            when /faktura/i                            then 'fak'
            when /order/i                              then 'ord'
            when /(kontrakt|avtale|vertrag|contract)/i then 'avt'
            when /kvittering/i                         then 'kvi'
            when /manual/i                             then 'man'
            when /(billett|ticket)/i                   then 'bil'
            when /(informasjon|information)/i          then 'inf'
            else 'dok'
            end

  # setKeywordsPreface is defined elsewhere in this file; its result is used
  # as one filename segment (presumably derived from title and subject).
  keywords_preface = setKeywordsPreface(metadata, doktype).to_s

  # Normalise every keyword and move the ones starting with a known
  # abbreviation to the front of the list.
  keywordssorted = []
  metadata['keywords'].to_s.split(',').each do |value|
    value = value.lstrip.chomp
    value = value.gsub(/(Faktura|Rechnung)(s?nummer)? /i, 'fak')
    value = value.gsub(/(Kunde)(n)?(nummer)? /i, 'kdn')
    value = value.gsub(/(Kunde)(n)?(nummer)?-/i, 'kdn')
    value = value.gsub(/(Ordre|Bestellung)(s?nummer)? /i, 'ord')
    value = value.gsub(/(Kvittering|Quittung)(snummer)? /i, 'kvi')
    value = value.gsub(/\s/, '_').gsub('/', '_')

    if value.match(/^(fak|kdn|ord|kvi)/)
      keywordssorted.unshift(value)
    else
      keywordssorted.push(value)
    end
  end

  # Limit the number of keywords used in the filename unless all keywords
  # shall be added.
  keywordssorted = keywordssorted.first(options[:keywords]) unless options[':all-keywords']

  # Prioritised keywords go in front of the preface, all others behind it.
  # Joining the segments once avoids doubled or dangling dashes.
  leading  = []
  trailing = []
  keywordssorted.each do |value|
    if value.match(/(kvi|fak|ord|kdn)/i)
      leading.unshift(value)
    else
      trailing.push(value)
    end
  end
  segments = leading + [keywords_preface] + trailing
  keywords = I18n.transliterate(segments.reject(&:empty?).join('-'))
  keywords = '-' + keywords unless keywords.empty?

  extension   = 'pdf'
  newFilename = "#{date}-#{author}-#{doktype}#{keywords}.#{extension}".downcase

  # --outputdir is not implemented yet: the renamed file stays in the
  # directory of the source file.
  directory = File.dirname(filename)
  target    = directory == '.' ? newFilename : File.join(directory, newFilename)

  if options[:dryrun] || File.basename(filename) == newFilename
    puts filename + "\n => " + target
  else
    # FileUtils instead of a shell command: names containing quotes or other
    # shell metacharacters are moved safely.
    FileUtils.mv(filename, target)
  end
end
|
930
|
-
|
931
|
-
#
|
932
|
-
# One parameter to show the current version
|
933
|
-
#
|
934
|
-
# Route both version switches to the command defined below.
map({ %w[--version -v] => :__print_version })
desc '--version, -v', 'Show the current script version'
# Print the VERSION constant on standard output.
def __print_version
  $stdout.puts(VERSION)
end
|
939
|
-
|
940
|
-
end
|
941
|
-
|
942
|
-
DOC.start
|
943
|
-
|
2
|
+
require 'pdfmd.rb'
|