pdfmd 1.9.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -2
- data/README.md +2 -2
- data/TODO.mkd +26 -0
- data/bin/pdfmd +267 -1
- data/lib/pdfmd.rb +242 -634
- data/lib/pdfmd/explain.hiera.md +25 -4
- data/lib/pdfmd/long_desc.pdfmdconfig.txt +40 -0
- data/lib/pdfmd/long_desc.pdfmdedit.txt +166 -0
- data/lib/pdfmd/long_desc.pdfmdexplain.txt +16 -0
- data/lib/pdfmd/long_desc.pdfmdrename.txt +206 -0
- data/lib/pdfmd/long_desc.pdfmdshow.txt +92 -0
- data/lib/pdfmd/long_desc.pdfmdsort.txt +111 -0
- data/lib/pdfmd/long_desc.pdfmdstat.txt +23 -0
- data/lib/pdfmd/pdfmdconfig.rb +30 -0
- data/lib/pdfmd/pdfmdedit.rb +201 -0
- data/lib/pdfmd/pdfmdmethods.rb +125 -0
- data/lib/pdfmd/pdfmdrename.rb +243 -0
- data/lib/pdfmd/pdfmdshow.rb +88 -0
- data/lib/pdfmd/pdfmdsort.rb +115 -0
- data/lib/pdfmd/pdfmdstat.rb +117 -0
- data/lib/{string_extend.rb → pdfmd/string_extend.rb} +0 -0
- data/lib/run.rb +235 -0
- data/pdfmd.gemspec +3 -2
- metadata +23 -11
- data/lib/pdfmd/check.rb +0 -10
- data/lib/pdfmd/config.rb +0 -59
- data/lib/pdfmd/edit.rb +0 -144
- data/lib/pdfmd/rename.rb +0 -295
- data/lib/pdfmd/show.rb +0 -164
- data/lib/pdfmd/sort.rb +0 -199
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d9a3ab542e0996fa6a51d24668010afcbaf2bbb9
|
4
|
+
data.tar.gz: b3d3da094eb90f1c1e8d9f00f77404e2bec38393
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 47cdb550ad5259ab7578b43844cd1c34a423cbd8674fe7710f823360de2fde6ba1b8f6a597b5fff801cc9ced87f202363f822806397ba98ce35747a49014d360
|
7
|
+
data.tar.gz: c6df8ae93e2dddcd91c16e886a4672c6ce9b6e17d619a62ba7bba958f8a9dfa645250a2e419a7c9aeeea1cb3395ab5bfcafe2103718e0b9c96a1984cb431906f
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,23 @@
|
|
1
|
+
# Version 2.0.0
|
2
|
+
- Rewritten the gem using classes.
|
3
|
+
- Shorter Code (optimizing)
|
4
|
+
- Introduced a log-level in hiera
|
5
|
+
- Set default log-file to current working directory.
|
6
|
+
- Command 'rename': Parameter 'keywords' changed to 'nrkeywords'.
|
7
|
+
- Command 'rename': Checking if filename is unchanged and avoiding error message from system now.
|
8
|
+
- Command 'edit': Order of input values when changing all tags has been changed.
|
9
|
+
- Command 'edit': Replaced Tag separator '=' with ':'. See `pdfmd help edit` for details.
|
10
|
+
- Command 'sort': Added parameter 'overwrite'. See `pdfmd help sort` for details.
|
11
|
+
- Command 'edit': Added Hiera parameter 'opendoc' and 'pdfviewer'.
|
12
|
+
- Command 'stat': Added command to show some primitive statistics for a directory.
|
13
|
+
- Defaults for the Thor commands have mostly changed. No defaults there anymore, but in the class itself.
|
14
|
+
- Longer help texts taken out into separate files for more structured code.
|
15
|
+
- Changed multiple log messages in all commands.
|
16
|
+
- Added parameter '-r' which shows the revision of the gem.
|
17
|
+
- Bug: Renaming files with a '/' in the metadatafield 'author'.
|
18
|
+
- Bug: Renaming files with spaces in the metadatafield 'subject'.
|
19
|
+
- Collected Todo's in `TODO.mkd`.
|
20
|
+
|
1
21
|
# Version 1.9.1
|
2
22
|
- Removed some inactive Code
|
3
23
|
|
@@ -17,7 +37,7 @@
|
|
17
37
|
# Version 1.8.0
|
18
38
|
- Added Support for password protected pdf files in command 'show' and 'edit'
|
19
39
|
- Cleaned up renaming key-string and added all strings for NO, EN and DE language.
|
20
|
-
- Cleaned the output of `pdfmd config`.
|
40
|
+
- Cleaned the output of `pdfmd config`.
|
21
41
|
- Removed some TODOs
|
22
42
|
- Bugfix in the rename command
|
23
43
|
- Updated Tests
|
@@ -100,7 +120,7 @@
|
|
100
120
|
|
101
121
|
# Version 1.3
|
102
122
|
- Small bugfix about special characters in filenames (author).
|
103
|
-
- Bugfix for the tag 'createdate' written as 'CreateDate' which did not
|
123
|
+
- Bugfix for the tag 'createdate' written as 'CreateDate' which did not
|
104
124
|
take the date then.
|
105
125
|
- Removed inactive code.
|
106
126
|
- Added parameter 'version'
|
data/README.md
CHANGED
@@ -131,8 +131,6 @@ pdfmd::config:
|
|
131
131
|
sort:
|
132
132
|
destination : /data/tmp
|
133
133
|
copy : true
|
134
|
-
log : true
|
135
|
-
logfilepath : /var/log/pdfmd.log # Needs create/write rights on this file
|
136
134
|
interactive : false
|
137
135
|
rename:
|
138
136
|
#allkeywords : true # Does not make sense in combination with _keywords_
|
@@ -146,6 +144,8 @@ pdfmd::config:
|
|
146
144
|
|
147
145
|
Information about which hiera configuration settings are available can be either found in `pdfmd help <command>` or `pdfmd explain hiera`.
|
148
146
|
|
147
|
+
**PDFMD** currently expects the hiera configuration file to be at `/etc/hiera.yaml`. With Hiera2 the default location has changed to `/etc/puppetlabs/code/hiera.yaml`. This might be addressed in a future version. For now you have to create at least a symlink at `/etc/hiera.yaml` that points to the actual configuration file.
|
148
|
+
|
149
149
|
Test your hiera configuration with
|
150
150
|
|
151
151
|
``` bash
|
data/TODO.mkd
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
#TODO
|
2
|
+
|
3
|
+
## pdfmd.rb
|
4
|
+
* The logfile is a bit annoying. It should be possible to configure the logfile to be placed wherever convenient without creating double log files.
|
5
|
+
* Replace system copy command with fileutils.copy.
|
6
|
+
* Run commands on multiple PDF documents as one.
|
7
|
+
* Method for removing all metadata from a PDF document.
|
8
|
+
|
9
|
+
### Method: _stat_
|
10
|
+
* Parameter to ignore differences in upper and lowercase
|
11
|
+
* Parameter to disable percentage output
|
12
|
+
* Parameter to set output format: json,yaml, hash
|
13
|
+
|
14
|
+
## pdfmdrename.rb
|
15
|
+
|
16
|
+
* Refuse renaming if values are missing for either: author, title, subject or createdate. Keywords are optional.
|
17
|
+
|
18
|
+
## pdfmdedit.rb
|
19
|
+
|
20
|
+
* keywords are added differently according to the documentation, http://www.sno.phy.queensu.ca/~phil/exiftool/faq.html
|
21
|
+
|
22
|
+
## pdfmdstat.rb
|
23
|
+
|
24
|
+
### Method count_values
|
25
|
+
|
26
|
+
* Keywords could be split up into two words (as additional parameter)
|
data/bin/pdfmd
CHANGED
@@ -1,2 +1,268 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
require 'pdfmd
|
2
|
+
require 'pdfmd'
|
3
|
+
require 'pdfmd/pdfmdstat'
|
4
|
+
require "thor"
|
5
|
+
require "highline/import"
|
6
|
+
require "fileutils"
|
7
|
+
require "i18n"
|
8
|
+
require 'pathname'
|
9
|
+
|
10
|
+
VERSION = '2.0.0'
|
11
|
+
NAME = 'pdfmd'
|
12
|
+
|
13
|
+
#
|
14
|
+
# Read the content of the long description from an external file
|
15
|
+
#
|
16
|
+
# Read the content of a long description from an external file.
#
# The file is searched below two lib directories, in this order:
#   1. '../lib' relative to the directory of the running script ($0),
#      which covers running from a development checkout
#   2. the lib directory of the installed gem (Gem.dir, NAME and VERSION)
# The content of the first file found is returned.
#
# @param filename [String] path relative to the lib directory,
#   e.g. 'pdfmd/long_desc.pdfmdshow.txt'
# @return [String] content of the file, or '' if it exists in neither place
def readLongDesc(filename)

  # File.join inserts the separators; the former string interpolation
  # produced '<scriptdir>../lib', a directory that never exists.
  lib_dirs = [
    File.join(File.dirname(File.expand_path($0)), '..', 'lib'),
    File.join(Gem.dir, 'gems', "#{NAME}-#{VERSION}", 'lib'),
  ]

  # Only regular files qualify; stop at the first hit so the text is not
  # duplicated when both a checkout and an installed gem are present.
  description_file = lib_dirs
    .map { |dir| File.join(dir, filename) }
    .find { |path| File.file?(path) }

  description_file ? File.read(description_file) : ''

end
|
40
|
+
|
41
|
+
#
|
42
|
+
# Thor class
|
43
|
+
# Thor command line interface of pdfmd.
#
# Every command builds one of the Pdfmd* objects (defined outside this file)
# and configures it through that object's `determineValidSetting`, which is
# handed the command line value together with the name of the matching
# setting (e.g. 'sort:copy').
class DOC < Thor

  # Class options for all commands (logging only)
  # none

  #
  # Show the current metadata tags
  #
  desc 'show', 'Show metadata of a file'
  long_desc readLongDesc('pdfmd/long_desc.pdfmdshow.txt')
  method_option :tag, :type => :string, :aliases => '-t', :desc => 'Show specific tag(s), comma separated', :required => false
  method_option :format, :type => :string, :aliases => '-f', :desc => 'Define output format', :required => false
  method_option :includepdf, :type => :boolean, :aliases => '-i', :desc => 'Include the filename in output', :required => false
  def show(filename)

    pdfdoc        = Pdfmdshow.new filename
    format        = pdfdoc.determineValidSetting(options[:format], 'show:format')
    show_filename = pdfdoc.determineValidSetting(options[:includepdf], 'show:includepdf')
    show_tags     = pdfdoc.determineValidSetting(options[:tag], 'show:tags')
    pdfdoc.set_outputformat format
    pdfdoc.show_filename show_filename
    pdfdoc.set_tags show_tags
    puts pdfdoc.show_metatags

  end

  #
  # Show current settings
  #
  desc 'config', 'Show config defaults'
  long_desc readLongDesc('pdfmd/long_desc.pdfmdconfig.txt')
  method_option :show, :type => :boolean, :aliases => '-s', :required => false
  def config(subcommand = '')

    # No document is involved, hence the empty filename
    pdfdoc = Pdfmdconfig.new ''
    puts pdfdoc.show_config(subcommand)

  end

  #
  # Change a MetaTag Attribute
  #
  # FIXME: keywords are added differently according to the documentation
  # http://www.sno.phy.queensu.ca/~phil/exiftool/faq.html
  desc 'edit', 'Edit Meta Tag(s)'
  long_desc readLongDesc('pdfmd/long_desc.pdfmdedit.txt')
  method_option :tag, :type => :string, :aliases => '-t', :desc => 'Name of the Tag(s) to Edit', :required => true, :lazy_default => 'all'
  method_option :rename, :type => :boolean, :aliases => '-r', :desc => 'Rename file after changing meta-tags', :required => false
  method_option :opendoc, :type => :boolean, :aliases => '-o', :desc => 'Open the PDF document in a separate window.', :required => false, :lazy_default => true
  def edit(filename)

    pdfdoc = Pdfmdedit.new filename
    tags   = pdfdoc.determineValidSetting(options[:tag], 'edit:tags')
    pdfdoc.opendoc   = pdfdoc.determineValidSetting(options[:opendoc], 'edit:opendoc')
    pdfdoc.pdfviewer = pdfdoc.determineValidSetting(nil, 'edit:pdfviewer')
    pdfdoc.set_tags tags
    pdfdoc.update_tags
    pdfdoc.write_tags filename

    # If the file shall be renamed at the same time, trigger the other task
    if pdfdoc.determineValidSetting(options[:rename], 'edit:rename')
      pdfdoc.log('info', 'Running rename command.')
      rename filename
    end

  end

  #
  # Show statistics
  #
  desc 'stat', 'Show metadata statistics of multiple files'
  long_desc readLongDesc('pdfmd/long_desc.pdfmdstat.txt')
  option :recursive, :type => :boolean, :aliases => '-r', :desc => 'Include subdirectories recursively.', :lazy_default => true, :required => false
  option :tags, :aliases => '-t', :type => :string, :desc => 'Define Metatags to run at', :lazy_default => 'author,title,subject,createdate,keywords', :required => false
  def stat(input)

    if File.file?(input)
      puts 'Input is a single file.'
      puts 'n.a.y.'
      return
    end

    # Collect the PDF files once, so the number shown and the files that
    # are actually processed cannot differ (only regular files count).
    directory = input.chomp
    pattern   = options[:recursive] ? File.join(directory, '**', '*.pdf') : File.join(directory, '*.pdf')
    pdf_files = Dir.glob(pattern).select { |path| File.file?(path) }
    total     = pdf_files.size

    # NOTE(review): keyed by basename, so equally named files in different
    # subdirectories replace each other in recursive mode -- confirm whether
    # Pdfmdstat depends on these keys before changing that.
    collected = {}
    pdf_files.each_with_index do |path, index|

      # Multiply before dividing: '100 / total' is an integer division and
      # is already 0 for more than 100 files.
      percentage = 100 * (index + 1) / total
      print "\r Status: #{percentage} % of #{total} files processed. "

      collected[File.basename(path)] = Pdfmd.new(path).metadata.to_s

    end
    puts ''
    puts ''

    pdfstat = Pdfmdstat.new(collected)
    pdfstat.tags options[:tags]
    pdfstat.analyse_metadata

  end

  #
  # Sort the files into directories based on the author
  #
  desc 'sort','Sort files into directories sorted by Author'
  long_desc readLongDesc('pdfmd/long_desc.pdfmdsort.txt')
  method_option :destination, :aliases => '-d', :required => false, :type => :string, :desc => 'Defines the output directory'
  method_option :copy, :aliases => '-c', :required => false, :type => :boolean, :desc => 'Copy files instead of moving them'
  method_option :interactive, :aliases => '-i', :required => false, :type => :boolean, :desc => 'Enable/Disable interactive sorting'
  # ':aliases' was misspelled as ':alises', so '-o' was never registered
  method_option :overwrite, :aliases => '-o', :required => false, :type => :boolean, :desc => 'Enable/Disable file overwrite.', :lazy_default => true
  method_option :dryrun, :aliases => '-n', :required => false, :type => :boolean, :desc => 'Run without changing something'
  def sort(input)

    # A single file is sorted on its own, otherwise every '*.pdf' directly
    # inside the given directory is handled.
    documents = File.file?(input) ? [input] : Dir.glob(File.join(input.chomp, '*.pdf'))
    documents.each { |path| configured_sorter(path).sort }

  end

  no_commands do

    # Create a Pdfmdsort object for +path+ and apply all 'sort' settings.
    # Wrapped in no_commands so Thor does not treat it as a command.
    def configured_sorter(path)
      sorter = Pdfmdsort.new path
      sorter.copy        = sorter.determineValidSetting(options[:copy], 'sort:copy')
      sorter.interactive = sorter.determineValidSetting(options[:interactive], 'sort:interactive')
      sorter.destination = sorter.determineValidSetting(options[:destination], 'sort:destination')
      sorter.overwrite   = sorter.determineValidSetting(options[:overwrite], 'sort:overwrite')
      sorter.dryrun      = sorter.determineValidSetting(options[:dryrun], 'sort:dryrun')
      sorter
    end

  end

  #
  # Rename the file according to the Metadata
  #
  # Scheme: YYYYMMDD-author-subject-keywords.extension
  desc 'rename', 'Rename the file according to Metadata'
  long_desc readLongDesc('pdfmd/long_desc.pdfmdrename.txt')
  method_option :dryrun, :type => :boolean, :aliases => '-n', :desc => 'Run without making changes', :required => false
  method_option :allkeywords, :type => :boolean, :aliases => '-a', :desc => 'Add all keywords (no limit)', :required => false, :lazy_default => true
  method_option :nrkeywords, :type => :string, :aliases => '-k', :desc => 'Number of keywords to include (Default: 3)', :required => false
  method_option :outputdir, :aliases => '-o', :type => :string, :desc => 'Specify output directory', :required => false
  method_option :copy, :aliases => '-c', :type => :boolean, :desc => 'Copy instead of moving the file when renaming', :lazy_default => true
  def rename(filename)

    pdfdoc = Pdfmdrename.new filename
    pdfdoc.dryrun      = pdfdoc.determineValidSetting(options[:dryrun], 'rename:dryrun')
    pdfdoc.allkeywords = pdfdoc.determineValidSetting(options[:allkeywords], 'rename:allkeywords')
    pdfdoc.outputdir   = pdfdoc.determineValidSetting(options[:outputdir], 'rename:outputdir')

    # Leave the value of the object untouched when nothing is configured
    nrkeywords = pdfdoc.determineValidSetting(options[:nrkeywords], 'rename:nrkeywords')
    pdfdoc.nrkeywords = nrkeywords if nrkeywords

    pdfdoc.copy = pdfdoc.determineValidSetting(options[:copy], 'rename:copy')
    pdfdoc.rename

  end

  #
  # One parameter to show the current version
  #
  map %w[--version -v] => :__print_version
  desc "--version, -v", 'Show the current script version'
  def __print_version
    # '::' forces the top-level constant of this script. A bare VERSION is
    # looked up in the ancestors of DOC first and finds Thor::VERSION there.
    puts ::VERSION
  end

  map %w[--revision -r] => :__print_revision
  desc "--revision, -r", 'Show the revision of the gem'
  def __print_revision
    # Ask RubyGems directly instead of parsing the output of
    # `gem specification pdfmd metadata` (which also needed YAML loaded).
    puts Gem::Specification.find_by_name(::NAME).metadata['revision']
  rescue Gem::LoadError
    warn "Gem '#{::NAME}' is not installed, no revision available."
  end

end
|
239
|
+
|
240
|
+
DOC.start
|
241
|
+
|
242
|
+
# #
|
243
|
+
# # Explain fields and Metatags
|
244
|
+
# # Show information about how they are used.
|
245
|
+
# #
|
246
|
+
# desc 'explain','Show more information about usuable Meta-Tags'
|
247
|
+
# long_desc <<-LONGDESC
|
248
|
+
# == General
|
249
|
+
#
|
250
|
+
# Explain some terms used with the script.
|
251
|
+
#
|
252
|
+
# == Example
|
253
|
+
#
|
254
|
+
# # Show the available subjects
|
255
|
+
# \x5>CLI explain
|
256
|
+
#
|
257
|
+
# # Show information about the subject 'author'
|
258
|
+
# \x5>CLI explain author
|
259
|
+
#
|
260
|
+
# LONGDESC
|
261
|
+
# def explain(term='')
|
262
|
+
#
|
263
|
+
# ENV['PDFMD_EXPLAIN'] = term
|
264
|
+
# ENV['PDFMD'] = File.basename(__FILE__)
|
265
|
+
# require_relative('./pdfmd/explain.rb')
|
266
|
+
#
|
267
|
+
# end
|
268
|
+
#
|
data/lib/pdfmd.rb
CHANGED
@@ -1,678 +1,286 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# == File: pdfmd.rb
|
3
2
|
#
|
4
|
-
#
|
3
|
+
# Class for PDF document and meta tag management
|
5
4
|
#
|
6
|
-
|
7
|
-
|
8
|
-
# ==== Ruby gems:
|
9
|
-
# - thor
|
10
|
-
# - highline/import
|
11
|
-
# - fileutils
|
12
|
-
# - i18n
|
13
|
-
# - pathname
|
14
|
-
# - logger
|
15
|
-
#
|
16
|
-
# ==== OS applications:
|
17
|
-
#
|
18
|
-
# - exiftools
|
19
|
-
#
|
20
|
-
# === Usage
|
21
|
-
#
|
22
|
-
# $ ./pdfmd <action> <parameter> file
|
23
|
-
#
|
24
|
-
# $ ./pdfmd help <action>
|
25
|
-
#
|
26
|
-
# An overview about the actions can be seen when running the script without
|
27
|
-
# any parameters
|
28
|
-
#
|
29
|
-
# Check and set metadata of PDF documents
|
30
|
-
#
|
31
|
-
# A complete set of metada contains
|
32
|
-
#
|
33
|
-
# * CreateDate
|
34
|
-
# * Title
|
35
|
-
# * Author
|
36
|
-
# * Subject
|
37
|
-
# * Keywords (optional)
|
38
|
-
#
|
39
|
-
# TODO: Fix broken PDF files automatically
|
40
|
-
# TODO: Implement check that exifdata has been updated
|
41
|
-
# TODO: Read this: http://lostechies.com/derickbailey/2011/04/29/writing-a-thor-application/
|
42
|
-
# TODO: ... and this: http://blog.paracode.com/2012/05/17/building-your-tools-with-thor/
|
43
|
-
# gs \
|
44
|
-
# -o repaired.pdf \
|
45
|
-
# -sDEVICE=pdfwrite \
|
46
|
-
# -dPDFSETTINGS=/prepress \
|
47
|
-
# corrupted.pdf
|
48
|
-
#
|
49
|
-
# == Author
|
50
|
-
#
|
51
|
-
# Daniel Roos <daniel-git@micronarrativ.org>
|
52
|
-
# Source: https://github.com/Micronarrativ/ruby-pmd
|
53
|
-
#
|
54
|
-
require "thor"
|
55
|
-
require "highline/import"
|
56
|
-
require "fileutils"
|
57
|
-
require "i18n"
|
58
|
-
require 'pathname'
|
59
|
-
require 'logger'
|
60
|
-
|
61
|
-
VERSION = '1.9.1'
|
62
|
-
|
63
|
-
# Include general usage methods
|
64
|
-
require_relative('pdfmd/methods.rb')
|
65
|
-
|
66
|
-
class DOC < Thor
|
67
|
-
|
68
|
-
#
|
69
|
-
# Show the current metadata tags
|
70
|
-
#
|
71
|
-
# TODO: Enable additional options
|
72
|
-
#
|
73
|
-
desc 'show', 'Show metadata of a file'
|
74
|
-
method_option :all, :type => :boolean, :aliases => '-a', :desc => 'Show all metatags', :default => false, :required => false
|
75
|
-
method_option :tag, :type => :string, :aliases => '-t', :desc => 'Show specific tag(s), comma separated', :required => false
|
76
|
-
method_option :format, :type => :string, :aliases => '-f', :desc => 'Define output format', :required => false
|
77
|
-
method_option :includepdf, :type => :boolean, :aliases => '-i', :desc => 'Include the filename in output', :required => false
|
78
|
-
long_desc <<-LONGDESC
|
79
|
-
== General
|
80
|
-
|
81
|
-
Show metatags of a PDF document.
|
82
|
-
|
83
|
-
The following tags are being shown:
|
84
|
-
\x5 * Author
|
85
|
-
\x5 * Creator
|
86
|
-
\x5 * CreateDate
|
87
|
-
\x5 * Title
|
88
|
-
\x5 * Subject
|
89
|
-
\x5 * Keywords
|
90
|
-
|
91
|
-
== Parameters
|
92
|
-
|
93
|
-
--all, -a
|
94
|
-
\x5 Show all relevant metatags for a document.
|
5
|
+
require_relative './pdfmd/pdfmdmethods.rb'
|
6
|
+
class Pdfmd
|
95
7
|
|
96
|
-
|
8
|
+
require "i18n"
|
9
|
+
require 'pathname'
|
10
|
+
require 'fileutils'
|
11
|
+
require "highline/import"
|
97
12
|
|
98
|
-
|
13
|
+
# Include general method for Pdfmd
|
14
|
+
include Pdfmdmethods
|
99
15
|
|
100
|
-
|
101
|
-
\x5 Specify the metatag to show. The selected metatag must be one of the relevant tags. Other tags are ignored and nothing is returned.
|
16
|
+
attr_accessor :filename, :logstatus, :logfile
|
102
17
|
|
103
|
-
|
18
|
+
require_relative 'pdfmd/pdfmdshow.rb'
|
19
|
+
require_relative 'pdfmd/pdfmdconfig.rb'
|
20
|
+
require_relative 'pdfmd/pdfmdedit.rb'
|
21
|
+
require_relative 'pdfmd/pdfmdrename.rb'
|
22
|
+
require_relative 'pdfmd/pdfmdsort.rb'
|
23
|
+
require_relative 'pdfmd/string_extend.rb'
|
24
|
+
require 'logger'
|
104
25
|
|
105
|
-
|
26
|
+
@@default_tags = ['createdate', 'author', 'title', 'subject', 'keywords']
|
106
27
|
|
107
|
-
|
28
|
+
# Default document password
|
29
|
+
@@documentPassword = ''
|
108
30
|
|
109
|
-
|
31
|
+
# Document metadata, read from the document
|
32
|
+
@@metadata = Hash.new
|
110
33
|
|
111
|
-
|
34
|
+
# Hiera configuration data
|
35
|
+
@@hieradata = Hash.new
|
112
36
|
|
113
|
-
|
37
|
+
def initialize(filename)
|
114
38
|
|
115
|
-
|
39
|
+
# Default Logfile location and logging enabled
|
40
|
+
if !@logfile or @logfile.empty?
|
41
|
+
@logfile = Dir.pwd.chomp('/') + '/.pdfmd.log'
|
42
|
+
end
|
43
|
+
@log = true
|
116
44
|
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
Hiera parameter: includepdf (boolean)
|
127
|
-
|
128
|
-
== Example
|
129
|
-
|
130
|
-
# Show default metatags for a pdf document
|
131
|
-
\x5>CLI show <filename>
|
132
|
-
|
133
|
-
# Show default metatags for example.pdf
|
134
|
-
\x5>CLI show example.pdf
|
135
|
-
|
136
|
-
# Show value for metatag 'Author' for the file example.pdf
|
137
|
-
\x5>CLI show -t author example.pdf
|
138
|
-
|
139
|
-
# Show value for metatags 'Author','Title' for the file example.pdf
|
140
|
-
\x5>CLI show -t author,title example.pdf
|
141
|
-
|
142
|
-
== Hiera
|
143
|
-
|
144
|
-
Here is an example configuration for hiera:
|
145
|
-
|
146
|
-
pdfmd::config
|
147
|
-
show:
|
148
|
-
format : yaml
|
149
|
-
tag : author,subject
|
150
|
-
includepdf: true
|
151
|
-
|
152
|
-
LONGDESC
|
153
|
-
def show(filename)
|
154
|
-
|
155
|
-
ENV['PDFMD_FILENAME'] = filename
|
156
|
-
ENV['PDFMD_TAGS'] = options[:tag]
|
157
|
-
ENV['PDFMD_ALL'] = options[:all].to_s
|
158
|
-
ENV['PDFMD_FORMAT'] = options[:format]
|
159
|
-
ENV['PDFMD_INCLUDEPDF'] = options[:includepdf].to_s
|
160
|
-
require_relative('./pdfmd/show.rb')
|
45
|
+
# Defining the loglevel
|
46
|
+
@loglevel = 'info'
|
47
|
+
self.log('debug','---')
|
48
|
+
self.log('info',"Starting with file '#{filename}'.")
|
49
|
+
@filename = filename
|
50
|
+
@hieradata = queryHiera('pdfmd::config')
|
51
|
+
if ! filename.empty?
|
52
|
+
read_metatags(@filename)
|
53
|
+
end
|
161
54
|
|
162
55
|
end
|
163
56
|
|
164
57
|
#
|
165
|
-
#
|
166
|
-
|
167
|
-
|
168
|
-
long_desc <<-LONGDESC
|
169
|
-
|
170
|
-
Shows the current default configuration as available in Hiera.
|
171
|
-
|
172
|
-
== Usage
|
173
|
-
|
174
|
-
Example: `pdfmd config [<command>]`
|
175
|
-
|
176
|
-
|
177
|
-
== Parameter
|
178
|
-
|
179
|
-
[<commandname>]
|
180
|
-
|
181
|
-
Shows only the default configuration from hiera for the specified command.
|
182
|
-
The command parameter is not case-sensitive.
|
183
|
-
|
184
|
-
Example: `pdfmd config edit`
|
185
|
-
|
186
|
-
LONGDESC
|
187
|
-
method_option :show, :type => :boolean, :aliases => '-s', :required => false
|
188
|
-
def config(subcommand = '')
|
189
|
-
|
190
|
-
ENV['PDFMD_SHOW'] = options[:show].to_s
|
191
|
-
ENV['PDFMD_COMMAND'] = subcommand
|
192
|
-
require_relative('./pdfmd/config.rb')
|
193
|
-
|
58
|
+
# Make Metadata available to the outside
|
59
|
+
# Reader for the metadata kept in the class variable @@metadata.
#
# @return [Hash] the hash object itself, not a copy
def metadata; @@metadata; end
|
195
62
|
|
196
63
|
#
|
197
|
-
#
|
198
|
-
#
|
199
|
-
|
200
|
-
#
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
If a value is provided, the current Value will be replaced by the new value.
|
220
|
-
|
221
|
-
--rename, -r
|
222
|
-
\x5 Rename file after updating the meta tag information according to the fields.
|
223
|
-
|
224
|
-
This parameter is identical to running `> CLI rename <filename>`
|
225
|
-
|
226
|
-
Hiera parameter: rename
|
227
|
-
|
228
|
-
General example:
|
229
|
-
|
230
|
-
# Edit tag 'TAG' and set a new value interactive.
|
231
|
-
\x5>CLI edit -t TAG <filename>
|
232
|
-
|
233
|
-
# Edit tag 'Author' and set new value interactive.
|
234
|
-
\x5>CLI edit -t author example.pdf
|
235
|
-
|
236
|
-
# Edit multiple Tags and set a new value interactive.
|
237
|
-
\x5>CLI edit -t tag1,tag2,tag3 <filename>
|
238
|
-
|
239
|
-
# Edit multiple Tags and set a new value in batch mode.
|
240
|
-
\x5 CLI edit -t tag1='value1',tag2='value2' <filename>
|
241
|
-
|
242
|
-
== Multiple Tags
|
243
|
-
|
244
|
-
For setting multiple tags list the tags comma separated.
|
245
|
-
|
246
|
-
For setting all tags (Author, Title, Subject, CreateDate, Keywords) use the keyword 'all' as tagname.
|
247
|
-
|
248
|
-
# Set tags 'Author', 'Title', 'Subject' in example.pdf interactivly.
|
249
|
-
\x5>CLI edit -t author,title,subject example.pdf`
|
250
|
-
|
251
|
-
# Set tags 'Author', 'Title', 'Subject', 'CreateDate', 'Keywords' in
|
252
|
-
example.pdf interactive:
|
253
|
-
\x5>CLI edit -t all example.pdf
|
254
|
-
|
255
|
-
# Set tags 'Author', 'CreateDate' in example.pdf in batch mode (non-interactive:
|
256
|
-
|
257
|
-
CLI edit -t author='Me',createdate='1970:00:00 01:01:01' example.pdf
|
258
|
-
CLI edit -t author='Me',Createdate=19700000 example.pdf
|
259
|
-
|
260
|
-
== Tag: CreateDate
|
261
|
-
|
262
|
-
In order to enter a value for the 'CreateDate' field, some internal matching is going on in order to make it easier and faster to enter dates and times.
|
263
|
-
|
264
|
-
The following formats are identified/matched:
|
265
|
-
|
266
|
-
\x5 yyyymmdd
|
267
|
-
\x5 yyyymmd
|
268
|
-
\x5 yyyymmddHHMMSS
|
269
|
-
\x5 yyyy-mm-dd HH:MM:SS
|
270
|
-
\x5 yyyy:mm:dd HH:MM:SS
|
271
|
-
\x5 yyyy.mm.dd HH:MM:SS
|
272
|
-
\x5 yyyy-mm-d
|
273
|
-
\x5 yyyy-mm-dd
|
274
|
-
\x5 yyyy.mm.d
|
275
|
-
\x5 yyyy.mm.dd
|
276
|
-
\x5 yyyy:mm:d
|
277
|
-
\x5 yyyy:mm:dd
|
278
|
-
|
279
|
-
\x5 - If HH:MM:SS or HHMMSS is not provided, those values are automatically set to zero.
|
280
|
-
\x5 - The output format of every timestamp is <yyyy:mm:dd HH:MM:SS>
|
281
|
-
\x5 - When providing and invalid date, the incorrect date is rejected and the user asked to provide the correct date.
|
282
|
-
|
283
|
-
== Rename file
|
284
|
-
|
285
|
-
In addition to setting the tags the current file can be renamed according to
|
286
|
-
the new metadata.
|
287
|
-
|
288
|
-
# Set tag 'Author' and rename file example.pdf
|
289
|
-
\x5> CLI edit -t author -r example.pdf
|
290
|
-
|
291
|
-
See `> CLI help rename` for details about renaming.
|
292
|
-
|
293
|
-
To enable this feature in hiera add the key 'rename' into the section 'edit' with the value 'true'.
|
294
|
-
|
295
|
-
LONGDESC
|
296
|
-
method_option :tag, :type => :string, :aliases => '-t', :desc => 'Name of the Tag(s) to Edit', :default => false, :required => true
|
297
|
-
method_option :rename, :type => :boolean, :aliases => '-r', :desc => 'Rename file after changing meta-tags', :required => false
|
298
|
-
method_option :log, :aliases => '-l', :type => :boolean, :desc => 'Enable logging'
|
299
|
-
method_option :logfile, :aliases => '-p', :type => :string, :desc => 'Define path to logfile'
|
300
|
-
def edit(filename)
|
301
|
-
|
302
|
-
ENV['PDFMD_FILENAME'] = filename
|
303
|
-
ENV['PDFMD_TAG'] = options[:tag]
|
304
|
-
ENV['PDFMD_RENAME'] = options[:rename].to_s
|
305
|
-
ENV['PDFMD'] = __FILE__
|
306
|
-
ENV['PDFMD_LOG'] = options[:log].to_s
|
307
|
-
ENV['PDFMD_LOGFILE'] = options[:logfile]
|
308
|
-
|
309
|
-
require_relative('./pdfmd/edit.rb')
|
310
|
-
|
311
|
-
end
|
64
|
+
# Logging stuff
|
65
|
+
# def log(status = 'info', message)
|
66
|
+
|
67
|
+
# # Setting the loglevel
|
68
|
+
# case @loglevel
|
69
|
+
# when /info/i
|
70
|
+
# level = 'Logger::INFO'
|
71
|
+
# when /warn/i
|
72
|
+
# level = 'Logger::WARN'
|
73
|
+
# when /error/i
|
74
|
+
# level = 'Logger::ERROR'
|
75
|
+
# when /debug/i
|
76
|
+
# level = 'Logger::DEBUG'
|
77
|
+
# else
|
78
|
+
# level = 'Logger::INFO'
|
79
|
+
# end
|
80
|
+
# logger = Logger.new(@logfile)
|
81
|
+
# logger.level = eval level
|
82
|
+
# logger.send(status, message)
|
83
|
+
# logger.close
|
84
|
+
|
85
|
+
# end
|
312
86
|
|
313
87
|
#
|
314
|
-
# Check
|
315
|
-
#
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
== Example
|
332
|
-
|
333
|
-
# Show the values of the metatags for example.pdf
|
334
|
-
\x5>CLI show example.pdf
|
335
|
-
|
336
|
-
LONGDESC
|
337
|
-
def check(filename)
|
338
|
-
|
339
|
-
ENV['PDFMD_FILENAME'] = filename
|
340
|
-
require_relative('./pdfmd/check.rb')
|
88
|
+
# Check all or certain metatags
|
89
|
+
# If there is no content for a tag, return false
|
90
|
+
# Check whether the given metatags have content in @@metadata.
#
# @param metatags [Array<String>, String] names of the tags to check;
#   a String is split on whitespace into single tag names
# @return [Boolean] false if at least one of the tags has no content,
#   true otherwise (also for an empty list)
#
# Any other parameter type is logged as an error and ends the program
# with exit status 1.
def check_metatags(metatags = [])

  metatags = metatags.split if metatags.is_a?(String)

  unless metatags.is_a?(Array)
    self.log('error', 'Array or string parameter expected for parameter of check_metatags.')
    exit 1
  end

  # The former `each` block only evaluated `false` without returning it,
  # so the method always returned the (truthy) list of tags.
  # nil and '' both count as "no content".
  metatags.none? { |tag| @@metadata[tag].to_s.empty? }

end
|
343
106
|
|
344
|
-
#
|
345
|
-
#
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
107
|
+
# Read metatags from a file into
|
108
|
+
# @@metadata
|
109
|
+
# Read the default metatags of a PDF document into @@metadata.
#
# Every tag in @@default_tags is reset to '' and then filled from the
# output of the external program exiftool. While exiftool reports the
# document as password protected, the setting 'default:password' (asked
# from determineValidSetting) is tried once; after that the user is asked
# for the password. Before a fourth manual attempt the program exits with
# status 1. A path that is not a regular file aborts the program.
#
# @param filename [String] path of the PDF document
# @return [void] the result is stored in @@metadata
def read_metatags(filename)

  # Reset the known tags and request exactly these (plus warnings)
  exiftool_arguments = ['-Warning']
  @@default_tags.each do |tag|
    @@metadata[tag] = ''
    exiftool_arguments << "-#{tag}"
  end

  unless File.file?(filename)
    self.log('error', "Cannot access file '#{filename}'.")
    puts "Cannot access file '#{filename}'. Abort"
    abort
  end

  # exiftool is started from an argument list, i.e. without a shell.
  # Quotes and other shell characters in the file name or the password
  # can therefore neither break nor extend the command.
  run_exiftool = lambda do |extra_arguments|
    command = ['exiftool', *extra_arguments, *exiftool_arguments, filename.to_s]
    IO.popen(command, &:read).split("\n")
  end

  metastrings          = run_exiftool.call([])
  tried_hiera_password = false
  manual_attempts      = 0

  # Repeat until exiftool no longer warns about password protection
  while metastrings.any? { |line| line.match(/warning.*password protected/i) }

    self.log('info', "File '#{filename}' is password protected.")

    # Try a hiera password first, request otherwise from the user
    hiera_password = self.determineValidSetting(nil, 'default:password')
    if hiera_password && !tried_hiera_password

      self.log('debug', 'Using default password from hiera.')
      @@documentPassword   = hiera_password
      tried_hiera_password = true

    else

      # Message output if default password was not working
      if tried_hiera_password && manual_attempts == 0
        self.log('warn', 'Default password from hiera is invalid.')
      end

      # Give up after three manual password inputs
      if manual_attempts == 3
        self.log('error', "More than three password attempts on file '#{filename}'. Abort.")
        exit 1
      end

      self.log('info', 'Requesting password from user.')
      @@documentPassword = readUserInput('Document password : ').chomp
      manual_attempts += 1
      puts ''

    end

    # to_s: a configured password is not necessarily a String
    metastrings = run_exiftool.call(['-password', @@documentPassword.to_s])

  end

  self.log('debug', "Reading metadata from file '#{filename}'.")
  metastrings.each do |line|

    # Split at the first ' : ' only, so a value that contains the
    # separator itself is kept complete.
    name, value = line.split(' : ', 2)
    next if name.nil? # empty output line

    tag = name.downcase.gsub(/ /, '')
    @@metadata[tag] = value.to_s if @@metadata.has_key?(tag)

  end

end
|
370
193
|
|
371
194
|
#
|
372
|
-
#
|
373
|
-
|
374
|
-
desc 'sort','Sort files into directories sorted by Author'
|
375
|
-
long_desc <<-LONGDESC
|
376
|
-
== General
|
377
|
-
|
378
|
-
Will sort pdf documents into subdirectories according to the value of their
|
379
|
-
tag 'author'.
|
380
|
-
|
381
|
-
When using this action a logfile with all actions will be generated in the
|
382
|
-
current working directory with the same name as the script and the ending
|
383
|
-
'.log'. This can be disabled with the parameter 'log' if required or adjusted to write the logfile to a different location.
|
384
|
-
|
385
|
-
If a document does not have an entry in the meta tag 'author', the file will
|
386
|
-
not be processed. This can be seen in the output of the logfile as well.
|
387
|
-
|
388
|
-
=== Parameters
|
389
|
-
|
390
|
-
[*destination|d*]
|
391
|
-
\x5 Speficy the root output directory to where the folderstructure is being created.
|
392
|
-
|
393
|
-
This parameter is required if hiera is not configured.
|
394
|
-
|
395
|
-
This parameter overwrites the hiera defaults
|
396
|
-
|
397
|
-
[*copy|c*]
|
398
|
-
\x5 Copy the files instead of moving them.
|
399
|
-
|
400
|
-
[*log|l*]
|
401
|
-
\x5 Disable/Enable the logging.
|
402
|
-
|
403
|
-
Default: enabled.
|
404
|
-
|
405
|
-
[*logfile|p*]
|
406
|
-
\x5 Set an alternate path for the logfile. If not path is chosen, the logfile
|
407
|
-
is being created in the current working directory as `pdfmd.log`.
|
408
|
-
|
409
|
-
[*interactive|i*]
|
410
|
-
\x5 Disable/Enable interactive sorting. This will ask for confirmation for each sorting action.
|
411
|
-
|
412
|
-
Default: disabled.
|
413
|
-
|
414
|
-
=== Replacement rules
|
415
|
-
|
416
|
-
The subdirectories for the documents are generated from the values in the
|
417
|
-
tag 'author' of each document.
|
418
|
-
|
419
|
-
In order to ensure a clean directory structure, there are certain rules
|
420
|
-
for altering the values.
|
421
|
-
\x5 1. Whitespaces are replaced by underscores.
|
422
|
-
\x5 2. Dots are replaced by underscores.
|
423
|
-
\x5 3. All letters are converted to their lowercase version.
|
424
|
-
\x5 4. Special characters are serialized
|
425
|
-
|
426
|
-
=== Hiera configuration
|
195
|
+
# Read user input
|
196
|
+
def readUserInput(textstring = 'Enter value: ')
|
427
197
|
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
=== Hiera defaults
|
437
|
-
|
438
|
-
The following values can be influenced by the hiera configuration in the section 'sort'. Commandline parameter will overwrite the defaults coming from hiera unless otherwise notet.
|
439
|
-
|
440
|
-
[*copy*]
|
441
|
-
\x5 If set to true copies the files from the source directory instead of moving them.
|
442
|
-
|
443
|
-
[*destination*]
|
444
|
-
\x5 Specifies the default output directory (root-directory). Either this or the command line parameter for destinations must be set.
|
445
|
-
|
446
|
-
[*log*]
|
447
|
-
\x5 Enables (true) or disables (false) logging.
|
448
|
-
|
449
|
-
[*logfile*]
|
450
|
-
\x5 Specifes the default path for the logfile. If no path is set and logging is enable, the logfile will be created in the current working directory.
|
451
|
-
|
452
|
-
Default is the current working directory with the filename `pdfmd.log`
|
453
|
-
|
454
|
-
[*interactive*]
|
455
|
-
\x5 If set to true, each file must be acknowledged to be processed when running the script.
|
456
|
-
|
457
|
-
=== Example
|
458
|
-
|
459
|
-
This command does the following:
|
460
|
-
\x5 1. Take all pdf documents in the subdirectory ./documents.
|
461
|
-
\x5 2. Create the output folder structure in `/tmp/test/`.
|
462
|
-
\x5 3. Copy the files instead of moving them.
|
463
|
-
\x5 4. Disable the logging.
|
464
|
-
\x5> CLI sort -d /tmp/test -c -l false ./documents
|
465
|
-
|
466
|
-
# Sort only a single file
|
467
|
-
\x5> CLI sort -d /tmp/test -c -l false ./documents/test.pdf
|
468
|
-
|
469
|
-
LONGDESC
|
470
|
-
method_option :destination, :aliases => '-d', :required => false, :type => :string, :desc => 'Defines the output directory'
|
471
|
-
method_option :copy, :aliases => '-c', :required => false, :type => :boolean, :desc => 'Copy files instead of moving them'
|
472
|
-
method_option :log, :aliases => '-l', :required => false, :type => :boolean, :desc => 'Enable/Disable creation of log files'
|
473
|
-
method_option :logfile, :aliases => '-p', :required => false, :type => :string, :desc => 'Change the default logfile path'
|
474
|
-
method_option :interactive, :aliases => '-i', :required => false, :type => :boolean, :desc => 'Enable/Disable interactive sorting'
|
475
|
-
method_option :dryrun, :aliases => '-n', :required => false, :type => :boolean, :desc => 'Run without changing something'
|
476
|
-
def sort(inputDir)
|
477
|
-
|
478
|
-
ENV['PDFMD_INPUTDIR'] = inputDir
|
479
|
-
ENV['PDFMD_DESTINATION'] = options[:destination].to_s
|
480
|
-
ENV['PDFMD_COPY'] = options[:copy].to_s
|
481
|
-
ENV['PDFMD_LOG'] = options[:log].to_s
|
482
|
-
ENV['PDFMD_LOGFILEPATH'] = options[:logfile].to_s
|
483
|
-
ENV['PDFMD_INTERACTIVE'] = options[:interactive].to_s
|
484
|
-
ENV['PDFMD_DRYRUN'] = options['dryrun'].to_s
|
485
|
-
ENV['PDFMD'] = __FILE__
|
486
|
-
require_relative('./pdfmd/sort.rb')
|
198
|
+
self.log('info','Waiting for user input.')
|
199
|
+
if textstring.match(/password/i)
|
200
|
+
print textstring
|
201
|
+
STDIN.noecho(&:gets).chomp + "\n"
|
202
|
+
else
|
203
|
+
ask textstring
|
204
|
+
end
|
487
205
|
|
488
206
|
end
|
489
207
|
|
490
208
|
#
|
491
|
-
#
|
492
|
-
#
|
493
|
-
# Scheme: YYYYMMDD-author-subject-keywords.extension
|
494
|
-
desc 'rename', 'Rename the file according to Metadata'
|
495
|
-
long_desc <<-LONGDESC
|
496
|
-
== General
|
497
|
-
|
498
|
-
Rename a file with the meta tags in the document.
|
499
|
-
|
500
|
-
== Parameter
|
501
|
-
|
502
|
-
--dry-run, -n
|
503
|
-
\x5 Simulate the renaming process and show the result without changing the file.
|
504
|
-
|
505
|
-
--all-keywords, -a
|
506
|
-
\x5 Use all keywords from the meta information in the file name and ignore the limit.
|
507
|
-
|
508
|
-
Hiera parameter: allkeywords [true|false]
|
509
|
-
|
510
|
-
Default: false
|
511
|
-
|
512
|
-
--keywwords, -k
|
513
|
-
\x5 Set the number of keywords used in the filename to a new value.
|
514
|
-
|
515
|
-
Hiera parameter: keywords <integer>
|
209
|
+
# Query hiera for settings if available
|
210
|
+
# def queryHiera(keyword, facts = 'UNSET')
|
516
211
|
|
517
|
-
|
212
|
+
# # Set default facts
|
213
|
+
# facts == 'UNSET' ? facts = "fqdn=#{`hostname`}" : ''
|
518
214
|
|
519
|
-
|
520
|
-
|
215
|
+
# # If Hiera is not found (damn cat, get off my keyboard!), return an empty hash,
|
216
|
+
# # otherwise return the hash from Hiera
|
217
|
+
# if !system('which hiera > /dev/null 2>&1')
|
218
|
+
# self.log('warn','Cannot find hiera command in $path.')
|
219
|
+
# puts 'Cannot find "hiera" command in $path.'
|
220
|
+
# return eval('{}')
|
221
|
+
# else
|
222
|
+
# self.log('debug', 'Reading hiera values for pdfmd::config.')
|
223
|
+
# commandreturn = ''
|
224
|
+
# commandreturn = `hiera #{keyword} #{facts} 2>/dev/null`
|
521
225
|
|
522
|
-
|
226
|
+
# if $?.exitstatus == 1
|
227
|
+
# self.log('warn', 'Could not retrieve configuration from with hiera.')
|
228
|
+
# eval('{}')
|
229
|
+
# else
|
230
|
+
# self.log('debug', 'Could retrieve configuration from hiera.')
|
231
|
+
# eval(commandreturn)
|
232
|
+
# end
|
523
233
|
|
524
|
-
|
234
|
+
# end
|
525
235
|
|
526
|
-
|
527
|
-
\x5 Copy the file instead of moving it to the new name or destination.
|
236
|
+
# end # End of queryHiera
|
528
237
|
|
529
|
-
Hiera parameter: copy [true|false]
|
530
|
-
|
531
|
-
Default: false
|
532
|
-
|
533
|
-
The directory must exist at runtime.
|
534
|
-
|
535
|
-
--log, -l
|
536
|
-
\x5 Enable logging.
|
537
|
-
|
538
|
-
Values: true|false
|
539
|
-
|
540
|
-
--logfile, -p
|
541
|
-
\x5 Define logfile path
|
542
|
-
|
543
|
-
Default: current working-dir/pdfmd.log
|
544
|
-
|
545
|
-
== Example
|
546
|
-
|
547
|
-
# Rename the file according to the metatags
|
548
|
-
\x5> CLI rename <filename>
|
549
|
-
|
550
|
-
# Rename example.pdf according to the metatags
|
551
|
-
\x5> CLI rename example.pdf
|
552
|
-
|
553
|
-
# Simulate renaming example.pdf according to the metatags (dry-run)
|
554
|
-
\x5> CLI rename -n example.pdf
|
555
|
-
|
556
|
-
== Hiera
|
557
|
-
|
558
|
-
There are Hiera settings available, that cannot be addressed by a commandline parameter.
|
559
|
-
|
560
|
-
defaultdoctype: Defines the appreviation for the default document type. This one isused when no other document type could be determined from the metadata-field 'title'. Default value is 'doc'.
|
561
|
-
|
562
|
-
For details on how to set the parameter, see 'pdfmd explain hiera'.
|
563
|
-
|
564
|
-
|
565
|
-
== Rules
|
566
|
-
|
567
|
-
There are some rules regarding how documents are being renamed
|
568
|
-
|
569
|
-
Rule 1: All documents have the following filenaming structure:
|
570
|
-
|
571
|
-
<yyyymmdd>-<author>-<type>-<additionalInformation>.<extension>
|
572
|
-
|
573
|
-
\x5 # <yyyymmdd>: Year, month and day identical to the meta information in the document.
|
574
|
-
\x5 # <author>: Author of the document, identical to the meta information
|
575
|
-
in the document. Special characters and whitespaces are replaced.
|
576
|
-
\x5 # <type>: Document type, is being generated from the title field in the metadata of the document. Document type is a three character abbreviation following the following logic:
|
577
|
-
|
578
|
-
\x5 con => Contract
|
579
|
-
\x5 inv => Invoice
|
580
|
-
\x5 inf => Information
|
581
|
-
\x5 man => Manual
|
582
|
-
\x5 off => Offer
|
583
|
-
\x5 ord => Order
|
584
|
-
\x5 rpt => Receipt
|
585
|
-
\x5 tic => Ticket
|
586
|
-
|
587
|
-
If the dokument type can not be determined automatically, it defaults to 'dok'.
|
588
|
-
|
589
|
-
This default behavior got introduced with version 1.8.1 and can be overwritten by hiera.
|
590
|
-
See `pdfmd explain hiera-keys` for information on how to do this.
|
591
|
-
|
592
|
-
# <additionalInformation>: Information generated from the metadata fields
|
593
|
-
'title', 'subject' and 'keywords'.
|
594
|
-
|
595
|
-
If 'Title' or 'Keywords' contains one of the following keywords, they will be replaced with the corresponding abbreviation followed by the specified value:
|
596
|
-
|
597
|
-
\x5 Contract => con
|
598
|
-
\x5 Invoice => inv
|
599
|
-
\x5 Information => inf
|
600
|
-
\x5 Manual => man
|
601
|
-
\x5 Offer => off
|
602
|
-
\x5 Order => ord
|
603
|
-
\x5 Receipt => rpt
|
604
|
-
\x5 Ticket => tic
|
605
|
-
|
606
|
-
This setting will be overwritten as well by defining the 'keys' hash in Hiera.
|
607
|
-
|
608
|
-
Rule 2: The number of keywords used in the filename is defined by the parameter '-k'. See the section of that parameter for more details and the default value.
|
609
|
-
|
610
|
-
Rule 3: Keywords matching 'kvi','fak','ord','kdn' are prioritised.
|
611
|
-
|
612
|
-
Rule 4: Special characters and whitespaces are replaced:
|
613
|
-
|
614
|
-
\x5 ' ' => '_'
|
615
|
-
\x5 '/' => '_'
|
616
|
-
|
617
|
-
Rule 5: The new filename has only lowercase characters.
|
618
|
-
|
619
|
-
== Example (detailed)
|
620
|
-
|
621
|
-
# Example PDF with following MetaTags:
|
622
|
-
|
623
|
-
\x5 Filename : example.pdf
|
624
|
-
\x5 Author : John
|
625
|
-
\x5 Subject : new Product
|
626
|
-
\x5 Title : Presentation
|
627
|
-
\x5 CreateDate : 1970:01:01 01:00:00
|
628
|
-
\x5 Keywords : John Doe, Jane Doe, Mister Doe
|
629
|
-
|
630
|
-
# Renaming the file
|
631
|
-
\x5> CLI rename example.pdf
|
632
|
-
\x5 example.pdf => 19700101-john-dok-new_product-john_doe-jane_doe.pdf
|
633
|
-
|
634
|
-
# Simulation to rename the file (no actual change)
|
635
|
-
\x5> CLI rename -n example.pdf
|
636
|
-
\x5example.pdf => 19700101-john-dok-new_product-john_doe-jane_doe.pdf
|
637
|
-
|
638
|
-
# Renaming the file with all keywords
|
639
|
-
\x5> CLI rename -n -a example.pdf
|
640
|
-
|
641
|
-
\x5 example.pdf => 19700101-john-dok-new_product-john_doe-jane_doe-mister_doe.pdf
|
642
|
-
|
643
|
-
LONGDESC
|
644
|
-
method_option :dryrun, :type => :boolean, :aliases => '-n', :desc => 'Run without making changes', :default => false, :required => false
|
645
|
-
method_option :allkeywords, :type => :boolean, :aliases => '-a', :desc => 'Add all keywords (no limit)', :required => false
|
646
|
-
method_option :keywords, :type => :numeric, :aliases => '-k', :desc => 'Number of keywords to include (Default: 3)', :required => false
|
647
|
-
method_option :outputdir, :aliases => '-o', :type => :string, :desc => 'Speficy output directory', :default => false, :required => :false
|
648
|
-
method_option :copy, :aliases => '-c', :type => :boolean, :desc => 'Copy instead of moving the file when renaming'
|
649
|
-
method_option :log, :aliases => '-l', :type => :boolean, :desc => 'Enable logging'
|
650
|
-
method_option :logfile, :aliases => '-p', :type => :string, :desc => 'Define path to logfile'
|
651
|
-
def rename(filename)
|
652
|
-
|
653
|
-
ENV['PDFMD_FILENAME'] = filename
|
654
|
-
ENV['PDFMD_DRYRUN'] = options[:dryrun].to_s
|
655
|
-
ENV['PDFMD_ALLKEYWORDS'] = options[:allkeywords].to_s
|
656
|
-
ENV['PDFMD_OUTPUTDIR'] = options[:outputdir].to_s
|
657
|
-
ENV['PDFMD_NUMBERKEYWORDS'] = options[:keywords].to_s
|
658
|
-
ENV['PDFMD_COPY'] = options[:copy].to_s
|
659
|
-
ENV['PDFMD_LOG'] = options[:log].to_s
|
660
|
-
ENV['PDFMD_LOGFILE'] = options[:logfile].to_s
|
661
|
-
ENV['PDFMD'] = __FILE__
|
662
|
-
require_relative('pdfmd/rename.rb')
|
663
|
-
|
664
|
-
end
|
665
238
|
|
666
239
|
#
|
667
|
-
#
|
240
|
+
# Determine the valid setting
|
241
|
+
# 1. Priority: manual setting
|
242
|
+
# 2. Priority: Hiera setting
|
668
243
|
#
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
244
|
+
# If there is no manual setting, the value of 'manualSetting'
|
245
|
+
# should be set to 'nil'
|
246
|
+
#
|
247
|
+
# def determineValidSetting(manualSetting,key)
|
248
|
+
|
249
|
+
# if !@hieradata.nil?
|
250
|
+
# hieraKey = '@hieradata'
|
251
|
+
# hieraValue = ''
|
252
|
+
|
253
|
+
# key.split(':').each do |keyname|
|
254
|
+
|
255
|
+
# hieraKeyCheck = eval(hieraKey)
|
256
|
+
# if !hieraKeyCheck.nil? and hieraKeyCheck.has_key?(keyname)
|
257
|
+
# hieraKey = hieraKey + "['#{keyname}']"
|
258
|
+
# else
|
259
|
+
# # Key has not been found
|
260
|
+
# hieraKey = ''
|
261
|
+
# break
|
262
|
+
# end
|
263
|
+
# end
|
264
|
+
|
265
|
+
# hieraValue = eval(hieraKey)
|
266
|
+
# else
|
267
|
+
# hieraValue = nil
|
268
|
+
# end
|
269
|
+
|
270
|
+
# if !manualSetting.nil?
|
271
|
+
# self.log('debug', "Chosing manual setting '#{key} = #{manualSetting}'.")
|
272
|
+
# manualSetting
|
273
|
+
# elsif !hieraValue.nil? or
|
274
|
+
# !hieraValue == ''
|
275
|
+
|
276
|
+
# self.log('debug', "Chosing hiera setting '#{key} = #{hieraValue}'.")
|
277
|
+
# hieraValue
|
278
|
+
|
279
|
+
# else
|
280
|
+
# self.log('debug', "No setting chosen for '#{key}' in hiera.")
|
281
|
+
# false
|
282
|
+
# end
|
283
|
+
|
284
|
+
# end
|
285
|
+
|
286
|
+
end # End of Class
|