pdfmd 1.9.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -2
- data/README.md +2 -2
- data/TODO.mkd +26 -0
- data/bin/pdfmd +267 -1
- data/lib/pdfmd.rb +242 -634
- data/lib/pdfmd/explain.hiera.md +25 -4
- data/lib/pdfmd/long_desc.pdfmdconfig.txt +40 -0
- data/lib/pdfmd/long_desc.pdfmdedit.txt +166 -0
- data/lib/pdfmd/long_desc.pdfmdexplain.txt +16 -0
- data/lib/pdfmd/long_desc.pdfmdrename.txt +206 -0
- data/lib/pdfmd/long_desc.pdfmdshow.txt +92 -0
- data/lib/pdfmd/long_desc.pdfmdsort.txt +111 -0
- data/lib/pdfmd/long_desc.pdfmdstat.txt +23 -0
- data/lib/pdfmd/pdfmdconfig.rb +30 -0
- data/lib/pdfmd/pdfmdedit.rb +201 -0
- data/lib/pdfmd/pdfmdmethods.rb +125 -0
- data/lib/pdfmd/pdfmdrename.rb +243 -0
- data/lib/pdfmd/pdfmdshow.rb +88 -0
- data/lib/pdfmd/pdfmdsort.rb +115 -0
- data/lib/pdfmd/pdfmdstat.rb +117 -0
- data/lib/{string_extend.rb → pdfmd/string_extend.rb} +0 -0
- data/lib/run.rb +235 -0
- data/pdfmd.gemspec +3 -2
- metadata +23 -11
- data/lib/pdfmd/check.rb +0 -10
- data/lib/pdfmd/config.rb +0 -59
- data/lib/pdfmd/edit.rb +0 -144
- data/lib/pdfmd/rename.rb +0 -295
- data/lib/pdfmd/show.rb +0 -164
- data/lib/pdfmd/sort.rb +0 -199
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d9a3ab542e0996fa6a51d24668010afcbaf2bbb9
|
4
|
+
data.tar.gz: b3d3da094eb90f1c1e8d9f00f77404e2bec38393
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 47cdb550ad5259ab7578b43844cd1c34a423cbd8674fe7710f823360de2fde6ba1b8f6a597b5fff801cc9ced87f202363f822806397ba98ce35747a49014d360
|
7
|
+
data.tar.gz: c6df8ae93e2dddcd91c16e886a4672c6ce9b6e17d619a62ba7bba958f8a9dfa645250a2e419a7c9aeeea1cb3395ab5bfcafe2103718e0b9c96a1984cb431906f
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,23 @@
|
|
1
|
+
# Version 2.0.0
|
2
|
+
- Rewritten the gem using classes.
|
3
|
+
- Shorter Code (optimizing)
|
4
|
+
- Introduced a log-level in hiera
|
5
|
+
- Set default log-file to current working directory.
|
6
|
+
- Command 'rename': Parameter 'keywords' changed to 'nrkeywords'.
|
7
|
+
- Command 'rename': Checking if filename is unchanged and avoiding error message from system now.
|
8
|
+
- Command 'edit': Order of input values when changing all tags has been changed.
|
9
|
+
- Command 'edit': Replaced Tag separator '=' with ':'. See `pdfmd help edit` for details.
|
10
|
+
- Command 'sort': Added parameter 'overwrite'. See `pdfmd help sort` for details.
|
11
|
+
- Command 'edit': Added Hiera parameter 'opendoc' and 'pdfviewer'.
|
12
|
+
- Command 'stat': Added command to show some primitive statistics for a directory.
|
13
|
+
- Defaults for the Thor commands have mostly changed. No defaults there anymore, but in the class itself.
|
14
|
+
- Longer help texts taken out into separate files for more structured code.
|
15
|
+
- Changed multiple log messages in all commands.
|
16
|
+
- Added parameter '-r' which shows the revision of the gem.
|
17
|
+
- Bug: Renaming files with a '/' in the metadatafield 'author'.
|
18
|
+
- Bug: Renaming files with spaces in the metadatafield 'subject'.
|
19
|
+
- Collected Todo's in `TODO.mkd`.
|
20
|
+
|
1
21
|
# Version 1.9.1
|
2
22
|
- Removed some inactive Code
|
3
23
|
|
@@ -17,7 +37,7 @@
|
|
17
37
|
# Version 1.8.0
|
18
38
|
- Added Support for password protected pdf files in command 'show' and 'edit'
|
19
39
|
- Cleaned up renaming key-string and added all strings for the NO, EN and DE languages.
|
20
|
-
- Cleaned the output of `pdfmd config`.
|
40
|
+
- Cleaned the output of `pdfmd config`.
|
21
41
|
- Removed some TODOs
|
22
42
|
- Bugfix in the rename command
|
23
43
|
- Updated Tests
|
@@ -100,7 +120,7 @@
|
|
100
120
|
|
101
121
|
# Version 1.3
|
102
122
|
- Small bugfix about special characters in filenames (author).
|
103
|
-
- Bugfix for the tag 'createdate' written as 'CreateDate' which did not
|
123
|
+
- Bugfix for the tag 'createdate' written as 'CreateDate' which did not
|
104
124
|
take the date then.
|
105
125
|
- Removed inactive code.
|
106
126
|
- Added parameter 'version'
|
data/README.md
CHANGED
@@ -131,8 +131,6 @@ pdfmd::config:
|
|
131
131
|
sort:
|
132
132
|
destination : /data/tmp
|
133
133
|
copy : true
|
134
|
-
log : true
|
135
|
-
logfilepath : /var/log/pdfmd.log # Needs create/write rights on this file
|
136
134
|
interactive : false
|
137
135
|
rename:
|
138
136
|
#allkeywords : true # Does not make sense in combination with _keywords_
|
@@ -146,6 +144,8 @@ pdfmd::config:
|
|
146
144
|
|
147
145
|
Information about which hiera configuration settings are available can be either found in `pdfmd help <command>` or `pdfmd explain hiera`.
|
148
146
|
|
147
|
+
**PDFMD** currently expects the hiera configuration file to be at `/etc/hiera.yaml`. With Hiera2 the default location has changed to `/etc/puppetlabs/code/hiera.yaml`. This might be addressed in a future version. For now you have to create at least a symlink at `/etc/hiera.yaml`.
|
148
|
+
|
149
149
|
Test your hiera configuration with
|
150
150
|
|
151
151
|
``` bash
|
data/TODO.mkd
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
#TODO
|
2
|
+
|
3
|
+
## pdfmd.rb
|
4
|
+
* The logfile is a bit annoying. It should be possible to configure the logfile to be placed wherever convenient without creating double log files.
|
5
|
+
* Replace system copy command with fileutils.copy.
|
6
|
+
* Run commands on multiple PDF documents as one.
|
7
|
+
* Method for removing all metadata from a PDF document.
|
8
|
+
|
9
|
+
### Method: _stat_
|
10
|
+
* Parameter to ignore differences in upper and lowercase
|
11
|
+
* Parameter to disable percentage output
|
12
|
+
* Parameter to set output format: json,yaml, hash
|
13
|
+
|
14
|
+
## pdfmdrename.rb
|
15
|
+
|
16
|
+
* Refuse renaming if values are missing for either: author, title, subject or createdate. Keywords are optional.
|
17
|
+
|
18
|
+
## pdfmdedit.rb
|
19
|
+
|
20
|
+
* keywords are added differently according to the documentation, http://www.sno.phy.queensu.ca/~phil/exiftool/faq.html
|
21
|
+
|
22
|
+
## pdfmdstat.rb
|
23
|
+
|
24
|
+
### Method count_values
|
25
|
+
|
26
|
+
* Keywords could be split up into two words (as additional parameter)
|
data/bin/pdfmd
CHANGED
@@ -1,2 +1,268 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
require 'pdfmd
|
2
|
+
require 'pdfmd'
|
3
|
+
require 'pdfmd/pdfmdstat'
|
4
|
+
require "thor"
|
5
|
+
require "highline/import"
|
6
|
+
require "fileutils"
|
7
|
+
require "i18n"
|
8
|
+
require 'pathname'
|
9
|
+
|
10
|
+
VERSION = '2.0.0'
|
11
|
+
NAME = 'pdfmd'
|
12
|
+
|
13
|
+
#
|
14
|
+
# Read the content of the long description from an external file
|
15
|
+
#
|
16
|
+
#
# Read the long description of a command from an external text file.
#
# filename     - path of the description file relative to a lib directory,
#                e.g. 'pdfmd/long_desc.pdfmdshow.txt'.
# search_paths - optional list of directories to look in. When omitted, the
#                lib directory next to the running script (development
#                checkout) is tried first, then the lib directory of the
#                installed gem "#{NAME}-#{VERSION}" below Gem.dir.
#
# Returns the content of the first matching file, or an empty string when
# the file exists in none of the directories.
#
def readLongDesc(filename, search_paths = nil)
  # File.join inserts the separator that the former string interpolation
  # ("<bindir>../lib") was missing.
  search_paths ||= [
    File.join(File.dirname(File.expand_path($0)), '..', 'lib'),
    File.join(Gem.dir, 'gems', "#{NAME}-#{VERSION}", 'lib')
  ]

  search_paths.each do |dir|
    candidate = File.join(dir, filename)
    # Stop at the first hit so the text is never returned twice when the
    # file is reachable through more than one search path.
    return File.read(candidate) if File.file?(candidate)
  end

  ''
end
|
40
|
+
|
41
|
+
#
|
42
|
+
# Thor class
|
43
|
+
# Thor command line interface of pdfmd.
#
# Each command creates the matching Pdfmd* object for its input, resolves
# every setting through `determineValidSetting` (called with the command line
# value and a 'command:key' name) and then triggers the action on the object.
class DOC < Thor

  # Class options for all commands (logging only)
  # none

  #
  # Show the current metadata tags
  #
  desc 'show', 'Show metadata of a file'
  long_desc readLongDesc 'pdfmd/long_desc.pdfmdshow.txt'
  method_option :tag, :type => :string, :aliases => '-t', :desc => 'Show specific tag(s), comma separated', :required => false
  method_option :format, :type => :string, :aliases => '-f', :desc => 'Define output format', :required => false
  method_option :includepdf, :type => :boolean, :aliases => '-i', :desc => 'Include the filename in output', :required => false
  def show(filename)
    pdfdoc        = Pdfmdshow.new filename
    format        = pdfdoc.determineValidSetting(options[:format], 'show:format')
    show_filename = pdfdoc.determineValidSetting(options[:includepdf], 'show:includepdf')
    show_tags     = pdfdoc.determineValidSetting(options[:tag], 'show:tags')

    pdfdoc.set_outputformat format
    pdfdoc.show_filename show_filename
    pdfdoc.set_tags show_tags
    puts pdfdoc.show_metatags
  end

  #
  # Show current settings
  #
  desc 'config', 'Show config defaults'
  long_desc readLongDesc 'pdfmd/long_desc.pdfmdconfig.txt'
  method_option :show, :type => :boolean, :aliases => '-s', :required => false
  def config(subcommand = '')
    # An empty filename is passed: no document is involved here.
    pdfdoc = Pdfmdconfig.new ''
    puts pdfdoc.show_config subcommand
  end

  #
  # Change a MetaTag Attribute
  #
  # FIXME: keywords are added differently according to the documentation
  # http://www.sno.phy.queensu.ca/~phil/exiftool/faq.html
  desc 'edit', 'Edit Meta Tag(s)'
  long_desc readLongDesc 'pdfmd/long_desc.pdfmdedit.txt'
  method_option :tag, :type => :string, :aliases => '-t', :desc => 'Name of the Tag(s) to Edit', :required => true, :lazy_default => 'all'
  method_option :rename, :type => :boolean, :aliases => '-r', :desc => 'Rename file after changing meta-tags', :required => false
  method_option :opendoc, :type => :boolean, :aliases => '-o', :desc => 'Open the PDF document in a separate window.', :required => false, :lazy_default => true
  def edit(filename)
    pdfdoc = Pdfmdedit.new filename
    tags   = pdfdoc.determineValidSetting(options[:tag], 'edit:tags')
    pdfdoc.opendoc   = pdfdoc.determineValidSetting(options[:opendoc], 'edit:opendoc')
    # The viewer has no command line switch, hence nil as first argument.
    pdfdoc.pdfviewer = pdfdoc.determineValidSetting(nil, 'edit:pdfviewer')
    pdfdoc.set_tags tags
    pdfdoc.update_tags
    pdfdoc.write_tags filename

    # If the file shall be renamed at the same time, trigger the other task
    if pdfdoc.determineValidSetting(options[:rename], 'edit:rename')
      pdfdoc.log('info', 'Running rename command.')
      rename filename
    end
  end

  #
  # Show statistics
  #
  desc 'stat', 'Show metadata statistics of multiple files'
  long_desc readLongDesc 'pdfmd/long_desc.pdfmdstat.txt'
  option :recursive, :type => :boolean, :aliases => '-r', :desc => 'Include subdirectories recursively.', :lazy_default => true, :required => false
  option :tags, :aliases => '-t', :type => :string, :desc => 'Define Metatags to run at', :lazy_default => 'author,title,subject,createdate,keywords', :required => false
  def stat(input)
    if File.file?(input)
      puts 'Input is a single file.'
      puts 'n.a.y.'
      return
    end

    # Collect the PDF files once, so that the number shown in the progress
    # line and the files actually processed cannot differ.
    pattern = options[:recursive] ? File.join('**', '*.pdf') : '*.pdf'
    files   = Dir.glob(File.join(input.chomp, pattern)).select { |file| File.file?(file) }
    total   = files.size

    # Iterate through all files and collect the metadata
    collected = {}
    files.each_with_index do |filename, index|
      # Multiply before dividing: with integer arithmetic `100 / total`
      # is 0 as soon as there are more than 100 files.
      percentage = 100 * (index + 1) / total
      print "\r Status: #{percentage} % of #{total} files processed. "

      collected[File.basename(filename)] = Pdfmd.new(filename).metadata.to_s
    end
    puts ''
    puts ''

    pdfstat = Pdfmdstat.new(collected)
    pdfstat.tags options[:tags]
    pdfstat.analyse_metadata
  end

  #
  # Sort the files into directories based on the author
  #
  desc 'sort','Sort files into directories sorted by Author'
  long_desc readLongDesc 'pdfmd/long_desc.pdfmdsort.txt'
  method_option :destination, :aliases => '-d', :required => false, :type => :string, :desc => 'Defines the output directory'
  method_option :copy, :aliases => '-c', :required => false, :type => :boolean, :desc => 'Copy files instead of moving them'
  method_option :interactive, :aliases => '-i', :required => false, :type => :boolean, :desc => 'Enable/Disable interactive sorting'
  # The key was misspelled as :alises, so '-o' was never registered.
  method_option :overwrite, :aliases => '-o', :required => false, :type => :boolean, :desc => 'Enable/Disable file overwrite.', :lazy_default => true
  method_option :dryrun, :aliases => '-n', :required => false, :type => :boolean, :desc => 'Run without changing something'
  def sort(input)
    if File.file?(input)
      sort_single_file input
    else
      # Run the actions for all PDF files directly inside the directory
      Dir.glob(input.chomp + '/*.pdf').each { |filename| sort_single_file filename }
    end
  end

  # Rename the file according to the Metadata
  #
  # Scheme: YYYYMMDD-author-subject-keywords.extension
  desc 'rename', 'Rename the file according to Metadata'
  long_desc readLongDesc('pdfmd/long_desc.pdfmdrename.txt')
  method_option :dryrun, :type => :boolean, :aliases => '-n', :desc => 'Run without making changes', :required => false
  method_option :allkeywords, :type => :boolean, :aliases => '-a', :desc => 'Add all keywords (no limit)', :required => false, :lazy_default => true
  method_option :nrkeywords, :type => :string, :aliases => '-k', :desc => 'Number of keywords to include (Default: 3)', :required => false
  method_option :outputdir, :aliases => '-o', :type => :string, :desc => 'Specify output directory', :required => false
  method_option :copy, :aliases => '-c', :type => :boolean, :desc => 'Copy instead of moving the file when renaming', :lazy_default => true
  def rename(filename)
    pdfdoc = Pdfmdrename.new filename
    pdfdoc.dryrun      = pdfdoc.determineValidSetting(options[:dryrun], 'rename:dryrun')
    pdfdoc.allkeywords = pdfdoc.determineValidSetting(options[:allkeywords], 'rename:allkeywords')
    pdfdoc.outputdir   = pdfdoc.determineValidSetting(options[:outputdir], 'rename:outputdir')

    # Only assign a keyword limit when one was resolved, so the value already
    # held by the object stays in effect otherwise.
    nrkeywords = pdfdoc.determineValidSetting(options[:nrkeywords], 'rename:nrkeywords')
    pdfdoc.nrkeywords = nrkeywords if nrkeywords

    pdfdoc.copy = pdfdoc.determineValidSetting(options[:copy], 'rename:copy')
    pdfdoc.rename
  end

  #
  # One parameter to show the current version
  #
  map %w[--version -v] => :__print_version
  desc "--version, -v", 'Show the current script version'
  def __print_version
    puts VERSION
  end

  #
  # Show the 'revision' entry of the gem specification metadata.
  #
  map %w[--revision -r] => :__print_revision
  desc "--revision, -r", 'Show the revision of the gem'
  def __print_revision
    # Ask RubyGems directly instead of parsing the YAML output of a
    # `gem specification` subprocess.
    spec = Gem.loaded_specs[NAME] || Gem::Specification.find_by_name(NAME)
    puts spec.metadata['revision']
  rescue Gem::LoadError
    abort "Gem '#{NAME}' is not installed, no revision available."
  end

  private

  # Apply the resolved 'sort' settings to one PDF file and sort it.
  # Private, so Thor does not expose it as a command.
  def sort_single_file(path)
    pdfdoc = Pdfmdsort.new path
    pdfdoc.copy        = pdfdoc.determineValidSetting(options[:copy], 'sort:copy')
    pdfdoc.interactive = pdfdoc.determineValidSetting(options[:interactive], 'sort:interactive')
    pdfdoc.destination = pdfdoc.determineValidSetting(options[:destination], 'sort:destination')
    pdfdoc.overwrite   = pdfdoc.determineValidSetting(options[:overwrite], 'sort:overwrite')
    pdfdoc.dryrun      = pdfdoc.determineValidSetting(options[:dryrun], 'sort:dryrun')
    pdfdoc.sort
  end

end
|
239
|
+
|
240
|
+
DOC.start
|
241
|
+
|
242
|
+
# #
|
243
|
+
# # Explain fields and Metatags
|
244
|
+
# # Show information about how they are used.
|
245
|
+
# #
|
246
|
+
# desc 'explain','Show more information about usuable Meta-Tags'
|
247
|
+
# long_desc <<-LONGDESC
|
248
|
+
# == General
|
249
|
+
#
|
250
|
+
# Explain some terms used with the script.
|
251
|
+
#
|
252
|
+
# == Example
|
253
|
+
#
|
254
|
+
# # Show the available subjects
|
255
|
+
# \x5>CLI explain
|
256
|
+
#
|
257
|
+
# # Show information about the subject 'author'
|
258
|
+
# \x5>CLI explain author
|
259
|
+
#
|
260
|
+
# LONGDESC
|
261
|
+
# def explain(term='')
|
262
|
+
#
|
263
|
+
# ENV['PDFMD_EXPLAIN'] = term
|
264
|
+
# ENV['PDFMD'] = File.basename(__FILE__)
|
265
|
+
# require_relative('./pdfmd/explain.rb')
|
266
|
+
#
|
267
|
+
# end
|
268
|
+
#
|
data/lib/pdfmd.rb
CHANGED
@@ -1,678 +1,286 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# == File: pdfmd.rb
|
3
2
|
#
|
4
|
-
#
|
3
|
+
# Class for PDF document and meta tag management
|
5
4
|
#
|
6
|
-
|
7
|
-
|
8
|
-
# ==== Ruby gems:
|
9
|
-
# - thor
|
10
|
-
# - highline/import
|
11
|
-
# - fileutils
|
12
|
-
# - i18n
|
13
|
-
# - pathname
|
14
|
-
# - logger
|
15
|
-
#
|
16
|
-
# ==== OS applications:
|
17
|
-
#
|
18
|
-
# - exiftools
|
19
|
-
#
|
20
|
-
# === Usage
|
21
|
-
#
|
22
|
-
# $ ./pdfmd <action> <parameter> file
|
23
|
-
#
|
24
|
-
# $ ./pdfmd help <action>
|
25
|
-
#
|
26
|
-
# An overview about the actions can be seen when running the script without
|
27
|
-
# any parameters
|
28
|
-
#
|
29
|
-
# Check and set metadata of PDF documents
|
30
|
-
#
|
31
|
-
# A complete set of metada contains
|
32
|
-
#
|
33
|
-
# * CreateDate
|
34
|
-
# * Title
|
35
|
-
# * Author
|
36
|
-
# * Subject
|
37
|
-
# * Keywords (optional)
|
38
|
-
#
|
39
|
-
# TODO: Fix broken PDF files automatically
|
40
|
-
# TODO: Implement check that exifdata has been updated
|
41
|
-
# TODO: Read this: http://lostechies.com/derickbailey/2011/04/29/writing-a-thor-application/
|
42
|
-
# TODO: ... and this: http://blog.paracode.com/2012/05/17/building-your-tools-with-thor/
|
43
|
-
# gs \
|
44
|
-
# -o repaired.pdf \
|
45
|
-
# -sDEVICE=pdfwrite \
|
46
|
-
# -dPDFSETTINGS=/prepress \
|
47
|
-
# corrupted.pdf
|
48
|
-
#
|
49
|
-
# == Author
|
50
|
-
#
|
51
|
-
# Daniel Roos <daniel-git@micronarrativ.org>
|
52
|
-
# Source: https://github.com/Micronarrativ/ruby-pmd
|
53
|
-
#
|
54
|
-
require "thor"
|
55
|
-
require "highline/import"
|
56
|
-
require "fileutils"
|
57
|
-
require "i18n"
|
58
|
-
require 'pathname'
|
59
|
-
require 'logger'
|
60
|
-
|
61
|
-
VERSION = '1.9.1'
|
62
|
-
|
63
|
-
# Include general usage methods
|
64
|
-
require_relative('pdfmd/methods.rb')
|
65
|
-
|
66
|
-
class DOC < Thor
|
67
|
-
|
68
|
-
#
|
69
|
-
# Show the current metadata tags
|
70
|
-
#
|
71
|
-
# TODO: Enable additional options
|
72
|
-
#
|
73
|
-
desc 'show', 'Show metadata of a file'
|
74
|
-
method_option :all, :type => :boolean, :aliases => '-a', :desc => 'Show all metatags', :default => false, :required => false
|
75
|
-
method_option :tag, :type => :string, :aliases => '-t', :desc => 'Show specific tag(s), comma separated', :required => false
|
76
|
-
method_option :format, :type => :string, :aliases => '-f', :desc => 'Define output format', :required => false
|
77
|
-
method_option :includepdf, :type => :boolean, :aliases => '-i', :desc => 'Include the filename in output', :required => false
|
78
|
-
long_desc <<-LONGDESC
|
79
|
-
== General
|
80
|
-
|
81
|
-
Show metatags of a PDF document.
|
82
|
-
|
83
|
-
The following tags are being shown:
|
84
|
-
\x5 * Author
|
85
|
-
\x5 * Creator
|
86
|
-
\x5 * CreateDate
|
87
|
-
\x5 * Title
|
88
|
-
\x5 * Subject
|
89
|
-
\x5 * Keywords
|
90
|
-
|
91
|
-
== Parameters
|
92
|
-
|
93
|
-
--all, -a
|
94
|
-
\x5 Show all relevant metatags for a document.
|
5
|
+
require_relative './pdfmd/pdfmdmethods.rb'
|
6
|
+
class Pdfmd
|
95
7
|
|
96
|
-
|
8
|
+
require "i18n"
|
9
|
+
require 'pathname'
|
10
|
+
require 'fileutils'
|
11
|
+
require "highline/import"
|
97
12
|
|
98
|
-
|
13
|
+
# Include general methods for Pdfmd
|
14
|
+
include Pdfmdmethods
|
99
15
|
|
100
|
-
|
101
|
-
\x5 Specify the metatag to show. The selected metatag must be one of the relevant tags. Other tags are ignored and nothing is returned.
|
16
|
+
attr_accessor :filename, :logstatus, :logfile
|
102
17
|
|
103
|
-
|
18
|
+
require_relative 'pdfmd/pdfmdshow.rb'
|
19
|
+
require_relative 'pdfmd/pdfmdconfig.rb'
|
20
|
+
require_relative 'pdfmd/pdfmdedit.rb'
|
21
|
+
require_relative 'pdfmd/pdfmdrename.rb'
|
22
|
+
require_relative 'pdfmd/pdfmdsort.rb'
|
23
|
+
require_relative 'pdfmd/string_extend.rb'
|
24
|
+
require 'logger'
|
104
25
|
|
105
|
-
|
26
|
+
@@default_tags = ['createdate', 'author', 'title', 'subject', 'keywords']
|
106
27
|
|
107
|
-
|
28
|
+
# Default document password
|
29
|
+
@@documentPassword = ''
|
108
30
|
|
109
|
-
|
31
|
+
# Document metadata, read from the document
|
32
|
+
@@metadata = Hash.new
|
110
33
|
|
111
|
-
|
34
|
+
# Hiera configuration data
|
35
|
+
@@hieradata = Hash.new
|
112
36
|
|
113
|
-
|
37
|
+
# Set up logging defaults, remember the file name, query the
# 'pdfmd::config' data from Hiera and, unless the file name is empty,
# read the metatags of the file.
def initialize(filename)
  # Default log file location: a hidden file in the current working directory.
  logfile_unset = !@logfile || @logfile.empty?
  @logfile = Dir.pwd.chomp('/') + '/.pdfmd.log' if logfile_unset

  # Logging is enabled, with 'info' as log level.
  @log      = true
  @loglevel = 'info'

  self.log('debug','---')
  self.log('info',"Starting with file '#{filename}'.")

  @filename  = filename
  @hieradata = queryHiera('pdfmd::config')

  read_metatags(@filename) unless filename.empty?
end
|
163
56
|
|
164
57
|
#
|
165
|
-
#
|
166
|
-
|
167
|
-
|
168
|
-
long_desc <<-LONGDESC
|
169
|
-
|
170
|
-
Shows the current default configuration as available in Hiera.
|
171
|
-
|
172
|
-
== Usage
|
173
|
-
|
174
|
-
Example: `pdfmd config [<command>]`
|
175
|
-
|
176
|
-
|
177
|
-
== Parameter
|
178
|
-
|
179
|
-
[<commandname>]
|
180
|
-
|
181
|
-
Shows only the default configuration from hiera for the specified command.
|
182
|
-
The command parameter is not case-sensitive.
|
183
|
-
|
184
|
-
Example: `pdfmd config edit`
|
185
|
-
|
186
|
-
LONGDESC
|
187
|
-
method_option :show, :type => :boolean, :aliases => '-s', :required => false
|
188
|
-
def config(subcommand = '')
|
189
|
-
|
190
|
-
ENV['PDFMD_SHOW'] = options[:show].to_s
|
191
|
-
ENV['PDFMD_COMMAND'] = subcommand
|
192
|
-
require_relative('./pdfmd/config.rb')
|
193
|
-
|
58
|
+
# Make Metadata available to the outside
|
59
|
+
# Reader for the class-level metadata hash (@@metadata).
def metadata
  return @@metadata
end
|
195
62
|
|
196
63
|
#
|
197
|
-
#
|
198
|
-
#
|
199
|
-
|
200
|
-
#
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
If a value is provided, the current Value will be replaced by the new value.
|
220
|
-
|
221
|
-
--rename, -r
|
222
|
-
\x5 Rename file after updating the meta tag information according to the fields.
|
223
|
-
|
224
|
-
This parameter is identical to running `> CLI rename <filename>`
|
225
|
-
|
226
|
-
Hiera parameter: rename
|
227
|
-
|
228
|
-
General example:
|
229
|
-
|
230
|
-
# Edit tag 'TAG' and set a new value interactive.
|
231
|
-
\x5>CLI edit -t TAG <filename>
|
232
|
-
|
233
|
-
# Edit tag 'Author' and set new value interactive.
|
234
|
-
\x5>CLI edit -t author example.pdf
|
235
|
-
|
236
|
-
# Edit multiple Tags and set a new value interactive.
|
237
|
-
\x5>CLI edit -t tag1,tag2,tag3 <filename>
|
238
|
-
|
239
|
-
# Edit multiple Tags and set a new value in batch mode.
|
240
|
-
\x5 CLI edit -t tag1='value1',tag2='value2' <filename>
|
241
|
-
|
242
|
-
== Multiple Tags
|
243
|
-
|
244
|
-
For setting multiple tags list the tags comma separated.
|
245
|
-
|
246
|
-
For setting all tags (Author, Title, Subject, CreateDate, Keywords) use the keyword 'all' as tagname.
|
247
|
-
|
248
|
-
# Set tags 'Author', 'Title', 'Subject' in example.pdf interactivly.
|
249
|
-
\x5>CLI edit -t author,title,subject example.pdf`
|
250
|
-
|
251
|
-
# Set tags 'Author', 'Title', 'Subject', 'CreateDate', 'Keywords' in
|
252
|
-
example.pdf interactive:
|
253
|
-
\x5>CLI edit -t all example.pdf
|
254
|
-
|
255
|
-
# Set tags 'Author', 'CreateDate' in example.pdf in batch mode (non-interactive:
|
256
|
-
|
257
|
-
CLI edit -t author='Me',createdate='1970:00:00 01:01:01' example.pdf
|
258
|
-
CLI edit -t author='Me',Createdate=19700000 example.pdf
|
259
|
-
|
260
|
-
== Tag: CreateDate
|
261
|
-
|
262
|
-
In order to enter a value for the 'CreateDate' field, some internal matching is going on in order to make it easier and faster to enter dates and times.
|
263
|
-
|
264
|
-
The following formats are identified/matched:
|
265
|
-
|
266
|
-
\x5 yyyymmdd
|
267
|
-
\x5 yyyymmd
|
268
|
-
\x5 yyyymmddHHMMSS
|
269
|
-
\x5 yyyy-mm-dd HH:MM:SS
|
270
|
-
\x5 yyyy:mm:dd HH:MM:SS
|
271
|
-
\x5 yyyy.mm.dd HH:MM:SS
|
272
|
-
\x5 yyyy-mm-d
|
273
|
-
\x5 yyyy-mm-dd
|
274
|
-
\x5 yyyy.mm.d
|
275
|
-
\x5 yyyy.mm.dd
|
276
|
-
\x5 yyyy:mm:d
|
277
|
-
\x5 yyyy:mm:dd
|
278
|
-
|
279
|
-
\x5 - If HH:MM:SS or HHMMSS is not provided, those values are automatically set to zero.
|
280
|
-
\x5 - The output format of every timestamp is <yyyy:mm:dd HH:MM:SS>
|
281
|
-
\x5 - When providing and invalid date, the incorrect date is rejected and the user asked to provide the correct date.
|
282
|
-
|
283
|
-
== Rename file
|
284
|
-
|
285
|
-
In addition to setting the tags the current file can be renamed according to
|
286
|
-
the new metadata.
|
287
|
-
|
288
|
-
# Set tag 'Author' and rename file example.pdf
|
289
|
-
\x5> CLI edit -t author -r example.pdf
|
290
|
-
|
291
|
-
See `> CLI help rename` for details about renaming.
|
292
|
-
|
293
|
-
To enable this feature in hiera add the key 'rename' into the section 'edit' with the value 'true'.
|
294
|
-
|
295
|
-
LONGDESC
|
296
|
-
method_option :tag, :type => :string, :aliases => '-t', :desc => 'Name of the Tag(s) to Edit', :default => false, :required => true
|
297
|
-
method_option :rename, :type => :boolean, :aliases => '-r', :desc => 'Rename file after changing meta-tags', :required => false
|
298
|
-
method_option :log, :aliases => '-l', :type => :boolean, :desc => 'Enable logging'
|
299
|
-
method_option :logfile, :aliases => '-p', :type => :string, :desc => 'Define path to logfile'
|
300
|
-
def edit(filename)
|
301
|
-
|
302
|
-
ENV['PDFMD_FILENAME'] = filename
|
303
|
-
ENV['PDFMD_TAG'] = options[:tag]
|
304
|
-
ENV['PDFMD_RENAME'] = options[:rename].to_s
|
305
|
-
ENV['PDFMD'] = __FILE__
|
306
|
-
ENV['PDFMD_LOG'] = options[:log].to_s
|
307
|
-
ENV['PDFMD_LOGFILE'] = options[:logfile]
|
308
|
-
|
309
|
-
require_relative('./pdfmd/edit.rb')
|
310
|
-
|
311
|
-
end
|
64
|
+
# Logging stuff
|
65
|
+
# def log(status = 'info', message)
|
66
|
+
|
67
|
+
# # Setting the loglevel
|
68
|
+
# case @loglevel
|
69
|
+
# when /info/i
|
70
|
+
# level = 'Logger::INFO'
|
71
|
+
# when /warn/i
|
72
|
+
# level = 'Logger::WARN'
|
73
|
+
# when /error/i
|
74
|
+
# level = 'Logger::ERROR'
|
75
|
+
# when /debug/i
|
76
|
+
# level = 'Logger::DEBUG'
|
77
|
+
# else
|
78
|
+
# level = 'Logger::INFO'
|
79
|
+
# end
|
80
|
+
# logger = Logger.new(@logfile)
|
81
|
+
# logger.level = eval level
|
82
|
+
# logger.send(status, message)
|
83
|
+
# logger.close
|
84
|
+
|
85
|
+
# end
|
312
86
|
|
313
87
|
#
|
314
|
-
# Check
|
315
|
-
#
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
== Example
|
332
|
-
|
333
|
-
# Show the values of the metatags for example.pdf
|
334
|
-
\x5>CLI show example.pdf
|
335
|
-
|
336
|
-
LONGDESC
|
337
|
-
def check(filename)
|
338
|
-
|
339
|
-
ENV['PDFMD_FILENAME'] = filename
|
340
|
-
require_relative('./pdfmd/check.rb')
|
88
|
+
# Check all or certain metatags
|
89
|
+
# If there is no content for a tag, return false
|
90
|
+
# Check whether the given metatags carry a value in @@metadata.
#
# metatags - a whitespace separated String or an Array of tag names.
#
# Returns true when every listed tag has content (also for an empty list),
# false when at least one of them is empty or missing. Logs an error and
# exits with status 1 when metatags is neither a String nor an Array.
def check_metatags(metatags = [])
  if metatags.is_a?(String)
    metatags = metatags.split
  elsif !metatags.is_a?(Array)
    self.log('error', 'Array or string parameter expected for parameter of check_metatags.')
    exit 1
  end

  # The former `each` block evaluated a bare `false` and threw it away, so
  # the method always returned the (truthy) tag list.
  metatags.none? { |tag| @@metadata[tag].to_s.empty? }
end
|
343
106
|
|
344
|
-
#
|
345
|
-
#
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
107
|
+
# Read the metatags of a file into
|
108
|
+
# @@metadata
|
109
|
+
# Read the metatags of a PDF file with exiftool and store them in @@metadata.
#
# Every tag listed in @@default_tags is first reset to an empty string and
# then filled from the exiftool output. If exiftool reports the document as
# password protected, the default password from the configuration
# ('default:password') is tried once; after that the user is prompted, at
# most three times, before the program exits with status 1.
#
# @param filename [String] path of the file to read; the program aborts
#   when it is not a regular file.
# @return [Array<String>] the raw exiftool output lines that were parsed.
def read_metatags(filename)
  # Reset the known tags and collect one exiftool option per tag.
  tag_options = ['-Warning']
  @@default_tags.each do |key|
    @@metadata[key] = ''
    tag_options << "-#{key}"
  end

  unless File.file?(filename)
    self.log('error', "Cannot access file '#{filename}'.")
    puts "Cannot access file '#{filename}'. Abort"
    abort
  end

  # exiftool is started with an argument vector rather than a shell string:
  # file names or passwords containing quotes or other shell metacharacters
  # can then neither break the command nor inject additional ones.
  run_exiftool = lambda do |extra_options|
    IO.popen(['exiftool', *extra_options, *tag_options, filename], &:read).to_s.split("\n")
  end

  metastrings = run_exiftool.call([])

  tried_hiera_password = false
  manual_attempts = 0

  # Repeat the password request until exiftool no longer reports the
  # document as password protected.
  loop do
    protected_hits = metastrings.grep(/warning.*password protected/i)
    break if protected_hits.empty?

    protected_hits.each do
      self.log('info', "File '#{filename}' is password protected.")
    end

    # Try the configured default password first, ask the user otherwise.
    configured_password = self.determineValidSetting(nil, 'default:password')
    if configured_password && !tried_hiera_password
      self.log('debug', 'Using default password from hiera.')
      @@documentPassword = configured_password
      tried_hiera_password = true
    else
      # Getting here right after the default password was tried means that
      # password did not unlock the document.
      if tried_hiera_password && manual_attempts == 0
        self.log('warn', 'Default password from hiera is invalid.')
      end

      # Give up after three manual password inputs.
      if manual_attempts == 3
        self.log('error', "More than three password attempts on file '#{filename}'. Abort.")
        exit 1
      end

      self.log('info', 'Requesting password from user.')
      @@documentPassword = readUserInput('Document password : ').chomp
      manual_attempts += 1
      puts ''
    end

    # to_s: a configured password may not be a String, and argv entries must be.
    metastrings = run_exiftool.call(['-password', @@documentPassword.to_s])
  end

  self.log('debug', "Reading metadata from file '#{filename}'.")
  metastrings.each do |line|
    # Split on the first ' : ' only, so values that contain the separator
    # themselves are kept intact.
    name, value = line.split(' : ', 2)
    next if name.nil? # blank output line, nothing to parse

    # An empty value stays nil, exactly as the unlimited split produced it.
    value = nil if value == ''

    metatag = name.downcase.gsub(/ /, '')
    @@metadata[metatag] = value if @@metadata.has_key?(metatag)
  end
end
|
370
193
|
|
371
194
|
#
|
372
|
-
#
|
373
|
-
|
374
|
-
desc 'sort','Sort files into directories sorted by Author'
|
375
|
-
long_desc <<-LONGDESC
|
376
|
-
== General
|
377
|
-
|
378
|
-
Will sort pdf documents into subdirectories according to the value of their
|
379
|
-
tag 'author'.
|
380
|
-
|
381
|
-
When using this action a logfile with all actions will be generated in the
|
382
|
-
current working directory with the same name as the script and the ending
|
383
|
-
'.log'. This can be disabled with the parameter 'log' if required or adjusted to write the logfile to a different location.
|
384
|
-
|
385
|
-
If a document does not have an entry in the meta tag 'author', the file will
|
386
|
-
not be processed. This can be seen in the output of the logfile as well.
|
387
|
-
|
388
|
-
=== Parameters
|
389
|
-
|
390
|
-
[*destination|d*]
|
391
|
-
\x5 Speficy the root output directory to where the folderstructure is being created.
|
392
|
-
|
393
|
-
This parameter is required if hiera is not configured.
|
394
|
-
|
395
|
-
This parameter overwrites the hiera defaults
|
396
|
-
|
397
|
-
[*copy|c*]
|
398
|
-
\x5 Copy the files instead of moving them.
|
399
|
-
|
400
|
-
[*log|l*]
|
401
|
-
\x5 Disable/Enable the logging.
|
402
|
-
|
403
|
-
Default: enabled.
|
404
|
-
|
405
|
-
[*logfile|p*]
|
406
|
-
\x5 Set an alternate path for the logfile. If not path is chosen, the logfile
|
407
|
-
is being created in the current working directory as `pdfmd.log`.
|
408
|
-
|
409
|
-
[*interactive|i*]
|
410
|
-
\x5 Disable/Enable interactive sorting. This will ask for confirmation for each sorting action.
|
411
|
-
|
412
|
-
Default: disabled.
|
413
|
-
|
414
|
-
=== Replacement rules
|
415
|
-
|
416
|
-
The subdirectories for the documents are generated from the values in the
|
417
|
-
tag 'author' of each document.
|
418
|
-
|
419
|
-
In order to ensure a clean directory structure, there are certain rules
|
420
|
-
for altering the values.
|
421
|
-
\x5 1. Whitespaces are replaced by underscores.
|
422
|
-
\x5 2. Dots are replaced by underscores.
|
423
|
-
\x5 3. All letters are converted to their lowercase version.
|
424
|
-
\x5 4. Special characters are serialized
|
425
|
-
|
426
|
-
=== Hiera configuration
|
195
|
+
# Read user input
|
196
|
+
def readUserInput(textstring = 'Enter value: ')
|
427
197
|
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
=== Hiera defaults
|
437
|
-
|
438
|
-
The following values can be influenced by the hiera configuration in the section 'sort'. Commandline parameter will overwrite the defaults coming from hiera unless otherwise notet.
|
439
|
-
|
440
|
-
[*copy*]
|
441
|
-
\x5 If set to true copies the files from the source directory instead of moving them.
|
442
|
-
|
443
|
-
[*destination*]
|
444
|
-
\x5 Specifies the default output directory (root-directory). Either this or the command line parameter for destinations must be set.
|
445
|
-
|
446
|
-
[*log*]
|
447
|
-
\x5 Enables (true) or disables (false) logging.
|
448
|
-
|
449
|
-
[*logfile*]
|
450
|
-
\x5 Specifes the default path for the logfile. If no path is set and logging is enable, the logfile will be created in the current working directory.
|
451
|
-
|
452
|
-
Default is the current working directory with the filename `pdfmd.log`
|
453
|
-
|
454
|
-
[*interactive*]
|
455
|
-
\x5 If set to true, each file must be acknowledged to be processed when running the script.
|
456
|
-
|
457
|
-
=== Example
|
458
|
-
|
459
|
-
This command does the following:
|
460
|
-
\x5 1. Take all pdf documents in the subdirectory ./documents.
|
461
|
-
\x5 2. Create the output folder structure in `/tmp/test/`.
|
462
|
-
\x5 3. Copy the files instead of moving them.
|
463
|
-
\x5 4. Disable the logging.
|
464
|
-
\x5> CLI sort -d /tmp/test -c -l false ./documents
|
465
|
-
|
466
|
-
# Sort only a single file
|
467
|
-
\x5> CLI sort -d /tmp/test -c -l false ./documents/test.pdf
|
468
|
-
|
469
|
-
LONGDESC
|
470
|
-
method_option :destination, :aliases => '-d', :required => false, :type => :string, :desc => 'Defines the output directory'
|
471
|
-
method_option :copy, :aliases => '-c', :required => false, :type => :boolean, :desc => 'Copy files instead of moving them'
|
472
|
-
method_option :log, :aliases => '-l', :required => false, :type => :boolean, :desc => 'Enable/Disable creation of log files'
|
473
|
-
method_option :logfile, :aliases => '-p', :required => false, :type => :string, :desc => 'Change the default logfile path'
|
474
|
-
method_option :interactive, :aliases => '-i', :required => false, :type => :boolean, :desc => 'Enable/Disable interactive sorting'
|
475
|
-
method_option :dryrun, :aliases => '-n', :required => false, :type => :boolean, :desc => 'Run without changing something'
|
476
|
-
def sort(inputDir)
|
477
|
-
|
478
|
-
ENV['PDFMD_INPUTDIR'] = inputDir
|
479
|
-
ENV['PDFMD_DESTINATION'] = options[:destination].to_s
|
480
|
-
ENV['PDFMD_COPY'] = options[:copy].to_s
|
481
|
-
ENV['PDFMD_LOG'] = options[:log].to_s
|
482
|
-
ENV['PDFMD_LOGFILEPATH'] = options[:logfile].to_s
|
483
|
-
ENV['PDFMD_INTERACTIVE'] = options[:interactive].to_s
|
484
|
-
ENV['PDFMD_DRYRUN'] = options['dryrun'].to_s
|
485
|
-
ENV['PDFMD'] = __FILE__
|
486
|
-
require_relative('./pdfmd/sort.rb')
|
198
|
+
self.log('info','Waiting for user input.')
|
199
|
+
if textstring.match(/password/i)
|
200
|
+
print textstring
|
201
|
+
STDIN.noecho(&:gets).chomp + "\n"
|
202
|
+
else
|
203
|
+
ask textstring
|
204
|
+
end
|
487
205
|
|
488
206
|
end
|
489
207
|
|
490
208
|
#
|
491
|
-
#
|
492
|
-
#
|
493
|
-
# Scheme: YYYYMMDD-author-subject-keywords.extension
|
494
|
-
desc 'rename', 'Rename the file according to Metadata'
|
495
|
-
long_desc <<-LONGDESC
|
496
|
-
== General
|
497
|
-
|
498
|
-
Rename a file with the meta tags in the document.
|
499
|
-
|
500
|
-
== Parameter
|
501
|
-
|
502
|
-
--dry-run, -n
|
503
|
-
\x5 Simulate the renaming process and show the result without changing the file.
|
504
|
-
|
505
|
-
--all-keywords, -a
|
506
|
-
\x5 Use all keywords from the meta information in the file name and ignore the limit.
|
507
|
-
|
508
|
-
Hiera parameter: allkeywords [true|false]
|
509
|
-
|
510
|
-
Default: false
|
511
|
-
|
512
|
-
--keywwords, -k
|
513
|
-
\x5 Set the number of keywords used in the filename to a new value.
|
514
|
-
|
515
|
-
Hiera parameter: keywords <integer>
|
209
|
+
# Query hiera for settings if available
|
210
|
+
# def queryHiera(keyword, facts = 'UNSET')
|
516
211
|
|
517
|
-
|
212
|
+
# # Set default facts
|
213
|
+
# facts == 'UNSET' ? facts = "fqdn=#{`hostname`}" : ''
|
518
214
|
|
519
|
-
|
520
|
-
|
215
|
+
# # If Hiera is not found (damn cat, get of my keyboard!), return false,
|
216
|
+
# # otherwise return the hash from Hiera
|
217
|
+
# if !system('which hiera > /dev/null 2>&1')
|
218
|
+
# self.log('warn','Cannot find hiera command in $path.')
|
219
|
+
# puts 'Cannot find "hiera" command in $path.'
|
220
|
+
# return eval('{}')
|
221
|
+
# else
|
222
|
+
# self.log('debug', 'Reading hiera values for pdfmd::config.')
|
223
|
+
# commandreturn = ''
|
224
|
+
# commandreturn = `hiera #{keyword} #{facts} 2>/dev/null`
|
521
225
|
|
522
|
-
|
226
|
+
# if $?.exitstatus == 1
|
227
|
+
# self.log('warn', 'Could not retrieve configuration from with hiera.')
|
228
|
+
# eval('{}')
|
229
|
+
# else
|
230
|
+
# self.log('debug', 'Could retrieve configuration from hiera.')
|
231
|
+
# eval(commandreturn)
|
232
|
+
# end
|
523
233
|
|
524
|
-
|
234
|
+
# end
|
525
235
|
|
526
|
-
|
527
|
-
\x5 Copy the file instead of moving it to the new name or destination.
|
236
|
+
# end # End of queryHiera
|
528
237
|
|
529
|
-
Hiera parameter: copy [true|false]
|
530
|
-
|
531
|
-
Default: false
|
532
|
-
|
533
|
-
The directory must exist at runtime.
|
534
|
-
|
535
|
-
--log, -l
|
536
|
-
\x5 Enable logging.
|
537
|
-
|
538
|
-
Values: true|false
|
539
|
-
|
540
|
-
--logfile, -p
|
541
|
-
\x5 Define logfile path
|
542
|
-
|
543
|
-
Default: current working-dir/pdfmd.log
|
544
|
-
|
545
|
-
== Example
|
546
|
-
|
547
|
-
# Rename the file according to the metatags
|
548
|
-
\x5> CLI rename <filename>
|
549
|
-
|
550
|
-
# Rename example.pdf according to the metatags
|
551
|
-
\x5> CLI rename example.pdf
|
552
|
-
|
553
|
-
# Simulate renaming example.pdf according to the metatags (dry-run)
|
554
|
-
\x5> CLI rename -n example.pdf
|
555
|
-
|
556
|
-
== Hiera
|
557
|
-
|
558
|
-
There are Hiera settings available, that cannot be addressed by a commandline parameter.
|
559
|
-
|
560
|
-
defaultdoctype: Defines the appreviation for the default document type. This one isused when no other document type could be determined from the metadata-field 'title'. Default value is 'doc'.
|
561
|
-
|
562
|
-
For details on how to set the parameter, see 'pdfmd explain hiera'.
|
563
|
-
|
564
|
-
|
565
|
-
== Rules
|
566
|
-
|
567
|
-
There are some rules regarding how documents are being renamed
|
568
|
-
|
569
|
-
Rule 1: All documents have the following filenaming structure:
|
570
|
-
|
571
|
-
<yyyymmdd>-<author>-<type>-<additionalInformation>.<extension>
|
572
|
-
|
573
|
-
\x5 # <yyyymmdd>: Year, month and day identical to the meta information in the document.
|
574
|
-
\x5 # <author>: Author of the document, identical to the meta information
|
575
|
-
in the document. Special characters and whitespaces are replaced.
|
576
|
-
\x5 # <type>: Document type, is being generated from the title field in the metadata of the document. Document type is a three character abbreviation following the following logic:
|
577
|
-
|
578
|
-
\x5 con => Contract
|
579
|
-
\x5 inv => Invoice
|
580
|
-
\x5 inf => Information
|
581
|
-
\x5 man => Manual
|
582
|
-
\x5 off => Offer
|
583
|
-
\x5 ord => Order
|
584
|
-
\x5 rpt => Receipt
|
585
|
-
\x5 tic => Ticket
|
586
|
-
|
587
|
-
If the dokument type can not be determined automatically, it defaults to 'dok'.
|
588
|
-
|
589
|
-
This default behavior got introduced with version 1.8.1 and can be overwritten by hiera.
|
590
|
-
See `pdfmd explain hiera-keys` for information on how to do this.
|
591
|
-
|
592
|
-
# <additionalInformation>: Information generated from the metadata fields
|
593
|
-
'title', 'subject' and 'keywords'.
|
594
|
-
|
595
|
-
If 'Title' or 'Keywords' contains one of the following keywords, they will be replaced with the corresponding abbreviation followed by the specified value:
|
596
|
-
|
597
|
-
\x5 Contract => con
|
598
|
-
\x5 Invoice => inv
|
599
|
-
\x5 Information => inf
|
600
|
-
\x5 Manual => man
|
601
|
-
\x5 Offer => off
|
602
|
-
\x5 Order => ord
|
603
|
-
\x5 Receipt => rpt
|
604
|
-
\x5 Ticket => tic
|
605
|
-
|
606
|
-
This setting will be overwritten as well by defining the 'keys' hash in Hiera.
|
607
|
-
|
608
|
-
Rule 2: The number of keywords used in the filename is defined by the parameter '-k'. See the section of that parameter for more details and the default value.
|
609
|
-
|
610
|
-
Rule 3: Keywords matching 'kvi','fak','ord','kdn' are prioritised.
|
611
|
-
|
612
|
-
Rule 4: Special characters and whitespaces are replaced:
|
613
|
-
|
614
|
-
\x5 ' ' => '_'
|
615
|
-
\x5 '/' => '_'
|
616
|
-
|
617
|
-
Rule 5: The new filename has only lowercase characters.
|
618
|
-
|
619
|
-
== Example (detailed)
|
620
|
-
|
621
|
-
# Example PDF with following MetaTags:
|
622
|
-
|
623
|
-
\x5 Filename : example.pdf
|
624
|
-
\x5 Author : John
|
625
|
-
\x5 Subject : new Product
|
626
|
-
\x5 Title : Presentation
|
627
|
-
\x5 CreateDate : 1970:01:01 01:00:00
|
628
|
-
\x5 Keywords : John Doe, Jane Doe, Mister Doe
|
629
|
-
|
630
|
-
# Renaming the file
|
631
|
-
\x5> CLI rename example.pdf
|
632
|
-
\x5 example.pdf => 19700101-john-dok-new_product-john_doe-jane_doe.pdf
|
633
|
-
|
634
|
-
# Simulation to rename the file (no actual change)
|
635
|
-
\x5> CLI rename -n example.pdf
|
636
|
-
\x5example.pdf => 19700101-john-dok-new_product-john_doe-jane_doe.pdf
|
637
|
-
|
638
|
-
# Renaming the file with all keywords
|
639
|
-
\x5> CLI rename -n -a example.pdf
|
640
|
-
|
641
|
-
\x5 example.pdf => 19700101-john-dok-new_product-john_doe-jane_doe-mister_doe.pdf
|
642
|
-
|
643
|
-
LONGDESC
|
644
|
-
method_option :dryrun, :type => :boolean, :aliases => '-n', :desc => 'Run without making changes', :default => false, :required => false
|
645
|
-
method_option :allkeywords, :type => :boolean, :aliases => '-a', :desc => 'Add all keywords (no limit)', :required => false
|
646
|
-
method_option :keywords, :type => :numeric, :aliases => '-k', :desc => 'Number of keywords to include (Default: 3)', :required => false
|
647
|
-
method_option :outputdir, :aliases => '-o', :type => :string, :desc => 'Speficy output directory', :default => false, :required => :false
|
648
|
-
method_option :copy, :aliases => '-c', :type => :boolean, :desc => 'Copy instead of moving the file when renaming'
|
649
|
-
method_option :log, :aliases => '-l', :type => :boolean, :desc => 'Enable logging'
|
650
|
-
method_option :logfile, :aliases => '-p', :type => :string, :desc => 'Define path to logfile'
|
651
|
-
def rename(filename)
|
652
|
-
|
653
|
-
ENV['PDFMD_FILENAME'] = filename
|
654
|
-
ENV['PDFMD_DRYRUN'] = options[:dryrun].to_s
|
655
|
-
ENV['PDFMD_ALLKEYWORDS'] = options[:allkeywords].to_s
|
656
|
-
ENV['PDFMD_OUTPUTDIR'] = options[:outputdir].to_s
|
657
|
-
ENV['PDFMD_NUMBERKEYWORDS'] = options[:keywords].to_s
|
658
|
-
ENV['PDFMD_COPY'] = options[:copy].to_s
|
659
|
-
ENV['PDFMD_LOG'] = options[:log].to_s
|
660
|
-
ENV['PDFMD_LOGFILE'] = options[:logfile].to_s
|
661
|
-
ENV['PDFMD'] = __FILE__
|
662
|
-
require_relative('pdfmd/rename.rb')
|
663
|
-
|
664
|
-
end
|
665
238
|
|
666
239
|
#
|
667
|
-
#
|
240
|
+
# Determine the valid setting
|
241
|
+
# 1. Priority: manual setting
|
242
|
+
# 2. Priority: Hiera setting
|
668
243
|
#
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
244
|
+
# If there is no manual setting, the value of 'manualSetting'
|
245
|
+
# should be set to 'nil'
|
246
|
+
#
|
247
|
+
# def determineValidSetting(manualSetting,key)
|
248
|
+
|
249
|
+
# if !@hieradata.nil?
|
250
|
+
# hieraKey = '@hieradata'
|
251
|
+
# hieraValue = ''
|
252
|
+
|
253
|
+
# key.split(':').each do |keyname|
|
254
|
+
|
255
|
+
# hieraKeyCheck = eval(hieraKey)
|
256
|
+
# if !hieraKeyCheck.nil? and hieraKeyCheck.has_key?(keyname)
|
257
|
+
# hieraKey = hieraKey + "['#{keyname}']"
|
258
|
+
# else
|
259
|
+
# # Key has not been found
|
260
|
+
# hieraKey = ''
|
261
|
+
# break
|
262
|
+
# end
|
263
|
+
# end
|
264
|
+
|
265
|
+
# hieraValue = eval(hieraKey)
|
266
|
+
# else
|
267
|
+
# hieraValue = nil
|
268
|
+
# end
|
269
|
+
|
270
|
+
# if !manualSetting.nil?
|
271
|
+
# self.log('debug', "Chosing manual setting '#{key} = #{manualSetting}'.")
|
272
|
+
# manualSetting
|
273
|
+
# elsif !hieraValue.nil? or
|
274
|
+
# !hieraValue == ''
|
275
|
+
|
276
|
+
# self.log('debug', "Chosing hiera setting '#{key} = #{hieraValue}'.")
|
277
|
+
# hieraValue
|
278
|
+
|
279
|
+
# else
|
280
|
+
# self.log('debug', "No setting chosen for '#{key}' in hiera.")
|
281
|
+
# false
|
282
|
+
# end
|
283
|
+
|
284
|
+
# end
|
285
|
+
|
286
|
+
end # End of Class
|