pdfmd 1.9.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,115 @@
1
+ # == Class: pdfmdsort
2
+ #
3
+ # TODO: Author values with a slash (e.g. One/two) should be sorted into one/two/yyyymmdd-one_to-xxx.pdf
4
+ class Pdfmdsort < Pdfmd
5
+
6
+ attr_accessor :filename, :dryrun, :copy, :interactive, :destination, :overwrite
7
+
8
# Create a sorter for +input+ and reset every sort setting to its default:
# destination is the current directory and all switches are off.
#
# @param input passed on unchanged to the parent class initializer
def initialize(input)
  super(input)
  @interactive = @copy = @dryrun = @overwrite = false
  @destination = '.'
end
17
+
18
+
19
# Validate the configured destination.
#
# Only an existing regular file is rejected; every other value is accepted
# without checking that the directory already exists.
#
# @return [Boolean] false when @destination is a file, true otherwise
def checkDestination
  log('debug', "Checking destination parameter '#{@destination}'.")

  unless File.file?(@destination)
    log('debug', "Destination '#{@destination}' as directory confirmed.")
    return true
  end

  log('error', "Destination '#{@destination}' is a file.")
  false
end
34
+
35
# Build a directory-friendly author name from the 'author' metatag.
#
# Dots, whitespace and commas become underscores, '&' and '-' are removed and
# every run of underscores is collapsed into a single one. The result is
# passed through I18n.transliterate and downcased.
#
# @return [String, false] the normalised author, or false when
#   check_metatags('author') is falsy
def get_author
  return false unless check_metatags('author')

  author = @@metadata['author']
           .gsub(/[.\s,]/, '_')
           .gsub(/[&-]/, '')
           .squeeze('_') # collapse runs of any length, not only pairs ("a___b" => "a_b")

  # NOTE(review): presumably disabled so that transliterating works without a
  # configured list of available locales - confirm.
  I18n.enforce_available_locales = false
  I18n.transliterate(author).downcase # Normalising
end
46
+
47
+
48
# Sort the current file into '<destination>/<author>/'.
#
# Behaviour is driven by the instance settings:
# * @interactive - ask before processing; an empty answer counts as yes
# * @dryrun      - only log what would happen
# * @copy        - copy the file instead of moving it
# * @overwrite   - replace a file that already exists at the target
#
# Nothing happens when checkDestination rejects the destination. The process
# is terminated with status 1 when the file has no usable author.
#
# @return [true, nil, Object] true when an existing target was kept because
#   overwriting is disabled, nil when the file was skipped; the value of the
#   final log/copy/move call otherwise
def sort
  return unless checkDestination

  if @interactive
    answer = readUserInput("Process '#{@filename}' ([y]/n): ")
    answer = 'y' if answer.empty?
    log('info', "User Answer for file '#{@filename}': #{answer}")
    # Case-insensitive so that an upper-case "Y" is not treated as a refusal.
    unless answer =~ /y/i
      log('info', "Skipping file '#{@filename}' due to user answer: '#{answer}'.")
      return
    end
    log('info', "Processing file '#{@filename}' due to user answer: '#{answer}'.")
  end

  author = get_author
  if !author || author.empty?
    log('error', "File '#{@filename}' has not value for author set. Cannot sort file. Abort.")
    exit 1
  end

  targetdir  = File.join(@destination.chomp, author)
  targetfile = File.join(targetdir, File.basename(@filename))

  # Create the target dir if not existing.
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  unless File.exist?(targetdir)
    if @dryrun
      log('info', "Dryrun: Created Directory '#{targetdir}'.")
    else
      log('info', "Created directory '#{targetdir}'.")
      puts targetdir # existing behaviour: the new directory is echoed on stdout
      FileUtils.mkdir_p(targetdir)
    end
  end

  # An existing target is only replaced when overwriting is enabled.
  if File.exist?(targetfile)
    unless @overwrite
      log('info', "File '#{@filename}' already exists, overwrite disabled: not replacing file.")
      return true
    end
    log('info', "File '#{@filename}' already exists. Overwrite active: replacing file.")
  end

  action = @copy ? 'Copy' : 'Move'
  if @dryrun
    log('info', "Dryrun: #{action} file '#{@filename}' to '#{targetdir}'.")
  else
    log('info', "#{action} file '#{@filename}' to '#{targetdir}'.")
    @copy ? FileUtils.cp(@filename, targetdir) : FileUtils.mv(@filename, targetdir)
  end
end
114
+
115
+ end
@@ -0,0 +1,117 @@
1
+ # == Class: pdfmdstat
2
+ #
3
+ # gather and store statistical information
4
+ # about pdf documents
5
+ require_relative './pdfmdmethods.rb'
6
+
7
+ class Pdfmdstat
8
+
9
+ # Include unspecific methods for Pdfmd
10
+ include Pdfmdmethods
11
+
12
+ attr_accessor :metadata
13
+
14
+ @statdata = {}
15
+ @hieradata = {}
16
+
17
# Count the values of the five default metatags for the given documents.
#
# @param metadata collection of document metadata, handed unchanged to
#   count_values together with the default tag list
def initialize(metadata)
  @default_tags = ['author', 'title', 'subject', 'createdate', 'keywords']
  # count_values builds the complete result itself, so no placeholder hash
  # is assigned to @statdata beforehand.
  @statdata = count_values(metadata, @default_tags)
end
29
+
30
# Select the metatags that analyse_metadata reports.
#
# Whitespace around the names and empty entries are ignored, so
# "author, title" selects 'author' and 'title'.
#
# @param metatagnames [String, nil] comma separated tag names; nil keeps the
#   current selection. Any other type is logged as an error and terminates
#   the process with status 1.
def tags(metatagnames)
  return if metatagnames.nil?

  unless metatagnames.is_a?(String)
    log('error', 'Unknown Tag definition. Exit.')
    exit 1
  end

  @default_tags = metatagnames.split(',').map(&:strip).reject(&:empty?)
  log('debug', "Setting tags for statistic to '#{metatagnames}'.")
end
43
+
44
# Count how often each value occurs per metatag across all documents.
#
# @param metadata [#each] pairs of (document name, metadata). The metadata is
#   either a Hash or the text produced by Hash#to_s, which is how the stat
#   command hands it over.
# @param keys [Array<String>, String] names of the tags to count; the default
#   '' selects author, title, createdate, subject and keywords. Any other
#   value prints 'invalid keys provided' and exits with status 1.
# @return [Hash{String => Hash}] tag name => { value => number of documents }
def count_values(metadata, keys = '')
  data =
    if keys == ''
      {
        'author'     => {},
        'title'      => {},
        'createdate' => {},
        'subject'    => {},
        'keywords'   => {},
      }
    elsif keys.is_a?(Array)
      keys.each_with_object({}) { |keyname, counts| counts[keyname] = {} }
    else
      puts 'invalid keys provided'
      exit 1
    end

  metadata.each do |_name, raw|
    # NOTE(review): eval runs the text as Ruby code. It is kept only so that
    # callers passing Hash#to_s output keep working; the text originates from
    # document metadata, so prefer passing real Hashes.
    datahash = raw.is_a?(Hash) ? raw : eval(raw)

    datahash.each do |tagkey, tagvalue|
      next if tagvalue.nil?
      # Documents usually carry more tags than requested; skip the others
      # instead of failing on a missing counter.
      next unless data.key?(tagkey)

      data[tagkey][tagvalue] = data[tagkey].fetch(tagvalue, 0) + 1
    end
  end

  data
end
89
+
90
# Print a statistical overview of the counted metadata as YAML on stdout.
#
# For every selected tag (sorted by name, shown capitalised) the counted
# values are listed in sorted order with their number of occurrences. Empty
# values are shown as '*empty*'. A selected tag without any counts is shown
# with an empty list. The leading YAML document marker is removed.
def analyse_metadata
  report = {}

  @default_tags.sort.each do |tagname|
    # A tag can be selected that was never counted.
    counts = @statdata[tagname] || {}

    ordered =
      begin
        counts.sort
      rescue ArgumentError
        # Values of mixed types (e.g. String and Integer) cannot be compared
        # with each other; order them by their text form instead.
        counts.sort_by { |value, _amount| value.to_s }
      end

    report[tagname.capitalize] = ordered.each_with_object({}) do |(value, amount), labelled|
      # Not every value responds to empty? (numbers, for example).
      label = value.respond_to?(:empty?) && value.empty? ? '*empty*' : value
      labelled[label] = amount
    end
  end

  puts report.to_yaml.gsub(/---\n/, '')
end
116
+
117
+ end
File without changes
data/lib/run.rb ADDED
@@ -0,0 +1,235 @@
1
+ #!/usr/bin/env ruby
2
+ require './pdfmd.rb'
3
+ require './pdfmd/pdfmdstat.rb'
4
+ require "thor"
5
+
6
+ VERSION = '2.0.0'
7
+ NAME = 'pdfmd'
8
+
9
#
# Read the content of the long description from an external file
#
# @param filename [String] file name relative to one of the search directories
# @param paths [Array<String>, nil] directories searched in order. nil selects
#   the 'lib' directory next to the directory of the running script and the
#   'lib' directory of the installed gem (built from NAME and VERSION).
# @return [String] content of the first matching file, '' when none is found
def readLongDesc(filename, paths = nil)
  paths ||= [
    # File.join supplies the separator between the script directory and '..'.
    File.join(File.dirname(File.expand_path($0)), '..', 'lib'),
    File.join(Gem.dir, 'gems', "#{NAME}-#{VERSION}", 'lib'),
  ]

  # Only the first hit is used: both default locations can point at the same
  # installed file, and reading both would duplicate the text.
  found = paths.map { |dir| File.join(dir, filename) }
               .find { |candidate| File.file?(candidate) }

  found ? File.read(found) : ''
end
36
+
37
+ #
38
+ # Thor class
39
+ class DOC < Thor
40
+
41
+ # Class options for all commands (logging only)
42
+ # none
43
+
44
# Command 'show': print the metadata tags of one file.
#
# Output format, filename inclusion and tag selection are each obtained from
# determineValidSetting, which receives the command line option and a
# 'show:*' key (presumably a configuration lookup - confirm in Pdfmd).
desc 'show', 'Show metadata of a file'
long_desc readLongDesc('pdfmd/long_desc.pdfmdshow.txt')
method_option :tag, type: :string, aliases: '-t', desc: 'Show specific tag(s), comma separated', required: false
method_option :format, type: :string, aliases: '-f', desc: 'Define output format', required: false
method_option :includepdf, type: :boolean, aliases: '-i', desc: 'Include the filename in output', required: false
def show(filename)
  document = Pdfmdshow.new(filename)

  # All settings are resolved first, then applied in the same order as before.
  settings = {
    format:     document.determineValidSetting(options[:format], 'show:format'),
    includepdf: document.determineValidSetting(options[:includepdf], 'show:includepdf'),
    tags:       document.determineValidSetting(options[:tag], 'show:tags'),
  }

  document.set_outputformat(settings[:format])
  document.show_filename(settings[:includepdf])
  document.set_tags(settings[:tags])
  puts document.show_metatags
end
63
+
64
+
65
# Command 'config': print the output of Pdfmdconfig#show_config.
#
# subcommand is forwarded unchanged and defaults to an empty string.
desc 'config', 'Show config defaults'
long_desc readLongDesc('pdfmd/long_desc.pdfmdconfig.txt')
method_option :show, type: :boolean, aliases: '-s', required: false
def config(subcommand = '')
  puts Pdfmdconfig.new('').show_config(subcommand)
end
76
+
77
# Command 'edit': change one or more metatags of a file and write them back.
#
# When the 'edit:rename' setting resolves to a truthy value, the rename
# command of this class is called for the same file afterwards.
desc 'edit', 'Edit Meta Tag(s)'
long_desc readLongDesc('pdfmd/long_desc.pdfmdedit.txt')
method_option :tag, type: :string, aliases: '-t', desc: 'Name of the Tag(s) to Edit', required: true, lazy_default: 'all'
method_option :rename, type: :boolean, aliases: '-r', desc: 'Rename file after changing meta-tags', required: false
method_option :opendoc, type: :boolean, aliases: '-o', desc: 'Open the PDF document in a separate window.', required: false, lazy_default: true
def edit(filename)
  editor = Pdfmdedit.new(filename)

  selected_tags    = editor.determineValidSetting(options[:tag], 'edit:tags')
  editor.opendoc   = editor.determineValidSetting(options[:opendoc], 'edit:opendoc')
  # The viewer has no command line option, hence nil as the first argument.
  editor.pdfviewer = editor.determineValidSetting(nil, 'edit:pdfviewer')

  editor.set_tags(selected_tags)
  editor.update_tags
  editor.write_tags(filename)

  # If the file shall be renamed at the same time, trigger the other task
  return unless editor.determineValidSetting(options[:rename], 'edit:rename')

  editor.log('info', 'Running rename command.')
  rename(filename)
end
105
+
106
# Command 'stat': collect the metadata of all PDF files below a directory and
# print a statistic of the metatag values.
#
# A single file as input is not supported; a short notice is printed instead
# ('n.a.y.' presumably stands for "not available yet").
#
# While the files are read, a progress line is rewritten in place. The
# metadata of each file is handed to Pdfmdstat as text (Hash#to_s), keyed by
# the file's path so that equally named files in different subdirectories of
# a recursive run are all counted.
desc 'stat', 'Show metadata statistics of multiple files'
long_desc readLongDesc('pdfmd/long_desc.pdfmdstat.txt')
option :recursive, :type => :boolean, :aliases => '-r', :desc => 'Include subdirectories recursively.', :lazy_default => true, :required => false
option :tags, :aliases => '-t', :type => :string, :desc => 'Define Metatags to run at', :lazy_default => 'author,title,subject,createdate,keywords', :required => false
def stat(input)
  if File.file?(input)
    puts 'Input is a single file.'
    puts 'n.a.y.'
    return
  end

  # One file list serves both the total and the iteration, so the progress
  # always refers to the files that are actually processed.
  pattern_parts = [input.chomp]
  pattern_parts << '**' if options[:recursive]
  pattern_parts << '*.pdf'
  files = Dir.glob(File.join(*pattern_parts)).select { |path| File.file?(path) }
  total = files.size

  collected = {}
  files.each_with_index do |filename, index|
    # Multiply before dividing: integer division first would truncate the
    # step size (to 0 for more than 100 files).
    percentage = (index + 1) * 100 / total
    print "\r Status: #{percentage} % of #{total} files processed. "

    collected[filename] = Pdfmd.new(filename).metadata.to_s
  end
  puts ''
  puts ''

  pdfstat = Pdfmdstat.new(collected)
  pdfstat.tags options[:tags]
  pdfstat.analyse_metadata
end
153
+
154
# Command 'sort': sort one file, or every '*.pdf' directly inside a
# directory, into directories named after the author.
#
# Each file gets its own Pdfmdsort instance; the copy, interactive,
# destination, overwrite and dryrun settings are obtained per file from
# determineValidSetting with the command line option and a 'sort:*' key.
desc 'sort','Sort files into directories sorted by Author'
long_desc readLongDesc 'pdfmd/long_desc.pdfmdsort.txt'
method_option :destination, :aliases => '-d', :required => false, :type => :string, :desc => 'Defines the output directory'
method_option :copy, :aliases => '-c', :required => false, :type => :boolean, :desc => 'Copy files instead of moving them'
method_option :interactive, :aliases => '-i', :required => false, :type => :boolean, :desc => 'Enable/Disable interactive sorting'
# The key must be spelled :aliases, otherwise the short form '-o' is not registered.
method_option :overwrite, :aliases => '-o', :required => false, :type => :boolean, :desc => 'Enable/Disable file overwrite.', :lazy_default => true
method_option :dryrun, :aliases => '-n', :required => false, :type => :boolean, :desc => 'Run without changing something'
def sort(input)
  # A single file and a directory are handled by the same loop.
  files = File.file?(input) ? [input] : Dir.glob(input.chomp + '/*.pdf')

  files.each do |filename|
    pdfdoc = Pdfmdsort.new filename
    pdfdoc.copy        = pdfdoc.determineValidSetting(options[:copy], 'sort:copy')
    pdfdoc.interactive = pdfdoc.determineValidSetting(options[:interactive], 'sort:interactive')
    pdfdoc.destination = pdfdoc.determineValidSetting(options[:destination], 'sort:destination')
    pdfdoc.overwrite   = pdfdoc.determineValidSetting(options[:overwrite], 'sort:overwrite')
    pdfdoc.dryrun      = pdfdoc.determineValidSetting(options[:dryrun], 'sort:dryrun')
    pdfdoc.sort
  end
end
190
+
191
+
192
# Command 'rename': rename a file according to its metadata.
#
# Scheme: YYYYMMDD-author-subject-keywords.extension
#
# Note kept from the original source: this is messing up the logging and
# creates two different files.
#
# The keyword limit is only set on the renamer when determineValidSetting
# returns a truthy value for it; otherwise the renamer's own value stays.
desc 'rename', 'Rename the file according to Metadata'
long_desc readLongDesc('pdfmd/long_desc.pdfmdrename.txt')
method_option :dryrun, type: :boolean, aliases: '-n', desc: 'Run without making changes', required: false
method_option :allkeywords, type: :boolean, aliases: '-a', desc: 'Add all keywords (no limit)', required: false, lazy_default: true
method_option :nrkeywords, type: :string, aliases: '-k', desc: 'Number of keywords to include (Default: 3)', required: false
method_option :outputdir, aliases: '-o', type: :string, desc: 'Specify output directory', required: false
method_option :copy, aliases: '-c', type: :boolean, desc: 'Copy instead of moving the file when renaming', lazy_default: true
def rename(filename)
  renamer = Pdfmdrename.new(filename)

  renamer.dryrun      = renamer.determineValidSetting(options[:dryrun], 'rename:dryrun')
  renamer.allkeywords = renamer.determineValidSetting(options[:allkeywords], 'rename:allkeywords')
  renamer.outputdir   = renamer.determineValidSetting(options[:outputdir], 'rename:outputdir')

  keyword_limit = renamer.determineValidSetting(options[:nrkeywords], 'rename:nrkeywords')
  renamer.nrkeywords = keyword_limit if keyword_limit

  renamer.copy = renamer.determineValidSetting(options[:copy], 'rename:copy')
  renamer.rename
end
216
+
217
# Print the script version (the VERSION constant) on stdout.
# Reachable through the parameters '--version' and '-v'.
map '--version' => :__print_version, '-v' => :__print_version
desc '--version, -v', 'Show the current script version'
def __print_version
  $stdout.puts(VERSION)
end
225
+
226
# Print the 'revision' entry from the metadata of the installed pdfmd gem,
# as reported by `gem specification pdfmd metadata`.
#
# The method is named after the mapping target so that '--revision' and '-r'
# resolve to it. The former method name '__print_createdate' stays mapped as
# well, so an invocation by that name keeps working.
map %w[--revision -r __print_createdate] => :__print_revision
desc "--revision, -r", 'Show the revision of the gem'
def __print_revision
  metadata = YAML.load(`gem specification pdfmd metadata`)
  # The command yields no mapping when the gem or its metadata is missing;
  # an empty line is printed in that case, as for a missing 'revision' entry.
  revision = metadata.is_a?(Hash) ? metadata['revision'] : nil
  puts revision
end
232
+
233
+ end
234
+
235
+ DOC.start