pdfmd 1.9.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,115 @@
1
# == Class: pdfmdsort
#
# Sorts a PDF file into a sub-directory of a destination directory. The
# sub-directory is named after the normalised 'author' metatag of the file.
#
# TODO: Author values with a slash (One/two) should be sorted into one/two/yyyymmdd-one_to-xxx.pdf
class Pdfmdsort < Pdfmd

  attr_accessor :filename, :dryrun, :copy, :interactive, :destination, :overwrite

  # Initialize
  #
  # Passes +input+ on to the Pdfmd constructor and sets the sort defaults:
  # destination is the current directory, every switch is disabled.
  def initialize(input)
    super input
    @destination = '.'
    @interactive = false
    @copy        = false
    @dryrun      = false
    @overwrite   = false
  end

  #
  # Check if the destination is valid
  #
  # Returns false when the destination is an existing regular file and true
  # otherwise. A path that does not exist yet is accepted; #sort creates the
  # needed directories.
  def checkDestination
    log('debug', "Checking destination parameter '#{@destination}'.")

    if File.file?(@destination)
      log('error', "Destination '#{@destination}' is a file.")
      false
    else
      log('debug', "Destination '#{@destination}' as directory confirmed.")
      true
    end
  end

  #
  # Get the author
  #
  # Returns 'false' if check_metatags('author') reports no author (presumably
  # "tag missing" - confirm against Pdfmd). Otherwise returns the author as a
  # lower-case, transliterated string that is usable as a directory name:
  # dots, commas and whitespace become '_', '&' and '-' are dropped and runs
  # of underscores are collapsed into a single one.
  def get_author()
    return false unless check_metatags('author')

    author = @@metadata['author'].to_s
                                 .gsub(/[.,\s]/, '_')
                                 .delete('&-')
                                 .gsub(/_+/, '_') # collapse runs of any length, not only pairs

    I18n.enforce_available_locales = false
    I18n.transliterate(author).downcase # Normalising
  end

  #
  # Sort the file away
  #
  # Moves (or copies, when @copy is set) @filename into
  # '<destination>/<author>'. With @dryrun set only log entries are written.
  #
  # Returns nil when the destination is invalid or the user declines in
  # interactive mode, and true when the target file exists and overwriting is
  # disabled. Exits the process with status 1 when no author is available.
  def sort
    return unless checkDestination

    if @interactive
      answer = readUserInput("Process '#{@filename}' ([y]/n): ").to_s.strip
      answer = 'y' if answer.empty? # plain <enter> selects the default
      log('info', "User Answer for file '#{@filename}': #{answer}")

      # Only answers starting with y/Y count as consent.
      unless answer =~ /\Ay/i
        log('info', "Skipping file '#{@filename}' due to user answer: '#{answer}'.")
        return
      end
      log('info', "Processing file '#{@filename}' due to user answer: '#{answer}'.")
    end

    author = get_author
    if !author || author.empty?
      log('error', "File '#{@filename}' has not value for author set. Cannot sort file. Abort.")
      exit 1
    end

    targetdir  = File.join(@destination.chomp, author)
    targetfile = File.join(targetdir, File.basename(@filename))

    # Create the target dir if not existing.
    unless File.exist?(targetdir)
      if @dryrun
        log('info', "Dryrun: Created Directory '#{targetdir}'.")
      else
        FileUtils.mkdir_p(targetdir)
        log('info', "Created directory '#{targetdir}'.") # logged once it really exists
      end
    end

    # Check if the file already exists. FileUtils.cp/mv replace an existing
    # target on their own, so only the "do not overwrite" case needs action.
    if File.exist?(targetfile)
      unless @overwrite
        log('info', "File '#{@filename}' already exists, overwrite disabled: not replacing file.")
        return true
      end
      log('info', "File '#{@filename}' already exists. Overwrite active: replacing file.")
    end

    if @dryrun
      action = @copy ? 'Copy' : 'Move'
      log('info', "Dryrun: #{action} file '#{@filename}' to '#{targetdir}'.")
    elsif @copy
      log('info', "Copy file '#{@filename}' to '#{targetdir}'.")
      FileUtils.cp(@filename, targetdir)
    else
      log('info', "Move file '#{@filename}' to '#{targetdir}'.")
      FileUtils.mv(@filename, targetdir)
    end
  end

end
@@ -0,0 +1,117 @@
1
# == Class: pdfmdstat
#
# gather and store statistical information
# about pdf documents
require 'yaml'
require_relative './pdfmdmethods.rb'

class Pdfmdstat

  # Include unspecific methods for Pdfmd
  include Pdfmdmethods

  # Tags that are counted when no other selection is made.
  DEFAULT_TAGS = %w[author title subject createdate keywords].freeze

  attr_accessor :metadata

  # Count the default tags of the given metadata.
  #
  # metadata - enumerable of [name, data] pairs (e.g. a Hash keyed by
  #            filename). 'data' is either a Hash 'tag => value' or the
  #            string produced by Hash#to_s / Hash#inspect.
  def initialize(metadata)
    @metadata     = metadata
    @default_tags = DEFAULT_TAGS.dup
    @statdata     = count_values(metadata, @default_tags)
  end

  #
  # Method to set tags
  #
  # metatagnames - comma separated String of tag names. nil keeps the current
  #                selection; any other type logs an error and exits with
  #                status 1.
  #
  # The statistic is recounted for the new selection, so that tags outside
  # the defaults do not show up without data in #analyse_metadata.
  def tags(metatagnames)

    if metatagnames.is_a?(String)
      @default_tags = metatagnames.split(',').map(&:strip).reject(&:empty?)
      @statdata     = count_values(@metadata, @default_tags) if @metadata
      self.log('debug', "Setting tags for statistic to '#{metatagnames}'.")
    elsif !metatagnames.nil?
      self.log('error', 'Unkown Tag definition. Exit.')
      exit 1
    end

  end

  # Counting all values provided as hash in metadata
  # Optional keynames can be handed over as an array
  #
  # Returns a Hash 'tagname => { value => number of occurrences }' that has
  # one entry per requested key. Tags that were not requested and values that
  # are nil are skipped. Anything but an Array (or the default '') for keys
  # prints a message and exits with status 1.
  def count_values(metadata, keys = '')

    tagnames =
      if keys == ''
        DEFAULT_TAGS
      elsif keys.is_a?(Array)
        keys
      else
        puts 'invalid keys provided'
        exit 1
      end

    data = tagnames.each_with_object({}) { |tagname, counters| counters[tagname] = {} }

    # Iterate through all metadata and
    # count how often the metadata shows up in each
    # category
    metadata.each do |_name, raw|

      # SECURITY NOTE(review): string input is still evaluated as Ruby code to
      # stay compatible with callers that pass Hash#to_s output. The text
      # originates from PDF metadata, i.e. from untrusted files. Callers
      # should hand over the Hash itself, which is used without eval.
      datahash = raw.is_a?(Hash) ? raw : eval(raw)
      next unless datahash.is_a?(Hash)

      datahash.each do |tagkey, tagvalue|
        next if tagvalue.nil?
        next unless data.key?(tagkey) # tag was not requested

        data[tagkey][tagvalue] = data[tagkey].fetch(tagvalue, 0) + 1
      end
    end

    data

  end

  #
  # Run statistical overview about the metadata
  # Count all values in the metatags and summ them up
  #
  # Prints the counters as YAML: tag names capitalized and sorted, the values
  # of each tag sorted as well. Empty values are shown as '*empty*'.
  def analyse_metadata()

    report = {}
    @default_tags.sort.each do |tagname|
      counters = @statdata[tagname] || {} # selected tag without any data

      sorted = {}
      counters.sort_by { |value, _amount| value.to_s }.each do |value, amount|
        label = value.to_s.empty? ? '*empty*' : value
        sorted[label] = amount
      end

      report[tagname.capitalize] = sorted
    end

    # Only drop the leading YAML document marker, never content of values.
    puts report.to_yaml.sub(/\A---\n/, '')

  end

end
File without changes
data/lib/run.rb ADDED
@@ -0,0 +1,235 @@
1
#!/usr/bin/env ruby
#
# Command line entry point of pdfmd.
#
# The project files are loaded relative to this file instead of relative to
# the current working directory, so the script also starts when it is called
# from another directory. YAML is required explicitly because the revision
# command parses YAML output.
require 'yaml'
require_relative 'pdfmd'
require_relative 'pdfmd/pdfmdstat'
require "thor"

# Version and name of the gem; both are used to locate the installed gem
# directory when reading the long descriptions.
VERSION = '2.0.0'
NAME    = 'pdfmd'
8
+
9
#
# Read the content of the long description from an external file
#
# filename - path of the description file, relative to a search directory.
# paths    - optional list of directories to search, in order. Defaults to
#            '<directory of the running script>/../lib' and the lib
#            directory of the installed gem (built from NAME and VERSION).
#
# Returns the content of the first file found, or an empty string when the
# file is in none of the directories.
def readLongDesc(filename, paths = nil)

  paths ||= [
    File.join(File.dirname(File.expand_path($0)), '..', 'lib'),
    File.join(Gem.dir, 'gems', "#{NAME}-#{VERSION}", 'lib'),
  ]

  paths.each do |directory|
    candidate = File.join(directory, filename)
    # First match wins; otherwise a checkout plus an installed gem would
    # yield the description twice.
    return File.read(candidate) if File.file?(candidate)
  end

  ''

end
36
+
37
#
# Thor class
#
# Defines the pdfmd sub-commands (show, config, edit, stat, sort, rename) and
# the version/revision switches. Most commands resolve every setting through
# determineValidSetting(<command line value>, '<command>:<setting>') of the
# document object (presumably falling back to configured defaults - confirm
# in Pdfmdmethods).
class DOC < Thor

  # Class options for all commands (logging only)
  # none

  #
  # Show the current metadata tags
  #
  desc 'show', 'Show metadata of a file'
  long_desc readLongDesc 'pdfmd/long_desc.pdfmdshow.txt'
  method_option :tag, :type => :string, :aliases => '-t', :desc => 'Show specific tag(s), comma separated', :required => false
  method_option :format, :type => :string, :aliases => '-f', :desc => 'Define output format', :required => false
  method_option :includepdf, :type => :boolean, :aliases => '-i', :desc => 'Include the filename in output', :required => false
  def show(filename)

    pdfdoc        = Pdfmdshow.new filename
    format        = pdfdoc.determineValidSetting(options[:format], 'show:format')
    show_filename = pdfdoc.determineValidSetting(options[:includepdf], 'show:includepdf')
    show_tags     = pdfdoc.determineValidSetting(options[:tag], 'show:tags')
    pdfdoc.set_outputformat format
    pdfdoc.show_filename show_filename
    pdfdoc.set_tags show_tags
    puts pdfdoc.show_metatags

  end

  # Show current settings
  #
  desc 'config', 'Show config defaults'
  long_desc readLongDesc 'pdfmd/long_desc.pdfmdconfig.txt'
  method_option :show, :type => :boolean, :aliases => '-s', :required => false
  def config(subcommand = '')

    pdfdoc = Pdfmdconfig.new ''
    puts pdfdoc.show_config subcommand

  end

  #
  # Change a MetaTag Attribute
  #
  desc 'edit', 'Edit Meta Tag(s)'
  long_desc readLongDesc 'pdfmd/long_desc.pdfmdedit.txt'
  method_option :tag, :type => :string, :aliases => '-t', :desc => 'Name of the Tag(s) to Edit', :required => true, :lazy_default => 'all'
  method_option :rename, :type => :boolean, :aliases => '-r', :desc => 'Rename file after changing meta-tags', :required => false
  method_option :opendoc, :type => :boolean, :aliases => '-o', :desc => 'Open the PDF document in a separate window.', :required => false, :lazy_default => true
  def edit(filename)

    pdfdoc = Pdfmdedit.new filename
    tags   = pdfdoc.determineValidSetting(options[:tag], 'edit:tags')
    pdfdoc.opendoc   = pdfdoc.determineValidSetting(options[:opendoc], 'edit:opendoc')
    pdfdoc.pdfviewer = pdfdoc.determineValidSetting(nil, 'edit:pdfviewer')
    pdfdoc.set_tags tags
    pdfdoc.update_tags
    pdfdoc.write_tags filename

    # If the file shall be renamed at the same time, trigger the other task
    if pdfdoc.determineValidSetting(options[:rename], 'edit:rename')
      pdfdoc.log('info', 'Running rename command.')
      rename filename
    end

  end

  #
  # Show statistics
  #
  # For a directory the metadata of all '*.pdf' files (with --recursive also
  # those of the subdirectories) is collected and handed to Pdfmdstat.
  # Statistics for a single file are not implemented.
  desc 'stat', 'Show metadata statistics of multiple files'
  long_desc readLongDesc 'pdfmd/long_desc.pdfmdstat.txt'
  option :recursive, :type => :boolean, :aliases => '-r', :desc => 'Include subdirectories recursively.', :lazy_default => true, :required => false
  option :tags, :aliases => '-t', :type => :string, :desc => 'Define Metatags to run at', :lazy_default => 'author,title,subject,createdate,keywords', :required => false
  def stat(input)

    if File.file?(input)
      puts 'Input is a single file.'
      puts 'n.a.y.'
      return
    end

    # Resolve the list of files once; it drives both the loop and the
    # progress display.
    segments = [input.chomp, (options[:recursive] ? '**' : nil), '*.pdf'].compact
    pdffiles = Dir.glob(File.join(*segments)).select { |file| File.file?(file) }

    currentOutput = {}
    pdffiles.each_with_index do |filename, index|

      # Keyed by the full path: with --recursive equal basenames may show up
      # in several directories and must not replace each other.
      currentOutput[filename] = Pdfmd.new(filename).metadata.to_s

      # Print percentage. Multiply first, otherwise the integer division
      # yields 0 for more than 100 files.
      percentage = (index + 1) * 100 / pdffiles.size
      print "\r Status: #{percentage} % of #{pdffiles.size} files processed. "

    end
    puts ''
    puts ''

    pdfstat = Pdfmdstat.new(currentOutput)
    pdfstat.tags options[:tags]
    pdfstat.analyse_metadata

  end

  #
  # Sort the files into directories based on the author
  #
  # input is either a single file or a directory, of which all '*.pdf' files
  # (not recursive) are sorted.
  desc 'sort','Sort files into directories sorted by Author'
  long_desc readLongDesc 'pdfmd/long_desc.pdfmdsort.txt'
  method_option :destination, :aliases => '-d', :required => false, :type => :string, :desc => 'Defines the output directory'
  method_option :copy, :aliases => '-c', :required => false, :type => :boolean, :desc => 'Copy files instead of moving them'
  method_option :interactive, :aliases => '-i', :required => false, :type => :boolean, :desc => 'Enable/Disable interactive sorting'
  method_option :overwrite, :aliases => '-o', :required => false, :type => :boolean, :desc => 'Enable/Disable file overwrite.', :lazy_default => true
  method_option :dryrun, :aliases => '-n', :required => false, :type => :boolean, :desc => 'Run without changing something'
  def sort(input)

    if File.file?(input)
      sort_file input
    else
      # Run the actions for all files
      Dir.glob(File.join(input.chomp, '*.pdf')).each { |filename| sort_file filename }
    end

  end

  # Rename the file according to the Metadata
  #
  # Scheme: YYYYMMDD-author-subject-keywords.extension
  # this is messing up the logging and creates two different files
  desc 'rename', 'Rename the file according to Metadata'
  long_desc readLongDesc('pdfmd/long_desc.pdfmdrename.txt')
  method_option :dryrun, :type => :boolean, :aliases => '-n', :desc => 'Run without making changes', :required => false
  method_option :allkeywords, :type => :boolean, :aliases => '-a', :desc => 'Add all keywords (no limit)', :required => false, :lazy_default => true
  method_option :nrkeywords, :type => :string, :aliases => '-k', :desc => 'Number of keywords to include (Default: 3)', :required => false
  method_option :outputdir, :aliases => '-o', :type => :string, :desc => 'Specify output directory', :required => false
  method_option :copy, :aliases => '-c', :type => :boolean, :desc => 'Copy instead of moving the file when renaming', :lazy_default => true
  def rename(filename)

    pdfdoc = Pdfmdrename.new filename
    pdfdoc.dryrun      = pdfdoc.determineValidSetting(options[:dryrun], 'rename:dryrun')
    pdfdoc.allkeywords = pdfdoc.determineValidSetting(options[:allkeywords], 'rename:allkeywords')
    pdfdoc.outputdir   = pdfdoc.determineValidSetting(options[:outputdir], 'rename:outputdir')

    # Only set the number of keywords when a value is available.
    nrkeywords = pdfdoc.determineValidSetting(options[:nrkeywords], 'rename:nrkeywords')
    pdfdoc.nrkeywords = nrkeywords if nrkeywords

    pdfdoc.copy = pdfdoc.determineValidSetting(options[:copy], 'rename:copy')
    pdfdoc.rename

  end

  #
  # One parameter to show the current version
  #
  map %w[--version -v] => :__print_version
  desc "--version, -v", 'Show the current script version'
  def __print_version
    puts VERSION
  end

  # Show the 'revision' entry of the metadata of the installed gem.
  # The method carries the name the switches are mapped to; the former name
  # '__print_createdate' stays callable through the mapping.
  map %w[--revision -r __print_createdate] => :__print_revision
  desc "--revision, -r", 'Show the revision of the gem'
  def __print_revision
    metadata = YAML.load(`gem specification pdfmd metadata`)
    # Without an installed gem the command output is not a Hash.
    puts metadata.is_a?(Hash) ? metadata['revision'] : nil
  end

  private

  # Configure a Pdfmdsort object for a single file from the command line
  # options and sort the file. Private, therefore not a Thor command.
  def sort_file(filename)
    pdfdoc = Pdfmdsort.new filename
    pdfdoc.copy        = pdfdoc.determineValidSetting(options[:copy], 'sort:copy')
    pdfdoc.interactive = pdfdoc.determineValidSetting(options[:interactive], 'sort:interactive')
    pdfdoc.destination = pdfdoc.determineValidSetting(options[:destination], 'sort:destination')
    pdfdoc.overwrite   = pdfdoc.determineValidSetting(options[:overwrite], 'sort:overwrite')
    pdfdoc.dryrun      = pdfdoc.determineValidSetting(options[:dryrun], 'sort:dryrun')
    pdfdoc.sort
  end

end

# Start the command line interface.
DOC.start