pdfmd 2.5.0 → 2.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 970844407051f137b893c42e9702e1a51a7558e8
4
- data.tar.gz: a0f7daf1d0fa37e297f37650b087181a3ae64402
3
+ metadata.gz: 4749a93bfd91007699878f1a2e3a6a98b39b7e08
4
+ data.tar.gz: 8c48f25ec662757a6f4b41e3ac6cb3b275deea11
5
5
  SHA512:
6
- metadata.gz: 2b36151807b90b6474065900c29414b4ff95b74adb9a956ae56b1fab5439628f64994091a654ccf51d5bd255abffc072c90fed9edeb1b06e3d8043601dba905e
7
- data.tar.gz: 6ee948b86f665d1ae105083fb0075d6e25cd0fde22f69d9172041da0b0e9041c57f2a90a9cec6556f2ef088f710f35f756e9e5ef907361a7f96f346aed8e07d8
6
+ metadata.gz: c1ca2922304b90752e324a005213823f1e2b1f39eb77f5b35749671e9c0f3c08dbd42d4e52069bb6f444a7bf07182b087b0293f97b009e54ec7825d98b3a15a8
7
+ data.tar.gz: 122f3d170c6a9988aa522f53d0923168766a4868098f8118045f2b37cbb6624672095651d85fe0c0f945efcd99ed3a3dbc49885dd8303f448f5a2c220e145622
@@ -1,3 +1,11 @@
1
+ # Version 2.6.0
2
+ - Adding stat export formats.
3
+ - Bugfix, Abbreviations in keywords are not replaced anymore.
4
+ - Bugfix, Files in the destination directory are now ignored while sorting and
5
+ not used in the author-collision calculation any more.
6
+ - Bugfix, Sorting now also works with directories as input.
7
+ - Command 'stat': Added parameter to disable the percentage output.
8
+
1
9
  # Version 2.5.0
2
10
  - Bugfix, Removing output of debugging and empty lines.
3
11
  - Changing edit separation sign from ':' to '='.
data/TODO.mkd CHANGED
@@ -6,8 +6,12 @@
6
6
 
7
7
  ### Method: _stat_
8
8
  * Parameter to ignore differences in upper and lowercase
9
- * Parameter to disable percentage output
10
- * Parameter to set output format: json,yaml, hash
9
+
10
+ ### Method: _rename_
11
+ * The createdate pattern does not work with yyyy-mm-dd.
12
+
13
+ ### Method: _sort_
14
+ * Author values with a slash One/two should be sorted into one/two/yyyymmdd-one_to-xxx.pdf
11
15
 
12
16
  ## pdfmdedit.rb
13
17
 
data/bin/pdfmd CHANGED
@@ -8,7 +8,7 @@ require "fileutils"
8
8
  require "i18n"
9
9
  require 'pathname'
10
10
 
11
- VERSION = '2.5.0'
11
+ VERSION = '2.6.0'
12
12
  NAME = 'pdfmd'
13
13
 
14
14
  # Read the content of the long description from an external file
@@ -174,6 +174,8 @@ desc 'stat', 'Show metadata statistics of multiple files'
174
174
  long_desc readLongDesc 'pdfmd/long_desc.pdfmdstat.txt'
175
175
  option :recursive, :type => :boolean, :aliases => '-r', :desc => 'Include subdirectories recursively.', :lazy_default => true, :required => false
176
176
  option :tags, :aliases => '-t', :type => :string, :desc => 'Define Metatags to run at', :lazy_default => 'author,title,subject,createdate,keywords', :required => false
177
+ option :format, :aliases => '-f', :type => :string, :desc => 'Define output format.', :required => false, :default => 'yaml', :enum => ['yaml', 'json', 'hash']
178
+ option :status, :aliases => '-s', :type => :boolean, :desc => 'Show calculation status.', :required => false, :default => true
177
179
  def stat(input)
178
180
 
179
181
  filemetadata = Hash.new
@@ -189,27 +191,34 @@ def stat(input)
189
191
 
190
192
  # Count the number of files quickly to show an overview
191
193
  # nooFiles = numberOfFiles
192
- nooFiles = Dir[File.join(input.chomp, recursive, '*.pdf')].count { |file| File.file?(file) }
194
+ nooFiles = Dir[File.join(input.chomp, recursive, '*.pdf')].count { |file| File.file?(file) }
193
195
  currentNooFiles = 0
194
196
  Dir.glob("#{input.chomp}#{recursive}/*.pdf").each do |filename|
195
197
 
196
- # Print percentage
197
- currentNooFiles = currentNooFiles + 1
198
- percentage = 100 / nooFiles * currentNooFiles
199
- print "\r Status: #{percentage} % of #{nooFiles} files processed. "
198
+ # Print percentage if requested
199
+ if options[:status]
200
+ currentNooFiles = currentNooFiles + 1
201
+ percentage = 100 / nooFiles * currentNooFiles
202
+ print "\r Status: #{percentage} % of #{nooFiles} files processed. "
203
+ end
200
204
 
201
- pdfdoc = Pdfmd.new filename
202
- filemetadata = {}
205
+ pdfdoc = Pdfmd.new filename
206
+ filemetadata = {}
203
207
  currentOutput[File.basename(filename)] = pdfdoc.metadata.to_s
204
- pdfdoc = nil
208
+ pdfdoc = nil
205
209
 
206
210
  end
207
- puts ''
208
- puts ''
211
+ # Print some line breaks if the status has been shown.
212
+ # This makes it a bit prettier.
213
+ if options[:status]
214
+ puts ''
215
+ puts ''
216
+ end
209
217
 
210
218
  pdfstat = Pdfmdstat.new(currentOutput)
211
219
  pdfstat.tags options[:tags]
212
220
  pdfstat.analyse_metadata
221
+ pdfstat.output_metadata(options[:format])
213
222
 
214
223
  end
215
224
 
@@ -229,7 +238,7 @@ method_option :typo, :aliases => '-t', :required => false, :type => :boolean, :d
229
238
  def sort(*input)
230
239
 
231
240
  input.each do |file|
232
-
241
+
233
242
  if File.file?(file)
234
243
  pdfdoc = Pdfmdsort.new file
235
244
  pdfdoc.copy = pdfdoc.determineValidSetting(options[:copy], 'sort:copy')
@@ -243,8 +252,8 @@ def sort(*input)
243
252
 
244
253
  else
245
254
 
246
- # Run the actions for all files
247
- Dir.glob(input.chomp + '/*.pdf').each do |filename|
255
+ # Run the actions for all files which are in the directory
256
+ Dir.glob(input.join.chomp('/') + '/*.pdf').each do |filename|
248
257
  pdfdoc = Pdfmdsort.new filename
249
258
  pdfdoc.copy = pdfdoc.determineValidSetting(options[:copy], 'sort:copy')
250
259
  pdfdoc.interactive = pdfdoc.determineValidSetting(options[:interactive], 'sort:interactive')
@@ -14,10 +14,23 @@ Path to the directory containing PDF documents or subdirectories with PDF docume
14
14
  Example: `pdfmd stat ~/pdf`
15
15
 
16
16
 
17
- --r --recursive
17
+ -r --recursive
18
18
 
19
19
  If set to true, pdfmd includes all PDF documents found in subdirectories of <directory> as well.
20
20
 
21
21
  Default: false
22
22
 
23
23
 
24
+ -f --format
25
+
26
+ Sets an alternative output format. Valid values are 'hash', 'yaml', 'json'.
27
+
28
+ Default: yaml
29
+
30
+
31
+ -s --status
32
+
33
+ Enable/Disable the output during the statistics calculation.
34
+
35
+ Default: true
36
+
@@ -152,6 +152,11 @@ class Pdfmdrename < Pdfmd
152
152
  end
153
153
 
154
154
  # Get the keywords
155
+ # This method tries to handle the keywords intelligently and return them
156
+ # to the caller. While doing this, the abbreviations are also
157
+ # taken into account. Word combinations, on the other hand, that contain some
158
+ # keywords for the abbreviation should not be changed.
159
+ # That's what makes it a bit tricky.
155
160
  def get_keywords(preface = '')
156
161
 
157
162
  if !@@metadata['keywords'].empty?
@@ -170,7 +175,7 @@ class Pdfmdrename < Pdfmd
170
175
  end
171
176
  keyvaluearray = keyvaluearray.sort_by{|size| -size.length}
172
177
  keyvaluearray.each do |keystring|
173
- value = value.gsub(/#{keystring.lstrip.chomp}\s?/i, abbreviation.to_s)
178
+ value = value.gsub(/^#{keystring.lstrip.chomp}\s?/i, abbreviation.to_s)
174
179
  end
175
180
  end
176
181
 
@@ -1,6 +1,5 @@
1
1
  # == Class: pdfmdsort
2
2
  #
3
- # TODO: Author values with a slave One/two should be sorted into one/two/yyyymmdd-one_to-xxx.pdf
4
3
  class Pdfmdsort < Pdfmd
5
4
 
6
5
  require 'fuzzystringmatch'
@@ -80,14 +79,21 @@ class Pdfmdsort < Pdfmd
80
79
  # Get all subfolders
81
80
  subDirectories = Dir[@destination + '/*']
82
81
  subDirectories.each do |fullPathFolder|
83
- stringSimilarity = fuzzy.getDistance(
84
- fullPathFolder.gsub(@destination + '/', ''),
85
- targetdir.gsub(@destination + '/', '')
86
- )
87
- if stringSimilarity > @stringSimBorder
88
- self.log('debug', "findSimilarTargetdir: Found String value #{stringSimilarity.to_s} for target '#{fullPathFolder}'.")
89
- returnValue = fullPathFolder
90
- end
82
+
83
+ # Match only directories, not any files that might be in the target directory
84
+ if !File.directory?(fullPathFolder)
85
+
86
+ stringSimilarity = fuzzy.getDistance(
87
+ fullPathFolder.gsub(@destination + '/', ''),
88
+ targetdir.gsub(@destination + '/', '')
89
+ )
90
+ if stringSimilarity > @stringSimBorder
91
+ self.log('debug', "findSimilarTargetdir: Found String value #{stringSimilarity.to_s} for target '#{fullPathFolder}'.")
92
+ returnValue = fullPathFolder
93
+ end
94
+
95
+ end
96
+
91
97
  end
92
98
  returnValue
93
99
  end
@@ -11,8 +11,10 @@ class Pdfmdstat
11
11
 
12
12
  attr_accessor :metadata
13
13
 
14
- @statdata = {}
15
- @hieradata = {}
14
+ # Instancevariables
15
+ @statdata = {}
16
+ @hieradata = {}
17
+ @metadata_hash = {} # Keeps the metadata once it's available
16
18
 
17
19
  def initialize(metadata)
18
20
 
@@ -110,8 +112,29 @@ class Pdfmdstat
110
112
 
111
113
  end
112
114
 
113
- puts sortedOutputHash.to_yaml.gsub(/---\n/,'')
115
+ # Store the metadata in the instance variable for later output
116
+ @metadata_hash = sortedOutputHash
114
117
 
115
118
  end
116
119
 
120
+ # Output the metadata in one of several formats
121
+ # Default: yaml
122
+ #
123
+ # else:
124
+ # json, hash
125
+ def output_metadata(format = 'yaml')
126
+
127
+ case format
128
+ when 'json'
129
+ require 'json'
130
+ puts @metadata_hash.to_json
131
+ when 'hash'
132
+ puts @metadata_hash
133
+ else
134
+ puts @metadata_hash.to_yaml.gsub(/---\n/,'')
135
+ end
136
+
137
+ end
138
+
139
+
117
140
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfmd
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.0
4
+ version: 2.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Roos
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-09-25 00:00:00.000000000 Z
11
+ date: 2016-10-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -174,8 +174,8 @@ homepage: https://github.com/Micronarrativ/ruby-pmd
174
174
  licenses:
175
175
  - MIT
176
176
  metadata:
177
- created: '2016-09-25 20:19:06'
178
- revision: '20160925201906'
177
+ created: '2016-10-14 21:27:32'
178
+ revision: '20161014212732'
179
179
  post_install_message: ". Run `pdfmd` to see the command help."
180
180
  rdoc_options: []
181
181
  require_paths: