pdfmd 2.5.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 970844407051f137b893c42e9702e1a51a7558e8
4
- data.tar.gz: a0f7daf1d0fa37e297f37650b087181a3ae64402
3
+ metadata.gz: 4749a93bfd91007699878f1a2e3a6a98b39b7e08
4
+ data.tar.gz: 8c48f25ec662757a6f4b41e3ac6cb3b275deea11
5
5
  SHA512:
6
- metadata.gz: 2b36151807b90b6474065900c29414b4ff95b74adb9a956ae56b1fab5439628f64994091a654ccf51d5bd255abffc072c90fed9edeb1b06e3d8043601dba905e
7
- data.tar.gz: 6ee948b86f665d1ae105083fb0075d6e25cd0fde22f69d9172041da0b0e9041c57f2a90a9cec6556f2ef088f710f35f756e9e5ef907361a7f96f346aed8e07d8
6
+ metadata.gz: c1ca2922304b90752e324a005213823f1e2b1f39eb77f5b35749671e9c0f3c08dbd42d4e52069bb6f444a7bf07182b087b0293f97b009e54ec7825d98b3a15a8
7
+ data.tar.gz: 122f3d170c6a9988aa522f53d0923168766a4868098f8118045f2b37cbb6624672095651d85fe0c0f945efcd99ed3a3dbc49885dd8303f448f5a2c220e145622
@@ -1,3 +1,11 @@
1
+ # Version 2.6.0
2
+ - Adding stat export formats.
3
+ - Bugfix, Abbreviations in keywords are not replaced anymore.
4
+ - Bugfix, Files in the destination directory are now ignored while sorting and
5
+ not used in the author-collision calculation any more.
6
+ - Bugfix, Sorting now also works with directories as input.
7
+ - Command 'stat': Added parameter to disable the percentage output.
8
+
1
9
  # Version 2.5.0
2
10
  - Bugfix, Removing output of debugging and empty lines.
3
11
  - Changing edit separation sign from ':' to '='.
data/TODO.mkd CHANGED
@@ -6,8 +6,12 @@
6
6
 
7
7
  ### Method: _stat_
8
8
  * Parameter to ignore differences in upper and lowercase
9
- * Parameter to disable percentage output
10
- * Parameter to set output format: json,yaml, hash
9
+
10
+ ### Method: _rename_
11
+ * The createdate pattern does not work with yyyy-mm-dd.
12
+
13
+ ### Method: _sort_
14
+ * Author values with a slash One/two should be sorted into one/two/yyyymmdd-one_to-xxx.pdf
11
15
 
12
16
  ## pdfmdedit.rb
13
17
 
data/bin/pdfmd CHANGED
@@ -8,7 +8,7 @@ require "fileutils"
8
8
  require "i18n"
9
9
  require 'pathname'
10
10
 
11
- VERSION = '2.5.0'
11
+ VERSION = '2.6.0'
12
12
  NAME = 'pdfmd'
13
13
 
14
14
  # Read the content of the long description from an external file
@@ -174,6 +174,8 @@ desc 'stat', 'Show metadata statistics of multiple files'
174
174
  long_desc readLongDesc 'pdfmd/long_desc.pdfmdstat.txt'
175
175
  option :recursive, :type => :boolean, :aliases => '-r', :desc => 'Include subdirectories recursively.', :lazy_default => true, :required => false
176
176
  option :tags, :aliases => '-t', :type => :string, :desc => 'Define Metatags to run at', :lazy_default => 'author,title,subject,createdate,keywords', :required => false
177
+ option :format, :aliases => '-f', :type => :string, :desc => 'Define output format.', :required => false, :default => 'yaml', :enum => ['yaml', 'json', 'hash']
178
+ option :status, :aliases => '-s', :type => :boolean, :desc => 'Show calculation status.', :required => false, :default => true
177
179
  def stat(input)
178
180
 
179
181
  filemetadata = Hash.new
@@ -189,27 +191,34 @@ def stat(input)
189
191
 
190
192
  # Count the number of files quickly to show an overview
191
193
  # nooFiles = numberOfFiles
192
- nooFiles = Dir[File.join(input.chomp, recursive, '*.pdf')].count { |file| File.file?(file) }
194
+ nooFiles = Dir[File.join(input.chomp, recursive, '*.pdf')].count { |file| File.file?(file) }
193
195
  currentNooFiles = 0
194
196
  Dir.glob("#{input.chomp}#{recursive}/*.pdf").each do |filename|
195
197
 
196
- # Print percentage
197
- currentNooFiles = currentNooFiles + 1
198
- percentage = 100 / nooFiles * currentNooFiles
199
- print "\r Status: #{percentage} % of #{nooFiles} files processed. "
198
+ # Print percentage if requested
199
+ if options[:status]
200
+ currentNooFiles = currentNooFiles + 1
201
+ percentage = 100 / nooFiles * currentNooFiles
202
+ print "\r Status: #{percentage} % of #{nooFiles} files processed. "
203
+ end
200
204
 
201
- pdfdoc = Pdfmd.new filename
202
- filemetadata = {}
205
+ pdfdoc = Pdfmd.new filename
206
+ filemetadata = {}
203
207
  currentOutput[File.basename(filename)] = pdfdoc.metadata.to_s
204
- pdfdoc = nil
208
+ pdfdoc = nil
205
209
 
206
210
  end
207
- puts ''
208
- puts ''
211
+ # Print some linebreak if the status has been shown.
212
+ # This makes it a bit prettier.
213
+ if options[:status]
214
+ puts ''
215
+ puts ''
216
+ end
209
217
 
210
218
  pdfstat = Pdfmdstat.new(currentOutput)
211
219
  pdfstat.tags options[:tags]
212
220
  pdfstat.analyse_metadata
221
+ pdfstat.output_metadata(options[:format])
213
222
 
214
223
  end
215
224
 
@@ -229,7 +238,7 @@ method_option :typo, :aliases => '-t', :required => false, :type => :boolean, :d
229
238
  def sort(*input)
230
239
 
231
240
  input.each do |file|
232
-
241
+
233
242
  if File.file?(file)
234
243
  pdfdoc = Pdfmdsort.new file
235
244
  pdfdoc.copy = pdfdoc.determineValidSetting(options[:copy], 'sort:copy')
@@ -243,8 +252,8 @@ def sort(*input)
243
252
 
244
253
  else
245
254
 
246
- # Run the actions for all files
247
- Dir.glob(input.chomp + '/*.pdf').each do |filename|
255
+ # Run the actions for all files which are in the directory
256
+ Dir.glob(input.join.chomp('/') + '/*.pdf').each do |filename|
248
257
  pdfdoc = Pdfmdsort.new filename
249
258
  pdfdoc.copy = pdfdoc.determineValidSetting(options[:copy], 'sort:copy')
250
259
  pdfdoc.interactive = pdfdoc.determineValidSetting(options[:interactive], 'sort:interactive')
@@ -14,10 +14,23 @@ Path to the directory containing PDF documents or subdirectories with PDF docume
14
14
  Example: `pdfmd stat ~/pdf`
15
15
 
16
16
 
17
- --r --recursive
17
+ -r --recursive
18
18
 
19
19
  If set to true, pdfmd includes all PDF documents found in subdirectories of <directory> as well.
20
20
 
21
21
  Default: false
22
22
 
23
23
 
24
+ -f --format
25
+
26
+ Sets an alternative output format. Valid values are 'hash', 'yaml', 'json'.
27
+
28
+ Default: yaml
29
+
30
+
31
+ -s --status
32
+
33
+ Enable/Disable the output during the statistics calculation.
34
+
35
+ Default: true
36
+
@@ -152,6 +152,11 @@ class Pdfmdrename < Pdfmd
152
152
  end
153
153
 
154
154
  # Get the keywords
155
+ # This method tries to handle the keywords intelligently and returns them.
156
+ # While doing this, the abbreviations are also taken into account. Word
157
+ # combinations, on the other hand, that merely contain some of the keywords
158
+ # of an abbreviation should not be changed.
159
+ # That's what makes it a bit tricky.
155
160
  def get_keywords(preface = '')
156
161
 
157
162
  if !@@metadata['keywords'].empty?
@@ -170,7 +175,7 @@ class Pdfmdrename < Pdfmd
170
175
  end
171
176
  keyvaluearray = keyvaluearray.sort_by{|size| -size.length}
172
177
  keyvaluearray.each do |keystring|
173
- value = value.gsub(/#{keystring.lstrip.chomp}\s?/i, abbreviation.to_s)
178
+ value = value.gsub(/^#{keystring.lstrip.chomp}\s?/i, abbreviation.to_s)
174
179
  end
175
180
  end
176
181
 
@@ -1,6 +1,5 @@
1
1
  # == Class: pdfmdsort
2
2
  #
3
- # TODO: Author values with a slave One/two should be sorted into one/two/yyyymmdd-one_to-xxx.pdf
4
3
  class Pdfmdsort < Pdfmd
5
4
 
6
5
  require 'fuzzystringmatch'
@@ -80,14 +79,21 @@ class Pdfmdsort < Pdfmd
80
79
  # Get all subfolders
81
80
  subDirectories = Dir[@destination + '/*']
82
81
  subDirectories.each do |fullPathFolder|
83
- stringSimilarity = fuzzy.getDistance(
84
- fullPathFolder.gsub(@destination + '/', ''),
85
- targetdir.gsub(@destination + '/', '')
86
- )
87
- if stringSimilarity > @stringSimBorder
88
- self.log('debug', "findSimilarTargetdir: Found String value #{stringSimilarity.to_s} for target '#{fullPathFolder}'.")
89
- returnValue = fullPathFolder
90
- end
82
+
83
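+ # Match only directories, not any files that might be in the target directory. NOTE(review): the condition below is negated (!File.directory?), so it runs for files and skips directories - confirm.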
84
+ if !File.directory?(fullPathFolder)
85
+
86
+ stringSimilarity = fuzzy.getDistance(
87
+ fullPathFolder.gsub(@destination + '/', ''),
88
+ targetdir.gsub(@destination + '/', '')
89
+ )
90
+ if stringSimilarity > @stringSimBorder
91
+ self.log('debug', "findSimilarTargetdir: Found String value #{stringSimilarity.to_s} for target '#{fullPathFolder}'.")
92
+ returnValue = fullPathFolder
93
+ end
94
+
95
+ end
96
+
91
97
  end
92
98
  returnValue
93
99
  end
@@ -11,8 +11,10 @@ class Pdfmdstat
11
11
 
12
12
  attr_accessor :metadata
13
13
 
14
- @statdata = {}
15
- @hieradata = {}
14
+ # Instance variables
15
+ @statdata = {}
16
+ @hieradata = {}
17
+ @metadata_hash = {} # Keeps the metadata once it's available
16
18
 
17
19
  def initialize(metadata)
18
20
 
@@ -110,8 +112,29 @@ class Pdfmdstat
110
112
 
111
113
  end
112
114
 
113
- puts sortedOutputHash.to_yaml.gsub(/---\n/,'')
115
+ # Store the sorted metadata in the instance variable @metadata_hash
116
+ @metadata_hash = sortedOutputHash
114
117
 
115
118
  end
116
119
 
120
+ # Output the metadata in one of multiple formats
121
+ # Default: yaml
122
+ #
123
+ # Alternatives:
124
+ # json, hash
125
+ def output_metadata(format = 'yaml')
126
+
127
+ case format
128
+ when 'json'
129
+ require 'json'
130
+ puts @metadata_hash.to_json
131
+ when 'hash'
132
+ puts @metadata_hash
133
+ else
134
+ puts @metadata_hash.to_yaml.gsub(/---\n/,'')
135
+ end
136
+
137
+ end
138
+
139
+
117
140
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfmd
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.0
4
+ version: 2.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Roos
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-09-25 00:00:00.000000000 Z
11
+ date: 2016-10-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -174,8 +174,8 @@ homepage: https://github.com/Micronarrativ/ruby-pmd
174
174
  licenses:
175
175
  - MIT
176
176
  metadata:
177
- created: '2016-09-25 20:19:06'
178
- revision: '20160925201906'
177
+ created: '2016-10-14 21:27:32'
178
+ revision: '20161014212732'
179
179
  post_install_message: ". Run `pdfmd` to see the command help."
180
180
  rdoc_options: []
181
181
  require_paths: