pdfmd 2.5.0 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/TODO.mkd +6 -2
- data/bin/pdfmd +23 -14
- data/lib/pdfmd/long_desc.pdfmdstat.txt +14 -1
- data/lib/pdfmd/pdfmdrename.rb +6 -1
- data/lib/pdfmd/pdfmdsort.rb +15 -9
- data/lib/pdfmd/pdfmdstat.rb +26 -3
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4749a93bfd91007699878f1a2e3a6a98b39b7e08
|
4
|
+
data.tar.gz: 8c48f25ec662757a6f4b41e3ac6cb3b275deea11
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c1ca2922304b90752e324a005213823f1e2b1f39eb77f5b35749671e9c0f3c08dbd42d4e52069bb6f444a7bf07182b087b0293f97b009e54ec7825d98b3a15a8
|
7
|
+
data.tar.gz: 122f3d170c6a9988aa522f53d0923168766a4868098f8118045f2b37cbb6624672095651d85fe0c0f945efcd99ed3a3dbc49885dd8303f448f5a2c220e145622
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
# Version 2.6.0
|
2
|
+
- Adding stat export formats.
|
3
|
+
- Bugfix, Abbreviations in keywords are not replaced anymore.
|
4
|
+
- Bugfix, Files in the destination directory are now ignored while sorting and
|
5
|
+
not used in the author-collision calculation any more.
|
6
|
+
- Bugfix, Sorting now also works with directories as input.
|
7
|
+
- Command 'stat': Added parameter to disable the percentage output.
|
8
|
+
|
1
9
|
# Version 2.5.0
|
2
10
|
- Bugfix, Removing output of debugging and empty lines.
|
3
11
|
- Changing edit separation sign from ':' to '='.
|
data/TODO.mkd
CHANGED
@@ -6,8 +6,12 @@
|
|
6
6
|
|
7
7
|
### Method: _stat_
|
8
8
|
* Parameter to ignore differences in upper and lowercase
|
9
|
-
|
10
|
-
|
9
|
+
|
10
|
+
### Method: _rename_
|
11
|
+
* The createdate pattern does not work with yyyy-mm-dd.
|
12
|
+
|
13
|
+
### Method: _sort_
|
14
|
+
* Author values with a slash One/two should be sorted into one/two/yyyymmdd-one_to-xxx.pdf
|
11
15
|
|
12
16
|
## pdfmdedit.rb
|
13
17
|
|
data/bin/pdfmd
CHANGED
@@ -8,7 +8,7 @@ require "fileutils"
|
|
8
8
|
require "i18n"
|
9
9
|
require 'pathname'
|
10
10
|
|
11
|
-
VERSION = '2.
|
11
|
+
VERSION = '2.6.0'
|
12
12
|
NAME = 'pdfmd'
|
13
13
|
|
14
14
|
# Read the content of the long description from an external file
|
@@ -174,6 +174,8 @@ desc 'stat', 'Show metadata statistics of multiple files'
|
|
174
174
|
long_desc readLongDesc 'pdfmd/long_desc.pdfmdstat.txt'
|
175
175
|
option :recursive, :type => :boolean, :aliases => '-r', :desc => 'Include subdirectories recursively.', :lazy_default => true, :required => false
|
176
176
|
option :tags, :aliases => '-t', :type => :string, :desc => 'Define Metatags to run at', :lazy_default => 'author,title,subject,createdate,keywords', :required => false
|
177
|
+
option :format, :aliases => '-f', :type => :string, :desc => 'Define output format.', :required => false, :default => 'yaml', :enum => ['yaml', 'json', 'hash']
|
178
|
+
option :status, :aliases => '-s', :type => :boolean, :desc => 'Show calculation status.', :required => false, :default => true
|
177
179
|
def stat(input)
|
178
180
|
|
179
181
|
filemetadata = Hash.new
|
@@ -189,27 +191,34 @@ def stat(input)
|
|
189
191
|
|
190
192
|
# Count the number of files quickly to show an overview
|
191
193
|
# nooFiles = numberOfFiles
|
192
|
-
nooFiles
|
194
|
+
nooFiles = Dir[File.join(input.chomp, recursive, '*.pdf')].count { |file| File.file?(file) }
|
193
195
|
currentNooFiles = 0
|
194
196
|
Dir.glob("#{input.chomp}#{recursive}/*.pdf").each do |filename|
|
195
197
|
|
196
|
-
# Print percentage
|
197
|
-
|
198
|
-
|
199
|
-
|
198
|
+
# Print percentage if requested
|
199
|
+
if options[:status]
|
200
|
+
currentNooFiles = currentNooFiles + 1
|
201
|
+
percentage = 100 / nooFiles * currentNooFiles
|
202
|
+
print "\r Status: #{percentage} % of #{nooFiles} files processed. "
|
203
|
+
end
|
200
204
|
|
201
|
-
pdfdoc
|
202
|
-
filemetadata
|
205
|
+
pdfdoc = Pdfmd.new filename
|
206
|
+
filemetadata = {}
|
203
207
|
currentOutput[File.basename(filename)] = pdfdoc.metadata.to_s
|
204
|
-
pdfdoc
|
208
|
+
pdfdoc = nil
|
205
209
|
|
206
210
|
end
|
207
|
-
|
208
|
-
|
211
|
+
# Print some linebreak if the status has been shown.
|
212
|
+
# This makes it a bit prettier.
|
213
|
+
if options[:status]
|
214
|
+
puts ''
|
215
|
+
puts ''
|
216
|
+
end
|
209
217
|
|
210
218
|
pdfstat = Pdfmdstat.new(currentOutput)
|
211
219
|
pdfstat.tags options[:tags]
|
212
220
|
pdfstat.analyse_metadata
|
221
|
+
pdfstat.output_metadata(options[:format])
|
213
222
|
|
214
223
|
end
|
215
224
|
|
@@ -229,7 +238,7 @@ method_option :typo, :aliases => '-t', :required => false, :type => :boolean, :d
|
|
229
238
|
def sort(*input)
|
230
239
|
|
231
240
|
input.each do |file|
|
232
|
-
|
241
|
+
|
233
242
|
if File.file?(file)
|
234
243
|
pdfdoc = Pdfmdsort.new file
|
235
244
|
pdfdoc.copy = pdfdoc.determineValidSetting(options[:copy], 'sort:copy')
|
@@ -243,8 +252,8 @@ def sort(*input)
|
|
243
252
|
|
244
253
|
else
|
245
254
|
|
246
|
-
# Run the actions for all files
|
247
|
-
Dir.glob(input.chomp + '/*.pdf').each do |filename|
|
255
|
+
# Run the actions for all files which are in the directory
|
256
|
+
Dir.glob(input.join.chomp('/') + '/*.pdf').each do |filename|
|
248
257
|
pdfdoc = Pdfmdsort.new filename
|
249
258
|
pdfdoc.copy = pdfdoc.determineValidSetting(options[:copy], 'sort:copy')
|
250
259
|
pdfdoc.interactive = pdfdoc.determineValidSetting(options[:interactive], 'sort:interactive')
|
@@ -14,10 +14,23 @@ Path to the directory containing PDF documents or subdirectories with PDF docume
|
|
14
14
|
Example: `pdfmd stat ~/pdf`
|
15
15
|
|
16
16
|
|
17
|
-
|
17
|
+
-r --recursive
|
18
18
|
|
19
19
|
If set to true, pdfmd includes all PDF documents found in subdirectories of <directory> as well.
|
20
20
|
|
21
21
|
Default: false
|
22
22
|
|
23
23
|
|
24
|
+
-f --format
|
25
|
+
|
26
|
+
Sets an alternative output format. Valid values are 'hash', 'yaml', 'json'.
|
27
|
+
|
28
|
+
Default: yaml
|
29
|
+
|
30
|
+
|
31
|
+
-s --status
|
32
|
+
|
33
|
+
Enable/Disable the output during the statistics calculation.
|
34
|
+
|
35
|
+
Default: true
|
36
|
+
|
data/lib/pdfmd/pdfmdrename.rb
CHANGED
@@ -152,6 +152,11 @@ class Pdfmdrename < Pdfmd
|
|
152
152
|
end
|
153
153
|
|
154
154
|
# Get the keywords
|
155
|
+
# This method tries to handle the keywords intelligently and
|
156
|
+
# return them. While doing this, the abbreviations are also being
|
157
|
+
# taken into account. Word combinations, on the other hand, that contain some
|
158
|
+
# keywords for the abbreviation, should not be changed.
|
159
|
+
# That's what makes it a bit tricky.
|
155
160
|
def get_keywords(preface = '')
|
156
161
|
|
157
162
|
if !@@metadata['keywords'].empty?
|
@@ -170,7 +175,7 @@ class Pdfmdrename < Pdfmd
|
|
170
175
|
end
|
171
176
|
keyvaluearray = keyvaluearray.sort_by{|size| -size.length}
|
172
177
|
keyvaluearray.each do |keystring|
|
173
|
-
value = value.gsub(
|
178
|
+
value = value.gsub(/^#{keystring.lstrip.chomp}\s?/i, abbreviation.to_s)
|
174
179
|
end
|
175
180
|
end
|
176
181
|
|
data/lib/pdfmd/pdfmdsort.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
# == Class: pdfmdsort
|
2
2
|
#
|
3
|
-
# TODO: Author values with a slave One/two should be sorted into one/two/yyyymmdd-one_to-xxx.pdf
|
4
3
|
class Pdfmdsort < Pdfmd
|
5
4
|
|
6
5
|
require 'fuzzystringmatch'
|
@@ -80,14 +79,21 @@ class Pdfmdsort < Pdfmd
|
|
80
79
|
# Get all subfolders
|
81
80
|
subDirectories = Dir[@destination + '/*']
|
82
81
|
subDirectories.each do |fullPathFolder|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
82
|
+
|
83
|
+
# Match only directories, not any files that might be in the target directory
|
84
|
+
if !File.directory?(fullPathFolder)
|
85
|
+
|
86
|
+
stringSimilarity = fuzzy.getDistance(
|
87
|
+
fullPathFolder.gsub(@destination + '/', ''),
|
88
|
+
targetdir.gsub(@destination + '/', '')
|
89
|
+
)
|
90
|
+
if stringSimilarity > @stringSimBorder
|
91
|
+
self.log('debug', "findSimilarTargetdir: Found String value #{stringSimilarity.to_s} for target '#{fullPathFolder}'.")
|
92
|
+
returnValue = fullPathFolder
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
96
|
+
|
91
97
|
end
|
92
98
|
returnValue
|
93
99
|
end
|
data/lib/pdfmd/pdfmdstat.rb
CHANGED
@@ -11,8 +11,10 @@ class Pdfmdstat
|
|
11
11
|
|
12
12
|
attr_accessor :metadata
|
13
13
|
|
14
|
-
|
15
|
-
@
|
14
|
+
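# Instance variables (NOTE(review): assigned in the class body, so they are set on the class object, not on each instance)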
|
15
|
+
@statdata = {}
|
16
|
+
@hieradata = {}
|
17
|
+
@metadata_hash = {} # Keeps the metadata ones it's available
|
16
18
|
|
17
19
|
def initialize(metadata)
|
18
20
|
|
@@ -110,8 +112,29 @@ class Pdfmdstat
|
|
110
112
|
|
111
113
|
end
|
112
114
|
|
113
|
-
|
115
|
+
# Load the instance variable with the metadata
|
116
|
+
@metadata_hash = sortedOutputHash
|
114
117
|
|
115
118
|
end
|
116
119
|
|
120
|
+
# Output the metadata in one of several formats
|
121
|
+
# Default: yaml
|
122
|
+
#
|
123
|
+
# else:
|
124
|
+
# json, hash
|
125
|
+
def output_metadata(format = 'yaml')
|
126
|
+
|
127
|
+
case format
|
128
|
+
when 'json'
|
129
|
+
require 'json'
|
130
|
+
puts @metadata_hash.to_json
|
131
|
+
when 'hash'
|
132
|
+
puts @metadata_hash
|
133
|
+
else
|
134
|
+
puts @metadata_hash.to_yaml.gsub(/---\n/,'')
|
135
|
+
end
|
136
|
+
|
137
|
+
end
|
138
|
+
|
139
|
+
|
117
140
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfmd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Roos
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -174,8 +174,8 @@ homepage: https://github.com/Micronarrativ/ruby-pmd
|
|
174
174
|
licenses:
|
175
175
|
- MIT
|
176
176
|
metadata:
|
177
|
-
created: '2016-
|
178
|
-
revision: '
|
177
|
+
created: '2016-10-14 21:27:32'
|
178
|
+
revision: '20161014212732'
|
179
179
|
post_install_message: ". Run `pdfmd` to see the command help."
|
180
180
|
rdoc_options: []
|
181
181
|
require_paths:
|