pdfmd 2.5.0 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/TODO.mkd +6 -2
- data/bin/pdfmd +23 -14
- data/lib/pdfmd/long_desc.pdfmdstat.txt +14 -1
- data/lib/pdfmd/pdfmdrename.rb +6 -1
- data/lib/pdfmd/pdfmdsort.rb +15 -9
- data/lib/pdfmd/pdfmdstat.rb +26 -3
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4749a93bfd91007699878f1a2e3a6a98b39b7e08
|
4
|
+
data.tar.gz: 8c48f25ec662757a6f4b41e3ac6cb3b275deea11
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c1ca2922304b90752e324a005213823f1e2b1f39eb77f5b35749671e9c0f3c08dbd42d4e52069bb6f444a7bf07182b087b0293f97b009e54ec7825d98b3a15a8
|
7
|
+
data.tar.gz: 122f3d170c6a9988aa522f53d0923168766a4868098f8118045f2b37cbb6624672095651d85fe0c0f945efcd99ed3a3dbc49885dd8303f448f5a2c220e145622
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
# Version 2.6.0
|
2
|
+
- Adding stat export formats.
|
3
|
+
- Bugfix, Abbreviations in keywords are not replaced anymore.
|
4
|
+
- Bugfix, Files in the destination directory are now ignored while sorting and
|
5
|
+
not used in the author-collision calculation any more.
|
6
|
+
- Bugfix, Sorting now also works with directories as input.
|
7
|
+
- Command 'stat': Added parameter to disable the percentage output.
|
8
|
+
|
1
9
|
# Version 2.5.0
|
2
10
|
- Bugfix, Removing output of debugging and empty lines.
|
3
11
|
- Changing edit separation sign from ':' to '='.
|
data/TODO.mkd
CHANGED
@@ -6,8 +6,12 @@
|
|
6
6
|
|
7
7
|
### Method: _stat_
|
8
8
|
* Parameter to ignore differences in upper and lowercase
|
9
|
-
|
10
|
-
|
9
|
+
|
10
|
+
### Method: _rename_
|
11
|
+
* The createdate pattern does not work with yyyy-mm-dd.
|
12
|
+
|
13
|
+
### Method: _sort_
|
14
|
+
* Author values with a slash One/two should be sorted into one/two/yyyymmdd-one_to-xxx.pdf
|
11
15
|
|
12
16
|
## pdfmdedit.rb
|
13
17
|
|
data/bin/pdfmd
CHANGED
@@ -8,7 +8,7 @@ require "fileutils"
|
|
8
8
|
require "i18n"
|
9
9
|
require 'pathname'
|
10
10
|
|
11
|
-
VERSION = '2.
|
11
|
+
VERSION = '2.6.0'
|
12
12
|
NAME = 'pdfmd'
|
13
13
|
|
14
14
|
# Read the content of the long description from an external file
|
@@ -174,6 +174,8 @@ desc 'stat', 'Show metadata statistics of multiple files'
|
|
174
174
|
long_desc readLongDesc 'pdfmd/long_desc.pdfmdstat.txt'
|
175
175
|
option :recursive, :type => :boolean, :aliases => '-r', :desc => 'Include subdirectories recursively.', :lazy_default => true, :required => false
|
176
176
|
option :tags, :aliases => '-t', :type => :string, :desc => 'Define Metatags to run at', :lazy_default => 'author,title,subject,createdate,keywords', :required => false
|
177
|
+
option :format, :aliases => '-f', :type => :string, :desc => 'Define output format.', :required => false, :default => 'yaml', :enum => ['yaml', 'json', 'hash']
|
178
|
+
option :status, :aliases => '-s', :type => :boolean, :desc => 'Show calculation status.', :required => false, :default => true
|
177
179
|
def stat(input)
|
178
180
|
|
179
181
|
filemetadata = Hash.new
|
@@ -189,27 +191,34 @@ def stat(input)
|
|
189
191
|
|
190
192
|
# Count the number of files quickly to show an overview
|
191
193
|
# nooFiles = numberOfFiles
|
192
|
-
nooFiles
|
194
|
+
nooFiles = Dir[File.join(input.chomp, recursive, '*.pdf')].count { |file| File.file?(file) }
|
193
195
|
currentNooFiles = 0
|
194
196
|
Dir.glob("#{input.chomp}#{recursive}/*.pdf").each do |filename|
|
195
197
|
|
196
|
-
# Print percentage
|
197
|
-
|
198
|
-
|
199
|
-
|
198
|
+
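# Print percentage if requested. NOTE(review): `100 / nooFiles * currentNooFiles` uses integer division, so the value truncates (and stays 0 for more than 100 files); `100 * currentNooFiles / nooFiles` looks intended - confirm.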
|
199
|
+
if options[:status]
|
200
|
+
currentNooFiles = currentNooFiles + 1
|
201
|
+
percentage = 100 / nooFiles * currentNooFiles
|
202
|
+
print "\r Status: #{percentage} % of #{nooFiles} files processed. "
|
203
|
+
end
|
200
204
|
|
201
|
-
pdfdoc
|
202
|
-
filemetadata
|
205
|
+
pdfdoc = Pdfmd.new filename
|
206
|
+
filemetadata = {}
|
203
207
|
currentOutput[File.basename(filename)] = pdfdoc.metadata.to_s
|
204
|
-
pdfdoc
|
208
|
+
pdfdoc = nil
|
205
209
|
|
206
210
|
end
|
207
|
-
|
208
|
-
|
211
|
+
# Print some linebreak if the status has been shown.
|
212
|
+
# This makes it a bit prettier.
|
213
|
+
if options[:status]
|
214
|
+
puts ''
|
215
|
+
puts ''
|
216
|
+
end
|
209
217
|
|
210
218
|
pdfstat = Pdfmdstat.new(currentOutput)
|
211
219
|
pdfstat.tags options[:tags]
|
212
220
|
pdfstat.analyse_metadata
|
221
|
+
pdfstat.output_metadata(options[:format])
|
213
222
|
|
214
223
|
end
|
215
224
|
|
@@ -229,7 +238,7 @@ method_option :typo, :aliases => '-t', :required => false, :type => :boolean, :d
|
|
229
238
|
def sort(*input)
|
230
239
|
|
231
240
|
input.each do |file|
|
232
|
-
|
241
|
+
|
233
242
|
if File.file?(file)
|
234
243
|
pdfdoc = Pdfmdsort.new file
|
235
244
|
pdfdoc.copy = pdfdoc.determineValidSetting(options[:copy], 'sort:copy')
|
@@ -243,8 +252,8 @@ def sort(*input)
|
|
243
252
|
|
244
253
|
else
|
245
254
|
|
246
|
-
# Run the actions for all files
|
247
|
-
Dir.glob(input.chomp + '/*.pdf').each do |filename|
|
255
|
+
# Run the actions for all files which are in the directory
|
256
|
+
Dir.glob(input.join.chomp('/') + '/*.pdf').each do |filename|
|
248
257
|
pdfdoc = Pdfmdsort.new filename
|
249
258
|
pdfdoc.copy = pdfdoc.determineValidSetting(options[:copy], 'sort:copy')
|
250
259
|
pdfdoc.interactive = pdfdoc.determineValidSetting(options[:interactive], 'sort:interactive')
|
@@ -14,10 +14,23 @@ Path to the directory containing PDF documents or subdirectories with PDF docume
|
|
14
14
|
Example: `pdfmd stat ~/pdf`
|
15
15
|
|
16
16
|
|
17
|
-
|
17
|
+
-r --recursive
|
18
18
|
|
19
19
|
If set to true, pdfmd includes all PDF documents found in subdirectories of <directory> as well.
|
20
20
|
|
21
21
|
Default: false
|
22
22
|
|
23
23
|
|
24
|
+
-f --format
|
25
|
+
|
26
|
+
Sets an alternative output format. Valid values are 'hash', 'yaml', 'json'.
|
27
|
+
|
28
|
+
Default: yaml
|
29
|
+
|
30
|
+
|
31
|
+
-s --status
|
32
|
+
|
33
|
+
Enable/Disable the output during the statistics calculation.
|
34
|
+
|
35
|
+
Default: true
|
36
|
+
|
data/lib/pdfmd/pdfmdrename.rb
CHANGED
@@ -152,6 +152,11 @@ class Pdfmdrename < Pdfmd
|
|
152
152
|
end
|
153
153
|
|
154
154
|
# Get the keywords
|
155
|
+
# This method tries to handle the keywords intelligently and
|
156
|
+
# return them. While doing this, the abbreviations are also being
|
157
|
+
# taken into account. Word combinations, on the other hand, that contain some
|
158
|
+
# keywords for the abbreviation, should not be changed.
|
159
|
+
# That's what makes it a bit tricky.
|
155
160
|
def get_keywords(preface = '')
|
156
161
|
|
157
162
|
if !@@metadata['keywords'].empty?
|
@@ -170,7 +175,7 @@ class Pdfmdrename < Pdfmd
|
|
170
175
|
end
|
171
176
|
keyvaluearray = keyvaluearray.sort_by{|size| -size.length}
|
172
177
|
keyvaluearray.each do |keystring|
|
173
|
-
value = value.gsub(
|
178
|
+
value = value.gsub(/^#{keystring.lstrip.chomp}\s?/i, abbreviation.to_s)
|
174
179
|
end
|
175
180
|
end
|
176
181
|
|
data/lib/pdfmd/pdfmdsort.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
# == Class: pdfmdsort
|
2
2
|
#
|
3
|
-
# TODO: Author values with a slave One/two should be sorted into one/two/yyyymmdd-one_to-xxx.pdf
|
4
3
|
class Pdfmdsort < Pdfmd
|
5
4
|
|
6
5
|
require 'fuzzystringmatch'
|
@@ -80,14 +79,21 @@ class Pdfmdsort < Pdfmd
|
|
80
79
|
# Get all subfolders
|
81
80
|
subDirectories = Dir[@destination + '/*']
|
82
81
|
subDirectories.each do |fullPathFolder|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
82
|
+
|
83
|
+
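# Match only directories, not any files that might be in the target directory. NOTE(review): the guard below is negated (`!File.directory?`), which selects non-directories instead - confirm and drop the `!` if so.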
|
84
|
+
if !File.directory?(fullPathFolder)
|
85
|
+
|
86
|
+
stringSimilarity = fuzzy.getDistance(
|
87
|
+
fullPathFolder.gsub(@destination + '/', ''),
|
88
|
+
targetdir.gsub(@destination + '/', '')
|
89
|
+
)
|
90
|
+
if stringSimilarity > @stringSimBorder
|
91
|
+
self.log('debug', "findSimilarTargetdir: Found String value #{stringSimilarity.to_s} for target '#{fullPathFolder}'.")
|
92
|
+
returnValue = fullPathFolder
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
96
|
+
|
91
97
|
end
|
92
98
|
returnValue
|
93
99
|
end
|
data/lib/pdfmd/pdfmdstat.rb
CHANGED
@@ -11,8 +11,10 @@ class Pdfmdstat
|
|
11
11
|
|
12
12
|
attr_accessor :metadata
|
13
13
|
|
14
|
-
|
15
|
-
@
|
14
|
+
# Instance variables
|
15
|
+
@statdata = {}
|
16
|
+
@hieradata = {}
|
17
|
+
@metadata_hash = {} # Keeps the metadata once it's available
|
16
18
|
|
17
19
|
def initialize(metadata)
|
18
20
|
|
@@ -110,8 +112,29 @@ class Pdfmdstat
|
|
110
112
|
|
111
113
|
end
|
112
114
|
|
113
|
-
|
115
|
+
# Load the instance variable with the metadata
|
116
|
+
@metadata_hash = sortedOutputHash
|
114
117
|
|
115
118
|
end
|
116
119
|
|
120
|
+
# Output the metadata in one of several formats
|
121
|
+
# Default: yaml
|
122
|
+
#
|
123
|
+
# else:
|
124
|
+
# json, hash
|
125
|
+
def output_metadata(format = 'yaml')
|
126
|
+
|
127
|
+
case format
|
128
|
+
when 'json'
|
129
|
+
require 'json'
|
130
|
+
puts @metadata_hash.to_json
|
131
|
+
when 'hash'
|
132
|
+
puts @metadata_hash
|
133
|
+
else
|
134
|
+
puts @metadata_hash.to_yaml.gsub(/---\n/,'')
|
135
|
+
end
|
136
|
+
|
137
|
+
end
|
138
|
+
|
139
|
+
|
117
140
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfmd
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Roos
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -174,8 +174,8 @@ homepage: https://github.com/Micronarrativ/ruby-pmd
|
|
174
174
|
licenses:
|
175
175
|
- MIT
|
176
176
|
metadata:
|
177
|
-
created: '2016-
|
178
|
-
revision: '
|
177
|
+
created: '2016-10-14 21:27:32'
|
178
|
+
revision: '20161014212732'
|
179
179
|
post_install_message: ". Run `pdfmd` to see the command help."
|
180
180
|
rdoc_options: []
|
181
181
|
require_paths:
|