dir_validator 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +3 -0
- data/LICENSE.rdoc +28 -0
- data/README.rdoc +134 -0
- data/lib/dir_validator/catalog.rb +115 -0
- data/lib/dir_validator/item.rb +119 -0
- data/lib/dir_validator/quantity.rb +59 -0
- data/lib/dir_validator/validator.rb +265 -0
- data/lib/dir_validator/warning.rb +28 -0
- data/lib/dir_validator.rb +17 -0
- data/tutorial/tutorial.rb +88 -0
- metadata +160 -0
data/CHANGELOG.rdoc
ADDED
data/LICENSE.rdoc
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
Copyright (c) 2012 by The Board of Trustees of the Leland Stanford Junior
|
2
|
+
University. All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use of this distribution in source and binary forms, with or
|
5
|
+
without modification, are permitted provided that:
|
6
|
+
|
7
|
+
* The above copyright notice and this permission notice appear in all copies and
|
8
|
+
supporting documentation.
|
9
|
+
|
10
|
+
* The name, identifiers, and trademarks of The Board of Trustees of the Leland
|
11
|
+
Stanford Junior University are not used in advertising or publicity without the
|
12
|
+
express prior written permission of The Board of Trustees of the Leland
|
13
|
+
Stanford Junior University.
|
14
|
+
|
15
|
+
* Recipients acknowledge that this distribution is made available as a research
|
16
|
+
courtesy, "as is", potentially with defects, without any obligation on the part
|
17
|
+
of The Board of Trustees of the Leland Stanford Junior University to provide
|
18
|
+
support, services, or repair.
|
19
|
+
|
20
|
+
THE BOARD OF TRUSTEES OF THE LELAND STANFORD JUNIOR UNIVERSITY DISCLAIMS ALL
|
21
|
+
WARRANTIES, EXPRESS OR IMPLIED, WITH REGARD TO THIS SOFTWARE, INCLUDING WITHOUT
|
22
|
+
LIMITATION ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
23
|
+
PARTICULAR PURPOSE, AND IN NO EVENT SHALL THE BOARD OF TRUSTEES OF THE LELAND
|
24
|
+
STANFORD JUNIOR UNIVERSITY BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
|
25
|
+
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
26
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT (INCLUDING NEGLIGENCE) OR STRICT
|
27
|
+
LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
|
28
|
+
SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
= dir_validator
|
2
|
+
|
3
|
+
== Synopsis
|
4
|
+
|
5
|
+
require 'dir_validator'
|
6
|
+
dv = DirValidator.new('some/path')
|
7
|
+
dv.dirs('blah', :pattern => '*').each do |subdir|
|
8
|
+
# More validations as needed.
|
9
|
+
end
|
10
|
+
dv.report()
|
11
|
+
|
12
|
+
|
13
|
+
== Overview
|
14
|
+
|
15
|
+
This gem provides a convenient syntax for checking whether the contents of a
|
16
|
+
directory structure match your expectations.
|
17
|
+
|
18
|
+
The best place to start is by reading the {file:tutorial/tutorial.rb tutorial script}.
|
19
|
+
|
20
|
+
The public API for the gem is fairly simple. First you set up the validator by
|
21
|
+
passing in the path to the directory structure that you want to check.
|
22
|
+
|
23
|
+
require 'dir_validator'
|
24
|
+
dv = DirValidator.new('some/path')
|
25
|
+
|
26
|
+
Then you define your expectations regarding the content of the directory
|
27
|
+
structure. This is done with the four methods used to declare particular
|
28
|
+
validations:
|
29
|
+
|
30
|
+
dirs()
|
31
|
+
files()
|
32
|
+
dir()
|
33
|
+
file()
|
34
|
+
|
35
|
+
The validation methods use this syntax:
|
36
|
+
|
37
|
+
dirs(VID, OPTS)
|
38
|
+
|
39
|
+
# where VID = The validation identifier.
|
40
|
+
# Can be any string meaningful to the user.
|
41
|
+
# Used when reporting warnings.
|
42
|
+
#
|
43
|
+
# OPTS = A hash of options to set up expectations regarding
|
44
|
+
# the names of files or directories as well as their
|
45
|
+
# quantity.
|
46
|
+
|
47
|
+
The plural validation methods return an array of {DirValidator::Item} objects.
|
48
|
+
The singular variants return one such object (or nil). The returned objects are
|
49
|
+
those that (a) meet the criteria specified in the OPTS hash, and (b) have not
|
50
|
+
been matched already by prior validations.
|
51
|
+
|
52
|
+
The validation criteria defined in the OPTS hash come in three general types:
|
53
|
+
|
54
|
+
* Name-related criteria affect whether a particular {DirValidator::Item} will be
|
55
|
+
returned (only if its file or directory name matches the criteria). You can
|
56
|
+
supply one of the following:
|
57
|
+
|
58
|
+
:name # A literal string.
|
59
|
+
:re # A regular expression, supplied as either a Regexp or String.
|
60
|
+
:pattern # A glob-like pattern. Supports only the * and ? wildcards.
|
61
|
+
|
62
|
+
# Also see the :recurse option below.
|
63
|
+
|
64
|
+
* Quantity assertions. These control the maximum number of {DirValidator::Item}
|
65
|
+
objects that will be returned. They also generate a warning if too few are
|
66
|
+
found.
|
67
|
+
|
68
|
+
:n # A string in one of the following forms:
|
69
|
+
# '*' Zero or more.
|
70
|
+
# '+' One or more.
|
71
|
+
# '?' Zero or one.
|
72
|
+
# 'n+' n or more
|
73
|
+
# 'n' Exactly n.
|
74
|
+
# 'm-n' m through n, inclusive.
|
75
|
+
|
76
|
+
* Other attributes:
|
77
|
+
|
78
|
+
:recurse # Boolean (default = false).
|
79
|
+
|
80
|
+
# Normally, validation methods find only the immediate
|
81
|
+
# children of the object upon which the method is called. If
|
82
|
+
# :recurse is true, items deeper in the hierarchy can be
|
83
|
+
# discovered.
|
84
|
+
|
85
|
+
# For example, given this content:
|
86
|
+
# some/path/
|
87
|
+
# foo/
|
88
|
+
# bar/
|
89
|
+
# foo/
|
90
|
+
|
91
|
+
# And this validator.
|
92
|
+
dv = DirValidator.new('some/path')
|
93
|
+
|
94
|
+
# This call would return only the 'foo' directory.
|
95
|
+
dv.dirs('a', :re => /foo$/)
|
96
|
+
|
97
|
+
# Whereas this call would return both 'foo' and 'foo/bar/foo'.
|
98
|
+
dv.dirs('a', :re => /foo$/, :recurse => true)
|
99
|
+
|
100
|
+
After the validations have been defined, you can examine the results programmatically:
|
101
|
+
|
102
|
+
dv.validate()
|
103
|
+
dv.warnings.each do |w|
|
104
|
+
puts w.vid
|
105
|
+
puts w.opts.inspect
|
106
|
+
end
|
107
|
+
|
108
|
+
More commonly, you can print the information contained in the warnings in the form
|
109
|
+
of a basic CSV report.
|
110
|
+
|
111
|
+
dv.report()
|
112
|
+
|
113
|
+
The warnings and the CSV report contain the following information:
|
114
|
+
|
115
|
+
vid # Validation identifier, as discussed above.
|
116
|
+
got # The number of items found.
|
117
|
+
n # A normalized version of the :n option discussed above.
|
118
|
+
base_dir # The parent directory of the item.
|
119
|
+
name # The name-related options discussed above.
|
120
|
+
re # "
|
121
|
+
pattern # "
|
122
|
+
path # The path of the item.
|
123
|
+
|
124
|
+
|
125
|
+
== Known issues
|
126
|
+
|
127
|
+
Currently handles only regular files and directories.
|
128
|
+
|
129
|
+
Not yet tested on Windows.
|
130
|
+
|
131
|
+
|
132
|
+
== Copyright
|
133
|
+
|
134
|
+
Copyright © 2012 Stanford University Library. See LICENSE for details.
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# A Validator has one Catalog, which contains all of the information
|
2
|
+
# about the Item objects discovered in the Validator.root_dir.
|
3
|
+
#
|
4
|
+
# @!visibility private
|
5
|
+
class DirValidator::Catalog
|
6
|
+
|
7
|
+
FS = '\\' + File::SEPARATOR
|
8
|
+
DOTDIR = '\.\.?\z' # A dotdir is . or .. at end-of-string,
|
9
|
+
DOTDIR_RE = / ( \A | #{FS} ) #{DOTDIR} /x # preceded by start-of-string or file-sep.
|
10
|
+
|
11
|
+
# Takes a validator, because we need to pass the validator down
|
12
|
+
# to the Item objects that are created.
|
13
|
+
def initialize(validator)
|
14
|
+
# The validator and an array of all Items in the Catalog.
|
15
|
+
@validator = validator
|
16
|
+
@items = nil
|
17
|
+
|
18
|
+
# Two indexes to boost the performance of the unmatched_items method.
|
19
|
+
# Both indexes contain the catalog_id values of the remaining unmatched
|
20
|
+
# Items. The catalog_id values also serve as indexes into the @items
|
21
|
+
# array. The indexes have the following structure:
|
22
|
+
#
|
23
|
+
# @unmatched[Item.catalog_id] = true
|
24
|
+
# @bdi[Item.base_dir][Item.catalog_id] = true
|
25
|
+
#
|
26
|
+
# The @bdi index is more fine-grained and thus gives the largest
|
27
|
+
# performance boost. For typical uses, calls to unmatched_items()
|
28
|
+
# will have a base_dir, so we can use @bdi.
|
29
|
+
@unmatched = {}
|
30
|
+
@bdi = {}
|
31
|
+
end
|
32
|
+
|
33
|
+
# Returns the discovered Items, loading them if needed.
|
34
|
+
def items
|
35
|
+
return @items ||= load_items()
|
36
|
+
end
|
37
|
+
|
38
|
+
# Scans the Validator.root_dir, creating a new Item for each file/dir
|
39
|
+
# found, and adding the Item to the indexes used by the Catalog.
|
40
|
+
def load_items
|
41
|
+
catalog_id = -1
|
42
|
+
@items = []
|
43
|
+
Dir.chdir(@validator.root_path) do
|
44
|
+
Dir.glob('**/*', File::FNM_DOTMATCH).each do |path|
|
45
|
+
# We want dotfiles, but not the . and .. dirs.
|
46
|
+
next if path_is_dot_dir(path)
|
47
|
+
# Create the new Item, and give it a unique ID, which is
|
48
|
+
# also an index into the @items array.
|
49
|
+
catalog_id += 1
|
50
|
+
item = DirValidator::Item.new(@validator, path, catalog_id)
|
51
|
+
@items << item
|
52
|
+
# Add Item to the indexes.
|
53
|
+
add_to_index(item)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
return @items
|
57
|
+
end
|
58
|
+
|
59
|
+
# Takes a path as a string. Returns true if it's a . or .. directory.
|
60
|
+
def path_is_dot_dir(path)
|
61
|
+
return path =~ DOTDIR_RE ? true : false
|
62
|
+
end
|
63
|
+
|
64
|
+
# Takes an Item object and adds it to the Catalog indexes.
|
65
|
+
def add_to_index(item)
|
66
|
+
cid = item.catalog_id
|
67
|
+
dn = item.dirname2
|
68
|
+
@bdi[dn] ||= {}
|
69
|
+
@bdi[dn][cid] = true
|
70
|
+
@unmatched[cid] = true
|
71
|
+
end
|
72
|
+
|
73
|
+
# Takes an Item and removes it from the Catalog indexes.
|
74
|
+
def delete_from_index(item)
|
75
|
+
cid = item.catalog_id
|
76
|
+
dn = item.dirname2
|
77
|
+
@bdi[dn].delete(cid)
|
78
|
+
@unmatched.delete(cid)
|
79
|
+
end
|
80
|
+
|
81
|
+
# Returns unmatched directories from the Catalog.
|
82
|
+
def unmatched_dirs(base_dir = nil)
|
83
|
+
return unmatched_items(base_dir).select { |i| i.is_dir }
|
84
|
+
end
|
85
|
+
|
86
|
+
# Returns unmatched files from the Catalog.
|
87
|
+
def unmatched_files(base_dir = nil)
|
88
|
+
return unmatched_items(base_dir).select { |i| i.is_file }
|
89
|
+
end
|
90
|
+
|
91
|
+
# Returns unmatched files and directories from the Catalog.
|
92
|
+
def unmatched_items(base_dir = nil)
|
93
|
+
# If a base_dir is given, we'll use the @bdi index. Otherwise,
|
94
|
+
# we'll use the @unmatched index. When using @bdi, we also
|
95
|
+
# must return [] if @bdi doesn't contain base_dir as a key.
|
96
|
+
# We sort the catalog_id values to obtain a deterministic ordering.
|
97
|
+
itms = items()
|
98
|
+
h = base_dir ? @bdi[base_dir] : @unmatched
|
99
|
+
return [] unless h
|
100
|
+
return h.keys.sort.map { |cid| itms[cid] }
|
101
|
+
end
|
102
|
+
|
103
|
+
# Takes an array of Items. Marks them as matched and removes them
|
104
|
+
# from the Catalog indexes. We can't do this within unmatched_items()
|
105
|
+
# because we don't know at that time which of the returned Items will
|
106
|
+
# survive the validation-methods name-filtering and quantity-filtering
|
107
|
+
# criteria.
|
108
|
+
def mark_as_matched(matched_items)
|
109
|
+
matched_items.each do |i|
|
110
|
+
i.mark_as_matched
|
111
|
+
delete_from_index(i)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
# @!attribute [r] path
|
4
|
+
# @return [String]
|
5
|
+
# The path to the Item, omitting the root_dir of the {DirValidator::Validator}.
|
6
|
+
#
|
7
|
+
# @!attribute [r] dirname
|
8
|
+
# @return [String]
|
9
|
+
# The path to the parent directory of the Item, or '.' if the parent
|
10
|
+
# directory is the root_dir of the {DirValidator::Validator}.
|
11
|
+
#
|
12
|
+
# @!attribute [r] match_data
|
13
|
+
# @return [MatchData]
|
14
|
+
# The MatchData from the most recent regular expression test against the Item.
|
15
|
+
class DirValidator::Item
|
16
|
+
|
17
|
+
attr_reader(:path, :dirname, :match_data)
|
18
|
+
attr_reader(:pathname, :dirname2, :catalog_id, :matched, :target, :filetype) # @!visibility private
|
19
|
+
|
20
|
+
# Returns a new Item, based on these arguments:
|
21
|
+
# - Validator.
|
22
|
+
# - String: path to a file or directory (omitting Validator.root_dir).
|
23
|
+
# - Integer: the Catalog ID assigned by the Catalog class.
|
24
|
+
#
|
25
|
+
# @!visibility private
|
26
|
+
def initialize(validator, path, catalog_id)
|
27
|
+
@validator = validator
|
28
|
+
@pathname = Pathname.new(path).cleanpath
|
29
|
+
@path = @pathname.to_s
|
30
|
+
@dirname = @pathname.dirname.to_s
|
31
|
+
@dirname2 = @dirname == '.' ? '' : @dirname
|
32
|
+
@catalog_id = catalog_id
|
33
|
+
@matched = false
|
34
|
+
@target = nil
|
35
|
+
@match_data = nil
|
36
|
+
@filetype = @pathname.file? ? :file :
|
37
|
+
@pathname.directory? ? :dir : nil
|
38
|
+
end
|
39
|
+
|
40
|
+
# Just a front-end for Ruby's File#basename.
|
41
|
+
#
|
42
|
+
# @param args See File#basename.
|
43
|
+
# @return [String] The basename of the Item.
|
44
|
+
def basename(*args)
|
45
|
+
return @pathname.basename(*args).to_s
|
46
|
+
end
|
47
|
+
|
48
|
+
# @!visibility private
|
49
|
+
def is_dir
|
50
|
+
return @filetype == :dir
|
51
|
+
end
|
52
|
+
|
53
|
+
# @!visibility private
|
54
|
+
def is_file
|
55
|
+
return @filetype == :file
|
56
|
+
end
|
57
|
+
|
58
|
+
# @!visibility private
|
59
|
+
def mark_as_matched
|
60
|
+
@matched = true
|
61
|
+
end
|
62
|
+
|
63
|
+
# @!visibility private
|
64
|
+
def set_target(t)
|
65
|
+
@target = t
|
66
|
+
end
|
67
|
+
|
68
|
+
# Takes a Regexp. Tries to match Item.target against the Regexp.
|
69
|
+
# Stores the resulting MatchData.
|
70
|
+
#
|
71
|
+
# @!visibility private
|
72
|
+
def target_match(regex)
|
73
|
+
@match_data = regex.match(@target)
|
74
|
+
return @match_data
|
75
|
+
end
|
76
|
+
|
77
|
+
# Validation method, using a {DirValidator::Item} as the receiver.
|
78
|
+
#
|
79
|
+
# @see DirValidator::Validator#dir
|
80
|
+
# @return (see DirValidator::Validator#dir)
|
81
|
+
def dir(vid, opts = {})
|
82
|
+
return @validator.dir(vid, item_opts(opts))
|
83
|
+
end
|
84
|
+
|
85
|
+
# @see #dir
|
86
|
+
# @return (see DirValidator::Validator#file)
|
87
|
+
def file(vid, opts = {})
|
88
|
+
return @validator.file(vid, item_opts(opts))
|
89
|
+
end
|
90
|
+
|
91
|
+
# @see #dir
|
92
|
+
# @return (see DirValidator::Validator#dirs)
|
93
|
+
def dirs(vid, opts = {})
|
94
|
+
return @validator.dirs(vid, item_opts(opts))
|
95
|
+
end
|
96
|
+
|
97
|
+
# @see #dir
|
98
|
+
# @return (see DirValidator::Validator#files)
|
99
|
+
def files(vid, opts = {})
|
100
|
+
return @validator.files(vid, item_opts(opts))
|
101
|
+
end
|
102
|
+
|
103
|
+
# Takes a validation-method opts hash.
|
104
|
+
# Returns a new hash of opts with the appropriate value
|
105
|
+
# for :base_dir. That value depends on whether the current Item
|
106
|
+
# is a dir or file, and whether the file has a parent dir.
|
107
|
+
#
|
108
|
+
# @!visibility private
|
109
|
+
def item_opts(opts)
|
110
|
+
if is_dir
|
111
|
+
return opts.merge(:base_dir => @path)
|
112
|
+
elsif @dirname == '.'
|
113
|
+
return opts
|
114
|
+
else
|
115
|
+
return opts.merge(:base_dir => @dirname)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# @!visibility private
|
2
|
+
class DirValidator::Quantity
|
3
|
+
|
4
|
+
attr_reader(:spec, :min_n, :max_n, :max_index)
|
5
|
+
|
6
|
+
# Takes a string and returns a new DirValidator::Quantity object.
|
7
|
+
# min_n Minimum expected N of items.
|
8
|
+
# max_n Maximum N of items to be matched.
|
9
|
+
# max_index Array index corresponding to max_n
|
10
|
+
def initialize(spec)
|
11
|
+
@spec = spec
|
12
|
+
parse_spec
|
13
|
+
end
|
14
|
+
|
15
|
+
def parse_spec
|
16
|
+
case @spec
|
17
|
+
when '*'
|
18
|
+
# 0+.
|
19
|
+
@min_n = 0
|
20
|
+
@max_n = 1.0 / 0
|
21
|
+
@max_index = -1
|
22
|
+
when '+'
|
23
|
+
# 1+.
|
24
|
+
@min_n = 1
|
25
|
+
@max_n = 1.0 / 0
|
26
|
+
@max_index = -1
|
27
|
+
when '?'
|
28
|
+
# Zero or one.
|
29
|
+
@min_n = 0
|
30
|
+
@max_n = 1
|
31
|
+
@max_index = @max_n - 1
|
32
|
+
when /\A (\d+)\+ \z/x
|
33
|
+
# n+
|
34
|
+
@min_n = $1.to_i
|
35
|
+
@max_n = 1.0 / 0
|
36
|
+
@max_index = -1
|
37
|
+
when /\A (\d+) \z/x
|
38
|
+
# n
|
39
|
+
@min_n = $1.to_i
|
40
|
+
@max_n = @min_n
|
41
|
+
@max_index = @max_n - 1
|
42
|
+
when /\A (\d+) - (\d+) \z/x
|
43
|
+
# m-n
|
44
|
+
@min_n = $1.to_i
|
45
|
+
@max_n = $2.to_i
|
46
|
+
@max_index = @max_n - 1
|
47
|
+
else
|
48
|
+
invalid_spec()
|
49
|
+
end
|
50
|
+
|
51
|
+
# Sanity check min and max.
|
52
|
+
invalid_spec() if (@min_n > @max_n or @min_n < 0)
|
53
|
+
end
|
54
|
+
|
55
|
+
# Raises an ArgumentError naming the quantifier spec (@spec) that could
# not be parsed or that failed the min/max sanity check.
def invalid_spec
  raise ArgumentError, "Invalid quantifier: #{@spec.inspect}."
end
|
58
|
+
|
59
|
+
end
|
@@ -0,0 +1,265 @@
|
|
1
|
+
# @!attribute [r] root_path
|
2
|
+
# @return [String]
|
3
|
+
# The root path of the validator.
|
4
|
+
#
|
5
|
+
# @!attribute [r] warnings
|
6
|
+
# @return [Array]
|
7
|
+
# The validator's {DirValidator::Warning} objects.
|
8
|
+
class DirValidator::Validator
|
9
|
+
|
10
|
+
attr_reader(:root_path, :warnings)
|
11
|
+
attr_reader(:catalog, :validated) # @!visibility private
|
12
|
+
|
13
|
+
FILE_SEP = File::SEPARATOR # @!visibility private
|
14
|
+
EXTRA_VID = '_EXTRA_' # @!visibility private
|
15
|
+
|
16
|
+
# @param root_path [String] Path to the directory structure to be validated.
|
17
|
+
def initialize(root_path)
|
18
|
+
@root_path = root_path
|
19
|
+
@catalog = DirValidator::Catalog.new(self)
|
20
|
+
@warnings = []
|
21
|
+
@validated = false
|
22
|
+
end
|
23
|
+
|
24
|
+
# Validation method. See {file:README.rdoc} for details.
|
25
|
+
#
|
26
|
+
# Plural validation methods ({#dirs} and {#files}) return an array of
|
27
|
+
# {DirValidator::Item} objects. Singular variants ({#dir} and {#file})
|
28
|
+
# return one such object, or nil.
|
29
|
+
#
|
30
|
+
# @param vid [String] Validation identifier meaningful to the user.
|
31
|
+
# @param opts [Hash] Validation options.
|
32
|
+
#
|
33
|
+
# @option opts :name [String] Item name must match a literal string.
|
34
|
+
# @option opts :re [String|Regexp] Item name must match regular expression.
|
35
|
+
# @option opts :pattern [String] Item name must match a glob-like pattern.
|
36
|
+
# @option opts :n [String] Expected number of items. Plural validation
|
37
|
+
# methods default to '1+'. Singular variants
|
38
|
+
# force the option to be '1'.
|
39
|
+
# @option opts :recurse [false|true] (false) Whether to return items other than immediate
|
40
|
+
# children.
|
41
|
+
#
|
42
|
+
# @return [DirValidator::Item|nil]
|
43
|
+
def dir(vid, opts = {})
|
44
|
+
opts = opts.merge({:n => '1'})
|
45
|
+
return dirs(vid, opts).first
|
46
|
+
end
|
47
|
+
|
48
|
+
# @see #dir
|
49
|
+
# @return (see #dir)
|
50
|
+
def file(vid, opts = {})
|
51
|
+
opts = opts.merge({:n => '1'})
|
52
|
+
return files(vid, opts).first
|
53
|
+
end
|
54
|
+
|
55
|
+
# @see #dir
|
56
|
+
# @return [Array]
|
57
|
+
def dirs(vid, opts = {})
|
58
|
+
bd = normalized_base_dir(opts, :handle_recurse => true)
|
59
|
+
return process_items(@catalog.unmatched_dirs(bd), vid, opts)
|
60
|
+
end
|
61
|
+
|
62
|
+
# @see #dir
|
63
|
+
# @return [Array]
|
64
|
+
def files(vid, opts = {})
|
65
|
+
bd = normalized_base_dir(opts, :handle_recurse => true)
|
66
|
+
return process_items(@catalog.unmatched_files(bd), vid, opts)
|
67
|
+
end
|
68
|
+
|
69
|
+
# The workhorse for the validation methods.
|
70
|
+
#
|
71
|
+
# @!visibility private
|
72
|
+
def process_items(items, vid, opts = {})
|
73
|
+
# Make sure the user did not forget to pass the validation identifier.
|
74
|
+
if vid.class == Hash
|
75
|
+
msg = "Validation identifier should not be a hash: #{vid.inspect}"
|
76
|
+
raise ArgumentError, msg
|
77
|
+
end
|
78
|
+
|
79
|
+
# Get a Quantity object.
|
80
|
+
quant = DirValidator::Quantity.new(opts[:n] || '1+')
|
81
|
+
|
82
|
+
# We are given a list of unmatched Items (either files or dirs).
|
83
|
+
# Here we filter the list to those matching the name-related criteria.
|
84
|
+
items = name_filtered(items, opts)
|
85
|
+
|
86
|
+
# And here we cap the N of items to be returned. For example, if the
|
87
|
+
# user asked for 1-3 Items and we found 5, we will return only 3.
|
88
|
+
items = quantity_limited(items, quant)
|
89
|
+
|
90
|
+
# Add a warning if the N of Items is less than the user's expectation.
|
91
|
+
sz = items.size
|
92
|
+
unless sz >= quant.min_n
|
93
|
+
add_warning(vid, opts.merge(:got => sz))
|
94
|
+
end
|
95
|
+
|
96
|
+
# Mark the Items as matched so subsequent validations won't return same Items.
|
97
|
+
@catalog.mark_as_matched(items)
|
98
|
+
|
99
|
+
return items
|
100
|
+
end
|
101
|
+
|
102
|
+
|
103
|
+
####
|
104
|
+
# Name-related filtering.
|
105
|
+
####
|
106
|
+
|
107
|
+
# Takes an array of items and a validation-method opts hash.
|
108
|
+
# Returns the subset of those items matching the name-related
|
109
|
+
# criteria in the opts hash.
|
110
|
+
#
|
111
|
+
# @!visibility private
|
112
|
+
def name_filtered(items, opts)
|
113
|
+
# Filter the items to those in the base_dir.
|
114
|
+
# If there is no base_dir, no filtering occurs.
|
115
|
+
base_dir = normalized_base_dir(opts, :add_file_sep => true)
|
116
|
+
sz = base_dir.size
|
117
|
+
items = items.select { |i| i.path.start_with?(base_dir) } if sz > 0
|
118
|
+
|
119
|
+
# Set the item.target values, which are the values that will
|
120
|
+
# be subjected to the name-related test. If there is no base_dir,
|
121
|
+
# the target is the same as item.path.
|
122
|
+
items.each { |i| i.set_target(i.path[sz .. -1]) }
|
123
|
+
|
124
|
+
# Filter items to immediate children, unless user wants to recurse.
|
125
|
+
items = items.reject { |i| i.target.include?(FILE_SEP) } unless opts[:recurse]
|
126
|
+
|
127
|
+
# Return the items having targets matching the name regex.
|
128
|
+
rgx = name_regex(opts)
|
129
|
+
return items.select { |i| i.target_match(rgx) }
|
130
|
+
end
|
131
|
+
|
132
|
+
# Takes a validation-method opts hash and a hash of normalization options.
# Returns opts[:base_dir] with all trailing file separators removed.
#
# - Returns nil if opts[:recurse] and nbd_opts[:handle_recurse] are both set.
# - Returns '' if opts has no :base_dir.
# - Appends one trailing separator if nbd_opts[:add_file_sep] is set.
#
# The string stored in opts[:base_dir] is never modified: trimming is done
# on a copy, so a frozen string is accepted and the caller's value is left
# exactly as it was supplied.
#
# @!visibility private
def normalized_base_dir(opts, nbd_opts = {})
  return nil if opts[:recurse] && nbd_opts[:handle_recurse]
  bd = opts[:base_dir]
  return '' unless bd
  sep = File::SEPARATOR
  # Work on a copy; chop! would otherwise alter the caller's string.
  bd = bd.dup
  bd.chop! while bd.end_with?(sep)
  bd += sep if nbd_opts[:add_file_sep]
  return bd
end
|
145
|
+
|
146
|
+
# Takes a validation-method opts hash.
|
147
|
+
# Returns the appropriate Regexp based on the name-related criteria.
|
148
|
+
#
|
149
|
+
# @!visibility private
|
150
|
+
def name_regex(opts)
|
151
|
+
name = opts[:name]
|
152
|
+
re = opts[:re]
|
153
|
+
pattern = opts[:pattern]
|
154
|
+
return Regexp.new(
|
155
|
+
name ? name_to_re(name) :
|
156
|
+
pattern ? pattern_to_re(pattern) :
|
157
|
+
re ? re : ''
|
158
|
+
)
|
159
|
+
end
|
160
|
+
|
161
|
+
# Takes a validation-method opts[:name] value.
|
162
|
+
# Returns the corresponding regex-ready string:
|
163
|
+
# - wrapped in start- and end- anchors
|
164
|
+
# - all special characters quoted
|
165
|
+
#
|
166
|
+
# @!visibility private
|
167
|
+
def name_to_re(name)
|
168
|
+
return az_wrap(Regexp.quote(name))
|
169
|
+
end
|
170
|
+
|
171
|
+
# Takes a validation-method opts[:pattern] value.
|
172
|
+
# Returns the corresponding regex-ready string
|
173
|
+
# - wrapped in start- and end- anchors
|
174
|
+
# - all special regex characters quoted except for:
|
175
|
+
# * becomes .*
|
176
|
+
# ? becomes .
|
177
|
+
#
|
178
|
+
# @!visibility private
|
179
|
+
def pattern_to_re(pattern)
|
180
|
+
return az_wrap(Regexp.quote(pattern).gsub(/\\\*/, '.*').gsub(/\\\?/, '.'))
|
181
|
+
end
|
182
|
+
|
183
|
+
# Takes a string.
|
184
|
+
# Returns a new string wrapped in Regexp start-of-string and end-of-string anchors.
|
185
|
+
#
|
186
|
+
# @!visibility private
|
187
|
+
def az_wrap(s)
|
188
|
+
return "\\A#{s}\\z"
|
189
|
+
end
|
190
|
+
|
191
|
+
|
192
|
+
####
|
193
|
+
# Quantity filtering.
|
194
|
+
####
|
195
|
+
|
196
|
+
# Takes an array of items and a DirValidator::Quantity object.
# Returns a new array holding at most quant.max_n of those items,
# taken from the front of the list.
#
# A maximum of zero is handled explicitly: its max_index is -1, which as
# a range end would select every item rather than none.
#
# @!visibility private
def quantity_limited(items, quant)
  return [] if quant.max_n == 0
  return items[0 .. quant.max_index]
end
|
204
|
+
|
205
|
+
|
206
|
+
####
|
207
|
+
# Methods related to validation warnings and reporting.
|
208
|
+
####
|
209
|
+
|
210
|
+
# Takes a validation identifier and a validation-method opts hash.
|
211
|
+
# Creates a new DirValidator::Warning and adds it to the validator's
|
212
|
+
# array of warnings.
|
213
|
+
#
|
214
|
+
# @!visibility private
|
215
|
+
def add_warning(vid, opts)
|
216
|
+
@warnings << DirValidator::Warning.new(vid, opts)
|
217
|
+
end
|
218
|
+
|
219
|
+
# Adds a warning to the validator for all unmatched items in the catalog.
|
220
|
+
# Should be run after all validation-methods have been called, typically
|
221
|
+
# before producing a report.
|
222
|
+
#
|
223
|
+
# @!visibility private
|
224
|
+
def validate
|
225
|
+
return if @validated # Run only once.
|
226
|
+
@catalog.unmatched_items.each do |item|
|
227
|
+
add_warning(EXTRA_VID, :path => item.path)
|
228
|
+
end
|
229
|
+
@validated = true
|
230
|
+
end
|
231
|
+
|
232
|
+
# Write a CSV report of the information contained in the validator's warnings.
|
233
|
+
#
|
234
|
+
# @param io [IO] IO object to which the report should be written.
|
235
|
+
def report(io = STDOUT)
|
236
|
+
require 'csv'
|
237
|
+
validate()
|
238
|
+
report_data.each do |row|
|
239
|
+
io.puts CSV.generate_line(row)
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
243
|
+
# Returns a matrix of warning information.
|
244
|
+
# Used when producing the CSV report.
|
245
|
+
#
|
246
|
+
# @!visibility private
|
247
|
+
def report_data
|
248
|
+
rc = DirValidator::Validator.report_columns
|
249
|
+
data = [rc]
|
250
|
+
@warnings.each do |w|
|
251
|
+
cells = rc.map { |c| v = w.opts[c]; v.nil? ? '' : v }
|
252
|
+
cells[0] = w.vid
|
253
|
+
data.push(cells)
|
254
|
+
end
|
255
|
+
return data
|
256
|
+
end
|
257
|
+
|
258
|
+
# Column headings for the CSV report.
|
259
|
+
#
|
260
|
+
# @!visibility private
|
261
|
+
def self.report_columns
|
262
|
+
return [:vid, :got, :n, :base_dir, :name, :re, :pattern, :path]
|
263
|
+
end
|
264
|
+
|
265
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# @!attribute [r] vid
|
2
|
+
# @see #initialize
|
3
|
+
# @return [String]
|
4
|
+
# @!attribute [r] opts
|
5
|
+
# @see #initialize
|
6
|
+
# @return [Hash]
|
7
|
+
class DirValidator::Warning
|
8
|
+
|
9
|
+
attr_reader(:vid, :opts)
|
10
|
+
|
11
|
+
# @param vid [String] Validation identifier of the validation-method call
|
12
|
+
# that led to the warning.
|
13
|
+
# @param opts [Hash] Validation-method options hash, along with some additional
|
14
|
+
# information about the context in which the warning was
|
15
|
+
# generated.
|
16
|
+
def initialize(vid, opts)
|
17
|
+
@vid = vid
|
18
|
+
@opts = opts
|
19
|
+
end
|
20
|
+
|
21
|
+
# Returns a basic representation of the information contained in the warning.
|
22
|
+
#
|
23
|
+
# @return [String]
|
24
|
+
def to_s
|
25
|
+
return "#{@vid}: #{@opts.inspect}"
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module DirValidator
|
2
|
+
|
3
|
+
# Syntactic sugar: a module method for creating a new validator.
|
4
|
+
#
|
5
|
+
# @param (see DirValidator::Validator#new)
|
6
|
+
# @return [DirValidator::Validator]
|
7
|
+
def self.new(*args, &block)
|
8
|
+
return DirValidator::Validator.new(*args, &block)
|
9
|
+
end
|
10
|
+
|
11
|
+
end
|
12
|
+
|
13
|
+
require 'dir_validator/catalog'
|
14
|
+
require 'dir_validator/item'
|
15
|
+
require 'dir_validator/quantity'
|
16
|
+
require 'dir_validator/validator'
|
17
|
+
require 'dir_validator/warning'
|
@@ -0,0 +1,88 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'dir_validator'
|
5
|
+
|
6
|
+
# Suppose that we want to check the following directory structure.
|
7
|
+
#
|
8
|
+
# spec/fixtures/tutorial/
|
9
|
+
# aaa/ # Top-level sub-directories should be 3 lower-case letters.
|
10
|
+
# 00/ # Should be three second-level directories: 00, 01, and 02.
|
11
|
+
# a.tif # One or more .tif files in the 00 directory.
|
12
|
+
# b.tif
|
13
|
+
# 01/
|
14
|
+
# a.jpg # A parallel set of .jpg files.
|
15
|
+
# b.jpg
|
16
|
+
# 02/
|
17
|
+
# a.jp2 # A parallel set of .jp2 files.
|
18
|
+
# b.jp2
|
19
|
+
# blort.txt # What?!?
|
20
|
+
# aab/
|
21
|
+
# 00/
|
22
|
+
# a.tif
|
23
|
+
# b.tif
|
24
|
+
# 01/
|
25
|
+
# a.jpg
|
26
|
+
# b.jpg
|
27
|
+
# 02/
|
28
|
+
# a.jp2 # Missing file.
|
29
|
+
# baa/
|
30
|
+
# 00/
|
31
|
+
# a.tif
|
32
|
+
# b.tif
|
33
|
+
# 01/
|
34
|
+
# a.jpg
|
35
|
+
# b.jpg
|
36
|
+
# 02/
|
37
|
+
# a.jp2 # Missing file.
|
38
|
+
|
39
|
+
# Set up the validator, passing in the starting path.
|
40
|
+
dv = DirValidator.new('tutorial/files')
|
41
|
+
|
42
|
+
# Here we set up our expectation for the top-level sub-directories. In this
|
43
|
+
# case, we are looking for 1 or more sub-directories named with exactly 3
|
44
|
+
# lower-case letters, as defined in the regular expression.
|
45
|
+
dv.dirs('top-level subdir', :re => /^[a-z]{3}$/).each do |subdir|
|
46
|
+
|
47
|
+
# Within each of those top-level directories, we expect to find three
|
48
|
+
# numbered directories. In this case, we pass in literal names to
|
49
|
+
# the validation methods rather than a regular expression.
|
50
|
+
# Notice also that the validation method is being called on the
|
51
|
+
# subdirectory (subdir), not the overall dir-validator (dv).
|
52
|
+
d0 = subdir.dir('00', :name => '00')
|
53
|
+
d1 = subdir.dir('01', :name => '01')
|
54
|
+
d2 = subdir.dir('02', :name => '02')
|
55
|
+
|
56
|
+
# In the 00 subdirectory, we expect to see a bunch of .tif files.
|
57
|
+
# We could have used a regular expression, but in this case we'll
|
58
|
+
# resort to a simple glob-like pattern.
|
59
|
+
d0.files('tifs', :pattern => '*.tif').each do |tif|
|
60
|
+
|
61
|
+
# And finally, we set up the validations for the parallel .jpg
|
62
|
+
# and .jp2 files. Those files should reside in the 01 and 02
|
63
|
+
# subdirectories (which we stored in d1 and d2 above). Their
|
64
|
+
# file names should mirror that of the current .tif file.
|
65
|
+
tif_base = tif.basename('.tif')
|
66
|
+
d1.file('jpgs', :name => tif_base + '.jpg')
|
67
|
+
d2.file('jp2s', :name => tif_base + '.jp2')
|
68
|
+
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
# We can generate a basic CSV report that will list:
|
73
|
+
# - items that were not found
|
74
|
+
# - extra items
|
75
|
+
#
|
76
|
+
# The CSV output (with extra spacing here for readability):
|
77
|
+
# vid, got, n, base_dir, name, re, pattern, path
|
78
|
+
# jp2s, 0, 1, aab/02, b.jp2, "", "", ""
|
79
|
+
# jp2s, 0, 1, baa/02, b.jp2, "", "", ""
|
80
|
+
# _EXTRA_, "", "", "", "", "", "", aaa/02/blort.txt
|
81
|
+
dv.report()
|
82
|
+
|
83
|
+
# Alternatively, we could examine the results programmatically by
|
84
|
+
# iterating over the dir-validator's warnings.
|
85
|
+
dv.validate()
|
86
|
+
dv.warnings.each do |w|
|
87
|
+
# ...
|
88
|
+
end
|
metadata
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dir_validator
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 47
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 12
|
9
|
+
- 0
|
10
|
+
version: 0.12.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Monty Hindman
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2012-07-30 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rspec
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ~>
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 15
|
29
|
+
segments:
|
30
|
+
- 2
|
31
|
+
- 6
|
32
|
+
version: "2.6"
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: lyberteam-devel
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id002
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: yard
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
type: :development
|
62
|
+
version_requirements: *id003
|
63
|
+
- !ruby/object:Gem::Dependency
|
64
|
+
name: awesome_print
|
65
|
+
prerelease: false
|
66
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
hash: 3
|
72
|
+
segments:
|
73
|
+
- 0
|
74
|
+
version: "0"
|
75
|
+
type: :development
|
76
|
+
version_requirements: *id004
|
77
|
+
- !ruby/object:Gem::Dependency
|
78
|
+
name: looksee
|
79
|
+
prerelease: false
|
80
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
hash: 3
|
86
|
+
segments:
|
87
|
+
- 0
|
88
|
+
version: "0"
|
89
|
+
type: :development
|
90
|
+
version_requirements: *id005
|
91
|
+
- !ruby/object:Gem::Dependency
|
92
|
+
name: rcov
|
93
|
+
prerelease: false
|
94
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
hash: 3
|
100
|
+
segments:
|
101
|
+
- 0
|
102
|
+
version: "0"
|
103
|
+
type: :development
|
104
|
+
version_requirements: *id006
|
105
|
+
description: This gem provides a convenient syntax for checking whether the contents of a directory structure match your expectations.
|
106
|
+
email:
|
107
|
+
- hindman@stanford.edu
|
108
|
+
executables: []
|
109
|
+
|
110
|
+
extensions: []
|
111
|
+
|
112
|
+
extra_rdoc_files: []
|
113
|
+
|
114
|
+
files:
|
115
|
+
- lib/dir_validator/catalog.rb
|
116
|
+
- lib/dir_validator/item.rb
|
117
|
+
- lib/dir_validator/quantity.rb
|
118
|
+
- lib/dir_validator/validator.rb
|
119
|
+
- lib/dir_validator/warning.rb
|
120
|
+
- lib/dir_validator.rb
|
121
|
+
- LICENSE.rdoc
|
122
|
+
- README.rdoc
|
123
|
+
- CHANGELOG.rdoc
|
124
|
+
- tutorial/tutorial.rb
|
125
|
+
homepage: https://github.com/sul-dlss
|
126
|
+
licenses: []
|
127
|
+
|
128
|
+
post_install_message:
|
129
|
+
rdoc_options: []
|
130
|
+
|
131
|
+
require_paths:
|
132
|
+
- lib
|
133
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
134
|
+
none: false
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
hash: 3
|
139
|
+
segments:
|
140
|
+
- 0
|
141
|
+
version: "0"
|
142
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
143
|
+
none: false
|
144
|
+
requirements:
|
145
|
+
- - ">="
|
146
|
+
- !ruby/object:Gem::Version
|
147
|
+
hash: 3
|
148
|
+
segments:
|
149
|
+
- 0
|
150
|
+
version: "0"
|
151
|
+
requirements: []
|
152
|
+
|
153
|
+
rubyforge_project:
|
154
|
+
rubygems_version: 1.8.24
|
155
|
+
signing_key:
|
156
|
+
specification_version: 3
|
157
|
+
summary: Validate content of a directory structure.
|
158
|
+
test_files: []
|
159
|
+
|
160
|
+
has_rdoc:
|