findler 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.travis.yml +3 -0
- data/Gemfile +1 -1
- data/README.md +9 -11
- data/findler.gemspec +10 -10
- data/lib/findler.rb +59 -26
- data/lib/findler/error.rb +3 -1
- data/lib/findler/filters.rb +31 -29
- data/lib/findler/iterator.rb +25 -100
- data/lib/findler/path.rb +27 -0
- data/lib/findler/version.rb +2 -2
- data/test/findler_test.rb +69 -82
- data/test/minitest_helper.rb +34 -17
- metadata +9 -26
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
ZGNkZjllNWFjMzlhZjFlN2RjOGRkNWExMWU0NGU4ZDRiM2VhODhkZQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MTFiM2Y3Y2ZlMjk5MTRjMzBjYTQ0N2FmZTMyNTk3YjQ4MzA3NzhiMg==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
NWY4NDA4NzE2YzFkNWU0MWI5NjVkZGRhOTBhOWNiZjg0ZmY1NGJiYWI0MTll
|
10
|
+
YTcxMTg3N2I0ZjE2Y2RmODMxZTgyOTk0ZjUzOGQ0NzE2M2JhYmU3MDY1NjVh
|
11
|
+
NGYwNGZjMWRmNWEwOTAwMzhjODM1NDcxNWNkN2Q0MjI1MjJjZmE=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
Njg4MDNmMTU4N2Q2NDc2NTcyZGU0ZWE2YjBhNGI1MmFjOGY4MTljNjQzMzY5
|
14
|
+
YTU5Yjk5NjBlYjc2MjI2Yzc4YmQxYTFiNTUyMDBhMzBmOWMxOGYyZDdjMjcy
|
15
|
+
NGY2NjI5YWQzNTM3MTJiMWJhZTgyMTg2MDBkMDE5NTRlYTNjMmM=
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -26,25 +26,17 @@ The entire state of the iteration for the filesystem is returned, which can then
|
|
26
26
|
be pushed onto any durable storage, like ActiveRecord or Redis, or just a local file:
|
27
27
|
|
28
28
|
```ruby
|
29
|
-
File.open('iterator.state', '
|
29
|
+
File.open('iterator.state', 'wb') { |f| Marshal.dump(iterator, f) }
|
30
30
|
```
|
31
31
|
|
32
32
|
To resume iteration:
|
33
33
|
|
34
34
|
```ruby
|
35
|
-
Marshal.load(
|
36
|
-
|
35
|
+
iterator2 = Marshal.load(File.open('iterator.state', 'rb'))
|
36
|
+
iterator2.next_file
|
37
37
|
# => "/Users/mrm/Photos/img_1001.jpg"
|
38
38
|
```
|
39
39
|
|
40
|
-
To re-check a directory hierarchy for files that you haven't visited yet:
|
41
|
-
|
42
|
-
```ruby
|
43
|
-
iterator.rescan!
|
44
|
-
iterator.next_file
|
45
|
-
# => "/Users/mrm/Photos/img_1002.jpg"
|
46
|
-
```
|
47
|
-
|
48
40
|
External synchronization between the serialized state of the
|
49
41
|
iterator and the other processes will have to be done by you, of course.
|
50
42
|
The ```load```, ```next_file```, and ```dump``` should be done while holding
|
@@ -123,6 +115,12 @@ Because procs and lambdas aren't ```Marshal```able, and I didn't want to use som
|
|
123
115
|
|
124
116
|
## Changelog
|
125
117
|
|
118
|
+
### 0.0.7
|
119
|
+
* Use a non-inherited set per iterator, rather than a global bloom filter
|
120
|
+
* Removed the ability to "rescan" due to the weight of the bloom filter in marshalling when
|
121
|
+
traversing an enormous tree.
|
122
|
+
* Fixed marshal documentation and tests to support ruby 1.9+
|
123
|
+
|
126
124
|
### 0.0.6
|
127
125
|
* ```add_filters``` takes an array, not a glob
|
128
126
|
* added tests for order_by_mtime filters
|
data/findler.gemspec
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
-
$:.push File.expand_path(
|
3
|
-
require
|
2
|
+
$:.push File.expand_path('../lib', __FILE__)
|
3
|
+
require 'findler/version'
|
4
4
|
|
5
5
|
Gem::Specification.new do |gem|
|
6
|
-
gem.name =
|
6
|
+
gem.name = 'findler'
|
7
7
|
gem.version = Findler::VERSION
|
8
|
-
gem.authors = [
|
9
|
-
gem.email =
|
8
|
+
gem.authors = ['Matthew McEachen']
|
9
|
+
gem.email = %w(matthew+github@mceachen.org)
|
10
10
|
gem.homepage = "https://github.com/mceachen/findler/"
|
11
11
|
gem.summary = %q{Findler is a stateful filesystem iterator}
|
12
12
|
gem.description = %q{Findler is designed for very large filesystem hierarchies,
|
@@ -17,9 +17,9 @@ Gem::Specification.new do |gem|
|
|
17
17
|
gem.test_files = `git ls-files -- {test,features}/*`.split("\n")
|
18
18
|
gem.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
|
19
19
|
gem.require_paths = ["lib"]
|
20
|
-
gem.add_development_dependency
|
21
|
-
gem.add_development_dependency
|
22
|
-
gem.add_development_dependency
|
23
|
-
gem.add_development_dependency
|
24
|
-
gem.
|
20
|
+
gem.add_development_dependency 'rake'
|
21
|
+
gem.add_development_dependency 'yard'
|
22
|
+
gem.add_development_dependency 'minitest'
|
23
|
+
gem.add_development_dependency 'minitest-great_expectations'
|
24
|
+
gem.add_development_dependency 'minitest-reporters'
|
25
25
|
end
|
data/lib/findler.rb
CHANGED
@@ -1,19 +1,18 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
require "findler/filters"
|
1
|
+
require 'findler/error'
|
2
|
+
require 'findler/filters'
|
3
|
+
require 'findler/iterator'
|
4
|
+
require 'findler/path'
|
6
5
|
|
7
|
-
|
8
|
-
|
6
|
+
class Findler
|
7
|
+
attr_reader :path
|
9
8
|
|
10
9
|
def initialize(path)
|
11
|
-
@path = path
|
10
|
+
@path = Path.clean(path)
|
12
11
|
@flags = 0
|
13
12
|
end
|
14
13
|
|
15
|
-
# These are File.fnmatch patterns.
|
16
|
-
# If any pattern matches, it will be returned by Iterator#
|
14
|
+
# These are File.fnmatch patterns, and are only applied to files, not directories.
|
15
|
+
# If any pattern matches, it will be returned by Iterator#next_file.
|
17
16
|
# (see File.fnmatch?)
|
18
17
|
def patterns
|
19
18
|
@patterns ||= []
|
@@ -38,31 +37,39 @@ class Findler
|
|
38
37
|
# Should patterns be interpreted in a case-sensitive manner? The default is case sensitive,
|
39
38
|
# but if your local filesystem is not case sensitive, this flag is a no-op.
|
40
39
|
def case_sensitive!
|
41
|
-
@flags &= ~
|
40
|
+
@flags &= ~File::FNM_CASEFOLD
|
42
41
|
end
|
43
42
|
|
44
43
|
def case_insensitive!
|
45
|
-
@flags |=
|
44
|
+
@flags |= File::FNM_CASEFOLD
|
45
|
+
end
|
46
|
+
|
47
|
+
def ignore_case?
|
48
|
+
(@flags & File::FNM_CASEFOLD) > 0
|
46
49
|
end
|
47
50
|
|
48
51
|
# Should we traverse hidden directories and files? (default is to skip files that start
|
49
52
|
# with a '.')
|
50
53
|
def include_hidden!
|
51
|
-
@flags |=
|
54
|
+
@flags |= File::FNM_DOTMATCH
|
52
55
|
end
|
53
56
|
|
54
57
|
def exclude_hidden!
|
55
|
-
@flags &= ~
|
58
|
+
@flags &= ~File::FNM_DOTMATCH
|
56
59
|
end
|
57
60
|
|
58
|
-
def
|
59
|
-
(@
|
61
|
+
def include_hidden?
|
62
|
+
(@flags & File::FNM_DOTMATCH) > 0
|
60
63
|
end
|
61
64
|
|
62
|
-
def
|
63
|
-
|
64
|
-
|
65
|
-
|
65
|
+
def filters_class
|
66
|
+
@filters_class ||= Filters
|
67
|
+
end
|
68
|
+
|
69
|
+
def filters_class=(new_filters_class)
|
70
|
+
raise Error unless new_filters_class.is_a? Class
|
71
|
+
filters.each { |ea| new_filters_class.method(ea) } # verify the filters class has those methods defined
|
72
|
+
@filters_class = new_filters_class
|
66
73
|
end
|
67
74
|
|
68
75
|
# Accepts symbols whose names are class methods on Findler::Filters.
|
@@ -78,12 +85,11 @@ class Findler
|
|
78
85
|
# Note that the last filter added will be last to order the children, so it will be the
|
79
86
|
# "primary" sort criterion.
|
80
87
|
def add_filter(filter_symbol)
|
81
|
-
filter_class.method(filter_symbol)
|
82
88
|
filters << filter_symbol
|
83
89
|
end
|
84
90
|
|
85
91
|
def filters
|
86
|
-
|
92
|
+
@filters ||= []
|
87
93
|
end
|
88
94
|
|
89
95
|
def add_filters(filter_symbols)
|
@@ -91,14 +97,41 @@ class Findler
|
|
91
97
|
end
|
92
98
|
|
93
99
|
def iterator
|
94
|
-
Iterator.new(
|
95
|
-
:patterns => @patterns,
|
96
|
-
:flags => @flags,
|
97
|
-
:filters => @filters)
|
100
|
+
Iterator.new(self, path)
|
98
101
|
end
|
99
102
|
|
100
103
|
private
|
101
104
|
|
105
|
+
def filter_paths(pathnames)
|
106
|
+
viable_paths = pathnames.select { |ea| viable_path?(ea) }
|
107
|
+
filters.inject(viable_paths) do |paths, filter_symbol|
|
108
|
+
apply_filter(paths, filter_symbol)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
112
|
+
# Should the given file or directory be iterated over?
|
113
|
+
def viable_path?(pathname)
|
114
|
+
return false if !pathname.exist?
|
115
|
+
return false if !include_hidden? && Path.hidden?(pathname)
|
116
|
+
if patterns.empty? || pathname.directory?
|
117
|
+
true
|
118
|
+
else
|
119
|
+
patterns.any? { |p| pathname.fnmatch(p, @flags) }
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
def apply_filter(pathnames, filter_method_sym)
|
124
|
+
filtered_pathnames = filters_class.send(filter_method_sym, pathnames.dup)
|
125
|
+
unless filtered_pathnames.respond_to? :map
|
126
|
+
raise Error, "#{filters_class}.#{filter_method_sym} did not return an Enumerable"
|
127
|
+
end
|
128
|
+
unexpected_paths = filtered_pathnames - pathnames
|
129
|
+
unless unexpected_paths.empty?
|
130
|
+
raise Error, "#{filters_class}.#{filter_method_sym} returned unexpected paths: #{unexpected_paths.collect { |ea| ea.to_s }.join(",")}"
|
131
|
+
end
|
132
|
+
filtered_pathnames
|
133
|
+
end
|
134
|
+
|
102
135
|
def normalize_extension(extension)
|
103
136
|
if extension.nil? || extension.empty? || extension.start_with?(".")
|
104
137
|
extension
|
data/lib/findler/error.rb
CHANGED
data/lib/findler/filters.rb
CHANGED
@@ -1,38 +1,40 @@
|
|
1
|
-
class Findler
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
class Findler
|
2
|
+
class Filters
|
3
|
+
# files first, then directories
|
4
|
+
def self.files_first(paths)
|
5
|
+
preserve_sort_by(paths) { |ea| ea.file? ? -1 : 1 }
|
6
|
+
end
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
# directories first, then files
|
9
|
+
def self.directories_first(paths)
|
10
|
+
preserve_sort_by(paths) { |ea| ea.directory? ? -1 : 1 }
|
11
|
+
end
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
# order by the mtime of each file. Oldest files first.
|
14
|
+
def self.order_by_mtime_asc(paths)
|
15
|
+
preserve_sort_by(paths) { |ea| ea.mtime.to_i }
|
16
|
+
end
|
16
17
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
# reverse order by the mtime of each file. Newest files first.
|
19
|
+
def self.order_by_mtime_desc(paths)
|
20
|
+
preserve_sort_by(paths) { |ea| -1 * ea.mtime.to_i }
|
21
|
+
end
|
21
22
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
23
|
+
# order by the name of each file.
|
24
|
+
def self.order_by_name(paths)
|
25
|
+
preserve_sort_by(paths) { |ea| ea.basename.to_s }
|
26
|
+
end
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
28
|
+
# reverse the order of the sort
|
29
|
+
def self.reverse(paths)
|
30
|
+
paths.reverse
|
31
|
+
end
|
31
32
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
33
|
+
def self.preserve_sort_by(array, &block)
|
34
|
+
ea_to_index = Hash[array.zip((0..array.size-1).to_a)]
|
35
|
+
array.sort_by do |ea|
|
36
|
+
[yield(ea), ea_to_index[ea]]
|
37
|
+
end
|
36
38
|
end
|
37
39
|
end
|
38
40
|
end
|
data/lib/findler/iterator.rb
CHANGED
@@ -1,56 +1,17 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
1
|
+
require 'set'
|
2
|
+
require 'forwardable'
|
3
3
|
|
4
4
|
class Findler
|
5
|
-
|
6
5
|
class Iterator
|
6
|
+
extend Forwardable
|
7
|
+
def_delegators :@configuration, :filter_paths
|
8
|
+
attr_reader :path
|
7
9
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
@
|
12
|
-
@
|
13
|
-
@path = @path.expand_path unless @path.absolute?
|
14
|
-
@parent = parent
|
15
|
-
|
16
|
-
set_inheritable_ivar(:visited_dirs, attrs) { self.class.new_presence_collection }
|
17
|
-
set_inheritable_ivar(:visited_files, attrs) { self.class.new_presence_collection }
|
18
|
-
set_inheritable_ivar(:patterns, attrs) { nil }
|
19
|
-
set_inheritable_ivar(:flags, attrs) { 0 }
|
20
|
-
set_inheritable_ivar(:filters, attrs) { [] }
|
21
|
-
set_inheritable_ivar(:filters_class, attrs) { Filters }
|
22
|
-
set_inheritable_ivar(:sort_with, attrs) { nil }
|
23
|
-
|
24
|
-
@sub_iter = self.class.new(attrs[:sub_iter], self) if attrs[:sub_iter]
|
25
|
-
end
|
26
|
-
|
27
|
-
# Visit this directory and all sub directories, and check for unseen files. Only call on the root iterator.
|
28
|
-
def rescan!
|
29
|
-
raise Error, "Only invoke on root" unless @parent.nil?
|
30
|
-
@visited_dirs = self.class.new_presence_collection
|
31
|
-
@children = nil
|
32
|
-
@sub_iter = nil
|
33
|
-
end
|
34
|
-
|
35
|
-
def ignore_case?
|
36
|
-
(Findler::IGNORE_CASE & flags) > 0
|
37
|
-
end
|
38
|
-
|
39
|
-
def include_hidden?
|
40
|
-
(Findler::INCLUDE_HIDDEN & flags) > 0
|
41
|
-
end
|
42
|
-
|
43
|
-
def fnmatch_flags
|
44
|
-
@fnmatch_flags ||= (@parent && @parent.fnmatch_flags) || begin
|
45
|
-
f = 0
|
46
|
-
f |= File::FNM_CASEFOLD if ignore_case?
|
47
|
-
f |= File::FNM_DOTMATCH if include_hidden?
|
48
|
-
f
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
def path
|
53
|
-
@path
|
10
|
+
def initialize(findler, path, parent_iterator = nil)
|
11
|
+
@configuration = findler
|
12
|
+
@path = Path.clean(path).freeze
|
13
|
+
@parent = parent_iterator
|
14
|
+
@visited = []
|
54
15
|
end
|
55
16
|
|
56
17
|
def next_file
|
@@ -59,76 +20,40 @@ class Findler
|
|
59
20
|
if @sub_iter
|
60
21
|
nxt = @sub_iter.next_file
|
61
22
|
return nxt unless nxt.nil?
|
62
|
-
@
|
23
|
+
mark_visited(@sub_iter.path)
|
63
24
|
@sub_iter = nil
|
64
25
|
end
|
65
26
|
|
66
|
-
|
67
|
-
@children = nil if @path.ctime != @ctime || @path.mtime != @mtime
|
68
|
-
|
69
|
-
if @children.nil?
|
70
|
-
@mtime = @path.mtime
|
71
|
-
@ctime = @path.ctime
|
72
|
-
children = @path.children.delete_if { |ea| skip?(ea) }
|
73
|
-
filtered_children = @filters.inject(children){ |c, f| filter(c, f) }
|
74
|
-
@children = filtered_children
|
75
|
-
end
|
76
|
-
|
77
|
-
nxt = @children.shift
|
27
|
+
nxt = next_visitable_child
|
78
28
|
return nil if nxt.nil?
|
79
29
|
|
80
30
|
if nxt.directory?
|
81
|
-
@sub_iter = Iterator.new(
|
31
|
+
@sub_iter = Iterator.new(@configuration, nxt, self)
|
82
32
|
self.next_file
|
83
33
|
else
|
84
|
-
|
34
|
+
mark_visited(nxt)
|
85
35
|
nxt
|
86
36
|
end
|
87
37
|
end
|
88
38
|
|
89
39
|
private
|
90
40
|
|
91
|
-
def
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
unless filtered_children.respond_to? :collect
|
98
|
-
raise Error, "#{path.to_s}: filter_with, must return an Enumerable"
|
99
|
-
end
|
100
|
-
children_as_pathnames = filtered_children.collect { |ea| ea.is_a?(Pathname) ? ea : Pathname.new(ea) }
|
101
|
-
illegal_children = children_as_pathnames - children
|
102
|
-
unless illegal_children.empty?
|
103
|
-
raise Error, "#{path.to_s}: filter_with returned unexpected paths: #{illegal_children.join(",")}"
|
41
|
+
def children
|
42
|
+
# If someone touches the directory while we iterate, redo the @children.
|
43
|
+
if @children.nil? || @mtime != @path.mtime || @ctime != @path.ctime
|
44
|
+
@mtime = @path.mtime
|
45
|
+
@ctime = @path.ctime
|
46
|
+
@children = filter_paths(@path.children)
|
104
47
|
end
|
105
|
-
|
48
|
+
@children
|
106
49
|
end
|
107
50
|
|
108
|
-
|
109
|
-
|
110
|
-
# If the parent doesn't have a value, use the block to generate a default.
|
111
|
-
def set_inheritable_ivar(field, attrs, &block)
|
112
|
-
v = attrs[field]
|
113
|
-
sym = "@#{field}".to_sym
|
114
|
-
v ||= parent.instance_variable_get(sym)
|
115
|
-
v ||= yield
|
116
|
-
instance_variable_set(sym, v)
|
51
|
+
def next_visitable_child
|
52
|
+
children.detect { |ea| !@visited.include?(Path.base(ea)) }
|
117
53
|
end
|
118
54
|
|
119
|
-
def
|
120
|
-
|
121
|
-
end
|
122
|
-
|
123
|
-
def skip? pathname
|
124
|
-
s = pathname.to_s
|
125
|
-
return true if !include_hidden? && hidden?(pathname)
|
126
|
-
return visited_dirs.include?(s) if pathname.directory?
|
127
|
-
return true if visited_files.include?(s)
|
128
|
-
unless patterns.nil?
|
129
|
-
return true if patterns.none? { |p| pathname.fnmatch(p, fnmatch_flags) }
|
130
|
-
end
|
131
|
-
return false
|
55
|
+
def mark_visited(path)
|
56
|
+
@visited << Path.base(path)
|
132
57
|
end
|
133
58
|
end
|
134
59
|
end
|
data/lib/findler/path.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
class Findler
|
4
|
+
class Path
|
5
|
+
def self.clean(path)
|
6
|
+
path = Pathname.new(path) unless path.is_a? Pathname
|
7
|
+
path = path.expand_path unless path.absolute?
|
8
|
+
path
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.base(path)
|
12
|
+
path.basename.to_s
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.clean_array(array)
|
16
|
+
array.map { |ea| clean(ea) }
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.base_array(array)
|
20
|
+
array.map { |ea| base(ea) }
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.hidden?(path)
|
24
|
+
base(path).start_with?('.')
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
data/lib/findler/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
class Findler
|
2
|
-
VERSION = "0.0.
|
3
|
-
end
|
2
|
+
VERSION = Gem::Version.new("0.0.7")
|
3
|
+
end
|
data/test/findler_test.rb
CHANGED
@@ -9,7 +9,7 @@ class Findler::Filters
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def self.invalid_return(children)
|
12
|
-
|
12
|
+
Pathname.new('/invalid/file')
|
13
13
|
end
|
14
14
|
|
15
15
|
def self.files_to_s(children)
|
@@ -23,107 +23,90 @@ describe Findler do
|
|
23
23
|
`mkdir .hide ; touch .outer-hide dir-0/.hide .hide/normal.txt .hide/.secret`
|
24
24
|
end
|
25
25
|
|
26
|
-
it
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
i = Findler.new("/tmp").iterator
|
36
|
-
i.send(:skip?, Pathname.new("/tmp/not-hidden")).must_equal false
|
37
|
-
i.send(:skip?, Pathname.new("/tmp/.hidden")).must_equal true
|
26
|
+
it 'detects hidden files properly' do
|
27
|
+
%w(/a/b /.a/b).each do |ea|
|
28
|
+
p = Pathname.new(ea)
|
29
|
+
Findler::Path.hidden?(p).must_be_false
|
30
|
+
end
|
31
|
+
%w(/a/.b /a/.b).each do |ea|
|
32
|
+
p = Pathname.new(ea)
|
33
|
+
Findler::Path.hidden?(p).must_be_true
|
34
|
+
end
|
38
35
|
end
|
39
36
|
|
40
|
-
it
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
37
|
+
it 'skips hidden files by default' do
|
38
|
+
with_tmp_dir do |dir|
|
39
|
+
visible = (dir + rand_alphanumeric).tap { |ea| ea.touch }
|
40
|
+
hidden = (dir + ".#{rand_alphanumeric}").tap { |ea| ea.touch }
|
41
|
+
f = Findler.new(dir)
|
42
|
+
f.send(:viable_path?, visible).must_equal true
|
43
|
+
f.send(:viable_path?, hidden).must_equal false
|
44
|
+
f.include_hidden!
|
45
|
+
f.send(:viable_path?, visible).must_equal true
|
46
|
+
f.send(:viable_path?, hidden).must_equal true
|
47
|
+
end
|
46
48
|
end
|
47
49
|
|
48
|
-
it
|
50
|
+
it 'finds all non-hidden files by default' do
|
49
51
|
with_tree(%W(.jpg .txt)) do |dir|
|
50
52
|
touch_secrets
|
51
53
|
f = Findler.new(dir)
|
52
|
-
collect_files(f.iterator).
|
54
|
+
collect_files(f.iterator).must_equal_contents `find * -type f -not -name '.*'`.split
|
53
55
|
f.exclude_hidden! # should be a no-op
|
54
|
-
collect_files(f.iterator).
|
56
|
+
collect_files(f.iterator).must_equal_contents `find * -type f -not -name '.*'`.split
|
55
57
|
f.include_hidden!
|
56
|
-
collect_files(f.iterator).
|
58
|
+
collect_files(f.iterator).must_equal_contents `find . -type f | sed -e 's/^\\.\\///'`.split
|
57
59
|
end
|
58
60
|
end
|
59
61
|
|
60
|
-
it
|
62
|
+
it 'finds only .jpg files when constrained' do
|
61
63
|
with_tree(%W(.jpg .txt .JPG)) do |dir|
|
62
64
|
f = Findler.new(dir)
|
63
65
|
f.add_extension ".jpg"
|
64
66
|
if fs_case_sensitive?
|
65
67
|
f.case_sensitive!
|
66
|
-
collect_files(f.iterator).
|
68
|
+
collect_files(f.iterator).must_equal_contents `find * -type f -name \\*.jpg`.split
|
67
69
|
end
|
68
70
|
f.case_insensitive!
|
69
|
-
collect_files(f.iterator).
|
71
|
+
collect_files(f.iterator).must_equal_contents `find * -type f -iname \\*.jpg`.split
|
70
72
|
end
|
71
73
|
end
|
72
74
|
|
73
|
-
it
|
74
|
-
with_tree(%
|
75
|
+
it 'finds .jpg or .JPG files when constrained' do
|
76
|
+
with_tree(%w(.jpg .txt .JPG)) do |dir|
|
75
77
|
f = Findler.new(dir)
|
76
|
-
f.add_extension
|
78
|
+
f.add_extension '.jpg'
|
77
79
|
f.case_insensitive!
|
78
80
|
iter = f.iterator
|
79
|
-
collect_files(iter).
|
81
|
+
collect_files(iter).must_equal_contents `find * -type f -iname \\*.jpg`.split
|
80
82
|
end
|
81
83
|
end
|
82
84
|
|
83
|
-
it
|
85
|
+
it 'finds files added after iteration started' do
|
84
86
|
with_tree(%W(.txt)) do |dir|
|
85
87
|
f = Findler.new(dir)
|
86
88
|
iter = f.iterator
|
87
89
|
iter.next_file.wont_be_nil
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
sleep(1.1)
|
92
|
-
|
93
|
-
FileUtils.touch(dir + "new.txt")
|
94
|
-
collect_files(iter).must_include("new.txt")
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
it "should find new files after a rescan" do
|
99
|
-
with_tree([".txt", ".no"]) do |dir|
|
100
|
-
f = Findler.new(dir)
|
101
|
-
f.add_extension ".txt"
|
102
|
-
iter = f.iterator
|
103
|
-
collect_files(iter).sort.must_equal `find * -type f -iname \\*.txt`.split.sort
|
104
|
-
FileUtils.touch(dir + "dir-0" + "dir-1" + "new-0.txt")
|
105
|
-
FileUtils.touch(dir + "dir-1" + "dir-0" + "new-1.txt")
|
106
|
-
FileUtils.touch(dir + "dir-2" + "dir-2" + "new-2.txt")
|
107
|
-
collect_files(iter).must_be_empty
|
108
|
-
iter.rescan!
|
109
|
-
collect_files(iter).sort.must_equal ["dir-0/dir-1/new-0.txt", "dir-1/dir-0/new-1.txt", "dir-2/dir-2/new-2.txt"]
|
90
|
+
sleep(1.1) # <- deal with the second-granularity resolution of the filesystem
|
91
|
+
(dir + 'new.txt').touch
|
92
|
+
collect_files(iter).must_include('new.txt')
|
110
93
|
end
|
111
94
|
end
|
112
95
|
|
113
|
-
it
|
114
|
-
with_tree(
|
96
|
+
it 'should not return files removed after iteration started' do
|
97
|
+
with_tree(%w(.txt)) do |dir|
|
115
98
|
f = Findler.new(dir)
|
116
99
|
iter = f.iterator
|
117
100
|
iter.next_file.wont_be_nil
|
118
|
-
sleep(1.1) #
|
119
|
-
|
120
|
-
(dir + "tmp-1.txt").unlink
|
101
|
+
sleep(1.1) # < make sure mtime change will be detected (which only has second resolution)
|
102
|
+
(dir + 'tmp-1.txt').unlink
|
121
103
|
collect_files(iter).wont_include("tmp-1.txt")
|
122
104
|
end
|
123
105
|
end
|
124
106
|
|
125
|
-
|
126
|
-
|
107
|
+
|
108
|
+
it 'dump/loads in the middle of iterating' do
|
109
|
+
with_tree(%w(.jpg .txt .JPG)) do |dir|
|
127
110
|
all_files = `find * -type f -iname \\*.jpg`.split
|
128
111
|
all_files.size.times do |i|
|
129
112
|
f = Findler.new(dir)
|
@@ -131,19 +114,24 @@ describe Findler do
|
|
131
114
|
f.case_insensitive!
|
132
115
|
iter_a = f.iterator
|
133
116
|
files_a = i.times.collect { relative_path(iter_a.path, iter_a.next_file) }
|
134
|
-
iter_b =
|
117
|
+
iter_b = marshal_round_trip(iter_a)
|
135
118
|
files_b = collect_files(iter_b)
|
136
119
|
|
137
|
-
|
138
|
-
|
139
|
-
|
120
|
+
files_a.wont_include_any files_b
|
121
|
+
files_b.wont_include_any files_a
|
122
|
+
(files_a + files_b).must_equal_contents all_files
|
123
|
+
|
124
|
+
# iter_b should be "exhausted" now.
|
125
|
+
collect_files(iter_b).must_be_empty
|
140
126
|
|
141
|
-
|
127
|
+
# and "exhaustion" should survive marshalling:
|
128
|
+
iter_c = marshal_round_trip(iter_b)
|
129
|
+
collect_files(iter_c).must_be_empty
|
142
130
|
end
|
143
131
|
end
|
144
132
|
end
|
145
133
|
|
146
|
-
it
|
134
|
+
it 'creates an iterator even for a non-existent directory' do
|
147
135
|
tmpdir = nil
|
148
136
|
Dir.mktmpdir do |dir|
|
149
137
|
tmpdir = Pathname.new dir
|
@@ -153,7 +141,7 @@ describe Findler do
|
|
153
141
|
collect_files(f.iterator).must_be_empty
|
154
142
|
end
|
155
143
|
|
156
|
-
it
|
144
|
+
it 'raises an error if the block given to next_file returns nil' do
|
157
145
|
Dir.mktmpdir do |dir|
|
158
146
|
f = Findler.new(dir)
|
159
147
|
f.add_filter :no_return
|
@@ -162,7 +150,7 @@ describe Findler do
|
|
162
150
|
end
|
163
151
|
end
|
164
152
|
|
165
|
-
it
|
153
|
+
it 'raises an error if the block returns non-children' do
|
166
154
|
with_tree(%W(.txt)) do |dir|
|
167
155
|
f = Findler.new(dir)
|
168
156
|
f.add_filter :invalid_return
|
@@ -171,32 +159,31 @@ describe Findler do
|
|
171
159
|
end
|
172
160
|
end
|
173
161
|
|
174
|
-
it
|
162
|
+
it 'raises error when filter methods return strings' do
|
175
163
|
with_tree(%W(.txt)) do |dir|
|
176
164
|
f = Findler.new(dir)
|
177
165
|
f.add_filter :files_to_s
|
178
|
-
|
179
|
-
|
180
|
-
files.sort.must_equal `find * -type f`.split.sort
|
166
|
+
i = f.iterator
|
167
|
+
lambda { i.next_file }.must_raise(Findler::Error)
|
181
168
|
end
|
182
169
|
end
|
183
170
|
|
184
|
-
it
|
171
|
+
it 'supports next_file blocks properly' do
|
185
172
|
with_tree(%W(.a .b)) do |dir|
|
186
173
|
Dir["**/*.a"].each { |ea| File.open(ea, 'w') { |f| f.write("hello") } }
|
187
174
|
f = Findler.new(dir)
|
188
175
|
f.add_filter :non_empty_files
|
189
176
|
iter = f.iterator
|
190
177
|
files = collect_files(iter)
|
191
|
-
files.
|
178
|
+
files.must_equal_contents `find * -type f -name \\*.a`.split
|
192
179
|
end
|
193
180
|
end
|
194
181
|
|
195
|
-
it
|
182
|
+
it 'supports files_first ordering' do
|
196
183
|
with_tree(%W(.a), {
|
197
|
-
|
198
|
-
|
199
|
-
|
184
|
+
:depth => 2,
|
185
|
+
:files_per_dir => 2,
|
186
|
+
:subdirs_per_dir => 1,
|
200
187
|
}) do |dir|
|
201
188
|
f = Findler.new(dir)
|
202
189
|
f.add_filters([:order_by_name, :files_first])
|
@@ -207,11 +194,11 @@ describe Findler do
|
|
207
194
|
end
|
208
195
|
end
|
209
196
|
|
210
|
-
it
|
197
|
+
it 'supports directory_first ordering' do
|
211
198
|
with_tree(%W(.a), {
|
212
|
-
|
213
|
-
|
214
|
-
|
199
|
+
:depth => 2,
|
200
|
+
:files_per_dir => 2,
|
201
|
+
:subdirs_per_dir => 1,
|
215
202
|
}) do |dir|
|
216
203
|
f = Findler.new(dir)
|
217
204
|
f.add_filters([:order_by_name, :directories_first])
|
data/test/minitest_helper.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
|
-
require 'minitest/spec'
|
2
|
-
require 'minitest/reporters'
|
3
1
|
require 'minitest/autorun'
|
2
|
+
require 'minitest/great_expectations'
|
4
3
|
require 'tmpdir'
|
5
4
|
require 'fileutils'
|
6
5
|
require 'findler'
|
7
6
|
|
8
|
-
|
7
|
+
unless ENV['CI']
|
8
|
+
require 'minitest/reporters'
|
9
|
+
MiniTest::Reporters.use!
|
10
|
+
end
|
9
11
|
|
10
12
|
def with_tmp_dir(&block)
|
11
13
|
cwd = Dir.pwd
|
@@ -27,20 +29,20 @@ end
|
|
27
29
|
|
28
30
|
def mk_tree(target_dir, options)
|
29
31
|
opts = {
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
32
|
+
:depth => 3,
|
33
|
+
:files_per_dir => 3,
|
34
|
+
:subdirs_per_dir => 3,
|
35
|
+
:prefix => 'tmp',
|
36
|
+
:suffix => '',
|
37
|
+
:dir_prefix => 'dir',
|
38
|
+
:dir_suffix => ''
|
37
39
|
}.merge options
|
38
40
|
p = target_dir.is_a?(Pathname) ? target_dir : Pathname.new(target_dir)
|
39
41
|
p.mkdir unless p.exist?
|
40
42
|
|
41
43
|
opts[:files_per_dir].times do |i|
|
42
44
|
fname = "#{opts[:prefix]}-#{i}#{opts[:suffix]}"
|
43
|
-
|
45
|
+
(p + fname).touch
|
44
46
|
end
|
45
47
|
return if (opts[:depth] -= 1) <= 0
|
46
48
|
opts[:subdirs_per_dir].times do |i|
|
@@ -58,9 +60,15 @@ def relative_path(parent, pathname)
|
|
58
60
|
pathname.relative_path_from(parent).to_s
|
59
61
|
end
|
60
62
|
|
63
|
+
def marshal_round_trip(iter)
|
64
|
+
output = "#{rand_alphanumeric}.ser"
|
65
|
+
File.open(output, 'wb') { |io| Marshal.dump(iter, io) }
|
66
|
+
Marshal.load(File.open(output, 'rb'))
|
67
|
+
end
|
68
|
+
|
61
69
|
def collect_files(iter)
|
62
70
|
files = []
|
63
|
-
while nxt = iter.next_file
|
71
|
+
while (nxt = iter.next_file)
|
64
72
|
files << relative_path(iter.path, nxt)
|
65
73
|
end
|
66
74
|
files
|
@@ -68,16 +76,25 @@ end
|
|
68
76
|
|
69
77
|
def fs_case_sensitive?
|
70
78
|
@fs_case_sensitive ||= begin
|
71
|
-
|
72
|
-
|
79
|
+
downcase = Pathname.new(rand_alphanumeric.downcase)
|
80
|
+
downcase.touch
|
81
|
+
upcase = Pathname.new(downcase.basename.to_s.upcase)
|
82
|
+
!upcase.exist?
|
73
83
|
ensure
|
74
|
-
|
75
|
-
end
|
84
|
+
downcase.unlink
|
85
|
+
end.tap { |ea| puts "fs_case_sensitive = #{ea}" }
|
76
86
|
end
|
77
87
|
|
78
|
-
ALPHANUMERIC = (('a'..'z').to_a + ('
|
88
|
+
ALPHANUMERIC = (('a'..'z').to_a + ('0'..'9').to_a).freeze
|
89
|
+
|
79
90
|
def rand_alphanumeric(length = 10)
|
80
91
|
(0..length).collect do
|
81
92
|
ALPHANUMERIC[rand(ALPHANUMERIC.length)]
|
82
93
|
end.join
|
83
94
|
end
|
95
|
+
|
96
|
+
class Pathname
|
97
|
+
def touch
|
98
|
+
FileUtils.touch(self.expand_path.to_s)
|
99
|
+
end
|
100
|
+
end
|
metadata
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: findler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.7
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Matthew McEachen
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-07-05 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rake
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
17
|
- - ! '>='
|
20
18
|
- !ruby/object:Gem::Version
|
@@ -22,7 +20,6 @@ dependencies:
|
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
24
|
- - ! '>='
|
28
25
|
- !ruby/object:Gem::Version
|
@@ -30,7 +27,6 @@ dependencies:
|
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: yard
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
31
|
- - ! '>='
|
36
32
|
- !ruby/object:Gem::Version
|
@@ -38,7 +34,6 @@ dependencies:
|
|
38
34
|
type: :development
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
38
|
- - ! '>='
|
44
39
|
- !ruby/object:Gem::Version
|
@@ -46,7 +41,6 @@ dependencies:
|
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: minitest
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
45
|
- - ! '>='
|
52
46
|
- !ruby/object:Gem::Version
|
@@ -54,15 +48,13 @@ dependencies:
|
|
54
48
|
type: :development
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
52
|
- - ! '>='
|
60
53
|
- !ruby/object:Gem::Version
|
61
54
|
version: '0'
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
|
-
name: minitest-
|
56
|
+
name: minitest-great_expectations
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
59
|
- - ! '>='
|
68
60
|
- !ruby/object:Gem::Version
|
@@ -70,23 +62,20 @@ dependencies:
|
|
70
62
|
type: :development
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
66
|
- - ! '>='
|
76
67
|
- !ruby/object:Gem::Version
|
77
68
|
version: '0'
|
78
69
|
- !ruby/object:Gem::Dependency
|
79
|
-
name:
|
70
|
+
name: minitest-reporters
|
80
71
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
72
|
requirements:
|
83
73
|
- - ! '>='
|
84
74
|
- !ruby/object:Gem::Version
|
85
75
|
version: '0'
|
86
|
-
type: :
|
76
|
+
type: :development
|
87
77
|
prerelease: false
|
88
78
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
79
|
requirements:
|
91
80
|
- - ! '>='
|
92
81
|
- !ruby/object:Gem::Version
|
@@ -112,39 +101,33 @@ files:
|
|
112
101
|
- lib/findler/error.rb
|
113
102
|
- lib/findler/filters.rb
|
114
103
|
- lib/findler/iterator.rb
|
104
|
+
- lib/findler/path.rb
|
115
105
|
- lib/findler/version.rb
|
116
106
|
- test/filters_test.rb
|
117
107
|
- test/findler_test.rb
|
118
108
|
- test/minitest_helper.rb
|
119
109
|
homepage: https://github.com/mceachen/findler/
|
120
110
|
licenses: []
|
111
|
+
metadata: {}
|
121
112
|
post_install_message:
|
122
113
|
rdoc_options: []
|
123
114
|
require_paths:
|
124
115
|
- lib
|
125
116
|
required_ruby_version: !ruby/object:Gem::Requirement
|
126
|
-
none: false
|
127
117
|
requirements:
|
128
118
|
- - ! '>='
|
129
119
|
- !ruby/object:Gem::Version
|
130
120
|
version: '0'
|
131
|
-
segments:
|
132
|
-
- 0
|
133
|
-
hash: 1051954001939010091
|
134
121
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
135
|
-
none: false
|
136
122
|
requirements:
|
137
123
|
- - ! '>='
|
138
124
|
- !ruby/object:Gem::Version
|
139
125
|
version: '0'
|
140
|
-
segments:
|
141
|
-
- 0
|
142
|
-
hash: 1051954001939010091
|
143
126
|
requirements: []
|
144
127
|
rubyforge_project:
|
145
|
-
rubygems_version:
|
128
|
+
rubygems_version: 2.0.3
|
146
129
|
signing_key:
|
147
|
-
specification_version:
|
130
|
+
specification_version: 4
|
148
131
|
summary: Findler is a stateful filesystem iterator
|
149
132
|
test_files:
|
150
133
|
- test/filters_test.rb
|