fuzzy_file_finder 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,4 @@
1
+ All code, documentation, and other data distributed with this project
2
+ is released in the PUBLIC DOMAIN, by the author, Jamis Buck. Anyone,
3
+ anywhere, is allowed to use, modify, and/or redistribute any of this
4
+ without restriction.
@@ -0,0 +1,4 @@
1
+ lib/fuzzy_file_finder.rb
2
+ LICENSE
3
+ Manifest
4
+ README.rdoc
@@ -0,0 +1,36 @@
1
+ = FuzzyFileFinder
2
+
3
+ FuzzyFileFinder is a (somewhat improved) implementation of TextMate's "cmd-T" functionality. It allows you to search for a file by specifying a pattern of characters that appear in that file's name. Unlike TextMate, FuzzyFileFinder also lets you match against the file's directory, so you can more easily scope your search.
4
+
5
+ == FEATURES:
6
+
7
+ * Quickly search directory trees for files
8
+ * Avoids accidentally scanning huge directories by implementing a ceiling (default 10,000 entries)
9
+ * Simple highlighting of matches to discover how a pattern matched
10
+
11
+ == SYNOPSIS:
12
+
13
+ In a nutshell:
14
+
15
+ require 'fuzzy_file_finder'
16
+
17
+ finder = FuzzyFileFinder.new
18
+
19
+ finder.search "app/blogcon" do |match|
20
+ puts "[%5d] %s" % [match[:score] * 10000, match[:highlighted_path]]
21
+ end
22
+
23
+ matches = finder.find("app/blogcon").sort_by { |m| [-m[:score], m[:path]] }
24
+ matches.each do |match|
25
+ puts "[%5d] %s" % [match[:score] * 10000, match[:highlighted_path]]
26
+ end
27
+
28
+ See FuzzyFileFinder for more documentation, and links to further information.
29
+
30
+ == INSTALL:
31
+
32
+ * gem install --source=http://gems.github.com jamis-fuzzy_file_finder
33
+
34
+ == LICENSE:
35
+
36
+ All code, documentation, and related materials in this project are released into the PUBLIC DOMAIN. Usage, modification, and distribution are allowed without restriction.
@@ -0,0 +1,19 @@
1
# Rakefile for fuzzy_file_finder. Packaging tasks are generated by the
# `echoe' gem, so bail out with a readable message if it is missing.
begin
  require 'echoe'
rescue LoadError
  abort "You'll need to have `echoe' installed to use fuzzy_file_finder's Rakefile"
end

require './lib/fuzzy_file_finder'

# Start from the library's own version string. It is dup'ed because a
# timestamp may be appended to it below.
version = FuzzyFileFinder::Version::STRING.dup

# When SNAPSHOT=1 is set in the environment, append a UTC timestamp to
# the version string.
if ENV['SNAPSHOT'].to_i == 1
  version << "." << Time.now.utc.strftime("%Y%m%d%H%M%S")
end

Echoe.new('fuzzy_file_finder', version) do |p|
  p.author  = "Jamis Buck"
  p.email   = "jamis@jamisbuck.org"
  p.summary = "an implementation of TextMate's cmd-T search functionality"

  # The dot is escaped so it only matches a literal "." in README.rdoc.
  p.rdoc_pattern = /^(lib|README\.rdoc)/
end
@@ -0,0 +1,30 @@
1
# -*- encoding: utf-8 -*-

# Gem specification for fuzzy_file_finder 1.0.4.
#
# Setters that are not available in every RubyGems release are guarded
# with respond_to? so the file loads on both old and new versions.
Gem::Specification.new do |s|
  s.name = %q{fuzzy_file_finder}
  s.version = "1.0.4"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Jamis Buck"]
  s.date = %q{2009-12-11}
  s.description = %q{an implementation of TextMate's cmd-T search functionality}
  s.email = %q{jamis@jamisbuck.org}
  s.extra_rdoc_files = ["lib/fuzzy_file_finder.rb", "README.rdoc"]
  s.files = ["lib/fuzzy_file_finder.rb", "LICENSE", "Manifest", "README.rdoc", "fuzzy_file_finder.gemspec", "Rakefile"]
  s.homepage = %q{}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Fuzzy_file_finder", "--main", "README.rdoc"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{fuzzy_file_finder} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{an implementation of TextMate's cmd-T search functionality}

  # This gem declares no dependencies, so the generated (and empty)
  # RubyGems-version branches have been dropped; only the specification
  # version needs to be recorded.
  s.specification_version = 3 if s.respond_to? :specification_version
end
@@ -0,0 +1,354 @@
1
+ #--
2
+ # ==================================================================
3
+ # Author: Jamis Buck (jamis@jamisbuck.org)
4
+ # Date: 2008-10-09
5
+ #
6
+ # This file is in the public domain. Usage, modification, and
7
+ # redistribution of this file are unrestricted.
8
+ # ==================================================================
9
+ #++
10
+
11
+ # The "fuzzy" file finder provides a way for searching a directory
12
+ # tree with only a partial name. This is similar to the "cmd-T"
13
+ # feature in TextMate (http://macromates.com).
14
+ #
15
+ # Usage:
16
+ #
17
+ # finder = FuzzyFileFinder.new
18
+ # finder.search("app/blogcon") do |match|
19
+ # puts match[:highlighted_path]
20
+ # end
21
+ #
22
+ # In the above example, all files matching "app/blogcon" will be
23
+ # yielded to the block. The given pattern is reduced to a regular
24
+ # expression internally, so that any file that contains those
25
+ # characters in that order (even if there are other characters
26
+ # in between) will match.
27
+ #
28
+ # In other words, "app/blogcon" would match any of the following
29
+ # (parenthesized strings indicate how the match was made):
30
+ #
31
+ # * (app)/controllers/(blog)_(con)troller.rb
32
+ # * lib/c(ap)_(p)ool/(bl)ue_(o)r_(g)reen_(co)loratio(n)
33
+ # * test/(app)/(blog)_(con)troller_test.rb
34
+ #
35
+ # And so forth.
36
class FuzzyFileFinder
  module Version
    MAJOR = 1
    MINOR = 0
    TINY  = 4
    STRING = [MAJOR, MINOR, TINY].join(".")
  end

  # This is the exception that is raised if you try to scan a
  # directory tree with too many entries. By default, a ceiling of
  # 10,000 entries is enforced, but you can change that number via
  # the +ceiling+ parameter to FuzzyFileFinder.new.
  class TooManyEntries < RuntimeError; end

  # Used internally to represent a run of characters within a
  # match. This is used to build the highlighted version of
  # a file name: runs that are "inside" the pattern are rendered
  # wrapped in parentheses, all other runs are rendered verbatim.
  CharacterRun = Struct.new(:string, :inside) do #:nodoc:
    def to_s
      inside ? "(#{string})" : string
    end
  end

  # Used internally to represent a file within the directory tree.
  class FileSystemEntry #:nodoc:
    attr_reader :parent
    attr_reader :name

    def initialize(parent, name)
      @parent = parent
      @name = name
    end

    # The full path of the file: the parent directory's name joined
    # with the file's own name.
    def path
      File.join(parent.name, name)
    end
  end

  # Used internally to represent a subdirectory within the directory
  # tree.
  class Directory #:nodoc:
    attr_reader :name

    def initialize(name, is_root=false)
      @name = name
      @is_root = is_root
    end

    # True if this directory was given as one of the search roots.
    def root?
      @is_root
    end
  end

  # The root directory trees to search.
  attr_reader :roots

  # The list of files beneath all +roots+
  attr_reader :files

  # The maximum number of files beneath all +roots+
  attr_reader :ceiling

  # The prefix shared by all +roots+.
  attr_reader :shared_prefix

  # The list of glob patterns to ignore.
  attr_reader :ignores

  # Initializes a new FuzzyFileFinder. This will scan the
  # given +directories+, using +ceiling+ as the maximum number
  # of entries to scan. If there are more than +ceiling+ entries
  # a TooManyEntries exception will be raised.
  #
  # +directories+ may be a single path or an array of paths; paths
  # that are not existing directories are silently dropped. +ignores+
  # may be nil, a single glob pattern or an array of glob patterns
  # (see File.fnmatch); matching files and directories are skipped.
  def initialize(directories=['.'], ceiling=10_000, ignores=nil)
    directories = Array(directories)
    # Build a fresh array rather than appending, so that an empty array
    # passed in by the caller is never modified.
    directories = ["."] if directories.empty?

    # expand any paths with ~
    root_dirnames = directories.map { |d| File.expand_path(d) }.select { |d| File.directory?(d) }.uniq

    @roots = root_dirnames.map { |d| Directory.new(d, true) }
    @shared_prefix = determine_shared_prefix
    @shared_prefix_re = Regexp.new("^#{Regexp.escape(shared_prefix)}" + (shared_prefix.empty? ? "" : "/"))

    @files = []
    @ceiling = ceiling

    @ignores = Array(ignores)

    rescan!
  end

  # Rescans the subtree. If the directory contents ever change,
  # you'll need to call this to force the finder to be aware of
  # the changes.
  def rescan!
    @files.clear
    roots.each { |root| follow_tree(root) }
  end

  # Takes the given +pattern+ (which must be a string) and searches
  # all files beneath +roots+, yielding each match. The pattern itself
  # is never modified. If no block is given, an Enumerator over the
  # matches is returned instead.
  #
  # +pattern+ is interpreted thus:
  #
  # * "foo" : look for any file with the characters 'f', 'o', and 'o'
  #   in its basename (discounting directory names). The characters
  #   must be in that order.
  # * "foo/bar" : look for any file with the characters 'b', 'a',
  #   and 'r' in its basename (discounting directory names). Also,
  #   any successful match must also have at least one directory
  #   element matching the characters 'f', 'o', and 'o' (in that
  #   order).
  # * "foo/bar/baz" : same as "foo/bar", but matching two
  #   directory elements in addition to a file name of "baz".
  #
  # Spaces in the pattern are ignored, and matching is
  # case-insensitive.
  #
  # Each yielded match will be a hash containing the following keys:
  #
  # * :path refers to the full path to the file
  # * :directory refers to the directory of the file
  # * :name refers to the name of the file (without directory)
  # * :highlighted_directory refers to the directory of the file with
  #   matches highlighted in parentheses.
  # * :highlighted_name refers to the name of the file with matches
  #   highlighted in parentheses
  # * :highlighted_path refers to the full path of the file with
  #   matches highlighted in parentheses
  # * :abbr refers to an abbreviated form of :highlighted_path, where
  #   path segments without matches are compressed to just their first
  #   character.
  # * :score refers to a value between 0 and 1 indicating how closely
  #   the file matches the given pattern. A score of 1 means the
  #   pattern matches the file exactly.
  def search(pattern, &block)
    return enum_for(:search, pattern) unless block_given?

    # Work on a copy: the caller's string is left untouched, and frozen
    # strings are accepted.
    pattern = pattern.delete(" ")
    path_parts = pattern.split("/")
    # String#split drops a trailing empty field, so "foo/" must be told
    # explicitly that its file-name part is empty.
    path_parts.push "" if pattern.end_with?("/")

    file_name_part = path_parts.pop || ""

    path_regex = nil
    if path_parts.any?
      path_regex_raw = "^(.*?)" + path_parts.map { |part| make_pattern(part) }.join("(.*?/.*?)") + "(.*?)$"
      path_regex = Regexp.new(path_regex_raw, Regexp::IGNORECASE)
    end

    file_regex_raw = "^(.*?)#{make_pattern(file_name_part)}(.*)$"
    file_regex = Regexp.new(file_regex_raw, Regexp::IGNORECASE)

    # Directory match results are cached per Directory object, since
    # many files share the same parent.
    path_matches = {}
    files.each do |file|
      path_match = match_path(file.parent, path_matches, path_regex, path_parts.length)
      next if path_match[:missed]

      match_file(file, file_regex, path_match, &block)
    end
  end

  # Takes the given +pattern+ (which must be a string, formatted as
  # described in #search), and returns up to +max+ matches in an
  # Array. If +max+ is nil, all matches will be returned.
  def find(pattern, max=nil)
    results = []
    search(pattern) do |match|
      results << match
      break if max && results.length >= max
    end
    results
  end

  # Displays the finder object in a sane, non-explosive manner.
  def inspect #:nodoc:
    "#<%s:0x%x roots=%s, files=%d>" % [self.class.name, object_id, roots.map { |r| r.name.inspect }.join(", "), files.length]
  end

  private

    # Recursively scans +directory+ and all files and subdirectories
    # beneath it, depth-first. Entries whose names start with "." are
    # skipped. Raises TooManyEntries as soon as more than +ceiling+
    # files have been collected.
    def follow_tree(directory)
      # Ignore whole directory hierarchies. The directory is tested by
      # its full name, once, before its entries are even read.
      return if ignore?(directory.name)

      Dir.entries(directory.name).each do |entry|
        next if entry.start_with?(".")

        full = File.join(directory.name, entry)

        if File.directory?(full)
          follow_tree(Directory.new(full))
        elsif !ignore?(full.sub(@shared_prefix_re, ""))
          files.push(FileSystemEntry.new(directory, entry))
          # Check right after adding, so the list can never be left
          # holding more than +ceiling+ files.
          raise TooManyEntries if files.length > ceiling
        end
      end
    end

    # Returns +true+ if the given name matches any of the ignore
    # patterns.
    def ignore?(name)
      ignores.any? { |pattern| File.fnmatch(pattern, name) }
    end

    # Takes the given pattern string "foo" and converts it to a new
    # string "(f)([^/]*?)(o)([^/]*?)(o)" that can be used to create
    # a regular expression. An empty pattern yields "()".
    def make_pattern(pattern)
      characters = pattern.split(//)
      characters = [""] if characters.empty?

      characters.map { |character| "(#{Regexp.escape(character)})" }.join("([^/]*?)")
    end

    # Given a MatchData object +match+ and a number of "inside"
    # segments to support, compute both the match score and the
    # highlighted match string. The "inside segments" refers to how
    # many patterns were matched in this one match. For a file name,
    # this will always be one. For directories, it will be one for
    # each directory segment in the original pattern.
    #
    # Returns a hash with the keys :score and :result.
    def build_match_result(match, inside_segments)
      runs = []
      inside_chars = total_chars = 0

      match.captures.each_with_index do |capture, index|
        next if capture.empty?

        # odd-numbered captures are matches inside the pattern.
        # even-numbered captures are matches between the pattern's elements.
        inside = index.odd?

        total_chars += capture.delete("/").length # ignore '/' delimiters
        inside_chars += capture.length if inside

        # Adjacent captures of the same kind are merged into one run.
        if runs.last && runs.last.inside == inside
          runs.last.string << capture
        else
          runs << CharacterRun.new(capture, inside)
        end
      end

      # Determine the score of this match.
      # 1. fewer "inside runs" (runs corresponding to the original pattern)
      #    is better.
      # 2. better coverage of the actual path name is better

      inside_runs = runs.select { |r| r.inside }
      run_ratio = inside_runs.length.zero? ? 1 : inside_segments / inside_runs.length.to_f

      char_ratio = total_chars.zero? ? 1 : inside_chars.to_f / total_chars

      { :score => run_ratio * char_ratio, :result => runs.join }
    end

    # Match the given path against the regex, caching the result in +path_matches+.
    # If +path+ is already cached in the path_matches cache, just return the cached
    # value. A directory that fails to match is cached with :missed set.
    def match_path(path, path_matches, path_regex, path_segments)
      return path_matches[path] if path_matches.key?(path)

      # Add a trailing slash so the shared prefix (which ends in "/")
      # also matches a root directory itself, then drop the slash again.
      matchable_name = (path.name + "/").sub(@shared_prefix_re, "").chop

      if path_regex
        match = matchable_name.match(path_regex)

        path_matches[path] =
          match && build_match_result(match, path_segments) ||
          { :score => 1, :result => matchable_name, :missed => true }
      else
        path_matches[path] = { :score => 1, :result => matchable_name }
      end
    end

    # Match +file+ against +file_regex+. If it matches, yield the match
    # metadata to the block.
    def match_file(file, file_regex, path_match, &block)
      file_match = file.name.match(file_regex)
      return unless file_match

      match_result = build_match_result(file_match, 1)
      highlighted_directory = path_match[:result]
      highlighted_name = match_result[:result]

      full_match_result = highlighted_directory.empty? ? highlighted_name : File.join(highlighted_directory, highlighted_name)

      # Path segments without a highlighted match are cut down to their
      # first character.
      shortened_path = highlighted_directory.gsub(/[^\/]+/) { |m| m.index("(") ? m : m[0,1] }
      abbr = shortened_path.empty? ? highlighted_name : File.join(shortened_path, highlighted_name)

      yield :path => file.path,
            :abbr => abbr,
            :directory => file.parent.name,
            :name => file.name,
            :highlighted_directory => highlighted_directory,
            :highlighted_name => highlighted_name,
            :highlighted_path => full_match_result,
            :score => path_match[:score] * match_result[:score]
    end

    # Computes the leading path shared by every root. Returns an empty
    # string when there are no roots at all (none of the directories
    # given to the constructor exist).
    def determine_shared_prefix
      return "" if roots.empty?

      # the common case: if there is only a single root, then the entire
      # name of the root is the shared prefix.
      return roots.first.name if roots.length == 1

      split_roots = roots.map { |root| root.name.split(%r{/}) }
      segments = split_roots.map { |root| root.length }.max
      master = split_roots.pop

      # The prefix ends at the first segment on which the roots disagree.
      segments.times do |segment|
        unless split_roots.all? { |root| root[segment] == master[segment] }
          return master[0,segment].join("/")
        end
      end

      # shouldn't ever get here, since we uniq the root list before
      # calling this method, but if we do, somehow...
      roots.first.name
    end
end
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fuzzy_file_finder
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.4
5
+ platform: ruby
6
+ authors:
7
+ - Jamis Buck
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-11 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: an implementation of TextMate's cmd-T search functionality
17
+ email: jamis@jamisbuck.org
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - lib/fuzzy_file_finder.rb
24
+ - README.rdoc
25
+ files:
26
+ - lib/fuzzy_file_finder.rb
27
+ - LICENSE
28
+ - Manifest
29
+ - README.rdoc
30
+ - fuzzy_file_finder.gemspec
31
+ - Rakefile
32
+ has_rdoc: true
33
+ homepage: ""
34
+ licenses: []
35
+
36
+ post_install_message:
37
+ rdoc_options:
38
+ - --line-numbers
39
+ - --inline-source
40
+ - --title
41
+ - Fuzzy_file_finder
42
+ - --main
43
+ - README.rdoc
44
+ require_paths:
45
+ - lib
46
+ required_ruby_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: "0"
51
+ version:
52
+ required_rubygems_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "1.2"
57
+ version:
58
+ requirements: []
59
+
60
+ rubyforge_project: fuzzy_file_finder
61
+ rubygems_version: 1.3.5
62
+ signing_key:
63
+ specification_version: 3
64
+ summary: an implementation of TextMate's cmd-T search functionality
65
+ test_files: []
66
+