command-t 3.0.2 → 4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +8 -2
  3. data/doc/command-t.txt +312 -147
  4. data/ruby/command-t.rb +13 -12
  5. data/ruby/command-t/controller.rb +86 -15
  6. data/ruby/command-t/depend +4 -0
  7. data/ruby/command-t/ext.h +9 -2
  8. data/ruby/command-t/extconf.rb +2 -2
  9. data/ruby/command-t/finder.rb +6 -2
  10. data/ruby/command-t/finder/buffer_finder.rb +3 -3
  11. data/ruby/command-t/finder/command_finder.rb +23 -0
  12. data/ruby/command-t/finder/file_finder.rb +3 -3
  13. data/ruby/command-t/finder/help_finder.rb +25 -0
  14. data/ruby/command-t/finder/history_finder.rb +27 -0
  15. data/ruby/command-t/finder/jump_finder.rb +3 -3
  16. data/ruby/command-t/finder/line_finder.rb +23 -0
  17. data/ruby/command-t/finder/mru_buffer_finder.rb +3 -3
  18. data/ruby/command-t/finder/tag_finder.rb +3 -3
  19. data/ruby/command-t/heap.c +146 -0
  20. data/ruby/command-t/heap.h +22 -0
  21. data/ruby/command-t/match.c +183 -116
  22. data/ruby/command-t/match.h +16 -10
  23. data/ruby/command-t/match_window.rb +10 -1
  24. data/ruby/command-t/matcher.c +203 -63
  25. data/ruby/command-t/metadata/fallback.rb +2 -2
  26. data/ruby/command-t/mru.rb +2 -2
  27. data/ruby/command-t/path_utilities.rb +2 -2
  28. data/ruby/command-t/progress_reporter.rb +38 -0
  29. data/ruby/command-t/prompt.rb +4 -4
  30. data/ruby/command-t/scanner.rb +22 -2
  31. data/ruby/command-t/scanner/buffer_scanner.rb +3 -3
  32. data/ruby/command-t/scanner/command_scanner.rb +33 -0
  33. data/ruby/command-t/scanner/file_scanner.rb +30 -6
  34. data/ruby/command-t/scanner/file_scanner/find_file_scanner.rb +12 -7
  35. data/ruby/command-t/scanner/file_scanner/git_file_scanner.rb +11 -8
  36. data/ruby/command-t/scanner/file_scanner/ruby_file_scanner.rb +7 -4
  37. data/ruby/command-t/scanner/file_scanner/watchman_file_scanner.rb +13 -5
  38. data/ruby/command-t/scanner/help_scanner.rb +40 -0
  39. data/ruby/command-t/scanner/history_scanner.rb +24 -0
  40. data/ruby/command-t/scanner/jump_scanner.rb +3 -3
  41. data/ruby/command-t/scanner/line_scanner.rb +45 -0
  42. data/ruby/command-t/scanner/mru_buffer_scanner.rb +3 -3
  43. data/ruby/command-t/scanner/tag_scanner.rb +3 -3
  44. data/ruby/command-t/scm_utilities.rb +2 -2
  45. data/ruby/command-t/settings.rb +2 -2
  46. data/ruby/command-t/stub.rb +7 -2
  47. data/ruby/command-t/util.rb +2 -2
  48. data/ruby/command-t/vim.rb +27 -2
  49. data/ruby/command-t/vim/screen.rb +3 -3
  50. data/ruby/command-t/vim/window.rb +3 -3
  51. data/ruby/command-t/watchman.c +1 -1
  52. metadata +13 -2
@@ -0,0 +1,22 @@
1
+ // Copyright 2016-present Greg Hurrell. All rights reserved.
2
+ // Licensed under the terms of the BSD 2-clause license.
3
+
4
+ /**
5
+ * A fixed size min-heap implementation.
6
+ */
7
+
8
+ typedef int (*heap_compare_entries)(const void *a, const void *b);
9
+
10
+ typedef struct {
11
+ long count;
12
+ long capacity;
13
+ void **entries;
14
+ heap_compare_entries comparator;
15
+ } heap_t;
16
+
17
+ #define HEAP_PEEK(heap) (heap->entries[0])
18
+
19
+ heap_t *heap_new(long capacity, heap_compare_entries comparator);
20
+ void heap_free(heap_t *heap);
21
+ void heap_insert(heap_t *heap, void *value);
22
+ void *heap_extract(heap_t *heap);
@@ -6,171 +6,238 @@
6
6
  #include "ext.h"
7
7
  #include "ruby_compat.h"
8
8
 
9
- // use a struct to make passing params during recursion easier
9
+ #define UNSET_SCORE FLT_MAX
10
+
11
+ // Use a struct to make passing params during recursion easier.
10
12
  typedef struct {
11
- char *haystack_p; // pointer to the path string to be searched
12
- long haystack_len; // length of same
13
- char *needle_p; // pointer to search string (needle)
14
- long needle_len; // length of same
15
- double max_score_per_char;
16
- int always_show_dot_files; // boolean
17
- int never_show_dot_files; // boolean
18
- int case_sensitive; // boolean
19
- int recurse; // boolean
20
- double *memo; // memoization
13
+ char *haystack_p; // Pointer to the path string to be searched.
14
+ long haystack_len; // Length of same.
15
+ char *needle_p; // Pointer to search string (needle).
16
+ long needle_len; // Length of same.
17
+ long *rightmost_match_p; // Rightmost match for each char in needle.
18
+ float max_score_per_char;
19
+ int always_show_dot_files; // Boolean.
20
+ int never_show_dot_files; // Boolean.
21
+ int case_sensitive; // Boolean.
22
+ int recurse; // Boolean.
23
+ float *memo; // Memoization.
21
24
  } matchinfo_t;
22
25
 
23
- double recursive_match(matchinfo_t *m, // sharable meta-data
24
- long haystack_idx, // where in the path string to start
25
- long needle_idx, // where in the needle string to start
26
- long last_idx, // location of last matched character
27
- double score) // cumulative score so far
28
- {
29
- double score_for_char;
30
- double seen_score = 0; // remember best score seen via recursion
31
- int found;
32
- long i, j, distance;
33
- long memo_idx = haystack_idx;
34
-
35
- // do we have a memoized result we can return?
36
- double memoized = m->memo[needle_idx * m->needle_len + memo_idx];
37
- if (memoized != DBL_MAX)
38
- return memoized;
39
-
40
- // bail early if not enough room (left) in haystack for (rest of) needle
41
- if (m->haystack_len - haystack_idx < m->needle_len - needle_idx) {
42
- score = 0.0;
43
- goto memoize;
44
- }
45
-
26
+ float recursive_match(
27
+ matchinfo_t *m, // Sharable meta-data.
28
+ long haystack_idx, // Where in the path string to start.
29
+ long needle_idx, // Where in the needle string to start.
30
+ long last_idx, // Location of last matched character.
31
+ float score // Cumulative score so far.
32
+ ) {
33
+ long distance, i, j;
34
+ float *memoized = NULL;
35
+ float score_for_char;
36
+ float seen_score = 0;
37
+
38
+ // Iterate over needle.
46
39
  for (i = needle_idx; i < m->needle_len; i++) {
47
- char c = m->needle_p[i];
48
- found = 0;
49
-
50
- // similar to above, we'll stop iterating when we know we're too close
51
- // to the end of the string to possibly match
52
- for (j = haystack_idx;
53
- j <= m->haystack_len - (m->needle_len - i);
54
- j++, haystack_idx++) {
55
- char d = m->haystack_p[j];
40
+ // Iterate over (valid range of) haystack.
41
+ for (j = haystack_idx; j <= m->rightmost_match_p[i]; j++) {
42
+ char c, d;
43
+
44
+ // Do we have a memoized result we can return?
45
+ memoized = &m->memo[j * m->needle_len + i];
46
+ if (*memoized != UNSET_SCORE) {
47
+ return *memoized > seen_score ? *memoized : seen_score;
48
+ }
49
+ c = m->needle_p[i];
50
+ d = m->haystack_p[j];
56
51
  if (d == '.') {
57
- if (j == 0 || m->haystack_p[j - 1] == '/') { // this is a dot-file
58
- int dot_search = (i == 0 && c == '.'); // searching for a dot
59
- if (m->never_show_dot_files || (!dot_search && !m->always_show_dot_files)) {
60
- score = 0.0;
61
- goto memoize;
52
+ if (j == 0 || m->haystack_p[j - 1] == '/') { // This is a dot-file.
53
+ int dot_search = c == '.'; // Searching for a dot.
54
+ if (
55
+ m->never_show_dot_files ||
56
+ (!dot_search && !m->always_show_dot_files)
57
+ ) {
58
+ return *memoized = 0.0;
62
59
  }
63
60
  }
64
61
  } else if (d >= 'A' && d <= 'Z' && !m->case_sensitive) {
65
- d += 'a' - 'A'; // add 32 to downcase
62
+ d += 'a' - 'A'; // Add 32 to downcase.
66
63
  }
67
64
 
68
65
  if (c == d) {
69
- found = 1;
70
-
71
- // calculate score
66
+ // Calculate score.
67
+ float sub_score = 0;
72
68
  score_for_char = m->max_score_per_char;
73
69
  distance = j - last_idx;
74
70
 
75
71
  if (distance > 1) {
76
- double factor = 1.0;
72
+ float factor = 1.0;
77
73
  char last = m->haystack_p[j - 1];
78
- char curr = m->haystack_p[j]; // case matters, so get again
79
- if (last == '/')
74
+ char curr = m->haystack_p[j]; // Case matters, so get again.
75
+ if (last == '/') {
80
76
  factor = 0.9;
81
- else if (last == '-' ||
82
- last == '_' ||
83
- last == ' ' ||
84
- (last >= '0' && last <= '9'))
77
+ } else if (
78
+ last == '-' ||
79
+ last == '_' ||
80
+ last == ' ' ||
81
+ (last >= '0' && last <= '9')
82
+ ) {
85
83
  factor = 0.8;
86
- else if (last >= 'a' && last <= 'z' &&
87
- curr >= 'A' && curr <= 'Z')
84
+ } else if (
85
+ last >= 'a' && last <= 'z' &&
86
+ curr >= 'A' && curr <= 'Z'
87
+ ) {
88
88
  factor = 0.8;
89
- else if (last == '.')
89
+ } else if (last == '.') {
90
90
  factor = 0.7;
91
- else
92
- // if no "special" chars behind char, factor diminishes
93
- // as distance from last matched char increases
91
+ } else {
92
+ // If no "special" chars behind char, factor diminishes
93
+ // as distance from last matched char increases.
94
94
  factor = (1.0 / distance) * 0.75;
95
+ }
95
96
  score_for_char *= factor;
96
97
  }
97
98
 
98
- if (++j < m->haystack_len && m->recurse) {
99
- // bump cursor one char to the right and
100
- // use recursion to try and find a better match
101
- double sub_score = recursive_match(m, j, i, last_idx, score);
102
- if (sub_score > seen_score)
99
+ if (j < m->rightmost_match_p[i] && m->recurse) {
100
+ sub_score = recursive_match(m, j + 1, i, last_idx, score);
101
+ if (sub_score > seen_score) {
103
102
  seen_score = sub_score;
103
+ }
104
104
  }
105
-
105
+ last_idx = j;
106
+ haystack_idx = last_idx + 1;
106
107
  score += score_for_char;
107
- last_idx = haystack_idx++;
108
- break;
108
+ *memoized = seen_score > score ? seen_score : score;
109
+ if (i == m->needle_len - 1) {
110
+ // Whole string matched.
111
+ return *memoized;
112
+ }
113
+ if (!m->recurse) {
114
+ break;
115
+ }
109
116
  }
110
117
  }
111
- if (!found) {
112
- score = 0.0;
113
- goto memoize;
114
- }
115
118
  }
116
-
117
- score = score > seen_score ? score : seen_score;
118
-
119
- memoize:
120
- m->memo[needle_idx * m->needle_len + memo_idx] = score;
121
- return score;
119
+ return *memoized = score;
122
120
  }
123
121
 
124
- void calculate_match(VALUE str,
125
- VALUE needle,
126
- VALUE case_sensitive,
127
- VALUE always_show_dot_files,
128
- VALUE never_show_dot_files,
129
- VALUE recurse,
130
- match_t *out)
131
- {
132
- long i, max;
133
- double score;
122
+ float calculate_match(
123
+ VALUE haystack,
124
+ VALUE needle,
125
+ VALUE case_sensitive,
126
+ VALUE always_show_dot_files,
127
+ VALUE never_show_dot_files,
128
+ VALUE recurse,
129
+ long needle_bitmask,
130
+ long *haystack_bitmask
131
+ ) {
134
132
  matchinfo_t m;
135
- m.haystack_p = RSTRING_PTR(str);
136
- m.haystack_len = RSTRING_LEN(str);
133
+ long i;
134
+ float score = 1.0;
135
+ int compute_bitmasks = *haystack_bitmask == UNSET_BITMASK;
136
+ m.haystack_p = RSTRING_PTR(haystack);
137
+ m.haystack_len = RSTRING_LEN(haystack);
137
138
  m.needle_p = RSTRING_PTR(needle);
138
139
  m.needle_len = RSTRING_LEN(needle);
140
+ m.rightmost_match_p = NULL;
139
141
  m.max_score_per_char = (1.0 / m.haystack_len + 1.0 / m.needle_len) / 2;
140
142
  m.always_show_dot_files = always_show_dot_files == Qtrue;
141
143
  m.never_show_dot_files = never_show_dot_files == Qtrue;
142
144
  m.case_sensitive = (int)case_sensitive;
143
145
  m.recurse = recurse == Qtrue;
144
146
 
145
- // calculate score
146
- score = 1.0;
147
-
148
- // special case for zero-length search string
147
+ // Special case for zero-length search string.
149
148
  if (m.needle_len == 0) {
150
-
151
- // filter out dot files
152
- if (!m.always_show_dot_files) {
149
+ // Filter out dot files.
150
+ if (m.never_show_dot_files || !m.always_show_dot_files) {
153
151
  for (i = 0; i < m.haystack_len; i++) {
154
152
  char c = m.haystack_p[i];
155
-
156
153
  if (c == '.' && (i == 0 || m.haystack_p[i - 1] == '/')) {
157
- score = 0.0;
158
- break;
154
+ return 0.0;
159
155
  }
160
156
  }
161
157
  }
162
- } else if (m.haystack_len > 0) { // normal case
158
+ } else {
159
+ long haystack_limit;
160
+ long memo_size;
161
+ long needle_idx;
162
+ long mask;
163
+ long rightmost_match_p[m.needle_len];
164
+
165
+ if (*haystack_bitmask != UNSET_BITMASK) {
166
+ if ((needle_bitmask & *haystack_bitmask) != needle_bitmask) {
167
+ return 0.0;
168
+ }
169
+ }
163
170
 
164
- // prepare for memoization
165
- double memo[m.haystack_len * m.needle_len];
166
- for (i = 0, max = m.haystack_len * m.needle_len; i < max; i++)
167
- memo[i] = DBL_MAX;
168
- m.memo = memo;
171
+ // Pre-scan string:
172
+ // - Bail if it can't match at all.
173
+ // - Record rightmost match for each character (prune search space).
174
+ // - Record bitmask for haystack to speed up future searches.
175
+ m.rightmost_match_p = rightmost_match_p;
176
+ needle_idx = m.needle_len - 1;
177
+ mask = 0;
178
+ for (i = m.haystack_len - 1; i >= 0; i--) {
179
+ char c = m.haystack_p[i];
180
+ char lower = c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c;
181
+ if (!m.case_sensitive) {
182
+ c = lower;
183
+ }
184
+ if (compute_bitmasks) {
185
+ mask |= (1 << (lower - 'a'));
186
+ }
169
187
 
170
- score = recursive_match(&m, 0, 0, 0, 0.0);
171
- }
188
+ if (needle_idx >= 0) {
189
+ char d = m.needle_p[needle_idx];
190
+ if (c == d) {
191
+ rightmost_match_p[needle_idx] = i;
192
+ needle_idx--;
193
+ }
194
+ }
195
+ }
196
+ if (compute_bitmasks) {
197
+ *haystack_bitmask = mask;
198
+ }
199
+ if (needle_idx != -1) {
200
+ return 0.0;
201
+ }
172
202
 
173
- // final book-keeping
174
- out->path = str;
175
- out->score = score;
203
+ // Prepare for memoization.
204
+ haystack_limit = rightmost_match_p[m.needle_len - 1] + 1;
205
+ memo_size = m.needle_len * haystack_limit;
206
+ {
207
+ float memo[memo_size];
208
+ for (i = 0; i < memo_size; i++) {
209
+ memo[i] = UNSET_SCORE;
210
+ }
211
+ m.memo = memo;
212
+ score = recursive_match(&m, 0, 0, 0, 0.0);
213
+
214
+ #ifdef DEBUG
215
+ fprintf(stdout, " ");
216
+ for (i = 0; i < m.needle_len; i++) {
217
+ fprintf(stdout, " %c ", m.needle_p[i]);
218
+ }
219
+ fprintf(stdout, "\n");
220
+ for (i = 0; i < memo_size; i++) {
221
+ char formatted[8];
222
+ if (i % m.needle_len == 0) {
223
+ long haystack_idx = i / m.needle_len;
224
+ fprintf(stdout, "%c: ", m.haystack_p[haystack_idx]);
225
+ }
226
+ if (memo[i] == UNSET_SCORE) {
227
+ snprintf(formatted, sizeof(formatted), " - ");
228
+ } else {
229
+ snprintf(formatted, sizeof(formatted), " %-.4f", memo[i]);
230
+ }
231
+ fprintf(stdout, "%s", formatted);
232
+ if ((i + 1) % m.needle_len == 0) {
233
+ fprintf(stdout, "\n");
234
+ } else {
235
+ fprintf(stdout, " ");
236
+ }
237
+ }
238
+ fprintf(stdout, "Final score: %f\n\n", score);
239
+ #endif
240
+ }
241
+ }
242
+ return score;
176
243
  }
@@ -3,16 +3,22 @@
3
3
 
4
4
  #include <ruby.h>
5
5
 
6
- // struct for representing an individual match
6
+ #define UNSET_BITMASK (-1)
7
+
8
+ // Struct for representing an individual match.
7
9
  typedef struct {
8
- VALUE path;
9
- double score;
10
+ VALUE path;
11
+ long bitmask;
12
+ float score;
10
13
  } match_t;
11
14
 
12
- extern void calculate_match(VALUE str,
13
- VALUE needle,
14
- VALUE case_sensitive,
15
- VALUE always_show_dot_files,
16
- VALUE never_show_dot_files,
17
- VALUE recurse,
18
- match_t *out);
15
+ extern float calculate_match(
16
+ VALUE str,
17
+ VALUE needle,
18
+ VALUE case_sensitive,
19
+ VALUE always_show_dot_files,
20
+ VALUE never_show_dot_files,
21
+ VALUE recurse,
22
+ long needle_bitmask,
23
+ long *haystack_bitmask
24
+ );
@@ -15,6 +15,7 @@ module CommandT
15
15
  Highlight = Struct.new(:highlight, :bang)
16
16
 
17
17
  def initialize(options = {})
18
+ @encoding = options[:encoding]
18
19
  @highlight_color = options[:highlight_color] || 'PmenuSel'
19
20
  @min_height = options[:min_height]
20
21
  @prompt = options[:prompt]
@@ -35,7 +36,10 @@ module CommandT
35
36
  set 'scrolloff', 0 # don't scroll near buffer edges
36
37
  set 'sidescroll', 0 # don't sidescroll in jumps
37
38
  set 'sidescrolloff', 0 # don't sidescroll automatically
38
- set 'updatetime', options[:debounce_interval]
39
+
40
+ if options[:debounce_interval] > 0
41
+ set 'updatetime', options[:debounce_interval]
42
+ end
39
43
 
40
44
  # Save existing window views so we can restore them later.
41
45
  current_window = ::VIM::evaluate('winnr()')
@@ -396,6 +400,11 @@ module CommandT
396
400
  #
397
401
  def match_with_syntax_highlight(match)
398
402
  highlight_chars = @prompt.abbrev.downcase.scan(/./mu)
403
+ if @encoding &&
404
+ match.respond_to?(:force_encoding) &&
405
+ match.encoding != @encoding
406
+ match = match.force_encoding(@encoding)
407
+ end
399
408
  match.scan(/./mu).inject([]) do |output, char|
400
409
  if char.downcase == highlight_chars.first
401
410
  highlight_chars.shift
@@ -3,8 +3,9 @@
3
3
 
4
4
  #include <stdlib.h> /* for qsort() */
5
5
  #include <string.h> /* for strncmp() */
6
- #include "matcher.h"
7
6
  #include "match.h"
7
+ #include "matcher.h"
8
+ #include "heap.h"
8
9
  #include "ext.h"
9
10
  #include "ruby_compat.h"
10
11
 
@@ -13,7 +14,7 @@
13
14
  #include <pthread.h> /* for pthread_create, pthread_join etc */
14
15
  #endif
15
16
 
16
- // comparison function for use with qsort
17
+ // Comparison function for use with qsort.
17
18
  int cmp_alpha(const void *a, const void *b)
18
19
  {
19
20
  match_t a_match = *(match_t *)a;
@@ -29,11 +30,11 @@ int cmp_alpha(const void *a, const void *b)
29
30
  if (a_len > b_len) {
30
31
  order = strncmp(a_p, b_p, b_len);
31
32
  if (order == 0)
32
- order = 1; // shorter string (b) wins
33
+ order = 1; // shorter string (b) wins.
33
34
  } else if (a_len < b_len) {
34
35
  order = strncmp(a_p, b_p, a_len);
35
36
  if (order == 0)
36
- order = -1; // shorter string (a) wins
37
+ order = -1; // shorter string (a) wins.
37
38
  } else {
38
39
  order = strncmp(a_p, b_p, a_len);
39
40
  }
@@ -41,16 +42,16 @@ int cmp_alpha(const void *a, const void *b)
41
42
  return order;
42
43
  }
43
44
 
44
- // comparison function for use with qsort
45
+ // Comparison function for use with qsort.
45
46
  int cmp_score(const void *a, const void *b)
46
47
  {
47
48
  match_t a_match = *(match_t *)a;
48
49
  match_t b_match = *(match_t *)b;
49
50
 
50
51
  if (a_match.score > b_match.score)
51
- return -1; // a scores higher, a should appear sooner
52
+ return -1; // a scores higher, a should appear sooner.
52
53
  else if (a_match.score < b_match.score)
53
- return 1; // b scores higher, a should appear later
54
+ return 1; // b scores higher, a should appear later.
54
55
  else
55
56
  return cmp_alpha(a, b);
56
57
  }
@@ -62,7 +63,7 @@ VALUE CommandTMatcher_initialize(int argc, VALUE *argv, VALUE self)
62
63
  VALUE options;
63
64
  VALUE scanner;
64
65
 
65
- // process arguments: 1 mandatory, 1 optional
66
+ // Process arguments: 1 mandatory, 1 optional.
66
67
  if (rb_scan_args(argc, argv, "11", &scanner, &options) == 1)
67
68
  options = Qnil;
68
69
  if (NIL_P(scanner))
@@ -70,7 +71,7 @@ VALUE CommandTMatcher_initialize(int argc, VALUE *argv, VALUE self)
70
71
 
71
72
  rb_iv_set(self, "@scanner", scanner);
72
73
 
73
- // check optional options hash for overrides
74
+ // Check optional options hash for overrides.
74
75
  always_show_dot_files = CommandT_option_from_hash("always_show_dot_files", options);
75
76
  never_show_dot_files = CommandT_option_from_hash("never_show_dot_files", options);
76
77
 
@@ -84,94 +85,192 @@ typedef struct {
84
85
  long thread_count;
85
86
  long thread_index;
86
87
  long case_sensitive;
88
+ long limit;
87
89
  match_t *matches;
88
90
  long path_count;
89
- VALUE paths;
90
- VALUE abbrev;
91
+ VALUE haystacks;
92
+ VALUE needle;
91
93
  VALUE always_show_dot_files;
92
94
  VALUE never_show_dot_files;
93
95
  VALUE recurse;
96
+ long needle_bitmask;
94
97
  } thread_args_t;
95
98
 
96
99
  void *match_thread(void *thread_args)
97
100
  {
98
101
  long i;
102
+ float score;
103
+ heap_t *heap = NULL;
99
104
  thread_args_t *args = (thread_args_t *)thread_args;
100
- for (i = args->thread_index; i < args->path_count; i += args->thread_count) {
101
- VALUE path = RARRAY_PTR(args->paths)[i];
102
- calculate_match(path,
103
- args->abbrev,
104
- args->case_sensitive,
105
- args->always_show_dot_files,
106
- args->never_show_dot_files,
107
- args->recurse,
108
- &args->matches[i]);
105
+
106
+ if (args->limit) {
107
+ // Reserve one extra slot so that we can do an insert-then-extract even
108
+ // when "full" (effectively allows use of min-heap to maintain a
109
+ // top-"limit" list of items).
110
+ heap = heap_new(args->limit + 1, cmp_score);
111
+ }
112
+
113
+ for (
114
+ i = args->thread_index;
115
+ i < args->path_count;
116
+ i += args->thread_count
117
+ ) {
118
+ args->matches[i].path = RARRAY_PTR(args->haystacks)[i];
119
+ if (args->needle_bitmask == UNSET_BITMASK) {
120
+ args->matches[i].bitmask = UNSET_BITMASK;
121
+ }
122
+ args->matches[i].score = calculate_match(
123
+ args->matches[i].path,
124
+ args->needle,
125
+ args->case_sensitive,
126
+ args->always_show_dot_files,
127
+ args->never_show_dot_files,
128
+ args->recurse,
129
+ args->needle_bitmask,
130
+ &args->matches[i].bitmask
131
+ );
132
+ if (heap) {
133
+ if (heap->count == args->limit) {
134
+ score = ((match_t *)HEAP_PEEK(heap))->score;
135
+ if (args->matches[i].score >= score) {
136
+ heap_insert(heap, &args->matches[i]);
137
+ (void)heap_extract(heap);
138
+ }
139
+ } else {
140
+ heap_insert(heap, &args->matches[i]);
141
+ }
142
+ }
109
143
  }
110
144
 
111
- return NULL;
145
+ return heap;
146
+ }
147
+
148
+ long calculate_bitmask(VALUE string) {
149
+ char *str = RSTRING_PTR(string);
150
+ long len = RSTRING_LEN(string);
151
+ long i;
152
+ long mask = 0;
153
+ for (i = 0; i < len; i++) {
154
+ if (str[i] >= 'a' && str[i] <= 'z') {
155
+ mask |= (1 << (str[i] - 'a'));
156
+ } else if (str[i] >= 'A' && str[i] <= 'Z') {
157
+ mask |= (1 << (str[i] - 'A'));
158
+ }
159
+ }
160
+ return mask;
112
161
  }
113
162
 
114
163
  VALUE CommandTMatcher_sorted_matches_for(int argc, VALUE *argv, VALUE self)
115
164
  {
116
- long i, limit, path_count, thread_count;
165
+ long i, j, limit, path_count, thread_count;
117
166
  #ifdef HAVE_PTHREAD_H
118
167
  long err;
119
168
  pthread_t *threads;
120
169
  #endif
170
+ long needle_bitmask = UNSET_BITMASK;
171
+ long heap_matches_count;
172
+ int use_heap;
173
+ int sort;
121
174
  match_t *matches;
175
+ match_t *heap_matches = NULL;
176
+ heap_t *heap;
122
177
  thread_args_t *thread_args;
123
- VALUE abbrev;
124
- VALUE case_sensitive;
125
178
  VALUE always_show_dot_files;
179
+ VALUE case_sensitive;
180
+ VALUE recurse;
181
+ VALUE ignore_spaces;
126
182
  VALUE limit_option;
183
+ VALUE needle;
127
184
  VALUE never_show_dot_files;
128
- VALUE ignore_spaces;
185
+ VALUE new_paths_object_id;
129
186
  VALUE options;
130
187
  VALUE paths;
131
- VALUE recurse;
188
+ VALUE paths_object_id;
132
189
  VALUE results;
133
190
  VALUE scanner;
134
191
  VALUE sort_option;
135
192
  VALUE threads_option;
193
+ VALUE wrapped_matches;
136
194
 
137
- // process arguments: 1 mandatory, 1 optional
138
- if (rb_scan_args(argc, argv, "11", &abbrev, &options) == 1)
195
+ // Process arguments: 1 mandatory, 1 optional.
196
+ if (rb_scan_args(argc, argv, "11", &needle, &options) == 1)
139
197
  options = Qnil;
140
- if (NIL_P(abbrev))
141
- rb_raise(rb_eArgError, "nil abbrev");
198
+ if (NIL_P(needle))
199
+ rb_raise(rb_eArgError, "nil needle");
142
200
 
143
- // check optional options hash for overrides
201
+ // Check optional options hash for overrides.
144
202
  case_sensitive = CommandT_option_from_hash("case_sensitive", options);
145
203
  limit_option = CommandT_option_from_hash("limit", options);
146
204
  threads_option = CommandT_option_from_hash("threads", options);
147
205
  sort_option = CommandT_option_from_hash("sort", options);
148
206
  ignore_spaces = CommandT_option_from_hash("ignore_spaces", options);
207
+ always_show_dot_files = rb_iv_get(self, "@always_show_dot_files");
208
+ never_show_dot_files = rb_iv_get(self, "@never_show_dot_files");
149
209
  recurse = CommandT_option_from_hash("recurse", options);
150
210
 
151
- abbrev = StringValue(abbrev);
211
+ limit = NIL_P(limit_option) ? 15 : NUM2LONG(limit_option);
212
+ sort = NIL_P(sort_option) || sort_option == Qtrue;
213
+ use_heap = limit && sort;
214
+ heap_matches_count = 0;
215
+
216
+ needle = StringValue(needle);
152
217
  if (case_sensitive != Qtrue)
153
- abbrev = rb_funcall(abbrev, rb_intern("downcase"), 0);
218
+ needle = rb_funcall(needle, rb_intern("downcase"), 0);
154
219
 
155
220
  if (ignore_spaces == Qtrue)
156
- abbrev = rb_funcall(abbrev, rb_intern("delete"), 1, rb_str_new2(" "));
221
+ needle = rb_funcall(needle, rb_intern("delete"), 1, rb_str_new2(" "));
157
222
 
158
- // get unsorted matches
223
+ // Get unsorted matches.
159
224
  scanner = rb_iv_get(self, "@scanner");
160
225
  paths = rb_funcall(scanner, rb_intern("paths"), 0);
161
- always_show_dot_files = rb_iv_get(self, "@always_show_dot_files");
162
- never_show_dot_files = rb_iv_get(self, "@never_show_dot_files");
163
-
164
226
  path_count = RARRAY_LEN(paths);
165
- matches = malloc(path_count * sizeof(match_t));
166
- if (!matches)
167
- rb_raise(rb_eNoMemError, "memory allocation failed");
227
+
228
+ // Cached C data, not visible to Ruby layer.
229
+ paths_object_id = rb_ivar_get(self, rb_intern("paths_object_id"));
230
+ new_paths_object_id = rb_funcall(paths, rb_intern("object_id"), 0);
231
+ rb_ivar_set(self, rb_intern("paths_object_id"), new_paths_object_id);
232
+ if (
233
+ NIL_P(paths_object_id) ||
234
+ NUM2LONG(new_paths_object_id) != NUM2LONG(paths_object_id)
235
+ ) {
236
+ // `paths` changed, need to replace matches array.
237
+ paths_object_id = new_paths_object_id;
238
+ matches = malloc(path_count * sizeof(match_t));
239
+ if (!matches) {
240
+ rb_raise(rb_eNoMemError, "memory allocation failed");
241
+ }
242
+ wrapped_matches = Data_Wrap_Struct(
243
+ rb_cObject,
244
+ 0,
245
+ free,
246
+ matches
247
+ );
248
+ rb_ivar_set(self, rb_intern("matches"), wrapped_matches);
249
+ } else {
250
+ // Get existing array.
251
+ Data_Get_Struct(
252
+ rb_ivar_get(self, rb_intern("matches")),
253
+ match_t,
254
+ matches
255
+ );
256
+
257
+ // Will compare against previously computed haystack bitmasks.
258
+ needle_bitmask = calculate_bitmask(needle);
259
+ }
168
260
 
169
261
  thread_count = NIL_P(threads_option) ? 1 : NUM2LONG(threads_option);
262
+ if (use_heap) {
263
+ heap_matches = malloc(thread_count * limit * sizeof(match_t));
264
+ if (!heap_matches) {
265
+ rb_raise(rb_eNoMemError, "memory allocation failed");
266
+ }
267
+ }
170
268
 
171
269
  #ifdef HAVE_PTHREAD_H
172
270
  #define THREAD_THRESHOLD 1000 /* avoid the overhead of threading when search space is small */
173
- if (path_count < THREAD_THRESHOLD)
271
+ if (path_count < THREAD_THRESHOLD) {
174
272
  thread_count = 1;
273
+ }
175
274
  threads = malloc(sizeof(pthread_t) * thread_count);
176
275
  if (!threads)
177
276
  rb_raise(rb_eNoMemError, "memory allocation failed");
@@ -185,58 +284,99 @@ VALUE CommandTMatcher_sorted_matches_for(int argc, VALUE *argv, VALUE self)
185
284
  thread_args[i].thread_index = i;
186
285
  thread_args[i].case_sensitive = case_sensitive == Qtrue;
187
286
  thread_args[i].matches = matches;
287
+ thread_args[i].limit = use_heap ? limit : 0;
188
288
  thread_args[i].path_count = path_count;
189
- thread_args[i].paths = paths;
190
- thread_args[i].abbrev = abbrev;
289
+ thread_args[i].haystacks = paths;
290
+ thread_args[i].needle = needle;
191
291
  thread_args[i].always_show_dot_files = always_show_dot_files;
192
292
  thread_args[i].never_show_dot_files = never_show_dot_files;
193
293
  thread_args[i].recurse = recurse;
294
+ thread_args[i].needle_bitmask = needle_bitmask;
194
295
 
195
296
  #ifdef HAVE_PTHREAD_H
196
297
  if (i == thread_count - 1) {
197
298
  #endif
198
- // for the last "worker", we'll just use the main thread
199
- (void)match_thread(&thread_args[i]);
299
+ // For the last "worker", we'll just use the main thread.
300
+ heap = match_thread(&thread_args[i]);
301
+ if (heap) {
302
+ for (j = 0; j < heap->count; j++) {
303
+ heap_matches[heap_matches_count++] = *(match_t *)heap->entries[j];
304
+ }
305
+ heap_free(heap);
306
+ }
200
307
  #ifdef HAVE_PTHREAD_H
201
308
  } else {
202
309
  err = pthread_create(&threads[i], NULL, match_thread, (void *)&thread_args[i]);
203
- if (err != 0)
310
+ if (err != 0) {
204
311
  rb_raise(rb_eSystemCallError, "pthread_create() failure (%d)", (int)err);
312
+ }
205
313
  }
206
314
  #endif
207
315
  }
208
316
 
209
317
  #ifdef HAVE_PTHREAD_H
210
318
  for (i = 0; i < thread_count - 1; i++) {
211
- err = pthread_join(threads[i], NULL);
212
- if (err != 0)
319
+ err = pthread_join(threads[i], (void **)&heap);
320
+ if (err != 0) {
213
321
  rb_raise(rb_eSystemCallError, "pthread_join() failure (%d)", (int)err);
322
+ }
323
+ if (heap) {
324
+ for (j = 0; j < heap->count; j++) {
325
+ heap_matches[heap_matches_count++] = *(match_t *)heap->entries[j];
326
+ }
327
+ heap_free(heap);
328
+ }
214
329
  }
215
330
  free(threads);
216
331
  #endif
217
332
 
218
- if (NIL_P(sort_option) || sort_option == Qtrue) {
219
- if (RSTRING_LEN(abbrev) == 0 ||
220
- (RSTRING_LEN(abbrev) == 1 && RSTRING_PTR(abbrev)[0] == '.'))
221
- // alphabetic order if search string is only "" or "."
222
- qsort(matches, path_count, sizeof(match_t), cmp_alpha);
223
- else
224
- // for all other non-empty search strings, sort by score
225
- qsort(matches, path_count, sizeof(match_t), cmp_score);
333
+ if (sort) {
334
+ if (
335
+ RSTRING_LEN(needle) == 0 ||
336
+ (RSTRING_LEN(needle) == 1 && RSTRING_PTR(needle)[0] == '.')
337
+ ) {
338
+ // Alphabetic order if search string is only "" or "."
339
+ // TODO: make those semantics fully apply to heap case as well
340
+ // (they don't because the heap itself calls cmp_score, which means
341
+ // that the items which stay in the top [limit] may (will) be
342
+ // different).
343
+ qsort(
344
+ use_heap ? heap_matches : matches,
345
+ use_heap ? heap_matches_count : path_count,
346
+ sizeof(match_t),
347
+ cmp_alpha
348
+ );
349
+ } else {
350
+ qsort(
351
+ use_heap ? heap_matches : matches,
352
+ use_heap ? heap_matches_count : path_count,
353
+ sizeof(match_t),
354
+ cmp_score
355
+ );
356
+ }
226
357
  }
227
358
 
228
359
  results = rb_ary_new();
229
-
230
- limit = NIL_P(limit_option) ? 0 : NUM2LONG(limit_option);
231
360
  if (limit == 0)
232
361
  limit = path_count;
233
- for (i = 0; i < path_count && limit > 0; i++) {
234
- if (matches[i].score > 0.0) {
235
- rb_funcall(results, rb_intern("push"), 1, matches[i].path);
362
+ for (
363
+ i = 0;
364
+ i < (use_heap ? heap_matches_count : path_count) && limit > 0;
365
+ i++
366
+ ) {
367
+ if ((use_heap ? heap_matches : matches)[i].score > 0.0) {
368
+ rb_funcall(
369
+ results,
370
+ rb_intern("push"),
371
+ 1,
372
+ (use_heap ? heap_matches : matches)[i].path
373
+ );
236
374
  limit--;
237
375
  }
238
376
  }
239
377
 
240
- free(matches);
378
+ if (use_heap) {
379
+ free(heap_matches);
380
+ }
241
381
  return results;
242
382
  }