command-t 3.0.2 → 4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/README.md +8 -2
  3. data/doc/command-t.txt +312 -147
  4. data/ruby/command-t.rb +13 -12
  5. data/ruby/command-t/controller.rb +86 -15
  6. data/ruby/command-t/depend +4 -0
  7. data/ruby/command-t/ext.h +9 -2
  8. data/ruby/command-t/extconf.rb +2 -2
  9. data/ruby/command-t/finder.rb +6 -2
  10. data/ruby/command-t/finder/buffer_finder.rb +3 -3
  11. data/ruby/command-t/finder/command_finder.rb +23 -0
  12. data/ruby/command-t/finder/file_finder.rb +3 -3
  13. data/ruby/command-t/finder/help_finder.rb +25 -0
  14. data/ruby/command-t/finder/history_finder.rb +27 -0
  15. data/ruby/command-t/finder/jump_finder.rb +3 -3
  16. data/ruby/command-t/finder/line_finder.rb +23 -0
  17. data/ruby/command-t/finder/mru_buffer_finder.rb +3 -3
  18. data/ruby/command-t/finder/tag_finder.rb +3 -3
  19. data/ruby/command-t/heap.c +146 -0
  20. data/ruby/command-t/heap.h +22 -0
  21. data/ruby/command-t/match.c +183 -116
  22. data/ruby/command-t/match.h +16 -10
  23. data/ruby/command-t/match_window.rb +10 -1
  24. data/ruby/command-t/matcher.c +203 -63
  25. data/ruby/command-t/metadata/fallback.rb +2 -2
  26. data/ruby/command-t/mru.rb +2 -2
  27. data/ruby/command-t/path_utilities.rb +2 -2
  28. data/ruby/command-t/progress_reporter.rb +38 -0
  29. data/ruby/command-t/prompt.rb +4 -4
  30. data/ruby/command-t/scanner.rb +22 -2
  31. data/ruby/command-t/scanner/buffer_scanner.rb +3 -3
  32. data/ruby/command-t/scanner/command_scanner.rb +33 -0
  33. data/ruby/command-t/scanner/file_scanner.rb +30 -6
  34. data/ruby/command-t/scanner/file_scanner/find_file_scanner.rb +12 -7
  35. data/ruby/command-t/scanner/file_scanner/git_file_scanner.rb +11 -8
  36. data/ruby/command-t/scanner/file_scanner/ruby_file_scanner.rb +7 -4
  37. data/ruby/command-t/scanner/file_scanner/watchman_file_scanner.rb +13 -5
  38. data/ruby/command-t/scanner/help_scanner.rb +40 -0
  39. data/ruby/command-t/scanner/history_scanner.rb +24 -0
  40. data/ruby/command-t/scanner/jump_scanner.rb +3 -3
  41. data/ruby/command-t/scanner/line_scanner.rb +45 -0
  42. data/ruby/command-t/scanner/mru_buffer_scanner.rb +3 -3
  43. data/ruby/command-t/scanner/tag_scanner.rb +3 -3
  44. data/ruby/command-t/scm_utilities.rb +2 -2
  45. data/ruby/command-t/settings.rb +2 -2
  46. data/ruby/command-t/stub.rb +7 -2
  47. data/ruby/command-t/util.rb +2 -2
  48. data/ruby/command-t/vim.rb +27 -2
  49. data/ruby/command-t/vim/screen.rb +3 -3
  50. data/ruby/command-t/vim/window.rb +3 -3
  51. data/ruby/command-t/watchman.c +1 -1
  52. metadata +13 -2
@@ -0,0 +1,22 @@
1
+ // Copyright 2016-present Greg Hurrell. All rights reserved.
2
+ // Licensed under the terms of the BSD 2-clause license.
3
+
4
+ /**
5
+ * A fixed size min-heap implementation.
6
+ */
7
+
8
+ typedef int (*heap_compare_entries)(const void *a, const void *b);
9
+
10
+ typedef struct {
11
+ long count;
12
+ long capacity;
13
+ void **entries;
14
+ heap_compare_entries comparator;
15
+ } heap_t;
16
+
17
+ #define HEAP_PEEK(heap) (heap->entries[0])
18
+
19
+ heap_t *heap_new(long capacity, heap_compare_entries comparator);
20
+ void heap_free(heap_t *heap);
21
+ void heap_insert(heap_t *heap, void *value);
22
+ void *heap_extract(heap_t *heap);
@@ -6,171 +6,238 @@
6
6
  #include "ext.h"
7
7
  #include "ruby_compat.h"
8
8
 
9
- // use a struct to make passing params during recursion easier
9
+ #define UNSET_SCORE FLT_MAX
10
+
11
+ // Use a struct to make passing params during recursion easier.
10
12
  typedef struct {
11
- char *haystack_p; // pointer to the path string to be searched
12
- long haystack_len; // length of same
13
- char *needle_p; // pointer to search string (needle)
14
- long needle_len; // length of same
15
- double max_score_per_char;
16
- int always_show_dot_files; // boolean
17
- int never_show_dot_files; // boolean
18
- int case_sensitive; // boolean
19
- int recurse; // boolean
20
- double *memo; // memoization
13
+ char *haystack_p; // Pointer to the path string to be searched.
14
+ long haystack_len; // Length of same.
15
+ char *needle_p; // Pointer to search string (needle).
16
+ long needle_len; // Length of same.
17
+ long *rightmost_match_p; // Rightmost match for each char in needle.
18
+ float max_score_per_char;
19
+ int always_show_dot_files; // Boolean.
20
+ int never_show_dot_files; // Boolean.
21
+ int case_sensitive; // Boolean.
22
+ int recurse; // Boolean.
23
+ float *memo; // Memoization.
21
24
  } matchinfo_t;
22
25
 
23
- double recursive_match(matchinfo_t *m, // sharable meta-data
24
- long haystack_idx, // where in the path string to start
25
- long needle_idx, // where in the needle string to start
26
- long last_idx, // location of last matched character
27
- double score) // cumulative score so far
28
- {
29
- double score_for_char;
30
- double seen_score = 0; // remember best score seen via recursion
31
- int found;
32
- long i, j, distance;
33
- long memo_idx = haystack_idx;
34
-
35
- // do we have a memoized result we can return?
36
- double memoized = m->memo[needle_idx * m->needle_len + memo_idx];
37
- if (memoized != DBL_MAX)
38
- return memoized;
39
-
40
- // bail early if not enough room (left) in haystack for (rest of) needle
41
- if (m->haystack_len - haystack_idx < m->needle_len - needle_idx) {
42
- score = 0.0;
43
- goto memoize;
44
- }
45
-
26
+ float recursive_match(
27
+ matchinfo_t *m, // Sharable meta-data.
28
+ long haystack_idx, // Where in the path string to start.
29
+ long needle_idx, // Where in the needle string to start.
30
+ long last_idx, // Location of last matched character.
31
+ float score // Cumulative score so far.
32
+ ) {
33
+ long distance, i, j;
34
+ float *memoized = NULL;
35
+ float score_for_char;
36
+ float seen_score = 0;
37
+
38
+ // Iterate over needle.
46
39
  for (i = needle_idx; i < m->needle_len; i++) {
47
- char c = m->needle_p[i];
48
- found = 0;
49
-
50
- // similar to above, we'll stop iterating when we know we're too close
51
- // to the end of the string to possibly match
52
- for (j = haystack_idx;
53
- j <= m->haystack_len - (m->needle_len - i);
54
- j++, haystack_idx++) {
55
- char d = m->haystack_p[j];
40
+ // Iterate over (valid range of) haystack.
41
+ for (j = haystack_idx; j <= m->rightmost_match_p[i]; j++) {
42
+ char c, d;
43
+
44
+ // Do we have a memoized result we can return?
45
+ memoized = &m->memo[j * m->needle_len + i];
46
+ if (*memoized != UNSET_SCORE) {
47
+ return *memoized > seen_score ? *memoized : seen_score;
48
+ }
49
+ c = m->needle_p[i];
50
+ d = m->haystack_p[j];
56
51
  if (d == '.') {
57
- if (j == 0 || m->haystack_p[j - 1] == '/') { // this is a dot-file
58
- int dot_search = (i == 0 && c == '.'); // searching for a dot
59
- if (m->never_show_dot_files || (!dot_search && !m->always_show_dot_files)) {
60
- score = 0.0;
61
- goto memoize;
52
+ if (j == 0 || m->haystack_p[j - 1] == '/') { // This is a dot-file.
53
+ int dot_search = c == '.'; // Searching for a dot.
54
+ if (
55
+ m->never_show_dot_files ||
56
+ (!dot_search && !m->always_show_dot_files)
57
+ ) {
58
+ return *memoized = 0.0;
62
59
  }
63
60
  }
64
61
  } else if (d >= 'A' && d <= 'Z' && !m->case_sensitive) {
65
- d += 'a' - 'A'; // add 32 to downcase
62
+ d += 'a' - 'A'; // Add 32 to downcase.
66
63
  }
67
64
 
68
65
  if (c == d) {
69
- found = 1;
70
-
71
- // calculate score
66
+ // Calculate score.
67
+ float sub_score = 0;
72
68
  score_for_char = m->max_score_per_char;
73
69
  distance = j - last_idx;
74
70
 
75
71
  if (distance > 1) {
76
- double factor = 1.0;
72
+ float factor = 1.0;
77
73
  char last = m->haystack_p[j - 1];
78
- char curr = m->haystack_p[j]; // case matters, so get again
79
- if (last == '/')
74
+ char curr = m->haystack_p[j]; // Case matters, so get again.
75
+ if (last == '/') {
80
76
  factor = 0.9;
81
- else if (last == '-' ||
82
- last == '_' ||
83
- last == ' ' ||
84
- (last >= '0' && last <= '9'))
77
+ } else if (
78
+ last == '-' ||
79
+ last == '_' ||
80
+ last == ' ' ||
81
+ (last >= '0' && last <= '9')
82
+ ) {
85
83
  factor = 0.8;
86
- else if (last >= 'a' && last <= 'z' &&
87
- curr >= 'A' && curr <= 'Z')
84
+ } else if (
85
+ last >= 'a' && last <= 'z' &&
86
+ curr >= 'A' && curr <= 'Z'
87
+ ) {
88
88
  factor = 0.8;
89
- else if (last == '.')
89
+ } else if (last == '.') {
90
90
  factor = 0.7;
91
- else
92
- // if no "special" chars behind char, factor diminishes
93
- // as distance from last matched char increases
91
+ } else {
92
+ // If no "special" chars behind char, factor diminishes
93
+ // as distance from last matched char increases.
94
94
  factor = (1.0 / distance) * 0.75;
95
+ }
95
96
  score_for_char *= factor;
96
97
  }
97
98
 
98
- if (++j < m->haystack_len && m->recurse) {
99
- // bump cursor one char to the right and
100
- // use recursion to try and find a better match
101
- double sub_score = recursive_match(m, j, i, last_idx, score);
102
- if (sub_score > seen_score)
99
+ if (j < m->rightmost_match_p[i] && m->recurse) {
100
+ sub_score = recursive_match(m, j + 1, i, last_idx, score);
101
+ if (sub_score > seen_score) {
103
102
  seen_score = sub_score;
103
+ }
104
104
  }
105
-
105
+ last_idx = j;
106
+ haystack_idx = last_idx + 1;
106
107
  score += score_for_char;
107
- last_idx = haystack_idx++;
108
- break;
108
+ *memoized = seen_score > score ? seen_score : score;
109
+ if (i == m->needle_len - 1) {
110
+ // Whole string matched.
111
+ return *memoized;
112
+ }
113
+ if (!m->recurse) {
114
+ break;
115
+ }
109
116
  }
110
117
  }
111
- if (!found) {
112
- score = 0.0;
113
- goto memoize;
114
- }
115
118
  }
116
-
117
- score = score > seen_score ? score : seen_score;
118
-
119
- memoize:
120
- m->memo[needle_idx * m->needle_len + memo_idx] = score;
121
- return score;
119
+ return *memoized = score;
122
120
  }
123
121
 
124
- void calculate_match(VALUE str,
125
- VALUE needle,
126
- VALUE case_sensitive,
127
- VALUE always_show_dot_files,
128
- VALUE never_show_dot_files,
129
- VALUE recurse,
130
- match_t *out)
131
- {
132
- long i, max;
133
- double score;
122
+ float calculate_match(
123
+ VALUE haystack,
124
+ VALUE needle,
125
+ VALUE case_sensitive,
126
+ VALUE always_show_dot_files,
127
+ VALUE never_show_dot_files,
128
+ VALUE recurse,
129
+ long needle_bitmask,
130
+ long *haystack_bitmask
131
+ ) {
134
132
  matchinfo_t m;
135
- m.haystack_p = RSTRING_PTR(str);
136
- m.haystack_len = RSTRING_LEN(str);
133
+ long i;
134
+ float score = 1.0;
135
+ int compute_bitmasks = *haystack_bitmask == UNSET_BITMASK;
136
+ m.haystack_p = RSTRING_PTR(haystack);
137
+ m.haystack_len = RSTRING_LEN(haystack);
137
138
  m.needle_p = RSTRING_PTR(needle);
138
139
  m.needle_len = RSTRING_LEN(needle);
140
+ m.rightmost_match_p = NULL;
139
141
  m.max_score_per_char = (1.0 / m.haystack_len + 1.0 / m.needle_len) / 2;
140
142
  m.always_show_dot_files = always_show_dot_files == Qtrue;
141
143
  m.never_show_dot_files = never_show_dot_files == Qtrue;
142
144
  m.case_sensitive = (int)case_sensitive;
143
145
  m.recurse = recurse == Qtrue;
144
146
 
145
- // calculate score
146
- score = 1.0;
147
-
148
- // special case for zero-length search string
147
+ // Special case for zero-length search string.
149
148
  if (m.needle_len == 0) {
150
-
151
- // filter out dot files
152
- if (!m.always_show_dot_files) {
149
+ // Filter out dot files.
150
+ if (m.never_show_dot_files || !m.always_show_dot_files) {
153
151
  for (i = 0; i < m.haystack_len; i++) {
154
152
  char c = m.haystack_p[i];
155
-
156
153
  if (c == '.' && (i == 0 || m.haystack_p[i - 1] == '/')) {
157
- score = 0.0;
158
- break;
154
+ return 0.0;
159
155
  }
160
156
  }
161
157
  }
162
- } else if (m.haystack_len > 0) { // normal case
158
+ } else {
159
+ long haystack_limit;
160
+ long memo_size;
161
+ long needle_idx;
162
+ long mask;
163
+ long rightmost_match_p[m.needle_len];
164
+
165
+ if (*haystack_bitmask != UNSET_BITMASK) {
166
+ if ((needle_bitmask & *haystack_bitmask) != needle_bitmask) {
167
+ return 0.0;
168
+ }
169
+ }
163
170
 
164
- // prepare for memoization
165
- double memo[m.haystack_len * m.needle_len];
166
- for (i = 0, max = m.haystack_len * m.needle_len; i < max; i++)
167
- memo[i] = DBL_MAX;
168
- m.memo = memo;
171
+ // Pre-scan string:
172
+ // - Bail if it can't match at all.
173
+ // - Record rightmost match for each character (prune search space).
174
+ // - Record bitmask for haystack to speed up future searches.
175
+ m.rightmost_match_p = rightmost_match_p;
176
+ needle_idx = m.needle_len - 1;
177
+ mask = 0;
178
+ for (i = m.haystack_len - 1; i >= 0; i--) {
179
+ char c = m.haystack_p[i];
180
+ char lower = c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c;
181
+ if (!m.case_sensitive) {
182
+ c = lower;
183
+ }
184
+ if (compute_bitmasks) {
185
+ mask |= (1 << (lower - 'a'));
186
+ }
169
187
 
170
- score = recursive_match(&m, 0, 0, 0, 0.0);
171
- }
188
+ if (needle_idx >= 0) {
189
+ char d = m.needle_p[needle_idx];
190
+ if (c == d) {
191
+ rightmost_match_p[needle_idx] = i;
192
+ needle_idx--;
193
+ }
194
+ }
195
+ }
196
+ if (compute_bitmasks) {
197
+ *haystack_bitmask = mask;
198
+ }
199
+ if (needle_idx != -1) {
200
+ return 0.0;
201
+ }
172
202
 
173
- // final book-keeping
174
- out->path = str;
175
- out->score = score;
203
+ // Prepare for memoization.
204
+ haystack_limit = rightmost_match_p[m.needle_len - 1] + 1;
205
+ memo_size = m.needle_len * haystack_limit;
206
+ {
207
+ float memo[memo_size];
208
+ for (i = 0; i < memo_size; i++) {
209
+ memo[i] = UNSET_SCORE;
210
+ }
211
+ m.memo = memo;
212
+ score = recursive_match(&m, 0, 0, 0, 0.0);
213
+
214
+ #ifdef DEBUG
215
+ fprintf(stdout, " ");
216
+ for (i = 0; i < m.needle_len; i++) {
217
+ fprintf(stdout, " %c ", m.needle_p[i]);
218
+ }
219
+ fprintf(stdout, "\n");
220
+ for (i = 0; i < memo_size; i++) {
221
+ char formatted[8];
222
+ if (i % m.needle_len == 0) {
223
+ long haystack_idx = i / m.needle_len;
224
+ fprintf(stdout, "%c: ", m.haystack_p[haystack_idx]);
225
+ }
226
+ if (memo[i] == UNSET_SCORE) {
227
+ snprintf(formatted, sizeof(formatted), " - ");
228
+ } else {
229
+ snprintf(formatted, sizeof(formatted), " %-.4f", memo[i]);
230
+ }
231
+ fprintf(stdout, "%s", formatted);
232
+ if ((i + 1) % m.needle_len == 0) {
233
+ fprintf(stdout, "\n");
234
+ } else {
235
+ fprintf(stdout, " ");
236
+ }
237
+ }
238
+ fprintf(stdout, "Final score: %f\n\n", score);
239
+ #endif
240
+ }
241
+ }
242
+ return score;
176
243
  }
@@ -3,16 +3,22 @@
3
3
 
4
4
  #include <ruby.h>
5
5
 
6
- // struct for representing an individual match
6
+ #define UNSET_BITMASK (-1)
7
+
8
+ // Struct for representing an individual match.
7
9
  typedef struct {
8
- VALUE path;
9
- double score;
10
+ VALUE path;
11
+ long bitmask;
12
+ float score;
10
13
  } match_t;
11
14
 
12
- extern void calculate_match(VALUE str,
13
- VALUE needle,
14
- VALUE case_sensitive,
15
- VALUE always_show_dot_files,
16
- VALUE never_show_dot_files,
17
- VALUE recurse,
18
- match_t *out);
15
+ extern float calculate_match(
16
+ VALUE str,
17
+ VALUE needle,
18
+ VALUE case_sensitive,
19
+ VALUE always_show_dot_files,
20
+ VALUE never_show_dot_files,
21
+ VALUE recurse,
22
+ long needle_bitmask,
23
+ long *haystack_bitmask
24
+ );
@@ -15,6 +15,7 @@ module CommandT
15
15
  Highlight = Struct.new(:highlight, :bang)
16
16
 
17
17
  def initialize(options = {})
18
+ @encoding = options[:encoding]
18
19
  @highlight_color = options[:highlight_color] || 'PmenuSel'
19
20
  @min_height = options[:min_height]
20
21
  @prompt = options[:prompt]
@@ -35,7 +36,10 @@ module CommandT
35
36
  set 'scrolloff', 0 # don't scroll near buffer edges
36
37
  set 'sidescroll', 0 # don't sidescroll in jumps
37
38
  set 'sidescrolloff', 0 # don't sidescroll automatically
38
- set 'updatetime', options[:debounce_interval]
39
+
40
+ if options[:debounce_interval] > 0
41
+ set 'updatetime', options[:debounce_interval]
42
+ end
39
43
 
40
44
  # Save existing window views so we can restore them later.
41
45
  current_window = ::VIM::evaluate('winnr()')
@@ -396,6 +400,11 @@ module CommandT
396
400
  #
397
401
  def match_with_syntax_highlight(match)
398
402
  highlight_chars = @prompt.abbrev.downcase.scan(/./mu)
403
+ if @encoding &&
404
+ match.respond_to?(:force_encoding) &&
405
+ match.encoding != @encoding
406
+ match = match.force_encoding(@encoding)
407
+ end
399
408
  match.scan(/./mu).inject([]) do |output, char|
400
409
  if char.downcase == highlight_chars.first
401
410
  highlight_chars.shift
@@ -3,8 +3,9 @@
3
3
 
4
4
  #include <stdlib.h> /* for qsort() */
5
5
  #include <string.h> /* for strncmp() */
6
- #include "matcher.h"
7
6
  #include "match.h"
7
+ #include "matcher.h"
8
+ #include "heap.h"
8
9
  #include "ext.h"
9
10
  #include "ruby_compat.h"
10
11
 
@@ -13,7 +14,7 @@
13
14
  #include <pthread.h> /* for pthread_create, pthread_join etc */
14
15
  #endif
15
16
 
16
- // comparison function for use with qsort
17
+ // Comparison function for use with qsort.
17
18
  int cmp_alpha(const void *a, const void *b)
18
19
  {
19
20
  match_t a_match = *(match_t *)a;
@@ -29,11 +30,11 @@ int cmp_alpha(const void *a, const void *b)
29
30
  if (a_len > b_len) {
30
31
  order = strncmp(a_p, b_p, b_len);
31
32
  if (order == 0)
32
- order = 1; // shorter string (b) wins
33
+ order = 1; // shorter string (b) wins.
33
34
  } else if (a_len < b_len) {
34
35
  order = strncmp(a_p, b_p, a_len);
35
36
  if (order == 0)
36
- order = -1; // shorter string (a) wins
37
+ order = -1; // shorter string (a) wins.
37
38
  } else {
38
39
  order = strncmp(a_p, b_p, a_len);
39
40
  }
@@ -41,16 +42,16 @@ int cmp_alpha(const void *a, const void *b)
41
42
  return order;
42
43
  }
43
44
 
44
- // comparison function for use with qsort
45
+ // Comparison function for use with qsort.
45
46
  int cmp_score(const void *a, const void *b)
46
47
  {
47
48
  match_t a_match = *(match_t *)a;
48
49
  match_t b_match = *(match_t *)b;
49
50
 
50
51
  if (a_match.score > b_match.score)
51
- return -1; // a scores higher, a should appear sooner
52
+ return -1; // a scores higher, a should appear sooner.
52
53
  else if (a_match.score < b_match.score)
53
- return 1; // b scores higher, a should appear later
54
+ return 1; // b scores higher, a should appear later.
54
55
  else
55
56
  return cmp_alpha(a, b);
56
57
  }
@@ -62,7 +63,7 @@ VALUE CommandTMatcher_initialize(int argc, VALUE *argv, VALUE self)
62
63
  VALUE options;
63
64
  VALUE scanner;
64
65
 
65
- // process arguments: 1 mandatory, 1 optional
66
+ // Process arguments: 1 mandatory, 1 optional.
66
67
  if (rb_scan_args(argc, argv, "11", &scanner, &options) == 1)
67
68
  options = Qnil;
68
69
  if (NIL_P(scanner))
@@ -70,7 +71,7 @@ VALUE CommandTMatcher_initialize(int argc, VALUE *argv, VALUE self)
70
71
 
71
72
  rb_iv_set(self, "@scanner", scanner);
72
73
 
73
- // check optional options hash for overrides
74
+ // Check optional options hash for overrides.
74
75
  always_show_dot_files = CommandT_option_from_hash("always_show_dot_files", options);
75
76
  never_show_dot_files = CommandT_option_from_hash("never_show_dot_files", options);
76
77
 
@@ -84,94 +85,192 @@ typedef struct {
84
85
  long thread_count;
85
86
  long thread_index;
86
87
  long case_sensitive;
88
+ long limit;
87
89
  match_t *matches;
88
90
  long path_count;
89
- VALUE paths;
90
- VALUE abbrev;
91
+ VALUE haystacks;
92
+ VALUE needle;
91
93
  VALUE always_show_dot_files;
92
94
  VALUE never_show_dot_files;
93
95
  VALUE recurse;
96
+ long needle_bitmask;
94
97
  } thread_args_t;
95
98
 
96
99
  void *match_thread(void *thread_args)
97
100
  {
98
101
  long i;
102
+ float score;
103
+ heap_t *heap = NULL;
99
104
  thread_args_t *args = (thread_args_t *)thread_args;
100
- for (i = args->thread_index; i < args->path_count; i += args->thread_count) {
101
- VALUE path = RARRAY_PTR(args->paths)[i];
102
- calculate_match(path,
103
- args->abbrev,
104
- args->case_sensitive,
105
- args->always_show_dot_files,
106
- args->never_show_dot_files,
107
- args->recurse,
108
- &args->matches[i]);
105
+
106
+ if (args->limit) {
107
+ // Reserve one extra slot so that we can do an insert-then-extract even
108
+ // when "full" (effectively allows use of min-heap to maintain a
109
+ // top-"limit" list of items).
110
+ heap = heap_new(args->limit + 1, cmp_score);
111
+ }
112
+
113
+ for (
114
+ i = args->thread_index;
115
+ i < args->path_count;
116
+ i += args->thread_count
117
+ ) {
118
+ args->matches[i].path = RARRAY_PTR(args->haystacks)[i];
119
+ if (args->needle_bitmask == UNSET_BITMASK) {
120
+ args->matches[i].bitmask = UNSET_BITMASK;
121
+ }
122
+ args->matches[i].score = calculate_match(
123
+ args->matches[i].path,
124
+ args->needle,
125
+ args->case_sensitive,
126
+ args->always_show_dot_files,
127
+ args->never_show_dot_files,
128
+ args->recurse,
129
+ args->needle_bitmask,
130
+ &args->matches[i].bitmask
131
+ );
132
+ if (heap) {
133
+ if (heap->count == args->limit) {
134
+ score = ((match_t *)HEAP_PEEK(heap))->score;
135
+ if (args->matches[i].score >= score) {
136
+ heap_insert(heap, &args->matches[i]);
137
+ (void)heap_extract(heap);
138
+ }
139
+ } else {
140
+ heap_insert(heap, &args->matches[i]);
141
+ }
142
+ }
109
143
  }
110
144
 
111
- return NULL;
145
+ return heap;
146
+ }
147
+
148
+ long calculate_bitmask(VALUE string) {
149
+ char *str = RSTRING_PTR(string);
150
+ long len = RSTRING_LEN(string);
151
+ long i;
152
+ long mask = 0;
153
+ for (i = 0; i < len; i++) {
154
+ if (str[i] >= 'a' && str[i] <= 'z') {
155
+ mask |= (1 << (str[i] - 'a'));
156
+ } else if (str[i] >= 'A' && str[i] <= 'Z') {
157
+ mask |= (1 << (str[i] - 'A'));
158
+ }
159
+ }
160
+ return mask;
112
161
  }
113
162
 
114
163
  VALUE CommandTMatcher_sorted_matches_for(int argc, VALUE *argv, VALUE self)
115
164
  {
116
- long i, limit, path_count, thread_count;
165
+ long i, j, limit, path_count, thread_count;
117
166
  #ifdef HAVE_PTHREAD_H
118
167
  long err;
119
168
  pthread_t *threads;
120
169
  #endif
170
+ long needle_bitmask = UNSET_BITMASK;
171
+ long heap_matches_count;
172
+ int use_heap;
173
+ int sort;
121
174
  match_t *matches;
175
+ match_t *heap_matches = NULL;
176
+ heap_t *heap;
122
177
  thread_args_t *thread_args;
123
- VALUE abbrev;
124
- VALUE case_sensitive;
125
178
  VALUE always_show_dot_files;
179
+ VALUE case_sensitive;
180
+ VALUE recurse;
181
+ VALUE ignore_spaces;
126
182
  VALUE limit_option;
183
+ VALUE needle;
127
184
  VALUE never_show_dot_files;
128
- VALUE ignore_spaces;
185
+ VALUE new_paths_object_id;
129
186
  VALUE options;
130
187
  VALUE paths;
131
- VALUE recurse;
188
+ VALUE paths_object_id;
132
189
  VALUE results;
133
190
  VALUE scanner;
134
191
  VALUE sort_option;
135
192
  VALUE threads_option;
193
+ VALUE wrapped_matches;
136
194
 
137
- // process arguments: 1 mandatory, 1 optional
138
- if (rb_scan_args(argc, argv, "11", &abbrev, &options) == 1)
195
+ // Process arguments: 1 mandatory, 1 optional.
196
+ if (rb_scan_args(argc, argv, "11", &needle, &options) == 1)
139
197
  options = Qnil;
140
- if (NIL_P(abbrev))
141
- rb_raise(rb_eArgError, "nil abbrev");
198
+ if (NIL_P(needle))
199
+ rb_raise(rb_eArgError, "nil needle");
142
200
 
143
- // check optional options hash for overrides
201
+ // Check optional options hash for overrides.
144
202
  case_sensitive = CommandT_option_from_hash("case_sensitive", options);
145
203
  limit_option = CommandT_option_from_hash("limit", options);
146
204
  threads_option = CommandT_option_from_hash("threads", options);
147
205
  sort_option = CommandT_option_from_hash("sort", options);
148
206
  ignore_spaces = CommandT_option_from_hash("ignore_spaces", options);
207
+ always_show_dot_files = rb_iv_get(self, "@always_show_dot_files");
208
+ never_show_dot_files = rb_iv_get(self, "@never_show_dot_files");
149
209
  recurse = CommandT_option_from_hash("recurse", options);
150
210
 
151
- abbrev = StringValue(abbrev);
211
+ limit = NIL_P(limit_option) ? 15 : NUM2LONG(limit_option);
212
+ sort = NIL_P(sort_option) || sort_option == Qtrue;
213
+ use_heap = limit && sort;
214
+ heap_matches_count = 0;
215
+
216
+ needle = StringValue(needle);
152
217
  if (case_sensitive != Qtrue)
153
- abbrev = rb_funcall(abbrev, rb_intern("downcase"), 0);
218
+ needle = rb_funcall(needle, rb_intern("downcase"), 0);
154
219
 
155
220
  if (ignore_spaces == Qtrue)
156
- abbrev = rb_funcall(abbrev, rb_intern("delete"), 1, rb_str_new2(" "));
221
+ needle = rb_funcall(needle, rb_intern("delete"), 1, rb_str_new2(" "));
157
222
 
158
- // get unsorted matches
223
+ // Get unsorted matches.
159
224
  scanner = rb_iv_get(self, "@scanner");
160
225
  paths = rb_funcall(scanner, rb_intern("paths"), 0);
161
- always_show_dot_files = rb_iv_get(self, "@always_show_dot_files");
162
- never_show_dot_files = rb_iv_get(self, "@never_show_dot_files");
163
-
164
226
  path_count = RARRAY_LEN(paths);
165
- matches = malloc(path_count * sizeof(match_t));
166
- if (!matches)
167
- rb_raise(rb_eNoMemError, "memory allocation failed");
227
+
228
+ // Cached C data, not visible to Ruby layer.
229
+ paths_object_id = rb_ivar_get(self, rb_intern("paths_object_id"));
230
+ new_paths_object_id = rb_funcall(paths, rb_intern("object_id"), 0);
231
+ rb_ivar_set(self, rb_intern("paths_object_id"), new_paths_object_id);
232
+ if (
233
+ NIL_P(paths_object_id) ||
234
+ NUM2LONG(new_paths_object_id) != NUM2LONG(paths_object_id)
235
+ ) {
236
+ // `paths` changed, need to replace matches array.
237
+ paths_object_id = new_paths_object_id;
238
+ matches = malloc(path_count * sizeof(match_t));
239
+ if (!matches) {
240
+ rb_raise(rb_eNoMemError, "memory allocation failed");
241
+ }
242
+ wrapped_matches = Data_Wrap_Struct(
243
+ rb_cObject,
244
+ 0,
245
+ free,
246
+ matches
247
+ );
248
+ rb_ivar_set(self, rb_intern("matches"), wrapped_matches);
249
+ } else {
250
+ // Get existing array.
251
+ Data_Get_Struct(
252
+ rb_ivar_get(self, rb_intern("matches")),
253
+ match_t,
254
+ matches
255
+ );
256
+
257
+ // Will compare against previously computed haystack bitmasks.
258
+ needle_bitmask = calculate_bitmask(needle);
259
+ }
168
260
 
169
261
  thread_count = NIL_P(threads_option) ? 1 : NUM2LONG(threads_option);
262
+ if (use_heap) {
263
+ heap_matches = malloc(thread_count * limit * sizeof(match_t));
264
+ if (!heap_matches) {
265
+ rb_raise(rb_eNoMemError, "memory allocation failed");
266
+ }
267
+ }
170
268
 
171
269
  #ifdef HAVE_PTHREAD_H
172
270
  #define THREAD_THRESHOLD 1000 /* avoid the overhead of threading when search space is small */
173
- if (path_count < THREAD_THRESHOLD)
271
+ if (path_count < THREAD_THRESHOLD) {
174
272
  thread_count = 1;
273
+ }
175
274
  threads = malloc(sizeof(pthread_t) * thread_count);
176
275
  if (!threads)
177
276
  rb_raise(rb_eNoMemError, "memory allocation failed");
@@ -185,58 +284,99 @@ VALUE CommandTMatcher_sorted_matches_for(int argc, VALUE *argv, VALUE self)
185
284
  thread_args[i].thread_index = i;
186
285
  thread_args[i].case_sensitive = case_sensitive == Qtrue;
187
286
  thread_args[i].matches = matches;
287
+ thread_args[i].limit = use_heap ? limit : 0;
188
288
  thread_args[i].path_count = path_count;
189
- thread_args[i].paths = paths;
190
- thread_args[i].abbrev = abbrev;
289
+ thread_args[i].haystacks = paths;
290
+ thread_args[i].needle = needle;
191
291
  thread_args[i].always_show_dot_files = always_show_dot_files;
192
292
  thread_args[i].never_show_dot_files = never_show_dot_files;
193
293
  thread_args[i].recurse = recurse;
294
+ thread_args[i].needle_bitmask = needle_bitmask;
194
295
 
195
296
  #ifdef HAVE_PTHREAD_H
196
297
  if (i == thread_count - 1) {
197
298
  #endif
198
- // for the last "worker", we'll just use the main thread
199
- (void)match_thread(&thread_args[i]);
299
+ // For the last "worker", we'll just use the main thread.
300
+ heap = match_thread(&thread_args[i]);
301
+ if (heap) {
302
+ for (j = 0; j < heap->count; j++) {
303
+ heap_matches[heap_matches_count++] = *(match_t *)heap->entries[j];
304
+ }
305
+ heap_free(heap);
306
+ }
200
307
  #ifdef HAVE_PTHREAD_H
201
308
  } else {
202
309
  err = pthread_create(&threads[i], NULL, match_thread, (void *)&thread_args[i]);
203
- if (err != 0)
310
+ if (err != 0) {
204
311
  rb_raise(rb_eSystemCallError, "pthread_create() failure (%d)", (int)err);
312
+ }
205
313
  }
206
314
  #endif
207
315
  }
208
316
 
209
317
  #ifdef HAVE_PTHREAD_H
210
318
  for (i = 0; i < thread_count - 1; i++) {
211
- err = pthread_join(threads[i], NULL);
212
- if (err != 0)
319
+ err = pthread_join(threads[i], (void **)&heap);
320
+ if (err != 0) {
213
321
  rb_raise(rb_eSystemCallError, "pthread_join() failure (%d)", (int)err);
322
+ }
323
+ if (heap) {
324
+ for (j = 0; j < heap->count; j++) {
325
+ heap_matches[heap_matches_count++] = *(match_t *)heap->entries[j];
326
+ }
327
+ heap_free(heap);
328
+ }
214
329
  }
215
330
  free(threads);
216
331
  #endif
217
332
 
218
- if (NIL_P(sort_option) || sort_option == Qtrue) {
219
- if (RSTRING_LEN(abbrev) == 0 ||
220
- (RSTRING_LEN(abbrev) == 1 && RSTRING_PTR(abbrev)[0] == '.'))
221
- // alphabetic order if search string is only "" or "."
222
- qsort(matches, path_count, sizeof(match_t), cmp_alpha);
223
- else
224
- // for all other non-empty search strings, sort by score
225
- qsort(matches, path_count, sizeof(match_t), cmp_score);
333
+ if (sort) {
334
+ if (
335
+ RSTRING_LEN(needle) == 0 ||
336
+ (RSTRING_LEN(needle) == 1 && RSTRING_PTR(needle)[0] == '.')
337
+ ) {
338
+ // Alphabetic order if search string is only "" or "."
339
+ // TODO: make those semantics fully apply to heap case as well
340
+ // (they don't because the heap itself calls cmp_score, which means
341
+ // that the items which stay in the top [limit] may (will) be
342
+ // different).
343
+ qsort(
344
+ use_heap ? heap_matches : matches,
345
+ use_heap ? heap_matches_count : path_count,
346
+ sizeof(match_t),
347
+ cmp_alpha
348
+ );
349
+ } else {
350
+ qsort(
351
+ use_heap ? heap_matches : matches,
352
+ use_heap ? heap_matches_count : path_count,
353
+ sizeof(match_t),
354
+ cmp_score
355
+ );
356
+ }
226
357
  }
227
358
 
228
359
  results = rb_ary_new();
229
-
230
- limit = NIL_P(limit_option) ? 0 : NUM2LONG(limit_option);
231
360
  if (limit == 0)
232
361
  limit = path_count;
233
- for (i = 0; i < path_count && limit > 0; i++) {
234
- if (matches[i].score > 0.0) {
235
- rb_funcall(results, rb_intern("push"), 1, matches[i].path);
362
+ for (
363
+ i = 0;
364
+ i < (use_heap ? heap_matches_count : path_count) && limit > 0;
365
+ i++
366
+ ) {
367
+ if ((use_heap ? heap_matches : matches)[i].score > 0.0) {
368
+ rb_funcall(
369
+ results,
370
+ rb_intern("push"),
371
+ 1,
372
+ (use_heap ? heap_matches : matches)[i].path
373
+ );
236
374
  limit--;
237
375
  }
238
376
  }
239
377
 
240
- free(matches);
378
+ if (use_heap) {
379
+ free(heap_matches);
380
+ }
241
381
  return results;
242
382
  }