StrIdx 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/rubyext/extconf.rb CHANGED
@@ -1,15 +1,13 @@
1
1
  #!/usr/bin/env ruby
2
- #
3
2
 
4
3
  require 'mkmf'
5
4
 
6
5
  module_name = "stridx"
7
6
  extension_name = 'stridx'
8
7
 
9
- $CXXFLAGS << " -Wall -Wno-unused-variable -O3 -fopenmp"
8
+ $CXXFLAGS << " -Wall -Wno-unused-variable -O3"
10
9
 
11
10
  have_library( 'stdc++');
12
- have_library( 'gomp' );
13
11
 
14
12
  dir_config(extension_name) # The destination
15
13
  create_makefile(extension_name) # Create Makefile
@@ -7,7 +7,7 @@
7
7
 
8
8
  extern "C" {
9
9
 
10
- void str_idx_free(void *data) { delete (StringIndex *)data; }
10
+ void str_idx_free(void *data) { delete (StrIdx::StringIndex *)data; }
11
11
 
12
12
  // Wrap StringIndex class inside a ruby variable
13
13
  static const rb_data_type_t str_idx_type = {
@@ -26,7 +26,7 @@ static const rb_data_type_t str_idx_type = {
26
26
  };
27
27
 
28
28
  VALUE str_idx_alloc(VALUE self) {
29
- void *data = new StringIndex();
29
+ void *data = new StrIdx::StringIndex();
30
30
  return TypedData_Wrap_Struct(self, &str_idx_type, data);
31
31
  }
32
32
 
@@ -36,18 +36,27 @@ VALUE StringIndexAddSegments(VALUE self, VALUE str, VALUE fileId) {
36
36
 
37
37
  void *data;
38
38
  TypedData_Get_Struct(self, int, &str_idx_type, data);
39
- ((StringIndex *)data)->addStrToIndex(s1, fid, '/');
39
+ // ((StringIndex *)data)->addStrToIndex(s1, fid);
40
+ ((StrIdx::StringIndex *)data)->addStrToIndexThreaded(s1, fid);
40
41
 
41
42
  return self;
42
43
  }
43
44
 
45
+ VALUE StringIndexWaitUntilDone(VALUE self) {
46
+ void *data;
47
+ TypedData_Get_Struct(self, int, &str_idx_type, data);
48
+ ((StrIdx::StringIndex *)data)->waitUntilDone();
49
+ return self;
50
+ }
51
+
52
+
44
53
  VALUE StringIndexFind(VALUE self, VALUE str) {
45
54
  VALUE ret;
46
55
  std::string s1 = StringValueCStr(str);
47
56
 
48
57
  void *data;
49
58
  TypedData_Get_Struct(self, int, &str_idx_type, data);
50
- StringIndex *idx = (StringIndex *)data;
59
+ StrIdx::StringIndex *idx = (StrIdx::StringIndex *)data;
51
60
 
52
61
  ret = rb_ary_new();
53
62
  const std::vector<std::pair<float, int>> &results = idx->findSimilar(s1, 2);
@@ -66,6 +75,26 @@ VALUE StringIndexFind(VALUE self, VALUE str) {
66
75
  return ret;
67
76
  }
68
77
 
78
+ VALUE StringIndexSetDirSeparator(VALUE self, VALUE str) {
79
+ char c = '/';
80
+ if (TYPE(str) == T_STRING) {
81
+ std::string s = StringValueCStr(str);
82
+ if (s.size() >= 1) {
83
+ c = s[0];
84
+ }
85
+ } else {
86
+ c = '\0'; // No separator
87
+ // if (TYPE(obj) == T_NIL)
88
+ }
89
+
90
+ void *data;
91
+ TypedData_Get_Struct(self, int, &str_idx_type, data);
92
+ StrIdx::StringIndex *idx = (StrIdx::StringIndex *)data;
93
+ idx->setDirSeparator(c);
94
+
95
+ return self;
96
+ }
97
+
69
98
  void Init_stridx(void) {
70
99
 
71
100
  VALUE mStrIdx = rb_define_module("StrIdx");
@@ -73,7 +102,12 @@ void Init_stridx(void) {
73
102
 
74
103
  rb_define_alloc_func(classStringIndex, str_idx_alloc);
75
104
  rb_define_method(classStringIndex, "add", StringIndexAddSegments, 2);
105
+ rb_define_method(classStringIndex, "waitUntilDone", StringIndexWaitUntilDone, 0);
76
106
  rb_define_method(classStringIndex, "find", StringIndexFind, 1);
107
+
108
+ rb_define_method(classStringIndex, "setDirSeparator", StringIndexSetDirSeparator, 1);
109
+
110
+
77
111
  }
78
112
 
79
113
  } // End extern "C"
data/stridx.hpp CHANGED
@@ -1,21 +1,66 @@
1
1
 
2
+ #ifndef SSSTRIDX_HPP
3
+ #define SSSTRIDX_HPP
4
+
2
5
  #include <stdio.h>
3
6
  #include <stdlib.h>
4
7
  #include <cassert>
5
8
 
6
9
  #include <vector>
10
+ #include <array>
7
11
  #include <iostream>
8
12
  #include <unordered_map>
9
13
  #include <set>
10
14
  #include <algorithm>
11
15
  #include <sstream>
12
16
 
13
- #ifdef _OPENMP
14
- #include <omp.h>
15
- #endif
17
+ #include <vector>
18
+ #include <mutex>
19
+ #include <thread>
16
20
 
21
+ #include "thread_pool.hpp"
17
22
  #include "unordered_dense.h"
18
23
 
24
+ namespace StrIdx {
25
+
26
+ /* Alternative to using std::cout directly.
27
+ Allows controlling the verbosity level. */
28
+ class Output {
29
+ private:
30
+ int verboseLevel;
31
+
32
+ public:
33
+ Output(int verb) : verboseLevel(verb) {}
34
+ Output() : Output(3) {}
35
+ ~Output() = default;
36
+ void print() {}
37
+
38
+ // When calling as print("xxx ",3, " yyy") outputs "xxx 3 yyy"
39
+ template <typename T, typename... Types> void print(T var1, Types... var2) {
40
+ std::cout << var1;
41
+ print(var2...);
42
+ }
43
+
44
+ // When calling as printl("xxx ",3, " yyy") outputs "xxx 3 yyy\n"
45
+ template <typename... Types> void printl(Types... var2) {
46
+ print(var2...);
47
+ print("\n");
48
+ }
49
+
50
+ /* When calling as printv(2, "xxx ",3, " yyy") outputs "xxx 3 yyy\n"
51
+ * if verboseLevel >= 2 (first arg)
52
+ */
53
+ template <typename... Types> void printv(int vlevel, Types... var2) {
54
+ if (verboseLevel < vlevel) {
55
+ return;
56
+ }
57
+ if (verboseLevel >= 3) {
58
+ print("[v=", vlevel, "] ");
59
+ }
60
+ printl(var2...);
61
+ }
62
+ };
63
+
19
64
  // Transforms input string as follows:
20
65
  // '/foo/bar/file1.txt'
21
66
  // => vector{"foo", "bar", "file1.txt"}
@@ -34,7 +79,7 @@ std::vector<std::string> splitString(const std::string &input, const char &separ
34
79
  }
35
80
 
36
81
  // Convert int64_t to binary string
37
- std::string int64ToBinaryString(int64_t num) {
82
+ [[nodiscard]] std::string int64ToBinaryString(int64_t num) {
38
83
  std::string result;
39
84
  for (int i = 63; i >= 0; --i) {
40
85
  result += ((num >> i) & 1) ? '1' : '0';
@@ -42,8 +87,8 @@ std::string int64ToBinaryString(int64_t num) {
42
87
  return result;
43
88
  }
44
89
 
45
- // Convert a (8 char) string represented as int64_t to std::string
46
- std::string int64ToStr(int64_t key) {
90
+ // Debug. Convert a (8 char) string represented as int64_t to std::string
91
+ [[nodiscard]] std::string int64ToStr(int64_t key) {
47
92
  int nchars = 8;
48
93
  std::string str;
49
94
  int multip = nchars * 8;
@@ -55,22 +100,24 @@ std::string int64ToStr(int64_t key) {
55
100
  return str;
56
101
  }
57
102
 
103
+ // Debug
58
104
  void printVector(const std::vector<int> &vec) {
59
105
  for (const auto &value : vec) {
60
106
  std::cout << value << " ";
61
107
  }
62
108
  }
63
109
 
64
- std::string charToBinaryString(char num) {
110
+ // Debug
111
+ [[nodiscard]] std::string charToBinaryString(char chr) {
65
112
  std::string result;
66
113
  for (int i = 7; i >= 0; --i) {
67
- result += ((num >> i) & 1) ? '1' : '0';
114
+ result += ((chr >> i) & 1) ? '1' : '0';
68
115
  }
69
116
  return result;
70
117
  }
71
118
 
72
119
  class Candidate;
73
- enum segmentType { Dir, File };
120
+ enum class segmentType { Dir, File };
74
121
 
75
122
  // A segment of a file path
76
123
  // e.g. if path is /foo/bar/baz.txt
@@ -81,17 +128,18 @@ public:
81
128
  int fileId; // (if FILE)
82
129
  Candidate *cand;
83
130
  PathSegment *parent;
131
+ std::mutex mu;
84
132
  ankerl::unordered_dense::map<std::string, PathSegment *> children;
85
- segmentType type = Dir;
86
- PathSegment() : parent(NULL) {}
87
- PathSegment(std::string _str) : str(_str), parent(NULL) {}
133
+ segmentType type = segmentType::Dir;
134
+ PathSegment() : parent(nullptr) {}
135
+ PathSegment(std::string _str) : str(_str), parent(nullptr) {}
88
136
  PathSegment(std::string _str, int _fileId)
89
- : str(_str), fileId(_fileId), cand(NULL), parent(NULL) {}
90
- int size() {
137
+ : str(_str), fileId(_fileId), cand(nullptr), parent(nullptr) {}
138
+ [[nodiscard]] int size() {
91
139
  int sz = str.size();
92
140
  PathSegment *cur = parent;
93
141
  // Sum up length of parent segments (+1 for divisors)
94
- while (cur->parent != NULL) {
142
+ while (cur->parent != nullptr) {
95
143
  sz += cur->str.size() + 1;
96
144
  cur = cur->parent;
97
145
  }
@@ -118,7 +166,7 @@ public:
118
166
  // Initialize v_charscores with zeros
119
167
  v_charscore.resize(len, 0);
120
168
  candLen = str.size();
121
- seg = NULL;
169
+ seg = nullptr;
122
170
  }
123
171
 
124
172
  Candidate(PathSegment *_seg, int _len) : seg(_seg), len(_len) {
@@ -127,7 +175,7 @@ public:
127
175
  candLen = seg->size();
128
176
  }
129
177
 
130
- float getScore() {
178
+ [[nodiscard]] float getScore() {
131
179
  int i = 0;
132
180
  float score = 0.0;
133
181
  candLen = seg->size();
@@ -145,19 +193,21 @@ public:
145
193
  return score;
146
194
  }
147
195
 
148
- float operator[](int idx) { return v_charscore[idx]; }
196
+ [[nodiscard]] float operator[](int idx) { return v_charscore[idx]; }
149
197
  };
150
198
 
151
199
  // This seems to give 10x speed improvement over std::unordered_map
152
200
  typedef ankerl::unordered_dense::map<int64_t, std::set<PathSegment *> *> SegMap;
153
201
  // typedef std::unordered_map<int64_t, std::set<PathSegment *> *> SegMap;
154
202
 
155
- typedef std::unordered_map<float, Candidate> CandMap;
203
+ typedef ankerl::unordered_dense::map<int, Candidate *> CandMap;
204
+ // typedef std::unordered_map<int, Candidate*> CandMap;
156
205
 
157
206
  class StringIndex {
158
207
  private:
159
208
  int tmp;
160
209
  char dirSeparator = '/'; // Usually '/', '\' or '\0' (no separator)
210
+ int numStrings = 0;
161
211
 
162
212
  std::vector<SegMap *> dirmaps;
163
213
  std::vector<SegMap *> filemaps;
@@ -170,10 +220,16 @@ private:
170
220
  int dirId = 0;
171
221
  float dirWeight = 0.7; // Give only 70% of score if match is for a directory
172
222
 
223
+ std::array<std::mutex, 9> mts_f;
224
+ std::array<std::mutex, 9> mts_d;
225
+
226
+ std::unique_ptr<ThreadPool> pool;
227
+ Output out{1}; // verbose level = 1
228
+
173
229
  public:
174
- StringIndex() {
230
+ StringIndex(char sep) : dirSeparator(sep) {
175
231
  root = new PathSegment();
176
- root->parent = NULL;
232
+ root->parent = nullptr;
177
233
  root->str = "[ROOT]";
178
234
 
179
235
  for (int i = 0; i <= 8; i++) {
@@ -181,11 +237,18 @@ public:
181
237
  filemaps.push_back(new SegMap);
182
238
  }
183
239
 
184
- #ifdef _OPENMP
185
- std::cout << "OPENMP enabled\n";
186
- #endif
240
+ // Threads between 4 and 6
241
+ // We don't seem to get any benefit from more than 6 threads even if the hardware supports it
242
+ int num_threads = std::max((int)std::thread::hardware_concurrency(), 4);
243
+ num_threads = std::min(num_threads, 6);
244
+ out.printv(2, "Number of threads: ", num_threads);
245
+ pool = std::unique_ptr<ThreadPool>(new ThreadPool(num_threads));
187
246
  }
188
247
 
248
+ /* Don't split the path into segments (separator = '\0').
249
+ This is slower, but can also be used for data other than file paths. */
250
+ StringIndex() : StringIndex('\0') {}
251
+
189
252
  void setDirSeparator(char sep) { dirSeparator = sep; }
190
253
  void setDirWeight(float val) { dirWeight = val; }
191
254
 
@@ -213,8 +276,15 @@ public:
213
276
  addStrToIndex(filePath, fileId, dirSeparator);
214
277
  }
215
278
 
279
+ void addStrToIndexThreaded(std::string filePath, int fileId) {
280
+ pool->enqueue([=] { addStrToIndex(filePath, fileId, dirSeparator); });
281
+ }
282
+ void waitUntilReady() { pool->waitUntilDone(); }
283
+
284
+ void waitUntilDone() { pool->waitUntilDone(); }
285
+
216
286
  /**
217
- * Add a string to the index to be search for afterwards
287
+ * Add a string to the index to be searched for afterwards
218
288
  *
219
289
  * @param filePath String to index (e.g. /home/user/Project/main.cpp).
220
290
  * @param fileId Unique identifier for filePath. Will be returned as the result from findSimilar.
@@ -222,8 +292,10 @@ public:
222
292
  * one of {'\\', '/', '\0' (no separation)}.
223
293
  */
224
294
  void addStrToIndex(std::string filePath, int fileId, const char &separator) {
295
+ out.printv(3, "Add file:", filePath, ",", fileId, ",", separator);
225
296
 
226
297
  std::vector<std::string> segs;
298
+ numStrings += 1;
227
299
 
228
300
  if (separator == '\0') {
229
301
  // No separation to directories & files
@@ -233,7 +305,7 @@ public:
233
305
  segs = splitString(filePath, separator);
234
306
  }
235
307
 
236
- PathSegment *prev = NULL;
308
+ PathSegment *prev = nullptr;
237
309
  prev = root;
238
310
  // Add segments to a tree type data structure
239
311
  // e.g. addStrToIndex('/foo/bar/file1.txt' ..)
@@ -245,25 +317,27 @@ public:
245
317
  auto x = *_x;
246
318
  PathSegment *p;
247
319
 
248
- auto it = prev->children.find(x);
320
+ prev->mu.lock();
321
+
249
322
  // this part of the path already exists in the tree
250
- if (it != prev->children.end()) {
323
+ if (auto it = prev->children.find(x); it != prev->children.end()) {
251
324
  p = it->second;
325
+ prev->mu.unlock();
252
326
  } else {
253
327
  p = new PathSegment(x, fileId);
254
328
  p->parent = prev;
255
- // If this is last item in segs
329
+ // If this is the last item in segs, then it is a file.
256
330
  if (_x == std::prev(segs.end())) {
257
- // therefore, it is a file.
258
- p->type = File;
331
+ p->type = segmentType::File;
259
332
  seglist[fileId] = p;
260
- } else {
261
- p->type = Dir;
333
+ } else { // otherwise, it is a directory
334
+ p->type = segmentType::Dir;
262
335
  p->fileId = dirId;
263
336
  // Files use user input Id. Directories need to have it generated
264
337
  dirId++;
265
338
  }
266
339
  prev->children[x] = p;
340
+ prev->mu.unlock();
267
341
  addPathSegmentKeys(p);
268
342
  }
269
343
 
@@ -272,7 +346,7 @@ public:
272
346
  }
273
347
 
274
348
  /**
275
- * The search will find filepaths similar to the input string
349
+ The search will find filepaths similar to the input string
276
350
 
277
351
  To be considered a candidate path, the file component of the path (e.g. file.txt)
278
352
  is required to have at least a substring of two characters in common with the
@@ -286,8 +360,8 @@ public:
286
360
  is also included in the PathSegment
287
361
  - take the length of that substring as score
288
362
  sum up the scores for each character c and divide by (string length)^2
289
-
290
- For example, if query = "rngnomadriv"
363
+
364
+ For example, if query = "rngnomadriv"
291
365
  and candidate is "./drivers/char/hw_random/nomadik-rng.c", then scores are calculated
292
366
  as follows:
293
367
  rngnomadriv
@@ -296,17 +370,23 @@ public:
296
370
  score1=(3+3+3+5+5+5+5+5+(4+4+4)*0.7)
297
371
 
298
372
  In final score, give a small penalty for larger candidate filenames:
299
- Divide main part of score with (query string length)^2
373
+ Divide main part of score with (query string length)^2
300
374
  and minor part by (query string length)*(candidate string length)
301
375
  score = score1/(11*11)*0.97 + score1/(11*38)*0.03 = 0.342944
302
376
 
303
377
  @param query String to search for inside the index
304
378
  */
305
379
 
306
- std::vector<std::pair<float, int>> findSimilar(std::string query, int minChars) {
380
+ [[nodiscard]] std::vector<std::pair<float, int>> findSimilar(std::string query) {
381
+ return findSimilar(query, 2);
382
+ }
383
+
384
+ [[nodiscard]] std::vector<std::pair<float, int>> findSimilar(std::string query, int minChars) {
307
385
  CandMap fileCandMap;
308
386
  CandMap dirCandMap;
309
387
 
388
+ waitUntilDone();
389
+
310
390
  // Find both files and directories that match the input query
311
391
  addToCandMap(fileCandMap, query, filemaps);
312
392
  addToCandMap(dirCandMap, query, dirmaps);
@@ -315,9 +395,9 @@ public:
315
395
  scores of the file */
316
396
  mergeCandidateMaps(fileCandMap, dirCandMap);
317
397
 
318
- // Set all candidate pointers to NULL so they won't mess up future searches
398
+ // Set all candidate pointers to nullptr so they won't mess up future searches
319
399
  for (auto seg : segsToClean) {
320
- seg->cand = NULL;
400
+ seg->cand = nullptr;
321
401
  }
322
402
  segsToClean.clear();
323
403
 
@@ -325,11 +405,17 @@ public:
325
405
  std::vector<std::pair<float, int>> results;
326
406
  for (auto &[fid, cand] : fileCandMap) {
327
407
  std::pair<float, int> v;
328
- float sc = cand.getScore();
408
+ float sc = cand->getScore();
329
409
  v.first = sc;
330
410
  v.second = fid;
331
411
  results.push_back(v);
412
+ delete cand;
332
413
  }
414
+
415
+ for (auto &[fid, cand] : dirCandMap) {
416
+ delete cand;
417
+ }
418
+
333
419
  // Sort highest score first
334
420
  std::sort(results.begin(), results.end(),
335
421
  [](std::pair<float, int> a, std::pair<float, int> b) { return a.first > b.first; });
@@ -337,10 +423,10 @@ public:
337
423
  }
338
424
 
339
425
  // Return int64_t representation of the first nchars in str, starting from index i
340
- int64_t getKeyAtIdx(std::string str, int i, int nchars) {
426
+ [[nodiscard]] int64_t getKeyAtIdx(std::string str, int i, int nchars) {
341
427
  int64_t key = 0;
342
428
  for (int i_char = 0; i_char < nchars; i_char++) {
343
- key = key | static_cast<int>(str[i + i_char]);
429
+ key = key | static_cast<int64_t>(str[i + i_char]);
344
430
  if (i_char < nchars - 1) {
345
431
  // Shift 8 bits to the left except on the last iteration
346
432
  key = key << 8;
@@ -395,22 +481,29 @@ private:
395
481
  maxChars = p->str.size();
396
482
  }
397
483
 
398
- #ifdef _OPENMP
399
- #pragma omp parallel for
400
- #endif
401
484
  for (int sublen = minChars; sublen <= maxChars; sublen++) {
402
485
 
486
+ std::mutex *mu;
403
487
  SegMap *map;
404
- if (p->type == File) {
488
+ if (p->type == segmentType::File) {
405
489
  map = filemaps[sublen];
490
+ mu = &mts_f[sublen];
406
491
  } else {
407
492
  map = dirmaps[sublen];
493
+ mu = &mts_d[sublen];
408
494
  }
409
495
 
410
496
  int count = str.size() - sublen + 1;
411
497
 
498
+ int64_t keys[count + 1];
499
+ for (int i = 0; i <= count; i++) {
500
+ keys[i] = getKeyAtIdx(str, i, sublen);
501
+ }
502
+
503
+ mu->lock();
412
504
  for (int i = 0; i <= count; i++) {
413
- int64_t key = getKeyAtIdx(str, i, sublen);
505
+ // int64_t key = getKeyAtIdx(str, i, sublen);
506
+ auto key = keys[i];
414
507
 
415
508
  // Create a new std::set for key if doesn't exist already
416
509
  auto it = map->find(key);
@@ -419,12 +512,14 @@ private:
419
512
  }
420
513
  (*map)[key]->insert(p);
421
514
  }
515
+ mu->unlock();
422
516
  }
423
517
  }
424
518
 
425
519
  // Find pathsegments from <map> that include the substring of <str> which starts at index <i> and
426
520
  // is of length <nchars>.
427
- std::vector<PathSegment *> findSimilarForNgram(std::string str, int i, int nchars, SegMap &map) {
521
+ [[nodiscard]] std::vector<PathSegment *> findSimilarForNgram(std::string str, int i, int nchars,
522
+ SegMap &map) {
428
523
 
429
524
  assert(i + nchars <= static_cast<int>(str.size()));
430
525
  std::vector<PathSegment *> res;
@@ -433,8 +528,7 @@ private:
433
528
  // transform that to 64 bit integer
434
529
  int64_t key = getKeyAtIdx(str, i, nchars);
435
530
  // Find all path segments in map that have the same substring
436
- auto it = map.find(key);
437
- if (it != map.end()) { // key found
531
+ if (auto it = map.find(key); it != map.end()) { // key found
438
532
  auto set = it->second;
439
533
  for (auto value : *set) {
440
534
  res.push_back(value);
@@ -471,12 +565,12 @@ private:
471
565
  void mergeCandidateMaps(CandMap &fileCandMap, CandMap &dirCandMap) {
472
566
 
473
567
  for (auto &[fid, cand] : fileCandMap) {
474
- PathSegment *p = cand.seg->parent;
475
- while (p->parent != NULL) {
476
- if (p->cand != NULL) {
477
- auto &scoreA = cand.v_charscore;
568
+ PathSegment *p = cand->seg->parent;
569
+ while (p->parent != nullptr) {
570
+ if (p->cand != nullptr) {
571
+ auto &scoreA = cand->v_charscore;
478
572
  auto &scoreB = p->cand->v_charscore;
479
- for (int i = 0; i < cand.len; i++) {
573
+ for (int i = 0; i < cand->len; i++) {
480
574
  if (scoreA[i] < scoreB[i] * dirWeight) {
481
575
  scoreA[i] = scoreB[i] * dirWeight;
482
576
  }
@@ -489,18 +583,22 @@ private:
489
583
 
490
584
  void addToResults(PathSegment *seg, std::string str, int i, int nchars, CandMap &candmap) {
491
585
 
492
- auto it2 = candmap.find(seg->fileId);
493
- if (it2 == candmap.end()) {
494
- Candidate cand(seg, str.size());
495
- seg->cand = &(candmap[seg->fileId]);
586
+ if (auto it2 = candmap.find(seg->fileId); it2 == candmap.end()) {
587
+ Candidate *cand = new Candidate(seg, str.size());
496
588
  segsToClean.push_back(seg);
497
589
  candmap[seg->fileId] = cand;
590
+ seg->cand = cand;
498
591
  }
499
592
 
500
593
  for (int j = i; j < i + nchars; j++) {
501
- if (candmap[seg->fileId][j] < nchars) {
502
- candmap[seg->fileId].v_charscore[j] = nchars;
594
+ Candidate &cand = *(candmap[seg->fileId]);
595
+ if (cand[j] < nchars) {
596
+ cand.v_charscore[j] = nchars;
503
597
  }
504
598
  }
505
599
  }
506
600
  };
601
+
602
+ } // namespace StrIdx
603
+
604
+ #endif
data/test.rb CHANGED
@@ -13,7 +13,13 @@ for x in lines
13
13
  end
14
14
 
15
15
  idx_time = Time.new
16
- puts "\nIndexing time (#{lines.size} files}): #{(idx_time - t).round(4)} seconds"
16
+ # Time to start the threadpool to process indexing
17
+ puts "\nIndexing launch time (#{lines.size} files}): #{(idx_time - t).round(4)} seconds"
18
+
19
+ idx.waitUntilDone() # Not necessary, will be called by idx.find
20
+ idx_time = Time.new
21
+ # Time when all threads have completed
22
+ puts "\nIndexing completed time (#{lines.size} files}): #{(idx_time - t).round(4)} seconds"
17
23
 
18
24
  query = "rngnomadriv"
19
25
  res = idx.find(query)