StrIdx 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/rubyext/extconf.rb CHANGED
@@ -1,15 +1,13 @@
1
1
  #!/usr/bin/env ruby
2
- #
3
2
 
4
3
  require 'mkmf'
5
4
 
6
5
  module_name = "stridx"
7
6
  extension_name = 'stridx'
8
7
 
9
- $CXXFLAGS << " -Wall -Wno-unused-variable -O3 -fopenmp"
8
+ $CXXFLAGS << " -Wall -Wno-unused-variable -O3"
10
9
 
11
10
  have_library( 'stdc++');
12
- have_library( 'gomp' );
13
11
 
14
12
  dir_config(extension_name) # The destination
15
13
  create_makefile(extension_name) # Create Makefile
@@ -7,7 +7,7 @@
7
7
 
8
8
  extern "C" {
9
9
 
10
- void str_idx_free(void *data) { delete (StringIndex *)data; }
10
+ void str_idx_free(void *data) { delete (StrIdx::StringIndex *)data; }
11
11
 
12
12
  // Wrap StringIndex class inside a ruby variable
13
13
  static const rb_data_type_t str_idx_type = {
@@ -26,7 +26,7 @@ static const rb_data_type_t str_idx_type = {
26
26
  };
27
27
 
28
28
  VALUE str_idx_alloc(VALUE self) {
29
- void *data = new StringIndex();
29
+ void *data = new StrIdx::StringIndex();
30
30
  return TypedData_Wrap_Struct(self, &str_idx_type, data);
31
31
  }
32
32
 
@@ -36,18 +36,27 @@ VALUE StringIndexAddSegments(VALUE self, VALUE str, VALUE fileId) {
36
36
 
37
37
  void *data;
38
38
  TypedData_Get_Struct(self, int, &str_idx_type, data);
39
- ((StringIndex *)data)->addStrToIndex(s1, fid, '/');
39
+ // ((StringIndex *)data)->addStrToIndex(s1, fid);
40
+ ((StrIdx::StringIndex *)data)->addStrToIndexThreaded(s1, fid);
40
41
 
41
42
  return self;
42
43
  }
43
44
 
45
+ VALUE StringIndexWaitUntilDone(VALUE self) {
46
+ void *data;
47
+ TypedData_Get_Struct(self, int, &str_idx_type, data);
48
+ ((StrIdx::StringIndex *)data)->waitUntilDone();
49
+ return self;
50
+ }
51
+
52
+
44
53
  VALUE StringIndexFind(VALUE self, VALUE str) {
45
54
  VALUE ret;
46
55
  std::string s1 = StringValueCStr(str);
47
56
 
48
57
  void *data;
49
58
  TypedData_Get_Struct(self, int, &str_idx_type, data);
50
- StringIndex *idx = (StringIndex *)data;
59
+ StrIdx::StringIndex *idx = (StrIdx::StringIndex *)data;
51
60
 
52
61
  ret = rb_ary_new();
53
62
  const std::vector<std::pair<float, int>> &results = idx->findSimilar(s1, 2);
@@ -66,6 +75,26 @@ VALUE StringIndexFind(VALUE self, VALUE str) {
66
75
  return ret;
67
76
  }
68
77
 
78
+ VALUE StringIndexSetDirSeparator(VALUE self, VALUE str) {
79
+ char c = '/';
80
+ if (TYPE(str) == T_STRING) {
81
+ std::string s = StringValueCStr(str);
82
+ if (s.size() >= 1) {
83
+ c = s[0];
84
+ }
85
+ } else {
86
+ c = '\0'; // No separator
87
+ // if (TYPE(obj) == T_NIL)
88
+ }
89
+
90
+ void *data;
91
+ TypedData_Get_Struct(self, int, &str_idx_type, data);
92
+ StrIdx::StringIndex *idx = (StrIdx::StringIndex *)data;
93
+ idx->setDirSeparator(c);
94
+
95
+ return self;
96
+ }
97
+
69
98
  void Init_stridx(void) {
70
99
 
71
100
  VALUE mStrIdx = rb_define_module("StrIdx");
@@ -73,7 +102,12 @@ void Init_stridx(void) {
73
102
 
74
103
  rb_define_alloc_func(classStringIndex, str_idx_alloc);
75
104
  rb_define_method(classStringIndex, "add", StringIndexAddSegments, 2);
105
+ rb_define_method(classStringIndex, "waitUntilDone", StringIndexWaitUntilDone, 0);
76
106
  rb_define_method(classStringIndex, "find", StringIndexFind, 1);
107
+
108
+ rb_define_method(classStringIndex, "setDirSeparator", StringIndexSetDirSeparator, 1);
109
+
110
+
77
111
  }
78
112
 
79
113
  } // End extern "C"
data/stridx.hpp CHANGED
@@ -1,21 +1,66 @@
1
1
 
2
+ #ifndef SSSTRIDX_HPP
3
+ #define SSSTRIDX_HPP
4
+
2
5
  #include <stdio.h>
3
6
  #include <stdlib.h>
4
7
  #include <cassert>
5
8
 
6
9
  #include <vector>
10
+ #include <array>
7
11
  #include <iostream>
8
12
  #include <unordered_map>
9
13
  #include <set>
10
14
  #include <algorithm>
11
15
  #include <sstream>
12
16
 
13
- #ifdef _OPENMP
14
- #include <omp.h>
15
- #endif
17
+ #include <vector>
18
+ #include <mutex>
19
+ #include <thread>
16
20
 
21
+ #include "thread_pool.hpp"
17
22
  #include "unordered_dense.h"
18
23
 
24
+ namespace StrIdx {
25
+
26
+ /* Alternative to using std::cout
27
+ Allows to control verbose level */
28
+ class Output {
29
+ private:
30
+ int verboseLevel;
31
+
32
+ public:
33
+ Output(int verb) : verboseLevel(verb) {}
34
+ Output() : Output(3) {}
35
+ ~Output() = default;
36
+ void print() {}
37
+
38
+ // When calling as print("xxx ",3, " yyy") outputs "xxx 3 yyy"
39
+ template <typename T, typename... Types> void print(T var1, Types... var2) {
40
+ std::cout << var1;
41
+ print(var2...);
42
+ }
43
+
44
+ // When calling as printl("xxx ",3, " yyy") outputs "xxx 3 yyy\n"
45
+ template <typename... Types> void printl(Types... var2) {
46
+ print(var2...);
47
+ print("\n");
48
+ }
49
+
50
+ /* When calling as printv(2, "xxx ",3, " yyy") outputs "xxx 3 yyy\n"
51
+ * if verboseLevel >= 2 (first arg)
52
+ */
53
+ template <typename... Types> void printv(int vlevel, Types... var2) {
54
+ if (verboseLevel < vlevel) {
55
+ return;
56
+ }
57
+ if (verboseLevel >= 3) {
58
+ print("[v=", vlevel, "] ");
59
+ }
60
+ printl(var2...);
61
+ }
62
+ };
63
+
19
64
  // Transforms input string as follows:
20
65
  // '/foo/bar/file1.txt'
21
66
  // => vector{"foo", "bar", "file1.txt"}
@@ -34,7 +79,7 @@ std::vector<std::string> splitString(const std::string &input, const char &separ
34
79
  }
35
80
 
36
81
  // Convert int64_t to binary string
37
- std::string int64ToBinaryString(int64_t num) {
82
+ [[nodiscard]] std::string int64ToBinaryString(int64_t num) {
38
83
  std::string result;
39
84
  for (int i = 63; i >= 0; --i) {
40
85
  result += ((num >> i) & 1) ? '1' : '0';
@@ -42,8 +87,8 @@ std::string int64ToBinaryString(int64_t num) {
42
87
  return result;
43
88
  }
44
89
 
45
- // Convert a (8 char) string represented as int64_t to std::string
46
- std::string int64ToStr(int64_t key) {
90
+ // Debug. Convert a (8 char) string represented as int64_t to std::string
91
+ [[nodiscard]] std::string int64ToStr(int64_t key) {
47
92
  int nchars = 8;
48
93
  std::string str;
49
94
  int multip = nchars * 8;
@@ -55,22 +100,24 @@ std::string int64ToStr(int64_t key) {
55
100
  return str;
56
101
  }
57
102
 
103
+ // Debug
58
104
  void printVector(const std::vector<int> &vec) {
59
105
  for (const auto &value : vec) {
60
106
  std::cout << value << " ";
61
107
  }
62
108
  }
63
109
 
64
- std::string charToBinaryString(char num) {
110
+ // Debug
111
+ [[nodiscard]] std::string charToBinaryString(char chr) {
65
112
  std::string result;
66
113
  for (int i = 7; i >= 0; --i) {
67
- result += ((num >> i) & 1) ? '1' : '0';
114
+ result += ((chr >> i) & 1) ? '1' : '0';
68
115
  }
69
116
  return result;
70
117
  }
71
118
 
72
119
  class Candidate;
73
- enum segmentType { Dir, File };
120
+ enum class segmentType { Dir, File };
74
121
 
75
122
  // A segment of a file path
76
123
  // e.g. if path is /foo/bar/baz.txt
@@ -81,17 +128,18 @@ public:
81
128
  int fileId; // (if FILE)
82
129
  Candidate *cand;
83
130
  PathSegment *parent;
131
+ std::mutex mu;
84
132
  ankerl::unordered_dense::map<std::string, PathSegment *> children;
85
- segmentType type = Dir;
86
- PathSegment() : parent(NULL) {}
87
- PathSegment(std::string _str) : str(_str), parent(NULL) {}
133
+ segmentType type = segmentType::Dir;
134
+ PathSegment() : parent(nullptr) {}
135
+ PathSegment(std::string _str) : str(_str), parent(nullptr) {}
88
136
  PathSegment(std::string _str, int _fileId)
89
- : str(_str), fileId(_fileId), cand(NULL), parent(NULL) {}
90
- int size() {
137
+ : str(_str), fileId(_fileId), cand(nullptr), parent(nullptr) {}
138
+ [[nodiscard]] int size() {
91
139
  int sz = str.size();
92
140
  PathSegment *cur = parent;
93
141
  // Sum up length of parent segments (+1 for divisors)
94
- while (cur->parent != NULL) {
142
+ while (cur->parent != nullptr) {
95
143
  sz += cur->str.size() + 1;
96
144
  cur = cur->parent;
97
145
  }
@@ -118,7 +166,7 @@ public:
118
166
  // Initialize v_charscores with zeros
119
167
  v_charscore.resize(len, 0);
120
168
  candLen = str.size();
121
- seg = NULL;
169
+ seg = nullptr;
122
170
  }
123
171
 
124
172
  Candidate(PathSegment *_seg, int _len) : seg(_seg), len(_len) {
@@ -127,7 +175,7 @@ public:
127
175
  candLen = seg->size();
128
176
  }
129
177
 
130
- float getScore() {
178
+ [[nodiscard]] float getScore() {
131
179
  int i = 0;
132
180
  float score = 0.0;
133
181
  candLen = seg->size();
@@ -145,19 +193,21 @@ public:
145
193
  return score;
146
194
  }
147
195
 
148
- float operator[](int idx) { return v_charscore[idx]; }
196
+ [[nodiscard]] float operator[](int idx) { return v_charscore[idx]; }
149
197
  };
150
198
 
151
199
  // This seems to give 10x speed improvement over std::unordered_map
152
200
  typedef ankerl::unordered_dense::map<int64_t, std::set<PathSegment *> *> SegMap;
153
201
  // typedef std::unordered_map<int64_t, std::set<PathSegment *> *> SegMap;
154
202
 
155
- typedef std::unordered_map<float, Candidate> CandMap;
203
+ typedef ankerl::unordered_dense::map<int, Candidate *> CandMap;
204
+ // typedef std::unordered_map<int, Candidate*> CandMap;
156
205
 
157
206
  class StringIndex {
158
207
  private:
159
208
  int tmp;
160
209
  char dirSeparator = '/'; // Usually '/', '\' or '\0' (no separator)
210
+ int numStrings = 0;
161
211
 
162
212
  std::vector<SegMap *> dirmaps;
163
213
  std::vector<SegMap *> filemaps;
@@ -170,10 +220,16 @@ private:
170
220
  int dirId = 0;
171
221
  float dirWeight = 0.7; // Give only 70% of score if match is for a directory
172
222
 
223
+ std::array<std::mutex, 9> mts_f;
224
+ std::array<std::mutex, 9> mts_d;
225
+
226
+ std::unique_ptr<ThreadPool> pool;
227
+ Output out{1}; // verbose level = 1
228
+
173
229
  public:
174
- StringIndex() {
230
+ StringIndex(char sep) : dirSeparator(sep) {
175
231
  root = new PathSegment();
176
- root->parent = NULL;
232
+ root->parent = nullptr;
177
233
  root->str = "[ROOT]";
178
234
 
179
235
  for (int i = 0; i <= 8; i++) {
@@ -181,11 +237,18 @@ public:
181
237
  filemaps.push_back(new SegMap);
182
238
  }
183
239
 
184
- #ifdef _OPENMP
185
- std::cout << "OPENMP enabled\n";
186
- #endif
240
+ // Threads between 4 and 6
241
+ // We don't seem to get any benefit from more than 6 threads even if the hardware supports it
242
+ int num_threads = std::max((int)std::thread::hardware_concurrency(), 4);
243
+ num_threads = std::min(num_threads, 6);
244
+ out.printv(2, "Number of threads: ", num_threads);
245
+ pool = std::unique_ptr<ThreadPool>(new ThreadPool(num_threads));
187
246
  }
188
247
 
248
+ /* Don't separate path to segments separator=\0.
249
+ This is slower, but can be used for other data than files also. */
250
+ StringIndex() : StringIndex('\0') {}
251
+
189
252
  void setDirSeparator(char sep) { dirSeparator = sep; }
190
253
  void setDirWeight(float val) { dirWeight = val; }
191
254
 
@@ -213,8 +276,15 @@ public:
213
276
  addStrToIndex(filePath, fileId, dirSeparator);
214
277
  }
215
278
 
279
+ void addStrToIndexThreaded(std::string filePath, int fileId) {
280
+ pool->enqueue([=] { addStrToIndex(filePath, fileId, dirSeparator); });
281
+ }
282
+ void waitUntilReady() { pool->waitUntilDone(); }
283
+
284
+ void waitUntilDone() { pool->waitUntilDone(); }
285
+
216
286
  /**
217
- * Add a string to the index to be search for afterwards
287
+ * Add a string to the index to be searched for afterwards
218
288
  *
219
289
  * @param filePath String to index (e.g. /home/user/Project/main.cpp).
220
290
  * @param fileId Unique identifier for filePath. Will be return as result from findSimilar.
@@ -222,8 +292,10 @@ public:
222
292
  * one of {'\\', '/', '\0' (no separation)}.
223
293
  */
224
294
  void addStrToIndex(std::string filePath, int fileId, const char &separator) {
295
+ out.printv(3, "Add file:", filePath, ",", fileId, ",", separator);
225
296
 
226
297
  std::vector<std::string> segs;
298
+ numStrings += 1;
227
299
 
228
300
  if (separator == '\0') {
229
301
  // No separation to directories & files
@@ -233,7 +305,7 @@ public:
233
305
  segs = splitString(filePath, separator);
234
306
  }
235
307
 
236
- PathSegment *prev = NULL;
308
+ PathSegment *prev = nullptr;
237
309
  prev = root;
238
310
  // Add segments to a tree type data structure
239
311
  // e.g. addStrToIndex('/foo/bar/file1.txt' ..)
@@ -245,25 +317,27 @@ public:
245
317
  auto x = *_x;
246
318
  PathSegment *p;
247
319
 
248
- auto it = prev->children.find(x);
320
+ prev->mu.lock();
321
+
249
322
  // this part of the path already exists in the tree
250
- if (it != prev->children.end()) {
323
+ if (auto it = prev->children.find(x); it != prev->children.end()) {
251
324
  p = it->second;
325
+ prev->mu.unlock();
252
326
  } else {
253
327
  p = new PathSegment(x, fileId);
254
328
  p->parent = prev;
255
- // If this is last item in segs
329
+ // If this is last item in segs, then it is a file.
256
330
  if (_x == std::prev(segs.end())) {
257
- // therefore, it is a file.
258
- p->type = File;
331
+ p->type = segmentType::File;
259
332
  seglist[fileId] = p;
260
- } else {
261
- p->type = Dir;
333
+ } else { // otherwise, it is a directory
334
+ p->type = segmentType::Dir;
262
335
  p->fileId = dirId;
263
336
  // Files use user input Id. Directories need to have it generated
264
337
  dirId++;
265
338
  }
266
339
  prev->children[x] = p;
340
+ prev->mu.unlock();
267
341
  addPathSegmentKeys(p);
268
342
  }
269
343
 
@@ -272,7 +346,7 @@ public:
272
346
  }
273
347
 
274
348
  /**
275
- * The search will find filepaths similar to the input string
349
+ The search will find filepaths similar to the input string
276
350
 
277
351
  To be considered a candidate path, the file component of the path (e.g. file.txt)
278
352
  is required to have at least a substring of two characters in common with the
@@ -286,8 +360,8 @@ public:
286
360
  is also included in the PathSegment
287
361
  - take the lenght of that substring as score
288
362
  sum up the scores for each character c and divide by (string length)^2
289
-
290
- For example, if query = "rngnomadriv"
363
+
364
+ For example, if query = "rngnomadriv"
291
365
  and candidate is "./drivers/char/hw_random/nomadik-rng.c", then scores are calculated
292
366
  as follows:
293
367
  rngnomadriv
@@ -296,17 +370,23 @@ public:
296
370
  score1=(3+3+3+5+5+5+5+5+(4+4+4)*0.7)
297
371
 
298
372
  In final score, give a small penalty for larger candidate filenames:
299
- Divide main part of score with (query string length)^2
373
+ Divide main part of score with (query string length)^2
300
374
  and minor part by (query string length)*(candidate string length)
301
375
  score = score1/(11*11)*0.97 + score1/(11*38)*0.03 = 0.342944
302
376
 
303
377
  @param query String to search for inside the index
304
378
  */
305
379
 
306
- std::vector<std::pair<float, int>> findSimilar(std::string query, int minChars) {
380
+ [[nodiscard]] std::vector<std::pair<float, int>> findSimilar(std::string query) {
381
+ return findSimilar(query, 2);
382
+ }
383
+
384
+ [[nodiscard]] std::vector<std::pair<float, int>> findSimilar(std::string query, int minChars) {
307
385
  CandMap fileCandMap;
308
386
  CandMap dirCandMap;
309
387
 
388
+ waitUntilDone();
389
+
310
390
  // Find both files and directories that match the input query
311
391
  addToCandMap(fileCandMap, query, filemaps);
312
392
  addToCandMap(dirCandMap, query, dirmaps);
@@ -315,9 +395,9 @@ public:
315
395
  scores of the file */
316
396
  mergeCandidateMaps(fileCandMap, dirCandMap);
317
397
 
318
- // Set all candidate pointers to NULL so they won't mess up future searches
398
+ // Set all candidate pointers to nullptr so they won't mess up future searches
319
399
  for (auto seg : segsToClean) {
320
- seg->cand = NULL;
400
+ seg->cand = nullptr;
321
401
  }
322
402
  segsToClean.clear();
323
403
 
@@ -325,11 +405,17 @@ public:
325
405
  std::vector<std::pair<float, int>> results;
326
406
  for (auto &[fid, cand] : fileCandMap) {
327
407
  std::pair<float, int> v;
328
- float sc = cand.getScore();
408
+ float sc = cand->getScore();
329
409
  v.first = sc;
330
410
  v.second = fid;
331
411
  results.push_back(v);
412
+ delete cand;
332
413
  }
414
+
415
+ for (auto &[fid, cand] : dirCandMap) {
416
+ delete cand;
417
+ }
418
+
333
419
  // Sort highest score first
334
420
  std::sort(results.begin(), results.end(),
335
421
  [](std::pair<float, int> a, std::pair<float, int> b) { return a.first > b.first; });
@@ -337,10 +423,10 @@ public:
337
423
  }
338
424
 
339
425
  // Return int64_t representation of the first nchars in str, starting from index i
340
- int64_t getKeyAtIdx(std::string str, int i, int nchars) {
426
+ [[nodiscard]] int64_t getKeyAtIdx(std::string str, int i, int nchars) {
341
427
  int64_t key = 0;
342
428
  for (int i_char = 0; i_char < nchars; i_char++) {
343
- key = key | static_cast<int>(str[i + i_char]);
429
+ key = key | static_cast<int64_t>(str[i + i_char]);
344
430
  if (i_char < nchars - 1) {
345
431
  // Shift 8 bits to the left except on the last iteration
346
432
  key = key << 8;
@@ -395,22 +481,29 @@ private:
395
481
  maxChars = p->str.size();
396
482
  }
397
483
 
398
- #ifdef _OPENMP
399
- #pragma omp parallel for
400
- #endif
401
484
  for (int sublen = minChars; sublen <= maxChars; sublen++) {
402
485
 
486
+ std::mutex *mu;
403
487
  SegMap *map;
404
- if (p->type == File) {
488
+ if (p->type == segmentType::File) {
405
489
  map = filemaps[sublen];
490
+ mu = &mts_f[sublen];
406
491
  } else {
407
492
  map = dirmaps[sublen];
493
+ mu = &mts_d[sublen];
408
494
  }
409
495
 
410
496
  int count = str.size() - sublen + 1;
411
497
 
498
+ int64_t keys[count + 1];
499
+ for (int i = 0; i <= count; i++) {
500
+ keys[i] = getKeyAtIdx(str, i, sublen);
501
+ }
502
+
503
+ mu->lock();
412
504
  for (int i = 0; i <= count; i++) {
413
- int64_t key = getKeyAtIdx(str, i, sublen);
505
+ // int64_t key = getKeyAtIdx(str, i, sublen);
506
+ auto key = keys[i];
414
507
 
415
508
  // Create a new std::set for key if doesn't exist already
416
509
  auto it = map->find(key);
@@ -419,12 +512,14 @@ private:
419
512
  }
420
513
  (*map)[key]->insert(p);
421
514
  }
515
+ mu->unlock();
422
516
  }
423
517
  }
424
518
 
425
519
  // Find pathsegments from <map> that include the substring of <str> which starts at index <i> and
426
520
  // is of length <nchars>.
427
- std::vector<PathSegment *> findSimilarForNgram(std::string str, int i, int nchars, SegMap &map) {
521
+ [[nodiscard]] std::vector<PathSegment *> findSimilarForNgram(std::string str, int i, int nchars,
522
+ SegMap &map) {
428
523
 
429
524
  assert(i + nchars <= static_cast<int>(str.size()));
430
525
  std::vector<PathSegment *> res;
@@ -433,8 +528,7 @@ private:
433
528
  // transform that to 64 bit integer
434
529
  int64_t key = getKeyAtIdx(str, i, nchars);
435
530
  // Find all path segments in map that have the same substring
436
- auto it = map.find(key);
437
- if (it != map.end()) { // key found
531
+ if (auto it = map.find(key); it != map.end()) { // key found
438
532
  auto set = it->second;
439
533
  for (auto value : *set) {
440
534
  res.push_back(value);
@@ -471,12 +565,12 @@ private:
471
565
  void mergeCandidateMaps(CandMap &fileCandMap, CandMap &dirCandMap) {
472
566
 
473
567
  for (auto &[fid, cand] : fileCandMap) {
474
- PathSegment *p = cand.seg->parent;
475
- while (p->parent != NULL) {
476
- if (p->cand != NULL) {
477
- auto &scoreA = cand.v_charscore;
568
+ PathSegment *p = cand->seg->parent;
569
+ while (p->parent != nullptr) {
570
+ if (p->cand != nullptr) {
571
+ auto &scoreA = cand->v_charscore;
478
572
  auto &scoreB = p->cand->v_charscore;
479
- for (int i = 0; i < cand.len; i++) {
573
+ for (int i = 0; i < cand->len; i++) {
480
574
  if (scoreA[i] < scoreB[i] * dirWeight) {
481
575
  scoreA[i] = scoreB[i] * dirWeight;
482
576
  }
@@ -489,18 +583,22 @@ private:
489
583
 
490
584
  void addToResults(PathSegment *seg, std::string str, int i, int nchars, CandMap &candmap) {
491
585
 
492
- auto it2 = candmap.find(seg->fileId);
493
- if (it2 == candmap.end()) {
494
- Candidate cand(seg, str.size());
495
- seg->cand = &(candmap[seg->fileId]);
586
+ if (auto it2 = candmap.find(seg->fileId); it2 == candmap.end()) {
587
+ Candidate *cand = new Candidate(seg, str.size());
496
588
  segsToClean.push_back(seg);
497
589
  candmap[seg->fileId] = cand;
590
+ seg->cand = cand;
498
591
  }
499
592
 
500
593
  for (int j = i; j < i + nchars; j++) {
501
- if (candmap[seg->fileId][j] < nchars) {
502
- candmap[seg->fileId].v_charscore[j] = nchars;
594
+ Candidate &cand = *(candmap[seg->fileId]);
595
+ if (cand[j] < nchars) {
596
+ cand.v_charscore[j] = nchars;
503
597
  }
504
598
  }
505
599
  }
506
600
  };
601
+
602
+ } // namespace StrIdx
603
+
604
+ #endif
data/test.rb CHANGED
@@ -13,7 +13,13 @@ for x in lines
13
13
  end
14
14
 
15
15
  idx_time = Time.new
16
- puts "\nIndexing time (#{lines.size} files}): #{(idx_time - t).round(4)} seconds"
16
+ # Time to start the threadpool to process indexing
17
+ puts "\nIndexing launch time (#{lines.size} files}): #{(idx_time - t).round(4)} seconds"
18
+
19
+ idx.waitUntilDone() # Not necessary, will be called by idx.find
20
+ idx_time = Time.new
21
+ # Time when all threads have completed
22
+ puts "\nIndexing completed time (#{lines.size} files}): #{(idx_time - t).round(4)} seconds"
17
23
 
18
24
  query = "rngnomadriv"
19
25
  res = idx.find(query)