StrIdx 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6d559876ad4cbf26be66db682e45701f80359d9b4752fdc61042c665f62c3000
4
- data.tar.gz: 6c0aa9cce9cea114ada2d1dfb08a833f2d338a3d9815c2a5dc32ceb15186bfe7
3
+ metadata.gz: e79f1bebc1e56e0a1966ae5ff69b50e4048953dd338807a6c9dba1c53b1c8f34
4
+ data.tar.gz: 28eea5841ce96f9460975720a43a2655f4902a9c44458a8aae01ce0b3077e67d
5
5
  SHA512:
6
- metadata.gz: 6f9bf0a17ee3541b0dfc2e296bbb78dcb2c86c821427ffff5b1d6a86b7f1a7df53d428e5fc447a148c561177a495c0a9013cf1ebe18fd074fbc478bf7ad81fec
7
- data.tar.gz: 0cdead8fab1925979337d5fa5ee890c42c654006db3aed2274e7a0e4a41cc50e157a346b28cbb2f5ce2dee35876010d503602932addcf06f5409a75b8cb5c81c
6
+ metadata.gz: e82430abd644e876dd758ceaffb3f759e19fde2a44cc85872f01721943c42d85d2032cbde91d235d44b7921833ed044241692247c88b56f54c6ea7248f5e584e
7
+ data.tar.gz: 642e16d0e6291474b007e0341bf6571576299f353bf1f5ff13a5c9966d9b1b2450de052f3ebad6f44c9ab9b341686dd41b4adf35922406e3df358c8cbabc9c31
data/README.md CHANGED
@@ -66,7 +66,7 @@ eval "$(stridx.rb bash)"
66
66
 
67
67
  To autostart the server, add the following line to .bashrc:
68
68
  ```
69
- [ ! -f ~/.stridx/sock ] && stridx.rb start -- ~/Documents/ ~/Pictures/
69
+ stridx.rb start -- ~/Documents/ ~/Pictures/
70
70
  ```
71
71
 
72
72
 
@@ -79,7 +79,6 @@ Stop server:
79
79
  ```
80
80
  stridx.rb stop
81
81
  ```
82
- In case stop doesn't work, try: `kill $(pgrep -f runserver.rb | tail -n 1)`
83
82
 
84
83
  Start the indexing server (in the foreground, for debugging):
85
84
  ```
data/demo.cpp CHANGED
@@ -1,5 +1,8 @@
1
1
 
2
2
  #include <sys/resource.h>
3
+ #include <malloc.h>
4
+
5
+ #include "mem_info.h"
3
6
 
4
7
  #include <condition_variable>
5
8
  #include <functional>
@@ -21,7 +24,7 @@ using std::cout;
21
24
  using std::pair;
22
25
  using std::vector;
23
26
 
24
- std::vector<std::string> readLinesFromFile(const std::string &filename) {
27
+ std::vector<std::string> readLinesFromFile(const std::string &filename, int limit = 0) {
25
28
  std::vector<std::string> lines;
26
29
  std::ifstream file(filename);
27
30
  if (!file.is_open()) {
@@ -30,8 +33,10 @@ std::vector<std::string> readLinesFromFile(const std::string &filename) {
30
33
  }
31
34
 
32
35
  std::string line;
33
- while (std::getline(file, line)) {
36
+ int i=0;
37
+ while (std::getline(file, line) && ( limit == 0 || i < limit) ) {
34
38
  lines.push_back(line);
39
+ i++;
35
40
  }
36
41
 
37
42
  file.close();
@@ -39,88 +44,134 @@ std::vector<std::string> readLinesFromFile(const std::string &filename) {
39
44
  }
40
45
 
41
46
  int main() {
42
- StrIdx::StringIndex idx('/'); // Separate directories using unix style "/" char
43
- // idx.addStrToIndex("./gdk/x11/gdkasync.c", 0 /*id*/, '/' /*separator*/);
44
- // idx.addStrToIndex("./gdk/x11/gdksettings.c", 1, '/');
45
- // idx.addStrToIndex("./gdk/x11/gdkx11devicemanager-xi2.h", 2, '/');
46
-
47
- // Add the file paths of 89828 files in linux-6.9-rc6 to the index
48
- std::string fn_filePaths = "flist.txt";
49
- std::vector<std::string> v_filePaths = readLinesFromFile(fn_filePaths);
50
-
51
- // int* a = new int[10];
52
- // delete(a);
53
- // delete(a);
54
-
55
- // Launch indexing to be run on background
56
- cout << "File paths: " << v_filePaths.size() << std::endl;
57
- cout << "Start indexing in the background" << std::endl;
58
- auto start = std::chrono::high_resolution_clock::now();
59
- int id = 0;
60
- for (const auto &filePath : v_filePaths) {
61
- idx.addStrToIndexThreaded(filePath, id);
62
- id++;
63
- }
64
-
65
- auto idx_time_launch = std::chrono::high_resolution_clock::now();
66
- std::chrono::duration<double, std::milli> duration_launch = idx_time_launch - start;
67
- cout << "Indexing launch time (seconds): " << duration_launch.count() / 1000 << "\n";
68
-
69
- // Wait until indexing has finished
70
- idx.waitUntilDone();
71
-
72
- auto idx_time = std::chrono::high_resolution_clock::now();
73
- std::chrono::duration<double, std::milli> duration = idx_time - start;
74
- cout << "Indexing finished time for " << v_filePaths.size()
75
- << " file paths (seconds): " << duration.count() / 1000 << "\n";
76
-
77
-
78
- cout << "DEBUG" << std::endl;
79
- // idx.cm.debug();
80
- cout << "END DEBUG" << std::endl;
81
-
82
- // Find matching filepaths from the index for the query string "rngnomadriv"
83
- start = std::chrono::high_resolution_clock::now();
84
- // std::string query = "rngnomadriv";
85
- std::string query = "irqbypass.c";
86
- for (int i = 0; i < 99; i++) {
87
- // const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
88
- const vector<pair<float, int>> &results = idx.findSim(query);
89
- }
90
-
47
+ {
48
+ StrIdx::StringIndex idx('/'); // Separate directories using unix style "/" char
49
+ // idx.addStrToIndex("./gdk/x11/gdkasync.c", 0 /*id*/, '/' /*separator*/);
50
+ // idx.addStrToIndex("./gdk/x11/gdksettings.c", 1, '/');
51
+ // idx.addStrToIndex("./gdk/x11/gdkx11devicemanager-xi2.h", 2, '/');
52
+
53
+ // Add the file paths of 89828 files in linux-6.9-rc6 to the index
54
+ std::string fn_filePaths = "flist2.txt";
55
+ std::vector<std::string> v_filePaths = readLinesFromFile(fn_filePaths);
56
+ // std::vector<std::string> v_filePaths = readLinesFromFile(fn_filePaths,10000);
57
+
58
+ // int* a = new int[10];
59
+ // delete(a);
60
+ // delete(a);
61
+
62
+ // Launch indexing to be run on background
63
+ cout << "File paths: " << v_filePaths.size() << std::endl;
64
+ cout << "Start indexing in the background" << std::endl;
65
+ auto start = std::chrono::high_resolution_clock::now();
66
+ int id = 0;
67
+ for (const auto &filePath : v_filePaths) {
68
+ // idx.addStrToIndexThreaded(filePath, id);
69
+ idx.addStrToIndex(filePath, id);
70
+ id++;
71
+ }
91
72
 
92
- // idx.findSim(query);
73
+ std::cout << "========\n";
74
+ for (int i = 0; i < id; i++) {
75
+ // std::cout << idx.getString(2) << "{}";
76
+ idx.getString(i);
77
+ }
78
+ std::cout << "========\n";
79
+
80
+ auto idx_time_launch = std::chrono::high_resolution_clock::now();
81
+ std::chrono::duration<double, std::milli> duration_launch = idx_time_launch - start;
82
+ cout << "Indexing launch time (seconds): " << duration_launch.count() / 1000 << "\n";
83
+
84
+ // Wait until indexing has finished
85
+ idx.waitUntilDone();
86
+
87
+ auto idx_time = std::chrono::high_resolution_clock::now();
88
+ std::chrono::duration<double, std::milli> duration = idx_time - start;
89
+ cout << "Indexing finished time for " << v_filePaths.size()
90
+ << " file paths (seconds): " << duration.count() / 1000 << "\n";
91
+
92
+ cout << "DEBUG" << std::endl;
93
+ // idx.cm.debug();
94
+ cout << "END DEBUG" << std::endl;
95
+
96
+ // Find matching filepaths from the index for the query string "rngnomadriv"
97
+ start = std::chrono::high_resolution_clock::now();
98
+ std::string query = "rngnomadriv";
99
+ // std::string query = "rngnomaindriv";
100
+ // std::string query = "time.rs";
101
+ // std::string query = "irqbypass.c";
102
+ for (int i = 0; i < 99; i++) {
103
+ // const vector<pair<float, int>> &results = idx.findSimilar(query);
104
+ }
93
105
 
94
- // const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
95
- const vector<pair<float, int>> &results = idx.findSim(query);
96
- auto search_time = std::chrono::high_resolution_clock::now();
97
- duration = search_time - start;
98
- cout << "Search time for 100 queries (seconds): " << duration.count() / 1000 << "\n";
106
+ // idx.findSim(query);
107
+
108
+ // auto res = idx.findDirectories(query);
109
+
110
+ // const vector<pair<float, int>> &results = idx.findSimilar(query);
111
+ // const vector<pair<float, int>> &results = idx.findDirectories(query);
112
+ // const vector<pair<float, std::string>> &results = idx.findFilesAndDirectories(query, true,
113
+ // false);
114
+ vector<pair<float, std::string>> results = idx.findFilesAndDirectories(query, true, false);
115
+
116
+ auto search_time = std::chrono::high_resolution_clock::now();
117
+ duration = search_time - start;
118
+ cout << "Search time for 100 queries (seconds): " << duration.count() / 1000 << "\n";
119
+
120
+ int i = 0;
121
+ std::cout << "query string: " << query << "\n";
122
+ std::cout << "Top 20 matches[1]:\n";
123
+ bool isDir = true;
124
+ for (const auto &res : results) {
125
+ // std::cout << res.second << " " << res.first << " " << v_filePaths[res.second] << "\n";
126
+ std::cout << res.first << " " << res.second << "\n";
127
+ i++;
128
+ if (i > 40) {
129
+ break;
130
+ }
131
+ }
132
+
133
+ {
134
+
135
+ auto results = idx.findFiles(query);
136
+ int i = 0;
137
+ std::cout << "query string: " << query << "\n";
138
+ std::cout << "Top 20 matchesfff:\n";
139
+ bool isDir = true;
140
+ for (const auto &res : results) {
141
+ std::cout << res.second << " " << res.first << " " << v_filePaths[res.second] << "\n";
142
+ // std::cout << res.first << " " << res.second << "\n";
143
+ i++;
144
+ if (i > 40) {
145
+ break;
146
+ }
147
+ }
148
+ }
99
149
 
100
- int i = 0;
101
- std::cout << "query string: " << query << "\n";
102
- std::cout << "Top 20 matches:\n";
103
- for (const auto &res : results) {
104
- std::cout << res.second << " " << res.first << " " << v_filePaths[res.second] << "\n";
105
- i++;
106
- if (i > 20) {
107
- break;
150
+ std::cout << "========\n";
151
+ for (int i = 0; i < id; i++) {
152
+ // std::cout << idx.getString(2) << "{}";
153
+ // idx.getString(i);
108
154
  }
109
- }
155
+ std::cout << "========\n";
110
156
 
111
- // std::cout << "Size of MyClass: " << sizeof(StrIdx::CharMap) << " bytes" << std::endl;
112
- // std::cout << "Size of CharMap3: " << sizeof(StrIdx::CharMap3) << " bytes" << std::endl;
113
157
  std::cout << "Size of CharNode: " << sizeof(StrIdx::CharNode) << " bytes" << std::endl;
114
158
  std::cout << "Size of int: " << sizeof(int) << " bytes" << std::endl;
115
159
 
160
+ StrIdx::out.printl("MEMSTAT current:", getCurrentRSS(), " peak:", getPeakRSS());
161
+ // std::this_thread::sleep_for(std::chrono::milliseconds(7000));
162
+
163
+ }
164
+
165
+ // Force memory dealloc to properly benchmark
166
+ // https://www.reddit.com/r/C_Programming/comments/13dn8d7/is_malloc_trim_safe_to_use/
167
+ malloc_trim(0);
168
+
169
+ StrIdx::out.printl("MEMSTAT current:", getCurrentRSS(), " peak:", getPeakRSS());
170
+
171
+ // std::this_thread::sleep_for(std::chrono::milliseconds(7000));
116
172
  struct rusage usage;
117
173
  getrusage(RUSAGE_SELF, &usage);
118
174
  std::cout << "Maximum resident set size: " << usage.ru_maxrss << " kilobytes" << std::endl;
119
- std::cout << "Integral shared memory size: " << usage.ru_ixrss << " kilobytes" << std::endl;
120
- std::cout << "Integral unshared data size: " << usage.ru_idrss << " kilobytes" << std::endl;
121
- std::cout << "Integral unshared stack size: " << usage.ru_isrss << " kilobytes" << std::endl;
122
-
123
-
124
175
 
125
176
  return 0;
126
177
  }
data/exe/stridx.rb CHANGED
@@ -1,21 +1,135 @@
1
1
  #!/usr/bin/env ruby
2
- require 'fileutils'
2
+ require "fileutils"
3
3
 
4
4
  $:.unshift File.dirname(__FILE__) + "/.."
5
+ require "server.rb"
5
6
 
6
- if ARGV[0] == "tty"
7
+ CUR_FILE = File.basename(__FILE__)
8
+ PID_FILE = File.expand_path("~/.config/stridx/index.pid")
9
+ LOCK_FILE = File.expand_path("~/.config/stridx/index.lock")
10
+
11
+
12
+
13
+ # Guard against a race condition when two processes are started at the same time
14
+ def obtain_lock_or_exit
15
+ @lockfile = File.open(LOCK_FILE, File::RDWR | File::CREAT, 0644)
16
+
17
+ unless @lockfile.flock(File::LOCK_NB | File::LOCK_EX)
18
+ puts "Another instance is already running."
19
+ exit 1
20
+ end
21
+
22
+ # Optionally truncate and write PID for info/logging
23
+ @lockfile.truncate(0)
24
+ @lockfile.write("#{Process.pid}\n")
25
+ @lockfile.flush
26
+ end
27
+
28
+ def running?
29
+ return false unless File.exist?(PID_FILE)
30
+
31
+ pid = File.read(PID_FILE).to_i
32
+
33
+ begin
34
+ # Check if process exists
35
+ Process.kill(0, pid)
36
+
37
+ # Handle race condition: if the daemon was previously killed with "kill -9",
38
+ # the PID file may remain. A new, unrelated process could later reuse the same PID,
39
+ # causing a false positive when checking for an existing instance and preventing the daemon from starting.
40
+
41
+ # ./daemon.rb # Starts daemon
42
+ # kill -9 $(cat /tmp/daemon_example.pid) # Force kill
43
+ # echo $$ > /tmp/daemon_example.pid # Simulate reused PID (use another terminal)
44
+ # ./daemon.rb # Old version would fail here; fixed version should detect mismatch
45
+
46
+ # Check if command line matches this script
47
+ cmdline = File.read("/proc/#{pid}/cmdline").split("\0")
48
+
49
+ correct_process = cmdline.any? { |arg| arg.include?(CUR_FILE) }
50
+ puts correct_process
51
+ if correct_process == false
52
+ puts "Old pidfile points to wrong process"
53
+ return false
54
+ end
55
+
56
+ return true
57
+ rescue Errno::ESRCH, Errno::ENOENT
58
+ return false
59
+ rescue Errno::EACCES
60
+ # Process exists, but inaccessible — might still be ours
61
+ return true
62
+ end
63
+ end
64
+
65
+ # Old version without /proc check
66
+ def running_old?
67
+ return false unless File.exist?(PID_FILE)
68
+ pid = File.read(PID_FILE).to_i
69
+ Process.kill(0, pid)
70
+ true
71
+ rescue Errno::ESRCH, Errno::EPERM
72
+ false
73
+ end
74
+
75
+ def start(daemonize: false)
76
+ if running?
77
+ puts "Daemon is already running."
78
+ exit 1
79
+ end
80
+
81
+ if daemonize
82
+ # Daemonize the process
83
+ Process.daemon(true, true) # nochdir = true: don't change directory; noclose = true: keep stdio open
84
+
85
+ # Save PID
86
+ File.write(PID_FILE, Process.pid)
87
+ puts "Daemon started with PID #{Process.pid}"
88
+
89
+ trap("TERM") do
90
+ puts "Daemon stopping..."
91
+ File.delete(PID_FILE) if File.exist?(PID_FILE)
92
+ exit
93
+ end
94
+
95
+ pid_dir_path = File.expand_path("~/.config/stridx/")
96
+ FileUtils.mkdir_p(pid_dir_path)
97
+ end
98
+
99
+ StrIdx::Server.start ARGV
100
+ end
101
+
102
+ def stop
103
+ unless File.exist?(PID_FILE)
104
+ puts "No PID file found. Daemon not running?"
105
+ exit 1
106
+ end
107
+
108
+ pid = File.read(PID_FILE).to_i
109
+ puts "Stopping daemon with PID #{pid}..."
110
+ Process.kill("TERM", pid)
111
+ File.delete(PID_FILE) rescue nil
112
+ rescue Errno::ESRCH
113
+ puts "Process not found. Cleaning up PID file."
114
+ File.delete(PID_FILE) rescue nil
115
+ end
116
+
117
+ # Entry point
118
+ case ARGV.first
119
+ when "stop"
120
+ stop
121
+ when "tty"
7
122
  require "stridx-tty.rb"
8
123
  StrIdxTTY.run
9
- elsif ARGV[0] == "bash"
124
+ when "bash"
10
125
  puts %q/
11
126
  bind -m emacs-standard '"\er": redraw-current-line';
12
127
  bind -m emacs-standard '"\C-t": " \C-b\C-k \C-u`stridx.rb tty`\e\C-e\er\C-a\C-y\C-h\C-e\e \C-y\ey\C-x\C-x\C-f"'
13
128
  /
14
- else
15
- require "daemons"
16
-
17
- pid_dir_path = File.expand_path("~/.config/stridx/")
18
- FileUtils.mkdir_p(pid_dir_path)
19
- Daemons.run(File.dirname(__FILE__) + "/../runserver.rb",
20
- {:dir_mode => :normal, :dir => pid_dir_path })
129
+ when "run"
130
+ obtain_lock_or_exit
131
+ start(daemonize: false)
132
+ when "start"
133
+ obtain_lock_or_exit
134
+ start(daemonize: true)
21
135
  end
data/rubyext/extconf.rb CHANGED
@@ -5,7 +5,7 @@ require 'mkmf'
5
5
  module_name = "stridx"
6
6
  extension_name = 'stridx'
7
7
 
8
- $CXXFLAGS << " -Wall -Wno-unused-variable -O3"
8
+ $CXXFLAGS << " -std=c++17 -Wall -Wno-unused-variable -O3"
9
9
 
10
10
  have_library( 'stdc++');
11
11
 
@@ -48,7 +48,32 @@ VALUE StringIndexWaitUntilDone(VALUE self) {
48
48
  ((StrIdx::StringIndex *)data)->waitUntilDone();
49
49
  return self;
50
50
  }
51
-
51
+
52
+ VALUE StringIndexFindNum(VALUE self, VALUE str, VALUE _limit) {
53
+ VALUE ret;
54
+ std::string s1 = StringValueCStr(str);
55
+
56
+ void *data;
57
+ TypedData_Get_Struct(self, int, &str_idx_type, data);
58
+ StrIdx::StringIndex *idx = (StrIdx::StringIndex *)data;
59
+
60
+ int limit = NUM2INT(_limit);
61
+
62
+ ret = rb_ary_new();
63
+ const std::vector<std::pair<float, int>> &results = idx->findSimilar(s1);
64
+ int i = 0;
65
+ for (const auto &res : results) {
66
+ VALUE arr = rb_ary_new();
67
+ rb_ary_push(arr, INT2NUM(res.second));
68
+ rb_ary_push(arr, DBL2NUM(res.first));
69
+ rb_ary_push(ret, arr);
70
+ i++;
71
+ if (i >= limit) {
72
+ break;
73
+ }
74
+ }
75
+ return ret;
76
+ }
52
77
 
53
78
  VALUE StringIndexFind(VALUE self, VALUE str) {
54
79
  VALUE ret;
@@ -109,7 +134,8 @@ VALUE StringIndexFindDirs(VALUE self, VALUE str) {
109
134
  StrIdx::StringIndex *idx = (StrIdx::StringIndex *)data;
110
135
 
111
136
  ret = rb_ary_new();
112
- const std::vector<std::pair<float, std::string>> &results = idx->findFilesAndDirectories(s1,false,true);
137
+ const std::vector<std::pair<float, std::string>> &results =
138
+ idx->findFilesAndDirectories(s1, false, true);
113
139
  int limit = 40;
114
140
  int i = 0;
115
141
  for (const auto &res : results) {
@@ -125,10 +151,6 @@ VALUE StringIndexFindDirs(VALUE self, VALUE str) {
125
151
  return ret;
126
152
  }
127
153
 
128
-
129
-
130
-
131
-
132
154
  VALUE StringIndexSetDirSeparator(VALUE self, VALUE str) {
133
155
  char c = '/';
134
156
  if (TYPE(str) == T_STRING) {
@@ -149,6 +171,17 @@ VALUE StringIndexSetDirSeparator(VALUE self, VALUE str) {
149
171
  return self;
150
172
  }
151
173
 
174
+ VALUE StringIndexSetDirWeight(VALUE self, VALUE d) {
175
+ if (TYPE(d) == T_FLOAT) {
176
+ double c_float = NUM2DBL(rb_funcall(d, rb_intern("to_f"), 0));
177
+ void *data;
178
+ TypedData_Get_Struct(self, int, &str_idx_type, data);
179
+ StrIdx::StringIndex *idx = (StrIdx::StringIndex *)data;
180
+ idx->setDirWeight(c_float);
181
+ }
182
+ return self;
183
+ }
184
+
152
185
  void Init_stridx(void) {
153
186
 
154
187
  VALUE mStrIdx = rb_define_module("StrIdx");
@@ -158,12 +191,12 @@ void Init_stridx(void) {
158
191
  rb_define_method(classStringIndex, "add", StringIndexAddSegments, 2);
159
192
  rb_define_method(classStringIndex, "waitUntilDone", StringIndexWaitUntilDone, 0);
160
193
  rb_define_method(classStringIndex, "find", StringIndexFind, 1);
194
+ rb_define_method(classStringIndex, "findNum", StringIndexFindNum, 2);
195
+ rb_define_method(classStringIndex, "setDirWeight", StringIndexSetDirWeight, 1);
161
196
  rb_define_method(classStringIndex, "findFilesAndDirs", StringIndexFindFilesAndDirs, 1);
162
197
  rb_define_method(classStringIndex, "findDirs", StringIndexFindDirs, 1);
163
-
198
+
164
199
  rb_define_method(classStringIndex, "setDirSeparator", StringIndexSetDirSeparator, 1);
165
-
166
-
167
200
  }
168
201
 
169
202
  } // End extern "C"
data/runserver.rb CHANGED
@@ -1,4 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
+
3
+ # Add cur dir to load path
2
4
  $:.unshift File.dirname(__FILE__)
3
5
 
4
6
  def kill_signal
data/server.rb CHANGED
@@ -33,6 +33,7 @@ module StrIdx
33
33
  def initialize(dir_list, daemonize: false)
34
34
  idx = StrIdx::StringIndex.new
35
35
  idx.setDirSeparator("/")
36
+ idx.setDirWeight(0.85) # Lower scores for directory matches
36
37
 
37
38
  t = Time.new
38
39
 
data/stridx.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "StrIdx"
3
- spec.version = "0.1.5"
3
+ spec.version = "0.1.6"
4
4
  spec.authors = ["Sami Sieranoja"]
5
5
  spec.email = ["sami.sieranoja@gmail.com"]
6
6
 
data/stridx.hpp CHANGED
@@ -324,7 +324,11 @@ struct PathSegment {
324
324
 
325
325
  // Candidate for result in string (filename) search
326
326
  struct Candidate {
327
+
328
+ //This holds the subscores for each character in the query string
327
329
  std::vector<float> v_charscore;
330
+
331
+
328
332
  PathSegment *seg;
329
333
  int fileId;
330
334
  // The string that this candidate represents
@@ -343,6 +347,7 @@ struct Candidate {
343
347
  candLen = seg->size();
344
348
  }
345
349
 
350
+ // Sum subscores in v_charscore and normalize to get final score
346
351
  [[nodiscard]] float getScore() const {
347
352
  int i = 0;
348
353
  float score = 0.0;
@@ -375,13 +380,14 @@ private:
375
380
 
376
381
  std::vector<PathSegment *> segsToClean;
377
382
 
383
+ // Maps ids stored in charTree to the corresponding PathSegments
378
384
  std::unordered_map<int, PathSegment *> seglist;
379
385
  std::unordered_map<int, PathSegment *> seglist_dir;
380
386
  std::mutex seglist_mu;
381
387
 
382
388
  PathSegment *root;
383
389
  int dirId = 0;
384
- float dirWeight = 0.7; // Give only 70% of score if match is for a directory
390
+ float dirWeight = 1.0; // =0.7: Give only 70% of score if match is for a directory
385
391
 
386
392
  std::unique_ptr<ThreadPool> pool;
387
393
  Output out{1}; // verbose level = 1
@@ -588,12 +594,17 @@ public:
588
594
  void searchCharTree(const std::string &query, CandMap &candmap, CharTree &chartr) {
589
595
 
590
596
  int last_start = query.size() - 2;
597
+ // Loop over all possible start positions in the query string: indexes [0..(n-2)], n = query.size()
591
598
  for (int start = 0; start <= last_start; start++) {
592
599
  CharNode *cn = chartr.root;
600
+
601
+ // select a suffix (substring) starting from start, but cap length to 8 chars
593
602
  int end = std::min(start + 7, ((int)query.size()) - 1);
594
603
  int nchars = end - start + 1;
595
604
  std::string s = query.substr(start, nchars);
596
605
 
606
+ // Loop all chars of the query substring
607
+ // Traverse the char tree from its root, one char at a time
597
608
  for (int i = 0; i < s.size(); i++) {
598
609
  char c = s[i];
599
610
  CharNode *x = cn->find(c);
@@ -601,12 +612,20 @@ public:
601
612
  cn = x;
602
613
  // Consider scores only for substrings with size >= 2
603
614
  if (i > 0) {
615
+ // If we've reached here, size of substring is i+1 (at least 2)
616
+
617
+ // Get identifiers of files that include substring
618
+ // query[start..(start+i)] (inclusive range)
604
619
  std::set<int> ids = cn->getIds();
605
620
  for (const int &y : ids) {
606
621
  PathSegment *p = nullptr;
622
+
623
+ // Searching in file segments
624
+ // (or no file/dir separation)
607
625
  if (&chartr == &cm) {
608
626
  p = seglist[y];
609
627
  } else {
628
+ // Searching in dir segments
610
629
  p = seglist_dir[y];
611
630
  }
612
631
  assert(p != nullptr);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: StrIdx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sami Sieranoja
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-06-05 00:00:00.000000000 Z
11
+ date: 2025-07-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler