StrIdx 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6d559876ad4cbf26be66db682e45701f80359d9b4752fdc61042c665f62c3000
4
- data.tar.gz: 6c0aa9cce9cea114ada2d1dfb08a833f2d338a3d9815c2a5dc32ceb15186bfe7
3
+ metadata.gz: 4d5ef6484e911e5eea742e95575fca38e86e0ff4c819e937ba7a4d9bb8457b38
4
+ data.tar.gz: fd9e7d3d3c758840ed8fd8681c67a7993ad2ac1f20e20cd9795dab4e2ddce965
5
5
  SHA512:
6
- metadata.gz: 6f9bf0a17ee3541b0dfc2e296bbb78dcb2c86c821427ffff5b1d6a86b7f1a7df53d428e5fc447a148c561177a495c0a9013cf1ebe18fd074fbc478bf7ad81fec
7
- data.tar.gz: 0cdead8fab1925979337d5fa5ee890c42c654006db3aed2274e7a0e4a41cc50e157a346b28cbb2f5ce2dee35876010d503602932addcf06f5409a75b8cb5c81c
6
+ metadata.gz: 26c852b1bfbe04de48a3872c54a8d02eb94ed9da9644d45ba5606df7c73f59cad3b7a64dd151c3ac7aae5f8bd06f9d1953c2529edbe5588d78c46a2808d2fd44
7
+ data.tar.gz: 01d6e6c07ea359a89a60d7c438275b313cb7c04061a5e46d85f784d35016c1259aa139f0510da1f5dd62f6046aa9baf72c1b350ec15c2f25928ded60672bef62
data/README.md CHANGED
@@ -66,7 +66,7 @@ eval "$(stridx.rb bash)"
66
66
 
67
67
  To autostart the server, add the following line to .bashrc:
68
68
  ```
69
- [ ! -f ~/.stridx/sock ] && stridx.rb start -- ~/Documents/ ~/Pictures/
69
+ stridx.rb start -- ~/Documents/ ~/Pictures/
70
70
  ```
71
71
 
72
72
 
@@ -79,7 +79,6 @@ Stop server:
79
79
  ```
80
80
  stridx.rb stop
81
81
  ```
82
- In case stop doesn't work, try: `kill $(pgrep -f runserver.rb | tail -n 1)`
83
82
 
84
83
  Start indexing server (on foreground, to debug):
85
84
  ```
data/demo.cpp CHANGED
@@ -1,5 +1,8 @@
1
1
 
2
2
  #include <sys/resource.h>
3
+ #include <malloc.h>
4
+
5
+ #include "mem_info.h"
3
6
 
4
7
  #include <condition_variable>
5
8
  #include <functional>
@@ -21,7 +24,7 @@ using std::cout;
21
24
  using std::pair;
22
25
  using std::vector;
23
26
 
24
- std::vector<std::string> readLinesFromFile(const std::string &filename) {
27
+ std::vector<std::string> readLinesFromFile(const std::string &filename, int limit = 0) {
25
28
  std::vector<std::string> lines;
26
29
  std::ifstream file(filename);
27
30
  if (!file.is_open()) {
@@ -30,8 +33,10 @@ std::vector<std::string> readLinesFromFile(const std::string &filename) {
30
33
  }
31
34
 
32
35
  std::string line;
33
- while (std::getline(file, line)) {
36
+ int i=0;
37
+ while (std::getline(file, line) && ( limit == 0 || i < limit) ) {
34
38
  lines.push_back(line);
39
+ i++;
35
40
  }
36
41
 
37
42
  file.close();
@@ -39,88 +44,134 @@ std::vector<std::string> readLinesFromFile(const std::string &filename) {
39
44
  }
40
45
 
41
46
  int main() {
42
- StrIdx::StringIndex idx('/'); // Separate directories using unix style "/" char
43
- // idx.addStrToIndex("./gdk/x11/gdkasync.c", 0 /*id*/, '/' /*separator*/);
44
- // idx.addStrToIndex("./gdk/x11/gdksettings.c", 1, '/');
45
- // idx.addStrToIndex("./gdk/x11/gdkx11devicemanager-xi2.h", 2, '/');
46
-
47
- // Add the file paths of 89828 files in linux-6.9-rc6 to the index
48
- std::string fn_filePaths = "flist.txt";
49
- std::vector<std::string> v_filePaths = readLinesFromFile(fn_filePaths);
50
-
51
- // int* a = new int[10];
52
- // delete(a);
53
- // delete(a);
54
-
55
- // Launch indexing to be run on background
56
- cout << "File paths: " << v_filePaths.size() << std::endl;
57
- cout << "Start indexing in the background" << std::endl;
58
- auto start = std::chrono::high_resolution_clock::now();
59
- int id = 0;
60
- for (const auto &filePath : v_filePaths) {
61
- idx.addStrToIndexThreaded(filePath, id);
62
- id++;
63
- }
64
-
65
- auto idx_time_launch = std::chrono::high_resolution_clock::now();
66
- std::chrono::duration<double, std::milli> duration_launch = idx_time_launch - start;
67
- cout << "Indexing launch time (seconds): " << duration_launch.count() / 1000 << "\n";
68
-
69
- // Wait until indexing has finished
70
- idx.waitUntilDone();
71
-
72
- auto idx_time = std::chrono::high_resolution_clock::now();
73
- std::chrono::duration<double, std::milli> duration = idx_time - start;
74
- cout << "Indexing finished time for " << v_filePaths.size()
75
- << " file paths (seconds): " << duration.count() / 1000 << "\n";
76
-
77
-
78
- cout << "DEBUG" << std::endl;
79
- // idx.cm.debug();
80
- cout << "END DEBUG" << std::endl;
81
-
82
- // Find matching filepaths from the index for the query string "rngnomadriv"
83
- start = std::chrono::high_resolution_clock::now();
84
- // std::string query = "rngnomadriv";
85
- std::string query = "irqbypass.c";
86
- for (int i = 0; i < 99; i++) {
87
- // const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
88
- const vector<pair<float, int>> &results = idx.findSim(query);
89
- }
90
-
47
+ {
48
+ StrIdx::StringIndex idx('/'); // Separate directories using unix style "/" char
49
+ // idx.addStrToIndex("./gdk/x11/gdkasync.c", 0 /*id*/, '/' /*separator*/);
50
+ // idx.addStrToIndex("./gdk/x11/gdksettings.c", 1, '/');
51
+ // idx.addStrToIndex("./gdk/x11/gdkx11devicemanager-xi2.h", 2, '/');
52
+
53
+ // Add the file paths of 89828 files in linux-6.9-rc6 to the index
54
+ std::string fn_filePaths = "flist2.txt";
55
+ std::vector<std::string> v_filePaths = readLinesFromFile(fn_filePaths);
56
+ // std::vector<std::string> v_filePaths = readLinesFromFile(fn_filePaths,10000);
57
+
58
+ // int* a = new int[10];
59
+ // delete(a);
60
+ // delete(a);
61
+
62
+ // Launch indexing to be run on background
63
+ cout << "File paths: " << v_filePaths.size() << std::endl;
64
+ cout << "Start indexing in the background" << std::endl;
65
+ auto start = std::chrono::high_resolution_clock::now();
66
+ int id = 0;
67
+ for (const auto &filePath : v_filePaths) {
68
+ // idx.addStrToIndexThreaded(filePath, id);
69
+ idx.addStrToIndex(filePath, id);
70
+ id++;
71
+ }
91
72
 
92
- // idx.findSim(query);
73
+ std::cout << "========\n";
74
+ for (int i = 0; i < id; i++) {
75
+ // std::cout << idx.getString(2) << "{}";
76
+ idx.getString(i);
77
+ }
78
+ std::cout << "========\n";
79
+
80
+ auto idx_time_launch = std::chrono::high_resolution_clock::now();
81
+ std::chrono::duration<double, std::milli> duration_launch = idx_time_launch - start;
82
+ cout << "Indexing launch time (seconds): " << duration_launch.count() / 1000 << "\n";
83
+
84
+ // Wait until indexing has finished
85
+ idx.waitUntilDone();
86
+
87
+ auto idx_time = std::chrono::high_resolution_clock::now();
88
+ std::chrono::duration<double, std::milli> duration = idx_time - start;
89
+ cout << "Indexing finished time for " << v_filePaths.size()
90
+ << " file paths (seconds): " << duration.count() / 1000 << "\n";
91
+
92
+ cout << "DEBUG" << std::endl;
93
+ // idx.cm.debug();
94
+ cout << "END DEBUG" << std::endl;
95
+
96
+ // Find matching filepaths from the index for the query string "rngnomadriv"
97
+ start = std::chrono::high_resolution_clock::now();
98
+ std::string query = "rngnomadriv";
99
+ // std::string query = "rngnomaindriv";
100
+ // std::string query = "time.rs";
101
+ // std::string query = "irqbypass.c";
102
+ for (int i = 0; i < 99; i++) {
103
+ // const vector<pair<float, int>> &results = idx.findSimilar(query);
104
+ }
93
105
 
94
- // const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
95
- const vector<pair<float, int>> &results = idx.findSim(query);
96
- auto search_time = std::chrono::high_resolution_clock::now();
97
- duration = search_time - start;
98
- cout << "Search time for 100 queries (seconds): " << duration.count() / 1000 << "\n";
106
+ // idx.findSim(query);
107
+
108
+ // auto res = idx.findDirectories(query);
109
+
110
+ // const vector<pair<float, int>> &results = idx.findSimilar(query);
111
+ // const vector<pair<float, int>> &results = idx.findDirectories(query);
112
+ // const vector<pair<float, std::string>> &results = idx.findFilesAndDirectories(query, true,
113
+ // false);
114
+ vector<pair<float, std::string>> results = idx.findFilesAndDirectories(query, true, false);
115
+
116
+ auto search_time = std::chrono::high_resolution_clock::now();
117
+ duration = search_time - start;
118
+ cout << "Search time for 100 queries (seconds): " << duration.count() / 1000 << "\n";
119
+
120
+ int i = 0;
121
+ std::cout << "query string: " << query << "\n";
122
+ std::cout << "Top 20 matches[1]:\n";
123
+ bool isDir = true;
124
+ for (const auto &res : results) {
125
+ // std::cout << res.second << " " << res.first << " " << v_filePaths[res.second] << "\n";
126
+ std::cout << res.first << " " << res.second << "\n";
127
+ i++;
128
+ if (i > 40) {
129
+ break;
130
+ }
131
+ }
132
+
133
+ {
134
+
135
+ auto results = idx.findFiles(query);
136
+ int i = 0;
137
+ std::cout << "query string: " << query << "\n";
138
+ std::cout << "Top 20 matchesfff:\n";
139
+ bool isDir = true;
140
+ for (const auto &res : results) {
141
+ std::cout << res.second << " " << res.first << " " << v_filePaths[res.second] << "\n";
142
+ // std::cout << res.first << " " << res.second << "\n";
143
+ i++;
144
+ if (i > 40) {
145
+ break;
146
+ }
147
+ }
148
+ }
99
149
 
100
- int i = 0;
101
- std::cout << "query string: " << query << "\n";
102
- std::cout << "Top 20 matches:\n";
103
- for (const auto &res : results) {
104
- std::cout << res.second << " " << res.first << " " << v_filePaths[res.second] << "\n";
105
- i++;
106
- if (i > 20) {
107
- break;
150
+ std::cout << "========\n";
151
+ for (int i = 0; i < id; i++) {
152
+ // std::cout << idx.getString(2) << "{}";
153
+ // idx.getString(i);
108
154
  }
109
- }
155
+ std::cout << "========\n";
110
156
 
111
- // std::cout << "Size of MyClass: " << sizeof(StrIdx::CharMap) << " bytes" << std::endl;
112
- // std::cout << "Size of CharMap3: " << sizeof(StrIdx::CharMap3) << " bytes" << std::endl;
113
157
  std::cout << "Size of CharNode: " << sizeof(StrIdx::CharNode) << " bytes" << std::endl;
114
158
  std::cout << "Size of int: " << sizeof(int) << " bytes" << std::endl;
115
159
 
160
+ StrIdx::out.printl("MEMSTAT current:", getCurrentRSS(), " peak:", getPeakRSS());
161
+ // std::this_thread::sleep_for(std::chrono::milliseconds(7000));
162
+
163
+ }
164
+
165
+ // Force memory dealloc to properly benchmark
166
+ // https://www.reddit.com/r/C_Programming/comments/13dn8d7/is_malloc_trim_safe_to_use/
167
+ malloc_trim(0);
168
+
169
+ StrIdx::out.printl("MEMSTAT current:", getCurrentRSS(), " peak:", getPeakRSS());
170
+
171
+ // std::this_thread::sleep_for(std::chrono::milliseconds(7000));
116
172
  struct rusage usage;
117
173
  getrusage(RUSAGE_SELF, &usage);
118
174
  std::cout << "Maximum resident set size: " << usage.ru_maxrss << " kilobytes" << std::endl;
119
- std::cout << "Integral shared memory size: " << usage.ru_ixrss << " kilobytes" << std::endl;
120
- std::cout << "Integral unshared data size: " << usage.ru_idrss << " kilobytes" << std::endl;
121
- std::cout << "Integral unshared stack size: " << usage.ru_isrss << " kilobytes" << std::endl;
122
-
123
-
124
175
 
125
176
  return 0;
126
177
  }
data/exe/stridx.rb CHANGED
@@ -1,21 +1,133 @@
1
1
  #!/usr/bin/env ruby
2
- require 'fileutils'
2
+ require "fileutils"
3
3
 
4
4
  $:.unshift File.dirname(__FILE__) + "/.."
5
+ require "server.rb"
5
6
 
6
- if ARGV[0] == "tty"
7
+ CUR_FILE = File.basename(__FILE__)
8
+ PID_FILE = File.expand_path("~/.config/stridx/index.pid")
9
+ LOCK_FILE = File.expand_path("~/.config/stridx/index.lock")
10
+
11
+ pid_dir_path = File.expand_path("~/.config/stridx/")
12
+ FileUtils.mkdir_p(pid_dir_path)
13
+
14
+ # Guard against a race condition when two processes are started at the same time
15
+ def obtain_lock_or_exit
16
+ @lockfile = File.open(LOCK_FILE, File::RDWR | File::CREAT, 0644)
17
+
18
+ unless @lockfile.flock(File::LOCK_NB | File::LOCK_EX)
19
+ puts "Another instance is already running."
20
+ exit 1
21
+ end
22
+
23
+ # Optionally truncate and write PID for info/logging
24
+ @lockfile.truncate(0)
25
+ @lockfile.write("#{Process.pid}\n")
26
+ @lockfile.flush
27
+ end
28
+
29
+ def running?
30
+ return false unless File.exist?(PID_FILE)
31
+
32
+ pid = File.read(PID_FILE).to_i
33
+
34
+ begin
35
+ # Check if process exists
36
+ Process.kill(0, pid)
37
+
38
+ # Handle race condition: if the daemon was previously killed with "kill -9",
39
+ # the PID file may remain. A new, unrelated process could later reuse the same PID,
40
+ # causing a false positive when checking for an existing instance and preventing the daemon from starting.
41
+
42
+ # ./daemon.rb # Starts daemon
43
+ # kill -9 $(cat /tmp/daemon_example.pid) # Force kill
44
+ # echo $$ > /tmp/daemon_example.pid # Simulate reused PID (use another terminal)
45
+ # ./daemon.rb # Old version would fail here; fixed version should detect mismatch
46
+
47
+ # Check if command line matches this script
48
+ cmdline = File.read("/proc/#{pid}/cmdline").split("\0")
49
+
50
+ correct_process = cmdline.any? { |arg| arg.include?(CUR_FILE) }
51
+ puts correct_process
52
+ if correct_process == false
53
+ puts "Old pidfile points to wrong process"
54
+ return false
55
+ end
56
+
57
+ return true
58
+ rescue Errno::ESRCH, Errno::ENOENT
59
+ return false
60
+ rescue Errno::EACCES
61
+ # Process exists, but inaccessible — might still be ours
62
+ return true
63
+ end
64
+ end
65
+
66
+ # Old version without /proc check
67
+ def running_old?
68
+ return false unless File.exist?(PID_FILE)
69
+ pid = File.read(PID_FILE).to_i
70
+ Process.kill(0, pid)
71
+ true
72
+ rescue Errno::ESRCH, Errno::EPERM
73
+ false
74
+ end
75
+
76
+ def start(daemonize: false)
77
+ if running?
78
+ puts "Daemon is already running."
79
+ exit 1
80
+ end
81
+
82
+ if daemonize
83
+ # Daemonize the process
84
+ Process.daemon(true, true) # nochdir and noclose are both true: keep the working directory and leave stdio attached
85
+
86
+ # Save PID
87
+ File.write(PID_FILE, Process.pid)
88
+ puts "Daemon started with PID #{Process.pid}"
89
+
90
+ trap("TERM") do
91
+ puts "Daemon stopping..."
92
+ File.delete(PID_FILE) if File.exist?(PID_FILE)
93
+ exit
94
+ end
95
+ end
96
+
97
+ StrIdx::Server.start ARGV
98
+ end
99
+
100
+ def stop
101
+ unless File.exist?(PID_FILE)
102
+ puts "No PID file found. Daemon not running?"
103
+ exit 1
104
+ end
105
+
106
+ pid = File.read(PID_FILE).to_i
107
+ puts "Stopping daemon with PID #{pid}..."
108
+ Process.kill("TERM", pid)
109
+ File.delete(PID_FILE) rescue nil
110
+ rescue Errno::ESRCH
111
+ puts "Process not found. Cleaning up PID file."
112
+ File.delete(PID_FILE) rescue nil
113
+ end
114
+
115
+ # Entry point
116
+ case ARGV.first
117
+ when "stop"
118
+ stop
119
+ when "tty"
7
120
  require "stridx-tty.rb"
8
121
  StrIdxTTY.run
9
- elsif ARGV[0] == "bash"
122
+ when "bash"
10
123
  puts %q/
11
124
  bind -m emacs-standard '"\er": redraw-current-line';
12
125
  bind -m emacs-standard '"\C-t": " \C-b\C-k \C-u`stridx.rb tty`\e\C-e\er\C-a\C-y\C-h\C-e\e \C-y\ey\C-x\C-x\C-f"'
13
126
  /
14
- else
15
- require "daemons"
16
-
17
- pid_dir_path = File.expand_path("~/.config/stridx/")
18
- FileUtils.mkdir_p(pid_dir_path)
19
- Daemons.run(File.dirname(__FILE__) + "/../runserver.rb",
20
- {:dir_mode => :normal, :dir => pid_dir_path })
127
+ when "run"
128
+ obtain_lock_or_exit
129
+ start(daemonize: false)
130
+ when "start"
131
+ obtain_lock_or_exit
132
+ start(daemonize: true)
21
133
  end
data/rubyext/extconf.rb CHANGED
@@ -5,7 +5,7 @@ require 'mkmf'
5
5
  module_name = "stridx"
6
6
  extension_name = 'stridx'
7
7
 
8
- $CXXFLAGS << " -Wall -Wno-unused-variable -O3"
8
+ $CXXFLAGS << " -std=c++17 -Wall -Wno-unused-variable -O3"
9
9
 
10
10
  have_library( 'stdc++');
11
11
 
@@ -48,7 +48,32 @@ VALUE StringIndexWaitUntilDone(VALUE self) {
48
48
  ((StrIdx::StringIndex *)data)->waitUntilDone();
49
49
  return self;
50
50
  }
51
-
51
+
52
+ VALUE StringIndexFindNum(VALUE self, VALUE str, VALUE _limit) {
53
+ VALUE ret;
54
+ std::string s1 = StringValueCStr(str);
55
+
56
+ void *data;
57
+ TypedData_Get_Struct(self, int, &str_idx_type, data);
58
+ StrIdx::StringIndex *idx = (StrIdx::StringIndex *)data;
59
+
60
+ int limit = NUM2INT(_limit);
61
+
62
+ ret = rb_ary_new();
63
+ const std::vector<std::pair<float, int>> &results = idx->findSimilar(s1);
64
+ int i = 0;
65
+ for (const auto &res : results) {
66
+ VALUE arr = rb_ary_new();
67
+ rb_ary_push(arr, INT2NUM(res.second));
68
+ rb_ary_push(arr, DBL2NUM(res.first));
69
+ rb_ary_push(ret, arr);
70
+ i++;
71
+ if (i >= limit) {
72
+ break;
73
+ }
74
+ }
75
+ return ret;
76
+ }
52
77
 
53
78
  VALUE StringIndexFind(VALUE self, VALUE str) {
54
79
  VALUE ret;
@@ -109,7 +134,8 @@ VALUE StringIndexFindDirs(VALUE self, VALUE str) {
109
134
  StrIdx::StringIndex *idx = (StrIdx::StringIndex *)data;
110
135
 
111
136
  ret = rb_ary_new();
112
- const std::vector<std::pair<float, std::string>> &results = idx->findFilesAndDirectories(s1,false,true);
137
+ const std::vector<std::pair<float, std::string>> &results =
138
+ idx->findFilesAndDirectories(s1, false, true);
113
139
  int limit = 40;
114
140
  int i = 0;
115
141
  for (const auto &res : results) {
@@ -125,10 +151,6 @@ VALUE StringIndexFindDirs(VALUE self, VALUE str) {
125
151
  return ret;
126
152
  }
127
153
 
128
-
129
-
130
-
131
-
132
154
  VALUE StringIndexSetDirSeparator(VALUE self, VALUE str) {
133
155
  char c = '/';
134
156
  if (TYPE(str) == T_STRING) {
@@ -149,6 +171,17 @@ VALUE StringIndexSetDirSeparator(VALUE self, VALUE str) {
149
171
  return self;
150
172
  }
151
173
 
174
+ VALUE StringIndexSetDirWeight(VALUE self, VALUE d) {
175
+ if (TYPE(d) == T_FLOAT) {
176
+ double c_float = NUM2DBL(rb_funcall(d, rb_intern("to_f"), 0));
177
+ void *data;
178
+ TypedData_Get_Struct(self, int, &str_idx_type, data);
179
+ StrIdx::StringIndex *idx = (StrIdx::StringIndex *)data;
180
+ idx->setDirWeight(c_float);
181
+ }
182
+ return self;
183
+ }
184
+
152
185
  void Init_stridx(void) {
153
186
 
154
187
  VALUE mStrIdx = rb_define_module("StrIdx");
@@ -158,12 +191,12 @@ void Init_stridx(void) {
158
191
  rb_define_method(classStringIndex, "add", StringIndexAddSegments, 2);
159
192
  rb_define_method(classStringIndex, "waitUntilDone", StringIndexWaitUntilDone, 0);
160
193
  rb_define_method(classStringIndex, "find", StringIndexFind, 1);
194
+ rb_define_method(classStringIndex, "findNum", StringIndexFindNum, 2);
195
+ rb_define_method(classStringIndex, "setDirWeight", StringIndexSetDirWeight, 1);
161
196
  rb_define_method(classStringIndex, "findFilesAndDirs", StringIndexFindFilesAndDirs, 1);
162
197
  rb_define_method(classStringIndex, "findDirs", StringIndexFindDirs, 1);
163
-
198
+
164
199
  rb_define_method(classStringIndex, "setDirSeparator", StringIndexSetDirSeparator, 1);
165
-
166
-
167
200
  }
168
201
 
169
202
  } // End extern "C"
data/runserver.rb CHANGED
@@ -1,4 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
+
3
+ # Add this script's directory to the load path
2
4
  $:.unshift File.dirname(__FILE__)
3
5
 
4
6
  def kill_signal
data/server.rb CHANGED
@@ -33,6 +33,7 @@ module StrIdx
33
33
  def initialize(dir_list, daemonize: false)
34
34
  idx = StrIdx::StringIndex.new
35
35
  idx.setDirSeparator("/")
36
+ idx.setDirWeight(0.85) # Lower scores for directory matches
36
37
 
37
38
  t = Time.new
38
39
 
@@ -76,22 +77,21 @@ module StrIdx
76
77
 
77
78
  # Read data from the client
78
79
  data = client.recv(1024)
79
-
80
- if data.match(/^stop$/)
80
+ if data.nil?
81
+ # puts "GOT NIL"
82
+ elsif data.match(/^stop$/)
81
83
  puts "Got stop signal. Shutting down server."
82
84
  client.close
83
85
  break
84
- end
85
-
86
86
  # puts "Received from client: #{data}"
87
- if data.match(/^find:(.*)/)
87
+ elsif data.match(/^find:(.*)/)
88
88
  query = Regexp.last_match(1)
89
89
  # TODO: not sure which is best as default:
90
90
  # res = idx.find(query)
91
91
  # res = idx.findDirs(query)
92
92
  res = idx.findFilesAndDirs(query)
93
93
  # response = res.collect { |x| flist[x[0]] }.join("\n")
94
- response = res.collect { |x| "/"+x[0] }.join("\n")
94
+ response = res.collect { |x| "/" + x[0] }.join("\n")
95
95
 
96
96
  # Send a response back to the client
97
97
  client.puts response
data/stridx.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "StrIdx"
3
- spec.version = "0.1.5"
3
+ spec.version = "0.1.7"
4
4
  spec.authors = ["Sami Sieranoja"]
5
5
  spec.email = ["sami.sieranoja@gmail.com"]
6
6
 
data/stridx.hpp CHANGED
@@ -324,7 +324,11 @@ struct PathSegment {
324
324
 
325
325
  // Candidate for result in string (filename) search
326
326
  struct Candidate {
327
+
328
+ // Holds the subscore for each character of the query string
327
329
  std::vector<float> v_charscore;
330
+
331
+
328
332
  PathSegment *seg;
329
333
  int fileId;
330
334
  // The string that this candidate represents
@@ -343,6 +347,7 @@ struct Candidate {
343
347
  candLen = seg->size();
344
348
  }
345
349
 
350
+ // Sum subscores in v_charscore and normalize to get final score
346
351
  [[nodiscard]] float getScore() const {
347
352
  int i = 0;
348
353
  float score = 0.0;
@@ -375,13 +380,14 @@ private:
375
380
 
376
381
  std::vector<PathSegment *> segsToClean;
377
382
 
383
+ // Maps the IDs stored in charTree to the corresponding PathSegments
378
384
  std::unordered_map<int, PathSegment *> seglist;
379
385
  std::unordered_map<int, PathSegment *> seglist_dir;
380
386
  std::mutex seglist_mu;
381
387
 
382
388
  PathSegment *root;
383
389
  int dirId = 0;
384
- float dirWeight = 0.7; // Give only 70% of score if match is for a directory
390
+ float dirWeight = 1.0; // Score factor for directory matches; e.g. 0.7 gives them only 70% of the score
385
391
 
386
392
  std::unique_ptr<ThreadPool> pool;
387
393
  Output out{1}; // verbose level = 1
@@ -588,12 +594,17 @@ public:
588
594
  void searchCharTree(const std::string &query, CandMap &candmap, CharTree &chartr) {
589
595
 
590
596
  int last_start = query.size() - 2;
597
+ // Loop over all possible start positions in the query string: indexes [0..(n-2)], n = query.size()
591
598
  for (int start = 0; start <= last_start; start++) {
592
599
  CharNode *cn = chartr.root;
600
+
601
+ // select a suffix (substring) starting from start, but cap length to 8 chars
593
602
  int end = std::min(start + 7, ((int)query.size()) - 1);
594
603
  int nchars = end - start + 1;
595
604
  std::string s = query.substr(start, nchars);
596
605
 
606
+ // Loop all chars of the query substring
607
+ // Traverse the char tree from the root, one node per character
597
608
  for (int i = 0; i < s.size(); i++) {
598
609
  char c = s[i];
599
610
  CharNode *x = cn->find(c);
@@ -601,12 +612,20 @@ public:
601
612
  cn = x;
602
613
  // Consider scores only for substrings with size >= 2
603
614
  if (i > 0) {
615
+ // If we've reached here, the matched substring has i+1 characters (at least 2)
616
+
617
+ // Get identifiers of the path segments that include the substring
618
+ // query[start..(start+i)] (inclusive)
604
619
  std::set<int> ids = cn->getIds();
605
620
  for (const int &y : ids) {
606
621
  PathSegment *p = nullptr;
622
+
623
+ // Searching in file segments
624
+ // (or no file/dir separation)
607
625
  if (&chartr == &cm) {
608
626
  p = seglist[y];
609
627
  } else {
628
+ // Searching in dir segments
610
629
  p = seglist_dir[y];
611
630
  }
612
631
  assert(p != nullptr);
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: StrIdx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sami Sieranoja
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-06-05 00:00:00.000000000 Z
11
+ date: 2025-07-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -178,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
178
178
  - !ruby/object:Gem::Version
179
179
  version: '0'
180
180
  requirements: []
181
- rubygems_version: 3.4.20
181
+ rubygems_version: 3.5.22
182
182
  signing_key:
183
183
  specification_version: 4
184
184
  summary: StrIdx