StrIdx 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +1 -0
- data/README.md +17 -3
- data/demo.cpp +36 -5
- data/exe/stridx.rb +6 -1
- data/flist.txt +0 -5550
- data/rubyext/ruby_interf.cpp +58 -2
- data/runserver.rb +20 -0
- data/server.rb +7 -2
- data/stridx.gemspec +1 -5
- data/stridx.hpp +411 -226
- data/thread_pool.hpp +20 -5
- data/unittest.cpp +58 -16
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6d559876ad4cbf26be66db682e45701f80359d9b4752fdc61042c665f62c3000
|
4
|
+
data.tar.gz: 6c0aa9cce9cea114ada2d1dfb08a833f2d338a3d9815c2a5dc32ceb15186bfe7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6f9bf0a17ee3541b0dfc2e296bbb78dcb2c86c821427ffff5b1d6a86b7f1a7df53d428e5fc447a148c561177a495c0a9013cf1ebe18fd074fbc478bf7ad81fec
|
7
|
+
data.tar.gz: 0cdead8fab1925979337d5fa5ee890c42c654006db3aed2274e7a0e4a41cc50e157a346b28cbb2f5ce2dee35876010d503602932addcf06f5409a75b8cb5c81c
|
data/Makefile
CHANGED
data/README.md
CHANGED
@@ -5,8 +5,14 @@ The fuzziness means that candidate filepaths do not need to include exact match
|
|
5
5
|
|
6
6
|
The library can also be applied to UTF-8 data, although there is a small bias in scoring for multibyte characters.
|
7
7
|
|
8
|
+
1. [String similarity calculation](#stringsim)
|
9
|
+
2. [Interfaces](#interfaces)
|
10
|
+
1. [Command line](#comm)
|
11
|
+
2. [Ruby](#ruby)
|
12
|
+
3. [C++](#cpp)
|
8
13
|
|
9
|
-
|
14
|
+
|
15
|
+
## String similarity calculation <a name="stringsim"/>
|
10
16
|
|
11
17
|
Once the index has been created, the contents can be searched to find the best matching strings.
|
12
18
|
|
@@ -39,7 +45,7 @@ and candidate is "./drivers/char/hw_random/nomadik-rng.c", then scores are calcu
|
|
39
45
|
|
40
46
|
# Interfaces
|
41
47
|
|
42
|
-
## Commandline
|
48
|
+
## Command line <a name="comm"/>
|
43
49
|
Install instructions (for Ubuntu Linux):
|
44
50
|
```
|
45
51
|
apt update
|
@@ -55,8 +61,15 @@ stridx.rb start -- ~/Documents/ ~/Pictures/
|
|
55
61
|
Add bash keybindings (Ctrl-t):
|
56
62
|
```
|
57
63
|
eval "$(stridx.rb bash)"
|
64
|
+
|
65
|
+
```
|
66
|
+
|
67
|
+
To autostart the server, add the following line to .bashrc:
|
68
|
+
```
|
69
|
+
[ ! -f ~/.stridx/sock ] && stridx.rb start -- ~/Documents/ ~/Pictures/
|
58
70
|
```
|
59
71
|
|
72
|
+
|
60
73
|
Search by pressing <kbd>ctrl</kbd>+<kbd>t</kbd>. Keys: <kbd>up</kbd>, <kbd>down</kbd>, select with <kbd>enter</kbd>
|
61
74
|
|
62
75
|

|
@@ -66,6 +79,7 @@ Stop server:
|
|
66
79
|
```
|
67
80
|
stridx.rb stop
|
68
81
|
```
|
82
|
+
In case stop doesn't work, try: `kill $(pgrep -f runserver.rb | tail -n 1)`
|
69
83
|
|
70
84
|
Start the indexing server (in the foreground, to debug):
|
71
85
|
```
|
@@ -151,7 +165,7 @@ Search time: 0.0488 seconds
|
|
151
165
|
```
|
152
166
|
|
153
167
|
|
154
|
-
## C++
|
168
|
+
## C++ <a name="cpp"/>
|
155
169
|
See demo.cpp
|
156
170
|
```cpp
|
157
171
|
#include "stridx.hpp"
|
data/demo.cpp
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
|
2
|
+
#include <sys/resource.h>
|
3
|
+
|
2
4
|
#include <condition_variable>
|
3
5
|
#include <functional>
|
4
6
|
#include <iostream>
|
@@ -46,7 +48,11 @@ int main() {
|
|
46
48
|
std::string fn_filePaths = "flist.txt";
|
47
49
|
std::vector<std::string> v_filePaths = readLinesFromFile(fn_filePaths);
|
48
50
|
|
49
|
-
|
51
|
+
// int* a = new int[10];
|
52
|
+
// delete(a);
|
53
|
+
// delete(a);
|
54
|
+
|
55
|
+
// Launch indexing to be run on background
|
50
56
|
cout << "File paths: " << v_filePaths.size() << std::endl;
|
51
57
|
cout << "Start indexing in the background" << std::endl;
|
52
58
|
auto start = std::chrono::high_resolution_clock::now();
|
@@ -60,22 +66,33 @@ int main() {
|
|
60
66
|
std::chrono::duration<double, std::milli> duration_launch = idx_time_launch - start;
|
61
67
|
cout << "Indexing launch time (seconds): " << duration_launch.count() / 1000 << "\n";
|
62
68
|
|
63
|
-
|
69
|
+
// Wait until indexing has finished
|
64
70
|
idx.waitUntilDone();
|
65
71
|
|
66
72
|
auto idx_time = std::chrono::high_resolution_clock::now();
|
67
73
|
std::chrono::duration<double, std::milli> duration = idx_time - start;
|
68
74
|
cout << "Indexing finished time for " << v_filePaths.size()
|
69
75
|
<< " file paths (seconds): " << duration.count() / 1000 << "\n";
|
76
|
+
|
77
|
+
|
78
|
+
cout << "DEBUG" << std::endl;
|
79
|
+
// idx.cm.debug();
|
80
|
+
cout << "END DEBUG" << std::endl;
|
70
81
|
|
71
82
|
// Find matching filepaths from the index for the query string "rngnomadriv"
|
72
83
|
start = std::chrono::high_resolution_clock::now();
|
73
|
-
std::string query = "rngnomadriv";
|
84
|
+
// std::string query = "rngnomadriv";
|
85
|
+
std::string query = "irqbypass.c";
|
74
86
|
for (int i = 0; i < 99; i++) {
|
75
|
-
const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
|
87
|
+
// const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
|
88
|
+
const vector<pair<float, int>> &results = idx.findSim(query);
|
76
89
|
}
|
77
90
|
|
78
|
-
|
91
|
+
|
92
|
+
// idx.findSim(query);
|
93
|
+
|
94
|
+
// const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
|
95
|
+
const vector<pair<float, int>> &results = idx.findSim(query);
|
79
96
|
auto search_time = std::chrono::high_resolution_clock::now();
|
80
97
|
duration = search_time - start;
|
81
98
|
cout << "Search time for 100 queries (seconds): " << duration.count() / 1000 << "\n";
|
@@ -91,6 +108,20 @@ int main() {
|
|
91
108
|
}
|
92
109
|
}
|
93
110
|
|
111
|
+
// std::cout << "Size of MyClass: " << sizeof(StrIdx::CharMap) << " bytes" << std::endl;
|
112
|
+
// std::cout << "Size of CharMap3: " << sizeof(StrIdx::CharMap3) << " bytes" << std::endl;
|
113
|
+
std::cout << "Size of CharNode: " << sizeof(StrIdx::CharNode) << " bytes" << std::endl;
|
114
|
+
std::cout << "Size of int: " << sizeof(int) << " bytes" << std::endl;
|
115
|
+
|
116
|
+
struct rusage usage;
|
117
|
+
getrusage(RUSAGE_SELF, &usage);
|
118
|
+
std::cout << "Maximum resident set size: " << usage.ru_maxrss << " kilobytes" << std::endl;
|
119
|
+
std::cout << "Integral shared memory size: " << usage.ru_ixrss << " kilobytes" << std::endl;
|
120
|
+
std::cout << "Integral unshared data size: " << usage.ru_idrss << " kilobytes" << std::endl;
|
121
|
+
std::cout << "Integral unshared stack size: " << usage.ru_isrss << " kilobytes" << std::endl;
|
122
|
+
|
123
|
+
|
124
|
+
|
94
125
|
return 0;
|
95
126
|
}
|
96
127
|
|
data/exe/stridx.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
require 'fileutils'
|
2
3
|
|
3
4
|
$:.unshift File.dirname(__FILE__) + "/.."
|
4
5
|
|
@@ -12,5 +13,9 @@ elsif ARGV[0] == "bash"
|
|
12
13
|
/
|
13
14
|
else
|
14
15
|
require "daemons"
|
15
|
-
|
16
|
+
|
17
|
+
pid_dir_path = File.expand_path("~/.config/stridx/")
|
18
|
+
FileUtils.mkdir_p(pid_dir_path)
|
19
|
+
Daemons.run(File.dirname(__FILE__) + "/../runserver.rb",
|
20
|
+
{:dir_mode => :normal, :dir => pid_dir_path })
|
16
21
|
end
|