StrIdx 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CMakeLists.txt +27 -0
- data/Gemfile +5 -0
- data/Makefile +1 -0
- data/README.md +54 -3
- data/demo.cpp +36 -5
- data/exe/stridx.rb +21 -0
- data/flist.txt +0 -5550
- data/gem_install +4 -0
- data/py_example.py +18 -0
- data/py_interf.cpp +182 -0
- data/rubyext/ruby_interf.cpp +58 -2
- data/runserver.rb +27 -0
- data/server.rb +108 -0
- data/setup.py +32 -0
- data/stridx-screencast.mp4 +0 -0
- data/stridx-tty.rb +122 -0
- data/stridx.gemspec +33 -0
- data/stridx.hpp +435 -243
- data/test.rb +7 -2
- data/thread_pool.hpp +20 -5
- data/unit_tests.sh +4 -0
- data/unittest.cpp +189 -0
- metadata +106 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6d559876ad4cbf26be66db682e45701f80359d9b4752fdc61042c665f62c3000
|
4
|
+
data.tar.gz: 6c0aa9cce9cea114ada2d1dfb08a833f2d338a3d9815c2a5dc32ceb15186bfe7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6f9bf0a17ee3541b0dfc2e296bbb78dcb2c86c821427ffff5b1d6a86b7f1a7df53d428e5fc447a148c561177a495c0a9013cf1ebe18fd074fbc478bf7ad81fec
|
7
|
+
data.tar.gz: 0cdead8fab1925979337d5fa5ee890c42c654006db3aed2274e7a0e4a41cc50e157a346b28cbb2f5ce2dee35876010d503602932addcf06f5409a75b8cb5c81c
|
data/CMakeLists.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
cmake_minimum_required(VERSION 3.14)
|
2
|
+
|
3
|
+
project(my_project)
|
4
|
+
# https://github.com/google/googletest/issues/4000
|
5
|
+
include(FetchContent)
|
6
|
+
FetchContent_Declare(
|
7
|
+
googletest
|
8
|
+
URL https://github.com/google/googletest/archive/58d77fa8070e8cec2dc1ed015d66b454c8d78850.zip # release-1.12.1
|
9
|
+
)
|
10
|
+
|
11
|
+
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
12
|
+
FetchContent_MakeAvailable(googletest)
|
13
|
+
|
14
|
+
enable_testing()
|
15
|
+
|
16
|
+
add_executable(
|
17
|
+
stridx_test
|
18
|
+
unittest.cpp
|
19
|
+
)
|
20
|
+
target_link_libraries(
|
21
|
+
stridx_test
|
22
|
+
GTest::gtest_main
|
23
|
+
)
|
24
|
+
|
25
|
+
include(GoogleTest)
|
26
|
+
gtest_discover_tests(stridx_test)
|
27
|
+
|
data/Makefile
CHANGED
data/README.md
CHANGED
@@ -5,8 +5,14 @@ The fuzziness means that candidate filepaths do not need to include exact match
|
|
5
5
|
|
6
6
|
The library can also be applied to UTF-8 data, although there is a small bias in scoring for multibyte characters.
|
7
7
|
|
8
|
+
1. [String similarity calculation](#stringsim)
|
9
|
+
2. [Interfaces](#interfaces)
|
10
|
+
1. [Command line](#comm)
|
11
|
+
2. [Ruby](#ruby)
|
12
|
+
3. [C++](#cpp)
|
8
13
|
|
9
|
-
|
14
|
+
|
15
|
+
## String similarity calculation <a name="stringsim"/>
|
10
16
|
|
11
17
|
Once the index has been created, the contents can be searched to find the best matching strings.
|
12
18
|
|
@@ -37,9 +43,54 @@ and candidate is "./drivers/char/hw_random/nomadik-rng.c", then scores are calcu
|
|
37
43
|
score = score1/(11*11)*0.97 + score1/(11*38)*0.03 = 0.342944
|
38
44
|
```
|
39
45
|
|
40
|
-
#
|
46
|
+
# Interfaces
|
47
|
+
|
48
|
+
## Command line <a name="comm"/>
|
49
|
+
Install instructions (for Ubuntu Linux):
|
50
|
+
```
|
51
|
+
apt update
|
52
|
+
apt install ruby ruby-dev build-essential
|
53
|
+
gem install StrIdx
|
54
|
+
```
|
55
|
+
|
56
|
+
Start the indexing server (in the background):
|
57
|
+
```
|
58
|
+
stridx.rb start -- ~/Documents/ ~/Pictures/
|
59
|
+
```
|
60
|
+
|
61
|
+
Add bash keybindings (Ctrl-t):
|
62
|
+
```
|
63
|
+
eval "$(stridx.rb bash)"
|
64
|
+
|
65
|
+
```
|
66
|
+
|
67
|
+
To autostart the server, add the following line to .bashrc:
|
68
|
+
```
|
69
|
+
[ ! -f ~/.stridx/sock ] && stridx.rb start -- ~/Documents/ ~/Pictures/
|
70
|
+
```
|
71
|
+
|
72
|
+
|
73
|
+
Search by pressing <kbd>ctrl</kbd>+<kbd>t</kbd>. Keys: <kbd>up</kbd>, <kbd>down</kbd>, select with <kbd>enter</kbd>
|
74
|
+
|
75
|
+

|
76
|
+
|
77
|
+
|
78
|
+
Stop server:
|
79
|
+
```
|
80
|
+
stridx.rb stop
|
81
|
+
```
|
82
|
+
In case stop doesn't work, try: `kill $(pgrep -f runserver.rb | tail -n 1)`
|
83
|
+
|
84
|
+
Start the indexing server (in the foreground, for debugging):
|
85
|
+
```
|
86
|
+
stridx.rb run -- ~/Documents/ ~/Pictures/
|
87
|
+
```
|
88
|
+
|
89
|
+
|
90
|
+
## Ruby <a name="ruby"/>
|
41
91
|
Install:
|
42
92
|
```
|
93
|
+
apt install ruby ruby-dev build-essential
|
43
94
|
gem install StrIdx
|
44
95
|
```
|
45
96
|
|
@@ -114,7 +165,7 @@ Search time: 0.0488 seconds
|
|
114
165
|
```
|
115
166
|
|
116
167
|
|
117
|
-
|
168
|
+
## C++ <a name="cpp"/>
|
118
169
|
See demo.cpp
|
119
170
|
```cpp
|
120
171
|
#include "stridx.hpp"
|
data/demo.cpp
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
|
2
|
+
#include <sys/resource.h>
|
3
|
+
|
2
4
|
#include <condition_variable>
|
3
5
|
#include <functional>
|
4
6
|
#include <iostream>
|
@@ -46,7 +48,11 @@ int main() {
|
|
46
48
|
std::string fn_filePaths = "flist.txt";
|
47
49
|
std::vector<std::string> v_filePaths = readLinesFromFile(fn_filePaths);
|
48
50
|
|
49
|
-
|
51
|
+
// int* a = new int[10];
|
52
|
+
// delete(a);
|
53
|
+
// delete(a);
|
54
|
+
|
55
|
+
// Launch indexing to be run on background
|
50
56
|
cout << "File paths: " << v_filePaths.size() << std::endl;
|
51
57
|
cout << "Start indexing in the background" << std::endl;
|
52
58
|
auto start = std::chrono::high_resolution_clock::now();
|
@@ -60,22 +66,33 @@ int main() {
|
|
60
66
|
std::chrono::duration<double, std::milli> duration_launch = idx_time_launch - start;
|
61
67
|
cout << "Indexing launch time (seconds): " << duration_launch.count() / 1000 << "\n";
|
62
68
|
|
63
|
-
|
69
|
+
// Wait until indexing has finished
|
64
70
|
idx.waitUntilDone();
|
65
71
|
|
66
72
|
auto idx_time = std::chrono::high_resolution_clock::now();
|
67
73
|
std::chrono::duration<double, std::milli> duration = idx_time - start;
|
68
74
|
cout << "Indexing finished time for " << v_filePaths.size()
|
69
75
|
<< " file paths (seconds): " << duration.count() / 1000 << "\n";
|
76
|
+
|
77
|
+
|
78
|
+
cout << "DEBUG" << std::endl;
|
79
|
+
// idx.cm.debug();
|
80
|
+
cout << "END DEBUG" << std::endl;
|
70
81
|
|
71
82
|
// Find matching filepaths from the index for the query string "rngnomadriv"
|
72
83
|
start = std::chrono::high_resolution_clock::now();
|
73
|
-
std::string query = "rngnomadriv";
|
84
|
+
// std::string query = "rngnomadriv";
|
85
|
+
std::string query = "irqbypass.c";
|
74
86
|
for (int i = 0; i < 99; i++) {
|
75
|
-
const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
|
87
|
+
// const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
|
88
|
+
const vector<pair<float, int>> &results = idx.findSim(query);
|
76
89
|
}
|
77
90
|
|
78
|
-
|
91
|
+
|
92
|
+
// idx.findSim(query);
|
93
|
+
|
94
|
+
// const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
|
95
|
+
const vector<pair<float, int>> &results = idx.findSim(query);
|
79
96
|
auto search_time = std::chrono::high_resolution_clock::now();
|
80
97
|
duration = search_time - start;
|
81
98
|
cout << "Search time for 100 queries (seconds): " << duration.count() / 1000 << "\n";
|
@@ -91,6 +108,20 @@ int main() {
|
|
91
108
|
}
|
92
109
|
}
|
93
110
|
|
111
|
+
// std::cout << "Size of MyClass: " << sizeof(StrIdx::CharMap) << " bytes" << std::endl;
|
112
|
+
// std::cout << "Size of CharMap3: " << sizeof(StrIdx::CharMap3) << " bytes" << std::endl;
|
113
|
+
std::cout << "Size of CharNode: " << sizeof(StrIdx::CharNode) << " bytes" << std::endl;
|
114
|
+
std::cout << "Size of int: " << sizeof(int) << " bytes" << std::endl;
|
115
|
+
|
116
|
+
struct rusage usage;
|
117
|
+
getrusage(RUSAGE_SELF, &usage);
|
118
|
+
std::cout << "Maximum resident set size: " << usage.ru_maxrss << " kilobytes" << std::endl;
|
119
|
+
std::cout << "Integral shared memory size: " << usage.ru_ixrss << " kilobytes" << std::endl;
|
120
|
+
std::cout << "Integral unshared data size: " << usage.ru_idrss << " kilobytes" << std::endl;
|
121
|
+
std::cout << "Integral unshared stack size: " << usage.ru_isrss << " kilobytes" << std::endl;
|
122
|
+
|
123
|
+
|
124
|
+
|
94
125
|
return 0;
|
95
126
|
}
|
96
127
|
|
data/exe/stridx.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/.."
|
5
|
+
|
6
|
+
if ARGV[0] == "tty"
|
7
|
+
require "stridx-tty.rb"
|
8
|
+
StrIdxTTY.run
|
9
|
+
elsif ARGV[0] == "bash"
|
10
|
+
puts %q/
|
11
|
+
bind -m emacs-standard '"\er": redraw-current-line';
|
12
|
+
bind -m emacs-standard '"\C-t": " \C-b\C-k \C-u`stridx.rb tty`\e\C-e\er\C-a\C-y\C-h\C-e\e \C-y\ey\C-x\C-x\C-f"'
|
13
|
+
/
|
14
|
+
else
|
15
|
+
require "daemons"
|
16
|
+
|
17
|
+
pid_dir_path = File.expand_path("~/.config/stridx/")
|
18
|
+
FileUtils.mkdir_p(pid_dir_path)
|
19
|
+
Daemons.run(File.dirname(__FILE__) + "/../runserver.rb",
|
20
|
+
{:dir_mode => :normal, :dir => pid_dir_path })
|
21
|
+
end
|