StrIdx 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CMakeLists.txt +27 -0
- data/Gemfile +5 -0
- data/Makefile +1 -0
- data/README.md +54 -3
- data/demo.cpp +36 -5
- data/exe/stridx.rb +21 -0
- data/flist.txt +0 -5550
- data/gem_install +4 -0
- data/py_example.py +18 -0
- data/py_interf.cpp +182 -0
- data/rubyext/ruby_interf.cpp +58 -2
- data/runserver.rb +27 -0
- data/server.rb +108 -0
- data/setup.py +32 -0
- data/stridx-screencast.mp4 +0 -0
- data/stridx-tty.rb +122 -0
- data/stridx.gemspec +33 -0
- data/stridx.hpp +435 -243
- data/test.rb +7 -2
- data/thread_pool.hpp +20 -5
- data/unit_tests.sh +4 -0
- data/unittest.cpp +189 -0
- metadata +106 -7
data/test.rb
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
$:.unshift File.dirname(__FILE__)
|
4
|
+
|
3
5
|
require "stridx"
|
4
6
|
idx = StrIdx::StringIndex.new
|
5
7
|
|
8
|
+
# "/" for unix-style file paths
|
9
|
+
idx.setDirSeparator("/") #(comment out if not file paths)
|
10
|
+
|
6
11
|
t = Time.new
|
7
12
|
fn = File.expand_path("flist.txt")
|
8
13
|
lines = IO.read(fn).lines.collect { |x| x.strip }
|
@@ -14,12 +19,12 @@ end
|
|
14
19
|
|
15
20
|
idx_time = Time.new
|
16
21
|
# Time to start the threadpool to process indexing
|
17
|
-
puts "\nIndexing launch time (#{lines.size} files
|
22
|
+
puts "\nIndexing launch time (#{lines.size} files): #{(idx_time - t).round(4)} seconds"
|
18
23
|
|
19
24
|
idx.waitUntilDone() # Not necessary, will be called by idx.find
|
20
25
|
idx_time = Time.new
|
21
26
|
# Time when all threads have completed
|
22
|
-
puts "\nIndexing completed time (#{lines.size} files
|
27
|
+
puts "\nIndexing completed time (#{lines.size} files): #{(idx_time - t).round(4)} seconds"
|
23
28
|
|
24
29
|
query = "rngnomadriv"
|
25
30
|
res = idx.find(query)
|
data/thread_pool.hpp
CHANGED
@@ -21,14 +21,21 @@ public:
|
|
21
21
|
|
22
22
|
// Creating worker threads
|
23
23
|
for (size_t i = 0; i < num_threads; ++i) {
|
24
|
-
|
24
|
+
workDone.push_back(false);
|
25
|
+
workerThreads.emplace_back([this, i] {
|
25
26
|
while (true) {
|
27
|
+
|
28
|
+
// std::lock_guard<std::mutex) mu_guard(mu_done);
|
26
29
|
std::function<void()> task;
|
27
30
|
{
|
28
31
|
std::unique_lock<std::mutex> lock(mu_queue);
|
29
32
|
|
30
33
|
// Waiting until there is a task to execute or the pool is stopped
|
31
|
-
|
34
|
+
|
35
|
+
workDone[i] = true;
|
36
|
+
cv_.wait(lock, [this, i] { return !taskQueue.empty() || stop_; });
|
37
|
+
|
38
|
+
workDone[i] = false;
|
32
39
|
|
33
40
|
// Exit the thread in case the pool is stopped and there are no tasks
|
34
41
|
if (stop_ && taskQueue.empty()) {
|
@@ -39,7 +46,6 @@ public:
|
|
39
46
|
task = std::move(taskQueue.front());
|
40
47
|
taskQueue.pop();
|
41
48
|
}
|
42
|
-
|
43
49
|
task();
|
44
50
|
}
|
45
51
|
});
|
@@ -68,11 +74,18 @@ public:
|
|
68
74
|
while (true) {
|
69
75
|
{
|
70
76
|
std::lock_guard<std::mutex> guard(mu_queue);
|
71
|
-
|
77
|
+
bool done = true;
|
78
|
+
|
79
|
+
for (auto x : workDone) {
|
80
|
+
if (x == false) {
|
81
|
+
done = false;
|
82
|
+
}
|
83
|
+
}
|
84
|
+
if (done && taskQueue.empty()) {
|
72
85
|
return;
|
73
86
|
}
|
74
87
|
}
|
75
|
-
std::this_thread::sleep_for(std::chrono::milliseconds(
|
88
|
+
std::this_thread::sleep_for(std::chrono::milliseconds(10));
|
76
89
|
}
|
77
90
|
}
|
78
91
|
|
@@ -89,6 +102,8 @@ private:
|
|
89
102
|
std::vector<std::thread> workerThreads;
|
90
103
|
std::queue<std::function<void()>> taskQueue;
|
91
104
|
std::mutex mu_queue;
|
105
|
+
std::vector<bool> workDone;
|
106
|
+
std::mutex mu_done;
|
92
107
|
|
93
108
|
// Condition variable to signal changes in the state of the tasks queue
|
94
109
|
std::condition_variable cv_;
|
data/unit_tests.sh
ADDED
data/unittest.cpp
ADDED
@@ -0,0 +1,189 @@
|
|
1
|
+
|
2
|
+
#include <gtest/gtest.h>
|
3
|
+
#include "stridx.hpp"
|
4
|
+
#include <cmath>
|
5
|
+
#include <memory>
|
6
|
+
#include <chrono>
|
7
|
+
#include <thread>
|
8
|
+
|
9
|
+
TEST(SplitString, MatchSize) {
|
10
|
+
std::vector<std::string> svec = StrIdx::splitString("foo/bar/test1.txt", '/');
|
11
|
+
EXPECT_EQ(svec.size(), 3);
|
12
|
+
if (svec.size() == 3) {
|
13
|
+
EXPECT_EQ(svec[0].size(), 3);
|
14
|
+
EXPECT_EQ(svec[2].size(), 9);
|
15
|
+
}
|
16
|
+
}
|
17
|
+
|
18
|
+
std::vector<std::string> flist{
|
19
|
+
"./drivers/char/hw_random/nomadik-rng.c",
|
20
|
+
"./drivers/pinctrl/nomadik",
|
21
|
+
"./drivers/clk/clk-nomadik.c",
|
22
|
+
"./drivers/gpio/gpio-nomadik.c",
|
23
|
+
"./drivers/i2c/busses/i2c-nomadik.c",
|
24
|
+
"./drivers/clocksource/nomadik-mtu.c",
|
25
|
+
"./drivers/gpu/drm/pl111/pl111_nomadik.h",
|
26
|
+
"./drivers/gpu/drm/pl111/pl111_nomadik.c",
|
27
|
+
"./drivers/pinctrl/nomadik/pinctrl-nomadik.c",
|
28
|
+
"./drivers/input/keyboard/nomadik-ske-keypad.c",
|
29
|
+
"./drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c",
|
30
|
+
"./drivers/pinctrl/nomadik/pinctrl-nomadik-stn8816.c",
|
31
|
+
"./drivers/char/hw_random/omap-rng.c",
|
32
|
+
"./drivers/char/hw_random/omap3-rom-rng.c",
|
33
|
+
"./include/dt-bindings/pinctrl/nomadik.h",
|
34
|
+
"./Documentation/devicetree/bindings/arm/ste-nomadik.txt"};
|
35
|
+
|
36
|
+
std::vector<float> target_scores{0.342944, 0.271396, 0.271126, 0.270893, 0.270431, 0.270355,
|
37
|
+
0.270088, 0.270088, 0.26987, 0.269776, 0.269574, 0.269538,
|
38
|
+
0.236358, 0.236074, 0.224804, 0.224238};
|
39
|
+
|
40
|
+
void createIndex(bool threaded) {
|
41
|
+
|
42
|
+
StrIdx::StringIndex idx('/'); // Separate directories using unix style "/" char
|
43
|
+
std::string query = "rngnomadriv";
|
44
|
+
|
45
|
+
int i = 1;
|
46
|
+
for (const auto &str : flist) {
|
47
|
+
if (threaded && i > 2) {
|
48
|
+
idx.addStrToIndexThreaded(str, i);
|
49
|
+
} else {
|
50
|
+
idx.addStrToIndex(str, i);
|
51
|
+
}
|
52
|
+
i++;
|
53
|
+
}
|
54
|
+
|
55
|
+
idx.waitUntilReady();
|
56
|
+
|
57
|
+
EXPECT_EQ(idx.size(), 16);
|
58
|
+
}
|
59
|
+
|
60
|
+
void scoreTest(bool threaded, bool runSearch) {
|
61
|
+
|
62
|
+
StrIdx::StringIndex idx('/'); // Separate directories using unix style "/" char
|
63
|
+
std::string query = "rngnomadriv";
|
64
|
+
|
65
|
+
int i = 1;
|
66
|
+
for (const auto &str : flist) {
|
67
|
+
if (threaded) {
|
68
|
+
idx.addStrToIndexThreaded(str, i);
|
69
|
+
} else {
|
70
|
+
idx.addStrToIndex(str, i);
|
71
|
+
}
|
72
|
+
i++;
|
73
|
+
}
|
74
|
+
|
75
|
+
idx.waitUntilReady();
|
76
|
+
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
|
77
|
+
std::cout << "s:" << idx.size() << ":";
|
78
|
+
EXPECT_EQ(idx.size(), 16);
|
79
|
+
|
80
|
+
if (runSearch) {
|
81
|
+
const std::vector<std::pair<float, int>> &results = idx.findSimilar(query);
|
82
|
+
|
83
|
+
std::cout << results[0].first;
|
84
|
+
EXPECT_EQ(results[0].second, 1);
|
85
|
+
if (results.size() == 16) {
|
86
|
+
int i = 0;
|
87
|
+
for (const auto &res : results) {
|
88
|
+
// Check if first five digits of the scores match
|
89
|
+
std::cout<< "{" << res.first << " " << target_scores[i] << "\n";
|
90
|
+
EXPECT_EQ(std::floor(res.first * 1e5), std::floor(1e5 * target_scores[i]));
|
91
|
+
i++;
|
92
|
+
}
|
93
|
+
}
|
94
|
+
}
|
95
|
+
|
96
|
+
// EXPECT_EQ(0,1);
|
97
|
+
}
|
98
|
+
|
99
|
+
TEST(Index, Create) { createIndex(false); }
|
100
|
+
TEST(Index, CreateThreaded) { createIndex(true); }
|
101
|
+
TEST(IndexSearch, MatchingScoresSingleThread) {
|
102
|
+
scoreTest(false, true);
|
103
|
+
}
|
104
|
+
TEST(IndexSearch, MatchingScoresThreaded) {
|
105
|
+
for (int i = 0; i < 3; i++) {
|
106
|
+
scoreTest(true, true);
|
107
|
+
}
|
108
|
+
}
|
109
|
+
|
110
|
+
class IndexTest : public testing::Test {
|
111
|
+
protected:
|
112
|
+
std::unique_ptr<StrIdx::StringIndex> idx = std::make_unique<StrIdx::StringIndex>('/');
|
113
|
+
|
114
|
+
IndexTest() {}
|
115
|
+
|
116
|
+
void SetUp() override {
|
117
|
+
// Code here will be called immediately after the constructor (right
|
118
|
+
// before each test).
|
119
|
+
idx = std::make_unique<StrIdx::StringIndex>('/');
|
120
|
+
}
|
121
|
+
|
122
|
+
void TearDown() override {
|
123
|
+
// Code here will be called immediately after each test (right
|
124
|
+
// before the destructor).
|
125
|
+
}
|
126
|
+
};
|
127
|
+
|
128
|
+
TEST_F(IndexTest, BinaryRepresentation1) {
|
129
|
+
int64_t num = idx->getKeyAtIdx("abcdefgh", 0, 8);
|
130
|
+
std::string s = StrIdx::int64ToBinaryString(num);
|
131
|
+
// a b c d ...
|
132
|
+
EXPECT_TRUE(s == "0110000101100010011000110110010001100101011001100110011101101000");
|
133
|
+
}
|
134
|
+
|
135
|
+
TEST_F(IndexTest, BinaryRepresentation2) {
|
136
|
+
int64_t num = idx->getKeyAtIdx("abcdefgh", 0, 1);
|
137
|
+
std::string s = StrIdx::int64ToBinaryString(num);
|
138
|
+
EXPECT_TRUE(
|
139
|
+
s == "0000000000000000000000000000000000000000000000000000000001100001"); // 01100001 == "a"
|
140
|
+
}
|
141
|
+
TEST_F(IndexTest, BinaryRepresentation3) {
|
142
|
+
int64_t num = idx->getKeyAtIdx("abcdefgh", 7, 1);
|
143
|
+
std::string s = StrIdx::int64ToBinaryString(num);
|
144
|
+
EXPECT_TRUE(
|
145
|
+
s == "0000000000000000000000000000000000000000000000000000000001101000"); // 01101000 == "h"
|
146
|
+
}
|
147
|
+
|
148
|
+
TEST_F(IndexTest, AccessString) {
|
149
|
+
idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadik.c", 0);
|
150
|
+
idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadiksdf.c", 2);
|
151
|
+
idx->addStrToIndex("./drivers//i2c///busses////aa-i2c-nomadiksdf.c", 3);
|
152
|
+
idx->addStrToIndex("/test/foo/bar.txt", 4);
|
153
|
+
idx->addStrToIndex("bar.txt", 5);
|
154
|
+
|
155
|
+
EXPECT_EQ(idx->size(), 5);
|
156
|
+
EXPECT_STREQ(idx->getString(0).c_str(), "./drivers/i2c/busses/i2c-nomadik.c");
|
157
|
+
|
158
|
+
// TODO: does not work yet
|
159
|
+
// EXPECT_STREQ(idx->getString(3).c_str(), "./drivers//i2c///busses////aa-i2c-nomadiksdf.c");
|
160
|
+
|
161
|
+
// TODO: does not work yet
|
162
|
+
// EXPECT_STREQ(idx->getString(4).c_str(), "/test/foo/bar.txt");
|
163
|
+
EXPECT_STREQ(idx->getString(5).c_str(), "bar.txt");
|
164
|
+
}
|
165
|
+
|
166
|
+
TEST_F(IndexTest, Size1) {
|
167
|
+
// Should not add different files with same id
|
168
|
+
idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadik.c", 0);
|
169
|
+
idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadiksdf.c", 0);
|
170
|
+
|
171
|
+
// Should not be added because essentially same as 0:
|
172
|
+
idx->addStrToIndex("./drivers//i2c///busses////i2c-nomadik.c", 44);
|
173
|
+
|
174
|
+
// Should not add same file with different id
|
175
|
+
idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadik.c", 1);
|
176
|
+
idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadik.c", 2);
|
177
|
+
|
178
|
+
EXPECT_EQ(idx->size(), 1);
|
179
|
+
|
180
|
+
// Test no-overwriting
|
181
|
+
EXPECT_STREQ(idx->getString(0).c_str(), "./drivers/i2c/busses/i2c-nomadik.c");
|
182
|
+
}
|
183
|
+
|
184
|
+
TEST_F(IndexTest, Size2) {
|
185
|
+
idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadik.c", 22);
|
186
|
+
idx->addStrToIndex("./Documentation/devicetree/bindings/arm/ste-nomadik.txt", 1);
|
187
|
+
idx->addStrToIndex("./Documentation/devicetree/bindings/arm/ste-nomadik33.txt", 3);
|
188
|
+
EXPECT_EQ(idx->size(), 3);
|
189
|
+
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: StrIdx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sami Sieranoja
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-06-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -38,24 +38,123 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 13.1.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: tty-cursor
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.7.1
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.7.1
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: tty-prompt
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.23.1
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.23.1
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: tty-reader
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.9.0
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.9.0
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: tty-screen
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.8.2
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 0.8.2
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: pastel
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 0.8.0
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 0.8.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: daemons
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 1.4.1
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 1.4.1
|
41
125
|
description: " Fast fuzzy string similarity search and indexing (for filenames)"
|
42
126
|
email:
|
43
127
|
- sami.sieranoja@gmail.com
|
44
|
-
executables:
|
128
|
+
executables:
|
129
|
+
- stridx.rb
|
45
130
|
extensions:
|
46
131
|
- rubyext/extconf.rb
|
47
132
|
extra_rdoc_files: []
|
48
133
|
files:
|
134
|
+
- CMakeLists.txt
|
135
|
+
- Gemfile
|
49
136
|
- LICENSE
|
50
137
|
- Makefile
|
51
138
|
- README.md
|
52
139
|
- demo.cpp
|
140
|
+
- exe/stridx.rb
|
53
141
|
- flist.txt
|
142
|
+
- gem_install
|
143
|
+
- py_example.py
|
144
|
+
- py_interf.cpp
|
54
145
|
- rubyext/extconf.rb
|
55
146
|
- rubyext/ruby_interf.cpp
|
147
|
+
- runserver.rb
|
148
|
+
- server.rb
|
149
|
+
- setup.py
|
150
|
+
- stridx-screencast.mp4
|
151
|
+
- stridx-tty.rb
|
152
|
+
- stridx.gemspec
|
56
153
|
- stridx.hpp
|
57
154
|
- test.rb
|
58
155
|
- thread_pool.hpp
|
156
|
+
- unit_tests.sh
|
157
|
+
- unittest.cpp
|
59
158
|
- unordered_dense.h
|
60
159
|
homepage: https://github.com/SamiSieranoja/stridx
|
61
160
|
licenses:
|
@@ -63,7 +162,7 @@ licenses:
|
|
63
162
|
metadata:
|
64
163
|
source_code_uri: https://github.com/SamiSieranoja/stridx
|
65
164
|
homepage_uri: https://github.com/SamiSieranoja/stridx
|
66
|
-
post_install_message:
|
165
|
+
post_install_message:
|
67
166
|
rdoc_options: []
|
68
167
|
require_paths:
|
69
168
|
- lib
|
@@ -79,8 +178,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
79
178
|
- !ruby/object:Gem::Version
|
80
179
|
version: '0'
|
81
180
|
requirements: []
|
82
|
-
rubygems_version: 3.
|
83
|
-
signing_key:
|
181
|
+
rubygems_version: 3.4.20
|
182
|
+
signing_key:
|
84
183
|
specification_version: 4
|
85
184
|
summary: StrIdx
|
86
185
|
test_files: []
|