StrIdx 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CMakeLists.txt +27 -0
- data/Gemfile +5 -0
- data/Makefile +2 -2
- data/README.md +49 -3
- data/demo.cpp +30 -8
- data/exe/stridx.rb +16 -0
- data/gem_install +4 -0
- data/py_example.py +18 -0
- data/py_interf.cpp +182 -0
- data/rubyext/extconf.rb +1 -3
- data/rubyext/ruby_interf.cpp +18 -5
- data/runserver.rb +7 -0
- data/server.rb +103 -0
- data/setup.py +32 -0
- data/stridx-screencast.mp4 +0 -0
- data/stridx-tty.rb +122 -0
- data/stridx.gemspec +37 -0
- data/stridx.hpp +172 -71
- data/test.rb +12 -1
- data/thread_pool.hpp +98 -0
- data/unit_tests.sh +4 -0
- data/unittest.cpp +147 -0
- metadata +103 -3
data/thread_pool.hpp
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
|
2
|
+
// Based on example in https://www.geeksforgeeks.org/thread-pool-in-cpp/
|
3
|
+
|
4
|
+
#include <condition_variable>
|
5
|
+
#include <functional>
|
6
|
+
#include <iostream>
|
7
|
+
#include <mutex>
|
8
|
+
#include <queue>
|
9
|
+
#include <thread>
|
10
|
+
#include <algorithm>
|
11
|
+
#include <iostream>
|
12
|
+
#include <fstream>
|
13
|
+
#include <vector>
|
14
|
+
#include <string>
|
15
|
+
#include <chrono>
|
16
|
+
|
17
|
+
// Fixed-size pool of worker threads executing queued std::function<void()> tasks.
//
// Tasks are run in FIFO order by whichever worker becomes free first. The
// destructor drains the queue (all already-enqueued tasks are executed) and
// then joins every worker.
//
// Thread-safety: enqueue() and waitUntilDone() may be called concurrently from
// any thread outside the pool. waitUntilDone() must NOT be called from inside
// a task, because the calling task itself counts as unfinished work.
//
// A task that throws lets the exception escape the worker thread, which ends
// the program via std::terminate (std::thread semantics).
class ThreadPool {
public:
  // Create a thread pool with given number of threads
  ThreadPool(size_t num_threads) {
    workerThreads.reserve(num_threads);
    for (size_t i = 0; i < num_threads; ++i) {
      workerThreads.emplace_back([this] { workerLoop(); });
    }
  }

  // The pool owns threads that capture `this`; copying or moving it would
  // leave them pointing at the wrong object.
  ThreadPool(const ThreadPool &) = delete;
  ThreadPool &operator=(const ThreadPool &) = delete;

  // Destructor to stop the thread pool. Pending tasks are still executed
  // before the workers exit.
  ~ThreadPool() {
    {
      std::lock_guard<std::mutex> lock(mu_queue);
      stop_ = true;
    }

    // Wake every worker so each one can observe stop_
    cv_.notify_all();

    // Joining all worker threads to ensure they have completed their tasks
    for (auto &thread : workerThreads) {
      thread.join();
    }
  }

  // Wait until all tasks assigned to the threads have been finished.
  //
  // Blocks until the queue is empty AND no worker is still executing a task.
  // (Checking only the queue is not enough: a task that was already dequeued
  // may still be running.)
  void waitUntilDone() {
    std::unique_lock<std::mutex> lock(mu_queue);
    cv_done_.wait(lock, [this] { return taskQueue.empty() && activeTasks_ == 0; });
  }

  // Enqueue task for execution by the thread pool
  void enqueue(std::function<void()> task) {
    {
      std::lock_guard<std::mutex> lock(mu_queue);
      taskQueue.emplace(std::move(task));
    }
    cv_.notify_one();
  }

private:
  // Body of every worker thread: pop tasks until the pool is stopped and the
  // queue has been drained.
  void workerLoop() {
    while (true) {
      std::function<void()> task;
      {
        std::unique_lock<std::mutex> lock(mu_queue);

        // Waiting until there is a task to execute or the pool is stopped
        cv_.wait(lock, [this] { return !taskQueue.empty() || stop_; });

        // Exit the thread in case the pool is stopped and there are no tasks
        if (stop_ && taskQueue.empty()) {
          return;
        }

        // Get the next task from the queue and mark it as in flight while
        // still holding the lock, so waitUntilDone() never sees a moment
        // where the task is neither queued nor counted.
        task = std::move(taskQueue.front());
        taskQueue.pop();
        ++activeTasks_;
      }

      // Run the task without holding the lock so other workers can proceed
      task();

      {
        std::lock_guard<std::mutex> lock(mu_queue);
        --activeTasks_;
        if (taskQueue.empty() && activeTasks_ == 0) {
          cv_done_.notify_all();
        }
      }
    }
  }

  std::vector<std::thread> workerThreads;
  std::queue<std::function<void()>> taskQueue;

  // Guards taskQueue, stop_ and activeTasks_
  std::mutex mu_queue;

  // Condition variable to signal changes in the state of the tasks queue
  std::condition_variable cv_;

  // Signalled when the pool becomes idle (empty queue, no running task)
  std::condition_variable cv_done_;

  // Number of tasks that have been dequeued but have not finished yet
  size_t activeTasks_ = 0;

  // Flag to indicate whether the thread pool should stop
  bool stop_ = false;
};
|
data/unit_tests.sh
ADDED
data/unittest.cpp
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
|
2
|
+
#include <gtest/gtest.h>
|
3
|
+
#include "stridx.hpp"
|
4
|
+
#include <cmath>
|
5
|
+
#include <memory>
|
6
|
+
|
7
|
+
// Splitting a path on '/' must yield one element per path component, each
// holding exactly the characters of that component.
TEST(SplitString, MatchSize) {
  const std::vector<std::string> svec = StrIdx::splitString("foo/bar/test1.txt", '/');

  // ASSERT (not EXPECT): the element checks below index into svec and are
  // only valid when all three components are present. Unsigned literals keep
  // the comparison against size_t free of sign-compare warnings.
  ASSERT_EQ(svec.size(), 3u);
  EXPECT_EQ(svec[0].size(), 3u); // "foo"
  EXPECT_EQ(svec[2].size(), 9u); // "test1.txt"
}
|
15
|
+
|
16
|
+
// File paths indexed by the score regression test, in insertion order.
// Read-only test fixture, hence const.
const std::vector<std::string> flist{"./drivers/char/hw_random/nomadik-rng.c",
                                     "./drivers/pinctrl/nomadik",
                                     "./drivers/clk/clk-nomadik.c",
                                     "./drivers/gpio/gpio-nomadik.c",
                                     "./drivers/i2c/busses/i2c-nomadik.c",
                                     "./drivers/clocksource/nomadik-mtu.c",
                                     "./drivers/gpu/drm/pl111/pl111_nomadik.h",
                                     "./drivers/gpu/drm/pl111/pl111_nomadik.c",
                                     "./drivers/pinctrl/nomadik/pinctrl-nomadik.c",
                                     "./drivers/input/keyboard/nomadik-ske-keypad.c",
                                     "./drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c",
                                     "./drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c",
                                     "./drivers/char/hw_random/omap-rng.c",
                                     "./drivers/char/hw_random/omap3-rom-rng.c",
                                     "./include/dt-bindings/pinctrl/nomadik.h",
                                     "./Documentation/devicetree/bindings/arm/ste-nomadik.txt"};
|
32
|
+
|
33
|
+
// Expected similarity scores for the score regression test, in the order the
// search results are compared against them. Read-only test fixture, hence const.
const std::vector<float> target_scores{0.342944, 0.271396, 0.271126, 0.270893, 0.270431, 0.270355,
                                       0.270088, 0.270088, 0.26987,  0.269776, 0.269574, 0.269538,
                                       0.236358, 0.236074, 0.224804, 0.224238};
|
36
|
+
|
37
|
+
void scoreTest(bool threaded) {
|
38
|
+
|
39
|
+
StrIdx::StringIndex idx('/'); // Separate directories using unix style "/" char
|
40
|
+
std::string query = "rngnomadriv";
|
41
|
+
|
42
|
+
int i = 1;
|
43
|
+
for (const auto &str : flist) {
|
44
|
+
if (threaded) {
|
45
|
+
idx.addStrToIndexThreaded(str, i);
|
46
|
+
} else {
|
47
|
+
idx.addStrToIndex(str, i);
|
48
|
+
}
|
49
|
+
i++;
|
50
|
+
}
|
51
|
+
const std::vector<std::pair<float, int>> &results = idx.findSimilar(query);
|
52
|
+
|
53
|
+
std::cout << results[0].first;
|
54
|
+
EXPECT_EQ(results[0].second, 1);
|
55
|
+
if (results.size() == 16) {
|
56
|
+
int i = 0;
|
57
|
+
for (const auto &res : results) {
|
58
|
+
// Check if first five digits of the scores match
|
59
|
+
EXPECT_EQ(std::floor(res.first * 1e5), std::floor(1e5 * target_scores[i]));
|
60
|
+
i++;
|
61
|
+
}
|
62
|
+
}
|
63
|
+
}
|
64
|
+
|
65
|
+
// Score regression check with the index built through scoreTest(false).
TEST(IndexSearch, MatchingScoresSingleThread) {
  const bool use_threads = false;
  scoreTest(use_threads);
}

// Score regression check with the index built through scoreTest(true).
TEST(IndexSearch, MatchingScoresThreaded) {
  const bool use_threads = true;
  scoreTest(use_threads);
}
|
67
|
+
|
68
|
+
class IndexTest : public testing::Test {
|
69
|
+
protected:
|
70
|
+
std::unique_ptr<StrIdx::StringIndex> idx = std::make_unique<StrIdx::StringIndex>('/');
|
71
|
+
|
72
|
+
IndexTest() {}
|
73
|
+
|
74
|
+
void SetUp() override {
|
75
|
+
// Code here will be called immediately after the constructor (right
|
76
|
+
// before each test).
|
77
|
+
idx = std::make_unique<StrIdx::StringIndex>('/');
|
78
|
+
}
|
79
|
+
|
80
|
+
void TearDown() override {
|
81
|
+
// Code here will be called immediately after each test (right
|
82
|
+
// before the destructor).
|
83
|
+
}
|
84
|
+
};
|
85
|
+
|
86
|
+
// getKeyAtIdx("abcdefgh", 0, 8) must render as the eight bytes 'a'..'h'
// packed into 64 bits, 'a' first.
TEST_F(IndexTest, BinaryRepresentation1) {
  const int64_t num = idx->getKeyAtIdx("abcdefgh", 0, 8);
  const std::string s = StrIdx::int64ToBinaryString(num);
  // EXPECT_EQ (rather than EXPECT_TRUE on ==) prints both bit strings on failure.
  //            a       b       c       d       e       f       g       h
  EXPECT_EQ(s, "0110000101100010011000110110010001100101011001100110011101101000");
}
|
92
|
+
|
93
|
+
// getKeyAtIdx("abcdefgh", 0, 1) must render as a single byte 'a' in the
// lowest 8 bits, with all higher bits zero.
TEST_F(IndexTest, BinaryRepresentation2) {
  const int64_t num = idx->getKeyAtIdx("abcdefgh", 0, 1);
  const std::string s = StrIdx::int64ToBinaryString(num);
  // EXPECT_EQ (rather than EXPECT_TRUE on ==) prints both bit strings on failure.
  EXPECT_EQ(s,
            "0000000000000000000000000000000000000000000000000000000001100001"); // 01100001 == "a"
}
|
99
|
+
// getKeyAtIdx("abcdefgh", 7, 1) must render as a single byte 'h' in the
// lowest 8 bits, with all higher bits zero.
TEST_F(IndexTest, BinaryRepresentation3) {
  const int64_t num = idx->getKeyAtIdx("abcdefgh", 7, 1);
  const std::string s = StrIdx::int64ToBinaryString(num);
  // EXPECT_EQ (rather than EXPECT_TRUE on ==) prints both bit strings on failure.
  EXPECT_EQ(s,
            "0000000000000000000000000000000000000000000000000000000001101000"); // 01101000 == "h"
}
|
105
|
+
|
106
|
+
// Strings added under distinct ids are counted by size() and can be read back
// through getString(id).
TEST_F(IndexTest, AccessString) {
  struct Entry {
    const char *path;
    int id;
  };
  // Inserted in this exact order
  const Entry entries[] = {
      {"./drivers/i2c/busses/i2c-nomadik.c", 0},
      {"./drivers/i2c/busses/i2c-nomadiksdf.c", 2},
      {"./drivers//i2c///busses////aa-i2c-nomadiksdf.c", 3},
      {"/test/foo/bar.txt", 4},
      {"bar.txt", 5},
  };
  for (const Entry &entry : entries) {
    idx->addStrToIndex(entry.path, entry.id);
  }

  EXPECT_EQ(idx->size(), 5);
  EXPECT_STREQ(idx->getString(0).c_str(), "./drivers/i2c/busses/i2c-nomadik.c");

  // TODO: does not work yet
  // EXPECT_STREQ(idx->getString(3).c_str(), "./drivers//i2c///busses////aa-i2c-nomadiksdf.c");

  // TODO: does not work yet
  // EXPECT_STREQ(idx->getString(4).c_str(), "/test/foo/bar.txt");
  EXPECT_STREQ(idx->getString(5).c_str(), "bar.txt");
}
|
123
|
+
|
124
|
+
// Only the first insertion may take effect when later insertions reuse its id
// or repeat its path: size() stays 1 and id 0 keeps its original string.
TEST_F(IndexTest, Size1) {
  struct Entry {
    const char *path;
    int id;
  };
  // Inserted in this exact order
  const Entry entries[] = {
      // Should not add different files with same id
      {"./drivers/i2c/busses/i2c-nomadik.c", 0},
      {"./drivers/i2c/busses/i2c-nomadiksdf.c", 0},
      // Should not be added because essentially same as 0:
      {"./drivers//i2c///busses////i2c-nomadik.c", 44},
      // Should not add same file with different id
      {"./drivers/i2c/busses/i2c-nomadik.c", 1},
      {"./drivers/i2c/busses/i2c-nomadik.c", 2},
  };
  for (const Entry &entry : entries) {
    idx->addStrToIndex(entry.path, entry.id);
  }

  EXPECT_EQ(idx->size(), 1);

  // Test no-overwriting
  EXPECT_STREQ(idx->getString(0).c_str(), "./drivers/i2c/busses/i2c-nomadik.c");
}
|
141
|
+
|
142
|
+
// Three different paths added under three different ids must all be counted.
TEST_F(IndexTest, Size2) {
  struct Entry {
    const char *path;
    int id;
  };
  // Inserted in this exact order
  const Entry entries[] = {
      {"./drivers/i2c/busses/i2c-nomadik.c", 22},
      {"./Documentation/devicetree/bindings/arm/ste-nomadik.txt", 1},
      {"./Documentation/devicetree/bindings/arm/ste-nomadik33.txt", 3},
  };
  for (const Entry &entry : entries) {
    idx->addStrToIndex(entry.path, entry.id);
  }

  EXPECT_EQ(idx->size(), 3);
}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: StrIdx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sami Sieranoja
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -38,23 +38,123 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 13.1.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: tty-cursor
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.7.1
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.7.1
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: tty-prompt
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.23.1
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.23.1
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: tty-reader
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.9.0
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.9.0
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: tty-screen
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.8.2
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 0.8.2
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: pastel
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 0.8.0
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 0.8.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: daemons
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 1.4.1
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 1.4.1
|
41
125
|
description: " Fast fuzzy string similarity search and indexing (for filenames)"
|
42
126
|
email:
|
43
127
|
- sami.sieranoja@gmail.com
|
44
|
-
executables:
|
128
|
+
executables:
|
129
|
+
- stridx.rb
|
45
130
|
extensions:
|
46
131
|
- rubyext/extconf.rb
|
47
132
|
extra_rdoc_files: []
|
48
133
|
files:
|
134
|
+
- CMakeLists.txt
|
135
|
+
- Gemfile
|
49
136
|
- LICENSE
|
50
137
|
- Makefile
|
51
138
|
- README.md
|
52
139
|
- demo.cpp
|
140
|
+
- exe/stridx.rb
|
53
141
|
- flist.txt
|
142
|
+
- gem_install
|
143
|
+
- py_example.py
|
144
|
+
- py_interf.cpp
|
54
145
|
- rubyext/extconf.rb
|
55
146
|
- rubyext/ruby_interf.cpp
|
147
|
+
- runserver.rb
|
148
|
+
- server.rb
|
149
|
+
- setup.py
|
150
|
+
- stridx-screencast.mp4
|
151
|
+
- stridx-tty.rb
|
152
|
+
- stridx.gemspec
|
56
153
|
- stridx.hpp
|
57
154
|
- test.rb
|
155
|
+
- thread_pool.hpp
|
156
|
+
- unit_tests.sh
|
157
|
+
- unittest.cpp
|
58
158
|
- unordered_dense.h
|
59
159
|
homepage: https://github.com/SamiSieranoja/stridx
|
60
160
|
licenses:
|