StrIdx 0.1.3 → 0.1.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry for each version.
data/test.rb CHANGED
@@ -1,8 +1,13 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ $:.unshift File.dirname(__FILE__)
4
+
3
5
  require "stridx"
4
6
  idx = StrIdx::StringIndex.new
5
7
 
8
+ # "/" for unix-style file paths
9
+ idx.setDirSeparator("/") #(comment out if not file paths)
10
+
6
11
  t = Time.new
7
12
  fn = File.expand_path("flist.txt")
8
13
  lines = IO.read(fn).lines.collect { |x| x.strip }
@@ -14,12 +19,12 @@ end
14
19
 
15
20
  idx_time = Time.new
16
21
  # Time to start the threadpool to process indexing
17
- puts "\nIndexing launch time (#{lines.size} files}): #{(idx_time - t).round(4)} seconds"
22
+ puts "\nIndexing launch time (#{lines.size} files): #{(idx_time - t).round(4)} seconds"
18
23
 
19
24
  idx.waitUntilDone() # Not necessary, will be called by idx.find
20
25
  idx_time = Time.new
21
26
  # Time when all threads have completed
22
- puts "\nIndexing completed time (#{lines.size} files}): #{(idx_time - t).round(4)} seconds"
27
+ puts "\nIndexing completed time (#{lines.size} files): #{(idx_time - t).round(4)} seconds"
23
28
 
24
29
  query = "rngnomadriv"
25
30
  res = idx.find(query)
data/thread_pool.hpp CHANGED
@@ -21,14 +21,21 @@ public:
21
21
 
22
22
  // Creating worker threads
23
23
  for (size_t i = 0; i < num_threads; ++i) {
24
- workerThreads.emplace_back([this] {
24
+ workDone.push_back(false);
25
+ workerThreads.emplace_back([this, i] {
25
26
  while (true) {
27
+
28
+ // std::lock_guard<std::mutex> mu_guard(mu_done);
26
29
  std::function<void()> task;
27
30
  {
28
31
  std::unique_lock<std::mutex> lock(mu_queue);
29
32
 
30
33
  // Waiting until there is a task to execute or the pool is stopped
31
- cv_.wait(lock, [this] { return !taskQueue.empty() || stop_; });
34
+
35
+ workDone[i] = true;
36
+ cv_.wait(lock, [this, i] { return !taskQueue.empty() || stop_; });
37
+
38
+ workDone[i] = false;
32
39
 
33
40
  // Exit the thread in case the pool is stopped and there are no tasks
34
41
  if (stop_ && taskQueue.empty()) {
@@ -39,7 +46,6 @@ public:
39
46
  task = std::move(taskQueue.front());
40
47
  taskQueue.pop();
41
48
  }
42
-
43
49
  task();
44
50
  }
45
51
  });
@@ -68,11 +74,18 @@ public:
68
74
  while (true) {
69
75
  {
70
76
  std::lock_guard<std::mutex> guard(mu_queue);
71
- if (taskQueue.empty()) {
77
+ bool done = true;
78
+
79
+ for (auto x : workDone) {
80
+ if (x == false) {
81
+ done = false;
82
+ }
83
+ }
84
+ if (done && taskQueue.empty()) {
72
85
  return;
73
86
  }
74
87
  }
75
- std::this_thread::sleep_for(std::chrono::milliseconds(50));
88
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
76
89
  }
77
90
  }
78
91
 
@@ -89,6 +102,8 @@ private:
89
102
  std::vector<std::thread> workerThreads;
90
103
  std::queue<std::function<void()>> taskQueue;
91
104
  std::mutex mu_queue;
105
+ std::vector<bool> workDone;
106
+ std::mutex mu_done;
92
107
 
93
108
  // Condition variable to signal changes in the state of the tasks queue
94
109
  std::condition_variable cv_;
data/unit_tests.sh ADDED
@@ -0,0 +1,4 @@
1
+ #!/bin/bash
2
+ cmake -S . -B build
3
+ cmake --build build
4
+ cd build && ctest
data/unittest.cpp ADDED
@@ -0,0 +1,189 @@
1
+
2
+ #include <gtest/gtest.h>
3
+ #include "stridx.hpp"
4
+ #include <cmath>
5
+ #include <memory>
6
+ #include <chrono>
7
+ #include <thread>
8
+
9
+ TEST(SplitString, MatchSize) {
10
+ std::vector<std::string> svec = StrIdx::splitString("foo/bar/test1.txt", '/');
11
+ EXPECT_EQ(svec.size(), 3);
12
+ if (svec.size() == 3) {
13
+ EXPECT_EQ(svec[0].size(), 3);
14
+ EXPECT_EQ(svec[2].size(), 9);
15
+ }
16
+ }
17
+
18
+ std::vector<std::string> flist{
19
+ "./drivers/char/hw_random/nomadik-rng.c",
20
+ "./drivers/pinctrl/nomadik",
21
+ "./drivers/clk/clk-nomadik.c",
22
+ "./drivers/gpio/gpio-nomadik.c",
23
+ "./drivers/i2c/busses/i2c-nomadik.c",
24
+ "./drivers/clocksource/nomadik-mtu.c",
25
+ "./drivers/gpu/drm/pl111/pl111_nomadik.h",
26
+ "./drivers/gpu/drm/pl111/pl111_nomadik.c",
27
+ "./drivers/pinctrl/nomadik/pinctrl-nomadik.c",
28
+ "./drivers/input/keyboard/nomadik-ske-keypad.c",
29
+ "./drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c",
30
+ "./drivers/pinctrl/nomadik/pinctrl-nomadik-stn8816.c",
31
+ "./drivers/char/hw_random/omap-rng.c",
32
+ "./drivers/char/hw_random/omap3-rom-rng.c",
33
+ "./include/dt-bindings/pinctrl/nomadik.h",
34
+ "./Documentation/devicetree/bindings/arm/ste-nomadik.txt"};
35
+
36
+ std::vector<float> target_scores{0.342944, 0.271396, 0.271126, 0.270893, 0.270431, 0.270355,
37
+ 0.270088, 0.270088, 0.26987, 0.269776, 0.269574, 0.269538,
38
+ 0.236358, 0.236074, 0.224804, 0.224238};
39
+
40
+ void createIndex(bool threaded) {
41
+
42
+ StrIdx::StringIndex idx('/'); // Separate directories using unix style "/" char
43
+ std::string query = "rngnomadriv";
44
+
45
+ int i = 1;
46
+ for (const auto &str : flist) {
47
+ if (threaded && i > 2) {
48
+ idx.addStrToIndexThreaded(str, i);
49
+ } else {
50
+ idx.addStrToIndex(str, i);
51
+ }
52
+ i++;
53
+ }
54
+
55
+ idx.waitUntilReady();
56
+
57
+ EXPECT_EQ(idx.size(), 16);
58
+ }
59
+
60
+ void scoreTest(bool threaded, bool runSearch) {
61
+
62
+ StrIdx::StringIndex idx('/'); // Separate directories using unix style "/" char
63
+ std::string query = "rngnomadriv";
64
+
65
+ int i = 1;
66
+ for (const auto &str : flist) {
67
+ if (threaded) {
68
+ idx.addStrToIndexThreaded(str, i);
69
+ } else {
70
+ idx.addStrToIndex(str, i);
71
+ }
72
+ i++;
73
+ }
74
+
75
+ idx.waitUntilReady();
76
+ std::this_thread::sleep_for(std::chrono::milliseconds(1000));
77
+ std::cout << "s:" << idx.size() << ":";
78
+ EXPECT_EQ(idx.size(), 16);
79
+
80
+ if (runSearch) {
81
+ const std::vector<std::pair<float, int>> &results = idx.findSimilar(query);
82
+
83
+ std::cout << results[0].first;
84
+ EXPECT_EQ(results[0].second, 1);
85
+ if (results.size() == 16) {
86
+ int i = 0;
87
+ for (const auto &res : results) {
88
+ // Check if first five digits of the scores match
89
+ std::cout<< "{" << res.first << " " << target_scores[i] << "\n";
90
+ EXPECT_EQ(std::floor(res.first * 1e5), std::floor(1e5 * target_scores[i]));
91
+ i++;
92
+ }
93
+ }
94
+ }
95
+
96
+ // EXPECT_EQ(0,1);
97
+ }
98
+
99
+ TEST(Index, Create) { createIndex(false); }
100
+ TEST(Index, CreateThreaded) { createIndex(true); }
101
+ TEST(IndexSearch, MatchingScoresSingleThread) {
102
+ scoreTest(false, true);
103
+ }
104
+ TEST(IndexSearch, MatchingScoresThreaded) {
105
+ for (int i = 0; i < 3; i++) {
106
+ scoreTest(true, true);
107
+ }
108
+ }
109
+
110
+ class IndexTest : public testing::Test {
111
+ protected:
112
+ std::unique_ptr<StrIdx::StringIndex> idx = std::make_unique<StrIdx::StringIndex>('/');
113
+
114
+ IndexTest() {}
115
+
116
+ void SetUp() override {
117
+ // Code here will be called immediately after the constructor (right
118
+ // before each test).
119
+ idx = std::make_unique<StrIdx::StringIndex>('/');
120
+ }
121
+
122
+ void TearDown() override {
123
+ // Code here will be called immediately after each test (right
124
+ // before the destructor).
125
+ }
126
+ };
127
+
128
+ TEST_F(IndexTest, BinaryRepresentation1) {
129
+ int64_t num = idx->getKeyAtIdx("abcdefgh", 0, 8);
130
+ std::string s = StrIdx::int64ToBinaryString(num);
131
+ // a b c d ...
132
+ EXPECT_TRUE(s == "0110000101100010011000110110010001100101011001100110011101101000");
133
+ }
134
+
135
+ TEST_F(IndexTest, BinaryRepresentation2) {
136
+ int64_t num = idx->getKeyAtIdx("abcdefgh", 0, 1);
137
+ std::string s = StrIdx::int64ToBinaryString(num);
138
+ EXPECT_TRUE(
139
+ s == "0000000000000000000000000000000000000000000000000000000001100001"); // 01100001 == "a"
140
+ }
141
+ TEST_F(IndexTest, BinaryRepresentation3) {
142
+ int64_t num = idx->getKeyAtIdx("abcdefgh", 7, 1);
143
+ std::string s = StrIdx::int64ToBinaryString(num);
144
+ EXPECT_TRUE(
145
+ s == "0000000000000000000000000000000000000000000000000000000001101000"); // 01101000 == "h"
146
+ }
147
+
148
+ TEST_F(IndexTest, AccessString) {
149
+ idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadik.c", 0);
150
+ idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadiksdf.c", 2);
151
+ idx->addStrToIndex("./drivers//i2c///busses////aa-i2c-nomadiksdf.c", 3);
152
+ idx->addStrToIndex("/test/foo/bar.txt", 4);
153
+ idx->addStrToIndex("bar.txt", 5);
154
+
155
+ EXPECT_EQ(idx->size(), 5);
156
+ EXPECT_STREQ(idx->getString(0).c_str(), "./drivers/i2c/busses/i2c-nomadik.c");
157
+
158
+ // TODO: does not work yet
159
+ // EXPECT_STREQ(idx->getString(3).c_str(), "./drivers//i2c///busses////aa-i2c-nomadiksdf.c");
160
+
161
+ // TODO: does not work yet
162
+ // EXPECT_STREQ(idx->getString(4).c_str(), "/test/foo/bar.txt");
163
+ EXPECT_STREQ(idx->getString(5).c_str(), "bar.txt");
164
+ }
165
+
166
+ TEST_F(IndexTest, Size1) {
167
+ // Should not add different files with same id
168
+ idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadik.c", 0);
169
+ idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadiksdf.c", 0);
170
+
171
+ // Should not be added because essentially same as 0:
172
+ idx->addStrToIndex("./drivers//i2c///busses////i2c-nomadik.c", 44);
173
+
174
+ // Should not add same file with different id
175
+ idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadik.c", 1);
176
+ idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadik.c", 2);
177
+
178
+ EXPECT_EQ(idx->size(), 1);
179
+
180
+ // Test no-overwriting
181
+ EXPECT_STREQ(idx->getString(0).c_str(), "./drivers/i2c/busses/i2c-nomadik.c");
182
+ }
183
+
184
+ TEST_F(IndexTest, Size2) {
185
+ idx->addStrToIndex("./drivers/i2c/busses/i2c-nomadik.c", 22);
186
+ idx->addStrToIndex("./Documentation/devicetree/bindings/arm/ste-nomadik.txt", 1);
187
+ idx->addStrToIndex("./Documentation/devicetree/bindings/arm/ste-nomadik33.txt", 3);
188
+ EXPECT_EQ(idx->size(), 3);
189
+ }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: StrIdx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sami Sieranoja
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-05-24 00:00:00.000000000 Z
11
+ date: 2025-06-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -38,24 +38,123 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 13.1.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: tty-cursor
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.7.1
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.7.1
55
+ - !ruby/object:Gem::Dependency
56
+ name: tty-prompt
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.23.1
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.23.1
69
+ - !ruby/object:Gem::Dependency
70
+ name: tty-reader
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 0.9.0
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 0.9.0
83
+ - !ruby/object:Gem::Dependency
84
+ name: tty-screen
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.8.2
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.8.2
97
+ - !ruby/object:Gem::Dependency
98
+ name: pastel
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 0.8.0
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 0.8.0
111
+ - !ruby/object:Gem::Dependency
112
+ name: daemons
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 1.4.1
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 1.4.1
41
125
  description: " Fast fuzzy string similarity search and indexing (for filenames)"
42
126
  email:
43
127
  - sami.sieranoja@gmail.com
44
- executables: []
128
+ executables:
129
+ - stridx.rb
45
130
  extensions:
46
131
  - rubyext/extconf.rb
47
132
  extra_rdoc_files: []
48
133
  files:
134
+ - CMakeLists.txt
135
+ - Gemfile
49
136
  - LICENSE
50
137
  - Makefile
51
138
  - README.md
52
139
  - demo.cpp
140
+ - exe/stridx.rb
53
141
  - flist.txt
142
+ - gem_install
143
+ - py_example.py
144
+ - py_interf.cpp
54
145
  - rubyext/extconf.rb
55
146
  - rubyext/ruby_interf.cpp
147
+ - runserver.rb
148
+ - server.rb
149
+ - setup.py
150
+ - stridx-screencast.mp4
151
+ - stridx-tty.rb
152
+ - stridx.gemspec
56
153
  - stridx.hpp
57
154
  - test.rb
58
155
  - thread_pool.hpp
156
+ - unit_tests.sh
157
+ - unittest.cpp
59
158
  - unordered_dense.h
60
159
  homepage: https://github.com/SamiSieranoja/stridx
61
160
  licenses:
@@ -63,7 +162,7 @@ licenses:
63
162
  metadata:
64
163
  source_code_uri: https://github.com/SamiSieranoja/stridx
65
164
  homepage_uri: https://github.com/SamiSieranoja/stridx
66
- post_install_message:
165
+ post_install_message:
67
166
  rdoc_options: []
68
167
  require_paths:
69
168
  - lib
@@ -79,8 +178,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
79
178
  - !ruby/object:Gem::Version
80
179
  version: '0'
81
180
  requirements: []
82
- rubygems_version: 3.3.26
83
- signing_key:
181
+ rubygems_version: 3.4.20
182
+ signing_key:
84
183
  specification_version: 4
85
184
  summary: StrIdx
86
185
  test_files: []