StrIdx 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CMakeLists.txt +27 -0
- data/Gemfile +5 -0
- data/Makefile +2 -2
- data/README.md +49 -3
- data/demo.cpp +30 -8
- data/exe/stridx.rb +16 -0
- data/gem_install +4 -0
- data/py_example.py +18 -0
- data/py_interf.cpp +182 -0
- data/rubyext/extconf.rb +1 -3
- data/rubyext/ruby_interf.cpp +18 -5
- data/runserver.rb +7 -0
- data/server.rb +103 -0
- data/setup.py +32 -0
- data/stridx-screencast.mp4 +0 -0
- data/stridx-tty.rb +122 -0
- data/stridx.gemspec +37 -0
- data/stridx.hpp +172 -71
- data/test.rb +12 -1
- data/thread_pool.hpp +98 -0
- data/unit_tests.sh +4 -0
- data/unittest.cpp +147 -0
- metadata +103 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f7655b6bd71bca58c86ad607fd197933fc19b97b3ae1c76e322ec0432025dad7
|
4
|
+
data.tar.gz: 2421892aa6fe750213d08e2254019d87ce7abb10496cdf1635a61815b8b8b0d7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a0ed3f51b95b72a553cf97e1a852f63e8b6d1cbbba56fdab55ed5037ef4d658b8649f2cf92d35b5eea4e6657a18a3a1460a3a57069cda0889a1987f5d1611ee
|
7
|
+
data.tar.gz: f0d4753ee43cb205fa86468dad92a66644e12cf889d8018283cb421e4a5d670386b8666af64b89d4d475a59f10701de2223c31d03ac5d366f2ee255c77190cf8
|
data/CMakeLists.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# Build script for the StrIdx unit tests (unittest.cpp), using GoogleTest.
cmake_minimum_required(VERSION 3.14)

project(my_project)

# GoogleTest is downloaded at configure time, pinned to a fixed commit.
# https://github.com/google/googletest/issues/4000
include(FetchContent)
FetchContent_Declare(
  googletest
  URL https://github.com/google/googletest/archive/58d77fa8070e8cec2dc1ed015d66b454c8d78850.zip # release-1.12.1
)

set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)

enable_testing()

# Test binary, linked against gtest_main (which provides main()).
add_executable(stridx_test unittest.cpp)
target_link_libraries(stridx_test GTest::gtest_main)

# Register the test cases found in the binary with CTest.
include(GoogleTest)
gtest_discover_tests(stridx_test)
|
27
|
+
|
data/Makefile
CHANGED
data/README.md
CHANGED
@@ -37,12 +37,58 @@ and candidate is "./drivers/char/hw_random/nomadik-rng.c", then scores are calcu
|
|
37
37
|
score = score1/(11*11)*0.97 + score1/(11*38)*0.03 = 0.342944
|
38
38
|
```
|
39
39
|
|
40
|
-
#
|
40
|
+
# Interfaces
|
41
|
+
|
42
|
+
## Commandline
|
43
|
+
Install instructions (for Ubuntu Linux):
|
44
|
+
```
|
45
|
+
apt update
|
46
|
+
apt install ruby ruby-dev build-essential
|
47
|
+
gem install StrIdx
|
48
|
+
```
|
49
|
+
|
50
|
+
Start the indexing server (in the background):
|
51
|
+
```
|
52
|
+
stridx.rb start -- ~/Documents/ ~/Pictures/
|
53
|
+
```
|
54
|
+
|
55
|
+
Add bash keybindings (Ctrl-t):
|
56
|
+
```
|
57
|
+
eval "$(stridx.rb bash)"
|
58
|
+
```
|
59
|
+
|
60
|
+
Search by pressing <kbd>ctrl</kbd>+<kbd>t</kbd>. Keys: <kbd>up</kbd>, <kbd>down</kbd>, select with <kbd>enter</kbd>
|
61
|
+
|
62
|
+
![screencast](https://github.com/SamiSieranoja/stridx/assets/46612258/b2fd4fa2-37ad-4423-bd5f-d54b24ff6df5)
|
63
|
+
|
64
|
+
|
65
|
+
Stop server:
|
66
|
+
```
|
67
|
+
stridx.rb stop
|
68
|
+
```
|
69
|
+
|
70
|
+
Start the indexing server (in the foreground, for debugging):
|
71
|
+
```
|
72
|
+
stridx.rb run -- ~/Documents/ ~/Pictures/
|
73
|
+
```
|
74
|
+
|
75
|
+
|
76
|
+
## Ruby
|
41
77
|
Install:
|
42
78
|
```
|
79
|
+
apt install ruby ruby-dev build-essential
|
43
80
|
gem install StrIdx
|
44
81
|
```
|
45
82
|
|
83
|
+
Or, for development version:
|
84
|
+
```
|
85
|
+
git clone https://github.com/SamiSieranoja/stridx.git
|
86
|
+
cd stridx
|
87
|
+
cd rubyext; ruby extconf.rb ; make ; cd ..
|
88
|
+
gem build stridx.gemspec
|
89
|
+
gem install $(ls -1tr StrIdx*gem | tail -n 1)
|
90
|
+
```
|
91
|
+
|
46
92
|
Usage example (see test.rb):
|
47
93
|
```ruby
|
48
94
|
require "stridx"
|
@@ -58,7 +104,7 @@ for x in lines
|
|
58
104
|
end
|
59
105
|
|
60
106
|
idx_time = Time.new
|
61
|
-
puts "\nIndexing time (#{lines.size} files
|
107
|
+
puts "\nIndexing time (#{lines.size} files): #{(idx_time - t).round(4)} seconds"
|
62
108
|
|
63
109
|
query = "rngnomadriv"
|
64
110
|
res = idx.find(query)
|
@@ -105,7 +151,7 @@ Search time: 0.0488 seconds
|
|
105
151
|
```
|
106
152
|
|
107
153
|
|
108
|
-
|
154
|
+
## C++
|
109
155
|
See demo.cpp
|
110
156
|
```cpp
|
111
157
|
#include "stridx.hpp"
|
data/demo.cpp
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
|
2
|
+
#include <condition_variable>
|
3
|
+
#include <functional>
|
4
|
+
#include <iostream>
|
5
|
+
#include <mutex>
|
6
|
+
#include <queue>
|
7
|
+
#include <thread>
|
8
|
+
#include <algorithm>
|
9
|
+
|
1
10
|
#include "stridx.hpp"
|
2
11
|
|
3
12
|
#include <iostream>
|
@@ -28,7 +37,7 @@ std::vector<std::string> readLinesFromFile(const std::string &filename) {
|
|
28
37
|
}
|
29
38
|
|
30
39
|
int main() {
|
31
|
-
StringIndex idx;
|
40
|
+
StrIdx::StringIndex idx('/'); // Separate directories using unix style "/" char
|
32
41
|
// idx.addStrToIndex("./gdk/x11/gdkasync.c", 0 /*id*/, '/' /*separator*/);
|
33
42
|
// idx.addStrToIndex("./gdk/x11/gdksettings.c", 1, '/');
|
34
43
|
// idx.addStrToIndex("./gdk/x11/gdkx11devicemanager-xi2.h", 2, '/');
|
@@ -37,26 +46,39 @@ int main() {
|
|
37
46
|
std::string fn_filePaths = "flist.txt";
|
38
47
|
std::vector<std::string> v_filePaths = readLinesFromFile(fn_filePaths);
|
39
48
|
|
49
|
+
// Launch indexing to be run on background
|
50
|
+
cout << "File paths: " << v_filePaths.size() << std::endl;
|
51
|
+
cout << "Start indexing in the background" << std::endl;
|
40
52
|
auto start = std::chrono::high_resolution_clock::now();
|
41
53
|
int id = 0;
|
42
54
|
for (const auto &filePath : v_filePaths) {
|
43
|
-
idx.
|
44
|
-
// idx.addStrToIndex(filePath, id, '\0' /*dir separator*/);
|
55
|
+
idx.addStrToIndexThreaded(filePath, id);
|
45
56
|
id++;
|
46
57
|
}
|
47
|
-
|
58
|
+
|
59
|
+
auto idx_time_launch = std::chrono::high_resolution_clock::now();
|
60
|
+
std::chrono::duration<double, std::milli> duration_launch = idx_time_launch - start;
|
61
|
+
cout << "Indexing launch time (seconds): " << duration_launch.count() / 1000 << "\n";
|
62
|
+
|
63
|
+
// Wait until indexing has finished
|
64
|
+
idx.waitUntilDone();
|
65
|
+
|
48
66
|
auto idx_time = std::chrono::high_resolution_clock::now();
|
49
67
|
std::chrono::duration<double, std::milli> duration = idx_time - start;
|
50
|
-
cout << "Indexing
|
68
|
+
cout << "Indexing finished time for " << v_filePaths.size()
|
69
|
+
<< " file paths (seconds): " << duration.count() / 1000 << "\n";
|
51
70
|
|
52
71
|
// Find matching filepaths from the index for the query string "rngnomadriv"
|
53
72
|
start = std::chrono::high_resolution_clock::now();
|
54
73
|
std::string query = "rngnomadriv";
|
74
|
+
for (int i = 0; i < 99; i++) {
|
75
|
+
const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
|
76
|
+
}
|
77
|
+
|
55
78
|
const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
|
56
79
|
auto search_time = std::chrono::high_resolution_clock::now();
|
57
80
|
duration = search_time - start;
|
58
|
-
cout << "Search time (seconds): " << duration.count() / 1000
|
59
|
-
<< "\n";
|
81
|
+
cout << "Search time for 100 queries (seconds): " << duration.count() / 1000 << "\n";
|
60
82
|
|
61
83
|
int i = 0;
|
62
84
|
std::cout << "query string: " << query << "\n";
|
@@ -73,4 +95,4 @@ int main() {
|
|
73
95
|
}
|
74
96
|
|
75
97
|
// Compile:
|
76
|
-
// g++ -Wall -Wno-unused-variable -O3 -
|
98
|
+
// g++ -Wall -Wno-unused-variable -O3 -lstdc++ demo.cpp -o demo
|
data/exe/stridx.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Make the directory one level above this script requirable.
$:.unshift File.dirname(__FILE__) + "/.."

# Dispatch on the first command-line argument:
#   tty   - run the terminal search UI (StrIdxTTY.run)
#   bash  - print bash key bindings, meant to be eval'ed by the calling shell
#   other - hand the command line over to Daemons.run for runserver.rb
case ARGV[0]
when "tty"
  require "stridx-tty.rb"
  StrIdxTTY.run
when "bash"
  puts %q/
bind -m emacs-standard '"\er": redraw-current-line';
bind -m emacs-standard '"\C-t": " \C-b\C-k \C-u`stridx.rb tty`\e\C-e\er\C-a\C-y\C-h\C-e\e \C-y\ey\C-x\C-x\C-f"'
/
else
  require "daemons"
  Daemons.run(File.dirname(__FILE__) + "/../runserver.rb")
end
|
data/gem_install
ADDED
data/py_example.py
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
from stridx import StringIndex

# Example paths; each one is indexed under its position in this tuple.
PATHS = (
    "./rust/alloc/vec/spec_extend.rs",
    "./virt/kvm/dirty_ring.c",
    "./Documentation/staging/static-keys.rst",
    "./Documentation/staging/lzo.rst",
)

index = StringIndex()
index.set_value(3)
for file_id, path in enumerate(PATHS):
    index.add(path, file_id)

# Print every entry returned for the fuzzy query, one per line.
for match in index.find("rstalloc"):
    print(match)

# print(index.get_value())
|
18
|
+
|
data/py_interf.cpp
ADDED
@@ -0,0 +1,182 @@
|
|
1
|
+
|
2
|
+
#include <stdio.h>
|
3
|
+
#include <limits.h>
|
4
|
+
#include <cstring>
|
5
|
+
#include <pthread.h>
|
6
|
+
|
7
|
+
#include <stdio.h>
|
8
|
+
#include <iostream>
|
9
|
+
#include <string>
|
10
|
+
#include <vector>
|
11
|
+
#include <cfloat>
|
12
|
+
#include <cmath>
|
13
|
+
#include <bits/stdc++.h>
|
14
|
+
|
15
|
+
using std::ios;
|
16
|
+
using std::sort;
|
17
|
+
using std::string;
|
18
|
+
using std::vector;
|
19
|
+
|
20
|
+
#include <Python.h>
|
21
|
+
#include <cstring>
|
22
|
+
|
23
|
+
#include "stridx.hpp"
|
24
|
+
|
25
|
+
extern "C" {
|
26
|
+
|
27
|
+
// Define a structure for the custom object
|
28
|
+
typedef struct {
|
29
|
+
PyObject_HEAD int value;
|
30
|
+
StrIdx::StringIndex *idx;
|
31
|
+
} StrIdxObject;
|
32
|
+
|
33
|
+
// Method to allocate memory for the object
|
34
|
+
static PyObject *StrIdxObject_new(PyTypeObject *type, PyObject *args, PyObject *kwds) {
|
35
|
+
StrIdxObject *self;
|
36
|
+
|
37
|
+
self = (StrIdxObject *)type->tp_alloc(type, 0);
|
38
|
+
if (self != NULL) {
|
39
|
+
self->value = 0;
|
40
|
+
self->idx = new StrIdx::StringIndex();
|
41
|
+
}
|
42
|
+
|
43
|
+
return (PyObject *)self;
|
44
|
+
}
|
45
|
+
|
46
|
+
// Method to deallocate memory for the object
|
47
|
+
static void StrIdxObject_dealloc(StrIdxObject *self) { Py_TYPE(self)->tp_free((PyObject *)self); }
|
48
|
+
|
49
|
+
// Method to set the value of the object
|
50
|
+
static PyObject *StrIdxObject_set_value(StrIdxObject *self, PyObject *args) {
|
51
|
+
int value;
|
52
|
+
|
53
|
+
if (!PyArg_ParseTuple(args, "i", &value)) {
|
54
|
+
return NULL;
|
55
|
+
}
|
56
|
+
|
57
|
+
self->value = value;
|
58
|
+
|
59
|
+
Py_INCREF(Py_None);
|
60
|
+
return Py_None;
|
61
|
+
}
|
62
|
+
|
63
|
+
// add(path, id): adds the string `path` to the index under the integer `id`.
// Returns None, or NULL (exception already set) on a bad argument list.
static PyObject *StrIdxObject_add(StrIdxObject *self, PyObject *args) {
  char *path = NULL;
  int file_id = 0;
  if (PyArg_ParseTuple(args, "si", &path, &file_id) == 0) {
    return NULL;
  }

  printf("char[]*: %s %i\n", path, file_id);

  // Synchronous insert; addStrToIndexThreaded(str, file_id) is the
  // alternative that is currently not used here.
  std::string str(path);
  self->idx->addStrToIndex(str, file_id);

  Py_RETURN_NONE;
}
|
78
|
+
|
79
|
+
// find(query): looks up strings similar to `query` in the index.
//
// Returns a new Python list holding at most 15 entries, in the order produced
// by findSimilar(); each entry is a two-element list [file_id, score].
// Returns NULL with a Python exception set if the argument is not a string or
// if building the result fails.
static PyObject *StrIdxObject_find(StrIdxObject *self, PyObject *args) {
  char *value = NULL;
  if (!PyArg_ParseTuple(args, "s", &value)) {
    return NULL;
  }
  std::string str(value);

  printf("char*: %s\n", value);
  const std::vector<std::pair<float, int>> &results = self->idx->findSimilar(str, 2);

  // size() is a size_t: print it with %zu and keep the limit unsigned so that
  // no signed/unsigned comparison or narrowing takes place.
  printf("res=%zu\n", results.size());
  const size_t maxResults = 15;
  const size_t limit = results.size() < maxResults ? results.size() : maxResults;

  PyObject *pyarr = PyList_New((Py_ssize_t)limit);
  if (pyarr == NULL) {
    return NULL;
  }

  for (size_t i = 0; i < limit; i++) {
    // "[id]" builds the inner list [int, float] in one step.
    PyObject *entry = Py_BuildValue("[id]", results[i].second, (double)results[i].first);
    if (entry == NULL) {
      Py_DECREF(pyarr);
      return NULL;
    }
    // PyList_SetItem takes over the reference to `entry`.
    PyList_SetItem(pyarr, (Py_ssize_t)i, entry);
  }

  return pyarr;
}
|
115
|
+
|
116
|
+
// Method to get the value of the object
|
117
|
+
static PyObject *StrIdxObject_get_value(StrIdxObject *self) {
|
118
|
+
return PyLong_FromLong(self->value);
|
119
|
+
}
|
120
|
+
|
121
|
+
// Define methods of the class
|
122
|
+
static PyMethodDef StrIdxObject_methods[] = {
|
123
|
+
{"set_value", (PyCFunction)StrIdxObject_set_value, METH_VARARGS,
|
124
|
+
"Set the value of the object"},
|
125
|
+
{"add", (PyCFunction)StrIdxObject_add, METH_VARARGS, "Set the value of the object"},
|
126
|
+
{"find", (PyCFunction)StrIdxObject_find, METH_VARARGS, "Find similar strings"},
|
127
|
+
{"get_value", (PyCFunction)StrIdxObject_get_value, METH_NOARGS, "Get the value of the object"},
|
128
|
+
{NULL} /* Sentinel */
|
129
|
+
};
|
130
|
+
|
131
|
+
// Define the type object for the class
|
132
|
+
static PyTypeObject StrIdxType = {
|
133
|
+
PyVarObject_HEAD_INIT(NULL, 0).tp_name = "stridx.StrIdx",
|
134
|
+
.tp_basicsize = sizeof(StrIdxObject),
|
135
|
+
.tp_dealloc = (destructor)StrIdxObject_dealloc,
|
136
|
+
.tp_doc = PyDoc_STR("Fuzzy string index"),
|
137
|
+
.tp_methods = StrIdxObject_methods,
|
138
|
+
.tp_new = StrIdxObject_new,
|
139
|
+
// .tp_repr = (reprfunc)myobj_repr,
|
140
|
+
};
|
141
|
+
|
142
|
+
// PyVarObject_HEAD_INIT(NULL, 0)
|
143
|
+
// .tp_name = "stridx.StrIdx",
|
144
|
+
// .tp_doc = "StrIdx class",
|
145
|
+
// .tp_basicsize = sizeof(StrIdxObject),
|
146
|
+
// .tp_itemsize = 0,
|
147
|
+
// .tp_flags = Py_TPFLAGS_DEFAULT,
|
148
|
+
// .tp_new = StrIdxObject_new,
|
149
|
+
// .tp_dealloc = (destructor)StrIdxObject_dealloc,
|
150
|
+
// .tp_methods = StrIdxObject_methods,
|
151
|
+
// };
|
152
|
+
|
153
|
+
|
154
|
+
// Define python accessible methods
|
155
|
+
static PyMethodDef StrIdxMethods[] = {
|
156
|
+
{NULL, NULL, 0, NULL}};
|
157
|
+
|
158
|
+
// Definition of the "stridx" extension module.
static struct PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT,
    "stridx",       // m_name
    NULL,           // m_doc
    -1,             // m_size
    StrIdxMethods,  // m_methods
    NULL,           // m_slots
    NULL,           // m_traverse
    NULL,           // m_clear
    NULL,           // m_free
};
|
160
|
+
|
161
|
+
// Module entry point, called by the interpreter on `import stridx`.
// Creates the module, finalizes the StrIdxType type object and publishes it
// under the name "StringIndex".
// Returns the module, or NULL with a Python exception set on failure.
PyMODINIT_FUNC PyInit_stridx(void) {
  PyObject *m = PyModule_Create(&moduledef);
  // Check the module before it is used below; previously the NULL check came
  // only after PyModule_AddObject had already been called with it.
  if (m == NULL) {
    return NULL;
  }

  // Initialize the type object
  if (PyType_Ready(&StrIdxType) < 0) {
    Py_DECREF(m);  // do not leak the module on failure
    return NULL;
  }

  // PyModule_AddObject takes over this reference only when it succeeds.
  Py_INCREF(&StrIdxType);
  if (PyModule_AddObject(m, "StringIndex", (PyObject *)&StrIdxType) < 0) {
    Py_DECREF(&StrIdxType);
    Py_DECREF(m);
    return NULL;
  }

  return m;
}
|
182
|
+
} // END extern "C"
|
data/rubyext/extconf.rb
CHANGED
@@ -1,15 +1,13 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
#
|
3
2
|
|
4
3
|
require 'mkmf'
|
5
4
|
|
6
5
|
module_name = "stridx"
|
7
6
|
extension_name = 'stridx'
|
8
7
|
|
9
|
-
$CXXFLAGS << " -Wall -Wno-unused-variable -O3
|
8
|
+
$CXXFLAGS << " -Wall -Wno-unused-variable -O3"
|
10
9
|
|
11
10
|
have_library( 'stdc++');
|
12
|
-
have_library( 'gomp' );
|
13
11
|
|
14
12
|
dir_config(extension_name) # The destination
|
15
13
|
create_makefile(extension_name) # Create Makefile
|
data/rubyext/ruby_interf.cpp
CHANGED
@@ -7,7 +7,7 @@
|
|
7
7
|
|
8
8
|
extern "C" {
|
9
9
|
|
10
|
-
void str_idx_free(void *data) { delete (StringIndex *)data; }
|
10
|
+
void str_idx_free(void *data) { delete (StrIdx::StringIndex *)data; }
|
11
11
|
|
12
12
|
// Wrap StringIndex class inside a ruby variable
|
13
13
|
static const rb_data_type_t str_idx_type = {
|
@@ -26,7 +26,7 @@ static const rb_data_type_t str_idx_type = {
|
|
26
26
|
};
|
27
27
|
|
28
28
|
VALUE str_idx_alloc(VALUE self) {
|
29
|
-
void *data = new StringIndex();
|
29
|
+
void *data = new StrIdx::StringIndex();
|
30
30
|
return TypedData_Wrap_Struct(self, &str_idx_type, data);
|
31
31
|
}
|
32
32
|
|
@@ -36,18 +36,27 @@ VALUE StringIndexAddSegments(VALUE self, VALUE str, VALUE fileId) {
|
|
36
36
|
|
37
37
|
void *data;
|
38
38
|
TypedData_Get_Struct(self, int, &str_idx_type, data);
|
39
|
-
((StringIndex *)data)->addStrToIndex(s1, fid);
|
39
|
+
// ((StringIndex *)data)->addStrToIndex(s1, fid);
|
40
|
+
((StrIdx::StringIndex *)data)->addStrToIndexThreaded(s1, fid);
|
40
41
|
|
41
42
|
return self;
|
42
43
|
}
|
43
44
|
|
45
|
+
// Ruby method StringIndex#waitUntilDone: unwraps the C++ index stored in
// `self` and calls waitUntilDone() on it (presumably blocking until queued
// indexing work has finished; confirm in stridx.hpp). Returns self.
VALUE StringIndexWaitUntilDone(VALUE self) {
  StrIdx::StringIndex *idx;
  TypedData_Get_Struct(self, StrIdx::StringIndex, &str_idx_type, idx);
  idx->waitUntilDone();
  return self;
}
|
51
|
+
|
52
|
+
|
44
53
|
VALUE StringIndexFind(VALUE self, VALUE str) {
|
45
54
|
VALUE ret;
|
46
55
|
std::string s1 = StringValueCStr(str);
|
47
56
|
|
48
57
|
void *data;
|
49
58
|
TypedData_Get_Struct(self, int, &str_idx_type, data);
|
50
|
-
StringIndex *idx = (StringIndex *)data;
|
59
|
+
StrIdx::StringIndex *idx = (StrIdx::StringIndex *)data;
|
51
60
|
|
52
61
|
ret = rb_ary_new();
|
53
62
|
const std::vector<std::pair<float, int>> &results = idx->findSimilar(s1, 2);
|
@@ -80,7 +89,7 @@ VALUE StringIndexSetDirSeparator(VALUE self, VALUE str) {
|
|
80
89
|
|
81
90
|
void *data;
|
82
91
|
TypedData_Get_Struct(self, int, &str_idx_type, data);
|
83
|
-
StringIndex *idx = (StringIndex *)data;
|
92
|
+
StrIdx::StringIndex *idx = (StrIdx::StringIndex *)data;
|
84
93
|
idx->setDirSeparator(c);
|
85
94
|
|
86
95
|
return self;
|
@@ -93,8 +102,12 @@ void Init_stridx(void) {
|
|
93
102
|
|
94
103
|
rb_define_alloc_func(classStringIndex, str_idx_alloc);
|
95
104
|
rb_define_method(classStringIndex, "add", StringIndexAddSegments, 2);
|
105
|
+
rb_define_method(classStringIndex, "waitUntilDone", StringIndexWaitUntilDone, 0);
|
96
106
|
rb_define_method(classStringIndex, "find", StringIndexFind, 1);
|
107
|
+
|
97
108
|
rb_define_method(classStringIndex, "setDirSeparator", StringIndexSetDirSeparator, 1);
|
109
|
+
|
110
|
+
|
98
111
|
}
|
99
112
|
|
100
113
|
} // End extern "C"
|
data/runserver.rb
ADDED
data/server.rb
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "socket"
|
4
|
+
require "stridx"
|
5
|
+
|
6
|
+
module StrIdx
  # Builds a fuzzy filename index over a set of directories and answers
  # lookups on the UNIX domain socket ~/.stridx/sock.
  #
  # One request is handled per connection:
  #   "find:<query>"  replies with the matching file paths, one per line
  #   "stop"          shuts the server down
  class Server
    # Returns the paths of all regular files found below each entry of
    # +directories+.
    def recursively_find_files(directories)
      # flat_map collects into one array instead of re-copying the growing
      # list for every directory.
      directories.flat_map do |dir|
        Dir.glob("#{dir}/**/*").select { |entry|
          File.file?(entry)
          # File.file?(entry) or File.directory?(entry)
        }
      end
    end

    # Convenience wrapper around Server.new. The constructor runs the accept
    # loop, so this returns only after the server has been stopped.
    def self.start(dir_list, daemonize: false)
      Server.new(dir_list, daemonize: daemonize)
    end

    # Asks a running server to shut down by sending "stop" over its socket.
    # Socket errors (e.g. no server listening) are raised to the caller.
    def self.stop
      sock_dir = File.expand_path("~/.stridx")
      sockfn = "#{sock_dir}/sock"
      client = UNIXSocket.new(sockfn)
      begin
        client.puts "stop"
        client.recv(200 * 200)
      ensure
        # Close the socket even when the request fails.
        client.close
      end
      nil
    end

    # Indexes all files below the directories in +dir_list+ (entries that are
    # not directories are ignored), then serves requests until "stop" is
    # received. With +daemonize+ the process is detached via the daemons gem
    # once indexing has finished.
    def initialize(dir_list, daemonize: false)
      idx = StrIdx::StringIndex.new
      idx.setDirSeparator("/")

      started_at = Time.new

      dirs = dir_list.select { |x| File.directory?(x) }
      puts "Scanning files in directories:#{dirs.join(",")}"
      flist = recursively_find_files(dirs)

      # The position in flist doubles as the id stored in the index, so search
      # results can be mapped back to paths.
      flist.each_with_index { |path, id| idx.add(path, id) }

      idx.waitUntilDone()
      idx_time = Time.new
      puts "\nIndexing time (#{flist.size} files): #{(idx_time - started_at).round(4)} seconds"

      sock_dir = File.expand_path("~/.stridx")
      Dir.mkdir(sock_dir) if !Dir.exist?(sock_dir)
      sockfn = "#{sock_dir}/sock"
      # Remove a stale socket file left behind by an earlier run.
      File.unlink(sockfn) if File.exist?(sockfn)

      puts "Indexing done, starting server"
      if (daemonize)
        require "daemons"
        Daemons.daemonize
        # exit if fork() # Daemonize
      end

      # exit if fork() # Daemonize
      # $PROGRAM_NAME = "stridx-daemon"

      listener = Thread.new {
        serv = UNIXServer.new(sockfn)

        begin
          loop do
            # Accept a new client connection
            client = serv.accept

            begin
              # Read data from the client
              data = client.recv(1024)

              # recv yields nil (Ruby >= 3.3) or "" when the peer closed the
              # connection without sending anything; calling match on nil
              # would raise and kill the server thread.
              next if data.nil? || data.empty?

              if data.match(/^stop$/)
                puts "Got stop signal. Shutting down server."
                break
              end

              if data.match(/^find:(.*)/)
                query = Regexp.last_match(1)
                res = idx.find(query)
                response = res.collect { |x| flist[x[0]] }.join("\n")

                # Send a response back to the client
                client.puts response
              end
            rescue SystemCallError => e
              # A client that disconnects early must not take the server down.
              warn "stridx: client error: #{e.message}"
            ensure
              # Close the client connection on every path (next, break, error)
              client.close
            end
          end
        ensure
          # Release the listening socket and its file so that a later
          # "stop" or restart does not run into a dead socket.
          serv.close
          File.unlink(sockfn) if File.exist?(sockfn)
        end
      }

      listener.join
    end
  end
end
|
data/setup.py
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
import numpy

import setuptools
from setuptools import setup, Extension

# NOTE(review): not passed to setup() below, which hard-codes version='1.0'.
__version__ = "0.1"

# Extra flags handed to the C++ compiler when building the extension.
cargs = ['-fpermissive']

# The README is used as the package's long description.
with open('README.md', 'r', encoding='utf-8') as f:
    long_description = f.read()

# The extension module `stridx`, built from the CPython binding source.
module1 = Extension(
    'stridx',
    sources=['py_interf.cpp'],
    include_dirs=['.'],
    extra_compile_args=cargs,
    language="c++",
)

ext_modules = [module1]

setup(
    name='stridx',
    version='1.0',
    setup_requires=['wheel'],
    python_requires='>=3',
    provides=['stridx'],
    description='Fast fuzzy string similarity search and indexing (for filenames) ',
    long_description=long_description,
    long_description_content_type='text/markdown',
    ext_modules=ext_modules,
)
|
32
|
+
|
Binary file
|