StrIdx 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/gem_install ADDED
@@ -0,0 +1,4 @@
1
+ # Reinstall the gem from a fresh build; nothing is installed if the build fails.
2
+ gem uninstall --force -x StrIdx
3
+ gem build stridx.gemspec || exit 1
4
+ gem install "$(ls -1tr StrIdx*gem | tail -n 1)"
data/py_example.py ADDED
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env python
2
+ # Minimal usage example for the stridx extension: index four paths, then run one fuzzy query.
3
+ from stridx import StringIndex
4
+ e=StringIndex()
5
+ e.set_value(3)
6
+ e.add("./rust/alloc/vec/spec_extend.rs",0)
7
+ e.add("./virt/kvm/dirty_ring.c",1)
8
+ e.add("./Documentation/staging/static-keys.rst",2)
9
+ e.add("./Documentation/staging/lzo.rst",3)
10
+ # Each add() call above registers a path together with the integer id
11
+ # that find() reports back for it.
12
+
13
+ results = e.find("rstalloc")
14
+ for x in results:
15
+ print(x)
16
+ # Each printed item is a two-element list: [id, score].
17
+ # print(e.get_value())
18
+
data/py_interf.cpp ADDED
@@ -0,0 +1,182 @@
1
+
2
+ #include <stdio.h>
3
+ #include <limits.h>
4
+ #include <cstring>
5
+ #include <pthread.h>
6
+
7
+ #include <stdio.h>
8
+ #include <iostream>
9
+ #include <string>
10
+ #include <vector>
11
+ #include <cfloat>
12
+ #include <cmath>
13
+ #include <bits/stdc++.h>
14
+
15
+ using std::ios;
16
+ using std::sort;
17
+ using std::string;
18
+ using std::vector;
19
+
20
+ #include <Python.h>
21
+ #include <cstring>
22
+
23
+ #include "stridx.hpp"
24
+
25
+ extern "C" {
26
+
27
+ // Python object layout for stridx.StrIdx: an int used by set_value/get_value, plus the StringIndex created in tp_new
28
+ typedef struct {
29
+ PyObject_HEAD int value; // PyObject_HEAD is a macro supplying the object header; 'int value' is a separate field
30
+ StrIdx::StringIndex *idx;
31
+ } StrIdxObject;
32
+
33
+ // tp_new: allocates the Python object and the StringIndex it owns. Returns NULL with MemoryError set on failure.
34
+ static PyObject *StrIdxObject_new(PyTypeObject *type, PyObject *args, PyObject *kwds) {
35
+   StrIdxObject *self = (StrIdxObject *)type->tp_alloc(type, 0);
36
+   if (self == NULL) {
37
+     return NULL;
38
+   }
39
+   self->value = 0;
40
+   // nothrow: std::bad_alloc must not propagate through this extern "C" entry point
41
+   self->idx = new (std::nothrow) StrIdx::StringIndex();
42
+   if (self->idx == NULL) { Py_DECREF(self); return PyErr_NoMemory(); }
43
+   return (PyObject *)self;
44
+ }
45
+
46
+ // tp_dealloc: releases the StringIndex allocated in StrIdxObject_new, then frees the Python object itself
47
+ static void StrIdxObject_dealloc(StrIdxObject *self) { delete self->idx; Py_TYPE(self)->tp_free((PyObject *)self); }
48
+
49
+ // set_value(n): stores the integer argument in the object's `value` field and returns None.
50
+ static PyObject *StrIdxObject_set_value(StrIdxObject *self, PyObject *args) {
51
+   // Parsed argument; PyArg_ParseTuple writes it only on success
52
+   int parsed = 0;
53
+
54
+   if (PyArg_ParseTuple(args, "i", &parsed)) {
55
+     self->value = parsed;
56
+     Py_RETURN_NONE;
57
+   }
58
+
59
+   // Parsing failed: the Python exception has already been set by PyArg_ParseTuple
60
+   return NULL;
61
+ }
62
+ // add(text, id): parses a (str, int) argument pair and inserts the string into the index under that id.
63
+ static PyObject *StrIdxObject_add(StrIdxObject *self, PyObject *args) {
64
+   char *text = NULL;
65
+   int id = 0;
66
+
67
+   const int parsed = PyArg_ParseTuple(args, "si", &text, &id);
68
+   if (parsed == 0) {
69
+     return NULL;
70
+   }
71
+
72
+   printf("char[]*: %s %i\n", text, id);
73
+   std::string key(text);
74
+   self->idx->addStrToIndex(key, id);
75
+   // Alternative kept from the original: self->idx->addStrToIndexThreaded(str, file_id);
76
+   Py_RETURN_NONE;
77
+ }
78
+
79
+ static PyObject *StrIdxObject_find(StrIdxObject *self, PyObject *args) {
80
+ char *value;
81
+ std::string str;
82
+ if (!PyArg_ParseTuple(args, "s", &value)) {
83
+ return NULL;
84
+ }
85
+ str = value;
86
+
87
+ printf("char*: %s\n", value);
88
+ const std::vector<std::pair<float, int>> &results = self->idx->findSimilar(str, 2);
89
+
90
+ int limit = 15;
91
+ int i = 0;
92
+
93
+ printf("res=%d\n", results.size());
94
+ if (results.size() < limit) {
95
+ limit = results.size();
96
+ }
97
+ PyObject *pyarr = PyList_New(limit);
98
+
99
+ for (const auto &[score,fileId] : results) {
100
+ PyObject *arr2 = PyList_New(2);
101
+ // PyList_SetItem(arr2, 0, Py_BuildValue("i", res.second));
102
+ // PyList_SetItem(arr2, 1, Py_BuildValue("d", res.first));
103
+ PyList_SetItem(arr2, 0, Py_BuildValue("i", fileId));
104
+ PyList_SetItem(arr2, 1, Py_BuildValue("d", score));
105
+ PyList_SetItem(pyarr, i, arr2);
106
+ i++;
107
+ if (i >= limit) {
108
+ break;
109
+ }
110
+ }
111
+
112
+ // Py_INCREF(Py_None);
113
+ return pyarr;
114
+ }
115
+
116
+ // get_value(): returns the stored integer. METH_NOARGS handlers are called with two arguments, so the unused second one is declared.
117
+ static PyObject *StrIdxObject_get_value(StrIdxObject *self, PyObject *Py_UNUSED(ignored)) {
118
+   return PyLong_FromLong(self->value);
119
+ }
120
+
121
+ // Method table for stridx.StrIdx; each entry maps a Python method name to its C handler and docstring.
122
+ static PyMethodDef StrIdxObject_methods[] = {
123
+     {"set_value", (PyCFunction)StrIdxObject_set_value, METH_VARARGS,
124
+      "Set the value of the object"},
125
+     {"add", (PyCFunction)StrIdxObject_add, METH_VARARGS, "Add a string to the index under an integer id"},
126
+     {"find", (PyCFunction)StrIdxObject_find, METH_VARARGS, "Find similar strings"},
127
+     {"get_value", (PyCFunction)StrIdxObject_get_value, METH_NOARGS, "Get the value of the object"},
128
+     {NULL} /* Sentinel */
129
+ };
130
+
131
+ // Type object for the class. tp_flags is set to Py_TPFLAGS_DEFAULT, which CPython expects on every type; designators follow PyTypeObject field order.
132
+ static PyTypeObject StrIdxType = {
133
+     PyVarObject_HEAD_INIT(NULL, 0).tp_name = "stridx.StrIdx",
134
+     .tp_basicsize = sizeof(StrIdxObject),
135
+     .tp_dealloc = (destructor)StrIdxObject_dealloc,
136
+     .tp_flags = Py_TPFLAGS_DEFAULT,
137
+     .tp_doc = PyDoc_STR("Fuzzy string index"),
138
+     .tp_methods = StrIdxObject_methods,
139
+     .tp_new = StrIdxObject_new,
140
+ };
141
+
142
+ // PyVarObject_HEAD_INIT(NULL, 0)
143
+ // .tp_name = "stridx.StrIdx",
144
+ // .tp_doc = "StrIdx class",
145
+ // .tp_basicsize = sizeof(StrIdxObject),
146
+ // .tp_itemsize = 0,
147
+ // .tp_flags = Py_TPFLAGS_DEFAULT,
148
+ // .tp_new = StrIdxObject_new,
149
+ // .tp_dealloc = (destructor)StrIdxObject_dealloc,
150
+ // .tp_methods = StrIdxObject_methods,
151
+ // };
152
+
153
+
154
+ // Module-level function table: only the sentinel entry, so the module defines no free functions
155
+ static PyMethodDef StrIdxMethods[] = {
156
+ {NULL, NULL, 0, NULL}};
157
+ // Module definition: name "stridx", no docstring, size -1, and the method table above
158
+ static struct PyModuleDef moduledef = {
159
+ PyModuleDef_HEAD_INIT, "stridx", NULL, -1, StrIdxMethods, NULL, NULL, NULL, NULL};
160
+ // Module init: creates the "stridx" module and registers the StrIdx type in it under the name "StringIndex".
161
+ PyMODINIT_FUNC PyInit_stridx(void) {
162
+   // Ready the type first so a failure here cannot leak a half-built module
163
+   if (PyType_Ready(&StrIdxType) < 0) {
164
+     return NULL;
165
+   }
166
+
167
+   PyObject *m = PyModule_Create(&moduledef);
168
+   // Check the module before it is used (previously it was checked only after PyModule_AddObject)
169
+   if (!m) {
170
+     return NULL;
171
+   }
172
+
173
+   // PyModule_AddObject steals the reference only on success, hence the DECREF on failure
174
+   Py_INCREF(&StrIdxType);
175
+   if (PyModule_AddObject(m, "StringIndex", (PyObject *)&StrIdxType) < 0) {
176
+     Py_DECREF(&StrIdxType);
177
+     Py_DECREF(m);
178
+     return NULL;
179
+   }
180
+   return m;
181
+ }
182
+ } // END extern "C"
@@ -59,8 +59,8 @@ VALUE StringIndexFind(VALUE self, VALUE str) {
59
59
  StrIdx::StringIndex *idx = (StrIdx::StringIndex *)data;
60
60
 
61
61
  ret = rb_ary_new();
62
- const std::vector<std::pair<float, int>> &results = idx->findSimilar(s1, 2);
63
- int limit = 15;
62
+ const std::vector<std::pair<float, int>> &results = idx->findSimilar(s1);
63
+ int limit = 40;
64
64
  int i = 0;
65
65
  for (const auto &res : results) {
66
66
  VALUE arr = rb_ary_new();
@@ -75,6 +75,60 @@ VALUE StringIndexFind(VALUE self, VALUE str) {
75
75
  return ret;
76
76
  }
77
77
 
78
+ // Converts (score, path) results into a Ruby array of [path, score] pairs, keeping at most `limit` entries.
79
+ static VALUE scoredPathsToRubyArray(const std::vector<std::pair<float, std::string>> &results,
80
+                                     size_t limit) {
81
+   VALUE ret = rb_ary_new();
82
+   size_t i = 0;
83
+   for (const auto &res : results) {
84
+     if (i >= limit) {
85
+       break;
86
+     }
87
+     VALUE arr = rb_ary_new();
88
+     rb_ary_push(arr, rb_str_new_cstr(res.second.c_str()));
89
+     rb_ary_push(arr, DBL2NUM(res.first));
90
+     rb_ary_push(ret, arr);
91
+     i++;
92
+   }
93
+   return ret;
94
+ }
95
+
96
+ // Unwraps the StrIdx::StringIndex pointer stored in a Ruby StringIndex object.
97
+ static StrIdx::StringIndex *stringIndexFromValue(VALUE self) {
98
+   void *data;
99
+   TypedData_Get_Struct(self, int, &str_idx_type, data);
100
+   return (StrIdx::StringIndex *)data;
101
+ }
102
+
103
+ // Upper bound on the number of results the two path searches below return to Ruby.
104
+ static const size_t kMaxPathResults = 40;
105
+
106
+ // findFilesAndDirs(str): fuzzy search via findFilesAndDirectories(s1); returns up to 40 [path, score] pairs.
107
+ VALUE StringIndexFindFilesAndDirs(VALUE self, VALUE str) {
108
+   std::string s1 = StringValueCStr(str);
109
+   StrIdx::StringIndex *idx = stringIndexFromValue(self);
110
+   return scoredPathsToRubyArray(idx->findFilesAndDirectories(s1), kMaxPathResults);
111
+ }
112
+
113
+ // findDirs(str): same result shape, but calls findFilesAndDirectories(s1, false, true).
114
+ VALUE StringIndexFindDirs(VALUE self, VALUE str) {
115
+   std::string s1 = StringValueCStr(str);
116
+   StrIdx::StringIndex *idx = stringIndexFromValue(self);
117
+   return scoredPathsToRubyArray(idx->findFilesAndDirectories(s1, false, true), kMaxPathResults);
118
+ }
127
+
128
+
129
+
130
+
131
+
78
132
  VALUE StringIndexSetDirSeparator(VALUE self, VALUE str) {
79
133
  char c = '/';
80
134
  if (TYPE(str) == T_STRING) {
@@ -104,6 +158,8 @@ void Init_stridx(void) {
104
158
  rb_define_method(classStringIndex, "add", StringIndexAddSegments, 2);
105
159
  rb_define_method(classStringIndex, "waitUntilDone", StringIndexWaitUntilDone, 0);
106
160
  rb_define_method(classStringIndex, "find", StringIndexFind, 1);
161
+ rb_define_method(classStringIndex, "findFilesAndDirs", StringIndexFindFilesAndDirs, 1);
162
+ rb_define_method(classStringIndex, "findDirs", StringIndexFindDirs, 1);
107
163
 
108
164
  rb_define_method(classStringIndex, "setDirSeparator", StringIndexSetDirSeparator, 1);
109
165
 
data/runserver.rb ADDED
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env ruby
2
+ $:.unshift File.dirname(__FILE__)
3
+
4
+ def kill_signal
5
+ puts "\nShutting down..."
6
+ File.delete(File.expand_path("~/.stridx/sock"))
7
+ end
8
+
9
+ # https://gist.github.com/sauloperez/6592971
10
+ # Trap ^C
11
+ Signal.trap("INT") {
12
+ kill_signal
13
+ exit
14
+ }
15
+
16
+ # Trap `Kill `
17
+ Signal.trap("TERM") {
18
+ kill_signal
19
+ exit
20
+ }
21
+
22
+ require "server.rb"
23
+ # StrIdx::Server.start ARGV, daemonize: true
24
+ StrIdx::Server.start ARGV
25
+
26
+
27
+
data/server.rb ADDED
@@ -0,0 +1,108 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "socket"
4
+ require "stridx"
5
+
6
+ module StrIdx
7
+ class Server
8
+ def recursively_find_files(directories)
9
+ filelist = []
10
+
11
+ for d in directories
12
+ filelist = filelist + Dir.glob("#{d}/**/*").select { |e|
13
+ File.file?(e)
14
+ # File.file?(e) or File.directory?(e)
15
+ }
16
+ end
17
+ return filelist
18
+ end
19
+
20
+ def self.start(dir_list, daemonize: false)
21
+ Server.new(dir_list, daemonize: daemonize)
22
+ end
23
+
24
+ def self.stop
25
+ sock_dir = File.expand_path("~/.stridx")
26
+ sockfn = "#{sock_dir}/sock"
27
+ client = UNIXSocket.new(sockfn)
28
+ client.puts "stop"
29
+ response = client.recv(200 * 200)
30
+ client.close
31
+ end
32
+
33
+ def initialize(dir_list, daemonize: false)
34
+ idx = StrIdx::StringIndex.new
35
+ idx.setDirSeparator("/")
36
+
37
+ t = Time.new
38
+
39
+ dirs = dir_list.select { |x| File.directory?(x) }
40
+ puts "Scanning files in directories:#{dirs.join(",")}"
41
+ flist = recursively_find_files(dirs)
42
+
43
+ i = 0
44
+ for x in flist
45
+ idx.add(x, i)
46
+ i += 1
47
+ end
48
+
49
+ idx.waitUntilDone()
50
+ idx_time = Time.new
51
+ puts "\nIndexing time (#{flist.size} files): #{(idx_time - t).round(4)} seconds"
52
+
53
+ sock_dir = File.expand_path("~/.stridx")
54
+ Dir.mkdir(sock_dir) if !Dir.exist?(sock_dir)
55
+ sockfn = "#{sock_dir}/sock"
56
+ File.unlink(sockfn) if File.exist?(sockfn)
57
+
58
+ puts "Indexing done, starting server"
59
+ if (daemonize)
60
+ require "daemons"
61
+ Daemons.daemonize
62
+ # exit if fork() # Daemonize
63
+ end
64
+
65
+ # exit if fork() # Daemonize
66
+ # $PROGRAM_NAME = "stridx-daemon"
67
+
68
+ t = Thread.new {
69
+ serv = UNIXServer.new(sockfn)
70
+
71
+ loop do
72
+ # Accept a new client connection
73
+ client = serv.accept
74
+
75
+ # puts "Client connected!"
76
+
77
+ # Read data from the client
78
+ data = client.recv(1024)
79
+
80
+ if data.match(/^stop$/)
81
+ puts "Got stop signal. Shutting down server."
82
+ client.close
83
+ break
84
+ end
85
+
86
+ # puts "Received from client: #{data}"
87
+ if data.match(/^find:(.*)/)
88
+ query = Regexp.last_match(1)
89
+ # TODO: not sure which is best as default:
90
+ # res = idx.find(query)
91
+ # res = idx.findDirs(query)
92
+ res = idx.findFilesAndDirs(query)
93
+ # response = res.collect { |x| flist[x[0]] }.join("\n")
94
+ response = res.collect { |x| "/"+x[0] }.join("\n")
95
+
96
+ # Send a response back to the client
97
+ client.puts response
98
+ end
99
+ # Close the client connection
100
+ client.close
101
+ end
102
+ }
103
+
104
+ t.join
105
+ File.delete(sockfn)
106
+ end
107
+ end
108
+ end
data/setup.py ADDED
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env python
2
+ import numpy
3
+ # NOTE(review): numpy is imported but nothing visible in this script uses it — confirm it is needed to build
4
+ import setuptools
5
+ from setuptools import setup, Extension
6
+
7
+ __version__ = "0.1"
8
+ # NOTE(review): __version__ above is not referenced below; setup() passes version='1.0' — confirm which is intended
9
+ cargs = ['-fpermissive']
10
+
11
+ # The README is used as the package's long description
12
+ with open('README.md', 'r', encoding='utf-8') as f:
13
+ long_description = f.read()
14
+ # C++ extension built from py_interf.cpp; '.' is added to the include path
15
+ module1 = Extension('stridx', sources=['py_interf.cpp'], include_dirs=['.'], extra_compile_args=cargs,
16
+ language="c++",
17
+ )
18
+ # NOTE(review): ext_modules is assigned here, but setup() below is given [module1] directly
19
+ ext_modules = [module1]
20
+
21
+ setup(
22
+ name='stridx',
23
+ version='1.0',
24
+ setup_requires=['wheel'],
25
+ python_requires='>=3',
26
+ provides=['stridx'],
27
+ description='Fast fuzzy string similarity search and indexing (for filenames) ',
28
+ long_description=long_description,
29
+ long_description_content_type='text/markdown',
30
+ ext_modules=[module1]
31
+ )
32
+
Binary file
data/stridx-tty.rb ADDED
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "tty-prompt"
4
+ require "tty-cursor"
5
+ require "tty-reader"
6
+ require "pastel"
7
+
8
+ require "socket"
9
+ # Interactive picker: reads a query from the terminal, lists matches fetched from the stridx
10
+ # server socket on STDERR, and writes the highlighted entry to STDOUT.
11
+ class StrIdxTTY
12
+   def self.run
13
+     stty = StrIdxTTY.new
14
+     selected = stty.search
15
+     STDOUT.write selected
16
+   end
17
+
18
+   # Sets up the terminal helpers, then blocks until the server socket accepts a connection.
19
+   def initialize()
20
+     @list = [] # current results; must exist before the first key press reaches draw_list
21
+     @selected = ""
22
+     @idx = 0
23
+
24
+     @reader = TTY::Reader.new(output: STDERR)
25
+     @pastel = Pastel.new()
26
+     @cursor = TTY::Cursor
27
+
28
+     sock_dir = File.expand_path("~/.stridx")
29
+     sockfn = "#{sock_dir}/sock"
30
+
31
+     loop do
32
+       begin
33
+         # Probe connection only: success means the server is accepting clients
34
+         UNIXSocket.new(sockfn).close
35
+         break
36
+       rescue Errno::ECONNREFUSED, Errno::ENOENT
37
+         # ENOENT: the socket file does not exist yet, i.e. the server has not created it
38
+         out "Waiting for server to start\n"
39
+         sleep 2
40
+       end
41
+     end
42
+   end
43
+
44
+   # Writes UI output to STDERR so STDOUT carries only the final selection.
45
+   def out(x)
46
+     STDERR.write x
47
+   end
48
+
49
+   # Runs the interactive prompt; returns the selected entry, stripped ("" if nothing was selected).
50
+   def search
51
+     out "\n" * 20
52
+     out @cursor.clear_screen
53
+     out "\n" * 20
54
+     @cursor.move_to(0, 0)
55
+     @reader.on(:keypress) { |event|
56
+       handle_event(event)
57
+     }
58
+     @reader.read_line(">> ")
59
+
60
+     out @cursor.clear_screen
61
+     return @selected.to_s.strip
62
+   end
63
+
64
+   # Sends "find:<query>" to the server and returns the response split into lines.
65
+   def get_res_from_server(query)
66
+     sock_dir = File.expand_path("~/.stridx")
67
+     sockfn = "#{sock_dir}/sock"
68
+
69
+     client = UNIXSocket.new(sockfn)
70
+     begin
71
+       client.puts "find:#{query}"
72
+       # to_s: recv may return nil instead of "" when the server closes without replying
73
+       return client.recv(200 * 200).to_s.lines
74
+     ensure
75
+       client.close
76
+     end
77
+   end
78
+
79
+   # Redraws the result list upwards from the cursor, highlighting the entry at @idx.
80
+   def draw_list()
81
+     # Results can shrink between searches; keep the highlight on an existing entry
82
+     @idx = [[@idx, @list.size - 1].min, 0].max
83
+     @selected = @list[@idx]
84
+     i = 0
85
+     for x in @list
86
+       out @cursor.up(1)
87
+       out @cursor.clear_line
88
+       if i == @idx
89
+         out @pastel.lookup(:bold)
90
+       end
91
+       out x.strip
92
+       out @pastel.lookup(:reset)
93
+       i += 1
94
+     end
95
+   end
96
+
97
+   # Re-queries the server once the typed text (after the 3-character ">> " prompt) exceeds two characters.
98
+   def update_search(event)
99
+     query = event.line[3..-1]
100
+     if query.size > 2
101
+       @list = get_res_from_server(query)
102
+       draw_list
103
+     end
104
+   end
105
+
106
+   # Key handler: letters and backspace refresh the search, up/down move the highlight.
107
+   def handle_event(event)
108
+     out @cursor.save
109
+     if event.key.name == :alpha
110
+       update_search(event)
111
+     elsif event.key.name == :up
112
+       @idx += 1 if @idx < @list.size - 1
113
+       draw_list
114
+     elsif event.key.name == :down
115
+       @idx -= 1 if @idx > 0
116
+       draw_list
117
+     elsif event.key.name == :backspace
118
+       update_search(event)
119
+     end
120
+     out @cursor.restore
121
+   end
122
+ end
data/stridx.gemspec ADDED
@@ -0,0 +1,33 @@
1
+ Gem::Specification.new do |spec|
2
+ spec.name = "StrIdx"
3
+ spec.version = "0.1.5"
4
+ spec.authors = ["Sami Sieranoja"]
5
+ spec.email = ["sami.sieranoja@gmail.com"]
6
+ # Summary, description and links; both metadata URIs reuse the homepage
7
+ spec.summary = %q{StrIdx}
8
+ spec.description = %q{ Fast fuzzy string similarity search and indexing (for filenames)}
9
+ spec.homepage = "https://github.com/SamiSieranoja/stridx"
10
+ spec.metadata["source_code_uri"] = spec.homepage
11
+ spec.metadata["homepage_uri"] = spec.homepage
12
+ # Package every git-tracked file except those under refcode/, spec/ and features/
13
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
14
+ f.match(%r{^(refcode|spec|features)/})
15
+ end
16
+ # Executables are the packaged files under exe/; require paths are lib/ and ext/
17
+ spec.bindir = "exe"
18
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
+ spec.require_paths = ["lib", "ext"]
20
+ # Development-only tooling
21
+ spec.add_development_dependency "bundler", "~> 2.4.21"
22
+ spec.add_development_dependency "rake", "~> 13.1.0"
23
+ # Runtime dependencies (tty-*/pastel are required by stridx-tty.rb, daemons by server.rb)
24
+ spec.add_runtime_dependency "tty-cursor", "~> 0.7.1"
25
+ spec.add_runtime_dependency "tty-prompt", "~> 0.23.1"
26
+ spec.add_runtime_dependency "tty-reader", "~> 0.9.0"
27
+ spec.add_runtime_dependency "tty-screen", "~> 0.8.2"
28
+ spec.add_runtime_dependency "pastel", "~> 0.8.0"
29
+ spec.add_runtime_dependency "daemons", "~> 1.4.1"
30
+ # Native extension, configured through rubyext/extconf.rb
31
+ spec.extensions = ["rubyext/extconf.rb"]
32
+ spec.licenses = ["LGPL-2.0+"]
33
+ end