StrIdx 0.1.0 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1079ebe4265ed15d2307d260395e33471ab149bc6cd33ca6e469ff93c960dba8
4
- data.tar.gz: 0bdba2496690ad9e1794d653bb5ca047a15fcfee6d2988ff06098c69920ae269
3
+ metadata.gz: 2155c54900f595ae954d2074840f425195a66d6bf464e35dc68a1bb10236255c
4
+ data.tar.gz: fc2b50bcf6083b806643886a275e2a03ce3267cbd66dd81a0d63f5f68f8e4268
5
5
  SHA512:
6
- metadata.gz: 8a1b2fe9ce6f87a8585f5b9ca86092f21c600575b704fc3bf4ebaa8ae3bd0897f7e272fb580fb98c38e8b65a5b95d0fd8cc1c3baeafdb235fe1d48e51153ae6c
7
- data.tar.gz: b4a62b6251a38637905653f2aaa919653465fbc453844a2f02466ce413a8a1de0946de09973535472a70ccbc17151d426e76fe92b3338ccc0af9f83f6821b201
6
+ metadata.gz: ac27e251a448f1ca17d4672d12ac165412f2c7fbd71359e77d21f8a8c30521797befe7f2555d181be2af0cace345636558b4a8d93d8e141f707e8b28ffe5687b
7
+ data.tar.gz: f8e65efbed5079c0a2e37504aa955e0855d0ef088ad07d1d2566dee6c5f51b79d9ba71ef102e5aaeef8d1b7d548eefa2fab3930e21b7de05b940da882c9b86f3
data/Makefile CHANGED
@@ -1,268 +1,9 @@
1
+ all: demo
1
2
 
2
- SHELL = /bin/sh
3
+ demo: *.hpp *.cpp
4
+ g++ -Wall -O3 -fopenmp -lstdc++ demo.cpp -o demo
5
+
6
+ clean:
7
+ rm demo
3
8
 
4
- # V=0 quiet, V=1 verbose. other values don't work.
5
- V = 0
6
- V0 = $(V:0=)
7
- Q1 = $(V:1=)
8
- Q = $(Q1:0=@)
9
- ECHO1 = $(V:1=@ :)
10
- ECHO = $(ECHO1:0=@ echo)
11
- NULLCMD = :
12
9
 
13
- #### Start of system configuration section. ####
14
-
15
- srcdir = .
16
- topdir = /home/samsam/.rbenv/versions/3.1.4/include/ruby-3.1.0
17
- hdrdir = $(topdir)
18
- arch_hdrdir = /home/samsam/.rbenv/versions/3.1.4/include/ruby-3.1.0/x86_64-linux
19
- PATH_SEPARATOR = :
20
- VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
21
- prefix = $(DESTDIR)/home/samsam/.rbenv/versions/3.1.4
22
- rubysitearchprefix = $(rubylibprefix)/$(sitearch)
23
- rubyarchprefix = $(rubylibprefix)/$(arch)
24
- rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
25
- exec_prefix = $(prefix)
26
- vendorarchhdrdir = $(vendorhdrdir)/$(sitearch)
27
- sitearchhdrdir = $(sitehdrdir)/$(sitearch)
28
- rubyarchhdrdir = $(rubyhdrdir)/$(arch)
29
- vendorhdrdir = $(rubyhdrdir)/vendor_ruby
30
- sitehdrdir = $(rubyhdrdir)/site_ruby
31
- rubyhdrdir = $(includedir)/$(RUBY_VERSION_NAME)
32
- vendorarchdir = $(vendorlibdir)/$(sitearch)
33
- vendorlibdir = $(vendordir)/$(ruby_version)
34
- vendordir = $(rubylibprefix)/vendor_ruby
35
- sitearchdir = $(sitelibdir)/$(sitearch)
36
- sitelibdir = $(sitedir)/$(ruby_version)
37
- sitedir = $(rubylibprefix)/site_ruby
38
- rubyarchdir = $(rubylibdir)/$(arch)
39
- rubylibdir = $(rubylibprefix)/$(ruby_version)
40
- sitearchincludedir = $(includedir)/$(sitearch)
41
- archincludedir = $(includedir)/$(arch)
42
- sitearchlibdir = $(libdir)/$(sitearch)
43
- archlibdir = $(libdir)/$(arch)
44
- ridir = $(datarootdir)/$(RI_BASE_NAME)
45
- mandir = $(datarootdir)/man
46
- localedir = $(datarootdir)/locale
47
- libdir = $(exec_prefix)/lib
48
- psdir = $(docdir)
49
- pdfdir = $(docdir)
50
- dvidir = $(docdir)
51
- htmldir = $(docdir)
52
- infodir = $(datarootdir)/info
53
- docdir = $(datarootdir)/doc/$(PACKAGE)
54
- oldincludedir = $(DESTDIR)/usr/include
55
- includedir = $(prefix)/include
56
- runstatedir = $(localstatedir)/run
57
- localstatedir = $(prefix)/var
58
- sharedstatedir = $(prefix)/com
59
- sysconfdir = $(prefix)/etc
60
- datadir = $(datarootdir)
61
- datarootdir = $(prefix)/share
62
- libexecdir = $(exec_prefix)/libexec
63
- sbindir = $(exec_prefix)/sbin
64
- bindir = $(exec_prefix)/bin
65
- archdir = $(rubyarchdir)
66
-
67
-
68
- CC_WRAPPER =
69
- CC = gcc
70
- CXX = g++
71
- LIBRUBY = $(LIBRUBY_SO)
72
- LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
73
- LIBRUBYARG_SHARED = -Wl,-rpath,$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)
74
- LIBRUBYARG_STATIC = -Wl,-rpath,$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)-static $(MAINLIBS)
75
- empty =
76
- OUTFLAG = -o $(empty)
77
- COUTFLAG = -o $(empty)
78
- CSRCFLAG = $(empty)
79
-
80
- RUBY_EXTCONF_H =
81
- cflags = $(optflags) $(debugflags) $(warnflags)
82
- cxxflags =
83
- optflags = -O3 -fno-fast-math
84
- debugflags = -ggdb3
85
- warnflags = -Wall -Wextra -Wdeprecated-declarations -Wduplicated-cond -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wwrite-strings -Wold-style-definition -Wimplicit-fallthrough=0 -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-packed-bitfield-compat -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wsuggest-attribute=format -Wsuggest-attribute=noreturn -Wunused-variable -Wundef
86
- cppflags =
87
- CCDLFLAGS = -fPIC
88
- CFLAGS = $(CCDLFLAGS) $(cflags) -fPIC $(ARCH_FLAG)
89
- INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
90
- DEFS =
91
- CPPFLAGS = -I/home/samsam/.rbenv/versions/3.1.4/include $(DEFS) $(cppflags)
92
- CXXFLAGS = $(CCDLFLAGS) -Wall -Wno-unused-variable -O3 -fopenmp $(ARCH_FLAG)
93
- ldflags = -L. -L/home/samsam/.rbenv/versions/3.1.4/lib -fstack-protector-strong -rdynamic -Wl,-export-dynamic -Wl,--no-as-needed
94
- dldflags = -L/home/samsam/.rbenv/versions/3.1.4/lib -Wl,--compress-debug-sections=zlib
95
- ARCH_FLAG =
96
- DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
97
- LDSHARED = $(CC) -shared
98
- LDSHAREDXX = $(CXX) -shared
99
- AR = gcc-ar
100
- EXEEXT =
101
-
102
- RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
103
- RUBY_SO_NAME = ruby
104
- RUBYW_INSTALL_NAME =
105
- RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
106
- RUBYW_BASE_NAME = rubyw
107
- RUBY_BASE_NAME = ruby
108
-
109
- arch = x86_64-linux
110
- sitearch = $(arch)
111
- ruby_version = 3.1.0
112
- ruby = $(bindir)/$(RUBY_BASE_NAME)
113
- RUBY = $(ruby)
114
- BUILTRUBY = $(bindir)/$(RUBY_BASE_NAME)
115
- ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/backward.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
116
-
117
- RM = rm -f
118
- RM_RF = rm -fr
119
- RMDIRS = rmdir --ignore-fail-on-non-empty -p
120
- MAKEDIRS = /usr/bin/mkdir -p
121
- INSTALL = /usr/bin/install -c
122
- INSTALL_PROG = $(INSTALL) -m 0755
123
- INSTALL_DATA = $(INSTALL) -m 644
124
- COPY = cp
125
- TOUCH = exit >
126
-
127
- #### End of system configuration section. ####
128
-
129
- preload =
130
- libpath = . $(libdir)
131
- LIBPATH = -L. -L$(libdir) -Wl,-rpath,$(libdir)
132
- DEFFILE =
133
-
134
- CLEANFILES = mkmf.log
135
- DISTCLEANFILES =
136
- DISTCLEANDIRS =
137
-
138
- extout =
139
- extout_prefix =
140
- target_prefix =
141
- LOCAL_LIBS =
142
- LIBS = $(LIBRUBYARG_SHARED) -lgomp -lstdc++ -lm -lc
143
- ORIG_SRCS = demo.cpp ruby_interf.cpp
144
- SRCS = $(ORIG_SRCS)
145
- OBJS = demo.o ruby_interf.o
146
- HDRS = $(srcdir)/unordered_dense.h $(srcdir)/stridx.hpp
147
- LOCAL_HDRS =
148
- TARGET = stridx
149
- TARGET_NAME = stridx
150
- TARGET_ENTRY = Init_$(TARGET_NAME)
151
- DLLIB = $(TARGET).so
152
- EXTSTATIC =
153
- STATIC_LIB =
154
-
155
- TIMESTAMP_DIR = .
156
- BINDIR = $(bindir)
157
- RUBYCOMMONDIR = $(sitedir)$(target_prefix)
158
- RUBYLIBDIR = $(sitelibdir)$(target_prefix)
159
- RUBYARCHDIR = $(sitearchdir)$(target_prefix)
160
- HDRDIR = $(sitehdrdir)$(target_prefix)
161
- ARCHHDRDIR = $(sitearchhdrdir)$(target_prefix)
162
- TARGET_SO_DIR =
163
- TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
164
- CLEANLIBS = $(TARGET_SO) false
165
- CLEANOBJS = *.o *.bak
166
-
167
- all: $(DLLIB)
168
- static: $(STATIC_LIB)
169
- .PHONY: all install static install-so install-rb
170
- .PHONY: clean clean-so clean-static clean-rb
171
-
172
- clean-static::
173
- clean-rb-default::
174
- clean-rb::
175
- clean-so::
176
- clean: clean-so clean-static clean-rb-default clean-rb
177
- -$(Q)$(RM_RF) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
178
-
179
- distclean-rb-default::
180
- distclean-rb::
181
- distclean-so::
182
- distclean-static::
183
- distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
184
- -$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
185
- -$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
186
- -$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
187
-
188
- realclean: distclean
189
- install: install-so install-rb
190
-
191
- install-so: $(DLLIB) $(TIMESTAMP_DIR)/.sitearchdir.time
192
- $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
193
- clean-static::
194
- -$(Q)$(RM) $(STATIC_LIB)
195
- install-rb: pre-install-rb do-install-rb install-rb-default
196
- install-rb-default: pre-install-rb-default do-install-rb-default
197
- pre-install-rb: Makefile
198
- pre-install-rb-default: Makefile
199
- do-install-rb:
200
- do-install-rb-default:
201
- pre-install-rb-default:
202
- @$(NULLCMD)
203
- $(TIMESTAMP_DIR)/.sitearchdir.time:
204
- $(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
205
- $(Q) $(TOUCH) $@
206
-
207
- site-install: site-install-so site-install-rb
208
- site-install-so: install-so
209
- site-install-rb: install-rb
210
-
211
- .SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
212
-
213
- .cc.o:
214
- $(ECHO) compiling $(<)
215
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
216
-
217
- .cc.S:
218
- $(ECHO) translating $(<)
219
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
220
-
221
- .mm.o:
222
- $(ECHO) compiling $(<)
223
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
224
-
225
- .mm.S:
226
- $(ECHO) translating $(<)
227
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
228
-
229
- .cxx.o:
230
- $(ECHO) compiling $(<)
231
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
232
-
233
- .cxx.S:
234
- $(ECHO) translating $(<)
235
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
236
-
237
- .cpp.o:
238
- $(ECHO) compiling $(<)
239
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
240
-
241
- .cpp.S:
242
- $(ECHO) translating $(<)
243
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
244
-
245
- .c.o:
246
- $(ECHO) compiling $(<)
247
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
248
-
249
- .c.S:
250
- $(ECHO) translating $(<)
251
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
252
-
253
- .m.o:
254
- $(ECHO) compiling $(<)
255
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
256
-
257
- .m.S:
258
- $(ECHO) translating $(<)
259
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
260
-
261
- $(TARGET_SO): $(OBJS) Makefile
262
- $(ECHO) linking shared-object $(DLLIB)
263
- -$(Q)$(RM) $(@)
264
- $(Q) $(LDSHAREDXX) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
265
-
266
-
267
-
268
- $(OBJS): $(HDRS) $(ruby_headers)
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # stridx
1
+ # StrIdx
2
2
  This library provides fast fuzzy string similarity search and indexing. It has been mainly developed for indexing filepaths, but can be used for other types of strings as well. It can easily handle fuzzy searches for more than 100,000 filepaths.
3
3
 
4
4
  The fuzziness means that candidate filepaths do not need to include an exact match of the query string. They are considered a good match if they include parts of the query string, and even if those parts are in the wrong order.
@@ -26,7 +26,7 @@ Sum up the scores for each character c and divide by (string length)^2
26
26
  For example, if query = "rngnomadriv"
27
27
  and candidate is "./drivers/char/hw_random/nomadik-rng.c", then scores are calculated as follows:
28
28
  ```
29
- rngnomadriv
29
+ rngnomadriv (substrings rng=3, nomad=5 and driv=4)
30
30
  33355555444 (subscores)
31
31
  FFFFFFFFDDD (F=file component, D=dir component)
32
32
  score1=(3+3+3+5+5+5+5+5+(4+4+4)*0.7)
@@ -36,6 +36,75 @@ and candidate is "./drivers/char/hw_random/nomadik-rng.c", then scores are calcu
36
36
  and minor part by (query string length)*(candidate string length)
37
37
  score = score1/(11*11)*0.97 + score1/(11*38)*0.03 = 0.342944
38
38
  ```
39
+
40
+ # Ruby interface
41
+ Install:
42
+ ```
43
+ gem install StrIdx
44
+ ```
45
+
46
+ Usage example (see test.rb):
47
+ ```ruby
48
+ require "stridx"
49
+ idx = StrIdx::StringIndex.new
50
+
51
+ t = Time.new
52
+ fn = File.expand_path("flist.txt")
53
+ lines = IO.read(fn).lines.collect { |x| x.strip }
54
+ i = 0
55
+ for x in lines
56
+ idx.add(x, i)
57
+ i += 1
58
+ end
59
+
60
+ idx_time = Time.new
61
+ puts "\nIndexing time (#{lines.size} files}): #{(idx_time - t).round(4)} seconds"
62
+
63
+ query = "rngnomadriv"
64
+ res = idx.find(query)
65
+ puts "query: #{query}"
66
+ puts "\nResults:"
67
+ puts "Filename, score"
68
+ puts "==============="
69
+ for id, score in res
70
+ fn = lines[id]
71
+ puts "#{fn}, #{score.round(4)}"
72
+ end
73
+
74
+ query_time = Time.new
75
+
76
+ puts "\nSearch time: #{(query_time - idx_time).round(4)} seconds"
77
+
78
+ ```
79
+
80
+ Output:
81
+ ```
82
+ Indexing time (89828 files}): 2.813722207
83
+ query: rngnomadriv
84
+
85
+ Results:
86
+ Filename, score
87
+ ===============
88
+ ./drivers/char/hw_random/nomadik-rng.c, 0.3429
89
+ ./drivers/pinctrl/nomadik, 0.2714
90
+ ./drivers/clk/clk-nomadik.c, 0.2711
91
+ ./drivers/gpio/gpio-nomadik.c, 0.2709
92
+ ./drivers/i2c/busses/i2c-nomadik.c, 0.2704
93
+ ./drivers/clocksource/nomadik-mtu.c, 0.2704
94
+ ./drivers/gpu/drm/pl111/pl111_nomadik.h, 0.2701
95
+ ./drivers/gpu/drm/pl111/pl111_nomadik.c, 0.2701
96
+ ./drivers/pinctrl/nomadik/pinctrl-nomadik.c, 0.2699
97
+ ./drivers/input/keyboard/nomadik-ske-keypad.c, 0.2698
98
+ ./drivers/pinctrl/nomadik/pinctrl-nomadik-db8500.c, 0.2696
99
+ ./drivers/pinctrl/nomadik/pinctrl-nomadik-stn8815.c, 0.2695
100
+ ./drivers/char/hw_random/omap-rng.c, 0.2364
101
+ ./drivers/char/hw_random/omap3-rom-rng.c, 0.2361
102
+ ./include/dt-bindings/pinctrl/nomadik.h, 0.2248
103
+
104
+ Search time: 0.0488 seconds
105
+ ```
106
+
107
+
39
108
  # C++ API
40
109
  See demo.cpp
41
110
  ```cpp