StrIdx 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c3e062e9a4d99367ff312ab222e861362e05381a034f513d1581cc2464df53df
4
- data.tar.gz: 6088bf73c44fa3487757f2a0d3a60fcc6b00ee0514216a76278f027d83cc7807
3
+ metadata.gz: 52d40e64a5ace0231828bdbbe6fd94475ab5986d0c1fb7e35e0ce18463a97ae0
4
+ data.tar.gz: e1cdcc2ed9f377b2acb049a9fb6de22f24acdbd6e3552748b1307342c10b6cf7
5
5
  SHA512:
6
- metadata.gz: ece52c12cef6f1460e995e296fbaed2911e45a15d46cbe1fd1b0ff21f8ae546198078a83951cb9430bf82ff81d866a22e0244c7472f566efa2376285b8e805b2
7
- data.tar.gz: 8b6751d914e41ffbe9c69274054ae398679cabdd159f0ab1c45ca5dc899a9a34c626f782ca4518c68b7d599efe71ea61489e277061b42284bc22345d793b116e
6
+ metadata.gz: f3c27923a568fe5916c17e91766066362a965abf9568b21a4daa269cd16a8a4778248ae935b26502aa482aad4807908d401989e7ebfb88d1fbdb011b0c240b60
7
+ data.tar.gz: f94dda8d71931c18ae3dc6b58204edda7ffd649bc7452a74fdba4929d6092183e99bf99d7b3632be5bafccfd0be7a877f5513c8c3e72e814dcca08bd79a9b217
data/Makefile CHANGED
@@ -1,268 +1,9 @@
1
+ all: demo
1
2
 
2
- SHELL = /bin/sh
3
+ demo: *.hpp *.cpp Makefile
4
+ g++ -Wall -O3 -lstdc++ demo.cpp -o demo
5
+
6
+ clean:
7
+ rm demo
3
8
 
4
- # V=0 quiet, V=1 verbose. other values don't work.
5
- V = 0
6
- V0 = $(V:0=)
7
- Q1 = $(V:1=)
8
- Q = $(Q1:0=@)
9
- ECHO1 = $(V:1=@ :)
10
- ECHO = $(ECHO1:0=@ echo)
11
- NULLCMD = :
12
9
 
13
- #### Start of system configuration section. ####
14
-
15
- srcdir = .
16
- topdir = /home/samsam/.rbenv/versions/3.1.4/include/ruby-3.1.0
17
- hdrdir = $(topdir)
18
- arch_hdrdir = /home/samsam/.rbenv/versions/3.1.4/include/ruby-3.1.0/x86_64-linux
19
- PATH_SEPARATOR = :
20
- VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
21
- prefix = $(DESTDIR)/home/samsam/.rbenv/versions/3.1.4
22
- rubysitearchprefix = $(rubylibprefix)/$(sitearch)
23
- rubyarchprefix = $(rubylibprefix)/$(arch)
24
- rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
25
- exec_prefix = $(prefix)
26
- vendorarchhdrdir = $(vendorhdrdir)/$(sitearch)
27
- sitearchhdrdir = $(sitehdrdir)/$(sitearch)
28
- rubyarchhdrdir = $(rubyhdrdir)/$(arch)
29
- vendorhdrdir = $(rubyhdrdir)/vendor_ruby
30
- sitehdrdir = $(rubyhdrdir)/site_ruby
31
- rubyhdrdir = $(includedir)/$(RUBY_VERSION_NAME)
32
- vendorarchdir = $(vendorlibdir)/$(sitearch)
33
- vendorlibdir = $(vendordir)/$(ruby_version)
34
- vendordir = $(rubylibprefix)/vendor_ruby
35
- sitearchdir = $(sitelibdir)/$(sitearch)
36
- sitelibdir = $(sitedir)/$(ruby_version)
37
- sitedir = $(rubylibprefix)/site_ruby
38
- rubyarchdir = $(rubylibdir)/$(arch)
39
- rubylibdir = $(rubylibprefix)/$(ruby_version)
40
- sitearchincludedir = $(includedir)/$(sitearch)
41
- archincludedir = $(includedir)/$(arch)
42
- sitearchlibdir = $(libdir)/$(sitearch)
43
- archlibdir = $(libdir)/$(arch)
44
- ridir = $(datarootdir)/$(RI_BASE_NAME)
45
- mandir = $(datarootdir)/man
46
- localedir = $(datarootdir)/locale
47
- libdir = $(exec_prefix)/lib
48
- psdir = $(docdir)
49
- pdfdir = $(docdir)
50
- dvidir = $(docdir)
51
- htmldir = $(docdir)
52
- infodir = $(datarootdir)/info
53
- docdir = $(datarootdir)/doc/$(PACKAGE)
54
- oldincludedir = $(DESTDIR)/usr/include
55
- includedir = $(prefix)/include
56
- runstatedir = $(localstatedir)/run
57
- localstatedir = $(prefix)/var
58
- sharedstatedir = $(prefix)/com
59
- sysconfdir = $(prefix)/etc
60
- datadir = $(datarootdir)
61
- datarootdir = $(prefix)/share
62
- libexecdir = $(exec_prefix)/libexec
63
- sbindir = $(exec_prefix)/sbin
64
- bindir = $(exec_prefix)/bin
65
- archdir = $(rubyarchdir)
66
-
67
-
68
- CC_WRAPPER =
69
- CC = gcc
70
- CXX = g++
71
- LIBRUBY = $(LIBRUBY_SO)
72
- LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
73
- LIBRUBYARG_SHARED = -Wl,-rpath,$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)
74
- LIBRUBYARG_STATIC = -Wl,-rpath,$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)-static $(MAINLIBS)
75
- empty =
76
- OUTFLAG = -o $(empty)
77
- COUTFLAG = -o $(empty)
78
- CSRCFLAG = $(empty)
79
-
80
- RUBY_EXTCONF_H =
81
- cflags = $(optflags) $(debugflags) $(warnflags)
82
- cxxflags =
83
- optflags = -O3 -fno-fast-math
84
- debugflags = -ggdb3
85
- warnflags = -Wall -Wextra -Wdeprecated-declarations -Wduplicated-cond -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wwrite-strings -Wold-style-definition -Wimplicit-fallthrough=0 -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-packed-bitfield-compat -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wsuggest-attribute=format -Wsuggest-attribute=noreturn -Wunused-variable -Wundef
86
- cppflags =
87
- CCDLFLAGS = -fPIC
88
- CFLAGS = $(CCDLFLAGS) $(cflags) -fPIC $(ARCH_FLAG)
89
- INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
90
- DEFS =
91
- CPPFLAGS = -I/home/samsam/.rbenv/versions/3.1.4/include $(DEFS) $(cppflags)
92
- CXXFLAGS = $(CCDLFLAGS) -Wall -Wno-unused-variable -O3 -fopenmp $(ARCH_FLAG)
93
- ldflags = -L. -L/home/samsam/.rbenv/versions/3.1.4/lib -fstack-protector-strong -rdynamic -Wl,-export-dynamic -Wl,--no-as-needed
94
- dldflags = -L/home/samsam/.rbenv/versions/3.1.4/lib -Wl,--compress-debug-sections=zlib
95
- ARCH_FLAG =
96
- DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
97
- LDSHARED = $(CC) -shared
98
- LDSHAREDXX = $(CXX) -shared
99
- AR = gcc-ar
100
- EXEEXT =
101
-
102
- RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
103
- RUBY_SO_NAME = ruby
104
- RUBYW_INSTALL_NAME =
105
- RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
106
- RUBYW_BASE_NAME = rubyw
107
- RUBY_BASE_NAME = ruby
108
-
109
- arch = x86_64-linux
110
- sitearch = $(arch)
111
- ruby_version = 3.1.0
112
- ruby = $(bindir)/$(RUBY_BASE_NAME)
113
- RUBY = $(ruby)
114
- BUILTRUBY = $(bindir)/$(RUBY_BASE_NAME)
115
- ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/backward.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
116
-
117
- RM = rm -f
118
- RM_RF = rm -fr
119
- RMDIRS = rmdir --ignore-fail-on-non-empty -p
120
- MAKEDIRS = /usr/bin/mkdir -p
121
- INSTALL = /usr/bin/install -c
122
- INSTALL_PROG = $(INSTALL) -m 0755
123
- INSTALL_DATA = $(INSTALL) -m 644
124
- COPY = cp
125
- TOUCH = exit >
126
-
127
- #### End of system configuration section. ####
128
-
129
- preload =
130
- libpath = . $(libdir)
131
- LIBPATH = -L. -L$(libdir) -Wl,-rpath,$(libdir)
132
- DEFFILE =
133
-
134
- CLEANFILES = mkmf.log
135
- DISTCLEANFILES =
136
- DISTCLEANDIRS =
137
-
138
- extout =
139
- extout_prefix =
140
- target_prefix =
141
- LOCAL_LIBS =
142
- LIBS = $(LIBRUBYARG_SHARED) -lgomp -lstdc++ -lm -lc
143
- ORIG_SRCS = demo.cpp ruby_interf.cpp
144
- SRCS = $(ORIG_SRCS)
145
- OBJS = demo.o ruby_interf.o
146
- HDRS = $(srcdir)/unordered_dense.h $(srcdir)/stridx.hpp
147
- LOCAL_HDRS =
148
- TARGET = stridx
149
- TARGET_NAME = stridx
150
- TARGET_ENTRY = Init_$(TARGET_NAME)
151
- DLLIB = $(TARGET).so
152
- EXTSTATIC =
153
- STATIC_LIB =
154
-
155
- TIMESTAMP_DIR = .
156
- BINDIR = $(bindir)
157
- RUBYCOMMONDIR = $(sitedir)$(target_prefix)
158
- RUBYLIBDIR = $(sitelibdir)$(target_prefix)
159
- RUBYARCHDIR = $(sitearchdir)$(target_prefix)
160
- HDRDIR = $(sitehdrdir)$(target_prefix)
161
- ARCHHDRDIR = $(sitearchhdrdir)$(target_prefix)
162
- TARGET_SO_DIR =
163
- TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
164
- CLEANLIBS = $(TARGET_SO) false
165
- CLEANOBJS = *.o *.bak
166
-
167
- all: $(DLLIB)
168
- static: $(STATIC_LIB)
169
- .PHONY: all install static install-so install-rb
170
- .PHONY: clean clean-so clean-static clean-rb
171
-
172
- clean-static::
173
- clean-rb-default::
174
- clean-rb::
175
- clean-so::
176
- clean: clean-so clean-static clean-rb-default clean-rb
177
- -$(Q)$(RM_RF) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
178
-
179
- distclean-rb-default::
180
- distclean-rb::
181
- distclean-so::
182
- distclean-static::
183
- distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
184
- -$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
185
- -$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
186
- -$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
187
-
188
- realclean: distclean
189
- install: install-so install-rb
190
-
191
- install-so: $(DLLIB) $(TIMESTAMP_DIR)/.sitearchdir.time
192
- $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
193
- clean-static::
194
- -$(Q)$(RM) $(STATIC_LIB)
195
- install-rb: pre-install-rb do-install-rb install-rb-default
196
- install-rb-default: pre-install-rb-default do-install-rb-default
197
- pre-install-rb: Makefile
198
- pre-install-rb-default: Makefile
199
- do-install-rb:
200
- do-install-rb-default:
201
- pre-install-rb-default:
202
- @$(NULLCMD)
203
- $(TIMESTAMP_DIR)/.sitearchdir.time:
204
- $(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
205
- $(Q) $(TOUCH) $@
206
-
207
- site-install: site-install-so site-install-rb
208
- site-install-so: install-so
209
- site-install-rb: install-rb
210
-
211
- .SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
212
-
213
- .cc.o:
214
- $(ECHO) compiling $(<)
215
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
216
-
217
- .cc.S:
218
- $(ECHO) translating $(<)
219
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
220
-
221
- .mm.o:
222
- $(ECHO) compiling $(<)
223
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
224
-
225
- .mm.S:
226
- $(ECHO) translating $(<)
227
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
228
-
229
- .cxx.o:
230
- $(ECHO) compiling $(<)
231
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
232
-
233
- .cxx.S:
234
- $(ECHO) translating $(<)
235
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
236
-
237
- .cpp.o:
238
- $(ECHO) compiling $(<)
239
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
240
-
241
- .cpp.S:
242
- $(ECHO) translating $(<)
243
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
244
-
245
- .c.o:
246
- $(ECHO) compiling $(<)
247
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
248
-
249
- .c.S:
250
- $(ECHO) translating $(<)
251
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
252
-
253
- .m.o:
254
- $(ECHO) compiling $(<)
255
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
256
-
257
- .m.S:
258
- $(ECHO) translating $(<)
259
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
260
-
261
- $(TARGET_SO): $(OBJS) Makefile
262
- $(ECHO) linking shared-object $(DLLIB)
263
- -$(Q)$(RM) $(@)
264
- $(Q) $(LDSHAREDXX) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
265
-
266
-
267
-
268
- $(OBJS): $(HDRS) $(ruby_headers)
data/README.md CHANGED
@@ -43,6 +43,15 @@ Install:
43
43
  gem install StrIdx
44
44
  ```
45
45
 
46
+ Or, for the development version:
47
+ ```
48
+ git clone https://github.com/SamiSieranoja/stridx.git
49
+ cd stridx
50
+ cd rubyext; ruby extconf.rb ; make ; cd ..
51
+ gem build stridx.gemspec
52
+ gem install $(ls -1tr StrIdx*gem | tail -n 1)
53
+ ```
54
+
46
55
  Usage example (see test.rb):
47
56
  ```ruby
48
57
  require "stridx"
@@ -58,7 +67,7 @@ for x in lines
58
67
  end
59
68
 
60
69
  idx_time = Time.new
61
- puts "\nIndexing time (#{lines.size} files}): #{(idx_time - t).round(4)} seconds"
70
+ puts "\nIndexing time (#{lines.size} files): #{(idx_time - t).round(4)} seconds"
62
71
 
63
72
  query = "rngnomadriv"
64
73
  res = idx.find(query)
data/demo.cpp CHANGED
@@ -1,3 +1,12 @@
1
+
2
+ #include <condition_variable>
3
+ #include <functional>
4
+ #include <iostream>
5
+ #include <mutex>
6
+ #include <queue>
7
+ #include <thread>
8
+ #include <algorithm>
9
+
1
10
  #include "stridx.hpp"
2
11
 
3
12
  #include <iostream>
@@ -28,7 +37,7 @@ std::vector<std::string> readLinesFromFile(const std::string &filename) {
28
37
  }
29
38
 
30
39
  int main() {
31
- StringIndex idx;
40
+ StrIdx::StringIndex idx('/'); // Separate directories using unix style "/" char
32
41
  // idx.addStrToIndex("./gdk/x11/gdkasync.c", 0 /*id*/, '/' /*separator*/);
33
42
  // idx.addStrToIndex("./gdk/x11/gdksettings.c", 1, '/');
34
43
  // idx.addStrToIndex("./gdk/x11/gdkx11devicemanager-xi2.h", 2, '/');
@@ -37,26 +46,39 @@ int main() {
37
46
  std::string fn_filePaths = "flist.txt";
38
47
  std::vector<std::string> v_filePaths = readLinesFromFile(fn_filePaths);
39
48
 
49
+ // Launch indexing to run in the background
50
+ cout << "File paths: " << v_filePaths.size() << std::endl;
51
+ cout << "Start indexing in the background" << std::endl;
40
52
  auto start = std::chrono::high_resolution_clock::now();
41
53
  int id = 0;
42
54
  for (const auto &filePath : v_filePaths) {
43
- idx.addStrToIndex(filePath, id, '/' /*dir separator*/);
44
- // idx.addStrToIndex(filePath, id, '\0' /*dir separator*/);
55
+ idx.addStrToIndexThreaded(filePath, id);
45
56
  id++;
46
57
  }
47
-
58
+
59
+ auto idx_time_launch = std::chrono::high_resolution_clock::now();
60
+ std::chrono::duration<double, std::milli> duration_launch = idx_time_launch - start;
61
+ cout << "Indexing launch time (seconds): " << duration_launch.count() / 1000 << "\n";
62
+
63
+ // Wait until indexing has finished
64
+ idx.waitUntilDone();
65
+
48
66
  auto idx_time = std::chrono::high_resolution_clock::now();
49
67
  std::chrono::duration<double, std::milli> duration = idx_time - start;
50
- cout << "Indexing creation time for " << v_filePaths.size() << " file paths (seconds): " << duration.count() / 1000 << "\n";
68
+ cout << "Indexing finished time for " << v_filePaths.size()
69
+ << " file paths (seconds): " << duration.count() / 1000 << "\n";
51
70
 
52
71
  // Find matching filepaths from the index for the query string "rngnomadriv"
53
72
  start = std::chrono::high_resolution_clock::now();
54
73
  std::string query = "rngnomadriv";
74
+ for (int i = 0; i < 99; i++) {
75
+ const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
76
+ }
77
+
55
78
  const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
56
79
  auto search_time = std::chrono::high_resolution_clock::now();
57
80
  duration = search_time - start;
58
- cout << "Search time (seconds): " << duration.count() / 1000
59
- << "\n";
81
+ cout << "Search time for 100 queries (seconds): " << duration.count() / 1000 << "\n";
60
82
 
61
83
  int i = 0;
62
84
  std::cout << "query string: " << query << "\n";
@@ -73,4 +95,4 @@ int main() {
73
95
  }
74
96
 
75
97
  // Compile:
76
- // g++ -Wall -Wno-unused-variable -O3 -fopenmp -lstdc++ demo.cpp -o demo
98
+ // g++ -Wall -Wno-unused-variable -O3 -lstdc++ demo.cpp -o demo