StrIdx 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c3e062e9a4d99367ff312ab222e861362e05381a034f513d1581cc2464df53df
4
- data.tar.gz: 6088bf73c44fa3487757f2a0d3a60fcc6b00ee0514216a76278f027d83cc7807
3
+ metadata.gz: 52d40e64a5ace0231828bdbbe6fd94475ab5986d0c1fb7e35e0ce18463a97ae0
4
+ data.tar.gz: e1cdcc2ed9f377b2acb049a9fb6de22f24acdbd6e3552748b1307342c10b6cf7
5
5
  SHA512:
6
- metadata.gz: ece52c12cef6f1460e995e296fbaed2911e45a15d46cbe1fd1b0ff21f8ae546198078a83951cb9430bf82ff81d866a22e0244c7472f566efa2376285b8e805b2
7
- data.tar.gz: 8b6751d914e41ffbe9c69274054ae398679cabdd159f0ab1c45ca5dc899a9a34c626f782ca4518c68b7d599efe71ea61489e277061b42284bc22345d793b116e
6
+ metadata.gz: f3c27923a568fe5916c17e91766066362a965abf9568b21a4daa269cd16a8a4778248ae935b26502aa482aad4807908d401989e7ebfb88d1fbdb011b0c240b60
7
+ data.tar.gz: f94dda8d71931c18ae3dc6b58204edda7ffd649bc7452a74fdba4929d6092183e99bf99d7b3632be5bafccfd0be7a877f5513c8c3e72e814dcca08bd79a9b217
data/Makefile CHANGED
@@ -1,268 +1,9 @@
1
+ all: demo
1
2
 
2
- SHELL = /bin/sh
3
+ demo: *.hpp *.cpp Makefile
4
+ g++ -Wall -O3 -lstdc++ demo.cpp -o demo
5
+
6
+ clean:
7
+ rm demo
3
8
 
4
- # V=0 quiet, V=1 verbose. other values don't work.
5
- V = 0
6
- V0 = $(V:0=)
7
- Q1 = $(V:1=)
8
- Q = $(Q1:0=@)
9
- ECHO1 = $(V:1=@ :)
10
- ECHO = $(ECHO1:0=@ echo)
11
- NULLCMD = :
12
9
 
13
- #### Start of system configuration section. ####
14
-
15
- srcdir = .
16
- topdir = /home/samsam/.rbenv/versions/3.1.4/include/ruby-3.1.0
17
- hdrdir = $(topdir)
18
- arch_hdrdir = /home/samsam/.rbenv/versions/3.1.4/include/ruby-3.1.0/x86_64-linux
19
- PATH_SEPARATOR = :
20
- VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
21
- prefix = $(DESTDIR)/home/samsam/.rbenv/versions/3.1.4
22
- rubysitearchprefix = $(rubylibprefix)/$(sitearch)
23
- rubyarchprefix = $(rubylibprefix)/$(arch)
24
- rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
25
- exec_prefix = $(prefix)
26
- vendorarchhdrdir = $(vendorhdrdir)/$(sitearch)
27
- sitearchhdrdir = $(sitehdrdir)/$(sitearch)
28
- rubyarchhdrdir = $(rubyhdrdir)/$(arch)
29
- vendorhdrdir = $(rubyhdrdir)/vendor_ruby
30
- sitehdrdir = $(rubyhdrdir)/site_ruby
31
- rubyhdrdir = $(includedir)/$(RUBY_VERSION_NAME)
32
- vendorarchdir = $(vendorlibdir)/$(sitearch)
33
- vendorlibdir = $(vendordir)/$(ruby_version)
34
- vendordir = $(rubylibprefix)/vendor_ruby
35
- sitearchdir = $(sitelibdir)/$(sitearch)
36
- sitelibdir = $(sitedir)/$(ruby_version)
37
- sitedir = $(rubylibprefix)/site_ruby
38
- rubyarchdir = $(rubylibdir)/$(arch)
39
- rubylibdir = $(rubylibprefix)/$(ruby_version)
40
- sitearchincludedir = $(includedir)/$(sitearch)
41
- archincludedir = $(includedir)/$(arch)
42
- sitearchlibdir = $(libdir)/$(sitearch)
43
- archlibdir = $(libdir)/$(arch)
44
- ridir = $(datarootdir)/$(RI_BASE_NAME)
45
- mandir = $(datarootdir)/man
46
- localedir = $(datarootdir)/locale
47
- libdir = $(exec_prefix)/lib
48
- psdir = $(docdir)
49
- pdfdir = $(docdir)
50
- dvidir = $(docdir)
51
- htmldir = $(docdir)
52
- infodir = $(datarootdir)/info
53
- docdir = $(datarootdir)/doc/$(PACKAGE)
54
- oldincludedir = $(DESTDIR)/usr/include
55
- includedir = $(prefix)/include
56
- runstatedir = $(localstatedir)/run
57
- localstatedir = $(prefix)/var
58
- sharedstatedir = $(prefix)/com
59
- sysconfdir = $(prefix)/etc
60
- datadir = $(datarootdir)
61
- datarootdir = $(prefix)/share
62
- libexecdir = $(exec_prefix)/libexec
63
- sbindir = $(exec_prefix)/sbin
64
- bindir = $(exec_prefix)/bin
65
- archdir = $(rubyarchdir)
66
-
67
-
68
- CC_WRAPPER =
69
- CC = gcc
70
- CXX = g++
71
- LIBRUBY = $(LIBRUBY_SO)
72
- LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
73
- LIBRUBYARG_SHARED = -Wl,-rpath,$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)
74
- LIBRUBYARG_STATIC = -Wl,-rpath,$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)-static $(MAINLIBS)
75
- empty =
76
- OUTFLAG = -o $(empty)
77
- COUTFLAG = -o $(empty)
78
- CSRCFLAG = $(empty)
79
-
80
- RUBY_EXTCONF_H =
81
- cflags = $(optflags) $(debugflags) $(warnflags)
82
- cxxflags =
83
- optflags = -O3 -fno-fast-math
84
- debugflags = -ggdb3
85
- warnflags = -Wall -Wextra -Wdeprecated-declarations -Wduplicated-cond -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wwrite-strings -Wold-style-definition -Wimplicit-fallthrough=0 -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-packed-bitfield-compat -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wsuggest-attribute=format -Wsuggest-attribute=noreturn -Wunused-variable -Wundef
86
- cppflags =
87
- CCDLFLAGS = -fPIC
88
- CFLAGS = $(CCDLFLAGS) $(cflags) -fPIC $(ARCH_FLAG)
89
- INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
90
- DEFS =
91
- CPPFLAGS = -I/home/samsam/.rbenv/versions/3.1.4/include $(DEFS) $(cppflags)
92
- CXXFLAGS = $(CCDLFLAGS) -Wall -Wno-unused-variable -O3 -fopenmp $(ARCH_FLAG)
93
- ldflags = -L. -L/home/samsam/.rbenv/versions/3.1.4/lib -fstack-protector-strong -rdynamic -Wl,-export-dynamic -Wl,--no-as-needed
94
- dldflags = -L/home/samsam/.rbenv/versions/3.1.4/lib -Wl,--compress-debug-sections=zlib
95
- ARCH_FLAG =
96
- DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
97
- LDSHARED = $(CC) -shared
98
- LDSHAREDXX = $(CXX) -shared
99
- AR = gcc-ar
100
- EXEEXT =
101
-
102
- RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
103
- RUBY_SO_NAME = ruby
104
- RUBYW_INSTALL_NAME =
105
- RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
106
- RUBYW_BASE_NAME = rubyw
107
- RUBY_BASE_NAME = ruby
108
-
109
- arch = x86_64-linux
110
- sitearch = $(arch)
111
- ruby_version = 3.1.0
112
- ruby = $(bindir)/$(RUBY_BASE_NAME)
113
- RUBY = $(ruby)
114
- BUILTRUBY = $(bindir)/$(RUBY_BASE_NAME)
115
- ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/backward.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
116
-
117
- RM = rm -f
118
- RM_RF = rm -fr
119
- RMDIRS = rmdir --ignore-fail-on-non-empty -p
120
- MAKEDIRS = /usr/bin/mkdir -p
121
- INSTALL = /usr/bin/install -c
122
- INSTALL_PROG = $(INSTALL) -m 0755
123
- INSTALL_DATA = $(INSTALL) -m 644
124
- COPY = cp
125
- TOUCH = exit >
126
-
127
- #### End of system configuration section. ####
128
-
129
- preload =
130
- libpath = . $(libdir)
131
- LIBPATH = -L. -L$(libdir) -Wl,-rpath,$(libdir)
132
- DEFFILE =
133
-
134
- CLEANFILES = mkmf.log
135
- DISTCLEANFILES =
136
- DISTCLEANDIRS =
137
-
138
- extout =
139
- extout_prefix =
140
- target_prefix =
141
- LOCAL_LIBS =
142
- LIBS = $(LIBRUBYARG_SHARED) -lgomp -lstdc++ -lm -lc
143
- ORIG_SRCS = demo.cpp ruby_interf.cpp
144
- SRCS = $(ORIG_SRCS)
145
- OBJS = demo.o ruby_interf.o
146
- HDRS = $(srcdir)/unordered_dense.h $(srcdir)/stridx.hpp
147
- LOCAL_HDRS =
148
- TARGET = stridx
149
- TARGET_NAME = stridx
150
- TARGET_ENTRY = Init_$(TARGET_NAME)
151
- DLLIB = $(TARGET).so
152
- EXTSTATIC =
153
- STATIC_LIB =
154
-
155
- TIMESTAMP_DIR = .
156
- BINDIR = $(bindir)
157
- RUBYCOMMONDIR = $(sitedir)$(target_prefix)
158
- RUBYLIBDIR = $(sitelibdir)$(target_prefix)
159
- RUBYARCHDIR = $(sitearchdir)$(target_prefix)
160
- HDRDIR = $(sitehdrdir)$(target_prefix)
161
- ARCHHDRDIR = $(sitearchhdrdir)$(target_prefix)
162
- TARGET_SO_DIR =
163
- TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
164
- CLEANLIBS = $(TARGET_SO) false
165
- CLEANOBJS = *.o *.bak
166
-
167
- all: $(DLLIB)
168
- static: $(STATIC_LIB)
169
- .PHONY: all install static install-so install-rb
170
- .PHONY: clean clean-so clean-static clean-rb
171
-
172
- clean-static::
173
- clean-rb-default::
174
- clean-rb::
175
- clean-so::
176
- clean: clean-so clean-static clean-rb-default clean-rb
177
- -$(Q)$(RM_RF) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
178
-
179
- distclean-rb-default::
180
- distclean-rb::
181
- distclean-so::
182
- distclean-static::
183
- distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
184
- -$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
185
- -$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
186
- -$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
187
-
188
- realclean: distclean
189
- install: install-so install-rb
190
-
191
- install-so: $(DLLIB) $(TIMESTAMP_DIR)/.sitearchdir.time
192
- $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
193
- clean-static::
194
- -$(Q)$(RM) $(STATIC_LIB)
195
- install-rb: pre-install-rb do-install-rb install-rb-default
196
- install-rb-default: pre-install-rb-default do-install-rb-default
197
- pre-install-rb: Makefile
198
- pre-install-rb-default: Makefile
199
- do-install-rb:
200
- do-install-rb-default:
201
- pre-install-rb-default:
202
- @$(NULLCMD)
203
- $(TIMESTAMP_DIR)/.sitearchdir.time:
204
- $(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
205
- $(Q) $(TOUCH) $@
206
-
207
- site-install: site-install-so site-install-rb
208
- site-install-so: install-so
209
- site-install-rb: install-rb
210
-
211
- .SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
212
-
213
- .cc.o:
214
- $(ECHO) compiling $(<)
215
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
216
-
217
- .cc.S:
218
- $(ECHO) translating $(<)
219
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
220
-
221
- .mm.o:
222
- $(ECHO) compiling $(<)
223
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
224
-
225
- .mm.S:
226
- $(ECHO) translating $(<)
227
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
228
-
229
- .cxx.o:
230
- $(ECHO) compiling $(<)
231
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
232
-
233
- .cxx.S:
234
- $(ECHO) translating $(<)
235
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
236
-
237
- .cpp.o:
238
- $(ECHO) compiling $(<)
239
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
240
-
241
- .cpp.S:
242
- $(ECHO) translating $(<)
243
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
244
-
245
- .c.o:
246
- $(ECHO) compiling $(<)
247
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
248
-
249
- .c.S:
250
- $(ECHO) translating $(<)
251
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
252
-
253
- .m.o:
254
- $(ECHO) compiling $(<)
255
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
256
-
257
- .m.S:
258
- $(ECHO) translating $(<)
259
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
260
-
261
- $(TARGET_SO): $(OBJS) Makefile
262
- $(ECHO) linking shared-object $(DLLIB)
263
- -$(Q)$(RM) $(@)
264
- $(Q) $(LDSHAREDXX) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
265
-
266
-
267
-
268
- $(OBJS): $(HDRS) $(ruby_headers)
data/README.md CHANGED
@@ -43,6 +43,15 @@ Install:
43
43
  gem install StrIdx
44
44
  ```
45
45
 
46
+ Or, for the development version:
47
+ ```
48
+ git clone https://github.com/SamiSieranoja/stridx.git
49
+ cd stridx
50
+ cd rubyext; ruby extconf.rb ; make ; cd ..
51
+ gem build stridx.gemspec
52
+ gem install $(ls -1tr StrIdx*gem | tail -n 1)
53
+ ```
54
+
46
55
  Usage example (see test.rb):
47
56
  ```ruby
48
57
  require "stridx"
@@ -58,7 +67,7 @@ for x in lines
58
67
  end
59
68
 
60
69
  idx_time = Time.new
61
- puts "\nIndexing time (#{lines.size} files}): #{(idx_time - t).round(4)} seconds"
70
+ puts "\nIndexing time (#{lines.size} files): #{(idx_time - t).round(4)} seconds"
62
71
 
63
72
  query = "rngnomadriv"
64
73
  res = idx.find(query)
data/demo.cpp CHANGED
@@ -1,3 +1,12 @@
1
+
2
+ #include <condition_variable>
3
+ #include <functional>
4
+ #include <iostream>
5
+ #include <mutex>
6
+ #include <queue>
7
+ #include <thread>
8
+ #include <algorithm>
9
+
1
10
  #include "stridx.hpp"
2
11
 
3
12
  #include <iostream>
@@ -28,7 +37,7 @@ std::vector<std::string> readLinesFromFile(const std::string &filename) {
28
37
  }
29
38
 
30
39
  int main() {
31
- StringIndex idx;
40
+ StrIdx::StringIndex idx('/'); // Separate directories using unix style "/" char
32
41
  // idx.addStrToIndex("./gdk/x11/gdkasync.c", 0 /*id*/, '/' /*separator*/);
33
42
  // idx.addStrToIndex("./gdk/x11/gdksettings.c", 1, '/');
34
43
  // idx.addStrToIndex("./gdk/x11/gdkx11devicemanager-xi2.h", 2, '/');
@@ -37,26 +46,39 @@ int main() {
37
46
  std::string fn_filePaths = "flist.txt";
38
47
  std::vector<std::string> v_filePaths = readLinesFromFile(fn_filePaths);
39
48
 
49
+ // Launch indexing to run in the background
50
+ cout << "File paths: " << v_filePaths.size() << std::endl;
51
+ cout << "Start indexing in the background" << std::endl;
40
52
  auto start = std::chrono::high_resolution_clock::now();
41
53
  int id = 0;
42
54
  for (const auto &filePath : v_filePaths) {
43
- idx.addStrToIndex(filePath, id, '/' /*dir separator*/);
44
- // idx.addStrToIndex(filePath, id, '\0' /*dir separator*/);
55
+ idx.addStrToIndexThreaded(filePath, id);
45
56
  id++;
46
57
  }
47
-
58
+
59
+ auto idx_time_launch = std::chrono::high_resolution_clock::now();
60
+ std::chrono::duration<double, std::milli> duration_launch = idx_time_launch - start;
61
+ cout << "Indexing launch time (seconds): " << duration_launch.count() / 1000 << "\n";
62
+
63
+ // Wait until indexing has finished
64
+ idx.waitUntilDone();
65
+
48
66
  auto idx_time = std::chrono::high_resolution_clock::now();
49
67
  std::chrono::duration<double, std::milli> duration = idx_time - start;
50
- cout << "Indexing creation time for " << v_filePaths.size() << " file paths (seconds): " << duration.count() / 1000 << "\n";
68
+ cout << "Indexing finished time for " << v_filePaths.size()
69
+ << " file paths (seconds): " << duration.count() / 1000 << "\n";
51
70
 
52
71
  // Find matching filepaths from the index for the query string "rngnomadriv"
53
72
  start = std::chrono::high_resolution_clock::now();
54
73
  std::string query = "rngnomadriv";
74
+ for (int i = 0; i < 99; i++) {
75
+ const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
76
+ }
77
+
55
78
  const vector<pair<float, int>> &results = idx.findSimilar(query, 2);
56
79
  auto search_time = std::chrono::high_resolution_clock::now();
57
80
  duration = search_time - start;
58
- cout << "Search time (seconds): " << duration.count() / 1000
59
- << "\n";
81
+ cout << "Search time for 100 queries (seconds): " << duration.count() / 1000 << "\n";
60
82
 
61
83
  int i = 0;
62
84
  std::cout << "query string: " << query << "\n";
@@ -73,4 +95,4 @@ int main() {
73
95
  }
74
96
 
75
97
  // Compile:
76
- // g++ -Wall -Wno-unused-variable -O3 -fopenmp -lstdc++ demo.cpp -o demo
98
+ // g++ -Wall -Wno-unused-variable -O3 -lstdc++ demo.cpp -o demo