StrIdx 0.1.0 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -36,12 +36,12 @@ VALUE StringIndexAddSegments(VALUE self, VALUE str, VALUE fileId) {
36
36
 
37
37
  void *data;
38
38
  TypedData_Get_Struct(self, int, &str_idx_type, data);
39
- ((StringIndex *)data)->addStrToIndex(s1, fid, '/');
39
+ ((StringIndex *)data)->addStrToIndex(s1, fid);
40
40
 
41
41
  return self;
42
42
  }
43
43
 
44
- VALUE StringIndexFind(VALUE self, VALUE str, VALUE minChars) {
44
+ VALUE StringIndexFind(VALUE self, VALUE str) {
45
45
  VALUE ret;
46
46
  std::string s1 = StringValueCStr(str);
47
47
 
@@ -50,7 +50,7 @@ VALUE StringIndexFind(VALUE self, VALUE str, VALUE minChars) {
50
50
  StringIndex *idx = (StringIndex *)data;
51
51
 
52
52
  ret = rb_ary_new();
53
- const std::vector<std::pair<float, int>> &results = idx->findSimilar(s1, NUM2INT(minChars));
53
+ const std::vector<std::pair<float, int>> &results = idx->findSimilar(s1, 2);
54
54
  int limit = 15;
55
55
  int i = 0;
56
56
  for (const auto &res : results) {
@@ -66,14 +66,35 @@ VALUE StringIndexFind(VALUE self, VALUE str, VALUE minChars) {
66
66
  return ret;
67
67
  }
68
68
 
69
+ VALUE StringIndexSetDirSeparator(VALUE self, VALUE str) {
70
+ char c = '/';
71
+ if (TYPE(str) == T_STRING) {
72
+ std::string s = StringValueCStr(str);
73
+ if (s.size() >= 1) {
74
+ c = s[0];
75
+ }
76
+ } else {
77
+ c = '\0'; // No separator
78
+ // if (TYPE(obj) == T_NIL)
79
+ }
80
+
81
+ void *data;
82
+ TypedData_Get_Struct(self, int, &str_idx_type, data);
83
+ StringIndex *idx = (StringIndex *)data;
84
+ idx->setDirSeparator(c);
85
+
86
+ return self;
87
+ }
88
+
69
89
  void Init_stridx(void) {
70
90
 
71
- VALUE cFoo = rb_define_class("CppStringIndex", rb_cObject);
91
+ VALUE mStrIdx = rb_define_module("StrIdx");
92
+ VALUE classStringIndex = rb_define_class_under(mStrIdx, "StringIndex", rb_cObject);
72
93
 
73
- rb_define_alloc_func(cFoo, str_idx_alloc);
74
- rb_define_method(cFoo, "add", StringIndexAddSegments, 2);
75
- rb_define_method(cFoo, "find", StringIndexFind, 2);
94
+ rb_define_alloc_func(classStringIndex, str_idx_alloc);
95
+ rb_define_method(classStringIndex, "add", StringIndexAddSegments, 2);
96
+ rb_define_method(classStringIndex, "find", StringIndexFind, 1);
97
+ rb_define_method(classStringIndex, "setDirSeparator", StringIndexSetDirSeparator, 1);
76
98
  }
77
99
 
78
100
  } // End extern "C"
79
-
data/stridx.hpp CHANGED
@@ -214,7 +214,7 @@ public:
214
214
  }
215
215
 
216
216
  /**
217
- * Add a string to the index to be search for afterwards
217
+ * Add a string to the index to be searched for afterwards
218
218
  *
219
219
  * @param filePath String to index (e.g. /home/user/Project/main.cpp).
220
220
  * @param fileId Unique identifier for filePath. Will be returned as the result from findSimilar.
@@ -272,7 +272,7 @@ public:
272
272
  }
273
273
 
274
274
  /**
275
- * The search will find filepaths similar to the input string
275
+ The search will find filepaths similar to the input string
276
276
 
277
277
  To be considered a candidate path, the file component of the path (e.g. file.txt)
278
278
  is required to have at least a substring of two characters in common with the
@@ -286,8 +286,8 @@ public:
286
286
  is also included in the PathSegment
287
287
  - take the length of that substring as score
288
288
  sum up the scores for each character c and divide by (string length)^2
289
-
290
- For example, if query = "rngnomadriv"
289
+
290
+ For example, if query = "rngnomadriv"
291
291
  and candidate is "./drivers/char/hw_random/nomadik-rng.c", then scores are calculated
292
292
  as follows:
293
293
  rngnomadriv
@@ -296,13 +296,17 @@ public:
296
296
  score1=(3+3+3+5+5+5+5+5+(4+4+4)*0.7)
297
297
 
298
298
  In final score, give a small penalty for larger candidate filenames:
299
- Divide main part of score with (query string length)^2
299
+ Divide main part of score with (query string length)^2
300
300
  and minor part by (query string length)*(candidate string length)
301
301
  score = score1/(11*11)*0.97 + score1/(11*38)*0.03 = 0.342944
302
302
 
303
303
  @param query String to search for inside the index
304
304
  */
305
305
 
306
+ std::vector<std::pair<float, int>> findSimilar(std::string query) {
307
+ return findSimilar(query, 2);
308
+ }
309
+
306
310
  std::vector<std::pair<float, int>> findSimilar(std::string query, int minChars) {
307
311
  CandMap fileCandMap;
308
312
  CandMap dirCandMap;
@@ -1,34 +1,31 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- $:.unshift File.dirname(__FILE__)
4
3
  require "stridx"
5
- idx = CppStringIndex.new
4
+ idx = StrIdx::StringIndex.new
6
5
 
7
6
  t = Time.new
8
- fn = File.expand_path("../flist.txt")
7
+ fn = File.expand_path("flist.txt")
9
8
  lines = IO.read(fn).lines.collect { |x| x.strip }
10
- i = 1
9
+ i = 0
11
10
  for x in lines
12
11
  idx.add(x, i)
13
12
  i += 1
14
13
  end
15
14
 
16
15
  idx_time = Time.new
17
- puts "\nIndexing time: #{idx_time - t}"
18
- query = "helbind.h"
19
- res = idx.find(query, 2)
16
+ puts "\nIndexing time (#{lines.size} files}): #{(idx_time - t).round(4)} seconds"
17
+
18
+ query = "rngnomadriv"
19
+ res = idx.find(query)
20
20
  puts "query: #{query}"
21
21
  puts "\nResults:"
22
22
  puts "Filename, score"
23
23
  puts "==============="
24
- for x in res
25
- fn = lines[x[0] - 1]
26
- score = x[1]
24
+ for id, score in res
25
+ fn = lines[id]
27
26
  puts "#{fn}, #{score.round(4)}"
28
- # pp [lines[x[0] - 1], x[1]]
29
27
  end
30
28
 
31
-
32
29
  query_time = Time.new
33
30
 
34
- puts "\nSearch time: #{query_time - idx_time}"
31
+ puts "\nSearch time: #{(query_time - idx_time).round(4)} seconds"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: StrIdx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sami Sieranoja
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-05-05 00:00:00.000000000 Z
11
+ date: 2024-05-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -50,16 +50,19 @@ files:
50
50
  - Makefile
51
51
  - README.md
52
52
  - demo.cpp
53
+ - flist.txt
53
54
  - rubyext/extconf.rb
54
55
  - rubyext/ruby_interf.cpp
55
- - rubyext/test.rb
56
56
  - stridx.hpp
57
+ - test.rb
57
58
  - unordered_dense.h
58
59
  homepage: https://github.com/SamiSieranoja/stridx
59
60
  licenses:
60
61
  - LGPL-2.0+
61
- metadata: {}
62
- post_install_message:
62
+ metadata:
63
+ source_code_uri: https://github.com/SamiSieranoja/stridx
64
+ homepage_uri: https://github.com/SamiSieranoja/stridx
65
+ post_install_message:
63
66
  rdoc_options: []
64
67
  require_paths:
65
68
  - lib
@@ -76,7 +79,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
76
79
  version: '0'
77
80
  requirements: []
78
81
  rubygems_version: 3.3.26
79
- signing_key:
82
+ signing_key:
80
83
  specification_version: 4
81
84
  summary: StrIdx
82
85
  test_files: []