StrIdx 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Makefile +6 -265
- data/flist.txt +89828 -0
- data/rubyext/ruby_interf.cpp +22 -1
- data/stridx.hpp +9 -5
- metadata +9 -6
data/rubyext/ruby_interf.cpp
CHANGED
@@ -36,7 +36,7 @@ VALUE StringIndexAddSegments(VALUE self, VALUE str, VALUE fileId) {
|
|
36
36
|
|
37
37
|
void *data;
|
38
38
|
TypedData_Get_Struct(self, int, &str_idx_type, data);
|
39
|
-
((StringIndex *)data)->addStrToIndex(s1, fid
|
39
|
+
((StringIndex *)data)->addStrToIndex(s1, fid);
|
40
40
|
|
41
41
|
return self;
|
42
42
|
}
|
@@ -66,6 +66,26 @@ VALUE StringIndexFind(VALUE self, VALUE str) {
|
|
66
66
|
return ret;
|
67
67
|
}
|
68
68
|
|
69
|
+
VALUE StringIndexSetDirSeparator(VALUE self, VALUE str) {
|
70
|
+
char c = '/';
|
71
|
+
if (TYPE(str) == T_STRING) {
|
72
|
+
std::string s = StringValueCStr(str);
|
73
|
+
if (s.size() >= 1) {
|
74
|
+
c = s[0];
|
75
|
+
}
|
76
|
+
} else {
|
77
|
+
c = '\0'; // No separator
|
78
|
+
// if (TYPE(obj) == T_NIL)
|
79
|
+
}
|
80
|
+
|
81
|
+
void *data;
|
82
|
+
TypedData_Get_Struct(self, int, &str_idx_type, data);
|
83
|
+
StringIndex *idx = (StringIndex *)data;
|
84
|
+
idx->setDirSeparator(c);
|
85
|
+
|
86
|
+
return self;
|
87
|
+
}
|
88
|
+
|
69
89
|
void Init_stridx(void) {
|
70
90
|
|
71
91
|
VALUE mStrIdx = rb_define_module("StrIdx");
|
@@ -74,6 +94,7 @@ void Init_stridx(void) {
|
|
74
94
|
rb_define_alloc_func(classStringIndex, str_idx_alloc);
|
75
95
|
rb_define_method(classStringIndex, "add", StringIndexAddSegments, 2);
|
76
96
|
rb_define_method(classStringIndex, "find", StringIndexFind, 1);
|
97
|
+
rb_define_method(classStringIndex, "setDirSeparator", StringIndexSetDirSeparator, 1);
|
77
98
|
}
|
78
99
|
|
79
100
|
} // End extern "C"
|
data/stridx.hpp
CHANGED
@@ -214,7 +214,7 @@ public:
|
|
214
214
|
}
|
215
215
|
|
216
216
|
/**
|
217
|
-
* Add a string to the index to be
|
217
|
+
* Add a string to the index to be searched for afterwards
|
218
218
|
*
|
219
219
|
* @param filePath String to index (e.g. /home/user/Project/main.cpp).
|
220
220
|
* @param fileId Unique identifier for filePath. Will be returned as the result from findSimilar.
|
@@ -272,7 +272,7 @@ public:
|
|
272
272
|
}
|
273
273
|
|
274
274
|
/**
|
275
|
-
|
275
|
+
The search will find filepaths similar to the input string
|
276
276
|
|
277
277
|
To be considered a candidate path, the file component of the path (e.g. file.txt)
|
278
278
|
is required to have at least a substring of two characters in common with the
|
@@ -286,8 +286,8 @@ public:
|
|
286
286
|
is also included in the PathSegment
|
287
287
|
- take the length of that substring as score
|
288
288
|
sum up the scores for each character c and divide by (string length)^2
|
289
|
-
|
290
|
-
For example, if query = "rngnomadriv"
|
289
|
+
|
290
|
+
For example, if query = "rngnomadriv"
|
291
291
|
and candidate is "./drivers/char/hw_random/nomadik-rng.c", then scores are calculated
|
292
292
|
as follows:
|
293
293
|
rngnomadriv
|
@@ -296,13 +296,17 @@ public:
|
|
296
296
|
score1=(3+3+3+5+5+5+5+5+(4+4+4)*0.7)
|
297
297
|
|
298
298
|
In final score, give a small penalty for larger candidate filenames:
|
299
|
-
Divide main part of score with (query string length)^2
|
299
|
+
Divide main part of score with (query string length)^2
|
300
300
|
and minor part by (query string length)*(candidate string length)
|
301
301
|
score = score1/(11*11)*0.97 + score1/(11*38)*0.03 = 0.342944
|
302
302
|
|
303
303
|
@param query String to search for inside the index
|
304
304
|
*/
|
305
305
|
|
306
|
+
std::vector<std::pair<float, int>> findSimilar(std::string query) {
|
307
|
+
return findSimilar(query, 2);
|
308
|
+
}
|
309
|
+
|
306
310
|
std::vector<std::pair<float, int>> findSimilar(std::string query, int minChars) {
|
307
311
|
CandMap fileCandMap;
|
308
312
|
CandMap dirCandMap;
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: StrIdx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sami Sieranoja
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-05-
|
11
|
+
date: 2024-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -50,6 +50,7 @@ files:
|
|
50
50
|
- Makefile
|
51
51
|
- README.md
|
52
52
|
- demo.cpp
|
53
|
+
- flist.txt
|
53
54
|
- rubyext/extconf.rb
|
54
55
|
- rubyext/ruby_interf.cpp
|
55
56
|
- stridx.hpp
|
@@ -58,8 +59,10 @@ files:
|
|
58
59
|
homepage: https://github.com/SamiSieranoja/stridx
|
59
60
|
licenses:
|
60
61
|
- LGPL-2.0+
|
61
|
-
metadata:
|
62
|
-
|
62
|
+
metadata:
|
63
|
+
source_code_uri: https://github.com/SamiSieranoja/stridx
|
64
|
+
homepage_uri: https://github.com/SamiSieranoja/stridx
|
65
|
+
post_install_message:
|
63
66
|
rdoc_options: []
|
64
67
|
require_paths:
|
65
68
|
- lib
|
@@ -76,7 +79,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
76
79
|
version: '0'
|
77
80
|
requirements: []
|
78
81
|
rubygems_version: 3.3.26
|
79
|
-
signing_key:
|
82
|
+
signing_key:
|
80
83
|
specification_version: 4
|
81
84
|
summary: StrIdx
|
82
85
|
test_files: []
|