kmp 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +1 -1
- data/README.md +78 -0
- data/benchmark.rb +60 -0
- data/benchmark.txt +70 -0
- data/ext/kmp/kmp_string.c +10 -10
- data/lib/kmp/kmp.so +0 -0
- data/lib/kmp/version.rb +1 -1
- metadata +4 -3
- data/banch_mark.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f7d9ef58fc7947f5b3f2901cf4c52e7ad3e5ad99
|
4
|
+
data.tar.gz: 68909c1f9efaeef13ef684828c13b7e4c01b29aa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8cf863aaf38f13b29b0a8b1a5ddb2da28457e31d8064e2f0265614d3457ce653da7221984e58e0d424409c4a97018ffc3bbebaef4c8cd4a282a0752564c59deb
|
7
|
+
data.tar.gz: 378da39201d4cc1f38dfc6d99b249731fcfaf5ca91c3c3fe665a33627d7716dd64e8af00f0ce9fd6d5d78be45b645b2f7acf67354affdb9b3c5961f6180537b5
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
[](https://travis-ci.org/tanvir002700/kmp)
|
3
3
|
[](https://codeclimate.com/github/tanvir002700/kmp/maintainability)
|
4
4
|
[](https://coveralls.io/github/tanvir002700/kmp?branch=master)
|
5
|
+
[](https://badge.fury.io/rb/kmp)
|
6
|
+
[](https://rubygems.org/gems/kmp)
|
5
7
|
[](https://github.com/tanvir002700/kmp)
|
6
8
|
[](https://github.com/tanvir002700/kmp/blob/master/LICENSE)
|
7
9
|
|
@@ -39,6 +41,82 @@ k.length #=> 7
|
|
39
41
|
- `match` runs in O(N+M), where N is the length of the original text and M is the length of the pattern being matched
|
40
42
|
- length complexity O(1)
|
41
43
|
|
44
|
+
## Benchmark
|
45
|
+
This benchmark compares the KMP implementation in the C extension against a pure Ruby implementation. The [Benchmark](http://ruby-doc.org/stdlib-2.0.0/libdoc/benchmark/rdoc/Benchmark.html) module is used for the comparison.
|
46
|
+
|
47
|
+
```
|
48
|
+
lenght of string: 500
|
49
|
+
Rehearsal -----------------------------------------------
|
50
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.000028)
|
51
|
+
ruby 0.000000 0.000000 0.000000 ( 0.001067)
|
52
|
+
-------------------------------------- total: 0.000000sec
|
53
|
+
|
54
|
+
user system total real
|
55
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.000019)
|
56
|
+
ruby 0.000000 0.000000 0.000000 ( 0.000731)
|
57
|
+
|
58
|
+
lenght of string: 5000
|
59
|
+
Rehearsal -----------------------------------------------
|
60
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.000074)
|
61
|
+
ruby 0.000000 0.000000 0.000000 ( 0.006990)
|
62
|
+
-------------------------------------- total: 0.000000sec
|
63
|
+
|
64
|
+
user system total real
|
65
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.000059)
|
66
|
+
ruby 0.000000 0.000000 0.000000 ( 0.003058)
|
67
|
+
|
68
|
+
lenght of string: 50000
|
69
|
+
Rehearsal -----------------------------------------------
|
70
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.000276)
|
71
|
+
ruby 0.030000 0.000000 0.030000 ( 0.022061)
|
72
|
+
-------------------------------------- total: 0.030000sec
|
73
|
+
|
74
|
+
user system total real
|
75
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.000117)
|
76
|
+
ruby 0.010000 0.000000 0.010000 ( 0.014624)
|
77
|
+
|
78
|
+
lenght of string: 500000
|
79
|
+
Rehearsal -----------------------------------------------
|
80
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.001346)
|
81
|
+
ruby 0.140000 0.000000 0.140000 ( 0.147500)
|
82
|
+
-------------------------------------- total: 0.140000sec
|
83
|
+
|
84
|
+
user system total real
|
85
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.001188)
|
86
|
+
ruby 0.140000 0.000000 0.140000 ( 0.146642)
|
87
|
+
|
88
|
+
lenght of string: 5000000
|
89
|
+
Rehearsal -----------------------------------------------
|
90
|
+
c-extension 0.010000 0.000000 0.010000 ( 0.013171)
|
91
|
+
ruby 1.480000 0.010000 1.490000 ( 1.482893)
|
92
|
+
-------------------------------------- total: 1.500000sec
|
93
|
+
|
94
|
+
user system total real
|
95
|
+
c-extension 0.010000 0.000000 0.010000 ( 0.013098)
|
96
|
+
ruby 1.510000 0.000000 1.510000 ( 1.508361)
|
97
|
+
|
98
|
+
lenght of string: 50000000
|
99
|
+
Rehearsal -----------------------------------------------
|
100
|
+
c-extension 0.120000 0.020000 0.140000 ( 0.143160)
|
101
|
+
ruby 16.320000 0.020000 16.340000 ( 16.356332)
|
102
|
+
------------------------------------- total: 16.480000sec
|
103
|
+
|
104
|
+
user system total real
|
105
|
+
c-extension 0.120000 0.010000 0.130000 ( 0.143203)
|
106
|
+
ruby 15.130000 0.050000 15.180000 ( 15.191924)
|
107
|
+
|
108
|
+
lenght of string: 500000000
|
109
|
+
Rehearsal -----------------------------------------------
|
110
|
+
c-extension 1.190000 0.200000 1.390000 ( 1.491211)
|
111
|
+
ruby 159.510000 0.210000 159.720000 (159.723059)
|
112
|
+
------------------------------------ total: 161.110000sec
|
113
|
+
|
114
|
+
user system total real
|
115
|
+
c-extension 1.270000 0.200000 1.470000 ( 1.568321)
|
116
|
+
ruby 161.190000 0.250000 161.440000 (161.449839)
|
117
|
+
|
118
|
+
```
|
119
|
+
|
42
120
|
## Development
|
43
121
|
|
44
122
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. To compile the C extension, run `rake compile`.
|
data/benchmark.rb
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "kmp"
|
5
|
+
|
6
|
+
def compute_prefix(ptrn)
|
7
|
+
m = ptrn.size
|
8
|
+
prefix = Array.new m, 0
|
9
|
+
k = -1
|
10
|
+
prefix[0] = -1
|
11
|
+
(m-1).times do |i|
|
12
|
+
i = i + 1
|
13
|
+
while k>-1 && ptrn[k+1] != ptrn[i] do
|
14
|
+
k = prefix[k];
|
15
|
+
end
|
16
|
+
if ptrn[k+1] == ptrn[i]
|
17
|
+
k = k + 1
|
18
|
+
end
|
19
|
+
prefix[i] = k
|
20
|
+
end
|
21
|
+
prefix
|
22
|
+
end
|
23
|
+
|
24
|
+
def kmp_ruby(txt, ptrn)
|
25
|
+
prefix = compute_prefix(ptrn)
|
26
|
+
n = txt.size
|
27
|
+
m = ptrn.size
|
28
|
+
pos = []
|
29
|
+
|
30
|
+
q = -1
|
31
|
+
(n).times do |i|
|
32
|
+
while(q>-1 && ptrn[q+1]!=txt[i]) do
|
33
|
+
q = prefix[q]
|
34
|
+
end
|
35
|
+
if ptrn[q+1] == txt[i]
|
36
|
+
q = q + 1
|
37
|
+
end
|
38
|
+
if q == (m-1)
|
39
|
+
pos << (i-m+1)
|
40
|
+
q = prefix[q]
|
41
|
+
end
|
42
|
+
end
|
43
|
+
return pos
|
44
|
+
end
|
45
|
+
|
46
|
+
require 'benchmark'
|
47
|
+
|
48
|
+
n = 1
|
49
|
+
7.times do |t|
|
50
|
+
n = n * 10;
|
51
|
+
txt = ''
|
52
|
+
(n).times{txt << 'xpyyubzabcabcabcabcabcabcabcabcabcabcxyzabukxpyksl'}
|
53
|
+
puts "lenght of string: #{txt.size}"
|
54
|
+
a = Kmp::String.new(txt);
|
55
|
+
Benchmark.bmbm do |x|
|
56
|
+
x.report("c-extension") { a.match 'abcabcabcabcabcabc' }
|
57
|
+
x.report("ruby") { kmp_ruby(txt, 'abcabcabcabcabcabc') }
|
58
|
+
end
|
59
|
+
puts ""
|
60
|
+
end
|
data/benchmark.txt
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
lenght of string: 500
|
2
|
+
Rehearsal -----------------------------------------------
|
3
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.000028)
|
4
|
+
ruby 0.000000 0.000000 0.000000 ( 0.001067)
|
5
|
+
-------------------------------------- total: 0.000000sec
|
6
|
+
|
7
|
+
user system total real
|
8
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.000019)
|
9
|
+
ruby 0.000000 0.000000 0.000000 ( 0.000731)
|
10
|
+
|
11
|
+
lenght of string: 5000
|
12
|
+
Rehearsal -----------------------------------------------
|
13
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.000074)
|
14
|
+
ruby 0.000000 0.000000 0.000000 ( 0.006990)
|
15
|
+
-------------------------------------- total: 0.000000sec
|
16
|
+
|
17
|
+
user system total real
|
18
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.000059)
|
19
|
+
ruby 0.000000 0.000000 0.000000 ( 0.003058)
|
20
|
+
|
21
|
+
lenght of string: 50000
|
22
|
+
Rehearsal -----------------------------------------------
|
23
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.000276)
|
24
|
+
ruby 0.030000 0.000000 0.030000 ( 0.022061)
|
25
|
+
-------------------------------------- total: 0.030000sec
|
26
|
+
|
27
|
+
user system total real
|
28
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.000117)
|
29
|
+
ruby 0.010000 0.000000 0.010000 ( 0.014624)
|
30
|
+
|
31
|
+
lenght of string: 500000
|
32
|
+
Rehearsal -----------------------------------------------
|
33
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.001346)
|
34
|
+
ruby 0.140000 0.000000 0.140000 ( 0.147500)
|
35
|
+
-------------------------------------- total: 0.140000sec
|
36
|
+
|
37
|
+
user system total real
|
38
|
+
c-extension 0.000000 0.000000 0.000000 ( 0.001188)
|
39
|
+
ruby 0.140000 0.000000 0.140000 ( 0.146642)
|
40
|
+
|
41
|
+
lenght of string: 5000000
|
42
|
+
Rehearsal -----------------------------------------------
|
43
|
+
c-extension 0.010000 0.000000 0.010000 ( 0.013171)
|
44
|
+
ruby 1.480000 0.010000 1.490000 ( 1.482893)
|
45
|
+
-------------------------------------- total: 1.500000sec
|
46
|
+
|
47
|
+
user system total real
|
48
|
+
c-extension 0.010000 0.000000 0.010000 ( 0.013098)
|
49
|
+
ruby 1.510000 0.000000 1.510000 ( 1.508361)
|
50
|
+
|
51
|
+
lenght of string: 50000000
|
52
|
+
Rehearsal -----------------------------------------------
|
53
|
+
c-extension 0.120000 0.020000 0.140000 ( 0.143160)
|
54
|
+
ruby 16.320000 0.020000 16.340000 ( 16.356332)
|
55
|
+
------------------------------------- total: 16.480000sec
|
56
|
+
|
57
|
+
user system total real
|
58
|
+
c-extension 0.120000 0.010000 0.130000 ( 0.143203)
|
59
|
+
ruby 15.130000 0.050000 15.180000 ( 15.191924)
|
60
|
+
|
61
|
+
lenght of string: 500000000
|
62
|
+
Rehearsal -----------------------------------------------
|
63
|
+
c-extension 1.190000 0.200000 1.390000 ( 1.491211)
|
64
|
+
ruby 159.510000 0.210000 159.720000 (159.723059)
|
65
|
+
------------------------------------ total: 161.110000sec
|
66
|
+
|
67
|
+
user system total real
|
68
|
+
c-extension 1.270000 0.200000 1.470000 ( 1.568321)
|
69
|
+
ruby 161.190000 0.250000 161.440000 (161.449839)
|
70
|
+
|
data/ext/kmp/kmp_string.c
CHANGED
@@ -27,7 +27,7 @@ static VALUE initialize(VALUE self, VALUE rb_string)
|
|
27
27
|
Check_Type(rb_string, T_STRING);
|
28
28
|
Data_Get_Struct(self, struct Str, str);
|
29
29
|
|
30
|
-
str->ptr = calloc(RSTRING_LEN(rb_string) , sizeof(char));
|
30
|
+
str->ptr = calloc(RSTRING_LEN(rb_string) + 1 , sizeof(char));
|
31
31
|
memcpy(str->ptr, StringValuePtr(rb_string), RSTRING_LEN(rb_string));
|
32
32
|
|
33
33
|
rb_iv_set(self, "@str", rb_string);
|
@@ -36,15 +36,14 @@ static VALUE initialize(VALUE self, VALUE rb_string)
|
|
36
36
|
return self;
|
37
37
|
}
|
38
38
|
|
39
|
-
static int* compute_prefix(char *str)
|
39
|
+
static int* compute_prefix(const char *str)
|
40
40
|
{
|
41
41
|
int len = strlen(str);
|
42
|
-
|
42
|
+
int k = -1;
|
43
43
|
int * prefix;
|
44
|
-
prefix = calloc(len+1, sizeof(int));
|
45
44
|
|
45
|
+
prefix = (int *) calloc(len+1, sizeof(int));
|
46
46
|
prefix[0] = -1;
|
47
|
-
int k = -1;
|
48
47
|
|
49
48
|
for(int i=1; i<len; i++)
|
50
49
|
{
|
@@ -62,20 +61,21 @@ static VALUE match(VALUE self, VALUE rb_str)
|
|
62
61
|
char * str;
|
63
62
|
char * ptrn;
|
64
63
|
int * prefix;
|
64
|
+
int n,m,q;
|
65
65
|
|
66
66
|
Data_Get_Struct(self, struct Str, obj);
|
67
|
-
str = calloc(strlen(obj->ptr), sizeof(char));
|
67
|
+
str = calloc(strlen(obj->ptr) + 1, sizeof(char));
|
68
68
|
strcpy(str, obj->ptr);
|
69
69
|
|
70
|
-
ptrn = calloc(RSTRING_LEN(rb_str), sizeof(char));
|
70
|
+
ptrn = (char *) calloc(RSTRING_LEN(rb_str) + 1, sizeof(char));
|
71
71
|
memcpy(ptrn, StringValuePtr(rb_str), RSTRING_LEN(rb_str));
|
72
72
|
|
73
73
|
prefix = compute_prefix(ptrn);
|
74
74
|
|
75
|
-
|
76
|
-
|
75
|
+
n = strlen(str);
|
76
|
+
m = strlen(ptrn);
|
77
77
|
|
78
|
-
|
78
|
+
q = -1;
|
79
79
|
for(int i=0; i<n; i++)
|
80
80
|
{
|
81
81
|
while( q>-1 && ptrn[q+1]!=str[i] ) q = prefix[q];
|
data/lib/kmp/kmp.so
CHANGED
Binary file
|
data/lib/kmp/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kmp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- tanvir hasan
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-02-
|
11
|
+
date: 2018-02-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -108,7 +108,8 @@ files:
|
|
108
108
|
- LICENSE.txt
|
109
109
|
- README.md
|
110
110
|
- Rakefile
|
111
|
-
-
|
111
|
+
- benchmark.rb
|
112
|
+
- benchmark.txt
|
112
113
|
- bin/console
|
113
114
|
- bin/setup
|
114
115
|
- ext/kmp/extconf.rb
|
data/banch_mark.rb
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require "bundler/setup"
|
4
|
-
require "kmp"
|
5
|
-
|
6
|
-
#(0...50).map { ('a'..'z').to_a[rand(26)] }.join
|
7
|
-
txt = ''
|
8
|
-
10000.times{txt << 'abc'}
|
9
|
-
|
10
|
-
t1 = Time.now
|
11
|
-
a = Kmp::String.new txt
|
12
|
-
b = a.match 'abcabcabcabcabcabc'
|
13
|
-
puts Time.now - t1
|
14
|
-
puts b.length
|
15
|
-
|
16
|
-
t1 = Time.now
|
17
|
-
b = txt.enum_for(:scan, /(?=abcabcabcabcabcabc)/).map { Regexp.last_match.offset(0).first }
|
18
|
-
puts Time.now - t1
|
19
|
-
puts b.size
|