levenshtein_str 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -1
- data/README.md +19 -4
- data/benchmark +107 -38
- data/lib/levenshtein_str/version.rb +1 -1
- metadata +15 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a54be85457479ff4e2a4d0550dc9c1405a1e7068ed917e2f9075bc3c1021b81d
|
4
|
+
data.tar.gz: 3a149377f5b8c1c464c01735cba4298543200b881364f689cae954cd81082389
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 01daf8720df28549a468f1ee916e6fc62f25da97b399c86f5ff9e9dea5e9fbcecaad5933b63a7c6e418f8516a5cb69080ee72b121f1e30e3850588ee47ca1bfa
|
7
|
+
data.tar.gz: 7356d14517290915ada81c7c6f6ae3fd220e469f4cd533b913f77c83df35fa13fb002b3a20ed4add2c851389dacb455adb37b0e92d206db21450687f88b4cd74
|
data/Gemfile.lock
CHANGED
@@ -7,12 +7,16 @@ GEM
|
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
benchmark-ips (2.8.2)
|
10
|
+
benchmark-memory (0.1.2)
|
11
|
+
memory_profiler (~> 0.9)
|
12
|
+
memory_profiler (0.9.14)
|
10
13
|
|
11
14
|
PLATFORMS
|
12
15
|
ruby
|
13
16
|
|
14
17
|
DEPENDENCIES
|
15
|
-
benchmark-ips
|
18
|
+
benchmark-ips (~> 2.7)
|
19
|
+
benchmark-memory (~> 0.1.2)
|
16
20
|
levenshtein_str!
|
17
21
|
|
18
22
|
BUNDLED WITH
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# String#levenshtein
|
2
2
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/levenshtein_str.svg)](https://badge.fury.io/rb/levenshtein_str)
|
4
|
+
|
3
5
|
A performant Ruby gem for getting the [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) between two strings, with the legwork done in Crystal.
|
4
6
|
|
5
7
|
## Installation:
|
@@ -33,16 +35,29 @@ then
|
|
33
35
|
|
34
36
|
## Benchmarks
|
35
37
|
|
36
|
-
See benchmark
|
38
|
+
A comparison of a pure Ruby implementation against this Crystal gem. See the `./benchmark` file for full details. Run the benchmarks with:
|
39
|
+
|
40
|
+
```
|
41
|
+
./benchmark
|
42
|
+
```
|
43
|
+
Note that the full set takes a while to run, especially the memory benchmarks.
|
37
44
|
|
38
|
-
|
45
|
+
### Iterations Per Second
|
39
46
|
|
40
|
-
- "" and "" Same-ish,
|
47
|
+
- "" and "" About the same; Ruby is often ~1.08x quicker (no type-conversion overhead)
|
41
48
|
- "abd" and "abc" Crystal ~2x quicker
|
42
|
-
- "abc" and "abcde" Crystal ~2x quicker
|
43
49
|
- "abcdefghi" and "0123456789" Crystal ~2.25x quicker
|
44
50
|
- [whole alphabet] vs "012345" Crystal ~2.37x quicker
|
45
51
|
|
52
|
+
### Memory
|
53
|
+
|
54
|
+
- "" and "" Same
|
55
|
+
- "abd" and "abc" Ruby 25.2x more
|
56
|
+
- "abcdefghi" and "0123456789" Ruby 141.5x more
|
57
|
+
- [whole alphabet] vs "012345" Ruby 212x more
|
58
|
+
|
59
|
+
See the `./benchmark` file for the full, detailed results.
|
60
|
+
|
46
61
|
## Template
|
47
62
|
|
48
63
|
Based on [this](https://github.com/johansenja/crystal_gem_template) template for writing Ruby gems in Crystal.
|
data/benchmark
CHANGED
@@ -36,13 +36,18 @@ class String
|
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
|
-
def
|
39
|
+
def setup
|
40
40
|
load "Rakefile"
|
41
41
|
|
42
42
|
Rake::Task["compile"].invoke
|
43
43
|
|
44
44
|
require "benchmark/ips"
|
45
|
+
require "benchmark/memory"
|
45
46
|
require "levenshtein_str"
|
47
|
+
end
|
48
|
+
|
49
|
+
def bm_ips
|
50
|
+
puts "\n\nBENCHMARKING IPS (iterations per second)\n\n\n"
|
46
51
|
|
47
52
|
# blank strings
|
48
53
|
Benchmark.ips do |bmark|
|
@@ -58,22 +63,43 @@ def bm
|
|
58
63
|
bmark.compare!
|
59
64
|
end
|
60
65
|
|
61
|
-
#
|
66
|
+
# medium diff
|
62
67
|
Benchmark.ips do |bmark|
|
63
|
-
bmark.report("
|
64
|
-
bmark.report("
|
68
|
+
bmark.report("abcdefghi vs 0123456789 (Ruby)") { 100_000.times { "abcdefghi".levenshtein_rb("0123456789") } }
|
69
|
+
bmark.report("abcdefghi vs 0123456789 (Crystal)") { 100_000.times { "abcdefghi".levenshtein("0123456789") } }
|
65
70
|
bmark.compare!
|
66
71
|
end
|
67
72
|
|
68
|
-
#
|
73
|
+
# long diff
|
69
74
|
Benchmark.ips do |bmark|
|
75
|
+
bmark.report("[whole alphabet] vs 012345 (Ruby)") { 100_000.times { "abcdefghijklmnopqrstuvwxyz".levenshtein_rb("012345") } }
|
76
|
+
bmark.report("[whole alphabet] vs 012345 (Crystal)") { 100_000.times { "abcdefghijklmnopqrstuvwxyz".levenshtein("012345") } }
|
77
|
+
bmark.compare!
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def bm_memory
|
82
|
+
puts "\n\nBENCHMARKING MEMORY USAGE\n\n\n"
|
83
|
+
|
84
|
+
Benchmark.memory do |bmark|
|
85
|
+
bmark.report("Blank strings (Ruby)") { 100_000.times { "".levenshtein_rb("") } }
|
86
|
+
bmark.report("Blank string (Crystal)") { 100_000.times { "".levenshtein("") } }
|
87
|
+
bmark.compare!
|
88
|
+
end
|
89
|
+
|
90
|
+
Benchmark.memory do |bmark|
|
91
|
+
bmark.report("abd vs abc (Ruby)") { 100_000.times { "abd".levenshtein_rb("abc") } }
|
92
|
+
bmark.report("abd vs abc (Crystal)") { 100_000.times { "abd".levenshtein("abc") } }
|
93
|
+
bmark.compare!
|
94
|
+
end
|
95
|
+
|
96
|
+
Benchmark.memory do |bmark|
|
70
97
|
bmark.report("abcdefghi vs 0123456789 (Ruby)") { 100_000.times { "abcdefghi".levenshtein_rb("0123456789") } }
|
71
98
|
bmark.report("abcdefghi vs 0123456789 (Crystal)") { 100_000.times { "abcdefghi".levenshtein("0123456789") } }
|
72
99
|
bmark.compare!
|
73
100
|
end
|
74
101
|
|
75
|
-
|
76
|
-
Benchmark.ips do |bmark|
|
102
|
+
Benchmark.memory do |bmark|
|
77
103
|
bmark.report("[whole alphabet] vs 012345 (Ruby)") { 100_000.times { "abcdefghijklmnopqrstuvwxyz".levenshtein_rb("012345") } }
|
78
104
|
bmark.report("[whole alphabet] vs 012345 (Crystal)") { 100_000.times { "abcdefghijklmnopqrstuvwxyz".levenshtein("012345") } }
|
79
105
|
bmark.compare!
|
@@ -81,49 +107,40 @@ def bm
|
|
81
107
|
end
|
82
108
|
|
83
109
|
begin
|
84
|
-
|
110
|
+
setup
|
111
|
+
bm_ips
|
112
|
+
bm_memory
|
85
113
|
ensure
|
86
114
|
Rake::Task["clean"].invoke
|
87
115
|
end
|
88
116
|
|
89
117
|
# RESULTS
|
90
118
|
|
119
|
+
# BENCHMARKING IPS (iterations per second)
|
120
|
+
|
91
121
|
# Warming up --------------------------------------
|
92
|
-
# Blank strings (Ruby)
|
122
|
+
# Blank strings (Ruby) 4.000 i/100ms
|
93
123
|
# Blank string (Crystal)
|
94
|
-
#
|
124
|
+
# 5.000 i/100ms
|
95
125
|
# Calculating -------------------------------------
|
96
|
-
# Blank strings (Ruby)
|
126
|
+
# Blank strings (Ruby) 55.396 (± 1.8%) i/s - 280.000 in 5.056752s
|
97
127
|
# Blank string (Crystal)
|
98
|
-
#
|
128
|
+
# 51.092 (± 2.0%) i/s - 260.000 in 5.090523s
|
99
129
|
|
100
130
|
# Comparison:
|
101
|
-
# Blank strings (Ruby):
|
102
|
-
# Blank string (Crystal):
|
131
|
+
# Blank strings (Ruby): 55.4 i/s
|
132
|
+
# Blank string (Crystal): 51.1 i/s - 1.08x (± 0.00) slower
|
103
133
|
|
104
134
|
# Warming up --------------------------------------
|
105
135
|
# abd vs abc (Ruby) 1.000 i/100ms
|
106
136
|
# abd vs abc (Crystal) 1.000 i/100ms
|
107
137
|
# Calculating -------------------------------------
|
108
|
-
# abd vs abc (Ruby) 1.
|
109
|
-
# abd vs abc (Crystal) 2.
|
110
|
-
|
111
|
-
# Comparison:
|
112
|
-
# abd vs abc (Crystal): 2.7 i/s
|
113
|
-
# abd vs abc (Ruby): 1.4 i/s - 2.01x (± 0.00) slower
|
114
|
-
|
115
|
-
# Warming up --------------------------------------
|
116
|
-
# abc vs abcde (Ruby) 1.000 i/100ms
|
117
|
-
# abc vs abcde (Crystal)
|
118
|
-
# 1.000 i/100ms
|
119
|
-
# Calculating -------------------------------------
|
120
|
-
# abc vs abcde (Ruby) 0.901 (± 0.0%) i/s - 5.000 in 5.547298s
|
121
|
-
# abc vs abcde (Crystal)
|
122
|
-
# 1.792 (± 0.0%) i/s - 9.000 in 5.023881s
|
138
|
+
# abd vs abc (Ruby) 1.510 (± 0.0%) i/s - 8.000 in 5.298484s
|
139
|
+
# abd vs abc (Crystal) 2.951 (± 0.0%) i/s - 15.000 in 5.084644s
|
123
140
|
|
124
141
|
# Comparison:
|
125
|
-
#
|
126
|
-
#
|
142
|
+
# abd vs abc (Crystal): 3.0 i/s
|
143
|
+
# abd vs abc (Ruby): 1.5 i/s - 1.95x (± 0.00) slower
|
127
144
|
|
128
145
|
# Warming up --------------------------------------
|
129
146
|
# abcdefghi vs 0123456789 (Ruby)
|
@@ -132,13 +149,13 @@ end
|
|
132
149
|
# 1.000 i/100ms
|
133
150
|
# Calculating -------------------------------------
|
134
151
|
# abcdefghi vs 0123456789 (Ruby)
|
135
|
-
# 0.
|
152
|
+
# 0.218 (± 0.0%) i/s - 2.000 in 9.178957s
|
136
153
|
# abcdefghi vs 0123456789 (Crystal)
|
137
|
-
# 0.
|
154
|
+
# 0.475 (± 0.0%) i/s - 3.000 in 6.312027s
|
138
155
|
|
139
156
|
# Comparison:
|
140
|
-
# abcdefghi vs 0123456789 (Crystal): 0.
|
141
|
-
# abcdefghi vs 0123456789 (Ruby): 0.2 i/s - 2.
|
157
|
+
# abcdefghi vs 0123456789 (Crystal): 0.5 i/s
|
158
|
+
# abcdefghi vs 0123456789 (Ruby): 0.2 i/s - 2.18x (± 0.00) slower
|
142
159
|
|
143
160
|
# Warming up --------------------------------------
|
144
161
|
# [whole alphabet] vs 012345 (Ruby)
|
@@ -147,10 +164,62 @@ end
|
|
147
164
|
# 1.000 i/100ms
|
148
165
|
# Calculating -------------------------------------
|
149
166
|
# [whole alphabet] vs 012345 (Ruby)
|
150
|
-
# 0.
|
167
|
+
# 0.136 (± 0.0%) i/s - 1.000 in 7.358898s
|
151
168
|
# [whole alphabet] vs 012345 (Crystal)
|
152
|
-
# 0.
|
169
|
+
# 0.316 (± 0.0%) i/s - 2.000 in 6.324441s
|
153
170
|
|
154
171
|
# Comparison:
|
155
172
|
# [whole alphabet] vs 012345 (Crystal): 0.3 i/s
|
156
|
-
# [whole alphabet] vs 012345 (Ruby): 0.1 i/s - 2.
|
173
|
+
# [whole alphabet] vs 012345 (Ruby): 0.1 i/s - 2.33x (± 0.00) slower
|
174
|
+
|
175
|
+
# BENCHMARKING MEMORY USAGE
|
176
|
+
|
177
|
+
# Calculating -------------------------------------
|
178
|
+
# Blank strings (Ruby) 8.000M memsize ( 0.000 retained)
|
179
|
+
# 200.000k objects ( 0.000 retained)
|
180
|
+
# 1.000 strings ( 0.000 retained)
|
181
|
+
# Blank string (Crystal)
|
182
|
+
# 8.000M memsize ( 0.000 retained)
|
183
|
+
# 200.000k objects ( 0.000 retained)
|
184
|
+
# 1.000 strings ( 0.000 retained)
|
185
|
+
|
186
|
+
# Comparison:
|
187
|
+
# Blank strings (Ruby): 8000000 allocated
|
188
|
+
# Blank string (Crystal): 8000000 allocated - same
|
189
|
+
# Calculating -------------------------------------
|
190
|
+
# abd vs abc (Ruby) 201.600M memsize ( 0.000 retained)
|
191
|
+
# 4.000M objects ( 0.000 retained)
|
192
|
+
# 6.000 strings ( 0.000 retained)
|
193
|
+
# abd vs abc (Crystal) 8.000M memsize ( 0.000 retained)
|
194
|
+
# 200.000k objects ( 0.000 retained)
|
195
|
+
# 2.000 strings ( 0.000 retained)
|
196
|
+
|
197
|
+
# Comparison:
|
198
|
+
# abd vs abc (Crystal): 8000000 allocated
|
199
|
+
# abd vs abc (Ruby): 201600000 allocated - 25.20x more
|
200
|
+
# Calculating -------------------------------------
|
201
|
+
# abcdefghi vs 0123456789 (Ruby)
|
202
|
+
# 1.132B memsize ( 0.000 retained)
|
203
|
+
# 23.700M objects ( 0.000 retained)
|
204
|
+
# 21.000 strings ( 0.000 retained)
|
205
|
+
# abcdefghi vs 0123456789 (Crystal)
|
206
|
+
# 8.000M memsize ( 0.000 retained)
|
207
|
+
# 200.000k objects ( 0.000 retained)
|
208
|
+
# 2.000 strings ( 0.000 retained)
|
209
|
+
|
210
|
+
# Comparison:
|
211
|
+
# abcdefghi vs 0123456789 (Crystal): 8000000 allocated
|
212
|
+
# abcdefghi vs 0123456789 (Ruby): 1132000000 allocated - 141.50x more
|
213
|
+
# Calculating -------------------------------------
|
214
|
+
# [whole alphabet] vs 012345 (Ruby)
|
215
|
+
# 1.696B memsize ( 0.000 retained)
|
216
|
+
# 34.900M objects ( 0.000 retained)
|
217
|
+
# 34.000 strings ( 0.000 retained)
|
218
|
+
# [whole alphabet] vs 012345 (Crystal)
|
219
|
+
# 8.000M memsize ( 0.000 retained)
|
220
|
+
# 200.000k objects ( 0.000 retained)
|
221
|
+
# 2.000 strings ( 0.000 retained)
|
222
|
+
|
223
|
+
# Comparison:
|
224
|
+
# [whole alphabet] vs 012345 (Crystal): 8000000 allocated
|
225
|
+
# [whole alphabet] vs 012345 (Ruby): 1696000000 allocated - 212.00x more
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: levenshtein_str
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- johansenja
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '2.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: benchmark-memory
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.1.2
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.1.2
|
27
41
|
description: Use String#levenshtein(other_str) to get the levenshtein distance between
|
28
42
|
2 strings. Useful for measuring approximate string similarity, and fuzzy matching.
|
29
43
|
email:
|