levenshtein_str 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -1
- data/README.md +19 -4
- data/benchmark +107 -38
- data/lib/levenshtein_str/version.rb +1 -1
- metadata +15 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a54be85457479ff4e2a4d0550dc9c1405a1e7068ed917e2f9075bc3c1021b81d
|
4
|
+
data.tar.gz: 3a149377f5b8c1c464c01735cba4298543200b881364f689cae954cd81082389
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 01daf8720df28549a468f1ee916e6fc62f25da97b399c86f5ff9e9dea5e9fbcecaad5933b63a7c6e418f8516a5cb69080ee72b121f1e30e3850588ee47ca1bfa
|
7
|
+
data.tar.gz: 7356d14517290915ada81c7c6f6ae3fd220e469f4cd533b913f77c83df35fa13fb002b3a20ed4add2c851389dacb455adb37b0e92d206db21450687f88b4cd74
|
data/Gemfile.lock
CHANGED
@@ -7,12 +7,16 @@ GEM
|
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
9
|
benchmark-ips (2.8.2)
|
10
|
+
benchmark-memory (0.1.2)
|
11
|
+
memory_profiler (~> 0.9)
|
12
|
+
memory_profiler (0.9.14)
|
10
13
|
|
11
14
|
PLATFORMS
|
12
15
|
ruby
|
13
16
|
|
14
17
|
DEPENDENCIES
|
15
|
-
benchmark-ips
|
18
|
+
benchmark-ips (~> 2.7)
|
19
|
+
benchmark-memory (~> 0.1.2)
|
16
20
|
levenshtein_str!
|
17
21
|
|
18
22
|
BUNDLED WITH
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# String#levenshtein
|
2
2
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/levenshtein_str.svg)](https://badge.fury.io/rb/levenshtein_str)
|
4
|
+
|
3
5
|
A performant Ruby gem for getting the [levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) between 2 strings, with the leg work done in Crystal.
|
4
6
|
|
5
7
|
## Installation:
|
@@ -33,16 +35,29 @@ then
|
|
33
35
|
|
34
36
|
## Benchmarks
|
35
37
|
|
36
|
-
See benchmark
|
38
|
+
A pure Ruby implementation vs this Crystal gem. See the `./benchmark` file for full details. Run the benchmarks with:
|
39
|
+
|
40
|
+
```
|
41
|
+
./benchmark
|
42
|
+
```
|
43
|
+
Note that it takes a while to run the full set, especially the memory benchmarks.
|
37
44
|
|
38
|
-
|
45
|
+
### Iterations Per Second
|
39
46
|
|
40
|
-
- "" and "" Same-ish,
|
47
|
+
- "" and "" Same-ish, often Ruby ~1.08x quicker (no type conversion overheads)
|
41
48
|
- "abd" and "abc" Crystal ~2x quicker
|
42
|
-
- "abc" and "abcde" Crystal ~2x quicker
|
43
49
|
- "abcdefghi" and "0123456789" Crystal ~2.25x quicker
|
44
50
|
- [whole alphabet] vs "012345" Crystal ~2.37x quicker
|
45
51
|
|
52
|
+
### Memory
|
53
|
+
|
54
|
+
- "" and "" Same
|
55
|
+
- "abd" and "abc" Ruby 25.2x more
|
56
|
+
- "abcdefghi" and "0123456789" Ruby 141.5x more
|
57
|
+
- [whole alphabet] vs "012345" Ruby 212x more
|
58
|
+
|
59
|
+
See `./benchmark` file for full detailed results.
|
60
|
+
|
46
61
|
## Template
|
47
62
|
|
48
63
|
Based on [this](https://github.com/johansenja/crystal_gem_template) template for writing Ruby gems in Crystal.
|
data/benchmark
CHANGED
@@ -36,13 +36,18 @@ class String
|
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
|
-
def bm
|
39
|
+
def setup
|
40
40
|
load "Rakefile"
|
41
41
|
|
42
42
|
Rake::Task["compile"].invoke
|
43
43
|
|
44
44
|
require "benchmark/ips"
|
45
|
+
require "benchmark/memory"
|
45
46
|
require "levenshtein_str"
|
47
|
+
end
|
48
|
+
|
49
|
+
def bm_ips
|
50
|
+
puts "\n\nBENCHMARKING IPS (iterations per second)\n\n\n"
|
46
51
|
|
47
52
|
# blank strings
|
48
53
|
Benchmark.ips do |bmark|
|
@@ -58,22 +63,43 @@ def bm
|
|
58
63
|
bmark.compare!
|
59
64
|
end
|
60
65
|
|
61
|
-
#
|
66
|
+
# medium diff
|
62
67
|
Benchmark.ips do |bmark|
|
63
|
-
bmark.report("
|
64
|
-
bmark.report("
|
68
|
+
bmark.report("abcdefghi vs 0123456789 (Ruby)") { 100_000.times { "abcdefghi".levenshtein_rb("0123456789") } }
|
69
|
+
bmark.report("abcdefghi vs 0123456789 (Crystal)") { 100_000.times { "abcdefghi".levenshtein("0123456789") } }
|
65
70
|
bmark.compare!
|
66
71
|
end
|
67
72
|
|
68
|
-
#
|
73
|
+
# long diff
|
69
74
|
Benchmark.ips do |bmark|
|
75
|
+
bmark.report("[whole alphabet] vs 012345 (Ruby)") { 100_000.times { "abcdefghijklmnopqrstuvwxyz".levenshtein_rb("012345") } }
|
76
|
+
bmark.report("[whole alphabet] vs 012345 (Crystal)") { 100_000.times { "abcdefghijklmnopqrstuvwxyz".levenshtein("012345") } }
|
77
|
+
bmark.compare!
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def bm_memory
|
82
|
+
puts "\n\nBENCHMARKING MEMORY USAGE\n\n\n"
|
83
|
+
|
84
|
+
Benchmark.memory do |bmark|
|
85
|
+
bmark.report("Blank strings (Ruby)") { 100_000.times { "".levenshtein_rb("") } }
|
86
|
+
bmark.report("Blank string (Crystal)") { 100_000.times { "".levenshtein("") } }
|
87
|
+
bmark.compare!
|
88
|
+
end
|
89
|
+
|
90
|
+
Benchmark.memory do |bmark|
|
91
|
+
bmark.report("abd vs abc (Ruby)") { 100_000.times { "abd".levenshtein_rb("abc") } }
|
92
|
+
bmark.report("abd vs abc (Crystal)") { 100_000.times { "abd".levenshtein("abc") } }
|
93
|
+
bmark.compare!
|
94
|
+
end
|
95
|
+
|
96
|
+
Benchmark.memory do |bmark|
|
70
97
|
bmark.report("abcdefghi vs 0123456789 (Ruby)") { 100_000.times { "abcdefghi".levenshtein_rb("0123456789") } }
|
71
98
|
bmark.report("abcdefghi vs 0123456789 (Crystal)") { 100_000.times { "abcdefghi".levenshtein("0123456789") } }
|
72
99
|
bmark.compare!
|
73
100
|
end
|
74
101
|
|
75
|
-
|
76
|
-
Benchmark.ips do |bmark|
|
102
|
+
Benchmark.memory do |bmark|
|
77
103
|
bmark.report("[whole alphabet] vs 012345 (Ruby)") { 100_000.times { "abcdefghijklmnopqrstuvwxyz".levenshtein_rb("012345") } }
|
78
104
|
bmark.report("[whole alphabet] vs 012345 (Crystal)") { 100_000.times { "abcdefghijklmnopqrstuvwxyz".levenshtein("012345") } }
|
79
105
|
bmark.compare!
|
@@ -81,49 +107,40 @@ def bm
|
|
81
107
|
end
|
82
108
|
|
83
109
|
begin
|
84
|
-
|
110
|
+
setup
|
111
|
+
bm_ips
|
112
|
+
bm_memory
|
85
113
|
ensure
|
86
114
|
Rake::Task["clean"].invoke
|
87
115
|
end
|
88
116
|
|
89
117
|
# RESULTS
|
90
118
|
|
119
|
+
# BENCHMARKING IPS (iterations per second)
|
120
|
+
|
91
121
|
# Warming up --------------------------------------
|
92
|
-
# Blank strings (Ruby)
|
122
|
+
# Blank strings (Ruby) 4.000 i/100ms
|
93
123
|
# Blank string (Crystal)
|
94
|
-
#
|
124
|
+
# 5.000 i/100ms
|
95
125
|
# Calculating -------------------------------------
|
96
|
-
# Blank strings (Ruby)
|
126
|
+
# Blank strings (Ruby) 55.396 (± 1.8%) i/s - 280.000 in 5.056752s
|
97
127
|
# Blank string (Crystal)
|
98
|
-
#
|
128
|
+
# 51.092 (± 2.0%) i/s - 260.000 in 5.090523s
|
99
129
|
|
100
130
|
# Comparison:
|
101
|
-
# Blank strings (Ruby):
|
102
|
-
# Blank string (Crystal):
|
131
|
+
# Blank strings (Ruby): 55.4 i/s
|
132
|
+
# Blank string (Crystal): 51.1 i/s - 1.08x (± 0.00) slower
|
103
133
|
|
104
134
|
# Warming up --------------------------------------
|
105
135
|
# abd vs abc (Ruby) 1.000 i/100ms
|
106
136
|
# abd vs abc (Crystal) 1.000 i/100ms
|
107
137
|
# Calculating -------------------------------------
|
108
|
-
# abd vs abc (Ruby) 1.
|
109
|
-
# abd vs abc (Crystal) 2.
|
110
|
-
|
111
|
-
# Comparison:
|
112
|
-
# abd vs abc (Crystal): 2.7 i/s
|
113
|
-
# abd vs abc (Ruby): 1.4 i/s - 2.01x (± 0.00) slower
|
114
|
-
|
115
|
-
# Warming up --------------------------------------
|
116
|
-
# abc vs abcde (Ruby) 1.000 i/100ms
|
117
|
-
# abc vs abcde (Crystal)
|
118
|
-
# 1.000 i/100ms
|
119
|
-
# Calculating -------------------------------------
|
120
|
-
# abc vs abcde (Ruby) 0.901 (± 0.0%) i/s - 5.000 in 5.547298s
|
121
|
-
# abc vs abcde (Crystal)
|
122
|
-
# 1.792 (± 0.0%) i/s - 9.000 in 5.023881s
|
138
|
+
# abd vs abc (Ruby) 1.510 (± 0.0%) i/s - 8.000 in 5.298484s
|
139
|
+
# abd vs abc (Crystal) 2.951 (± 0.0%) i/s - 15.000 in 5.084644s
|
123
140
|
|
124
141
|
# Comparison:
|
125
|
-
#
|
126
|
-
#
|
142
|
+
# abd vs abc (Crystal): 3.0 i/s
|
143
|
+
# abd vs abc (Ruby): 1.5 i/s - 1.95x (± 0.00) slower
|
127
144
|
|
128
145
|
# Warming up --------------------------------------
|
129
146
|
# abcdefghi vs 0123456789 (Ruby)
|
@@ -132,13 +149,13 @@ end
|
|
132
149
|
# 1.000 i/100ms
|
133
150
|
# Calculating -------------------------------------
|
134
151
|
# abcdefghi vs 0123456789 (Ruby)
|
135
|
-
# 0.
|
152
|
+
# 0.218 (± 0.0%) i/s - 2.000 in 9.178957s
|
136
153
|
# abcdefghi vs 0123456789 (Crystal)
|
137
|
-
# 0.
|
154
|
+
# 0.475 (± 0.0%) i/s - 3.000 in 6.312027s
|
138
155
|
|
139
156
|
# Comparison:
|
140
|
-
# abcdefghi vs 0123456789 (Crystal): 0.
|
141
|
-
# abcdefghi vs 0123456789 (Ruby): 0.2 i/s - 2.
|
157
|
+
# abcdefghi vs 0123456789 (Crystal): 0.5 i/s
|
158
|
+
# abcdefghi vs 0123456789 (Ruby): 0.2 i/s - 2.18x (± 0.00) slower
|
142
159
|
|
143
160
|
# Warming up --------------------------------------
|
144
161
|
# [whole alphabet] vs 012345 (Ruby)
|
@@ -147,10 +164,62 @@ end
|
|
147
164
|
# 1.000 i/100ms
|
148
165
|
# Calculating -------------------------------------
|
149
166
|
# [whole alphabet] vs 012345 (Ruby)
|
150
|
-
# 0.
|
167
|
+
# 0.136 (± 0.0%) i/s - 1.000 in 7.358898s
|
151
168
|
# [whole alphabet] vs 012345 (Crystal)
|
152
|
-
# 0.
|
169
|
+
# 0.316 (± 0.0%) i/s - 2.000 in 6.324441s
|
153
170
|
|
154
171
|
# Comparison:
|
155
172
|
# [whole alphabet] vs 012345 (Crystal): 0.3 i/s
|
156
|
-
# [whole alphabet] vs 012345 (Ruby): 0.1 i/s - 2.
|
173
|
+
# [whole alphabet] vs 012345 (Ruby): 0.1 i/s - 2.33x (± 0.00) slower
|
174
|
+
|
175
|
+
# BENCHMARKING MEMORY USAGE
|
176
|
+
|
177
|
+
# Calculating -------------------------------------
|
178
|
+
# Blank strings (Ruby) 8.000M memsize ( 0.000 retained)
|
179
|
+
# 200.000k objects ( 0.000 retained)
|
180
|
+
# 1.000 strings ( 0.000 retained)
|
181
|
+
# Blank string (Crystal)
|
182
|
+
# 8.000M memsize ( 0.000 retained)
|
183
|
+
# 200.000k objects ( 0.000 retained)
|
184
|
+
# 1.000 strings ( 0.000 retained)
|
185
|
+
|
186
|
+
# Comparison:
|
187
|
+
# Blank strings (Ruby): 8000000 allocated
|
188
|
+
# Blank string (Crystal): 8000000 allocated - same
|
189
|
+
# Calculating -------------------------------------
|
190
|
+
# abd vs abc (Ruby) 201.600M memsize ( 0.000 retained)
|
191
|
+
# 4.000M objects ( 0.000 retained)
|
192
|
+
# 6.000 strings ( 0.000 retained)
|
193
|
+
# abd vs abc (Crystal) 8.000M memsize ( 0.000 retained)
|
194
|
+
# 200.000k objects ( 0.000 retained)
|
195
|
+
# 2.000 strings ( 0.000 retained)
|
196
|
+
|
197
|
+
# Comparison:
|
198
|
+
# abd vs abc (Crystal): 8000000 allocated
|
199
|
+
# abd vs abc (Ruby): 201600000 allocated - 25.20x more
|
200
|
+
# Calculating -------------------------------------
|
201
|
+
# abcdefghi vs 0123456789 (Ruby)
|
202
|
+
# 1.132B memsize ( 0.000 retained)
|
203
|
+
# 23.700M objects ( 0.000 retained)
|
204
|
+
# 21.000 strings ( 0.000 retained)
|
205
|
+
# abcdefghi vs 0123456789 (Crystal)
|
206
|
+
# 8.000M memsize ( 0.000 retained)
|
207
|
+
# 200.000k objects ( 0.000 retained)
|
208
|
+
# 2.000 strings ( 0.000 retained)
|
209
|
+
|
210
|
+
# Comparison:
|
211
|
+
# abcdefghi vs 0123456789 (Crystal): 8000000 allocated
|
212
|
+
# abcdefghi vs 0123456789 (Ruby): 1132000000 allocated - 141.50x more
|
213
|
+
# Calculating -------------------------------------
|
214
|
+
# [whole alphabet] vs 012345 (Ruby)
|
215
|
+
# 1.696B memsize ( 0.000 retained)
|
216
|
+
# 34.900M objects ( 0.000 retained)
|
217
|
+
# 34.000 strings ( 0.000 retained)
|
218
|
+
# [whole alphabet] vs 012345 (Crystal)
|
219
|
+
# 8.000M memsize ( 0.000 retained)
|
220
|
+
# 200.000k objects ( 0.000 retained)
|
221
|
+
# 2.000 strings ( 0.000 retained)
|
222
|
+
|
223
|
+
# Comparison:
|
224
|
+
# [whole alphabet] vs 012345 (Crystal): 8000000 allocated
|
225
|
+
# [whole alphabet] vs 012345 (Ruby): 1696000000 allocated - 212.00x more
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: levenshtein_str
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- johansenja
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '2.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: benchmark-memory
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.1.2
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.1.2
|
27
41
|
description: Use String#levenshtein(other_str) to get the levenshtein distance between
|
28
42
|
2 strings. Useful for measuring approximate string similarity, and fuzzy matching.
|
29
43
|
email:
|