bmatch 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 731552d40e900fde0658bcd649f5d5fa95d903a7
4
+ data.tar.gz: adcb836f48404f2a7631740155bc0e9d2ad4cdf7
5
+ SHA512:
6
+ metadata.gz: 7e71ed9e202edb6d14c6d123fbf1b62c0976ea333c2ade13e0ef16a0fc5eedde824f1167fcf03e5b99406817140b1e82c65d7ea0d2128a8c4257dedf26613caa
7
+ data.tar.gz: 88a5715d6643d0fdac36e61d818f68289c37c7b44fb128ede9007ec6fb19a983638c6beec7b3c9b22e58d17e3a27244d6350b9ad3971d0fa033de59b164cec97
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Documentation cache and generated files:
13
+ /.yardoc/
14
+ /_yardoc/
15
+ /doc/
16
+ /rdoc/
17
+
18
+ ## Environment normalisation:
19
+ /.bundle/
20
+ /vendor/bundle
21
+ /lib/bundler/man/
22
+
23
+ # for a library or gem, you might want to ignore these files since the code is
24
+ # intended to run in multiple environments; otherwise, check them in:
25
+ Gemfile.lock
26
+ .ruby-version
27
+ # .ruby-gemset
28
+ spec/examples.txt
29
+
30
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
31
+ .rvmrc
32
+
33
+ ext/*.so
34
+ ext/*.h
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source "https://rubygems.org"
2
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Udo Groebner
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,33 @@
1
+ # bmatch
2
+
3
+ [![Build
4
+ Status](https://semaphoreci.com/api/v1/projects/d08731e1-78d1-4bfa-9cd5-3789d02e369d/558462/badge.svg)](https://semaphoreci.com/udo-groebner/bmatch)
5
+ [![Gem
6
+ Version](https://badge.fury.io/rb/bmatch.svg)](http://badge.fury.io/rb/bmatch)
7
+
8
+ Approximate string matching library (with native bindings)
9
+
10
+ Inspired by
11
+ * http://blog.paracode.com/2015/08/28/ruby-and-go-sitting-in-a-tree/
12
+ * https://github.com/flori/amatch
13
+ * https://blog.jcoglan.com/2012/07/29/your-first-ruby-native-extension-c/
14
+ * http://yorickpeterse.com/articles/hacking-extconf-rb/
15
+
16
+ Levenshtein implementation taken from: https://github.com/arbovm/levenshtein
17
+
18
+ ## Why
19
+
20
+ Because amatch unfortunately produces segfaults. I hope golang-to-C doesn't do that.
21
+
22
+ ## Usage
23
+
24
+ ```ruby
25
+ require 'bmatch/similarities'
26
+ Similarities.levenshtein_distance("aa", "aü")
27
+ Similarities.dice_similarity("night", "nacht")
28
+ ```
29
+
30
+ ## Contributing
31
+
32
+ Just send me a GitHub pull request.
33
+
@@ -0,0 +1,24 @@
1
# encoding: utf-8
require File.expand_path('../lib/bmatch/version', __FILE__)

# Gem specification for bmatch. The shared library itself is produced by
# ext/extconf.rb, which is registered below as the gem's extension.
Gem::Specification.new do |s|
  s.name        = "bmatch"
  s.version     = Bmatch::VERSION
  s.authors     = ["Udo Groebner"]
  s.email       = "udo.groebner@crealytics.com"
  s.homepage    = "https://github.com/udl/bmatch"
  s.licenses    = ["MIT"]
  s.summary     = "Library for string similarities."
  s.description = "Provides string similarity functions like levenshtein distance or dice's coefficient as native extensions."

  # `rubygems_version` only records which RubyGems built the package and is
  # overwritten at build time; the minimum-version constraint belongs in
  # `required_rubygems_version`.
  s.required_rubygems_version = ">= 1.3"
  s.required_ruby_version     = ">= 1.9.3"
  s.platform                  = Gem::Platform::RUBY
  s.extensions                = ['ext/extconf.rb']

  # Package every file tracked by git.
  s.files        = `git ls-files`.split("\n")
  s.require_path = 'lib'

  s.add_dependency 'ffi', '~> 1.9'
  s.add_development_dependency 'pry', '~> 0.10'
  s.add_development_dependency 'rspec', '~> 3.3'
end
@@ -0,0 +1,5 @@
1
# Dummy Makefile: the shared library is built by extconf.rb via `go build`,
# so every target here is a no-op that simply succeeds.
.PHONY: all install clean

all:
	true

install:
	true

# RubyGems may invoke `make clean` around a build; provide it so that call
# succeeds instead of failing with "No rule to make target".
clean:
	true
@@ -0,0 +1,2 @@
1
#!/bin/bash
# Builds the Go sources into a C shared library (similarities.so) next to
# this script.

# Stop at the first failing command so a broken build is not reported as success.
set -euo pipefail

# similarities.go is referenced by a relative path, so run from the script's
# own directory regardless of where the script was invoked from.
cd "$(dirname "$0")"

go build -buildmode=c-shared -o similarities.so similarities.go
@@ -0,0 +1,23 @@
1
require 'mkmf'

# Stop the installation process if the Go toolchain is not found in $PATH.
# find_executable only returns nil when the command is missing, so the
# result has to be checked explicitly.
abort "bmatch: the 'go' executable is required to build the native extension" unless find_executable('go')

# Create a dummy extension file. Without this RubyGems would abort the
# installation process. On Linux this would result in the file "bmatch.so"
# being created in the current working directory.
#
# Normally the generated Makefile would take care of this but since we
# don't generate one we'll have to do this manually.
File.open(File.join(Dir.pwd, "bmatch.#{RbConfig::CONFIG['DLEXT']}"), "w") {}

# Build the shared library next to the Go sources. `system` (unlike
# backticks) exposes the exit status, so a failed build aborts the
# installation instead of leaving the gem without similarities.so.
Dir.chdir(File.dirname(__FILE__)) do
  built = system('go', 'build', '-buildmode=c-shared', '-o', 'similarities.so', 'similarities.go')
  abort "bmatch: 'go build' failed to produce similarities.so" unless built
end

# This is normally set by calling create_makefile() but we don't need that
# method since we'll provide a dummy Makefile. Without setting this value
# RubyGems will abort the installation.
$makefile_created = true
@@ -0,0 +1,115 @@
1
+ package main
2
+
3
+ import (
4
+ "C"
5
+ )
6
+
7
// BiGram is an ordered pair of adjacent runes taken from a string.
type BiGram struct{ a, b rune }

// CreateBiGrams returns the set of distinct bigrams (pairs of consecutive
// runes) found in s. Strings with fewer than two runes yield an empty set.
func CreateBiGrams(s string) map[BiGram]bool {
	runes := []rune(s)
	grams := make(map[BiGram]bool)
	for i := 1; i < len(runes); i++ {
		grams[BiGram{runes[i-1], runes[i]}] = true
	}
	return grams
}
20
+
21
+ //export dice_similarity
22
+ func dice_similarity(s1in, s2in *C.char) float32 {
23
+ s1 := C.GoString(s1in)
24
+ s2 := C.GoString(s2in)
25
+ bigram1, bigram2 := CreateBiGrams(s1), CreateBiGrams(s2)
26
+ // ruby only supports float32
27
+ var numIntersects float32
28
+ for first, _ := range bigram1 {
29
+ if bigram2[first] {
30
+ numIntersects += 1
31
+ }
32
+ }
33
+ return 2.0 * numIntersects / float32(len(bigram1)+len(bigram2))
34
+ }
35
+
36
+ func main() {}
37
+
38
+ // all of the following is taken and slightly adjusted to compile with exported function
39
+ // from https://github.com/arbovm/levenshtein/blob/master/levenshtein.go
40
+
41
+ //Copyright (c) 2015, Arbo von Monkiewitsch All rights reserved.
42
+
43
+ //Redistribution and use in source and binary forms, with or without
44
+ //modification, are permitted provided that the following conditions are
45
+ //met:
46
+
47
+ //1. Redistributions of source code must retain the above copyright
48
+ //notice, this list of conditions and the following disclaimer.
49
+
50
+ //2. Redistributions in binary form must reproduce the above copyright
51
+ //notice, this list of conditions and the following disclaimer in the
52
+ //documentation and/or other materials provided with the distribution.
53
+
54
+ //3. Neither the name of the copyright holder nor the names of its
55
+ //contributors may be used to endorse or promote products derived from
56
+ //this software without specific prior written permission.
57
+
58
+ //THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
59
+ //"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
60
+ //LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
61
+ //A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
62
+ //HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
63
+ //SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
64
+ //LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
65
+ //DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
66
+ //THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
67
+ //(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
68
+ //OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
69
+
70
+ // Calculate the Levenshtein distance between two strings
71
+
72
+ //export levenshtein_distance
73
+ func levenshtein_distance(s1in, s2in *C.char) int {
74
+ s1 := C.GoString(s1in)
75
+ s2 := C.GoString(s2in)
76
+ var cost, lastdiag, olddiag int
77
+ len_s1 := len([]rune(s1))
78
+ len_s2 := len([]rune(s2))
79
+
80
+ column := make([]int, len_s1+1)
81
+
82
+ for y := 1; y <= len_s1; y++ {
83
+ column[y] = y
84
+ }
85
+
86
+ for x := 1; x <= len_s2; x++ {
87
+ column[0] = x
88
+ lastdiag = x - 1
89
+ for y := 1; y <= len_s1; y++ {
90
+ olddiag = column[y]
91
+ cost = 0
92
+ if s1[y-1] != s2[x-1] {
93
+ cost = 1
94
+ }
95
+ column[y] = min(
96
+ column[y]+1,
97
+ column[y-1]+1,
98
+ lastdiag+cost)
99
+ lastdiag = olddiag
100
+ }
101
+ }
102
+ return column[len_s1]
103
+ }
104
// min returns the smallest of the three integers.
func min(a, b, c int) int {
	smallest := c
	if b < smallest {
		smallest = b
	}
	if a < smallest {
		smallest = a
	}
	return smallest
}
@@ -0,0 +1,7 @@
1
require 'ffi'

# FFI bindings for the Go-built shared library that lives in the gem's ext/
# directory. Both functions take two strings.
module Similarities
  extend FFI::Library

  # The library is located relative to this file: two directories up, then ext/.
  ext_dir = File.join(File.dirname(__FILE__), "..", "..", "ext")
  ffi_lib File.expand_path(File.join(ext_dir, "similarities.so"))

  attach_function :dice_similarity, [:string, :string], :float
  attach_function :levenshtein_distance, [:string, :string], :int
end
@@ -0,0 +1,3 @@
1
# Namespace holding the gem's version; read by bmatch.gemspec.
module Bmatch
  VERSION = '0.0.2'
end
@@ -0,0 +1,34 @@
1
# encoding: utf-8
require 'bmatch/similarities'

# Specs for the native similarity functions exposed through Similarities.
#
# Float results are compared with a tolerance and integers with `eq`; the
# `be` matcher checks object identity, which is not a reliable way to compare
# numeric values.
RSpec.describe Similarities do

  context "#dice_similarity" do
    it "has a similarity of 1 for the same string" do
      expect(Similarities.dice_similarity("aa", "aa")).to be_within(1e-6).of(1.0)
    end

    it "has a similarity of 1/4 for 'night' and 'nacht'" do
      expect(Similarities.dice_similarity("night", "nacht")).to be_within(1e-6).of(0.25)
    end

    it "also works for utf-8" do
      # 6 bigrams in "münchen", 5 in "munich", 1 shared ("ch") => 2 * 1 / 11
      expect(Similarities.dice_similarity("münchen", "munich")).to be_within(1e-6).of(2.0 / 11)
    end
  end

  context "#levenshtein_distance" do
    it "has a distance of 0 for the same string" do
      expect(Similarities.levenshtein_distance("aa", "aa")).to eq 0
    end

    it "has a distance of 1 for a one-off string" do
      expect(Similarities.levenshtein_distance("aa", "ab")).to eq 1
    end

    it "also works for utf-8" do
      expect(Similarities.levenshtein_distance("aa", "aü")).to eq 1
    end

  end
end
34
+
@@ -0,0 +1,30 @@
1
# Make the gem's lib/ directory loadable from the specs.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "lib")

# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
RSpec.configure do |config|
  config.expect_with(:rspec) do |expectations|
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  config.mock_with(:rspec) do |mocks|
    mocks.verify_partial_doubles = true
  end

  # Run only examples tagged :focus, or everything when none are tagged.
  config.filter_run :focus
  config.run_all_when_everything_filtered = true

  config.example_status_persistence_file_path = "spec/examples.txt"

  config.disable_monkey_patching!

  config.warnings = true

  # Use the documentation formatter when a single spec file is run.
  config.default_formatter = 'doc' if config.files_to_run.one?

  config.profile_examples = 10

  config.order = :random

  # Seed Kernel's random number generator with the RSpec seed.
  Kernel.srand config.seed
end
metadata ADDED
@@ -0,0 +1,101 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bmatch
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Udo Groebner
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-10-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ffi
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.9'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pry
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.10'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.10'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.3'
55
+ description: Provides string similarity functions like levenshtein distance or dice's
56
+ coefficient as native extensions.
57
+ email: udo.groebner@crealytics.com
58
+ executables: []
59
+ extensions:
60
+ - ext/extconf.rb
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".gitignore"
64
+ - ".rspec"
65
+ - Gemfile
66
+ - LICENSE
67
+ - README.md
68
+ - bmatch.gemspec
69
+ - ext/Makefile
70
+ - ext/build.sh
71
+ - ext/extconf.rb
72
+ - ext/similarities.go
73
+ - lib/bmatch/similarities.rb
74
+ - lib/bmatch/version.rb
75
+ - spec/bmatch/similarities_spec.rb
76
+ - spec/spec_helper.rb
77
+ homepage: https://github.com/udl/bmatch
78
+ licenses:
79
+ - MIT
80
+ metadata: {}
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: 1.9.3
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ requirements: []
96
+ rubyforge_project:
97
+ rubygems_version: 2.4.5.1
98
+ signing_key:
99
+ specification_version: 4
100
+ summary: Library for string similarities.
101
+ test_files: []