segno 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 67f6327b27ffe04393f1ad13a7838a0b6e3ce24b
4
+ data.tar.gz: 2f81782ef6e507a62221fb083a0fd3a5ba1d0f4c
5
+ SHA512:
6
+ metadata.gz: 84de431e555d2026c5159d0eb627d7e9512906486c85580ffee31e6acc3e47e5b8110b7ff62125e8d1f9c7f8d3d56d2ed1ddbe2abd40683d3aa7cad76d98dd75
7
+ data.tar.gz: 6bea7812f4b3f0357f04ee183dad49b0f2c235bc36c6e8a9b7ec4faee1b4f0561ffbf8fa6dc40189866912d6c032672a6544158436924e58d9fccef40bdb86eb
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ .idea
7
+ .env*
8
+ Gemfile.lock
9
+ InstalledFiles
10
+ _yardoc
11
+ coverage
12
+ doc/
13
+ lib/bundler/man
14
+ pkg
15
+ rdoc
16
+ spec/reports
17
+ test/tmp
18
+ test/version_tmp
19
+ tmp
20
+ vendor/bundle
21
+ /spec/fixtures/vcr_cassettes
22
+ .DS_Store
data/CHANGELOG.md ADDED
@@ -0,0 +1,25 @@
1
+ # 変更履歴
2
+
3
+ ## バージョニング規則
4
+
5
+ バージョン番号は MAJOR.MINOR.PATCH と表記し、
6
+ 以下の3つの法則によって更新される。
7
+ このとき MAJOR, MINOR, PATCH は非負の整数とし、頭にゼロは入れず、それぞれの要素は数値的に増加する。
8
+
9
+ 1. 互換性のない API の変更を行うときに MAJOR バージョンをインクリメントする
10
+ 2. 後方互換性のある方法で機能性を追加したときに MINOR バージョンをインクリメントする
11
+ 3. そして、後方互換性のあるバグ フィックスをしたときに PATCH バージョンをインクリメントする
12
+
13
+ メジャー バージョン 0 (0.y.z) は、初期の開発のためのものであり、
14
+ いつでもどんなものでも変化する可能性があるため、
15
+ パブリック API は安定的と考えられるべきではない。
16
+
17
+ バージョン 1.0.0 はパブリック API を定義する。
18
+ このリリースの後でバージョン番号がインクリメントされることは、
19
+ このパブリック API とそれの変更がどの程度であるかに左右される。
20
+
21
+ [参考:Semantic Versioning](http://semver.org/)
22
+
23
+
24
+ 0.0.1
25
+ ---
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in segno.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Shogo Kawaguchi
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,28 @@
1
+ # Segno
2
+
3
+ An implementation of the b-bit MinHash algorithm in Ruby
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'segno'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install segno
18
+
19
+ ## Usage
20
+
21
+
22
+ ## Contributing
23
+
24
+ 1. Fork it ( https://github.com/k-shogo/segno/fork )
25
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
26
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
27
+ 4. Push to the branch (`git push origin my-new-feature`)
28
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/lib/segno.rb ADDED
@@ -0,0 +1,23 @@
1
+ require 'segno/version'
2
+ require 'segno/configuration'
3
+ require 'segno/generator'
4
+ require 'segno/hash_vec'
5
+ require 'json'
6
+ require 'murmurhash3'
7
+
8
# Top-level namespace and convenience facade for the gem.
#
# The module extends Configuration, so the configuration accessors and helpers
# are available directly on Segno. Any other message is forwarded to a
# generator built from the current module-level options.
module Segno
  extend Configuration

  # Builds a generator from the module-level options, overridden by +options+.
  #
  # @param options [Hash] per-instance overrides passed to Generator.new
  # @return [Segno::Generator] a new generator on every call
  def self.generator(options = {})
    Segno::Generator.new(options)
  end

  # Forwards messages Segno itself does not implement to a default generator.
  #
  # The generator is built once per call and reused for both the capability
  # check and the dispatch. Only public methods pass the check, so
  # +public_send+ is sufficient for the dispatch.
  #
  # @raise [NoMethodError] when the generator does not respond either
  def self.method_missing(method_name, *args, &block)
    delegate = generator
    return super unless delegate.respond_to?(method_name)

    delegate.public_send(method_name, *args, &block)
  end

  # Reports delegated methods through the standard hook so that both
  # +Segno.respond_to?+ and +Segno.method+ agree with +method_missing+.
  #
  # @param method_name [Symbol] the method being queried
  # @param include_private [Boolean] whether private methods count
  # @return [Boolean]
  def self.respond_to_missing?(method_name, include_private = false)
    generator.respond_to?(method_name, include_private) || super
  end
end
@@ -0,0 +1,41 @@
1
module Segno
  # Mixin holding the MinHash settings of whatever object extends it.
  #
  # Extending an object with this module immediately resets it to the
  # defaults below (see +extended+).
  module Configuration
    # Names of every supported setting; also used to define the accessors.
    VALID_OPTIONS_KEYS = [
      :b,
      :k,
      :seed,
      :seed_vec
    ].freeze

    DEFAULT_B        = 1
    DEFAULT_K        = 128
    DEFAULT_SEED     = 2090358822
    DEFAULT_SEED_VEC = nil

    # Parenthesised so the splat is not parsed as an ambiguous binary `*`.
    attr_accessor(*VALID_OPTIONS_KEYS)

    # Hook run when an object extends this module: applies the defaults.
    def self.extended(base)
      base.reset
    end

    # Yields the receiver so settings can be assigned in a block.
    #
    # @yieldparam config [Configuration] the receiver
    # @return [self]
    def configure
      yield self
      self
    end

    # Snapshot of the current settings.
    #
    # @return [Hash{Symbol => Object}] one entry per VALID_OPTIONS_KEYS item
    def options
      VALID_OPTIONS_KEYS.each_with_object({}) do |key, settings|
        settings[key] = public_send(key)
      end
    end

    # Restores every setting to its default value.
    #
    # @return [self]
    def reset
      self.b        = DEFAULT_B
      self.k        = DEFAULT_K
      self.seed     = DEFAULT_SEED
      self.seed_vec = DEFAULT_SEED_VEC
      self
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'segno/seed'
2
+ require 'segno/minhash'
3
module Segno
  # Holds one set of MinHash settings and exposes the hashing helpers mixed
  # in from Seed and MinHash.
  class Generator
    include Seed
    include MinHash

    attr_accessor(*Configuration::VALID_OPTIONS_KEYS)

    # Starts from the module-level options and applies +options+ on top.
    # Keys outside Configuration::VALID_OPTIONS_KEYS are not read.
    #
    # @param options [Hash] overrides for the module-level settings
    def initialize(options = {})
      settings = Segno.options.merge(options)
      Configuration::VALID_OPTIONS_KEYS.each do |name|
        send("#{name}=", settings[name])
      end

      # When no seed vector was supplied, derive one with k entries.
      self.seed_vec = gen_seed(k) unless seed_vec
    end
  end
end
@@ -0,0 +1,41 @@
1
module Segno
  # A MinHash signature stored as an array of equally sized bit strings
  # (for example %w[01 10 11 00]).
  class HashVec
    attr_accessor :vec

    # Serialized form: "<b>.<k>.<hex>", with b and k in decimal and the
    # concatenated bits in lower-case hexadecimal.
    VALID_SERIALIZE_PATTERN = /\A(?<b>[0-9]{1,})\.(?<k>[0-9]{1,})\.(?<hash>[0-9a-f]{1,})\z/

    # @param vec [Array<String>] bit strings, all of the same length
    def initialize(vec)
      @vec = vec
    end

    # Concatenated bits as a hexadecimal number. Leading zero bits are not
    # preserved here; +load+ restores them from b and k.
    #
    # @return [String]
    def to_s
      @vec.join.to_i(2).to_s(16)
    end

    # @return [Integer] length of the first entry, i.e. bits per hash value
    def b
      @vec.first.size
    end

    # @return [Integer] number of entries in the signature
    def k
      @vec.size
    end

    # Similarity estimate between this signature and +hash_vec+, computed as
    # (2^b * matches / k - 1) / (2^b - 1).
    #
    # NOTE(review): this uses the receiver's b and k only; both signatures are
    # presumably expected to share them - confirm against callers.
    #
    # @param hash_vec [HashVec] the signature to compare with
    # @return [Rational]
    # @raise [IndexError] when the two vectors differ in length
    def jaccard(hash_vec)
      matches = [vec, hash_vec.vec].transpose.count { |mine, theirs| mine == theirs }
      buckets = 2**b
      (buckets * Rational(matches, k) - 1) / (buckets - 1)
    end

    # Serializes +hash_vec+ into the "<b>.<k>.<hex>" form.
    #
    # @param hash_vec [HashVec]
    # @return [String]
    def self.dump(hash_vec)
      [hash_vec.b, hash_vec.k, hash_vec.to_s].join('.')
    end

    # Rebuilds a HashVec from the form produced by +dump+.
    #
    # @param string [String] serialized signature
    # @return [HashVec, nil] nil when the string does not match the pattern,
    #   when b or k is zero, or when the payload holds more than b * k bits
    def self.load(string)
      key = string.match(VALID_SERIALIZE_PATTERN)
      return unless key

      b = key['b'].to_i
      k = key['k'].to_i
      width = b * k
      return unless width > 0

      # Left-pad with the zero bits that to_s dropped.
      bits = key['hash'].to_i(16).to_s(2).rjust(width, '0')
      # A longer payload cannot have come from dump and would split into
      # misaligned chunks.
      return if bits.length > width

      new bits.scan(/.{#{b}}/)
    end
  end
end
@@ -0,0 +1,20 @@
1
module Segno
  # Hashing helpers. The including object must provide +b+ and +seed_vec+
  # (Segno::Generator does).
  module MinHash
    # Smallest MurmurHash3 32-bit string hash over +targets+ for one seed.
    #
    # @param targets [#each] strings to hash
    # @param seed [Integer] seed handed to MurmurHash3::V32.str_hash
    # @return [Integer, nil] nil when +targets+ yields nothing
    def minhash(targets, seed)
      smallest = nil
      targets.each do |str|
        candidate = MurmurHash3::V32.str_hash(str, seed)
        smallest = candidate if smallest.nil? || candidate < smallest
      end
      smallest
    end

    # Last +b+ binary digits of the minhash, zero-padded to at least +b+.
    #
    # @param targets [#each] strings to hash
    # @param seed [Integer]
    # @return [String] a bit string of length +b+
    def bbit_minhash(targets, seed)
      binary = minhash(targets, seed).to_s(2)
      # The digit string goes through %d purely to obtain zero padding.
      padded = format("%0#{b}d", binary)
      padded[-b..-1]
    end

    # Signature with one b-bit minhash per entry of +seed_vec+.
    #
    # @param targets [#each] strings to hash
    # @return [Segno::HashVec]
    def bbit_minhash_vec(targets)
      signature = seed_vec.map { |hash_seed| bbit_minhash(targets, hash_seed) }
      Segno::HashVec.new(signature)
    end
  end
end
data/lib/segno/seed.rb ADDED
@@ -0,0 +1,14 @@
1
module Segno
  # Seed derivation. The including object must provide +seed+.
  module Seed
    # Builds a chain of +length+ seeds: each one is the MurmurHash3 32-bit
    # integer hash of the previous, starting from +seed+. The starting seed
    # itself is not part of the result.
    #
    # @param length [Integer] number of seeds to produce
    # @return [Array<Integer>]
    def gen_seed(length)
      previous = seed
      length.times.map { previous = MurmurHash3::V32.int32_hash(previous) }
    end
  end
end
@@ -0,0 +1,3 @@
1
module Segno
  # Gem version, read by segno.gemspec. Frozen so that it cannot be mutated
  # in place at runtime.
  VERSION = '0.0.1'.freeze
end
data/segno.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'segno/version'
5
+
6
# Packaging metadata for the segno gem.
Gem::Specification.new do |spec|
  spec.name = "segno"
  # Version comes from lib/segno/version.rb, required above.
  spec.version = Segno::VERSION
  spec.authors = ["Shogo Kawaguchi"]
  spec.email = ["platycod0n.ramosa@gmail.com"]
  spec.summary = %q{An implementation of the b bit MinHash algorithm in ruby}
  spec.description = %q{An implementation of the b bit MinHash algorithm in ruby}
  spec.homepage = "https://github.com/k-shogo/segno"
  spec.license = "MIT"

  # Runtime: the library calls MurmurHash3::V32 for hashing.
  spec.add_runtime_dependency 'murmurhash3'

  # Development and test tooling only.
  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "simplecov"
  spec.add_development_dependency "pry"

  # The file list is taken from git, so the gem must be built from a git
  # checkout; paths are NUL-separated (-z) and split on "\x0".
  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]
end
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'spec_helper'
4
+
5
# Checks how the Segno module answers respond_to?: true for a method it has
# itself (configure), false for a name nothing implements.
describe Segno::Generator do

  context '#respond_to?' do
    # be(true) / be(false) work on both RSpec 2 and RSpec 3; the be_true /
    # be_false matchers used previously were removed in RSpec 3.
    it '存在するメソッドでtrueが返る' do
      expect(Segno.respond_to?(:configure)).to be(true)
    end

    it '存在しないメソッドでfalseが帰る' do
      expect(Segno.respond_to?(:hoge)).to be(false)
    end
  end

  # TODO: add an example showing that generator methods can be called through
  # Segno. The previous commented-out draft called Segno.api_key, which does
  # not appear anywhere in the library code shown here.

end
@@ -0,0 +1,13 @@
1
# Shared spec setup. SimpleCov is started before `require 'segno'` so that
# coverage tracking is already active when the library files load.
require 'simplecov'
SimpleCov.start do
  # Leave the specs themselves and vendored code out of the coverage report.
  add_filter 'spec'
  add_filter 'vendor'
end

require 'segno'
require 'rspec'

RSpec.configure do |config|
  # Use RSpec's own mocking framework.
  config.mock_framework = :rspec
end
13
+
metadata ADDED
@@ -0,0 +1,146 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: segno
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Shogo Kawaguchi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: murmurhash3
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.6'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: simplecov
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: An implementation of the b bit MinHash algorithm in ruby
98
+ email:
99
+ - platycod0n.ramosa@gmail.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - CHANGELOG.md
106
+ - Gemfile
107
+ - LICENSE.txt
108
+ - README.md
109
+ - Rakefile
110
+ - lib/segno.rb
111
+ - lib/segno/configuration.rb
112
+ - lib/segno/generator.rb
113
+ - lib/segno/hash_vec.rb
114
+ - lib/segno/minhash.rb
115
+ - lib/segno/seed.rb
116
+ - lib/segno/version.rb
117
+ - segno.gemspec
118
+ - spec/generator_spec.rb
119
+ - spec/spec_helper.rb
120
+ homepage: https://github.com/k-shogo/segno
121
+ licenses:
122
+ - MIT
123
+ metadata: {}
124
+ post_install_message:
125
+ rdoc_options: []
126
+ require_paths:
127
+ - lib
128
+ required_ruby_version: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: '0'
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
138
+ requirements: []
139
+ rubyforge_project:
140
+ rubygems_version: 2.0.3
141
+ signing_key:
142
+ specification_version: 4
143
+ summary: An implementation of the b bit MinHash algorithm in ruby
144
+ test_files:
145
+ - spec/generator_spec.rb
146
+ - spec/spec_helper.rb