clustering 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,40 @@
1
+ /.bundle
2
+ db/*.sqlite3
3
+ /log
4
+ /tmp/**/*
5
+ .DS_Store
6
+ rerun.txt
7
+ .project
8
+ nbproject
9
+ test/ruby
10
+ /.idea
11
+ /.rvmrc
12
+ /.powrc
13
+ /coverage
14
+ /coverage.data
15
+ /public/assets
16
+ vendor/cache
17
+ vendor/bundle
18
+ .sass-cache
19
+ tmp/*
20
+ !tmp/
21
+ !tmp/restart.txt
22
+ !tmp/.gitkeep
23
+
24
+ *.gem
25
+ *.rbc
26
+ .bundle
27
+ .config
28
+ .yardoc
29
+ Gemfile.lock
30
+ InstalledFiles
31
+ _yardoc
32
+ coverage
33
+ doc/
34
+ lib/bundler/man
35
+ pkg
36
+ rdoc
37
+ spec/reports
38
+ test/tmp
39
+ test/version_tmp
40
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
# On Ruby 1.9.x, make UTF-8 the default external and internal encoding.
# The pattern is anchored and its dot escaped so that only versions that
# start with "1.9" match; an unanchored /1.9/ would also match a version
# such as "2.1.9".
if RUBY_VERSION =~ /\A1\.9/
  Encoding.default_external = Encoding::UTF_8
  Encoding.default_internal = Encoding::UTF_8
end

source 'https://rubygems.org'

# Pull in the dependencies declared in this project's .gemspec file.
gemspec
gem 'rake', '10.0.2'

# Gems that are only needed to run the spec suite.
group :test do
  gem 'rspec'
  gem 'mocha'
end
data/Gemfile.lock ADDED
@@ -0,0 +1,30 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ clustering (0.0.1)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.1.3)
10
+ metaclass (0.0.1)
11
+ mocha (0.12.4)
12
+ metaclass (~> 0.0.1)
13
+ rake (10.0.2)
14
+ rspec (2.11.0)
15
+ rspec-core (~> 2.11.0)
16
+ rspec-expectations (~> 2.11.0)
17
+ rspec-mocks (~> 2.11.0)
18
+ rspec-core (2.11.1)
19
+ rspec-expectations (2.11.3)
20
+ diff-lcs (~> 1.1.3)
21
+ rspec-mocks (2.11.2)
22
+
23
+ PLATFORMS
24
+ ruby
25
+
26
+ DEPENDENCIES
27
+ clustering!
28
+ mocha
29
+ rake (= 10.0.2)
30
+ rspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 the-architect
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Clustering
2
+
3
+ Cluster elements that are connected.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'clustering'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install clustering
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
# Rake entry point: loads Bundler's gem tasks and defines the spec task.
require 'rake'
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

# `rake spec` runs every file matching spec/**/*_spec.rb with --color.
RSpec::Core::RakeTask.new('spec') do |spec_task|
  spec_task.pattern    = 'spec/**/*_spec.rb'
  spec_task.rspec_opts = ['--color']
end

# A bare `rake` runs the spec task.
task 'default' => ['spec']
@@ -0,0 +1,19 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for the clustering gem.

# Put ./lib on the load path so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'clustering/version'

Gem::Specification.new do |gem|
  gem.name          = "clustering"
  gem.version       = Clustering::VERSION
  gem.authors       = ["the-architect"]
  gem.email         = ["marcel.scherf@epicteams.com"]
  gem.description   = %q{Cluster elements that are connected}
  gem.summary       = %q{Cluster elements}
  gem.homepage      = ""
  # The project ships an MIT LICENSE.txt; declare it so the license shows up
  # in the packaged gem metadata instead of an empty license list.
  gem.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
@@ -0,0 +1,57 @@
1
module Clustering

  # Clusters elements by how often they appear together in the input groups.
  #
  # Input: "data" should be an array of arrays. Each inner array should
  # contain elements that are connected to each other. Elements are counted
  # as often as they occur in the arrays; this class does not make sure an
  # inner array only holds unique elements.
  #
  #   [
  #     [1,2,3],
  #     [2,3],
  #     [3,1]
  #   ]
  class Simple

    # @param data [Array<Array>] groups of connected elements; it is only
    #   read, never modified, so a frozen array is fine.
    def initialize(data)
      @original = data
    end

    # Builds (and memoizes) a hash of hashes holding the grade of linkage
    # between elements, i.e. how many times two different elements share a
    # group.
    #
    #   connections[3][4] #=> 2
    #
    # Note: the returned hash creates an empty entry when an unknown key is
    # indexed with [] (it has a default block). Use #strength? for read-only
    # lookups.
    #
    # @return [Hash{Object => Hash{Object => Integer}}]
    def connections
      @connections ||= @original.each_with_object(new_count_index) do |group, counts|
        group.each do |key|
          group.each do |id|
            counts[key][id] += 1 unless key == id
          end
        end
      end
    end

    # Builds (and memoizes) the inverse view of #connections: for every
    # element, its linked elements bucketed by linkage strength.
    #
    #   relations[3][2] #=> [1, 4]
    #
    # @return [Hash{Object => Hash{Integer => Array}}]
    def relations
      @relations ||= connections.each_with_object(new_bucket_index) do |(element, linked), buckets|
        linked.each do |other, strength|
          buckets[element][strength].push(other)
        end
      end
    end

    # Collects, per element, all linked elements whose strength is at least
    # +threshold+. Elements are appended bucket by bucket in ascending
    # strength order. Elements without any sufficiently strong link get no
    # entry in the result.
    #
    # @param threshold [Integer] minimum linkage strength (default 2)
    # @return [Hash{Object => Array}]
    def clusters(threshold = 2)
      relations.each_with_object(Hash.new { |hash, key| hash[key] = [] }) do |(element, buckets), result|
        buckets.keys.sort.each do |strength|
          result[element].concat(buckets[strength]) if strength >= threshold
        end
      end
    end

    # Linkage strength between +a+ and +b+.
    #
    # Checks for the key first instead of indexing #connections directly:
    # indexing would trigger the hash's default block and silently insert an
    # empty entry for an unknown +a+.
    #
    # @return [Integer] number of shared groups; 0 when the elements are not
    #   connected or +a+ is unknown
    def strength?(a, b)
      connections.key?(a) ? connections[a][b] : 0
    end

    private

    # Hash whose missing keys become per-element counters defaulting to 0.
    def new_count_index
      Hash.new { |index, element| index[element] = Hash.new(0) }
    end

    # Hash whose missing keys become per-element hashes of strength => Array.
    def new_bucket_index
      Hash.new do |index, element|
        index[element] = Hash.new { |bucket, strength| bucket[strength] = [] }
      end
    end

  end

end
@@ -0,0 +1,3 @@
1
# Namespace of the clustering gem; this file only defines its version.
module Clustering
  # Version string of the gem.
  VERSION = '0.0.1'
end
data/lib/clustering.rb ADDED
@@ -0,0 +1,5 @@
1
# Entry point of the gem: loads the clustering implementation and the version.
require_relative 'clustering/simple'
require_relative 'clustering/version'

# Top-level namespace of the gem.
module Clustering; end
@@ -0,0 +1,31 @@
1
require_relative '../../spec_helper'

# Specs for Clustering::Simple, run against the TEST_DATA fixture.
describe Clustering::Simple do

  # A frozen copy of the fixture is passed in for every example.
  subject { Clustering::Simple.new(TEST_DATA.clone.freeze) }

  it 'should find count connections from data' do
    subject.connections.keys.should_not be_empty

    subject.connections[3][4].should eql(2)
  end

  it 'should find strength between elements' do
    strength = subject.strength?(3, 4)
    strength.should eql(2)
  end

  it 'should build relations and put them in strength buckets' do
    subject.relations[3][2].should eql([1, 4])
  end

  it 'should find clusters' do
    found = subject.clusters
    found.keys.sort.should eql([1, 3, 4])

    # Expected members per clustered element, checked in key order.
    { 1 => [3], 3 => [1, 4], 4 => [3] }.each do |element, members|
      found[element].should eql(members)
    end
  end

end
@@ -0,0 +1,17 @@
1
require 'rspec/autorun'

# Shared RSpec setup: mocha for mocking, bare symbols as true-valued
# metadata, and examples tagged :wip are excluded from the run.
RSpec.configure do |c|
  c.mock_with(:mocha)
  c.treat_symbols_as_metadata_keys_with_true_values = true
  c.filter_run_excluding(:wip => true)
end

require_relative '../lib/clustering'

# Fixture: each inner array lists ids that are related to each other.
# They could represent tag-ids for each user profile.
TEST_DATA = [
  [1, 3, 4, 5],
  [2, 3, 4],
  [1, 3, 8, 7]
]
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: clustering
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - the-architect
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-18 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Cluster elements that are connected
15
+ email:
16
+ - marcel.scherf@epicteams.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - .gitignore
22
+ - Gemfile
23
+ - Gemfile.lock
24
+ - LICENSE.txt
25
+ - README.md
26
+ - Rakefile
27
+ - clustering.gemspec
28
+ - lib/clustering.rb
29
+ - lib/clustering/simple.rb
30
+ - lib/clustering/version.rb
31
+ - spec/lib/clustering/simple_spec.rb
32
+ - spec/spec_helper.rb
33
+ homepage: ''
34
+ licenses: []
35
+ post_install_message:
36
+ rdoc_options: []
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ segments:
46
+ - 0
47
+ hash: -2856824314895578114
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ segments:
55
+ - 0
56
+ hash: -2856824314895578114
57
+ requirements: []
58
+ rubyforge_project:
59
+ rubygems_version: 1.8.24
60
+ signing_key:
61
+ specification_version: 3
62
+ summary: Cluster elements
63
+ test_files:
64
+ - spec/lib/clustering/simple_spec.rb
65
+ - spec/spec_helper.rb