dbscan 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/dbscan.rb +101 -0
- metadata +76 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 34c666c340e0836c87ec540d6134017dd1194f92
|
4
|
+
data.tar.gz: 0df92c8c2794eaf3b1486a12d7c73830929179b4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 067ebff05da42bd0341817dffa4d55430d390636f1fc2a7e4010679335d63443822f8cadc59572034e06e8b91a6c72d19d3079a7eaabc2a1a43d7bf3fd10552b
|
7
|
+
data.tar.gz: a8a864c43506836469c89d0034cc6fab70a1f959eb79e4defab911fa5f5459b6d871c1ccc899d584078dcab7ca281b5ab01c03b9b104543d072e916627c63d5c
|
data/lib/dbscan.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
## https://github.com/shiguodong/dbscan (fork)
|
2
|
+
|
3
|
+
require 'distance_measures'
|
4
|
+
|
5
|
+
require 'geocoder'
|
6
|
+
|
7
|
+
# Core extension: adds a geographic distance measure to Array so it can be
# selected by name through the clusterer's :distance option.
class Array
  # Distance between this point and +n+, delegated to
  # Geocoder::Calculations.distance_between.
  #
  # NOTE(review): presumably both points are [latitude, longitude] pairs and
  # the unit is whatever Geocoder is configured with -- confirm against the
  # Geocoder documentation.
  #
  # @param n [Array] the other point
  # @return [Object] whatever Geocoder::Calculations.distance_between returns
  def haversine_distance2(n)
    Geocoder::Calculations.distance_between(self, n)
  end
end
|
12
|
+
|
13
|
+
# DBSCAN (density-based spatial clustering of applications with noise).
#
# Points are arbitrary objects (typically coordinate arrays) that respond to
# the distance method named by the :distance option.
module DBSCAN

  # Clusters a list of points on construction; read the outcome with #results
  # or #clusters.
  class Clusterer
    # Key in +clusters+ under which noise points are collected.
    NOISE = -1

    attr_accessor :points, :options, :clusters

    # @param points [Array] raw points; each one is wrapped in a Point
    # @param options [Hash] clustering settings:
    #   :epsilon    - a point is a neighbor when its distance is strictly below this
    #   :min_points - minimum number of neighbors (the point itself is not
    #                 counted) for a point to start or extend a cluster
    #   :distance   - name of the distance method called on a point's items
    #                 (defaults to :euclidean_distance)
    def initialize(points, options = {})
      # Copy before applying the default so the caller's hash is left untouched
      # (and frozen hashes are accepted).
      options = options.dup
      options[:distance] ||= :euclidean_distance

      @points = points.map { |e| Point.new(e) }
      @options = options
      @clusters = { NOISE => [] }

      clusterize!
    end

    # Visits every point once, opening a new cluster for each unvisited point
    # that has enough neighbors and recording the others as noise.
    #
    # @return [Array<Point>] the wrapped points
    def clusterize!
      current_cluster = NOISE
      @points.each do |point|
        next if point.visited?

        point.visit!
        neighbors = inmediate_neighbors(point)
        if neighbors.size >= options[:min_points]
          current_cluster += 1
          point.cluster = current_cluster
          clusters[current_cluster] = [point, *add_connected(neighbors, current_cluster)]
        else
          # Provisional: the point is moved out of noise by #add_connected if a
          # later cluster reaches it.
          clusters[NOISE].push(point)
        end
      end
    end

    # @return [Hash] cluster index => array of the original point values;
    #   empty groups (including an empty noise group) are omitted
    def results
      @clusters.each_with_object({}) do |(cluster_index, members), hash|
        flat = members.flatten
        hash[cluster_index] = flat.map(&:items) unless flat.empty?
      end
    end

    # Points whose distance to +point+ is strictly below options[:epsilon].
    #
    # NOTE(review): candidates are excluded by comparing items, not identity,
    # so a distinct point with identical coordinates is never reported as a
    # neighbor. Kept as-is because changing it alters clustering results.
    #
    # @param point [Point]
    # @return [Array<Point>]
    def inmediate_neighbors(point)
      @points.select do |other|
        next false if other.items == point.items

        # public_send instead of eval: the option is only a method name and
        # must not be executed as arbitrary code.
        point.items.public_send(options[:distance], other.items) < options[:epsilon]
      end
    end

    # Expands a cluster from the seed's neighbors, following every neighbor
    # that itself has at least options[:min_points] neighbors.
    #
    # @param neighbors [Array<Point>] neighbors of the cluster's seed point
    # @param current_cluster [Integer] index assigned to absorbed points
    # @return [Array<Point>] points newly assigned to the cluster, in visit order
    def add_connected(neighbors, current_cluster)
      cluster_points = []
      queue = neighbors.dup
      # Identity-keyed lookup of everything ever queued, so each point is
      # queued at most once without rescanning the queue.
      enqueued = {}.compare_by_identity
      queue.each { |candidate| enqueued[candidate] = true }

      until queue.empty?
        point = queue.shift

        unless point.visited?
          point.visit!
          reachable = inmediate_neighbors(point)
          if reachable.size >= options[:min_points]
            reachable.each do |candidate|
              next if enqueued.key?(candidate)

              enqueued[candidate] = true
              queue.push(candidate)
            end
          end
        end

        next if point.cluster

        # A point visited earlier may have been filed as noise; it now belongs
        # to this cluster, so it must not be reported in both groups.
        clusters[NOISE].delete(point)
        cluster_points.push(point)
        point.cluster = current_cluster
      end

      cluster_points
    end
  end

  # Wrapper that pairs an input value with its clustering state.
  class Point
    attr_accessor :items, :cluster, :visited

    # @param point [Object] the original input value, exposed as +items+
    def initialize(point)
      @items = point
      @cluster = nil
      @visited = false
    end

    # @return [Boolean] whether the clusterer has already processed this point
    def visited?
      @visited
    end

    # Marks the point as processed.
    def visit!
      @visited = true
    end
  end

  # Convenience constructor: DBSCAN(points, options) builds a Clusterer.
  def DBSCAN(*args)
    Clusterer.new(*args)
  end
end
|
100
|
+
|
101
|
+
# Mix the module into the top-level object so that DBSCAN( points, options )
# can be called without a receiver.
include DBSCAN
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dbscan
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Matias Insaurralde
|
8
|
+
- shiguodong
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-09-08 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: distance_measures
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - '>='
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
type: :runtime
|
22
|
+
prerelease: false
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - '>='
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
version: '0'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: geocoder
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - '>='
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - '>='
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
description: DBSCAN (Density-based spatial clustering of applications with noise)
|
43
|
+
is a data clustering algorithm proposed by Martin Ester, Hans-Peter Kriegel, Jorg
|
44
|
+
Sander and Xiaowei Xu.
|
45
|
+
email:
|
46
|
+
- matiasbaruchinsaurralde@gmail.com
|
47
|
+
- oreatial@gmail.com
|
48
|
+
executables: []
|
49
|
+
extensions: []
|
50
|
+
extra_rdoc_files: []
|
51
|
+
files:
|
52
|
+
- lib/dbscan.rb
|
53
|
+
homepage: https://github.com/matiasinsaurralde/dbscan
|
54
|
+
licenses: []
|
55
|
+
metadata: {}
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - '>='
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements: []
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 2.0.7
|
73
|
+
signing_key:
|
74
|
+
specification_version: 4
|
75
|
+
summary: DBSCAN implementation.
|
76
|
+
test_files: []
|