db_clustering 0.1.13 → 0.1.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/algorithms/density_based/dbscan.rb +38 -12
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 615007829554b370c41083af36305a796ad5eb29
|
4
|
+
data.tar.gz: 9d0ae95d547512641bee5d8b4e59ab496ee15359
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 97545dae398e584b13884969e9219212301be23e578b0cb8f0a210f70445fae9e5e04d8e150c8de75fa02d9838e3881ed102e7a6cc3caaee7326f60b01b98d72
|
7
|
+
data.tar.gz: 8021b6cb3c55745a851ccb5c011ebd79717dc232b51e4e6c1d9ddf2b1b5e4eaf25b5e75629700181ee5bed2fff577852f5428748054a0221a76383d332f7889c
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.13
|
1
|
+
0.1.14
|
data/lib/algorithms/density_based/dbscan.rb
CHANGED
@@ -6,16 +6,21 @@ module DbClustering
|
|
6
6
|
|
7
7
|
attr_accessor :datasource, :clusters
|
8
8
|
|
9
|
-
def initialize(datasource:, distance_metric:)
|
9
|
+
def initialize(datasource:, distance_metric:, debug: false)
|
10
10
|
@datasource = datasource
|
11
11
|
@distance_metric = distance_metric
|
12
12
|
@clusters = []
|
13
|
+
@debug = debug
|
13
14
|
end
|
14
15
|
|
15
|
-
def cluster(max_distance:, min_neighbors
|
16
|
+
def cluster(max_distance:, min_neighbors:)
|
16
17
|
@clusters = []
|
17
18
|
cluster = nil
|
18
19
|
|
20
|
+
if @debug
|
21
|
+
last_printed_progress = 0.0
|
22
|
+
end
|
23
|
+
|
19
24
|
@datasource.iterate_all_points do |point, current_index, points_count|
|
20
25
|
neighbors = @datasource.neighbors(point: point, distance_metric: @distance_metric, max_distance: max_distance)
|
21
26
|
|
@@ -24,22 +29,43 @@ module DbClustering
|
|
24
29
|
elsif point.cluster.nil?
|
25
30
|
cluster = DbClustering::Models::Cluster.new
|
26
31
|
@clusters << cluster
|
32
|
+
cluster.add(point)
|
33
|
+
|
34
|
+
expand_cluster(point, neighbors, max_distance)
|
35
|
+
end
|
27
36
|
|
28
|
-
|
29
|
-
|
30
|
-
|
37
|
+
yield(point, current_index, points_count) if block_given?
|
38
|
+
|
39
|
+
if @debug
|
40
|
+
point_type_string = point.is_edge_point? ? 'E' : point.is_core_point? ? 'C' : 'N'
|
41
|
+
|
42
|
+
if point.cluster
|
43
|
+
print "(#{point_type_string}|#{point.cluster.id}|#{point.cluster.points.count})"
|
44
|
+
else
|
45
|
+
print "(#{point_type_string}|nil|0)"
|
46
|
+
end
|
31
47
|
|
32
|
-
|
33
|
-
print "+"
|
34
|
-
end
|
48
|
+
progress = (current_index + 1) * 100 / points_count.to_f
|
35
49
|
|
36
|
-
|
37
|
-
|
38
|
-
|
50
|
+
if progress > last_printed_progress + 1
|
51
|
+
print "[#{progress.to_i}%]"
|
52
|
+
last_printed_progress = progress
|
39
53
|
end
|
40
54
|
end
|
55
|
+
end
|
56
|
+
end
|
41
57
|
|
42
|
-
|
58
|
+
def expand_cluster(point, neighbors, max_distance)
|
59
|
+
neighbors.each do |neighbor|
|
60
|
+
if neighbor.cluster.nil?
|
61
|
+
point.cluster.add(neighbor)
|
62
|
+
|
63
|
+
if @debug
|
64
|
+
print "+"
|
65
|
+
end
|
66
|
+
|
67
|
+
neighbors |= @datasource.neighbors(point: point, distance_metric: @distance_metric, max_distance: max_distance)
|
68
|
+
end
|
43
69
|
end
|
44
70
|
end
|
45
71
|
|