db_clustering 0.1.13 → 0.1.14

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fe813536c77416b8518d986832a137fbcc441aec
4
- data.tar.gz: 5390301c390d4b026532f6d9bc7a5e97ebc7b405
3
+ metadata.gz: 615007829554b370c41083af36305a796ad5eb29
4
+ data.tar.gz: 9d0ae95d547512641bee5d8b4e59ab496ee15359
5
5
  SHA512:
6
- metadata.gz: ff53b70b7505433199f3b6b4e5df47c48e39af1d915d14db1489c56ce12112dd8b01522dfb5b0aab892eb2f6379c95141ddfa40aba97c0c11037a63a2e81ad54
7
- data.tar.gz: abaac0ab99a4edfa2f0caca7a1de352ae2628168aa870e4caee59a31dc1c690db5e6a2efc19f6fda6046ced0613eebcadabcb858624ad9d29dd5bf10a523e533
6
+ metadata.gz: 97545dae398e584b13884969e9219212301be23e578b0cb8f0a210f70445fae9e5e04d8e150c8de75fa02d9838e3881ed102e7a6cc3caaee7326f60b01b98d72
7
+ data.tar.gz: 8021b6cb3c55745a851ccb5c011ebd79717dc232b51e4e6c1d9ddf2b1b5e4eaf25b5e75629700181ee5bed2fff577852f5428748054a0221a76383d332f7889c
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.13
1
+ 0.1.14
@@ -6,16 +6,21 @@ module DbClustering
6
6
 
7
7
  attr_accessor :datasource, :clusters
8
8
 
9
- def initialize(datasource:, distance_metric:)
9
+ def initialize(datasource:, distance_metric:, debug: false)
10
10
  @datasource = datasource
11
11
  @distance_metric = distance_metric
12
12
  @clusters = []
13
+ @debug = debug
13
14
  end
14
15
 
15
- def cluster(max_distance:, min_neighbors:, debug: false)
16
+ def cluster(max_distance:, min_neighbors:)
16
17
  @clusters = []
17
18
  cluster = nil
18
19
 
20
+ if @debug
21
+ last_printed_progress = 0.0
22
+ end
23
+
19
24
  @datasource.iterate_all_points do |point, current_index, points_count|
20
25
  neighbors = @datasource.neighbors(point: point, distance_metric: @distance_metric, max_distance: max_distance)
21
26
 
@@ -24,22 +29,43 @@ module DbClustering
24
29
  elsif point.cluster.nil?
25
30
  cluster = DbClustering::Models::Cluster.new
26
31
  @clusters << cluster
32
+ cluster.add(point)
33
+
34
+ expand_cluster(point, neighbors, max_distance)
35
+ end
27
36
 
28
- neighbors.each do |neighbor|
29
- if neighbor.cluster.nil?
30
- cluster.add(neighbor)
37
+ yield(point, current_index, points_count) if block_given?
38
+
39
+ if @debug
40
+ point_type_string = point.is_edge_point? ? 'E' : point.is_core_point? ? 'C' : 'N'
41
+
42
+ if point.cluster
43
+ print "(#{point_type_string}|#{point.cluster.id}|#{point.cluster.points.count})"
44
+ else
45
+ print "(#{point_type_string}|nil|0)"
46
+ end
31
47
 
32
- if debug
33
- print "+"
34
- end
48
+ progress = (current_index + 1) * 100 / points_count.to_f
35
49
 
36
- # add the neighbors of the neighbor to the neighbors to fully expand the cluster
37
- neighbors |= @datasource.neighbors(point: neighbor, distance_metric: @distance_metric, max_distance: max_distance)
38
- end
50
+ if progress > last_printed_progress + 1
51
+ print "[#{progress.to_i}%]"
52
+ last_printed_progress = progress
39
53
  end
40
54
  end
55
+ end
56
+ end
41
57
 
42
- yield(point, current_index, points_count) if block_given?
58
+ def expand_cluster(point, neighbors, max_distance)
59
+ neighbors.each do |neighbor|
60
+ if neighbor.cluster.nil?
61
+ point.cluster.add(neighbor)
62
+
63
+ if @debug
64
+ print "+"
65
+ end
66
+
67
+ neighbors |= @datasource.neighbors(point: point, distance_metric: @distance_metric, max_distance: max_distance)
68
+ end
43
69
  end
44
70
  end
45
71
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: db_clustering
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.13
4
+ version: 0.1.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cihat Gündüz