db_clustering 0.1.13 → 0.1.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fe813536c77416b8518d986832a137fbcc441aec
4
- data.tar.gz: 5390301c390d4b026532f6d9bc7a5e97ebc7b405
3
+ metadata.gz: 615007829554b370c41083af36305a796ad5eb29
4
+ data.tar.gz: 9d0ae95d547512641bee5d8b4e59ab496ee15359
5
5
  SHA512:
6
- metadata.gz: ff53b70b7505433199f3b6b4e5df47c48e39af1d915d14db1489c56ce12112dd8b01522dfb5b0aab892eb2f6379c95141ddfa40aba97c0c11037a63a2e81ad54
7
- data.tar.gz: abaac0ab99a4edfa2f0caca7a1de352ae2628168aa870e4caee59a31dc1c690db5e6a2efc19f6fda6046ced0613eebcadabcb858624ad9d29dd5bf10a523e533
6
+ metadata.gz: 97545dae398e584b13884969e9219212301be23e578b0cb8f0a210f70445fae9e5e04d8e150c8de75fa02d9838e3881ed102e7a6cc3caaee7326f60b01b98d72
7
+ data.tar.gz: 8021b6cb3c55745a851ccb5c011ebd79717dc232b51e4e6c1d9ddf2b1b5e4eaf25b5e75629700181ee5bed2fff577852f5428748054a0221a76383d332f7889c
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.13
1
+ 0.1.14
@@ -6,16 +6,21 @@ module DbClustering
6
6
 
7
7
  attr_accessor :datasource, :clusters
8
8
 
9
- def initialize(datasource:, distance_metric:)
9
+ def initialize(datasource:, distance_metric:, debug: false)
10
10
  @datasource = datasource
11
11
  @distance_metric = distance_metric
12
12
  @clusters = []
13
+ @debug = debug
13
14
  end
14
15
 
15
- def cluster(max_distance:, min_neighbors:, debug: false)
16
+ def cluster(max_distance:, min_neighbors:)
16
17
  @clusters = []
17
18
  cluster = nil
18
19
 
20
+ if @debug
21
+ last_printed_progress = 0.0
22
+ end
23
+
19
24
  @datasource.iterate_all_points do |point, current_index, points_count|
20
25
  neighbors = @datasource.neighbors(point: point, distance_metric: @distance_metric, max_distance: max_distance)
21
26
 
@@ -24,22 +29,43 @@ module DbClustering
24
29
  elsif point.cluster.nil?
25
30
  cluster = DbClustering::Models::Cluster.new
26
31
  @clusters << cluster
32
+ cluster.add(point)
33
+
34
+ expand_cluster(point, neighbors, max_distance)
35
+ end
27
36
 
28
- neighbors.each do |neighbor|
29
- if neighbor.cluster.nil?
30
- cluster.add(neighbor)
37
+ yield(point, current_index, points_count) if block_given?
38
+
39
+ if @debug
40
+ point_type_string = point.is_edge_point? ? 'E' : point.is_core_point? ? 'C' : 'N'
41
+
42
+ if point.cluster
43
+ print "(#{point_type_string}|#{point.cluster.id}|#{point.cluster.points.count})"
44
+ else
45
+ print "(#{point_type_string}|nil|0)"
46
+ end
31
47
 
32
- if debug
33
- print "+"
34
- end
48
+ progress = (current_index + 1) * 100 / points_count.to_f
35
49
 
36
- # add the neighbors of the neighbor to the neighbors to fully expand the cluster
37
- neighbors |= @datasource.neighbors(point: neighbor, distance_metric: @distance_metric, max_distance: max_distance)
38
- end
50
+ if progress > last_printed_progress + 1
51
+ print "[#{progress.to_i}%]"
52
+ last_printed_progress = progress
39
53
  end
40
54
  end
55
+ end
56
+ end
41
57
 
42
- yield(point, current_index, points_count) if block_given?
58
+ def expand_cluster(point, neighbors, max_distance)
59
+ neighbors.each do |neighbor|
60
+ if neighbor.cluster.nil?
61
+ point.cluster.add(neighbor)
62
+
63
+ if @debug
64
+ print "+"
65
+ end
66
+
67
+ neighbors |= @datasource.neighbors(point: point, distance_metric: @distance_metric, max_distance: max_distance)
68
+ end
43
69
  end
44
70
  end
45
71
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: db_clustering
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.13
4
+ version: 0.1.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cihat Gündüz