kmeans-clusterer 0.5.3 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/kmeans-clusterer.rb +21 -21
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 18cbb4fb26dfd4c1648b73de2f6dafed580dbcd0
4
- data.tar.gz: 1b163ae96a584ab74117f00d589f56e33ccaee96
3
+ metadata.gz: cf47337db7688eba2c31a5117e2f8c071415759a
4
+ data.tar.gz: 844c11b63adf3d1e5a554bebcdbde8f251c45d96
5
5
  SHA512:
6
- metadata.gz: 48ebbf8b36bdfaa27900f1f2fcfdf2475bea0e2a0211f55cbbd2517906724bc16bd5952f25cd62b13fa5eb3ba9dfe2c3dd561102a8a1e3ceeae60070cfddebf6
7
- data.tar.gz: ce40602935d7f9ad81ddad2cabfebb87a30c06a8a05df4f7c303910761463724bab887118c3f5671177f5c1c453c1401ca0806675cd12af6aa6be9222c38acc0
6
+ metadata.gz: 78750f92bd336135d5118c061dea3e6d56cce8500642f18aa8b4dd0c5dd2953eda2c47691348685ff591d5c33b795aff93c52fd6869d53701cc3c46b661b9eee
7
+ data.tar.gz: 70470a45c59ad789a08f3f73a189de3958e6202b4c4fc5459eae967d2cd3ea4653ac20fc1dc2f343fe795829307f845680975fbaeeb35e5b62335ffc3006fd76
data/lib/kmeans-clusterer.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  require 'narray'
2
2
 
3
3
  class KMeansClusterer
4
+ TYPECODE = { double: NArray::DFLOAT, single: NArray::SFLOAT }
5
+
4
6
  module Scaler
5
7
  def self.mean data
6
8
  data.mean(1)
@@ -12,8 +14,8 @@ class KMeansClusterer
12
14
  std
13
15
  end
14
16
 
15
- def self.scale data, mean = nil, std = nil
16
- data = NArray.cast(data, NArray::DFLOAT)
17
+ def self.scale data, mean = nil, std = nil, typecode = nil
18
+ data = NArray.cast(data, typecode)
17
19
  mean ||= self.mean(data)
18
20
  std ||= self.std(data)
19
21
  data = (data - mean) / std
@@ -64,27 +66,24 @@ class KMeansClusterer
64
66
  point.cluster = self
65
67
  @points << point
66
68
  end
67
-
68
- def points_narray
69
- NArray.cast @points.map(&:data)
70
- end
71
69
  end
72
70
 
73
71
 
74
- DEFAULT_OPTS = { scale_data: false, runs: 10, log: false, init: :kmpp}
72
+ DEFAULT_OPTS = { scale_data: false, runs: 10, log: false, init: :kmpp, float_precision: :double }
75
73
 
76
74
  def self.run k, data, opts = {}
77
75
  opts = DEFAULT_OPTS.merge(opts)
78
76
 
79
77
  opts[:k] = k
78
+ opts[:typecode] = TYPECODE[opts[:float_precision]]
80
79
 
81
80
  if opts[:scale_data]
82
- data, mean, std = Scaler.scale(data)
81
+ data, mean, std = Scaler.scale(data, nil, nil, opts[:typecode])
83
82
  opts[:mean] = mean
84
83
  opts[:std] = std
85
84
  end
86
85
 
87
- opts[:points_matrix] = NMatrix.cast(data, NArray::DFLOAT)
86
+ opts[:points_matrix] = NMatrix.cast(data, opts[:typecode])
88
87
  opts[:row_norms] = opts[:points_matrix].map {|v| v**2}.sum(0)
89
88
 
90
89
  bestrun = nil
@@ -119,6 +118,7 @@ class KMeansClusterer
119
118
  @mean = opts[:mean]
120
119
  @std = opts[:std]
121
120
  @scale_data = opts[:scale_data]
121
+ @typecode = opts[:typecode]
122
122
 
123
123
  init_centroids
124
124
  end
@@ -144,7 +144,7 @@ class KMeansClusterer
144
144
  updated_centroids = []
145
145
 
146
146
  @k.times do |i|
147
- centroid = NArray.cast(@centroids[true, i].flatten)
147
+ centroid = NArray.cast(@centroids[true, i].flatten, @typecode)
148
148
  point_ids = @cluster_point_ids[i]
149
149
 
150
150
  if point_ids.empty?
@@ -159,7 +159,7 @@ class KMeansClusterer
159
159
  updated_centroids << newcenter
160
160
  end
161
161
 
162
- @centroids = NMatrix.cast updated_centroids
162
+ @centroids = NMatrix.cast updated_centroids, @typecode
163
163
 
164
164
  break if moves.max < 0.001 # i.e., no movement
165
165
  break if @iterations >= 300
@@ -179,8 +179,8 @@ class KMeansClusterer
179
179
  end
180
180
 
181
181
  def predict data
182
- data, _m, _s = Scaler.scale(data, @mean, @std) if @scale_data
183
- data = NMatrix.cast(data, NArray::DFLOAT)
182
+ data, _m, _s = Scaler.scale(data, @mean, @std, @typecode) if @scale_data
183
+ data = NMatrix.cast(data, @typecode)
184
184
  distances = distance(@centroids, data, nil)
185
185
  data.shape[1].times.map do |i|
186
186
  distances[i, true].sort_index[0] # index of closest cluster
@@ -223,7 +223,7 @@ class KMeansClusterer
223
223
  private
224
224
  def wrap_point point
225
225
  return point if point.is_a?(Point)
226
- Point.new(0, NArray.to_na(point).to_f)
226
+ Point.new(0, NArray.cast(point, @typecode))
227
227
  end
228
228
 
229
229
  def dissimilarity points, point
@@ -259,7 +259,7 @@ class KMeansClusterer
259
259
  d2 << min_distance**2
260
260
  end
261
261
 
262
- d2 = NArray.to_na d2
262
+ d2 = NArray.cast(d2, @typecode)
263
263
  probs = d2 / d2.sum
264
264
  cumprobs = probs.cumsum
265
265
  r = rand
@@ -271,7 +271,7 @@ class KMeansClusterer
271
271
  end
272
272
 
273
273
  def custom_centroid_init
274
- @centroids = NMatrix.cast @init
274
+ @centroids = NMatrix.cast @init, @typecode
275
275
  @k = @init.length
276
276
  end
277
277
 
@@ -289,14 +289,14 @@ class KMeansClusterer
289
289
 
290
290
  def set_points
291
291
  @points = @points_count.times.map do |i|
292
- data = NArray.cast @points_matrix[true, i].flatten
292
+ data = NArray.cast @points_matrix[true, i].flatten, @typecode
293
293
  Point.new(i, data, @labels[i])
294
294
  end
295
295
  end
296
296
 
297
297
  def set_clusters
298
298
  @clusters = @k.times.map do |i|
299
- centroid = NArray.cast @centroids[true, i].flatten
299
+ centroid = NArray.cast @centroids[true, i].flatten, @typecode
300
300
  c = Cluster.new i, Point.new(-i, centroid)
301
301
  @cluster_point_ids[i].each do |p|
302
302
  c << @points[p]
@@ -322,17 +322,17 @@ class KMeansClusterer
322
322
  end
323
323
 
324
324
  def get_point i
325
- NArray.cast @points_matrix[true, i].flatten
325
+ NArray.cast @points_matrix[true, i].flatten, @typecode
326
326
  end
327
327
 
328
328
  def get_centroid i
329
- NArray.cast(@centroids[true, i].flatten)
329
+ NArray.cast(@centroids[true, i].flatten, @typecode)
330
330
  end
331
331
 
332
332
  def get_points_for_centroid i
333
333
  point_ids = @cluster_point_ids[i]
334
334
  points = @points_matrix[true, point_ids]
335
- points.empty? ? NArray.dfloat(0) : NArray.cast(points)
335
+ points.empty? ? NArray.sfloat(0) : NArray.cast(points, @typecode)
336
336
  end
337
337
 
338
338
  def distance x, y, yy = @row_norms
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kmeans-clusterer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Geoff Buesing