kmeans-clusterer 0.5.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/kmeans-clusterer.rb +21 -21
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 18cbb4fb26dfd4c1648b73de2f6dafed580dbcd0
4
- data.tar.gz: 1b163ae96a584ab74117f00d589f56e33ccaee96
3
+ metadata.gz: cf47337db7688eba2c31a5117e2f8c071415759a
4
+ data.tar.gz: 844c11b63adf3d1e5a554bebcdbde8f251c45d96
5
5
  SHA512:
6
- metadata.gz: 48ebbf8b36bdfaa27900f1f2fcfdf2475bea0e2a0211f55cbbd2517906724bc16bd5952f25cd62b13fa5eb3ba9dfe2c3dd561102a8a1e3ceeae60070cfddebf6
7
- data.tar.gz: ce40602935d7f9ad81ddad2cabfebb87a30c06a8a05df4f7c303910761463724bab887118c3f5671177f5c1c453c1401ca0806675cd12af6aa6be9222c38acc0
6
+ metadata.gz: 78750f92bd336135d5118c061dea3e6d56cce8500642f18aa8b4dd0c5dd2953eda2c47691348685ff591d5c33b795aff93c52fd6869d53701cc3c46b661b9eee
7
+ data.tar.gz: 70470a45c59ad789a08f3f73a189de3958e6202b4c4fc5459eae967d2cd3ea4653ac20fc1dc2f343fe795829307f845680975fbaeeb35e5b62335ffc3006fd76
@@ -1,6 +1,8 @@
1
1
  require 'narray'
2
2
 
3
3
  class KMeansClusterer
4
+ TYPECODE = { double: NArray::DFLOAT, single: NArray::SFLOAT }
5
+
4
6
  module Scaler
5
7
  def self.mean data
6
8
  data.mean(1)
@@ -12,8 +14,8 @@ class KMeansClusterer
12
14
  std
13
15
  end
14
16
 
15
- def self.scale data, mean = nil, std = nil
16
- data = NArray.cast(data, NArray::DFLOAT)
17
+ def self.scale data, mean = nil, std = nil, typecode = nil
18
+ data = NArray.cast(data, typecode)
17
19
  mean ||= self.mean(data)
18
20
  std ||= self.std(data)
19
21
  data = (data - mean) / std
@@ -64,27 +66,24 @@ class KMeansClusterer
64
66
  point.cluster = self
65
67
  @points << point
66
68
  end
67
-
68
- def points_narray
69
- NArray.cast @points.map(&:data)
70
- end
71
69
  end
72
70
 
73
71
 
74
- DEFAULT_OPTS = { scale_data: false, runs: 10, log: false, init: :kmpp}
72
+ DEFAULT_OPTS = { scale_data: false, runs: 10, log: false, init: :kmpp, float_precision: :double }
75
73
 
76
74
  def self.run k, data, opts = {}
77
75
  opts = DEFAULT_OPTS.merge(opts)
78
76
 
79
77
  opts[:k] = k
78
+ opts[:typecode] = TYPECODE[opts[:float_precision]]
80
79
 
81
80
  if opts[:scale_data]
82
- data, mean, std = Scaler.scale(data)
81
+ data, mean, std = Scaler.scale(data, nil, nil, opts[:typecode])
83
82
  opts[:mean] = mean
84
83
  opts[:std] = std
85
84
  end
86
85
 
87
- opts[:points_matrix] = NMatrix.cast(data, NArray::DFLOAT)
86
+ opts[:points_matrix] = NMatrix.cast(data, opts[:typecode])
88
87
  opts[:row_norms] = opts[:points_matrix].map {|v| v**2}.sum(0)
89
88
 
90
89
  bestrun = nil
@@ -119,6 +118,7 @@ class KMeansClusterer
119
118
  @mean = opts[:mean]
120
119
  @std = opts[:std]
121
120
  @scale_data = opts[:scale_data]
121
+ @typecode = opts[:typecode]
122
122
 
123
123
  init_centroids
124
124
  end
@@ -144,7 +144,7 @@ class KMeansClusterer
144
144
  updated_centroids = []
145
145
 
146
146
  @k.times do |i|
147
- centroid = NArray.cast(@centroids[true, i].flatten)
147
+ centroid = NArray.cast(@centroids[true, i].flatten, @typecode)
148
148
  point_ids = @cluster_point_ids[i]
149
149
 
150
150
  if point_ids.empty?
@@ -159,7 +159,7 @@ class KMeansClusterer
159
159
  updated_centroids << newcenter
160
160
  end
161
161
 
162
- @centroids = NMatrix.cast updated_centroids
162
+ @centroids = NMatrix.cast updated_centroids, @typecode
163
163
 
164
164
  break if moves.max < 0.001 # i.e., no movement
165
165
  break if @iterations >= 300
@@ -179,8 +179,8 @@ class KMeansClusterer
179
179
  end
180
180
 
181
181
  def predict data
182
- data, _m, _s = Scaler.scale(data, @mean, @std) if @scale_data
183
- data = NMatrix.cast(data, NArray::DFLOAT)
182
+ data, _m, _s = Scaler.scale(data, @mean, @std, @typecode) if @scale_data
183
+ data = NMatrix.cast(data, @typecode)
184
184
  distances = distance(@centroids, data, nil)
185
185
  data.shape[1].times.map do |i|
186
186
  distances[i, true].sort_index[0] # index of closest cluster
@@ -223,7 +223,7 @@ class KMeansClusterer
223
223
  private
224
224
  def wrap_point point
225
225
  return point if point.is_a?(Point)
226
- Point.new(0, NArray.to_na(point).to_f)
226
+ Point.new(0, NArray.cast(point, @typecode))
227
227
  end
228
228
 
229
229
  def dissimilarity points, point
@@ -259,7 +259,7 @@ class KMeansClusterer
259
259
  d2 << min_distance**2
260
260
  end
261
261
 
262
- d2 = NArray.to_na d2
262
+ d2 = NArray.cast(d2, @typecode)
263
263
  probs = d2 / d2.sum
264
264
  cumprobs = probs.cumsum
265
265
  r = rand
@@ -271,7 +271,7 @@ class KMeansClusterer
271
271
  end
272
272
 
273
273
  def custom_centroid_init
274
- @centroids = NMatrix.cast @init
274
+ @centroids = NMatrix.cast @init, @typecode
275
275
  @k = @init.length
276
276
  end
277
277
 
@@ -289,14 +289,14 @@ class KMeansClusterer
289
289
 
290
290
  def set_points
291
291
  @points = @points_count.times.map do |i|
292
- data = NArray.cast @points_matrix[true, i].flatten
292
+ data = NArray.cast @points_matrix[true, i].flatten, @typecode
293
293
  Point.new(i, data, @labels[i])
294
294
  end
295
295
  end
296
296
 
297
297
  def set_clusters
298
298
  @clusters = @k.times.map do |i|
299
- centroid = NArray.cast @centroids[true, i].flatten
299
+ centroid = NArray.cast @centroids[true, i].flatten, @typecode
300
300
  c = Cluster.new i, Point.new(-i, centroid)
301
301
  @cluster_point_ids[i].each do |p|
302
302
  c << @points[p]
@@ -322,17 +322,17 @@ class KMeansClusterer
322
322
  end
323
323
 
324
324
  def get_point i
325
- NArray.cast @points_matrix[true, i].flatten
325
+ NArray.cast @points_matrix[true, i].flatten, @typecode
326
326
  end
327
327
 
328
328
  def get_centroid i
329
- NArray.cast(@centroids[true, i].flatten)
329
+ NArray.cast(@centroids[true, i].flatten, @typecode)
330
330
  end
331
331
 
332
332
  def get_points_for_centroid i
333
333
  point_ids = @cluster_point_ids[i]
334
334
  points = @points_matrix[true, point_ids]
335
- points.empty? ? NArray.dfloat(0) : NArray.cast(points)
335
+ points.empty? ? NArray.sfloat(0) : NArray.cast(points, @typecode)
336
336
  end
337
337
 
338
338
  def distance x, y, yy = @row_norms
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kmeans-clusterer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Geoff Buesing