kmeans-clusterer 0.11.1 → 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/kmeans-clusterer.rb +11 -8
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5074292fdb3b22c58c295514f9f80c255bd9c38b
|
4
|
+
data.tar.gz: a2ed2f4c4d288755d551444ac279e57762e14dd3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 896ab8eae01c177a4c70550ee73ce22fd57d044c03e099d1148cf017adf1b9be1d9663571a9dcfc23b744776d0d4689e36e5967a3a4fd794a34d08b8026c5e84
|
7
|
+
data.tar.gz: d19418742a21b6d7302d2be7cca6701553c82f3cd2b5cfc6e4289948b2c4f32cc1f05e7861af88f3014538b0faf1f7430e39482732c23b8a0ff9924144a74e65
|
data/lib/kmeans-clusterer.rb
CHANGED
@@ -255,13 +255,15 @@ class KMeansClusterer
|
|
255
255
|
def silhouette
|
256
256
|
return 1.0 if @k < 2
|
257
257
|
|
258
|
+
# calculate all point-to-point distances at once
|
259
|
+
# uses more memory, but much faster
|
260
|
+
point_distances = Distance.euclidean @data, @data
|
261
|
+
|
258
262
|
scores = @points.map do |point|
|
259
263
|
sort_index = point.centroid_distances.sort_index
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
a = dissimilarity(c1_points, point.data)
|
264
|
-
b = dissimilarity(c2_points, point.data)
|
264
|
+
c1, c2 = sort_index[0], sort_index[1]
|
265
|
+
a = dissimilarity point.id, c1, point_distances
|
266
|
+
b = dissimilarity point.id, c2, point_distances
|
265
267
|
(b - a) / [a,b].max
|
266
268
|
end
|
267
269
|
|
@@ -274,9 +276,10 @@ class KMeansClusterer
|
|
274
276
|
|
275
277
|
private
|
276
278
|
|
277
|
-
def dissimilarity
|
278
|
-
|
279
|
-
|
279
|
+
def dissimilarity point_id, cluster_id, point_distances
|
280
|
+
cluster_point_ids = @cluster_assigns.eq(cluster_id).where
|
281
|
+
cluster_point_distances = point_distances[cluster_point_ids, point_id]
|
282
|
+
cluster_point_distances.sum / cluster_point_distances.length
|
280
283
|
end
|
281
284
|
|
282
285
|
def init_centroids
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kmeans-clusterer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.11.1
|
4
|
+
version: 0.11.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Geoff Buesing
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: narray
|