data_mining 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8bd3fddec5ee391005c68d0b98688a15ad0049c6
4
- data.tar.gz: f0dc879bab93ae64bbf42bb139ca5feb2bab0588
3
+ metadata.gz: 4803a5f289ea44fdd11459cd36955cbfa0420a16
4
+ data.tar.gz: 6390a930c494f7eb600db10168fb124f8ce7a339
5
5
  SHA512:
6
- metadata.gz: d77e59e8ead5dfd7f171ef7a3f47dc9749579f06896200843c7a1d382169910b8fd303f6ae37d07b24fe25515ac2a5fa2b11d734f58bc9216c5f09d7d66983d2
7
- data.tar.gz: 27c676e73655384b28d50f4247180dfcfc0c287cf1caf873cfbd014af70e0fcce346e4ea84fc76a48756b7ea96593e14350e8578c388f8e09248b04567e4feee
6
+ metadata.gz: 8fae4b4508398f56fd95956c3a6bd17bb029c363276bf6873a108ec7d60af62175975f839e5d3038dc32fb31afc04c2e328ceaa24fe6affcb8db044b5f85ee07
7
+ data.tar.gz: 846a24bc8ab88f0939c12a2545acd0bd4082efc9417b683692e399b7fc535b2ce58728b0a67413d513fa630a24f8f4fc320f53d6e1edc27fbd1edc1824e555bd
@@ -87,17 +87,8 @@ module DataMining
87
87
  return false if p1.value.length != p2.value.length
88
88
  (p1.value + p2.value).all? { |x| x.is_a? Numeric }
89
89
  end
90
-
91
- def euclidean_distance(p1, p2)
92
- p1 = p1.value
93
- p2 = p2.value
94
- Math.sqrt(
95
- p1.each_with_index.inject(0) do |sum, (v, i)|
96
- sum + ((v - p2[i])**2)
97
- end
98
- )
99
- end
100
90
  end
101
91
  end
102
92
 
103
93
  require 'data_mining/point'
94
+ require 'data_mining/euclidean_distance'
@@ -0,0 +1,9 @@
1
+ def euclidean_distance(p1, p2)
2
+ p1 = p1.value
3
+ p2 = p2.value
4
+ Math.sqrt(
5
+ p1.each_with_index.inject(0) do |sum, (v, i)|
6
+ sum + ((v - p2[i])**2)
7
+ end
8
+ )
9
+ end
@@ -0,0 +1,35 @@
1
+ module DataMining
2
+ # k-Nearest Neighbor Classifier
3
+ class KNearestNeighbor
4
+ # Find nearest neighbors class
5
+ #
6
+ # Arguments:
7
+ # training_data: (array of arrays, like [[:c1, value], [:c2, value] .. ])
8
+ # k: (integer)
9
+ def initialize(training_data, k)
10
+ @data = training_data.map { |i, v| DataMining::Point.new(i, v) }
11
+ @k = k
12
+ end
13
+
14
+ def classify(point)
15
+ count_classes(k_nearest_points(point)).max_by { |_, v| v }.first
16
+ end
17
+
18
+ private
19
+
20
+ def count_classes(points)
21
+ points.each_with_object(Hash.new(0)) do |p, o|
22
+ o[p.id] += 1
23
+ end
24
+ end
25
+
26
+ def k_nearest_points(point)
27
+ @data.sort_by do |item|
28
+ euclidean_distance(item, DataMining::Point.new(point[0], point[1]))
29
+ end.take(@k)
30
+ end
31
+ end
32
+ end
33
+
34
+ require 'data_mining/point'
35
+ require 'data_mining/euclidean_distance'
@@ -39,8 +39,9 @@ module DataMining
39
39
  end
40
40
 
41
41
  def next_state
42
+ current_term = term
42
43
  @graph.each_with_object({}) do |(node, _), ranks|
43
- ranks[node] = term + @damper * sum_incoming_scores(node)
44
+ ranks[node] = current_term + @damper * sum_incoming_scores(node)
44
45
  end
45
46
  end
46
47
 
@@ -53,6 +54,7 @@ module DataMining
53
54
  end
54
55
 
55
56
  def pagerank_of_sinknodes
57
+ return 0 if @sinknodes.empty?
56
58
  @ranks.select { |k, _| @sinknodes.include?(k) }.values.inject(:+).to_f
57
59
  end
58
60
  end
@@ -3,7 +3,7 @@ module DataMining
3
3
  class Point
4
4
  attr_reader :id, :value
5
5
 
6
- # Represents a Point for the DBScan Algorithm
6
+ # Represents a Point
7
7
  #
8
8
  # Arguments:
9
9
  # id: (symbol)
data/lib/data_mining.rb CHANGED
@@ -1,6 +1,8 @@
1
+ # A little collection of several Data-Mining-Algorithms
1
2
  module DataMining
2
3
  end
3
4
 
4
5
  require 'data_mining/dbscan'
5
6
  require 'data_mining/apriori'
6
7
  require 'data_mining/page_rank'
8
+ require 'data_mining/knn'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_mining
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manuel Stuefer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-15 00:00:00.000000000 Z
11
+ date: 2015-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: minitest
@@ -61,6 +61,8 @@ files:
61
61
  - lib/data_mining.rb
62
62
  - lib/data_mining/apriori.rb
63
63
  - lib/data_mining/dbscan.rb
64
+ - lib/data_mining/euclidean_distance.rb
65
+ - lib/data_mining/knn.rb
64
66
  - lib/data_mining/page_rank.rb
65
67
  - lib/data_mining/point.rb
66
68
  homepage: https://github.com/mstuefer/data_mining