data_mining 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8bd3fddec5ee391005c68d0b98688a15ad0049c6
4
- data.tar.gz: f0dc879bab93ae64bbf42bb139ca5feb2bab0588
3
+ metadata.gz: 4803a5f289ea44fdd11459cd36955cbfa0420a16
4
+ data.tar.gz: 6390a930c494f7eb600db10168fb124f8ce7a339
5
5
  SHA512:
6
- metadata.gz: d77e59e8ead5dfd7f171ef7a3f47dc9749579f06896200843c7a1d382169910b8fd303f6ae37d07b24fe25515ac2a5fa2b11d734f58bc9216c5f09d7d66983d2
7
- data.tar.gz: 27c676e73655384b28d50f4247180dfcfc0c287cf1caf873cfbd014af70e0fcce346e4ea84fc76a48756b7ea96593e14350e8578c388f8e09248b04567e4feee
6
+ metadata.gz: 8fae4b4508398f56fd95956c3a6bd17bb029c363276bf6873a108ec7d60af62175975f839e5d3038dc32fb31afc04c2e328ceaa24fe6affcb8db044b5f85ee07
7
+ data.tar.gz: 846a24bc8ab88f0939c12a2545acd0bd4082efc9417b683692e399b7fc535b2ce58728b0a67413d513fa630a24f8f4fc320f53d6e1edc27fbd1edc1824e555bd
@@ -87,17 +87,8 @@ module DataMining
87
87
  return false if p1.value.length != p2.value.length
88
88
  (p1.value + p2.value).all? { |x| x.is_a? Numeric }
89
89
  end
90
-
91
- def euclidean_distance(p1, p2)
92
- p1 = p1.value
93
- p2 = p2.value
94
- Math.sqrt(
95
- p1.each_with_index.inject(0) do |sum, (v, i)|
96
- sum + ((v - p2[i])**2)
97
- end
98
- )
99
- end
100
90
  end
101
91
  end
102
92
 
103
93
  require 'data_mining/point'
94
+ require 'data_mining/euclidean_distance'
@@ -0,0 +1,9 @@
1
+ def euclidean_distance(p1, p2)
2
+ p1 = p1.value
3
+ p2 = p2.value
4
+ Math.sqrt(
5
+ p1.each_with_index.inject(0) do |sum, (v, i)|
6
+ sum + ((v - p2[i])**2)
7
+ end
8
+ )
9
+ end
@@ -0,0 +1,35 @@
1
+ module DataMining
2
+ # k-Nearest Neighbor Classifier
3
+ class KNearestNeighbor
4
+ # Find nearest neighbors class
5
+ #
6
+ # Arguments:
7
+ # training_data: (array of arrays, like [[:c1, value], [:c2, value] .. ])
8
+ # k: (integer)
9
+ def initialize(training_data, k)
10
+ @data = training_data.map { |i, v| DataMining::Point.new(i, v) }
11
+ @k = k
12
+ end
13
+
14
+ def classify(point)
15
+ count_classes(k_nearest_points(point)).max_by { |_, v| v }.first
16
+ end
17
+
18
+ private
19
+
20
+ def count_classes(points)
21
+ points.each_with_object(Hash.new(0)) do |p, o|
22
+ o[p.id] += 1
23
+ end
24
+ end
25
+
26
+ def k_nearest_points(point)
27
+ @data.sort_by do |item|
28
+ euclidean_distance(item, DataMining::Point.new(point[0], point[1]))
29
+ end.take(@k)
30
+ end
31
+ end
32
+ end
33
+
34
+ require 'data_mining/point'
35
+ require 'data_mining/euclidean_distance'
@@ -39,8 +39,9 @@ module DataMining
39
39
  end
40
40
 
41
41
  def next_state
42
+ current_term = term
42
43
  @graph.each_with_object({}) do |(node, _), ranks|
43
- ranks[node] = term + @damper * sum_incoming_scores(node)
44
+ ranks[node] = current_term + @damper * sum_incoming_scores(node)
44
45
  end
45
46
  end
46
47
 
@@ -53,6 +54,7 @@ module DataMining
53
54
  end
54
55
 
55
56
  def pagerank_of_sinknodes
57
+ return 0 if @sinknodes.empty?
56
58
  @ranks.select { |k, _| @sinknodes.include?(k) }.values.inject(:+).to_f
57
59
  end
58
60
  end
@@ -3,7 +3,7 @@ module DataMining
3
3
  class Point
4
4
  attr_reader :id, :value
5
5
 
6
- # Represents a Point for the DBScan Algorithm
6
+ # Represents a Point
7
7
  #
8
8
  # Arguments:
9
9
  # id: (symbol)
data/lib/data_mining.rb CHANGED
@@ -1,6 +1,8 @@
1
+ # A little collection of several Data-Mining-Algorithms
1
2
  module DataMining
2
3
  end
3
4
 
4
5
  require 'data_mining/dbscan'
5
6
  require 'data_mining/apriori'
6
7
  require 'data_mining/page_rank'
8
+ require 'data_mining/knn'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_mining
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manuel Stuefer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-15 00:00:00.000000000 Z
11
+ date: 2015-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: minitest
@@ -61,6 +61,8 @@ files:
61
61
  - lib/data_mining.rb
62
62
  - lib/data_mining/apriori.rb
63
63
  - lib/data_mining/dbscan.rb
64
+ - lib/data_mining/euclidean_distance.rb
65
+ - lib/data_mining/knn.rb
64
66
  - lib/data_mining/page_rank.rb
65
67
  - lib/data_mining/point.rb
66
68
  homepage: https://github.com/mstuefer/data_mining