data_mining 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/data_mining/dbscan.rb +1 -10
- data/lib/data_mining/euclidean_distance.rb +9 -0
- data/lib/data_mining/knn.rb +35 -0
- data/lib/data_mining/page_rank.rb +3 -1
- data/lib/data_mining/point.rb +1 -1
- data/lib/data_mining.rb +2 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4803a5f289ea44fdd11459cd36955cbfa0420a16
|
4
|
+
data.tar.gz: 6390a930c494f7eb600db10168fb124f8ce7a339
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8fae4b4508398f56fd95956c3a6bd17bb029c363276bf6873a108ec7d60af62175975f839e5d3038dc32fb31afc04c2e328ceaa24fe6affcb8db044b5f85ee07
|
7
|
+
data.tar.gz: 846a24bc8ab88f0939c12a2545acd0bd4082efc9417b683692e399b7fc535b2ce58728b0a67413d513fa630a24f8f4fc320f53d6e1edc27fbd1edc1824e555bd
|
data/lib/data_mining/dbscan.rb
CHANGED
@@ -87,17 +87,8 @@ module DataMining
|
|
87
87
|
return false if p1.value.length != p2.value.length
|
88
88
|
(p1.value + p2.value).all? { |x| x.is_a? Numeric }
|
89
89
|
end
|
90
|
-
|
91
|
-
def euclidean_distance(p1, p2)
|
92
|
-
p1 = p1.value
|
93
|
-
p2 = p2.value
|
94
|
-
Math.sqrt(
|
95
|
-
p1.each_with_index.inject(0) do |sum, (v, i)|
|
96
|
-
sum + ((v - p2[i])**2)
|
97
|
-
end
|
98
|
-
)
|
99
|
-
end
|
100
90
|
end
|
101
91
|
end
|
102
92
|
|
103
93
|
require 'data_mining/point'
|
94
|
+
require 'data_mining/euclidean_distance'
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module DataMining
|
2
|
+
# k-Nearest Neighbor Classifier
|
3
|
+
class KNearestNeighbor
|
4
|
+
# Find nearest neighbors class
|
5
|
+
#
|
6
|
+
# Arguments:
|
7
|
+
# training_data: (array of arrays, like [[:c1, value], [:c2, value] .. ])
|
8
|
+
# k: (integer)
|
9
|
+
def initialize(training_data, k)
|
10
|
+
@data = training_data.map { |i, v| DataMining::Point.new(i, v) }
|
11
|
+
@k = k
|
12
|
+
end
|
13
|
+
|
14
|
+
def classify(point)
|
15
|
+
count_classes(k_nearest_points(point)).max_by { |_, v| v }.first
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
def count_classes(points)
|
21
|
+
points.each_with_object(Hash.new(0)) do |p, o|
|
22
|
+
o[p.id] += 1
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def k_nearest_points(point)
|
27
|
+
@data.sort_by do |item|
|
28
|
+
euclidean_distance(item, DataMining::Point.new(point[0], point[1]))
|
29
|
+
end.take(@k)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
require 'data_mining/point'
|
35
|
+
require 'data_mining/euclidean_distance'
|
@@ -39,8 +39,9 @@ module DataMining
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def next_state
|
42
|
+
current_term = term
|
42
43
|
@graph.each_with_object({}) do |(node, _), ranks|
|
43
|
-
ranks[node] =
|
44
|
+
ranks[node] = current_term + @damper * sum_incoming_scores(node)
|
44
45
|
end
|
45
46
|
end
|
46
47
|
|
@@ -53,6 +54,7 @@ module DataMining
|
|
53
54
|
end
|
54
55
|
|
55
56
|
def pagerank_of_sinknodes
|
57
|
+
return 0 if @sinknodes.empty?
|
56
58
|
@ranks.select { |k, _| @sinknodes.include?(k) }.values.inject(:+).to_f
|
57
59
|
end
|
58
60
|
end
|
data/lib/data_mining/point.rb
CHANGED
data/lib/data_mining.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_mining
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manuel Stuefer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-07-
|
11
|
+
date: 2015-07-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: minitest
|
@@ -61,6 +61,8 @@ files:
|
|
61
61
|
- lib/data_mining.rb
|
62
62
|
- lib/data_mining/apriori.rb
|
63
63
|
- lib/data_mining/dbscan.rb
|
64
|
+
- lib/data_mining/euclidean_distance.rb
|
65
|
+
- lib/data_mining/knn.rb
|
64
66
|
- lib/data_mining/page_rank.rb
|
65
67
|
- lib/data_mining/point.rb
|
66
68
|
homepage: https://github.com/mstuefer/data_mining
|