rgraphum 0.0.1.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +26 -0
- data/GLOSSARIES.md +108 -0
- data/GREMLIN.md +1398 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +136 -0
- data/Rakefile +16 -0
- data/bin/.irbrc +41 -0
- data/bin/rgraphum_console +61 -0
- data/bin/rgraphum_runner +57 -0
- data/examples/ba_model/make.rb +19 -0
- data/examples/ba_model/make_dummy_twitter_rt_data.rb +0 -0
- data/examples/basic/check_modularity.rb +27 -0
- data/examples/basic/make_graph.rb +12 -0
- data/examples/parser/dot.rb +28 -0
- data/examples/sis_model/lifegame.rb +161 -0
- data/graph_struct.jpg +0 -0
- data/lib/rgraphum/analyzer/linear_regression.rb +31 -0
- data/lib/rgraphum/analyzer/meme_tracker.rb +296 -0
- data/lib/rgraphum/analyzer/twitter/rt_at_mark.rb +45 -0
- data/lib/rgraphum/analyzer.rb +8 -0
- data/lib/rgraphum/cluster.rb +67 -0
- data/lib/rgraphum/communities.rb +65 -0
- data/lib/rgraphum/community.rb +86 -0
- data/lib/rgraphum/cosine_similarity_matrix.rb +40 -0
- data/lib/rgraphum/edge.rb +194 -0
- data/lib/rgraphum/edges.rb +161 -0
- data/lib/rgraphum/ext/cosine_similarity_matrix.rb +79 -0
- data/lib/rgraphum/ext/linear_regression.rb +22 -0
- data/lib/rgraphum/ext/tf_idf.rb +52 -0
- data/lib/rgraphum/graph/gremlin.rb +193 -0
- data/lib/rgraphum/graph/math/clustering_coefficient.rb +53 -0
- data/lib/rgraphum/graph/math/community_detection.rb +141 -0
- data/lib/rgraphum/graph/math/degree_distribution.rb +50 -0
- data/lib/rgraphum/graph/math/dijkstra.rb +331 -0
- data/lib/rgraphum/graph/math.rb +45 -0
- data/lib/rgraphum/graph.rb +267 -0
- data/lib/rgraphum/importer.rb +97 -0
- data/lib/rgraphum/marshal.rb +26 -0
- data/lib/rgraphum/motifs.rb +8 -0
- data/lib/rgraphum/parsers/flare.rb +42 -0
- data/lib/rgraphum/parsers/gephi.rb +193 -0
- data/lib/rgraphum/parsers/graphviz.rb +78 -0
- data/lib/rgraphum/parsers/miserables.rb +54 -0
- data/lib/rgraphum/parsers.rb +32 -0
- data/lib/rgraphum/path.rb +37 -0
- data/lib/rgraphum/query.rb +130 -0
- data/lib/rgraphum/rgraphum_array.rb +159 -0
- data/lib/rgraphum/rgraphum_array_dividers.rb +43 -0
- data/lib/rgraphum/rgraphum_random.rb +5 -0
- data/lib/rgraphum/simulator/ba_model.rb +140 -0
- data/lib/rgraphum/simulator/sir_model.rb +178 -0
- data/lib/rgraphum/simulator/sis_model.rb +158 -0
- data/lib/rgraphum/simulator.rb +29 -0
- data/lib/rgraphum/statistic/power_law.rb +9 -0
- data/lib/rgraphum/t.rb +12 -0
- data/lib/rgraphum/tf_idf.rb +27 -0
- data/lib/rgraphum/version.rb +3 -0
- data/lib/rgraphum/vertex.rb +354 -0
- data/lib/rgraphum/vertices.rb +97 -0
- data/lib/rgraphum.rb +38 -0
- data/performance/add-vertices-edges.rb +20 -0
- data/performance/add-vertices.rb +12 -0
- data/performance/build-graph.rb +19 -0
- data/performance/delete-graph.rb +24 -0
- data/performance/delete-vertices.rb +25 -0
- data/performance/refer-graph.rb +23 -0
- data/rgraphum.gemspec +30 -0
- data/test/lib/rgraphum/analyzer/linear_regression_test.rb +20 -0
- data/test/lib/rgraphum/analyzer/meme_tracker_test.rb +383 -0
- data/test/lib/rgraphum/analyzer/twitter/rt_at_mark_test.rb +120 -0
- data/test/lib/rgraphum/array_test.rb +95 -0
- data/test/lib/rgraphum/bubble_test.rb +7 -0
- data/test/lib/rgraphum/communities_test.rb +53 -0
- data/test/lib/rgraphum/cosine_similarity_test.rb +18 -0
- data/test/lib/rgraphum/edge_test.rb +89 -0
- data/test/lib/rgraphum/edges_test.rb +178 -0
- data/test/lib/rgraphum/graph_builder_test.rb +64 -0
- data/test/lib/rgraphum/graph_dup_test.rb +199 -0
- data/test/lib/rgraphum/graph_plus_test.rb +80 -0
- data/test/lib/rgraphum/graph_test.rb +512 -0
- data/test/lib/rgraphum/gremlin_test.rb +145 -0
- data/test/lib/rgraphum/importers/idg_json_edges.json +20 -0
- data/test/lib/rgraphum/importers/idg_json_test.rb +207 -0
- data/test/lib/rgraphum/importers/idg_json_vertices.json +46 -0
- data/test/lib/rgraphum/math/average_distance_matrix_test.rb +142 -0
- data/test/lib/rgraphum/math/clustering_coefficient_test.rb +219 -0
- data/test/lib/rgraphum/math/community_test.rb +78 -0
- data/test/lib/rgraphum/math/degree_distribution_test.rb +40 -0
- data/test/lib/rgraphum/math/dijkstra_test.rb +146 -0
- data/test/lib/rgraphum/math/modularity_test.rb +154 -0
- data/test/lib/rgraphum/math/quick_average_distance_matrix_test.rb +84 -0
- data/test/lib/rgraphum/path_test.rb +44 -0
- data/test/lib/rgraphum/query/enumerable_test.rb +42 -0
- data/test/lib/rgraphum/query/where_operators_test.rb +75 -0
- data/test/lib/rgraphum/query/where_test.rb +59 -0
- data/test/lib/rgraphum/simulator/ba_model_test.rb +75 -0
- data/test/lib/rgraphum/simulator/sir_model_test.rb +513 -0
- data/test/lib/rgraphum/simulator/sis_model_test.rb +478 -0
- data/test/lib/rgraphum/simulator_test.rb +22 -0
- data/test/lib/rgraphum/tf_idf_test.rb +30 -0
- data/test/lib/rgraphum/vertex_test.rb +50 -0
- data/test/lib/rgraphum/vertices_test.rb +180 -0
- data/test/test_helper.rb +98 -0
- data/tmp/.gitkeep +0 -0
- metadata +254 -0
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
# SIR Model
|
|
4
|
+
# Susceptible Infected Recovered(Removed) Model
|
|
5
|
+
#
|
|
6
|
+
class Rgraphum::Simulator::SIRModel
  attr_reader :graph
  attr_reader :recovery_rate, :infection_rate

  # Builds a SIR simulator for the given graph.
  #
  # options: Options Hash
  #   graph:          Graph instance (required)
  #   sir_map:        Array of symbol(:s, :i or :r), one entry per vertex in
  #                   graph.vertices order. Every vertex starts as :s when omitted.
  #   infection_rate: λ  0 <= λ <= 1
  #   recovery_rate:  μ  0 <= μ <= 1
  #   periods:        0 <= ; default number of periods used by #simulate
  #   t_per_period:   increase t for each period (defaults to 1)
  #
  # Raises ArgumentError when :graph is missing, when :sir_map has the wrong
  # size or an unknown state, or when a numeric option is out of range.
  #
  # NOTE: the rates are optional here; if one is omitted its reader returns nil
  # and the arithmetic in #infected? / #recovered? raises once it is reached.
  def initialize(options={})
    @graph = options[:graph]
    raise ArgumentError, ":graph is required" unless @graph

    # Per vertex id: t accumulated since the vertex last changed state.
    @t_map = Hash.new(0)
    @sir_map = build_sir_map(options[:sir_map])

    @infection_rate = parse_rate(options, :infection_rate)
    @recovery_rate  = parse_rate(options, :recovery_rate)

    if options[:periods]
      @periods = options[:periods].to_i
      if @periods < 0
        raise ArgumentError, ":periods should be greater than equal 0"
      end
    end

    if options[:t_per_period]
      @t_per_period = options[:t_per_period].to_f
      if @t_per_period < 0
        raise ArgumentError, ":t_per_period should be greater than equal 0"
      end
    else
      @t_per_period = 1
    end
  end

  # Simulate SIR Model
  #
  # Advances the model period by period. When a block is given it is called
  # with (0, self) before the first period and with (n, self) after the n-th.
  #
  # options[:periods] takes precedence; when it is absent the :periods value
  # given to the constructor is used, and 0 when neither was supplied.
  #
  # Returns the number of periods that were run.
  def simulate(options={})
    periods = (options[:periods] || @periods).to_i
    yield 0, self if block_given?
    periods.times do |n|
      next_period
      yield n + 1, self if block_given?
    end
  end

  def vertices
    @graph.vertices
  end

  def edges
    @graph.edges
  end

  # Advances every vertex by one period.
  #
  # All decisions are taken against the current maps and written into fresh
  # ones, so the outcome does not depend on vertex iteration order.
  # A vertex that keeps its :s or :i state accumulates t_per_period; a vertex
  # that changes state, or is :r, has its t reset to 0.
  def next_period
    new_sir_map = {}
    new_t_map = {}

    @graph.vertices.each do |vertex|
      vertex_id = vertex.id
      current = @sir_map[vertex_id]

      following =
        case current
        when :i then recovered?(vertex, 1) ? :r : :i
        when :s then infected?(vertex, 1) ? :i : :s
        when :r then :r
        end
      # Vertices without a known state are left out of the new maps.
      next if following.nil?

      unchanged = (following == current) && current != :r
      new_sir_map[vertex_id] = following
      new_t_map[vertex_id] = unchanged ? @t_map[vertex_id] + @t_per_period : 0
    end

    @sir_map = new_sir_map
    @t_map = new_t_map
  end

  # Is +vertex+ infected now (periods == 0), or will it be after +periods+
  # more periods?
  #
  # A recovered vertex is never infected again. For a susceptible vertex the
  # rule is a deterministic threshold:
  #   1 <= (number of infected neighbours) * infection_rate * t
  # where neighbours come from vertex.both and t is the vertex's accumulated t
  # plus t_per_period * periods.
  def infected?(vertex, periods=0)
    case @sir_map[vertex.id]
    when :r then return false
    when :i then return true
    end
    return false if periods.zero?

    t = @t_map[vertex.id] + @t_per_period * periods

    num_infected = vertex.both.inject(0) { |count, neighbour|
      count + (@sir_map[neighbour.id] == :i ? 1 : 0)
    }

    1 <= num_infected * infection_rate * t
  end

  # Is +vertex+ recovered now (periods == 0), or will it be after +periods+
  # more periods?
  #
  # A susceptible vertex cannot recover. For an infected vertex the rule is
  #   1 <= recovery_rate * t
  # with t computed as in #infected?.
  def recovered?(vertex, periods=0)
    case @sir_map[vertex.id]
    when :r then return true
    when :s then return false
    end
    return false if periods.zero?

    t = @t_map[vertex.id] + @t_per_period * periods

    1 <= recovery_rate * t
  end
  alias :removed? :recovered?

  # SIR(:s, :i or :r) array, order is the same as vertices
  def sir_map
    @sir_map.values
  end

  private

  # Builds the vertex id => state Hash from the optional :sir_map array.
  def build_sir_map(states)
    map = {}
    if states
      if @graph.vertices.size != states.size
        raise ArgumentError, ":sir_map should be same size with graph.vertices"
      end
      states.each_with_index do |state, index|
        vertex = @graph.vertices[index]
        unless [:s, :i, :r].include?(state)
          raise ArgumentError, ":sir_map can only have :s, :i or :r"
        end
        map[vertex.id] = state
      end
    else
      @graph.vertices.each { |vertex| map[vertex.id] = :s }
    end
    map
  end

  # Reads options[key] as a Float within 0..1; returns nil when it is absent.
  def parse_rate(options, key)
    return nil unless options[key]

    rate = options[key].to_f
    if rate < 0 || 1 < rate
      raise ArgumentError, ":#{key} should be between 0 and 1"
    end
    rate
  end
end
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
# SIS Model
|
|
4
|
+
# Susceptible Infected Susceptible Model
|
|
5
|
+
#
|
|
6
|
+
class Rgraphum::Simulator::SISModel
  attr_reader :graph
  attr_reader :recovery_rate, :infection_rate

  # Builds a SIS simulator for the given graph.
  #
  # options: Options Hash
  #   graph:          Graph instance (required)
  #   si_map:         Array of symbol(:s or :i), one entry per vertex in
  #                   graph.vertices order. Every vertex starts as :s when omitted.
  #   infection_rate: λ  0 <= λ <= 1
  #   recovery_rate:  μ  0 <= μ <= 1
  #   periods:        0 <= ; default number of periods used by #simulate
  #   t_per_period:   increase t for each period (defaults to 1)
  #
  # Raises ArgumentError when :graph is missing, when :si_map has the wrong
  # size or an unknown state, or when a numeric option is out of range.
  #
  # NOTE: the rates are optional here; if one is omitted its reader returns nil
  # and the arithmetic in #infected? / #susceptible? raises once it is reached.
  def initialize(options={})
    @graph = options[:graph]
    raise ArgumentError, ":graph is required" unless @graph

    # Per vertex id: t accumulated since the vertex last changed state.
    @t_map = Hash.new(0)
    @si_map = build_si_map(options[:si_map])

    @infection_rate = parse_rate(options, :infection_rate)
    @recovery_rate  = parse_rate(options, :recovery_rate)

    if options[:periods]
      @periods = options[:periods].to_i
      if @periods < 0
        raise ArgumentError, ":periods should be greater than equal 0"
      end
    end

    if options[:t_per_period]
      @t_per_period = options[:t_per_period].to_f
      if @t_per_period < 0
        raise ArgumentError, ":t_per_period should be greater than equal 0"
      end
    else
      @t_per_period = 1
    end
  end

  # Simulate SIS Model
  #
  # Advances the model period by period. When a block is given it is called
  # with (0, self) before the first period and with (n, self) after the n-th.
  #
  # options[:periods] takes precedence; when it is absent the :periods value
  # given to the constructor is used, and 0 when neither was supplied.
  #
  # Returns the number of periods that were run.
  def simulate(options={})
    periods = (options[:periods] || @periods).to_i
    yield 0, self if block_given?
    periods.times do |n|
      next_period
      yield n + 1, self if block_given?
    end
  end

  def vertices
    @graph.vertices
  end

  def edges
    @graph.edges
  end

  # Advances every vertex by one period.
  #
  # All decisions are taken against the current maps and written into fresh
  # ones, so the outcome does not depend on vertex iteration order.
  # A vertex that keeps its state accumulates t_per_period; a vertex that
  # changes state has its t reset to 0.
  def next_period
    new_si_map = {}
    new_t_map = {}

    @graph.vertices.each do |vertex|
      vertex_id = vertex.id
      current = @si_map[vertex_id]

      following =
        case current
        when :i then susceptible?(vertex, 1) ? :s : :i
        when :s then infected?(vertex, 1) ? :i : :s
        end
      # Vertices without a known state are left out of the new maps.
      next if following.nil?

      new_si_map[vertex_id] = following
      new_t_map[vertex_id] =
        following == current ? @t_map[vertex_id] + @t_per_period : 0
    end

    @si_map = new_si_map
    @t_map = new_t_map
  end

  # Is +vertex+ infected now (periods == 0), or will it be after +periods+
  # more periods?
  #
  # For a susceptible vertex the rule is a deterministic threshold:
  #   1 <= (number of infected neighbours) * infection_rate * t
  # where neighbours come from vertex.both and t is the vertex's accumulated t
  # plus t_per_period * periods.
  def infected?(vertex, periods=0)
    return true if @si_map[vertex.id] == :i
    return false if periods.zero?

    t = @t_map[vertex.id] + @t_per_period * periods

    num_infected = vertex.both.inject(0) { |count, neighbour|
      count + (@si_map[neighbour.id] == :i ? 1 : 0)
    }

    1 <= num_infected * infection_rate * t
  end

  # Is +vertex+ susceptible now (periods == 0), or will it be after +periods+
  # more periods?
  #
  # For an infected vertex the rule is
  #   1 <= recovery_rate * t
  # with t computed as in #infected?.
  def susceptible?(vertex, periods=0)
    return true if @si_map[vertex.id] == :s
    return false if periods.zero?

    t = @t_map[vertex.id] + @t_per_period * periods

    1 <= recovery_rate * t
  end

  # SI(:s or :i) array, order is the same as vertices
  def si_map
    @si_map.values
  end

  private

  # Builds the vertex id => state Hash from the optional :si_map array.
  def build_si_map(states)
    map = {}
    if states
      if @graph.vertices.size != states.size
        raise ArgumentError, ":si_map should be same size with graph.vertices"
      end
      states.each_with_index do |state, index|
        vertex = @graph.vertices[index]
        unless [:s, :i].include?(state)
          raise ArgumentError, ":si_map can only have :s or :i"
        end
        map[vertex.id] = state
      end
    else
      @graph.vertices.each { |vertex| map[vertex.id] = :s }
    end
    map
  end

  # Reads options[key] as a Float within 0..1; returns nil when it is absent.
  def parse_rate(options, key)
    return nil unless options[key]

    rate = options[key].to_f
    if rate < 0 || 1 < rate
      raise ArgumentError, ":#{key} should be between 0 and 1"
    end
    rate
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
# Entry point for running one of the bundled simulation models.
# NOTE(review): #simulate passes `self` as the model's :graph, so this module
# is presumably mixed into the graph class — confirm against the includer.
module Rgraphum::Simulator
  require_relative 'simulator/ba_model'
  require_relative 'simulator/sis_model'
  require_relative 'simulator/sir_model'

  # Normalised model name (lower-case, letters and digits only) => model class.
  MODEL_NAME_MODEL_MAP = {
    "bamodel"  => Rgraphum::Simulator::BAModel,
    "sismodel" => Rgraphum::Simulator::SISModel,
    "sirmodel" => Rgraphum::Simulator::SIRModel,
  }

  # Instantiates the model named +model_name+ with +options+ plus graph: self
  # and runs its #simulate with the same +options+.
  #
  # model_name: String or Symbol; case and punctuation are ignored, so
  #             "SIR Model", "sir_model" and :sir_model all select SIRModel.
  # options:    Hash handed to both the model constructor and its #simulate.
  # block:      forwarded to the model's #simulate, so callers can observe each
  #             period of the models that yield.
  #
  # Returns whatever the model's #simulate returns.
  # Raises ArgumentError when no model matches +model_name+.
  def simulate(model_name, options={}, &block)
    model_class = guess_model_class(model_name)
    simulator = model_class.new(options.merge(graph: self))
    simulator.simulate(options, &block)
  end

  private

  # Resolves a loosely written model name to its class.
  def guess_model_class(model_name)
    # to_s lets Symbols through; the previous dup.downcase.gsub chain failed on them.
    name = model_name.to_s.downcase.gsub(/[^a-z0-9]/, "")
    MODEL_NAME_MODEL_MAP.fetch(name) do
      raise ArgumentError, "Simulator model not found: '#{model_name}'"
    end
  end
end
|
data/lib/rgraphum/t.rb
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
#require 'matrix'
|
|
4
|
+
#require 'inline'
|
|
5
|
+
|
|
6
|
+
class TfIdf
  # Computes TF-IDF weights for a matrix of counts.
  #
  # matrix: Array of rows, each row an Array of numbers; all rows must have the
  #         same length (Array#transpose raises IndexError otherwise).
  #
  # For every cell the weight is
  #   (n / row_sum) * log(row_count / df)
  # where df is the number of rows with a positive value in that column.
  #
  # Degenerate inputs yield 0.0 instead of NaN/Infinity:
  # * a column with no positive entry (df == 0) gets an IDF of 0.0
  # * a row whose sum is zero gets a term frequency of 0.0 in every cell
  #
  # Returns a new Array of rows of Floats with the same shape as +matrix+.
  def tf_idf(matrix)
    row_size = matrix.size.to_f

    idf = matrix.transpose.map do |col_array|
      df = col_array.count { |n| n > 0 }
      # log(row_size / 0) would be Infinity and poison the product below.
      df.zero? ? 0.0 : Math.log(row_size / df)
    end

    matrix.map do |row_array|
      # inject with an initial 0 keeps an empty row from producing nil.
      row_sum = row_array.inject(0) { |sum, n| sum + n }.to_f

      row_array.each_with_index.map do |n, i|
        row_sum.zero? ? 0.0 : (n / row_sum) * idf[i]
      end
    end
  end
end
|