rgraphum 0.0.1.alpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (106) hide show
  1. data/.gitignore +26 -0
  2. data/GLOSSARIES.md +108 -0
  3. data/GREMLIN.md +1398 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +136 -0
  7. data/Rakefile +16 -0
  8. data/bin/.irbrc +41 -0
  9. data/bin/rgraphum_console +61 -0
  10. data/bin/rgraphum_runner +57 -0
  11. data/examples/ba_model/make.rb +19 -0
  12. data/examples/ba_model/make_dummy_twitter_rt_data.rb +0 -0
  13. data/examples/basic/check_modularity.rb +27 -0
  14. data/examples/basic/make_graph.rb +12 -0
  15. data/examples/parser/dot.rb +28 -0
  16. data/examples/sis_model/lifegame.rb +161 -0
  17. data/graph_struct.jpg +0 -0
  18. data/lib/rgraphum/analyzer/linear_regression.rb +31 -0
  19. data/lib/rgraphum/analyzer/meme_tracker.rb +296 -0
  20. data/lib/rgraphum/analyzer/twitter/rt_at_mark.rb +45 -0
  21. data/lib/rgraphum/analyzer.rb +8 -0
  22. data/lib/rgraphum/cluster.rb +67 -0
  23. data/lib/rgraphum/communities.rb +65 -0
  24. data/lib/rgraphum/community.rb +86 -0
  25. data/lib/rgraphum/cosine_similarity_matrix.rb +40 -0
  26. data/lib/rgraphum/edge.rb +194 -0
  27. data/lib/rgraphum/edges.rb +161 -0
  28. data/lib/rgraphum/ext/cosine_similarity_matrix.rb +79 -0
  29. data/lib/rgraphum/ext/linear_regression.rb +22 -0
  30. data/lib/rgraphum/ext/tf_idf.rb +52 -0
  31. data/lib/rgraphum/graph/gremlin.rb +193 -0
  32. data/lib/rgraphum/graph/math/clustering_coefficient.rb +53 -0
  33. data/lib/rgraphum/graph/math/community_detection.rb +141 -0
  34. data/lib/rgraphum/graph/math/degree_distribution.rb +50 -0
  35. data/lib/rgraphum/graph/math/dijkstra.rb +331 -0
  36. data/lib/rgraphum/graph/math.rb +45 -0
  37. data/lib/rgraphum/graph.rb +267 -0
  38. data/lib/rgraphum/importer.rb +97 -0
  39. data/lib/rgraphum/marshal.rb +26 -0
  40. data/lib/rgraphum/motifs.rb +8 -0
  41. data/lib/rgraphum/parsers/flare.rb +42 -0
  42. data/lib/rgraphum/parsers/gephi.rb +193 -0
  43. data/lib/rgraphum/parsers/graphviz.rb +78 -0
  44. data/lib/rgraphum/parsers/miserables.rb +54 -0
  45. data/lib/rgraphum/parsers.rb +32 -0
  46. data/lib/rgraphum/path.rb +37 -0
  47. data/lib/rgraphum/query.rb +130 -0
  48. data/lib/rgraphum/rgraphum_array.rb +159 -0
  49. data/lib/rgraphum/rgraphum_array_dividers.rb +43 -0
  50. data/lib/rgraphum/rgraphum_random.rb +5 -0
  51. data/lib/rgraphum/simulator/ba_model.rb +140 -0
  52. data/lib/rgraphum/simulator/sir_model.rb +178 -0
  53. data/lib/rgraphum/simulator/sis_model.rb +158 -0
  54. data/lib/rgraphum/simulator.rb +29 -0
  55. data/lib/rgraphum/statistic/power_law.rb +9 -0
  56. data/lib/rgraphum/t.rb +12 -0
  57. data/lib/rgraphum/tf_idf.rb +27 -0
  58. data/lib/rgraphum/version.rb +3 -0
  59. data/lib/rgraphum/vertex.rb +354 -0
  60. data/lib/rgraphum/vertices.rb +97 -0
  61. data/lib/rgraphum.rb +38 -0
  62. data/performance/add-vertices-edges.rb +20 -0
  63. data/performance/add-vertices.rb +12 -0
  64. data/performance/build-graph.rb +19 -0
  65. data/performance/delete-graph.rb +24 -0
  66. data/performance/delete-vertices.rb +25 -0
  67. data/performance/refer-graph.rb +23 -0
  68. data/rgraphum.gemspec +30 -0
  69. data/test/lib/rgraphum/analyzer/linear_regression_test.rb +20 -0
  70. data/test/lib/rgraphum/analyzer/meme_tracker_test.rb +383 -0
  71. data/test/lib/rgraphum/analyzer/twitter/rt_at_mark_test.rb +120 -0
  72. data/test/lib/rgraphum/array_test.rb +95 -0
  73. data/test/lib/rgraphum/bubble_test.rb +7 -0
  74. data/test/lib/rgraphum/communities_test.rb +53 -0
  75. data/test/lib/rgraphum/cosine_similarity_test.rb +18 -0
  76. data/test/lib/rgraphum/edge_test.rb +89 -0
  77. data/test/lib/rgraphum/edges_test.rb +178 -0
  78. data/test/lib/rgraphum/graph_builder_test.rb +64 -0
  79. data/test/lib/rgraphum/graph_dup_test.rb +199 -0
  80. data/test/lib/rgraphum/graph_plus_test.rb +80 -0
  81. data/test/lib/rgraphum/graph_test.rb +512 -0
  82. data/test/lib/rgraphum/gremlin_test.rb +145 -0
  83. data/test/lib/rgraphum/importers/idg_json_edges.json +20 -0
  84. data/test/lib/rgraphum/importers/idg_json_test.rb +207 -0
  85. data/test/lib/rgraphum/importers/idg_json_vertices.json +46 -0
  86. data/test/lib/rgraphum/math/average_distance_matrix_test.rb +142 -0
  87. data/test/lib/rgraphum/math/clustering_coefficient_test.rb +219 -0
  88. data/test/lib/rgraphum/math/community_test.rb +78 -0
  89. data/test/lib/rgraphum/math/degree_distribution_test.rb +40 -0
  90. data/test/lib/rgraphum/math/dijkstra_test.rb +146 -0
  91. data/test/lib/rgraphum/math/modularity_test.rb +154 -0
  92. data/test/lib/rgraphum/math/quick_average_distance_matrix_test.rb +84 -0
  93. data/test/lib/rgraphum/path_test.rb +44 -0
  94. data/test/lib/rgraphum/query/enumerable_test.rb +42 -0
  95. data/test/lib/rgraphum/query/where_operators_test.rb +75 -0
  96. data/test/lib/rgraphum/query/where_test.rb +59 -0
  97. data/test/lib/rgraphum/simulator/ba_model_test.rb +75 -0
  98. data/test/lib/rgraphum/simulator/sir_model_test.rb +513 -0
  99. data/test/lib/rgraphum/simulator/sis_model_test.rb +478 -0
  100. data/test/lib/rgraphum/simulator_test.rb +22 -0
  101. data/test/lib/rgraphum/tf_idf_test.rb +30 -0
  102. data/test/lib/rgraphum/vertex_test.rb +50 -0
  103. data/test/lib/rgraphum/vertices_test.rb +180 -0
  104. data/test/test_helper.rb +98 -0
  105. data/tmp/.gitkeep +0 -0
  106. metadata +254 -0
@@ -0,0 +1,296 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ class Rgraphum::Analyzer::MemeTracker
4
+ attr_accessor :distance_max_limit
5
+ attr_accessor :graph
6
+ attr_accessor :clusters
7
+
8
# Creates a tracker bound to +graph+.
#
# @param graph [Rgraphum::Graph] graph to analyze; a new empty graph by default
def initialize(graph=Rgraphum::Graph.new)
  # Assigned through the accessor, then the default edit-distance limit is set.
  self.graph = graph
  @distance_max_limit = 5
end
12
+
13
# Word-level edit distance between two phrases, searched with a bounded depth.
#
# @param words_a [Array] words of the first phrase
# @param words_b [Array] words of the second phrase
# @param limit [Integer] maximum number of mismatching steps to explore
#   (defaults to +distance_max_limit+); it now bounds the recursive search
#   as well as the pre-filter
# @return [Integer, nil] the distance, or nil when no alignment is found
#   within +limit+
def edit_distance(words_a, words_b, limit=@distance_max_limit)
  # Cheap pre-filter: give up when the phrases differ in more than
  # 2 * limit distinct words.
  return nil if ((words_a - words_b) | (words_b - words_a)).size > (limit * 2)

  find_shift_distance(words_a, words_b, 0, limit)
end

# Recursive helper for #edit_distance.
#
# @param words_a [Array] remaining words of the first phrase
# @param words_b [Array] remaining words of the second phrase
# @param depth [Integer] number of mismatching steps taken so far
# @param limit [Integer] depth after which a branch is abandoned
#   (defaults to +distance_max_limit+)
# @return [Integer, nil] distance for the remaining words, or nil when every
#   branch exceeded +limit+
def find_shift_distance(words_a, words_b, depth=0, limit=@distance_max_limit)
  return nil if depth > limit

  # Once one side is exhausted the rest of the other side counts in full.
  return words_b.size if words_a.empty?
  return words_a.size if words_b.empty?

  rest_a = words_a[1..-1]
  rest_b = words_b[1..-1]

  # Matching heads cost nothing and do not consume depth.
  if words_a[0] == words_b[0]
    return find_shift_distance(rest_a, rest_b, depth, limit)
  end

  # Mismatch: try skipping a word of b, a word of a, or one of each.
  next_depth = depth + 1
  candidates = [
    find_shift_distance(words_a, rest_b, next_depth, limit),
    find_shift_distance(rest_a, words_b, next_depth, limit),
    find_shift_distance(rest_a, rest_b, next_depth, limit)
  ].compact

  candidates.empty? ? nil : 1 + candidates.min
end
45
+
46
+ ###############################################################
47
+
48
# Groups the graph's start-root vertices into communities.
#
# Steps, as implemented below:
# 1. build one cluster per start-root vertex of a copy of the graph and
#    collect the end vertices of its paths;
# 2. merge end-vertex lists that share a vertex (#vertex_id_map);
# 3. bucket the start roots by the merged end-vertex group they reach;
# 4. for every bucket, try each pairing of start roots with end vertices
#    and keep the communities whose summed sigma_in is largest.
#
# NOTE(review): step 4 enumerates permutations times repeated permutations,
# so it grows very quickly with the bucket size.
#
# @return [Array] the selected communities of every bucket, concatenated
def phrase_clusters
  new_graph = @graph.dup
  graph_start_root_vertices = start_root_vertices(new_graph)

  end_vertices_by_root = graph_start_root_vertices.map do |start_root_vertex|
    build_cluster(start_root_vertex).paths.map { |path| path.end_vertex }
  end
  cluster_keys = vertex_id_map(end_vertices_by_root)

  # merged end-vertex group => start roots that reach it
  sets = {}
  end_vertices_by_root.each_with_index do |end_vertices, i|
    end_path_keys = cluster_keys.find { |keys| !(end_vertices & keys).empty? }
    next unless end_path_keys

    (sets[end_path_keys] ||= []) << graph_start_root_vertices[i]
  end

  clusters = []
  sets.each do |end_path_keys, end_path_start_root_vertices|
    communities_set = []
    end_path_start_root_vertices.permutation.each do |start_vertices_p|
      end_path_keys.repeated_permutation(end_path_start_root_vertices.size).each do |end_keys_p|
        communities_set << make_communities(start_vertices_p, end_keys_p)
      end
    end

    # Keep the first candidate with the largest summed sigma_in.
    sigma_in_sizes = communities_set.map { |communities| sum_sigma_in(communities) }
    clusters += communities_set[sigma_in_sizes.index(sigma_in_sizes.max)]
  end
  clusters
end
95
+
96
# Merges key lists that share at least one element into single groups.
#
# The previous implementation decided whether anything had been merged by
# comparing list sizes, so inputs such as [[1, 2], [2, 3], [1, 3]] (three
# unions replacing three inputs) were returned unmerged, and larger sets of
# mutually overlapping lists grew combinatorially. This version folds every
# list into the groups built so far in a single pass.
#
# @param cluster_keys [Array<Array>] lists of keys
# @return [Array<Array>] +cluster_keys+ itself when nothing overlaps;
#   otherwise the lists that overlapped nothing (in input order) followed by
#   the merged groups
def vertex_id_map(cluster_keys)
  return cluster_keys if cluster_keys.size < 2

  groups = []
  cluster_keys.each do |keys|
    overlapping, disjoint = groups.partition { |group| !(group & keys).empty? }
    # With no overlap this is +keys+ itself (inject without a seed returns
    # the only element unchanged), which lets us recognise it below.
    merged = (overlapping + [keys]).inject { |union, group| union | group }
    groups = disjoint << merged
  end

  # Every merge removes at least one group, so equal sizes mean no merge.
  return cluster_keys if groups.size == cluster_keys.size

  untouched, merged_groups = groups.partition do |group|
    cluster_keys.any? { |keys| keys.equal?(group) }
  end
  untouched + merged_groups
end
114
+
115
# Adds up +sigma_in+ over the given communities.
#
# @param communities [Enumerable] objects responding to +sigma_in+
# @return [Numeric] the total, 0 for an empty collection
def sum_sigma_in(communities)
  total = 0
  communities.each { |community| total += community.sigma_in }
  total
end
120
+
121
# Builds communities by pairing each start vertex with the end vertex at the
# same position and collecting the vertices found between them.
#
# @param graph_start_root_vertices [Array] start vertices
# @param graph_end_root_vertices [Array] end vertices, same length as the
#   start list (Array#transpose raises IndexError otherwise)
# @return [Rgraphum::Communities] one community per end vertex id for which
#   a cluster was found
def make_communities(graph_start_root_vertices, graph_end_root_vertices)
  clusters_by_end_id = {}
  used_vertices = {}

  [graph_start_root_vertices, graph_end_root_vertices].transpose.each do |start_vertex, end_vertex|
    cluster, used_vertices = find_cluster_with_used_vertices(start_vertex, end_vertex, used_vertices)
    next unless cluster

    # Clusters reaching the same end vertex are unioned together.
    known = clusters_by_end_id[end_vertex.id]
    clusters_by_end_id[end_vertex.id] = known ? (known | cluster) : cluster
  end

  communities = clusters_by_end_id.values.map do |vertices|
    Rgraphum::Community.new(vertices: vertices)
  end

  Rgraphum::Communities(communities)
end
145
+
146
# NOTE: appears to look up a cluster (translated from the original note).
#
# Convenience wrapper around #find_cluster_with_used_vertices that starts
# with an empty used-vertex table and returns only the cluster part.
#
# @return [Array, nil] the first element of the helper's result
def find_cluster(start_vertex, end_vertex)
  find_cluster_with_used_vertices(start_vertex, end_vertex, {}).first
end
151
+
152
# Collects the vertices lying on paths from +start_vertex+ to +end_vertex+,
# following +out+ links, while recording in +used_vertices+ which end vertex
# each visited vertex has been assigned to.
#
# FIXME rename cluster
#
# @param start_vertex [Object] vertex responding to +out+
# @param end_vertex [Object] vertex the walk must reach
# @param used_vertices [Hash] vertex => end vertex; mutated in place
# @return [Array] a pair [cluster, used_vertices] where cluster is
#   - [] when +start_vertex+ was already assigned to +end_vertex+,
#   - nil when it was assigned to another end vertex or cannot reach it,
#   - otherwise the collected vertices, +start_vertex+ last.
def find_cluster_with_used_vertices(start_vertex, end_vertex, used_vertices)
  known_end = used_vertices[start_vertex]
  if known_end
    return [(known_end == end_vertex ? [] : nil), used_vertices]
  end

  if start_vertex == end_vertex
    used_vertices[start_vertex] = end_vertex
    return [[start_vertex], used_vertices]
  end

  return [nil, used_vertices] if start_vertex.out.empty?

  cluster = nil
  start_vertex.out.each do |next_vertex|
    deep_cluster, used_vertices = find_cluster_with_used_vertices(next_vertex, end_vertex, used_vertices)
    cluster = (cluster || []) + deep_cluster if deep_cluster
  end

  # Only vertices that actually lead to the end vertex are claimed.
  if cluster
    cluster << start_vertex
    used_vertices[start_vertex] = end_vertex
  end

  [cluster, used_vertices]
end
188
+
189
# NOTE: possibly looks for isolated clusters? (translated from the original note)
#
# Walks the +out+ links from +start_vertex+ and records, per end vertex
# (a vertex with no +out+ links), the vertices that lead to it.
#
# @param start_vertex [Object] vertex to start from
# @param cluster [Rgraphum::Cluster, nil] cluster to extend; a new one is
#   created when nil
# @return [Rgraphum::Cluster] the extended cluster
def build_cluster(start_vertex, cluster=nil)
  cluster ||= Rgraphum::Cluster.new

  start_vertex.out.each do |child|
    next if cluster.have_vertex_in_path?(child, start_vertex)

    if child.out.empty?
      # +child+ is an end vertex: extend its path or open a new one.
      if cluster.have_end_vertex?(child)
        cluster.append_vertex(cluster.find_path(child.id), start_vertex)
      else
        cluster.add_path(Rgraphum::Path.new(child, [child, start_vertex]))
      end
      next
    end

    next if cluster.have_vertex?(child) && cluster.have_vertex?(start_vertex)

    # Recurse first, then add +start_vertex+ to every path that goes
    # through +child+ and does not contain it yet.
    cluster = build_cluster(child, cluster)
    cluster.each_path do |path|
      next if !path.include?(child) || path.include?(start_vertex)

      cluster.append_vertex(path, start_vertex)
    end
  end

  cluster
end
215
+
216
# Vertices that have outgoing links but no incoming ones.
#
# @param target_graph [#vertices] graph to inspect (defaults to @graph)
# @return [Array] the matching vertices
def start_root_vertices(target_graph=@graph)
  target_graph.vertices.find_all do |vertex|
    next false unless vertex.in.empty?

    !vertex.out.empty?
  end
end
219
+
220
# Vertices that have incoming links but no outgoing ones.
#
# @param target_graph [#vertices] graph to inspect (defaults to @graph)
# @return [Array] the matching vertices
def end_root_vertices(target_graph=@graph)
  target_graph.vertices.find_all do |vertex|
    next false if vertex.in.empty?

    vertex.out.empty?
  end
end
223
+
224
# Depth-first walk along +out+ links that appends every newly reached vertex
# to +vertices+ (children before their parent).
#
# Side effect: when following a link adds nothing new, the edge from
# +target_vertex+ to that neighbour is deleted from +target_vertex.edges+.
#
# @param target_vertex [Object] vertex to walk from
# @param vertices [Array] accumulator, mutated in place
# @return [Array] the accumulator
def find_path(target_vertex, vertices=Rgraphum::Vertices.new)
  return vertices if vertices.include?(target_vertex)
  return vertices << target_vertex if target_vertex.out.empty?

  target_vertex.out.each do |next_vertex|
    size_before = vertices.size
    vertices = find_path(next_vertex, vertices)
    next unless vertices.size == size_before

    # Nothing new was collected through this neighbour: cut the edge to it.
    edge_to_delete = target_vertex.edges.where(target: next_vertex).first
    target_vertex.edges.delete(edge_to_delete)
  end

  vertices << target_vertex
end
238
+
239
# Builds one graph per start-root vertex of a copy of +graph+, each made
# from the vertices collected by #find_path.
#
# Prints progress markers when Rgraphum.verbose? is true.
#
# @param graph [Rgraphum::Graph] source graph (defaults to @graph); it is
#   duplicated before processing
# @return [Array] the graphs returned by +to_graph+, one per start root
def make_path_graph(graph=@graph)
  p "in make path graph" if Rgraphum.verbose?
  working_graph = graph.dup

  p "find srn" if Rgraphum.verbose?
  roots = start_root_vertices(working_graph)

  p "find path and to_graph" if Rgraphum.verbose?
  roots.map do |root|
    Rgraphum::Vertices.new(find_path(root)).to_graph
  end
end
249
+
250
# Combines the per-root path graphs of +graph+ into a single new graph and
# compacts it by :id.
#
# @param graph [Rgraphum::Graph] source graph (defaults to @graph)
# @return [Object] the result of +compact_with(:id)+ on the combined graph
def cut_edges_with_srn(graph=@graph)
  combined = Rgraphum::Graph.new
  path_graphs = make_path_graph(graph)

  combined.vertices = path_graphs.map(&:vertices).flatten
  combined.edges = path_graphs.map(&:edges).flatten

  combined.compact_with(:id)
end
260
+
261
# For every pair of vertices with equal +words+, increments the +count+ of
# the vertex that comes first in the pair (a nil count is treated as 0).
#
# @param graph [#vertices] graph whose vertices are compared (defaults to @graph)
def count_same_words_vertices(graph=@graph)
  graph.vertices.combination(2) do |earlier, later|
    next unless earlier.words == later.words

    earlier.count = earlier.count.to_i + 1
  end
end
266
+
267
# Sorts the vertices by +start+ and adds an edge for every pair whose words
# are within edit distance; the weight is 1 / (distance + 1).
#
# A pair is skipped when both the later vertex's +start+ and the earlier
# vertex's +end+ are set and +within_term+ rejects the pair.
#
# @param graph [Rgraphum::Graph] graph to extend (defaults to @graph); its
#   vertices are sorted in place
# @return [Object] +graph.edges+
def make_edges(graph=@graph)
  graph.vertices.sort! { |a, b| a.start.to_f <=> b.start.to_f }

  graph.vertices.combination(2).each do |earlier, later|
    next if later.start && earlier.end && !earlier.within_term(later)

    distance = edit_distance(earlier.words, later.words)
    next unless distance

    graph.edges << { source: earlier, target: later, weight: (1.0 / (distance + 1)) }
  end

  graph.edges
end
283
+
284
# Replaces @graph with a new graph built from +phrase_array+: counts
# vertices with equal words, compacts the graph by :words, then adds edges.
#
# @param phrase_array [Array] vertices (or vertex attributes) to load
# @return [Rgraphum::Graph] the new @graph
def make_graph(phrase_array)
  @graph = Rgraphum::Graph.new
  @graph.vertices = phrase_array

  count_same_words_vertices(@graph)
  @graph.compact_with(:words, @graph)
  make_edges(@graph)

  @graph
end
296
+ end
@@ -0,0 +1,45 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'time'
4
+
5
# Builds a graph of Twitter users from rows of tweet data: one vertex per
# screen name, one edge per tweet that mentions another user with "@name".
#
# Rows are indexed positionally; the code below reads index 7 as the
# author's label, index 8 as the tweet text and index 11 as a timestamp
# string. NOTE(review): the full row layout is not visible here.
class Rgraphum::Analyzer::RTAtMark

  # Adds a +twits+ field to vertices so each user keeps its tweet rows.
  class Rgraphum::Vertex
    field :twits
  end

  # @param twits [Array<Array>] tweet rows
  # @return [Rgraphum::Graph] the graph built from the rows
  def make_graph(twits)
    @graph = Rgraphum::Graph.new

    make_vertices(twits)
    make_edges(twits)

    @graph
  end

  # Creates one vertex per distinct author label and attaches each row
  # (without nil cells) to its author's vertex.
  #
  # @param twits [Array<Array>] tweet rows
  # @param graph [Rgraphum::Graph] graph to fill (defaults to @graph)
  def make_vertices(twits, graph=@graph)
    # uniq, not uniq!: uniq! returns nil when there are no duplicates,
    # which would assign nil as the vertex list.
    graph.vertices = twits.map { |twit| { label: twit[7] } }.uniq
    twits.each do |twit|
      vertex = graph.vertices.where(label: twit[7]).first
      vertex.twits ||= []
      vertex.twits << twit.compact
    end
  end

  # Adds an edge from the mentioned user to the author for every row whose
  # text contains an "@name" mention; the mentioned user's vertex is created
  # when missing.
  #
  # @param twits [Array<Array>] tweet rows
  # @param graph [Rgraphum::Graph] graph to fill (defaults to @graph)
  def make_edges(twits, graph=@graph)
    twits.each do |twit|
      atmark_screen_name = pickup_screen_name(twit[8])
      next unless atmark_screen_name

      source_vertex = graph.vertices.where(label: atmark_screen_name).first
      source_vertex ||= graph.vertices.build(label: atmark_screen_name)
      target_vertex = graph.vertices.where(label: twit[7]).first
      graph.edges << {source:source_vertex,target:target_vertex,label:twit[8],start:Time.parse(twit[11])}
    end
  end

  # Extracts the first "@name" mention from +text+.
  #
  # @param text [String] tweet text
  # @return [String, nil] the screen name, lower-cased and without the "@",
  #   or nil when the text has no mention
  def pickup_screen_name(text)
    mention = text.match(/(^|[^@0-9_a-zA-Z])@[0-9_a-zA-Z]+($|[^@0-9_a-zA-Z])/)
    return nil unless mention

    # Strip the "@" and the delimiter characters captured around the name.
    mention[0].gsub(/[^0-9_a-zA-Z]/,"").downcase
  end

end
@@ -0,0 +1,8 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ module Rgraphum::Analyzer
4
+ end
5
+
6
+ require_relative 'analyzer/linear_regression'
7
+ require_relative 'analyzer/meme_tracker'
8
+ require_relative 'analyzer/twitter/rt_at_mark'
@@ -0,0 +1,67 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# A collection of paths indexed by the id of each path's end vertex.
#
# Enumerable is backed by #each, which iterates over the paths.
class Rgraphum::Cluster
  include Enumerable

  # @param paths [Array] paths responding to +end_vertex+ and +vertices+
  def initialize(*paths)
    @paths = {}
    paths.each { |path| add_path(path) }
  end

  # @return [Array] all paths
  def paths
    @paths.values
  end

  # Yields every path; returns an enumerator over the paths when no block
  # is given.
  def each_path
    # Name the method explicitly: a bare to_enum would enumerate #each.
    return to_enum(:each_path) unless block_given?

    @paths.each_value { |path| yield path }
  end

  # Required by Enumerable, which this class includes.
  alias_method :each, :each_path

  # Registers +path+ under its end vertex id, replacing any previous path
  # with the same end vertex.
  def add_path(path)
    @paths[path.end_vertex.id] = path
  end

  # @param end_vertex_id [Object] id of an end vertex
  # @return [Object, nil] the path ending at that vertex, if any
  def find_path(end_vertex_id)
    @paths[end_vertex_id]
  end

  # Appends +vertex+ to the registered path that shares +path+'s end vertex.
  def append_vertex(path, vertex)
    @paths[path.end_vertex.id].vertices << vertex
  end

  def empty?
    @paths.empty?
  end

  # @return [Boolean, nil] whether the path ending at +end_vertex+ contains
  #   +vertex+; nil when there is no such path
  def have_vertex_in_path?(end_vertex, vertex)
    path = find_path(end_vertex.id)
    path && path.vertices.include?(vertex)
  end

  # @return [Boolean] whether any path contains +vertex+
  def have_vertex?(vertex)
    @paths.any? { |_id, path| path.vertices.include?(vertex) }
  end

  # @param end_vertex [Object] an end vertex, or its id
  # @return [Boolean] whether some path ends at that vertex
  def have_end_vertex?(end_vertex)
    end_vertex_id = end_vertex.respond_to?(:id) ? end_vertex.id : end_vertex
    @paths.any? { |_id, path| path.end_vertex.id == end_vertex_id }
  end

  # @return [Hash] end vertex => vertices of its path
  def to_hash
    hash = {}
    @paths.each_value do |path|
      hash[path.end_vertex] = path.vertices
    end
    hash
  end
end
@@ -0,0 +1,65 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Conversion function: returns +array+ unchanged when it is exactly a
# Rgraphum::Communities, otherwise wraps it in a new one.
def Rgraphum::Communities(array)
  return array if array.instance_of?(Rgraphum::Communities)

  Rgraphum::Communities.new(array)
end
10
+
11
# Array-like collection of Rgraphum::Community objects. Items pushed as
# hashes are converted into communities on insertion.
class Rgraphum::Communities < Rgraphum::RgraphumArray

  # Non-Gremlin methods

  # FIXME
  # def dup
  # end

  # Keep the inherited insertion methods reachable before overriding them.
  alias_method :original_push_1, :<<
  alias_method :original_push_m, :push
  protected :original_push_1
  protected :original_push_m

  # Adds a community to the collection.
  # @param [Hash] community_hash one community, given as a hash.
  # @return [Community] the added community.
  def build(community_hash={})
    new_community(community_hash).tap do |community|
      original_push_1 community
    end
  end

  # Builds and appends one community; returns the collection.
  def <<(community_hash)
    build(community_hash)
    self
  end

  # Builds and appends every given community; returns the collection.
  def push(*community_hashs)
    community_hashs.each { |community_hash| self << community_hash }
    self
  end

  # Called from delete_if, reject! and reject
  #
  # Accepts either a community or a community id.
  def delete(community_or_id)
    target_community =
      if community_or_id.is_a?(Rgraphum::Community)
        community_or_id
      else
        where(id: community_or_id).first
      end
    super(target_community)
  end

  private

  # Converts +community_hash+ into a community. Hashes are copied and
  # completed with this collection's graph and, when missing, a new id.
  def new_community(community_hash={})
    return Rgraphum::Community(community_hash) unless community_hash.is_a?(Hash)

    attributes = community_hash.dup
    attributes[:graph] = @graph
    attributes[:id] ||= new_id
    Rgraphum::Community(attributes)
  end
end
@@ -0,0 +1,86 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Conversion function: returns the argument unchanged when it is exactly a
# Rgraphum::Community, otherwise builds a new community from it.
def Rgraphum::Community(hash_or_community)
  return hash_or_community if hash_or_community.instance_of?(Rgraphum::Community)

  Rgraphum::Community.new(hash_or_community)
end
10
+
11
# A group of vertices with memoized edge statistics.
#
# +edges+, +inter_edges+, +outer_edges+ and +sigma_in+ are cached; the cache
# is cleared by #update, which #add_vertex and #merge call.
class Rgraphum::Community
  attr_reader :graph
  attr_reader :id
  attr_reader :vertices

  # @param options [Hash] :id, :graph and :vertices (all optional)
  def initialize(options={})
    @id       = options[:id]
    @graph    = options[:graph]
    @vertices = []

    if options[:vertices]
      options[:vertices].each do |vertex|
        add_vertex vertex
      end
    end
  end

  # Adds +vertex+ and drops the cached edge data, which would otherwise
  # keep describing the community without the new vertex.
  #
  # @return [Array] the community's vertices
  def add_vertex(vertex)
    update
    @vertices << vertex
  end

  # @return [Array] edges shared by two vertices of the community
  def inter_edges
    return @inter_edges if @inter_edges
    @inter_edges = []
    @vertices.combination(2) do |vertex_a, vertex_b|
      @inter_edges += (vertex_a.edges & vertex_b.edges)
    end
    @inter_edges
  end

  # @return [Object] the community's edges that are not inter edges
  def outer_edges
    @outer_edges ||= edges - inter_edges
  end

  # @return [Rgraphum::Edges] distinct edges of all member vertices
  def edges
    @edges ||= Rgraphum::Edges.new(@vertices.map(&:edges).flatten.uniq)
  end

  # @return [Object] edges this community has in common with +community+
  def edges_from(community)
    (edges & community.edges) || []
  end

  # @return [Numeric] sum of the members' +degree_weight+
  def degree_weight
    @vertices.inject(0) { |sum, vertex| sum + vertex.degree_weight }
  end

  # @return [Numeric] sum of the weights of the inter edges
  def sigma_in
    @sigma_in ||= inter_edges.inject(0) { |sum, edge|
      sum + edge.weight
    }
  end

  # Clears every memoized value.
  def update
    @inter_edges = nil
    @outer_edges = nil
    @edges       = nil
    @sigma_tot   = nil
    @sigma_in    = nil
  end

  def neighborhood?
    raise NotImplementedError
  end

  # Moves the vertices of +other_community+ into this community, updating
  # each vertex's +community_id+, then clears the memoized values.
  def merge(other_community)
    other_community.vertices.each do |vertex|
      vertex.community_id = self.id
      @vertices << vertex
    end
    self.update
  end

  # @return [Rgraphum::Graph] graph made of the members and the inter edges
  def to_graph
    Rgraphum::Graph.new(vertices: vertices, edges: inter_edges)
  end
end
@@ -0,0 +1,40 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# CosineSimilarity
# Calculates the pairwise cosine similarity of row vectors.
# For example, these rows form an equilateral triangle:
#   [ [1,1,0],[1,0,1],[0,1,1] ]
# The angle between any two of them is 60 degrees and cos(60) = 0.5,
# thus the output is
#   [[1.0, 0.5, 0.5], [0.5, 1.0, 0.5], [0.5, 0.5, 1.0]]
#
class CosineSimilarityMatrix
  # Calculates the cosine similarity matrix.
  #
  # The diagonal is always 1.0. A row whose norm is zero yields NaN against
  # every other row (0.0 / 0.0). Rows of different lengths raise IndexError
  # (from Array#transpose) as soon as two of them are compared.
  #
  # @param matrix [Array<Array<Numeric>>] array of row vectors
  # @return [Array<Array<Float>>] symmetric matrix of cosine similarities
  def similarity(matrix)
    # Squared norm of every row, computed once instead of once per pair.
    squared_norms = matrix.map do |row|
      row.inject(0.0) { |sum, value| sum + value**2 }
    end

    sim_matrix = []
    matrix.each_with_index do |row_fix, j|
      sim_array = []
      matrix.each_with_index do |row_move, i|
        if j > i
          # Already computed when row i was the fixed row.
          sim_array << sim_matrix[i][j]
        elsif i == j
          sim_array << 1.0
        else
          ab_sum = 0.0
          [row_fix, row_move].transpose.each { |a, b| ab_sum += a * b }
          sim_array << ab_sum / Math.sqrt(squared_norms[j] * squared_norms[i])
        end
      end
      sim_matrix << sim_array
    end
    sim_matrix
  end

end
40
+