rgraphum 0.0.1.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. data/.gitignore +26 -0
  2. data/GLOSSARIES.md +108 -0
  3. data/GREMLIN.md +1398 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +136 -0
  7. data/Rakefile +16 -0
  8. data/bin/.irbrc +41 -0
  9. data/bin/rgraphum_console +61 -0
  10. data/bin/rgraphum_runner +57 -0
  11. data/examples/ba_model/make.rb +19 -0
  12. data/examples/ba_model/make_dummy_twitter_rt_data.rb +0 -0
  13. data/examples/basic/check_modularity.rb +27 -0
  14. data/examples/basic/make_graph.rb +12 -0
  15. data/examples/parser/dot.rb +28 -0
  16. data/examples/sis_model/lifegame.rb +161 -0
  17. data/graph_struct.jpg +0 -0
  18. data/lib/rgraphum/analyzer/linear_regression.rb +31 -0
  19. data/lib/rgraphum/analyzer/meme_tracker.rb +296 -0
  20. data/lib/rgraphum/analyzer/twitter/rt_at_mark.rb +45 -0
  21. data/lib/rgraphum/analyzer.rb +8 -0
  22. data/lib/rgraphum/cluster.rb +67 -0
  23. data/lib/rgraphum/communities.rb +65 -0
  24. data/lib/rgraphum/community.rb +86 -0
  25. data/lib/rgraphum/cosine_similarity_matrix.rb +40 -0
  26. data/lib/rgraphum/edge.rb +194 -0
  27. data/lib/rgraphum/edges.rb +161 -0
  28. data/lib/rgraphum/ext/cosine_similarity_matrix.rb +79 -0
  29. data/lib/rgraphum/ext/linear_regression.rb +22 -0
  30. data/lib/rgraphum/ext/tf_idf.rb +52 -0
  31. data/lib/rgraphum/graph/gremlin.rb +193 -0
  32. data/lib/rgraphum/graph/math/clustering_coefficient.rb +53 -0
  33. data/lib/rgraphum/graph/math/community_detection.rb +141 -0
  34. data/lib/rgraphum/graph/math/degree_distribution.rb +50 -0
  35. data/lib/rgraphum/graph/math/dijkstra.rb +331 -0
  36. data/lib/rgraphum/graph/math.rb +45 -0
  37. data/lib/rgraphum/graph.rb +267 -0
  38. data/lib/rgraphum/importer.rb +97 -0
  39. data/lib/rgraphum/marshal.rb +26 -0
  40. data/lib/rgraphum/motifs.rb +8 -0
  41. data/lib/rgraphum/parsers/flare.rb +42 -0
  42. data/lib/rgraphum/parsers/gephi.rb +193 -0
  43. data/lib/rgraphum/parsers/graphviz.rb +78 -0
  44. data/lib/rgraphum/parsers/miserables.rb +54 -0
  45. data/lib/rgraphum/parsers.rb +32 -0
  46. data/lib/rgraphum/path.rb +37 -0
  47. data/lib/rgraphum/query.rb +130 -0
  48. data/lib/rgraphum/rgraphum_array.rb +159 -0
  49. data/lib/rgraphum/rgraphum_array_dividers.rb +43 -0
  50. data/lib/rgraphum/rgraphum_random.rb +5 -0
  51. data/lib/rgraphum/simulator/ba_model.rb +140 -0
  52. data/lib/rgraphum/simulator/sir_model.rb +178 -0
  53. data/lib/rgraphum/simulator/sis_model.rb +158 -0
  54. data/lib/rgraphum/simulator.rb +29 -0
  55. data/lib/rgraphum/statistic/power_law.rb +9 -0
  56. data/lib/rgraphum/t.rb +12 -0
  57. data/lib/rgraphum/tf_idf.rb +27 -0
  58. data/lib/rgraphum/version.rb +3 -0
  59. data/lib/rgraphum/vertex.rb +354 -0
  60. data/lib/rgraphum/vertices.rb +97 -0
  61. data/lib/rgraphum.rb +38 -0
  62. data/performance/add-vertices-edges.rb +20 -0
  63. data/performance/add-vertices.rb +12 -0
  64. data/performance/build-graph.rb +19 -0
  65. data/performance/delete-graph.rb +24 -0
  66. data/performance/delete-vertices.rb +25 -0
  67. data/performance/refer-graph.rb +23 -0
  68. data/rgraphum.gemspec +30 -0
  69. data/test/lib/rgraphum/analyzer/linear_regression_test.rb +20 -0
  70. data/test/lib/rgraphum/analyzer/meme_tracker_test.rb +383 -0
  71. data/test/lib/rgraphum/analyzer/twitter/rt_at_mark_test.rb +120 -0
  72. data/test/lib/rgraphum/array_test.rb +95 -0
  73. data/test/lib/rgraphum/bubble_test.rb +7 -0
  74. data/test/lib/rgraphum/communities_test.rb +53 -0
  75. data/test/lib/rgraphum/cosine_similarity_test.rb +18 -0
  76. data/test/lib/rgraphum/edge_test.rb +89 -0
  77. data/test/lib/rgraphum/edges_test.rb +178 -0
  78. data/test/lib/rgraphum/graph_builder_test.rb +64 -0
  79. data/test/lib/rgraphum/graph_dup_test.rb +199 -0
  80. data/test/lib/rgraphum/graph_plus_test.rb +80 -0
  81. data/test/lib/rgraphum/graph_test.rb +512 -0
  82. data/test/lib/rgraphum/gremlin_test.rb +145 -0
  83. data/test/lib/rgraphum/importers/idg_json_edges.json +20 -0
  84. data/test/lib/rgraphum/importers/idg_json_test.rb +207 -0
  85. data/test/lib/rgraphum/importers/idg_json_vertices.json +46 -0
  86. data/test/lib/rgraphum/math/average_distance_matrix_test.rb +142 -0
  87. data/test/lib/rgraphum/math/clustering_coefficient_test.rb +219 -0
  88. data/test/lib/rgraphum/math/community_test.rb +78 -0
  89. data/test/lib/rgraphum/math/degree_distribution_test.rb +40 -0
  90. data/test/lib/rgraphum/math/dijkstra_test.rb +146 -0
  91. data/test/lib/rgraphum/math/modularity_test.rb +154 -0
  92. data/test/lib/rgraphum/math/quick_average_distance_matrix_test.rb +84 -0
  93. data/test/lib/rgraphum/path_test.rb +44 -0
  94. data/test/lib/rgraphum/query/enumerable_test.rb +42 -0
  95. data/test/lib/rgraphum/query/where_operators_test.rb +75 -0
  96. data/test/lib/rgraphum/query/where_test.rb +59 -0
  97. data/test/lib/rgraphum/simulator/ba_model_test.rb +75 -0
  98. data/test/lib/rgraphum/simulator/sir_model_test.rb +513 -0
  99. data/test/lib/rgraphum/simulator/sis_model_test.rb +478 -0
  100. data/test/lib/rgraphum/simulator_test.rb +22 -0
  101. data/test/lib/rgraphum/tf_idf_test.rb +30 -0
  102. data/test/lib/rgraphum/vertex_test.rb +50 -0
  103. data/test/lib/rgraphum/vertices_test.rb +180 -0
  104. data/test/test_helper.rb +98 -0
  105. data/tmp/.gitkeep +0 -0
  106. metadata +254 -0
@@ -0,0 +1,296 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Builds a graph of phrases (vertices exposing +words+, +start+, +end+ and
# +count+) linked by word-level edit distance, and groups that graph into
# clusters / communities.
# NOTE(review): the vertex fields and helpers such as +within_term+ and
# +compact_with+ are defined elsewhere in the gem - confirm their contracts.
class Rgraphum::Analyzer::MemeTracker
  # Largest number of mismatch steps explored when comparing two phrases.
  attr_accessor :distance_max_limit
  # The graph being analyzed.
  attr_accessor :graph
  # Never assigned inside this class; available for callers to store results.
  attr_accessor :clusters

  # @param graph [Rgraphum::Graph] graph to analyze (a new empty graph by default)
  def initialize(graph=Rgraphum::Graph.new)
    @distance_max_limit = 5
    self.graph = graph
  end

  # Word-level edit distance between two phrases.
  #
  # @param words_a [Array] words of the first phrase
  # @param words_b [Array] words of the second phrase
  # @param limit [Integer] maximum number of mismatch steps to explore
  # @return [Integer, nil] the distance, or nil when the phrases are further
  #   apart than +limit+ allows
  def edit_distance(words_a, words_b, limit=@distance_max_limit)
    # Cheap pre-filter: give up when the two phrases differ in more than
    # 2 * limit distinct words, before starting the recursive search.
    return nil if (words_a - words_b | words_b - words_a).size > (limit * 2)

    # Pass +limit+ on so the recursion honours the caller's limit instead of
    # always falling back to @distance_max_limit.
    find_shift_distance(words_a, words_b, 0, limit)
  end

  # Recursive helper for #edit_distance. Walks both word lists from the front;
  # on a mismatch it tries skipping a word of b, skipping a word of a, or
  # skipping one of each, and keeps the cheapest branch.
  #
  # @param words_a [Array]
  # @param words_b [Array]
  # @param depth [Integer] number of mismatch steps taken so far
  # @param limit [Integer] search is abandoned once +depth+ exceeds this
  # @return [Integer, nil] distance, or nil when every branch exceeded +limit+
  def find_shift_distance(words_a, words_b, depth=0, limit=@distance_max_limit)
    return nil if depth > limit

    return words_b.size if words_a.empty?
    return words_a.size if words_b.empty?

    shifted_words_a = words_a[1..-1]
    shifted_words_b = words_b[1..-1]

    # Matching heads cost nothing and do not consume search depth.
    if words_a[0] == words_b[0]
      return find_shift_distance(shifted_words_a, shifted_words_b, depth, limit)
    end

    depth += 1
    delta_distance = [
      find_shift_distance(words_a, shifted_words_b, depth, limit),
      find_shift_distance(shifted_words_a, words_b, depth, limit),
      find_shift_distance(shifted_words_a, shifted_words_b, depth, limit)
    ].compact.min

    # nil means every branch ran past the limit.
    delta_distance && 1 + delta_distance
  end

  ###############################################################

  # Groups the graph into communities. Start roots (no incoming edges) that
  # reach overlapping sets of end vertices are grouped together; for each
  # group every assignment of start roots to end vertices is tried and the
  # assignment with the largest summed +sigma_in+ is kept.
  #
  # @return [Array] the selected communities, concatenated over all groups
  def phrase_clusters
    new_graph = @graph.dup
    graph_start_root_vertices = start_root_vertices(new_graph)

    clusters, cluster_keys = [], []
    graph_start_root_vertices.each do |graph_start_root_vertex|
      cluster = build_cluster(graph_start_root_vertex)
      clusters << cluster
      cluster_keys << cluster.paths.map { |path| path.end_vertex }
    end
    # Merge end-vertex lists that overlap into disjoint key sets.
    cluster_keys = vertex_id_map(cluster_keys)

    # key set (array of end vertices) => start roots that reach it
    sets = {}
    clusters.each_with_index do |cluster, i|
      end_vertices = cluster.paths.map { |path| path.end_vertex }
      cluster_keys.each do |end_path_keys|
        next if (end_vertices & end_path_keys).empty?

        sets[end_path_keys] ||= []
        sets[end_path_keys] << graph_start_root_vertices[i]
        break
      end
    end

    selected = []
    sets.each do |end_path_keys, end_path_start_root_vertices|
      end_path_keys_pt = end_path_keys.repeated_permutation(end_path_start_root_vertices.size)

      communities_set = []
      end_path_start_root_vertices.permutation.each do |end_path_start_root_vertices_p|
        end_path_keys_pt.each do |end_path_keys_p|
          communities_set << make_communities(end_path_start_root_vertices_p, end_path_keys_p)
        end
      end

      # Keep the first candidate with the largest total sigma_in.
      sigma_in_sizes = communities_set.map { |communities| sum_sigma_in(communities) }
      index = sigma_in_sizes.index(sigma_in_sizes.max)

      selected += communities_set[index]
    end
    selected
  end

  # Repeatedly merges lists that share at least one element until no further
  # merge changes the number of lists.
  #
  # @param cluster_keys [Array<Array>] lists of end vertices
  # @return [Array<Array>] merged lists
  def vertex_id_map(cluster_keys)
    return cluster_keys if cluster_keys.size < 2
    id_map = cluster_keys.dup

    cluster_keys.combination(2).each do |a, b|
      unless (a & b).empty?
        id_map.delete(a)
        id_map.delete(b)
        id_map << (a | b)
      end
    end

    # Same size means this pass did not reduce the number of lists; stop.
    if id_map.size == cluster_keys.size
      cluster_keys
    else
      vertex_id_map(id_map)
    end
  end

  # @param communities [Enumerable] objects responding to +sigma_in+
  # @return [Numeric] sum of +sigma_in+ over all communities
  def sum_sigma_in(communities)
    communities.inject(0) { |size, community| size + community.sigma_in }
  end

  # Pairs each start root with the end vertex at the same index, collects the
  # vertices found between them and builds one community per end vertex id.
  #
  # @param graph_start_root_vertices [Array] start vertices
  # @param graph_end_root_vertices [Array] end vertices, same length
  # @return [Rgraphum::Communities]
  def make_communities(graph_start_root_vertices, graph_end_root_vertices)
    hashed_cluster = {}
    used_vertices = {}

    pairs = [graph_start_root_vertices, graph_end_root_vertices].transpose

    pairs.each do |start_vertex, end_vertex|
      cluster, used_vertices = find_cluster_with_used_vertices(start_vertex, end_vertex, used_vertices)
      next unless cluster

      if hashed_cluster[end_vertex.id]
        hashed_cluster[end_vertex.id] = (hashed_cluster[end_vertex.id] | cluster)
      else
        hashed_cluster[end_vertex.id] = cluster
      end
    end

    communities = hashed_cluster.values.map do |vertices|
      Rgraphum::Community.new(vertices: vertices)
    end

    Rgraphum::Communities(communities)
  end

  # NOTE: appears to search for a cluster (translated from the original note).
  #
  # @return [Array, nil] vertices on paths from +start_vertex+ to
  #   +end_vertex+, or nil when +end_vertex+ is not reachable
  def find_cluster(start_vertex, end_vertex)
    cluster, _used_vertices = find_cluster_with_used_vertices(start_vertex, end_vertex, {})
    cluster
  end

  # Depth-first search from +start_vertex+ along +out+ towards +end_vertex+.
  # +used_vertices+ maps each vertex already claimed to the end vertex it was
  # claimed for, so a vertex is never collected for two different end vertices.
  #
  # @return [Array] a pair +[cluster, used_vertices]+ where +cluster+ is an
  #   Array of vertices (possibly empty) or nil when nothing was found
  def find_cluster_with_used_vertices(start_vertex, end_vertex, used_vertices)
    # FIXME rename cluster
    if used_vertex = used_vertices[start_vertex]
      if used_vertex == end_vertex
        # Already collected for this end vertex: contribute nothing new.
        return [[], used_vertices]
      else
        # Claimed for a different end vertex: treat as unreachable.
        return [nil, used_vertices]
      end
    end

    if start_vertex == end_vertex
      used_vertices[start_vertex] = end_vertex
      return [[start_vertex], used_vertices]
    elsif start_vertex.out.empty?
      return [nil, used_vertices]
    end

    cluster = nil
    start_vertex.out.each do |vertex|
      deep_cluster, used_vertices = find_cluster_with_used_vertices(vertex, end_vertex, used_vertices)

      if deep_cluster
        cluster ||= []
        cluster += deep_cluster
      end
    end

    if cluster
      cluster << start_vertex
      used_vertices[start_vertex] = end_vertex
    end

    [cluster, used_vertices]
  end

  # NOTE: possibly searches for isolated clusters? (translated from the
  # original note).
  #
  # Walks +out+ from +start_vertex+ and records, for every end vertex reached
  # (a vertex whose +out+ is empty), a path holding the vertices that lead to it.
  #
  # @param start_vertex [Object] vertex to start from
  # @param cluster [Rgraphum::Cluster, nil] cluster to extend (new one if nil)
  # @return [Rgraphum::Cluster]
  def build_cluster(start_vertex, cluster=nil)
    cluster ||= Rgraphum::Cluster.new
    start_vertex.out.each do |vertex|
      next if cluster.have_vertex_in_path?(vertex, start_vertex)

      if vertex.out.empty?
        if cluster.have_end_vertex?(vertex)
          path = cluster.find_path(vertex.id)
          cluster.append_vertex path, start_vertex
        else
          cluster.add_path Rgraphum::Path.new(vertex, [vertex, start_vertex])
        end
      else
        found = cluster.have_vertex?(vertex) && cluster.have_vertex?(start_vertex)
        next if found

        cluster = build_cluster(vertex, cluster)
        cluster.each_path do |path|
          if path.include?(vertex) && !path.include?(start_vertex)
            cluster.append_vertex path, start_vertex
          end
        end
      end
    end
    cluster
  end

  # @return vertices with no incoming but at least one outgoing neighbour
  def start_root_vertices(target_graph=@graph)
    target_graph.vertices.find_all { |vertex| vertex.in.empty? && !vertex.out.empty? }
  end

  # @return vertices with at least one incoming but no outgoing neighbour
  def end_root_vertices(target_graph=@graph)
    target_graph.vertices.find_all { |vertex| !vertex.in.empty? && vertex.out.empty? }
  end

  # Collects the vertices reachable from +target_vertex+ (children first,
  # +target_vertex+ last). When descending into a child adds no new vertex,
  # the edge from +target_vertex+ to that child is deleted, so this method
  # mutates the graph it walks.
  #
  # @param target_vertex [Object] vertex to start from
  # @param vertices [Rgraphum::Vertices] accumulator of visited vertices
  # @return [Rgraphum::Vertices] the accumulator
  def find_path(target_vertex, vertices=Rgraphum::Vertices.new)
    return vertices if vertices.include?(target_vertex)
    return vertices << target_vertex if target_vertex.out.empty?

    path_vertices = target_vertex.out.inject(vertices) do |collected, vertex|
      size = collected.size
      collected = find_path(vertex, collected)
      if collected.size == size
        edge_to_delete = target_vertex.edges.where(target: vertex).first
        target_vertex.edges.delete(edge_to_delete)
      end
      collected
    end
    path_vertices << target_vertex
  end

  # Builds one graph per start root of a copy of +graph+.
  # NOTE(review): "srn" in the log strings presumably means "start root node".
  #
  # @return [Array] one graph per start root vertex
  def make_path_graph(graph=@graph)
    p "in make path graph" if Rgraphum.verbose?
    graph = graph.dup

    p "find srn" if Rgraphum.verbose?
    graph_start_root_vertices = start_root_vertices(graph)

    p "find path and to_graph" if Rgraphum.verbose?
    graph_start_root_vertices.map { |vertex| Rgraphum::Vertices.new(find_path(vertex)).to_graph }
  end

  # Merges the per-start-root graphs from #make_path_graph into a single new
  # graph and returns the result of +compact_with(:id)+ on it.
  def cut_edges_with_srn(graph=@graph)
    new_graph = Rgraphum::Graph.new

    graphes = make_path_graph(graph)

    new_graph.vertices = graphes.map { |g| g.vertices }.flatten
    new_graph.edges = graphes.map { |g| g.edges }.flatten

    new_graph.compact_with(:id)
  end

  # For every unordered pair of vertices with equal +words+, increments the
  # +count+ of the first vertex of the pair (nil counts start from 0).
  def count_same_words_vertices(graph=@graph)
    graph.vertices.combination(2).each do |vertex_a, vertex_b|
      vertex_a.count = vertex_a.count.to_i + 1 if vertex_a.words == vertex_b.words
    end
  end

  # Sorts the vertices in place by +start+ and adds an edge for every pair
  # whose edit distance is within the limit, weighted 1 / (distance + 1).
  #
  # @return the graph's edges
  def make_edges(graph=@graph)
    graph.vertices.sort! { |a, b| a.start.to_f <=> b.start.to_f }

    graph.vertices.combination(2).each do |earlier, later|
      # The term check only applies when both time stamps are present.
      next if later.start && earlier.end && !earlier.within_term(later)

      distance = edit_distance(earlier.words, later.words)
      next unless distance

      graph.edges << { source: earlier, target: later, weight: (1.0 / (distance + 1)) }
    end

    graph.edges
  end

  # Replaces @graph with a new graph built from +phrase_array+: counts
  # duplicate phrases, compacts on +words+, then links phrases with edges.
  #
  # @param phrase_array [Array] vertices (or vertex hashes) describing phrases
  # @return [Rgraphum::Graph]
  def make_graph(phrase_array)
    @graph = Rgraphum::Graph.new
    @graph.vertices = phrase_array

    count_same_words_vertices(@graph)

    @graph.compact_with(:words, @graph)

    make_edges(@graph)

    @graph
  end
end
@@ -0,0 +1,45 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'time'
4
+
5
# Builds a graph from tweet rows: one vertex per author screen name and one
# edge per tweet that mentions another user with "@name".
# NOTE(review): rows are indexed positionally - twit[7] is used as the author
# label, twit[8] as the tweet text and twit[11] as a parseable time string;
# confirm against the data source.
class Rgraphum::Analyzer::RTAtMark

  # Adds a +twits+ field to every vertex to hold the rows of that author.
  class Rgraphum::Vertex
    field :twits
  end

  # @param twits [Array<Array>] tweet rows
  # @return [Rgraphum::Graph] the newly built graph (also stored in @graph)
  def make_graph(twits)
    @graph = Rgraphum::Graph.new

    make_vertices(twits)
    make_edges(twits)

    @graph
  end

  # Creates one vertex per distinct author label and attaches each row
  # (with nils removed) to its author's vertex.
  def make_vertices(twits, graph=@graph)
    # Use the non-destructive uniq: uniq! returns nil when there is nothing to
    # remove, which would assign nil as the vertex list.
    graph.vertices = twits.map { |twit| { label: twit[7] } }.uniq
    twits.each do |twit|
      vertex = graph.vertices.where(label: twit[7]).first
      vertex.twits ||= []
      vertex.twits << twit.compact
    end
  end

  # Adds an edge from the mentioned user to the author for every row whose
  # text contains an "@name" mention; the mentioned user's vertex is created
  # on demand.
  def make_edges(twits, graph=@graph)
    twits.each do |twit|
      atmark_screen_name = pickup_screen_name(twit[8])
      next unless atmark_screen_name

      source_vertex = graph.vertices.where(label: atmark_screen_name).first
      source_vertex ||= graph.vertices.build(label: atmark_screen_name)
      target_vertex = graph.vertices.where(label: twit[7]).first
      graph.edges << { source: source_vertex, target: target_vertex, label: twit[8], start: Time.parse(twit[11]) }
    end
  end

  # Extracts the first "@name" mention from +text+.
  #
  # @param text [String, nil] tweet text
  # @return [String, nil] the lower-cased screen name without the "@", or nil
  #   when +text+ is nil or contains no mention
  def pickup_screen_name(text)
    return nil if text.nil?

    mention = text.match(/(^|[^@0-9_a-zA-Z])@[0-9_a-zA-Z]+($|[^@0-9_a-zA-Z])/)
    return nil unless mention

    # The match includes the delimiter characters around the name and the "@";
    # strip everything that cannot be part of a screen name.
    mention[0].gsub(/[^0-9_a-zA-Z]/,"").downcase
  end

end
@@ -0,0 +1,8 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ module Rgraphum::Analyzer # Empty namespace module; the analyzer classes are loaded by the require_relative lines that follow.
4
+ end
5
+
6
+ require_relative 'analyzer/linear_regression'
7
+ require_relative 'analyzer/meme_tracker'
8
+ require_relative 'analyzer/twitter/rt_at_mark'
@@ -0,0 +1,67 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# A set of paths indexed by the id of each path's end vertex.
# Path objects are expected to respond to +end_vertex+ and +vertices+.
class Rgraphum::Cluster
  include Enumerable

  # @param paths [Array] zero or more paths to register
  def initialize(*paths)
    @paths = Hash[paths.map { |path| [path.end_vertex.id, path] }]
  end

  # @return [Array] all registered paths
  def paths
    @paths.values
  end

  # Yields every registered path.
  #
  # @return [Enumerator] when no block is given
  def each_path
    # Name the method explicitly: a bare to_enum would target #each.
    return to_enum(:each_path) unless block_given?

    @paths.each_value { |path| yield path }
  end

  # Enumerable is included above and needs #each to function.
  alias each each_path

  # Registers +path+, replacing any path with the same end vertex id.
  def add_path(path)
    @paths[path.end_vertex.id] = path
  end

  # @param end_vertex_id id of the end vertex
  # @return the path ending at that vertex, or nil
  def find_path(end_vertex_id)
    @paths[end_vertex_id]
  end

  # Appends +vertex+ to the registered path that shares +path+'s end vertex.
  def append_vertex(path, vertex)
    @paths[path.end_vertex.id].vertices << vertex
  end

  def empty?
    @paths.empty?
  end

  # @return [Boolean, nil] whether the path ending at +end_vertex+ contains
  #   +vertex+; nil when no such path is registered
  def have_vertex_in_path?(end_vertex, vertex)
    path = find_path(end_vertex.id)
    return unless path
    path.vertices.include?(vertex)
  end

  # @return [Boolean] whether any registered path contains +vertex+
  def have_vertex?(vertex)
    @paths.any? { |_id, path| path.vertices.include?(vertex) }
  end

  # @param end_vertex a vertex, or a bare vertex id
  # @return [Boolean] whether a path ending at that vertex is registered
  def have_end_vertex?(end_vertex)
    # Accept either a vertex or a raw id; only a missing #id method triggers
    # the fallback, other errors are no longer swallowed.
    end_vertex_id = begin
      end_vertex.id
    rescue NoMethodError
      end_vertex
    end

    @paths.any? { |_id, path| path.end_vertex.id == end_vertex_id }
  end

  # @return [Hash] end vertex => vertices of its path
  def to_hash
    hash = {}
    @paths.each_value do |path|
      hash[path.end_vertex] = path.vertices
    end
    hash
  end
end
@@ -0,0 +1,65 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Coerces +array+ into a Rgraphum::Communities.
# An object that is exactly a Rgraphum::Communities is returned untouched;
# anything else is passed to Rgraphum::Communities.new.
def Rgraphum::Communities(array)
  return array if array.instance_of?(Rgraphum::Communities)

  Rgraphum::Communities.new(array)
end
10
+
11
# Collection of Rgraphum::Community objects. Elements added through #build,
# #<< or #push are first converted with Rgraphum::Community().
class Rgraphum::Communities < Rgraphum::RgraphumArray

  # Non-Gremlin methods

  # FIXME
  # def dup
  # end

  # Keep the inherited appenders reachable before they are overridden below.
  alias_method :original_push_1, :<<
  alias_method :original_push_m, :push
  protected :original_push_1, :original_push_m

  # Adds a community to the collection.
  # @param [Hash] community_hash one community, given as a hash.
  # @return [Community] the added community.
  def build(community_hash={})
    new_community(community_hash).tap { |community| original_push_1(community) }
  end

  # Builds and appends a single community.
  # @return [Communities] self, so calls can be chained.
  def <<(community_hash)
    build(community_hash)
    self
  end

  # Builds and appends every given community, in order.
  # @return [Communities] self
  def push(*community_hashs)
    community_hashs.each { |community_hash| self << community_hash }
    self
  end

  # Called from delete_if, reject! and reject
  # Accepts either a community or the id of one.
  def delete(community_or_id)
    target_community =
      community_or_id.is_a?(Rgraphum::Community) ? community_or_id : where(id: community_or_id).first
    super(target_community)
  end

  private

  # Converts +community_hash+ into a Community. Hashes are copied and given
  # this collection's @graph plus an id from +new_id+ when they carry none;
  # other objects are handed to Rgraphum::Community() unchanged.
  def new_community(community_hash={})
    return Rgraphum::Community(community_hash) unless community_hash.is_a?(Hash)

    attributes = community_hash.dup
    attributes[:graph] = @graph
    attributes[:id] ||= new_id
    Rgraphum::Community(attributes)
  end
end
@@ -0,0 +1,86 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# Coerces +hash_or_community+ into a Rgraphum::Community.
# An object that is exactly a Rgraphum::Community is returned untouched;
# anything else is passed to Rgraphum::Community.new.
def Rgraphum::Community(hash_or_community)
  return hash_or_community if hash_or_community.instance_of?(Rgraphum::Community)

  Rgraphum::Community.new(hash_or_community)
end
10
+
11
# A group of vertices together with lazily computed, memoized edge sets.
# Call #update after changing the vertices to drop the memoized values.
class Rgraphum::Community
  attr_reader :graph
  attr_reader :id
  attr_reader :vertices

  # @param options [Hash] recognised keys: :id, :graph and :vertices
  def initialize(options={})
    @id, @graph = options[:id], options[:graph]
    @vertices = []

    (options[:vertices] || []).each { |vertex| add_vertex(vertex) }
  end

  # Appends +vertex+ to this community (memoized values are not reset).
  def add_vertex(vertex)
    @vertices << vertex
  end

  # Edges shared by two member vertices, gathered over every pair of members.
  def inter_edges
    @inter_edges ||= @vertices.combination(2).inject([]) do |shared, (vertex_a, vertex_b)|
      shared + (vertex_a.edges & vertex_b.edges)
    end
  end

  # Edges of member vertices that are not inter edges.
  def outer_edges
    @outer_edges ||= edges - inter_edges
  end

  # Every distinct edge attached to a member vertex.
  def edges
    @edges ||= begin
      member_edges = @vertices.map(&:edges).flatten.uniq
      Rgraphum::Edges.new(member_edges)
    end
  end

  # Edges this community has in common with +community+.
  def edges_from(community)
    common = edges & community.edges
    common || []
  end

  # Sum of +degree_weight+ over the member vertices.
  def degree_weight
    @vertices.inject(0) { |total, vertex| total + vertex.degree_weight }
  end

  # Sum of the weights of the inter edges (memoized).
  def sigma_in
    @sigma_in ||= inter_edges.inject(0) { |total, edge| total + edge.weight }
  end

  # Drops every memoized value so it is recomputed on next access.
  def update
    @inter_edges = @outer_edges = @edges = @sigma_tot = @sigma_in = nil
  end

  def neighborhood?
    raise NotImplementedError
  end

  # Moves the vertices of +other_community+ into this one, tagging each with
  # this community's id, then resets the memoized values.
  def merge(other_community)
    other_community.vertices.each do |vertex|
      vertex.community_id = id
      @vertices << vertex
    end
    update
  end

  # @return [Rgraphum::Graph] graph made of the members and their inter edges
  def to_graph
    Rgraphum::Graph.new(vertices: vertices, edges: inter_edges)
  end
end
@@ -0,0 +1,40 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
# CosineSimilarity
#  Computes the pairwise cosine similarity between the row vectors of a matrix.
#  ex. these three rows form an equilateral triangle:
#    [ [1,1,0],[1,0,1],[0,1,1] ]
#  the angle between any two rows is 60 degrees and cos 60 = 0.5,
#  thus the output is
#    [[1.0, 0.5, 0.5], [0.5, 1.0, 0.5], [0.5, 0.5, 1.0]]
#
class CosineSimilarityMatrix
  # Calculates the cosine similarity matrix.
  #
  # The diagonal is always 1.0. A pair that involves a zero-length vector has
  # no defined angle; its similarity is reported as 0.0 rather than NaN.
  #
  # @param matrix [Array<Array<Numeric>>] row vectors of equal length
  # @return [Array<Array<Float>>] symmetric matrix of cosine similarities
  # @raise [IndexError] when two rows differ in length
  def similarity(matrix)
    sim_matrix = []

    matrix.each_with_index do |row_fix, j|
      sim_array = []
      a2_sum = row_fix.inject(0.0) { |sum, a| sum + a**2 }

      matrix.each_with_index do |row_move, i|
        if j > i
          # Below the diagonal: mirror the value computed for an earlier row.
          sim_array << sim_matrix[i][j]
        elsif i == j
          sim_array << 1.0
        else
          b2_sum = 0.0
          ab_sum = 0.0

          [row_fix, row_move].transpose.each do |a, b|
            b2_sum += b**2
            ab_sum += a * b
          end

          norm_product = Math.sqrt(a2_sum * b2_sum)
          # Guard against 0.0 / 0.0 when either row is a zero vector.
          sim_array << (norm_product.zero? ? 0.0 : ab_sum / norm_product)
        end
      end

      sim_matrix << sim_array
    end

    sim_matrix
  end

end
40
+