rgraphum 0.0.1.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +26 -0
- data/GLOSSARIES.md +108 -0
- data/GREMLIN.md +1398 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +136 -0
- data/Rakefile +16 -0
- data/bin/.irbrc +41 -0
- data/bin/rgraphum_console +61 -0
- data/bin/rgraphum_runner +57 -0
- data/examples/ba_model/make.rb +19 -0
- data/examples/ba_model/make_dummy_twitter_rt_data.rb +0 -0
- data/examples/basic/check_modularity.rb +27 -0
- data/examples/basic/make_graph.rb +12 -0
- data/examples/parser/dot.rb +28 -0
- data/examples/sis_model/lifegame.rb +161 -0
- data/graph_struct.jpg +0 -0
- data/lib/rgraphum/analyzer/linear_regression.rb +31 -0
- data/lib/rgraphum/analyzer/meme_tracker.rb +296 -0
- data/lib/rgraphum/analyzer/twitter/rt_at_mark.rb +45 -0
- data/lib/rgraphum/analyzer.rb +8 -0
- data/lib/rgraphum/cluster.rb +67 -0
- data/lib/rgraphum/communities.rb +65 -0
- data/lib/rgraphum/community.rb +86 -0
- data/lib/rgraphum/cosine_similarity_matrix.rb +40 -0
- data/lib/rgraphum/edge.rb +194 -0
- data/lib/rgraphum/edges.rb +161 -0
- data/lib/rgraphum/ext/cosine_similarity_matrix.rb +79 -0
- data/lib/rgraphum/ext/linear_regression.rb +22 -0
- data/lib/rgraphum/ext/tf_idf.rb +52 -0
- data/lib/rgraphum/graph/gremlin.rb +193 -0
- data/lib/rgraphum/graph/math/clustering_coefficient.rb +53 -0
- data/lib/rgraphum/graph/math/community_detection.rb +141 -0
- data/lib/rgraphum/graph/math/degree_distribution.rb +50 -0
- data/lib/rgraphum/graph/math/dijkstra.rb +331 -0
- data/lib/rgraphum/graph/math.rb +45 -0
- data/lib/rgraphum/graph.rb +267 -0
- data/lib/rgraphum/importer.rb +97 -0
- data/lib/rgraphum/marshal.rb +26 -0
- data/lib/rgraphum/motifs.rb +8 -0
- data/lib/rgraphum/parsers/flare.rb +42 -0
- data/lib/rgraphum/parsers/gephi.rb +193 -0
- data/lib/rgraphum/parsers/graphviz.rb +78 -0
- data/lib/rgraphum/parsers/miserables.rb +54 -0
- data/lib/rgraphum/parsers.rb +32 -0
- data/lib/rgraphum/path.rb +37 -0
- data/lib/rgraphum/query.rb +130 -0
- data/lib/rgraphum/rgraphum_array.rb +159 -0
- data/lib/rgraphum/rgraphum_array_dividers.rb +43 -0
- data/lib/rgraphum/rgraphum_random.rb +5 -0
- data/lib/rgraphum/simulator/ba_model.rb +140 -0
- data/lib/rgraphum/simulator/sir_model.rb +178 -0
- data/lib/rgraphum/simulator/sis_model.rb +158 -0
- data/lib/rgraphum/simulator.rb +29 -0
- data/lib/rgraphum/statistic/power_law.rb +9 -0
- data/lib/rgraphum/t.rb +12 -0
- data/lib/rgraphum/tf_idf.rb +27 -0
- data/lib/rgraphum/version.rb +3 -0
- data/lib/rgraphum/vertex.rb +354 -0
- data/lib/rgraphum/vertices.rb +97 -0
- data/lib/rgraphum.rb +38 -0
- data/performance/add-vertices-edges.rb +20 -0
- data/performance/add-vertices.rb +12 -0
- data/performance/build-graph.rb +19 -0
- data/performance/delete-graph.rb +24 -0
- data/performance/delete-vertices.rb +25 -0
- data/performance/refer-graph.rb +23 -0
- data/rgraphum.gemspec +30 -0
- data/test/lib/rgraphum/analyzer/linear_regression_test.rb +20 -0
- data/test/lib/rgraphum/analyzer/meme_tracker_test.rb +383 -0
- data/test/lib/rgraphum/analyzer/twitter/rt_at_mark_test.rb +120 -0
- data/test/lib/rgraphum/array_test.rb +95 -0
- data/test/lib/rgraphum/bubble_test.rb +7 -0
- data/test/lib/rgraphum/communities_test.rb +53 -0
- data/test/lib/rgraphum/cosine_similarity_test.rb +18 -0
- data/test/lib/rgraphum/edge_test.rb +89 -0
- data/test/lib/rgraphum/edges_test.rb +178 -0
- data/test/lib/rgraphum/graph_builder_test.rb +64 -0
- data/test/lib/rgraphum/graph_dup_test.rb +199 -0
- data/test/lib/rgraphum/graph_plus_test.rb +80 -0
- data/test/lib/rgraphum/graph_test.rb +512 -0
- data/test/lib/rgraphum/gremlin_test.rb +145 -0
- data/test/lib/rgraphum/importers/idg_json_edges.json +20 -0
- data/test/lib/rgraphum/importers/idg_json_test.rb +207 -0
- data/test/lib/rgraphum/importers/idg_json_vertices.json +46 -0
- data/test/lib/rgraphum/math/average_distance_matrix_test.rb +142 -0
- data/test/lib/rgraphum/math/clustering_coefficient_test.rb +219 -0
- data/test/lib/rgraphum/math/community_test.rb +78 -0
- data/test/lib/rgraphum/math/degree_distribution_test.rb +40 -0
- data/test/lib/rgraphum/math/dijkstra_test.rb +146 -0
- data/test/lib/rgraphum/math/modularity_test.rb +154 -0
- data/test/lib/rgraphum/math/quick_average_distance_matrix_test.rb +84 -0
- data/test/lib/rgraphum/path_test.rb +44 -0
- data/test/lib/rgraphum/query/enumerable_test.rb +42 -0
- data/test/lib/rgraphum/query/where_operators_test.rb +75 -0
- data/test/lib/rgraphum/query/where_test.rb +59 -0
- data/test/lib/rgraphum/simulator/ba_model_test.rb +75 -0
- data/test/lib/rgraphum/simulator/sir_model_test.rb +513 -0
- data/test/lib/rgraphum/simulator/sis_model_test.rb +478 -0
- data/test/lib/rgraphum/simulator_test.rb +22 -0
- data/test/lib/rgraphum/tf_idf_test.rb +30 -0
- data/test/lib/rgraphum/vertex_test.rb +50 -0
- data/test/lib/rgraphum/vertices_test.rb +180 -0
- data/test/test_helper.rb +98 -0
- data/tmp/.gitkeep +0 -0
- metadata +254 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
class Rgraphum::Analyzer::MemeTracker
|
|
4
|
+
attr_accessor :distance_max_limit
|
|
5
|
+
attr_accessor :graph
|
|
6
|
+
attr_accessor :clusters
|
|
7
|
+
|
|
8
|
+
# Creates a tracker for +graph+.
#
# @param graph the graph to work on; a new Rgraphum::Graph when omitted
def initialize(graph=Rgraphum::Graph.new)
  # Stored through the writer generated by `attr_accessor :graph`.
  self.graph = graph
  # Default ceiling read by #edit_distance (as its default limit) and by
  # #find_shift_distance (as its recursion bound).
  @distance_max_limit = 5
end
|
|
12
|
+
|
|
13
|
+
# Distance between two word lists, or nil when they differ too much.
#
# @param words_a [Array] first word list
# @param words_b [Array] second word list
# @param limit [Integer] threshold for the pre-filter below only; the search
#   in #find_shift_distance is bounded by @distance_max_limit, not by this value
# @return [Integer, nil] whatever #find_shift_distance returns, or nil when
#   the pre-filter rejects the pair
def edit_distance(words_a, words_b, limit=@distance_max_limit)
  left = words_a.dup
  right = words_b.dup

  # Pre-filter: the distinct words present in only one of the two lists.
  only_in_one = (left - right) | (right - left)
  return nil if only_in_one.size > (limit * 2)

  find_shift_distance(left, right)
end
|
|
20
|
+
|
|
21
|
+
# Recursive word-by-word distance between two lists.
#
# Equal heads are consumed for free. A mismatch costs 1 and tries three
# continuations: drop the head of b, drop the head of a, or drop both.
#
# @param words_a [Array] remaining words of the first list
# @param words_b [Array] remaining words of the second list
# @param depth [Integer] number of mismatches already spent on this branch
# @return [Integer, nil] the cheapest distance found, or nil when every
#   branch exceeds @distance_max_limit mismatches
def find_shift_distance(words_a, words_b, depth=0)
  return nil if depth > @distance_max_limit

  # Once one side runs out, the rest of the other side is the remaining cost.
  return words_b.size if words_a.empty?
  return words_a.size if words_b.empty?

  rest_a = words_a[1..-1]
  rest_b = words_b[1..-1]

  # Matching heads do not consume any depth.
  return find_shift_distance(rest_a, rest_b, depth) if words_a[0] == words_b[0]

  next_depth = depth + 1
  candidates = [
    find_shift_distance(words_a, rest_b, next_depth),
    find_shift_distance(rest_a, words_b, next_depth),
    find_shift_distance(rest_a, rest_b, next_depth)
  ]

  # nil entries are branches that ran past the limit.
  cheapest = candidates.compact.min
  return nil unless cheapest

  1 + cheapest
end
|
|
45
|
+
|
|
46
|
+
###############################################################
|
|
47
|
+
|
|
48
|
+
# Builds communities from a copy of @graph.
#
# Steps, as performed below:
# 1. build one cluster per start root vertex (see #start_root_vertices);
# 2. group the clusters' end vertices with #vertex_id_map;
# 3. attach every start root to the first group its cluster reaches;
# 4. per group, try every ordering of its start roots against every repeated
#    permutation of its end vertices, build communities for each combination
#    and keep the first combination with the largest summed sigma_in.
#
# NOTE(review): step 4 grows factorially with the number of start roots in a
# group.
#
# @return [Array] the communities selected for every group, concatenated
def phrase_clusters
  working_graph = @graph.dup
  root_vertices = start_root_vertices(working_graph)
  # The result is not used below; the call is kept as the original made it.
  end_root_vertices(working_graph)

  built_clusters = []
  end_vertex_groups = []
  root_vertices.each do |root_vertex|
    built = build_cluster(root_vertex)
    built_clusters << built
    end_vertex_groups << built.paths.map { |path| path.end_vertex }
  end
  end_vertex_groups = vertex_id_map(end_vertex_groups)

  # group of end vertices => start roots whose cluster reaches that group
  roots_by_group = {}
  built_clusters.each_with_index do |built, position|
    reached = built.paths.map { |path| path.end_vertex }
    group = end_vertex_groups.find { |candidate| !(reached & candidate).empty? }
    next unless group

    roots_by_group[group] ||= []
    roots_by_group[group] << root_vertices[position]
  end

  selected = []
  roots_by_group.each do |group, roots|
    root_orderings = roots.permutation
    end_assignments = group.repeated_permutation(roots.size)

    candidates = []
    root_orderings.each do |ordered_roots|
      end_assignments.each do |assigned_ends|
        candidates << make_communities(ordered_roots, assigned_ends)
      end
    end

    scores = candidates.map { |communities| sum_sigma_in(communities) }
    best_score = scores.max
    # index returns the first position holding the best score.
    selected += candidates[scores.index(best_score)]
  end
  selected
end
|
|
95
|
+
|
|
96
|
+
# Merges key groups that share at least one element, directly or through a
# chain of other groups, so the returned groups are pairwise disjoint.
#
# The previous implementation detected "nothing left to merge" by comparing
# array sizes. Three mutually overlapping groups (e.g. [[1,2],[2,3],[1,3]])
# kept the size unchanged, so the unmerged input was returned.
#
# Untouched groups keep their relative order; a merged group takes the
# position of the last input group that joined it.
#
# @param cluster_keys [Array<Array>] groups of keys
# @return [Array<Array>] +cluster_keys+ itself when nothing overlaps,
#   otherwise a new array of merged groups
def vertex_id_map(cluster_keys)
  return cluster_keys if cluster_keys.size < 2

  merged_groups = []
  cluster_keys.each do |keys|
    # merged_groups is pairwise disjoint at this point, so everything that
    # overlaps +keys+ collapses into a single group together with +keys+.
    overlapping, disjoint = merged_groups.partition { |group| !(group & keys).empty? }
    merged_groups = disjoint << (overlapping + [keys]).inject(:|)
  end

  # Every merge removes at least one group, so equal sizes mean no merge.
  merged_groups.size == cluster_keys.size ? cluster_keys : merged_groups
end
|
|
114
|
+
|
|
115
|
+
# Adds up +sigma_in+ over all given communities.
#
# @param communities [Enumerable] objects responding to +sigma_in+
# @return [Numeric] the total; 0 for an empty collection
def sum_sigma_in(communities)
  communities.inject(0) do |total, community|
    total + community.sigma_in
  end
end
|
|
120
|
+
|
|
121
|
+
# Pairs each start vertex with the end vertex at the same position, collects
# the vertices found between them and turns each end vertex's collection into
# a community.
#
# @param graph_start_root_vertices [Array] start vertices
# @param graph_end_root_vertices [Array] end vertices, same length as the
#   start list (transpose raises IndexError otherwise)
# @return the result of Rgraphum::Communities(...) over the built communities
def make_communities(graph_start_root_vertices, graph_end_root_vertices)
  members_by_end_id = {}
  visited = {}

  [graph_start_root_vertices, graph_end_root_vertices].transpose.each do |start_vertex, end_vertex|
    # +visited+ is shared across pairs, so vertices claimed by an earlier
    # pair are not collected again.
    found, visited = find_cluster_with_used_vertices(start_vertex, end_vertex, visited)
    next unless found

    end_id = end_vertex.id
    existing = members_by_end_id[end_id]
    members_by_end_id[end_id] =
      if existing
        existing | found
      else
        found
      end
  end

  communities = members_by_end_id.map do |_end_id, members|
    Rgraphum::Community.new(vertices: members)
  end

  Rgraphum::Communities(communities)
end
|
|
145
|
+
|
|
146
|
+
# NOTE: this appears to search for a cluster (translated from the original note).
#
# Convenience wrapper around #find_cluster_with_used_vertices that starts with
# an empty bookkeeping hash and discards it afterwards.
#
# @return [Array, nil] the collected vertices, or nil when nothing was found
def find_cluster(start_vertex, end_vertex)
  find_cluster_with_used_vertices(start_vertex, end_vertex, {}).first
end
|
|
151
|
+
|
|
152
|
+
# Collects the vertices on the ways from +start_vertex+ to +end_vertex+,
# following +out+ links, while recording every collected vertex in
# +used_vertices+ (vertex => the end vertex it was collected for).
#
# FIXME rename cluster
#
# @param start_vertex vertex to start from; must respond to +out+
# @param end_vertex vertex to reach
# @param used_vertices [Hash] bookkeeping hash; mutated in place
# @return [Array] a pair +[vertices, used_vertices]+ where +vertices+ is
#   - an Array of newly collected vertices (deepest first, start last),
#   - [] when +start_vertex+ was already collected for this +end_vertex+,
#   - nil when it was collected for another end vertex or no way exists
def find_cluster_with_used_vertices(start_vertex, end_vertex, used_vertices)
  claimed_for = used_vertices[start_vertex]
  if claimed_for
    return [[], used_vertices] if claimed_for == end_vertex
    return [nil, used_vertices]
  end

  if start_vertex == end_vertex
    used_vertices[start_vertex] = end_vertex
    return [[start_vertex], used_vertices]
  end

  # Dead end: not the target and nowhere left to go.
  return [nil, used_vertices] if start_vertex.out.empty?

  collected = nil
  start_vertex.out.each do |next_vertex|
    deeper, used_vertices = find_cluster_with_used_vertices(next_vertex, end_vertex, used_vertices)
    next unless deeper

    collected = (collected || []) + deeper
  end

  # Only claim the start vertex when at least one branch led to the target.
  if collected
    collected << start_vertex
    used_vertices[start_vertex] = end_vertex
  end

  [collected, used_vertices]
end
|
|
188
|
+
|
|
189
|
+
# NOTE: possibly looking for isolated clusters? (translated from the original note)
#
# Walks the +out+ links below +start_vertex+ and records, per end vertex (a
# vertex whose +out+ is empty), the vertices that lead to it.
#
# @param start_vertex vertex to start from; must respond to +out+
# @param cluster [Rgraphum::Cluster, nil] cluster to extend; a new one is
#   created when nil
# @return [Rgraphum::Cluster] the extended cluster
def build_cluster(start_vertex, cluster=nil)
  cluster = Rgraphum::Cluster.new unless cluster

  start_vertex.out.each do |successor|
    next if cluster.have_vertex_in_path?(successor, start_vertex)

    unless successor.out.empty?
      # Inner vertex: skip when both ends of this link are already recorded.
      next if cluster.have_vertex?(successor) && cluster.have_vertex?(start_vertex)

      cluster = build_cluster(successor, cluster)
      # Add start_vertex to every path that goes through the successor.
      cluster.each_path do |path|
        next unless path.include?(successor)
        next if path.include?(start_vertex)

        cluster.append_vertex(path, start_vertex)
      end
      next
    end

    # End vertex: extend its path, or open a new one.
    if cluster.have_end_vertex?(successor)
      cluster.append_vertex(cluster.find_path(successor.id), start_vertex)
    else
      cluster.add_path(Rgraphum::Path.new(successor, [successor, start_vertex]))
    end
  end

  cluster
end
|
|
215
|
+
|
|
216
|
+
# Vertices with no incoming links but at least one outgoing link.
#
# @param target_graph graph to inspect; defaults to @graph
# @return the matching vertices, as returned by +find_all+
def start_root_vertices(target_graph=@graph)
  target_graph.vertices.find_all do |vertex|
    vertex.in.empty? && !vertex.out.empty?
  end
end
|
|
219
|
+
|
|
220
|
+
# Vertices with at least one incoming link but no outgoing link.
#
# @param target_graph graph to inspect; defaults to @graph
# @return the matching vertices, as returned by +find_all+
def end_root_vertices(target_graph=@graph)
  target_graph.vertices.find_all do |vertex|
    !vertex.in.empty? && vertex.out.empty?
  end
end
|
|
223
|
+
|
|
224
|
+
# Depth-first collection of the vertices reachable from +target_vertex+ via
# +out+, appended deepest first with +target_vertex+ last.
#
# Side effect: when following a successor adds nothing new (the collection
# size is unchanged), the edge from +target_vertex+ to that successor is
# deleted from +target_vertex.edges+.
#
# @param target_vertex vertex to expand
# @param vertices collection of vertices gathered so far; mutated in place
# @return the collection with the newly reached vertices appended
def find_path(target_vertex, vertices=Rgraphum::Vertices.new)
  return vertices if vertices.include?(target_vertex)

  if target_vertex.out.empty?
    return vertices << target_vertex
  end

  collected = vertices
  target_vertex.out.each do |successor|
    size_before = collected.size
    collected = find_path(successor, collected)
    next unless collected.size == size_before

    # Nothing new was reached through this successor: drop the edge to it.
    redundant_edge = target_vertex.edges.where(target: successor).first
    target_vertex.edges.delete(redundant_edge)
  end

  collected << target_vertex
end
|
|
238
|
+
|
|
239
|
+
# Builds one graph per start root vertex of a copy of +graph+, from the
# vertices #find_path collects for that root.
#
# Progress messages are printed with +p+ when Rgraphum.verbose? is true.
#
# @param graph graph to copy and analyse; defaults to @graph
# @return [Array] one graph (result of +to_graph+) per start root vertex
def make_path_graph(graph=@graph)
  trace = lambda { |message| p message if Rgraphum.verbose? }

  trace.call("in make path graph")
  copied_graph = graph.dup

  trace.call("find srn")
  roots = start_root_vertices(copied_graph)

  trace.call("find path and to_graph")
  roots.map do |root|
    Rgraphum::Vertices.new(find_path(root)).to_graph
  end
end
|
|
249
|
+
|
|
250
|
+
# Combines the per-root graphs from #make_path_graph into one new graph and
# compacts it by :id.
#
# @param graph source graph; defaults to @graph
# @return the result of +compact_with(:id)+ on the combined graph
def cut_edges_with_srn(graph=@graph)
  combined = Rgraphum::Graph.new

  path_graphs = make_path_graph(graph)

  combined.vertices = path_graphs.map(&:vertices).flatten
  combined.edges = path_graphs.map(&:edges).flatten

  combined.compact_with(:id)
end
|
|
260
|
+
|
|
261
|
+
# For every pair of vertices with equal +words+, increments +count+ on the
# vertex that comes first in the pair (a nil count is treated as 0).
#
# @param graph graph whose vertices are compared pairwise; defaults to @graph
def count_same_words_vertices(graph=@graph)
  graph.vertices.combination(2).each do |earlier, later|
    next unless earlier.words == later.words

    earlier.count = earlier.count.to_i + 1
  end
end
|
|
266
|
+
|
|
267
|
+
# Sorts the vertices by +start+ and adds an edge for every vertex pair whose
# word lists are within edit distance.
#
# A pair is skipped when the later vertex has a +start+, the earlier one has
# an +end+, and +within_term+ rejects the pair. The edge weight is
# 1 / (distance + 1).
#
# @param graph graph to extend in place; defaults to @graph
# @return the graph's edges
def make_edges(graph=@graph)
  graph.vertices.sort! { |a, b| a.start.to_f <=> b.start.to_f }

  graph.vertices.combination(2).each do |earlier, later|
    next if later.start && earlier.end && !earlier.within_term(later)

    distance = edit_distance(earlier.words, later.words)
    next unless distance

    graph.edges << { source: earlier, target: later, weight: (1.0 / (distance + 1)) }
  end

  graph.edges
end
|
|
283
|
+
|
|
284
|
+
# Replaces @graph with a new graph built from +phrase_array+: the phrases
# become vertices, equal-word vertices are counted, the graph is compacted
# by :words, and edges are added by #make_edges.
#
# @param phrase_array phrases assigned to the new graph's vertices
# @return the new graph (also stored in @graph)
def make_graph(phrase_array)
  fresh_graph = Rgraphum::Graph.new
  @graph = fresh_graph
  fresh_graph.vertices = phrase_array

  count_same_words_vertices(fresh_graph)
  fresh_graph.compact_with(:words, fresh_graph)
  make_edges(fresh_graph)

  fresh_graph
end
|
|
296
|
+
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
require 'time'
|
|
4
|
+
|
|
5
|
+
# Builds a graph from tweet rows: one vertex per distinct value in column 7
# and one edge per row whose column 8 contains an @mention.
#
# NOTE(review): the columns look like screen name (7), tweet text (8) and
# timestamp (11) — confirm against the data source.
class Rgraphum::Analyzer::RTAtMark

  # Adds a +twits+ field to vertices so the rows of each account can be kept.
  class Rgraphum::Vertex
    field :twits
  end

  # Builds a new graph from +twits+ and stores it in @graph.
  #
  # @param twits [Array<Array>] tweet rows
  # @return the new graph
  def make_graph(twits)
    @graph = Rgraphum::Graph.new

    make_vertices(twits)
    make_edges(twits)

    @graph
  end

  # Creates one vertex per distinct column-7 value and attaches every row
  # (nils removed) to the vertex with the matching label.
  #
  # @param twits [Array<Array>] tweet rows
  # @param graph graph to fill; defaults to @graph
  def make_vertices(twits, graph=@graph)
    # uniq, not uniq!: uniq! returns nil when there is nothing to remove,
    # which would have assigned nil to graph.vertices.
    graph.vertices = twits.map { |twit| { label: twit[7] } }.uniq
    twits.each do |twit|
      vertex = graph.vertices.where(label: twit[7]).first
      vertex.twits ||= []
      vertex.twits << twit.compact
    end
  end

  # Adds an edge from the mentioned account to the row's own account for
  # every row whose column 8 contains an @mention. A vertex is built for a
  # mentioned account that has none yet.
  #
  # @param twits [Array<Array>] tweet rows
  # @param graph graph to extend; defaults to @graph
  def make_edges(twits, graph=@graph)
    twits.each do |twit|
      atmark_screen_name = pickup_screen_name(twit[8])
      next unless atmark_screen_name

      source_vertex = graph.vertices.where(label: atmark_screen_name).first
      source_vertex ||= graph.vertices.build(label: atmark_screen_name)
      target_vertex = graph.vertices.where(label: twit[7]).first
      graph.edges << { source: source_vertex, target: target_vertex, label: twit[8], start: Time.parse(twit[11]) }
    end
  end

  # Extracts the first @mention from +text+.
  #
  # @param text [String, nil] text to scan
  # @return [String, nil] the mentioned name, lower-cased and without the
  #   "@"; nil when +text+ is nil or contains no mention
  def pickup_screen_name(text)
    return nil if text.nil?

    mention = text.match(/(^|[^@0-9_a-zA-Z])@[0-9_a-zA-Z]+($|[^@0-9_a-zA-Z])/)
    return nil unless mention

    # The match includes the delimiters around the name and the "@" itself;
    # strip everything that cannot be part of a name.
    mention[0].gsub(/[^0-9_a-zA-Z]/, "").downcase
  end

end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
# A set of paths indexed by the id of each path's end vertex.
#
# Enumerable is included, so #each is provided (as an alias of #each_path);
# without it every Enumerable method, and the enumerator returned by a
# block-less #each_path, raised NoMethodError.
class Rgraphum::Cluster
  include Enumerable

  # @param paths paths to register; each must respond to +end_vertex+.
  #   A later path replaces an earlier one with the same end vertex id.
  def initialize(*paths)
    @paths = paths.each_with_object({}) do |path, index|
      index[path.end_vertex.id] = path
    end
  end

  # @return [Array] the registered paths
  def paths
    @paths.values
  end

  # Yields every registered path.
  #
  # @return [Enumerator] over the paths when no block is given
  def each_path
    return to_enum(:each_path) unless block_given?

    @paths.each do |_id, path|
      yield path
    end
  end
  alias_method :each, :each_path

  # Registers +path+ under its end vertex id, replacing any previous entry.
  def add_path(path)
    @paths[path.end_vertex.id] = path
  end

  # @param end_vertex_id id of an end vertex
  # @return the path registered for that id, or nil
  def find_path(end_vertex_id)
    @paths[end_vertex_id]
  end

  # Appends +vertex+ to the registered path that shares +path+'s end vertex.
  def append_vertex(path, vertex)
    @paths[path.end_vertex.id].vertices << vertex
  end

  # @return [Boolean] true when no path is registered
  def empty?
    @paths.empty?
  end

  # @return [Boolean, nil] whether the path ending at +end_vertex+ contains
  #   +vertex+; nil when no such path is registered
  def have_vertex_in_path?(end_vertex, vertex)
    path = find_path(end_vertex.id)
    return unless path

    path.vertices.include?(vertex)
  end

  # @return [Boolean] whether any registered path contains +vertex+
  def have_vertex?(vertex)
    @paths.any? do |_id, path|
      path.vertices.include?(vertex)
    end
  end

  # @param end_vertex a vertex, or an end vertex id
  # @return [Boolean] whether a path ending at that vertex is registered
  def have_end_vertex?(end_vertex)
    # The rescue lets callers pass a bare id instead of a vertex; kept as is
    # because how vertices expose +id+ is not visible from this file.
    @paths.any? { |_id, path| path.end_vertex.id == (end_vertex.id rescue end_vertex) }
  end

  # @return [Hash] end vertex => vertices of its path
  def to_hash
    @paths.each_with_object({}) do |(_id, path), hash|
      hash[path.end_vertex] = path.vertices
    end
  end
end
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
# Conversion function: returns +array+ unchanged when it is exactly a
# Rgraphum::Communities, otherwise wraps it in a new one.
def Rgraphum::Communities(array)
  return array if array.instance_of?(Rgraphum::Communities)

  Rgraphum::Communities.new(array)
end
|
|
10
|
+
|
|
11
|
+
# Collection of communities. Everything pushed into it goes through
# Rgraphum::Community(), so hashes are turned into Community objects first.
class Rgraphum::Communities < Rgraphum::RgraphumArray

  # Non-Gremlin methods

  # FIXME
  # def dup
  # end

  # Adds a community to the collection.
  # @param [Hash] community_hash one community, given as a hash.
  # @return [Community] the added community.
  def build(community_hash={})
    new_community(community_hash).tap do |community|
      original_push_1(community)
    end
  end

  # Keep the inherited << reachable before it is redefined below.
  alias :original_push_1 :<<

  # Builds a community from +community_hash+ and appends it.
  # @return [Communities] self, so calls can be chained
  def <<(community_hash)
    build(community_hash)
    self
  end

  # Keep the inherited push reachable before it is redefined below.
  alias :original_push_m :push

  # Appends every argument through #<<.
  # @return [Communities] self
  def push(*community_hashs)
    community_hashs.each { |community_hash| self << community_hash }
    self
  end

  # Called from delete_if, reject! and reject
  #
  # @param community_or_id a Community, or the id of one in this collection
  def delete(community_or_id)
    target_community = community_or_id
    unless community_or_id.is_a?(Rgraphum::Community)
      target_community = where(id: community_or_id).first
    end
    super(target_community)
  end

  protected :original_push_1
  protected :original_push_m

  private

  # Turns +community_hash+ into a Community. A Hash is copied first and
  # given this collection's @graph and, when it has none, a new id.
  def new_community(community_hash={})
    return Rgraphum::Community(community_hash) unless community_hash.is_a?(Hash)

    attributes = community_hash.dup
    attributes[:graph] = @graph
    attributes[:id] ||= new_id
    Rgraphum::Community(attributes)
  end
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
# Conversion function: returns the argument unchanged when it is exactly a
# Rgraphum::Community, otherwise passes it to Rgraphum::Community.new.
def Rgraphum::Community(hash_or_community)
  return hash_or_community if hash_or_community.instance_of?(Rgraphum::Community)

  Rgraphum::Community.new(hash_or_community)
end
|
|
10
|
+
|
|
11
|
+
# A group of vertices with cached views of the edges inside and around it.
#
# The edge collections and sigma_in are memoized; #update clears the caches.
# NOTE(review): #add_vertex does not clear them — confirm callers invoke
# #update themselves when needed.
class Rgraphum::Community
  attr_reader :graph, :id, :vertices

  # @param options [Hash] :id, :graph and optionally :vertices (each one is
  #   added through #add_vertex)
  def initialize(options={})
    @id = options[:id]
    @graph = options[:graph]
    @vertices = []

    (options[:vertices] || []).each { |vertex| add_vertex(vertex) }
  end

  # Appends +vertex+ to the community.
  def add_vertex(vertex)
    @vertices << vertex
  end

  # Edges shared by two vertices of the community (memoized).
  def inter_edges
    unless @inter_edges
      @inter_edges = []
      @vertices.combination(2) do |vertex_a, vertex_b|
        @inter_edges += (vertex_a.edges & vertex_b.edges)
      end
    end
    @inter_edges
  end

  # Edges of the community's vertices that are not inter edges (memoized).
  def outer_edges
    return @outer_edges if @outer_edges

    @outer_edges = edges - inter_edges
  end

  # All distinct edges of the community's vertices (memoized).
  def edges
    return @edges if @edges

    @edges = Rgraphum::Edges.new(@vertices.map(&:edges).flatten.uniq)
  end

  # Edges this community has in common with +community+.
  def edges_from(community)
    shared = edges & community.edges
    shared || []
  end

  # Sum of +degree_weight+ over the community's vertices.
  def degree_weight
    total = 0
    @vertices.each { |vertex| total += vertex.degree_weight }
    total
  end

  # Sum of the weights of the inter edges (memoized).
  def sigma_in
    return @sigma_in if @sigma_in

    @sigma_in = inter_edges.inject(0) { |sum, edge| sum + edge.weight }
  end

  # Clears every memoized value so it is recomputed on next access.
  def update
    @inter_edges = @outer_edges = @edges = @sigma_tot = @sigma_in = nil
  end

  def neighborhood?
    raise NotImplementedError
  end

  # Moves the vertices of +other_community+ into this one: each vertex gets
  # this community's id and is appended, then the caches are cleared.
  def merge(other_community)
    other_community.vertices.each do |vertex|
      vertex.community_id = id
      @vertices << vertex
    end
    update
  end

  # @return a new graph made of the community's vertices and inter edges
  def to_graph
    Rgraphum::Graph.new(vertices: vertices, edges: inter_edges)
  end
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
# CosineSimilarity
# Computes the pairwise cosine similarity between the row vectors of a matrix.
# ex. three vectors that are pairwise 60 degrees apart
#   [ [1,1,0],[1,0,1],[0,1,1] ]
# cosine 60 = 0.5, thus the output is
#   [[1.0, 0.5, 0.5], [0.5, 1.0, 0.5], [0.5, 0.5, 1.0]]
#
class CosineSimilarityMatrix
  # Calculates the cosine similarity matrix.
  #
  # The diagonal is always 1.0. A pair involving a zero-length vector has no
  # defined angle; it is reported as 0.0 instead of NaN.
  #
  # @param matrix [Array<Array<Numeric>>] row vectors of equal length
  # @return [Array<Array<Float>>] symmetric matrix of similarities
  # @raise [IndexError] when two compared rows differ in length
  def similarity(matrix)
    squared_norms = matrix.map do |row|
      row.inject(0.0) { |sum, value| sum + value**2 }
    end

    sim_matrix = []
    matrix.each_with_index do |row_fix, j|
      sim_array = matrix.each_with_index.map do |row_move, i|
        if i < j
          # Symmetric: already computed while building row i.
          sim_matrix[i][j]
        elsif i == j
          1.0
        else
          cosine(row_fix, row_move, squared_norms[j], squared_norms[i])
        end
      end
      sim_matrix << sim_array
    end
    sim_matrix
  end

  private

  # Cosine of the angle between two vectors whose squared norms are known.
  def cosine(row_a, row_b, a2_sum, b2_sum)
    ab_sum = [row_a, row_b].transpose.inject(0.0) { |sum, (a, b)| sum + a * b }
    denominator = Math.sqrt(a2_sum * b2_sum)
    return 0.0 if denominator.zero?

    ab_sum / denominator
  end
end
|
|
40
|
+
|