rgraphum 0.0.1.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +26 -0
- data/GLOSSARIES.md +108 -0
- data/GREMLIN.md +1398 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +136 -0
- data/Rakefile +16 -0
- data/bin/.irbrc +41 -0
- data/bin/rgraphum_console +61 -0
- data/bin/rgraphum_runner +57 -0
- data/examples/ba_model/make.rb +19 -0
- data/examples/ba_model/make_dummy_twitter_rt_data.rb +0 -0
- data/examples/basic/check_modularity.rb +27 -0
- data/examples/basic/make_graph.rb +12 -0
- data/examples/parser/dot.rb +28 -0
- data/examples/sis_model/lifegame.rb +161 -0
- data/graph_struct.jpg +0 -0
- data/lib/rgraphum/analyzer/linear_regression.rb +31 -0
- data/lib/rgraphum/analyzer/meme_tracker.rb +296 -0
- data/lib/rgraphum/analyzer/twitter/rt_at_mark.rb +45 -0
- data/lib/rgraphum/analyzer.rb +8 -0
- data/lib/rgraphum/cluster.rb +67 -0
- data/lib/rgraphum/communities.rb +65 -0
- data/lib/rgraphum/community.rb +86 -0
- data/lib/rgraphum/cosine_similarity_matrix.rb +40 -0
- data/lib/rgraphum/edge.rb +194 -0
- data/lib/rgraphum/edges.rb +161 -0
- data/lib/rgraphum/ext/cosine_similarity_matrix.rb +79 -0
- data/lib/rgraphum/ext/linear_regression.rb +22 -0
- data/lib/rgraphum/ext/tf_idf.rb +52 -0
- data/lib/rgraphum/graph/gremlin.rb +193 -0
- data/lib/rgraphum/graph/math/clustering_coefficient.rb +53 -0
- data/lib/rgraphum/graph/math/community_detection.rb +141 -0
- data/lib/rgraphum/graph/math/degree_distribution.rb +50 -0
- data/lib/rgraphum/graph/math/dijkstra.rb +331 -0
- data/lib/rgraphum/graph/math.rb +45 -0
- data/lib/rgraphum/graph.rb +267 -0
- data/lib/rgraphum/importer.rb +97 -0
- data/lib/rgraphum/marshal.rb +26 -0
- data/lib/rgraphum/motifs.rb +8 -0
- data/lib/rgraphum/parsers/flare.rb +42 -0
- data/lib/rgraphum/parsers/gephi.rb +193 -0
- data/lib/rgraphum/parsers/graphviz.rb +78 -0
- data/lib/rgraphum/parsers/miserables.rb +54 -0
- data/lib/rgraphum/parsers.rb +32 -0
- data/lib/rgraphum/path.rb +37 -0
- data/lib/rgraphum/query.rb +130 -0
- data/lib/rgraphum/rgraphum_array.rb +159 -0
- data/lib/rgraphum/rgraphum_array_dividers.rb +43 -0
- data/lib/rgraphum/rgraphum_random.rb +5 -0
- data/lib/rgraphum/simulator/ba_model.rb +140 -0
- data/lib/rgraphum/simulator/sir_model.rb +178 -0
- data/lib/rgraphum/simulator/sis_model.rb +158 -0
- data/lib/rgraphum/simulator.rb +29 -0
- data/lib/rgraphum/statistic/power_law.rb +9 -0
- data/lib/rgraphum/t.rb +12 -0
- data/lib/rgraphum/tf_idf.rb +27 -0
- data/lib/rgraphum/version.rb +3 -0
- data/lib/rgraphum/vertex.rb +354 -0
- data/lib/rgraphum/vertices.rb +97 -0
- data/lib/rgraphum.rb +38 -0
- data/performance/add-vertices-edges.rb +20 -0
- data/performance/add-vertices.rb +12 -0
- data/performance/build-graph.rb +19 -0
- data/performance/delete-graph.rb +24 -0
- data/performance/delete-vertices.rb +25 -0
- data/performance/refer-graph.rb +23 -0
- data/rgraphum.gemspec +30 -0
- data/test/lib/rgraphum/analyzer/linear_regression_test.rb +20 -0
- data/test/lib/rgraphum/analyzer/meme_tracker_test.rb +383 -0
- data/test/lib/rgraphum/analyzer/twitter/rt_at_mark_test.rb +120 -0
- data/test/lib/rgraphum/array_test.rb +95 -0
- data/test/lib/rgraphum/bubble_test.rb +7 -0
- data/test/lib/rgraphum/communities_test.rb +53 -0
- data/test/lib/rgraphum/cosine_similarity_test.rb +18 -0
- data/test/lib/rgraphum/edge_test.rb +89 -0
- data/test/lib/rgraphum/edges_test.rb +178 -0
- data/test/lib/rgraphum/graph_builder_test.rb +64 -0
- data/test/lib/rgraphum/graph_dup_test.rb +199 -0
- data/test/lib/rgraphum/graph_plus_test.rb +80 -0
- data/test/lib/rgraphum/graph_test.rb +512 -0
- data/test/lib/rgraphum/gremlin_test.rb +145 -0
- data/test/lib/rgraphum/importers/idg_json_edges.json +20 -0
- data/test/lib/rgraphum/importers/idg_json_test.rb +207 -0
- data/test/lib/rgraphum/importers/idg_json_vertices.json +46 -0
- data/test/lib/rgraphum/math/average_distance_matrix_test.rb +142 -0
- data/test/lib/rgraphum/math/clustering_coefficient_test.rb +219 -0
- data/test/lib/rgraphum/math/community_test.rb +78 -0
- data/test/lib/rgraphum/math/degree_distribution_test.rb +40 -0
- data/test/lib/rgraphum/math/dijkstra_test.rb +146 -0
- data/test/lib/rgraphum/math/modularity_test.rb +154 -0
- data/test/lib/rgraphum/math/quick_average_distance_matrix_test.rb +84 -0
- data/test/lib/rgraphum/path_test.rb +44 -0
- data/test/lib/rgraphum/query/enumerable_test.rb +42 -0
- data/test/lib/rgraphum/query/where_operators_test.rb +75 -0
- data/test/lib/rgraphum/query/where_test.rb +59 -0
- data/test/lib/rgraphum/simulator/ba_model_test.rb +75 -0
- data/test/lib/rgraphum/simulator/sir_model_test.rb +513 -0
- data/test/lib/rgraphum/simulator/sis_model_test.rb +478 -0
- data/test/lib/rgraphum/simulator_test.rb +22 -0
- data/test/lib/rgraphum/tf_idf_test.rb +30 -0
- data/test/lib/rgraphum/vertex_test.rb +50 -0
- data/test/lib/rgraphum/vertices_test.rb +180 -0
- data/test/test_helper.rb +98 -0
- data/tmp/.gitkeep +0 -0
- metadata +254 -0
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
# Conversion function: returns the argument untouched when it is exactly a
# Rgraphum::Edge instance, otherwise passes it to Rgraphum::Edge.new.
#
# @param hash_or_edge [Rgraphum::Edge, Hash] an edge or the fields to build one
# @return [Rgraphum::Edge]
def Rgraphum::Edge(hash_or_edge)
  return hash_or_edge if hash_or_edge.instance_of?(Rgraphum::Edge)

  Rgraphum::Edge.new(hash_or_edge)
end
|
|
10
|
+
|
|
11
|
+
# A single edge connecting a source vertex to a target vertex.
#
# Field values live in per-field instance variables. The legal field names are
# declared with the +field+ class macro at the bottom of this class; each
# declaration adds a reader/writer pair and makes the field part of #to_hash.
class Rgraphum::Edge #< Hash
  # Graph this edge is attached to, if any.
  attr_accessor :graph
  # attr_accessor :vertex

  # Builds an edge from a hash of field values.
  #
  # @param fields [Hash{Symbol=>Object}] values keyed by declared field name;
  #   :source and :target are mandatory, :weight falls back to 1
  # @raise [ArgumentError] when :source or :target is missing (or falsy), or
  #   when a key is not a declared field
  def initialize(fields={})
    unless fields[:source] && fields[:target]
      raise ArgumentError, "Edge.new: :source and :target options are required"
    end

    # Work on a copy so the caller's hash keeps its :source/:target entries
    # and can be reused after this call.
    remaining = fields.dup
    self.source = remaining.delete(:source)
    self.target = remaining.delete(:target)

    unknown_fields = remaining.keys - @@field_names
    unless unknown_fields.empty?
      raise ArgumentError, "No such field(s) in Edge: #{unknown_fields.join(', ')}"
    end
    remaining.each { |name, value| send("#{name}=", value) }

    self.weight ||= 1
  end

  # Gremlin: outV
  #
  # Get the outgoing (tail) vertex of the edge.
  #
  #   gremlin> e = g.e(12)
  #   ==>e[12][6-created->3]
  #   gremlin> e.outV
  #   ==>v[6]
  #
  # @return the value of the :source field
  def outV
    source
  end
  alias :out_v :outV

  # Gremlin: inV
  #
  # Get the incoming (head) vertex of the edge.
  #
  #   gremlin> e = g.e(12)
  #   ==>e[12][6-created->3]
  #   gremlin> e.inV
  #   ==>v[3]
  #
  # @return the value of the :target field
  def inV
    target
  end
  alias :in_v :inV

  # Gremlin: bothV
  #
  # Get both the outgoing and the incoming vertex of the edge.
  #
  #   gremlin> e = g.e(12)
  #   ==>e[12][6-created->3]
  #   gremlin> e.bothV
  #   ==>v[6]
  #   ==>v[3]
  #
  # @return [Array] two elements: [source, target]
  def bothV
    [outV, inV]
  end
  alias :both_v :bothV


  # Non-Gremlin methods


  # def id
  #   self[:id]
  # end

  # Re-resolves both end points against +vertices+ (see #find_vertex) and
  # stores the results, which may be nil when a lookup fails.
  #
  # @param vertices [#find_by_id] collection used for the lookup
  def update_vertices(vertices)
    self.source = find_vertex(:source, vertices)
    self.target = find_vertex(:target, vertices)
  end

  # Resolves one end point of the edge.
  #
  # The current field value is kept when it is exactly a Rgraphum::Vertex whose
  # graph is this edge's graph; anything else is handed to
  # +vertices.find_by_id+.
  #
  # @param syn [Symbol] field to resolve (:source or :target)
  # @param vertices [#find_by_id] collection used for the lookup
  # @return the resolved vertex, or a falsy value when nothing was found (a
  #   message is printed in that case if Rgraphum.verbose? is true)
  def find_vertex(syn, vertices)
    vertex = self[syn]
    already_attached = vertex.instance_of?(Rgraphum::Vertex) && vertex.graph.equal?(@graph)
    vertex = vertices.find_by_id(vertex) unless already_attached

    p "edge has no #{syn}" if !vertex && Rgraphum.verbose?
    vertex
  end

  # accessors

  # attr_accessor :id
  # attr_accessor :source, :target
  # attr_accessor :start, :end
  # attr_accessor :label
  # attr_accessor :weight
  # attr_accessor :attvalues

  # Hash-style read access: edge[:weight] is the same as edge.weight.
  def [](key)
    send(key)
  end

  # Hash-style write access: edge[:weight] = 2 is the same as edge.weight = 2.
  # The value is forwarded to the writer; without it the writer would be
  # called with no argument and raise ArgumentError.
  def []=(key, value)
    send("#{key}=", value)
  end

  # Equality.
  #
  # Against another edge: true only when id, source and target are all equal.
  # Against anything else: the edge's id is compared with +other+, so an edge
  # equals its own id.
  def ==(other)
    return id == other unless other.is_a?(Rgraphum::Edge)

    [:id, :source, :target].all? { |name| send(name) == other.send(name) }
  end

  # @return [Hash{Symbol=>Object}] every declared field whose value is truthy;
  #   values that respond to #to_hash are converted with it
  def to_hash
    @@field_names.each_with_object({}) do |name, hash|
      value = instance_variable_get("@#{name}")
      next unless value

      hash[name] = value.respond_to?(:to_hash) ? value.to_hash : value
    end
  end

  # @return [String] the string form of #to_hash
  def to_s
    to_hash.to_s
  end

  # FIXME (kept from the original author; no further detail was given)
  #
  # Class macro declaring one or more fields. Each name is recorded in the
  # class-wide field list and gets a plain reader and writer backed by the
  # instance variable of the same name.
  #
  # @param field_names [Array<Symbol, String>] names to declare
  def self.field(*field_names)
    @@field_names ||= []
    field_names.each do |field_name|
      @@field_names << field_name.to_sym
      attr_accessor field_name
    end
  end

  # @param field_name [#to_sym]
  # @return [Boolean] whether the name was declared with +field+
  def self.has_field?(field_name)
    @@field_names.include?(field_name.to_sym)
  end

  field :id
  field :source, :target
  field :start, :end
  field :label
  field :weight
  field :attvalues
end
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
# Conversion function: returns the argument untouched when it is exactly a
# Rgraphum::Edges instance, otherwise wraps it with Rgraphum::Edges.new.
#
# @param array [Rgraphum::Edges, Array] a collection or the items to build one
# @return [Rgraphum::Edges]
def Rgraphum::Edges(array)
  return array if array.instance_of?(Rgraphum::Edges)

  Rgraphum::Edges.new(array)
end
|
|
10
|
+
|
|
11
|
+
# Collection of Rgraphum::Edge objects with an id => edge index.
#
# A collection plays one of two roles, told apart by its instance variables:
# * @vertex set (and that vertex has a graph): the edge list of one vertex;
# * otherwise: a free-standing list, or the edge list of a graph when @graph is
#   set.
# NOTE(review): @graph and new_id are not defined in this class; they are
# presumably provided by Rgraphum::RgraphumArray - confirm.
class Rgraphum::Edges < Rgraphum::RgraphumArray
  include Rgraphum::RgraphumArrayDividers

  # Vertex owning this collection when it is a per-vertex edge list.
  attr_accessor :vertex

  # Gremlin: outV
  #
  # Get the outgoing (tail) vertex of every edge.
  #
  #   gremlin> e = g.e(12)
  #   ==>e[12][6-created->3]
  #   gremlin> e.outV
  #   ==>v[6]
  #
  # @return the result of mapping each edge to its source
  def outV
    map { |e| e.source }
  end
  alias :out_v :outV

  # Gremlin: inV
  #
  # Get the incoming (head) vertex of every edge.
  #
  #   gremlin> e = g.e(12)
  #   ==>e[12][6-created->3]
  #   gremlin> e.inV
  #   ==>v[3]
  #
  # @return the result of mapping each edge to its target
  def inV
    map { |e| e.target }
  end
  alias :in_v :inV


  # Non-Gremlin methods


  # @param edge_hashes [Array<Hash, Rgraphum::Edge>] initial content; every
  #   item goes through #<< (and therefore #build)
  def initialize(edge_hashes=[])
    super()
    @id_edge_map = {}
    edge_hashes.each { |item| self << item }
  end

  # Looks an edge up in the id index.
  #
  # @param edge_id [Rgraphum::Edge, Object] an edge (its id is used) or an id
  # @return [Rgraphum::Edge, nil]
  def find_by_id(edge_id)
    key = edge_id.is_a?(Rgraphum::Edge) ? edge_id.id : edge_id
    @id_edge_map[key]
  end

  # Resolves +vertex+ against the vertices of @graph. A Rgraphum::Vertex that
  # already belongs to @graph is returned as is; anything else is looked up
  # with find_by_id. Requires @graph to be set.
  def find_vertex(vertex)
    graph_vertices = @graph.vertices
    return vertex if vertex.is_a?(Rgraphum::Vertex) && vertex.graph.equal?(@graph)

    graph_vertices.find_by_id(vertex)
  end

  # Adds an edge to the collection and to the id index.
  #
  # Per-vertex list: with +recursive+ true the work is delegated to the graph's
  # edge list; with +recursive+ false the given object is appended as is.
  # Otherwise: the argument is coerced to an edge; when @graph is set its end
  # points are resolved to vertices of that graph, an id is assigned through
  # new_id, and the edge is also appended to both end points' edge lists.
  #
  # @param edge_or_hash [Rgraphum::Edge, Hash]
  # @param recursive [Boolean] only consulted for per-vertex lists
  # @return [Rgraphum::Edge] the stored edge
  # @raise [ArgumentError] when @graph is set and an end point cannot be
  #   resolved
  def build(edge_or_hash, recursive=true)
    if @vertex && @vertex.graph
      if recursive
        edge = @vertex.graph.edges.build(edge_or_hash, false)
      else
        edge = edge_or_hash
        original_push_1(edge)
      end
    else
      edge = Rgraphum::Edge(edge_or_hash)
      if @graph
        source_vertex = find_vertex(edge.source)
        target_vertex = find_vertex(edge.target)
        raise ArgumentError, "Source vertex is required" unless source_vertex
        raise ArgumentError, "Target vertex is required" unless target_vertex

        edge.source = source_vertex
        edge.target = target_vertex
        edge.id = new_id(edge.id)
        edge.graph = @graph
        # false: the vertex lists must only append, not call back into here.
        edge.source.edges.build(edge, false)
        edge.target.edges.build(edge, false)
      end
      original_push_1(edge)
    end

    @id_edge_map[edge.id] = edge
    edge
  end

  # The inherited #<< is kept under a private-ish name because #build needs the
  # raw append; the alias must be taken before #<< is redefined below.
  alias :original_push_1 :<<

  # Appends through #build. Returns self so calls can be chained.
  def <<(edge_or_hash)
    build(edge_or_hash)
    self
  end

  alias :original_push_m :push

  # Appends every argument through #build. Returns self.
  def push(*edge_hashes)
    edge_hashes.each { |item| build(item) }
    self
  end

  # Removes an edge from the collection and from the id index.
  # (Original author's note: called from delete_if, reject! and reject.)
  #
  # Per-vertex list: with +recursive+ true the edge is also removed from the
  # graph's edge list. Otherwise, when @graph is set, the edge is also removed
  # from both end points' edge lists.
  #
  # @param edge_or_id [Rgraphum::Edge, Object] an edge, or the id of one
  # @param recursive [Boolean] only consulted for per-vertex lists
  # @return the value returned by the superclass delete, or +edge_or_id+
  #   itself when no such edge is indexed here
  def delete(edge_or_id, recursive=true)
    # Ask instead of rescuing: a rescue modifier would also hide unrelated
    # errors raised while reading the id.
    id = edge_or_id.respond_to?(:id) ? edge_or_id.id : edge_or_id
    target_edge = find_by_id(id)
    return edge_or_id unless target_edge

    deleted_edge = super(target_edge)

    if @vertex && @vertex.graph
      @vertex.graph.edges.delete(target_edge, false) if recursive
    elsif @graph
      target_edge.source.edges.delete(target_edge, false)
      target_edge.target.edges.delete(target_edge, false)
    end
    @id_edge_map.delete(id)

    deleted_edge
  end

  # def weights
  #   self.map{ |edge| edge.weight }
  # end

  protected :original_push_1
  protected :original_push_m
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# C implementation (via RubyInline) of the pairwise cosine similarity between
# the rows of a matrix.
class CosineSimilarityMatrix

  inline do |builder|
    builder.include "<math.h>"
    builder.add_compile_flags "-fopenmp"

    # c_similarity(ary)
    #
    # ary    : Array of m rows; the row length n is taken from row 0 and every
    #          row is read up to that length. Elements are converted with
    #          NUM2DBL and stored as C floats.
    # return : m x m Array of Floats; entry [i][j] is
    #          dot(row_i, row_j) / sqrt(|row_i|^2 * |row_j|^2).
    #          The diagonal is set to 1.0 without computing it and the lower
    #          triangle is copied from the upper one. An empty ary yields [].
    # NOTE(review): a row made only of zeros gives 0/0 (NaN) against any other
    # row - confirm whether callers rely on that.
    builder.c <<-EOF

      VALUE c_similarity( VALUE ary ){
        int i,j,k;

        int m = RARRAY_LEN(ary);
        int n;

        float **val;
        float *s;

        float dot_ab = 0.0;
        float sq_sum_a = 0.0;
        float sq_sum_b = 0.0;
        VALUE sim_ary = rb_ary_new();
        VALUE sim;

        // no rows: row 0 does not exist, so its length must not be read
        if( m == 0 ){
          return sim_ary;
        }
        n = RARRAY_LEN(rb_ary_entry(ary,0));

        // float val[m][n];
        val = malloc(sizeof(float *) * m);
        for (i=0;i<m;i++){
          val[i] = malloc(sizeof(float) * n);
        }

        // float s[m][m]; stored flat, row-major
        s = malloc( sizeof(float) * m * m);

        // init: copy the Ruby numbers into the C matrix
        for( i=0; i<m; i++){
          for( j=0; j<n; j++){
            val[i][j] = NUM2DBL( rb_ary_entry(rb_ary_entry(ary,i),j) );
          }
        }

        for( i=0; i<m; i++){
          for( j=0; j<m; j++){
            if( i == j ){
              s[i*m + j] = 1.0;
            } else if( i > j ){
              // symmetric: row j was filled in an earlier iteration of i
              s[i*m + j] = s[j*m + i];
            } else {
              dot_ab = 0.0;
              sq_sum_a = 0.0;
              sq_sum_b = 0.0;

              for( k=0; k<n; k++){
                dot_ab += ( val[i][k] * val[j][k] );
                sq_sum_a += ( val[i][k] * val[i][k] );
                sq_sum_b += ( val[j][k] * val[j][k] );
              }
              s[i*m + j] = dot_ab / ( sqrt( sq_sum_a * sq_sum_b) );
            }
          }
        }

        // convert the result back into nested Ruby arrays
        for( i=0; i<m; i++){
          sim = rb_ary_new();
          for( j=0; j<m; j++){
            rb_ary_push( sim, DBL2NUM(s[i*m + j] ));
          }
          rb_ary_push( sim_ary, sim );
        }

        // every row was allocated separately and must be released separately
        for( i=0; i<m; i++){
          free(val[i]);
        }
        free(val);
        free(s);

        return sim_ary;
      }

    EOF
  end
end
|
|
79
|
+
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
#require 'numru/lapack'
|
|
4
|
+
|
|
5
|
+
# Fits y against powers of x by handing a matrix of x**d values to LAPACK's
# dgelsd routine (through NumRu::Lapack).
class Rgraphum::Analyzer::LinearRegression
  # @param x_array [Array<Numeric>] x samples
  # @param y_array [Array<Numeric>] y samples
  # @param degree [Integer] highest power of x placed in the matrix
  # @param round [Integer] digits passed to #round for each returned number
  # @return [Array<Numeric>] first row of dgelsd's fifth return value, rounded
  #   NOTE(review): presumably the fitted coefficients, highest power first -
  #   confirm against the ruby-lapack documentation.
  def analyze(x_array, y_array, degree=1, round=5)
    sample_count = x_array.size
    xs = NArray.to_na(x_array)
    design = NMatrix.sfloat(sample_count, degree + 1)

    # Power d is written starting at flat offset (degree - d) * sample_count.
    (degree + 1).times do |power|
      design[(degree - power) * sample_count] = xs ** power
    end

    ys = NArray.to_na([y_array])
    # Only the fifth output is used; the other four are ignored.
    _s, _rank, _work, _info, solution = NumRu::Lapack.dgelsd(design, ys, 0)

    solution.to_a[0].map { |value| value.round(round) }
  end
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
require 'matrix'
|
|
4
|
+
require 'inline'
|
|
5
|
+
|
|
6
|
+
# C implementation (via RubyInline) of a tf-idf weighting over a matrix of
# integer counts.
class TfIdf

  inline do |builder|
    builder.include "<math.h>"
    builder.add_compile_flags "-fopenmp"

    # tf_idf(ary)
    #
    # ary    : Array of m rows of Fixnum counts; the row length n is taken
    #          from row 0. NOTE(review): presumably rows are documents and
    #          columns are words - confirm with the callers.
    # return : m x n Array of Floats where entry [i][j] is
    #          (ary[i][j] / sum of row i) * log(m / (m - zeros in column j)).
    #          An empty ary yields [].
    # NOTE(review): a row summing to 0, or a column that is 0 in every row,
    # leads to a division by zero in float arithmetic (NaN/Infinity results).
    builder.c <<-EOF

      VALUE tf_idf( VALUE ary ){
        int i,j,k;
        int m = RARRAY_LEN(ary);
        int n;

        float words_amount;
        float non_word;
        float idfT;

        VALUE doc;
        VALUE row;
        VALUE tf_idf_ary = rb_ary_new();

        /* no rows: row 0 does not exist, so its length must not be read */
        if( m == 0 ){
          return tf_idf_ary;
        }
        n = RARRAY_LEN(rb_ary_entry(ary,0));

        /* tf-idf and output */
        for( i=0; i<m; i++){
          doc = rb_ary_entry(ary,i);

          /* tf base: total of the counts in row i */
          words_amount = 0.0;
          for( j=0; j<n; j++){
            words_amount = words_amount + FIX2INT( rb_ary_entry(doc,j) );
          }

          row = rb_ary_new();
          for( j=0; j<n; j++){
            /* number of rows whose count in column j is zero */
            non_word = 0.0;
            for( k=0; k<m; k++){
              if( FIX2INT( rb_ary_entry(rb_ary_entry(ary,k),j)) == 0 ){
                non_word = non_word + 1;
              }
            }
            idfT = log( m / ( m - non_word ) );
            rb_ary_push( row, DBL2NUM( ( FIX2INT(rb_ary_entry(doc,j)) / words_amount ) * idfT ));
          }
          rb_ary_push( tf_idf_ary, row );
        }

        return tf_idf_ary;
      }
    EOF
  end
end
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Forward declaration: guarantees the Rgraphum::Graph constant exists so that
# a module can be defined inside its namespace from this file.
class Rgraphum::Graph; end
|
|
4
|
+
|
|
5
|
+
# Gremlin-flavoured query and construction methods. The methods read the
# @vertices and @edges instance variables of whatever object mixes this
# module in.
module Rgraphum::Graph::Gremlin

  # Gremlin: Graph.v
  #
  # Get a vertex or set of vertices by providing one or more vertex identifiers.
  # The identifiers must be the identifiers assigned by the underlying graph implementation.
  #
  #   gremlin> g.v(1)
  #   ==>v[1]
  #   gremlin> g.v(1,2,3)
  #   ==>v[1]
  #   ==>v[2]
  #   ==>v[3]
  #   gremlin> ids = [1,2,3]
  #   ==>1
  #   ==>2
  #   ==>3
  #   gremlin> g.v(ids.toArray())
  #   ==>v[1]
  #   ==>v[2]
  #   ==>v[3]
  #
  # @param [Array] ids identifiers, given separately or as (nested) arrays
  # @return the single lookup result when exactly one id is given, otherwise a
  #   new Rgraphum::Vertices holding one lookup result per id
  def v(*ids)
    ids = ids.flatten
    return @vertices.find_by_id(ids[0]) if ids.size == 1

    ids.each_with_object(Rgraphum::Vertices.new) do |id, found|
      found << @vertices.find_by_id(id)
    end
  end

  # Gremlin: Graph.e
  #
  # Get an edge or set of edges by providing one or more edge identifiers.
  # The identifiers must be the identifiers assigned by the underlying graph implementation.
  #
  #   gremlin> g.e(10)
  #   ==>e[10][4-created->5]
  #   gremlin> g.e(10,11,12)
  #   ==>e[10][4-created->5]
  #   ==>e[11][4-created->3]
  #   ==>e[12][6-created->3]
  #   gremlin> ids = [10,11,12]
  #   ==>10
  #   ==>11
  #   ==>12
  #   gremlin> g.e(ids.toArray())
  #   ==>e[10][4-created->5]
  #   ==>e[11][4-created->3]
  #   ==>e[12][6-created->3]
  #
  # @param [Array] ids identifiers, given separately or as (nested) arrays
  # @return the single lookup result when exactly one id is given, otherwise a
  #   new Rgraphum::Edges holding one lookup result per id
  def e(*ids)
    ids = ids.flatten
    return @edges.find_by_id(ids[0]) if ids.size == 1

    ids.each_with_object(Rgraphum::Edges.new) do |id, found|
      found << @edges.find_by_id(id)
    end
  end

  # Gremlin: V
  #
  # The vertex iterator for the graph.
  # Utilize this to iterate through all the vertices in the graph.
  # Use with care on large graphs unless used in combination with a key index lookup.
  #
  #   gremlin> g.V
  #   ==>v[3]
  #   ==>v[2]
  #   ==>v[1]
  #   ==>v[6]
  #   ==>v[5]
  #   ==>v[4]
  #   gremlin> g.V("name", "marko")
  #   ==>v[1]
  #   gremlin> g.V("name", "marko").name
  #   ==>marko
  #
  # @param [String] key field to filter on; omit to get every vertex
  # @param [String] value value the field must have
  # @return @vertices itself without a key, otherwise the result of
  #   @vertices.where(key => value).all
  def V(key=nil,value=nil)
    return @vertices unless key

    @vertices.where( { key => value } ).all
  end


  # Gremlin: E
  #
  # The edge iterator for the graph.
  # Utilize this to iterate through all the edges in the graph.
  # Use with care on large graphs.
  #
  #   gremlin> g.E
  #   ==>e[10][4-created->5]
  #   ==>e[7][1-knows->2]
  #   ==>e[9][1-created->3]
  #   ==>e[8][1-knows->4]
  #   ==>e[11][4-created->3]
  #   ==>e[12][6-created->3]
  #   gremlin> g.E.weight
  #   ==>1.0
  #   ==>0.5
  #   ==>0.4
  #   ==>1.0
  #   ==>0.4
  #   ==>0.2
  #
  # @param [String, Symbol] key name of an edge reader to filter on; omit to
  #   get every edge
  # @param value value the reader must return
  # @return @edges itself without a key, otherwise the edges whose +key+
  #   reader returns +value+
  def E(key=nil,value=nil)
    return @edges unless key

    # The reader is called on each edge being tested.
    @edges.find_all { |edge| edge.public_send(key) == value }
  end

  # Gremlin: Graph.addVertex
  #
  # Adds a vertex to the graph.
  # Note that most graph implementations ignore the identifier supplied to addVertex.
  #
  #   gremlin> g = new TinkerGraph()
  #   ==>tinkergraph[vertices:0 edges:0]
  #   gremlin> g.addVertex()
  #   ==>v[0]
  #   gremlin> g.addVertex(100)
  #   ==>v[100]
  #   gremlin> g.addVertex(null,[name:"stephen"])
  #   ==>v[1]
  #
  # @param id identifier for the new vertex; only used when +vertex+ is not
  #   given
  # @param vertex [Rgraphum::Vertex, Hash, nil] a vertex, or the fields to
  #   build one from
  # @return the last element of @vertices after the append
  def addVertex(id=nil, vertex=nil)
    vertex ||= Rgraphum::Vertex.new(id: id)
    vertex = Rgraphum::Vertex.new(vertex) unless vertex.is_a?(Rgraphum::Vertex) # FIXME
    @vertices << vertex
    @vertices[-1]
  end
  alias :add_vertex :addVertex

  # Gremlin: Graph.addEdge
  #
  # Adds an edge to the graph.
  # Note that most graph implementations ignore the identifier supplied to addEdge.
  #
  #   gremlin> g = new TinkerGraph()
  #   ==>tinkergraph[vertices:0 edges:0]
  #   gremlin> v1 = g.addVertex(100)
  #   ==>v[100]
  #   gremlin> v2 = g.addVertex(200)
  #   ==>v[200]
  #   gremlin> g.addEdge(v1,v2,'friend')
  #   ==>e[0][100-friend->200]
  #   gremlin> g.addEdge(1000,v1,v2,'buddy')
  #   ==>e[1000][100-buddy->200]
  #   gremlin> g.addEdge(null,v1,v2,'pal',[weight:0.75f])
  #   ==>e[1][100-pal->200]
  #
  # Accepted argument lists:
  #   (source, target, label)
  #   (id, source, target, label)
  #   (id, source, target, label, extra_fields_hash)
  #
  # @return the new edge, or nil when called with any other number of
  #   arguments (nothing is added in that case)
  def addEdge( *params )
    case params.size
    when 3
      source, target, label = params
      @edges << Rgraphum::Edge.new(source: source, target: target, label: label)
      @edges[-1]
    when 4
      id, source, target, label = params
      @edges.build(id: id, source: source, target: target, label: label)
    when 5
      id, source, target, label, extra = params
      # Entries of the extra hash win over the positional values on key clash.
      @edges.build({ id: id, source: source, target: target, label: label }.merge(extra))
    end
  end
  alias :add_edge :addEdge

end
|