graphmatcher 0.3.5 → 0.3.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/graphmatcher.rb +281 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 645e7aafa2217a293307b3e715fadae02860907b9faad2591db48499c494bfe6
|
4
|
+
data.tar.gz: 5fd7016adfbcd0ce6a7ca3383adbf292a4ff9b8e792c677a960617f51ab6f735
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c1d68dd886a150243ed58a1420f664c183065a1e907fb2a4b84ec69476f485fe37587be04f99112bfb1b15a2bf3c569c9eff022b30b0d6d1d5c86a6b6b6058cf
|
7
|
+
data.tar.gz: 307c94a250b9f74c7273fabd228221633bbda7ea99d249a0125d8f84dc37d7f3afe9b43827b23287e23f5ac6fea635f961754703370f90b6d4d306856cd8d68e
|
data/lib/graphmatcher.rb
ADDED
@@ -0,0 +1,281 @@
|
|
1
|
+
require "logger"
|
2
|
+
require "ruby-prof" if ENV["GM_ENV"] == "test"
|
3
|
+
|
4
|
+
# Subgraph matcher: finds occurrences of a labelled query graph inside a
# labelled data graph, using label filtering followed by dual simulation and
# a depth-first enumeration (dual_iso).
#
# Graphs are arrays of equally sized rows. Row 0 is the adjacency array
# (outgoing edges per vertex index) and row 1 holds the vertex labels. Any
# further rows are carried along untouched by the matching itself.
class Graphmatcher
  VERSION = "0.3.8"

  @@logger = Logger.new(STDOUT)
  @@logger.level = Logger::FATAL

  # args is a Hash with the keys:
  #   :query_graph      - graph to look for (coerced with to_a)
  #   :data_graph       - graph to search in (coerced with to_a)
  #   :limit            - number of matches to collect, default 1
  #   :max_allowed_time - time budget in seconds, default 4.0
  #   :cost_matrix      - optional costs[resource_index][query_index]
  #   :self_loops       - intersect instead of replace candidates on self loops
  #
  # Raises ArgumentError (via validate!) when the graphs are malformed.
  def initialize(args)
    @query_graph = args[:query_graph].to_a
    @data_graph = args[:data_graph].to_a
    @limit = (args[:limit] || 1).to_s.to_i
    @max_allowed_time = (args[:max_allowed_time] || 4.000).to_f
    @cost_matrix = args[:cost_matrix]
    @self_loops = args[:self_loops] || false
    validate!
  end

  # Generates the feasible matches (phi) for the query graph based on the
  # labels of the data graph vertices: phi[q] lists every data vertex index
  # whose label equals the label of query vertex q.
  #
  # With a cost matrix, each candidate list is ordered by the cost of
  # assigning that candidate to *its own* query vertex (missing costs count
  # as 0.0 here, as before).
  def label_match
    data_labels = @data_graph[1]
    query_labels = @query_graph[1]

    feasible = query_labels.map do |query_label|
      data_labels.each_index.select { |i| data_labels[i] == query_label }
    end

    if @cost_matrix
      # The query index is the position of the candidate list, not the
      # position of the candidate inside that list.
      feasible = feasible.each_with_index.map do |candidates, q_index|
        candidates.sort_by do |candidate|
          get_resource_cost(@cost_matrix, candidate, q_index).to_f
        end
      end
    end

    @@logger.info { "Label matches(phi) are: " + feasible.to_s }
    feasible
  end

  # Public interface for Graphmatcher class.
  #
  # Returns @matches: Array of matching indices of query graph in data graph.
  # Without a cost matrix each match looks like [[i0], [i1], ...]; with a
  # cost matrix each match is [[i0, cost0], [i1, cost1], ...] and matches are
  # sorted by the sum of their costs, e.g.
  #   MATCHES      [ [[1,100],[2,10]], [[3,500],[4,800]] ]
  #   MATCH COSTS        110               1300
  #
  # The behaviour here is important !
  # Sum of costs vs. max of costs, depends which one is relevant.
  def find_matches
    @matches = []
    @t0 = Time.now.to_f

    dual_iso(dual_simulation(label_match), 0)
    return @matches unless @cost_matrix

    # If a cost matrix is available, attach costs to the found matches.
    @matches = assess_cost(@matches, @cost_matrix)

    # NOTE(review): assess_cost coerces costs with to_f, so a nil cost is
    # not expected to reach this filter; it is kept as a safety net.
    @matches.reject! { |match_set| match_set.any? { |e| e[1].nil? } }

    @matches = @matches.sort_by do |match_set|
      match_set.reduce(0) { |sum, e| sum + e[1] }
    end
    @matches
  end

  # For every stored match, looks up `property` in the third row (index 2)
  # of the matched data vertices. The first parameter is unused.
  # NOTE(review): presumably row 2 of the data graph holds a Hash (or other
  # indexable value) per vertex -- confirm against callers.
  #
  # Requires find_matches to have run, since it reads @matches. Unmatched
  # placeholders and vertices without a third row yield nil.
  def get_resource_property(_match, property)
    # Per-vertex view of the data graph: nodes[i] = [adjacency, label, ...].
    nodes = @data_graph.transpose
    @matches.map do |match_set|
      match_set.map do |match|
        index = Array(match).first
        attributes = index.nil? ? nil : nodes[index][2]
        attributes && attributes[property]
      end
    end
  end

  # Looks up the cost of assigning a resource to a query vertex.
  #
  # costs = { resource_id => { query_index => cost } }
  # e.g.
  # costs = { 40 => { 0 => 5, 1 => 10 } }
  #
  # Returns nil when the resource or the query index has no cost entry.
  def get_resource_cost(costs, resource_position, query_index)
    return nil if resource_position.nil?

    resource_costs = costs[resource_position]
    (resource_costs && resource_costs[query_index]) || nil
  end

  # Pairs every matched resource with its cost as a Float; the position of
  # a resource inside its match set is used as the query index. Missing
  # costs become 0.0 (nil.to_f).
  #
  # resource_graph =
  # [
  #  [[],[],[],[],[],[],[]], #adj.
  #  ['POTATO','TOMATO','POTATO','TOMATO','POTATO','TOMATO','POTATO'], #types
  #  ['img_x','img_y','img_z','img_t','img_z','img_q','img_z'], #images
  #  ['12','52','25','61','74','95','11'] #resource_id
  # ]
  #
  # request_graph =
  # [
  #  [[],[]],
  #  ['TOMATO','TOMATO'],
  #  ['img_y','img_z'],
  #  ['SAUSAGE_A','EGG_A']
  # ]
  #
  # costs = {
  #  52 => {0 => 0, 1 => 50} , #y
  #  61 => {0 => 30, 1 => 70} , #t
  #  95 => {0 => 40, 1 => 55} , #q
  # }
  #
  # matches = [
  #  [[1],[2]],[[1],[4]],[[1],[6]],
  #  [[3],[2]],[[3],[4]],[[3],[6]],
  #  [[5],[2]],[[5],[4]],[[5],[6]]
  # ]
  def assess_cost(matches, costs)
    matches.map do |match_set| # [ [1],[2] ]
      match_set.flatten.each_with_index.map do |match, query_index| # 1
        [match, get_resource_cost(costs, match, query_index).to_f]
      end
    end
  end

  # INFO: Function that uses parameter phi -which is generated by label_match-
  # to determine which matches of data have expected relations in query graph.
  #
  # phi is mutated in place and also returned. Returns nil once the time
  # budget (@max_allowed_time seconds since @t0) is exceeded. Returns early
  # with the partially pruned phi as soon as some query edge has no
  # supporting data edge at all.
  def dual_simulation(phi)
    # One directional adjacency array for data graph and query graphs.
    data_children = @data_graph[0]
    query_children = @query_graph[0]

    changed = true
    while changed
      changed = false
      return nil if (Time.now.to_f - @t0) > @max_allowed_time

      # children = query_edges, q_index = query_vertex_index
      query_children.each_with_index do |children, q_index|
        # query_child = query_edge_target
        children.each do |query_child|
          temp_phi = []
          to_delete = []

          phi[q_index].each do |child|
            # Intersection of children of 'child' in the data graph and
            # candidates of 'query child' in the data graph.
            phi_intersection = data_children[child] & phi[query_child]
            if phi_intersection.empty?
              # 'child' cannot reach any candidate of query_child.
              to_delete.push(child)
              changed = true
            end
            temp_phi |= phi_intersection
          end

          # Deletion is deferred so phi[q_index] is not mutated while it
          # is being iterated.
          to_delete.each { |td| phi[q_index].delete(td) }
          return phi if temp_phi.flatten.empty?

          changed = true if temp_phi.size < phi[query_child].size
          if @self_loops && query_child == q_index
            phi[query_child] &= temp_phi
          else
            phi[query_child] = temp_phi
          end
        end
      end
    end

    @@logger.info { "Returning phi=#{phi}" }
    phi
  end

  # INFO: Function call to collect matches from phi object.
  # phi   = candidate lists per query vertex (nil after a timeout)
  # depth = index of the query vertex being fixed in this call
  #
  # Appends complete assignments to @matches; stops descending once @limit
  # matches have been collected. A phi that still contains an empty
  # candidate list at full depth is recorded as the placeholder [nil].
  def dual_iso(phi, depth)
    return if phi.nil? || phi.empty?

    if depth == @query_graph[0].length
      # Unable to match some vertex in graph => placeholder.
      return @matches << (phi.include?([]) ? [nil] : phi)
    end

    candidates = phi[depth]
    if @cost_matrix
      # Cheapest candidates first; unknown costs go last.
      candidates = candidates.sort_by do |value|
        get_resource_cost(@cost_matrix, value, depth) || Float::INFINITY
      end
    end

    candidates.each do |value|
      next if contains(phi, depth, value)

      # keys are indices 0...n, values are possible values for that index
      phicopy = phi.map(&:clone)
      phicopy[depth] = [value]
      if @matches.length >= @limit
        @@logger.info { "FINISHED matches=#{@matches}" }
        return @matches
      end
      dual_iso(dual_simulation(phicopy), depth + 1)
    end
  end

  # INFO: Checks if vertex J is contained in any of previous matches, i.e.
  # in the candidate lists of query vertices 0...depth.
  def contains(phi, depth, vertex_j)
    return false if depth <= 0

    phi.first(depth).any? { |candidates| candidates.include?(vertex_j) }
  end

  # EXPERIMENTAL
  # INFO: Produce a GraphViz-compliant directed graph syntax.
  # INFO: Needs dot/graphviz tools installed as a dependency.
  # TODO: Unable to handle multiple results, color each result different.
  # Indices are IDs, labels are labels, adjacency array is outgoing edges.
  #
  # Writes "<prefix><unix timestamp>.dot" and asks `dot` to render the
  # matching .png. Vertices listed in `subgraph` get a red font colour.
  # Returns whatever `dot` printed on stdout.
  def dot_graph(data, subgraph = nil, prefix = "")
    output = ["digraph {"]
    data.transpose.each_with_index do |node, id|
      output <<
        ["#{id} [label=\"#{node[1]}##{id}\"]",
         "#{id}->{#{node[0].join(" ")}}"].join("\n")
    end
    if subgraph
      subgraph.each do |node|
        output << "#{node} [fontcolor=\"Red\"]"
      end
    end
    output << "}"

    tstamp = Time.new.to_i.to_s
    dot_path = "#{prefix}#{tstamp}.dot"
    png_path = "#{prefix}#{tstamp}.png"
    File.write(dot_path, output.join("\n"))

    # argv form: no shell is involved, so prefixes containing spaces or
    # shell metacharacters are passed to dot verbatim.
    IO.popen(["dot", "-Tpng", dot_path, "-o", png_path], &:read)
  end

  # Sanity-checks the constructor arguments.
  # Raises ArgumentError when the graphs are not arrays, limit/timeout are
  # not numeric, a graph has fewer than two rows, or the rows of a graph
  # differ in length.
  def validate!
    unless @query_graph.is_a?(Array) && @data_graph.is_a?(Array)
      raise ArgumentError,
            "Type mismatch for graphs in initialization !"
    end
    unless @limit.is_a?(Numeric) && @max_allowed_time.is_a?(Numeric)
      raise ArgumentError,
            "Type mismatch for limit or timeout value in initialization !"
    end
    unless @query_graph.length >= 2 && @data_graph.length >= 2
      raise ArgumentError,
            "Input graphs must have at least two dimensions !"
    end
    unless @query_graph.map(&:length).uniq.size == 1 &&
           @data_graph.map(&:length).uniq.size == 1
      raise ArgumentError,
            'Input graphs\' adjencency and label arrays must be sized equal !'
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: graphmatcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Emre Unlu
|
@@ -73,7 +73,8 @@ email:
|
|
73
73
|
executables: []
|
74
74
|
extensions: []
|
75
75
|
extra_rdoc_files: []
|
76
|
-
files:
|
76
|
+
files:
|
77
|
+
- lib/graphmatcher.rb
|
77
78
|
homepage: https://github.com/forvelin/graphmatcher
|
78
79
|
licenses:
|
79
80
|
- MIT
|