automated_metareview 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,13 @@
1
# An edge of the text graph, connecting two vertices.
#
# Only name, type, average_match, frequency and edge_match are set by the
# constructor; every other attribute (edgeID, index, in_vertex, out_vertex,
# label) starts out as nil and is assigned by the caller.
class Edge
  attr_accessor :edgeID, :type, :name, :index, :in_vertex, :out_vertex,
                :edge_match, :average_match, :frequency, :label

  # edge_name - name given to the edge
  # edge_type - type code (1 - verb, 2 - adjective, 3 - adverb)
  def initialize(edge_name, edge_type)
    @name, @type = edge_name, edge_type
    @average_match = 0.0 # no match computed yet
    @frequency = 0
    # number of matches recorded for each of the five metric values
    @edge_match = Array.new(5, 0)
  end
end
@@ -0,0 +1,695 @@
1
+ require 'automated_metareview/sentence_state'
2
+ require 'automated_metareview/edge'
3
+ require 'automated_metareview/vertex'
4
+
5
+ class GraphGenerator
6
+ #include SentenceState
7
+ #creating accessors for the instance variables
8
+ attr_accessor :vertices, :num_vertices, :edges, :num_edges, :pipeline, :pos_tagger
9
+
10
+ # #global variables
11
+ # $vertices = Array.new
12
+ # $edges = Array.new
13
+
14
+ =begin
15
+ * generates the graph (vertices and edges) for the given review or submission text
16
+ * INPUT: an array of sentences for a review or a submission. Every row in 'text' contains one sentence.
17
+ * type - tells you if it was a review or a submission
18
+ * type = 1 - submission/past review
19
+ * type = 2 - new review
20
+ =end
21
# Builds the graph (vertices and edges) for a review or a submission.
#
# text               - array of sentences, one sentence per element; nil and
#                      blank sentences are skipped
# pos_tagger         - object whose get_readable(sentence) returns the
#                      sentence as space separated "token/TAG" pairs
# coreNLPTagger      - pipeline stored in @pipeline and used by find_parents
#                      and find_labels
# forRelevance, forPatternIdentify - accepted for compatibility, not used here
#
# Every noun, adjective, verb and adverb token becomes a vertex; consecutive
# tokens of the same kind are merged into one vertex ("big" + "red" becomes
# "big red"). Edges connect nouns with verbs ("verb"), adjectives with nouns
# ("noun-property") and adverbs with verbs ("verb-property").
#
# While the graph is being built @num_vertices / @num_edges act as insertion
# positions that remove_redundant_vertices / remove_redundant_edges keep one
# above the number of stored items; both are decremented once at the end.
#
# Returns @num_edges after that final decrement (-1 when no edge was ever
# added).
def generate_graph(text, pos_tagger, coreNLPTagger, forRelevance, forPatternIdentify)
  @vertices = Array.new
  @num_vertices = 0
  @edges = Array.new
  @num_edges = 0

  @pos_tagger = pos_tagger  # part of speech tagger
  @pipeline = coreNLPTagger # dependency parsing

  text.each_index do |i|
    sentence = text[i]
    next if sentence.nil? || sentence.split(" ").empty?

    tagged_sentence = @pos_tagger.get_readable(sentence)
    parents = find_parents(sentence)
    labels = find_labels(sentence)
    states = SentenceState.new.identify_sentence_state(tagged_sentence)
    state = states[0]
    next_state = 1

    # per-sentence bookkeeping: the names of the vertices seen so far for
    # each token type (latest one last) and the type of the last typed token
    ctx = { :index => i, :prev_type => nil,
            :names => { NOUN => [], ADJ => [], VERB => [], ADV => [] } }
    # position into labels/parents; only advanced for tokens that are not skipped
    position = 0

    tagged_sentence.split(" ").each do |tagged_token|
      slash = tagged_token.index("/")
      next if slash.nil? # token carries no POS tag at all
      plain_token = tagged_token[0...slash].to_s
      pos_tag = tagged_token[(slash + 1)..tagged_token.length].to_s
      # ignore periods, possessive markers ("'s") and tokens with an empty or
      # one character tag
      next if plain_token == "." || tagged_token.include?("/POS") ||
              slash == tagged_token.length - 1 || slash == tagged_token.length - 2

      # a conjunction opens the next sentence segment, so the state of that
      # segment applies from here on
      if tagged_token.include?("/CC")
        state = states[next_state]
        next_state += 1
      end

      type = classify_token(tagged_token)
      unless type.nil?
        token = { :text => plain_token, :pos => pos_tag, :state => state,
                  :label => labels[position], :parent => parents[position] }
        vertex = place_token_in_graph(ctx, type, token)
        if type == NOUN
          attach_head_edges(ctx, vertex, NOUN, ADJ, "noun-property", VERB)
        elsif type == VERB
          attach_head_edges(ctx, vertex, VERB, ADV, "verb-property", NOUN)
        elsif type == ADJ
          attach_modifier_edge(ctx, vertex, NOUN, "noun-property")
        else
          attach_modifier_edge(ctx, vertex, VERB, "verb-property")
        end
        ctx[:prev_type] = type
      end
      position += 1
    end
  end

  @num_vertices -= 1 # as a counter it was 1 ahead of the array's contents
  @num_edges -= 1    # same reason as for @num_vertices
  set_semantic_labels_for_edges
  @num_edges
end

# Maps a "token/TAG" string to NOUN, ADJ, VERB or ADV (checked in that order),
# or nil when the token is none of these.
def classify_token(tagged_token)
  if tagged_token.include?("NN") || tagged_token.include?("PRP") || tagged_token.include?("IN") ||
     tagged_token.include?("/EX") || tagged_token.include?("WP")
    NOUN
  elsif tagged_token.include?("/JJ")
    ADJ
  elsif tagged_token.include?("/VB") || tagged_token.include?("MD")
    VERB
  elsif tagged_token.include?("RB")
    ADV
  end
end

# True for the generic modifier labels, which must not replace the label a
# vertex already has.
def generic_label?(label)
  label == "NMOD" || label == "PMOD"
end

# Finds or creates the vertex for a token and returns it. A token that follows
# a token of the same type is appended to that token's vertex instead of
# getting a vertex of its own.
def place_token_in_graph(ctx, type, token)
  names = ctx[:names][type]
  i = ctx[:index]
  if ctx[:prev_type] == type && !names.empty?
    previous = search_vertices(@vertices, names.last, i)
    names[-1] = names.last.to_s + " " + token[:text]
    vertex = search_vertices(@vertices, names.last, i)
    if vertex.nil? && !previous.nil?
      # no vertex with the combined name yet: rename the previous vertex
      previous.name = previous.name.to_s + " " + token[:text]
      previous.label = token[:label] unless generic_label?(token[:label])
      vertex = previous
    end
  else
    names << token[:text]
    vertex = search_vertices(@vertices, token[:text], i)
  end
  if vertex.nil?
    @vertices[@num_vertices] = Vertex.new(names.last, type, i, token[:state], token[:label], token[:parent], token[:pos])
    vertex = @vertices[@num_vertices]
    @num_vertices += 1
  end
  remove_redundant_vertices(names.last, i)
  vertex
end

# Vertex of the most recent (back = 1) or the one before (back = 2) name
# recorded for the given type in the current sentence.
def latest_vertex(ctx, type, back = 1)
  search_vertices(@vertices, ctx[:names][type][-back], ctx[:index])
end

# Edges for a freshly placed noun or verb ("head"). A modifier seen just
# before it is moved from the previous head to this one, and the head is
# connected to the latest vertex of the partner type.
def attach_head_edges(ctx, vertex, head_type, modifier_type, property_edge, partner_type)
  i = ctx[:index]
  if ctx[:prev_type] == modifier_type
    if ctx[:names][head_type].length > 1
      unlink_vertices(latest_vertex(ctx, head_type, 2), latest_vertex(ctx, modifier_type), i)
    end
    link_vertices(property_edge, latest_vertex(ctx, modifier_type), vertex, i)
  end
  unless ctx[:names][partner_type].empty?
    link_vertices("verb", latest_vertex(ctx, partner_type), vertex, i)
  end
end

# Edge for a freshly placed adjective or adverb: by default it is attached to
# the latest noun or verb seen before it.
def attach_modifier_edge(ctx, vertex, head_type, property_edge)
  return if ctx[:names][head_type].empty?
  link_vertices(property_edge, latest_vertex(ctx, head_type), vertex, ctx[:index])
end

# Adds an edge v1 -> v2 unless either vertex is missing or search_edges
# already finds such an edge.
def link_vertices(edge_name, v1, v2, i)
  return if v1.nil? || v2.nil?
  return unless search_edges(@edges, v1, v2, i) == -1
  @edges[@num_edges] = Edge.new(edge_name, VERB)
  @edges[@num_edges].in_vertex = v1
  @edges[@num_edges].out_vertex = v2
  @edges[@num_edges].index = i
  @num_edges += 1
  # an edge was just added, so older edges may have become redundant
  remove_redundant_edges(v1, v2, i)
end

# Sets the edge between v1 and v2 to nil when search_edges_to_set_null finds
# one.
def unlink_vertices(v1, v2, i)
  return if v1.nil? || v2.nil?
  position = search_edges_to_set_null(@edges, v1, v2, i)
  return if position == -1
  @edges[position] = nil
  @num_edges -= 1 if @num_edges > 0
end

private :classify_token, :generic_label?, :place_token_in_graph, :latest_vertex,
        :attach_head_edges, :attach_modifier_edge, :link_vertices, :unlink_vertices
362
+
363
+ #------------------------------------------#------------------------------------------#------------------------------------------
364
+
365
# Looks a vertex up by name inside one sentence.
#
# list  - array of vertices; nil entries are ignored
# s     - name to look for, compared without regard to case
# index - sentence index the vertex has to belong to
#
# Returns the first matching vertex, or nil when s is nil or nothing matches.
def search_vertices(list, s, index)
  list.each do |candidate|
    next if candidate.nil? || s.nil?
    return candidate if candidate.name.casecmp(s) == 0 && candidate.index == index
  end
  nil
end
378
+
379
+ #------------------------------------------#------------------------------------------#------------------------------------------
380
+
381
+ =begin
382
+ NULLIFY ALL VERTICES CONTAINING "ONLY SUBSTRINGS" (and not exact matches) OF THIS VERTEX IN THE SAME SENTENCE (verts[j].index == index)
383
+ And reset the @vertices array with non-null elements.
384
+ =end
385
# Drops the vertices of sentence `index` that are made redundant by a vertex
# named s, and compacts @vertices.
#
# A vertex is redundant when its name is longer than one character, differs
# from s (ignoring case) and is contained in s (ignoring case). Every edge in
# @edges that has such a vertex as in- or out-vertex is set to nil as well.
# Only positions below @num_vertices are examined.
#
# Afterwards @vertices holds the non-nil vertices only and @num_vertices is
# set to that count plus one, which is also the return value.
def remove_redundant_vertices(s, index)
  pool = @vertices
  (@num_vertices - 1).downto(0) do |pos|
    vertex = pool[pos]
    next if vertex.nil? || vertex.index != index
    next if s.casecmp(vertex.name) == 0
    # the length test keeps single letter vertices such as "I" alive
    next unless s.downcase.include?(vertex.name.downcase) && vertex.name.length > 1

    # edges touching the vertex disappear together with it
    unless @edges.nil?
      @edges.each_index do |k|
        edge = @edges[k]
        @edges[k] = nil if !edge.nil? && (edge.in_vertex == vertex || edge.out_vertex == vertex)
      end
    end
    pool[pos] = nil
  end

  survivors = pool.compact
  @vertices = survivors
  @num_vertices = survivors.length + 1 # kept one ahead of the last vertex
end
425
+
426
+ #------------------------------------------#------------------------------------------#------------------------------------------
427
+
428
+ =begin
429
+ Checks to see if an edge between vertices "in" and "out" exists.
430
+ returns the position of the last matching edge in the list, or -1 if no such edge exists
431
+ edge[] list, vertex in, vertex out, int index
432
+ =end
433
# Searches `list` for an edge between the vertices in_vertex and out, in
# either direction.
#
# A stored vertex matches a wanted vertex when the names are equal ignoring
# case, or when the stored name contains the wanted name (case-sensitive).
# Every matching edge that belongs to a sentence other than `index` gets its
# frequency incremented.
#
# Returns the position of the last matching edge, or -1 when the list is nil
# or no edge matches.
def search_edges(list, in_vertex, out, index)
  return -1 if list.nil?

  covers = lambda { |stored, wanted| stored.casecmp(wanted) == 0 || stored.include?(wanted) }
  found_at = -1
  list.each_with_index do |edge, pos|
    next if edge.nil? || edge.in_vertex.nil? || edge.out_vertex.nil?
    source = edge.in_vertex.name
    target = edge.out_vertex.name
    same_direction = covers.call(source, in_vertex.name) && covers.call(target, out.name)
    next unless same_direction || (covers.call(source, out.name) && covers.call(target, in_vertex.name))

    found_at = pos
    # the edge was seen again in a different sentence
    edge.frequency += 1 if index != edge.index
  end
  found_at
end
458
+ #------------------------------------------#------------------------------------------#------------------------------------------
459
+
460
# Searches the first @num_edges positions of `list` for an edge whose two
# vertex names equal those of in_vertex and out (ignoring case), in either
# direction. Unlike search_edges, containment is not enough.
#
# Every matching edge that belongs to a sentence other than `index` gets its
# frequency incremented.
#
# Returns the position of the last matching edge, or -1 when there is none.
def search_edges_to_set_null(list, in_vertex, out, index)
  same_name = lambda { |x, y| x.name.downcase == y.name.downcase }
  found_at = -1
  @num_edges.times do |pos|
    edge = list[pos]
    next if edge.nil? || edge.in_vertex.nil? || edge.out_vertex.nil?
    next unless (same_name.call(edge.in_vertex, in_vertex) && same_name.call(edge.out_vertex, out)) ||
                (same_name.call(edge.in_vertex, out) && same_name.call(edge.out_vertex, in_vertex))

    found_at = pos
    # the edge was seen again in a different sentence
    edge.frequency += 1 if index != edge.index
  end
  found_at
end
482
+ #------------------------------------------#------------------------------------------#------------------------------------------
483
+ =begin
484
+ NULLIFY ALL EDGES CONTAINING "ONLY SUBSTRINGS" (and not exact matches) OF EITHER IN/OUT VERTICES IN THE SAME SENTENCE (verts[j].index == index)
485
+ And reset the @edges array with non-null elements.
486
+ =end
487
+
488
# Drops the edges of sentence `index` that are made redundant by the edge
# in_vertex -> out, and compacts @edges.
#
# An edge is redundant when one of its ends equals the corresponding end of
# the given edge (ignoring case) while its other end is only a part of the
# corresponding end (contained in it, ignoring case, but not equal).
# Only positions below @num_edges are examined.
#
# Afterwards @edges holds the non-nil edges only and @num_edges is set to that
# count plus one, which is also the return value.
def remove_redundant_edges(in_vertex, out, index)
  list = @edges
  (@num_edges - 1).downto(0) do |pos|
    edge = list[pos]
    next if edge.nil? || edge.index != index

    # in-vertices are equal and the stored out-vertex is only a substring
    partial_out = in_vertex.name.casecmp(edge.in_vertex.name) == 0 &&
                  out.name.casecmp(edge.out_vertex.name) != 0 &&
                  out.name.downcase.include?(edge.out_vertex.name.downcase)
    # the stored in-vertex is only a substring and out-vertices are equal
    list[pos] = nil if partial_out ||
                       (in_vertex.name.casecmp(edge.in_vertex.name) != 0 &&
                        in_vertex.name.downcase.include?(edge.in_vertex.name.downcase) &&
                        out.name.casecmp(edge.out_vertex.name) == 0)
  end

  kept = list.compact
  @edges = kept
  @num_edges = kept.length + 1
end
523
+
524
+ #------------------------------------------#------------------------------------------#------------------------------------------
525
# Debugging aid: writes the vertices and then the edges to standard output.
#
# edges    - array of edges; nil entries and edges lacking an in- or
#            out-vertex are not printed
# vertices - array of vertices; nil entries are not printed
def print_graph(edges, vertices)
  puts("*** List of vertices::")
  vertices.each do |vertex|
    next if vertex.nil?
    puts("@@@ Vertex:: #{vertex.name}")
    puts("*** Frequency:: #{vertex.frequency} State:: #{vertex.state}")
    puts("*** Label:: #{vertex.label} Parent:: #{vertex.parent}")
  end
  puts("*******")
  puts("*** List of edges::")
  edges.each do |edge|
    next if edge.nil? || edge.in_vertex.nil? || edge.out_vertex.nil?
    puts("@@@ Edge:: #{edge.in_vertex.name} & #{edge.out_vertex.name}")
    puts("*** Frequency:: #{edge.frequency} State:: #{edge.in_vertex.state} & #{edge.out_vertex.state}")
    puts("*** Label:: #{edge.label}")
  end
  puts("--------------")
end
545
+
546
+ #------------------------------------------#------------------------------------------#------------------------------------------
547
+ #Identifying parents and labels for the vertices
548
# Identifies, for every whitespace separated token of the sentence t, the name
# of its parent in the dependency parse produced by @pipeline.
#
# Returns an array indexed like t.split(" "): the parent's name, or nil when
# no parent was found for that token. Tokens for which is_punct is true are
# skipped without storing anything at their position.
def find_parents(t)
  tp = TextPreprocessing.new
  unTaggedString = t.split(" ")
  parents = Array.new
  t = StanfordCoreNLP::Text.new(t) # the same variable is reused for the annotated text
  @pipeline.annotate(t)
  # only the parsed form of the first sentence of t is used
  sentence = t.get(:sentences).toArray
  parsed_sentence = sentence[0].get(:collapsed_c_c_processed_dependencies)

  unTaggedString.each_index do |j|
    next if tp.is_punct(unTaggedString[j])

    if tp.contains_punct(unTaggedString[j])
      unTaggedString[j] = tp.contains_punct(unTaggedString[j])
    end

    # start every token without a parent, so that a token that cannot be
    # looked up does not inherit the parent of the token before it
    parent = nil
    if !unTaggedString[j].nil? && !tp.contains_punct_bool(unTaggedString[j])
      pat = parsed_sentence.getAllNodesByWordPattern(unTaggedString[j]).toArray
      # the parse may hold no node for this token
      parent = parsed_sentence.getParents(pat[0]).toArray unless pat[0].nil?
    end

    if !parent.nil? && !parent[0].nil?
      # the parent is rendered as "name-suffix"; keep the part before the first "-"
      parent_text = parent[0].to_s
      dash = parent_text.index("-")
      parents[j] = dash.nil? ? parent_text : parent_text[0..dash - 1]
    else
      parents[j] = nil
    end
  end
  parents
end
586
+ #------------------------------------------#------------------------------------------#------------------------------------------
587
+ #Identifying parents and labels for the vertices
588
# Identifies, for every whitespace separated token of the sentence t, the
# label of the dependency relation in which the token is the dependent.
#
# Periods and commas are stripped from a token before it is compared with the
# dependents of the parse produced by @pipeline; the first dependency whose
# dependent equals the token supplies the label (the relation's short name).
#
# Returns an array indexed like t.split(" "); positions of tokens without a
# matching dependency are never assigned.
def find_labels(t)
  words = t.split(" ")
  t = StanfordCoreNLP::Text.new(t)
  @pipeline.annotate(t)
  # only the parsed form of the first sentence of t is used
  parsed_sentence = t.get(:sentences).toArray[0].get(:collapsed_c_c_processed_dependencies)
  labels = []
  govDep = parsed_sentence.typedDependencies.toArray

  words.each_with_index do |word, j|
    word.gsub!(".", "")
    word.gsub!(",", "")
    # position of the first dependency that has this token as its dependent
    hit = (0...govDep.length).find { |k| govDep[k].dep.value() == word }
    labels[j] = govDep[hit].reln.getShortName() unless hit.nil?
  end
  labels
end
617
+ #------------------------------------------#------------------------------------------#------------------------------------------
618
+ =begin
619
+ * Setting semantic labels for edges based on the labels vertices have with their parents
620
+ =end
621
# Sets semantic labels on edges, based on the label each vertex has with its
# parent.
#
# For every vertex with a parent name, the first vertex whose name equals the
# parent name (ignoring case) or contains it is taken as the parent vertex.
# Every edge connecting a vertex of that name with a vertex of the parent's
# name, in either direction, then receives the vertex's label when
#   * the edge has no label yet, or
#   * the edge carries a generic "NMOD"/"PMOD" label and the vertex's label is
#     not one of these two.
def set_semantic_labels_for_edges
  generic = %w[NMOD PMOD]
  @vertices.each do |vertex|
    next if vertex.nil? || vertex.parent.nil? # parent is nil for ROOT

    # looked up afresh for every vertex, so a vertex whose parent is not in
    # the graph never reuses the parent found for an earlier vertex
    parent = @vertices.find do |candidate|
      !candidate.nil? && (candidate.name.casecmp(vertex.parent) == 0 ||
                          candidate.name.downcase.include?(vertex.parent.downcase))
    end
    next if parent.nil?

    @edges.each do |edge|
      next if edge.nil? || edge.in_vertex.nil? || edge.out_vertex.nil?
      # names are compared by value, not by object identity
      joins_pair = (edge.in_vertex.name == vertex.name && edge.out_vertex.name == parent.name) ||
                   (edge.in_vertex.name == parent.name && edge.out_vertex.name == vertex.name)
      next unless joins_pair

      if edge.label.nil?
        edge.label = vertex.label
      elsif generic.include?(edge.label) && !generic.include?(vertex.label)
        edge.label = vertex.label
      end
    end
  end
end
652
+
653
+ end # end of the class GraphGenerator
654
+ #------------------------------------------#------------------------------------------#------------------------------------------
655
+ =begin
656
+ Identifying frequency of edges and pruning out edges that do not meet the threshold conditions
657
+ =end
658
# Keeps only the edges whose frequency lies between ALPHA_FREQ and BETA_FREQ
# (both inclusive).
#
# edges - array of edges; nil entries are ignored
# num   - number of leading positions of `edges` to examine
#
# Returns the selected edges ordered from the highest frequency down to the
# lowest; edges of equal frequency keep their original relative order.
# When at least one edge is selected, @num_edges is set to the number of
# selected edges; otherwise it is left untouched.
def identify_frequency_and_prune_edges(edges, num)
  # one bucket of edges per frequency value inside the threshold window
  by_frequency = Hash.new { |buckets, frequency| buckets[frequency] = [] }
  (0...num).each do |j|
    edge = edges[j]
    next if edge.nil?
    frequency = edge.frequency
    by_frequency[frequency] << edge if frequency >= ALPHA_FREQ && frequency <= BETA_FREQ
  end

  selectedEdges = Array.new
  BETA_FREQ.downto(ALPHA_FREQ) do |frequency|
    selectedEdges.concat(by_frequency[frequency]) if by_frequency.key?(frequency)
  end

  # replace the number of edges with the number of selected edges
  @num_edges = selectedEdges.length unless selectedEdges.empty?
  selectedEdges
end
695
+ #------------------------------------------#------------------------------------------#------------------------------------------