automated_metareview 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
# An edge of the review graph. It connects an in-vertex to an out-vertex and
# carries per-metric match counters that start at zero.
class Edge
  attr_accessor :edgeID, :type, :name, :index, :in_vertex, :out_vertex, :edge_match, :average_match, :frequency, :label

  # edge_name - name given to the edge
  # edge_type - 1 - verb, 2 - adjective, 3 - adverb
  #
  # edgeID, index, in_vertex, out_vertex and label are not set here and
  # read as nil until a caller assigns them.
  def initialize(edge_name, edge_type)
    @name = edge_name
    @type = edge_type
    @average_match = 0.0 # initializing match to 0
    @frequency = 0
    # number of matches for each metric value, all starting at 0
    @edge_match = [0, 0, 0, 0, 0]
  end
end
@@ -0,0 +1,695 @@
1
+ require 'automated_metareview/sentence_state'
2
+ require 'automated_metareview/edge'
3
+ require 'automated_metareview/vertex'
4
+
5
+ class GraphGenerator
6
+ #include SentenceState
7
+ #creating accessors for the instance variables
8
+ attr_accessor :vertices, :num_vertices, :edges, :num_edges, :pipeline, :pos_tagger
9
+
10
+ # #global variables
11
+ # $vertices = Array.new
12
+ # $edges = Array.new
13
+
14
+ =begin
15
+ * generates the graph for the given review or submission text
16
+ * INPUT: an array of sentences for a review or a submission. Every row in 'text' contains one sentence.
17
+ * type - tells you if it was a review or a submission
18
+ * type = 1 - submission/past review
19
+ * type = 2 - new review
20
+ =end
21
+ def generate_graph(text, pos_tagger, coreNLPTagger, forRelevance, forPatternIdentify)
22
+ #Builds the graph for 'text' (an array of sentences): resets @vertices/@edges and their counters, stores the tagger and pipeline, then walks every tagged token creating noun/adjective/verb/adverb vertices and the edges between them. Returns @num_edges after it has been decremented by one. forRelevance and forPatternIdentify are not read in this method; NOUN, VERB, ADJ and ADV are not defined in this part of the file.
23
+ @vertices = Array.new
24
+ @num_vertices = 0
25
+ @edges = Array.new
26
+ @num_edges = 0
27
+
28
+ @pos_tagger = pos_tagger #part of speech tagger
29
+ @pipeline = coreNLPTagger #dependency parsing
30
+ #iterate through the sentences in the text
31
+ for i in (0..text.length-1)
32
+ if(text[i].empty? or text[i] == "" or text[i].split(" ").empty?)
33
+ next
34
+ end
35
+ unTaggedString = text[i].split(" ") #NOTE(review): not read again in this method
36
+ # puts "UnTagged String:: #{unTaggedString}"
37
+ taggedString = @pos_tagger.get_readable(text[i])
38
+ # puts "taggedString:: #{taggedString}"
39
+
40
+ #Initializing some arrays
41
+ nouns = Array.new
42
+ nCount = 0
43
+ verbs = Array.new
44
+ vCount = 0
45
+ adjectives = Array.new
46
+ adjCount = 0
47
+ adverbs = Array.new
48
+ advCount = 0
49
+
50
+ parents = Array.new
51
+ labels = Array.new
52
+
53
+ #------------------------------------------#------------------------------------------
54
+ #finding parents
55
+ parents = find_parents(text[i])
56
+ parentCounter = 0
57
+ #------------------------------------------#------------------------------------------
58
+ #finding labels
59
+ labels = find_labels(text[i])
60
+ labelCounter = 0
61
+ #------------------------------------------#------------------------------------------
62
+ #find state
63
+ sstate = SentenceState.new
64
+ states_array = sstate.identify_sentence_state(taggedString)
65
+ states_counter = 0
66
+ state = states_array[states_counter]
67
+ states_counter += 1
68
+ #------------------------------------------#------------------------------------------
69
+
70
+ taggedString = taggedString.split(" ")
71
+ prevType = nil #initializing the previous token type
72
+
73
+ #iterate through the tokens
74
+ for j in (0..taggedString.length-1)
75
+ taggedToken = taggedString[j]
76
+ plainToken = taggedToken[0...taggedToken.index("/")].to_s
77
+ posTag = taggedToken[taggedToken.index("/")+1..taggedToken.length].to_s
78
+ #ignore periods
79
+ if(plainToken == "." or taggedToken.include?("/POS") or (taggedToken.index("/") == taggedToken.length()-1) or (taggedToken.index("/") == taggedToken.length()-2))#this is for strings containing "'s" or without POS
80
+ next
81
+ end
82
+
83
+ #SETTING STATE
84
+ #since the CC or IN are part of the following sentence segment, we set the STATE for that segment when we see a CC or IN
85
+ if(taggedToken.include?("/CC"))#{//|| ps.contains("/IN")
86
+ state = states_array[states_counter]
87
+ states_counter+=1
88
+ end
89
+ # puts("**Value:: #{plainToken} LabelCounter:: #{labelCounter} ParentCounter:: #{parentCounter} POStag:: #{posTag} .. state = #{state}")
90
+
91
+ #------------------------------------------
92
+ #if the token is a noun (this branch also takes tokens whose tag contains PRP, IN, /EX or WP)
93
+ if(taggedToken.include?("NN") or taggedToken.include?("PRP") or taggedToken.include?("IN") or taggedToken.include?("/EX") or taggedToken.include?("WP"))
94
+ #either add on to a previous vertex or create a brand new noun vertex
95
+ if(prevType == NOUN) #adding to a previous noun vertex
96
+ nCount -= 1 #decrement, since we are accessing a previous noun vertex
97
+ prevVertex = search_vertices(@vertices, nouns[nCount], i) #fetching the previous vertex
98
+ nouns[nCount] = nouns[nCount].to_s + " " + plainToken #concatenating with contents of the previous noun vertex
99
+ #checking if the previous noun concatenated with "s" already exists among the vertices
100
+ if((nounVertex = search_vertices(@vertices, nouns[nCount], i)) == nil)
101
+ prevVertex.name = prevVertex.name.to_s + " " + plainToken #concatenating the nouns
102
+ nounVertex = prevVertex #the current concatenated vertex will be considered
103
+ if(labels[labelCounter] != "NMOD" or labels[labelCounter] != "PMOD")#resetting labels for the concatenated vertex. NOTE(review): this "!= or !=" test is always true, so the label is always reset - confirm whether "and" was intended
104
+ nounVertex.label = labels[labelCounter]
105
+ end
106
+ #fAppendedVertex = 1
107
+ end#if the vertex already exists, just use nounVertex - the returned vertex for ops.
108
+ else #if the previous token is not a noun, create a brand new vertex
109
+ nouns[nCount] = plainToken #this is checked for later on
110
+ nounVertex = search_vertices(@vertices, plainToken, i)
111
+ if(nounVertex == nil) #the string doesn't already exist
112
+ @vertices[@num_vertices] = Vertex.new(nouns[nCount], NOUN, i, state, labels[labelCounter], parents[parentCounter], posTag)
113
+ nounVertex = @vertices[@num_vertices] #the newly formed vertex will be considered
114
+ @num_vertices+=1
115
+ end
116
+ end #end of if prevType was noun
117
+ remove_redundant_vertices(nouns[nCount], i)
118
+ nCount+=1 #increment nCount for a new noun vertex just created (or existing previous vertex appended with new text)
119
+
120
+ #checking if a noun existed before this one and if the adjective was attached to that noun.
121
+ #if an adjective was found earlier, we add a new edge
122
+ if(prevType == ADJ)
123
+ #set previous noun's property to null, if it was set, if there is a noun before the adjective
124
+ if(nCount > 1)
125
+ v1 = search_vertices(@vertices, nouns[nCount-2], i) #fetching the previous noun, the one before the current noun (therefore -2)
126
+ v2 = search_vertices(@vertices, adjectives[adjCount-1], i) #fetching the previous adjective
127
+ #if such an edge exists - DELETE IT - search_edges_to_set_null() returns the position in the array at which such an edge exists
128
+ if(!v1.nil? and !v2.nil? and (e = search_edges_to_set_null(@edges, v1, v2, i)) != -1) #-1 is when no such edge exists
129
+ @edges[e] = nil #setting the edge to null
130
+ #if @num_edges had been previously incremented, decrement it
131
+ if(@num_edges > 0)
132
+ @num_edges-=1 #deducting an edge count
133
+ end
134
+ end
135
+ end
136
+ #if this noun vertex was encountered for the first time, nCount < 1,
137
+ #so do adding of edge outside the if condition
138
+ #add a new edge with v1 as the adjective and v2 as the new noun
139
+ v1 = search_vertices(@vertices, adjectives[adjCount-1], i)
140
+ v2 = nounVertex #the noun vertex that was just created
141
+ #if such an edge did not already exist
142
+ if(!v1.nil? and !v2.nil? and (e = search_edges(@edges, v1, v2, i)) == -1)
143
+ @edges[@num_edges] = Edge.new("noun-property",VERB)
144
+ @edges[@num_edges].in_vertex = v1
145
+ @edges[@num_edges].out_vertex = v2
146
+ @edges[@num_edges].index = i
147
+ @num_edges+=1
148
+ #since an edge was just added we try to check if there exist any redundant edges that can be removed
149
+ remove_redundant_edges(v1, v2, i)
150
+ end
151
+ end
152
+ #a noun has been found and has established a verb as an in_vertex and such an edge doesnt already previously exist
153
+ if(vCount > 0) #and fAppendedVertex == 0
154
+ #add edge only when a fresh vertex is created not when existing vertex is appended to
155
+ v1 = search_vertices(@vertices, verbs[vCount-1], i)
156
+ v2 = nounVertex
157
+ #if such an edge does not already exist add it
158
+ if(!v1.nil? and !v2.nil? and (e = search_edges(@edges,v1, v2, i)) == -1)
159
+ @edges[@num_edges] = Edge.new("verb", VERB)
160
+ @edges[@num_edges].in_vertex = v1 #for vCount = 0
161
+ @edges[@num_edges].out_vertex = v2
162
+ @edges[@num_edges].index = i
163
+ @num_edges+=1
164
+ #since an edge was just added we try to check if there exist any redundant edges that can be removed
165
+ remove_redundant_edges(v1, v2, i)
166
+ end
167
+ end
168
+ prevType = NOUN
169
+ #------------------------------------------
170
+
171
+ #if the string is an adjective
172
+ #adjectives are vertices but they are not connected by an edge to the nouns, instead they are the noun's properties
173
+ elsif(taggedToken.include?("/JJ"))
174
+ adjective = nil
175
+ if(prevType == ADJ) #combine the adjectives
176
+ # puts("PREV ADJ here:: #{plainToken}")
177
+ if(adjCount >= 1)
178
+ adjCount = adjCount - 1
179
+ prevVertex = search_vertices(@vertices, adjectives[adjCount], i) #fetching the previous vertex
180
+ adjectives[adjCount] = adjectives[adjCount] + " " + plainToken
181
+ #if the concatenated vertex didn't already exist
182
+ if((adjective = search_vertices(@vertices, adjectives[adjCount], i)).nil?)
183
+ prevVertex.name = prevVertex.name+" "+plainToken
184
+ adjective = prevVertex #set it as "adjective" for further execution
185
+ if(labels[labelCounter] != "NMOD" or labels[labelCounter] != "PMOD") #resetting labels for the concatenated vertex. NOTE(review): this "!= or !=" test is always true - confirm whether "and" was intended
186
+ adjective.label = labels[labelCounter]
187
+ end
188
+ end
189
+ end
190
+ else #new adjective vertex
191
+ adjectives[adjCount] = plainToken
192
+ if((adjective = search_vertices(@vertices, plainToken, i)).nil?) #the string doesn't already exist
193
+ @vertices[@num_vertices] = Vertex.new(adjectives[adjCount], ADJ, i, state, labels[labelCounter], parents[parentCounter], posTag)
194
+ adjective = @vertices[@num_vertices]
195
+ @num_vertices+=1
196
+ end
197
+ end
198
+ remove_redundant_vertices(adjectives[adjCount], i)
199
+ adjCount+=1 #incrementing, since a new adjective was created or an existing one updated.
200
+
201
+ #by default associate the adjective with the previous/latest noun and if there is a noun following it immediately, then remove the property from the older noun (done under noun condition)
202
+ if(nCount > 0) #gets the previous noun to form the edge
203
+ v1 = search_vertices(@vertices, nouns[nCount-1], i)
204
+ v2 = adjective #the current adjective vertex
205
+ #if such an edge does not already exist add it
206
+ if(!v1.nil? and !v2.nil? and (e = search_edges(@edges, v1, v2, i)) == -1)
207
+ # puts "** Adding noun-adj edge .. #{v1.name} - #{v2.name}"
208
+ @edges[@num_edges] = Edge.new("noun-property",VERB)
209
+ @edges[@num_edges].in_vertex = v1
210
+ @edges[@num_edges].out_vertex = v2
211
+ @edges[@num_edges].index = i
212
+ @num_edges+=1
213
+ #since an edge was just added we try to check if there exist any redundant edges that can be removed
214
+ remove_redundant_edges(v1, v2, i)
215
+ end
216
+ end
217
+ prevType = ADJ
218
+ #end of if condition for adjective
219
+ #------------------------------------------
220
+
221
+ #if the string is a verb or a modal//length condition for verbs is, be, are...
222
+ elsif(taggedToken.include?("/VB") or taggedToken.include?("MD"))
223
+ verbVertex = nil
224
+ if(prevType == VERB) #combine the verbs
225
+ vCount = vCount - 1
226
+ prevVertex = search_vertices(@vertices, verbs[vCount], i) #fetching the previous vertex
227
+ verbs[vCount] = verbs[vCount] + " " + plainToken
228
+ #if the concatenated vertex didn't already exist
229
+ if((verbVertex = search_vertices(@vertices, verbs[vCount], i)) == nil)
230
+ prevVertex.name = prevVertex.name + " " + plainToken
231
+ verbVertex = prevVertex #concatenated vertex becomes the new verb vertex
232
+ if(labels[labelCounter] != "NMOD" or labels[labelCounter] != "PMOD")#resetting labels for the concatenated vertex. NOTE(review): this "!= or !=" test is always true - confirm whether "and" was intended
233
+ verbVertex.label = labels[labelCounter]
234
+ end
235
+ end
236
+ else
237
+ verbs[vCount] = plainToken
238
+ if((verbVertex = search_vertices(@vertices, plainToken, i)) == nil)
239
+ @vertices[@num_vertices] = Vertex.new(plainToken, VERB, i, state, labels[labelCounter], parents[parentCounter], posTag)
240
+ verbVertex = @vertices[@num_vertices] #newly created verb vertex will be considered in the future
241
+ @num_vertices+=1
242
+ end
243
+ end
244
+ remove_redundant_vertices(verbs[vCount], i)
245
+ vCount+=1
246
+
247
+ #if an adverb was found earlier, we set that as the verb's property
248
+ if(prevType == ADV)
249
+ #set previous verb's property to null, if it was set, if there is a verb following the adverb
250
+ if(vCount > 1)
251
+ v1 = search_vertices(@vertices, verbs[vCount-2], i) #fetching the previous verb, the one before the current one (hence -2)
252
+ v2 = search_vertices(@vertices, adverbs[advCount-1], i) #fetching the previous adverb
253
+ #if such an edge exists - DELETE IT
254
+ if(!v1.nil? and !v2.nil? and (e = search_edges_to_set_null(@edges, v1, v2, i)) != -1)
255
+ @edges[e] = nil #setting the edge to null
256
+ if(@num_edges > 0)
257
+ @num_edges-=1 #deducting an edge count
258
+ end
259
+ end
260
+ end
261
+ #if this verb vertex was encountered for the first time, vCount < 1,
262
+ #so do adding of edge outside the if condition
263
+ #add a new edge with v1 as the adverb and v2 as the new verb
264
+ v1 = search_vertices(@vertices, adverbs[advCount-1], i)
265
+ v2 = verbVertex
266
+ #if such an edge did not already exist
267
+ if(!v1.nil? and !v2.nil? and (e = search_edges(@edges, v1, v2, i)) == -1)
268
+ @edges[@num_edges] = Edge.new("verb-property",VERB)
269
+ @edges[@num_edges].in_vertex = v1
270
+ @edges[@num_edges].out_vertex = v2
271
+ @edges[@num_edges].index = i
272
+ @num_edges+=1
273
+ #since an edge was just added we try to check if there exist any redundant edges that can be removed
274
+ remove_redundant_edges(v1, v2, i)
275
+ end
276
+ end
277
+
278
+ #making the previous noun, one of the vertices of the verb edge
279
+ if(nCount > 0) #and fAppendedVertex == 0
280
+ #gets the previous noun to form the edge
281
+ v1 = search_vertices(@vertices, nouns[nCount-1], i)
282
+ v2 = verbVertex
283
+ #if such an edge does not already exist add it
284
+ if(!v1.nil? and !v2.nil? and (e = search_edges(@edges, v1, v2, i)) == -1)
285
+ @edges[@num_edges] = Edge.new("verb",VERB)
286
+ @edges[@num_edges].in_vertex = v1 #for nCount = 0;
287
+ @edges[@num_edges].out_vertex = v2 #the verb
288
+ @edges[@num_edges].index = i
289
+ @num_edges+=1
290
+ #since an edge was just added we try to check if there exist any redundant edges that can be removed
291
+ remove_redundant_edges(v1, v2, i)
292
+ end
293
+ end
294
+ prevType = VERB
295
+ #------------------------------------------
296
+ #if the string is an adverb
297
+ elsif(taggedToken.include?("RB"))
298
+ adverb = nil
299
+ if(prevType == ADV) #appending to existing adverb
300
+ if(advCount >= 1)
301
+ advCount = advCount - 1
302
+ end
303
+ prevVertex = search_vertices(@vertices, adverbs[advCount], i) #fetching the previous vertex
304
+ adverbs[advCount] = adverbs[advCount] + " " + plainToken
305
+ #if the concatenated vertex didn't already exist
306
+ if((adverb = search_vertices(@vertices, adverbs[advCount], i)) == nil)
307
+ prevVertex.name = prevVertex.name + " " + plainToken
308
+ adverb = prevVertex #setting it as "adverb" for further computation
309
+ if(labels[labelCounter] != "NMOD" or labels[labelCounter] != "PMOD") #resetting labels for the concatenated vertex. NOTE(review): this "!= or !=" test is always true - confirm whether "and" was intended
310
+ adverb.label = labels[labelCounter]
311
+ end
312
+ end
313
+ else #else creating a new vertex
314
+ adverbs[advCount] = plainToken
315
+ if((adverb = search_vertices(@vertices, plainToken, i)) == nil)
316
+ @vertices[@num_vertices] = Vertex.new(adverbs[advCount], ADV, i, state, labels[labelCounter], parents[parentCounter], posTag);
317
+ adverb = @vertices[@num_vertices]
318
+ @num_vertices+=1
319
+ end
320
+ end
321
+ remove_redundant_vertices(adverbs[advCount], i)
322
+ advCount+=1
323
+
324
+ #by default associate it with the previous/latest verb and if there is a verb following it immediately, then remove the property from the verb
325
+ if(vCount > 0) #gets the previous verb to form a verb-adverb edge
326
+ v1 = search_vertices(@vertices, verbs[vCount-1], i)
327
+ v2 = adverb
328
+ #if such an edge does not already exist add it
329
+ if(!v1.nil? and !v2.nil? && (e = search_edges(@edges, v1, v2, i)) == -1)
330
+ @edges[@num_edges] = Edge.new("verb-property",VERB)
331
+ @edges[@num_edges].in_vertex = v1 #the previous verb
332
+ @edges[@num_edges].out_vertex = v2 #the adverb
333
+ @edges[@num_edges].index = i
334
+ @num_edges+=1
335
+ #since an edge was just added we try to check if there exist any redundant edges that can be removed
336
+ remove_redundant_edges(v1, v2, i)
337
+ end
338
+ end
339
+ prevType = ADV
340
+ #end of if condition for adverb
341
+ end #end of if condition
342
+ #------------------------------------------
343
+ #incrementing counters for labels and parents
344
+ labelCounter+=1
345
+ parentCounter+=1
346
+ end #end of the for loop for the tokens
347
+ #puts "here outside the for loop for tokens"
348
+ nouns = nil
349
+ verbs = nil
350
+ adjectives = nil
351
+ adverbs = nil
352
+ end #end of number of sentences in the text
353
+
354
+ @num_vertices = @num_vertices - 1 #since as a counter it was 1 ahead of the array's contents
355
+ @num_edges = @num_edges - 1 #same reason as for num_vertices
356
+ set_semantic_labels_for_edges
357
+ #print_graph(@edges, @vertices)
358
+ # puts("Number of edges:: #{@num_edges}")
359
+ # puts("Number of vertices:: #{@num_vertices}")
360
+ return @num_edges
361
+ end #end of the graphGenerate method
362
+
363
+ #------------------------------------------#------------------------------------------#------------------------------------------
364
+
365
# Looks up a vertex by name within one sentence.
#
# list  - array of vertices; nil slots are skipped
# s     - name to look for, compared case-insensitively with casecmp
# index - sentence index the vertex must carry
#
# Returns the first matching vertex, or nil when s is nil or nothing matches.
def search_vertices(list, s, index)
  # a nil name can never match, so there is no point scanning the list
  return nil if s.nil?

  list.find do |vertex|
    # the vertex must exist, have the same name and sit in the same sentence
    !vertex.nil? && vertex.name.casecmp(s) == 0 && vertex.index == index
  end
end # end of the search_vertices method
378
+
379
+ #------------------------------------------#------------------------------------------#------------------------------------------
380
+
381
+ =begin
382
+ NULLIFY ALL VERTICES CONTAINING "ONLY SUBSTRINGS" (and not exact matches) OF THIS VERTEX IN THE SAME SENTENCE (verts[j].index == index)
383
+ And reset the @vertices array with non-null elements.
384
+ =end
385
# Drops every vertex of sentence 'index' whose name is a proper,
# case-insensitive substring of 's', together with the edges touching it,
# then rebuilds @vertices without nil entries.
#
# s     - name of the vertex that was just created or extended
# index - sentence index; vertices of other sentences are left alone
#
# Side effects: edges touching a removed vertex are set to nil inside @edges
# (the edge array is not compacted here and @num_edges is not changed);
# @vertices is replaced by a compacted array; @num_vertices is set to the
# number of remaining vertices plus one.
def remove_redundant_vertices(s, index)
  verts = @vertices
  (@num_vertices - 1).downto(0) do |j|
    vertex = verts[j]
    next if vertex.nil? || vertex.index != index
    # an exact (case-insensitive) match is the vertex itself, keep it
    next if s.casecmp(vertex.name) == 0
    # the length condition prevents a one-letter vertex such as "I" from being removed
    next unless s.downcase.include?(vertex.name.downcase) && vertex.name.length > 1

    # nullify all the edges that have this vertex as in- or out-vertex
    unless @edges.nil?
      @edges.each_index do |i|
        edge = @edges[i]
        @edges[i] = nil if !edge.nil? && (edge.in_vertex == vertex || edge.out_vertex == vertex)
      end
    end
    # finally nullify the vertex itself
    verts[j] = nil
  end

  # recreating the vertices array without the nil values
  @vertices = verts.compact
  # @num_vertices is kept one ahead of the vertex count; generate_graph subtracts one when it finishes
  @num_vertices = @vertices.length + 1
end
425
+
426
+ #------------------------------------------#------------------------------------------#------------------------------------------
427
+
428
+ =begin
429
+ Checks to see if an edge between vertices "in" and "out" exists.
430
+ returns the position in the list of the last matching edge, or -1 if no such edge exists
431
+ edge[] list, vertex in, vertex out, int index
432
+ =end
433
# Searches 'list' for an edge joining in_vertex and out, in either direction.
#
# list      - array of edges (may be nil); nil edges and edges with a missing
#             in- or out-vertex are skipped
# in_vertex - one end of the edge being looked for
# out       - the other end
# index     - sentence index of the caller
#
# A stored vertex name matches a wanted name when the two are equal ignoring
# case, or when the stored name contains the wanted name (case-sensitive).
# Every matching edge found in a different sentence (edge.index != index)
# has its frequency incremented.
#
# Returns the position of the last matching edge, or -1 when there is none.
def search_edges(list, in_vertex, out, index)
  edge_pos = -1
  return edge_pos if list.nil? # if the list is null

  same_or_contains = lambda do |stored, wanted|
    stored.casecmp(wanted) == 0 || stored.include?(wanted)
  end

  list.each_with_index do |edge, i|
    next if edge.nil? || edge.in_vertex.nil? || edge.out_vertex.nil?

    stored_in = edge.in_vertex.name
    stored_out = edge.out_vertex.name
    forward = same_or_contains.call(stored_in, in_vertex.name) &&
              same_or_contains.call(stored_out, out.name)
    # the reverse direction is only checked when the forward one fails
    next unless forward ||
                (same_or_contains.call(stored_in, out.name) &&
                 same_or_contains.call(stored_out, in_vertex.name))

    edge_pos = i
    # increment frequency if the edge was found in a different sentence
    edge.frequency += 1 if index != edge.index
  end
  edge_pos
end # end of search_edges
458
+ #------------------------------------------#------------------------------------------#------------------------------------------
459
+
460
# Finds the edge that joins in_vertex and out (in either direction) by exact,
# case-insensitive name match. This method only locates the edge; it does not
# set anything to nil itself.
#
# list      - array of edges; only the first @num_edges positions are examined,
#             and nil edges or edges with a missing end are skipped
# in_vertex - one end of the edge
# out       - the other end
# index     - sentence index of the caller; a matching edge from a different
#             sentence has its frequency incremented
#
# Returns the position of the last matching edge, or -1 when there is none.
def search_edges_to_set_null(list, in_vertex, out, index)
  edge_pos = -1
  @num_edges.times do |i|
    edge = list[i]
    next if edge.nil? || edge.in_vertex.nil? || edge.out_vertex.nil?

    stored_in = edge.in_vertex.name.downcase
    stored_out = edge.out_vertex.name.downcase
    wanted_in = in_vertex.name.downcase
    wanted_out = out.name.downcase
    # checking for exact match with an edge, in either direction
    next unless (stored_in == wanted_in && stored_out == wanted_out) ||
                (stored_in == wanted_out && stored_out == wanted_in)

    edge_pos = i
    # increment frequency if the edge was found in a different sentence
    edge.frequency += 1 if index != edge.index
  end
  edge_pos
end # end of the method search_edges_to_set_null
482
+ #------------------------------------------#------------------------------------------#------------------------------------------
483
+ =begin
484
+ NULLIFY ALL EDGES CONTAINING "ONLY SUBSTRINGS" (and not exact matches) OF EITHER IN/OUT VERTICES IN THE SAME SENTENCE (verts[j].index == index)
485
+ And reset the @edges array with non-null elements.
486
+ =end
487
+
488
# Removes the edges of sentence 'index' that are made redundant by the edge
# in_vertex -> out, then rebuilds @edges without nil entries.
#
# An existing edge is redundant when
#   * its in-vertex has the same name (ignoring case) as in_vertex and its
#     out-vertex name is a proper substring of out's name, or
#   * its in-vertex name is a proper substring of in_vertex's name and its
#     out-vertex has the same name (ignoring case) as out.
#
# Only the first @num_edges positions of @edges are examined. Edges that lack
# an in- or out-vertex are left untouched instead of raising.
#
# Side effects: @edges is replaced by a compacted array and @num_edges is set
# to the number of remaining edges plus one.
def remove_redundant_edges(in_vertex, out, index)
  list = @edges
  (@num_edges - 1).downto(0) do |j|
    edge = list[j]
    next if edge.nil? || edge.index != index
    next if edge.in_vertex.nil? || edge.out_vertex.nil?

    redundant =
      if in_vertex.name.casecmp(edge.in_vertex.name) == 0
        # in-vertices are equal and the stored out-vertex is only a substring
        out.name.casecmp(edge.out_vertex.name) != 0 &&
          out.name.downcase.include?(edge.out_vertex.name.downcase)
      else
        # stored in-vertex is only a substring and the out-vertices are equal
        in_vertex.name.downcase.include?(edge.in_vertex.name.downcase) &&
          out.name.casecmp(edge.out_vertex.name) == 0
      end
    list[j] = nil if redundant
  end

  # recreating the edges array without the nil values
  @edges = list.compact
  # @num_edges is kept one ahead of the edge count; generate_graph subtracts one when it finishes
  @num_edges = @edges.length + 1
end
523
+
524
+ #------------------------------------------#------------------------------------------#------------------------------------------
525
# Debug helper: writes the vertices and then the edges of a graph to standard
# output.
#
# edges    - array of edges; nil edges and edges with a missing in- or
#            out-vertex are not printed
# vertices - array of vertices; nil entries are not printed
def print_graph(edges, vertices)
  puts("*** List of vertices::")
  vertices.each do |vertex|
    next if vertex.nil?

    puts("@@@ Vertex:: #{vertex.name}")
    puts("*** Frequency:: #{vertex.frequency} State:: #{vertex.state}")
    puts("*** Label:: #{vertex.label} Parent:: #{vertex.parent}")
  end
  puts("*******")
  puts("*** List of edges::")
  edges.each do |edge|
    next if edge.nil? || edge.in_vertex.nil? || edge.out_vertex.nil?

    puts("@@@ Edge:: #{edge.in_vertex.name} & #{edge.out_vertex.name}")
    puts("*** Frequency:: #{edge.frequency} State:: #{edge.in_vertex.state} & #{edge.out_vertex.state}")
    puts("*** Label:: #{edge.label}")
  end
  puts("--------------")
end # end of print_graph method
545
+
546
+ #------------------------------------------#------------------------------------------#------------------------------------------
547
+ #Identifying parents and labels for the vertices
548
# Identifies, for every whitespace-separated token of sentence 't', the name
# of its parent in the dependency parse produced by @pipeline.
#
# t - the sentence text
#
# Returns an array indexed like t.split(" "). An entry is nil when the token
# has no parent or could not be looked up, and is left unassigned for tokens
# that tp.is_punct reports as punctuation.
def find_parents(t)
  tp = TextPreprocessing.new
  tokens = t.split(" ")
  parents = Array.new
  t = StanfordCoreNLP::Text.new(t) # the same variable has to be passed into the Text.new method
  @pipeline.annotate(t)
  # identify the parsed form of the (first) sentence
  sentence = t.get(:sentences).toArray
  parsed_sentence = sentence[0].get(:collapsed_c_c_processed_dependencies)

  # iterating through the set of tokens and identifying each token's parent
  tokens.each_index do |j|
    next if tp.is_punct(tokens[j])

    # contains_punct is deliberately called twice, exactly as before: its
    # implementation is not visible here, so the call pattern is preserved
    if tp.contains_punct(tokens[j])
      tokens[j] = tp.contains_punct(tokens[j])
    end

    # start every token with no parent, so that a token whose lookup is
    # skipped does not inherit the parent found for an earlier token
    parent = nil
    if !tokens[j].nil? && !tp.contains_punct_bool(tokens[j])
      nodes = parsed_sentence.getAllNodesByWordPattern(tokens[j]).toArray
      # no node matched the token: nothing to ask the parse for
      parent = parsed_sentence.getParents(nodes[0]).toArray unless nodes[0].nil?
    end

    if !parent.nil? && !parent[0].nil?
      # extracting the name of the parent (since it is in the format "name-POS")
      parent_text = parent[0].to_s
      parents[j] = parent_text[0..parent_text.index("-") - 1]
    else
      parents[j] = nil
    end
  end
  parents
end # end of find_parents method
586
+ #------------------------------------------#------------------------------------------#------------------------------------------
587
+ #Identifying parents and labels for the vertices
588
# Identifies the dependency relation label of every whitespace-separated
# token of sentence 't', using the parse produced by @pipeline.
#
# t - the sentence text
#
# Returns an array indexed like t.split(" "). Each entry is the short name of
# the first typed dependency whose dependent word equals the token (after "."
# and "," have been removed from the token); tokens without such a dependency
# get no entry (nil).
def find_labels(t)
  tokens = t.split(" ")
  t = StanfordCoreNLP::Text.new(t)
  @pipeline.annotate(t)
  # identify the parsed form of the (first) sentence
  sentence = t.get(:sentences).toArray
  parsed_sentence = sentence[0].get(:collapsed_c_c_processed_dependencies)
  gov_dep = parsed_sentence.typedDependencies.toArray

  labels = Array.new
  tokens.each_with_index do |raw_token, j|
    token = raw_token.delete(".,")
    # identify the token's first corresponding position in gov_dep and fetch its label
    k = (0...gov_dep.length).find { |pos| gov_dep[pos].dep.value() == token }
    labels[j] = gov_dep[k].reln.getShortName() unless k.nil?
  end
  labels
end # end of find_labels method
617
+ #------------------------------------------#------------------------------------------#------------------------------------------
618
+ =begin
619
+ * Setting semantic labels for edges based on the labels vertices have with their parents
620
+ =end
621
# Copies the label each vertex has with its parent onto the edge that joins
# the vertex and that parent.
#
# For every vertex with a parent name, the parent vertex is the first vertex
# whose name equals the parent name ignoring case, or contains it. Every edge
# whose two ends carry the names of the vertex and of its parent (in either
# direction) then receives the vertex's label when
#   * the edge has no label yet, or
#   * the edge's label is the generic "NMOD"/"PMOD" and the vertex's label is
#     neither of those.
#
# Reads @vertices and @edges; modifies only the label of matching edges.
def set_semantic_labels_for_edges
  generic = lambda { |label| label == "NMOD" || label == "PMOD" }

  @vertices.each do |vertex|
    next if vertex.nil? || vertex.parent.nil? # parent = nil for ROOT

    # search for the parent vertex; looked up afresh for every vertex so a
    # vertex without a matching parent does not reuse the previous one
    parent = @vertices.find do |candidate|
      !candidate.nil? &&
        (candidate.name.casecmp(vertex.parent) == 0 ||
         candidate.name.downcase.include?(vertex.parent.downcase))
    end
    next if parent.nil?

    # check if an edge exists between the vertex and the parent
    @edges.each do |edge|
      next if edge.nil? || edge.in_vertex.nil? || edge.out_vertex.nil?

      # names are compared by value (==); equal? would only compare object identity
      joins = (edge.in_vertex.name == vertex.name && edge.out_vertex.name == parent.name) ||
              (edge.in_vertex.name == parent.name && edge.out_vertex.name == vertex.name)
      next unless joins

      # set the role label
      if edge.label.nil?
        edge.label = vertex.label
      elsif generic.call(edge.label) && !generic.call(vertex.label)
        edge.label = vertex.label
      end
    end
  end
end # end of set_semantic_labels_for_edges method
652
+
653
+ end # end of the class GraphGenerator
654
+ #------------------------------------------#------------------------------------------#------------------------------------------
655
+ =begin
656
+ Identifying frequency of edges and pruning out edges that do not meet the threshold conditions
657
+ =end
658
# Selects the edges whose frequency lies between ALPHA_FREQ and BETA_FREQ
# (both inclusive; the two constants are expected to be defined elsewhere).
#
# edges - array of edges; nil entries are skipped
# num   - number of leading positions of 'edges' to examine
#
# Returns the selected edges ordered from the highest frequency to the lowest;
# edges with the same frequency keep their order from 'edges'. When at least
# one edge is selected, @num_edges is replaced with the number of selected
# edges; otherwise @num_edges is left alone.
def identify_frequency_and_prune_edges(edges, num)
  # freq_edges groups the qualifying edges by their frequency
  freq_edges = {}
  (0...num).each do |j|
    edge = edges[j]
    next if edge.nil?
    next unless edge.frequency <= BETA_FREQ && edge.frequency >= ALPHA_FREQ

    (freq_edges[edge.frequency] ||= []) << edge
  end

  # collecting the groups from the highest frequency down to the lowest
  selected_edges = []
  BETA_FREQ.downto(ALPHA_FREQ) do |frequency|
    selected_edges.concat(freq_edges[frequency]) if freq_edges.key?(frequency)
  end

  # replacing @num_edges with the number of selected edges
  @num_edges = selected_edges.length unless selected_edges.empty?
  selected_edges
end
695
+ #------------------------------------------#------------------------------------------#------------------------------------------