crosslanguagespotter 0.0.2-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/Gemfile +3 -0
- data/Rakefile +13 -0
- data/crosslanguagespotter.gemspec +36 -0
- data/examples/ex1.rb +13 -0
- data/examples/services_example.rb +13 -0
- data/lib/crosslanguagespotter/basic.rb +157 -0
- data/lib/crosslanguagespotter/context.rb +139 -0
- data/lib/crosslanguagespotter/figures_evaluator.rb +160 -0
- data/lib/crosslanguagespotter/jaccard.rb +114 -0
- data/lib/crosslanguagespotter/methods/context.rb +127 -0
- data/lib/crosslanguagespotter/methods/jaro.rb +118 -0
- data/lib/crosslanguagespotter/methods/tversky.rb +44 -0
- data/lib/crosslanguagespotter/model_loading.rb +333 -0
- data/lib/crosslanguagespotter/oracle.rb +261 -0
- data/lib/crosslanguagespotter/report.rb +88 -0
- data/lib/crosslanguagespotter/version.rb +5 -0
- data/lib/crosslanguagespotter/wekaintegration.rb +83 -0
- data/lib/crosslanguagespotter.rb +7 -0
- data/lib/jars/weka.jar +0 -0
- data/resources/css/bootstrap-theme.css +346 -0
- data/resources/css/bootstrap-theme.min.css +7 -0
- data/resources/css/bootstrap.css +5780 -0
- data/resources/css/bootstrap.min.css +7 -0
- data/resources/css/highlightstyles/arta.css +160 -0
- data/resources/css/highlightstyles/ascetic.css +50 -0
- data/resources/css/highlightstyles/atelier-dune.dark.css +93 -0
- data/resources/css/highlightstyles/atelier-dune.light.css +93 -0
- data/resources/css/highlightstyles/atelier-forest.dark.css +93 -0
- data/resources/css/highlightstyles/atelier-forest.light.css +93 -0
- data/resources/css/highlightstyles/atelier-heath.dark.css +93 -0
- data/resources/css/highlightstyles/atelier-heath.light.css +93 -0
- data/resources/css/highlightstyles/atelier-lakeside.dark.css +93 -0
- data/resources/css/highlightstyles/atelier-lakeside.light.css +93 -0
- data/resources/css/highlightstyles/atelier-seaside.dark.css +93 -0
- data/resources/css/highlightstyles/atelier-seaside.light.css +93 -0
- data/resources/css/highlightstyles/brown_paper.css +105 -0
- data/resources/css/highlightstyles/brown_papersq.png +0 -0
- data/resources/css/highlightstyles/dark.css +105 -0
- data/resources/css/highlightstyles/default.css +153 -0
- data/resources/css/highlightstyles/docco.css +132 -0
- data/resources/css/highlightstyles/far.css +113 -0
- data/resources/css/highlightstyles/foundation.css +133 -0
- data/resources/css/highlightstyles/github.css +125 -0
- data/resources/css/highlightstyles/googlecode.css +147 -0
- data/resources/css/highlightstyles/idea.css +122 -0
- data/resources/css/highlightstyles/ir_black.css +105 -0
- data/resources/css/highlightstyles/magula.css +123 -0
- data/resources/css/highlightstyles/mono-blue.css +62 -0
- data/resources/css/highlightstyles/monokai.css +127 -0
- data/resources/css/highlightstyles/monokai_sublime.css +149 -0
- data/resources/css/highlightstyles/obsidian.css +154 -0
- data/resources/css/highlightstyles/paraiso.dark.css +93 -0
- data/resources/css/highlightstyles/paraiso.light.css +93 -0
- data/resources/css/highlightstyles/pojoaque.css +106 -0
- data/resources/css/highlightstyles/pojoaque.jpg +0 -0
- data/resources/css/highlightstyles/railscasts.css +182 -0
- data/resources/css/highlightstyles/rainbow.css +112 -0
- data/resources/css/highlightstyles/school_book.css +113 -0
- data/resources/css/highlightstyles/school_book.png +0 -0
- data/resources/css/highlightstyles/solarized_dark.css +107 -0
- data/resources/css/highlightstyles/solarized_light.css +107 -0
- data/resources/css/highlightstyles/sunburst.css +160 -0
- data/resources/css/highlightstyles/tomorrow-night-blue.css +93 -0
- data/resources/css/highlightstyles/tomorrow-night-bright.css +92 -0
- data/resources/css/highlightstyles/tomorrow-night-eighties.css +92 -0
- data/resources/css/highlightstyles/tomorrow-night.css +93 -0
- data/resources/css/highlightstyles/tomorrow.css +90 -0
- data/resources/css/highlightstyles/vs.css +89 -0
- data/resources/css/highlightstyles/xcode.css +158 -0
- data/resources/css/highlightstyles/zenburn.css +117 -0
- data/resources/example.html +1501 -0
- data/resources/js/bootstrap.js +1943 -0
- data/resources/js/bootstrap.min.js +7 -0
- data/resources/js/highlight.pack.js +1 -0
- data/resources/services_example.html +141 -0
- data/resources/template.html +61 -0
- data/test/data/angular-puzzle.GS +111 -0
- data/test/data/angular_puzzle/app.js +66 -0
- data/test/data/angular_puzzle/index.html +67 -0
- data/test/data/angular_puzzle/slidingPuzzle.js +203 -0
- data/test/data/angular_puzzle/wordSearchPuzzle.js +270 -0
- data/test/data/example.html +5 -0
- data/test/data/example.js +4 -0
- data/test/data/services/index.html +33 -0
- data/test/data/services/script.js +15 -0
- data/test/test_helper.rb +9 -0
- data/test/test_parsing.rb +23 -0
- data/test/test_spotter.rb +42 -0
- data/test/test_wekaintegration.rb +43 -0
- metadata +328 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 68ee2d427d4dacc3d22b88a1deababeb2a1bcafd
|
|
4
|
+
data.tar.gz: 65ab29b3ba450f824a4930bd0a436170efabff57
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: ec2f763115ece653ad3a1379f24397bd73e18f3d4d101b222a922fead038bb4498459319595ac86444e253176b66ca9ebe93f117632293f879ee1bbf2f348cb8
|
|
7
|
+
data.tar.gz: d7b1571637ec59db6e9519e035c4826fecde7d121b2e8f03898317f4eaf3cc237bb9253833f4f90357fa604fb1c0b31cfb9b1b27b496218b3066aa3034d4fae8
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
|
+
require 'crosslanguagespotter/version'
|
|
5
|
+
|
|
6
|
+
# Gem specification for crosslanguagespotter (built for the 'java' platform).
Gem::Specification.new do |s|
  s.platform      = 'java'
  s.name          = 'crosslanguagespotter'
  s.version       = CrossLanguageSpotter::VERSION
  s.summary       = "Automatic Spotter of Cross-Language references"
  s.description   = "Automatic Spotter of Cross-Language references"
  s.authors       = ["Federico Tomassetti"]
  s.email         = 'f.tomassetti@gmail.com'
  s.homepage      = 'https://github.com/CrossLanguageProject/crosslanguagerelationsspotter'
  # RubyGems expects an SPDX license identifier; the free-form "Apache v2"
  # is reported as invalid when the gem is built.
  s.license       = "Apache-2.0"

  # Package whatever git tracks; executables and test files are derived
  # from that list by path prefix.
  s.files         = `git ls-files`.split($/)
  s.executables   = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ["lib"]

  # Runtime dependencies
  s.add_dependency('codemodels')
  s.add_dependency('codemodels-js')
  s.add_dependency('codemodels-html')
  s.add_dependency('codemodels-java')
  s.add_dependency('codemodels-ruby')
  s.add_dependency('codemodels-xml')
  s.add_dependency('codemodels-properties')
  s.add_dependency('htmlentities')
  s.add_dependency('liquid')

  # Development-only dependencies
  s.add_development_dependency "bundler"
  s.add_development_dependency "rake"
  s.add_development_dependency "simplecov"
  s.add_development_dependency "rubygems-tasks"
end
|
data/examples/ex1.rb
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
$: << './lib'
|
|
2
|
+
require 'crosslanguagespotter'
|
|
3
|
+
include CrossLanguageSpotter
|
|
4
|
+
|
|
5
|
+
# Build the Weka classifier from the angular_puzzle sample project and its
# .GS file (presumably the gold standard of known relations).
trained_classifier = OracleLoader.new.build_weka_classifier('./test/data/angular_puzzle','./test/data/angular-puzzle.GS')

# Classify the candidate relations of the same sample project...
project_dir = './test/data/angular_puzzle'
relation_spotter = CrossLanguageSpotter::Spotter.new
spotted_relations = relation_spotter.classify_relations(Project.new(project_dir),trained_classifier)

# ...and write them out as a report.
generate_report_file(spotted_relations,'resources/example.html')
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
$: << './lib'
|
|
2
|
+
require 'crosslanguagespotter'
|
|
3
|
+
include CrossLanguageSpotter
|
|
4
|
+
|
|
5
|
+
# Build the Weka classifier from the angular_puzzle sample project and its
# .GS file (presumably the gold standard of known relations).
trained_classifier = OracleLoader.new.build_weka_classifier('./test/data/angular_puzzle','./test/data/angular-puzzle.GS')

# Apply that classifier to a different project: the "services" sample.
project_dir = './test/data/services'
relation_spotter = CrossLanguageSpotter::Spotter.new
spotted_relations = relation_spotter.classify_relations(Project.new(project_dir),trained_classifier)

# Write the relations found out as a report.
generate_report_file(spotted_relations,'resources/services_example.html')
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
require "codemodels"
|
|
4
|
+
require "codemodels/html"
|
|
5
|
+
require "codemodels/js"
|
|
6
|
+
|
|
7
|
+
require 'crosslanguagespotter/figures_evaluator'
|
|
8
|
+
require 'crosslanguagespotter/methods/context'
|
|
9
|
+
require 'crosslanguagespotter/methods/tversky'
|
|
10
|
+
require 'crosslanguagespotter/methods/jaro'
|
|
11
|
+
require 'crosslanguagespotter/model_loading'
|
|
12
|
+
require 'csv'
|
|
13
|
+
require 'set'
|
|
14
|
+
require 'crosslanguagespotter/jaccard'
|
|
15
|
+
|
|
16
|
+
module CrossLanguageSpotter
|
|
17
|
+
|
|
18
|
+
# Recursively parses every file found under +dir+ with CodeModels.parse_file.
#
# dir       - directory to scan
# base_path - key prefix for the entries of +dir+ ('' for the root call);
#             grows by one "/<subdirectory>" segment per recursion level
# models    - hash the results are accumulated into
#
# Returns +models+, mapping "<base_path>/<file name>" to the parsed model.
# Files that cannot be parsed are reported on stdout and skipped.
def self._load_models(dir,base_path='',models={})
  Dir.foreach(dir) do |entry|
    next if entry=='.' || entry=='..'
    path = dir+'/'+entry
    if File.directory?(path)
      # Extend the prefix with the entry name (not with the parent's full
      # path): keys stay relative to the root, and files with the same name
      # in different subdirectories no longer overwrite each other.
      _load_models(path,base_path+'/'+entry,models)
    else
      begin
        models[base_path+'/'+entry] = CodeModels.parse_file(path)
      rescue Exception => e
        # NOTE(review): the broad rescue is kept on purpose; the parsers may
        # raise Java exceptions under JRuby - confirm before narrowing it.
        puts "No model available for #{path}: #{e}"
      end
    end
  end
  models
end
|
|
35
|
+
|
|
36
|
+
# Computes, for every candidate pair of nodes of a project, the features
# describing how likely the pair is to be a cross-language relation, and
# filters the pairs through a classifier.
class Spotter

  def initialize
    @verbose = false
  end

  # Loads the models found under +dir+ and hands them to +_calc+.
  # NOTE(review): +_calc+ is not defined in the visible part of this class -
  # confirm it exists before relying on this method.
  def find_relations(dir)
    models = CrossLanguageSpotter._load_models(dir)
    _calc(dir,models)
  end

  # Builds a Project for +dir+ and returns its features
  # (see #features_for_project).
  def features_for_dir(dir)
    project = Project.new(dir,@verbose)
    features_for_project(project)
  end

  # Computes the feature rows of +project+, has +classifier+ classify them
  # and returns the rows the classifier accepted.
  #
  # Every row gets :result => false before classification; the accepted rows
  # get :result => true. The i-th classification outcome is matched to the
  # i-th feature row.
  def classify_relations(project,classifier)
    rows = features_for_project(project).values
    rows.each {|row| row[:result] = false}

    keys = {shared_length: :numeric,
      tfidf_shared: :numeric,itfidf_shared: :numeric,
      perc_shared_length_min: :numeric,
      perc_shared_length_max: :numeric,
      diff_min: :numeric,diff_max: :numeric,
      perc_diff_min: :numeric,perc_diff_max: :numeric,
      context: :numeric,jaccard: :numeric,jaro: :numeric,tversky: :numeric,
      result: :boolean}
    # A copy is passed so that the row order used below cannot be disturbed
    # by whatever the conversion does to the array it receives.
    data_instances = hash2weka_instances("data",rows.dup,keys,:result)
    classification = classifier.classify(data_instances)

    results = []
    i = 0
    classification.each do |c|
      if c[:result]
        # just put a true in the real relations
        rows[i][:result] = true
        results.push(rows[i])
      end
      i += 1
    end
    results
  end

  # Returns a hash mapping a CrossLanguageRelation to its feature row, one
  # entry per pair of nodes yielded by project.iter_over_shared_ids_instances.
  #
  # Each row holds the position of the two nodes (file, lines, columns) and
  # the numeric features: size of the shared context, tf-idf sums, shares
  # and differences of the two contexts, and the context / Jaccard / Jaro /
  # Tversky scores. NaN scores are replaced by 0.0.
  def features_for_project(project)
    results = {}
    tversky_producer = TverskyReferencesProducer.new({:alpha => 0.5, :threshold => 0.0})
    context_producer = ContextReferencesProducer.new({:alpha => 1.0, :threshold => 0.0})
    context_points_map = context_producer.points_map(project)
    jaro_producer = JaroReferencesProducer.new({:winkleradjust=>false,:threshold=>0.0})

    project.iter_over_shared_ids_instances do |ni,nj|
      # Only the context values that are shared ids of the project count
      context_ni = context(ni).values & project.shared_ids
      context_nj = context(nj).values & project.shared_ids
      shared = context_ni & context_nj
      shared_length = shared.count

      file_i = ni.source.artifact(:absolute).filename
      # The second file must come from nj: taking it from ni again made the
      # tf-idf sums count the first file twice.
      file_j = nj.source.artifact(:absolute).filename
      tfidf_shared = 0
      itfidf_shared = 0
      shared.each do |v|
        tfidf_shared += project.tf_idf(file_i,v)+project.tf_idf(file_j,v)
        itfidf_shared += project.itf_idf(file_i,v)+project.itf_idf(file_j,v)
      end

      # Share of each context that is common to both (0.0 for empty contexts)
      perc_shared_length_i = context_ni.count==0 ? 0.0 : shared_length.to_f/context_ni.count.to_f
      perc_shared_length_j = context_nj.count==0 ? 0.0 : shared_length.to_f/context_nj.count.to_f
      perc_shared_length = [perc_shared_length_i,perc_shared_length_j]

      # Number (and share) of values of each context not found in the other
      diffs = [context_ni.count-shared_length,context_nj.count-shared_length]
      perc_diff_i = context_ni.count==0 ? 0.0 : diffs[0].to_f/context_ni.count.to_f
      perc_diff_j = context_nj.count==0 ? 0.0 : diffs[1].to_f/context_nj.count.to_f
      perc_diffs = [perc_diff_i,perc_diff_j]

      rel = CrossLanguageRelation.new([NodeId.from_node(ni),NodeId.from_node(nj)])

      jaccard = Jaccard.coefficient(context_ni,context_nj)
      jaccard = 0.0 if jaccard.nan?
      tversky = tversky_producer.tversky_coefficient(context_ni,context_nj)
      tversky = 0.0 if tversky.nan?
      # Not named "context": that would shadow the context(node) method
      context_points = context_points_map.points(Pair.new(ni,nj))
      jaro = jaro_producer.jaro_coefficient_from_nodes(ni,nj)
      jaro = 0.0 if jaro.nan?

      results[rel] = {
        node_a_file: file_i,
        node_a_begin_line: ni.source.position(:absolute).begin_line,
        node_a_end_line: ni.source.position(:absolute).end_line,
        node_a_begin_column: ni.source.position(:absolute).begin_column,
        node_a_end_column: ni.source.position(:absolute).end_column,

        node_b_file: file_j,
        node_b_begin_line: nj.source.position(:absolute).begin_line,
        node_b_end_line: nj.source.position(:absolute).end_line,
        node_b_begin_column: nj.source.position(:absolute).begin_column,
        node_b_end_column: nj.source.position(:absolute).end_column,

        shared_length:shared_length,
        tfidf_shared:tfidf_shared,itfidf_shared:itfidf_shared,
        perc_shared_length_min:perc_shared_length.min,
        perc_shared_length_max:perc_shared_length.max,
        diff_min:diffs.min,diff_max:diffs.max,
        perc_diff_min:perc_diffs.min,perc_diff_max:perc_diffs.max,
        context:context_points,jaccard:jaccard,jaro:jaro,tversky:tversky}
    end
    results
  end

end
|
|
156
|
+
|
|
157
|
+
end
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
require 'crosslanguagespotter/model_loading'
|
|
2
|
+
require 'set'
|
|
3
|
+
|
|
4
|
+
module CrossLanguageSpotter
|
|
5
|
+
|
|
6
|
+
# Maps every attribute value of +node+ to the list of its declarators.
#
# The attributes are the ones listed by node.class.ecore.eAllAttributes;
# nil attributes are skipped and multi-valued attributes contribute one
# entry per element. As a single node is inspected, every list holds just
# +node+.
#
# NOTE(review): the previous body referenced an undefined +values+ local and
# ignored +node+, so it could only raise NameError; this implementation
# follows the method name and the (previously unused) result hash - confirm
# it matches the intended contract.
#
# Returns a Hash (value => [node]) whose missing keys default to [].
def collect_values_with_declarator(node)
  declarators_per_value = Hash.new {|h,k| h[k]=[]}
  node.class.ecore.eAllAttributes.each do |attribute|
    attribute_value = node.send(:"#{attribute.name}")
    next if attribute_value.nil?
    elements = attribute.many ? attribute_value : [attribute_value]
    elements.each do |el|
      # A value found in several attributes still lists the node only once
      declarators_per_value[el] << node if declarators_per_value[el].empty?
    end
  end
  declarators_per_value
end
|
|
20
|
+
|
|
21
|
+
# The context of a node: a set of values, each associated with the
# declarators (the elements that contributed it).
#
# Besides the value => declarators map, the class records the order in
# which values were registered (+sequence_of_values+) and the full log of
# registrations, which #clone replays.
class Context

  # Values in registration order, repetitions included
  attr_reader :sequence_of_values

  def initialize
    @map = Hash.new {|h,k| h[k]=[]}
    @sequence_of_values = []
    @register_sequence = []
  end

  # Values having at least one declarator. Keys whose declarator list is
  # empty (emptied by #intersection!, or created by a lookup) are left out.
  def values
    @map.keys.select {|k| @map[k].count>0}
  end

  # Number of values having at least one declarator
  def count
    values.count
  end

  def has_value?(v)
    values.include?(v)
  end

  # Declarators registered for +value+ ([] if there are none).
  def declarators_per_value(value)
    @map[value]
  end

  # Records that +declarator+ contributes +value+. The same declarator is
  # listed at most once per value, while the two sequences record every call.
  def register(value,declarator)
    @sequence_of_values << value
    @map[value] << declarator unless @map[value].include?(declarator)
    # The log must keep the actual declarator: it used to store the value in
    # its place, so clones lost every declarator.
    @register_sequence << {value:value, declarator:declarator}
  end

  # Registers in this context every value/declarator pair of +other+.
  def merge(other)
    other.values.each do |v|
      other.declarators_per_value(v).each do |d|
        register(v,d)
      end
    end
  end

  # Returns a new Context built by replaying the registrations of this one.
  def clone
    new_instance = Context.new
    @register_sequence.each do |r|
      new_instance.register(r[:value],r[:declarator])
    end
    new_instance
  end

  # Non destructive version of #intersection!
  def intersection(values)
    new_instance = self.clone
    new_instance.intersection!(values)
    new_instance
  end

  # Drops the declarators of every value not present in +values+, which is
  # either an Array of values or another Context. With a Context, the
  # declarators it has for the surviving values are appended to ours.
  # Raises for any other kind of argument. Returns self.
  def intersection!(values)
    @map.keys.each do |k|
      if values.is_a? Array
        @map[k] = [] unless values.include?(k)
      elsif values.is_a? Context
        if values.has_value?(k)
          @map[k].concat(values.declarators_per_value(k))
        else
          @map[k] = []
        end
      else
        raise "error"
      end
    end
    self
  end

  # One {value:, declarators:} hash per value, sorted by value
  def to_a
    values.sort.map do |v|
      {value:v,declarators:declarators_per_value(v)}
    end
  end

  def to_s
    to_a.to_s
  end

end
|
|
113
|
+
|
|
114
|
+
# Builds the Context of +node+: the context of its container, plus the
# values of the container itself, plus the values found in the subtrees of
# the container's other children.
#
# A child of the container is taken into account when it sits in a
# containment feature different from the one of +node+, or when +node+ has
# no containing feature and the child is not +node+ itself.
# A node without container gets an empty Context.
def context(node)
  result = Context.new
  parent = node.container_also_foreign
  return result unless parent

  # What the parent inherits from its own ancestors
  result.merge(context(parent))

  # RGen attributes of the father
  parent.collect_values_with_count.keys.each {|value| result.register(value,parent)}

  # siblings in different containment reference
  parent.all_children_also_foreign.each do |sibling|
    next unless (sibling.eContainingFeature!=node.eContainingFeature) || (node.eContainingFeature.nil? && node!=sibling)
    sibling.traverse(:also_foreign) do |descendant|
      descendant.collect_values_with_count.keys.each {|value| result.register(value,descendant)}
    end
  end
  result
end
|
|
138
|
+
|
|
139
|
+
end
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
module CrossLanguageSpotter
|
|
2
|
+
|
|
3
|
+
# Interface of the objects able to propose cross-language relations: they
# are built from a parameters object and answer to #produce_set.
# This class itself ignores its parameters and produces nothing.
class CrossLanguageReferencesProducer

  # parameters: settings of the producer (unused here)
  def initialize(parameters); end

  # It should produce a set of CrossLanguageRelation
  # for the given project; this implementation returns nil.
  def produce_set(project); end

end
|
|
13
|
+
|
|
14
|
+
# It compares different methods; each method can be instantiated
# several times, using different parameters
class CrossLanguageReferencesProducerMethodsComparator
  # map, per class and per parameters, of the figures obtained against the given gold set
  attr_reader :results

  def initialize(gold_set,project)
    @project = project
    @gold_set = gold_set
    @results = Hash.new {|h,k| h[k]={}}
  end

  # Instantiates +clazz+ with +parameters+, lets it produce its set of
  # relations for the project, evaluates that set against the gold set and
  # stores the figures under results[clazz][parameters].
  # Returns the figures.
  def add(clazz,parameters)
    observed_set = clazz.new(parameters).produce_set(@project)
    @results[clazz][parameters] = FiguresEvaluator.new(@gold_set,observed_set).all_figures
  end

end
|
|
36
|
+
|
|
37
|
+
# Identifies a node by the file it belongs to and by its index.
# Two ids are equal (also as hash keys) when both file and index are equal;
# ids are ordered by file first and by index second.
class NodeId
  attr_reader :file
  attr_reader :node_index

  # Shorter name for #node_index
  def index
    @node_index
  end

  # Builds the id of +node+ from the filename of the final host of its
  # source artifact and from traverse_index(node).
  # NOTE(review): traverse_index is not defined in the visible part of this
  # file - presumably the position of the node in a traversal; confirm.
  def self.from_node(node)
    new(node.source.artifact.final_host.filename,traverse_index(node))
  end

  def initialize(file,node_index)
    @file = file
    @node_index = node_index
  end

  def eql?(other)
    other.is_a?(NodeId) && file.eql?(other.file) && node_index.eql?(other.node_index)
  end

  def ==(other)
    eql?(other)
  end

  # Consistent with #eql?: equal file and index give equal hashes
  def hash
    [@file,@node_index].hash
  end

  # Returns -1, 0 or 1, or nil when +other+ is not a NodeId (it used to
  # fail with NoMethodError instead of following the <=> convention).
  def <=>(other)
    return nil unless other.is_a?(NodeId)
    by_file = file <=> other.file
    by_file==0 ? (node_index <=> other.node_index) : by_file
  end

  def to_s
    "#{@file}:#{@node_index}"
  end

end
|
|
81
|
+
|
|
82
|
+
# An unordered pair of node ids: the two ids are stored in ascending order
# (according to <=>), so that the order in which they are given is
# irrelevant for equality and hashing.
class CrossLanguageRelation
  attr_reader :node_ids

  # node_ids: collection of exactly two ids; anything else raises.
  def initialize(node_ids)
    raise "Two elements expected, #{node_ids.count} found" unless node_ids.count==2
    first_id = node_ids[0]
    second_id = node_ids[1]
    @node_ids = (first_id<=>second_id)<0 ? [first_id,second_id] : [second_id,first_id]
  end

  def eql?(other)
    return false unless other.is_a?(CrossLanguageRelation)
    node_ids.eql?(other.node_ids)
  end

  def ==(other)
    eql?(other)
  end

  def hash
    lower, higher = @node_ids[0], @node_ids[1]
    lower.hash*7+higher.hash
  end

  def to_s
    "CrossLanguageRelation #{@node_ids[0]} <-> #{@node_ids[1]}"
  end

end
|
|
116
|
+
|
|
117
|
+
# Calculates precision, recall, f-measure
# of an observed set against a gold set. Both sets must respond to
# #intersection and #count (e.g. Set).
class FiguresEvaluator

  # Gold set is the "truth", observed is calculated from
  # some method and compared with the gold set
  def initialize(gold_set,observed_set)
    @gold_set = gold_set
    @observed_set = observed_set
  end

  # Share of the observed elements that belong to the gold set.
  # NaN when the observed set is empty. Calculated once.
  def precision
    @precision ||= calc_precision
  end

  # Share of the gold elements that were observed.
  # NaN when the gold set is empty. Calculated once.
  def recall
    @recall ||= calc_recall
  end

  # F-measure: (1+beta^2)*P*R / (beta^2*P + R).
  # With beta=1 it is the harmonic mean of precision and recall; a larger
  # beta gives more weight to recall. The numerator used to be 2*beta^2*P*R,
  # which is correct for beta=1 only.
  # NaN when precision and recall are both zero.
  def f_measure(beta=1.0)
    beta_square = beta**2.0
    ((1.0+beta_square)*precision*recall)/(beta_square*precision+recall)
  end

  # All the figures in one hash: :precision, :recall, :f_measure, :beta
  def all_figures(beta=1.0)
    {precision:precision,recall:recall,f_measure:f_measure(beta),beta:beta}
  end

  private

  # Number of elements found in both sets, as a Float
  def intersection_size
    @gold_set.intersection(@observed_set).count.to_f
  end

  def calc_precision
    intersection_size/@observed_set.count.to_f
  end

  def calc_recall
    intersection_size/@gold_set.count.to_f
  end

end
|
|
159
|
+
|
|
160
|
+
end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
require "set"
|
|
2
|
+
|
|
3
|
+
# Helpers to calculate the Jaccard Coefficient Index and related metrics easily.
#
# (from Wikipedia): The Jaccard coefficient measures similarity between sample sets, and is defined
# as the size of the intersection divided by the size of the union of the sample sets.
#
# The closer to 1.0 this number is, the more similar two items are.
module Jaccard
  # Calculates the Jaccard Coefficient Index.
  #
  # +a+ must implement the set intersection and set union operators: <code>#&</code> and <code>#+</code>. Array and Set
  # both implement these methods natively. It is expected that the result of <code>+</code> is either a
  # unique set or an object that responds to +#uniq!+. The result of +#coefficient+ will be
  # wrong if the union contains duplicate elements.
  #
  # Also note that the individual items in +a+ and +b+ must implement a sane #eql? method.
  #
  # The result is NaN when both +a+ and +b+ are empty (0.0 / 0.0).
  #
  # @param [#&, #+] a A set of items
  # @param [#&, #+] b A second set of items
  #
  # @return [Float] The Jaccard Coefficient Index between +a+ and +b+.
  #
  # @raise [ArgumentError] if +a+ does not respond to #& or #+
  #
  # @example
  #
  #   a = [1, 2, 3, 4]
  #   b = [1, 3, 4]
  #   Jaccard.coefficient(a, b) #=> 0.75
  #
  # @see http://en.wikipedia.org/wiki/Jaccard_index Jaccard Coefficient Index on Wikipedia.
  def self.coefficient(a, b)
    raise ArgumentError, "#{a.inspect} does not implement #&" unless a.respond_to?(:&)
    raise ArgumentError, "#{a.inspect} does not implement #+" unless a.respond_to?(:+)

    intersection = a & b
    union = a + b

    # Set does not implement #uniq! since its elements are always
    # guaranteed to be present only once. That's the only
    # reason we need to guard against that here.
    union.uniq! if union.respond_to?(:uniq!)

    intersection.length.to_f / union.length.to_f
  end

  # Calculates the inverse of the Jaccard coefficient.
  #
  # The closer to 0.0 the distance is, the more similar two items are.
  #
  # @return [Float] <code>1.0 - #coefficient(a, b)</code>
  #
  # @see Jaccard#coefficient for parameter calling convention and caveats about Array vs Set vs other object types.
  def self.distance(a, b)
    1.0 - coefficient(a, b)
  end

  # Determines which member of +others+ has the smallest distance vs +a+.
  #
  # Because of the implementation, if multiple items from +others+ have
  # the same distance, the last one will be returned. If this is undesirable,
  # reverse +others+ before calling #closest_to.
  #
  # @param [#&, #+] a A set of attributes
  # @param [#inject] others A collection of set of attributes
  #
  # @return The item from +others+ with the smallest distance from +a+
  #   (nil when +others+ is empty).
  #
  # @example
  #
  #   a = [1, 2, 3]
  #   b = [1, 3]
  #   c = [1, 2, 3]
  #   Jaccard.closest_to(b, [a, c]) #=> [1, 2, 3]
  #   # Note that the actual instance returned will be c
  def self.closest_to(a, others)
    # 2.0 is larger than any distance, so the first item always replaces it
    others.inject([2.0, nil]) do |memo, other|
      dist = distance(a, other)
      next memo if memo.first < dist

      [dist, other]
    end.last
  end

  # Returns the pair of items whose distance is minimized.
  #
  # Items that are equal to each other are never paired. When several pairs
  # have the same minimal distance, the first pair encountered is returned;
  # the distances alone are compared, so the items themselves do not need to
  # be comparable (a tie between Sets used to make the sort fail).
  #
  # @param [#each] items A collection of attributes.
  #
  # @return [Array<a, b>, nil] A pair of set of attributes whose Jaccard distance is the minimal, given the input set;
  #   nil when +items+ holds fewer than two distinct items.
  #
  # @example
  #
  #   a = [1, 2, 3]
  #   b = [1, 2]
  #   c = [1, 3]
  #   Jaccard.best_match([a, b, c]) #=> [[1, 2, 3], [1, 2]]
  def self.best_match(items)
    seen = Set.new
    matches = []

    items.each do |row|
      items.each do |col|
        next if row == col
        next if seen.include?([row, col]) || seen.include?([col, row])
        seen << [row, col]
        matches << [distance(row, col), [row, col]]
      end
    end

    closest = matches.min_by(&:first)
    closest && closest.last
  end
end
|