crosslanguagespotter 0.0.2-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +19 -0
  3. data/Gemfile +3 -0
  4. data/Rakefile +13 -0
  5. data/crosslanguagespotter.gemspec +36 -0
  6. data/examples/ex1.rb +13 -0
  7. data/examples/services_example.rb +13 -0
  8. data/lib/crosslanguagespotter/basic.rb +157 -0
  9. data/lib/crosslanguagespotter/context.rb +139 -0
  10. data/lib/crosslanguagespotter/figures_evaluator.rb +160 -0
  11. data/lib/crosslanguagespotter/jaccard.rb +114 -0
  12. data/lib/crosslanguagespotter/methods/context.rb +127 -0
  13. data/lib/crosslanguagespotter/methods/jaro.rb +118 -0
  14. data/lib/crosslanguagespotter/methods/tversky.rb +44 -0
  15. data/lib/crosslanguagespotter/model_loading.rb +333 -0
  16. data/lib/crosslanguagespotter/oracle.rb +261 -0
  17. data/lib/crosslanguagespotter/report.rb +88 -0
  18. data/lib/crosslanguagespotter/version.rb +5 -0
  19. data/lib/crosslanguagespotter/wekaintegration.rb +83 -0
  20. data/lib/crosslanguagespotter.rb +7 -0
  21. data/lib/jars/weka.jar +0 -0
  22. data/resources/css/bootstrap-theme.css +346 -0
  23. data/resources/css/bootstrap-theme.min.css +7 -0
  24. data/resources/css/bootstrap.css +5780 -0
  25. data/resources/css/bootstrap.min.css +7 -0
  26. data/resources/css/highlightstyles/arta.css +160 -0
  27. data/resources/css/highlightstyles/ascetic.css +50 -0
  28. data/resources/css/highlightstyles/atelier-dune.dark.css +93 -0
  29. data/resources/css/highlightstyles/atelier-dune.light.css +93 -0
  30. data/resources/css/highlightstyles/atelier-forest.dark.css +93 -0
  31. data/resources/css/highlightstyles/atelier-forest.light.css +93 -0
  32. data/resources/css/highlightstyles/atelier-heath.dark.css +93 -0
  33. data/resources/css/highlightstyles/atelier-heath.light.css +93 -0
  34. data/resources/css/highlightstyles/atelier-lakeside.dark.css +93 -0
  35. data/resources/css/highlightstyles/atelier-lakeside.light.css +93 -0
  36. data/resources/css/highlightstyles/atelier-seaside.dark.css +93 -0
  37. data/resources/css/highlightstyles/atelier-seaside.light.css +93 -0
  38. data/resources/css/highlightstyles/brown_paper.css +105 -0
  39. data/resources/css/highlightstyles/brown_papersq.png +0 -0
  40. data/resources/css/highlightstyles/dark.css +105 -0
  41. data/resources/css/highlightstyles/default.css +153 -0
  42. data/resources/css/highlightstyles/docco.css +132 -0
  43. data/resources/css/highlightstyles/far.css +113 -0
  44. data/resources/css/highlightstyles/foundation.css +133 -0
  45. data/resources/css/highlightstyles/github.css +125 -0
  46. data/resources/css/highlightstyles/googlecode.css +147 -0
  47. data/resources/css/highlightstyles/idea.css +122 -0
  48. data/resources/css/highlightstyles/ir_black.css +105 -0
  49. data/resources/css/highlightstyles/magula.css +123 -0
  50. data/resources/css/highlightstyles/mono-blue.css +62 -0
  51. data/resources/css/highlightstyles/monokai.css +127 -0
  52. data/resources/css/highlightstyles/monokai_sublime.css +149 -0
  53. data/resources/css/highlightstyles/obsidian.css +154 -0
  54. data/resources/css/highlightstyles/paraiso.dark.css +93 -0
  55. data/resources/css/highlightstyles/paraiso.light.css +93 -0
  56. data/resources/css/highlightstyles/pojoaque.css +106 -0
  57. data/resources/css/highlightstyles/pojoaque.jpg +0 -0
  58. data/resources/css/highlightstyles/railscasts.css +182 -0
  59. data/resources/css/highlightstyles/rainbow.css +112 -0
  60. data/resources/css/highlightstyles/school_book.css +113 -0
  61. data/resources/css/highlightstyles/school_book.png +0 -0
  62. data/resources/css/highlightstyles/solarized_dark.css +107 -0
  63. data/resources/css/highlightstyles/solarized_light.css +107 -0
  64. data/resources/css/highlightstyles/sunburst.css +160 -0
  65. data/resources/css/highlightstyles/tomorrow-night-blue.css +93 -0
  66. data/resources/css/highlightstyles/tomorrow-night-bright.css +92 -0
  67. data/resources/css/highlightstyles/tomorrow-night-eighties.css +92 -0
  68. data/resources/css/highlightstyles/tomorrow-night.css +93 -0
  69. data/resources/css/highlightstyles/tomorrow.css +90 -0
  70. data/resources/css/highlightstyles/vs.css +89 -0
  71. data/resources/css/highlightstyles/xcode.css +158 -0
  72. data/resources/css/highlightstyles/zenburn.css +117 -0
  73. data/resources/example.html +1501 -0
  74. data/resources/js/bootstrap.js +1943 -0
  75. data/resources/js/bootstrap.min.js +7 -0
  76. data/resources/js/highlight.pack.js +1 -0
  77. data/resources/services_example.html +141 -0
  78. data/resources/template.html +61 -0
  79. data/test/data/angular-puzzle.GS +111 -0
  80. data/test/data/angular_puzzle/app.js +66 -0
  81. data/test/data/angular_puzzle/index.html +67 -0
  82. data/test/data/angular_puzzle/slidingPuzzle.js +203 -0
  83. data/test/data/angular_puzzle/wordSearchPuzzle.js +270 -0
  84. data/test/data/example.html +5 -0
  85. data/test/data/example.js +4 -0
  86. data/test/data/services/index.html +33 -0
  87. data/test/data/services/script.js +15 -0
  88. data/test/test_helper.rb +9 -0
  89. data/test/test_parsing.rb +23 -0
  90. data/test/test_spotter.rb +42 -0
  91. data/test/test_wekaintegration.rb +43 -0
  92. metadata +328 -0
@@ -0,0 +1,127 @@
1
+ require 'set'
2
+ require 'crosslanguagespotter/figures_evaluator'
3
+
4
+ module CrossLanguageSpotter
5
+
6
# Couple of nodes stored in a canonical order: the node whose artifact has the
# smaller filename comes first, so Pair.new(a,b) and Pair.new(b,a) compare
# equal and hash identically. Suitable as a Hash key.
class Pair

  # The two nodes, as a two-element array in canonical order.
  attr_reader :nodes

  # Both nodes must expose an absolute source position, otherwise a
  # RuntimeError ("error") is raised.
  def initialize(a, b)
    [a, b].each do |node|
      raise "error" unless node.source.position(:absolute)
    end
    filename_b = b.source.artifact(:absolute).filename
    filename_a = a.source.artifact(:absolute).filename
    @nodes = filename_b < filename_a ? [b, a] : [a, b]
  end

  # Node coming first in the canonical order.
  def first
    nodes[0]
  end

  # Node coming second in the canonical order.
  def second
    nodes[1]
  end

  # Two pairs are equal when their nodes are pairwise equal (same order).
  def eql?(other)
    other.is_a?(Pair) && first == other.nodes[0] && second == other.nodes[1]
  end

  def ==(other)
    eql?(other)
  end

  def to_s
    "[#{first} <-> #{second}]"
  end

  # Consistent with #eql?: derived only from the hashes of the two nodes.
  def hash
    first.hash * 3 + second.hash
  end

end
48
+
49
# Accumulates a score ("points") for each pair.
#
# Scores start at 0.0. The default is provided through Hash.new(0.0) rather
# than a default block, so that merely reading the score of an unknown pair
# does not insert it in the map (and therefore does not make it show up in
# #each).
class PointsMap

  # alpha: multiplier applied to every context contribution.
  def initialize(alpha)
    @alpha = alpha
    @points = Hash.new(0.0)
  end

  # Current score of the pair (0.0 if nothing was registered for it).
  def points(pair)
    @points[pair]
  end

  # Adds 0.2 * alpha * value to the score of the pair.
  def register_context_contribute(pair, value)
    @points[pair] += 0.2 * @alpha * value.to_f
  end

  # Adds a fixed 0.1 to the score of the pair.
  def register_child_contribute(pair)
    @points[pair] += 0.1
  end

  # Iterates over the (pair, score) entries whose score is >= threshold.
  # Without a block it returns an enumerator over those entries.
  def each(threshold, &block)
    @points.select { |_pair, score| score >= threshold }.each(&block)
  end

end
73
+
74
# Produces cross-language relations by scoring pairs of nodes according to
# the context entries they share.
class ContextReferencesProducer

  attr_accessor :verbose

  # parameters is read through [] with the keys :threshold, :verbose, :alpha.
  def initialize(parameters)
    @alpha = parameters[:alpha]
    @verbose = parameters[:verbose]
    @threshold = parameters[:threshold]
  end

  # Builds the PointsMap for the project: for every couple of nodes yielded by
  # project.iter_over_shared_ids_instances, each shared context entry gives a
  # "child" contribution to every couple of declarators of its value, and the
  # couple itself gets a context contribution equal to the number of shared
  # entries.
  def points_map(project)
    scores = PointsMap.new(@alpha)
    project.iter_over_shared_ids_instances do |ni, nj|
      ctx_i = context(ni)
      ctx_j = context(nj)
      common = ctx_j.intersection(ctx_i).to_a
      common.each do |entry|
        shared_value = entry[:value]
        ctx_i.declarators_per_value(shared_value).each do |di|
          ctx_j.declarators_per_value(shared_value).each do |dj|
            scores.register_child_contribute(Pair.new(di, dj))
          end
        end
      end
      scores.register_context_contribute(Pair.new(ni, nj), common.count)
    end
    scores
  end

  # It should produce a set of node ids
  # (a Set of CrossLanguageRelation, one per pair scoring at least the threshold)
  def produce_set(project)
    relations = Set.new
    puts "Context method:" if @verbose

    points_map(project).each(@threshold).each do |pair, _score|
      ids = [NodeId.from_node(pair.first), NodeId.from_node(pair.second)]
      relations << CrossLanguageRelation.new(ids)
    end

    puts "Context method, set produced: #{relations.count} elements" if @verbose
    relations
  end

end
126
+
127
+ end
@@ -0,0 +1,118 @@
1
+ require 'set'
2
+ require 'crosslanguagespotter/figures_evaluator'
3
+
4
class Array
  # Variant of #select whose block receives both the item and its index;
  # returns the items for which the block is truthy.
  def select_with_index
    each_with_index.select { |item, position| yield(item, position) }.map { |item, _position| item }
  end

  # Returns the array of all the indices at which the given item occurs.
  # Example: %w[a a b a].aindices("a") => [0, 1, 3]
  def aindices(o)
    each_index.select { |position| self[position] == o }
  end
end
22
+
23
+ module CrossLanguageSpotter
24
+
25
# Produces cross-language relations between nodes whose contexts, seen as
# sequences of values, have a Jaro (optionally Jaro-Winkler) coefficient
# above a threshold.
class JaroReferencesProducer

  # Maximum length of the common prefix rewarded by the Winkler adjustment.
  # With the 0.1 scaling factor used below, a longer prefix could push the
  # result above 1.0.
  MAX_WINKLER_PREFIX = 4

  attr_accessor :verbose

  # parameters is read through [] with the keys :threshold, :verbose and
  # :winkleradjust (truthy to apply the Winkler prefix bonus).
  def initialize(parameters)
    @threshold = parameters[:threshold]
    @verbose = parameters[:verbose]
    @winkleradjust = parameters[:winkleradjust]
  end

  # It should produce a set of node ids
  # (a Set of CrossLanguageRelation, one for each couple of nodes yielded by
  # project.iter_over_shared_ids_instances whose coefficient is strictly
  # greater than the threshold)
  def produce_set(project)
    set = Set.new
    puts "Jaro method:" if @verbose

    project.iter_over_shared_ids_instances do |ni, nj|
      if jaro_coefficient_from_nodes(ni, nj) > @threshold
        id_i = NodeId.from_node(ni)
        id_j = NodeId.from_node(nj)
        set << CrossLanguageRelation.new([id_i, id_j])
      end
    end
    puts "Jaro method, set produced: #{set.count} elements" if @verbose
    set
  end

  # Coefficient between the contexts of the two nodes.
  def jaro_coefficient_from_nodes(ni, nj)
    jaro_coefficient_from_context(context(ni), context(nj))
  end

  # Coefficient between two contexts, compared through the string form of
  # their sequences of values.
  def jaro_coefficient_from_context(context_ni, context_nj)
    s1 = context_ni.sequence_of_values.map { |v| v.to_s }
    s2 = context_nj.sequence_of_values.map { |v| v.to_s }
    jaro_coefficient(s1, s2)
  end

  # Jaro coefficient between two arrays of elements.
  #
  # Returns a Float: 0.0 when either array is empty or when no element
  # matches (instead of the NaN a 0/0 division would give). When
  # @winkleradjust is set, the common prefix (at most MAX_WINKLER_PREFIX
  # elements) increases the result.
  #
  # As in the original implementation, every matched element found at a
  # different index in the two arrays counts as half a transposition.
  def jaro_coefficient(s1, s2)
    # the second sequence should be the longer one
    shorter, longer = s1.length > s2.length ? [s2, s1] : [s1, s2]
    return 0.0 if shorter.empty?

    # indices at which each element occurs in the longer sequence, computed
    # once instead of rescanning the sequence for every element
    positions = {}
    longer.each_with_index { |element, idx| (positions[element] ||= []) << idx }
    # how many occurrences of each element were already matched
    consumed = Hash.new(0)
    # matching distance; never negative, so that sequences of length one can
    # still match at the same index
    window = [(longer.length / 2) - 1, 0].max

    matches = 0
    displaced = 0
    shorter.each_with_index do |element, idx|
      occurrences = positions[element]
      next unless occurrences
      candidate = occurrences[consumed[element]]
      next unless candidate
      distance = (candidate - idx).abs
      next if distance > window
      consumed[element] += 1
      matches += 1
      displaced += 1 unless distance.zero?
    end
    return 0.0 if matches.zero?

    transpositions = displaced / 2
    third = 1.0 / 3
    jd = (third * matches / shorter.length) +
         (third * matches / longer.length) +
         (third * (matches - transpositions) / matches)
    return jd unless @winkleradjust

    prefix = 0
    while prefix < MAX_WINKLER_PREFIX && prefix < shorter.length && shorter[prefix] == longer[prefix]
      prefix += 1
    end
    jd + (prefix * 0.1 * (1 - jd))
  end

end
117
+
118
+ end
@@ -0,0 +1,44 @@
1
+ require 'set'
2
+ require 'crosslanguagespotter/jaccard'
3
+ require 'crosslanguagespotter/figures_evaluator'
4
+
5
+ module CrossLanguageSpotter
6
+
7
# Produces cross-language relations between nodes whose contexts (restricted
# to the ids shared across languages) have a Tversky coefficient of at least
# the configured threshold.
class TverskyReferencesProducer

  attr_accessor :verbose

  # parameters is read through [] with the keys :threshold, :alpha and
  # :verbose. :verbose is read here like in the other producers, so the
  # "if @verbose" output below can be enabled from the parameters.
  def initialize(parameters)
    @threshold = parameters[:threshold]
    @verbose = parameters[:verbose]
    @alpha = parameters[:alpha]
  end

  # It should produce a set of node ids
  # (a Set of CrossLanguageRelation, one for each couple of nodes yielded by
  # project.iter_over_shared_ids_instances reaching the threshold)
  def produce_set(project)
    set = Set.new
    puts "Tversky method:" if @verbose

    project.iter_over_shared_ids_instances do |ni, nj|
      context_ni = context(ni).values & project.shared_ids
      context_nj = context(nj).values & project.shared_ids
      j = tversky_coefficient(context_ni, context_nj)
      if j >= @threshold
        id_i = NodeId.from_node(ni)
        id_j = NodeId.from_node(nj)
        puts " * '#{id_i.file}':#{id_i.index} -> '#{id_j.file}':#{id_j.index}" if @verbose
        set << CrossLanguageRelation.new([id_i, id_j])
      end
    end
    puts "Tversky method, set produced: #{set.count} elements" if @verbose
    set
  end

  # shared / (shared + alpha * others), where shared is the number of
  # elements present in both collections and others the number of elements
  # present in only one of them.
  # Returns 0.0 when nothing is shared, which also avoids the 0/0 (NaN)
  # produced by two empty collections.
  def tversky_coefficient(context_ni, context_nj)
    shared = (context_ni & context_nj).count
    return 0.0 if shared.zero?

    others = (context_ni.count - shared) + (context_nj.count - shared)
    shared.to_f / (shared.to_f + @alpha * others.to_f)
  end

end
43
+
44
+ end
@@ -0,0 +1,333 @@
1
+ require "codemodels"
2
+ require "codemodels/html"
3
+ require "codemodels/js"
4
+ require "crosslanguagespotter/context"
5
+
6
+ module CrossLanguageSpotter
7
+
8
# Parser obtained from CodeModels::Html::AngularJs; ModelLoader#load_model
# uses it to parse the files whose name ends with '.html'.
# NOTE(review): presumably it also parses the AngularJS code embedded in the
# HTML, as the factory method name suggests - confirm against codemodels-html.
+ AngularParser = CodeModels::Html::AngularJs.parser_considering_angular_embedded_code
9
+
10
+
11
# Returns the 1-based position of the node in the traversal (foreign ASTs
# included) of the tree it belongs to. A visited node is recognised when it
# is == to the given one and has the same absolute source position.
# Raises a RuntimeError ("Error...") if the node is never met.
def traverse_index(node)
  visited = 0
  node.root(:also_foreign).traverse(:also_foreign) do |candidate|
    visited += 1
    next unless candidate == node
    return visited if candidate.source.position(:absolute) == node.source.position(:absolute)
  end
  raise "Error..."
end
20
+
21
# Returns the node met at the given 1-based position while traversing the
# tree rooted in root (foreign ASTs included).
# Raises a RuntimeError if the traversal ends before reaching that position.
def node_at_traverse_index(root, index)
  visited = 0
  root.traverse(:also_foreign) do |candidate|
    visited += 1
    return candidate if visited == index
  end
  raise "Error... traverse_index: #{index}. Reached #{visited + 1}"
end
29
+
30
+
31
# Number of lines to add to a line of the node to refer it to the outermost
# (host) tree: walking up the containers, every node contained through the
# :foreign_asts feature adds the begin line of its container, minus one.
def offset_referred_to_host(node)
  offset = 0
  current = node
  while current
    if current.eContainingFeature == :foreign_asts
      offset += current.eContainer.source.begin_pos.line - 1
    end
    current = current.eContainer
  end
  offset
end
39
+
40
# Translates a line of the node into the corresponding line of the host, by
# adding the offset of the node (see offset_referred_to_host).
def line_referred_to_host(node, line)
  host_offset = offset_referred_to_host(node)
  host_offset + line
end
43
+
44
# Returns [begin line, end line] of the node, both referred to the host.
def host_lines(node)
  source = node.source
  [source.begin_line, source.end_line].map do |line|
    line_referred_to_host(node, line)
  end
end
48
+
49
# Tells whether the given host line falls between the begin line and the end
# line of the node (both included, both referred to the host).
def is_in_line?(node, line)
  first_line = line_referred_to_host(node, node.source.begin_pos.line)
  return false unless line >= first_line

  line <= line_referred_to_host(node, node.source.end_pos.line)
end
52
+
53
# Loads the models of the files on demand and caches them by path.
class ModelLoader

  def initialize
    @models = Hash.new { |cache, path| cache[path] = load_model(path) }
  end

  # Model of the file at the given path, parsed on the first request only.
  def model(path)
    @models[path]
  end

  private

  # Parses the file, choosing the parser from the end of its name ('.html' or
  # '.js'). Raises a RuntimeError if the file does not exist or has any other
  # kind of name.
  def load_model(relpath)
    complete_path = relpath
    raise "Unexisting file #{complete_path}" unless File.exist?(complete_path)
    return AngularParser.parse_file(complete_path) if relpath.end_with?('.html')
    return CodeModels.parse_file(complete_path) if relpath.end_with?('.js')

    raise "I don't know what to do with: #{complete_path}"
  end

end
80
+
81
# Read-only record associating a node with one value and a collection of
# reference labels.
class SequentialAst

  attr_reader :node, :value, :reference_labels

  def initialize(node, value, reference_labels)
    @reference_labels = reference_labels
    @value = value
    @node = node
  end

end
94
+
95
# The models of all the .html and .js files found under a directory, indexed
# by the values occurring in their nodes.
#
# A value is "shared" when it occurs in files having at least two different
# extensions; the iter_* methods yield the couples of nodes, taken from files
# with different extensions, in which a shared value occurs.
class Project

  # Hash from the path of each loaded file to its model.
  attr_reader :models

  # src: directory searched recursively for *.html and *.js files.
  # verbose: when truthy, prints the path of each file being loaded.
  def initialize(src, verbose=false)
    @models = {}
    @verbose = verbose
    load_all_models(src)
    @values_map_per_file = {}
    @value_frequencies = Hash.new { |h, k| h[k] = {} }
    @idf = {}
  end

  # One SequentialAst per (node, value of the node), carrying the values of
  # the context of the node as reference labels.
  def sequential_asts
    result = []
    @nodes_per_file.each_value do |nodes|
      nodes.each do |n|
        attribute_values_of_n = n.collect_values_with_count.keys
        next if attribute_values_of_n.empty?

        ctx = context(n)
        attribute_values_of_n.each do |v|
          result << SequentialAst.new(n, v, ctx.values)
        end
      end
    end
    result
  end

  # Values occurring in files with at least two different extensions
  # (computed on the first call only).
  def shared_ids
    @shared_ids ||= calc_shared_ids
  end

  # Set of the paths of all the files in which at least one value occurs.
  def files
    collected = Set.new
    @files_per_values.each_value { |fs| collected.merge(fs) }
    collected
  end

  # Yields (ni, nj) for every shared value and every couple of nodes
  # containing it, taken from files with different extensions.
  def iter_over_shared_ids_instances(&block)
    shared_ids.each do |v|
      iter_value_for_all_extensions(v, &block)
    end
  end

  # Like iter_over_shared_ids_instances, but nodes containing a value which
  # differs from the shared one only by case are considered too.
  def iter_over_shared_ids_instances_case_insensitive(&block)
    shared_ids.each do |v|
      iter_value_for_all_extensions_case_insensitive(v, &block)
    end
  end

  # Yields the couples of nodes containing v for each couple of distinct
  # extensions of the files in which v occurs.
  def iter_value_for_all_extensions(v, &block)
    each_extension_pair(v) do |ext_i, ext_j|
      iter_value_in_extensions(v, ext_i, ext_j, &block)
    end
  end

  # Case-insensitive counterpart of iter_value_for_all_extensions. The
  # extensions considered are those of the files containing exactly v.
  def iter_value_for_all_extensions_case_insensitive(v, &block)
    each_extension_pair(v) do |ext_i, ext_j|
      iter_value_in_extensions_case_insensitive(v, ext_i, ext_j, &block)
    end
  end

  # Yields (ni, nj) for every node ni containing v in a file with extension
  # ext_i and every node nj containing v in a file with extension ext_j.
  # Raises a RuntimeError ("Error") if the two extensions are the same.
  def iter_value_in_extensions(v, ext_i, ext_j, &block)
    iter_values_in_extensions([v], ext_i, ext_j, &block)
  end

  # Like iter_value_in_extensions, considering all the values equal to v
  # when the case is ignored.
  def iter_value_in_extensions_case_insensitive(v, ext_i, ext_j, &block)
    iter_values_in_extensions(values_case_insensitve(v), ext_i, ext_j, &block)
  end

  # Frequency of the value in the file multiplied by the idf of the value.
  def tf_idf(file, value)
    value_frequency(file, value) * idf(value)
  end

  # Logarithm of the inverse frequency of the value in the file, multiplied
  # by the idf of the value.
  def itf_idf(file, value)
    itf(file, value) * idf(value)
  end

  private

  # Distinct extensions of the given paths, in order of first appearance.
  def extensions_of(paths)
    paths.map { |path| File.extname(path) }.uniq
  end

  # Yields each couple (ext_i, ext_j) of distinct extensions of the files in
  # which v occurs, ext_i always preceding ext_j.
  def each_extension_pair(v)
    extensions = extensions_of(@files_per_values[v])
    extensions.each_with_index do |ext_i, i|
      extensions.drop(i + 1).each { |ext_j| yield ext_i, ext_j }
    end
  end

  # Shared implementation of the iter_value_in_extensions* methods.
  # The files are de-duplicated: when several of the values occur in the same
  # file that file must be visited once, otherwise the same couples of nodes
  # would be yielded several times.
  def iter_values_in_extensions(vs, ext_i, ext_j, &block)
    raise "Error" if ext_i == ext_j

    candidate_files = []
    vs.each { |v_el| candidate_files.concat(@files_per_values[v_el].to_a) }
    candidate_files.uniq!

    nodes_i = nodes_with_values(vs, candidate_files.select { |f| File.extname(f) == ext_i })
    nodes_j = nodes_with_values(vs, candidate_files.select { |f| File.extname(f) == ext_j })
    nodes_i.each do |ni|
      nodes_j.each { |nj| block.call(ni, nj) }
    end
  end

  # Nodes of the given files containing any of the given values, file by
  # file and, inside each file, value by value.
  def nodes_with_values(vs, paths)
    collected = []
    paths.each do |f|
      vs.each { |v_el| collected.concat(@nodes_per_value_and_file_map[v_el][f]) }
    end
    collected
  end

  # log(number of files / number of files containing the value), cached.
  def idf(value)
    @idf[value] ||= begin
      all_files = files
      pos = all_files.count { |f| values_per_file(f).has_key?(value) }
      Math.log(all_files.count.to_f / pos.to_f)
    end
  end

  def itf(file, value)
    Math.log(1.0 / value_frequency(file, value))
  end

  # Occurrences of the value in the file divided by the occurrences of all
  # the values in the file, cached.
  def value_frequency(file, value)
    @value_frequencies[file][value] ||= begin
      values_map = values_per_file(file)
      total = values_map.values.inject(:+)
      values_map[value].to_f / total.to_f
    end
  end

  # Map value -> count for the whole model of the file, cached.
  def values_per_file(file)
    @values_map_per_file[file] ||= @ml.model(file).collect_values_with_count_subtree(:also_foreign)
  end

  # Known values equal to v when the case is ignored (v included, if known).
  def values_case_insensitve(v)
    wanted = v.to_s.downcase
    @files_per_values.keys.select { |el| el.to_s.downcase == wanted }
  end

  def calc_shared_ids
    @files_per_values.keys.select do |v|
      extensions_of(@files_per_values[v]).count > 1
    end
  end

  # Loads first all the .html files and then all the .js files under src.
  def load_all_models(src)
    @ml = ModelLoader.new
    @nodes_per_file = Hash.new { |h, k| h[k] = [] }
    @files_per_values = Hash.new { |h, k| h[k] = Set.new }

    # nodes per value, file
    @nodes_per_value_and_file_map = Hash.new { |h, k| h[k] = Hash.new { |h2, k2| h2[k2] = [] } }

    %w[html js].each do |extension|
      Dir["#{src}/**/*.#{extension}"].each do |f|
        puts "Loading model from #{f}" if @verbose
        load_model_from_file(f)
      end
    end
  end

  # Registers the model of the file and indexes each of its nodes (foreign
  # ASTs included) by the values it contains.
  def load_model_from_file(f)
    m = @ml.model(f)
    @models[f] = m
    m.traverse_also_foreign do |n|
      @nodes_per_file[f] << n
      n.collect_values_with_count.keys.each do |v|
        @files_per_values[v] << f
        @nodes_per_value_and_file_map[v][f] << n
      end
    end
  end

end
332
+
333
+ end