scrappy 0.3.5 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/Rakefile +1 -1
- data/bin/scrappy +75 -17
- data/lib/scrappy.rb +1 -1
- data/lib/scrappy/extractor/extractor.rb +11 -8
- data/lib/scrappy/extractor/fragment.rb +1 -1
- data/lib/scrappy/extractor/selector.rb +6 -2
- data/lib/scrappy/extractor/selectors/uri_pattern.rb +1 -1
- data/lib/scrappy/extractor/selectors/visual.rb +66 -52
- data/lib/scrappy/learning/optimizer.rb +355 -107
- data/lib/scrappy/learning/trainer.rb +112 -40
- data/lib/scrappy/server/admin.rb +180 -17
- data/lib/scrappy/support.rb +0 -24
- data/public/javascripts/annotator.js +1 -1
- data/public/stylesheets/application.css +33 -0
- data/scrappy.gemspec +5 -5
- data/views/help.haml +1 -2
- data/views/layout.haml +1 -0
- data/views/patterns.haml +10 -5
- data/views/samples.haml +46 -22
- metadata +6 -6
@@ -3,43 +3,79 @@ module Scrappy
|
|
3
3
|
# Generates visual patterns
|
4
4
|
def train *samples
|
5
5
|
RDF::Graph.new( samples.inject([]) do |triples, sample|
|
6
|
-
triples + train_sample(sample).triples
|
6
|
+
triples + train_sample( sample ).triples
|
7
7
|
end )
|
8
8
|
end
|
9
|
-
|
9
|
+
|
10
|
+
# Generate XPath fragments
|
11
|
+
def train_xpath *samples
|
12
|
+
RDF::Graph.new( samples.inject([]) do |triples, sample|
|
13
|
+
triples + train_sample(sample, true).triples
|
14
|
+
end )
|
15
|
+
end
|
16
|
+
|
10
17
|
private
|
11
|
-
def train_sample sample
|
12
|
-
|
18
|
+
def train_sample sample, xpath=false
|
19
|
+
sample = sample.merge(:content=>Nokogiri::HTML(sample[:html], nil, 'utf-8'))
|
20
|
+
results = RDF::Graph.new extract(sample[:uri], sample[:html], xpath ? Scrappy::Kb.patterns : Scrappy::Kb.extractors, :minimum)
|
13
21
|
|
14
22
|
typed_nodes = results.find(nil, Node("rdf:type"), [])
|
15
23
|
non_root_nodes = results.find([], [], nil)
|
16
24
|
|
17
25
|
nodes = typed_nodes - non_root_nodes
|
18
|
-
|
26
|
+
|
27
|
+
superfragment = Node(nil)
|
28
|
+
selector = Node(nil)
|
29
|
+
identifier = Node(nil)
|
30
|
+
selector.rdf::type = Node('sc:UriSelector')
|
31
|
+
selector.rdf::value = sample[:uri]
|
32
|
+
identifier.rdf::type = Node('sc:BaseUriSelector')
|
33
|
+
superfragment.rdf::type = Node('sc:Fragment')
|
34
|
+
superfragment.sc::selector = selector
|
35
|
+
superfragment.sc::identifier = identifier
|
36
|
+
superfragment.graph << selector
|
37
|
+
superfragment.graph << identifier
|
38
|
+
|
19
39
|
RDF::Graph.new( nodes.inject([]) do |triples, node|
|
20
|
-
|
21
|
-
|
40
|
+
fragment = fragment_for(node, sample, xpath)
|
41
|
+
# Include a superfragment that limits the fragment to a specified URI
|
42
|
+
if xpath
|
43
|
+
other_triples = [ [superfragment.id, ID('sc:subfragment'), fragment.id] ]
|
44
|
+
else
|
45
|
+
other_triples = []
|
46
|
+
end
|
47
|
+
|
48
|
+
triples + fragment.graph.triples + other_triples
|
49
|
+
end + (xpath ? superfragment.graph.triples : []) )
|
22
50
|
end
|
23
51
|
|
24
|
-
def fragment_for node, parent=nil
|
52
|
+
def fragment_for node, sample, xpath, parent=nil, parent_path=nil
|
25
53
|
fragment = Node(nil)
|
54
|
+
node_path = node.sc::source.first.sc::selector.first.sc::path.first
|
26
55
|
node.keys.each do |predicate|
|
27
56
|
case predicate
|
28
57
|
when ID("sc:source") then
|
29
|
-
selector = selector_for(node.sc::source.first, parent)
|
58
|
+
selector = selector_for(node.sc::source.first, sample, xpath, parent, parent_path)
|
30
59
|
fragment.graph << selector
|
31
60
|
fragment.sc::selector = selector
|
32
61
|
when ID("sc:uri") then
|
33
|
-
selector = selector_for(node.sc::uri.first.sc::source.first, node)
|
62
|
+
selector = selector_for(node.sc::uri.first.sc::source.first, sample, xpath, node, node_path)
|
34
63
|
fragment.graph << selector
|
35
64
|
fragment.sc::identifier = selector
|
36
65
|
when ID("rdf:type") then
|
37
66
|
fragment.sc::type = node.rdf::type
|
38
67
|
else
|
39
68
|
if node[predicate].map(&:class).uniq.first == RDF::Node
|
69
|
+
done = []
|
40
70
|
node[predicate].map do |subnode|
|
41
|
-
|
71
|
+
selector = subnode.sc::source.first.sc::selector.first
|
72
|
+
next if done.include?( {}.merge(selector) )
|
73
|
+
done << {}.merge(selector)
|
74
|
+
|
75
|
+
subfragment = fragment_for(subnode, sample, xpath, node, node_path)
|
42
76
|
subfragment.sc::relation = Node(predicate)
|
77
|
+
subfragment.sc::min_cardinality = "1"
|
78
|
+
subfragment.sc::max_cardinality = "1"
|
43
79
|
|
44
80
|
fragment.graph << subfragment
|
45
81
|
fragment.sc::subfragment += [subfragment]
|
@@ -48,47 +84,83 @@ module Scrappy
|
|
48
84
|
end
|
49
85
|
end
|
50
86
|
fragment.rdf::type = Node("sc:Fragment")
|
51
|
-
|
52
|
-
fragment.sc::max_cardinality = "1"
|
87
|
+
|
53
88
|
fragment
|
54
89
|
end
|
55
90
|
|
56
|
-
def selector_for fragment, parent=nil
|
91
|
+
def selector_for fragment, sample, xpath=false, parent=nil, parent_path=nil
|
57
92
|
fragment_selector = fragment.sc::selector.first
|
58
93
|
presentation = fragment.sc::presentation.first
|
59
94
|
|
60
95
|
selector = Node(nil)
|
61
|
-
selector.rdf::type = Node("sc:VisualSelector")
|
62
|
-
|
63
|
-
origin_x = parent ? parent.sc::source.first.sc::presentation.first.sc::x.first.to_i : 0
|
64
|
-
origin_y = parent ? parent.sc::source.first.sc::presentation.first.sc::y.first.to_i : 0
|
65
|
-
|
66
|
-
relative_x = presentation.sc::x.first.to_i - origin_x
|
67
|
-
relative_y = presentation.sc::y.first.to_i - origin_y
|
68
96
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
97
|
+
if xpath
|
98
|
+
selector.rdf::type = Node("sc:XPathSelector")
|
99
|
+
selector.sc::text = presentation.sc::text
|
100
|
+
selector.rdf::value = path_for fragment_selector.sc::path.first, parent_path, sample
|
101
|
+
else
|
102
|
+
selector.rdf::type = Node("sc:VisualSelector")
|
103
|
+
|
104
|
+
origin_x = parent ? parent.sc::source.first.sc::presentation.first.sc::x.first.to_i : 0
|
105
|
+
origin_y = parent ? parent.sc::source.first.sc::presentation.first.sc::y.first.to_i : 0
|
106
|
+
|
107
|
+
relative_x = presentation.sc::x.first.to_i - origin_x
|
108
|
+
relative_y = presentation.sc::y.first.to_i - origin_y
|
109
|
+
|
110
|
+
selector.sc::min_relative_x = relative_x.to_s
|
111
|
+
selector.sc::max_relative_x = relative_x.to_s
|
112
|
+
selector.sc::min_relative_y = relative_y.to_s
|
113
|
+
selector.sc::max_relative_y = relative_y.to_s
|
114
|
+
selector.sc::min_x = presentation.sc::x
|
115
|
+
selector.sc::max_x = presentation.sc::x
|
116
|
+
selector.sc::min_y = presentation.sc::y
|
117
|
+
selector.sc::max_y = presentation.sc::y
|
118
|
+
|
119
|
+
selector.sc::min_width = presentation.sc::width
|
120
|
+
selector.sc::max_width = presentation.sc::width
|
121
|
+
selector.sc::min_height = presentation.sc::height
|
122
|
+
selector.sc::max_height = presentation.sc::height
|
123
|
+
selector.sc::min_font_size = presentation.sc::font_size
|
124
|
+
selector.sc::max_font_size = presentation.sc::font_size
|
125
|
+
selector.sc::min_font_weight = presentation.sc::font_weight
|
126
|
+
selector.sc::max_font_weight = presentation.sc::font_weight
|
127
|
+
selector.sc::font_family = presentation.sc::font_family
|
128
|
+
|
129
|
+
selector.sc::tag = ["text"] if presentation.sc::font_family.first
|
130
|
+
special_tag = fragment_selector.sc::tag.select { |tag| ["a","img"].include?(tag) }
|
131
|
+
selector.sc::tag = special_tag if special_tag.size > 0
|
132
|
+
end
|
87
133
|
|
88
|
-
selector.sc::tag = fragment_selector.sc::tag.select { |tag| ["a","img"].include?(tag) }
|
89
134
|
selector.sc::attribute = fragment_selector.sc::attribute
|
90
135
|
|
91
136
|
selector
|
92
137
|
end
|
138
|
+
|
139
|
+
def path_for path, parent_path, sample
|
140
|
+
return "./." if path == parent_path
|
141
|
+
return path if ["", "/html", "/html/body"].include?(path)
|
142
|
+
|
143
|
+
node = sample[:content].search(path).first
|
144
|
+
conditions = []
|
145
|
+
if node[:class]
|
146
|
+
conditions += node[:class].split(" ").map {|c| "contains(concat(' ',normalize-space(@class),' '),concat(' ','#{c.strip}',' '))" }
|
147
|
+
else
|
148
|
+
conditions += ["not(@class)"]
|
149
|
+
end
|
150
|
+
if node[:id]
|
151
|
+
conditions += ["contains(@id,'#{node[:id].strip}')"]
|
152
|
+
else
|
153
|
+
conditions += ["not(@id)"]
|
154
|
+
end
|
155
|
+
selector = "/#{node.name}[#{conditions * " and "}]"
|
156
|
+
index = nil
|
157
|
+
results = node.parent.search(".#{selector}")
|
158
|
+
results.each_with_index { |n,i| index = i+1 if n.path == path }
|
159
|
+
|
160
|
+
previous_path = path.split("/")[0..-2] * "/"
|
161
|
+
suffix = results.size > 1 ? "[#{index}]" : ""
|
162
|
+
|
163
|
+
path_for(previous_path, parent_path, sample) + selector + suffix
|
164
|
+
end
|
93
165
|
end
|
94
166
|
end
|
data/lib/scrappy/server/admin.rb
CHANGED
@@ -37,11 +37,13 @@ module Scrappy
|
|
37
37
|
app.post '/extractors' do
|
38
38
|
if params[:html]
|
39
39
|
# Generate extractor automatically
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
40
|
+
html = Iconv.iconv('UTF-8', params[:encoding], params[:html]).first
|
41
|
+
samples = [{ :html=>html, :uri=>params[:uri] }]
|
42
|
+
extractor = agent.train_xpath(*samples)
|
43
|
+
# Train
|
44
|
+
Scrappy::App.add_extractor extractor
|
45
|
+
# Optimize
|
46
|
+
Scrappy::App.replace_extractor agent.optimize_extractors(Scrappy::Kb.extractors, samples), samples
|
45
47
|
else
|
46
48
|
# Store the given extractor
|
47
49
|
Scrappy::App.add_extractor RDF::Parser.parse(:yarf,params[:rdf])
|
@@ -58,14 +60,30 @@ module Scrappy
|
|
58
60
|
# Patterns
|
59
61
|
|
60
62
|
app.get '/patterns' do
|
61
|
-
@
|
62
|
-
|
63
|
-
map { |node| node.sc::type }.flatten.map(&:to_s).sort
|
63
|
+
@patterns = ( Scrappy::Kb.patterns.find(nil, Node('rdf:type'), Node('sc:Fragment')) -
|
64
|
+
Scrappy::Kb.patterns.find([], Node('sc:subfragment'), nil) )
|
64
65
|
haml :patterns
|
65
66
|
end
|
66
67
|
|
67
|
-
app.
|
68
|
-
Scrappy::
|
68
|
+
app.get '/patterns/visual' do
|
69
|
+
@patterns = ( Scrappy::Kb.patterns.find(nil, Node('rdf:type'), Node('sc:Fragment')) -
|
70
|
+
Scrappy::Kb.patterns.find([], Node('sc:subfragment'), nil) )
|
71
|
+
html = @patterns.map { |pattern| render_fragment(pattern) } * ""
|
72
|
+
"<html><body>#{html}</body></html>"
|
73
|
+
end
|
74
|
+
|
75
|
+
app.get '/patterns/*' do |id|
|
76
|
+
"<html><body>#{render_fragment(Scrappy::Kb.patterns[id])}</body></html>"
|
77
|
+
end
|
78
|
+
|
79
|
+
app.delete '/patterns' do
|
80
|
+
Scrappy::App.delete_patterns
|
81
|
+
flash[:notice] = "Patterns deleted"
|
82
|
+
redirect "#{settings.base_uri}/patterns"
|
83
|
+
end
|
84
|
+
|
85
|
+
app.delete '/patterns/*' do |id|
|
86
|
+
Scrappy::App.delete_pattern id
|
69
87
|
flash[:notice] = "Pattern deleted"
|
70
88
|
redirect "#{settings.base_uri}/patterns"
|
71
89
|
end
|
@@ -78,9 +96,34 @@ module Scrappy
|
|
78
96
|
end
|
79
97
|
|
80
98
|
app.get '/samples/:id' do |id|
|
99
|
+
Nokogiri::HTML(Scrappy::App.samples[id.to_i][:html], nil, 'utf-8').search("*").map do |node|
|
100
|
+
next if node.text?
|
101
|
+
text = node.children.map { |n| n.content if n.text? } * " "
|
102
|
+
x = node[:vx].to_i
|
103
|
+
y = node[:vy].to_i
|
104
|
+
w = node[:vw].to_i
|
105
|
+
h = node[:vh].to_i
|
106
|
+
font = node[:vfont]
|
107
|
+
size = node[:vsize].to_i
|
108
|
+
weight = node[:vweight].to_i
|
109
|
+
color = "#555"
|
110
|
+
color = "#55f" if node.name == "a"
|
111
|
+
style = "position: absolute; left: #{x}px; top: #{y}px; width: #{w}px; height: #{h}px; font-family: #{font}; font-size: #{size}px; font-weight: #{weight}; border: 1px solid gray; color: #{color};"
|
112
|
+
style += "background-color: #f00; opacity: 0.2;" if node.name == "img"
|
113
|
+
style += "text-decoration: underline;" if node.name == "a"
|
114
|
+
"<div style='#{style}'>#{text}</div>"
|
115
|
+
end * ""
|
116
|
+
end
|
117
|
+
|
118
|
+
app.get '/samples/:id/raw' do |id|
|
81
119
|
Scrappy::App.samples[id.to_i][:html]
|
82
120
|
end
|
83
|
-
|
121
|
+
|
122
|
+
app.get '/samples/:id/annotations' do |id|
|
123
|
+
headers 'Content-Type' => 'text/plain'
|
124
|
+
RDF::Graph.new(Scrappy::App.samples[id.to_i][:output] || []).serialize(:yarf)
|
125
|
+
end
|
126
|
+
|
84
127
|
app.get '/samples/:id/:kb_type' do |id,kb_type|
|
85
128
|
kb = (kb_type == "patterns" ? Scrappy::Kb.patterns : Scrappy::Kb.extractors)
|
86
129
|
sample = Scrappy::App.samples[id.to_i]
|
@@ -88,19 +131,109 @@ module Scrappy
|
|
88
131
|
RDF::Graph.new(agent.extract(sample[:uri], sample[:html], kb, Agent::Options.referenceable)).serialize(:yarf)
|
89
132
|
end
|
90
133
|
|
91
|
-
app.post '/samples
|
92
|
-
|
93
|
-
|
134
|
+
app.post '/samples/annotate' do
|
135
|
+
samples = (params['samples'] || []).map { |i| Scrappy::App.samples[i.to_i] }.each do |sample|
|
136
|
+
sample[:output] = agent.extract(sample[:uri], sample[:html], Scrappy::Kb.extractors)
|
137
|
+
end
|
138
|
+
Scrappy::App.save_samples
|
139
|
+
flash[:notice] = "Samples annotated"
|
140
|
+
redirect "#{settings.base_uri}/samples"
|
141
|
+
end
|
142
|
+
|
143
|
+
app.post '/samples/train/:kb_type' do |kb_type|
|
144
|
+
kb = (kb_type == "patterns" ? Scrappy::Kb.patterns : Scrappy::Kb.extractors)
|
145
|
+
samples = (params['samples'] || []).map { |i| Scrappy::App.samples[i.to_i] }
|
146
|
+
if kb_type == "patterns"
|
147
|
+
Scrappy::App.add_patterns agent.train(*samples)
|
148
|
+
else
|
149
|
+
Scrappy::App.add_extractor agent.train_xpath(*samples)
|
150
|
+
end
|
94
151
|
flash[:notice] = "Training completed"
|
95
152
|
redirect "#{settings.base_uri}/samples"
|
96
153
|
end
|
97
154
|
|
98
|
-
app.post '/samples
|
99
|
-
|
100
|
-
Scrappy::App.
|
155
|
+
app.post '/samples/optimize/:kb_type' do |kb_type|
|
156
|
+
kb = (kb_type == "patterns" ? Scrappy::Kb.patterns : Scrappy::Kb.extractors)
|
157
|
+
samples = (params['samples'] || []).map { |i| Scrappy::App.samples[i.to_i] }
|
158
|
+
if kb_type == "patterns"
|
159
|
+
Scrappy::App.save_patterns agent.optimize_patterns(kb, samples)
|
160
|
+
else
|
161
|
+
Scrappy::App.replace_extractor agent.optimize_extractors(kb, samples), samples
|
162
|
+
end
|
101
163
|
flash[:notice] = "Optimization completed"
|
102
164
|
redirect "#{settings.base_uri}/samples"
|
103
165
|
end
|
166
|
+
|
167
|
+
app.post '/samples/test/:kb_type' do |kb_type|
|
168
|
+
kb = (kb_type == "patterns" ? Scrappy::Kb.patterns : Scrappy::Kb.extractors)
|
169
|
+
@results = {}
|
170
|
+
@missing = []
|
171
|
+
@wrong = []
|
172
|
+
output = RDF::Parser.parse(:ntriples, params["output"].to_s).triples
|
173
|
+
extraction = []
|
174
|
+
(params['samples'] || []).each do |i|
|
175
|
+
sample = Scrappy::App.samples[i.to_i]
|
176
|
+
output += sample[:output] || []
|
177
|
+
extraction += agent.extract(sample[:uri], sample[:html], kb)
|
178
|
+
end
|
179
|
+
|
180
|
+
output = output.uniq
|
181
|
+
extraction = extraction.uniq
|
182
|
+
|
183
|
+
predicates = output.map { |s,p,o| p }.uniq
|
184
|
+
types = output.map { |s,p,o| o if p == ID('rdf:type') }.compact.uniq
|
185
|
+
|
186
|
+
predicates.each do |predicate|
|
187
|
+
new_output = output.select { |s,p,o| p==predicate }
|
188
|
+
new_extraction = extraction.select { |s,p,o| p==predicate }
|
189
|
+
precision, recall, fscore = agent.send :metrics, new_output, new_extraction
|
190
|
+
@results[predicate] ||= Hash.new(0.0)
|
191
|
+
@results[predicate][:count] += 1
|
192
|
+
@results[predicate][:fscore] += fscore
|
193
|
+
@results[predicate][:precision] += precision
|
194
|
+
@results[predicate][:recall] += recall
|
195
|
+
end
|
196
|
+
|
197
|
+
types.each do |type|
|
198
|
+
new_output = output.select { |s,p,o| p==ID("rdf:type") and o==type }
|
199
|
+
new_extraction = extraction.select { |s,p,o| p==ID("rdf:type") and o==type }
|
200
|
+
|
201
|
+
precision, recall, fscore = agent.send :metrics, new_output, new_extraction
|
202
|
+
@results[type] ||= Hash.new(0.0)
|
203
|
+
@results[type][:count] += 1
|
204
|
+
@results[type][:fscore] += fscore
|
205
|
+
@results[type][:precision] += precision
|
206
|
+
@results[type][:recall] += recall
|
207
|
+
end
|
208
|
+
|
209
|
+
precision, recall, fscore = agent.send :metrics, output, extraction
|
210
|
+
@results[:total] ||= Hash.new(0.0)
|
211
|
+
@results[:total][:count] += 1
|
212
|
+
@results[:total][:fscore] += fscore
|
213
|
+
@results[:total][:precision] += precision
|
214
|
+
@results[:total][:recall] += recall
|
215
|
+
|
216
|
+
@missing += output - extraction
|
217
|
+
@wrong += extraction - output
|
218
|
+
|
219
|
+
# Here we get sth like: { :'dc:title'=>{:fscore=>0.3, ...}, :total=>{:fscore=>0.4, ...} }
|
220
|
+
@results.each do |key, result|
|
221
|
+
count = result[:count]
|
222
|
+
result.each do |k,v|
|
223
|
+
result[k] /= count
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
@total = output.size
|
228
|
+
@extracted = extraction.size
|
229
|
+
@correct = @extracted - @wrong.size
|
230
|
+
|
231
|
+
@missing = RDF::Graph.new(@missing)
|
232
|
+
@wrong = RDF::Graph.new(@wrong)
|
233
|
+
|
234
|
+
flash.now[:notice] = "Testing completed"
|
235
|
+
haml :test
|
236
|
+
end
|
104
237
|
|
105
238
|
app.post '/samples' do
|
106
239
|
html = Iconv.iconv('UTF-8', params[:encoding], params[:html]).first
|
@@ -114,6 +247,36 @@ module Scrappy
|
|
114
247
|
flash[:notice] = "Sample deleted"
|
115
248
|
redirect "#{settings.base_uri}/samples"
|
116
249
|
end
|
250
|
+
|
251
|
+
def render_fragment fragment, selected_branch=nil
|
252
|
+
label = if fragment.sc::relation.first
|
253
|
+
fragment.sc::relation.map {|id| RDF::ID.compress(id)} * ', '
|
254
|
+
else
|
255
|
+
fragment.sc::type.map {|id| RDF::ID.compress(id)} * ', '
|
256
|
+
end
|
257
|
+
subfragments = [selected_branch || [:min, :max]].flatten.map do |branch|
|
258
|
+
fragment.sc::subfragment.map { |f| render_fragment(f, branch) } * ""
|
259
|
+
end * ""
|
260
|
+
|
261
|
+
[selected_branch || [:min, :max]].flatten.map do |branch|
|
262
|
+
fragment.sc::selector.map do |selector|
|
263
|
+
x,y,w,h,font,size,weight,color = case branch
|
264
|
+
when :min then
|
265
|
+
[selector.sc::min_relative_x.first, selector.sc::min_relative_y.first, selector.sc::min_width.first, selector.sc::min_height.first, selector.sc::font_family.first, selector.sc::min_font_size.first, selector.sc::min_font_weight.first, :blue]
|
266
|
+
when :max then
|
267
|
+
[selector.sc::max_relative_x.first, selector.sc::max_relative_y.first, selector.sc::max_width.first, selector.sc::max_height.first, selector.sc::font_family.first, selector.sc::max_font_size.first, selector.sc::max_font_weight.first, :red]
|
268
|
+
end
|
269
|
+
style = "position: absolute; left: #{x}px; top: #{y}px; width: #{w}px; height: #{h}px; font-family: #{font}; font-size: #{size}px; font-weight: #{weight}; border: 1px solid #{color}; color: #555;"
|
270
|
+
"<div style='#{style}'>#{label}#{subfragments}</div>"
|
271
|
+
end * ""
|
272
|
+
end * ""
|
273
|
+
end
|
274
|
+
|
275
|
+
def percentage value
|
276
|
+
"%.2f%" % (value * 100.0)
|
277
|
+
end
|
278
|
+
|
279
|
+
app.helpers Admin
|
117
280
|
end
|
118
281
|
end
|
119
282
|
end
|
data/lib/scrappy/support.rb
CHANGED
@@ -29,28 +29,4 @@ class String
|
|
29
29
|
tr("-", "_").
|
30
30
|
downcase
|
31
31
|
end
|
32
|
-
end
|
33
|
-
|
34
|
-
class Array
|
35
|
-
# Return true if a given array has the same elements as this one
|
36
|
-
def equivalent? array
|
37
|
-
self.all? { |i| array.include?(i) } and
|
38
|
-
array.all? { |i| self.include?(i) }
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
module RDF
|
43
|
-
class Node
|
44
|
-
def self.mix *nodes
|
45
|
-
id = nodes.first
|
46
|
-
graph = RDF::Graph.new( nodes.inject([]) do |triples, node|
|
47
|
-
triples + node.graph.triples.map do |s,p,o|
|
48
|
-
[ s==node.id ? id : s,
|
49
|
-
p==node.id ? id : p,
|
50
|
-
o==node.id ? id : o ]
|
51
|
-
end
|
52
|
-
end )
|
53
|
-
graph[id]
|
54
|
-
end
|
55
|
-
end
|
56
32
|
end
|