scrappy 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -0
- data/Manifest +21 -14
- data/README.rdoc +5 -9
- data/Rakefile +1 -2
- data/bin/scrappy +141 -51
- data/lib/scrappy.rb +6 -9
- data/lib/scrappy/agent/agent.rb +3 -3
- data/lib/scrappy/extractor/extractor.rb +108 -0
- data/lib/scrappy/{agent → extractor}/formats.rb +0 -0
- data/lib/scrappy/extractor/fragment.rb +111 -0
- data/lib/scrappy/extractor/selector.rb +41 -0
- data/lib/scrappy/{selectors → extractor/selectors}/base_uri.rb +1 -3
- data/lib/scrappy/extractor/selectors/css.rb +5 -0
- data/lib/scrappy/{selectors → extractor/selectors}/new_uri.rb +1 -3
- data/lib/scrappy/{selectors → extractor/selectors}/root.rb +1 -4
- data/lib/scrappy/{selectors → extractor/selectors}/section.rb +1 -4
- data/lib/scrappy/{selectors → extractor/selectors}/slice.rb +1 -3
- data/lib/scrappy/{selectors → extractor/selectors}/uri.rb +2 -4
- data/lib/scrappy/{selectors → extractor/selectors}/uri_pattern.rb +2 -4
- data/lib/scrappy/extractor/selectors/visual.rb +39 -0
- data/lib/scrappy/{selectors → extractor/selectors}/xpath.rb +1 -4
- data/lib/scrappy/server/admin.rb +89 -2
- data/lib/scrappy/server/helpers.rb +11 -2
- data/lib/scrappy/server/server.rb +1 -0
- data/lib/scrappy/trainer/trainer.rb +101 -0
- data/public/javascripts/annotator.js +75 -0
- data/public/javascripts/remote.js +132 -0
- data/public/stylesheets/application.css +39 -12
- data/scrappy.gemspec +13 -11
- data/views/extractors.haml +24 -0
- data/views/layout.haml +14 -4
- data/views/patterns.haml +19 -0
- data/views/samples.haml +28 -0
- metadata +58 -56
- data/lib/scrappy/agent/extractor.rb +0 -196
- data/lib/scrappy/selectors/css.rb +0 -10
- data/public/javascripts/scrappy.js +0 -65
- data/views/kb.haml +0 -15
@@ -1,65 +0,0 @@
|
|
1
|
-
/**
 * Copies layout metrics and computed style values of every element below
 * the document root into plain attributes on the element itself:
 *
 *   vx, vy, vw, vh  - offsetLeft, offsetTop, offsetWidth, offsetHeight
 *   vsize           - computed font-size
 *   vweight         - computed font-weight ('normal' -> 400, 'bold' -> 700,
 *                     any other value is stored unchanged)
 *   vcolor, vbcolor - computed color and background-color
 *
 * The root element itself is not annotated, because
 * getElementsByTagName('*') only returns its descendants.
 */
function add_visual_data() {
  var items = document.documentElement.getElementsByTagName('*');
  var view = document.defaultView;

  for (var i = 0; i < items.length; i++) {
    var item = items[i];
    // Resolve the computed style once per element instead of once per property.
    var style = view.getComputedStyle(item, null);

    item.setAttribute('vx', item.offsetLeft);
    item.setAttribute('vy', item.offsetTop);
    item.setAttribute('vw', item.offsetWidth);
    item.setAttribute('vh', item.offsetHeight);
    item.setAttribute('vsize', style.getPropertyValue('font-size'));

    // Normalize the keyword weights to their numeric equivalents.
    var weight = style.getPropertyValue('font-weight');
    if (weight === 'normal') weight = 400;
    if (weight === 'bold') weight = 700;
    item.setAttribute('vweight', weight);

    item.setAttribute('vcolor', style.getPropertyValue('color'));
    item.setAttribute('vbcolor', style.getPropertyValue('background-color'));
  }
}

// The function used to be created through an implicit global assignment.
// Keep it reachable as a global even if this script is evaluated inside
// another scope, without relying on non-strict assignment semantics.
if (typeof window !== 'undefined') {
  window.add_visual_data = add_visual_data;
}
|
19
|
-
|
20
|
-
// On DOM ready: append the #myTrees container to the body, then toggle the
// "changeBg" class on the direct children of #page while the pointer is
// over them.
$(document).ready(function () {
  var highlightClass = "changeBg";

  $("body").append("<div id='myTrees'></div>");

  var pageChildren = $("#page > *");

  pageChildren.bind('mouseover', function (event) {
    // Keep the event from bubbling past the hovered child.
    event.stopPropagation();
    $(this).addClass(highlightClass);
  });

  pageChildren.mouseout(function () {
    $(this).removeClass(highlightClass);
  });
});
|
30
|
-
|
31
|
-
// Click inspector: clicking an element appends its ancestry path, written
// root-first (for example "HTML > BODY > DIV > A"), as a new list entry
// inside the #myTrees container.
$(document).ready(function () {
  $("*").bind('click', function (e) {
    // Every element gets its own handler through the "*" selector; stop the
    // bubbling so a single click is reported only once.
    e.stopPropagation();

    // .parents() lists ancestors from the nearest one outwards, so walk it
    // backwards to get a root-first path. The counter is declared locally;
    // it used to leak as an implicit global.
    var parents = $(this).parents();
    var path = [];
    for (var h = parents.length - 1; h >= 0; h--) {
      path.push(parents[h].tagName);
    }
    // `this` is always the clicked element or the bound ancestor that
    // received the event, which is the tag the path must end with.
    path.push(this.tagName);

    // The container is created elsewhere; without it there is nowhere to
    // report to, so do nothing instead of throwing.
    var myTrees = document.getElementById("myTrees");
    if (!myTrees) {
      return;
    }

    // join() avoids a leading " > " when the element has no ancestors.
    var li = document.createElement("li");
    li.appendChild(document.createTextNode(path.join(" > ")));

    // Put the entry inside its list instead of leaving the <ul> empty and
    // attaching the <li> directly to the container.
    var ul = document.createElement("ul");
    ul.appendChild(li);
    myTrees.appendChild(ul);
  });
});
|
64
|
-
|
65
|
-
// Flag set once the whole script has been evaluated (this is its last statement).
// NOTE(review): presumably checked by whatever loads this script to detect
// that loading finished; confirm against the caller.
window.scrappy_loaded = true
|
data/views/kb.haml
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
-# Knowledge base view: lists every entry of @uris. A URI without '*' is rendered as a link and gets one link per output format (RDF, JSON, YARF, nTriples, PNG; the list is reversed before rendering) pointing at "#{settings.base_uri}/<format_code>/<uri>"; a URI containing '*' is shown as plain text.
#body
|
2
|
-
%h1 Knowledge base
|
3
|
-
%p
|
4
|
-
%ul.detail
|
5
|
-
-@uris.each do |uri|
|
6
|
-
%li
|
7
|
-
%span.name
|
8
|
-
-if !uri.include?('*')
|
9
|
-
%a{:href=>uri}=uri
|
10
|
-
-else
|
11
|
-
=uri
|
12
|
-
-if !uri.include?('*')
|
13
|
-
-[['RDF', :rdf], ['JSON', :ejson], ['YARF', :yarf], ['nTriples', :ntriples], ['PNG', :png]].reverse.each do |format, format_code|
|
14
|
-
%span.format
|
15
|
-
%a{:href=>"#{settings.base_uri}/#{format_code}/#{uri}"}=format
|