scrappy 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/History.txt +6 -0
  2. data/Manifest +21 -14
  3. data/README.rdoc +5 -9
  4. data/Rakefile +1 -2
  5. data/bin/scrappy +141 -51
  6. data/lib/scrappy.rb +6 -9
  7. data/lib/scrappy/agent/agent.rb +3 -3
  8. data/lib/scrappy/extractor/extractor.rb +108 -0
  9. data/lib/scrappy/{agent → extractor}/formats.rb +0 -0
  10. data/lib/scrappy/extractor/fragment.rb +111 -0
  11. data/lib/scrappy/extractor/selector.rb +41 -0
  12. data/lib/scrappy/{selectors → extractor/selectors}/base_uri.rb +1 -3
  13. data/lib/scrappy/extractor/selectors/css.rb +5 -0
  14. data/lib/scrappy/{selectors → extractor/selectors}/new_uri.rb +1 -3
  15. data/lib/scrappy/{selectors → extractor/selectors}/root.rb +1 -4
  16. data/lib/scrappy/{selectors → extractor/selectors}/section.rb +1 -4
  17. data/lib/scrappy/{selectors → extractor/selectors}/slice.rb +1 -3
  18. data/lib/scrappy/{selectors → extractor/selectors}/uri.rb +2 -4
  19. data/lib/scrappy/{selectors → extractor/selectors}/uri_pattern.rb +2 -4
  20. data/lib/scrappy/extractor/selectors/visual.rb +39 -0
  21. data/lib/scrappy/{selectors → extractor/selectors}/xpath.rb +1 -4
  22. data/lib/scrappy/server/admin.rb +89 -2
  23. data/lib/scrappy/server/helpers.rb +11 -2
  24. data/lib/scrappy/server/server.rb +1 -0
  25. data/lib/scrappy/trainer/trainer.rb +101 -0
  26. data/public/javascripts/annotator.js +75 -0
  27. data/public/javascripts/remote.js +132 -0
  28. data/public/stylesheets/application.css +39 -12
  29. data/scrappy.gemspec +13 -11
  30. data/views/extractors.haml +24 -0
  31. data/views/layout.haml +14 -4
  32. data/views/patterns.haml +19 -0
  33. data/views/samples.haml +28 -0
  34. metadata +58 -56
  35. data/lib/scrappy/agent/extractor.rb +0 -196
  36. data/lib/scrappy/selectors/css.rb +0 -10
  37. data/public/javascripts/scrappy.js +0 -65
  38. data/views/kb.haml +0 -15
@@ -1,10 +0,0 @@
1
- module Sc
2
- class CssSelector
3
- include RDF::NodeProxy
4
-
5
- def filter doc
6
- # By using Nokogiri, CSS and XPath use the same search method
7
- Sc::XPathSelector.new(node).filter doc
8
- end
9
- end
10
- end
@@ -1,65 +0,0 @@
1
- add_visual_data = function() {
2
- var items = document.documentElement.getElementsByTagName('*');
3
- var i=0;
4
- for(var i=0; i<items.length; i++) {
5
- var item = items[i];
6
- item.setAttribute('vx', item.offsetLeft);
7
- item.setAttribute('vy', item.offsetTop);
8
- item.setAttribute('vw', item.offsetWidth);
9
- item.setAttribute('vh', item.offsetHeight);
10
- item.setAttribute('vsize', document.defaultView.getComputedStyle(item, null).getPropertyValue('font-size'));
11
- var weight = document.defaultView.getComputedStyle(item, null).getPropertyValue('font-weight');
12
- if (weight == 'normal') weight = 400;
13
- if (weight == 'bold') weight = 700;
14
- item.setAttribute('vweight', weight);
15
- item.setAttribute('vcolor', document.defaultView.getComputedStyle(item, null).getPropertyValue('color'));
16
- item.setAttribute('vbcolor', document.defaultView.getComputedStyle(item, null).getPropertyValue('background-color'));
17
- }
18
- }
19
-
20
- $(document).ready(function(){
21
- $("body").append("<div id='myTrees'></div>")
22
- $("#page > *").bind('mouseover', function(e){
23
- e.stopPropagation();
24
- $(this).addClass("changeBg");
25
- })
26
- .mouseout(function(){
27
- $(this).removeClass("changeBg");
28
- });
29
- });
30
-
31
- $(document).ready(function(){
32
- $("*").bind('click', function(e){
33
- e.stopPropagation();
34
- var element = $(e.target).closest(this.tagName).get(0).tagName;
35
- var parents = $(this).parents();
36
- var string = element.toString();
37
- for(j=0;j<parents.length;j++) {
38
- string = string + " " + parents[j].tagName;
39
- }
40
-
41
- var tree = [];
42
- var treeString = "";
43
- for(h=parents.length-1; h>=0; h-- ) {
44
- tree.push(parents[h].tagName);
45
-
46
- if( treeString == "" ) {
47
- treeString = treeString + parents[h].tagName;
48
- } else {
49
- treeString = treeString + " > " + parents[h].tagName;
50
- }
51
- }
52
-
53
- tree.push(element);
54
- treeString = treeString + " > " + element;
55
-
56
- var myTrees = document.getElementById("myTrees");
57
- var ul = document.createElement("ul");
58
- var li = document.createElement("li");
59
- myTrees.appendChild(ul);
60
- li.innerHTML = treeString;
61
- myTrees.appendChild(li);
62
- });
63
- });
64
-
65
- window.scrappy_loaded = true
data/views/kb.haml DELETED
@@ -1,15 +0,0 @@
1
- #body
2
- %h1 Knowledge base
3
- %p
4
- %ul.detail
5
- -@uris.each do |uri|
6
- %li
7
- %span.name
8
- -if !uri.include?('*')
9
- %a{:href=>uri}=uri
10
- -else
11
- =uri
12
- -if !uri.include?('*')
13
- -[['RDF', :rdf], ['JSON', :ejson], ['YARF', :yarf], ['nTriples', :ntriples], ['PNG', :png]].reverse.each do |format, format_code|
14
- %span.format
15
- %a{:href=>"#{settings.base_uri}/#{format_code}/#{uri}"}=format