scrappy 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38):
  1. data/History.txt +6 -0
  2. data/Manifest +21 -14
  3. data/README.rdoc +5 -9
  4. data/Rakefile +1 -2
  5. data/bin/scrappy +141 -51
  6. data/lib/scrappy.rb +6 -9
  7. data/lib/scrappy/agent/agent.rb +3 -3
  8. data/lib/scrappy/extractor/extractor.rb +108 -0
  9. data/lib/scrappy/{agent → extractor}/formats.rb +0 -0
  10. data/lib/scrappy/extractor/fragment.rb +111 -0
  11. data/lib/scrappy/extractor/selector.rb +41 -0
  12. data/lib/scrappy/{selectors → extractor/selectors}/base_uri.rb +1 -3
  13. data/lib/scrappy/extractor/selectors/css.rb +5 -0
  14. data/lib/scrappy/{selectors → extractor/selectors}/new_uri.rb +1 -3
  15. data/lib/scrappy/{selectors → extractor/selectors}/root.rb +1 -4
  16. data/lib/scrappy/{selectors → extractor/selectors}/section.rb +1 -4
  17. data/lib/scrappy/{selectors → extractor/selectors}/slice.rb +1 -3
  18. data/lib/scrappy/{selectors → extractor/selectors}/uri.rb +2 -4
  19. data/lib/scrappy/{selectors → extractor/selectors}/uri_pattern.rb +2 -4
  20. data/lib/scrappy/extractor/selectors/visual.rb +39 -0
  21. data/lib/scrappy/{selectors → extractor/selectors}/xpath.rb +1 -4
  22. data/lib/scrappy/server/admin.rb +89 -2
  23. data/lib/scrappy/server/helpers.rb +11 -2
  24. data/lib/scrappy/server/server.rb +1 -0
  25. data/lib/scrappy/trainer/trainer.rb +101 -0
  26. data/public/javascripts/annotator.js +75 -0
  27. data/public/javascripts/remote.js +132 -0
  28. data/public/stylesheets/application.css +39 -12
  29. data/scrappy.gemspec +13 -11
  30. data/views/extractors.haml +24 -0
  31. data/views/layout.haml +14 -4
  32. data/views/patterns.haml +19 -0
  33. data/views/samples.haml +28 -0
  34. metadata +58 -56
  35. data/lib/scrappy/agent/extractor.rb +0 -196
  36. data/lib/scrappy/selectors/css.rb +0 -10
  37. data/public/javascripts/scrappy.js +0 -65
  38. data/views/kb.haml +0 -15
@@ -0,0 +1,75 @@
1
/**
 * Annotates every element of the document with its rendered geometry and font.
 *
 * Attributes written on each element:
 *   vx, vy  - offsetLeft / offsetTop summed along the offsetParent chain
 *             (left at 0 when the element has no offsetParent)
 *   vw, vh  - offsetWidth / offsetHeight
 *   vsize   - computed font-size with its last two characters (the unit) removed
 *   vfont   - last entry of the computed font-family list, trimmed
 *   vweight - computed font-weight, with 'normal' and 'bold' mapped to 400 and 700
 */
var add_visual_data = function() {
  var items = document.documentElement.getElementsByTagName('*');

  for (var i = 0; i < items.length; i++) {
    var item = items[i];

    // Accumulate the offsets of the element and of each of its offset ancestors.
    var x = 0;
    var y = 0;
    if (item.offsetParent) {
      for (var obj = item; obj; obj = obj.offsetParent) {
        x += obj.offsetLeft;
        y += obj.offsetTop;
      }
    }
    item.setAttribute('vx', x);
    item.setAttribute('vy', y);
    item.setAttribute('vw', item.offsetWidth);
    item.setAttribute('vh', item.offsetHeight);

    // Resolve the computed style once per element instead of once per property.
    var style = document.defaultView.getComputedStyle(item, null);

    var size = style.getPropertyValue('font-size');
    item.setAttribute('vsize', size.substring(0, size.length - 2));

    var fonts = style.getPropertyValue('font-family').split(",");
    item.setAttribute('vfont', fonts[fonts.length - 1].trim());

    // Keyword weights are normalized to their numeric equivalents.
    var weight = style.getPropertyValue('font-weight');
    if (weight === 'normal') weight = 400;
    if (weight === 'bold') weight = 700;
    item.setAttribute('vweight', weight);
  }
};
31
+
32
+
33
/**
 * On DOM ready, appends the Scrappy panel to the page and wires its links.
 *
 * The panel content depends on window.scrappy_extractor: when truthy it links
 * to the RDF output and to the sample upload; otherwise it links to extractor
 * generation. Clicking a `.sample` or `.extractor` link POSTs the current
 * document (serialized HTML, character set and URL) to the link's href through
 * a hidden form.
 *
 * jQuery passes itself as the first argument of ready callbacks, so `$` is
 * taken from there rather than from the global, which may not be jQuery on
 * the host page.
 */
jQuery(document).ready(function ($) {
  var div;
  if (window.scrappy_extractor) {
    // NOTE(review): escape() is deprecated; kept because the server-side
    // decoding of the /rdf/ path is not visible from this file.
    div = "<div id='scrappy_window' title='Scrappy'>" +
            "<p>Extractor available for this URL</p>" +
            "<p><a href='http://localhost:3434/rdf/" + escape(window.location) + "'>See output</a></p>" +
            "<p><a class='sample' href='http://localhost:3434/samples'>Upload sample</a></p>" +
          "</div>";
  } else {
    div = "<div id='scrappy_window' title='Scrappy'>" +
            "<p>No extractor available for this URL</p>" +
            "<p><a href='TODO'>Annotate page</a></p>" +
            "<p><a class='extractor' href='http://localhost:3434/extractors'>Generate extractor</a></p>" +
          "</div>";
  }

  $("body").append(div);

  $('#scrappy_window .extractor, #scrappy_window .sample').live('click', function (e) {
    // All of these are locals: a single comma-separated declaration keeps
    // enc, uri and form from leaking into the global scope.
    var link = $(this),
        href = link.attr('href'),
        html = $('<input name="html" type="hidden" />'),
        enc  = $('<input name="encoding" type="hidden" />'),
        uri  = $('<input name="uri" type="hidden" />'),
        form = $('<form method="post" action="' + href + '"></form>');

    enc.attr('value', document.characterSet);
    html.attr('value', document.documentElement.outerHTML);
    uri.attr('value', String(window.location));

    form.hide()
        .append(html)
        .append(enc)
        .append(uri)
        .appendTo('body');

    // Replace the default navigation with the POST submission.
    e.preventDefault();
    form.submit();
  });

  // Presumably the jQuery UI dialog widget; its definition is not visible here.
  $("#scrappy_window").dialog();
});
72
+
73
// Annotate the elements present when this script executes; this call is not
// deferred until DOM ready.
add_visual_data();

// Mark that this script has been evaluated in the current window.
window.scrappy_loaded = true;
@@ -0,0 +1,132 @@
1
/**
 * Unobtrusive handlers driven by data-* attributes:
 *   data-confirm       - ask for confirmation before the click proceeds
 *   data-remote        - perform the link / form request through $.ajax
 *   data-method        - submit a non-remote link through a hidden POST form
 *                        carrying the verb in a `_method` field
 *   data-disable-with  - swap an input's value and disable it while its form
 *                        is being submitted
 */
jQuery(function ($) {
  // CSRF parameter name and token as published in the page's <meta> tags.
  var csrf_token = $('meta[name=csrf-token]').attr('content'),
      csrf_param = $('meta[name=csrf-param]').attr('content');

  $.fn.extend({
    /**
     * Triggers a custom event on an element and returns the event result
     * this is used to get around not being able to ensure callbacks are placed
     * at the end of the chain.
     *
     * Returns false only when the event result is exactly false.
     *
     * TODO: deprecate with jQuery 1.4.2 release, in favor of subscribing to our
     *       own events and placing ourselves at the end of the chain.
     */
    triggerAndReturn: function (name, data) {
      var event = new $.Event(name);
      this.trigger(event, data);

      return event.result !== false;
    },

    /**
     * Handles execution of remote calls firing overridable events along the way.
     *
     * The request is sent only when 'ajax:before' is not vetoed; 'ajax:after'
     * is triggered either way.
     *
     * Throws an Error when the element has neither an action nor an href.
     */
    callRemote: function () {
      var el = this,
          method = el.attr('method') || el.attr('data-method') || 'GET',
          url = el.attr('action') || el.attr('href'),
          dataType = el.attr('data-type') || 'script';

      if (url === undefined) {
        throw new Error("No URL specified for remote call (action or href must be present).");
      }

      if (el.triggerAndReturn('ajax:before')) {
        // Only forms contribute request data.
        var data = el.is('form') ? el.serializeArray() : [];
        $.ajax({
          url: url,
          data: data,
          dataType: dataType,
          type: method.toUpperCase(),
          beforeSend: function (xhr) {
            el.trigger('ajax:loading', xhr);
          },
          success: function (response, status, xhr) {
            el.trigger('ajax:success', [response, status, xhr]);
          },
          complete: function (xhr) {
            el.trigger('ajax:complete', xhr);
          },
          error: function (xhr, status, error) {
            el.trigger('ajax:failure', [xhr, status, error]);
          }
        });
      }

      el.trigger('ajax:after');
    }
  });

  /**
   * confirmation handler: unless a 'confirm' event handler vetoes it, shows the
   * native confirm dialog and cancels the click when the user declines.
   */
  $('a[data-confirm],input[data-confirm]').live('click', function () {
    var el = $(this);
    if (el.triggerAndReturn('confirm')) {
      if (!confirm(el.attr('data-confirm'))) {
        return false;
      }
    }
  });

  /**
   * remote handlers: route the submit / click through callRemote instead of
   * the browser's default navigation.
   */
  $('form[data-remote]').live('submit', function (e) {
    $(this).callRemote();
    e.preventDefault();
  });

  $('a[data-remote],input[data-remote]').live('click', function (e) {
    $(this).callRemote();
    e.preventDefault();
  });

  /**
   * data-method handler for non-remote links: submits a hidden POST form to
   * the link's href with the verb in `_method` and, when both CSRF meta values
   * are present, the CSRF field.
   *
   * Dynamic values are assigned with .attr() rather than concatenated into
   * markup, so quotes or angle brackets in them cannot alter the generated
   * elements.
   */
  $('a[data-method]:not([data-remote])').live('click', function (e) {
    var link = $(this),
        href = link.attr('href'),
        method = link.attr('data-method'),
        form = $('<form method="post"></form>'),
        method_input = $('<input name="_method" type="hidden" />');

    // .attr(name, undefined) acts as a getter, so only set a defined href.
    if (href !== undefined) {
      form.attr('action', href);
    }
    method_input.attr('value', method);

    form.hide();
    form.append(method_input);

    if (csrf_param != null && csrf_token != null) {
      var token_input = $('<input type="hidden" />');
      token_input.attr('name', csrf_param);
      token_input.attr('value', csrf_token);
      form.append(token_input);
    }

    form.appendTo('body');

    e.preventDefault();
    form.submit();
  });

  /**
   * disable-with handlers
   */
  var disable_with_input_selector           = 'input[data-disable-with]';
  var disable_with_form_remote_selector     = 'form[data-remote]:has(' + disable_with_input_selector + ')';
  var disable_with_form_not_remote_selector = 'form:not([data-remote]):has(' + disable_with_input_selector + ')';

  // Stores each input's current value, shows the data-disable-with text
  // instead, and disables the input.
  var disable_with_input_function = function () {
    $(this).find(disable_with_input_selector).each(function () {
      var input = $(this);
      input.data('enable-with', input.val())
           .attr('value', input.attr('data-disable-with'))
           .attr('disabled', 'disabled');
    });
  };

  $(disable_with_form_remote_selector).live('ajax:before', disable_with_input_function);
  $(disable_with_form_not_remote_selector).live('submit', disable_with_input_function);

  // Remote forms get their inputs re-enabled with the stored value once the
  // request completes.
  $(disable_with_form_remote_selector).live('ajax:complete', function () {
    $(this).find(disable_with_input_selector).each(function () {
      var input = $(this);
      input.removeAttr('disabled')
           .val(input.data('enable-with'));
    });
  });

});
@@ -18,7 +18,7 @@ a:hover, a:active {
18
18
  text-decoration: underline;
19
19
  }
20
20
  h1, h2, h3, h4 {
21
- color: #777;
21
+ color: #888;
22
22
  }
23
23
  h2 {
24
24
  font-weight: normal;
@@ -73,14 +73,19 @@ img {
73
73
  #bar ul.right {
74
74
  float: right;
75
75
  }
76
- #column {
77
- float:left;
76
+ #notice {
78
77
  margin-top: 20px;
79
- height: auto;
80
- width: 200px;
81
- height: 600px;
82
- border-right: 1px solid #ddd;
83
- overflow-y: auto;
78
+ margin-left: auto;
79
+ margin-right: auto;
80
+ padding: 15px;
81
+ width: 800px;
82
+ color: #333;
83
+ font-size: 14px;
84
+ text-align: center;
85
+ background: -webkit-gradient(linear, left top, left bottom, from(#af9), to(#dfd));
86
+ background: -moz-linear-gradient(top, #af9, #dfd);
87
+ border-radius: 10px;
88
+ -moz-border-radius: 10px;
84
89
  }
85
90
  #body {
86
91
  margin: auto; width: 800px; padding: 15px;
@@ -112,7 +117,7 @@ img {
112
117
  margin-right: 5px;
113
118
  }
114
119
  #footer {
115
- margin-top:30px; text-align: center; font-size:14px; color: #555;
120
+ margin-top:30px; text-align: center; font-size:12px; color: #555;
116
121
  height: 50px;
117
122
  }
118
123
 
@@ -142,6 +147,7 @@ ul.detail {
142
147
  list-style-type: none;
143
148
  }
144
149
  ul.detail li {
150
+ font-size: 12px;
145
151
  padding: 6px;
146
152
  background-color: #eee;
147
153
  margin: 1px;
@@ -149,19 +155,28 @@ ul.detail li {
149
155
  ul.detail li span {
150
156
  display: inline-block;
151
157
  }
152
- ul.detail li span.name {
158
+ ul.detail li span.action {
159
+ font-size: 10px;
160
+ width: 10px;
161
+ }
162
+ ul.detail li span.name, ul.detail li span.short_name {
153
163
  width: 550px;
154
164
  overflow-x: hidden;
165
+ white-space: nowrap;
155
166
  font-family: monospace;
156
- font-size: 12px;
167
+ }
168
+ ul.detail li span.short_name {
169
+ width: 420px;
157
170
  }
158
171
  ul.detail li span.format {
159
172
  float: right;
160
- font-size: 12px;
161
173
  font-weight: bold;
162
174
  margin-left: 10px;
163
175
  text-align: center;
164
176
  }
177
+ ul.detail li span.date {
178
+ float: right;
179
+ }
165
180
 
166
181
  ul.detail li span.format a:hover,
167
182
  ul.detail li span.format a:link,
@@ -169,3 +184,15 @@ ul.detail li span.format a:visited,
169
184
  ul.detail li span.format a:active {
170
185
  color: #900;
171
186
  }
187
+ ul.detail li span.action a:hover,
188
+ ul.detail li span.action a:link,
189
+ ul.detail li span.action a:visited,
190
+ ul.detail li span.action a:active {
191
+ color: #900;
192
+ }
193
+ ul.detail li span.date a:hover,
194
+ ul.detail li span.date a:link,
195
+ ul.detail li span.date a:visited,
196
+ ul.detail li span.date a:active {
197
+ color: #888;
198
+ }
data/scrappy.gemspec CHANGED
@@ -2,50 +2,51 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{scrappy}
5
- s.version = "0.3.0"
5
+ s.version = "0.3.1"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jose Ignacio"]
9
- s.date = %q{2011-03-11}
9
+ s.date = %q{2011-03-18}
10
10
  s.default_executable = %q{scrappy}
11
11
  s.description = %q{RDF web scraper}
12
12
  s.email = %q{joseignacio.fernandez@gmail.com}
13
13
  s.executables = ["scrappy"]
14
- s.extra_rdoc_files = ["README.rdoc", "bin/scrappy", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/formats.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/repository.rb", "lib/scrappy/selectors/base_uri.rb", "lib/scrappy/selectors/css.rb", "lib/scrappy/selectors/new_uri.rb", "lib/scrappy/selectors/root.rb", "lib/scrappy/selectors/section.rb", "lib/scrappy/selectors/slice.rb", "lib/scrappy/selectors/uri.rb", "lib/scrappy/selectors/uri_pattern.rb", "lib/scrappy/selectors/xpath.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb"]
15
- s.files = ["History.txt", "Manifest", "README.rdoc", "Rakefile", "bin/scrappy", "kb/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/formats.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/repository.rb", "lib/scrappy/selectors/base_uri.rb", "lib/scrappy/selectors/css.rb", "lib/scrappy/selectors/new_uri.rb", "lib/scrappy/selectors/root.rb", "lib/scrappy/selectors/section.rb", "lib/scrappy/selectors/slice.rb", "lib/scrappy/selectors/uri.rb", "lib/scrappy/selectors/uri_pattern.rb", "lib/scrappy/selectors/xpath.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb", "public/favicon.ico", "public/images/logo.png", "public/images/logo_tiny.png", "public/javascripts/scrappy.js", "public/stylesheets/application.css", "test/test_helper.rb", "test/test_scrappy.rb", "views/help.haml", "views/home.haml", "views/kb.haml", "views/layout.haml", "scrappy.gemspec"]
14
+ s.extra_rdoc_files = ["README.rdoc", "bin/scrappy", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/extractor/extractor.rb", "lib/scrappy/extractor/formats.rb", "lib/scrappy/extractor/fragment.rb", "lib/scrappy/extractor/selector.rb", "lib/scrappy/extractor/selectors/base_uri.rb", "lib/scrappy/extractor/selectors/css.rb", "lib/scrappy/extractor/selectors/new_uri.rb", "lib/scrappy/extractor/selectors/root.rb", "lib/scrappy/extractor/selectors/section.rb", "lib/scrappy/extractor/selectors/slice.rb", "lib/scrappy/extractor/selectors/uri.rb", "lib/scrappy/extractor/selectors/uri_pattern.rb", "lib/scrappy/extractor/selectors/visual.rb", "lib/scrappy/extractor/selectors/xpath.rb", "lib/scrappy/repository.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb", "lib/scrappy/trainer/trainer.rb"]
15
+ s.files = ["History.txt", "Manifest", "README.rdoc", "Rakefile", "bin/scrappy", "kb/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/extractor/extractor.rb", "lib/scrappy/extractor/formats.rb", "lib/scrappy/extractor/fragment.rb", "lib/scrappy/extractor/selector.rb", "lib/scrappy/extractor/selectors/base_uri.rb", "lib/scrappy/extractor/selectors/css.rb", "lib/scrappy/extractor/selectors/new_uri.rb", "lib/scrappy/extractor/selectors/root.rb", "lib/scrappy/extractor/selectors/section.rb", "lib/scrappy/extractor/selectors/slice.rb", "lib/scrappy/extractor/selectors/uri.rb", "lib/scrappy/extractor/selectors/uri_pattern.rb", "lib/scrappy/extractor/selectors/visual.rb", "lib/scrappy/extractor/selectors/xpath.rb", "lib/scrappy/repository.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb", "lib/scrappy/trainer/trainer.rb", "public/favicon.ico", "public/images/logo.png", "public/images/logo_tiny.png", "public/javascripts/annotator.js", "public/javascripts/remote.js", "public/stylesheets/application.css", "test/test_helper.rb", "test/test_scrappy.rb", "views/extractors.haml", "views/help.haml", "views/home.haml", "views/layout.haml", "views/patterns.haml", "views/samples.haml", "scrappy.gemspec"]
16
16
  s.homepage = %q{http://github.com/josei/scrappy}
17
- s.post_install_message = %q{**(Optional) Remember to install rbwebkitgtk for visual parsing features**}
18
17
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Scrappy", "--main", "README.rdoc"]
19
18
  s.require_paths = ["lib"]
20
19
  s.rubyforge_project = %q{scrappy}
21
- s.rubygems_version = %q{1.3.7}
20
+ s.rubygems_version = %q{1.3.6}
22
21
  s.summary = %q{Web scraper that allows producing RDF data out of plain web pages}
23
- s.test_files = ["test/test_helper.rb", "test/test_scrappy.rb"]
22
+ s.test_files = ["test/test_scrappy.rb", "test/test_helper.rb"]
24
23
 
25
24
  if s.respond_to? :specification_version then
26
25
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
27
26
  s.specification_version = 3
28
27
 
29
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
28
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
30
29
  s.add_runtime_dependency(%q<activesupport>, [">= 2.3.5"])
31
30
  s.add_runtime_dependency(%q<sinatra>, [">= 1.1.2"])
32
31
  s.add_runtime_dependency(%q<thin>, [">= 1.2.7"])
33
32
  s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.1"])
34
33
  s.add_runtime_dependency(%q<mechanize>, [">= 1.0.0"])
35
- s.add_runtime_dependency(%q<lightrdf>, [">= 0.2.1"])
34
+ s.add_runtime_dependency(%q<lightrdf>, [">= 0.3.0"])
36
35
  s.add_runtime_dependency(%q<i18n>, [">= 0.4.2"])
37
36
  s.add_runtime_dependency(%q<rest-client>, [">= 1.6.1"])
38
37
  s.add_runtime_dependency(%q<haml>, [">= 3.0.24"])
38
+ s.add_runtime_dependency(%q<rack-flash>, [">= 0.1.1"])
39
39
  else
40
40
  s.add_dependency(%q<activesupport>, [">= 2.3.5"])
41
41
  s.add_dependency(%q<sinatra>, [">= 1.1.2"])
42
42
  s.add_dependency(%q<thin>, [">= 1.2.7"])
43
43
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
44
44
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
45
- s.add_dependency(%q<lightrdf>, [">= 0.2.1"])
45
+ s.add_dependency(%q<lightrdf>, [">= 0.3.0"])
46
46
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
47
47
  s.add_dependency(%q<rest-client>, [">= 1.6.1"])
48
48
  s.add_dependency(%q<haml>, [">= 3.0.24"])
49
+ s.add_dependency(%q<rack-flash>, [">= 0.1.1"])
49
50
  end
50
51
  else
51
52
  s.add_dependency(%q<activesupport>, [">= 2.3.5"])
@@ -53,9 +54,10 @@ Gem::Specification.new do |s|
53
54
  s.add_dependency(%q<thin>, [">= 1.2.7"])
54
55
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
55
56
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
56
- s.add_dependency(%q<lightrdf>, [">= 0.2.1"])
57
+ s.add_dependency(%q<lightrdf>, [">= 0.3.0"])
57
58
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
58
59
  s.add_dependency(%q<rest-client>, [">= 1.6.1"])
59
60
  s.add_dependency(%q<haml>, [">= 3.0.24"])
61
+ s.add_dependency(%q<rack-flash>, [">= 0.1.1"])
60
62
  end
61
63
  end
@@ -0,0 +1,24 @@
1
+ #body
2
+ %h1 Extractors
3
+ %p
4
+ Extractors are mappings between HTML pages and RDF data. They are used to extract RDF data from plain web pages.
5
+ %p
6
+ -if @uris.empty?
7
+ Currently, there are no extractors.
8
+ -else
9
+ %ul.detail
10
+ -@uris.each do |uri|
11
+ %li
12
+ -if Scrappy::App.editable_kb?
13
+ %span.action
14
+ %a{:href=>"#{settings.base_uri}/extractors/#{CGI::escape(uri)}", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete the extractor for #{uri}?"}
15
+ X
16
+ %span.name
17
+ -if !uri.include?('*')
18
+ %a{:href=>uri}=uri
19
+ -else
20
+ =uri
21
+ -if !uri.include?('*')
22
+ -[['RDF', :rdf], ['JSON', :ejson], ['YARF', :yarf], ['nTriples', :ntriples], ['PNG', :png]].reverse.each do |format, format_code|
23
+ %span.format
24
+ %a{:href=>"#{settings.base_uri}/#{format_code}/#{uri}"}=format
data/views/layout.haml CHANGED
@@ -3,6 +3,8 @@
3
3
  %head
4
4
  %title Scrappy
5
5
  %link{:type=>"text/css", :href=>"#{settings.base_uri}/stylesheets/application.css", :rel=>"stylesheet"}
6
+ %script{:src=>"https://ajax.googleapis.com/ajax/libs/jquery/1.4.2/jquery.min.js"}
7
+ %script{:src=>"#{settings.base_uri}/javascripts/remote.js"}
6
8
  %body
7
9
  #bar
8
10
  -if request.fullpath!='/'
@@ -12,11 +14,19 @@
12
14
  %img{:src=>"#{settings.base_uri}/images/logo_tiny.png", :alt=>"Scrappy"}
13
15
  %ul.right
14
16
  %li
15
- %a{:href=>"#{settings.base_uri}/kb"} Knowledge base
17
+ %a{:href=>"#{settings.base_uri}/extractors"} Extractors
18
+ %li
19
+ %a{:href=>"#{settings.base_uri}/patterns"} Patterns
20
+ %li
21
+ %a{:href=>"#{settings.base_uri}/samples"} Samples
16
22
  %li
17
23
  %a{:href=>"#{settings.base_uri}/help"} Help
24
+ -if flash[:notice]
25
+ #notice=flash[:notice]
18
26
  =yield
19
27
  #footer
20
- %a{:href=>"#{settings.base_uri}/"} Home
21
- |
22
- %a{:href=>'http://github.com/josei/scrappy'} About
28
+ %p
29
+ %a{:href=>"#{settings.base_uri}/"} Home
30
+ |
31
+ %a{:href=>'http://github.com/josei/scrappy'} About
32
+ %p==Scrappy v#{Scrappy::VERSION}
@@ -0,0 +1,19 @@
1
+ #body
2
+ %h1 Patterns
3
+ %p
4
+ Patterns are visual conditions that are used to identify data in sites which do not have a defined extractor.
5
+ %p
6
+ -if @uris.empty?
7
+ Currently, there are no patterns.
8
+ -else
9
+ %ul.detail
10
+ -@uris.each do |uri|
11
+ %li
12
+ %span.action
13
+ %a{:href=>"#{settings.base_uri}/patterns/#{CGI::escape(uri)}", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete the pattern for #{uri}?"}
14
+ X
15
+ %span.name
16
+ -if !uri.include?('*')
17
+ %a{:href=>uri}=uri
18
+ -else
19
+ =uri