scrappy 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/History.txt +6 -0
  2. data/Manifest +21 -14
  3. data/README.rdoc +5 -9
  4. data/Rakefile +1 -2
  5. data/bin/scrappy +141 -51
  6. data/lib/scrappy.rb +6 -9
  7. data/lib/scrappy/agent/agent.rb +3 -3
  8. data/lib/scrappy/extractor/extractor.rb +108 -0
  9. data/lib/scrappy/{agent → extractor}/formats.rb +0 -0
  10. data/lib/scrappy/extractor/fragment.rb +111 -0
  11. data/lib/scrappy/extractor/selector.rb +41 -0
  12. data/lib/scrappy/{selectors → extractor/selectors}/base_uri.rb +1 -3
  13. data/lib/scrappy/extractor/selectors/css.rb +5 -0
  14. data/lib/scrappy/{selectors → extractor/selectors}/new_uri.rb +1 -3
  15. data/lib/scrappy/{selectors → extractor/selectors}/root.rb +1 -4
  16. data/lib/scrappy/{selectors → extractor/selectors}/section.rb +1 -4
  17. data/lib/scrappy/{selectors → extractor/selectors}/slice.rb +1 -3
  18. data/lib/scrappy/{selectors → extractor/selectors}/uri.rb +2 -4
  19. data/lib/scrappy/{selectors → extractor/selectors}/uri_pattern.rb +2 -4
  20. data/lib/scrappy/extractor/selectors/visual.rb +39 -0
  21. data/lib/scrappy/{selectors → extractor/selectors}/xpath.rb +1 -4
  22. data/lib/scrappy/server/admin.rb +89 -2
  23. data/lib/scrappy/server/helpers.rb +11 -2
  24. data/lib/scrappy/server/server.rb +1 -0
  25. data/lib/scrappy/trainer/trainer.rb +101 -0
  26. data/public/javascripts/annotator.js +75 -0
  27. data/public/javascripts/remote.js +132 -0
  28. data/public/stylesheets/application.css +39 -12
  29. data/scrappy.gemspec +13 -11
  30. data/views/extractors.haml +24 -0
  31. data/views/layout.haml +14 -4
  32. data/views/patterns.haml +19 -0
  33. data/views/samples.haml +28 -0
  34. metadata +58 -56
  35. data/lib/scrappy/agent/extractor.rb +0 -196
  36. data/lib/scrappy/selectors/css.rb +0 -10
  37. data/public/javascripts/scrappy.js +0 -65
  38. data/views/kb.haml +0 -15
@@ -0,0 +1,75 @@
1
/**
 * Annotates every element of the document with attributes describing how it
 * is rendered, so the layout information travels with the serialized markup.
 *
 * Attributes written on each element:
 *   vx, vy  - sum of offsetLeft / offsetTop over the element and its
 *             offsetParent chain (both 0 when the element has no offsetParent)
 *   vw, vh  - offsetWidth / offsetHeight
 *   vsize   - computed font-size with its last two characters removed
 *             (presumably the "px" unit - computed sizes are expected in px)
 *   vfont   - last entry of the computed font-family list, trimmed
 *   vweight - computed font-weight; the keywords 'normal' and 'bold' are
 *             mapped to 400 and 700
 */
var add_visual_data = function() {
  var items = document.documentElement.getElementsByTagName('*');
  for (var i = 0; i < items.length; i++) {
    var item = items[i];
    var x = 0;
    var y = 0;

    // Accumulate the offsets of the element and of each ancestor in its
    // offsetParent chain; elements without an offsetParent stay at (0, 0).
    if (item.offsetParent) {
      var obj = item;
      while (obj) {
        x += obj.offsetLeft;
        y += obj.offsetTop;
        obj = obj.offsetParent;
      }
    }
    item.setAttribute('vx', x);
    item.setAttribute('vy', y);
    item.setAttribute('vw', item.offsetWidth);
    item.setAttribute('vh', item.offsetHeight);

    // Resolve the computed style once per element and read all three font
    // properties from the same declaration.
    var style = document.defaultView.getComputedStyle(item, null);

    var size = style.getPropertyValue('font-size');
    size = size.substring(0, size.length - 2);
    item.setAttribute('vsize', size);

    var fonts = style.getPropertyValue('font-family').split(",");
    var font = fonts[fonts.length - 1].trim();
    item.setAttribute('vfont', font);

    var weight = style.getPropertyValue('font-weight');
    if (weight === 'normal') weight = 400;
    if (weight === 'bold') weight = 700;
    item.setAttribute('vweight', weight);
  }
};
31
+
32
+
33
/**
 * Once the DOM is ready, appends the Scrappy helper window to the page and
 * turns it into a dialog (via the jQuery UI-style `.dialog()` plugin call).
 *
 * The window content depends on `window.scrappy_extractor`:
 *   - truthy: links to the extracted output and to "Upload sample"
 *   - falsy:  links to "Annotate page" and to "Generate extractor"
 *
 * Clicking a `.sample` or `.extractor` link does not follow the link; instead
 * a hidden form is POSTed to the link's href carrying the page's HTML, its
 * character set and its URI.
 *
 * The ready callback takes jQuery as its argument so `$` is always jQuery in
 * here, even when the host page has bound `$` to something else.
 */
jQuery(document).ready(function($) {
  var div;
  if (window.scrappy_extractor) {
    // NOTE(review): escape() is deprecated and leaves characters such as '/'
    // and '+' unencoded; kept as-is because the server route consuming this
    // path is not visible here - confirm before switching to
    // encodeURIComponent().
    div = "<div id='scrappy_window' title='Scrappy'>" +
            "<p>Extractor available for this URL</p>" +
            "<p><a href='http://localhost:3434/rdf/"+escape(window.location)+"'>See output</a></p>" +
            "<p><a class='sample' href='http://localhost:3434/samples'>Upload sample</a></p>" +
          "</div>";
  } else {
    div = "<div id='scrappy_window' title='Scrappy'>" +
            "<p>No extractor available for this URL</p>" +
            "<p><a href='TODO'>Annotate page</a></p>" +
            "<p><a class='extractor' href='http://localhost:3434/extractors'>Generate extractor</a></p>" +
          "</div>";
  }

  $("body").append(div);

  $('#scrappy_window .extractor, #scrappy_window .sample').live('click', function (e) {
    // All of these are locals: this script runs inside arbitrary host pages,
    // so nothing may leak onto (or clobber) the page's global scope.
    var link = $(this),
        href = link.attr('href'),
        html = $('<input name="html" type="hidden" />'),
        enc  = $('<input name="encoding" type="hidden" />'),
        uri  = $('<input name="uri" type="hidden" />'),
        form = $('<form method="post" action="'+href+'"></form>');

    enc.attr('value', document.characterSet);
    html.attr('value', document.documentElement.outerHTML);
    uri.attr('value', window.location.href);

    form.hide()
        .append(html)
        .append(enc)
        .append(uri)
        .appendTo('body');

    e.preventDefault();
    form.submit();
  });

  $("#scrappy_window").dialog();
});

// Runs immediately at script load (not deferred to DOM ready).
add_visual_data();

// Flag on window recording that this script has been evaluated.
window.scrappy_loaded = true;
@@ -0,0 +1,132 @@
1
/**
 * Unobtrusive handlers driven by data-* attributes, registered on DOM ready:
 *   data-confirm       - ask for confirmation before a click goes through
 *   data-remote        - perform the form submit / link click through $.ajax
 *   data-method        - submit a link through a hidden POST form carrying a
 *                        `_method` field (and the CSRF field when available)
 *   data-disable-with  - swap an input's value and disable it while its form
 *                        is being submitted
 *
 * Handler registration order matters: the confirm handler is registered
 * before the remote / data-method click handlers.
 */
jQuery(function ($) {
  // CSRF field name and token, read from <meta> tags; both are undefined
  // when the page does not provide them.
  var csrf_token = $('meta[name=csrf-token]').attr('content'),
      csrf_param = $('meta[name=csrf-param]').attr('content');

  $.fn.extend({
    /**
     * Triggers a custom event on an element and returns the event result
     * this is used to get around not being able to ensure callbacks are placed
     * at the end of the chain.
     *
     * Returns false only when a handler explicitly returned false.
     *
     * TODO: deprecate with jQuery 1.4.2 release, in favor of subscribing to our
     *       own events and placing ourselves at the end of the chain.
     */
    triggerAndReturn: function (name, data) {
      var event = new $.Event(name);
      this.trigger(event, data);

      return event.result !== false;
    },

    /**
     * Handles execution of remote calls firing overridable events along the way.
     *
     * Method comes from `method` / `data-method` (default GET), URL from
     * `action` / `href`, response type from `data-type` (default 'script').
     * Fires ajax:before (cancellable), ajax:loading, ajax:success,
     * ajax:failure, ajax:complete and finally ajax:after; ajax:after fires
     * even when ajax:before cancelled the request.
     *
     * Throws an Error when the element has neither `action` nor `href`.
     */
    callRemote: function () {
      var el       = this,
          method   = el.attr('method') || el.attr('data-method') || 'GET',
          url      = el.attr('action') || el.attr('href'),
          dataType = el.attr('data-type') || 'script';

      if (url === undefined) {
        // Throw a real Error (not a bare string) so the failure carries a
        // stack trace.
        throw new Error("No URL specified for remote call (action or href must be present).");
      }

      if (el.triggerAndReturn('ajax:before')) {
        // Only forms contribute request data; links send none.
        var data = el.is('form') ? el.serializeArray() : [];
        $.ajax({
          url: url,
          data: data,
          dataType: dataType,
          type: method.toUpperCase(),
          beforeSend: function (xhr) {
            el.trigger('ajax:loading', xhr);
          },
          success: function (data, status, xhr) {
            el.trigger('ajax:success', [data, status, xhr]);
          },
          complete: function (xhr) {
            el.trigger('ajax:complete', xhr);
          },
          error: function (xhr, status, error) {
            el.trigger('ajax:failure', [xhr, status, error]);
          }
        });
      }

      el.trigger('ajax:after');
    }
  });

  /**
   *  confirmation handler
   *
   *  Returning false cancels the click; a `confirm` event handler that
   *  returns false skips the prompt altogether.
   */
  $('a[data-confirm],input[data-confirm]').live('click', function () {
    var el = $(this);
    if (el.triggerAndReturn('confirm')) {
      if (!confirm(el.attr('data-confirm'))) {
        return false;
      }
    }
  });


  /**
   * remote handlers
   */
  $('form[data-remote]').live('submit', function (e) {
    $(this).callRemote();
    e.preventDefault();
  });

  $('a[data-remote],input[data-remote]').live('click', function (e) {
    $(this).callRemote();
    e.preventDefault();
  });

  /**
   * Non-remote links with data-method: submit a hidden POST form to the
   * link's href with the requested method in `_method`.
   *
   * NOTE(review): the form markup is assembled by string concatenation, so
   * href / data-method / CSRF values containing a double quote would break
   * out of their attribute - confirm these values are always escaped by the
   * templates that emit them.
   */
  $('a[data-method]:not([data-remote])').live('click', function (e){
    var link = $(this),
        href = link.attr('href'),
        method = link.attr('data-method'),
        form = $('<form method="post" action="'+href+'"></form>'),
        metadata_input = '<input name="_method" value="'+method+'" type="hidden" />';

    if (csrf_param != null && csrf_token != null) {
      metadata_input += '<input name="'+csrf_param+'" value="'+csrf_token+'" type="hidden" />';
    }

    form.hide()
        .append(metadata_input)
        .appendTo('body');

    e.preventDefault();
    form.submit();
  });

  /**
   * disable-with handlers
   */
  var disable_with_input_selector           = 'input[data-disable-with]';
  var disable_with_form_remote_selector     = 'form[data-remote]:has('       + disable_with_input_selector + ')';
  var disable_with_form_not_remote_selector = 'form:not([data-remote]):has(' + disable_with_input_selector + ')';

  // Remembers each input's current value, replaces it with the
  // data-disable-with text and disables the input.
  var disable_with_input_function = function () {
    $(this).find(disable_with_input_selector).each(function () {
      var input = $(this);
      input.data('enable-with', input.val())
           .attr('value', input.attr('data-disable-with'))
           .attr('disabled', 'disabled');
    });
  };

  $(disable_with_form_remote_selector).live('ajax:before', disable_with_input_function);
  $(disable_with_form_not_remote_selector).live('submit', disable_with_input_function);

  // Remote forms stay on the page, so their inputs are re-enabled and their
  // remembered values restored once the request completes.
  $(disable_with_form_remote_selector).live('ajax:complete', function () {
    $(this).find(disable_with_input_selector).each(function () {
      var input = $(this);
      input.removeAttr('disabled')
           .val(input.data('enable-with'));
    });
  });

});
@@ -18,7 +18,7 @@ a:hover, a:active {
18
18
  text-decoration: underline;
19
19
  }
20
20
  h1, h2, h3, h4 {
21
- color: #777;
21
+ color: #888;
22
22
  }
23
23
  h2 {
24
24
  font-weight: normal;
@@ -73,14 +73,19 @@ img {
73
73
  #bar ul.right {
74
74
  float: right;
75
75
  }
76
- #column {
77
- float:left;
76
+ #notice {
78
77
  margin-top: 20px;
79
- height: auto;
80
- width: 200px;
81
- height: 600px;
82
- border-right: 1px solid #ddd;
83
- overflow-y: auto;
78
+ margin-left: auto;
79
+ margin-right: auto;
80
+ padding: 15px;
81
+ width: 800px;
82
+ color: #333;
83
+ font-size: 14px;
84
+ text-align: center;
85
+ background: -webkit-gradient(linear, left top, left bottom, from(#af9), to(#dfd));
86
+ background: -moz-linear-gradient(top, #af9, #dfd);
87
+ border-radius: 10px;
88
+ -moz-border-radius: 10px;
84
89
  }
85
90
  #body {
86
91
  margin: auto; width: 800px; padding: 15px;
@@ -112,7 +117,7 @@ img {
112
117
  margin-right: 5px;
113
118
  }
114
119
  #footer {
115
- margin-top:30px; text-align: center; font-size:14px; color: #555;
120
+ margin-top:30px; text-align: center; font-size:12px; color: #555;
116
121
  height: 50px;
117
122
  }
118
123
 
@@ -142,6 +147,7 @@ ul.detail {
142
147
  list-style-type: none;
143
148
  }
144
149
  ul.detail li {
150
+ font-size: 12px;
145
151
  padding: 6px;
146
152
  background-color: #eee;
147
153
  margin: 1px;
@@ -149,19 +155,28 @@ ul.detail li {
149
155
  ul.detail li span {
150
156
  display: inline-block;
151
157
  }
152
- ul.detail li span.name {
158
+ ul.detail li span.action {
159
+ font-size: 10px;
160
+ width: 10px;
161
+ }
162
+ ul.detail li span.name, ul.detail li span.short_name {
153
163
  width: 550px;
154
164
  overflow-x: hidden;
165
+ white-space: nowrap;
155
166
  font-family: monospace;
156
- font-size: 12px;
167
+ }
168
+ ul.detail li span.short_name {
169
+ width: 420px;
157
170
  }
158
171
  ul.detail li span.format {
159
172
  float: right;
160
- font-size: 12px;
161
173
  font-weight: bold;
162
174
  margin-left: 10px;
163
175
  text-align: center;
164
176
  }
177
+ ul.detail li span.date {
178
+ float: right;
179
+ }
165
180
 
166
181
  ul.detail li span.format a:hover,
167
182
  ul.detail li span.format a:link,
@@ -169,3 +184,15 @@ ul.detail li span.format a:visited,
169
184
  ul.detail li span.format a:active {
170
185
  color: #900;
171
186
  }
187
+ ul.detail li span.action a:hover,
188
+ ul.detail li span.action a:link,
189
+ ul.detail li span.action a:visited,
190
+ ul.detail li span.action a:active {
191
+ color: #900;
192
+ }
193
+ ul.detail li span.date a:hover,
194
+ ul.detail li span.date a:link,
195
+ ul.detail li span.date a:visited,
196
+ ul.detail li span.date a:active {
197
+ color: #888;
198
+ }
data/scrappy.gemspec CHANGED
@@ -2,50 +2,51 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{scrappy}
5
- s.version = "0.3.0"
5
+ s.version = "0.3.1"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jose Ignacio"]
9
- s.date = %q{2011-03-11}
9
+ s.date = %q{2011-03-18}
10
10
  s.default_executable = %q{scrappy}
11
11
  s.description = %q{RDF web scraper}
12
12
  s.email = %q{joseignacio.fernandez@gmail.com}
13
13
  s.executables = ["scrappy"]
14
- s.extra_rdoc_files = ["README.rdoc", "bin/scrappy", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/formats.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/repository.rb", "lib/scrappy/selectors/base_uri.rb", "lib/scrappy/selectors/css.rb", "lib/scrappy/selectors/new_uri.rb", "lib/scrappy/selectors/root.rb", "lib/scrappy/selectors/section.rb", "lib/scrappy/selectors/slice.rb", "lib/scrappy/selectors/uri.rb", "lib/scrappy/selectors/uri_pattern.rb", "lib/scrappy/selectors/xpath.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb"]
15
- s.files = ["History.txt", "Manifest", "README.rdoc", "Rakefile", "bin/scrappy", "kb/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/extractor.rb", "lib/scrappy/agent/formats.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/repository.rb", "lib/scrappy/selectors/base_uri.rb", "lib/scrappy/selectors/css.rb", "lib/scrappy/selectors/new_uri.rb", "lib/scrappy/selectors/root.rb", "lib/scrappy/selectors/section.rb", "lib/scrappy/selectors/slice.rb", "lib/scrappy/selectors/uri.rb", "lib/scrappy/selectors/uri_pattern.rb", "lib/scrappy/selectors/xpath.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb", "public/favicon.ico", "public/images/logo.png", "public/images/logo_tiny.png", "public/javascripts/scrappy.js", "public/stylesheets/application.css", "test/test_helper.rb", "test/test_scrappy.rb", "views/help.haml", "views/home.haml", "views/kb.haml", "views/layout.haml", "scrappy.gemspec"]
14
+ s.extra_rdoc_files = ["README.rdoc", "bin/scrappy", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/extractor/extractor.rb", "lib/scrappy/extractor/formats.rb", "lib/scrappy/extractor/fragment.rb", "lib/scrappy/extractor/selector.rb", "lib/scrappy/extractor/selectors/base_uri.rb", "lib/scrappy/extractor/selectors/css.rb", "lib/scrappy/extractor/selectors/new_uri.rb", "lib/scrappy/extractor/selectors/root.rb", "lib/scrappy/extractor/selectors/section.rb", "lib/scrappy/extractor/selectors/slice.rb", "lib/scrappy/extractor/selectors/uri.rb", "lib/scrappy/extractor/selectors/uri_pattern.rb", "lib/scrappy/extractor/selectors/visual.rb", "lib/scrappy/extractor/selectors/xpath.rb", "lib/scrappy/repository.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb", "lib/scrappy/trainer/trainer.rb"]
15
+ s.files = ["History.txt", "Manifest", "README.rdoc", "Rakefile", "bin/scrappy", "kb/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/extractor/extractor.rb", "lib/scrappy/extractor/formats.rb", "lib/scrappy/extractor/fragment.rb", "lib/scrappy/extractor/selector.rb", "lib/scrappy/extractor/selectors/base_uri.rb", "lib/scrappy/extractor/selectors/css.rb", "lib/scrappy/extractor/selectors/new_uri.rb", "lib/scrappy/extractor/selectors/root.rb", "lib/scrappy/extractor/selectors/section.rb", "lib/scrappy/extractor/selectors/slice.rb", "lib/scrappy/extractor/selectors/uri.rb", "lib/scrappy/extractor/selectors/uri_pattern.rb", "lib/scrappy/extractor/selectors/visual.rb", "lib/scrappy/extractor/selectors/xpath.rb", "lib/scrappy/repository.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb", "lib/scrappy/trainer/trainer.rb", "public/favicon.ico", "public/images/logo.png", "public/images/logo_tiny.png", "public/javascripts/annotator.js", "public/javascripts/remote.js", "public/stylesheets/application.css", "test/test_helper.rb", "test/test_scrappy.rb", "views/extractors.haml", "views/help.haml", "views/home.haml", "views/layout.haml", "views/patterns.haml", "views/samples.haml", "scrappy.gemspec"]
16
16
  s.homepage = %q{http://github.com/josei/scrappy}
17
- s.post_install_message = %q{**(Optional) Remember to install rbwebkitgtk for visual parsing features**}
18
17
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Scrappy", "--main", "README.rdoc"]
19
18
  s.require_paths = ["lib"]
20
19
  s.rubyforge_project = %q{scrappy}
21
- s.rubygems_version = %q{1.3.7}
20
+ s.rubygems_version = %q{1.3.6}
22
21
  s.summary = %q{Web scraper that allows producing RDF data out of plain web pages}
23
- s.test_files = ["test/test_helper.rb", "test/test_scrappy.rb"]
22
+ s.test_files = ["test/test_scrappy.rb", "test/test_helper.rb"]
24
23
 
25
24
  if s.respond_to? :specification_version then
26
25
  current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
27
26
  s.specification_version = 3
28
27
 
29
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
28
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
30
29
  s.add_runtime_dependency(%q<activesupport>, [">= 2.3.5"])
31
30
  s.add_runtime_dependency(%q<sinatra>, [">= 1.1.2"])
32
31
  s.add_runtime_dependency(%q<thin>, [">= 1.2.7"])
33
32
  s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.1"])
34
33
  s.add_runtime_dependency(%q<mechanize>, [">= 1.0.0"])
35
- s.add_runtime_dependency(%q<lightrdf>, [">= 0.2.1"])
34
+ s.add_runtime_dependency(%q<lightrdf>, [">= 0.3.0"])
36
35
  s.add_runtime_dependency(%q<i18n>, [">= 0.4.2"])
37
36
  s.add_runtime_dependency(%q<rest-client>, [">= 1.6.1"])
38
37
  s.add_runtime_dependency(%q<haml>, [">= 3.0.24"])
38
+ s.add_runtime_dependency(%q<rack-flash>, [">= 0.1.1"])
39
39
  else
40
40
  s.add_dependency(%q<activesupport>, [">= 2.3.5"])
41
41
  s.add_dependency(%q<sinatra>, [">= 1.1.2"])
42
42
  s.add_dependency(%q<thin>, [">= 1.2.7"])
43
43
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
44
44
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
45
- s.add_dependency(%q<lightrdf>, [">= 0.2.1"])
45
+ s.add_dependency(%q<lightrdf>, [">= 0.3.0"])
46
46
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
47
47
  s.add_dependency(%q<rest-client>, [">= 1.6.1"])
48
48
  s.add_dependency(%q<haml>, [">= 3.0.24"])
49
+ s.add_dependency(%q<rack-flash>, [">= 0.1.1"])
49
50
  end
50
51
  else
51
52
  s.add_dependency(%q<activesupport>, [">= 2.3.5"])
@@ -53,9 +54,10 @@ Gem::Specification.new do |s|
53
54
  s.add_dependency(%q<thin>, [">= 1.2.7"])
54
55
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
55
56
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
56
- s.add_dependency(%q<lightrdf>, [">= 0.2.1"])
57
+ s.add_dependency(%q<lightrdf>, [">= 0.3.0"])
57
58
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
58
59
  s.add_dependency(%q<rest-client>, [">= 1.6.1"])
59
60
  s.add_dependency(%q<haml>, [">= 3.0.24"])
61
+ s.add_dependency(%q<rack-flash>, [">= 0.1.1"])
60
62
  end
61
63
  end
@@ -0,0 +1,24 @@
1
+ -# Extractors index view: an explanatory paragraph followed by either a
+ -# "no extractors" message (when @uris is empty) or one list item per URI.
+ -# When Scrappy::App.editable_kb? is true each item gets an "X" link carrying
+ -# data-method=delete and a data-confirm prompt. URIs without '*' are rendered
+ -# as links and get one output link per format; the format list is reversed
+ -# before rendering (presumably because span.format floats right - confirm).
+ -# NOTE(review): element nesting is not recoverable from this listing; verify
+ -# indentation against the actual template.
+ #body
2
+ %h1 Extractors
3
+ %p
4
+ Extractors are mappings between HTML pages and RDF data. They are used to extract RDF data from plain web pages.
5
+ %p
6
+ -if @uris.empty?
7
+ Currently, there are no extractors.
8
+ -else
9
+ %ul.detail
10
+ -@uris.each do |uri|
11
+ %li
12
+ -if Scrappy::App.editable_kb?
13
+ %span.action
14
+ %a{:href=>"#{settings.base_uri}/extractors/#{CGI::escape(uri)}", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete the extractor for #{uri}?"}
15
+ X
16
+ %span.name
17
+ -if !uri.include?('*')
18
+ %a{:href=>uri}=uri
19
+ -else
20
+ =uri
21
+ -if !uri.include?('*')
22
+ -[['RDF', :rdf], ['JSON', :ejson], ['YARF', :yarf], ['nTriples', :ntriples], ['PNG', :png]].reverse.each do |format, format_code|
23
+ %span.format
24
+ %a{:href=>"#{settings.base_uri}/#{format_code}/#{uri}"}=format
data/views/layout.haml CHANGED
@@ -3,6 +3,8 @@
3
3
  %head
4
4
  %title Scrappy
5
5
  %link{:type=>"text/css", :href=>"#{settings.base_uri}/stylesheets/application.css", :rel=>"stylesheet"}
6
+ %script{:src=>"https://ajax.googleapis.com/ajax/libs/jquery/1.4.2/jquery.min.js"}
7
+ %script{:src=>"#{settings.base_uri}/javascripts/remote.js"}
6
8
  %body
7
9
  #bar
8
10
  -if request.fullpath!='/'
@@ -12,11 +14,19 @@
12
14
  %img{:src=>"#{settings.base_uri}/images/logo_tiny.png", :alt=>"Scrappy"}
13
15
  %ul.right
14
16
  %li
15
- %a{:href=>"#{settings.base_uri}/kb"} Knowledge base
17
+ %a{:href=>"#{settings.base_uri}/extractors"} Extractors
18
+ %li
19
+ %a{:href=>"#{settings.base_uri}/patterns"} Patterns
20
+ %li
21
+ %a{:href=>"#{settings.base_uri}/samples"} Samples
16
22
  %li
17
23
  %a{:href=>"#{settings.base_uri}/help"} Help
24
+ -if flash[:notice]
25
+ #notice=flash[:notice]
18
26
  =yield
19
27
  #footer
20
- %a{:href=>"#{settings.base_uri}/"} Home
21
- |
22
- %a{:href=>'http://github.com/josei/scrappy'} About
28
+ %p
29
+ %a{:href=>"#{settings.base_uri}/"} Home
30
+ |
31
+ %a{:href=>'http://github.com/josei/scrappy'} About
32
+ %p==Scrappy v#{Scrappy::VERSION}
@@ -0,0 +1,19 @@
1
+ -# Patterns index view: an explanatory paragraph followed by either a
+ -# "no patterns" message (when @uris is empty) or one list item per URI.
+ -# Every item gets an "X" link carrying data-method=delete and a data-confirm
+ -# prompt; URIs without '*' are rendered as links, others as plain text.
+ -# NOTE(review): the delete link is rendered unconditionally here - confirm
+ -# whether it should be guarded by an editable-KB check.
+ -# NOTE(review): element nesting is not recoverable from this listing; verify
+ -# indentation against the actual template.
+ #body
2
+ %h1 Patterns
3
+ %p
4
+ Patterns are visual conditions that are used to identify data in sites which do not have a defined extractor.
5
+ %p
6
+ -if @uris.empty?
7
+ Currently, there are no patterns.
8
+ -else
9
+ %ul.detail
10
+ -@uris.each do |uri|
11
+ %li
12
+ %span.action
13
+ %a{:href=>"#{settings.base_uri}/patterns/#{CGI::escape(uri)}", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete the pattern for #{uri}?"}
14
+ X
15
+ %span.name
16
+ -if !uri.include?('*')
17
+ %a{:href=>uri}=uri
18
+ -else
19
+ =uri