scrappy 0.4.2 → 0.4.3
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest +2 -0
- data/lib/scrappy.rb +1 -1
- data/public/javascripts/utils.js +10 -0
- data/scrappy.gemspec +3 -3
- data/views/test.haml +73 -0
- metadata +5 -3
data/History.txt
CHANGED
data/Manifest
CHANGED
@@ -37,6 +37,7 @@ public/images/logo.png
|
|
37
37
|
public/images/logo_tiny.png
|
38
38
|
public/javascripts/annotator.js
|
39
39
|
public/javascripts/remote.js
|
40
|
+
public/javascripts/utils.js
|
40
41
|
public/stylesheets/application.css
|
41
42
|
test/test_helper.rb
|
42
43
|
test/test_scrappy.rb
|
@@ -46,3 +47,4 @@ views/home.haml
|
|
46
47
|
views/layout.haml
|
47
48
|
views/patterns.haml
|
48
49
|
views/samples.haml
|
50
|
+
views/test.haml
|
data/lib/scrappy.rb
CHANGED
data/public/javascripts/utils.js
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
jQuery(function ($) {
|
2
|
+
$('.checkall').click(function () {
|
3
|
+
$(this).parents('form').find(':checkbox').attr('checked', this.checked);
|
4
|
+
});
|
5
|
+
|
6
|
+
$('.checksend').live('click', function (e){
|
7
|
+
$("form").attr("action",$(this).attr("href")).submit();
|
8
|
+
return false;
|
9
|
+
});
|
10
|
+
});
|
data/scrappy.gemspec
CHANGED
@@ -2,17 +2,17 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{scrappy}
|
5
|
-
s.version = "0.4.2"
|
5
|
+
s.version = "0.4.3"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Jose Ignacio"]
|
9
|
-
s.date = %q{2011-07-
|
9
|
+
s.date = %q{2011-07-11}
|
10
10
|
s.default_executable = %q{scrappy}
|
11
11
|
s.description = %q{RDF web scraper}
|
12
12
|
s.email = %q{joseignacio.fernandez@gmail.com}
|
13
13
|
s.executables = ["scrappy"]
|
14
14
|
s.extra_rdoc_files = ["README.rdoc", "bin/scrappy", "extractors/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/extractor/extractor.rb", "lib/scrappy/extractor/formats.rb", "lib/scrappy/extractor/fragment.rb", "lib/scrappy/extractor/selector.rb", "lib/scrappy/extractor/selectors/base_uri.rb", "lib/scrappy/extractor/selectors/css.rb", "lib/scrappy/extractor/selectors/new_uri.rb", "lib/scrappy/extractor/selectors/root.rb", "lib/scrappy/extractor/selectors/section.rb", "lib/scrappy/extractor/selectors/slice.rb", "lib/scrappy/extractor/selectors/uri.rb", "lib/scrappy/extractor/selectors/uri_pattern.rb", "lib/scrappy/extractor/selectors/visual.rb", "lib/scrappy/extractor/selectors/xpath.rb", "lib/scrappy/learning/optimizer.rb", "lib/scrappy/learning/trainer.rb", "lib/scrappy/repository.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb"]
|
15
|
-
s.files = ["History.txt", "Manifest", "README.rdoc", "Rakefile", "bin/scrappy", "extractors/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/extractor/extractor.rb", "lib/scrappy/extractor/formats.rb", "lib/scrappy/extractor/fragment.rb", "lib/scrappy/extractor/selector.rb", "lib/scrappy/extractor/selectors/base_uri.rb", "lib/scrappy/extractor/selectors/css.rb", "lib/scrappy/extractor/selectors/new_uri.rb", "lib/scrappy/extractor/selectors/root.rb", "lib/scrappy/extractor/selectors/section.rb", "lib/scrappy/extractor/selectors/slice.rb", "lib/scrappy/extractor/selectors/uri.rb", "lib/scrappy/extractor/selectors/uri_pattern.rb", "lib/scrappy/extractor/selectors/visual.rb", "lib/scrappy/extractor/selectors/xpath.rb", "lib/scrappy/learning/optimizer.rb", "lib/scrappy/learning/trainer.rb", "lib/scrappy/repository.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb", "public/favicon.ico", "public/images/logo.png", "public/images/logo_tiny.png", "public/javascripts/annotator.js", "public/javascripts/remote.js", "public/stylesheets/application.css", "test/test_helper.rb", "test/test_scrappy.rb", "views/extractors.haml", "views/help.haml", "views/home.haml", "views/layout.haml", "views/patterns.haml", "views/samples.haml", "scrappy.gemspec"]
|
15
|
+
s.files = ["History.txt", "Manifest", "README.rdoc", "Rakefile", "bin/scrappy", "extractors/elmundo.yarf", "lib/scrappy.rb", "lib/scrappy/agent/agent.rb", "lib/scrappy/agent/blind_agent.rb", "lib/scrappy/agent/cache.rb", "lib/scrappy/agent/dumper.rb", "lib/scrappy/agent/map_reduce.rb", "lib/scrappy/extractor/extractor.rb", "lib/scrappy/extractor/formats.rb", "lib/scrappy/extractor/fragment.rb", "lib/scrappy/extractor/selector.rb", "lib/scrappy/extractor/selectors/base_uri.rb", "lib/scrappy/extractor/selectors/css.rb", "lib/scrappy/extractor/selectors/new_uri.rb", "lib/scrappy/extractor/selectors/root.rb", "lib/scrappy/extractor/selectors/section.rb", "lib/scrappy/extractor/selectors/slice.rb", "lib/scrappy/extractor/selectors/uri.rb", "lib/scrappy/extractor/selectors/uri_pattern.rb", "lib/scrappy/extractor/selectors/visual.rb", "lib/scrappy/extractor/selectors/xpath.rb", "lib/scrappy/learning/optimizer.rb", "lib/scrappy/learning/trainer.rb", "lib/scrappy/repository.rb", "lib/scrappy/server/admin.rb", "lib/scrappy/server/errors.rb", "lib/scrappy/server/helpers.rb", "lib/scrappy/server/server.rb", "lib/scrappy/support.rb", "public/favicon.ico", "public/images/logo.png", "public/images/logo_tiny.png", "public/javascripts/annotator.js", "public/javascripts/remote.js", "public/javascripts/utils.js", "public/stylesheets/application.css", "test/test_helper.rb", "test/test_scrappy.rb", "views/extractors.haml", "views/help.haml", "views/home.haml", "views/layout.haml", "views/patterns.haml", "views/samples.haml", "views/test.haml", "scrappy.gemspec"]
|
16
16
|
s.homepage = %q{http://github.com/josei/scrappy}
|
17
17
|
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Scrappy", "--main", "README.rdoc"]
|
18
18
|
s.require_paths = ["lib"]
|
data/views/test.haml
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
#body
|
2
|
+
%h1 Testing extraction
|
3
|
+
%h2 Results
|
4
|
+
%p
|
5
|
+
-(@results.keys-[:total]).each do |label|
|
6
|
+
==#{label}:
|
7
|
+
%ul.detail
|
8
|
+
%li
|
9
|
+
%span.name
|
10
|
+
Precision
|
11
|
+
%span.date
|
12
|
+
=percentage(@results[label][:precision])
|
13
|
+
%li
|
14
|
+
%span.name
|
15
|
+
Recall
|
16
|
+
%span.date
|
17
|
+
=percentage(@results[label][:recall])
|
18
|
+
%li
|
19
|
+
%span.name
|
20
|
+
F-score
|
21
|
+
%span.date
|
22
|
+
=percentage(@results[label][:fscore])
|
23
|
+
Total:
|
24
|
+
%ul.detail
|
25
|
+
%li
|
26
|
+
%span.name
|
27
|
+
Precision
|
28
|
+
%span.date
|
29
|
+
=percentage(@results[:total][:precision])
|
30
|
+
%li
|
31
|
+
%span.name
|
32
|
+
Recall
|
33
|
+
%span.date
|
34
|
+
=percentage(@results[:total][:recall])
|
35
|
+
%li
|
36
|
+
%span.name
|
37
|
+
F-score
|
38
|
+
%span.date
|
39
|
+
=percentage(@results[:total][:fscore])
|
40
|
+
|
41
|
+
%h2 Details
|
42
|
+
%p
|
43
|
+
%ul.detail
|
44
|
+
%li
|
45
|
+
%span.name
|
46
|
+
Triples
|
47
|
+
%span.date
|
48
|
+
=@total
|
49
|
+
%li
|
50
|
+
%span.name
|
51
|
+
Extracted triples
|
52
|
+
%span.date
|
53
|
+
=@extracted
|
54
|
+
%li
|
55
|
+
%span.name
|
56
|
+
Correct triples
|
57
|
+
%span.date
|
58
|
+
=@correct
|
59
|
+
Wrong triples:
|
60
|
+
%pre.wide=escape_html @wrong.to_ntriples
|
61
|
+
Missing triples:
|
62
|
+
%pre.wide=escape_html @missing.to_ntriples
|
63
|
+
%h2
|
64
|
+
Retry?
|
65
|
+
%form{:method=>:post}
|
66
|
+
%p
|
67
|
+
Add here any triples that should be counted as correct ones:
|
68
|
+
%p
|
69
|
+
%textarea{:name=>:output, :rows=>20, :wrap=>:off}=params["output"]
|
70
|
+
-(params[:samples] || []).each do |sample|
|
71
|
+
%input{:name=>"samples[]", :type=>:hidden, :value=>sample}
|
72
|
+
%p
|
73
|
+
%button Retry
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 4
|
8
|
-
- 2
|
9
|
-
version: 0.4.2
|
8
|
+
- 3
|
9
|
+
version: 0.4.3
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Jose Ignacio
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-07-
|
17
|
+
date: 2011-07-11 00:00:00 +02:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -235,6 +235,7 @@ files:
|
|
235
235
|
- public/images/logo_tiny.png
|
236
236
|
- public/javascripts/annotator.js
|
237
237
|
- public/javascripts/remote.js
|
238
|
+
- public/javascripts/utils.js
|
238
239
|
- public/stylesheets/application.css
|
239
240
|
- test/test_helper.rb
|
240
241
|
- test/test_scrappy.rb
|
@@ -244,6 +245,7 @@ files:
|
|
244
245
|
- views/layout.haml
|
245
246
|
- views/patterns.haml
|
246
247
|
- views/samples.haml
|
248
|
+
- views/test.haml
|
247
249
|
- scrappy.gemspec
|
248
250
|
has_rdoc: true
|
249
251
|
homepage: http://github.com/josei/scrappy
|