scrappy 0.3.5 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -53,8 +53,8 @@ jQuery(document).ready(function(){
53
53
  } else {
54
54
  div = "<div id='scrappy_window' title='Scrappy'>" +
55
55
  "<p>No extractor available for this URL</p>" +
56
- "<p><a href='TODO'>Annotate page</a></p>" +
57
56
  "<p><a class='extractor' href='http://localhost:3434/extractors'>Generate extractor</a></p>" +
57
+ "<p><a class='sample' href='http://localhost:3434/samples'>Upload sample</a></p>" +
58
58
  "</div>";
59
59
  }
60
60
 
@@ -9,6 +9,17 @@ pre {
9
9
  border: 1px solid;
10
10
  padding: 10px;
11
11
  }
12
+ pre.wide {
13
+ width: 770px;
14
+ max-height: 900px;
15
+ margin-left: auto;
16
+ margin-right: auto;
17
+ border: 1px solid;
18
+ font-size: 12px;
19
+ overflow: scroll;
20
+ padding: 10px;
21
+ font-family: monospace;
22
+ }
12
23
  a:link, a:visited {
13
24
  color: #33f;
14
25
  text-decoration: none;
@@ -152,6 +163,9 @@ ul.detail li {
152
163
  background-color: #eee;
153
164
  margin: 1px;
154
165
  }
166
+ ul.detail li.special {
167
+ background-color: #fff;
168
+ }
155
169
  ul.detail li span {
156
170
  display: inline-block;
157
171
  }
@@ -174,6 +188,9 @@ ul.detail li span.format {
174
188
  margin-left: 10px;
175
189
  text-align: center;
176
190
  }
191
+ ul.detail li.special span.format {
192
+ font-weight: normal;
193
+ }
177
194
  ul.detail li span.date {
178
195
  float: right;
179
196
  }
@@ -196,3 +213,19 @@ ul.detail li span.date a:visited,
196
213
  ul.detail li span.date a:active {
197
214
  color: #888;
198
215
  }
216
+
217
+ fieldset {
218
+ border: 0;
219
+ padding: 0;
220
+ margin: 0;
221
+ vertical-align: baseline;
222
+ }
223
+
224
+ textarea {
225
+ width: 100%;
226
+ }
227
+
228
+ span.type {
229
+ float: left;
230
+ width: 80px;
231
+ }
data/scrappy.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{scrappy}
5
- s.version = "0.3.5"
5
+ s.version = "0.4.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jose Ignacio"]
9
- s.date = %q{2011-03-29}
9
+ s.date = %q{2011-06-30}
10
10
  s.default_executable = %q{scrappy}
11
11
  s.description = %q{RDF web scraper}
12
12
  s.email = %q{joseignacio.fernandez@gmail.com}
@@ -31,7 +31,7 @@ Gem::Specification.new do |s|
31
31
  s.add_runtime_dependency(%q<thin>, [">= 1.2.7"])
32
32
  s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.1"])
33
33
  s.add_runtime_dependency(%q<mechanize>, [">= 1.0.0"])
34
- s.add_runtime_dependency(%q<lightrdf>, [">= 0.3.7"])
34
+ s.add_runtime_dependency(%q<lightrdf>, [">= 0.3.9"])
35
35
  s.add_runtime_dependency(%q<i18n>, [">= 0.4.2"])
36
36
  s.add_runtime_dependency(%q<rest-client>, [">= 1.6.1"])
37
37
  s.add_runtime_dependency(%q<haml>, [">= 3.0.24"])
@@ -42,7 +42,7 @@ Gem::Specification.new do |s|
42
42
  s.add_dependency(%q<thin>, [">= 1.2.7"])
43
43
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
44
44
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
45
- s.add_dependency(%q<lightrdf>, [">= 0.3.7"])
45
+ s.add_dependency(%q<lightrdf>, [">= 0.3.9"])
46
46
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
47
47
  s.add_dependency(%q<rest-client>, [">= 1.6.1"])
48
48
  s.add_dependency(%q<haml>, [">= 3.0.24"])
@@ -54,7 +54,7 @@ Gem::Specification.new do |s|
54
54
  s.add_dependency(%q<thin>, [">= 1.2.7"])
55
55
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
56
56
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
57
- s.add_dependency(%q<lightrdf>, [">= 0.3.7"])
57
+ s.add_dependency(%q<lightrdf>, [">= 0.3.9"])
58
58
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
59
59
  s.add_dependency(%q<rest-client>, [">= 1.6.1"])
60
60
  s.add_dependency(%q<haml>, [">= 3.0.24"])
data/views/help.haml CHANGED
@@ -16,5 +16,4 @@
16
16
  Drag this to your bookmarks:
17
17
  %a.bookmark{:href=>bookmark_js, :onclick=>drag_js} Scrappy
18
18
  %p
19
- Then visit the web page you want to build a extractor for.
20
- Click on your "Scrappy" bookmark and annotate the web page.
19
+ Then visit the web page you want to build a extractor for and click on your "Scrappy" bookmark.
data/views/layout.haml CHANGED
@@ -4,6 +4,7 @@
4
4
  %title Scrappy
5
5
  %link{:type=>"text/css", :href=>"#{settings.base_uri}/stylesheets/application.css", :rel=>"stylesheet"}
6
6
  %script{:src=>"https://ajax.googleapis.com/ajax/libs/jquery/1.4.2/jquery.min.js"}
7
+ %script{:src=>"#{settings.base_uri}/javascripts/utils.js"}
7
8
  %script{:src=>"#{settings.base_uri}/javascripts/remote.js"}
8
9
  %body
9
10
  #bar
data/views/patterns.haml CHANGED
@@ -3,17 +3,22 @@
3
3
  %p
4
4
  Patterns are visual conditions that are used to identify data in sites which do not have a defined extractor.
5
5
  %p
6
- -if @uris.empty?
6
+ -if @patterns.empty?
7
7
  Currently, there are no patterns.
8
8
  -else
9
9
  %ul.detail
10
- -@uris.each do |uri|
10
+ -@patterns.each do |pattern|
11
+ -uri = pattern.sc::type.first.to_s
11
12
  %li
12
13
  %span.action
13
- %a{:href=>"#{settings.base_uri}/patterns/#{CGI::escape(uri)}", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete the pattern for #{uri}?"}
14
+ %a{:href=>"#{settings.base_uri}/patterns/#{CGI::escape(pattern.to_s)}", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete this pattern?"}
14
15
  X
15
16
  %span.name
16
17
  -if !uri.include?('*')
17
- %a{:href=>uri}=uri
18
+ %a{:href=>"#{settings.base_uri}/patterns/#{CGI::escape(pattern.to_s)}"}=uri
18
19
  -else
19
- =uri
20
+ =uri
21
+ %p
22
+ %a{:href=>"#{settings.base_uri}/patterns/visual"} See patterns visually
23
+ |
24
+ %a{:href=>"#{settings.base_uri}/patterns", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete all the patterns?"} Delete all patterns
data/views/samples.haml CHANGED
@@ -3,28 +3,52 @@
3
3
  %p
4
4
  Sample pages are used to build extractors as well as visual patterns that can be applied to retrieve data
5
5
  from other pages.
6
- %p
7
- -if @samples.empty?
8
- Currently, there are no samples.
9
- -else
10
- %ul.detail
11
- -@samples.each_with_index do |sample,i|
12
- %li
6
+ %form{:method=>:post}
7
+ %p
8
+ -if @samples.empty?
9
+ Currently, there are no samples.
10
+ -else
11
+ %ul.detail
12
+ %li.special
13
13
  %span.action
14
- %a{:href=>"#{settings.base_uri}/samples/#{i}", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete the sample #{sample[:uri]}?"}
15
- X
16
- %span.short_name
17
- -if !sample[:uri].include?('*')
18
- %a{:href=>sample[:uri]}=sample[:uri]
19
- -else
20
- =sample[:uri]
21
- -[['Patterns output', :patterns], ['Extractors output', :extractors]].reverse.each do |text, action|
22
- %span.format
23
- %a{:href=>"#{settings.base_uri}/samples/#{i}/#{action}"}=text
24
14
  %span.format
25
- %a{:href=>"#{settings.base_uri}/samples/#{i}/optimize", :'data-method'=>:post} Optimize
15
+ %input.checkall{:type=>:checkbox}
26
16
  %span.format
27
- %a{:href=>"#{settings.base_uri}/samples/#{i}/train", :'data-method'=>:post} Train
28
- %span.date
29
- %a{:href=>"#{settings.base_uri}/samples/#{i}"}
30
- =sample[:date].strftime("%Y/%m/%d - %H:%M")
17
+ Select all
18
+ -@samples.each_with_index do |sample,i|
19
+ %li
20
+ %span.action
21
+ %a{:href=>"#{settings.base_uri}/samples/#{i}", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete the sample #{sample[:uri]}?"}
22
+ X
23
+ %span.short_name
24
+ -if !sample[:uri].include?('*')
25
+ %a{:href=>sample[:uri]}=sample[:uri]
26
+ -else
27
+ =sample[:uri]
28
+ %span.format
29
+ %input{:type=>:checkbox, :name=>'samples[]', :value=>i}
30
+ -[['Patterns', :patterns], ['Extractors', :extractors], ['Annotations', :annotations]].reverse.each do |text, action|
31
+ %span.format
32
+ %a{:href=>"#{settings.base_uri}/samples/#{i}/#{action}"}=text
33
+ %span.format
34
+ %a{:href=>"#{settings.base_uri}/samples/#{i}/raw"} RAW
35
+ %span.date
36
+ %a{:href=>"#{settings.base_uri}/samples/#{i}"}
37
+ =sample[:date].strftime("%Y/%m/%d - %H:%M")
38
+ %p
39
+ %span.type General:
40
+ %a.checksend{:href=>"#{settings.base_uri}/samples/annotate", :title=>'This will store extractors output as the correct samples output'} Annotate
41
+ %p
42
+ %span.type Extractors:
43
+ %a.checksend{:href=>"#{settings.base_uri}/samples/train/extractors", :title=>'This will generate extractors for each of the selected samples'} Train
44
+ |
45
+ %a.checksend{:href=>"#{settings.base_uri}/samples/optimize/extractors", :title=>'This will generalize extractors to improve the performance on the selected samples'} Optimize
46
+ |
47
+ %a.checksend{:href=>"#{settings.base_uri}/samples/test/extractors", :title=>'This will test extractors on the selected samples'} Test
48
+ %p
49
+ %span.type Patterns:
50
+ %a.checksend{:href=>"#{settings.base_uri}/samples/train/patterns", :title=>'This will generate patterns for each of the selected samples'} Train
51
+ |
52
+ %a.checksend{:href=>"#{settings.base_uri}/samples/optimize/patterns", :title=>'This will generalize patterns to improve the performance on the selected samples'} Optimize
53
+ |
54
+ %a.checksend{:href=>"#{settings.base_uri}/samples/test/patterns", :title=>'This will test patterns on the selected samples'} Test
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 3
8
- - 5
9
- version: 0.3.5
7
+ - 4
8
+ - 0
9
+ version: 0.4.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jose Ignacio
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-03-29 00:00:00 +02:00
17
+ date: 2011-06-30 00:00:00 +02:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -97,8 +97,8 @@ dependencies:
97
97
  segments:
98
98
  - 0
99
99
  - 3
100
- - 7
101
- version: 0.3.7
100
+ - 9
101
+ version: 0.3.9
102
102
  type: :runtime
103
103
  version_requirements: *id006
104
104
  - !ruby/object:Gem::Dependency