scrappy 0.3.5 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,8 +53,8 @@ jQuery(document).ready(function(){
53
53
  } else {
54
54
  div = "<div id='scrappy_window' title='Scrappy'>" +
55
55
  "<p>No extractor available for this URL</p>" +
56
- "<p><a href='TODO'>Annotate page</a></p>" +
57
56
  "<p><a class='extractor' href='http://localhost:3434/extractors'>Generate extractor</a></p>" +
57
+ "<p><a class='sample' href='http://localhost:3434/samples'>Upload sample</a></p>" +
58
58
  "</div>";
59
59
  }
60
60
 
@@ -9,6 +9,17 @@ pre {
9
9
  border: 1px solid;
10
10
  padding: 10px;
11
11
  }
12
+ pre.wide {
13
+ width: 770px;
14
+ max-height: 900px;
15
+ margin-left: auto;
16
+ margin-right: auto;
17
+ border: 1px solid;
18
+ font-size: 12px;
19
+ overflow: scroll;
20
+ padding: 10px;
21
+ font-family: monospace;
22
+ }
12
23
  a:link, a:visited {
13
24
  color: #33f;
14
25
  text-decoration: none;
@@ -152,6 +163,9 @@ ul.detail li {
152
163
  background-color: #eee;
153
164
  margin: 1px;
154
165
  }
166
+ ul.detail li.special {
167
+ background-color: #fff;
168
+ }
155
169
  ul.detail li span {
156
170
  display: inline-block;
157
171
  }
@@ -174,6 +188,9 @@ ul.detail li span.format {
174
188
  margin-left: 10px;
175
189
  text-align: center;
176
190
  }
191
+ ul.detail li.special span.format {
192
+ font-weight: normal;
193
+ }
177
194
  ul.detail li span.date {
178
195
  float: right;
179
196
  }
@@ -196,3 +213,19 @@ ul.detail li span.date a:visited,
196
213
  ul.detail li span.date a:active {
197
214
  color: #888;
198
215
  }
216
+
217
+ fieldset {
218
+ border: 0;
219
+ padding: 0;
220
+ margin: 0;
221
+ vertical-align: baseline;
222
+ }
223
+
224
+ textarea {
225
+ width: 100%;
226
+ }
227
+
228
+ span.type {
229
+ float: left;
230
+ width: 80px;
231
+ }
data/scrappy.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{scrappy}
5
- s.version = "0.3.5"
5
+ s.version = "0.4.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jose Ignacio"]
9
- s.date = %q{2011-03-29}
9
+ s.date = %q{2011-06-30}
10
10
  s.default_executable = %q{scrappy}
11
11
  s.description = %q{RDF web scraper}
12
12
  s.email = %q{joseignacio.fernandez@gmail.com}
@@ -31,7 +31,7 @@ Gem::Specification.new do |s|
31
31
  s.add_runtime_dependency(%q<thin>, [">= 1.2.7"])
32
32
  s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.1"])
33
33
  s.add_runtime_dependency(%q<mechanize>, [">= 1.0.0"])
34
- s.add_runtime_dependency(%q<lightrdf>, [">= 0.3.7"])
34
+ s.add_runtime_dependency(%q<lightrdf>, [">= 0.3.9"])
35
35
  s.add_runtime_dependency(%q<i18n>, [">= 0.4.2"])
36
36
  s.add_runtime_dependency(%q<rest-client>, [">= 1.6.1"])
37
37
  s.add_runtime_dependency(%q<haml>, [">= 3.0.24"])
@@ -42,7 +42,7 @@ Gem::Specification.new do |s|
42
42
  s.add_dependency(%q<thin>, [">= 1.2.7"])
43
43
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
44
44
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
45
- s.add_dependency(%q<lightrdf>, [">= 0.3.7"])
45
+ s.add_dependency(%q<lightrdf>, [">= 0.3.9"])
46
46
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
47
47
  s.add_dependency(%q<rest-client>, [">= 1.6.1"])
48
48
  s.add_dependency(%q<haml>, [">= 3.0.24"])
@@ -54,7 +54,7 @@ Gem::Specification.new do |s|
54
54
  s.add_dependency(%q<thin>, [">= 1.2.7"])
55
55
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
56
56
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
57
- s.add_dependency(%q<lightrdf>, [">= 0.3.7"])
57
+ s.add_dependency(%q<lightrdf>, [">= 0.3.9"])
58
58
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
59
59
  s.add_dependency(%q<rest-client>, [">= 1.6.1"])
60
60
  s.add_dependency(%q<haml>, [">= 3.0.24"])
data/views/help.haml CHANGED
@@ -16,5 +16,4 @@
16
16
  Drag this to your bookmarks:
17
17
  %a.bookmark{:href=>bookmark_js, :onclick=>drag_js} Scrappy
18
18
  %p
19
- Then visit the web page you want to build a extractor for.
20
- Click on your "Scrappy" bookmark and annotate the web page.
19
+ Then visit the web page you want to build a extractor for and click on your "Scrappy" bookmark.
data/views/layout.haml CHANGED
@@ -4,6 +4,7 @@
4
4
  %title Scrappy
5
5
  %link{:type=>"text/css", :href=>"#{settings.base_uri}/stylesheets/application.css", :rel=>"stylesheet"}
6
6
  %script{:src=>"https://ajax.googleapis.com/ajax/libs/jquery/1.4.2/jquery.min.js"}
7
+ %script{:src=>"#{settings.base_uri}/javascripts/utils.js"}
7
8
  %script{:src=>"#{settings.base_uri}/javascripts/remote.js"}
8
9
  %body
9
10
  #bar
data/views/patterns.haml CHANGED
@@ -3,17 +3,22 @@
3
3
  %p
4
4
  Patterns are visual conditions that are used to identify data in sites which do not have a defined extractor.
5
5
  %p
6
- -if @uris.empty?
6
+ -if @patterns.empty?
7
7
  Currently, there are no patterns.
8
8
  -else
9
9
  %ul.detail
10
- -@uris.each do |uri|
10
+ -@patterns.each do |pattern|
11
+ -uri = pattern.sc::type.first.to_s
11
12
  %li
12
13
  %span.action
13
- %a{:href=>"#{settings.base_uri}/patterns/#{CGI::escape(uri)}", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete the pattern for #{uri}?"}
14
+ %a{:href=>"#{settings.base_uri}/patterns/#{CGI::escape(pattern.to_s)}", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete this pattern?"}
14
15
  X
15
16
  %span.name
16
17
  -if !uri.include?('*')
17
- %a{:href=>uri}=uri
18
+ %a{:href=>"#{settings.base_uri}/patterns/#{CGI::escape(pattern.to_s)}"}=uri
18
19
  -else
19
- =uri
20
+ =uri
21
+ %p
22
+ %a{:href=>"#{settings.base_uri}/patterns/visual"} See patterns visually
23
+ |
24
+ %a{:href=>"#{settings.base_uri}/patterns", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete all the patterns?"} Delete all patterns
data/views/samples.haml CHANGED
@@ -3,28 +3,52 @@
3
3
  %p
4
4
  Sample pages are used to build extractors as well as visual patterns that can be applied to retrieve data
5
5
  from other pages.
6
- %p
7
- -if @samples.empty?
8
- Currently, there are no samples.
9
- -else
10
- %ul.detail
11
- -@samples.each_with_index do |sample,i|
12
- %li
6
+ %form{:method=>:post}
7
+ %p
8
+ -if @samples.empty?
9
+ Currently, there are no samples.
10
+ -else
11
+ %ul.detail
12
+ %li.special
13
13
  %span.action
14
- %a{:href=>"#{settings.base_uri}/samples/#{i}", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete the sample #{sample[:uri]}?"}
15
- X
16
- %span.short_name
17
- -if !sample[:uri].include?('*')
18
- %a{:href=>sample[:uri]}=sample[:uri]
19
- -else
20
- =sample[:uri]
21
- -[['Patterns output', :patterns], ['Extractors output', :extractors]].reverse.each do |text, action|
22
- %span.format
23
- %a{:href=>"#{settings.base_uri}/samples/#{i}/#{action}"}=text
24
14
  %span.format
25
- %a{:href=>"#{settings.base_uri}/samples/#{i}/optimize", :'data-method'=>:post} Optimize
15
+ %input.checkall{:type=>:checkbox}
26
16
  %span.format
27
- %a{:href=>"#{settings.base_uri}/samples/#{i}/train", :'data-method'=>:post} Train
28
- %span.date
29
- %a{:href=>"#{settings.base_uri}/samples/#{i}"}
30
- =sample[:date].strftime("%Y/%m/%d - %H:%M")
17
+ Select all
18
+ -@samples.each_with_index do |sample,i|
19
+ %li
20
+ %span.action
21
+ %a{:href=>"#{settings.base_uri}/samples/#{i}", :'data-method'=>:delete, :'data-confirm'=>"Are you sure you want to delete the sample #{sample[:uri]}?"}
22
+ X
23
+ %span.short_name
24
+ -if !sample[:uri].include?('*')
25
+ %a{:href=>sample[:uri]}=sample[:uri]
26
+ -else
27
+ =sample[:uri]
28
+ %span.format
29
+ %input{:type=>:checkbox, :name=>'samples[]', :value=>i}
30
+ -[['Patterns', :patterns], ['Extractors', :extractors], ['Annotations', :annotations]].reverse.each do |text, action|
31
+ %span.format
32
+ %a{:href=>"#{settings.base_uri}/samples/#{i}/#{action}"}=text
33
+ %span.format
34
+ %a{:href=>"#{settings.base_uri}/samples/#{i}/raw"} RAW
35
+ %span.date
36
+ %a{:href=>"#{settings.base_uri}/samples/#{i}"}
37
+ =sample[:date].strftime("%Y/%m/%d - %H:%M")
38
+ %p
39
+ %span.type General:
40
+ %a.checksend{:href=>"#{settings.base_uri}/samples/annotate", :title=>'This will store extractors output as the correct samples output'} Annotate
41
+ %p
42
+ %span.type Extractors:
43
+ %a.checksend{:href=>"#{settings.base_uri}/samples/train/extractors", :title=>'This will generate extractors for each of the selected samples'} Train
44
+ |
45
+ %a.checksend{:href=>"#{settings.base_uri}/samples/optimize/extractors", :title=>'This will generalize extractors to improve the performance on the selected samples'} Optimize
46
+ |
47
+ %a.checksend{:href=>"#{settings.base_uri}/samples/test/extractors", :title=>'This will test extractors on the selected samples'} Test
48
+ %p
49
+ %span.type Patterns:
50
+ %a.checksend{:href=>"#{settings.base_uri}/samples/train/patterns", :title=>'This will generate patterns for each of the selected samples'} Train
51
+ |
52
+ %a.checksend{:href=>"#{settings.base_uri}/samples/optimize/patterns", :title=>'This will generalize patterns to improve the performance on the selected samples'} Optimize
53
+ |
54
+ %a.checksend{:href=>"#{settings.base_uri}/samples/test/patterns", :title=>'This will test patterns on the selected samples'} Test
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 3
8
- - 5
9
- version: 0.3.5
7
+ - 4
8
+ - 0
9
+ version: 0.4.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jose Ignacio
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-03-29 00:00:00 +02:00
17
+ date: 2011-06-30 00:00:00 +02:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -97,8 +97,8 @@ dependencies:
97
97
  segments:
98
98
  - 0
99
99
  - 3
100
- - 7
101
- version: 0.3.7
100
+ - 9
101
+ version: 0.3.9
102
102
  type: :runtime
103
103
  version_requirements: *id006
104
104
  - !ruby/object:Gem::Dependency