scrappy 0.1.23 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,9 @@
+ === 0.1.24 2011-03-08
+
+ * Using RDF::NodeProxy from lightRDF 0.2
+ * Added references (-r option) on constructed URIs
+ * Minor corrections
+
  === 0.1.23 2011-03-03
 
  * Prettier web interface
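A hedged sketch (not part of the released files) of the selector refactoring summarized in the changelog above: in 0.1.24 each selector module becomes an Sc::* class that includes RDF::NodeProxy from lightRDF 0.2, so the extractor wraps a selector RDF node once (via the new selector_pool helper shown in the hunks below) and calls an instance-level filter instead of the old module-level filter(selector, doc). Illustrative Ruby only, assuming a knowledge base kb and a selector node as used in the extractor code below:

    # 0.1.23 style: resolve a module by RDF type and pass the selector on every call
    #   Kernel.const_get(class_name).filter selector, :content => content, :uri => uri
    # 0.1.24 style: wrap the node in its Sc::* proxy class once and reuse the instance
    proxy   = kb.node(selector)   # e.g. an Sc::XPathSelector with RDF::NodeProxy mixed in
    results = proxy.filter :content => content, :uri => uri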
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ Echoe.new('scrappy', Scrappy::VERSION) do |p|
  p.email = "joseignacio.fernandez@gmail.com"
  p.install_message = '**(Optional) Remember to install rbwebkitgtk for visual parsing features**'
  p.ignore_pattern = ["pkg/*"]
- p.dependencies = [['activesupport','>= 2.3.5'], ['sinatra', '>= 1.1.2'], ['thin', '>= 1.2.7'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.1.9'], ['i18n', '>= 0.4.2'], ['haml', '>= 3.0.24']]
+ p.dependencies = [['activesupport','>= 2.3.5'], ['sinatra', '>= 1.1.2'], ['thin', '>= 1.2.7'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.2.0'], ['i18n', '>= 0.4.2'], ['haml', '>= 3.0.24']]
  end
 
  Rake::RDocTask.new(:rdoc) do |rdoc|
data/bin/scrappy CHANGED
@@ -29,7 +29,7 @@ module Scrappy
  args = ARGV.map { |arg| arg.split(" ") }.flatten
 
  OptionParser.new do |opts|
- opts.on('-V', '--version') { output_version; exit 0 }
+ opts.on('-v', '--version') { output_version; exit 0 }
  opts.on('-h', '--help') { output_help; exit 0 }
  opts.on('-g URL', '--get URL') { |url| Options.url = url; Options.http_method=:get }
  opts.on('-p URL', '--post URL') { |url| Options.url = url; Options.http_method=:post }
@@ -42,7 +42,7 @@ module Scrappy
  opts.on('-c C', '--concurrence C') { |c| Agent::Options.workers = c.to_i }
  opts.on('-d D', '--delay D') { |d| Agent::Options.delay = d; Agent::Options.workers = 1 }
  opts.on('-l L', '--levels L') { |l| Agent::Options.depth = l.to_i }
- opts.on('-v', '--visual') { Agent::Options.agent = :visual; Agent::Options.workers = 1 }
+ opts.on('-V', '--visual') { Agent::Options.agent = :visual; Agent::Options.workers = 1 }
  opts.on('-r', '--reference') { Agent::Options.referenceable = :minimum }
  opts.on('-R', '--reference-all') { Agent::Options.referenceable = :dump }
  opts.on('-w', '--window') { Agent::Options.window = true }
@@ -85,8 +85,8 @@ module Scrappy
  Scrappy is a tool to scrape semantic data out of the unstructured web
 
  Examples
- This command retrieves Google web page
- scrappy -g http://www.google.com
+ This command retrieves a web page
+ scrappy -g http://www.example.com
 
  Usage
  scrappy [options]
@@ -95,7 +95,7 @@ Usage
 
  Options
  -h, --help Displays help message
- -V, --version Display the version, then exit
+ -v, --version Display the version, then exit
  -f, --format Picks output format (json, ejson, rdfxml, ntriples, png)
  -g, --get URL Gets requested URL
  -p, --post URL Posts requested URL
@@ -108,7 +108,7 @@ Options
  -s, --server [ROOT] Runs web server (optionally specify server's root url)
  -S, --proxy-server Runs web proxy
  -P, --port PORT Selects port number (default is 3434)
- -v, --visual Uses visual agent (slow)
+ -V, --visual Uses visual agent (slow)
  -r, --reference Outputs referenceable data
  -R, --reference-all Outputs all HTML referenceable data
  -w, --window Shows browser window (requires -v)
@@ -7,12 +7,12 @@ module Scrappy
  print "Extracting #{uri}..."; $stdout.flush
  end
 
+ @selector_pool ||= {}
  triples = []
  content = Nokogiri::HTML(html, nil, 'utf-8')
 
  uri_selectors = (kb.find(nil, Node('rdf:type'), Node('sc:UriSelector')) + kb.find(nil, Node('rdf:type'), Node('sc:UriPatternSelector'))).flatten.select do |uri_selector|
- class_name = uri_selector.rdf::type.first.to_s.split('#').last
- results = Kernel.const_get(class_name).filter uri_selector, {:content=>content, :uri=>uri}
+ results = selector_pool(uri_selector).filter :content=>content, :uri=>uri
  !results.empty?
  end
 
@@ -41,9 +41,19 @@ module Scrappy
  # Generate triples
  docs.each do |doc|
  # Build URIs if identifier present
- nodes = fragment.sc::identifier.map { |s| filter s, doc }.flatten.map{ |d| Node(parse_uri(uri, d[:value])) }
+ nodes = fragment.sc::identifier.map { |s| filter s, doc }.flatten.map do |d|
+ node = Node(parse_uri(uri, d[:value]))
+ if options[:referenceable]
+ # Include the fragment where the URI was built from
+ uri_node = Node(nil)
+ options[:triples] << [ node, Node("sc:uri"), uri_node ]
+ options[:triples] << [ uri_node, Node("rdf:value"), node.to_s ]
+ options[:triples] << [ uri_node, Node("sc:source"), Node(node_hash(d[:uri], d[:content].path)) ]
+ end
+ node
+ end
  nodes << Node(nil) if nodes.empty?
-
+
  nodes.each do |node|
  # Build the object
  object = if fragment.sc::type.include?(Node('rdf:Literal'))
@@ -82,9 +92,6 @@ module Scrappy
  end
 
  def filter selector, doc
- # From "BaseUriSelector" to "base_uri"
- class_name = selector.rdf::type.first.to_s.split('#').last
-
  if !selector.sc::debug.empty? and options.debug
  puts '== DEBUG'
  puts '== Selector:'
@@ -96,7 +103,7 @@ module Scrappy
  end
 
  # Process selector
- results = Kernel.const_get(class_name).filter selector, doc
+ results = selector_pool(selector).filter doc
 
  if !selector.sc::debug.empty? and options.debug
  puts "== No results" if results.empty?
@@ -128,7 +135,7 @@ module Scrappy
  end
 
  def add_referenceable_data content, triples, referenceable
- resources = triples.map{|s,p,o| [[s],[o]]}.flatten
+ resources = {}; triples.each { |s,p,o| resources[o] = true }
 
  fragment = Node(node_hash(uri, '/'))
  selector = Node(nil)
@@ -136,7 +143,7 @@ module Scrappy
 
  selector.rdf::type = Node('sc:UnivocalSelector')
  selector.sc::path = '/'
- selector.sc::uri = uri
+ selector.sc::document = uri
 
  fragment.sc::selector = selector
 
@@ -144,15 +151,15 @@ module Scrappy
 
  content.search('*').each do |node|
  fragment = Node(node_hash(uri, node.path))
-
- if referenceable == :dump or resources.include?(fragment)
+
+ if referenceable == :dump or resources[fragment]
  selector = Node(nil)
  presentation = Node(nil)
 
  selector.rdf::type = Node('sc:UnivocalSelector')
  selector.sc::path = node.path.to_s
  selector.sc::tag = node.name.to_s
- selector.sc::uri = uri
+ selector.sc::document = uri
 
  presentation.sc::x = node[:vx].to_s if node[:vx]
  presentation.sc::y = node[:vy].to_s if node[:vy]
@@ -175,7 +182,11 @@ module Scrappy
 
  def node_hash uri, path
  digest = Digest::MD5.hexdigest("#{uri} #{path}")
- "_:bnode#{digest}"
+ :"_:bnode#{digest}"
+ end
+
+ def selector_pool selector
+ @selector_pool[selector.id] ||= kb.node(selector)
  end
  end
  end
@@ -63,14 +63,17 @@ module Scrappy
  var i=0;
  for(var i=0; i<items.length; i++) {
  var item = items[i];
- item.setAttribute('vx', item.offsetLeft)
- item.setAttribute('vy', item.offsetTop)
- item.setAttribute('vw', item.offsetWidth)
- item.setAttribute('vh', item.offsetHeight)
- item.setAttribute('vsize', document.defaultView.getComputedStyle(item, null).getPropertyValue('font-size'))
- item.setAttribute('vweight', document.defaultView.getComputedStyle(item, null).getPropertyValue('font-weight'))
- item.setAttribute('vcolor', document.defaultView.getComputedStyle(item, null).getPropertyValue('color'))
- item.setAttribute('vbcolor', document.defaultView.getComputedStyle(item, null).getPropertyValue('background-color'))
+ item.setAttribute('vx', item.offsetLeft);
+ item.setAttribute('vy', item.offsetTop);
+ item.setAttribute('vw', item.offsetWidth);
+ item.setAttribute('vh', item.offsetHeight);
+ item.setAttribute('vsize', document.defaultView.getComputedStyle(item, null).getPropertyValue('font-size'));
+ var weight = document.defaultView.getComputedStyle(item, null).getPropertyValue('font-weight');
+ if (weight == 'normal') weight = 400;
+ if (weight == 'bold') weight = 700;
+ item.setAttribute('vweight', weight);
+ item.setAttribute('vcolor', document.defaultView.getComputedStyle(item, null).getPropertyValue('color'));
+ item.setAttribute('vbcolor', document.defaultView.getComputedStyle(item, null).getPropertyValue('background-color'));
  }"""
  end
 
@@ -1,5 +1,9 @@
- module BaseUriSelector
- def self.filter selector, doc
- [ { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:uri] } ]
+ module Sc
+ class BaseUriSelector
+ include RDF::NodeProxy
+
+ def filter doc
+ [ { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:uri] } ]
+ end
  end
  end
@@ -1,6 +1,10 @@
- module CssSelector
- def self.filter selector, doc
- # By using Nokogiri, CSS and XPath use the same search method
- XPathSelector.filter selector, doc
+ module Sc
+ class CssSelector
+ include RDF::NodeProxy
+
+ def filter doc
+ # By using Nokogiri, CSS and XPath use the same search method
+ Sc::XPathSelector.new(node).filter doc
+ end
  end
  end
@@ -1,23 +1,35 @@
- module NewUriSelector
- def self.filter selector, doc
- contents = if selector.sc::attribute.first
- # Select node's attribute if given
- selector.sc::attribute.map { |attribute| doc[:content][attribute] }
- else
- [ doc[:content].text ]
- end
-
- @@indexes ||= Hash.new(0)
- prefix = selector.sc::prefix.first.to_s
- prefix = (prefix =~ /\Ahttp/ ? URI::parse(doc[:uri]).merge(prefix).to_s : "#{doc[:uri]}#{prefix}")
- suffix = selector.sc::suffix.first.to_s
-
- contents.map do |content|
- variable = selector.sc::sequence.first.to_s=="true" ? (@@indexes[selector] += 1) : content.wikify
+ module Sc
+ class NewUriSelector
+ include RDF::NodeProxy
+
+ def filter doc
+ contents = if sc::attribute.first
+ # Select node's attribute if given
+ sc::attribute.map { |attribute| doc[:content][attribute] }
+ else
+ [ doc[:value] ]
+ end
 
- new_uri = "#{prefix}#{variable}#{suffix}"
+ @indexes ||= Hash.new(0)
+ prefix = sc::prefix.first.to_s
+ prefix = (prefix =~ /\Ahttp/ ? URI::parse(doc[:uri]).merge(prefix).to_s : "#{doc[:uri]}#{prefix}")
+ suffix = sc::suffix.first.to_s
 
- { :uri=>new_uri, :content=>doc[:content], :value=>new_uri }
+ contents.map do |content|
+ variable = if sc::sequence.first.to_s=="true"
+ @indexes[prefix] += 1
+ else
+ if sc::downcase.first.to_s=="true"
+ content.to_s.underscore
+ else
+ content.to_s.wikify
+ end
+ end
+
+ new_uri = "#{prefix}#{variable}#{suffix}"
+
+ { :uri=>new_uri, :content=>doc[:content], :value=>new_uri }
+ end
  end
  end
  end
@@ -1,12 +1,15 @@
- module RootSelector
- extend Scrappy::Formats
+ module Sc
+ class RootSelector
+ include RDF::NodeProxy
+ include Scrappy::Formats
 
- def self.filter selector, doc
- if selector.sc::attribute.first
- # Select node's attribute if given
- selector.sc::attribute.map { |attribute| { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:content][attribute] } }
- else
- [ { :uri=>doc[:uri], :content=>doc[:content], :value=>format(doc[:content], selector.sc::format, doc[:uri]) } ]
+ def filter doc
+ if sc::attribute.first
+ # Select node's attribute if given
+ sc::attribute.map { |attribute| { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:content][attribute] } }
+ else
+ [ { :uri=>doc[:uri], :content=>doc[:content], :value=>format(doc[:content], sc::format, doc[:uri]) } ]
+ end
  end
  end
- end
+ end
@@ -1,14 +1,17 @@
- module SectionSelector
- extend Scrappy::Formats
-
- def self.filter selector, doc
- selector.rdf::value.map do |pattern|
- doc[:content].search('h1, h2, h3, h4, h5, h6, h7, h8, h9, h10').select { |n| n.parent.name!='script' and n.text.downcase.strip == pattern }.map do |node|
- found = false
- content = node.parent.children[node.parent.children.index(node)+1..-1].select { |n| found ||= (n.name==node.name or n.name=='div'); !found }
-
- [ { :uri=>doc[:uri], :content=>content, :value=>content.map{|t| format(t, selector.sc::format, doc[:uri])}.select{|t| t.to_s.strip!=''}*"\n\n" } ]
- end
- end.flatten
+ module Sc
+ class SectionSelector
+ include RDF::NodeProxy
+ include Scrappy::Formats
+
+ def filter doc
+ rdf::value.map do |pattern|
+ doc[:content].search('h1, h2, h3, h4, h5, h6, h7, h8, h9, h10').select { |n| n.parent.name!='script' and n.text.downcase.strip == pattern }.map do |node|
+ found = false
+ content = node.parent.children[node.parent.children.index(node)+1..-1].select { |n| found ||= (n.name==node.name or n.name=='div'); !found }
+
+ [ { :uri=>doc[:uri], :content=>content, :value=>content.map{|t| format(t, sc::format, doc[:uri])}.select{|t| t.to_s.strip!=''}*"\n\n" } ]
+ end
+ end.flatten
+ end
  end
  end
@@ -1,8 +1,14 @@
- module SliceSelector
- def self.filter selector, doc
- selector.rdf::value.map do |separator|
- slices = doc[:value].split(separator)
- selector.sc::index.map { |index| { :uri=>doc[:uri], :content=>doc[:content], :value=>slices[index.to_i].to_s.strip} }
- end.flatten
+ module Sc
+ class SliceSelector
+ include RDF::NodeProxy
+
+ def filter doc
+ rdf::value.map do |separator|
+ slices = doc[:value].split(separator)
+ sc::index.map { |index| slices[index.to_i].to_s.strip }.
+ select { |value| value != "" }.
+ map { |value| { :uri=>doc[:uri], :content=>doc[:content], :value=>value} }
+ end.flatten
+ end
  end
- end
+ end
@@ -1,10 +1,14 @@
- module UriSelector
- def self.filter selector, doc
- # Check if the UriSelector has this URI as value (without params: ?param1=value1&param2=value2)
- if selector.rdf::value.include?(doc[:uri].match(/\A([^\?]*)(\?.*\Z)?/).captures.first)
- [ { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:content].text } ]
- else
- []
+ module Sc
+ class UriSelector
+ include RDF::NodeProxy
+
+ def filter doc
+ # Check if the UriSelector has this URI as value (without params: ?param1=value1&param2=value2)
+ if rdf::value.include?(doc[:uri].match(/\A([^\?]*)(\?.*\Z)?/).captures.first)
+ [ { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:content].text } ]
+ else
+ []
+ end
  end
  end
  end
@@ -1,10 +1,14 @@
- module UriPatternSelector
- def self.filter selector, doc
- # Check if the uri fits the pattern
- if selector.rdf::value.any? { |v| doc[:uri] =~ /\A#{v.gsub('.','\.').gsub('*', '.+')}\Z/ }
- [ { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:content].text } ]
- else
- []
+ module Sc
+ class UriPatternSelector
+ include RDF::NodeProxy
+
+ def filter doc
+ # Check if the uri fits the pattern
+ if rdf::value.any? { |v| doc[:uri] =~ /\A#{v.gsub('.','\.').gsub('*', '.+')}\Z/ }
+ [ { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:content].text } ]
+ else
+ []
+ end
  end
  end
- end
+ end
@@ -1,23 +1,26 @@
- module XPathSelector
- extend Scrappy::Formats
-
- def self.filter selector, doc
- selector.rdf::value.map do |pattern|
- interval = if selector.sc::index.first
- (selector.sc::index.first.to_i..selector.sc::index.first.to_i)
- else
- (0..-1)
- end
- patterns = selector.sc::keyword
- (doc[:content].search(pattern)[interval] || []).select { |node| patterns.any? ? patterns.include?(node.text.downcase.strip) : true }.map do |result|
- if selector.sc::attribute.first
- # Select node's attribute if given
- selector.sc::attribute.map { |attribute| { :uri=>doc[:uri], :content=>result, :value=>result[attribute] } }
+ module Sc
+ class XPathSelector
+ include RDF::NodeProxy
+ include Scrappy::Formats
+
+ def filter doc
+ rdf::value.map do |pattern|
+ interval = if sc::index.first
+ (sc::index.first.to_i..sc::index.first.to_i)
  else
- # Select node
- [ { :uri=>doc[:uri], :content=>result, :value=>format(result, selector.sc::format, doc[:uri]) } ]
+ (0..-1)
  end
- end
- end.flatten
+ patterns = sc::keyword
+ (doc[:content].search(pattern)[interval] || []).select { |node| patterns.any? ? patterns.include?(node.text.downcase.strip) : true }.map do |result|
+ if sc::attribute.first
+ # Select node's attribute if given
+ sc::attribute.map { |attribute| { :uri=>doc[:uri], :content=>result, :value=>result[attribute] } }
+ else
+ # Select node
+ [ { :uri=>doc[:uri], :content=>result, :value=>format(result, sc::format, doc[:uri]) } ]
+ end
+ end
+ end.flatten
+ end
  end
  end
@@ -1,24 +1,32 @@
  body {
  font-family: Arial, sans;
  }
- .center {
+ #center {
  text-align: center;
  margin-top: 100px;
+ margin-bottom: 100px;
  }
- .search {
+ #search {
  margin-top: 40px;
- margin-bottom: 100px;
  font-size:20px;
+ margin-bottom: 10px;
  }
- .search input {
- width: 400px; height:30px; font-size:16px;
+ #search input {
+ width: 700px; height:30px; font-size:16px;
  }
- .search select {
- width: 80px; height: 30px; font-size:16px;
+ #buttons {
+ width: 400px;
+ margin: auto;
  }
- .search button {
- width: 80px; height: 30px; font-size:16px;
+ #buttons select {
+ width: 100px; height: 30px; font-size:16px;
+ margin-left: 5px;
  }
+ #buttons button {
+ width: 100px; height: 30px; font-size:16px;
+ margin-right: 5px;
+ }
+
  pre {
  width: 600px;
  margin-left: auto;
@@ -37,4 +45,7 @@ pre {
  }
  #footer {
  margin-top:30px; text-align: center; font-size:14px; color: #555;
+ }
+ img {
+ border: none;
  }
@@ -2,10 +2,10 @@
  %html
  %head
  %title Help - Scrappy
- %link{:type=>"text/css", :href=>"/stylesheets/application.css", :rel=>"stylesheet"}
+ %link{:type=>"text/css", :href=>"#{settings.base_uri}/stylesheets/application.css", :rel=>"stylesheet"}
  %body
  %div#header
- %img{:src=>'/images/logo_small.png'}
+ %img{:src=>"#{settings.base_uri}/images/logo_small.png"}
  %div#body
  %h1 Help
  %p
@@ -15,11 +15,11 @@
  %pre http://[host]/[format]/[url]
  %p
  For example, to retrieve http://example.com/~user/?test=1 with RDF serialization:
- %pre http://localhost:3434/rdf/http://example.com/~user/%3Ftest%3D1
+ %pre==#{settings.base_uri || "http://localhost:#{settings.port}"}/rdf/http://example.com/~user/%3Ftest%3D1
  %p Available serialization formats are rdf, rdfxml, png, yarf, ntriples, turtle, json, jsonrdf, and ejson.
  %div#footer
- %a{:href=>"/"} Home
+ %a{:href=>"#{settings.base_uri}/"} Home
  |
- %a{:href=>"/help"} Help
+ %a{:href=>"#{settings.base_uri}/help"} Help
  |
  %a{:href=>'http://github.com/josei/scrappy'} About
@@ -2,24 +2,25 @@
  %html
  %head
  %title Scrappy
- %link{:type=>"text/css", :href=>"/stylesheets/application.css", :rel=>"stylesheet"}
+ %link{:type=>"text/css", :href=>"#{settings.base_uri}/stylesheets/application.css", :rel=>"stylesheet"}
  %body
- %div.center
+ %div#center
  %a{:href=>'http://github.com/josei/scrappy'}
- %img{:src=>'/images/logo.png'}
- %form.search
- %div
+ %img{:src=>"#{settings.base_uri}/images/logo.png"}
+ %form
+ %div#search
+ %input{:name=>:uri, :size=>30, :type=>:text, :value=>'Enter URI...', :onclick=>"if(this.value=='Enter URI...') this.value='';"}
+ %div#buttons
+ %button Scrape
  %select{:name=>:format}
  %option{:value=>:rdf} RDF
  %option{:value=>:png} PNG
  %option{:value=>:ejson} JSON
  %option{:value=>:yarf} YARF
  %option{:value=>:ntriples} nTriples
- %input{:name=>:uri, :size=>30, :type=>:text, :value=>'Enter URI...', :onclick=>"if(this.value=='Enter URI...') this.value='';"}
- %button Scrape
  %div#footer
- %a{:href=>"/"} Home
+ %a{:href=>"#{settings.base_uri}/"} Home
  |
- %a{:href=>"/help"} Help
+ %a{:href=>"#{settings.base_uri}/help"} Help
  |
  %a{:href=>'http://github.com/josei/scrappy'} About
@@ -22,4 +22,11 @@ class String
  def wikify
  gsub(/^[a-z]|\s+[a-z]/) { |a| a.upcase }.gsub(/\s/, '')
  end
+ def underscore
+ self.gsub(/::/, '/').
+ gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
+ gsub(/([a-z\d])([A-Z])/,'\1_\2').
+ tr("-", "_").
+ downcase
+ end
  end
data/lib/scrappy.rb CHANGED
@@ -21,7 +21,7 @@ require 'scrappy/agent/agent'
  Namespace :sc, 'http://lab.gsi.dit.upm.es/scraping.rdf#'
 
  module Scrappy
- VERSION = '0.1.23'
+ VERSION = '0.1.24'
  end
 
  # Require selectors
data/scrappy.gemspec CHANGED
@@ -2,11 +2,11 @@
 
  Gem::Specification.new do |s|
  s.name = %q{scrappy}
- s.version = "0.1.23"
+ s.version = "0.1.24"
 
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Jose Ignacio"]
- s.date = %q{2011-03-03}
+ s.date = %q{2011-03-08}
  s.default_executable = %q{scrappy}
  s.description = %q{RDF web scraper}
  s.email = %q{joseignacio.fernandez@gmail.com}
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
  s.add_runtime_dependency(%q<thin>, [">= 1.2.7"])
  s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.1"])
  s.add_runtime_dependency(%q<mechanize>, [">= 1.0.0"])
- s.add_runtime_dependency(%q<lightrdf>, [">= 0.1.9"])
+ s.add_runtime_dependency(%q<lightrdf>, [">= 0.2.0"])
  s.add_runtime_dependency(%q<i18n>, [">= 0.4.2"])
  s.add_runtime_dependency(%q<haml>, [">= 3.0.24"])
  else
@@ -41,7 +41,7 @@ Gem::Specification.new do |s|
  s.add_dependency(%q<thin>, [">= 1.2.7"])
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
- s.add_dependency(%q<lightrdf>, [">= 0.1.9"])
+ s.add_dependency(%q<lightrdf>, [">= 0.2.0"])
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
  s.add_dependency(%q<haml>, [">= 3.0.24"])
  end
@@ -51,7 +51,7 @@ Gem::Specification.new do |s|
  s.add_dependency(%q<thin>, [">= 1.2.7"])
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
- s.add_dependency(%q<lightrdf>, [">= 0.1.9"])
+ s.add_dependency(%q<lightrdf>, [">= 0.2.0"])
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
  s.add_dependency(%q<haml>, [">= 3.0.24"])
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
  segments:
  - 0
  - 1
- - 23
- version: 0.1.23
+ - 24
+ version: 0.1.24
  platform: ruby
  authors:
  - Jose Ignacio
@@ -14,7 +14,7 @@ autorequire:
  bindir: bin
  cert_chain: []
 
- date: 2011-03-03 00:00:00 +01:00
+ date: 2011-03-08 00:00:00 +01:00
  default_executable:
  dependencies:
  - !ruby/object:Gem::Dependency
@@ -96,9 +96,9 @@ dependencies:
  - !ruby/object:Gem::Version
  segments:
  - 0
- - 1
- - 9
- version: 0.1.9
+ - 2
+ - 0
+ version: 0.2.0
  type: :runtime
  version_requirements: *id006
  - !ruby/object:Gem::Dependency