scrappy 0.1.23 → 0.1.24

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,9 @@
1
+ === 0.1.24 2011-03-08
2
+
3
+ * Using RDF::NodeProxy from lightRDF 0.2
4
+ * Added references (-r option) on constructed URIs
5
+ * Minor corrections
6
+
1
7
  === 0.1.23 2011-03-03
2
8
 
3
9
  * Prettier web interface
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ Echoe.new('scrappy', Scrappy::VERSION) do |p|
11
11
  p.email = "joseignacio.fernandez@gmail.com"
12
12
  p.install_message = '**(Optional) Remember to install rbwebkitgtk for visual parsing features**'
13
13
  p.ignore_pattern = ["pkg/*"]
14
- p.dependencies = [['activesupport','>= 2.3.5'], ['sinatra', '>= 1.1.2'], ['thin', '>= 1.2.7'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.1.9'], ['i18n', '>= 0.4.2'], ['haml', '>= 3.0.24']]
14
+ p.dependencies = [['activesupport','>= 2.3.5'], ['sinatra', '>= 1.1.2'], ['thin', '>= 1.2.7'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.2.0'], ['i18n', '>= 0.4.2'], ['haml', '>= 3.0.24']]
15
15
  end
16
16
 
17
17
  Rake::RDocTask.new(:rdoc) do |rdoc|
data/bin/scrappy CHANGED
@@ -29,7 +29,7 @@ module Scrappy
29
29
  args = ARGV.map { |arg| arg.split(" ") }.flatten
30
30
 
31
31
  OptionParser.new do |opts|
32
- opts.on('-V', '--version') { output_version; exit 0 }
32
+ opts.on('-v', '--version') { output_version; exit 0 }
33
33
  opts.on('-h', '--help') { output_help; exit 0 }
34
34
  opts.on('-g URL', '--get URL') { |url| Options.url = url; Options.http_method=:get }
35
35
  opts.on('-p URL', '--post URL') { |url| Options.url = url; Options.http_method=:post }
@@ -42,7 +42,7 @@ module Scrappy
42
42
  opts.on('-c C', '--concurrence C') { |c| Agent::Options.workers = c.to_i }
43
43
  opts.on('-d D', '--delay D') { |d| Agent::Options.delay = d; Agent::Options.workers = 1 }
44
44
  opts.on('-l L', '--levels L') { |l| Agent::Options.depth = l.to_i }
45
- opts.on('-v', '--visual') { Agent::Options.agent = :visual; Agent::Options.workers = 1 }
45
+ opts.on('-V', '--visual') { Agent::Options.agent = :visual; Agent::Options.workers = 1 }
46
46
  opts.on('-r', '--reference') { Agent::Options.referenceable = :minimum }
47
47
  opts.on('-R', '--reference-all') { Agent::Options.referenceable = :dump }
48
48
  opts.on('-w', '--window') { Agent::Options.window = true }
@@ -85,8 +85,8 @@ module Scrappy
85
85
  Scrappy is a tool to scrape semantic data out of the unstructured web
86
86
 
87
87
  Examples
88
- This command retrieves Google web page
89
- scrappy -g http://www.google.com
88
+ This command retrieves a web page
89
+ scrappy -g http://www.example.com
90
90
 
91
91
  Usage
92
92
  scrappy [options]
@@ -95,7 +95,7 @@ Usage
95
95
 
96
96
  Options
97
97
  -h, --help Displays help message
98
- -V, --version Display the version, then exit
98
+ -v, --version Display the version, then exit
99
99
  -f, --format Picks output format (json, ejson, rdfxml, ntriples, png)
100
100
  -g, --get URL Gets requested URL
101
101
  -p, --post URL Posts requested URL
@@ -108,7 +108,7 @@ Options
108
108
  -s, --server [ROOT] Runs web server (optionally specify server's root url)
109
109
  -S, --proxy-server Runs web proxy
110
110
  -P, --port PORT Selects port number (default is 3434)
111
- -v, --visual Uses visual agent (slow)
111
+ -V, --visual Uses visual agent (slow)
112
112
  -r, --reference Outputs referenceable data
113
113
  -R, --reference-all Outputs all HTML referenceable data
114
114
  -w, --window Shows browser window (requires -v)
@@ -7,12 +7,12 @@ module Scrappy
7
7
  print "Extracting #{uri}..."; $stdout.flush
8
8
  end
9
9
 
10
+ @selector_pool ||= {}
10
11
  triples = []
11
12
  content = Nokogiri::HTML(html, nil, 'utf-8')
12
13
 
13
14
  uri_selectors = (kb.find(nil, Node('rdf:type'), Node('sc:UriSelector')) + kb.find(nil, Node('rdf:type'), Node('sc:UriPatternSelector'))).flatten.select do |uri_selector|
14
- class_name = uri_selector.rdf::type.first.to_s.split('#').last
15
- results = Kernel.const_get(class_name).filter uri_selector, {:content=>content, :uri=>uri}
15
+ results = selector_pool(uri_selector).filter :content=>content, :uri=>uri
16
16
  !results.empty?
17
17
  end
18
18
 
@@ -41,9 +41,19 @@ module Scrappy
41
41
  # Generate triples
42
42
  docs.each do |doc|
43
43
  # Build URIs if identifier present
44
- nodes = fragment.sc::identifier.map { |s| filter s, doc }.flatten.map{ |d| Node(parse_uri(uri, d[:value])) }
44
+ nodes = fragment.sc::identifier.map { |s| filter s, doc }.flatten.map do |d|
45
+ node = Node(parse_uri(uri, d[:value]))
46
+ if options[:referenceable]
47
+ # Include the fragment where the URI was built from
48
+ uri_node = Node(nil)
49
+ options[:triples] << [ node, Node("sc:uri"), uri_node ]
50
+ options[:triples] << [ uri_node, Node("rdf:value"), node.to_s ]
51
+ options[:triples] << [ uri_node, Node("sc:source"), Node(node_hash(d[:uri], d[:content].path)) ]
52
+ end
53
+ node
54
+ end
45
55
  nodes << Node(nil) if nodes.empty?
46
-
56
+
47
57
  nodes.each do |node|
48
58
  # Build the object
49
59
  object = if fragment.sc::type.include?(Node('rdf:Literal'))
@@ -82,9 +92,6 @@ module Scrappy
82
92
  end
83
93
 
84
94
  def filter selector, doc
85
- # From "BaseUriSelector" to "base_uri"
86
- class_name = selector.rdf::type.first.to_s.split('#').last
87
-
88
95
  if !selector.sc::debug.empty? and options.debug
89
96
  puts '== DEBUG'
90
97
  puts '== Selector:'
@@ -96,7 +103,7 @@ module Scrappy
96
103
  end
97
104
 
98
105
  # Process selector
99
- results = Kernel.const_get(class_name).filter selector, doc
106
+ results = selector_pool(selector).filter doc
100
107
 
101
108
  if !selector.sc::debug.empty? and options.debug
102
109
  puts "== No results" if results.empty?
@@ -128,7 +135,7 @@ module Scrappy
128
135
  end
129
136
 
130
137
  def add_referenceable_data content, triples, referenceable
131
- resources = triples.map{|s,p,o| [[s],[o]]}.flatten
138
+ resources = {}; triples.each { |s,p,o| resources[o] = true }
132
139
 
133
140
  fragment = Node(node_hash(uri, '/'))
134
141
  selector = Node(nil)
@@ -136,7 +143,7 @@ module Scrappy
136
143
 
137
144
  selector.rdf::type = Node('sc:UnivocalSelector')
138
145
  selector.sc::path = '/'
139
- selector.sc::uri = uri
146
+ selector.sc::document = uri
140
147
 
141
148
  fragment.sc::selector = selector
142
149
 
@@ -144,15 +151,15 @@ module Scrappy
144
151
 
145
152
  content.search('*').each do |node|
146
153
  fragment = Node(node_hash(uri, node.path))
147
-
148
- if referenceable == :dump or resources.include?(fragment)
154
+
155
+ if referenceable == :dump or resources[fragment]
149
156
  selector = Node(nil)
150
157
  presentation = Node(nil)
151
158
 
152
159
  selector.rdf::type = Node('sc:UnivocalSelector')
153
160
  selector.sc::path = node.path.to_s
154
161
  selector.sc::tag = node.name.to_s
155
- selector.sc::uri = uri
162
+ selector.sc::document = uri
156
163
 
157
164
  presentation.sc::x = node[:vx].to_s if node[:vx]
158
165
  presentation.sc::y = node[:vy].to_s if node[:vy]
@@ -175,7 +182,11 @@ module Scrappy
175
182
 
176
183
  def node_hash uri, path
177
184
  digest = Digest::MD5.hexdigest("#{uri} #{path}")
178
- "_:bnode#{digest}"
185
+ :"_:bnode#{digest}"
186
+ end
187
+
188
+ def selector_pool selector
189
+ @selector_pool[selector.id] ||= kb.node(selector)
179
190
  end
180
191
  end
181
192
  end
@@ -63,14 +63,17 @@ module Scrappy
63
63
  var i=0;
64
64
  for(var i=0; i<items.length; i++) {
65
65
  var item = items[i];
66
- item.setAttribute('vx', item.offsetLeft)
67
- item.setAttribute('vy', item.offsetTop)
68
- item.setAttribute('vw', item.offsetWidth)
69
- item.setAttribute('vh', item.offsetHeight)
70
- item.setAttribute('vsize', document.defaultView.getComputedStyle(item, null).getPropertyValue('font-size'))
71
- item.setAttribute('vweight', document.defaultView.getComputedStyle(item, null).getPropertyValue('font-weight'))
72
- item.setAttribute('vcolor', document.defaultView.getComputedStyle(item, null).getPropertyValue('color'))
73
- item.setAttribute('vbcolor', document.defaultView.getComputedStyle(item, null).getPropertyValue('background-color'))
66
+ item.setAttribute('vx', item.offsetLeft);
67
+ item.setAttribute('vy', item.offsetTop);
68
+ item.setAttribute('vw', item.offsetWidth);
69
+ item.setAttribute('vh', item.offsetHeight);
70
+ item.setAttribute('vsize', document.defaultView.getComputedStyle(item, null).getPropertyValue('font-size'));
71
+ var weight = document.defaultView.getComputedStyle(item, null).getPropertyValue('font-weight');
72
+ if (weight == 'normal') weight = 400;
73
+ if (weight == 'bold') weight = 700;
74
+ item.setAttribute('vweight', weight);
75
+ item.setAttribute('vcolor', document.defaultView.getComputedStyle(item, null).getPropertyValue('color'));
76
+ item.setAttribute('vbcolor', document.defaultView.getComputedStyle(item, null).getPropertyValue('background-color'));
74
77
  }"""
75
78
  end
76
79
 
@@ -1,5 +1,9 @@
1
- module BaseUriSelector
2
- def self.filter selector, doc
3
- [ { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:uri] } ]
1
+ module Sc
2
+ class BaseUriSelector
3
+ include RDF::NodeProxy
4
+
5
+ def filter doc
6
+ [ { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:uri] } ]
7
+ end
4
8
  end
5
9
  end
@@ -1,6 +1,10 @@
1
- module CssSelector
2
- def self.filter selector, doc
3
- # By using Nokogiri, CSS and XPath use the same search method
4
- XPathSelector.filter selector, doc
1
+ module Sc
2
+ class CssSelector
3
+ include RDF::NodeProxy
4
+
5
+ def filter doc
6
+ # By using Nokogiri, CSS and XPath use the same search method
7
+ Sc::XPathSelector.new(node).filter doc
8
+ end
5
9
  end
6
10
  end
@@ -1,23 +1,35 @@
1
- module NewUriSelector
2
- def self.filter selector, doc
3
- contents = if selector.sc::attribute.first
4
- # Select node's attribute if given
5
- selector.sc::attribute.map { |attribute| doc[:content][attribute] }
6
- else
7
- [ doc[:content].text ]
8
- end
9
-
10
- @@indexes ||= Hash.new(0)
11
- prefix = selector.sc::prefix.first.to_s
12
- prefix = (prefix =~ /\Ahttp/ ? URI::parse(doc[:uri]).merge(prefix).to_s : "#{doc[:uri]}#{prefix}")
13
- suffix = selector.sc::suffix.first.to_s
14
-
15
- contents.map do |content|
16
- variable = selector.sc::sequence.first.to_s=="true" ? (@@indexes[selector] += 1) : content.wikify
1
+ module Sc
2
+ class NewUriSelector
3
+ include RDF::NodeProxy
4
+
5
+ def filter doc
6
+ contents = if sc::attribute.first
7
+ # Select node's attribute if given
8
+ sc::attribute.map { |attribute| doc[:content][attribute] }
9
+ else
10
+ [ doc[:value] ]
11
+ end
17
12
 
18
- new_uri = "#{prefix}#{variable}#{suffix}"
13
+ @indexes ||= Hash.new(0)
14
+ prefix = sc::prefix.first.to_s
15
+ prefix = (prefix =~ /\Ahttp/ ? URI::parse(doc[:uri]).merge(prefix).to_s : "#{doc[:uri]}#{prefix}")
16
+ suffix = sc::suffix.first.to_s
19
17
 
20
- { :uri=>new_uri, :content=>doc[:content], :value=>new_uri }
18
+ contents.map do |content|
19
+ variable = if sc::sequence.first.to_s=="true"
20
+ @indexes[prefix] += 1
21
+ else
22
+ if sc::downcase.first.to_s=="true"
23
+ content.to_s.underscore
24
+ else
25
+ content.to_s.wikify
26
+ end
27
+ end
28
+
29
+ new_uri = "#{prefix}#{variable}#{suffix}"
30
+
31
+ { :uri=>new_uri, :content=>doc[:content], :value=>new_uri }
32
+ end
21
33
  end
22
34
  end
23
35
  end
@@ -1,12 +1,15 @@
1
- module RootSelector
2
- extend Scrappy::Formats
1
+ module Sc
2
+ class RootSelector
3
+ include RDF::NodeProxy
4
+ include Scrappy::Formats
3
5
 
4
- def self.filter selector, doc
5
- if selector.sc::attribute.first
6
- # Select node's attribute if given
7
- selector.sc::attribute.map { |attribute| { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:content][attribute] } }
8
- else
9
- [ { :uri=>doc[:uri], :content=>doc[:content], :value=>format(doc[:content], selector.sc::format, doc[:uri]) } ]
6
+ def filter doc
7
+ if sc::attribute.first
8
+ # Select node's attribute if given
9
+ sc::attribute.map { |attribute| { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:content][attribute] } }
10
+ else
11
+ [ { :uri=>doc[:uri], :content=>doc[:content], :value=>format(doc[:content], sc::format, doc[:uri]) } ]
12
+ end
10
13
  end
11
14
  end
12
- end
15
+ end
@@ -1,14 +1,17 @@
1
- module SectionSelector
2
- extend Scrappy::Formats
3
-
4
- def self.filter selector, doc
5
- selector.rdf::value.map do |pattern|
6
- doc[:content].search('h1, h2, h3, h4, h5, h6, h7, h8, h9, h10').select { |n| n.parent.name!='script' and n.text.downcase.strip == pattern }.map do |node|
7
- found = false
8
- content = node.parent.children[node.parent.children.index(node)+1..-1].select { |n| found ||= (n.name==node.name or n.name=='div'); !found }
9
-
10
- [ { :uri=>doc[:uri], :content=>content, :value=>content.map{|t| format(t, selector.sc::format, doc[:uri])}.select{|t| t.to_s.strip!=''}*"\n\n" } ]
11
- end
12
- end.flatten
1
+ module Sc
2
+ class SectionSelector
3
+ include RDF::NodeProxy
4
+ include Scrappy::Formats
5
+
6
+ def filter doc
7
+ rdf::value.map do |pattern|
8
+ doc[:content].search('h1, h2, h3, h4, h5, h6, h7, h8, h9, h10').select { |n| n.parent.name!='script' and n.text.downcase.strip == pattern }.map do |node|
9
+ found = false
10
+ content = node.parent.children[node.parent.children.index(node)+1..-1].select { |n| found ||= (n.name==node.name or n.name=='div'); !found }
11
+
12
+ [ { :uri=>doc[:uri], :content=>content, :value=>content.map{|t| format(t, sc::format, doc[:uri])}.select{|t| t.to_s.strip!=''}*"\n\n" } ]
13
+ end
14
+ end.flatten
15
+ end
13
16
  end
14
17
  end
@@ -1,8 +1,14 @@
1
- module SliceSelector
2
- def self.filter selector, doc
3
- selector.rdf::value.map do |separator|
4
- slices = doc[:value].split(separator)
5
- selector.sc::index.map { |index| { :uri=>doc[:uri], :content=>doc[:content], :value=>slices[index.to_i].to_s.strip} }
6
- end.flatten
1
+ module Sc
2
+ class SliceSelector
3
+ include RDF::NodeProxy
4
+
5
+ def filter doc
6
+ rdf::value.map do |separator|
7
+ slices = doc[:value].split(separator)
8
+ sc::index.map { |index| slices[index.to_i].to_s.strip }.
9
+ select { |value| value != "" }.
10
+ map { |value| { :uri=>doc[:uri], :content=>doc[:content], :value=>value} }
11
+ end.flatten
12
+ end
7
13
  end
8
- end
14
+ end
@@ -1,10 +1,14 @@
1
- module UriSelector
2
- def self.filter selector, doc
3
- # Check if the UriSelector has this URI as value (without params: ?param1=value1&param2=value2)
4
- if selector.rdf::value.include?(doc[:uri].match(/\A([^\?]*)(\?.*\Z)?/).captures.first)
5
- [ { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:content].text } ]
6
- else
7
- []
1
+ module Sc
2
+ class UriSelector
3
+ include RDF::NodeProxy
4
+
5
+ def filter doc
6
+ # Check if the UriSelector has this URI as value (without params: ?param1=value1&param2=value2)
7
+ if rdf::value.include?(doc[:uri].match(/\A([^\?]*)(\?.*\Z)?/).captures.first)
8
+ [ { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:content].text } ]
9
+ else
10
+ []
11
+ end
8
12
  end
9
13
  end
10
14
  end
@@ -1,10 +1,14 @@
1
- module UriPatternSelector
2
- def self.filter selector, doc
3
- # Check if the uri fits the pattern
4
- if selector.rdf::value.any? { |v| doc[:uri] =~ /\A#{v.gsub('.','\.').gsub('*', '.+')}\Z/ }
5
- [ { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:content].text } ]
6
- else
7
- []
1
+ module Sc
2
+ class UriPatternSelector
3
+ include RDF::NodeProxy
4
+
5
+ def filter doc
6
+ # Check if the uri fits the pattern
7
+ if rdf::value.any? { |v| doc[:uri] =~ /\A#{v.gsub('.','\.').gsub('*', '.+')}\Z/ }
8
+ [ { :uri=>doc[:uri], :content=>doc[:content], :value=>doc[:content].text } ]
9
+ else
10
+ []
11
+ end
8
12
  end
9
13
  end
10
- end
14
+ end
@@ -1,23 +1,26 @@
1
- module XPathSelector
2
- extend Scrappy::Formats
3
-
4
- def self.filter selector, doc
5
- selector.rdf::value.map do |pattern|
6
- interval = if selector.sc::index.first
7
- (selector.sc::index.first.to_i..selector.sc::index.first.to_i)
8
- else
9
- (0..-1)
10
- end
11
- patterns = selector.sc::keyword
12
- (doc[:content].search(pattern)[interval] || []).select { |node| patterns.any? ? patterns.include?(node.text.downcase.strip) : true }.map do |result|
13
- if selector.sc::attribute.first
14
- # Select node's attribute if given
15
- selector.sc::attribute.map { |attribute| { :uri=>doc[:uri], :content=>result, :value=>result[attribute] } }
1
+ module Sc
2
+ class XPathSelector
3
+ include RDF::NodeProxy
4
+ include Scrappy::Formats
5
+
6
+ def filter doc
7
+ rdf::value.map do |pattern|
8
+ interval = if sc::index.first
9
+ (sc::index.first.to_i..sc::index.first.to_i)
16
10
  else
17
- # Select node
18
- [ { :uri=>doc[:uri], :content=>result, :value=>format(result, selector.sc::format, doc[:uri]) } ]
11
+ (0..-1)
19
12
  end
20
- end
21
- end.flatten
13
+ patterns = sc::keyword
14
+ (doc[:content].search(pattern)[interval] || []).select { |node| patterns.any? ? patterns.include?(node.text.downcase.strip) : true }.map do |result|
15
+ if sc::attribute.first
16
+ # Select node's attribute if given
17
+ sc::attribute.map { |attribute| { :uri=>doc[:uri], :content=>result, :value=>result[attribute] } }
18
+ else
19
+ # Select node
20
+ [ { :uri=>doc[:uri], :content=>result, :value=>format(result, sc::format, doc[:uri]) } ]
21
+ end
22
+ end
23
+ end.flatten
24
+ end
22
25
  end
23
26
  end
@@ -1,24 +1,32 @@
1
1
  body {
2
2
  font-family: Arial, sans;
3
3
  }
4
- .center {
4
+ #center {
5
5
  text-align: center;
6
6
  margin-top: 100px;
7
+ margin-bottom: 100px;
7
8
  }
8
- .search {
9
+ #search {
9
10
  margin-top: 40px;
10
- margin-bottom: 100px;
11
11
  font-size:20px;
12
+ margin-bottom: 10px;
12
13
  }
13
- .search input {
14
- width: 400px; height:30px; font-size:16px;
14
+ #search input {
15
+ width: 700px; height:30px; font-size:16px;
15
16
  }
16
- .search select {
17
- width: 80px; height: 30px; font-size:16px;
17
+ #buttons {
18
+ width: 400px;
19
+ margin: auto;
18
20
  }
19
- .search button {
20
- width: 80px; height: 30px; font-size:16px;
21
+ #buttons select {
22
+ width: 100px; height: 30px; font-size:16px;
23
+ margin-left: 5px;
21
24
  }
25
+ #buttons button {
26
+ width: 100px; height: 30px; font-size:16px;
27
+ margin-right: 5px;
28
+ }
29
+
22
30
  pre {
23
31
  width: 600px;
24
32
  margin-left: auto;
@@ -37,4 +45,7 @@ pre {
37
45
  }
38
46
  #footer {
39
47
  margin-top:30px; text-align: center; font-size:14px; color: #555;
48
+ }
49
+ img {
50
+ border: none;
40
51
  }
@@ -2,10 +2,10 @@
2
2
  %html
3
3
  %head
4
4
  %title Help - Scrappy
5
- %link{:type=>"text/css", :href=>"/stylesheets/application.css", :rel=>"stylesheet"}
5
+ %link{:type=>"text/css", :href=>"#{settings.base_uri}/stylesheets/application.css", :rel=>"stylesheet"}
6
6
  %body
7
7
  %div#header
8
- %img{:src=>'/images/logo_small.png'}
8
+ %img{:src=>"#{settings.base_uri}/images/logo_small.png"}
9
9
  %div#body
10
10
  %h1 Help
11
11
  %p
@@ -15,11 +15,11 @@
15
15
  %pre http://[host]/[format]/[url]
16
16
  %p
17
17
  For example, to retrieve http://example.com/~user/?test=1 with RDF serialization:
18
- %pre http://localhost:3434/rdf/http://example.com/~user/%3Ftest%3D1
18
+ %pre==#{settings.base_uri || "http://localhost:#{settings.port}"}/rdf/http://example.com/~user/%3Ftest%3D1
19
19
  %p Available serialization formats are rdf, rdfxml, png, yarf, ntriples, turtle, json, jsonrdf, and ejson.
20
20
  %div#footer
21
- %a{:href=>"/"} Home
21
+ %a{:href=>"#{settings.base_uri}/"} Home
22
22
  |
23
- %a{:href=>"/help"} Help
23
+ %a{:href=>"#{settings.base_uri}/help"} Help
24
24
  |
25
25
  %a{:href=>'http://github.com/josei/scrappy'} About
@@ -2,24 +2,25 @@
2
2
  %html
3
3
  %head
4
4
  %title Scrappy
5
- %link{:type=>"text/css", :href=>"/stylesheets/application.css", :rel=>"stylesheet"}
5
+ %link{:type=>"text/css", :href=>"#{settings.base_uri}/stylesheets/application.css", :rel=>"stylesheet"}
6
6
  %body
7
- %div.center
7
+ %div#center
8
8
  %a{:href=>'http://github.com/josei/scrappy'}
9
- %img{:src=>'/images/logo.png'}
10
- %form.search
11
- %div
9
+ %img{:src=>"#{settings.base_uri}/images/logo.png"}
10
+ %form
11
+ %div#search
12
+ %input{:name=>:uri, :size=>30, :type=>:text, :value=>'Enter URI...', :onclick=>"if(this.value=='Enter URI...') this.value='';"}
13
+ %div#buttons
14
+ %button Scrape
12
15
  %select{:name=>:format}
13
16
  %option{:value=>:rdf} RDF
14
17
  %option{:value=>:png} PNG
15
18
  %option{:value=>:ejson} JSON
16
19
  %option{:value=>:yarf} YARF
17
20
  %option{:value=>:ntriples} nTriples
18
- %input{:name=>:uri, :size=>30, :type=>:text, :value=>'Enter URI...', :onclick=>"if(this.value=='Enter URI...') this.value='';"}
19
- %button Scrape
20
21
  %div#footer
21
- %a{:href=>"/"} Home
22
+ %a{:href=>"#{settings.base_uri}/"} Home
22
23
  |
23
- %a{:href=>"/help"} Help
24
+ %a{:href=>"#{settings.base_uri}/help"} Help
24
25
  |
25
26
  %a{:href=>'http://github.com/josei/scrappy'} About
@@ -22,4 +22,11 @@ class String
22
22
  def wikify
23
23
  gsub(/^[a-z]|\s+[a-z]/) { |a| a.upcase }.gsub(/\s/, '')
24
24
  end
25
+ def underscore
26
+ self.gsub(/::/, '/').
27
+ gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
28
+ gsub(/([a-z\d])([A-Z])/,'\1_\2').
29
+ tr("-", "_").
30
+ downcase
31
+ end
25
32
  end
data/lib/scrappy.rb CHANGED
@@ -21,7 +21,7 @@ require 'scrappy/agent/agent'
21
21
  Namespace :sc, 'http://lab.gsi.dit.upm.es/scraping.rdf#'
22
22
 
23
23
  module Scrappy
24
- VERSION = '0.1.23'
24
+ VERSION = '0.1.24'
25
25
  end
26
26
 
27
27
  # Require selectors
data/scrappy.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{scrappy}
5
- s.version = "0.1.23"
5
+ s.version = "0.1.24"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jose Ignacio"]
9
- s.date = %q{2011-03-03}
9
+ s.date = %q{2011-03-08}
10
10
  s.default_executable = %q{scrappy}
11
11
  s.description = %q{RDF web scraper}
12
12
  s.email = %q{joseignacio.fernandez@gmail.com}
@@ -32,7 +32,7 @@ Gem::Specification.new do |s|
32
32
  s.add_runtime_dependency(%q<thin>, [">= 1.2.7"])
33
33
  s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.1"])
34
34
  s.add_runtime_dependency(%q<mechanize>, [">= 1.0.0"])
35
- s.add_runtime_dependency(%q<lightrdf>, [">= 0.1.9"])
35
+ s.add_runtime_dependency(%q<lightrdf>, [">= 0.2.0"])
36
36
  s.add_runtime_dependency(%q<i18n>, [">= 0.4.2"])
37
37
  s.add_runtime_dependency(%q<haml>, [">= 3.0.24"])
38
38
  else
@@ -41,7 +41,7 @@ Gem::Specification.new do |s|
41
41
  s.add_dependency(%q<thin>, [">= 1.2.7"])
42
42
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
43
43
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
44
- s.add_dependency(%q<lightrdf>, [">= 0.1.9"])
44
+ s.add_dependency(%q<lightrdf>, [">= 0.2.0"])
45
45
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
46
46
  s.add_dependency(%q<haml>, [">= 3.0.24"])
47
47
  end
@@ -51,7 +51,7 @@ Gem::Specification.new do |s|
51
51
  s.add_dependency(%q<thin>, [">= 1.2.7"])
52
52
  s.add_dependency(%q<nokogiri>, [">= 1.4.1"])
53
53
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
54
- s.add_dependency(%q<lightrdf>, [">= 0.1.9"])
54
+ s.add_dependency(%q<lightrdf>, [">= 0.2.0"])
55
55
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
56
56
  s.add_dependency(%q<haml>, [">= 3.0.24"])
57
57
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 23
9
- version: 0.1.23
8
+ - 24
9
+ version: 0.1.24
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jose Ignacio
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-03-03 00:00:00 +01:00
17
+ date: 2011-03-08 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -96,9 +96,9 @@ dependencies:
96
96
  - !ruby/object:Gem::Version
97
97
  segments:
98
98
  - 0
99
- - 1
100
- - 9
101
- version: 0.1.9
99
+ - 2
100
+ - 0
101
+ version: 0.2.0
102
102
  type: :runtime
103
103
  version_requirements: *id006
104
104
  - !ruby/object:Gem::Dependency