scrappy 0.1.8 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ === 0.1.9 2010-12-16
2
+
3
+ * Corrected bug when filtering URI selectors
4
+ * Added sc:suffix to uri builder
5
+
1
6
  === 0.1.8 2010-12-14
2
7
 
3
8
  * Added sc:sameas
data/lib/scrappy.rb CHANGED
@@ -19,7 +19,7 @@ require 'scrappy/agent/agent'
19
19
  Namespace :sc, 'http://lab.gsi.dit.upm.es/scraping.rdf#'
20
20
 
21
21
  module Scrappy
22
- VERSION = '0.1.8'
22
+ VERSION = '0.1.9'
23
23
  end
24
24
 
25
25
  # Require selectors
@@ -6,7 +6,7 @@ module Scrappy
6
6
  triples = []
7
7
  content = Nokogiri::HTML(html, nil, 'utf-8')
8
8
 
9
- uri_selectors = kb.find(nil, Node('rdf:type'), Node('sc:UriSelector')) + kb.find(nil, Node('rdf:type'), Node('sc:UriPatternSelector')).flatten.select do |uri_selector|
9
+ uri_selectors = (kb.find(nil, Node('rdf:type'), Node('sc:UriSelector')) + kb.find(nil, Node('rdf:type'), Node('sc:UriPatternSelector'))).flatten.select do |uri_selector|
10
10
  class_name = uri_selector.rdf::type.first.to_s.split('#').last
11
11
  results = Kernel.const_get(class_name).filter uri_selector, {:content=>content, :uri=>uri}
12
12
  !results.empty?
@@ -8,7 +8,7 @@ module NewUriSelector
8
8
  end
9
9
 
10
10
  contents.map do |content|
11
- new_uri = selector.sc::prefix.to_s + content.wikify
11
+ new_uri = selector.sc::prefix.first.to_s + content.wikify + selector.sc::suffix.first.to_s
12
12
  { :uri=>new_uri, :content=>doc[:content], :value=>new_uri }
13
13
  end
14
14
  end
data/scrappy.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{scrappy}
5
- s.version = "0.1.8"
5
+ s.version = "0.1.9"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jose Ignacio"]
9
- s.date = %q{2010-12-14}
9
+ s.date = %q{2010-12-16}
10
10
  s.default_executable = %q{scrappy}
11
11
  s.description = %q{RDF web scraper}
12
12
  s.email = %q{joseignacio.fernandez@gmail.com}
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 8
9
- version: 0.1.8
8
+ - 9
9
+ version: 0.1.9
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jose Ignacio
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-12-14 00:00:00 +01:00
17
+ date: 2010-12-16 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency