scrappy 0.1.8 → 0.1.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ === 0.1.9 2010-12-16
2
+
3
+ * Corrected bug when filtering URI selectors
4
+ * Added sc:suffix to uri builder
5
+
1
6
  === 0.1.8 2010-12-14
2
7
 
3
8
  * Added sc:sameas
data/lib/scrappy.rb CHANGED
@@ -19,7 +19,7 @@ require 'scrappy/agent/agent'
19
19
  Namespace :sc, 'http://lab.gsi.dit.upm.es/scraping.rdf#'
20
20
 
21
21
  module Scrappy
22
- VERSION = '0.1.8'
22
+ VERSION = '0.1.9'
23
23
  end
24
24
 
25
25
  # Require selectors
@@ -6,7 +6,7 @@ module Scrappy
6
6
  triples = []
7
7
  content = Nokogiri::HTML(html, nil, 'utf-8')
8
8
 
9
- uri_selectors = kb.find(nil, Node('rdf:type'), Node('sc:UriSelector')) + kb.find(nil, Node('rdf:type'), Node('sc:UriPatternSelector')).flatten.select do |uri_selector|
9
+ uri_selectors = (kb.find(nil, Node('rdf:type'), Node('sc:UriSelector')) + kb.find(nil, Node('rdf:type'), Node('sc:UriPatternSelector'))).flatten.select do |uri_selector|
10
10
  class_name = uri_selector.rdf::type.first.to_s.split('#').last
11
11
  results = Kernel.const_get(class_name).filter uri_selector, {:content=>content, :uri=>uri}
12
12
  !results.empty?
@@ -8,7 +8,7 @@ module NewUriSelector
8
8
  end
9
9
 
10
10
  contents.map do |content|
11
- new_uri = selector.sc::prefix.to_s + content.wikify
11
+ new_uri = selector.sc::prefix.first.to_s + content.wikify + selector.sc::suffix.first.to_s
12
12
  { :uri=>new_uri, :content=>doc[:content], :value=>new_uri }
13
13
  end
14
14
  end
data/scrappy.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{scrappy}
5
- s.version = "0.1.8"
5
+ s.version = "0.1.9"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jose Ignacio"]
9
- s.date = %q{2010-12-14}
9
+ s.date = %q{2010-12-16}
10
10
  s.default_executable = %q{scrappy}
11
11
  s.description = %q{RDF web scraper}
12
12
  s.email = %q{joseignacio.fernandez@gmail.com}
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 8
9
- version: 0.1.8
8
+ - 9
9
+ version: 0.1.9
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jose Ignacio
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-12-14 00:00:00 +01:00
17
+ date: 2010-12-16 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency