scrappy 0.4.1 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,8 @@
1
+ === 0.4.2 2011-07-06
2
+
3
+ * Support to sc:nonempty and sc:boolean in selectors
4
+ * Bugfix in NewUriSelector
5
+
1
6
  === 0.4.1 2011-07-05
2
7
 
3
8
  * Bug correction in NewUriSelector
@@ -24,5 +24,5 @@ require 'scrappy/agent/blind_agent'
24
24
  require 'scrappy/agent/agent'
25
25
 
26
26
  module Scrappy
27
- VERSION = '0.4.1'
27
+ VERSION = '0.4.2'
28
28
  end
@@ -64,7 +64,7 @@ module Sc
64
64
 
65
65
  # Build the object -- it can be a node or a literal
66
66
  object = if sc::type.include?(Node('rdf:Literal'))
67
- value = doc[:value].to_s.strip
67
+ value = doc[:value].to_s.gsub("\302\240"," ").strip
68
68
  if options[:referenceable]
69
69
  node.rdf::value = value
70
70
  node.rdf::type += [Node('rdf:Literal')]
@@ -20,6 +20,27 @@ module Sc
20
20
  # Filter method is defined in each subclass
21
21
  results = filter doc
22
22
 
23
+ if sc::boolean.first=="true"
24
+ results = results.map do |r|
25
+ affirmations = ["yes", "true"]
26
+ negations = ["no", "none", "false", "-", "--"]
27
+ no = negations.include?(r[:value].gsub("\302\240"," ").strip.downcase)
28
+ yes = affirmations.include?(r[:value].gsub("\302\240"," ").strip.downcase)
29
+ if no
30
+ value = "false"
31
+ elsif yes
32
+ value = "true"
33
+ else
34
+ value = :remove
35
+ end
36
+ r.merge :value=>value
37
+ end
38
+ results = results.select{ |r| r[:value] != :remove }
39
+ end
40
+ if sc::nonempty.first=="true"
41
+ results = results.select{ |r| r[:value].gsub("\302\240"," ").strip!=""}
42
+ end
43
+
23
44
  if sc::debug.first=="true" and Scrappy::Agent::Options.debug and
24
45
  (Scrappy::Agent::Options.debug_key.nil? or doc[:value].downcase.include?(Scrappy::Agent::Options.debug_key) )
25
46
 
@@ -10,7 +10,9 @@ module Sc
10
10
 
11
11
  @indexes ||= Hash.new(0)
12
12
  prefix = sc::prefix.first.to_s
13
- prefix = (prefix =~ /\Ahttp\:/ or prefix =~ /\Ahttps\:/) ? URI::parse(doc[:uri]).merge(prefix).to_s : "#{doc[:uri]}#{prefix}"
13
+ if !["http://", "https://"].include?(prefix)
14
+ prefix = (prefix =~ /\Ahttp\:/ or prefix =~ /\Ahttps\:/) ? URI::parse(doc[:uri]).merge(prefix).to_s : "#{doc[:uri]}#{prefix}"
15
+ end
14
16
  suffix = sc::suffix.first.to_s
15
17
 
16
18
  nofollow = (sc::follow.first != "true")
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{scrappy}
5
- s.version = "0.4.1"
5
+ s.version = "0.4.2"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jose Ignacio"]
9
- s.date = %q{2011-07-05}
9
+ s.date = %q{2011-07-06}
10
10
  s.default_executable = %q{scrappy}
11
11
  s.description = %q{RDF web scraper}
12
12
  s.email = %q{joseignacio.fernandez@gmail.com}
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 4
8
- - 1
9
- version: 0.4.1
8
+ - 2
9
+ version: 0.4.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jose Ignacio
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-07-05 00:00:00 +02:00
17
+ date: 2011-07-06 00:00:00 +02:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency