scrappy 0.4.7 → 0.4.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,8 @@
1
+ === 0.4.8 2011-11-24
2
+
3
+ * Encoding fixes
4
+ * Added sc:normalize_max and sc:normalize_min support
5
+
1
6
  === 0.4.7 2011-11-24
2
7
 
3
8
  * Bug fix in Ruby 1.9
@@ -8,6 +8,7 @@ require 'ostruct'
8
8
  require 'active_support'
9
9
  require 'tmpdir'
10
10
  require 'lightrdf'
11
+ require 'iconv'
11
12
 
12
13
  Namespace :sc, 'http://lab.gsi.dit.upm.es/scraping.rdf#'
13
14
 
@@ -24,5 +25,5 @@ require 'scrappy/agent/blind_agent'
24
25
  require 'scrappy/agent/agent'
25
26
 
26
27
  module Scrappy
27
- VERSION = '0.4.7'
28
+ VERSION = '0.4.8'
28
29
  end
@@ -65,7 +65,8 @@ module Sc
65
65
 
66
66
  # Build the object -- it can be a node or a literal
67
67
  object = if sc::type.include?(Node('rdf:Literal'))
68
- value = doc[:value].to_s.gsub("\302\240"," ").strip
68
+ ic = Iconv.new('UTF-8//IGNORE', 'UTF-8')
69
+ value = ic.iconv(doc[:value].to_s + ' ')[0..-2].gsub("\302\240"," ").strip
69
70
  if options[:referenceable]
70
71
  node.rdf::value = value
71
72
  node.rdf::type += [Node('rdf:Literal')]
@@ -1,3 +1,4 @@
1
+ # -*- encoding: utf-8 -*-
1
2
  module Sc
2
3
  class Selector
3
4
  include RDF::NodeProxy
@@ -37,6 +38,11 @@ module Sc
37
38
  end
38
39
  results = results.select{ |r| r[:value] != :remove }
39
40
  end
41
+ if sc::normalize_max.first
42
+ max = sc::normalize_max.first.to_f
43
+ min = sc::normalize_min.first.to_f
44
+ results.each { |r| r[:value] = ((r[:value].to_f-min) / (max-min)).to_s }
45
+ end
40
46
  if sc::nonempty.first=="true"
41
47
  results = results.select{ |r| r[:value].gsub("\302\240"," ").strip!=""}
42
48
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "scrappy"
5
- s.version = "0.4.7"
5
+ s.version = "0.4.8"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jose Ignacio"]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrappy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.7
4
+ version: 0.4.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2011-11-24 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activesupport
16
- requirement: &81718490 !ruby/object:Gem::Requirement
16
+ requirement: &82230020 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 2.3.5
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *81718490
24
+ version_requirements: *82230020
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: sinatra
27
- requirement: &81718210 !ruby/object:Gem::Requirement
27
+ requirement: &82229760 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.1.2
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *81718210
35
+ version_requirements: *82229760
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: thin
38
- requirement: &81717960 !ruby/object:Gem::Requirement
38
+ requirement: &82229510 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 1.2.7
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *81717960
46
+ version_requirements: *82229510
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: nokogiri
49
- requirement: &81717720 !ruby/object:Gem::Requirement
49
+ requirement: &82229270 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 1.4.1
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *81717720
57
+ version_requirements: *82229270
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: mechanize
60
- requirement: &81717430 !ruby/object:Gem::Requirement
60
+ requirement: &82229010 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 1.0.0
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *81717430
68
+ version_requirements: *82229010
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: lightrdf
71
- requirement: &81717180 !ruby/object:Gem::Requirement
71
+ requirement: &82228740 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 0.4.1
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *81717180
79
+ version_requirements: *82228740
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: i18n
82
- requirement: &81716890 !ruby/object:Gem::Requirement
82
+ requirement: &82228500 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: 0.4.2
88
88
  type: :runtime
89
89
  prerelease: false
90
- version_requirements: *81716890
90
+ version_requirements: *82228500
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: rest-client
93
- requirement: &81716630 !ruby/object:Gem::Requirement
93
+ requirement: &82228260 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: 1.6.1
99
99
  type: :runtime
100
100
  prerelease: false
101
- version_requirements: *81716630
101
+ version_requirements: *82228260
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: haml
104
- requirement: &81716070 !ruby/object:Gem::Requirement
104
+ requirement: &82227980 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ! '>='
@@ -109,10 +109,10 @@ dependencies:
109
109
  version: 3.0.24
110
110
  type: :runtime
111
111
  prerelease: false
112
- version_requirements: *81716070
112
+ version_requirements: *82227980
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: rack-flash
115
- requirement: &81715820 !ruby/object:Gem::Requirement
115
+ requirement: &82227730 !ruby/object:Gem::Requirement
116
116
  none: false
117
117
  requirements:
118
118
  - - ! '>='
@@ -120,7 +120,7 @@ dependencies:
120
120
  version: 0.1.1
121
121
  type: :runtime
122
122
  prerelease: false
123
- version_requirements: *81715820
123
+ version_requirements: *82227730
124
124
  description: RDF web scraper
125
125
  email: joseignacio.fernandez@gmail.com
126
126
  executables: