scrappy 0.1.19 → 0.1.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ === 0.1.20 2011-02-25
2
+
3
+ * Added missing dependency (haml)
4
+ * Support for more predicates in NewUriSelector
5
+
1
6
  === 0.1.19 2011-02-24
2
7
 
3
8
  * Using Sinatra (in production mode) instead of Camping
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ Echoe.new('scrappy', Scrappy::VERSION) do |p|
11
11
  p.email = "joseignacio.fernandez@gmail.com"
12
12
  p.install_message = '**(Optional) Remember to install rbwebkitgtk for visual parsing features**'
13
13
  p.ignore_pattern = ["pkg/*"]
14
- p.dependencies = [['activesupport','>= 2.3.5'], ['sinatra', '>= 1.1.2'], ['thin', '>= 1.2.7'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.1.9'], ['i18n', '>= 0.4.2']]
14
+ p.dependencies = [['activesupport','>= 2.3.5'], ['sinatra', '>= 1.1.2'], ['thin', '>= 1.2.7'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.1.9'], ['i18n', '>= 0.4.2'], ['haml', '>= 3.0.24']]
15
15
  end
16
16
 
17
17
  Rake::RDocTask.new(:rdoc) do |rdoc|
data/lib/scrappy.rb CHANGED
@@ -21,7 +21,7 @@ require 'scrappy/agent/agent'
21
21
  Namespace :sc, 'http://lab.gsi.dit.upm.es/scraping.rdf#'
22
22
 
23
23
  module Scrappy
24
- VERSION = '0.1.19'
24
+ VERSION = '0.1.20'
25
25
  end
26
26
 
27
27
  # Require selectors
@@ -7,8 +7,16 @@ module NewUriSelector
7
7
  [ doc[:content].text ]
8
8
  end
9
9
 
10
+ @@indexes ||= Hash.new(0)
11
+ prefix = selector.sc::prefix.first.to_s
12
+ prefix = (prefix =~ /\Ahttp/ ? URI::parse(doc[:uri]).merge(prefix).to_s : "#{doc[:uri]}#{prefix}")
13
+ suffix = selector.sc::suffix.first.to_s
14
+
10
15
  contents.map do |content|
11
- new_uri = selector.sc::prefix.first.to_s + content.wikify + selector.sc::suffix.first.to_s
16
+ variable = selector.sc::sequence.first.to_s=="true" ? (@@indexes[selector] += 1) : content.wikify
17
+
18
+ new_uri = "#{prefix}#{variable}#{suffix}"
19
+
12
20
  { :uri=>new_uri, :content=>doc[:content], :value=>new_uri }
13
21
  end
14
22
  end
data/scrappy.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{scrappy}
5
- s.version = "0.1.19"
5
+ s.version = "0.1.20"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jose Ignacio"]
9
- s.date = %q{2011-02-24}
9
+ s.date = %q{2011-02-25}
10
10
  s.default_executable = %q{scrappy}
11
11
  s.description = %q{RDF web scraper}
12
12
  s.email = %q{joseignacio.fernandez@gmail.com}
@@ -34,6 +34,7 @@ Gem::Specification.new do |s|
34
34
  s.add_runtime_dependency(%q<mechanize>, [">= 1.0.0"])
35
35
  s.add_runtime_dependency(%q<lightrdf>, [">= 0.1.9"])
36
36
  s.add_runtime_dependency(%q<i18n>, [">= 0.4.2"])
37
+ s.add_runtime_dependency(%q<haml>, [">= 3.0.24"])
37
38
  else
38
39
  s.add_dependency(%q<activesupport>, [">= 2.3.5"])
39
40
  s.add_dependency(%q<sinatra>, [">= 1.1.2"])
@@ -42,6 +43,7 @@ Gem::Specification.new do |s|
42
43
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
43
44
  s.add_dependency(%q<lightrdf>, [">= 0.1.9"])
44
45
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
46
+ s.add_dependency(%q<haml>, [">= 3.0.24"])
45
47
  end
46
48
  else
47
49
  s.add_dependency(%q<activesupport>, [">= 2.3.5"])
@@ -51,5 +53,6 @@ Gem::Specification.new do |s|
51
53
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
52
54
  s.add_dependency(%q<lightrdf>, [">= 0.1.9"])
53
55
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
56
+ s.add_dependency(%q<haml>, [">= 3.0.24"])
54
57
  end
55
58
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 19
9
- version: 0.1.19
8
+ - 20
9
+ version: 0.1.20
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jose Ignacio
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-02-24 00:00:00 +01:00
17
+ date: 2011-02-25 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -115,6 +115,20 @@ dependencies:
115
115
  version: 0.4.2
116
116
  type: :runtime
117
117
  version_requirements: *id007
118
+ - !ruby/object:Gem::Dependency
119
+ name: haml
120
+ prerelease: false
121
+ requirement: &id008 !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - ">="
124
+ - !ruby/object:Gem::Version
125
+ segments:
126
+ - 3
127
+ - 0
128
+ - 24
129
+ version: 3.0.24
130
+ type: :runtime
131
+ version_requirements: *id008
118
132
  description: RDF web scraper
119
133
  email: joseignacio.fernandez@gmail.com
120
134
  executables: