scrappy 0.1.19 → 0.1.20

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ === 0.1.20 2011-02-25
2
+
3
+ * Added missing dependency (haml)
4
+ * Support for more predicates in NewUriSelector
5
+
1
6
  === 0.1.19 2011-02-24
2
7
 
3
8
  * Using Sinatra (in production mode) instead of Camping
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ Echoe.new('scrappy', Scrappy::VERSION) do |p|
11
11
  p.email = "joseignacio.fernandez@gmail.com"
12
12
  p.install_message = '**(Optional) Remember to install rbwebkitgtk for visual parsing features**'
13
13
  p.ignore_pattern = ["pkg/*"]
14
- p.dependencies = [['activesupport','>= 2.3.5'], ['sinatra', '>= 1.1.2'], ['thin', '>= 1.2.7'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.1.9'], ['i18n', '>= 0.4.2']]
14
+ p.dependencies = [['activesupport','>= 2.3.5'], ['sinatra', '>= 1.1.2'], ['thin', '>= 1.2.7'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.1.9'], ['i18n', '>= 0.4.2'], ['haml', '>= 3.0.24']]
15
15
  end
16
16
 
17
17
  Rake::RDocTask.new(:rdoc) do |rdoc|
data/lib/scrappy.rb CHANGED
@@ -21,7 +21,7 @@ require 'scrappy/agent/agent'
21
21
  Namespace :sc, 'http://lab.gsi.dit.upm.es/scraping.rdf#'
22
22
 
23
23
  module Scrappy
24
- VERSION = '0.1.19'
24
+ VERSION = '0.1.20'
25
25
  end
26
26
 
27
27
  # Require selectors
@@ -7,8 +7,16 @@ module NewUriSelector
7
7
  [ doc[:content].text ]
8
8
  end
9
9
 
10
+ @@indexes ||= Hash.new(0)
11
+ prefix = selector.sc::prefix.first.to_s
12
+ prefix = (prefix =~ /\Ahttp/ ? URI::parse(doc[:uri]).merge(prefix).to_s : "#{doc[:uri]}#{prefix}")
13
+ suffix = selector.sc::suffix.first.to_s
14
+
10
15
  contents.map do |content|
11
- new_uri = selector.sc::prefix.first.to_s + content.wikify + selector.sc::suffix.first.to_s
16
+ variable = selector.sc::sequence.first.to_s=="true" ? (@@indexes[selector] += 1) : content.wikify
17
+
18
+ new_uri = "#{prefix}#{variable}#{suffix}"
19
+
12
20
  { :uri=>new_uri, :content=>doc[:content], :value=>new_uri }
13
21
  end
14
22
  end
data/scrappy.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{scrappy}
5
- s.version = "0.1.19"
5
+ s.version = "0.1.20"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Jose Ignacio"]
9
- s.date = %q{2011-02-24}
9
+ s.date = %q{2011-02-25}
10
10
  s.default_executable = %q{scrappy}
11
11
  s.description = %q{RDF web scraper}
12
12
  s.email = %q{joseignacio.fernandez@gmail.com}
@@ -34,6 +34,7 @@ Gem::Specification.new do |s|
34
34
  s.add_runtime_dependency(%q<mechanize>, [">= 1.0.0"])
35
35
  s.add_runtime_dependency(%q<lightrdf>, [">= 0.1.9"])
36
36
  s.add_runtime_dependency(%q<i18n>, [">= 0.4.2"])
37
+ s.add_runtime_dependency(%q<haml>, [">= 3.0.24"])
37
38
  else
38
39
  s.add_dependency(%q<activesupport>, [">= 2.3.5"])
39
40
  s.add_dependency(%q<sinatra>, [">= 1.1.2"])
@@ -42,6 +43,7 @@ Gem::Specification.new do |s|
42
43
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
43
44
  s.add_dependency(%q<lightrdf>, [">= 0.1.9"])
44
45
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
46
+ s.add_dependency(%q<haml>, [">= 3.0.24"])
45
47
  end
46
48
  else
47
49
  s.add_dependency(%q<activesupport>, [">= 2.3.5"])
@@ -51,5 +53,6 @@ Gem::Specification.new do |s|
51
53
  s.add_dependency(%q<mechanize>, [">= 1.0.0"])
52
54
  s.add_dependency(%q<lightrdf>, [">= 0.1.9"])
53
55
  s.add_dependency(%q<i18n>, [">= 0.4.2"])
56
+ s.add_dependency(%q<haml>, [">= 3.0.24"])
54
57
  end
55
58
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 19
9
- version: 0.1.19
8
+ - 20
9
+ version: 0.1.20
10
10
  platform: ruby
11
11
  authors:
12
12
  - Jose Ignacio
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-02-24 00:00:00 +01:00
17
+ date: 2011-02-25 00:00:00 +01:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -115,6 +115,20 @@ dependencies:
115
115
  version: 0.4.2
116
116
  type: :runtime
117
117
  version_requirements: *id007
118
+ - !ruby/object:Gem::Dependency
119
+ name: haml
120
+ prerelease: false
121
+ requirement: &id008 !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - ">="
124
+ - !ruby/object:Gem::Version
125
+ segments:
126
+ - 3
127
+ - 0
128
+ - 24
129
+ version: 3.0.24
130
+ type: :runtime
131
+ version_requirements: *id008
118
132
  description: RDF web scraper
119
133
  email: joseignacio.fernandez@gmail.com
120
134
  executables: