scrappy 0.1.19 → 0.1.20
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/Rakefile +1 -1
- data/lib/scrappy.rb +1 -1
- data/lib/scrappy/selectors/new_uri.rb +9 -1
- data/scrappy.gemspec +5 -2
- metadata +17 -3
data/History.txt
CHANGED
data/Rakefile
CHANGED
@@ -11,7 +11,7 @@ Echoe.new('scrappy', Scrappy::VERSION) do |p|
|
|
11
11
|
p.email = "joseignacio.fernandez@gmail.com"
|
12
12
|
p.install_message = '**(Optional) Remember to install rbwebkitgtk for visual parsing features**'
|
13
13
|
p.ignore_pattern = ["pkg/*"]
|
14
|
-
p.dependencies = [['activesupport','>= 2.3.5'], ['sinatra', '>= 1.1.2'], ['thin', '>= 1.2.7'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.1.9'], ['i18n', '>= 0.4.2']]
|
14
|
+
p.dependencies = [['activesupport','>= 2.3.5'], ['sinatra', '>= 1.1.2'], ['thin', '>= 1.2.7'], ['nokogiri', '>= 1.4.1'], ['mechanize','>= 1.0.0'], ['lightrdf','>= 0.1.9'], ['i18n', '>= 0.4.2'], ['haml', '>= 3.0.24']]
|
15
15
|
end
|
16
16
|
|
17
17
|
Rake::RDocTask.new(:rdoc) do |rdoc|
|
data/lib/scrappy.rb
CHANGED
data/lib/scrappy/selectors/new_uri.rb
CHANGED
@@ -7,8 +7,16 @@ module NewUriSelector
|
|
7
7
|
[ doc[:content].text ]
|
8
8
|
end
|
9
9
|
|
10
|
+
@@indexes ||= Hash.new(0)
|
11
|
+
prefix = selector.sc::prefix.first.to_s
|
12
|
+
prefix = (prefix =~ /\Ahttp/ ? URI::parse(doc[:uri]).merge(prefix).to_s : "#{doc[:uri]}#{prefix}")
|
13
|
+
suffix = selector.sc::suffix.first.to_s
|
14
|
+
|
10
15
|
contents.map do |content|
|
11
|
-
|
16
|
+
variable = selector.sc::sequence.first.to_s=="true" ? (@@indexes[selector] += 1) : content.wikify
|
17
|
+
|
18
|
+
new_uri = "#{prefix}#{variable}#{suffix}"
|
19
|
+
|
12
20
|
{ :uri=>new_uri, :content=>doc[:content], :value=>new_uri }
|
13
21
|
end
|
14
22
|
end
|
data/scrappy.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{scrappy}
|
5
|
-
s.version = "0.1.19"
|
5
|
+
s.version = "0.1.20"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Jose Ignacio"]
|
9
|
-
s.date = %q{2011-02-
|
9
|
+
s.date = %q{2011-02-25}
|
10
10
|
s.default_executable = %q{scrappy}
|
11
11
|
s.description = %q{RDF web scraper}
|
12
12
|
s.email = %q{joseignacio.fernandez@gmail.com}
|
@@ -34,6 +34,7 @@ Gem::Specification.new do |s|
|
|
34
34
|
s.add_runtime_dependency(%q<mechanize>, [">= 1.0.0"])
|
35
35
|
s.add_runtime_dependency(%q<lightrdf>, [">= 0.1.9"])
|
36
36
|
s.add_runtime_dependency(%q<i18n>, [">= 0.4.2"])
|
37
|
+
s.add_runtime_dependency(%q<haml>, [">= 3.0.24"])
|
37
38
|
else
|
38
39
|
s.add_dependency(%q<activesupport>, [">= 2.3.5"])
|
39
40
|
s.add_dependency(%q<sinatra>, [">= 1.1.2"])
|
@@ -42,6 +43,7 @@ Gem::Specification.new do |s|
|
|
42
43
|
s.add_dependency(%q<mechanize>, [">= 1.0.0"])
|
43
44
|
s.add_dependency(%q<lightrdf>, [">= 0.1.9"])
|
44
45
|
s.add_dependency(%q<i18n>, [">= 0.4.2"])
|
46
|
+
s.add_dependency(%q<haml>, [">= 3.0.24"])
|
45
47
|
end
|
46
48
|
else
|
47
49
|
s.add_dependency(%q<activesupport>, [">= 2.3.5"])
|
@@ -51,5 +53,6 @@ Gem::Specification.new do |s|
|
|
51
53
|
s.add_dependency(%q<mechanize>, [">= 1.0.0"])
|
52
54
|
s.add_dependency(%q<lightrdf>, [">= 0.1.9"])
|
53
55
|
s.add_dependency(%q<i18n>, [">= 0.4.2"])
|
56
|
+
s.add_dependency(%q<haml>, [">= 3.0.24"])
|
54
57
|
end
|
55
58
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 1
|
8
|
-
- 19
|
9
|
-
version: 0.1.19
|
8
|
+
- 20
|
9
|
+
version: 0.1.20
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Jose Ignacio
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-02-
|
17
|
+
date: 2011-02-25 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -115,6 +115,20 @@ dependencies:
|
|
115
115
|
version: 0.4.2
|
116
116
|
type: :runtime
|
117
117
|
version_requirements: *id007
|
118
|
+
- !ruby/object:Gem::Dependency
|
119
|
+
name: haml
|
120
|
+
prerelease: false
|
121
|
+
requirement: &id008 !ruby/object:Gem::Requirement
|
122
|
+
requirements:
|
123
|
+
- - ">="
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
segments:
|
126
|
+
- 3
|
127
|
+
- 0
|
128
|
+
- 24
|
129
|
+
version: 3.0.24
|
130
|
+
type: :runtime
|
131
|
+
version_requirements: *id008
|
118
132
|
description: RDF web scraper
|
119
133
|
email: joseignacio.fernandez@gmail.com
|
120
134
|
executables:
|