serp_scraper 1.0.0 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -0
- data/EXAMPLES.md +0 -0
- data/README.md +0 -0
- data/lib/engines/google.rb +10 -3
- data/lib/serp_response.rb +0 -0
- data/lib/serp_scraper.rb +0 -0
- data/serp_scraper.gemspec +3 -3
- metadata +5 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37e155ad5c882faa1b543fc3f875b417ed2f903a
|
4
|
+
data.tar.gz: 18089df07af8acfc609da7552d2282eae2ff05d3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6736db5b2e411387cf6401b888b0e20b723483801e05fd80aab0a7823c65740f9a047b019b51f45c92b45dfa91c3004a94a215e750f039123b6246028e884934
|
7
|
+
data.tar.gz: 0123239f14dc80688a682a362e27f3739731a71b53a8ff836a56f655df04766040e1adfcb7df2f95c5d121d2e1e6b45856954a50287bf4dcdc6cb48a407932cb
|
data/.gitignore
CHANGED
File without changes
|
data/EXAMPLES.md
CHANGED
File without changes
|
data/README.md
CHANGED
File without changes
|
data/lib/engines/google.rb
CHANGED
@@ -89,8 +89,10 @@ class SerpScraper::Google
|
|
89
89
|
doc = Nokogiri::HTML(html)
|
90
90
|
results = Array.new
|
91
91
|
|
92
|
-
rows = doc.css('h3.r > a')
|
93
|
-
|
92
|
+
rows = doc.css('div.rc h3.r > a')
|
93
|
+
|
94
|
+
position = 1
|
95
|
+
rows.each do |row|
|
94
96
|
begin
|
95
97
|
href = Addressable::URI.parse(row["href"])
|
96
98
|
|
@@ -99,14 +101,19 @@ class SerpScraper::Google
|
|
99
101
|
|
100
102
|
url = Addressable::URI.parse(external_url)
|
101
103
|
|
104
|
+
next unless url.host # Only add valid URL's (ignore images, news etc)
|
105
|
+
|
102
106
|
results.push({
|
103
|
-
position:
|
107
|
+
position: position,
|
104
108
|
title: row.content,
|
105
109
|
scheme: url.scheme,
|
106
110
|
domain: url.host,
|
107
111
|
url: url.request_uri,
|
108
112
|
full_url: url.to_s
|
109
113
|
})
|
114
|
+
|
115
|
+
position += 1
|
116
|
+
|
110
117
|
rescue
|
111
118
|
next
|
112
119
|
end
|
data/lib/serp_response.rb
CHANGED
File without changes
|
data/lib/serp_scraper.rb
CHANGED
File without changes
|
data/serp_scraper.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'serp_scraper'
|
3
|
-
s.version = '1.0.0'
|
3
|
+
s.version = '1.0.4'
|
4
4
|
s.date = '2017-05-26'
|
5
5
|
|
6
6
|
s.homepage = 'https://github.com/kjellberg'
|
@@ -15,8 +15,8 @@ Gem::Specification.new do |s|
|
|
15
15
|
s.require_paths = ["lib"]
|
16
16
|
s.files = `git ls-files`.split($/)
|
17
17
|
|
18
|
-
s.add_runtime_dependency 'mechanize', '~> 2.7', '>= 2.7.5'
|
18
|
+
s.add_runtime_dependency 'mechanize', '~> 2.7.0'
|
19
19
|
s.add_runtime_dependency 'addressable', '~> 2.5'
|
20
|
-
s.add_runtime_dependency 'nokogiri', '~>
|
20
|
+
s.add_runtime_dependency 'nokogiri', '~> 1.6'
|
21
21
|
s.add_runtime_dependency 'deathbycaptcha', '~> 5.0.0'
|
22
22
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: serp_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Rasmus Kjellberg
|
@@ -16,20 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '2.7'
|
20
|
-
- - ">="
|
21
|
-
- !ruby/object:Gem::Version
|
22
|
-
version: 2.7.5
|
19
|
+
version: 2.7.0
|
23
20
|
type: :runtime
|
24
21
|
prerelease: false
|
25
22
|
version_requirements: !ruby/object:Gem::Requirement
|
26
23
|
requirements:
|
27
24
|
- - "~>"
|
28
25
|
- !ruby/object:Gem::Version
|
29
|
-
version: '2.7'
|
30
|
-
- - ">="
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: 2.7.5
|
26
|
+
version: 2.7.0
|
33
27
|
- !ruby/object:Gem::Dependency
|
34
28
|
name: addressable
|
35
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -50,20 +44,14 @@ dependencies:
|
|
50
44
|
requirements:
|
51
45
|
- - "~>"
|
52
46
|
- !ruby/object:Gem::Version
|
53
|
-
version: '
|
54
|
-
- - ">="
|
55
|
-
- !ruby/object:Gem::Version
|
56
|
-
version: 2.9.4
|
47
|
+
version: '1.6'
|
57
48
|
type: :runtime
|
58
49
|
prerelease: false
|
59
50
|
version_requirements: !ruby/object:Gem::Requirement
|
60
51
|
requirements:
|
61
52
|
- - "~>"
|
62
53
|
- !ruby/object:Gem::Version
|
63
|
-
version: '
|
64
|
-
- - ">="
|
65
|
-
- !ruby/object:Gem::Version
|
66
|
-
version: 2.9.4
|
54
|
+
version: '1.6'
|
67
55
|
- !ruby/object:Gem::Dependency
|
68
56
|
name: deathbycaptcha
|
69
57
|
requirement: !ruby/object:Gem::Requirement
|