gscraper 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +7 -0
- data/README.txt +2 -1
- data/lib/gscraper.rb +1 -1
- data/lib/gscraper/extensions.rb +1 -1
- data/lib/gscraper/extensions/uri.rb +1 -1
- data/lib/gscraper/extensions/uri/http.rb +1 -1
- data/lib/gscraper/gscraper.rb +4 -3
- data/lib/gscraper/has_pages.rb +1 -1
- data/lib/gscraper/licenses.rb +1 -1
- data/lib/gscraper/page.rb +1 -1
- data/lib/gscraper/search.rb +1 -1
- data/lib/gscraper/search/ajax_query.rb +4 -3
- data/lib/gscraper/search/page.rb +1 -1
- data/lib/gscraper/search/query.rb +1 -3
- data/lib/gscraper/search/result.rb +1 -1
- data/lib/gscraper/search/search.rb +1 -1
- data/lib/gscraper/search/web_query.rb +2 -2
- data/lib/gscraper/sponsored_ad.rb +9 -2
- data/lib/gscraper/sponsored_links.rb +1 -1
- data/lib/gscraper/version.rb +2 -2
- metadata +3 -3
data/History.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
=== 0.2.4 / 2009-03-18
|
2
|
+
|
3
|
+
* Added SponsoredAd#direct_link.
|
4
|
+
* Fixed a bug in SponsoredAd#direct_url where direct links were not
|
5
|
+
being URI escaped.
|
6
|
+
* Removed last references to Hpricot, replacing them with Nokogiri.
|
7
|
+
|
1
8
|
=== 0.2.3 / 2009-01-27
|
2
9
|
|
3
10
|
* Fixed a bug in GScraper::Search::WebQuery#page, when the search query
|
data/README.txt
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
= GScraper
|
2
2
|
|
3
3
|
* http://gscraper.rubyforge.org/
|
4
|
+
* http://github.com/postmodern/gscraper/
|
4
5
|
* Postmodern (postmodern.mod3 at gmail.com)
|
5
6
|
|
6
7
|
== DESCRIPTION:
|
@@ -127,7 +128,7 @@ GScraper is a web-scraping interface to various Google Services.
|
|
127
128
|
|
128
129
|
GScraper - A web-scraping interface to various Google Services.
|
129
130
|
|
130
|
-
Copyright (c) 2007-
|
131
|
+
Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
131
132
|
|
132
133
|
This program is free software; you can redistribute it and/or modify
|
133
134
|
it under the terms of the GNU General Public License as published by
|
data/lib/gscraper.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/extensions.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/gscraper.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -22,6 +22,7 @@
|
|
22
22
|
|
23
23
|
require 'uri/http'
|
24
24
|
require 'mechanize'
|
25
|
+
require 'nokogiri'
|
25
26
|
require 'open-uri'
|
26
27
|
|
27
28
|
module GScraper
|
@@ -127,10 +128,10 @@ module GScraper
|
|
127
128
|
end
|
128
129
|
|
129
130
|
#
|
130
|
-
# Similar to GScraper.open_uri but returns
|
131
|
+
# Similar to GScraper.open_uri but returns a Nokogiri::HTML document.
|
131
132
|
#
|
132
133
|
def GScraper.open_page(uri,options={})
|
133
|
-
|
134
|
+
Nokogiri::HTML(GScraper.open_uri(uri,options))
|
134
135
|
end
|
135
136
|
|
136
137
|
#
|
data/lib/gscraper/has_pages.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/licenses.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/page.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/search.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -28,6 +28,7 @@ require 'gscraper/has_pages'
|
|
28
28
|
require 'gscraper/gscraper'
|
29
29
|
|
30
30
|
require 'json'
|
31
|
+
require 'nokogiri'
|
31
32
|
|
32
33
|
module GScraper
|
33
34
|
module Search
|
@@ -160,9 +161,9 @@ module GScraper
|
|
160
161
|
if (hash.kind_of?(Hash) && hash['results'])
|
161
162
|
hash['results'].each_with_index do |result,index|
|
162
163
|
rank = rank_offset + (index + 1)
|
163
|
-
title =
|
164
|
+
title = Nokogiri::HTML(result['title']).inner_text
|
164
165
|
url = URI(result['unescapedUrl'])
|
165
|
-
summary =
|
166
|
+
summary = Nokogiri::HTML(result['content']).inner_text
|
166
167
|
cached_url = URI(result['cacheUrl'])
|
167
168
|
|
168
169
|
new_page << Result.new(rank,title,url,summary,cached_url)
|
data/lib/gscraper/search/page.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -29,8 +29,6 @@ require 'gscraper/has_pages'
|
|
29
29
|
require 'gscraper/licenses'
|
30
30
|
require 'gscraper/gscraper'
|
31
31
|
|
32
|
-
require 'hpricot'
|
33
|
-
|
34
32
|
module GScraper
|
35
33
|
module Search
|
36
34
|
class Query
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -349,7 +349,7 @@ module GScraper
|
|
349
349
|
result = results[index]
|
350
350
|
|
351
351
|
rank = rank_offset + (index + 1)
|
352
|
-
link = result.at('a
|
352
|
+
link = result.at('h3.r/a')
|
353
353
|
title = link.inner_text
|
354
354
|
url = URI(link.get_attribute('href'))
|
355
355
|
summary_text = ''
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -39,11 +39,18 @@ module GScraper
|
|
39
39
|
@url = url
|
40
40
|
end
|
41
41
|
|
42
|
+
#
|
43
|
+
# Returns the direct link of the ad.
|
44
|
+
#
|
45
|
+
def direct_link
|
46
|
+
@url.query_params['adurl'] || @url.query_params['q']
|
47
|
+
end
|
48
|
+
|
42
49
|
#
|
43
50
|
# Returns the direct URL of the ad.
|
44
51
|
#
|
45
52
|
def direct_url
|
46
|
-
URI(
|
53
|
+
URI(URI.escape(direct_link))
|
47
54
|
end
|
48
55
|
|
49
56
|
#
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/version.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -21,5 +21,5 @@
|
|
21
21
|
#
|
22
22
|
|
23
23
|
module GScraper
|
24
|
-
VERSION = '0.2.
|
24
|
+
VERSION = '0.2.4'
|
25
25
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gscraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Postmodern
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-03-18 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -30,7 +30,7 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.
|
33
|
+
version: 1.10.0
|
34
34
|
version:
|
35
35
|
description: GScraper is a web-scraping interface to various Google Services.
|
36
36
|
email:
|