gscraper 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +7 -0
- data/README.txt +2 -1
- data/lib/gscraper.rb +1 -1
- data/lib/gscraper/extensions.rb +1 -1
- data/lib/gscraper/extensions/uri.rb +1 -1
- data/lib/gscraper/extensions/uri/http.rb +1 -1
- data/lib/gscraper/gscraper.rb +4 -3
- data/lib/gscraper/has_pages.rb +1 -1
- data/lib/gscraper/licenses.rb +1 -1
- data/lib/gscraper/page.rb +1 -1
- data/lib/gscraper/search.rb +1 -1
- data/lib/gscraper/search/ajax_query.rb +4 -3
- data/lib/gscraper/search/page.rb +1 -1
- data/lib/gscraper/search/query.rb +1 -3
- data/lib/gscraper/search/result.rb +1 -1
- data/lib/gscraper/search/search.rb +1 -1
- data/lib/gscraper/search/web_query.rb +2 -2
- data/lib/gscraper/sponsored_ad.rb +9 -2
- data/lib/gscraper/sponsored_links.rb +1 -1
- data/lib/gscraper/version.rb +2 -2
- metadata +3 -3
data/History.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
=== 0.2.4 / 2009-03-18
|
2
|
+
|
3
|
+
* Added SponsoredAd#direct_link.
|
4
|
+
* Fixed a bug in SponsoredAd#direct_url where direct links were not
|
5
|
+
being URI escaped.
|
6
|
+
* Removed last references to Hpricot, replacing them with Nokogiri.
|
7
|
+
|
1
8
|
=== 0.2.3 / 2009-01-27
|
2
9
|
|
3
10
|
* Fixed a bug in GScraper::Search::WebQuery#page, when the search query
|
data/README.txt
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
= GScraper
|
2
2
|
|
3
3
|
* http://gscraper.rubyforge.org/
|
4
|
+
* http://github.com/postmodern/gscraper/
|
4
5
|
* Postmodern (postmodern.mod3 at gmail.com)
|
5
6
|
|
6
7
|
== DESCRIPTION:
|
@@ -127,7 +128,7 @@ GScraper is a web-scraping interface to various Google Services.
|
|
127
128
|
|
128
129
|
GScraper - A web-scraping interface to various Google Services.
|
129
130
|
|
130
|
-
Copyright (c) 2007-
|
131
|
+
Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
131
132
|
|
132
133
|
This program is free software; you can redistribute it and/or modify
|
133
134
|
it under the terms of the GNU General Public License as published by
|
data/lib/gscraper.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/extensions.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/gscraper.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -22,6 +22,7 @@
|
|
22
22
|
|
23
23
|
require 'uri/http'
|
24
24
|
require 'mechanize'
|
25
|
+
require 'nokogiri'
|
25
26
|
require 'open-uri'
|
26
27
|
|
27
28
|
module GScraper
|
@@ -127,10 +128,10 @@ module GScraper
|
|
127
128
|
end
|
128
129
|
|
129
130
|
#
|
130
|
-
# Similar to GScraper.open_uri but returns
|
131
|
+
# Similar to GScraper.open_uri but returns a Nokogiri::HTML document.
|
131
132
|
#
|
132
133
|
def GScraper.open_page(uri,options={})
|
133
|
-
|
134
|
+
Nokogiri::HTML(GScraper.open_uri(uri,options))
|
134
135
|
end
|
135
136
|
|
136
137
|
#
|
data/lib/gscraper/has_pages.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/licenses.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/page.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/search.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -28,6 +28,7 @@ require 'gscraper/has_pages'
|
|
28
28
|
require 'gscraper/gscraper'
|
29
29
|
|
30
30
|
require 'json'
|
31
|
+
require 'nokogiri'
|
31
32
|
|
32
33
|
module GScraper
|
33
34
|
module Search
|
@@ -160,9 +161,9 @@ module GScraper
|
|
160
161
|
if (hash.kind_of?(Hash) && hash['results'])
|
161
162
|
hash['results'].each_with_index do |result,index|
|
162
163
|
rank = rank_offset + (index + 1)
|
163
|
-
title =
|
164
|
+
title = Nokogiri::HTML(result['title']).inner_text
|
164
165
|
url = URI(result['unescapedUrl'])
|
165
|
-
summary =
|
166
|
+
summary = Nokogiri::HTML(result['content']).inner_text
|
166
167
|
cached_url = URI(result['cacheUrl'])
|
167
168
|
|
168
169
|
new_page << Result.new(rank,title,url,summary,cached_url)
|
data/lib/gscraper/search/page.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -29,8 +29,6 @@ require 'gscraper/has_pages'
|
|
29
29
|
require 'gscraper/licenses'
|
30
30
|
require 'gscraper/gscraper'
|
31
31
|
|
32
|
-
require 'hpricot'
|
33
|
-
|
34
32
|
module GScraper
|
35
33
|
module Search
|
36
34
|
class Query
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -349,7 +349,7 @@ module GScraper
|
|
349
349
|
result = results[index]
|
350
350
|
|
351
351
|
rank = rank_offset + (index + 1)
|
352
|
-
link = result.at('a
|
352
|
+
link = result.at('h3.r/a')
|
353
353
|
title = link.inner_text
|
354
354
|
url = URI(link.get_attribute('href'))
|
355
355
|
summary_text = ''
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -39,11 +39,18 @@ module GScraper
|
|
39
39
|
@url = url
|
40
40
|
end
|
41
41
|
|
42
|
+
#
|
43
|
+
# Returns the direct link of the ad.
|
44
|
+
#
|
45
|
+
def direct_link
|
46
|
+
@url.query_params['adurl'] || @url.query_params['q']
|
47
|
+
end
|
48
|
+
|
42
49
|
#
|
43
50
|
# Returns the direct URL of the ad.
|
44
51
|
#
|
45
52
|
def direct_url
|
46
|
-
URI(
|
53
|
+
URI(URI.escape(direct_link))
|
47
54
|
end
|
48
55
|
|
49
56
|
#
|
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
data/lib/gscraper/version.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
#--
|
3
3
|
# GScraper - A web-scraping interface to various Google Services.
|
4
4
|
#
|
5
|
-
# Copyright (c) 2007-
|
5
|
+
# Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
|
6
6
|
#
|
7
7
|
# This program is free software; you can redistribute it and/or modify
|
8
8
|
# it under the terms of the GNU General Public License as published by
|
@@ -21,5 +21,5 @@
|
|
21
21
|
#
|
22
22
|
|
23
23
|
module GScraper
|
24
|
-
VERSION = '0.2.
|
24
|
+
VERSION = '0.2.4'
|
25
25
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gscraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Postmodern
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-03-18 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -30,7 +30,7 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.
|
33
|
+
version: 1.10.0
|
34
34
|
version:
|
35
35
|
description: GScraper is a web-scraping interface to various Google Services.
|
36
36
|
email:
|