gscraper 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,10 @@
1
+ === 0.2.4 / 2009-03-18
2
+
3
+ * Added SponsoredAd#direct_link.
4
+ * Fixed a bug in SponsoredAd#direct_url where direct links were not
5
+ being URI escaped.
6
+ * Removed last references to Hpricot, replacing them with Nokogiri.
7
+
1
8
  === 0.2.3 / 2009-01-27
2
9
 
3
10
  * Fixed a bug in GScraper::Search::WebQuery#page, when the search query
data/README.txt CHANGED
@@ -1,6 +1,7 @@
1
1
  = GScraper
2
2
 
3
3
  * http://gscraper.rubyforge.org/
4
+ * http://github.com/postmodern/gscraper/
4
5
  * Postmodern (postmodern.mod3 at gmail.com)
5
6
 
6
7
  == DESCRIPTION:
@@ -127,7 +128,7 @@ GScraper is a web-scraping interface to various Google Services.
127
128
 
128
129
  GScraper - A web-scraping interface to various Google Services.
129
130
 
130
- Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
131
+ Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
131
132
 
132
133
  This program is free software; you can redistribute it and/or modify
133
134
  it under the terms of the GNU General Public License as published by
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -22,6 +22,7 @@
22
22
 
23
23
  require 'uri/http'
24
24
  require 'mechanize'
25
+ require 'nokogiri'
25
26
  require 'open-uri'
26
27
 
27
28
  module GScraper
@@ -127,10 +128,10 @@ module GScraper
127
128
  end
128
129
 
129
130
  #
130
- # Similar to GScraper.open_uri but returns an Hpricot document.
131
+ # Similar to GScraper.open_uri but returns a Nokogiri::HTML document.
131
132
  #
132
133
  def GScraper.open_page(uri,options={})
133
- Hpricot(GScraper.open_uri(uri,options))
134
+ Nokogiri::HTML(GScraper.open_uri(uri,options))
134
135
  end
135
136
 
136
137
  #
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -28,6 +28,7 @@ require 'gscraper/has_pages'
28
28
  require 'gscraper/gscraper'
29
29
 
30
30
  require 'json'
31
+ require 'nokogiri'
31
32
 
32
33
  module GScraper
33
34
  module Search
@@ -160,9 +161,9 @@ module GScraper
160
161
  if (hash.kind_of?(Hash) && hash['results'])
161
162
  hash['results'].each_with_index do |result,index|
162
163
  rank = rank_offset + (index + 1)
163
- title = Hpricot(result['title']).inner_text
164
+ title = Nokogiri::HTML(result['title']).inner_text
164
165
  url = URI(result['unescapedUrl'])
165
- summary = Hpricot(result['content']).inner_text
166
+ summary = Nokogiri::HTML(result['content']).inner_text
166
167
  cached_url = URI(result['cacheUrl'])
167
168
 
168
169
  new_page << Result.new(rank,title,url,summary,cached_url)
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -29,8 +29,6 @@ require 'gscraper/has_pages'
29
29
  require 'gscraper/licenses'
30
30
  require 'gscraper/gscraper'
31
31
 
32
- require 'hpricot'
33
-
34
32
  module GScraper
35
33
  module Search
36
34
  class Query
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -349,7 +349,7 @@ module GScraper
349
349
  result = results[index]
350
350
 
351
351
  rank = rank_offset + (index + 1)
352
- link = result.at('a.l')
352
+ link = result.at('h3.r/a')
353
353
  title = link.inner_text
354
354
  url = URI(link.get_attribute('href'))
355
355
  summary_text = ''
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -39,11 +39,18 @@ module GScraper
39
39
  @url = url
40
40
  end
41
41
 
42
+ #
43
+ # Returns the direct link of the ad.
44
+ #
45
+ def direct_link
46
+ @url.query_params['adurl'] || @url.query_params['q']
47
+ end
48
+
42
49
  #
43
50
  # Returns the direct URL of the ad.
44
51
  #
45
52
  def direct_url
46
- URI(@url.query_params['adurl'] || @url.query_params['q'])
53
+ URI(URI.escape(direct_link))
47
54
  end
48
55
 
49
56
  #
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -2,7 +2,7 @@
2
2
  #--
3
3
  # GScraper - A web-scraping interface to various Google Services.
4
4
  #
5
- # Copyright (c) 2007-2008 Hal Brodigan (postmodern.mod3 at gmail.com)
5
+ # Copyright (c) 2007-2009 Hal Brodigan (postmodern.mod3 at gmail.com)
6
6
  #
7
7
  # This program is free software; you can redistribute it and/or modify
8
8
  # it under the terms of the GNU General Public License as published by
@@ -21,5 +21,5 @@
21
21
  #
22
22
 
23
23
  module GScraper
24
- VERSION = '0.2.3'
24
+ VERSION = '0.2.4'
25
25
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Postmodern
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-01-27 00:00:00 -08:00
12
+ date: 2009-03-18 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -30,7 +30,7 @@ dependencies:
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 1.8.3
33
+ version: 1.10.0
34
34
  version:
35
35
  description: GScraper is a web-scraping interface to various Google Services.
36
36
  email: