gscraper 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,12 @@
1
+ require 'gscraper/search/query'
2
+ require 'gscraper/web_agent'
3
+
1
4
  module GScraper
2
5
  module Search
3
6
  class Result
4
7
 
8
+ include WebAgent
9
+
5
10
  # Rank of the result page
6
11
  attr_reader :rank
7
12
 
@@ -33,18 +38,6 @@ module GScraper
33
38
  @similar_url = similar_url
34
39
  end
35
40
 
36
- #
37
- # Opens the URL of the cached page for the Result. If _opts_ are
38
- # given, they will be used in accessing the cached page URL.
39
- #
40
- # result.cached_page # => File
41
- #
42
- def cached_page(opts={})
43
- if @cached_url
44
- return GScraper.open(@cached_url,opts)
45
- end
46
- end
47
-
48
41
  #
49
42
  # Create a new Query for results that are similar to the Result. If
50
43
  # a _block_ is given, it will be passed the newly created Query
@@ -64,6 +57,14 @@ module GScraper
64
57
  end
65
58
  end
66
59
 
60
+ #
61
+ # Fetches the cached page of the result. If a _block_ is given it will
62
+ # be passed the cached page.
63
+ #
64
def cached_page(&block)
  # @cached_url can be nil; never hand a nil URL to get_page. In that
  # case nothing is fetched, the block is not called and nil is returned.
  return nil unless @cached_url

  # Any given block is forwarded untouched to get_page.
  get_page(@cached_url,&block)
end
67
+
67
68
  #
68
69
  # Returns a string containing the result's title.
69
70
  #
@@ -3,7 +3,7 @@ require 'gscraper/search/query'
3
3
  module GScraper
4
4
  module Search
5
5
  #
6
- # Returns a new Query object with the given _opts_. See Query.new.
6
+ # Returns a new Query object with the given _options_. See Query.new.
7
7
  #
8
8
  # Search.query(:query => 'ruby', :with_words => 'sow rspec')
9
9
  #
@@ -11,8 +11,8 @@ module GScraper
11
11
  # q.within_past_week = true
12
12
  # end
13
13
  #
14
- def Search.query(opts={},&block)
15
- Query.new(opts,&block)
14
+ def Search.query(options={},&block)
15
+ Query.new(options,&block)
16
16
  end
17
17
 
18
18
  #
@@ -0,0 +1,35 @@
1
+ require 'gscraper/extensions/uri'
2
+
3
module GScraper
  #
  # A single sponsored ad: a title paired with the URL the ad points to.
  #
  class SponsoredAd

    # Title of the ad
    attr_reader :title

    # URL of the ad (a URI object)
    attr_reader :url

    #
    # Creates a new SponsoredAd with the specified _title_ and _url_.
    # The _url_ may be either a String, which is parsed with URI.parse,
    # or an already parsed URI, which is stored unchanged.
    #
    #   SponsoredAd.new('Buy pants', 'http://example.com/?q=pants')
    #
    def initialize(title,url)
      @title = title

      # URI.parse rejects arguments that are already URI objects, so only
      # parse values that are not URIs yet.
      @url = if url.kind_of?(URI::Generic)
               url
             else
               URI.parse(url)
             end
    end

    #
    # Returns the direct URL of the ad: the value of the 'adurl' query
    # parameter of the ad's URL or, when that is missing, the value of
    # the 'q' query parameter.
    #
    def direct_url
      # NOTE(review): query_params is not a stdlib URI method; presumably
      # it is provided by gscraper/extensions/uri -- confirm.
      params = @url.query_params

      params['adurl'] || params['q']
    end

    #
    # Returns the title of the ad as a String.
    #
    def to_s
      @title.to_s
    end

  end
end
@@ -0,0 +1,151 @@
1
+ require 'gscraper/sponsored_ad'
2
+
3
module GScraper
  #
  # An Array of sponsored ads with helpers for filtering the ads and for
  # collecting their titles and URLs. Each element is expected to respond
  # to +title+ and +url+.
  #
  class SponsoredLinks < Array
    #
    # Creates a new SponsoredLinks object with the given _ads_.
    #
    def initialize(ads=[])
      super(ads)
    end

    #
    # Returns a mapped Array of the ads within the SponsoredLinks
    # using the given _block_. If the _block_ is not given, the
    # SponsoredLinks will be returned.
    #
    #   sponsored.map # => SponsoredLinks
    #
    #   sponsored.map { |ad| ad.url } # => [...]
    #
    def map(&block)
      return self unless block

      super(&block)
    end

    #
    # Selects the ads within the SponsoredLinks which match the given
    # _block_. The matching ads are returned as a new SponsoredLinks.
    #
    #   sponsored.select { |ad| ad.title =~ /consume/i }
    #
    def select(&block)
      SponsoredLinks.new(super(&block))
    end

    #
    # Selects the ads using the specified _block_. Alias-like wrapper
    # around select.
    #
    #   sponsored.ads_with { |ad| ad.title =~ /status symbol/ }
    #
    def ads_with(&block)
      select(&block)
    end

    #
    # Selects the ads with the matching _title_. The _title_ may be
    # either a String or a Regexp. If _block_ is given, each matching
    # ad will be passed to the _block_. Returns the matching ads.
    #
    #   sponsored.ads_with_title('be attractive') #=> SponsoredLinks
    #
    #   sponsored.ads_with_title(/buy me/) do |ad|
    #     puts ad.url
    #   end
    #
    def ads_with_title(title,&block)
      ads = if title.kind_of?(Regexp)
              ads_with { |ad| ad.title =~ title }
            else
              ads_with { |ad| ad.title == title }
            end

      ads.each(&block) if block
      return ads
    end

    #
    # Selects the ads with the matching _url_. The _url_ may be a
    # String, a URI or a Regexp. If _block_ is given, each matching
    # ad will be passed to the _block_. Returns the matching ads.
    #
    #   sponsored.ads_with_url(/\.com/) # => SponsoredLinks
    #
    #   sponsored.ads_with_url(/^https:\/\//) do |ad|
    #     puts ad.title
    #   end
    #
    def ads_with_url(url,&block)
      ads = if url.kind_of?(Regexp)
              # A URI object cannot be matched against a Regexp directly;
              # match against its String form instead.
              ads_with { |ad| ad.url.to_s =~ url }
            else
              # A URI never equals a String, so fall back to comparing the
              # String forms when the direct comparison fails.
              ads_with { |ad| ad.url == url || ad.url.to_s == url.to_s }
            end

      ads.each(&block) if block
      return ads
    end

    #
    # Returns an Array containing the titles of the ads within the
    # SponsoredLinks.
    #
    #   sponsored.titles # => [...]
    #
    def titles
      map { |ad| ad.title }
    end

    #
    # Returns an Array containing the URLs of the ads within the
    # SponsoredLinks.
    #
    #   sponsored.urls # => [...]
    #
    def urls
      map { |ad| ad.url }
    end

    #
    # Iterates over each ad's title within the SponsoredLinks, passing each to
    # the given _block_.
    #
    #   each_title { |title| puts title }
    #
    def each_title(&block)
      titles.each(&block)
    end

    #
    # Iterates over each ad's url within the SponsoredLinks, passing each to
    # the given _block_.
    #
    #   each_url { |url| puts url }
    #
    def each_url(&block)
      urls.each(&block)
    end

    #
    # Returns the titles of the ads that match the specified _block_.
    #
    #   sponsored.titles_of { |ad| ad.url.to_s.include?('www') }
    #
    def titles_of(&block)
      ads_with(&block).titles
    end

    #
    # Returns the urls of the ads that match the specified _block_.
    #
    #   sponsored.urls_of { |ad| ad.title =~ /buy these pants/ }
    #
    def urls_of(&block)
      ads_with(&block).urls
    end

  end
end
@@ -1,3 +1,3 @@
1
1
  module GScraper
2
- VERSION = '0.1.5'
2
+ VERSION = '0.1.6'
3
3
  end
@@ -0,0 +1,38 @@
1
+ require 'gscraper/gscraper'
2
+
3
module GScraper
  #
  # Mixin that gives the including object protected helpers for fetching
  # and posting pages through a shared web agent.
  #
  module WebAgent
    protected

    #
    # Returns the web agent (documented upstream as a WWW::Mechanize
    # agent). The agent is taken from GScraper.web_agent on first use and
    # kept in @web_agent afterwards. If a _block_ is given, it is passed
    # the agent.
    #
    def web_agent(&block)
      agent = (@web_agent ||= GScraper.web_agent)

      yield agent if block_given?
      agent
    end

    #
    # Fetches the specified _url_ through the web_agent, passing along
    # the given _referer_ and any _block_.
    #
    #   get_page('http://www.hackety.org/')
    #
    def get_page(url,referer=nil,&block)
      agent = web_agent

      agent.get(url,referer,&block)
    end

    #
    # Posts the given _query_ parameters to the specified _url_ through
    # the web_agent.
    #
    #   post_page('http://www.wired.com/', :q => 'the future')
    #
    def post_page(url,query={})
      agent = web_agent

      agent.post(url,query)
    end

  end
end
@@ -10,7 +10,7 @@ class QueryResult < Test::Unit::TestCase
10
10
  end
11
11
 
12
12
  def test_first_result
13
- result = @query.first_result
13
+ result = @query.top_result
14
14
 
15
15
  assert_not_nil result, "The Query for 'ruby' has no first-result"
16
16
  assert_equal result.rank, 1, "The first result for the Query 'ruby' does not have the rank of 1"
@@ -1,7 +1,4 @@
1
1
  $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__),'..','lib')))
2
2
 
3
3
  require 'test/unit'
4
- require 'search/query_from_url'
5
- require 'search/query_result'
6
- require 'search/query_pages'
7
- require 'search/page_results'
4
+ require 'search'
metadata CHANGED
@@ -1,33 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.4
3
- specification_version: 1
4
2
  name: gscraper
5
3
  version: !ruby/object:Gem::Version
6
- version: 0.1.5
7
- date: 2007-12-29 00:00:00 -08:00
8
- summary: A ruby web-scraping interface to various Google Services
9
- require_paths:
10
- - lib
11
- email: postmodern.mod3@gmail.com
12
- homepage: " by Postmodern Modulus III"
13
- rubyforge_project: gscraper
14
- description: "== FEATURES/PROBLEMS: * Supports the Google Search service. * Provides HTTP access with custom User-Agent strings. == REQUIREMENTS: * Hpricot * Mechanize == INSTALL:"
15
- autorequire:
16
- default_executable:
17
- bindir: bin
18
- has_rdoc: true
19
- required_ruby_version: !ruby/object:Gem::Version::Requirement
20
- requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
24
- version:
4
+ version: 0.1.6
25
5
  platform: ruby
26
- signing_key:
27
- cert_chain:
28
- post_install_message:
29
6
  authors:
30
7
  - Postmodern Modulus III
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-03-15 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: "0"
23
+ version:
24
+ - !ruby/object:Gem::Dependency
25
+ name: mechanize
26
+ version_requirement:
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: "0"
32
+ version:
33
+ - !ruby/object:Gem::Dependency
34
+ name: hoe
35
+ version_requirement:
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 1.5.1
41
+ version:
42
+ description: "== FEATURES/PROBLEMS: * Supports the Google Search service. * Provides access to search results and ranks. * Provides access to the Sponsored Links. * Provides HTTP access with custom User-Agent strings. * Provides proxy settings for HTTP access. == REQUIREMENTS: * Hpricot * WWW::Mechanize == INSTALL:"
43
+ email: postmodern.mod3@gmail.com
44
+ executables: []
45
+
46
+ extensions: []
47
+
48
+ extra_rdoc_files:
49
+ - History.txt
50
+ - LICENSE.txt
51
+ - Manifest.txt
52
+ - README.txt
31
53
  files:
32
54
  - History.txt
33
55
  - LICENSE.txt
@@ -37,10 +59,13 @@ files:
37
59
  - lib/gscraper.rb
38
60
  - lib/gscraper/version.rb
39
61
  - lib/gscraper/gscraper.rb
62
+ - lib/gscraper/web_agent.rb
40
63
  - lib/gscraper/extensions/uri/http.rb
41
64
  - lib/gscraper/extensions/uri.rb
42
65
  - lib/gscraper/extensions.rb
43
66
  - lib/gscraper/licenses.rb
67
+ - lib/gscraper/sponsored_ad.rb
68
+ - lib/gscraper/sponsored_links.rb
44
69
  - lib/gscraper/search/result.rb
45
70
  - lib/gscraper/search/page.rb
46
71
  - lib/gscraper/search/query.rb
@@ -51,47 +76,32 @@ files:
51
76
  - test/search/query_result.rb
52
77
  - test/search/query_pages.rb
53
78
  - test/search/page_results.rb
54
- test_files:
55
- - test/test_gscraper.rb
79
+ has_rdoc: true
80
+ homepage: " by Postmodern Modulus III"
81
+ post_install_message:
56
82
  rdoc_options:
57
83
  - --main
58
84
  - README.txt
59
- extra_rdoc_files:
60
- - History.txt
61
- - LICENSE.txt
62
- - Manifest.txt
63
- - README.txt
64
- executables: []
65
-
66
- extensions: []
67
-
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: "0"
92
+ version:
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: "0"
98
+ version:
68
99
  requirements: []
69
100
 
70
- dependencies:
71
- - !ruby/object:Gem::Dependency
72
- name: hpricot
73
- version_requirement:
74
- version_requirements: !ruby/object:Gem::Version::Requirement
75
- requirements:
76
- - - ">"
77
- - !ruby/object:Gem::Version
78
- version: 0.0.0
79
- version:
80
- - !ruby/object:Gem::Dependency
81
- name: mechanize
82
- version_requirement:
83
- version_requirements: !ruby/object:Gem::Version::Requirement
84
- requirements:
85
- - - ">"
86
- - !ruby/object:Gem::Version
87
- version: 0.0.0
88
- version:
89
- - !ruby/object:Gem::Dependency
90
- name: hoe
91
- version_requirement:
92
- version_requirements: !ruby/object:Gem::Version::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: 1.4.0
97
- version:
101
+ rubyforge_project: gscraper
102
+ rubygems_version: 1.0.1
103
+ signing_key:
104
+ specification_version: 2
105
+ summary: A ruby web-scraping interface to various Google Services
106
+ test_files:
107
+ - test/test_gscraper.rb