gscraper 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,12 @@
1
+ require 'gscraper/search/query'
2
+ require 'gscraper/web_agent'
3
+
1
4
  module GScraper
2
5
  module Search
3
6
  class Result
4
7
 
8
+ include WebAgent
9
+
5
10
  # Rank of the result page
6
11
  attr_reader :rank
7
12
 
@@ -33,18 +38,6 @@ module GScraper
33
38
  @similar_url = similar_url
34
39
  end
35
40
 
36
- #
37
- # Opens the URL of the cached page for the Result. If _opts_ are
38
- # given, they will be used in accessing the cached page URL.
39
- #
40
- # result.cached_page # => File
41
- #
42
- def cached_page(opts={})
43
- if @cached_url
44
- return GScraper.open(@cached_url,opts)
45
- end
46
- end
47
-
48
41
  #
49
42
  # Create a new Query for results that are similar to the Result. If
50
43
  # a _block_ is given, it will be passed the newly created Query
@@ -64,6 +57,14 @@ module GScraper
64
57
  end
65
58
  end
66
59
 
60
+ #
61
+ # Fetches the cached page of the result. If a _block_ is given it will
62
+ # be passed the cached page.
63
+ #
64
+ def cached_page(&block)
65
+ get_page(@cached_url,&block)
66
+ end
67
+
67
68
  #
68
69
  # Returns a string containing the result's title.
69
70
  #
@@ -3,7 +3,7 @@ require 'gscraper/search/query'
3
3
  module GScraper
4
4
  module Search
5
5
  #
6
- # Returns a new Query object with the given _opts_. See Query.new.
6
+ # Returns a new Query object with the given _options_. See Query.new.
7
7
  #
8
8
  # Search.query(:query => 'ruby', :with_words => 'sow rspec')
9
9
  #
@@ -11,8 +11,8 @@ module GScraper
11
11
  # q.within_past_week = true
12
12
  # end
13
13
  #
14
- def Search.query(opts={},&block)
15
- Query.new(opts,&block)
14
+ def Search.query(options={},&block)
15
+ Query.new(options,&block)
16
16
  end
17
17
 
18
18
  #
@@ -0,0 +1,35 @@
1
+ require 'gscraper/extensions/uri'
2
+
3
+ module GScraper
4
+ class SponsoredAd
5
+
6
+ # Title of the ad
7
+ attr_reader :title
8
+
9
+ # URL of the ad
10
+ attr_reader :url
11
+
12
+ #
13
+ # Creates a new SponsoredAd with the specified _title_ and _url_.
14
+ #
15
+ def initialize(title,url)
16
+ @title = title
17
+ @url = URI.parse(url)
18
+ end
19
+
20
+ #
21
+ # Returns the direct URL of the ad.
22
+ #
23
+ def direct_url
24
+ @url.query_params['adurl'] || @url.query_params['q']
25
+ end
26
+
27
+ #
28
+ # Returns the title of the ad.
29
+ #
30
+ def to_s
31
+ @title.to_s
32
+ end
33
+
34
+ end
35
+ end
@@ -0,0 +1,151 @@
1
+ require 'gscraper/sponsored_ad'
2
+
3
+ module GScraper
4
+ class SponsoredLinks < Array
5
+ #
6
+ # Creates a new SponsoredLinks object with the given _ads_.
7
+ #
8
+ def initialize(ads=[])
9
+ super(ads)
10
+ end
11
+
12
+ #
13
+ # Returns a mapped Array of the ads within the SponsoredLinks
14
+ # using the given _block_. If the _block_ is not given, the
15
+ # SponsoredLinks will be returned.
16
+ #
17
+ # sponsored.map # => SponsoredLinks
18
+ #
19
+ # sponsored.map { |ad| ad.url } # => [...]
20
+ #
21
+ def map(&block)
22
+ return self unless block
23
+
24
+ mapped = []
25
+
26
+ each { |ad| mapped << block.call(ad) }
27
+ return mapped
28
+ end
29
+
30
+ #
31
+ # Selects the ads within the SponsoredLinks which match the given _block_.
32
+ #
33
+ # sponsored.select { |ad| ad.title =~ /consume/i }
34
+ #
35
+ def select(&block)
36
+ SponsoredLinks.new(super(&block))
37
+ end
38
+
39
+ #
40
+ # Selects the ads using the specified _block_.
41
+ #
42
+ # sponsored.ads_with { |ad| ad.title =~ /status symbol/ }
43
+ #
44
+ def ads_with(&block)
45
+ select(&block)
46
+ end
47
+
48
+ #
49
+ # Selects the ads with the matching _title_. The _title_ may be
50
+ # either a String or a Regexp. If _block_ is given, each matching
51
+ # ad will be passed to the _block_.
52
+ #
53
+ # sponsored.ads_with_title('be attractive') #=> SponsoredLinks
54
+ #
55
+ # sponsored.ads_with_title(/buy me/) do |ad|
56
+ # puts ad.url
57
+ # end
58
+ #
59
+ def ads_with_title(title,&block)
60
+ if title.kind_of?(Regexp)
61
+ ads = ads_with { |ad| ad.title =~ title }
62
+ else
63
+ ads = ads_with { |ad| ad.title == title }
64
+ end
65
+
66
+ ads.each(&block) if block
67
+ return ads
68
+ end
69
+
70
+ #
71
+ # Selects the ads with the matching _url_. The _url_ may be
72
+ # either a String or a Regexp. If _block_ is given, each matching
73
+ # ad will be passed to the _block_.
74
+ #
75
+ # sponsored.ads_with_url(/\.com/) # => SponsoredLinks
76
+ #
77
+ # sponsored.ads_with_url(/^https:\/\//) do |ad|
78
+ # puts ad.title
79
+ # end
80
+ #
81
+ def ads_with_url(url,&block)
82
+ if url.kind_of?(Regexp)
83
+ ads = ads_with { |ad| ad.url =~ url }
84
+ else
85
+ ads = ads_with { |ad| ad.url == url }
86
+ end
87
+
88
+ ads.each(&block) if block
89
+ return ads
90
+ end
91
+
92
+ #
93
+ # Returns an Array containing the titles of the ads within the
94
+ # SponsoredLinks.
95
+ #
96
+ # sponsored.titles # => [...]
97
+ #
98
+ def titles
99
+ map { |ad| ad.title }
100
+ end
101
+
102
+ #
103
+ # Returns an Array containing the URLs of the ads within the
104
+ # SponsoredLinks.
105
+ #
106
+ # sponsored.urls # => [...]
107
+ #
108
+ def urls
109
+ map { |ad| ad.url }
110
+ end
111
+
112
+ #
113
+ # Iterates over each ad's title within the SponsoredLinks, passing each to
114
+ # the given _block_.
115
+ #
116
+ # each_title { |title| puts title }
117
+ #
118
+ def each_title(&block)
119
+ titles.each(&block)
120
+ end
121
+
122
+ #
123
+ # Iterates over each ad's url within the SponsoredLinks, passing each to
124
+ # the given _block_.
125
+ #
126
+ # each_url { |url| puts url }
127
+ #
128
+ def each_url(&block)
129
+ urls.each(&block)
130
+ end
131
+
132
+ #
133
+ # Returns the titles of the ads that match the specified _block_.
134
+ #
135
+ # sponsored.titles_of { |ad| ad.url.include?('www') }
136
+ #
137
+ def titles_of(&block)
138
+ ads_with(&block).titles
139
+ end
140
+
141
+ #
142
+ # Returns the urls of the ads that match the specified _block_.
143
+ #
144
+ # sponsored.urls_of { |ad| ad.title =~ /buy these pants/ }
145
+ #
146
+ def urls_of(&block)
147
+ ads_with(&block).urls
148
+ end
149
+
150
+ end
151
+ end
@@ -1,3 +1,3 @@
1
1
  module GScraper
2
- VERSION = '0.1.5'
2
+ VERSION = '0.1.6'
3
3
  end
@@ -0,0 +1,38 @@
1
+ require 'gscraper/gscraper'
2
+
3
+ module GScraper
4
+ module WebAgent
5
+ protected
6
+
7
+ #
8
+ # Returns the WWW::Mechanize agent.
9
+ #
10
+ def web_agent(&block)
11
+ @web_agent ||= GScraper.web_agent
12
+
13
+ block.call(@web_agent) if block
14
+ return @web_agent
15
+ end
16
+
17
+ #
18
+ # Fetches the specified _url_, with the given _referer_ using the
19
+ # web_agent.
20
+ #
21
+ # get_page('http://www.hackety.org/')
22
+ #
23
+ def get_page(url,referer=nil,&block)
24
+ web_agent.get(url,referer,&block)
25
+ end
26
+
27
+ #
28
+ # Posts the specified _url_ and the given _query_ parameters using the
29
+ # web_agent.
30
+ #
31
+ # post_page('http://www.wired.com/', :q => 'the future')
32
+ #
33
+ def post_page(url,query={})
34
+ web_agent.post(url,query)
35
+ end
36
+
37
+ end
38
+ end
@@ -10,7 +10,7 @@ class QueryResult < Test::Unit::TestCase
10
10
  end
11
11
 
12
12
  def test_first_result
13
- result = @query.first_result
13
+ result = @query.top_result
14
14
 
15
15
  assert_not_nil result, "The Query for 'ruby' has no first-result"
16
16
  assert_equal result.rank, 1, "The first result for the Query 'ruby' does not have the rank of 1"
@@ -1,7 +1,4 @@
1
1
  $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__),'..','lib')))
2
2
 
3
3
  require 'test/unit'
4
- require 'search/query_from_url'
5
- require 'search/query_result'
6
- require 'search/query_pages'
7
- require 'search/page_results'
4
+ require 'search'
metadata CHANGED
@@ -1,33 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.4
3
- specification_version: 1
4
2
  name: gscraper
5
3
  version: !ruby/object:Gem::Version
6
- version: 0.1.5
7
- date: 2007-12-29 00:00:00 -08:00
8
- summary: A ruby web-scraping interface to various Google Services
9
- require_paths:
10
- - lib
11
- email: postmodern.mod3@gmail.com
12
- homepage: " by Postmodern Modulus III"
13
- rubyforge_project: gscraper
14
- description: "== FEATURES/PROBLEMS: * Supports the Google Search service. * Provides HTTP access with custom User-Agent strings. == REQUIREMENTS: * Hpricot * Mechanize == INSTALL:"
15
- autorequire:
16
- default_executable:
17
- bindir: bin
18
- has_rdoc: true
19
- required_ruby_version: !ruby/object:Gem::Version::Requirement
20
- requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
24
- version:
4
+ version: 0.1.6
25
5
  platform: ruby
26
- signing_key:
27
- cert_chain:
28
- post_install_message:
29
6
  authors:
30
7
  - Postmodern Modulus III
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-03-15 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: "0"
23
+ version:
24
+ - !ruby/object:Gem::Dependency
25
+ name: mechanize
26
+ version_requirement:
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: "0"
32
+ version:
33
+ - !ruby/object:Gem::Dependency
34
+ name: hoe
35
+ version_requirement:
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 1.5.1
41
+ version:
42
+ description: "== FEATURES/PROBLEMS: * Supports the Google Search service. * Provides access to search results and ranks. * Provides access to the Sponsored Links. * Provides HTTP access with custom User-Agent strings. * Provides proxy settings for HTTP access. == REQUIREMENTS: * Hpricot * WWW::Mechanize == INSTALL:"
43
+ email: postmodern.mod3@gmail.com
44
+ executables: []
45
+
46
+ extensions: []
47
+
48
+ extra_rdoc_files:
49
+ - History.txt
50
+ - LICENSE.txt
51
+ - Manifest.txt
52
+ - README.txt
31
53
  files:
32
54
  - History.txt
33
55
  - LICENSE.txt
@@ -37,10 +59,13 @@ files:
37
59
  - lib/gscraper.rb
38
60
  - lib/gscraper/version.rb
39
61
  - lib/gscraper/gscraper.rb
62
+ - lib/gscraper/web_agent.rb
40
63
  - lib/gscraper/extensions/uri/http.rb
41
64
  - lib/gscraper/extensions/uri.rb
42
65
  - lib/gscraper/extensions.rb
43
66
  - lib/gscraper/licenses.rb
67
+ - lib/gscraper/sponsored_ad.rb
68
+ - lib/gscraper/sponsored_links.rb
44
69
  - lib/gscraper/search/result.rb
45
70
  - lib/gscraper/search/page.rb
46
71
  - lib/gscraper/search/query.rb
@@ -51,47 +76,32 @@ files:
51
76
  - test/search/query_result.rb
52
77
  - test/search/query_pages.rb
53
78
  - test/search/page_results.rb
54
- test_files:
55
- - test/test_gscraper.rb
79
+ has_rdoc: true
80
+ homepage: " by Postmodern Modulus III"
81
+ post_install_message:
56
82
  rdoc_options:
57
83
  - --main
58
84
  - README.txt
59
- extra_rdoc_files:
60
- - History.txt
61
- - LICENSE.txt
62
- - Manifest.txt
63
- - README.txt
64
- executables: []
65
-
66
- extensions: []
67
-
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: "0"
92
+ version:
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: "0"
98
+ version:
68
99
  requirements: []
69
100
 
70
- dependencies:
71
- - !ruby/object:Gem::Dependency
72
- name: hpricot
73
- version_requirement:
74
- version_requirements: !ruby/object:Gem::Version::Requirement
75
- requirements:
76
- - - ">"
77
- - !ruby/object:Gem::Version
78
- version: 0.0.0
79
- version:
80
- - !ruby/object:Gem::Dependency
81
- name: mechanize
82
- version_requirement:
83
- version_requirements: !ruby/object:Gem::Version::Requirement
84
- requirements:
85
- - - ">"
86
- - !ruby/object:Gem::Version
87
- version: 0.0.0
88
- version:
89
- - !ruby/object:Gem::Dependency
90
- name: hoe
91
- version_requirement:
92
- version_requirements: !ruby/object:Gem::Version::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: 1.4.0
97
- version:
101
+ rubyforge_project: gscraper
102
+ rubygems_version: 1.0.1
103
+ signing_key:
104
+ specification_version: 2
105
+ summary: A ruby web-scraping interface to various Google Services
106
+ test_files:
107
+ - test/test_gscraper.rb