gsearch-parser 0.2.4 → 0.3.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,6 +1,7 @@
1
- # Gsearch::Parser
1
+ # GSearchParser
2
2
 
3
3
  TODO: Write a gem description
4
+ GSearchParser is a lightweight framework for making Google search queries and parsing the resulting pages. More parsed results can be requested by simply calling the 'nextResults' method.
4
5
 
5
6
  ## Installation
6
7
 
@@ -18,7 +19,34 @@ Or install it yourself as:
18
19
 
19
20
  ## Usage
20
21
 
21
- TODO: Write usage instructions here
22
+ require 'gsearch-parser'
23
+
24
+ # Create a new Google web search from a query string
25
+ webSearch = GSearchParser.webSearch('what')
26
+
27
+ # Iterate over results
28
+ webSearch.each do |result|
29
+ puts "\t" + result.title
30
+ puts "\t" + result.content
31
+ puts "\t" + result.uri
32
+ puts "\n"
33
+ end
34
+
35
+ # Fetch the next set of results, and iterate over them
36
+ webSearch.nextResults.each do |result|
37
+ puts "\t" + result.title
38
+ puts "\t" + result.content
39
+ puts "\t" + result.uri
40
+ puts "\n"
41
+ end
42
+
43
+ # Iterate over all the results, including the ones from calls to .nextResults
44
+ webSearch.each do |result|
45
+ puts "\t" + result.title
46
+ puts "\t" + result.content
47
+ puts "\t" + result.uri
48
+ puts "\n"
49
+ end
22
50
 
23
51
  ## Contributing
24
52
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.4
1
+ 0.3.0
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "gsearch-parser"
8
- s.version = "0.2.4"
8
+ s.version = "0.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Diego Netto"]
12
- s.date = "2012-04-11"
12
+ s.date = "2012-04-15"
13
13
  s.description = "Queries Google search and parses the resulting web page for content."
14
14
  s.email = "diegormnetto@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -1,38 +1,61 @@
1
- require "gsearch-parser/version"
2
1
  require 'open-uri'
3
2
  require 'nokogiri'
4
3
 
4
+ #
5
+ # Module method definitions
6
+ #
5
7
  module GSearchParser
6
8
 
9
+ # Entry method for performing a web search
7
10
  def GSearchParser.webSearch(query)
8
- GoogleWebSearch.new(query)
11
+ webSearch = GoogleWebSearch.new(query)
9
12
  end
10
13
 
11
14
  end
12
15
 
13
- ###################################################
14
- # #
15
- # GoogleWebSearch Class #
16
- # #
17
- ###################################################
16
+ #
17
+ # Google Web Search class
18
+ #
18
19
  class GoogleWebSearch
19
- attr_accessor :results
20
-
20
+ attr_accessor :results, :currentPage
21
+ @index
22
+
21
23
  # Class initializer
22
24
  def initialize(query)
23
- # Initialize array
25
+ # Initialize variables
24
26
  @results = Array.new
27
+ @index = 0
28
+
29
+ # Update the results list: (Fetch, Store, and Parse)
30
+ updateResults("http://google.com/search?sourceid=chrome&q=#{query}")
31
+ end
32
+
33
+ # Update the WebSearch results array by performing a Fetch, Store, Parse routine
34
+ def updateResults(url)
35
+ # Fetch
36
+ searchPage = fetchPage(url)
25
37
 
26
- # TODO: Format query
38
+ # Store
39
+ @currentPage = searchPage
40
+
41
+ # Parse
42
+ parseCurrentPage
43
+ end
44
+
45
+ # Fetch the page from a URL
46
+ def fetchPage(url)
47
+ Nokogiri::HTML(open(url, 'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.152 Safari/535.19'))
48
+ end
27
49
 
28
- # Fetch page
29
- searchPage = Nokogiri::HTML(open("http://google.com/search?sourceid=chrome&q=#{query}",
30
- 'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.152 Safari/535.19'))
50
+ # Parse the current page and populate results
51
+ def parseCurrentPage
52
+ # Initialize local variables
53
+ currentResults = Array.new
31
54
 
32
55
  # Iterate over each Google result list element
33
- searchPage.css('li.g').each do |result|
56
+ @currentPage.css('li.g').each do |result|
34
57
  # Extract the title
35
- title = result.css('h3 > a').first.inner_html
58
+ title = result.css('h3.r a').first.inner_html
36
59
 
37
60
  # Extract the content. There is the possibility for
38
61
  # the content to be nil, so check for this
@@ -47,8 +70,22 @@ class GoogleWebSearch
47
70
  end
48
71
 
49
72
  # Create a new Result object and append to the array
50
- @results << Result.new(title, content, uri)
73
+ currentResults << Result.new(title, content, uri)
51
74
  end
75
+ @results += currentResults
76
+ return currentResults
77
+ end
78
+
79
+ # Parse the results from the next page and append to results list
80
+ def nextResults
81
+ # Parse next result page link
82
+ nextPageUrl = @currentPage.css("table#nav tr td a")[@index]['href']
83
+
84
+ # Increment reference index
85
+ @index += 1
86
+
87
+ # Update results
88
+ updateResults("http://www.google.com" + nextPageUrl)
52
89
  end
53
90
 
54
91
  # Iterator over results
@@ -56,22 +93,20 @@ class GoogleWebSearch
56
93
  @results.each(&blk)
57
94
  end
58
95
 
59
- ###################################################
60
- # #
61
- # Result Class #
62
- # #
63
- ###################################################
64
- class Result
65
- attr_accessor :title, :content, :uri
66
-
67
- # Class initializer
68
- def initialize(title, content, uri)
69
- @title = title
70
- @content = content
71
- @uri = uri
72
- end
96
+ end # GoogleWebSearch
97
+
98
+ #
99
+ # Result class
100
+ #
101
+ class Result
102
+ attr_accessor :title, :content, :uri
73
103
 
74
- end # Result
104
+ # Class initializer
105
+ def initialize(title, content, uri)
106
+ @title = title
107
+ @content = content
108
+ @uri = uri
109
+ end
75
110
 
76
- end # GoogleSearch
111
+ end # Result
77
112
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gsearch-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-04-11 00:00:00.000000000 Z
12
+ date: 2012-04-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri