ralert 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: db68c2b3754d20707792ffde3ef60b33a18bbec3
4
+ data.tar.gz: af6df9818eaa6513b5393becf25dd7a87d226324
5
+ SHA512:
6
+ metadata.gz: 06e3243385de2ced3f3051cf067a0fa47f4826abbd6dc182f30e95cb8b9747b866610304cd5747ea0effbb2294fb890def0f88332a2e7f0bbe455312ef1207e7
7
+ data.tar.gz: 643b0b4d7efd401c33b94fd9cc36f374dc0fbf660cb0dd502114738bc315cc8c01b11c0e1b067e22eda334e5a03a34ff264e3af2ae426bb8bd25217fed0c663b
data/lib/ralert.rb ADDED
@@ -0,0 +1,161 @@
1
+ #!/usr/bin/env ruby
2
+ #encoding: UTF-8
3
+
4
+ require 'result'
5
+ require 'search-options'
6
+
7
+ require 'rubygems'
8
+ require 'nokogiri'
9
+ require 'open-uri'
10
+
11
##
# Runs a Google search and collects the hits found on the result pages as
# Result objects.
#
# Creating an instance performs the first search right away; additional
# result pages are fetched on demand with #next_results. The accumulated
# hits are available through #results, #each and the Enumerable methods.
#
class Ralert
  # #each is defined below, so the Enumerable helpers (map, select, ...)
  # come for free.
  include Enumerable

  # Prefix of every search request; the prepared query is appended to it.
  SEARCH_URI = "https://google.com/search?q="

  # Host prepended to the relative "next page" link taken from a result page.
  NEXT_PAGE_HOST = "https://www.google.com"

  # Selector of the pagination cell expected to hold the "next page" anchor.
  NEXT_PAGE_CELL = "table#nav tr td.b:last-child"

  attr_accessor :results, :next_page

  ##
  # Performs the initial search.
  #
  # query::   the search text; whitespace is replaced by plus signs.
  # options:: a SearchOptions object. When omitted, a default
  #           SearchOptions instance is used.
  #
  def initialize(query, options = nil)
    @results = []

    # Fall back to default search options if the caller supplied none.
    options ||= SearchOptions.new

    uri = construct_uri(transform_query(query), options)
    @results = perform_search(uri)
  end

  ##
  # Takes the (already transformed) query string and a SearchOptions
  # object and returns the complete search URI as a String.
  #
  # Each option is tested with nil?, so any non-nil value (including
  # false) counts as "set":
  #
  # * literal set      -> the query is wrapped in double quotes
  # * date_range       -> value of the "qdr:" filter, "w" when unset
  # * sort_by unset    -> ",sbd:1" is appended to the filter
  # * safe set         -> "&safe=on", otherwise "&safe=off"
  # * mode unset       -> "&tbm=nws" is appended
  #
  def construct_uri(query, options)
    # The quotes are emitted percent-encoded: a raw '"' is not a legal
    # URI character and makes URI.parse reject the whole address.
    query = "%22#{query}%22" unless options.literal.nil?

    date_range = options.date_range.nil? ? "w" : options.date_range

    uri = "#{SEARCH_URI}#{query}&tbs=qdr:#{date_range}"
    uri += ",sbd:1" if options.sort_by.nil?
    uri += options.safe.nil? ? "&safe=off" : "&safe=on"
    uri += "&tbm=nws" if options.mode.nil?
    uri
  end

  ##
  # Given a URI, performs a request, parses the returned page with
  # Nokogiri and hands over to #parse_results.
  #
  # Returns the results found on that page (see #parse_results).
  #
  def perform_search(uri)
    # OpenURI's URI#read fetches the body and closes the connection.
    # Kernel#open is avoided on purpose: it no longer opens URLs on
    # current Rubies and would also accept local paths.
    html = URI.parse(uri).read

    @page = Nokogiri::HTML(html)
    @page.encoding = 'utf-8'

    parse_results
  end

  ##
  # Parses the page resulting from the last request, appends the hits to
  # #results and refreshes #next_page when a following page exists.
  #
  # Returns an Array with only the hits of the current page.
  #
  def parse_results
    # Items without a title link yield nil and are dropped.
    cur_results = @page.search('li.g').map { |item| node_from_item(item) }.compact

    update_next_page unless next_page_missing
    @results += cur_results

    cur_results
  end

  ##
  # Takes an HTML li block which represents a search result and extracts
  # a title, a link, a (relative) date and the article's source from it.
  #
  # Returns a Result, or nil when the block contains no title link.
  #
  def node_from_item(item)
    link = item.at('h3.r a')
    return nil if link.nil?

    # The meta line is split on '-': first part is the source, second the date.
    source, date = item.search('div.slp span.f').inner_html.split('-')

    # Strip the redirect wrapper ("/url?q=" prefix and "&sa=..." suffix).
    # Non-destructive gsub is required here: gsub! returns nil when
    # nothing was replaced, which breaks both the chain and the result.
    href = link['href'].to_s.gsub(%r{/url\?q=}, '').gsub(/&sa=.*/, '')

    Result.new(href, link.text, source, date)
  end

  ##
  # Checks if this is the last of the search result pages available.
  # Also true when no page has been loaded or the page carries no
  # pagination block at all.
  #
  def next_page_missing
    next_page_anchor.nil?
  end

  ##
  # Updates the @next_page instance variable to point to the next search
  # result page. Leaves it untouched when there is no such page.
  #
  def update_next_page
    anchor = next_page_anchor
    @next_page = NEXT_PAGE_HOST + anchor['href'] unless anchor.nil?
  end

  ##
  # Performs the search-parse-update routine on up to +page_number+
  # following result pages, stopping early at the last page.
  #
  # Returns all results accumulated so far.
  #
  def next_results(page_number = 1)
    page_number.times do
      break if next_page_missing

      perform_search(@next_page)
    end

    @results
  end

  ##
  # Takes a text query and returns a copy in which every whitespace
  # character is replaced by a plus sign. The argument is not modified.
  #
  def transform_query(q)
    q.gsub(/\s/, '+')
  end

  ##
  # Iterates over the accumulated results.
  #
  def each(&blk)
    @results.each(&blk)
  end

  private

  ##
  # Returns the anchor node of the "next page" link on the current page,
  # or nil when the page, the pagination cell or the anchor is absent.
  #
  def next_page_anchor
    cell = @page && @page.at_css(NEXT_PAGE_CELL)
    cell && cell.at_css("a")
  end
end
data/lib/result.rb ADDED
@@ -0,0 +1,22 @@
1
##
# A single search hit, described by its link, title, source and date.
# All four attributes default to an empty string.
#
# Two results are equal (==, eql?) when they are of the same class and all
# four attributes match, so instances can be used as Hash keys or in a Set.
#
class Result
  attr_accessor :link, :title, :source, :date

  def initialize(link = '', title = '', source = '', date = '')
    @link = link
    @title = title
    @source = source
    @date = date
  end

  ##
  # Attribute-wise comparison. Returns false for objects that are not
  # Result instances; never modifies +other+.
  #
  def ==(other)
    other.is_a?(self.class) &&
      other.link == @link &&
      other.title == @title &&
      other.source == @source &&
      other.date == @date
  end

  alias eql? ==

  ##
  # Hash code consistent with ==. Hashing the attribute list keeps the
  # position of each value significant, unlike XOR-ing the individual
  # hashes (which collides on swapped fields and cancels equal ones).
  #
  def hash
    [@link, @title, @source, @date].hash
  end
end
data/lib/search-options.rb ADDED
@@ -0,0 +1,12 @@
1
##
# Plain container for the five search settings: literal, sort_by,
# date_range, safe and mode. Every setting is readable and writable
# and starts out as nil unless a value is passed to the constructor.
#
class SearchOptions
  attr_accessor :literal, :sort_by, :date_range, :safe, :mode

  # Arguments are positional, in the order
  # literal, sort_by, date_range, safe, mode.
  def initialize(literal = nil, sort_by = nil, date_range = nil, safe = nil, mode = nil)
    @literal, @sort_by, @date_range, @safe, @mode =
      literal, sort_by, date_range, safe, mode
  end
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ralert
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Spyros Livathinos
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-08-22 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Queries the Google search engine and returns an array of Result objects.
14
+ email: livathinos.spyros@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/ralert.rb
20
+ - lib/result.rb
21
+ - lib/search-options.rb
22
+ homepage: http://thinkcactus.com
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - '>='
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.0.3
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: Ralert is a simple Ruby gem for parsing Google News search queries programmatically.
46
+ test_files: []
47
+ has_rdoc: