ralert 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/ralert.rb +161 -0
- data/lib/result.rb +22 -0
- data/lib/search-options.rb +12 -0
- metadata +47 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: db68c2b3754d20707792ffde3ef60b33a18bbec3
|
4
|
+
data.tar.gz: af6df9818eaa6513b5393becf25dd7a87d226324
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 06e3243385de2ced3f3051cf067a0fa47f4826abbd6dc182f30e95cb8b9747b866610304cd5747ea0effbb2294fb890def0f88332a2e7f0bbe455312ef1207e7
|
7
|
+
data.tar.gz: 643b0b4d7efd401c33b94fd9cc36f374dc0fbf660cb0dd502114738bc315cc8c01b11c0e1b067e22eda334e5a03a34ff264e3af2ae426bb8bd25217fed0c663b
|
data/lib/ralert.rb
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#encoding: UTF-8
|
3
|
+
|
4
|
+
require 'result'
|
5
|
+
require 'search-options'
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'nokogiri'
|
9
|
+
require 'open-uri'
|
10
|
+
|
11
|
+
##
# Runs a Google search (the news vertical unless a mode is given) and
# collects the hits found on the result page as Result objects.
#
# The first page is fetched while the object is being constructed; more
# pages can be appended to #results with #next_results.
#
class Ralert
  include Enumerable

  # Prefix every search URI is built on.
  SEARCH_BASE_URI = "https://google.com/search?q="

  # Host the relative "next page" links are appended to. HTTPS, to match
  # SEARCH_BASE_URI and avoid a plaintext request.
  NEXT_PAGE_HOST = "https://www.google.com"

  # Selector of the last cell of the pager table; it holds the "next"
  # anchor when another result page exists.
  NEXT_PAGE_CELL = "table#nav tr td.b:last-child"

  attr_accessor :results, :next_page

  ##
  # query   - the search terms as plain text.
  # options - a SearchOptions object; a default one is created when
  #           none is given.
  #
  def initialize(query, options = nil)
    @results = []

    # Whitespace is always normalised, not only when a literal space
    # is present (tabs and newlines are handled as well).
    query = transform_query(query)

    # Instantiate a new search options object if the caller has not
    # provided one.
    options ||= SearchOptions.new

    @results = perform_search(construct_uri(query, options))
  end

  ##
  # Takes the query string and a SearchOptions object
  # and constructs a new search URI.
  #
  # * literal    - when truthy the query is wrapped in (percent-encoded)
  #                double quotes.
  # * date_range - value for "qdr:"; defaults to "w" when nil.
  # * sort_by    - when nil, ",sbd:1" is appended.
  # * safe       - truthy gives "&safe=on", otherwise "&safe=off".
  # * mode       - when nil, "&tbm=nws" is appended.
  #
  # Returns the URI as a String.
  #
  def construct_uri(query, options)
    # A raw '"' is not a legal URI character and makes the URI parser
    # raise, so the quotes are sent percent-encoded.
    query = "%22#{query}%22" if options.literal

    date_range = options.date_range.nil? ? "w" : options.date_range

    uri = "#{SEARCH_BASE_URI}#{query}&tbs=qdr:#{date_range}"
    uri += ",sbd:1" if options.sort_by.nil?
    uri += (options.safe ? "&safe=on" : "&safe=off")
    uri += "&tbm=nws" if options.mode.nil?
    uri
  end

  ##
  # Given a URI, performs a request and scans
  # the resulting page with the Nokogiri parser.
  #
  # Returns the results parsed from that page (see #parse_results).
  #
  def perform_search(uri)
    # URI.open comes from open-uri; Kernel#open no longer accepts URLs
    # on Ruby 3.0+. The block form closes the stream once it is read.
    body = URI.open(uri) { |io| io.read }

    @page = Nokogiri::HTML(body)
    @page.encoding = 'utf-8'

    parse_results
  end

  ##
  # Parses the page resulting from the search query, appends the items
  # found to @results and refreshes @next_page when a further page is
  # linked.
  #
  # Returns only the results of the current page.
  #
  def parse_results
    # Items without a title link yield nil and are dropped.
    cur_results = @page.search('li.g').map { |item| node_from_item(item) }.compact

    update_next_page unless next_page_missing
    @results += cur_results

    cur_results
  end

  ##
  # Takes an HTML li block which represents a search
  # result and extracts a title, a link, a (relative) date and the
  # article's source from it.
  #
  # Returns a Result, or nil when the item has no 'h3.r a' anchor.
  #
  def node_from_item(item)
    link = item.at('h3.r a')
    return nil if link.nil?

    # The metadata text is split on '-': first part is stored as the
    # source, second part as the date.
    source, date = item.search('div.slp span.f').inner_html.split('-')

    # Non-destructive gsub: the bang variants return nil when nothing
    # matches and therefore must not be chained.
    href = link['href'].to_s.gsub(/\/url\?q\=/, '').gsub(/\&sa\=.*/, '')

    Result.new(href, link.text, source, date)
  end

  ##
  # Checks if this is the last of the search result pages
  # available. Also true when the page has no pager at all.
  #
  def next_page_missing
    next_page_link.nil?
  end

  ##
  # Updates the @next_page instance variable to point
  # to the next search result page. Leaves it untouched when the
  # current page links to no further page.
  #
  def update_next_page
    anchor = next_page_link
    return if anchor.nil?

    @next_page = NEXT_PAGE_HOST + anchor['href']
  end

  ##
  # Performs the search-parse-update routine on up to page_number
  # following pages, stopping early when no next page is available.
  #
  # Returns all results accumulated so far.
  #
  def next_results(page_number = 1)
    page_number.times do
      break if next_page_missing

      perform_search(@next_page)
    end

    @results
  end

  ##
  # Takes a text query and substitutes whitespace for plus signs
  # as the google search engine expects to be fed with.
  #
  # Returns a new String; the argument is not modified.
  #
  def transform_query(q)
    q.gsub(/\s/, '+')
  end

  ##
  # Iterates over the accumulated results; this is what the
  # Enumerable methods are built on.
  #
  def each(&blk)
    @results.each(&blk)
  end

  private

  ##
  # Returns the anchor inside the last pager cell of the current page,
  # or nil when either the cell or the anchor is absent.
  #
  def next_page_link
    cell = @page.at_css(NEXT_PAGE_CELL)
    cell && cell.at_css("a")
  end
end
|
data/lib/result.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
##
# A single search hit: the link it points to, its title, the source
# that published it and its date.
#
class Result
  attr_accessor :link, :title, :source, :date

  ##
  # All four attributes default to an empty string.
  #
  def initialize(link = '', title = '', source = '', date = '')
    @link = link
    @title = title
    @source = source
    @date = date
  end

  ##
  # Two results are equal when the other object is a Result (or a
  # subclass instance) and all four attributes match. The comparison
  # never modifies either object.
  #
  def ==(other)
    other.is_a?(self.class) &&
      other.link == @link &&
      other.title == @title &&
      other.source == @source &&
      other.date == @date
  end

  alias eql? ==

  ##
  # Hash over the same attributes #== compares, so equal results land
  # in the same Hash/Set bucket. Hashing the array keeps attribute
  # order significant, which a plain XOR of the parts does not.
  #
  def hash
    [@link, @title, @source, @date].hash
  end
end
|
data/lib/search-options.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
##
# Plain holder for the settings of a search: literal, sort_by,
# date_range, safe and mode. Every setting is readable and writable
# and is nil unless a value is passed to the constructor.
#
class SearchOptions
  attr_accessor :literal, :sort_by, :date_range, :safe, :mode

  ##
  # Accepts the five settings positionally, in the order
  # literal, sort_by, date_range, safe, mode.
  #
  def initialize(literal = nil, sort_by = nil, date_range = nil, safe = nil, mode = nil)
    @literal, @sort_by, @date_range, @safe, @mode =
      literal, sort_by, date_range, safe, mode
  end
end
|
metadata
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ralert
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Spyros Livathinos
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-08-22 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Queries the Google search engine and returns an array of Result objects.
|
14
|
+
email: livathinos.spyros@gmail.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/ralert.rb
|
20
|
+
- lib/result.rb
|
21
|
+
- lib/search-options.rb
|
22
|
+
homepage: http://thinkcactus.com
|
23
|
+
licenses:
|
24
|
+
- MIT
|
25
|
+
metadata: {}
|
26
|
+
post_install_message:
|
27
|
+
rdoc_options: []
|
28
|
+
require_paths:
|
29
|
+
- lib
|
30
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - '>='
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '0'
|
35
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
requirements: []
|
41
|
+
rubyforge_project:
|
42
|
+
rubygems_version: 2.0.3
|
43
|
+
signing_key:
|
44
|
+
specification_version: 4
|
45
|
+
summary: Ralert is a simple Ruby gem for parsing Google News search queries programmatically.
|
46
|
+
test_files: []
|
47
|
+
has_rdoc:
|