search_logger 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rspec +1 -0
- data/.travis.yml +6 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +36 -0
- data/Guardfile +9 -0
- data/README +0 -0
- data/Rakefile +1 -0
- data/bin/search_logger +12 -0
- data/lib/search_logger/csv_exporter.rb +20 -0
- data/lib/search_logger/exec.rb +131 -0
- data/lib/search_logger/google_parser/result.rb +60 -0
- data/lib/search_logger/google_parser.rb +59 -0
- data/lib/search_logger/persistence.rb +68 -0
- data/lib/search_logger/version.rb +3 -0
- data/lib/search_logger/xml_parser.rb +15 -0
- data/lib/search_logger.rb +11 -0
- data/schema.sql +15 -0
- data/search_logger.gemspec +26 -0
- data/spec/acceptance/google_parser_acceptance_spec.rb +93 -0
- data/spec/acceptance/mysql_to_csv_exportation_acceptance_spec.rb +39 -0
- data/spec/acceptance/persistence_acceptance_spec.rb +26 -0
- data/spec/acceptance/test_acceptance_spec.rb +61 -0
- data/spec/acceptance/xml_parser_acceptance_spec.rb +10 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/support/file_repository/exported_from_persistence.csv +119 -0
- data/spec/support/file_repository/google_result.html +115 -0
- data/spec/support/file_repository/google_result_2.html +636 -0
- data/spec/support/file_repository/rankabove_test.xml +9 -0
- data/spec/support/file_repository/sample_mysql_data.rb +6 -0
- data/spec/unit/google_parser/google_parser_result_spec.rb +79 -0
- data/spec/unit/google_parser_spec.rb +69 -0
- data/spec/unit/persistence_spec.rb +86 -0
- data/spec/unit/shell_exec_spec.rb +28 -0
- data/spec/unit/xml_parser_spec.rb +22 -0
- metadata +152 -0
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--colour
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
search_logger (0.0.1)
|
5
|
+
httpclient
|
6
|
+
mysql2
|
7
|
+
nokogiri
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: http://rubygems.org/
|
11
|
+
specs:
|
12
|
+
diff-lcs (1.1.3)
|
13
|
+
ffi (1.0.11)
|
14
|
+
guard (1.0.0)
|
15
|
+
ffi (>= 0.5.0)
|
16
|
+
thor (~> 0.14.6)
|
17
|
+
httpclient (2.2.4)
|
18
|
+
mysql2 (0.3.11)
|
19
|
+
nokogiri (1.5.0)
|
20
|
+
rspec (2.8.0)
|
21
|
+
rspec-core (~> 2.8.0)
|
22
|
+
rspec-expectations (~> 2.8.0)
|
23
|
+
rspec-mocks (~> 2.8.0)
|
24
|
+
rspec-core (2.8.0)
|
25
|
+
rspec-expectations (2.8.0)
|
26
|
+
diff-lcs (~> 1.1.2)
|
27
|
+
rspec-mocks (2.8.0)
|
28
|
+
thor (0.14.6)
|
29
|
+
|
30
|
+
PLATFORMS
|
31
|
+
ruby
|
32
|
+
|
33
|
+
DEPENDENCIES
|
34
|
+
guard
|
35
|
+
rspec
|
36
|
+
search_logger!
|
# A sample Guardfile
# More info at https://github.com/guard/guard#readme

# Re-runs RSpec examples when watched files change (rspec 2.x guard).
guard 'rspec', :version => 2 do
  # Any spec file change re-runs that spec.
  watch(%r{^spec/.+_spec\.rb$})
  # A lib file change runs its matching unit spec; the puts echoes the
  # mapped spec path to the console before returning it to guard.
  watch(%r{^lib/search_logger/(.+)\.rb$}) { |m| puts "spec/unit/#{m[1]}_spec.rb"; "spec/unit/#{m[1]}_spec.rb" }
  # Touching the helper re-runs the whole suite.
  watch('spec/spec_helper.rb') { "spec" }
end
data/README
ADDED
File without changes
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
#!/usr/bin/ruby

# Executable entry point: puts the gem's lib/ and root dirs on the load
# path so the binary works from a source checkout, then hands ARGV to
# the interactive Exec runner.
$:.unshift File.expand_path('../../lib/', __FILE__)
$:.unshift File.expand_path('../../', __FILE__)
require "search_logger/exec"

begin
  # NOTE(review): the `shell` local is never used afterwards.
  shell = SearchLogger::Exec.new(ARGV).run
rescue Interrupt
  # Exec disables terminal echo while reading the DB password; restore
  # it if the user hits Ctrl+C mid-prompt.
  system "stty echo"
  exit
end
require 'csv'

# Writes search-result records out to a CSV file with a fixed header row.
class CSVExporter
  # Exports +data+ (an array of row hashes keyed by :searched_keyword,
  # :position, :url, :title and :description) to the target given in
  # options[:to]. When +data+ is empty nothing is written and no file is
  # created.
  def export(data, options)
    @data = data
    @to = options[:to]

    save_to_file unless @data.empty?
  end

  # Opens the target in binary write mode, emits the header row, then one
  # CSV row per record in the order keyword/position/url/title/description.
  def save_to_file
    CSV.open(@to, "wb") do |csv|
      csv << %w{keyword position url title description}
      @data.each do |record|
        csv << record.values_at(:searched_keyword, :position, :url, :title, :description)
      end
    end
  end

end
require "search_logger"

module SearchLogger
  # Interactive command-line runner: validates ARGV, prompts for MySQL
  # credentials, then searches/persists/exports for every keyword found
  # in the given XML file.
  class Exec
    attr_accessor :command
    attr_reader :argv

    # Exits the process with usage help when no argument is given or the
    # given path does not exist; otherwise prompts for DB credentials.
    def initialize argv
      @argv = argv
      unless valid_argv?
        puts "Please, specify a xml file with keywords."
        puts ""
        puts "Example:"
        puts ""
        puts "\s\ssearch_logger ~/my/folder/keywords.xml"
        exit
      end
      unless valid_file?
        puts "The file you specified doesn't exist."
        exit
      end

      puts "Please, enter your MySQL database information."
      asks_for_database_config
    end

    # Prompts for host/username/password (falling back to localhost/root)
    # and retries recursively until a Persistence connection succeeds.
    def asks_for_database_config
      database_config = {
        database: "search_logger",
        host: "localhost",
        username: "root"
      }

      # `and`-chained assignment: only overwrite the default when the
      # user typed a non-empty value.
      print "Host address (defaults to 'localhost'): "
      input = input_text and !input.empty? and database_config[:host] = input

      print "Username (defaults to 'root'): "
      input = input_text and !input.empty? and database_config[:username] = input

      # Disable terminal echo while the password is typed.
      system "stty -echo"
      print "Password: "
      database_config[:password] = input_text
      system "stty echo"

      begin
        @database_connection = SearchLogger::Persistence.new(database_config)
        puts "\n\nA connection was established, starting operation.\n\n"
      rescue
        # NOTE(review): bare rescue treats ANY connection error (bad
        # password, unreachable host) as "DB does not exist".
        puts "The specified DB does not exists. Please, try again.\n\n"
        asks_for_database_config
      end
    end

    # Reads one stripped line from stdin; returns nil on read errors
    # (the bare rescue swallows them).
    def input_text
      begin
        STDOUT.flush
        STDIN.gets.strip
      rescue
      end
    end

    # True when at least one CLI argument was given.
    def valid_argv?
      @argv.length > 0
    end

    # True when the first argument is an existing file.
    # NOTE(review): File.exists? is deprecated in modern Ruby (use exist?).
    def valid_file?
      File.exists? @argv[0]
    end

    # Full pipeline: parse XML keywords, search+persist each, export CSV.
    def run
      puts "1) Parsing the XML file"
      xml = load_xml

      puts "2) Searching Google and saving to MySQL (first 2 pages, 100 results each)"
      xml.each do |value|
        puts "Keyword: #{value.to_s}"

        print "\s\sGoogle: "
        google_results = search_google(value)
        print "\e[0;32mdone.\e[0m "

        print "\s\sMySQL: "
        save_into_mysql(google_results)
        print "\e[0;32mdone.\e[0m\n"
      end

      puts ""
      export_to_csv_file

      puts "\nCongratulations! Everything worked as expected. Please audit the CSV file to guarantee the quality of the data."
    end

    # Parses the keyword XML given on the command line into an array.
    def load_xml
      xml_parser = SearchLogger::XmlParser.new(@argv.first).parse
    end

    # Returns an array of result hashes for the keyword.
    # NOTE(review): live Google queries are commented out — both "pages"
    # are parsed from the same local spec fixture HTML, so this only
    # works when run from the gem root and never hits the network.
    def search_google(query_string)
      source = File.open('spec/support/file_repository/google_result_2.html').read
      #page_one = SearchLogger::GoogleParser.new.query(query_string).per_page(100).page(1).search
      #page_two = SearchLogger::GoogleParser.new.query(query_string).per_page(100).page(2).last_result(page_one).search
      page_one = SearchLogger::GoogleParser.new(source).search
      page_two = SearchLogger::GoogleParser.new(source).last_result(page_one).search
      results = []
      # NOTE(review): `position` is incremented but never used; positions
      # come from the Result objects themselves.
      position = 1
      (page_one + page_two).each do |e|
        results << e.as_ary
        position += 1
      end
      results
    end

    # Persists one keyword's results into the google_results table.
    def save_into_mysql(google_results)
      persistence = @database_connection
      persistence.data(google_results).table('google_results').save
    end

    # Dumps the google_results table to ~/search_logger.csv.
    # NOTE(review): a File handle is opened here and passed as :to, but
    # CSVExporter hands :to to CSV.open, which expects a path; the path
    # is also deleted AFTER opening the handle — verify this actually
    # produces the file as intended.
    def export_to_csv_file
      csv_path = ENV["HOME"] + "/search_logger.csv"
      csv_file = File.open(csv_path, "wb")

      print "3) Loading data from MySQL google_results table... "
      data = @database_connection.table("google_results").load_data
      print "\e[0;32mdone.\n\e[0m"

      print "4) Creating CSV file and adding data in #{csv_path}..."
      File.delete csv_path if File.exists? csv_path
      CSVExporter.new.export data, to: csv_file
      print "\e[0;32mdone.\n\e[0m"
    end
  end
end
module SearchLogger
  class GoogleParser
    # One parsed Google search hit: title, URL, description plus the
    # position it appeared at and the keyword that produced it.
    class Result
      attr_accessor :title, :url, :description, :position, :searched_keyword

      # node is the Nokogiri element for a single result; an explicit
      # `false` (the GoogleParser default) is rejected loudly.
      def initialize(node, position, searched_keyword)
        raise "No HTML node was specified" if node == false
        @position = position
        @searched_keyword = searched_keyword
        @title = nil
        @url = nil
        @description = nil
        @node = node
      end

      # Dispatches on the node's id attribute: plain results have no id
      # (or an id matching /mbb/), news boxes use id="newsbox".
      # Always returns self so callers can collect parsed results.
      def parse
        node_id = @node[:id]
        parse_normal_result if node_id.nil? || node_id =~ /mbb/
        parse_news_result if node_id == "newsbox"
        self
      end

      # Hash form of the result (note: returns a Hash despite the name).
      def as_ary
        {
          title: title,
          url: url,
          description: description,
          position: position,
          searched_keyword: searched_keyword
        }
      end

      # Extracts title/url from the h3 anchor and the description from
      # the div.s snippet; each field stays nil when its node is missing.
      def parse_normal_result
        tap do |result|
          anchor  = @node.at_css('h3 a')
          snippet = @node.at_css('div.s')
          result.title = sanitize_string(anchor.content) unless anchor.nil?
          result.url = sanitize_string(anchor[:href]) unless anchor.nil?
          result.description = sanitize_string(snippet.content) unless snippet.nil?
        end
      end

      # Extracts the same fields from the first entry of a news box.
      def parse_news_result
        tap do |result|
          title_link = @node.at_css('li.w0 span.tl a')
          description = @node.at_css('li.w0 span[dir=ltr]')
          result.title = sanitize_string(title_link.content) unless title_link.nil?
          result.url = sanitize_string(title_link[:href]) unless title_link.nil?
          result.description = sanitize_string(description.content) unless description.nil?
        end
      end

      # Normalizes scraped text: collapses whitespace runs to a single
      # space, trims, and drops Google's trailing "- Cached - Similar"
      # link-trail after an ellipsis.
      def sanitize_string(string)
        cleaned = string.gsub(/&/, "&")
        cleaned = cleaned.gsub(/[\s]{1,99}/, " ")
        cleaned = cleaned.strip
        cleaned.gsub(/\s\.\.\.[\s]{0,1}[w]{0,3}.*- Cached - Similar/, " ...")
      end

      # Bulk-assigns the three content fields from an options hash.
      def set_result(options = {})
        @title       = options[:title]
        @url         = options[:url]
        @description = options[:description]
      end
    end
  end
end
module SearchLogger
  # Builds a Google search URL via a chainable interface, fetches the
  # page (or parses an injected HTML string) and yields Result objects.
  class GoogleParser
    attr_accessor :query, :result, :position_offset, :start, :num, :rough_query

    # An HTML string may be injected as +result+; when present, search
    # parses it instead of hitting the network.
    def initialize(result = false)
      @result = result if result
      @start = 0
      @position_offset = 1
      @rough_query = ""
      @query = ""
      @num = 100
      @base_url = "https://www.google.com/search?"
    end

    # query options

    # Stores the raw keyword and a '+'-joined copy for the URL; chainable.
    def query(query)
      self.rough_query = query
      self.query = query.gsub(/\s/, "+")
      self
    end

    # Sets results-per-page; chainable.
    def per_page(quantity)
      self.num = quantity
      self
    end

    # Converts a 1-based page number into the Google start offset and
    # the position numbering for results on that page; chainable.
    def page(current_page)
      self.start = (current_page - 1) * num
      self.position_offset = start + 1
      self
    end

    # Continues position numbering after a previous page's results;
    # chainable.
    def last_result(result)
      self.position_offset = result.last.position + 1
      self
    end

    # Parses every li.g node into a Result, numbering them from the
    # current position offset.
    def search(result_object = Result)
      require "nokogiri"
      document = Nokogiri::HTML.parse(get_response)
      document.css('li.g').each_with_object([]) do |node, collected|
        collected << result_object.new(node, @position_offset, @rough_query).parse
        @position_offset += 1 unless collected.empty?
      end
    end

    # Assembles and URI-escapes the full search URL.
    def url
      params = []
      params << "q=#{@query}" if @query
      params << "num=#{num}"
      params << "hl=en"
      params << "start=#{@start}"
      require "uri"
      # NOTE(review): URI.encode was removed in Ruby 3 — this only runs
      # on the old Rubies this gem targets.
      URI.encode(@base_url + params.join("&"))
    end

    private

    # Returns the injected HTML when present, otherwise performs a live
    # GET following redirects.
    def get_response
      return @result if @result
      require 'httpclient'
      HTTPClient.new.get(url, :follow_redirect => true).body
    end
  end
end
# encoding: utf-8
require "mysql2"
module SearchLogger
  # Thin MySQL persistence layer: collects rows and a table name through
  # a chainable interface, then builds and runs INSERT/SELECT statements.
  class Persistence
    attr_accessor :table, :client, :connection_config
    attr_reader :data

    # Connects immediately; any Mysql2 connection error propagates to
    # the caller (Exec relies on that to re-prompt for credentials).
    def initialize(connection_config = { host: "localhost", username: "root", database: "search_logger" })
      @data = []
      @connection_config = connection_config
      establish_connection
    end

    def establish_connection
      @client = ::Mysql2::Client.new(@connection_config)
    end

    # sets up the operation properties

    # Dual-role accessor that shadows the attr_reader above: with no
    # argument (or []) it returns the stored rows; given a Hash or an
    # array of Hashes it stores them and returns self for chaining.
    def data(data = [])
      return @data if data.empty?
      data = [data] if data.is_a?(Hash)
      @data = data
      self
    end

    # Dual-role accessor for the table name: reader when called without
    # an argument, chainable writer otherwise.
    def table(table = nil)
      return @table unless table
      @table = table
      self
    end

    # Builds a single multi-row INSERT from the stored data. Field names
    # are taken from the first row's keys; values are escaped through the
    # client.
    # NOTE(review): table and column names are interpolated unescaped,
    # and every value is quoted as a string — safe only because both come
    # from this gem's own code, not user input.
    def save_to_sql
      fields, values = [], []
      fields_complete = false
      # gathers fields and values
      data.each_with_index do |e, index|
        values[index] = []
        e.each do |key, value|
          fields << key.to_s unless fields_complete
          values[index] << client.escape(value.to_s)
        end
        fields_complete = true
      end

      # creates values string
      each_record_values = []
      values.each do |e|
        each_record_values << "('#{e.join("', '")}')"
      end
      sql = "INSERT INTO #{table} (#{fields.join(', ')}) VALUES #{each_record_values.join(', ')}"
    end

    # Executes the generated INSERT; the client is injectable for tests.
    def save(client = @client)
      client.query(save_to_sql)
    end

    def load_to_sql
      "SELECT * FROM #{table}"
    end

    # Returns every row of the current table as an array of hashes with
    # symbol keys.
    def load_data(client = @client)
      [].tap do |e|
        client.query(load_to_sql).each(symbolize_keys: true) { |row| e << row }
      end
    end
  end
end
module SearchLogger
  # Reads a keyword XML file and extracts the text of matching nodes.
  class XmlParser
    attr_reader :file

    # Opens the file eagerly; a bad path raises here rather than in parse.
    def initialize(xml_file)
      @file = File.open(xml_file)
    end

    # Returns the text content of every node matched by the CSS-style
    # +path+ (default: <keyword> elements under <keywords>).
    def parse(path = 'keywords keyword')
      require "nokogiri"
      document = Nokogiri::XML @file
      keywords = []
      document.css(path).each { |node| keywords << node.content }
      keywords
    end
  end
end
@@ -0,0 +1,11 @@
|
|
1
|
+
require "search_logger/version"
|
2
|
+
|
3
|
+
require "search_logger/csv_exporter"
|
4
|
+
require "search_logger/google_parser"
|
5
|
+
require "search_logger/google_parser/result"
|
6
|
+
require "search_logger/persistence"
|
7
|
+
require "search_logger/xml_parser"
|
8
|
+
|
9
|
+
module SearchLogger
|
10
|
+
|
11
|
+
end
|
-- Rebuilds the search_logger database from scratch (destructive!).
DROP DATABASE IF EXISTS search_logger;
CREATE DATABASE search_logger;
USE search_logger;

-- One row per parsed Google result for a searched keyword; position is
-- the result's 1-based rank across the fetched pages.
CREATE TABLE google_results(
id int auto_increment,
searched_keyword varchar(250),
title text,
url text,
description text,
position int,
created_at datetime,
PRIMARY KEY(id)
) DEFAULT CHARACTER SET utf8;
-- Results are exported ordered by rank, so index position.
CREATE INDEX position_idx ON google_results(position);
# -*- encoding: utf-8 -*-
# Gem specification: pulls the version from lib/ and the file lists from git.
$:.push File.expand_path("../lib", __FILE__)
require "search_logger/version"

Gem::Specification.new do |s|
  s.name        = "search_logger"
  s.version     = SearchLogger::VERSION
  s.authors     = ["kurko"]
  s.email       = ["chavedomundo@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{Searches Google and saves results.}
  # Fixed grammar in the user-facing description ("read a XML" -> "reads an XML").
  s.description = %q{This gem reads an XML file, searching for each one of the keywords. Then, all results are saved into MySQL and later exported to CSV. This is a concept app.}

  s.rubyforge_project = "search_logger"

  # File lists come from git, so the gem must be built from a checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_development_dependency "rspec"
  s.add_development_dependency "guard"
  s.add_runtime_dependency "nokogiri"
  s.add_runtime_dependency "httpclient"
  s.add_runtime_dependency "mysql2"
end
# encoding: utf-8
# Acceptance specs for GoogleParser.
# NOTE(review): the first half performs LIVE Google searches — these
# specs are slow, network-dependent, and brittle against markup changes.
# The "mocked response" half parses a checked-in fixture instead.
require "spec_helper"

describe "Google parser" do
  before :all do
    @response = SearchLogger::GoogleParser.new.query('test').per_page(5).search
  end

  it "searches for a keyword" do
    @response.should be_kind_of Array
  end

  it "searches with hebraic keywords" do
    @response = SearchLogger::GoogleParser.new.query('שפות תכנות').per_page(5).search
    @response.should have(5).items
  end

  # depending on the query string, Google returns more or less than 100 links
  it "returns around 100 results per page by default" do
    response = SearchLogger::GoogleParser.new.query('amazon').search
    response.should have_at_least(95).items
    response.should have_at_most(105).items
  end

  context "multiple pages" do
    before :all do
      @all_results = SearchLogger::GoogleParser.new.query('amazon').per_page(5).search.map { |e| e.title }
    end

    it "returns 2 results" do
      (3..7).should cover(@all_results.size)
    end

    # Google might include news in some responses, so we don't compare them with ==
    it "takes the first 2 pages of results" do
      @page_one = SearchLogger::GoogleParser.new.query('amazon').per_page(1).page(1)
      @page_two = SearchLogger::GoogleParser.new.query('amazon').per_page(1).page(2)
      result = @page_one.search.map { |e| e.title } + @page_two.search.map { |e| e.title }
      @all_results.should include(result.first, result.last)
    end

    it "has the right position numbers" do
      @page_one = SearchLogger::GoogleParser.new.query('amazon').per_page(1).page(1).search
      @page_two = SearchLogger::GoogleParser.new.query('amazon').per_page(1).page(2).last_result(@page_one).search
      @page_one.first.position.should == 1
      @page_two.first.position.should == @page_one.size + 1
    end
  end

  context "for each result" do
    it "extracts title" do
      @response.each { |e| e.title.should_not == "" }
    end

    it "extract URL" do
      @response.each { |e| e.url.should_not == "" }
    end

    it "extract description" do
      @response.each { |e| e.description.should_not == "" }
    end
  end

  # Deterministic assertions against the checked-in fixture HTML.
  describe "parsing a mocked response", wip: true do
    let(:result_double) { File.open('spec/support/file_repository/google_result_2.html').read }
    context "item 1" do
      subject { SearchLogger::GoogleParser.new(result_double).search[0] }

      its(:title) { should == "Xovi: mehr als ein SEO Tool - online Marketing (SEO, SEM, Affiliate ..." }
      its(:url) { should == "http://www.xovi.de/" }
      its(:description) { should == "Setzen Sie unsere SEO Software f?r Ihr Online Marketing Budget intelligent und erfolgreich ein. Verlassen Sie sich nicht auf Ihr Bauchgef?hl oder Ihre Erfahrung ..." }
      its(:position) { should == 1 }
    end

    context "item 7" do
      subject { SearchLogger::GoogleParser.new(result_double).search[7] }

      its(:title) { should == "SEO Company India" }
      its(:url) { should == "http://www.seosoftwareservices.com/" }
      its(:description) { should == "SEO Company India Services SEO Company Offers Standard SEO Company to enhance your ranking.We Provide Professional SEO Company India, SEO India ..." }
      its(:position) { should == 8 }
    end

    context "item 18" do
      subject { SearchLogger::GoogleParser.new(result_double).search[18] }

      its(:title) { should == "Microsite Masters Rank Tracker - Accurate Keyword Tracking for ..." }
      its(:url) { should == "http://www.micrositemasters.com/" }
      its(:description) { should == "Microsite Masters search engine optimization software tracks keywords across multiple URL's, ..." }
      its(:position) { should == 19 }
    end
  end
end
# encoding: utf-8
# Acceptance spec for CSVExporter: rows shaped like Persistence#load_data
# output are written to a scratch CSV inside the spec file repository.
require "spec_helper"

describe "Exporting MySQL data to a CSV file" do
  let(:data) {
    [
      { id: 726, searched_keyword: "amazon", title: "This is a title",
        url: "www.github.com", description: "First description.",
        position: 1, created_at: nil },
      { id: 727, searched_keyword: "שפות תכנות", title: "שפות תכנות",
        url: "www.github.com", description: "שפות, תכנות.",
        position: 2, created_at: nil },
      { id: 728, searched_keyword: "amazon", title: "This is the, third title",
        url: "www.github.com", description: "Third description.",
        position: 3, created_at: nil }
    ]
  }
  let(:target_file) { File.expand_path("../../support/file_repository/exported_from_persistence.csv", __FILE__) }

  # Start each example from a clean slate.
  before do
    File.delete target_file if File.exists? target_file
  end

  # NOTE(review): this example has no assertion — it only proves export
  # of [] doesn't raise; it does not verify that no file was created.
  it "if no data is sent, no data is saved" do
    CSVExporter.new.export [], to: target_file
  end

  it "saves data into a CSV file" do
    CSVExporter.new.export data, to: target_file
    File.exists?(target_file).should be_true
    saved_data = CSV.parse File.read(target_file)
    # Joining with ',' flattens quoting, so embedded commas in fields
    # reappear as separators in the expected strings below.
    saved_data[0].join(',').should == 'keyword,position,url,title,description'
    saved_data[1].join(',').should == 'amazon,1,www.github.com,This is a title,First description.'
    saved_data[2].join(',').should == 'שפות תכנות,2,www.github.com,שפות תכנות,שפות, תכנות.'
    saved_data[3].join(',').should == 'amazon,3,www.github.com,This is the, third title,Third description.'
  end

  pending "check if dir has write permission"
end
# Acceptance spec for Persistence.
# NOTE(review): requires a running local MySQL with the search_logger
# schema loaded; it DELETEs all rows from google_results before each run.
require "spec_helper"

describe "Data persistence" do
  let(:data) {
    [ {searched_keyword: "amazon", title: "This is a title", url: "www.github.com", description: "First description.", position: 1},
      {searched_keyword: "amazon", title: "This is the second title", url: "www.github.com", description: "Second description.", position: 2},
      {searched_keyword: "amazon", title: "This is the third title", url: "www.github.com", description: "Third description.", position: 3}
    ]
  }

  before do
    @persistence = SearchLogger::Persistence.new
    @persistence.client.query("DELETE FROM google_results")
  end

  it "stores an array of values in the database" do
    @persistence.data(data)
    @persistence.table('google_results').save
    # A second connection proves the rows were really committed.
    @another_persistence_object = SearchLogger::Persistence.new
    saved_data = @another_persistence_object.table("google_results").load_data
    # Strip the DB-generated columns before comparing with the input.
    saved_data.map { |e| e.tap { |x| x.delete(:id) }.tap { |x| x.delete(:created_at) } }.should == data
  end

  pending "when there's no database created"

end