yahoo_answers_scraper 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/yahoo_answers_scraper.rb +4 -0
- data/lib/yahoo_answers_scraper/query.rb +48 -0
- data/lib/yahoo_answers_scraper/question.rb +57 -0
- metadata +60 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7c75433af56113a2ce17e042cb4a130e122b6eb1
|
4
|
+
data.tar.gz: 6edbb2c53fa085192ed49381de6561de04b0ca82
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ef8027a7ebe00c93673e2c049a97bb0ffb5b7e12a0ad52bdb2309105cf1e8e60e6c84f28456194f3c77540de1e989e264f5c9765c7a17e9d71433827d55b11ec
|
7
|
+
data.tar.gz: ed59c5119c7c5c16fefdc1b2a25205bb96edbb36505c881c8ccc1bd2135e24ef583a3cce94df0ac3b4f7a28d610b9722ca9a2e9726947f9120834495b0dd5405
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module YahooAnswersScraper
  # Runs a search against Yahoo Answers and collects the questions listed on
  # the result pages as YahooAnswersScraper::Question objects.
  #
  # NOTE(review): this class relies on Nokogiri and open-uri already being
  # loaded; no require is visible here -- confirm the gem's entry file does it.
  class Query
    BASE_URL = "https://answers.yahoo.com"

    attr_reader :query, :questions, :current_page

    # @param query [String] the search terms
    # @param options [Hash]
    # @option options [String] :mode value sent as the `sort` URL parameter
    #   (defaults to "rel")
    # @option options [Integer] :offset page number treated as already
    #   fetched; #fetch resumes on the page after it (defaults to 0)
    def initialize(query, options = {})
      @query = query
      @mode = options[:mode] || "rel"
      @current_page = options[:offset] || 0
      @questions = []
    end

    # Downloads the next +pages+ result pages and appends one Question per
    # result link to #questions. #current_page is advanced after each page
    # that was processed, so a failure part-way keeps the progress made.
    #
    # @param pages [Integer] how many result pages to download
    # @return [Query] self, to allow chaining
    def fetch(pages = 1)
      pages.times do
        page = @current_page + 1
        doc = fetch_document(search_url(page))

        doc.css("h3.question-title > a").each do |link_el|
          href = link_el["href"]
          # A link without an href cannot be turned into a question URL.
          next unless href

          @questions << YahooAnswersScraper::Question.new(
            question: link_el.text.strip,
            link: BASE_URL + href
          )
        end

        @current_page = page
      end

      self
    end

    # Calls #fetch on every question collected so far.
    #
    # @return [Query] self, to allow chaining
    def fetch_questions
      @questions.each(&:fetch)
      self
    end

    # Builds the URL of one search result page.
    #
    # The query is form-encoded so that characters such as "&", "=", "#" and
    # non-ASCII text cannot break or alter the URL parameters.
    #
    # @param page [Integer] value sent as the `s` URL parameter
    # @return [String]
    def search_url(page = 1)
      q = URI.encode_www_form_component(@query)
      "#{BASE_URL}/search/search_result?p=#{q}&s=#{page}&sort=#{@mode}"
    end

    # @return [String] short description showing the query and current page
    def inspect
      "#<YahooAnswersScraper::Query query: #{@query.inspect}, current_page: #{@current_page.inspect}>"
    end

    private

    # Downloads +url+ and returns the parsed document. The block form of
    # open is used so the downloaded stream is closed once parsing is done.
    def fetch_document(url)
      URI.parse(url).open { |io| Nokogiri::HTML.parse(io) }
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module YahooAnswersScraper
  # A single Yahoo Answers question: its title and link, plus the question
  # body and answers once #fetch has downloaded the question page.
  #
  # NOTE(review): this class relies on Nokogiri and open-uri already being
  # loaded; no require is visible here -- confirm the gem's entry file does it.
  class Question
    attr_reader :question, :link

    # @param attributes [Hash]
    # @option attributes [String] :question the question title
    # @option attributes [String] :link URL of the question page
    def initialize(attributes = {})
      @question = attributes[:question]
      @link = attributes[:link]

      @question_body = nil

      @answer_texts = []
      @answer_htmls = []
    end

    # Downloads the question page and extracts its `div.content` elements:
    # the first is kept as the question body, the remaining ones as answers.
    # A page with no such elements leaves the body nil and the answers empty.
    #
    # @return [Question] self, to allow chaining
    def fetch
      # The block form of open closes the downloaded stream after parsing.
      doc = URI.parse(@link).open { |io| Nokogiri::HTML.parse(io) }
      contents = doc.css('div.content').to_a

      @question_body = contents.first

      # drop(1) yields [] for an empty list, where [1..-1] would yield nil.
      @answers = contents.drop(1)
      @answer_texts = @answers.map { |answer| answer.text.strip }
      @answer_htmls = @answers.map(&:to_s)

      self
    end

    # @param mode [Symbol] :text for the stripped text, :html for the markup
    # @return [String, nil] the question body, or nil when no body has been
    #   fetched (in which case +mode+ is not checked)
    # @raise [RuntimeError] if a body exists and +mode+ is not :text or :html
    def question_body(mode = :text)
      return nil unless @question_body

      case mode
      when :text
        @question_body.text.strip
      when :html
        @question_body.to_s
      else
        raise "unknown mode: #{mode.inspect} (expected :text or :html)"
      end
    end

    # @param mode [Symbol] :text for the stripped texts, :html for the markup
    # @return [Array<String>] one entry per answer; empty before #fetch
    # @raise [RuntimeError] if +mode+ is not :text or :html
    def answers(mode = :text)
      case mode
      when :text
        @answer_texts
      when :html
        @answer_htmls
      else
        raise "unknown mode: #{mode.inspect} (expected :text or :html)"
      end
    end

    # @return [String] short description showing the question title
    def inspect
      "#<YahooAnswersScraper::Question question: #{@question.inspect}>"
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: yahoo_answers_scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Rob Dawson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-01-21 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
description: A simple web-scraping interface to query Yahoo Answers
|
28
|
+
email: robhdawson@gmail.com
|
29
|
+
executables: []
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- lib/yahoo_answers_scraper.rb
|
34
|
+
- lib/yahoo_answers_scraper/query.rb
|
35
|
+
- lib/yahoo_answers_scraper/question.rb
|
36
|
+
homepage: http://rubygems.org/gems/yahoo_answers
|
37
|
+
licenses:
|
38
|
+
- MIT
|
39
|
+
metadata: {}
|
40
|
+
post_install_message:
|
41
|
+
rdoc_options: []
|
42
|
+
require_paths:
|
43
|
+
- lib
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
requirements: []
|
55
|
+
rubyforge_project:
|
56
|
+
rubygems_version: 2.1.11
|
57
|
+
signing_key:
|
58
|
+
specification_version: 4
|
59
|
+
summary: Yahoo Answers Scraper
|
60
|
+
test_files: []
|