yahoo_answers_scraper 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7c75433af56113a2ce17e042cb4a130e122b6eb1
4
+ data.tar.gz: 6edbb2c53fa085192ed49381de6561de04b0ca82
5
+ SHA512:
6
+ metadata.gz: ef8027a7ebe00c93673e2c049a97bb0ffb5b7e12a0ad52bdb2309105cf1e8e60e6c84f28456194f3c77540de1e989e264f5c9765c7a17e9d71433827d55b11ec
7
+ data.tar.gz: ed59c5119c7c5c16fefdc1b2a25205bb96edbb36505c881c8ccc1bd2135e24ef583a3cce94df0ac3b4f7a28d610b9722ca9a2e9726947f9120834495b0dd5405
@@ -0,0 +1,4 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'yahoo_answers_scraper/question'
4
+ require 'yahoo_answers_scraper/query'
@@ -0,0 +1,48 @@
1
module YahooAnswersScraper
  # Runs a search against Yahoo Answers and accumulates the questions listed
  # on each result page it fetches.
  class Query
    BASE_URL = "https://answers.yahoo.com"

    # CSS selector for the anchor inside each search-result title.
    QUESTION_LINK_SELECTOR = "h3.question-title > a"

    attr_reader :query, :questions, :current_page

    # @param query [String] the search terms
    # @param options [Hash] optional settings
    # @option options [String] :mode value sent as the `sort` URL parameter
    #   (defaults to "rel")
    # @option options [Integer] :offset number of the last page considered
    #   already fetched; fetching resumes at offset + 1 (defaults to 0)
    def initialize(query, options={})
      @query = query
      @mode = options[:mode] || "rel"
      @current_page = options[:offset] || 0
      @questions = []
    end

    # Fetches the next +pages+ result pages, appending a Question for every
    # result link found, and advances +current_page+ after each page.
    #
    # @param pages [Integer] how many consecutive pages to fetch
    # @return [Query] self, to allow chaining
    def fetch(pages=1)
      page = @current_page + 1

      pages.times do
        doc = fetch_document(search_url(page))

        doc.css(QUESTION_LINK_SELECTOR).each do |link_el|
          href = link_el["href"]
          # An anchor without an href cannot be followed; skip it rather
          # than crash on a nil attribute.
          next unless href

          @questions << YahooAnswersScraper::Question.new(
            question: link_el.text.strip,
            link: BASE_URL + href
          )
        end

        @current_page = page
        page += 1
      end

      self
    end

    # Calls +fetch+ on every question collected so far.
    #
    # @return [Query] self, to allow chaining
    def fetch_questions
      @questions.each(&:fetch)
      self
    end

    # Builds the search-result URL for the given page.
    #
    # The query is form-encoded, so characters such as "&", "=" and "#" in
    # the search terms cannot corrupt the query string.
    #
    # @param page [Integer] page number sent as the `s` parameter
    # @return [String]
    def search_url(page=1)
      params = URI.encode_www_form(p: @query, s: page, sort: @mode)
      "#{BASE_URL}/search/search_result?#{params}"
    end

    def inspect
      "#<YahooAnswersScraper::Query query: #{@query.inspect}, current_page: #{@current_page.inspect}>"
    end

    private

    # Downloads +url+ and parses it as HTML. The block form closes the
    # underlying IO once parsing is done, and going through URI#open
    # (instead of Kernel#open) never interprets the string as a command.
    def fetch_document(url)
      URI.parse(url).open { |io| Nokogiri::HTML.parse(io) }
    end
  end
end
@@ -0,0 +1,57 @@
1
module YahooAnswersScraper
  # A single question: its title and link, plus the body and answers once
  # +fetch+ has loaded the question page.
  class Question
    attr_reader :question, :link

    # @param attributes [Hash]
    # @option attributes [String] :question the question title
    # @option attributes [String] :link URL of the question page
    def initialize(attributes={})
      @question = attributes[:question]
      @link = attributes[:link]

      @question_body = nil

      @answer_texts = []
      @answer_htmls = []
    end

    # Downloads the question page and extracts the question body and the
    # answers from its `div.content` elements: the first is taken as the
    # question body, the rest as answers.
    #
    # @return [Question] self, to allow chaining
    def fetch
      contents = fetch_document.css('div.content').to_a

      @question_body = contents.first

      # drop(1) yields [] for an empty page, where contents[1..-1] would be
      # nil and crash the mapping below.
      @answers = contents.drop(1)
      @answer_texts = @answers.map { |answer| answer.text.strip }
      @answer_htmls = @answers.map(&:to_s)

      self
    end

    # @param mode [Symbol] :text for the stripped text, :html for the markup
    # @return [String, nil] nil until the question has been fetched
    # @raise [RuntimeError] if +mode+ is neither :text nor :html
    def question_body(mode=:text)
      return nil unless @question_body

      case mode
      when :text
        @question_body.text.strip
      when :html
        @question_body.to_s
      else
        fail "unknown mode #{mode.inspect} (expected :text or :html)"
      end
    end

    # @param mode [Symbol] :text for stripped texts, :html for the markup
    # @return [Array<String>] empty until the question has been fetched
    # @raise [RuntimeError] if +mode+ is neither :text nor :html
    def answers(mode=:text)
      case mode
      when :text
        @answer_texts
      when :html
        @answer_htmls
      else
        fail "unknown mode #{mode.inspect} (expected :text or :html)"
      end
    end

    def inspect
      "#<YahooAnswersScraper::Question question: #{@question.inspect}>"
    end

    private

    # Downloads and parses the question page. The block form closes the
    # underlying IO once parsing is done, and going through URI#open
    # (instead of Kernel#open) never interprets the link as a command.
    def fetch_document
      URI.parse(@link).open { |io| Nokogiri::HTML.parse(io) }
    end
  end
end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yahoo_answers_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Rob Dawson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ description: A simple web-scraping interface to query Yahoo Answers
28
+ email: robhdawson@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/yahoo_answers_scraper.rb
34
+ - lib/yahoo_answers_scraper/query.rb
35
+ - lib/yahoo_answers_scraper/question.rb
36
+ homepage: http://rubygems.org/gems/yahoo_answers
37
+ licenses:
38
+ - MIT
39
+ metadata: {}
40
+ post_install_message:
41
+ rdoc_options: []
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ required_rubygems_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubyforge_project:
56
+ rubygems_version: 2.1.11
57
+ signing_key:
58
+ specification_version: 4
59
+ summary: Yahoo Answers Scraper
60
+ test_files: []