wikihow 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7049014cdd5bf0f414c1d25b67ff23cb510071007c60cae0310588da402a7564
4
- data.tar.gz: d8c84ca96eda9d23f318b5dc4b6b276d332aec6d68d8f405cfdfc87eb8e88965
3
+ metadata.gz: c9ddfdd1a9076593a482625840ecb771a52fc2a5e087de6d5f4c214334aee9ae
4
+ data.tar.gz: 525467dc49b64bb6a42a45240aa072934378a9499241ccac780e8016f2e351fe
5
5
  SHA512:
6
- metadata.gz: 6cd866fa89a9175087255dc9f538c2b8550681d747030a1a679fdc38cc72a0e424ff86c12cc501b2ff9f89ca06d1472c44182ff1f3e0af0ef2a0b4118b91ba32
7
- data.tar.gz: 49ee6a9276be12580ac75bf880f1ce0ff51598125b71c0d50190380d240d37990c291769e7a0d67c3aa0eeef76a8a9f442303f4b9169e47b1a9133e70e1a83eb
6
+ metadata.gz: 68cd4e28703925cef8b23db2a12628aa2d652ac76cf1298c16e70dc33d28e0b1c1e80927583a4642409d580662a63948708a017e198854e3b01c698005ace4a1
7
+ data.tar.gz: 0a47cb859671095a0befc78e077cb88fcc203958d32bd523af0545246441d897673a87013944507e448b762652fc52a90169fd1eb6e2910db18df66557baa449
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- wikihow (0.1.0)
4
+ wikihow (0.1.1)
5
5
  nokogiri
6
6
 
7
7
  GEM
data/README.md CHANGED
@@ -22,7 +22,7 @@ Or install it yourself as:
22
22
 
23
23
  Type the below and follow the on screen prompts.
24
24
 
25
- ./bin/wikihow
25
+ $ wikihow
26
26
 
27
27
  ## Development
28
28
 
@@ -18,20 +18,8 @@ class Wikihow::Category
18
18
 
19
19
  def self.get_or_create_categories
20
20
  if self.all == []
21
- self.scrape_for_categories.each{|category| self.new(category)}
21
+ Wikihow::Scraper.scrape_for_categories.each{|category| self.new(category)}
22
22
  end
23
- #binding.pry
24
23
  self.all
25
24
  end
26
-
27
- def self.scrape_for_categories
28
- doc = Nokogiri::HTML(open("https://www.wikihow.com/Main-Page"))
29
- categories_array = []
30
- doc.search("#hp_categories a").each do |category|
31
- title = category.text
32
- url = category.attr("href")
33
- categories_array << {:title => title,:url => url}
34
- end
35
- categories_array
36
- end
37
25
  end
@@ -0,0 +1,47 @@
1
+ class Wikihow::Scraper
2
+ def self.scrape_for_categories
3
+ doc = Nokogiri::HTML(open("https://www.wikihow.com/Main-Page"))
4
+ categories_array = []
5
+ doc.search("#hp_categories a").each do |category|
6
+ title = category.text
7
+ url = category.attr("href")
8
+ categories_array << {:title => title,:url => url}
9
+ end
10
+ categories_array
11
+ end
12
+
13
+ def self.scrape_for_topics(category)
14
+ doc = Nokogiri::HTML(open("https://www.wikihow.com" + category.url))
15
+ topics_array = []
16
+ doc.search("#cat_container #cat_all a").each do |topic|
17
+ title = topic.search("span").text.strip
18
+ url = topic.attr("href")
19
+ topics_array << {:title => title,:url => url} if title != ""
20
+ end
21
+ topics_array
22
+ end
23
+
24
+ def self.scrape_topic(topic)
25
+ doc = Nokogiri::HTML(open(topic.url))
26
+
27
+ topic.intro = doc.search("#intro p").last.text
28
+ sections_array = []
29
+ doc.search("#intro #method_toc .toc_method").each do |method|
30
+ sections_array << {:section_title => method.text, :section_steps => []}
31
+ end
32
+
33
+ sections_array.each.with_index do |section, i|
34
+ doc.search(".steps_list_2")[i].search(".step").each do |section_li|
35
+ step_description = [section_li.search(".whb").text.strip + " " + section_li.search("> text()").text.strip]
36
+ section_li.search("> ul > li").each do |step_li|
37
+ bullet_point = [step_li.search("> text(), a").text.strip]
38
+ sub_bullet_point = step_li.search("> ul > li").collect {|bullet_point_li|bullet_point_li.search("> text()").text.strip}
39
+ bullet_point << sub_bullet_point if sub_bullet_point !=[]
40
+ step_description << bullet_point if bullet_point != []
41
+ end
42
+ section[:section_steps] << step_description
43
+ end
44
+ end
45
+ sections_array
46
+ end
47
+ end
data/lib/wikihow/topic.rb CHANGED
@@ -15,7 +15,7 @@ class Wikihow::Topic
15
15
 
16
16
  def sections
17
17
  if @sections == []
18
- @sections = self.scrape_topic
18
+ @sections = Wikihow::Scraper.scrape_topic(self)
19
19
  end
20
20
  @sections
21
21
  end
@@ -24,53 +24,10 @@ class Wikihow::Topic
24
24
  @sections = sections
25
25
  end
26
26
 
27
- def scrape_topic
28
- doc = Nokogiri::HTML(open(self.url))
29
-
30
- self.intro = doc.search("#intro p").last.text
31
- sections_array = []
32
- doc.search("#intro #method_toc .toc_method").each do |method|
33
- sections_array << {:section_title => method.text, :section_steps => []}
34
- end
35
-
36
- sections_array.each.with_index do |section, i|
37
- doc.search(".steps_list_2")[i].search(".step").each do |section_li|
38
- step_description = [section_li.search(".whb").text.strip + " " + section_li.search("> text()").text.strip]
39
- section_li.search("> ul > li").each do |step_li|
40
- bullet_point = [step_li.search("> text(), a").text.strip]
41
- sub_bullet_point = step_li.search("> ul > li").collect {|bullet_point_li|bullet_point_li.search("> text()").text.strip}
42
- bullet_point << sub_bullet_point if sub_bullet_point !=[]
43
- step_description << bullet_point if bullet_point != []
44
- end
45
- section[:section_steps] << step_description
46
- end
47
- end
48
- sections_array
49
- end
50
-
51
- # def self.sentence_to_snake_case(string)
52
- # string.gsub(" ","_")
53
- # end
54
- #
55
- # def self.snake_case_to_sentence(string)
56
- # string.gsub("_"," ")
57
- # end
58
-
59
27
  def self.get_or_create_topics_from_category(category)
60
28
  if category.topics == []
61
- self.scrape_for_topics(category).each{|topic_hash|self.new(topic_hash, category)}
29
+ Wikihow::Scraper.scrape_for_topics(category).each{|topic_hash|self.new(topic_hash, category)}
62
30
  end
63
31
  category.topics
64
32
  end
65
-
66
- def self.scrape_for_topics(category)
67
- doc = Nokogiri::HTML(open("https://www.wikihow.com" + category.url))
68
- topics_array = []
69
- doc.search("#cat_container #cat_all a").each do |topic|
70
- title = topic.search("span").text.strip
71
- url = topic.attr("href")
72
- topics_array << {:title => title,:url => url} if title != ""
73
- end
74
- topics_array
75
- end
76
33
  end
@@ -1,3 +1,3 @@
1
1
  module Wikihow
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
data/lib/wikihow.rb CHANGED
@@ -5,3 +5,4 @@ require_relative './wikihow/version'
5
5
  require_relative './wikihow/cli'
6
6
  require_relative './wikihow/category'
7
7
  require_relative './wikihow/topic'
8
+ require_relative './wikihow/scraper'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wikihow
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - "'Jared Grove'"
@@ -90,6 +90,7 @@ files:
90
90
  - lib/wikihow.rb
91
91
  - lib/wikihow/category.rb
92
92
  - lib/wikihow/cli.rb
93
+ - lib/wikihow/scraper.rb
93
94
  - lib/wikihow/topic.rb
94
95
  - lib/wikihow/version.rb
95
96
  - wikihow.gemspec