apw_articles 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 762d2ad97904f469c347327ca3bf900ea49ed4af
4
+ data.tar.gz: 9af397ae00b4c5781ef1d419aa8e6f329397b23e
5
+ SHA512:
6
+ metadata.gz: 7e857e447b235f45510310db1fee667d447d52d858991de58b8750cc9dbaa51a3dac448718a5135788660a508640008cbc7b7e1aa346298c4c6c54ced3bf8e3e
7
+ data.tar.gz: f1dce15cbb89fc5524c23a6b42f1834a8bff0b5fce399b3a1553a100cb63ce2ca6d3150ce628342565d9e05c06ccf1ab91d7ed0fde82390980e04fc97b53a5d3
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby
# Executable entry point for the apw_articles gem: load the library,
# then hand control to the interactive command-line interface.

require_relative '../lib/apw_articles.rb'

APWArticles::CLI.new.run
@@ -0,0 +1,10 @@
1
+ require 'pry'
2
+ require 'nokogiri'
3
+ require 'open-uri'
4
+ require 'colorize'
5
+
6
+
7
+ require_relative '../lib/apw_articles/article.rb'
8
+ require_relative '../lib/apw_articles/category.rb'
9
+ require_relative '../lib/apw_articles/cli.rb'
10
+ require_relative '../lib/apw_articles/scraper.rb'
@@ -0,0 +1,5 @@
1
# Top-level namespace for the gem; Article, Category, CLI and Scraper all
# live under this module (loaded via the environment file below).
module APWArticles
end

require_relative '../config/environment.rb'
@@ -0,0 +1,33 @@
1
class APWArticles::Article
  attr_accessor :title, :author, :blurb, :url, :categories

  @@all = []

  # Builds an article from a hash of attributes. The :categories key holds an
  # array of category URL slugs; each slug is resolved to a Category object
  # (created if needed) and the association is wired in both directions.
  # Every other key is assigned via its attr_accessor writer.
  def initialize(attribute_hash)
    self.categories = []
    attribute_hash.each do |key, value|
      if key == :categories
        value.each do |category|
          c = APWArticles::Category.find_or_create_by_url(category)
          self.categories << c
          c.articles << self
        end
      else
        self.send("#{key}=", value)
      end
    end
    # BUGFIX: register the article exactly once. Previously this line sat
    # inside the each loop above, so the article was appended to @@all once
    # per attribute key, leaving duplicates in Article.all.
    @@all << self
  end

  # Scrapes the given URL and builds a fully-populated Article from the
  # resulting attribute hash.
  def self.new_from_url(url)
    self.new(APWArticles::Scraper.scrape_article(url))
  end

  def self.new_from_list(list_url)
    # call scraper for list and then initialize from hash
  end

  # All articles created so far, in creation order.
  def self.all
    @@all
  end
end
@@ -0,0 +1,36 @@
1
class APWArticles::Category
  attr_accessor :name, :articles, :url

  @@all = []

  # Default category URL slugs.
  # NOTE: this list was gathered with APWArticles::Scraper.scrape_categories,
  # but that scrape iterates over 66 articles and is too slow for the CLI, so
  # the slugs are hard coded. New Category objects are still created lazily
  # when unknown slugs are encountered in articles.
  CATEGORIES = ["divorce", "kids-no-kids", "sex", "career", "life", "marriage-essays", "money", "feminism", "essays", "the-hard-stuff", "reclaiming-wife", "advice", "genderfeminism", "friends-relations", "engagements-proposals", "happy-hour"].freeze

  # Ensures a Category object exists for every default slug.
  def self.defaults
    CATEGORIES.each { |url| self.find_or_create_by_url(url) }
  end

  # Builds a category from its URL slug, deriving a display name from it
  # (e.g. "kids-no-kids" -> "Kids No Kids") and registering it globally.
  def initialize(url)
    self.name = url.gsub(/-/, ' ').split.map(&:capitalize).join(' ')
    self.url = url
    self.class.all << self
    self.articles = []
  end

  # All categories created so far, in creation order.
  def self.all
    @@all
  end

  # Returns the existing category with this slug, or creates a new one.
  # FIX: the original compared the detect result against nil and then ran the
  # same detect scan a second time to return it; a single scan with || does
  # the same job.
  def self.find_or_create_by_url(url)
    self.all.detect { |category| category.url == url } || self.new(url)
  end

  # Scrapes the site for category slugs and materializes a Category for each.
  def self.create_from_url
    APWArticles::Scraper.scrape_categories.each do |category|
      self.find_or_create_by_url(category)
    end
  end
end
@@ -0,0 +1,85 @@
1
class APWArticles::CLI

  # Entry point: seed the default categories, then show the category menu.
  def run
    APWArticles::Category.defaults
    self.list_categories
  end

  # Prints every known category with a 1-based index and prompts until the
  # user picks a valid number, then shows page 1 of that category's articles.
  def list_categories
    puts "------------ A Practical Wedding - Marriage Essays ------------\n".colorize(:cyan)
    puts "CATEGORIES:"
    APWArticles::Category.all.each_with_index do |category, index|
      print "#{index+1}.\t".colorize(:cyan)
      puts "#{category.name}"
    end
    puts "\nPlease choose a category by number".colorize(:blue)
    choice = gets.strip
    until choice.to_i > 0 && choice.to_i <= APWArticles::Category.all.size
      puts "Please type a number between 1 and #{APWArticles::Category.all.size}.".colorize(:blue)
      choice = gets.strip
    end
    self.list_articles_in_category_by_page(APWArticles::Category.all[choice.to_i - 1], 1)
  end

  # Shows one ten-article page for a category, numbering articles by their
  # absolute position (page 1 = 1-10, page 2 = 11-20, ...). The scraper is
  # only invoked for pages whose listing has not already been fetched.
  # Prompts for an article number to drill into, or 'next' for the next page.
  def list_articles_in_category_by_page(category, page = 1)
    index_window = Array((page * 10 - 10)..(page * 10 - 1)) # page 1 => 0..9, page 2 => 10..19
    puts "\n\n------------ Articles in #{category.name} ------------".colorize(:cyan)
    # NOTE this is very laggy and perhaps shouldn't take place here.
    APWArticles::Scraper.scrape_list(category.url, page) unless page.between?(2,5) || page.between?(7,12)
    index_window.each do |article_num|
      article = category.articles[article_num]
      next if article.nil?
      print "#{article_num+1}.\t".colorize(:cyan)
      puts "#{article.title}"
    end
    # NOTE: I might like to split out the input logic here.
    puts "\n\nType the article number to view more information about the article. \nOr type 'next' to view the next page of articles.".colorize(:blue)
    choice = gets.strip
    lower = index_window[0] + 1
    upper = index_window[-1] + 1
    until choice.match?(/next/i) || (choice.to_i >= lower && choice.to_i <= upper)
      puts "Please type a number between #{index_window[0]+1} and #{index_window[-1]+1} or type 'next'.".colorize(:blue)
      choice = gets.strip
    end
    if choice.match?(/next/i)
      list_articles_in_category_by_page(category, page + 1)
    else
      self.article_information(category.articles[choice.to_i - 1].url)
    end
  end

  # Scrapes the given article URL into a full Article object, prints its
  # title, author, blurb, URL and category names, then loops until the user
  # types 'list' (back to the category menu) or 'exit' (terminate).
  def article_information(article_url)
    article = APWArticles::Article.new_from_url(article_url)
    print "\nTitle:".colorize(:cyan)
    puts "#{article.title}"
    print "\nAuthor:".colorize(:cyan)
    puts "#{article.author}"
    print "\n\nBlurb:".colorize(:cyan)
    puts "\"#{article.blurb}...\""
    print "\nURL:".colorize(:cyan)
    puts "#{article_url}"
    category_names = article.categories.map { |category| category.name }
    print "\nCategories:".colorize(:cyan)
    puts "#{category_names.join(", ")}."
    puts "Type 'list' to return to the category list page. To exit, type 'exit'".colorize(:blue)
    choice = gets.strip
    until choice.match?(/exit/i) || choice.match?(/list/i)
      puts "Please type 'list' or 'exit'.".colorize(:blue)
      choice = gets.strip
    end
    if choice.match?(/exit/i)
      abort("Thank you.")
    else
      self.list_categories
    end
  end

end
@@ -0,0 +1,53 @@
1
class APWArticles::Scraper

  # Scrapes one or more listing pages for the given category slug and builds
  # an Article (url, title, categories) for every post found. The site lists
  # 66 articles per listing page, so listing page 1 serves CLI pages 1-6,
  # listing page 2 serves CLI pages 7-13, etc.; first/last below define the
  # range of listing page numbers that must be fetched. Returns nil.
  def self.scrape_list(category, page = 1)
    # NOTE: probably should only scrape the articles needed for the given
    # request / call - fetching whole listing pages is very laggy.
    first = 1 if page.between?(1,6)
    first = 2 if page.between?(7,13)
    first = 3 if page > 13
    last = 1 if page.between?(1,5)
    last = 2 if page.between?(6,12)
    last = 3 if page > 12
    until first > last
      list_url = "https://apracticalwedding.com/category/marriage-essays/#{category}/page/#{first}/?listas=list"
      Nokogiri::HTML(open(list_url)).css(".type-post").each do |post|
        APWArticles::Article.new({url: post.css("a").attribute("href").value, title: post.css("h2").text, categories: [category]})
      end
      first += 1
    end
    nil
  end

  # Scrapes a single article page and returns a hash of its attributes:
  # :title, :author, :url, :blurb (first 400 characters of body text) and
  # :categories (an array of URL slugs taken from the category links).
  def self.scrape_article(url)
    doc = Nokogiri::HTML(open(url))
    slugs = doc.css(".categories a").map do |link|
      link.attribute("href").value.split("/")[-1]
    end
    {
      title: doc.css("h1").text,
      author: doc.css(".staff-info h2").text,
      url: url,
      blurb: doc.css(".entry p").text[0,400],
      categories: slugs
    }
  end

  # Scrapes a listing page of essays and returns the unique category slugs
  # embedded in each post's CSS class list as "category-<slug>" entries.
  def self.scrape_categories(url = "https://apracticalwedding.com/category/marriage-essays/?listas=list")
    posts = Nokogiri::HTML(open(url)).css(".type-post")
    categories = posts.flat_map do |post|
      pieces = post.attribute("class").value.split(/ category-/)
      pieces.drop(1).map { |piece| piece.split[0] }
    end
    categories.uniq
  end

end
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: apw_articles
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Rachel Walwood
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-03-21 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A Ruby Gem to explore articles on the A Practical Wedding website that
14
+ are useful after the wedding.
15
+ email: walwoodr@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - bin/apw_articles
21
+ - config/environment.rb
22
+ - lib/apw_articles.rb
23
+ - lib/apw_articles/article.rb
24
+ - lib/apw_articles/category.rb
25
+ - lib/apw_articles/cli.rb
26
+ - lib/apw_articles/scraper.rb
27
+ homepage: http://rubygems.org/gems/apw_articles
28
+ licenses:
29
+ - MIT
30
+ metadata: {}
31
+ post_install_message:
32
+ rdoc_options: []
33
+ require_paths:
34
+ - lib
35
+ required_ruby_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ required_rubygems_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ requirements: []
46
+ rubyforge_project:
47
+ rubygems_version: 2.6.10
48
+ signing_key:
49
+ specification_version: 4
50
+ summary: Articles from A Practical Wedding
51
+ test_files: []