strigil 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8ef92f9f1864c48161d4cc1095f76e25e9fb3c8d
4
- data.tar.gz: 78c6e7f7cf9c967700570dec8e24b8c658cb809c
3
+ metadata.gz: 14e5b5afe824abec6ddcae00f102eb785b55c033
4
+ data.tar.gz: 2674268125891b755c502b7d0321e352c99c66a8
5
5
  SHA512:
6
- metadata.gz: e43012eed26881ef3b460c181193b32f815f0c5ad264ba776a011892e6122078bcdeb2714b190d2c535e93f0a4a3cddcda8080db3d2f15e36d0da58ae7417760
7
- data.tar.gz: f61ce715184dd3fd6fa9e69b1f0969c3f550a35a45517dc19f86b960186fddf7125d6d4bca1f213109d531f4536acb07d7a8e0f47d3853d8cb8d13ccaf477f6a
6
+ metadata.gz: 2107c4baebc045cf2034bc18aec704065dae8677a880db227cf227d3fc934ba04f2ef035adddee0a1d5cba9b163df83ac391b752135dc261cc4a64d7705f78ca
7
+ data.tar.gz: '025218138f22d2332c3e89ef1a59c511759a49b3ecabb636e8f49cb650a0e9e342e835b8751beaa0c9422c6ec12ffdd53c779de8a613ed29b3a8f04dc11d993a'
data/lib/strigil.rb CHANGED
@@ -1,8 +1,19 @@
1
1
  class Strigil
2
2
  def self.engage(user)
3
- client = Strigil::StrigilClient.new(user)
3
+ client_config
4
+
5
+ puts "Initializing client..."
6
+ client = Strigil::StrigilClient.new(
7
+ username: @username,
8
+ password: @password,
9
+ browser: @browser,
10
+ target: user
11
+ )
12
+
13
+ puts "Initializing comments store..."
4
14
  comments = Strigil::Comments.new
5
15
 
16
+ puts "Beginning scrape. This may take a while."
6
17
  processing = true
7
18
  while processing == true
8
19
  comments.add_comments(client.pull_comments)
@@ -22,6 +33,27 @@ class Strigil
22
33
  end
23
34
  end
24
35
  end
36
+
37
+ private
38
+
39
+ def self.client_config
40
+ puts "Valid Reddit account details are neccessary to scrape correctly."
41
+ puts "This information is _not_ sent to any third party - it is simply"
42
+ puts "used to properly configure how user comments are displayed in"
43
+ puts "order to scrape them correctly. Feel free to use a throwaway."
44
+ puts "Username:"
45
+ print "> "
46
+ @username = gets.chomp
47
+ puts "Password:"
48
+ print "> "
49
+ @password = gets.chomp
50
+ puts "Do you have Chrome or Firefox installed?"
51
+ puts "Type either 'chrome' or 'firefox' without quotes."
52
+ @browser = gets.chomp
53
+ end
54
+ end
55
+
56
+ class ConfigurationError < StandardError
25
57
  end
26
58
 
27
59
  require 'strigil/comment'
@@ -1,10 +1,12 @@
1
1
  class Strigil::CommentsParser
2
2
 
3
- def self.parse(comments)
3
+ def self.parse(client)
4
+ comments = get_comments(client)
5
+
4
6
  comments.map do |comment|
5
7
  Strigil::Comment.new(
6
- author: comment.attribute("data-author"),
7
- subreddit: comment.attribute("data-subreddit"),
8
+ author: get_author(comment),
9
+ subreddit: get_subreddit(comment),
8
10
  permalink: get_permalink(comment),
9
11
  timestamp: get_timestamp(comment),
10
12
  text: get_text(comment)
@@ -14,19 +16,28 @@ class Strigil::CommentsParser
14
16
 
15
17
  private
16
18
 
19
+ def self.get_comments(client)
20
+ client.find_elements(class: "comment")
21
+ end
22
+
23
+ def self.get_author(comment)
24
+ comment.attribute("data-author")
25
+ end
26
+
27
+ def self.get_subreddit(comment)
28
+ comment.attribute("data-subreddit")
29
+ end
30
+
17
31
  def self.get_permalink(comment)
18
- entry = get_entry(comment)
19
- entry.find_element(class: "bylink").attribute(:href)
32
+ 'https://reddit.com/r/' + comment.attribute("data-permalink")
20
33
  end
21
34
 
22
35
  def self.get_timestamp(comment)
23
- entry = get_entry(comment)
24
- entry.find_element(tag_name: "time").attribute(:title)
36
+ get_entry(comment).find_element(tag_name: "time").attribute(:title)
25
37
  end
26
38
 
27
39
  def self.get_text(comment)
28
- entry = get_entry(comment)
29
- entry.find_element(class: "usertext-body").text
40
+ get_entry(comment).find_element(class: "usertext-body").text
30
41
  end
31
42
 
32
43
  def self.get_entry(comment)
@@ -4,14 +4,20 @@ require 'webdrivers'
4
4
  class Strigil::StrigilClient
5
5
  attr_reader :client
6
6
 
7
- def initialize(user)
8
- @client = Selenium::WebDriver.for :chrome
9
- client.navigate.to "https://reddit.com/u/#{user}/comments"
7
+ def initialize(params)
8
+ target = params[:target]
9
+ username = params[:username]
10
+ password = params[:password]
11
+
12
+ @client = Selenium::WebDriver.for(params[:browser].to_sym)
13
+
14
+ login_and_make_legacy(username, password)
15
+
16
+ client.navigate.to "https://reddit.com/u/#{target}/comments"
10
17
  end
11
18
 
12
19
  def pull_comments
13
- raw_comments = client.find_elements(class: "comment")
14
- Strigil::CommentsParser.parse(raw_comments)
20
+ Strigil::CommentsParser.parse(client)
15
21
  end
16
22
 
17
23
  def close
@@ -26,6 +32,46 @@ class Strigil::StrigilClient
26
32
  end
27
33
  end
28
34
 
35
+ private
36
+
37
+ def login_and_make_legacy(username, password)
38
+ login_to_reddit(username, password)
39
+ set_legacy_preference
40
+ end
41
+
42
+ def login_to_reddit(username, password)
43
+ client.navigate.to 'https://reddit.com/login'
44
+
45
+ login_panel = client.find_element(id: "login-form")
46
+
47
+ username_field = login_panel.find_element(id: "user_login")
48
+ password_field = login_panel.find_element(id: "passwd_login")
49
+ login_button = login_panel.find_element(tag_name: "button", class: "c-btn")
50
+
51
+ username_field.send_keys(username)
52
+ password_field.send_keys(password)
53
+
54
+ login_button.click
55
+
56
+ wait = Selenium::WebDriver::Wait.new(timeout: 5)
57
+ wait.until do
58
+ client.find_element(id: "header-bottom-right").find_element(link_text: username)
59
+ end
60
+ end
61
+
62
+ def set_legacy_preference
63
+ client.navigate.to 'https://reddit.com/prefs/'
64
+
65
+ legacy_box = client.find_element(id: "profile_opt_out")
66
+
67
+ unless legacy_box.attribute(:checked)
68
+ legacy_box.click
69
+
70
+ save_button = client.find_element(class: "save-preferences")
71
+ save_button.click
72
+ end
73
+ end
74
+
29
75
  end
30
76
 
31
77
  class EndOfQueueError < StandardError
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: strigil
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Harry Stebbins
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-03 00:00:00.000000000 Z
11
+ date: 2018-04-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: selenium-webdriver