strigil 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8ef92f9f1864c48161d4cc1095f76e25e9fb3c8d
4
- data.tar.gz: 78c6e7f7cf9c967700570dec8e24b8c658cb809c
3
+ metadata.gz: 14e5b5afe824abec6ddcae00f102eb785b55c033
4
+ data.tar.gz: 2674268125891b755c502b7d0321e352c99c66a8
5
5
  SHA512:
6
- metadata.gz: e43012eed26881ef3b460c181193b32f815f0c5ad264ba776a011892e6122078bcdeb2714b190d2c535e93f0a4a3cddcda8080db3d2f15e36d0da58ae7417760
7
- data.tar.gz: f61ce715184dd3fd6fa9e69b1f0969c3f550a35a45517dc19f86b960186fddf7125d6d4bca1f213109d531f4536acb07d7a8e0f47d3853d8cb8d13ccaf477f6a
6
+ metadata.gz: 2107c4baebc045cf2034bc18aec704065dae8677a880db227cf227d3fc934ba04f2ef035adddee0a1d5cba9b163df83ac391b752135dc261cc4a64d7705f78ca
7
+ data.tar.gz: '025218138f22d2332c3e89ef1a59c511759a49b3ecabb636e8f49cb650a0e9e342e835b8751beaa0c9422c6ec12ffdd53c779de8a613ed29b3a8f04dc11d993a'
data/lib/strigil.rb CHANGED
@@ -1,8 +1,19 @@
1
1
  class Strigil
2
2
  def self.engage(user)
3
- client = Strigil::StrigilClient.new(user)
3
+ client_config
4
+
5
+ puts "Initializing client..."
6
+ client = Strigil::StrigilClient.new(
7
+ username: @username,
8
+ password: @password,
9
+ browser: @browser,
10
+ target: user
11
+ )
12
+
13
+ puts "Initializing comments store..."
4
14
  comments = Strigil::Comments.new
5
15
 
16
+ puts "Beginning scrape. This may take a while."
6
17
  processing = true
7
18
  while processing == true
8
19
  comments.add_comments(client.pull_comments)
@@ -22,6 +33,27 @@ class Strigil
22
33
  end
23
34
  end
24
35
  end
36
+
37
+ private
38
+
39
+ def self.client_config
40
+ puts "Valid Reddit account details are neccessary to scrape correctly."
41
+ puts "This information is _not_ sent to any third party - it is simply"
42
+ puts "used to properly configure how user comments are displayed in"
43
+ puts "order to scrape them correctly. Feel free to use a throwaway."
44
+ puts "Username:"
45
+ print "> "
46
+ @username = gets.chomp
47
+ puts "Password:"
48
+ print "> "
49
+ @password = gets.chomp
50
+ puts "Do you have Chrome or Firefox installed?"
51
+ puts "Type either 'chrome' or 'firefox' without quotes."
52
+ @browser = gets.chomp
53
+ end
54
+ end
55
+
56
+ class ConfigurationError < StandardError
25
57
  end
26
58
 
27
59
  require 'strigil/comment'
@@ -1,10 +1,12 @@
1
1
  class Strigil::CommentsParser
2
2
 
3
- def self.parse(comments)
3
+ def self.parse(client)
4
+ comments = get_comments(client)
5
+
4
6
  comments.map do |comment|
5
7
  Strigil::Comment.new(
6
- author: comment.attribute("data-author"),
7
- subreddit: comment.attribute("data-subreddit"),
8
+ author: get_author(comment),
9
+ subreddit: get_subreddit(comment),
8
10
  permalink: get_permalink(comment),
9
11
  timestamp: get_timestamp(comment),
10
12
  text: get_text(comment)
@@ -14,19 +16,28 @@ class Strigil::CommentsParser
14
16
 
15
17
  private
16
18
 
19
+ def self.get_comments(client)
20
+ client.find_elements(class: "comment")
21
+ end
22
+
23
+ def self.get_author(comment)
24
+ comment.attribute("data-author")
25
+ end
26
+
27
+ def self.get_subreddit(comment)
28
+ comment.attribute("data-subreddit")
29
+ end
30
+
17
31
  def self.get_permalink(comment)
18
- entry = get_entry(comment)
19
- entry.find_element(class: "bylink").attribute(:href)
32
+ 'https://reddit.com/r/' + comment.attribute("data-permalink")
20
33
  end
21
34
 
22
35
  def self.get_timestamp(comment)
23
- entry = get_entry(comment)
24
- entry.find_element(tag_name: "time").attribute(:title)
36
+ get_entry(comment).find_element(tag_name: "time").attribute(:title)
25
37
  end
26
38
 
27
39
  def self.get_text(comment)
28
- entry = get_entry(comment)
29
- entry.find_element(class: "usertext-body").text
40
+ get_entry(comment).find_element(class: "usertext-body").text
30
41
  end
31
42
 
32
43
  def self.get_entry(comment)
@@ -4,14 +4,20 @@ require 'webdrivers'
4
4
  class Strigil::StrigilClient
5
5
  attr_reader :client
6
6
 
7
- def initialize(user)
8
- @client = Selenium::WebDriver.for :chrome
9
- client.navigate.to "https://reddit.com/u/#{user}/comments"
7
+ def initialize(params)
8
+ target = params[:target]
9
+ username = params[:username]
10
+ password = params[:password]
11
+
12
+ @client = Selenium::WebDriver.for(params[:browser].to_sym)
13
+
14
+ login_and_make_legacy(username, password)
15
+
16
+ client.navigate.to "https://reddit.com/u/#{target}/comments"
10
17
  end
11
18
 
12
19
  def pull_comments
13
- raw_comments = client.find_elements(class: "comment")
14
- Strigil::CommentsParser.parse(raw_comments)
20
+ Strigil::CommentsParser.parse(client)
15
21
  end
16
22
 
17
23
  def close
@@ -26,6 +32,46 @@ class Strigil::StrigilClient
26
32
  end
27
33
  end
28
34
 
35
+ private
36
+
37
+ def login_and_make_legacy(username, password)
38
+ login_to_reddit(username, password)
39
+ set_legacy_preference
40
+ end
41
+
42
+ def login_to_reddit(username, password)
43
+ client.navigate.to 'https://reddit.com/login'
44
+
45
+ login_panel = client.find_element(id: "login-form")
46
+
47
+ username_field = login_panel.find_element(id: "user_login")
48
+ password_field = login_panel.find_element(id: "passwd_login")
49
+ login_button = login_panel.find_element(tag_name: "button", class: "c-btn")
50
+
51
+ username_field.send_keys(username)
52
+ password_field.send_keys(password)
53
+
54
+ login_button.click
55
+
56
+ wait = Selenium::WebDriver::Wait.new(timeout: 5)
57
+ wait.until do
58
+ client.find_element(id: "header-bottom-right").find_element(link_text: username)
59
+ end
60
+ end
61
+
62
+ def set_legacy_preference
63
+ client.navigate.to 'https://reddit.com/prefs/'
64
+
65
+ legacy_box = client.find_element(id: "profile_opt_out")
66
+
67
+ unless legacy_box.attribute(:checked)
68
+ legacy_box.click
69
+
70
+ save_button = client.find_element(class: "save-preferences")
71
+ save_button.click
72
+ end
73
+ end
74
+
29
75
  end
30
76
 
31
77
  class EndOfQueueError < StandardError
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: strigil
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Harry Stebbins
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-03 00:00:00.000000000 Z
11
+ date: 2018-04-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: selenium-webdriver