strigil 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8ef92f9f1864c48161d4cc1095f76e25e9fb3c8d
4
+ data.tar.gz: 78c6e7f7cf9c967700570dec8e24b8c658cb809c
5
+ SHA512:
6
+ metadata.gz: e43012eed26881ef3b460c181193b32f815f0c5ad264ba776a011892e6122078bcdeb2714b190d2c535e93f0a4a3cddcda8080db3d2f15e36d0da58ae7417760
7
+ data.tar.gz: f61ce715184dd3fd6fa9e69b1f0969c3f550a35a45517dc19f86b960186fddf7125d6d4bca1f213109d531f4536acb07d7a8e0f47d3853d8cb8d13ccaf477f6a
data/lib/strigil.rb ADDED
@@ -0,0 +1,30 @@
1
# Entry point of the gem: scrapes a Reddit user's comment history into
# ./data/comments.json.
class Strigil
  # Pulls every page of +user+'s comments and writes them to disk.
  #
  # Opens a Strigil::StrigilClient for the user, adds the comments of each
  # page to a Strigil::Comments log and advances with StrigilClient#iterate
  # until that raises EndOfQueueError. The log is then saved, a summary is
  # printed to stdout and the log is cleared.
  #
  # @param user [String] user name handed to Strigil::StrigilClient.new
  # @return [nil]
  # @raise [StandardError] anything other than EndOfQueueError raised while
  #   scraping propagates to the caller; nothing is saved in that case
  def self.engage(user)
    client = Strigil::StrigilClient.new(user)
    comments = Strigil::Comments.new

    begin
      loop do
        comments.add_comments(client.pull_comments)

        begin
          client.iterate
        rescue EndOfQueueError
          # No further page: the history has been read completely.
          break
        end
      end
    ensure
      # Runs on every exit path so a failure halfway through does not leave
      # the browser session open.
      client.close
    end

    comments.save

    puts "Pulled #{comments.log.size} comments."
    puts "JSON data stored in ./data/comments.json"

    comments.clear
    nil
  end
end
26
+
27
+ require 'strigil/comment'
28
+ require 'strigil/comments'
29
+ require 'strigil/comments_parser'
30
+ require 'strigil/strigil_client'
@@ -0,0 +1,32 @@
1
require 'json'
require 'time'
2
+
3
class Strigil
  # A single scraped comment: who wrote it, where, when, and what it says.
  class Comment
    attr_reader :author, :subreddit, :permalink, :timestamp, :text

    # @param params [Hash] values under the keys :author, :subreddit,
    #   :permalink, :timestamp and :text; missing keys leave the attribute nil
    # @raise [ArgumentError] if :timestamp is a string Time.parse cannot read
    def initialize(params = {})
      @author = params[:author]
      @subreddit = params[:subreddit]
      @permalink = params[:permalink]
      @timestamp = parse_timestamp(params[:timestamp])
      @text = params[:text]
    end

    # Serializes the comment as a JSON object with the five attributes.
    #
    # Accepts and forwards the optional generator arguments so the comment
    # can also be serialized as part of a larger structure, e.g.
    # <tt>[comment].to_json</tt>.
    #
    # @return [String]
    def to_json(*args)
      {
        author: author,
        subreddit: subreddit,
        permalink: permalink,
        timestamp: timestamp,
        text: text
      }.to_json(*args)
    end

    # Human-readable rendering: a header line, the timestamp and permalink,
    # a separator and the comment text, wrapped in a leading and a trailing
    # newline.
    #
    # @return [String]
    def display
      [
        '',
        "posted by #{author} on /r/#{subreddit}",
        "#{timestamp} | #{permalink}",
        '-------------------------------------',
        "#{text}",
        ''
      ].join("\n")
    end

    private

    # Time.parse only accepts strings, so a missing timestamp stays nil and
    # an existing Time is used as it is.
    def parse_timestamp(value)
      return value if value.nil? || value.is_a?(Time)

      Time.parse(value)
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require 'fileutils'
2
+
3
class Strigil
  # In-memory log of comments that can be written to ./data/comments.json.
  class Comments
    attr_accessor :log

    def initialize
      @log = []
    end

    # Appends a batch of comments to the log.
    #
    # @param comments [Array] comments to add
    def add_comments(comments)
      @log += comments
    end

    # Appends a single comment to the log.
    def add_comment(comment)
      @log.push(comment)
    end

    # Writes the log as a JSON array to ./data/comments.json, creating the
    # ./data/ directory when it is missing and overwriting an existing file.
    #
    # @return [Integer] number of bytes written
    def save
      # mkdir_p is a no-op when the directory is already there.
      FileUtils.mkdir_p('./data/')
      File.write("./data/comments.json", log_to_json)
    end

    # Empties the log.
    def clear
      @log = []
    end

    private

    # Joins the JSON of every logged comment into one array literal. The
    # separator is placed by position, so equal or repeated entries still
    # yield valid JSON.
    def log_to_json
      "[#{log.map { |comment| comment.to_json }.join(',')}]"
    end
  end
end
@@ -0,0 +1,35 @@
1
class Strigil
  # Converts the comment elements found in the browser into
  # Strigil::Comment objects.
  class CommentsParser
    # Builds one Strigil::Comment per element.
    #
    # The author and subreddit are read from the element's "data-author" and
    # "data-subreddit" attributes; permalink, timestamp and text come from
    # elements nested in its "entry" child.
    #
    # @param comments [Enumerable] elements responding to +attribute+ and
    #   +find_element+ (StrigilClient#pull_comments passes the result of
    #   the driver's +find_elements+)
    # @return [Array<Strigil::Comment>]
    def self.parse(comments)
      comments.map do |comment|
        Strigil::Comment.new(
          author: comment.attribute("data-author"),
          subreddit: comment.attribute("data-subreddit"),
          permalink: get_permalink(comment),
          timestamp: get_timestamp(comment),
          text: get_text(comment)
        )
      end
    end

    # href of the "bylink" element inside the comment's entry.
    def self.get_permalink(comment)
      get_entry(comment).find_element(class: "bylink").attribute(:href)
    end

    # title attribute of the <time> element inside the comment's entry.
    def self.get_timestamp(comment)
      get_entry(comment).find_element(tag_name: "time").attribute(:title)
    end

    # Text of the "usertext-body" element inside the comment's entry.
    def self.get_text(comment)
      get_entry(comment).find_element(class: "usertext-body").text
    end

    # The "entry" child element that holds the comment's details.
    def self.get_entry(comment)
      comment.find_element(class: "entry")
    end

    # A bare `private` has no effect on `def self.` methods, so the helpers
    # are hidden explicitly.
    private_class_method :get_permalink, :get_timestamp, :get_text, :get_entry
  end
end
@@ -0,0 +1,32 @@
1
+ require 'selenium-webdriver'
2
+ require 'webdrivers'
3
+
4
class Strigil
  # Drives a Chrome browser through a Reddit user's comment pages.
  class StrigilClient
    attr_reader :client

    # Starts a Chrome driver and opens the user's comment listing.
    #
    # @param user [String] user name interpolated into the profile URL
    def initialize(user)
      @client = Selenium::WebDriver.for(:chrome)
      client.navigate.to "https://reddit.com/u/#{user}/comments"
    end

    # Parses every element with class "comment" on the current page.
    #
    # @return [Array<Strigil::Comment>]
    def pull_comments
      raw_comments = client.find_elements(class: "comment")
      Strigil::CommentsParser.parse(raw_comments)
    end

    # Closes the driver's current window.
    def close
      client.close
    end

    # Clicks the "next ›" link to move to the following page.
    #
    # @raise [EndOfQueueError] when the page has no "next ›" link
    def iterate
      client.find_element(link_text: "next ›").click
    rescue Selenium::WebDriver::Error::NoSuchElementError
      # Only a missing link means the last page was reached; any other
      # failure propagates instead of being reported as a normal end.
      raise EndOfQueueError
    end
  end
end
30
+
31
# Raised by Strigil::StrigilClient#iterate when there is no further page of
# comments; Strigil.engage rescues it to stop paginating.
EndOfQueueError = Class.new(StandardError)
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: strigil
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Harry Stebbins
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-04-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: selenium-webdriver
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.11'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: webdrivers
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.2'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.2'
41
+ description:
42
+ email: hcstebbins@gmail.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - lib/strigil.rb
48
+ - lib/strigil/comment.rb
49
+ - lib/strigil/comments.rb
50
+ - lib/strigil/comments_parser.rb
51
+ - lib/strigil/strigil_client.rb
52
+ homepage: https://github.com/stebbins/strigil
53
+ licenses:
54
+ - MIT
55
+ metadata: {}
56
+ post_install_message:
57
+ rdoc_options: []
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubyforge_project:
72
+ rubygems_version: 2.6.13
73
+ signing_key:
74
+ specification_version: 4
75
+ summary: Strigil is a gem for easily scraping a Reddit user's comment history into
76
+ a JSON file.
77
+ test_files: []