strigil 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8ef92f9f1864c48161d4cc1095f76e25e9fb3c8d
4
+ data.tar.gz: 78c6e7f7cf9c967700570dec8e24b8c658cb809c
5
+ SHA512:
6
+ metadata.gz: e43012eed26881ef3b460c181193b32f815f0c5ad264ba776a011892e6122078bcdeb2714b190d2c535e93f0a4a3cddcda8080db3d2f15e36d0da58ae7417760
7
+ data.tar.gz: f61ce715184dd3fd6fa9e69b1f0969c3f550a35a45517dc19f86b960186fddf7125d6d4bca1f213109d531f4536acb07d7a8e0f47d3853d8cb8d13ccaf477f6a
data/lib/strigil.rb ADDED
@@ -0,0 +1,30 @@
1
# Top-level namespace and entry point of the gem.
class Strigil
  # Walks every page of +user+'s comment listing, collects the comments,
  # writes them out through Strigil::Comments#save and prints a summary.
  #
  # The browser session is released in an +ensure+ clause, so it is closed
  # even when scraping fails part-way through; such a failure is re-raised
  # to the caller and nothing is saved.
  #
  # @param user [String] Reddit username whose comments are pulled
  # @return [nil]
  def self.engage(user)
    client = Strigil::StrigilClient.new(user)
    comments = Strigil::Comments.new

    begin
      loop do
        comments.add_comments(client.pull_comments)

        begin
          client.iterate
        rescue EndOfQueueError
          # The client signals that there is no further page to advance to.
          break
        end
      end
    ensure
      client.close
    end

    comments.save

    puts "Pulled #{comments.log.size} comments."
    puts "JSON data stored in ./data/comments.json"

    comments.clear
    nil
  end
end
26
+
27
+ require 'strigil/comment'
28
+ require 'strigil/comments'
29
+ require 'strigil/comments_parser'
30
+ require 'strigil/strigil_client'
@@ -0,0 +1,32 @@
1
require 'json'
require 'time'
2
+
3
class Strigil
  # A single scraped comment: who wrote it, where, when, and its text.
  class Comment
    attr_reader :author, :subreddit, :permalink, :timestamp, :text

    # @param params [Hash] fields keyed by :author, :subreddit, :permalink,
    #   :timestamp (a string accepted by Time.parse, or nil) and :text
    def initialize(params = {})
      raw_timestamp = params[:timestamp]

      @author = params[:author]
      @subreddit = params[:subreddit]
      @permalink = params[:permalink]
      # A missing timestamp stays nil instead of raising from Time.parse(nil).
      @timestamp = raw_timestamp ? Time.parse(raw_timestamp) : nil
      @text = params[:text]
    end

    # Serializes the comment as a JSON object.
    #
    # Arguments are forwarded to Hash#to_json because the JSON generator
    # passes its state object when the comment is nested in an Array or Hash
    # (e.g. +[comment].to_json+).
    #
    # @return [String]
    def to_json(*args)
      {
        author: author,
        subreddit: subreddit,
        permalink: permalink,
        timestamp: timestamp,
        text: text
      }.to_json(*args)
    end

    # Multi-line, human-readable rendering of the comment.
    #
    # @return [String]
    def display
      <<~TEXT

        posted by #{author} on /r/#{subreddit}
        #{timestamp} | #{permalink}
        -------------------------------------
        #{text}
      TEXT
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require 'fileutils'
2
+
3
class Strigil
  # In-memory list of comments that can be written to disk as a JSON array.
  class Comments
    # Location #save writes to when no path is given.
    DEFAULT_PATH = './data/comments.json'.freeze

    attr_accessor :log

    def initialize
      @log = []
    end

    # Appends every element of +comments+ to the log.
    #
    # @param comments [Array] objects responding to #to_json
    def add_comments(comments)
      @log += comments
    end

    # Appends a single comment to the log.
    def add_comment(comment)
      @log.push(comment)
    end

    # Writes the log to +path+ as a JSON array, creating any missing parent
    # directories first.
    #
    # @param path [String] destination file
    # @return [Integer] number of bytes written
    def save(path = DEFAULT_PATH)
      FileUtils.mkdir_p(File.dirname(path))
      File.write(path, log_to_json)
    end

    # Empties the log.
    def clear
      @log = []
    end

    private

    # Joins each entry's own JSON with commas. Separators are placed by
    # position, so entries that compare equal to the last one still get
    # their comma.
    def log_to_json
      "[#{log.map(&:to_json).join(',')}]"
    end
  end
end
@@ -0,0 +1,35 @@
1
class Strigil
  # Turns the comment elements found by the client into Strigil::Comment
  # objects.
  class CommentsParser
    # Builds one Strigil::Comment per element.
    #
    # @param comments [Enumerable] elements responding to #attribute and
    #   #find_element (the client passes the result of +find_elements+)
    # @return [Array<Strigil::Comment>]
    def self.parse(comments)
      comments.map do |comment|
        Strigil::Comment.new(
          author: comment.attribute("data-author"),
          subreddit: comment.attribute("data-subreddit"),
          permalink: get_permalink(comment),
          timestamp: get_timestamp(comment),
          text: get_text(comment)
        )
      end
    end

    # href of the "bylink" element inside the comment's entry.
    def self.get_permalink(comment)
      get_entry(comment).find_element(class: "bylink").attribute(:href)
    end

    # title attribute of the <time> element inside the comment's entry.
    def self.get_timestamp(comment)
      get_entry(comment).find_element(tag_name: "time").attribute(:title)
    end

    # Text of the "usertext-body" element inside the comment's entry.
    def self.get_text(comment)
      get_entry(comment).find_element(class: "usertext-body").text
    end

    # The "entry" child element that the other lookups are scoped to.
    def self.get_entry(comment)
      comment.find_element(class: "entry")
    end

    # A bare `private` does not apply to `def self.` methods, so the helpers
    # are hidden explicitly.
    private_class_method :get_permalink, :get_timestamp, :get_text, :get_entry
  end
end
@@ -0,0 +1,32 @@
1
+ require 'selenium-webdriver'
2
+ require 'webdrivers'
3
+
4
class Strigil
  # Wraps a Selenium Chrome session opened on a Reddit user's comment
  # listing and exposes page-by-page access to it.
  class StrigilClient
    attr_reader :client

    # Starts a Chrome session and navigates to +user+'s comment listing.
    #
    # @param user [String] Reddit username
    def initialize(user)
      @client = Selenium::WebDriver.for :chrome
      client.navigate.to "https://reddit.com/u/#{user}/comments"
    end

    # Parses every element with class "comment" on the current page.
    #
    # @return [Array<Strigil::Comment>]
    def pull_comments
      raw_comments = client.find_elements(class: "comment")
      Strigil::CommentsParser.parse(raw_comments)
    end

    # Ends the browser session. +quit+ is used rather than the driver's
    # +close+ so the whole session is shut down, not only the current window.
    def close
      client.quit
    end

    # Clicks the "next ›" link to advance to the following page.
    #
    # Only a missing link is treated as the end of the listing; any other
    # driver failure propagates instead of being reported as end-of-queue.
    #
    # @raise [EndOfQueueError] when the page has no "next ›" link
    def iterate
      client.find_element(link_text: "next ›").click
    rescue Selenium::WebDriver::Error::NoSuchElementError
      raise EndOfQueueError
    end
  end
end
30
+
31
# Raised by the client's +iterate+ when it cannot advance to a next page.
class EndOfQueueError < StandardError; end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: strigil
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Harry Stebbins
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-04-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: selenium-webdriver
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.11'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.11'
27
+ - !ruby/object:Gem::Dependency
28
+ name: webdrivers
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.2'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.2'
41
+ description:
42
+ email: hcstebbins@gmail.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - lib/strigil.rb
48
+ - lib/strigil/comment.rb
49
+ - lib/strigil/comments.rb
50
+ - lib/strigil/comments_parser.rb
51
+ - lib/strigil/strigil_client.rb
52
+ homepage: https://github.com/stebbins/strigil
53
+ licenses:
54
+ - MIT
55
+ metadata: {}
56
+ post_install_message:
57
+ rdoc_options: []
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubyforge_project:
72
+ rubygems_version: 2.6.13
73
+ signing_key:
74
+ specification_version: 4
75
+ summary: Strigil is a gem for easily scraping a Reddit user's comment history into
76
+ a JSON file.
77
+ test_files: []