strigil 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/strigil.rb +30 -0
- data/lib/strigil/comment.rb +32 -0
- data/lib/strigil/comments.rb +42 -0
- data/lib/strigil/comments_parser.rb +35 -0
- data/lib/strigil/strigil_client.rb +32 -0
- metadata +77 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8ef92f9f1864c48161d4cc1095f76e25e9fb3c8d
|
4
|
+
data.tar.gz: 78c6e7f7cf9c967700570dec8e24b8c658cb809c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e43012eed26881ef3b460c181193b32f815f0c5ad264ba776a011892e6122078bcdeb2714b190d2c535e93f0a4a3cddcda8080db3d2f15e36d0da58ae7417760
|
7
|
+
data.tar.gz: f61ce715184dd3fd6fa9e69b1f0969c3f550a35a45517dc19f86b960186fddf7125d6d4bca1f213109d531f4536acb07d7a8e0f47d3853d8cb8d13ccaf477f6a
|
data/lib/strigil.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Top-level namespace and entry point of the gem.
class Strigil
  # Scrapes every page of a user's comment listing, saves the collected
  # comments through Strigil::Comments#save and prints a short summary.
  #
  # The client is closed in an +ensure+ clause so the browser session is
  # released even when scraping a page raises part-way through.
  #
  # @param user [String] username handed to Strigil::StrigilClient.new
  # @return [nil]
  def self.engage(user)
    client = Strigil::StrigilClient.new(user)
    comments = Strigil::Comments.new

    begin
      loop do
        comments.add_comments(client.pull_comments)

        begin
          client.iterate
        rescue EndOfQueueError
          # The client could not advance: the current page was the last one.
          break
        end
      end
    ensure
      client.close
    end

    comments.save

    puts "Pulled #{comments.log.size} comments."
    puts "JSON data stored in ./data/comments.json"

    comments.clear
    nil
  end
end
|
26
|
+
|
27
|
+
require 'strigil/comment'
|
28
|
+
require 'strigil/comments'
|
29
|
+
require 'strigil/comments_parser'
|
30
|
+
require 'strigil/strigil_client'
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'json'
require 'time'
|
2
|
+
|
3
|
+
class Strigil
  # A single scraped comment: who wrote it, where, when, and what it says.
  class Comment
    attr_reader :author, :subreddit, :permalink, :timestamp, :text

    # @param params [Hash] values keyed by :author, :subreddit, :permalink,
    #   :timestamp and :text; missing keys leave the attribute nil
    # @raise [ArgumentError] if :timestamp is a string Time.parse cannot read
    def initialize(params = {})
      @author = params[:author]
      @subreddit = params[:subreddit]
      @permalink = params[:permalink]
      @timestamp = parse_timestamp(params[:timestamp])
      @text = params[:text]
    end

    # Serializes the comment as a JSON object.
    #
    # Extra arguments are forwarded to Hash#to_json so the comment can also be
    # serialized as part of a larger structure (JSON.generate and
    # Array#to_json pass their generator state to each element).
    #
    # @return [String] JSON text
    def to_json(*args)
      {
        author: author,
        subreddit: subreddit,
        permalink: permalink,
        timestamp: timestamp,
        text: text
      }.to_json(*args)
    end

    # @return [String] multi-line, human-readable rendering of the comment
    def display
      "\nposted by #{author} on /r/#{subreddit}\n" \
        "#{timestamp} | #{permalink}\n" \
        "-------------------------------------\n" \
        "#{text}\n"
    end

    private

    # Time.parse raises TypeError on nil, so a missing timestamp stays nil and
    # a value that is already a Time is kept untouched.
    def parse_timestamp(raw)
      return raw if raw.nil? || raw.is_a?(Time)

      Time.parse(raw)
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
|
3
|
+
class Strigil
  # In-memory log of scraped comments that can be written to
  # ./data/comments.json as a JSON array.
  class Comments
    attr_accessor :log

    def initialize
      @log = []
    end

    # Appends a batch of comments to the log.
    #
    # @param comments [Array] items responding to #to_json
    def add_comments(comments)
      @log += comments
    end

    # Appends a single comment to the log.
    def add_comment(comment)
      @log.push(comment)
    end

    # Writes the log to ./data/comments.json, creating ./data/ when needed.
    #
    # @return [Integer] number of bytes written
    def save
      FileUtils.mkdir_p('./data/')
      File.write('./data/comments.json', log_to_json)
    end

    # Empties the log.
    def clear
      @log = []
    end

    private

    # Builds the JSON array text from each entry's own #to_json.
    # Separators are placed by position (join), not by comparing entries with
    # the last one, so repeated or equal entries still produce valid JSON.
    def log_to_json
      "[#{log.map(&:to_json).join(',')}]"
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
class Strigil
  # Turns scraped comment elements into Strigil::Comment objects.
  class CommentsParser
    # Builds one Strigil::Comment per element.
    #
    # Each element must respond to #attribute and #find_element, as used
    # below (presumably Selenium web elements; confirm against the caller).
    #
    # @param comments [Enumerable] scraped comment elements
    # @return [Array<Strigil::Comment>]
    def self.parse(comments)
      comments.map do |comment|
        Strigil::Comment.new(
          author: comment.attribute("data-author"),
          subreddit: comment.attribute("data-subreddit"),
          permalink: get_permalink(comment),
          timestamp: get_timestamp(comment),
          text: get_text(comment)
        )
      end
    end

    # href of the "bylink" element inside the comment's entry.
    def self.get_permalink(comment)
      get_entry(comment).find_element(class: "bylink").attribute(:href)
    end

    # title attribute of the <time> element inside the comment's entry.
    def self.get_timestamp(comment)
      get_entry(comment).find_element(tag_name: "time").attribute(:title)
    end

    # Visible text of the "usertext-body" element inside the comment's entry.
    def self.get_text(comment)
      get_entry(comment).find_element(class: "usertext-body").text
    end

    # The "entry" element that wraps the comment's metadata and body.
    def self.get_entry(comment)
      comment.find_element(class: "entry")
    end

    # A bare `private` does not apply to `def self.` methods, so the helpers
    # are hidden explicitly; only .parse is part of the public interface.
    private_class_method :get_permalink, :get_timestamp, :get_text, :get_entry
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'selenium-webdriver'
|
2
|
+
require 'webdrivers'
|
3
|
+
|
4
|
+
class Strigil
  # Drives a Chrome session (via Selenium) over a Reddit user's comment pages.
  class StrigilClient
    attr_reader :client

    # Starts Chrome and opens the user's comment listing.
    #
    # @param user [String] username interpolated into the listing URL
    def initialize(user)
      @client = Selenium::WebDriver.for :chrome
      client.navigate.to "https://reddit.com/u/#{user}/comments"
    end

    # @return [Array<Strigil::Comment>] comments found on the current page
    def pull_comments
      raw_comments = client.find_elements(class: "comment")
      Strigil::CommentsParser.parse(raw_comments)
    end

    # Ends the browser session. `quit` is used rather than `close`, which
    # only closes the current window.
    def close
      client.quit
    end

    # Clicks the "next ›" link to move to the following page.
    #
    # Only a missing link is treated as the end of the listing; any other
    # driver error propagates instead of being reported as a finished scrape.
    #
    # @raise [EndOfQueueError] when the page has no "next ›" link
    def iterate
      client.find_element(link_text: "next ›").click
    rescue Selenium::WebDriver::Error::NoSuchElementError
      raise EndOfQueueError
    end
  end
end
|
30
|
+
|
31
|
+
# Signals that the comment listing cannot be advanced any further; raised by
# Strigil::StrigilClient#iterate and rescued in Strigil.engage.
class EndOfQueueError < StandardError; end
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: strigil
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Harry Stebbins
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-04-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: selenium-webdriver
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '3.11'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3.11'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: webdrivers
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.2'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.2'
|
41
|
+
description:
|
42
|
+
email: hcstebbins@gmail.com
|
43
|
+
executables: []
|
44
|
+
extensions: []
|
45
|
+
extra_rdoc_files: []
|
46
|
+
files:
|
47
|
+
- lib/strigil.rb
|
48
|
+
- lib/strigil/comment.rb
|
49
|
+
- lib/strigil/comments.rb
|
50
|
+
- lib/strigil/comments_parser.rb
|
51
|
+
- lib/strigil/strigil_client.rb
|
52
|
+
homepage: https://github.com/stebbins/strigil
|
53
|
+
licenses:
|
54
|
+
- MIT
|
55
|
+
metadata: {}
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements: []
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 2.6.13
|
73
|
+
signing_key:
|
74
|
+
specification_version: 4
|
75
|
+
summary: Strigil is a gem for easily scraping a Reddit user's comment history into
|
76
|
+
a JSON file.
|
77
|
+
test_files: []
|