strigil 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/strigil.rb +30 -0
- data/lib/strigil/comment.rb +32 -0
- data/lib/strigil/comments.rb +42 -0
- data/lib/strigil/comments_parser.rb +35 -0
- data/lib/strigil/strigil_client.rb +32 -0
- metadata +77 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8ef92f9f1864c48161d4cc1095f76e25e9fb3c8d
|
4
|
+
data.tar.gz: 78c6e7f7cf9c967700570dec8e24b8c658cb809c
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e43012eed26881ef3b460c181193b32f815f0c5ad264ba776a011892e6122078bcdeb2714b190d2c535e93f0a4a3cddcda8080db3d2f15e36d0da58ae7417760
|
7
|
+
data.tar.gz: f61ce715184dd3fd6fa9e69b1f0969c3f550a35a45517dc19f86b960186fddf7125d6d4bca1f213109d531f4536acb07d7a8e0f47d3853d8cb8d13ccaf477f6a
|
data/lib/strigil.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Top-level entry point of the gem.
class Strigil
  # Collects every comment page exposed by the client for +user+, writes the
  # collected comments through Strigil::Comments#save, and prints a short
  # summary to standard output.
  #
  # Paging stops when the client raises EndOfQueueError from #iterate.
  #
  # @param user [String] user name handed to Strigil::StrigilClient.new
  # @return [nil]
  # @raise [StandardError] anything other than EndOfQueueError raised while
  #   paging is propagated to the caller (after the client has been closed)
  def self.engage(user)
    client = Strigil::StrigilClient.new(user)
    comments = Strigil::Comments.new

    begin
      more_pages = true
      while more_pages
        comments.add_comments(client.pull_comments)

        begin
          client.iterate
        rescue EndOfQueueError
          # The client signals "no further page" with this error.
          more_pages = false
        end
      end
    ensure
      # Always release the browser, including when scraping fails part-way;
      # previously it was only closed on the EndOfQueueError path.
      client.close
    end

    comments.save

    puts "Pulled #{comments.log.size} comments."
    puts "JSON data stored in ./data/comments.json"

    comments.clear
    nil
  end
end
|
26
|
+
|
27
|
+
require 'strigil/comment'
|
28
|
+
require 'strigil/comments'
|
29
|
+
require 'strigil/comments_parser'
|
30
|
+
require 'strigil/strigil_client'
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
# Defined in nested form so that loading this file does not depend on the
# Strigil constant having been defined beforehand.
class Strigil
  # Value object describing one scraped comment.
  class Comment
    attr_reader :author, :subreddit, :permalink, :timestamp, :text

    # @param params [Hash] attributes keyed by :author, :subreddit,
    #   :permalink, :timestamp and :text; missing keys yield nil attributes
    # @option params [String, Time, nil] :timestamp a string is parsed with
    #   Time.parse; a Time or nil is stored unchanged
    def initialize(params = {})
      @author = params[:author]
      @subreddit = params[:subreddit]
      @permalink = params[:permalink]
      @timestamp = parse_timestamp(params[:timestamp])
      @text = params[:text]
    end

    # Serialises the comment as a JSON object with the keys author,
    # subreddit, permalink, timestamp and text.
    #
    # Relies on Hash#to_json, so the JSON library must be loaded. Arguments
    # are accepted and forwarded because JSON generators pass a state object
    # when an instance is nested in an Array or Hash being serialised.
    #
    # @return [String]
    def to_json(*args)
      {
        author: author,
        subreddit: subreddit,
        permalink: permalink,
        timestamp: timestamp,
        text: text
      }.to_json(*args)
    end

    # Human-readable rendering of the comment.
    #
    # The literal below is whitespace-sensitive (it starts and ends with a
    # newline), which is why its lines are not indented with the method body.
    #
    # @return [String]
    def display
      %{
posted by #{author} on /r/#{subreddit}
#{timestamp} | #{permalink}
-------------------------------------
#{text}
}
    end

    private

    # Time.parse raises TypeError for nil, which made Comment.new unusable
    # with its own default argument; nil and Time values pass through.
    def parse_timestamp(value)
      return value if value.nil? || value.is_a?(Time)

      Time.parse(value)
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
|
3
|
+
# Defined in nested form so that loading this file does not depend on the
# Strigil constant having been defined beforehand.
class Strigil
  # In-memory list of comments that can be written to ./data/comments.json.
  class Comments
    attr_accessor :log

    def initialize
      @log = []
    end

    # Appends every element of +comments+ to the log.
    #
    # A new array is assigned (rather than mutating in place), so arrays
    # previously obtained through #log are left untouched.
    #
    # @param comments [Array] objects responding to #to_json
    # @return [Array] the updated log
    def add_comments(comments)
      @log += comments
    end

    # Appends a single comment to the log.
    #
    # @param comment [#to_json]
    # @return [Array] the log
    def add_comment(comment)
      @log.push(comment)
    end

    # Writes the log as a JSON array to ./data/comments.json (relative to the
    # current working directory), creating ./data/ when necessary.
    #
    # @return [Integer] number of bytes written
    def save
      # mkdir_p is a no-op for an existing directory, so no separate
      # existence check is needed.
      FileUtils.mkdir_p('./data/')
      File.write('./data/comments.json', log_to_json)
    end

    # Replaces the log with a fresh empty array.
    #
    # @return [Array] the new, empty log
    def clear
      @log = []
    end

    private

    # Builds the JSON array text from each element's #to_json.
    #
    # Separators come from join; the previous `comment == log.last` check
    # dropped commas whenever an element equal to the last one appeared
    # earlier in the log, producing invalid JSON.
    #
    # @return [String]
    def log_to_json
      "[#{log.map(&:to_json).join(',')}]"
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# Defined in nested form so that loading this file does not depend on the
# Strigil constant having been defined beforehand.
class Strigil
  # Converts raw comment elements into Strigil::Comment objects.
  #
  # Elements are expected to respond to #attribute and #find_element, and the
  # elements they return to #attribute / #text (presumably Selenium
  # WebDriver elements; confirm against the caller).
  class CommentsParser
    # @param comments [Enumerable] raw comment elements
    # @return [Array<Strigil::Comment>] one comment per element, in order
    def self.parse(comments)
      comments.map do |comment|
        # Look the "entry" child up once and share it between the extractors
        # instead of repeating the same lookup for every field.
        entry = get_entry(comment)

        Strigil::Comment.new(
          author: comment.attribute("data-author"),
          subreddit: comment.attribute("data-subreddit"),
          permalink: get_permalink(comment, entry),
          timestamp: get_timestamp(comment, entry),
          text: get_text(comment, entry)
        )
      end
    end

    # href of the element with class "bylink" inside the entry.
    def self.get_permalink(comment, entry = get_entry(comment))
      entry.find_element(class: "bylink").attribute(:href)
    end

    # title attribute of the <time> element inside the entry.
    def self.get_timestamp(comment, entry = get_entry(comment))
      entry.find_element(tag_name: "time").attribute(:title)
    end

    # Text of the element with class "usertext-body" inside the entry.
    def self.get_text(comment, entry = get_entry(comment))
      entry.find_element(class: "usertext-body").text
    end

    # Child element with class "entry" of the given comment element.
    def self.get_entry(comment)
      comment.find_element(class: "entry")
    end

    # A bare `private` does not apply to `def self.` methods, so the helpers
    # were public despite the original intent; hide them explicitly.
    private_class_method :get_permalink, :get_timestamp, :get_text, :get_entry
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'selenium-webdriver'
|
2
|
+
require 'webdrivers'
|
3
|
+
|
4
|
+
# Defined in nested form so that loading this file does not depend on the
# Strigil constant having been defined beforehand.
class Strigil
  # Thin wrapper around a Chrome WebDriver session pointed at a user's
  # comment listing.
  class StrigilClient
    attr_reader :client

    # Starts a Chrome WebDriver session and opens the comments page of +user+.
    #
    # @param user [String] user name interpolated into the URL as-is
    def initialize(user)
      @client = Selenium::WebDriver.for :chrome
      client.navigate.to "https://reddit.com/u/#{user}/comments"
    end

    # Parses every element with class "comment" on the current page.
    #
    # @return [Array<Strigil::Comment>] result of Strigil::CommentsParser.parse
    def pull_comments
      raw_comments = client.find_elements(class: "comment")
      Strigil::CommentsParser.parse(raw_comments)
    end

    # Ends the WebDriver session.
    #
    # Uses quit rather than close: close only closes the current window,
    # whereas quit shuts the browser session down.
    def close
      client.quit
    end

    # Clicks the "next ›" link to advance to the following page.
    #
    # @raise [EndOfQueueError] when the page has no "next ›" link
    # @raise [StandardError] any other failure is propagated unchanged; it
    #   was previously also reported as EndOfQueueError, which made real
    #   errors look like a normal end of the listing
    def iterate
      client.find_element(link_text: "next ›").click
    rescue Selenium::WebDriver::Error::NoSuchElementError
      raise EndOfQueueError
    end
  end
end

# Raised by Strigil::StrigilClient#iterate when there is no further page.
class EndOfQueueError < StandardError
end
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: strigil
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Harry Stebbins
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-04-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: selenium-webdriver
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '3.11'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3.11'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: webdrivers
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '3.2'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '3.2'
|
41
|
+
description:
|
42
|
+
email: hcstebbins@gmail.com
|
43
|
+
executables: []
|
44
|
+
extensions: []
|
45
|
+
extra_rdoc_files: []
|
46
|
+
files:
|
47
|
+
- lib/strigil.rb
|
48
|
+
- lib/strigil/comment.rb
|
49
|
+
- lib/strigil/comments.rb
|
50
|
+
- lib/strigil/comments_parser.rb
|
51
|
+
- lib/strigil/strigil_client.rb
|
52
|
+
homepage: https://github.com/stebbins/strigil
|
53
|
+
licenses:
|
54
|
+
- MIT
|
55
|
+
metadata: {}
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements: []
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 2.6.13
|
73
|
+
signing_key:
|
74
|
+
specification_version: 4
|
75
|
+
summary: Strigil is a gem for easily scraping a Reddit user's comment history into
|
76
|
+
a JSON file.
|
77
|
+
test_files: []
|