reddit_archiver 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +35 -0
- data/Rakefile +1 -0
- data/bin/reddit_archiver +15 -0
- data/config.yml.example +3 -0
- data/lib/reddit_archiver/account.rb +50 -0
- data/lib/reddit_archiver/comment.rb +38 -0
- data/lib/reddit_archiver/config.rb +36 -0
- data/lib/reddit_archiver/file_writer.rb +35 -0
- data/lib/reddit_archiver/post.rb +30 -0
- data/lib/reddit_archiver/submission.rb +32 -0
- data/lib/reddit_archiver/version.rb +3 -0
- data/lib/reddit_archiver.rb +13 -0
- data/reddit_archiver.gemspec +28 -0
- data/spec/account_spec.rb +21 -0
- data/spec/comment_spec.rb +29 -0
- data/spec/config_spec.rb +38 -0
- data/spec/output/.keep +0 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/submission_spec.rb +39 -0
- metadata +172 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 24a075b973c4926b6dde15536b3b2fa4a4cd20bc
|
4
|
+
data.tar.gz: 075052f1d7a6e61659bd80ce5cbf52f9900d720f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 31aaab845087793e3febfc3f23f214701fd4648d9873c23a961ac26e0cb7a2489bbd03eef927833ed1104e595c0ada4ba23d5d985f924ec7a685295f66e023a6
|
7
|
+
data.tar.gz: 6060034dfb3282c53472e1174c2038a704c5debf5fbcac7a9a95d5067cd182050846a2bdb2b41014a3f651af4f32d5b3faddb310beb2d1748b76a296fcb8420e
|
data/.gitignore
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
vendor/bundle
|
19
|
+
config.yml
|
20
|
+
*/comments
|
21
|
+
*/submissions
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Timothy King
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# RedditArchiver
|
2
|
+
|
3
|
+
**RedditArchiver** is a simple command line tool that will retrieve the contents of a reddit account, up to reddit's hard limit of 1000 items. This means that if your account contains a total of 1001 submissions or comments, only 1000 of them can be retrieved. There is currently no way around this limit.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'reddit_archiver'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install reddit_archiver
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
$ mv config.yml.example config.yml
|
22
|
+
|
23
|
+
Edit the configuration file to supply your account details. Then run:
|
24
|
+
|
25
|
+
$ bundle exec reddit_archiver
|
26
|
+
|
27
|
+
Once the script finishes, your posts will be available in the directory specified in config.yml.
|
28
|
+
|
29
|
+
## Contributing
|
30
|
+
|
31
|
+
1. Fork it
|
32
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
33
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
34
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
35
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/reddit_archiver
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# Command line entry point: retrieves the submissions and comments of the
# configured account and writes each one to its own file.
#
# The lib directory is added to the load path here instead of through a
# "-Ilib" shebang argument: several platforms hand everything after the
# interpreter path to env as one single argument, and "-Ilib" would also be
# resolved against the current working directory rather than this file.
$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))

require 'reddit_archiver'

account = RedditArchiver::Account.new

print "Retrieving posts:"
posts = (account.submissions | account.comments)
print " done with #{posts.count} items.\n"

posts.each do |post|
  print "Generating #{post.filename}:"
  post.write
  print " done.\n"
end
|
data/config.yml.example
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'snoo'
|
2
|
+
|
3
|
+
module RedditArchiver
  # Wraps an API client for the configured reddit account and exposes the
  # account's comments and submissions as archivable objects.
  class Account
    # Number of items requested per listing call.
    PAGE_SIZE = 100

    attr_reader :connection, :config

    # Loads the configuration and builds the API client from its credentials.
    def initialize
      @config = RedditArchiver.config
      @connection = Snoo::Client.new(username: config.username,
                                     password: config.password,
                                     useragent: "RedditArchiver #{RedditArchiver::VERSION}")
    end

    # Returns every retrievable comment wrapped in a RedditArchiver::Comment.
    def comments
      posts(:comments).map { |p| RedditArchiver::Comment.new(p) }
    end

    # Returns every retrievable submission wrapped in a
    # RedditArchiver::Submission.
    def submissions
      posts(:submitted).map { |p| RedditArchiver::Submission.new(p) }
    end

    private

    # Collects all items of the given listing type (:comments or :submitted).
    #
    # Reddit's API only allows a maximum of 100 items at a time. To retrieve
    # more than that (up to the hard limit of 1k), we take the 'name'
    # attribute of the last item collected so far and request the items that
    # occur after it. The loop stops once the last name no longer changes
    # between two rounds, or immediately when the listing is empty.
    def posts(type)
      listing = type.to_s
      collected = retrieve_posts(type: listing)
      current_name = ""

      loop do
        previous_name = current_name
        last_post = collected.last
        # An empty listing has no last item to paginate from.
        break if last_post.nil?

        current_name = last_post["name"]
        collected += retrieve_posts(type: listing, after: current_name)
        break if previous_name == current_name
      end

      collected
    end

    # Requests one page of the user's listing, newest first. The caller's
    # hash is left untouched; sort order and page size are merged into a copy.
    def retrieve_posts(args = {})
      query = args.merge(sort: 'new', limit: PAGE_SIZE)
      parse_response(connection.get_user_listing(connection.username, query))
    end

    # Extracts the "data" payload of every child in a listing response.
    def parse_response(http_response)
      http_response["data"]["children"].map { |p| p["data"] }
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module RedditArchiver
  # A single comment of the archived account. Comments are filed under a
  # directory named after the submission they were posted on.
  class Comment < Post
    attr_reader :body, :score, :link_title, :link_id, :link_author

    # comment - Hash of comment attributes as returned by the listing API;
    #           reads "body", "ups", "downs", "link_title", "link_id" and
    #           "link_author", then hands the hash on to Post#initialize.
    def initialize(comment)
      @body = comment["body"]
      @score = comment["ups"] - comment["downs"]
      @link_title = comment["link_title"]
      @link_id = comment["link_id"]
      @link_author = comment["link_author"]
      super
    end

    # URL of the submission this comment belongs to. Only the part of
    # link_id after the last underscore is used in the URL.
    def submission_link
      "http://reddit.com/r/#{subreddit}/comments/#{link_id.split('_').last}"
    end

    # The first 97 characters of the comment body serve as its title.
    def title
      body[0..96]
    end

    # Parameterized title of the parent submission, capped at 97 characters.
    def submission_title
      link_title.parameterize[0..96]
    end

    # Text written to the archive file: a metadata header, a blank line and
    # the comment body.
    #
    # The text is assembled line by line instead of de-indenting a heredoc
    # after interpolation, so leading whitespace inside the comment body
    # (for example indented code) is written out unchanged.
    def file_contents
      header = [
        "Date: #{created}",
        "On Post: #{link_title} by #{link_author}",
        "Subreddit: /r/#{subreddit}",
        "Link: #{submission_link}",
        "Score: #{score}"
      ]

      (header + ["", body.to_s]).join("\n") + "\n"
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module RedditArchiver
  # Reads the account settings from config.yml in the current working
  # directory and makes sure the output directory tree exists.
  class Config
    # File the settings are loaded from, relative to the working directory.
    CONFIG_FILE = 'config.yml'

    # Names of the directories created below the target directory.
    SUBDIRECTORY_NAMES = %w(submissions comments).freeze

    attr_reader :username, :password, :directory

    # args - Hash with string keys ("username", "password", "directory")
    #        overriding the values found in the config file.
    #
    # The directory defaults to the username. Raises ArgumentError when
    # neither a directory nor a username is available.
    def initialize(args = {})
      # merge (not merge!) so the hash handed back by the loader is never
      # modified; an empty config file loads as a falsy value.
      settings = (YAML.load_file(CONFIG_FILE) || {}).merge(args)
      @username, @password, @directory = settings.values_at("username", "password", "directory")
      @directory ||= username

      if @directory.nil?
        raise ArgumentError, "#{CONFIG_FILE} must provide a username or a directory"
      end

      maybe_create_target_directory
      maybe_create_target_subdirectories
    end

    # Paths of the submissions and comments directories below the target
    # directory.
    def subdirectories
      SUBDIRECTORY_NAMES.map { |subdir| File.join(directory, subdir) }
    end

    private

    # Creates the target directory unless it is already there.
    def maybe_create_target_directory
      Dir.mkdir(directory) unless File.directory?(directory)
    end

    # Creates each missing subdirectory below the target directory.
    def maybe_create_target_subdirectories
      subdirectories.each do |target_subdir|
        Dir.mkdir(target_subdir) unless File.directory?(target_subdir)
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module RedditArchiver
  # Mixin that derives a file location for a post and writes it to disk.
  #
  # The including object is expected to provide submission_title,
  # year_month_date, title_url and file_contents.
  module FileWriter
    # Directory name derived from the including class: its unqualified name,
    # downcased and pluralized.
    def subdirectory
      self.class.name.split('::').last.downcase.pluralize
    end

    # Relative directory of the post below the target directory; posts with
    # a submission_title get a directory of that name inside subdirectory.
    def subdirectories
      parent_title = submission_title
      parent_title ? File.join(subdirectory, parent_title) : subdirectory
    end

    # File name without extension, capped at 97 characters.
    def filename
      "#{year_month_date}_#{title_url}"[0..96]
    end

    # Extension appended to every written file.
    def ext
      ".md"
    end

    # Absolute path of the file for this post.
    #
    # The configured directory is resolved against the current working
    # directory, the same base RedditArchiver::Config uses when it creates
    # the target directories, rather than against the location of this
    # source file.
    def filename_with_full_path
      File.expand_path(File.join(RedditArchiver.config.directory, subdirectories, filename + ext))
    end

    # Directory the file is written into.
    def target_directory
      File.dirname(filename_with_full_path)
    end

    # Writes file_contents to filename_with_full_path, creating the
    # directory first when it is missing. The block form of File.open closes
    # the handle even when writing raises.
    def write
      path = filename_with_full_path
      folder = File.dirname(path)

      Dir.mkdir(folder) unless File.directory?(folder)
      File.open(path, 'w') { |file| file.puts(file_contents) }
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'htmlentities'
|
2
|
+
require 'active_support/inflector'
|
3
|
+
|
4
|
+
I18n.enforce_available_locales = false
|
5
|
+
|
6
|
+
module RedditArchiver
  # Common base of everything that gets archived. It keeps the attributes
  # shared by all posts and mixes in the file writing behaviour.
  class Post
    include RedditArchiver::FileWriter

    attr_reader :created, :subreddit, :reddit_id, :title

    # post - Hash of post attributes; reads "created" (passed to Time.at),
    #        "subreddit" and "name".
    def initialize(post)
      timestamp = post["created"]

      @created = Time.at(timestamp)
      @subreddit = post["subreddit"]
      @reddit_id = post["name"]
    end

    # Parameterized form of the title, used as part of the file name.
    def title_url
      title.parameterize
    end

    # A plain post has no parent submission; subclasses may override.
    def submission_title
      nil
    end

    # Creation time formatted as YYYY-MM-DD_HH-MM-SS.
    def year_month_date
      created.strftime('%Y-%m-%d_%H-%M-%S')
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module RedditArchiver
  # A submission (self post or link) of the archived account.
  class Submission < Post
    attr_reader :permalink, :title, :url, :selftext, :score, :is_self

    # submission - Hash of submission attributes; reads "permalink", "title",
    #              "url", "selftext" (HTML entities are decoded), "score" and
    #              "is_self", then hands the hash on to Post#initialize.
    def initialize(submission)
      @permalink = submission["permalink"]
      @title = submission["title"]
      @url = submission["url"]
      @selftext = HTMLEntities.new.decode(submission["selftext"])
      @score = submission["score"]
      @is_self = submission["is_self"]
      super
    end

    # The decoded self text for self posts, the linked URL otherwise.
    def body
      is_self ? selftext : url
    end

    # Text written to the archive file: a metadata header, the title as a
    # heading and the body.
    #
    # The text is assembled line by line instead of de-indenting a heredoc
    # after interpolation, so leading whitespace inside the title or body
    # (for example indented code in a self post) is written out unchanged.
    def file_contents
      lines = [
        "Date: #{created}",
        "Subreddit: /r/#{subreddit}",
        "Link: http://reddit.com#{permalink}",
        "Score: #{score}",
        "",
        "# #{title}",
        "",
        body.to_s
      ]

      lines.join("\n") + "\n"
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require "reddit_archiver/version"
|
2
|
+
require "reddit_archiver/config"
|
3
|
+
require "reddit_archiver/account"
|
4
|
+
require "reddit_archiver/file_writer"
|
5
|
+
require "reddit_archiver/post"
|
6
|
+
require "reddit_archiver/comment"
|
7
|
+
require "reddit_archiver/submission"
|
8
|
+
|
9
|
+
module RedditArchiver
  class << self
    # Builds a new Config on every call; nothing is cached here.
    def config
      Config.new
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'reddit_archiver/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "reddit_archiver"
|
8
|
+
spec.version = RedditArchiver::VERSION
|
9
|
+
spec.authors = ["Timothy King"]
|
10
|
+
spec.email = ["tmk@lordzork.com"]
|
11
|
+
spec.description = "Reddit_archiver is a command line tool that will download all of the posts and comments from a given reddit account and store them in flat text files."
|
12
|
+
spec.summary = "A simple command line tool to archive the contents of a reddit account."
|
13
|
+
spec.homepage = "http://github.com/tmking/reddit_archiver"
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files`.split($/)
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.3"
|
22
|
+
spec.add_development_dependency "rake", "~> 10.1.1"
|
23
|
+
spec.add_development_dependency "rspec", "~> 2.14.1"
|
24
|
+
spec.add_development_dependency "webmock", "~> 1.17.1"
|
25
|
+
spec.add_runtime_dependency "snoo", "~> 0.1.2"
|
26
|
+
spec.add_runtime_dependency "activesupport", "~> 4.0.2"
|
27
|
+
spec.add_runtime_dependency "htmlentities", "~> 4.3.1"
|
28
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe RedditArchiver::Account do
  # Null-object stand-in for the API client, so no request leaves the process.
  let(:connection) { double(Snoo::Client).as_null_object }

  subject(:account) { RedditArchiver::Account.new }

  before do
    allow(Snoo::Client).to receive(:new).and_return(connection)
  end

  it "retrieves the posts three times" do
    expect(connection).to receive(:get_user_listing).exactly(3).times
    account.comments
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe RedditArchiver::Comment do
|
4
|
+
let(:post) { EXAMPLE_COMMENT["data"] }
|
5
|
+
|
6
|
+
subject { RedditArchiver::Comment.new(post) }
|
7
|
+
|
8
|
+
its(:year_month_date) { should eq("2013-09-19_00-29-37") }
|
9
|
+
its(:filename) { should eq('2013-09-19_00-29-37_hello') }
|
10
|
+
its(:body) { should eq('Hello.') }
|
11
|
+
its(:score) { should eq(3) }
|
12
|
+
its(:subdirectory) { should eq('comments') }
|
13
|
+
its(:title) { should eq('Hello.') }
|
14
|
+
|
15
|
+
context "The file" do
|
16
|
+
let(:comment) { RedditArchiver::Comment.new(post) }
|
17
|
+
|
18
|
+
before { comment.write }
|
19
|
+
|
20
|
+
it "is created" do
|
21
|
+
expect(File.exists?(comment.filename_with_full_path)).to be_true
|
22
|
+
end
|
23
|
+
|
24
|
+
it "has the contents of the post" do
|
25
|
+
file = File.read(comment.filename_with_full_path)
|
26
|
+
expect(file).to eq(comment.file_contents)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
data/spec/config_spec.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe RedditArchiver::Config do
|
4
|
+
let(:yaml_data) { YAML.load(YAML_CONTENT) }
|
5
|
+
|
6
|
+
before { allow(YAML).to receive(:load_file).
|
7
|
+
with('config.yml').
|
8
|
+
and_return(yaml_data) }
|
9
|
+
|
10
|
+
context "With the values from the config file" do
|
11
|
+
subject { RedditArchiver::Config.new }
|
12
|
+
its(:username) { should eq('foo') }
|
13
|
+
its(:password) { should eq('bar') }
|
14
|
+
its(:directory) { should eq('spec/output/foobar') }
|
15
|
+
end
|
16
|
+
|
17
|
+
context "With no directory value" do
|
18
|
+
subject { RedditArchiver::Config.new }
|
19
|
+
|
20
|
+
before do
|
21
|
+
yaml_data.delete('directory')
|
22
|
+
Dir.stub(:mkdir).and_return(true)
|
23
|
+
end
|
24
|
+
|
25
|
+
its(:directory) { should eq('foo') }
|
26
|
+
end
|
27
|
+
|
28
|
+
context "Overriding the configuration values" do
|
29
|
+
subject { RedditArchiver::Config.new('username' => 'something',
|
30
|
+
'password' => 'else',
|
31
|
+
'directory' => 'altogether')
|
32
|
+
}
|
33
|
+
|
34
|
+
its(:username) { should eq('something') }
|
35
|
+
its(:password) { should eq('else') }
|
36
|
+
its(:directory) { should eq('altogether') }
|
37
|
+
end
|
38
|
+
end
|
data/spec/output/.keep
ADDED
File without changes
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'reddit_archiver'
|
2
|
+
require 'webmock/rspec'
|
3
|
+
WebMock.disable_net_connect!(allow_localhost: true)
|
4
|
+
|
5
|
+
YAML_CONTENT = <<-EOF
|
6
|
+
username: foo
|
7
|
+
password: bar
|
8
|
+
directory: spec/output/foobar
|
9
|
+
EOF
|
10
|
+
|
11
|
+
PARSED_YAML = YAML.load(YAML_CONTENT)
|
12
|
+
|
13
|
+
EXAMPLE_COMMENT = {"data" => {"subreddit_id"=>"t5_aaaaa", "link_title"=>"Test Test Test", "banned_by"=>nil, "subreddit"=>"test", "link_author"=>"test", "likes"=>true, "replies"=>nil, "saved"=>false, "id"=>"abc123", "gilded"=>0, "author"=>"me", "parent_id"=>"t3_aaaaa", "approved_by"=>nil, "body"=>"Hello.", "edited"=>false, "author_flair_css_class"=>"", "downs"=>1, "body_html"=>"<div class=\"md\"><p>Hello.</p>\n</div>", "link_id"=>"t3_bbbbb", "score_hidden"=>false, "name"=>"t1_cccccc", "created"=>1379564977.0, "author_flair_text"=>"flair", "created_utc"=>1379561377.0, "ups"=>4, "num_reports"=>0, "distinguished"=>nil}}
|
14
|
+
|
15
|
+
EXAMPLE_SUBMISSION = {"data" => {"subreddit_id"=>"t5_aaaaa", "link_title"=>"Test Test Test", "banned_by"=>nil, "subreddit"=>"test", "link_author"=>"test", "likes"=>true, "replies"=>nil, "saved"=>false, "id"=>"abc123", "gilded"=>0, "author"=>"me", "parent_id"=>"t3_aaaaa", "approved_by"=>nil, "body"=>"Hello.", "edited"=>false, "author_flair_css_class"=>"", "downs"=>1, "body_html"=>"<div class=\"md\"><p>Hello.</p>\n</div>", "link_id"=>"t3_bbbbb", "score_hidden"=>false, "name"=>"t1_cccccc", "created"=>1379564977.0, "author_flair_text"=>"flair", "created_utc"=>1379561377.0, "ups"=>4, "num_reports"=>0, "distinguished"=>nil, "permalink" => "http://example.com", "title" => "Test post", "is_self" => true, "selftext" => "> Hello.", "selftext_html" => " Hello.", "url" => "http://google.com"}}
|
16
|
+
|
17
|
+
RSpec.configure do |config|
|
18
|
+
config.before(:each) do
|
19
|
+
YAML.stub(:load_file).and_call_original
|
20
|
+
YAML.stub(:load_file).with('config.yml').and_return(PARSED_YAML)
|
21
|
+
end
|
22
|
+
config.after(:all) { system('rm -Rf spec/output/*') }
|
23
|
+
end
|
24
|
+
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe RedditArchiver::Submission do
  # Every example works on its own copy of the fixture, so no example can
  # leak changes into the shared EXAMPLE_SUBMISSION constant.
  let(:post) { EXAMPLE_SUBMISSION["data"].dup }
  subject { RedditArchiver::Submission.new(post) }

  its(:subdirectory) { should eq('submissions') }
  its(:filename) { should eq('2013-09-19_00-29-37_test-post') }

  context "When the post has its own text" do
    its(:body) { should eq("> Hello.") }
  end

  context "When the post is just a link" do
    let(:post) { EXAMPLE_SUBMISSION["data"].merge("is_self" => false) }
    its(:body) { should eq("http://google.com") }
  end

  context "The file" do
    let(:submission) { RedditArchiver::Submission.new(post) }

    before { submission.write }

    it "is created" do
      expect(File.exist?(submission.filename_with_full_path)).to be_true
    end

    it "has the contents of the post" do
      file = File.read(submission.filename_with_full_path)
      expect(file).to eq(submission.file_contents)
    end

    it "truncates the name to 97 characters" do
      long_title = "f" * 101
      # Non-mutating merge on the submission fixture keeps the shared
      # constants unchanged for the other examples.
      long_post = EXAMPLE_SUBMISSION["data"].merge("title" => long_title)
      expect(RedditArchiver::Submission.new(long_post).filename.size).to eq(97)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: reddit_archiver
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Timothy King
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-01-23 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 10.1.1
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 10.1.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 2.14.1
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 2.14.1
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: webmock
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.17.1
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 1.17.1
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: snoo
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ~>
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.1.2
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ~>
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.1.2
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: activesupport
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ~>
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 4.0.2
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ~>
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 4.0.2
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: htmlentities
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ~>
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 4.3.1
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ~>
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 4.3.1
|
111
|
+
description: Reddit_archiver is a command line tool that will download all of the
|
112
|
+
posts and comments from a given reddit account and store them in flat text files.
|
113
|
+
email:
|
114
|
+
- tmk@lordzork.com
|
115
|
+
executables:
|
116
|
+
- reddit_archiver
|
117
|
+
extensions: []
|
118
|
+
extra_rdoc_files: []
|
119
|
+
files:
|
120
|
+
- .gitignore
|
121
|
+
- Gemfile
|
122
|
+
- LICENSE.txt
|
123
|
+
- README.md
|
124
|
+
- Rakefile
|
125
|
+
- bin/reddit_archiver
|
126
|
+
- config.yml.example
|
127
|
+
- lib/reddit_archiver.rb
|
128
|
+
- lib/reddit_archiver/account.rb
|
129
|
+
- lib/reddit_archiver/comment.rb
|
130
|
+
- lib/reddit_archiver/config.rb
|
131
|
+
- lib/reddit_archiver/file_writer.rb
|
132
|
+
- lib/reddit_archiver/post.rb
|
133
|
+
- lib/reddit_archiver/submission.rb
|
134
|
+
- lib/reddit_archiver/version.rb
|
135
|
+
- reddit_archiver.gemspec
|
136
|
+
- spec/account_spec.rb
|
137
|
+
- spec/comment_spec.rb
|
138
|
+
- spec/config_spec.rb
|
139
|
+
- spec/output/.keep
|
140
|
+
- spec/spec_helper.rb
|
141
|
+
- spec/submission_spec.rb
|
142
|
+
homepage: http://github.com/tmking/reddit_archiver
|
143
|
+
licenses:
|
144
|
+
- MIT
|
145
|
+
metadata: {}
|
146
|
+
post_install_message:
|
147
|
+
rdoc_options: []
|
148
|
+
require_paths:
|
149
|
+
- lib
|
150
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
151
|
+
requirements:
|
152
|
+
- - '>='
|
153
|
+
- !ruby/object:Gem::Version
|
154
|
+
version: '0'
|
155
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - '>='
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
requirements: []
|
161
|
+
rubyforge_project:
|
162
|
+
rubygems_version: 2.0.3
|
163
|
+
signing_key:
|
164
|
+
specification_version: 4
|
165
|
+
summary: A simple command line tool to archive the contents of a reddit account.
|
166
|
+
test_files:
|
167
|
+
- spec/account_spec.rb
|
168
|
+
- spec/comment_spec.rb
|
169
|
+
- spec/config_spec.rb
|
170
|
+
- spec/output/.keep
|
171
|
+
- spec/spec_helper.rb
|
172
|
+
- spec/submission_spec.rb
|