fourchan-kit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.rspec +2 -0
- data/.travis.yml +3 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +99 -0
- data/Rakefile +7 -0
- data/bin/fourchan +4 -0
- data/features/cassettes/Fourchan/I_want_to_download_a_thread.yml +1668 -0
- data/features/fourchan.feature +24 -0
- data/features/step_definitions/cli_steps.rb +11 -0
- data/features/support/env.rb +39 -0
- data/fourchan-kit.gemspec +32 -0
- data/lib/fourchan/kit.rb +34 -0
- data/lib/fourchan/kit/api.rb +62 -0
- data/lib/fourchan/kit/board.rb +74 -0
- data/lib/fourchan/kit/cli.rb +37 -0
- data/lib/fourchan/kit/post.rb +24 -0
- data/lib/fourchan/kit/thread.rb +58 -0
- data/lib/fourchan/kit/tools.rb +158 -0
- data/lib/fourchan/kit/version.rb +5 -0
- data/spec/cassettes/Fourchan_Kit/should_be_able_to_get_an_array_of_board_names.yml +69 -0
- data/spec/cassettes/Fourchan_Kit_API/should_be_able_to_get_info_for_all_boards.yml +69 -0
- data/spec/cassettes/Fourchan_Kit_API/should_be_able_to_get_the_catalog_for_a_board.yml +2427 -0
- data/spec/cassettes/Fourchan_Kit_API/should_be_able_to_get_the_posts_from_a_thread.yml +68 -0
- data/spec/cassettes/Fourchan_Kit_API/should_be_able_to_get_the_threads_for_a_board.yml +46 -0
- data/spec/cassettes/Fourchan_Kit_API/should_be_able_to_get_threads_from_a_page.yml +274 -0
- data/spec/cassettes/Fourchan_Kit_Board/and_a_total_of_7685_posts.yml +31381 -0
- data/spec/cassettes/Fourchan_Kit_Board/and_have_15_threads_per_page.yml +2464 -0
- data/spec/cassettes/Fourchan_Kit_Board/and_should_have_a_total_of_150_threads.yml +2464 -0
- data/spec/cassettes/Fourchan_Kit_Board/should_have_10_pages.yml +2530 -0
- data/spec/cassettes/Fourchan_Kit_Board/should_have_908_posts_on_the_first_page.yml +6038 -0
- data/spec/cassettes/Fourchan_Kit_Thread/and_return_nothing_if_poster_didn_t_submit_one.yml +68 -0
- data/spec/cassettes/Fourchan_Kit_Thread/should_have_posts.yml +68 -0
- data/spec/cassettes/Fourchan_Kit_Thread/should_have_replies.yml +68 -0
- data/spec/cassettes/Fourchan_Kit_Thread/should_have_some_images.yml +68 -0
- data/spec/cassettes/Fourchan_Kit_Thread/when_using_op/should_have_a_name.yml +68 -0
- data/spec/cassettes/Fourchan_Kit_Thread/when_using_op/with_a_link_to_the_image.yml +68 -0
- data/spec/cassettes/Fourchan_Kit_Tools/should_download_a_list_of_threads.yml +1942 -0
- data/spec/cassettes/Fourchan_Kit_Tools/should_download_an_image.yml +599 -0
- data/spec/cassettes/Fourchan_Kit_Tools/should_download_images_from_a_thread.yml +823 -0
- data/spec/cassettes/Fourchan_Kit_Tools/should_verify_it_s_not_a_dead_thread.yml +54 -0
- data/spec/cassettes/Fourchan_Kit_Tools/when_it_is_dead/should_handle_that.yml +108 -0
- data/spec/fourchan_kit_api_spec.rb +35 -0
- data/spec/fourchan_kit_board_spec.rb +28 -0
- data/spec/fourchan_kit_spec.rb +10 -0
- data/spec/fourchan_kit_thread_spec.rb +33 -0
- data/spec/fourchan_kit_tools_spec.rb +59 -0
- data/spec/spec_helper.rb +17 -0
- data/spec/threads.txt +2 -0
- metadata +270 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
Feature: Fourchan
|
2
|
+
In order to be useful
|
3
|
+
As a CLI
|
4
|
+
I want to be able to do stuff
|
5
|
+
|
6
|
+
Scenario: I need some help
|
7
|
+
When I run `fourchan help`
|
8
|
+
Then the output should contain "fourchan download"
|
9
|
+
|
10
|
+
Scenario: I need some help to download
|
11
|
+
When I run `fourchan help download`
|
12
|
+
Then the output should contain "A valid URL for a thread"
|
13
|
+
|
14
|
+
Scenario: I need some help to lurk
|
15
|
+
When I run `fourchan help lurk`
|
16
|
+
Then the output should contain "Where to save images"
|
17
|
+
|
18
|
+
@vcr
|
19
|
+
Scenario: I want to download a thread
|
20
|
+
When I run `fourchan download -u http://boards.4chan.org/g/thread/41705021`
|
21
|
+
Then the following folders should exist:
|
22
|
+
| tmp/aruba/images |
|
23
|
+
And the following folders should have "2" files combined:
|
24
|
+
| tmp/aruba/images |
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# Passes when every path listed in the step's table is an existing directory.
#
# File.directory? replaces File.exists?, which was removed in Ruby 3.2; it also
# rejects a regular file that happens to sit at the expected folder path.
Then /^the following folders should exist:$/ do |folders|
  folders.raw.flatten.each do |folder|
    File.directory?(folder).should == true
  end
end
|
5
|
+
|
6
|
+
# Passes when the listed folders together contain exactly +count+ files.
#
# The glob is recursive, so entries are filtered with File.file?; otherwise a
# sub-directory inside one of the folders would be counted as a file.
And /^the following folders should have "(\d+)" files combined:$/ do |count, folders|
  total = folders.raw.flatten.inject(0) do |sum, folder|
    sum + Dir["#{folder}/**/*"].count { |entry| File.file?(entry) }
  end
  total.should == count.to_i
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'aruba/cucumber'
|
2
|
+
require 'aruba/in_process'
|
3
|
+
require 'fourchan/kit/cli'
|
4
|
+
require 'vcr'
|
5
|
+
require 'webmock'
|
6
|
+
|
7
|
+
# Recorded HTTP interactions live next to the features and are replayed
# through WebMock.
VCR.configure do |config|
  config.cassette_library_dir = 'features/cassettes'
  config.hook_into :webmock
end

# Scenarios tagged @vcr use a cassette named after the scenario.
VCR.cucumber_tags do |tags|
  tags.tag '@vcr', use_scenario_name: true
end
|
15
|
+
|
16
|
+
# Magic by http://georgemcintosh.com/vcr-and-aruba/
|
17
|
+
class VcrFriendlyMain
  # Stores the argument vector and the IO objects / kernel it is handed.
  def initialize(argv, stdin, stdout, stderr, kernel)
    @argv   = argv
    @stdin  = stdin
    @stdout = stdout
    @stderr = stderr
    @kernel = kernel
  end

  # Points the global $stdin/$stdout at the stored streams and runs the CLI
  # with the stored arguments in the current process.
  def execute!
    $stdin, $stdout = @stdin, @stdout
    Fourchan::Kit::CLI.start(@argv)
  end
end
|
28
|
+
|
29
|
+
# @vcr scenarios run the CLI inside the test process (through VcrFriendlyMain)
# instead of spawning a separate process.
Before('@vcr') do
  Aruba::InProcess.main_class = VcrFriendlyMain
  Aruba.process = Aruba::InProcess
end

# Switch Aruba back to spawned processes, eject the cassette and restore the
# standard streams replaced by VcrFriendlyMain#execute!.
After('@vcr') do
  Aruba.process = Aruba::SpawnProcess
  VCR.eject_cassette
  $stdin, $stdout = STDIN, STDOUT
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'fourchan/kit/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |gem|
  # Identity and metadata.
  gem.name          = "fourchan-kit"
  gem.version       = Fourchan::Kit::VERSION
  gem.authors       = ["lauritzsh"]
  gem.email         = ["mail@lauritz.me"]
  gem.summary       = %q{A tool and API wrapper for the 4chan API.}
  gem.description   = %q{Fourchan Kit is a Ruby wrapper and tool for the 4chan API. Use Fourchan Kit to interact with the API using Ruby, or use the tool to interact with the threads on 4chan.}
  gem.homepage      = "http://rubygems.org/gems/fourchan-kit"
  gem.license       = "MIT"

  # Packaged files are whatever git tracks; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependencies.
  {
    "json"      => "~> 1.8",
    "mechanize" => "~> 2.7",
    "thor"      => "~> 0.19"
  }.each { |name, requirement| gem.add_dependency name, requirement }

  # Development dependencies.
  {
    "aruba"    => "~> 0.5",
    "bundler"  => "~> 1.6",
    "cucumber" => "~> 1.3",
    "rake"     => "~> 10.3",
    "rspec"    => "~> 2.14",
    "vcr"      => "~> 2.9",
    "webmock"  => "~> 1.17"
  }.each { |name, requirement| gem.add_development_dependency name, requirement }
end
|
data/lib/fourchan/kit.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require "fourchan/kit/api"
|
2
|
+
require "fourchan/kit/board"
|
3
|
+
require "fourchan/kit/post"
|
4
|
+
require "fourchan/kit/thread"
|
5
|
+
require "fourchan/kit/tools"
|
6
|
+
require "fourchan/kit/version"
|
7
|
+
|
8
|
+
module Fourchan
  module Kit
    # Process-wide cache of board names; empty until the first lookup.
    $fourchan_boards = []

    ##
    # Returns the names of all boards, such as _b_, _g_, _fit_ etc.
    # The list is fetched the first time it is needed (and again on later
    # calls for as long as the cache is empty).
    #
    # @return [Array] name of all boards
    def self.fourchan_boards
      return $fourchan_boards unless $fourchan_boards.empty?

      fetch_fourchan_boards
    end

    ##
    # Rebuilds the cached list of boards from the API and returns it.
    # Announces the fetch on standard output.
    #
    # @return [Array] name of all boards
    def self.fetch_fourchan_boards
      puts "Fetching all 4chan boards"
      $fourchan_boards = []
      names = Fourchan::Kit::API.get_boards.map { |info| info["board"] }
      $fourchan_boards.concat(names)
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
module Fourchan
  module Kit

    ##
    # This module contains methods for the 4chan API.
    # Each method downloads one JSON document and returns the parsed result.
    module API

      # Host that serves every JSON document requested by this module.
      BASE_URL = "http://a.4cdn.org".freeze

      ##
      # Returns information for all boards across 4chan.
      #
      # @return [Array] information for all boards.
      def self.get_boards
        fetch_json("boards.json")['boards']
      end

      ##
      # Returns information for all threads on specified board.
      #
      # @param board [String] the board.
      # @return [Array] all threads for a board.
      def self.get_catalog(board)
        fetch_json("#{board}/catalog.json")
      end

      ##
      # Returns only id and time for threads on specified board.
      #
      # @param board [String] the board.
      # @return [Array] the id and time for all threads.
      def self.get_threads(board)
        fetch_json("#{board}/threads.json")
      end

      ##
      # Returns all posts for the specified thread.
      #
      # @param board [String] the board.
      # @param thread [Integer] the thread number.
      # @return [Array] the posts from a thread.
      def self.get_thread(board, thread)
        fetch_json("#{board}/thread/#{thread}.json")['posts']
      end

      ##
      # Returns the threads at a page number on specified board.
      #
      # 4chan stopped using zero-indexed pages in April: the first page is
      # now 1, and 0 returns nothing.
      #
      # @param board [String] the board.
      # @param page [Integer] the page number.
      # @return [Array] all threads from a page.
      def self.get_page(board, page)
        fetch_json("#{board}/#{page}.json")['threads']
      end

      ##
      # Downloads +path+ (relative to BASE_URL) and parses it as JSON.
      #
      # The document is read through the parsed URI (OpenURI::OpenRead#read)
      # instead of Kernel#open, because Kernel#open no longer opens URLs on
      # Ruby 3.0 and later.
      #
      # @param path [String] document path below BASE_URL.
      # @return [Hash, Array] the parsed JSON document.
      def self.fetch_json(path)
        JSON.parse(URI.parse("#{BASE_URL}/#{path}").read)
      end
      private_class_method :fetch_json
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module Fourchan
  module Kit

    ##
    # Board is used to deal with a 4chan board.
    # It holds the board's catalog, which is downloaded once on creation.
    class Board
      attr_reader :board

      ##
      # @param board [String] name of the board.
      # @raise [RuntimeError] if the name is not in the list of known boards.
      def initialize(board)
        raise "Not a valid board." unless Kit.fourchan_boards.include?(board)

        @name  = board
        @board = API.get_catalog(board)
      end

      ##
      # Returns only the first post (OP) from the threads on a page.
      # Pages are numbered from 1; a page outside the catalog yields an empty
      # array.
      #
      # @param page [Integer] the page to get threads from.
      # @return [Array]
      def threads(page = 1)
        entry = page_entry(page)
        return [] unless entry

        entry["threads"].map { |thread| Post.new(thread, @name) }
      end

      ##
      # Returns all threads, but not its replies, for the entire board.
      #
      # @return [Array]
      def all_threads
        page_numbers.flat_map { |page| threads(page) }
      end

      ##
      # Returns all the posts from the threads on a page.
      # Every thread on the page is fetched individually.
      #
      # @param page [Integer] the page to get threads from.
      # @return [Array]
      def posts(page = 1)
        threads(page).flat_map { |op| Thread.new(@name, op.no).posts }
      end

      ##
      # Returns all posts for the entire board.
      # *Note*: This method is pretty slow. Just wait for it to finish.
      #
      # @return [Array]
      def all_posts
        page_numbers.flat_map { |page| posts(page) }
      end

      alias_method :catalog, :board

      private

      # One-based page numbers covering every page of the catalog.
      def page_numbers
        (1..@board.length)
      end

      # Catalog entry for a one-based page number, or nil when out of range.
      # The range check keeps page 0 from wrapping around to the last page
      # through a negative array index.
      def page_entry(page)
        @board[page - 1] if page.between?(1, @board.length)
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'fourchan/kit'
|
2
|
+
require 'thor'
|
3
|
+
|
4
|
+
module Fourchan
  module Kit

    ##
    # Thor-based command line interface with two commands, +download+ and
    # +lurk+. Both hand a copy of the parsed options to Fourchan::Kit::Tools.
    class CLI < Thor
      option :url, aliases: '-u', desc: 'A valid URL for a thread'
      option :file, aliases: '-f', desc: 'Download images for every thread in a file'
      option :out, aliases: '-o', desc: 'In what folder should the images be saved to', default: 'images'
      option :quiet, aliases: '-q', desc: 'Do not output unnecessary messages', type: :boolean
      desc "download", "Download all images from a thread"
      ##
      # Downloads one thread (--url) or every thread listed in a file
      # (--file). --url wins when both are given; with neither, a hint is
      # printed.
      def download
        url, file = options[:url], options[:file]
        if url
          Fourchan::Kit::Tools.download_thread(url, options.dup)
        elsif file
          Fourchan::Kit::Tools.download_threads(file, options.dup)
        else
          # The hint must name this gem's executable, which is `fourchan`.
          puts "I need some input to download the images. See `fourchan help download` for options."
        end
      end

      option :timeout, aliases: '-t', desc: 'For how long should the thread be lurked. 0 to disable timeout', type: :numeric, default: 60
      option :quiet, aliases: '-q', desc: 'Do not output unnecessary messages', type: :boolean
      option :refresh, aliases: '-r', desc: 'How often to check for new replies', type: :numeric, default: 30
      option :out, aliases: '-o', desc: 'Where to save images', default: 'images'
      # Options that are not enabled:
      # option :download, aliases: '-d', desc: 'Lurk for new images and download them', type: :boolean
      # option :messages, aliases: '-m', desc: 'Lurk for new messages', type: :boolean, default: true
      # option :out, aliases: '-o', desc: 'The folder to store the images', default: 'images'
      desc "lurk THREAD", "Look for new messages and/or download new images"
      ##
      # Lurks a thread, refusing refresh intervals shorter than 5.
      #
      # @param thread [String] the thread to lurk, passed on to Tools.lurk.
      def lurk(thread)
        if options[:refresh] >= 5
          Fourchan::Kit::Tools.lurk(thread, options.dup)
        else
          puts("Be fair, have refresh >= 5")
        end
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
module Fourchan
  module Kit

    ##
    # Post wraps the attributes of a single post in an OpenStruct, so each
    # attribute can be read as a method, and remembers which board the post
    # belongs to in order to build the link to its image.
    class Post < OpenStruct
      ##
      # @param hash [Hash] the attributes of the post.
      # @param board [String] the board the post was made on.
      def initialize(hash, board)
        super(hash)
        @board = board
      end

      ##
      # Return an URL for the image (if user submitted an image).
      #
      # @return [String, nil] the URL for the image, or nil when the post has
      #   no +tim+ attribute.
      def image_link
        return unless self.tim

        file_name = "#{self.tim}#{self.ext}"
        "http://i.4cdn.org/#{@board}/#{file_name}"
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Fourchan
  module Kit

    ##
    # Thread is used to deal with a thread from a board.
    # The raw posts are fetched on creation; Post objects are built from them
    # on first use and then cached.
    class Thread
      attr_reader :thread, :board

      ##
      # @param board [String] the board the thread lives on.
      # @param thread [Integer] the thread number.
      def initialize(board, thread)
        @posts  = []
        @board  = board
        @thread = API.get_thread(board, thread)
      end

      ##
      # Returns all posts from the thread, including OP.
      def posts
        @posts = @thread.map { |post| Post.new(post, @board) } if @posts.empty?
        @posts
      end

      ##
      # Return only the first post from the thread.
      def op
        posts.first
      end

      ##
      # Fetches the thread again and returns its replies. OP is not included.
      #
      # The thread number is read from OP *before* the cached posts are
      # dropped. Clearing the cache first would let the +op+ lookup rebuild
      # it from the old data, and the freshly fetched replies would only
      # show up one call later.
      def fetch_replies
        thread_no = op.no
        @thread = API.get_thread(@board, thread_no)
        @posts  = []
        replies
      end

      ##
      # Return all the replies. OP is not included.
      # Always an array, even when the thread holds no posts at all.
      def replies
        posts.drop(1)
      end

      ##
      # Returns an array of image URLs from the thread
      # (see {Fourchan::Kit::Post#image_link}). Posts without an image are
      # left out.
      def images
        posts.map(&:image_link).compact
      end
    end
  end
end
|
@@ -0,0 +1,158 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
require 'pathname'
|
3
|
+
|
4
|
+
module Fourchan
|
5
|
+
module Kit
|
6
|
+
|
7
|
+
module Tools
|
8
|
+
$agent = Mechanize.new
|
9
|
+
|
10
|
+
|
11
|
+
##
# Downloads the image from an URL, unless a file with the same name is
# already present in the output directory.
#
# Recognised options (each is filled in on the given hash when missing):
# - :fsize the file size in bytes, shown in the progress line (default 0,
#   which leaves the size out).
# - :name  the file name to save as (default: last path segment of +link+).
# - :out   the output directory (default: "images" below the working dir).
# - :quiet suppresses the progress messages (default false).
#
# @param link [URL] the URL where the image is.
def self.download_image(link, options = {})
  options[:fsize] ||= 0
  options[:name]  ||= link.split('/').last
  options[:out]   ||= "#{Dir.pwd}/images"
  options[:quiet] ||= false

  image = "#{create_dir(options[:out])}/#{options[:name]}"

  if File.exist?(image)
    puts "Already got image, skipping" unless options[:quiet]
    return
  end

  return unless valid_link?(link)

  # The progress line is built only when it will be printed; in quiet mode
  # there is no string to append the size to.
  unless options[:quiet]
    output = "Downloading: #{link}"
    unless options[:fsize].zero?
      output << " @ " << "#{(options[:fsize] / 1024.0).round(2)}kB".rjust(9)
    end
    puts output
  end

  $agent.get(link).save(image)
end
|
33
|
+
|
34
|
+
##
# Downloads every image from a thread.
#
# The link is validated first unless options[:checked] is set. While
# iterating, options[:fsize] is updated on the given hash with the size of
# the current post, and a copy of the hash is passed on for each download.
#
# @param link [URL] the URL for the thread to download.
def self.download_thread(link, options = {})
  options[:checked] ||= false

  unless options[:checked] || ( valid_thread?(link) && valid_link?(link) )
    puts "Not a 4chan thread" unless options[:quiet]
    return
  end

  board_name, number = get_info(link)
  Thread.new(board_name, number).posts.each do |entry|
    options[:fsize] = entry.fsize
    next unless entry.image_link

    download_image(entry.image_link, options.dup)
  end
end
|
53
|
+
|
54
|
+
##
# Download all images from each thread in a file.
#
# Each thread must be on its own line and only be the URL, nothing else.
# For example:
#   # threads.txt
#   http://boards.4chan.org/wg/thread/5777567
#   http://boards.4chan.org/wg/thread/5776602
#
# Lines that are not a reachable thread URL are reported and skipped; blank
# lines are ignored. The images of each thread are stored in a sub-folder
# named after the thread number, below options[:out] ("images" when unset).
#
# @param file [File] the location of the file.
def self.download_threads(file, options = {})
  options[:quiet] ||= false

  unless File.exist?(file)
    puts "Not able to find the input file"
    return
  end

  # Remember the base folder up front: options[:out] is overwritten for
  # every thread below.
  base_out = options[:out] || "images"

  # File.foreach closes the file once the last line has been read.
  File.foreach(file) do |line|
    link = line.strip # drop the line terminator before using it as an URL
    next if link.empty?

    puts "Getting images from thread: #{link}" unless options[:quiet]
    if valid_thread?(link) && valid_link?(link)
      options[:out] = "#{base_out}/#{link[/(\d+)\z/, 1]}"
      options[:checked] = true
      download_thread(link, options)
    else
      puts "Not a 4chan thread" unless options[:quiet]
    end
    puts
  end
end
|
86
|
+
|
87
|
+
##
# Check the thread for new images every x seconds.
#
# - The refresh rate is determined by options[:refresh] and is an integer
#   (30 when unset).
# - The time to lurk is determined by options[:timeout] and is an integer.
#
# When the timeout expires a message is printed and the process exits with
# status 0.
#
# @param link [URL] the thread to lurk
def self.lurk(link, options = {})
  puts "Started lurking #{link}"

  # Posts already handled, keyed by post number, so a post is processed once
  # even though every refresh builds new post objects.
  handled = {}
  refresh = options[:refresh] || 30
  board, thread_no = get_info(link)
  thread = Thread.new(board, thread_no)

  # Only download the opening image when OP actually has one.
  op_image = thread.op.image_link
  download_image(op_image, options.dup) if op_image

  begin
    # Timeout.timeout replaces the bare Kernel#timeout call, which modern
    # Ruby no longer provides.
    Timeout.timeout(options[:timeout]) do
      loop do
        puts "Checking for images" unless options[:quiet]

        thread.fetch_replies.each do |post|
          next if handled.key?(post.no)

          options[:fsize] = post.fsize
          download_image(post.image_link, options.dup) if post.image_link

          handled[post.no] = true
        end

        sleep(refresh)
      end
    end
  rescue Timeout::Error
    puts "Timeout after #{options[:timeout]} second(s)"
    exit 0
  end
end
|
124
|
+
|
125
|
+
private
|
126
|
+
# Makes sure +directory+ exists (creating missing parents as needed) and
# returns its real, absolute path as a String.
#
# Uses File.exist?, since File.exists? was removed in Ruby 3.2.
def self.create_dir(directory)
  FileUtils.mkdir_p(directory) unless File.exist?(directory)
  Pathname.new(directory).realpath.to_s
end
|
130
|
+
|
131
|
+
# Splits a thread URL into the board name (the word right before "/thread/")
# and the thread number (the digits right after it, as an Integer).
#
# @return [Array] the pair [board, thread].
def self.get_info(link)
  board_match  = link.match(/(\w+)\/thread\//)
  thread_match = link.match(/\/thread\/([0-9]+)/)
  [board_match[1], thread_match[1].to_i]
end
|
136
|
+
|
137
|
+
# Tells whether +link+ looks like the URL of a 4chan thread, i.e. it ends in
# "boards.4chan.org/<board>/thread/<number>".
#
# The dots of the host name are escaped so they only match literal dots, and
# \Z anchors the number to the end of the whole string (a single trailing
# newline is tolerated) rather than to the end of any line.
#
# @return [Boolean]
def self.valid_thread?(link)
  !(link =~ /boards\.4chan\.org\/\w+\/thread\/\d+\Z/).nil?
end
|
140
|
+
|
141
|
+
# Tells whether +link+ is an http(s) URL that can actually be fetched.
#
# A link that does not match the URL pattern is rejected without a request.
# Otherwise the link is requested through the shared agent; a
# Mechanize::ResponseCodeError counts as "not valid", any other error
# propagates.
#
# @return [Boolean]
def self.valid_link?(link)
  return false unless link =~ /^#{URI::regexp(['http', 'https'])}$/

  $agent.get(link)
  true
rescue Mechanize::ResponseCodeError
  false
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|