fourchan-kit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +22 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +3 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +99 -0
  8. data/Rakefile +7 -0
  9. data/bin/fourchan +4 -0
  10. data/features/cassettes/Fourchan/I_want_to_download_a_thread.yml +1668 -0
  11. data/features/fourchan.feature +24 -0
  12. data/features/step_definitions/cli_steps.rb +11 -0
  13. data/features/support/env.rb +39 -0
  14. data/fourchan-kit.gemspec +32 -0
  15. data/lib/fourchan/kit.rb +34 -0
  16. data/lib/fourchan/kit/api.rb +62 -0
  17. data/lib/fourchan/kit/board.rb +74 -0
  18. data/lib/fourchan/kit/cli.rb +37 -0
  19. data/lib/fourchan/kit/post.rb +24 -0
  20. data/lib/fourchan/kit/thread.rb +58 -0
  21. data/lib/fourchan/kit/tools.rb +158 -0
  22. data/lib/fourchan/kit/version.rb +5 -0
  23. data/spec/cassettes/Fourchan_Kit/should_be_able_to_get_an_array_of_board_names.yml +69 -0
  24. data/spec/cassettes/Fourchan_Kit_API/should_be_able_to_get_info_for_all_boards.yml +69 -0
  25. data/spec/cassettes/Fourchan_Kit_API/should_be_able_to_get_the_catalog_for_a_board.yml +2427 -0
  26. data/spec/cassettes/Fourchan_Kit_API/should_be_able_to_get_the_posts_from_a_thread.yml +68 -0
  27. data/spec/cassettes/Fourchan_Kit_API/should_be_able_to_get_the_threads_for_a_board.yml +46 -0
  28. data/spec/cassettes/Fourchan_Kit_API/should_be_able_to_get_threads_from_a_page.yml +274 -0
  29. data/spec/cassettes/Fourchan_Kit_Board/and_a_total_of_7685_posts.yml +31381 -0
  30. data/spec/cassettes/Fourchan_Kit_Board/and_have_15_threads_per_page.yml +2464 -0
  31. data/spec/cassettes/Fourchan_Kit_Board/and_should_have_a_total_of_150_threads.yml +2464 -0
  32. data/spec/cassettes/Fourchan_Kit_Board/should_have_10_pages.yml +2530 -0
  33. data/spec/cassettes/Fourchan_Kit_Board/should_have_908_posts_on_the_first_page.yml +6038 -0
  34. data/spec/cassettes/Fourchan_Kit_Thread/and_return_nothing_if_poster_didn_t_submit_one.yml +68 -0
  35. data/spec/cassettes/Fourchan_Kit_Thread/should_have_posts.yml +68 -0
  36. data/spec/cassettes/Fourchan_Kit_Thread/should_have_replies.yml +68 -0
  37. data/spec/cassettes/Fourchan_Kit_Thread/should_have_some_images.yml +68 -0
  38. data/spec/cassettes/Fourchan_Kit_Thread/when_using_op/should_have_a_name.yml +68 -0
  39. data/spec/cassettes/Fourchan_Kit_Thread/when_using_op/with_a_link_to_the_image.yml +68 -0
  40. data/spec/cassettes/Fourchan_Kit_Tools/should_download_a_list_of_threads.yml +1942 -0
  41. data/spec/cassettes/Fourchan_Kit_Tools/should_download_an_image.yml +599 -0
  42. data/spec/cassettes/Fourchan_Kit_Tools/should_download_images_from_a_thread.yml +823 -0
  43. data/spec/cassettes/Fourchan_Kit_Tools/should_verify_it_s_not_a_dead_thread.yml +54 -0
  44. data/spec/cassettes/Fourchan_Kit_Tools/when_it_is_dead/should_handle_that.yml +108 -0
  45. data/spec/fourchan_kit_api_spec.rb +35 -0
  46. data/spec/fourchan_kit_board_spec.rb +28 -0
  47. data/spec/fourchan_kit_spec.rb +10 -0
  48. data/spec/fourchan_kit_thread_spec.rb +33 -0
  49. data/spec/fourchan_kit_tools_spec.rb +59 -0
  50. data/spec/spec_helper.rb +17 -0
  51. data/spec/threads.txt +2 -0
  52. metadata +270 -0
@@ -0,0 +1,24 @@
1
+ Feature: Fourchan
2
+ In order to be useful
3
+ As a CLI
4
+ I want to be able to do stuff
5
+
6
+ Scenario: I need some help
7
+ When I run `fourchan help`
8
+ Then the output should contain "fourchan download"
9
+
10
+ Scenario: I need some help to download
11
+ When I run `fourchan help download`
12
+ Then the output should contain "A valid URL for a thread"
13
+
14
+ Scenario: I need some help to lurk
15
+ When I run `fourchan help lurk`
16
+ Then the output should contain "Where to save images"
17
+
18
+ @vcr
19
+ Scenario: I want to download a thread
20
+ When I run `fourchan download -u http://boards.4chan.org/g/thread/41705021`
21
+ Then the following folders should exist:
22
+ | tmp/aruba/images |
23
+ And the following folders should have "2" files combined:
24
+ | tmp/aruba/images |
@@ -0,0 +1,11 @@
1
# Asserts that every path listed in the step's table exists on disk.
Then /^the following folders should exist:$/ do |folders|
  # File.exists? was removed in Ruby 3.2; File.exist? is available everywhere.
  folders.raw.flatten.each { |folder| File.exist?(folder).should == true }
end

# Asserts that the listed folders contain `count` entries in total.
# The glob is recursive and also matches sub-directories, so those are counted too.
And /^the following folders should have "(\d+)" files combined:$/ do |count, folders|
  total = folders.raw.flatten.inject(0) do |sum, folder|
    sum + Dir["#{folder}/**/*"].length
  end
  total.should == count.to_i
end
@@ -0,0 +1,39 @@
1
+ require 'aruba/cucumber'
2
+ require 'aruba/in_process'
3
+ require 'fourchan/kit/cli'
4
+ require 'vcr'
5
+ require 'webmock'
6
+
7
# Hook VCR into WebMock and keep the recorded cassettes next to the features.
VCR.configure do |config|
  config.cassette_library_dir = 'features/cassettes'
  config.hook_into :webmock
end

# Scenarios tagged @vcr use a cassette named after the scenario.
VCR.cucumber_tags do |tags|
  tags.tag '@vcr', use_scenario_name: true
end
15
+
16
+ # Magic by http://georgemcintosh.com/vcr-and-aruba/
17
# Main class handed to Aruba's in-process runner; it starts the CLI directly
# with the streams it was given.
class VcrFriendlyMain
  # Stores the argument list plus the IO and kernel objects passed in.
  def initialize(argv, stdin, stdout, stderr, kernel)
    @argv = argv
    @stdin = stdin
    @stdout = stdout
    @stderr = stderr
    @kernel = kernel
  end

  # Points the global stdin/stdout at the supplied streams and runs the CLI
  # with the stored arguments. The globals are not restored here.
  def execute!
    $stdin, $stdout = @stdin, @stdout
    Fourchan::Kit::CLI.start(@argv)
  end
end
28
+
29
# @vcr scenarios run the CLI inside the test process through VcrFriendlyMain.
Before('@vcr') do
  Aruba::InProcess.main_class = VcrFriendlyMain
  Aruba.process = Aruba::InProcess
end

# Afterwards: back to spawned processes, eject the cassette and restore the
# real standard streams that VcrFriendlyMain#execute! replaced.
After('@vcr') do
  Aruba.process = Aruba::SpawnProcess
  VCR.eject_cassette
  $stdin, $stdout = STDIN, STDOUT
end
@@ -0,0 +1,32 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'fourchan/kit/version'
5
+
6
# Gem specification for fourchan-kit: metadata, packaged files and dependencies.
Gem::Specification.new do |gem|
  gem.name        = "fourchan-kit"
  gem.version     = Fourchan::Kit::VERSION
  gem.authors     = %w[lauritzsh]
  gem.email       = %w[mail@lauritz.me]
  gem.summary     = "A tool and API wrapper for the 4chan API."
  gem.description = "Fourchan Kit is a Ruby wrapper and tool for the 4chan API. Use Fourchan Kit to interact with the API using Ruby, or use the tool to interact with the threads on 4chan."
  gem.homepage    = "http://rubygems.org/gems/fourchan-kit"
  gem.license     = "MIT"

  # Package everything git tracks; executables and test files are picked out
  # of that list by path prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = %w[lib]

  # Runtime dependencies.
  {
    "json"      => "~> 1.8",
    "mechanize" => "~> 2.7",
    "thor"      => "~> 0.19"
  }.each { |name, requirement| gem.add_dependency name, requirement }

  # Development and test dependencies.
  {
    "aruba"    => "~> 0.5",
    "bundler"  => "~> 1.6",
    "cucumber" => "~> 1.3",
    "rake"     => "~> 10.3",
    "rspec"    => "~> 2.14",
    "vcr"      => "~> 2.9",
    "webmock"  => "~> 1.17"
  }.each { |name, requirement| gem.add_development_dependency name, requirement }
end
@@ -0,0 +1,34 @@
1
+ require "fourchan/kit/api"
2
+ require "fourchan/kit/board"
3
+ require "fourchan/kit/post"
4
+ require "fourchan/kit/thread"
5
+ require "fourchan/kit/tools"
6
+ require "fourchan/kit/version"
7
+
8
module Fourchan
  module Kit
    # Process-wide cache of board names; stays empty until the first fetch.
    $fourchan_boards = []

    ##
    # Returns the short names of all boards, such as _b_, _g_, _fit_ etc.
    # The list is fetched first when the cache is still empty.
    #
    # @return [Array] name of all boards
    def self.fourchan_boards
      return $fourchan_boards unless $fourchan_boards.empty?

      fetch_fourchan_boards
      $fourchan_boards
    end

    ##
    # Empties the cached list, refills it from the API and returns it.
    #
    # @return [Array] name of all boards
    def self.fetch_fourchan_boards
      puts "Fetching all 4chan boards"
      $fourchan_boards = []
      Fourchan::Kit::API.get_boards.each_with_object($fourchan_boards) do |info, names|
        names << info["board"]
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'json'
2
+ require 'open-uri'
3
+
4
module Fourchan
  module Kit

    ##
    # This module contains methods for the 4chan API.
    # Each method downloads one JSON document and returns the parsed Ruby data.
    module API
      # Host that serves the JSON documents requested below.
      BASE_URL = "http://a.4cdn.org".freeze

      ##
      # Returns information for all boards across 4chan.
      #
      # @return [Array] information for all boards.
      def self.get_boards
        fetch_json("boards.json")['boards']
      end

      ##
      # Returns information for all threads on specified board.
      #
      # @param board [String] the board.
      # @return [Array] all threads for a board.
      def self.get_catalog(board)
        fetch_json("#{board}/catalog.json")
      end

      ##
      # Returns only id and time for threads on specified board.
      #
      # @param board [String] the board.
      # @return [Array] the id and time for all threads.
      def self.get_threads(board)
        fetch_json("#{board}/threads.json")
      end

      ##
      # Returns all posts for the specified thread.
      #
      # @param board [String] the board.
      # @param thread [Integer] the thread number.
      # @return [Array] the posts from a thread.
      def self.get_thread(board, thread)
        fetch_json("#{board}/thread/#{thread}.json")['posts']
      end

      ##
      # Returns the threads at a page number on specified board.
      #
      # 4chan stopped using zero-index pages in April. Instead of first page
      # is at 0, it is now at 1. 0 returns nothing.
      #
      # @param board [String] the board.
      # @param page [Integer] the page number.
      # @return [Array] all threads from a page.
      def self.get_page(board, page)
        fetch_json("#{board}/#{page}.json")['threads']
      end

      # Downloads BASE_URL/path and parses the body as JSON.
      #
      # Kernel#open stopped accepting URLs in Ruby 3.0, so URI.open is used
      # whenever open-uri provides it (Ruby 2.5+); older Rubies fall back to
      # Kernel#open. The block form closes the IO once the body has been read.
      def self.fetch_json(path)
        url = "#{BASE_URL}/#{path}"
        body = if URI.respond_to?(:open)
                 URI.open(url, &:read)
               else
                 open(url, &:read)
               end
        JSON.parse(body)
      end
      private_class_method :fetch_json
    end
  end
end
@@ -0,0 +1,74 @@
1
module Fourchan
  module Kit

    ##
    # Board wraps the catalog of a single 4chan board.
    class Board
      attr_reader :board

      # Loads the catalog for +board+.
      #
      # @param board [String] the board name; must be one of Kit.fourchan_boards.
      # @raise [RuntimeError] when the name is not a known board.
      def initialize(board)
        raise "Not a valid board." unless Kit.fourchan_boards.include?(board)

        @name = board
        @board = API.get_catalog(board)
      end

      ##
      # Returns the opening post (OP) of every thread on a page.
      #
      # @param page [Integer] the page to get threads from (first page is 1).
      # @return [Array]
      def threads(page = 1)
        @board[page - 1]["threads"].map { |op| Post.new(op, @name) }
      end

      ##
      # Returns the opening posts of every thread on the board, without replies.
      #
      # @return [Array]
      def all_threads
        @board.map { |entry| threads(entry["page"]) }.flatten
      end

      ##
      # Returns every post of every thread on a page. Each thread is fetched
      # separately.
      #
      # @param page [Integer] the page to get threads from.
      # @return [Array]
      def posts(page = 1)
        threads(page).map { |op| Thread.new(@name, op.no).posts }.flatten
      end

      ##
      # Returns all posts for the entire board.
      # *Note*: This method is pretty slow. Just wait for it to finish.
      #
      # @return [Array]
      def all_posts
        (1..@board.length).map { |number| posts(number) }.flatten
      end

      alias_method :catalog, :board
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'fourchan/kit'
2
+ require 'thor'
3
+
4
module Fourchan
  module Kit

    ##
    # Thor command line interface with the +download+ and +lurk+ commands.
    class CLI < Thor
      option :url, aliases: '-u', desc: 'A valid URL for a thread'
      option :file, aliases: '-f', desc: 'Download images for every thread in a file'
      option :out, aliases: '-o', desc: 'In what folder should the images be saved to', default: 'images'
      option :quiet, aliases: '-q', desc: 'Do not output unnecessary messages', type: :boolean
      desc "download", "Download all images from a thread"
      # Downloads the images of a single thread (--url) or of every thread
      # listed in a file (--file). --url wins when both are given.
      def download
        url, file = options[:url], options[:file]
        if url
          # options is handed over as a copy because Tools writes into the hash.
          Fourchan::Kit::Tools.download_thread(url, options.dup)
        elsif file
          Fourchan::Kit::Tools.download_threads(file, options.dup)
        else
          puts "I need some input to download the images. See `fourchan help download` for options."
        end
      end

      option :timeout, aliases: '-t', desc: 'For how long should the thread be lurked. 0 to disable timeout', type: :numeric, default: 60
      option :quiet, aliases: '-q', desc: 'Do not output unnecessary messages', type: :boolean
      option :refresh, aliases: '-r', desc: 'How often to check for new replies', type: :numeric, default: 30
      option :out, aliases: '-o', desc: 'Where to save images', default: 'images'
      # Not implemented yet:
      # option :download, aliases: '-d', desc: 'Lurk for new images and download them', type: :boolean
      # option :messages, aliases: '-m', desc: 'Lurk for new messages', type: :boolean, default: true
      desc "lurk THREAD", "Look for new messages and/or download new images"
      # Polls THREAD for new images. Refresh intervals below 5 seconds are
      # refused.
      def lurk(thread)
        if options[:refresh] >= 5
          Fourchan::Kit::Tools.lurk(thread, options.dup)
        else
          puts "Be fair, have refresh >= 5"
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'ostruct'
2
+
3
module Fourchan
  module Kit

    ##
    # A single post. OpenStruct exposes every key of the given hash as a
    # reader, and the board name is kept so the image URL can be built.
    class Post < OpenStruct
      # @param hash [Hash] the attributes of the post.
      # @param board [String] the board the post belongs to.
      def initialize(hash, board)
        super(hash)
        @board = board
      end

      ##
      # Builds the URL of the image attached to the post.
      #
      # @return [String, nil] the URL, or nil when the post has no +tim+ value.
      def image_link
        return unless tim

        board_url = "http://i.4cdn.org/#{@board}"
        "#{board_url}/#{tim}#{ext}"
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
module Fourchan
  module Kit

    ##
    # Thread is used to deal with a thread from a board.
    class Thread
      attr_reader :thread, :board

      # Downloads the thread immediately.
      #
      # @param board [String] the board the thread lives on.
      # @param thread [Integer] the thread number.
      def initialize(board, thread)
        @posts = []
        @board = board
        @thread = API.get_thread(board, thread)
      end

      ##
      # Returns all posts from the thread, including OP.
      # The Post objects are built on first use and then cached.
      def posts
        @posts = @thread.map { |post| Post.new(post, @board) } if @posts.empty?
        @posts
      end

      ##
      # Return only the first post from the thread.
      def op
        posts.first
      end

      ##
      # Downloads the thread again and returns the fresh replies.
      # OP is not included.
      def fetch_replies
        # Read the thread number before touching the cache: calling #op after
        # the cache was cleared would rebuild it from the old data, and the
        # stale posts would then be returned instead of the new ones.
        thread_no = op.no
        @thread = API.get_thread(@board, thread_no)
        @posts = []
        replies
      end

      ##
      # Return all the replies. OP is not included.
      # An empty array is returned when there are no posts at all.
      def replies
        posts.drop(1)
      end

      ##
      # Returns an array of image URLs from the thread
      # (see {Fourchan::Kit::Post#image_link}). Posts without an image are skipped.
      def images
        posts.map(&:image_link).compact
      end
    end
  end
end
@@ -0,0 +1,158 @@
1
require 'fileutils'
require 'mechanize'
require 'pathname'
require 'timeout'
3
+
4
module Fourchan
  module Kit

    ##
    # Helpers for downloading images from one thread, from a list of threads,
    # or from a thread that is polled for new replies.
    module Tools
      # Mechanize agent shared by every request made in this module.
      $agent = Mechanize.new


      ##
      # Downloads the image from an URL.
      #
      # Recognised options: :out (target folder, defaults to ./images),
      # :name (file name, defaults to the last URL segment), :fsize (size in
      # bytes, only used for the progress line) and :quiet (no output).
      #
      # @param link [URL] the URL where the image is.
      def self.download_image(link, options = {})
        options[:fsize] ||= 0
        options[:name] ||= link.split('/').last
        options[:out] ||= "#{Dir.pwd}/images"
        options[:quiet] ||= false

        image = "#{create_dir(options[:out])}/#{options[:name]}"
        if File.exist?(image)
          puts "Already got image, skipping" unless options[:quiet]
          return
        end
        return unless valid_link?(link)

        # The progress line is only built when it will be printed; building it
        # unconditionally used to fail on nil when :quiet was set.
        unless options[:quiet]
          output = "Downloading: #{link}"
          unless options[:fsize].zero?
            output << " @ " << "#{(options[:fsize] / 1024.0).round(2)}kB".rjust(9)
          end
          puts output
        end
        $agent.get(link).save(image)
      end

      ##
      # Downloads every image from a thread.
      #
      # The link is validated first unless options[:checked] is set.
      #
      # @param link [URL] the URL for the thread to download.
      def self.download_thread(link, options = {})
        options[:checked] ||= false

        unless options[:checked] || (valid_thread?(link) && valid_link?(link))
          puts "Not a 4chan thread" unless options[:quiet]
          return
        end

        board, thread_no = get_info(link)
        Thread.new(board, thread_no).posts.each do |post|
          options[:fsize] = post.fsize
          # A copy is passed so the per-image :name default does not stick.
          download_image(post.image_link, options.dup) if post.image_link
        end
      end

      ##
      # Download all images from each thread in a file.
      #
      # Each thread must be on its own line and only be the URL, nothing else.
      # For example:
      #   # threads.txt
      #   http://boards.4chan.org/wg/thread/5777567
      #   http://boards.4chan.org/wg/thread/5776602
      #
      # It takes care of dead threads or wrong URLs. Blank lines are skipped.
      #
      # @param file [File] the location of the file.
      def self.download_threads(file, options = {})
        options[:quiet] ||= false

        unless File.exist?(file)
          puts "Not able to find the input file"
          return
        end

        # File.foreach closes the file when done; each line is stripped so the
        # trailing newline does not end up in the requested URL.
        File.foreach(file) do |line|
          link = line.strip
          next if link.empty?

          puts "Getting images from thread: #{link}" unless options[:quiet]
          if valid_thread?(link) && valid_link?(link)
            # NOTE(review): this replaces any :out passed by the caller with
            # images/<thread number> -- confirm that this is intended.
            options[:out] = "images/#{link.scan(/(\d+)$/).first.first}"
            options[:checked] = true
            download_thread(link, options)
          else
            puts "Not a 4chan thread" unless options[:quiet]
          end
          puts
        end
      end

      ##
      # Check the thread for new images every x seconds.
      #
      # - The refresh rate is determined by options[:refresh] and is an integer
      #   (30 when not given).
      # - The time to lurk is determined by options[:timeout] and is an integer;
      #   0 or nil disables the timeout.
      #
      # The process exits with status 0 once the timeout is reached.
      #
      # @param link [URL] the thread to lurk
      # @raise [ArgumentError] when the link does not look like a thread URL.
      def self.lurk(link, options = {})
        options[:refresh] ||= 30
        puts "Started lurking #{link}"

        downloaded = []
        board, thread_no = get_info(link)
        thread = Thread.new(board, thread_no)

        op_image = thread.op.image_link
        download_image(op_image, options.dup) if op_image

        begin
          # Kernel#timeout no longer exists in current Ruby; Timeout.timeout
          # is the same call under its module name.
          Timeout.timeout(options[:timeout]) do
            loop do
              puts "Checking for images" unless options[:quiet]

              (thread.fetch_replies - downloaded).each do |post|
                options[:fsize] = post.fsize
                download_image(post.image_link, options.dup) if post.image_link

                downloaded << post
              end

              sleep(options[:refresh])
            end
          end
        rescue Timeout::Error
          puts "Timeout after #{options[:timeout]} second(s)"
          exit 0
        end
      end

      # NOTE: `private` has no effect on methods defined with `def self.`, so
      # the helpers below remain callable from outside the module.
      private

      # Creates the directory when missing and returns its absolute real path.
      def self.create_dir(directory)
        FileUtils.mkdir_p(directory) unless File.exist?(directory)
        Pathname.new(directory).realpath.to_s
      end

      # Extracts the board name and the thread number from a thread URL.
      #
      # @return [Array] the board as String and the thread number as Integer.
      # @raise [ArgumentError] when the link holds no "<board>/thread/<number>" part.
      def self.get_info(link)
        match = link.match(%r{(\w+)/thread/([0-9]+)})
        raise ArgumentError, "Not a 4chan thread URL: #{link}" unless match

        [match[1], match[2].to_i]
      end

      # True when the link ends in boards.4chan.org/<board>/thread/<number>.
      def self.valid_thread?(link)
        link =~ %r{boards\.4chan\.org/\w+/thread/\d+$} ? true : false
      end

      # True when the link is an http(s) URL and requesting it does not raise
      # Mechanize::ResponseCodeError. This performs a real request.
      def self.valid_link?(link)
        return false unless link =~ /^#{URI::regexp(['http', 'https'])}$/

        $agent.get(link)
        true
      rescue Mechanize::ResponseCodeError
        false
      end
    end
  end
end