rubychan 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm default@rubychan
@@ -0,0 +1,2 @@
1
+ language: ruby
2
+ rvm: 1.9.2
data/Gemfile ADDED
@@ -0,0 +1,18 @@
1
source "http://rubygems.org"

# Runtime dependencies: loaded by lib/rubychan.rb (thor) and
# lib/scraper.rb (nokogiri).
gem "thor"
gem "nokogiri"

# Development dependencies: everything needed to run rake, specs and features.
group :development do
  gem "rspec", "~> 2.8.0"
  gem "rdoc", "~> 3.12"
  gem "cucumber", ">= 0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.8.3"
  gem "simplecov", ">= 0"
  # FakeWeb is only required by the specs and the Cucumber support files, so
  # it must not be installed as a runtime dependency of the gem.
  gem "fakeweb"
end
@@ -0,0 +1,54 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ builder (3.0.0)
5
+ cucumber (1.1.9)
6
+ builder (>= 2.1.2)
7
+ diff-lcs (>= 1.1.2)
8
+ gherkin (~> 2.9.0)
9
+ json (>= 1.4.6)
10
+ term-ansicolor (>= 1.0.6)
11
+ diff-lcs (1.1.3)
12
+ fakeweb (1.3.0)
13
+ gherkin (2.9.0)
14
+ json (>= 1.4.6)
15
+ git (1.2.5)
16
+ jeweler (1.8.3)
17
+ bundler (~> 1.0)
18
+ git (>= 1.2.5)
19
+ rake
20
+ rdoc
21
+ json (1.6.5)
22
+ multi_json (1.1.0)
23
+ nokogiri (1.5.2)
24
+ rake (0.9.2.2)
25
+ rdoc (3.12)
26
+ json (~> 1.4)
27
+ rspec (2.8.0)
28
+ rspec-core (~> 2.8.0)
29
+ rspec-expectations (~> 2.8.0)
30
+ rspec-mocks (~> 2.8.0)
31
+ rspec-core (2.8.0)
32
+ rspec-expectations (2.8.0)
33
+ diff-lcs (~> 1.1.2)
34
+ rspec-mocks (2.8.0)
35
+ simplecov (0.6.1)
36
+ multi_json (~> 1.0)
37
+ simplecov-html (~> 0.5.3)
38
+ simplecov-html (0.5.3)
39
+ term-ansicolor (1.0.7)
40
+ thor (0.14.6)
41
+
42
+ PLATFORMS
43
+ ruby
44
+
45
+ DEPENDENCIES
46
+ bundler (~> 1.0.0)
47
+ cucumber
48
+ fakeweb
49
+ jeweler (~> 1.8.3)
50
+ nokogiri
51
+ rdoc (~> 3.12)
52
+ rspec (~> 2.8.0)
53
+ simplecov
54
+ thor
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Blaine Pace
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,19 @@
1
+ = rubychan
2
+
3
+ RubyChan is a command line tool and library for scraping 4Chan threads.
4
+
5
+ == Contributing to rubychan
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
9
+ * Fork the project.
10
+ * Start a feature/bugfix branch.
11
+ * Commit and push until you are happy with your contribution.
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2012 Blaine Pace. See LICENSE.txt for
18
+ further details.
19
+
@@ -0,0 +1,52 @@
1
# encoding: utf-8

# Build, test and documentation tasks for the rubychan gem.

require 'rubygems'
require 'bundler'

# Activate the bundled gems up front. When the bundle is incomplete, print the
# reason plus a hint and exit with Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts(bundler_error.message, "Run `bundle install` to install missing gems")
  exit(bundler_error.status_code)
end
require 'rake'

# Packaging tasks. The block receives a Gem::Specification
# (see http://docs.rubygems.org/read/chapter/20 for more options).
# Dependencies are defined in the Gemfile.
require 'jeweler'
Jeweler::Tasks.new do |gemspec|
  gemspec.name        = 'rubychan'
  gemspec.homepage    = 'http://github.com/sensae/rubychan'
  gemspec.license     = 'MIT'
  gemspec.summary     = '4Chan Scraping Toolkit'
  gemspec.description = 'A script and library for scraping 4chan threads'
  gemspec.email       = 'blainepace@gmail.com'
  gemspec.authors     = ['Blaine Pace']
end
Jeweler::RubygemsDotOrgTasks.new

# `rake spec` runs every *_spec.rb under spec/.
require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |t|
  t.pattern = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with the task's rcov flag switched on.
RSpec::Core::RakeTask.new(:rcov) do |t|
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

# `rake features` runs the Cucumber scenarios.
require 'cucumber/rake/task'
Cucumber::Rake::Task.new(:features)

task default: :spec

# `rake rdoc` documents the README and everything under lib/, titled with the
# contents of the VERSION file when that file exists.
require 'rdoc/task'
Rake::RDocTask.new do |doc|
  release = if File.exist?('VERSION')
              File.read('VERSION')
            else
              ''
            end

  doc.rdoc_dir = 'rdoc'
  doc.title = "rubychan #{release}"
  doc.rdoc_files.include('README*', 'lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.1
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Command line entry point: hands the process arguments to RubyChan::Runner.
require 'rubychan'

RubyChan::Runner.start(ARGV)
@@ -0,0 +1,8 @@
1
+ Feature: Scraping 4chan
2
+ In order to scrape 4chan
3
+ A user presents a URI to a thread
4
+
5
+ Scenario: Scraping a URI
6
+ Given a valid URI
7
+ When I call scrape
8
+ Then I should see output
@@ -0,0 +1,21 @@
1
require 'tmpdir'

# Registers a fake thread page, plus the two images it links to, with FakeWeb.
Given /^a valid URI$/ do
  @uri = "http://4chan.org"
  @html = <<-eos
    <html>
      <body>
        <a href="http://images.4chan.org/a/src/1.jpg"></a>
        <a href="http://images.4chan.org/b/src/2.jpg"></a>
      </body>
    </html>
  eos
  FakeWeb.register_uri(:get, @uri, :body => @html)
  # The scraper downloads every matching link, so the images are stubbed too;
  # otherwise the scenario would contact the real images.4chan.org.
  FakeWeb.register_uri(:get, "http://images.4chan.org/a/src/1.jpg", :body => "image-1")
  FakeWeb.register_uri(:get, "http://images.4chan.org/b/src/2.jpg", :body => "image-2")
end

# Runs the scrape command against the fake thread.
When /^I call scrape$/ do
  @runner = RubyChan::Runner.new
  # Downloads are written to the current directory, so run inside a scratch
  # directory and leave no files behind in the project tree.
  Dir.mktmpdir do |dir|
    Dir.chdir(dir) { @runner.scrape(@uri) }
  end
end

# Checks that the scraper printed something.
Then /^I should see output$/ do
  # features/support/env.rb replaces $stdout with a StringIO, so the captured
  # text can be inspected directly (a bare "STDOUT is not nil" check can never
  # fail).
  $stdout.string.should_not be_empty
end
@@ -0,0 +1,17 @@
1
# Cucumber support file: activates the bundle, captures output and loads the
# library under test.
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end

# Gems are required only after Bundler.setup so that the versions locked in
# Gemfile.lock are the ones that get activated.
require 'stringio'
require 'fakeweb'

# Capture everything the code under test prints. The constants are reassigned
# as well (Ruby warns about this) so that code referring to STDOUT/STDERR sees
# the same buffers as code using $stdout/$stderr.
STDOUT = $stdout = StringIO.new
STDERR = $stderr = StringIO.new

$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
require 'rubychan'

require 'rspec/expectations'
@@ -0,0 +1,14 @@
1
+ require 'thor'
2
+
3
+ require 'scraper'
4
+
5
module RubyChan
  # Thor command line interface for the gem.
  class Runner < Thor

    # Thor's `desc` takes the usage line first and the description second, so
    # the first argument must show the command as it is typed.
    desc "scrape URI", "Scrape a 4chan URL"
    # Builds a RubyChan::Scraper for +uri+ and runs it.
    #
    # uri - location of the thread page, passed straight to Scraper.new.
    #
    # Returns whatever Scraper#scrape returns.
    def scrape(uri)
      @scraper = RubyChan::Scraper.new(uri)
      @scraper.scrape
    end
  end
end
@@ -0,0 +1,27 @@
1
require 'net/http'
require 'open-uri'
require 'nokogiri'
3
+
4
module RubyChan
  # Downloads every image linked from a thread page into the current directory.
  class Scraper

    # Matches hrefs that point into a board's src/ directory on
    # images.4chan.org, with or without a scheme, e.g.
    # "http://images.4chan.org/a/src/1.jpg". The dots in the host are escaped
    # so that look-alike hosts such as "imagesX4chan.org" are rejected.
    IMAGE_LINK = %r{//images\.4chan\.org/.+/src/\d+\..{3}}

    # Image hrefs found by the most recent #scrape, in page order (empty
    # before the first call).
    # NOTE(review): spec/scraper_spec.rb matches this against a Regexp; an
    # Array of href strings is exposed here - confirm the intended type.
    attr_reader :imagelinks

    # uri - location of the thread page; handed to `open` by #scrape.
    def initialize(uri)
      @uri = uri
      @imagelinks = []
    end

    # Fetches the page, records every anchor whose href matches IMAGE_LINK and
    # downloads each one, printing a "Downloading ..." line per image.
    def scrape
      @imagelinks = []
      doc = Nokogiri::HTML(open(@uri))
      doc.xpath('//a').each do |link|
        href = link['href']
        # Anchors without an href yield nil, which never matches.
        next unless IMAGE_LINK =~ href

        @imagelinks << href
        uri = URI(href)
        puts "Downloading #{uri}"
        download(uri)
      end
    end

    private

    # Saves the resource at +uri+ under its basename in the current directory.
    # Non-success responses are reported on $stderr and skipped, so an error
    # page is never written to disk as if it were an image.
    def download(uri)
      Net::HTTP.start(uri.host, uri.port) do |http|
        resp = http.get(uri.path)
        if resp.is_a?(Net::HTTPSuccess)
          # File.open rather than Kernel#open: the name comes from a remote
          # page and must only ever be treated as a file name.
          File.open(File.basename(uri.path), "wb") do |file|
            file.write(resp.body)
          end
        else
          $stderr.puts "Skipping #{uri}: HTTP #{resp.code}"
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
require File.expand_path(File.join(File.dirname(__FILE__), 'spec_helper'))

# Unit specs for the command line runner; the real Scraper is replaced by a
# null-object mock so nothing is fetched.
describe "Rubychan" do
  before(:each) do
    @uri = "http://4chan.org"
    @scraper = mock("Scraper").as_null_object
    RubyChan::Scraper.stub!(:new).and_return(@scraper)
    @runner = RubyChan::Runner.new
  end

  context "#scrape" do

    it "should scrape a URI" do
      @scraper.should_receive(:scrape)

      @runner.scrape(@uri)
    end

    # NOTE(review): Runner#scrape in lib/rubychan.rb does not call puts, so
    # this expectation looks unmet - confirm the intended behaviour.
    it "should print imagelinks" do
      @imagelinks = "Image Links!"
      @scraper.stub!(:imagelinks).and_return(@imagelinks)
      STDOUT.should_receive(:puts).with(@imagelinks)

      @runner.scrape(@uri)
    end
  end
end
@@ -0,0 +1,29 @@
1
require 'spec_helper'
require 'fakeweb'
require 'tmpdir'

# Specs for RubyChan::Scraper against a FakeWeb-stubbed thread page.
describe RubyChan::Scraper do

  # The scraper writes downloads to the current directory, so every example
  # runs inside a scratch directory that is removed afterwards.
  around :each do |example|
    Dir.mktmpdir do |dir|
      Dir.chdir(dir) { example.run }
    end
  end

  before :each do
    @uri = "http://4chan.org"
    @html = <<-eos
      <html>
        <body>
          <a href="http://images.4chan.org/a/src/1.jpg"></a>
          <a href="http://images.4chan.org/b/src/2.jpg"></a>
        </body>
      </html>
    eos
    FakeWeb.register_uri(:get, @uri, :body => @html)
    # Stub the linked images as well; otherwise scraping the page would
    # contact the real images.4chan.org.
    FakeWeb.register_uri(:get, "http://images.4chan.org/a/src/1.jpg", :body => "image-1")
    FakeWeb.register_uri(:get, "http://images.4chan.org/b/src/2.jpg", :body => "image-2")
    @scraper = RubyChan::Scraper.new(@uri)
  end

  it "should open a URI" do
    @scraper.should_receive(:open).with(@uri)
    @scraper.scrape
  end

  it "should extract image URIs from the page source" do
    @scraper.scrape
    # Accepts either a single String or a list of links.
    # NOTE(review): confirm which of the two #imagelinks is meant to return.
    links = Array(@scraper.imagelinks).join("\n")
    links.should =~ /http:\/\/images.4chan.org\/.\/src\/.+/
  end
end
@@ -0,0 +1,15 @@
1
# Shared RSpec setup: puts lib/ and spec/ on the load path, loads the library
# and captures standard output and error.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
$LOAD_PATH.unshift(File.dirname(__FILE__))
# StringIO is used below, so load it explicitly instead of relying on another
# library having required it first.
require 'stringio'
require 'rspec'
require 'rubychan'

# Capture everything the code under test prints. The constants are reassigned
# as well (Ruby warns about this) because the specs set expectations on STDOUT.
STDOUT = $stdout = StringIO.new
STDERR = $stderr = StringIO.new

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f }

RSpec.configure do |config|

end
metadata ADDED
@@ -0,0 +1,169 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rubychan
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Blaine Pace
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-16 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: thor
16
+ requirement: &70172203969060 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70172203969060
25
+ - !ruby/object:Gem::Dependency
26
+ name: nokogiri
27
+ requirement: &70172203966900 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70172203966900
36
+ - !ruby/object:Gem::Dependency
37
+ name: fakeweb
38
+ requirement: &70172203964980 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *70172203964980
47
+ - !ruby/object:Gem::Dependency
48
+ name: rspec
49
+ requirement: &70172203961940 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 2.8.0
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *70172203961940
58
+ - !ruby/object:Gem::Dependency
59
+ name: rdoc
60
+ requirement: &70172203944300 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ~>
64
+ - !ruby/object:Gem::Version
65
+ version: '3.12'
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *70172203944300
69
+ - !ruby/object:Gem::Dependency
70
+ name: cucumber
71
+ requirement: &70172203941560 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: *70172203941560
80
+ - !ruby/object:Gem::Dependency
81
+ name: bundler
82
+ requirement: &70172203928560 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ~>
86
+ - !ruby/object:Gem::Version
87
+ version: 1.0.0
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: *70172203928560
91
+ - !ruby/object:Gem::Dependency
92
+ name: jeweler
93
+ requirement: &70172203926840 !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ~>
97
+ - !ruby/object:Gem::Version
98
+ version: 1.8.3
99
+ type: :development
100
+ prerelease: false
101
+ version_requirements: *70172203926840
102
+ - !ruby/object:Gem::Dependency
103
+ name: simplecov
104
+ requirement: &70172203921980 !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: *70172203921980
113
+ description: A script and library for scraping 4chan threads
114
+ email: blainepace@gmail.com
115
+ executables:
116
+ - rubychan
117
+ extensions: []
118
+ extra_rdoc_files:
119
+ - LICENSE.txt
120
+ - README.rdoc
121
+ files:
122
+ - .document
123
+ - .rspec
124
+ - .rvmrc
125
+ - .travis.yml
126
+ - Gemfile
127
+ - Gemfile.lock
128
+ - LICENSE.txt
129
+ - README.rdoc
130
+ - Rakefile
131
+ - VERSION
132
+ - bin/rubychan
133
+ - features/rubychan.feature
134
+ - features/step_definitions/rubychan_steps.rb
135
+ - features/support/env.rb
136
+ - lib/rubychan.rb
137
+ - lib/scraper.rb
138
+ - spec/rubychan_spec.rb
139
+ - spec/scraper_spec.rb
140
+ - spec/spec_helper.rb
141
+ homepage: http://github.com/sensae/rubychan
142
+ licenses:
143
+ - MIT
144
+ post_install_message:
145
+ rdoc_options: []
146
+ require_paths:
147
+ - lib
148
+ required_ruby_version: !ruby/object:Gem::Requirement
149
+ none: false
150
+ requirements:
151
+ - - ! '>='
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ segments:
155
+ - 0
156
+ hash: 165603134148496118
157
+ required_rubygems_version: !ruby/object:Gem::Requirement
158
+ none: false
159
+ requirements:
160
+ - - ! '>='
161
+ - !ruby/object:Gem::Version
162
+ version: '0'
163
+ requirements: []
164
+ rubyforge_project:
165
+ rubygems_version: 1.8.6
166
+ signing_key:
167
+ specification_version: 3
168
+ summary: 4Chan Scraping Toolkit
169
+ test_files: []