ascraper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8a95cfbe149bb77c19450a761e899ad8e9811fa2
4
+ data.tar.gz: 11958eb8b2195a5336ce6e4e4ce055b8df6c5773
5
+ SHA512:
6
+ metadata.gz: e7fdecb114609b09b2b4f5d3f5f7d0aa74babd5d4c62011adb1342971452b705256cb7614df3b36f0dd875cd7e7a8b30433ee82d1507dcd1703fb1b4ca7f8377
7
+ data.tar.gz: ae55f31b711ee451c75c10d217ce54ea58ffa7a89ecb3fe03be21bd69d6c942ba58cd1a59805db0d9bfde61b552042199e9615abb62c2fa5f3a506b475885a31
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.bundle
19
+ *.so
20
+ *.o
21
+ *.a
22
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --warnings
3
+ --require spec_helper
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ scraper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.1.0
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in scraper.gemspec
4
+ gemspec
data/Guardfile ADDED
@@ -0,0 +1,5 @@
1
# Re-runs specs when source or spec files change.
# The command goes through Bundler so the development dependencies declared
# in the gemspec are the ones that get loaded.
guard :rspec, cmd: 'bundle exec rspec' do
  # A changed spec file re-runs itself.
  watch(%r{^spec/.+_spec\.rb$})

  # lib/scraper/comics.rb -> spec/comics_spec.rb (specs sit directly under
  # spec/, not under spec/lib/).
  watch(%r{^lib/scraper/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }

  # The entry file and the shared helper affect every spec: run the suite.
  watch('lib/scraper.rb') { "spec" }
  watch('spec/spec_helper.rb') { "spec" }
end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Ahmet
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,19 @@
1
+ # Scraper
2
+
3
+ ## Installation
4
+
5
+ Add this line to your application's Gemfile:
6
+
7
+ gem 'ascraper', require: 'scraper'
8
+
9
+ And then execute:
10
+
11
+ $ bundle
12
+
13
+ Or install it yourself as:
14
+
15
+ $ gem install ascraper
16
+
17
+ ## Usage
18
+
19
+ TODO: Write usage instructions here
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
data/lib/scraper.rb ADDED
@@ -0,0 +1,8 @@
1
+ require "scraper/version"
2
+ require 'nokogiri'
3
+ require 'open-uri'
4
+ require 'ostruct'
5
+
6
+ ["comics", "movies"].each do |inc|
7
+ require File.join(File.dirname(__FILE__), "scraper", inc)
8
+ end
@@ -0,0 +1,56 @@
1
module Scraper
  # Scrapes comic listings and their detail pages from newcomic.org.
  #
  # All methods are class methods and nothing is cached: every call performs
  # fresh HTTP requests.
  class Comics
    # Root of the scraped site; listing pages live under "/page/<n>/".
    BASE_URL = 'http://www.newcomic.org'.freeze

    # Absolute http(s) URLs that end in .jpg, .jpeg or .png.
    IMAGE_URL = %r{\Ahttps?://.+\.(?:jpe?g|png)\z}

    # XPath of the element whose text is "Tags: a, b, c".
    TAGS_XPATH = '//*[@id="dle-content"]/div/article/div[2]/p/i'.freeze

    # Pagination link whose text is read as the number of the last page.
    LAST_PAGE_SELECTOR =
      '#dle-content > div.basenavi > span.navigation > a:nth-child(12)'.freeze

    # Collects every comic teaser on listing pages start_page..last_page and
    # enriches each one with data from its detail page.
    #
    # @param start_page [Integer] first listing page to read (inclusive)
    # @param last_page [Integer] last listing page to read (inclusive)
    # @return [Array<OpenStruct>] one entry per comic with title, url,
    #   main_image, other_images, download_link and tags
    def self.all(start_page = 1, last_page = 5)
      results = []

      start_page.upto(last_page) do |page|
        listing = fetch("#{BASE_URL}/page/#{page}/")

        listing.css('.story_short').each do |block|
          url = block.search('a').map { |a| a['href'] }.first
          # A teaser without a link has no detail page to scrape.
          next unless url

          heading = block.at_css('.story_h')
          comic = fetch(url)

          result = OpenStruct.new
          result.title = heading && heading.content
          result.url = url
          result.main_image = main_image(comic)
          result.other_images = other_images(comic)
          result.download_link = download_link(comic)
          result.tags = tags(comic)

          results << result
        end
      end

      results
    end

    # @param comic [#at_css] parsed detail page
    # @return [String, nil] src of the first image inside ".story_c", or nil
    #   when the page has no such container or image
    def self.main_image(comic)
      story = comic.at_css('.story_c')
      story && story.search('img').map { |img| img['src'] }.first
    end

    # @param comic [#at_css] parsed detail page
    # @return [Array<String>] hrefs inside ".story_c" that point at
    #   jpg/jpeg/png files; empty when the container is missing
    def self.other_images(comic)
      story = comic.at_css('.story_c')
      return [] unless story

      story.search('a').map { |a| a['href'] }
           .select { |href| href && IMAGE_URL =~ href }
    end

    # @param comic [#at_css] parsed detail page
    # @return [String, nil] href of the first link inside ".story_c"
    def self.download_link(comic)
      story = comic.at_css('.story_c')
      story && story.search('a').map { |a| a['href'] }.first
    end

    # @param comic [#at_xpath] parsed detail page
    # @return [Array<String>, nil] tag names with the "Tags: " label removed
    #   and surrounding whitespace stripped; nil when the page has no tag line
    def self.tags(comic)
      node = comic.at_xpath(TAGS_XPATH)
      node && node.content.gsub('Tags: ', '').split(',').map(&:strip)
    end

    # Reads the number of listing pages from the front page's pagination.
    #
    # @return [Integer, nil] nil when the pagination link is not found
    def self.last_page
      link = fetch("#{BASE_URL}/").search(LAST_PAGE_SELECTOR).first
      link && link.children[0].to_s.to_i
    end

    # Downloads +url+ and parses it as HTML.
    #
    # Detail-page URLs come from scraped markup, so they are opened through a
    # parsed URI object instead of Kernel#open: open-uri only adds #open to
    # http/https/ftp URIs, so a crafted href cannot be interpreted as a local
    # path or a "|command". The block form closes the response when done.
    def self.fetch(url)
      URI.parse(url).open { |io| Nokogiri::HTML(io) }
    end
    private_class_method :fetch
  end
end
@@ -0,0 +1,36 @@
1
module Scraper
  # Scrapes the movie listing and per-movie detail pages from wrzko.eu.
  #
  # All methods are class methods and nothing is cached: every call performs
  # fresh HTTP requests.
  class Movies
    # The only listing page that is read.
    LISTING_URL = 'http://www.wrzko.eu/movies/'.freeze

    # Absolute http(s) URLs that end in .jpg, .jpeg or .png.
    IMAGE_URL = %r{\Ahttps?://.+\.(?:jpe?g|png)\z}

    # Pagination label whose last word is read as the last page number.
    PAGES_SELECTOR = '#board_index > div.wp-pagenavi > div > span.pages'.freeze

    # Collects every movie block on the listing page and follows each one to
    # its detail page.
    #
    # NOTE: start_page and last_page are accepted so the signature mirrors
    # Scraper::Comics.all, but they are not used: only LISTING_URL is read.
    # TODO: paginate once the site's page URL scheme is confirmed.
    #
    # @param start_page [Integer] currently ignored
    # @param last_page [Integer] currently ignored
    # @return [Array<OpenStruct>] one entry per movie with title, description,
    #   url and images
    def self.all(start_page = 1, last_page = 5)
      results = []

      fetch(LISTING_URL).css('.category_block').each do |block|
        heading = block.at_css('.maintitle_base')
        url = heading && heading.search('a').map { |a| a['href'] }.first
        # A block without a titled link has no detail page to scrape.
        next unless url

        description = block.at_css('.description')
        movie = fetch(url)

        result = OpenStruct.new
        result.title = heading.content.strip
        result.description = description && description.content
        result.url = url
        # Previously this list was built, turned into a string and discarded.
        result.images = images(movie)

        results << result
      end

      results
    end

    # @param movie [#at_css] parsed detail page
    # @return [Array<String>] hrefs inside ".image" that point at
    #   jpg/jpeg/png files, in page order; empty when the container is missing
    def self.images(movie)
      gallery = movie.at_css('.image')
      return [] unless gallery

      gallery.search('a').map { |a| a['href'] }
             .select { |href| href && IMAGE_URL =~ href }
    end

    # Reads the last word of the pagination label on the listing page.
    #
    # The label's text is used, not its serialized markup, so no tag
    # characters have to be stripped from the result.
    #
    # @return [String, nil] nil when the label is missing or empty
    def self.last_page
      pages = fetch(LISTING_URL).css(PAGES_SELECTOR).first
      pages && pages.content.split(' ').last
    end

    # Downloads +url+ and parses it as HTML.
    #
    # Detail-page URLs come from scraped markup, so they are opened through a
    # parsed URI object instead of Kernel#open: open-uri only adds #open to
    # http/https/ftp URIs, so a crafted href cannot be interpreted as a local
    # path or a "|command". The block form closes the response when done.
    def self.fetch(url)
      URI.parse(url).open { |io| Nokogiri::HTML(io) }
    end
    private_class_method :fetch
  end
end
@@ -0,0 +1,3 @@
1
module Scraper
  # Release number of the gem; scraper.gemspec reads it as the gem version.
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
data/scraper.gemspec ADDED
@@ -0,0 +1,27 @@
1
# coding: utf-8
# Gem specification for "ascraper". The version number is read from
# lib/scraper/version.rb, so lib/ is put on the load path first.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'scraper/version'

Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name        = 'ascraper'
  gem.version     = Scraper::VERSION
  gem.authors     = ['Ahmet']
  gem.email       = ['ahmet@kyan.com']
  gem.summary     = 'A Scraper.'
  gem.description = 'A Scraper.'
  gem.homepage    = ''
  gem.license     = 'MIT'

  # Package every file tracked by git. Executables and test files are picked
  # out of that list by directory prefix.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Development-only tooling; all but bundler are left unpinned.
  gem.add_development_dependency 'bundler', '~> 1.6'
  %w[rake rspec guard guard-rspec].each do |tool|
    gem.add_development_dependency tool
  end

  # Runtime dependency: the HTML parser used by the scrapers.
  gem.add_dependency 'nokogiri'
end
@@ -0,0 +1,41 @@
1
+ require 'rspec'
2
+ require 'spec_helper'
3
+
4
# NOTE: these examples call Scraper::Comics.all, which performs real HTTP
# requests, so they need network access and depend on the site's current
# content.
describe Scraper::Comics do
  # Scrape the first listing page once for the whole file; every example
  # below only reads the result.
  before(:all) do
    @comics = Scraper::Comics.all(1, 1)
    @comic = @comics.first
  end

  describe "Scraping the site" do
    it "should return 15 comics for first page" do
      expect(@comics.count).to eq(15)
    end
  end

  describe "Scraping a comic" do
    it "should always return a title" do
      expect(@comic.title).not_to be_empty
    end

    it "should always return a url" do
      expect(@comic.url).not_to be_empty
    end

    it "should always return a main_image" do
      expect(@comic.main_image).not_to be_empty
    end

    it "should always return a download_link" do
      expect(@comic.download_link).not_to be_empty
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'rspec'
2
+ require 'spec_helper'
3
+
4
# NOTE: these examples call Scraper::Movies.all, which performs real HTTP
# requests, so they need network access and depend on the site's current
# content.
describe Scraper::Movies do
  # Scrape the listing once for the whole file; every example below only
  # reads the result.
  before(:all) do
    @movies = Scraper::Movies.all(1, 1)
    @movie = @movies.first
  end

  describe "Scraping the site" do
    it "should return all 10 movies for first page" do
      expect(@movies.count).to eq(10)
    end
  end

  describe "Scraping a movie" do
    it "should always return a title" do
      expect(@movie.title).not_to be_empty
    end

    it "should always return a description" do
      expect(@movie.description).not_to be_empty
    end

    it "should always return a url" do
      expect(@movie.url).not_to be_empty
    end
  end
end
@@ -0,0 +1,80 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # The generated `.rspec` file contains `--require spec_helper` which will cause this
4
+ # file to always be loaded, without a need to explicitly require it in any files.
5
+ #
6
+ # Given that it is always loaded, you are encouraged to keep this file as
7
+ # light-weight as possible. Requiring heavyweight dependencies from this file
8
+ # will add to the boot time of your test suite on EVERY test run, even for an
9
+ # individual file that may not need all of that loaded. Instead, make a
10
+ # separate helper file that requires this one and then use it only in the specs
11
+ # that actually need it.
12
+ #
13
+ # The `.rspec` file also contains a few flags that are not defaults but that
14
+ # users commonly want.
15
+ #
16
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
17
+ require_relative '../lib/scraper.rb'
18
+
19
+ RSpec.configure do |config|
20
+ # The settings below are suggested to provide a good initial experience
21
+ # with RSpec, but feel free to customize to your heart's content.
22
+ =begin
23
+ # These two settings work together to allow you to limit a spec run
24
+ # to individual examples or groups you care about by tagging them with
25
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
26
+ # get run.
27
+ config.filter_run :focus
28
+ config.run_all_when_everything_filtered = true
29
+
30
+ # Many RSpec users commonly either run the entire suite or an individual
31
+ # file, and it's useful to allow more verbose output when running an
32
+ # individual spec file.
33
+ if config.files_to_run.one?
34
+ # Use the documentation formatter for detailed output,
35
+ # unless a formatter has already been configured
36
+ # (e.g. via a command-line flag).
37
+ config.default_formatter = 'doc'
38
+ end
39
+
40
+ # Print the 10 slowest examples and example groups at the
41
+ # end of the spec run, to help surface which specs are running
42
+ # particularly slow.
43
+ config.profile_examples = 10
44
+
45
+ # Run specs in random order to surface order dependencies. If you find an
46
+ # order dependency and want to debug it, you can fix the order by providing
47
+ # the seed, which is printed after each run.
48
+ # --seed 1234
49
+ config.order = :random
50
+
51
+ # Seed global randomization in this process using the `--seed` CLI option.
52
+ # Setting this allows you to use `--seed` to deterministically reproduce
53
+ # test failures related to randomization by passing the same `--seed` value
54
+ # as the one that triggered the failure.
55
+ Kernel.srand config.seed
56
+
57
+ # rspec-expectations config goes here. You can use an alternate
58
+ # assertion/expectation library such as wrong or the stdlib/minitest
59
+ # assertions if you prefer.
60
+ config.expect_with :rspec do |expectations|
61
+ # Enable only the newer, non-monkey-patching expect syntax.
62
+ # For more details, see:
63
+ # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
64
+ expectations.syntax = :expect
65
+ end
66
+
67
+ # rspec-mocks config goes here. You can use an alternate test double
68
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
69
+ config.mock_with :rspec do |mocks|
70
+ # Enable only the newer, non-monkey-patching expect syntax.
71
+ # For more details, see:
72
+ # - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
73
+ mocks.syntax = :expect
74
+
75
+ # Prevents you from mocking or stubbing a method that does not exist on
76
+ # a real object. This is generally recommended.
77
+ mocks.verify_partial_doubles = true
78
+ end
79
+ =end
80
+ end
metadata ADDED
@@ -0,0 +1,148 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ascraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Ahmet
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-07-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: guard-rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: nokogiri
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: A Scraper.
98
+ email:
99
+ - ahmet@kyan.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - ".rspec"
106
+ - ".ruby-gemset"
107
+ - ".ruby-version"
108
+ - Gemfile
109
+ - Guardfile
110
+ - LICENSE.txt
111
+ - README.md
112
+ - Rakefile
113
+ - lib/scraper.rb
114
+ - lib/scraper/comics.rb
115
+ - lib/scraper/movies.rb
116
+ - lib/scraper/version.rb
117
+ - scraper.gemspec
118
+ - spec/comics_spec.rb
119
+ - spec/movies_spec.rb
120
+ - spec/spec_helper.rb
121
+ homepage: ''
122
+ licenses:
123
+ - MIT
124
+ metadata: {}
125
+ post_install_message:
126
+ rdoc_options: []
127
+ require_paths:
128
+ - lib
129
+ required_ruby_version: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - ">="
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ required_rubygems_version: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ requirements: []
140
+ rubyforge_project:
141
+ rubygems_version: 2.2.2
142
+ signing_key:
143
+ specification_version: 4
144
+ summary: A Scraper.
145
+ test_files:
146
+ - spec/comics_spec.rb
147
+ - spec/movies_spec.rb
148
+ - spec/spec_helper.rb