wcrawler 0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3edfb127df16c0d081013257c89526ddf3c80b91
4
+ data.tar.gz: 972479202985b4f9fea112ea9ebb881a15cfa6e1
5
+ SHA512:
6
+ metadata.gz: 16df66e728189400c2488238913fb8026957eac78fcd96e7e5cf195978d3108af6aa10ff40e459ee206860608577ce204fc6eb549359e33f60f7f80b3cdb33e4
7
+ data.tar.gz: 099de1e9c2e6362cab78212a13759832e177f560c047b18d99318e2746fd6bfa83a6a6de0f1f5dcdb83dd1b22b0c36599bd392bb173827804cca86757f9e88bd
data/LICENSE.md ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 J.K et al.
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,5 @@
1
+ # A tool for downloading files from Weipan.
2
+
3
+ ## To do list:
4
+ - Support directories. Example: http://vdisk.weibo.com/u/3262874910
5
+ - Good Test case 2554082943
data/bin/wcrawler ADDED
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby
# -*- mode: ruby -*-

# Executable entry point for the gem: loads the command-line interface and
# hands it the arguments given on the command line.
require 'wei_disk_crawler_cli'

WeiDiskCrawlerCLI.start(ARGV)
data/lib/version.rb ADDED
@@ -0,0 +1,3 @@
1
# Namespace shared with the crawler implementation; this file only contributes
# the gem version so the gemspec can load it without the crawler's dependencies.
class WeiDiskCrawler
  # Gem version string, frozen so the constant cannot be mutated in place.
  VERSION = "0.1".freeze
end
@@ -0,0 +1,87 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'typhoeus'
4
+ require 'json'
5
+
6
# Crawls the file listing that a Weipan (vdisk.weibo.com) user shares and
# collects, for every listed resource, its name and first download link.
class WeiDiskCrawler

  # Base URL of a user's listing; the user ID is appended to it.
  WEI_DISK_PREFIX = "http://vdisk.weibo.com/u/"

  # @param params [Hash] options; +:uid+ (required) is the user ID to crawl
  # @raise [RuntimeError] when +:uid+ is missing
  def initialize(params = {})
    raise "Missing User ID" if params[:uid].nil?
    @uid = params[:uid]
    @resources = {}
    @page_total = 0
  end

  # Number of listing pages for the user. The value is fetched once and then
  # memoized (0 means "not fetched yet").
  #
  # @return [Integer] page count, always at least 1
  def max_page_number
    return @page_total unless @page_total.zero?

    request = Typhoeus::Request.new("#{WEI_DISK_PREFIX}#{@uid}")
    request.run
    document = Nokogiri::HTML(request.response.response_body)
    return @page_total = 1 if document.css('.vd_page_main .vd_page').empty?

    # The second-to-last pager link is read as the last page number. If its
    # text is not numeric, +to_i+ yields 0; fall back to a single page so the
    # memo is not left in the "not fetched" state and page 1 is still crawled.
    parsed_total = document.css('.vd_page a:nth-last-child(2)').text.to_i
    @page_total = [parsed_total, 1].max
  end

  # Crawls every listing page.
  #
  # @return [Hash{String => Object}] resource name => first download link
  def list_all_resources
    # list_resources_on_page accumulates into @resources itself.
    (1..max_page_number).each { |page_number| list_resources_on_page(page_number) }
    @resources
  end

  # Crawls one listing page and adds its resources to the accumulated result.
  #
  # @param page_number [Integer, String] page to crawl (interpolated into the URL)
  # @return [Hash{String => Object}] all resources accumulated so far
  def list_resources_on_page(page_number)
    fetch_download_links(resource_ids_on_page(page_number))

    puts "#" * 50
    puts "Page #{page_number} has been parsed successfully."
    puts "#" * 50
    @resources
  end

  # Crawls all pages and appends the resulting hash (its +to_s+ form) to
  # +downloads/<file_name>+, creating the +downloads+ directory if needed.
  #
  # @param file_name [String] file name inside the +downloads+ directory
  # @return [Integer] number of bytes written
  def write_resources_to_file(file_name)
    # Crawl before touching the file system so a failed crawl leaves no empty file.
    resources = list_all_resources
    Dir.mkdir("downloads") unless Dir.exist?("downloads")
    File.open("downloads/#{file_name}", 'a') { |file| file.write(resources) }
  end

  private

  # Fetches one listing page and extracts the resource IDs (the last path
  # segment of each resource link) in page order.
  def resource_ids_on_page(page_number)
    request = Typhoeus::Request.new("#{WEI_DISK_PREFIX}#{@uid}?page=#{page_number}")
    request.run
    page_doc = Nokogiri::HTML(request.response.response_body)

    hrefs = page_doc.css('td.sort_name_m div.sort_name_pic a').map { |link| link.attr('href') }
    # Anchors without an href and empty paths are dropped instead of raising.
    hrefs.compact.map { |url| url.split('/').last }.compact
  end

  # Requests the download metadata of all given resources concurrently, then
  # records the answers in page order.
  def fetch_download_links(resource_ids)
    return if resource_ids.empty?

    hydra = Typhoeus::Hydra.new
    requests = resource_ids.map do |resource_id|
      request = build_stat_request(resource_id)
      hydra.queue request
      [resource_id, request]
    end
    # Single, non-nested run: nothing is queued or run from inside a callback.
    hydra.run

    requests.each do |resource_id, request|
      record_resource(resource_id, request.response.response_body)
    end
  end

  # Builds the AJAX-style request that returns a resource's download metadata.
  def build_stat_request(resource_id)
    # Milliseconds since the epoch, sent as the "_" query parameter.
    now_timestamp = (Time.now.to_f * 1000).to_i
    Typhoeus::Request.new(
      "http://vdisk.weibo.com/api/weipan/fileopsStatCount?link=#{resource_id}&ops=download&_=#{now_timestamp}",
      :method => :get,
      :headers => {
        :Accept => "application/json, text/javascript, */*; q=0.01",
        :Referer => "http://vdisk.weibo.com/s/#{resource_id}",
        :'X-Requested-With' => "XMLHttpRequest",
        :'Connection' => "keep-alive",
        :'x-response-version' => "2"
      }
    )
  end

  # Parses one metadata response and stores name => first download link.
  # Unusable responses are reported on stderr and skipped, so a single bad
  # resource does not abort the whole crawl.
  def record_resource(resource_id, body)
    json = JSON.parse(body)
    name = json.is_a?(Hash) ? json["name"] : nil
    download_link = json.is_a?(Hash) ? Array(json["download_list"]).first : nil

    if name.nil? || download_link.nil?
      warn "Skipping resource #{resource_id}: response has no name or download link."
      return
    end

    @resources[name] = download_link
    puts "#{name} has been parsed successfully."
  rescue JSON::ParserError, TypeError => e
    warn "Skipping resource #{resource_id}: #{e.message}"
  end
end
86
+
87
+
@@ -0,0 +1,30 @@
1
+ require 'wcrawler/wei_disk_crawler'
2
+ require 'thor'
3
+
4
# Thor command-line interface for WeiDiskCrawler. Every command takes the
# user ID through the required --uid option.
class WeiDiskCrawlerCLI < Thor
  # The UID is passed as --uid, not as a positional argument, so the usage
  # banner names only the command itself.
  desc "crawl", "crawl resources from UID"
  option :uid, :required => true
  # Crawls every page of the user's listing; progress is printed by the crawler.
  def crawl
    build_crawler.list_all_resources
  end

  desc "list [PAGE_NUMBER]", "list resources from all pages / specific page number"
  option :uid, :required => true
  # Prints the collected resources of one page, or of all pages when no page
  # number is given.
  def list(page_number = nil)
    crawler = build_crawler
    resources = page_number ? crawler.list_resources_on_page(page_number) : crawler.list_all_resources
    # The hash is shown in inspect form.
    p resources
  end

  desc "max_page_num", "list max page number"
  option :uid, :required => true
  # Prints how many listing pages the user has.
  def max_page_num
    # puts rather than p: this is a message for the user, not an inspected value.
    puts "There's #{build_crawler.max_page_number} pages in #{options[:uid]}'s resource"
  end

  private

  # Builds a crawler for the user ID given on the command line.
  def build_crawler
    WeiDiskCrawler.new(:uid => options[:uid])
  end
end
30
+
data/wcrawler.gemspec ADDED
@@ -0,0 +1,20 @@
1
# coding: utf-8
# Gem specification for wcrawler.

# Put lib/ on the load path so the version constant can be required below.
lib = File.expand_path("../lib/", __FILE__)
$LOAD_PATH.unshift lib unless $LOAD_PATH.include?(lib)
require "version"

Gem::Specification.new do |spec|
  # Runtime dependencies: the files under lib/ require these gems, so they
  # must be installed together with wcrawler for the executable to load.
  spec.add_dependency "nokogiri"
  spec.add_dependency "thor"
  spec.add_dependency "typhoeus"
  spec.add_development_dependency "bundler", "~> 1.0"
  spec.authors = ["J.K"]
  spec.summary = "Downloader Gem No.1"
  spec.description = "This is a tool to provide a easy way to download the resources from weipan"
  spec.email = "jiukunz@gmail.com"
  spec.executables = %w[wcrawler]
  spec.files = %w[wcrawler.gemspec] + Dir['*.md', 'bin/*', 'lib/**/*.rb']
  spec.licenses = %w[MIT]
  spec.name = "wcrawler"
  spec.homepage = "http://jiukunz.github.io/"
  spec.require_paths = %w[lib]
  spec.required_rubygems_version = ">= 1.3.5"
  spec.version = WeiDiskCrawler::VERSION
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wcrawler
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - J.K
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ description: This is a tool to provide a easy way to download the resources from weipan
28
+ email: jiukunz@gmail.com
29
+ executables:
30
+ - wcrawler
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - LICENSE.md
35
+ - README.md
36
+ - bin/wcrawler
37
+ - lib/version.rb
38
+ - lib/wcrawler/wei_disk_crawler.rb
39
+ - lib/wei_disk_crawler_cli.rb
40
+ - wcrawler.gemspec
41
+ homepage: http://jiukunz.github.io/
42
+ licenses:
43
+ - MIT
44
+ metadata: {}
45
+ post_install_message:
46
+ rdoc_options: []
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: 1.3.5
59
+ requirements: []
60
+ rubyforge_project:
61
+ rubygems_version: 2.4.5
62
+ signing_key:
63
+ specification_version: 4
64
+ summary: Downloader Gem No.1
65
+ test_files: []