wcrawler 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3edfb127df16c0d081013257c89526ddf3c80b91
4
+ data.tar.gz: 972479202985b4f9fea112ea9ebb881a15cfa6e1
5
+ SHA512:
6
+ metadata.gz: 16df66e728189400c2488238913fb8026957eac78fcd96e7e5cf195978d3108af6aa10ff40e459ee206860608577ce204fc6eb549359e33f60f7f80b3cdb33e4
7
+ data.tar.gz: 099de1e9c2e6362cab78212a13759832e177f560c047b18d99318e2746fd6bfa83a6a6de0f1f5dcdb83dd1b22b0c36599bd392bb173827804cca86757f9e88bd
data/LICENSE.md ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 J.K et al.
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,5 @@
1
+ # This tool downloads files from Weipan.
2
+
3
+ ## To do list:
4
+ - Support directories. Example: http://vdisk.weibo.com/u/3262874910
5
+ - Good test case: 2554082943
data/bin/wcrawler ADDED
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby
# -*- mode: ruby -*-

# Executable entry point of the gem: loads the Thor-based command-line
# interface and hands it the raw command-line arguments.
require 'wei_disk_crawler_cli'

WeiDiskCrawlerCLI.start ARGV
data/lib/version.rb ADDED
@@ -0,0 +1,3 @@
1
class WeiDiskCrawler
  # Gem version; read by wcrawler.gemspec as WeiDiskCrawler::VERSION.
  # Frozen so the shared string cannot be mutated in place.
  VERSION = "0.1".freeze
end
@@ -0,0 +1,87 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'typhoeus'
4
+ require 'json'
5
+
6
# Crawls the share pages of one Weipan (vdisk.weibo.com) user and collects,
# for every resource listed there, the first entry of its download list.
class WeiDiskCrawler

  WEI_DISK_PREFIX = "http://vdisk.weibo.com/u/"

  # @param params [Hash] options; +:uid+ (the user id appended to
  #   WEI_DISK_PREFIX) is required.
  # @raise [RuntimeError] when +:uid+ is missing.
  def initialize(params = {})
    raise "Missing User ID" if params[:uid].nil?
    @uid = params[:uid]
    @resources = {}
    @page_total = 0
  end

  # Number of listing pages for the user. The value is fetched once and
  # memoized in @page_total (0 means "not fetched yet").
  #
  # @return [Integer] page count, never less than 1.
  def max_page_number
    return @page_total unless @page_total.eql? 0
    request = Typhoeus::Request.new("#{WEI_DISK_PREFIX}#{@uid}")
    request.run
    document = Nokogiri::HTML(request.response.response_body)
    # No pager element on the page: everything fits on a single page.
    return @page_total = 1 if document.css('.vd_page_main .vd_page').empty?
    # The second-to-last pager link carries the highest page number. A pager
    # whose text is not numeric yields 0 from to_i; fall back to one page so
    # callers still crawl the first page instead of silently doing nothing.
    @page_total = [document.css('.vd_page a:nth-last-child(2)').text.to_i, 1].max
  end

  # Crawls every page from 1 to max_page_number.
  #
  # @return [Hash] resource name => download link, accumulated over all pages.
  def list_all_resources
    (1..max_page_number).each { |page_number| list_resources_on_page(page_number) }
    @resources
  end

  # Crawls one listing page and resolves the download link of every resource
  # found on it.
  #
  # @param page_number [Integer, String] page to fetch (interpolated into the
  #   +page+ query parameter).
  # @return [Hash] all resources collected so far by this crawler, including
  #   the ones from this page.
  def list_resources_on_page(page_number)
    hydra = Typhoeus::Hydra.new
    page_request = Typhoeus::Request.new("#{WEI_DISK_PREFIX}#{@uid}?page=#{page_number}")

    # Per-resource requests are queued from inside the callback; the single
    # hydra.run below keeps going until those are finished as well, so the
    # hydra is never re-entered from one of its own callbacks.
    page_request.on_complete do |response|
      resource_ids(response.response_body).each do |resource_id|
        hydra.queue resource_request(resource_id)
      end
    end

    hydra.queue page_request
    hydra.run
    p "#"*50
    p "Page #{page_number} has been parsed successfully."
    p "#"*50
    @resources
  end

  # Appends the crawled resources (Hash#to_s form) to downloads/<file_name>,
  # creating the downloads directory when it does not exist yet.
  #
  # @param file_name [String] file name inside the downloads directory.
  def write_resources_to_file(file_name)
    resources = list_all_resources
    Dir.mkdir("downloads") unless Dir.exist?("downloads")
    File.open("downloads/#{file_name}", 'a') { |file| file.write(resources) }
  end

  private

  # Extracts resource ids (last path segment of each resource link) from the
  # HTML of a listing page. Anchors without an href are ignored.
  def resource_ids(page_html)
    anchors = Nokogiri::HTML(page_html).css('td.sort_name_m div.sort_name_pic a')
    anchors.map { |anchor| anchor.attr('href') }.compact.map { |url| url.split('/').last }
  end

  # Builds the AJAX request that returns name and download list of a resource.
  def resource_request(resource_id)
    # Milliseconds since the epoch, computed with core Time only
    # (Time#to_datetime would need the 'date' library to be loaded).
    now_timestamp = (Time.now.to_f * 1000).to_i
    request = Typhoeus::Request.new(
      "http://vdisk.weibo.com/api/weipan/fileopsStatCount?link=#{resource_id}&ops=download&_=#{now_timestamp}",
      :method => :get,
      :headers => {
        :Accept => "application/json, text/javascript, */*; q=0.01",
        :Referer => "http://vdisk.weibo.com/s/#{resource_id}",
        :'X-Requested-With' => "XMLHttpRequest",
        :'Connection' => "keep-alive",
        :'x-response-version' => "2"
      }
    )
    request.on_complete { |ajax_response| record_resource(ajax_response) }
    request
  end

  # Stores the first download link of one AJAX response under the resource
  # name. Responses that are not JSON or carry no download link are reported
  # on stderr and skipped, so one bad resource does not abort the crawl.
  def record_resource(ajax_response)
    json = JSON.parse(ajax_response.response_body.to_s)
    download_list = json["download_list"]
    if download_list.nil? || download_list.empty?
      warn "No download link returned for #{json['name'].inspect}; skipped."
      return
    end
    @resources[json["name"]] = download_list.first
    p "#{json['name']} has been parsed successfully."
  rescue JSON::ParserError => e
    warn "Skipping resource with unparsable response: #{e.message}"
  end
end
86
+
87
+
@@ -0,0 +1,30 @@
1
+ require 'wcrawler/wei_disk_crawler'
2
+ require 'thor'
3
+
4
# Thor command-line interface around WeiDiskCrawler. Every command takes the
# user id through the required --uid option.
class WeiDiskCrawlerCLI < Thor
  # Crawls all pages of the user; progress is printed by the crawler itself.
  desc "crawl", "crawl resources from UID"
  option :uid, :required => true
  def crawl
    crawler.list_all_resources
  end

  # Prints the resources of one page when PAGE_NUMBER is given, otherwise the
  # resources of all pages.
  desc "list [PAGE_NUMBER]", "list resources from all pages / specific page number"
  option :uid, :required => true
  def list(page_number=nil)
    resources = if page_number
                  crawler.list_resources_on_page(page_number)
                else
                  crawler.list_all_resources
                end
    p resources
  end

  # Prints how many listing pages the user has.
  desc "max_page_num", "list max page number"
  option :uid, :required => true
  def max_page_num
    p "There's #{crawler.max_page_number} pages in #{options[:uid]}'s resource"
  end

  # Helpers inside no_commands are not exposed as CLI commands.
  no_commands do
    # Crawler for the --uid given on the command line, built once per run.
    def crawler
      @crawler ||= WeiDiskCrawler.new({:uid => options[:uid]})
    end
  end
end
30
+
data/wcrawler.gemspec ADDED
@@ -0,0 +1,20 @@
1
# coding: utf-8
# Gem specification for wcrawler.
# lib/ is put on the load path so that "version" (lib/version.rb) can be
# required for WeiDiskCrawler::VERSION.
lib = File.expand_path("../lib/", __FILE__)
$LOAD_PATH.unshift lib unless $LOAD_PATH.include?(lib)
require "version"

Gem::Specification.new do |spec|
  # Runtime dependencies: the library requires nokogiri and typhoeus, and the
  # command-line interface requires thor. Without these declarations an
  # installed gem fails on its first require.
  spec.add_dependency "nokogiri"
  spec.add_dependency "typhoeus"
  spec.add_dependency "thor"
  spec.add_development_dependency "bundler", "~> 1.0"
  spec.authors = ["J.K"]
  spec.summary = "Downloader Gem No.1"
  spec.description = "This is a tool to provide a easy way to download the resources from weipan"
  spec.email = "jiukunz@gmail.com"
  spec.executables = %w[wcrawler]
  spec.files = %w[wcrawler.gemspec] + Dir['*.md', 'bin/*', 'lib/**/*.rb']
  spec.licenses = %w[MIT]
  spec.name = "wcrawler"
  spec.homepage = "http://jiukunz.github.io/"
  spec.require_paths = %w[lib]
  spec.required_rubygems_version = ">= 1.3.5"
  spec.version = WeiDiskCrawler::VERSION
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wcrawler
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - J.K
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-04 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ description: This is a tool to provide a easy way to download the resources from weipan
28
+ email: jiukunz@gmail.com
29
+ executables:
30
+ - wcrawler
31
+ extensions: []
32
+ extra_rdoc_files: []
33
+ files:
34
+ - LICENSE.md
35
+ - README.md
36
+ - bin/wcrawler
37
+ - lib/version.rb
38
+ - lib/wcrawler/wei_disk_crawler.rb
39
+ - lib/wei_disk_crawler_cli.rb
40
+ - wcrawler.gemspec
41
+ homepage: http://jiukunz.github.io/
42
+ licenses:
43
+ - MIT
44
+ metadata: {}
45
+ post_install_message:
46
+ rdoc_options: []
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: 1.3.5
59
+ requirements: []
60
+ rubyforge_project:
61
+ rubygems_version: 2.4.5
62
+ signing_key:
63
+ specification_version: 4
64
+ summary: Downloader Gem No.1
65
+ test_files: []