qzone 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: de40edfabb8ee78b618d131b72e16b68b17d90e8
4
+ data.tar.gz: 0f942d1ecce66437adfc072755b77a5636b49573
5
+ SHA512:
6
+ metadata.gz: cd036b848e27cd0024459529ea2e85d75332963e5bd1d9a7726be58e3ffffbdace1be3f8d4bbf8b0bceb9243a0ab19398af99c5d246da9d1a3bfebdd25f867f2
7
+ data.tar.gz: 8061cdf3f9c10af5001a56335c3f83e47ca7bf75c301961d3032774aed70e7640282588a89509576f10a5bb6979778744aa5a30f25fba7f3cf9437e26401225f
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2017 tuitu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,41 @@
1
+ # Qzone
2
+
3
+ Qzone是一个为qq空间定制的爬虫,它提供qzone的登录以及相册,好友等数据接口.
4
+
5
+ ## Installation
6
+
7
+ `gem install qzone`
8
+
9
+ ## Usage
10
+
11
+ 首先,通过qq号和密码实例化一个Qzone类的对象:
12
+ ```ruby
13
+ qzone = Qzone.new user, password
14
+ ```
15
+
16
+ Qzone类提供了以下接口:
17
+ - `ablums`:该方法接受任意一个合法的QQ号为参数,返回的是该QQ号下所有的相册构成的数组.每一个相册都是一个哈希,它包含以下项
18
+ ```ruby
19
+ ablum[:host] # 拥有该相册的qq
20
+ ablum[:id] # 相册id
21
+ ablum[:name] # 相册名称
22
+ ablum[:total] # 照片数量
23
+ ablum[:allowAccess] # 相册权限 只有权限为1时, 该相册才是公开可爬的
24
+ ```
25
+ - `photos_in_ablum`:该方法接受任意一个合法的相册哈希(由ablums接口返回的,或是手动构造相同结构的哈希),返回该相册下所有的照片组成的数组, 每一个相片都是一个哈希, 它包含以下项
26
+ ```ruby
27
+ photo[:id] # 照片id
28
+ photo[:name] # 照片名称
29
+ photo[:url] # 照片的url, 可通过该url下载该照片
30
+ ```
31
+ - `friends`:该方法无参数, 返回该qzone下所有的好友组成的数组,每一个好友都是一个哈希,它包含以下项
32
+ ```ruby
33
+ friend[:uin] # 该好友的qq号
34
+ friend[:name] # 该好友对于空间拥有者的备注
35
+ friend[:score] # 亲密度评分
36
+ friend[:img] # 该好友的头像的url
37
+ ```
38
+
39
+ ## License
40
+
41
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
# This Rakefile defines no :spec task, so a bare `rake` used to abort with an
# unknown-task error. Default to the :build task from bundler/gem_tasks instead.
task :default => :build
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "qzone"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,193 @@
1
+ require "qzone/version"
2
+ require 'qzone/qzone_login'
3
+ require "qzone/interface"
4
+
5
# Scraper client for QQ Zone (Qzone).
#
# Logs in through Qzone::Login, keeps the session cookies in a Mechanize agent
# (cached on disk in the relative file "cookie<user>") and exposes album, photo
# and friend listings as arrays of plain hashes.
class Qzone
  # User-Agent header sent with every request made through Mechanize.
  USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'.freeze

  # Page size used when walking an album; matches the pageNum=30 value that is
  # hard-coded in the Interface["photo"] URL template.
  PHOTOS_PER_PAGE = 30

  # Builds a client for the given account.
  #
  # Reuses the cookie file of a previous run when it is still accepted by the
  # server; otherwise performs a fresh browser login exactly once.
  #
  # @param user [String, Integer] QQ number used to log in
  # @param password [String] password of that account
  # @raise [RuntimeError] when a fresh login is needed and fails
  def initialize(user, password)
    @user = user
    @password = password
    @spider = Mechanize.new
    # Set the header up front so the cached-cookie path sends the same
    # User-Agent as the fresh-login path.
    @spider.request_headers = { 'User-Agent' => USER_AGENT }

    cookies unless restore_session
  end

  # Performs a browser login, copies the browser cookies into the Mechanize
  # cookie jar, derives the g_tk token and saves the jar to the cookie file.
  #
  # @raise [RuntimeError] when the login fails or no p_skey cookie is issued
  def cookies
    browser = Login.new @user, @password, 5
    begin
      browser.login
      @cookies = browser.cookies
    ensure
      # Always release the headless browser, even when login raises.
      browser.close
    end

    # construct cookies
    @cookies.each do |raw|
      # Cookies without an expiry get one day so they can be stored in the jar.
      raw[:expires] ||= Date.today + 1
      @spider.cookie_jar << Mechanize::Cookie.new(
        domain: raw[:domain],
        name: raw[:name],
        value: raw[:value],
        path: raw[:path],
        expires: raw[:expires].to_s
      )
    end

    @gtk = gtk(session_key)

    @spider.request_headers = { 'User-Agent' => USER_AGENT }

    @spider.cookie_jar.save_as cookie_file, session: true
  end

  # Lists the albums of a QQ account.
  #
  # @param dest_qq [String, Integer] QQ number whose albums are listed
  # @return [Array<Hash>] hashes with :host, :id, :name, :total, :allowAccess;
  #   empty when the response carries no album list
  def ablums(dest_qq)
    page = @spider.get(sprintf(Interface["ablum"], @gtk.to_s, dest_qq, @user))
    entries = data_list(page, 'albumListModeSort')
    return [] if entries.nil?

    entries.map do |entry|
      {
        host: dest_qq,
        id: entry['id'],
        name: entry['name'],
        total: entry['total'],
        allowAccess: entry['allowAccess'].to_i
      }
    end
  end

  # Lists every photo of one album, fetching PHOTOS_PER_PAGE photos per request.
  #
  # @param ablum [Hash] album hash as returned by #ablums (or an equivalent
  #   hand-built hash with :host, :id, :name, :total, :allowAccess)
  # @return [Array<Hash>] hashes with :id, :name, :url
  # @raise [RuntimeError] when the album's :allowAccess is not 1
  def photos_in_ablum(ablum)
    if ablum[:allowAccess] != 1
      raise "can't access to ablum \"#{ablum[:name]}\""
    end

    photos = []
    total_photos = ablum[:total].to_i
    return photos if total_photos.zero?

    # Ceiling division: a partially filled last page still needs a request.
    pages = (total_photos + PHOTOS_PER_PAGE - 1) / PHOTOS_PER_PAGE

    pages.times do |page_index|
      offset = (page_index * PHOTOS_PER_PAGE).to_s
      page = @spider.get(sprintf(Interface["photo"], @gtk, ablum[:host], ablum[:id], @user, offset))

      entries = data_list(page, 'photoList')
      # Stop at the first page without a photo list and return what we have.
      return photos if entries.nil?

      entries.each do |entry|
        photos.push(id: entry['id'], name: entry['name'], url: entry['url'])
      end
    end

    photos
  end

  # Lists the friends of the logged-in account.
  #
  # @return [Array<Hash>] hashes with :uin, :name, :score (intimacy score), :img;
  #   empty when the response carries no friend list
  def friends
    page = @spider.get(sprintf(Interface["friend"], @user, @gtk))
    entries = data_list(page, 'items_list')
    return [] if entries.nil?

    entries.map do |entry|
      {
        uin: entry['uin'],
        name: entry['name'],
        # intimacy score
        score: entry['score'],
        img: entry['img']
      }
    end
  end

  # Downloads each photo to "<path><name>_<hash>.jpg".
  #
  # `path` is used as a raw string prefix (no separator is inserted), so pass a
  # directory with its trailing slash.
  #
  # @param photos [Array<Hash>] photo hashes as returned by #photos_in_ablum
  # @param path [String] prefix prepended to every file name
  def download_photos(photos, path)
    worker = Mechanize.new
    worker.request_headers = {
      'User-Agent' => USER_AGENT,
      "Referer" => "http://qzone.qq.com/"
    }
    photos.each do |photo|
      file = worker.get photo[:url]
      # The Time#hash suffix keeps photos that share a name from overwriting each other.
      file.save_as "#{path}#{photo[:name]}_#{Time.now.hash}.jpg"
    end
  end

  # Computes the g_tk request token from the p_skey cookie: a times-33 rolling
  # hash seeded with 5381, truncated to 31 bits.
  #
  # @param skey [#value] cookie object whose #value is the p_skey string
  # @return [Integer]
  # @raise [RuntimeError] when skey is nil
  def gtk(skey)
    raise "Skey can't be nil" if skey.nil?

    hashes = 5381
    skey.value.each_char do |c|
      hashes += (hashes << 5) + c.ord
    end
    hashes & 0x7fffffff
  end

  # Parses a JSONP response of the form `xxx_Callback({...});`.
  #
  # Only the leading callback name and the trailing `)` / `);` are removed, so a
  # `);` inside the payload survives, and the argument is left unmodified.
  # Plain JSON without a wrapper is parsed as is.
  #
  # @param str [String] raw response body
  # @return [Hash, Array] parsed JSON document
  # @raise [JSON::ParserError] when the remaining text is not valid JSON
  def json(str)
    payload = str.sub(/\A.*?_Callback\(/m, '').sub(/\)\s*;?\s*\z/, '')

    JSON.parse payload
  end

  # Checks whether the loaded cookies are still accepted by requesting the
  # account's own album list.
  #
  # @return [Boolean] false when the response contains the "not logged in or
  #   login timed out" message
  # @raise [RuntimeError] 'Connection refused' on HTTP 403, otherwise
  #   'unknown error happened in checking cookies'
  def cookies_valid?
    begin
      result = @spider.get(sprintf(Interface["ablum"], @gtk.to_s, @user, @user))
    rescue Mechanize::ResponseCodeError => error
      raise 'Connection refused' if error.response_code == '403'

      raise 'unknown error happened in checking cookies'
    rescue StandardError
      # Network and other failures have no response code to inspect.
      raise 'unknown error happened in checking cookies'
    end

    # NOTE(review): String#toutf8 comes from the stdlib `kconv` extension, which
    # this file does not require itself - presumably loaded by a dependency; confirm.
    !result.content.toutf8.include? '尚未登录或者登录超时'
  end

  private

  # Name of the on-disk cookie cache for the current user.
  def cookie_file
    "cookie#{@user}"
  end

  # The p_skey cookie currently in the jar, or nil.
  def session_key
    @spider.cookie_jar.find { |cookie| cookie.name == 'p_skey' }
  end

  # Tries to resume the cached session; returns true only when the cached
  # cookies were loaded and are still valid. Any failure means "log in again".
  def restore_session
    return false unless File.exist?(cookie_file)

    @spider.cookie_jar.load cookie_file
    skey = session_key
    return false if skey.nil?

    @gtk = gtk(skey)
    return true if cookies_valid?

    File.delete cookie_file
    false
  rescue StandardError
    false
  end

  # Extracts result['data'][key] from a JSONP page, tolerating a missing 'data'.
  def data_list(page, key)
    data = json(page.content)['data']
    data && data[key]
  end
end
@@ -0,0 +1,7 @@
1
class Qzone
  # sprintf templates for the Qzone endpoints used by this gem.
  #
  # Placeholders, in order:
  #   "ablum"  - g_tk, hostUin (album owner), uin (logged-in user)
  #   "photo"  - g_tk, hostUin, topicId (album id), uin, pageStart (offset)
  #   "friend" - uin (logged-in user), g_tk
  #
  # Frozen so the shared table cannot be modified by accident at runtime.
  Interface = {
    "ablum" => "http://h5.qzone.qq.com/proxy/domain/tjalist.photo.qq.com/fcgi-bin/fcg_list_album_v3?g_tk=%s&callback=shine0_Callback&t=419043014&hostUin=%s&uin=%s&appid=4&inCharset=utf-8&outCharset=utf-8&source=qzone&plat=qzone&format=jsonp&notice=0&filter=1&handset=4&pageNumModeSort=40&pageNumModeClass=15&needUserInfo=1&idcNum=5&callbackFun=shine0&_=1475649079634",
    "photo" => "http://h5.qzone.qq.com/proxy/domain/tjplist.photo.qzone.qq.com/fcgi-bin/cgi_list_photo?g_tk=%s&callback=shine0_Callback&t=775643811&mode=0&idcNum=5&hostUin=%s&topicId=%s&noTopic=0&uin=%s&pageStart=%s&pageNum=30&skipCmtCount=0&singleurl=1&batchId=&notice=0&appid=4&inCharset=utf-8&outCharset=utf-8&source=qzone&plat=qzone&outstyle=json&format=jsonp&json_esc=1&question=&answer=&callbackFun=shine0&_=1475649257165",
    "friend" => "https://h5.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_ship_manager.cgi?uin=%s&do=1&rd=0.9983570276719183&fupdate=1&clean=1&g_tk=%s"
  }.freeze
end
@@ -0,0 +1,99 @@
1
+ require 'selenium-webdriver'
2
+ require 'mechanize'
3
+ require 'date'
4
+ require 'yaml'
5
+ require 'json'
6
+
7
class Qzone
  # Drives a headless PhantomJS browser (through selenium-webdriver) through
  # the Qzone web login form so that the resulting session cookies can be read.
  class Login
    # Raised message when the login page asks for a verification code.
    VERIFY_CODE_MESSAGE = 'Need verify code, please manualy login Qzone first'.freeze

    # Starts the PhantomJS driver.
    #
    # @param user [String, Integer] QQ number typed into the login form
    # @param password [String] password typed into the login form
    # @param wait_time [Numeric] timeout in seconds for every explicit wait
    # @raise [RuntimeError] when the driver cannot be started
    def initialize(user, password, wait_time)
      @user = user
      @password = password

      begin
        # The value is the bare User-Agent string; it must not carry a
        # "User-Agent:" header-name prefix.
        user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'

        capabilities = Selenium::WebDriver::Remote::Capabilities.phantomjs(
          'phantomjs.page.settings.userAgent' => user_agent,
          'phantomjs.page.settings.loadImages' => false
        )
        @driver = Selenium::WebDriver.for(:phantomjs, :desired_capabilities => capabilities)
        @waiter = Selenium::WebDriver::Wait.new(timeout: wait_time)
      rescue StandardError
        # The original failure stays reachable through Exception#cause.
        raise 'init error,make sure phantomjs has been installed correctly.'
      end
    end

    # Fills in and submits the password login form.
    #
    # @return [true] when the browser left the login page after submitting
    # @raise [RuntimeError] with a message describing why the login failed
    #   (verification code required, wrong account/password, network busy,
    #   or unknown)
    def login
      @waiter.until {@driver.navigate.to 'http://qzone.qq.com/'}

      @waiter.until {@driver.switch_to.frame 'login_frame'}

      # Switch from QR-code login to account/password login.
      plogin = @waiter.until { @driver.find_element(id: 'switcher_plogin') }
      plogin.click

      @driver.find_element(id: 'u').send_keys @user
      @driver.find_element(id: 'p').send_keys @password

      button = @waiter.until { @driver.find_element(xpath: '//*[@id="login_button"]') }
      button.click

      sleep(1)

      # On success the browser has already navigated away from the login page.
      return @pass = true if @driver.current_url != 'http://qzone.qq.com/'

      # Still on the login page: work out why.
      raise VERIFY_CODE_MESSAGE if verify_code_requested?

      # if find element with id "err_m", indicate something wrong
      error_message = begin
        @waiter.until { @driver.find_element(id: 'err_m') }.text
      rescue StandardError
        # if can't find that element, indicate something wrong happened but ->
        # we don't know about that.
        raise 'Unknown error happened in login'
      end

      if error_message.include? '您输入的帐号或密码不正确'
        raise 'Account error'
      elsif error_message.include? '网络繁忙'
        raise 'Network busy, try later'
      else
        raise 'Unknown error happened in login'
      end
    end

    # Computes the g_tk token from the browser's p_skey cookie: a times-33
    # rolling hash seeded with 5381, truncated to 31 bits.
    #
    # @return [Integer]
    # @raise [RuntimeError] when the browser holds no p_skey cookie
    def gtk
      @skey = @driver.manage.cookie_named 'p_skey'
      raise "Skey can't be nil" if @skey.nil?

      hashes = 5381
      @skey[:value].each_char do |c|
        hashes += (hashes << 5) + c.ord
      end
      hashes & 0x7fffffff
    end

    # @return [Array<Hash>] every cookie currently held by the browser
    def cookies
      @driver.manage.all_cookies
    end

    # Ends the browser session. Uses #quit rather than #close so the PhantomJS
    # process is shut down instead of only closing its window.
    def close
      @driver.quit if @driver
    end

    private

    # True when the verification-code element shows up within the wait timeout.
    def verify_code_requested?
      @waiter.until { @driver.find_element(id: 'vcode') }
      true
    rescue StandardError
      false
    end
  end
end
@@ -0,0 +1,3 @@
1
class Qzone
  # Gem version; frozen so the shared constant string cannot be mutated.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'qzone/version'
5
+
6
Gem::Specification.new do |spec|
  spec.name          = "qzone"
  spec.version       = Qzone::VERSION
  spec.authors       = ["tuitu"]
  spec.email         = ["1965972530@qq.com"]

  # Plain strings: the previous %q{"..."} form embedded literal double quotes
  # in the published summary and description.
  spec.summary       = "a spider aimed for qzone"
  spec.description   = "provide apis to crawl data from qzone"
  spec.homepage      = "https://github.com/hellotuitu/qzone"
  spec.license       = "MIT"

  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
  # to allow pushing to a single host or delete this section to allow pushing to any host.
  if spec.respond_to?(:metadata)
    spec.metadata['allowed_push_host'] = "https://rubygems.org"
  else
    raise "RubyGems 2.0 or newer is required to protect against " \
      "public gem pushes."
  end

  # Ship every tracked file except tests.
  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # lib/qzone/qzone_login.rb requires these at load time, so they must be
  # runtime dependencies; as development dependencies they are not installed
  # by `gem install qzone` and `require "qzone"` fails.
  spec.add_dependency "selenium-webdriver"
  spec.add_dependency "mechanize"
  spec.add_dependency "json"

  spec.add_development_dependency "bundler", "~> 1.13"
  spec.add_development_dependency "rake", "~> 10.0"
end
metadata ADDED
@@ -0,0 +1,126 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: qzone
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - tuitu
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-06-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.13'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.13'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: selenium-webdriver
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: mechanize
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: json
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: '"provide apis to crawl data from qzone"'
84
+ email:
85
+ - 1965972530@qq.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - LICENSE.txt
92
+ - README.md
93
+ - Rakefile
94
+ - bin/console
95
+ - bin/setup
96
+ - lib/qzone.rb
97
+ - lib/qzone/interface.rb
98
+ - lib/qzone/qzone_login.rb
99
+ - lib/qzone/version.rb
100
+ - qzone.gemspec
101
+ homepage: https://github.com/hellotuitu/qzone
102
+ licenses:
103
+ - MIT
104
+ metadata:
105
+ allowed_push_host: https://rubygems.org
106
+ post_install_message:
107
+ rdoc_options: []
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ required_rubygems_version: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ requirements: []
121
+ rubyforge_project:
122
+ rubygems_version: 2.5.1
123
+ signing_key:
124
+ specification_version: 4
125
+ summary: '"a spider aimed for qzone"'
126
+ test_files: []