qzone 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: de40edfabb8ee78b618d131b72e16b68b17d90e8
4
+ data.tar.gz: 0f942d1ecce66437adfc072755b77a5636b49573
5
+ SHA512:
6
+ metadata.gz: cd036b848e27cd0024459529ea2e85d75332963e5bd1d9a7726be58e3ffffbdace1be3f8d4bbf8b0bceb9243a0ab19398af99c5d246da9d1a3bfebdd25f867f2
7
+ data.tar.gz: 8061cdf3f9c10af5001a56335c3f83e47ca7bf75c301961d3032774aed70e7640282588a89509576f10a5bb6979778744aa5a30f25fba7f3cf9437e26401225f
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2017 tuitu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,41 @@
1
+ # Qzone
2
+
3
+ Qzone是一个为qq空间定制的爬虫,它提供qzone的登录以及相册,好友等数据接口.
4
+
5
+ ## Installation
6
+
7
+ `gem install qzone`
8
+
9
+ ## Usage
10
+
11
+ 首先,通过qq号和密码实例化一个Qzone类的对象:
12
+ ```ruby
13
+ qzone = Qzone.new user, password
14
+ ```
15
+
16
+ Qzone类提供了以下接口:
17
+ - `ablums`:该方法接受任意一个合法的QQ号为参数,返回的是该QQ号下所有的相册构成的数组.每一个相册都是一个哈希,它包含以下项
18
+ ```ruby
19
+ ablum[:host] # 拥有该相册的qq
20
+ ablum[:id] # 相册id
21
+ ablum[:name] # 相册名称
22
+ ablum[:total] # 照片数量
23
+ ablum[:allowAccess] # 相册权限 只有权限为1时, 该相册才是公开可爬的
24
+ ```
25
+ - `photos_in_ablum`:该方法接受任意一个合法的相册哈希(由ablums接口返回的,或是手动构造相同结构的哈希),返回该相册下所有的照片组成的数组, 每一个相片都是一个哈希, 它包含以下项
26
+ ```ruby
27
+ photo[:id] # 照片id
28
+ photo[:name] # 照片名称
29
+ photo[:url] # 照片的url, 可通过该url下载该照片
30
+ ```
31
+ - `friends`:该方法无参数, 返回该qzone下所有的好友组成的数组,每一个好友都是一个哈希,它包含以下项
32
+ ```ruby
33
+ friend[:uin] # 该好友的qq号
34
+ friend[:name] # 该好友对于空间拥有者的备注
35
+ friend[:score] # 亲密度评分
36
+ friend[:img] # 该好友的头像的url
37
+ ```
38
+
39
+ ## License
40
+
41
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+ task :default => :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "qzone"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,193 @@
1
+ require "qzone/version"
2
+ require 'qzone/qzone_login'
3
+ require "qzone/interface"
4
+
5
# Crawler for QQ Qzone.
#
# An instance logs in (through Qzone::Login) or restores a previously saved
# cookie file, then exposes the album, photo and friend listings as arrays
# of plain hashes.
class Qzone
  # User-Agent header sent with every crawler request.
  USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'.freeze

  # Page size used when listing photos; matches `pageNum=30` in the photo URL.
  PHOTOS_PER_PAGE = 30

  # Builds the crawler and makes sure it holds a usable session.
  #
  # A cookie file named "cookie<user>" in the current working directory is
  # reused when it loads, contains a `p_skey` cookie and still passes
  # #cookies_valid?; in every other case a fresh login is performed once.
  #
  # @param user [String, Integer] QQ number used to log in
  # @param password [String] password of that account
  def initialize(user, password)
    @user = user
    @password = password
    @spider = Mechanize.new
    # Applied up front so restored sessions send the same header as fresh ones.
    apply_request_headers

    cookies unless restore_session
  end

  # Logs in through a headless browser, copies the browser cookies into the
  # Mechanize cookie jar, recomputes the g_tk token and saves the jar to the
  # "cookie<user>" file.
  #
  # @return [Object] whatever the cookie jar's `save_as` returns
  # @raise [RuntimeError] when the login fails or no `p_skey` cookie was issued
  def cookies
    session = Login.new @user, @password, 5
    begin
      session.login
      @cookies = session.cookies
    ensure
      # Always release the browser, even when the login raised.
      session.close
    end

    @cookies.each do |raw|
      # Cookies without an expiry get one day so they are written to the file.
      raw[:expires] ||= Date.today + 1
      @spider.cookie_jar << Mechanize::Cookie.new(
        domain: raw[:domain],
        name: raw[:name],
        value: raw[:value],
        path: raw[:path],
        expires: raw[:expires].to_s
      )
    end

    @gtk = gtk(find_skey)
    apply_request_headers

    @spider.cookie_jar.save_as cookie_file, session: true
  end

  # Lists the albums owned by a QQ account.
  #
  # @param dest_qq [String, Integer] QQ number whose albums are listed
  # @return [Array<Hash>] one hash per album with the keys :host, :id, :name,
  #   :total and :allowAccess; empty when the response carries no album list
  def ablums(dest_qq)
    page = @spider.get(sprintf(Interface["ablum"], @gtk.to_s, dest_qq, @user))
    entries = list_from(json(page.content), 'albumListModeSort')
    return [] if entries.nil?

    entries.map do |entry|
      {
        host: dest_qq,
        id: entry['id'],
        name: entry['name'],
        total: entry['total'],
        allowAccess: entry['allowAccess'].to_i
      }
    end
  end

  # Lists every photo of an album, fetching PHOTOS_PER_PAGE photos per request.
  #
  # @param ablum [Hash] album hash as returned by #ablums (or hand-built with
  #   the same keys)
  # @return [Array<Hash>] one hash per photo with the keys :id, :name and :url
  # @raise [RuntimeError] when the album's :allowAccess is not 1
  def photos_in_ablum(ablum)
    raise "can't access to ablum \"#{ablum[:name]}\"" if ablum[:allowAccess] != 1

    photos = []
    total_photos = ablum[:total].to_i
    return photos if total_photos.zero?

    # Ceiling division: a partially filled last page still needs a request.
    pages = (total_photos + PHOTOS_PER_PAGE - 1) / PHOTOS_PER_PAGE

    pages.times do |page_index|
      offset = (page_index * PHOTOS_PER_PAGE).to_s
      page = @spider.get(sprintf(Interface["photo"], @gtk, ablum[:host], ablum[:id], @user, offset))
      entries = list_from(json(page.content), 'photoList')

      # A page without a photo list ends the listing early.
      return photos if entries.nil?

      entries.each do |entry|
        photos.push(id: entry['id'], name: entry['name'], url: entry['url'])
      end
    end

    photos
  end

  # Lists the friends of the logged-in account.
  #
  # @return [Array<Hash>] one hash per friend with the keys :uin, :name,
  #   :score (intimacy score) and :img; empty when the response carries no list
  def friends
    page = @spider.get(sprintf(Interface["friend"], @user, @gtk))
    entries = list_from(json(page.content), 'items_list')
    return [] if entries.nil?

    entries.map do |entry|
      {
        uin: entry['uin'],
        name: entry['name'],
        score: entry['score'],
        img: entry['img']
      }
    end
  end

  # Downloads every photo to "<path><name>_<hash>.jpg".
  #
  # `path` is used as a plain string prefix, so a directory must be passed
  # with its trailing separator.
  #
  # @param photos [Array<Hash>] photo hashes as returned by #photos_in_ablum
  # @param path [String] prefix prepended to every file name
  # @return [Array<Hash>] the `photos` argument
  def download_photos(photos, path)
    worker = Mechanize.new
    worker.request_headers = {
      'User-Agent' => USER_AGENT,
      "Referer" => "http://qzone.qq.com/"
    }
    photos.each do |photo|
      result = worker.get photo[:url]
      # Time.now.hash only serves to make same-named photos distinct.
      result.save_as "#{path}#{photo[:name]}_#{Time.now.hash}.jpg"
    end
  end

  # Computes the g_tk request token from the `p_skey` cookie: a times-33
  # string hash seeded with 5381 and truncated to 31 bits.
  #
  # @param skey [#value] cookie object whose value is hashed
  # @return [Integer] token in the range 0..0x7fffffff
  # @raise [RuntimeError] when `skey` is nil
  def gtk(skey)
    raise "Skey can't be nil" if skey.nil?

    hashes = skey.value.each_char.reduce(5381) do |acc, char|
      acc + (acc << 5) + char.ord
    end
    hashes & 0x7fffffff
  end

  # The helpers below were meant to be private in the original code but are
  # kept public so existing callers keep working.

  # Unwraps a JSONP response ("xxx_Callback(<json>);") and parses the JSON.
  # The argument is not modified, and the LAST ");" is treated as the
  # terminator so a ");" inside a value does not corrupt the payload.
  #
  # @param str [String] raw JSONP (or plain JSON) text
  # @return [Object] the parsed JSON document
  # @raise [JSON::ParserError] when the unwrapped text is not valid JSON
  def json(str)
    payload = str.sub(/.*_Callback\(/, '')
    closing = payload.rindex(');')
    payload = payload[0...closing] + payload[(closing + 2)..-1] if closing

    JSON.parse payload
  end

  # Checks whether the current cookies are still accepted by requesting the
  # account's own album list.
  #
  # @return [Boolean] false when the response contains the "not logged in or
  #   login timed out" message, true otherwise
  # @raise [RuntimeError] 'Connection refused' on a 403 response, or
  #   'unknown error happened in checking cookies' for any other failure
  def cookies_valid?
    begin
      result = @spider.get(sprintf(Interface["ablum"], @gtk.to_s, @user, @user))
    rescue StandardError => error
      # Only HTTP errors carry a response code; other failures are "unknown".
      code = error.respond_to?(:response_code) ? error.response_code : nil
      raise 'Connection refused' if code == '403'

      raise 'unknown error happened in checking cookies'
    end

    !result.content.toutf8.include?('尚未登录或者登录超时')
  end

  private

  # Sets the browser-like request headers on the crawler.
  def apply_request_headers
    @spider.request_headers = { 'User-Agent' => USER_AGENT }
  end

  # Name of the file the session cookies are saved to (relative to the cwd).
  def cookie_file
    "cookie#{@user}"
  end

  # Looks up the `p_skey` cookie in the crawler's jar (nil when absent).
  def find_skey
    @spider.cookie_jar.find { |cookie| cookie.name == 'p_skey' }
  end

  # Tries to reuse the saved cookie file. Returns true when the restored
  # session is valid; returns false (so the caller logs in again) when the
  # file is missing, unreadable, lacks `p_skey`, is rejected by the server,
  # or the validity check itself fails.
  def restore_session
    return false unless File.exist?(cookie_file)

    @spider.cookie_jar.load cookie_file
    @gtk = gtk(find_skey)
    return true if cookies_valid?

    File.delete cookie_file
    false
  rescue StandardError
    false
  end

  # Returns result['data'][key], or nil when either level is missing.
  def list_from(result, key)
    data = result.is_a?(Hash) ? result['data'] : nil
    data.is_a?(Hash) ? data[key] : nil
  end
end
@@ -0,0 +1,7 @@
1
class Qzone
  # sprintf templates for the Qzone endpoints used by the crawler. Frozen so
  # the shared templates cannot be modified at runtime.
  #
  # Positional `%s` placeholders, in order:
  #   "ablum"  - g_tk, hostUin (album owner), uin (logged-in account)
  #   "photo"  - g_tk, hostUin, topicId (album id), uin, pageStart (offset)
  #   "friend" - uin (logged-in account), g_tk
  Interface = {
    "ablum" => 'http://h5.qzone.qq.com/proxy/domain/tjalist.photo.qq.com/fcgi-bin/fcg_list_album_v3?g_tk=%s&callback=shine0_Callback&t=419043014&hostUin=%s&uin=%s&appid=4&inCharset=utf-8&outCharset=utf-8&source=qzone&plat=qzone&format=jsonp&notice=0&filter=1&handset=4&pageNumModeSort=40&pageNumModeClass=15&needUserInfo=1&idcNum=5&callbackFun=shine0&_=1475649079634'.freeze,
    "photo" => 'http://h5.qzone.qq.com/proxy/domain/tjplist.photo.qzone.qq.com/fcgi-bin/cgi_list_photo?g_tk=%s&callback=shine0_Callback&t=775643811&mode=0&idcNum=5&hostUin=%s&topicId=%s&noTopic=0&uin=%s&pageStart=%s&pageNum=30&skipCmtCount=0&singleurl=1&batchId=&notice=0&appid=4&inCharset=utf-8&outCharset=utf-8&source=qzone&plat=qzone&outstyle=json&format=jsonp&json_esc=1&question=&answer=&callbackFun=shine0&_=1475649257165'.freeze,
    "friend" => 'https://h5.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_ship_manager.cgi?uin=%s&do=1&rd=0.9983570276719183&fupdate=1&clean=1&g_tk=%s'.freeze
  }.freeze
end
@@ -0,0 +1,99 @@
1
+ require 'selenium-webdriver'
2
+ require 'mechanize'
3
+ require 'date'
4
+ require 'yaml'
5
+ require 'json'
6
+
7
class Qzone
  # Performs the Qzone web login in a headless PhantomJS browser driven by
  # Selenium, and exposes the resulting browser cookies.
  class Login
    # Page that hosts the login frame; also the URL the browser stays on
    # when the login did not go through.
    LOGIN_URL = 'http://qzone.qq.com/'.freeze

    # User-Agent reported by the headless browser. The header name is not
    # part of the value.
    BROWSER_USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50'.freeze

    # Starts the PhantomJS driver.
    #
    # @param user [String, Integer] QQ number typed into the login form
    # @param password [String] password typed into the login form
    # @param wait_time [Numeric] timeout in seconds for every explicit wait
    # @raise [RuntimeError] when the driver cannot be started
    def initialize(user, password, wait_time)
      @user = user
      @password = password

      begin
        capabilities = Selenium::WebDriver::Remote::Capabilities.phantomjs(
          'phantomjs.page.settings.userAgent' => BROWSER_USER_AGENT,
          # Images are not needed to fill in the form.
          'phantomjs.page.settings.loadImages' => false
        )
        @driver = Selenium::WebDriver.for(:phantomjs, :desired_capabilities => capabilities)
        @waiter = Selenium::WebDriver::Wait.new(timeout: wait_time)
      rescue StandardError
        raise 'init error,make sure phantomjs has been installed correctly.'
      end
    end

    # Opens the login page, submits the account and password, and works out
    # whether the login succeeded.
    #
    # @return [true] when the browser left the login page
    # @raise [RuntimeError] 'Need verify code, please manualy login Qzone first'
    #   when a verification-code field shows up, 'Account error' for a wrong
    #   account or password, 'Network busy, try later' when the page reports
    #   a busy network, and 'Unknown error happened in login' otherwise
    def login
      @waiter.until { @driver.navigate.to LOGIN_URL }
      @waiter.until { @driver.switch_to.frame 'login_frame' }

      # Switch the frame from QR-code login to account/password login.
      @waiter.until { @driver.find_element(id: 'switcher_plogin') }.click

      @driver.find_element(id: 'u').send_keys @user
      @driver.find_element(id: 'p').send_keys @password

      @waiter.until { @driver.find_element(xpath: '//*[@id="login_button"]') }.click

      # Give the page a moment to redirect after the click.
      sleep(1)

      # A successful login navigates away from the login page.
      return @pass = true if @driver.current_url != LOGIN_URL

      # Still on the login page: work out why.
      if element_appears?('vcode')
        raise 'Need verify code, please manualy login Qzone first'
      end

      # The element with id "err_m" carries the page's own error text.
      begin
        error_message = @waiter.until { @driver.find_element(id: 'err_m') }.text
      rescue StandardError
        # No error element either, so the cause cannot be determined.
        raise 'Unknown error happened in login'
      end

      raise 'Account error' if error_message.include? '您输入的帐号或密码不正确'
      raise 'Network busy, try later' if error_message.include? '网络繁忙'

      raise 'Unknown error happened in login'
    end

    # Computes the g_tk token from the browser's `p_skey` cookie: a times-33
    # string hash seeded with 5381 and truncated to 31 bits.
    #
    # @return [Integer] token in the range 0..0x7fffffff
    # @raise [RuntimeError] when the browser holds no `p_skey` cookie
    def gtk
      @skey = @driver.manage.cookie_named 'p_skey'
      raise "Skey can't be nil" if @skey.nil?

      hashes = @skey[:value].each_char.reduce(5381) do |acc, char|
        acc + (acc << 5) + char.ord
      end
      hashes & 0x7fffffff
    end

    # @return [Array<Hash>] every cookie currently held by the browser
    def cookies
      @driver.manage.all_cookies
    end

    # Ends the browser session. `quit` is used instead of `close`, which only
    # closes the current window. Calling this more than once is a no-op.
    #
    # @return [nil]
    def close
      return unless @driver

      @driver.quit
      @driver = nil
    end

    private

    # Waits up to the configured timeout for an element with the given id;
    # returns true when it shows up and false when the wait fails.
    def element_appears?(id)
      @waiter.until { @driver.find_element(id: id) }
      true
    rescue StandardError
      false
    end
  end
end
@@ -0,0 +1,3 @@
1
class Qzone
  # Release number of the gem; read by the gemspec.
  VERSION = '0.1.0'
end
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
# Make lib/ loadable so the version constant can be read while building.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'qzone/version'

Gem::Specification.new do |spec|
  spec.name          = "qzone"
  spec.version       = Qzone::VERSION
  spec.authors       = ["tuitu"]
  spec.email         = ["1965972530@qq.com"]

  # Plain text: the strings no longer carry literal double quotes.
  spec.summary       = "a spider aimed for qzone"
  spec.description   = "provide apis to crawl data from qzone"
  spec.homepage      = "https://github.com/hellotuitu/qzone"
  spec.license       = "MIT"

  # Restrict `gem push` to rubygems.org; RubyGems older than 2.0 has no
  # metadata support and therefore cannot enforce this.
  if spec.respond_to?(:metadata)
    spec.metadata['allowed_push_host'] = "https://rubygems.org"
  else
    raise "RubyGems 2.0 or newer is required to protect against " \
      "public gem pushes."
  end

  # Ship every tracked file except tests.
  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # Required by lib/ at load time, so they must be installed with the gem.
  # NOTE(review): the library drives PhantomJS through selenium-webdriver;
  # confirm which selenium-webdriver releases still support it and pin the
  # requirement accordingly.
  spec.add_runtime_dependency "selenium-webdriver"
  spec.add_runtime_dependency "mechanize"
  spec.add_runtime_dependency "json"

  spec.add_development_dependency "bundler", "~> 1.13"
  spec.add_development_dependency "rake", "~> 10.0"
end
metadata ADDED
@@ -0,0 +1,126 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: qzone
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - tuitu
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-06-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.13'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.13'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: selenium-webdriver
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: mechanize
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: json
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: '"provide apis to crawl data from qzone"'
84
+ email:
85
+ - 1965972530@qq.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - LICENSE.txt
92
+ - README.md
93
+ - Rakefile
94
+ - bin/console
95
+ - bin/setup
96
+ - lib/qzone.rb
97
+ - lib/qzone/interface.rb
98
+ - lib/qzone/qzone_login.rb
99
+ - lib/qzone/version.rb
100
+ - qzone.gemspec
101
+ homepage: https://github.com/hellotuitu/qzone
102
+ licenses:
103
+ - MIT
104
+ metadata:
105
+ allowed_push_host: https://rubygems.org
106
+ post_install_message:
107
+ rdoc_options: []
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ version: '0'
115
+ required_rubygems_version: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ requirements: []
121
+ rubyforge_project:
122
+ rubygems_version: 2.5.1
123
+ signing_key:
124
+ specification_version: 4
125
+ summary: '"a spider aimed for qzone"'
126
+ test_files: []