twitter_with_auto_pagination 0.8.6 → 0.8.7
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56a94b64a5ad2a5729d3c2ad616201f1e067b130
|
4
|
+
data.tar.gz: 238b267742d48a7e1f0c7221b55f10efcae83c5c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fa63ca46d23ccebd336e048626050d29b3d75c1c8a31c8b504bd36fadfc8aab35cddeb10309b1d3eaa93980f39ec5d3c277fce924034c1eec05bf60d29c63831
|
7
|
+
data.tar.gz: 71f61eb0a34de144ed119914cf4e0bb39d486070647e02dce56ffa9797d95af3a952ddc7a03cfbeb2cf57a48df45745a2f0e4317e74f0f0f0455fca0cabfa991
|
@@ -56,7 +56,7 @@ module TwitterWithAutoPagination
|
|
56
56
|
hashtags.each_with_object(Hash.new(0)) { |h, memo| memo[h] += 1 }.sort_by { |k, v| [-v, -k.size] }.slice(0, limit).to_h
|
57
57
|
end
|
58
58
|
|
59
|
-
def list_clusters(user, shrink: false,
|
59
|
+
def list_clusters(user, shrink: false, shrink_limit: 100, list_member: 300, total_member: 3000, total_list: 50, rate: 0.3, limit: 10, debug: false)
|
60
60
|
begin
|
61
61
|
require 'mecab'
|
62
62
|
rescue => e
|
@@ -65,35 +65,32 @@ module TwitterWithAutoPagination
|
|
65
65
|
end
|
66
66
|
|
67
67
|
begin
|
68
|
-
lists = memberships(user).sort_by { |li| li.member_count }
|
69
|
-
rescue => e
|
70
|
-
puts "#{e.class}
|
68
|
+
lists = memberships(user, count: 500, call_limit: 2).sort_by { |li| li.member_count }
|
69
|
+
rescue Twitter::Error::ServiceUnavailable => e
|
70
|
+
puts "#{__method__}: #{e.class} #{e.message} #{user.inspect}" if debug
|
71
71
|
lists = []
|
72
72
|
end
|
73
73
|
puts "lists: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
|
74
74
|
return {} if lists.empty?
|
75
75
|
|
76
|
-
|
77
|
-
percentile25 = ((lists.length * 0.25).ceil) - 1
|
78
|
-
percentile75 = ((lists.length * 0.75).ceil) - 1
|
79
|
-
lists = lists[percentile25..percentile75]
|
80
|
-
puts "lists sliced by 25-75 percentile: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
|
81
|
-
end if shrink
|
76
|
+
open('lists.txt', 'w') {|f| f.write lists.map(&:full_name).join("\n") } if debug
|
82
77
|
|
83
78
|
list_special_words = %w()
|
84
|
-
|
85
|
-
|
79
|
+
list_exclude_names = %r(list[0-9]*|people-ive-faved|twizard-magic-list|my-favstar-fm-list|timeline-list|conversationlist|who-i-met)
|
80
|
+
list_exclude_words = %w(it list people who met)
|
86
81
|
|
82
|
+
# リスト名を - で分割 -> 1文字の単語を除去 -> 出現頻度の降順でソート
|
87
83
|
words = lists.map { |li| li.full_name.split('/')[1] }.
|
88
|
-
select { |n| !n.match(
|
84
|
+
select { |n| !n.match(list_exclude_names) }.
|
89
85
|
map { |n| n.split('-') }.flatten.
|
90
|
-
delete_if { |w| w.size < 2 ||
|
86
|
+
delete_if { |w| w.size < 2 || list_exclude_words.include?(w) }.
|
91
87
|
each_with_object(Hash.new(0)) { |w, memo| memo[w] += 1 }.
|
92
88
|
sort_by { |k, v| [-v, -k.size] }
|
93
89
|
|
94
90
|
puts "words: #{words.slice(0, 10)}" if debug
|
95
91
|
return {} if words.empty?
|
96
92
|
|
93
|
+
# 出現頻度の高い単語を名前に含むリストを抽出
|
97
94
|
_words = []
|
98
95
|
lists =
|
99
96
|
filter(lists, min: 2) do |li, i|
|
@@ -104,35 +101,59 @@ module TwitterWithAutoPagination
|
|
104
101
|
puts "lists include #{_words.inspect}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
|
105
102
|
return {} if lists.empty?
|
106
103
|
|
107
|
-
|
104
|
+
# 中間の 25-75% のリストを抽出
|
105
|
+
while lists.size > shrink_limit
|
106
|
+
percentile25 = ((lists.length * 0.25).ceil) - 1
|
107
|
+
percentile75 = ((lists.length * 0.75).ceil) - 1
|
108
|
+
lists = lists[percentile25..percentile75]
|
109
|
+
puts "lists sliced by 25-75 percentile: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
|
110
|
+
end if shrink || lists.size > shrink_limit
|
111
|
+
|
112
|
+
# メンバー数がしきい値より少ないリストを抽出
|
113
|
+
_list_member = 0
|
114
|
+
_min_list_member = 10 < lists.size ? 10 : 0
|
108
115
|
lists =
|
109
116
|
filter(lists, min: 2) do |li, i|
|
110
|
-
|
111
|
-
|
117
|
+
_list_member = list_member * (1.0 + 0.25 * i)
|
118
|
+
_min_list_member < li.member_count && li.member_count < _list_member
|
112
119
|
end
|
113
|
-
puts "lists limited by
|
120
|
+
puts "lists limited by list member #{_min_list_member}..#{_list_member.round}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
|
114
121
|
return {} if lists.empty?
|
115
122
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
123
|
+
# トータルメンバー数がしきい値より少なくなるリストを抽出
|
124
|
+
_lists = []
|
125
|
+
lists.size.times do |i|
|
126
|
+
_lists = lists[0..(-1 - i)]
|
127
|
+
if _lists.map { |li| li.member_count }.sum < total_member
|
128
|
+
break
|
129
|
+
else
|
130
|
+
_lists = []
|
121
131
|
end
|
122
|
-
|
132
|
+
end
|
133
|
+
lists = _lists.empty? ? [lists[0]] : _lists
|
134
|
+
puts "lists limited by total members #{total_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
|
135
|
+
return {} if lists.empty?
|
136
|
+
|
137
|
+
# リスト数がしきい値より少なくなるリストを抽出
|
138
|
+
if lists.size > total_list
|
139
|
+
lists = lists[0..(total_list - 1)]
|
140
|
+
end
|
141
|
+
puts "lists limited by total lists #{total_list}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
|
123
142
|
return {} if lists.empty?
|
124
143
|
|
125
144
|
members = lists.map do |li|
|
126
145
|
begin
|
127
146
|
list_members(li.id)
|
128
|
-
rescue => e
|
129
|
-
puts "#{e.class}
|
147
|
+
rescue Twitter::Error::NotFound => e
|
148
|
+
puts "#{__method__}: #{e.class} #{e.message} #{li.id} #{li.full_name} #{li.mode}" if debug
|
130
149
|
nil
|
131
150
|
end
|
132
151
|
end.compact.flatten
|
133
152
|
puts "candidate members: #{members.size}" if debug
|
134
153
|
return {} if members.empty?
|
135
154
|
|
155
|
+
open('members.txt', 'w') {|f| f.write members.map{ |m| m.description.gsub(/\R/, ' ') }.join("\n") } if debug
|
156
|
+
|
136
157
|
3.times do
|
137
158
|
_members = members.each_with_object(Hash.new(0)) { |member, memo| memo[member] += 1 }.
|
138
159
|
select { |_, v| lists.size * rate < v }.keys
|
@@ -140,14 +161,14 @@ module TwitterWithAutoPagination
|
|
140
161
|
members = _members
|
141
162
|
break
|
142
163
|
else
|
143
|
-
rate
|
164
|
+
rate -= 0.05
|
144
165
|
end
|
145
166
|
end
|
146
|
-
puts "members included multi lists #{rate}: #{members.size}" if debug
|
167
|
+
puts "members included multi lists #{rate.round(3)}: #{members.size}" if debug
|
147
168
|
|
148
169
|
|
149
170
|
profile_special_words = %w()
|
150
|
-
profile_exclude_words = %w(in at of my no er the and for inc Inc com gmail 好き こと 最近 情報 さん ちゃん くん 発言 関係 もの 活動 見解 所属 組織 連絡 大好き サイト ブログ つぶやき こちら アカ アカウント イベント フォロー)
|
171
|
+
profile_exclude_words = %w(in at of my no er the and for inc Inc com info gmail 好き こと 最近 連載 発売 依頼 情報 さん ちゃん くん 発言 関係 もの 活動 見解 所属 組織 代表 連絡 大好き サイト ブログ つぶやき 株式会社 こちら 届け お仕事 アカ アカウント ツイート たま ブロック 時間 お願い お願いします お願いいたします イベント フォロー)
|
151
172
|
|
152
173
|
descriptions = members.map { |m| m.description.remove(URI.regexp) }
|
153
174
|
|
@@ -42,7 +42,7 @@ module TwitterWithAutoPagination
|
|
42
42
|
rescue Twitter::Error::NotFound => e
|
43
43
|
e.message == 'No user matches for specified terms.' ? [] : (raise e)
|
44
44
|
rescue => e
|
45
|
-
logger.warn "#{__method__} #{
|
45
|
+
logger.warn "#{__method__}: #{e.class} #{e.message} #{args.inspect}"
|
46
46
|
raise e
|
47
47
|
end
|
48
48
|
|
@@ -34,14 +34,14 @@ module TwitterWithAutoPagination
|
|
34
34
|
begin
|
35
35
|
instrument('request', nil, options) { method.call(*args, api_options) }
|
36
36
|
rescue Twitter::Error::TooManyRequests => e
|
37
|
-
logger.warn "#{__method__}: #{
|
37
|
+
logger.warn "#{__method__}: #{e.class} #{e.message} Retry after #{e.rate_limit.reset_in} seconds. #{options.inspect}"
|
38
38
|
raise e
|
39
39
|
rescue Twitter::Error::ServiceUnavailable, Twitter::Error::InternalServerError,
|
40
40
|
Twitter::Error::Forbidden, Twitter::Error::NotFound => e
|
41
|
-
logger.warn "#{__method__}: #{
|
41
|
+
logger.warn "#{__method__}: #{e.class} #{e.message} #{options.inspect}"
|
42
42
|
raise e
|
43
43
|
rescue => e
|
44
|
-
logger.warn "CATCH ME! #{__method__}: #{
|
44
|
+
logger.warn "CATCH ME! #{__method__}: #{e.class} #{e.message} #{options.inspect}"
|
45
45
|
raise e
|
46
46
|
end
|
47
47
|
end
|
@@ -71,10 +71,11 @@ module TwitterWithAutoPagination
|
|
71
71
|
# friends, followers
|
72
72
|
def collect_with_cursor(method, *args)
|
73
73
|
options = args.extract_options!
|
74
|
+
call_limit = options.delete(:call_limit) || 30
|
74
75
|
return_data = []
|
75
76
|
call_num = 0
|
76
77
|
|
77
|
-
while call_num <
|
78
|
+
while call_num < call_limit
|
78
79
|
last_response = call_api(method, *args, options).attrs
|
79
80
|
call_num += 1
|
80
81
|
return_data += (last_response[:users] || last_response[:ids] || last_response[:lists])
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_with_auto_pagination
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.6
|
4
|
+
version: 0.8.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shinohara Teruki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: twitter
|