twitter_with_auto_pagination 0.8.6 → 0.8.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5dc8b7e411aacd1368c185e91b0ce3639a98a7fa
4
- data.tar.gz: 82c7e2b1e7ccffb6fd834227af7a723c4f11e2f8
3
+ metadata.gz: 56a94b64a5ad2a5729d3c2ad616201f1e067b130
4
+ data.tar.gz: 238b267742d48a7e1f0c7221b55f10efcae83c5c
5
5
  SHA512:
6
- metadata.gz: 978e0d5023163d376eb8b10c56ef10ac542f7e7d1e29656abbc20e23ebf5cfcf86cad6480c3e9b5273d12cbc1172fa77402ead345e2f10bab8fd193a8d298422
7
- data.tar.gz: 2c04bcac90233c24cc2ada868474a3a453406f8194ad09154b6e980ee1b54dd5b5ac9939c7add88d03762de75988d70683e3820526bab03313d63f1fb506a508
6
+ metadata.gz: fa63ca46d23ccebd336e048626050d29b3d75c1c8a31c8b504bd36fadfc8aab35cddeb10309b1d3eaa93980f39ec5d3c277fce924034c1eec05bf60d29c63831
7
+ data.tar.gz: 71f61eb0a34de144ed119914cf4e0bb39d486070647e02dce56ffa9797d95af3a952ddc7a03cfbeb2cf57a48df45745a2f0e4317e74f0f0f0455fca0cabfa991
@@ -56,7 +56,7 @@ module TwitterWithAutoPagination
56
56
  hashtags.each_with_object(Hash.new(0)) { |h, memo| memo[h] += 1 }.sort_by { |k, v| [-v, -k.size] }.slice(0, limit).to_h
57
57
  end
58
58
 
59
- def list_clusters(user, shrink: false, each_member: 300, total_member: 1000, rate: 0.3, limit: 10, debug: false)
59
+ def list_clusters(user, shrink: false, shrink_limit: 100, list_member: 300, total_member: 3000, total_list: 50, rate: 0.3, limit: 10, debug: false)
60
60
  begin
61
61
  require 'mecab'
62
62
  rescue => e
@@ -65,35 +65,32 @@ module TwitterWithAutoPagination
65
65
  end
66
66
 
67
67
  begin
68
- lists = memberships(user).sort_by { |li| li.member_count }
69
- rescue => e
70
- puts "#{e.class}: #{e.message} #{user.inspect}" if debug
68
+ lists = memberships(user, count: 500, call_limit: 2).sort_by { |li| li.member_count }
69
+ rescue Twitter::Error::ServiceUnavailable => e
70
+ puts "#{__method__}: #{e.class} #{e.message} #{user.inspect}" if debug
71
71
  lists = []
72
72
  end
73
73
  puts "lists: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
74
74
  return {} if lists.empty?
75
75
 
76
- while lists.size > 200
77
- percentile25 = ((lists.length * 0.25).ceil) - 1
78
- percentile75 = ((lists.length * 0.75).ceil) - 1
79
- lists = lists[percentile25..percentile75]
80
- puts "lists sliced by 25-75 percentile: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
81
- end if shrink
76
+ open('lists.txt', 'w') {|f| f.write lists.map(&:full_name).join("\n") } if debug
82
77
 
83
78
  list_special_words = %w()
84
- list_exclude_words1 = %r(list[0-9]*|people-ive-faved|twizard-magic-list|my-favstar-fm-list)
85
- list_exclude_words2 = %w(it list people met)
79
+ list_exclude_names = %r(list[0-9]*|people-ive-faved|twizard-magic-list|my-favstar-fm-list|timeline-list|conversationlist|who-i-met)
80
+ list_exclude_words = %w(it list people who met)
86
81
 
82
+ # リスト名を - で分割 -> 1文字の単語を除去 -> 出現頻度の降順でソート
87
83
  words = lists.map { |li| li.full_name.split('/')[1] }.
88
- select { |n| !n.match(list_exclude_words1) }.
84
+ select { |n| !n.match(list_exclude_names) }.
89
85
  map { |n| n.split('-') }.flatten.
90
- delete_if { |w| w.size < 2 || list_exclude_words2.include?(w) }.
86
+ delete_if { |w| w.size < 2 || list_exclude_words.include?(w) }.
91
87
  each_with_object(Hash.new(0)) { |w, memo| memo[w] += 1 }.
92
88
  sort_by { |k, v| [-v, -k.size] }
93
89
 
94
90
  puts "words: #{words.slice(0, 10)}" if debug
95
91
  return {} if words.empty?
96
92
 
93
+ # 出現頻度の高い単語を名前に含むリストを抽出
97
94
  _words = []
98
95
  lists =
99
96
  filter(lists, min: 2) do |li, i|
@@ -104,35 +101,59 @@ module TwitterWithAutoPagination
104
101
  puts "lists include #{_words.inspect}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
105
102
  return {} if lists.empty?
106
103
 
107
- _each_member = 0
104
+ # 中間の 25-75% のリストを抽出
105
+ while lists.size > shrink_limit
106
+ percentile25 = ((lists.length * 0.25).ceil) - 1
107
+ percentile75 = ((lists.length * 0.75).ceil) - 1
108
+ lists = lists[percentile25..percentile75]
109
+ puts "lists sliced by 25-75 percentile: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
110
+ end if shrink || lists.size > shrink_limit
111
+
112
+ # メンバー数がしきい値より少ないリストを抽出
113
+ _list_member = 0
114
+ _min_list_member = 10 < lists.size ? 10 : 0
108
115
  lists =
109
116
  filter(lists, min: 2) do |li, i|
110
- _each_member = each_member * (1.0 + 0.25 * i)
111
- (10 < lists.size ? 10 : 0) < li.member_count && li.member_count < _each_member
117
+ _list_member = list_member * (1.0 + 0.25 * i)
118
+ _min_list_member < li.member_count && li.member_count < _list_member
112
119
  end
113
- puts "lists limited by each member #{_each_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
120
+ puts "lists limited by list member #{_min_list_member}..#{_list_member.round}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
114
121
  return {} if lists.empty?
115
122
 
116
- _total_member = 0
117
- lists =
118
- filter(lists, min: 1) do |_, i|
119
- _total_member = total_member * (1.0 + 0.25 * i)
120
- lists[0..i].map { |li| li.member_count }.sum < _total_member
123
+ # トータルメンバー数がしきい値より少なくなるリストを抽出
124
+ _lists = []
125
+ lists.size.times do |i|
126
+ _lists = lists[0..(-1 - i)]
127
+ if _lists.map { |li| li.member_count }.sum < total_member
128
+ break
129
+ else
130
+ _lists = []
121
131
  end
122
- puts "lists limited by total members #{_total_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
132
+ end
133
+ lists = _lists.empty? ? [lists[0]] : _lists
134
+ puts "lists limited by total members #{total_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
135
+ return {} if lists.empty?
136
+
137
+ # リスト数がしきい値より少なくなるリストを抽出
138
+ if lists.size > total_list
139
+ lists = lists[0..(total_list - 1)]
140
+ end
141
+ puts "lists limited by total lists #{total_list}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
123
142
  return {} if lists.empty?
124
143
 
125
144
  members = lists.map do |li|
126
145
  begin
127
146
  list_members(li.id)
128
- rescue => e
129
- puts "#{e.class}: #{e.message} #{li.id} #{li.full_name} #{li.mode}" if debug
147
+ rescue Twitter::Error::NotFound => e
148
+ puts "#{__method__}: #{e.class} #{e.message} #{li.id} #{li.full_name} #{li.mode}" if debug
130
149
  nil
131
150
  end
132
151
  end.compact.flatten
133
152
  puts "candidate members: #{members.size}" if debug
134
153
  return {} if members.empty?
135
154
 
155
+ open('members.txt', 'w') {|f| f.write members.map{ |m| m.description.gsub(/\R/, ' ') }.join("\n") } if debug
156
+
136
157
  3.times do
137
158
  _members = members.each_with_object(Hash.new(0)) { |member, memo| memo[member] += 1 }.
138
159
  select { |_, v| lists.size * rate < v }.keys
@@ -140,14 +161,14 @@ module TwitterWithAutoPagination
140
161
  members = _members
141
162
  break
142
163
  else
143
- rate += 0.1
164
+ rate -= 0.05
144
165
  end
145
166
  end
146
- puts "members included multi lists #{rate}: #{members.size}" if debug
167
+ puts "members included multi lists #{rate.round(3)}: #{members.size}" if debug
147
168
 
148
169
 
149
170
  profile_special_words = %w()
150
- profile_exclude_words = %w(in at of my no er the and for inc Inc com gmail 好き こと 最近 情報 さん ちゃん くん 発言 関係 もの 活動 見解 所属 組織 連絡 大好き サイト ブログ つぶやき こちら アカ アカウント イベント フォロー)
171
+ profile_exclude_words = %w(in at of my no er the and for inc Inc com info gmail 好き こと 最近 連載 発売 依頼 情報 さん ちゃん くん 発言 関係 もの 活動 見解 所属 組織 代表 連絡 大好き サイト ブログ つぶやき 株式会社 こちら 届け お仕事 アカ アカウント ツイート たま ブロック 時間 お願い お願いします お願いいたします イベント フォロー)
151
172
 
152
173
  descriptions = members.map { |m| m.description.remove(URI.regexp) }
153
174
 
@@ -42,7 +42,7 @@ module TwitterWithAutoPagination
42
42
  rescue Twitter::Error::NotFound => e
43
43
  e.message == 'No user matches for specified terms.' ? [] : (raise e)
44
44
  rescue => e
45
- logger.warn "#{__method__} #{args.inspect} #{e.class} #{e.message}"
45
+ logger.warn "#{__method__}: #{e.class} #{e.message} #{args.inspect}"
46
46
  raise e
47
47
  end
48
48
 
@@ -34,14 +34,14 @@ module TwitterWithAutoPagination
34
34
  begin
35
35
  instrument('request', nil, options) { method.call(*args, api_options) }
36
36
  rescue Twitter::Error::TooManyRequests => e
37
- logger.warn "#{__method__}: #{options.inspect} #{e.class} Retry after #{e.rate_limit.reset_in} seconds."
37
+ logger.warn "#{__method__}: #{e.class} #{e.message} Retry after #{e.rate_limit.reset_in} seconds. #{options.inspect}"
38
38
  raise e
39
39
  rescue Twitter::Error::ServiceUnavailable, Twitter::Error::InternalServerError,
40
40
  Twitter::Error::Forbidden, Twitter::Error::NotFound => e
41
- logger.warn "#{__method__}: #{options.inspect} #{e.class} #{e.message}"
41
+ logger.warn "#{__method__}: #{e.class} #{e.message} #{options.inspect}"
42
42
  raise e
43
43
  rescue => e
44
- logger.warn "CATCH ME! #{__method__}: #{options.inspect} #{e.class} #{e.message}"
44
+ logger.warn "CATCH ME! #{__method__}: #{e.class} #{e.message} #{options.inspect}"
45
45
  raise e
46
46
  end
47
47
  end
@@ -71,10 +71,11 @@ module TwitterWithAutoPagination
71
71
  # friends, followers
72
72
  def collect_with_cursor(method, *args)
73
73
  options = args.extract_options!
74
+ call_limit = options.delete(:call_limit) || 30
74
75
  return_data = []
75
76
  call_num = 0
76
77
 
77
- while call_num < 30
78
+ while call_num < call_limit
78
79
  last_response = call_api(method, *args, options).attrs
79
80
  call_num += 1
80
81
  return_data += (last_response[:users] || last_response[:ids] || last_response[:lists])
@@ -22,5 +22,5 @@ Gem::Specification.new do |spec|
22
22
  spec.required_ruby_version = '>= 2.3'
23
23
  spec.summary = spec.description
24
24
  spec.test_files = Dir.glob('spec/**/*')
25
- spec.version = '0.8.6'
25
+ spec.version = '0.8.7'
26
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter_with_auto_pagination
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.6
4
+ version: 0.8.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shinohara Teruki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-11 00:00:00.000000000 Z
11
+ date: 2016-10-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: twitter