twitter_with_auto_pagination 0.8.2 → 0.8.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 35aa309dc97a81e62812edb59c1fe61c62da69b9
4
- data.tar.gz: 9c3a07ab41c467685e144edadda32c3c399b1b47
3
+ metadata.gz: 21617993d79d35c184e0a5eb3bd68cedf7f9fe36
4
+ data.tar.gz: 017b1a7d412e514c697f4296292fc7075bbff88d
5
5
  SHA512:
6
- metadata.gz: 55f1e531dc9dbaa174cff7f1a13dbec4a8e6ea85c920b58af0525048efcbbbee3746444790fb3120ea7e24fab647db4e2180a0a218811c0f0cf5421f59ca70b3
7
- data.tar.gz: 52859557ffae6a65e7fc4c56900aab1d0dc234a4641a1ed22db744e6a2ea70a1e137de8fcddebb8239fe5a3cdaadde68d267387f5939ce5e111f35d38c6175aa
6
+ metadata.gz: c7667ea3c41a50167aa7765ca7bce2a6d6a321ea71f417a44f57fb213b258d6425190148fe944d371a41b66165e1e76814694c99a4aee1d290425b1c53ae2d93
7
+ data.tar.gz: 49ac0e5a907bf133ed1a8ab304873c5efae18c525b4a1b56e7291178a459bfdf9f6b3989e5d8acecbc3c31316488f85280a89ca83c8bb14ec9d3763c070240f6
@@ -43,8 +43,123 @@ module TwitterWithAutoPagination
43
43
  frequency.select { |_, v| 2 < v }.sort_by { |_, v| -v }.slice(0, limit).to_h
44
44
  end
45
45
 
46
- def clusters_assigned_to
47
- raise NotImplementedError
46
+ alias tweet_clusters clusters_belong_to
47
+
48
# Derives the most frequent profile words among accounts that share lists with +user+.
#
# Pipeline, as implemented below:
#   1. load the lists returned by memberships(user) (presumably the lists +user+ is a member of -- confirm),
#   2. pick the most common word in the list slugs and keep only lists whose slug contains it,
#   3. bound the lists by per-list and cumulative member counts,
#   4. fetch the members of those lists and count words in their profile descriptions.
#
# Parameters:
#   user         - forwarded unchanged to memberships.
#   each_member  - exclusive upper bound on a single list's member_count; grown by 25% per retry.
#   total_member - exclusive upper bound on the cumulative member_count; grown by 25% per retry.
#   rate         - a member is kept when it appears in more than lists.size * rate lists.
#   limit        - maximum number of [word, count] pairs returned.
#   debug        - when true, progress is printed with puts.
#
# Returns {} on every early exit; otherwise an Array of [word, count] pairs, because the final
# sort_by/slice chain is not converted back with to_h.
# NOTE(review): the Hash-vs-Array return type is inconsistent -- confirm what callers expect.
+ def list_clusters(user, each_member: 300, total_member: 1000, rate: 0.3, limit: 10, debug: false)
49
# Sorted ascending by member_count; the percentile slicing below relies on this order.
+ lists = memberships(user).sort_by { |li| li.member_count }
50
+ puts "lists: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
51
+ return {} if lists.empty?
52
+
53
# Repeatedly keep only the 25th..75th percentile slice until at most 200 lists remain.
+ while lists.size > 200
54
+ percentile25 = ((lists.length * 0.25).ceil) - 1
55
+ percentile75 = ((lists.length * 0.75).ceil) - 1
56
+ lists = lists[percentile25..percentile75]
57
+ puts "lists sliced by 25-75 percentile: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
58
+ end
59
+
60
# list_special_words is assigned but never read in this method.
+ list_special_words = %w()
61
+ list_exclude_words = %w(list people met)
62
+
63
# Split the part of full_name after the first '/' on '-', drop words shorter than 2 characters or
# excluded, and count the rest. With more than 10 lists a word must occur at least twice.
# Result: [word, count] pairs ordered by count (desc), then word length (desc).
+ words = lists.map { |li| li.full_name.split('/')[1].split('-') }.flatten.delete_if { |w| w.size < 2 || list_exclude_words.include?(w) }.
64
+ each_with_object(Hash.new(0)) { |w, memo| memo[w] += 1 }.select { |_, v| (10 < lists.size ? 1 : 0) < v }.sort_by { |k, v| [-v, -k.size] }
65
+
66
+ puts "words: #{words.slice(0, 10)}" if debug
67
+ return {} if words.empty?
68
+
69
# Only the single most frequent word is used from here on.
+ word = words[0][0]
70
+ puts "word: #{word}" if debug
71
+
72
+ # TODO: when there are too few lists, use more words
73
+ lists = lists.select { |li| li.full_name.split('/')[1].include?(word) }
74
+ puts "lists include specified word: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
75
+ return {} if lists.empty?
76
+
77
# Up to three attempts: keep lists with member_count below each_member (and above 10 when there are
# more than 10 lists). An attempt is accepted when more than 2 lists survive or nothing was filtered
# out; otherwise each_member is relaxed. If no attempt is accepted, lists is left unchanged.
+ 3.times do
78
+ _lists = lists.select { |li| (10 < lists.size ? 10 : 0) < li.member_count && li.member_count < each_member }
79
+ if _lists.size > 2 || _lists.size == lists.size
80
+ lists = _lists
81
+ break
82
+ else
83
+ each_member *= 1.25
84
+ end
85
+ end
86
+ puts "lists limited by each member #{each_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
87
+ return {} if lists.empty?
88
+
89
# Up to three attempts: keep each list whose cumulative member_count (itself plus every list before
# it) is below total_member. The running sum is recomputed from lists[0..i] for every element.
# If no attempt keeps any list, lists is left unchanged.
+ 3.times do
90
+ _lists = lists.select.with_index { |_, i| lists[0..i].map { |li| li.member_count }.sum < total_member }
91
+ if _lists.any?
92
+ lists = _lists
93
+ break
94
+ else
95
+ total_member *= 1.25
96
+ end
97
+ end
98
+ puts "lists limited by total members #{total_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
99
+ return {} if lists.empty?
100
+
101
# Fetch the members of every remaining list. A list whose fetch raises is skipped (nil is compacted
# away); the error is only printed when debug is on.
+ members = lists.map do |li|
102
+ begin
103
+ list_members(li.id)
104
+ rescue => e
105
+ puts "#{e.class}: #{e.message} #{li.id} #{li.full_name} #{li.mode}" if debug
106
+ nil
107
+ end
108
+ end.compact.flatten
109
+ puts "candidate members: #{members.size}" if debug
110
+ return {} if members.empty?
111
+
112
# Up to three attempts: keep members that appear in more than lists.size * rate lists; an attempt is
# accepted only when more than 100 distinct members survive.
# NOTE(review): each retry raises rate, which makes the filter stricter, so a retry cannot succeed
# where the first attempt failed. When no attempt is accepted, members stays the unfiltered list
# (duplicates included) -- confirm whether rate was meant to be lowered instead.
+ 3.times do
113
+ _members = members.each_with_object(Hash.new(0)) { |member, memo| memo[member] += 1 }.select { |_, v| lists.size * rate < v }.keys
114
+ if _members.size > 100
115
+ members = _members
116
+ break
117
+ else
118
+ rate += 0.1
119
+ end
120
+ end
121
+ puts "members included multi lists #{rate}: #{members.size}" if debug
122
+
123
# Loaded here rather than at file level, so mecab is only required once this point is reached.
+ require 'mecab'
124
+
125
# profile_special_words is assigned but never read in this method.
+ profile_special_words = %w()
126
+ profile_exclude_words = %w(in at of my no er the and for inc Inc com gmail 好き こと 最近 情報 さん ちゃん くん 発言 関係 もの 活動 見解 所属 組織 連絡 大好き サイト ブログ つぶやき こちら アカ アカウント イベント フォロー)
127
+
128
# Strip URLs from each profile description. String#remove is not core Ruby; presumably it comes
# from ActiveSupport -- confirm it is loaded.
+ descriptions = members.map { |m| m.description.remove(URI.regexp) }
129
+
130
# Descriptions containing more than two '/' are treated as slash-delimited word lists.
+ candidates, remains = descriptions.partition { |desc| desc.scan('/').size > 2 }
131
+ slash_freq = count_by_word_with_delim(candidates, delim: '/')
132
+ puts "words splitted by /: #{slash_freq.to_a.slice(0, 10)}" if debug
133
+
134
# Of the rest, descriptions containing more than two '|' are treated as pipe-delimited word lists.
+ candidates, remains = remains.partition { |desc| desc.scan('|').size > 2 }
135
+ pipe_freq = count_by_word_with_delim(candidates, delim: '|')
136
+ puts "words splitted by |: #{pipe_freq.to_a.slice(0, 10)}" if debug
137
+
138
# Everything left over goes through the MeCab-based noun counter.
+ noun_freq = count_by_word_with_tagger(remains, exclude_words: profile_exclude_words)
139
+ puts "words with nouns added: #{noun_freq.to_a.slice(0, 10)}" if debug
140
+
141
# Sum the three frequency tables per word, order by count (desc) then word length (desc), and keep
# the first `limit` pairs. The result is an Array of [word, count] pairs.
+ slash_freq.merge(pipe_freq) { |_, old, neww| old + neww }.merge(noun_freq) { |_, old, neww| old + neww }.sort_by { |k, v| [-v, -k.size] }.slice(0, limit)
142
+ end
143
+
144
+ private
145
+
146
# Counts how often each delimiter-separated word occurs across +texts+.
#
# Every text is split on +delim+, each piece is stripped of surrounding whitespace, and pieces
# whose length falls outside min_length..max_length are ignored.
#
# @param texts [Array<String>] strings to split
# @param delim [String] delimiter handed to String#split
# @param min_length [Integer] shortest word (in characters) that is counted
# @param max_length [Integer] longest word (in characters) that is counted
# @return [Hash{String => Integer}] word => occurrences, ordered by count (descending),
#   then word length (descending), then the word itself
def count_by_word_with_delim(texts, delim:, min_length: 2, max_length: 5)
  counts = Hash.new(0)

  texts.each do |text|
    text.split(delim).each do |piece|
      word = piece.strip
      # An empty piece has size 0, so the length check also rejects it.
      counts[word] += 1 if word.size.between?(min_length, max_length)
    end
  end

  # sort_by is not stable; the trailing word key makes the order of ties deterministic.
  counts.sort_by { |word, count| [-count, -word.size, word] }.to_h
end
153
+
154
# Counts the nouns a MeCab-style tagger finds in +texts+.
#
# All texts are joined with a single space and parsed in one call. Each output line is expected to
# be tab-separated with the token's surface form first. A token counts as a noun when one of the
# fields after the surface starts with '名詞'. Matching only the start of a feature field keeps
# tokens such as noun-attaching prefixes ('接頭詞,名詞接続') or non-noun tokens whose surface merely
# contains '名詞' from being counted.
#
# @param texts [Array<String>] texts to analyse
# @param tagger [#parse, nil] object whose +parse(String)+ returns one line per token; when nil a
#   MeCab::Tagger using the mecab-ipadic-neologd dictionary is created
# @param exclude_words [Array<String>] surface forms that are never counted
# @param min_length [Integer] shortest noun (in characters) that is counted
# @param max_length [Integer] longest noun (in characters) that is counted
# @return [Hash{String => Integer}] noun => occurrences, ordered by count (descending),
#   then word length (descending), then the word itself
def count_by_word_with_tagger(texts, tagger: nil, exclude_words: [], min_length: 2, max_length: 5)
  # The default tagger shells out to mecab-config to locate the dictionary directory.
  tagger ||= MeCab::Tagger.new("-d #{`mecab-config --dicdir`.chomp}/mecab-ipadic-neologd/")

  counts = Hash.new(0)

  tagger.parse(texts.join(' ')).each_line do |line|
    surface, *features = line.chomp.split("\t")
    # Lines without a feature field (such as the trailing "EOS" marker) are skipped here as well.
    next unless features.any? { |field| field.start_with?('名詞') }
    next unless surface.size.between?(min_length, max_length)
    next if exclude_words.include?(surface)

    counts[surface] += 1
  end

  # sort_by is not stable; the trailing word key makes the order of ties deterministic.
  counts.sort_by { |word, count| [-count, -word.size, word] }.to_h
end
49
164
  end
50
165
  end
@@ -14,6 +14,15 @@ module TwitterWithAutoPagination
14
14
  end
15
15
  end
16
16
  end
17
+
18
# Collects the members of a list, walking the cursor-based API through collect_with_cursor.
#
# A trailing options hash in +args+ is removed and merged over the defaults
# (count: 5000, skip_status: 1, cursor: -1); the remaining positional arguments are forwarded
# unchanged. The call is wrapped in instrument and fetch_cache_or_call_api, both defined elsewhere
# (presumably instrumentation and response caching keyed on the first argument -- confirm).
def list_members(*args)
  overrides = args.extract_options!
  options = {count: 5000, skip_status: 1, cursor: -1}.merge(overrides)

  instrument(__method__, nil, options) do
    fetch_cache_or_call_api(__method__, args[0], options) do
      # The parent implementation of this same method performs the actual API request.
      parent_call = method(__method__).super_method
      collect_with_cursor(parent_call, *args, options)
    end
  end
end
17
26
  end
18
27
  end
19
28
  end
@@ -108,7 +108,7 @@ module TwitterWithAutoPagination
108
108
  while (next_cursor = last_response[:next_cursor]) && next_cursor != 0
109
109
  options[:cursor] = next_cursor
110
110
  last_response = call_api(method_obj, *args, options).attrs
111
- return_data += (last_response[:users] || last_response[:ids])
111
+ return_data += (last_response[:users] || last_response[:ids] || last_response[:lists])
112
112
  end
113
113
 
114
114
  return_data
@@ -122,6 +122,8 @@ module TwitterWithAutoPagination
122
122
  "hash-str#{delim}#{credentials_hash}"
123
123
  when method_name == :search
124
124
  "str#{delim}#{user.to_s}"
125
+ when method_name == :list_members
126
+ "list_id#{delim}#{user.to_s}"
125
127
  when method_name == :mentions_timeline
126
128
  "#{user.kind_of?(Integer) ? 'id' : 'sn'}#{delim}#{user.to_s}"
127
129
  when method_name == :home_timeline
@@ -6,6 +6,7 @@ Gem::Specification.new do |spec|
6
6
  spec.add_dependency 'activesupport'
7
7
  spec.add_dependency 'hashie'
8
8
  spec.add_dependency 'parallel'
9
+ spec.add_dependency 'mecab'
9
10
 
10
11
  spec.add_development_dependency 'bundler'
11
12
 
@@ -22,5 +23,5 @@ Gem::Specification.new do |spec|
22
23
  spec.required_ruby_version = '>= 2.3'
23
24
  spec.summary = spec.description
24
25
  spec.test_files = Dir.glob('spec/**/*')
25
- spec.version = '0.8.2'
26
+ spec.version = '0.8.3'
26
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter_with_auto_pagination
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.2
4
+ version: 0.8.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shinohara Teruki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-01 00:00:00.000000000 Z
11
+ date: 2016-10-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: twitter
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: mecab
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: bundler
71
85
  requirement: !ruby/object:Gem::Requirement