twitter_with_auto_pagination 0.8.2 → 0.8.3
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 21617993d79d35c184e0a5eb3bd68cedf7f9fe36
|
4
|
+
data.tar.gz: 017b1a7d412e514c697f4296292fc7075bbff88d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c7667ea3c41a50167aa7765ca7bce2a6d6a321ea71f417a44f57fb213b258d6425190148fe944d371a41b66165e1e76814694c99a4aee1d290425b1c53ae2d93
|
7
|
+
data.tar.gz: 49ac0e5a907bf133ed1a8ab304873c5efae18c525b4a1b56e7291178a459bfdf9f6b3989e5d8acecbc3c31316488f85280a89ca83c8bb14ec9d3763c070240f6
|
@@ -43,8 +43,123 @@ module TwitterWithAutoPagination
|
|
43
43
|
frequency.select { |_, v| 2 < v }.sort_by { |_, v| -v }.slice(0, limit).to_h
|
44
44
|
end
|
45
45
|
|
46
|
-
|
47
|
-
|
46
|
+
# Make clusters_belong_to callable under the name tweet_clusters as well.
alias tweet_clusters clusters_belong_to
|
47
|
+
|
48
|
+
# Derives frequently used words ("clusters") for a user from the lists the
# user has been added to and from the profile descriptions of those lists'
# members.
#
# Outline:
#   1. Load the user's list memberships, ordered by member count.
#   2. While more than 200 lists remain, keep only the 25th-75th percentile.
#   3. Pick the most frequent word found in the list slugs and keep only the
#      lists whose slug contains that word.
#   4. Limit the lists by per-list member count and by cumulative member count
#      (each limit is relaxed by 25% per attempt, at most three attempts).
#   5. Fetch the members of the remaining lists and prefer members that occur
#      in more than `rate` of those lists.
#   6. Count words in the members' descriptions (split on "/" or "|", or
#      extracted with the MeCab tagger) and return the top `limit` entries.
#
# @param user          forwarded unchanged to +memberships+
# @param each_member   [Numeric] exclusive upper bound for a single list's member count
# @param total_member  [Numeric] exclusive upper bound for the cumulative member count
# @param rate          [Float] a member is kept when it appears in more than
#                      `lists.size * rate` of the selected lists
# @param limit         [Integer] maximum number of entries returned
# @param debug         [Boolean] print intermediate results with +puts+
# @return [Hash, Array] an empty Hash when any step yields nothing, otherwise
#   an Array of [word, count] pairs sorted by descending count and then by
#   descending word length.
#   NOTE(review): the two return shapes differ (Hash vs Array of pairs); kept
#   as is so existing callers are not affected.
def list_clusters(user, each_member: 300, total_member: 1000, rate: 0.3, limit: 10, debug: false)
  lists = memberships(user).sort_by { |li| li.member_count }
  puts "lists: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
  return {} if lists.empty?

  # Repeatedly drop the smallest and largest lists until at most 200 remain.
  while lists.size > 200
    percentile25 = (lists.length * 0.25).ceil - 1
    percentile75 = (lists.length * 0.75).ceil - 1
    lists = lists[percentile25..percentile75]
    puts "lists sliced by 25-75 percentile: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
  end

  list_exclude_words = %w(list people met)

  # With more than 10 lists a word has to occur at least twice to count.
  min_word_count = 10 < lists.size ? 1 : 0
  words = lists.flat_map { |li| li.full_name.split('/')[1].split('-') }.
    reject { |w| w.size < 2 || list_exclude_words.include?(w) }.
    each_with_object(Hash.new(0)) { |w, memo| memo[w] += 1 }.
    select { |_, v| min_word_count < v }.
    sort_by { |k, v| [-v, -k.size] }

  puts "words: #{words.slice(0, 10)}" if debug
  return {} if words.empty?

  word = words[0][0]
  puts "word: #{word}" if debug

  # TODO: use more words when the number of matching lists is too small.
  lists = lists.select { |li| li.full_name.split('/')[1].include?(word) }
  puts "lists include specified word: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
  return {} if lists.empty?

  # Keep lists with a moderate member count; relax the upper bound if that
  # leaves too few lists.
  3.times do
    min_members = 10 < lists.size ? 10 : 0
    sized = lists.select { |li| min_members < li.member_count && li.member_count < each_member }
    if sized.size > 2 || sized.size == lists.size
      lists = sized
      break
    else
      each_member *= 1.25
    end
  end
  puts "lists limited by each member #{each_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
  return {} if lists.empty?

  # Keep the lists whose running member total stays below total_member.
  # A single running sum replaces recomputing the prefix sum per element.
  3.times do
    running_total = 0
    within_budget = lists.select { |li| (running_total += li.member_count) < total_member }
    if within_budget.any?
      lists = within_budget
      break
    else
      total_member *= 1.25
    end
  end
  puts "lists limited by total members #{total_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
  return {} if lists.empty?

  # A list that cannot be fetched is skipped instead of aborting the whole run.
  members = lists.map do |li|
    begin
      list_members(li.id)
    rescue => e
      puts "#{e.class}: #{e.message} #{li.id} #{li.full_name} #{li.mode}" if debug
      nil
    end
  end.compact.flatten
  puts "candidate members: #{members.size}" if debug
  return {} if members.empty?

  # Prefer members that appear in many lists. When too few qualify, lower the
  # required rate (the previous code raised it, so a retry could never
  # succeed). If no attempt succeeds, all candidate members are used.
  3.times do
    frequent = members.each_with_object(Hash.new(0)) { |member, memo| memo[member] += 1 }.
      select { |_, v| lists.size * rate < v }.keys
    if frequent.size > 100
      members = frequent
      break
    else
      rate -= 0.1
    end
  end
  puts "members included multi lists #{rate}: #{members.size}" if debug

  # Loaded here so the dependency is only needed when this method gets this far.
  require 'mecab'

  profile_exclude_words = %w(in at of my no er the and for inc Inc com gmail 好き こと 最近 情報 さん ちゃん くん 発言 関係 もの 活動 見解 所属 組織 連絡 大好き サイト ブログ つぶやき こちら アカ アカウント イベント フォロー)

  # to_s guards against members without a description.
  descriptions = members.map { |m| m.description.to_s.remove(URI.regexp) }

  # Descriptions that look like "a / b / c / d" are split on the delimiter.
  candidates, remains = descriptions.partition { |desc| desc.count('/') > 2 }
  slash_freq = count_by_word_with_delim(candidates, delim: '/')
  puts "words splitted by /: #{slash_freq.to_a.slice(0, 10)}" if debug

  candidates, remains = remains.partition { |desc| desc.count('|') > 2 }
  pipe_freq = count_by_word_with_delim(candidates, delim: '|')
  puts "words splitted by |: #{pipe_freq.to_a.slice(0, 10)}" if debug

  # Everything else goes through the morphological analyzer.
  noun_freq = count_by_word_with_tagger(remains, exclude_words: profile_exclude_words)
  puts "words with nouns added: #{noun_freq.to_a.slice(0, 10)}" if debug

  slash_freq.merge(pipe_freq) { |_, old, neww| old + neww }.
    merge(noun_freq) { |_, old, neww| old + neww }.
    sort_by { |k, v| [-v, -k.size] }.slice(0, limit)
end
|
143
|
+
|
144
|
+
private
|
145
|
+
|
146
|
+
# Counts the words obtained by splitting each text on a delimiter.
#
# Every text is split on +delim+, the pieces are stripped of surrounding
# whitespace, and pieces whose length is outside min_size..max_size are
# discarded.
#
# @param texts    [Array<String>] texts to split
# @param delim    [String] delimiter handed to String#split
# @param min_size [Integer] shortest word length kept (default 2)
# @param max_size [Integer] longest word length kept (default 5)
# @return [Hash{String => Integer}] word => occurrences, ordered by descending
#   count and then by descending word length
def count_by_word_with_delim(texts, delim:, min_size: 2, max_size: 5)
  texts.flat_map { |t| t.split(delim) }.
    map(&:strip).
    select { |w| w.size.between?(min_size, max_size) }.
    each_with_object(Hash.new(0)) { |w, memo| memo[w] += 1 }.
    sort_by { |k, v| [-v, -k.size] }.to_h
end
|
153
|
+
|
154
|
+
# Counts the nouns found in the given texts by a MeCab-style tagger.
#
# The texts are joined with spaces and handed to +tagger.parse+. The result is
# read as one token per line in the form "surface<TAB>features"; a token is
# kept when its feature string starts with 名詞 (noun), its surface is 2 to 5
# characters long and it is not listed in +exclude_words+.
#
# The features are checked with start_with? rather than include? so that
# tokens whose features merely mention 名詞 (for example 接頭詞,名詞接続) are
# not counted as nouns.
#
# @param texts         [Array<String>] texts to analyze
# @param tagger        object responding to +parse(String)+; when nil a
#   MeCab::Tagger is created with the mecab-ipadic-neologd dictionary located
#   under the directory reported by `mecab-config --dicdir`
# @param exclude_words [Array<String>] surfaces to ignore
# @return [Hash{String => Integer}] noun => occurrences, ordered by descending
#   count and then by descending word length
def count_by_word_with_tagger(texts, tagger: nil, exclude_words: [])
  if tagger.nil?
    # Only needed for the default tagger; a no-op when already loaded.
    require 'mecab'
    tagger = MeCab::Tagger.new("-d #{`mecab-config --dicdir`.chomp}/mecab-ipadic-neologd/")
  end

  nouns = tagger.parse(texts.join(' ')).split("\n").
    map { |line| line.split("\t", 2) }.
    select { |_, features| features.to_s.start_with?('名詞') }.
    map { |surface, _| surface }.
    reject { |w| w.size < 2 || 5 < w.size || exclude_words.include?(w) }

  nouns.each_with_object(Hash.new(0)) { |noun, memo| memo[noun] += 1 }.
    sort_by { |k, v| [-v, -k.size] }.to_h
end
|
49
164
|
end
|
50
165
|
end
|
@@ -14,6 +14,15 @@ module TwitterWithAutoPagination
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
end
|
17
|
+
|
18
|
+
# Wraps the inherited list_members call with instrumentation, caching and
# cursor-based collection.
#
# A trailing options hash in +args+ is merged over the defaults
# (count: 5000, skip_status: 1, cursor: -1). The first positional argument is
# passed to fetch_cache_or_call_api together with the method name and options.
#
# @param args positional arguments followed by an optional options hash
# @return whatever instrument / fetch_cache_or_call_api yield for the collected
#   result (presumably the list's member users - confirm against those helpers)
def list_members(*args)
  defaults = {count: 5000, skip_status: 1, cursor: -1}
  options = defaults.merge(args.extract_options!)
  cache_target = args[0]

  instrument(__method__, nil, options) do
    fetch_cache_or_call_api(__method__, cache_target, options) do
      # Resolved inside the block so it is only looked up when the API is actually called.
      parent_impl = method(__method__).super_method
      collect_with_cursor(parent_impl, *args, options)
    end
  end
end
|
17
26
|
end
|
18
27
|
end
|
19
28
|
end
|
@@ -108,7 +108,7 @@ module TwitterWithAutoPagination
|
|
108
108
|
while (next_cursor = last_response[:next_cursor]) && next_cursor != 0
|
109
109
|
options[:cursor] = next_cursor
|
110
110
|
last_response = call_api(method_obj, *args, options).attrs
|
111
|
-
return_data += (last_response[:users] || last_response[:ids])
|
111
|
+
return_data += (last_response[:users] || last_response[:ids] || last_response[:lists])
|
112
112
|
end
|
113
113
|
|
114
114
|
return_data
|
@@ -122,6 +122,8 @@ module TwitterWithAutoPagination
|
|
122
122
|
"hash-str#{delim}#{credentials_hash}"
|
123
123
|
when method_name == :search
|
124
124
|
"str#{delim}#{user.to_s}"
|
125
|
+
when method_name == :list_members
|
126
|
+
"list_id#{delim}#{user.to_s}"
|
125
127
|
when method_name == :mentions_timeline
|
126
128
|
"#{user.kind_of?(Integer) ? 'id' : 'sn'}#{delim}#{user.to_s}"
|
127
129
|
when method_name == :home_timeline
|
@@ -6,6 +6,7 @@ Gem::Specification.new do |spec|
|
|
6
6
|
spec.add_dependency 'activesupport'
|
7
7
|
spec.add_dependency 'hashie'
|
8
8
|
spec.add_dependency 'parallel'
|
9
|
+
spec.add_dependency 'mecab'
|
9
10
|
|
10
11
|
spec.add_development_dependency 'bundler'
|
11
12
|
|
@@ -22,5 +23,5 @@ Gem::Specification.new do |spec|
|
|
22
23
|
spec.required_ruby_version = '>= 2.3'
|
23
24
|
spec.summary = spec.description
|
24
25
|
spec.test_files = Dir.glob('spec/**/*')
|
25
|
-
spec.version = '0.8.2'
|
26
|
+
spec.version = '0.8.3'
|
26
27
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twitter_with_auto_pagination
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.2
|
4
|
+
version: 0.8.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shinohara Teruki
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: twitter
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: mecab
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: bundler
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|