twitter_with_auto_pagination 0.8.3 → 0.8.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 21617993d79d35c184e0a5eb3bd68cedf7f9fe36
4
- data.tar.gz: 017b1a7d412e514c697f4296292fc7075bbff88d
3
+ metadata.gz: 7c6e0231b2860c7fca4ae5d2b5b8c8a7ab397c8b
4
+ data.tar.gz: 9f8698b8e86923e4855d75be50440ac803a96d3e
5
5
  SHA512:
6
- metadata.gz: c7667ea3c41a50167aa7765ca7bce2a6d6a321ea71f417a44f57fb213b258d6425190148fe944d371a41b66165e1e76814694c99a4aee1d290425b1c53ae2d93
7
- data.tar.gz: 49ac0e5a907bf133ed1a8ab304873c5efae18c525b4a1b56e7291178a459bfdf9f6b3989e5d8acecbc3c31316488f85280a89ca83c8bb14ec9d3763c070240f6
6
+ metadata.gz: bf30ac3e1930e548e99941977260e5bf9ac70d48b51e5a978638cd4b7b71a998cda96f7dc1b9ea12e2587a4a70992bcb5df99cddd96086f84b1824bfdb07edaf
7
+ data.tar.gz: a2c5ca9ec3c096d8a892f616139211ee76eafc4e7a27a5c1e93a2259e38387a5fdf5e16d5f1d39e02e4ac199e64c8c14cb6861b0685f4ba5eb74e769396cacfe
@@ -45,8 +45,13 @@ module TwitterWithAutoPagination
45
45
 
46
46
  alias tweet_clusters clusters_belong_to
47
47
 
48
- def list_clusters(user, each_member: 300, total_member: 1000, rate: 0.3, limit: 10, debug: false)
49
- lists = memberships(user).sort_by { |li| li.member_count }
48
+ def list_clusters(user, shrink: false, each_member: 300, total_member: 1000, rate: 0.3, limit: 10, debug: false)
49
+ begin
50
+ lists = memberships(user).sort_by { |li| li.member_count }
51
+ rescue => e
52
+ puts "#{e.class}: #{e.message} #{user.inspect}" if debug
53
+ lists = []
54
+ end
50
55
  puts "lists: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
51
56
  return {} if lists.empty?
52
57
 
@@ -55,47 +60,48 @@ module TwitterWithAutoPagination
55
60
  percentile75 = ((lists.length * 0.75).ceil) - 1
56
61
  lists = lists[percentile25..percentile75]
57
62
  puts "lists sliced by 25-75 percentile: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
58
- end
63
+ end if shrink
59
64
 
60
65
  list_special_words = %w()
61
- list_exclude_words = %w(list people met)
66
+ list_exclude_words1 = %r(list[0-9]*|people-ive-faved|twizard-magic-list|my-favstar-fm-list)
67
+ list_exclude_words2 = %w(it list people met)
62
68
 
63
- words = lists.map { |li| li.full_name.split('/')[1].split('-') }.flatten.delete_if { |w| w.size < 2 || list_exclude_words.include?(w) }.
64
- each_with_object(Hash.new(0)) { |w, memo| memo[w] += 1 }.select { |_, v| (10 < lists.size ? 1 : 0) < v }.sort_by { |k, v| [-v, -k.size] }
69
+ words = lists.map { |li| li.full_name.split('/')[1] }.
70
+ select { |n| !n.match(list_exclude_words1) }.
71
+ map { |n| n.split('-') }.flatten.
72
+ delete_if { |w| w.size < 2 || list_exclude_words2.include?(w) }.
73
+ each_with_object(Hash.new(0)) { |w, memo| memo[w] += 1 }.
74
+ sort_by { |k, v| [-v, -k.size] }
65
75
 
66
76
  puts "words: #{words.slice(0, 10)}" if debug
67
77
  return {} if words.empty?
68
78
 
69
- word = words[0][0]
70
- puts "word: #{word}" if debug
71
-
72
- # TODO: listsの数が小さすぎる場合はwordを増やす
73
- lists = lists.select { |li| li.full_name.split('/')[1].include?(word) }
74
- puts "lists include specified word: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
79
+ _words = []
80
+ lists =
81
+ filter(lists, min: 2) do |li, i|
82
+ _words = words[0..i].map(&:first)
83
+ name = li.full_name.split('/')[1]
84
+ _words.any? { |w| name.include?(w) }
85
+ end
86
+ puts "lists include #{_words.inspect}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
75
87
  return {} if lists.empty?
76
88
 
77
- 3.times do
78
- _lists = lists.select { |li| (10 < lists.size ? 10 : 0) < li.member_count && li.member_count < each_member }
79
- if _lists.size > 2 || _lists.size == lists.size
80
- lists = _lists
81
- break
82
- else
83
- each_member *= 1.25
89
+ _each_member = 0
90
+ lists =
91
+ filter(lists, min: 2) do |li, i|
92
+ _each_member = each_member * (1.0 + 0.25 * i)
93
+ (10 < lists.size ? 10 : 0) < li.member_count && li.member_count < _each_member
84
94
  end
85
- end
86
- puts "lists limited by each member #{each_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
95
+ puts "lists limited by each member #{_each_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
87
96
  return {} if lists.empty?
88
97
 
89
- 3.times do
90
- _lists = lists.select.with_index { |_, i| lists[0..i].map { |li| li.member_count }.sum < total_member }
91
- if _lists.any?
92
- lists = _lists
93
- break
94
- else
95
- total_member *= 1.25
98
+ _total_member = 0
99
+ lists =
100
+ filter(lists, min: 1) do |_, i|
101
+ _total_member = total_member * (1.0 + 0.25 * i)
102
+ lists[0..i].map { |li| li.member_count }.sum < _total_member
96
103
  end
97
- end
98
- puts "lists limited by total members #{total_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
104
+ puts "lists limited by total members #{_total_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
99
105
  return {} if lists.empty?
100
106
 
101
107
  members = lists.map do |li|
@@ -110,7 +116,8 @@ module TwitterWithAutoPagination
110
116
  return {} if members.empty?
111
117
 
112
118
  3.times do
113
- _members = members.each_with_object(Hash.new(0)) { |member, memo| memo[member] += 1 }.select { |_, v| lists.size * rate < v }.keys
119
+ _members = members.each_with_object(Hash.new(0)) { |member, memo| memo[member] += 1 }.
120
+ select { |_, v| lists.size * rate < v }.keys
114
121
  if _members.size > 100
115
122
  members = _members
116
123
  break
@@ -120,7 +127,6 @@ module TwitterWithAutoPagination
120
127
  end
121
128
  puts "members included multi lists #{rate}: #{members.size}" if debug
122
129
 
123
- require 'mecab'
124
130
 
125
131
  profile_special_words = %w()
126
132
  profile_exclude_words = %w(in at of my no er the and for inc Inc com gmail 好き こと 最近 情報 さん ちゃん くん 発言 関係 もの 活動 見解 所属 組織 連絡 大好き サイト ブログ つぶやき こちら アカ アカウント イベント フォロー)
@@ -128,37 +134,49 @@ module TwitterWithAutoPagination
128
134
  descriptions = members.map { |m| m.description.remove(URI.regexp) }
129
135
 
130
136
  candidates, remains = descriptions.partition { |desc| desc.scan('/').size > 2 }
131
- slash_freq = count_by_word_with_delim(candidates, delim: '/')
137
+ slash_freq = count_by_word(candidates, delim: '/')
132
138
  puts "words splitted by /: #{slash_freq.to_a.slice(0, 10)}" if debug
133
139
 
134
140
  candidates, remains = remains.partition { |desc| desc.scan('|').size > 2 }
135
- pipe_freq = count_by_word_with_delim(candidates, delim: '|')
141
+ pipe_freq = count_by_word(candidates, delim: '|')
136
142
  puts "words splitted by |: #{pipe_freq.to_a.slice(0, 10)}" if debug
137
143
 
138
- noun_freq = count_by_word_with_tagger(remains, exclude_words: profile_exclude_words)
139
- puts "words with nouns added: #{noun_freq.to_a.slice(0, 10)}" if debug
144
+ require 'mecab'
145
+ tagger = MeCab::Tagger.new("-d #{`mecab-config --dicdir`.chomp}/mecab-ipadic-neologd/")
146
+
147
+ noun_freq = count_by_word(remains, tagger: tagger, exclude_words: profile_exclude_words)
148
+ puts "words tagged as noun: #{noun_freq.to_a.slice(0, 10)}" if debug
140
149
 
141
150
  slash_freq.merge(pipe_freq) { |_, old, neww| old + neww }.merge(noun_freq) { |_, old, neww| old + neww }.sort_by { |k, v| [-v, -k.size] }.slice(0, limit)
142
151
  end
143
152
 
144
153
  private
145
154
 
146
- def count_by_word_with_delim(texts, delim:)
147
- texts.map { |t| t.split(delim) }.flatten.
148
- map(&:strip).
149
- delete_if { |w| w.empty? || w.size < 2 || 5 < w.size }.
150
- each_with_object(Hash.new(0)) { |w, memo| memo[w] += 1 }.
151
- sort_by { |k, v| [-v, -k.size] }.to_h
155
+ def filter(lists, min:)
156
+ min = [min, lists.size].min
157
+ _lists = []
158
+ 3.times do |i|
159
+ _lists = lists.select { |li| yield(li, i) }
160
+ break if _lists.size >= min
161
+ end
162
+ _lists
152
163
  end
153
164
 
154
- def count_by_word_with_tagger(texts, tagger: nil, exclude_words: [])
155
- tagger = MeCab::Tagger.new("-d #{`mecab-config --dicdir`.chomp}/mecab-ipadic-neologd/") if tagger.nil?
156
- nouns = tagger.parse(texts.join(' ')).split("\n").
157
- select { |line| line.include?('名詞') }.
158
- map { |line| line.split("\t")[0] }.
159
- delete_if { |w| w.empty? || w.size < 2 || 5 < w.size || exclude_words.include?(w) }
165
+ def count_by_word(texts, delim: nil, tagger: nil, exclude_words: [])
166
+ texts = texts.dup
167
+
168
+ if delim
169
+ texts = texts.map { |t| t.split(delim) }.flatten.map(&:strip)
170
+ end
171
+
172
+ if tagger
173
+ texts = tagger.parse(texts.join(' ')).split("\n").
174
+ select { |line| line.include?('名詞') }.
175
+ map { |line| line.split("\t")[0] }
176
+ end
160
177
 
161
- nouns.each_with_object(Hash.new(0)) { |noun, memo| memo[noun] += 1 }.
178
+ texts.delete_if { |w| w.empty? || w.size < 2 || 5 < w.size || exclude_words.include?(w) }.
179
+ each_with_object(Hash.new(0)) { |word, memo| memo[word] += 1 }.
162
180
  sort_by { |k, v| [-v, -k.size] }.to_h
163
181
  end
164
182
  end
@@ -23,5 +23,5 @@ Gem::Specification.new do |spec|
23
23
  spec.required_ruby_version = '>= 2.3'
24
24
  spec.summary = spec.description
25
25
  spec.test_files = Dir.glob('spec/**/*')
26
- spec.version = '0.8.3'
26
+ spec.version = '0.8.4'
27
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter_with_auto_pagination
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.3
4
+ version: 0.8.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shinohara Teruki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-04 00:00:00.000000000 Z
11
+ date: 2016-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: twitter