twitter_with_auto_pagination 0.8.2 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 21617993d79d35c184e0a5eb3bd68cedf7f9fe36
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 017b1a7d412e514c697f4296292fc7075bbff88d
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: c7667ea3c41a50167aa7765ca7bce2a6d6a321ea71f417a44f57fb213b258d6425190148fe944d371a41b66165e1e76814694c99a4aee1d290425b1c53ae2d93
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 49ac0e5a907bf133ed1a8ab304873c5efae18c525b4a1b56e7291178a459bfdf9f6b3989e5d8acecbc3c31316488f85280a89ca83c8bb14ec9d3763c070240f6
         
     | 
| 
         @@ -43,8 +43,123 @@ module TwitterWithAutoPagination 
     | 
|
| 
       43 
43 
     | 
    
         
             
                      frequency.select { |_, v| 2 < v }.sort_by { |_, v| -v }.slice(0, limit).to_h
         
     | 
| 
       44 
44 
     | 
    
         
             
                    end
         
     | 
| 
       45 
45 
     | 
    
         | 
| 
       46 
     | 
    
         
            -
                     
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
      
 46 
     | 
    
         
            +
                    alias tweet_clusters clusters_belong_to
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
                    # Guesses the words that characterise +user+ from the Twitter lists the
                    # user is a member of.
                    #
                    # Steps performed, in order:
                    # 1. fetch the lists through +memberships+ and sort them by member count;
                    # 2. while more than 200 lists remain, keep only the 25th..75th percentile slice;
                    # 3. split the part of each +full_name+ after the first '/' on '-' and pick
                    #    the most frequent word; keep only the lists whose name contains it;
                    # 4. narrow the lists by per-list member count (+each_member+) and by the
                    #    cumulative member count (+total_member+), relaxing each limit by 25%
                    #    up to three times;
                    # 5. fetch the members of the remaining lists through +list_members+ and try
                    #    to keep only those that appear in more than +rate+ of the lists;
                    # 6. count words in the members' profile descriptions.
                    #
                    # @param user the user handed to +memberships+
                    # @param each_member [Numeric] upper bound (exclusive) for a single list's member count
                    # @param total_member [Numeric] upper bound (exclusive) for the cumulative member count
                    # @param rate [Float] fraction of lists a member must exceed to be kept
                    # @param limit [Integer] maximum number of words returned
                    # @param debug [Boolean] when true, progress is written with +puts+
                    # @return [Hash, Array] +{}+ when any step leaves nothing to work with,
                    #   otherwise an Array of +[word, count]+ pairs sorted by descending count.
                    #   NOTE(review): the two return shapes differ (Hash vs Array of pairs);
                    #   confirm with callers before unifying them.
                    def list_clusters(user, each_member: 300, total_member: 1000, rate: 0.3, limit: 10, debug: false)
                      # Renders the member counts of the given lists for the debug output.
                      member_counts = ->(target) { target.map { |li| li.member_count }.join(', ') }

                      lists = memberships(user).sort_by { |li| li.member_count }
                      puts "lists: #{lists.size} (#{member_counts.call(lists)})" if debug
                      return {} if lists.empty?

                      # Drop the smallest and the largest lists until at most 200 remain.
                      while lists.size > 200
                        percentile25 = ((lists.length * 0.25).ceil) - 1
                        percentile75 = ((lists.length * 0.75).ceil) - 1
                        lists = lists[percentile25..percentile75]
                        puts "lists sliced by 25-75 percentile: #{lists.size} (#{member_counts.call(lists)})" if debug
                      end

                      list_exclude_words = %w(list people met)

                      # With more than 10 lists a word has to occur at least twice to count.
                      words = lists.flat_map { |li| li.full_name.split('/')[1].split('-') }.
                        reject { |w| w.size < 2 || list_exclude_words.include?(w) }.
                        each_with_object(Hash.new(0)) { |w, memo| memo[w] += 1 }.
                        select { |_, v| (10 < lists.size ? 1 : 0) < v }.
                        sort_by { |k, v| [-v, -k.size] }

                      puts "words: #{words.slice(0, 10)}" if debug
                      return {} if words.empty?

                      word = words[0][0]
                      puts "word: #{word}" if debug

                      # TODO: use more words when the number of lists is too small
                      lists = lists.select { |li| li.full_name.split('/')[1].include?(word) }
                      puts "lists include specified word: #{lists.size} (#{member_counts.call(lists)})" if debug
                      return {} if lists.empty?

                      # Keep lists whose size lies between the lower bound and each_member;
                      # relax each_member when that leaves two lists or fewer.
                      3.times do
                        _lists = lists.select { |li| (10 < lists.size ? 10 : 0) < li.member_count && li.member_count < each_member }
                        if _lists.size > 2 || _lists.size == lists.size
                          lists = _lists
                          break
                        else
                          each_member *= 1.25
                        end
                      end
                      puts "lists limited by each member #{each_member}: #{lists.size} (#{member_counts.call(lists)})" if debug
                      return {} if lists.empty?

                      # Keep every list whose cumulative member count (in list order) stays
                      # below total_member. A running sum avoids re-adding the whole prefix
                      # for each element.
                      3.times do
                        running_total = 0
                        _lists = lists.select { |li| (running_total += li.member_count) < total_member }
                        if _lists.any?
                          lists = _lists
                          break
                        else
                          total_member *= 1.25
                        end
                      end
                      puts "lists limited by total members #{total_member}: #{lists.size} (#{member_counts.call(lists)})" if debug
                      return {} if lists.empty?

                      # A list whose members cannot be fetched is skipped rather than failing the call.
                      members = lists.map do |li|
                        begin
                          list_members(li.id)
                        rescue => e
                          puts "#{e.class}: #{e.message} #{li.id} #{li.full_name} #{li.mode}" if debug
                          nil
                        end
                      end.compact.flatten
                      puts "candidate members: #{members.size}" if debug
                      return {} if members.empty?

                      # NOTE(review): raising +rate+ makes the filter stricter, so a retry can
                      # only yield fewer members than the attempt that was already rejected;
                      # when the first attempt fails, +members+ stays unfiltered. Confirm
                      # whether the rate was meant to be lowered instead.
                      3.times do
                        _members = members.each_with_object(Hash.new(0)) { |member, memo| memo[member] += 1 }.select { |_, v| lists.size * rate < v }.keys
                        if _members.size > 100
                          members = _members
                          break
                        else
                          rate += 0.1
                        end
                      end
                      puts "members included multi lists #{rate}: #{members.size}" if debug

                      # Loaded here, inside the method, rather than at file load time.
                      require 'mecab'

                      profile_exclude_words = %w(in at of my no er the and for inc Inc com gmail 好き こと 最近 情報 さん ちゃん くん 発言 関係 もの 活動 見解 所属 組織 連絡 大好き サイト ブログ つぶやき こちら アカ アカウント イベント フォロー)

                      descriptions = members.map { |m| m.description.remove(URI.regexp) }

                      # Descriptions with more than two '/' are split on '/'.
                      candidates, remains = descriptions.partition { |desc| desc.scan('/').size > 2 }
                      slash_freq = count_by_word_with_delim(candidates, delim: '/')
                      puts "words splitted by /: #{slash_freq.to_a.slice(0, 10)}" if debug

                      # Of the rest, descriptions with more than two '|' are split on '|'.
                      candidates, remains = remains.partition { |desc| desc.scan('|').size > 2 }
                      pipe_freq = count_by_word_with_delim(candidates, delim: '|')
                      puts "words splitted by |: #{pipe_freq.to_a.slice(0, 10)}" if debug

                      # Everything else goes through the morphological tagger.
                      noun_freq = count_by_word_with_tagger(remains, exclude_words: profile_exclude_words)
                      puts "words with nouns added: #{noun_freq.to_a.slice(0, 10)}" if debug

                      # Add up the three frequency tables and return the top +limit+ entries.
                      slash_freq.merge(pipe_freq) { |_, old, neww| old + neww }.merge(noun_freq) { |_, old, neww| old + neww }.sort_by { |k, v| [-v, -k.size] }.slice(0, limit)
                    end
         
     | 
| 
      
 143 
     | 
    
         
            +
             
     | 
| 
      
 144 
     | 
    
         
            +
                    private
         
     | 
| 
      
 145 
     | 
    
         
            +
             
     | 
| 
      
 146 
     | 
    
         
            +
                    # Counts the short tokens obtained by splitting every text on +delim+.
                    #
                    # Tokens are stripped of surrounding whitespace and only those that are
                    # 2 to 5 characters long are counted.
                    #
                    # @param texts [Array<String>] texts to split
                    # @param delim [String] delimiter handed to String#split
                    # @return [Hash{String => Integer}] token => number of occurrences, ordered
                    #   by descending count and, for equal counts, by longer token first
                    def count_by_word_with_delim(texts, delim:)
                      texts.flat_map { |text| text.split(delim) }.
                        map(&:strip).
                        select { |word| word.size.between?(2, 5) }.
                        each_with_object(Hash.new(0)) { |word, freq| freq[word] += 1 }.
                        sort_by { |word, count| [-count, -word.size] }.to_h
                    end
         
     | 
| 
      
 153 
     | 
    
         
            +
             
     | 
| 
      
 154 
     | 
    
         
            +
                    # Counts the short nouns a morphological tagger finds in the given texts.
                    #
                    # The texts are joined with a single space and parsed in one call. Each
                    # output line is expected to look like "surface<TAB>feature,feature,..."
                    # (assumes MeCab's default output format with the part of speech as the
                    # first feature - TODO confirm for the configured dictionary).
                    #
                    # A token is counted only when its feature column starts with 名詞 (noun).
                    # Matching 名詞 anywhere in the line would also pick up tokens whose
                    # surface form contains it or whose later features mention it, such as
                    # "接頭詞,名詞接続".
                    #
                    # @param texts [Array<String>] texts to analyse
                    # @param tagger [#parse, nil] tagger to use; when nil a MeCab::Tagger is
                    #   built with the mecab-ipadic-neologd directory under `mecab-config --dicdir`
                    # @param exclude_words [Array<String>] nouns that must not be counted
                    # @return [Hash{String => Integer}] noun => number of occurrences, ordered
                    #   by descending count and, for equal counts, by longer noun first
                    def count_by_word_with_tagger(texts, tagger: nil, exclude_words: [])
                      tagger ||= MeCab::Tagger.new("-d #{`mecab-config --dicdir`.chomp}/mecab-ipadic-neologd/")

                      nouns = tagger.parse(texts.join(' ')).split("\n").
                        map { |line| line.split("\t", 2) }.
                        select { |_, feature| feature.to_s.start_with?('名詞') }.
                        map(&:first).
                        reject { |word| word.size < 2 || 5 < word.size || exclude_words.include?(word) }

                      nouns.each_with_object(Hash.new(0)) { |noun, freq| freq[noun] += 1 }.
                        sort_by { |noun, count| [-count, -noun.size] }.to_h
                    end
         
     | 
| 
       49 
164 
     | 
    
         
             
                  end
         
     | 
| 
       50 
165 
     | 
    
         
             
                end
         
     | 
| 
         @@ -14,6 +14,15 @@ module TwitterWithAutoPagination 
     | 
|
| 
       14 
14 
     | 
    
         
             
                      end
         
     | 
| 
       15 
15 
     | 
    
         
             
                    end
         
     | 
| 
       16 
16 
     | 
    
         
             
                  end
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
                  # Fetches the members of a list, following cursors until the result is
                  # complete.
                  #
                  # A trailing options Hash in +args+ is merged over the defaults
                  # (count: 5000, skip_status: 1, cursor: -1). The call is wrapped in
                  # +instrument+ and +fetch_cache_or_call_api+, with +args[0]+ passed to the
                  # latter; the superclass implementation of this method is handed to
                  # +collect_with_cursor+.
                  def list_members(*args)
                    defaults = {count: 5000, skip_status: 1, cursor: -1}
                    options = defaults.merge(args.extract_options!)
                    api_name = __method__
                    instrument(api_name, nil, options) do
                      fetch_cache_or_call_api(api_name, args[0], options) do
                        collect_with_cursor(method(api_name).super_method, *args, options)
                      end
                    end
                  end
         
     | 
| 
       17 
26 
     | 
    
         
             
                end
         
     | 
| 
       18 
27 
     | 
    
         
             
              end
         
     | 
| 
       19 
28 
     | 
    
         
             
            end
         
     | 
| 
         @@ -108,7 +108,7 @@ module TwitterWithAutoPagination 
     | 
|
| 
       108 
108 
     | 
    
         
             
                    while (next_cursor = last_response[:next_cursor]) && next_cursor != 0
         
     | 
| 
       109 
109 
     | 
    
         
             
                      options[:cursor] = next_cursor
         
     | 
| 
       110 
110 
     | 
    
         
             
                      last_response = call_api(method_obj, *args, options).attrs
         
     | 
| 
       111 
     | 
    
         
            -
                      return_data += (last_response[:users] || last_response[:ids])
         
     | 
| 
      
 111 
     | 
    
         
            +
                      return_data += (last_response[:users] || last_response[:ids] || last_response[:lists])
         
     | 
| 
       112 
112 
     | 
    
         
             
                    end
         
     | 
| 
       113 
113 
     | 
    
         | 
| 
       114 
114 
     | 
    
         
             
                    return_data
         
     | 
| 
         @@ -122,6 +122,8 @@ module TwitterWithAutoPagination 
     | 
|
| 
       122 
122 
     | 
    
         
             
                          "hash-str#{delim}#{credentials_hash}"
         
     | 
| 
       123 
123 
     | 
    
         
             
                        when method_name == :search
         
     | 
| 
       124 
124 
     | 
    
         
             
                          "str#{delim}#{user.to_s}"
         
     | 
| 
      
 125 
     | 
    
         
            +
                        when method_name == :list_members
         
     | 
| 
      
 126 
     | 
    
         
            +
                          "list_id#{delim}#{user.to_s}"
         
     | 
| 
       125 
127 
     | 
    
         
             
                        when method_name == :mentions_timeline
         
     | 
| 
       126 
128 
     | 
    
         
             
                          "#{user.kind_of?(Integer) ? 'id' : 'sn'}#{delim}#{user.to_s}"
         
     | 
| 
       127 
129 
     | 
    
         
             
                        when method_name == :home_timeline
         
     | 
| 
         @@ -6,6 +6,7 @@ Gem::Specification.new do |spec| 
     | 
|
| 
       6 
6 
     | 
    
         
             
              spec.add_dependency 'activesupport'
         
     | 
| 
       7 
7 
     | 
    
         
             
              spec.add_dependency 'hashie'
         
     | 
| 
       8 
8 
     | 
    
         
             
              spec.add_dependency 'parallel'
         
     | 
| 
      
 9 
     | 
    
         
            +
              spec.add_dependency 'mecab'
         
     | 
| 
       9 
10 
     | 
    
         | 
| 
       10 
11 
     | 
    
         
             
              spec.add_development_dependency 'bundler'
         
     | 
| 
       11 
12 
     | 
    
         | 
| 
         @@ -22,5 +23,5 @@ Gem::Specification.new do |spec| 
     | 
|
| 
       22 
23 
     | 
    
         
             
              spec.required_ruby_version = '>= 2.3'
         
     | 
| 
       23 
24 
     | 
    
         
             
              spec.summary = spec.description
         
     | 
| 
       24 
25 
     | 
    
         
             
              spec.test_files = Dir.glob('spec/**/*')
         
     | 
| 
       25 
     | 
    
         
            -
              spec.version = '0.8. 
     | 
| 
      
 26 
     | 
    
         
            +
              spec.version = '0.8.3'
         
     | 
| 
       26 
27 
     | 
    
         
             
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: twitter_with_auto_pagination
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.8.2
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.8.3
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Shinohara Teruki
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2016-10- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2016-10-04 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: twitter
         
     | 
| 
         @@ -66,6 +66,20 @@ dependencies: 
     | 
|
| 
       66 
66 
     | 
    
         
             
                - - ">="
         
     | 
| 
       67 
67 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       68 
68 
     | 
    
         
             
                    version: '0'
         
     | 
| 
      
 69 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 70 
     | 
    
         
            +
              name: mecab
         
     | 
| 
      
 71 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 72 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 73 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 74 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 75 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 76 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 77 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 78 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 79 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 80 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 81 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 82 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
       69 
83 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       70 
84 
     | 
    
         
             
              name: bundler
         
     | 
| 
       71 
85 
     | 
    
         
             
              requirement: !ruby/object:Gem::Requirement
         
     |