twitter_with_auto_pagination 0.8.3 → 0.8.4
This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 7c6e0231b2860c7fca4ae5d2b5b8c8a7ab397c8b
         | 
| 4 | 
            +
              data.tar.gz: 9f8698b8e86923e4855d75be50440ac803a96d3e
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: bf30ac3e1930e548e99941977260e5bf9ac70d48b51e5a978638cd4b7b71a998cda96f7dc1b9ea12e2587a4a70992bcb5df99cddd96086f84b1824bfdb07edaf
         | 
| 7 | 
            +
              data.tar.gz: a2c5ca9ec3c096d8a892f616139211ee76eafc4e7a27a5c1e93a2259e38387a5fdf5e16d5f1d39e02e4ac199e64c8c14cb6861b0685f4ba5eb74e769396cacfe
         | 
| @@ -45,8 +45,13 @@ module TwitterWithAutoPagination | |
| 45 45 |  | 
| 46 46 | 
             
                    alias tweet_clusters clusters_belong_to
         | 
| 47 47 |  | 
| 48 | 
            -
                    def list_clusters(user, each_member: 300, total_member: 1000, rate: 0.3, limit: 10, debug: false)
         | 
| 49 | 
            -
                       | 
| 48 | 
            +
                    def list_clusters(user, shrink: false, each_member: 300, total_member: 1000, rate: 0.3, limit: 10, debug: false)
         | 
| 49 | 
            +
                      begin
         | 
| 50 | 
            +
                        lists = memberships(user).sort_by { |li| li.member_count }
         | 
| 51 | 
            +
                      rescue => e
         | 
| 52 | 
            +
                        puts "#{e.class}: #{e.message} #{user.inspect}" if debug
         | 
| 53 | 
            +
                        lists = []
         | 
| 54 | 
            +
                      end
         | 
| 50 55 | 
             
                      puts "lists: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
         | 
| 51 56 | 
             
                      return {} if lists.empty?
         | 
| 52 57 |  | 
| @@ -55,47 +60,48 @@ module TwitterWithAutoPagination | |
| 55 60 | 
             
                        percentile75 = ((lists.length * 0.75).ceil) - 1
         | 
| 56 61 | 
             
                        lists = lists[percentile25..percentile75]
         | 
| 57 62 | 
             
                        puts "lists sliced by 25-75 percentile: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
         | 
| 58 | 
            -
                      end
         | 
| 63 | 
            +
                      end if shrink
         | 
| 59 64 |  | 
| 60 65 | 
             
                      list_special_words = %w()
         | 
| 61 | 
            -
                       | 
| 66 | 
            +
                      list_exclude_words1 = %r(list[0-9]*|people-ive-faved|twizard-magic-list|my-favstar-fm-list)
         | 
| 67 | 
            +
                      list_exclude_words2 = %w(it list people met)
         | 
| 62 68 |  | 
| 63 | 
            -
                      words = lists.map { |li| li.full_name.split('/')[1] | 
| 64 | 
            -
                         | 
| 69 | 
            +
                      words = lists.map { |li| li.full_name.split('/')[1] }.
         | 
| 70 | 
            +
                        select { |n| !n.match(list_exclude_words1) }.
         | 
| 71 | 
            +
                        map { |n| n.split('-') }.flatten.
         | 
| 72 | 
            +
                        delete_if { |w| w.size < 2 || list_exclude_words2.include?(w) }.
         | 
| 73 | 
            +
                        each_with_object(Hash.new(0)) { |w, memo| memo[w] += 1 }.
         | 
| 74 | 
            +
                        sort_by { |k, v| [-v, -k.size] }
         | 
| 65 75 |  | 
| 66 76 | 
             
                      puts "words: #{words.slice(0, 10)}" if debug
         | 
| 67 77 | 
             
                      return {} if words.empty?
         | 
| 68 78 |  | 
| 69 | 
            -
                       | 
| 70 | 
            -
                       | 
| 71 | 
            -
             | 
| 72 | 
            -
             | 
| 73 | 
            -
             | 
| 74 | 
            -
             | 
| 79 | 
            +
                      _words = []
         | 
| 80 | 
            +
                      lists =
         | 
| 81 | 
            +
                        filter(lists, min: 2) do |li, i|
         | 
| 82 | 
            +
                          _words = words[0..i].map(&:first)
         | 
| 83 | 
            +
                          name = li.full_name.split('/')[1]
         | 
| 84 | 
            +
                          _words.any? { |w| name.include?(w) }
         | 
| 85 | 
            +
                        end
         | 
| 86 | 
            +
                      puts "lists include #{_words.inspect}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
         | 
| 75 87 | 
             
                      return {} if lists.empty?
         | 
| 76 88 |  | 
| 77 | 
            -
                       | 
| 78 | 
            -
             | 
| 79 | 
            -
                         | 
| 80 | 
            -
                           | 
| 81 | 
            -
                           | 
| 82 | 
            -
                        else
         | 
| 83 | 
            -
                          each_member *= 1.25
         | 
| 89 | 
            +
                      _each_member = 0
         | 
| 90 | 
            +
                      lists =
         | 
| 91 | 
            +
                        filter(lists, min: 2) do |li, i|
         | 
| 92 | 
            +
                          _each_member = each_member * (1.0 + 0.25 * i)
         | 
| 93 | 
            +
                          (10 < lists.size ? 10 : 0) < li.member_count && li.member_count < _each_member
         | 
| 84 94 | 
             
                        end
         | 
| 85 | 
            -
                       | 
| 86 | 
            -
                      puts "lists limited by each member #{each_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
         | 
| 95 | 
            +
                      puts "lists limited by each member #{_each_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
         | 
| 87 96 | 
             
                      return {} if lists.empty?
         | 
| 88 97 |  | 
| 89 | 
            -
                       | 
| 90 | 
            -
             | 
| 91 | 
            -
                         | 
| 92 | 
            -
                           | 
| 93 | 
            -
                           | 
| 94 | 
            -
                        else
         | 
| 95 | 
            -
                          total_member *= 1.25
         | 
| 98 | 
            +
                      _total_member = 0
         | 
| 99 | 
            +
                      lists =
         | 
| 100 | 
            +
                        filter(lists, min: 1) do |_, i|
         | 
| 101 | 
            +
                          _total_member = total_member * (1.0 + 0.25 * i)
         | 
| 102 | 
            +
                          lists[0..i].map { |li| li.member_count }.sum < _total_member
         | 
| 96 103 | 
             
                        end
         | 
| 97 | 
            -
                       | 
| 98 | 
            -
                      puts "lists limited by total members #{total_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
         | 
| 104 | 
            +
                      puts "lists limited by total members #{_total_member}: #{lists.size} (#{lists.map { |li| li.member_count }.join(', ')})" if debug
         | 
| 99 105 | 
             
                      return {} if lists.empty?
         | 
| 100 106 |  | 
| 101 107 | 
             
                      members = lists.map do |li|
         | 
| @@ -110,7 +116,8 @@ module TwitterWithAutoPagination | |
| 110 116 | 
             
                      return {} if members.empty?
         | 
| 111 117 |  | 
| 112 118 | 
             
                      3.times do
         | 
| 113 | 
            -
                        _members = members.each_with_object(Hash.new(0)) { |member, memo| memo[member] += 1 }. | 
| 119 | 
            +
                        _members = members.each_with_object(Hash.new(0)) { |member, memo| memo[member] += 1 }.
         | 
| 120 | 
            +
                          select { |_, v| lists.size * rate < v }.keys
         | 
| 114 121 | 
             
                        if _members.size > 100
         | 
| 115 122 | 
             
                          members = _members
         | 
| 116 123 | 
             
                          break
         | 
| @@ -120,7 +127,6 @@ module TwitterWithAutoPagination | |
| 120 127 | 
             
                      end
         | 
| 121 128 | 
             
                      puts "members included multi lists #{rate}: #{members.size}" if debug
         | 
| 122 129 |  | 
| 123 | 
            -
                      require 'mecab'
         | 
| 124 130 |  | 
| 125 131 | 
             
                      profile_special_words = %w()
         | 
| 126 132 | 
             
                      profile_exclude_words = %w(in at of my no er the and for inc Inc com gmail 好き こと 最近 情報 さん ちゃん くん 発言 関係 もの 活動 見解 所属 組織 連絡 大好き サイト ブログ つぶやき こちら アカ アカウント イベント フォロー)
         | 
| @@ -128,37 +134,49 @@ module TwitterWithAutoPagination | |
| 128 134 | 
             
                      descriptions = members.map { |m| m.description.remove(URI.regexp) }
         | 
| 129 135 |  | 
| 130 136 | 
             
                      candidates, remains = descriptions.partition { |desc| desc.scan('/').size > 2 }
         | 
| 131 | 
            -
                      slash_freq =  | 
| 137 | 
            +
                      slash_freq = count_by_word(candidates, delim: '/')
         | 
| 132 138 | 
             
                      puts "words splitted by /: #{slash_freq.to_a.slice(0, 10)}" if debug
         | 
| 133 139 |  | 
| 134 140 | 
             
                      candidates, remains = remains.partition { |desc| desc.scan('|').size > 2 }
         | 
| 135 | 
            -
                      pipe_freq =  | 
| 141 | 
            +
                      pipe_freq = count_by_word(candidates, delim: '|')
         | 
| 136 142 | 
             
                      puts "words splitted by |: #{pipe_freq.to_a.slice(0, 10)}" if debug
         | 
| 137 143 |  | 
| 138 | 
            -
                       | 
| 139 | 
            -
                       | 
| 144 | 
            +
                      require 'mecab'
         | 
| 145 | 
            +
                      tagger = MeCab::Tagger.new("-d #{`mecab-config --dicdir`.chomp}/mecab-ipadic-neologd/")
         | 
| 146 | 
            +
             | 
| 147 | 
            +
                      noun_freq = count_by_word(remains, tagger: tagger, exclude_words: profile_exclude_words)
         | 
| 148 | 
            +
                      puts "words tagged as noun: #{noun_freq.to_a.slice(0, 10)}" if debug
         | 
| 140 149 |  | 
| 141 150 | 
             
                      slash_freq.merge(pipe_freq) { |_, old, neww| old + neww }.merge(noun_freq) { |_, old, neww| old + neww }.sort_by { |k, v| [-v, -k.size] }.slice(0, limit)
         | 
| 142 151 | 
             
                    end
         | 
| 143 152 |  | 
| 144 153 | 
             
                    private
         | 
| 145 154 |  | 
| 146 | 
            -
                    def  | 
| 147 | 
            -
                       | 
| 148 | 
            -
             | 
| 149 | 
            -
             | 
| 150 | 
            -
                         | 
| 151 | 
            -
                         | 
| 155 | 
            +
                    def filter(lists, min:)
         | 
| 156 | 
            +
                      min = [min, lists.size].min
         | 
| 157 | 
            +
                      _lists = []
         | 
| 158 | 
            +
                      3.times do |i|
         | 
| 159 | 
            +
                        _lists = lists.select { |li| yield(li, i) }
         | 
| 160 | 
            +
                        break if _lists.size >= min
         | 
| 161 | 
            +
                      end
         | 
| 162 | 
            +
                      _lists
         | 
| 152 163 | 
             
                    end
         | 
| 153 164 |  | 
| 154 | 
            -
                    def  | 
| 155 | 
            -
                       | 
| 156 | 
            -
             | 
| 157 | 
            -
             | 
| 158 | 
            -
                        map { | | 
| 159 | 
            -
             | 
| 165 | 
            +
                    def count_by_word(texts, delim: nil, tagger: nil, exclude_words: [])
         | 
| 166 | 
            +
                      texts = texts.dup
         | 
| 167 | 
            +
             | 
| 168 | 
            +
                      if delim
         | 
| 169 | 
            +
                        texts = texts.map { |t| t.split(delim) }.flatten.map(&:strip)
         | 
| 170 | 
            +
                      end
         | 
| 171 | 
            +
             | 
| 172 | 
            +
                      if tagger
         | 
| 173 | 
            +
                        texts = tagger.parse(texts.join(' ')).split("\n").
         | 
| 174 | 
            +
                          select { |line| line.include?('名詞') }.
         | 
| 175 | 
            +
                          map { |line| line.split("\t")[0] }
         | 
| 176 | 
            +
                      end
         | 
| 160 177 |  | 
| 161 | 
            -
                       | 
| 178 | 
            +
                      texts.delete_if { |w| w.empty? || w.size < 2 || 5 < w.size || exclude_words.include?(w) }.
         | 
| 179 | 
            +
                        each_with_object(Hash.new(0)) { |word, memo| memo[word] += 1 }.
         | 
| 162 180 | 
             
                        sort_by { |k, v| [-v, -k.size] }.to_h
         | 
| 163 181 | 
             
                    end
         | 
| 164 182 | 
             
                  end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: twitter_with_auto_pagination
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.8. | 
| 4 | 
            +
              version: 0.8.4
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Shinohara Teruki
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2016-10- | 
| 11 | 
            +
            date: 2016-10-05 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: twitter
         |