twords 0.1.8 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ff2afbb65117a20ea8ae1fae8889372a292fbe50
4
- data.tar.gz: 1315cb7fe01bb2d995337929df84934b3c22e167
3
+ metadata.gz: deb67eadb91095b9a3ef2a0cefe7c9452038c0f1
4
+ data.tar.gz: 7cb612c4aa544038bfb9eb2c88c5367269b1c4c6
5
5
  SHA512:
6
- metadata.gz: 0b26b32f09d1b70c235da6c0fd8af11660b25412d39489e84022caed28d5ec64033e5f78adf9ffb617182d52322161fc7fab54e873366346c8a04af3544481b9
7
- data.tar.gz: 5d4161ecb3543542b1622817e9079e373d5a0222db3c12f35afbef2332876887a4ad86c858856b606d7f9450ac4cba333c8f1a9972cb0d215e4ca0126427830d
6
+ metadata.gz: 615f31dd12aea64aa8ff1304e947f36706a2e23145ca35153227aac72999359d87bfa2ef97c5f6beaae258be2f97f403c1cba7c78502e7c28991318da081768c
7
+ data.tar.gz: 49357b0e2060a1726956fb5c1f996ff2281eb2306a4f2ebe1fa2176851d95af5181c5c4c2f5610cd36db782193f6c60776a374d7a7ac70d8f73b79b3a3802394
data/README.md CHANGED
@@ -22,35 +22,59 @@ Or install it yourself as:
22
22
 
23
23
  ## Usage
24
24
 
25
+ Twords takes a configuration block, and if it doesn't find one it will set the following defaults:
26
+
25
27
  ```ruby
26
28
  Twords.config do |config|
27
- config.rejects = %w[are this is from be on the for to and at our of in rt a with &] # these words will not be counted (example, not a default; you must provide your own list)
28
- config.range = 14 # number of days to check, no default
29
-
30
- config.include_hashtags = false # default, excludes words beginning with '#'
31
- config.include_uris = false # default, uses URI#regexp to match
32
- config.include_mentions = false # default, excludes words beginning with '@'
33
-
29
+ config.rejects = %w[my us we an w/ because b/c or are this is from
30
+ be on the for to and at our of in rt a with &
31
+ that it by as if was] # These words will not be counted
32
+
33
+ config.range = 30 # Number of days to check
34
+
35
+ config.include_hashtags = false # Excludes strings beginning with '#'
36
+ config.include_uris = false # Excludes strings that match URI#regexp
37
+ config.include_mentions = false # Excludes strings beginning with '@'
38
+
34
39
  config.up_to { Time.now } # The block must return an object that responds to #to_time. The time is lazy evaluated and the range is counted backward from here.
35
-
40
+
41
+ # By default the Twitter client looks for keys stored in environment variables with the names listed below. Feel free to change the configuration, but never hard-code the keys.
36
42
  config.twitter_client do |twitter|
37
- twitter.consumer_key = YOUR_TWITTER_CONSUMER_KEY
38
- twitter.consumer_secret = YOUR_TWITTER_CONSUMER_SECRET
39
- twitter.access_token = YOUR_TWITTER_ACCESS_TOKEN
40
- twitter.access_token_secret = YOUR_TWITTER_ACCESS_TOKEN_SECRET
43
+ twitter.consumer_key = ENV['TWITTER_CONSUMER_KEY']
44
+ twitter.consumer_secret = ENV['TWITTER_CONSUMER_SECRET']
45
+ twitter.access_token = ENV['TWITTER_ACCESS_TOKEN']
46
+ twitter.access_token_secret = ENV['TWITTER_ACCESS_TOKEN_SECRET']
41
47
  end
42
48
  end
43
49
 
44
50
  twords = Twords.new 'user_one', 'user_two' # A list of Twitter handles to include in the count.
45
51
 
46
52
  twords.audit
53
+ # Fetched user_one's timeline
54
+ # Fetched user_two's timeline
47
55
  # => true
48
56
 
49
57
  twords.words
50
- # => { "butts"=>32, "poo"=>28, "pups"=>36, ... }
58
+ # => { "pizza"=>32, "burger"=>28, "pups"=>36, ... }
51
59
 
52
60
  twords.words_forward # Sort descending. Alias #sort_words
53
- # => [["pups", 36], ["butts", 32], ["poo", 28], ...]
61
+ # => [["pups", 36], ["pizza", 32], ["burger", 28], ...]
62
+
63
+ Twords.config { |config| config.include_hashtags = true }
64
+
65
+ twords.audit
66
+ # => true
67
+
68
+ twords.words
69
+ # => { "pizza"=>32, "burger"=>28, "pups"=>36, ... }
70
+
71
+ twords.audit!
72
+ # Fetched user_one's timeline
73
+ # Fetched user_two's timeline
74
+ # => true
75
+
76
+ twords.words
77
+ # => { "#TACOSTACOSTACOS"=>14321, "pizza"=>32, "burger"=>28, "pups"=>36, ... }
54
78
  ```
55
79
 
56
80
  ## Development
@@ -67,4 +91,3 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/msimon
67
91
  ## License
68
92
 
69
93
  The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
70
-
@@ -1,15 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'csv'
4
- require 'twitter'
5
- require 'uri'
6
-
3
+ require 'twords/configuration'
4
+ require 'twords/instance_methods'
7
5
  require 'twords/version'
8
6
 
9
7
  # Twords.config do |config|
10
- # config.rejects = %w[the for and a i of if]
11
- # config.max_age = 14
8
+ # config.rejects = %w[my us we an w/ because b/c or are this is from
9
+ # be on the for to and at our of in rt a with &
10
+ # that it by as if was]
11
+
12
+ # config.range = 30
12
13
  # config.up_to { Time.now }
14
+ # config.include_hashtags = false
15
+ # config.include_uris = false
16
+ # config.include_mentions = false
13
17
  #
14
18
  # config.twitter_client do |twitter|
15
19
  # twitter.consumer_key = YOUR_TWITTER_CONSUMER_KEY
@@ -25,212 +29,18 @@ require 'twords/version'
25
29
  # # => true
26
30
  #
27
31
  # twords.words
28
- # # => { "butts"=>35, "poo"=>32, "pups"=>28, ... }
32
+ # # => { "pizza"=>32, "burger"=>28, "pups"=>36, ... }
29
33
  class Twords
30
- class << self
31
- attr_reader :rejects, :client, :up_to_block, :include_hashtags, :include_uris,
32
- :include_mentions
33
- attr_accessor :range
34
-
35
- def config
36
- yield self
37
- end
38
-
39
- def twitter_client(&block)
40
- @client = Twitter::REST::Client.new(&block)
41
- end
42
-
43
- def rejects=(*args)
44
- @rejects = args.flatten
45
- end
46
-
47
- def include_hashtags=(boolean)
48
- not_a_boolean_error(boolean)
49
- @include_hashtags = boolean
50
- end
51
-
52
- def include_uris=(boolean)
53
- not_a_boolean_error(boolean)
54
- @include_uris = boolean
55
- end
56
- alias include_urls include_uris
57
-
58
- def include_mentions=(boolean)
59
- not_a_boolean_error(boolean)
60
- @include_mentions = boolean
61
- end
62
-
63
- def not_a_boolean_error(boolean)
64
- raise ArgumentError, 'argument must be a booolean value' unless a_boolean?(boolean)
65
- end
66
-
67
- def a_boolean?(other)
68
- [true, false].include?(other)
69
- end
70
-
71
- def up_to(&time_block)
72
- @up_to_block = time_block
73
- end
74
- end
75
-
76
- attr_reader :screen_names, :words, :requests, :client
77
-
78
- def initialize(*screen_names)
79
- @screen_names = screen_names.flatten
80
- @words = {}
81
- @requests = 0
82
- end
83
-
84
- def client
85
- @_client ||= self.class.client
86
- end
87
-
88
- def range
89
- @_range ||= self.class.range
90
- end
91
-
92
- def rejects
93
- @_rejects ||= self.class.rejects
94
- end
95
-
96
- def audited?
97
- @audited
98
- end
99
-
100
- def hashtag?(word)
101
- return false if self.class.include_hashtags
102
- !(word =~ /#(\w+)/).nil?
103
- end
104
-
105
- def uri?(word)
106
- return false if self.class.include_uris
107
- !(word =~ URI.regexp).nil?
108
- end
109
-
110
- def mention?(word)
111
- return false if self.class.include_mentions
112
- !(word =~ /@(\w+)/).nil?
113
- end
114
-
115
- def hashtags
116
- /#/
117
- end
118
-
119
- def should_be_skipped?(word)
120
- rejects.include?(word) || hashtag?(word) || uri?(word) || mention?(word)
121
- end
122
-
123
- def sort_words
124
- words.sort { |a, b| b.last <=> a.last }
125
- end
126
- alias words_forward sort_words
127
-
128
- def timeline
129
- @_timeline ||= screen_names.map { |name| fetch_timeline(name) }.flatten
130
- end
131
-
132
- # Make two cursored API calls to fetch the 400 most recent tweets
133
- def fetch_timeline(screen_name)
134
- return [] if screen_name.to_s.empty?
135
- @requests += 1
136
- timeline = client.user_timeline(screen_name, tweet_mode: 'extended', count: 200)
137
- return timeline if timeline.empty?
138
- timeline = fetch_older_tweets(timeline, screen_name)
139
- puts "Fetched #{screen_name}'s timeline"
140
- timeline
141
- end
142
-
143
- def fetch_older_tweets(timeline, screen_name)
144
- return timeline if age_of_tweet_in_days(timeline.last) > range
145
- @requests += 1
146
- first_count = timeline.count
147
- timeline += client.user_timeline(
148
- screen_name,
149
- tweet_mode: 'extended',
150
- max_id: timeline.last.id - 1,
151
- count: 200
152
- )
153
- second_count = timeline.count
154
- return timeline if second_count == first_count
155
- fetch_older_tweets(timeline, screen_name)
156
- end
157
-
158
- def tweets
159
- @_tweets ||= timeline.each_with_object([]) do |tweet, memo|
160
- next if tweet.created_at > up_to_time
161
- memo << tweet if age_of_tweet_in_days(tweet) <= range
162
- end
163
- end
164
-
165
- def sort_tweets
166
- tweets.sort { |a, b| b.created_at <=> a.created_at }
167
- end
168
-
169
- def sort_tweets!
170
- tweets.sort! { |a, b| b.created_at <=> a.created_at }
171
- end
172
-
173
- def age_of_tweet_in_days(tweet)
174
- (up_to_time - tweet.created_at) / 86_400
175
- end
176
-
177
- def up_to_time
178
- self.class.up_to_block.call.to_time
179
- end
180
-
181
- def count_words
182
- words.clear
183
- tweets.each do |tweet|
184
- words_array = tweet.attrs[:full_text].downcase.split(' ')
185
- words_array.each do |word|
186
- next if should_be_skipped?(word)
187
- if words.key?(word)
188
- words[word] += 1
189
- else
190
- words[word] = 1
191
- end
192
- end
193
- end
194
- end
195
-
196
- def audit
197
- count_words unless audited?
198
- @audited = true
199
- end
200
-
201
- def audit!
202
- @audited = false
203
- audit
204
- end
205
-
206
- def tweets_count
207
- @_tweets_count ||= tweets.count
208
- end
209
-
210
- def to_csv
211
- CSV.generate do |csv|
212
- csv << %w[word count]
213
- sort_words.each do |word_count|
214
- csv << word_count
215
- end
216
- end
217
- end
218
-
219
- def write_to_csv(opts = {})
220
- filename = opts.fetch(:filename) { 'twords_report.csv' }
221
- write_file(filename, :to_csv, opts)
222
- end
223
-
224
- def to_json
225
- sort_words.to_h.to_json
34
+ def self.config
35
+ @configuration ||= Configuration.new
36
+ @configuration.tap { |config| yield config if block_given? }
226
37
  end
227
38
 
228
- def write_to_json(opts = {})
229
- filename = opts.fetch(:filename) { 'twords_report.json' }
230
- write_file(filename, :to_json, opts)
39
+ def self.reset_config!
40
+ config.reset!
231
41
  end
232
42
 
233
- def write_file(filename, method, opts = {})
234
- File.open(filename, 'w', opts) { |file| file.write send(method) }
43
+ def self.client
44
+ config.client
235
45
  end
236
46
  end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ class Twords
4
+ # include ConfigAccessible to access shared configuration settings
5
+ module ConfigAccessible
6
+ module_function
7
+
8
+ def config
9
+ Twords.config
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'twitter'
4
+ require 'twords/twitter_client'
5
+
6
+ class Twords
7
+ # Configuration object
8
+ class Configuration
9
+ DEFAULT_REJECTS = %w[
10
+ my us we an w/ because
11
+ b/c or are this is from
12
+ be on the for to and at
13
+ our of in rt a with &amp;
14
+ that it by as if was
15
+ ].freeze
16
+
17
+ DEFAULT_TWITTER_CONFIG = lambda do |twitter|
18
+ twitter.consumer_key = ENV['TWITTER_CONSUMER_KEY']
19
+ twitter.consumer_secret = ENV['TWITTER_CONSUMER_SECRET']
20
+ twitter.access_token = ENV['TWITTER_ACCESS_TOKEN']
21
+ twitter.access_token_secret = ENV['TWITTER_ACCESS_TOKEN_SECRET']
22
+ end
23
+
24
+ DEFAULT_OPTIONS = {
25
+ include_uris: false,
26
+ include_hashtags: false,
27
+ include_mentions: false,
28
+ range: 30,
29
+ client: TwitterClient.new(&DEFAULT_TWITTER_CONFIG),
30
+ up_to_block: -> { Time.now },
31
+ rejects: DEFAULT_REJECTS
32
+ }.freeze
33
+
34
+ attr_reader :rejects, :client, :up_to_block, :include_hashtags, :include_uris,
35
+ :include_mentions
36
+
37
+ attr_accessor :range
38
+
39
+ def initialize
40
+ set_defaults
41
+ end
42
+
43
+ def reset!
44
+ tap { set_defaults }
45
+ end
46
+
47
+ def twitter_client(&block)
48
+ @client = TwitterClient.new(&block)
49
+ end
50
+
51
+ def rejects=(*args)
52
+ @rejects = args.flatten
53
+ end
54
+
55
+ def include_hashtags=(boolean)
56
+ not_a_boolean_error(boolean)
57
+ @include_hashtags = boolean
58
+ end
59
+
60
+ def include_uris=(boolean)
61
+ not_a_boolean_error(boolean)
62
+ @include_uris = boolean
63
+ end
64
+ alias include_urls= include_uris=
65
+
66
+ def include_mentions=(boolean)
67
+ not_a_boolean_error(boolean)
68
+ @include_mentions = boolean
69
+ end
70
+
71
+ def up_to(&time_block)
72
+ @up_to_block = time_block
73
+ end
74
+
75
+ def up_to_time
76
+ up_to_block.call.to_time
77
+ end
78
+
79
+ private
80
+
81
+ # private method
82
+ def set_defaults
83
+ ivars = %i[include_uris include_hashtags include_mentions range client up_to_block rejects]
84
+ ivars.each { |ivar| instance_variable_set("@#{ivar}", DEFAULT_OPTIONS[ivar]) }
85
+ end
86
+
87
+ # private method
88
+ def a_boolean?(other)
89
+ [true, false].include?(other)
90
+ end
91
+
92
+ # private method
93
+ def not_a_boolean_error(boolean)
94
+ raise ArgumentError, 'argument must be a booolean value' unless a_boolean?(boolean)
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+ require 'uri'
5
+
6
+ require 'twords/config_accessible'
7
+ require 'twords/word_matcher'
8
+
9
+ # Instance methods
10
+ class Twords
11
+ include ConfigAccessible
12
+
13
+ attr_reader :screen_names, :words
14
+
15
+ def initialize(*screen_names)
16
+ @screen_names = screen_names.flatten
17
+ @words = {}
18
+ end
19
+
20
+ def audited?
21
+ @audited
22
+ end
23
+
24
+ def audit
25
+ count_words unless audited?
26
+ @audited = true
27
+ end
28
+
29
+ def audit!
30
+ instance_variables.reject { |ivar| %i[@screen_names @words].include?(ivar) }.each do |ivar|
31
+ instance_variable_set(ivar, nil)
32
+ end
33
+
34
+ audit
35
+ end
36
+
37
+ def sort_words
38
+ @_sort_words ||= words.sort { |a, b| b.last <=> a.last }
39
+ end
40
+ alias words_forward sort_words
41
+
42
+ def tweets
43
+ @_tweets ||= client.filter_tweets(screen_names)
44
+ end
45
+
46
+ def sort_tweets
47
+ tweets.sort { |a, b| b.created_at <=> a.created_at }
48
+ end
49
+
50
+ def sort_tweets!
51
+ tweets.sort! { |a, b| b.created_at <=> a.created_at }
52
+ end
53
+
54
+ def tweets_count
55
+ @_tweets_count ||= tweets.count
56
+ end
57
+
58
+ def total_word_count
59
+ @_total_word_count ||= words.values.reduce(:+)
60
+ end
61
+
62
+ def percentages
63
+ @_percentages ||= words.each_with_object({}) do |word_count, hash|
64
+ hash[word_count.first] = percentage(word_count.last)
65
+ end
66
+ end
67
+
68
+ def sort_percentages
69
+ @_sort_percentages ||= percentages.sort { |a, b| b.last <=> a.last }
70
+ end
71
+
72
+ def to_csv
73
+ CSV.generate do |csv|
74
+ csv << %w[word count]
75
+ sort_words.each do |word_count|
76
+ csv << word_count
77
+ end
78
+ end
79
+ end
80
+
81
+ def write_to_csv(opts = {})
82
+ filename = opts.fetch(:filename) { 'twords_report.csv' }
83
+ write_file(filename, :to_csv, opts)
84
+ end
85
+
86
+ def to_json
87
+ sort_words.to_h.to_json
88
+ end
89
+
90
+ def write_to_json(opts = {})
91
+ filename = opts.fetch(:filename) { 'twords_report.json' }
92
+ write_file(filename, :to_json, opts)
93
+ end
94
+
95
+ private
96
+
97
+ # private method
98
+ def client
99
+ config.client
100
+ end
101
+
102
+ # private method
103
+ def count_words
104
+ words.clear
105
+ tweets.each do |tweet|
106
+ words_array(tweet).each do |word|
107
+ next if WordMatcher.should_be_skipped?(word)
108
+ words.key?(word) ? words[word] += 1 : words[word] = 1
109
+ end
110
+ end
111
+ end
112
+
113
+ # private method
114
+ def words_array(tweet)
115
+ tweet.attrs[:full_text].downcase.split(' ')
116
+ end
117
+
118
+ # private method
119
+ def percentage(count)
120
+ (count / total_word_count.to_f * 100)
121
+ end
122
+
123
+ # private method
124
+ def write_file(filename, method, opts = {})
125
+ File.open(filename, 'w', opts) { |file| file.write send(method) }
126
+ end
127
+ end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal
2
+
3
+ require 'twords/config_accessible'
4
+
5
+ class Twords
6
+ # Twitter REST API client
7
+ class TwitterClient
8
+ include ConfigAccessible
9
+
10
+ attr_reader :client
11
+
12
+ def initialize(&block)
13
+ @client = Twitter::REST::Client.new(&block)
14
+ end
15
+
16
+ def filter_tweets(screen_names)
17
+ full_timeline(screen_names).each_with_object([]) do |tweet, memo|
18
+ next if tweet.created_at > up_to_time
19
+ memo << tweet if age_of_tweet_in_days(tweet) <= range
20
+ end
21
+ end
22
+
23
+ private
24
+
25
+ # private method
26
+ def full_timeline(screen_names)
27
+ screen_names.map { |screen_name| fetch_user_timeline(screen_name) }.flatten.uniq
28
+ end
29
+
30
+ # private method
31
+ def fetch_user_timeline(screen_name)
32
+ return [] if screen_name.to_s.empty?
33
+ user_timeline = client.user_timeline(screen_name, tweet_mode: 'extended', count: 200)
34
+ return user_timeline if user_timeline.empty?
35
+ user_timeline = fetch_older_tweets(user_timeline, screen_name)
36
+ puts "Fetched #{screen_name}'s timeline"
37
+ user_timeline
38
+ rescue Twitter::Error::TooManyRequests
39
+ puts 'Rate limit exceeded, waiting 5 minutes' && sleep(300)
40
+ fetch_user_timeline(screen_name)
41
+ end
42
+
43
+ # private method
44
+ def age_of_tweet_in_days(tweet)
45
+ (up_to_time - tweet.created_at) / 86_400
46
+ end
47
+
48
+ # private method
49
+ def up_to_time
50
+ config.up_to_time
51
+ end
52
+
53
+ # private method
54
+ def range
55
+ config.range
56
+ end
57
+
58
+ # private method
59
+ def fetch_older_tweets(user_timeline, screen_name)
60
+ return user_timeline if age_of_tweet_in_days(user_timeline.last) > range
61
+ first_count = user_timeline.count
62
+ user_timeline += client.user_timeline(
63
+ screen_name,
64
+ tweet_mode: 'extended',
65
+ max_id: user_timeline.last.id - 1,
66
+ count: 200
67
+ )
68
+ return user_timeline if user_timeline.count == first_count
69
+ fetch_older_tweets(user_timeline, screen_name)
70
+ end
71
+ end
72
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Twords
4
- VERSION = '0.1.8'.freeze
4
+ VERSION = '0.2.0'.freeze
5
5
  end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'twords/config_accessible'
4
+
5
+ class Twords
6
+ # Checks if words should be counted or not
7
+ class WordMatcher
8
+ class << self
9
+ include ConfigAccessible
10
+
11
+ def should_be_skipped?(word)
12
+ reject?(word) || hashtag?(word) || uri?(word) || mention?(word)
13
+ end
14
+
15
+ def reject?(word)
16
+ config.rejects.include?(word)
17
+ end
18
+
19
+ def hashtag?(word)
20
+ return if config.include_hashtags
21
+ !(word =~ /#(\w+)/).nil?
22
+ end
23
+
24
+ def uri?(word)
25
+ return if config.include_uris
26
+ !(word =~ URI.regexp).nil?
27
+ end
28
+
29
+ def mention?(word)
30
+ return if config.include_mentions
31
+ !(word =~ /@(\w+)/).nil?
32
+ end
33
+ end
34
+ end
35
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twords
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. Simon Borg
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-06-22 00:00:00.000000000 Z
11
+ date: 2017-06-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: twitter
@@ -62,7 +62,12 @@ files:
62
62
  - LICENSE.txt
63
63
  - README.md
64
64
  - lib/twords.rb
65
+ - lib/twords/config_accessible.rb
66
+ - lib/twords/configuration.rb
67
+ - lib/twords/instance_methods.rb
68
+ - lib/twords/twitter_client.rb
65
69
  - lib/twords/version.rb
70
+ - lib/twords/word_matcher.rb
66
71
  - twords.gemspec
67
72
  homepage: https://github.com/msimonborg/twords
68
73
  licenses: