twords 0.1.8 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +38 -15
- data/lib/twords.rb +18 -208
- data/lib/twords/config_accessible.rb +12 -0
- data/lib/twords/configuration.rb +97 -0
- data/lib/twords/instance_methods.rb +127 -0
- data/lib/twords/twitter_client.rb +72 -0
- data/lib/twords/version.rb +1 -1
- data/lib/twords/word_matcher.rb +35 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: deb67eadb91095b9a3ef2a0cefe7c9452038c0f1
|
4
|
+
data.tar.gz: 7cb612c4aa544038bfb9eb2c88c5367269b1c4c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 615f31dd12aea64aa8ff1304e947f36706a2e23145ca35153227aac72999359d87bfa2ef97c5f6beaae258be2f97f403c1cba7c78502e7c28991318da081768c
|
7
|
+
data.tar.gz: 49357b0e2060a1726956fb5c1f996ff2281eb2306a4f2ebe1fa2176851d95af5181c5c4c2f5610cd36db782193f6c60776a374d7a7ac70d8f73b79b3a3802394
|
data/README.md
CHANGED
@@ -22,35 +22,59 @@ Or install it yourself as:
|
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
|
+
Twords takes a configuration block, and if it doesn't find one it will set the following defaults:
|
26
|
+
|
25
27
|
```ruby
|
26
28
|
Twords.config do |config|
|
27
|
-
config.rejects = %w[
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
config.
|
32
|
-
|
33
|
-
|
29
|
+
config.rejects = %w[my us we an w/ because b/c or are this is from
|
30
|
+
be on the for to and at our of in rt a with &
|
31
|
+
that it by as if was] # These words will not be counted
|
32
|
+
|
33
|
+
config.range = 30 # Number of days to check
|
34
|
+
|
35
|
+
config.include_hashtags = false # Excludes strings beginning with '#'
|
36
|
+
config.include_uris = false # Excludes strings that match URI#regexp
|
37
|
+
config.include_mentions = false # Excludes strings beginning with '@'
|
38
|
+
|
34
39
|
config.up_to { Time.now } # The block must return an object that responds to #to_time. The time is lazily evaluated and the range is counted backward from here.
|
35
|
-
|
40
|
+
|
41
|
+
# By default the Twitter client will look for keys stored in environment variables with the names listed below. Feel free to change the configuration, but never hard-code the keys.
|
36
42
|
config.twitter_client do |twitter|
|
37
|
-
twitter.consumer_key =
|
38
|
-
twitter.consumer_secret =
|
39
|
-
twitter.access_token =
|
40
|
-
twitter.access_token_secret =
|
43
|
+
twitter.consumer_key = ENV['TWITTER_CONSUMER_KEY']
|
44
|
+
twitter.consumer_secret = ENV['TWITTER_CONSUMER_SECRET']
|
45
|
+
twitter.access_token = ENV['TWITTER_ACCESS_TOKEN']
|
46
|
+
twitter.access_token_secret = ENV['TWITTER_ACCESS_TOKEN_SECRET']
|
41
47
|
end
|
42
48
|
end
|
43
49
|
|
44
50
|
twords = Twords.new 'user_one', 'user_two' # A list of Twitter handles to include in the count.
|
45
51
|
|
46
52
|
twords.audit
|
53
|
+
# Fetched user_one's timeline
|
54
|
+
# Fetched user_two's timeline
|
47
55
|
# => true
|
48
56
|
|
49
57
|
twords.words
|
50
|
-
# => { "
|
58
|
+
# => { "pizza"=>32, "burger"=>28, "pups"=>36, ... }
|
51
59
|
|
52
60
|
twords.words_forward # Sort descending. Alias #sort_words
|
53
|
-
# => [["pups", 36], ["
|
61
|
+
# => [["pups", 36], ["pizza", 32], ["burger", 28], ...]
|
62
|
+
|
63
|
+
Twords.config { |config| config.include_hashtags = true }
|
64
|
+
|
65
|
+
twords.audit
|
66
|
+
# => true
|
67
|
+
|
68
|
+
twords.words
|
69
|
+
# => { "pizza"=>32, "burger"=>28, "pups"=>36, ... }
|
70
|
+
|
71
|
+
twords.audit!
|
72
|
+
# Fetched user_one's timeline
|
73
|
+
# Fetched user_two's timeline
|
74
|
+
# => true
|
75
|
+
|
76
|
+
twords.words
|
77
|
+
# => { "#TACOSTACOSTACOS"=>14321, "pizza"=>32, "burger"=>28, "pups"=>36, ... }
|
54
78
|
```
|
55
79
|
|
56
80
|
## Development
|
@@ -67,4 +91,3 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/msimon
|
|
67
91
|
## License
|
68
92
|
|
69
93
|
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
70
|
-
|
data/lib/twords.rb
CHANGED
@@ -1,15 +1,19 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require '
|
4
|
-
require '
|
5
|
-
require 'uri'
|
6
|
-
|
3
|
+
require 'twords/configuration'
|
4
|
+
require 'twords/instance_methods'
|
7
5
|
require 'twords/version'
|
8
6
|
|
9
7
|
# Twords.config do |config|
|
10
|
-
# config.rejects = %w[
|
11
|
-
#
|
8
|
+
# config.rejects = %w[my us we an w/ because b/c or are this is from
|
9
|
+
# be on the for to and at our of in rt a with &
|
10
|
+
# that it by as if was]
|
11
|
+
|
12
|
+
# config.range = 30
|
12
13
|
# config.up_to { Time.now }
|
14
|
+
# config.include_hashtags = false
|
15
|
+
# config.include_uris = false
|
16
|
+
# config.include_mentions = false
|
13
17
|
#
|
14
18
|
# config.twitter_client do |twitter|
|
15
19
|
# twitter.consumer_key = YOUR_TWITTER_CONSUMER_KEY
|
@@ -25,212 +29,18 @@ require 'twords/version'
|
|
25
29
|
# # => true
|
26
30
|
#
|
27
31
|
# twords.words
|
28
|
-
# # => { "
|
32
|
+
# # => { "pizza"=>32, "burger"=>28, "pups"=>36, ... }
|
29
33
|
class Twords
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
attr_accessor :range
|
34
|
-
|
35
|
-
def config
|
36
|
-
yield self
|
37
|
-
end
|
38
|
-
|
39
|
-
def twitter_client(&block)
|
40
|
-
@client = Twitter::REST::Client.new(&block)
|
41
|
-
end
|
42
|
-
|
43
|
-
def rejects=(*args)
|
44
|
-
@rejects = args.flatten
|
45
|
-
end
|
46
|
-
|
47
|
-
def include_hashtags=(boolean)
|
48
|
-
not_a_boolean_error(boolean)
|
49
|
-
@include_hashtags = boolean
|
50
|
-
end
|
51
|
-
|
52
|
-
def include_uris=(boolean)
|
53
|
-
not_a_boolean_error(boolean)
|
54
|
-
@include_uris = boolean
|
55
|
-
end
|
56
|
-
alias include_urls include_uris
|
57
|
-
|
58
|
-
def include_mentions=(boolean)
|
59
|
-
not_a_boolean_error(boolean)
|
60
|
-
@include_mentions = boolean
|
61
|
-
end
|
62
|
-
|
63
|
-
def not_a_boolean_error(boolean)
|
64
|
-
raise ArgumentError, 'argument must be a booolean value' unless a_boolean?(boolean)
|
65
|
-
end
|
66
|
-
|
67
|
-
def a_boolean?(other)
|
68
|
-
[true, false].include?(other)
|
69
|
-
end
|
70
|
-
|
71
|
-
def up_to(&time_block)
|
72
|
-
@up_to_block = time_block
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
attr_reader :screen_names, :words, :requests, :client
|
77
|
-
|
78
|
-
def initialize(*screen_names)
|
79
|
-
@screen_names = screen_names.flatten
|
80
|
-
@words = {}
|
81
|
-
@requests = 0
|
82
|
-
end
|
83
|
-
|
84
|
-
def client
|
85
|
-
@_client ||= self.class.client
|
86
|
-
end
|
87
|
-
|
88
|
-
def range
|
89
|
-
@_range ||= self.class.range
|
90
|
-
end
|
91
|
-
|
92
|
-
def rejects
|
93
|
-
@_rejects ||= self.class.rejects
|
94
|
-
end
|
95
|
-
|
96
|
-
def audited?
|
97
|
-
@audited
|
98
|
-
end
|
99
|
-
|
100
|
-
def hashtag?(word)
|
101
|
-
return false if self.class.include_hashtags
|
102
|
-
!(word =~ /#(\w+)/).nil?
|
103
|
-
end
|
104
|
-
|
105
|
-
def uri?(word)
|
106
|
-
return false if self.class.include_uris
|
107
|
-
!(word =~ URI.regexp).nil?
|
108
|
-
end
|
109
|
-
|
110
|
-
def mention?(word)
|
111
|
-
return false if self.class.include_mentions
|
112
|
-
!(word =~ /@(\w+)/).nil?
|
113
|
-
end
|
114
|
-
|
115
|
-
def hashtags
|
116
|
-
/#/
|
117
|
-
end
|
118
|
-
|
119
|
-
def should_be_skipped?(word)
|
120
|
-
rejects.include?(word) || hashtag?(word) || uri?(word) || mention?(word)
|
121
|
-
end
|
122
|
-
|
123
|
-
def sort_words
|
124
|
-
words.sort { |a, b| b.last <=> a.last }
|
125
|
-
end
|
126
|
-
alias words_forward sort_words
|
127
|
-
|
128
|
-
def timeline
|
129
|
-
@_timeline ||= screen_names.map { |name| fetch_timeline(name) }.flatten
|
130
|
-
end
|
131
|
-
|
132
|
-
# Make two cursored API calls to fetch the 400 most recent tweets
|
133
|
-
def fetch_timeline(screen_name)
|
134
|
-
return [] if screen_name.to_s.empty?
|
135
|
-
@requests += 1
|
136
|
-
timeline = client.user_timeline(screen_name, tweet_mode: 'extended', count: 200)
|
137
|
-
return timeline if timeline.empty?
|
138
|
-
timeline = fetch_older_tweets(timeline, screen_name)
|
139
|
-
puts "Fetched #{screen_name}'s timeline"
|
140
|
-
timeline
|
141
|
-
end
|
142
|
-
|
143
|
-
def fetch_older_tweets(timeline, screen_name)
|
144
|
-
return timeline if age_of_tweet_in_days(timeline.last) > range
|
145
|
-
@requests += 1
|
146
|
-
first_count = timeline.count
|
147
|
-
timeline += client.user_timeline(
|
148
|
-
screen_name,
|
149
|
-
tweet_mode: 'extended',
|
150
|
-
max_id: timeline.last.id - 1,
|
151
|
-
count: 200
|
152
|
-
)
|
153
|
-
second_count = timeline.count
|
154
|
-
return timeline if second_count == first_count
|
155
|
-
fetch_older_tweets(timeline, screen_name)
|
156
|
-
end
|
157
|
-
|
158
|
-
def tweets
|
159
|
-
@_tweets ||= timeline.each_with_object([]) do |tweet, memo|
|
160
|
-
next if tweet.created_at > up_to_time
|
161
|
-
memo << tweet if age_of_tweet_in_days(tweet) <= range
|
162
|
-
end
|
163
|
-
end
|
164
|
-
|
165
|
-
def sort_tweets
|
166
|
-
tweets.sort { |a, b| b.created_at <=> a.created_at }
|
167
|
-
end
|
168
|
-
|
169
|
-
def sort_tweets!
|
170
|
-
tweets.sort! { |a, b| b.created_at <=> a.created_at }
|
171
|
-
end
|
172
|
-
|
173
|
-
def age_of_tweet_in_days(tweet)
|
174
|
-
(up_to_time - tweet.created_at) / 86_400
|
175
|
-
end
|
176
|
-
|
177
|
-
def up_to_time
|
178
|
-
self.class.up_to_block.call.to_time
|
179
|
-
end
|
180
|
-
|
181
|
-
def count_words
|
182
|
-
words.clear
|
183
|
-
tweets.each do |tweet|
|
184
|
-
words_array = tweet.attrs[:full_text].downcase.split(' ')
|
185
|
-
words_array.each do |word|
|
186
|
-
next if should_be_skipped?(word)
|
187
|
-
if words.key?(word)
|
188
|
-
words[word] += 1
|
189
|
-
else
|
190
|
-
words[word] = 1
|
191
|
-
end
|
192
|
-
end
|
193
|
-
end
|
194
|
-
end
|
195
|
-
|
196
|
-
def audit
|
197
|
-
count_words unless audited?
|
198
|
-
@audited = true
|
199
|
-
end
|
200
|
-
|
201
|
-
def audit!
|
202
|
-
@audited = false
|
203
|
-
audit
|
204
|
-
end
|
205
|
-
|
206
|
-
def tweets_count
|
207
|
-
@_tweets_count ||= tweets.count
|
208
|
-
end
|
209
|
-
|
210
|
-
def to_csv
|
211
|
-
CSV.generate do |csv|
|
212
|
-
csv << %w[word count]
|
213
|
-
sort_words.each do |word_count|
|
214
|
-
csv << word_count
|
215
|
-
end
|
216
|
-
end
|
217
|
-
end
|
218
|
-
|
219
|
-
def write_to_csv(opts = {})
|
220
|
-
filename = opts.fetch(:filename) { 'twords_report.csv' }
|
221
|
-
write_file(filename, :to_csv, opts)
|
222
|
-
end
|
223
|
-
|
224
|
-
def to_json
|
225
|
-
sort_words.to_h.to_json
|
34
|
+
def self.config
|
35
|
+
@configuration ||= Configuration.new
|
36
|
+
@configuration.tap { |config| yield config if block_given? }
|
226
37
|
end
|
227
38
|
|
228
|
-
def
|
229
|
-
|
230
|
-
write_file(filename, :to_json, opts)
|
39
|
+
def self.reset_config!
|
40
|
+
config.reset!
|
231
41
|
end
|
232
42
|
|
233
|
-
def
|
234
|
-
|
43
|
+
def self.client
|
44
|
+
config.client
|
235
45
|
end
|
236
46
|
end
|
data/lib/twords/configuration.rb
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'twitter'
|
4
|
+
require 'twords/twitter_client'
|
5
|
+
|
6
|
+
class Twords
|
7
|
+
# Configuration object
|
8
|
+
class Configuration
|
9
|
+
DEFAULT_REJECTS = %w[
|
10
|
+
my us we an w/ because
|
11
|
+
b/c or are this is from
|
12
|
+
be on the for to and at
|
13
|
+
our of in rt a with &
|
14
|
+
that it by as if was
|
15
|
+
].freeze
|
16
|
+
|
17
|
+
DEFAULT_TWITTER_CONFIG = lambda do |twitter|
|
18
|
+
twitter.consumer_key = ENV['TWITTER_CONSUMER_KEY']
|
19
|
+
twitter.consumer_secret = ENV['TWITTER_CONSUMER_SECRET']
|
20
|
+
twitter.access_token = ENV['TWITTER_ACCESS_TOKEN']
|
21
|
+
twitter.access_token_secret = ENV['TWITTER_ACCESS_TOKEN_SECRET']
|
22
|
+
end
|
23
|
+
|
24
|
+
DEFAULT_OPTIONS = {
|
25
|
+
include_uris: false,
|
26
|
+
include_hashtags: false,
|
27
|
+
include_mentions: false,
|
28
|
+
range: 30,
|
29
|
+
client: TwitterClient.new(&DEFAULT_TWITTER_CONFIG),
|
30
|
+
up_to_block: -> { Time.now },
|
31
|
+
rejects: DEFAULT_REJECTS
|
32
|
+
}.freeze
|
33
|
+
|
34
|
+
attr_reader :rejects, :client, :up_to_block, :include_hashtags, :include_uris,
|
35
|
+
:include_mentions
|
36
|
+
|
37
|
+
attr_accessor :range
|
38
|
+
|
39
|
+
def initialize
|
40
|
+
set_defaults
|
41
|
+
end
|
42
|
+
|
43
|
+
def reset!
|
44
|
+
tap { set_defaults }
|
45
|
+
end
|
46
|
+
|
47
|
+
def twitter_client(&block)
|
48
|
+
@client = TwitterClient.new(&block)
|
49
|
+
end
|
50
|
+
|
51
|
+
def rejects=(*args)
|
52
|
+
@rejects = args.flatten
|
53
|
+
end
|
54
|
+
|
55
|
+
def include_hashtags=(boolean)
|
56
|
+
not_a_boolean_error(boolean)
|
57
|
+
@include_hashtags = boolean
|
58
|
+
end
|
59
|
+
|
60
|
+
def include_uris=(boolean)
|
61
|
+
not_a_boolean_error(boolean)
|
62
|
+
@include_uris = boolean
|
63
|
+
end
|
64
|
+
alias include_urls= include_uris=
|
65
|
+
|
66
|
+
def include_mentions=(boolean)
|
67
|
+
not_a_boolean_error(boolean)
|
68
|
+
@include_mentions = boolean
|
69
|
+
end
|
70
|
+
|
71
|
+
def up_to(&time_block)
|
72
|
+
@up_to_block = time_block
|
73
|
+
end
|
74
|
+
|
75
|
+
def up_to_time
|
76
|
+
up_to_block.call.to_time
|
77
|
+
end
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
# private method
|
82
|
+
def set_defaults
|
83
|
+
ivars = %i[include_uris include_hashtags include_mentions range client up_to_block rejects]
|
84
|
+
ivars.each { |ivar| instance_variable_set("@#{ivar}", DEFAULT_OPTIONS[ivar]) }
|
85
|
+
end
|
86
|
+
|
87
|
+
# private method
|
88
|
+
def a_boolean?(other)
|
89
|
+
[true, false].include?(other)
|
90
|
+
end
|
91
|
+
|
92
|
+
# private method
|
93
|
+
def not_a_boolean_error(boolean)
|
94
|
+
raise ArgumentError, 'argument must be a booolean value' unless a_boolean?(boolean)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
data/lib/twords/instance_methods.rb
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
require 'twords/config_accessible'
|
7
|
+
require 'twords/word_matcher'
|
8
|
+
|
9
|
+
# Instance methods
|
10
|
+
class Twords
|
11
|
+
include ConfigAccessible
|
12
|
+
|
13
|
+
attr_reader :screen_names, :words
|
14
|
+
|
15
|
+
def initialize(*screen_names)
|
16
|
+
@screen_names = screen_names.flatten
|
17
|
+
@words = {}
|
18
|
+
end
|
19
|
+
|
20
|
+
def audited?
|
21
|
+
@audited
|
22
|
+
end
|
23
|
+
|
24
|
+
def audit
|
25
|
+
count_words unless audited?
|
26
|
+
@audited = true
|
27
|
+
end
|
28
|
+
|
29
|
+
def audit!
|
30
|
+
instance_variables.reject { |ivar| %i[@screen_names @words].include?(ivar) }.each do |ivar|
|
31
|
+
instance_variable_set(ivar, nil)
|
32
|
+
end
|
33
|
+
|
34
|
+
audit
|
35
|
+
end
|
36
|
+
|
37
|
+
def sort_words
|
38
|
+
@_sort_words ||= words.sort { |a, b| b.last <=> a.last }
|
39
|
+
end
|
40
|
+
alias words_forward sort_words
|
41
|
+
|
42
|
+
def tweets
|
43
|
+
@_tweets ||= client.filter_tweets(screen_names)
|
44
|
+
end
|
45
|
+
|
46
|
+
def sort_tweets
|
47
|
+
tweets.sort { |a, b| b.created_at <=> a.created_at }
|
48
|
+
end
|
49
|
+
|
50
|
+
def sort_tweets!
|
51
|
+
tweets.sort! { |a, b| b.created_at <=> a.created_at }
|
52
|
+
end
|
53
|
+
|
54
|
+
def tweets_count
|
55
|
+
@_tweets_count ||= tweets.count
|
56
|
+
end
|
57
|
+
|
58
|
+
def total_word_count
|
59
|
+
@_total_word_count ||= words.values.reduce(:+)
|
60
|
+
end
|
61
|
+
|
62
|
+
def percentages
|
63
|
+
@_percentages ||= words.each_with_object({}) do |word_count, hash|
|
64
|
+
hash[word_count.first] = percentage(word_count.last)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def sort_percentages
|
69
|
+
@_sort_percentages ||= percentages.sort { |a, b| b.last <=> a.last }
|
70
|
+
end
|
71
|
+
|
72
|
+
def to_csv
|
73
|
+
CSV.generate do |csv|
|
74
|
+
csv << %w[word count]
|
75
|
+
sort_words.each do |word_count|
|
76
|
+
csv << word_count
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def write_to_csv(opts = {})
|
82
|
+
filename = opts.fetch(:filename) { 'twords_report.csv' }
|
83
|
+
write_file(filename, :to_csv, opts)
|
84
|
+
end
|
85
|
+
|
86
|
+
def to_json
|
87
|
+
sort_words.to_h.to_json
|
88
|
+
end
|
89
|
+
|
90
|
+
def write_to_json(opts = {})
|
91
|
+
filename = opts.fetch(:filename) { 'twords_report.json' }
|
92
|
+
write_file(filename, :to_json, opts)
|
93
|
+
end
|
94
|
+
|
95
|
+
private
|
96
|
+
|
97
|
+
# private method
|
98
|
+
def client
|
99
|
+
config.client
|
100
|
+
end
|
101
|
+
|
102
|
+
# private method
|
103
|
+
def count_words
|
104
|
+
words.clear
|
105
|
+
tweets.each do |tweet|
|
106
|
+
words_array(tweet).each do |word|
|
107
|
+
next if WordMatcher.should_be_skipped?(word)
|
108
|
+
words.key?(word) ? words[word] += 1 : words[word] = 1
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
# private method
|
114
|
+
def words_array(tweet)
|
115
|
+
tweet.attrs[:full_text].downcase.split(' ')
|
116
|
+
end
|
117
|
+
|
118
|
+
# private method
|
119
|
+
def percentage(count)
|
120
|
+
(count / total_word_count.to_f * 100)
|
121
|
+
end
|
122
|
+
|
123
|
+
# private method
|
124
|
+
def write_file(filename, method, opts = {})
|
125
|
+
File.open(filename, 'w', opts) { |file| file.write send(method) }
|
126
|
+
end
|
127
|
+
end
|
data/lib/twords/twitter_client.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'twords/config_accessible'
|
4
|
+
|
5
|
+
class Twords
|
6
|
+
# Twitter REST API client
|
7
|
+
class TwitterClient
|
8
|
+
include ConfigAccessible
|
9
|
+
|
10
|
+
attr_reader :client
|
11
|
+
|
12
|
+
def initialize(&block)
|
13
|
+
@client = Twitter::REST::Client.new(&block)
|
14
|
+
end
|
15
|
+
|
16
|
+
def filter_tweets(screen_names)
|
17
|
+
full_timeline(screen_names).each_with_object([]) do |tweet, memo|
|
18
|
+
next if tweet.created_at > up_to_time
|
19
|
+
memo << tweet if age_of_tweet_in_days(tweet) <= range
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
# private method
|
26
|
+
def full_timeline(screen_names)
|
27
|
+
screen_names.map { |screen_name| fetch_user_timeline(screen_name) }.flatten.uniq
|
28
|
+
end
|
29
|
+
|
30
|
+
# private method
|
31
|
+
def fetch_user_timeline(screen_name)
|
32
|
+
return [] if screen_name.to_s.empty?
|
33
|
+
user_timeline = client.user_timeline(screen_name, tweet_mode: 'extended', count: 200)
|
34
|
+
return user_timeline if user_timeline.empty?
|
35
|
+
user_timeline = fetch_older_tweets(user_timeline, screen_name)
|
36
|
+
puts "Fetched #{screen_name}'s timeline"
|
37
|
+
user_timeline
|
38
|
+
rescue Twitter::Error::TooManyRequests
|
39
|
+
puts 'Rate limit exceeded, waiting 5 minutes' && sleep(300)
|
40
|
+
fetch_user_timeline(screen_name)
|
41
|
+
end
|
42
|
+
|
43
|
+
# private method
|
44
|
+
def age_of_tweet_in_days(tweet)
|
45
|
+
(up_to_time - tweet.created_at) / 86_400
|
46
|
+
end
|
47
|
+
|
48
|
+
# private method
|
49
|
+
def up_to_time
|
50
|
+
config.up_to_time
|
51
|
+
end
|
52
|
+
|
53
|
+
# private method
|
54
|
+
def range
|
55
|
+
config.range
|
56
|
+
end
|
57
|
+
|
58
|
+
# private method
|
59
|
+
def fetch_older_tweets(user_timeline, screen_name)
|
60
|
+
return user_timeline if age_of_tweet_in_days(user_timeline.last) > range
|
61
|
+
first_count = user_timeline.count
|
62
|
+
user_timeline += client.user_timeline(
|
63
|
+
screen_name,
|
64
|
+
tweet_mode: 'extended',
|
65
|
+
max_id: user_timeline.last.id - 1,
|
66
|
+
count: 200
|
67
|
+
)
|
68
|
+
return user_timeline if user_timeline.count == first_count
|
69
|
+
fetch_older_tweets(user_timeline, screen_name)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/twords/version.rb
CHANGED
data/lib/twords/word_matcher.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'twords/config_accessible'
|
4
|
+
|
5
|
+
class Twords
|
6
|
+
# Checks if words should be counted or not
|
7
|
+
class WordMatcher
|
8
|
+
class << self
|
9
|
+
include ConfigAccessible
|
10
|
+
|
11
|
+
def should_be_skipped?(word)
|
12
|
+
reject?(word) || hashtag?(word) || uri?(word) || mention?(word)
|
13
|
+
end
|
14
|
+
|
15
|
+
def reject?(word)
|
16
|
+
config.rejects.include?(word)
|
17
|
+
end
|
18
|
+
|
19
|
+
def hashtag?(word)
|
20
|
+
return if config.include_hashtags
|
21
|
+
!(word =~ /#(\w+)/).nil?
|
22
|
+
end
|
23
|
+
|
24
|
+
def uri?(word)
|
25
|
+
return if config.include_uris
|
26
|
+
!(word =~ URI.regexp).nil?
|
27
|
+
end
|
28
|
+
|
29
|
+
def mention?(word)
|
30
|
+
return if config.include_mentions
|
31
|
+
!(word =~ /@(\w+)/).nil?
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twords
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. Simon Borg
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: twitter
|
@@ -62,7 +62,12 @@ files:
|
|
62
62
|
- LICENSE.txt
|
63
63
|
- README.md
|
64
64
|
- lib/twords.rb
|
65
|
+
- lib/twords/config_accessible.rb
|
66
|
+
- lib/twords/configuration.rb
|
67
|
+
- lib/twords/instance_methods.rb
|
68
|
+
- lib/twords/twitter_client.rb
|
65
69
|
- lib/twords/version.rb
|
70
|
+
- lib/twords/word_matcher.rb
|
66
71
|
- twords.gemspec
|
67
72
|
homepage: https://github.com/msimonborg/twords
|
68
73
|
licenses:
|