twords 0.1.8 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +38 -15
- data/lib/twords.rb +18 -208
- data/lib/twords/config_accessible.rb +12 -0
- data/lib/twords/configuration.rb +97 -0
- data/lib/twords/instance_methods.rb +127 -0
- data/lib/twords/twitter_client.rb +72 -0
- data/lib/twords/version.rb +1 -1
- data/lib/twords/word_matcher.rb +35 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: deb67eadb91095b9a3ef2a0cefe7c9452038c0f1
|
4
|
+
data.tar.gz: 7cb612c4aa544038bfb9eb2c88c5367269b1c4c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 615f31dd12aea64aa8ff1304e947f36706a2e23145ca35153227aac72999359d87bfa2ef97c5f6beaae258be2f97f403c1cba7c78502e7c28991318da081768c
|
7
|
+
data.tar.gz: 49357b0e2060a1726956fb5c1f996ff2281eb2306a4f2ebe1fa2176851d95af5181c5c4c2f5610cd36db782193f6c60776a374d7a7ac70d8f73b79b3a3802394
|
data/README.md
CHANGED
@@ -22,35 +22,59 @@ Or install it yourself as:
|
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
|
+
Twords takes a configuration block, and if it doesn't find one it will set the following defaults:
|
26
|
+
|
25
27
|
```ruby
|
26
28
|
Twords.config do |config|
|
27
|
-
config.rejects = %w[
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
config.
|
32
|
-
|
33
|
-
|
29
|
+
config.rejects = %w[my us we an w/ because b/c or are this is from
|
30
|
+
be on the for to and at our of in rt a with &
|
31
|
+
that it by as if was] # These words will not be counted
|
32
|
+
|
33
|
+
config.range = 30 # Number of days to check
|
34
|
+
|
35
|
+
config.include_hashtags = false # Excludes strings beginning with '#'
|
36
|
+
config.include_uris = false # Excludes strings that match URI.regexp
|
37
|
+
config.include_mentions = false # Excludes strings beginning with '@'
|
38
|
+
|
34
39
|
config.up_to { Time.now } # The block must return an object that responds to #to_time. The time is lazily evaluated, and the range is counted backward from it.
|
35
|
-
|
40
|
+
|
41
|
+
# By default the Twitter client looks for keys stored in the environment variables named below. Feel free to change the configuration, but never hard-code the keys.
|
36
42
|
config.twitter_client do |twitter|
|
37
|
-
twitter.consumer_key =
|
38
|
-
twitter.consumer_secret =
|
39
|
-
twitter.access_token =
|
40
|
-
twitter.access_token_secret =
|
43
|
+
twitter.consumer_key = ENV['TWITTER_CONSUMER_KEY']
|
44
|
+
twitter.consumer_secret = ENV['TWITTER_CONSUMER_SECRET']
|
45
|
+
twitter.access_token = ENV['TWITTER_ACCESS_TOKEN']
|
46
|
+
twitter.access_token_secret = ENV['TWITTER_ACCESS_TOKEN_SECRET']
|
41
47
|
end
|
42
48
|
end
|
43
49
|
|
44
50
|
twords = Twords.new 'user_one', 'user_two' # A list of Twitter handles to include in the count.
|
45
51
|
|
46
52
|
twords.audit
|
53
|
+
# Fetched user_one's timeline
|
54
|
+
# Fetched user_two's timeline
|
47
55
|
# => true
|
48
56
|
|
49
57
|
twords.words
|
50
|
-
# => { "
|
58
|
+
# => { "pizza"=>32, "burger"=>28, "pups"=>36, ... }
|
51
59
|
|
52
60
|
twords.words_forward # Sort descending. Alias #sort_words
|
53
|
-
# => [["pups", 36], ["
|
61
|
+
# => [["pups", 36], ["pizza", 32], ["burger", 28], ...]
|
62
|
+
|
63
|
+
Twords.config { |config| config.include_hashtags = true }
|
64
|
+
|
65
|
+
twords.audit
|
66
|
+
# => true
|
67
|
+
|
68
|
+
twords.words
|
69
|
+
# => { "pizza"=>32, "burger"=>28, "pups"=>36, ... }
|
70
|
+
|
71
|
+
twords.audit!
|
72
|
+
# Fetched user_one's timeline
|
73
|
+
# Fetched user_two's timeline
|
74
|
+
# => true
|
75
|
+
|
76
|
+
twords.words
|
77
|
+
# => { "#TACOSTACOSTACOS"=>14321, "pizza"=>32, "burger"=>28, "pups"=>36, ... }
|
54
78
|
```
|
55
79
|
|
56
80
|
## Development
|
@@ -67,4 +91,3 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/msimon
|
|
67
91
|
## License
|
68
92
|
|
69
93
|
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
70
|
-
|
data/lib/twords.rb
CHANGED
@@ -1,15 +1,19 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require '
|
4
|
-
require '
|
5
|
-
require 'uri'
|
6
|
-
|
3
|
+
require 'twords/configuration'
|
4
|
+
require 'twords/instance_methods'
|
7
5
|
require 'twords/version'
|
8
6
|
|
9
7
|
# Twords.config do |config|
|
10
|
-
# config.rejects = %w[
|
11
|
-
#
|
8
|
+
# config.rejects = %w[my us we an w/ because b/c or are this is from
|
9
|
+
# be on the for to and at our of in rt a with &
|
10
|
+
# that it by as if was]
|
11
|
+
|
12
|
+
# config.range = 30
|
12
13
|
# config.up_to { Time.now }
|
14
|
+
# config.include_hashtags = false
|
15
|
+
# config.include_uris = false
|
16
|
+
# config.include_mentions = false
|
13
17
|
#
|
14
18
|
# config.twitter_client do |twitter|
|
15
19
|
# twitter.consumer_key = YOUR_TWITTER_CONSUMER_KEY
|
@@ -25,212 +29,18 @@ require 'twords/version'
|
|
25
29
|
# # => true
|
26
30
|
#
|
27
31
|
# twords.words
|
28
|
-
# # => { "
|
32
|
+
# # => { "pizza"=>32, "burger"=>28, "pups"=>36, ... }
|
29
33
|
class Twords
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
attr_accessor :range
|
34
|
-
|
35
|
-
def config
|
36
|
-
yield self
|
37
|
-
end
|
38
|
-
|
39
|
-
def twitter_client(&block)
|
40
|
-
@client = Twitter::REST::Client.new(&block)
|
41
|
-
end
|
42
|
-
|
43
|
-
def rejects=(*args)
|
44
|
-
@rejects = args.flatten
|
45
|
-
end
|
46
|
-
|
47
|
-
def include_hashtags=(boolean)
|
48
|
-
not_a_boolean_error(boolean)
|
49
|
-
@include_hashtags = boolean
|
50
|
-
end
|
51
|
-
|
52
|
-
def include_uris=(boolean)
|
53
|
-
not_a_boolean_error(boolean)
|
54
|
-
@include_uris = boolean
|
55
|
-
end
|
56
|
-
alias include_urls include_uris
|
57
|
-
|
58
|
-
def include_mentions=(boolean)
|
59
|
-
not_a_boolean_error(boolean)
|
60
|
-
@include_mentions = boolean
|
61
|
-
end
|
62
|
-
|
63
|
-
def not_a_boolean_error(boolean)
|
64
|
-
raise ArgumentError, 'argument must be a booolean value' unless a_boolean?(boolean)
|
65
|
-
end
|
66
|
-
|
67
|
-
def a_boolean?(other)
|
68
|
-
[true, false].include?(other)
|
69
|
-
end
|
70
|
-
|
71
|
-
def up_to(&time_block)
|
72
|
-
@up_to_block = time_block
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
attr_reader :screen_names, :words, :requests, :client
|
77
|
-
|
78
|
-
def initialize(*screen_names)
|
79
|
-
@screen_names = screen_names.flatten
|
80
|
-
@words = {}
|
81
|
-
@requests = 0
|
82
|
-
end
|
83
|
-
|
84
|
-
def client
|
85
|
-
@_client ||= self.class.client
|
86
|
-
end
|
87
|
-
|
88
|
-
def range
|
89
|
-
@_range ||= self.class.range
|
90
|
-
end
|
91
|
-
|
92
|
-
def rejects
|
93
|
-
@_rejects ||= self.class.rejects
|
94
|
-
end
|
95
|
-
|
96
|
-
def audited?
|
97
|
-
@audited
|
98
|
-
end
|
99
|
-
|
100
|
-
def hashtag?(word)
|
101
|
-
return false if self.class.include_hashtags
|
102
|
-
!(word =~ /#(\w+)/).nil?
|
103
|
-
end
|
104
|
-
|
105
|
-
def uri?(word)
|
106
|
-
return false if self.class.include_uris
|
107
|
-
!(word =~ URI.regexp).nil?
|
108
|
-
end
|
109
|
-
|
110
|
-
def mention?(word)
|
111
|
-
return false if self.class.include_mentions
|
112
|
-
!(word =~ /@(\w+)/).nil?
|
113
|
-
end
|
114
|
-
|
115
|
-
def hashtags
|
116
|
-
/#/
|
117
|
-
end
|
118
|
-
|
119
|
-
def should_be_skipped?(word)
|
120
|
-
rejects.include?(word) || hashtag?(word) || uri?(word) || mention?(word)
|
121
|
-
end
|
122
|
-
|
123
|
-
def sort_words
|
124
|
-
words.sort { |a, b| b.last <=> a.last }
|
125
|
-
end
|
126
|
-
alias words_forward sort_words
|
127
|
-
|
128
|
-
def timeline
|
129
|
-
@_timeline ||= screen_names.map { |name| fetch_timeline(name) }.flatten
|
130
|
-
end
|
131
|
-
|
132
|
-
# Make two cursored API calls to fetch the 400 most recent tweets
|
133
|
-
def fetch_timeline(screen_name)
|
134
|
-
return [] if screen_name.to_s.empty?
|
135
|
-
@requests += 1
|
136
|
-
timeline = client.user_timeline(screen_name, tweet_mode: 'extended', count: 200)
|
137
|
-
return timeline if timeline.empty?
|
138
|
-
timeline = fetch_older_tweets(timeline, screen_name)
|
139
|
-
puts "Fetched #{screen_name}'s timeline"
|
140
|
-
timeline
|
141
|
-
end
|
142
|
-
|
143
|
-
def fetch_older_tweets(timeline, screen_name)
|
144
|
-
return timeline if age_of_tweet_in_days(timeline.last) > range
|
145
|
-
@requests += 1
|
146
|
-
first_count = timeline.count
|
147
|
-
timeline += client.user_timeline(
|
148
|
-
screen_name,
|
149
|
-
tweet_mode: 'extended',
|
150
|
-
max_id: timeline.last.id - 1,
|
151
|
-
count: 200
|
152
|
-
)
|
153
|
-
second_count = timeline.count
|
154
|
-
return timeline if second_count == first_count
|
155
|
-
fetch_older_tweets(timeline, screen_name)
|
156
|
-
end
|
157
|
-
|
158
|
-
def tweets
|
159
|
-
@_tweets ||= timeline.each_with_object([]) do |tweet, memo|
|
160
|
-
next if tweet.created_at > up_to_time
|
161
|
-
memo << tweet if age_of_tweet_in_days(tweet) <= range
|
162
|
-
end
|
163
|
-
end
|
164
|
-
|
165
|
-
def sort_tweets
|
166
|
-
tweets.sort { |a, b| b.created_at <=> a.created_at }
|
167
|
-
end
|
168
|
-
|
169
|
-
def sort_tweets!
|
170
|
-
tweets.sort! { |a, b| b.created_at <=> a.created_at }
|
171
|
-
end
|
172
|
-
|
173
|
-
def age_of_tweet_in_days(tweet)
|
174
|
-
(up_to_time - tweet.created_at) / 86_400
|
175
|
-
end
|
176
|
-
|
177
|
-
def up_to_time
|
178
|
-
self.class.up_to_block.call.to_time
|
179
|
-
end
|
180
|
-
|
181
|
-
def count_words
|
182
|
-
words.clear
|
183
|
-
tweets.each do |tweet|
|
184
|
-
words_array = tweet.attrs[:full_text].downcase.split(' ')
|
185
|
-
words_array.each do |word|
|
186
|
-
next if should_be_skipped?(word)
|
187
|
-
if words.key?(word)
|
188
|
-
words[word] += 1
|
189
|
-
else
|
190
|
-
words[word] = 1
|
191
|
-
end
|
192
|
-
end
|
193
|
-
end
|
194
|
-
end
|
195
|
-
|
196
|
-
def audit
|
197
|
-
count_words unless audited?
|
198
|
-
@audited = true
|
199
|
-
end
|
200
|
-
|
201
|
-
def audit!
|
202
|
-
@audited = false
|
203
|
-
audit
|
204
|
-
end
|
205
|
-
|
206
|
-
def tweets_count
|
207
|
-
@_tweets_count ||= tweets.count
|
208
|
-
end
|
209
|
-
|
210
|
-
def to_csv
|
211
|
-
CSV.generate do |csv|
|
212
|
-
csv << %w[word count]
|
213
|
-
sort_words.each do |word_count|
|
214
|
-
csv << word_count
|
215
|
-
end
|
216
|
-
end
|
217
|
-
end
|
218
|
-
|
219
|
-
def write_to_csv(opts = {})
|
220
|
-
filename = opts.fetch(:filename) { 'twords_report.csv' }
|
221
|
-
write_file(filename, :to_csv, opts)
|
222
|
-
end
|
223
|
-
|
224
|
-
def to_json
|
225
|
-
sort_words.to_h.to_json
|
34
|
+
def self.config
|
35
|
+
@configuration ||= Configuration.new
|
36
|
+
@configuration.tap { |config| yield config if block_given? }
|
226
37
|
end
|
227
38
|
|
228
|
-
def
|
229
|
-
|
230
|
-
write_file(filename, :to_json, opts)
|
39
|
+
def self.reset_config!
|
40
|
+
config.reset!
|
231
41
|
end
|
232
42
|
|
233
|
-
def
|
234
|
-
|
43
|
+
def self.client
|
44
|
+
config.client
|
235
45
|
end
|
236
46
|
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'twitter'
|
4
|
+
require 'twords/twitter_client'
|
5
|
+
|
6
|
+
class Twords
|
7
|
+
# Configuration object
|
8
|
+
class Configuration
|
9
|
+
DEFAULT_REJECTS = %w[
|
10
|
+
my us we an w/ because
|
11
|
+
b/c or are this is from
|
12
|
+
be on the for to and at
|
13
|
+
our of in rt a with &
|
14
|
+
that it by as if was
|
15
|
+
].freeze
|
16
|
+
|
17
|
+
DEFAULT_TWITTER_CONFIG = lambda do |twitter|
|
18
|
+
twitter.consumer_key = ENV['TWITTER_CONSUMER_KEY']
|
19
|
+
twitter.consumer_secret = ENV['TWITTER_CONSUMER_SECRET']
|
20
|
+
twitter.access_token = ENV['TWITTER_ACCESS_TOKEN']
|
21
|
+
twitter.access_token_secret = ENV['TWITTER_ACCESS_TOKEN_SECRET']
|
22
|
+
end
|
23
|
+
|
24
|
+
DEFAULT_OPTIONS = {
|
25
|
+
include_uris: false,
|
26
|
+
include_hashtags: false,
|
27
|
+
include_mentions: false,
|
28
|
+
range: 30,
|
29
|
+
client: TwitterClient.new(&DEFAULT_TWITTER_CONFIG),
|
30
|
+
up_to_block: -> { Time.now },
|
31
|
+
rejects: DEFAULT_REJECTS
|
32
|
+
}.freeze
|
33
|
+
|
34
|
+
attr_reader :rejects, :client, :up_to_block, :include_hashtags, :include_uris,
|
35
|
+
:include_mentions
|
36
|
+
|
37
|
+
attr_accessor :range
|
38
|
+
|
39
|
+
def initialize
|
40
|
+
set_defaults
|
41
|
+
end
|
42
|
+
|
43
|
+
def reset!
|
44
|
+
tap { set_defaults }
|
45
|
+
end
|
46
|
+
|
47
|
+
def twitter_client(&block)
|
48
|
+
@client = TwitterClient.new(&block)
|
49
|
+
end
|
50
|
+
|
51
|
+
def rejects=(*args)
|
52
|
+
@rejects = args.flatten
|
53
|
+
end
|
54
|
+
|
55
|
+
def include_hashtags=(boolean)
|
56
|
+
not_a_boolean_error(boolean)
|
57
|
+
@include_hashtags = boolean
|
58
|
+
end
|
59
|
+
|
60
|
+
def include_uris=(boolean)
|
61
|
+
not_a_boolean_error(boolean)
|
62
|
+
@include_uris = boolean
|
63
|
+
end
|
64
|
+
alias include_urls= include_uris=
|
65
|
+
|
66
|
+
def include_mentions=(boolean)
|
67
|
+
not_a_boolean_error(boolean)
|
68
|
+
@include_mentions = boolean
|
69
|
+
end
|
70
|
+
|
71
|
+
def up_to(&time_block)
|
72
|
+
@up_to_block = time_block
|
73
|
+
end
|
74
|
+
|
75
|
+
def up_to_time
|
76
|
+
up_to_block.call.to_time
|
77
|
+
end
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
# private method
|
82
|
+
def set_defaults
|
83
|
+
ivars = %i[include_uris include_hashtags include_mentions range client up_to_block rejects]
|
84
|
+
ivars.each { |ivar| instance_variable_set("@#{ivar}", DEFAULT_OPTIONS[ivar]) }
|
85
|
+
end
|
86
|
+
|
87
|
+
# private method
|
88
|
+
def a_boolean?(other)
|
89
|
+
[true, false].include?(other)
|
90
|
+
end
|
91
|
+
|
92
|
+
# private method
|
93
|
+
def not_a_boolean_error(boolean)
|
94
|
+
raise ArgumentError, 'argument must be a booolean value' unless a_boolean?(boolean)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
require 'twords/config_accessible'
|
7
|
+
require 'twords/word_matcher'
|
8
|
+
|
9
|
+
# Instance methods
|
10
|
+
class Twords
|
11
|
+
include ConfigAccessible
|
12
|
+
|
13
|
+
attr_reader :screen_names, :words
|
14
|
+
|
15
|
+
def initialize(*screen_names)
|
16
|
+
@screen_names = screen_names.flatten
|
17
|
+
@words = {}
|
18
|
+
end
|
19
|
+
|
20
|
+
def audited?
|
21
|
+
@audited
|
22
|
+
end
|
23
|
+
|
24
|
+
def audit
|
25
|
+
count_words unless audited?
|
26
|
+
@audited = true
|
27
|
+
end
|
28
|
+
|
29
|
+
def audit!
|
30
|
+
instance_variables.reject { |ivar| %i[@screen_names @words].include?(ivar) }.each do |ivar|
|
31
|
+
instance_variable_set(ivar, nil)
|
32
|
+
end
|
33
|
+
|
34
|
+
audit
|
35
|
+
end
|
36
|
+
|
37
|
+
def sort_words
|
38
|
+
@_sort_words ||= words.sort { |a, b| b.last <=> a.last }
|
39
|
+
end
|
40
|
+
alias words_forward sort_words
|
41
|
+
|
42
|
+
def tweets
|
43
|
+
@_tweets ||= client.filter_tweets(screen_names)
|
44
|
+
end
|
45
|
+
|
46
|
+
def sort_tweets
|
47
|
+
tweets.sort { |a, b| b.created_at <=> a.created_at }
|
48
|
+
end
|
49
|
+
|
50
|
+
def sort_tweets!
|
51
|
+
tweets.sort! { |a, b| b.created_at <=> a.created_at }
|
52
|
+
end
|
53
|
+
|
54
|
+
def tweets_count
|
55
|
+
@_tweets_count ||= tweets.count
|
56
|
+
end
|
57
|
+
|
58
|
+
def total_word_count
|
59
|
+
@_total_word_count ||= words.values.reduce(:+)
|
60
|
+
end
|
61
|
+
|
62
|
+
def percentages
|
63
|
+
@_percentages ||= words.each_with_object({}) do |word_count, hash|
|
64
|
+
hash[word_count.first] = percentage(word_count.last)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def sort_percentages
|
69
|
+
@_sort_percentages ||= percentages.sort { |a, b| b.last <=> a.last }
|
70
|
+
end
|
71
|
+
|
72
|
+
def to_csv
|
73
|
+
CSV.generate do |csv|
|
74
|
+
csv << %w[word count]
|
75
|
+
sort_words.each do |word_count|
|
76
|
+
csv << word_count
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def write_to_csv(opts = {})
|
82
|
+
filename = opts.fetch(:filename) { 'twords_report.csv' }
|
83
|
+
write_file(filename, :to_csv, opts)
|
84
|
+
end
|
85
|
+
|
86
|
+
def to_json
|
87
|
+
sort_words.to_h.to_json
|
88
|
+
end
|
89
|
+
|
90
|
+
def write_to_json(opts = {})
|
91
|
+
filename = opts.fetch(:filename) { 'twords_report.json' }
|
92
|
+
write_file(filename, :to_json, opts)
|
93
|
+
end
|
94
|
+
|
95
|
+
private
|
96
|
+
|
97
|
+
# private method
|
98
|
+
def client
|
99
|
+
config.client
|
100
|
+
end
|
101
|
+
|
102
|
+
# private method
|
103
|
+
def count_words
|
104
|
+
words.clear
|
105
|
+
tweets.each do |tweet|
|
106
|
+
words_array(tweet).each do |word|
|
107
|
+
next if WordMatcher.should_be_skipped?(word)
|
108
|
+
words.key?(word) ? words[word] += 1 : words[word] = 1
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
# private method
|
114
|
+
def words_array(tweet)
|
115
|
+
tweet.attrs[:full_text].downcase.split(' ')
|
116
|
+
end
|
117
|
+
|
118
|
+
# private method
|
119
|
+
def percentage(count)
|
120
|
+
(count / total_word_count.to_f * 100)
|
121
|
+
end
|
122
|
+
|
123
|
+
# private method
|
124
|
+
def write_file(filename, method, opts = {})
|
125
|
+
File.open(filename, 'w', opts) { |file| file.write send(method) }
|
126
|
+
end
|
127
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal
|
2
|
+
|
3
|
+
require 'twords/config_accessible'
|
4
|
+
|
5
|
+
class Twords
|
6
|
+
# Twitter REST API client
|
7
|
+
class TwitterClient
|
8
|
+
include ConfigAccessible
|
9
|
+
|
10
|
+
attr_reader :client
|
11
|
+
|
12
|
+
def initialize(&block)
|
13
|
+
@client = Twitter::REST::Client.new(&block)
|
14
|
+
end
|
15
|
+
|
16
|
+
def filter_tweets(screen_names)
|
17
|
+
full_timeline(screen_names).each_with_object([]) do |tweet, memo|
|
18
|
+
next if tweet.created_at > up_to_time
|
19
|
+
memo << tweet if age_of_tweet_in_days(tweet) <= range
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
# private method
|
26
|
+
def full_timeline(screen_names)
|
27
|
+
screen_names.map { |screen_name| fetch_user_timeline(screen_name) }.flatten.uniq
|
28
|
+
end
|
29
|
+
|
30
|
+
# private method
|
31
|
+
def fetch_user_timeline(screen_name)
|
32
|
+
return [] if screen_name.to_s.empty?
|
33
|
+
user_timeline = client.user_timeline(screen_name, tweet_mode: 'extended', count: 200)
|
34
|
+
return user_timeline if user_timeline.empty?
|
35
|
+
user_timeline = fetch_older_tweets(user_timeline, screen_name)
|
36
|
+
puts "Fetched #{screen_name}'s timeline"
|
37
|
+
user_timeline
|
38
|
+
rescue Twitter::Error::TooManyRequests
|
39
|
+
puts 'Rate limit exceeded, waiting 5 minutes' && sleep(300)
|
40
|
+
fetch_user_timeline(screen_name)
|
41
|
+
end
|
42
|
+
|
43
|
+
# private method
|
44
|
+
def age_of_tweet_in_days(tweet)
|
45
|
+
(up_to_time - tweet.created_at) / 86_400
|
46
|
+
end
|
47
|
+
|
48
|
+
# private method
|
49
|
+
def up_to_time
|
50
|
+
config.up_to_time
|
51
|
+
end
|
52
|
+
|
53
|
+
# private method
|
54
|
+
def range
|
55
|
+
config.range
|
56
|
+
end
|
57
|
+
|
58
|
+
# private method
|
59
|
+
def fetch_older_tweets(user_timeline, screen_name)
|
60
|
+
return user_timeline if age_of_tweet_in_days(user_timeline.last) > range
|
61
|
+
first_count = user_timeline.count
|
62
|
+
user_timeline += client.user_timeline(
|
63
|
+
screen_name,
|
64
|
+
tweet_mode: 'extended',
|
65
|
+
max_id: user_timeline.last.id - 1,
|
66
|
+
count: 200
|
67
|
+
)
|
68
|
+
return user_timeline if user_timeline.count == first_count
|
69
|
+
fetch_older_tweets(user_timeline, screen_name)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/twords/version.rb
CHANGED
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'twords/config_accessible'
|
4
|
+
|
5
|
+
class Twords
|
6
|
+
# Checks if words should be counted or not
|
7
|
+
class WordMatcher
|
8
|
+
class << self
|
9
|
+
include ConfigAccessible
|
10
|
+
|
11
|
+
def should_be_skipped?(word)
|
12
|
+
reject?(word) || hashtag?(word) || uri?(word) || mention?(word)
|
13
|
+
end
|
14
|
+
|
15
|
+
def reject?(word)
|
16
|
+
config.rejects.include?(word)
|
17
|
+
end
|
18
|
+
|
19
|
+
def hashtag?(word)
|
20
|
+
return if config.include_hashtags
|
21
|
+
!(word =~ /#(\w+)/).nil?
|
22
|
+
end
|
23
|
+
|
24
|
+
def uri?(word)
|
25
|
+
return if config.include_uris
|
26
|
+
!(word =~ URI.regexp).nil?
|
27
|
+
end
|
28
|
+
|
29
|
+
def mention?(word)
|
30
|
+
return if config.include_mentions
|
31
|
+
!(word =~ /@(\w+)/).nil?
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twords
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. Simon Borg
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: twitter
|
@@ -62,7 +62,12 @@ files:
|
|
62
62
|
- LICENSE.txt
|
63
63
|
- README.md
|
64
64
|
- lib/twords.rb
|
65
|
+
- lib/twords/config_accessible.rb
|
66
|
+
- lib/twords/configuration.rb
|
67
|
+
- lib/twords/instance_methods.rb
|
68
|
+
- lib/twords/twitter_client.rb
|
65
69
|
- lib/twords/version.rb
|
70
|
+
- lib/twords/word_matcher.rb
|
66
71
|
- twords.gemspec
|
67
72
|
homepage: https://github.com/msimonborg/twords
|
68
73
|
licenses:
|