twords 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/twords.rb +71 -17
- data/lib/twords/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0d9a3acb581e8d88cccecaa1f81755b24ccae3d8
|
4
|
+
data.tar.gz: e537d3ea87fec86ae57d06f3902b9b27a2ddfa89
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c4adc74dfeb9c6aae7897aca0a8daf5eb519d3ac53cb826fee954b3ead3a38c462a5fc010c88ca28199012456759739ca0f3c1c544137e0558ca349b19fad53d
|
7
|
+
data.tar.gz: 2838be4f9d6f6a83fb8aab1cdee4c201d1665a545c595cd64e3fddde6a7c2e53bc263a44544ada42b531caaa12f94f8b5c1772d4f8d72cd13f8f89c3bdffad07
|
data/README.md
CHANGED
@@ -24,8 +24,8 @@ Or install it yourself as:
|
|
24
24
|
|
25
25
|
```ruby
|
26
26
|
Twords.config do |config|
|
27
|
-
config.rejects
|
28
|
-
config.range
|
27
|
+
config.rejects = %w[the for and a i of if]
|
28
|
+
config.range = 14
|
29
29
|
config.up_to { Time.now } # A time object to be lazy evaluated. The range is counted backward from here.
|
30
30
|
|
31
31
|
config.twitter_client do |twitter|
|
data/lib/twords.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'date'
|
4
3
|
require 'twitter'
|
4
|
+
require 'uri'
|
5
5
|
|
6
6
|
require 'twords/version'
|
7
7
|
|
@@ -27,10 +27,11 @@ require 'twords/version'
|
|
27
27
|
# # => { "butts"=>35, "poo"=>32, "pups"=>28, ... }
|
28
28
|
class Twords
|
29
29
|
class << self
|
30
|
-
attr_reader :rejects, :range, :client, :up_to_block
|
30
|
+
attr_reader :rejects, :range, :client, :up_to_block, :include_hashtags, :include_uris,
|
31
|
+
:include_mentions
|
31
32
|
|
32
|
-
def config
|
33
|
-
|
33
|
+
def config
|
34
|
+
yield self
|
34
35
|
end
|
35
36
|
|
36
37
|
def twitter_client(&block)
|
@@ -41,12 +42,35 @@ class Twords
|
|
41
42
|
@rejects = args.flatten
|
42
43
|
end
|
43
44
|
|
45
|
+
def include_hashtags=(boolean)
|
46
|
+
not_a_boolean_error(boolean)
|
47
|
+
@include_hashtags = boolean
|
48
|
+
end
|
49
|
+
|
50
|
+
def include_uris=(boolean)
|
51
|
+
not_a_boolean_error(boolean)
|
52
|
+
@include_uris = boolean
|
53
|
+
end
|
54
|
+
alias include_urls include_uris
|
55
|
+
|
56
|
+
def include_mentions=(boolean)
|
57
|
+
not_a_boolean_error(boolean)
|
58
|
+
@include_mentions = boolean
|
59
|
+
end
|
60
|
+
|
61
|
+
def not_a_boolean_error(boolean)
|
62
|
+
raise ArgumentError, 'argument must be a booolean value' unless is_a_boolean?(boolean)
|
63
|
+
end
|
64
|
+
|
65
|
+
def is_a_boolean?(other)
|
66
|
+
[true, false].include?(other)
|
67
|
+
end
|
68
|
+
|
44
69
|
def range=(integer)
|
45
70
|
@range = integer
|
46
71
|
end
|
47
72
|
|
48
73
|
def up_to(&time_block)
|
49
|
-
raise ArgumentError, 'object must respond to #call' unless time_block.respond_to?(:call)
|
50
74
|
@up_to_block = time_block
|
51
75
|
end
|
52
76
|
end
|
@@ -54,7 +78,7 @@ class Twords
|
|
54
78
|
attr_reader :screen_names, :words, :requests, :client
|
55
79
|
|
56
80
|
def initialize(*screen_names)
|
57
|
-
@screen_names = screen_names
|
81
|
+
@screen_names = screen_names.flatten
|
58
82
|
@words = {}
|
59
83
|
@requests = 0
|
60
84
|
end
|
@@ -67,10 +91,37 @@ class Twords
|
|
67
91
|
@_range ||= self.class.range
|
68
92
|
end
|
69
93
|
|
94
|
+
def rejects
|
95
|
+
@_rejects ||= self.class.rejects
|
96
|
+
end
|
97
|
+
|
70
98
|
def audited?
|
71
99
|
@audited
|
72
100
|
end
|
73
101
|
|
102
|
+
def hashtag?(word)
|
103
|
+
return false if self.class.include_hashtags
|
104
|
+
word.match?(/#/)
|
105
|
+
end
|
106
|
+
|
107
|
+
def uri?(word)
|
108
|
+
return false if self.class.include_uris
|
109
|
+
word.match?(URI.regexp)
|
110
|
+
end
|
111
|
+
|
112
|
+
def mention?(word)
|
113
|
+
return false if self.class.include_mentions
|
114
|
+
word.match?(/@/)
|
115
|
+
end
|
116
|
+
|
117
|
+
def hashtags
|
118
|
+
/#/
|
119
|
+
end
|
120
|
+
|
121
|
+
def should_be_skipped?(word)
|
122
|
+
rejects.include?(word) || hashtag?(word) || uri?(word) || mention?(word)
|
123
|
+
end
|
124
|
+
|
74
125
|
def sort_words
|
75
126
|
words.sort { |a, b| b.last <=> a.last }
|
76
127
|
end
|
@@ -84,9 +135,11 @@ class Twords
|
|
84
135
|
def fetch_timeline(screen_name)
|
85
136
|
return [] if screen_name.to_s.empty?
|
86
137
|
@requests += 1
|
87
|
-
timeline = client.user_timeline(screen_name, count: 200)
|
138
|
+
timeline = client.user_timeline(screen_name, tweet_mode: 'extended', count: 200)
|
88
139
|
return timeline if timeline.empty?
|
89
|
-
fetch_older_tweets(timeline, screen_name)
|
140
|
+
timeline = fetch_older_tweets(timeline, screen_name)
|
141
|
+
puts "Fetched #{screen_name}'s timeline"
|
142
|
+
timeline
|
90
143
|
end
|
91
144
|
|
92
145
|
def fetch_older_tweets(timeline, screen_name)
|
@@ -94,6 +147,7 @@ class Twords
|
|
94
147
|
@requests += 1
|
95
148
|
timeline += client.user_timeline(
|
96
149
|
screen_name,
|
150
|
+
tweet_mode: 'extended',
|
97
151
|
max_id: timeline.last.id - 1,
|
98
152
|
count: 200
|
99
153
|
)
|
@@ -107,15 +161,15 @@ class Twords
|
|
107
161
|
end
|
108
162
|
|
109
163
|
def age_of_tweet_in_days(tweet)
|
110
|
-
(self.class.up_to_block.call - tweet.created_at) /
|
164
|
+
(self.class.up_to_block.call.to_time - tweet.created_at) / 86400
|
111
165
|
end
|
112
166
|
|
113
167
|
def count_words
|
168
|
+
words.clear
|
114
169
|
recent_tweets.each do |tweet|
|
115
|
-
|
116
|
-
words_array = tweet_with_full_text.attrs[:full_text].downcase.split(' ')
|
170
|
+
words_array = tweet.attrs[:full_text].downcase.split(' ')
|
117
171
|
words_array.each do |word|
|
118
|
-
next if
|
172
|
+
next if should_be_skipped?(word)
|
119
173
|
if words.has_key?(word)
|
120
174
|
words[word] += 1
|
121
175
|
else
|
@@ -125,16 +179,16 @@ class Twords
|
|
125
179
|
end
|
126
180
|
end
|
127
181
|
|
128
|
-
def fetch_tweet_with_full_text(tweet)
|
129
|
-
@requests += 1
|
130
|
-
client.status(tweet.id, tweet_mode: 'extended')
|
131
|
-
end
|
132
|
-
|
133
182
|
def audit
|
134
183
|
count_words unless audited?
|
135
184
|
@audited = true
|
136
185
|
end
|
137
186
|
|
187
|
+
def audit!
|
188
|
+
@audited = false
|
189
|
+
audit
|
190
|
+
end
|
191
|
+
|
138
192
|
def recent_tweets_count
|
139
193
|
@_recent_tweets_count ||= recent_tweets.count
|
140
194
|
end
|
data/lib/twords/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twords
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. Simon Borg
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-06-
|
11
|
+
date: 2017-06-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: twitter
|