twords 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: deb67eadb91095b9a3ef2a0cefe7c9452038c0f1
4
- data.tar.gz: 7cb612c4aa544038bfb9eb2c88c5367269b1c4c6
3
+ metadata.gz: bd0f26ec512e5184542436f0313c827888abed77
4
+ data.tar.gz: fc38f75b82aefeac2b695398f22659ebe251743c
5
5
  SHA512:
6
- metadata.gz: 615f31dd12aea64aa8ff1304e947f36706a2e23145ca35153227aac72999359d87bfa2ef97c5f6beaae258be2f97f403c1cba7c78502e7c28991318da081768c
7
- data.tar.gz: 49357b0e2060a1726956fb5c1f996ff2281eb2306a4f2ebe1fa2176851d95af5181c5c4c2f5610cd36db782193f6c60776a374d7a7ac70d8f73b79b3a3802394
6
+ metadata.gz: 79960eed5ede9ea409a0b60cda01e383fba21d960a6c9112a67e6495d840b78cc26bc5593ee1fdce56ee9d34544b73484ab3de27e78f7f9ad9106b199e9a9f5e
7
+ data.tar.gz: 2f1f574ffb732f92a0fa2d7e5b1c1dc4665b2e4bc799e7ff31b16b8598fdee5549b36f2eb0c392a5cd49f84198a5dc911ad559819163559c5b000c2cb59d36f0
data/README.md CHANGED
@@ -4,6 +4,8 @@
4
4
 
5
5
  Count the occurrences of words in a tweeter's tweets.
6
6
 
7
+ Configurable - set the words to ignore, the range of dates to look at, and whether to include hashtags, @-mentions, and URLs. Customize your Twitter configuration, too. Sensible defaults are provided for all options. Look at the data in different ways. Easily convert and/or export to CSV and JSON. Change configuration options on the fly and re-audit with ease.
8
+
7
9
  ## Installation
8
10
 
9
11
  Add this line to your application's Gemfile:
@@ -77,6 +79,22 @@ twords.words
77
79
  # => { "#TACOSTACOSTACOS"=>14321, "pizza"=>32, "burger"=>28, "pups"=>36, ... }
78
80
  ```
79
81
 
82
+ Other useful methods:
83
+
84
+ ```ruby
85
+ twords = Twords.new 'user'
86
+ twords.audit
87
+ twords.tweets # An array of the Twitter::Tweet objects included in the count
88
+ twords.total_word_count # The total combined occurrences of the included words
89
+ twords.percentages # Replace word count with the word's percentage of total words
90
+ twords.sort_percentages # Sort the above results in descending order
91
+ twords.to_[csv|json] # Generate CSV || JSON for results
92
+ twords.write_to_[csv|json](opts) # Write CSV || JSON to file.
93
+ # Options - :filename writes the file to the
94
+ # specified relative path (default = 'twords_report.[csv|json]').
95
+ # Other options are passed along to File#open
96
+
97
+ ```
80
98
  ## Development
81
99
 
82
100
  After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -4,42 +4,61 @@ require 'twords/configuration'
4
4
  require 'twords/instance_methods'
5
5
  require 'twords/version'
6
6
 
7
- # Twords.config do |config|
8
- # config.rejects = %w[my us we an w/ because b/c or are this is from
9
- # be on the for to and at our of in rt a with &
10
- # that it by as if was]
11
-
12
- # config.range = 30
13
- # config.up_to { Time.now }
14
- # config.include_hashtags = false
15
- # config.include_uris = false
16
- # config.include_mentions = false
7
+ # Count the occurrences of words in a tweeter's tweets
8
+ #
9
+ # Twords.config do |config|
10
+ # config.rejects = %w[my us we an w/ because b/c or are this is from
11
+ # be on the for to and at our of in rt a with &
12
+ # that it by as if was]
13
+ #
14
+ # config.range = 30
15
+ # config.up_to { Time.now }
16
+ # config.include_hashtags = false
17
+ # config.include_uris = false
18
+ # config.include_mentions = false
17
19
  #
18
- # config.twitter_client do |twitter|
19
- # twitter.consumer_key = YOUR_TWITTER_CONSUMER_KEY
20
- # twitter.consumer_secret = YOUR_TWITTER_CONSUMER_SECRET
21
- # twitter.access_token = YOUR_TWITTER_ACCESS_TOKEN
22
- # twitter.access_token_secret = YOUR_TWITTER_ACCESS_TOKEN_SECRET
20
+ # config.twitter_client do |twitter|
21
+ # twitter.consumer_key = YOUR_TWITTER_CONSUMER_KEY
22
+ # twitter.consumer_secret = YOUR_TWITTER_CONSUMER_SECRET
23
+ # twitter.access_token = YOUR_TWITTER_ACCESS_TOKEN
24
+ # twitter.access_token_secret = YOUR_TWITTER_ACCESS_TOKEN_SECRET
25
+ # end
23
26
  # end
24
- # end
25
27
  #
26
- # twords = Twords.new 'user_one', 'user_two'
28
+ # twords = Twords.new 'user_one', 'user_two'
27
29
  #
28
- # twords.audit
29
- # # => true
30
+ # twords.audit
31
+ # # => true
30
32
  #
31
- # twords.words
32
- # # => { "pizza"=>32, "burger"=>28, "pups"=>36, ... }
33
+ # twords.words
34
+ # # => { "pizza"=>32, "burger"=>28, "pups"=>36, ... }
33
35
  class Twords
36
+ # Set configuration options. The same configuration is shared across all objects in the
37
+ # Twords namespace. Configuration can be changed on the fly and will affect all instantiated
38
+ # objects.
39
+ #
40
+ # @api public
41
+ # for block { |config| ... }
42
+ # @yield [Twords::Configuration] call methods on an instance of Twords::Configuration to override
43
+ # the default configuration settings.
44
+ # @return [Twords::Configuration]
34
45
  def self.config
35
46
  @configuration ||= Configuration.new
36
47
  @configuration.tap { |config| yield config if block_given? }
37
48
  end
38
49
 
50
+ # Resets all configuration options to default settings
51
+ #
52
+ # @api public
53
+ # @return [Twords::Configuration]
39
54
  def self.reset_config!
40
55
  config.reset!
41
56
  end
42
57
 
58
+ # Access the Twitter client
59
+ #
60
+ # @api public
61
+ # @return [Twords::TwitterClient]
43
62
  def self.client
44
63
  config.client
45
64
  end
@@ -5,6 +5,9 @@ class Twords
5
5
  module ConfigAccessible
6
6
  module_function
7
7
 
8
+ # Provides a private method to access the shared config when included in a Module or Class
9
+ #
10
+ # @return [Twords::Configuration]
8
11
  def config
9
12
  Twords.config
10
13
  end
@@ -78,18 +78,18 @@ class Twords
78
78
 
79
79
  private
80
80
 
81
- # private method
81
+ # @api private
82
82
  def set_defaults
83
83
  ivars = %i[include_uris include_hashtags include_mentions range client up_to_block rejects]
84
84
  ivars.each { |ivar| instance_variable_set("@#{ivar}", DEFAULT_OPTIONS[ivar]) }
85
85
  end
86
86
 
87
- # private method
87
+ # @api private
88
88
  def a_boolean?(other)
89
89
  [true, false].include?(other)
90
90
  end
91
91
 
92
- # private method
92
+ # @api private
93
93
  def not_a_boolean_error(boolean)
94
94
  raise ArgumentError, 'argument must be a booolean value' unless a_boolean?(boolean)
95
95
  end
@@ -10,65 +10,133 @@ require 'twords/word_matcher'
10
10
  class Twords
11
11
  include ConfigAccessible
12
12
 
13
- attr_reader :screen_names, :words
14
-
13
+ # The screen names included in the analysis
14
+ #
15
+ # @api public
16
+ # @return [Array<String>] if names are provided to #initialize
17
+ # @return [Array] if no names are provided to #initialize
18
+ attr_reader :screen_names
19
+
20
+ # The words and their number of occurrences
21
+ #
22
+ # @api public
23
+ # @return [Hash] returns the word(String) and counts(Integer) as key-value pairs
24
+ attr_reader :words
25
+
26
+ # Initializes a new Twords object
27
+ #
28
+ # @api public
29
+ # @param screen_names [Array<String>] any number of screen names to include in the analysis
30
+ # @return [Twords]
15
31
  def initialize(*screen_names)
16
32
  @screen_names = screen_names.flatten
17
33
  @words = {}
34
+ @audited = false
18
35
  end
19
36
 
37
+ # Have the #screen_names already been audited?
38
+ #
39
+ # @api public
40
+ # @return [true] if already audited
41
+ # @return [false] if not audited yet
20
42
  def audited?
21
43
  @audited
22
44
  end
23
45
 
46
+ # Fetch tweets and count words. Short circuits and returns true if already audited.
47
+ #
48
+ # @api public
49
+ # @return [true]
24
50
  def audit
25
51
  count_words unless audited?
26
52
  @audited = true
27
53
  end
28
54
 
55
+ # Clear all results and audit from scratch
56
+ #
57
+ # @api public
58
+ # @return [true] always returns true unless an error is raised
29
59
  def audit!
30
60
  instance_variables.reject { |ivar| %i[@screen_names @words].include?(ivar) }.each do |ivar|
31
61
  instance_variable_set(ivar, nil)
32
62
  end
33
63
 
64
+ @audited = false
65
+
34
66
  audit
35
67
  end
36
68
 
69
+ # Sort words by frequency in descending order
70
+ #
71
+ # @api public
72
+ # @return [Array<Array<String, Integer>>]
37
73
  def sort_words
38
74
  @_sort_words ||= words.sort { |a, b| b.last <=> a.last }
39
75
  end
40
76
  alias words_forward sort_words
41
77
 
78
+ # Returns all of the tweets that fall within the configured time range
79
+ #
80
+ # @api public
81
+ # @return [Array<Twitter::Tweet>]
42
82
  def tweets
43
83
  @_tweets ||= client.filter_tweets(screen_names)
44
84
  end
45
85
 
86
+ # Returns an array of #tweets sorted by time created in descending order
87
+ #
88
+ # @api public
89
+ # @return [Array<Twitter::Tweet>]
46
90
  def sort_tweets
47
91
  tweets.sort { |a, b| b.created_at <=> a.created_at }
48
92
  end
49
93
 
94
+ # #sort_tweets destructively
95
+ #
96
+ # @api public
97
+ # @return [Array<Twitter::Tweet>]
50
98
  def sort_tweets!
51
99
  tweets.sort! { |a, b| b.created_at <=> a.created_at }
52
100
  end
53
101
 
102
+ # Number of tweets being analyzed
103
+ #
104
+ # @api public
105
+ # @return [Integer]
54
106
  def tweets_count
55
107
  @_tweets_count ||= tweets.count
56
108
  end
57
109
 
110
+ # Total occurrences of all words included in analysis, i.e. sum of the count of all words.
111
+ #
112
+ # @api public
113
+ # @return [Integer]
58
114
  def total_word_count
59
115
  @_total_word_count ||= words.values.reduce(:+)
60
116
  end
61
117
 
118
+ # The frequency of each word as a share of the #total_word_count
119
+ #
120
+ # @api public
121
+ # @return [Hash] returns the word(String) and percentage(Float) as key-value pairs
62
122
  def percentages
63
123
  @_percentages ||= words.each_with_object({}) do |word_count, hash|
64
124
  hash[word_count.first] = percentage(word_count.last)
65
125
  end
66
126
  end
67
127
 
128
+ # Sorts #percentages in descending order
129
+ #
130
+ # @api public
131
+ # @return [Array<Array<String, Float>>]
68
132
  def sort_percentages
69
133
  @_sort_percentages ||= percentages.sort { |a, b| b.last <=> a.last }
70
134
  end
71
135
 
136
+ # Generate a CSV formatted String of the sorted results, with column headers "word, count"
137
+ #
138
+ # @api public
139
+ # @return [String] in CSV format
72
140
  def to_csv
73
141
  CSV.generate do |csv|
74
142
  csv << %w[word count]
@@ -78,15 +146,32 @@ class Twords
78
146
  end
79
147
  end
80
148
 
149
+ # Write the output of #to_csv to a file.
150
+ #
151
+ # @api public
152
+ # @return [Integer] representing the byte count of the file
153
+ # @param opts [Hash] customizable file writing options. All but :filename are passed to File#open
154
+ # @option opts [String] :filename A relative pathname to define the destination of the new file
81
155
  def write_to_csv(opts = {})
82
156
  filename = opts.fetch(:filename) { 'twords_report.csv' }
83
157
  write_file(filename, :to_csv, opts)
84
158
  end
85
159
 
160
+ # Generate a JSON formatted String of the sorted results, as one hash object with word-count
161
+ # key-value pairs.
162
+ #
163
+ # @api public
164
+ # @return [String] in JSON format
86
165
  def to_json
87
166
  sort_words.to_h.to_json
88
167
  end
89
168
 
169
+ # Write the output of #to_json to a file.
170
+ #
171
+ # @api public
172
+ # @return [Integer] representing the byte count of the file
173
+ # @param opts [Hash] customizable file writing options. All but :filename are passed to File#open
174
+ # @option opts [String] :filename A relative pathname to define the destination of the new file
90
175
  def write_to_json(opts = {})
91
176
  filename = opts.fetch(:filename) { 'twords_report.json' }
92
177
  write_file(filename, :to_json, opts)
@@ -94,12 +179,12 @@ class Twords
94
179
 
95
180
  private
96
181
 
97
- # private method
182
+ # @api private
98
183
  def client
99
184
  config.client
100
185
  end
101
186
 
102
- # private method
187
+ # @api private
103
188
  def count_words
104
189
  words.clear
105
190
  tweets.each do |tweet|
@@ -110,17 +195,17 @@ class Twords
110
195
  end
111
196
  end
112
197
 
113
- # private method
198
+ # @api private
114
199
  def words_array(tweet)
115
200
  tweet.attrs[:full_text].downcase.split(' ')
116
201
  end
117
202
 
118
- # private method
203
+ # @api private
119
204
  def percentage(count)
120
205
  (count / total_word_count.to_f * 100)
121
206
  end
122
207
 
123
- # private method
208
+ # @api private
124
209
  def write_file(filename, method, opts = {})
125
210
  File.open(filename, 'w', opts) { |file| file.write send(method) }
126
211
  end
@@ -7,12 +7,34 @@ class Twords
7
7
  class TwitterClient
8
8
  include ConfigAccessible
9
9
 
10
+ # A Twitter::REST::Client that provides an interface to the Twitter API
11
+ #
12
+ # @api public
13
+ # @return [Twitter::REST::Client]
10
14
  attr_reader :client
11
15
 
16
+ # Initializes a new Twords::TwitterClient object and assigns to the @client instance variable
17
+ #
18
+ # Twords::TwitterClient.new do |twitter|
19
+ # twitter.consumer_key = "YOUR_CONSUMER_KEY"
20
+ # twitter.consumer_secret = "YOUR_CONSUMER_SECRET"
21
+ # twitter.access_token = "YOUR_ACCESS_TOKEN"
22
+ # twitter.access_token_secret = "YOUR_ACCESS_SECRET"
23
+ # end
24
+ #
25
+ # @api public
26
+ # for block { |twitter| ... }
27
+ # @yield [Twitter::REST::Client] yields the Twitter::REST::Client for configuration
28
+ # @see https://github.com/sferik/twitter#configuration
12
29
  def initialize(&block)
13
30
  @client = Twitter::REST::Client.new(&block)
14
31
  end
15
32
 
33
+ # Fetches the timelines for an array of screen names and filters them
34
+ # by the configured time range.
35
+ #
36
+ # @api public
37
+ # @param screen_names [Array<String>] the twitter screen names from which to pull the tweets
16
38
  def filter_tweets(screen_names)
17
39
  full_timeline(screen_names).each_with_object([]) do |tweet, memo|
18
40
  next if tweet.created_at > up_to_time
@@ -22,12 +44,12 @@ class Twords
22
44
 
23
45
  private
24
46
 
25
- # private method
47
+ # @api private
26
48
  def full_timeline(screen_names)
27
49
  screen_names.map { |screen_name| fetch_user_timeline(screen_name) }.flatten.uniq
28
50
  end
29
51
 
30
- # private method
52
+ # @api private
31
53
  def fetch_user_timeline(screen_name)
32
54
  return [] if screen_name.to_s.empty?
33
55
  user_timeline = client.user_timeline(screen_name, tweet_mode: 'extended', count: 200)
@@ -40,22 +62,22 @@ class Twords
40
62
  fetch_user_timeline(screen_name)
41
63
  end
42
64
 
43
- # private method
65
+ # @api private
44
66
  def age_of_tweet_in_days(tweet)
45
67
  (up_to_time - tweet.created_at) / 86_400
46
68
  end
47
69
 
48
- # private method
70
+ # @api private
49
71
  def up_to_time
50
72
  config.up_to_time
51
73
  end
52
74
 
53
- # private method
75
+ # @api private
54
76
  def range
55
77
  config.range
56
78
  end
57
79
 
58
- # private method
80
+ # @api private
59
81
  def fetch_older_tweets(user_timeline, screen_name)
60
82
  return user_timeline if age_of_tweet_in_days(user_timeline.last) > range
61
83
  first_count = user_timeline.count
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Twords
4
- VERSION = '0.2.0'.freeze
4
+ VERSION = '0.2.1'.freeze
5
5
  end
@@ -8,26 +8,51 @@ class Twords
8
8
  class << self
9
9
  include ConfigAccessible
10
10
 
11
+ # Check if a word should not be counted.
12
+ #
13
+ # @api public
14
+ # @return [true] if word should be skipped
15
+ # @return [false] if word should not be skipped
11
16
  def should_be_skipped?(word)
12
17
  reject?(word) || hashtag?(word) || uri?(word) || mention?(word)
13
18
  end
14
19
 
20
+ # Check if a word is one of the configured rejects to ignore
21
+ #
22
+ # @api public
23
+ # @return [true] if word is a reject
24
+ # @return [false] if word is not a reject
15
25
  def reject?(word)
16
26
  config.rejects.include?(word)
17
27
  end
18
28
 
29
+ # Check if a word is a hashtag.
30
+ #
31
+ # @api public
32
+ # @return [true] if hashtags should not be included and word is a hashtag
33
+ # @return [false] if all hashtags should be included or word is not a hashtag
19
34
  def hashtag?(word)
20
- return if config.include_hashtags
35
+ return false if config.include_hashtags
21
36
  !(word =~ /#(\w+)/).nil?
22
37
  end
23
38
 
39
+ # Check if a word is a URI. Uses URI#regexp to match URIs
40
+ #
41
+ # @api public
42
+ # @return [true] if URIs should not be included and word is a URI
43
+ # @return [false] if all URIs should be included or word is not a URI
24
44
  def uri?(word)
25
- return if config.include_uris
45
+ return false if config.include_uris
26
46
  !(word =~ URI.regexp).nil?
27
47
  end
28
48
 
49
+ # Check if a word is a @-mention.
50
+ #
51
+ # @api public
52
+ # @return [true] if @-mentions should not be included and word is a @-mention
53
+ # @return [false] if all @-mentions should be included or word is not a @-mention
29
54
  def mention?(word)
30
- return if config.include_mentions
55
+ return false if config.include_mentions
31
56
  !(word =~ /@(\w+)/).nil?
32
57
  end
33
58
  end
@@ -12,7 +12,13 @@ Gem::Specification.new do |spec|
12
12
  spec.email = ['msimonborg@gmail.com']
13
13
 
14
14
  spec.summary = 'Twitter word clouds'
15
- spec.description = 'Twitter word clouds'
15
+ spec.description = 'Twitter word clouds. Analyse the frequency of word occurrences for a '\
16
+ 'user or list of users. Configurable - set the words to ignore, the range of dates to look '\
17
+ 'at, and whether to include hashtags, @-mentions, and URLs. Customize your Twitter '\
18
+ 'configuration, too. Sensible defaults are provided for all options. Look at the data in '\
19
+ 'different ways. Easily convert and/or export to CSV and JSON. Change configuration options '\
20
+ 'on the fly and re-audit with ease.'
21
+
16
22
  spec.homepage = 'https://github.com/msimonborg/twords'
17
23
  spec.license = 'MIT'
18
24
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twords
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. Simon Borg
@@ -52,7 +52,12 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '10.0'
55
- description: Twitter word clouds
55
+ description: Twitter word clouds. Analyse the frequency of word occurrences for a
56
+ user or list of users. Configurable - set the words to ignore, the range of dates
57
+ to look at, and whether to include hashtags, @-mentions, and URLs. Customize your
58
+ Twitter configuration, too. Sensible defaults are provided for all options. Look
59
+ at the data in different ways. Easily convert and/or export to CSV and JSON. Change
60
+ configuration options on the fly and re-audit with ease.
56
61
  email:
57
62
  - msimonborg@gmail.com
58
63
  executables: []