twords 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: deb67eadb91095b9a3ef2a0cefe7c9452038c0f1
4
- data.tar.gz: 7cb612c4aa544038bfb9eb2c88c5367269b1c4c6
3
+ metadata.gz: bd0f26ec512e5184542436f0313c827888abed77
4
+ data.tar.gz: fc38f75b82aefeac2b695398f22659ebe251743c
5
5
  SHA512:
6
- metadata.gz: 615f31dd12aea64aa8ff1304e947f36706a2e23145ca35153227aac72999359d87bfa2ef97c5f6beaae258be2f97f403c1cba7c78502e7c28991318da081768c
7
- data.tar.gz: 49357b0e2060a1726956fb5c1f996ff2281eb2306a4f2ebe1fa2176851d95af5181c5c4c2f5610cd36db782193f6c60776a374d7a7ac70d8f73b79b3a3802394
6
+ metadata.gz: 79960eed5ede9ea409a0b60cda01e383fba21d960a6c9112a67e6495d840b78cc26bc5593ee1fdce56ee9d34544b73484ab3de27e78f7f9ad9106b199e9a9f5e
7
+ data.tar.gz: 2f1f574ffb732f92a0fa2d7e5b1c1dc4665b2e4bc799e7ff31b16b8598fdee5549b36f2eb0c392a5cd49f84198a5dc911ad559819163559c5b000c2cb59d36f0
data/README.md CHANGED
@@ -4,6 +4,8 @@
4
4
 
5
5
  Count the occurrences of words in a tweeter's tweets.
6
6
 
7
+ Configurable - set the words to ignore, the range of dates to look at, and whether to include hashtags, @-mentions, and URLs. Customize your Twitter configuration, too. Sensible defaults are provided for all options. Look at the data in different ways. Easily convert and/or export to CSV and JSON. Change configuration options on the fly and re-audit with ease.
8
+
7
9
  ## Installation
8
10
 
9
11
  Add this line to your application's Gemfile:
@@ -77,6 +79,22 @@ twords.words
77
79
  # => { "#TACOSTACOSTACOS"=>14321, "pizza"=>32, "burger"=>28, "pups"=>36, ... }
78
80
  ```
79
81
 
82
+ Other useful methods:
83
+
84
+ ```ruby
85
+ twords = Twords.new 'user'
86
+ twords.audit
87
+ twords.tweets # An array of the Twitter::Tweet objects included in the count
88
+ twords.total_word_count # The total combined occurrences of the included words
89
+ twords.percentages # Replace word count with the word's percentage of total words
90
+ twords.sort_percentages # Sort the above results in descending order
91
+ twords.to_[csv|json] # Generate CSV || JSON for results
92
+ twords.write_to_[csv|json](opts) # Write CSV || JSON to file.
93
+ # Options - :filename writes the file to the
94
+ # specified relative path (default = 'twords_report.[csv|json]').
95
+ # Other options are passed along to File#open
96
+
97
+ ```
80
98
  ## Development
81
99
 
82
100
  After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -4,42 +4,61 @@ require 'twords/configuration'
4
4
  require 'twords/instance_methods'
5
5
  require 'twords/version'
6
6
 
7
- # Twords.config do |config|
8
- # config.rejects = %w[my us we an w/ because b/c or are this is from
9
- # be on the for to and at our of in rt a with &
10
- # that it by as if was]
11
-
12
- # config.range = 30
13
- # config.up_to { Time.now }
14
- # config.include_hashtags = false
15
- # config.include_uris = false
16
- # config.include_mentions = false
7
+ # Count the occurrences of words in a tweeter's tweets
8
+ #
9
+ # Twords.config do |config|
10
+ # config.rejects = %w[my us we an w/ because b/c or are this is from
11
+ # be on the for to and at our of in rt a with &
12
+ # that it by as if was]
13
+ #
14
+ # config.range = 30
15
+ # config.up_to { Time.now }
16
+ # config.include_hashtags = false
17
+ # config.include_uris = false
18
+ # config.include_mentions = false
17
19
  #
18
- # config.twitter_client do |twitter|
19
- # twitter.consumer_key = YOUR_TWITTER_CONSUMER_KEY
20
- # twitter.consumer_secret = YOUR_TWITTER_CONSUMER_SECRET
21
- # twitter.access_token = YOUR_TWITTER_ACCESS_TOKEN
22
- # twitter.access_token_secret = YOUR_TWITTER_ACCESS_TOKEN_SECRET
20
+ # config.twitter_client do |twitter|
21
+ # twitter.consumer_key = YOUR_TWITTER_CONSUMER_KEY
22
+ # twitter.consumer_secret = YOUR_TWITTER_CONSUMER_SECRET
23
+ # twitter.access_token = YOUR_TWITTER_ACCESS_TOKEN
24
+ # twitter.access_token_secret = YOUR_TWITTER_ACCESS_TOKEN_SECRET
25
+ # end
23
26
  # end
24
- # end
25
27
  #
26
- # twords = Twords.new 'user_one', 'user_two'
28
+ # twords = Twords.new 'user_one', 'user_two'
27
29
  #
28
- # twords.audit
29
- # # => true
30
+ # twords.audit
31
+ # # => true
30
32
  #
31
- # twords.words
32
- # # => { "pizza"=>32, "burger"=>28, "pups"=>36, ... }
33
+ # twords.words
34
+ # # => { "pizza"=>32, "burger"=>28, "pups"=>36, ... }
33
35
  class Twords
36
+ # Set configuration options. The same configuration is shared across all objects in the
37
+ # Twords namespace. Configuration can be changed on the fly and will affect all instantiated
38
+ # objects.
39
+ #
40
+ # @api public
41
+ # for block { |config| ... }
42
+ # @yield [Twords::Configuration] call methods on an instance of Twords::Configuration to override
43
+ # the default configuration settings.
44
+ # @return [Twords::Configuration]
34
45
  def self.config
35
46
  @configuration ||= Configuration.new
36
47
  @configuration.tap { |config| yield config if block_given? }
37
48
  end
38
49
 
50
+ # Resets all configuration options to default settings
51
+ #
52
+ # @api public
53
+ # @return [Twords::Configuration]
39
54
  def self.reset_config!
40
55
  config.reset!
41
56
  end
42
57
 
58
+ # Access the Twitter client
59
+ #
60
+ # @api public
61
+ # @return [Twords::TwitterClient]
43
62
  def self.client
44
63
  config.client
45
64
  end
@@ -5,6 +5,9 @@ class Twords
5
5
  module ConfigAccessible
6
6
  module_function
7
7
 
8
+ # Provides a private method to access the shared config when included in a Module or Class
9
+ #
10
+ # @return [Twords::Configuration]
8
11
  def config
9
12
  Twords.config
10
13
  end
@@ -78,18 +78,18 @@ class Twords
78
78
 
79
79
  private
80
80
 
81
- # private method
81
+ # @api private
82
82
  def set_defaults
83
83
  ivars = %i[include_uris include_hashtags include_mentions range client up_to_block rejects]
84
84
  ivars.each { |ivar| instance_variable_set("@#{ivar}", DEFAULT_OPTIONS[ivar]) }
85
85
  end
86
86
 
87
- # private method
87
+ # @api private
88
88
  def a_boolean?(other)
89
89
  [true, false].include?(other)
90
90
  end
91
91
 
92
- # private method
92
+ # @api private
93
93
  def not_a_boolean_error(boolean)
94
94
  raise ArgumentError, 'argument must be a booolean value' unless a_boolean?(boolean)
95
95
  end
@@ -10,65 +10,133 @@ require 'twords/word_matcher'
10
10
  class Twords
11
11
  include ConfigAccessible
12
12
 
13
- attr_reader :screen_names, :words
14
-
13
+ # The screen names included in the analysis
14
+ #
15
+ # @api public
16
+ # @return [Array<String>] if names are provided to #initialize
17
+ # @return [Array] if no names are provided to #initialize
18
+ attr_reader :screen_names
19
+
20
+ # The words and their number of occurrences
21
+ #
22
+ # @api public
23
+ # @return [Hash] returns the word(String) and counts(Integer) as key-value pairs
24
+ attr_reader :words
25
+
26
+ # Initializes a new Twords object
27
+ #
28
+ # @api public
29
+ # @param screen_names [Array<String>] any number of screen names to include in the analysis
30
+ # @return [Twords]
15
31
  def initialize(*screen_names)
16
32
  @screen_names = screen_names.flatten
17
33
  @words = {}
34
+ @audited = false
18
35
  end
19
36
 
37
+ # Have the #screen_names already been audited?
38
+ #
39
+ # @api public
40
+ # @return [true] if already audited
41
+ # @return [false] if not audited yet
20
42
  def audited?
21
43
  @audited
22
44
  end
23
45
 
46
+ # Fetch tweets and count words. Short circuits and returns true if already audited.
47
+ #
48
+ # @api public
49
+ # @return [true]
24
50
  def audit
25
51
  count_words unless audited?
26
52
  @audited = true
27
53
  end
28
54
 
55
+ # Clear all results and audit from scratch
56
+ #
57
+ # @api public
58
+ # @return [true] always returns true unless an error is raised
29
59
  def audit!
30
60
  instance_variables.reject { |ivar| %i[@screen_names @words].include?(ivar) }.each do |ivar|
31
61
  instance_variable_set(ivar, nil)
32
62
  end
33
63
 
64
+ @audited = false
65
+
34
66
  audit
35
67
  end
36
68
 
69
+ # Sort words by frequency in descending order
70
+ #
71
+ # @api public
72
+ # @return [Array<Array<String, Integer>>]
37
73
  def sort_words
38
74
  @_sort_words ||= words.sort { |a, b| b.last <=> a.last }
39
75
  end
40
76
  alias words_forward sort_words
41
77
 
78
+ # Returns all of the tweets that fall within the configured time range
79
+ #
80
+ # @api public
81
+ # @return [Array<Twitter::Tweet>]
42
82
  def tweets
43
83
  @_tweets ||= client.filter_tweets(screen_names)
44
84
  end
45
85
 
86
+ # Returns an array of #tweets sorted by time created in descending order
87
+ #
88
+ # @api public
89
+ # @return [Array<Twitter::Tweet>]
46
90
  def sort_tweets
47
91
  tweets.sort { |a, b| b.created_at <=> a.created_at }
48
92
  end
49
93
 
94
+ # #sort_tweets destructively
95
+ #
96
+ # @api public
97
+ # @return [Array<Twitter::Tweet>]
50
98
  def sort_tweets!
51
99
  tweets.sort! { |a, b| b.created_at <=> a.created_at }
52
100
  end
53
101
 
102
+ # Number of tweets being analyzed
103
+ #
104
+ # @api public
105
+ # @return [Integer]
54
106
  def tweets_count
55
107
  @_tweets_count ||= tweets.count
56
108
  end
57
109
 
110
+ # Total occurrences of all words included in analysis, i.e. sum of the count of all words.
111
+ #
112
+ # @api public
113
+ # @return [Integer]
58
114
  def total_word_count
59
115
  @_total_word_count ||= words.values.reduce(:+)
60
116
  end
61
117
 
118
+ # The frequency of each word as a share of the #total_word_count
119
+ #
120
+ # @api public
121
+ # @return [Hash] returns the word(String) and percentage(Float) as key-value pairs
62
122
  def percentages
63
123
  @_percentages ||= words.each_with_object({}) do |word_count, hash|
64
124
  hash[word_count.first] = percentage(word_count.last)
65
125
  end
66
126
  end
67
127
 
128
+ # Sorts #percentages in descending order
129
+ #
130
+ # @api public
131
+ # @return [Array<Array<String, Float>>]
68
132
  def sort_percentages
69
133
  @_sort_percentages ||= percentages.sort { |a, b| b.last <=> a.last }
70
134
  end
71
135
 
136
+ # Generate a CSV formatted String of the sorted results, with column headers "word, count"
137
+ #
138
+ # @api public
139
+ # @return [String] in CSV format
72
140
  def to_csv
73
141
  CSV.generate do |csv|
74
142
  csv << %w[word count]
@@ -78,15 +146,32 @@ class Twords
78
146
  end
79
147
  end
80
148
 
149
+ # Write the output of #to_csv to a file.
150
+ #
151
+ # @api public
152
+ # @return [Integer] representing the byte count of the file
153
+ # @param opts [Hash] customizable file writing options. All but :filename are passed to File#open
154
+ # @option opts [String] :filename A relative pathname to define the destination of the new file
81
155
  def write_to_csv(opts = {})
82
156
  filename = opts.fetch(:filename) { 'twords_report.csv' }
83
157
  write_file(filename, :to_csv, opts)
84
158
  end
85
159
 
160
+ # Generate a JSON formatted String of the sorted results, as one hash object with word-count
161
+ # key-value pairs.
162
+ #
163
+ # @api public
164
+ # @return [String] in JSON format
86
165
  def to_json
87
166
  sort_words.to_h.to_json
88
167
  end
89
168
 
169
+ # Write the output of #to_json to a file.
170
+ #
171
+ # @api public
172
+ # @return [Integer] representing the byte count of the file
173
+ # @param opts [Hash] customizable file writing options. All but :filename are passed to File#open
174
+ # @option opts [String] :filename A relative pathname to define the destination of the new file
90
175
  def write_to_json(opts = {})
91
176
  filename = opts.fetch(:filename) { 'twords_report.json' }
92
177
  write_file(filename, :to_json, opts)
@@ -94,12 +179,12 @@ class Twords
94
179
 
95
180
  private
96
181
 
97
- # private method
182
+ # @api private
98
183
  def client
99
184
  config.client
100
185
  end
101
186
 
102
- # private method
187
+ # @api private
103
188
  def count_words
104
189
  words.clear
105
190
  tweets.each do |tweet|
@@ -110,17 +195,17 @@ class Twords
110
195
  end
111
196
  end
112
197
 
113
- # private method
198
+ # @api private
114
199
  def words_array(tweet)
115
200
  tweet.attrs[:full_text].downcase.split(' ')
116
201
  end
117
202
 
118
- # private method
203
+ # @api private
119
204
  def percentage(count)
120
205
  (count / total_word_count.to_f * 100)
121
206
  end
122
207
 
123
- # private method
208
+ # @api private
124
209
  def write_file(filename, method, opts = {})
125
210
  File.open(filename, 'w', opts) { |file| file.write send(method) }
126
211
  end
@@ -7,12 +7,34 @@ class Twords
7
7
  class TwitterClient
8
8
  include ConfigAccessible
9
9
 
10
+ # A Twitter::REST::Client that provides an interface to the Twitter API
11
+ #
12
+ # @api public
13
+ # @return [Twitter::REST::Client]
10
14
  attr_reader :client
11
15
 
16
+ # Initializes a new Twords::TwitterClient object and assigns to the @client instance variable
17
+ #
18
+ # Twords::TwitterClient.new do |twitter|
19
+ # twitter.consumer_key = "YOUR_CONSUMER_KEY"
20
+ # twitter.consumer_secret = "YOUR_CONSUMER_SECRET"
21
+ # twitter.access_token = "YOUR_ACCESS_TOKEN"
22
+ # twitter.access_token_secret = "YOUR_ACCESS_SECRET"
23
+ # end
24
+ #
25
+ # @api public
26
+ # for block { |twitter| ... }
27
+ # @yield [Twitter::REST::Client] yields the Twitter::REST::Client for configuration
28
+ # @see https://github.com/sferik/twitter#configuration
12
29
  def initialize(&block)
13
30
  @client = Twitter::REST::Client.new(&block)
14
31
  end
15
32
 
33
+ # Fetches the timelines for an array of screen names and filters them
34
+ # by the configured time range.
35
+ #
36
+ # @api public
37
+ # @param screen_names [Array<String>] the twitter screen names from which to pull the tweets
16
38
  def filter_tweets(screen_names)
17
39
  full_timeline(screen_names).each_with_object([]) do |tweet, memo|
18
40
  next if tweet.created_at > up_to_time
@@ -22,12 +44,12 @@ class Twords
22
44
 
23
45
  private
24
46
 
25
- # private method
47
+ # @api private
26
48
  def full_timeline(screen_names)
27
49
  screen_names.map { |screen_name| fetch_user_timeline(screen_name) }.flatten.uniq
28
50
  end
29
51
 
30
- # private method
52
+ # @api private
31
53
  def fetch_user_timeline(screen_name)
32
54
  return [] if screen_name.to_s.empty?
33
55
  user_timeline = client.user_timeline(screen_name, tweet_mode: 'extended', count: 200)
@@ -40,22 +62,22 @@ class Twords
40
62
  fetch_user_timeline(screen_name)
41
63
  end
42
64
 
43
- # private method
65
+ # @api private
44
66
  def age_of_tweet_in_days(tweet)
45
67
  (up_to_time - tweet.created_at) / 86_400
46
68
  end
47
69
 
48
- # private method
70
+ # @api private
49
71
  def up_to_time
50
72
  config.up_to_time
51
73
  end
52
74
 
53
- # private method
75
+ # @api private
54
76
  def range
55
77
  config.range
56
78
  end
57
79
 
58
- # private method
80
+ # @api private
59
81
  def fetch_older_tweets(user_timeline, screen_name)
60
82
  return user_timeline if age_of_tweet_in_days(user_timeline.last) > range
61
83
  first_count = user_timeline.count
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Twords
4
- VERSION = '0.2.0'.freeze
4
+ VERSION = '0.2.1'.freeze
5
5
  end
@@ -8,26 +8,51 @@ class Twords
8
8
  class << self
9
9
  include ConfigAccessible
10
10
 
11
+ # Check if a word should not be counted.
12
+ #
13
+ # @api public
14
+ # @return [true] if word should be skipped
15
+ # @return [false] if word should not be skipped
11
16
  def should_be_skipped?(word)
12
17
  reject?(word) || hashtag?(word) || uri?(word) || mention?(word)
13
18
  end
14
19
 
20
+ # Check if a word is one of the configured rejects to ignore
21
+ #
22
+ # @api public
23
+ # @return [true] if word is a reject
24
+ # @return [false] if word is not a reject
15
25
  def reject?(word)
16
26
  config.rejects.include?(word)
17
27
  end
18
28
 
29
+ # Check if a word is a hashtag.
30
+ #
31
+ # @api public
32
+ # @return [true] if hashtags should not be included and word is a hashtag
33
+ # @return [false] if all hashtags should be included or word is not a hashtag
19
34
  def hashtag?(word)
20
- return if config.include_hashtags
35
+ return false if config.include_hashtags
21
36
  !(word =~ /#(\w+)/).nil?
22
37
  end
23
38
 
39
+ # Check if a word is a URI. Uses URI#regexp to match URIs
40
+ #
41
+ # @api public
42
+ # @return [true] if URIs should not be included and word is a URI
43
+ # @return [false] if all URIs should be included or word is not a URI
24
44
  def uri?(word)
25
- return if config.include_uris
45
+ return false if config.include_uris
26
46
  !(word =~ URI.regexp).nil?
27
47
  end
28
48
 
49
+ # Check if a word is a @-mention.
50
+ #
51
+ # @api public
52
+ # @return [true] if @-mentions should not be included and word is a @-mention
53
+ # @return [false] if all @-mentions should be included or word is not a @-mention
29
54
  def mention?(word)
30
- return if config.include_mentions
55
+ return false if config.include_mentions
31
56
  !(word =~ /@(\w+)/).nil?
32
57
  end
33
58
  end
@@ -12,7 +12,13 @@ Gem::Specification.new do |spec|
12
12
  spec.email = ['msimonborg@gmail.com']
13
13
 
14
14
  spec.summary = 'Twitter word clouds'
15
- spec.description = 'Twitter word clouds'
15
+ spec.description = 'Twitter word clouds. Analyse the frequency of word occurrences for a '\
16
+ 'user or list of users. Configurable - set the words to ignore, the range of dates to look '\
17
+ 'at, and whether to include hashtags, @-mentions, and URLs. Customize your Twitter '\
18
+ 'configuration, too. Sensible defaults are provided for all options. Look at the data in '\
19
+ 'different ways. Easily convert and/or export to CSV and JSON. Change configuration options '\
20
+ 'on the fly and re-audit with ease.'
21
+
16
22
  spec.homepage = 'https://github.com/msimonborg/twords'
17
23
  spec.license = 'MIT'
18
24
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twords
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. Simon Borg
@@ -52,7 +52,12 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '10.0'
55
- description: Twitter word clouds
55
+ description: Twitter word clouds. Analyse the frequency of word occurrences for a
56
+ user or list of users. Configurable - set the words to ignore, the range of dates
57
+ to look at, and whether to include hashtags, @-mentions, and URLs. Customize your
58
+ Twitter configuration, too. Sensible defaults are provided for all options. Look
59
+ at the data in different ways. Easily convert and/or export to CSV and JSON. Change
60
+ configuration options on the fly and re-audit with ease.
56
61
  email:
57
62
  - msimonborg@gmail.com
58
63
  executables: []