birdwatcher 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9967717a3d089165d8c2184b7028cf965d747277
4
- data.tar.gz: 098b63db5bf37e20bb3a5d72a515c01b1dad42e7
3
+ metadata.gz: 9fe692202c5f66cd4792434688658820d13f659f
4
+ data.tar.gz: afd0ce664e68cfe1228b46110ebedcfec139f260
5
5
  SHA512:
6
- metadata.gz: 6bd0fb95954c3493cb7b3adce85a68f2b86a439b87fb81477e00ba7f0fce3b12b6c2e7cfb38b9744c19d5345d1a7748c0aaa11e138c0028981337fd167918688
7
- data.tar.gz: fe6e8192f569912ab7c3e3e6d7e51496fd408c48a11b713d883d387e155a2154a637414153da14ccf243af1f949dc4969dbfdc46c1175e968827710becf2f468
6
+ metadata.gz: 25e84e1841967733afe7fd0a5967c2637da26bb0f9b540eeb0be1e219983fbb6e555216b4a111b0eaca7816023bc7bee675676772f5814c883e79c56e83521e0
7
+ data.tar.gz: 1edea6733ed598f18e649397e48b483dedff510ca686d8f94c26b26e6c23d0b7783fc532939072418608bec31778f868ffe0d124ed465bc17acf1e98355c8ac7
data/CHANGELOG.md CHANGED
@@ -3,6 +3,18 @@ All notable changes to this project will be documented in this file.
3
3
  This project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
5
  ## [Unreleased]
6
+ ### Added
7
+
8
+ ## [0.4.0]
9
+ ### Added
10
+ - New `spool` command to spool console input and output to a file
11
+ - Write command history to `~/.birdwatcher_history` and load history on startup
12
+ for persistent command history
13
+ - New module `statuses/word_list` to generate a simple word list from statuses
14
+ - Introduced new `Birdwatcher::WordList` class to process text into a word list
15
+
16
+ ### Changed
17
+ - Refactored `statuses/word_cloud` module to use new `Birdwatcher::WordList` class
6
18
 
7
19
  ## [0.3.1]
8
20
  ### Added
@@ -14,4 +26,4 @@ This project adheres to [Semantic Versioning](http://semver.org/).
14
26
  - `posted_at` column added to `urls` for better and easier ordering
15
27
 
16
28
  ### Fixed
17
- - make `status search` command case insensitive
29
+ - Make `status search` command case insensitive
data/lib/birdwatcher.rb CHANGED
@@ -19,6 +19,7 @@ require "birdwatcher/http_client"
19
19
  require "birdwatcher/klout_client"
20
20
  require "birdwatcher/punchcard"
21
21
  require "birdwatcher/kml"
22
+ require "birdwatcher/word_list"
22
23
  require "birdwatcher/console"
23
24
  require "birdwatcher/configuration"
24
25
  require "birdwatcher/configuration_wizard"
@@ -16,7 +16,7 @@ can be very convenient for common or repetitive workflows.
16
16
  #{'USAGE:'.bold}
17
17
 
18
18
  #{'Execute commands from a resource file:'.bold}
19
- resource <FILE>
19
+ resource FILE
20
20
  USAGE
21
21
  end
22
22
 
@@ -0,0 +1,76 @@
1
module Birdwatcher
  module Commands
    # Console command that mirrors console input/output into a file
    # ("spooling").
    #
    # Accepted invocations:
    #   spool FILE        start spooling to FILE (appends if FILE exists)
    #   spool start FILE  same as above
    #   spool off|stop    stop spooling
    #   spool status      report whether spooling is active
    class Spool < Birdwatcher::Command
      self.meta = {
        :description => "Write console output into a file as well the screen",
        :names       => %w(spool),
        :usage       => "spool FILE|off"
      }

      # Long-form help text for the command.
      #
      # @return [String]
      def self.detailed_usage
        <<-USAGE
The #{'spool'.bold} command can be used to write all console output into a file
as well the screen. The output will be appended to the file if it already exists.

#{'USAGE:'.bold}

#{'Spool output to a file:'.bold}
spool FILE

#{'Turn off spooling:'.bold}
spool off

#{'See status of spooling:'.bold}
spool status
USAGE
      end

      # Dispatch on the first argument. Anything that is not a known action
      # keyword is treated as the path of the file to spool to.
      #
      # @return [false] when no argument or no usable file path was given
      def run
        if !arguments?
          error("You must provide a path to a file or an action")
          return false
        end
        case arguments.first.downcase
        when "off", "stop"
          stop_spooling
        when "status"
          status_spooling
        else
          # Covers both "spool start FILE" and "spool FILE".
          start_spooling
        end
      end

      private

      # Open the target file in append mode and install it as the console's
      # spool. A previously active spool file is closed once it is replaced.
      def start_spooling
        # NOTE: arguments[1, -1] is start/length slicing and returns nil for a
        # negative length; the range form yields everything after "start".
        path_parts = arguments.first.downcase == "start" ? arguments[1..-1] : arguments
        file = path_parts.join(" ")
        if file.empty?
          error("You must provide a path to a file")
          return false
        end
        begin
          new_spool = File.open(file, "a")
        rescue SystemCallError => e
          # e.g. missing directory, permission denied, path is a directory
          error("Cannot open #{file.bold} for spooling: #{e.message}")
          return false
        end
        # Unbuffered so the file tracks the screen line by line.
        new_spool.sync = true
        previous_spool = console.spool
        console.spool = new_spool
        close_spool_file(previous_spool)
        info("Spooling output to #{file.bold}")
      end

      # Detach the spool from the console and close the underlying file.
      # The spool is cleared before the message is printed so the message
      # itself is not written to the file.
      def stop_spooling
        previous_spool = console.spool
        console.spool = nil
        close_spool_file(previous_spool)
        info("Output spooling stopped")
      end

      # Report whether output is currently being spooled, and where to.
      def status_spooling
        if console.spool && console.spool.is_a?(File)
          info("Spooling output to #{console.spool.path.bold}")
        else
          info("Output spooling is stopped")
        end
      end

      # Close a spool handle that is no longer in use, if there is one.
      def close_spool_file(io)
        return unless io.respond_to?(:close)
        io.close unless io.closed?
      end
    end
  end
end
@@ -107,7 +107,7 @@ module Birdwatcher
107
107
  # 0 / 0
108
108
  # end
109
109
  def confirm(question)
110
- HighLine.agree("#{question} (y/n) ")
110
+ Birdwatcher::Console.instance.confirm(question)
111
111
  end
112
112
  end
113
113
  end
@@ -114,8 +114,7 @@ module Birdwatcher
114
114
  # If the text is long, it will be automatically paged with the system's
115
115
  # currently configured pager command (usually `less`).
116
116
  def page_text(text)
117
- ::TTY::Pager::SystemPager.new.page(text)
118
- rescue Errno::EPIPE
117
+ Birdwatcher::Console.instance.page_text(text)
119
118
  end
120
119
  end
121
120
  end
@@ -0,0 +1,21 @@
1
module Birdwatcher
  module Concerns
    # Mixin that gives the including class a factory method for
    # Birdwatcher::WordList instances.
    module WordList
      # Class-level methods added to the including class (currently none).
      module ClassMethods
      end

      # Hook invoked by Ruby when the concern is included; extends the
      # including class with ClassMethods.
      def self.included(receiver)
        receiver.extend(ClassMethods)
      end

      # Build a fresh word list.
      #
      # @param options [Hash] options passed straight to the word list
      #
      # @return [Birdwatcher::WordList]
      def make_word_list(options = {})
        ::Birdwatcher::WordList.new(options)
      end
    end
  end
end
@@ -3,14 +3,17 @@ module Birdwatcher
3
3
  include Singleton
4
4
 
5
5
  DEFAULT_AUTO_COMPLETION_STRINGS = [].freeze
6
- DB_MIGRATIONS_PATH = File.expand_path("../../../db/migrations", __FILE__).freeze
7
- LINE_SEPARATOR = ("=" * 80).freeze
6
+ DB_MIGRATIONS_PATH = File.expand_path("../../../db/migrations", __FILE__).freeze
7
+ LINE_SEPARATOR = ("=" * 80).freeze
8
+ HISTORY_FILE_NAME = ".birdwatcher_history".freeze
9
+ HISTORY_FILE_LOCATION = File.join(Dir.home, HISTORY_FILE_NAME).freeze
8
10
 
9
- attr_accessor :current_workspace, :current_module
11
+ attr_accessor :current_workspace, :current_module, :spool
10
12
  attr_reader :database
11
13
 
12
14
  def initialize
13
15
  @output_mutex = Mutex.new
16
+ @spool_mutex = Mutex.new
14
17
  end
15
18
 
16
19
  def start!
@@ -21,7 +24,9 @@ module Birdwatcher
21
24
  Birdwatcher::Console.instance.auto_completion_strings.grep(/\A#{Regexp.escape(s)}/) + Dir["#{expanded_s}*"].grep(/^#{Regexp.escape(expanded_s)}/)
22
25
  end
23
26
  Readline.completion_append_character = ""
27
+ load_command_history
24
28
  while input = Readline.readline(prompt_line, true)
29
+ save_to_spool(prompt_line)
25
30
  input = input.to_s.strip
26
31
  handle_input(input) unless input.empty?
27
32
  end
@@ -29,6 +34,8 @@ module Birdwatcher
29
34
 
30
35
  def handle_input(input)
31
36
  input.strip!
37
+ save_command_to_history(input)
38
+ save_to_spool("#{input}\n")
32
39
  command_name, argument_line = input.split(" ", 2).map(&:strip)
33
40
  command_name.downcase
34
41
  commands.each do |command|
@@ -52,14 +59,15 @@ module Birdwatcher
52
59
  def output(data, newline = true)
53
60
  data = "#{data}\n" if newline
54
61
  with_output_mutex { print data }
62
+ save_to_spool(data)
55
63
  end
56
64
 
57
65
  def output_formatted(*args)
58
- with_output_mutex { printf(*args) }
66
+ output(sprintf(*args), false)
59
67
  end
60
68
 
61
69
  def newline
62
- with_output_mutex { puts }
70
+ output ""
63
71
  end
64
72
 
65
73
  def line_separator
@@ -77,6 +85,7 @@ module Birdwatcher
77
85
  rescue => e
78
86
  output " failed".bold.light_red
79
87
  error "#{e.class}: ".bold + e.message
88
+ e.backtrace.each { |l| error l } if debugging_enabled?
80
89
  exit(1) if fatal
81
90
  end
82
91
 
@@ -92,6 +101,24 @@ module Birdwatcher
92
101
  output "[-]".white.bold.on_red + " #{message}"
93
102
  end
94
103
 
104
# Ask a yes/no question on the console, recording both the prompt and the
# answer in the spool.
#
# @param question [String] question text; " (y/n) " is appended
#
# @return [Boolean] true when HighLine reports agreement
def confirm(question)
  prompt = "#{question} (y/n) "
  save_to_spool(prompt)
  agreed = HighLine.agree(prompt) ? true : false
  save_to_spool(agreed ? "y\n" : "n\n")
  agreed
end
115
+
116
# Write the text to the spool, then show it through the system pager.
#
# @param text [String] text to page
def page_text(text)
  begin
    save_to_spool(text)
    ::TTY::Pager::SystemPager.new.page(text)
  rescue Errno::EPIPE
    # Broken pipe is deliberately ignored.
    nil
  end
end
121
+
95
122
  def twitter_client
96
123
  if !@twitter_clients
97
124
  @twitter_clients = create_twitter_clients!
@@ -179,6 +206,10 @@ module Birdwatcher
179
206
  @output_mutex.synchronize { yield }
180
207
  end
181
208
 
209
# Run the given block while holding the spool mutex.
#
# @return the block's return value
def with_spool_mutex
  @spool_mutex.synchronize do
    yield
  end
end
212
+
182
213
  def create_twitter_clients!
183
214
  clients = []
184
215
  configuration.get!(:twitter).each do |keypair|
@@ -197,5 +228,41 @@ module Birdwatcher
197
228
  end
198
229
  clients
199
230
  end
231
+
232
# Seed Readline's history from the history file, one command per line.
# Does nothing when the file does not exist; warns when it exists but
# cannot be read.
def load_command_history
  return unless File.exist?(HISTORY_FILE_LOCATION)
  unless File.readable?(HISTORY_FILE_LOCATION)
    warn("Cannot load command history: #{HISTORY_FILE_LOCATION} is not readable")
    return
  end
  # File.foreach closes the file when iteration ends; a bare
  # File.open(...).each_line leaves the handle open.
  File.foreach(HISTORY_FILE_LOCATION) do |command|
    Readline::HISTORY << command.strip
  end
end
243
+
244
# Append a command to the history file. Warns and returns without writing
# when the file exists but is not writable.
#
# @param command [String] the command line to record
def save_command_to_history(command)
  history_path = HISTORY_FILE_LOCATION
  read_only = File.exist?(history_path) && !File.writable?(history_path)
  if read_only
    warn("Cannot save command to history: #{HISTORY_FILE_LOCATION} is not writable")
    return
  end
  File.open(history_path, "a") { |history| history.puts(command) }
end
253
+
254
# Write a string to the spool with colour codes removed. Does nothing when
# spooling is not enabled.
#
# @param string text to record; converted with #to_s
def save_to_spool(string)
  if spool_enabled?
    plain_text = string.to_s.uncolorize
    with_spool_mutex do
      self.spool.write(plain_text)
    end
  end
end
259
+
260
# Whether a spool is currently set and is a File.
#
# @return truthy when spooling is active, nil/false otherwise
def spool_enabled?
  current = self.spool
  current && current.is_a?(File)
end
263
+
264
# Whether the BIRDWATCHER_DEBUG environment variable is set (to any value).
#
# @return [Boolean]
def debugging_enabled?
  ENV.include?("BIRDWATCHER_DEBUG")
end
200
267
  end
201
268
  end
@@ -11,6 +11,7 @@ module Birdwatcher
11
11
  include Birdwatcher::Concerns::Presentation
12
12
  include Birdwatcher::Concerns::Persistence
13
13
  include Birdwatcher::Concerns::Concurrency
14
+ include Birdwatcher::Concerns::WordList
14
15
 
15
16
  # Path to modules directory
16
17
  # @private
@@ -95,8 +95,6 @@ module Birdwatcher
95
95
  }
96
96
  }
97
97
 
98
- DEFAULT_EXCLUDED_WORDS = %w(rt via oh)
99
-
100
98
  def self.info
101
99
  <<-INFO
102
100
  The Word Cloud module can generate a classic weighted word cloud from words used
@@ -133,37 +131,34 @@ INFO
133
131
  error("There are no statuses to process")
134
132
  return false
135
133
  end
136
- prepare_exclusion_list
137
- words = {}
138
- sorted_words = []
134
+ word_list = make_word_list(
135
+ :min_word_count => option_setting("MIN_WORD_COUNT"),
136
+ :min_word_length => option_setting("MIN_WORD_LENGTH"),
137
+ :exclude_words => option_setting("EXCLUDE_WORDS").to_s.split(" ").map(&:strip),
138
+ :exclude_stopwords => option_setting("EXCLUDE_STOPWORDS"),
139
+ :exclude_common_words => option_setting("EXCLUDE_COMMON"),
140
+ :exclude_hashtags => option_setting("EXCLUDE_HASHTAGS"),
141
+ :exclude_mentions => option_setting("EXCLUDE_MENTIONS"),
142
+ :word_cap => option_setting("WORD_CAP"),
143
+ :stopwords_file => File.join(DATA_DIRECTORY, "english_stopwords.txt"),
144
+ :common_words_file => File.join(DATA_DIRECTORY, "top100Kenglishwords.txt")
145
+ )
139
146
  task("Processing #{statuses.count.to_s.bold} statuses...") do
140
147
  statuses.each do |status|
141
- split_into_words(status.text).each do |word|
142
- next if exclude_word?(word)
143
- words.key?(word) ? words[word] += 1 : words[word] = 1
144
- end
148
+ word_list.add_to_corpus(status.text)
145
149
  if option_setting("INCLUDE_PAGE_TITLES")
146
150
  status.urls_dataset
147
- .where("title IS NOT NULL")
148
- .where("final_url NOT LIKE 'https://twitter.com/%'")
149
- .map(&:title).each do |page_title|
150
- split_into_words(page_title).each do |word|
151
- next if exclude_word?(word)
152
- words.key?(word) ? words[word] += 1 : words[word] = 1
153
- end
151
+ .where("title IS NOT NULL")
152
+ .where("final_url NOT LIKE 'https://twitter.com/%'")
153
+ .map(&:title).each do |page_title|
154
+ word_list.add_to_corpus(page_title)
154
155
  end
155
156
  end
156
157
  end
157
- if option_setting("MIN_WORD_COUNT")
158
- words.delete_if { |word, count| count < option_setting("MIN_WORD_COUNT").to_i }
159
- end
160
- sorted_words = words.sort_by { |word, count| count}.reverse
161
- if option_setting("WORD_CAP")
162
- sorted_words = sorted_words.take(option_setting("WORD_CAP").to_i)
163
- end
158
+ word_list.process
164
159
  end
165
160
  task("Generating word cloud, patience please...") do
166
- cloud = MagicCloud::Cloud.new(sorted_words,
161
+ cloud = MagicCloud::Cloud.new(word_list.word_list,
167
162
  :rotate => :none,
168
163
  :palette => option_setting("PALETTE").split(" ").map(&:strip)
169
164
  ).draw(option_setting("IMAGE_WIDTH").to_i, option_setting("IMAGE_HEIGHT").to_i).to_blob { self.format = "png" }
@@ -171,34 +166,6 @@ INFO
171
166
  end
172
167
  info("Word cloud written to #{option_setting('DEST').bold}")
173
168
  end
174
-
175
- private
176
-
177
- def prepare_exclusion_list
178
- @exclusion_list = DEFAULT_EXCLUDED_WORDS
179
- if option_setting("EXCLUDE_WORDS")
180
- @exclusion_list += option_setting("EXCLUDE_WORDS").split(" ").map { |w| w.strip.downcase }
181
- end
182
- if option_setting("EXCLUDE_STOPWORDS")
183
- @exclusion_list += read_data_file("english_stopwords.txt").split("\n").map { |w| w.strip.downcase }
184
- end
185
- if option_setting("EXCLUDE_COMMON")
186
- @exclusion_list += read_data_file("top100Kenglishwords.txt").split("\n").map(&:strip)
187
- end
188
- end
189
-
190
- def exclude_word?(word)
191
- return true if word.empty?
192
- return true if option_setting("MIN_WORD_LENGTH") && word.length < option_setting("MIN_WORD_LENGTH").to_i
193
- return true if option_setting("EXCLUDE_HASHTAGS") && word.start_with?("#")
194
- return true if option_setting("EXCLUDE_MENTIONS") && word.start_with?("@")
195
- return true if @exclusion_list.include?(word)
196
- end
197
-
198
- def split_into_words(text)
199
- text = text.downcase.strip.gsub(/https?:\/\/[\S]+/, "").gsub(/[^0-9a-z@#_ ]/i, " ")
200
- text.split(" ").map(&:strip)
201
- end
202
169
  end
203
170
  end
204
171
  end
@@ -0,0 +1,145 @@
1
module Birdwatcher
  module Modules
    module Statuses
      # Module that writes a frequency-ranked word list, built from status
      # texts (and optionally page titles of shared URLs), to a file.
      class Wordlist < Birdwatcher::Module
        self.meta = {
          :name        => "Word List",
          :description => "Generates a word list from statuses",
          :author      => "Michael Henriksen <michenriksen@neomailbox.ch>",
          :options     => {
            "DEST" => {
              :value       => nil,
              :description => "Destination file",
              :required    => true
            },
            "USERS" => {
              :value       => nil,
              :description => "Space-separated list of screen names (all users if empty)",
              :required    => false
            },
            "MIN_WORD_COUNT" => {
              :value       => 3,
              :description => "Exclude words mentioned fewer times than specified",
              :required    => false
            },
            "MIN_WORD_LENGTH" => {
              :value       => 6,
              :description => "Exclude words smaller than specified",
              :required    => false
            },
            "EXCLUDE_STOPWORDS" => {
              :value       => true,
              :description => "Exclude english stopwords",
              :required    => false,
              :boolean     => true
            },
            "EXCLUDE_COMMON" => {
              :value       => true,
              :description => "Exclude common english words",
              :required    => false,
              :boolean     => true
            },
            "EXCLUDE_WORDS" => {
              :value       => nil,
              :description => "Space-separated list of words to exclude",
              :required    => false
            },
            "EXCLUDE_HASHTAGS" => {
              :value       => true,
              :description => "Exclude Hashtags",
              :required    => false,
              :boolean     => true
            },
            "EXCLUDE_MENTIONS" => {
              :value       => true,
              :description => "Exclude @username mentions",
              :required    => false,
              :boolean     => true
            },
            "INCLUDE_PAGE_TITLES" => {
              :value       => false,
              :description => "Include web page titles from shared URLs (requires crawling with urls/crawl)",
              :required    => false,
              :boolean     => true
            },
            "WORD_CAP" => {
              :value       => nil,
              :description => "Cap list of words to specified amount",
              :required    => false
            },
            "INCLUDE_COUNT" => {
              :value       => false,
              :description => "Include the count with the words",
              :required    => false,
              :boolean     => true
            }
          }
        }

        # Long-form description of the module.
        #
        # @return [String]
        def self.info
          <<-INFO
The Word List module can generate a simple word list or dictionary from words
used in statuses across all or specific users.

Since users Tweet about their hobbies, interests, work, etc. generating a word
list from statuses can be very effective for password cracking.
INFO
        end

        # Collect the selected statuses, build the word list from them and
        # write it to the DEST file, one word per line (with the count
        # appended when INCLUDE_COUNT is set).
        #
        # @return [false] when there are no statuses to process
        def run
          # Restrict to the named users when USERS is set, else take all.
          statuses =
            if option_setting("USERS")
              names = option_setting("USERS").split(" ").map(&:strip)
              ids   = current_workspace.users_dataset.where("screen_name IN ?", names).map(&:id)
              current_workspace.statuses_dataset.where("user_id IN ?", ids)
            else
              current_workspace.statuses_dataset
            end

          if statuses.count.zero?
            error("There are no statuses to process")
            return false
          end

          list = make_word_list(
            :min_word_count       => option_setting("MIN_WORD_COUNT"),
            :min_word_length      => option_setting("MIN_WORD_LENGTH"),
            :exclude_words        => option_setting("EXCLUDE_WORDS").to_s.split(" ").map(&:strip),
            :exclude_stopwords    => option_setting("EXCLUDE_STOPWORDS"),
            :exclude_common_words => option_setting("EXCLUDE_COMMON"),
            :exclude_hashtags     => option_setting("EXCLUDE_HASHTAGS"),
            :exclude_mentions     => option_setting("EXCLUDE_MENTIONS"),
            :word_cap             => option_setting("WORD_CAP"),
            :stopwords_file       => File.join(DATA_DIRECTORY, "english_stopwords.txt"),
            :common_words_file    => File.join(DATA_DIRECTORY, "top100Kenglishwords.txt")
          )

          task("Processing #{statuses.count.to_s.bold} statuses...") do
            statuses.each do |tweet|
              list.add_to_corpus(tweet.text)
              next unless option_setting("INCLUDE_PAGE_TITLES")
              # Page titles of shared URLs, skipping links back to Twitter.
              titles = tweet.urls_dataset
                .where("title IS NOT NULL")
                .where("final_url NOT LIKE 'https://twitter.com/%'")
                .map(&:title)
              titles.each { |title| list.add_to_corpus(title) }
            end
            list.process
          end

          task("Writing #{pluralize(list.word_list.length, 'word', 'words')} to file...") do
            File.open(option_setting("DEST"), "w") do |out|
              list.word_list.each do |(word, count)|
                out.puts(option_setting("INCLUDE_COUNT") ? "#{word}, #{count}" : word)
              end
            end
          end

          written = number_to_human_size(File.size(option_setting("DEST")))
          info("Wrote #{written.bold} to #{option_setting('DEST').bold}")
        end
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Birdwatcher
2
- VERSION = "0.3.1"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -0,0 +1,66 @@
1
require "set"

module Birdwatcher
  # Builds a frequency-ranked word list from a corpus of texts.
  #
  # Usage: feed texts with #add_to_corpus, call #process, then read
  # #word_list.
  #
  # Options read by this class (all optional):
  #   :min_word_count       drop words seen fewer times than this
  #   :min_word_length      drop words shorter than this
  #   :exclude_words        extra words to drop (matched case-insensitively)
  #   :exclude_stopwords    drop the words listed in :stopwords_file
  #   :stopwords_file       path to a file with one word per line
  #   :exclude_common_words drop the words listed in :common_words_file
  #   :common_words_file    path to a file with one word per line
  #   :exclude_hashtags     drop words starting with "#"
  #   :exclude_mentions     drop words starting with "@"
  #   :word_cap             keep only this many of the top-ranked words
  class WordList
    # options:   the Hash given to the constructor
    # corpus:    Array of the texts added so far
    # word_list: empty until #process has run; afterwards an Array of
    #            [word, count] pairs, highest count first
    attr_reader :options, :corpus, :word_list

    # @param options [Hash] see the class documentation
    def initialize(options)
      @options   = options
      @corpus    = []
      @word_list = {}
    end

    # Add a text to the corpus. The value is converted with #to_s.
    #
    # @param text text to add
    def add_to_corpus(text)
      @corpus << text.to_s
    end

    # Count the words of the whole corpus, apply the configured filters and
    # store the ranked result in #word_list.
    #
    # @return [Array<Array(String, Integer)>] the ranked [word, count] pairs
    def process
      counts = Hash.new(0)
      corpus.each do |text|
        normalize_and_split(text).each do |word|
          counts[word] += 1 unless exclude_word?(word)
        end
      end
      if options[:min_word_count]
        minimum = options[:min_word_count].to_i
        counts.delete_if { |_word, count| count < minimum }
      end
      ranked = counts.sort_by { |_word, count| count }.reverse
      ranked = ranked.take(options[:word_cap].to_i) if options[:word_cap]
      @word_list = ranked
    end

    private

    # Lower-cased set of words to drop, built once on first use. A Set keeps
    # the per-word lookup constant-time even with large word files.
    def exclusion_list
      @exclusion_list ||= begin
        # Corpus words are lower-cased before matching, so the user-supplied
        # exclusions must be lower-cased too or they would never match.
        words = Array(options[:exclude_words]).map { |w| w.to_s.strip.downcase }
        if options[:stopwords_file] && options[:exclude_stopwords]
          words.concat(read_word_file(options[:stopwords_file]))
        end
        if options[:common_words_file] && options[:exclude_common_words]
          words.concat(read_word_file(options[:common_words_file]))
        end
        Set.new(words)
      end
    end

    # Read a one-word-per-line file into an Array of stripped, lower-cased
    # words.
    def read_word_file(path)
      File.read(path).split("\n").map { |w| w.strip.downcase }
    end

    # Lower-case the text, remove URLs, replace every character other than
    # a-z, 0-9, "@", "#", "_" and space with a space, and split on whitespace.
    def normalize_and_split(text)
      text = text.downcase.strip.gsub(/https?:\/\/[\S]+/, "").gsub(/[^0-9a-z@#_ ]/i, " ")
      text.split(" ").map(&:strip)
    end

    # Whether a normalized word should be left out of the word list.
    def exclude_word?(word)
      return true if word.empty?
      # Option values may be Strings; coerce like :min_word_count does.
      min_length = options[:min_word_length]
      return true if min_length && word.length < min_length.to_i
      return true if options[:exclude_hashtags] && word.start_with?("#")
      return true if options[:exclude_mentions] && word.start_with?("@")
      return true if exclusion_list.include?(word)
      false
    end
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: birdwatcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Henrikesn
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-10-22 00:00:00.000000000 Z
11
+ date: 2016-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sequel
@@ -337,6 +337,7 @@ files:
337
337
  - lib/birdwatcher/commands/set.rb
338
338
  - lib/birdwatcher/commands/shell.rb
339
339
  - lib/birdwatcher/commands/show.rb
340
+ - lib/birdwatcher/commands/spool.rb
340
341
  - lib/birdwatcher/commands/status.rb
341
342
  - lib/birdwatcher/commands/unset.rb
342
343
  - lib/birdwatcher/commands/use.rb
@@ -348,6 +349,7 @@ files:
348
349
  - lib/birdwatcher/concerns/persistence.rb
349
350
  - lib/birdwatcher/concerns/presentation.rb
350
351
  - lib/birdwatcher/concerns/util.rb
352
+ - lib/birdwatcher/concerns/word_list.rb
351
353
  - lib/birdwatcher/configuration.rb
352
354
  - lib/birdwatcher/configuration_wizard.rb
353
355
  - lib/birdwatcher/console.rb
@@ -360,6 +362,7 @@ files:
360
362
  - lib/birdwatcher/modules/statuses/kml.rb
361
363
  - lib/birdwatcher/modules/statuses/sentiment.rb
362
364
  - lib/birdwatcher/modules/statuses/word_cloud.rb
365
+ - lib/birdwatcher/modules/statuses/word_list.rb
363
366
  - lib/birdwatcher/modules/urls/crawl.rb
364
367
  - lib/birdwatcher/modules/urls/most_shared.rb
365
368
  - lib/birdwatcher/modules/users/activity_plot.rb
@@ -373,6 +376,7 @@ files:
373
376
  - lib/birdwatcher/punchcard.rb
374
377
  - lib/birdwatcher/util.rb
375
378
  - lib/birdwatcher/version.rb
379
+ - lib/birdwatcher/word_list.rb
376
380
  - models/hashtag.rb
377
381
  - models/influencee.rb
378
382
  - models/influencer.rb