birdwatcher 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9967717a3d089165d8c2184b7028cf965d747277
4
- data.tar.gz: 098b63db5bf37e20bb3a5d72a515c01b1dad42e7
3
+ metadata.gz: 9fe692202c5f66cd4792434688658820d13f659f
4
+ data.tar.gz: afd0ce664e68cfe1228b46110ebedcfec139f260
5
5
  SHA512:
6
- metadata.gz: 6bd0fb95954c3493cb7b3adce85a68f2b86a439b87fb81477e00ba7f0fce3b12b6c2e7cfb38b9744c19d5345d1a7748c0aaa11e138c0028981337fd167918688
7
- data.tar.gz: fe6e8192f569912ab7c3e3e6d7e51496fd408c48a11b713d883d387e155a2154a637414153da14ccf243af1f949dc4969dbfdc46c1175e968827710becf2f468
6
+ metadata.gz: 25e84e1841967733afe7fd0a5967c2637da26bb0f9b540eeb0be1e219983fbb6e555216b4a111b0eaca7816023bc7bee675676772f5814c883e79c56e83521e0
7
+ data.tar.gz: 1edea6733ed598f18e649397e48b483dedff510ca686d8f94c26b26e6c23d0b7783fc532939072418608bec31778f868ffe0d124ed465bc17acf1e98355c8ac7
data/CHANGELOG.md CHANGED
@@ -3,6 +3,18 @@ All notable changes to this project will be documented in this file.
3
3
  This project adheres to [Semantic Versioning](http://semver.org/).
4
4
 
5
5
  ## [Unreleased]
6
+ ### Added
7
+
8
+ ## [0.4.0]
9
+ ### Added
10
+ - New `spool` command to spool console input and output to a file
11
+ - Write command history to `~/.birdwatcher_history` and load history on startup
12
+ for persistent command history
13
+ - New module `statuses/word_list` to generate a simple word list from statuses
14
+ - Introduced new `Birdwatcher::WordList` class to process text into a word list
15
+
16
+ ### Changed
17
+ - Refactored `statuses/word_cloud` module to use new `Birdwatcher::WordList` class
6
18
 
7
19
  ## [0.3.1]
8
20
  ### Added
@@ -14,4 +26,4 @@ This project adheres to [Semantic Versioning](http://semver.org/).
14
26
  - `posted_at` column added to `urls` for better and easier ordering
15
27
 
16
28
  ### Fixed
17
- - make `status search` command case insensitive
29
+ - Make `status search` command case insensitive
data/lib/birdwatcher.rb CHANGED
@@ -19,6 +19,7 @@ require "birdwatcher/http_client"
19
19
  require "birdwatcher/klout_client"
20
20
  require "birdwatcher/punchcard"
21
21
  require "birdwatcher/kml"
22
+ require "birdwatcher/word_list"
22
23
  require "birdwatcher/console"
23
24
  require "birdwatcher/configuration"
24
25
  require "birdwatcher/configuration_wizard"
@@ -16,7 +16,7 @@ can be very convenient for common or repetitive workflows.
16
16
  #{'USAGE:'.bold}
17
17
 
18
18
  #{'Execute commands from a resource file:'.bold}
19
- resource <FILE>
19
+ resource FILE
20
20
  USAGE
21
21
  end
22
22
 
@@ -0,0 +1,76 @@
1
module Birdwatcher
  module Commands
    # Console command that controls "spooling": mirroring console output into
    # a file in addition to the screen.
    #
    # Supported invocations (see {#run}):
    #   spool FILE         start spooling to FILE
    #   spool start FILE   same as above
    #   spool off|stop     stop spooling
    #   spool status       report whether spooling is active
    class Spool < Birdwatcher::Command
      self.meta = {
        :description => "Write console output into a file as well the screen",
        :names       => %w(spool),
        :usage       => "spool FILE|off"
      }

      # Long-form help text for the command.
      #
      # @return [String]
      def self.detailed_usage
        <<-USAGE
The #{'spool'.bold} command can be used to write all console output into a file
as well the screen. The output will be appended to the file if it already exists.

#{'USAGE:'.bold}

#{'Spool output to a file:'.bold}
spool FILE

#{'Turn off spooling:'.bold}
spool off

#{'See status of spooling:'.bold}
spool status
USAGE
      end

      # Dispatch on the first argument. Anything that is not a known action
      # keyword is treated as (the beginning of) a file path.
      #
      # @return [false, Object] false when no arguments were given, otherwise
      #   whatever the selected action returns
      def run
        if !arguments?
          error("You must provide a path to a file or an action")
          return false
        end
        case arguments.first.downcase
        when "off", "stop"
          stop_spooling
        when "status"
          status_spooling
        else
          # Covers both "spool start FILE" and plain "spool FILE".
          start_spooling
        end
      end

      private

      # Open the requested file in append mode and install it as the console's
      # spool target.
      #
      # @return [false, Object] false when no file path was given
      def start_spooling
        # With an explicit "start" keyword the path is everything after it.
        # A range is required here: arguments[1, -1] (start, negative length)
        # evaluates to nil and would raise NoMethodError on #join.
        path_parts = arguments.first.downcase == "start" ? arguments[1..-1] : arguments
        file = path_parts.join(" ")
        if file.empty?
          error("You must provide a path to a file")
          return false
        end
        # Open the new file before touching the current spool so that a failed
        # open leaves any active spool in place.
        new_spool = File.open(file, "a").tap { |f| f.sync = true }
        replace_spool(new_spool)
        info("Spooling output to #{file.bold}")
      end

      # Detach the spool file from the console and close it.
      def stop_spooling
        replace_spool(nil)
        info("Output spooling stopped")
      end

      # Report whether output is currently being spooled, and where to.
      def status_spooling
        if console.spool && console.spool.is_a?(File)
          info("Spooling output to #{console.spool.path.bold}")
        else
          info("Output spooling is stopped")
        end
      end

      # Swap the console's spool target and release the previous file handle.
      # The console is re-pointed first, and only then is the old handle
      # closed, so the console never references a closed file.
      #
      # @param new_spool [File, nil] new spool target, or nil to disable
      def replace_spool(new_spool)
        previous = console.spool
        console.spool = new_spool
        previous.close if previous.is_a?(File) && !previous.closed?
      end
    end
  end
end
@@ -107,7 +107,7 @@ module Birdwatcher
107
107
  # 0 / 0
108
108
  # end
109
109
  def confirm(question)
110
- HighLine.agree("#{question} (y/n) ")
110
+ Birdwatcher::Console.instance.confirm(question)
111
111
  end
112
112
  end
113
113
  end
@@ -114,8 +114,7 @@ module Birdwatcher
114
114
  # If the text is long, it will be automatically paged with the system's
115
115
  # currently configured pager command (usually `less`).
116
116
  def page_text(text)
117
- ::TTY::Pager::SystemPager.new.page(text)
118
- rescue Errno::EPIPE
117
+ Birdwatcher::Console.instance.page_text(text)
119
118
  end
120
119
  end
121
120
  end
@@ -0,0 +1,21 @@
1
module Birdwatcher
  module Concerns
    # Mixin that gives the including class a factory method for word lists.
    module WordList
      # Class-level extension point; extended onto every including class.
      # It currently defines no methods.
      module ClassMethods
      end

      class << self
        # Inclusion hook: extends the including class with {ClassMethods}.
        #
        # @param base [Module] the class or module including this concern
        def included(base)
          base.extend(ClassMethods)
        end
      end

      # Build a fresh word list object.
      #
      # @param options [Hash] Word list options, passed through unchanged
      #
      # @return [Birdwatcher::WordList]
      def make_word_list(options = {})
        Birdwatcher::WordList.new(options)
      end
    end
  end
end
@@ -3,14 +3,17 @@ module Birdwatcher
3
3
  include Singleton
4
4
 
5
5
  DEFAULT_AUTO_COMPLETION_STRINGS = [].freeze
6
- DB_MIGRATIONS_PATH = File.expand_path("../../../db/migrations", __FILE__).freeze
7
- LINE_SEPARATOR = ("=" * 80).freeze
6
+ DB_MIGRATIONS_PATH = File.expand_path("../../../db/migrations", __FILE__).freeze
7
+ LINE_SEPARATOR = ("=" * 80).freeze
8
+ HISTORY_FILE_NAME = ".birdwatcher_history".freeze
9
+ HISTORY_FILE_LOCATION = File.join(Dir.home, HISTORY_FILE_NAME).freeze
8
10
 
9
- attr_accessor :current_workspace, :current_module
11
+ attr_accessor :current_workspace, :current_module, :spool
10
12
  attr_reader :database
11
13
 
12
14
  def initialize
13
15
  @output_mutex = Mutex.new
16
+ @spool_mutex = Mutex.new
14
17
  end
15
18
 
16
19
  def start!
@@ -21,7 +24,9 @@ module Birdwatcher
21
24
  Birdwatcher::Console.instance.auto_completion_strings.grep(/\A#{Regexp.escape(s)}/) + Dir["#{expanded_s}*"].grep(/^#{Regexp.escape(expanded_s)}/)
22
25
  end
23
26
  Readline.completion_append_character = ""
27
+ load_command_history
24
28
  while input = Readline.readline(prompt_line, true)
29
+ save_to_spool(prompt_line)
25
30
  input = input.to_s.strip
26
31
  handle_input(input) unless input.empty?
27
32
  end
@@ -29,6 +34,8 @@ module Birdwatcher
29
34
 
30
35
  def handle_input(input)
31
36
  input.strip!
37
+ save_command_to_history(input)
38
+ save_to_spool("#{input}\n")
32
39
  command_name, argument_line = input.split(" ", 2).map(&:strip)
33
40
  command_name.downcase
34
41
  commands.each do |command|
@@ -52,14 +59,15 @@ module Birdwatcher
52
59
  def output(data, newline = true)
53
60
  data = "#{data}\n" if newline
54
61
  with_output_mutex { print data }
62
+ save_to_spool(data)
55
63
  end
56
64
 
57
65
  def output_formatted(*args)
58
- with_output_mutex { printf(*args) }
66
+ output(sprintf(*args), false)
59
67
  end
60
68
 
61
69
  def newline
62
- with_output_mutex { puts }
70
+ output ""
63
71
  end
64
72
 
65
73
  def line_separator
@@ -77,6 +85,7 @@ module Birdwatcher
77
85
  rescue => e
78
86
  output " failed".bold.light_red
79
87
  error "#{e.class}: ".bold + e.message
88
+ e.backtrace.each { |l| error l } if debugging_enabled?
80
89
  exit(1) if fatal
81
90
  end
82
91
 
@@ -92,6 +101,24 @@ module Birdwatcher
92
101
  output "[-]".white.bold.on_red + " #{message}"
93
102
  end
94
103
 
104
+ def confirm(question)
105
+ question = "#{question} (y/n) "
106
+ save_to_spool(question)
107
+ if HighLine.agree("#{question}")
108
+ save_to_spool("y\n")
109
+ true
110
+ else
111
+ save_to_spool("n\n")
112
+ false
113
+ end
114
+ end
115
+
116
+ def page_text(text)
117
+ save_to_spool(text)
118
+ ::TTY::Pager::SystemPager.new.page(text)
119
+ rescue Errno::EPIPE
120
+ end
121
+
95
122
  def twitter_client
96
123
  if !@twitter_clients
97
124
  @twitter_clients = create_twitter_clients!
@@ -179,6 +206,10 @@ module Birdwatcher
179
206
  @output_mutex.synchronize { yield }
180
207
  end
181
208
 
209
+ def with_spool_mutex
210
+ @spool_mutex.synchronize { yield }
211
+ end
212
+
182
213
  def create_twitter_clients!
183
214
  clients = []
184
215
  configuration.get!(:twitter).each do |keypair|
@@ -197,5 +228,41 @@ module Birdwatcher
197
228
  end
198
229
  clients
199
230
  end
231
+
232
+ def load_command_history
233
+ if File.exist?(HISTORY_FILE_LOCATION)
234
+ if File.readable?(HISTORY_FILE_LOCATION)
235
+ File.open(HISTORY_FILE_LOCATION).each_line do |command|
236
+ Readline::HISTORY << command.strip
237
+ end
238
+ else
239
+ warn("Cannot load command history: #{HISTORY_FILE_LOCATION} is not readable")
240
+ end
241
+ end
242
+ end
243
+
244
+ def save_command_to_history(command)
245
+ if File.exist?(HISTORY_FILE_LOCATION) && !File.writable?(HISTORY_FILE_LOCATION)
246
+ warn("Cannot save command to history: #{HISTORY_FILE_LOCATION} is not writable")
247
+ return
248
+ end
249
+ File.open(HISTORY_FILE_LOCATION, "a") do |file|
250
+ file.puts(command)
251
+ end
252
+ end
253
+
254
+ def save_to_spool(string)
255
+ return unless spool_enabled?
256
+ string = string.to_s.uncolorize
257
+ with_spool_mutex { self.spool.write(string) }
258
+ end
259
+
260
+ def spool_enabled?
261
+ self.spool && self.spool.is_a?(File)
262
+ end
263
+
264
+ def debugging_enabled?
265
+ ENV.key?("BIRDWATCHER_DEBUG")
266
+ end
200
267
  end
201
268
  end
@@ -11,6 +11,7 @@ module Birdwatcher
11
11
  include Birdwatcher::Concerns::Presentation
12
12
  include Birdwatcher::Concerns::Persistence
13
13
  include Birdwatcher::Concerns::Concurrency
14
+ include Birdwatcher::Concerns::WordList
14
15
 
15
16
  # Path to modules directory
16
17
  # @private
@@ -95,8 +95,6 @@ module Birdwatcher
95
95
  }
96
96
  }
97
97
 
98
- DEFAULT_EXCLUDED_WORDS = %w(rt via oh)
99
-
100
98
  def self.info
101
99
  <<-INFO
102
100
  The Word Cloud module can generate a classic weighted word cloud from words used
@@ -133,37 +131,34 @@ INFO
133
131
  error("There are no statuses to process")
134
132
  return false
135
133
  end
136
- prepare_exclusion_list
137
- words = {}
138
- sorted_words = []
134
+ word_list = make_word_list(
135
+ :min_word_count => option_setting("MIN_WORD_COUNT"),
136
+ :min_word_length => option_setting("MIN_WORD_LENGTH"),
137
+ :exclude_words => option_setting("EXCLUDE_WORDS").to_s.split(" ").map(&:strip),
138
+ :exclude_stopwords => option_setting("EXCLUDE_STOPWORDS"),
139
+ :exclude_common_words => option_setting("EXCLUDE_COMMON"),
140
+ :exclude_hashtags => option_setting("EXCLUDE_HASHTAGS"),
141
+ :exclude_mentions => option_setting("EXCLUDE_MENTIONS"),
142
+ :word_cap => option_setting("WORD_CAP"),
143
+ :stopwords_file => File.join(DATA_DIRECTORY, "english_stopwords.txt"),
144
+ :common_words_file => File.join(DATA_DIRECTORY, "top100Kenglishwords.txt")
145
+ )
139
146
  task("Processing #{statuses.count.to_s.bold} statuses...") do
140
147
  statuses.each do |status|
141
- split_into_words(status.text).each do |word|
142
- next if exclude_word?(word)
143
- words.key?(word) ? words[word] += 1 : words[word] = 1
144
- end
148
+ word_list.add_to_corpus(status.text)
145
149
  if option_setting("INCLUDE_PAGE_TITLES")
146
150
  status.urls_dataset
147
- .where("title IS NOT NULL")
148
- .where("final_url NOT LIKE 'https://twitter.com/%'")
149
- .map(&:title).each do |page_title|
150
- split_into_words(page_title).each do |word|
151
- next if exclude_word?(word)
152
- words.key?(word) ? words[word] += 1 : words[word] = 1
153
- end
151
+ .where("title IS NOT NULL")
152
+ .where("final_url NOT LIKE 'https://twitter.com/%'")
153
+ .map(&:title).each do |page_title|
154
+ word_list.add_to_corpus(page_title)
154
155
  end
155
156
  end
156
157
  end
157
- if option_setting("MIN_WORD_COUNT")
158
- words.delete_if { |word, count| count < option_setting("MIN_WORD_COUNT").to_i }
159
- end
160
- sorted_words = words.sort_by { |word, count| count}.reverse
161
- if option_setting("WORD_CAP")
162
- sorted_words = sorted_words.take(option_setting("WORD_CAP").to_i)
163
- end
158
+ word_list.process
164
159
  end
165
160
  task("Generating word cloud, patience please...") do
166
- cloud = MagicCloud::Cloud.new(sorted_words,
161
+ cloud = MagicCloud::Cloud.new(word_list.word_list,
167
162
  :rotate => :none,
168
163
  :palette => option_setting("PALETTE").split(" ").map(&:strip)
169
164
  ).draw(option_setting("IMAGE_WIDTH").to_i, option_setting("IMAGE_HEIGHT").to_i).to_blob { self.format = "png" }
@@ -171,34 +166,6 @@ INFO
171
166
  end
172
167
  info("Word cloud written to #{option_setting('DEST').bold}")
173
168
  end
174
-
175
- private
176
-
177
- def prepare_exclusion_list
178
- @exclusion_list = DEFAULT_EXCLUDED_WORDS
179
- if option_setting("EXCLUDE_WORDS")
180
- @exclusion_list += option_setting("EXCLUDE_WORDS").split(" ").map { |w| w.strip.downcase }
181
- end
182
- if option_setting("EXCLUDE_STOPWORDS")
183
- @exclusion_list += read_data_file("english_stopwords.txt").split("\n").map { |w| w.strip.downcase }
184
- end
185
- if option_setting("EXCLUDE_COMMON")
186
- @exclusion_list += read_data_file("top100Kenglishwords.txt").split("\n").map(&:strip)
187
- end
188
- end
189
-
190
- def exclude_word?(word)
191
- return true if word.empty?
192
- return true if option_setting("MIN_WORD_LENGTH") && word.length < option_setting("MIN_WORD_LENGTH").to_i
193
- return true if option_setting("EXCLUDE_HASHTAGS") && word.start_with?("#")
194
- return true if option_setting("EXCLUDE_MENTIONS") && word.start_with?("@")
195
- return true if @exclusion_list.include?(word)
196
- end
197
-
198
- def split_into_words(text)
199
- text = text.downcase.strip.gsub(/https?:\/\/[\S]+/, "").gsub(/[^0-9a-z@#_ ]/i, " ")
200
- text.split(" ").map(&:strip)
201
- end
202
169
  end
203
170
  end
204
171
  end
@@ -0,0 +1,145 @@
1
module Birdwatcher
  module Modules
    module Statuses
      # Module that writes a word list, built from the text of statuses in the
      # current workspace, to a destination file.
      class Wordlist < Birdwatcher::Module
        self.meta = {
          :name        => "Word List",
          :description => "Generates a word list from statuses",
          :author      => "Michael Henriksen <michenriksen@neomailbox.ch>",
          :options     => {
            "DEST" => {
              :value       => nil,
              :description => "Destination file",
              :required    => true
            },
            "USERS" => {
              :value       => nil,
              :description => "Space-separated list of screen names (all users if empty)",
              :required    => false
            },
            "MIN_WORD_COUNT" => {
              :value       => 3,
              :description => "Exclude words mentioned fewer times than specified",
              :required    => false
            },
            "MIN_WORD_LENGTH" => {
              :value       => 6,
              :description => "Exclude words smaller than specified",
              :required    => false
            },
            "EXCLUDE_STOPWORDS" => {
              :value       => true,
              :description => "Exclude english stopwords",
              :required    => false,
              :boolean     => true
            },
            "EXCLUDE_COMMON" => {
              :value       => true,
              :description => "Exclude common english words",
              :required    => false,
              :boolean     => true
            },
            "EXCLUDE_WORDS" => {
              :value       => nil,
              :description => "Space-separated list of words to exclude",
              :required    => false
            },
            "EXCLUDE_HASHTAGS" => {
              :value       => true,
              :description => "Exclude Hashtags",
              :required    => false,
              :boolean     => true
            },
            "EXCLUDE_MENTIONS" => {
              :value       => true,
              :description => "Exclude @username mentions",
              :required    => false,
              :boolean     => true
            },
            "INCLUDE_PAGE_TITLES" => {
              :value       => false,
              :description => "Include web page titles from shared URLs (requires crawling with urls/crawl)",
              :required    => false,
              :boolean     => true
            },
            "WORD_CAP" => {
              :value       => nil,
              :description => "Cap list of words to specified amount",
              :required    => false
            },
            "INCLUDE_COUNT" => {
              :value       => false,
              :description => "Include the count with the words",
              :required    => false,
              :boolean     => true
            }
          }
        }

        # Human-readable description of the module.
        #
        # @return [String]
        def self.info
          <<-INFO
The Word List module can generate a simple word list or dictionary from words
used in statuses across all or specific users.

Since users Tweet about their hobbies, interests, work, etc. generating a word
list from statuses can be very effective for password cracking.
INFO
        end

        # Collect the statuses in scope, feed their text (and optionally the
        # titles of their shared pages) into a word list, and write the
        # resulting words to the DEST file, one per line.
        #
        # @return [false, Object] false when there is nothing to process
        def run
          statuses = selected_statuses
          if statuses.count.zero?
            error("There are no statuses to process")
            return false
          end
          word_list = make_word_list(word_list_settings)
          task("Processing #{statuses.count.to_s.bold} statuses...") do
            statuses.each do |status|
              word_list.add_to_corpus(status.text)
              next unless option_setting("INCLUDE_PAGE_TITLES")
              page_titles = status.urls_dataset
                .where("title IS NOT NULL")
                .where("final_url NOT LIKE 'https://twitter.com/%'")
                .map(&:title)
              page_titles.each { |page_title| word_list.add_to_corpus(page_title) }
            end
            word_list.process
          end
          task("Writing #{pluralize(word_list.word_list.length, 'word', 'words')} to file...") do
            File.open(option_setting("DEST"), "w") do |f|
              # Each entry is a [word, count] pair.
              word_list.word_list.each do |word, count|
                line = option_setting("INCLUDE_COUNT") ? "#{word}, #{count}" : word
                f.puts(line)
              end
            end
          end
          file_size = number_to_human_size(File.size(option_setting("DEST")))
          info("Wrote #{file_size.bold} to #{option_setting('DEST').bold}")
        end

        private

        # Statuses to process: those of the users named in USERS, or every
        # status in the current workspace when USERS is not set.
        def selected_statuses
          return current_workspace.statuses_dataset unless option_setting("USERS")
          screen_names = option_setting("USERS").split(" ").map(&:strip)
          user_ids = current_workspace.users_dataset.where("screen_name IN ?", screen_names).map(&:id)
          current_workspace.statuses_dataset.where("user_id IN ?", user_ids)
        end

        # Translate the module's option settings into the options hash handed
        # to make_word_list.
        #
        # @return [Hash]
        def word_list_settings
          {
            :min_word_count       => option_setting("MIN_WORD_COUNT"),
            :min_word_length      => option_setting("MIN_WORD_LENGTH"),
            :exclude_words        => option_setting("EXCLUDE_WORDS").to_s.split(" ").map(&:strip),
            :exclude_stopwords    => option_setting("EXCLUDE_STOPWORDS"),
            :exclude_common_words => option_setting("EXCLUDE_COMMON"),
            :exclude_hashtags     => option_setting("EXCLUDE_HASHTAGS"),
            :exclude_mentions     => option_setting("EXCLUDE_MENTIONS"),
            :word_cap             => option_setting("WORD_CAP"),
            :stopwords_file       => File.join(DATA_DIRECTORY, "english_stopwords.txt"),
            :common_words_file    => File.join(DATA_DIRECTORY, "top100Kenglishwords.txt")
          }
        end
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Birdwatcher
2
- VERSION = "0.3.1"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -0,0 +1,66 @@
1
require "set"

module Birdwatcher
  # Turns a corpus of free-form texts into a list of words ranked by how often
  # each word occurs.
  #
  # Recognised options (all optional):
  #   :min_word_count       drop words occurring fewer times than this
  #   :min_word_length      drop words shorter than this
  #   :word_cap             keep at most this many of the top-ranked words
  #   :exclude_words        explicit list of words to drop
  #   :exclude_hashtags     drop words starting with "#"
  #   :exclude_mentions     drop words starting with "@"
  #   :exclude_stopwords    drop words listed in :stopwords_file
  #   :stopwords_file       path to a file with one word per line
  #   :exclude_common_words drop words listed in :common_words_file
  #   :common_words_file    path to a file with one word per line
  #
  # Numeric options are converted with #to_i, so Integer and String values are
  # both accepted.
  class WordList
    attr_reader :options, :corpus, :word_list

    # @param options [Hash] Word list options (see class documentation)
    def initialize(options = {})
      @options   = options
      @corpus    = []
      @word_list = {}
    end

    # Append a text to the corpus. The text is not analysed until {#process}.
    #
    # @param text [#to_s] text to add
    def add_to_corpus(text)
      @corpus << text.to_s
    end

    # Count the words of every text in the corpus, apply the configured
    # filters and store the result in {#word_list}.
    #
    # @return [Array<Array(String, Integer)>] [word, count] pairs, highest
    #   count first
    def process
      counts = Hash.new(0)
      corpus.each do |text|
        normalize_and_split(text).each do |word|
          counts[word] += 1 unless exclude_word?(word)
        end
      end
      if options[:min_word_count]
        minimum = options[:min_word_count].to_i
        counts.delete_if { |_word, count| count < minimum }
      end
      ranked = counts.sort_by { |_word, count| count }.reverse
      ranked = ranked.take(options[:word_cap].to_i) if options[:word_cap]
      @word_list = ranked
    end

    private

    # Lazily built set of words to drop. All entries are stripped and
    # lowercased because the words they are compared against are lowercased
    # by #normalize_and_split. A Set keeps membership tests cheap when large
    # word files are loaded.
    #
    # @return [Set<String>]
    def exclusion_list
      @exclusion_list ||= begin
        excluded = Set.new(Array(options[:exclude_words]).map { |w| w.to_s.strip.downcase })
        if options[:stopwords_file] && options[:exclude_stopwords]
          excluded.merge(words_from_file(options[:stopwords_file]))
        end
        if options[:common_words_file] && options[:exclude_common_words]
          excluded.merge(words_from_file(options[:common_words_file]))
        end
        excluded
      end
    end

    # Read a one-word-per-line file into an array of normalized words.
    def words_from_file(path)
      File.readlines(path).map { |line| line.strip.downcase }
    end

    # Lowercase the text, remove http(s) URLs, replace every character other
    # than 0-9, a-z, "@", "#", "_" and space with a space, then split on
    # whitespace.
    #
    # @return [Array<String>]
    def normalize_and_split(text)
      text = text.downcase.strip.gsub(/https?:\/\/[\S]+/, "").gsub(/[^0-9a-z@#_ ]/i, " ")
      text.split(" ").map(&:strip)
    end

    # @return [Boolean] true when the word must be left out of the list
    def exclude_word?(word)
      return true if word.empty?
      # #to_i keeps this consistent with :min_word_count / :word_cap and avoids
      # an Integer-vs-String comparison error when the setting is a String.
      return true if options[:min_word_length] && word.length < options[:min_word_length].to_i
      return true if options[:exclude_hashtags] && word.start_with?("#")
      return true if options[:exclude_mentions] && word.start_with?("@")
      return true if exclusion_list.include?(word)
      false
    end
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: birdwatcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Henrikesn
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-10-22 00:00:00.000000000 Z
11
+ date: 2016-11-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sequel
@@ -337,6 +337,7 @@ files:
337
337
  - lib/birdwatcher/commands/set.rb
338
338
  - lib/birdwatcher/commands/shell.rb
339
339
  - lib/birdwatcher/commands/show.rb
340
+ - lib/birdwatcher/commands/spool.rb
340
341
  - lib/birdwatcher/commands/status.rb
341
342
  - lib/birdwatcher/commands/unset.rb
342
343
  - lib/birdwatcher/commands/use.rb
@@ -348,6 +349,7 @@ files:
348
349
  - lib/birdwatcher/concerns/persistence.rb
349
350
  - lib/birdwatcher/concerns/presentation.rb
350
351
  - lib/birdwatcher/concerns/util.rb
352
+ - lib/birdwatcher/concerns/word_list.rb
351
353
  - lib/birdwatcher/configuration.rb
352
354
  - lib/birdwatcher/configuration_wizard.rb
353
355
  - lib/birdwatcher/console.rb
@@ -360,6 +362,7 @@ files:
360
362
  - lib/birdwatcher/modules/statuses/kml.rb
361
363
  - lib/birdwatcher/modules/statuses/sentiment.rb
362
364
  - lib/birdwatcher/modules/statuses/word_cloud.rb
365
+ - lib/birdwatcher/modules/statuses/word_list.rb
363
366
  - lib/birdwatcher/modules/urls/crawl.rb
364
367
  - lib/birdwatcher/modules/urls/most_shared.rb
365
368
  - lib/birdwatcher/modules/users/activity_plot.rb
@@ -373,6 +376,7 @@ files:
373
376
  - lib/birdwatcher/punchcard.rb
374
377
  - lib/birdwatcher/util.rb
375
378
  - lib/birdwatcher/version.rb
379
+ - lib/birdwatcher/word_list.rb
376
380
  - models/hashtag.rb
377
381
  - models/influencee.rb
378
382
  - models/influencer.rb