twexicon 0.1.7.1 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/twexicon/analyzer.rb +11 -7
- data/lib/twexicon/scraper.rb +4 -4
- data/lib/twexicon/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 166ba87d5e9f7c5039ffc03a8a9b74aaf25d6599
|
4
|
+
data.tar.gz: c3f0f992b626d751057db8850589fec2c5fdb47f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a575dc49c0ee39ee40b0501cc65d1d47d7567ea035cac6a2c02ebeb26712be09edeeaffc893c2f13c009bed0bfbf2960ccb38df66d0923e1b6fa1944a9d57bd0
|
7
|
+
data.tar.gz: 521809c9442611e804c9b36d4affa9534f7c002a8089011eb5c200757914f456e8a50e64c4b7450380b1850ffb4fc5fbe12836275c67df6b1ef267ad16b88624
|
data/lib/twexicon/analyzer.rb
CHANGED
@@ -13,7 +13,7 @@ class Twexicon::Analyzer
|
|
13
13
|
until quit
|
14
14
|
@input = nil
|
15
15
|
@int = ""
|
16
|
-
puts "
|
16
|
+
puts "\nWhat would you like to do next? For options, type 'help'."
|
17
17
|
until is_valid?
|
18
18
|
@input = gets.strip.gsub(/\W/, "").downcase
|
19
19
|
end
|
@@ -69,14 +69,16 @@ class Twexicon::Analyzer
|
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
72
|
-
#
|
72
|
+
# Words from https://en.wikipedia.org/wiki/Most_common_words_in_English –– Left out some common yet still interesting ones (People, Good, Think, Work, First, One, Two, Want, New, Give, Know)
|
73
73
|
COMMON_WORDS = ["A", "About", "An", "And", "Are", "As", "At", "Be", "Been", "But", "By", "For", "From", "Get", "Had", "Has", "Have", "In", "Into", "Is", "It", "It's", "Its", "Just", "Not", "Of", "On", "Or", "Say", "So", "Some", "That", "The", "There", "These", "This", "Those", "To", "Up", "With", "I", "My", "Your", "They", "He", "You", "Do", "His", "We", "Her", "She", "Will", "All", "Would", "Their", "What", "Out", "If", "Who", "Which", "Go", "Me", "When", "Make", "Can", "Time", "No", "Him", "Take", "Year", "Could", "Them", "See", "Other", "Than", "Then", "Now", "Look", "Only", "Come", "Over", "Also", "Back", "After", "Use", "How", "Our", "Well", "Way", "Even", "Because", "Any", "Day", "Most", "Us",
|
74
74
|
# other additions
|
75
|
-
"Wouldn't", "Couldn't", "Shouldn't", "Mustn't", "Would've", "Could've", "Should've", "Must've", "Hadn't", "Wasn't", "Weren't", "Ain't", "Aint", "Here", "Seem", "Seems", "That's", "Took", "Much", "More", "You're", "We're", "We've", "I've", "I'm",
|
75
|
+
"Wouldn't", "Couldn't", "Shouldn't", "Mustn't", "Would've", "Could've", "Should've", "Must've", "Hadn't", "Wasn't", "Weren't", "Ain't", "Aint", "Here", "Seem", "Seems", "That's", "Took", "Much", "More", "You're", "We're", "We've", "I've", "I'm", "Don't", "Got", "Soon",
|
76
76
|
# contraction endings until I fix the parsing error
|
77
77
|
"Re", "Ll", "Ve",
|
78
78
|
# Letters until I fix the contraction parsing error
|
79
|
-
"B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"
|
79
|
+
"B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
|
80
|
+
# Common words in other languages that *aren't* uncommon English words (so, "Con" doesn't count)
|
81
|
+
"En"]
|
80
82
|
|
81
83
|
def get_words
|
82
84
|
words = {}
|
@@ -93,15 +95,17 @@ class Twexicon::Analyzer
|
|
93
95
|
end
|
94
96
|
words.each do |w, n| # Create strings for the words that occur >1 times
|
95
97
|
case n
|
96
|
-
when 1..9 then word_array << "
|
97
|
-
when 10..99 then word_array << "#{n}x #{w}"
|
98
|
+
when 1..9 then word_array << "000#{n}x #{w}"
|
99
|
+
when 10..99 then word_array << "00#{n}x #{w}"
|
100
|
+
when 100..999 then word_array << "0#{n}x #{w}"
|
101
|
+
when 1000..9999 then word_array << "#{n}x #{w}"
|
98
102
|
end
|
99
103
|
end
|
100
104
|
if word_array.empty?
|
101
105
|
puts "\nIt appears that @#{username} is not much of a talker."
|
102
106
|
else
|
103
107
|
puts "\n@#{username}'s current favorite word(s):"
|
104
|
-
puts word_array.sort.reverse.take(10)
|
108
|
+
puts word_array.sort.reverse.each{|w| w.sub!(/^0+/, "")}.take(10)
|
105
109
|
end
|
106
110
|
end
|
107
111
|
|
data/lib/twexicon/scraper.rb
CHANGED
@@ -14,12 +14,12 @@ class Twexicon::Scraper
|
|
14
14
|
tweets.each do |num, tweet|
|
15
15
|
t = tweet.keys[0].dup
|
16
16
|
t.scan(/pic.twitter.com\/\w{10}/){|p| tweet.values[0][:pix] << p.strip}.gsub!(/pic.twitter.com\/\w{10}/, " ")
|
17
|
-
t.scan(/https?:\/\/[\w
|
17
|
+
t.scan(/https?:\/\/[\w\.\?\=\&\-\/\#\:]+/){|w| tweet.values[0][:links] << w.strip}.gsub!(/https?:\/\/[\w\.\?\=\&\-\/\#\:]+/, " ")
|
18
18
|
t.scan(/#\w+/){|h| tweet.values[0][:hashtags] << h.gsub(/\W/, "").prepend("#")}.gsub!(/#\w+/, " ")
|
19
19
|
t.scan(/@\w+/){|u| tweet.values[0][:usernames] << u.gsub(/\W/, "").prepend("@")}.gsub!(/@\w+/, " ")
|
20
|
-
t.scan(/(\d+[:\.\
|
21
|
-
t.scan(/(\b[A-Z][\.\
|
22
|
-
t.scan(/(([A-Z]
|
20
|
+
t.scan(/(\d+[:\.\ ]?\d*)+/){|n| tweet.values[0][:numbers] << n.first.gsub(/(^\W+|\W+$)/, "")}.gsub!(/(\d+[:\.\ ]?\d*)+/, " ")
|
21
|
+
t.scan(/(\b[A-Z][\.\ ][A-Z][\.\ ][A-Z][\.\ ]|\b[A-Z][\.\ ][A-Z][\.\ ])/){|a| tweet.values[0][:acronyms] << a.first.strip}.gsub!(/(\b[A-Z][\.\ ][A-Z][\.\ ][A-Z][\.\ ]|\b[A-Z][\.\ ][A-Z][\.\ ])/, " ")
|
22
|
+
t.scan(/(([A-Z]+[\s\,\&\:\-]+){2,}|[A-Z]{4,}\W)/){|s| tweet.values[0][:shouts] << s.first.gsub(/\W/, " ").strip}.gsub!(/(([A-Z]+[\s\,\&\:\-]+){2,}|[A-Z]{4,}\W)/, " ")
|
23
23
|
t.scan(/\b[A-Z]{2,3}\b/){|a| tweet.values[0][:acronyms] << a.strip}.gsub!(/\b[A-Z]{2,3}\b/, " ")
|
24
24
|
t.scan(/\w+['\/]?\w*/){|w| tweet.values[0][:words] << w.strip}.gsub!(/\w+['\/]?\w*/, " ")
|
25
25
|
end
|
data/lib/twexicon/version.rb
CHANGED