twexicon 0.1.7.1 → 0.1.8
This diff compares publicly released versions of the package as published to their public registry and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/lib/twexicon/analyzer.rb +11 -7
- data/lib/twexicon/scraper.rb +4 -4
- data/lib/twexicon/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 166ba87d5e9f7c5039ffc03a8a9b74aaf25d6599
+  data.tar.gz: c3f0f992b626d751057db8850589fec2c5fdb47f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a575dc49c0ee39ee40b0501cc65d1d47d7567ea035cac6a2c02ebeb26712be09edeeaffc893c2f13c009bed0bfbf2960ccb38df66d0923e1b6fa1944a9d57bd0
+  data.tar.gz: 521809c9442611e804c9b36d4affa9534f7c002a8089011eb5c200757914f456e8a50e64c4b7450380b1850ffb4fc5fbe12836275c67df6b1ef267ad16b88624
data/lib/twexicon/analyzer.rb
CHANGED
@@ -13,7 +13,7 @@ class Twexicon::Analyzer
 until quit
 @input = nil
 @int = ""
-puts "
+puts "\nWhat would you like to do next? For options, type 'help'."
 until is_valid?
 @input = gets.strip.gsub(/\W/, "").downcase
 end
@@ -69,14 +69,16 @@ class Twexicon::Analyzer
 end
 end

-#
+# Words from https://en.wikipedia.org/wiki/Most_common_words_in_English –– Left out some common yet still interesting ones (People, Good, Think, Work, First, One, Two, Want, New, Give, Know)
 COMMON_WORDS = ["A", "About", "An", "And", "Are", "As", "At", "Be", "Been", "But", "By", "For", "From", "Get", "Had", "Has", "Have", "In", "Into", "Is", "It", "It's", "Its", "Just", "Not", "Of", "On", "Or", "Say", "So", "Some", "That", "The", "There", "These", "This", "Those", "To", "Up", "With", "I", "My", "Your", "They", "He", "You", "Do", "His", "We", "Her", "She", "Will", "All", "Would", "Their", "What", "Out", "If", "Who", "Which", "Go", "Me", "When", "Make", "Can", "Time", "No", "Him", "Take", "Year", "Could", "Them", "See", "Other", "Than", "Then", "Now", "Look", "Only", "Come", "Over", "Also", "Back", "After", "Use", "How", "Our", "Well", "Way", "Even", "Because", "Any", "Day", "Most", "Us",
 # other additions
-"Wouldn't", "Couldn't", "Shouldn't", "Mustn't", "Would've", "Could've", "Should've", "Must've", "Hadn't", "Wasn't", "Weren't", "Ain't", "Aint", "Here", "Seem", "Seems", "That's", "Took", "Much", "More", "You're", "We're", "We've", "I've", "I'm",
+"Wouldn't", "Couldn't", "Shouldn't", "Mustn't", "Would've", "Could've", "Should've", "Must've", "Hadn't", "Wasn't", "Weren't", "Ain't", "Aint", "Here", "Seem", "Seems", "That's", "Took", "Much", "More", "You're", "We're", "We've", "I've", "I'm", "Don't", "Got", "Soon",
 # contraction endings until I fix the parsing error
 "Re", "Ll", "Ve",
 # Letters until I fix the contraction parsing error
-"B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"
+"B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
+# Common words in other languages that *aren't* uncommon English words (so, "Con" doesn't count)
+"En"]

 def get_words
 words = {}
@@ -93,15 +95,17 @@ class Twexicon::Analyzer
 end
 words.each do |w, n| # Create strings for the words that occur >1 times
 case n
-when 1..9 then word_array << "
-when 10..99 then word_array << "#{n}x #{w}"
+when 1..9 then word_array << "000#{n}x #{w}"
+when 10..99 then word_array << "00#{n}x #{w}"
+when 100..999 then word_array << "0#{n}x #{w}"
+when 1000..9999 then word_array << "#{n}x #{w}"
 end
 end
 if word_array.empty?
 puts "\nIt appears that @#{username} is not much of a talker."
 else
 puts "\n@#{username}'s current favorite word(s):"
-puts word_array.sort.reverse.take(10)
+puts word_array.sort.reverse.each{|w| w.sub!(/^0+/, "")}.take(10)
 end
 end

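The ranking change above relies on string sorting: each count is left-padded with zeros to a fixed width so that a plain lexicographic sort orders the strings by frequency, and the padding is stripped again before printing. A minimal standalone sketch of that idea (not the gem's code; the sample counts and the use of format are illustrative):

    # Pad counts to four digits so "0104x ..." sorts above "0012x ..." as a plain string.
    counts = { "ruby" => 12, "tweet" => 3, "scraper" => 104 }   # hypothetical word counts
    padded = counts.map { |word, n| format("%04dx %s", n, word) }
    # Sort descending, keep the top ten, then strip the leading zeros for display.
    top = padded.sort.reverse.take(10).map { |entry| entry.sub(/^0+/, "") }
    puts top
    # => 104x scraper
    #    12x ruby
    #    3x tweet

The gem's case/when branches build the same padding by hand, and only handle counts below 10,000.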
data/lib/twexicon/scraper.rb
CHANGED
@@ -14,12 +14,12 @@ class Twexicon::Scraper
 tweets.each do |num, tweet|
 t = tweet.keys[0].dup
 t.scan(/pic.twitter.com\/\w{10}/){|p| tweet.values[0][:pix] << p.strip}.gsub!(/pic.twitter.com\/\w{10}/, " ")
-t.scan(/https?:\/\/[\w
+t.scan(/https?:\/\/[\w\.\?\=\&\-\/\#\:]+/){|w| tweet.values[0][:links] << w.strip}.gsub!(/https?:\/\/[\w\.\?\=\&\-\/\#\:]+/, " ")
 t.scan(/#\w+/){|h| tweet.values[0][:hashtags] << h.gsub(/\W/, "").prepend("#")}.gsub!(/#\w+/, " ")
 t.scan(/@\w+/){|u| tweet.values[0][:usernames] << u.gsub(/\W/, "").prepend("@")}.gsub!(/@\w+/, " ")
-t.scan(/(\d+[:\.\
-t.scan(/(\b[A-Z][\.\
-t.scan(/(([A-Z]
+t.scan(/(\d+[:\.\ ]?\d*)+/){|n| tweet.values[0][:numbers] << n.first.gsub(/(^\W+|\W+$)/, "")}.gsub!(/(\d+[:\.\ ]?\d*)+/, " ")
+t.scan(/(\b[A-Z][\.\ ][A-Z][\.\ ][A-Z][\.\ ]|\b[A-Z][\.\ ][A-Z][\.\ ])/){|a| tweet.values[0][:acronyms] << a.first.strip}.gsub!(/(\b[A-Z][\.\ ][A-Z][\.\ ][A-Z][\.\ ]|\b[A-Z][\.\ ][A-Z][\.\ ])/, " ")
+t.scan(/(([A-Z]+[\s\,\&\:\-]+){2,}|[A-Z]{4,}\W)/){|s| tweet.values[0][:shouts] << s.first.gsub(/\W/, " ").strip}.gsub!(/(([A-Z]+[\s\,\&\:\-]+){2,}|[A-Z]{4,}\W)/, " ")
 t.scan(/\b[A-Z]{2,3}\b/){|a| tweet.values[0][:acronyms] << a.strip}.gsub!(/\b[A-Z]{2,3}\b/, " ")
 t.scan(/\w+['\/]?\w*/){|w| tweet.values[0][:words] << w.strip}.gsub!(/\w+['\/]?\w*/, " ")
 end
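In the new scraper, the link pattern's character class accepts word characters plus ., ?, =, &, -, /, # and :, so query strings and fragments are collected (and then blanked out of the tweet text) as a single link. A rough standalone illustration, assuming a hypothetical tweet string; the old, truncated pattern above is not reproduced here:

    # Same character class as the new link pattern in the hunk above.
    url_pattern = /https?:\/\/[\w\.\?\=\&\-\/\#\:]+/
    text = "new post up at https://example.com/watch?v=abc123&t=42#top today"
    p text.scan(url_pattern)      # => ["https://example.com/watch?v=abc123&t=42#top"]
    p text.gsub(url_pattern, " ") # the URL is replaced by a single space

The rewritten number, acronym, and shouting lines follow the same scan-then-gsub! shape: collect the matches into the tweet's metadata, then blank them out so the later word extraction does not count them again.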
data/lib/twexicon/version.rb
CHANGED