tweet-words 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/tweet-words.rb +122 -0
- metadata +46 -0
data/lib/tweet-words.rb
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
# The program takes a username and number of latest tweets
|
2
|
+
# to be considered. It then reads the latest 'n' tweets for user
|
3
|
+
# and extracts all the words from them.
|
4
|
+
# The program finally sorts the words in order of frequency of occurrence and prints them.
|
5
|
+
#
|
6
|
+
# Author:: Himani Ahuja
|
7
|
+
# Email:: himani.ahuja@sv.cmu.edu
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'rubygems'
|
11
|
+
require 'httparty'
|
12
|
+
|
13
|
+
class TweetWords
|
14
|
+
include HTTParty
|
15
|
+
format :json
|
16
|
+
|
17
|
+
# Prints every word found in a user's latest tweets, most frequent first.
#
# userName              - screen name handed to topTweets.
# maximumNumberOfTweets - how many tweets topTweets is asked to collect.
#
# Output (stdout): a heading line followed by one word per line. Words are
# ordered by descending count; the relative order of equally frequent words
# is not specified.
#
# Returns a single-space String (kept for compatibility with existing callers).
def self.topWords(userName, maximumNumberOfTweets)
  frequencies = findTopWords(topTweets(userName, maximumNumberOfTweets))

  puts "Top words in descending order of frequency: "

  # Sort ascending by count, then walk the result backwards so the most
  # frequent word is printed first.
  ascending = frequencies.sort_by { |_word, occurrences| occurrences }
  ascending.reverse_each do |word, _occurrences|
    puts "#{word}"
  end

  ' '
end
|
34
|
+
|
35
|
+
|
36
|
+
# Collects the text of up to 'maxNumber' of a user's most recent tweets.
#
# A single getTweets call yields only one page of statuses (20 per the
# original author's notes), so this method keeps requesting older pages,
# passing the id just below the oldest status seen so far as 'max_id',
# until enough tweets have been gathered.
#
# screen_name - screen name forwarded to getTweets.
# maxNumber   - maximum number of tweet texts to return.
#
# Returns an Array holding the 'text' field of each collected status, newest
# first as delivered by getTweets. The Array contains only what was actually
# collected (no nil padding). It is empty when maxNumber is zero or negative,
# and shorter than maxNumber when the timeline runs out of statuses.
def self.topTweets(screen_name, maxNumber)
  tweets = []
  # Nothing requested: do not hit the API at all.
  return tweets if maxNumber <= 0

  max_id = 0 # 0 means "no max_id filter" to getTweets

  while tweets.length < maxNumber
    page = getTweets(screen_name, max_id).parsed_response

    # An empty (or missing) page means there are no older statuses left.
    # Without this check the loop would re-request the same empty page forever.
    break if page.nil? || page.empty?

    page.each do |status|
      tweets << status['text']

      # Remember the id just below this status so the next request only
      # returns older statuses.
      max_id = Integer(status['id']) - 1

      break if tweets.length >= maxNumber
    end
  end

  tweets
end
|
66
|
+
|
67
|
+
|
68
|
+
# Fetches one page of a user's timeline from Twitter through the 'get'
# class method (supplied by the HTTParty mixin included in this class).
#
# screen_name - screen name of the user whose statuses are requested.
# max         - when greater than 0, sent as 'max_id' so that only statuses
#               with an ID less than or equal to it (i.e. that one or older)
#               are returned. The default of 0 sends no 'max_id' at all.
#
# Returns whatever 'get' returns for the request; per the original author's
# notes this carries at most the 20 most recent matching statuses.
#
# NOTE(review): the URL targets the v1 REST endpoint over plain HTTP;
# confirm it is still served before relying on this method.
def self.getTweets(screen_name, max=0)
  query = { :screen_name => screen_name }
  query[:max_id] = max if max > 0

  get('http://api.twitter.com/1/statuses/user_timeline.json', :query => query)
end
|
86
|
+
|
87
|
+
|
88
|
+
# Counts how often each word occurs across a list of tweet strings.
#
# A "word" is a maximal run of ASCII letters and digits; every other
# character acts as a separator. Counting is case-sensitive, so "Hello" and
# "hello" are tallied separately.
#
# strArray - Array of tweet texts; nil entries are skipped.
#
# Returns a Hash mapping each word (String) to its number of occurrences,
# with keys in order of first appearance.
def self.findTopWords(strArray)
  strArray.each_with_object({}) do |tweet, frequencies|
    next if tweet.nil?

    # Pull out each alphanumeric run directly instead of blanking the
    # separators and splitting on whitespace.
    tweet.scan(/[0-9A-Za-z]+/) do |token|
      frequencies[token] = frequencies.fetch(token, 0) + 1
    end
  end
end
|
116
|
+
end
|
117
|
+
|
118
|
+
|
119
|
+
# Sample invocation of the class for a particular user. Uncomment and run.
|
120
|
+
# TweetWords.topWords('justinbieber', 1000)
|
121
|
+
|
122
|
+
|
metadata
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tweet-words
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Himani Ahuja
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-06-08 00:00:00.000000000Z
|
13
|
+
dependencies: []
|
14
|
+
description: This is the gem to sort and print the words in a given twitter user’s
|
15
|
+
last 'n' tweets sorted by frequency of use.
|
16
|
+
email: himani.ahuja@sv.cmu.edu
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- lib/tweet-words.rb
|
22
|
+
homepage: http://rubygems.org/gems/tweet-words
|
23
|
+
licenses: []
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
+
none: false
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
requirements: []
|
41
|
+
rubyforge_project:
|
42
|
+
rubygems_version: 1.8.10
|
43
|
+
signing_key:
|
44
|
+
specification_version: 3
|
45
|
+
summary: Top words in a person's top 'n' tweets
|
46
|
+
test_files: []
|