tweet-words 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/tweet-words.rb +122 -0
  2. metadata +46 -0
@@ -0,0 +1,122 @@
1
+ # The program takes a username and number of latest tweets
2
+ # to be considered. It then reads the latest 'n' tweets for user
3
+ # and extracts all the words from them.
4
+ # The program finally sorts the words in order of frequency of occurrence and prints them.
5
+ #
6
+ # Author:: Himani Ahuja
7
+ # Email:: himani.ahuja@sv.cmu.edu
8
+ #
9
+
10
+ require 'rubygems'
11
+ require 'httparty'
12
+
13
+ class TweetWords
14
+ include HTTParty
15
+ format :json
16
+
17
# Prints the words used in a user's most recent tweets, most frequent first.
#
# The tweets are fetched with +topTweets+ and the per-word counts are built by
# +findTopWords+. A heading line is written to standard output, followed by
# one word per line. Words that occur equally often are listed alphabetically,
# so the same set of tweets always prints in the same order.
#
# userName              - screen name handed to +topTweets+.
# maximumNumberOfTweets - upper bound on the number of tweets to examine.
#
# Returns a String containing a single space (the historical return value,
# preserved so existing callers are unaffected).
def self.topWords(userName, maximumNumberOfTweets)
  tweets = topTweets(userName, maximumNumberOfTweets)
  frequencies = findTopWords(tweets)

  puts "Top words in descending order of frequency: "

  # Negating the count sorts the highest count first; the word itself breaks
  # ties, because sort_by alone gives no guaranteed order for equal keys.
  ranked = frequencies.sort_by { |word, count| [-count, word] }
  ranked.each { |word, _count| puts word }

  ' '
end
34
+
35
+
36
# Collects the text of up to +maxNumber+ of a user's most recent tweets.
#
# A single +getTweets+ call only yields one page of statuses, so this method
# keeps requesting older pages: after each status it remembers that status's
# id minus one and passes it as the upper id bound of the next request.
#
# Paging stops as soon as enough tweets were gathered, or when a response
# carries no further statuses (an empty page, or a payload that is not an
# Array such as an error document). Without that second condition a user
# with fewer than +maxNumber+ tweets would be polled forever.
#
# screen_name - screen name handed to +getTweets+.
# maxNumber   - maximum number of tweets to return; zero or less yields an
#               empty Array without issuing any request.
#
# Returns an Array with one entry per collected status (the value stored
# under its 'text' key), newest first as delivered by +getTweets+.
# Raises ArgumentError/TypeError from Integer() if a status id is not numeric.
def self.topTweets(screen_name, maxNumber)
  tweets = []
  max_id = 0 # 0 tells getTweets to start from the newest status

  while tweets.length < maxNumber
    page = getTweets(screen_name, max_id).parsed_response

    # Nothing (more) to read: stop instead of re-requesting indefinitely.
    break unless page.is_a?(Array) && !page.empty?

    page.each do |status|
      tweets << status['text']
      # One below the status just read, so the next page starts strictly after it.
      max_id = Integer(status['id']) - 1
      break if tweets.length >= maxNumber
    end
  end

  tweets
end
66
+
67
+
68
# Requests one page of a user's timeline from Twitter through HTTParty.
#
# screen_name - screen name of the user whose statuses are requested; always
#               sent as the +screen_name+ query parameter.
# max         - optional upper id bound. Only when it is greater than zero is
#               it sent as the +max_id+ query parameter, asking for statuses
#               with an id less than (that is, older than) or equal to it.
#
# Returns whatever HTTParty's +get+ returns for the request.
# NOTE(review): Twitter is expected to deliver at most 20 statuses per call -
# TODO confirm against the API documentation.
def self.getTweets(screen_name, max=0)
  query = { :screen_name => screen_name }
  query[:max_id] = max if max > 0

  get('http://api.twitter.com/1/statuses/user_timeline.json', :query => query)
end
86
+
87
+
88
# Counts how often each word occurs across a collection of tweet texts.
#
# A word is a maximal run of letters and digits; every other character acts
# as a separator. Letters are matched with the Unicode-aware [[:alnum:]]
# class, so accented or non-Latin words stay intact instead of being cut
# apart at each non-ASCII character. Counting is case-sensitive.
#
# strArray - Array of Strings to analyse. nil entries are skipped, and a nil
#            argument is treated like an empty Array.
#
# Returns a Hash mapping each word (String) to its number of occurrences
# (Integer); the Hash is empty when there is nothing to count.
def self.findTopWords(strArray)
  counts = {}

  Array(strArray).each do |text|
    next if text.nil?

    text.scan(/[[:alnum:]]+/) do |word|
      # fetch keeps the returned Hash a plain one (no default value attached).
      counts[word] = counts.fetch(word, 0) + 1
    end
  end

  counts
end
116
+ end
117
+
118
+
119
+ # Sample invocation of the class for a particular user. Uncomment and run.
120
+ # TweetWords.topWords('justinbieber', 1000)
121
+
122
+
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tweet-words
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Himani Ahuja
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-08 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: This is the gem to sort and print the words in a given twitter user’s
15
+ last 'n' tweets sorted by frequency of use.
16
+ email: himani.ahuja@sv.cmu.edu
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - lib/tweet-words.rb
22
+ homepage: http://rubygems.org/gems/tweet-words
23
+ licenses: []
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 1.8.10
43
+ signing_key:
44
+ specification_version: 3
45
+ summary: Top words in a person's top 'n' tweets
46
+ test_files: []