tweet-words 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/lib/tweet-words.rb +122 -0
  2. metadata +46 -0
@@ -0,0 +1,122 @@
1
+ # The program takes a username and number of latest tweets
2
+ # to be considered. It then reads the latest 'n' tweets for user
3
+ # and extracts all the words from them.
4
+ # The program finally sorts the words in order of frequency of occurrence and prints them.
5
+ #
6
+ # Author:: Himani Ahuja
7
+ # Email:: himani.ahuja@sv.cmu.edu
8
+ #
9
+
10
+ require 'rubygems'
11
+ require 'httparty'
12
+
13
+ class TweetWords
14
+ include HTTParty
15
+ format :json
16
+
17
# Prints the words found in a user's most recent tweets, most frequent first.
#
# The tweet texts come from topTweets and the per-word counts from
# findTopWords. A heading line is printed, followed by one word per line in
# descending order of count. Only the words are printed, not their counts.
#
# userName              - screen name handed through to topTweets
# maximumNumberOfTweets - number of tweets requested from topTweets
#
# Always returns a one-space String.
def self.topWords(userName, maximumNumberOfTweets)
  frequencies = findTopWords(topTweets(userName, maximumNumberOfTweets))

  puts "Top words in descending order of frequency: "

  # Sort ascending by count, then walk the result backwards so the highest
  # counts are printed first.
  ascending = frequencies.sort_by { |_word, occurrences| occurrences }
  ascending.reverse_each { |entry| puts entry[0].to_s }

  ' '
end
34
+
35
+
36
# Collects the text of up to maxNumber of a user's most recent tweets.
#
# One getTweets call returns a single page of statuses (the original author
# notes the API hands back about 20 per call), so the timeline is paged:
# after each page the next request asks for tweets with an id strictly below
# the oldest one already seen.
#
# screen_name - screen name of the user whose timeline is read
# maxNumber   - maximum number of tweet texts to collect
#
# Returns an Array of tweet texts, newest first, holding at most maxNumber
# entries. It is shorter when the timeline runs out, and empty (without any
# request being made) when maxNumber is zero or negative. The Array contains
# only fetched entries; it is not padded with nil.
#
# Raises RuntimeError when the parsed response is not an Array (for example
# an error payload), instead of failing later with an unrelated TypeError.
def self.topTweets(screen_name, maxNumber)
  texts = []
  return texts unless maxNumber > 0

  max_id = 0 # 0 makes getTweets omit max_id, i.e. start from the newest tweet

  while texts.length < maxNumber
    page = getTweets(screen_name, max_id).parsed_response

    unless page.is_a?(Array)
      raise "Unexpected timeline response for #{screen_name}: #{page.inspect}"
    end

    # An empty page means the timeline is exhausted. Without this check the
    # loop would keep requesting the same empty page forever.
    break if page.empty?

    page.each do |tweet|
      texts << tweet['text']

      # Remember where to resume: one below the oldest id seen so far.
      max_id = Integer(tweet['id']) - 1

      break if texts.length >= maxNumber
    end
  end

  texts
end
66
+
67
+
68
# Requests one page of a user's timeline through HTTParty's `get`.
#
# screen_name - screen name of the user whose statuses are requested
# max         - when greater than zero it is sent as the `max_id` query
#               parameter, which (per the original author's note) limits the
#               results to tweets with an id less than or equal to it;
#               the default of 0 leaves the parameter out
#
# Returns whatever `get` returns for the request.
def self.getTweets(screen_name, max=0)
  query = { :screen_name => screen_name }
  query[:max_id] = max if max > 0

  get('http://api.twitter.com/1/statuses/user_timeline.json', :query => query)
end
86
+
87
+
88
# Counts how often each word occurs across a list of tweet texts.
#
# A word is a maximal run of letters and digits; every other character acts
# as a separator. Letters are matched with [[:alnum:]], so accented and
# non-Latin words are kept whole instead of being split or dropped as they
# were with an ASCII-only character class. Matching is case-sensitive.
#
# strArray - Array of Strings; nil entries are skipped, and a nil Array is
#            treated as empty
#
# Returns a Hash mapping each word to its number of occurrences, with keys in
# order of first appearance. Missing keys still read as nil.
def self.findTopWords(strArray)
  countOfWords = {}
  return countOfWords if strArray.nil?

  strArray.each do |str|
    next if str.nil?

    # fetch with a default keeps the returned Hash free of a default value.
    str.scan(/[[:alnum:]]+/) do |word|
      countOfWords[word] = countOfWords.fetch(word, 0) + 1
    end
  end

  countOfWords
end
116
+ end
117
+
118
+
119
+ # Sample invocation of the class for a particular user. Uncomment and run.
120
+ # TweetWords.topWords('justinbieber', 1000)
121
+
122
+
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tweet-words
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Himani Ahuja
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-08 00:00:00.000000000Z
13
+ dependencies: []
14
+ description: This is the gem to sort and print the words in a given twitter user’s
15
+ last 'n' tweets sorted by frequency of use.
16
+ email: himani.ahuja@sv.cmu.edu
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - lib/tweet-words.rb
22
+ homepage: http://rubygems.org/gems/tweet-words
23
+ licenses: []
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 1.8.10
43
+ signing_key:
44
+ specification_version: 3
45
+ summary: Top words in a person's top 'n' tweets
46
+ test_files: []