twittercrawler 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/twitter_parser.rb +25 -17
- data/lib/twittercrawler.rb +0 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5079bdd6191c5c0e0fa991e9d0360cc51b5af1c
|
4
|
+
data.tar.gz: f4de3c927a9b746ac259b8091ca01783d7410285
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ae97b80fac20b1e3faead7e5df3547323851331b9b10ca8f8464f1c2e08a92ed6de84d3b8b9c6eddfcdf3e696e597b584a1f82bd3d4d0d95685e52a5e3b79377
|
7
|
+
data.tar.gz: f2ae34c5a5894f035317cce55e125cbc1faf231164123c7df469d6d03bac658e269897ac77ba67e4a5a7519258ce711f0d508b6f01565339db3c6c89c34bcfbc
|
data/lib/twitter_parser.rb
CHANGED
@@ -19,8 +19,11 @@ class TwitterParser
|
|
19
19
|
profile_pic: get_profile_pic,
|
20
20
|
hashtags: get_hashtags,
|
21
21
|
mentioned_urls: get_mentioned_urls,
|
22
|
+
conversation_id: get_conversation_id,
|
23
|
+
is_reply_to: get_is_reply_to,
|
22
24
|
reply_to_user: get_reply_to_user[0],
|
23
25
|
reply_to_uid: get_reply_to_user[1],
|
26
|
+
tweet_id: get_tweet_id,
|
24
27
|
tweet_time: get_tweet_time,
|
25
28
|
tweet_link: get_tweet_link,
|
26
29
|
retweet_count: get_retweet_count,
|
@@ -34,34 +37,31 @@ class TwitterParser
|
|
34
37
|
end
|
35
38
|
end
|
36
39
|
|
37
|
-
# Get
|
40
|
+
# Get URL to the profile pic
|
38
41
|
def get_profile_pic
|
39
42
|
@tweet.css("img.avatar")[0]['src']
|
40
43
|
end
|
41
44
|
|
42
|
-
# Get
|
45
|
+
# Get URLS in the tweet
|
43
46
|
def get_mentioned_urls
|
44
47
|
tweet = get_tweet_text
|
45
48
|
return extract_urls(tweet)
|
46
49
|
end
|
47
50
|
|
48
|
-
# Get
|
51
|
+
# Get hashtags in the tweet
|
49
52
|
def get_hashtags
|
50
53
|
tweet = get_tweet_text
|
51
54
|
return extract_hashtags(tweet)
|
52
55
|
end
|
53
56
|
|
54
|
-
# Get the username
|
55
57
|
def get_username
|
56
|
-
@tweet.css(".
|
58
|
+
@tweet.css(".tweet")[0]["data-screen-name"]
|
57
59
|
end
|
58
60
|
|
59
|
-
# Get the fullname
|
60
61
|
def get_fullname
|
61
62
|
@tweet.css(".fullname").text
|
62
63
|
end
|
63
64
|
|
64
|
-
# Get user ID number
|
65
65
|
def get_user_id
|
66
66
|
@tweet.css(".js-user-profile-link").css(".account-group")[0]["data-user-id"]
|
67
67
|
end
|
@@ -71,36 +71,44 @@ class TwitterParser
|
|
71
71
|
@tweet.css(".js-tweet-text-container").text.lstrip.strip
|
72
72
|
end
|
73
73
|
|
74
|
-
# Get the time
|
74
|
+
# Get the time of the tweet
|
75
75
|
def get_tweet_time
|
76
76
|
DateTime.parse(@tweet.css(".tweet-timestamp")[0]["title"]).strftime('%d %b %Y %H:%M:%S')
|
77
77
|
end
|
78
78
|
|
79
|
-
|
79
|
+
def get_tweet_id
|
80
|
+
@tweet.css(".tweet")[0]["data-tweet-id"]
|
81
|
+
end
|
82
|
+
|
80
83
|
def get_tweet_link
|
81
|
-
"https://twitter.com"+@tweet.css(".tweet
|
84
|
+
"https://twitter.com"+@tweet.css(".tweet")[0]["data-permalink-path"]
|
82
85
|
end
|
83
86
|
|
84
|
-
# Get the # of retweets
|
85
87
|
def get_retweet_count
|
86
88
|
@tweet.css(".ProfileTweet-action--retweet")[0].css("span")[0]['data-tweet-stat-count']
|
87
89
|
end
|
88
90
|
|
89
|
-
# Get the # of favorites
|
90
91
|
def get_favorite_count
|
91
92
|
@tweet.css(".ProfileTweet-action--favorite")[0].css("span")[0]['data-tweet-stat-count']
|
92
93
|
end
|
93
94
|
|
94
|
-
|
95
|
+
def get_conversation_id
|
96
|
+
@tweet.css(".tweet")[0]["data-conversation-id"]
|
97
|
+
end
|
98
|
+
|
99
|
+
def get_is_reply_to
|
100
|
+
@tweet.css(".tweet")[0]["data-is-reply-to"]
|
101
|
+
end
|
102
|
+
|
95
103
|
def get_reply_count
|
96
104
|
@tweet.css(".ProfileTweet-action--reply")[0].css("span")[0]['data-tweet-stat-count']
|
97
105
|
end
|
98
106
|
|
99
|
-
#
|
107
|
+
# The user of the tweet that is being replied to (if any)
|
100
108
|
def get_reply_to_user
|
101
109
|
reply_to = @tweet.css("span").select{|s| s.text.include?("In reply")}[0]
|
102
110
|
if reply_to
|
103
|
-
reply_to_user = reply_to.css("a")[0]['href'].gsub("/", "
|
111
|
+
reply_to_user = reply_to.css("a")[0]['href'].gsub("/", "")
|
104
112
|
reply_to_uid = reply_to.css("a")[0]['data-user-id']
|
105
113
|
return reply_to_user, reply_to_uid
|
106
114
|
else
|
@@ -108,11 +116,11 @@ class TwitterParser
|
|
108
116
|
end
|
109
117
|
end
|
110
118
|
|
111
|
-
# Get
|
119
|
+
# Get account names and uids that are mentioned
|
112
120
|
def get_mentions
|
113
121
|
mentions = @tweet.css(".twitter-atreply")
|
114
122
|
if !mentions.empty?
|
115
|
-
mention_names = mentions.map{|t| t.text}
|
123
|
+
mention_names = mentions.map{|t| t.css("b").text}
|
116
124
|
mention_uids = mentions.map{|t| t['data-mentioned-user-id']}
|
117
125
|
return mention_names, mention_uids
|
118
126
|
else
|
data/lib/twittercrawler.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twittercrawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. C. McGrath
|
8
|
+
- Brennan Novak
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2017-02-
|
12
|
+
date: 2017-02-26 00:00:00.000000000 Z
|
12
13
|
dependencies: []
|
13
14
|
description: Crawls Twitter
|
14
15
|
email: shidash@shidash.com
|