twittercrawler 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/twitter_parser.rb +25 -17
- data/lib/twittercrawler.rb +0 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c5079bdd6191c5c0e0fa991e9d0360cc51b5af1c
|
4
|
+
data.tar.gz: f4de3c927a9b746ac259b8091ca01783d7410285
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ae97b80fac20b1e3faead7e5df3547323851331b9b10ca8f8464f1c2e08a92ed6de84d3b8b9c6eddfcdf3e696e597b584a1f82bd3d4d0d95685e52a5e3b79377
|
7
|
+
data.tar.gz: f2ae34c5a5894f035317cce55e125cbc1faf231164123c7df469d6d03bac658e269897ac77ba67e4a5a7519258ce711f0d508b6f01565339db3c6c89c34bcfbc
|
data/lib/twitter_parser.rb
CHANGED
@@ -19,8 +19,11 @@ class TwitterParser
|
|
19
19
|
profile_pic: get_profile_pic,
|
20
20
|
hashtags: get_hashtags,
|
21
21
|
mentioned_urls: get_mentioned_urls,
|
22
|
+
conversation_id: get_conversation_id,
|
23
|
+
is_reply_to: get_is_reply_to,
|
22
24
|
reply_to_user: get_reply_to_user[0],
|
23
25
|
reply_to_uid: get_reply_to_user[1],
|
26
|
+
tweet_id: get_tweet_id,
|
24
27
|
tweet_time: get_tweet_time,
|
25
28
|
tweet_link: get_tweet_link,
|
26
29
|
retweet_count: get_retweet_count,
|
@@ -34,34 +37,31 @@ class TwitterParser
|
|
34
37
|
end
|
35
38
|
end
|
36
39
|
|
37
|
-
# Get
|
40
|
+
# Get URL to the profile pic
|
38
41
|
def get_profile_pic
|
39
42
|
@tweet.css("img.avatar")[0]['src']
|
40
43
|
end
|
41
44
|
|
42
|
-
# Get
|
45
|
+
# Get URLS in the tweet
|
43
46
|
def get_mentioned_urls
|
44
47
|
tweet = get_tweet_text
|
45
48
|
return extract_urls(tweet)
|
46
49
|
end
|
47
50
|
|
48
|
-
# Get
|
51
|
+
# Get hashtags in the tweet
|
49
52
|
def get_hashtags
|
50
53
|
tweet = get_tweet_text
|
51
54
|
return extract_hashtags(tweet)
|
52
55
|
end
|
53
56
|
|
54
|
-
# Get the username
|
55
57
|
def get_username
|
56
|
-
@tweet.css(".
|
58
|
+
@tweet.css(".tweet")[0]["data-screen-name"]
|
57
59
|
end
|
58
60
|
|
59
|
-
# Get the fullname
|
60
61
|
def get_fullname
|
61
62
|
@tweet.css(".fullname").text
|
62
63
|
end
|
63
64
|
|
64
|
-
# Get user ID number
|
65
65
|
def get_user_id
|
66
66
|
@tweet.css(".js-user-profile-link").css(".account-group")[0]["data-user-id"]
|
67
67
|
end
|
@@ -71,36 +71,44 @@ class TwitterParser
|
|
71
71
|
@tweet.css(".js-tweet-text-container").text.lstrip.strip
|
72
72
|
end
|
73
73
|
|
74
|
-
# Get the time
|
74
|
+
# Get the time of the tweet
|
75
75
|
def get_tweet_time
|
76
76
|
DateTime.parse(@tweet.css(".tweet-timestamp")[0]["title"]).strftime('%d %b %Y %H:%M:%S')
|
77
77
|
end
|
78
78
|
|
79
|
-
|
79
|
+
def get_tweet_id
|
80
|
+
@tweet.css(".tweet")[0]["data-tweet-id"]
|
81
|
+
end
|
82
|
+
|
80
83
|
def get_tweet_link
|
81
|
-
"https://twitter.com"+@tweet.css(".tweet
|
84
|
+
"https://twitter.com"+@tweet.css(".tweet")[0]["data-permalink-path"]
|
82
85
|
end
|
83
86
|
|
84
|
-
# Get the # of retweets
|
85
87
|
def get_retweet_count
|
86
88
|
@tweet.css(".ProfileTweet-action--retweet")[0].css("span")[0]['data-tweet-stat-count']
|
87
89
|
end
|
88
90
|
|
89
|
-
# Get the # of favorites
|
90
91
|
def get_favorite_count
|
91
92
|
@tweet.css(".ProfileTweet-action--favorite")[0].css("span")[0]['data-tweet-stat-count']
|
92
93
|
end
|
93
94
|
|
94
|
-
|
95
|
+
def get_conversation_id
|
96
|
+
@tweet.css(".tweet")[0]["data-conversation-id"]
|
97
|
+
end
|
98
|
+
|
99
|
+
def get_is_reply_to
|
100
|
+
@tweet.css(".tweet")[0]["data-is-reply-to"]
|
101
|
+
end
|
102
|
+
|
95
103
|
def get_reply_count
|
96
104
|
@tweet.css(".ProfileTweet-action--reply")[0].css("span")[0]['data-tweet-stat-count']
|
97
105
|
end
|
98
106
|
|
99
|
-
#
|
107
|
+
# The user of the tweet that is being replied to (if any)
|
100
108
|
def get_reply_to_user
|
101
109
|
reply_to = @tweet.css("span").select{|s| s.text.include?("In reply")}[0]
|
102
110
|
if reply_to
|
103
|
-
reply_to_user = reply_to.css("a")[0]['href'].gsub("/", "
|
111
|
+
reply_to_user = reply_to.css("a")[0]['href'].gsub("/", "")
|
104
112
|
reply_to_uid = reply_to.css("a")[0]['data-user-id']
|
105
113
|
return reply_to_user, reply_to_uid
|
106
114
|
else
|
@@ -108,11 +116,11 @@ class TwitterParser
|
|
108
116
|
end
|
109
117
|
end
|
110
118
|
|
111
|
-
# Get
|
119
|
+
# Get account names and uids that are mentioned
|
112
120
|
def get_mentions
|
113
121
|
mentions = @tweet.css(".twitter-atreply")
|
114
122
|
if !mentions.empty?
|
115
|
-
mention_names = mentions.map{|t| t.text}
|
123
|
+
mention_names = mentions.map{|t| t.css("b").text}
|
116
124
|
mention_uids = mentions.map{|t| t['data-mentioned-user-id']}
|
117
125
|
return mention_names, mention_uids
|
118
126
|
else
|
data/lib/twittercrawler.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twittercrawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. C. McGrath
|
8
|
+
- Brennan Novak
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2017-02-
|
12
|
+
date: 2017-02-26 00:00:00.000000000 Z
|
12
13
|
dependencies: []
|
13
14
|
description: Crawls Twitter
|
14
15
|
email: shidash@shidash.com
|