twitterscraper-ruby 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9cfd03782734642da8ac29839788f142399d2a3f4ec601e8b6f47ae1ca38c17f
4
- data.tar.gz: 07a398e51fd2fbdc735ae27008d9a23e97dc390632179738045db4c81bd4fcad
3
+ metadata.gz: 5a0a2d55fac0a72e83d696c088daa6ca84b7b13519fbbe7a259dd1979373039a
4
+ data.tar.gz: a6cf2a0793f05d03d8d9b489eba985a244c7dce9f70e935d03207a7e103d6365
5
5
  SHA512:
6
- metadata.gz: 6f417fe3379a3d9d134c308a9ea9d4e01b458018c9c5a3f8508a85e7f5890d01991838cfcabe87b8246f69edf4458c66d17924359798017907862071353f643d
7
- data.tar.gz: 758bcb55ded936c3696f99647f64bc9921386b3cb0c783c218510c0e36991ae6b95a9d08fa071e02072c8b727bbadb6674ceeb19a74e356a842d62c1ec4c038f
6
+ metadata.gz: 3b4ca939b22a48fc53e1c1cb9ea25f55cdd6f8a53eb26fa1733948a8df44cd46fa51884668a70bbc31e85c4b986172d23995633557644b5ea93d7640b4034cf9
7
+ data.tar.gz: 9b1d61933990c916734fc6722bc12e6fdda513c4532edcb86982feabc30dabeaa13f39db03c8555fb8ddaa2aafc0493cb88069fbc374515737ed1465522f153b
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- twitterscraper-ruby (0.7.0)
4
+ twitterscraper-ruby (0.8.0)
5
5
  nokogiri
6
6
  parallel
7
7
 
data/README.md CHANGED
@@ -93,6 +93,9 @@ $ cat tweets.json | jq . | less
93
93
  - user_id
94
94
  - screen_name
95
95
  - name
96
+ - links
97
+ - hashtags
98
+ - image_urls
96
99
  - tweet_url
97
100
  - created_at
98
101
 
data/lib/twitterscraper/tweet.rb CHANGED
@@ -2,7 +2,27 @@ require 'time'
2
2
 
3
3
  module Twitterscraper
4
4
  class Tweet
5
- KEYS = [:screen_name, :name, :user_id, :tweet_id, :tweet_url, :created_at, :text]
5
+ KEYS = [
6
+ :screen_name,
7
+ :name,
8
+ :user_id,
9
+ :tweet_id,
10
+ :text,
11
+ :links,
12
+ :hashtags,
13
+ :image_urls,
14
+ :video_url,
15
+ :has_media,
16
+ :likes,
17
+ :retweets,
18
+ :replies,
19
+ :is_replied,
20
+ :is_reply_to,
21
+ :parent_tweet_id,
22
+ :reply_to_users,
23
+ :tweet_url,
24
+ :created_at,
25
+ ]
6
26
  attr_reader *KEYS
7
27
 
8
28
  def initialize(attrs)
@@ -31,15 +51,50 @@ module Twitterscraper
31
51
 
32
52
  def from_tweet_html(html)
33
53
  inner_html = Nokogiri::HTML(html.inner_html)
34
- timestamp = inner_html.xpath("//span[@class[contains(., 'js-short-timestamp')]]").first.attr('data-time').to_i
54
+ tweet_id = html.attr('data-tweet-id').to_i
55
+ text = inner_html.xpath("//div[@class[contains(., 'js-tweet-text-container')]]/p[@class[contains(., 'js-tweet-text')]]").first.text
56
+ links = inner_html.xpath("//a[@class[contains(., 'twitter-timeline-link')]]").map { |elem| elem.attr('data-expanded-url') }.select { |link| link && !link.include?('pic.twitter') }
57
+ image_urls = inner_html.xpath("//div[@class[contains(., 'AdaptiveMedia-photoContainer')]]").map { |elem| elem.attr('data-image-url') }
58
+ video_url = inner_html.xpath("//div[@class[contains(., 'PlayableMedia-container')]]/a").map { |elem| elem.attr('href') }[0]
59
+ has_media = !image_urls.empty? || (video_url && !video_url.empty?)
60
+
61
+ actions = inner_html.xpath("//div[@class[contains(., 'ProfileTweet-actionCountList')]]")
62
+ likes = actions.xpath("//span[@class[contains(., 'ProfileTweet-action--favorite')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first.attr('data-tweet-stat-count').to_i || 0
63
+ retweets = actions.xpath("//span[@class[contains(., 'ProfileTweet-action--retweet')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first.attr('data-tweet-stat-count').to_i || 0
64
+ replies = actions.xpath("//span[@class[contains(., 'ProfileTweet-action--reply u-hiddenVisually')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first.attr('data-tweet-stat-count').to_i || 0
65
+ is_replied = replies != 0
66
+
67
+ parent_tweet_id = inner_html.xpath('//*[@data-conversation-id]').first.attr('data-conversation-id').to_i
68
+ if tweet_id == parent_tweet_id
69
+ is_reply_to = false
70
+ parent_tweet_id = nil
71
+ reply_to_users = []
72
+ else
73
+ is_reply_to = true
74
+ reply_to_users = inner_html.xpath("//div[@class[contains(., 'ReplyingToContextBelowAuthor')]]/a").map { |user| {screen_name: user.text.delete_prefix('@'), user_id: user.attr('data-user-id')} }
75
+ end
76
+
77
+ timestamp = inner_html.xpath("//span[@class[contains(., 'ProfileTweet-action--favorite')]]").first.attr('data-time').to_i
35
78
  new(
36
79
  screen_name: html.attr('data-screen-name'),
37
80
  name: html.attr('data-name'),
38
81
  user_id: html.attr('data-user-id').to_i,
39
- tweet_id: html.attr('data-tweet-id').to_i,
82
+ tweet_id: tweet_id,
83
+ text: text,
84
+ links: links,
85
+ hashtags: text.scan(/#\w+/).map { |tag| tag.delete_prefix('#') },
86
+ image_urls: image_urls,
87
+ video_url: video_url,
88
+ has_media: has_media,
89
+ likes: likes,
90
+ retweets: retweets,
91
+ replies: replies,
92
+ is_replied: is_replied,
93
+ is_reply_to: is_reply_to,
94
+ parent_tweet_id: parent_tweet_id,
95
+ reply_to_users: reply_to_users,
40
96
  tweet_url: 'https://twitter.com' + html.attr('data-permalink-path'),
41
97
  created_at: Time.at(timestamp, in: '+00:00'),
42
- text: inner_html.xpath("//div[@class[contains(., 'js-tweet-text-container')]]/p[@class[contains(., 'js-tweet-text')]]").first.text,
43
98
  )
44
99
  end
45
100
  end
data/lib/twitterscraper/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Twitterscraper
2
- VERSION = '0.7.0'
2
+ VERSION = '0.8.0'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitterscraper-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ts-3156