twitterscraper-ruby 0.7.0 → 0.8.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9cfd03782734642da8ac29839788f142399d2a3f4ec601e8b6f47ae1ca38c17f
4
- data.tar.gz: 07a398e51fd2fbdc735ae27008d9a23e97dc390632179738045db4c81bd4fcad
3
+ metadata.gz: 5a0a2d55fac0a72e83d696c088daa6ca84b7b13519fbbe7a259dd1979373039a
4
+ data.tar.gz: a6cf2a0793f05d03d8d9b489eba985a244c7dce9f70e935d03207a7e103d6365
5
5
  SHA512:
6
- metadata.gz: 6f417fe3379a3d9d134c308a9ea9d4e01b458018c9c5a3f8508a85e7f5890d01991838cfcabe87b8246f69edf4458c66d17924359798017907862071353f643d
7
- data.tar.gz: 758bcb55ded936c3696f99647f64bc9921386b3cb0c783c218510c0e36991ae6b95a9d08fa071e02072c8b727bbadb6674ceeb19a74e356a842d62c1ec4c038f
6
+ metadata.gz: 3b4ca939b22a48fc53e1c1cb9ea25f55cdd6f8a53eb26fa1733948a8df44cd46fa51884668a70bbc31e85c4b986172d23995633557644b5ea93d7640b4034cf9
7
+ data.tar.gz: 9b1d61933990c916734fc6722bc12e6fdda513c4532edcb86982feabc30dabeaa13f39db03c8555fb8ddaa2aafc0493cb88069fbc374515737ed1465522f153b
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- twitterscraper-ruby (0.7.0)
4
+ twitterscraper-ruby (0.8.0)
5
5
  nokogiri
6
6
  parallel
7
7
 
data/README.md CHANGED
@@ -93,6 +93,9 @@ $ cat tweets.json | jq . | less
93
93
  - user_id
94
94
  - screen_name
95
95
  - name
96
+ - links
97
+ - hashtags
98
+ - image_urls
96
99
  - tweet_url
97
100
  - created_at
98
101
 
@@ -2,7 +2,27 @@ require 'time'
2
2
 
3
3
  module Twitterscraper
4
4
  class Tweet
5
- KEYS = [:screen_name, :name, :user_id, :tweet_id, :tweet_url, :created_at, :text]
5
+ KEYS = [
6
+ :screen_name,
7
+ :name,
8
+ :user_id,
9
+ :tweet_id,
10
+ :text,
11
+ :links,
12
+ :hashtags,
13
+ :image_urls,
14
+ :video_url,
15
+ :has_media,
16
+ :likes,
17
+ :retweets,
18
+ :replies,
19
+ :is_replied,
20
+ :is_reply_to,
21
+ :parent_tweet_id,
22
+ :reply_to_users,
23
+ :tweet_url,
24
+ :created_at,
25
+ ]
6
26
  attr_reader *KEYS
7
27
 
8
28
  def initialize(attrs)
@@ -31,15 +51,50 @@ module Twitterscraper
31
51
 
32
52
  def from_tweet_html(html)
33
53
  inner_html = Nokogiri::HTML(html.inner_html)
34
- timestamp = inner_html.xpath("//span[@class[contains(., 'js-short-timestamp')]]").first.attr('data-time').to_i
54
+ tweet_id = html.attr('data-tweet-id').to_i
55
+ text = inner_html.xpath("//div[@class[contains(., 'js-tweet-text-container')]]/p[@class[contains(., 'js-tweet-text')]]").first.text
56
+ links = inner_html.xpath("//a[@class[contains(., 'twitter-timeline-link')]]").map { |elem| elem.attr('data-expanded-url') }.select { |link| link && !link.include?('pic.twitter') }
57
+ image_urls = inner_html.xpath("//div[@class[contains(., 'AdaptiveMedia-photoContainer')]]").map { |elem| elem.attr('data-image-url') }
58
+ video_url = inner_html.xpath("//div[@class[contains(., 'PlayableMedia-container')]]/a").map { |elem| elem.attr('href') }[0]
59
+ has_media = !image_urls.empty? || (video_url && !video_url.empty?)
60
+
61
+ actions = inner_html.xpath("//div[@class[contains(., 'ProfileTweet-actionCountList')]]")
62
+ likes = actions.xpath("//span[@class[contains(., 'ProfileTweet-action--favorite')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first.attr('data-tweet-stat-count').to_i || 0
63
+ retweets = actions.xpath("//span[@class[contains(., 'ProfileTweet-action--retweet')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first.attr('data-tweet-stat-count').to_i || 0
64
+ replies = actions.xpath("//span[@class[contains(., 'ProfileTweet-action--reply u-hiddenVisually')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first.attr('data-tweet-stat-count').to_i || 0
65
+ is_replied = replies != 0
66
+
67
+ parent_tweet_id = inner_html.xpath('//*[@data-conversation-id]').first.attr('data-conversation-id').to_i
68
+ if tweet_id == parent_tweet_id
69
+ is_reply_to = false
70
+ parent_tweet_id = nil
71
+ reply_to_users = []
72
+ else
73
+ is_reply_to = true
74
+ reply_to_users = inner_html.xpath("//div[@class[contains(., 'ReplyingToContextBelowAuthor')]]/a").map { |user| {screen_name: user.text.delete_prefix('@'), user_id: user.attr('data-user-id')} }
75
+ end
76
+
77
+ timestamp = inner_html.xpath("//span[@class[contains(., 'ProfileTweet-action--favorite')]]").first.attr('data-time').to_i
35
78
  new(
36
79
  screen_name: html.attr('data-screen-name'),
37
80
  name: html.attr('data-name'),
38
81
  user_id: html.attr('data-user-id').to_i,
39
- tweet_id: html.attr('data-tweet-id').to_i,
82
+ tweet_id: tweet_id,
83
+ text: text,
84
+ links: links,
85
+ hashtags: text.scan(/#\w+/).map { |tag| tag.delete_prefix('#') },
86
+ image_urls: image_urls,
87
+ video_url: video_url,
88
+ has_media: has_media,
89
+ likes: likes,
90
+ retweets: retweets,
91
+ replies: replies,
92
+ is_replied: is_replied,
93
+ is_reply_to: is_reply_to,
94
+ parent_tweet_id: parent_tweet_id,
95
+ reply_to_users: reply_to_users,
40
96
  tweet_url: 'https://twitter.com' + html.attr('data-permalink-path'),
41
97
  created_at: Time.at(timestamp, in: '+00:00'),
42
- text: inner_html.xpath("//div[@class[contains(., 'js-tweet-text-container')]]/p[@class[contains(., 'js-tweet-text')]]").first.text,
43
98
  )
44
99
  end
45
100
  end
@@ -1,3 +1,3 @@
1
1
  module Twitterscraper
2
- VERSION = '0.7.0'
2
+ VERSION = '0.8.0'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitterscraper-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ts-3156