RubyGems - twitterscraper-ruby - Versions diffs - 0.7.0 → 0.8.0 - Mend

twitterscraper-ruby 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 9cfd03782734642da8ac29839788f142399d2a3f4ec601e8b6f47ae1ca38c17f
-  data.tar.gz: 07a398e51fd2fbdc735ae27008d9a23e97dc390632179738045db4c81bd4fcad
+  metadata.gz: 5a0a2d55fac0a72e83d696c088daa6ca84b7b13519fbbe7a259dd1979373039a
+  data.tar.gz: a6cf2a0793f05d03d8d9b489eba985a244c7dce9f70e935d03207a7e103d6365
 SHA512:
-  metadata.gz: 6f417fe3379a3d9d134c308a9ea9d4e01b458018c9c5a3f8508a85e7f5890d01991838cfcabe87b8246f69edf4458c66d17924359798017907862071353f643d
-  data.tar.gz: 758bcb55ded936c3696f99647f64bc9921386b3cb0c783c218510c0e36991ae6b95a9d08fa071e02072c8b727bbadb6674ceeb19a74e356a842d62c1ec4c038f
+  metadata.gz: 3b4ca939b22a48fc53e1c1cb9ea25f55cdd6f8a53eb26fa1733948a8df44cd46fa51884668a70bbc31e85c4b986172d23995633557644b5ea93d7640b4034cf9
+  data.tar.gz: 9b1d61933990c916734fc6722bc12e6fdda513c4532edcb86982feabc30dabeaa13f39db03c8555fb8ddaa2aafc0493cb88069fbc374515737ed1465522f153b

data/Gemfile.lock CHANGED

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    twitterscraper-ruby (0.7.0)
+    twitterscraper-ruby (0.8.0)
       nokogiri
       parallel

data/README.md CHANGED

@@ -93,6 +93,9 @@ $ cat tweets.json | jq . | less
 - user_id
 - screen_name
 - name
+- links
+- hashtags
+- image_urls
 - tweet_url
 - created_at

data/lib/twitterscraper/tweet.rb CHANGED

@@ -2,7 +2,27 @@ require 'time'
 module Twitterscraper
   class Tweet
-    KEYS = [:screen_name, :name, :user_id, :tweet_id, :tweet_url, :created_at, :text]
+    KEYS = [
+        :screen_name,
+        :name,
+        :user_id,
+        :tweet_id,
+        :text,
+        :links,
+        :hashtags,
+        :image_urls,
+        :video_url,
+        :has_media,
+        :likes,
+        :retweets,
+        :replies,
+        :is_replied,
+        :is_reply_to,
+        :parent_tweet_id,
+        :reply_to_users,
+        :tweet_url,
+        :created_at,
+    ]
     attr_reader *KEYS
     def initialize(attrs)
@@ -31,15 +51,50 @@ module Twitterscraper
       def from_tweet_html(html)
         inner_html = Nokogiri::HTML(html.inner_html)
-        timestamp = inner_html.xpath("//span[@class[contains(., 'js-short-timestamp')]]").first.attr('data-time').to_i
+        tweet_id = html.attr('data-tweet-id').to_i
+        text = inner_html.xpath("//div[@class[contains(., 'js-tweet-text-container')]]/p[@class[contains(., 'js-tweet-text')]]").first.text
+        links = inner_html.xpath("//a[@class[contains(., 'twitter-timeline-link')]]").map { |elem| elem.attr('data-expanded-url') }.select { |link| link && !link.include?('pic.twitter') }
+        image_urls = inner_html.xpath("//div[@class[contains(., 'AdaptiveMedia-photoContainer')]]").map { |elem| elem.attr('data-image-url') }
+        video_url = inner_html.xpath("//div[@class[contains(., 'PlayableMedia-container')]]/a").map { |elem| elem.attr('href') }[0]
+        has_media = !image_urls.empty? || (video_url && !video_url.empty?)
+        actions = inner_html.xpath("//div[@class[contains(., 'ProfileTweet-actionCountList')]]")
+        likes = actions.xpath("//span[@class[contains(., 'ProfileTweet-action--favorite')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first.attr('data-tweet-stat-count').to_i || 0
+        retweets = actions.xpath("//span[@class[contains(., 'ProfileTweet-action--retweet')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first.attr('data-tweet-stat-count').to_i || 0
+        replies = actions.xpath("//span[@class[contains(., 'ProfileTweet-action--reply u-hiddenVisually')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first.attr('data-tweet-stat-count').to_i || 0
+        is_replied = replies != 0
+        parent_tweet_id = inner_html.xpath('//*[@data-conversation-id]').first.attr('data-conversation-id').to_i
+        if tweet_id == parent_tweet_id
+          is_reply_to = false
+          parent_tweet_id = nil
+          reply_to_users = []
+        else
+          is_reply_to = true
+          reply_to_users = inner_html.xpath("//div[@class[contains(., 'ReplyingToContextBelowAuthor')]]/a").map { |user| {screen_name: user.text.delete_prefix('@'), user_id: user.attr('data-user-id')} }
+        end
+        timestamp = inner_html.xpath("//span[@class[contains(., 'ProfileTweet-action--favorite')]]").first.attr('data-time').to_i
         new(
             screen_name: html.attr('data-screen-name'),
             name: html.attr('data-name'),
             user_id: html.attr('data-user-id').to_i,
-            tweet_id: html.attr('data-tweet-id').to_i,
+            tweet_id: tweet_id,
+            text: text,
+            links: links,
+            hashtags: text.scan(/#\w+/).map { |tag| tag.delete_prefix('#') },
+            image_urls: image_urls,
+            video_url: video_url,
+            has_media: has_media,
+            likes: likes,
+            retweets: retweets,
+            replies: replies,
+            is_replied: is_replied,
+            is_reply_to: is_reply_to,
+            parent_tweet_id: parent_tweet_id,
+            reply_to_users: reply_to_users,
             tweet_url: 'https://twitter.com' + html.attr('data-permalink-path'),
             created_at: Time.at(timestamp, in: '+00:00'),
-            text: inner_html.xpath("//div[@class[contains(., 'js-tweet-text-container')]]/p[@class[contains(., 'js-tweet-text')]]").first.text,
         )
       end
     end

data/lib/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Twitterscraper
-  VERSION = '0.7.0'
+  VERSION = '0.8.0'
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: twitterscraper-ruby
 version: !ruby/object:Gem::Version
-  version: 0.7.0
+  version: 0.8.0
 platform: ruby
 authors:
 - ts-3156