RubyGems - twitterscraper-ruby - Versions diffs - 0.15.0 - Mend

twitterscraper-ruby 0.15.0

Files changed (31) hide show

checksums.yaml +7 -0
data/.circleci/config.yml +31 -0
data/.gitignore +10 -0
data/.irbrc +7 -0
data/.rspec +2 -0
data/.ruby-version +1 -0
data/.travis.yml +6 -0
data/CODE_OF_CONDUCT.md +74 -0
data/Gemfile +8 -0
data/Gemfile.lock +42 -0
data/LICENSE.txt +21 -0
data/README.md +174 -0
data/Rakefile +10 -0
data/bin/console +14 -0
data/bin/setup +8 -0
data/bin/twitterscraper +13 -0
data/lib/twitterscraper-ruby.rb +1 -0
data/lib/twitterscraper.rb +27 -0
data/lib/twitterscraper/cache.rb +69 -0
data/lib/twitterscraper/cli.rb +119 -0
data/lib/twitterscraper/client.rb +18 -0
data/lib/twitterscraper/http.rb +31 -0
data/lib/twitterscraper/lang.rb +40 -0
data/lib/twitterscraper/logger.rb +9 -0
data/lib/twitterscraper/proxy.rb +65 -0
data/lib/twitterscraper/query.rb +254 -0
data/lib/twitterscraper/template.rb +48 -0
data/lib/twitterscraper/tweet.rb +123 -0
data/lib/version.rb +3 -0
data/twitterscraper-ruby.gemspec +31 -0
metadata +104 -0

data/lib/twitterscraper/template.rb ADDED

@@ -0,0 +1,48 @@
require 'cgi'

module Twitterscraper
  # Builds a standalone HTML page that embeds tweets through Twitter's
  # widgets.js script.
  module Template
    # Markup for one embedded tweet; `__TWEET_URL__` is replaced by the tweet's URL.
    EMBED_TWEET_HTML = <<~'HTML'.freeze
      <blockquote class="twitter-tweet">
        <a href="__TWEET_URL__"></a>
      </blockquote>
    HTML

    # Page skeleton; `__TWEETS__` is replaced by the concatenated tweet markup.
    EMBED_TWEETS_HTML = <<~'HTML'.freeze
      <html>
        <head>
          <style type=text/css>
            .twitter-tweet {
              margin: 30px auto 0 auto !important;
            }
          </style>
          <script>
            window.twttr = (function(d, s, id) {
              var js, fjs = d.getElementsByTagName(s)[0], t = window.twttr || {};
              if (d.getElementById(id)) return t;
              js = d.createElement(s);
              js.id = id;
              js.src = "https://platform.twitter.com/widgets.js";
              fjs.parentNode.insertBefore(js, fjs);
              t._e = [];
              t.ready = function(f) {
                  t._e.push(f);
              };
              return t;
            }(document, "script", "twitter-wjs"));
          </script>
        </head>
        <body>
          __TWEETS__
        </body>
      </html>
    HTML

    module_function

    # Renders the given tweets as one HTML document.
    #
    # @param tweets [Enumerable<#tweet_url>] objects responding to `tweet_url`
    # @return [String] the full HTML page with one blockquote per tweet
    def tweets_embedded_html(tweets)
      tweets_html = tweets.map do |tweet|
        # Escape the URL because it ends up inside a double-quoted HTML attribute.
        url = CGI.escapeHTML(tweet.tweet_url)
        # Block form of `sub`: the replacement is inserted literally, whereas a
        # string replacement would interpret backslash sequences such as `\0`.
        EMBED_TWEET_HTML.sub('__TWEET_URL__') { url }
      end

      EMBED_TWEETS_HTML.sub('__TWEETS__') { tweets_html.join }
    end
  end
end

data/lib/twitterscraper/tweet.rb ADDED

@@ -0,0 +1,123 @@
+require 'time'
module Twitterscraper
  # A single scraped tweet. Instances are built from scraped HTML
  # (`from_html`) or from previously serialized JSON (`from_json`).
  class Tweet
    # Attribute names exposed as readers and included in `attrs` / `to_json`.
    KEYS = [
        :screen_name,
        :name,
        :user_id,
        :tweet_id,
        :text,
        :links,
        :hashtags,
        :image_urls,
        :video_url,
        :has_media,
        :likes,
        :retweets,
        :replies,
        :is_replied,
        :is_reply_to,
        :parent_tweet_id,
        :reply_to_users,
        :tweet_url,
        :timestamp,
        :created_at,
    ].freeze

    # String form of KEYS, so string and symbol keys can be checked without
    # converting caller-supplied keys to symbols.
    KEY_NAMES = KEYS.map(&:to_s).freeze
    private_constant :KEY_NAMES

    attr_reader(*KEYS)

    # @param attrs [Hash] attribute values keyed by String or Symbol; keys that
    #   are not listed in KEYS are ignored rather than turned into instance
    #   variables (an arbitrary key could otherwise raise NameError)
    def initialize(attrs)
      attrs.each do |key, value|
        next unless KEY_NAMES.include?(key.to_s)

        instance_variable_set("@#{key}", value)
      end
    end

    # @return [Hash{Symbol => Object}] every KEYS entry mapped to its current value
    def attrs
      KEYS.to_h { |key| [key, public_send(key)] }
    end

    # Serializes the tweet as a JSON object.
    #
    # The arguments are forwarded to Hash#to_json so that the generator state
    # handed in by JSON.generate / JSON.pretty_generate is honoured when a
    # tweet is nested inside another structure.
    #
    # @return [String]
    def to_json(*args)
      attrs.to_json(*args)
    end

    class << self
      # Rebuilds tweets from a JSON array of tweet objects.
      #
      # @param text [String] JSON document whose top level is an array
      # @return [Array<Tweet>]
      def from_json(text)
        JSON.parse(text).map do |tweet|
          tweet['created_at'] = Time.parse(tweet['created_at'])
          new(tweet)
        end
      end

      # Parses an HTML document and extracts every tweet node found in it.
      #
      # @param text [String] HTML source
      # @return [Array<Tweet>]
      def from_html(text)
        html = Nokogiri::HTML(text)
        from_tweets_html(html.xpath("//li[@class[contains(., 'js-stream-item')]]/div[@class[contains(., 'js-stream-tweet')]]"))
      end

      # @param html [Enumerable] tweet nodes as selected by `from_html`
      # @return [Array<Tweet>] parsed tweets; nodes that could not be parsed are dropped
      def from_tweets_html(html)
        html.map { |tweet| from_tweet_html(tweet) }.compact
      end

      # Builds a Tweet from one tweet node.
      #
      # @param html [Object] a node responding to `attr`, `to_s` and `inner_html`
      # @return [Tweet, nil] nil (after logging a warning) when the node has no
      #   tweet text container
      def from_tweet_html(html)
        screen_name = html.attr('data-screen-name')
        tweet_id = html.attr('data-tweet-id')&.to_i

        unless html.to_s.include?('js-tweet-text-container')
          Twitterscraper.logger.warn "html doesn't include div.js-tweet-text-container url=https://twitter.com/#{screen_name}/status/#{tweet_id}"
          return nil
        end

        inner_html = Nokogiri::HTML(html.inner_html)
        text = inner_html.xpath("//div[@class[contains(., 'js-tweet-text-container')]]/p[@class[contains(., 'js-tweet-text')]]").first.text
        links = inner_html.xpath("//a[@class[contains(., 'twitter-timeline-link')]]").map { |elem| elem.attr('data-expanded-url') }.select { |link| link && !link.include?('pic.twitter') }
        image_urls = inner_html.xpath("//div[@class[contains(., 'AdaptiveMedia-photoContainer')]]").map { |elem| elem.attr('data-image-url') }
        video_url = inner_html.xpath("//div[@class[contains(., 'PlayableMedia-container')]]/a").map { |elem| elem.attr('href') }[0]
        has_media = !image_urls.empty? || (video_url && !video_url.empty?)

        actions = inner_html.xpath("//div[@class[contains(., 'ProfileTweet-actionCountList')]]")
        likes = stat_count(actions, 'ProfileTweet-action--favorite')
        retweets = stat_count(actions, 'ProfileTweet-action--retweet')
        replies = stat_count(actions, 'ProfileTweet-action--reply u-hiddenVisually')
        is_replied = replies != 0

        parent_tweet_id = inner_html.xpath('//*[@data-conversation-id]').first.attr('data-conversation-id').to_i
        if tweet_id == parent_tweet_id
          # The conversation id equals the tweet's own id: not a reply.
          is_reply_to = false
          parent_tweet_id = nil
          reply_to_users = []
        else
          is_reply_to = true
          reply_to_users = inner_html.xpath("//div[@class[contains(., 'ReplyingToContextBelowAuthor')]]/a").map { |user| {screen_name: user.text.delete_prefix('@'), user_id: user.attr('data-user-id')} }
        end

        timestamp = inner_html.xpath("//span[@class[contains(., 'js-short-timestamp')]]").first.attr('data-time').to_i

        new(
            screen_name: screen_name,
            name: html.attr('data-name'),
            user_id: html.attr('data-user-id').to_i,
            tweet_id: tweet_id,
            text: text,
            links: links,
            hashtags: text.scan(/#\w+/).map { |tag| tag.delete_prefix('#') },
            image_urls: image_urls,
            video_url: video_url,
            has_media: has_media,
            likes: likes,
            retweets: retweets,
            replies: replies,
            is_replied: is_replied,
            is_reply_to: is_reply_to,
            parent_tweet_id: parent_tweet_id,
            reply_to_users: reply_to_users,
            tweet_url: 'https://twitter.com' + html.attr('data-permalink-path'),
            timestamp: timestamp,
            created_at: Time.at(timestamp, in: '+00:00'),
        )
      end

      private

      # Reads one engagement counter from the action-count markup.
      #
      # @param actions [Object] node set responding to `xpath`
      # @param action_class [String] class-name fragment identifying the counter
      # @return [Integer] the counter value, or 0 when the counter span or its
      #   `data-tweet-stat-count` attribute is missing
      def stat_count(actions, action_class)
        node = actions.xpath("//span[@class[contains(., '#{action_class}')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first
        count = node && node.attr('data-tweet-stat-count')
        count.to_i # nil.to_i is 0, so a missing counter falls back to zero
      end
    end
  end
end

data/lib/version.rb ADDED

@@ -0,0 +1,3 @@
module Twitterscraper
  # Gem release version. Frozen so the shared string cannot be mutated in place.
  VERSION = '0.15.0'.freeze
end

data/twitterscraper-ruby.gemspec ADDED

@@ -0,0 +1,31 @@
+require_relative 'lib/version'
Gem::Specification.new do |spec|
  spec.name          = "twitterscraper-ruby"
  spec.version       = Twitterscraper::VERSION
  spec.authors       = ["ts-3156"]
  spec.email         = ["ts_3156@yahoo.co.jp"]
  spec.summary       = %q{A gem  to scrape Tweets}
  spec.description   = %q{A gem  to scrape Tweets}
  spec.homepage      = "https://github.com/ts-3156/twitterscraper-ruby"
  spec.license       = "MIT"

  # Declared exactly once. The spec previously also set ">= 2.3.0" earlier in
  # this block, which this later assignment silently overrode.
  spec.required_ruby_version = ">= 2.6.4"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  spec.metadata["changelog_uri"] = spec.homepage

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{^(test|spec|features)/}) }
  end
  spec.executables   = ["twitterscraper"]
  spec.require_paths = ["lib"]

  spec.add_dependency "nokogiri"
  spec.add_dependency "parallel"
end

metadata ADDED

@@ -0,0 +1,104 @@
+--- !ruby/object:Gem::Specification
+name: twitterscraper-ruby
+version: !ruby/object:Gem::Version
+  version: 0.15.0
+platform: ruby
+authors:
+- ts-3156
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2020-07-17 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: parallel
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+description: A gem  to scrape Tweets
+email:
+- ts_3156@yahoo.co.jp
+executables:
+- twitterscraper
+extensions: []
+extra_rdoc_files: []
+files:
+- ".circleci/config.yml"
+- ".gitignore"
+- ".irbrc"
+- ".rspec"
+- ".ruby-version"
+- ".travis.yml"
+- CODE_OF_CONDUCT.md
+- Gemfile
+- Gemfile.lock
+- LICENSE.txt
+- README.md
+- Rakefile
+- bin/console
+- bin/setup
+- bin/twitterscraper
+- lib/twitterscraper-ruby.rb
+- lib/twitterscraper.rb
+- lib/twitterscraper/cache.rb
+- lib/twitterscraper/cli.rb
+- lib/twitterscraper/client.rb
+- lib/twitterscraper/http.rb
+- lib/twitterscraper/lang.rb
+- lib/twitterscraper/logger.rb
+- lib/twitterscraper/proxy.rb
+- lib/twitterscraper/query.rb
+- lib/twitterscraper/template.rb
+- lib/twitterscraper/tweet.rb
+- lib/version.rb
+- twitterscraper-ruby.gemspec
+homepage: https://github.com/ts-3156/twitterscraper-ruby
+licenses:
+- MIT
+metadata:
+  homepage_uri: https://github.com/ts-3156/twitterscraper-ruby
+  source_code_uri: https://github.com/ts-3156/twitterscraper-ruby
+  changelog_uri: https://github.com/ts-3156/twitterscraper-ruby
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: 2.6.4
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.0.3
+signing_key:
+specification_version: 4
+summary: A gem  to scrape Tweets
+test_files: []