RubyGems - webwatchr - Versions diffs - 0.0.2 - Mend

webwatchr 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

data/README.md ADDED Viewed

@@ -0,0 +1,122 @@
+# WebWatchr
+Silly script to periodically check webpage changes.
+1. run script every minute from cron
+2. pulls data for every Website to check, if enough time has passed since the last check
+3. if content is different from the last time, alerts you with the new content (email, telegram)
+# Installation
+```shell
+$ gem install webwatchr
+# if you want fancier Diffs, for DiffString objects, apt install ruby-diffy
+```
+And then make your own `dsl.rb` script. Example:
+```ruby
+require "webwatchr"
+class SomeSimpleSite < Site::SimpleString
+  def initialize()
+    @url = "https://somesimplesite.com/shops"
+    super()
+  end
+  # Implement this function, to return what you want to compare every run
+  def get_content
+    res = ""
+    @parsed_content.css("div.shop-main a").map do |a|
+      url = "https://somesimplesite.com/shop/#{a['href']}"
+      if a.css('img')[0]['src'] == "soldout.png"
+        next
+      end
+      res << "#{url}\n"
+    end
+    res == "" ? nil : res
+  end
+end
+Webwatchr::Main.new do
+  # Some configuration, first for the alerting
+  # Send emails
+  add_default_alert :email do
+    set :smtp_port, 25
+    set :smtp_server, "localhost"
+    set :from_addr, "webwatchr@domain.eu"
+    set :dest_addr, "admin@domain.eu"
+  end
+  # Use telegram bot to send you messages
+  add_default_alert :telegram do
+    set :token, "12345:LONGTOKEN09876543"
+    set :chat_id, 1234567890
+  end
+  # # Just outputs update to the terminal
+  # add_default_alert :stdout
+  update BskySearch do
+    set "username", "toto"
+    set "password", "toto"
+    keyword "#danemark"
+  end
+  update SomeSimpleSite
+  update PostCH do
+    track_id "LS123476US"
+  end
+end
+```
+Run the cron often:
+```
+*/5 * * * * cd /home/poil/my_fav_scripts/; ruby dsl.rb
+```
+# Supported websites
+List of sites that are somewhat maintained are [listed here](https://github.com/conchyliculture/webwatchr).
+Some examples:
+* Bluesky
+* Bandcamp merch pages
+* Package tracking (DHL, Colissimo, i-parcel, Royalmail, PostNL, UPS, USPS, etc.)
+## Force a site check, ignoring the 'wait' parameter
+This can be useful to run a site update at a specific time/day with a crontab, instead of every specified amount of time. You can force update a website using the -s flag:
+```bash
+ruby webwatchr.rb -t -s SiteClass
+```
+# FAQ
+## Tests?
+There are, like, two!
+Run `rake`
+## Logs ?
+Call `logger`, as you would a classic `Logger` object in your `mysite.rb`.
+## Alerting
+Email is the main method of alerting, but you can also set webwatchr to talk to you on Telegram through a bot.
+### Telegram
+First make a bot and grab a token following the [Telegram procedure](https://core.telegram.org/bots#6-botfather).
+You also need to know the `chat_id` for its discussion with you. The code in [there](https://github.com/atipugin/telegram-bot-ruby/blob/master/examples/bot.rb) can help you.

data/Rakefile ADDED Viewed

@@ -0,0 +1,11 @@
+task default: %w[infra_tests sites_tests]
+task :infra_tests do
+  ruby "tests/infra_test.rb"
+end
+task :sites_tests do
+  Dir.glob("tests/sites/*.rb").each do |t|
+    ruby t
+  end
+end

data/lib/sites/bandcamp.rb ADDED Viewed

@@ -0,0 +1,60 @@
+require_relative "../webwatchr/site"
+class BandcampMerch < Site::Articles
+  require "net/http"
+  require "nokogiri"
+  def band(value)
+    @band = value
+    @url = "https://#{@band}.bandcamp.com/merch"
+    self
+  end
+  def get_content()
+    if @html_content =~ /You are being redirected, please follow <a href="([^"]+)"/
+      new_url = ::Regexp.last_match(1)
+      @html_content = Net::HTTP.get(URI.parse(new_url))
+      @parsed_content = Nokogiri::HTML.parse(@html_content)
+      item = @parsed_content.css('div#merch-item')
+      if item.css(".notable").text == "Sold Out"
+        logger.debug "That item is sold out =("
+        return
+      end
+      url = new_url
+      title = item.css("h2.title").text
+      img_url = item.css("img.main-art").attr("src").text
+      add_article({
+                    "id" => url,
+                    "url" => url,
+                    "img_src" => img_url,
+                    "title" => title
+                  })
+    else
+      f = File.new("/tmp/b", "w")
+      f.write(@html_content)
+      f.close
+      @parsed_content.css('ol.merch-grid li').each do |xx|
+        unless xx.css('p.sold-out').empty?
+          logger.debug "That item is sold out =("
+          next
+        end
+        x = xx.css('a')
+        url = "http://#{URI.parse(@url).host + x.attr('href').text}"
+        img = x.css('img')
+        img_url = img.attr('src').text
+        if img_url =~ /\/img\/43.gif/
+          img_url = img.attr('data-original')
+        end
+        title = x.css('p.title').text.strip().gsub(/ *\n */, '')
+        price = x.css('span.price').text
+        add_article({
+                      "id" => url,
+                      "url" => url,
+                      "img_src" => img_url,
+                      "title" => "#{title} #{price}"
+                    })
+      end
+    end
+  end
+end

data/lib/sites/bsky.rb ADDED Viewed

@@ -0,0 +1,176 @@
+require_relative "../webwatchr/site"
+require "mechanize"
+class BskyBase < Site::Articles
+  API_PUBLIC_HOSTS = [
+    'public.api.bsky.app'
+  ].freeze
+  API_PRIVATE_HOSTS = [
+    'scalycap.us-west.host.bsky.network'
+    #'oysterling.us-west.host.bsky.network'
+    #    'amanita.us-east.host.bsky.network'
+  ].freeze
+  def _get_bearer()
+    url = "https://bsky.social/xrpc/com.atproto.server.createSession"
+    data = {
+      "identifier" => @username,
+      "password" => @password,
+      "authFactorToken" => "",
+      "allowTakendown" => true
+    }
+    headers = {
+      "content-type" => "application/json"
+    }
+    resp = @mechanize.post(url, data.to_json, headers)
+    j = JSON.parse(resp.body)
+    begin
+      return j['accessJwt']
+    rescue StandardError => e
+      raise Site::ParseError "Error while logging in #{e}"
+    end
+  end
+  def _api_get(path, params: [], headers: nil)
+    api_hosts = @bearer ? API_PRIVATE_HOSTS : API_PUBLIC_HOSTS
+    api_host = api_hosts.sample
+    url = "https://#{api_host}#{path}"
+    resp = @mechanize.get(url, params, nil, headers)
+    return resp
+  end
+  def _profile_to_did(account)
+    path = "/xrpc/com.atproto.identity.resolveHandle?handle=#{account}"
+    resp = _api_get(path)
+    return JSON.parse(resp.body)['did']
+  end
+  def _article_from_post(post)
+    post_id = post["uri"].split("/")[-1]
+    text = post['record']['text']
+    art = {
+      "id" => post["uri"],
+      "url" => "https://bsky.app/profile/#{@account}/post/#{post_id}",
+      "title" => "#{post['record']['createdAt']}: #{text}"
+    }
+    if post["embed"]
+      case post["embed"]["$type"]
+      when "app.bsky.embed.images#view"
+        images = post["embed"]["images"].sort_by { |image| (image["aspectRatio"] || { 'height' => 0 })["height"] }
+        case images.size
+        when 0
+          # noop
+        when 1
+          art['img_src'] = images[0]["thumb"]
+        else
+          art['img_src'] = images[1]["thumb"]
+        end
+      end
+    end
+    return art
+  end
+end
+class BskyAccount < BskyBase
+  # Parses Bluesky "profile" pages and returns every post
+  #
+  # ==== Examples
+  #
+  # Webwatchr::Main.new do
+  #     update BskyAccount do
+  #          account "theonion.com"
+  #          # Optional settings
+  #          set "reposts", false   # Disable reposts
+  #          set "regex", /fun/     # Only posts where the text matches the regex
+  #     end
+  #     ....
+  # end
+  attr_accessor :reposts, :regex
+  def account(account)
+    @account = account
+    @url = "https://bsky.app/profile/#{account}"
+    self
+  end
+  def initialize
+    super()
+    @reposts = true
+    @mechanize = Mechanize.new()
+    @json_results_key = "feed"
+  end
+  def pull_things
+    did = _profile_to_did(@account)
+    path = "/xrpc/app.bsky.feed.getAuthorFeed?actor=#{did}&filter=posts_and_author_threads&limit=30"
+    resp = _api_get(path)
+    @parsed_content = JSON.parse(resp.body)
+    f = File.open("/tmp/qsd", 'w')
+    f.write(resp.body)
+    f.close
+  end
+  def get_content
+    @parsed_content['feed'].each do |p|
+      post = p['post']
+      text = post['record']['text']
+      next if @regex and (text !~ @regex)
+      next if !@reposts && (post['author']['handle'] != @account)
+      art = _article_from_post(post)
+      add_article(art)
+    end
+  end
+end
+class BskySearch < BskyBase
+  # Runs a search on Bluesky for a keyword
+  #
+  # ==== Examples
+  #
+  # Webwatchr::Main.new do
+  #     update BskyAccount do
+  #          keyword "#danemark"
+  #          # Mandatory settings, you need to be logged in to search
+  #          set "username", "username"
+  #          set "password", "password"
+  #     end
+  #     ....
+  # end
+  attr_accessor :username, :password
+  def keyword(keyword)
+    @keyword = keyword
+    @url = "https://bsky.app/search?#{keyword}"
+    self
+  end
+  def initialize
+    super()
+    @mechanize = Mechanize.new()
+    @json_results_key = "posts"
+  end
+  def pull_things
+    raise StandardError, 'Need both username & password to run searches' unless @password and @username
+    @bearer ||= _get_bearer()
+    headers = {
+      "authorization" => "Bearer #{@bearer}"
+    }
+    params = { "q" => "#danemark", "limit" => 30, "sort" => "top" }
+    resp = _api_get("/xrpc/app.bsky.feed.searchPosts", params: params.to_a, headers: headers)
+    @parsed_content = JSON.parse(resp.body)
+  end
+  def get_content
+    @parsed_content['posts'].each do |post|
+      add_article(_article_from_post(post))
+    end
+  end
+end

data/lib/sites/postch.rb ADDED Viewed

@@ -0,0 +1,112 @@
+require_relative "../webwatchr/site"
+require "json"
+require "mechanize"
+class PostCH < Site::SimpleString
+  def track_id(track_id)
+    # Sets the Track ID & URL
+    @track_id = track_id
+    @url = "https://www.post.ch/api/TrackAndTrace/Get?sc_lang=en&id=#{@track_id}"
+    self
+  end
+  def initialize
+    super()
+    @events = []
+    @mechanize = Mechanize.new()
+    @mechanize.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:132.0) Gecko/20100101 Firefox/132.0'
+    @text_messages = {}
+  end
+  def code_to_message(code)
+    @text_messages = JSON.parse(@mechanize.get("https://service.post.ch/ekp-web/core/rest/translations/en/shipment-text-messages").body)['shipment-text--'] if @text_messages == {}
+    @text_messages.each do |k, v|
+      ccode = code.split('.')
+      kk = k.split('.')
+      0.upto(ccode.size()) do |i|
+        c = ccode[i]
+        e = kk[i]
+        if c.nil? and i == kk.size - 1
+          return v
+        end
+        next if e == "*"
+        next if c == e
+        break
+      end
+    end
+    return code
+  end
+  def pull_things()
+    # First we need an anonymous userId
+    resp = @mechanize.get("https://service.post.ch/ekp-web/api/user", nil, nil, { 'accept' => 'application/json' })
+    user_id = JSON.parse(resp.body)['userIdentifier']
+    csrf_token = resp.header["x-csrf-token"]
+    headers = {
+      'accept' => 'application/json, text/plain, */*',
+      'Accept-Encoding' => 'gzip,deflate,br,zstd',
+      'accept-language' => 'en',
+      'Cache-Control' => 'no-cache',
+      'Content-Type' => 'application/json',
+      'Origin' => 'https://service.post.ch',
+      'Referer' => 'https://service.post.ch',
+      'X-Csrf-Token' => csrf_token
+    }
+    resp = @mechanize.post("https://service.post.ch/ekp-web/api/history?userId=#{user_id}", { 'searchQuery' => @track_id }.to_json, headers)
+    hash = JSON.parse(resp.body)['hash']
+    resp = @mechanize.get("https://service.post.ch/ekp-web/api/history/not-included/#{hash}?userId=#{user_id}", nil, nil, headers)
+    jresp = JSON.parse(resp.body)
+    if jresp.empty?
+      raise Site::ParseError, "No result for #{@track_id}. Check that it is valid."
+    end
+    identity = jresp[0]['identity']
+    resp = @mechanize.get("https://service.post.ch/ekp-web/api/shipment/id/#{identity}/events", nil, nil, headers)
+    json_content = JSON.parse(resp.body)
+    @parsed_content = []
+    json_content.each do |event|
+      event['description'] = code_to_message(event['eventCode'])
+      @parsed_content << event
+    end
+  end
+  def get_html_content()
+    res = []
+    res << Site::HTML_HEADER
+    res << "<ul>"
+    @events.each do |e|
+      res << "<li>#{e}</li>"
+    end
+    res << "</ul>"
+    return res.join("\n")
+  end
+  def get_content()
+    evs = @parsed_content.map { |e|
+      e['timestamp'] = DateTime.strptime(e['timestamp'], "%Y-%m-%dT%H:%M:%S%Z")
+      e
+    }
+    evs.sort { |a, b| a['timestamp'] == b['timestamp'] ? a['description'] <=> b['description'] : a['timestamp'] <=> b['timestamp'] }.reverse.each do |event|
+      msg = "#{event['timestamp']}: #{event['description']}"
+      if event['city'] and event['city'] != ""
+        msg += " (#{event['city']} #{event['zip']})"
+      end
+      @events << msg
+    end
+    return ResultObject.new(@events.join("\n"))
+  end
+end
+# Example:
+#
+#  update PostCH do
+#    track_id "LS203038460CH"
+#  end

data/lib/sites/songkick.rb ADDED Viewed

@@ -0,0 +1,28 @@
+require_relative "../webwatchr/site"
+class Songkick < Site::Articles
+  def full_url(url)
+    @url = url
+    unless @url.end_with?("/calendar")
+      logger.warn("Songkick should end with /calendar to get all concerts")
+    end
+    return self
+  end
+  def get_content()
+    events = @parsed_content.css('ol.event-listings')[0]
+    events.css('li').each do |event|
+      j = JSON.parse(event.css('script')[0].text)[0]
+      date = j["startDate"]
+      url = j["url"]
+      artist = j["name"]
+      loc = j["location"]
+      location = "#{loc['name']} #{loc['address']['addressLocality']}, #{loc['address']['addressCountry']}"
+      add_article({
+                    "id" => url,
+                    "url" => url,
+                    "title" => "#{date}: #{artist} at #{location}"
+                  })
+    end
+  end
+end

data/lib/webwatchr/alerting.rb ADDED Viewed

@@ -0,0 +1,129 @@
+require "net/smtp"
+require 'telegram/bot'
+require_relative "./logger"
+module Webwatchr
+  module Alerting
+    class Base
+      include Loggable
+      REQUIRED_SETTINGS = [].freeze
+      def validate
+        missing_settings = REQUIRED_SETTINGS - @config.to_a.select { |s| s[1] }.map { |s| s[0] }
+        raise StandardError, "Missing required settings for #{self.class}: #{missing_settings}" unless missing_settings.empty?
+      end
+      def self.create(&block)
+        if block
+          new.instance_eval(&block)
+        else
+          new
+        end
+      end
+      def alert(site)
+        raise StandardError, "Need to pass a Site instance" unless site
+        validate
+      end
+      def initialize
+        @config = {}
+      end
+      def set(key, val)
+        @config[key] = val
+        self
+      end
+    end
+    class EmailAlert < Base
+      REQUIRED_SETTINGS = %i[from_addr dest_addr smtp_server smtp_port].freeze
+      IDENTIFIER = :email
+      # This class will send you email if content changes.
+      #
+      # ==== Examples
+      #
+      # Webwatchr::Main.new do
+      #     add_default_alert :email do
+      #       set :smtp_port, 25
+      #       set :smtp_server, "localhost"
+      #       set :dest_addr, "dest@email.eu"
+      #       set :from_addr, "source@email.eu"
+      #     end
+      #     ....
+      # "end"
+      def alert(site)
+        super(site)
+        subject = site.get_email_subject() || "Update from #{site.class}"
+        formatted_content = site.generate_html_content()
+        msgstr = <<~END_OF_MESSAGE
+          From: #{@config[:from_addr]}
+          To: #{@config[:dest_addr]}
+          MIME-Version: 1.0
+          Content-type: text/html; charset=UTF-8
+          Subject: [Webwatchr] #{subject}
+          Update from #{site.get_email_url()}
+          #{formatted_content}
+        END_OF_MESSAGE
+        begin
+          Net::SMTP.start(@config[:smtp_server], @config[:smtp_port], starttls: false) do |smtp|
+            raise StandardError, "from address cannot be nil" unless @config[:from_addr]
+            smtp.send_message(msgstr, @config[:from_addr], @config[:dest_addr])
+            logger.debug("Sending mail to #{@config[:dest_addr]}")
+          end
+        rescue Net::SMTPFatalError => e
+          logger.error "Couldn't send email from #{@config[:from_addr]} to #{@config[:dest_addr]}. #{@config[:smtp_server]}:#{@config[:smtp_port]} said #{e.message}"
+        end
+      end
+    end
+    class TelegramAlert < Base
+      IDENTIFIER = :telegram
+      REQUIRED_SETTINGS = %i[token chat_id].freeze
+      # This class will use a Telegram bot to send you a message if content changes.
+      #
+      # ==== Examples
+      #
+      # Webwatchr::Main.new do
+      #     ...
+      #     add_default_alert :telegram do
+      #       set :token, "95123456YU:AArestoftoken"
+      #       set :chat_id, 123456789
+      #     end
+      #     ....
+      # "end"
+      def alert(site)
+        super(site)
+        bot = Telegram::Bot::Client.new(@config[:token])
+        msg_pieces = [site.get_email_subject]
+        msg_pieces << site.get_email_url()
+        msg_pieces += site.generate_telegram_message_pieces()
+        msg_pieces = msg_pieces.map { |x| x.size > 4096 ? x.split("\n") : x }.flatten()
+        split_msg = msg_pieces.each_with_object(['']) { |str, sum|
+          sum.last.length + str.length > 4000 ? sum << "#{str}\n" : sum.last << "#{str}\n"
+        }
+        split_msg.each do |m|
+          bot.api.send_message(chat_id: @config[:chat_id], text: m)
+        end
+      end
+    end
+    class StdoutAlert < Base
+      IDENTIFIER = :stdout
+      def alert(site)
+        super(site)
+        msg = "Update rom #{site.url}\n#{site.generate_html_content}"
+        puts(msg)
+      end
+    end
+  end
+end