RubyGems - rss_searcher - Versions diffs - 0.1.1 - Mend

rss_searcher 0.1.1

Files changed (9) hide show

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 6a748b23904a57481f5838d40aacfff1b6a26dca
+  data.tar.gz: 23358084b8f1f4c19e8f11243abd22c4253fef25
+SHA512:
+  metadata.gz: 228bd90ed5b24259db12d4df5e589e9181f1dfa30d09654b217139b94c2f6341427b4a431d6fbafb6edc497976783de33c3d1541596f8f067ece3de19bb16fa7
+  data.tar.gz: 5a93943e8bae27326cb80843e81f6381448d39c6dec9cc9f71aa550322078186e4498df27992f2215a2a2d10438a019157539234fd14551e2640a7b6a9c8a7ef

data/LICENSE ADDED

@@ -0,0 +1,22 @@
+The MIT License (MIT)
+Copyright (c) 2015 Gregory White
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

data/README.md ADDED

	@@ -0,0 +1 @@
1	+ # rss_searcher

data/lib/file_reader.rb ADDED

@@ -0,0 +1,28 @@
# Supplies lists of lines (feed URLs) either straight from an Array or by
# reading them from a text file.
class File_Reader
  # Source of the pattern a line must contain to be kept: a scheme
  # (http, https or ftp), "//", and a dotted host name.
  # Single-quoted on purpose: in a double-quoted literal "\." collapses to a
  # bare "." and the host separator would match any character.
  # Kept as a String so existing callers can still interpolate it.
  URL_REGEX = '((http|ftp|https):/{2})+([0-9a-z_-]+\.)+[a-z]+'.freeze

  # Returns +input+ unchanged when it is already an Array; otherwise treats
  # it as a file name and delegates to get_lines_from_file.
  def self.get_lines(input)
    return input if input.is_a?(Array)

    get_lines_from_file(input)
  end

  # Reads +filename+ and returns the lines that contain a URL, with their
  # trailing newline removed. Assumes the file holds one URL per line.
  #
  # Returns nil when +filename+ is nil/false or when the file cannot be read
  # (a message is printed in the latter case).
  def self.get_lines_from_file(filename)
    return nil unless filename

    pattern = Regexp.new(URL_REGEX)
    File.readlines(filename).select { |line| line =~ pattern }.map(&:chomp)
  rescue StandardError
    puts "failed to open file #{filename}"
    nil
  end
end

data/lib/rss_keyword_finder.rb ADDED

@@ -0,0 +1,80 @@
+require_relative "rss_reader.rb"
+require 'date'
# Named match levels accepted by RSS_Keyword_Searcher. Besides these, an
# Integer strictly between 1 and 100 is accepted and read as a percentage of
# keywords that must be exceeded.
INTEREST_LEVEL_ALL = "ALL"
INTEREST_LEVEL_ONE_OR_MORE = "ONE_OR_MORE"

# Picks the items of a set of RSS feeds whose text mentions given keywords.
class RSS_Keyword_Searcher
  # feed_urls            - feed locations handed to RSS_Reader.
  # keywords             - non-empty Array of keyword Strings (case-insensitive).
  # match_level          - INTEREST_LEVEL_ALL, INTEREST_LEVEL_ONE_OR_MORE or an
  #                        Integer with 1 < level < 100.
  # minimum_publish_date - Date; older items are ignored by
  #                        get_links_of_interest.
  #
  # Raises ArgumentError for missing keywords, an invalid match level, or
  # when the reader returns no items.
  def initialize(feed_urls, keywords, match_level, minimum_publish_date=Date.today)
    # Check the cheap arguments first so a bad call fails before any feed is
    # downloaded.
    raise ArgumentError, "no keywords given" unless keywords.is_a?(Array) && keywords.length > 0
    raise ArgumentError, "invalid match_level" unless valid_match_level(match_level)

    @keywords = keywords
    @match_level = match_level
    @minimum_publish_date = minimum_publish_date
    @reader = RSS_Reader.new(feed_urls)
    @news_items = @reader.get_news_items
    raise ArgumentError, "no valid feed urls given" unless @news_items
  end

  # Appends the link of every sufficiently recent, matching item to
  # +links_of_interest+ and returns that Array.
  def get_links_of_interest(links_of_interest=Array.new)
    @news_items.each do |item|
      next if out_of_date(item)

      links_of_interest.push(item.link) if is_of_interest(content_from_item(item))
    end
    links_of_interest
  end

  # Returns true when +content+ (expected to be lower-cased already, as
  # content_from_item produces it) satisfies the configured match level.
  #
  # Keywords are compared as plain substrings, so regexp metacharacters in a
  # keyword ("c++", "node.js") are taken literally and line breaks in the
  # content do not hide a match.
  def is_of_interest(content)
    wanted = @keywords.map { |keyword| keyword.downcase }

    if @match_level == INTEREST_LEVEL_ALL
      wanted.all? { |keyword| content.include?(keyword) }
    elsif @match_level == INTEREST_LEVEL_ONE_OR_MORE
      wanted.any? { |keyword| content.include?(keyword) }
    elsif percentage_level?(@match_level)
      # Integer arithmetic: the float form (0.07 * 100 > 7) is true because
      # of rounding, although 7% does not exceed 7%.
      keyword_match_count(content) * 100 > @match_level * @keywords.length
    else
      false
    end
  end

  # Returns true as soon as any item matches. The publish date is not
  # consulted here. With no items at all, +flag+ is returned unchanged.
  def event_triggered?(flag=false)
    return flag if @news_items.empty?

    @news_items.any? { |item| is_of_interest(content_from_item(item)) }
  end

  # Fraction (0.0..1.0) of the keywords that occur in +content+.
  def get_keyword_match_percentage(content)
    return 0.0 if @keywords.empty?

    keyword_match_count(content) / @keywords.length.to_f
  end

  # Returns true when the item was published before the minimum publish
  # date. An item whose date is missing or unparseable is treated as not out
  # of date, so that one undated item no longer aborts the whole search.
  def out_of_date(news_item)
    Date.parse(news_item.pubDate.to_s) < @minimum_publish_date
  rescue ArgumentError
    false
  end

  # Lower-cased text of an item: its categories, description and title.
  # Parts are joined with a space so that adjacent parts cannot fuse into a
  # keyword; a missing description or title counts as empty.
  def content_from_item(item)
    parts = item.categories.map { |category| category.content.to_s }
    parts << item.description.to_s << item.title.to_s
    parts.join(" ").downcase
  end

  # Returns true when +match_level+ is one of the named levels or a valid
  # percentage.
  def valid_match_level(match_level)
    match_level == INTEREST_LEVEL_ALL ||
      match_level == INTEREST_LEVEL_ONE_OR_MORE ||
      percentage_level?(match_level)
  end

  private

  # True for an Integer strictly between 1 and 100.
  def percentage_level?(level)
    level.is_a?(Integer) && level > 1 && level < 100
  end

  # Number of keywords that occur in +content+.
  def keyword_match_count(content)
    @keywords.count { |keyword| content.include?(keyword.downcase) }
  end
end

data/lib/rss_reader.rb ADDED

@@ -0,0 +1,21 @@
+require 'rss'
+require 'open-uri'
# Downloads a list of RSS feeds and collects their items.
class RSS_Reader
  # feed_urls - Array of feed URL Strings (nil is treated as an empty list).
  def initialize(feed_urls)
    @feed_urls = feed_urls
  end

  # Returns an Array with the items of every feed that could be read, or nil
  # when no items were obtained at all.
  def get_news_items
    news_items = Array(@feed_urls).each_with_object([]) do |url, items|
      items.concat(fetch_items(url))
    end
    news_items.empty? ? nil : news_items
  end

  private

  # Items of the single feed at +url+; an empty Array when the feed cannot
  # be fetched or parsed, so that one bad feed does not discard the others.
  def fetch_items(url)
    # URI#open (from open-uri) is used instead of Kernel#open: Kernel#open
    # runs a command when the string starts with "|" and no longer opens
    # URLs on Ruby 3. Only schemes open-uri handles are fetched; anything
    # else is reported and skipped.
    URI.parse(url).open do |rss|
      feed = RSS::Parser.parse(rss)
      # The parser can return nil for a document it does not recognise.
      feed ? feed.items : []
    end
  rescue StandardError => e
    puts "failed to read feed #{url}: #{e.message}"
    []
  end
end

data/lib/rss_searcher.rb ADDED

@@ -0,0 +1,26 @@
+require_relative "rss_keyword_finder.rb"
+require_relative "file_reader.rb"
# Convenience entry point: resolves feeds and keywords (each given either as
# an Array or as the name of a file) and returns the matching links.
class RSS_Searcher
  # feeds                - Array of feed URLs, or name of a file with one URL
  #                        per line.
  # keywords             - Array of keywords, or name of a file with one
  #                        keyword per line.
  # match_level          - passed through to RSS_Keyword_Searcher.
  # minimum_publish_date - passed through to RSS_Keyword_Searcher.
  #
  # Returns an Array of links; empty when an input is missing or when the
  # searcher rejects the arguments (its message is printed).
  def get_links_of_interest(feeds, keywords, match_level="ONE_OR_MORE", minimum_publish_date=Date.today)
    set_relevant_params(feeds, keywords, minimum_publish_date)
    return [] unless @feed_urls && @keywords && @minimum_publish_date

    begin
      searcher = RSS_Keyword_Searcher.new(@feed_urls, @keywords, match_level, @minimum_publish_date)
      searcher.get_links_of_interest
    rescue ArgumentError => ae
      puts ae.message
      []
    end
  end

  # Stores the resolved feed URLs, keywords and minimum publish date.
  def set_relevant_params(feeds, keywords, minimum_publish_date)
    @feed_urls = File_Reader.get_lines(feeds)
    @keywords = read_keywords(keywords)
    @minimum_publish_date = minimum_publish_date
  end

  private

  # Keywords from an Array (returned as is) or from a file, one per line.
  # File_Reader.get_lines is not used for files because it keeps only lines
  # that look like URLs, which would drop every keyword.
  # Returns nil when +keywords+ is nil/false or the file cannot be read.
  def read_keywords(keywords)
    return keywords if keywords.is_a?(Array)
    return nil unless keywords

    File.readlines(keywords).map(&:strip).reject(&:empty?)
  rescue StandardError
    puts "failed to open file #{keywords}"
    nil
  end
end

data/lib/send_text.rb ADDED

@@ -0,0 +1,24 @@
+require 'twilio-ruby'
# Sends the text that follows the -m command-line flag as an SMS via Twilio.
#
# Credentials are read from the environment (TWILIO_ACCOUNT_SID and
# TWILIO_AUTH_TOKEN) rather than being stored in the source; secrets that
# were previously committed here should be considered leaked and revoked.
# TWILIO_TO_NUMBER / TWILIO_FROM_NUMBER optionally override the phone numbers.

# Look the flag up by position: Hash[*ARGV] raises when the number of
# arguments is odd (for example a lone "-m").
flag_index = ARGV.index("-m")
content = flag_index ? ARGV[flag_index + 1] : nil

if content
  # Credentials are only needed, and only required, when a message is sent.
  account_sid = ENV.fetch("TWILIO_ACCOUNT_SID") { abort "TWILIO_ACCOUNT_SID is not set" }
  auth_token = ENV.fetch("TWILIO_AUTH_TOKEN") { abort "TWILIO_AUTH_TOKEN is not set" }
  client = Twilio::REST::Client.new account_sid, auth_token

  puts "Your message was #{content}"
  message = client.account.messages.create(
      :body => content,
      :to => ENV.fetch("TWILIO_TO_NUMBER", "+15038413886"),
      :from => ENV.fetch("TWILIO_FROM_NUMBER", "+19712051238"))
  puts message.to
else
  puts "No message given, no action taken"
end

metadata ADDED

@@ -0,0 +1,51 @@
+--- !ruby/object:Gem::Specification
+name: rss_searcher
+version: !ruby/object:Gem::Version
+  version: 0.1.1
+platform: ruby
+authors:
+- Greg White
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2015-05-03 00:00:00.000000000 Z
+dependencies: []
+description: A simple rss feed curator based on keyword matches
+email: gwhite@dayspring-tech.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- LICENSE
+- README.md
+- lib/file_reader.rb
+- lib/rss_keyword_finder.rb
+- lib/rss_reader.rb
+- lib/rss_searcher.rb
+- lib/send_text.rb
+homepage:
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.2.2
+signing_key:
+specification_version: 4
+summary: This gem reads rss feeds and then searches them for keywords returning only
+  stories of interest
+test_files: []