rss_searcher 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6a748b23904a57481f5838d40aacfff1b6a26dca
4
+ data.tar.gz: 23358084b8f1f4c19e8f11243abd22c4253fef25
5
+ SHA512:
6
+ metadata.gz: 228bd90ed5b24259db12d4df5e589e9181f1dfa30d09654b217139b94c2f6341427b4a431d6fbafb6edc497976783de33c3d1541596f8f067ece3de19bb16fa7
7
+ data.tar.gz: 5a93943e8bae27326cb80843e81f6381448d39c6dec9cc9f71aa550322078186e4498df27992f2215a2a2d10438a019157539234fd14551e2640a7b6a9c8a7ef
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Gregory White
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1 @@
1
+ # rss_searcher
@@ -0,0 +1,28 @@
1
# Supplies lists of feed URLs, taken either from an Array passed in directly
# or from a text file holding one URL per line.
class File_Reader

  # Source text of the URL pattern, kept as a String so existing code that
  # interpolates it into its own regex keeps working. It must be single
  # quoted: inside double quotes Ruby drops the backslash of "\.", which
  # silently turned the literal dot into "any character".
  URL_REGEX = '((http|ftp|https):\/{2})+([0-9a-z_-]+\.)+[a-z]+'

  # Compiled once rather than once per line. Schemes and host names are
  # case-insensitive, hence the /i flag.
  URL_PATTERN = /#{URL_REGEX}/i

  # Returns the lines described by +input+.
  #
  # input - an Array (returned unchanged) or the name of a file to read.
  #
  # Returns an Array, or nil when the file could not be read.
  def self.get_lines(input)
    return input if input.is_a?(Array)

    get_lines_from_file(input)
  end

  # Reads +filename+ and keeps every line that contains a URL.
  # Assumes the file holds one URL per line; a matching line is returned
  # whole, with surrounding whitespace and the line ending removed.
  #
  # filename - path of the file to read; nil is tolerated.
  #
  # Returns an Array of Strings, or nil when +filename+ is nil or the file
  # cannot be read (a message is printed in the latter case).
  def self.get_lines_from_file(filename)
    return nil unless filename

    File.readlines(filename).each_with_object([]) do |line, urls|
      urls.push(line.strip) if line =~ URL_PATTERN
    end
  rescue StandardError
    puts "failed to open file #{filename}"
    nil
  end

end
@@ -0,0 +1,80 @@
1
+ require_relative "rss_reader.rb"
2
+ require 'date'
3
+
4
# Match levels understood by RSS_Keyword_Searcher (besides an Integer
# percentage): every keyword must occur / at least one keyword must occur.
INTEREST_LEVEL_ALL = "ALL".freeze
INTEREST_LEVEL_ONE_OR_MORE = "ONE_OR_MORE".freeze

# Fetches the items of a set of feeds through RSS_Reader and selects the
# ones whose text contains the configured keywords.
class RSS_Keyword_Searcher

  # feed_urls            - handed unchanged to RSS_Reader.new.
  # keywords             - non-empty Array of Strings; compared
  #                        case-insensitively as literal text.
  # match_level          - INTEREST_LEVEL_ALL, INTEREST_LEVEL_ONE_OR_MORE, or
  #                        an Integer strictly between 1 and 100 meaning
  #                        "more than this percentage of keywords".
  # minimum_publish_date - Date; items published earlier are skipped by
  #                        get_links_of_interest.
  #
  # Raises ArgumentError when the keywords or match level are invalid, or
  # when the reader returns no items.
  def initialize(feed_urls, keywords, match_level, minimum_publish_date=Date.today)
    # Check the cheap arguments first so bad input never triggers a fetch.
    raise ArgumentError, "no keywords given" unless keywords.is_a?(Array) && !keywords.empty?
    raise ArgumentError, "invalid match_level" unless valid_match_level(match_level)

    @keywords = keywords
    @match_level = match_level
    @minimum_publish_date = minimum_publish_date
    @reader = RSS_Reader.new(feed_urls)
    @news_items = @reader.get_news_items
    raise ArgumentError, "no valid feed urls given" unless @news_items
  end

  # Appends the link of every sufficiently recent, matching item to
  # +links_of_interest+ and returns that Array.
  def get_links_of_interest(links_of_interest=Array.new)
    @news_items.each do |item|
      next if out_of_date(item)

      links_of_interest.push(item.link) if is_of_interest(content_from_item(item))
    end
    links_of_interest
  end

  # Tells whether +content+ satisfies the configured match level.
  #
  # content - String, expected to be lower-cased already (as produced by
  #           content_from_item); keywords are lower-cased before comparing.
  #
  # Keywords are matched as literal substrings, the same way
  # get_keyword_match_percentage does, so characters such as "(" or "+" in a
  # keyword are harmless and newlines in the content do not hide a match.
  #
  # Returns true or false.
  def is_of_interest(content)
    case @match_level
    when INTEREST_LEVEL_ALL
      @keywords.all? { |keyword| content.include?(keyword.downcase) }
    when INTEREST_LEVEL_ONE_OR_MORE
      @keywords.any? { |keyword| content.include?(keyword.downcase) }
    when Integer
      @match_level > 1 && @match_level < 100 &&
        (get_keyword_match_percentage(content) * 100) > @match_level
    else
      false
    end
  end

  # Tells whether any fetched item matches, regardless of its publish date.
  #
  # flag - value returned when there are no items at all.
  def event_triggered?(flag=false)
    return flag if @news_items.empty?

    @news_items.any? { |item| is_of_interest(content_from_item(item)) }
  end

  # Returns the fraction (0.0..1.0) of keywords that occur in +content+.
  def get_keyword_match_percentage(content)
    match_count = @keywords.count { |keyword| content.include?(keyword.downcase) }
    match_count.to_f / @keywords.length
  end

  # Tells whether +news_item+ was published before the minimum publish date.
  # An item whose pubDate is missing or unparseable cannot be shown to be
  # recent enough, so it is reported as out of date instead of raising and
  # aborting the whole search.
  def out_of_date(news_item)
    pub_date = Date.parse(news_item.pubDate.to_s)
    pub_date < @minimum_publish_date
  rescue ArgumentError
    true
  end

  # Builds the lower-cased searchable text of +item+ from its category
  # contents, description and title. Missing parts are skipped and the parts
  # are separated by a space so words from adjacent fields do not fuse.
  def content_from_item(item)
    parts = item.categories.map(&:content)
    parts.push(item.description, item.title)
    parts.compact.join(" ").downcase
  end

  # Tells whether +match_level+ is one of the two named levels or an Integer
  # strictly between 1 and 100.
  def valid_match_level(match_level)
    return true if match_level == INTEREST_LEVEL_ALL || match_level == INTEREST_LEVEL_ONE_OR_MORE

    match_level.is_a?(Integer) && match_level > 1 && match_level < 100
  end

end
@@ -0,0 +1,21 @@
1
+ require 'rss'
2
+ require 'open-uri'
3
+
4
# Downloads and parses a list of RSS feeds.
class RSS_Reader

  # feed_urls - Array of feed locations; nil is treated as an empty list.
  def initialize(feed_urls)
    @feed_urls = feed_urls
  end

  # Collects the items of every feed that can be fetched and parsed. A feed
  # that fails is reported on stderr and skipped so that one bad URL does not
  # discard the items of the others.
  #
  # Returns an Array of RSS items, or nil when no items were found at all.
  def get_news_items
    news_items = Array(@feed_urls).flat_map { |url| fetch_items(url) }
    news_items.empty? ? nil : news_items
  end

  private

  # Fetches and parses a single feed.
  #
  # URI.open is used instead of Kernel#open: open-uri no longer patches
  # Kernel#open for URLs on Ruby 3.0 and later.
  #
  # Returns the feed's items, or an empty Array when the feed cannot be
  # opened or parsed.
  def fetch_items(url)
    URI.open(url) do |rss|
      feed = RSS::Parser.parse(rss)
      # The parser may hand back nil for content it does not recognise.
      feed ? feed.items : []
    end
  rescue StandardError => e
    warn "failed to read feed #{url}: #{e.message}"
    []
  end

end
@@ -0,0 +1,26 @@
1
+ require_relative "rss_keyword_finder.rb"
2
+ require_relative "file_reader.rb"
3
+
4
# Entry point of the gem: resolves feeds and keywords (each given either as
# an Array or as the name of a file) and returns the links of matching items.
class RSS_Searcher

  # feeds                - Array of feed URLs, or a file name understood by
  #                        File_Reader.get_lines.
  # keywords             - Array of keywords, or the name of a text file
  #                        holding one keyword per line.
  # match_level          - passed through to RSS_Keyword_Searcher.
  # minimum_publish_date - passed through to RSS_Keyword_Searcher.
  #
  # Returns an Array of links. The Array is empty when an input could not be
  # resolved or when RSS_Keyword_Searcher rejects the arguments (its message
  # is printed in that case).
  def get_links_of_interest(feeds, keywords, match_level="ONE_OR_MORE", minimum_publish_date=Date.today)
    set_relevant_params(feeds, keywords, minimum_publish_date)
    return [] unless @feed_urls && @keywords && @minimum_publish_date

    begin
      searcher = RSS_Keyword_Searcher.new(@feed_urls, @keywords, match_level, @minimum_publish_date)
      searcher.get_links_of_interest
    rescue ArgumentError => ae
      puts ae.message
      []
    end
  end

  # Stores the resolved feed URLs, keywords and minimum publish date in
  # instance variables for get_links_of_interest.
  def set_relevant_params(feeds, keywords, minimum_publish_date)
    @feed_urls = File_Reader.get_lines(feeds)
    @keywords = read_keywords(keywords)
    @minimum_publish_date = minimum_publish_date
  end

  private

  # Resolves +keywords+ to an Array of Strings.
  #
  # A keyword file is read here instead of through File_Reader.get_lines,
  # because that method keeps only lines that look like URLs and would
  # therefore throw every ordinary keyword away.
  #
  # Returns the Array unchanged, the non-blank stripped lines of the named
  # file, or nil when +keywords+ is nil or the file cannot be read.
  def read_keywords(keywords)
    return keywords if keywords.nil? || keywords.is_a?(Array)

    File.readlines(keywords).map(&:strip).reject(&:empty?)
  rescue StandardError
    puts "failed to open file #{keywords}"
    nil
  end

end
@@ -0,0 +1,24 @@
1
+ require 'twilio-ruby'
2
+
3
# Command-line script: sends the text that follows "-m" as an SMS via Twilio.
#
# Usage: ruby send_text.rb -m "message text"
#
# Credentials are taken from the environment variables TWILIO_ACCOUNT_SID and
# TWILIO_AUTH_TOKEN so that they never live in source control.
# SECURITY: earlier revisions of this file embedded an account SID and auth
# token as literals; treat those values as compromised and rotate them.
account_sid = ENV['TWILIO_ACCOUNT_SID']
auth_token = ENV['TWILIO_AUTH_TOKEN']

# Recipient and sender can be overridden through the environment; the
# defaults are the numbers this script has always used.
to_number = ENV.fetch('TWILIO_TO_NUMBER', "+15038413886")
from_number = ENV.fetch('TWILIO_FROM_NUMBER', "+19712051238")

# Locate "-m" directly instead of building Hash[*ARGV], which raises when
# the number of arguments is odd.
flag_index = ARGV.index("-m")
content = flag_index && ARGV[flag_index + 1]

if content.nil?
  puts "No message given, no action taken"
elsif account_sid.to_s.empty? || auth_token.to_s.empty?
  abort "TWILIO_ACCOUNT_SID and TWILIO_AUTH_TOKEN must be set"
else
  @client = Twilio::REST::Client.new account_sid, auth_token

  puts "Your message was #{content}"

  message = @client.account.messages.create(
    :body => content,
    :to => to_number,
    :from => from_number)
  puts message.to
end
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rss_searcher
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Greg White
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-03 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A simple rss feed curator based on keyword matches
14
+ email: gwhite@dayspring-tech.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - LICENSE
20
+ - README.md
21
+ - lib/file_reader.rb
22
+ - lib/rss_keyword_finder.rb
23
+ - lib/rss_reader.rb
24
+ - lib/rss_searcher.rb
25
+ - lib/send_text.rb
26
+ homepage:
27
+ licenses:
28
+ - MIT
29
+ metadata: {}
30
+ post_install_message:
31
+ rdoc_options: []
32
+ require_paths:
33
+ - lib
34
+ required_ruby_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ requirements: []
45
+ rubyforge_project:
46
+ rubygems_version: 2.2.2
47
+ signing_key:
48
+ specification_version: 4
49
+ summary: This gem reads rss feeds and then searches them for keywords returning only
50
+ stories of interest
51
+ test_files: []