rss_searcher 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +22 -0
- data/README.md +1 -0
- data/lib/file_reader.rb +28 -0
- data/lib/rss_keyword_finder.rb +80 -0
- data/lib/rss_reader.rb +21 -0
- data/lib/rss_searcher.rb +26 -0
- data/lib/send_text.rb +24 -0
- metadata +51 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6a748b23904a57481f5838d40aacfff1b6a26dca
|
4
|
+
data.tar.gz: 23358084b8f1f4c19e8f11243abd22c4253fef25
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 228bd90ed5b24259db12d4df5e589e9181f1dfa30d09654b217139b94c2f6341427b4a431d6fbafb6edc497976783de33c3d1541596f8f067ece3de19bb16fa7
|
7
|
+
data.tar.gz: 5a93943e8bae27326cb80843e81f6381448d39c6dec9cc9f71aa550322078186e4498df27992f2215a2a2d10438a019157539234fd14551e2640a7b6a9c8a7ef
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Gregory White
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# rss_searcher
|
data/lib/file_reader.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Supplies lists of feed URLs, taken either from an Array handed in by the
# caller or from a text file containing one URL per line.
class File_Reader

  # Pattern a line must contain to be accepted as a URL. It stays a String
  # (as before) but is single-quoted so that the "\." reaches the regex engine
  # as a literal dot; in a double-quoted string the backslash is dropped and
  # the dot would match any character.
  URL_REGEX = '((http|ftp|https):/{2})+([0-9a-z_-]+\.)+[a-z]+'

  # Returns +input+ untouched when it is already an Array; otherwise treats it
  # as a file name and returns the URL lines found in that file (or nil, see
  # get_lines_from_file).
  def self.get_lines(input)
    return input if input.is_a?(Array)

    get_lines_from_file(input)
  end

  # assumes file has one line to one url ratio
  #
  # Reads +filename+ and returns an Array of the lines (without their line
  # terminator) that contain something matching URL_REGEX. Lines that do not
  # look like a URL are skipped.
  #
  # Returns nil when +filename+ is nil/false, or when the file cannot be read
  # (a message is printed to stdout in that case).
  def self.get_lines_from_file(filename)
    return nil unless filename

    # Compile the pattern once instead of once per line.
    url_pattern = Regexp.new(URL_REGEX)
    File.readlines(filename).each_with_object([]) do |line, urls|
      urls.push(line.chomp) if line =~ url_pattern
    end
  rescue StandardError
    # Best effort by design: an unreadable file yields nil rather than an exception.
    puts "failed to open file #{filename}"
    nil
  end

end
|
data/lib/rss_keyword_finder.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
require_relative "rss_reader.rb"
|
2
|
+
require 'date'
|
3
|
+
|
4
|
+
# Named match levels understood by RSS_Keyword_Searcher (an Integer between
# 2 and 99 is also accepted there and is treated as a percentage).
# Frozen so the shared constants cannot be mutated in place.

# Every keyword has to be found in an item.
INTEREST_LEVEL_ALL = "ALL".freeze
# At least one keyword has to be found in an item.
INTEREST_LEVEL_ONE_OR_MORE = "ONE_OR_MORE".freeze
|
6
|
+
|
7
|
+
# Filters the items of a set of RSS feeds by keyword.
#
# Keywords are matched case-insensitively as plain substrings of an item's
# categories, description and title. They are never interpreted as regular
# expressions, so keywords such as "c++" or "(beta)" are safe.
class RSS_Keyword_Searcher

  # feed_urls            - feed locations handed to RSS_Reader.
  # keywords             - non-empty Array of keyword Strings.
  # match_level          - INTEREST_LEVEL_ALL, INTEREST_LEVEL_ONE_OR_MORE, or an
  #                        Integer from 2 to 99 meaning "more than this
  #                        percentage of the keywords must be present".
  # minimum_publish_date - Date; older items are ignored by get_links_of_interest.
  #
  # Raises ArgumentError for missing keywords, an invalid match level, or when
  # no news items could be loaded.
  def initialize(feed_urls, keywords, match_level, minimum_publish_date=Date.today)
    # Check the cheap arguments first so bad input is rejected before any feed
    # is fetched.
    raise ArgumentError, "no keywords given" unless keywords.is_a?(Array) && keywords.length > 0
    raise ArgumentError, "invalid match_level" unless valid_match_level(match_level)

    @keywords = keywords
    @match_level = match_level
    @minimum_publish_date = minimum_publish_date
    @reader = RSS_Reader.new(feed_urls)
    @news_items = @reader.get_news_items
    raise ArgumentError, "no valid feed urls given" unless @news_items
  end

  # Appends to +links_of_interest+ the link of every item that is recent
  # enough and matches the keywords, then returns that Array.
  def get_links_of_interest(links_of_interest=Array.new)
    @news_items.each do |item|
      next if out_of_date(item)

      links_of_interest.push(item.link) if is_of_interest(content_from_item(item))
    end
    links_of_interest
  end

  # Returns true when +content+ (expected to be lower-cased already, as
  # content_from_item produces it) satisfies the configured match level,
  # false otherwise.
  def is_of_interest(content)
    if @match_level == INTEREST_LEVEL_ALL
      lowercase_keywords.all? { |keyword| content.include?(keyword) }
    elsif @match_level == INTEREST_LEVEL_ONE_OR_MORE
      lowercase_keywords.any? { |keyword| content.include?(keyword) }
    elsif percentage_level?(@match_level)
      # Integer arithmetic avoids float rounding at the exact threshold.
      keyword_match_count(content) * 100 > @match_level * @keywords.length
    else
      false
    end
  end

  # Returns true as soon as any loaded item matches the keywords.
  # +flag+ is only returned as-is when there are no items to inspect.
  # NOTE(review): unlike get_links_of_interest this does not apply
  # minimum_publish_date - confirm whether that is intended.
  def event_triggered?(flag=false)
    return flag if @news_items.empty?

    @news_items.any? { |item| is_of_interest(content_from_item(item)) }
  end

  # Returns the fraction (0.0 to 1.0) of keywords that occur in +content+.
  def get_keyword_match_percentage(content)
    keyword_match_count(content).to_f / @keywords.length
  end

  # Returns true when the item was published before the minimum publish date.
  # An item whose pubDate is missing or cannot be parsed is kept (false)
  # instead of raising and aborting the whole search.
  def out_of_date(news_item)
    pub_date = begin
      Date.parse(news_item.pubDate.to_s)
    rescue ArgumentError
      nil
    end
    return false unless pub_date

    pub_date < @minimum_publish_date
  end

  # Builds the lower-cased text that keywords are matched against: the
  # item's category contents, description and title. Missing parts are
  # skipped, and the parts are separated by a space so that a keyword cannot
  # match across the boundary between two fields.
  def content_from_item(item)
    parts = Array(item.categories).map { |category| category.content }
    parts.push(item.description, item.title)
    parts.compact.join(" ").downcase
  end

  # Returns true for the two named levels and for Integers from 2 to 99.
  def valid_match_level(match_level)
    match_level == INTEREST_LEVEL_ALL ||
      match_level == INTEREST_LEVEL_ONE_OR_MORE ||
      percentage_level?(match_level)
  end

  private

  # True when +level+ is an Integer percentage threshold (2..99).
  def percentage_level?(level)
    level.is_a?(Integer) && level > 1 && level < 100
  end

  # Keywords lower-cased for comparison with lower-cased content.
  def lowercase_keywords
    @keywords.map(&:downcase)
  end

  # Number of keywords that occur in +content+.
  def keyword_match_count(content)
    lowercase_keywords.count { |keyword| content.include?(keyword) }
  end

end
|
data/lib/rss_reader.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'rss'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
# Downloads and parses a list of RSS feeds.
class RSS_Reader

  # Locations with one of these schemes are fetched through open-uri;
  # anything else is treated as a local file path.
  REMOTE_FEED_PATTERN = %r{\A(?:https?|ftp)://}i

  # feed_urls - Array of feed locations (URLs or local file paths).
  def initialize(feed_urls)
    @feed_urls = feed_urls
  end

  # returns an array of rss items
  #
  # Items of all readable feeds are concatenated in feed order. A feed that
  # cannot be opened or parsed is reported on stderr and skipped, so one bad
  # feed does not discard the others. Returns nil when no items were found.
  def get_news_items
    news_items = Array(@feed_urls).each_with_object([]) do |url, items|
      begin
        open_feed(url) do |rss|
          feed = RSS::Parser.parse(rss)
          # Guard against the parser handing back nil instead of a feed.
          items.concat(feed.items) if feed
        end
      rescue StandardError => e
        warn "skipping feed #{url}: #{e.message}"
      end
    end
    news_items.empty? ? nil : news_items
  end

  private

  # Opens +url+ and yields the resulting IO to the block.
  #
  # Kernel#open is deliberately avoided: it would run a string that starts
  # with "|" as a shell command. Remote locations go through the URI object
  # (open-uri), everything else through File.open, which never spawns a
  # process.
  def open_feed(url, &block)
    location = url.to_s
    if location =~ REMOTE_FEED_PATTERN
      URI.parse(location).open(&block)
    else
      File.open(location, &block)
    end
  end

end
|
data/lib/rss_searcher.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require_relative "rss_keyword_finder.rb"
|
2
|
+
require_relative "file_reader.rb"
|
3
|
+
|
4
|
+
# Entry point of the gem: resolves feeds and keywords (each given either as
# an Array or as the name of a file with one entry per line) and returns the
# links of the matching news items.
class RSS_Searcher

  # feeds                - Array of feed URLs, or name of a file listing them.
  # keywords             - Array of keywords, or name of a file listing them.
  # match_level          - passed through to RSS_Keyword_Searcher.
  # minimum_publish_date - passed through to RSS_Keyword_Searcher.
  #
  # Returns an Array of links. It is empty when feeds or keywords could not
  # be resolved, or when RSS_Keyword_Searcher raises ArgumentError (its
  # message is printed to stdout).
  def get_links_of_interest(feeds, keywords, match_level="ONE_OR_MORE", minimum_publish_date=Date.today)
    set_relevant_params(feeds, keywords, minimum_publish_date)
    return [] unless @feed_urls && @keywords && @minimum_publish_date

    begin
      searcher = RSS_Keyword_Searcher.new(@feed_urls, @keywords, match_level, @minimum_publish_date)
      searcher.get_links_of_interest
    rescue ArgumentError => ae
      puts ae.message
      []
    end
  end

  # Resolves the inputs and stores them for get_links_of_interest.
  #
  # Feeds go through File_Reader.get_lines. Keyword files are read here
  # directly instead: File_Reader keeps only lines that look like URLs, which
  # would silently drop ordinary keywords.
  def set_relevant_params(feeds, keywords, minimum_publish_date)
    @feed_urls = File_Reader.get_lines(feeds)
    @keywords = keywords.is_a?(Array) ? keywords : read_keywords_file(keywords)
    @minimum_publish_date = minimum_publish_date
  end

  private

  # Returns the non-blank lines of +filename+ with surrounding whitespace
  # removed, or nil when +filename+ is nil/false or the file cannot be read.
  def read_keywords_file(filename)
    return nil unless filename

    File.readlines(filename).map(&:strip).reject(&:empty?)
  rescue StandardError
    puts "failed to open file #{filename}"
    nil
  end

end
|
data/lib/send_text.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'twilio-ruby'
|
2
|
+
|
3
|
+
# Sends the text that follows "-m" on the command line as an SMS via Twilio.
#
# The Twilio credentials are read from the environment (TWILIO_ACCOUNT_SID and
# TWILIO_AUTH_TOKEN) so that no secret is stored in the source; a missing
# variable raises KeyError. The recipient and sender numbers can be overridden
# with SMS_TO_NUMBER and SMS_FROM_NUMBER and otherwise keep their previous
# values.

# Look the flag up by position rather than via Hash[*ARGV], which raises when
# the number of arguments is odd.
flag_position = ARGV.index("-m")
content = flag_position && ARGV[flag_position + 1]

if content
  puts "Your message was #{content}"

  # The client is only built when there is actually something to send.
  client = Twilio::REST::Client.new(ENV.fetch("TWILIO_ACCOUNT_SID"), ENV.fetch("TWILIO_AUTH_TOKEN"))
  message = client.account.messages.create(
    :body => content,
    :to => ENV.fetch("SMS_TO_NUMBER", "+15038413886"),
    :from => ENV.fetch("SMS_FROM_NUMBER", "+19712051238"))
  puts message.to

else
  puts "No message given, no action taken"
end
|
metadata
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rss_searcher
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Greg White
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-05-03 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A simple rss feed curator based on keyword matches
|
14
|
+
email: gwhite@dayspring-tech.com
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- LICENSE
|
20
|
+
- README.md
|
21
|
+
- lib/file_reader.rb
|
22
|
+
- lib/rss_keyword_finder.rb
|
23
|
+
- lib/rss_reader.rb
|
24
|
+
- lib/rss_searcher.rb
|
25
|
+
- lib/send_text.rb
|
26
|
+
homepage:
|
27
|
+
licenses:
|
28
|
+
- MIT
|
29
|
+
metadata: {}
|
30
|
+
post_install_message:
|
31
|
+
rdoc_options: []
|
32
|
+
require_paths:
|
33
|
+
- lib
|
34
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
requirements: []
|
45
|
+
rubyforge_project:
|
46
|
+
rubygems_version: 2.2.2
|
47
|
+
signing_key:
|
48
|
+
specification_version: 4
|
49
|
+
summary: This gem reads rss feeds and then searches them for keywords returning only
|
50
|
+
stories of interest
|
51
|
+
test_files: []
|