govfeed 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +15 -0
- data/govfeed.gemspec +21 -0
- data/lib/govfeed.rb +119 -0
- data/lib/govfeed/version.rb +3 -0
- metadata +62 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'bundler'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/testtask'
|
4
|
+
|
5
|
+
# Register the tasks supplied by Bundler's gem helper.
Bundler::GemHelper.install_tasks

# `rake` with no arguments runs the unit tests.
desc 'Default: run unit tests.'
task :default => [:test]

# The :test task loads every file matching test/*_test.rb with lib/ added to
# the load path, and runs verbosely.
desc 'Run unit tests.'
Rake::TestTask.new(:test) { |test_task|
  test_task.libs.push('lib')
  test_task.pattern = 'test/*_test.rb'
  test_task.verbose = true
}
|
data/govfeed.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "govfeed/version"
|
4
|
+
|
5
|
+
# Gem specification for govfeed.
#
# The file lists are taken from `git ls-files`, so the gem must be built from
# inside a git checkout.
Gem::Specification.new do |s|
  s.name        = "govfeed"
  s.version     = Govfeed::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Matthew Grigajtis"]
  s.email       = ["matthew.grigajtis@gmail.com"]
  # TODO: homepage is still empty; fill in the project URL once one exists.
  s.homepage    = ""
  s.summary     = %q{Grab a government RSS feed.}
  s.description = %q{This gem grabs an RSS feed from a specified government agency.}

  s.rubyforge_project = "govfeed"

  # lib/govfeed.rb requires both of these libraries when it is loaded, so they
  # have to be declared here; otherwise `gem install govfeed` produces a gem
  # that raises LoadError on `require 'govfeed'`.
  s.add_dependency "curb-fu"
  s.add_dependency "hpricot"

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
data/lib/govfeed.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'curb-fu'
|
2
|
+
require 'hpricot'
|
3
|
+
|
4
|
+
# Govfeed downloads an RSS feed from a fixed list of government (and a few
# think-tank) sources and renders it as a fragment of HTML.
module Govfeed

  # Truncates text down to a number of whitespace-separated words.
  #
  # text       - the String to shorten; nil is passed through as nil.
  # length     - maximum number of words to keep (default 30). Zero or a
  #              negative value keeps no words at all.
  # end_string - suffix appended only when words were actually dropped.
  #
  # Returns the shortened String (words re-joined with single spaces), or nil
  # when text is nil.
  def self.truncate_words(text, length = 30, end_string = ' ...')
    return if text.nil?

    words = text.split
    # Slice by (start, count) rather than by range: words[0..(length - 1)]
    # becomes words[0..-1] when length is 0 and would return every word.
    kept = words[0, [length, 0].max]
    kept.join(' ') + (words.length > length ? end_string : '')
  end

  # Returns the Hash of known feeds, mapping a Symbol key to the feed URL.
  #
  # The order is: US federal and state feeds, think tank / institution feeds,
  # then Canadian federal and provincial feeds. Some states, provinces and
  # territories do not yet have RSS feeds, or have feeds that do not work
  # (Idaho); they will be added when they are created or fixed.
  #
  # A new Hash is built on every call, so callers may modify the result.
  def self.getFeedList
    {
      :us_federal => "http://www.usa.gov/rss/updates.xml",
      :us_al => "http://media.alabama.gov/rss/rss.aspx",
      :us_az => "http://www.azdps.gov/RSS/News/",
      :us_ca => "http://news.ca.gov/news/feed",
      :us_de => "http://portal.delaware.gov/delaware-rss.xml",
      :us_ga => "http://georgia.gov/rss/ga-agency-news.rss",
      :us_hi => "http://www.oha.org/index2.php?option=ds-syndicate&version=1&feed_id=1",
      :us_il => "http://www.illinois.gov/PressReleases/RSS/Main_State_Page.xml",
      :us_in => "http://www.in.gov/portal/news_events/39832.xml",
      :us_ks => "http://www.kansas.gov/feed/",
      :us_ky => "http://migration.kentucky.gov/g2p/KII.G2P.Portal.CMS.Templates/G2PPortalRssPage.aspx?application=NEWSROOM",
      :us_la => "http://wwwprd.doa.louisiana.gov/LaNews/PublicPages/RSSFeed.xml",
      :us_me => "http://www.maine.gov/tools/whatsnew/rss.php?tid=27",
      :us_md => "http://choosemaryland.org/RSS/DBEDNewsFeed.aspx",
      :us_ma => "http://innovation.blog.state.ma.us/blog/atom.xml",
      :us_mi => "http://www.michigan.gov/rss/0,2348,7-124-53818--62431-,00.xml",
      :us_mo => "http://www.mo.gov/news/?xml=all",
      :us_nj => "http://www.state.nj.us/nj/home/features/news/approved/rss.xml",
      :us_nm => "http://newmexico.sks.com/government/CalendarRssService.ashx?Id=4ae1958b9f094127bcb6e6580e29cf85&type=c&uri=%2fgovernment%2fopen_meetings.aspx",
      :us_ny => "http://www.nysenate.gov/rss",
      :us_nc => "http://www.dornc.com/rss/headlines.xml",
      :us_nd => "http://www.commerce.nd.gov/news/newsFeed.asp",
      :us_oh => "http://development.ohio.gov/rss/Feeds/BusinessInvestmentsIncentives.xml",
      # Was "feed://..."; feed:// is a feed-reader pseudo-scheme, not a
      # protocol an HTTP client can fetch, so use http:// like the others.
      :us_ok => "http://www.ok.gov/genthree/rss.php?agency_id=0",
      :us_pa => "http://www.state.pa.us/portal/server.pt/gateway/PTARGS_0_2_134550_3013_803012_43/SearchXml/SnapshotQueryRss.axd?pubdate=104&id=2067",
      :us_ri => "http://rigov.tumblr.com/rss",
      :us_sd => "http://www.sd.gov/rss/",
      :us_tn => "http://news.tn.gov/rss.xml",
      :us_tx => "http://txapps.texas.gov/portal/tol/en/rss",
      :us_va => "http://www.governor.virginia.gov/News/rss/index.cfm",
      :us_vt => "http://www.vermont.gov/portal/rss/feeds/news.php",
      :us_wa => "http://access.wa.gov/news/thismonth.xml",
      :us_wv => "http://www.wv.gov/_layouts/feed.aspx?xsl=1&web=%2Fnews&page=fa057236-2db7-4147-b6d0-d95f3025f5a2&wp=38869cd8-fc30-4334-b410-c84c890ab9a5",
      :us_wi => "http://www.dhs.wisconsin.gov/news/pressreleases/rss.xml",
      :cato => "http://feeds.cato.org/CatoDispatch.xml",
      :heritage => "http://blog.heritage.org/feed/",
      :mises => "http://feeds.mises.org/MisesDailyArticles?format=xml",
      :ca_federal => "http://news.gc.ca/web/rss-eng.do",
      :ca_ab => "http://www.gov.ab.ca/acn/RSS_FEEDS/RSS_ALL_News.xml",
      :ca_bc => "http://www2.news.gov.bc.ca/nrm_rss_news/govwide.xml",
      # NOTE: this URL is on gov.mb.ca (Manitoba); the :ca_mn key is kept
      # unchanged so existing callers keep working.
      :ca_mn => "http://news.gov.mb.ca/news/index.rss",
      :ca_nb => "http://www2.gnb.ca/content/gnb/en/news/local_government/_jcr_content/mainContent_par/newslist.rss1.html",
      :ca_ns => "http://www.gov.ns.ca/news/rss/rss.asp",
      :ca_on => "http://news.ontario.ca/newsroom/en/rss/allnews.rss",
      :ca_pe => "http://www.gov.pe.ca/index.php3?number=rss",
      :ca_sk => "http://www.gov.sk.ca/Common/PageTemplates/rss.aspx"
    }
  end

  # Fetches one of the feeds in getFeedList and renders it as HTML.
  #
  # feed            - Symbol key into getFeedList (e.g. :us_federal).
  # numberOfStories - maximum number of items to render. 0 (the default),
  #                   nil or a negative number renders every item.
  #
  # Returns a String of HTML: an <h1> with the feed title, then (when
  # present) the feed description and image, then one linked heading and one
  # summary <div> per item. Item descriptions are cut to 40 words.
  #
  # Raises ArgumentError when feed is not a key of getFeedList.
  #
  # NOTE(review): text taken from the feed is inserted into the markup
  # without HTML escaping. Feeds are external input, so callers that show the
  # result to users should treat it as untrusted.
  def self.getFeed(feed, numberOfStories = 0)
    url = getFeedList[feed]
    raise ArgumentError, "unknown feed: #{feed.inspect}" if url.nil?

    # Curb-Fu fetches the feed.
    response = CurbFu.get(url)

    # The hpricot gem parses it as we build the HTML.
    # NOTE(review): this uses Hpricot's default parser rather than
    # Hpricot.XML; confirm that <link> and <pubDate> come through as expected.
    doc = Hpricot(response.body.to_s)

    # RSS feed heading. A feed without a <title> yields an empty heading
    # instead of a NoMethodError on nil.
    title = (doc/"title").first
    html = "<h1 id=\"govfeedTitle\">" + (title ? title.inner_text : '') + "</h1>\n"

    # Only render the feed description when the element exists and has text.
    # (The previous check, `x != nil || x != ""`, was always true.)
    description = (doc/"description").first
    if description && !description.inner_text.empty?
      html << "<div class=\"govfeedDiv\">" + description.inner_text + "</div>\n"
    end

    # Only render the feed image when <image> actually contains a <url>.
    image_url = (doc/"image"/"url").first
    if image_url
      html << "<div class=\"govfeedImage\"><img src=\"" + image_url.inner_text + "\" alt=\"\" /></div>\n"
    end

    # Individual RSS feed items; limited only when a positive count is given.
    items = (doc/"item")
    limit = numberOfStories.to_i
    items = items.first(limit) if limit > 0
    items.each { |item| html << render_item(item) }

    html
  end

  # Renders a single feed <item> as a linked heading followed by a <div>
  # holding the publication date and the description cut to 40 words.
  def self.render_item(item)
    link    = (item/"link").inner_text
    heading = (item/"title").inner_text
    date    = (item/"pubDate").inner_text
    summary = truncate_words((item/"description").inner_text, 40)

    "<a href=\"#{link}\"><h2 class=\"govfeedHeading\">#{heading}</h2></a>\n" +
      "<div class=\"govfeedDiv\">#{date}<br />#{summary}</div>\n"
  end
  private_class_method :render_item

end
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: govfeed
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.1
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Matthew Grigajtis
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-10-21 00:00:00 -04:00
|
14
|
+
default_executable:
|
15
|
+
dependencies: []
|
16
|
+
|
17
|
+
description: This gem grabs an RSS feed from a specified government agency.
|
18
|
+
email:
|
19
|
+
- matthew.grigajtis@gmail.com
|
20
|
+
executables: []
|
21
|
+
|
22
|
+
extensions: []
|
23
|
+
|
24
|
+
extra_rdoc_files: []
|
25
|
+
|
26
|
+
files:
|
27
|
+
- .gitignore
|
28
|
+
- Gemfile
|
29
|
+
- Rakefile
|
30
|
+
- govfeed.gemspec
|
31
|
+
- lib/govfeed.rb
|
32
|
+
- lib/govfeed/version.rb
|
33
|
+
has_rdoc: true
|
34
|
+
homepage: ""
|
35
|
+
licenses: []
|
36
|
+
|
37
|
+
post_install_message:
|
38
|
+
rdoc_options: []
|
39
|
+
|
40
|
+
require_paths:
|
41
|
+
- lib
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: "0"
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: "0"
|
54
|
+
requirements: []
|
55
|
+
|
56
|
+
rubyforge_project: govfeed
|
57
|
+
rubygems_version: 1.5.2
|
58
|
+
signing_key:
|
59
|
+
specification_version: 3
|
60
|
+
summary: Grab a government RSS feed.
|
61
|
+
test_files: []
|
62
|
+
|