just_inform 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a9d8c59057f16fd2e11ac1cc01088f690f1e508b
4
+ data.tar.gz: e7d8c5d339a28f4cc640f02eecd0ca0a5338f30f
5
+ SHA512:
6
+ metadata.gz: 6a0a174207bb7e1148180622225bcdba1ea821e54a127ef27e64bd3a1122eb00b06705ba2ccfdd4bb9d816815074b1152a68998be5da0e2feb9c4b2b8baaaa70
7
+ data.tar.gz: 8e812e6e68700d18d64bbd11ccb44ffe800d46a69249d3e8d27cefccbbe965a9e079ada9d2b63d19ca8c773f62b11bd1fa617efe79d2008b3becb8b1e4f1d2e1
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over TLS so packages cannot be tampered with in transit.
source 'https://rubygems.org'

# XML parser used to read the RegInfo.gov report.
gem 'nokogiri', '~> 1.6.0'
@@ -0,0 +1,12 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ mini_portile (0.5.1)
5
+ nokogiri (1.6.0)
6
+ mini_portile (~> 0.5.0)
7
+
8
+ PLATFORMS
9
+ ruby
10
+
11
+ DEPENDENCIES
12
+ nokogiri (~> 1.6.0)
@@ -0,0 +1,22 @@
1
+ # MIT License
2
+
3
+ Copyright (c) 2013 [Office of Citizen Services and Innovative Technologies, U.S. General Services Administration (GSA-OCSIT)]
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
22
+ [Office of Citizen Services and Innovative Technologies, U.S. General Services Administration (GSA-OCSIT)]: https://github.com/GSA-OCSIT
@@ -0,0 +1,30 @@
1
+ # What Is Just Inform
2
+
3
+ Just Inform is a simple library to help parse the daily XML feed from [RegInfo.gov]. We needed a tool to help sort government forms based on burden hours, cost, and responses.
4
+
5
+ ## Basic Usage
6
+
7
+ #### Install the gem
8
+ gem install just_inform
9
+
10
+ #### Create a new parser instance
11
+ parser = JustInform::Parser.new
12
+
13
+ #### Return an array with the sort value (cost) and InformationCollectionRequest objects
14
+ parser.top(10, :cost) =>
15
+ [["33,672,000,000", U.S. Individual Income Tax Return], ["8,190,000,004", Rule 10b-10 Confirmation of Securities Transactions (17 C.F.R. 240.10b-10)], ["2,857,465,000", Exchange Act Form 10-K], ["2,727,479,226", America Invents Act Section 10 Patent Fee Adjustments], ["1,801,830,000", Application for a U.S. Passport], ["1,300,147,200", ASSIST Database], ["1,280,341,567", Form SD], ["910,000,000", Online Application for Nonimmigrant Visa], ["791,160,764", Rules for Patent Maintenance Fees], ["772,798,833", Initial Patent Applications]]
16
+
17
+ You can also sort by burden hours (:burden) and number of responses (:responses).
18
+
19
+ ## Todo
20
+
21
+ * Add tests with 100% coverage
22
+ * Add ActiveRecord-like finder methods (e.g. Parser.find_by_title('Passport'))
23
+ * Build into a Sinatra/Rails app that exposes an API/webpage showing the top X government forms by burden hours, cost, and number of responses. This would be updated daily from the XML feed.
24
+
25
+ ## License
26
+ [MIT License]
27
+
28
+ [RegInfo.gov]: http://www.reginfo.gov
29
+ [MIT License]: https://github.com/GSA-OCSIT/just_inform/blob/master/LICENSE.md
30
+ [README]: https://github.com/GSA-OCSIT/just_inform/blob/master/README.md
@@ -0,0 +1,38 @@
1
# coding: utf-8
# Gem specification for just_inform. The version number is read from
# lib/just_inform/version.rb, so lib/ is put on the load path first.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'just_inform/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name        = 'just_inform'
  spec.version     = JustInform::VERSION
  spec.date        = '2013-08-15'
  spec.platform    = Gem::Platform::RUBY
  spec.license     = 'MIT'
  spec.homepage    = 'https://github.com/GSA-OCSIT/just_inform'

  # Authorship and description
  spec.authors     = ["Justin Grevich"]
  spec.email       = 'justin@grevi.ch'
  spec.summary     = "Reginfo.gov XML Forms Report Data Parser"
  spec.description = "A library to help parse the XML report published by Reginfo.gov (http://www.reginfo.gov/public/do/PRAXML)"

  # Packaged files, in the same order as before
  spec.files = %w[
    Gemfile
    Gemfile.lock
    just_inform.gemspec
    LICENSE.md
    README.md
    data
    lib/just_inform.rb
    lib/just_inform/downloader.rb
    lib/just_inform/information_collection.rb
    lib/just_inform/information_collection_request.rb
    lib/just_inform/parser.rb
    lib/just_inform/version.rb
  ]
  spec.require_paths    = ["lib"]
  spec.extra_rdoc_files = %w[README.md LICENSE.md]
  spec.rdoc_options     = ["--charset=UTF-8"]

  # Dependencies
  spec.add_dependency('nokogiri', '~> 1.6.0')
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"

  # Only set on RubyGems versions that expose the writer.
  if spec.respond_to? :required_rubygems_version=
    spec.required_rubygems_version = Gem::Requirement.new(">= 0")
  end
end
@@ -0,0 +1,32 @@
1
# Entry point for the just_inform gem.
#
# Nokogiri must be loaded BEFORE its classes are reopened below. Otherwise this
# file would create an empty Nokogiri module, and the
# `require 'nokogiri' unless defined?(Nokogiri)` guard in the parser would then
# skip loading the real library.
require 'nokogiri'

module JustInform; end

module Nokogiri
  module XML
    class Node
      # Restrict the attribute list to the node name.
      # NOTE(review): Nokogiri is understood to consult this list when building
      # #inspect output - confirm against the installed Nokogiri version.
      def inspect_attributes
        [:name]
      end
    end

    class Document < Nokogiri::XML::Node
      # Same as Node#inspect_attributes, but also lists :children.
      def inspect_attributes
        [:name, :children]
      end
    end
  end
end

require 'just_inform/downloader'
require 'just_inform/information_collection'
require 'just_inform/information_collection_request'
require 'just_inform/parser'
require 'just_inform/version'
@@ -0,0 +1,28 @@
1
module JustInform
  # Fetches the RegInfo.gov inventory report and caches one copy per calendar
  # day in the "data" directory (relative to the current working directory).
  class Downloader
    require 'open-uri' unless defined?(OpenURI)

    # Directory holding the cached reports.
    DATA_DIR = 'data'
    # Location of the inventory report.
    REPORT_URL = 'http://www.reginfo.gov/public/do/PRAXML?type=inventory'

    # Returns an open, readable File for today's report, downloading it first
    # when no cached copy exists. The caller owns the handle and should close it.
    #
    # Raises whatever open-uri raises when the download fails; in that case no
    # cache file is written.
    def self.get_latest
      # Build the path once so a call that straddles midnight cannot check one
      # filename and then open another.
      path = File.join(DATA_DIR, current_filename)

      if File.exist?(path)
        puts "Using downloaded file"
      else
        puts 'Downloading file, this could take a minute...'
        # Fetch the complete body before touching the disk: a failed request
        # must not leave an empty file behind that later calls would mistake
        # for a valid cached report.
        body = URI.parse(REPORT_URL).read
        Dir.mkdir(DATA_DIR) unless File.directory?(DATA_DIR)
        File.open(path, 'wb') { |file| file << body }
      end

      File.open(path)
    end

    # The helpers below stay public: the bare `private` that used to precede
    # them never applied to `def self.` methods, so hiding them now could break
    # existing callers.

    # True when today's report is already cached on disk, false otherwise.
    def self.current_file?
      File.exist?(File.join(DATA_DIR, current_filename))
    end

    # Cache filename for today's report, e.g. "2013.08.15-CurrentInventoryReport.xml".
    def self.current_filename
      Time.now.strftime("%Y.%m.%d-CurrentInventoryReport.xml")
    end
  end
end
28
+
@@ -0,0 +1,11 @@
1
module JustInform
  # Holder for one information collection entry.
  class InformationCollection
    attr_accessor :raw_data, :parent_id
    attr_accessor :title, :form_name, :file_name
    attr_accessor :burden_hours, :cost, :responses

    # data - object responding to #at_css; stored unchanged as raw_data.
    #
    # Only raw_data and parent_id are populated here; every other attribute
    # stays nil until a caller assigns it.
    def initialize(data)
      # NOTE(review): this keeps whatever #at_css returns (the matched node or
      # nil), not the node's text - confirm that is what callers expect.
      @parent_id = data.at_css('ICRReferenceNumber')
      @raw_data  = data
    end
  end
end
@@ -0,0 +1,50 @@
1
module JustInform
  # Read-only view over one InformationCollectionRequest element. Every
  # accessor looks its value up in raw_data on demand via #at_css.
  #
  # Missing elements no longer raise NoMethodError: text accessors return nil
  # and numeric accessors return 0, so one incomplete record cannot abort a
  # ranking over the whole report.
  class InformationCollectionRequest
    attr_accessor :raw_data

    # data - object responding to #at_css whose matches respond to #content.
    def initialize(data)
      @raw_data = data
    end

    # Shows the title instead of the full node dump ("" when there is none).
    def inspect
      title.to_s
    end

    # Text of the Abstract element, or nil when absent.
    def abstract
      text_at('Abstract')
    end

    # Text of the AgencyCode element, or nil when absent.
    def agency_code
      text_at('AgencyCode')
    end

    # Text of the ICRReferenceNumber element, or nil when absent.
    def icr_reference_number
      text_at('ICRReferenceNumber')
    end

    # Text of the OMBControlNumber element, or nil when absent.
    def omb_control_number
      text_at('OMBControlNumber')
    end

    # Text of the Title element, or nil when absent.
    def title
      text_at('Title')
    end

    # Integer value of "Burden BurdenHour TotalQuantity" (0 when absent).
    def total_burden_hours
      integer_at('Burden BurdenHour TotalQuantity')
    end
    alias_method :burden_hours, :total_burden_hours
    alias_method :burden, :total_burden_hours

    # Integer value of "Burden BurdenCost TotalAmount" (0 when absent).
    def total_cost
      integer_at('Burden BurdenCost TotalAmount')
    end
    alias_method :cost, :total_cost

    # Integer value of "Burden BurdenResponse TotalQuantity" (0 when absent).
    def total_responses
      integer_at('Burden BurdenResponse TotalQuantity')
    end
    alias_method :responses, :total_responses

    private

    # Content of the first match for the CSS selector, or nil when none matches.
    def text_at(selector)
      node = @raw_data.at_css(selector)
      node && node.content
    end

    # Same lookup converted with #to_i; nil.to_i is 0, which covers missing elements.
    def integer_at(selector)
      text_at(selector).to_i
    end
  end
end
@@ -0,0 +1,66 @@
1
module JustInform
  # Loads the RegInfo.gov inventory report and ranks the information
  # collection requests it contains.
  class Parser
    attr_accessor :doc

    # Fetches (or reuses) today's report and parses it into #doc.
    def initialize
      load
    end

    # Returns the +limit+ reports with the largest +attrib_name+ value, largest
    # first, as [formatted_value, report] pairs:
    #
    #   parser.top(2, :cost)
    #   # => [["33,672,000,000", <report>], ["8,190,000,004", <report>]]
    #
    # limit       - maximum number of rows (values below 1 give an empty array).
    # attrib_name - name of a public reader on the report objects, e.g.
    #               :burden_hours (default), :cost or :responses.
    #
    # Reports with equal values keep their document order.
    def top(limit=10, attrib_name=:burden_hours)
      sort_method = attrib_name.to_sym
      wanted = [limit.to_i, 0].max

      # Build the report list once and rank all of it. Pruning against a
      # partially filled list would drop valid entries.
      ranked = reports.each_with_index.map do |report, index|
        [report.public_send(sort_method), index, report]
      end

      ranked.sort! do |a, b|
        by_value = b[0] <=> a[0]
        # The index tie-breaker makes the ordering deterministic.
        by_value.zero? ? a[1] <=> b[1] : by_value
      end

      ranked.first(wanted).map { |value, _index, report| [number_to_delimited(value), report] }
    end

    # Wraps every InformationCollectionRequest element of +doc+ (default: the
    # loaded document) in an InformationCollectionRequest object. The list is
    # rebuilt on every call.
    def reports(doc=@doc)
      doc.xpath('//InformationCollectionRequest').map { |node| InformationCollectionRequest.new(node) }
    end

    private

    # Prettify a number with thousands separators: 1234567 => "1,234,567".
    # Uses gsub (not gsub!) so values without a separator, such as 999, come
    # back as "999" instead of nil.
    def number_to_delimited(number)
      number.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
    end

    # Load XML data into Nokogiri and set the @doc instance variable.
    def load
      puts 'Loading >50 MB of XML data and parsing with Nokogiri, this could take awhile (e.g. 2-3m)'
      # Required unconditionally: a `defined?(Nokogiri)` guard is fooled when
      # the Nokogiri module has merely been reopened before the library loads.
      require 'nokogiri'
      data = Downloader.get_latest
      begin
        @doc = Nokogiri::XML(data)
      ensure
        # Release the report's file handle once parsing is done.
        data.close if data.respond_to?(:close) && !data.closed?
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module JustInform
  # Gem version; read by just_inform.gemspec. Frozen so it cannot be mutated
  # in place at runtime.
  VERSION = "0.0.2".freeze
end
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: just_inform
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Justin Grevich
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-08-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 1.6.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 1.6.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: A library to help parse the XML report published by Reginfo.gov (http://www.reginfo.gov/public/do/PRAXML)
56
+ email: justin@grevi.ch
57
+ executables: []
58
+ extensions: []
59
+ extra_rdoc_files:
60
+ - README.md
61
+ - LICENSE.md
62
+ files:
63
+ - Gemfile
64
+ - Gemfile.lock
65
+ - just_inform.gemspec
66
+ - LICENSE.md
67
+ - README.md
68
+ - lib/just_inform.rb
69
+ - lib/just_inform/downloader.rb
70
+ - lib/just_inform/information_collection.rb
71
+ - lib/just_inform/information_collection_request.rb
72
+ - lib/just_inform/parser.rb
73
+ - lib/just_inform/version.rb
74
+ homepage: https://github.com/GSA-OCSIT/just_inform
75
+ licenses:
76
+ - MIT
77
+ metadata: {}
78
+ post_install_message:
79
+ rdoc_options:
80
+ - --charset=UTF-8
81
+ require_paths:
82
+ - lib
83
+ required_ruby_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - '>='
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubyforge_project:
95
+ rubygems_version: 2.0.5
96
+ signing_key:
97
+ specification_version: 4
98
+ summary: Reginfo.gov XML Forms Report Data Parser
99
+ test_files: []