just_inform 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a9d8c59057f16fd2e11ac1cc01088f690f1e508b
4
+ data.tar.gz: e7d8c5d339a28f4cc640f02eecd0ca0a5338f30f
5
+ SHA512:
6
+ metadata.gz: 6a0a174207bb7e1148180622225bcdba1ea821e54a127ef27e64bd3a1122eb00b06705ba2ccfdd4bb9d816815074b1152a68998be5da0e2feb9c4b2b8baaaa70
7
+ data.tar.gz: 8e812e6e68700d18d64bbd11ccb44ffe800d46a69249d3e8d27cefccbbe965a9e079ada9d2b63d19ca8c773f62b11bd1fa617efe79d2008b3becb8b1e4f1d2e1
data/Gemfile ADDED
@@ -0,0 +1,3 @@
# Resolve gems over HTTPS so downloads are not exposed to tampering in transit.
source 'https://rubygems.org'

# Runtime XML parser used by JustInform::Parser.
gem 'nokogiri', '~> 1.6.0'
@@ -0,0 +1,12 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ mini_portile (0.5.1)
5
+ nokogiri (1.6.0)
6
+ mini_portile (~> 0.5.0)
7
+
8
+ PLATFORMS
9
+ ruby
10
+
11
+ DEPENDENCIES
12
+ nokogiri (~> 1.6.0)
@@ -0,0 +1,22 @@
1
+ # MIT License
2
+
3
+ Copyright (c) 2013 [Office of Citizen Services and Innovative Technologies, U.S. General Services Administration (GSA-OCSIT)]
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
22
+ [Office of Citizen Services and Innovative Technologies, U.S. General Services Administration (GSA-OCSIT)]: https://github.com/GSA-OCSIT
@@ -0,0 +1,30 @@
1
+ # What Is Just Inform
2
+
3
+ Just Inform is a simple library to help parse the daily XML feed from [RegInfo.gov]. We needed a tool to help sort government forms based on burden hours, cost, and responses.
4
+
5
+ ## Basic Usage
6
+
7
+ #### Install the gem
8
+ gem install just_inform
9
+
10
+ #### Create a new parser instance
11
+ parser = JustInform::Parser.new
12
+
13
+ #### Return an array with the sort value (cost) and InformationCollectionRequest objects
14
+ parser.top(10, :cost) =>
15
+ [["33,672,000,000", U.S. Individual Income Tax Return], ["8,190,000,004", Rule 10b-10 Confirmation of Securities Transactions (17 C.F.R. 240.10b-10)], ["2,857,465,000", Exchange Act Form 10-K], ["2,727,479,226", America Invents Act Section 10 Patent Fee Adjustments], ["1,801,830,000", Application for a U.S. Passport], ["1,300,147,200", ASSIST Database], ["1,280,341,567", Form SD], ["910,000,000", Online Application for Nonimmigrant Visa], ["791,160,764", Rules for Patent Maintenance Fees], ["772,798,833", Initial Patent Applications]]
16
+
17
+ You can also sort by burden hours (:burden) and number of responses (:responses).
18
+
19
+ ## Todo
20
+
21
+ * Add tests with 100% coverage
22
+ * Add ActiveRecord-like finder methods (e.g. Parser.find_by_title('Passport'))
23
+ * Build into a Sinatra/Rails app that creates an API/webpage exhibiting the topX forms in gov by burden hours, cost, and response numbers. This would be updated daily from the XML feed.
24
+
25
+ ## License
26
+ [MIT License]
27
+
28
+ [RegInfo.gov]: http://www.reginfo.gov
29
+ [MIT License]: https://github.com/GSA-OCSIT/just_inform/blob/master/LICENSE.md
30
+ [README]: https://github.com/GSA-OCSIT/just_inform/blob/master/README.md
@@ -0,0 +1,38 @@
# coding: utf-8
# Gem specification for just_inform. The version number is read from
# lib/just_inform/version.rb, so lib/ is put on the load path first.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'just_inform/version'

Gem::Specification.new do |s|
  s.name        = 'just_inform'
  s.version     = JustInform::VERSION
  s.date        = '2013-08-15'
  s.authors     = ["Justin Grevich"]
  s.email       = 'justin@grevi.ch'
  s.summary     = "Reginfo.gov XML Forms Report Data Parser"
  s.description = "A library to help parse the XML report published by Reginfo.gov (http://www.reginfo.gov/public/do/PRAXML)"
  s.homepage    = 'https://github.com/GSA-OCSIT/just_inform'
  s.license     = 'MIT'
  s.platform    = Gem::Platform::RUBY

  # Only regular files are listed. The former bare 'data' directory entry was
  # dropped: it never appeared in the packaged gem's file list, so it had no
  # effect on the build.
  s.files = %w[
    Gemfile
    Gemfile.lock
    just_inform.gemspec
    LICENSE.md
    README.md
    lib/just_inform.rb
    lib/just_inform/downloader.rb
    lib/just_inform/information_collection.rb
    lib/just_inform/information_collection_request.rb
    lib/just_inform/parser.rb
    lib/just_inform/version.rb
  ]
  s.require_paths    = ["lib"]
  s.extra_rdoc_files = %w[README.md LICENSE.md]
  s.rdoc_options     = ["--charset=UTF-8"]

  s.add_dependency('nokogiri', '~> 1.6.0')
  s.add_development_dependency "bundler", "~> 1.3"
  s.add_development_dependency "rake"

  # Guarded because very old RubyGems releases lack this setter.
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
end
@@ -0,0 +1,32 @@
# Entry point for the just_inform gem: defines the namespace, trims Nokogiri's
# inspect output and loads the library's components.
module JustInform; end

# Load the real library BEFORE reopening its classes. Reopening first would
# create placeholder Nokogiri constants, which (a) fools any later
# `defined?(Nokogiri)` guard into skipping the real require and (b) lets the
# library's own definitions replace these overrides once it finally loads.
require 'nokogiri'

module Nokogiri
  module XML
    class Node
      # Keep #inspect short: show only the node name.
      def inspect_attributes
        [:name]
      end
    end

    class Document < Nokogiri::XML::Node
      # Keep #inspect short: show only the document name and its children.
      def inspect_attributes
        [:name, :children]
      end
    end
  end
end

require 'just_inform/downloader'
require 'just_inform/information_collection'
require 'just_inform/information_collection_request'
require 'just_inform/parser'
require 'just_inform/version'
@@ -0,0 +1,28 @@
module JustInform
  # Fetches the RegInfo.gov inventory report and caches one copy per calendar
  # day in a "data" directory relative to the current working directory.
  class Downloader
    require 'open-uri' unless defined?(OpenURI)

    # Directory (relative to the working directory) holding cached reports.
    DATA_DIR = 'data'
    # Endpoint serving the current inventory report as XML.
    INVENTORY_URL = 'http://www.reginfo.gov/public/do/PRAXML?type=inventory'

    # Returns an open, readable File for today's report, downloading it first
    # when no cached copy exists. The caller is responsible for closing it.
    def self.get_latest
      # Resolve the path once so the existence check and the open agree even
      # if the date rolls over mid-call.
      path = File.join(DATA_DIR, current_filename)

      if File.exist?(path)
        puts "Using downloaded file"
      else
        puts 'Downloading file, this could take a minute...'
        # Fetch the whole body BEFORE creating the cache file; otherwise a
        # failed download leaves an empty file that later runs would trust.
        body = download
        Dir.mkdir(DATA_DIR) unless File.directory?(DATA_DIR)
        File.open(path, 'wb') { |file| file << body }
      end

      File.open(path)
    end

    # NOTE: the two helpers below were written under a bare `private`, which
    # does not apply to `def self.` methods, so they have always been publicly
    # callable. They stay public to avoid breaking existing callers.

    # True when today's report is already cached, false otherwise.
    def self.current_file?
      File.exist?(File.join(DATA_DIR, current_filename))
    end

    # File name of today's report, e.g. "2013.08.15-CurrentInventoryReport.xml".
    def self.current_filename
      Time.now.strftime("%Y.%m.%d-CurrentInventoryReport.xml")
    end

    # Downloads the report and returns its body as a String.
    def self.download
      # URI.open is the supported entry point on newer Rubies; older ones only
      # offer the open-uri patched Kernel#open.
      stream = URI.respond_to?(:open) ? URI.open(INVENTORY_URL) : open(INVENTORY_URL)
      begin
        stream.read
      ensure
        stream.close
      end
    end
    private_class_method :download
  end
end
@@ -0,0 +1,11 @@
module JustInform
  # Holds one information collection record together with the
  # ICRReferenceNumber element found in it.
  class InformationCollection
    attr_accessor :burden_hours, :cost, :file_name, :form_name, :raw_data, :parent_id, :responses, :title

    # data - object responding to #at_css (presumably a Nokogiri node; verify
    #        against the caller).
    #
    # Only raw_data and parent_id are populated here; the remaining
    # attributes start out nil.
    def initialize(data)
      reference_node = data.at_css('ICRReferenceNumber')

      @raw_data = data
      # Stored as returned by #at_css, i.e. the matched element (or nil).
      @parent_id = reference_node
    end
  end
end
@@ -0,0 +1,50 @@
module JustInform
  # Read-only view over one InformationCollectionRequest element.
  #
  # Text accessors return nil and numeric accessors return 0 when the
  # corresponding element is absent, instead of raising NoMethodError.
  class InformationCollectionRequest
    attr_accessor :raw_data

    # data - object responding to #at_css (presumably a Nokogiri element;
    #        verify against the caller).
    def initialize(data)
      @raw_data = data
    end

    # Shows the title in place of the default object dump.
    def inspect
      title.to_s
    end

    # Content of the Abstract element, or nil when absent.
    def abstract
      text_at('Abstract')
    end

    # Content of the AgencyCode element, or nil when absent.
    def agency_code
      text_at('AgencyCode')
    end

    # Content of the ICRReferenceNumber element, or nil when absent.
    def icr_reference_number
      text_at('ICRReferenceNumber')
    end

    # Content of the OMBControlNumber element, or nil when absent.
    def omb_control_number
      text_at('OMBControlNumber')
    end

    # Content of the Title element, or nil when absent.
    def title
      text_at('Title')
    end

    # Burden > BurdenHour > TotalQuantity as an Integer (0 when absent).
    def total_burden_hours
      integer_at('Burden BurdenHour TotalQuantity')
    end
    alias_method :burden_hours, :total_burden_hours
    alias_method :burden, :total_burden_hours

    # Burden > BurdenCost > TotalAmount as an Integer (0 when absent).
    def total_cost
      integer_at('Burden BurdenCost TotalAmount')
    end
    alias_method :cost, :total_cost

    # Burden > BurdenResponse > TotalQuantity as an Integer (0 when absent).
    def total_responses
      integer_at('Burden BurdenResponse TotalQuantity')
    end
    alias_method :responses, :total_responses

    private

    # Content of the first element matching selector, or nil when none matches.
    def text_at(selector)
      node = @raw_data.at_css(selector)
      node && node.content
    end

    # Integer value of the first element matching selector; nil.to_i makes a
    # missing element count as 0.
    def integer_at(selector)
      text_at(selector).to_i
    end
  end
end
@@ -0,0 +1,66 @@
module JustInform
  # Loads the RegInfo.gov inventory report and ranks its information
  # collection requests by a numeric attribute.
  class Parser
    attr_accessor :doc

    # Loads (downloading if necessary) and parses the report; this can take a
    # few minutes.
    def initialize
      load
    end

    # Returns the highest-ranked reports, best first.
    #
    #   parser.top(2, :cost) # => [["33,672,000,000", <report>], ["8,190,000,004", <report>]]
    #
    # limit       - maximum number of results (default 10).
    # attrib_name - public report method to rank by, e.g. :burden_hours,
    #               :cost or :responses (Symbol or String).
    #
    # Each result is [delimited_value_string, report]. Reports with equal
    # values keep their document order.
    def top(limit=10, attrib_name=:burden_hours)
      sort_method = attrib_name.to_sym

      # Build the report list and read every sort value exactly once; the
      # index is kept only to break ties deterministically.
      scored = reports.each_with_index.map do |report, index|
        [report.public_send(sort_method), index, report]
      end

      ranked = scored.sort do |a, b|
        by_value = b[0] <=> a[0]
        by_value.zero? ? a[1] <=> b[1] : by_value
      end

      ranked.first(limit).map { |value, _index, report| [number_to_delimited(value), report] }
    end

    # Wraps every InformationCollectionRequest element of doc (default: the
    # loaded document) in a JustInform::InformationCollectionRequest.
    def reports(doc=@doc)
      doc.xpath('//InformationCollectionRequest').map { |node| InformationCollectionRequest.new(node) }
    end

    private

    # Inserts thousands separators: 1234567 => "1,234,567".
    # Uses the non-bang gsub because gsub! returns nil when nothing is
    # replaced, i.e. for every value below 1000.
    def number_to_delimited(number)
      number.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
    end

    # Parses the latest report into @doc and returns the document.
    def load
      # Required here, unconditionally: a `defined?(Nokogiri)` guard is fooled
      # by any code that reopens the Nokogiri namespace before the library is
      # loaded, and `require` is already idempotent.
      require 'nokogiri'

      puts 'Loading >50 MB of XML data and parsing with Nokogiri, this could take awhile (e.g. 2-3m)'
      data = Downloader.get_latest
      begin
        @doc = Nokogiri::XML(data)
      ensure
        # The document is fully parsed by now; release the file handle.
        data.close if data.respond_to?(:close)
      end
    end
  end
end
@@ -0,0 +1,3 @@
module JustInform
  # Released gem version; frozen so the constant cannot be mutated in place.
  VERSION = "0.0.2".freeze
end
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: just_inform
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Justin Grevich
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-08-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 1.6.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 1.6.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ~>
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: A library to help parse the XML report published by Reginfo.gov (http://www.reginfo.gov/public/do/PRAXML)
56
+ email: justin@grevi.ch
57
+ executables: []
58
+ extensions: []
59
+ extra_rdoc_files:
60
+ - README.md
61
+ - LICENSE.md
62
+ files:
63
+ - Gemfile
64
+ - Gemfile.lock
65
+ - just_inform.gemspec
66
+ - LICENSE.md
67
+ - README.md
68
+ - lib/just_inform.rb
69
+ - lib/just_inform/downloader.rb
70
+ - lib/just_inform/information_collection.rb
71
+ - lib/just_inform/information_collection_request.rb
72
+ - lib/just_inform/parser.rb
73
+ - lib/just_inform/version.rb
74
+ homepage: https://github.com/GSA-OCSIT/just_inform
75
+ licenses:
76
+ - MIT
77
+ metadata: {}
78
+ post_install_message:
79
+ rdoc_options:
80
+ - --charset=UTF-8
81
+ require_paths:
82
+ - lib
83
+ required_ruby_version: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - '>='
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ requirements: []
94
+ rubyforge_project:
95
+ rubygems_version: 2.0.5
96
+ signing_key:
97
+ specification_version: 4
98
+ summary: Reginfo.gov XML Forms Report Data Parser
99
+ test_files: []