just_inform 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +12 -0
- data/LICENSE.md +22 -0
- data/README.md +30 -0
- data/just_inform.gemspec +38 -0
- data/lib/just_inform.rb +32 -0
- data/lib/just_inform/downloader.rb +28 -0
- data/lib/just_inform/information_collection.rb +11 -0
- data/lib/just_inform/information_collection_request.rb +50 -0
- data/lib/just_inform/parser.rb +66 -0
- data/lib/just_inform/version.rb +3 -0
- metadata +99 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: a9d8c59057f16fd2e11ac1cc01088f690f1e508b
|
4
|
+
data.tar.gz: e7d8c5d339a28f4cc640f02eecd0ca0a5338f30f
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6a0a174207bb7e1148180622225bcdba1ea821e54a127ef27e64bd3a1122eb00b06705ba2ccfdd4bb9d816815074b1152a68998be5da0e2feb9c4b2b8baaaa70
|
7
|
+
data.tar.gz: 8e812e6e68700d18d64bbd11ccb44ffe800d46a69249d3e8d27cefccbbe965a9e079ada9d2b63d19ca8c773f62b11bd1fa617efe79d2008b3becb8b1e4f1d2e1
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
data/LICENSE.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2013 [Office of Citizen Services and Innovative Technologies, U.S. General Services Administration (GSA-OCSIT)]
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
21
|
+
|
22
|
+
[Office of Citizen Services and Innovative Technologies, U.S. General Services Administration (GSA-OCSIT)]: https://github.com/GSA-OCSIT
|
data/README.md
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# What Is Just Inform
|
2
|
+
|
3
|
+
Just Inform is a simple library to help parse the daily XML feed from [RegInfo.gov]. We needed a tool to help sort government forms based on burden hours, cost, and responses.
|
4
|
+
|
5
|
+
## Basic Usage
|
6
|
+
|
7
|
+
#### Install the gem
|
8
|
+
gem install just_inform
|
9
|
+
|
10
|
+
#### Create a new parser instance
|
11
|
+
parser = JustInform::Parser.new
|
12
|
+
|
13
|
+
#### Return an array with the sort value (cost) and InformationCollectionRequest objects
|
14
|
+
parser.top(10, :cost) =>
|
15
|
+
[["33,672,000,000", U.S. Individual Income Tax Return], ["8,190,000,004", Rule 10b-10 Confirmation of Securities Transactions (17 C.F.R. 240.10b-10)], ["2,857,465,000", Exchange Act Form 10-K], ["2,727,479,226", America Invents Act Section 10 Patent Fee Adjustments], ["1,801,830,000", Application for a U.S. Passport], ["1,300,147,200", ASSIST Database], ["1,280,341,567", Form SD], ["910,000,000", Online Application for Nonimmigrant Visa], ["791,160,764", Rules for Patent Maintenance Fees], ["772,798,833", Initial Patent Applications]]
|
16
|
+
|
17
|
+
You can also sort by burden hours (:burden) and number of responses (:responses).
|
18
|
+
|
19
|
+
## Todo
|
20
|
+
|
21
|
+
* Add tests with 100% coverage
|
22
|
+
* Add ActiveRecord like finder methods (e.g. Parser.find_by_title('Passport'))
|
23
|
+
* Build into a Sinatra/Rails app that creates an API/webpage exhibiting the topX forms in gov by burden hours, cost, and response numbers. This would be updated daily from the XML feed.
|
24
|
+
|
25
|
+
## License
|
26
|
+
[MIT License]
|
27
|
+
|
28
|
+
[RegInfo.gov]: http://www.reginfo.gov
|
29
|
+
[MIT License]: https://github.com/GSA-OCSIT/just_inform/blob/master/LICENSE.md
|
30
|
+
[README]: https://github.com/GSA-OCSIT/just_inform/blob/master/README.md
|
data/just_inform.gemspec
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for just_inform.

# Put this checkout's lib/ directory on the load path so the version constant
# can be read straight from the source tree.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'just_inform/version'

Gem::Specification.new do |spec|
  # --- Identity ---------------------------------------------------------
  spec.name     = 'just_inform'
  spec.version  = JustInform::VERSION
  spec.date     = '2013-08-15'
  spec.platform = Gem::Platform::RUBY
  spec.license  = 'MIT'
  spec.homepage = 'https://github.com/GSA-OCSIT/just_inform'

  # --- Authorship and description ---------------------------------------
  spec.authors     = ['Justin Grevich']
  spec.email       = 'justin@grevi.ch'
  spec.summary     = 'Reginfo.gov XML Forms Report Data Parser'
  spec.description = 'A library to help parse the XML report published by Reginfo.gov (http://www.reginfo.gov/public/do/PRAXML)'

  # --- Packaged files (same entries, same order) ------------------------
  spec.files = %w[
    Gemfile
    Gemfile.lock
    just_inform.gemspec
    LICENSE.md
    README.md
    data
    lib/just_inform.rb
    lib/just_inform/downloader.rb
    lib/just_inform/information_collection.rb
    lib/just_inform/information_collection_request.rb
    lib/just_inform/parser.rb
    lib/just_inform/version.rb
  ]
  spec.require_paths = ['lib']

  # --- Documentation ----------------------------------------------------
  spec.extra_rdoc_files = ['README.md', 'LICENSE.md']
  spec.rdoc_options     = ['--charset=UTF-8']

  # --- Dependencies -----------------------------------------------------
  spec.add_dependency('nokogiri', '~> 1.6.0')
  spec.add_development_dependency('bundler', '~> 1.3')
  spec.add_development_dependency('rake')

  # Only set when this RubyGems version exposes the writer.
  if spec.respond_to?(:required_rubygems_version=)
    spec.required_rubygems_version = Gem::Requirement.new('>= 0')
  end
end
|
data/lib/just_inform.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
module JustInform; end
|
2
|
+
|
3
|
+
# Load the real library BEFORE re-opening its classes. Re-opening first would
# create a bare `Nokogiri` constant, so any later
# `require 'nokogiri' unless defined?(Nokogiri)` guard would be skipped, and a
# later real load could replace the overrides below.
require 'nokogiri'

# Shorten the output of #inspect for parsed XML. The overrides return the list
# of attribute names to show.
# NOTE(review): relies on Nokogiri's pretty-printer consulting
# `inspect_attributes` when building #inspect output; confirm against the
# Nokogiri version in use.
module Nokogiri
  module XML
    class Node
      # Show only the node name when a node is inspected.
      def inspect_attributes
        [:name]
      end
    end

    class Document < Nokogiri::XML::Node
      # Show the document name and its children when a document is inspected.
      def inspect_attributes
        [:name, :children]
      end
    end
  end
end
|
27
|
+
|
28
|
+
require 'just_inform/downloader'
|
29
|
+
require 'just_inform/information_collection'
|
30
|
+
require 'just_inform/information_collection_request'
|
31
|
+
require 'just_inform/parser'
|
32
|
+
require 'just_inform/version'
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module JustInform
  # Provides today's RegInfo.gov inventory report as an open File.
  #
  # The report is cached as data/<YYYY.MM.DD>-CurrentInventoryReport.xml,
  # relative to the current working directory, and is downloaded only when
  # today's file is not already there.
  class Downloader
    require 'open-uri' unless defined?(OpenURI)

    # Cache directory, relative to the current working directory.
    DATA_DIR = 'data'.freeze

    # Location of the inventory report.
    REPORT_URL = 'http://www.reginfo.gov/public/do/PRAXML?type=inventory'.freeze

    # Returns a File, opened for reading, containing today's report.
    # Downloads the report first when today's file is not cached yet.
    # The caller owns the returned File and is responsible for closing it.
    def self.get_latest
      # Resolve the path once so a run that straddles midnight cannot test one
      # file name and then open another.
      path = File.join(DATA_DIR, current_filename)

      if File.exist?(path)
        puts "Using downloaded file"
      else
        puts 'Downloading file, this could take a minute...'
        download(path)
      end

      File.open(path)
    end

    # NOTE: the methods below were preceded by a bare `private`, which has no
    # effect on `def self.` methods. They have always been publicly callable
    # and are kept public so existing callers continue to work.

    # Returns true when today's report is already cached, false otherwise.
    def self.current_file?
      File.exist?(File.join(DATA_DIR, current_filename))
    end

    # Returns today's cache file name, e.g. "2013.08.15-CurrentInventoryReport.xml".
    def self.current_filename
      Time.now.strftime("%Y.%m.%d-CurrentInventoryReport.xml")
    end

    # Downloads the report to +path+.
    #
    # The data is streamed into a temporary ".part" file and renamed only when
    # the transfer completes, so an interrupted download is never mistaken for
    # a valid cached report on the next call.
    def self.download(path)
      Dir.mkdir(DATA_DIR) unless File.directory?(DATA_DIR)
      partial = "#{path}.part"

      begin
        # URI#open (from open-uri) is used because Kernel#open no longer opens
        # URLs on Ruby 3.0+.
        URI.parse(REPORT_URL).open do |remote|
          File.open(partial, 'wb') { |file| IO.copy_stream(remote, file) }
        end
        File.rename(partial, path)
      ensure
        # Only present here when the download or rename failed.
        File.delete(partial) if File.exist?(partial)
      end
    end
    private_class_method :download
  end
end
|
28
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
module JustInform
  # A single information collection. Keeps the raw data it was built from and
  # the ICRReferenceNumber element found in it; the remaining attributes are
  # plain accessors that start out unset.
  class InformationCollection
    attr_accessor :raw_data, :parent_id
    attr_accessor :title, :form_name, :file_name
    attr_accessor :burden_hours, :cost, :responses

    # data - object responding to #at_css (stored as-is in #raw_data).
    def initialize(data)
      reference_node = data.at_css('ICRReferenceNumber')
      @raw_data = data
      @parent_id = reference_node
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module JustInform
  # Read-only view over one <InformationCollectionRequest> element.
  #
  # Each reader looks its value up in #raw_data with a CSS selector. A missing
  # element yields nil for text readers and 0 for numeric readers, so one
  # incomplete record cannot abort a ranking over the whole report.
  class InformationCollectionRequest
    attr_accessor :raw_data

    # data - object responding to #at_css (e.g. a parsed XML element).
    def initialize(data)
      @raw_data = data
    end

    # Represents the request by its title. Always returns a String ("" when
    # the title is missing), because #inspect must not return nil.
    def inspect
      title.to_s
    end

    # Text of the Abstract element, or nil when absent.
    def abstract
      text_at('Abstract')
    end

    # Text of the AgencyCode element, or nil when absent.
    def agency_code
      text_at('AgencyCode')
    end

    # Text of the ICRReferenceNumber element, or nil when absent.
    def icr_reference_number
      text_at('ICRReferenceNumber')
    end

    # Text of the OMBControlNumber element, or nil when absent.
    def omb_control_number
      text_at('OMBControlNumber')
    end

    # Text of the Title element, or nil when absent.
    def title
      text_at('Title')
    end

    # Integer value of Burden > BurdenHour > TotalQuantity (0 when absent).
    def total_burden_hours
      integer_at('Burden BurdenHour TotalQuantity')
    end
    alias_method :burden_hours, :total_burden_hours
    alias_method :burden, :total_burden_hours

    # Integer value of Burden > BurdenCost > TotalAmount (0 when absent).
    def total_cost
      integer_at('Burden BurdenCost TotalAmount')
    end
    alias_method :cost, :total_cost

    # Integer value of Burden > BurdenResponse > TotalQuantity (0 when absent).
    def total_responses
      integer_at('Burden BurdenResponse TotalQuantity')
    end
    alias_method :responses, :total_responses

    private

    # Content of the first element matching +selector+, or nil when nothing
    # matches.
    def text_at(selector)
      node = @raw_data.at_css(selector)
      node && node.content
    end

    # Content of the first element matching +selector+ converted with #to_i;
    # nil.to_i makes a missing element count as 0.
    def integer_at(selector)
      text_at(selector).to_i
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module JustInform
  # Loads the inventory report and ranks its information collection requests.
  class Parser
    attr_accessor :doc

    # Fetches the latest report and parses it into #doc.
    def initialize
      load
    end

    # Returns the +limit+ highest-ranked requests as
    # [delimited_value_string, InformationCollectionRequest] pairs, highest
    # value first. Requests with equal values keep their document order.
    #
    #   parser.top(2, :cost)
    #   # => [["33,672,000,000", <request>], ["8,190,000,004", <request>]]
    #
    # limit       - maximum number of pairs to return (default 10).
    # attrib_name - name of the request reader to rank by, e.g. :burden_hours,
    #               :cost or :responses (default :burden_hours).
    def top(limit=10, attrib_name=:burden_hours)
      sort_method = attrib_name.to_sym

      # Build the request list and read each sort value exactly once.
      ranked = reports.each_with_index.map do |report, index|
        [report.public_send(sort_method), index, report]
      end

      # Descending by value; the original index breaks ties so the order is
      # deterministic.
      ranked.sort! do |a, b|
        by_value = b[0] <=> a[0]
        by_value == 0 ? a[1] <=> b[1] : by_value
      end

      ranked.first(limit).map do |value, _index, report|
        [number_to_delimited(value), report]
      end
    end

    # Returns an Array with one InformationCollectionRequest per
    # <InformationCollectionRequest> element found in +doc+ (default: #doc).
    def reports(doc=@doc)
      doc.xpath('//InformationCollectionRequest').map do |node|
        InformationCollectionRequest.new(node)
      end
    end

    private

    # Prettify a number with thousands separators: 1234567 => "1,234,567".
    # Uses gsub rather than gsub!, because gsub! returns nil when nothing is
    # substituted (any value below 1,000).
    def number_to_delimited(number)
      number.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1,")
    end

    # Load the XML data into Nokogiri and set the @doc instance variable.
    def load
      # Required here, unconditionally: lib/just_inform.rb re-opens the
      # Nokogiri namespace before the library is loaded, which makes a
      # `defined?(Nokogiri)` guard skip the require.
      require 'nokogiri'

      puts 'Loading >50 MB of XML data and parsing with Nokogiri, this could take awhile (e.g. 2-3m)'
      data = Downloader.get_latest
      begin
        @doc = Nokogiri::XML(data)
      ensure
        # The downloader hands back an open File; release it once parsed.
        data.close if data.respond_to?(:close)
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: just_inform
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Justin Grevich
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-08-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.6.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.6.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: bundler
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.3'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.3'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: A library to help parse the XML report published by Reginfo.gov (http://www.reginfo.gov/public/do/PRAXML)
|
56
|
+
email: justin@grevi.ch
|
57
|
+
executables: []
|
58
|
+
extensions: []
|
59
|
+
extra_rdoc_files:
|
60
|
+
- README.md
|
61
|
+
- LICENSE.md
|
62
|
+
files:
|
63
|
+
- Gemfile
|
64
|
+
- Gemfile.lock
|
65
|
+
- just_inform.gemspec
|
66
|
+
- LICENSE.md
|
67
|
+
- README.md
|
68
|
+
- lib/just_inform.rb
|
69
|
+
- lib/just_inform/downloader.rb
|
70
|
+
- lib/just_inform/information_collection.rb
|
71
|
+
- lib/just_inform/information_collection_request.rb
|
72
|
+
- lib/just_inform/parser.rb
|
73
|
+
- lib/just_inform/version.rb
|
74
|
+
homepage: https://github.com/GSA-OCSIT/just_inform
|
75
|
+
licenses:
|
76
|
+
- MIT
|
77
|
+
metadata: {}
|
78
|
+
post_install_message:
|
79
|
+
rdoc_options:
|
80
|
+
- --charset=UTF-8
|
81
|
+
require_paths:
|
82
|
+
- lib
|
83
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
84
|
+
requirements:
|
85
|
+
- - '>='
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - '>='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
requirements: []
|
94
|
+
rubyforge_project:
|
95
|
+
rubygems_version: 2.0.5
|
96
|
+
signing_key:
|
97
|
+
specification_version: 4
|
98
|
+
summary: Reginfo.gov XML Forms Report Data Parser
|
99
|
+
test_files: []
|