just_inform 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -9
- data/just_inform.gemspec +0 -1
- data/lib/just_inform.rb +3 -24
- data/lib/just_inform/downloader.rb +32 -11
- data/lib/just_inform/parser.rb +6 -3
- data/lib/just_inform/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec2675e3a079ced6653676bdea664378f0e7bb1e
|
4
|
+
data.tar.gz: 3e68625e479cf8959c347d4b1bfe777ea64f2c3e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4726db2cd02fcb8f5b0cc698b2cfe05e6f35ad43504828d6393481d4b47fe6088a411190ebdc70be3c1efe8d034e51fdf86f6b2ef2c44074daf4969075743bd7
|
7
|
+
data.tar.gz: bd4bfa4acbb2a9271b296c037458adcc58215d6ef7465c16ddf3bba7e6f5265000c8757e3903732be0c85700cd439cda3b08b6c014de28f1dc632225e9ed4bac
|
data/README.md
CHANGED
@@ -4,15 +4,18 @@ Just Inform is a simple library to help parse the daily XML feed from [RegInfo.g
|
|
4
4
|
|
5
5
|
## Basic Usage
|
6
6
|
|
7
|
-
|
8
|
-
gem install just_inform
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
parser
|
15
|
-
|
7
|
+
# Install the gem
|
8
|
+
gem install just_inform
|
9
|
+
|
10
|
+
# Load IRB
|
11
|
+
irb
|
12
|
+
|
13
|
+
# Create a new parser instance (same as JustInform::Parser.new)
|
14
|
+
parser = JustInform.new
|
15
|
+
|
16
|
+
# Return an array with the sort value (cost) and InformationCollectionRequest objects
|
17
|
+
parser.top(10, :cost) =>
|
18
|
+
[["33,672,000,000", U.S. Individual Income Tax Return], ["8,190,000,004", Rule 10b-10 Confirmation of Securities Transactions (17 C.F.R. 240.10b-10)], ["2,857,465,000", Exchange Act Form 10-K], ["2,727,479,226", America Invents Act Section 10 Patent Fee Adjustments], ["1,801,830,000", Application for a U.S. Passport], ["1,300,147,200", ASSIST Database], ["1,280,341,567", Form SD], ["910,000,000", Online Application for Nonimmigrant Visa], ["791,160,764", Rules for Patent Maintenance Fees], ["772,798,833", Initial Patent Applications]]
|
16
19
|
|
17
20
|
You can also sort by burden hours (:burden) and number of responses (:responses).
|
18
21
|
|
data/just_inform.gemspec
CHANGED
data/lib/just_inform.rb
CHANGED
@@ -1,27 +1,6 @@
|
|
1
|
-
module JustInform
|
2
|
-
|
3
|
-
|
4
|
-
module XML
|
5
|
-
class Node
|
6
|
-
|
7
|
-
def inspect_attributes
|
8
|
-
[:name]
|
9
|
-
end
|
10
|
-
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
module Nokogiri
|
16
|
-
module XML
|
17
|
-
|
18
|
-
class Document < Nokogiri::XML::Node
|
19
|
-
|
20
|
-
def inspect_attributes
|
21
|
-
[:name, :children]
|
22
|
-
end
|
23
|
-
|
24
|
-
end
|
1
|
+
module JustInform
|
2
|
+
def self.new
|
3
|
+
Parser.new
|
25
4
|
end
|
26
5
|
end
|
27
6
|
|
@@ -1,27 +1,48 @@
|
|
1
1
|
module JustInform
|
2
2
|
class Downloader
|
3
|
+
require 'tempfile' unless defined?(Tempfile)
|
3
4
|
require 'fileutils' unless defined?(FileUtils)
|
4
|
-
require '
|
5
|
+
require 'net/http' unless defined?(Net::HTTP)
|
5
6
|
|
6
7
|
def self.get_latest
|
7
8
|
if self.current_file?
|
8
|
-
puts "Using downloaded file"
|
9
|
-
|
9
|
+
puts "Using downloaded file at #{File.join('/tmp', self.current_filename)} and parsing with Nokogiri"
|
10
|
+
|
11
|
+
latest = File.open(File.join('/tmp', self.current_filename), 'r')
|
10
12
|
else
|
11
|
-
|
12
|
-
|
13
|
-
|
13
|
+
thread = download
|
14
|
+
|
15
|
+
print "%.2f%% Complete\r" % thread[:progress].to_f until thread.join 1
|
16
|
+
latest = File.open(File.join('/tmp', self.current_filename), 'r')
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.download
|
21
|
+
Thread.new do
|
22
|
+
thread = Thread.current
|
23
|
+
body = thread[:body] = []
|
24
|
+
|
25
|
+
file = File.open(File.join('/tmp',self.current_filename), 'w+b')
|
26
|
+
|
27
|
+
Net::HTTP.new('www.reginfo.gov').request_get('/public/do/PRAXML?type=inventory') do |response|
|
28
|
+
length = thread[:length] = response['Content-Length'].to_i
|
29
|
+
file_size_in_mb = response['Content-Length'].to_i/1024/1024
|
30
|
+
puts "Downloading ~#{file_size_in_mb} MB of XML data and parsing with Nokogiri. This may take a few minutes depending on your processor and internet connection"
|
31
|
+
|
32
|
+
response.read_body do |fragment|
|
33
|
+
file.write(fragment)
|
34
|
+
thread[:done] = (thread[:done] || 0) + fragment.length
|
35
|
+
thread[:progress] = thread[:done].quo(length) * 100
|
36
|
+
end
|
37
|
+
file.close
|
38
|
+
end
|
14
39
|
end
|
15
40
|
end
|
16
41
|
|
17
42
|
private
|
18
43
|
|
19
44
|
def self.current_file?
|
20
|
-
|
21
|
-
unless File.directory?(dir)
|
22
|
-
FileUtils.mkdir_p(dir)
|
23
|
-
end
|
24
|
-
true if File.exists?(File.join dir, self.current_filename)
|
45
|
+
true if File.exists?(File.join('/tmp', self.current_filename))
|
25
46
|
end
|
26
47
|
|
27
48
|
def self.current_filename
|
data/lib/just_inform/parser.rb
CHANGED
@@ -1,13 +1,17 @@
|
|
1
1
|
module JustInform
|
2
2
|
class Parser
|
3
3
|
require 'nokogiri' unless defined?(Nokogiri)
|
4
|
-
|
4
|
+
require 'pry'
|
5
5
|
attr_accessor :doc
|
6
6
|
|
7
7
|
def initialize
|
8
8
|
load
|
9
9
|
end
|
10
10
|
|
11
|
+
def inspect
|
12
|
+
"#{self.class} - #{self.doc.children.first.name}"
|
13
|
+
end
|
14
|
+
|
11
15
|
# Show top 10 forms, PRA.top(sort_by_symbol, number_of_results)
|
12
16
|
# parser.top(:burden,2) => [<Nokogiri::XML::Element>, <Nokogiri::XML::Element>]
|
13
17
|
def top(limit=10, attrib_name=:burden_hours)
|
@@ -56,9 +60,8 @@ module JustInform
|
|
56
60
|
|
57
61
|
# load XML data into Nokogiri and set @doc instance variable
|
58
62
|
def load
|
59
|
-
puts 'Loading >50 MB of XML data and parsing with Nokogiri, this could take awhile (e.g. 2-3m)'
|
60
63
|
data = Downloader.get_latest
|
61
|
-
@doc = Nokogiri::XML(data)
|
64
|
+
@doc = ::Nokogiri::XML(data)
|
62
65
|
end
|
63
66
|
|
64
67
|
|
data/lib/just_inform/version.rb
CHANGED