just_inform 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +12 -9
- data/just_inform.gemspec +0 -1
- data/lib/just_inform.rb +3 -24
- data/lib/just_inform/downloader.rb +32 -11
- data/lib/just_inform/parser.rb +6 -3
- data/lib/just_inform/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ec2675e3a079ced6653676bdea664378f0e7bb1e
|
4
|
+
data.tar.gz: 3e68625e479cf8959c347d4b1bfe777ea64f2c3e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4726db2cd02fcb8f5b0cc698b2cfe05e6f35ad43504828d6393481d4b47fe6088a411190ebdc70be3c1efe8d034e51fdf86f6b2ef2c44074daf4969075743bd7
|
7
|
+
data.tar.gz: bd4bfa4acbb2a9271b296c037458adcc58215d6ef7465c16ddf3bba7e6f5265000c8757e3903732be0c85700cd439cda3b08b6c014de28f1dc632225e9ed4bac
|
data/README.md
CHANGED
@@ -4,15 +4,18 @@ Just Inform is a simple library to help parse the daily XML feed from [RegInfo.g
|
|
4
4
|
|
5
5
|
## Basic Usage
|
6
6
|
|
7
|
-
|
8
|
-
gem install just_inform
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
parser
|
15
|
-
|
7
|
+
# Install the gem
|
8
|
+
gem install just_inform
|
9
|
+
|
10
|
+
# Load IRB
|
11
|
+
irb
|
12
|
+
|
13
|
+
# Create a new parser instance (same as JustInform::Parser.new)
|
14
|
+
parser = JustInform.new
|
15
|
+
|
16
|
+
# Return an array with the sort value (cost) and InformationCollectionRequest objects
|
17
|
+
parser.top(10, :cost) =>
|
18
|
+
[["33,672,000,000", U.S. Individual Income Tax Return], ["8,190,000,004", Rule 10b-10 Confirmation of Securities Transactions (17 C.F.R. 240.10b-10)], ["2,857,465,000", Exchange Act Form 10-K], ["2,727,479,226", America Invents Act Section 10 Patent Fee Adjustments], ["1,801,830,000", Application for a U.S. Passport], ["1,300,147,200", ASSIST Database], ["1,280,341,567", Form SD], ["910,000,000", Online Application for Nonimmigrant Visa], ["791,160,764", Rules for Patent Maintenance Fees], ["772,798,833", Initial Patent Applications]]
|
16
19
|
|
17
20
|
You can also sort by burden hours (:burden) and number of responses (:responses).
|
18
21
|
|
data/just_inform.gemspec
CHANGED
data/lib/just_inform.rb
CHANGED
@@ -1,27 +1,6 @@
|
|
1
|
-
module JustInform
|
2
|
-
|
3
|
-
|
4
|
-
module XML
|
5
|
-
class Node
|
6
|
-
|
7
|
-
def inspect_attributes
|
8
|
-
[:name]
|
9
|
-
end
|
10
|
-
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
module Nokogiri
|
16
|
-
module XML
|
17
|
-
|
18
|
-
class Document < Nokogiri::XML::Node
|
19
|
-
|
20
|
-
def inspect_attributes
|
21
|
-
[:name, :children]
|
22
|
-
end
|
23
|
-
|
24
|
-
end
|
1
|
+
module JustInform
|
2
|
+
def self.new
|
3
|
+
Parser.new
|
25
4
|
end
|
26
5
|
end
|
27
6
|
|
@@ -1,27 +1,48 @@
|
|
1
1
|
module JustInform
|
2
2
|
class Downloader
|
3
|
+
require 'tempfile' unless defined?(Tempfile)
|
3
4
|
require 'fileutils' unless defined?(FileUtils)
|
4
|
-
require '
|
5
|
+
require 'net/http' unless defined?(Net::HTTP)
|
5
6
|
|
6
7
|
def self.get_latest
|
7
8
|
if self.current_file?
|
8
|
-
puts "Using downloaded file"
|
9
|
-
|
9
|
+
puts "Using downloaded file at #{File.join('/tmp', self.current_filename)} and parsing with Nokogiri"
|
10
|
+
|
11
|
+
latest = File.open(File.join('/tmp', self.current_filename), 'r')
|
10
12
|
else
|
11
|
-
|
12
|
-
|
13
|
-
|
13
|
+
thread = download
|
14
|
+
|
15
|
+
print "%.2f%% Complete\r" % thread[:progress].to_f until thread.join 1
|
16
|
+
latest = File.open(File.join('/tmp', self.current_filename), 'r')
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.download
|
21
|
+
Thread.new do
|
22
|
+
thread = Thread.current
|
23
|
+
body = thread[:body] = []
|
24
|
+
|
25
|
+
file = File.open(File.join('/tmp',self.current_filename), 'w+b')
|
26
|
+
|
27
|
+
Net::HTTP.new('www.reginfo.gov').request_get('/public/do/PRAXML?type=inventory') do |response|
|
28
|
+
length = thread[:length] = response['Content-Length'].to_i
|
29
|
+
file_size_in_mb = response['Content-Length'].to_i/1024/1024
|
30
|
+
puts "Downloading ~#{file_size_in_mb} MB of XML data and parsing with Nokogiri. This may take a few minutes depending on your processor and internet connection"
|
31
|
+
|
32
|
+
response.read_body do |fragment|
|
33
|
+
file.write(fragment)
|
34
|
+
thread[:done] = (thread[:done] || 0) + fragment.length
|
35
|
+
thread[:progress] = thread[:done].quo(length) * 100
|
36
|
+
end
|
37
|
+
file.close
|
38
|
+
end
|
14
39
|
end
|
15
40
|
end
|
16
41
|
|
17
42
|
private
|
18
43
|
|
19
44
|
def self.current_file?
|
20
|
-
|
21
|
-
unless File.directory?(dir)
|
22
|
-
FileUtils.mkdir_p(dir)
|
23
|
-
end
|
24
|
-
true if File.exists?(File.join dir, self.current_filename)
|
45
|
+
true if File.exists?(File.join('/tmp', self.current_filename))
|
25
46
|
end
|
26
47
|
|
27
48
|
def self.current_filename
|
data/lib/just_inform/parser.rb
CHANGED
@@ -1,13 +1,17 @@
|
|
1
1
|
module JustInform
|
2
2
|
class Parser
|
3
3
|
require 'nokogiri' unless defined?(Nokogiri)
|
4
|
-
|
4
|
+
require 'pry'
|
5
5
|
attr_accessor :doc
|
6
6
|
|
7
7
|
def initialize
|
8
8
|
load
|
9
9
|
end
|
10
10
|
|
11
|
+
def inspect
|
12
|
+
"#{self.class} - #{self.doc.children.first.name}"
|
13
|
+
end
|
14
|
+
|
11
15
|
# Show top 10 forms, PRA.top(number_of_results, sort_by_symbol)
|
12
16
|
# parser.top(2, :burden) => [<Nokogiri::XML::Element>, <Nokogiri::XML::Element>]
|
13
17
|
def top(limit=10, attrib_name=:burden_hours)
|
@@ -56,9 +60,8 @@ module JustInform
|
|
56
60
|
|
57
61
|
# load XML data into Nokogiri and set @doc instance variable
|
58
62
|
def load
|
59
|
-
puts 'Loading >50 MB of XML data and parsing with Nokogiri, this could take awhile (e.g. 2-3m)'
|
60
63
|
data = Downloader.get_latest
|
61
|
-
@doc = Nokogiri::XML(data)
|
64
|
+
@doc = ::Nokogiri::XML(data)
|
62
65
|
end
|
63
66
|
|
64
67
|
|
data/lib/just_inform/version.rb
CHANGED