just_inform 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 33226c88491ca873a4a890b6210b238d71d2303b
4
- data.tar.gz: 3f4a35a9f074f03b5174652d339e6e620b581530
3
+ metadata.gz: ec2675e3a079ced6653676bdea664378f0e7bb1e
4
+ data.tar.gz: 3e68625e479cf8959c347d4b1bfe777ea64f2c3e
5
5
  SHA512:
6
- metadata.gz: bf2bf9d4ef845106848dd4d29e42d0bf6cac9fe9f423a4fcb7b33d8188b7b1dfad96d0ec7e8c9d7ab4eb9489756ccd8eadc407a79b041a6ea42631c2a72c161a
7
- data.tar.gz: a6d3abaa960709c01ea484c60e8b74d05b03d7bbe4c8aa10b888577011f1212552e0491debf210b2ddf6189cf4fa1302f0897f58012bd9add3d10a00417099b9
6
+ metadata.gz: 4726db2cd02fcb8f5b0cc698b2cfe05e6f35ad43504828d6393481d4b47fe6088a411190ebdc70be3c1efe8d034e51fdf86f6b2ef2c44074daf4969075743bd7
7
+ data.tar.gz: bd4bfa4acbb2a9271b296c037458adcc58215d6ef7465c16ddf3bba7e6f5265000c8757e3903732be0c85700cd439cda3b08b6c014de28f1dc632225e9ed4bac
data/README.md CHANGED
@@ -4,15 +4,18 @@ Just Inform is a simple library to help parse the daily XML feed from [RegInfo.g
4
4
 
5
5
  ## Basic Usage
6
6
 
7
- #### Install the gem
8
- gem install just_inform
9
-
10
- #### Create a new parser instance
11
- parser = JustInform.new
12
-
13
- #### Return an array with the sort value (cost) and InformationCollectionRequest objects
14
- parser.top(10, :cost) =>
15
- [["33,672,000,000", U.S. Individual Income Tax Return], ["8,190,000,004", Rule 10b-10 Confirmation of Securities Transactions (17 C.F.R. 240.10b-10)], ["2,857,465,000", Exchange Act Form 10-K], ["2,727,479,226", America Invents Act Section 10 Patent Fee Adjustments], ["1,801,830,000", Application for a U.S. Passport], ["1,300,147,200", ASSIST Database], ["1,280,341,567", Form SD], ["910,000,000", Online Application for Nonimmigrant Visa], ["791,160,764", Rules for Patent Maintenance Fees], ["772,798,833", Initial Patent Applications]]
7
+ # Install the gem
8
+ gem install just_inform
9
+
10
+ # Load IRB
11
+ irb
12
+
13
+ # Create a new parser instance (same as JustInform::Parser.new)
14
+ parser = JustInform.new
15
+
16
+ # Return an array with the sort value (cost) and InformationCollectionRequest objects
17
+ parser.top(10, :cost) =>
18
+ [["33,672,000,000", U.S. Individual Income Tax Return], ["8,190,000,004", Rule 10b-10 Confirmation of Securities Transactions (17 C.F.R. 240.10b-10)], ["2,857,465,000", Exchange Act Form 10-K], ["2,727,479,226", America Invents Act Section 10 Patent Fee Adjustments], ["1,801,830,000", Application for a U.S. Passport], ["1,300,147,200", ASSIST Database], ["1,280,341,567", Form SD], ["910,000,000", Online Application for Nonimmigrant Visa], ["791,160,764", Rules for Patent Maintenance Fees], ["772,798,833", Initial Patent Applications]]
16
19
 
17
20
  You can also sort by burden hours (:burden) and number of responses (:responses).
18
21
 
@@ -17,7 +17,6 @@ Gem::Specification.new do |s|
17
17
  "just_inform.gemspec",
18
18
  "LICENSE.md",
19
19
  "README.md",
20
- 'data',
21
20
  "lib/just_inform.rb",
22
21
  "lib/just_inform/downloader.rb",
23
22
  "lib/just_inform/information_collection.rb",
@@ -1,27 +1,6 @@
1
- module JustInform; end
2
-
3
- module Nokogiri
4
- module XML
5
- class Node
6
-
7
- def inspect_attributes
8
- [:name]
9
- end
10
-
11
- end
12
- end
13
- end
14
-
15
- module Nokogiri
16
- module XML
17
-
18
- class Document < Nokogiri::XML::Node
19
-
20
- def inspect_attributes
21
- [:name, :children]
22
- end
23
-
24
- end
1
+ module JustInform
2
+ def self.new
3
+ Parser.new
25
4
  end
26
5
  end
27
6
 
@@ -1,27 +1,48 @@
1
1
  module JustInform
2
2
  class Downloader
3
+ require 'tempfile' unless defined?(Tempfile)
3
4
  require 'fileutils' unless defined?(FileUtils)
4
- require 'open-uri' unless defined?(OpenURI)
5
+ require 'net/http' unless defined?(Net::HTTP)
5
6
 
6
7
  def self.get_latest
7
8
  if self.current_file?
8
- puts "Using downloaded file"
9
- latest = File.open(File.join 'data', self.current_filename)
9
+ puts "Using downloaded file at #{File.join('/tmp', self.current_filename)} and parsing with Nokogiri"
10
+
11
+ latest = File.open(File.join('/tmp', self.current_filename), 'r')
10
12
  else
11
- puts 'Downloading file, this could take a minute...'
12
- latest = File.open(File.join('data',self.current_filename), 'wb') {|file| file << open('http://www.reginfo.gov/public/do/PRAXML?type=inventory').read}
13
- latest = File.open(File.join 'data', self.current_filename)
13
+ thread = download
14
+
15
+ print "%.2f%% Complete\r" % thread[:progress].to_f until thread.join 1
16
+ latest = File.open(File.join('/tmp', self.current_filename), 'r')
17
+ end
18
+ end
19
+
20
+ def self.download
21
+ Thread.new do
22
+ thread = Thread.current
23
+ body = thread[:body] = []
24
+
25
+ file = File.open(File.join('/tmp',self.current_filename), 'w+b')
26
+
27
+ Net::HTTP.new('www.reginfo.gov').request_get('/public/do/PRAXML?type=inventory') do |response|
28
+ length = thread[:length] = response['Content-Length'].to_i
29
+ file_size_in_mb = response['Content-Length'].to_i/1024/1024
30
+ puts "Downloading ~#{file_size_in_mb} MB of XML data and parsing with Nokogiri. This may take a few minutes depending on your processor and internet connection"
31
+
32
+ response.read_body do |fragment|
33
+ file.write(fragment)
34
+ thread[:done] = (thread[:done] || 0) + fragment.length
35
+ thread[:progress] = thread[:done].quo(length) * 100
36
+ end
37
+ file.close
38
+ end
14
39
  end
15
40
  end
16
41
 
17
42
  private
18
43
 
19
44
  def self.current_file?
20
- dir = 'data'
21
- unless File.directory?(dir)
22
- FileUtils.mkdir_p(dir)
23
- end
24
- true if File.exists?(File.join dir, self.current_filename)
45
+ true if File.exists?(File.join('/tmp', self.current_filename))
25
46
  end
26
47
 
27
48
  def self.current_filename
@@ -1,13 +1,17 @@
1
1
  module JustInform
2
2
  class Parser
3
3
  require 'nokogiri' unless defined?(Nokogiri)
4
-
4
+ require 'pry'
5
5
  attr_accessor :doc
6
6
 
7
7
  def initialize
8
8
  load
9
9
  end
10
10
 
11
+ def inspect
12
+ "#{self.class} - #{self.doc.children.first.name}"
13
+ end
14
+
11
15
  # Show top 10 forms, PRA.top(number_of_results, sort_by_symbol)
12
16
  # parser.top(2, :burden) => [<Nokogiri::XML::Element>, <Nokogiri::XML::Element>]
13
17
  def top(limit=10, attrib_name=:burden_hours)
@@ -56,9 +60,8 @@ module JustInform
56
60
 
57
61
  # load XML data into Nokogiri and set @doc instance variable
58
62
  def load
59
- puts 'Loading >50 MB of XML data and parsing with Nokogiri, this could take awhile (e.g. 2-3m)'
60
63
  data = Downloader.get_latest
61
- @doc = Nokogiri::XML(data)
64
+ @doc = ::Nokogiri::XML(data)
62
65
  end
63
66
 
64
67
 
@@ -1,3 +1,3 @@
1
1
  module JustInform
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: just_inform
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Grevich