just_inform 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 33226c88491ca873a4a890b6210b238d71d2303b
4
- data.tar.gz: 3f4a35a9f074f03b5174652d339e6e620b581530
3
+ metadata.gz: ec2675e3a079ced6653676bdea664378f0e7bb1e
4
+ data.tar.gz: 3e68625e479cf8959c347d4b1bfe777ea64f2c3e
5
5
  SHA512:
6
- metadata.gz: bf2bf9d4ef845106848dd4d29e42d0bf6cac9fe9f423a4fcb7b33d8188b7b1dfad96d0ec7e8c9d7ab4eb9489756ccd8eadc407a79b041a6ea42631c2a72c161a
7
- data.tar.gz: a6d3abaa960709c01ea484c60e8b74d05b03d7bbe4c8aa10b888577011f1212552e0491debf210b2ddf6189cf4fa1302f0897f58012bd9add3d10a00417099b9
6
+ metadata.gz: 4726db2cd02fcb8f5b0cc698b2cfe05e6f35ad43504828d6393481d4b47fe6088a411190ebdc70be3c1efe8d034e51fdf86f6b2ef2c44074daf4969075743bd7
7
+ data.tar.gz: bd4bfa4acbb2a9271b296c037458adcc58215d6ef7465c16ddf3bba7e6f5265000c8757e3903732be0c85700cd439cda3b08b6c014de28f1dc632225e9ed4bac
data/README.md CHANGED
@@ -4,15 +4,18 @@ Just Inform is a simple library to help parse the daily XML feed from [RegInfo.g
4
4
 
5
5
  ## Basic Usage
6
6
 
7
- #### Install the gem
8
- gem install just_inform
9
-
10
- #### Create a new parser instance
11
- parser = JustInform.new
12
-
13
- #### Return an array with the sort value (cost) and InformationCollectionRequest objects
14
- parser.top(10, :cost) =>
15
- [["33,672,000,000", U.S. Individual Income Tax Return], ["8,190,000,004", Rule 10b-10 Confirmation of Securities Transactions (17 C.F.R. 240.10b-10)], ["2,857,465,000", Exchange Act Form 10-K], ["2,727,479,226", America Invents Act Section 10 Patent Fee Adjustments], ["1,801,830,000", Application for a U.S. Passport], ["1,300,147,200", ASSIST Database], ["1,280,341,567", Form SD], ["910,000,000", Online Application for Nonimmigrant Visa], ["791,160,764", Rules for Patent Maintenance Fees], ["772,798,833", Initial Patent Applications]]
7
+ # Install the gem
8
+ gem install just_inform
9
+
10
+ # Load IRB
11
+ irb
12
+
13
+ # Create a new parser instance (same as JustInform::Parser.new)
14
+ parser = JustInform.new
15
+
16
+ # Return an array with the sort value (cost) and InformationCollectionRequest objects
17
+ parser.top(10, :cost) =>
18
+ [["33,672,000,000", U.S. Individual Income Tax Return], ["8,190,000,004", Rule 10b-10 Confirmation of Securities Transactions (17 C.F.R. 240.10b-10)], ["2,857,465,000", Exchange Act Form 10-K], ["2,727,479,226", America Invents Act Section 10 Patent Fee Adjustments], ["1,801,830,000", Application for a U.S. Passport], ["1,300,147,200", ASSIST Database], ["1,280,341,567", Form SD], ["910,000,000", Online Application for Nonimmigrant Visa], ["791,160,764", Rules for Patent Maintenance Fees], ["772,798,833", Initial Patent Applications]]
16
19
 
17
20
  You can also sort by burden hours (:burden) and number of responses (:responses).
18
21
 
@@ -17,7 +17,6 @@ Gem::Specification.new do |s|
17
17
  "just_inform.gemspec",
18
18
  "LICENSE.md",
19
19
  "README.md",
20
- 'data',
21
20
  "lib/just_inform.rb",
22
21
  "lib/just_inform/downloader.rb",
23
22
  "lib/just_inform/information_collection.rb",
@@ -1,27 +1,6 @@
1
- module JustInform; end
2
-
3
- module Nokogiri
4
- module XML
5
- class Node
6
-
7
- def inspect_attributes
8
- [:name]
9
- end
10
-
11
- end
12
- end
13
- end
14
-
15
- module Nokogiri
16
- module XML
17
-
18
- class Document < Nokogiri::XML::Node
19
-
20
- def inspect_attributes
21
- [:name, :children]
22
- end
23
-
24
- end
1
+ module JustInform
2
+ def self.new
3
+ Parser.new
25
4
  end
26
5
  end
27
6
 
@@ -1,27 +1,48 @@
1
1
  module JustInform
2
2
  class Downloader
3
+ require 'tempfile' unless defined?(Tempfile)
3
4
  require 'fileutils' unless defined?(FileUtils)
4
- require 'open-uri' unless defined?(OpenURI)
5
+ require 'net/http' unless defined?(Net::HTTP)
5
6
 
6
7
  def self.get_latest
7
8
  if self.current_file?
8
- puts "Using downloaded file"
9
- latest = File.open(File.join 'data', self.current_filename)
9
+ puts "Using downloaded file at #{File.join('/tmp', self.current_filename)} and parsing with Nokogiri"
10
+
11
+ latest = File.open(File.join('/tmp', self.current_filename), 'r')
10
12
  else
11
- puts 'Downloading file, this could take a minute...'
12
- latest = File.open(File.join('data',self.current_filename), 'wb') {|file| file << open('http://www.reginfo.gov/public/do/PRAXML?type=inventory').read}
13
- latest = File.open(File.join 'data', self.current_filename)
13
+ thread = download
14
+
15
+ print "%.2f%% Complete\r" % thread[:progress].to_f until thread.join 1
16
+ latest = File.open(File.join('/tmp', self.current_filename), 'r')
17
+ end
18
+ end
19
+
20
+ def self.download
21
+ Thread.new do
22
+ thread = Thread.current
23
+ body = thread[:body] = []
24
+
25
+ file = File.open(File.join('/tmp',self.current_filename), 'w+b')
26
+
27
+ Net::HTTP.new('www.reginfo.gov').request_get('/public/do/PRAXML?type=inventory') do |response|
28
+ length = thread[:length] = response['Content-Length'].to_i
29
+ file_size_in_mb = response['Content-Length'].to_i/1024/1024
30
+ puts "Downloading ~#{file_size_in_mb} MB of XML data and parsing with Nokogiri. This may take a few minutes depending on your processor and internet connection"
31
+
32
+ response.read_body do |fragment|
33
+ file.write(fragment)
34
+ thread[:done] = (thread[:done] || 0) + fragment.length
35
+ thread[:progress] = thread[:done].quo(length) * 100
36
+ end
37
+ file.close
38
+ end
14
39
  end
15
40
  end
16
41
 
17
42
  private
18
43
 
19
44
  def self.current_file?
20
- dir = 'data'
21
- unless File.directory?(dir)
22
- FileUtils.mkdir_p(dir)
23
- end
24
- true if File.exists?(File.join dir, self.current_filename)
45
+ true if File.exists?(File.join('/tmp', self.current_filename))
25
46
  end
26
47
 
27
48
  def self.current_filename
@@ -1,13 +1,17 @@
1
1
  module JustInform
2
2
  class Parser
3
3
  require 'nokogiri' unless defined?(Nokogiri)
4
-
4
+ require 'pry'
5
5
  attr_accessor :doc
6
6
 
7
7
  def initialize
8
8
  load
9
9
  end
10
10
 
11
+ def inspect
12
+ "#{self.class} - #{self.doc.children.first.name}"
13
+ end
14
+
11
15
  # Show top 10 forms, PRA.top(sort_by_symbol, number_of_results)
12
16
  # parser.top(:burden,2) => [<Nokogiri::XML::Element>, <Nokogiri::XML::Element>]
13
17
  def top(limit=10, attrib_name=:burden_hours)
@@ -56,9 +60,8 @@ module JustInform
56
60
 
57
61
  # load XML data into Nokogiri and set @doc instance variable
58
62
  def load
59
- puts 'Loading >50 MB of XML data and parsing with Nokogiri, this could take awhile (e.g. 2-3m)'
60
63
  data = Downloader.get_latest
61
- @doc = Nokogiri::XML(data)
64
+ @doc = ::Nokogiri::XML(data)
62
65
  end
63
66
 
64
67
 
@@ -1,3 +1,3 @@
1
1
  module JustInform
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: just_inform
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Grevich