ecfs 0.3.3 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ require "ecfs/error"
9
9
  require "ecfs/too_many_filings_error"
10
10
  require "ecfs/bulk_filings_query"
11
11
  require "ecfs/daily_releases_query"
12
+ require "ecfs/solr_scrape_query"
12
13
 
13
14
  module ECFS
14
15
  end
@@ -0,0 +1,73 @@
1
+ require "pp"
2
+ require "pry"
3
+ require "mechanize"
4
+
5
+ module ECFS
6
+
7
# Scrapes the FCC ECFS Solr search result pages for one docket and returns
# each listed filing as a plain Hash.
#
# Example:
#
#   query = ECFS::SolrScrapeQuery.new
#   query.docket_number = '12-83'
#   filings = query.get
class SolrScrapeQuery
  # Endpoint that lists a docket's filings, one page of results at a time.
  SEARCH_URL = "http://apps.fcc.gov/ecfs/solr/search".freeze

  # Endpoint used to build the per-filing link from a filing id.
  COMMENT_URL = "http://apps.fcc.gov/ecfs/comment/view".freeze

  # Distance between consecutive `start` offsets when paginating.
  # NOTE(review): assumes the server returns 20 rows per page -- confirm.
  PAGE_SIZE = 20

  # Number of cells read from a result row:
  # proceeding, filer, date received, filing type, page count.
  COLUMN_COUNT = 5

  attr_accessor :docket_number

  # Fetches and parses a single page of search results.
  #
  # docket_number - docket identifier, e.g. '12-83'. It is interpolated into
  #                 the query string as-is (NOTE(review): not URL-escaped, so
  #                 callers should pass plain identifiers).
  # start         - offset of the first result to request (default 0).
  #
  # Returns a two-element Array [filings, total]:
  #   filings - Array of Hashes with the String keys 'docket_number',
  #             'name_of_filer', 'type_of_filing', 'url', 'date_recieved'
  #             (spelling kept for existing callers) and 'pages'.
  #   total   - Integer result count reported by the page, or 0 when the page
  #             carries no "Showing results" summary.
  def filings_from_docket_number(docket_number, start=0)
    url = "#{SEARCH_URL}?sort=dateRcpt&proceeding=#{docket_number}&dir=asc&start=#{start}"
    page = Mechanize.new.get(url)

    [extract_filings(page), extract_total(page)]
  end

  # Collects the filings from every result page of @docket_number.
  #
  # fetch_document_urls - placeholder flag; when truthy it currently only
  #                       prints a notice and fetches nothing extra.
  #
  # Returns an Array of filing Hashes (see #filings_from_docket_number).
  def get(fetch_document_urls=false)
    first_page, total = filings_from_docket_number(@docket_number, 0)

    filings = []
    filings.concat first_page

    # Offset 0 is already loaded; request the remaining offsets below total.
    (PAGE_SIZE...total).step(PAGE_SIZE) do |offset|
      filings.concat filings_from_docket_number(@docket_number, offset)[0]
    end

    if fetch_document_urls
      # TODO: document URL fetching is not implemented yet.
      p "pretending to fetch some urls"
    end

    filings
  end

  private

  # Reads the overall result count from the "Showing results ... of N" div.
  # Returns 0 when no such div exists instead of failing on nil.
  def extract_total(page)
    summary = page.search('div').find { |div| div.text.start_with?("Showing results") }
    return 0 unless summary

    # Drop thousands separators so a count such as "1,234" is not cut to 1.
    summary.text.split('of ')[1].to_s.delete(',').to_i
  end

  # Converts the rows of the results table into filing Hashes.
  # Returns [] when the page has no results table.
  def extract_filings(page)
    table = page.search('div.dataTable table').first
    return [] unless table

    # The first row is treated as the header and dropped.
    rows = table.search('tr').drop(1)
    rows.map { |row| filing_from_row(row) }.compact
  end

  # Builds one filing Hash from a table row.
  # Returns nil for rows that do not have the expected number of cells.
  def filing_from_row(row)
    cells = row.search('td')
    return nil if cells.length < COLUMN_COUNT

    texts = (0...COLUMN_COUNT).map { |index| cells[index].text.strip }

    # The filing id is taken from the link inside the filer cell.
    link = cells[1].search('a').first
    href = link && link['href']
    id = href && href.split('?id=')[1]

    {
      'docket_number' => texts[0],
      'name_of_filer' => texts[1],
      'type_of_filing' => texts[3],
      'url' => id ? "#{COMMENT_URL}?id=#{id}" : nil,
      'date_recieved' => texts[2],
      'pages' => texts[4].to_i
    }
  end
end
73
+ end
@@ -1,3 +1,3 @@
1
1
  module ECFS
2
- VERSION = "0.3.3"
2
+ VERSION = "0.3.4"
3
3
  end
@@ -0,0 +1,18 @@
1
+ require "helper"
2
+ require "pp"
3
+ require "pry"
4
+
5
# Smoke test for ECFS::SolrScrapeQuery, run inside the 'main_cassette'
# VCR cassette.
class TestSolrScrape < MiniTest::Unit::TestCase

  # Scrapes docket 12-83 and checks that the first result is a Hash
  # carrying a 'docket_number' key.
  def test_synopsis
    VCR.use_cassette('main_cassette') do
      query = ECFS::SolrScrapeQuery.new
      query.docket_number = '12-83'
      filings = query.get

      first_filing = filings.first
      assert first_filing.is_a?(Hash)
      assert filings.first.has_key?('docket_number')
    end
  end

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ecfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alan deLevie
@@ -196,6 +196,7 @@ files:
196
196
  - lib/ecfs/proceeding.rb
197
197
  - lib/ecfs/proceedings_query.rb
198
198
  - lib/ecfs/query.rb
199
+ - lib/ecfs/solr_scrape_query.rb
199
200
  - lib/ecfs/spreadsheet_parser.rb
200
201
  - lib/ecfs/too_many_filings_error.rb
201
202
  - lib/ecfs/util.rb
@@ -207,6 +208,7 @@ files:
207
208
  - test/test_large_proceeding.rb
208
209
  - test/test_proceeding.rb
209
210
  - test/test_proceedings_query.rb
211
+ - test/test_solr_scrape.rb
210
212
  homepage: http://github.com/adelevie/ecfs
211
213
  licenses:
212
214
  - MIT
@@ -240,3 +242,4 @@ test_files:
240
242
  - test/test_large_proceeding.rb
241
243
  - test/test_proceeding.rb
242
244
  - test/test_proceedings_query.rb
245
+ - test/test_solr_scrape.rb