ecfs 0.3.3 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -9,6 +9,7 @@ require "ecfs/error"
9
9
  require "ecfs/too_many_filings_error"
10
10
  require "ecfs/bulk_filings_query"
11
11
  require "ecfs/daily_releases_query"
12
+ require "ecfs/solr_scrape_query"
12
13
 
13
14
  module ECFS
14
15
  end
@@ -0,0 +1,73 @@
1
+ require "pp"
2
+ require "pry"
3
+ require "mechanize"
4
+
5
module ECFS

  # Scrapes the FCC ECFS Solr search result pages for the filings in a docket.
  #
  # Set +docket_number+ and call #get to collect the filings from every
  # result page, or call #filings_from_docket_number to read a single page.
  class SolrScrapeQuery
    # Stride used for the +start+ offset when walking result pages. The
    # search endpoint is assumed to return this many rows per page.
    PAGE_SIZE = 20

    attr_accessor :docket_number

    # Fetches and parses one page of search results.
    #
    # @param docket_number [String] proceeding to search for, e.g. "12-83"
    # @param start [Integer] zero-based offset of the first result to show
    # @return [Array] two elements: the Array of filing Hashes found on the
    #   page, and the Integer total result count reported by the page (0 when
    #   the page reports none). A page without a results table yields an
    #   empty filings Array instead of raising.
    def filings_from_docket_number(docket_number, start=0)
      url = "http://apps.fcc.gov/ecfs/solr/search?sort=dateRcpt&proceeding=#{docket_number}&dir=asc&start=#{start}"
      page = Mechanize.new.get(url)

      total = total_from_page(page)
      table = page.search('div.dataTable table').first
      return [[], total] if table.nil?

      # The first <tr> is the header row, so it is skipped.
      rows = table.search('tr').drop(1)
      filings = rows.map { |row| filing_from_row(row) }.compact

      [filings, total]
    end

    # Collects the filings from every result page of +docket_number+.
    #
    # @param fetch_document_urls [Boolean] accepted for API compatibility;
    #   fetching document URLs is not implemented, so a truthy value only
    #   emits a warning on stderr
    # @return [Array<Hash>] filings from all pages, in page order
    def get(fetch_document_urls=false)
      first_page_of_filings, total = filings_from_docket_number(@docket_number, 0)
      filings = [].concat(first_page_of_filings)

      # Offsets PAGE_SIZE, 2 * PAGE_SIZE, ... strictly below the reported total.
      (PAGE_SIZE...total).step(PAGE_SIZE) do |offset|
        filings.concat(filings_from_docket_number(@docket_number, offset)[0])
      end

      if fetch_document_urls
        warn "ECFS::SolrScrapeQuery#get: fetching document urls is not implemented yet"
      end

      filings
    end

    private

    # Reads the total result count from the "Showing results ... of N" text.
    # Returns 0 when no such element is present. Thousands separators are
    # stripped so that "1,234" is read as 1234 rather than 1.
    def total_from_page(page)
      summary = page.search('div').find { |div| div.text.start_with?("Showing results") }
      return 0 if summary.nil?

      summary.text.split('of ')[1].to_s.delete(',').to_i
    end

    # Converts one table row into a filing Hash. Returns nil for rows with
    # fewer than five cells, so one malformed or spacer row cannot abort the
    # whole page.
    def filing_from_row(row)
      columns = row.search('td')
      return nil if columns.length < 5

      # The filing id comes from the link in the filer column; 'url' is nil
      # when the row has no such link or the link carries no id.
      link = columns[1].search('a').first
      id = link && link['href'].to_s.split('?id=')[1]

      {
        'docket_number' => columns[0].text.strip,
        'name_of_filer' => columns[1].text.strip,
        'type_of_filing' => columns[3].text.strip,
        'url' => id && "http://apps.fcc.gov/ecfs/comment/view?id=#{id}",
        # NOTE: the misspelled key is part of the public result format.
        'date_recieved' => columns[2].text.strip,
        'pages' => columns[4].text.strip.to_i
      }
    end

  end
end
@@ -1,3 +1,3 @@
1
1
  module ECFS
2
- VERSION = "0.3.3"
2
+ VERSION = "0.3.4"
3
3
  end
@@ -0,0 +1,18 @@
1
+ require "helper"
2
+ require "pp"
3
+ require "pry"
4
+
5
# Smoke test for ECFS::SolrScrapeQuery, replayed from the 'main_cassette'
# VCR cassette.
class TestSolrScrape < MiniTest::Unit::TestCase

  # Queries docket 12-83 and checks that the first filing returned is a Hash
  # carrying a 'docket_number' key.
  def test_synopsis
    VCR.use_cassette('main_cassette') do
      query = ECFS::SolrScrapeQuery.new
      query.docket_number = '12-83'
      results = query.get

      first_filing = results.first
      assert first_filing.is_a?(Hash)

      first_filing = results.first
      assert first_filing.key?('docket_number')
    end
  end

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ecfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alan deLevie
@@ -196,6 +196,7 @@ files:
196
196
  - lib/ecfs/proceeding.rb
197
197
  - lib/ecfs/proceedings_query.rb
198
198
  - lib/ecfs/query.rb
199
+ - lib/ecfs/solr_scrape_query.rb
199
200
  - lib/ecfs/spreadsheet_parser.rb
200
201
  - lib/ecfs/too_many_filings_error.rb
201
202
  - lib/ecfs/util.rb
@@ -207,6 +208,7 @@ files:
207
208
  - test/test_large_proceeding.rb
208
209
  - test/test_proceeding.rb
209
210
  - test/test_proceedings_query.rb
211
+ - test/test_solr_scrape.rb
210
212
  homepage: http://github.com/adelevie/ecfs
211
213
  licenses:
212
214
  - MIT
@@ -240,3 +242,4 @@ test_files:
240
242
  - test/test_large_proceeding.rb
241
243
  - test/test_proceeding.rb
242
244
  - test/test_proceedings_query.rb
245
+ - test/test_solr_scrape.rb