ecfs 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -54,14 +54,8 @@ module ECFS
54
54
  end
55
55
 
56
56
  def get
57
- rows = download_spreadsheet.rows
58
- if @typecast_results
59
- return rows.map do |row|
60
- row_to_filing(row)
61
- end
62
- else
63
- return rows
64
- end
57
+ download_spreadsheet!
58
+ @typecast_results ? @rows.map {|row| row_to_filing(row)} : @rows
65
59
  end
66
60
 
67
61
  def row_to_filing(row)
@@ -69,20 +63,18 @@ module ECFS
69
63
  end
70
64
 
71
65
  def mechanize_agent
72
- agent = Mechanize.new
73
- agent.follow_meta_refresh = true
74
- agent.pluggable_parser["application/vnd.ms-excel"] = ECFS::SpreadsheetParser
75
-
76
- agent
66
+ Mechanize.new.tap do |agent|
67
+ agent.follow_meta_refresh = true
68
+ agent.pluggable_parser["application/vnd.ms-excel"] = ECFS::SpreadsheetParser
69
+ end
77
70
  end
78
71
 
79
- def download_spreadsheet
80
- agent = self.mechanize_agent
81
- page = agent.get(self.url)
72
+ def download_spreadsheet!
73
+ agent = mechanize_agent
82
74
  link_text = "\r\n \t \t \tExport to Excel file\r\n \t \t"
83
- link = page.link_with(:text => link_text)
84
-
85
- agent.click(link)
75
+ link = agent.get(url).link_with(:text => link_text)
76
+
77
+ @rows = agent.click(link).rows
86
78
  end
87
79
  end
88
80
  end
@@ -16,7 +16,7 @@ module ECFS
16
16
  def self.find(docket_number)
17
17
  query.tap do |q|
18
18
  q.eq("docket_number", docket_number)
19
- end.get
19
+ end.get.merge!({"docket_number" => docket_number})
20
20
  end
21
21
 
22
22
  def fetch_info!
@@ -51,8 +51,8 @@ module ECFS
51
51
  # if docket_number is given along with other constraints, the other constraints will be ignored.
52
52
  warn "Constraints other than `docket_number` will be ignored." if @constraints.keys.length > 1
53
53
 
54
- return scrape_proceedings_page unless @typecast_results
55
- results = ECFS::Proceeding.new(scrape_proceedings_page)
54
+ return scrape_proceeding_page! unless @typecast_results
55
+ results = ECFS::Proceeding.new(scrape_proceeding_page!)
56
56
  else
57
57
  return scrape_results_page unless @typecast_results
58
58
  results = ECFS::Proceeding::ResultSet.new(scrape_results_page)
@@ -61,85 +61,136 @@ module ECFS
61
61
  results
62
62
  end
63
63
 
64
- def mechanize_page
65
- Mechanize.new.get(self.url)
66
- end
67
64
 
68
65
  private
69
66
 
70
- def scrape_proceedings_page
71
- page = self.mechanize_page
72
-
73
- container = []
74
- page.search("div").select do |d|
75
- d.attributes["class"].nil? == false
76
- end.select do |d|
77
- d.attributes["class"].text == "wwgrp"
78
- end.each do |node|
79
- node.search("span").each do |span|
80
- search = span.search("label")
81
- pair = []
82
- if search.length > 0
83
- key = search.first.children.first.text.lstrip.rstrip.split(":")[0].gsub(" ", "_").downcase
84
- pair << key
85
- else
86
- value = span.text.lstrip.rstrip
87
- value.gsub!(",", "") if value.is_a?(String)
88
- pair << value
89
- end
90
- container << pair
67
+ def mechanize_page
68
+ Mechanize.new.get(url)
69
+ end
70
+
71
+ def scrape_proceeding_page!
72
+ container_to_hash do
73
+ mechanize_page.search("div").select do |div|
74
+ div.attributes["class"].nil? == false
75
+ end.select do |div|
76
+ div.attributes["class"].text == "wwgrp"
77
+ end.map do |node|
78
+ search_node(node)
91
79
  end
92
80
  end
81
+ end
82
+
83
+ def container_to_hash(&block)
93
84
  hash = {}
94
- container.each_slice(2) do |chunk|
95
- hash.merge!({chunk[0][0] => chunk[1][0]})
85
+ block.call.flatten!.each_slice(2) do |chunk|
86
+ hash[chunk[0]] = chunk[1]
96
87
  end
97
88
 
98
- hash["date_created"] = format_date(hash["date_created"])
99
-
100
89
  hash
101
90
  end
102
91
 
103
- def scrape_results_page
104
- page = self.mechanize_page
92
+ def search_node(node)
93
+ node.search("span").map do |span|
94
+ search = span.search("label")
95
+ key_or_value_from_search_and_span(search, span)
96
+ end
97
+ end
98
+
99
+ def key_or_value_from_search_and_span(search, span)
100
+ search.length > 0 ? key_from_search(search) : value_from_span(span)
101
+ end
102
+
103
+ def key_from_search(search)
104
+ format_key_text(search.first.children.first.text)
105
+ end
106
+
107
+ def format_key_text(key_text)
108
+ key_text.lstrip!.rstrip!
109
+ key_text = key_text.split(":")[0]
110
+ key_text.gsub!(" ", "_")
111
+ key_text.downcase!
112
+ end
113
+
114
+ def value_from_span(span)
115
+ value = text_from_span(span)
116
+ value.gsub!(",", "") if value.is_a?(String)
117
+
118
+ value
119
+ end
120
+
121
+ def text_from_span(span)
122
+ span.text.lstrip.rstrip
123
+ end
105
124
 
106
- total_pages = page.link_with(:text => "Last").attributes.first[1].split("pageNumber=")[1].gsub(",","").to_i
107
- banner = page.search("//*[@id='yui-main']/div/div[2]/table/tbody/tr[2]/td/span[1]").text.lstrip.rstrip.split("Modify Search")[0].rstrip.split
108
- first = banner[1].gsub(",","").to_i
109
- last = banner[3].gsub(",","").to_i
110
- total = banner[5].gsub(",","").to_i
111
- table_rows = page.search("//*[@id='yui-main']/div/div[2]/table/tbody/tr[2]/td/table/tbody").children
112
- results = table_rows.map { |row| row_to_proceeding(row) }
125
+ def scrape_results_page
126
+ page = mechanize_page
127
+ banner = extract_banner_from_page(page)
113
128
 
114
129
  {
115
130
  "constraints" => @constraints,
116
- "fcc_url" => self.url,
117
- "current_page" => self.constraints["page_number"].gsub(",","").to_i,
118
- "total_pages" => total_pages,
119
- "first_result" => first,
120
- "last_result" => last,
121
- "total_results" => total,
122
- "results" => results
131
+ "fcc_url" => url,
132
+ "current_page" => current_page,
133
+ "total_pages" => total_pages_from_page(page),
134
+ "first_result" => first_from_banner(banner),
135
+ "last_result" => last_from_banner(banner),
136
+ "total_results" => total_from_banner(banner),
137
+ "results" => proceedings_from_page(page)
123
138
  }
124
139
  end
125
140
 
126
- def row_to_proceeding(row)
127
- hash = row_to_hash(row)
141
+ def current_page
142
+ self.constraints["page_number"].gsub(",","").to_i
143
+ end
128
144
 
129
- ECFS::Proceeding.new(hash)
145
+ def proceedings_from_page(page)
146
+ extract_table_rows_from_page(page).map do |row|
147
+ row_to_proceeding(row)
148
+ end
130
149
  end
131
150
 
132
- def row_to_hash(row)
133
- bureau = bureau_from_row(row)
134
- subject = subject_from_row(row)
135
- docket_number = docket_number_from_row(row)
136
- filings_in_last_30_days = filings_in_last_30_days_from_row(row)
151
+ def extract_table_rows_from_page(page)
152
+ xpath = "//*[@id='yui-main']/div/div[2]/table/tbody/tr[2]/td/table/tbody"
153
+ page.search(xpath).children
154
+ end
155
+
156
+ def first_from_banner(banner)
157
+ extract_from_banner(banner, 1)
158
+ end
159
+
160
+ def last_from_banner(banner)
161
+ extract_from_banner(banner, 3)
162
+ end
163
+
164
+ def total_from_banner(banner)
165
+ extract_from_banner(banner, 5)
166
+ end
167
+
168
+ def extract_banner_from_page(page)
169
+ xpath = "//*[@id='yui-main']/div/div[2]/table/tbody/tr[2]/td/span[1]"
170
+ page.search(xpath).text.tap do |t|
171
+ t.lstrip!
172
+ t.rstrip!
173
+ end.split("Modify Search")[0].rstrip.split
174
+ end
137
175
 
176
+ def extract_from_banner(banner, index)
177
+ banner[index].gsub(",", "").to_i
178
+ end
179
+
180
+ def total_pages_from_page(page)
181
+ page.link_with(:text => "Last").attributes.first[1].split("pageNumber=")[1].gsub(",","").to_i
182
+ end
183
+
184
+ def row_to_proceeding(row)
185
+ ECFS::Proceeding.new(row_to_hash(row))
186
+ end
187
+
188
+ def row_to_hash(row)
138
189
  {
139
- "docket_number" => docket_number,
140
- "bureau" => bureau,
141
- "subject" => subject,
142
- "filings_in_last_30_days" => filings_in_last_30_days
190
+ "docket_number" => docket_number_from_row(row),
191
+ "bureau" => bureau_from_row(row),
192
+ "subject" => subject_from_row(row),
193
+ "filings_in_last_30_days" => filings_in_last_30_days_from_row(row)
143
194
  }
144
195
  end
145
196
 
@@ -158,6 +209,7 @@ module ECFS
158
209
  def filings_in_last_30_days_from_row(row)
159
210
  row.children[6].children.first.text.lstrip.rstrip.to_i
160
211
  end
212
+ #####
161
213
 
162
214
  end
163
215
  end
@@ -11,62 +11,78 @@ module ECFS
11
11
  def initialize(uri = nil, response = nil, body = nil, code = nil)
12
12
  super(uri, response, body, code)
13
13
  @body = body
14
- extract_rows!
15
- format_rows!
14
+ @rows = formatted_rows
16
15
  end
17
16
 
18
17
  private
19
18
 
20
- def extract_rows!
21
- book = Spreadsheet.open(StringIO.new(@body))
22
- sheet1 = book.worksheet 0
23
- @rows = []
19
+ def file
20
+ StringIO.new(@body)
21
+ end
22
+
23
+ def book
24
+ Spreadsheet.open(file)
25
+ end
26
+
27
+ def sheet
28
+ book.worksheet(0)
29
+ end
30
+
31
+ def unformatted_rows
32
+ my_rows = []
24
33
  first = false
25
- sheet1.each do |row|
26
- @rows << row if first
34
+ sheet.each do |row|
35
+ my_rows << row if first
27
36
  first = true
28
37
  end
29
38
 
30
- @rows
39
+ my_rows
31
40
  end
32
41
 
33
- def format_rows!
34
- @rows.map! do |row|
35
- urls = []
36
- indices = (7..row.length-1).to_a
37
- indices.each do |i|
38
- text = row[i].data.split("id=")[1]
39
- urls << "http://apps.fcc.gov/ecfs/document/view?id=#{extract_filing_id(text)}"
40
- end
41
-
42
- {
43
- "name_of_filer" => row[1],
44
- "docket_number" => row[0],
45
- "lawfirm_name" => row[2],
46
- "date_received" => format_date(row[3]),
47
- "date_posted" => format_date(row[4]),
48
- "exparte" => format_exparte(row[5]),
49
- "type_of_filing" => row[6],
50
- "document_urls" => urls
51
- }
42
+ def formatted_rows
43
+ unformatted_rows.map do |row|
44
+ row_to_hash(row)
52
45
  end
53
46
  end
54
47
 
55
- def format_exparte(my_bool)
56
- return true if my_bool == "Y"
57
- return false if my_bool == "N"
58
- return nil
48
+ def row_to_hash(row)
49
+ {
50
+ "name_of_filer" => row[1],
51
+ "docket_number" => row[0],
52
+ "lawfirm_name" => row[2],
53
+ "date_received" => format_iso_date(row[3]),
54
+ "date_posted" => format_iso_date(row[4]),
55
+ "exparte" => bool_from_exparte(row[5]),
56
+ "type_of_filing" => row[6],
57
+ "document_urls" => urls_from_row(row)
58
+ }
59
+ end
60
+
61
+ def urls_from_row(row)
62
+ indices = (7..row.length-1).to_a
63
+
64
+ indices.map do |index|
65
+ extract_url_from_row_and_index(row, index)
66
+ end
67
+ end
68
+
69
+ def extract_url_from_row_and_index(row, index)
70
+ text = row[index].data.split("id=")[1]
71
+
72
+ "http://apps.fcc.gov/ecfs/document/view?id=#{extract_filing_id(text)}"
59
73
  end
60
74
 
61
75
  def extract_filing_id(txt)
62
76
  re1='(\\d+)'
63
77
  re=(re1)
64
78
  m = Regexp.new(re, Regexp::IGNORECASE)
65
- if m.match(txt)
66
- int1 = m.match(txt)[1]
67
- return int1
68
- end
79
+
80
+ m.match(txt)[1]
81
+ end
82
+
83
+ def bool_from_exparte(y_or_n)
84
+ {"Y" => true, "N" => false}[y_or_n]
69
85
  end
70
86
 
71
- end # end class
72
- end # end module
87
+ end
88
+ end
data/lib/ecfs/util.rb CHANGED
@@ -1,10 +1,15 @@
1
1
  module ECFS
2
2
  module Util
3
- def format_date(date)
3
+ def format_iso_date(date)
4
4
  # input format: MM/DD/YYYY, e.g. 12/22/1988
5
5
  chunks = date.split("/")
6
6
  new_date = "#{chunks[2]}-#{chunks[0]}-#{chunks[1]}" # "1988-12-22"
7
7
  "#{new_date}T00:00:00.000Z" # dumb hack: append a fixed midnight "Z" time so the date reads as an ISO 8601 timestamp
8
8
  end
9
+
10
+ def iso_date_to_simple_date(iso_date)
11
+ chunks = iso_date.split("T")[0].split("-")
12
+ "#{chunks[1]}-#{chunks[0]}-#{chunks[2]}"
13
+ end
9
14
  end
10
15
  end
data/lib/ecfs/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module ECFS
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
data/test/test_filing.rb CHANGED
@@ -42,6 +42,10 @@ class TestFiling < Test::Unit::TestCase
42
42
  assert_equal ECFS::Document::Page, page.class
43
43
  assert_equal String, page.text.class
44
44
  assert_equal Fixnum, page.page_number.class
45
+
46
+ #VCR.use_cassette('test_proceedings_query_test_get_proceeding_info') do
47
+ # binding.pry
48
+ #end
45
49
  end
46
50
  end
47
51
  end
@@ -9,6 +9,7 @@ class TestProceeding < Test::Unit::TestCase
9
9
  %w[
10
10
  bureau_name subject date_created status
11
11
  total_filings filings_in_last_30_days
12
+ docket_number
12
13
  ].each do |key|
13
14
  assert proceeding.keys.include?(key)
14
15
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ecfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-24 00:00:00.000000000 Z
12
+ date: 2013-05-30 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler