ecfs 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,14 +54,8 @@ module ECFS
54
54
  end
55
55
 
56
56
  def get
57
- rows = download_spreadsheet.rows
58
- if @typecast_results
59
- return rows.map do |row|
60
- row_to_filing(row)
61
- end
62
- else
63
- return rows
64
- end
57
+ download_spreadsheet!
58
+ @typecast_results ? @rows.map {|row| row_to_filing(row)} : @rows
65
59
  end
66
60
 
67
61
  def row_to_filing(row)
@@ -69,20 +63,18 @@ module ECFS
69
63
  end
70
64
 
71
65
  def mechanize_agent
72
- agent = Mechanize.new
73
- agent.follow_meta_refresh = true
74
- agent.pluggable_parser["application/vnd.ms-excel"] = ECFS::SpreadsheetParser
75
-
76
- agent
66
+ Mechanize.new.tap do |agent|
67
+ agent.follow_meta_refresh = true
68
+ agent.pluggable_parser["application/vnd.ms-excel"] = ECFS::SpreadsheetParser
69
+ end
77
70
  end
78
71
 
79
- def download_spreadsheet
80
- agent = self.mechanize_agent
81
- page = agent.get(self.url)
72
+ def download_spreadsheet!
73
+ agent = mechanize_agent
82
74
  link_text = "\r\n \t \t \tExport to Excel file\r\n \t \t"
83
- link = page.link_with(:text => link_text)
84
-
85
- agent.click(link)
75
+ link = agent.get(url).link_with(:text => link_text)
76
+
77
+ @rows = agent.click(link).rows
86
78
  end
87
79
  end
88
80
  end
@@ -16,7 +16,7 @@ module ECFS
16
16
  def self.find(docket_number)
17
17
  query.tap do |q|
18
18
  q.eq("docket_number", docket_number)
19
- end.get
19
+ end.get.merge!({"docket_number" => docket_number})
20
20
  end
21
21
 
22
22
  def fetch_info!
@@ -51,8 +51,8 @@ module ECFS
51
51
  # if docket_number is given along with other constraints, the other constraints will be ignored.
52
52
  warn "Constraints other than `docket_number` will be ignored." if @constraints.keys.length > 1
53
53
 
54
- return scrape_proceedings_page unless @typecast_results
55
- results = ECFS::Proceeding.new(scrape_proceedings_page)
54
+ return scrape_proceeding_page! unless @typecast_results
55
+ results = ECFS::Proceeding.new(scrape_proceeding_page!)
56
56
  else
57
57
  return scrape_results_page unless @typecast_results
58
58
  results = ECFS::Proceeding::ResultSet.new(scrape_results_page)
@@ -61,85 +61,136 @@ module ECFS
61
61
  results
62
62
  end
63
63
 
64
- def mechanize_page
65
- Mechanize.new.get(self.url)
66
- end
67
64
 
68
65
  private
69
66
 
70
- def scrape_proceedings_page
71
- page = self.mechanize_page
72
-
73
- container = []
74
- page.search("div").select do |d|
75
- d.attributes["class"].nil? == false
76
- end.select do |d|
77
- d.attributes["class"].text == "wwgrp"
78
- end.each do |node|
79
- node.search("span").each do |span|
80
- search = span.search("label")
81
- pair = []
82
- if search.length > 0
83
- key = search.first.children.first.text.lstrip.rstrip.split(":")[0].gsub(" ", "_").downcase
84
- pair << key
85
- else
86
- value = span.text.lstrip.rstrip
87
- value.gsub!(",", "") if value.is_a?(String)
88
- pair << value
89
- end
90
- container << pair
67
+ def mechanize_page
68
+ Mechanize.new.get(url)
69
+ end
70
+
71
+ def scrape_proceeding_page!
72
+ container_to_hash do
73
+ mechanize_page.search("div").select do |div|
74
+ div.attributes["class"].nil? == false
75
+ end.select do |div|
76
+ div.attributes["class"].text == "wwgrp"
77
+ end.map do |node|
78
+ search_node(node)
91
79
  end
92
80
  end
81
+ end
82
+
83
+ def container_to_hash(&block)
93
84
  hash = {}
94
- container.each_slice(2) do |chunk|
95
- hash.merge!({chunk[0][0] => chunk[1][0]})
85
+ block.call.flatten!.each_slice(2) do |chunk|
86
+ hash[chunk[0]] = chunk[1]
96
87
  end
97
88
 
98
- hash["date_created"] = format_date(hash["date_created"])
99
-
100
89
  hash
101
90
  end
102
91
 
103
- def scrape_results_page
104
- page = self.mechanize_page
92
+ def search_node(node)
93
+ node.search("span").map do |span|
94
+ search = span.search("label")
95
+ key_or_value_from_search_and_span(search, span)
96
+ end
97
+ end
98
+
99
+ def key_or_value_from_search_and_span(search, span)
100
+ search.length > 0 ? key_from_search(search) : value_from_span(span)
101
+ end
102
+
103
+ def key_from_search(search)
104
+ format_key_text(search.first.children.first.text)
105
+ end
106
+
107
+ def format_key_text(key_text)
108
+ key_text.lstrip!.rstrip!
109
+ key_text = key_text.split(":")[0]
110
+ key_text.gsub!(" ", "_")
111
+ key_text.downcase!
112
+ end
113
+
114
+ def value_from_span(span)
115
+ value = text_from_span(span)
116
+ value.gsub!(",", "") if value.is_a?(String)
117
+
118
+ value
119
+ end
120
+
121
+ def text_from_span(span)
122
+ span.text.lstrip.rstrip
123
+ end
105
124
 
106
- total_pages = page.link_with(:text => "Last").attributes.first[1].split("pageNumber=")[1].gsub(",","").to_i
107
- banner = page.search("//*[@id='yui-main']/div/div[2]/table/tbody/tr[2]/td/span[1]").text.lstrip.rstrip.split("Modify Search")[0].rstrip.split
108
- first = banner[1].gsub(",","").to_i
109
- last = banner[3].gsub(",","").to_i
110
- total = banner[5].gsub(",","").to_i
111
- table_rows = page.search("//*[@id='yui-main']/div/div[2]/table/tbody/tr[2]/td/table/tbody").children
112
- results = table_rows.map { |row| row_to_proceeding(row) }
125
+ def scrape_results_page
126
+ page = mechanize_page
127
+ banner = extract_banner_from_page(page)
113
128
 
114
129
  {
115
130
  "constraints" => @constraints,
116
- "fcc_url" => self.url,
117
- "current_page" => self.constraints["page_number"].gsub(",","").to_i,
118
- "total_pages" => total_pages,
119
- "first_result" => first,
120
- "last_result" => last,
121
- "total_results" => total,
122
- "results" => results
131
+ "fcc_url" => url,
132
+ "current_page" => current_page,
133
+ "total_pages" => total_pages_from_page(page),
134
+ "first_result" => first_from_banner(banner),
135
+ "last_result" => last_from_banner(banner),
136
+ "total_results" => total_from_banner(banner),
137
+ "results" => proceedings_from_page(page)
123
138
  }
124
139
  end
125
140
 
126
- def row_to_proceeding(row)
127
- hash = row_to_hash(row)
141
+ def current_page
142
+ self.constraints["page_number"].gsub(",","").to_i
143
+ end
128
144
 
129
- ECFS::Proceeding.new(hash)
145
+ def proceedings_from_page(page)
146
+ extract_table_rows_from_page(page).map do |row|
147
+ row_to_proceeding(row)
148
+ end
130
149
  end
131
150
 
132
- def row_to_hash(row)
133
- bureau = bureau_from_row(row)
134
- subject = subject_from_row(row)
135
- docket_number = docket_number_from_row(row)
136
- filings_in_last_30_days = filings_in_last_30_days_from_row(row)
151
+ def extract_table_rows_from_page(page)
152
+ xpath = "//*[@id='yui-main']/div/div[2]/table/tbody/tr[2]/td/table/tbody"
153
+ page.search(xpath).children
154
+ end
155
+
156
+ def first_from_banner(banner)
157
+ extract_from_banner(banner, 1)
158
+ end
159
+
160
+ def last_from_banner(banner)
161
+ extract_from_banner(banner, 3)
162
+ end
163
+
164
+ def total_from_banner(banner)
165
+ extract_from_banner(banner, 5)
166
+ end
167
+
168
+ def extract_banner_from_page(page)
169
+ xpath = "//*[@id='yui-main']/div/div[2]/table/tbody/tr[2]/td/span[1]"
170
+ page.search(xpath).text.tap do |t|
171
+ t.lstrip!
172
+ t.rstrip!
173
+ end.split("Modify Search")[0].rstrip.split
174
+ end
137
175
 
176
+ def extract_from_banner(banner, index)
177
+ banner[index].gsub(",", "").to_i
178
+ end
179
+
180
+ def total_pages_from_page(page)
181
+ page.link_with(:text => "Last").attributes.first[1].split("pageNumber=")[1].gsub(",","").to_i
182
+ end
183
+
184
+ def row_to_proceeding(row)
185
+ ECFS::Proceeding.new(row_to_hash(row))
186
+ end
187
+
188
+ def row_to_hash(row)
138
189
  {
139
- "docket_number" => docket_number,
140
- "bureau" => bureau,
141
- "subject" => subject,
142
- "filings_in_last_30_days" => filings_in_last_30_days
190
+ "docket_number" => docket_number_from_row(row),
191
+ "bureau" => bureau_from_row(row),
192
+ "subject" => subject_from_row(row),
193
+ "filings_in_last_30_days" => filings_in_last_30_days_from_row(row)
143
194
  }
144
195
  end
145
196
 
@@ -158,6 +209,7 @@ module ECFS
158
209
  def filings_in_last_30_days_from_row(row)
159
210
  row.children[6].children.first.text.lstrip.rstrip.to_i
160
211
  end
212
+ #####
161
213
 
162
214
  end
163
215
  end
@@ -11,62 +11,78 @@ module ECFS
11
11
  def initialize(uri = nil, response = nil, body = nil, code = nil)
12
12
  super(uri, response, body, code)
13
13
  @body = body
14
- extract_rows!
15
- format_rows!
14
+ @rows = formatted_rows
16
15
  end
17
16
 
18
17
  private
19
18
 
20
- def extract_rows!
21
- book = Spreadsheet.open(StringIO.new(@body))
22
- sheet1 = book.worksheet 0
23
- @rows = []
19
+ def file
20
+ StringIO.new(@body)
21
+ end
22
+
23
+ def book
24
+ Spreadsheet.open(file)
25
+ end
26
+
27
+ def sheet
28
+ book.worksheet(0)
29
+ end
30
+
31
+ def unformatted_rows
32
+ my_rows = []
24
33
  first = false
25
- sheet1.each do |row|
26
- @rows << row if first
34
+ sheet.each do |row|
35
+ my_rows << row if first
27
36
  first = true
28
37
  end
29
38
 
30
- @rows
39
+ my_rows
31
40
  end
32
41
 
33
- def format_rows!
34
- @rows.map! do |row|
35
- urls = []
36
- indices = (7..row.length-1).to_a
37
- indices.each do |i|
38
- text = row[i].data.split("id=")[1]
39
- urls << "http://apps.fcc.gov/ecfs/document/view?id=#{extract_filing_id(text)}"
40
- end
41
-
42
- {
43
- "name_of_filer" => row[1],
44
- "docket_number" => row[0],
45
- "lawfirm_name" => row[2],
46
- "date_received" => format_date(row[3]),
47
- "date_posted" => format_date(row[4]),
48
- "exparte" => format_exparte(row[5]),
49
- "type_of_filing" => row[6],
50
- "document_urls" => urls
51
- }
42
+ def formatted_rows
43
+ unformatted_rows.map do |row|
44
+ row_to_hash(row)
52
45
  end
53
46
  end
54
47
 
55
- def format_exparte(my_bool)
56
- return true if my_bool == "Y"
57
- return false if my_bool == "N"
58
- return nil
48
+ def row_to_hash(row)
49
+ {
50
+ "name_of_filer" => row[1],
51
+ "docket_number" => row[0],
52
+ "lawfirm_name" => row[2],
53
+ "date_received" => format_iso_date(row[3]),
54
+ "date_posted" => format_iso_date(row[4]),
55
+ "exparte" => bool_from_exparte(row[5]),
56
+ "type_of_filing" => row[6],
57
+ "document_urls" => urls_from_row(row)
58
+ }
59
+ end
60
+
61
+ def urls_from_row(row)
62
+ indices = (7..row.length-1).to_a
63
+
64
+ indices.map do |index|
65
+ extract_url_from_row_and_index(row, index)
66
+ end
67
+ end
68
+
69
+ def extract_url_from_row_and_index(row, index)
70
+ text = row[index].data.split("id=")[1]
71
+
72
+ "http://apps.fcc.gov/ecfs/document/view?id=#{extract_filing_id(text)}"
59
73
  end
60
74
 
61
75
  def extract_filing_id(txt)
62
76
  re1='(\\d+)'
63
77
  re=(re1)
64
78
  m = Regexp.new(re, Regexp::IGNORECASE)
65
- if m.match(txt)
66
- int1 = m.match(txt)[1]
67
- return int1
68
- end
79
+
80
+ m.match(txt)[1]
81
+ end
82
+
83
+ def bool_from_exparte(y_or_n)
84
+ {"Y" => true, "N" => false}[y_or_n]
69
85
  end
70
86
 
71
- end # end class
72
- end # end module
87
+ end
88
+ end
data/lib/ecfs/util.rb CHANGED
@@ -1,10 +1,15 @@
1
1
  module ECFS
2
2
  module Util
3
- def format_date(date)
3
+ def format_iso_date(date)
4
4
  # input format 12/22/1988
5
5
  chunks = date.split("/")
6
6
  new_date = "#{chunks[2]}-#{chunks[0]}-#{chunks[1]}" # "1988-12-22"
7
7
  "#{new_date}T00:00:00.000Z" # dumb hack
8
8
  end
9
+
10
+ def iso_date_to_simple_date(iso_date)
11
+ chunks = iso_date.split("T")[0].split("-")
12
+ "#{chunks[1]}-#{chunks[0]}-#{chunks[2]}"
13
+ end
9
14
  end
10
15
  end
data/lib/ecfs/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module ECFS
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
data/test/test_filing.rb CHANGED
@@ -42,6 +42,10 @@ class TestFiling < Test::Unit::TestCase
42
42
  assert_equal ECFS::Document::Page, page.class
43
43
  assert_equal String, page.text.class
44
44
  assert_equal Fixnum, page.page_number.class
45
+
46
+ #VCR.use_cassette('test_proceedings_query_test_get_proceeding_info') do
47
+ # binding.pry
48
+ #end
45
49
  end
46
50
  end
47
51
  end
@@ -9,6 +9,7 @@ class TestProceeding < Test::Unit::TestCase
9
9
  %w[
10
10
  bureau_name subject date_created status
11
11
  total_filings filings_in_last_30_days
12
+ docket_number
12
13
  ].each do |key|
13
14
  assert proceeding.keys.include?(key)
14
15
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ecfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-24 00:00:00.000000000 Z
12
+ date: 2013-05-30 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler