ecfs 0.5.1 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -1,11 +1 @@
1
- require 'bundler'
2
- Bundler::GemHelper.install_tasks
3
-
4
- require 'rake/testtask'
5
-
6
- Rake::TestTask.new do |t|
7
- t.libs << 'test'
8
- end
9
-
10
- desc "Run tests"
11
- task :default => :test
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "ecfs"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -8,27 +8,30 @@ Gem::Specification.new do |spec|
8
8
  spec.version = ECFS::VERSION
9
9
  spec.authors = ["Alan deLevie"]
10
10
  spec.email = ["adelevie@gmail.com"]
11
- spec.description = %q{ECFS provides a set of utilities for scraping FCC rulemakings}
12
- spec.summary = %q{ECFS helps you obtain comments and other filings from the FCC's Electronic Comment Filing System}
13
- spec.homepage = "http://github.com/adelevie/ecfs"
11
+
12
+ spec.summary = %q{Scraper for the FCC's Electronic Comment Filing System}
13
+ spec.description = %q{Provides Ruby-based access to the FCC's Electronic Comment Filing System}
14
+ spec.homepage = "https://github.com/adelevie/ecfs"
14
15
  spec.license = "MIT"
15
16
 
16
- spec.files = `git ls-files`.split($/)
17
- spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
17
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
+ spec.bindir = "exe"
19
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
20
  spec.require_paths = ["lib"]
20
21
 
21
- spec.add_development_dependency "bundler", "~> 1.3"
22
- spec.add_development_dependency "rake"
23
- spec.add_development_dependency "vcr"
24
- spec.add_development_dependency "webmock", "1.9.0"
25
- spec.add_development_dependency "minitest", "~> 4.7.3"
26
- spec.add_development_dependency "pry-rescue"
27
- spec.add_development_dependency "m", "~> 1.3.1"
28
-
22
+ #if spec.respond_to?(:metadata)
23
+ # spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com' to prevent pushes to rubygems.org, or delete to allow pushes to any server."
24
+ #end
29
25
 
30
- spec.add_dependency "pdf-reader"
26
+ spec.add_dependency "nokogiri"
31
27
  spec.add_dependency "pry"
32
- spec.add_dependency "mechanize"
33
- spec.add_dependency "spreadsheet"
28
+ spec.add_dependency "unirest"
29
+ spec.add_dependency "rubyzip"
30
+ spec.add_dependency "open_uri_redirections"
31
+
32
+ spec.add_development_dependency "webmock"
33
+ spec.add_development_dependency "bundler"#, "~> 1.9"
34
+ spec.add_development_dependency "rake", "~> 10.0"
35
+ spec.add_development_dependency "rspec"
36
+ spec.add_development_dependency "vcr"
34
37
  end
@@ -1,15 +1,211 @@
1
- require "ecfs/version"
2
- require "ecfs/query"
3
- require "ecfs/filings_query"
4
- require "ecfs/filing"
5
- require "ecfs/proceedings_query"
6
- require "ecfs/proceeding"
7
- require "ecfs/document"
8
- require "ecfs/error"
9
- require "ecfs/too_many_filings_error"
10
- require "ecfs/bulk_filings_query"
11
- require "ecfs/daily_releases_query"
12
- require "ecfs/solr_scrape_query"
1
+ require 'ecfs/version'
2
+ require 'nokogiri'
3
+ require 'open-uri'
4
+ require 'pry'
5
+ require 'net/http'
6
+ require 'uri'
7
+ require 'unirest'
8
+ require 'zip'
9
+ require 'open_uri_redirections'
10
+ require 'fileutils'
13
11
 
14
12
  module ECFS
15
- end
13
+ module Util
14
+ SIGNALS = [
15
+ 'E.g.', 'Accord', 'See', 'See also', 'Cf.',
16
+ 'Compare', 'Contra', 'But see', 'But cf.',
17
+ 'See generally'
18
+ ].map {|s| "#{s} Id."} << 'Id.'
19
+
20
+ def self.get_footnotes(url: nil, id_tree: false)
21
+ # hacky 'temp' file
22
+ rando = (rand * 1000000000000000000).to_i
23
+ FileUtils.mkdir_p "tmp/#{rando}"
24
+ path = "tmp/#{rando}/document.doc.zip"
25
+
26
+ open(path, 'wb', allow_redirections: :all) do |file|
27
+ file << open(url, allow_redirections: :all).read
28
+ `unzip #{path} -d tmp/#{rando}`
29
+ end
30
+
31
+ xml = File.open("tmp/#{rando}/word/footnotes.xml").read
32
+ doc = Nokogiri::XML(xml)
33
+
34
+ footnotes = doc.children[0].children[3..-1]
35
+
36
+ my_footnotes = footnotes.to_ary.map do |fn|
37
+ {
38
+ index: fn.attributes['id'].value.to_i - 1,
39
+ text: fn.text.strip
40
+ }
41
+ end
42
+
43
+ # compute the tree of id. citations
44
+ if id_tree
45
+ my_footnotes.each {|fn| fn[:ids] = []}
46
+ my_footnotes.each {|fn| fn[:id] = false}
47
+ ids = my_footnotes.select {|fn| fn[:text].start_with?(*ECFS::Util::SIGNALS)}
48
+ ids.each {|id| id[:id] = true}
49
+
50
+ my_footnotes.each do |fn|
51
+ if fn[:id] == true
52
+ parent_idx = fn[:index]-1
53
+ my_footnotes.find {|fn| fn[:index] == parent_idx}[:ids] << fn
54
+ end
55
+ end
56
+
57
+ my_footnotes = send_ids_to_parent(my_footnotes)
58
+
59
+ end
60
+
61
+ FileUtils.rm_rf("tmp/#{rando}")
62
+
63
+ my_footnotes
64
+ end
65
+
66
+ private
67
+
68
+ # if a footnote is an id and has ids, we send its ids to its parent
69
+ # these footnotes are reflected as parents, but are actually siblings
70
+ # so we call these ptsbs (parents that should be siblings). <3 software.
71
+ def self.send_ids_to_parent(footnotes)
72
+ ptsbs_array = footnotes.select {|fn| fn[:id] == true && fn[:ids].length > 0}
73
+ if ptsbs_array.empty?
74
+ return footnotes
75
+ else
76
+ ptsbs_array.each do |ptsbs|
77
+ parent_idx = ptsbs[:index]-1
78
+ footnotes.find {|fn| fn[:index] == parent_idx}[:ids].concat(ptsbs[:ids])
79
+ ptsbs[:ids] = []
80
+ end
81
+ self.send_ids_to_parent(footnotes)
82
+ end
83
+ end
84
+ end
85
+
86
+ module EDOCS
87
+ def self.search(docket: nil, da: nil, fcc: nil, report: nil, file: nil, fcc_rcd_vol: nil, fcc_rcd_page: nil)
88
+ uri = URI.parse("https://apps.fcc.gov/edocs_public/Query.do?mode=advanced&rpt=cond")
89
+ params = {
90
+ 'fccNo' => fcc,
91
+ 'daNo' => da,
92
+ 'fileNo' => file,
93
+ 'docket' => docket,
94
+ 'reportNo' => report,
95
+ 'fccRecordVol' => fcc_rcd_vol,
96
+ 'fccRecordPage' => fcc_rcd_page
97
+ }
98
+ params.reject! {|_k,v| v.nil?}
99
+
100
+ url = 'https://apps.fcc.gov/edocs_public/Query.do?mode=advance&rpt=cond'
101
+ response = Unirest.post url, parameters: params
102
+ doc = Nokogiri::HTML(response.raw_body)
103
+
104
+ tables = doc.css('table.tableWithOutBorder').children.css('table.tableWithOutBorder')
105
+ results = tables[2].css('table.tableWithBorder')
106
+
107
+ results.map do |result|
108
+ links = result.search('a').to_a
109
+ links.shift
110
+ links = links.map do |link|
111
+ path = link.attributes["href"].value
112
+
113
+ "https://apps.fcc.gov/edocs_public/#{path}"
114
+ end
115
+
116
+ word = links.select {|link| link.end_with?('.doc', '.docx')}
117
+ pdf = links.select {|link| link.end_with?('.pdf')}
118
+ txt = links.select {|link| link.end_with?('.txt')}
119
+
120
+ rows = result.search('tr')
121
+
122
+ {
123
+ title: rows[0].text.strip,
124
+ released: rows[1].text.strip.split(': ')[1],
125
+ description: rows[2].text.strip.split('Description: ')[1],
126
+ word: word,
127
+ pdf: pdf,
128
+ txt: txt
129
+ }.reject {|_k,v| v.nil?}
130
+ end
131
+ end
132
+ end
133
+
134
+ module Proceedings
135
+ def self.search(docket: nil)
136
+ url = "http://apps.fcc.gov/ecfs/proceeding/view?name=#{docket}"
137
+ response = Unirest.get url
138
+ doc = Nokogiri::HTML(response.raw_body)
139
+ table = doc.search('table.dataTable').first
140
+ rows = table.search('div.wwgrp')
141
+
142
+ proceeding = {}
143
+ rows.each do |row|
144
+ key = row.search('span')[0].text.strip
145
+ key.gsub!(" ", "")
146
+ key.gsub!(":", "")
147
+ key.downcase!
148
+ value = row.search('span')[1].text.strip
149
+ proceeding[key.to_sym] = value
150
+ end
151
+
152
+ proceeding
153
+ end
154
+ end
155
+
156
+ module Filings
157
+ ATTRS = [
158
+ :docket, :filer, :lawfirm, :received,
159
+ :posted, :exparte, :type, :pages
160
+ ]
161
+
162
+ def self.get_document_links(url: url)
163
+ doc = Nokogiri::HTML(open(url))
164
+ xpath = "//*[@id=\"documents.link\"]"
165
+ links = doc.xpath(xpath).search('a')
166
+
167
+ links.map do |link|
168
+ id = link.attributes["href"].value.split('?id=')[1]
169
+
170
+ "http://apps.fcc.gov/ecfs/document/view?id=#{id}"
171
+ end
172
+ end
173
+
174
+ def self.search(docket: nil, size: 1000, start: 0, order: 'asc')
175
+ url = "http://apps.fcc.gov/ecfs/comment_search_solr/doSearch?proceeding=#{docket}&dir=#{order}&start=#{start}&size=#{size}"
176
+ doc = Nokogiri::HTML(open(url))
177
+ xpath = "//*[@id='yui-main']/div/div[4]"
178
+ table = doc.xpath(xpath).children[1]
179
+ rows = table.search('tr')
180
+ rows.shift
181
+
182
+ filings = []
183
+ rows.each do |row|
184
+ row_hash = {}
185
+ cols = row.search('td')
186
+
187
+ cols.each_with_index do |col, i|
188
+ attribute = ECFS::Filings::ATTRS[i]
189
+ row_hash[attribute] = col.text.strip
190
+
191
+ # get the url
192
+ if attribute == :filer
193
+ path = col.search('a').first.attributes["href"].value
194
+ id = path.split('?id=')[1]
195
+ url = "http://apps.fcc.gov/ecfs/comment/view?id=#{id}"
196
+ row_hash[:url] = url
197
+ end
198
+ end
199
+
200
+ # cast dates and int
201
+ row_hash[:received] = DateTime.parse(row_hash[:received]).to_s
202
+ row_hash[:posted] = DateTime.parse(row_hash[:posted]).to_s
203
+ row_hash[:pages] = row_hash[:pages].to_i
204
+
205
+ filings << row_hash
206
+ end
207
+
208
+ filings
209
+ end
210
+ end
211
+ end
@@ -1,3 +1,3 @@
1
1
  module ECFS
2
- VERSION = "0.5.1"
2
+ VERSION = "0.6.0"
3
3
  end
metadata CHANGED
@@ -1,37 +1,23 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ecfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alan deLevie
8
8
  autorequire:
9
- bindir: bin
9
+ bindir: exe
10
10
  cert_chain: []
11
- date: 2015-01-19 00:00:00.000000000 Z
11
+ date: 2015-04-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '1.3'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '1.3'
27
- - !ruby/object:Gem::Dependency
28
- name: rake
14
+ name: nokogiri
29
15
  requirement: !ruby/object:Gem::Requirement
30
16
  requirements:
31
17
  - - ">="
32
18
  - !ruby/object:Gem::Version
33
19
  version: '0'
34
- type: :development
20
+ type: :runtime
35
21
  prerelease: false
36
22
  version_requirements: !ruby/object:Gem::Requirement
37
23
  requirements:
@@ -39,13 +25,13 @@ dependencies:
39
25
  - !ruby/object:Gem::Version
40
26
  version: '0'
41
27
  - !ruby/object:Gem::Dependency
42
- name: vcr
28
+ name: pry
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
31
  - - ">="
46
32
  - !ruby/object:Gem::Version
47
33
  version: '0'
48
- type: :development
34
+ type: :runtime
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
@@ -53,41 +39,41 @@ dependencies:
53
39
  - !ruby/object:Gem::Version
54
40
  version: '0'
55
41
  - !ruby/object:Gem::Dependency
56
- name: webmock
42
+ name: unirest
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
- - - '='
45
+ - - ">="
60
46
  - !ruby/object:Gem::Version
61
- version: 1.9.0
62
- type: :development
47
+ version: '0'
48
+ type: :runtime
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - '='
52
+ - - ">="
67
53
  - !ruby/object:Gem::Version
68
- version: 1.9.0
54
+ version: '0'
69
55
  - !ruby/object:Gem::Dependency
70
- name: minitest
56
+ name: rubyzip
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
- - - "~>"
59
+ - - ">="
74
60
  - !ruby/object:Gem::Version
75
- version: 4.7.3
76
- type: :development
61
+ version: '0'
62
+ type: :runtime
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
- - - "~>"
66
+ - - ">="
81
67
  - !ruby/object:Gem::Version
82
- version: 4.7.3
68
+ version: '0'
83
69
  - !ruby/object:Gem::Dependency
84
- name: pry-rescue
70
+ name: open_uri_redirections
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
73
  - - ">="
88
74
  - !ruby/object:Gem::Version
89
75
  version: '0'
90
- type: :development
76
+ type: :runtime
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
@@ -95,27 +81,27 @@ dependencies:
95
81
  - !ruby/object:Gem::Version
96
82
  version: '0'
97
83
  - !ruby/object:Gem::Dependency
98
- name: m
84
+ name: webmock
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
- - - "~>"
87
+ - - ">="
102
88
  - !ruby/object:Gem::Version
103
- version: 1.3.1
89
+ version: '0'
104
90
  type: :development
105
91
  prerelease: false
106
92
  version_requirements: !ruby/object:Gem::Requirement
107
93
  requirements:
108
- - - "~>"
94
+ - - ">="
109
95
  - !ruby/object:Gem::Version
110
- version: 1.3.1
96
+ version: '0'
111
97
  - !ruby/object:Gem::Dependency
112
- name: pdf-reader
98
+ name: bundler
113
99
  requirement: !ruby/object:Gem::Requirement
114
100
  requirements:
115
101
  - - ">="
116
102
  - !ruby/object:Gem::Version
117
103
  version: '0'
118
- type: :runtime
104
+ type: :development
119
105
  prerelease: false
120
106
  version_requirements: !ruby/object:Gem::Requirement
121
107
  requirements:
@@ -123,27 +109,27 @@ dependencies:
123
109
  - !ruby/object:Gem::Version
124
110
  version: '0'
125
111
  - !ruby/object:Gem::Dependency
126
- name: pry
112
+ name: rake
127
113
  requirement: !ruby/object:Gem::Requirement
128
114
  requirements:
129
- - - ">="
115
+ - - "~>"
130
116
  - !ruby/object:Gem::Version
131
- version: '0'
132
- type: :runtime
117
+ version: '10.0'
118
+ type: :development
133
119
  prerelease: false
134
120
  version_requirements: !ruby/object:Gem::Requirement
135
121
  requirements:
136
- - - ">="
122
+ - - "~>"
137
123
  - !ruby/object:Gem::Version
138
- version: '0'
124
+ version: '10.0'
139
125
  - !ruby/object:Gem::Dependency
140
- name: mechanize
126
+ name: rspec
141
127
  requirement: !ruby/object:Gem::Requirement
142
128
  requirements:
143
129
  - - ">="
144
130
  - !ruby/object:Gem::Version
145
131
  version: '0'
146
- type: :runtime
132
+ type: :development
147
133
  prerelease: false
148
134
  version_requirements: !ruby/object:Gem::Requirement
149
135
  requirements:
@@ -151,20 +137,20 @@ dependencies:
151
137
  - !ruby/object:Gem::Version
152
138
  version: '0'
153
139
  - !ruby/object:Gem::Dependency
154
- name: spreadsheet
140
+ name: vcr
155
141
  requirement: !ruby/object:Gem::Requirement
156
142
  requirements:
157
143
  - - ">="
158
144
  - !ruby/object:Gem::Version
159
145
  version: '0'
160
- type: :runtime
146
+ type: :development
161
147
  prerelease: false
162
148
  version_requirements: !ruby/object:Gem::Requirement
163
149
  requirements:
164
150
  - - ">="
165
151
  - !ruby/object:Gem::Version
166
152
  version: '0'
167
- description: ECFS provides a set of utilities for scraping FCC rulemakings
153
+ description: Provides Ruby-based access to the FCC's Electronic Comment Filing System
168
154
  email:
169
155
  - adelevie@gmail.com
170
156
  executables: []
@@ -172,11 +158,16 @@ extensions: []
172
158
  extra_rdoc_files: []
173
159
  files:
174
160
  - ".gitignore"
161
+ - ".rspec"
162
+ - ".ruby-version"
175
163
  - ".travis.yml"
164
+ - CODE_OF_CONDUCT.md
176
165
  - Gemfile
177
166
  - LICENSE.txt
178
167
  - README.md
179
168
  - Rakefile
169
+ - bin/console
170
+ - bin/setup
180
171
  - ecfs.gemspec
181
172
  - fixtures/vcr_cassettes/bulk_cassette.yml
182
173
  - fixtures/vcr_cassettes/main_cassette.yml
@@ -202,15 +193,7 @@ files:
202
193
  - lib/ecfs/too_many_filings_error.rb
203
194
  - lib/ecfs/util.rb
204
195
  - lib/ecfs/version.rb
205
- - test/helper.rb
206
- - test/test_daily_releases.rb
207
- - test/test_filing.rb
208
- - test/test_filings_query.rb
209
- - test/test_large_proceeding.rb
210
- - test/test_proceeding.rb
211
- - test/test_proceedings_query.rb
212
- - test/test_solr_scrape.rb
213
- homepage: http://github.com/adelevie/ecfs
196
+ homepage: https://github.com/adelevie/ecfs
214
197
  licenses:
215
198
  - MIT
216
199
  metadata: {}
@@ -233,14 +216,5 @@ rubyforge_project:
233
216
  rubygems_version: 2.2.2
234
217
  signing_key:
235
218
  specification_version: 4
236
- summary: ECFS helps you obtain comments and other filings from the FCC's Electronic
237
- Comment Filing System
238
- test_files:
239
- - test/helper.rb
240
- - test/test_daily_releases.rb
241
- - test/test_filing.rb
242
- - test/test_filings_query.rb
243
- - test/test_large_proceeding.rb
244
- - test/test_proceeding.rb
245
- - test/test_proceedings_query.rb
246
- - test/test_solr_scrape.rb
219
+ summary: Scraper for the FCC's Electronic Comment Filing System
220
+ test_files: []