ecfs 0.5.1 → 0.6.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1,11 +1 @@
1
- require 'bundler'
2
- Bundler::GemHelper.install_tasks
3
-
4
- require 'rake/testtask'
5
-
6
- Rake::TestTask.new do |t|
7
- t.libs << 'test'
8
- end
9
-
10
- desc "Run tests"
11
- task :default => :test
1
+ require "bundler/gem_tasks"
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "ecfs"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
data/ecfs.gemspec CHANGED
@@ -8,27 +8,30 @@ Gem::Specification.new do |spec|
8
8
  spec.version = ECFS::VERSION
9
9
  spec.authors = ["Alan deLevie"]
10
10
  spec.email = ["adelevie@gmail.com"]
11
- spec.description = %q{ECFS provides a set of utilities for scraping FCC rulemakings}
12
- spec.summary = %q{ECFS helps you obtain comments and other filings from the FCC's Electronic Comment Filing System}
13
- spec.homepage = "http://github.com/adelevie/ecfs"
11
+
12
+ spec.summary = %q{Scraper for the FCC's Electronic Comment Filing System}
13
+ spec.description = %q{Provides Ruby-based access to the FCC's Electronic Comment Filing System}
14
+ spec.homepage = "https://github.com/adelevie/ecfs"
14
15
  spec.license = "MIT"
15
16
 
16
- spec.files = `git ls-files`.split($/)
17
- spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
17
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
+ spec.bindir = "exe"
19
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
20
  spec.require_paths = ["lib"]
20
21
 
21
- spec.add_development_dependency "bundler", "~> 1.3"
22
- spec.add_development_dependency "rake"
23
- spec.add_development_dependency "vcr"
24
- spec.add_development_dependency "webmock", "1.9.0"
25
- spec.add_development_dependency "minitest", "~> 4.7.3"
26
- spec.add_development_dependency "pry-rescue"
27
- spec.add_development_dependency "m", "~> 1.3.1"
28
-
22
+ #if spec.respond_to?(:metadata)
23
+ # spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com' to prevent pushes to rubygems.org, or delete to allow pushes to any server."
24
+ #end
29
25
 
30
- spec.add_dependency "pdf-reader"
26
+ spec.add_dependency "nokogiri"
31
27
  spec.add_dependency "pry"
32
- spec.add_dependency "mechanize"
33
- spec.add_dependency "spreadsheet"
28
+ spec.add_dependency "unirest"
29
+ spec.add_dependency "rubyzip"
30
+ spec.add_dependency "open_uri_redirections"
31
+
32
+ spec.add_development_dependency "webmock"
33
+ spec.add_development_dependency "bundler"#, "~> 1.9"
34
+ spec.add_development_dependency "rake", "~> 10.0"
35
+ spec.add_development_dependency "rspec"
36
+ spec.add_development_dependency "vcr"
34
37
  end
data/lib/ecfs.rb CHANGED
@@ -1,15 +1,211 @@
1
- require "ecfs/version"
2
- require "ecfs/query"
3
- require "ecfs/filings_query"
4
- require "ecfs/filing"
5
- require "ecfs/proceedings_query"
6
- require "ecfs/proceeding"
7
- require "ecfs/document"
8
- require "ecfs/error"
9
- require "ecfs/too_many_filings_error"
10
- require "ecfs/bulk_filings_query"
11
- require "ecfs/daily_releases_query"
12
- require "ecfs/solr_scrape_query"
1
+ require 'ecfs/version'
2
+ require 'nokogiri'
3
+ require 'open-uri'
4
+ require 'pry'
5
+ require 'net/http'
6
+ require 'uri'
7
+ require 'unirest'
8
+ require 'zip'
9
+ require 'open_uri_redirections'
10
+ require 'fileutils'
13
11
 
14
12
  module ECFS
15
- end
13
+ module Util
14
+ SIGNALS = [
15
+ 'E.g.', 'Accord', 'See', 'See also', 'Cf.',
16
+ 'Compare', 'Contra', 'But see', 'But cf.',
17
+ 'See generally'
18
+ ].map {|s| "#{s} Id."} << 'Id.'
19
+
20
+ def self.get_footnotes(url: nil, id_tree: false)
21
+ # hacky 'temp' file
22
+ rando = (rand * 1000000000000000000).to_i
23
+ FileUtils.mkdir_p "tmp/#{rando}"
24
+ path = "tmp/#{rando}/document.doc.zip"
25
+
26
+ open(path, 'wb', allow_redirections: :all) do |file|
27
+ file << open(url, allow_redirections: :all).read
28
+ `unzip #{path} -d tmp/#{rando}`
29
+ end
30
+
31
+ xml = File.open("tmp/#{rando}/word/footnotes.xml").read
32
+ doc = Nokogiri::XML(xml)
33
+
34
+ footnotes = doc.children[0].children[3..-1]
35
+
36
+ my_footnotes = footnotes.to_ary.map do |fn|
37
+ {
38
+ index: fn.attributes['id'].value.to_i - 1,
39
+ text: fn.text.strip
40
+ }
41
+ end
42
+
43
+ # compute the tree of id. citations
44
+ if id_tree
45
+ my_footnotes.each {|fn| fn[:ids] = []}
46
+ my_footnotes.each {|fn| fn[:id] = false}
47
+ ids = my_footnotes.select {|fn| fn[:text].start_with?(*ECFS::Util::SIGNALS)}
48
+ ids.each {|id| id[:id] = true}
49
+
50
+ my_footnotes.each do |fn|
51
+ if fn[:id] == true
52
+ parent_idx = fn[:index]-1
53
+ my_footnotes.find {|fn| fn[:index] == parent_idx}[:ids] << fn
54
+ end
55
+ end
56
+
57
+ my_footnotes = send_ids_to_parent(my_footnotes)
58
+
59
+ end
60
+
61
+ FileUtils.rm_rf("tmp/#{rando}")
62
+
63
+ my_footnotes
64
+ end
65
+
66
+ private
67
+
68
+ # if a footnote is an id and has ids, we send its ids to its parent
69
+ # these footnotes are reflected as parents, but are actually siblings
70
+ # so we call these ptsbs (parents that should be siblings). <3 software.
71
+ def self.send_ids_to_parent(footnotes)
72
+ ptsbs_array = footnotes.select {|fn| fn[:id] == true && fn[:ids].length > 0}
73
+ if ptsbs_array.empty?
74
+ return footnotes
75
+ else
76
+ ptsbs_array.each do |ptsbs|
77
+ parent_idx = ptsbs[:index]-1
78
+ footnotes.find {|fn| fn[:index] == parent_idx}[:ids].concat(ptsbs[:ids])
79
+ ptsbs[:ids] = []
80
+ end
81
+ self.send_ids_to_parent(footnotes)
82
+ end
83
+ end
84
+ end
85
+
86
+ module EDOCS
87
+ def self.search(docket: nil, da: nil, fcc: nil, report: nil, file: nil, fcc_rcd_vol: nil, fcc_rcd_page: nil)
88
+ uri = URI.parse("https://apps.fcc.gov/edocs_public/Query.do?mode=advanced&rpt=cond")
89
+ params = {
90
+ 'fccNo' => fcc,
91
+ 'daNo' => da,
92
+ 'fileNo' => file,
93
+ 'docket' => docket,
94
+ 'reportNo' => report,
95
+ 'fccRecordVol' => fcc_rcd_vol,
96
+ 'fccRecordPage' => fcc_rcd_page
97
+ }
98
+ params.reject! {|_k,v| v.nil?}
99
+
100
+ url = 'https://apps.fcc.gov/edocs_public/Query.do?mode=advance&rpt=cond'
101
+ response = Unirest.post url, parameters: params
102
+ doc = Nokogiri::HTML(response.raw_body)
103
+
104
+ tables = doc.css('table.tableWithOutBorder').children.css('table.tableWithOutBorder')
105
+ results = tables[2].css('table.tableWithBorder')
106
+
107
+ results.map do |result|
108
+ links = result.search('a').to_a
109
+ links.shift
110
+ links = links.map do |link|
111
+ path = link.attributes["href"].value
112
+
113
+ "https://apps.fcc.gov/edocs_public/#{path}"
114
+ end
115
+
116
+ word = links.select {|link| link.end_with?('.doc', '.docx')}
117
+ pdf = links.select {|link| link.end_with?('.pdf')}
118
+ txt = links.select {|link| link.end_with?('.txt')}
119
+
120
+ rows = result.search('tr')
121
+
122
+ {
123
+ title: rows[0].text.strip,
124
+ released: rows[1].text.strip.split(': ')[1],
125
+ description: rows[2].text.strip.split('Description: ')[1],
126
+ word: word,
127
+ pdf: pdf,
128
+ txt: txt
129
+ }.reject {|_k,v| v.nil?}
130
+ end
131
+ end
132
+ end
133
+
134
+ module Proceedings
135
+ def self.search(docket: nil)
136
+ url = "http://apps.fcc.gov/ecfs/proceeding/view?name=#{docket}"
137
+ response = Unirest.get url
138
+ doc = Nokogiri::HTML(response.raw_body)
139
+ table = doc.search('table.dataTable').first
140
+ rows = table.search('div.wwgrp')
141
+
142
+ proceeding = {}
143
+ rows.each do |row|
144
+ key = row.search('span')[0].text.strip
145
+ key.gsub!(" ", "")
146
+ key.gsub!(":", "")
147
+ key.downcase!
148
+ value = row.search('span')[1].text.strip
149
+ proceeding[key.to_sym] = value
150
+ end
151
+
152
+ proceeding
153
+ end
154
+ end
155
+
156
+ module Filings
157
+ ATTRS = [
158
+ :docket, :filer, :lawfirm, :received,
159
+ :posted, :exparte, :type, :pages
160
+ ]
161
+
162
+ def self.get_document_links(url: url)
163
+ doc = Nokogiri::HTML(open(url))
164
+ xpath = "//*[@id=\"documents.link\"]"
165
+ links = doc.xpath(xpath).search('a')
166
+
167
+ links.map do |link|
168
+ id = link.attributes["href"].value.split('?id=')[1]
169
+
170
+ "http://apps.fcc.gov/ecfs/document/view?id=#{id}"
171
+ end
172
+ end
173
+
174
+ def self.search(docket: nil, size: 1000, start: 0, order: 'asc')
175
+ url = "http://apps.fcc.gov/ecfs/comment_search_solr/doSearch?proceeding=#{docket}&dir=#{order}&start=#{start}&size=#{size}"
176
+ doc = Nokogiri::HTML(open(url))
177
+ xpath = "//*[@id='yui-main']/div/div[4]"
178
+ table = doc.xpath(xpath).children[1]
179
+ rows = table.search('tr')
180
+ rows.shift
181
+
182
+ filings = []
183
+ rows.each do |row|
184
+ row_hash = {}
185
+ cols = row.search('td')
186
+
187
+ cols.each_with_index do |col, i|
188
+ attribute = ECFS::Filings::ATTRS[i]
189
+ row_hash[attribute] = col.text.strip
190
+
191
+ # get the url
192
+ if attribute == :filer
193
+ path = col.search('a').first.attributes["href"].value
194
+ id = path.split('?id=')[1]
195
+ url = "http://apps.fcc.gov/ecfs/comment/view?id=#{id}"
196
+ row_hash[:url] = url
197
+ end
198
+ end
199
+
200
+ # cast dates and int
201
+ row_hash[:received] = DateTime.parse(row_hash[:received]).to_s
202
+ row_hash[:posted] = DateTime.parse(row_hash[:posted]).to_s
203
+ row_hash[:pages] = row_hash[:pages].to_i
204
+
205
+ filings << row_hash
206
+ end
207
+
208
+ filings
209
+ end
210
+ end
211
+ end
data/lib/ecfs/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module ECFS
2
- VERSION = "0.5.1"
2
+ VERSION = "0.6.0"
3
3
  end
metadata CHANGED
@@ -1,37 +1,23 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ecfs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alan deLevie
8
8
  autorequire:
9
- bindir: bin
9
+ bindir: exe
10
10
  cert_chain: []
11
- date: 2015-01-19 00:00:00.000000000 Z
11
+ date: 2015-04-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '1.3'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '1.3'
27
- - !ruby/object:Gem::Dependency
28
- name: rake
14
+ name: nokogiri
29
15
  requirement: !ruby/object:Gem::Requirement
30
16
  requirements:
31
17
  - - ">="
32
18
  - !ruby/object:Gem::Version
33
19
  version: '0'
34
- type: :development
20
+ type: :runtime
35
21
  prerelease: false
36
22
  version_requirements: !ruby/object:Gem::Requirement
37
23
  requirements:
@@ -39,13 +25,13 @@ dependencies:
39
25
  - !ruby/object:Gem::Version
40
26
  version: '0'
41
27
  - !ruby/object:Gem::Dependency
42
- name: vcr
28
+ name: pry
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
31
  - - ">="
46
32
  - !ruby/object:Gem::Version
47
33
  version: '0'
48
- type: :development
34
+ type: :runtime
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
@@ -53,41 +39,41 @@ dependencies:
53
39
  - !ruby/object:Gem::Version
54
40
  version: '0'
55
41
  - !ruby/object:Gem::Dependency
56
- name: webmock
42
+ name: unirest
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
- - - '='
45
+ - - ">="
60
46
  - !ruby/object:Gem::Version
61
- version: 1.9.0
62
- type: :development
47
+ version: '0'
48
+ type: :runtime
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - '='
52
+ - - ">="
67
53
  - !ruby/object:Gem::Version
68
- version: 1.9.0
54
+ version: '0'
69
55
  - !ruby/object:Gem::Dependency
70
- name: minitest
56
+ name: rubyzip
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
- - - "~>"
59
+ - - ">="
74
60
  - !ruby/object:Gem::Version
75
- version: 4.7.3
76
- type: :development
61
+ version: '0'
62
+ type: :runtime
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
- - - "~>"
66
+ - - ">="
81
67
  - !ruby/object:Gem::Version
82
- version: 4.7.3
68
+ version: '0'
83
69
  - !ruby/object:Gem::Dependency
84
- name: pry-rescue
70
+ name: open_uri_redirections
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
73
  - - ">="
88
74
  - !ruby/object:Gem::Version
89
75
  version: '0'
90
- type: :development
76
+ type: :runtime
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
@@ -95,27 +81,27 @@ dependencies:
95
81
  - !ruby/object:Gem::Version
96
82
  version: '0'
97
83
  - !ruby/object:Gem::Dependency
98
- name: m
84
+ name: webmock
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
- - - "~>"
87
+ - - ">="
102
88
  - !ruby/object:Gem::Version
103
- version: 1.3.1
89
+ version: '0'
104
90
  type: :development
105
91
  prerelease: false
106
92
  version_requirements: !ruby/object:Gem::Requirement
107
93
  requirements:
108
- - - "~>"
94
+ - - ">="
109
95
  - !ruby/object:Gem::Version
110
- version: 1.3.1
96
+ version: '0'
111
97
  - !ruby/object:Gem::Dependency
112
- name: pdf-reader
98
+ name: bundler
113
99
  requirement: !ruby/object:Gem::Requirement
114
100
  requirements:
115
101
  - - ">="
116
102
  - !ruby/object:Gem::Version
117
103
  version: '0'
118
- type: :runtime
104
+ type: :development
119
105
  prerelease: false
120
106
  version_requirements: !ruby/object:Gem::Requirement
121
107
  requirements:
@@ -123,27 +109,27 @@ dependencies:
123
109
  - !ruby/object:Gem::Version
124
110
  version: '0'
125
111
  - !ruby/object:Gem::Dependency
126
- name: pry
112
+ name: rake
127
113
  requirement: !ruby/object:Gem::Requirement
128
114
  requirements:
129
- - - ">="
115
+ - - "~>"
130
116
  - !ruby/object:Gem::Version
131
- version: '0'
132
- type: :runtime
117
+ version: '10.0'
118
+ type: :development
133
119
  prerelease: false
134
120
  version_requirements: !ruby/object:Gem::Requirement
135
121
  requirements:
136
- - - ">="
122
+ - - "~>"
137
123
  - !ruby/object:Gem::Version
138
- version: '0'
124
+ version: '10.0'
139
125
  - !ruby/object:Gem::Dependency
140
- name: mechanize
126
+ name: rspec
141
127
  requirement: !ruby/object:Gem::Requirement
142
128
  requirements:
143
129
  - - ">="
144
130
  - !ruby/object:Gem::Version
145
131
  version: '0'
146
- type: :runtime
132
+ type: :development
147
133
  prerelease: false
148
134
  version_requirements: !ruby/object:Gem::Requirement
149
135
  requirements:
@@ -151,20 +137,20 @@ dependencies:
151
137
  - !ruby/object:Gem::Version
152
138
  version: '0'
153
139
  - !ruby/object:Gem::Dependency
154
- name: spreadsheet
140
+ name: vcr
155
141
  requirement: !ruby/object:Gem::Requirement
156
142
  requirements:
157
143
  - - ">="
158
144
  - !ruby/object:Gem::Version
159
145
  version: '0'
160
- type: :runtime
146
+ type: :development
161
147
  prerelease: false
162
148
  version_requirements: !ruby/object:Gem::Requirement
163
149
  requirements:
164
150
  - - ">="
165
151
  - !ruby/object:Gem::Version
166
152
  version: '0'
167
- description: ECFS provides a set of utilities for scraping FCC rulemakings
153
+ description: Provides Ruby-based access to the FCC's Electronic Comment Filing System
168
154
  email:
169
155
  - adelevie@gmail.com
170
156
  executables: []
@@ -172,11 +158,16 @@ extensions: []
172
158
  extra_rdoc_files: []
173
159
  files:
174
160
  - ".gitignore"
161
+ - ".rspec"
162
+ - ".ruby-version"
175
163
  - ".travis.yml"
164
+ - CODE_OF_CONDUCT.md
176
165
  - Gemfile
177
166
  - LICENSE.txt
178
167
  - README.md
179
168
  - Rakefile
169
+ - bin/console
170
+ - bin/setup
180
171
  - ecfs.gemspec
181
172
  - fixtures/vcr_cassettes/bulk_cassette.yml
182
173
  - fixtures/vcr_cassettes/main_cassette.yml
@@ -202,15 +193,7 @@ files:
202
193
  - lib/ecfs/too_many_filings_error.rb
203
194
  - lib/ecfs/util.rb
204
195
  - lib/ecfs/version.rb
205
- - test/helper.rb
206
- - test/test_daily_releases.rb
207
- - test/test_filing.rb
208
- - test/test_filings_query.rb
209
- - test/test_large_proceeding.rb
210
- - test/test_proceeding.rb
211
- - test/test_proceedings_query.rb
212
- - test/test_solr_scrape.rb
213
- homepage: http://github.com/adelevie/ecfs
196
+ homepage: https://github.com/adelevie/ecfs
214
197
  licenses:
215
198
  - MIT
216
199
  metadata: {}
@@ -233,14 +216,5 @@ rubyforge_project:
233
216
  rubygems_version: 2.2.2
234
217
  signing_key:
235
218
  specification_version: 4
236
- summary: ECFS helps you obtain comments and other filings from the FCC's Electronic
237
- Comment Filing System
238
- test_files:
239
- - test/helper.rb
240
- - test/test_daily_releases.rb
241
- - test/test_filing.rb
242
- - test/test_filings_query.rb
243
- - test/test_large_proceeding.rb
244
- - test/test_proceeding.rb
245
- - test/test_proceedings_query.rb
246
- - test/test_solr_scrape.rb
219
+ summary: Scraper for the FCC's Electronic Comment Filing System
220
+ test_files: []