coa-op-scraper 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/.rspec +2 -0
- data/Gemfile +22 -0
- data/Gemfile.lock +116 -0
- data/LICENSE.txt +20 -0
- data/README.md +65 -0
- data/Rakefile +31 -0
- data/VERSION +1 -0
- data/coa-op-scraper.gemspec +77 -0
- data/lib/coa_docket_no.rb +81 -0
- data/lib/coa_op_scraper.rb +92 -0
- data/lib/legacy.rb +77 -0
- data/lib/tames.rb +91 -0
- data/spec/scrapers/legacy_scraper_spec.rb +110 -0
- data/spec/scrapers/main_spec.rb +18 -0
- data/spec/scrapers/tames_scraper_spec.rb +108 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/support/vcr.rb +15 -0
- data/spec/vcr/legacy/02-2003-02-20.json +1 -0
- data/spec/vcr/legacy/03-2013-01-10.json +1 -0
- data/spec/vcr/tames/01-2012-01-19.json +1 -0
- data/spec/vcr/tames/12-2003-01-31.json +1 -0
- data/spec/vcr/tames/14-2005-01-20.json +1 -0
- metadata +169 -0
data/.document
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Fetch gems over TLS: the plain-http endpoint lets the gem index and
# downloads be tampered with in transit.
source "https://rubygems.org"

# Runtime dependencies.
gem 'nokogiri'
gem 'rails'

group :development, :test do
  gem 'rspec-rails'
end

# Test-only helpers.
group :test do
  gem 'timecop'
  gem 'vcr' #, '~> 2.0.rc'
  gem 'fakeweb'
end

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rdoc" #, "~> 3.12"
  gem "bundler" #, "~> 1.0.0"
  gem "jeweler" #, "~> 1.8.4"
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
actionmailer (3.2.11)
|
5
|
+
actionpack (= 3.2.11)
|
6
|
+
mail (~> 2.4.4)
|
7
|
+
actionpack (3.2.11)
|
8
|
+
activemodel (= 3.2.11)
|
9
|
+
activesupport (= 3.2.11)
|
10
|
+
builder (~> 3.0.0)
|
11
|
+
erubis (~> 2.7.0)
|
12
|
+
journey (~> 1.0.4)
|
13
|
+
rack (~> 1.4.0)
|
14
|
+
rack-cache (~> 1.2)
|
15
|
+
rack-test (~> 0.6.1)
|
16
|
+
sprockets (~> 2.2.1)
|
17
|
+
activemodel (3.2.11)
|
18
|
+
activesupport (= 3.2.11)
|
19
|
+
builder (~> 3.0.0)
|
20
|
+
activerecord (3.2.11)
|
21
|
+
activemodel (= 3.2.11)
|
22
|
+
activesupport (= 3.2.11)
|
23
|
+
arel (~> 3.0.2)
|
24
|
+
tzinfo (~> 0.3.29)
|
25
|
+
activeresource (3.2.11)
|
26
|
+
activemodel (= 3.2.11)
|
27
|
+
activesupport (= 3.2.11)
|
28
|
+
activesupport (3.2.11)
|
29
|
+
i18n (~> 0.6)
|
30
|
+
multi_json (~> 1.0)
|
31
|
+
arel (3.0.2)
|
32
|
+
builder (3.0.4)
|
33
|
+
diff-lcs (1.1.3)
|
34
|
+
erubis (2.7.0)
|
35
|
+
fakeweb (1.3.0)
|
36
|
+
git (1.2.5)
|
37
|
+
hike (1.2.1)
|
38
|
+
i18n (0.6.1)
|
39
|
+
jeweler (1.8.4)
|
40
|
+
bundler (~> 1.0)
|
41
|
+
git (>= 1.2.5)
|
42
|
+
rake
|
43
|
+
rdoc
|
44
|
+
journey (1.0.4)
|
45
|
+
json (1.7.6)
|
46
|
+
mail (2.4.4)
|
47
|
+
i18n (>= 0.4.0)
|
48
|
+
mime-types (~> 1.16)
|
49
|
+
treetop (~> 1.4.8)
|
50
|
+
mime-types (1.19)
|
51
|
+
multi_json (1.5.0)
|
52
|
+
nokogiri (1.5.6)
|
53
|
+
polyglot (0.3.3)
|
54
|
+
rack (1.4.4)
|
55
|
+
rack-cache (1.2)
|
56
|
+
rack (>= 0.4)
|
57
|
+
rack-ssl (1.3.2)
|
58
|
+
rack
|
59
|
+
rack-test (0.6.2)
|
60
|
+
rack (>= 1.0)
|
61
|
+
rails (3.2.11)
|
62
|
+
actionmailer (= 3.2.11)
|
63
|
+
actionpack (= 3.2.11)
|
64
|
+
activerecord (= 3.2.11)
|
65
|
+
activeresource (= 3.2.11)
|
66
|
+
activesupport (= 3.2.11)
|
67
|
+
bundler (~> 1.0)
|
68
|
+
railties (= 3.2.11)
|
69
|
+
railties (3.2.11)
|
70
|
+
actionpack (= 3.2.11)
|
71
|
+
activesupport (= 3.2.11)
|
72
|
+
rack-ssl (~> 1.3.2)
|
73
|
+
rake (>= 0.8.7)
|
74
|
+
rdoc (~> 3.4)
|
75
|
+
thor (>= 0.14.6, < 2.0)
|
76
|
+
rake (10.0.3)
|
77
|
+
rdoc (3.12)
|
78
|
+
json (~> 1.4)
|
79
|
+
rspec-core (2.12.2)
|
80
|
+
rspec-expectations (2.12.1)
|
81
|
+
diff-lcs (~> 1.1.3)
|
82
|
+
rspec-mocks (2.12.1)
|
83
|
+
rspec-rails (2.12.2)
|
84
|
+
actionpack (>= 3.0)
|
85
|
+
activesupport (>= 3.0)
|
86
|
+
railties (>= 3.0)
|
87
|
+
rspec-core (~> 2.12.0)
|
88
|
+
rspec-expectations (~> 2.12.0)
|
89
|
+
rspec-mocks (~> 2.12.0)
|
90
|
+
sprockets (2.2.2)
|
91
|
+
hike (~> 1.2)
|
92
|
+
multi_json (~> 1.0)
|
93
|
+
rack (~> 1.0)
|
94
|
+
tilt (~> 1.1, != 1.3.0)
|
95
|
+
thor (0.16.0)
|
96
|
+
tilt (1.3.3)
|
97
|
+
timecop (0.5.9)
|
98
|
+
treetop (1.4.12)
|
99
|
+
polyglot
|
100
|
+
polyglot (>= 0.3.1)
|
101
|
+
tzinfo (0.3.35)
|
102
|
+
vcr (2.4.0)
|
103
|
+
|
104
|
+
PLATFORMS
|
105
|
+
ruby
|
106
|
+
|
107
|
+
DEPENDENCIES
|
108
|
+
bundler
|
109
|
+
fakeweb
|
110
|
+
jeweler
|
111
|
+
nokogiri
|
112
|
+
rails
|
113
|
+
rdoc
|
114
|
+
rspec-rails
|
115
|
+
timecop
|
116
|
+
vcr
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2013 Don Cruse
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# Texas COA Op Scraper - a gem for Texas courts of appeals
|
2
|
+
|
3
|
+
### What's this about?
|
4
|
+
|
5
|
+
This gem understands how to parse the opinion lists released by each of
|
6
|
+
Texas's fourteen intermediate courts of appeals.
|
7
|
+
|
8
|
+
Opinion releases are announced on a separate webpage for each court of
|
9
|
+
appeals. Some courts use a legacy system; others have shifted to the new
|
10
|
+
TAMES system employed by the Texas Supreme Court.
|
11
|
+
|
12
|
+
### Why does this gem exist?
|
13
|
+
|
14
|
+
It was developed as part of the TexApp.org project ([github](http://github.com/texapp)),
|
15
|
+
which aims to ensure that Texas's court of appeals opinions are available in
|
16
|
+
a reliable — and citable — location available to the general public,
|
17
|
+
members of the bar, and the court system itself.
|
18
|
+
|
19
|
+
In Texas, unpublished decisions of intermediate courts of appeals are precedential.
|
20
|
+
Yet litigants do not always have a reliable way to locate or cite this authority.
|
21
|
+
In the past, it was possible to use a well-crafted Google search to locate
|
22
|
+
relevant opinions (a technique discussed in [this 2009 blog post](http://www.scotxblog.com/practice-notes/researching-unpublished-coa-opinions-in-texas/)). But with the courts'
|
23
|
+
new TAMES system, these Google searches no longer work. The TAMES system does provide
|
24
|
+
many of these opinions in an online archive, but its URLs are prohibitively long and
|
25
|
+
complex to include in any printed legal brief.
|
26
|
+
|
27
|
+
### How can I use this?
|
28
|
+
|
29
|
+
This gem can be folded into the application of your choice to store
|
30
|
+
information about these opinions or queue up downloads of the opinions
|
31
|
+
themselves. It does not contain code related to data storage or any
|
32
|
+
interface to a file storage service. Those implementation details
|
33
|
+
are up to you.
|
34
|
+
|
35
|
+
The simplest way to use this gem is to specify a particular court of appeals
|
36
|
+
(using its two-digit numerical notation, like "03" for the Third Court) and a
|
37
|
+
particular date on which you want to check for opinions. The gem will then
|
38
|
+
determine the correct URL to use, check that page, and parse what is found to
|
39
|
+
retrieve the metadata for each opinion released on that date. What you get back
|
40
|
+
is a list of that metadata.
|
41
|
+
|
42
|
+
The data for each opinion is a simple hash. The overall
|
43
|
+
set of results is just an array of those hashes, or an empty array if no
|
44
|
+
results were found for that page. Here is an example of the hash for one opinion:
|
45
|
+
|
46
|
+
> { :author_string => "Opinion by Justice Pemberton",
|
47
|
+
:opinion_urls => {"html"=>"/opinions/htmlopinion.asp?OpinionId=20764",
|
48
|
+
"pdf"=>"/opinions/PDFOpinion.asp?OpinionId=20764"},
|
49
|
+
:disposition => "AFFIRMED:",
|
50
|
+
:panel_string => "(Before Chief Justice Jones, Justices Pemberton and Henson)",
|
51
|
+
:release_date => Fri, 20 Jan 2012,
|
52
|
+
:case_style => "Janeen Denise Smith v. The State of Texas",
|
53
|
+
:origin => "Appeal from County Court at Law No. 1 of Caldwell County",
|
54
|
+
:docket_no => "03-10-00725-CR",
|
55
|
+
:docket_page_url => "/opinions/case.asp?FilingID=15750" }
|
56
|
+
|
57
|
+
It's up to you to write code that does something interesting with that metadata —
|
58
|
+
such as storing it or downloading the opinion PDFs themselves (as is being done
|
59
|
+
for TexApp.org).
|
60
|
+
|
61
|
+
## Copyright
|
62
|
+
|
63
|
+
Copyright (c) 2013 Don Cruse. See LICENSE.txt for
|
64
|
+
further details.
|
65
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
require 'bundler'

# Activate the gems from the Gemfile before anything else is loaded; if any are
# missing, print Bundler's message and exit with Bundler's own status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

# Jeweler generates coa-op-scraper.gemspec from the settings below
# (re-run `rake gemspec` after changing them).
require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "coa-op-scraper"
  gem.homepage = "http://github.com/doncruse/coa-op-scraper"
  # License identifier, not a copyright line: LICENSE.txt carries the MIT text.
  gem.license = "MIT"
  gem.summary = "A scraper for intermediate Texas appellate opinions"
  gem.description = "A scraper for intermediate appellate opinions"
  gem.email = "doncruse@gmail.com"
  gem.authors = ["Don Cruse"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

# `rake` with no arguments runs the spec suite.
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new
task :default => :spec
|
31
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.2.2
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = "coa-op-scraper"
|
8
|
+
s.version = "0.2.2"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Don Cruse"]
|
12
|
+
s.date = "2013-02-09"
|
13
|
+
s.description = "A scraper for intermediate appellate opinions"
|
14
|
+
s.email = "doncruse@gmail.com"
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE.txt",
|
17
|
+
"README.md"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".rspec",
|
22
|
+
"Gemfile",
|
23
|
+
"Gemfile.lock",
|
24
|
+
"LICENSE.txt",
|
25
|
+
"README.md",
|
26
|
+
"Rakefile",
|
27
|
+
"VERSION",
|
28
|
+
"coa-op-scraper.gemspec",
|
29
|
+
"lib/coa_docket_no.rb",
|
30
|
+
"lib/coa_op_scraper.rb",
|
31
|
+
"lib/legacy.rb",
|
32
|
+
"lib/tames.rb",
|
33
|
+
"spec/scrapers/legacy_scraper_spec.rb",
|
34
|
+
"spec/scrapers/main_spec.rb",
|
35
|
+
"spec/scrapers/tames_scraper_spec.rb",
|
36
|
+
"spec/spec_helper.rb",
|
37
|
+
"spec/support/vcr.rb",
|
38
|
+
"spec/vcr/legacy/02-2003-02-20.json",
|
39
|
+
"spec/vcr/legacy/03-2013-01-10.json",
|
40
|
+
"spec/vcr/tames/01-2012-01-19.json",
|
41
|
+
"spec/vcr/tames/12-2003-01-31.json",
|
42
|
+
"spec/vcr/tames/14-2005-01-20.json"
|
43
|
+
]
|
44
|
+
s.homepage = "http://github.com/doncruse/coa-op-scraper"
|
45
|
+
s.licenses = ["(c)2013 Don Cruse"]
|
46
|
+
s.require_paths = ["lib"]
|
47
|
+
s.rubygems_version = "1.8.23"
|
48
|
+
s.summary = "A scraper for intermediate Texas appellate opinions"
|
49
|
+
|
50
|
+
if s.respond_to? :specification_version then
|
51
|
+
s.specification_version = 3
|
52
|
+
|
53
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
54
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
55
|
+
s.add_runtime_dependency(%q<rails>, [">= 0"])
|
56
|
+
s.add_development_dependency(%q<rspec-rails>, [">= 0"])
|
57
|
+
s.add_development_dependency(%q<rdoc>, [">= 0"])
|
58
|
+
s.add_development_dependency(%q<bundler>, [">= 0"])
|
59
|
+
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
60
|
+
else
|
61
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
62
|
+
s.add_dependency(%q<rails>, [">= 0"])
|
63
|
+
s.add_dependency(%q<rspec-rails>, [">= 0"])
|
64
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
65
|
+
s.add_dependency(%q<bundler>, [">= 0"])
|
66
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
67
|
+
end
|
68
|
+
else
|
69
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
70
|
+
s.add_dependency(%q<rails>, [">= 0"])
|
71
|
+
s.add_dependency(%q<rspec-rails>, [">= 0"])
|
72
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
73
|
+
s.add_dependency(%q<bundler>, [">= 0"])
|
74
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module CoaOpScraper
  # Encapsulates the logic of working with COA docket numbers of the form
  # "CC-YY-NNNNN-CR" / "CC-YY-NNNNN-CV" (court, year, case number, type suffix).
  #
  # Note: this only checks the *shape* of the string. It is distinct from
  # knowing whether a docket number was actually used by a court.
  class CoaDocketNo
    # The raw docket string as given, or nil when it did not have the expected shape.
    attr_accessor :no

    # Suffixes accepted as the fourth dash-separated part.
    TYPE_SUFFIXES = ["CR", "CV"]

    # no - docket number string. Anything that does not split into exactly four
    #      dash-separated parts ending in "CR" or "CV" (including nil) leaves
    #      the object invalid instead of raising.
    def initialize(no)
      text = no.to_s
      parts = text.split("-")
      @no = (parts.count == 4 && TYPE_SUFFIXES.include?(parts.last)) ? text : nil
    end

    # True when the constructor accepted the docket string.
    def valid?
      !@no.nil?
    end

    # Same as #fixed_length.
    def to_s
      fixed_length
    end

    # Zero-padded form: 2-digit court, 2-digit year, 5-digit case number and the
    # type suffix, joined with dashes. Returns "" for an invalid docket number.
    # Raises ArgumentError if one of the first three parts is not a decimal integer.
    def fixed_length
      return "" unless valid?

      coa, year, number, type_suffix = @no.split("-")
      [padded(coa, 2), padded(year, 2), padded(number, 5), type_suffix].join('-')
    end

    # #fixed_length with the trailing "-CR"/"-CV" removed.
    def without_type
      fixed_length.sub(/-C[RV]\z/, "")
    end

    #####################################
    # For accessing pieces
    # (each returns nil when the docket number is invalid)

    # Two-digit court of appeals number, e.g. "03".
    def coa_number
      canonical.split("-")[0]
    end

    # Two-digit year, e.g. "10".
    def year_number
      canonical.split("-")[1]
    end

    # Five-digit case number, e.g. "00725".
    def case_number
      canonical.split("-")[2]
    end

    # True when the type suffix is "CV".
    def civil?
      canonical.split("-")[3] == "CV"
    end

    # True when the type suffix is "CR".
    def criminal?
      canonical.split("-")[3] == "CR"
    end

    ###################################
    # Standardizing how used internally

    # Key form without the suffix,
    # because the -CV/-CR suffix is not relevant to uniqueness.
    def for_database_key
      without_type
    end

    # Form used when building web URLs (same as #fixed_length).
    def for_web_urls
      fixed_length
    end

    # Canonical form (same as #fixed_length).
    def canonical
      fixed_length
    end

    protected

    # Zero-pads +number+ to +digits+ places.
    #
    # The parts arrive as Strings, frequently with leading zeros. Handing such a
    # String straight to a "%d" directive makes Ruby convert it with radix-prefix
    # detection, so "00725" is read as octal (-> 469) and "08"/"09" raise.
    # Parsing explicitly in base 10 avoids both problems.
    def padded(number, digits)
      format("%0#{digits}d", Integer(number.to_s, 10))
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# encoding=utf-8
|
2
|
+
module CoaOpScraper
  require 'legacy'
  require 'tames'
  require 'coa_docket_no'

  require 'date'
  require 'open-uri'
  require 'active_support/core_ext'

  # The Texas appellate websites are sometimes fragile.
  # These sleep intervals should give ample time between requests.
  HISTORICAL_THROTTLE = 10
  CURRENT_THROTTLE = 5

  # When true, .urls_for_historical_range also produces URLs for Saturdays and
  # Sundays. (Lowercase `false`: the FALSE constant no longer exists in
  # current Ruby versions.)
  @@check_weekends = false

  # A court's placement in one of these two arrays tells you about the webpage format
  # currently used by that court.
  TAMES_COAS = [ "01", "03", "04", "05", "06", "09", "11", "12", "14" ]
  LEGACY_COAS = [ "02", "07", "08", "10", "13" ]

  ############################################################
  # This is the easiest method to use here. Feed it a COA value
  # (in the form "03", for example) and the date for which you
  # want the results (in the form of a Ruby date object).
  #
  # Returns whatever the matching parser returns, or nil when +coa+ is in
  # neither TAMES_COAS nor LEGACY_COAS.
  def self.scrape_one_opinion_list(coa, target_date)
    doc = self.retrieve_list_for_coa_for_date(coa, target_date)
    doc && parse_opinion_list_for_coa(coa, doc)
  end

  ############################################################
  # These methods would be useful to populate a queue of opinion
  # lists to check later.
  #
  # The #urls_for_historical_range method will, as expected,
  # compute a list of the URLs that are appropriate (excluding
  # weekends by default).
  #
  # The #parse_coa_opinion_list_at method will take a coa number
  # and a URL and return back a list of the results.

  # Returns an array with one URL per date in start_date..end_date.
  def self.urls_for_historical_range(coa, start_date, end_date)
    wanted_dates = (start_date .. end_date).select do |target_date|
      @@check_weekends || target_date.weekday?
    end
    wanted_dates.map { |target_date| self.url_for_coa_for_date(coa, target_date) }
  end

  # Takes a URL, returns a list of the opinion data ([] when nothing is parsed).
  #
  # The page at +url+ is fetched directly. Earlier versions forwarded +url+ to
  # .scrape_one_opinion_list, which treats its second argument as a date, so a
  # real URL never reached the fetch step. A Date is still accepted here so
  # callers that relied on that forwarding keep working.
  def self.parse_coa_opinion_list_at(coa, url)
    return self.scrape_one_opinion_list(coa, url) || [] if url.is_a?(Date)

    doc = fetch_url(url)
    (doc && parse_opinion_list_for_coa(coa, doc)) || []
  end

  # NOTE: the methods below are internal helpers. The two `def self.` helpers
  # that follow have always been publicly callable (a bare `protected` does not
  # apply to `def self.` methods) and stay that way for compatibility.

  # Delegates URL construction to the scraper for the court's page format.
  # Returns nil for a court listed in neither array.
  def self.url_for_coa_for_date(coa,date)
    if TAMES_COAS.include?(coa)
      CoaOpScraper::Tames.url_for_coa_for_date(coa,date)
    elsif LEGACY_COAS.include?(coa)
      CoaOpScraper::Legacy.url_for_coa_for_date(coa,date)
    end
  end

  # Fetches the opinion-list page for +coa+ on +date+.
  # Returns the open-uri result, or nil when no URL exists for +coa+.
  def self.retrieve_list_for_coa_for_date(coa,date)
    fetch_url(self.url_for_coa_for_date(coa,date))
  end

  # Opens +url+ through open-uri and returns the resulting IO-like object.
  # Going through URI#open (rather than Kernel#open) keeps working on Ruby 3
  # and cannot be tricked into opening a local file or a "|command" pipe.
  def self.fetch_url(url)
    return nil if url.nil?

    URI.parse(url.to_s).open
  end
  private_class_method :fetch_url

  # Hands +doc+ to the parser matching the page format used by +coa+.
  # Returns nil when +coa+ is in neither list.
  def self.parse_opinion_list_for_coa(coa, doc)
    if TAMES_COAS.include?(coa)
      CoaOpScraper::Tames.parse_opinion_list(doc)
    elsif LEGACY_COAS.include?(coa)
      CoaOpScraper::Legacy.parse_opinion_list(doc)
    end
  end
  private_class_method :parse_opinion_list_for_coa
end
|
76
|
+
|
77
|
+
# This is required (and helpful) to parse Texas court docket pages
|
78
|
+
class String
  # Gets rid of some pesky unicode found on Texas OCA sites.
  #
  # Returns a copy with surrounding ASCII whitespace stripped and at most ONE
  # non-breaking space (U+00A0) removed from the start and from the end of each
  # line (^ and $ are line anchors), then stripped again.
  def nbsp_strip
    strip.gsub(/\u00a0$/,"").gsub(/^\u00a0/,"").strip
  end

  # Applies #nbsp_strip repeatedly until the text stops changing, so runs of
  # non-breaking spaces of any length are removed. (A fixed number of passes
  # removes only one NBSP per end per pass and leaves longer runs behind.)
  #
  # Always returns a new String.
  def strip_both_ends
    current = self
    loop do
      stripped = current.nbsp_strip
      return stripped if stripped == current
      current = stripped
    end
  end
end
|
87
|
+
|
88
|
+
class Date
  # True for Monday through Friday; false when the date falls on a Saturday
  # or a Sunday.
  def weekday?
    !(saturday? || sunday?)
  end
end
|