coa-op-scraper 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.rspec +2 -0
- data/Gemfile +22 -0
- data/Gemfile.lock +116 -0
- data/LICENSE.txt +20 -0
- data/README.md +65 -0
- data/Rakefile +31 -0
- data/VERSION +1 -0
- data/coa-op-scraper.gemspec +77 -0
- data/lib/coa_docket_no.rb +81 -0
- data/lib/coa_op_scraper.rb +92 -0
- data/lib/legacy.rb +77 -0
- data/lib/tames.rb +91 -0
- data/spec/scrapers/legacy_scraper_spec.rb +110 -0
- data/spec/scrapers/main_spec.rb +18 -0
- data/spec/scrapers/tames_scraper_spec.rb +108 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/support/vcr.rb +15 -0
- data/spec/vcr/legacy/02-2003-02-20.json +1 -0
- data/spec/vcr/legacy/03-2013-01-10.json +1 -0
- data/spec/vcr/tames/01-2012-01-19.json +1 -0
- data/spec/vcr/tames/12-2003-01-31.json +1 -0
- data/spec/vcr/tames/14-2005-01-20.json +1 -0
- metadata +169 -0
data/.document
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Dependency manifest for the coa-op-scraper gem.
# Gems are fetched over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime dependencies.
gem 'nokogiri'
gem 'rails'

group :development, :test do
  gem 'rspec-rails'
end

# Test-only helpers.
group :test do
  gem 'timecop'
  gem 'vcr' #, '~> 2.0.rc'
  gem 'fakeweb'
end

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rdoc" #, "~> 3.12"
  gem "bundler" #, "~> 1.0.0"
  gem "jeweler" #, "~> 1.8.4"
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
actionmailer (3.2.11)
|
5
|
+
actionpack (= 3.2.11)
|
6
|
+
mail (~> 2.4.4)
|
7
|
+
actionpack (3.2.11)
|
8
|
+
activemodel (= 3.2.11)
|
9
|
+
activesupport (= 3.2.11)
|
10
|
+
builder (~> 3.0.0)
|
11
|
+
erubis (~> 2.7.0)
|
12
|
+
journey (~> 1.0.4)
|
13
|
+
rack (~> 1.4.0)
|
14
|
+
rack-cache (~> 1.2)
|
15
|
+
rack-test (~> 0.6.1)
|
16
|
+
sprockets (~> 2.2.1)
|
17
|
+
activemodel (3.2.11)
|
18
|
+
activesupport (= 3.2.11)
|
19
|
+
builder (~> 3.0.0)
|
20
|
+
activerecord (3.2.11)
|
21
|
+
activemodel (= 3.2.11)
|
22
|
+
activesupport (= 3.2.11)
|
23
|
+
arel (~> 3.0.2)
|
24
|
+
tzinfo (~> 0.3.29)
|
25
|
+
activeresource (3.2.11)
|
26
|
+
activemodel (= 3.2.11)
|
27
|
+
activesupport (= 3.2.11)
|
28
|
+
activesupport (3.2.11)
|
29
|
+
i18n (~> 0.6)
|
30
|
+
multi_json (~> 1.0)
|
31
|
+
arel (3.0.2)
|
32
|
+
builder (3.0.4)
|
33
|
+
diff-lcs (1.1.3)
|
34
|
+
erubis (2.7.0)
|
35
|
+
fakeweb (1.3.0)
|
36
|
+
git (1.2.5)
|
37
|
+
hike (1.2.1)
|
38
|
+
i18n (0.6.1)
|
39
|
+
jeweler (1.8.4)
|
40
|
+
bundler (~> 1.0)
|
41
|
+
git (>= 1.2.5)
|
42
|
+
rake
|
43
|
+
rdoc
|
44
|
+
journey (1.0.4)
|
45
|
+
json (1.7.6)
|
46
|
+
mail (2.4.4)
|
47
|
+
i18n (>= 0.4.0)
|
48
|
+
mime-types (~> 1.16)
|
49
|
+
treetop (~> 1.4.8)
|
50
|
+
mime-types (1.19)
|
51
|
+
multi_json (1.5.0)
|
52
|
+
nokogiri (1.5.6)
|
53
|
+
polyglot (0.3.3)
|
54
|
+
rack (1.4.4)
|
55
|
+
rack-cache (1.2)
|
56
|
+
rack (>= 0.4)
|
57
|
+
rack-ssl (1.3.2)
|
58
|
+
rack
|
59
|
+
rack-test (0.6.2)
|
60
|
+
rack (>= 1.0)
|
61
|
+
rails (3.2.11)
|
62
|
+
actionmailer (= 3.2.11)
|
63
|
+
actionpack (= 3.2.11)
|
64
|
+
activerecord (= 3.2.11)
|
65
|
+
activeresource (= 3.2.11)
|
66
|
+
activesupport (= 3.2.11)
|
67
|
+
bundler (~> 1.0)
|
68
|
+
railties (= 3.2.11)
|
69
|
+
railties (3.2.11)
|
70
|
+
actionpack (= 3.2.11)
|
71
|
+
activesupport (= 3.2.11)
|
72
|
+
rack-ssl (~> 1.3.2)
|
73
|
+
rake (>= 0.8.7)
|
74
|
+
rdoc (~> 3.4)
|
75
|
+
thor (>= 0.14.6, < 2.0)
|
76
|
+
rake (10.0.3)
|
77
|
+
rdoc (3.12)
|
78
|
+
json (~> 1.4)
|
79
|
+
rspec-core (2.12.2)
|
80
|
+
rspec-expectations (2.12.1)
|
81
|
+
diff-lcs (~> 1.1.3)
|
82
|
+
rspec-mocks (2.12.1)
|
83
|
+
rspec-rails (2.12.2)
|
84
|
+
actionpack (>= 3.0)
|
85
|
+
activesupport (>= 3.0)
|
86
|
+
railties (>= 3.0)
|
87
|
+
rspec-core (~> 2.12.0)
|
88
|
+
rspec-expectations (~> 2.12.0)
|
89
|
+
rspec-mocks (~> 2.12.0)
|
90
|
+
sprockets (2.2.2)
|
91
|
+
hike (~> 1.2)
|
92
|
+
multi_json (~> 1.0)
|
93
|
+
rack (~> 1.0)
|
94
|
+
tilt (~> 1.1, != 1.3.0)
|
95
|
+
thor (0.16.0)
|
96
|
+
tilt (1.3.3)
|
97
|
+
timecop (0.5.9)
|
98
|
+
treetop (1.4.12)
|
99
|
+
polyglot
|
100
|
+
polyglot (>= 0.3.1)
|
101
|
+
tzinfo (0.3.35)
|
102
|
+
vcr (2.4.0)
|
103
|
+
|
104
|
+
PLATFORMS
|
105
|
+
ruby
|
106
|
+
|
107
|
+
DEPENDENCIES
|
108
|
+
bundler
|
109
|
+
fakeweb
|
110
|
+
jeweler
|
111
|
+
nokogiri
|
112
|
+
rails
|
113
|
+
rdoc
|
114
|
+
rspec-rails
|
115
|
+
timecop
|
116
|
+
vcr
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2013 Don Cruse
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# Texas COA Op Scraper - a gem for Texas courts of appeals
|
2
|
+
|
3
|
+
### What's this about?
|
4
|
+
|
5
|
+
This gem understands how to parse the opinion lists released by each of
|
6
|
+
Texas's fourteen intermediate courts of appeals.
|
7
|
+
|
8
|
+
Opinion releases are announced on a separate webpage for each court of
|
9
|
+
appeals. Some courts use a legacy system; others have shifted to the new
|
10
|
+
TAMES system employed by the Texas Supreme Court.
|
11
|
+
|
12
|
+
### Why does this gem exist?
|
13
|
+
|
14
|
+
It was developed as part of the TexApp.org project ([github](http://github.com/texapp)),
|
15
|
+
which aims to ensure that Texas's court of appeals opinions are available in
|
16
|
+
a reliable — and citable — location available to the general public,
|
17
|
+
members of the bar, and the court system itself.
|
18
|
+
|
19
|
+
In Texas, unpublished decisions of intermediate courts of appeals are precedential.
|
20
|
+
Yet litigants do not always have a reliable way to locate or cite this authority.
|
21
|
+
In the past, it was possible to use a well-crafted Google search to locate
|
22
|
+
relevant opinions (a technique discussed in [this 2009 blog post](http://www.scotxblog.com/practice-notes/researching-unpublished-coa-opinions-in-texas/)). But with the courts'
|
23
|
+
new TAMES system, these Google searches no longer work. The TAMES system does provide
|
24
|
+
many of these opinions in an online archive, but its URLs are prohibitively long and
|
25
|
+
complex to include in any printed legal brief.
|
26
|
+
|
27
|
+
### How can I use this?
|
28
|
+
|
29
|
+
This gem can be folded into the application of your choice to store
|
30
|
+
information about these opinions or queue up downloads of the opinions
|
31
|
+
themselves. It does not contain code related to data storage or any
|
32
|
+
interface to a file storage service. Those implementation details
|
33
|
+
are up to you.
|
34
|
+
|
35
|
+
The simplest way to use this gem is to specify a particular court of appeals
|
36
|
+
(using its two-digit numerical notation, like "03" for the Third Court) and a
|
37
|
+
particular date on which you want to check for opinions. The gem will then
|
38
|
+
determine the correct URL to use, check that page, and parse what is found to
|
39
|
+
retrieve the metadata for each opinion released on that date. What you get back
|
40
|
+
is a list of that metadata.
|
41
|
+
|
42
|
+
The data for each opinion is a simple hash. The overall
|
43
|
+
set of results is just an array of those hashes, or an empty array if no
|
44
|
+
results were found for that page. Here is an example of the hash for one opinion:
|
45
|
+
|
46
|
+
> { :author_string => "Opinion by Justice Pemberton",
|
47
|
+
:opinion_urls => {"html"=>"/opinions/htmlopinion.asp?OpinionId=20764",
|
48
|
+
"pdf"=>"/opinions/PDFOpinion.asp?OpinionId=20764"},
|
49
|
+
:disposition => "AFFIRMED:",
|
50
|
+
:panel_string => "(Before Chief Justice Jones, Justices Pemberton and Henson)",
|
51
|
+
:release_date => Fri, 20 Jan 2012,
|
52
|
+
:case_style => "Janeen Denise Smith v. The State of Texas",
|
53
|
+
:origin => "Appeal from County Court at Law No. 1 of Caldwell County",
|
54
|
+
:docket_no => "03-10-00725-CR",
|
55
|
+
:docket_page_url => "/opinions/case.asp?FilingID=15750" }
|
56
|
+
|
57
|
+
It's up to you to write code that does something interesting with that metadata —
|
58
|
+
such as storing it or downloading the opinion PDFs themselves (as is being done
|
59
|
+
for TexApp.org).
|
60
|
+
|
61
|
+
## Copyright
|
62
|
+
|
63
|
+
Copyright (c) 2013 Don Cruse. See LICENSE.txt for
|
64
|
+
further details.
|
65
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# encoding: utf-8

# Build script: loads the bundle, declares the gem's metadata through Jeweler
# (which generates coa-op-scraper.gemspec), and makes `rake` run the specs.

require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  # Report the missing gems and exit with Bundler's own status code.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "coa-op-scraper"
  gem.homepage = "http://github.com/doncruse/coa-op-scraper"
  # LICENSE.txt carries the MIT license text, so declare the license by its
  # identifier; the copyright notice itself lives in LICENSE.txt.
  gem.license = "MIT"
  gem.summary = "A scraper for intermediate Texas appellate opinions"
  gem.description = "A scraper for intermediate appellate opinions"
  gem.email = "doncruse@gmail.com"
  gem.authors = ["Don Cruse"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new
task :default => :spec
|
31
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.2.2
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = "coa-op-scraper"
|
8
|
+
s.version = "0.2.2"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Don Cruse"]
|
12
|
+
s.date = "2013-02-09"
|
13
|
+
s.description = "A scraper for intermediate appellate opinions"
|
14
|
+
s.email = "doncruse@gmail.com"
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE.txt",
|
17
|
+
"README.md"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".document",
|
21
|
+
".rspec",
|
22
|
+
"Gemfile",
|
23
|
+
"Gemfile.lock",
|
24
|
+
"LICENSE.txt",
|
25
|
+
"README.md",
|
26
|
+
"Rakefile",
|
27
|
+
"VERSION",
|
28
|
+
"coa-op-scraper.gemspec",
|
29
|
+
"lib/coa_docket_no.rb",
|
30
|
+
"lib/coa_op_scraper.rb",
|
31
|
+
"lib/legacy.rb",
|
32
|
+
"lib/tames.rb",
|
33
|
+
"spec/scrapers/legacy_scraper_spec.rb",
|
34
|
+
"spec/scrapers/main_spec.rb",
|
35
|
+
"spec/scrapers/tames_scraper_spec.rb",
|
36
|
+
"spec/spec_helper.rb",
|
37
|
+
"spec/support/vcr.rb",
|
38
|
+
"spec/vcr/legacy/02-2003-02-20.json",
|
39
|
+
"spec/vcr/legacy/03-2013-01-10.json",
|
40
|
+
"spec/vcr/tames/01-2012-01-19.json",
|
41
|
+
"spec/vcr/tames/12-2003-01-31.json",
|
42
|
+
"spec/vcr/tames/14-2005-01-20.json"
|
43
|
+
]
|
44
|
+
s.homepage = "http://github.com/doncruse/coa-op-scraper"
|
45
|
+
s.licenses = ["(c)2013 Don Cruse"]
|
46
|
+
s.require_paths = ["lib"]
|
47
|
+
s.rubygems_version = "1.8.23"
|
48
|
+
s.summary = "A scraper for intermediate Texas appellate opinions"
|
49
|
+
|
50
|
+
if s.respond_to? :specification_version then
|
51
|
+
s.specification_version = 3
|
52
|
+
|
53
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
54
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
55
|
+
s.add_runtime_dependency(%q<rails>, [">= 0"])
|
56
|
+
s.add_development_dependency(%q<rspec-rails>, [">= 0"])
|
57
|
+
s.add_development_dependency(%q<rdoc>, [">= 0"])
|
58
|
+
s.add_development_dependency(%q<bundler>, [">= 0"])
|
59
|
+
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
60
|
+
else
|
61
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
62
|
+
s.add_dependency(%q<rails>, [">= 0"])
|
63
|
+
s.add_dependency(%q<rspec-rails>, [">= 0"])
|
64
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
65
|
+
s.add_dependency(%q<bundler>, [">= 0"])
|
66
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
67
|
+
end
|
68
|
+
else
|
69
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
70
|
+
s.add_dependency(%q<rails>, [">= 0"])
|
71
|
+
s.add_dependency(%q<rspec-rails>, [">= 0"])
|
72
|
+
s.add_dependency(%q<rdoc>, [">= 0"])
|
73
|
+
s.add_dependency(%q<bundler>, [">= 0"])
|
74
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module CoaOpScraper
  # Encapsulates the logic of working with COA docket numbers, which have
  # four dash-separated parts: court, year, case number and a "CR"/"CV"
  # suffix (for example "03-10-00725-CR").
  #
  # Note: a well-formed number is distinct from knowing whether that docket
  # number was actually used by a court.
  class CoaDocketNo
    # Suffixes accepted as the fourth part of a docket number.
    TYPE_SUFFIXES = ["CR", "CV"]

    # The docket number as supplied, or nil when it was not well formed.
    attr_accessor :no

    # @param no [String, nil] raw docket number; anything that does not have
    #   exactly four dash-separated parts ending in "CR" or "CV" (including
    #   nil) leaves the object invalid.
    def initialize(no)
      text = no.to_s
      parts = text.split("-")
      @no = (parts.count == 4 && TYPE_SUFFIXES.include?(parts.last)) ? text : nil
    end

    # @return [Boolean] true when a well-formed docket number was supplied.
    def valid?
      !@no.nil?
    end

    # @return [String] same as #fixed_length.
    def to_s
      fixed_length
    end

    # Zero-pads court and year to two digits and the case number to five.
    #
    # @return [String] e.g. "03-08-00725-CV"; "" when the number is invalid.
    # @raise [ArgumentError] when a numeric part is not a decimal integer.
    def fixed_length
      return "" unless valid?

      coa, year, number, type_suffix = @no.split("-")
      [padded(coa, 2), padded(year, 2), padded(number, 5), type_suffix].join("-")
    end

    # @return [String] the fixed-length number with the -CR/-CV suffix removed.
    def without_type
      fixed_length.sub(/-(?:CR|CV)\z/, "")
    end

    #####################################
    # For accessing pieces

    # @return [String, nil] two-digit court number; nil when invalid.
    def coa_number
      canonical.split("-")[0]
    end

    # @return [String, nil] two-digit year; nil when invalid.
    def year_number
      canonical.split("-")[1]
    end

    # @return [String, nil] five-digit case number; nil when invalid.
    def case_number
      canonical.split("-")[2]
    end

    # @return [Boolean] true when the suffix is "CV".
    def civil?
      canonical.split("-")[3] == "CV"
    end

    # @return [Boolean] true when the suffix is "CR".
    def criminal?
      canonical.split("-")[3] == "CR"
    end

    ###################################
    # Standardizing how used internally

    # The -CV/-CR suffix is left out because it is not relevant to uniqueness.
    def for_database_key
      without_type
    end

    def for_web_urls
      fixed_length
    end

    def canonical
      fixed_length
    end

    protected

    # Left-pads a numeric string with zeros to the given width.
    #
    # The value is parsed explicitly as base 10. Handing the raw string to
    # sprintf("%d") would treat a leading zero as an octal prefix, turning
    # "00725" into 469 and raising on "08" or "09".
    def padded(number, digits)
      sprintf("%0#{digits}d", Integer(number.to_s, 10))
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# encoding=utf-8
|
2
|
+
# Entry points for locating, fetching and parsing the opinion lists published
# by the Texas courts of appeals. Parsing is delegated to CoaOpScraper::Tames
# or CoaOpScraper::Legacy depending on the court.
module CoaOpScraper
  require 'legacy'
  require 'tames'
  require 'coa_docket_no'

  require 'date'
  require 'open-uri'
  require 'active_support/core_ext'

  # The Texas appellate websites are sometimes fragile.
  # These sleep intervals should give ample time between requests.
  HISTORICAL_THROTTLE = 10
  CURRENT_THROTTLE = 5

  # When false, Saturdays and Sundays are left out of historical URL ranges.
  # Written as lowercase `false` because the FALSE constant is gone from
  # current Ruby versions.
  @@check_weekends = false

  # A court's placement in one of these two arrays tells you about the webpage format
  # currently used by that court.
  TAMES_COAS = [ "01", "03", "04", "05", "06", "09", "11", "12", "14" ]
  LEGACY_COAS = [ "02", "07", "08", "10", "13" ]

  ############################################################
  # This is the easiest method to use here. Feed it a COA value
  # (in the form "03", for example) and the date for which you
  # want the results (in the form of a Ruby date object).
  #
  # Returns whatever the court's parser produces for that page, or nil when
  # +coa+ is not listed in TAMES_COAS or LEGACY_COAS.
  def self.scrape_one_opinion_list(coa, target_date)
    parser = parser_for_coa(coa)
    return nil unless parser

    parser.parse_opinion_list(retrieve_list_for_coa_for_date(coa, target_date))
  end

  ############################################################
  # These methods would be useful to populate a queue of opinion
  # lists to check later.
  #
  # The #urls_for_historical_range method will, as expected,
  # compute a list of the URLs that are appropriate (excluding
  # weekends by default).
  #
  # The #parse_coa_opinion_list_at method will take a coa number
  # and a URL and return back a list of the results.

  # Returns an array with one URL per date in start_date..end_date.
  def self.urls_for_historical_range(coa, start_date, end_date)
    wanted = (start_date .. end_date).select do |target_date|
      @@check_weekends || target_date.weekday?
    end
    wanted.map { |target_date| url_for_coa_for_date(coa, target_date) }
  end

  # Takes a URL, returns a list of the opinion data ([] when there is none
  # or the court is unknown).
  #
  # A String is fetched directly. Any other argument (e.g. a Date) is handled
  # like the second argument of #scrape_one_opinion_list, which is what this
  # method used to do with every argument.
  def self.parse_coa_opinion_list_at(coa, url)
    return scrape_one_opinion_list(coa, url) || [] unless url.is_a?(String)

    parser = parser_for_coa(coa)
    return [] unless parser

    parser.parse_opinion_list(URI.parse(url).open) || []
  end

  # NOTE: the helpers below are singleton methods, so a visibility keyword
  # such as `protected` would not apply to them; they remain callable.

  # Returns the scraper (Tames or Legacy) responsible for +coa+, or nil when
  # the court appears in neither list.
  def self.parser_for_coa(coa)
    if TAMES_COAS.include?(coa)
      CoaOpScraper::Tames
    elsif LEGACY_COAS.include?(coa)
      CoaOpScraper::Legacy
    end
  end

  # Returns the opinion-list URL for +coa+ on +date+, or nil for an unknown court.
  def self.url_for_coa_for_date(coa,date)
    scraper = parser_for_coa(coa)
    scraper && scraper.url_for_coa_for_date(coa, date)
  end

  # Fetches the opinion-list page for +coa+ on +date+ and returns the opened
  # document. Raises ArgumentError when no URL is known for the court.
  def self.retrieve_list_for_coa_for_date(coa,date)
    url = url_for_coa_for_date(coa, date)
    raise ArgumentError, "unknown court of appeals: #{coa.inspect}" unless url

    # Go through URI rather than Kernel#open, which no longer opens URLs on
    # Ruby 3 and would treat a string starting with "|" as a command.
    URI.parse(url).open
  end
end
|
76
|
+
|
77
|
+
# This is required (and helpful) to parse Texas court docket pages
|
78
|
+
class String
  # Returns a copy with leading and trailing whitespace removed, counting the
  # non-breaking space (U+00A0) as whitespace. It gets rid of some pesky
  # unicode found on Texas OCA sites.
  #
  # The pattern is anchored to the start and end of the whole string, so
  # multi-line values keep their interior untouched, and complete runs of
  # mixed spaces and non-breaking spaces are removed in one call.
  def nbsp_strip
    gsub(/\A[\s\u00a0]+|[\s\u00a0]+\z/, "").strip
  end

  # Kept for existing callers; #nbsp_strip already cleans both ends fully.
  def strip_both_ends
    nbsp_strip
  end
end
|
87
|
+
|
88
|
+
class Date
  # True for Monday through Friday, false for Saturday and Sunday.
  def weekday?
    !(saturday? || sunday?)
  end
end
|