imdb-parser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +3 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +27 -0
- data/LICENSE.txt +22 -0
- data/README.md +61 -0
- data/Rakefile +6 -0
- data/imdb-parser.gemspec +23 -0
- data/lib/imdb/parser.rb +52 -0
- data/lib/imdb/parser/actor.rb +21 -0
- data/lib/imdb/parser/role.rb +102 -0
- data/lib/imdb/parser/util.rb +14 -0
- data/lib/imdb/parser/version.rb +5 -0
- data/spec/actors_spec.rb +20 -0
- data/spec/parser_spec.rb +58 -0
- data/spec/role_spec.rb +176 -0
- data/tools/next_edgecase.rb +16 -0
- metadata +108 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 864a142a24c3d4a7f66b4602372f023af7da6222
|
4
|
+
data.tar.gz: b7bdb4c6c32dd28fb354d52b11ea91ff0db85427
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e69636a6a33391e03ff5af875c53964f4e093a3da0c747ffee36d8ab3a14ba4ae0e043d361065880f5ac5fc2ebba388a3b2f4ee7f26220d096c2207715f399fe
|
7
|
+
data.tar.gz: c44ff40e20eff3b4fa58f08ff89f40ccfdd3de1797f681d42946133717d96479b6d72cfe0fddf7c0ac60be1341f75021c314861d5866d306a2280a81fc194e70
|
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.0.0
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
imdb-parser (0.0.1)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.2.5)
|
10
|
+
rake (10.1.0)
|
11
|
+
rspec (2.14.1)
|
12
|
+
rspec-core (~> 2.14.0)
|
13
|
+
rspec-expectations (~> 2.14.0)
|
14
|
+
rspec-mocks (~> 2.14.0)
|
15
|
+
rspec-core (2.14.7)
|
16
|
+
rspec-expectations (2.14.4)
|
17
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
18
|
+
rspec-mocks (2.14.4)
|
19
|
+
|
20
|
+
PLATFORMS
|
21
|
+
ruby
|
22
|
+
|
23
|
+
DEPENDENCIES
|
24
|
+
bundler (~> 1.3)
|
25
|
+
imdb-parser!
|
26
|
+
rake
|
27
|
+
rspec
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Ben Olive
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
IMDB Parser
|
2
|
+
===========
|
3
|
+
|
4
|
+
[Build Status](https://travis-ci.org/sionide21/imdb-actors)
|
5
|
+
|
6
|
+
A parser for the IMDB actor and actress files.
|
7
|
+
|
8
|
+
Parses the `actors.list` and `actresses.list` files from IMDB's [alternate interfaces](http://www.imdb.com/interfaces) page.
|
9
|
+
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
gem 'imdb-parser'
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
$ bundle
|
20
|
+
|
21
|
+
Or install it yourself as:
|
22
|
+
|
23
|
+
$ gem install imdb-parser
|
24
|
+
|
25
|
+
|
26
|
+
## Usage
|
27
|
+
|
28
|
+
Here is a simple example of how to use the library. Run `rspec` to see a list of the available methods.
|
29
|
+
|
30
|
+
```ruby
|
31
|
+
require 'imdb/parser'
|
32
|
+
|
33
|
+
parser = IMDB::Parser::Parser.new(File.open("actors.list" ,'rb'))
|
34
|
+
parser.each do |actor|
|
35
|
+
puts actor.name
|
36
|
+
puts "=" * actor.name.length
|
37
|
+
actor.roles.each do |role|
|
38
|
+
puts " * #{role.title} (#{role.character})"
|
39
|
+
end
|
40
|
+
puts
|
41
|
+
end
|
42
|
+
```
|
43
|
+
|
44
|
+
## Contributing
|
45
|
+
|
46
|
+
1. Fork it
|
47
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
48
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
49
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
50
|
+
5. Create new Pull Request
|
51
|
+
|
52
|
+
### Fixing broken records
|
53
|
+
|
54
|
+
1. Use the "next_edgecase" utility to generate a test case for the broken record:
|
55
|
+
|
56
|
+
```sh
|
57
|
+
ruby -I lib tools/next_edgecase.rb /path/to/actors.list
|
58
|
+
```
|
59
|
+
|
60
|
+
2. Add the resulting case to `spec/role_spec.rb` in the `describe "::parse" do` section.
|
61
|
+
3. Fix it
|
data/Rakefile
ADDED
data/imdb-parser.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'imdb/parser/version'
|
5
|
+
|
6
|
+
# Gem specification for imdb-parser: identity and descriptive metadata,
# the files to package, and the development-only dependencies.
Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name    = "imdb-parser"
  gem.version = IMDB::Parser::VERSION
  gem.authors = ["Ben Olive"]
  gem.email   = ["sionide21@gmail.com"]

  # Descriptive metadata.
  gem.description = %q{Parses the `actors.list` and `actresses.list` files from IMDB's alternate interfaces page.}
  gem.summary     = %q{A parser for the IMDB actor and actress files.}
  gem.homepage    = "https://github.com/sionide21/imdb-actors"
  gem.license     = "MIT"

  # Package every file tracked by git; the test files are the subset that
  # lives under test/, spec/ or features/.
  tracked_files     = `git ls-files`.split($/)
  gem.files         = tracked_files
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Development-only dependencies.
  gem.add_development_dependency "bundler", "~> 1.3"
  gem.add_development_dependency "rspec"
  gem.add_development_dependency "rake"
end
|
data/lib/imdb/parser.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require "imdb/parser/version"
|
2
|
+
require 'imdb/parser/util'
|
3
|
+
require 'imdb/parser/role'
|
4
|
+
require 'imdb/parser/actor'
|
5
|
+
|
6
|
+
|
7
|
+
require 'stringio'

module IMDB
  module Parser
    # Streams Actor records out of an IMDB `actors.list` / `actresses.list`
    # file.
    #
    # The input is read line by line: everything up to and including the
    # "Name ... Titles" header (plus the underline row after it) is skipped,
    # records are separated by blank lines, and a line made only of dashes
    # marks the start of the footer, where reading stops.
    class Parser
      include Enumerable
      attr_reader :input

      # input - either an object responding to `gets` (File, StringIO, ...),
      #         which is read as-is, or a String holding the whole listing,
      #         which is stripped and wrapped in a StringIO.
      def initialize(input)
        @input = input.respond_to?(:gets) ? input : StringIO.new(input.strip)
      end

      # Yields one Actor per record. Without a block, returns an Enumerator.
      #
      # The underlying stream is consumed while iterating, so a given parser
      # can only be walked once.
      #
      # Raises ParseError if the listing header cannot be found.
      def each
        return enum_for(:each) unless block_given?

        strip_header
        lines = []
        input.each do |line|
          stripped = line.strip
          if stripped.empty?
            # Runs of blank lines must not produce empty Actor records.
            yield Actor.new(lines.join) unless lines.empty?
            lines = []
          elsif stripped =~ /^-+$/
            # A row of dashes starts the footer.
            break
          else
            lines << line
          end
        end
        # The last record may not be followed by a blank line.
        yield Actor.new(lines.join) unless lines.empty?
      end

      # Returns every Actor in the listing as an Array. The result is
      # memoized because the stream cannot be read a second time.
      def actors
        @actors ||= to_a
      end

      private :input

      # Advances the stream past the "Name ... Titles" header row and the
      # underline row that follows it.
      #
      # Raises ParseError when the stream ends before the header is seen
      # (`gets` returns nil at end of input).
      def strip_header
        loop do
          line = input.gets
          raise ParseError, 'listing header ("Name ... Titles") not found' if line.nil?
          break if line.strip =~ /^Name\s+Titles$/
        end
        input.gets # discard the "----   ------" underline row
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'imdb/parser/util'
require 'imdb/parser/role' # Actor#roles builds Role objects

module IMDB
  module Parser
    # One performer record from an IMDB actors/actresses listing: the
    # performer's name, a tab, then one credit per line.
    #
    # NOTE(review): the constructor and the `input` reader are presumably
    # supplied by TakesStringInput (imdb/parser/util, not visible here).
    class Actor
      include TakesStringInput

      # Returns an Array with one Role (or TVRole) per credit line.
      # A record with no title column yields an empty Array.
      def roles
        # split_input[1] is nil when the record contains no tab.
        split_input[1].to_s.split(/\n/).map { |line| Role.parse(line) }
      end

      # Returns the performer's name (the text before the first tab), or nil
      # for an empty record.
      def name
        @name ||= split_input[0]
      end

      private

      # Splits the stripped record at its first tab into [name, credits].
      def split_input
        @split_input ||= input.strip.split("\t", 2)
      end
    end
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'imdb/parser/util'
|
2
|
+
|
3
|
+
module IMDB
  module Parser
    # A movie credit parsed from one line of an actor record, e.g.
    #
    #   EuroTrip (2004) [Jenny] <6>
    #
    # Construction raises ParseError (carrying the offending line) when the
    # line does not match the expected layout.
    class Role
      include TakesStringInput

      # Builds the right object for a credit line: a title wrapped in double
      # quotes is a TV series (TVRole), anything else is a movie (Role).
      def self.parse(input)
        input = input.strip
        if input =~ /^"/
          TVRole.new(input)
        else
          new(input)
        end
      end

      # The class methods below are public. In the original layout they sat
      # under `private`, which has no effect on `def self.` methods, and
      # instances reach them through `self.class`, so they must stay callable.

      # Returns the compiled pattern, cached per class (Role and TVRole each
      # keep their own copy in a class-level instance variable).
      def self.cached_regex
        @regex ||= Regexp.new(regex)
      end

      # Pattern for a movie credit: title, year, optional notes, optional
      # "(as ...)" alias, optional [character] and optional <billing>.
      def self.regex
        /^(?<title>.+?)\s+
          #{year_regex} \s*?
          \)? \s* # One of the records has a random trailing paren
          (?:\((?:uncredited|TV|V|.+?)\))? \s*?
          (?<suspended>\{\{SUSPENDED\}\})? \s*?
          (?:\(rumored\))? \s*?
          #{alt_character_regex} \s*?
          (?:\[(?<character>.+?)\])? \s*?
          (?:<(?<credit>\d+)>)?
        $/x
      end

      # "(2004)", "(????)", or either with a disambiguating roman-numeral
      # suffix such as "(2011/I)". The suffix group is non-capturing; it was
      # previously written "(:?", which accepted a stray literal colon.
      def self.year_regex
        /\((?:(?<year>\d{4})|[\?]{4})(?:\/[IVX]+)?\)/
      end

      # Optional parenthesised note; "(as Some Name)" captures the alias.
      def self.alt_character_regex
        /(?:\((?:uncredited|as\s(?<alt_character>.+?)|.+?)\))?/
      end

      # Fails fast: raises ParseError with the raw input when it cannot be
      # parsed.
      def initialize(*args)
        super
        raise ParseError.new(input) if matches.nil?
      end

      def type
        :movie
      end

      def title
        matches[:title]
      end

      # Release year as an Integer, or nil when listed as "????".
      def year
        matches[:year].to_i if matches[:year]
      end

      # The "(as ...)" alias when present, otherwise the bracketed character
      # name; nil when neither is given.
      def character
        matches[:alt_character] || matches[:character]
      end

      # Billing position as an Integer, or nil when not listed.
      def credit
        matches[:credit].to_i if matches[:credit]
      end

      private

      # MatchData for the input against this class's pattern (nil on failure).
      def matches
        @matches ||= self.class.cached_regex.match(input)
      end
    end

    # A TV credit, e.g.
    #
    #   "Buffy the Vampire Slayer" (1997) {After Life (#6.3)} [Dawn Summers] <4>
    class TVRole < Role
      # Pattern for a TV credit. The optional {...} section holds either
      # "Episode Title (#season.episode)", a "(date)", or a bare episode title.
      # Both groups that were written "(:?" are now true non-capturing groups.
      def self.regex
        /^"(?<title>.+?)"\s+
          #{year_regex}
          (?:\s\{(?:
            (?<episode_title>.+?)? \s*? \(\#(?<season>\d+)\.(?<episode>\d+)\) |
            \((?<episode_title>[\d\-]+)\) |
            (?<episode_title>.+?)
          )\})? \s*?
          #{alt_character_regex} \s*?
          (?:\[(?<character>.+?)\])?\s*?
          (?:<(?<credit>\d+)>)?
        $/x
      end

      def type
        :tv
      end

      # Episode title, the episode date when episodes are listed by date, or
      # nil when no title is given.
      def episode_title
        matches[:episode_title]
      end

      # Season number. When no "(#s.e)" marker is present this falls back to
      # the leading integer of the episode title (the year for dated
      # episodes), and is 0 when nothing numeric is available.
      def season
        (matches[:season] || matches[:episode_title]).to_i
      end

      # Episode number within the season, or nil when not listed.
      def episode
        matches[:episode].to_i if matches[:episode]
      end
    end
  end
end
|
data/spec/actors_spec.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'imdb/parser/actor'
|
2
|
+
|
3
|
+
|
4
|
+
# Actor behaviour for a record holding one movie credit and one TV credit.
describe IMDB::Parser::Actor do
  # Raw record: name, a tab, then one credit per line.
  let(:record) do
    %{Trachtenberg, Michelle\tEuroTrip (2004) [Jenny] <6>
"Buffy the Vampire Slayer" (1997) {After Life (#6.3)} [Dawn Summers] <4>}
  end

  let(:actor) { described_class.new(record) }

  describe '#name' do
    it "returns the actors name" do
      expect(actor.name).to eq("Trachtenberg, Michelle")
    end
  end

  describe '#roles' do
    it "returns a list of roles the actor has been in" do
      role_total = actor.roles.count
      expect(role_total).to eq(2)
    end
  end
end
|
data/spec/parser_spec.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'imdb/parser'
|
2
|
+
|
3
|
+
|
4
|
+
# End-to-end checks: a listing goes in, Actor and Role objects come out.
describe IMDB::Parser::Parser, '#parse' do
  # Minimal listing: header row, underline row, one actor with two credits.
  let(:contents) do
    %{
Name\t\t\tTitles
----\t\t\t------
Trachtenberg, Michelle\tEuroTrip (2004) [Jenny] <6>
"Buffy the Vampire Slayer" (1997) {After Life (#6.3)} [Dawn Summers] <4>
}
  end

  it "parses actors" do
    parsed = described_class.new(contents).actors
    expect(parsed.count).to eq(1)
  end

  it "parses roles" do
    first_actor = described_class.new(contents).actors.first
    expect(first_actor.roles.count).to eq(2)
  end

  describe "knows role type" do
    let(:roles) do
      described_class.new(contents).actors.first.roles
    end

    it "parses movies" do
      expect(roles.first.type).to eq(:movie)
    end

    it "parses tv shows" do
      expect(roles.last.type).to eq(:tv)
    end
  end

  context "when passed an IO" do
    it "parses actors" do
      io = StringIO.new(contents.strip)
      expect(described_class.new(io).actors.count).to eq(1)
    end
  end

  context "when input has header or footer" do
    # Same record, wrapped in preamble text and a dashed footer section.
    let(:contents) do
      %{
blah blah blah
Somehting....
===============================
Name\t\t\tTitles
----\t\t\t------
Trachtenberg, Michelle\tEuroTrip (2004) [Jenny] <6>
"Buffy the Vampire Slayer" (1997) {After Life (#6.3)} [Dawn Summers] <4>

-----------------------------------------------------------------------------

Some more stuff
===============

gobbledy gook
}
    end

    it "strips it" do
      io = StringIO.new(contents.strip)
      expect(described_class.new(io).actors.count).to eq(1)
    end
  end
end
|
data/spec/role_spec.rb
ADDED
@@ -0,0 +1,176 @@
|
|
1
|
+
require 'imdb/parser/role'
|
2
|
+
|
3
|
+
|
4
|
+
# Movie credits: field extraction plus a catalogue of awkward real-world
# lines that must parse without raising.
describe IMDB::Parser::Role do
  let(:role) do
    described_class.new 'EuroTrip (2004) [Jenny] <6>'
  end

  it "fails fast when input is malformed" do
    expect {
      described_class.new 'bobloblaw attorney at law'
    }.to raise_exception(IMDB::Parser::ParseError, "bobloblaw attorney at law")
  end

  describe '#title' do
    it "returns the movie title" do
      expect(role.title).to eq("EuroTrip")
    end
  end

  describe '#year' do
    it "returns the year of release" do
      expect(role.year).to eq(2004)
    end

    it "returns nil if the year is not known" do
      undated = described_class.new("Nailed (????) [Reporter]")
      expect(undated.year).to be_nil
    end
  end

  describe '#character' do
    it "returns the character name" do
      expect(role.character).to eq("Jenny")
    end

    it "returns character name istead 'Themself' credited 'as character'" do
      aliased = described_class.new(
        "The Magnificent Duo (1992) {{SUSPENDED}} (as Carol Roberts) [Muriel] <15>"
      )
      expect(aliased.character).to eq("Carol Roberts")
    end

    it "is nil if no character provided" do
      unnamed = described_class.new("El secreto de la Veneno (1997) (V) <1>")
      expect(unnamed.character).to be_nil
    end
  end

  describe '#credit' do
    it "returns the billing position in credits" do
      expect(role.credit).to eq(6)
    end

    it "is nil if not credited" do
      unbilled = described_class.new("Night of the Demons (2009) (uncredited) [Goth raver]")
      expect(unbilled.credit).to be_nil
    end
  end

  describe "::parse" do
    def parse(string)
      IMDB::Parser::Role.parse(string)
    end

    # Example description => credit lines that must all parse cleanly.
    {
      "handles uncredited roles" => [
        "Night of the Demons (2009) (uncredited) [Goth raver]"
      ],
      "handles just title and year" => [
        "Llamada (2011)"
      ],
      "handles uncredited tv roles" => [
        '"Four Star Revue" (1950) {(#1.15)} [Guest Apache Dancers]',
        '"Supernatural" (2005) {99 Problems (#5.17)} (uncredited) [Herself]'
      ],
      "handles made for TV movies" => [
        "This American Life Live! (2012) (TV) [Dancers]"
      ],
      "handles tv shows by date" => [
        '"El hormiguero" (2006) {(2011-03-23)} [Herself]'
      ],
      "handles tv shows wihtout episode information" => [
        '"La granja tolima" (2004) [Herself]'
      ],
      "handles tv shows with title but not episode number" => [
        '"Jenny Jones" (1991) {I Got No Shame, \'Cuz My Chest Gives Me All Game!} [Herself]'
      ],
      "handles straight to video movies" => [
        "El secreto de la Veneno (1997) (V) <1>"
      ],
      "handles ucredited straight to video movies" => [
        "Fillet of Soul (2001) (V)"
      ],
      "handles weird ass release years" => [
        "Splitter (2011/I) [Kidnapped Girl]",
        "The Pact (2003/III) [Brittany Vickson] <4>",
        '"Furor" (1998/I) {(1998-12-19)} (as Ella Baila Sola) [Herself]',
        "Hush (2013/IV) [Nanda]",
        "Run (2012/V) [Tanishca]",
        "Redemption (2013/X)"
      ],
      "handles weird ass unkown release years" => [
        "Hamlet (????/II) [Gertrude]"
      ],
      "handles no character name in tv shows" => [
        '"Crackhorse Presents" (2012) {High Speed (#1.10)}'
      ],
      "handles alternate character listing" => [
        '"Casting Qs" (2010) {An Interview with Tracy \'Twinkie\' Byrd (#2.14)} (as Twinkie Byrd) [Herself]',
        "The Magnificent Duo (1992) {{SUSPENDED}} (as Carol Roberts) [Muriel] <15>"
      ],
      "handles arbitrary episode notes" => [
        '"The Xtra Factor" (2004) {Tulisa\'s Best and Worst (#8.34)} (archive footage) [Themselves]'
      ],
      "handles arbitrary movie notes" => [
        "2nd Annual BET Awards (2002) (TV) (as 3LW) [Themselves]"
      ],
      "handles suspended" => [
        "Rock da Boat (2001) (TV) {{SUSPENDED}} [Herself] <1>"
      ],
      "handles unknown year" => [
        "Nailed (????) [Reporter]"
      ],
      "handles romured movies" => [
        "Desi Movie (2010) {{SUSPENDED}} (rumored)"
      ],
      "handles one really stupid annoying typo" => [
        "Asphalt (1951) ) [Helli] <28>"
      ]
    }.each do |description, credit_lines|
      it description do
        credit_lines.each do |credit_line|
          expect { parse credit_line }.not_to raise_error
        end
      end
    end
  end
end
|
115
|
+
|
116
|
+
# TV credits: series and episode fields, including episodes listed by date.
describe IMDB::Parser::TVRole do
  let(:role) do
    described_class.new '"Buffy the Vampire Slayer" (1997) {After Life (#6.3)} [Dawn Summers] <4>'
  end

  # An episode identified by air date instead of season/episode numbers.
  let(:dated_role) do
    described_class.new('"El hormiguero" (2006) {(2011-03-23)} [Herself]')
  end

  it "fails fast when input is malformed" do
    expect {
      described_class.new 'bobloblaw attorney at law'
    }.to raise_exception(IMDB::Parser::ParseError, "bobloblaw attorney at law")
  end

  describe '#title' do
    it "returns the title of the series" do
      expect(role.title).to eq("Buffy the Vampire Slayer")
    end
  end

  describe '#episode_title' do
    it "returns the title of the episode" do
      expect(role.episode_title).to eq("After Life")
    end

    it "is the date of the episode if titles are dates" do
      expect(dated_role.episode_title).to eq("2011-03-23")
    end

    it "is nil if the title is not provided" do
      untitled = described_class.new('"Four Star Revue" (1950) {(#1.15)} [Guest Apache Dancers]')
      expect(untitled.episode_title).to be_nil
    end
  end

  describe '#season' do
    it "returns the season of the episode" do
      expect(role.season).to eq(6)
    end

    it "returns the year of the episode if episode titles are dates" do
      expect(dated_role.season).to eq(2011)
    end
  end

  describe '#episode' do
    it "returns the episode number within the season" do
      expect(role.episode).to eq(3)
    end

    it "returns nil if the episode number is not provied" do
      expect(dated_role.episode).to be_nil
    end
  end

  describe '#year' do
    it "returns the year the series came out" do
      expect(role.year).to eq(1997)
    end
  end

  describe '#character' do
    it "returns the character name" do
      expect(role.character).to eq("Dawn Summers")
    end

    it "returns character name istead 'Themself' credited 'as character'" do
      aliased = described_class.new(
        '"Casting Qs" (2010) {An Interview with Tracy \'Twinkie\' Byrd (#2.14)} (as Twinkie Byrd) [Herself]'
      )
      expect(aliased.character).to eq("Twinkie Byrd")
    end

    it "is nil if not credited" do
      unnamed = described_class.new('"Crackhorse Presents" (2012) {High Speed (#1.10)}')
      expect(unnamed.character).to be_nil
    end
  end

  describe '#credit' do
    it "returns the billing position in credits" do
      expect(role.credit).to eq(4)
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
#! /usr/bin/env ruby
require 'imdb/parser'

# USAGE: ruby -I lib tools/next_edgecase.rb /path/to/actors.list
#
# Walks an IMDB actors/actresses listing and parses every credit. At the
# first credit line the parser rejects, prints an RSpec example for that
# line, ready to paste into the "::parse" section of spec/role_spec.rb.
# Prints nothing when every credit parses.

USAGE = 'USAGE: ruby -I lib tools/next_edgecase.rb /path/to/actors.list'

list_path = ARGV[0]
abort USAGE unless list_path

begin
  # Block form closes the file even when parsing stops early.
  File.open(list_path, 'rb') do |file|
    IMDB::Parser::Parser.new(file).each do |actor|
      actor.roles # building the roles is what triggers ParseError
    end
  end
rescue IMDB::Parser::ParseError => e
  # `inspect` emits a valid Ruby string literal even when the offending line
  # itself contains double quotes (every TV credit does).
  puts 'it "handles " do',
       "  expect { parse #{e.message.inspect} }.not_to raise_error",
       'end'
end
|
metadata
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: imdb-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ben Olive
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-01-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: Parses the `actors.list` and `actresses.list` files from IMDB's alternate
|
56
|
+
interfaces page.
|
57
|
+
email:
|
58
|
+
- sionide21@gmail.com
|
59
|
+
executables: []
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files: []
|
62
|
+
files:
|
63
|
+
- .rspec
|
64
|
+
- .ruby-version
|
65
|
+
- .travis.yml
|
66
|
+
- Gemfile
|
67
|
+
- Gemfile.lock
|
68
|
+
- LICENSE.txt
|
69
|
+
- README.md
|
70
|
+
- Rakefile
|
71
|
+
- imdb-parser.gemspec
|
72
|
+
- lib/imdb/parser.rb
|
73
|
+
- lib/imdb/parser/actor.rb
|
74
|
+
- lib/imdb/parser/role.rb
|
75
|
+
- lib/imdb/parser/util.rb
|
76
|
+
- lib/imdb/parser/version.rb
|
77
|
+
- spec/actors_spec.rb
|
78
|
+
- spec/parser_spec.rb
|
79
|
+
- spec/role_spec.rb
|
80
|
+
- tools/next_edgecase.rb
|
81
|
+
homepage: https://github.com/sionide21/imdb-actors
|
82
|
+
licenses:
|
83
|
+
- MIT
|
84
|
+
metadata: {}
|
85
|
+
post_install_message:
|
86
|
+
rdoc_options: []
|
87
|
+
require_paths:
|
88
|
+
- lib
|
89
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
requirements: []
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 2.0.3
|
102
|
+
signing_key:
|
103
|
+
specification_version: 4
|
104
|
+
summary: A parser for the IMDB actor and actress files.
|
105
|
+
test_files:
|
106
|
+
- spec/actors_spec.rb
|
107
|
+
- spec/parser_spec.rb
|
108
|
+
- spec/role_spec.rb
|