toname 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +52 -0
- data/lib/file_name_info.rb +41 -0
- data/lib/to_name.rb +103 -0
- metadata +81 -0
data/README.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# ToName
|
2
|
+
|
3
|
+
Overview
|
4
|
+
--------
|
5
|
+
ToName is a Ruby library which parses media filenames and extracts metadata (e.g. name and year)
|
6
|
+
|
7
|
+
e.g.
|
8
|
+
|
9
|
+
* Primer.2004.DVDRip.x264.AC3.avi becomes Name => 'Primer', year => 2004
|
10
|
+
* ManMen.S01E01.720p.HDTV.x264-CTU.mkv becomes Name => 'ManMen', series => 1, episode => 1
|
11
|
+
|
12
|
+
Most media file names sourced from P2P sites are not immediately machine readable; however, they generally follow a fairly consistent naming scheme. ToName applies some simple parsing rules to extract enough information so that the movie/TV show can be looked up in a metadata database (e.g. [IMDB](http://www.imdb.com)). ToName does not claim a 100% success rate, but it is generally good enough to find a match on IMDB.
|
13
|
+
|
14
|
+
Features
|
15
|
+
--------
|
16
|
+
* Extract movie name and year
|
17
|
+
* Extract TV show series and episode
|
18
|
+
* Will look at the parent folder name if media name looks like it has been abbreviated
|
19
|
+
* Works with both media and torrent filenames
|
20
|
+
|
21
|
+
Installation
|
22
|
+
------------
|
23
|
+
|
24
|
+
gem install toname
|
25
|
+
|
26
|
+
Examples
|
27
|
+
--------
|
28
|
+
|
29
|
+
ruby-1.9.2-preview3 > require 'to_name'
|
30
|
+
=> true
|
31
|
+
ruby-1.9.2-preview3 > info = ToName.to_name('Primer.2004.DVDRip.x264.AC3.avi')
|
32
|
+
=> Primer (2004)
|
33
|
+
ruby-1.9.2-preview3 > info.name
|
34
|
+
=> "Primer"
|
35
|
+
ruby-1.9.2-preview3 > info.year
|
36
|
+
=> 2004
|
37
|
+
ruby-1.9.2-preview3 > info = ToName.to_name('ManMen.S01E01.720p.HDTV.x264-CTU.mkv')
|
38
|
+
=> ManMen S: 1 E: 1
|
39
|
+
ruby-1.9.2-preview3 > info.name
|
40
|
+
=> "ManMen"
|
41
|
+
ruby-1.9.2-preview3 > info.series
|
42
|
+
=> 1
|
43
|
+
ruby-1.9.2-preview3 > info.episode
|
44
|
+
=> 1
|
45
|
+
|
46
|
+
Licence
|
47
|
+
-------
|
48
|
+
MIT
|
49
|
+
|
50
|
+
Contact
|
51
|
+
-------
|
52
|
+
Sam Cavenagh [(cavenaghweb@hotmail.com)](mailto:cavenaghweb@hotmail.com)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# Holds the pieces of information parsed out of a media file name:
# the cleaned-up name, the year, the series/episode numbers, plus the
# raw (pre-parsing) name and the original location that was parsed.
class FileNameInfo
  attr_accessor :raw_name, :location, :name, :year, :series, :episode

  # params - Hash whose optional keys :name, :year, :raw_name, :location,
  #          :series and :episode populate the matching attributes.
  #          Missing keys leave the attribute nil.
  def initialize(params = {})
    @name = params[:name]
    @year = params[:year]
    @raw_name = params[:raw_name]
    @location = params[:location]
    @series = params[:series]
    @episode = params[:episode]
  end

  # Human readable summary.
  #
  # Uses the parsed name (with year and series/episode when present),
  # falling back to the raw name and then the location. Returns an empty
  # string when none of them is set (previously this raised a TypeError
  # because nil was appended to a String).
  def to_s
    s = ''
    if @name
      s += @name
      s += " (#{@year})" if @year
      s += " S: #{@series} E: #{@episode}" if @series || @episode
    elsif @raw_name
      s += @raw_name
    elsif @location
      s += @location
    end
    s
  end

  # Sort order: series, then episode, then name, then year (newest first).
  # A criterion is only used when both sides have a value for it and the
  # values differ (name is always compared when it differs).
  #
  # Returns -1, 0 or 1 (or whatever the underlying <=> returns), and nil
  # when other does not expose the attributes needed for the comparison.
  def <=>(other)
    comparable = [:series, :episode, :name, :year].all? { |attr| other.respond_to?(attr) }
    return nil unless comparable

    if series && other.series && series != other.series
      series <=> other.series
    elsif episode && other.episode && episode != other.episode
      episode <=> other.episode
    elsif name != other.name
      name <=> other.name
    elsif year && other.year
      # Note: with year we want newest first
      other.year <=> year
    else
      0
    end
  end
end
|
data/lib/to_name.rb
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'file_name_info'
|
2
|
+
|
3
|
+
# Parses media file names / paths (e.g. "Primer.2004.DVDRip.x264.AC3.avi")
# and extracts the name, year and series/episode numbers into a FileNameInfo.
class ToName

  FILE_SEP_REGEX = /\//
  FILE_EXT_SEP_REGEX = /\./
  # A single-digit CD sub-folder inside the path, e.g. ".../CD1/...".
  # Case-insensitive so that it agrees with the CD folder test in
  # parent_folder_name.
  CD_FOLDER_REGEX = /\/CD(\d)\//i
  # Chars used in file names as a substitute for spaces
  SPACE_SUB_REGEX = /(\.|_|\-)/
  VIDEO_TYPE_NAMES = ['DVDRIP', '1080p', '720p','R5', 'DVDSCR', 'BDRip', 'CAM', 'TS', 'PPV', 'Xvid', 'divx', 'DVDSCREENER'].freeze
  # Loose test (substring, any position) used on parent folder names
  CONTENT_SOURCE_FOLDER_TEST_REGEX = /#{VIDEO_TYPE_NAMES.join('|')}/i
  # Strict test: the type name must be preceded by "(", "[" or whitespace
  CONTENT_SOURCE_REGEX = /(\(|\[|\s)+(#{VIDEO_TYPE_NAMES.join('|')})(\)|\]|\s|$)+/i
  YEAR_REGEX = /(\(|\[|\s)+\d{4}(,|\)|\]|\s|$)+/
  SESSION_ESP_REGEX_1 = /S(\d{2})\s?E(\d{2})/i
  SESSION_ESP_REGEX_2 = /\s+(\d+)x(\d+)(\s|$)+/i
  SESSION_ESP_REGEX_3 = /Season (\d+) Episode (\d+)/i
  SESSION_ESP_REGEX_OF = /(\d+)\s?of\s?(\d+)/i
  SESSION_REGEXS = [SESSION_ESP_REGEX_1, SESSION_ESP_REGEX_2, SESSION_ESP_REGEX_3].freeze

  # Parse a file name or path.
  #
  # location - String file name or "/"-separated path of the media file.
  #
  # Returns a FileNameInfo with :name, :year, :series, :episode, the
  # space-normalised :raw_name that was parsed, and the given :location.
  def self.to_name(location)
    raw_name = self.get_file_name(location)

    # Check to see if we are better off looking at the folder name.
    # NOTE(review): this test runs before "." / "_" / "-" are turned into
    # spaces, so CONTENT_SOURCE_REGEX cannot match dot-separated names here;
    # confirm whether that is intended.
    check_extension = true
    unless raw_name =~ CONTENT_SOURCE_REGEX || raw_name =~ SESSION_ESP_REGEX_1
      parent_folder = self.parent_folder_name(location)
      if parent_folder && parent_folder =~ CONTENT_SOURCE_FOLDER_TEST_REGEX
        raw_name = parent_folder
        check_extension = false
      end
    end

    # Remove file extension (everything from the last "." onwards)
    if check_extension && raw_name =~ FILE_EXT_SEP_REGEX
      raw_name = raw_name[0, raw_name.rindex(FILE_EXT_SEP_REGEX)]
    end
    # Remove space sub chars
    raw_name = raw_name.gsub(SPACE_SUB_REGEX, ' ')

    name = raw_name.dup
    # Chop off any info about the movie format or source
    m = CONTENT_SOURCE_REGEX.match(name)
    name = m.pre_match if m

    # Extract year if it's in the filename (but not at the very start)
    year = nil
    m = YEAR_REGEX.match(name)
    if m && m.begin(0) > 0
      name = m.pre_match
      # Strip any surrounding brackets and convert to int
      year = m[0].gsub(/\(|\)|\[|\]/, '').to_i
    end

    # Strip LIMITED off the end. Note: this IS case sensitive, so a title
    # that merely ends in the word "Limited" is left alone.
    m = /LIMITED$/.match(name)
    name = m.pre_match if m

    # Try to extract the session and episode
    session = nil
    episode = nil
    SESSION_REGEXS.each do |session_regex|
      found = session_regex.match(name)
      next unless found
      name = found.pre_match
      session = found[1].to_i
      episode = found[2].to_i
      break
    end

    # "3 of 10" style numbering: treated as episode 3 of session 1
    if session.nil?
      m = SESSION_ESP_REGEX_OF.match(name)
      if m
        name = m.pre_match
        session = 1
        episode = m[1].to_i
      end
    end

    # Sometimes there can be multiple media files for a single movie, we
    # want to remove the disc number ("Movie1", "Movie part 1") if this is
    # the case
    cd_match = CD_FOLDER_REGEX.match(location)
    if cd_match
      cd_number = cd_match[1].to_i
      m = /#{cd_number}$/.match(name) || /part\s?#{cd_number}/i.match(name)
      name = m.pre_match if m
    end

    name = name.strip
    FileNameInfo.new(:raw_name => raw_name, :name => name, :year => year,
                     :series => session, :episode => episode, :location => location)
  end

  # Returns the part of location after the last "/" (the whole string when
  # it contains no "/").
  def self.get_file_name(location)
    file_name = location.dup
    # Change to just the filename
    if file_name =~ FILE_SEP_REGEX
      file_name = file_name[file_name.rindex(FILE_SEP_REGEX) + 1, file_name.length]
    end
    file_name
  end

  # Returns the name of the folder containing the file, or nil when the
  # location has no folder component. When that folder looks like a CD
  # folder (e.g. "CD1") and a further parent exists, that parent is returned.
  def self.parent_folder_name(location)
    # Remove first / and break by folder name
    folders = location.sub(/^\//, '').split('/')
    return nil if folders.empty? || folders.size < 2
    parent_folder = folders[folders.size - 2]
    # If the folder is a CD folder e.g. CD1 go up 1 more
    return folders[folders.size - 3] if folders.size > 2 && parent_folder =~ /CD\d/i
    parent_folder
  end

end
|
metadata
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: toname
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
version: 0.1.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Sam Cavenagh
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-08-08 00:00:00 -07:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rspec
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 2
|
30
|
+
- 5
|
31
|
+
- 8
|
32
|
+
version: 2.5.8
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
description: Convert video/torrent filename into movie/tv series name and year
|
36
|
+
email: cavenaghweb@hotmail.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- README.md
|
43
|
+
files:
|
44
|
+
- README.md
|
45
|
+
- lib/file_name_info.rb
|
46
|
+
- lib/to_name.rb
|
47
|
+
has_rdoc: true
|
48
|
+
homepage: http://github.com/o-sam-o/toname
|
49
|
+
licenses: []
|
50
|
+
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options:
|
53
|
+
- --main
|
54
|
+
- README.md
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
segments:
|
63
|
+
- 0
|
64
|
+
version: "0"
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
segments:
|
71
|
+
- 0
|
72
|
+
version: "0"
|
73
|
+
requirements: []
|
74
|
+
|
75
|
+
rubyforge_project:
|
76
|
+
rubygems_version: 1.3.7
|
77
|
+
signing_key:
|
78
|
+
specification_version: 3
|
79
|
+
summary: To Name
|
80
|
+
test_files: []
|
81
|
+
|