toname 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +52 -0
- data/lib/file_name_info.rb +41 -0
- data/lib/to_name.rb +103 -0
- metadata +81 -0
data/README.md
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# ToName
|
2
|
+
|
3
|
+
Overview
|
4
|
+
--------
|
5
|
+
ToName is a Ruby library which parses media filenames and extracts metadata (e.g. name and year)
|
6
|
+
|
7
|
+
e.g.
|
8
|
+
|
9
|
+
* Primer.2004.DVDRip.x264.AC3.avi becomes Name => 'Primer', year => 2004
|
10
|
+
* ManMen.S01E01.720p.HDTV.x264-CTU.mkv becomes Name => 'ManMen', series => 1, episode => 1
|
11
|
+
|
12
|
+
Most media file names sourced from P2P sites are not immediately machine readable; however, they generally follow a fairly consistent naming scheme. ToName applies some simple parsing rules to extract enough information so that the movie/tv show can be looked up in a metadata database (e.g. [IMDB](http://www.imdb.com)). ToName does not claim a 100% success rate, but it's generally good enough to find a match on IMDB.
|
13
|
+
|
14
|
+
Features
|
15
|
+
--------
|
16
|
+
* Extract movie name and year
|
17
|
+
* Extract TV show series and episode
|
18
|
+
* Will look at the parent folder name if media name looks like it has been abbreviated
|
19
|
+
* Works with both media and torrent filenames
|
20
|
+
|
21
|
+
Installation
|
22
|
+
------------
|
23
|
+
|
24
|
+
gem install toname
|
25
|
+
|
26
|
+
Examples
|
27
|
+
--------
|
28
|
+
|
29
|
+
ruby-1.9.2-preview3 > require 'to_name'
|
30
|
+
=> true
|
31
|
+
ruby-1.9.2-preview3 > info = ToName.to_name('Primer.2004.DVDRip.x264.AC3.avi')
|
32
|
+
=> Primer (2004)
|
33
|
+
ruby-1.9.2-preview3 > info.name
|
34
|
+
=> "Primer"
|
35
|
+
ruby-1.9.2-preview3 > info.year
|
36
|
+
=> 2004
|
37
|
+
ruby-1.9.2-preview3 > info = ToName.to_name('ManMen.S01E01.720p.HDTV.x264-CTU.mkv')
|
38
|
+
=> ManMen S: 1 E: 1
|
39
|
+
ruby-1.9.2-preview3 > info.name
|
40
|
+
=> "ManMen"
|
41
|
+
ruby-1.9.2-preview3 > info.series
|
42
|
+
=> 1
|
43
|
+
ruby-1.9.2-preview3 > info.episode
|
44
|
+
=> 1
|
45
|
+
|
46
|
+
Licence
|
47
|
+
-------
|
48
|
+
MIT
|
49
|
+
|
50
|
+
Contact
|
51
|
+
-------
|
52
|
+
Sam Cavenagh [(cavenaghweb@hotmail.com)](mailto:cavenaghweb@hotmail.com)
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# Holds the pieces of information associated with one media file name.
#
# Every attribute is optional and defaults to nil:
#   raw_name - the name the other fields were derived from
#   location - the original location string
#   name     - the extracted title
#   year     - the extracted year
#   series   - the extracted series number
#   episode  - the extracted episode number
class FileNameInfo
  attr_accessor :raw_name, :location, :name, :year, :series, :episode

  # Builds an instance from a hash of attributes.
  #
  # params - Hash that may contain :name, :year, :raw_name, :location,
  #          :series and :episode. Keys that are absent leave the
  #          corresponding attribute nil.
  def initialize(params = {})
    @name = params[:name]
    @year = params[:year]
    @raw_name = params[:raw_name]
    @location = params[:location]
    @series = params[:series]
    @episode = params[:episode]
  end

  # Human readable description.
  #
  # With a name: "Name", followed by " (year)" when a year is set and by
  # " S: <series> E: <episode>" when a series or an episode is set.
  # Without a name: the raw name, or failing that the location.
  #
  # Always returns a new String; an instance with nothing set yields ""
  # instead of raising a TypeError.
  def to_s
    # to_s guards against nil (and non-String values); dup keeps callers
    # from mutating the attribute through the returned string.
    return (@raw_name || @location).to_s.dup unless @name

    text = @name.to_s.dup
    text << " (#{@year})" if @year
    text << " S: #{@series} E: #{@episode}" if @series || @episode
    text
  end

  # Ordering used when sorting.
  #
  # The first criterion that applies decides the result:
  #   1. series, when both sides have one and they differ
  #   2. episode, when both sides have one and they differ
  #   3. name, when the names differ (a nil name is compared as "")
  #   4. year, newest first, when both sides have one
  # Otherwise the two objects are considered equal (0).
  def <=>(other)
    if series && other.series && series != other.series
      series <=> other.series
    elsif episode && other.episode && episode != other.episode
      episode <=> other.episode
    elsif name != other.name
      # nil <=> "x" is nil, which makes Array#sort raise; compare as strings
      name.to_s <=> other.name.to_s
    elsif year && other.year
      # Note: with year we want newest first
      other.year <=> year
    else
      0
    end
  end
end
|
data/lib/to_name.rb
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'file_name_info'
|
2
|
+
|
3
|
+
# Extracts a title, year and series/episode numbers from a media file
# location (a bare file name or a "/"-separated path).
#
# Entry point is ToName.to_name, which returns a FileNameInfo.
class ToName

  FILE_SEP_REGEX = /\//
  FILE_EXT_SEP_REGEX = /\./
  CD_FOLDER_REGEX = /\/CD(\d)\//
  # Chars used in file names as a substitute for spaces
  SPACE_SUB_REGEX = /(\.|_|\-)/
  VIDEO_TYPE_NAMES = ['DVDRIP', '1080p', '720p','R5', 'DVDSCR', 'BDRip', 'CAM', 'TS', 'PPV', 'Xvid', 'divx', 'DVDSCREENER'].freeze
  # Tests whether a folder name carries a video type token. The token has to
  # be delimited by a non-alphanumeric char (or the start/end of the string):
  # a plain substring test lets short tokens such as TS or CAM match ordinary
  # folders like "Torrents" or "Camping", whose name would then replace the
  # real file name.
  CONTENT_SOURCE_FOLDER_TEST_REGEX = /(?:\A|[^a-z0-9])(?:#{VIDEO_TYPE_NAMES.join('|')})(?:[^a-z0-9]|\z)/i
  CONTENT_SOURCE_REGEX = /(\(|\[|\s)+(#{VIDEO_TYPE_NAMES.join('|')})(\)|\]|\s|$)+/i
  YEAR_REGEX = /(\(|\[|\s)+\d{4}(,|\)|\]|\s|$)+/
  SESSION_ESP_REGEX_1 = /S(\d{2})\s?E(\d{2})/i
  SESSION_ESP_REGEX_2 = /\s+(\d+)x(\d+)(\s|$)+/i
  SESSION_ESP_REGEX_3 = /Season (\d+) Episode (\d+)/i
  SESSION_ESP_REGEX_OF = /(\d+)\s?of\s?(\d+)/i
  SESSION_REGEXS = [SESSION_ESP_REGEX_1, SESSION_ESP_REGEX_2, SESSION_ESP_REGEX_3].freeze

  # Parses a media file location.
  #
  # location - String: a file name or a "/"-separated path ending in one.
  #
  # Returns a FileNameInfo with :raw_name (the name that was parsed, with
  # separator chars replaced by spaces), :name, :year, :series, :episode
  # and :location. :year, :series and :episode are nil when not found.
  def self.to_name(location)
    raw_name = get_file_name(location)

    # Check to see if we are better off looking at the folder name
    check_extension = true
    file_name_is_descriptive = raw_name =~ CONTENT_SOURCE_REGEX || raw_name =~ SESSION_ESP_REGEX_1
    unless file_name_is_descriptive
      parent_folder = parent_folder_name(location)
      if parent_folder && parent_folder =~ CONTENT_SOURCE_FOLDER_TEST_REGEX
        raw_name = parent_folder
        # A folder name has no extension to remove
        check_extension = false
      end
    end

    # Remove file extension
    if check_extension
      extension_at = raw_name.rindex(FILE_EXT_SEP_REGEX)
      raw_name = raw_name[0, extension_at] if extension_at
    end
    # Remove space sub chars
    raw_name = raw_name.gsub(SPACE_SUB_REGEX, ' ')

    name = raw_name.dup
    # Chop off any info about the movie format or source
    source_match = CONTENT_SOURCE_REGEX.match(name)
    name = source_match.pre_match if source_match

    # Extract year if it's in the filename. A match at the very start is
    # ignored, otherwise nothing would be left for the name.
    year = nil
    year_match = YEAR_REGEX.match(name)
    if year_match && year_match.begin(0) > 0
      name = year_match.pre_match
      # Strip any surrounding brackets and convert to int
      year = year_match[0].gsub(/\(|\)|\[|\]/, '').to_i
    end

    # Strip LIMITED off the end. Note: the match is case sensitive, so only
    # an upper case LIMITED is removed
    limited_match = /LIMITED$/.match(name)
    name = limited_match.pre_match if limited_match

    # Try to extract the session and episode
    session = nil
    episode = nil
    SESSION_REGEXS.each do |session_regex|
      session_match = session_regex.match(name)
      next unless session_match
      name = session_match.pre_match
      session = session_match[1].to_i
      episode = session_match[2].to_i
      break
    end

    # "<episode> of <total>" carries no session number, session 1 is used
    if session.nil?
      of_match = SESSION_ESP_REGEX_OF.match(name)
      if of_match
        name = of_match.pre_match
        session = 1
        episode = of_match[1].to_i
      end
    end

    # Sometimes there can be multiple media files for a single movie, we want to remove the version number if this is the case
    cd_match = CD_FOLDER_REGEX.match(location)
    if cd_match
      cd_number = cd_match[1].to_i
      part_match = /#{cd_number}$/.match(name) || /part\s?#{cd_number}/i.match(name)
      name = part_match.pre_match if part_match
    end

    name.strip!
    FileNameInfo.new(:raw_name => raw_name, :name => name, :year => year,
                     :series => session, :episode => episode, :location => location)
  end

  # Returns the part of location after its last "/" as a new String
  # (a copy of the whole location when it contains no "/").
  def self.get_file_name(location)
    separator_at = location.rindex(FILE_SEP_REGEX)
    return location.dup unless separator_at
    location[(separator_at + 1)..-1]
  end

  # Returns the name of the folder containing the file, or nil when location
  # has no folder part. When that folder looks like a CD folder (e.g. CD1)
  # and another folder exists above it, the folder one level up is returned.
  def self.parent_folder_name(location)
    # Remove first / and break by folder name
    folders = location.sub(/^\//, '').split('/')
    return nil if folders.size < 2
    parent_folder = folders[-2]
    # If the folder is a CD folder e.g. CD1 go up 1 more
    return folders[-3] if folders.size > 2 && parent_folder =~ /CD\d/i
    parent_folder
  end

end
|
metadata
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: toname
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
version: 0.1.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Sam Cavenagh
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-08-08 00:00:00 -07:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: rspec
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 2
|
30
|
+
- 5
|
31
|
+
- 8
|
32
|
+
version: 2.5.8
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
description: Convert video/torrent filename into movie/tv series name and year
|
36
|
+
email: cavenaghweb@hotmail.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- README.md
|
43
|
+
files:
|
44
|
+
- README.md
|
45
|
+
- lib/file_name_info.rb
|
46
|
+
- lib/to_name.rb
|
47
|
+
has_rdoc: true
|
48
|
+
homepage: http://github.com/o-sam-o/toname
|
49
|
+
licenses: []
|
50
|
+
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options:
|
53
|
+
- --main
|
54
|
+
- README.md
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
segments:
|
63
|
+
- 0
|
64
|
+
version: "0"
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
segments:
|
71
|
+
- 0
|
72
|
+
version: "0"
|
73
|
+
requirements: []
|
74
|
+
|
75
|
+
rubyforge_project:
|
76
|
+
rubygems_version: 1.3.7
|
77
|
+
signing_key:
|
78
|
+
specification_version: 3
|
79
|
+
summary: To Name
|
80
|
+
test_files: []
|
81
|
+
|