toname 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/README.md +52 -0
  2. data/lib/file_name_info.rb +41 -0
  3. data/lib/to_name.rb +103 -0
  4. metadata +81 -0
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # ToName
2
+
3
+ Overview
4
+ --------
5
+ ToName is a Ruby library which parses media filenames and extracts metadata (e.g. name and year)
6
+
7
+ e.g.
8
+
9
+ * Primer.2004.DVDRip.x264.AC3.avi becomes Name => 'Primer', year => 2004
10
+ * ManMen.S01E01.720p.HDTV.x264-CTU.mkv becomes Name => 'ManMen', series => 1, episode => 1
11
+
12
+ Most media file names sourced from P2P sites are not immediately machine readable; however, they generally have a fairly consistent naming schema. ToName applies some simple parsing rules to extract enough information so that the movie/tv show can be looked up in a metadata database (e.g. [IMDB](http://www.imdb.com)). ToName does not claim a 100% success rate; however, it's generally good enough to find a match on IMDB.
13
+
14
+ Features
15
+ --------
16
+ * Extract movie name and year
17
+ * Extract TV show series and episode
18
+ * Will look at the parent folder name if media name looks like it has been abbreviated
19
+ * Works with both media and torrent filenames
20
+
21
+ Installation
22
+ ------------
23
+
24
+ gem install toname
25
+
26
+ Examples
27
+ --------
28
+
29
+ ruby-1.9.2-preview3 > require 'to_name'
30
+ => true
31
+ ruby-1.9.2-preview3 > info = ToName.to_name('Primer.2004.DVDRip.x264.AC3.avi')
32
+ => Primer (2004)
33
+ ruby-1.9.2-preview3 > info.name
34
+ => "Primer"
35
+ ruby-1.9.2-preview3 > info.year
36
+ => 2004
37
+ ruby-1.9.2-preview3 > info = ToName.to_name('ManMen.S01E01.720p.HDTV.x264-CTU.mkv')
38
+ => ManMen S: 1 E: 1
39
+ ruby-1.9.2-preview3 > info.name
40
+ => "ManMen"
41
+ ruby-1.9.2-preview3 > info.series
42
+ => 1
43
+ ruby-1.9.2-preview3 > info.episode
44
+ => 1
45
+
46
+ Licence
47
+ -------
48
+ MIT
49
+
50
+ Contact
51
+ -------
52
+ Sam Cavenagh [(cavenaghweb@hotmail.com)](mailto:cavenaghweb@hotmail.com)
@@ -0,0 +1,41 @@
1
# Holds the metadata extracted from a media file name: the cleaned-up name,
# optional year, optional series/episode numbers, plus the raw name and the
# original location the information was derived from.
class FileNameInfo
  attr_accessor :raw_name, :location, :name, :year, :series, :episode

  # params - Hash with any of the keys :name, :year, :raw_name, :location,
  #          :series and :episode. Missing keys leave the attribute nil.
  def initialize(params = {})
    @name = params[:name]
    @year = params[:year]
    @raw_name = params[:raw_name]
    @location = params[:location]
    @series = params[:series]
    @episode = params[:episode]
  end

  # Human readable summary.
  #
  # With a name: "Name (year) S: series E: episode", where the year and the
  # series/episode parts only appear when set. Without a name it falls back to
  # the raw name, then the location. Returns an empty string when none of them
  # is set (instead of raising on a nil location).
  def to_s
    # Interpolation yields a fresh string and copes with a nil location
    return "#{@raw_name || @location}" unless @name

    text = @name.to_s.dup
    text << " (#{@year})" if @year
    text << " S: #{@series} E: #{@episode}" if @series || @episode
    text
  end

  # Ordering used for sorting. Compares, in this order: series, episode, name
  # (each only when it differs; series/episode only when set on both sides),
  # and finally year with the newest first.
  #
  # Returns -1, 0 or 1; nil when other does not expose the compared
  # attributes, or when the differing values are themselves not comparable
  # (e.g. a nil name against a String).
  def <=>(other)
    # Follow the <=> convention: nil (not an exception) for foreign objects
    return nil unless [:series, :episode, :name, :year].all? { |attr| other.respond_to?(attr) }

    if series && other.series && series != other.series
      series <=> other.series
    elsif episode && other.episode && episode != other.episode
      episode <=> other.episode
    elsif name != other.name
      name <=> other.name
    elsif year && other.year
      # Note: with year we want newest first
      other.year <=> year
    else
      0
    end
  end
end
data/lib/to_name.rb ADDED
@@ -0,0 +1,103 @@
1
+ require 'file_name_info'
2
+
3
# Parses media file names / paths (e.g. "Primer.2004.DVDRip.x264.AC3.avi")
# and extracts the name, year and series/episode numbers as a FileNameInfo.
class ToName

  FILE_SEP_REGEX = /\//
  FILE_EXT_SEP_REGEX = /\./
  # A "CDn" path segment. Case-insensitive so it agrees with the CD folder
  # detection in parent_folder_name, which also ignores case.
  CD_FOLDER_REGEX = /\/CD(\d)\//i
  # Chars used in file names as a substitute for spaces
  SPACE_SUB_REGEX = /(\.|_|\-)/
  VIDEO_TYPE_NAMES = ['DVDRIP', '1080p', '720p','R5', 'DVDSCR', 'BDRip', 'CAM', 'TS', 'PPV', 'Xvid', 'divx', 'DVDSCREENER'].freeze
  # Loose test: a video type name anywhere in the text
  CONTENT_SOURCE_FOLDER_TEST_REGEX = /#{VIDEO_TYPE_NAMES.join('|')}/i
  # Strict test: a video type name delimited by brackets, whitespace or end of text
  CONTENT_SOURCE_REGEX = /(\(|\[|\s)+(#{VIDEO_TYPE_NAMES.join('|')})(\)|\]|\s|$)+/i
  YEAR_REGEX = /(\(|\[|\s)+\d{4}(,|\)|\]|\s|$)+/
  SESSION_ESP_REGEX_1 = /S(\d{2})\s?E(\d{2})/i
  SESSION_ESP_REGEX_2 = /\s+(\d+)x(\d+)(\s|$)+/i
  SESSION_ESP_REGEX_3 = /Season (\d+) Episode (\d+)/i
  SESSION_ESP_REGEX_OF = /(\d+)\s?of\s?(\d+)/i
  SESSION_REGEXS = [SESSION_ESP_REGEX_1, SESSION_ESP_REGEX_2, SESSION_ESP_REGEX_3].freeze

  # Extracts the metadata for the media file at location.
  #
  # location - String file name or '/' separated path.
  #
  # Returns a FileNameInfo with :raw_name (the text that was parsed, with
  # separator chars replaced by spaces), :name, :year, :series, :episode and
  # :location. Values that could not be found are nil.
  def self.to_name(location)
    raw_name, from_folder = pick_raw_name(location)
    # A folder name has no extension to remove
    raw_name = strip_extension(raw_name) unless from_folder
    # Remove space sub chars
    raw_name = raw_name.gsub(SPACE_SUB_REGEX, ' ')

    name = raw_name.dup
    # Chop off any info about the movie format or source
    match = CONTENT_SOURCE_REGEX.match(name)
    name = match.pre_match if match

    # Extract the year if it's in the name, but not when it starts the name
    year = nil
    match = YEAR_REGEX.match(name)
    if match && match.begin(0) > 0
      name = match.pre_match
      # Strip any surrounding brackets and convert to int
      year = match[0].gsub(/\(|\)|\[|\]/, '').to_i
    end

    # Strip LIMITED off the end. Note: this IS case sensitive, only an
    # upper-case LIMITED is removed
    match = /LIMITED$/.match(name)
    name = match.pre_match if match

    name, session, episode = extract_session_and_episode(name)
    name = strip_cd_number(name, location)

    FileNameInfo.new(:raw_name => raw_name, :name => name.strip, :year => year,
                     :series => session, :episode => episode, :location => location)
  end

  # Returns the text after the last '/' of location (a copy of location when
  # it contains no '/').
  def self.get_file_name(location)
    sep_index = location.rindex(FILE_SEP_REGEX)
    sep_index ? location[(sep_index + 1)..-1] : location.dup
  end

  # Returns the name of the folder containing the file, or nil when location
  # has no folder part. When that folder looks like a CD folder (e.g. CD1) and
  # there is a folder above it, the folder above is returned instead.
  def self.parent_folder_name(location)
    # Remove first / and break by folder name
    folders = location.sub(/^\//, '').split('/')
    return nil if folders.size < 2

    parent_folder = folders[-2]
    # If the folder is a CD folder e.g. CD1 go up 1 more
    return folders[-3] if folders.size > 2 && parent_folder =~ /CD\d/i

    parent_folder
  end

  # Chooses the text to parse: the file name, or the parent folder name when
  # the file name carries no format/episode marker but the folder does.
  # Returns [text, true-if-taken-from-folder].
  def self.pick_raw_name(location)
    file_name = get_file_name(location)
    # NOTE(review): CONTENT_SOURCE_REGEX needs a bracket/space before the
    # format token, and separators are only replaced with spaces later, so
    # dot/underscore separated file names never match here - confirm intended.
    return [file_name, false] if file_name =~ CONTENT_SOURCE_REGEX || file_name =~ SESSION_ESP_REGEX_1

    folder = parent_folder_name(location)
    return [folder, true] if folder && folder =~ CONTENT_SOURCE_FOLDER_TEST_REGEX

    [file_name, false]
  end

  # Drops everything from the last '.' onwards; unchanged when there is no '.'.
  def self.strip_extension(file_name)
    dot_index = file_name.rindex(FILE_EXT_SEP_REGEX)
    dot_index ? file_name[0, dot_index] : file_name
  end

  # Looks for a series/episode marker in name.
  # Returns [text before the marker, series, episode], or [name, nil, nil]
  # when no marker is found. An "X of Y" marker is reported as series 1,
  # episode X.
  def self.extract_session_and_episode(name)
    SESSION_REGEXS.each do |session_regex|
      match = session_regex.match(name)
      return [match.pre_match, match[1].to_i, match[2].to_i] if match
    end

    match = SESSION_ESP_REGEX_OF.match(name)
    return [match.pre_match, 1, match[1].to_i] if match

    [name, nil, nil]
  end

  # Sometimes there can be multiple media files for a single movie (one per
  # CDn folder); removes the trailing disc number or "part n" from name when
  # location contains such a folder. Returns name unchanged otherwise.
  def self.strip_cd_number(name, location)
    cd_match = CD_FOLDER_REGEX.match(location)
    return name unless cd_match

    cd_number = cd_match[1].to_i
    match = /#{cd_number}$/.match(name) || /part\s?#{cd_number}/i.match(name)
    match ? match.pre_match : name
  end

  private_class_method :pick_raw_name, :strip_extension,
                       :extract_session_and_episode, :strip_cd_number

end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: toname
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - Sam Cavenagh
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-08-08 00:00:00 -07:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 2
30
+ - 5
31
+ - 8
32
+ version: 2.5.8
33
+ type: :development
34
+ version_requirements: *id001
35
+ description: Convert video/torrent filename into movie/tv series name and year
36
+ email: cavenaghweb@hotmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - README.md
43
+ files:
44
+ - README.md
45
+ - lib/file_name_info.rb
46
+ - lib/to_name.rb
47
+ has_rdoc: true
48
+ homepage: http://github.com/o-sam-o/toname
49
+ licenses: []
50
+
51
+ post_install_message:
52
+ rdoc_options:
53
+ - --main
54
+ - README.md
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ segments:
63
+ - 0
64
+ version: "0"
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ segments:
71
+ - 0
72
+ version: "0"
73
+ requirements: []
74
+
75
+ rubyforge_project:
76
+ rubygems_version: 1.3.7
77
+ signing_key:
78
+ specification_version: 3
79
+ summary: To Name
80
+ test_files: []
81
+