toname 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/README.md +52 -0
  2. data/lib/file_name_info.rb +41 -0
  3. data/lib/to_name.rb +103 -0
  4. metadata +81 -0
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # ToName
2
+
3
+ Overview
4
+ --------
5
+ ToName is a Ruby library which parses media filenames and extracts metadata (e.g. name and year)
6
+
7
+ e.g.
8
+
9
+ * Primer.2004.DVDRip.x264.AC3.avi becomes Name => 'Primer', year => 2004
10
+ * ManMen.S01E01.720p.HDTV.x264-CTU.mkv becomes Name => 'ManMen', series => 1, episode => 1
11
+
12
+ Most media file names sourced from P2P sites are not immediately machine readable; however, they generally follow a fairly consistent naming scheme. ToName applies some simple parsing rules to extract enough information so that the movie/tv show can be looked up in a metadata database (e.g. [IMDB](http://www.imdb.com)). ToName does not claim a 100% success rate; however, it's generally good enough to find a match on IMDB.
13
+
14
+ Features
15
+ --------
16
+ * Extract movie name and year
17
+ * Extract TV show series and episode
18
+ * Will look at the parent folder name if media name looks like it has been abbreviated
19
+ * Works with both media and torrent filenames
20
+
21
+ Installation
22
+ ------------
23
+
24
+ gem install toname
25
+
26
+ Examples
27
+ --------
28
+
29
+ ruby-1.9.2-preview3 > require 'to_name'
30
+ => true
31
+ ruby-1.9.2-preview3 > info = ToName.to_name('Primer.2004.DVDRip.x264.AC3.avi')
32
+ => Primer (2004)
33
+ ruby-1.9.2-preview3 > info.name
34
+ => "Primer"
35
+ ruby-1.9.2-preview3 > info.year
36
+ => 2004
37
+ ruby-1.9.2-preview3 > info = ToName.to_name('ManMen.S01E01.720p.HDTV.x264-CTU.mkv')
38
+ => ManMen S: 1 E: 1
39
+ ruby-1.9.2-preview3 > info.name
40
+ => "ManMen"
41
+ ruby-1.9.2-preview3 > info.series
42
+ => 1
43
+ ruby-1.9.2-preview3 > info.episode
44
+ => 1
45
+
46
+ Licence
47
+ -------
48
+ MIT
49
+
50
+ Contact
51
+ -------
52
+ Sam Cavenagh [(cavenaghweb@hotmail.com)](mailto:cavenaghweb@hotmail.com)
@@ -0,0 +1,41 @@
1
# Holds the pieces of information extracted from a media file name:
# the cleaned-up name, the release year, the series/episode numbers, plus the
# raw name and the original location they were derived from.
class FileNameInfo
  attr_accessor :raw_name, :location, :name, :year, :series, :episode

  # params - Hash that may contain :name, :year, :raw_name, :location,
  #          :series and :episode. Any key that is absent leaves the
  #          corresponding attribute nil.
  def initialize(params = {})
    @name = params[:name]
    @year = params[:year]
    @raw_name = params[:raw_name]
    @location = params[:location]
    @series = params[:series]
    @episode = params[:episode]
  end

  # Returns a human readable description.
  #
  # When a name is present the result is the name followed by " (year)" if a
  # year is set and " S: <series> E: <episode>" if either number is set.
  # Otherwise the raw name is used, then the location. When none of the three
  # is set an empty string is returned rather than raising on nil.
  def to_s
    # dup so callers always receive a fresh String they may mutate freely
    return (@raw_name || @location).to_s.dup unless @name

    text = @name.to_s.dup
    text << " (#{@year})" if @year
    text << " S: #{@series} E: #{@episode}" if @series || @episode
    text
  end

  # Orders by series, then episode, then name, then year.
  #
  # Series and episode only take part when both sides have a value and the
  # values differ. Years are compared in reverse so the newest comes first.
  # Returns whatever the underlying <=> returns (-1, 0, 1, or nil when the
  # two values cannot be compared, e.g. one name is nil).
  def <=>(other)
    if series && other.series && series != other.series
      series <=> other.series
    elsif episode && other.episode && episode != other.episode
      episode <=> other.episode
    elsif name != other.name
      name <=> other.name
    elsif year && other.year
      # Note: with year we want newest first
      other.year <=> year
    else
      0
    end
  end
end
data/lib/to_name.rb ADDED
@@ -0,0 +1,103 @@
1
+ require 'file_name_info'
2
+
3
# Parses media / torrent file paths and extracts the movie or TV show name,
# the release year and the series / episode numbers into a FileNameInfo.
class ToName

  FILE_SEP_REGEX = /\//
  FILE_EXT_SEP_REGEX = /\./
  CD_FOLDER_REGEX = /\/CD(\d)\//
  # Chars used in file names as a substitute for spaces
  SPACE_SUB_REGEX = /(\.|_|\-)/
  VIDEO_TYPE_NAMES = ['DVDRIP', '1080p', '720p','R5', 'DVDSCR', 'BDRip', 'CAM', 'TS', 'PPV', 'Xvid', 'divx', 'DVDSCREENER']
  # Used to decide whether a parent folder name describes the release.
  # A type name only counts when it is a whole token, i.e. not directly
  # preceded or followed by a letter or digit. Without that restriction short
  # names such as "TS" or "CAM" match inside ordinary folder names like
  # "Torrents" or "Camera".
  CONTENT_SOURCE_FOLDER_TEST_REGEX = /(?<![a-z0-9])(?:#{VIDEO_TYPE_NAMES.join('|')})(?![a-z0-9])/i
  CONTENT_SOURCE_REGEX = /(\(|\[|\s)+(#{VIDEO_TYPE_NAMES.join('|')})(\)|\]|\s|$)+/i
  YEAR_REGEX = /(\(|\[|\s)+\d{4}(,|\)|\]|\s|$)+/
  SESSION_ESP_REGEX_1 = /S(\d{2})\s?E(\d{2})/i
  SESSION_ESP_REGEX_2 = /\s+(\d+)x(\d+)(\s|$)+/i
  SESSION_ESP_REGEX_3 = /Season (\d+) Episode (\d+)/i
  SESSION_ESP_REGEX_OF = /(\d+)\s?of\s?(\d+)/i
  SESSION_REGEXS = [SESSION_ESP_REGEX_1, SESSION_ESP_REGEX_2, SESSION_ESP_REGEX_3]

  # Builds a FileNameInfo from a file path or bare file name.
  #
  # location - String path (using "/" separators) or file name.
  #
  # Returns a FileNameInfo whose :name is the cleaned title, :year the release
  # year (nil when none was found), :series / :episode the episode numbers
  # (nil when none were found), :raw_name the separator-normalised text that
  # was parsed and :location the argument as given.
  def self.to_name(location)
    raw_name = get_file_name(location)

    # Check to see if we are better off looking at the folder name.
    # The file name is tested before its separators are normalised, so a
    # dotted file name does not satisfy CONTENT_SOURCE_REGEX here and the
    # parent folder gets a chance to supply the name.
    check_extention = true
    unless raw_name =~ CONTENT_SOURCE_REGEX || raw_name =~ SESSION_ESP_REGEX_1
      parent_folder = parent_folder_name(location)
      if parent_folder && parent_folder =~ CONTENT_SOURCE_FOLDER_TEST_REGEX
        raw_name = parent_folder
        # A folder name has no extension to strip
        check_extention = false
      end
    end

    # Remove file extension
    raw_name = raw_name[0, raw_name.rindex(FILE_EXT_SEP_REGEX)] if check_extention && raw_name =~ FILE_EXT_SEP_REGEX
    # Remove space sub chars
    raw_name = raw_name.gsub(SPACE_SUB_REGEX, ' ')

    name = raw_name.dup

    # Chop off any info about the movie format or source
    match = name.match(CONTENT_SOURCE_REGEX)
    name = match.pre_match if match

    # Extract the year if it's in the file name. A year at the very start is
    # left alone so that it can serve as the title.
    year = nil
    match = name.match(YEAR_REGEX)
    if match && match.begin(0) > 0
      name = match.pre_match
      # Strip any surrounding brackets and convert to int
      year = match[0].gsub(/\(|\)|\[|\]/, '').to_i
    end

    # Strip LIMITED off the end. Note: the match is case sensitive, so only
    # the upper-case form is removed.
    match = name.match(/LIMITED$/)
    name = match.pre_match if match

    # Try to extract the session and episode; the first pattern that matches wins
    session = nil
    episode = nil
    SESSION_REGEXS.each do |session_regex|
      match = name.match(session_regex)
      next unless match

      name = match.pre_match
      session = match[1].to_i
      episode = match[2].to_i
      break
    end

    # "<n> of <m>" style numbering carries no season, so season 1 is assumed
    if session.nil?
      match = name.match(SESSION_ESP_REGEX_OF)
      if match
        name = match.pre_match
        session = 1
        episode = match[1].to_i
      end
    end

    # Sometimes there can be multiple media files for a single movie, we want
    # to remove the version number if this is the case
    cd_match = location.match(CD_FOLDER_REGEX)
    if cd_match
      cd_number = cd_match[1].to_i
      match = name.match(/#{cd_number}$/) || name.match(/part\s?#{cd_number}/i)
      name = match.pre_match if match
    end

    FileNameInfo.new(:raw_name => raw_name, :name => name.strip, :year => year,
                     :series => session, :episode => episode, :location => location)
  end

  # Returns the part of location after its last "/" as a new String, or a
  # copy of location when it contains no "/".
  def self.get_file_name(location)
    last_sep = location.rindex(FILE_SEP_REGEX)
    last_sep ? location[(last_sep + 1)..-1] : location.dup
  end

  # Returns the name of the folder that contains the file, or nil when the
  # location has no folder component. When that folder looks like a CD folder
  # (e.g. CD1) and a further parent exists, that parent is returned instead.
  def self.parent_folder_name(location)
    # Remove first / and break by folder name
    folders = location.sub(/^\//, '').split('/')
    return nil if folders.size < 2

    parent_folder = folders[-2]
    # If the folder is a CD folder e.g. CD1 go up 1 more
    return folders[-3] if folders.size > 2 && parent_folder =~ /CD\d/i

    parent_folder
  end

end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: toname
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - Sam Cavenagh
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-08-08 00:00:00 -07:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 2
30
+ - 5
31
+ - 8
32
+ version: 2.5.8
33
+ type: :development
34
+ version_requirements: *id001
35
+ description: Convert video/torrent filename into movie/tv series name and year
36
+ email: cavenaghweb@hotmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - README.md
43
+ files:
44
+ - README.md
45
+ - lib/file_name_info.rb
46
+ - lib/to_name.rb
47
+ has_rdoc: true
48
+ homepage: http://github.com/o-sam-o/toname
49
+ licenses: []
50
+
51
+ post_install_message:
52
+ rdoc_options:
53
+ - --main
54
+ - README.md
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ segments:
63
+ - 0
64
+ version: "0"
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ segments:
71
+ - 0
72
+ version: "0"
73
+ requirements: []
74
+
75
+ rubyforge_project:
76
+ rubygems_version: 1.3.7
77
+ signing_key:
78
+ specification_version: 3
79
+ summary: To Name
80
+ test_files: []
81
+