sjunkieex 0.0.1

data/.gitignore ADDED
@@ -0,0 +1,19 @@
+ *.gem
+ *.rbc
+ *.swp
+ .bundle
+ .config
+ .yardoc
+ Gemfile.lock
+ InstalledFiles
+ _yardoc
+ coverage
+ doc/
+ lib/bundler/man
+ pkg
+ rdoc
+ spec/reports
+ test/tmp
+ test/vcr_cassettes
+ test/version_tmp
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,7 @@
+ source 'https://rubygems.org'
+
+ gem "turn"
+ gem "ansi"
+
+ # Specify your gem's dependencies in sjunkieex.gemspec
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,18 @@
+
+ (General Public License)
+
+ Copyright (c) 2012 Philipp Böhm
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation in version 3 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ MA 02110-1301, USA.
data/README.md ADDED
@@ -0,0 +1,22 @@
+ # Sjunkieex (Serienjunkies Extractor)
+
+ A tool that searches serienjunkies.org for new episodes of the series in
+ your local index and extracts the matching download links.
+
+ ## Installation
+
+ Install it yourself (add sudo for a system-wide installation):
+
+     $ gem install sjunkieex
+
+ ## Usage
+
+ TODO: Write usage instructions here
+
+ ## Contributing
+
+ 1. Fork it
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,9 @@
+ #!/usr/bin/env rake
+ require 'bundler/gem_tasks'
+ require 'rake/testtask'
+
+ Rake::TestTask.new do |t|
+   t.libs << "test"
+   t.test_files = FileList['test/test*.rb']
+   t.verbose = true
+ end
data/bin/sjunkieex ADDED
@@ -0,0 +1,122 @@
+ #!/usr/bin/env ruby
+ # -*- ruby -*-
+ # encoding: UTF-8
+
+ $LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
+
+ require 'sjunkieex'
+ require 'hashconfig'
+ require 'optparse'
+ require 'fileutils'
+ require 'yaml'
+
+ # create program configuration dirs/files
+ CONFIG_DIR = File.join( File.expand_path("~"), ".sjunkieex" )
+ CONFIG_FILE = File.join( CONFIG_DIR, "config.yml" )
+ FileUtils.mkdir(CONFIG_DIR) unless File.directory?(CONFIG_DIR)
+
+ ###
+ # configuration
+ STANDARD_CONFIG = {
+   :hd_series => ["SERIES_NOT_EXISTING_PLACEHOLDER"],
+   :index_directory => File.join(CONFIG_DIR, ".index/"),
+   :index_suffix => "xml",
+   :hoster_id => "nl",
+   :dump_links => true,
+   :dump_file => "/tmp/gsl_links.txt",
+ }.merge(Sjunkieex::Interface::STANDARD_CONFIG)
+
+ config = STANDARD_CONFIG.merge_with_serialized(CONFIG_FILE)
+
+ ###
+ # option definition and handling
+ options = {}
+ OptionParser.new do |opts|
+   opts.banner = "Usage: #{File.basename($PROGRAM_NAME)}"
+
+   opts.separator("")
+   opts.separator("Tool that extracts episode links from serienjunkies.org.")
+   opts.separator("")
+   opts.separator(" Options:")
+
+   # opts.on( "-i", "--ignore-seriesinfo",
+   #          "do not use the information from the infostore") do |opt|
+   #   config[:read_episode_info] = false
+   # end
+
+   opts.on( "-v", "--version",
+            "Outputs the version number.") do |opt|
+     puts Sjunkieex::VERSION
+     exit
+   end
+
+   opts.separator("")
+
+ end.parse!
+
+ fail "index directory #{config[:index_directory]} does not exist" unless
+   File.directory?(config[:index_directory])
+
+
+ glob_pattern = File.join(config[:index_directory], "*.#{config[:index_suffix]}")
+ files = Dir[glob_pattern]
+
+ series_index = Sjunkieex::SeriesIndex.new(files: files)
+
+ fail "there is no series data in the index" if series_index.empty?
+
+ ###
+ # look for new episodes
+ dump_links = []
+ interface = Sjunkieex::Interface.new(series_index, config)
+
+ interface.look_for_new_episodes.each do |link, series|
+   puts "\nLook for new episodes in '#{series}'"
+
+   links = interface.parse_series_page(series, link)
+   links.each do |identifier, link_data|
+     puts link_data[:episodedata]
+
+     hd = false
+     (hd = true) if config[:hd_series].include? series
+
+     ###
+     # select links, depending on the wanted resolution
+     links = []
+     if hd
+       if link_data[:hd_1080p]
+         links = link_data[:hd_1080p]
+       elsif link_data[:hd_720p]
+         links = link_data[:hd_720p]
+       end
+     else
+       if link_data[:sd]
+         links = link_data[:sd]
+       end
+     end
+
+     if links.empty?
+       puts "there are no links in this resolution"
+       next
+     end
+
+     download_links = links.select do |link|
+       link.match(/\/f-\w+\/#{ config[:hoster_id] }_/)
+     end
+
+     if download_links.empty?
+       puts "there are no links for this hoster"
+       next
+     end
+
+     print download_links[0].to_yaml
+     dump_links << download_links[0]
+   end
+ end
+
+ ###
+ # dump links to file
+ if config[:dump_links]
+   File.open(config[:dump_file], 'w') do |f|
+     dump_links.each { |l| f.write(l + "\n") }
+   end
+ end
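
Once the index directory (by default `~/.sjunkieex/.index/`, as configured above) contains XML index files, the executable is run without arguments: it scans serienjunkies.org, prints the matching links, and, with `:dump_links` enabled, writes them to the `:dump_file` path. A short sketch of the two invocations the option parser supports:

    $ sjunkieex              # scan for new episodes, dump links to /tmp/gsl_links.txt
    $ sjunkieex --version    # print the gem version and exit
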
data/lib/sjunkieex/interface.rb ADDED
@@ -0,0 +1,147 @@
+ require 'open-uri'
+ require 'zlib'
+ require 'nokogiri'
+
+ module Sjunkieex
+
+   class Interface
+
+     STANDARD_CONFIG = {
+       url: "http://serienjunkies.org",
+       german_only: true,
+       hd: false,
+       subbed_allowed: false,
+     }
+
+     attr_reader :options
+
+     def initialize(series_index, options = {})
+       @options = STANDARD_CONFIG.merge(options)
+       @index = series_index
+     end
+
+     # Public: Looks for new episodes on the homepage
+     #
+     # Returns a Hash of links for sites that should be visited
+     def look_for_new_episodes
+       links = Hash.new
+
+       doc = Nokogiri::XML(get_page_data(@options[:url]))
+       doc.css("div#content > div.post > div.post-content a").each do |link|
+         c = link.content
+
+         ####
+         # skip links that are not suitable
+         next unless is_link_useful?(c)
+
+         next unless @index.is_series_in_index?(c)
+
+         series_name = Sjunkieex::SeriesIndex.extract_seriesname(c)
+         next unless series_name
+
+         next if @index.episode_existing?(series_name, c)
+
+         href = link[:href]
+         next if links.include?(href)
+
+         links[href] = series_name
+       end
+
+       return links
+     end
+
+     # Public: parses a series page and extracts links
+     #
+     # series_name - the series name and the key in the index
+     # series_link - the link to the page
+     #
+     # Returns a Hash indexed by episode identifier
+     def parse_series_page(series_name, series_link)
+
+       link_data = Hash.new
+
+       doc = Nokogiri::XML(get_page_data(series_link))
+       doc.css("div#content > div.post > div.post-content p").each do |paragraph|
+
+         next if paragraph[:class]
+
+         episode_data = paragraph.css("strong:first-child").text
+         next unless is_link_useful?(episode_data)
+
+         next if @index.episode_existing?(series_name, episode_data)
+
+         if id = Sjunkieex::SeriesIndex.extract_episode_identifier(episode_data)
+
+           # classify the episode resolution
+           resolution = :sd
+           (resolution = :hd_720p) if episode_data.match(/720[pi]/i)
+           (resolution = :hd_1080p) if episode_data.match(/1080[pi]/i)
+
+           # extract hoster links
+           episode_links = []
+           paragraph.css("a").each do |link|
+             episode_links << link[:href]
+           end
+
+           (link_data[id] = Hash.new) unless link_data[id]
+           link_data[id][resolution] = episode_links
+           link_data[id][:episodedata] = episode_data
+           link_data[id][:series] = series_name
+         end
+       end
+
+       return link_data
+     end
+
+     private
+
+     # Internal: check the link data against the filter criteria
+     #
+     # link_data - data for the link
+     #
+     # Returns true if the link is useful or false if it can be skipped
+     def is_link_useful?(link_data)
+
+       return false unless link_data.match(/S\w+E\w+/i)
+
+       # skip links depending on language
+       if @options[:german_only]
+         return false unless link_data.match(/German/i)
+
+         unless @options[:subbed_allowed]
+           return false if link_data.match(/Subbed/i)
+         end
+       else
+         return false if link_data.match(/German/i)
+       end
+
+       true
+     end
+
+     # Internal: fetch a page and decompress the body if it is gzip encoded
+     #
+     # link - the link that is fetched
+     #
+     # Returns the page content
+     def get_page_data(link)
+
+       body = nil
+
+       stream = open(link)
+       if stream.is_a? File
+         # stream is a local file and has no content_encoding method
+         body = stream.read
+       else
+         # stream is a web resource
+         if (stream.content_encoding.empty?)
+           body = stream.read
+         else
+           body = Zlib::GzipReader.new(stream).read
+         end
+       end
+
+       return body
+     end
+
+   end
+
+ end
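
The executable above is the only consumer of this class, but the same flow can be driven directly from Ruby. A minimal sketch, assuming an index directory of XML files (the path mirrors the defaults in bin/sjunkieex):

    require 'sjunkieex'

    # Build the index from local XML index files (directory is an assumption).
    files = Dir[File.join(Dir.home, ".sjunkieex", ".index", "*.xml")]
    index = Sjunkieex::SeriesIndex.new(files: files)

    # Scan the serienjunkies.org front page for unseen episodes, then pull
    # the per-episode link lists from each candidate series page.
    interface = Sjunkieex::Interface.new(index, german_only: true)
    interface.look_for_new_episodes.each do |link, series|
      interface.parse_series_page(series, link).each do |id, data|
        puts "#{series} #{id}: #{data[:episodedata]}"
      end
    end
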
data/lib/sjunkieex/series_index.rb ADDED
@@ -0,0 +1,134 @@
+ require 'nokogiri'
+
+ module Sjunkieex
+
+   class SeriesIndex
+
+     attr_reader :options, :series_data
+
+     # Public: instantiate a new series_index
+     #
+     # options - Options (default: {})
+     #           :files - Array of series index files
+     #
+     def initialize(options = {})
+       @options = {files: [], }.merge(options)
+
+       @series_data = Hash.new
+       @options[:files].each do |file|
+         @series_data.merge!(parse_file(file))
+       end
+
+     end
+
+     # Public: checks if there are entries in the index
+     #
+     # Returns true if no entries are loaded
+     def empty?
+       @series_data.length == 0
+     end
+
+     # Public: Check if a supplied episode is in the index
+     #
+     # series_name  - Name of the series in the index
+     # episode_text - episode data
+     #
+     # Returns true if the episode exists, false otherwise
+     def episode_existing?(series_name, episode_text)
+       if @series_data[series_name]
+
+         if id = SeriesIndex.extract_episode_identifier(episode_text)
+           if @series_data[series_name][id]
+             return true
+           end
+         end
+       end
+
+       return false
+     end
+
+     # Public: checks if the series name in the supplied data is in the index
+     #
+     # episode_text - data that contains the episode information
+     #
+     # Returns true if the series is in the index, false otherwise
+     def is_series_in_index?(episode_text)
+
+       if series_name = SeriesIndex.extract_seriesname(episode_text)
+         if @series_data[series_name]
+           return true
+         end
+       end
+
+       return false
+     end
+
+     # Public: tries to extract the series name from the supplied data
+     #
+     # data - data that holds the episode information
+     #
+     # Returns the series name or nil if there is none
+     def self.extract_seriesname(data)
+       if md = data.match(/(.*)S\d+E\d+/)
+         return md[1].gsub(/\./, " ").strip
+       end
+       nil
+     end
+
+     # Public: tries to extract the episode identifier from the episode data
+     #
+     # data - data that holds the episode information
+     #
+     # Returns the identifier in the form xx_xx or nil if there is no identifier
+     def self.extract_episode_identifier(data)
+       if md = data.match(/S(\d+)E(\d+)/i)
+         return "%s_%s" % [md[1].to_i, md[2].to_i]
+       end
+       nil
+     end
+
+     private
+
+     # Internal: parse this file to a hash indexed by series name
+     #
+     # file - path to the xml file
+     #
+     # Returns a Hash indexed by series name with Hashes as values
+     #
+     #   hash = {
+     #     "Chase" => {
+     #       "1_1" => "S01E01 - test.avi",
+     #     }
+     #   }
+     def parse_file(file)
+
+       series_data = Hash.new
+
+       content = File.read(file)
+       doc = Nokogiri::XML(content)
+
+       doc.css("serienindex > directory").each do |series_node|
+
+         title = series_node[:name]
+         next unless title && title.match(/\w+/)
+
+         series = Hash.new
+         series_node.css("file").each do |file_node|
+
+           filename = file_node[:name]
+           next unless filename
+
+           if id = SeriesIndex.extract_episode_identifier(filename)
+             series[id] = filename
+           end
+         end
+
+         series_data[title] = series
+       end
+
+       series_data
+     end
+
+   end
+
+ end
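
The index files themselves are plain XML; the layout below is inferred from the selectors in parse_file ("serienindex > directory" nodes with a name attribute, each containing "file" nodes). A minimal sketch that writes such a file and queries it (the file and episode names are made up):

    require 'sjunkieex'

    # Hypothetical index file in the layout parse_file expects.
    File.write("chase.xml", <<-XML)
    <serienindex>
      <directory name="Chase">
        <file name="Chase.S01E01.German.DL.720p.mkv"/>
      </directory>
    </serienindex>
    XML

    index = Sjunkieex::SeriesIndex.new(files: ["chase.xml"])
    index.empty?                                      # => false
    index.is_series_in_index?("Chase.S01E01.German")  # => true
    index.episode_existing?("Chase", "S01E02")        # => false
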
data/lib/sjunkieex/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Sjunkieex
+   VERSION = "0.0.1"
+ end
data/lib/sjunkieex.rb ADDED
@@ -0,0 +1,7 @@
1
+ require "sjunkieex/version"
2
+ require "sjunkieex/interface"
3
+ require "sjunkieex/series_index"
4
+
5
+ module Sjunkieex
6
+
7
+ end
data/sjunkieex.gemspec ADDED
@@ -0,0 +1,20 @@
+ # -*- encoding: utf-8 -*-
+ require File.expand_path('../lib/sjunkieex/version', __FILE__)
+
+ Gem::Specification.new do |gem|
+   gem.authors       = ["Philipp Böhm"]
+   gem.email         = ["philipp@i77i.de"]
+   gem.description   = %q{Tool that extracts links from serienjunkies.org}
+   gem.summary       = %q{serienjunkies.org link extractor}
+   gem.homepage      = ""
+
+   gem.files         = `git ls-files`.split($\)
+   gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+   gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+   gem.name          = "sjunkieex"
+   gem.require_paths = ["lib"]
+   gem.version       = Sjunkieex::VERSION
+
+   gem.add_runtime_dependency(%q<nokogiri>, [">= 1.5"])
+   gem.add_runtime_dependency(%q<hashconfig>, [">= 0.0.1"])
+ end
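
Because the Rakefile loads bundler/gem_tasks, a checkout of this tree can be tested, built, and installed with the standard tasks:

    $ rake test       # run test/test*.rb via the Rake::TestTask defined above
    $ rake build      # package the gem into pkg/sjunkieex-0.0.1.gem
    $ rake install    # build and install the gem locally
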