sjunkieex 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ *.swp
4
+ .bundle
5
+ .config
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/vcr_cassettes
18
+ test/version_tmp
19
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,7 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem "turn"
4
+ gem "ansi"
5
+
6
+ # Specify your gem's dependencies in sjunkieex.gemspec
7
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,18 @@
1
+
2
+ (General Public License)
3
+
4
+ Copyright (c) 2012 Philipp Böhm
5
+
6
+ This program is free software; you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation in version 3 of the License.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program; if not, write to the Free Software
17
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18
+ MA 02110-1301, USA.
data/README.md ADDED
@@ -0,0 +1,22 @@
1
+ # Sjunkieex (Serienjunkies Extractor)
2
+
3
+ Program that extracts links from serienjunkies.org for your series and
4
+ searches for new episodes for existing series.
5
+
6
+ ## Installation
7
+
8
+ Install it yourself with (add sudo for systemwide installation):
9
+
10
+ $ gem install sjunkieex
11
+
12
+ ## Usage
13
+
14
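+ Run `sjunkieex` (`-v` prints the version). Settings are merged with `~/.sjunkieex/config.yml`; by default the series index files are read from `~/.sjunkieex/.index/*.xml`.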
15
+
16
+ ## Contributing
17
+
18
+ 1. Fork it
19
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
20
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
21
+ 4. Push to the branch (`git push origin my-new-feature`)
22
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env rake
2
+ require 'bundler/gem_tasks'
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.libs << "test"
7
+ t.test_files = FileList['test/test*.rb']
8
+ t.verbose = true
9
+ end
data/bin/sjunkieex ADDED
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- ruby -*-
3
+ # encoding: UTF-8
4
+
5
+ $LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')
6
+
7
+ require 'sjunkieex'
8
+ require 'hashconfig'
9
+ require 'optparse'
10
+ require 'fileutils'
11
+
12
+ # locate the per-user configuration directory/file; the directory is created if it is missing
13
+ CONFIG_DIR = File.join( File.expand_path("~"), ".sjunkieex" )
14
+ CONFIG_FILE = File.join( CONFIG_DIR, "config.yml" )
15
+ FileUtils.mkdir(CONFIG_DIR) unless File.directory?(CONFIG_DIR)
16
+
17
+ ###
18
+ # script defaults, merged with the library defaults of Sjunkieex::Interface
19
+ STANDARD_CONFIG = {
20
+ :hd_series => ["SERIES_NOT_EXISTING_PLACEHOLDER"], # series names that are fetched in HD instead of SD
21
+ :index_directory => File.join(CONFIG_DIR, ".index/"), # directory that is searched for index files
22
+ :index_suffix => "xml", # file extension of the index files
23
+ :hoster_id => "nl", # hoster prefix a download link must carry
24
+ :dump_links => true, # write the selected links to :dump_file
25
+ :dump_file => "/tmp/gsl_links.txt", # target file for the dumped links
26
+ }.merge(Sjunkieex::Interface::STANDARD_CONFIG)
27
+
28
+ config = STANDARD_CONFIG.merge_with_serialized(CONFIG_FILE) # NOTE(review): provided by the hashconfig gem; presumably overlays the values stored in CONFIG_FILE - confirm
29
+
30
+ ###
30
+ # command line option definition and handling
31
+ options = {} # NOTE(review): never read in this script
32
+ OptionParser.new do |opts|
33
+ opts.banner = "Usage: #{File.basename($PROGRAM_NAME)}"
34
+
35
+ opts.separator("")
36
+ opts.separator("Tool that extracts episodelinks from serienjunkies.org.")
37
+ opts.separator("")
38
+ opts.separator(" Options:")
39
+ # disabled option, kept for reference:
40
+ # opts.on( "-i", "--ignore-seriesinfo",
41
+ # "do not use the information from the infostore") do |opt|
42
+ # config[:read_episode_info] = false
43
+ # end
44
+
45
+ opts.on( "-v", "--version",
46
+ "Outputs the version number.") do |opt|
47
+ puts Sjunkieex::VERSION
48
+ exit # nothing else is done after printing the version
49
+ end
50
+
51
+ opts.separator("")
52
+
53
+ end.parse!
55
+
56
+ fail "index directory #{ config[:index_directory]} does not exist" unless
57
+ File.directory?(config[:index_directory])
58
+
59
+
60
+ glob_pattern = File.join(config[:index_directory] ,"*.#{config[:index_suffix]}")
61
+ files = Dir[glob_pattern]
62
+
63
+ series_index = Sjunkieex::SeriesIndex.new(files: files)
64
+
65
+ fail "there is data for series existing" if series_index.empty?
66
+
67
+ ###
68
+ # look for new series
69
+ dump_links = []
70
+ interface = Sjunkieex::Interface.new(series_index, config)
71
+
72
+ interface.look_for_new_episodes.each do |link,series|
73
+ puts "\nLook for new episodes in '#{series}'"
74
+
75
+ links = interface.parse_series_page(series, link)
76
+ links.each do |identifier, link_data|
77
+ puts link_data[:episodedata]
78
+
79
+ hd = false
80
+ (hd = true) if config[:hd_series].include? series
81
+
82
+ ###
83
+ # select links, depending on wanted resolution
84
+ links = []
85
+ if hd
86
+ if link_data[:hd_1080p]
87
+ links = link_data[:hd_1080p]
88
+ elsif link_data[:hd_720p]
89
+ links = link_data[:hd_720p]
90
+ end
91
+ else
92
+ if link_data[:sd]
93
+ links = link_data[:sd]
94
+ end
95
+ end
96
+
97
+ if links.empty?
98
+ puts "there are no links in this resolution"
99
+ next
100
+ end
101
+
102
+ download_links = links.select do |link|
103
+ link.match(/\/f-\w+\/#{ config[:hoster_id] }_/)
104
+ end
105
+
106
+ if download_links.empty?
107
+ puts "there are no links for this hoster"
108
+ next
109
+ end
110
+
111
+ print download_links[0].to_yaml
112
+ dump_links << download_links[0]
113
+ end
114
+ end
115
+
116
+ ###
117
+ # dump links to file, one link per line (mode 'w' replaces any previous content)
118
+ if config[:dump_links]
119
+ File.open(config[:dump_file], 'w') do |f|
120
+ dump_links.each { |l| f.write(l + "\n")}
121
+ end
122
+ end
@@ -0,0 +1,147 @@
1
+ require 'open-uri'
2
+ require 'zlib'
3
+
4
+ module Sjunkieex
5
+
6
+ class Interface
7
+
8
+ STANDARD_CONFIG = {
9
+ url: "http://serienjunkies.org", # page that look_for_new_episodes scans
10
+ german_only: true, # true: keep only entries containing "German"; false: reject those entries
11
+ hd: false, # NOTE(review): not read anywhere in this class
12
+ subbed_allowed: false, # with german_only: also accept entries containing "Subbed"
13
+ }
14
+
15
+ attr_reader :options
16
+ # Public: series_index is queried for known series/episodes; options override STANDARD_CONFIG
17
+ def initialize(series_index, options = {})
18
+ @options = STANDARD_CONFIG.merge(options)
19
+ @index = series_index
20
+ end
21
+
22
+ # Public: Looks for new episodes on the homepage (@options[:url])
23
+ #
24
+ # Returns a Hash (page href => series name) of the pages that should be visited
25
+ def look_for_new_episodes
26
+ links = Hash.new
27
+
28
+ doc = Nokogiri::XML(get_page_data(@options[:url])) # NOTE(review): parsed with the XML parser - confirm an HTML parser is not needed
29
+ doc.css("div#content > div.post > div.post-content a").each do |link|
30
+ c = link.content # text of the link
31
+
32
+ ####
33
+ # skip links that are not suitable
34
+ next unless is_link_useful?(c)
35
+
36
+ next unless @index.is_series_in_index?(c) # series is not tracked in the index
37
+
38
+ series_name = Sjunkieex::SeriesIndex.extract_seriesname(c)
39
+ next unless series_name
40
+
41
+ next if @index.episode_existing?(series_name, c) # episode is already in the index
42
+
43
+ href = link[:href]
44
+ next if links.include?(href) # page already recorded, the first series name is kept
45
+
46
+ links[href] = series_name
47
+ end
48
+
49
+ return links
50
+ end
51
+
52
+ # Public: parses a series page and extracts the links of episodes that are not in the index
53
+ #
54
+ # series_name - the series name and the key in the index
55
+ # series_link - the link to the page
56
+ #
57
+ # Returns a Hash indexed by episode identifier; each value holds the links per resolution (:sd, :hd_720p, :hd_1080p), :episodedata and :series
58
+ def parse_series_page(series_name, series_link)
59
+
60
+ link_data = Hash.new
61
+
62
+ doc = Nokogiri::XML(get_page_data(series_link)) # NOTE(review): parsed with the XML parser - confirm an HTML parser is not needed
63
+ doc.css("div#content > div.post > div.post-content p").each do |paragraph|
64
+
65
+ next if paragraph[:class] # paragraphs that carry a class attribute are skipped
66
+
67
+ episode_data = paragraph.css("strong:first-child").text
68
+ next unless is_link_useful?(episode_data)
69
+
70
+ next if @index.episode_existing?(series_name, episode_data)
71
+
72
+ if id = Sjunkieex::SeriesIndex.extract_episode_identifier(episode_data)
73
+
74
+ # classify episode resolution (1080 wins when both patterns match)
75
+ resolution = :sd
76
+ (resolution = :hd_720p) if episode_data.match(/720[pi]/i)
77
+ (resolution = :hd_1080p) if episode_data.match(/1080[pi]/i)
78
+
79
+ # extract hoster links
80
+ episode_links = []
81
+ paragraph.css("a").each do |link|
82
+ episode_links << link[:href]
83
+ end
84
+
85
+ (link_data[id] = Hash.new) unless link_data[id]
86
+ link_data[id][resolution] = episode_links # a later paragraph with the same id and resolution replaces earlier links
87
+ link_data[id][:episodedata] = episode_data
88
+ link_data[id][:series] = series_name
89
+ end
90
+ end
91
+
92
+ return link_data
93
+ end
94
+
95
+ private
96
+
97
+ # Internal: check the link data against the configured criteria
97
+ #
98
+ # link_data - data for the link
99
+ #
100
+ # Returns true if the link is useful or false if it can be skipped
101
+ def is_link_useful?(link_data)
102
+
103
+ return false unless link_data.match(/S\w+E\w+/i) # must contain something that looks like a season/episode marker
104
+
105
+ # skip links depending on language
106
+ if @options[:german_only]
107
+ return false unless link_data.match(/German/i)
108
+
109
+ unless @options[:subbed_allowed]
110
+ return false if link_data.match(/Subbed/i)
111
+ end
112
+ else
113
+ return false if link_data.match(/German/i) # german_only disabled: entries marked German are rejected
114
+ end
115
+
116
+ true
117
+ end
119
+
120
+ # Internal: get a page and do some stuff if the page is gzip encoded
121
+ #
122
+ # link - the link that is fetched
123
+ #
124
+ # Returns the page content
125
+ def get_page_data(link)
126
+
127
+ body = nil
128
+
129
+ stream = open(link)
130
+ if stream.is_a? File
131
+ # file is a local file, has not methods below
132
+ body = stream.read
133
+ else
134
+ # file is web uri
135
+ if (stream.content_encoding.empty?)
136
+ body = stream.read
137
+ else
138
+ body = Zlib::GzipReader.new(stream).read
139
+ end
140
+ end
141
+
142
+ return body
143
+ end
144
+
145
+ end
146
+
147
+ end
@@ -0,0 +1,134 @@
1
+ require 'nokogiri'
2
+
3
+ module Sjunkieex
4
+
5
+ class SeriesIndex
6
+
7
+ attr_reader :options, :series_data
8
+
9
+ # Public: instantiate a new series_index and load all supplied index files
10
+ #
11
+ # options - Options (default: {})
12
+ # :files - Array of paths to series index files
13
+ #
14
+ def initialize(options = {})
15
+ @options = {files: [], }.merge(options)
16
+
17
+ @series_data = Hash.new
18
+ @options[:files].each do |file|
19
+ @series_data.merge!(parse_file(file)) # a series of the same name from a later file replaces the earlier one
20
+ end
21
+
22
+ end
23
+
24
+ # Public: checks if there are entries in the index
25
+ #
26
+ # Returns true if there are no entries loaded, false otherwise
27
+ def empty?
28
+ @series_data.length == 0
29
+ end
30
+
31
+ # Public: Check if a supplied episode is in the index
32
+ #
33
+ # series_name - Name of the series in the index
34
+ # episode_text - episode data that contains a season/episode marker
35
+ #
36
+ # Returns true if the episode exists; false if the series, the marker or the episode is missing
37
+ def episode_existing?(series_name, episode_text)
38
+ if @series_data[series_name]
39
+
40
+ if id = SeriesIndex.extract_episode_identifier(episode_text)
41
+ if @series_data[series_name][id]
42
+ return true
43
+ end
44
+ end
45
+ end
46
+
47
+ return false
48
+ end
49
+
50
+ # Public: checks if the seriesname in the supplied data is in the index
51
+ #
52
+ # episode_text - data that contains the episode information
53
+ #
54
+ # Returns true if the series is in the index; false if it is not or no seriesname could be extracted
55
+ def is_series_in_index?(episode_text)
56
+
57
+ if series_name = SeriesIndex.extract_seriesname(episode_text)
58
+ if @series_data[series_name]
59
+ return true
60
+ end
61
+ end
62
+
63
+ return false
64
+ end
65
+
66
+ # Public: tries to extract the seriesname (the text in front of the SxxEyy marker) from supplied data
67
+ #
68
+ # data - data that holds the episode information
69
+ #
70
+ # Returns the seriesname (dots replaced by spaces, stripped) or nil if there is no seriesname
71
+ def self.extract_seriesname(data)
72
+ if md = data.match(/(.*)S\d+E\d+/) # NOTE(review): case-sensitive, while extract_episode_identifier ignores case - confirm this is intended
73
+ return md[1].gsub(/\./, " ").strip
74
+ end
75
+ nil
76
+ end
77
+
78
+ # Public: tries to extract the episode identifier from the episode data
79
+ #
80
+ # data - data that holds the episode information
81
+ #
82
+ # Returns the identifier "season_episode" (e.g. "1_2" for S01E02) or nil if there is no identifier
83
+ def self.extract_episode_identifier(data)
84
+ if md = data.match(/S(\d+)E(\d+)/i)
85
+ return "%s_%s" % [md[1].to_i, md[2].to_i] # to_i drops leading zeros
86
+ end
87
+ nil
88
+ end
89
+
90
+ private
91
+
92
+ # Internal: parse this file to a hash indexed by seriesname
93
+ #
94
+ # file - path to the xml file
95
+ #
96
+ # Returns a Hash indexed by seriesname with Hashes as values
97
+ #
98
+ # hash = {
99
+ # "Chase": {
100
+ # "1_1": "S01E01 - test.avi",
101
+ # }
102
+ # }
103
+ def parse_file(file)
104
+
105
+ series_data = Hash.new
106
+
107
+ content = File.open(file, "r").read
108
+ doc = Nokogiri::XML(content)
109
+
110
+ doc.css("serienindex > directory").each do |series_node|
111
+
112
+ title = series_node[:name]
113
+ next unless title && title.match(/\w+/)
114
+
115
+ series = Hash.new
116
+ series_node.css("file").each do |file_node|
117
+
118
+ filename = file_node[:name]
119
+ next unless filename
120
+
121
+ if id = SeriesIndex.extract_episode_identifier(filename)
122
+ series[id] = filename
123
+ end
124
+ end
125
+
126
+ series_data[title] = series
127
+ end
128
+
129
+ series_data
130
+ end
131
+
132
+ end
133
+
134
+ end
@@ -0,0 +1,3 @@
1
+ module Sjunkieex
2
+ VERSION = "0.0.1" # gem version; used by sjunkieex.gemspec and printed by `sjunkieex --version`
3
+ end
data/lib/sjunkieex.rb ADDED
@@ -0,0 +1,7 @@
1
+ require "sjunkieex/version"
2
+ require "sjunkieex/interface"
3
+ require "sjunkieex/series_index"
4
+
5
+ module Sjunkieex
6
+
7
+ end
data/sjunkieex.gemspec ADDED
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/sjunkieex/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Philipp Böhm"]
6
+ gem.email = ["philipp@i77i.de"]
7
+ gem.description = %q{Tool that extracts links from serienjunkies.org}
8
+ gem.summary = %q{serienjunkies.org link extractor}
9
+ gem.homepage = ""
10
+
11
+ gem.files = `git ls-files`.split($\)
12
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
13
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
14
+ gem.name = "sjunkieex"
15
+ gem.require_paths = ["lib"]
16
+ gem.version = Sjunkieex::VERSION
17
+
18
+ gem.add_runtime_dependency(%q<nokogiri>, [">= 1.5"])
19
+ gem.add_runtime_dependency(%q<hashconfig>, [">= 0.0.1"])
20
+ end