sjunkieex 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +19 -0
- data/Gemfile +7 -0
- data/LICENSE +18 -0
- data/README.md +22 -0
- data/Rakefile +9 -0
- data/bin/sjunkieex +122 -0
- data/lib/sjunkieex/interface.rb +147 -0
- data/lib/sjunkieex/series_index.rb +134 -0
- data/lib/sjunkieex/version.rb +3 -0
- data/lib/sjunkieex.rb +7 -0
- data/sjunkieex.gemspec +20 -0
- data/test/seriesindex_example.xml +4568 -0
- data/test/site_dumps/chase.html +757 -0
- data/test/site_dumps/homepage.html +3345 -0
- data/test/test_helper.rb +8 -0
- data/test/test_interface.rb +53 -0
- data/test/test_series_index.rb +69 -0
- metadata +101 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,18 @@
(General Public License)

Copyright (c) 2012 Philipp Böhm

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation in version 3 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA.
|
data/README.md
ADDED
@@ -0,0 +1,22 @@
# Sjunkieex (Serienjunkies Extractor)

Program that extracts links from serienjunkies.org for your series and
searches for new episodes for existing series.

## Installation

Install it yourself with (add sudo for systemwide installation):

    $ gem install sjunkieex

## Usage

TODO: Write usage instructions here

## Contributing

1. Fork it
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Commit your changes (`git commit -am 'Added some feature'`)
4. Push to the branch (`git push origin my-new-feature`)
5. Create new Pull Request
data/Rakefile
ADDED
data/bin/sjunkieex
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
#!/usr/bin/env ruby
# -*- ruby -*-
# encoding: UTF-8
#
# Command-line entry point: loads the local series index, checks
# serienjunkies.org for new episodes and prints/dumps one download
# link per new episode.

$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'lib')

require 'sjunkieex'
require 'hashconfig'
require 'optparse'
require 'fileutils'
require 'yaml' # BUGFIX: Object#to_yaml is used below but yaml was never required

# create program configuration dirs/files
CONFIG_DIR = File.join( File.expand_path("~"), ".sjunkieex" )
CONFIG_FILE = File.join( CONFIG_DIR, "config.yml" )
FileUtils.mkdir(CONFIG_DIR) unless File.directory?(CONFIG_DIR)

###
# configuration (values from CONFIG_FILE override these defaults)
STANDARD_CONFIG = {
  :hd_series => ["SERIES_NOT_EXISTING_PLACEHOLDER"],
  :index_directory => File.join(CONFIG_DIR, ".index/"),
  :index_suffix => "xml",
  :hoster_id => "nl",
  :dump_links => true,
  :dump_file => "/tmp/gsl_links.txt",
}.merge(Sjunkieex::Interface::STANDARD_CONFIG)

config = STANDARD_CONFIG.merge_with_serialized(CONFIG_FILE)

###
# option definition and handling
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: #{File.basename($PROGRAM_NAME)}"

  opts.separator("")
  opts.separator("Tool that extracts episodelinks from serienjunkies.org.")
  opts.separator("")
  opts.separator(" Options:")

  # opts.on( "-i", "--ignore-seriesinfo",
  #         "do not use the information from the infostore") do |opt|
  #   config[:read_episode_info] = false
  # end

  opts.on( "-v", "--version",
          "Outputs the version number.") do |opt|
    puts Sjunkieex::VERSION
    exit
  end

  opts.separator("")

end.parse!

fail "index directory #{ config[:index_directory]} does not exist" unless
  File.directory?(config[:index_directory])

glob_pattern = File.join(config[:index_directory], "*.#{config[:index_suffix]}")
files = Dir[glob_pattern]

series_index = Sjunkieex::SeriesIndex.new(files: files)

# BUGFIX: the message read "there is data for series existing" although
# this branch fires when the index contains *no* series data at all.
fail "there is no series data existing" if series_index.empty?

###
# look for new series
dump_links = []
interface = Sjunkieex::Interface.new(series_index, config)

interface.look_for_new_episodes.each do |link, series|
  puts "\nLook for new episodes in '#{series}'"

  links = interface.parse_series_page(series, link)
  links.each do |identifier, link_data|
    puts link_data[:episodedata]

    # series listed in :hd_series are fetched in HD
    hd = config[:hd_series].include?(series)

    ###
    # select links, depending on wanted resolution (1080p preferred
    # over 720p for HD series; SD otherwise)
    links = []
    if hd
      if link_data[:hd_1080p]
        links = link_data[:hd_1080p]
      elsif link_data[:hd_720p]
        links = link_data[:hd_720p]
      end
    else
      links = link_data[:sd] if link_data[:sd]
    end

    if links.empty?
      puts "there are no links in this resolution"
      next
    end

    # keep only links belonging to the configured hoster
    # (BUGFIX: block parameter used to shadow the outer 'link')
    download_links = links.select do |l|
      l.match(/\/f-\w+\/#{ config[:hoster_id] }_/)
    end

    if download_links.empty?
      puts "there are no links for this hoster"
      next
    end

    print download_links[0].to_yaml
    dump_links << download_links[0]
  end
end

###
# dump links to file
if config[:dump_links]
  File.open(config[:dump_file], 'w') do |f|
    dump_links.each { |l| f.write(l + "\n") }
  end
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'zlib'
|
3
|
+
|
4
|
+
module Sjunkieex

  # Scraper for serienjunkies.org: detects new episodes on the homepage
  # and extracts per-episode hoster links from the series pages.
  class Interface

    STANDARD_CONFIG = {
      url: "http://serienjunkies.org",
      german_only: true,
      hd: false,
      subbed_allowed: false,
    }

    attr_reader :options

    # Public: create a new interface
    #
    # series_index - SeriesIndex holding the already existing episodes
    # options      - Hash that is merged over STANDARD_CONFIG
    def initialize(series_index, options = {})
      @options = STANDARD_CONFIG.merge(options)
      @index = series_index
    end

    # Public: Looks for new episodes on the homepage
    #
    # Returns a Hash mapping page links to series names for sites that
    # should be visited
    def look_for_new_episodes
      links = Hash.new

      # NOTE(review): the pages are HTML; Nokogiri::XML is kept here to
      # preserve the original parsing behaviour.
      doc = Nokogiri::XML(get_page_data(@options[:url]))
      doc.css("div#content > div.post > div.post-content a").each do |link|
        c = link.content

        ####
        # skip links that are not suitable
        next unless is_link_useful?(c)

        next unless @index.is_series_in_index?(c)

        series_name = Sjunkieex::SeriesIndex.extract_seriesname(c)
        next unless series_name

        next if @index.episode_existing?(series_name, c)

        href = link[:href]
        next if links.include?(href)

        links[href] = series_name
      end

      return links
    end

    # Public: parses a series page and extracts links
    #
    # series_name - the series name and the key in the index
    # series_link - the link to the page
    #
    # Returns a hash indexed by episode identifier; each value holds
    # the links per resolution plus :episodedata and :series
    def parse_series_page(series_name, series_link)

      link_data = Hash.new

      doc = Nokogiri::XML(get_page_data(series_link))
      doc.css("div#content > div.post > div.post-content p").each do |paragraph|

        # paragraphs carrying a class attribute do not contain episodes
        next if paragraph[:class]

        episode_data = paragraph.css("strong:first-child").text
        next unless is_link_useful?(episode_data)

        next if @index.episode_existing?(series_name, episode_data)

        if id = Sjunkieex::SeriesIndex.extract_episode_identifier(episode_data)

          # classify episode resolution (SD unless 720p/1080p is named)
          resolution = :sd
          (resolution = :hd_720p) if episode_data.match(/720[pi]/i)
          (resolution = :hd_1080p) if episode_data.match(/1080[pi]/i)

          # extract hoster links
          episode_links = []
          paragraph.css("a").each do |link|
            episode_links << link[:href]
          end

          (link_data[id] = Hash.new) unless link_data[id]
          link_data[id][resolution] = episode_links
          link_data[id][:episodedata] = episode_data
          link_data[id][:series] = series_name
        end
      end

      return link_data
    end

    private

    # Internal: check the link data against criterias
    #
    # link_data - data for the link
    #
    # Returns true if the link is useful or false if it can be skipped
    def is_link_useful?(link_data)

      # must contain an episode pattern like S01E01
      return false unless link_data.match(/S\w+E\w+/i)

      # skip links depending on language
      if @options[:german_only]
        return false unless link_data.match(/German/i)

        unless @options[:subbed_allowed]
          return false if link_data.match(/Subbed/i)
        end
      else
        return false if link_data.match(/German/i)
      end

      true
    end

    # Internal: fetch a page, transparently decompressing gzipped bodies
    #
    # link - URL or local file path
    #
    # Returns the page content
    def get_page_data(link)
      # BUGFIX: Kernel#open runs a shell command for arguments starting
      # with "|" and no longer dispatches URLs on Ruby >= 3.0. Use
      # URI.open (open-uri) for remote resources and File.open for
      # local paths (the test suite feeds local HTML dumps).
      stream =
        if link.match(%r{\A[a-z][a-z0-9+.-]*://}i)
          URI.open(link)
        else
          File.open(link)
        end

      begin
        if stream.respond_to?(:content_encoding) && !stream.content_encoding.empty?
          # remote answer is gzip encoded
          Zlib::GzipReader.new(stream).read
        else
          stream.read
        end
      ensure
        stream.close # BUGFIX: the stream was never closed before
      end
    end

  end

end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Sjunkieex

  # Index of already existing episodes, built from one or more XML
  # "serienindex" dump files. Keys are series names, values are hashes
  # indexed by episode identifier (e.g. "1_1" for S01E01).
  class SeriesIndex

    attr_reader :options, :series_data

    # Public: instantiate a new series_index
    #
    # options - Options (default: {})
    #           :files - Array of series index files (XML)
    #
    def initialize(options = {})
      @options = { files: [] }.merge(options)

      @series_data = Hash.new
      @options[:files].each do |file|
        @series_data.merge!(parse_file(file))
      end
    end

    # Public: checks if there are entries in the index
    #
    # Returns true if there are no entries loaded
    def empty?
      @series_data.empty?
    end

    # Public: Check if a supplied episode is in the index
    #
    # series_name  - Name of the series in the index
    # episode_text - episode data
    #
    # Returns true if the episode is existing, false otherwise
    def episode_existing?(series_name, episode_text)
      series = @series_data[series_name]
      return false unless series

      id = SeriesIndex.extract_episode_identifier(episode_text)
      return false unless id

      series[id] ? true : false
    end

    # Public: checks if the seriesname in the supplied data is in the index
    #
    # episode_text - data that contains the episode information
    #
    # Returns true if the series is in the index, false otherwise
    def is_series_in_index?(episode_text)
      series_name = SeriesIndex.extract_seriesname(episode_text)
      return false unless series_name

      @series_data[series_name] ? true : false
    end

    # Public: tries to extract the seriesname from supplied data
    #
    # data - data that holds the episode information,
    #        e.g. "Chase.S01E01.German.[...]"
    #
    # Returns the seriesname (dots turned into spaces) or nil if there
    # is no seriesname
    def self.extract_seriesname(data)
      if md = data.match(/(.*)S\d+E\d+/)
        return md[1].gsub(/\./, " ").strip
      end
      nil
    end

    # Public: tries to extract the episode identifier from the episode data
    #
    # data - data that holds the episode information
    #
    # Returns the identifier "<season>_<episode>" without leading zeros
    # (e.g. "1_1" for S01E01) or nil if there is no identifier
    def self.extract_episode_identifier(data)
      if md = data.match(/S(\d+)E(\d+)/i)
        return "%s_%s" % [md[1].to_i, md[2].to_i]
      end
      nil
    end

    private

    # Internal: parse an index file into a hash indexed by seriesname
    #
    # file - path to the xml file
    #
    # Returns a Hash indexed by seriesname with Hashes as values
    #
    # hash = {
    #   "Chase": {
    #     "1_1": "S01E01 - test.avi",
    #   }
    # }
    def parse_file(file)

      series_data = Hash.new

      # BUGFIX: File.open(file, "r").read leaked the file handle;
      # File.read opens, reads and closes in one step.
      doc = Nokogiri::XML(File.read(file))

      doc.css("serienindex > directory").each do |series_node|

        title = series_node[:name]
        next unless title && title.match(/\w+/)

        series = Hash.new
        series_node.css("file").each do |file_node|

          filename = file_node[:name]
          next unless filename

          if id = SeriesIndex.extract_episode_identifier(filename)
            series[id] = filename
          end
        end

        series_data[title] = series
      end

      series_data
    end

  end

end
|
data/lib/sjunkieex.rb
ADDED
data/sjunkieex.gemspec
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
require File.expand_path('../lib/sjunkieex/version', __FILE__)

# Gem packaging definition for sjunkieex.
Gem::Specification.new do |spec|
  # identity
  spec.name          = "sjunkieex"
  spec.version       = Sjunkieex::VERSION
  spec.authors       = ["Philipp Böhm"]
  spec.email         = ["philipp@i77i.de"]
  spec.homepage      = ""

  # description shown on rubygems.org
  spec.summary       = %q{serienjunkies.org link extractor}
  spec.description   = %q{Tool that extracts links from serienjunkies.org}

  # packaged files come straight from the git index
  spec.files         = `git ls-files`.split($\)
  spec.executables   = spec.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # runtime dependencies
  spec.add_runtime_dependency(%q<nokogiri>, [">= 1.5"])
  spec.add_runtime_dependency(%q<hashconfig>, [">= 0.0.1"])
end
|